tre_regex.hpp

00001 #line 1047 "./lpsrc/tre.pak"
00002 /*
00003   regex.h - POSIX.2 compatible regexp interface and TRE extensions
00004 
00005   Copyright (C) 2001-2004 Ville Laurikari <vl@iki.fi>.
00006 
00007   This program is free software; you can redistribute it and/or modify
00008   it under the terms of the GNU General Public License version 2 (June
00009   1991) as published by the Free Software Foundation.
00010 
00011   This program is distributed in the hope that it will be useful,
00012   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014   GNU General Public License for more details.
00015 
00016   You should have received a copy of the GNU General Public License
00017   along with this program; if not, write to the Free Software
00018   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019 
00020 */
00021 
00022 #ifndef TRE_REGEX_H
00023 #define TRE_REGEX_H 1
00024 
00025 #include "tre_config.hpp"
00026 
00027 #ifdef HAVE_SYS_TYPES_H
00028 #include <sys/types.h>
00029 #endif /* HAVE_SYS_TYPES_H */
00030 
00031 #ifdef HAVE_LIBUTF8_H
00032 #include <libutf8.h>
00033 #endif /* HAVE_LIBUTF8_H */
00034 
00035 #ifdef TRE_USE_SYSTEM_REGEX_H
00036 /* Include the system regex.h to make TRE ABI compatible with the
00037    system regex. */
00038 #include TRE_SYSTEM_REGEX_H_PATH
00039 #endif /* TRE_USE_SYSTEM_REGEX_H */
00040 
00041 #ifdef __cplusplus
00042 extern "C" {
00043 #endif
00044 
00045 #ifdef TRE_USE_SYSTEM_REGEX_H
00046 
00047 #ifndef REG_OK
00048 #define REG_OK 0 /* Defined here only if no earlier header defined REG_OK. */
00049 #endif /* !REG_OK */
00050 
00051 #ifndef HAVE_REG_ERRCODE_T
00052 typedef int reg_errcode_t; /* Plain int stand-in when HAVE_REG_ERRCODE_T is unset. */
00053 #endif /* !HAVE_REG_ERRCODE_T */
00054 
00055 #if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
00056 #define REG_LITERAL 0x1000 /* Used only when neither literal-flag name exists yet. */
00057 #endif /* !REG_NOSPEC && !REG_LITERAL */
00058 
00059 /* Extra regcomp() flags.  REG_RIGHT_ASSOC is the bit directly above REG_LITERAL. */
00060 #define REG_RIGHT_ASSOC (REG_LITERAL << 1)
00061 
00062 /* Extra regexec() flags.  NOTE(review): fixed at 0x1000 and 0x2000 here; confirm they do not collide with the system's regexec() flags. */
00063 #define REG_APPROX_MATCHER       0x1000
00064 #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
00065 
00066 #else /* !TRE_USE_SYSTEM_REGEX_H */
00067 
00068 /* If we're not using the system regex.h, we need to define the
00069    structs and enums ourselves. */
00070 
00071 typedef int regoff_t; /* Offset type used by the regmatch_t fields. */
00072 typedef struct {
00073   size_t re_nsub;  /* Number of parenthesized subexpressions. */
00074   void *value;     /* For internal use only. */
00075 } regex_t;
00076 
00077 typedef struct {
00078   regoff_t rm_so; /* Start offset of a match (field name as in POSIX regmatch_t). */
00079   regoff_t rm_eo; /* End offset of a match (field name as in POSIX regmatch_t). */
00080 } regmatch_t;
00081 
00082 
00083 typedef enum {
00084   REG_OK = 0,           /* No error. */
00085   /* POSIX regcomp() return error codes.  (In the order listed in the
00086      standard.)  The enumerators below take consecutive values from 1. */
00087   REG_NOMATCH,          /* No match. */
00088   REG_BADPAT,           /* Invalid regexp. */
00089   REG_ECOLLATE,         /* Unknown collating element. */
00090   REG_ECTYPE,           /* Unknown character class name. */
00091   REG_EESCAPE,          /* Trailing backslash. */
00092   REG_ESUBREG,          /* Invalid back reference. */
00093   REG_EBRACK,           /* "[]" imbalance */
00094   REG_EPAREN,           /* "\(\)" or "()" imbalance */
00095   REG_EBRACE,           /* "\{\}" or "{}" imbalance */
00096   REG_BADBR,            /* Invalid content of {} */
00097   REG_ERANGE,           /* Invalid use of range operator */
00098   REG_ESPACE,           /* Out of memory.  */
00099   REG_BADRPT            /* Invalid use of repetition operators (per POSIX). */
00100 } reg_errcode_t;
00101 
00102 /* POSIX regcomp() flags.  Each flag is the next bit above the previous one. */
00103 #define REG_EXTENDED    1                    /* 0x01 */
00104 #define REG_ICASE       (REG_EXTENDED << 1)  /* 0x02 */
00105 #define REG_NEWLINE     (REG_ICASE << 1)     /* 0x04 */
00106 #define REG_NOSUB       (REG_NEWLINE << 1)   /* 0x08 */
00107 
00108 /* Extra regcomp() flags. */
00109 #define REG_BASIC       0                    /* 0x00: sets no flag bits */
00110 #define REG_LITERAL     (REG_NOSUB << 1)     /* 0x10 */
00111 #define REG_RIGHT_ASSOC (REG_LITERAL << 1)   /* 0x20 */
00112 
00113 /* POSIX regexec() flags.  These number from 1 again, separately from the regcomp() flags. */
00114 #define REG_NOTBOL 1                         /* 0x01 */
00115 #define REG_NOTEOL (REG_NOTBOL << 1)         /* 0x02 */
00116 
00117 /* Extra regexec() flags. */
00118 #define REG_APPROX_MATCHER       (REG_NOTEOL << 1)          /* 0x04 */
00119 #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)  /* 0x08 */
00120 
00121 #endif /* !TRE_USE_SYSTEM_REGEX_H */
00122 
00123 /* REG_NOSPEC and REG_LITERAL mean the same thing.  Define only the one that is missing; never redefine a name that already exists. */
00124 #if defined(REG_LITERAL) && !defined(REG_NOSPEC)
00125 #define REG_NOSPEC      REG_LITERAL
00126 #elif defined(REG_NOSPEC) && !defined(REG_LITERAL)
00127 #define REG_LITERAL     REG_NOSPEC
00128 #endif /* REG_NOSPEC / REG_LITERAL aliasing */
00129 
00130 /* The maximum number of iterations in a bound expression.  The #undef discards any value defined by an earlier header. */
00131 #undef RE_DUP_MAX
00132 #define RE_DUP_MAX 255
00133 
00134 /* The POSIX.2 regexp functions.  TRE_EXTERN is not defined in this file (presumably it comes from tre_config.hpp). */
00135 TRE_EXTERN int regcomp(regex_t *preg, const char *regex, int cflags);
00136 TRE_EXTERN int regexec(const regex_t *preg, const char *string, size_t nmatch,
00137             regmatch_t pmatch[], int eflags);
00138 TRE_EXTERN size_t regerror(int errcode, const regex_t *preg, char *errbuf,
00139                 size_t errbuf_size);
00140 TRE_EXTERN void regfree(regex_t *preg);
00141 
00142 #ifdef TRE_WCHAR
00143 #ifdef HAVE_WCHAR_H
00144 #include <wchar.h>
00145 #endif /* HAVE_WCHAR_H */
00146 
00147 /* Wide character versions (not in POSIX.2).  Declared with TRE_EXTERN like the other public functions. */
00148 TRE_EXTERN int regwcomp(regex_t *preg, const wchar_t *regex, int cflags);
00149 TRE_EXTERN int regwexec(const regex_t *preg, const wchar_t *string, size_t nmatch,
00150              regmatch_t pmatch[], int eflags);
00151 #endif /* TRE_WCHAR */
00152 
00153 /* Versions with a maximum length argument (`len') and therefore the capability to
00154    handle null characters in the middle of the strings (not in POSIX.2). */
00155 TRE_EXTERN int regncomp(regex_t *preg, const char *regex, size_t len,
00156   int cflags);
00157 TRE_EXTERN int regnexec(const regex_t *preg, const char *string, size_t len,
00158              size_t nmatch, regmatch_t pmatch[], int eflags);
00159 #ifdef TRE_WCHAR /* Same two functions taking wchar_t strings. */
00160 TRE_EXTERN int regwncomp(regex_t *preg, const wchar_t *regex, size_t len,
00161   int cflags);
00162 TRE_EXTERN int regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
00163               size_t nmatch, regmatch_t pmatch[], int eflags);
00164 #endif /* TRE_WCHAR */
00165 
00166 #ifdef TRE_APPROX
00167 
00168 /* Approximate matching parameter struct.  Passed by value to the rega*exec() functions; regaparams_default() fills one in. */
00169 typedef struct {
00170   int cost_ins;        /* Default cost of an inserted character. */
00171   int cost_del;        /* Default cost of a deleted character. */
00172   int cost_subst;      /* Default cost of a substituted character. */
00173   int max_cost;        /* Maximum allowed cost of a match. */
00174 
00175   int max_ins;         /* Maximum allowed number of inserts. */
00176   int max_del;         /* Maximum allowed number of deletes. */
00177   int max_subst;       /* Maximum allowed number of substitutes. */
00178   int max_err;         /* Maximum allowed number of errors total. */
00179 } regaparams_t;
00180 
00181 /* Approximate matching result struct.  Passed by pointer (`match') to the rega*exec() functions. */
00182 typedef struct {
00183   size_t nmatch;       /* Length of pmatch[] array. */
00184   regmatch_t *pmatch;  /* Submatch data. */
00185   int cost;            /* Cost of the match. */
00186   int num_ins;         /* Number of inserts in the match. */
00187   int num_del;         /* Number of deletes in the match. */
00188   int num_subst;       /* Number of substitutes in the match. */
00189 } regamatch_t;
00190 
00191 
00192 /* Approximate matching functions.  Declared with TRE_EXTERN like the other public functions. */
00193 TRE_EXTERN int regaexec(const regex_t *preg, const char *string,
00194              regamatch_t *match, regaparams_t params, int eflags);
00195 TRE_EXTERN int reganexec(const regex_t *preg, const char *string, size_t len,
00196               regamatch_t *match, regaparams_t params, int eflags);
00197 #ifdef TRE_WCHAR
00198 /* Wide character approximate matching. */
00199 TRE_EXTERN int regawexec(const regex_t *preg, const wchar_t *string,
00200               regamatch_t *match, regaparams_t params, int eflags);
00201 TRE_EXTERN int regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
00202                regamatch_t *match, regaparams_t params, int eflags);
00203 #endif /* TRE_WCHAR */
00204 
00205 /* Sets the parameters to default values. */
00206 TRE_EXTERN void regaparams_default(regaparams_t *params);
00207 #endif /* TRE_APPROX */
00208 
00209 #ifdef TRE_WCHAR /* tre_char_t: character type written by tre_str_source.get_next_char. */
00210 typedef wchar_t tre_char_t;
00211 #else /* !TRE_WCHAR */
00212 typedef unsigned char tre_char_t;
00213 #endif /* !TRE_WCHAR */
00214 
00215 typedef struct { /* Callback-based string source consumed by reguexec(). */
00216   int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context); /* Presumably stores the next character in *c; TODO confirm return convention. */
00217   void (*rewind)(size_t pos, void *context); /* Presumably repositions the source to `pos'; confirm. */
00218   int (*compare)(size_t pos1, size_t pos2, size_t len, void *context); /* Presumably compares `len' characters at pos1 and pos2; confirm. */
00219   void *context; /* Value matching the `context' argument of the callbacks above. */
00220 } tre_str_source;
00221 
00222 TRE_EXTERN int reguexec(const regex_t *preg, const tre_str_source *string, /* Takes its input as a tre_str_source, not a string. */
00223              size_t nmatch, regmatch_t pmatch[], int eflags); /* TRE_EXTERN added to match the other public functions. */
00224 
00225 /* Returns the version string.  The returned string is static. */
00226 TRE_EXTERN char *tre_version(void);
00227 
00228 /* Returns the value for a config parameter.  The type to which `result'
00229    must point depends on the value of `query'; see the documentation for
00230    more details. */
00231 TRE_EXTERN int tre_config(int query, void *result);
00232 
00233 enum { /* Presumably the `query' values accepted by tre_config() -- confirm.  Values run 0..4 in this order. */
00234   TRE_CONFIG_APPROX,
00235   TRE_CONFIG_WCHAR,
00236   TRE_CONFIG_MULTIBYTE,
00237   TRE_CONFIG_SYSTEM_ABI,
00238   TRE_CONFIG_VERSION
00239 };
00240 
00241 /* Returns 1 if the compiled pattern `preg' has back references, 0 if not. */
00242 TRE_EXTERN int tre_have_backrefs(const regex_t *preg);
00243 
00244 /* Returns 1 if the compiled pattern `preg' uses approximate matching features,
00245    0 if not. */
00246 TRE_EXTERN int tre_have_approx(const regex_t *preg);
00247 
00248 #ifdef __cplusplus
00249 }
00250 #endif
00251 #endif                          /* TRE_REGEX_H */
00252 
00253 /* EOF */

Generated on Thu May 29 14:36:12 2008 for Felix by  doxygen 1.5.5