tre_match-utils.hpp

00001 #line 6546 "./lpsrc/tre.pak"
00002 
00003 /*
00004   tre-match-utils.h - TRE matcher helper definitions
00005 
00006   Copyright (C) 2001-2004 Ville Laurikari <vl@iki.fi>.
00007 
00008   This program is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License version 2 (June
00010   1991) as published by the Free Software Foundation.
00011 
00012   This program is distributed in the hope that it will be useful,
00013   but WITHOUT ANY WARRANTY; without even the implied warranty of
00014   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015   GNU General Public License for more details.
00016 
00017   You should have received a copy of the GNU General Public License
00018   along with this program; if not, write to the Free Software
00019   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00020 
00021 */
00022 
/* Views the `string' variable of the expansion site as a pointer to
   tre_str_source.  The STR_USER branch of GET_NEXT_WCHAR() goes through
   this alias to reach the get_next_char callback and its context. */
00023 #define str_source ((tre_str_source*)string)
00024 
00025 #ifdef TRE_WCHAR
00026 
00027 #ifdef TRE_MULTIBYTE
00028 
00029 /* Wide character and multibyte support. */
00030 
/* Step the matcher forward by one input character.

   This is a statement macro that expands in place and uses the following
   names from the enclosing function: type, pos, len, str_byte, str_wide,
   prev_c, next_c, pos_add_next, mbstate, str_user_end and (through
   str_source) string.  The previous next_c is first saved in prev_c, then
   next_c is refilled depending on `type':

   STR_BYTE, STR_WIDE
     pos grows by one.  If len is non-negative and pos has reached it,
     next_c becomes NUL and nothing is consumed; otherwise the next byte
     (converted through unsigned char) or wide character is consumed.
   STR_MBS
     pos grows by pos_add_next.  One character is decoded with
     tre_mbrtowc() from at most len - pos bytes (32 bytes when len is
     negative).  If decoding fails, the ENCLOSING function returns
     REG_NOMATCH.
   STR_USER
     pos grows by pos_add_next and the character is fetched through the
     get_next_char callback of str_source; the callback's return value is
     stored in str_user_end. */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        ++pos;                                                                \
        next_c = (len < 0 || pos < len)                                       \
                   ? (unsigned char)(*str_byte++) : '\0';                     \
      }                                                                       \
    else if (type == STR_WIDE)                                                \
      {                                                                       \
        ++pos;                                                                \
        next_c = (len < 0 || pos < len) ? *str_wide++ : L'\0';                \
      }                                                                       \
    else if (type == STR_MBS)                                                 \
      {                                                                       \
        pos += pos_add_next;                                                  \
        if (str_byte == NULL)                                                 \
          next_c = L'\0';                                                     \
        else                                                                  \
          {                                                                   \
            int n_avail = (len >= 0) ? len - pos : 32;                        \
            if (n_avail <= 0)                                                 \
              {                                                               \
                next_c = L'\0';                                               \
                pos_add_next = 1;                                             \
              }                                                               \
            else                                                              \
              {                                                               \
                size_t n_used = tre_mbrtowc(&next_c, str_byte, n_avail,       \
                                            &mbstate);                        \
                if (n_used == (size_t)-1 || n_used == (size_t)-2)             \
                  return REG_NOMATCH;                                         \
                /* Zero result with an explicit length: yield 0 and */       \
                /* advance by exactly one byte.                      */       \
                if (n_used == 0 && len >= 0)                                  \
                  {                                                           \
                    next_c = 0;                                               \
                    n_used = 1;                                               \
                  }                                                           \
                pos_add_next = n_used;                                        \
                str_byte += n_used;                                           \
              }                                                               \
          }                                                                   \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(0)
00094 
00095 #else /* !TRE_MULTIBYTE */
00096 
00097 /* Wide character support, no multibyte support. */
00098 
/* Step the matcher forward by one input character (byte, wide and
   user-supplied sources; no multibyte decoding in this variant).

   This is a statement macro that expands in place and uses the following
   names from the enclosing function: type, pos, len, str_byte, str_wide,
   prev_c, next_c, pos_add_next, str_user_end and (through str_source)
   string.  The previous next_c is first saved in prev_c.

   STR_BYTE, STR_WIDE
     pos grows by one.  If len is non-negative and pos has reached it,
     next_c becomes NUL and nothing is consumed; otherwise the next byte
     (converted through unsigned char) or wide character is consumed.
   STR_USER
     pos grows by pos_add_next and the character is fetched through the
     get_next_char callback of str_source; the callback's return value is
     stored in str_user_end. */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        ++pos;                                                                \
        next_c = (len < 0 || pos < len)                                       \
                   ? (unsigned char)(*str_byte++) : '\0';                     \
      }                                                                       \
    else if (type == STR_WIDE)                                                \
      {                                                                       \
        ++pos;                                                                \
        next_c = (len < 0 || pos < len) ? *str_wide++ : L'\0';                \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(0)
00125 
00126 #endif /* !TRE_MULTIBYTE */
00127 
00128 #else /* !TRE_WCHAR */
00129 
00130 /* No wide character or multibyte support. */
00131 
/* Step the matcher forward by one input character (byte strings and
   user-supplied sources only in this variant).

   This is a statement macro that expands in place and uses the following
   names from the enclosing function: type, pos, len, str_byte, prev_c,
   next_c, pos_add_next, str_user_end and (through str_source) string.
   The previous next_c is first saved in prev_c.

   STR_BYTE
     pos grows by one.  If len is non-negative and pos has reached it,
     next_c becomes NUL and nothing is consumed; otherwise the next byte
     is consumed and converted through unsigned char.
   STR_USER
     pos grows by pos_add_next and the character is fetched through the
     get_next_char callback of str_source; the callback's return value is
     stored in str_user_end. */
#define GET_NEXT_WCHAR()                                                      \
  do {                                                                        \
    prev_c = next_c;                                                          \
    if (type == STR_BYTE)                                                     \
      {                                                                       \
        ++pos;                                                                \
        next_c = (len < 0 || pos < len)                                       \
                   ? (unsigned char)(*str_byte++) : '\0';                     \
      }                                                                       \
    else if (type == STR_USER)                                                \
      {                                                                       \
        pos += pos_add_next;                                                  \
        str_user_end = str_source->get_next_char(&next_c, &pos_add_next,      \
                                                 str_source->context);        \
      }                                                                       \
  } while(0)
00150 
00151 #endif /* !TRE_WCHAR */
00152 
00153 
00154 
/* Nonzero when `c' is an underscore or is accepted by tre_isalnum(). */
#define IS_WORD_CHAR(c)  ((c) == L'_' || tre_isalnum(c))

/* Evaluates to nonzero when at least one of the assertions flagged in
   `assertions' is violated at the current position, 0 otherwise.

   Expands in place and reads pos, prev_c, next_c, reg_notbol, reg_noteol
   and reg_newline from the enclosing scope.  Per flag, the violation
   condition is:

   ASSERT_AT_BOL     (pos > 0 or reg_notbol) and not (prev_c is a newline
                     while reg_newline is set)
   ASSERT_AT_EOL     (next_c is not NUL or reg_noteol) and not (next_c is
                     a newline while reg_newline is set)
   ASSERT_AT_BOW     pos > 0 and (prev_c is a word char or next_c is not)
   ASSERT_AT_EOW     prev_c is not a word char or next_c is one
   ASSERT_AT_WB      pos != 0, next_c is not NUL, and prev_c/next_c agree
                     on being word chars
   ASSERT_AT_WB_NEG  pos == 0, or next_c is NUL, or prev_c/next_c differ
                     on being word chars

   The argument is parenthesized at every use so that a compound
   expression such as `a | b' is masked as a whole; it is still evaluated
   several times, so it must be free of side effects. */
#define CHECK_ASSERTIONS(assertions)                                          \
  ((((assertions) & ASSERT_AT_BOL)                                            \
    && (pos > 0 || reg_notbol)                                                \
    && (prev_c != L'\n' || !reg_newline))                                     \
   || (((assertions) & ASSERT_AT_EOL)                                         \
       && (next_c != L'\0' || reg_noteol)                                     \
       && (next_c != L'\n' || !reg_newline))                                  \
   || (((assertions) & ASSERT_AT_BOW)                                         \
       && (pos > 0 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))))       \
   || (((assertions) & ASSERT_AT_EOW)                                         \
       && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))                    \
   || (((assertions) & ASSERT_AT_WB)                                          \
       && (pos != 0 && next_c != L'\0'                                        \
           && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))                  \
   || (((assertions) & ASSERT_AT_WB_NEG)                                      \
       && (pos == 0 || next_c == L'\0'                                        \
           || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
00174 
00175 
00176 
00177 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
00178 inline static int
00179 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
00180               int *t1, int *t2)
00181 {
00182   int i;
00183   for (i = 0; i < num_tags; i++)
00184     {
00185       if (tag_directions[i] == TRE_TAG_MINIMIZE)
00186         {
00187           if (t1[i] < t2[i])
00188             return 1;
00189           if (t1[i] > t2[i])
00190             return 0;
00191         }
00192       else
00193         {
00194           if (t1[i] > t2[i])
00195             return 1;
00196           if (t1[i] < t2[i])
00197             return 0;
00198         }
00199     }
00200   /*  assert(0);*/
00201   return 0;
00202 }
00203 
00204 inline static int
00205 tre_neg_char_klasses_match(tre_ctype_t *klasses, tre_cint_t wc, int icase)
00206 {
00207   DPRINT(("neg_char_klasses_test: %p, %d, %d\n", klasses, wc, icase));
00208   while (*klasses != (tre_ctype_t)0)
00209     if ((!icase && tre_isctype(wc, *klasses))
00210         || (icase && (tre_isctype(tre_toupper(wc), *klasses)
00211                       || tre_isctype(tre_tolower(wc), *klasses))))
00212       return 1; /* Match. */
00213     else
00214       klasses++;
00215   return 0; /* No match. */
00216 }

Generated on Fri Dec 19 05:34:06 2008 for Felix by  doxygen 1.5.7.1