tre_ast.hpp

00001 #line 1598 "./lpsrc/tre.pak"
00002 /*
00003   tre-ast.h - Abstract syntax tree (AST) definitions
00004 
00005   Copyright (C) 2001-2004 Ville Laurikari <vl@iki.fi>
00006 
00007   This program is free software; you can redistribute it and/or modify
00008   it under the terms of the GNU General Public License version 2 (June
00009   1991) as published by the Free Software Foundation.
00010 
00011   This program is distributed in the hope that it will be useful,
00012   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014   GNU General Public License for more details.
00015 
00016   You should have received a copy of the GNU General Public License
00017   along with this program; if not, write to the Free Software
00018   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019 */
00020 
00021 
00022 #ifndef TRE_AST_H
00023 #define TRE_AST_H 1
00024 
00025 #include "tre_mem.hpp"
00026 #include "tre_internal.hpp"
00027 #include "tre_compile.hpp"
00028 
00029 /* The different AST node types. */
00030 typedef enum {
00031   LITERAL,
00032   CATENATION,
00033   ITERATION,
00034   UNION
00035 } tre_ast_type_t;
00036 
00037 /* Special subtypes of TRE_LITERAL. */
00038 #define EMPTY     -1   /* Empty leaf (denotes empty string). */
00039 #define ASSERTION -2   /* Assertion leaf. */
00040 #define TAG       -3   /* Tag leaf. */
00041 #define BACKREF   -4   /* Back reference leaf. */
00042 #define PARAMETER -5   /* Parameter. */
00043 
00044 #define IS_SPECIAL(x)   ((x)->code_min < 0)
00045 #define IS_EMPTY(x)     ((x)->code_min == EMPTY)
00046 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
00047 #define IS_TAG(x)       ((x)->code_min == TAG)
00048 #define IS_BACKREF(x)   ((x)->code_min == BACKREF)
00049 #define IS_PARAMETER(x) ((x)->code_min == PARAMETER)
00050 
00051 
00052 /* A generic AST node.  All AST nodes consist of this node on the top
00053    level with `obj' pointing to the actual content. */
00054 typedef struct {
00055   tre_ast_type_t type;   /* Type of the node. */
00056   void *obj;             /* Pointer to actual node. */
00057   int nullable;
00058   int submatch_id;
00059   int num_submatches;
00060   int num_tags;
00061   tre_pos_and_tags_t *firstpos;
00062   tre_pos_and_tags_t *lastpos;
00063 } tre_ast_node_t;
00064 
00065 
00066 /* A "literal" node.  These are created for assertions, back references,
00067    tags, matching parameter settings, and all expressions that match one
00068    character. */
00069 typedef struct {
00070   long code_min;
00071   long code_max;
00072   int position;
00073   union {
00074     tre_ctype_t klass;
00075     unsigned int *params;
00076   } u;
00077   tre_ctype_t *neg_klasses;
00078 } tre_literal_t;
00079 
00080 /* A "catenation" node.  These are created when two regexps are concatenated.
00081    If there are more than one subexpressions in sequence, the `left' part
00082    holds all but the last, and `right' part holds the last subexpression
00083    (catenation is left associative). */
00084 typedef struct {
00085   tre_ast_node_t *left;
00086   tre_ast_node_t *right;
00087 } tre_catenation_t;
00088 
00089 /* An "iteration" node.  These are created for the "*", "+", "?", and "{m,n}"
00090    operators. */
00091 typedef struct {
00092   /* Subexpression to match. */
00093   tre_ast_node_t *arg;
00094   /* Minimum number of consecutive matches. */
00095   int min;
00096   /* Maximum number of consecutive matches. */
00097   int max;
00098   /* If 0, match as many characters as possible, if 1 match as few as
00099      possible.  Note that this does not always mean the same thing as
00100      matching as many/few repetitions as possible. */
00101   unsigned int minimal:1;
00102   /* Approximate matching parameters (or NULL). */
00103   unsigned int *params;
00104 } tre_iteration_t;
00105 
00106 /* An "union" node.  These are created for the "|" operator. */
00107 typedef struct {
00108   tre_ast_node_t *left;
00109   tre_ast_node_t *right;
00110 } tre_union_t;
00111 
00112 tre_ast_node_t *
00113 tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
00114 
00115 tre_ast_node_t *
00116 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
00117 
00118 tre_ast_node_t *
00119 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
00120                  int minimal);
00121 
00122 tre_ast_node_t *
00123 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
00124 
00125 tre_ast_node_t *
00126 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
00127                        tre_ast_node_t *right);
00128 
00129 #ifdef TRE_DEBUG
00130 void
00131 tre_ast_print(tre_ast_node_t *tree);
00132 
00133 /* XXX - rethink AST printing API */
00134 void
00135 tre_print_params(int *params);
00136 #endif /* TRE_DEBUG */
00137 
00138 #endif /* TRE_AST_H */
00139 
00140 /* EOF */

Generated on Mon Dec 11 17:13:32 2006 for Felix by  doxygen 1.5.1