1. elkhound
Start data section to licences/elk_licence.txt[1
/1
]
1: The elkhound software
2: Copyright (c) 2002, Regents of the University of California
3: All rights reserved.
4:
5: Redistribution and use in source and binary forms, with or without
6: modification, are permitted provided that the following conditions are
7: met:
8:
9: * Redistributions of source code must retain the above copyright
10: notice, this list of conditions and the following disclaimer.
11:
12: * Redistributions in binary form must reproduce the above
13: copyright notice, this list of conditions and the following
14: disclaimer in the documentation and/or other materials provided
15: with the distribution.
16:
17: * Neither the name of the University of California, Berkeley nor
18: the names of its contributors may be used to endorse or promote
19: products derived from this software without specific prior
20: written permission.
21:
22: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23: "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26: OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32: OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33:
Start python section to spkgs/elk.py[1
/1
]
1: #line 41 "./lpsrc/elk.pak"
2:
3:
4:
# Build manifest for the elkhound ("elk") package.  Each list below
# holds source-file stems (paths without extensions) grouped by
# subsystem; the variables at the bottom are the ones the build
# system actually reads.

# smbase sources built on their own (currently none)
SMBASE = [
]

# smbase runtime: portable utility code (strings, hash tables, bit
# arrays, serialization, debugging aids) linked into the elk libraries
SMRTL = [
'elk/sm_malloc_stub',
'elk/sm_nonport',
'elk/sm_autofile',
'elk/sm_bflatten',
'elk/sm_bit2d',
'elk/sm_bitarray',
'elk/sm_boxprint',
'elk/sm_breaker',
'elk/sm_crc',
'elk/sm_datablok',
'elk/sm_flatten',
'elk/sm_growbuf',
'elk/sm_gprintf',
'elk/sm_hashline',
'elk/sm_hashtbl',
'elk/sm_missing',
'elk/sm_point',
'elk/sm_pprint',
'elk/sm_strdict',
'elk/sm_strhash',
'elk/sm_stringset',
'elk/sm_strtokp',
'elk/sm_strutil',
'elk/sm_svdict',
'elk/sm_vdtllist',
'elk/sm_vptrmap',
'elk/sm_warn',
'elk/sm_srcloc',
'elk/sm_syserr',
'elk/sm_str',
'elk/sm_trace',
'elk/sm_trdelete',
'elk/sm_voidlist',
'elk/sm_exc',
]

# ast support code (grammar lexing, string tables, error reporting)
# used by the generator
ASTGEN = [
'elk/ast_gramlex',
'elk/ast_ccsstr',
'elk/ast_reporterr',
'elk/ast_embedded',
'elk/ast_asthelp',
'elk/ast_strtable',
'elk/ast_locstr',
]

# the elkhound parser-generator proper (grammar reading/analysis and
# code emission)
ELKHOUND = [
'elk/elk_asockind',
'elk/elk_grammar',
'elk/elk_emitcode',
'elk/elk_mlsstr',
'elk/elk_genml',
'elk/elk_gramast.ast.gen',
'elk/elk_gramlex.yy',
'elk/elk_grampar',
'elk/elk_grampar.tab',
'elk/elk_gramexpl',
]

# runtime support linked into programs that use generated parsers
ELKRTL = [
'elk/elk_glr',
'elk/elk_parsetables',
'elk/elk_useract',
'elk/elk_ptreenode',
'elk/elk_ptreeact',
]

# headers installed as the public interface of the elk runtime
ELKRTL_INTERFACES = [
'elk/sm_array.h',
'elk/sm_objpool.h',
'elk/sm_sobjlist.h',
'elk/sm_trdelete.h',
'elk/sm_voidlist.h',
'elk/sm_macros.h',
'elk/sm_srcloc.h',
'elk/sm_typ.h',
'elk/sm_xassert.h',
'elk/sm_objlist.h',
'elk/sm_str.h',
'elk/elk_lexerint.h',
'elk/elk_glrconfig.h',
'elk/elk_parsetables.h',
'elk/elk_glr.h',
'elk/elk_rcptr.h',
'elk/elk_useract.h',
]

# ---- variables consumed by the build system ----
host_cpp_cpps = SMRTL+ASTGEN+ELKHOUND+ELKRTL   # C++ sources for host-side tools
host_exes = [("elk/elk_gramanl","bin/flx_elkhound")]   # (main stem, installed exe)
host_exes_require_libs = ["elk/libelk_host_static"]

rtl_interfaces = ELKRTL_INTERFACES
cpp_cpps = ELKRTL + SMRTL     # C++ sources for the target runtime library
iscr_source = ['lpsrc/sm.pak','lpsrc/ast.pak','lpsrc/elk.pak']
build_macro = "ELK"
weaver_directory = "doc/elkhound/"
106:
Start data section to config/elk.fpc[1
/1
]
1: Name: elk
2: Description: Elkhound
3: Version: 1
4: provides_dlib: -lelk_dynamic
5: provides_slib: -lelk_static
Start cpp section to rtl/flx_target_elk_config.hpp[1
/1
]
1: #line 160 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
Start cpp section to rtl/flx_host_elk_config.hpp[1
/1
]
1: #line 172 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
Start cpp section to rtl/flx_elk_config.hpp[1
/1
]
1: #line 184 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
Start cpp section to elk/elk_gramexpl.cpp[1
/1
]
1: #line 197 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12: void grammarExplorer(GrammarAnalysis &g)
13: {
14: std::cout << "exploring the grammar:\n";
15:
16: #if 0
17: for (;;) {
18: std::cout << "commands:\n"
19: " terminals\n"
20: " nonterminals\n"
21: " productions <nonterm-id>\n"
22: " state <state-id>\n"
23: " suppress-except <term-id> (-1 to disable)\n"
24: " reach <state-id>\n"
25: " track-la <state-id> <prod-id> <term-id>\n"
26: " quit\n";
27: std::cout << "command> ";
28: std::cout.flush();
29:
30: char buf[80];
31: cin >> buf;
32: if (cin.eof()) break;
33:
34: StrtokParse tok(buf, " \n\t");
35: if (tok == 0) continue;
36:
37: try {
38: if (0==strcmp(tok[0], "terminals")) {
39: for (int i=0; i < g.numTerminals(); i++) {
40: Terminal const *t = g.getTerminal(i);
41: t->print(std::cout);
42: }
43: }
44:
45: else if (0==strcmp(tok[0], "nonterminals")) {
46: for (int i=0; i < g.numNonterminals(); i++) {
47: Nonterminal const *nt = g.getNonterminal(i);
48: nt->print(std::cout);
49: }
50: }
51:
52: else if (0==strcmp(tok[0], "productions")) {
53: int id = atoi(tok[1]);
54: Nonterminal const *nt = g.getNonterminal(i);
55: int ct=0;
56: FOREACH_PRODUCTION(g.productions, iter) {
57: if (iter.data()->left == nt) {
58: std::cout << "[" << ct << "] ";
59: iter.data()->print(std::cout);
60: }
61: ct++;
62: }
63: }
64:
65: else if (0==strcmp(tok[0], "state")) {
66: ItemSet const *is = g.getItemSet(atoi(tok[1]));
67: is->print(std::cout, g);
68: }
69:
70: else if (0==strcmp(tok[0], "suppress-except")) {
71: int id = atoi(tok[1]);
72: Terminal const *t = (id==-1? NULL : g.getTerminal(atoi(tok[1])));
73: DottedProduction::lookaheadSuppressExcept = t;
74: if (t) {
75: std::cout << "suppressing " << t->name << std::endl;
76: }
77: else {
78: std::cout << "suppressing nothing\n";
79: }
80: }
81:
82: else if (0==strcmp(tok[0], "reach")) {
83: int targetId = atoi(tok[1]);
84:
85:
86: for (int i=0; i < g.numItemSets(); i++) {
87: ItemSet const *set = g.getItemSet(i);
88:
89:
90: for (int termId=0; termId < g.numTerminals(); termId++) {
91: ItemSet const *dest = set->transitionC(g.getTerminal(termId));
92: if (dest && dest->id == targetId) {
93: dest->print(std::cout, g);
94: }
95: }
96: for (int nontermId=0; nontermId < g.numNonterminals(); nontermId++) {
97: ItemSet const *dest = set->transitionC(g.getNonterminal(nontermId));
98: if (dest && dest->id == targetId) {
99: dest->print(std::cout, g);
100: }
101: }
102: }
103: }
104:
105: else if (0==strcmp(tok[0], "track-la")) {
106: int stateId = atoi(tok[1]);
107: ItemSet const *set = g.getItemSet(stateId);
108:
109: int prodId = atoi(tok[2]);
110: Production const *prod = g.productions.nth(prodId);
111:
112: int termId = atoi(tok[3]);
113: Terminal const *term = g.getTerminal(termId);
114:
115:
116:
117:
118:
119:
120:
121:
122: }
123: else if (0==strcmp(tok[0], "quit")) {
124: }
125: else {
126: std::cout << "unknown command: " << tok[0] << std::endl;
127: }
128: }
129: catch (xArrayBounds &) {
130: std::cout << "too few arguments to " << tok[0] << std::endl;
131: }
132:
133:
134:
135:
136:
137:
138:
139:
140: #endif
141:
142: }
143:
144:
Start C section to elk/elk_asockind.h[1
/1
]
1: #line 342 "./lpsrc/elk.pak"
// asockind.h see license.txt for copyright and terms of use
// AssocKind; pulled out on its own so I don't have dependency problems

#ifndef ASOCKIND_H
#define ASOCKIND_H

#include "sm_str.h"

// specifies what to do when there is a shift/reduce conflict, and
// the production and token have the same precedence; this is attached
// to the token
enum AssocKind {
  AK_LEFT,          // disambiguate by reducing
  AK_RIGHT,         // disambiguate by shifting
  AK_NONASSOC,      // make it a parse-time syntax error
  AK_NEVERASSOC,    // make it a parsgen-time specification error
  AK_SPLIT,         // (GLR-specific) fork the parser

  NUM_ASSOC_KINDS   // count of the kinds above; not itself a valid kind
};

// printable name for 'k', for diagnostics and generated reports
sm_string toString(AssocKind k);

#endif // ASOCKIND_H
26:
Start C section to elk/elk_emitcode.h[1
/1
]
1: #line 369 "./lpsrc/elk.pak"
// emitcode.h see license.txt for copyright and terms of use
// track state of emitted code so I can emit #line too

#ifndef EMITCODE_H
#define EMITCODE_H

#include <fstream> // std::ofstream
#include "sm_str.h"
#include "sm_srcloc.h"

// A string builder that flushes its contents to a file and tracks the
// current output line number, so accurate #line directives can be
// woven into generated code.
class EmitCode : public sm_stringBuilder {
private: // data
  std::ofstream os; // stream to write to
  sm_string fname; // filename for emitting #line
  int line; // current line number

public: // funcs
  EmitCode(char const *fname); // open 'fname' for writing
  ~EmitCode(); // flushes remaining buffered text

  sm_string const &getFname() const { return fname; }

  // get current line number; flushes internally
  int getLine();

  // flush data in sm_stringBuffer to 'os'
  void flush();
};


// return a #line directive for the given location
sm_string lineDirective(SourceLoc loc);

// emit a #line directive to restore reporting to the
// EmitCode file itself (the 'sb' argument must be an EmitCode object)
sm_stringBuilder &restoreLine(sm_stringBuilder &sb);


#endif // EMITCODE_H
Start C section to elk/elk_flatutil.h[1
/1
]
1: #line 410 "./lpsrc/elk.pak"
2: // flatutil.h see license.txt for copyright and terms of use
3: // flatten helpers
4:
5: #ifndef FLATUTIL_H
6: #define FLATUTIL_H
7:
8: #include "sm_flatten.h"
9: #include "sm_objlist.h"
10: #include "sm_sobjlist.h"
11:
12:
// ------------- xfer of owners -----------------
// read/write an owner pointer: on read, the object is constructed
// from the stream (T must have a T(Flatten&) ctor) and then xfer'd;
// on write, only the xfer happens
template <class T>
void xferOwnerPtr(Flatten &flat, T *&ptr)
{
  if (flat.reading()) {
    // construct a new, empty object
    ptr = new T(flat);
  }

  // read/write it
  ptr->xfer(flat);

  // note it so we can have serfs to it
  flat.noteOwner(ptr);
}
28:
29:
// owner-pointer xfer for classes whose reading is done entirely by a
// static T::readObj(flat) factory (so no separate xfer() call is
// needed on the read path)
template <class T>
void xferOwnerPtr_readObj(Flatten &flat, T *&ptr)
{
  if (flat.reading()) {
    // construct a new object, *and* read it from file
    ptr = T::readObj(flat);
  }
  else {
    // write it
    ptr->xfer(flat);
  }

  // note it so we can have serfs to it
  flat.noteOwner(ptr);
}
45:
46:
// read/write an owner list; wire format is the element count followed
// by each element in list order (order must match between read and
// write, so do not reorder these statements)
template <class T>
void xferObjList(Flatten &flat, ObjList <T> &list)
{
  if (flat.writing()) {
    flat.writeInt(list.count());

    MUTATE_EACH_OBJLIST(T, list, iter) {
      iter.data()->xfer(flat);
      flat.noteOwner(iter.data());
    }
  }
  else {
    int listLen = flat.readInt();

    ObjListMutator<T> mut(list);
    while (listLen--) {
      // construct a new, empty object
      T *obj = new T(flat);

      // read it
      obj->xfer(flat);
      flat.noteOwner(obj);

      // add it to the list
      mut.append(obj);
    }
  }
}
75:
76:
// for things like AExprNode which have a readObj
// static method .. it's possible to merge this with
// the above code, but I'm not sure that's a good idea yet
template <class T>
void xferObjList_readObj(Flatten &flat, ObjList <T> &list)
{
  if (flat.writing()) {
    flat.writeInt(list.count());

    MUTATE_EACH_OBJLIST(T, list, iter) {
      iter.data()->xfer(flat);
      flat.noteOwner(iter.data());
    }
  }
  else {
    int listLen = flat.readInt();

    ObjListMutator<T> mut(list);
    while (listLen--) {
      // construct a new object, *and* read its
      // contents from the file
      T *obj = T::readObj(flat);
      flat.noteOwner(obj);

      // add it to the list
      mut.append(obj);
    }
  }
}
106:
107:
// ------------- xfer of serfs -----------------
// xfer a list of serf pointers to objects, each object
// could be in one of several owner lists; each element is encoded as
// (master-list number, index within that list) -- the list number is
// omitted when there is only one master
template <class T>
void xferSObjList_multi(Flatten &flat, SObjList<T> &list,
                        ObjList<T> **masterLists, int numMasters)
{
  // be sure the same number of master lists are used at
  // read and write time
  flat.checkpoint(numMasters);

  if (flat.writing()) {
    flat.writeInt(list.count());

    SMUTATE_EACH_OBJLIST(T, list, iter) {
      // determine which master list it's in
      int master;
      for (master = 0; master<numMasters; master++) {
        int index = masterLists[master]->indexOf(iter.data());
        if (index != -1) {
          // we found it -- encode the list and its index
          if (numMasters > 1) {
            flat.writeInt(master);   // only do this if multiple masters
          }
          flat.writeInt(index);
          break;
        }
      }

      if (master == numMasters) {
        // failed to find the master list
        xfailure("xferSObjList_multi: obj not in any of the lists");
      }
    }
  }

  else {
    int listLen = flat.readInt();

    SObjListMutator<T> mut(list);
    while (listLen--) {
      int master = 0;              // assume just 1 master
      if (numMasters > 1) {
        master = flat.readInt();   // then refine
      }

      mut.append(masterLists[master]->nth(flat.readInt()));
    }
  }
}
158:
159:
160: // xfer a list of serf pointers to objects owner by 'masterList'
161: template <class T>
162: void xferSObjList(Flatten &flat, SObjList<T> &list, ObjList<T> &masterList)
163: {
164: ObjList<T> *ptr = &masterList;
165: xferSObjList_multi(flat, list, &ptr, 1 /*numMasters*/);
166: }
167:
168:
169: // xfer a pointer which points to something in a master list
170: template <class T>
171: void xferSerfPtrToList(Flatten &flat, T *&ptr, ObjList<T> &masterList)
172: {
173: if (flat.writing()) {
174: flat.writeInt(masterList.indexOfF(ptr));
175: }
176: else {
177: ptr = masterList.nth(flat.readInt());
178: }
179: }
180:
181:
182: template <class T>
183: void xferNullableSerfPtrToList(Flatten &flat, T *&ptr, ObjList<T> &masterList)
184: {
185: if (flat.writing()) {
186: flat.writeInt(masterList.indexOf(ptr));
187: }
188: else {
189: int index = flat.readInt();
190: if (index >= 0) {
191: ptr = masterList.nth(index);
192: }
193: else {
194: ptr = NULL;
195: }
196: }
197: }
198:
199:
200: template <class T>
201: void computedValue(Flatten &flat, T &variable, T value)
202: {
203: if (flat.writing()) {
204: // check it
205: xassert(variable == value);
206: }
207: else {
208: // set it
209: variable = value;
210: }
211: }
212:
213:
// void* implementation
//#define Leaf void
//#define Root void
//#define FirstLevel void
// xfer a serf pointer to a 'Leaf' reachable from 'root' through two
// levels of indexed access; the pointer is encoded as the pair
// (index1, index2) such that getNthLeaf(getNthFirst(root,index1),index2)
// == leaf.  Both accessors must return NULL past the end of their range.
template <class Root, class FirstLevel, class Leaf>
void xferSerfPtr_twoLevelAccess(
  Flatten &flat,
  Leaf *&leaf,
  Root *root,
  FirstLevel* (*getNthFirst)(Root *r, int n),
  Leaf* (*getNthLeaf)(FirstLevel *f, int n))
{
  if (flat.writing()) {
    // determine both indices
    for (int index1=0; ; index1++) {
      // get a first-level obj
      FirstLevel *first = getNthFirst(root, index1);
      if (!first) {
        // exhausted first-level objs
        xfailure("xferSerfPtr_twoLevelAccess: couldn't find obj to xfer");
      }

      // look for the leaf inside it
      for (int index2=0; ; index2++) {
        Leaf *second = getNthLeaf(first, index2);
        if (second == leaf) {
          // found it; encode both indices
          flat.writeInt(index1);
          flat.writeInt(index2);
          return;
        }
        if (second == NULL) {
          // exhausted this subtree
          break;
        }
      } // end of iter over leaves
    } // end of iter over first-lvl objs
  }

  else /*reading*/ {
    // read both indicies
    int index1 = flat.readInt();
    int index2 = flat.readInt();

    // follow the access path
    FirstLevel *first = getNthFirst(root, index1);
    formatAssert(first != NULL);
    Leaf *second = getNthLeaf(first, index2);
    formatAssert(second != NULL);

    // found it
    leaf = second;
  }
}
//#undef Leaf
//#undef Root
//#undef FirstLevel


#if 0   // disabled: sketch of a typesafe wrapper over a void* core
typedef void *accessFunc_void(void *parent, int childNum);

// typesafe interface
template <class Root, class FirstLevel, class Leaf>
inline void xferSerfPtr_twoLevelAccess(
  Flatten &flat,
  Leaf *&leaf,
  Root *root,
  FirstLevel* (*getNthFirst)(Root *r, int n),
  Leaf* (*getNthLeaf)(FirstLevel *f, int n))
{
  xferSerfPtr_twoLevelAccess(
    flat,
    (void*&)leaf,
    (void*)root,
    (accessFunc_void)getNthFirst,
    (accessFunc_void)getNthLeaf);
}
#endif // 0
293:
294:
// xfer a whole list of serf pointers, each encoded via the two-level
// (index1, index2) scheme of xferSerfPtr_twoLevelAccess; wire format
// is the count followed by each encoded element in list order
template <class Root, class FirstLevel, class Leaf>
void xferSObjList_twoLevelAccess(
  Flatten &flat,
  SObjList<Leaf> &serfList,
  Root *root,
  FirstLevel* (*getNthFirst)(Root *r, int n),
  Leaf* (*getNthLeaf)(FirstLevel *f, int n))
{
  if (flat.writing()) {
    // length of list
    flat.writeInt(serfList.count());

    // iterate over list
    SMUTATE_EACH_OBJLIST(Leaf, serfList, iter) {
      // write the obj
      Leaf *leaf = iter.data();
      xferSerfPtr_twoLevelAccess(
        flat, leaf, root,
        getNthFirst, getNthLeaf);
    }
  }
  else {
    int length = flat.readInt();

    SObjListMutator<Leaf> mut(serfList);
    while (length--) {
      // read the obj
      Leaf *leaf;
      xferSerfPtr_twoLevelAccess(
        flat, leaf, root,
        getNthFirst, getNthLeaf);

      // store it in the list
      mut.append(leaf);
    }
  }
}
332:
333:
// xfer a serf pointer through Flatten's own owner/serf tracking
// machinery; the pointer must not be NULL
template <class T>
void xferSerfPtr(Flatten &flat, T *&serfPtr)
{
  flat.xferSerf((void*&)serfPtr, false /*nullable*/);
}

// same as xferSerfPtr, but a NULL pointer is permitted
template <class T>
void xferNullableSerfPtr(Flatten &flat, T *&serfPtr)
{
  flat.xferSerf((void*&)serfPtr, true /*nullable*/);
}
345:
346:
347: #endif // FLATUTIL_H
Start C section to elk/elk_genml.h[1
/1
]
1: #line 758 "./lpsrc/elk.pak"
// genml.h see license.txt for copyright and terms of use
// extension to gramanl module that generates ML instead of C

#ifndef GENML_H
#define GENML_H

class GrammarAnalysis;

// entry point
// Emit ML parser code for the analyzed grammar 'g': an interface file
// to 'mliFname' and an implementation to 'mlFname'.  'srcFname' is
// presumably the original grammar source name, used for provenance in
// the generated output -- confirm against genml.cc.
void emitMLActionCode(GrammarAnalysis const &g, char const *mliFname,
                      char const *mlFname, char const *srcFname);

#endif // GENML_H
Start C section to elk/elk_glrconfig.h[1
/1
]
1: #line 773 "./lpsrc/elk.pak"
// glrconfig.h
// do not edit; generated by ./configure

// glrconfig.h.in see license.txt for copyright and terms of use
// compile-time configuration options which affect the generated
// GLR parser, and the interface to the user actions

#ifndef GLRCONFIG_H
#define GLRCONFIG_H


// when NO_GLR_SOURCELOC is #defined, we disable all support for
// automatically propagating source location information in the
// parser; user actions can still refer to 'loc', but they just get
// a dummy no-location value
#ifndef GLR_SOURCELOC
  #define GLR_SOURCELOC 1       // set by ./configure
#endif

#if GLR_SOURCELOC
  // SOURCELOC(x) expands to its argument when location support is on
  #define SOURCELOC(stuff) stuff

  // this one adds a leading comma (I can't put that into the
  // argument <stuff>, because then it looks like the macro is
  // being passed 2 arguments)
  #define SOURCELOCARG(stuff) , stuff

  // complement of SOURCELOC: expands only when locations are OFF
  #define NOSOURCELOC(stuff)
#else
  #define SOURCELOC(stuff)
  #define SOURCELOCARG(stuff)
  #define NOSOURCELOC(stuff) stuff
#endif


// when enabled, NODE_COLUMN tracks in each stack node the
// appropriate column to display it for in debugging dump.
// in the new RWL core, this is required to always be 1.
#ifndef ENABLE_NODE_COLUMNS
  #define ENABLE_NODE_COLUMNS 1
#endif
#if ENABLE_NODE_COLUMNS
  #define NODE_COLUMN(stuff) stuff
#else
  #define NODE_COLUMN(stuff)
#endif


// when enabled, YIELD_COUNT keeps track of the number of times a
// given semantic value is yielded; this is useful for warning the
// user when a merge is performed but one of the merged values has
// already been yielded to another semantic action, which implies
// that the induced parse forest is incomplete
#ifndef ENABLE_YIELD_COUNT
  #define ENABLE_YIELD_COUNT 1
#endif
#if ENABLE_YIELD_COUNT
  #define YIELD_COUNT(stuff) stuff
#else
  #define YIELD_COUNT(stuff)
#endif


// when true, error entries in the action table are extracted into
// their own bitmap; this then enables compression on the action
// table, since it makes it sparse
#ifndef ENABLE_EEF_COMPRESSION
  #define ENABLE_EEF_COMPRESSION 0
#endif

// when true, the action and goto tables are compressed using
// graph coloring
#ifndef ENABLE_GCS_COMPRESSION
  #define ENABLE_GCS_COMPRESSION 0
#endif

// when true, action and goto *columns* are merged during GCS;
// otherwise, only rows are merged
#ifndef ENABLE_GCS_COLUMN_COMPRESSION
  #define ENABLE_GCS_COLUMN_COMPRESSION 0
#endif

// when true, entries in the action and goto tables are a
// 1-byte index into an appropriate map
#ifndef ENABLE_CRS_COMPRESSION
  #define ENABLE_CRS_COMPRESSION 0
#endif



#endif // GLRCONFIG_H
Start C section to elk/elk_glr.h[1
/1
]
1: #line 866 "./lpsrc/elk.pak"
2: // glr.h see license.txt for copyright and terms of use
3: // GLR parsing algorithm
4:
5: /*
6: * Author: Scott McPeak, April 2000
7: *
8: * The fundamental concept in Generalized LR (GLR) parsing
9: * is to permit (at least local) ambiguity by "forking" the
10: * parse stack. If the input is actually unambiguous, then
11: * all but one of the forked parsers will, at some point,
12: * fail to shift a symbol, and die. If the input is truly
13: * ambiguous, forked parsers rejoin at some point, and the
14: * parse tree becomes a parse DAG, representing all possible
15: * parses. (In fact, since cyclic grammars are supported,
16: * which can have an infinite number of parse trees for
17: * some inputs, we may end up with a cyclic parse *graph*.)
18: *
19: * In the larger scheme of things, this level of support for
20: * ambiguity is useful because it lets us use simpler and
21: * more intuitive grammars, more sophisticated disambiguation
22: * techniques, and parsing in the presence of incomplete
23: * or incorrect information (e.g. in an editor).
24: *
25: * The downside is that parsing is slower, and whatever tool
26: * processes the parse graph needs to have ways of dealing
27: * with the multiple parse interpretations.
28: *
29: * references:
30: *
31: * [GLR] J. Rekers. Parser Generation for Interactive
32: * Environments. PhD thesis, University of
33: * Amsterdam, 1992. Available by ftp from
34: * ftp://ftp.cwi.nl/pub/gipe/reports/Rek92.ps.Z .
35: * [Contains a good description of the Generalized
36: * LR (GLR) algorithm.]
37: */
38:
39: #ifndef GLR_H
40: #define GLR_H
41:
42: #include "elk_glrconfig.h"
43: #include "elk_parsetables.h"
44: #include "elk_rcptr.h"
45: #include "elk_useract.h"
46: #include "sm_objpool.h"
47: #include "sm_objlist.h"
48: #include "sm_srcloc.h"
49: #include "sm_sobjlist.h"
50:
51: #include <stdio.h> // FILE
52: #include <iostream> // std::ostream
53: #include "flx_elk_config.hpp"
54:
55: // fwds from other files
56: class LexerInterface; // lexerint.h
57:
58: // forward decls for things declared below
59: class StackNode; // unit of parse state
60: class SiblingLink; // connections between stack nodes
61: class PendingShift; // for postponing shifts.. may remove
62: class ELK_EXTERN GLR; // main class for GLR parsing
63:
64:
// a pointer from a stacknode to one 'below' it (in the LR
// parse stack sense); also has a link to the parse graph
// we're constructing
class SiblingLink {
public:
  // the stack node being pointed-at; it was created earlier
  // than the one doing the pointing
  RCPtr<StackNode> sib;

  // this is the semantic value associated with this link
  // (parse tree nodes are *not* associated with stack nodes --
  // that's how it was originally, but I figured out the hard
  // way that's wrong (more info in compiler.notes.txt));
  // this is an *owner* pointer
  SemanticValue sval;

  // the source location of the left edge of the subtree rooted
  // at this stack node; this is in essence part of the semantic
  // value, but automatically propagated by the parser
  SOURCELOC( SourceLoc loc; )

  // number of times this 'sval' has been yielded; this is used
  // to track cases where we yield a value and then merge it
  // (which means the induced parse forest is incomplete)
  YIELD_COUNT( int yieldCount; )

  // if you add additional fields, they need to be inited in the
  // constructor *and* in StackNode::addFirstSiblingLink_noRefCt

public:
  // build a link to node 's' carrying semantic value 'sv' (and,
  // when location support is compiled in, source location 'L')
  SiblingLink(StackNode *s, SemanticValue sv
              SOURCELOCARG( SourceLoc L ) );
  ~SiblingLink();

  #if GLR_SOURCELOC
  // true when a real location (not SL_UNKNOWN) was recorded
  bool validLoc() const { return loc != SL_UNKNOWN; }
  #else
  bool validLoc() const { return false; }
  #endif
};
105:
106:
// the GLR parse state is primarily made up of a graph of these
// nodes, which play a role analogous to the stack nodes of a
// normal LR parser; GLR nodes form a graph instead of a linear
// stack because choice points (real or potential ambiguities)
// are represented as multiple left-siblings
class StackNode {
public:
  // the LR state the parser is in when this node is at the
  // top ("at the top" means that nothing, besides perhaps itself,
  // is pointing to it)
  //ItemSet const * const state; // (serf)
  StateId state; // now it is an id

  // each leftSibling points to a stack node in one possible LR stack.
  // if there is more than one, it means two or more LR stacks have
  // been joined at this point. this is the parse-time representation
  // of ambiguity (actually, unambiguous grammars or inputs do
  // sometimes lead to multiple siblings)
  ObjList<SiblingLink> leftSiblings; // this is a set

  // the *first* sibling is simply embedded directly into the
  // stack node, to avoid list overhead in the common case of
  // only one sibling; when firstSib.sib==NULL, there are no
  // siblings
  SiblingLink firstSib;

  // number of sibling links pointing at 'this', plus the number
  // of worklists on which 'this' appears (some liberty is taken
  // in the mini-LR parser, but it is carefully documented there)
  int referenceCount;

  // how many stack nodes can I pop before hitting a nondeterminism?
  // if this node itself has >1 sibling link, determinDepth==0; if
  // this node has 1 sibling, but that sibling has >1 sibling, then
  // determinDepth==1, and so on; if this node has 0 siblings, then
  // determinDepth==1
  int determinDepth;

  union {
    // somewhat nonideal: I need access to the 'userActions' to
    // deallocate semantic values when refCt hits zero, and I need
    // to map states to state-symbols for the same reason.
    // update: now I'm also using this to support pool-based
    // deallocation in decRefCt()
    GLR *glr;

    // this is used by the ObjectPool which handles allocation of
    // StackNodes
    StackNode *nextInFreeList;
  };

  // ordinal position of the token that was being processed
  // when this stack node was created; this information is useful
  // for laying out the nodes when visualizing the GSS, but is
  // not used by the parsing algorithm itself
  NODE_COLUMN( int column; )

  // count and high-water for stack nodes
  static int numStackNodesAllocd;
  static int maxStackNodesAllocd;


private: // funcs
  // append a link to 'leftSiblings' (the embedded 'firstSib' slot
  // is already occupied when this is called)
  SiblingLink *
    addAdditionalSiblingLink(StackNode *leftSib, SemanticValue sval
                             SOURCELOCARG( SourceLoc loc ) );

public: // funcs
  // nodes are pool-allocated; the real setup/teardown happens in
  // init()/deinit(), not in the ctor/dtor
  StackNode();
  ~StackNode();

  // ctor/dtor from point of view of the object pool user
  void init(StateId state, GLR *glr);
  void deinit();

  // internal workings of 'deinit', exposed for performance reasons
  inline void decrementAllocCounter();
  void deallocSemanticValues();

  // add a new link with the given tree node; return the link
  SiblingLink *addSiblingLink(StackNode *leftSib, SemanticValue sval
                              SOURCELOCARG( SourceLoc loc ) );

  // specialized version for performance-critical sections
  inline void
    addFirstSiblingLink_noRefCt(StackNode *leftSib, SemanticValue sval
                                SOURCELOCARG( SourceLoc loc ) );

  // return the symbol represented by this stack node; it's
  // the symbol shifted or reduced-to to get to this state
  // (this used to be a data member, but there are at least
  // two ways to compute it, so there's no need to store it)
  SymbolId getSymbolC() const;

  // reference count stuff
  void incRefCt() { referenceCount++; }
  void decRefCt();

  // sibling count queries (each one answerable in constant time)
  bool hasZeroSiblings() const { return firstSib.sib==NULL; }
  bool hasOneSibling() const { return firstSib.sib!=NULL && leftSiblings.isEmpty(); }
  bool hasMultipleSiblings() const { return leftSiblings.isNotEmpty(); }

  // when you expect there's only one sibling link, get it this way
  SiblingLink const *getUniqueLinkC() const;
  SiblingLink *getUniqueLink() { return const_cast<SiblingLink*>(getUniqueLinkC()); }

  // retrieve pointer to the sibling link to a given node, or NULL if none
  SiblingLink *getLinkTo(StackNode *another);

  // recompute my determinDepth based on siblings,
  // but don't actually change the state
  int computeDeterminDepth() const;

  // debugging
  static void printAllocStats();
  void checkLocalInvariants() const;
};
225:
226:
// this is a priority queue of stack node paths that are candidates to
// reduce, maintained such that we can select paths in an order which
// will avoid yield-then-merge
class ReductionPathQueue {
public: // types
  // a single path in the stack
  class Path {
  public: // data
    // ---- right edge info ----
    // the rightmost state's id; we're reducing in this state
    StateId startStateId;

    // id of the production with which we're reducing
    int prodIndex;

    // ---- left edge info ----
    // the token column (ordinal position of a token in the token
    // stream) of the leftmost stack node; the smaller the
    // startColumn, the more tokens this reduction spans
    int startColumn;

    // stack node at the left edge; our reduction will push a new
    // stack node on top of this one
    StackNode *leftEdgeNode;

    // ---- path in between ----
    // array of sibling links, naming the path; 'sibLink[0]' is the
    // leftmost link; array length is given by the rhsLen of
    // prodIndex's production
    GrowArray<SiblingLink*> sibLinks; // (array of serfs)

    // corresponding array of symbol ids so we know how to interpret
    // the semantic values in the links
    GrowArray<SymbolId> symbols;

    union {
      // link between nodes for construction of a linked list,
      // kept in sorted order
      Path *next;

      // link for free list in the object pool
      Path *nextInFreeList;
    };

  public: // funcs
    // Paths are pool-allocated; per-use setup is in init()
    Path();
    ~Path();

    void init(StateId startStateId, int prodIndex, int rhsLen);
    void deinit() {}   // no per-use cleanup needed
  };

private: // data
  // head of the list
  Path *top;

  // allocation pool of Path objects
  ObjectPool<Path> pathPool;

  // parse tables, so we can decode prodIndex and also compare
  // production ids for sorting purposes
  ParseTables *tables;

private: // funcs
  // ordering predicate used to keep the queue sorted
  bool goesBefore(Path const *p1, Path const *p2) const;

public: // funcs
  ReductionPathQueue(ParseTables *t);
  ~ReductionPathQueue();

  // get another Path object, inited with these values
  Path *newPath(StateId startStateId, int prodIndex, int rhsLen);

  // make a copy of the prototype 'src', fill in its left-edge
  // fields using 'leftEdge', and insert it into sorted order
  // in the queue
  void insertPathCopy(Path const *src, StackNode *leftEdge);

  // true if there are no more paths
  bool isEmpty() const { return top == NULL; }
  bool isNotEmpty() const { return !isEmpty(); }

  // remove the next path to reduce from the list, and return it
  Path *dequeue();

  // mark a path as not being used, so it will be recycled into the pool
  void deletePath(Path *p);
};
315:
316:
317: // each GLR object is a parser for a specific grammar, but can be
318: // used to parse multiple token streams
319: class ELK_EXTERN GLR {
320: public:
321: // ---- grammar-wide data ----
322: // user-specified actions
323: UserActions *userAct; // (serf)
324:
325: // parse tables derived from the grammar
326: ParseTables *tables; // (serf)
327:
328: // ---- parser state between tokens ----
329: // I keep a pointer to this so I can ask for token descriptions
330: // inside some of the helper functions
331: LexerInterface *lexerPtr; // (serf)
332:
333: // Every node in this set is (the top of) a parser that might
334: // ultimately succeed to parse the input, or might reach a
335: // point where it cannot proceed, and therefore dies. (See
336: // comments at top of glr.cc for more details.)
337: ArrayStack<StackNode*> topmostParsers; // (refct list)
338:
339: // index: StateId -> index in 'topmostParsers' of unique parser
340: // with that state, or INDEX_NO_PARSER if none has that state
341: typedef unsigned char ParserIndexEntry;
342: enum { INDEX_NO_PARSER = 255 }; // sentinel; largest value representable in ParserIndexEntry (unsigned char)
343: ParserIndexEntry *parserIndex; // (owner)
344:
345: // this is for assigning unique ids to stack nodes
346: int nextStackNodeId;
347: enum { initialStackNodeId = 1 };
348:
349: // ---- parser state during each token ----
350: // I used to have fields:
351: // int currentTokenType;
352: // SemanticValue currentTokenValue;
353: // SourceLoc currentTokenLoc;
354: // but these have been now replaced by, respectively,
355: // lexerPtr->type
356: // lexerPtr->sval
357: // lexerPtr->loc
358:
359: // ---- scratch space re-used at token-level (or finer) granularity ----
360: // to be regarded as a local variable of GLR::rwlProcessWorklist
361: GrowArray<SemanticValue> toPass;
362:
363: // persistent array that I swap with 'topmostParsers' during
364: // 'rwlShiftTerminals' to avoid extra copying or allocation;
365: // this should be regarded as variable local to that function
366: ArrayStack<StackNode*> prevTopmost; // (refct list)
367:
368: // ---- allocation pools ----
369: // this is a pointer to the same-named local variable in innerGlrParse
370: ObjectPool<StackNode> *stackNodePool;
371:
372: // pool and list for the RWL implementation
373: ReductionPathQueue pathQueue;
374:
375: // ---- user options ----
376: // when true, failed parses are accompanied by some rudimentary
377: // diagnosis; when false, failed parses are silent (default: true)
378: bool noisyFailedParse;
379:
380: // ---- debugging trace ----
381: // these are computed during GLR::GLR since the profiler reports
382: // there is significant expense to computing the debug strings
383: // (that are then usually not printed)
384: bool trParse; // tracingSys("parse")
385: std::ostream &trsParse; // trace("parse")
386:
387: // track column for new nodes
388: NODE_COLUMN( int globalNodeColumn; )
389:
390: // statistics on parser actions
391: int detShift, detReduce, nondetShift, nondetReduce;
392:
393: // count of # of times yield-then-merge happens
394: int yieldThenMergeCt;
395:
396: private: // funcs
397: // comments in glr.cc
398: SemanticValue duplicateSemanticValue(SymbolId sym, SemanticValue sval);
399: void deallocateSemanticValue(SymbolId sym, SemanticValue sval);
400: SemanticValue grabTopSval(StackNode *node);
401:
402: StackNode *findTopmostParser(StateId state);
403: StackNode *makeStackNode(StateId state);
404: void writeParseGraph(char const *input) const;
405: void clearAllStackNodes();
406: void addTopmostParser(StackNode *parser);
407: void pullFromTopmostParsers(StackNode *parser);
408: bool canMakeProgress(StackNode *parser);
409: void dumpGSS(int tokenNumber) const;
410: void dumpGSSEdge(FILE *dest, StackNode const *src,
411: StackNode const *target) const;
412: void printConfig() const;
413: void buildParserIndex();
414: void printParseErrorMessage(StateId lastToDie);
415: bool cleanupAfterParse(SemanticValue &treeTop);
416: bool nondeterministicParseToken();
417: static bool innerGlrParse(GLR &glr, LexerInterface &lexer, SemanticValue &treeTop);
418: SemanticValue doReductionAction(
419: int productionId, SemanticValue const *svals
420: SOURCELOCARG( SourceLoc loc ) );
421:
422: void rwlProcessWorklist();
423: SiblingLink *rwlShiftNonterminal(StackNode *leftSibling, int lhsIndex,
424: SemanticValue /*owner*/ sval
425: SOURCELOCARG( SourceLoc loc ) );
426: int rwlEnqueueReductions(StackNode *parser, ActionEntry action,
427: SiblingLink *sibLink);
428: void rwlCollectPathLink(
429: ReductionPathQueue::Path *proto, int popsRemaining,
430: StackNode *currentNode, SiblingLink *mustUseLink, SiblingLink *linkToAdd);
431: void rwlRecursiveEnqueue(
432: ReductionPathQueue::Path *proto,
433: int popsRemaining,
434: StackNode *currentNode,
435: SiblingLink *mustUseLink);
436: void rwlShiftTerminals();
437:
438: void configCheck(char const *option, bool core, bool table);
439:
440: sm_string stackSummary() const;
441: void nodeSummary(sm_stringBuilder &sb, StackNode const *node) const;
442: void innerStackSummary(sm_stringBuilder &sb,
443: SObjList<StackNode const> &printed,
444: StackNode const *node) const;
445:
446: public: // funcs
447: GLR(UserActions *userAct, ParseTables *tables);
448: ~GLR();
449:
450: // ------- primary interface -------
451: // read the named grammar file (.bin extension, typically)
452: void readBinaryGrammar(char const *grammarFname);
453:
454: // parse, using the token stream in 'lexer', and store the final
455: // semantic value in 'treeTop'
456: bool glrParse(LexerInterface &lexer, SemanticValue &treeTop);
457:
458: };
459:
460:
461: #endif // GLR_H
Start C section to elk/elk_gramanl.h[1
/1
]
1: #line 1328 "./lpsrc/elk.pak"
2: // gramanl.h see license.txt for copyright and terms of use
3: // grammar analysis module; separated from grammar.h to
4: // reduce mixing of representation and algorithm; this
5: // module should be entirely algorithm
6:
7: // Author: Scott McPeak, April 2000
8: // Updates: March 2002
9:
10: // references:
11: //
12: // [ASU] Aho, Sethi Ullman. Compilers: Principles,
13: // Techniques, and Tools. Addison-Wesley,
14: // Reading, MA. 1986. Second printing (3/88).
15: // [A classic reference for LR parsing.]
16:
17:
18: #ifndef __GRAMANL_H
19: #define __GRAMANL_H
20:
21: #include "elk_grammar.h"
22: #include "sm_ohashtbl.h"
23: #include "sm_okhashtbl.h"
24: #include "sm_okhasharr.h"
25: #include "elk_glrconfig.h"
26: #include "elk_parsetables.h"
27:
28: // forward decls
29: class Bit2d; // bit2d.h
30: class BitArray; // bitarray.h
31: class EmitCode; // emitcode.h
32:
33: // this file
34: class GrammarAnalysis;
35:
36:
37: // ---------------- DottedProduction --------------------
38: // a production, with an indicator that says how much of this
39: // production has been matched by some part of the input string
40: // (exactly which part of the input depends on where this appears
41: // in the algorithm's data structures)
42: class DottedProduction {
43: // ------ representation ------
44: private: // data
45: Production const *prod; // (serf) the base production
46: int dot; // 0 means it's before all RHS symbols, 1 means after first, etc.
47:
48: // -------- annotation ----------
49: private: // data
50: // performance optimization: NULL if dot at end, or else pointer
51: // to the symbol right after the dot
52: Symbol *afterDot;
53:
54: public: // data
55: // First of the sentential form that follows the dot; this set
56: // is computed by GrammarAnalysis::computeDProdFirsts
57: TerminalSet firstSet;
58:
59: // also computed by computeDProdFirsts, this is true if the
60: // sentential form can derive epsilon (the empty string)
61: bool canDeriveEmpty;
62:
63: // during item set closure, I need a way to map from dotted prods to
64: // the items which use them; so rather than use a hash table, I'll
65: // just annotate the dprods themselves with backpointers; these
66: // backpointers *must* be maintained as NULL when there's no
67: // association
68: mutable class LRItem *backPointer;
69:
70: private: // funcs
71: void init();
72:
73: public: // funcs
74: //DottedProduction(DottedProduction const &obj);
75:
76: // need the grammar passed during creation so we know how big
77: // to make 'lookahead'
78: //DottedProduction(GrammarAnalysis const &g); // for later filling-in
79: //DottedProduction(/*GrammarAnalysis const &g,*/ Production *p, int d);
80: DottedProduction(); // for creating arrays of them
81: ~DottedProduction();
82:
83: // no point to flattening these because they're easily re-computable
84: #if 0
85: DottedProduction(Flatten&);
86: void xfer(Flatten &flat);
87: void xferSerfs(Flatten &flat, GrammarAnalysis &g);
88: #endif // 0
89:
90: // simple queries
91: Production const *getProd() const { return prod; }
92: int getDot() const { return dot; }
93: bool isDotAtStart() const { return dot==0; }
94: bool isDotAtEnd() const { return afterDot==NULL; }
95:
96: // no need for equality now, since all DPs with the same
97: // prod/dot are shared
98: //bool isEqual(DottedProduction const &obj) const;
99: //bool operator== (DottedProduction const &obj) const;
100:
101: // call this to change prod and dot
102: void setProdAndDot(Production const *p, int d);
103:
104: // dot must not be at the start (left edge)
105: Symbol const *symbolBeforeDotC() const;
106: Symbol *symbolBeforeDot() { return const_cast<Symbol*>(symbolBeforeDotC()); }
107:
108: // dot must not be at the end (right edge)
109: Symbol const *symbolAfterDotC() const { return afterDot; }
110: Symbol *symbolAfterDot() { return const_cast<Symbol*>(symbolAfterDotC()); }
111:
112: // print to std::cout as 'A -> B . c D' (no newline)
113: void print(std::ostream &os/*, GrammarAnalysis const &g*/) const;
114: OSTREAM_OPERATOR(DottedProduction)
115: };
116:
117: // convenience typedefs for lists of dotted productions, plus matching iteration macros
118: typedef ObjList<DottedProduction> DProductionList;
119: typedef ObjListIter<DottedProduction> DProductionListIter;
120: typedef SObjList<DottedProduction> SDProductionList;
121: typedef SObjListIter<DottedProduction> SDProductionListIter;
122:
123: #define FOREACH_DOTTEDPRODUCTION(list, iter) FOREACH_OBJLIST(DottedProduction, list, iter)
124: #define MUTATE_EACH_DOTTEDPRODUCTION(list, iter) MUTATE_EACH_OBJLIST(DottedProduction, list, iter)
125: #define SFOREACH_DOTTEDPRODUCTION(list, iter) SFOREACH_OBJLIST(DottedProduction, list, iter)
126: #define SMUTATE_EACH_DOTTEDPRODUCTION(list, iter) SMUTATE_EACH_OBJLIST(DottedProduction, list, iter)
127:
128:
129: // --------------- LRItem ---------------
130: // a dotted production with a lookahead; whereas each production
131: // has a fixed number of dotted versions of that production, there
132: // can be lots of items, because of the differing lookahead sets
133: // (I prefer the name "LRItem" to simply "Item" because the latter
134: // easily collides with other uses)
135: class LRItem {
136: public: // data
137: DottedProduction const *dprod; // (serf) production and dot position
138: TerminalSet lookahead; // lookahead symbols
139:
140: public: // funcs
141: LRItem(LRItem const &obj);
142: ~LRItem();
143:
144: // need 'numTerms' to tell how big to make 'lookahead'
145: LRItem(int numTerms, DottedProduction const *dp);
146:
147: LRItem(Flatten&);
148: void xfer(Flatten &flat);
149: void xferSerfs(Flatten &flat, GrammarAnalysis &g);
150:
151: // comparison; equalNoLA compares only the production/dot, ignoring lookahead
152: static int diff(LRItem const *a, LRItem const *b, void*);
153: bool equalNoLA(LRItem const &obj) const
154: { return dprod == obj.dprod; }
155:
156: // manipulate the lookahead set
157: bool laContains(int terminalId) const
158: { return lookahead.contains(terminalId); }
159: void laAdd(int terminalId)
160: { lookahead.add(terminalId); }
161: void laRemove(int terminalId)
162: { lookahead.remove(terminalId); }
163: void laCopy(LRItem const &obj)
164: { lookahead.copy(obj.lookahead); }
165: bool laMerge(LRItem const &obj) // returns true if merging changed lookahead
166: { return lookahead.merge(obj.lookahead); }
167: bool laIsEqual(LRItem const &obj) const
168: { return lookahead.isEqual(obj.lookahead); }
169:
170: // pass-thru queries into 'dprod'
171: Production const *getProd() const
172: { return dprod->getProd(); }
173: int getDot() const
174: { return dprod->getDot(); }
175: bool isDotAtStart() const
176: { return dprod->isDotAtStart(); }
177: bool isDotAtEnd() const
178: { return dprod->isDotAtEnd(); }
179: Symbol const *symbolBeforeDotC() const
180: { return dprod->symbolBeforeDotC(); }
181: Symbol const *symbolAfterDotC() const
182: { return dprod->symbolAfterDotC(); }
183:
184: int prodIndex() const
185: { return getProd()->prodIndex; }
186:
187: // stuff for insertion into a hash table
188: static unsigned hash(DottedProduction const *key);
189: static DottedProduction const *dataToKey(LRItem *dp);
190: static bool dpEqual(DottedProduction const *key1, DottedProduction const *key2);
191:
192: // true if this item is "A -> alpha * t beta"
193: bool isExtendingShift(Nonterminal const *A, Terminal const *t) const;
194:
195: void print(std::ostream &os, GrammarAnalysis const &g) const;
196: };
197:
198:
199: // ---------------- ItemSet -------------------
200: // a set of dotted productions, and the transitions between
201: // item sets, as in LR(0) set-of-items construction
202: class ItemSet {
203: public: // intended to be read-only public
204: // kernel items: the items that define the set; except for
205: // the special case of the initial item in the initial state,
206: // the kernel items are distinguished by having the dot *not*
207: // at the left edge
208: ObjList<LRItem> kernelItems;
209:
210: // nonkernel items: those derived as the closure of the kernel
211: // items by expanding symbols to the right of dots; here I am
212: // making the choice to materialize them, rather than derive
213: // them on the spot as needed (and may change this decision)
214: ObjList<LRItem> nonkernelItems;
215:
216: private: // data
217: // transition function (where we go on shifts); NULL means no transition
218: // Map : (Terminal id or Nonterminal id) -> ItemSet*
219: ItemSet **termTransition; // (owner ptr to array of serf ptrs)
220: ItemSet **nontermTransition; // (owner ptr to array of serf ptrs)
221:
222: // bounds for above
223: int terms;
224: int nonterms;
225:
226: // profiler reports I'm spending significant time rifling through
227: // the items looking for those that have the dot at the end; so this
228: // array will point to all such items
229: LRItem const **dotsAtEnd; // (owner ptr to array of serf ptrs)
230: int numDotsAtEnd; // number of elements in 'dotsAtEnd'
231:
232: // profiler also reports I'm still spending time comparing item sets; this
233: // stores a CRC of the numerically sorted kernel item pointer addresses,
234: // concatenated into a buffer of sufficient size
235: unsigned long kernelItemsCRC;
236:
237: // need to store this, because I can't compute it once I throw
238: // away the items
239: Symbol const *stateSymbol;
240:
241: public: // data
242: // numerical state id, should be unique among item sets
243: // in a particular grammar's sets
244: StateId id;
245:
246: // it's useful to have a BFS tree superimposed on the transition
247: // graph; for example, it makes it easy to generate sample inputs
248: // for each state. so we store the parent pointer; we can derive
249: // child pointers by looking at all outgoing transitions, and
250: // filtering for those whose targets' parent pointers equal 'this'.
251: // the start state's parent is NULL, since it is the root of the
252: // BFS tree
253: ItemSet *BFSparent; // (serf)
254:
255: private: // funcs
256: int bcheckTerm(int index) const;
257: int bcheckNonterm(int index) const;
258: ItemSet *&refTransition(Symbol const *sym);
259:
260: void allocateTransitionFunction();
261: Symbol const *computeStateSymbolC() const;
262:
263: void deleteNonReductions(ObjList<LRItem> &list);
264:
265: public: // funcs
266: ItemSet(StateId id, int numTerms, int numNonterms);
267: ~ItemSet();
268:
269: ItemSet(Flatten&);
270: void xfer(Flatten &flat);
271: void xferSerfs(Flatten &flat, GrammarAnalysis &g);
272:
273: // ---- item queries ----
274: // the set of items names a symbol as the symbol used
275: // to reach this state -- namely, the symbol that appears
276: // to the left of a dot. this fn retrieves that symbol
277: // (if all items have dots at left edge, returns NULL; this
278: // would be true only for the initial state)
279: Symbol const *getStateSymbolC() const { return stateSymbol; }
280:
281: // equality is defined as having the same items (basic set equality)
282: bool operator== (ItemSet const &obj) const;
283:
284: // sometimes it's convenient to have all items mixed together
285: // (CONSTNESS: allows modification of items...)
286: void getAllItems(SObjList<LRItem> &dest, bool nonkernel=true) const;
287:
288: // used for sorting by id
289: static int diffById(ItemSet const *left, ItemSet const *right, void*);
290:
291: // ---- transition queries ----
292: // query transition fn for an arbitrary symbol; returns
293: // NULL if no transition is defined
294: ItemSet const *transitionC(Symbol const *sym) const;
295: ItemSet *transition(Symbol const *sym)
296: { return const_cast<ItemSet*>(transitionC(sym)); }
297:
298: // alternate interface; also might return NULL
299: ItemSet const *getTermTransition(int termId) const
300: { return termTransition[bcheckTerm(termId)]; }
301: ItemSet const *getNontermTransition(int nontermId) const
302: { return nontermTransition[bcheckNonterm(nontermId)]; }
303:
304: // get the list of productions that are ready to reduce, given
305: // that the next input symbol is 'lookahead' (i.e. in the follow
306: // of a production's LHS); parsing=true means we are actually
307: // parsing input, so certain tracing output is appropriate;
308: // 'reductions' is a list of const Productions
309: void getPossibleReductions(ProductionList &reductions,
310: Terminal const *lookahead,
311: bool parsing) const;
312:
313:
314: // assuming this itemset has at least one reduction ready (an assertion
315: // checks this), retrieve the first one
316: Production const *getFirstReduction() const;
317:
318: // ---- item mutations ----
319: // add a kernel item; used while constructing the state
320: void addKernelItem(LRItem * /*owner*/ item);
321:
322: // after adding all kernel items, call this
323: void sortKernelItems();
324:
325: // add a nonkernel item; used while computing closure; this
326: // item must not already be in the item set
327: void addNonkernelItem(LRItem * /*owner*/ item);
328:
329: // computes things derived from the item set lists:
330: // dotsAtEnd, numDotsAtEnd, kernelItemsCRC, stateSymbol;
331: // do this after adding things to the items lists
332: void changedItems();
333:
334: // a part of 'changedItems', this is used in a specialized way
335: // during LR item set construction; it leaves 'this' in a somewhat
336: // half-baked state (if changedItems is not also called), so some
337: // care needs to be taken when using this directly
338: void computeKernelCRC(GrowArray<DottedProduction const*> &array);
339:
340: // remove the reduce using 'prod' on lookahead 'sym';
341: // calls 'changedItems' internally
342: void removeReduce(Production const *prod, Terminal const *sym);
343:
344: // throw away information not needed during parsing
345: void throwAwayItems();
346:
347: // 'dest' has already been established to have the same kernel
348: // items as 'this' -- so merge all the kernel lookahead items
349: // of 'this' into 'dest'; return 'true' if any changes were made
350: // to 'dest'
351: bool mergeLookaheadsInto(ItemSet &dest) const;
352:
353: // true if this itemset has an item "A -> alpha * t beta", i.e.
354: // one that would extend 'A' by shifting 't'
355: bool hasExtendingShift(Nonterminal const *A, Terminal const *t) const;
356:
357: // ---- transition mutations ----
358: // set transition on 'sym' to be 'dest'
359: void setTransition(Symbol const *sym, ItemSet *dest);
360:
361: // remove the shift on 'sym'
362: void removeShift(Terminal const *sym);
363:
364: // ------ hashtable stuff --------
365: static ItemSet const *dataToKey(ItemSet *data);
366: static unsigned hash(ItemSet const *key);
367: static bool equalKey(ItemSet const *key1, ItemSet const *key2);
368:
369: // ---- debugging ----
370: void writeGraph(std::ostream &os, GrammarAnalysis const &g) const;
371: void print(std::ostream &os, GrammarAnalysis const &g, bool nonkernel=true) const;
372: };
373:
374:
375: // ---------------------- GrammarAnalysis -------------------
376: class GrammarAnalysis : public Grammar {
377: protected: // data
378: // if entry i,j is true, then nonterminal i can derive nonterminal j
379: // (this is a graph, represented (for now) as an adjacency matrix)
380: enum { emptyStringIndex = 0 };
381: Bit2d *derivable; // (owner)
382:
383: // index the symbols on their integer ids
384: Nonterminal **indexedNonterms; // (owner -> serfs) ntIndex -> Nonterminal
385: Terminal **indexedTerms; // (owner -> serfs) termIndex -> Terminal
386: // numNonterms==Grammar::numNonterminals(), numTerms==Grammar::numTerminals()
387: int numNonterms; // length of 'indexedNonterms' array
388: int numTerms; // " " terms "
389:
390: // during itemSetClosure, profiling reports we spend a lot of time
391: // walking the list of productions looking for those that have a given
392: // symbol on the LHS; so let's index productions by LHS symbol index;
393: // this array has 'numNonterms' elements, mapping each nonterminal to
394: // the list of productions with that nonterminal on the LHS
395: SObjList<Production> *productionsByLHS; // (owner ptr to array)
396:
397: // map of production x dotPosition -> DottedProduction;
398: // each element of the 'dottedProds' array is a pointer to an
399: // array of DottedProduction objects
400: DottedProduction **dottedProds; // (owner ptr to array of owners)
401:
402: // index of productions by id
403: Production **indexedProds; // (owner -> serfs) prodIndex -> Production
404: int numProds; // length of 'dottedProds'
405:
406: // only true after initializeAuxData has been called
407: bool initialized;
408:
409: // used to assign itemset ids while the item sets are being
410: // initially constructed; later, they get renumbered into a
411: // canonical order
412: int nextItemSetId;
413:
414: // the LR parsing tables
415: ObjList<ItemSet> itemSets;
416:
417: // distinguished start state; NOTE: much of the grammar analysis
418: // code currently assumes (and checks) that state 0 is the start
419: // state, so if you want to do something different, that code might
420: // need to be changed
421: ItemSet *startState; // (serf)
422:
423: public: // data
424: // true if any nonterminal can derive itself (with no extra symbols
425: // surrounding it) in 1 or more steps
426: bool cyclic;
427:
428: // symbol of interest; various diagnostics are printed when
429: // certain things happen with it (e.g. the first application
430: // is to print whenever something is added to this sym's
431: // follow)
432: Symbol const *symOfInterest;
433:
434: // incremented each time we encounter an error that we can recover from
435: int errors;
436:
437: // parse tables
438: ParseTables *tables; // (owner)
439:
440: private: // funcs
441: // ---- analysis init ----
442: // call this after grammar is completely built
443: void initializeAuxData();
444: void computeIndexedNonterms();
445: void computeIndexedTerms();
446: void computeProductionsByLHS();
447: void computeReachable();
448: void computeReachableDFS(Nonterminal *nt);
449: void resetFirstFollow();
450: void computeDProdFirsts();
451: void computeSupersets();
452:
453: // ---- dotted productions ----
454: void createDottedProductions();
455: void deleteDottedProductions();
456: DottedProduction const *getDProd(Production const *prod, int posn) const;
457: DottedProduction *getDProd_nc(Production const *prod, int posn)
458: { return const_cast<DottedProduction*>(getDProd(prod, posn)); }
459:
460: // given a dprod, yield the one obtained by moving the dot one
461: // place to the right
462: DottedProduction const *nextDProd(DottedProduction const *dp) const
463: #ifdef NDEBUG
464: { return dp+1; } // take advantage of physical co-location
465: #endif
466: ; // debug version checks bounds
467:
468: // ---- derivability ----
469: // iteratively compute every pair A,B such that A can derive B
470: void computeWhatCanDeriveWhat();
471: void initDerivableRelation();
472:
473: // add a derivability relation; returns true if this makes a change
474: bool addDerivable(Nonterminal const *left, Nonterminal const *right);
475: bool addDerivable(int leftNtIndex, int rightNtIndex);
476:
477: // private derivability interface
478: bool canDerive(int leftNtIndex, int rightNtIndex) const;
479: bool sequenceCanDeriveEmpty(RHSEltList const &list) const;
480: bool iterSeqCanDeriveEmpty(RHSEltListIter iter) const;
481:
482: // ---- First ----
483: void computeFirst();
484: //bool addFirst(Nonterminal *NT, Terminal *term);
485: void firstOfSequence(TerminalSet &destList, RHSEltList const &sequence);
486: void firstOfIterSeq(TerminalSet &destList, RHSEltListIter sym);
487:
488: // ---- Follow ----
489: void computeFollow();
490: //bool addFollow(Nonterminal *NT, Terminal *term);
491:
492: // ---- LR item sets ----
493: ItemSet *makeItemSet();
494: void disposeItemSet(ItemSet *is);
495: void moveDotNoClosure(ItemSet const *source, Symbol const *symbol,
496: ItemSet *dest, ObjList<LRItem> &unusedTail,
497: GrowArray<DottedProduction const*> &array);
498: ItemSet *findItemSetInList(ObjList<ItemSet> &list,
499: ItemSet const *itemSet);
500: static bool itemSetsEqual(ItemSet const *is1, ItemSet const *is2);
501:
502: void constructLRItemSets();
503: void lrParse(char const *input);
504:
505: void handleShiftReduceConflict(
506: bool &keepShift, bool &keepReduce, bool &dontWarn,
507: ItemSet const *state, Production const *prod, Terminal const *sym);
508:
509: void resolveConflicts(
510: ItemSet const *state, // parse state in which the actions are possible
511: Terminal const *sym, // lookahead symbol for these actions
512: ItemSet const *&shiftDest, // (inout) if non-NULL, the state to which we can shift
513: ProductionList &reductions, // (inout) list of possible reductions
514: bool allowAmbig, // if false, always return at most 1 action
515: bool &printedConflictHeader, // (inout) true once we've printed the state header
516: int &sr, int &rr); // (inout) counts of S/R and R/R conflicts, resp.
517: void computeParseTables(bool allowAmbig);
518:
519: int subsetDirectiveResolution(
520: ItemSet const *state, // parse state in which the actions are possible
521: Terminal const *sym, // lookahead symbol for these actions
522: ProductionList &reductions); // list to try to cut down
523:
524: void renumberStates();
525: static int renumberStatesDiff
526: (ItemSet const *left, ItemSet const *right, void *vgramanl);
527: static int arbitraryProductionOrder
528: (Production const *left, Production const *right, void*);
529: static int arbitraryRHSEltOrder
530: (Production::RHSElt const *left, Production::RHSElt const *right, void*);
531:
532: void computeBFSTree();
533:
534: // misc
535: void computePredictiveParsingTable();
536: // non-const because have to add productions to lists
537:
538: void topologicalSort(NtIndex *order, int &nextOrdinal,
539: NtIndex current, BitArray &seen);
540:
541: // the inverse of transition: map a target state to the symbol that
542: // would transition to that state (from the given source state)
543: Symbol const *inverseTransitionC(ItemSet const *source,
544: ItemSet const *target) const;
545:
546: // sample input helpers
547: void leftContext(SymbolList &output, ItemSet const *state) const;
548: bool rewriteAsTerminals(TerminalList &output, SymbolList const &input) const;
549: bool rewriteAsTerminalsHelper(TerminalList &output, SymbolList const &input,
550: ProductionList &reductionStack) const;
551: bool rewriteSingleNTAsTerminals(TerminalList &output, Nonterminal const *nonterminal,
552: ProductionList &reductionStack) const;
553:
554: // let's try this .. it needs to access 'itemSets'
555: friend void ItemSet::xferSerfs(Flatten &flat, GrammarAnalysis &g);
556:
557: void singleItemClosure(OwnerKHashTable<LRItem, DottedProduction> &finished,
558: ArrayStack<LRItem*> &worklist,
559: //OwnerKHashArray<LRItem, DottedProduction> &workhash,
560: LRItem const *item, TerminalSet &scratchSet);
561:
562: public: // funcs
563: GrammarAnalysis();
564: ~GrammarAnalysis();
565:
566: // access symbols by index
567: Terminal const *getTerminal(int index) const;
568: Nonterminal const *getNonterminal(int index) const;
569: Production const *getProduction(int index) const;
570:
571: ItemSet const *getItemSet(int index) const;
572: int numItemSets() const { return nextItemSetId; }
573:
574: // faster access to counts
575: int numTerminals() const { return numTerms; }
576: int numNonterminals() const { return numNonterms; }
577:
578: // binary read/write
579: void xfer(Flatten &flat);
580:
581: // essentially, my 'main()' while experimenting
582: void exampleGrammar();
583:
584: // overrides base class to add a little bit of the
585: // annotated info
586: void printProductions(std::ostream &os, bool printCode=true) const;
587:
588: // print lots of stuff
589: void printProductionsAndItems(std::ostream &os, bool printCode=true) const;
590:
591: // when grammar is built, this runs all analyses and stores
592: // the results in this object's data fields; write the LR item
593: // sets to the given file (or don't, if NULL)
594: void runAnalyses(char const *setsFname);
595:
596: // print the item sets to a stream (optionally include nonkernel items)
597: void printItemSets(std::ostream &os, bool nonkernel) const;
598:
599: // given a grammar, replace all of its actions with actions that
600: // will build a straightforward parse tree using the facilities
601: // of ptreenode.h; the rules will need the user to already have
602: // done some necessary work in the verbatim preamble, such as
603: // #including ptreenode.h
604: void addTreebuildingActions();
605:
606: // ---- grammar queries ----
607: bool canDerive(Nonterminal const *lhs, Nonterminal const *rhs) const;
608: bool canDeriveEmpty(Nonterminal const *lhs) const;
609:
610: bool firstIncludes(Nonterminal const *NT, Terminal const *term) const;
611: bool followIncludes(Nonterminal const *NT, Terminal const *term) const;
612:
613: // ---- sample inputs and contexts ----
614: sm_string sampleInput(ItemSet const *state) const;
615: sm_string leftContextString(ItemSet const *state) const;
616:
617: // ---- moved out of private ----
618: void itemSetClosure(ItemSet &itemSet);
619: DottedProduction const *getDProdIndex(int prodIndex, int posn) const;
620: };
621:
622:
623: // in gramexpl.cc: interactive grammar experimentation system
624: void grammarExplorer(GrammarAnalysis &g);
625:
626:
627: #endif // __GRAMANL_H
Start C section to elk/elk_gramast.ast.gen.h[1
/1
]
1: #line 1956 "./lpsrc/elk.pak"
2: // gramast.ast.gen.h
3: // *** DO NOT EDIT ***
4: // generated automatically by astgen, from gramast.ast
5:
6: #ifndef GRAMAST_AST_GEN_H
7: #define GRAMAST_AST_GEN_H
8:
9: #include "ast_asthelp.h"
10:
11: // fwd decls
12: class GrammarAST;
13: class TopForm;
14: class TF_context;
15: class TF_verbatim;
16: class TF_option;
17: class TF_terminals;
18: class TF_nonterm;
19: class TermDecl;
20: class TermType;
21: class PrecSpec;
22: class SpecFunc;
23: class ProdDecl;
24: class RHSElt;
25: class RH_name;
26: class RH_sm_string;
27: class RH_prec;
28:
29:
30: // *** DO NOT EDIT ***
31:
32: #include "ast_locstr.h"
33: #include "elk_asockind.h"
34:
35: // *** DO NOT EDIT ***
36: class GrammarAST {
37: public: // data
38: ASTList <TopForm > forms;
39:
40: public: // funcs
41: GrammarAST(ASTList <TopForm > *_forms) : forms(_forms) {
42: { terms=NULL; firstNT=NULL; };
43: }
44: ~GrammarAST();
45:
46: char const *kindName() const { return "GrammarAST"; }
47:
48: GrammarAST *clone() const;
49:
50: void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
51:
52: public: TF_terminals *terms;
53: public: TF_nonterm *firstNT;
54: };
55:
56:
57:
58: // *** DO NOT EDIT ***
59: class TopForm {
60: public: // data
61:
62: public: // funcs
63: TopForm() {
64: }
65: virtual ~TopForm();
66:
67: enum Kind { TF_CONTEXT, TF_VERBATIM, TF_OPTION, TF_TERMINALS, TF_NONTERM, NUM_KINDS };
68: virtual Kind kind() const = 0;
69:
70: static char const * const kindNames[NUM_KINDS];
71: char const *kindName() const { return kindNames[kind()]; }
72:
73: DECL_AST_DOWNCASTS(TF_context, TF_CONTEXT)
74: DECL_AST_DOWNCASTS(TF_verbatim, TF_VERBATIM)
75: DECL_AST_DOWNCASTS(TF_option, TF_OPTION)
76: DECL_AST_DOWNCASTS(TF_terminals, TF_TERMINALS)
77: DECL_AST_DOWNCASTS(TF_nonterm, TF_NONTERM)
78:
79: virtual TopForm *clone() const=0;
80:
81: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
82:
83: };
84:
85: class TF_context : public TopForm {
86: public: // data
87: LocString body;
88:
89: public: // funcs
90: TF_context(LocString *_body) : TopForm(), body(_body) {
91: }
92: virtual ~TF_context();
93:
94: virtual Kind kind() const { return TF_CONTEXT; }
95: enum { TYPE_TAG = TF_CONTEXT };
96:
97: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
98:
99: virtual TF_context *clone() const;
100:
101: };
102:
103: class TF_verbatim : public TopForm {
104: public: // data
105: bool isImpl;
106: LocString code;
107:
108: public: // funcs
109: TF_verbatim(bool _isImpl, LocString *_code) : TopForm(), isImpl(_isImpl), code(_code) {
110: }
111: virtual ~TF_verbatim();
112:
113: virtual Kind kind() const { return TF_VERBATIM; }
114: enum { TYPE_TAG = TF_VERBATIM };
115:
116: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
117:
118: virtual TF_verbatim *clone() const;
119:
120: };
121:
122: class TF_option : public TopForm {
123: public: // data
124: LocString name;
125: int value;
126:
127: public: // funcs
128: TF_option(LocString *_name, int _value) : TopForm(), name(_name), value(_value) {
129: }
130: virtual ~TF_option();
131:
132: virtual Kind kind() const { return TF_OPTION; }
133: enum { TYPE_TAG = TF_OPTION };
134:
135: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
136:
137: virtual TF_option *clone() const;
138:
139: };
140:
141: class TF_terminals : public TopForm {
142: public: // data
143: ASTList <TermDecl > decls;
144: ASTList <TermType > types;
145: ASTList <PrecSpec > prec;
146:
147: public: // funcs
148: TF_terminals(ASTList <TermDecl > *_decls, ASTList <TermType > *_types, ASTList <PrecSpec > *_prec) : TopForm(), decls(_decls), types(_types), prec(_prec) {
149: }
150: virtual ~TF_terminals();
151:
152: virtual Kind kind() const { return TF_TERMINALS; }
153: enum { TYPE_TAG = TF_TERMINALS };
154:
155: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
156:
157: virtual TF_terminals *clone() const;
158:
159: };
160:
161: class TF_nonterm : public TopForm {
162: public: // data
163: LocString name;
164: LocString type;
165: ASTList <SpecFunc > funcs;
166: ASTList <ProdDecl > productions;
167: ASTList <LocString > subsets;
168:
169: public: // funcs
170: TF_nonterm(LocString *_name, LocString *_type, ASTList <SpecFunc > *_funcs, ASTList <ProdDecl > *_productions, ASTList <LocString > *_subsets) : TopForm(), name(_name), type(_type), funcs(_funcs), productions(_productions), subsets(_subsets) {
171: }
172: virtual ~TF_nonterm();
173:
174: virtual Kind kind() const { return TF_NONTERM; }
175: enum { TYPE_TAG = TF_NONTERM };
176:
177: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
178:
179: virtual TF_nonterm *clone() const;
180:
181: };
182:
183:
184:
185: // *** DO NOT EDIT ***
186: class TermDecl {
187: public: // data
188: int code;
189: LocString name;
190: LocString alias;
191:
192: public: // funcs
193: TermDecl(int _code, LocString *_name, LocString *_alias) : code(_code), name(_name), alias(_alias) {
194: }
195: ~TermDecl();
196:
197: char const *kindName() const { return "TermDecl"; }
198:
199: TermDecl *clone() const;
200:
201: void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
202:
203: };
204:
205:
206:
207: // *** DO NOT EDIT ***
208: class TermType {
209: public: // data
210: LocString name;
211: LocString type;
212: ASTList <SpecFunc > funcs;
213:
214: public: // funcs
215: TermType(LocString *_name, LocString *_type, ASTList <SpecFunc > *_funcs) : name(_name), type(_type), funcs(_funcs) {
216: }
217: ~TermType();
218:
219: char const *kindName() const { return "TermType"; }
220:
221: TermType *clone() const;
222:
223: void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
224:
225: };
226:
227:
228:
229: // *** DO NOT EDIT ***
230: class PrecSpec {
231: public: // data
232: AssocKind kind;
233: int prec;
234: ASTList <LocString > tokens;
235:
236: public: // funcs
237: PrecSpec(AssocKind _kind, int _prec, ASTList <LocString > *_tokens) : kind(_kind), prec(_prec), tokens(_tokens) {
238: }
239: ~PrecSpec();
240:
241: char const *kindName() const { return "PrecSpec"; }
242:
243: PrecSpec *clone() const;
244:
245: void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
246:
247: };
248:
249:
250:
251: // *** DO NOT EDIT ***
252: class SpecFunc {
253: public: // data
254: LocString name;
255: ASTList <LocString > formals;
256: LocString code;
257:
258: public: // funcs
259: SpecFunc(LocString *_name, ASTList <LocString > *_formals, LocString *_code) : name(_name), formals(_formals), code(_code) {
260: }
261: ~SpecFunc();
262:
263: char const *kindName() const { return "SpecFunc"; }
264:
265: SpecFunc *clone() const;
266:
267: void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
268:
269: public: LocString nthFormal(int i) const
270: { return *( formals.nthC(i) ); };
271: };
272:
273:
274:
275: // *** DO NOT EDIT ***
276: class ProdDecl {
277: public: // data
278: ASTList <RHSElt > rhs;
279: LocString actionCode;
280:
281: public: // funcs
282: ProdDecl(ASTList <RHSElt > *_rhs, LocString *_actionCode) : rhs(_rhs), actionCode(_actionCode) {
283: }
284: ~ProdDecl();
285:
286: char const *kindName() const { return "ProdDecl"; }
287:
288: ProdDecl *clone() const;
289:
290: void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
291:
292: };
293:
294:
295:
296: // *** DO NOT EDIT ***
297: class RHSElt {
298: public: // data
299:
300: public: // funcs
301: RHSElt() {
302: }
303: virtual ~RHSElt();
304:
305: enum Kind { RH_NAME, RH_STRING, RH_PREC, NUM_KINDS };
306: virtual Kind kind() const = 0;
307:
308: static char const * const kindNames[NUM_KINDS];
309: char const *kindName() const { return kindNames[kind()]; }
310:
311: DECL_AST_DOWNCASTS(RH_name, RH_NAME)
312: DECL_AST_DOWNCASTS(RH_sm_string, RH_STRING)
313: DECL_AST_DOWNCASTS(RH_prec, RH_PREC)
314:
315: virtual RHSElt *clone() const=0;
316:
317: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
318:
319: };
320:
321: class RH_name : public RHSElt {
322: public: // data
323: LocString tag;
324: LocString name;
325:
326: public: // funcs
327: RH_name(LocString *_tag, LocString *_name) : RHSElt(), tag(_tag), name(_name) {
328: }
329: virtual ~RH_name();
330:
331: virtual Kind kind() const { return RH_NAME; }
332: enum { TYPE_TAG = RH_NAME };
333:
334: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
335:
336: virtual RH_name *clone() const;
337:
338: };
339:
340: class RH_sm_string : public RHSElt {
341: public: // data
342: LocString tag;
343: LocString str;
344:
345: public: // funcs
346: RH_sm_string(LocString *_tag, LocString *_str) : RHSElt(), tag(_tag), str(_str) {
347: }
348: virtual ~RH_sm_string();
349:
350: virtual Kind kind() const { return RH_STRING; }
351: enum { TYPE_TAG = RH_STRING };
352:
353: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
354:
355: virtual RH_sm_string *clone() const;
356:
357: };
358:
359: class RH_prec : public RHSElt {
360: public: // data
361: LocString tokName;
362:
363: public: // funcs
364: RH_prec(LocString *_tokName) : RHSElt(), tokName(_tokName) {
365: }
366: virtual ~RH_prec();
367:
368: virtual Kind kind() const { return RH_PREC; }
369: enum { TYPE_TAG = RH_PREC };
370:
371: virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
372:
373: virtual RH_prec *clone() const;
374:
375: };
376:
377:
378:
379: #endif // GRAMAST_AST_GEN_H
Start cpp section to elk/elk_grammar.cpp[1
/1
]
1: #line 2336 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24: StringTable grammarStringTable;
25:
26:
27:
28: Symbol::Symbol(LocString const &n, bool t, bool e)
29: : name(n),
30: isTerm(t),
31: isEmptyString(e),
32: type(NULL),
33: dupParam(NULL),
34: dupCode(),
35: delParam(NULL),
36: delCode(),
37: reachable(false)
38: {}
39:
40: Symbol::~Symbol()
41: {}
42:
43:
44: Symbol::Symbol(Flatten &flat)
45: : name(flat),
46: isTerm(false),
47: isEmptyString(false),
48: type(NULL),
49: dupParam(NULL),
50: delParam(NULL)
51: {}
52:
53: void Symbol::xfer(Flatten &flat)  // (de)serialize this symbol's fields
54: {
55:   // these members are declared const; on a read we are filling in a
56:   const_cast<LocString&>(name).xfer(flat);
57:   flat.xferBool(const_cast<bool&>(isTerm));
58:   flat.xferBool(const_cast<bool&>(isEmptyString));
59:   // freshly-constructed object, so the casts are initialization, not mutation
60:   flattenStrTable->xfer(flat, type);
61:   // dup hook: parameter name, then its code body
62:   flattenStrTable->xfer(flat, dupParam);
63:   dupCode.xfer(flat);
64:   // del hook, parallel to dup above
65:   flattenStrTable->xfer(flat, delParam);
66:   delCode.xfer(flat);
67:   // 'reachable' analysis flag
68:   flat.xferBool(reachable);
69: }
70:
71:
72: int Symbol::getTermOrNontermIndex() const
73: {
74:   // Terminals and nonterminals live in two separate dense index
75:   // spaces; hand back whichever one applies to this symbol's kind.
76:   // (Equivalent to the original if/else, folded into one expression.)
77:   return isTerminal()
78:            ? asTerminalC().termIndex
79:            : asNonterminalC().ntIndex;
80: }
81:
82:
83: void Symbol::print(std::ostream &os) const
84: {
85: os << name;
86: if (type) {
87: os << "[" << type << "]";
88: }
89: os << ":";
90: PVAL(isTerm);
91: }
92:
93:
94: void Symbol::printDDM(std::ostream &os) const
95: {
96:
97: if (!anyDDM()) return;
98:
99:
100: os << " " << (isTerminal()? "token" : "nonterm");
101: if (type) {
102: os << "[" << type << "]";
103: }
104: os << " " << name << " {\n";
105:
106: internalPrintDDM(os);
107:
108: os << " }\n";
109: }
110:
111:
112: void Symbol::internalPrintDDM(std::ostream &os) const
113: {
114: if (dupCode.isNonNull()) {
115: os << " dup(" << dupParam << ") [" << dupCode << "]\n";
116: }
117:
118: if (delCode.isNonNull()) {
119: os << " del(" << (delParam? delParam : "") << ") [" << delCode << "]\n";
120: }
121: }
122:
123:
124: bool Symbol::anyDDM() const
125: {
126: return dupCode.isNonNull() ||
127: delCode.isNonNull();
128: }
129:
130:
131: Terminal const &Symbol::asTerminalC() const
132: {
133: xassert(isTerminal());
134: return (Terminal const &)(*this);
135: }
136:
137: Nonterminal const &Symbol::asNonterminalC() const
138: {
139: xassert(isNonterminal());
140: return (Nonterminal const &)(*this);
141: }
142:
143:
144: Terminal const *Symbol::ifTerminalC() const
145: {
146: return isTerminal()? (Terminal const *)this : NULL;
147: }
148:
149: Nonterminal const *Symbol::ifNonterminalC() const
150: {
151: return isNonterminal()? (Nonterminal const *)this : NULL;
152: }
153:
154:
155:
156:
157: Terminal::Terminal(Flatten &flat)
158: : Symbol(flat),
159: alias(flat),
160: classifyParam(NULL)
161: {}
162:
163: void Terminal::xfer(Flatten &flat)
164: {
165: Symbol::xfer(flat);
166:
167: alias.xfer(flat);
168:
169: flat.xferInt(precedence);
170: flat.xferInt((int&)associativity);
171:
172: flat.xferInt(termIndex);
173:
174: flattenStrTable->xfer(flat, classifyParam);
175: classifyCode.xfer(flat);
176: }
177:
178:
179: void Terminal::print(std::ostream &os) const
180: {
181: os << "[" << termIndex << "]";
182: if (precedence) {
183: os << "(" << ::toString(associativity) << " " << precedence << ")";
184: }
185: os << " ";
186: Symbol::print(os);
187: }
188:
189:
190: void Terminal::internalPrintDDM(std::ostream &os) const
191: {
192: Symbol::internalPrintDDM(os);
193:
194: if (classifyCode.isNonNull()) {
195: os << " classify(" << classifyParam << ") [" << classifyCode << "]\n";
196: }
197: }
198:
199:
200: bool Terminal::anyDDM() const
201: {
202: return Symbol::anyDDM() ||
203: classifyCode.isNonNull();
204: }
205:
206:
207: sm_string Terminal::toString(bool quoteAliases) const
208: {
209: if (alias.length() > 0) {
210: if (quoteAliases) {
211: return sm_stringc << "\"" << ::toString(alias) << "\"";
212: }
213: else {
214: return ::toString(alias);
215: }
216: }
217: else {
218: return ::toString(name);
219: }
220: }
221:
222:
223:
224: Nonterminal::Nonterminal(LocString const &name, bool isEmpty)
225: : Symbol(name, false /*terminal*/, isEmpty),
226: mergeParam1(NULL),
227: mergeParam2(NULL),
228: mergeCode(),
229: keepParam(NULL),
230: keepCode(),
231: maximal(false),
232: subsets(),
233: ntIndex(-1),
234: cyclic(false),
235: first(0),
236: follow(0),
237: superset(NULL)
238: {}
239:
240: Nonterminal::~Nonterminal()
241: {}
242:
243:
244: Nonterminal::Nonterminal(Flatten &flat)
245: : Symbol(flat),
246: mergeParam1(NULL),
247: mergeParam2(NULL),
248: keepParam(NULL),
249: first(flat),
250: follow(flat),
251: superset(NULL)
252: {}
253:
254: void Nonterminal::xfer(Flatten &flat)
255: {
256: Symbol::xfer(flat);
257:
258: flattenStrTable->xfer(flat, mergeParam1);
259: flattenStrTable->xfer(flat, mergeParam2);
260: mergeCode.xfer(flat);
261:
262: flattenStrTable->xfer(flat, keepParam);
263: keepCode.xfer(flat);
264: }
265:
266: void Nonterminal::xferSerfs(Flatten &flat, Grammar &g)
267: {
268:
269: flat.xferInt(ntIndex);
270: flat.xferBool(cyclic);
271: first.xfer(flat);
272: follow.xfer(flat);
273: }
274:
275:
276: void Nonterminal::print(std::ostream &os, Grammar const *grammar) const
277: {
278:   os << "[" << ntIndex << "] ";
279:   Symbol::print(os);
280:   // flag nonterminals that can derive themselves; make that
281:   // stand out since it usually indicates a grammar problem
282:   if (cyclic) {
283:     os << " (cyclic!)";
284:   }
285:
286:   if (grammar) {
287:     // the sets need the grammar to map terminal indices back to names
288:     os << " first={";
289:     first.print(os, *grammar);
290:     os << "}";
291:
292:     // fix: the opening brace was missing here, so output read "follow=a/b}"
293:     os << " follow={";
294:     follow.print(os, *grammar);
295:     os << "}";
296:   }
297: }
298:
299:
300: void Nonterminal::internalPrintDDM(std::ostream &os) const
301: {
302: Symbol::internalPrintDDM(os);
303:
304: if (mergeCode.isNonNull()) {
305: os << " merge(" << mergeParam1 << ", " << mergeParam2
306: << ") [" << mergeCode << "]\n";
307: }
308:
309: if (keepCode.isNonNull()) {
310: os << " keep(" << keepParam << ") [" << keepCode << "]\n";
311: }
312: }
313:
314:
315: bool Nonterminal::anyDDM() const
316: {
317: return Symbol::anyDDM() ||
318: mergeCode.isNonNull() ||
319: keepCode.isNonNull();
320: }
321:
322:
323:
324: STATICDEF Terminal const *TerminalSet::suppressExcept = NULL;
325:
326: TerminalSet::TerminalSet(int numTerms)
327: {
328: init(numTerms);
329: }
330:
331: TerminalSet::TerminalSet(TerminalSet const &obj)
332: {
333: init(obj.bitmapLen * 8);
334: copy(obj);
335: }
336:
337: void TerminalSet::init(int numTerms)
338: {
339: if (numTerms != 0) {
340:
341:
342: bitmapLen = (numTerms + 7) / 8;
343: bitmap = new unsigned char[bitmapLen];
344:
345:
346: memset(bitmap, 0, bitmapLen);
347: }
348: else {
349:
350:
351: bitmapLen = 0;
352: bitmap = NULL;
353: }
354: }
355:
356:
357: TerminalSet::~TerminalSet()
358: {
359: if (bitmap) {
360: delete[] bitmap;
361: }
362: }
363:
364:
365: TerminalSet::TerminalSet(Flatten&)
366:   : bitmap(NULL)
367: { bitmapLen = 0; }  // fix: previously indeterminate until xfer(); keep the set in a defined state
368:
369: void TerminalSet::xfer(Flatten &flat)
370: {
371: flat.xferInt(bitmapLen);
372:
373: if (bitmapLen > 0) {
374: if (flat.reading()) {
375: bitmap = new unsigned char[bitmapLen];
376: }
377: flat.xferSimple(bitmap, bitmapLen);
378: }
379: }
380:
381:
382: void TerminalSet::reset(int numTerms)
383: {
384: if (bitmap) {
385: delete[] bitmap;
386: }
387: init(numTerms);
388: }
389:
390:
391: unsigned char *TerminalSet::getByte(int id) const
392: {
393: int offset = (unsigned)id / 8;
394: xassert(offset < bitmapLen);
395:
396: return bitmap + offset;
397: }
398:
399:
400: bool TerminalSet::contains(int id) const
401: {
402:   unsigned char *p = getByte(id);
403:   return ((*p >> getBit(id)) & 1) == 1;  // parens added: '==' binds tighter than '&'; old form was only accidentally correct
404: }
405:
406:
407: bool TerminalSet::isEqual(TerminalSet const &obj) const
408: {
409: xassert(obj.bitmapLen == bitmapLen);
410: return 0==memcmp(bitmap, obj.bitmap, bitmapLen);
411: }
412:
413:
414: void TerminalSet::add(int id)
415: {
416: unsigned char *p = getByte(id);
417: *p |= (unsigned char)(1 << getBit(id));
418: }
419:
420:
421: void TerminalSet::remove(int id)
422: {
423: unsigned char *p = getByte(id);
424: *p &= (unsigned char)(~(1 << getBit(id)));
425: }
426:
427:
428: void TerminalSet::clear()
429: {
430: memset(bitmap, 0, bitmapLen);
431: }
432:
433:
434: void TerminalSet::copy(TerminalSet const &obj)
435: {
436: xassert(obj.bitmapLen == bitmapLen);
437: memcpy(bitmap, obj.bitmap, bitmapLen);
438: }
439:
440:
441: bool TerminalSet::merge(TerminalSet const &obj)
442: {
443:   // Bitwise-OR obj's bits into this set, one byte at a time,
444:   // reporting whether any byte actually gained a bit.
445:   bool grew = false;
446:   for (int idx = 0; idx < bitmapLen; idx++) {
447:     unsigned merged = bitmap[idx] | obj.bitmap[idx];
448:     if (merged != bitmap[idx]) {
449:       bitmap[idx] = merged;
450:       grew = true;
451:     }
452:   }
453:   return grew;
454: }
454:
455:
456: void TerminalSet::print(std::ostream &os, Grammar const &g) const
457: {
458:   int ct=0;
459:   FOREACH_TERMINAL(g.terminals, iter) {
460:     Terminal const *t = iter.data();
461:     if (!contains(t->termIndex)) continue;
462:     // when the static suppressExcept filter is set, show only that terminal
463:     if (suppressExcept &&
464:         suppressExcept != t) continue;
465:
466:     if (ct++ == 0) {
467:       // NOTE(review): even the *first* member is preceded by ", ";
468:       // a caller like Nonterminal::print() emits "first={" right before,
469:       // yielding "first={, a/b" -- confirm this leading separator is intended
470:       os << ", ";
471:     }
472:     else {
473:       os << "/";
474:     }
475:
476:     os << t->toString();
477:   }
478: }
479:
480:
481:
482: Production::RHSElt::~RHSElt()
483: {}
484:
485:
486: Production::RHSElt::RHSElt(Flatten &flat)
487: : sym(NULL),
488: tag(flat)
489: {}
490:
491: void Production::RHSElt::xfer(Flatten &flat)
492: {
493: tag.xfer(flat);
494: }
495:
496: void Production::RHSElt::xferSerfs(Flatten &flat, Grammar &g)
497: {
498: xferSerfPtr(flat, sym);
499: }
500:
501:
502:
503:
504: Production::Production(Nonterminal *L, char const *Ltag)
505: : left(L),
506: right(),
507: precedence(0),
508: rhsLen(-1),
509: prodIndex(-1),
510: firstSet(0)
511: {}
512:
513: Production::~Production()
514: {}
515:
516:
517: Production::Production(Flatten &flat)
518: : left(NULL),
519: action(flat),
520: firstSet(flat)
521: {}
522:
523: void Production::xfer(Flatten &flat)
524: {
525: xferObjList(flat, right);
526: action.xfer(flat);
527: flat.xferInt(precedence);
528:
529: flat.xferInt(rhsLen);
530: flat.xferInt(prodIndex);
531: firstSet.xfer(flat);
532: }
533:
534: void Production::xferSerfs(Flatten &flat, Grammar &g)
535: {
536:
537:
538: xferSerfPtrToList(flat, const_cast<Nonterminal*&>(left),
539: g.nonterminals);
540:
541:
542: MUTATE_EACH_OBJLIST(RHSElt, right, iter) {
543: iter.data()->xferSerfs(flat, g);
544: }
545:
546:
547: if (flat.reading()) {
548: computeDerived();
549: }
550: }
551:
552:
553:
554: int Production::rhsLength() const
555: {
556:   if (!right.isEmpty()) {
557:     // the emptyString pseudo-symbol must never be stored in a RHS
558:     // (append() asserts this); an empty production has an empty list
559:     xassert(!right.nthC(0)->sym->isEmptyString);
560:   }
561:
562:   return right.count();
563: }
564:
565:
566:
567:
568: int Production::rhsLength() const  // NOTE(review): duplicates the definition above; one was presumably disabled by conditional compilation before comments/preprocessor lines were stripped from this pack -- confirm which is live before building
569: {
570:   xassert(rhsLen != -1);  // computeDerived() must have run first
571:   return rhsLen;
572: }
573:
574:
575:
576: int Production::numRHSNonterminals() const
577: {
578: int ct = 0;
579: FOREACH_OBJLIST(RHSElt, right, iter) {
580: if (iter.data()->sym->isNonterminal()) {
581: ct++;
582: }
583: }
584: return ct;
585: }
586:
587:
588: bool Production::rhsHasSymbol(Symbol const *sym) const
589: {
590: FOREACH_OBJLIST(RHSElt, right, iter) {
591: if (iter.data()->sym == sym) {
592: return true;
593: }
594: }
595: return false;
596: }
597:
598:
599: void Production::getRHSSymbols(SymbolList &output) const
600: {
601: FOREACH_OBJLIST(RHSElt, right, iter) {
602: output.append(iter.data()->sym);
603: }
604: }
605:
606:
607: void Production::append(Symbol *sym, LocString const &tag)
608: {
609:
610:
611:
612: xassert(!sym->isEmptyString);
613:
614: right.append(new RHSElt(sym, tag));
615: }
616:
617:
618: void Production::finished(int numTerms)
619: {
620: computeDerived();
621: firstSet.reset(numTerms);
622: }
623:
624: void Production::computeDerived()
625: {
626: rhsLen = right.count();
627: }
628:
629:
630:
631:
632:
633: bool tagCompare(StringRef s1, StringRef s2)
634: {
635: return s1 == s2;
636: }
637:
638:
639: int Production::findTag(StringRef tag) const
640: {
641:   // scan RHS elements; indices start at 1, since 0 denotes the LHS (see symbolByIndexC)
642:   ObjListIter<RHSElt> tagIter(right);
643:   int index=1;
644:   for(; !tagIter.isDone(); tagIter.adv(), index++) {
645:     if (tagCompare(tagIter.data()->tag, tag)) {
646:       return index;
647:     }
648:   }
649:
650:   // not found
651:   return -1;
652: }
653:
654:
655:
656: sm_string taggedName(char const *name, char const *tag)
657: {
658: if (tag == NULL || tag[0] == 0) {
659: return sm_string(name);
660: }
661: else {
662: return sm_stringb(tag << ":" << name);
663: }
664: }
665:
666:
667: sm_string Production::symbolTag(int index) const
668: {
669:
670: xassert(index != 0);
671:
672:
673: index--;
674: return sm_string(right.nthC(index)->tag);
675: }
676:
677:
678: Symbol const *Production::symbolByIndexC(int index) const
679: {
680:
681: if (index == 0) {
682: return left;
683: }
684:
685:
686: index--;
687: return right.nthC(index)->sym;
688: }
689:
690:
691:
692: DottedProduction const *Production::getDProdC(int dotPlace) const
693: {
694: xassert(0 <= dotPlace && dotPlace < numDotPlaces);
695: return &dprods[dotPlace];
696: }
697:
698:
699:
700: void Production::print(std::ostream &os) const
701: {
702: os << toString();
703: }
704:
705:
706: sm_string Production::toString(bool printType, bool printIndex) const
707: {
708:
709: sm_stringBuilder sb;
710: if (printIndex) {
711: sb << "[" << prodIndex << "] ";
712: }
713:
714: sb << left->name;
715: if (printType && left->type) {
716: sb << "[" << left->type << "]";
717: }
718: sb << " -> " << rhsString();
719:
720: if (printType && precedence) {
721:
722: sb << " %prec(" << precedence << ")";
723: }
724: return sb;
725: }
726:
727:
728: sm_string Production::rhsString(bool printTags, bool quoteAliases) const
729: {
730: sm_stringBuilder sb;
731:
732: if (right.isNotEmpty()) {
733:
734: int ct=0;
735: FOREACH_OBJLIST(RHSElt, right, iter) {
736: RHSElt const &elt = *(iter.data());
737:
738: if (ct++ > 0) {
739: sb << " ";
740: }
741:
742: sm_string symName;
743: if (elt.sym->isNonterminal()) {
744: symName = elt.sym->name;
745: }
746: else {
747:
748: symName = elt.sym->asTerminalC().toString(quoteAliases);
749: }
750:
751: if (printTags) {
752:
753: sb << taggedName(symName, elt.tag);
754: }
755: else {
756: sb << symName;
757: }
758: }
759: }
760:
761: else {
762:
763: sb << "empty";
764: }
765:
766: return sb;
767: }
768:
769:
770: sm_string Production::toStringMore(bool printCode) const
771: {
772: sm_stringBuilder sb;
773: sb << toString();
774:
775: if (printCode && !action.isNull()) {
776: sb << "\t\t[" << action.strref() << "]";
777: }
778:
779: sb << "\n";
780:
781: return sb;
782: }
783:
784:
785:
786: Grammar::Grammar()
787: : startSymbol(NULL),
788: emptyString(LocString(HERE_SOURCELOC, "empty"),
789: true /*isEmptyString*/),
790: targetLang("C++"),
791: useGCDefaults(false),
792: defaultMergeAborts(false),
793: expectedSR(-1),
794: expectedRR(-1),
795: expectedUNRNonterms(-1),
796: expectedUNRTerms(-1)
797: {}
798:
799:
800: Grammar::~Grammar()
801: {}
802:
803:
804: void Grammar::xfer(Flatten &flat)
805: {
806:
807: flat.checkpoint(0xC7AB4D86);
808: xferObjList(flat, nonterminals);
809: xferObjList(flat, terminals);
810: xferObjList(flat, productions);
811:
812:
813:
814: xferObjList(flat, verbatim);
815:
816: actionClassName.xfer(flat);
817: xferObjList(flat, actionClasses);
818:
819: xferObjList(flat, implVerbatim);
820:
821: targetLang.xfer(flat);
822: flat.xferBool(useGCDefaults);
823: flat.xferBool(defaultMergeAborts);
824:
825: flat.xferInt(expectedSR);
826: flat.xferInt(expectedRR);
827: flat.xferInt(expectedUNRNonterms);
828: flat.xferInt(expectedUNRTerms);
829:
830:
831: flat.checkpoint(0x8580AAD2);
832:
833: MUTATE_EACH_OBJLIST(Nonterminal, nonterminals, nt) {
834: nt.data()->xferSerfs(flat, *this);
835: }
836: MUTATE_EACH_OBJLIST(Production, productions, p) {
837: p.data()->xferSerfs(flat, *this);
838: }
839:
840: xferSerfPtrToList(flat, startSymbol, nonterminals);
841:
842: flat.checkpoint(0x2874DB95);
843: }
844:
845:
846: int Grammar::numTerminals() const
847: {
848: return terminals.count();
849: }
850:
851: int Grammar::numNonterminals() const
852: {
853:   // the +1 accounts for the built-in 'emptyString' pseudo-nonterminal, which is not stored in the 'nonterminals' list
854:   return nonterminals.count() + 1;
855: }
856:
857:
858: void Grammar::printSymbolTypes(std::ostream &os) const
859: {
860: os << "Grammar terminals with types or precedence:\n";
861: FOREACH_OBJLIST(Terminal, terminals, term) {
862: Terminal const &t = *(term.data());
863: t.printDDM(os);
864: if (t.precedence) {
865: os << " " << t.name << " " << ::toString(t.associativity)
866: << " %prec " << t.precedence << std::endl;
867: }
868: }
869:
870: os << "Grammar nonterminals with types:\n";
871: FOREACH_OBJLIST(Nonterminal, nonterminals, nt) {
872: nt.data()->printDDM(os);
873: }
874: }
875:
876:
877: void Grammar::printProductions(std::ostream &os, bool code) const
878: {
879: os << "Grammar productions:\n";
880: for (ObjListIter<Production> iter(productions);
881: !iter.isDone(); iter.adv()) {
882: os << " " << iter.data()->toStringMore(code);
883: }
884: }
885:
886:
887:
888: void Grammar::addProduction(Nonterminal *lhs, Symbol *firstRhs, ...)
889: {
890: va_list argptr;
891: Symbol *arg;
892: va_start(argptr, firstRhs);
893:
894: Production *prod = new Production(lhs, NULL /*tag*/);
895: prod->append(firstRhs, NULL /*tag*/);
896: for(;;) {
897: arg = va_arg(argptr, Symbol*);
898: if (arg == NULL) {
899: break;
900: }
901:
902: prod->append(arg, NULL /*tag*/);
903: }
904:
905: addProduction(prod);
906: }
907:
908:
909:
910: void Grammar::addProduction(Production *prod)
911: {
912:
913:
914:
915: prod->prodIndex = productions.count();
916: productions.append(prod);
917:
918:
919:
920:
921: if (startSymbol == NULL) {
922: startSymbol = prod->left;
923: }
924: }
925:
926:
927:
928: bool Grammar::declareToken(LocString const &symbolName, int code,
929:                            LocString const &alias)
930: {
931:   // reject duplicates: report on stdout and tell the caller it failed
932:   if (findSymbolC(symbolName)) {
933:     std::cout << "token " << symbolName << " has already been declared\n";
934:     return false;
935:   }
936:
937:   // find-or-create the Terminal record for this name
938:   Terminal *term = getOrMakeTerminal(symbolName);
939:
940:   // record the lexer's token code and the display alias
941:   term->termIndex = code;
942:   term->alias = alias;
943:
944:   return true;
945: }
946:
947:
948:
949: void Grammar::checkWellFormed() const
950: {
951:
952: }
953:
954:
955:
956: sm_string bisonTokenName(Terminal const *t)
957: {
958:
959:
960:
961:
962: return sm_string(t->name.str);
963: }
964:
965:
966: void Grammar::printAsBison(std::ostream &os) const
967: {
968: os << "/* automatically generated grammar */\n\n";
969:
970: os << "/* -------- tokens -------- */\n";
971: FOREACH_TERMINAL(terminals, term) {
972:
973:
974: os << "%token " << bisonTokenName(term.data()) << " "
975: << term.data()->termIndex << "\n";
976: }
977: os << "\n\n";
978:
979: os << "/* -------- precedence and associativity ---------*/\n"
980: "/* low precedence */\n";
981: {
982:
983: int highMark=0;
984: FOREACH_TERMINAL(terminals, iter) {
985: highMark = max(iter.data()->precedence, highMark);
986: }
987:
988:
989:
990: static char const * const kindMap[NUM_ASSOC_KINDS] =
991: { "%left", "%right", "%nonassoc", "%nonassoc", "%nonassoc" };
992:
993:
994:
995: for (int level=1; level <= highMark; level++) {
996: AssocKind kind = NUM_ASSOC_KINDS;
997: FOREACH_TERMINAL(terminals, iter) {
998: Terminal const *t = iter.data();
999:
1000: if (t->precedence == level) {
1001: if (kind == NUM_ASSOC_KINDS) {
1002:
1003: kind = t->associativity;
1004: os << kindMap[kind];
1005: }
1006: else if (kind != t->associativity) {
1007: xfailure("different associativities at same precedence?!");
1008: }
1009:
1010:
1011: os << " " << bisonTokenName(t);
1012: }
1013: }
1014:
1015:
1016: os << "\n";
1017: }
1018: }
1019: os << "/* high precedence */\n"
1020: "\n\n";
1021:
1022: os << "/* -------- productions ------ */\n"
1023: "%%\n\n";
1024:
1025: FOREACH_NONTERMINAL(nonterminals, nt) {
1026:
1027: bool first = true;
1028: FOREACH_PRODUCTION(productions, prod) {
1029: if (prod.data()->left == nt.data()) {
1030:
1031: if (first) {
1032: os << nt.data()->name << ":";
1033: }
1034: else {
1035: os << "\n";
1036: INTLOOP(i, 0, nt.data()->name.length()) {
1037: os << " ";
1038: }
1039: os << "|";
1040: }
1041:
1042:
1043: FOREACH_OBJLIST(Production::RHSElt, prod.data()->right, symIter) {
1044: Symbol const *sym = symIter.data()->sym;
1045: if (sym != &emptyString) {
1046: if (sym->isTerminal()) {
1047: os << " " << bisonTokenName(&( sym->asTerminalC() ));
1048: }
1049: else {
1050: os << " " << sym->name;
1051: }
1052: }
1053: }
1054:
1055:
1056: if (prod.data()->rhsLength() == 0) {
1057: os << " /* empty */";
1058: }
1059:
1060:
1061: if (prod.data()->precedence) {
1062:
1063: bool found=false;
1064: FOREACH_TERMINAL(terminals, iter) {
1065: if (iter.data()->precedence == prod.data()->precedence) {
1066:
1067: os << " %prec " << bisonTokenName(iter.data());
1068: found = true;
1069: break;
1070: }
1071: }
1072: if (!found) {
1073: std::cout << "warning: cannot find token for precedence level "
1074: << prod.data()->precedence << std::endl;
1075: os << " /* no token precedence level "/* */
1076: << prod.data()->precedence << " */";
1077: }
1078: }
1079:
1080:
1081: os << " { $$=" << prod.data()->prodIndex << "; }";
1082:
1083: first = false;
1084: }
1085: }
1086:
1087: if (first) {
1088:
1089: os << "/* no rules for " << nt.data()->name << " */";
1090: }
1091: else {
1092:
1093: os << "\n";
1094: INTLOOP(i, 0, nt.data()->name.length()) {
1095: os << " ";
1096: }
1097: os << ";";
1098: }
1099:
1100: os << "\n\n";
1101: }
1102: }
1103:
1104:
1105:
1106:
1107: Nonterminal const *Grammar::findNonterminalC(char const *name) const
1108: {
1109:
1110: if (emptyString.name.equals(name)) {
1111: return &emptyString;
1112: }
1113:
1114: FOREACH_NONTERMINAL(nonterminals, iter) {
1115: if (iter.data()->name.equals(name)) {
1116: return iter.data();
1117: }
1118: }
1119: return NULL;
1120: }
1121:
1122:
1123: Terminal const *Grammar::findTerminalC(char const *name) const
1124: {
1125: FOREACH_TERMINAL(terminals, iter) {
1126: if (iter.data()->name.equals(name) ||
1127: iter.data()->alias.equals(name)) {
1128: return iter.data();
1129: }
1130: }
1131: return NULL;
1132: }
1133:
1134:
1135: Symbol const *Grammar::findSymbolC(char const *name) const
1136: {
1137:
1138: Nonterminal const *nt = findNonterminalC(name);
1139: if (nt) {
1140: return nt;
1141: }
1142:
1143:
1144: return findTerminalC(name);
1145: }
1146:
1147:
1148:
1149: Nonterminal *Grammar::getOrMakeNonterminal(LocString const &name)
1150: {
1151: Nonterminal *nt = findNonterminal(name);
1152: if (nt != NULL) {
1153: return nt;
1154: }
1155:
1156: nt = new Nonterminal(name);
1157: nonterminals.append(nt);
1158: return nt;
1159: }
1160:
1161: Terminal *Grammar::getOrMakeTerminal(LocString const &name)
1162: {
1163: Terminal *term = findTerminal(name);
1164: if (term != NULL) {
1165: return term;
1166: }
1167:
1168: term = new Terminal(name);
1169: terminals.append(term);
1170: return term;
1171: }
1172:
1173: Symbol *Grammar::getOrMakeSymbol(LocString const &name)
1174: {
1175: Symbol *sym = findSymbol(name);
1176: if (sym != NULL) {
1177: return sym;
1178: }
1179:
1180:
1181:
1182:
1183:
1184: if (isupper(name[0])) {
1185: return getOrMakeNonterminal(name);
1186: }
1187: else {
1188: return getOrMakeTerminal(name);
1189: }
1190: }
1191:
1192:
1193: int Grammar::getProductionIndex(Production const *prod) const
1194: {
1195: int ret = productions.indexOf(prod);
1196: xassert(ret != -1);
1197: return ret;
1198: }
1199:
1200:
1201: sm_string symbolSequenceToString(SymbolList const &list)
1202: {
1203: sm_stringBuilder sb;
1204:
1205: bool first = true;
1206: SFOREACH_SYMBOL(list, sym) {
1207: if (!first) {
1208: sb << " ";
1209: }
1210:
1211: if (sym.data()->isTerminal()) {
1212: sb << sym.data()->asTerminalC().toString();
1213: }
1214: else {
1215: sb << sym.data()->name;
1216: }
1217: first = false;
1218: }
1219:
1220: return sb;
1221: }
1222:
1223:
// Render a terminal list as "t1 t2 t3"; delegates to
// symbolSequenceToString.
sm_string terminalSequenceToString(TerminalList const &list)
{
  // SObjList<Terminal> and SObjList<Symbol> have identical layout
  // (both store plain pointers), so the list can be reinterpreted as a
  // SymbolList; the same trick appears in grammar.h's toObjList helpers
  return symbolSequenceToString(reinterpret_cast<SymbolList const&>(list));
}
1229:
1230:
1231:
1232:
// Emit C++ code that, when executed, would reconstruct this grammar;
// writes a function 'buildGrammar(Grammar*)' to 'os'.
// NOTE(review): this routine appears unfinished -- the nonterminal
// loop below contains the literal placeholder "os << ...", which is
// not valid C++; confirm whether emitSelfCC is ever compiled/called
// before relying on it.
void Grammar::emitSelfCC(std::ostream &os) const
{
  os << "void buildGrammar(Grammar *g)\n"
        "{\n";

  // one declareToken() call per terminal, preserving index and alias
  FOREACH_OBJLIST(Terminal, terminals, termIter) {
    Terminal const *term = termIter.data();

    os << "g->declareToken(" << term->name
       << ", " << term->termIndex
       << ", " << quoted(term->alias)
       << ");\n";
  }

  // nonterminal emission was never written; placeholder text follows
  FOREACH_OBJLIST(Nonterminal, nonterminals, ntIter) {
    Nonterminal const *nt = ntIter.data();

    os << ...
  }

  os << "}\n";


}
1257:
Start C section to elk/elk_grammar.h[1
/1
]
1: #line 3594 "./lpsrc/elk.pak"
2: // grammar.h see license.txt for copyright and terms of use
3: // representation and algorithms for context-free grammars
4:
5: // Author: Scott McPeak, April 2000
6:
7: // Unfortunately, representation and algorithm tend to get
8: // mixed together. Separating them entirely is possible,
9: // but syntactically inconvenient. So, instead, I try to
10: // document the separation in comments. Specifically,
11: // sections beginning with ---- representation ---- are data
12: // for representation of the underlying concept, while
13: // sections with ---- annotation ---- are data created by
14: // algorithms manipulating the data.
15:
16: // Another measure is I've split all grammar-wide algorithm
17: // stuff into GrammarAnalysis (gramanl.h). Things should
18: // only be put into Grammar if they are directly related
// to the grammar representation. (However, constituent
20: // objects like Production will continue to be a mix.)
21:
22: #ifndef __GRAMMAR_H
23: #define __GRAMMAR_H
24:
25: #include <iostream> // std::ostream
26:
27: #include "sm_str.h"
28: #include "sm_objlist.h"
29: #include "sm_sobjlist.h"
30: #include "elk_util.h"
31: #include "ast_locstr.h"
32: #include "sm_strobjdict.h"
33: #include "sm_owner.h"
34: #include "elk_asockind.h"
35:
36: class StrtokParse; // strtokp.h
37:
38: // fwds defined below
39: class Symbol;
40: class Terminal;
41: class Nonterminal;
42: class Production;
43: class DottedProduction;
44: class Grammar;
45:
46: // transitional definitions
47: typedef StringObjDict<LocString> LitCodeDict;
48: typedef LocString LiteralCode;
49:
50:
// everywhere in the Grammar specification we have a StringRef, it
// refers to this string table
53: extern StringTable grammarStringTable;
54:
55:
56: // ---------------- Symbol --------------------
57: // either a nonterminal or terminal symbol
// Common base of Terminal and Nonterminal.  The dup/del fields hold
// user-written code fragments (with their formal parameter names)
// for duplicating and deallocating semantic values.
class Symbol {
// ------ representation ------
public:
  LocString const name;         // symbol's name in grammar
  bool const isTerm;            // true: terminal (only on right-hand sides of productions)
                                // false: nonterminal (can appear on left-hand sides)
  bool const isEmptyString;     // true only for the emptyString nonterminal

  StringRef type;               // C type of semantic value

  StringRef dupParam;           // name of parameter to 'dup'
  LocString dupCode;            // code to duplicate a semantic value

  StringRef delParam;           // param name; may be NULL to indicate not used
  LocString delCode;            // code to deallocate a semantic value

// ----------- annotation ------------
public:
  bool reachable;               // computed by constructLRItemSets; true when nonterminal reachable from start symbol

protected:  // funcs
  virtual void internalPrintDDM(std::ostream &os) const;

public:  // funcs
  Symbol(LocString const &n, bool t, bool e = false);
  virtual ~Symbol();

  Symbol(Flatten&);
  void xfer(Flatten &flat);

  // symmetric selectors
  bool isTerminal() const { return isTerm; }
  bool isNonterminal() const { return !isTerm; }

  // both terminals and nonterminals have ids; this gets the
  // id for whichever kind this object happens to be
  int getTermOrNontermIndex() const;

  // casting
  Terminal const &asTerminalC() const;  // checks 'isTerminal' for cast safety
  Terminal &asTerminal()
    { return const_cast<Terminal&>(asTerminalC()); }

  Nonterminal const &asNonterminalC() const;
  Nonterminal &asNonterminal()
    { return const_cast<Nonterminal&>(asNonterminalC()); }

  // cast or NULL
  Terminal const *ifTerminalC() const;
  Terminal *ifTerminal()
    { return const_cast<Terminal*>(ifTerminalC()); }

  Nonterminal const *ifNonterminalC() const;
  Nonterminal *ifNonterminal()
    { return const_cast<Nonterminal*>(ifNonterminalC()); }

  // debugging
  // print as '$name: isTerminal=$isTerminal' (no newline)
  virtual void print(std::ostream &os) const;
  OSTREAM_OPERATOR(Symbol)

  // print 'token[type] name { dup.. del.. merge.. }' (with newlines)
  void printDDM(std::ostream &os) const;

  // true if any of the handlers were specified
  virtual bool anyDDM() const;

  virtual sm_string toString() const { return sm_string(name); }
};
127:
128: // I have several needs for serf lists of symbols, so let's use this for now
129: typedef SObjList<Symbol> SymbolList;
130: typedef SObjListIter<Symbol> SymbolListIter;
131: typedef SObjListMutator<Symbol> SymbolListMutator;
132:
133: #define FOREACH_SYMBOL(list, iter) FOREACH_OBJLIST(Symbol, list, iter)
134: #define MUTATE_EACH_SYMBOL(list, iter) MUTATE_EACH_OBJLIST(Symbol, list, iter)
135: #define SFOREACH_SYMBOL(list, iter) SFOREACH_OBJLIST(Symbol, list, iter)
136: #define SMUTATE_EACH_SYMBOL(list, iter) SMUTATE_EACH_OBJLIST(Symbol, list, iter)
137:
138: // format: "s1 s2 s3"
139: sm_string symbolSequenceToString(SymbolList const &list);
140:
141:
142: // ---------------- Terminal --------------------
143: // something that only appears on the right-hand side of
144: // productions, and is an element of the source language
145: // NOTE: This is really a terminal *class*, in that it's possible
146: // for several different tokens to be classified into the same
147: // terminal class (e.g. "foo" and "bar" are both identifiers)
class Terminal : public Symbol {
// -------- representation ---------
public:     // data
  // whereas 'name' is the canonical name for the terminal class,
  // this field is an alias; for example, if the canonical name is
  // L2_EQUALEQUAL, the alias might be "=="; the alias should *not*
  // include actual double-quote characters
  // if the alias is "", there is no alias
  LocString alias;

  // parsgen-time conflict resolution: if a shift/reduce conflict
  // occurs between a production and a symbol, both with specified
  // precedence (not 0), then the one with the numerically higher
  // precedence will be used
  int precedence;

  // if, in the above scenario, the precedence values are the same,
  // then the associativity kind will be used to decide which to use
  AssocKind associativity;

  StringRef classifyParam;      // name of parameter to 'classify'
  LocString classifyCode;       // code to reclassify a token type

// ------ annotation ------
public:     // data
  // terminal class index - this terminal's id; -1 means unassigned
  int termIndex;

protected:  // funcs
  virtual void internalPrintDDM(std::ostream &os) const;

public:     // funcs
  // 'name' is the canonical name for the terminal class; 'alias',
  // 'classifyCode' etc. start empty via their default constructors
  Terminal(LocString const &name)
    : Symbol(name, true /*terminal*/),
      alias(),
      precedence(0),
      associativity(AK_NONASSOC),
      classifyParam(NULL),
      termIndex(-1)
  {}

  Terminal(Flatten &flat);
  void xfer(Flatten &flat);

  virtual void print(std::ostream &os) const;
  OSTREAM_OPERATOR(Terminal)

  virtual bool anyDDM() const;

  // return alias if defined, name otherwise
  virtual sm_string toString(bool quoteAliases = false) const;
};
200:
201: typedef SObjList<Terminal> TerminalList;
202: typedef SObjListIter<Terminal> TerminalListIter;
203:
204: #define FOREACH_TERMINAL(list, iter) FOREACH_OBJLIST(Terminal, list, iter)
205: #define MUTATE_EACH_TERMINAL(list, iter) MUTATE_EACH_OBJLIST(Terminal, list, iter)
206: #define SFOREACH_TERMINAL(list, iter) SFOREACH_OBJLIST(Terminal, list, iter)
207: #define SMUTATE_EACH_TERMINAL(list, iter) SMUTATE_EACH_OBJLIST(Terminal, list, iter)
208:
209: // casting aggregates
210: inline ObjList<Symbol> const &toObjList(ObjList<Terminal> const &list)
211: { return reinterpret_cast< ObjList<Symbol>const& >(list); }
212:
213: // format: "t1 t2 t3"
214: sm_string terminalSequenceToString(TerminalList const &list);
215:
216:
217: // ----------------- TerminalSet -------------------
218: // used for the lookahead sets of LR items, and for the First()
219: // sets of production RHSs
// A set of terminals represented as a bit vector indexed by terminal
// id; used for LR lookahead sets and First() sets.
class TerminalSet {
private:    // data
  unsigned char *bitmap;        // (owner) bitmap of terminals, indexed by
                                // terminal id; lsb of byte 0 is index 0
  int bitmapLen;                // # of bytes in 'bitmap'

public:     // data
  // printing customization: when non-NULL only print tokens if
  // it includes this token, and then *only* print this one
  static Terminal const *suppressExcept;

private:    // funcs
  void init(int numTerms);
  unsigned char *getByte(int terminalId) const;
  int getBit(int terminalId) const
    { return ((unsigned)terminalId % 8); }

public:     // funcs
  TerminalSet(int numTerms=0);         // allocate new set, initially empty
  TerminalSet(TerminalSet const &obj);
  ~TerminalSet();

  TerminalSet& operator= (TerminalSet const &obj)
    { copy(obj); return *this; }

  TerminalSet(Flatten&);
  void xfer(Flatten &flat);

  // call this to re-allocate at a new size; set is emptied
  void reset(int numTerms);

  // true when the # of symbols is 0; an unfinished state
  bool nullMap() const { return bitmap==NULL; }

  bool contains(int terminalId) const;

  // NOTE: can only compare sets built for the same number of
  // terminals (assertion fail otherwise)
  bool isEqual(TerminalSet const &obj) const;

  void add(int terminalId);
  void remove(int terminalId);
  void clear();

  void copy(TerminalSet const &obj);    // lengths must be the same
  bool merge(TerminalSet const &obj);   // union; returns true if merging changed set

  void print(std::ostream &os, Grammar const &g) const;
};
269:
270:
271: // ---------------- Nonterminal --------------------
272: // something that can appear on the left-hand side of a production
273: // (or, emptyString, since we classify that as a nonterminal also)
274: class Nonterminal : public Symbol {
275: // ---------- representation --------
276: public:
277: StringRef mergeParam1; // param name for first alternative
278: StringRef mergeParam2; // and 2nd alt
279: LocString mergeCode; // code to resolve then
280:
281: StringRef keepParam; // name of parameter to 'keep'
282: LocString keepCode; // code to decide whether to keep a reduction
283:
284: bool maximal; // if true, use maximal munch disambiguation
285:
286: SObjList<Nonterminal> subsets; // preferred subsets (for scannerless)
287:
288: protected: // funcs
289: virtual void internalPrintDDM(std::ostream &os) const;
290:
291: public: // funcs
292: Nonterminal(LocString const &name, bool isEmptyString=false);
293: virtual ~Nonterminal();
294:
295: Nonterminal(Flatten &flat);
296: void xfer(Flatten &flat);
297: void xferSerfs(Flatten &flat, Grammar &g);
298:
299: virtual void print(std::ostream &os, Grammar const *grammer = NULL) const;
300: OSTREAM_OPERATOR(Nonterminal)
301:
302: virtual bool anyDDM() const;
303:
304: // ------ annotation ------
305: public: // data
306: int ntIndex; // nonterminal index; see Grammar::computeWhatCanDeriveWhat
307: bool cyclic; // true if this can derive itself in 1 or more steps
308: TerminalSet first; // set of terminals that can be start of a sm_string derived from 'this'
309: TerminalSet follow; // set of terminals that can follow a sm_string derived from 'this'
310: Nonterminal *superset; // inverse of 'subsets'
311: };
312:
313: typedef SObjList<Nonterminal> NonterminalList;
314: typedef SObjListIter<Nonterminal> NonterminalListIter;
315:
316: #define FOREACH_NONTERMINAL(list, iter) FOREACH_OBJLIST(Nonterminal, list, iter)
317: #define MUTATE_EACH_NONTERMINAL(list, iter) MUTATE_EACH_OBJLIST(Nonterminal, list, iter)
318: #define SFOREACH_NONTERMINAL(list, iter) SFOREACH_OBJLIST(Nonterminal, list, iter)
319: #define SMUTATE_EACH_NONTERMINAL(list, iter) SMUTATE_EACH_OBJLIST(Nonterminal, list, iter)
320:
321: // casting aggregates
322: inline ObjList<Symbol> const &toObjList(ObjList<Nonterminal> const &list)
323: { return reinterpret_cast< ObjList<Symbol>const& >(list); }
324:
325:
326: // ---------------- Production --------------------
327: // a rewrite rule
class Production {
// ------ representation ------
public:     // types
  // one element of the right-hand side: a symbol plus its action tag
  class RHSElt {
  public:
    Symbol *sym;                // (serf) rhs element symbol

    // tags applied to the symbols for purposes of unambiguous naming in
    // actions, and for self-commenting value as role indicators; an
    // empty tag ("") is allowed and means there is no tag
    LocString tag;              // tag for this symbol; can be ""

  public:
    RHSElt(Symbol *s, LocString const &t) : sym(s), tag(t) {}
    ~RHSElt();

    RHSElt(Flatten&);
    void xfer(Flatten &flat);
    void xferSerfs(Flatten &flat, Grammar &g);
  };

public:     // data
  // fundamental context-free grammar (CFG) component
  Nonterminal * const left;     // (serf) left hand side; must be nonterminal
  ObjList<RHSElt> right;        // right hand side; terminals & nonterminals
  int precedence;               // precedence level for disambiguation (0 for none specified)

  // user-supplied reduction action code
  LocString action;

private:    // funcs
  void computeDerived();

public:     // funcs
  Production(Nonterminal *left, char const *leftTag);
  ~Production();

  Production(Flatten &flat);
  void xfer(Flatten &flat);
  void xferSerfs(Flatten &flat, Grammar &g);

  // length *not* including emptySymbol, if present
  // UPDATE: I'm now disallowing emptySymbol from ever appearing in 'right'
  int rhsLength() const { return rhsLen; }

  // number of nonterminals on RHS
  int numRHSNonterminals() const;

  // true if the given symbol appears in 'right'
  bool rhsHasSymbol(Symbol const *sym) const;

  // retrieve the RHS as a list of symbols, rather than as a list of RHSElts
  void getRHSSymbols(SymbolList &output) const;

  // append a RHS symbol
  void append(Symbol *sym, LocString const &tag);

  // call this when production is built, so it can compute annotations
  // (this is called by GrammarAnalysis::initializeAuxData, from
  // inside runAnalyses)
  void finished(int numTerms);

  // find a symbol by tag; returns 1 for first RHS symbol, 2 for
  // second, etc.; returns -1 if the tag doesn't match anything
  int findTag(StringRef tag) const;

  // given an index as returned by 'findTag', translate that
  // back into a tag
  sm_string symbolTag(int symbolIndex) const;

  // or translate a symbol index into a symbol
  Symbol const *symbolByIndexC(int symbolIndex) const;
  Symbol *symbolByIndex(int symbolIndex)
    { return const_cast<Symbol*>(symbolByIndexC(symbolIndex)); }

  #if 0
  // retrieve an item
  DottedProduction const *getDProdC(int dotPlace) const;
  DottedProduction *getDProd(int dotPlace)
    { return const_cast<DottedProduction*>(getDProdC(dotPlace)); }
  #endif // 0

  // print 'A -> B c D' (no newline)
  sm_string toString(bool printType = true, bool printIndex = true) const;

  // this one prints 'B c D' for above example rule
  sm_string rhsString(bool printTags = true, bool quoteAliases = false) const;

  void print(std::ostream &os) const;
  OSTREAM_OPERATOR(Production)

  // print entire input syntax, with newlines, e.g.
  //   A -> B c D { return foo; }
  sm_string toStringMore(bool printCode) const;

// ------ annotation ------
private:    // data
  int rhsLen;                   // right.count()

public:     // data
  int prodIndex;                // unique production id
  TerminalSet firstSet;         // First(RHS); computed by GrammarAnalysis::computeFirst
};
431:
432: typedef SObjList<Production> ProductionList;
433: typedef SObjListIter<Production> ProductionListIter;
434:
435: #define FOREACH_PRODUCTION(list, iter) FOREACH_OBJLIST(Production, list, iter)
436: #define MUTATE_EACH_PRODUCTION(list, iter) MUTATE_EACH_OBJLIST(Production, list, iter)
437: #define SFOREACH_PRODUCTION(list, iter) SFOREACH_OBJLIST(Production, list, iter)
438: #define SMUTATE_EACH_PRODUCTION(list, iter) SMUTATE_EACH_OBJLIST(Production, list, iter)
439:
440: typedef ObjList<Production::RHSElt> RHSEltList;
441: typedef ObjListIter<Production::RHSElt> RHSEltListIter;
442: typedef ObjListMutator<Production::RHSElt> RHSEltListMutator;
443:
444:
445: // ---------------- Grammar --------------------
446: // represent a grammar: nonterminals, terminals, productions, and start-symbol
class Grammar {
// ------ representation ------
public:     // data
  ObjList<Nonterminal> nonterminals;    // (owner list)
  ObjList<Terminal> terminals;          // (owner list)
  ObjList<Production> productions;      // (owner list)
  Nonterminal *startSymbol;             // (serf) a particular nonterminal

  // the special nonterminal for the empty string; does not appear in the
  // list of nonterminals or terminals for a grammar, but can be
  // referenced by productions, etc.; the decision to explicitly have
  // such a symbol, instead of letting it always be implicit, is
  // motivated by things like the derivability relation, where it's
  // nice to treat empty like any other symbol
  Nonterminal emptyString;

  // sections of verbatim code emitted into the interface file, before
  // the parser context class body
  ObjList<LocString> verbatim;

  // name of the class into which the action functions are placed
  LocString actionClassName;

  // verbatim action class declaration, and additional codes from
  // extension modules to append to it (but see note of 11/13/04
  // in grampar.cc)
  ObjList<LocString> actionClasses;

  // code emitted into the implementation file at the end
  ObjList<LocString> implVerbatim;

  // ---- declarative options ----
  // name of the target language; nominally "C++"
  sm_string targetLang;

  // when true, the default dup/del is what's expected for a
  // garbage-collected system: dup() is the identity function,
  // and del() is a no-op
  bool useGCDefaults;

  // when true, unspecified merge() functions abort()
  bool defaultMergeAborts;

  // expected numbers of various anomalies; -1 means no
  // expectation has been supplied; this information is used
  // to control what is reported after grammar analysis
  int expectedSR;                       // shift/reduce conflicts
  int expectedRR;                       // reduce/reduce conflicts
  int expectedUNRNonterms;              // # unreachable nonterminals
  int expectedUNRTerms;                 // # unreachable terminals

public:     // funcs
  Grammar();                            // set everything manually
  ~Grammar();

  // read/write as binary file
  void xfer(Flatten &flat);

  // simple queries
  int numTerminals() const;
  int numNonterminals() const;


  // ---- building a grammar ----
  // declare a new token exists, with name and optional alias;
  // return false if it's already declared
  bool declareToken(LocString const &symbolName, int code,
                    LocString const &alias);

  // add a new production; the rhs arg list must be terminated with a NULL
  //void addProduction(Nonterminal *lhs, Symbol *rhs, ...);

  // add a pre-constructed production
  void addProduction(Production *prod);

  // ---------- outputting a grammar --------------
  // print the list of symbols with type annotations
  void printSymbolTypes(std::ostream &os) const;

  // print the current list of productions
  void printProductions(std::ostream &os, bool printCode=true) const;

  // emit C++ code to construct this grammar later
  void emitSelfCC(std::ostream &os) const;

  // ---- whole-grammar stuff ----
  // after adding all rules, check that all nonterminals have
  // at least one rule; also checks referential integrity
  // in actions and conditions; throw exception if there is a
  // problem
  void checkWellFormed() const;

  // output grammar in Bison's syntax
  // (coincidentally, when bison dumps its table with '-v', its table
  // dump syntax is similar to my input syntax)
  void printAsBison(std::ostream &os) const;

  // ---- symbol access ----
  // declares the find/getOrMake family for Symbol, Terminal and
  // Nonterminal, e.g. findSymbolC / findSymbol / getOrMakeSymbol
  #define SYMBOL_ACCESS(Thing)                              \
    /* retrieve, return NULL if not there */                \
    Thing const *find##Thing##C(char const *name) const;    \
    Thing *find##Thing(char const *name)                    \
      { return const_cast<Thing*>(find##Thing##C(name)); }  \
                                                            \
    /* retrieve, or create it if not already there */       \
    Thing *getOrMake##Thing(LocString const &name);

  SYMBOL_ACCESS(Symbol)       // findSymbolC, findSymbol, getOrMakeSymbol
  SYMBOL_ACCESS(Terminal)     // findTerminal{C,}, getOrMakeTerminal
  SYMBOL_ACCESS(Nonterminal)  // findNonterminal{C,}, getOrMakeNonterminal
  #undef SYMBOL_ACCESS

  // map a production to a unique index
  int getProductionIndex(Production const *prod) const;
};
562:
563:
564: #endif // __GRAMMAR_H
565:
Start C section to elk/elk_grampar.codes.h[1
/1
]
1: #line 4160 "./lpsrc/elk.pak"
# define BISON_GRAMPAR_TAB_H /* tweak */
# define YYSTYPE yystype
# define YYSTYPE_IS_TRIVIAL 1
// token codes for the grammar-file parser; these duplicate the codes
// in elk_grampar.tab.h and must be kept in sync with them by hand
# define TOK_INTEGER 257
# define TOK_NAME 258
# define TOK_STRING 259
# define TOK_LIT_CODE 260
# define TOK_LBRACE 261
# define TOK_RBRACE 262
# define TOK_COLON 263
# define TOK_SEMICOLON 264
# define TOK_ARROW 265
# define TOK_LPAREN 266
# define TOK_RPAREN 267
# define TOK_COMMA 268
# define TOK_TERMINALS 269
# define TOK_TOKEN 270
# define TOK_NONTERM 271
# define TOK_FUN 272
# define TOK_VERBATIM 273
# define TOK_IMPL_VERBATIM 274
# define TOK_PRECEDENCE 275
# define TOK_OPTION 276
# define TOK_EXPECT 277
# define TOK_CONTEXT_CLASS 278
# define TOK_SUBSETS 279
Start C section to elk/elk_grampar.h[1
/1
]
1: #line 4188 "./lpsrc/elk.pak"
2: // grampar.h see license.txt for copyright and terms of use
3: // declarations for bison-generated grammar parser
4:
5: #ifndef __GRAMPAR_H
6: #define __GRAMPAR_H
7:
8: #include "sm_typ.h"
9: #include "sm_sobjlist.h"
10: #include "sm_exc.h"
11: #include "sm_strsobjdict.h"
12: #include "ast_locstr.h"
13:
14: // linkdepend: grampar.tab.cc
15:
16: // fwd decl
17: class GrammarAST; // gramast.ast
18: class TF_nonterm; // gramast.ast
19: class GrammarLexer; // ../ast/gramlex.h
20: class StringTable; // strtable.h
21:
22:
23: // -------- rest of the program's view of parser ------------
24: // name of extra parameter to yyparse (i.e. the context in
25: // which the parser operates, instead of that being stored
26: // in some collection of globals)
27: #define YYPARSE_PARAM parseParam
28:
29: // type of thing extra param points at
// bundle passed to the bison parser via YYPARSE_PARAM; 'treeTop'
// starts NULL and is filled in by the parser when parsing finishes
struct ParseParams {
  GrammarAST *treeTop;    // set when parsing finishes; AST tree top
  GrammarLexer &lexer;    // lexer we're using

public:
  ParseParams(GrammarLexer &L) :
    treeTop(NULL),
    lexer(L)
  {}
};
40:
41: // caller interface to Bison-generated parser; starts parsing
42: // (whatever stream lexer is reading) and returns 0 for success and
43: // 1 for error; the extra parameter is available to actions to use
44: int grampar_yyparse(void *YYPARSE_PARAM);
45:
46: // when this is set to true, bison parser emits info about
47: // actions as it's taking them (shared by all instances of
48: // bison-generated parsers in a given program)
49: extern int yydebug;
50:
51:
52: // ---------- Bison's view of the rest of the program --------
53: // Bison calls this to get each token; returns token code,
54: // or 0 for eof; semantic value for returned token can be
55: // put into '*lvalp'
56: // TODO: Paul Hilfinger reports there's a problem saying "union
57: // YYSTYPE"; he's using bison 1.34 I think, so I need to upgrade
58: // and see what the problem is (suspect my 'sed' pattern isn't
59: // matching, in the Makefile)
60: int grampar_yylex(union YYSTYPE *lvalp, void *parseParam);
61:
62: // error printer
63: void grampar_yyerror(char const *message, void *parseParam);
64:
65:
66: // ---------------- grampar's parsing structures ---------------
67: class Grammar; // fwd
68:
69: // while walking the AST, we do a kind of recursive evaluation
70: // to handle things like inherited actions and self-updating
71: // (eval'd at grammar parse time) action expressions
class Environment {
public: // data
  // grammar we're playing with (stored here because it's
  // more convenient than passing it to every fn separately)
  Grammar &g;

  // env in which we're nested, if any
  Environment *prevEnv;     // (serf)

  // maps from a nonterminal name to its declaration, if that
  // nonterminal has in fact been declared already
  StringSObjDict<TF_nonterm /*const*/> nontermDecls;

  // count of recoverable errors; only the one in the
  // topmost environment is used
  int errorCount;

  // reference to the one we're really using
  // (NOTE(review): presumably aliases the topmost environment's
  // 'errorCount' for nested envs -- confirm in the ctor definitions)
  int &errors;

public:
  Environment(Grammar &G);              // new env
  Environment(Environment &prevEnv);    // nested env
  ~Environment();
};
97:
98:
99: // --------------- grampar's external interface -----------
100: // parse grammar file 'fname' into grammar 'g', throwing exceptions
101: // if there are problems
102: void readGrammarFile(Grammar &g, char const *fname);
103:
104: // just do the parsing stage
105: GrammarAST *parseGrammarFile(char const *fname, bool useML);
106:
107: // merge two grammar descriptions; neither argument is consumed,
108: // but subtrees of the 2nd argument get moved into the first tree
109: void mergeGrammar(GrammarAST *base, GrammarAST *ext);
110:
111: // GrammarAST -> Grammar
112: void parseGrammarAST(Grammar &g, GrammarAST *treeTop);
113:
114:
115: // thrown when there is an error parsing the AST
// thrown when there is an error while walking/interpreting the
// grammar AST; carries the offending token and a message
class XASTParse : public xBase {
public:     // data
  // token at or near failure
  LocString failToken;

  // what is wrong
  sm_string message;

private:    // funcs
  // builds the full exception text from token location plus message
  static sm_string constructMsg(LocString const &tok, char const *msg);

public:     // funcs
  XASTParse(LocString const &tok, char const *msg);
  XASTParse(XASTParse const &obj);
  ~XASTParse();
};
132:
133:
134: #endif // __GRAMPAR_H
Start C section to elk/elk_grampar.tab.h[1
/1
]
1: #line 4323 "./lpsrc/elk.pak"
#ifndef BISON_GRAMPAR_TAB_H /* tweak */
# define BISON_GRAMPAR_TAB_H

#ifndef YYSTYPE
// semantic-value union for the bison-generated grammar parser; each
// member corresponds to the %type of some nonterminal/token
typedef union YYSTYPE {
  int num;
  LocString *str;

  ASTList<TopForm> *topFormList;
  TopForm *topForm;

  ASTList<TermDecl> *termDecls;
  TermDecl *termDecl;
  ASTList<TermType> *termTypes;
  TermType *termType;
  ASTList<PrecSpec> *precSpecs;

  ASTList<SpecFunc> *specFuncs;
  SpecFunc *specFunc;
  // list of LocStrings (member name apparently produced by the
  // project-wide sm_ renaming pass)
  ASTList<LocString> *sm_stringList;

  ASTList<ProdDecl> *prodDecls;
  ProdDecl *prodDecl;
  ASTList<RHSElt> *rhsList;
  RHSElt *rhsElt;
} yystype;
# define YYSTYPE yystype
# define YYSTYPE_IS_TRIVIAL 1
#endif
// token codes; duplicated in elk_grampar.codes.h -- keep in sync
# define TOK_INTEGER 257
# define TOK_NAME 258
# define TOK_STRING 259
# define TOK_LIT_CODE 260
# define TOK_LBRACE 261
# define TOK_RBRACE 262
# define TOK_COLON 263
# define TOK_SEMICOLON 264
# define TOK_ARROW 265
# define TOK_LPAREN 266
# define TOK_RPAREN 267
# define TOK_COMMA 268
# define TOK_TERMINALS 269
# define TOK_TOKEN 270
# define TOK_NONTERM 271
# define TOK_FUN 272
# define TOK_VERBATIM 273
# define TOK_IMPL_VERBATIM 274
# define TOK_PRECEDENCE 275
# define TOK_OPTION 276
# define TOK_EXPECT 277
# define TOK_CONTEXT_CLASS 278
# define TOK_SUBSETS 279


#endif /* not BISON_GRAMPAR_TAB_H */
Start C section to elk/elk_lexerint.h[1
/1
]
1: #line 4380 "./lpsrc/elk.pak"
2: // lexerint.h see license.txt for copyright and terms of use
3: // LexerInterface, the interface the GLR parser uses
4: // to access the lexer's token stream
5:
6: #ifndef LEXERINT_H
7: #define LEXERINT_H
8:
9: #include "elk_useract.h"
10: #include "sm_srcloc.h"
11: #include "sm_str.h"
12:
13: // This 'interface' is a collection of variables describing
14: // the current token. I don't use a bunch of pure-virtual
15: // functions because of the cost of calling them; everything
16: // here will be in the inner loop of the parser.
class LexerInterface {
public: // data
  // NOTE: All of these fields are *written* by the lexer, and
  // *read* by the parser.

  // token classification; this is what the parser will use to
  // make parsing decisions; this code must correspond to something
  // declared in the 'terminals' section of the grammar; when this
  // is 0, it is the final (end-of-file) token; the parser is allowed
  // to change this for its own purposes, and currently does so for
  // token reclassification
  int type;

  // semantic value; this is what will be passed to the reduction
  // actions when this token is on the right hand side of a rule
  SemanticValue sval;

  // source location of the token; this will only be used if the
  // parser has been compiled to automatically propagate it
  SourceLoc loc;

public: // funcs
  // start with the EOF code, no semantic value, unknown location;
  // the lexer overwrites these before/while parsing
  LexerInterface()
    : type(0),
      sval(0),
      loc(SL_UNKNOWN)
  {}
  virtual ~LexerInterface() {}


  // retrieve the next token; the lexer should respond by filling in
  // the above fields with new values, to describe the next token; the
  // lexer indicates end of file by putting 0 into 'type'; when the
  // LexerInterface object is first passed to the parser, the above
  // fields should already be set correctly (i.e. the parser will make
  // its first call to 'nextToken' *after* processing the first token)
  typedef void (*NextTokenFunc)(LexerInterface *);

  // get the function which we'll call to get the next token
  //
  // Why the two-step approach?  Virtual method calls are more
  // expensive than simple indirect function calls, and this happens
  // in the inner parsing loop.  If C++ had a way to explicitly cache
  // the result of a method lookup this wouldn't be necessary.
  virtual NextTokenFunc getTokenFunc() const=0;


  // The following functions are called to help create diagnostic
  // reports.  They should describe the current token (the one
  // which the above fields refer to) in more-or-less human-readable
  // terms.

  // describe the token; for tokens with multiple spellings (e.g.
  // identifiers), this should include the actual token spelling
  // if possible; note that if the token has been reclassified,
  // then the 'type' field above might have been changed by the
  // parser, in which case this function should ideally print
  // a description which takes the new type into account
  virtual sm_string tokenDesc() const=0;

  // describe a token kind; this is different from tokenDesc(), since
  // it need not correspond to the token kind that was just yielded,
  // and hence any related lexeme data cannot be assumed to be
  // available; this is used during error diagnosis
  virtual sm_string tokenKindDesc(int kind) const=0;
};
83:
84: #endif // LEXERINT_H
Start C section to elk/elk_mlsstr.h[1
/1
]
1: #line 4465 "./lpsrc/elk.pak"
2: // mlsstr.h see license.txt for copyright and terms of use
3: // handles lexically embedded ML
4: // based on ccsstr.h
5:
6: #ifndef MLSSTR_H
7: #define MLSSTR_H
8:
9: #include "ast_embedded.h"
10:
11: class MLSubstrateTest;
12:
13: class MLSubstrate : public EmbeddedLang {
14: private:
15: enum State {
16: ST_NORMAL, // normal text
17: ST_STRING, // inside a sm_string literal
18: ST_CHAR, // inside a char literal
19: ST_COMMENT, // inside a comment (nesting depth tracked in 'comNesting')
20: NUM_STATES
21: } state;
22: int nesting; // depth of paren/bracket/brace nesting
23: int comNesting; // depth of comment nesting (in ST_COMMENT)
24: char prev; // previous character
25:
26: // so test code can interrogate internal state
27: friend class MLSubstrateTest;
28:
29: public:
30: MLSubstrate(ReportError *err = NULL);
31: virtual ~MLSubstrate();
32:
33: // EmbeddedLang entry points (see gramlex.h for description
34: // of each function)
35: virtual void reset(int initNest = 0);
36: virtual void handle(char const *str, int len, char finalDelim);
37: virtual bool zeroNesting() const;
38: virtual sm_string getFuncBody() const;
39: virtual sm_string getDeclName() const;
40: };
41:
42: #endif // MLSSTR_H
Start C section to elk/elk_ownerspec.h[1
/1
]
1: #line 4508 "./lpsrc/elk.pak"
2: // ownerspec.h see license.txt for copyright and terms of use
3: // specification of "owner pointer", as a C++ template class
4:
5: // I made this as an experiment.. it's really part of the
6: // verifier project...
7: #error This is not intended to be used
8:
9: template <class T>
10: class OwnerPtr {
11: private:
12: T *ptr;
13:
14: enum State { OP_NULL, OP_DEAD, OP_OWNING }; // no object / ownership given away / owns *ptr
15: State state;
16:
17: public:
18: OwnerPtr() : ptr(NULL), state(OP_NULL) {}
19:
20: OwnerPtr(T *src) : ptr(src), state(src? OP_OWNING : OP_NULL) {}
21:
22: OwnerPtr(OwnerPtr &src) { // ownership transfer: 'src' becomes OP_DEAD
23: ptr = src.ptr;
24: state = src.state;
25: src.state = OP_DEAD;
26: }
27:
28: ~OwnerPtr() { // owner must have released/transferred ownership before destruction
29: assert(state != OP_OWNING);
30: }
31:
32: OwnerPtr& operator= (OwnerPtr &src) { // transfer; lhs must not still own
33: if (this != &src) {
34: assert(state != OP_OWNING);
35: ptr = src.ptr;
36: state = src.state;
37: src.state = OP_DEAD;
38: }
39: return *this;
40: }
41:
42: OwnerPtr& operator= (T *src) {
43: assert(state != OP_OWNING);
44: ptr = src;
45: state = src? OP_OWNING : OP_NULL;
46: return *this;
47: }
48:
49: bool operator== (T *p) {
50: assert(state != OP_DEAD);
51: return ptr == p;
52: }
53:
54: // yield serf for possible further use
55: operator T* () {
56: assert(state != OP_DEAD);
57: return ptr;
58: }
59:
60: // use directly
61: T& operator* () {
62: assert(state == OP_OWNING);
63: return *ptr;
64: }
65: T* operator-> () {
66: assert(state == OP_OWNING);
67: return ptr;
68: }
69: };
70:
71:
72:
73:
74:
75:
Start C section to elk/elk_parsetables.h[1
/1
]
1: #line 4584 "./lpsrc/elk.pak"
2: // parsetables.h see license.txt for copyright and terms of use
3: // ParseTables, a class to contain the tables needed by the
4: // LR/GLR parsing algorithm
5:
6: #ifndef PARSETABLES_H
7: #define PARSETABLES_H
8:
9: #include "sm_array.h"
10: #include "elk_glrconfig.h"
11: #include <iostream> // std::ostream
12:
13: class Flatten; // flatten.h
14: class EmitCode; // emitcode.h
15: class Symbol; // grammar.h
16: class Bit2d; // bit2d.h
17:
18: class ELK_EXTERN ParseTables;
19:
20: // integer id for an item-set DFA state; I'm using an 'enum' to
21: // prevent any other integers from silently flowing into it
22: enum StateId { STATE_INVALID=-1 };
23:
24: inline std::ostream& operator<< (std::ostream &os, StateId id)
25: { return os << (int)id; }
26:
27:
28: // encodes an action in 'action' table; see 'actionTable'
29: #if ENABLE_CRS_COMPRESSION
30: // high bits encoding
31: enum ActionEntryKind {
32: AE_MASK = 0xC0, // selection mask
33: AE_SHIFT = 0x00, // 00 = shift
34: AE_REDUCE = 0x40, // 01 = reduce
35: AE_AMBIGUOUS = 0x80, // 10 = ambiguous
36: AE_ERROR = 0xC0, // 11 = error (if EEF is off)
37: AE_MAXINDEX = 63 // maximum value of lower bits
38: };
39:
40: // remaining 6 bits:
41: //
42: // shift: destination state, encoded as an offset from the
43: // first state that that terminal can reach
44: //
45: // reduce: production, encoded as an index into a per-state
46: // array of distinct production indices
47: //
48: // ambiguous: for each state, have an array of ActionEntries.
49: // ambiguous entries index into this array. first indexed
50: // entry is the count of how many actions follow
51: typedef unsigned char ActionEntry;
52: ActionEntry makeAE(ActionEntryKind k, int index);
53: #define errorActionEntry ((ActionEntry)AE_ERROR)
54: #else
55: // each entry is one of:
56: // +N+1, 0 <= N < numStates: shift, and go to state N
57: // -N-1, 0 <= N < numProds: reduce using production N
58: // numStates+N+1, 0 <= N < numAmbig: ambiguous, use ambigAction N
59: // 0: error
60: // (there is no 'accept', acceptance is handled outside this table)
61: typedef signed short ActionEntry;
62: #define errorActionEntry ((ActionEntry)0)
63: #endif
64:
65:
66: // encodes a destination state in 'gotoTable'
67: #if ENABLE_CRS_COMPRESSION
68: // entry is an offset from the first state that can be reached
69: // by shifting the nonterminal
70: typedef unsigned char GotoEntry;
71: #else
72: // entry is the state to go to after shifting the nonterminal
73: typedef unsigned short GotoEntry;
74: #endif
75: #define errorGotoEntry ((GotoEntry)~0)
76:
77:
78: // name a terminal using an index
79: typedef unsigned char TermIndex;
80:
81: // name a nonterminal using an index
82: typedef unsigned char NtIndex;
83:
84: // name a production using an index
85: typedef unsigned short ProdIndex;
86:
87: // an addressed cell in the 'errorBits' table
88: typedef unsigned char ErrorBitsEntry;
89:
90:
91: // encodes either terminal index N (as N+1) or
92: // nonterminal index N (as -N-1), or 0 for no-symbol
93: typedef signed short SymbolId;
94: inline bool symIsTerm(SymbolId id) { return id > 0; }
95: inline int symAsTerm(SymbolId id) { return id-1; }
96: inline bool symIsNonterm(SymbolId id) { return id < 0; }
97: inline NtIndex symAsNonterm(SymbolId id) { return (NtIndex)(-(id+1)); }
98: SymbolId encodeSymbolId(Symbol const *sym); // gramanl.cc
99:
100:
101: // assign, but check for truncation
102: template <class DEST, class SRC>
103: inline void checkAssign(DEST &d, SRC s)
104: {
105: d = (DEST)s;
106: xassert(d == s);
107: }
108:
109:
110: // the parse tables are the traditional action/goto, plus the list
111: // of ambiguous actions, plus any more auxiliary tables useful during
112: // run-time parsing
113: class ELK_EXTERN ParseTables {
114: private: // types
115: // data about an intermediate state of parse table construction;
116: // once the table is finished, this data gets consolidated into the
117: // actual tables, and then thrown away
118: class TempData {
119: public: // data
120: // nascent ambigTable
121: ArrayStack<ActionEntry> ambigTable;
122:
123: // nascent bigProductionList
124: ArrayStack<ProdIndex> bigProductionList;
125:
126: // nascent productionsForState, except using integer offsets from
127: // start of 'bigProductionList' instead of direct pointers into it
128: ArrayStack<int> productionsForState;
129:
130: // nascent versions of ambig tables, again with integer offsets
131: ArrayStack<int> ambigStateTable;
132:
133: public: // funcs
134: TempData(int numStates);
135: ~TempData();
136: };
137:
138: public: // types
139: // per-production info
140: struct ProdInfo {
141: unsigned char rhsLen; // # of RHS symbols
142: NtIndex lhsIndex; // 'ntIndex' of LHS
143: };
144:
145: protected: // data
146: // when this is false, all of the below "(owner*)" annotations are
147: // actually "(serf)", i.e. this object does *not* own any of the
148: // tables (see emitConstructionCode())
149: bool owning;
150:
151: // non-NULL during construction
152: TempData *temp; // (nullable owner)
153:
154: // # terminals, nonterminals in grammar
155: int numTerms;
156: int numNonterms;
157:
158: // # of parse states
159: int numStates;
160:
161: // # of productions in the grammar
162: int numProds;
163:
164: // action table, indexed by (state*actionCols + lookahead)
165: int actionCols;
166: ActionEntry *actionTable; // (owner*)
167:
168: // goto table, indexed by (state*gotoCols + nontermId)
169: int gotoCols;
170: GotoEntry *gotoTable; // (owner*)
171:
172: // map production id to information about that production
173: ProdInfo *prodInfo; // (owner*)
174:
175: // map a state id to the symbol (terminal or nonterminal) which is
176: // shifted to arrive at that state
177: SymbolId *stateSymbol; // (owner*)
178:
179: // ambiguous actions: one big list, for allocation purposes; then
180: // the actions encode indices into this table; the first indexed
181: // entry gives the # of actions, and is followed by that many
182: // actions, each interpreted the same way ordinary 'actionTable'
183: // entries are
184: int ambigTableSize;
185: ActionEntry *ambigTable; // (nullable owner*)
186:
187: // total order on nonterminals for use in choosing which to
188: // reduce to in the RWL algorithm; index into this using a
189: // nonterminal index, and it yields the ordinal for that
190: // nonterminal (so these aren't really NtIndex's, but they're
191: // exactly as wide, so I use NtIndex anyway)
192: //
193: // The order is consistent with the requirement that if
194: // A ->+ B
195: // then B will be earlier in the order (assuming acyclicity).
196: // That way, we'll do all reductions to B before any to A (for
197: // reductions spanning the same set of ground terminals), and
198: // therefore will merge all alternatives for B before reducing
199: // any of them to A.
200: NtIndex *nontermOrder; // (owner*)
201:
202: // --------------------- table compression ----------------------
203:
204: // table compression techniques taken from:
205: // [DDH] Peter Dencker, Karl Duerre, and Johannes Heuft.
206: // Optimization of Parser Tables for Portable Compilers.
207: // In ACM TOPLAS, 6, 4 (1984) 546-572.
208: // http://citeseer.nj.nec.com/context/27540/0 (not in database)
209: // ~/doc/papers/p546-dencker.pdf (from ACM DL)
210:
211: // Code Reduction Scheme (CRS):
212: //
213: // Part (a): The states are numbered such that all states that
214: // are reached by transitions on a given symbol are contiguous.
215: // See gramanl.cc, GrammarAnalysis::renumberStates(). Then, we
216: // simply need a map from the symbol index to the first state
217: // that is reached along that symbol.
218: StateId *firstWithTerminal; // (nullable owner*) termIndex -> state
219: StateId *firstWithNonterminal; // (nullable owner*) ntIndex -> state
220: //
221: // Part (b): The production indices that appear on a given row
222: // are collected together. (This is called (c) by [DDH]; I don't
223: // have a counterpart to their (b).)
224: int bigProductionListSize;
225: ProdIndex *bigProductionList; // (nullable owner*) array into which 'productionsForState' points
226: ProdIndex **productionsForState; // (nullable owner to serf) state -> stateProdIndex -> prodIndex
227: //
228: // Part (c): Pointers into 'ambigTable' are collected together in
229: // per-state lists as well.
230: ActionEntry **ambigStateTable; // (nullable owner) state -> (+ambigStateTableIndex -> ActionEntry*)
231:
232: // Error Entry Factoring (EEF):
233: //
234: // Factor out all the error entries into their own bitmap. Then
235: // regard error entries in the original tables as "insignificant".
236: //
237: // 'errorBits' is a map of where the error actions are in the action
238: // table. It is indexed through 'errorBitsPointers':
239: // byte = errorBitsPointers[stateId][lookahead >> 3];
240: // if ((byte >> (lookahead & 7)) & 1) then ERROR
241: int errorBitsRowSize; // bytes per row
242: int uniqueErrorRows; // distinct rows
243: ErrorBitsEntry *errorBits; // (nullable owner*)
244: ErrorBitsEntry **errorBitsPointers; // (nullable owner ptr to serfs)
245:
246: // Graph Coloring Scheme (GCS):
247: //
248: // Merge lines and columns that have identical significant entries.
249: // This is done as two-pass graph coloring. They give a specific
250: // heuristic.
251: //
252: // this is a map to be applied to terminal indices before being
253: // used to access the compressed action table; it maps the terminal
254: // id (as reported by the lexer) to the proper action table column
255: TermIndex *actionIndexMap; // (nullable owner*)
256: //
257: // this is a map from states to the beginning of the action table
258: // row that pertains to that state; it effectively factors the
259: // states into equivalence classes
260: int actionRows; // rows in actionTable[]
261: ActionEntry **actionRowPointers; // (nullable owner ptr to serfs)
262: //
263: // index map for the goto table
264: NtIndex *gotoIndexMap; // (nullable owner*)
265: //
266: // row map for the goto table
267: int gotoRows;
268: GotoEntry **gotoRowPointers; // (nullable owner ptr to serfs)
269:
270: public: // data
271: // These are public because if they weren't, I'd just have a stupid
272: // getter/setter pattern that exposes them anyway.
273:
274: // start state id
275: StateId startState;
276:
277: // index of the production which will finish a parse; it's the
278: // final reduction executed
279: int finalProductionIndex;
280:
281: private: // funcs
282: void alloc(int numTerms, int numNonterms, int numStates, int numProds,
283: StateId start, int finalProd);
284:
285: // index tables
286: ActionEntry &actionEntry(StateId stateId, int termId)
287: { return actionTable[stateId*actionCols + termId]; }
288: int actionTableSize() const
289: { return actionRows * actionCols; }
290:
291: GotoEntry &gotoEntry(StateId stateId, int nontermId)
292: { return gotoTable[stateId*gotoCols + nontermId]; }
293: int gotoTableSize() const
294: { return gotoRows * gotoCols; }
295:
296: void appendAmbig(ArrayStack<ActionEntry> const &set);
297: bool compareAmbig(ArrayStack<ActionEntry> const &set, int startIndex);
298:
299: void fillInErrorBits(bool setPointers);
300: int colorTheGraph(int *color, Bit2d &graph);
301:
302: protected: // funcs
303: // the idea is that 'emitConstructionCode' will emit code that
304: // defines a subclass of 'ParseTables'; that's why so many of the
305: // data members are protected: the subclass can then access them
306: // directly, which is very convenient when trying to construct the
307: // tables from static data
308: ParseTables(bool owning); // only legal when owning==false
309:
310: public: // funcs
311: ParseTables(int numTerms, int numNonterms, int numStates, int numProds,
312: StateId start, int finalProd);
313: ~ParseTables();
314:
315: // simple queries
316: int getNumTerms() const { return numTerms; }
317: int getNumNonterms() const { return numNonterms; }
318: int getNumStates() const { return numStates; }
319: int getNumProds() const { return numProds; }
320:
321: // finish construction; do this before emitting code
322: void finishTables();
323:
324: // write the tables out as C++ source that can be compiled into
325: // the program that will ultimately do the parsing
326: void emitConstructionCode(EmitCode &out, char const *className, char const *funcName);
327:
328: // this does the same thing for ML, and is implemented in genml.cc
329: void emitMLConstructionCode(EmitCode &out, char const *className, char const *funcName);
330:
331:
332: // -------------------- table construction ------------------------
333: // CRS dest-state origin tables
334: void setFirstWithTerminal(int termId, StateId s) {
335: xassert((unsigned)termId < (unsigned)numTerms);
336: firstWithTerminal[termId] = s;
337: }
338: void setFirstWithNonterminal(int nontermId, StateId s) {
339: xassert((unsigned)nontermId < (unsigned)numNonterms);
340: firstWithNonterminal[nontermId] = s;
341: }
342:
343: void setActionEntry(StateId stateId, int termId, ActionEntry act)
344: { actionEntry(stateId, termId) = act; }
345: void setGotoEntry(StateId stateId, int nontermId, GotoEntry got)
346: { gotoEntry(stateId, nontermId) = got; }
347:
348: // encode actions
349: ActionEntry encodeShift(StateId destState, int shiftedTermId);
350: ActionEntry encodeReduce(int prodId, StateId inWhatState);
351: ActionEntry encodeAmbig(ArrayStack<ActionEntry> const &set,
352: StateId inWhatState);
353: ActionEntry encodeError() const;
354: ActionEntry validateAction(int code) const;
355:
356: // encode gotos
357: GotoEntry encodeGoto(StateId stateId, int shiftedNontermId) const;
358: GotoEntry encodeGotoError() const
359: { return errorGotoEntry; }
360: GotoEntry validateGoto(int code) const;
361:
362: // misc
363: void setProdInfo(int prodId, int rhsLen, int ntIndex) {
364: checkAssign(prodInfo[prodId].rhsLen, rhsLen);
365: checkAssign(prodInfo[prodId].lhsIndex, ntIndex);
366: }
367: void setStateSymbol(StateId state, SymbolId sym) {
368: stateSymbol[state] = sym;
369: }
370: NtIndex *getWritableNontermOrder() {
371: // expose this directly, due to the way the algorithm that
372: // computes it is written
373: return nontermOrder;
374: }
375:
376: // table compressors
377: void computeErrorBits();
378: void mergeActionColumns();
379: void mergeActionRows();
380: void mergeGotoColumns();
381: void mergeGotoRows();
382:
383:
384: // -------------------- table queries ---------------------------
385: // return true if the action is an error
386: bool actionEntryIsError(StateId stateId, int termId) {
387: #if ENABLE_EEF_COMPRESSION
388: // check with the error table
389: return ( errorBitsPointers[stateId][termId >> 3]
390: >> (termId & 7) ) & 1;
391: #else
392: return isErrorAction(actionEntry(stateId, termId));
393: #endif
394: }
395:
396: // query action table, without checking the error bitmap
397: ActionEntry getActionEntry_noError(StateId stateId, int termId) {
398: #if ENABLE_GCS_COMPRESSION
399: #if ENABLE_GCS_COLUMN_COMPRESSION
400: return actionRowPointers[stateId][actionIndexMap[termId]];
401: #else
402: return actionRowPointers[stateId][termId];
403: #endif
404: #else
405: return actionEntry(stateId, termId);
406: #endif
407: }
408:
409: // query the action table, yielding an action that might be
410: // an error action
411: ActionEntry getActionEntry(StateId stateId, int termId) {
412: #if ENABLE_EEF_COMPRESSION
413: if (actionEntryIsError(stateId, termId)) {
414: return errorActionEntry;
415: }
416: #endif
417:
418: return getActionEntry_noError(stateId, termId);
419: }
420:
421: // decode actions
422: #if !ENABLE_CRS_COMPRESSION
423: bool isShiftAction(ActionEntry code) const
424: { return code > 0 && code <= numStates; }
425: static StateId decodeShift(ActionEntry code, int /*shiftedTerminal*/)
426: { return (StateId)(code-1); }
427: static bool isReduceAction(ActionEntry code)
428: { return code < 0; }
429: static int decodeReduce(ActionEntry code, StateId /*inState*/)
430: { return -(code+1); }
431: static bool isErrorAction(ActionEntry code)
432: { return code == 0; }
433:
434: // ambigAction is only other choice; this yields a pointer to
435: // an array of actions, the first of which says how many actions
436: // there are
437: ActionEntry *decodeAmbigAction(ActionEntry code, StateId /*inState*/) const
438: { return ambigTable + (code-1-numStates); }
439:
440: #else
441: static bool isShiftAction(ActionEntry code) {
442: return (code & AE_MASK) == AE_SHIFT;
443: }
444: StateId decodeShift(ActionEntry code, int shiftedTerminal) {
445: return (StateId)(firstWithTerminal[shiftedTerminal] + (code & AE_MAXINDEX));
446: }
447: static bool isReduceAction(ActionEntry code) {
448: return (code & AE_MASK) == AE_REDUCE;
449: }
450: int decodeReduce(ActionEntry code, StateId inState) {
451: return productionsForState[inState][code & AE_MAXINDEX];
452: }
453: static bool isErrorAction(ActionEntry code) {
454: return code == AE_ERROR;
455: }
456:
457: ActionEntry *decodeAmbigAction(ActionEntry code, StateId inState) const {
458: return ambigStateTable[inState] + (code & AE_MAXINDEX);
459: }
460: #endif
461:
462: // decode gotos
463: GotoEntry getGotoEntry(StateId stateId, int nontermId) {
464: #if ENABLE_GCS_COMPRESSION
465: #if ENABLE_GCS_COLUMN_COMPRESSION
466: return gotoRowPointers[stateId][gotoIndexMap[nontermId]];
467: #else
468: return gotoRowPointers[stateId][nontermId];
469: #endif
470: #else
471: return gotoEntry(stateId, nontermId);
472: #endif
473: }
474:
475: bool isErrorGoto(GotoEntry code)
476: { return code == errorGotoEntry; }
477:
478: StateId decodeGoto(GotoEntry code, int shiftedNonterminal) {
479: #if ENABLE_CRS_COMPRESSION
480: return (StateId)(firstWithNonterminal[shiftedNonterminal] + code);
481: #else
482: return (StateId)code;
483: #endif
484: }
485:
486: // nonterminal order
487: int nontermOrderSize() const
488: { return numNonterms; }
489: NtIndex getNontermOrdinal(NtIndex idx) const
490: { return nontermOrder[idx]; }
491:
492: // misc
493: ProdInfo const &getProdInfo(int prodIndex) const
494: { return prodInfo[prodIndex]; }
495: int getStateSymbol(StateId id) const
496: { return stateSymbol[id]; }
497:
498: // query compression options based on which fields are not NULL; do
499: // *not* use the compile-time flags, because we're trying to detect
500: // mismatch between compiler flags used at different times
501: bool eef_enabled() const
502: { return !!errorBits; }
503: bool gcs_enabled() const
504: { return !!actionRowPointers; }
505: bool gcsc_enabled() const
506: { return !!actionIndexMap; }
507: bool crs_enabled() const
508: { return !!firstWithTerminal; }
509: };
510:
511:
512: // NOTE: At one point (before 7/27/03), I had the ability to read and
513: // write parse tables to files, *not* using the C++ compiler to store
514: // tables as static data. I removed it because I wasn't using it, and
515: // it was hindering table evolution. But as the tables stabilize
516: // again, if the need arises, one could go get (from CVS) the code
517: // that did it and fix it up to work again.
518:
519:
520: #endif // PARSETABLES_H
Start C section to elk/elk_ptreeact.h[1
/1
]
1: #line 5105 "./lpsrc/elk.pak"
2: // ptreeact.h see license.txt for copyright and terms of use
3: // a generic set of user actions that build parse trees for any grammar
4:
5: #ifndef PTREEACT_H
6: #define PTREEACT_H
7:
8: #include "elk_lexerint.h"
9: #include "elk_useract.h"
10:
11: class ParseTables; // parsetables.h
12:
13:
14: // lexer to yield PTreeNodes for tokens
15: class ParseTreeLexer : public LexerInterface {
16: private:
17: LexerInterface *underlying; // for getting token descriptions
18: NextTokenFunc underToken; // for getting tokens
19: UserActions *actions; // for getting symbol names
20:
21: private:
22: void copyFields(); // presumably copies token fields from 'underlying' -- confirm in the .cc
23:
24: public:
25: ParseTreeLexer(LexerInterface *u, UserActions *a);
26:
27: static void nextToken(LexerInterface *lex); // static so its address matches NextTokenFunc below
28: virtual NextTokenFunc getTokenFunc() const
29: { return &ParseTreeLexer::nextToken; }
30:
31: virtual sm_string tokenDesc() const;
32: virtual sm_string tokenKindDesc(int kind) const;
33: };
34:
35:
36: // layer these actions on top of the generated actions to
37: // build parse trees for the reductions
38: class ParseTreeActions : public TrivialUserActions {
39: private:
40: UserActions *underlying; // for getting symbol names
41: ParseTables *tables; // for finding out production lengths
42:
43: public:
44: ParseTreeActions(UserActions *u, ParseTables *t)
45: : underlying(u), tables(t) {}
46:
47: static SemanticValue reduce( // static so its address matches ReductionActionFunc below
48: UserActions *context,
49: int productionId,
50: SemanticValue const *svals
51: SOURCELOCARG( SourceLoc loc ) );
52: virtual ReductionActionFunc getReductionAction()
53: { return &ParseTreeActions::reduce; }
54:
55: virtual SemanticValue mergeAlternativeParses(
56: int ntIndex, SemanticValue left, SemanticValue right
57: SOURCELOCARG( SourceLoc loc ) );
58:
59: virtual char const *terminalName(int termId);
60: virtual char const *nonterminalName(int termId); // NOTE(review): takes a nonterminal index despite the name 'termId'
61: };
64:
65:
66: #endif // PTREEACT_H
Start C section to elk/elk_ptreenode.h[1
/1
]
1: #line 5172 "./lpsrc/elk.pak"
2: // ptreenode.h see license.txt for copyright and terms of use
3: // parse tree node for experimental grammars (this isn't something
4: // Elkhound as a whole knows about--it doesn't make trees unless
5: // the user actions do)
6:
7: #ifndef PTREENODE_H
8: #define PTREENODE_H
9:
10: #include <stddef.h> // NULL
11: #include <iostream> // std::ostream
12:
13: // for storing counts of parse trees; I try to make the code work for
14: // either 'int' or 'double' in this spot (e.g. I assign 0 to it
15: // instead of 0.0), even though 'int' overflows quickly for the highly
16: // ambiguous grammars
17: typedef double TreeCount;
18:
19: class PTreeNode {
20: public: // types
21: // max # of children (when this is increased, more constructors
22: // for PTreeNode should be added)
23: enum { MAXCHILDREN = 10 };
24:
25: // printing options
26: enum PrintFlags {
27: PF_NONE = 0, // default, print types as-is
28: PF_EXPAND = 1, // types are just LHS, dig down to find RHSs
29: PF_ADDRS = 2, // print node virtual addresses to see sharing
30: };
31:
32: public: // data
33: // textual repr. of the production applied; possibly useful for
34: // printing the tree, or during debugging
35: char const *type;
36:
37: // instead of making explicit merge nodes (which runs afoul of the
38: // yield-then-merge problem), just link alternatives together using
39: // this link; this is NULL when there are no alternatives, or for
40: // the last node in a list of alts
41: PTreeNode *merged;
42:
43: // array of children; these aren't owner pointers because
44: // we might have arbitrary sharing for some grammars
45: int numChildren;
46: PTreeNode *children[MAXCHILDREN];
47:
48: // # of parse trees of which this is the root; effectively this
49: // memoizes the result to avoid an exponential blowup counting
50: // the trees; when this value is 0, it means the count has not
51: // yet been computed (any count must be positive)
52: TreeCount count;
53:
54: // count of # of allocated nodes; useful for identifying when
55: // we're making too many
56: static int allocCount;
57:
58: // count # of times addAlternative is called; this will tell
59: // the total number of local ambiguities that need to be resolved
60: static int alternativeCount;
61:
62: private: // funcs
63: // init fields which don't depend on ctor args
64: void init();
65:
66: // helpers
67: static void indent(std::ostream &out, int n);
68: void innerPrintTree(std::ostream &out, int indentation, PrintFlags pf) const;
69: int countMergedList() const;
70:
71: public: // funcs
72: // now lots of constructors so we have one for each possible
73: // number of children; the calls are automatically inserted
74: // by a perl script ('make-trivparser.pl') or by the grammar
75: // transformation GrammarAnalysis::addTreebuildingActions()
76: PTreeNode(char const *t)
77: : type(t), numChildren(0), count(0) { init(); }
78: PTreeNode(char const *t, PTreeNode *ch0)
79: : type(t), numChildren(1), count(0) { init(); children[0] = ch0; }
80: PTreeNode(char const *t, PTreeNode *ch0, PTreeNode *ch1)
81: : type(t), numChildren(2), count(0) { init(); children[0] = ch0; children[1] = ch1; }
82: PTreeNode(char const *t, PTreeNode *ch0, PTreeNode *ch1, PTreeNode *ch2)
83: : type(t), numChildren(3), count(0) { init(); children[0] = ch0; children[1] = ch1; children[2] = ch2; }
84: PTreeNode(char const *t, PTreeNode *ch0, PTreeNode *ch1, PTreeNode *ch2, PTreeNode *ch3)
85: : type(t), numChildren(4), count(0) { init(); children[0] = ch0; children[1] = ch1; children[2] = ch2; children[3] = ch3; }
86: PTreeNode(char const *t, PTreeNode *ch0, PTreeNode *ch1, PTreeNode *ch2, PTreeNode *ch3, PTreeNode *ch4)
87: : type(t), numChildren(5), count(0) { init(); children[0] = ch0; children[1] = ch1; children[2] = ch2; children[3] = ch3; children[4] = ch4; }
88: // be sure to update MAXCHILDREN, above, if you add constructors
89: // which accept more children
90:
91: ~PTreeNode() { allocCount--; } // assumes the matching increment happens in init() -- TODO confirm in the .cc
92:
93: // count the number of trees encoded (taking merge nodes into
94: // account) in the tree rooted at 'this'
95: TreeCount countTrees();
96:
97: // print the entire parse forest using indentation to represent
98: // nesting, and duplicating printing of shared subtrees within
99: // ambiguous regions
100: void printTree(std::ostream &out, PrintFlags pf = PF_NONE) const;
101:
102: // add an alternative to the current 'merged' list
103: void addAlternative(PTreeNode *alt);
104: };
105:
106: #endif // PTREENODE_H
Start C section to elk/elk_rcptr.h[1
/1
]
1: #line 5279 "./lpsrc/elk.pak"
2: // rcptr.h see license.txt for copyright and terms of use
3: // a stab at a reference-counting pointer
4:
5: // the object pointed-at must support this interface:
6: // // increment reference count
7: // void incRefCt();
8: //
9: // // decrement refcount, and if it becomes 0, delete yourself
10: // void decRefCt();
11:
12: #ifndef __RCPTR_H
13: #define __RCPTR_H
14:
15: #include "sm_typ.h"
16:
17: #if 0
18: #include <stdio.h> // printf, temporary
19: #define DBG(fn) printf("%s(%p)\n", fn, ptr)
20: #else
21: #define DBG(fn)
22: #endif
23:
24: template <class T>
25: class RCPtr {
26: private: // data
27: T *ptr; // the real pointer
28:
29: private: // funcs
30: void inc() { DBG("inc"); if (ptr) { ptr->incRefCt(); } }
31: void dec() { DBG("dec"); if (ptr) { ptr->decRefCt(); ptr=NULL; } }
32:
33: public: // funcs
34: explicit RCPtr(T *p = NULL) : ptr(p) { DBG("ctor"); inc(); }
35: explicit RCPtr(RCPtr const &obj) : ptr(obj.ptr) { DBG("cctor"); inc(); }
36: ~RCPtr() { DBG("dtor"); dec(); }
37:
38: // point at something new (NULL ok); increment the new referent *before* releasing the old one, so self-assignment and aliased pointers are safe
39: void operator= (T *p) { DBG("op=ptr"); if (p) { p->incRefCt(); } dec(); ptr=p; }
40: void operator= (RCPtr<T> const &obj)
41: { DBG("op=obj"); T *p = obj.ptr; if (p) { p->incRefCt(); } dec(); ptr=p; }
42:
43: // some operators that make Owner behave more or less like
44: // a native C++ pointer
45: operator T const * () const { DBG("opcT*"); return ptr; }
46: T const & operator* () const { DBG("opc*"); return *ptr; }
47: T const * operator-> () const { DBG("opc->"); return ptr; }
48:
49: bool operator==(T *p) const { return ptr == p; }
50: bool operator!=(T *p) const { return !this->operator==(p); }
51:
52: bool operator==(RCPtr<T> const &obj) const { return ptr == obj.ptr; }
53: bool operator!=(RCPtr<T> const &obj) const { return !this->operator==(obj); }
54:
55: operator T* () { DBG("opT*"); return ptr; }
56: operator T const * () { DBG("opcT*"); return ptr; }
57: T& operator* () { DBG("op*"); return *ptr; }
58: T* operator-> () { DBG("op->"); return ptr; }
59:
60: // escape hatch for when operators flake out on us
61: T *get() { DBG("get"); return ptr; }
62: T const *getC() const { DBG("getC"); return ptr; }
63:
64: // sometimes, in performance-critical code, I need fine control
65: // over the refcount operations; this lets me change 'ptr', the
66: // assumption being I'll update the refct manually
67: void setWithoutUpdateRefct(T *p) { ptr=p; }
68: };
69:
70:
71: #endif // __RCPTR_H
Start C section to elk/elk_useract.h[1
/1
]
1: #line 5351 "./lpsrc/elk.pak"
2: // useract.h see license.txt for copyright and terms of use
3: // interface to an object containing user-defined action functions
4:
5: // the code appears in the .cc file generated by 'gramanl' from
6: // an associated .gr file
7:
8: // the comments below are guidelines on writing grammar actions, since
9: // those grammar actions are composed to form the single-entry
10: // functions documented below
11:
12: #ifndef USERACT_H
13: #define USERACT_H
14:
15: #include "elk_glrconfig.h"
16: #include "sm_str.h"
17: #include "sm_srcloc.h"
18:
19: class ParseTables; // parsetables.h
20: class ELK_EXTERN UserActions;
21:
// user-supplied semantic values:
// - Semantic values are an arbitrary word, that the user can then
//   use as a pointer or an integer or whatever.  The parser
//   generator inserts the appropriate casts, so the actual type
//   I use here shouldn't ever be visible to the user.
// - Usually, SemanticValues that are used as pointers are considered
//   to be owner pointers, but only in the sense that del() will be
//   called.  It's up to the user to decide if del() actually does
//   anything.
// NOTE(review): storing pointers here assumes 'unsigned long' is at
// least pointer-sized; that does not hold on LLP64 targets (e.g.
// 64-bit Windows) -- verify if porting.
typedef unsigned long SemanticValue;

// name of a null sval; can't use "NULL" because of __null weirdness in gcc-3...
#define NULL_SVAL 0
35:
36:
37: // package of functions; the user will create an instance of a class
38: // derived from this, and the parser will carry it along to invoke
39: // the various action functions
40: class ELK_EXTERN UserActions {
41: public:
42: // allow abstract user to delete
43: virtual ~UserActions();
44:
45: // user-supplied reduction actions
46: // - production 'id' is being used to reduce
47: // - 'svals' contains an array of semantic values yielded by the RHS
48: // symbols, such that the 0th element is the leftmost RHS element;
49: // the pointers in the array are owner pointers (the array ptr itself
50: // is a serf)
51: // - 'loc' is the location of the left edge of the parse subtree
52: // - this fn returns the semantic value for the reduction; this return
53: // value is an owner pointer
54: typedef SemanticValue (*ReductionActionFunc)(
55: UserActions *context, // parser context class object
56: int productionId, // production being used to reduce
57: SemanticValue const *svals // array of semantic values
58: SOURCELOCARG( SourceLoc loc ) );
59:
60: // get the actual function; two-step to avoid virtual call in inner loop
61: virtual ReductionActionFunc getReductionAction()=0;
62:
63: // duplication of semantic values:
64: // - the given 'sval' is about to be passed to a reduction action
65: // function. the user must return a value to be stored in place
66: // of the old one, in case it is needed to pass to another action
67: // function in case of local ambiguity; 'sval' is a serf
68: // - the return value will be yielded (if necessary) to the next
69: // consumer action function, and is an owner ptr
70: // - some possible strategies:
71: // - return NULL, in which case it is probably an error for the
72: // value to be passed to another action (i.e. the grammar needs
73: // to be LALR(1) near this semantic value); in this case, 'del'
74: // will not be called on the NULL value
75: // - increment a reference count and return 'sval'
76: // - do nothing, and rely on some higher-level allocation scheme
77: // such as full GC, or regions
78: virtual SemanticValue duplicateTerminalValue(
79: int termId, SemanticValue sval)=0;
80: virtual SemanticValue duplicateNontermValue(
81: int nontermId, SemanticValue sval)=0;
82:
83: // a semantic value didn't get passed to an action function, either
84: // because it was never used at all (e.g. a semantic value for a
85: // punctuator token, which the user can simply ignore), or because we
86: // duplicated it in anticipation of a possible local ambiguity, but
87: // then that parse turned out not to happen, so we're cancelling
88: // the dup now; 'sval' is an owner pointer
89: virtual void deallocateTerminalValue(int termId, SemanticValue sval)=0;
90: virtual void deallocateNontermValue(int nontermId, SemanticValue sval)=0;
91:
92: // this is called when there are two interpretations for the same
93: // sequence of ground terminals, culminating in two different reductions
94: // deriving the same left-hand-side nonterminal (identified by 'ntIndex');
95: // it should return a value to be used in the place where they conflict'
96: // both 'left' and 'right' are owner pointers, and the return value
97: // is also an owner pointer
98: //
99: // NOTE: the 'left' value is always the node which came first, and
100: // might even have been yielded to another reduction already
101: // (depending on the grammar), whereas the 'right' value is always a
102: // node which was just created, and has definitely *not* been
103: // yielded to anything (this fact is critical to solving the general
104: // yield-then-merge problem)
105: virtual SemanticValue mergeAlternativeParses(
106: int ntIndex, SemanticValue left, SemanticValue right
107: SOURCELOCARG( SourceLoc loc )
108: )=0;
109:
110: // after every reduction, the semantic value is passed to this function,
111: // which returns 'false' if the reduction should be cancelled; if it
112: // does return false, then 'sval' is an owner pointer (the parser engine
113: // will drop the value on the floor)
114: virtual bool keepNontermValue(int nontermId, SemanticValue sval)=0;
115:
116: // every time a token is pulled from the lexer, this reclassifier is
117: // used to give the user a chance to reinterpret the token, before it
118: // is used for reduction lookahead comparisons; it returns the
119: // reclassified token type, or 'oldTokenType' to leave it unchanged
120: typedef int (*ReclassifyFunc)(UserActions *ths, int oldTokenType, SemanticValue sval);
121:
122: // get the reclassifier
123: virtual ReclassifyFunc getReclassifier()=0;
124:
125: // descriptions of symbols with their semantic values; this is useful
126: // for the ACTION_TRACE function of the parser
127: virtual sm_string terminalDescription(int termId, SemanticValue sval)=0;
128: virtual sm_string nonterminalDescription(int nontermId, SemanticValue sval)=0;
129:
130: // get static names for all of the symbols
131: virtual char const *terminalName(int termId)=0;
132: virtual char const *nonterminalName(int termId)=0;
133:
134: // get the parse tables for this grammar; the default action
135: // complains that no tables are defined
136: virtual ParseTables *makeTables();
137: };
138:
139:
140: // for derived classes, the list of functions to be declared
141: // (this macro is used by the generated code)
// for derived classes, the list of functions to be declared
// (this macro is used by the generated code; it mirrors the pure
// virtuals of UserActions above)
#define USER_ACTION_FUNCTIONS                                      \
  virtual ReductionActionFunc getReductionAction();                \
                                                                   \
  virtual SemanticValue duplicateTerminalValue(                    \
    int termId, SemanticValue sval);                               \
  virtual SemanticValue duplicateNontermValue(                     \
    int nontermId, SemanticValue sval);                            \
                                                                   \
  virtual void deallocateTerminalValue(                            \
    int termId, SemanticValue sval);                               \
  virtual void deallocateNontermValue(                             \
    int nontermId, SemanticValue sval);                            \
                                                                   \
  virtual SemanticValue mergeAlternativeParses(                    \
    int ntIndex, SemanticValue left, SemanticValue right           \
    SOURCELOCARG( SourceLoc loc )                                  \
    );                                                             \
                                                                   \
  virtual bool keepNontermValue(int nontermId, SemanticValue sval);\
                                                                   \
  virtual ReclassifyFunc getReclassifier();                        \
                                                                   \
  virtual sm_string terminalDescription(int termId, SemanticValue sval);       \
  virtual sm_string nonterminalDescription(int nontermId, SemanticValue sval); \
                                                                   \
  virtual char const *terminalName(int termId);                    \
  virtual char const *nonterminalName(int nontermId);
169:
170:
// a useraction class which has only trivial actions
class TrivialUserActions : public UserActions {
public:
  USER_ACTION_FUNCTIONS   // declares overrides for all the pure virtuals

  // the shared reduction action; its ReductionActionFunc-compatible
  // signature lets getReductionAction() return it directly
  // NOTE(review): the body lives in the .cc file, not visible here;
  // presumably returns a null/trivial sval per the class comment -- verify
  static SemanticValue doReductionAction(
    UserActions *ths,
    int productionId, SemanticValue const *svals
    SOURCELOCARG( SourceLoc loc ) );

  // the shared token reclassifier; presumably returns 'oldTokenType'
  // unchanged -- verify in the .cc file
  static int reclassifyToken(UserActions *ths,
    int oldTokenType, SemanticValue sval);
};
184:
185:
186: #endif // USERACT_H
Start C section to elk/elk_util.h[1
/1
]
1: #line 5538 "./lpsrc/elk.pak"
2: // util.h see license.txt for copyright and terms of use
3: // collection of utility macros and functions that are
4: // candidates for adding to the smbase library
5:
6: #ifndef __UTIL_H
7: #define __UTIL_H
8:
9: #include "sm_trace.h"
10:
// given a method called 'print', define an operator to use it
// (declared 'friend' so the macro can be expanded inside a class body)
#define OSTREAM_OPERATOR(MyClass) \
  friend std::ostream &operator << (std::ostream &os, MyClass const &ths) \
    { ths.print(os); return os; }


// I'm experimenting with the idea of making my control structures
// more declarative
// (iterates 'var' over the half-open range [start, maxPlusOne))
#define INTLOOP(var, start, maxPlusOne) \
  for (int var = start; var < maxPlusOne; var++)
21:
22:
23: // experiment: given (a reference to), an owner pointer, yield the pointer
24: // value after nullifying the given pointer
// Given (a reference to) an owner pointer, hand the pointed-at object
// to the caller: the caller's variable is nulled out and the raw
// pointer -- now owned by whoever receives the return value -- is
// returned.
template <class T>
inline T *transferOwnership(T *&ptr)
{
  T *released = ptr;    // remember what was owned
  ptr = NULL;           // the source no longer owns it
  return released;      // ownership passes to the caller
}
32:
33:
// print a value under the debug trace (name: Trace VALue)
// e.g. TVAL(x+y) emits `x+y: <value>` on the "debug" trace stream
#define TVAL(expr) \
  trace("debug") << #expr ": " << (expr) << std::endl
37:
38:
39: #endif // __UTIL_H
Start cpp section to elk/elk_asockind.cpp[1
/1
]
1: #line 5578 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8: sm_string toString(AssocKind k)
9: {
10: static char const * const arr[NUM_ASSOC_KINDS] = {
11: "AK_LEFT", "AK_RIGHT", "AK_NONASSOC"
12: };
13: xassert((unsigned)k < NUM_ASSOC_KINDS);
14: return sm_string(arr[k]);
15: }
16:
17:
Start cpp section to elk/elk_emitcode.cpp[1
/1
]
1: #line 5596 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
// open output file 'f'; text accumulates in the sm_stringBuilder base
// and 'line' tracks how far into the file we have flushed
EmitCode::EmitCode(char const *f)
  : sm_stringBuilder(),
    os(f),       // open the output stream on 'f'
    fname(f),
    line(1)      // line numbering is 1-based
{
  if (!os) {
    xsyserror("open", fname);    // report the failed open and bail
  }
}
20:
// push any still-buffered text out to the file before closing
EmitCode::~EmitCode()
{
  flush();
}
25:
26:
// return the line number the *next* emitted text will land on;
// must flush first so 'line' accounts for newlines still buffered
int EmitCode::getLine()
{
  flush();
  return line;
}
32:
33:
34: void EmitCode::flush()
35: {
36:
37: char const *p = pcharc();
38: while (*p) {
39: if (*p == '\n') {
40: line++;
41: }
42: p++;
43: }
44:
45: os << *this;
46: setlength(0);
47: }
48:
49:
50: char const *hashLine()
51: {
52: if (tracingSys("nolines")) {
53:
54: return "// #line ";
55: }
56: else {
57: return "#line ";
58: }
59: }
60:
61:
62:
// build a #line directive naming the source file/line that 'loc'
// refers to, e.g.  #line 42 "foo.gr"
sm_string lineDirective(SourceLoc loc)
{
  char const *fname;
  int line, col;   // 'col' is decoded but unused: #line has no column
  sourceLocManager->decodeLineCol(loc, fname, line, col);

  return sm_stringc << hashLine() << line << " \"" << fname << "\"\n";
}
71:
72: sm_stringBuilder &restoreLine(sm_stringBuilder &sb)
73: {
74:
75: EmitCode &os = (EmitCode&)sb;
76:
77:
78: int line = os.getLine()+1;
79: return os << hashLine() << line
80: << " \"" << os.getFname() << "\"\n";
81: }
Start cpp section to elk/elk_genml.cpp[1
/1
]
1: #line 5678 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
// forward declarations for the ML (OCaml) code emitter; these appear
// to parallel the C++ emitter's structure (see the #if 0 remnants in
// emitMLActionCode below)
void emitMLDescriptions(GrammarAnalysis const &g, EmitCode &out);
void emitMLActionCode(GrammarAnalysis const &g, char const *mliFname,
                      char const *mlFname, char const *srcFname);
void emitMLUserCode(EmitCode &out, LocString const &code, bool braces = true);
void emitMLActions(Grammar const &g, EmitCode &out, EmitCode &dcl);
void emitMLDupDelMerge(GrammarAnalysis const &g, EmitCode &out, EmitCode &dcl);
void emitMLFuncDecl(Grammar const &g, EmitCode &out, EmitCode &dcl,
                    char const *rettype, char const *params);
void emitMLDDMInlines(Grammar const &g, EmitCode &out, EmitCode &dcl,
                      Symbol const &sym);
void emitMLSwitchCode(Grammar const &g, EmitCode &out,
                      char const *signature, char const *switchVar,
                      ObjList<Symbol> const &syms, int whichFunc,
                      char const *templateCode, char const *actUpon);
38:
39:
40:
41:
42:
43:
// name of the generated action function for one production,
// built as "action<index>_<LHS-name>", e.g. "action12_Expr"
sm_string actionFuncName(Production const &prod)
{
  return sm_stringc << "action" << prod.prodIndex
                    << "_" << prod.left->name;
}
49:
50:
51:
52:
// Emit the OCaml output for grammar 'g': the interface file
// 'mliFname' and the implementation file 'mlFname'; 'srcFname' (the
// originating .gr file) is only quoted in the generated headers.
void emitMLActionCode(GrammarAnalysis const &g, char const *mliFname,
                      char const *mlFname, char const *srcFname)
{
  // ---- interface (.mli) file ----
  EmitCode dcl(mliFname);
  if (!dcl) {
    throw_XOpen(mliFname);
  }

  dcl << "(* " << mliFname << " *)\n"
      << "(* *** DO NOT EDIT BY HAND *** *)\n"
      << "(* automatically generated by elkhound, from " << srcFname << " *)\n"
      << "\n"
      ;

  // user-supplied verbatim sections come first
  {FOREACH_OBJLIST(LocString, g.verbatim, iter) {
    emitMLUserCode(dcl, *(iter.data()), false /*braces*/);
  }}

  #if 0   // remnant of the C++ emitter this was derived from; disabled
  {
    int ct=0;
    FOREACH_OBJLIST(LocString, g.actionClasses, iter) {
      if (ct++ > 0) {
        dcl << "};\n";
      }

      dcl << "\n"
          << "// parser context class\n"
          << "class ";
      emitUserCode(dcl, *(iter.data()), false /*braces*/);
    }}

    dcl << "\n"
        << "private:\n"
        << " USER_ACTION_FUNCTIONS // see useract.h\n"
        << "\n"
        << " // declare the actual action function\n"
        << " static SemanticValue doReductionAction(\n"
        << " " << g.actionClassName << " *ths,\n"
        << " int productionId, SemanticValue const *semanticValues"
        SOURCELOC( << ",\n SourceLoc loc" )
        << ");\n"
        << "\n"
        << " // declare the classifier function\n"
        << " static int reclassifyToken(\n"
        << " " << g.actionClassName << " *ths,\n"
        << " int oldTokenType, SemanticValue sval);\n"
        << "\n"
        ;
  #endif

  // the only things the interface exposes: the tables and the actions
  dcl << "val " << g.actionClassName << "ParseTables: Parsetables.tParseTables\n";
  dcl << "val " << g.actionClassName << "UserActions: Useract.tUserActions\n";

  // ---- implementation (.ml) file ----
  EmitCode out(mlFname);
  if (!out) {
    throw_XOpen(mlFname);
  }

  out << "(* " << mlFname << " *)\n";
  out << "(* *** DO NOT EDIT BY HAND *** *)\n";
  out << "(* automatically generated by gramanl, from " << srcFname << " *)\n";
  out << "\n"
      << "open Useract (* tSemanticValue *)\n"
      << "open Parsetables (* tParseTables *)\n"
      << "\n"
      << "\n"
      ;

  // verbatim sections again, this time into the implementation
  {FOREACH_OBJLIST(LocString, g.verbatim, iter) {
    emitMLUserCode(out, *(iter.data()), false /*braces*/);
  }}

  #if 0   // more disabled C++-emitter remnants
  #ifdef NO_GLR_SOURCELOC
    out << "#ifndef NO_GLR_SOURCELOC\n";
    out << " #define NO_GLR_SOURCELOC\n";
    out << "#endif\n";
  #else
    out << "// GLR source location information is enabled\n";
  #endif
    out << "\n";
    out << "#include \"" << hFname << "\" // " << g.actionClassName << "\n";
    out << "#include \"elk_parsetables.h\" // ParseTables\n";
    out << "#include \"sm_srcloc.h\" // SourceLoc\n";
    out << "\n";
    out << "#include <assert.h> // assert\n";
    out << "#include <iostream> // std::cout\n";
    out << "#include <stdlib.h> // abort\n";
    out << "\n";

    NOSOURCELOC(
      out << "// parser-originated location information is disabled by\n"
          << "// NO_GLR_SOURCELOC; any rule which refers to 'loc' will get this one\n"
          << "static SourceLoc loc = SL_UNKNOWN;\n"
          << "\n\n";
    )
  #endif

  // symbol-name arrays and description functions
  emitMLDescriptions(g, out);

  // one reduction function per production, plus the dispatch array
  emitMLActions(g, out, dcl);
  out << "\n";
  out << "\n";

  // dup/del/merge/keep/classify helpers and their dispatchers
  emitMLDupDelMerge(g, out, dcl);
  out << "\n";
  out << "\n";

  // bundle all the *Func definitions into the tUserActions record
  out << "let " << g.actionClassName << "UserActions = {\n";
  #define COPY(name) \
    out << " " #name " = " #name "Func;\n";
  COPY(reductionAction)
  COPY(duplicateTerminalValue)
  COPY(duplicateNontermValue)
  COPY(deallocateTerminalValue)
  COPY(deallocateNontermValue)
  COPY(mergeAlternativeParses)
  COPY(keepNontermValue)
  COPY(terminalDescription)
  COPY(nonterminalDescription)
  COPY(terminalName)
  COPY(nonterminalName)
  #undef COPY
  out << "}\n"
      << "\n"
      << "\n"
      ;

  // emit the parse tables as an OCaml literal
  g.tables->finishTables();
  g.tables->emitMLConstructionCode(out, g.actionClassName, "makeTables");

  #if 0   // C++-emitter remnant, disabled
    dcl << "\n"
        << "// the function which makes the parse tables\n"
        << "public:\n"
        << " virtual ParseTables *makeTables();\n"
        << "};\n"
        << "\n"
        << "#endif // " << latchName << "\n"
        ;
  #endif

  // trailing user-supplied implementation verbatim sections
  FOREACH_OBJLIST(LocString, g.implVerbatim, iter) {
    emitMLUserCode(out, *(iter.data()), false /*braces*/);
  }
}
219:
220:
// Emit one chunk of user-written code; when 'braces' is true the code
// is wrapped in "( ... )" so it reads as a single OCaml expression.
// The #line-directive emission around it is currently disabled (the
// literal 'false &&' below -- see the TODO markers).
void emitMLUserCode(EmitCode &out, LocString const &code, bool braces)
{
  out << "\n";
  if (false/*TODO:fix*/ && code.validLoc()) {
    out << lineDirective(code.loc);
  }

  if (braces) {
    out << "(";
  }

  out << code;

  if (braces) {
    out << " )";
  }

  // if we emitted a directive pointing into the user's file, point
  // subsequent lines back at the generated file (disabled with the
  // same 'false &&' as above)
  if (false/*TODO:fix*/ && code.validLoc()) {
    out << "\n" << restoreLine;
  }
  else {
    out << "\n";
  }
}
248:
249:
250:
251:
// map the C++-ish type name "void" to the uniform OCaml semantic-value
// type; any other type name passes through untouched
static char const *notVoid(char const *type)
{
  return (strcmp(type, "void") == 0)? "tSemanticValue" : type;
}
261:
262:
263:
264:
265: static char const *typeString(char const *type, LocString const &tag)
266: {
267: if (!type) {
268: std::cout << tag.locString() << ": Production tag \"" << tag
269: << "\" on a symbol with no type.\n";
270: return "__error_no_type__";
271: }
272: else {
273: return notVoid(type);
274: }
275: }
276:
277:
// Emit the terminal/nonterminal name arrays and the four description/
// name lookup functions.
// NOTE(review): the emitted OCaml type annotation reads "sm_string
// array" -- 'sm_string' is a C++ name, so this looks like an over-eager
// global rename of 'string'; confirm the generated .ml still compiles.
void emitMLDescriptions(GrammarAnalysis const &g, EmitCode &out)
{
  // terminal names, indexed by terminal code (gaps get a placeholder)
  {
    out << "let termNamesArray: sm_string array = [|\n";
    for (int code=0; code < g.numTerminals(); code++) {
      Terminal const *t = g.getTerminal(code);
      if (!t) {
        // unassigned terminal code
        out << " \"(no terminal)\"; (* " << code << " *)\n";
      }
      else {
        out << " \"" << t->name << "\"; (* " << code << " *)\n";
      }
    }
    out << " \"\" (* dummy final value for ';' separation *)\n"
        << "|]\n"
        << "\n";
  }

  // description function: just the name ('sval' is ignored)
  out << "let terminalDescriptionFunc (termId:int) (sval:tSemanticValue) : sm_string =\n"
      << "begin\n"
      << " termNamesArray.(termId)\n"
      << "end\n"
      << "\n"
      << "\n"
      ;

  // nonterminal names, same scheme as the terminals above
  {
    out << "let nontermNamesArray: sm_string array = [|\n";
    for (int code=0; code < g.numNonterminals(); code++) {
      Nonterminal const *nt = g.getNonterminal(code);
      if (!nt) {
        // unassigned nonterminal code
        out << " \"(no nonterminal)\"; (* " << code << " *)\n";
      }
      else {
        out << " \"" << nt->name << "\"; (* " << code << " *)\n";
      }
    }
    out << " \"\" (* dummy final value for ';' separation *)\n"
        << "|]\n"
        << "\n";
  }

  out << "let nonterminalDescriptionFunc (nontermId:int) (sval:tSemanticValue)\n"
      << " : sm_string =\n"
      << "begin\n"
      << " nontermNamesArray.(nontermId)\n"
      << "end\n"
      << "\n"
      << "\n"
      ;

  // plain name lookups
  out << "let terminalNameFunc (termId:int) : sm_string =\n"
      << "begin\n"
      << " termNamesArray.(termId)\n"
      << "end\n"
      << "\n"
      << "let nonterminalNameFunc (nontermId:int) : sm_string =\n"
      << "begin\n"
      << " nontermNamesArray.(nontermId)\n"
      << "end\n"
      << "\n"
      << "\n"
      ;
}
357:
358:
// Emit the array of reduction-action closures (one per production, in
// production order) plus the dispatch function that indexes it.
// Values cross the uniform tSemanticValue boundary via Obj.repr /
// Obj.obj in the emitted code.  'dcl' is currently unused here.
void emitMLActions(Grammar const &g, EmitCode &out, EmitCode &dcl)
{
  out << "(* ------------------- actions ------------------ *)\n"
      << "let reductionActionArray : (tSemanticValue array -> tSemanticValue) array = [|\n"
      << "\n"
      ;

  // one "(fun svals -> ...)" per production
  {FOREACH_OBJLIST(Production, g.productions, iter) {
    Production const &prod = *(iter.data());

    // every LHS must have a declared type
    xassert(prod.left->type);

    out << "(* " << prod.toString() << " *)\n";

    out << "(fun svals ->\n";

    // bind each tagged RHS symbol to its semantic value
    int index=-1;
    FOREACH_OBJLIST(Production::RHSElt, prod.right, rhsIter) {
      Production::RHSElt const &elt = *(rhsIter.data());
      index++;
      if (elt.tag.length() == 0) continue;    // untagged: not bound

      out << " let " << elt.tag << " = (Obj.obj svals.(" << index << ") : "
          << typeString(elt.sym->type, elt.tag) << ") in\n";
    }

    // the user's action computes __result ...
    out << " let __result: " << prod.left->type << " =";

    emitMLUserCode(out, prod.action, true /*braces*/);

    // ... which is then re-wrapped as a tSemanticValue
    out << " in (Obj.repr __result)\n"
        << ");\n"
        << "\n"
        ;
  }}

  // sentinel entry so the array is never indexed out of range silently
  out << "(fun _ -> (failwith \"bad production index\")) (* no ; *)"
      << "\n"
      << "|]\n"
      << "\n"
      ;

  // the dispatcher the parser core calls
  out << "let reductionActionFunc (productionId:int) (svals: tSemanticValue array)\n"
      << " : tSemanticValue =\n"
      << "begin\n"
      << " (reductionActionArray.(productionId) svals)\n"
      << "end\n"
      << "\n"
      ;

  #if 0   // disabled remnant ('prod' is not even in scope here)
  if (0==strcmp(prod.left->type, "void")) {
    out << ", 0";
  }
  #endif
}
431:
432:
// Emit the per-symbol dup/del/merge/keep/classify helper functions and
// the dispatch functions that route a symbol id to the right helper.
// whichFunc codes passed to emitMLSwitchCode: 0=dup, 1=del, 2=merge,
// 3=keep, 4=classify.
void emitMLDupDelMerge(GrammarAnalysis const &g, EmitCode &out, EmitCode &dcl)
{
  out << "(* ---------------- dup/del/merge/keep nonterminals --------------- *)\n"
      << "\n";

  // helper functions (dup_X, del_X, merge_X, keep_X) for nonterminals
  FOREACH_OBJLIST(Nonterminal, g.nonterminals, ntIter) {
    emitMLDDMInlines(g, out, dcl, *(ntIter.data()));
  }

  // nonterminal dispatchers
  emitMLSwitchCode(g, out,
    "let duplicateNontermValueFunc (nontermId:int) (sval:tSemanticValue) : tSemanticValue",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    0 /*dupCode*/,
    " (Obj.repr (dup_$symName ((Obj.obj sval) : $symType)))\n",
    NULL);

  emitMLSwitchCode(g, out,
    "let deallocateNontermValueFunc (nontermId:int) (sval:tSemanticValue) : unit",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    1 /*delCode*/,
    " (del_$symName ((Obj.obj sval) : $symType));\n",
    "deallocate nonterm");

  emitMLSwitchCode(g, out,
    "let mergeAlternativeParsesFunc (nontermId:int) (left:tSemanticValue)\n"
    " (right:tSemanticValue) : tSemanticValue",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    2 /*mergeCode*/,
    " (Obj.repr (merge_$symName ((Obj.obj left) : $symType) ((Obj.obj right) : $symType)))\n",
    "merge nonterm");

  emitMLSwitchCode(g, out,
    "let keepNontermValueFunc (nontermId:int) (sval:tSemanticValue) : bool",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    3 /*keepCode*/,
    " (keep_$symName ((Obj.obj sval) : $symType))\n",
    NULL);

  out << "\n";
  out << "(* ---------------- dup/del/classify terminals --------------- *)";

  // helper functions for terminals (dup_X, del_X, classify_X)
  FOREACH_OBJLIST(Terminal, g.terminals, termIter) {
    emitMLDDMInlines(g, out, dcl, *(termIter.data()));
  }

  // terminal dispatchers
  emitMLSwitchCode(g, out,
    "let duplicateTerminalValueFunc (termId:int) (sval:tSemanticValue) : tSemanticValue",
    "termId",
    (ObjList<Symbol> const&)g.terminals,
    0 /*dupCode*/,
    " (Obj.repr (dup_$symName ((Obj.obj sval) : $symType)))\n",
    NULL);

  emitMLSwitchCode(g, out,
    "let deallocateTerminalValueFunc (termId:int) (sval:tSemanticValue) : unit",
    "termId",
    (ObjList<Symbol> const&)g.terminals,
    1 /*delCode*/,
    " (del_$symName ((Obj.obj sval) : $symType));\n",
    "deallocate terminal");

  emitMLSwitchCode(g, out,
    "let reclassifyTokenFunc (oldTokenType:int) (sval:tSemanticValue) : int",
    "oldTokenType",
    (ObjList<Symbol> const&)g.terminals,
    4 /*classifyCode*/,
    " (classify_$symName ((Obj.obj sval) : $symType))\n",
    NULL);
}
516:
517:
518:
519:
// emit the opening "let name (params) : rettype =" of one ML helper
// function; 'g' and 'dcl' are unused here -- presumably kept for
// signature parity with the C++ emitter's counterpart (verify)
void emitMLFuncDecl(Grammar const &g, EmitCode &out, EmitCode &dcl,
                    char const *rettype, char const *params)
{
  out << "(*inline*) let " << params << ": " << rettype << " =";
}
525:
526:
// For one grammar symbol, emit whichever of the dup/del/merge/keep/
// classify helper functions the user supplied code for.  merge/keep
// exist only for nonterminals; classify only for terminals.
void emitMLDDMInlines(Grammar const &g, EmitCode &out, EmitCode &dcl,
                      Symbol const &sym)
{
  // at most one of these is non-NULL, depending on what 'sym' is
  Terminal const *term = sym.ifTerminalC();
  Nonterminal const *nonterm = sym.ifNonterminalC();

  if (sym.dupCode) {
    emitMLFuncDecl(g, out, dcl, sym.type,
      sm_stringc << "dup_" << sym.name
                 << " (" << sym.dupParam << ": " << sym.type << ") ");
    emitMLUserCode(out, sym.dupCode);
    out << "\n";
  }

  if (sym.delCode) {
    // "_" as the parameter name when the user did not name one,
    // so the emitted OCaml does not warn about an unused binding
    emitMLFuncDecl(g, out, dcl, "unit",
      sm_stringc << "del_" << sym.name
                 << " (" << (sym.delParam? sym.delParam : "_")
                 << ": " << sym.type << ") ");
    emitMLUserCode(out, sym.delCode);
    out << "\n";
  }

  if (nonterm && nonterm->mergeCode) {
    // notVoid: merge operates on the uniform type when LHS is "void"
    emitMLFuncDecl(g, out, dcl, notVoid(sym.type),
      sm_stringc << "merge_" << sym.name
                 << " (" << nonterm->mergeParam1 << ": " << notVoid(sym.type) << ") "
                 << " (" << nonterm->mergeParam2 << ": " << notVoid(sym.type) << ") ");
    emitMLUserCode(out, nonterm->mergeCode);
    out << "\n";
  }

  if (nonterm && nonterm->keepCode) {
    emitMLFuncDecl(g, out, dcl, "bool",
      sm_stringc << "keep_" << sym.name
                 << " (" << nonterm->keepParam << ": " << sym.type << ") ");
    emitMLUserCode(out, nonterm->keepCode);
    out << "\n";
  }

  if (term && term->classifyCode) {
    emitMLFuncDecl(g, out, dcl, "int",
      sm_stringc << "classify_" << sym.name
                 << " (" << term->classifyParam << ": " << sym.type << ") ");
    emitMLUserCode(out, term->classifyCode);
    out << "\n";
  }
}
575:
576: void emitMLSwitchCode(Grammar const &g, EmitCode &out,
577: char const *signature, char const *switchVar,
578: ObjList<Symbol> const &syms, int whichFunc,
579: char const *templateCode, char const *actUpon)
580: {
581: out << replace(signature, "$acn", g.actionClassName) << " =\n"
582: "begin\n"
583: " match " << switchVar << " with\n"
584: ;
585:
586: FOREACH_OBJLIST(Symbol, syms, symIter) {
587: Symbol const &sym = *(symIter.data());
588:
589: if (whichFunc==0 && sym.dupCode ||
590: whichFunc==1 && sym.delCode ||
591: whichFunc==2 && sym.asNonterminalC().mergeCode ||
592: whichFunc==3 && sym.asNonterminalC().keepCode ||
593: whichFunc==4 && sym.asTerminalC().classifyCode) {
594: out << " | " << sym.getTermOrNontermIndex() << " -> (\n";
595: out << replace(replace(templateCode,
596: "$symName", sym.name),
597: "$symType", notVoid(sym.type));
598: out << " )\n";
599: }
600: }
601:
602: out << " | _ -> (\n";
603: switch (whichFunc) {
604: default:
605: xfailure("bad func code");
606:
607:
608:
609:
610:
611: case 0:
612: out << " sval\n";
613: break;
614:
615: case 1:
616:
617: out << " ()\n";
618: break;
619:
620: case 2:
621: out << " (Printf.printf \"WARNING: no action to merge nonterm %s\\n\"\n"
622: << " nontermNamesArray.(" << switchVar << "));\n"
623: << " (flush stdout);\n"
624: << " left\n"
625: ;
626: break;
627:
628: case 3:
629: out << " true\n";
630: break;
631:
632: case 4:
633: out << " oldTokenType\n";
634: break;
635: }
636:
637: out << " )\n"
638: "end\n"
639: "\n";
640: }
641:
642:
643:
644:
// Emit 'table' (an array of 'size' elements) as an OCaml array literal
// field named 'tableName', wrapped to 'rowLength' elements per line
// with a row-number comment at the start of each line.
template <class EltType>
void emitMLTable(EmitCode &out, EltType const *table, int size, int rowLength,
                 char const *tableName)
{
  // empty or absent table: emit an empty array literal
  if (!table || !size) {
    out << " " << tableName << " = [| |]; (* 0 elements *)\n"
        << "\n"
        ;
    return;
  }

  bool printHex = false;
  #if 0   // disabled leftovers from the C++ emitter ('typeName' gone)
  0==strcmp(typeName, "ErrorBitsEntry") ||
  (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "ActionEntry")) ||
  (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "GotoEntry")) ;
  bool needCast = 0==strcmp(typeName, "StateId");
  #endif

  // for tables over ~50 bytes, note the row/col geometry (only when
  // the element count divides evenly into rows)
  if (size * sizeof(*table) > 50) {
    if (size % rowLength == 0) {
      out << " (* rows: " << (size/rowLength) << " cols: " << rowLength << " *)\n";
    }
  }

  // width of the largest row number, for aligned row comments
  int rowNumWidth = sm_stringf("%d", size / rowLength /*round down*/).length();

  out << " " << tableName << " = [| (* " << size << " elements *)";
  int row = 0;
  for (int i=0; i<size; i++) {
    if (i % rowLength == 0) {
      // start a new line with its row-number comment
      out << sm_stringf("\n (*%*d*) ", rowNumWidth, row++);
    }

    #if 0   // disabled: cast emission from the C++ emitter
    if (needCast) {
      out << "(" << typeName << ")";
    }
    #endif

    if (printHex) {
      out << sm_stringf("0x%02X", table[i]);
    }
    else if (sizeof(table[i]) == 1) {
      // single-byte element: reinterpret as unsigned char and widen,
      // so it prints as a number rather than a character
      out << (int)(*((unsigned char*)(table+i)));
    }
    else {
      out << table[i];
    }

    // ';' separates OCaml array elements; none after the last
    if (i != size-1) {
      out << "; ";
    }
  }
  out << "\n"
      << " |];\n"
      << "\n"
      ;
}
710:
711:
712:
713: sm_stringBuilder& operator<< (sm_stringBuilder &sb, ParseTables::ProdInfo const &info)
714: {
715: sb << "{" << (int)info.rhsLen << "," << (int)info.lhsIndex << "}";
716: return sb;
717: }
718:
719:
720:
721: template <class EltType>
722: void emitMLTable2(EmitCode &out, EltType const *table, int size, int rowLength,
723: char const *typeName, char const *tableName)
724: {
725: sm_string tempName = sm_stringc << tableName << "_static";
726: emitMLTable(out, table, size, rowLength, typeName, tempName);
727: out << " " << tableName << " = const_cast<" << typeName << "*>("
728: << tempName << ");\n\n";
729: }
730:
731:
// Emit a table of pointers as a table of integer offsets from 'base'
// (UNASSIGNED for NULL slots) plus code that rebuilds the pointers at
// startup.
// NOTE(review): the emitted text here is C++ ('new', 'NULL', a C++
// 'for' loop), and it calls emitTable (declared elsewhere -- the C++
// table emitter) rather than emitMLTable; this appears to be an
// unconverted copy from the C++ backend -- confirm whether it is ever
// used for ML output.
template <class EltType>
void emitMLOffsetTable(EmitCode &out, EltType **table, EltType *base, int size,
                       char const *typeName, char const *tableName, char const *baseName)
{
  if (!table) {
    out << " " << tableName << " = NULL;\n\n";
    return;
  }

  // translate pointers to offsets; remember if any slot is assigned
  Array<int> offsets(size);
  bool allUnassigned = true;
  for (int i=0; i < size; i++) {
    if (table[i]) {
      offsets[i] = table[i] - base;
      allUnassigned = false;
    }
    else {
      offsets[i] = UNASSIGNED;
    }
  }

  // an all-NULL table is emitted as if empty
  if (allUnassigned) {
    size = 0;
  }

  if (size > 0) {
    out << " " << tableName << " = new " << typeName << " [" << size << "];\n";

    emitTable(out, (int*)offsets, size, 16, "int", sm_stringc << tableName << "_offsets");

    // emitted code: rebuild pointers from the offsets (negative
    // offset, i.e. UNASSIGNED, becomes NULL)
    out << " for (int i=0; i < " << size << "; i++) {\n"
        << " int ofs = " << tableName << "_offsets[i];\n"
        << " if (ofs >= 0) {\n"
        << " " << tableName << "[i] = " << baseName << " + ofs;\n"
        << " }\n"
        << " else {\n"
        << " " << tableName << "[i] = NULL;\n"
        << " }\n"
        << " }\n\n";
  }
  else {
    out << " // offset table is empty\n"
        << " " << tableName << " = NULL;\n\n";
  }
}
780:
781:
782:
// debugging aid: dump a table to a temp file and cat it; the entire
// body is currently compiled out, so this is a no-op and every
// parameter is (deliberately) unused
template <class EltType>
void printMLTable(EltType const *table, int size, int rowLength,
                  char const *typeName, char const *tableName)
{
  #if 0
  {
    EmitCode out("printTable.tmp");
    emitTable(out, table, size, rowLength, typeName, tableName);
  }

  system("cat printTable.tmp; rm printTable.tmp");
  #endif
}
798:
799:
800:
801:
802:
803:
// Write this ParseTables object out as a literal OCaml tParseTables
// record named "<className>ParseTables".
// ('funcName' is unused here; the comment block emitted below still
// names the C++ emitConstructionCode -- presumably this was adapted
// from it.)
void ParseTables::emitMLConstructionCode
  (EmitCode &out, char const *className, char const *funcName)
{
  // must not be emitting from a temporary/under-construction object
  xassert(!temp);

  out << "(* a literal tParseTables;\n"
      << " * the code is written by ParseTables::emitConstructionCode()\n"
      << " * in " << __FILE__ << " *)\n"
      << "let " << className << "ParseTables:tParseTables = {\n";
  ;   // (stray empty statement, kept as-is)

  // emit one "name = value;" record field from a member variable
  // (note: SET_VAR is never #undef'd, so it leaks past this function)
  #define SET_VAR(var) \
    out << " " #var " = " << var << ";\n";

  SET_VAR(numTerms);
  SET_VAR(numNonterms);
  SET_VAR(numProds);
  out << "\n";

  SET_VAR(numStates);
  out << "\n";

  SET_VAR(actionCols);
  emitMLTable(out, actionTable, actionTableSize(),
              actionCols, "actionTable");

  SET_VAR(gotoCols);
  emitMLTable(out, gotoTable, gotoTableSize(),
              gotoCols, "gotoTable");

  // prodInfo is a struct array in C++; emit it as two parallel
  // integer arrays instead
  {
    Array<int> rhsLen(numProds);
    Array<int> lhsIndex(numProds);

    for (int i=0; i < numProds; i++) {
      rhsLen[i] = prodInfo[i].rhsLen;
      lhsIndex[i] = prodInfo[i].lhsIndex;
    }

    emitMLTable(out, rhsLen.operator int const *(), numProds,
                16 /*columns; arbitrary*/, "prodInfo_rhsLen");
    emitMLTable(out, lhsIndex.operator int const *(), numProds,
                16 /*columns; arbitrary*/, "prodInfo_lhsIndex");
  }

  emitMLTable(out, stateSymbol, numStates,
              16, "stateSymbol");

  SET_VAR(ambigTableSize);
  emitMLTable(out, ambigTable, ambigTableSize,
              16, "ambigTable");

  emitMLTable(out, nontermOrder, nontermOrderSize(),
              16, "nontermOrder");

  SET_VAR(startState);

  // last field: no trailing ';' before the closing brace
  out << " finalProductionIndex = " << finalProductionIndex << "\n";

  out << "}\n"
      << "\n"
      ;
}
870:
871:
872:
Start cpp section to elk/elk_glr.cpp[1
/1
]
1: #line 6551 "./lpsrc/elk.pak"
2:
3:
4:
5: /* Implementation Notes
6: *
 * A design point: [GLR] uses more 'global's than I do. My criterion
 * here is that something should be global (stored in class GLR) if
9: * it has meaning between processing of tokens. If something is only
10: * used during the processing of a single token, then I make it a
11: * parameter where necessary.
12: *
13: * Update: I've decided to make 'currentToken' and 'parserWorklist'
14: * global because they are needed deep inside of 'glrShiftNonterminal',
15: * though they are not needed by the intervening levels, and their
16: * presence in the argument lists would therefore only clutter them.
17: *
18: * (OLD) It should be clear that many factors contribute to this
19: * implementation being slow, and I'm going to refrain from any
20: * optimization for a bit.
21: *
22: * UPDATE (3/29/02): I'm now trying to optimize it. The starting
23: * implementation is 300x slower than bison. Ideal goal is 3x, but
24: * more realistic is 10x.
25: *
26: * UPDATE (8/24/02): It's very fast now; within 3% of Bison for
27: * deterministic grammars, and 5x when I disable the mini-LR core.
28: *
29: * Description of the various lists in play here:
30: *
31: * topmostParsers
32: * --------------
33: * The active parsers are at the frontier of the parse tree
34: * space. It *never* contains more than one stack node with
35: * a given parse state; I call this the unique-state property
36: * (USP). If we're about to add a stack node with the same
37: * state as an existing node, we merge them (if it's a shift,
38: * we add another leftAdjState; if it's a reduction, we add a
39: * rule node *and* another leftAdjState).
40: *
41: * Before a token is processed, topmostParsers contains those
42: * parsers that successfully shifted the previous token. This
43: * list is then walked to make the initial reduction worklist.
44: *
45: * Before the shifts are processed, the topmostParsers list is
46: * cleared. As each shift is processed, the resulting parser is
47: * added to topmostParsers (modulo USP).
48: *
49: * [GLR] calls this "active-parsers"
50: *
51: *
52: * Discussion of path re-examination, called do-limited-reductions by
53: * [GLR]:
54: *
55: * After thinking about this for some time, I have reached the conclusion
56: * that the only way to handle the problem is to separate the collection
57: * of paths from the iteration over them.
58: *
59: * Here are several alternative schemes, and the reasons they don't
60: * work:
61: *
62: * 1. [GLR]'s approach of limiting re-examination to those involving
63: * the new link
64: *
65: * This fails because it does not prevent re-examined paths
66: * from appearing in the normal iteration also.
67: *
68: * 2. Modify [GLR] so the new link can't be used after the re-examination
69: * is complete
70: *
71: * Then if *another* new link is added, paths involving both new
72: * links wouldn't be processed.
73: *
74: * 3. Further schemes involving controlling which re-examination stage can
75: * use which links
76: *
77: * Difficult to reason about, unclear a correct scheme exists, short
78: * of the full-blown path-listing approach I'm going to take.
79: *
80: * 4. My first "fix" which assumes there is never more than one path to
81: * a given parser
82: *
83: * This is WRONG. There can be more than one path, even as all such
84: * paths are labeled the same (namely, with the RHS symbols). Consider
85: * grammar "E -> x | E + E" parsing "x+x+x": both toplevel parses use
86: * the "E -> E + E" rule, and both arrive at the root parser
87: *
88: * So, the solution I will implement is to collect all paths into a list
89: * before processing any of them. During path re-examination, I also will
90: * collect paths into a list, this time only those that involve the new
91: * link.
92: *
93: * This scheme is clearly correct, since path collection cannot be disrupted
94: * by the process of adding links, and when links are added, exactly the new
95: * paths are collected and processed. It's easy to see that every path is
96: * considered exactly once.
97: *
98: *
99: * MAJOR UPDATE (12/06/02): I've replaced the state worklist (SWL) core
100: * used in all previous GLR implementations with a reduction worklist (RWL)
101: * core. This core is just as fast, but can be implemented to always
102: * avoid the yield-then-merge problem for acyclic grammars.
103: *
104: *
105: * Below, parse-tree building activity is marked "TREEBUILD".
106: */
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
175:
176:
177:
// Instrumentation counters.  computeDepthIters, totalExtracts and
// multipleDelayedExtracts are reported via PVAL when ELKHOUND_DEBUG
// accounting output is enabled (see GLR::glrParse); parserMerges is
// presumably maintained elsewhere in this file — not visible here.
int parserMerges = 0;
int computeDepthIters = 0;
int totalExtracts = 0;
int multipleDelayedExtracts = 0;
182:
183:
184:
185:
186:
187:
188:
189:
190:
191:
192:
193:
194:
195:
196:
197:
198:
199:
200:
201:
202:
203:
204:
enum {
  // maximum length of any production right-hand side; enforced
  // against the grammar in GLR::GLR when the mini-LR core is enabled
  MAX_RHSLEN = 30,

  // presumably sizes a typical-case buffer for reduction paths —
  // TODO confirm at point of use (not visible in this chunk)
  TYPICAL_MAX_REDUCTION_PATHS = 5,

  // initial size of a growable rhsLen-related buffer — usage not
  // visible in this chunk
  INITIAL_RHSLEN_SIZE = 10,
};
221:
222:
223:
224:
225:
226: sm_string symbolDescription(SymbolId sym, UserActions *user,
227: SemanticValue sval)
228: {
229: if (symIsTerm(sym)) {
230: return user->terminalDescription(symAsTerm(sym), sval);
231: }
232: else {
233: return user->nonterminalDescription(symAsNonterm(sym), sval);
234: }
235: }
236:
237: SemanticValue GLR::duplicateSemanticValue(SymbolId sym, SemanticValue sval)
238: {
239: xassert(sym != 0);
240:
241:
242:
243: if (!sval) return sval;
244:
245: SemanticValue ret;
246: if (symIsTerm(sym)) {
247: ret = userAct->duplicateTerminalValue(symAsTerm(sym), sval);
248: }
249: else {
250: ret = userAct->duplicateNontermValue(symAsNonterm(sym), sval);
251: }
252:
253: TRSACTION(" " << symbolDescription(sym, userAct, ret) <<
254: " is DUP of " <<
255: symbolDescription(sym, userAct, sval));
256:
257: return ret;
258: }
259:
260: void deallocateSemanticValue(SymbolId sym, UserActions *user,
261: SemanticValue sval)
262: {
263: xassert(sym != 0);
264: TRSACTION(" DEL " << symbolDescription(sym, user, sval));
265:
266: if (!sval) return;
267:
268: if (symIsTerm(sym)) {
269: return user->deallocateTerminalValue(symAsTerm(sym), sval);
270: }
271: else {
272: return user->deallocateNontermValue(symAsNonterm(sym), sval);
273: }
274: }
275:
// Member wrapper: forward to the free function using this parser's
// user-actions object.
void GLR::deallocateSemanticValue(SymbolId sym, SemanticValue sval)
{
  ::deallocateSemanticValue(sym, userAct, sval);
}
280:
281:
282:
// ---------------------- SiblingLink ----------------------
// Construct a link to stack node 's' carrying semantic value 'sv'.
// The SOURCELOCARG/YIELD_COUNT macros compile source-location and
// yield-count support in or out.
inline SiblingLink::SiblingLink(StackNode *s, SemanticValue sv
                                SOURCELOCARG( SourceLoc L ) )
  : sib(s), sval(sv)
    SOURCELOCARG( loc(L) )
{
  YIELD_COUNT( yieldCount = 0; )
}
290:
// Nothing to do here; semantic values attached to links are released
// by StackNode::deallocSemanticValues.
SiblingLink::~SiblingLink()
{}
293:
294:
295:
// ---------------------- StackNode ----------------------
// Allocation accounting: current count and high-water mark (see
// StackNode::init and decrementAllocCounter).
int StackNode::numStackNodesAllocd=0;
int StackNode::maxStackNodesAllocd=0;
298:
299:
// Nodes are constructed in an invalid state; real initialization is
// deferred to init(), because nodes are recycled through an
// ObjectPool rather than constructed per use.
StackNode::StackNode()
  : state(STATE_INVALID),
    leftSiblings(),
    firstSib(NULL, NULL_SVAL SOURCELOCARG( SL_UNKNOWN ) ),
    referenceCount(0),
    determinDepth(0),
    glr(NULL)
{
  // nothing else; see init()
}
310:
// Per-use cleanup happens in deinit(); the destructor itself has
// nothing to release.
StackNode::~StackNode()
{
  // nothing to do
}
315:
316:
// (Re)initialize a pool-allocated node for state 'st' owned by 'g'.
inline void StackNode::init(StateId st, GLR *g)
{
  state = st;
  // previous deinit must have left the node with no sibling links
  xassertdb(leftSiblings.isEmpty());
  xassertdb(hasZeroSiblings());
  referenceCount = 0;
  determinDepth = 1;    // no siblings yet => deterministic depth 1
  glr = g;

  #if DO_ACCOUNTING
    INC_HIGH_WATER(numStackNodesAllocd, maxStackNodesAllocd);
  #endif
}
332:
// Undo init()'s accounting increment.  Split out from deinit() so the
// mini-LR core (innerGlrParse) can retire nodes it recycles manually.
inline void StackNode::decrementAllocCounter()
{
  #if DO_ACCOUNTING
    numStackNodesAllocd--;
  #endif
}
341:
// Tear down a node before returning it to the pool: release its
// semantic values and clear the inline sibling link so a later init()
// finds a clean node.
inline void StackNode::deinit()
{
  decrementAllocCounter();

  // skip consistency checks while an exception is unwinding
  if (!unwinding()) {
    xassert(numStackNodesAllocd >= 0);
    xassert(referenceCount == 0);
  }

  deallocSemanticValues();

  // reset the inline link; leftSiblings was emptied by
  // deallocSemanticValues
  firstSib.sib = NULL;
}
358:
359: inline SymbolId StackNode::getSymbolC() const
360: {
361: xassertdb((unsigned)state < (unsigned)(glr->tables->getNumStates()));
362: return glr->tables->getStateSymbol(state);
363: }
364:
365:
366:
// Release the semantic values held by all of this node's sibling
// links: the inline firstSib (if occupied) and every heap-allocated
// link in leftSiblings (which is emptied in the process).
void StackNode::deallocSemanticValues()
{
  // firstSib holds a value only when it points at a node
  if (firstSib.sib != NULL) {
    deallocateSemanticValue(getSymbolC(), glr->userAct, firstSib.sval);
  }

  while (leftSiblings.isNotEmpty()) {
    // Owner<> deletes the link as it goes out of scope
    Owner<SiblingLink> sib(leftSiblings.removeAt(0));
    deallocateSemanticValue(getSymbolC(), glr->userAct, sib->sval);
  }
}
381:
382:
383:
// Install the very first sibling link WITHOUT incrementing leftSib's
// reference count; callers that need the count bumped do it
// themselves (see addSiblingLink).
inline void StackNode
  ::addFirstSiblingLink_noRefCt(StackNode *leftSib, SemanticValue sval
                                SOURCELOCARG( SourceLoc loc ) )
{
  xassertdb(hasZeroSiblings());

  // exactly one sibling => still deterministic, one deeper than it
  determinDepth = leftSib->determinDepth + 1;

  // store the pointer without refcount bookkeeping (caller's job)
  xassertdb(firstSib.sib == NULL);
  firstSib.sib.setWithoutUpdateRefct(leftSib);

  firstSib.sval = sval;

  SOURCELOC( firstSib.loc = loc; )
  YIELD_COUNT( firstSib.yieldCount = 0; )
}
405:
406:
407:
// Add a sibling link to 'leftSib' carrying 'sval'.  The first link
// uses the inline firstSib slot (fast path); later links go through
// the out-of-line addAdditionalSiblingLink.  Returns the new link.
inline SiblingLink *StackNode::
  addSiblingLink(StackNode *leftSib, SemanticValue sval
                 SOURCELOCARG( SourceLoc loc ) )
{
  if (hasZeroSiblings()) {
    addFirstSiblingLink_noRefCt(leftSib, sval SOURCELOCARG( loc ) );

    // the _noRefCt variant skipped this; do it here
    leftSib->incRefCt();

    return &firstSib;
  }
  else {
    // rare case, kept out of the inline fast path
    return addAdditionalSiblingLink(leftSib, sval SOURCELOCARG( loc ) );
  }
}
430:
431:
432:
433:
434:
// Slow path of addSiblingLink: the node already has at least one
// sibling, so a heap-allocated link is prepended to leftSiblings.
SiblingLink *StackNode::
  addAdditionalSiblingLink(StackNode *leftSib, SemanticValue sval
                           SOURCELOCARG( SourceLoc loc ) )
{
  // multiple siblings => node is no longer deterministic
  determinDepth = 0;

  SiblingLink *link = new SiblingLink(leftSib, sval SOURCELOCARG( loc ) );
  leftSiblings.prepend(link);
  return link;
}
448:
449:
450:
451:
452:
// Drop one reference; when the count reaches zero the node is
// returned to the owning GLR object's node pool.
inline void StackNode::decRefCt()
{
  xassert(referenceCount > 0);

  if (--referenceCount == 0) {
    glr->stackNodePool->dealloc(this);
  }
}
463:
464:
// The node must have exactly one sibling link; return it.
SiblingLink const *StackNode::getUniqueLinkC() const
{
  xassert(hasOneSibling());
  return &firstSib;
}
470:
471:
472: SiblingLink *StackNode::getLinkTo(StackNode *another)
473: {
474:
475: if (firstSib.sib == another) {
476: return &firstSib;
477: }
478:
479:
480: MUTATE_EACH_OBJLIST(SiblingLink, leftSiblings, sibIter) {
481: SiblingLink *candidate = sibIter.data();
482: if (candidate->sib == another) {
483: return candidate;
484: }
485: }
486: return NULL;
487: }
488:
489:
490: STATICDEF void StackNode::printAllocStats()
491: {
492: std::cout << "stack nodes: " << numStackNodesAllocd
493: << ", max stack nodes: " << maxStackNodesAllocd
494: << std::endl;
495: }
496:
497:
498: int StackNode::computeDeterminDepth() const
499: {
500: if (hasZeroSiblings()) {
501: return 1;
502: }
503: else if (hasOneSibling()) {
504:
505: return firstSib.sib->determinDepth + 1;
506: }
507: else {
508: xassert(hasMultipleSiblings());
509: return 0;
510: }
511: }
512:
513:
514:
515:
// Debug-mode check: the cached determinDepth must agree with a fresh
// computation.
inline void StackNode::checkLocalInvariants() const
{
  xassertdb(computeDeterminDepth() == determinDepth);
}
520:
521:
522:
523: void decParserList(ArrayStack<StackNode*> &list)
524: {
525: for (int i=0; i < list.length(); i++) {
526: list[i]->decRefCt();
527: }
528: }
529:
530: void incParserList(ArrayStack<StackNode*> &list)
531: {
532: for (int i=0; i < list.length(); i++) {
533: list[i]->incRefCt();
534: }
535: }
536:
537:
538: bool parserListContains(ArrayStack<StackNode*> &list, StackNode *node)
539: {
540: for (int i=0; i < list.length(); i++) {
541: if (list[i] == node) {
542: return true;
543: }
544: }
545: return false;
546: }
547:
548:
549:
// ---------------------- GLR ----------------------
// Build a parser around the given user actions and parse tables, and
// validate that the tables agree with this core's compile-time
// configuration.
GLR::GLR(UserActions *user, ParseTables *t)
  : userAct(user),
    tables(t),
    lexerPtr(NULL),
    topmostParsers(),
    parserIndex(NULL),
    toPass(MAX_RHSLEN),
    prevTopmost(),
    stackNodePool(NULL),
    pathQueue(t),
    noisyFailedParse(true),
    trParse(tracingSys("parse")),
    trsParse(trace("parse") << "parse tracing enabled\n"),
    detShift(0),
    detReduce(0),
    nondetShift(0),
    nondetReduce(0),
    yieldThenMergeCt(0)
{
  if (tracingSys("glrConfig")) {
    printConfig();
  }

  #if USE_MINI_LR
    // the mini-LR core uses a fixed-size RHS buffer; reject any
    // grammar whose productions would overflow it
    for (int i=0; i < tables->getNumProds(); i++) {
      if (tables->getProdInfo(i).rhsLen > MAX_RHSLEN) {
        printf("Production %d contains %d right-hand side symbols,\n"
               "but the GLR core has been compiled with a limit of %d.\n"
               "Please adjust MAX_RHSLEN and recompile the GLR core.\n",
               i, tables->getProdInfo(i).rhsLen, MAX_RHSLEN);
        xfailure("cannot continue");
      }
    }
  #endif

  // compression settings compiled into the core must match those
  // compiled into the tables
  configCheck("EEF compression", ENABLE_EEF_COMPRESSION, tables->eef_enabled());
  configCheck("GCS compression", ENABLE_GCS_COMPRESSION, tables->gcs_enabled());
  configCheck("GCS column compression", ENABLE_GCS_COLUMN_COMPRESSION, tables->gcsc_enabled());
  configCheck("CRS compression", ENABLE_CRS_COMPRESSION, tables->crs_enabled());
}
602:
603: void GLR::configCheck(char const *option, bool core, bool table)
604: {
605: if (core != table) {
606: xfailure(sm_stringc
607: << "The GLR parser core was compiled with " << option
608: << (core? " enabled" : " disabled")
609: << ", but the parse tables generated by Elkhound have it "
610: << (table? "enabled" : "disabled"));
611: }
612: }
613:
614: GLR::~GLR()
615: {
616: if (parserIndex) {
617: delete[] parserIndex;
618: }
619:
620:
621:
622: }
623:
624:
// Reset per-parse node state.  Currently a no-op: node recycling is
// handled entirely by the ObjectPool created in innerGlrParse.
void GLR::clearAllStackNodes()
{
  // nothing to do
}
632:
633:
634:
635:
// Print the compile-time configuration of the GLR core.  The odd
// "MACRO(1+)0" expressions evaluate to 1 when the macro passes its
// argument through, and to 0 when the macro expands to nothing.
void GLR::printConfig() const
{
  printf("GLR configuration follows. Settings marked with an\n"
         "asterisk (*) are the higher-performance settings.\n");

  printf(" source location information: \t\t\t%s\n",
         SOURCELOC(1+)0? "enabled" : "disabled *");

  printf(" stack node columns: \t\t\t\t%s\n",
         NODE_COLUMN(1+)0? "enabled" : "disabled *");

  printf(" semantic value yield count: \t\t\t%s\n",
         YIELD_COUNT(1+)0? "enabled" : "disabled *");

  printf(" ACTION_TRACE (for debugging): \t\t%s\n",
         ACTION(1+)0? "enabled" : "disabled *");

  printf(" NDEBUG: \t\t\t\t\t%s\n",
         IF_NDEBUG(1+)0? "set *" : "not set");

  printf(" xassert-style assertions: \t\t\t%s\n",
         #ifdef NDEBUG_NO_ASSERTIONS
           "disabled *"
         #else
           "enabled"
         #endif
         );

  printf(" user actions: \t\t\t\t%s\n",
         USE_ACTIONS? "respected" : "ignored *");

  printf(" token reclassification: \t\t\t%s\n",
         USE_RECLASSIFY? "enabled" : "disabled *");

  printf(" reduction cancellation: \t\t\t%s\n",
         USE_KEEP? "enabled" : "disabled *");

  printf(" mini-LR parser core: \t\t\t\t%s\n",
         USE_MINI_LR? "enabled *" : "disabled");

  printf(" allocated-node and parse action accounting: \t%s\n",
         ACCOUNTING(1+)0? "enabled" : "disabled *");

  printf(" unrolled reduce loop: \t\t\t%s\n",
         USE_UNROLLED_REDUCE? "enabled *" : "disabled");

  printf(" parser index: \t\t\t\t%s\n",
         #ifdef USE_PARSER_INDEX
           "enabled"
         #else
           "disabled *"
         #endif
         );

  printf(" C++ compiler's optimizer: \t\t\t%s\n",
         #ifdef __OPTIMIZE__
           "enabled *"
         #else
           "disabled"
         #endif
         );

  // table-compression options
  printf(" Error Entry Factoring (EEF): \t\t\t%s\n",
         ENABLE_EEF_COMPRESSION? "enabled" : "disabled *");
  printf(" Graph Coloring Scheme (GCS): \t\t\t%s\n",
         ENABLE_GCS_COMPRESSION? "enabled" : "disabled *");
  printf(" GCS for columns (GCSC): \t\t\t%s\n",
         ENABLE_GCS_COLUMN_COMPRESSION? "enabled" : "disabled *");
  printf(" Code Reduction Scheme (CRS): \t\t\t%s\n",
         ENABLE_CRS_COMPRESSION? "enabled" : "disabled *");
}
712:
713:
714:
715:
716: SemanticValue GLR::grabTopSval(StackNode *node)
717: {
718: SiblingLink *sib = node->getUniqueLink();
719: SemanticValue ret = sib->sval;
720: sib->sval = duplicateSemanticValue(node->getSymbolC(), sib->sval);
721:
722: TRSACTION("dup'd " << ret << " for top sval, yielded " << sib->sval);
723:
724: return ret;
725: }
726:
727:
728:
729:
730:
731:
732:
733:
734:
// Macro version of GLR::makeStackNode, inlined into the mini-LR hot
// path: grab a node from 'pool' and initialize it for 'state'.
// NOTE(review): the '#define MAKE_STACK_NODE(dest, state, glr, pool)'
// header line was missing here even though the continuation-line body
// and the macro's uses (makeStackNode, innerGlrParse) remain; restored.
#define MAKE_STACK_NODE(dest, state, glr, pool)               \
  dest = (pool).alloc();                                      \
  dest->init(state, glr);                                     \
  NODE_COLUMN( dest->column = (glr)->globalNodeColumn; )
738:
739:
// Allocate and initialize a fresh stack node for 'state' from the
// current node pool.
inline StackNode *GLR::makeStackNode(StateId state)
{
  StackNode *sn;
  MAKE_STACK_NODE(sn, state, this, *stackNodePool);
  return sn;
}
746:
747:
748:
749:
// Add 'parser' to the topmost-parsers list, taking a reference on it.
inline void GLR::addTopmostParser(StackNode *parser)
{
  parser->checkLocalInvariants();

  topmostParsers.push(parser);
  parser->incRefCt();

  #ifdef USE_PARSER_INDEX
    // maintain the state -> topmostParsers index
    int index = topmostParsers.length()-1;
    xassert(index < INDEX_NO_PARSER);

    // unique-state property: no existing parser may occupy this state
    xassert(parserIndex[parser->state] == INDEX_NO_PARSER);
    parserIndex[parser->state] = index;
  #endif
}
772:
773:
774: void GLR::buildParserIndex()
775: {
776: if (parserIndex) {
777: delete[] parserIndex;
778: }
779: parserIndex = new ParserIndexEntry[tables->getNumStates()];
780: {
781: for (int i=0; i < tables->getNumStates(); i++) {
782: parserIndex[i] = INDEX_NO_PARSER;
783: }
784: }
785: }
786:
787:
// Public entry point: parse the token stream from 'lexer'; on success
// store the final semantic value in 'treeTop' and return true.
bool GLR::glrParse(LexerInterface &lexer, SemanticValue &treeTop)
{
  #if !ACTION_TRACE
    // warn that action tracing the user may expect is compiled out
    trace("action") << "warning: ACTION_TRACE is currently disabled by a\n";
    trace("action") << "compile-time switch, so you won't see parser actions.\n";
  #endif

  #ifdef NDEBUG
    trace("parse") << "warning: Because NDEBUG was specified when elkhound was\n";
    trace("parse") << " compiled, the 'parse' tracing flag does nothing.\n";
  #endif

  traceProgress(2) << "parsing...\n";
  clearAllStackNodes();

  // make the lexer available to the rest of the parse
  lexerPtr = &lexer;

  buildParserIndex();

  // the real work happens in the static inner function
  bool ret = innerGlrParse(*this, lexer, treeTop);
  stackNodePool = NULL;    // the pool lived on innerGlrParse's stack
  if (!ret) {
    lexerPtr = NULL;
    return ret;
  }

  // optional diagnostics
  if (getenv("ELKHOUND_DEBUG")) {
    #if DO_ACCOUNTING
      StackNode::printAllocStats();
      std::cout << "detShift=" << detShift
                << ", detReduce=" << detReduce
                << ", nondetShift=" << nondetShift
                << ", nondetReduce=" << nondetReduce
                << std::endl;

      PVAL(computeDepthIters);

      PVAL(yieldThenMergeCt);
      PVAL(totalExtracts);
      PVAL(multipleDelayedExtracts);
    #endif
  }

  lexerPtr = NULL;
  return ret;
}
844:
845:
846:
847:
848:
849:
850:
851:
852:
853:
854:
855:
856:
// The parsing loop proper.  Static, with hot state copied into
// locals, so the compiler can keep it in registers; 'glr' carries the
// persistent state.  Contains the mini-LR core (USE_MINI_LR): while
// there is exactly one parser with an unambiguous action, shifts and
// reduces are performed directly, recycling stack nodes by hand,
// without the general GLR machinery.
STATICDEF bool GLR
  ::innerGlrParse(GLR &glr, LexerInterface &lexer, SemanticValue &treeTop)
{
  #ifndef NDEBUG
    bool doDumpGSS = tracingSys("dumpGSS");
  #endif

  // local copies of frequently used members
  UserActions *userAct = glr.userAct;
  ParseTables *tables = glr.tables;
  #if USE_MINI_LR
    ArrayStack<StackNode*> &topmostParsers = glr.topmostParsers;
  #endif

  // fetch the token-source function once
  LexerInterface::NextTokenFunc nextToken = lexer.getTokenFunc();

  #if USE_RECLASSIFY
    UserActions::ReclassifyFunc reclassifyToken =
      userAct->getReclassifier();
  #endif

  // the node pool lives on this stack frame; 'glr' keeps a pointer to
  // it for the duration of the parse
  ObjectPool<StackNode> stackNodePool(30);
  glr.stackNodePool = &stackNodePool;

  // begin with a single parser in the start state
  NODE_COLUMN( glr.globalNodeColumn = 0; )
  {
    StackNode *first = glr.makeStackNode(tables->startState);
    glr.addTopmostParser(first);
  }

  #if USE_MINI_LR
    UserActions::ReductionActionFunc reductionAction =
      userAct->getReductionAction();

    // fixed-size buffer for RHS semantic values in the mini-LR core;
    // bounded by the MAX_RHSLEN check in GLR::GLR
    SemanticValue toPass[MAX_RHSLEN];
  #endif

  // local statistics, flushed into 'glr' on exit
  ACCOUNTING( int localDetShift=0; int localDetReduce=0; )

  #ifndef NDEBUG
    int tokenNumber = 0;

    bool trParse = glr.trParse;
    std::ostream &trsParse = glr.trsParse;
  #endif

  // one iteration per token
  for (;;) {
    TRSPARSE(
      "------- "
      << "processing token " << lexer.tokenDesc()
      << ", " << glr.topmostParsers.length() << " active parsers"
      << " -------"
    )
    TRSPARSE("Stack:" << glr.stackSummary())

    #ifndef NDEBUG
      if (doDumpGSS) {
        glr.dumpGSS(tokenNumber);
      }
    #endif

    // let the user remap the token kind before dispatching on it
    #if USE_RECLASSIFY
      lexer.type = reclassifyToken(userAct, lexer.type, lexer.sval);
    #else
      // reclassification compiled out
    #endif

    TRSACTION("lookahead token: " << lexer.tokenDesc() <<
              " aka " << userAct->terminalDescription(lexer.type, lexer.sval));

    #if USE_MINI_LR
      // mini-LR core: loop here as long as the parse stays
      // deterministic
    tryDeterministic:

      if (topmostParsers.length() == 1) {
        StackNode *parser = topmostParsers[0];
        xassertdb(parser->referenceCount==1);

        #if ENABLE_EEF_COMPRESSION
          if (tables->actionEntryIsError(parser->state, lexer.type)) {
            return false;     // parse error
          }
        #endif

        ActionEntry action =
          tables->getActionEntry_noError(parser->state, lexer.type);

        if (tables->isReduceAction(action)) {
          ACCOUNTING( localDetReduce++; )
          int prodIndex = tables->decodeReduce(action, parser->state);
          ParseTables::ProdInfo const &prodInfo = tables->getProdInfo(prodIndex);
          int rhsLen = prodInfo.rhsLen;
          if (rhsLen <= parser->determinDepth) {
            // the stack is deterministic at least rhsLen deep, so the
            // reduction can be done in place
            TRSPARSE_DECL( int startStateId = parser->state; )

            ACTION(
              sm_string rhsDescription("");
              if (rhsLen == 0) {
                rhsDescription = " empty";
              }
            )

            SOURCELOC( SourceLoc leftEdge = lexer.loc; )

            // guaranteed by the check in GLR::GLR
            xassertdb(rhsLen <= MAX_RHSLEN);

            // pop rhsLen nodes, threading them directly onto the
            // pool's free list instead of going through decRefCt
            StackNode *prev = stackNodePool.private_getHead();

            #if USE_UNROLLED_REDUCE
              // hand-unrolled copies of the general loop below for
              // the common short-RHS cases
              switch ((unsigned)rhsLen) {
                case 1: {
                  SiblingLink &sib = parser->firstSib;
                  toPass[0] = sib.sval;
                  ACTION( rhsDescription =
                    sm_stringc << " "
                               << symbolDescription(parser->getSymbolC(), userAct, sib.sval)
                               << rhsDescription; )
                  SOURCELOC(
                    if (sib.validLoc()) {
                      leftEdge = sib.loc;
                    }
                  )
                  parser->nextInFreeList = prev;
                  prev = parser;
                  parser = sib.sib;
                  xassertdb(parser->referenceCount==1);
                  xassertdb(prev->referenceCount==1);
                  prev->decrementAllocCounter();
                  prev->firstSib.sib.setWithoutUpdateRefct(NULL);
                  xassertdb(parser->referenceCount==1);
                  // falls through to case 0
                }

                case 0:
                  goto afterGeneralLoop;
              }
            #endif

            // general loop: pop one node per RHS symbol, right to left
            for (int i = rhsLen-1; i >= 0; i--) {
              // deterministic stack => only the inline sibling link
              SiblingLink &sib = parser->firstSib;

              // collect the semantic value for this RHS position
              toPass[i] = sib.sval;

              ACTION( rhsDescription =
                sm_stringc << " "
                           << symbolDescription(parser->getSymbolC(), userAct, sib.sval)
                           << rhsDescription; )

              // leftmost valid location wins
              SOURCELOC(
                if (sib.validLoc()) {
                  leftEdge = sib.loc;
                }
              )

              // push 'parser' onto the recycled chain and step down
              // the stack
              parser->nextInFreeList = prev;
              prev = parser;
              parser = sib.sib;

              xassertdb(parser->referenceCount==1);

              xassertdb(prev->referenceCount==1);

              {
                // retire 'prev' by hand: accounting, then clear its
                // sibling pointer without refcount churn
                prev->decrementAllocCounter();

                prev->firstSib.sib.setWithoutUpdateRefct(NULL);
              }

              xassertdb(parser->referenceCount==1);
            }

            #if USE_UNROLLED_REDUCE
            afterGeneralLoop:
            #endif

            // hand the recycled chain back to the pool
            stackNodePool.private_setHead(prev);

            // invoke the user's reduction action (TREEBUILD)
            SemanticValue sval =
              #if USE_ACTIONS
                reductionAction(userAct, prodIndex, toPass /*.getArray()*/
                                SOURCELOCARG( leftEdge ) );
              #else
                NULL;
              #endif

            // where to go after reducing
            StateId newState = tables->decodeGoto(
              tables->getGotoEntry(parser->state, prodInfo.lhsIndex),
              prodInfo.lhsIndex);

            TRSPARSE("state " << startStateId <<
                     ", (unambig) reduce by " << prodIndex <<
                     " (len=" << rhsLen <<
                     "), back to " << parser->state <<
                     " then out to " << newState);

            xassertdb(parser->referenceCount==1);

            // push a new node for the goto target
            StackNode *newNode;
            MAKE_STACK_NODE(newNode, newState, &glr, stackNodePool)

            newNode->addFirstSiblingLink_noRefCt(
              parser, sval SOURCELOCARG( leftEdge ) );

            // the refcount transferred from topmostParsers[0] to the
            // sibling link, so no adjustment is needed here
            {
            }
            xassertdb(parser->referenceCount==1);

            // make the new node the sole topmost parser
            topmostParsers[0] = newNode;
            newNode->incRefCt();
            xassertdb(newNode->referenceCount == 1);

            TRSACTION(" " <<
                      symbolDescription(newNode->getSymbolC(), userAct, sval) <<
                      " ->" << rhsDescription);

            #if USE_KEEP
              // give the user a chance to cancel the reduction
              if (!userAct->keepNontermValue(prodInfo.lhsIndex, sval)) {
                ACTION( sm_string lhsDesc =
                  userAct->nonterminalDescription(prodInfo.lhsIndex, sval); )
                TRSACTION(" CANCELLED " << lhsDesc);
                glr.printParseErrorMessage(newNode->state);
                ACCOUNTING(
                  glr.detShift += localDetShift;
                  glr.detReduce += localDetReduce;
                )

                return false;
              }
            #endif

            // maybe the next action is deterministic too
            goto tryDeterministic;
          }
        }

        else if (tables->isShiftAction(action)) {
          ACCOUNTING( localDetShift++; )

          StateId newState = tables->decodeShift(action, lexer.type);

          TRSPARSE("state " << parser->state <<
                   ", (unambig) shift token " << lexer.tokenDesc() <<
                   ", to state " << newState);

          NODE_COLUMN( glr.globalNodeColumn++; )

          StackNode *rightSibling;
          MAKE_STACK_NODE(rightSibling, newState, &glr, stackNodePool);

          rightSibling->addFirstSiblingLink_noRefCt(
            parser, lexer.sval SOURCELOCARG( lexer.loc ) );

          // replace the old topmost parser with the shifted one
          topmostParsers[0] = rightSibling;

          // parser's reference moved from topmostParsers into the
          // sibling link: net zero
          {
          }
          xassertdb(parser->referenceCount==1);

          xassertdb(rightSibling->referenceCount==0);

          // take the list's reference directly
          {
            rightSibling->referenceCount = 1;
          }
          xassertdb(rightSibling->referenceCount==1);

          goto getNextToken;
        }

        else {
          // error or ambiguous entry: fall through to the full GLR
          // machinery below
        }
      }

    #endif

    // full nondeterministic GLR processing for this token
    if (!glr.nondeterministicParseToken()) {
      return false;
    }

    #if USE_MINI_LR
    getNextToken:
    #endif

    // token type 0 is end-of-file
    if (lexer.type == 0) {
      break;
    }

    nextToken(&lexer);
    #ifndef NDEBUG
      tokenNumber++;
    #endif
  }

  // flush accumulated statistics into the GLR object
  ACCOUNTING(
    glr.detShift += localDetShift;
    glr.detReduce += localDetReduce;
  )

  return glr.cleanupAfterParse(treeTop);
}
1318:
1319:
1320:
1321:
1322:
// Intended to render a diagnostic trace of the stack rooted at
// 'parser'; currently just a placeholder.
sm_string stackTraceString(StackNode *parser)
{
  return sm_string("need to think about this some more..");
}
1328:
1329:
1330:
1331:
1332:
// Process the current token with the full GLR machinery: enqueue the
// possible reductions for every active parser, drain the reduction
// worklist (RWL core), then shift the token.  Returns false if every
// parser died, i.e. a parse error.
bool GLR::nondeterministicParseToken()
{
  // state of the last parser that died, for the error message
  StateId lastToDie = STATE_INVALID;

  // enqueue reductions for every active parser
  int i;
  for (i=0; i < topmostParsers.length(); i++) {
    StackNode *parser = topmostParsers[i];

    ActionEntry action =
      tables->getActionEntry(parser->state, lexerPtr->type);
    int actions = rwlEnqueueReductions(parser, action, NULL /*sibLink*/);

    if (actions == 0) {
      // no actions at all on this token: the parser dies
      TRSPARSE("parser in state " << parser->state << " died");
      lastToDie = parser->state;
    }
  }

  // perform all enqueued reductions (may enqueue more as it goes)
  rwlProcessWorklist();

  // shift the token into the surviving parsers
  rwlShiftTerminals();

  // if no parser survived, the parse has failed
  if (topmostParsers.isEmpty()) {
    printParseErrorMessage(lastToDie);
    return false;
  }
  else {
    return true;
  }
}
1376:
1377:
1378:
// Report a parse error to stdout (unless noisyFailedParse is off).
// 'lastToDie' is the state of the last parser that died, or
// STATE_INVALID when that information is unavailable.
void GLR::printParseErrorMessage(StateId lastToDie)
{
  if (!noisyFailedParse) {
    return;
  }

  if (lastToDie != STATE_INVALID) {
    // list the tokens that would have been acceptable in that state
    std::cout << "In state " << lastToDie << ", I expected one of these tokens:\n";
    std::cout << " ";
    for (int i=0; i < tables->getNumTerms(); i++) {
      ActionEntry act = tables->getActionEntry(lastToDie, i);
      if (!tables->isErrorAction(act)) {
        std::cout << lexerPtr->tokenKindDesc(i) << ", ";
      }
    }
    std::cout << "\n";
  }
  else {
    std::cout << "(expected-token info not available due to nondeterministic mode)\n";
  }

  std::cout << toString(lexerPtr->loc)
            << ": Parse error (state " << lastToDie << ") at "
            << lexerPtr->tokenDesc()
            << std::endl;

  // disabled extra diagnostics
  #if 0
  if (lastToDie == STATE_INVALID) {
    std::cout << "what the? lastToDie is STATE_INVALID??\n";
  }
  else {
    std::cout << "last parser (state " << lastToDie << ") to die had:\n"
              << " sample input: "
              << sampleInput(getItemSet(lastToDie)) << "\n"
              << " left context: "
              << leftContextString(getItemSet(lastToDie)) << "\n";
  }
  #endif
}
1432:
1433:
1434: SemanticValue GLR::doReductionAction(
1435: int productionId, SemanticValue const *svals
1436: SOURCELOCARG( SourceLoc loc ) )
1437: {
1438:
1439:
1440: return (userAct->getReductionAction())(userAct, productionId, svals SOURCELOCARG(loc));
1441: }
1442:
1443:
1444:
// Epilogue after the token loop: expect exactly one surviving parser,
// run the final production's action over the top two semantic values,
// and hand the result to the caller.
bool GLR::cleanupAfterParse(SemanticValue &treeTop)
{
  traceProgress() << "done parsing\n";
  trsParse << "Parse succeeded!\n";

  if (topmostParsers.length() != 1) {
    std::cout << "parsing finished with more than one active parser!\n";
    return false;
  }
  StackNode *last = topmostParsers.top();

  // collect the top two svals — presumably [start symbol, EOF], the
  // final production's RHS; grabTopSval leaves duplicates behind
  SemanticValue arr[2];
  StackNode *nextToLast = last->getUniqueLink()->sib;
  arr[0] = grabTopSval(nextToLast);
  arr[1] = grabTopSval(last);

  TRSACTION("handing toplevel sval " << arr[0] <<
            " and " << arr[1] <<
            " to top start's reducer");
  treeTop = doReductionAction(
    tables->finalProductionIndex,
    arr
    SOURCELOCARG( last->getUniqueLinkC()->loc ) );

  // release the remaining parser references
  decParserList(topmostParsers);

  return true;
}
1487:
1488:
1489:
1490:
1491:
1492: void GLR::pullFromTopmostParsers(StackNode *parser)
1493: {
1494: int last = topmostParsers.length()-1;
1495: for (int i=0; i <= last; i++) {
1496: if (topmostParsers[i] == parser) {
1497:
1498:
1499: if (i < last) {
1500: topmostParsers[i] = topmostParsers[last];
1501:
1502: }
1503: topmostParsers.pop();
1504: parser->decRefCt();
1505: break;
1506: }
1507: }
1508: }
1509:
1510:
1511:
1512:
1513:
1514:
1515:
1516:
// Return true if 'parser' has some viable action (shift or reduce)
// for the current lookahead token; false means its action is the
// error action, i.e. the parser is stuck.
bool GLR::canMakeProgress(StackNode *parser)
{
  ActionEntry entry =
    tables->getActionEntry(parser->state, lexerPtr->type);

  // NOTE(review): the first two disjuncts look redundant with the
  // third (a shift or reduce entry is presumably not also an error
  // entry), but the ActionEntry encoding is opaque here, so the
  // expression is left exactly as written — confirm before simplifying.
  return tables->isShiftAction(entry) ||
         tables->isReduceAction(entry) ||
         !tables->isErrorAction(entry);
}
1526:
1527:
1528:
1529:
1530: StackNode *GLR::findTopmostParser(StateId state)
1531: {
1532: #ifdef USE_PARSER_INDEX
1533: int index = parserIndex[state];
1534: if (index != INDEX_NO_PARSER) {
1535: return topmostParsers[index];
1536: }
1537: else {
1538: return NULL;
1539: }
1540: #else
1541: for (int i=0; i < topmostParsers.length(); i++) {
1542: StackNode *node = topmostParsers[i];
1543: if (node->state == state) {
1544: return node;
1545: }
1546: }
1547: return NULL;
1548: #endif
1549: }
1550:
1551:
1552:
1553:
1554:
1555: void GLR::dumpGSS(int tokenNumber) const
1556: {
1557: FILE *dest = fopen(sm_stringc << "gss." << tokenNumber << ".g", "w");
1558:
1559:
1560:
1561: SObjList<StackNode> printed;
1562:
1563:
1564:
1565:
1566: SObjList<StackNode> queue;
1567: for (int i=0; i < topmostParsers.length(); i++) {
1568: queue.append(topmostParsers[i]);
1569: }
1570:
1571:
1572: while (queue.isNotEmpty()) {
1573: StackNode *node = queue.removeFirst();
1574: if (printed.contains(node)) {
1575: continue;
1576: }
1577: printed.append(node);
1578:
1579:
1580:
1581:
1582:
1583: if (node->firstSib.sib != NULL) {
1584: dumpGSSEdge(dest, node, node->firstSib.sib);
1585: queue.append(node->firstSib.sib);
1586:
1587: FOREACH_OBJLIST(SiblingLink, node->leftSiblings, iter) {
1588: dumpGSSEdge(dest, node, iter.data()->sib);
1589: queue.append(const_cast<StackNode*>( iter.data()->sib.getC() ));
1590: }
1591: }
1592: }
1593:
1594: fclose(dest);
1595: }
1596:
1597:
// Write one edge line "e <src> <target>" to 'dest'.  Each node is
// identified by a column/pointer/state triple; the column component is
// 0 unless NODE_COLUMN tracking is compiled in.
void GLR::dumpGSSEdge(FILE *dest, StackNode const *src,
                      StackNode const *target) const
{
  fprintf(dest, "e %d_%p_%d %d_%p_%d\n",
                0 NODE_COLUMN( + src->column ), src, src->state,
                0 NODE_COLUMN( + target->column ), target, target->state);
}
1605:
1606:
1607:
1608: sm_string GLR::stackSummary() const
1609: {
1610: sm_stringBuilder sb;
1611:
1612:
1613:
1614: SObjList<StackNode const> printed;
1615:
1616: for (int i=0; i < topmostParsers.length(); i++) {
1617: sb << " (" << i << ": ";
1618: innerStackSummary(sb, printed, topmostParsers[i]);
1619: sb << ")";
1620: }
1621:
1622: return sb;
1623: }
1624:
1625: void GLR::nodeSummary(sm_stringBuilder &sb, StackNode const *node) const
1626: {
1627: sb << node->state << "[" << node->referenceCount << "]";
1628: }
1629:
// Append a summary of the stack fragment reachable from 'node' to
// 'sb'.  Nodes already in 'printed' are shown as "(rep:...)" without
// re-expansion; multiple sibling links print as '|'-separated
// alternatives inside parentheses.
void GLR::innerStackSummary(sm_stringBuilder &sb, SObjList<StackNode const> &printed,
                            StackNode const *node) const
{
  if (printed.contains(node)) {
    // shared node: emit only a reference marker
    sb << "(rep:";
    nodeSummary(sb, node);
    sb << ")";
    return;
  }

  nodeSummary(sb, node);
  printed.append(node);

  if (!node->firstSib.sib) {
    return;    // no siblings: bottom of this stack
  }

  sb << "-";

  if (node->leftSiblings.isEmpty()) {
    // exactly one sibling; continue linearly
    innerStackSummary(sb, printed, node->firstSib.sib);
  }
  else {
    // several siblings: show the alternatives
    sb << "(";
    innerStackSummary(sb, printed, node->firstSib.sib);

    FOREACH_OBJLIST(SiblingLink, node->leftSiblings, iter) {
      sb << "|";
      innerStackSummary(sb, printed, iter.data()->sib);
    }
    sb << ")";
  }
}
1665:
1666:
1667:
1668: SemanticValue GLR::getParseResult()
1669: {
1670:
1671:
1672:
1673: SemanticValue sv =
1674: topmostParsers.first()->
1675: leftSiblings.first()->sib->
1676: leftSiblings.first()->
1677: sval;
1678:
1679: return sv;
1680: }
1681:
1682:
1683:
1684:
1685:
1686:
1687:
1688:
1689:
// Construct an empty path; every field starts in an "invalid" state
// and is filled in later by init() and insertPathCopy().
ReductionPathQueue::Path::Path()
  : startStateId(STATE_INVALID),
    prodIndex(-1),
    startColumn(-1),
    leftEdgeNode(NULL),
    sibLinks(INITIAL_RHSLEN_SIZE),
    symbols(INITIAL_RHSLEN_SIZE)
{
  next = NULL;
}
1700:
// Nothing to release explicitly; the sibLinks/symbols arrays clean up
// after themselves, and the pointers held are non-owning.
ReductionPathQueue::Path::~Path()
{}
1703:
1704:
// (Re)initialize this path for a reduction by production 'pi' starting
// from state 'ssi', growing the per-symbol arrays so they can hold
// 'rhsLen' entries.  (startColumn/leftEdgeNode are set later, by
// insertPathCopy.)
void ReductionPathQueue::Path::init(StateId ssi, int pi, int rhsLen)
{
  startStateId = ssi;
  prodIndex = pi;

  sibLinks.ensureIndexDoubler(rhsLen);
  symbols.ensureIndexDoubler(rhsLen);
}
1713:
1714:
// The queue starts empty; Path objects are recycled through
// 'pathPool' rather than allocated per reduction.
ReductionPathQueue::ReductionPathQueue(ParseTables *t)
  : top(NULL),
    pathPool(30),    // initial pool capacity
    tables(t)
{}
1720:
ReductionPathQueue::~ReductionPathQueue()
{
  // nothing to do explicitly: the pool owns all Path objects,
  // including any still linked into the queue, and reclaims them
  // when 'pathPool' is destroyed
}
1727:
1728:
1729: ReductionPathQueue::Path *ReductionPathQueue::newPath(
1730: StateId startStateId, int prodIndex, int rhsLen)
1731: {
1732: Path *p = pathPool.alloc();
1733: p->init(startStateId, prodIndex, rhsLen);
1734: return p;
1735: }
1736:
1737:
// Insert a copy of the prototype path 'src' into the priority queue,
// recording 'leftEdge' as the node the reduction pops back to.  The
// queue is a singly linked list kept sorted by goesBefore().
void ReductionPathQueue::insertPathCopy(Path const *src, StackNode *leftEdge)
{
  ParseTables::ProdInfo const &prodInfo = tables->getProdInfo(src->prodIndex);

  // allocate a fresh path and copy the prototype's identity into it
  Path *p = pathPool.alloc();
  p->init(src->startStateId, src->prodIndex, prodInfo.rhsLen);

  // remember where this path's reduction bottoms out
  p->leftEdgeNode = leftEdge;
  p->startColumn = leftEdge->column;

  // copy the per-symbol sibling links and symbols
  for (int i = prodInfo.rhsLen-1; i>=0; i--) {
    p->sibLinks[i] = src->sibLinks[i];
    p->symbols[i] = src->symbols[i];
  }

  // sorted insertion into the linked list
  if (!top || goesBefore(p, top)) {
    // new first element
    p->next = top;
    top = p;
  }
  else {
    // walk to the last element that still goes before 'p'
    Path *prev = top;
    while (prev->next && !goesBefore(p, prev->next)) {
      prev = prev->next;
    }

    // splice 'p' in after 'prev'
    p->next = prev->next;
    prev->next = p;
  }
}
1774:
1775: bool ReductionPathQueue::goesBefore(Path const *p1, Path const *p2) const
1776: {
1777: if (p1->startColumn > p2->startColumn) {
1778:
1779: return true;
1780: }
1781: else if (p2->startColumn > p1->startColumn) {
1782:
1783: return false;
1784: }
1785: else {
1786:
1787:
1788: NtIndex p1NtIndex = tables->getProdInfo(p1->prodIndex).lhsIndex;
1789: NtIndex p2NtIndex = tables->getProdInfo(p2->prodIndex).lhsIndex;
1790:
1791:
1792: int ord1 = tables->getNontermOrdinal(p1NtIndex);
1793: int ord2 = tables->getNontermOrdinal(p2NtIndex);
1794:
1795: return ord1 < ord2;
1796: }
1797: }
1798:
1799:
// Pop and return the highest-priority path.  Precondition: the queue
// is not empty ('top' is non-NULL).
inline ReductionPathQueue::Path *ReductionPathQueue::dequeue()
{
  Path *ret = top;
  top = top->next;
  return ret;
}
1806:
1807:
// Return 'p' to the allocation pool for later reuse.
void ReductionPathQueue::deletePath(Path *p)
{
  pathPool.dealloc(p);
}
1812:
1813:
1814:
// Drain the reduction worklist, highest-priority path first.  Each
// path describes one way to pop a production's right-hand side off a
// stack; performing the reduction may create new links (via
// rwlShiftNonterminal) which in turn enqueue further reductions.
void GLR::rwlProcessWorklist()
{
  // location of the current token, used as the fallback left edge
  SOURCELOC( SourceLoc tokenLoc = lexerPtr->loc; )

  while (pathQueue.isNotEmpty()) {
    // take the highest-priority path
    ReductionPathQueue::Path *path = pathQueue.dequeue();

    // production being reduced by this path
    ParseTables::ProdInfo const &prodInfo = tables->getProdInfo(path->prodIndex);
    int rhsLen = prodInfo.rhsLen;

    TRSPARSE("state " << path->startStateId <<
             ", reducing by production " << path->prodIndex <<
             " (rhsLen=" << rhsLen <<
             "), back to state " << path->leftEdgeNode->state);

    ACCOUNTING( nondetReduce++; )

    // leftmost known source location of the popped symbols; starts at
    // the current token (covers the empty-RHS case)
    SOURCELOC( SourceLoc leftEdge = tokenLoc; )

    // (tracing) textual description of the right-hand side
    ACTION(
      sm_string rhsDescription("");
      if (rhsLen == 0) {
        // empty RHS: print something other than an empty string
        rhsDescription = " empty";
      }
    )

    // collect the RHS semantic values into 'toPass', right to left
    toPass.ensureIndexDoubler(rhsLen-1);
    for (int i=rhsLen-1; i >= 0; i--) {
      SiblingLink *sib = path->sibLinks[i];

      toPass[i] = sib->sval;

      // prepend this symbol's description (building right to left)
      ACTION( rhsDescription =
        sm_stringc << symbolDescription(path->symbols[i], userAct, sib->sval)
                << " "
                << rhsDescription;
      )

      // track the leftmost known location
      SOURCELOC(
        if (sib->loc != SL_UNKNOWN) {
          leftEdge = sib->loc;
        }
      )

      // the link may be traversed again by another reduction (shared
      // GSS structure), so leave a duplicate of the value in place of
      // the one handed to the action
      sib->sval = duplicateSemanticValue(path->symbols[i], sib->sval);

      YIELD_COUNT( sib->yieldCount++; )
    }

    // run the user's reduction action
    SemanticValue sval =
      doReductionAction(path->prodIndex, toPass.getArray()
                        SOURCELOCARG( leftEdge ) );

    // (tracing) describe the resulting left-hand-side value
    ACTION( sm_string lhsDesc =
      userAct->nonterminalDescription(prodInfo.lhsIndex, sval); )
    TRSACTION("  " << lhsDesc << " ->" << rhsDescription);

    // the user may veto keeping this value
    if (USE_KEEP &&
        !userAct->keepNontermValue(prodInfo.lhsIndex, sval)) {
      TRSACTION("  CANCELLED " << lhsDesc);
    }
    else {
      // shift the resulting nonterminal over 'leftEdgeNode'
      SiblingLink *newLink =
        rwlShiftNonterminal(path->leftEdgeNode, prodInfo.lhsIndex,
                            sval SOURCELOCARG( leftEdge ) );

      if (newLink) {
        // a link was added to an already-active parser: every topmost
        // parser may now have new reductions that must use that link
        for (int i=0; i < topmostParsers.length(); i++) {
          StackNode *parser = topmostParsers[i];

          ActionEntry action =
            tables->getActionEntry(parser->state, lexerPtr->type);
          rwlEnqueueReductions(parser, action, newLink);
        }
      }
    }

    pathQueue.deletePath(path);
  }
}
1921:
1922:
1923:
1924:
1925:
1926:
1927:
1928:
1929:
1930:
1931:
1932:
// "Shift" nonterminal 'lhsIndex' (with semantic value 'sval', whose
// ownership passes to this function) over 'leftSibling', following the
// goto table.  Three cases:
//   - the destination state already has a parser AND a link to
//     'leftSibling': merge the alternative parses; returns NULL
//   - the destination state has a parser but no such link: add the
//     link and return it, so callers can look for new reduction paths
//     that use it
//   - no parser in the destination state: create one, enqueue its
//     reductions; returns NULL
SiblingLink *GLR::rwlShiftNonterminal(StackNode *leftSibling, int lhsIndex,
                                      SemanticValue /*owner*/ sval
                                      SOURCELOCARG( SourceLoc loc ) )
{
  // consult the goto table for the destination state
  StateId rightSiblingState = tables->decodeGoto(
    tables->getGotoEntry(leftSibling->state, lhsIndex), lhsIndex);

  TRSPARSE("state " << leftSibling->state <<
           ", shift nonterm " << lhsIndex <<
           ", to state " << rightSiblingState);

  // is there already an active parser in the destination state?
  StackNode *rightSibling = findTopmostParser(rightSiblingState);
  if (rightSibling) {
    // is there already a link from it to 'leftSibling'?
    SiblingLink *sibLink = rightSibling->getLinkTo(leftSibling);
    if (sibLink) {
      // yes: this reduction produced an alternative parse of the same
      // nonterminal over the same span — a merge

      // if the parser can't act on the current token anyway, skip the
      // merge and discard the new value
      if (!canMakeProgress(rightSibling)) {
        TRSPARSE("avoided a merge by noticing the state was dead");
        deallocateSemanticValue(rightSibling->getSymbolC(), sval);
        return NULL;
      }

      // remember the pre-merge value so we can detect whether the
      // merge replaced an already-yielded value
      YIELD_COUNT(SemanticValue old2 = sibLink->sval);

      // (tracing) descriptions of the two alternatives
      ACTION(
        sm_string leftDesc = userAct->nonterminalDescription(lhsIndex, sibLink->sval);
        sm_string rightDesc = userAct->nonterminalDescription(lhsIndex, sval);
      )

      // let the user combine the two alternatives; the merged value
      // replaces the link's value
      sibLink->sval =
        userAct->mergeAlternativeParses(lhsIndex, sibLink->sval, sval SOURCELOCARG( loc ) );

      TRSACTION("  " <<
                userAct->nonterminalDescription(lhsIndex, sibLink->sval) <<
                " is MERGE of " << leftDesc << " and " << rightDesc);

      YIELD_COUNT(
        if (sibLink->yieldCount > 0) {
          // the old value had already been yielded to a reduction
          // action; the parse forest is now incomplete
          yieldThenMergeCt++;
          SOURCELOC( trace("ytm") << "at " << toString(loc) << std::endl; )

          if (old2 != sibLink->sval) {
            std::cout << "warning: incomplete parse forest: " << (void*)old2
                 << " has already been yielded, but it now has been "
                 << "merged with " << (void*)sval << " to make "
                 << (void*)(sibLink->sval) << " (lhsIndex="
                 << lhsIndex << ")" << std::endl;
          }
        }
      )

      // no new link was created
      return NULL;
    }

    // no existing link: add one carrying 'sval'
    sibLink = rightSibling->addSiblingLink(leftSibling, sval SOURCELOCARG( loc ) );

    parserMerges++;

    // the new link may invalidate cached determinism depths; if the
    // node is shared, iterate depth recomputation to a fixpoint
    if (rightSibling->referenceCount > 1) {
      int changes=1, iters=0;
      while (changes) {
        changes = 0;
        for (int i=0; i < topmostParsers.length(); i++) {
          StackNode *parser = topmostParsers[i];
          int newDepth = parser->computeDeterminDepth();
          if (newDepth != parser->determinDepth) {
            changes++;
            parser->determinDepth = newDepth;
          }
        }
        iters++;
        xassert(iters < 1000);    // guard against nontermination
        computeDepthIters++;
      }
    }

    // callers must look for reductions that use this new link
    return sibLink;
  }

  else {
    // no parser in the destination state: create one
    rightSibling = makeStackNode(rightSiblingState);

    // link it to 'leftSibling', carrying 'sval'
    rightSibling->addSiblingLink(leftSibling, sval SOURCELOCARG( loc ) );

    // it becomes an active parser
    addTopmostParser(rightSibling);

    // enqueue whatever reductions the new parser can do on the
    // current token
    ActionEntry action =
      tables->getActionEntry(rightSibling->state, lexerPtr->type);
    rwlEnqueueReductions(rightSibling, action, NULL /*sibLink*/);

    // the (only) link of a brand-new parser needs no separate
    // re-examination by callers
    return NULL;
  }
}
2097:
2098:
2099:
2100:
2101:
2102:
2103:
2104:
2105:
2106:
2107:
2108:
2109:
2110:
// Given 'action' (the parse-table entry for 'parser' on the current
// token), enqueue every reduction it implies; shifts are deferred to
// rwlShiftTerminals.  When 'mustUseLink' is non-NULL, only reduction
// paths that traverse that particular sibling link are enqueued.
// Returns the number of parse actions encoded by 'action' (0 for
// error, 1 for shift or reduce, N for an ambiguous entry).
int GLR::rwlEnqueueReductions(StackNode *parser, ActionEntry action,
                              SiblingLink *mustUseLink)
{
  parser->checkLocalInvariants();

  if (tables->isShiftAction(action)) {
    // shifts are processed later, all at once
    return 1;
  }
  else if (tables->isReduceAction(action)) {
    // which production?
    int prodIndex = tables->decodeReduce(action, parser->state);

    // its right-hand-side length
    ParseTables::ProdInfo const &info = tables->getProdInfo(prodIndex);
    int rhsLen = info.rhsLen;
    xassert(rhsLen >= 0);

    // build a prototype path, then enumerate every leftward path of
    // length 'rhsLen' from 'parser', copying each into the queue
    ReductionPathQueue::Path *proto =
      pathQueue.newPath(parser->state, prodIndex, rhsLen);

    rwlRecursiveEnqueue(proto, rhsLen, parser, mustUseLink);

    // the prototype itself was only ever copied, never queued
    pathQueue.deletePath(proto);

    return 1;
  }
  else if (tables->isErrorAction(action)) {
    // the parser dies; nothing to enqueue
    return 0;
  }
  else {
    // ambiguous entry: entry[0] is the count, entry[1..] the actions
    ActionEntry *entry = tables->decodeAmbigAction(action, parser->state);
    for (int i=0; i<entry[0]; i++) {
      rwlEnqueueReductions(parser, entry[i+1], mustUseLink);
    }

    return entry[0];
  }
}
2156:
2157:
2158:
2159: inline void GLR::rwlCollectPathLink(
2160: ReductionPathQueue::Path *proto, int popsRemaining,
2161: StackNode *currentNode, SiblingLink *mustUseLink, SiblingLink *linkToAdd)
2162: {
2163: proto->sibLinks[popsRemaining] = linkToAdd;
2164: proto->symbols[popsRemaining] = currentNode->getSymbolC();
2165:
2166: if (linkToAdd == mustUseLink) {
2167: rwlRecursiveEnqueue(proto, popsRemaining, linkToAdd->sib,
2168: NULL /*mustUseLink*/);
2169: }
2170: else {
2171: rwlRecursiveEnqueue(proto, popsRemaining, linkToAdd->sib,
2172: mustUseLink);
2173: }
2174: }
2175:
2176:
// Enumerate all leftward paths of length 'popsRemaining' from
// 'currentNode', extending prototype 'proto'; each completed path is
// copied into the queue.  A non-NULL 'mustUseLink' filters out paths
// that never traverse that link (rwlCollectPathLink clears it once
// the link has been used).
void GLR::rwlRecursiveEnqueue(
  ReductionPathQueue::Path *proto,
  int popsRemaining,
  StackNode *currentNode,
  SiblingLink *mustUseLink)
{
  if (popsRemaining == 0) {
    // path complete; discard it if the required link was never used
    if (mustUseLink != NULL) {
      return;
    }

    // enqueue a copy, with 'currentNode' as the path's left edge
    pathQueue.insertPathCopy(proto, currentNode);
  }

  else {
    // explore via the node's first sibling link
    rwlCollectPathLink(proto, popsRemaining-1, currentNode, mustUseLink,
                       &(currentNode->firstSib));

    // and via any additional sibling links
    if (currentNode->leftSiblings.isNotEmpty()) {
      FOREACH_OBJLIST_NC(SiblingLink, currentNode->leftSiblings, sibling) {
        rwlCollectPathLink(proto, popsRemaining-1, currentNode, mustUseLink,
                           sibling.data());
      }
    }
  }
}
2211:
2212:
2213:
2214:
// Shift the current token into every active parser that can accept
// it.  The previous topmost-parser set is moved into 'prevTopmost' and
// consumed one node at a time; parsers whose action is reduce/error
// simply die here.  When several parsers shift the same token, the
// token's semantic value is duplicated for each additional shift.
void GLR::rwlShiftTerminals()
{
  NODE_COLUMN( globalNodeColumn++; )

  // swap the active-parser list into 'prevTopmost'; the new topmost
  // set is built as shifts succeed
  xassert(prevTopmost.isEmpty());
  prevTopmost.swapWith(topmostParsers);
  xassert(topmostParsers.isEmpty());

  // most recently created sibling link, used to duplicate the token's
  // semantic value for second and subsequent shifts
  SiblingLink *prev = NULL;

  while (prevTopmost.isNotEmpty()) {
    // take the next old parser; the RCPtr holds one reference while
    // the list's reference is dropped
    RCPtr<StackNode> leftSibling(prevTopmost.pop());
    xassertdb(leftSibling->referenceCount >= 2);
    leftSibling->decRefCt();

    // the parser's action on the current token
    ActionEntry action =
      tables->getActionEntry(leftSibling->state, lexerPtr->type);

    // destination state if a shift is possible
    StateId newState = STATE_INVALID;

    if (tables->isShiftAction(action)) {
      // unambiguous shift
      newState = tables->decodeShift(action, lexerPtr->type);
    }
    else if (tables->isReduceAction(action) ||
             tables->isErrorAction(action)) {
      // reductions were already done; this parser dies here
      continue;
    }
    else {
      // ambiguous entry: look for a shift among its alternatives
      // (there can be at most one shift per state/token)
      ActionEntry *entry = tables->decodeAmbigAction(action, leftSibling->state);

      for (int i=0; i<entry[0]; i++) {
        action = entry[i+1];
        if (tables->isShiftAction(action)) {
          // found the shift
          newState = tables->decodeShift(action, lexerPtr->type);
          break;
        }
      }

      // no shift among the alternatives: parser dies
      if (newState == STATE_INVALID) {
        continue;
      }
    }

    ACCOUNTING( nondetShift++; )

    TRSPARSE("state " << leftSibling->state <<
             ", shift token " << lexerPtr->tokenDesc() <<
             ", to state " << newState);

    // reuse an existing parser in the destination state if any
    StackNode *rightSibling = findTopmostParser(newState);
    if (rightSibling != NULL) {
      // nothing to do; the new link below joins the stacks
    }

    else {
      // no parser in that state yet: create one
      rightSibling = makeStackNode(newState);

      // and make it an active parser
      addTopmostParser(rightSibling);
    }

    SemanticValue sval = lexerPtr->sval;
    if (prev) {
      // token already shifted once: give this parser a duplicate of
      // its semantic value
      sval = userAct->duplicateTerminalValue(lexerPtr->type, prev->sval);

      TRSACTION("  " << userAct->terminalDescription(lexerPtr->type, sval) <<
                " is (@lexer) DUP of " <<
                userAct->terminalDescription(lexerPtr->type, prev->sval));
    }

    // connect the new/existing parser to the old one, carrying the
    // token's value
    prev = rightSibling->addSiblingLink(leftSibling, sval
                                        SOURCELOCARG( lexerPtr->loc ) );

    // only the topmostParsers list should hold a reference at this
    // point
    xassert(rightSibling->referenceCount == 1);
  }
}
2322:
2323:
2324:
2325:
2326:
// Build a graph-file label for stack node 'sn':
// "<id>:col=<col>,st=<state>,sym=<name>".
// NOTE(review): this references 'tokenColumn' and 'state->id', which
// do not match how StackNode is used elsewhere in this file ('column',
// and 'state' as a plain StateId); this helper appears to serve only
// the disabled writeParseGraph code — confirm it still compiles before
// re-enabling that code.
sm_string stackNodeName(StackNode const *sn)
{
  Symbol const *s = sn->getSymbolC();
  char const *symName = (s? s->name.pcharc() : "(null)");
  return sm_stringb(sn->stackNodeId
               << ":col=" << sn->tokenColumn
               << ",st=" << sn->state->id
               << ",sym=" << symName);
}
2336:
2337:
2338:
// Build a graph-file label for reduction #'ruleNo' at node 'sn';
// spaces in the production's text are replaced with underscores so
// the label forms a single token in the graph file.
sm_string reductionName(StackNode const *sn, int ruleNo, Reduction const *red)
{
  return sm_stringb(sn->stackNodeId << "/" << ruleNo << ":"
               << replace(red->production->toString(), " ", "_"));
}
2344:
2345:
2346:
2347:
2348:
2349:
2350:
2351:
2352:
2353:
2354:
2355:
2356:
// Write a parse-graph description to "graphs/<fname>".  The node/edge
// emission below is currently disabled (#if 0) — it refers to
// 'allStackNodes' and Reduction structures that are not part of the
// active implementation — so the file presently contains only the
// header comments.
void GLR::writeParseGraph(char const *fname) const
{
  FILE *out = fopen(sm_stringb("graphs/" << fname), "w");
  if (!out) {
    xsyserror("fopen", sm_stringb("opening file `graphs/" << fname << "'"));
  }

  // header
  fprintf(out, "# parse graph file: %s\n", fname);
  fprintf(out, "# automatically generated\n"
               "\n");

  #if 0
  // one node per stack node, plus one per reduction
  FOREACH_OBJLIST(StackNode, allStackNodes, stackNodeIter) {
    StackNode const *stackNode = stackNodeIter.data();
    sm_string myName = stackNodeName(stackNode);

    // visual separator per node
    fputs(sm_stringb("\n# ------ node: " << myName << " ------\n"), out);

    // the node itself
    fputs(sm_stringb("n " << myName << "\n\n"), out);

    // edges to siblings, and nodes/edges for reductions
    int ruleNo=0;
    FOREACH_OBJLIST(SiblingLink, stackNode->leftSiblings, sibIter) {
      SiblingLink const *link = sibIter.data();

      // edge to the sibling
      fputs(sm_stringb("e " << myName << " "
                       << stackNodeName(link->sib) << "\n"), out);

      if (link->treeNode->isNonterm()) {
        // one node per reduction hanging off this link
        FOREACH_OBJLIST(Reduction, link->treeNode->asNonterm().reductions,
                        redIter) {
          Reduction const *red = redIter.data();
          ruleNo++;

          sm_string ruleName = reductionName(stackNode, ruleNo, red);

          // the reduction node
          fputs(sm_stringb("n " << ruleName << "\n"), out);

          // edge from the stack node to the reduction
          fputs(sm_stringb("e " << myName << " " << ruleName << "\n"), out);

          #if 0
          SFOREACH_OBJLIST(StackNode, rule->children, child) {
            fputs(sm_stringb("e " << ruleName << " "
                             << stackNodeName(child.data()) << "\n"), out);
          }
          #endif

          // blank line separator
          fputs("\n", out);
        }
      }
    }
  }
  #endif

  // finish up
  if (fclose(out) != 0) {
    xsyserror("fclose");
  }
}
2432:
2433:
2434:
2435:
2436:
2437:
2438:
2439: sm_string readFileIntoString(char const *fname)
2440: {
2441:
2442: FILE *fp = fopen(fname, "r");
2443: if (!fp) {
2444: xsyserror("fopen", sm_stringb("opening `" << fname << "' for reading"));
2445: }
2446:
2447:
2448: if (fseek(fp, 0, SEEK_END) < 0) {
2449: xsyserror("fseek");
2450: }
2451: int len = (int)ftell(fp);
2452: if (len < 0) {
2453: xsyserror("ftell");
2454: }
2455: if (fseek(fp, 0, SEEK_SET) < 0) {
2456: xsyserror("fseek");
2457: }
2458:
2459:
2460: sm_string ret(len);
2461:
2462:
2463: if (fread(ret.pchar(), 1, len, fp) < (size_t)len) {
2464: xsyserror("fread");
2465: }
2466:
2467:
2468: if (fclose(fp) < 0) {
2469: xsyserror("fclose");
2470: }
2471:
2472:
2473: return ret;
2474: }
2475:
2476:
2477:
Start data section to elk/elk_gramanl.cxx[1
/1
]
1: // gramanl.cc see license.txt for copyright and terms of use
2: // code for gramanl.h
3:
4: #include "elk_gramanl.h"
5:
6: #include "sm_bit2d.h"
7: #include "sm_bitarray.h"
8: #include "sm_strtokp.h"
9: #include "sm_syserr.h"
10: #include "sm_trace.h"
11: #include "sm_nonport.h"
12: #include "sm_crc.h"
13: #include "elk_flatutil.h"
14: #include "elk_grampar.h"
15: #include "elk_emitcode.h"
16: #include "sm_strutil.h"
17: #include "sm_ckheap.h"
18: #include "elk_genml.h"
19:
20: #include <fstream> // std::ofstream
21: #include <stdlib.h> // getenv
22: #include <stdio.h> // printf
23:
24: // for ParseTables::emitConstructionCode:
25: // linkdepend: emittables.cc
26:
27:
28: // for now, we'll just have these be global variables; if I later
29: // decide I actually want more than one at a time, I can move these
30: // into GrammarAnalysis and push the interfaces to accomodate
31:
32: // NOTE: only LALR(1) has been recently tested; in particular I
33: // know that LR(1) is broken (3/26/02)
34:
// --- parser-construction algorithm selection (exactly one true) ---

// LR(0) does all reductions, regardless of what the next token is
static bool const LR0 = false;

// SLR(1) looks at a production's LHS's Follow
static bool const SLR1 = false;

// LR(1) computes context-sensitive follow for each item,
// depending on how that item arises in the item-set DFA
static bool const LR1 = false;

// LALR(1) is like LR(1), except two states are merged if
// they only differ in their items' lookaheads (so it has
// the same # of states as SLR(1), while having some of the
// context-sensitivity of LR(1))
static bool const LALR1 = true;


// --- debug-build malloc tracking ---
#if !defined(NDEBUG) // track unauthorized malloc's
#define TRACK_MALLOC
#endif

#ifdef TRACK_MALLOC
// take initial snapshot of the malloc-call counter
#define INITIAL_MALLOC_STATS() \
  unsigned mallocCt = numMallocCalls();

// nothing should have been allocated recently; if it has, then
// print a warning
#define CHECK_MALLOC_STATS(desc) \
  { \
    unsigned newCt = numMallocCalls(); \
    if (mallocCt != newCt) { \
      std::cout << (newCt - mallocCt) << " malloc calls during " << desc << std::endl; \
      mallocCt = newCt; \
      breaker(); \
    } \
  }

// some unavoidable allocation just happened, so just update counter
#define UPDATE_MALLOC_STATS() \
  mallocCt = numMallocCalls();
#else
// release build: malloc tracking compiles away entirely
#define INITIAL_MALLOC_STATS()
#define CHECK_MALLOC_STATS(desc)
#define UPDATE_MALLOC_STATS()
#endif
81:
82:
83: // ----------------- DottedProduction ------------------
84: #if 0 // used?
85: DottedProduction::DottedProduction(DottedProduction const &obj)
86: {
87: prod = obj.prod;
88: dot = obj.dot;
89: afterDot = obj.afterDot;
90: firstSet = obj.firstSet;
91: canDeriveEmpty = obj.canDeriveEmpty;
92: }
93: #endif // 0
94:
95:
// Default-construct into the "no production" state set up by init().
DottedProduction::DottedProduction()
{
  init();
}
100:
// Reset every field to its "uninitialized" default; setProdAndDot()
// establishes the real values later.
void DottedProduction::init()
{
  prod = NULL;
  dot = -1;
  afterDot = NULL;
  canDeriveEmpty = false;
  backPointer = NULL;
}
109:
110:
// No owned resources; 'prod', 'afterDot' and 'backPointer' are
// non-owning pointers.
DottedProduction::~DottedProduction()
{}
113:
114:
115: // arbitrary integer unique to every symbol and preserved
116: // across read/write
117: int symbolIndex(Symbol const *s)
118: {
119: if (s->isTerminal()) {
120: // make terminals negative since otherwise they'd
121: // collide with nonterminals
122: return -( s->asTerminalC().termIndex );
123: }
124: else {
125: return s->asNonterminalC().ntIndex;
126: }
127: }
128:
129:
130: #if 0
131: bool DottedProduction::isEqual(DottedProduction const &obj) const
132: {
133: return dot == obj.dot &&
134: prod == obj.prod;
135: }
136: #endif // 0
137:
138:
// Bind this dotted production to production 'p' with the dot at
// position 'd', and cache the symbol after the dot (NULL when the dot
// is at the end).
void DottedProduction::setProdAndDot(Production const *p, int d)
{
  prod = p;
  dot = d;

  // computing this each time turned out to be significant
  // according to the profiler, so we store it instead
  bool dotAtEnd = (dot == prod->rhsLength());
  afterDot = dotAtEnd? NULL : prod->right.nthC(dot)->sym;
}
149:
// Return the symbol immediately to the left of the dot; the dot must
// not be at the start of the production.
Symbol const *DottedProduction::symbolBeforeDotC() const
{
  xassert(!isDotAtStart());
  return prod->right.nthC(dot-1)->sym;
}
155:
156: #if 0
157: Symbol const *DottedProduction::symbolAfterDotC() const
158: {
159: xassert(!isDotAtEnd());
160: return prod->right.nthC(dot)->sym;
161: }
162: #endif // 0
163:
164:
// Print this dotted production as "LHS -> a . b c", placing the dot
// marker at its current position (possibly after the last RHS symbol).
void DottedProduction::print(std::ostream &os) const
{
  os << prod->left->name << " ->";

  int position = 0;
  for (ObjListIter<Production::RHSElt> iter(prod->right);
       !iter.isDone(); iter.adv(), position++) {
    if (position == dot) {
      os << " .";
    }
    os << " " << iter.data()->sym->toString();
  }
  // handle the dot-at-end case, which the loop above doesn't reach
  if (position == dot) {
    os << " .";
  }
}
181:
182:
183: // ---------------------- LRItem -------------------
// Construct an item for dotted production 'dp' with an (empty)
// lookahead set sized for 'numTerms' terminals.
LRItem::LRItem(int numTerms, DottedProduction const *dp)
  : dprod(dp),
    lookahead(numTerms)
{}
188:
// Copy constructor: shares 'dprod' (a non-owning pointer) and copies
// the lookahead set.
LRItem::LRItem(LRItem const &obj)
  : dprod(obj.dprod),
    lookahead(obj.lookahead)
{}
193:
// 'dprod' is not owned; nothing to free.
LRItem::~LRItem()
{}
196:
// De-serialization constructor: 'dprod' starts NULL and is restored
// later by xferSerfs(); the lookahead set reads itself from 'flat'.
LRItem::LRItem(Flatten &flat)
  : dprod(NULL),
    lookahead(flat)
{}
201:
// Read or write this item's owned data (only the lookahead set).
void LRItem::xfer(Flatten &flat)
{
  lookahead.xfer(flat);
}
206:
// Read or write this item's serf (non-owning) pointer 'dprod',
// encoded as a (production index, dot position) pair.
void LRItem::xferSerfs(Flatten &flat, GrammarAnalysis &g)
{
  if (flat.writing()) {
    flat.writeInt(prodIndex());
    flat.writeInt(getDot());
  }
  else {
    // originally had these directly in the argument list,
    // but order of eval is undefined!
    int idx = flat.readInt();
    int d = flat.readInt();
    dprod = g.getDProdIndex(idx, d);
  }
}
221:
222:
223: // compare two items in an arbitrary (but deterministic) way so that
224: // sorting will always put a list of items into the same order, for
225: // comparison purposes; this doesn't consider the lookahead
226: STATICDEF int LRItem::diff(LRItem const *a, LRItem const *b, void*)
227: {
228: // check the prodIndex first
229: int ret = a->prodIndex() - b->prodIndex();
230: if (ret) { return ret; }
231:
232: // 'dot'
233: ret = a->getDot() - b->getDot();
234: return ret;
235: }
236:
237:
238: bool firstIncludes(Symbol const *sym, Terminal const *t)
239: {
240: if (sym->isTerminal()) {
241: return sym == t;
242: }
243: else {
244: // this generalizes 'isExtendingShift'.. and while this did help
245: // eliminate one S/R in a grammar I was working on, there were
246: // others that could not be eliminated at all (they were not
247: // statically decidable), so this generalization might not be
248: // useful afterall
249: return sym->asNonterminalC().first.contains(t->termIndex);
250: }
251: }
252:
// True if this item would shift terminal 't' while working on a
// production for nonterminal 'A': the dot is not at the end, the
// production's LHS is 'A', and 't' can begin what follows the dot.
bool LRItem::isExtendingShift(Nonterminal const *A, Terminal const *t) const
{
  return !dprod->isDotAtEnd() && // shift
         dprod->getProd()->left == A && // extending A
         firstIncludes(dprod->symbolAfterDotC(), t); // with t
}
259:
260:
// Print the dotted production followed by its lookahead set.
void LRItem::print(std::ostream &os, GrammarAnalysis const &g) const
{
  dprod->print(os);
  lookahead.print(os, g); // prints the separating comma, if necessary
}
266:
267:
268: // ----------------- ItemSet -------------------
// Construct item set 'anId' for a grammar with 'numTerms' terminals
// and 'numNonterms' nonterminals, allocating (all-NULL) transition
// tables of the corresponding sizes.
ItemSet::ItemSet(StateId anId, int numTerms, int numNonterms)
  : kernelItems(),
    nonkernelItems(),
    termTransition(NULL), // inited below
    nontermTransition(NULL), // inited below
    terms(numTerms),
    nonterms(numNonterms),
    dotsAtEnd(NULL),
    numDotsAtEnd(0),
    stateSymbol(NULL),
    id(anId),
    BFSparent(NULL)
{
  allocateTransitionFunction();
}
284:
285: void ItemSet::allocateTransitionFunction()
286: {
287: termTransition = new ItemSet* [terms];
288: nontermTransition = new ItemSet* [nonterms];
289:
290: INTLOOP(t, 0, terms) {
291: termTransition[t] = (ItemSet*)NULL; // means no transition on t
292: }
293: INTLOOP(n, 0, nonterms) {
294: nontermTransition[n] = (ItemSet*)NULL;
295: }
296: }
297:
298:
299: ItemSet::~ItemSet()
300: {
301: delete[] termTransition;
302: delete[] nontermTransition;
303:
304: if (dotsAtEnd) {
305: delete[] dotsAtEnd;
306: }
307: }
308:
309:
// De-serialization constructor: owned pointers start NULL; the real
// contents are restored by a subsequent xfer()/xferSerfs().
// NOTE(review): 'terms', 'nonterms', 'id' and 'kernelItemsCRC' are
// left uninitialized here and must be filled in by xfer() before use.
ItemSet::ItemSet(Flatten &flat)
  : termTransition(NULL),
    nontermTransition(NULL),
    dotsAtEnd(NULL),
    numDotsAtEnd(0),
    stateSymbol(NULL),
    BFSparent(NULL)
{}
318:
319:
320: Production *getNthProduction(Grammar *g, int n)
321: {
322: if (0 <= n && n < g->productions.count()) {
323: return g->productions.nth(n);
324: }
325: else {
326: // my access path functions' contract is to
327: // return NULL on any error (as opposed to, say,
328: // an exception or assertion failure); this serves two
329: // purposes:
330: // - the writing code can use it to determine the
331: // maximum value of 'n'
332: // - the reading code can use it to validate 'n',
333: // since that comes from the input file
334: return NULL;
335: }
336: }
337:
338: #if 0 // not needed, doesn't work
339: DottedProduction *getNthDottedProduction(Production *p, int n)
340: {
341: if (0 <= n && n < (p->rhsLength() + 1)) {
342: return p->getDProd(n);
343: }
344: else {
345: return NULL;
346: }
347: }
348: #endif // 0
349:
350:
// Read or write this item set's owned data through 'flat'.  The
// derived quantities (numDotsAtEnd, kernelItemsCRC) are transferred
// directly because recomputing them would require the items.
void ItemSet::xfer(Flatten &flat)
{
  xferObjList(flat, kernelItems);
  xferObjList(flat, nonkernelItems);

  flat.xferInt(terms);
  flat.xferInt(nonterms);

  // numDotsAtEnd and kernelItemsCRC are computed from
  // other data
  // NEW: but computing them requires the items, which I'm omitting

  flat.xferInt(numDotsAtEnd);
  flat.xferLong((long&)kernelItemsCRC);

  flat.xferInt((int&)id);
}
368:
369:
// accumulated milliseconds spent recomputing nonkernel items during
// de-serialization (used only by the disabled code below)
int ticksComputeNonkernel = 0;

// Read or write this item set's serf (non-owning) pointers: the
// items' dotted-production links, the transition tables, the
// dots-at-end array, the state symbol, and the BFS parent.  The large
// #if 0 regions preserve earlier access-path-based serialization
// schemes.
void ItemSet::xferSerfs(Flatten &flat, GrammarAnalysis &g)
{
  // xfer the 'prod' fields of the items
  {
    MUTATE_EACH_OBJLIST(LRItem, kernelItems, k) {
      k.data()->xferSerfs(flat, g);
    }
    MUTATE_EACH_OBJLIST(LRItem, nonkernelItems, n) {
      n.data()->xferSerfs(flat, g);
    }
  }


  #if 0
  // 'kernelItems' and 'nonkernelItems': each one accessed as
  // g.productions.nth(???)->getDProd(???)
  xferSObjList_twoLevelAccess(
    flat,
    kernelItems, // serf list
    static_cast<Grammar*>(&g), // root of access path
    getNthProduction, // first access path link
    getNthDottedProduction); // second access path link

  #if 1
  xferSObjList_twoLevelAccess(
    flat,
    nonkernelItems, // serf list
    static_cast<Grammar*>(&g), // root of access path
    getNthProduction, // first access path link
    getNthDottedProduction); // second access path link
  #else
  // instead of the above, let's try computing the nonkernel items
  if (flat.reading()) {
    int start = getMilliseconds();
    g.itemSetClosure(*this);
    ticksComputeNonkernel += (getMilliseconds() - start);
  }
  #endif
  #endif // 0

  // these need to be sorted for 'changedItems'; but since
  // we're sorting by *address*, that's not necessarily
  // preserved across read/write
  // NEW: it should be stable now
  //kernelItems.insertionSort(LRItem::diff);


  // transition functions
  if (flat.reading()) {
    allocateTransitionFunction();
  }
  INTLOOP(t, 0, terms) {
    //xferNullableSerfPtrToList(flat, termTransition[t], g.itemSets);
    xferNullableSerfPtr(flat, termTransition[t]);
  }
  INTLOOP(n, 0, nonterms) {
    //xferNullableSerfPtrToList(flat, nontermTransition[n], g.itemSets);
    xferNullableSerfPtr(flat, nontermTransition[n]);
  }


  // dotsAtEnd, numDotsAtEnd, kernelItemsCRC
  //if (flat.reading()) {
  //  changedItems();
  //}

  if (flat.reading()) {
    // numDotsAtEnd was already restored by xfer()
    dotsAtEnd = new LRItem const * [numDotsAtEnd];
  }
  INTLOOP(p, 0, numDotsAtEnd) {
    #if 0
    xferSerfPtr_twoLevelAccess(
      flat,
      const_cast<LRItem*&>(dotsAtEnd[p]), // serf
      static_cast<Grammar*>(&g), // root of access path
      getNthProduction, // first access path link
      getNthDottedProduction); // second access path link
    #endif // 0
    xferSerfPtr(flat, dotsAtEnd[p]);
  }

  xferNullableSerfPtr(flat, stateSymbol);

  xferNullableSerfPtrToList(flat, BFSparent, g.itemSets);
}
457:
458:
459: Symbol const *ItemSet::computeStateSymbolC() const
460: {
461: // need only check kernel items since all nonkernel items
462: // have their dots at the left side
463: FOREACH_OBJLIST(LRItem, kernelItems, item) {
464: if (! item.data()->isDotAtStart() ) {
465: return item.data()->symbolBeforeDotC();
466: }
467: }
468: return NULL;
469: }
470:
471:
// Bounds-check a terminal index against this state's terminal count;
// returns the index unchanged so it can be used inline as a subscript.
int ItemSet::bcheckTerm(int index) const
{
  xassert(0 <= index && index < terms);
  return index;
}

// Same as bcheckTerm, but for nonterminal indices.
int ItemSet::bcheckNonterm(int index) const
{
  xassert(0 <= index && index < nonterms);
  return index;
}
483:
484: ItemSet *&ItemSet::refTransition(Symbol const *sym)
485: {
486: if (sym->isTerminal()) {
487: Terminal const &t = sym->asTerminalC();
488: return termTransition[bcheckTerm(t.termIndex)];
489: }
490: else {
491: Nonterminal const &nt = sym->asNonterminalC();
492: return nontermTransition[bcheckNonterm(nt.ntIndex)];
493: }
494: }
495:
496:
497: ItemSet const *ItemSet::transitionC(Symbol const *sym) const
498: {
499: return const_cast<ItemSet*>(this)->refTransition(sym);
500: }
501:
502:
503: void ItemSet::setTransition(Symbol const *sym, ItemSet *dest)
504: {
505: refTransition(sym) = dest;
506: }
507:
508:
509: void ItemSet::removeShift(Terminal const *sym)
510: {
511: refTransition(sym) = NULL;
512: }
513:
514:
// Add 'item' to the kernel item list (taking ownership); duplicates
// are silently ignored by appendUnique.
void ItemSet::addKernelItem(LRItem *item)
{
  // add it
  kernelItems.appendUnique(item);
}
520:
521:
// Put the kernel items into canonical order so that operator== can
// compare kernel lists element-by-element.
void ItemSet::sortKernelItems()
{
  // sort the items to facilitate equality checks
  kernelItems.mergeSort(LRItem::diff);

  // note: the caller must call changedItems
}
529:
530:
531: bool ItemSet::operator==(ItemSet const &obj) const
532: {
533: // since common case is disequality, check the
534: // CRCs first, and only do full check if they
535: // match
536: if (kernelItemsCRC == obj.kernelItemsCRC) {
537: // since nonkernel items are entirely determined by kernel
538: // items, and kernel items are sorted, it's sufficient to
539: // check for kernel list equality
540: // OLD: when pointer equality was sufficient
541: // return kernelItems.equalAsPointerLists(obj.kernelItems);
542: // NEW: use deep equality check
543: return kernelItems.equalAsLists(obj.kernelItems, LRItem::diff);
544: }
545: else {
546: // can't possibly be equal if CRCs differ
547: return false;
548: }
549: }
550:
551:
// Add 'item' to the nonkernel (closure) item list, taking ownership;
// duplicates are silently ignored.
void ItemSet::addNonkernelItem(LRItem *item)
{
  nonkernelItems.appendUnique(item);

  // note: the caller is supposed to call changedItems
}
558:
559:
// Suppress the reduction by 'prod' on lookahead 'sym' in this state,
// by removing 'sym' from the lookahead set of every matching
// reduction item (kernel and nonkernel).  Used for conflict
// resolution.
void ItemSet::removeReduce(Production const *prod, Terminal const *sym)
{
  MUTATE_EACH_OBJLIST(LRItem, kernelItems, k) {
    if (k.data()->isDotAtEnd() &&
        k.data()->getProd() == prod) {
      k.data()->laRemove(sym->termIndex);
    }
  }

  MUTATE_EACH_OBJLIST(LRItem, nonkernelItems, n) {
    if (n.data()->isDotAtEnd() &&
        n.data()->getProd() == prod) {
      n.data()->laRemove(sym->termIndex);
    }
  }

  #if 0
  // old approach: physically delete the reduction items instead of
  // trimming their lookahead sets
  ObjListMutator<LRItem> k(kernelItems);
  while (!k.isDone()) {
    if (k.data()->isDotAtEnd() &&
        k.data()->getProd() == prod) {
      k.deleteIt();
    }
    else {
      k.adv();
    }
  }

  changedItems();
  #endif // 0
}
591:
592:
593: void ItemSet::getAllItems(SObjList<LRItem> &dest, bool nonkernel) const
594: {
595: SObjListMutator<LRItem> mut(dest);
596:
597: FOREACH_OBJLIST(LRItem, kernelItems, k) {
598: mut.append(const_cast<LRItem*>(k.data()));
599: }
600: if (nonkernel) {
601: FOREACH_OBJLIST(LRItem, nonkernelItems, n) {
602: mut.append(const_cast<LRItem*>(n.data()));
603: }
604: }
605: }
606:
607:
// Comparator ordering item sets by their numeric 'id'; suitable for
// the list-sorting routines (negative/zero/positive convention).
STATICDEF int ItemSet::diffById(ItemSet const *left, ItemSet const *right, void*)
{
  return left->id - right->id;
}
612:
613:
// Discard items no longer needed once the tables are built, to save
// memory.  Reduction (dot-at-end) items must be retained because
// 'dotsAtEnd' points into the lists.
void ItemSet::throwAwayItems()
{
  // can't delete the whole lists because I need the
  // reductions; among other things, 'dotsAtEnd' refers to them
  deleteNonReductions(kernelItems);
  deleteNonReductions(nonkernelItems);
}
621:
622: void ItemSet::deleteNonReductions(ObjList<LRItem> &list)
623: {
624: ObjListMutator<LRItem> mut(list);
625: while (!mut.isDone()) {
626: if (mut.data()->isDotAtEnd()) {
627: // keep it
628: mut.adv();
629: }
630: else {
631: // trash it
632: mut.deleteIt(); // also advances
633: }
634: }
635: }
636:
637:
// return the reductions that are ready in this state, given
// that the next symbol is 'lookahead'
//
// The applicable lookahead test depends on the LR variant in force;
// LR0/SLR1/LALR1/LR1 appear to be global mode flags — TODO confirm
// where they are defined.  'parsing' only controls trace output.
void ItemSet::getPossibleReductions(ProductionList &reductions,
                                    Terminal const *lookahead,
                                    bool parsing) const
{
  // for each item with dot at end
  loopi(numDotsAtEnd) {
    LRItem const *item = dotsAtEnd[i];

    if (LR0) {
      // don't check the lookahead
    }
    else if (SLR1) {
      // the follow of its LHS must include 'lookahead'
      if (!item->getProd()->left->follow.contains(lookahead->termIndex)) {   // (constness)
        if (parsing && tracingSys("parse")) {
          trace("parse") << "state " << id
                         << ", not reducing by "
                         << item->getProd()->toString(false /*printType*/)
                         << " because " << lookahead->toString()
                         << " is not in follow of "
                         << item->getProd()->left->name << std::endl;
        }
        continue;
      }
    }
    else if (LALR1 || LR1) {
      // the item's lookahead must include 'lookahead'
      if (!item->laContains(lookahead->termIndex)) {
        if (parsing && tracingSys("parse")) {
          trace("parse") << "state " << id
                         << ", not reducing by "
                         << item->getProd()->toString(false /*printType*/)
                         << " because " << lookahead->toString()
                         << " is not in lookahead" << std::endl;
        }
        continue;
      }
    }
    else {
      xfailure("no LR variant specified?");
    }

    // ok, this one's ready
    reductions.append(const_cast<Production*>(item->getProd()));   // (constness)
  }
}
686:
687:
// LALR(1) merge step: union this state's kernel-item lookahead sets
// into the corresponding items of 'dest'.  The two kernel lists must
// already be equal modulo lookahead (same items, same order).
// Returns true if any of dest's lookahead sets actually grew.
bool ItemSet::mergeLookaheadsInto(ItemSet &dest) const
{
  // will return true if any changes made
  bool changes = false;

  // iterate over both kernel lists simultaneously
  ObjListIter<LRItem> srcIter(kernelItems);
  ObjListMutator<LRItem> destIter(dest.kernelItems);
  while (!srcIter.isDone() && !destIter.isDone()) {
    LRItem const &srcItem = *(srcIter.data());
    LRItem &destItem = *(destIter.data());

    // the caller should already have established equality of the
    // non-lookahead components of the kernel items
    xassert(srcItem.equalNoLA(destItem));

    // merge lookaheads
    if (destItem.laMerge(srcItem)) {
      changes = true;
    }

    srcIter.adv();
    destIter.adv();
  }

  // kernel list lengths are supposed to be the same
  xassert(srcIter.isDone() && destIter.isDone());

  return changes;
}
718:
719:
720: bool ItemSet::hasExtendingShift(Nonterminal const *A, Terminal const *t) const
721: {
722: FOREACH_OBJLIST(LRItem, kernelItems, iter1) {
723: if (iter1.data()->isExtendingShift(A, t)) { return true; }
724: }
725: FOREACH_OBJLIST(LRItem, nonkernelItems, iter2) {
726: if (iter2.data()->isExtendingShift(A, t)) { return true; }
727: }
728: return false;
729: }
730:
731:
// Return the production of the first reduction item in this state;
// the state must have at least one reduction.
Production const *ItemSet::getFirstReduction() const
{
  xassert(numDotsAtEnd >= 1);
  return dotsAtEnd[0]->getProd();
}
737:
738:
// Recompute the data derived from the item lists: the 'dotsAtEnd'
// array (reduction items), the kernel CRC, and 'stateSymbol'.
// Must be called after the item lists are modified and sorted.
void ItemSet::changedItems()
{
  // -- recompute dotsAtEnd --
  // collect all items
  SObjList<LRItem> items;     // (constness) 'items' shouldn't be used to modify the elements
  getAllItems(items);

  // count number with dots at end
  int count = 0;
  {
    SFOREACH_OBJLIST(LRItem, items, itemIter) {
      LRItem const *item = itemIter.data();

      if (item->isDotAtEnd()) {
        count++;
      }
    }
  }

  // get array of right size
  if (dotsAtEnd && count == numDotsAtEnd) {
    // no need to reallocate, already correct size
  }
  else {
    // throw old away
    if (dotsAtEnd) {
      delete[] dotsAtEnd;
    }

    // allocate new array
    numDotsAtEnd = count;
    dotsAtEnd = new LRItem const * [numDotsAtEnd];
  }

  // fill array
  int index = 0;
  SFOREACH_OBJLIST(LRItem, items, itemIter) {
    LRItem const *item = itemIter.data();

    if (item->isDotAtEnd()) {
      dotsAtEnd[index] = item;
      index++;
    }
  }

  // verify both loops executed same number of times
  xassert(index == count);

  // compute CRC; in this function, I just allocate here since this
  // function is already allocation-happy
  GrowArray<DottedProduction const*> array(0 /*allocate later*/);
  computeKernelCRC(array);

  // compute this so we can throw away items later if we want to
  stateSymbol = computeStateSymbolC();
}
795:
796:
// Compute 'kernelItemsCRC' by hashing the kernel items' 'dprod'
// pointers.  The caller supplies a scratch array so repeated calls
// can reuse one allocation.  Requires the kernel items to be sorted
// already, since the CRC is order-sensitive.
void ItemSet::computeKernelCRC(GrowArray<DottedProduction const*> &array)
{
  int numKernelItems = kernelItems.count();

  // expand as necessary, but don't get smaller
  array.ensureAtLeast(numKernelItems);

  // we will crc the prod/dot fields, using the pointer representation
  // of 'dprod'; assumes the items have already been sorted!
  int index = 0;
  FOREACH_OBJLIST(LRItem, kernelItems, kitem) {
    array[index] = kitem.data()->dprod;
    index++;
  }

  // CRC the buffer
  kernelItemsCRC = crc32((unsigned char const*)(array.getArray()),
                         sizeof(array[0]) * numKernelItems);
}
816:
817:
// Print a human-readable dump of this item set: each item (with its
// transition, if any), then the transition function itself, then the
// possible reductions.  'nonkernel' selects whether closure items
// are included.
void ItemSet::print(std::ostream &os, GrammarAnalysis const &g,
                    bool nonkernel) const
{
  os << "ItemSet " << id << ":\n";

  // collect all items
  SObjList<LRItem> items;      // (constness) don't use 'item' to modify elements
  getAllItems(items, nonkernel);

  // for each item
  SFOREACH_OBJLIST(LRItem, items, itemIter) {
    LRItem const *item = itemIter.data();

    // print its text
    os << "  ";
    item->print(os, g);
    os << "      ";

    // print any transitions on its after-dot symbol
    if (!item->isDotAtEnd()) {
      ItemSet const *is = transitionC(item->symbolAfterDotC());
      if (is == NULL) {
        // this happens if I print the item set before running closure,
        // and also after prec/assoc disambiguation
        os << "(no transition)";
      }
      else {
        os << "--> " << is->id;
      }
    }
    os << std::endl;
  }

  // print transition function directly, since I'm now throwing
  // away items sometimes
  for (int t=0; t<terms; t++) {
    if (termTransition[t]) {
      os << "  on terminal " << g.getTerminal(t)->name
         << " go to " << termTransition[t]->id << std::endl;
    }
  }

  for (int n=0; n<nonterms; n++) {
    if (nontermTransition[n]) {
      os << "  on nonterminal " << g.getNonterminal(n)->name
         << " go to " << nontermTransition[n]->id << std::endl;
    }
  }

  for (int p=0; p<numDotsAtEnd; p++) {
    os << "  can reduce by " << dotsAtEnd[p]->getProd()->toString() << std::endl;
  }
}
871:
872:
// Emit this state as a node ("n <name> <desc>") plus its outgoing
// transitions as edges ("e <from> <to>") in the author's ad-hoc
// graph file format.
void ItemSet::writeGraph(std::ostream &os, GrammarAnalysis const &g) const
{
  // node: n <name> <desc>
  os << "\nn ItemSet" << id << " ItemSet" << id << "/";
    // rest of desc will follow

  // collect all items
  SObjList<LRItem> items;     // (constness) don't use 'items' to modify elements
  getAllItems(items);

  // for each item, print the item text
  SFOREACH_OBJLIST(LRItem, items, itemIter) {
    LRItem const *item = itemIter.data();

    // print its text
    os << "   ";
    item->print(os, g);

    // THIS IS A PROBLEM!  the item's output will include
    // slashes too, if it has >1 lookahead token ... !
    os << "/";      // line separator in my node format
  }
  os << std::endl;

  // print transitions on terminals
  INTLOOP(t, 0, terms) {
    if (termTransition[t] != NULL) {
      os << "e ItemSet" << id
         << " ItemSet" << termTransition[t]->id << std::endl;
    }
  }

  // print transitions on nonterminals
  INTLOOP(nt, 0, nonterms) {
    if (nontermTransition[nt] != NULL) {
      os << "e ItemSet" << id
         << " ItemSet" << nontermTransition[nt]->id << std::endl;
    }
  }
}
913:
914:
915: // ------------------------ GrammarAnalysis --------------------
// Construct an empty analysis; all derived tables start out NULL and
// are built later by initializeAuxData (or restored by xfer).
GrammarAnalysis::GrammarAnalysis()
  : derivable(NULL),
    indexedNonterms(NULL),
    indexedTerms(NULL),
    numNonterms(0),
    numTerms(0),
    productionsByLHS(NULL),
    dottedProds(NULL),
    indexedProds(NULL),
    numProds(0),
    initialized(false),
    nextItemSetId(0),    // [ASU] starts at 0 too
    itemSets(),
    startState(NULL),
    cyclic(false),
    symOfInterest(NULL),
    errors(0),
    tables(NULL)
{}
935:
936:
937: GrammarAnalysis::~GrammarAnalysis()
938: {
939: if (indexedNonterms != NULL) {
940: delete indexedNonterms;
941: }
942:
943: if (indexedTerms != NULL) {
944: delete indexedTerms;
945: }
946:
947: if (productionsByLHS != NULL) {
948: // empties all lists automatically because of "[]"
949: delete[] productionsByLHS;
950: }
951:
952: if (indexedProds != NULL) {
953: delete[] indexedProds;
954: }
955:
956: deleteDottedProductions();
957:
958: if (derivable != NULL) {
959: delete derivable;
960: }
961:
962: if (tables) {
963: delete tables;
964: }
965: }
966:
967:
// Index-to-terminal lookup; asserts 'index' is in [0, numTerms)
// (the unsigned cast also rejects negative values).
Terminal const *GrammarAnalysis::getTerminal(int index) const
{
  xassert((unsigned)index < (unsigned)numTerms);
  return indexedTerms[index];
}

// Index-to-nonterminal lookup, bounds-checked as above.
Nonterminal const *GrammarAnalysis::getNonterminal(int index) const
{
  xassert((unsigned)index < (unsigned)numNonterms);
  return indexedNonterms[index];
}

// Index-to-production lookup, bounds-checked as above.
Production const *GrammarAnalysis::getProduction(int index) const
{
  xassert((unsigned)index < (unsigned)numProds);
  return indexedProds[index];
}
985:
986: ItemSet const *GrammarAnalysis::getItemSet(int index) const
987: {
988: // no pretense of efficiency; this is only used interactively
989: FOREACH_OBJLIST(ItemSet, itemSets, iter) {
990: if (iter.data()->id == index) {
991: return iter.data();
992: }
993: }
994: return NULL;
995: }
996:
997:
// Serialize/deserialize the analysis.  The sequencing here is
// delicate: owned data first, then the index tables are *recomputed*
// (not stored), and only then can the item sets' serf pointers be
// fixed up.  Must stay in sync with initializeAuxData.
void GrammarAnalysis::xfer(Flatten &flat)
{
  Grammar::xfer(flat);

  xferOwnerPtr(flat, derivable);

  // delay indexed[Non]Terms, productionsByLHS,
  // and initialized

  flat.xferInt(nextItemSetId);

  xferObjList(flat, itemSets);
  xferSerfPtrToList(flat, startState, itemSets);

  flat.xferBool(cyclic);

  // don't bother xferring 'symOfInterest', since it's
  // only used for debugging

  // 7/27/03: tables are no longer xferrable
  //xferOwnerPtr(flat, tables);

  // now do the easily-computable stuff
  // NOTE: these functions are also called by initializeAuxData,
  // so they need to serve both callers correctly
  computeIndexedNonterms();
  computeIndexedTerms();
  computeProductionsByLHS();
  createDottedProductions();

  // do serfs after because if I want to compute the
  // nonkernel items instead of storing them, I need
  // the indices
  MUTATE_EACH_OBJLIST(ItemSet, itemSets, iter) {
    iter.data()->xferSerfs(flat, *this);
  }

  flat.xferBool(initialized);
}
1037:
1038:
// Print the grammar's productions, prefixing a "(cyclic!)" warning
// when the grammar has been found to be cyclic.
void GrammarAnalysis::
  printProductions(std::ostream &os, bool printCode) const
{
  if (cyclic) {
    os << "(cyclic!) ";
  }
  Grammar::printProductions(os, printCode);
}
1047:
1048:
// Print the productions followed by a dump of every LR item set.
void GrammarAnalysis::
  printProductionsAndItems(std::ostream &os, bool printCode) const
{
  printProductions(os, printCode);

  FOREACH_OBJLIST(ItemSet, itemSets, iter) {
    iter.data()->print(os, *this);
  }
}
1058:
1059:
1060: void printSymbols(std::ostream &os, ObjList<Symbol> const &list)
1061: {
1062: for (ObjListIter<Symbol> iter(list);
1063: !iter.isDone(); iter.adv()) {
1064: os << " " << *(iter.data()) << std::endl;
1065: }
1066: }
1067:
1068:
// Record that 'left' ->* 'right'; returns true if this is new
// information (i.e. the relation actually changed).
bool GrammarAnalysis::addDerivable(Nonterminal const *left, Nonterminal const *right)
{
  return addDerivable(left->ntIndex, right->ntIndex);
}

// Index-based form of the above; also performs cyclicity detection.
bool GrammarAnalysis::addDerivable(int left, int right)
{
  // Almost as an aside, I'd like to track cyclicity in grammars.
  // It's always true that N ->* N, because 0 steps are allowed.
  // A grammar is cyclic if N ->+ N, i.e. it derives itself in
  // 1 or more steps.
  //
  // We can detect that fairly easily by tracking calls to
  // this fn with left==right.  Since N ->* N in 0 steps is
  // recorded during init (and *not* by calling this fn), the
  // only calls to this with left==right will be when the
  // derivability code detects a nonzero-length path.

  if (left==right) {
    Nonterminal *NT = indexedNonterms[left];    // ==right
    if (!NT->cyclic) {
      trace("derivable")
        << "discovered that " << NT->name << " ->+ "
        << NT->name << " (i.e. is cyclic)\n";
      NT->cyclic = true;
      cyclic = true;     // for grammar as a whole

      // Even though we didn't know this already, it doesn't
      // constitute a change in the ->* relation (which is what the
      // derivability code cares about), so we do *not* report a
      // change for the cyclicty detection.
    }
  }

  // we only made a change, and hence should return true,
  // if there was a 0 here before
  return 0 == derivable->testAndSet(point(left, right));
}
1107:
1108:
// Query the (currently known) derivability relation: left ->* right.
bool GrammarAnalysis::canDerive(Nonterminal const *left, Nonterminal const *right) const
{
  return canDerive(left->ntIndex, right->ntIndex);
}

// Index-based form; reads the 'derivable' bit matrix directly.
bool GrammarAnalysis::canDerive(int left, int right) const
{
  return 1 == derivable->get(point(left, right));
}
1118:
1119:
// Allocate and initialize the derivability bit matrix: all zero,
// except the reflexive entries (N ->* N in 0 steps).
void GrammarAnalysis::initDerivableRelation()
{
  // two-dimensional matrix to represent token derivabilities
  derivable = new Bit2d(point(numNonterms, numNonterms));

  // initialize it
  derivable->setall(0);
  loopi(numNonterms) {
    derivable->set(point(i,i));
      // every nonterminal can derive itself in 0 or more steps
      // (specifically, in 0 steps, at least)
      //
      // NOTE: we do *not* call addDerivable because that would
      // mess up the cyclicity detection logic
  }
}
1136:
1137:
// True if 'nonterm' can derive the empty string; encoded as
// derivability of the special 'emptyString' nonterminal.
bool GrammarAnalysis::canDeriveEmpty(Nonterminal const *nonterm) const
{
  return canDerive(nonterm, &emptyString);
}

// True if every symbol in 'list' can derive the empty string.
bool GrammarAnalysis::sequenceCanDeriveEmpty(RHSEltList const &list) const
{
  RHSEltListIter iter(list);
  return iterSeqCanDeriveEmpty(iter);
}
1149:
1150: bool GrammarAnalysis::iterSeqCanDeriveEmpty(RHSEltListIter iter) const
1151: {
1152: // look through the sequence beginning with 'iter'; if any members cannot
1153: // derive emptyString, fail
1154: for (; !iter.isDone(); iter.adv()) {
1155: if (iter.data()->sym->isTerminal()) {
1156: return false; // terminals can't derive emptyString
1157: }
1158:
1159: if (!canDeriveEmpty(&( iter.data()->sym->asNonterminalC() ))) {
1160: return false; // nonterminal that can't derive emptyString
1161: }
1162: }
1163:
1164: return true;
1165: }
1166:
1167:
// True if terminal 'term' is in First(NT).
bool GrammarAnalysis::firstIncludes(Nonterminal const *NT, Terminal const *term) const
{
  return NT->first.contains(term->termIndex);
}

#if 0   // disabled: superseded by the bitset-based First representation
bool GrammarAnalysis::addFirst(Nonterminal *NT, Terminal *term)
{
  return NT->first.prependUnique(term);

  // regarding non-constness of 'term':
  // highly nonideal.. the problem is that by using annotations in
  // the structures themselves, I have a hard time saying that I
  // intend to modify the annotations but not the "key" data...
  // this cast is really a symptom of that too.. (and, perhaps, also
  // that I don't have a List class that promises to never permit
  // modification of the pointed-to data.. but it's not clear I'd
  // be better of using it here even if I had it)
}
#endif // 0
1188:
1189:
// True if terminal 'term' is in Follow(NT).
bool GrammarAnalysis::followIncludes(Nonterminal const *NT, Terminal const *term) const
{
  return NT->follow.contains(term->termIndex);
}

#if 0   // disabled: superseded by the bitset-based Follow representation
// returns true if Follow(NT) is changed by adding 'term' to it
bool GrammarAnalysis::addFollow(Nonterminal *NT, Terminal *term)
{
  return NT->follow.prependUnique(term);
}
#endif // 0
1202:
1203:
1204: // ----------------- Grammar algorithms --------------------------
1205: // create and initialize 'indexedNonterms'
// Build 'indexedNonterms', the dense ntIndex -> Nonterminal* map,
// assigning indices as it goes.  Slot 'emptyStringIndex' is reserved
// for the special emptyString nonterminal.
void GrammarAnalysis::computeIndexedNonterms()
{
  // map: ntIndex -> Nonterminal*
  numNonterms = Grammar::numNonterminals();
  indexedNonterms = new Nonterminal* [numNonterms];

  // fill it
  indexedNonterms[emptyStringIndex] = &emptyString;
  int index = emptyStringIndex;
  emptyString.ntIndex = index++;

  for (ObjListMutator<Nonterminal> sym(nonterminals);
       !sym.isDone(); index++, sym.adv()) {
    indexedNonterms[index] = sym.data();    // map: index to symbol
    sym.data()->ntIndex = index;            // map: symbol to index
  }
}
1223:
1224:
1225: // create and initialize 'indexedTerms'
// Build 'indexedTerms', the dense termIndex -> Terminal* map.
// Unlike nonterminals, terminal indices were assigned earlier; this
// verifies they form a collision-free, contiguous space from 0.
void GrammarAnalysis::computeIndexedTerms()
{
  // map: termIndex -> Terminal*
  // the ids have already been assigned; but I'm going to continue
  // to insist on a contiguous space starting at 0
  numTerms = Grammar::numTerminals();
  indexedTerms = new Terminal* [numTerms];
  loopi(numTerminals()) {
    indexedTerms[i] = NULL;      // used to track id duplication
  }
  for (ObjListMutator<Terminal> sym(terminals);
       !sym.isDone(); sym.adv()) {
    int index = sym.data()->termIndex;   // map: symbol to index
    if (indexedTerms[index] != NULL) {
      xfailure(sm_stringc << "terminal index collision at index " << index);
    }
    indexedTerms[index] = sym.data();    // map: index to symbol
  }
}
1245:
1246:
1247: // set the first/follow of all nonterminals to the correct size
1248: void GrammarAnalysis::resetFirstFollow()
1249: {
1250: MUTATE_EACH_NONTERMINAL(nonterminals, sym) {
1251: sym.data()->first.reset(numTerminals());
1252: sym.data()->follow.reset(numTerminals());
1253: }
1254: }
1255:
1256:
1257: // create and initialize 'productionsByLHS' and 'indexedProds'
// Build two maps over the productions: 'productionsByLHS' (LHS
// nonterminal index -> list of its productions) and 'indexedProds'
// (dense prodIndex -> Production*).  Requires nonterminal indices
// and production indices to already be assigned.
void GrammarAnalysis::computeProductionsByLHS()
{
  // map: nonterminal -> productions with that nonterm on LHS
  productionsByLHS = new SObjList<Production> [numNonterms];

  // map: prodIndex -> production
  numProds = productions.count();
  indexedProds = new Production* [numProds];
  memset(indexedProds, 0, sizeof(*indexedProds) * numProds);

  // fill in both maps
  {
    MUTATE_EACH_PRODUCTION(productions, prod) {      // (constness)
      int LHSindex = prod.data()->left->ntIndex;
      xassert(LHSindex < numNonterms);

      productionsByLHS[LHSindex].append(prod.data());
      indexedProds[prod.data()->prodIndex] = prod.data();
    }
  }

  // verify we filled the 'prodIndex' map
  for (int id=0; id<numProds; id++) {
    xassert(indexedProds[id] != NULL);
  }
}
1284:
1285:
// Allocate the table of DottedProductions: for each production, an
// array with one entry per dot position (rhsLength + 1 of them).
// Requires production indices to form a dense map starting at 0.
void GrammarAnalysis::createDottedProductions()
{
  // map: prodIndex x dotPosn -> DottedProduction
  //DottedProduction const **
  dottedProds = new DottedProduction* [numProds];
  memset(dottedProds, 0, sizeof(*dottedProds) * numProds);

  FOREACH_PRODUCTION(productions, iter) {
    Production const *prod = iter.data();
    int rhsLen = prod->rhsLength();
    xassert(rhsLen >= 0);
    int id = prod->prodIndex;

    // one dottedproduction for every dot position, which is one
    // more than the # of RHS elements
    DottedProduction *array = new DottedProduction[rhsLen + 1];
    dottedProds[id] = array;

    // fill in each one
    for (int posn=0; posn <= rhsLen; posn++) {
      array[posn].setProdAndDot(prod, posn);
    }
  }

  // verify we filled the whole table, i.e. that the production
  // indices form a dense map
  for (int id=0; id<numProds; id++) {
    xassert(dottedProds[id] != NULL);
  }
}
1316:
1317:
1318: void GrammarAnalysis::deleteDottedProductions()
1319: {
1320: if (dottedProds != NULL) {
1321: for (int id=0; id<numProds; id++) {
1322: delete[] dottedProds[id];
1323: }
1324: delete[] dottedProds;
1325: dottedProds = NULL;
1326: }
1327: }
1328:
1329:
1330: DottedProduction const *GrammarAnalysis::
1331: getDProd(Production const *prod, int posn) const
1332: {
1333: xassert(posn <= prod->rhsLength());
1334: return &( dottedProds[prod->prodIndex][posn] );
1335: }
1336:
// Look up a DottedProduction by production index and dot position.
DottedProduction const *GrammarAnalysis::
  getDProdIndex(int prodIndex, int posn) const
{
  // go through the other fn to bounds-check 'posn'
  return getDProd(getProduction(prodIndex), posn);
}


#ifndef NDEBUG
// Advance a dotted production by one position; relies on the dot
// positions for a production being laid out contiguously by
// createDottedProductions.  Debug builds only.
DottedProduction const *GrammarAnalysis::
  nextDProd(DottedProduction const *dp) const
{
  xassert(!dp->isDotAtEnd());
  return dp + 1;
}
#endif // !NDEBUG
1353:
1354:
1355: // NOTE: the sequence of initialization actions in this function
1356: // and the functions it calls must interact properly with the
1357: // sequence in GrammarAnalysis::xfer
// NOTE: the sequence of initialization actions in this function
// and the functions it calls must interact properly with the
// sequence in GrammarAnalysis::xfer
//
// Build all the derived tables (index maps, first/follow storage,
// dotted productions, derivability matrix) from the raw grammar.
void GrammarAnalysis::initializeAuxData()
{
  // at the moment, calling this twice leaks memory
  xassert(!initialized);

  computeIndexedNonterms();
  computeIndexedTerms();
  resetFirstFollow();

  computeProductionsByLHS();
  computeReachable();

  // finish the productions before we compute the
  // dotted productions
  MUTATE_EACH_PRODUCTION(productions, prod) {
    prod.data()->finished(numTerminals());
  }

  createDottedProductions();

  // initialize the derivable relation
  initDerivableRelation();

  // mark the grammar as initialized
  initialized = true;
}
1384:
1385:
// Compute the full ->* (derivability) relation between nonterminals
// as a fixpoint: alternately discover new facts from the productions
// and propagate them by a one-step closure, until nothing changes.
// Also detects cyclic grammars as a side effect (via addDerivable).
void GrammarAnalysis::computeWhatCanDeriveWhat()
{
  xassert(initialized);


  // iterate: propagate 'true' bits across the derivability matrix
  // (i.e. compute transitive closure on the canDerive relation)
  for (;;) {
    int changes = 0;       // for this iter, # of times we set a matrix bit

    // --------- first part: add new canDerive relations --------
    // loop over all productions
    for (ObjListIter<Production> prodIter(productions);
         !prodIter.isDone(); prodIter.adv()) {
      // convenient alias
      Production const *prod = prodIter.data();

      // since I don't include 'empty' explicitly in my rules, I won't
      // conclude that anything can derive empty, which is a problem;
      // so I special-case it here
      if (prod->right.isEmpty()) {
        addDerivable(prod->left, &emptyString);
        continue;      	// no point in looping over RHS symbols since there are none
      }

      // iterate over RHS symbols, seeing if the LHS can derive that
      // RHS symbol (by itself)
      for (RHSEltListIter rightSym(prod->right);
           !rightSym.isDone(); rightSym.adv()) {

        if (rightSym.data()->sym->isTerminal()) {
          // if prod->left derives a sm_string containing a terminal,
          // then it can't derive any nontermial alone (using this
          // production, at least) -- empty is considered a nonterminal
          break;
        }

        // otherwise, it's a nonterminal
        Nonterminal const &rightNT = rightSym.data()->sym->asNonterminalC();

        // check if we already know that LHS derives rightNT
        if (canDerive(prod->left, &rightNT)) {
          // we already know that prod->left derives rightSym,
          // so let's not check it again
        }

        else {
          // we are wondering if prod->left can derive rightSym.. for
          // this to be true, every symbol that comes after rightSym
          // must be able to derive emptySymbol (we've already verified
          // by now that every symbol to the *left* can derive empty)
          RHSEltListIter afterRightSym(rightSym);
          bool restDeriveEmpty = true;
          for (afterRightSym.adv();      // *after* right symbol
               !afterRightSym.isDone(); afterRightSym.adv()) {

            if (afterRightSym.data()->sym->isTerminal() ||
                  // if it's a terminal, it can't derive emptyString
                !canDeriveEmpty(&( afterRightSym.data()->sym->asNonterminalC() ))) {
              // this symbol can't derive empty sm_string (or, we don't
              // yet know that it can), so we conclude that prod->left
              // can't derive rightSym
              restDeriveEmpty = false;
              break;
            }
          }

          if (restDeriveEmpty) {
            // we have discovered that prod->left can derive rightSym
            bool chgd = addDerivable(prod->left, &rightNT);
            xassert(chgd);    // above, we verified we didn't already know this

            changes++;

            trace("derivable")
              << "discovered (by production): " << prod->left->name
              << " ->* " << rightNT.name << "\n";
          }
        }

        // ok, we've considered prod->left deriving rightSym.  now, we
        // want to consider whether prod->left can derive any of the
        // symbols that follow rightSym in this production.  for this
        // to be true, rightSym itself must derive the emptyString
        if (!canDeriveEmpty(&rightNT)) {
          // it doesn't -- no point in further consideration of
          // this production
          break;
        }
      } // end of loop over RHS symbols
    } // end of loop over productions


    // -------- second part: compute closure over existing relations ------
    // I'll do this by computing R + R^2 -- that is, I'll find all
    // paths of length 2 and add an edge between their endpoints.
    // I do this, rather than computing the entire closure now, since
    // on the next iter I will add more relations and have to re-do
    // a full closure; iterative progress seems a better way.

    // I don't consider edges (u,u) because it messes up my cyclicty
    // detection logic.  (But (u,v) and (v,u) is ok, and in fact is
    // what I want, for detecting cycles.)

    // for each node u (except empty)
    int numNonterms = numNonterminals();
    for (int u=1; u<numNonterms; u++) {
      // for each edge (u,v) where u != v
      for (int v=0; v<numNonterms; v++) {
        if (u==v || !canDerive(u,v)) continue;

        // for each edge (v,w) where v != w
        for (int w=0; w<numNonterms; w++) {
          if (v==w || !canDerive(v,w)) continue;

          // add an edge (u,w), if there isn't one already
          if (addDerivable(u,w)) {
            changes++;
            trace("derivable")
              << "discovered (by closure step): "
              << indexedNonterms[u]->name << " ->* "
              << indexedNonterms[w]->name << "\n";
          }
        }
      }
    }


    // ------ finally: iterate until no changes -------
    if (changes == 0) {
      // didn't make any changes during the last iter, so
      // everything has settled
      break;
    }
  } // end of loop until settles


  // I used to do all closure here and no closure in the loop.
  // But that fails in cases where closure (when it reveals
  // more things that derive emptyString) yields new opportunities
  // for derives-relation discovery.  Therefore I now alternate
  // between them, and at the end, no closure is necessary.
}
1529:
1530:
1531: // set Nonterminal::superset to correspond to Nonterminal::subsets
1532: void GrammarAnalysis::computeSupersets()
1533: {
1534: FOREACH_OBJLIST_NC(Nonterminal, nonterminals, iter1) {
1535: Nonterminal *super = iter1.data();
1536:
1537: SFOREACH_OBJLIST_NC(Nonterminal, super->subsets, iter2) {
1538: Nonterminal *sub = iter2.data();
1539:
1540: // for now, only handle 'super' as a partial function
1541: if (sub->superset != NULL) {
1542: xfailure(sm_stringc << sub->name << " has more than one superset");
1543: }
1544: sub->superset = super;
1545: }
1546: }
1547: }
1548:
1549:
1550: // Compute, for each nonterminal, the "First" set, defined as:
1551: //
1552: // First(N) = { x | N ->* x alpha }, where alpha is any sequence
1553: // of terminals and nonterminals
1554: //
1555: // If N can derive emptyString, I'm going to say that empty is
1556: // *not* in First, despite what Aho/Sethi/Ullman says. I do this
1557: // because I have that information readily as my derivable relation,
1558: // and because it violates the type system I've devised.
1559: //
1560: // I also don't "compute" First for terminals, since they are trivial
1561: // (First(x) = {x}).
1562: void GrammarAnalysis::computeFirst()
1563: {
1564: bool tr = tracingSys("first");
1565: int numTerms = numTerminals();
1566:
1567: // iterate, looking for new First members, until no changes
1568: int changes = 1; // so the loop begins
1569: while (changes > 0) {
1570: changes = 0;
1571:
1572: // for each production
1573: for (ObjListMutator<Production> prodIter(productions);
1574: !prodIter.isDone(); prodIter.adv()) {
1575: // convenient aliases
1576: Production *prod = prodIter.data();
1577: Nonterminal *LHS = prod->left;
1578: // the list iter is mutating because I modify LHS's First set
1579:
1580: // compute First(RHS-sequence)
1581: TerminalSet firstOfRHS(numTerms);
1582: firstOfSequence(firstOfRHS, prod->right);
1583:
1584: // store this back into 'prod'
1585: prod->firstSet.merge(firstOfRHS);
1586:
1587: // add everything in First(RHS-sequence) to First(LHS)
1588: if (LHS->first.merge(firstOfRHS)) {
1589: changes++;
1590: if (tr) {
1591: std::ostream &trs = trace("first");
1592: trs << "added ";
1593: firstOfRHS.print(trs, *this);
1594: trs << " to " << LHS->name << " because of "
1595: << prod->toString() << std::endl;
1596: }
1597: }
1598: } // for (productions)
1599: } // while (changes)
1600:
1601: if (tr) {
1602: FOREACH_NONTERMINAL(nonterminals, iter) {
1603: Nonterminal const &nt = *(iter.data());
1604:
1605: std::ostream &trs = trace("first") << " " << nt.name << ": ";
1606: nt.first.print(trs, *this);
1607: trs << std::endl;
1608: }
1609: }
1610: }
1611:
1612:
1613: // 'sequence' isn't const because we need to hand pointers over to
1614: // the 'destList', which isn't const; similarly for 'this'
1615: // (what I'd like here is to say that 'sequence' and 'this' are const
1616: // if 'destList' can't modify the things it contains)
1617: void GrammarAnalysis::firstOfSequence(TerminalSet &destList,
1618: RHSEltList const &sequence)
1619: {
1620: RHSEltListIter iter(sequence);
1621: firstOfIterSeq(destList, iter);
1622: }
1623:
1624: // similar to above, 'sym' needs to be a mutator
1625: void GrammarAnalysis::firstOfIterSeq(TerminalSet &destList,
1626: RHSEltListIter sym)
1627: {
1628: //int numTerms = numTerminals();
1629:
1630: // for each sequence member such that all
1631: // preceeding members can derive emptyString
1632: for (; !sym.isDone(); sym.adv()) {
1633: // LHS -> x alpha means x is in First(LHS)
1634: if (sym.data()->sym->isTerminal()) {
1635: destList.add(sym.data()->sym->asTerminal().termIndex);
1636: break; // stop considering RHS members since a terminal
1637: // effectively "hides" all further symbols from First
1638: }
1639:
1640: // sym must be a nonterminal
1641: Nonterminal const &nt = sym.data()->sym->asNonterminalC();
1642:
1643: // anything already in nt's First should be added to destList
1644: destList.merge(nt.first);
1645:
1646: // if nt can't derive emptyString, then it blocks further
1647: // consideration of right-hand side members
1648: if (!canDeriveEmpty(&nt)) {
1649: break;
1650: }
1651: } // for (RHS members)
1652: }
1653:
1654:
1655: void GrammarAnalysis::computeDProdFirsts()
1656: {
1657: // for each production..
1658: FOREACH_PRODUCTION(productions, prodIter) {
1659: // for each dotted production where the dot is not at the end..
1660: int rhsLen = prodIter.data()->rhsLength();
1661: for (int posn=0; posn <= rhsLen; posn++) {
1662: DottedProduction *dprod = getDProd_nc(prodIter.data(), posn);
1663:
1664: // compute its first
1665: RHSEltListIter symIter(dprod->getProd()->right, posn);
1666: dprod->firstSet.reset(numTerms);
1667: firstOfIterSeq(dprod->firstSet, symIter);
1668:
1669: // can it derive empty?
1670: dprod->canDeriveEmpty = iterSeqCanDeriveEmpty(symIter);
1671: }
1672: }
1673: }
1674:
1675:
// Compute, for each nonterminal B, the "Follow" set: the terminals
// that can appear immediately to the right of B in some sentential
// form.  Iterates the two classic [ASU] rules to a fixpoint.
void GrammarAnalysis::computeFollow()
{
  int numTerms = numTerminals();

  // loop until no changes
  int changes = 1;
  while (changes > 0) {
    changes = 0;

    // 'mutate' is needed because adding 'term' to the follow of 'nt'
    // needs a mutable 'term' and 'nt'

    // for each production
    MUTATE_EACH_PRODUCTION(productions, prodIter) {
      Production *prod = prodIter.data();

      // for each RHS nonterminal member
      MUTATE_EACH_OBJLIST(Production::RHSElt, prod->right, rightSym) {
        // Follow is only defined for nonterminals
        if (rightSym.data()->sym->isTerminal()) continue;

        // convenient alias
        Nonterminal &rightNT = rightSym.data()->sym->asNonterminal();

        // I'm not sure what it means to compute Follow(emptyString),
        // so let's just not do so
        if (&rightNT == &emptyString) {
          continue;
        }

        // an iterator pointing to the symbol just after
        // 'rightSym' will be useful below
        RHSEltListMutator afterRightSym(rightSym);
        afterRightSym.adv();     // NOTE: 'isDone()' may be true now

        // rule 1:
        // if there is a production A -> alpha B beta, then
        // everything in First(beta) is in Follow(B)
        {
          // compute First(beta); if the dot is at the end, 'beta' is
          // empty and this contributes nothing
          TerminalSet firstOfBeta(numTerms);
          firstOfIterSeq(firstOfBeta, afterRightSym);

          // put those into Follow(rightNT)
          if (rightNT.follow.merge(firstOfBeta)) {
            changes++;
            // debug output only for the single nonterminal under
            // scrutiny ('symOfInterest'), to keep traces readable
            if (&rightNT == symOfInterest) {
              std::ostream &trs = trace("follow-sym");
              trs << "Follow(" << rightNT.name
                  << "): adding ";
              firstOfBeta.print(trs, *this);
              trs << " by first(RHS-tail) of " << *prod
                  << std::endl;
            }
          }
        }

        // rule 2:
        // if there is a production A -> alpha B, or a
        // production A -> alpha B beta where beta ->* empty ...
        if (iterSeqCanDeriveEmpty(afterRightSym)) {
          // ... then everything in Follow(A) is in Follow(B)
          if (rightNT.follow.merge(prod->left->follow)) {
            changes++;
            if (&rightNT == symOfInterest) {
              std::ostream &trs = trace("follow-sym");
              trs << "Follow(" << rightNT.name
                  << "): adding ";
              prod->left->follow.print(trs, *this);
              trs << " by follow(LHS) of " << *prod
                  << std::endl;
            }
          }
        }

      } // for each RHS nonterminal member
    } // for each production
  } // until no changes
}
1754:
1755:
1756: // [ASU] alg 4.4, p.190
1757: void GrammarAnalysis::computePredictiveParsingTable()
1758: {
1759: int numTerms = numTerminals();
1760: int numNonterms = numNonterminals();
1761:
1762: // the table will be a 2d array of lists of productions
1763: ProductionList *table = new ProductionList[numTerms * numNonterms]; // (owner)
1764: #define TABLE(term,nt) table[(term) + (nt)*numNonterms]
1765:
1766: // for each production 'prod' (non-const iter because adding them
1767: // to ProductionList, which doesn't promise to not change them)
1768: MUTATE_EACH_PRODUCTION(productions, prodIter) {
1769: Production *prod = prodIter.data();
1770:
1771: // for each terminal 'term' in First(RHS)
1772: TerminalSet firsts(numTerms);
1773: firstOfSequence(firsts, prod->right);
1774: for (int termIndex=0; termIndex<numTerms; termIndex++) {
1775: if (!firsts.contains(termIndex)) continue;
1776:
1777: // add 'prod' to table[LHS,term]
1778: TABLE(prod->left->ntIndex, termIndex).prependUnique(prod);
1779: }
1780:
1781: // if RHS ->* emptyString, ...
1782: if (sequenceCanDeriveEmpty(prod->right)) {
1783: // ... then for each terminal 'term' in Follow(LHS), ...
1784: for (int termIndex=0; termIndex<numTerms; termIndex++) {
1785: if (!firsts.contains(termIndex)) continue;
1786:
1787: // ... add 'prod' to table[LHS,term]
1788: TABLE(prod->left->ntIndex, termIndex).prependUnique(prod);
1789: }
1790: }
1791: }
1792:
1793:
1794: // print the resulting table
1795: std::ostream &os = trace("pred-table") << std::endl;
1796:
1797: // for each nonterminal
1798: INTLOOP(nonterm, 0, numNonterms) {
1799: os << "Row " << indexedNonterms[nonterm]->name << ":\n";
1800:
1801: // for each terminal
1802: INTLOOP(term, 0, numTerms) {
1803: os << " Column " << indexedTerms[term]->name << ":";
1804:
1805: // for each production in table[nonterm,term]
1806: SFOREACH_PRODUCTION(TABLE(nonterm,term), prod) {
1807: os << " ";
1808: prod.data()->print(os);
1809: }
1810:
1811: os << std::endl;
1812: }
1813: }
1814:
1815: // cleanup
1816: #undef TABLE
1817: delete[] table;
1818: }
1819:
1820:
1821: // these hashtables are keyed using the DottedProduction,
1822: // but yield LRItems as values
1823:
1824: // for storing dotted productions in a hash table, this is
1825: // the hash function itself
1826: STATICDEF unsigned LRItem::hash(DottedProduction const *key)
1827: {
1828: //DottedProduction const *dp = (DottedProduction const*)key;
1829:
1830: // on the assumption few productions have 20 RHS elts..
1831: //int val = dp->dot + (20 * dp->prod->prodIndex);
1832:
1833: // just use the address.. they're all shared..
1834: return HashTable::lcprngHashFn((void*)key);
1835: }
1836:
1837: // given the data, yield the key
1838: STATICDEF DottedProduction const *LRItem::dataToKey(LRItem *it)
1839: {
1840: return it->dprod;
1841: }
1842:
1843: // compare two dotted production keys for equality; since dotted
1844: // productions are shared, pointer equality suffices
1845: STATICDEF bool LRItem::dpEqual(DottedProduction const *key1,
1846: DottedProduction const *key2)
1847: {
1848: return key1 == key2;
1849: }
1850:
1851:
// based on [ASU] figure 4.33, p.223
// Expand 'itemSet' to its LR closure: repeatedly, for each item
// "A -> alpha . B beta, LA", add items "B -> . gamma" with lookahead
// First(beta LA), until no item (or lookahead set) changes.  The new
// items end up in itemSet.nonkernelItems.
// NOTE: sometimes this is called with nonempty nonkernel items...
void GrammarAnalysis::itemSetClosure(ItemSet &itemSet)
{
  bool const tr = tracingSys("closure");
  std::ostream &trs = trace("closure"); // trace stream
  if (tr) {
    trs << "computing closure of ";
    itemSet.print(trs, *this);
  }

  // hashtable, list of items still yet to close; items are
  // simultaneously in both the hash and the list, or not in either
  #if 0
  OwnerKHashArray<LRItem, DottedProduction> workhash(
    &LRItem::dataToKey,
    &LRItem::hash,
    &LRItem::dpEqual, 13);
  #endif // 0

  // every 'item' on the worklist has item->dprod->backPointer == item;
  // every 'dprod' not associated has dprod->backPointer == NULL
  ArrayStack<LRItem*> worklist;

  // scratch terminal set for singleItemClosure
  TerminalSet scratchSet(numTerminals());

  // and another for the items we've finished
  OwnerKHashTable<LRItem, DottedProduction> finished(
    &LRItem::dataToKey,
    &LRItem::hash,
    &LRItem::dpEqual, 13);
  finished.setEnableShrink(false);

  // put all the nonkernels we have into 'finished'
  // (they may be re-activated if closure adds to their lookahead)
  while (itemSet.nonkernelItems.isNotEmpty()) {
    LRItem *dp = itemSet.nonkernelItems.removeFirst();
    finished.add(dp->dprod, dp);
  }

  // first, close the kernel items -> worklist
  FOREACH_OBJLIST(LRItem, itemSet.kernelItems, itemIter) {
    singleItemClosure(finished, worklist, itemIter.data(), scratchSet);
  }

  // worklist loop: each popped item moves to 'finished' and is closed;
  // closure may in turn push more items (or re-activate finished ones)
  while (worklist.isNotEmpty()) {
    // pull the first production
    LRItem *item = worklist.pop();
    xassert(item->dprod->backPointer == item); // was on worklist
    item->dprod->backPointer = NULL; // now off of worklist

    // put it into list of 'done' items; this way, if this
    // exact item is generated during closure, it will be
    // seen and re-inserted (instead of duplicated)
    finished.add(item->dprod, item);

    // close it -> worklist
    singleItemClosure(finished, worklist, item, scratchSet);
  }

  // move everything from 'finished' to the nonkernel items list
  try {
    for (OwnerKHashTableIter<LRItem, DottedProduction> iter(finished);
         !iter.isDone(); iter.adv()) {
      // temporarily, the item is owned both by the hashtable
      // and the list
      itemSet.nonkernelItems.prepend(iter.data());
    }
    finished.disownAndForgetAll();
  }
  catch (...) {
    breaker(); // debug breakpoint

    // resolve the multiple ownership by leaking some
    finished.disownAndForgetAll();
    throw;
  }

  // we potentially added a bunch of things
  itemSet.changedItems();

  if (tr) {
    trs << "done with closure of state " << itemSet.id << std::endl;
    itemSet.print(trs, *this);
  }
}
1938:
1939:
// One step of LR closure for a single item "A -> alpha . B beta, LA":
// for every production "B -> gamma", compute the item
// "B -> . gamma, First(beta LA)" and either create it (onto
// 'worklist') or merge its lookahead into an existing copy (in
// 'worklist' or 'finished').  'newItemLA' is caller-provided scratch
// space so this hot path performs no allocation in the common case
// (the CHECK/UPDATE_MALLOC_STATS macros enforce that discipline).
void GrammarAnalysis
  ::singleItemClosure(OwnerKHashTable<LRItem, DottedProduction> &finished,
                      ArrayStack<LRItem*> &worklist,
                      //OwnerKHashArray<LRItem, DottedProduction> &workhash,
                      LRItem const *item, TerminalSet &newItemLA)
{
  INITIAL_MALLOC_STATS();

  bool const tr = tracingSys("closure");
  std::ostream &trs = trace("closure"); // trace stream

  if (tr) {
    trs << " considering item ";
    item->print(trs, *this);
    trs << std::endl;
  }

  // dot at the end: nothing follows the dot, so nothing to close
  if (item->isDotAtEnd()) {
    if (tr) {
      trs << " dot is at the end" << std::endl;
    }
    CHECK_MALLOC_STATS("return, dot at end");
    return;
  }

  // in comments that follow, 'item' is broken down as
  // A -> alpha . B beta, LA

  // get the symbol B (the one right after the dot)
  Symbol const *B = item->symbolAfterDotC();
  if (B->isTerminal()) {
    // closure only expands nonterminals
    if (tr) {
      trs << " symbol after the dot is a terminal" << std::endl;
    }
    CHECK_MALLOC_STATS("return, dot sym is terminal");
    return;
  }
  int nontermIndex = B->asNonterminalC().ntIndex;

  // could pull this out of even this fn, to the caller, but I don't
  // see any difference in time when I make it static (which simulates
  // the effect, though static itself is a bad idea because it makes
  // the size constant through a whole run); but maybe when other things
  // are faster I will be able to notice the difference, so I might
  // revisit this
  //TerminalSet newItemLA(numTerminals());

  // for each production "B -> gamma"
  SMUTATE_EACH_PRODUCTION(productionsByLHS[nontermIndex], prodIter) { // (constness)
    Production &prod = *(prodIter.data());
    if (tr) {
      trs << " considering production " << prod << std::endl;
    }

    // key to good performance: do *no* dynamic allocation in this
    // loop (one of two inner loops in the grammar analysis), until a
    // new item is actually *needed* (which is the uncommon case); for
    // example, all debug output statements are guarded by 'if (tr)'
    // because otherwise they would allocate

    // invariant of the indexed productions list
    xassert(prod.left == B);

    // construct "B -> . gamma, First(beta LA)";
    // except, don't actually build it until later; in the meantime,
    // determine which DP and lookahead it would use if created
    DottedProduction const *newDP = getDProd(&prod, 0 /*dot at left*/);

    // get beta (what follows B in 'item')
    DottedProduction const *beta = nextDProd(item->dprod);

    // get First(beta) -> new item's lookahead
    newItemLA = beta->firstSet;

    // if beta ->* epsilon, add LA
    if (beta->canDeriveEmpty) {
      newItemLA.merge(item->lookahead);
    }

    if (tr) {
      trs << " built item ";
      // this is what LRItem::print would do if I actually
      // constructed the object
      newDP->print(trs);
      trs << ", ";
      newItemLA.print(trs, *this);
      trs << std::endl;
    }

    // is 'newDP' already there?
    // check in working and finished tables; backPointer is non-NULL
    // exactly when the item is on the worklist
    bool inDoneList = true;
    LRItem *already = newDP->backPointer; // workhash.lookup(newDP);
    if (already) {
      inDoneList = false;
    }
    else {
      already = finished.get(newDP);
    }

    if (already) {
      // yes, it's already there
      if (tr) {
        trs << " looks similar to ";
        already->print(trs, *this);
        trs << std::endl;
      }

      // but the new item may have additional lookahead
      // components, so merge them with the old
      if (already->lookahead.merge(newItemLA)) {
        // merging changed 'already'
        if (tr) {
          trs << " (chg) merged it to make ";
          already->print(trs, *this);
          trs << std::endl;
        }

        if (inDoneList) {
          // pull from the 'done' list and put in worklist, since the
          // lookahead changed
          finished.remove(already->dprod);
          CHECK_MALLOC_STATS("before worklist push");
          worklist.push(already);
          xassert(already->dprod->backPointer == NULL); // was not on
          already->dprod->backPointer = already; // now is on worklist
          UPDATE_MALLOC_STATS(); // allow expansion
        }
        else {
          // 'already' is in the worklist, so that's fine
        }
      }
      else {
        // lookahead unchanged; nothing more to do
        if (tr) {
          trs << " this dprod already existed" << std::endl;
        }
      }
    }
    else {
      CHECK_MALLOC_STATS("bunch of stuff before 'if'");

      // it's not already there, so add it to worklist (but first
      // actually create it!)
      LRItem *newItem = new LRItem(numTerms, newDP);
      newItem->lookahead.copy(newItemLA);
      if (tr) {
        trs << " this dprod is new, queueing it to add" << std::endl;
      }

      worklist.push(newItem);
      xassert(newItem->dprod->backPointer == NULL);
      newItem->dprod->backPointer = newItem;

      UPDATE_MALLOC_STATS(); // "new LRItem" or expansion of worklist
    }

    CHECK_MALLOC_STATS("processing of production");
  } // for each production

  CHECK_MALLOC_STATS("end of singleItemClosure");
}
2101:
2102:
2103: // -------------- START of construct LR item sets -------------------
2104: ItemSet *GrammarAnalysis::makeItemSet()
2105: {
2106: return new ItemSet((StateId)(nextItemSetId++),
2107: numTerminals(), numNonterminals());
2108: }
2109:
2110: void GrammarAnalysis::disposeItemSet(ItemSet *is)
2111: {
2112: // we assume we're only doing this right after making it, as the
2113: // point of this exercise is to avoid fragmenting the id space
2114: nextItemSetId--;
2115: xassert(is->id == nextItemSetId);
2116: delete is;
2117: }
2118:
2119:
// yield (by filling 'dest') a new itemset by moving the dot across
// the productions in 'source' that have 'symbol' to the right of the
// dot; do *not* compute the closure
//
// unusedTail:
// since 'dest' comes with a bunch of kernel items, some of which we
// most likely won't need, put the unused ones into 'unusedTail'
// (caller re-attaches them afterwards, so the scratch list is reused)
//
// array:
// since I don't want to allocate anything in here, we need scratch
// space for computing kernel CRCs
void GrammarAnalysis::moveDotNoClosure(ItemSet const *source, Symbol const *symbol,
                                       ItemSet *dest, ObjList<LRItem> &unusedTail,
                                       GrowArray<DottedProduction const*> &array)
{
  //ItemSet *ret = makeItemSet();

  // total # of items added
  int appendCt=0;

  // iterator for walking down dest's kernel list
  ObjListMutator<LRItem> destIter(dest->kernelItems);

  // iterator for walking both lists of items; switching from an
  // implementation which used 'getAllItems' for performance reasons
  ObjListIter<LRItem> srcIter(source->kernelItems);
  int passCt=0; // 0=kernelItems, 1=nonkernelItems
  while (passCt < 2) {
    if (passCt++ == 1) {
      srcIter.reset(source->nonkernelItems);
    }

    // for each item
    for (; !srcIter.isDone(); srcIter.adv()) {
      LRItem const *item = srcIter.data();

      // only items whose dot sits immediately before 'symbol'
      // participate in this transition
      if (item->isDotAtEnd() ||
          item->symbolAfterDotC() != symbol) {
        continue; // can't move dot
      }

      // need to access destIter; if there are no more items, make more
      if (destIter.isDone()) {
        // the new item becomes the current 'data()'
        destIter.insertBefore(new LRItem(numTerminals(), NULL /*dprod*/));
      }

      // move the dot; write dot-moved item into 'destIter' by
      // overwriting the recycled item's dprod and lookahead
      LRItem *dotMoved = destIter.data();
      dotMoved->dprod = nextDProd(item->dprod);
      dotMoved->lookahead = item->lookahead;

      // add the new item to the itemset I'm building
      //ret->addKernelItem(dotMoved); // UPDATE: it's already in the list
      appendCt++;
      destIter.adv();
    }
  }

  // pull out any unused items into 'unusedItems'; it's important that
  // this action not have to look at each unused item, because I want
  // to be able to make a really big scratch item list and not pay for
  // items I don't end up using
  unusedTail.stealTailAt(appendCt, dest->kernelItems);

  // verify we actually got something
  xassert(appendCt > 0);

  // we added stuff; sorting is needed both for the CRC below, and also
  // for the lookahead merge step that follows a successful lookup
  dest->sortKernelItems();

  // recompute the one thing I need to do hashing
  dest->computeKernelCRC(array);
}
2195:
2196:
2197: // if 'list' contains something equal to 'itemSet', return that
2198: // equal object; otherwise, return NULL
2199: // 'list' is non-const because might return an element of it
2200: ItemSet *GrammarAnalysis::findItemSetInList(ObjList<ItemSet> &list,
2201: ItemSet const *itemSet)
2202: {
2203: // inefficiency: using iteration to check set membership
2204:
2205: MUTATE_EACH_OBJLIST(ItemSet, list, iter) {
2206: if (itemSetsEqual(iter.data(), itemSet)) {
2207: return iter.data();
2208: }
2209: }
2210: return NULL;
2211: }
2212:
2213:
2214: STATICDEF bool GrammarAnalysis::itemSetsEqual(ItemSet const *is1, ItemSet const *is2)
2215: {
2216: // checks for equality of the kernel items
2217: return *is1 == *is2;
2218: }
2219:
2220:
2221: // keys and data are the same
2222: STATICDEF ItemSet const *ItemSet::dataToKey(ItemSet *data)
2223: {
2224: return data;
2225: }
2226:
2227: STATICDEF unsigned ItemSet::hash(ItemSet const *key)
2228: {
2229: unsigned crc = key->kernelItemsCRC;
2230: return HashTable::lcprngHashFn((void*)crc);
2231: }
2232:
2233: STATICDEF bool ItemSet::equalKey(ItemSet const *key1, ItemSet const *key2)
2234: {
2235: return *key1 == *key2;
2236: }
2237:
2238:
2239: // [ASU] fig 4.34, p.224
2240: // puts the finished parse tables into 'itemSetsDone'
2241: void GrammarAnalysis::constructLRItemSets()
2242: {
2243: bool tr = tracingSys("lrsets");
2244:
2245: enum { BIG_VALUE = 100 };
2246:
2247: // item sets yet to be processed; item sets are simultaneously in
2248: // both the hash and the list, or not in either
2249: OwnerKHashArray<ItemSet, ItemSet> itemSetsPending(
2250: &ItemSet::dataToKey,
2251: &ItemSet::hash,
2252: &ItemSet::equalKey);
2253:
2254: // item sets with all outgoing links processed
2255: OwnerKHashTable<ItemSet, ItemSet> itemSetsDone(
2256: &ItemSet::dataToKey,
2257: &ItemSet::hash,
2258: &ItemSet::equalKey);
2259: itemSetsDone.setEnableShrink(false);
2260:
2261: // to avoid allocating in the inner loop, we make a single item set
2262: // which we'll fill with kernel items every time we think we *might*
2263: // make a new state, and if it turns out we really do need a new
2264: // state, then the kernel items in this one will be copied elsewhere
2265: Owner<ItemSet> scratchState(
2266: new ItemSet((StateId)-1 /*id*/, numTerms, numNonterms));
2267:
2268: // fill the scratch state with lots of kernel items to start with;
2269: // since these items will be re-used over and over, filling it now
2270: // ensures good locality on those accesses (assuming malloc returns
2271: // objects close together)
2272: enum { INIT_LIST_LEN = BIG_VALUE };
2273: for (int i=0; i<INIT_LIST_LEN; i++) {
2274: // this is a dummy item; it allocates the bitmap for 'lookahead',
2275: // but those bits and the 'dprod' pointer will be overwritten
2276: // many times during the algorithm
2277: LRItem *item = new LRItem(numTerms, NULL /*dottedprod*/);
2278: scratchState->addKernelItem(item);
2279: }
2280:
2281: // similar to the scratch state, make a scratch array for the
2282: // kernel CRC computation
2283: GrowArray<DottedProduction const*> kernelCRCArray(BIG_VALUE);
2284:
2285: // start by constructing closure of first production
2286: // (basically assumes first production has start symbol
2287: // on LHS, and no other productions have the start symbol
2288: // on LHS)
2289: {
2290: ItemSet *is = makeItemSet(); // (owner)
2291: startState = is;
2292: LRItem *firstDP
2293: = new LRItem(numTerms, getDProd(productions.first(), 0 /*dot at left*/));
2294:
2295: // don't add this to the lookahead; we assume EOF is actually
2296: // mentioned in the production already, and we won't contemplate
2297: // executing this reduction within the normal parser core
2298: // (see GLR::cleanupAfterParse)
2299: //firstDP->laAdd(0 /*EOF token id*/);
2300:
2301: is->addKernelItem(firstDP);
2302: is->sortKernelItems(); // redundant, but can't hurt
2303: itemSetClosure(*is); // calls changedItems internally
2304:
2305: // this makes the initial pending itemSet
2306: itemSetsPending.push(is, is); // (ownership transfer)
2307: }
2308:
2309: // track how much allocation we're doing
2310: INITIAL_MALLOC_STATS();
2311:
2312: // for each pending item set
2313: while (itemSetsPending.isNotEmpty()) {
2314: ItemSet *itemSet = itemSetsPending.pop(); // dequeue (owner)
2315:
2316: CHECK_MALLOC_STATS("top of pending list loop");
2317:
2318: // put it in the done set; note that we must do this *before*
2319: // the processing below, to properly handle self-loops
2320: itemSetsDone.add(itemSet, itemSet); // (ownership transfer; 'itemSet' becomes serf)
2321:
2322: // allows for expansion of 'itemSetsDone' hash
2323: UPDATE_MALLOC_STATS();
2324:
2325: if (tr) {
2326: trace("lrsets") << "state " << itemSet->id
2327: << ", " << itemSet->kernelItems.count()
2328: << " kernel items and "
2329: << itemSet->nonkernelItems.count()
2330: << " nonkernel items" << std::endl;
2331: }
2332:
2333: // see below; this is part of a fix for a *very* subtle heisenbug
2334: bool mustCloseMyself = false;
2335:
2336: // for each production in the item set where the
2337: // dot is not at the right end
2338: //
2339: // explicitly iterate over both lists because 'getAllItems'
2340: // does allocation
2341: ObjListIter<LRItem> itemIter(itemSet->kernelItems);
2342: int passCt=0; // 0=kernelItems, 1=nonkernelItems
2343: while (passCt < 2) {
2344: if (passCt++ == 1) {
2345: itemIter.reset(itemSet->nonkernelItems);
2346: }
2347:
2348: for (; !itemIter.isDone(); itemIter.adv()) {
2349: LRItem const *item = itemIter.data();
2350: if (item->isDotAtEnd()) continue;
2351:
2352: CHECK_MALLOC_STATS("top of item list loop");
2353:
2354: if (tr) {
2355: std::ostream &trs = trace("lrsets");
2356: trs << "considering item ";
2357: item->print(trs, *this);
2358: trs << std::endl;
2359: }
2360:
2361: // get the symbol 'sym' after the dot (next to be shifted)
2362: Symbol const *sym = item->symbolAfterDotC();
2363:
2364: // in LALR(1), two items might have different lookaheads; more
2365: // likely, re-expansions needs to propagate lookahead that
2366: // wasn't present from an earlier expansion
2367: if (!LALR1) {
2368: // if we already have a transition for this symbol,
2369: // there's nothing more to be done
2370: if (itemSet->transitionC(sym) != NULL) {
2371: continue;
2372: }
2373: }
2374:
2375: // compute the itemSet (into 'scratchState') produced by moving
2376: // the dot across 'sym'; don't take closure yet since we
2377: // first want to check whether it is already present
2378: //
2379: // this call also yields the unused remainder of the kernel items,
2380: // so we can add them back in at the end
2381: ObjList<LRItem> unusedTail;
2382: moveDotNoClosure(itemSet, sym, scratchState,
2383: unusedTail, kernelCRCArray);
2384: ItemSet *withDotMoved = scratchState; // clarify role from here down
2385:
2386: CHECK_MALLOC_STATS("moveDotNoClosure");
2387:
2388: // see if we already have it, in either set
2389: ItemSet *already = itemSetsPending.lookup(withDotMoved);
2390: bool inDoneList = false;
2391: if (already == NULL) {
2392: already = itemSetsDone.get(withDotMoved);
2393: inDoneList = true; // used if 'already' != NULL
2394: }
2395:
2396: // have it?
2397: if (already != NULL) {
2398: // we already have a state with at least equal kernel items, not
2399: // considering their lookahead sets; so we have to merge the
2400: // computed lookaheads with those in 'already'
2401: if (withDotMoved->mergeLookaheadsInto(*already)) {
2402: if (tr) {
2403: trace("lrsets")
2404: << "from state " << itemSet->id << ", found that the transition "
2405: << "on " << sym->name << " yielded a state similar to "
2406: << already->id << ", but with different lookahead" << std::endl;
2407: }
2408:
2409: CHECK_MALLOC_STATS("mergeLookaheadsInto");
2410:
2411: // this changed 'already'; recompute its closure
2412: if (already != itemSet) {
2413: itemSetClosure(*already);
2414: }
2415: else {
2416: // DANGER! I'm already iterating over 'itemSet's item lists,
2417: // and if I execute the closure algorithm it will invalidate
2418: // my iterator. so, postpone it
2419: mustCloseMyself = true;
2420: }
2421:
2422: // and reconsider all of the states reachable from it
2423: if (!inDoneList) {
2424: // itemSetsPending contains 'already', it will be processed later
2425: }
2426: else {
2427: // we thought we were done with this
2428: xassertdb(itemSetsDone.get(already));
2429:
2430: // but we're not: move it back to the 'pending' list
2431: itemSetsDone.remove(already);
2432: itemSetsPending.push(already, already);
2433: }
2434:
2435: // it's ok if closure makes more items, or if
2436: // the pending list expands
2437: UPDATE_MALLOC_STATS();
2438: }
2439:
2440: // we already have it, so throw away one we made
2441: // UPDATE: we didn't allocate, so don't deallocate
2442: //disposeItemSet(withDotMoved); // deletes 'withDotMoved'
2443:
2444: // and use existing one for setting the transition function
2445: withDotMoved = already;
2446: }
2447: else {
2448: // we don't already have it; need to actually allocate & copy
2449: withDotMoved = makeItemSet();
2450: FOREACH_OBJLIST(LRItem, scratchState->kernelItems, iter) {
2451: withDotMoved->addKernelItem(new LRItem( *(iter.data()) ));
2452: }
2453:
2454: // finish it by computing its closure
2455: itemSetClosure(*withDotMoved);
2456:
2457: // then add it to 'pending'
2458: itemSetsPending.push(withDotMoved, withDotMoved);
2459:
2460: // takes into account:
2461: // - creation of 'withDotMoved' state
2462: // - creation of items to fill its kernel
2463: // - creation of nonkernel items during closure
2464: // - possible expansion of the 'itemSetsPending' hash
2465: UPDATE_MALLOC_STATS();
2466: }
2467:
2468: // setup the transition function
2469: itemSet->setTransition(sym, withDotMoved);
2470:
2471: // finally, restore 'scratchState's kernel item list
2472: scratchState->kernelItems.concat(unusedTail);
2473:
2474: // make sure the link restoration process works as expected
2475: xassertdb(scratchState->kernelItems.count() >= INIT_LIST_LEN);
2476:
2477: CHECK_MALLOC_STATS("end of item loop");
2478:
2479: } // for each item
2480: } // 0=kernel, 1=nonkernel
2481:
2482: CHECK_MALLOC_STATS("end of item set loop");
2483:
2484: // now that we're finished iterating over the items, I can do the
2485: // postponed closure
2486: if (mustCloseMyself) {
2487: itemSetClosure(*itemSet);
2488: UPDATE_MALLOC_STATS();
2489: }
2490:
2491: } // for each item set
2492:
2493: // we're done constructing item sets, so move all of them out
2494: // of the 'itemSetsDone' hash and into 'this->itemSets'
2495: try {
2496: for (OwnerKHashTableIter<ItemSet, ItemSet> iter(itemSetsDone);
2497: !iter.isDone(); iter.adv()) {
2498: itemSets.prepend(iter.data());
2499: }
2500: itemSetsDone.disownAndForgetAll();
2501: }
2502: catch (...) {
2503: breaker();
2504: itemSetsDone.disownAndForgetAll();
2505: throw;
2506: }
2507:
2508: // since we sometimes consider a state more than once, the
2509: // states end up out of order; put them back in order
2510: itemSets.mergeSort(ItemSet::diffById);
2511:
2512:
2513: traceProgress(1) << "done with LR sets: " << itemSets.count()
2514: << " states\n";
2515:
2516:
2517: // do the BFS now, since we want to print the sample inputs
2518: // in the loop that follows
2519: traceProgress(1) << "BFS tree on transition graph...\n";
2520: computeBFSTree();
2521:
2522: if (tracingSys("itemset-graph")) {
2523: // write this info to a graph applet file
2524: std::ofstream out("lrsets.g");
2525: if (!out) {
2526: xsyserror("std::ofstream open");
2527: }
2528: out << "# lr sets in graph form\n";
2529:
2530: FOREACH_OBJLIST(ItemSet, itemSets, itemSet) {
2531: itemSet.data()->writeGraph(out, *this);
2532: }
2533: }
2534: }
2535:
2536:
2537: // print each item set
2538: void GrammarAnalysis::printItemSets(std::ostream &os, bool nonkernel) const
2539: {
2540: FOREACH_OBJLIST(ItemSet, itemSets, itemSet) {
2541: os << "State " << itemSet.data()->id
2542: << ", sample input: " << sampleInput(itemSet.data()) << "\n"
2543: << " and left context: " << leftContextString(itemSet.data()) << "\n"
2544: ;
2545: itemSet.data()->print(os, *this, nonkernel);
2546: os << "\n\n";
2547: }
2548: }
2549:
2550:
2551: // --------------- END of construct LR item sets -------------------
2552:
2553:
2554: Symbol const *GrammarAnalysis::
2555: inverseTransitionC(ItemSet const *source, ItemSet const *target) const
2556: {
2557: // for each symbol..
2558: FOREACH_TERMINAL(terminals, t) {
2559: // see if it is the one
2560: if (source->transitionC(t.data()) == target) {
2561: return t.data();
2562: }
2563: }
2564:
2565: FOREACH_NONTERMINAL(nonterminals, nt) {
2566: if (source->transitionC(nt.data()) == target) {
2567: return nt.data();
2568: }
2569: }
2570:
2571: xfailure("GrammarAnalysis::inverseTransitionC: no transition from source to target");
2572: return NULL; // silence warning
2573: }
2574:
2575:
2576: void GrammarAnalysis::computeReachable()
2577: {
2578: // start by clearing the reachability flags
2579: MUTATE_EACH_NONTERMINAL(nonterminals, iter) {
2580: iter.data()->reachable = false;
2581: }
2582:
2583: // do a DFS on the grammar, marking things reachable as
2584: // they're encountered
2585: computeReachableDFS(startSymbol);
2586: }
2587:
2588:
2589: void GrammarAnalysis::computeReachableDFS(Nonterminal *nt)
2590: {
2591: if (nt->reachable) {
2592: // already looked at this nonterminal
2593: return;
2594: }
2595: nt->reachable = true;
2596:
2597: // iterate over this nonterminal's rules
2598: SFOREACH_PRODUCTION(productionsByLHS[nt->ntIndex], iter) {
2599: // iterate over symbols in the rule RHS
2600: FOREACH_OBJLIST(Production::RHSElt, iter.data()->right, jter) {
2601: Production::RHSElt const *elt = jter.data();
2602:
2603: if (elt->sym->isNonterminal()) {
2604: // recursively analyze nonterminal elements
2605: computeReachableDFS(elt->sym->ifNonterminal());
2606: }
2607: else {
2608: // just mark terminals
2609: elt->sym->reachable = true;
2610: }
2611: }
2612: }
2613: }
2614:
2615:
2616: // --------------- LR support -------------------
2617: // decide what to do, and record the result into the two
2618: // boolean reference parameters
2619: void GrammarAnalysis::handleShiftReduceConflict(
2620: bool &keepShift, bool &keepReduce, bool &dontWarn,
2621: ItemSet const *state, Production const *prod, Terminal const *sym)
2622: {
2623: // say that we're considering this conflict
2624: trace("prec")
2625: << "in state " << state->id << ", S/R conflict on token "
2626: << sym->name << " with production " << *prod << std::endl;
2627:
2628: // look at scannerless directives
2629: {
2630: // is this nonterm or any of its declared supersets maximal?
2631: Nonterminal const *super = prod->left;
2632: bool maximal = super->maximal;
2633: while (!maximal && super->superset) {
2634: super = super->superset;
2635: maximal = super->maximal;
2636: }
2637:
2638: if (maximal) {
2639: // see if this reduction can be removed due to a 'maximal' spec;
2640: // in particular, is the shift going to extend 'super'?
2641: if (state->hasExtendingShift(super, sym)) {
2642: trace("prec") << "resolved in favor of SHIFT due to maximal munch\n";
2643: keepReduce = false;
2644: return;
2645: }
2646: }
2647: }
2648:
2649: if (!( prod->precedence && sym->precedence )) {
2650: // one of the two doesn't have a precedence specification,
2651: // so we can do nothing
2652: trace("prec") << "will SPLIT because no disambiguation spec available" << std::endl;
2653: return;
2654: }
2655:
2656: if (prod->precedence > sym->precedence) {
2657: // production's precedence is higher, so we choose to reduce
2658: // instead of shift
2659: trace("prec") << "resolved in favor of REDUCE due to precedence\n";
2660: keepShift = false;
2661: return;
2662: }
2663:
2664: if (prod->precedence < sym->precedence) {
2665: // symbol's precedence is higher, so we shift
2666: trace("prec") << "resolved in favor of SHIFT due to precedence\n";
2667: keepReduce = false;
2668: return;
2669: }
2670:
2671: // precedences are equal, so we look at associativity (of token)
2672: switch (sym->associativity) {
2673: case AK_LEFT:
2674: trace("prec") << "resolved in favor of REDUCE due to associativity\n";
2675: keepShift = false;
2676: return;
2677:
2678: case AK_RIGHT:
2679: trace("prec") << "resolved in favor of SHIFT due to associativity\n";
2680: keepReduce = false;
2681: return;
2682:
2683: case AK_NONASSOC:
2684: trace("pred") << "removed BOTH alternatives due to nonassociativity\n";
2685: keepShift = false;
2686: keepReduce = false;
2687: return;
2688:
2689: case AK_NEVERASSOC:
2690: // the user claimed this token would never be involved in a conflict
2691: trace("pred") << "neverassoc specification ERROR\n";
2692: errors++;
2693: std::cout << "token " << sym->name << " was declared 'prec', "
2694: << "but it is involved in an associativity conflict with \""
2695: << *prod << "\" in state " << state->id << std::endl;
2696: return;
2697:
2698: case AK_SPLIT:
2699: // the user does not want disambiguation of this
2700: trace("pred") << "will SPLIT because user asked to\n";
2701: dontWarn = true;
2702: return;
2703:
2704: default:
2705: xfailure("bad assoc code");
2706: }
2707: }
2708:
2709:
2710: // given an LR transition graph, compute the BFS tree on top of it
2711: // and set the parent links to record the tree
2712: void GrammarAnalysis::computeBFSTree()
2713: {
2714: // for the BFS, we need a queue of states yet to be processed, and a
2715: // pile of 'done' states
2716: SObjList<ItemSet> queue;
2717: SObjList<ItemSet> done;
2718:
2719: // initial entry in queue is root of BFS tree
2720: queue.append(startState);
2721:
2722: // it will be convenient to have all the symbols in a single list
2723: // for iteration purposes
2724: SymbolList allSymbols; // (const list)
2725: {
2726: FOREACH_TERMINAL(terminals, t) {
2727: allSymbols.append(const_cast<Terminal*>(t.data()));
2728: }
2729: FOREACH_NONTERMINAL(nonterminals, nt) {
2730: allSymbols.append(const_cast<Nonterminal*>(nt.data()));
2731: }
2732: }
2733:
2734: // loop until the queue is exhausted
2735: while (queue.isNotEmpty()) {
2736: // dequeue first element
2737: ItemSet *source = queue.removeAt(0);
2738:
2739: // mark it as done so we won't consider any more transitions to it
2740: done.append(source);
2741:
2742: // for each symbol...
2743: SFOREACH_SYMBOL(allSymbols, sym) {
2744: // get the transition on this symbol
2745: ItemSet *target = source->transition(sym.data());
2746:
2747: // if the target is done or already enqueued, or there is no
2748: // transition on this symbol, we don't need to consider it
2749: // further
2750: if (target == NULL ||
2751: done.contains(target) ||
2752: queue.contains(target)) {
2753: continue;
2754: }
2755:
2756: // the source->target link just examined is the first time
2757: // we've encounted 'target', so that link becomes the BFS
2758: // parent link
2759: target->BFSparent = source;
2760:
2761: // finally, enqueue the target so we'll explore its targets too
2762: queue.append(target);
2763: }
2764: }
2765: }
2766:
2767:
// --------------- parse table construction -------------------
// NOTE(review): this comparator is disabled; it is referenced only by
// the also-#if-0'd R/R sorting code inside resolveConflicts(), and is
// kept for historical reference.
#if 0 // obsolete
// compare two productions by precedence
static int productionPrecCompare(Production const *p1, Production const *p2, void*)
{
  if (p1->precedence && p2->precedence) {
    // I want the low precedence first
    return p1->precedence - p2->precedence;
  }
  else {
    // if one or the other doesn't have a precedence, then there's
    // no basis for distinction
    return 0;
  }
}
#endif
2784:
2785: // given some potential parse actions, apply available disambiguation
2786: // to remove some of them; print warnings about conflicts, in some
2787: // situations
2788: void GrammarAnalysis::resolveConflicts(
2789: ItemSet const *state, // parse state in which the actions are possible
2790: Terminal const *sym, // lookahead symbol for these actions
2791: ItemSet const *&shiftDest, // (inout) if non-NULL, the state to which we can shift
2792: ProductionList &reductions, // (inout) list of possible reductions
2793: bool allowAmbig, // if false, always return at most 1 action
2794: bool &printedConflictHeader, // (inout) true once we've printed the state header
2795: int &sr, int &rr) // (inout) counts of S/R and R/R conflicts, resp.
2796: {
2797: // how many actions are there?
2798: int actions = (shiftDest? 1 : 0) + reductions.count();
2799: if (actions <= 1) {
2800: return; // no conflict
2801: }
2802:
2803: // count how many warning suppressions we have
2804: int dontWarns = 0;
2805:
2806: // static disambiguation for S/R conflicts
2807: if (shiftDest) {
2808: // we have (at least) a shift/reduce conflict, which is the
2809: // situation in which prec/assoc specifications are used; consider
2810: // all the possible reductions, so we can resolve S/R conflicts
2811: // even when there are R/R conflicts present too
2812: SObjListMutator<Production> mut(reductions);
2813: while (!mut.isDone() && shiftDest != NULL) {
2814: Production const *prod = mut.data();
2815:
2816: bool keepShift=true, keepReduce=true, dontWarn=false;
2817: handleShiftReduceConflict(keepShift, keepReduce, dontWarn, state, prod, sym);
2818:
2819: if (!keepShift) {
2820: actions--;
2821: shiftDest = NULL; // remove the shift
2822: }
2823:
2824: if (!keepReduce) {
2825: actions--;
2826: mut.remove(); // remove the reduction
2827: }
2828: else {
2829: mut.adv();
2830: }
2831:
2832: if (dontWarn) {
2833: dontWarns++;
2834: }
2835: }
2836:
2837: // there is still a potential for misbehavior.. e.g., if there are two
2838: // possible reductions (R1 and R2), and one shift (S), then the user
2839: // could have specified prec/assoc to disambiguate, e.g.
2840: // R1 < S
2841: // S < R2
2842: // so that R2 is the right choice; but if I consider (S,R2) first,
2843: // I'll simply drop S, leaving no way to disambiguate R1 and R2 ..
2844: // for now I'll just note the possibility...
2845: }
2846:
2847: // static disambiguation for R/R conflicts
2848: if (reductions.count() > 1) {
2849:
2850: // NEW CODE FROM ELKHOUND version 1.156, 2005/02/25 20:10:47
2851: // find the highest precedence
2852: int highestPrec = 0;
2853: SFOREACH_PRODUCTION(reductions, iter) {
2854: int p = iter.data()->precedence;
2855:
2856: if (p && p>highestPrec) {
2857: highestPrec = p;
2858: }
2859: }
2860:
2861: // remove any productions that are lower than 'highestPrec'
2862: SObjListMutator<Production> mut(reductions);
2863: while (!mut.isDone()) {
2864: int p = mut.data()->precedence;
2865:
2866: if (p && p<highestPrec) {
2867: trace("prec")
2868: << "in state " << state->id << ", R/R conflict on token "
2869: << sym->name << ", removed production " << *(mut.data())
2870: << " because " << p << "<" << highestPrec << std::endl;
2871: mut.remove();
2872: }
2873: else {
2874: mut.adv();
2875: }
2876: }
2877:
2878: #if 0 // totally wrong
2879: // sort the reductions so the lowest precedence reductions are
2880: // first, then higher precedences, and finally reductions that
2881: // lack any precedence (use insertion sort since I expect that
2882: // most of the time the list won't require any changes)
2883: reductions.insertionSort(productionPrecCompare);
2884:
2885: // work through the head of the list, discarding productions
2886: // that have higher-precedence productions beneath them
2887: int ct = reductions.count();
2888: while (ct >= 2) {
2889: Production *p1 = reductions.nth(0);
2890: Production *p2 = reductions.nth(1);
2891: if (!(p1->precedence && p2->precedence)) break;
2892:
2893: // remove first one
2894: reductions.removeFirst();
2895: ct--;
2896: actions--;
2897:
2898: // report
2899: trace("prec")
2900: << "in state " << state->id << ", R/R conflict on token "
2901: << sym->name << ", removed production " << *p1 << std::endl;
2902: }
2903: #endif
2904: }
2905:
2906: // additional R/R resolution using subset directives
2907: if (reductions.count() > 1) {
2908: actions -= subsetDirectiveResolution(state, sym, reductions);
2909: }
2910:
2911: // after the disambiguation, maybe now there's no conflicts?
2912: // or, if conflicts remain, did we get at least that many warning
2913: // suppressions?
2914: if ((actions-dontWarns) <= 1) {
2915: // don't print information about conflicts
2916: }
2917: else {
2918: // print conflict info
2919: if (!printedConflictHeader) {
2920: trace("conflict")
2921: << "--------- state " << state->id << " ----------\n"
2922: << "left context: " << leftContextString(state)
2923: << std::endl
2924: << "sample input: " << sampleInput(state)
2925: << std::endl
2926: ;
2927: printedConflictHeader = true;
2928: }
2929:
2930: trace("conflict")
2931: << "conflict for symbol " << sym->name
2932: << std::endl;
2933:
2934: if (shiftDest) {
2935: trace("conflict") << " shift, and move to state " << shiftDest->id << std::endl;
2936: sr++; // shift/reduce conflict
2937: rr += actions - 2; // any reduces beyond first are r/r errors
2938: }
2939: else {
2940: rr += actions - 1; // all reduces beyond first are r/r errors
2941: }
2942:
2943: SFOREACH_PRODUCTION(reductions, prod) {
2944: trace("conflict") << " reduce by rule " << *(prod.data()) << std::endl;
2945: }
2946: }
2947:
2948: if (!allowAmbig && actions > 1) {
2949: // force only one action, using Bison's disambiguation:
2950: // - prefer shift to reduce
2951: // - prefer the reduction which occurs first in the grammar file
2952: if (shiftDest) {
2953: reductions.removeAll();
2954: }
2955: else {
2956: while (reductions.count() >= 2) {
2957: // compare first and second
2958: Production const *first = reductions.nth(0);
2959: Production const *second = reductions.nth(1);
2960:
2961: // production indices happen to be assigned in file order
2962: if (first->prodIndex < second->prodIndex) {
2963: reductions.removeItem(second);
2964: }
2965: else {
2966: reductions.removeItem(first);
2967: }
2968: }
2969: }
2970: }
2971: }
2972:
2973:
// print "<value> <desc>" (plus the expectation, when one was given) if
// 'value' differs from what was expected; 'expectedValue' of -1 means
// "expect zero, and don't echo the expectation in the message"
void reportUnexpected(int value, int expectedValue, char const *desc)
{
  bool const noExpectation = (expectedValue == -1);
  bool const surprising = noExpectation? (value > 0)
                                       : (value != expectedValue);
  if (!surprising) {
    return;    // value is acceptable; stay quiet
  }

  std::cout << value << " " << desc;
  if (!noExpectation) {
    std::cout << " (expected " << expectedValue << ")";
  }
  std::cout << std::endl;
}
2985:
2986:
2987: // the idea is we might be trying to do scannerless parsing, and
2988: // someone might say that Identifier has as subsets all the keywords,
2989: // so competing reductions should favor the subsets (the keywords)
2990: int GrammarAnalysis::subsetDirectiveResolution(
2991: ItemSet const *state, // parse state in which the actions are possible
2992: Terminal const *sym, // lookahead symbol for these actions
2993: ProductionList &reductions) // list to try to cut down
2994: {
2995: int removed = 0;
2996:
2997: // make a map of which nonterminals appear on the LHS of one
2998: // of the reductions, and has a superset
2999: BitArray map(numNonterms);
3000: bool anyWithSuper = false;
3001: {
3002: SFOREACH_PRODUCTION(reductions, iter) {
3003: Production const *p = iter.data();
3004: if (p->left->superset) {
3005: map.set(p->left->ntIndex);
3006: anyWithSuper = true;
3007: }
3008: }
3009: }
3010:
3011: if (!anyWithSuper) {
3012: return removed; // nothing we can do
3013: }
3014:
3015: // walk over the reductions, removing those that have reductions
3016: // to subsets also in the list
3017: SObjListMutator<Production> mut(reductions);
3018: while (!mut.isDone()) {
3019: Production const *prod = mut.data();
3020:
3021: SFOREACH_OBJLIST(Nonterminal, prod->left->subsets, iter) {
3022: Nonterminal const *sub = iter.data();
3023: if (map.test(sub->ntIndex)) {
3024: trace("prec")
3025: << "in state " << state->id
3026: << ", R/R conflict on token " << sym->name
3027: << ", removed production yielding " << prod->left->name
3028: << " b/c another yields subset " << sub->name
3029: << std::endl;
3030: mut.remove();
3031: removed++;
3032: goto continue_outer_loop;
3033: }
3034: }
3035:
3036: // didn't remove, must manually advance
3037: mut.adv();
3038:
3039: continue_outer_loop:;
3040: }
3041:
3042: return removed;
3043: }
3044:
3045:
3046: bool isAmbiguousNonterminal(Symbol const *sym)
3047: {
3048: if (sym->isNonterminal()) {
3049: Nonterminal const &nt = sym->asNonterminalC();
3050: if (nt.mergeCode) {
3051: return true; // presence of merge() signals potential ambiguity
3052: }
3053: }
3054: return false;
3055: }
3056:
3057:
3058: // The purpose of this function is to number the states (which have up
3059: // to this point been numbered arbitrarily) in such a way that all
3060: // states that have a given symbol on incoming arcs will be numbered
3061: // consecutively. This is part of the table compression schemes
3062: // described in the Dencker et. al. paper (see parsetables.h).
3063: void GrammarAnalysis::renumberStates()
3064: {
3065: // sort them into the right order
3066: itemSets.mergeSort(&GrammarAnalysis::renumberStatesDiff, this);
3067:
3068: // number them in that order
3069: int n = 0;
3070: FOREACH_OBJLIST_NC(ItemSet, itemSets, iter) {
3071: ItemSet *s = iter.data();
3072: if (n == 0) {
3073: // the first element should always be the start state
3074: xassert(s->id == 0);
3075: }
3076: else {
3077: s->id = (StateId)n;
3078: }
3079:
3080: n++;
3081: }
3082: }
3083:
// Comparator used by renumberStates() to sort item sets.  The primary
// key is the symbol on the states' incoming arcs (the property the
// Code Reduction Scheme requires); the remaining keys exist only to
// make the resulting numbering canonical for easier debugging.
// Returns <0, 0, or >0 in the usual comparator convention.
STATICDEF int GrammarAnalysis::renumberStatesDiff
  (ItemSet const *left, ItemSet const *right, void *vgramanl)
{
  GrammarAnalysis *gramanl = (GrammarAnalysis*)vgramanl;

  int ret;

  // if for some reason I'm ever asked to compare a state to
  // itself..
  if (left == right) {
    return 0;
  }

  // order them first by their incoming arc symbol; this effects
  // the renumbering that the Code Reduction Scheme demands
  {
    Symbol const *ls = left->getStateSymbolC();
    Symbol const *rs = right->getStateSymbolC();

    // any state with no incoming arcs (start state) is first
    // (bool-cast yields 0 for NULL, 1 otherwise)
    ret = (int)(bool)ls - (int)(bool)rs;
    if (ret) return ret;

    // terminals come before nonterminals
    ret = (int)(ls->isNonterminal()) - (int)(rs->isNonterminal());
    if (ret) return ret;

    // order by id within terms/nonterms
    ret = ls->getTermOrNontermIndex() - rs->getTermOrNontermIndex();
    if (ret) return ret;
  }

  // from this point on, the CRS would be happy with an arbitrary
  // order, but I want the state numbering to be canonical so that
  // I have an easier time debugging and comparing parse traces

  // they have the same incoming arc symbol; now, sort by outgoing
  // arc symbols

  // first up: terminals
  {
    for (int t=0; t < gramanl->numTerminals(); t++) {
      ItemSet const *ldest = left->getTermTransition(t);
      ItemSet const *rdest = right->getTermTransition(t);

      // a state lacking the transition sorts after one that has it
      ret = (int)!ldest - (int)!rdest;
      if (ret) return ret;

      if (ldest && rdest) {
        // both have it: compare destination ids
        ret = ldest->id - rdest->id;
        if (ret) return ret;
      }
    }
  }

  // next: nonterminals
  {
    for (int nt=0; nt < gramanl->numNonterminals(); nt++) {
      ItemSet const *ldest = left->getNontermTransition(nt);
      ItemSet const *rdest = right->getNontermTransition(nt);

      ret = (int)!ldest - (int)!rdest;
      if (ret) return ret;

      if (ldest && rdest) {
        ret = ldest->id - rdest->id;
        if (ret) return ret;
      }
    }
  }

  // I suspect this will never be reached, since usually the
  // transition function will be sufficient
  // update: it happens often enough.. even in the arith grammar
  //std::cout << "using reductions to distinguish states\n";

  // finally, order by possible reductions, per lookahead terminal
  FOREACH_OBJLIST(Terminal, gramanl->terminals, termIter) {
    ProductionList lpl, rpl;
    left->getPossibleReductions(lpl, termIter.data(), false /*parsing*/);
    right->getPossibleReductions(rpl, termIter.data(), false /*parsing*/);

    // sort the productions before we can compare them...
    lpl.insertionSort(&GrammarAnalysis::arbitraryProductionOrder);
    rpl.insertionSort(&GrammarAnalysis::arbitraryProductionOrder);

    ret = lpl.compareAsLists(rpl, &GrammarAnalysis::arbitraryProductionOrder);
    if (ret) return ret;
  }

  // I used to throw an xfailure here, but that causes a problem
  // because the 'itemSets' list is not well-formed, because we
  // are in the middle of sorting it
  std::cout << "two different states have identical transitions and "
               "identical reductions!\n";
  std::cout << "left=" << left->id
            << ", sym is " << left->getStateSymbolC()->toString() << "\n";
  left->print(std::cout, *gramanl);
  std::cout << "right=" << right->id
            << ", sym is " << right->getStateSymbolC()->toString() << "\n";
  right->print(std::cout, *gramanl);

  return 0;
}
3188:
3189: STATICDEF int GrammarAnalysis::arbitraryProductionOrder
3190: (Production const *left, Production const *right, void*)
3191: {
3192: // compare LHS
3193: int ret = left->left->ntIndex - right->left->ntIndex;
3194: if (ret) return ret;
3195:
3196: // RHS elts one at a time
3197: return left->right.compareAsLists(right->right,
3198: &GrammarAnalysis::arbitraryRHSEltOrder);
3199: }
3200:
3201: STATICDEF int GrammarAnalysis::arbitraryRHSEltOrder
3202: (Production::RHSElt const *left, Production::RHSElt const *right, void*)
3203: {
3204: int ret = (int)left->sym->isTerminal() - (int)right->sym->isTerminal();
3205: if (ret) return ret;
3206:
3207: return left->sym->getTermOrNontermIndex() - right->sym->getTermOrNontermIndex();
3208: }
3209:
3210:
// Build the LR parse tables (action table, goto table, and the
// auxiliary per-production and per-state tables) from the item sets
// constructed earlier.  'allowAmbig' permits ambiguous (multi-action)
// entries for GLR parsing; when false, conflicts are resolved down to
// a single action.  Conflict totals are compared against the grammar's
// declared expectations at the end.
void GrammarAnalysis::computeParseTables(bool allowAmbig)
{
  tables = new ParseTables(numTerms, numNonterms, itemSets.count(), numProds,
    startState->id,
    0 /* slight hack: assume it's the first production */);

  if (ENABLE_CRS_COMPRESSION) {
    // first-state info: for each symbol, record the id of the first
    // state in the run of states reached on that symbol; this relies
    // on renumberStates() having sorted the states into such runs
    bool doingTerms = true;
    int prevSymCode = -1;
    FOREACH_OBJLIST(ItemSet, itemSets, iter) {
      ItemSet const *state = iter.data();
      Symbol const *sym = state->getStateSymbolC();
      if (!sym) continue; // skip start state
      int symCode = sym->getTermOrNontermIndex();

      if (sym->isTerminal() == doingTerms &&
          symCode == prevSymCode) {
        // continuing the current run, do nothing
        continue;
      }

      if (sym->isNonterminal() && doingTerms) {
        // transition from terminals to nonterminals
        doingTerms = false;
      }
      else {
        // continue current phase, with new code; states must
        // already have been sorted into increasing order
        xassert(sym->isTerminal() == doingTerms);
        xassert(prevSymCode < symCode);
      }

      if (doingTerms) {
        tables->setFirstWithTerminal(symCode, state->id);
      }
      else {
        tables->setFirstWithNonterminal(symCode, state->id);
      }

      prevSymCode = symCode;
    }
  }

  // count total number of conflicts of each kind
  int sr=0, rr=0;

  // for each state...
  FOREACH_OBJLIST(ItemSet, itemSets, stateIter) {
    ItemSet const *state = stateIter.data();
    bool printedConflictHeader = false;

    // ---- fill in this row in the action table ----
    // for each possible lookahead...
    for (int termId=0; termId < numTerms; termId++) {
      Terminal const *terminal = getTerminal(termId);

      // can shift?
      ItemSet const *shiftDest = state->transitionC(terminal);

      // can reduce?
      ProductionList reductions;
      state->getPossibleReductions(reductions, terminal,
                                   false /*parsing*/);

      // try to resolve conflicts; this may print warnings about
      // the conflicts, depending on various factors; if 'allowAmbig'
      // is false, this will remove all but one action
      resolveConflicts(state, terminal, shiftDest, reductions,
                       allowAmbig, printedConflictHeader, sr, rr);

      // what to do in this cell
      ActionEntry cellAction;

      // still conflicts?
      int actions = (shiftDest? 1 : 0) + reductions.count();
      if (actions >= 2) {
        // make a new ambiguous-action entry-set
        ArrayStack<ActionEntry> set;

        // fill in the actions: shift first (if any), then reductions
        if (shiftDest) {
          set.push(tables->encodeShift(shiftDest->id, termId));
        }
        SFOREACH_PRODUCTION(reductions, prodIter) {
          set.push(tables->encodeReduce(prodIter.data()->prodIndex, state->id));
        }
        xassert(set.length() == actions);

        cellAction = tables->encodeAmbig(set, state->id);
      }

      else {
        // single action
        if (shiftDest) {
          xassert(reductions.count() == 0);
          cellAction = tables->encodeShift(shiftDest->id, termId);
        }
        else if (reductions.isNotEmpty()) {
          xassert(reductions.count() == 1);
          cellAction = tables->encodeReduce(reductions.first()->prodIndex, state->id);
        }
        else {
          cellAction = tables->encodeError();
        }
      }

      // add this entry to the table
      tables->setActionEntry(state->id, termId, cellAction);

      // based on the contents of 'reductions', decide whether this
      // state is delayed or not; to be delayed, the state must be
      // able to reduce by a production which:
      //   - has an ambiguous nonterminal as the last symbol on its RHS
      //   - is not reducing to the *same* nonterminal as the last symbol
      //     (rationale: eagerly reduce "E -> E + E")
      // UPDATE: removed last condition because it actually makes things
      // worse..
      // NOTE(review): 'delayed' is computed but not consumed anywhere
      // within this function; presumably a leftover from a removed
      // feature -- confirm before deleting
      bool delayed = false;
      if (reductions.isNotEmpty()) { // no reductions: eager (irrelevant, actually)
        SFOREACH_PRODUCTION(reductions, prodIter) {
          Production const &prod = *prodIter.data();
          if (prod.rhsLength() >= 1) { // nonempty RHS?
            Symbol const *lastSym = prod.right.lastC()->sym;
            if (isAmbiguousNonterminal(lastSym) // last RHS ambig?
                /*&& lastSym != prod.left*/) { // not same as LHS?
              delayed = true;
            }
          }
        }
      }
    }

    // ---- fill in this row in the goto table ----
    // for each nonterminal...
    for (int nontermId=0; nontermId<numNonterms; nontermId++) {
      Nonterminal const *nonterminal = getNonterminal(nontermId);

      // where do we go when we reduce to this nonterminal?
      ItemSet const *gotoDest = state->transitionC(nonterminal);

      GotoEntry cellGoto;
      if (gotoDest) {
        cellGoto = tables->encodeGoto(gotoDest->id, nonterminal->ntIndex);
      }
      else {
        // this should never be accessed at parse time..
        cellGoto = tables->encodeGotoError();
      }

      // fill in entry
      tables->setGotoEntry(state->id, nontermId, cellGoto);
    }

    // get the state symbol
    xassert((unsigned)(state->id) < (unsigned)(tables->getNumStates()));
    tables->setStateSymbol(state->id,
      encodeSymbolId(state->getStateSymbolC()));
  }

  // report on conflict counts (compared with the declared expectations)
  reportUnexpected(sr, expectedSR, "shift/reduce conflicts");
  reportUnexpected(rr, expectedRR, "reduce/reduce conflicts");

  // report on cyclicity
  for (int nontermId=0; nontermId<numNonterms; nontermId++) {
    Nonterminal const *nonterminal = getNonterminal(nontermId);
    if (nonterminal->cyclic) {
      std::cout << "grammar symbol " << nonterminal->name << " is cyclic\n";
    }
  }

  // fill in 'prodInfo' (RHS length and LHS index per production)
  for (int p=0; p<numProds; p++) {
    Production const *prod = getProduction(p);
    tables->setProdInfo(p, prod->rhsLength(), prod->left->ntIndex);
  }

  // use the derivability relation to compute a total order
  // on nonterminals
  BitArray seen(numNonterms);
  int nextOrdinal = numNonterms-1;
  for (int nt=0; nt < numNonterms; nt++) {
    // expand from 'nt' in case it's disconnected; this will be
    // a no-op if we've already 'seen' it
    topologicalSort(tables->getWritableNontermOrder(), nextOrdinal, nt, seen);
  }
  xassert(nextOrdinal == -1); // should have used them all

  if (ENABLE_EEF_COMPRESSION) {
    tables->computeErrorBits();
  }

  if (ENABLE_GCS_COMPRESSION) {
    if (ENABLE_GCS_COLUMN_COMPRESSION) {
      tables->mergeActionColumns();
    }
    tables->mergeActionRows();

    if (ENABLE_GCS_COLUMN_COMPRESSION) {
      tables->mergeGotoColumns();
    }
    tables->mergeGotoRows();
  }
}
3416:
3417:
3418: // this is a depth-first traversal of the 'derivable' relation;
3419: // when we reach a nonterminal that can't derive any others not
3420: // already in the order, we give its entry the latest ordinal
3421: // that isn't already taken ('nextOrdinal')
3422: void GrammarAnalysis::topologicalSort(
3423: NtIndex *order, // table we're filling with ordinals
3424: int &nextOrdinal, // latest ordinal not yet used
3425: NtIndex current, // current nonterminal to expand
3426: BitArray &seen) // set of nonterminals we've already seen
3427: {
3428: if (seen.test(current)) {
3429: // already expanded this one
3430: return;
3431: }
3432:
3433: // don't expand this one again
3434: seen.set(current);
3435:
3436: // look at all nonterminals this one can derive
3437: for (int nt=0; nt < numNonterms; nt++) {
3438: if (derivable->get(point(nt, current))) {
3439: // 'nt' can derive 'current'; expand 'nt' first, thus making
3440: // it later in the order, so we'll reduce to 'current' before
3441: // reducing to 'nt' (when token spans are equal)
3442: xassert((NtIndex)nt == nt);
3443: topologicalSort(order, nextOrdinal, (NtIndex)nt, seen);
3444: }
3445: }
3446:
3447: // finally, put 'current' into the order
3448: order[current] = nextOrdinal;
3449: nextOrdinal--;
3450: }
3451:
3452:
3453: SymbolId encodeSymbolId(Symbol const *sym)
3454: {
3455: int ret;
3456: if (!sym) {
3457: ret = 0;
3458: }
3459: else if (sym->isTerminal()) {
3460: ret = sym->asTerminalC().termIndex + 1;
3461: }
3462: else /*nonterminal*/ {
3463: ret = - sym->asNonterminalC().ntIndex - 1;
3464:
3465: // verify encoding of nonterminals is sufficiently wide
3466: int idx = sym->asNonterminalC().ntIndex;
3467: xassert((NtIndex)idx == idx);
3468: }
3469:
3470: // verify encoding is lossless
3471: SymbolId ret2 = (SymbolId)ret;
3472: xassert((int)ret2 == ret);
3473: return ret2;
3474: }
3475:
3476:
3477: // --------------- sample inputs -------------------
3478: // yield a sequence of names of symbols (terminals and nonterminals) that
3479: // will lead to the given state, from the start state
3480: sm_string GrammarAnalysis::leftContextString(ItemSet const *state) const
3481: {
3482: SymbolList ctx;
3483: leftContext(ctx, state); // get as list
3484: return symbolSequenceToString(ctx); // convert to sm_string
3485: }
3486:
3487:
3488: // yield the left-context as a sequence of symbols
3489: // CONSTNESS: want output as list of const pointers
3490: void GrammarAnalysis::leftContext(SymbolList &output,
3491: ItemSet const *state) const
3492: {
3493: // since we have the BFS tree, generating sample input (at least, if
3494: // it's allowed to contain nonterminals) is a simple matter of walking
3495: // the tree towards the root
3496:
3497: // for each parent..
3498: while (state->BFSparent) {
3499: // get that parent
3500: ItemSet *parent = state->BFSparent;
3501:
3502: // find a symbol on which we would transition from the parent
3503: // to the current state
3504: Symbol const *sym = inverseTransitionC(parent, state);
3505:
3506: // prepend that symbol's name to our current context
3507: output.prepend(const_cast<Symbol*>(sym));
3508:
3509: // move to our parent and repeat
3510: state = parent;
3511: }
3512: }
3513:
3514:
// compare (dominant, recessive) quantity pairs where the recessive
// value only breaks ties; return <0/=0/>0 when a's quantities are
// fewer/equal/greater (candidate for a general-purpose library)
int priorityCompare(int a_dominant, int b_dominant,
                    int a_recessive, int b_recessive)
{
  if (a_dominant != b_dominant) {
    return (a_dominant < b_dominant)? -1 : +1;
  }
  return a_recessive - b_recessive;
}
3526:
3527: int priorityFewer(int a_dominant, int b_dominant,
3528: int a_recessive, int b_recessive)
3529: {
3530: return priorityCompare(a_dominant, b_dominant,
3531: a_recessive, b_recessive) < 1;
3532: }
3533:
3534:
3535: // sample input (terminals only) that can lead to a state
3536: sm_string GrammarAnalysis::sampleInput(ItemSet const *state) const
3537: {
3538: // get left-context as terminals and nonterminals
3539: SymbolList symbols;
3540: leftContext(symbols, state);
3541:
3542: // reduce the nonterminals to terminals
3543: TerminalList terminals;
3544: if (!rewriteAsTerminals(terminals, symbols)) {
3545: return sm_string("(failed to reduce!!)");
3546: }
3547:
3548: // convert to a sm_string
3549: return terminalSequenceToString(terminals);
3550: }
3551:
3552:
3553: // given a sequence of symbols (terminals and nonterminals), use the
3554: // productions to rewrite it as a (hopefully minimal) sequence of
3555: // terminals only; return true if it works, false if we get stuck
3556: // in an infinite loop
3557: // CONSTNESS: ideally, 'output' would contain const ptrs to terminals
3558: bool GrammarAnalysis::rewriteAsTerminals(TerminalList &output, SymbolList const &input) const
3559: {
3560: // we detect looping by noticing if we ever reduce via the same
3561: // production more than once in a single vertical recursive slice
3562: ProductionList reductionStack; // starts empty
3563:
3564: // start the recursive version
3565: return rewriteAsTerminalsHelper(output, input, reductionStack);
3566: }
3567:
3568:
3569: // (nonterminals and terminals) -> terminals;
3570: // if this returns false, it's guaranteed to return with 'output'
3571: // unchanged from when the function was invoked
3572: bool GrammarAnalysis::
3573: rewriteAsTerminalsHelper(TerminalList &output, SymbolList const &input,
3574: ProductionList &reductionStack) const
3575: {
3576: // remember the initial 'output' length so we can restore
3577: int origLength = output.count();
3578:
3579: // walk down the input list, creating the output list by copying
3580: // terminals and reducing nonterminals
3581: SFOREACH_SYMBOL(input, symIter) {
3582: Symbol const *sym = symIter.data();
3583:
3584: if (sym->isEmptyString) {
3585: // easy; no-op
3586: }
3587:
3588: else if (sym->isTerminal()) {
3589: // no sweat, just copy it (er, copy the pointer)
3590: output.append(const_cast<Terminal*>(&sym->asTerminalC()));
3591: }
3592:
3593: else {
3594: // not too bad either, just reduce it, sticking the result
3595: // directly into our output list
3596: if (!rewriteSingleNTAsTerminals(output, &sym->asNonterminalC(),
3597: reductionStack)) {
3598: // oops.. restore 'output'
3599: while (output.count() > origLength) {
3600: output.removeAt(origLength);
3601: }
3602: return false;
3603: }
3604: }
3605: }
3606:
3607: // ok!
3608: return true;
3609: }
3610:
3611:
3612: // for rewriting into sequences of terminals, we prefer rules with
3613: // fewer nonterminals on the RHS, and then (to break ties) rules with
3614: // fewer RHS symbols altogether; overriding all of this, if one
3615: // production's RHS contains a symbol already expanded, and the other
3616: // does not, then prefer the RHS which hasn't already been expanded
3617: int compareProductionsForRewriting(Production const *p1, Production const *p2,
3618: void *extra)
3619: {
3620: ProductionList *reductionStack = (ProductionList*)extra;
3621:
3622: bool p1RHSSeen=false, p2RHSSeen=false;
3623: SFOREACH_PRODUCTION(*reductionStack, iter) {
3624: if (p1->rhsHasSymbol( iter.data()->left )) {
3625: p1RHSSeen = true;
3626: }
3627: if (p2->rhsHasSymbol( iter.data()->left )) {
3628: p2RHSSeen = true;
3629: }
3630: }
3631:
3632: if (p1RHSSeen != p2RHSSeen) {
3633: // e.g.: p1RHSSeen=true, so p2 is preferred; this will yield +1,
3634: // meaning p1>p2, so p2 comes first in an increasing order sort
3635: return (int)p1RHSSeen - (int)p2RHSSeen;
3636: }
3637:
3638: return priorityCompare(p1->numRHSNonterminals(), p2->numRHSNonterminals(),
3639: p1->rhsLength(), p2->rhsLength());
3640: }
3641:
3642: // nonterminal -> terminals
3643: // CONSTNESS: want 'reductionStack' to be list of const ptrs
3644: bool GrammarAnalysis::
3645: rewriteSingleNTAsTerminals(TerminalList &output, Nonterminal const *nonterminal,
3646: ProductionList &reductionStack) const
3647: {
3648: // get all of 'nonterminal's productions that are not recursive
3649: ProductionList candidates;
3650: FOREACH_PRODUCTION(productions, prodIter) {
3651: Production const *prod = prodIter.data();
3652: if (prod->left != nonterminal) continue;
3653:
3654: // if 'prod' has 'nonterminal' on RHS, that would certainly
3655: // lead to looping (though it's not the only way -- consider
3656: // mutual recursion), so don't even consider it
3657: if (prod->rhsHasSymbol(nonterminal)) {
3658: continue;
3659: }
3660:
3661: // if this production has already been used, don't use it again
3662: if (reductionStack.contains(prod)) {
3663: continue;
3664: }
3665:
3666: // it's a candidate
3667: candidates.prepend(const_cast<Production*>(prod)); // constness
3668: }
3669:
3670: if (candidates.isEmpty()) {
3671: // I don't expect this... either the NT doesn't have any rules,
3672: // or all of them are recursive (which means the language doesn't
3673: // have any finite sentences)
3674: trace("rewrite") << "couldn't find any unused, non-recursive rules for "
3675: << nonterminal->name << std::endl;
3676: return false;
3677: }
3678:
3679: // sort them into order of preference
3680: candidates.mergeSort(compareProductionsForRewriting, &reductionStack);
3681:
3682: // try each in turn until one succeeds; this effectively uses
3683: // backtracking when one fails
3684: bool retval = false;
3685: SFOREACH_PRODUCTION(candidates, candIter) {
3686: Production const *prod = candIter.data();
3687:
3688: // add chosen production to the stack
3689: reductionStack.prepend(const_cast<Production*>(prod));
3690:
3691: // now, the chosen rule provides a RHS, which is a sequence of
3692: // terminals and nonterminals; recursively reduce that sequence
3693: SymbolList rhsSymbols;
3694: prod->getRHSSymbols(rhsSymbols);
3695: retval = rewriteAsTerminalsHelper(output, rhsSymbols, reductionStack);
3696:
3697: // remove chosen production from stack
3698: Production *temp = reductionStack.removeFirst();
3699: xassert(temp == prod);
3700:
3701: if (retval) {
3702: // success!
3703: break;
3704: }
3705: else {
3706: // failed; try the next production
3707: }
3708: }
3709:
3710: // and we succeed only if we found a valid rewriting
3711: return retval;
3712: }
3713:
3714: // --------------- END of sample inputs -------------------
3715:
3716:
// this is mostly [ASU] algorithm 4.7, p.218-219: an SLR(1) parser
//
// Parse 'input' (a whitespace-separated token string) using the
// already-computed 'tables', tracing every shift/reduce step under
// the "parse" trace flag; a test/debug driver, not the real parser.
void GrammarAnalysis::lrParse(char const *input)
{
  // tokenize the input
  StrtokParse tok(input, " \t");

  // parser state
  int currentToken = 0; // index of current token
  StateId state = startState->id; // current parser state
  ArrayStack<StateId> stateStack; // stack of parser states; top==state
  stateStack.push(state);
  ArrayStack<Symbol const*> symbolStack; // stack of shifted symbols

  // for each token of input
  // (NOTE(review): 'tok' converts to its token count here — confirm
  // against StrtokParse)
  while (currentToken < tok) {
    // map the token text to a symbol
    Terminal *symbol = findTerminal(tok[currentToken]); // (constness)

    // consult action table
    ActionEntry action = tables->getActionEntry(state, symbol->termIndex);

    // see what kind of action it is
    if (tables->isShiftAction(action)) {
      // shift
      StateId destState = tables->decodeShift(action, symbol->termIndex);

      // push current state and symbol
      state = destState;
      stateStack.push(state);
      symbolStack.push(symbol);

      // next input symbol
      currentToken++;

      // debugging
      trace("parse")
        << "moving to state " << state
        << " after shifting symbol " << symbol->name << std::endl;
    }

    else if (tables->isReduceAction(action)) {
      // reduce
      int prodIndex = tables->decodeReduce(action, state);
      ParseTables::ProdInfo const &info = tables->getProdInfo(prodIndex);

      // it is here that an action or tree-building step would
      // take place

      // pop as many symbols off stacks as there are symbols on
      // the right-hand side of 'prod'
      stateStack.popMany(info.rhsLen);
      state = stateStack.top();
      symbolStack.popMany(info.rhsLen);

      // find out where to go
      StateId destState = tables->decodeGoto(
        tables->getGotoEntry(state, info.lhsIndex), info.lhsIndex);

      // go there
      state = destState;
      stateStack.push(state);

      // and push the reduced nonterminal
      symbolStack.push(getNonterminal(info.lhsIndex));

      // debugging
      trace("parse")
        << "moving to state " << state
        << " after reducing by rule id " << prodIndex << std::endl;
    }

    else if (tables->isErrorAction(action)) {
      // error
      trace("parse")
        << "no actions defined for symbol " << symbol->name
        << " in state " << state << std::endl;
      break; // stop parsing
    }

    else {
      // conflict
      trace("parse")
        << "conflict for symbol " << symbol->name
        << " in state " << state
        << "; possible actions:\n";

      // get actions
      ActionEntry *entry = tables->decodeAmbigAction(action, state);

      // explain each one
      // (entry[0] is the number of alternatives; they follow it)
      for (int i=0; i<entry[0]; i++) {
        action = entry[i+1];
        if (tables->isShiftAction(action)) {
          trace("parse") << " shift, and move to state "
                         << tables->decodeShift(action, symbol->termIndex) << std::endl;
        }
        else if (tables->isReduceAction(action)) {
          trace("parse") << " reduce by rule id "
                         << tables->decodeReduce(action, state) << std::endl;
        }
        else {
          // no other alternative makes sense
          xfailure("bad code in ambiguous action table");
        }
      }

      break; // stop parsing
    }
  }

  // print final contents of stack; if the parse was successful,
  // I want to see what remains; if not, it's interesting anyway
  trace("parse") << "final contents of stacks (right is top):\n";

  std::ostream &os = trace("parse") << " state stack:";
  int i;
  for (i=0; i < stateStack.length(); i++) {
    os << " " << stateStack[i];
  }
  os << " <-- current" << std::endl;

  os << " symbol stack:";
  for (i=0; i < symbolStack.length(); i++) {
    os << " " << symbolStack[i]->name;
  }
  os << std::endl;
}
3844:
3845:
// ------------------- grammar transformations ------------------
// Rewrite the grammar in place so every production builds a generic
// parse tree: each nonterminal gets identity dup, no-op del, sval
// type PTreeNode*, and a chaining merge; each production gets an
// action that allocates a PTreeNode labeled with the rule text and
// linked to its nonterminal subtrees.
void GrammarAnalysis::addTreebuildingActions()
{
  // intern 's' in the grammar string table, wrapped as a LocString
#define STR(s) LITERAL_LOCSTRING(grammarStringTable.add(s))

  // prepend an #include to the verbatim
  {
    StringRef extra = grammarStringTable.add(
      "\n#include \"ptreenode.h\" // PTreeNode\n");
    verbatim.prepend(new LITERAL_LOCSTRING(extra));
  }

  // get handles to the sm_strings we want to emit
  LocString param = STR("n");
  LocString dupCode = STR("return n;"); // dup is identity
  LocString delCode = STR(""); // del is no-op
  LocString svalType = STR("PTreeNode*");

  // merge relies on chaining scheme for alternatives
  LocString mergeParam1 = STR("L");
  LocString mergeParam2 = STR("R");
  LocString mergeCode = STR("L->addAlternative(R); return L;");

  // write dup/del/merge for nonterminals
  MUTATE_EACH_OBJLIST(Nonterminal, nonterminals, ntIter) {
    Nonterminal *nt = ntIter.data();

    nt->dupParam = param;
    nt->dupCode = dupCode;

    nt->delParam = param;
    nt->delCode = delCode;

    nt->type = svalType;

    nt->mergeParam1 = mergeParam1;
    nt->mergeParam2 = mergeParam2;
    nt->mergeCode = mergeCode;
  }

  // write treebuilding actions for productions
  MUTATE_EACH_OBJLIST(Production, productions, prodIter) {
    Production *p = prodIter.data();

    // build up the code; the node's label is "LHS -> rhs..."
    sm_stringBuilder code;
    code << "return new PTreeNode(\"" << p->left->name << " -> "
         << encodeWithEscapes(p->rhsString(false /*printTags*/,
                                           true /*quoteAliases*/))
         << "\"";

    // counter for generated child tags t1, t2, ...
    int ct=1;
    MUTATE_EACH_OBJLIST(Production::RHSElt, p->right, rIter) {
      Production::RHSElt *elt = rIter.data();

      // connect nonterminal subtrees; drop lexemes on the floor
      if (elt->sym->isNonterminal()) {
        // use a generic tag
        sm_string tag = sm_stringc << "t" << ct++;
        elt->tag = STR(tag);

        code << ", " << tag;
      }
    }

    code << ");";

    // insert the code into the production
    p->action = LocString(SL_UNKNOWN,
                          grammarStringTable.add(code));
  }

#undef STR
}
3920:
3921:
3922: // ---------------------------- main --------------------------------
// varargs sink: accepts anything and does nothing; exists only to
// suppress "unused variable" warnings at call sites
void pretendUsed(...)
{}
3925:
3926:
3927: void GrammarAnalysis::exampleGrammar()
3928: {
3929: // at one time I was using this to verify my LR item set
3930: // construction code; this function isn't even called anymore..
3931: readGrammarFile(*this, "examples/asu419.gr");
3932:
3933: char const *input[] = {
3934: " id $",
3935: " id + id $",
3936: " id * id $",
3937: " id + id * id $",
3938: " id * id + id $",
3939: " ( id + id ) * id $",
3940: " id + id + id $",
3941: " id + ( id + id ) $"
3942: };
3943:
3944: // verify we got what we expected
3945: printProductions(trace("grammar") << std::endl);
3946:
3947:
3948: // run analyses
3949: runAnalyses(NULL);
3950:
3951:
3952: // do some test parses
3953: INTLOOP(i, 0, (int)TABLESIZE(input)) {
3954: trace("parse") << "------ parsing: `" << input[i] << "' -------\n";
3955: lrParse(input[i]);
3956: }
3957: }
3958:
3959:
// Run the full battery of grammar analyses: well-formedness check,
// derivability relation, FIRST/FOLLOW, LR item sets, renumbering and
// parse-table construction; when 'setsFname' is non-NULL, also write
// a debug dump (item sets, unreachable symbols, tokens, productions)
// to that file.
void GrammarAnalysis::runAnalyses(char const *setsFname)
{
  // prepare for symbol of interest
  {
    char const *name = getenv("SYM_OF_INTEREST");
    if (name != NULL) {
      symOfInterest = findSymbolC(name);
      if (!symOfInterest) {
        std::cout << "warning: " << name << " isn't in the grammar\n";
      }
    }
  }

  // reset error count so it might be possible to reuse the object
  // for another grammar
  errors = 0;

  checkWellFormed();

  // precomputations
  traceProgress(1) << "init...\n";
  initializeAuxData();

  traceProgress(1) << "derivability relation...\n";
  computeWhatCanDeriveWhat();

  computeSupersets();

  traceProgress(1) << "first...\n";
  computeFirst();
  computeDProdFirsts();

  traceProgress(1) << "follow...\n";
  computeFollow();

  // print results
  {
    std::ostream &tracer = trace("terminals") << "Terminals:\n";
    printSymbols(tracer, toObjList(terminals));
  }
  {
    std::ostream &tracer = trace("nonterminals") << "Nonterminals:\n";
    tracer << " " << emptyString << std::endl;
    printSymbols(tracer, toObjList(nonterminals));
  }

  if (tracingSys("derivable")) {
    derivable->print();
  }

  // testing closure
#if 0
  {
    // make a singleton set out of the first production, and
    // with the dot at the start
    ObjList<LRItem> itemSet;
    LRItem *kernel = productions.nth(0)->getDProd(0); // (serf)
    itemSet.append(kernel);

    // compute its closure
    itemSetClosure(itemSet);

    // print it
    std::cout << "Closure of: ";
    kernel->print(std::cout);
    std::cout << std::endl;

    SFOREACH_OBJLIST(LRItem, itemSet, dprod) {
      std::cout << " ";
      dprod.data()->print(std::cout);
      std::cout << std::endl;
    }
  }
#endif // 0


  // LR stuff
  traceProgress(1) << "LR item sets...\n";
  constructLRItemSets();

  traceProgress(1) << "state renumbering...\n";
  renumberStates();

  traceProgress(1) << "parse tables...\n";
  // nondeterministic tables unless "deterministic" tracing is on
  computeParseTables(!tracingSys("deterministic"));

#if 0 // old code; need it for just a while longer
  {
    int sr=0, rr=0; // numbers of each kind of conflict
    findSLRConflicts(sr, rr);
    if (sr + rr > 0) {
      std::cout << sr << " shift/reduce conflicts and "
                << rr << " reduce/reduce conflicts\n";
    }
  }
#endif // 0

  // if we want to print, do so before throwing away the items
  if (tracingSys("itemsets")) {
    printProductionsAndItems(std::cout, true /*code*/);
  }

  // open debug output file; on failure fall back to no dump
  std::ofstream *setsOutput = NULL;
  if (setsFname) {
    setsOutput = new std::ofstream(setsFname);
    if (!*setsOutput) {
      std::cout << "couldn't open " << setsFname << " to write item sets\n";
      delete setsOutput;
      setsOutput = NULL;
    }
  }

  // count the number of unreachable nonterminals & terminals
  {
    if (setsOutput) {
      *setsOutput << "unreachable nonterminals:\n";
    }
    int ct=0;
    FOREACH_NONTERMINAL(nonterminals, iter) {
      if (!iter.data()->reachable) {
        ct++;

        if (setsOutput) {
          *setsOutput << " " << iter.data()->name << "\n";
        }
      }
    }

    reportUnexpected(ct, expectedUNRNonterms, "unreachable nonterminals");

    // bison also reports the number of productions under all the
    // unreachable nonterminals, but that doesn't seem especially
    // useful to me

    if (setsOutput) {
      *setsOutput << "unreachable terminals:\n";
    }
    ct=0;
    FOREACH_TERMINAL(terminals, jter) {
      if (!jter.data()->reachable) {
        ct++;

        if (setsOutput) {
          *setsOutput << " " << jter.data()->name << "\n";
        }
      }
    }

    reportUnexpected(ct, expectedUNRTerms, "unreachable terminals");
  }

  // print the item sets
  if (setsOutput) {
    traceProgress() << "printing item sets to " << setsFname << " ..." << std::endl;
    *setsOutput << "NOTE: Item set numbers can change depending on what flags\n"
                << "are passed to 'elkhound'!\n\n\n";
    // only print the nonkernel items if they're explicitly requested,
    // since they are more noise than signal, usually
    printItemSets(*setsOutput, tracingSys("nonkernel"));
  }

  // print information about all tokens
  if (setsOutput) {
    *setsOutput << "terminals:\n";
    FOREACH_TERMINAL(terminals, iter) {
      Terminal const *t = iter.data();
      *setsOutput << " ";
      t->print(*setsOutput);
      *setsOutput << "\n";
    }

    // and nonterminals
    *setsOutput << "nonterminals:\n";
    FOREACH_NONTERMINAL(nonterminals, ntIter) {
      Nonterminal const *nt = ntIter.data();
      *setsOutput << " ";
      nt->print(*setsOutput);
      *setsOutput << "\n";
    }

    // and productions
    *setsOutput << "productions:\n";
    for (int p=0; p<numProds; p++) {
      *setsOutput << " ";
      getProduction(p)->print(*setsOutput);
      *setsOutput << "\n";
    }
  }


  // safe even when NULL
  delete setsOutput;

  // I don't need (most of) the item sets during parsing, so
  // throw them away once I'm done analyzing the grammar
  MUTATE_EACH_OBJLIST(ItemSet, itemSets, iter) {
    iter.data()->throwAwayItems();
  }


  // another analysis
  //computePredictiveParsingTable();

  // silence warnings
  //pretendUsed(a,b,c,d,e, S,A,B,C,D);
}
4166:
4167:
4168: // ------------------ emitting action code -----------------------
4169: // prototypes for this section; some of them accept Grammar simply
4170: // because that's all they need; there's no problem upgrading them
4171: // to GrammarAnalysis
4172: void emitDescriptions(GrammarAnalysis const &g, EmitCode &out);
4173: void emitActionCode(GrammarAnalysis const &g, char const *hFname,
4174: char const *ccFname, char const *srcFname);
4175: void emitUserCode(EmitCode &out, LocString const &code, bool braces = true);
4176: void emitActions(Grammar const &g, EmitCode &out, EmitCode &dcl);
4177: void emitDupDelMerge(GrammarAnalysis const &g, EmitCode &out, EmitCode &dcl);
4178: void emitFuncDecl(Grammar const &g, EmitCode &out, EmitCode &dcl,
4179: char const *rettype, char const *params);
4180: void emitDDMInlines(Grammar const &g, EmitCode &out, EmitCode &dcl,
4181: Symbol const &sym);
4182: void emitSwitchCode(Grammar const &g, EmitCode &out,
4183: char const *signature, char const *switchVar,
4184: ObjList<Symbol> const &syms, int whichFunc,
4185: char const *templateCode, char const *actUpon);
4186:
4187:
4188: // yield the name of the inline function for this production; naming
4189: // design motivated by desire to make debugging easier
4190: sm_string actionFuncName(Production const &prod)
4191: {
4192: return sm_stringc << "action" << prod.prodIndex
4193: << "_" << prod.left->name;
4194: }
4195:
4196:
// emit the user's action code to a file
//   g        - analyzed grammar whose actions/classes are emitted
//   hFname   - header to write: context class + declarations
//   ccFname  - implementation to write: actions, dup/del/merge, tables
//   srcFname - original grammar source name, quoted in comments
void emitActionCode(GrammarAnalysis const &g, char const *hFname,
                    char const *ccFname, char const *srcFname)
{
  EmitCode dcl(hFname);
  if (!dcl) {
    throw_XOpen(hFname);
  }

  // build the include-guard name: uppercase the header filename and
  // replace non-identifier characters with '_'
  sm_string latchName = replace(replace(replace(replace(replace(
    sm_stringToupper(hFname),
    ".", "_"),
    ":", "_"),
    "\\", "_"),
    "/", "_"),
    "-", "_");

  // prologue
  dcl << "// " << hFname << "\n"
      << "// *** DO NOT EDIT BY HAND ***\n"
      << "// automatically generated by elkhound, from " << srcFname << "\n"
      << "\n"
      << "#ifndef " << latchName << "\n"
      << "#define " << latchName << "\n"
      << "\n"
      << "#include \"elk_useract.h\" // UserActions\n"
      << "\n"
      ;

  // insert the stand-alone verbatim sections
  {FOREACH_OBJLIST(LocString, g.verbatim, iter) {
    emitUserCode(dcl, *(iter.data()), false /*braces*/);
  }}

  // insert each of the context class definitions; the last one
  // is the one whose name is 'g.actionClassName' and into which
  // the action functions are inserted as methods
  {
    int ct=0;
    FOREACH_OBJLIST(LocString, g.actionClasses, iter) {
      if (ct++ > 0) {
        // end the previous class; the following body will open
        // another one, and the brace following the action list
        // will close the last one
        dcl << "};\n";
      }

      dcl << "\n"
          << "// parser context class\n"
          << "class ";
      emitUserCode(dcl, *(iter.data()), false /*braces*/);
  }}

  // we end the context class with declarations of the action functions
  dcl << "\n"
      << "private:\n"
      << " USER_ACTION_FUNCTIONS // see useract.h\n"
      << "\n"
      << " // declare the actual action function\n"
      << " static SemanticValue doReductionAction(\n"
      << " " << g.actionClassName << " *ths,\n"
      << " int productionId, SemanticValue const *semanticValues"
  SOURCELOC( << ",\n SourceLoc loc" )
      << ");\n"
      << "\n"
      << " // declare the classifier function\n"
      << " static int reclassifyToken(\n"
      << " " << g.actionClassName << " *ths,\n"
      << " int oldTokenType, SemanticValue sval);\n"
      << "\n"
      ;

  EmitCode out(ccFname);
  if (!out) {
    throw_XOpen(ccFname);
  }

  out << "// " << ccFname << "\n";
  out << "// *** DO NOT EDIT BY HAND ***\n";
  out << "// automatically generated by gramanl, from " << srcFname << "\n";
  out << "\n";
#ifdef NO_GLR_SOURCELOC
  // we need to make sure the USER_ACTION_FUNCTIONS use
  // the declarations consistent with how we're printing
  // the definitions
  out << "#ifndef NO_GLR_SOURCELOC\n";
  out << " #define NO_GLR_SOURCELOC\n";
  out << "#endif\n";
#else
  out << "// GLR source location information is enabled\n";
#endif
  out << "\n";
  out << "#include \"" << sm_basename(hFname).pchar() << "\" // " << g.actionClassName << "\n";
  out << "#include \"elk_parsetables.h\" // ParseTables\n";
  out << "#include \"sm_srcloc.h\" // SourceLoc\n";
  out << "\n";
  out << "#include <assert.h> // assert\n";
  out << "#include <iostream> // std::cout\n";
  out << "#include <stdlib.h> // abort\n";
  out << "\n";

  NOSOURCELOC(
    out << "// parser-originated location information is disabled by\n"
        << "// NO_GLR_SOURCELOC; any rule which refers to 'loc' will get this one\n"
        << "static SourceLoc loc = SL_UNKNOWN;\n"
        << "\n\n";
  )

  emitDescriptions(g, out);
  // 'emitDescriptions' prints two newlines itself..

  emitActions(g, out, dcl);
  out << "\n";
  out << "\n";

  emitDupDelMerge(g, out, dcl);
  out << "\n";
  out << "\n";

  // seal the tables and emit them as construction code
  g.tables->finishTables();
  g.tables->emitConstructionCode(out, g.actionClassName, "makeTables");

  // I put this last in the context class, and make it public
  dcl << "\n"
      << "// the function which makes the parse tables\n"
      << "public:\n"
      << " virtual ParseTables *makeTables();\n"
      << "};\n"
      << "\n"
      << "#endif // " << latchName << "\n"
      ;

  // finish the implementation file with the impl_verbatim sections
  FOREACH_OBJLIST(LocString, g.implVerbatim, iter) {
    emitUserCode(out, *(iter.data()), false /*braces*/);
  }
}
4334:
4335:
4336: void emitUserCode(EmitCode &out, LocString const &code, bool braces)
4337: {
4338: out << "\n";
4339: if (code.validLoc()) {
4340: out << lineDirective(code.loc);
4341: }
4342:
4343: // 7/27/03: swapped so that braces are inside the line directive
4344: if (braces) {
4345: out << "{";
4346: }
4347:
4348: out << code;
4349:
4350: // the final brace is on the same line so errors reported at the
4351: // last brace go to user code
4352: if (braces) {
4353: out << " }";
4354: }
4355:
4356: if (code.validLoc()) {
4357: out << "\n" << restoreLine;
4358: }
4359: out << "\n";
4360: }
4361:
4362:
// bit of a hack: map "void" to "SemanticValue" so that the compiler
// won't mind when I try to declare parameters of that type
char const *notVoid(char const *type)
{
  return (0==strcmp(type, "void"))? "SemanticValue" : type;
}
4374:
4375: // yield the given type, but if it's NULL, then yield
4376: // something to use instead
4377: char const *typeString(char const *type, LocString const &tag)
4378: {
4379: if (!type) {
4380: xbase(sm_stringc << tag.locString() << ": Production tag \"" << tag
4381: << "\" on a symbol with no type.\n");
4382: return NULL; // silence warning
4383: }
4384: else {
4385: return notVoid(type);
4386: }
4387: }
4388:
4389:
// return true if the type starts with the word "enum"
// (NOTE(review): this is a bare 4-char prefix test, so it also
// matches e.g. "enumerate" — confirm that's acceptable to callers)
bool isEnumType(char const *type)
{
  return strncmp(type, "enum", 4) == 0;
}
4395:
4396:
// Emit the static id->name tables (termNames / nontermNames) and the
// action class's description/name accessor method definitions.
void emitDescriptions(GrammarAnalysis const &g, EmitCode &out)
{
  // emit a map of terminal ids to their names
  {
    out << "static char const *termNames[] = {\n";
    for (int code=0; code < g.numTerminals(); code++) {
      Terminal const *t = g.getTerminal(code);
      if (!t) {
        // no terminal for that code
        out << " \"(no terminal)\", // " << code << "\n";
      }
      else {
        out << " \"" << t->name << "\", // " << code << "\n";
      }
    }
    out << "};\n"
        << "\n";
  }

  // emit a function to describe terminals; at some point I'd like to
  // extend my grammar format to allow the user to supply
  // token-specific description functions, but for now I will just
  // use the information easily available the synthesize one;
  // I print "sval % 100000" so I get a 5-digit number, which is
  // easy for me to compare for equality without adding much clutter
  out << "sm_string " << g.actionClassName
      << "::terminalDescription(int termId, SemanticValue sval)\n"
      << "{\n"
      << " return sm_stringc << termNames[termId]\n"
      << " << \"(\" << (sval % 100000) << \")\";\n"
      << "}\n"
      << "\n"
      << "\n"
      ;

  // emit a map of nonterminal ids to their names
  {
    out << "static char const *nontermNames[] = {\n";
    for (int code=0; code < g.numNonterminals(); code++) {
      Nonterminal const *nt = g.getNonterminal(code);
      if (!nt) {
        // no nonterminal for that code
        out << " \"(no nonterminal)\", // " << code << "\n";
      }
      else {
        out << " \"" << nt->name << "\", // " << code << "\n";
      }
    }
    out << "};\n"
        << "\n";
  }

  // and a function to describe nonterminals also
  out << "sm_string " << g.actionClassName
      << "::nonterminalDescription(int nontermId, SemanticValue sval)\n"
      << "{\n"
      << " return sm_stringc << nontermNames[nontermId]\n"
      << " << \"(\" << (sval % 100000) << \")\";\n"
      << "}\n"
      << "\n"
      << "\n"
      ;

  // emit functions to get access to the static maps
  out << "char const *" << g.actionClassName
      << "::terminalName(int termId)\n"
      << "{\n"
      << " return termNames[termId];\n"
      << "}\n"
      << "\n"
      << "char const *" << g.actionClassName
      << "::nonterminalName(int nontermId)\n"
      << "{\n"
      << " return nontermNames[nontermId];\n"
      << "}\n"
      << "\n"
      ;
}
4475:
4476:
// Emit the per-production action machinery: one inline, properly-typed
// action method per production (definitions to 'out', declarations to
// 'dcl'), followed by the doReductionAction dispatcher that switches on
// productionId and calls the right inline with semantic values cast to
// their declared types.
4477: void emitActions(Grammar const &g, EmitCode &out, EmitCode &dcl)
4478: {
4479:   out << "// ------------------- actions ------------------\n";
4480: 
4481:   // iterate over productions, emitting inline action functions
4482:   {FOREACH_OBJLIST(Production, g.productions, iter) {
4483:     Production const &prod = *(iter.data());
4484: 
4485:     // there's no syntax for a typeless nonterminal, so this shouldn't
4486:     // be triggerable by the user
4487:     xassert(prod.left->type);
4488: 
4489:     // put the production in comments above the defn
4490:     out << "// " << prod.toString() << "\n";
4491: 
    // NOTE(review): SOURCELOC(...) presumably expands its argument only
    // when source-location tracking is compiled in (macro defined
    // elsewhere in this file) — confirm before relying on it
4492:     out << "inline " << prod.left->type << " "
4493:         << g.actionClassName << "::" << actionFuncName(prod)
4494:         << "("
4495:         SOURCELOC( << "SourceLoc loc" )
4496:         ;
4497: 
4498:     dcl << "  " << prod.left->type << " " << actionFuncName(prod) << "("
4499:         SOURCELOC( << "SourceLoc loc" )
4500:         ;
4501: 
4502:     int ct=0;
4503:     SOURCELOC( ct++ );   // if we printed the 'loc' param, count it
4504: 
4505:     // iterate over RHS elements, emitting formals for each with a tag
4506:     FOREACH_OBJLIST(Production::RHSElt, prod.right, rhsIter) {
4507:       Production::RHSElt const &elt = *(rhsIter.data());
4508:       if (elt.tag.length() == 0) continue;
4509: 
      // comma separator goes before every formal except the first
4510:       if (ct++ > 0) {
4511:         out << ", ";
4512:         dcl << ", ";
4513:       }
4514: 
4515:       out << typeString(elt.sym->type, elt.tag);
4516:       dcl << typeString(elt.sym->type, elt.tag);
4517: 
4518:       // the tag becomes the formal parameter's name
4519:       out << " " << elt.tag;
4520:       dcl << " " << elt.tag;
4521:     }
4522: 
4523:     out << ")";
4524:     dcl << ");\n";
4525: 
4526:     // now insert the user's code, to execute in this environment of
4527:     // properly-typed semantic values
4528:     emitUserCode(out, prod.action);
4529:   }}
4530: 
4531:   out << "\n";
4532: 
4533:   // main action function; calls the inline functions emitted above
4534:   out << "/*static*/ SemanticValue " << g.actionClassName << "::doReductionAction(\n"
4535:       << "  " << g.actionClassName << " *ths,\n"
4536:       << "  int productionId, SemanticValue const *semanticValues"
4537:       SOURCELOC( << ",\n  SourceLoc loc" )
4538:       << ")\n";
4539:   out << "{\n";
4540:   out << "  switch (productionId) {\n";
4541: 
4542:   // iterate over productions
4543:   FOREACH_OBJLIST(Production, g.productions, iter) {
4544:     Production const &prod = *(iter.data());
4545: 
4546:     out << "    case " << prod.prodIndex << ":\n";
4547:     out << "      return (SemanticValue)(ths->" << actionFuncName(prod) << "("
4548:         SOURCELOC( << "loc" )
4549:         ;
4550: 
4551:     // iterate over RHS elements, emitting arguments for each with a tag
4552:     int index = -1;     // index into 'semanticValues'
4553:     int ct=0;
4554:     SOURCELOC( ct++ );   // count 'loc' if it is passed
4555:     FOREACH_OBJLIST(Production::RHSElt, prod.right, rhsIter) {
4556:       Production::RHSElt const &elt = *(rhsIter.data());
4557: 
4558:       // we have semantic values in the array for all RHS elements,
4559:       // even if they didn't get a tag
4560:       index++;
4561: 
4562:       if (elt.tag.length() == 0) continue;
4563: 
4564:       if (ct++ > 0) {
4565:         out << ", ";
4566:       }
4567: 
4568:       // cast SemanticValue to proper type
4569:       out << "(" << typeString(elt.sym->type, elt.tag) << ")";
4570:       if (isEnumType(elt.sym->type)) {
4571:         // egcs-1.1.2 complains when I cast from void* to enum, even
4572:         // when there is a cast!  so let's put an intermediate cast
4573:         // to int
4574:         out << "(int)";
4575:       }
4576:       out << "(semanticValues[" << index << "])";
4577:     }
4578: 
4579:     out << ")";    // end of argument list
4580: 
4581:     if (0==strcmp(prod.left->type, "void")) {
4582:       // cute hack: turn the expression into a comma expression, with
4583:       // the value returned being 0
4584:       out << ", 0";
4585:     }
4586: 
4587:     out << ");\n";
4588:   }
4589: 
4590:   out << "    default:\n";
4591:   out << "      assert(!\"invalid production code\");\n";
4592:   out << "      return (SemanticValue)0;   // silence warning\n";
4593:   out << "  }\n";
4594:   out << "}\n";
4595: 
4596: 
4597:   // now emit the UserActions function which returns the doReductionAction
4598:   // function pointer
4599:   out << "\n";
4600:   out << "UserActions::ReductionActionFunc " << g.actionClassName << "::getReductionAction()\n";
4601:   out << "{\n";
4602:   out << "  return (ReductionActionFunc)&" << g.actionClassName << "::doReductionAction;\n";
4603:   out << "}\n";
4604: 
4605: }
4606:
4607:
// Emit the semantic-value storage-management hooks: first the
// per-symbol inline handlers (via emitDDMInlines), then one
// switch-based dispatcher per hook (via emitSwitchCode).  The integer
// passed to emitSwitchCode selects the hook:
//   0=dup, 1=del, 2=merge, 3=keep, 4=classify.
// Nonterminals get dup/del/merge/keep; terminals get dup/del/classify.
4608: void emitDupDelMerge(GrammarAnalysis const &g, EmitCode &out, EmitCode &dcl)
4609: {
4610:   out << "// ---------------- dup/del/merge/keep nonterminals ---------------\n"
4611:       << "\n";
4612: 
4613:   // emit inlines for dup/del/merge of nonterminals
4614:   FOREACH_OBJLIST(Nonterminal, g.nonterminals, ntIter) {
4615:     emitDDMInlines(g, out, dcl, *(ntIter.data()));
4616:   }
4617: 
4618:   // emit dup-nonterm
4619:   emitSwitchCode(g, out,
4620:     "SemanticValue $acn::duplicateNontermValue(int nontermId, SemanticValue sval)",
4621:     "nontermId",
4622:     (ObjList<Symbol> const&)g.nonterminals,
4623:     0 /*dupCode*/,
4624:     "      return (SemanticValue)dup_$symName(($symType)sval);\n",
4625:     NULL);
4626: 
4627:   // emit del-nonterm
4628:   emitSwitchCode(g, out,
4629:     "void $acn::deallocateNontermValue(int nontermId, SemanticValue sval)",
4630:     "nontermId",
4631:     (ObjList<Symbol> const&)g.nonterminals,
4632:     1 /*delCode*/,
4633:     "      del_$symName(($symType)sval);\n"
4634:     "      return;\n",
4635:     "deallocate nonterm");
4636: 
4637:   // emit merge-nonterm
4638:   emitSwitchCode(g, out,
4639:     "SemanticValue $acn::mergeAlternativeParses(int nontermId, SemanticValue left,\n"
4640:     "                                           SemanticValue right"
4641:     SOURCELOC(", SourceLoc loc")
4642:     ")",
4643:     "nontermId",
4644:     (ObjList<Symbol> const&)g.nonterminals,
4645:     2 /*mergeCode*/,
4646:     "      return (SemanticValue)merge_$symName(($symType)left, ($symType)right);\n",
4647:     "merge nonterm");
4648: 
4649:   // emit keep-nonterm
4650:   emitSwitchCode(g, out,
4651:     "bool $acn::keepNontermValue(int nontermId, SemanticValue sval)",
4652:     "nontermId",
4653:     (ObjList<Symbol> const&)g.nonterminals,
4654:     3 /*keepCode*/,
4655:     "      return keep_$symName(($symType)sval);\n",
4656:     NULL);
4657: 
4658: 
4659:   out << "\n";
4660:   out << "// ---------------- dup/del/classify terminals ---------------\n";
4661:   // emit inlines for dup/del of terminals
4662:   FOREACH_OBJLIST(Terminal, g.terminals, termIter) {
4663:     emitDDMInlines(g, out, dcl, *(termIter.data()));
4664:   }
4665: 
4666:   // emit dup-term
4667:   emitSwitchCode(g, out,
4668:     "SemanticValue $acn::duplicateTerminalValue(int termId, SemanticValue sval)",
4669:     "termId",
4670:     (ObjList<Symbol> const&)g.terminals,
4671:     0 /*dupCode*/,
4672:     "      return (SemanticValue)dup_$symName(($symType)sval);\n",
4673:     NULL);
4674: 
4675:   // emit del-term
4676:   emitSwitchCode(g, out,
4677:     "void $acn::deallocateTerminalValue(int termId, SemanticValue sval)",
4678:     "termId",
4679:     (ObjList<Symbol> const&)g.terminals,
4680:     1 /*delCode*/,
4681:     "      del_$symName(($symType)sval);\n"
4682:     "      return;\n",
4683:     "deallocate terminal");
4684: 
4685:   // emit classify-term
4686:   emitSwitchCode(g, out,
4687:     "/*static*/ int $acn::reclassifyToken($acn *ths, int oldTokenType, SemanticValue sval)",
4688:     "oldTokenType",
4689:     (ObjList<Symbol> const&)g.terminals,
4690:     4 /*classifyCode*/,
4691:     "      return ths->classify_$symName(($symType)sval);\n",
4692:     NULL);
4693: 
4694:   // and the virtual method which returns the classifier
4695:   out << "UserActions::ReclassifyFunc " << g.actionClassName << "::getReclassifier()\n"
4696:       << "{\n"
4697:       << "  return (ReclassifyFunc)&" << g.actionClassName << "::reclassifyToken;\n"
4698:       << "}\n";
4699: }
4700:
4701:
4702: // emit both the function decl for the .h file, and the beginning of
4703: // the function definition for the .cc file
4704: void emitFuncDecl(Grammar const &g, EmitCode &out, EmitCode &dcl,
4705: char const *rettype, char const *params)
4706: {
4707: out << "inline " << rettype << " " << g.actionClassName
4708: << "::" << params;
4709:
4710: dcl << " inline " << rettype << " " << params << ";\n";
4711: }
4712:
4713:
// For a single grammar symbol, emit inline definitions (to 'out') and
// declarations (to 'dcl') for whichever dup/del/merge/keep/classify
// handlers the grammar author supplied.  dup/del can exist on any
// symbol; merge/keep only on nonterminals; classify only on terminals.
4714: void emitDDMInlines(Grammar const &g, EmitCode &out, EmitCode &dcl,
4715:                     Symbol const &sym)
4716: {
  // at most one of these is non-NULL, depending on the symbol's kind
4717:   Terminal const *term = sym.ifTerminalC();
4718:   Nonterminal const *nonterm = sym.ifNonterminalC();
4719: 
4720:   if (sym.dupCode) {
4721:     emitFuncDecl(g, out, dcl, sym.type,
4722:       sm_stringc << "dup_" << sym.name
4723:                  << "(" << sym.type << " " << sym.dupParam << ") ");
4724:     emitUserCode(out, sym.dupCode);
4725:   }
4726: 
4727:   if (sym.delCode) {
    // the del parameter is optional; emit an unnamed parameter when absent
4728:     emitFuncDecl(g, out, dcl, "void",
4729:       sm_stringc << "del_" << sym.name
4730:                  << "(" << sym.type << " "
4731:                  << (sym.delParam? sym.delParam : "") << ") ");
4732:     emitUserCode(out, sym.delCode);
4733:   }
4734: 
4735:   if (nonterm && nonterm->mergeCode) {
    // merge uses notVoid so a "void"-typed nonterminal still yields a
    // declarable parameter/return type
4736:     emitFuncDecl(g, out, dcl, notVoid(sym.type),
4737:       sm_stringc << "merge_" << sym.name
4738:                  << "(" << notVoid(sym.type) << " " << nonterm->mergeParam1
4739:                  << ", " << notVoid(sym.type) << " " << nonterm->mergeParam2 << ") ");
4740:     emitUserCode(out, nonterm->mergeCode);
4741:   }
4742: 
4743:   if (nonterm && nonterm->keepCode) {
4744:     emitFuncDecl(g, out, dcl, "bool",
4745:       sm_stringc << "keep_" << sym.name
4746:                  << "(" << sym.type << " " << nonterm->keepParam << ") ");
4747:     emitUserCode(out, nonterm->keepCode);
4748:   }
4749: 
4750:   if (term && term->classifyCode) {
4751:     emitFuncDecl(g, out, dcl, "int",
4752:       sm_stringc << "classify_" << sym.name
4753:                  << "(" << sym.type << " " << term->classifyParam << ") ");
4754:     emitUserCode(out, term->classifyCode);
4755:   }
4756: }
4757:
4758: void emitSwitchCode(Grammar const &g, EmitCode &out,
4759: char const *signature, char const *switchVar,
4760: ObjList<Symbol> const &syms, int whichFunc,
4761: char const *templateCode, char const *actUpon)
4762: {
4763: out << replace(signature, "$acn", g.actionClassName) << "\n"
4764: "{\n"
4765: " switch (" << switchVar << ") {\n";
4766:
4767: FOREACH_OBJLIST(Symbol, syms, symIter) {
4768: Symbol const &sym = *(symIter.data());
4769:
4770: if (whichFunc==0 && sym.dupCode ||
4771: whichFunc==1 && sym.delCode ||
4772: whichFunc==2 && sym.asNonterminalC().mergeCode ||
4773: whichFunc==3 && sym.asNonterminalC().keepCode ||
4774: whichFunc==4 && sym.asTerminalC().classifyCode) {
4775: out << " case " << sym.getTermOrNontermIndex() << ":\n";
4776: out << replace(replace(templateCode,
4777: "$symName", sym.name),
4778: "$symType", notVoid(sym.type));
4779: }
4780: }
4781:
4782: out << " default:\n";
4783: switch (whichFunc) {
4784: default:
4785: xfailure("bad func code");
4786:
4787: case 0: // unspecified dup
4788: if (!g.useGCDefaults) {
4789: // not using GC, return NULL so silent sharing doesn't happen
4790: out << " return (SemanticValue)0;\n";
4791: }
4792: else {
4793: // using GC, sharing is fine
4794: out << " return sval;\n";
4795: }
4796: break;
4797:
4798: case 1: // unspecified del
4799: if (!g.useGCDefaults) {
4800: // warn about unspec'd del, since it's probably a memory leak
4801: if (syms.firstC()->isNonterminal()) {
4802: // use the nonterminal map
4803: out << " std::cout << \"WARNING: there is no action to deallocate nonterm \"\n"
4804: " << nontermNames[" << switchVar << "] << std::endl;\n";
4805: }
4806: else {
4807: // use the terminal map
4808: out << " std::cout << \"WARNING: there is no action to deallocate terminal \"\n"
4809: " << termNames[" << switchVar << "] << std::endl;\n";
4810: }
4811: }
4812: else {
4813: // in gc mode, just ignore del
4814: out << " break;\n";
4815: }
4816: break;
4817:
4818: case 2: // unspecified merge: warn, but then use left (arbitrarily)
4819: out << " std::cout << toString(loc) \n"
4820: << " << \": WARNING: there is no action to merge nonterm \"\n"
4821: << " << nontermNames[" << switchVar << "] << std::endl;\n";
4822: if (g.defaultMergeAborts) {
4823: out << " abort();\n";
4824: }
4825: else {
4826: out << " return left;\n";
4827: }
4828: break;
4829:
4830: case 3: // unspecified keep: keep it
4831: out << " return true;\n";
4832: break;
4833:
4834: case 4: // unspecified classifier: identity map
4835: out << " return oldTokenType;\n";
4836: break;
4837: }
4838:
4839: out << " }\n"
4840: "}\n"
4841: "\n";
4842: }
4843:
4844:
4845: // ------------------------- main --------------------------
4846: // TODO: split this into its own source file
4847:
4848: #include "sm_bflatten.h"
4849: #include "sm_test.h"
4850: #include "elk_gramast.ast.gen.h"
4851:
4852: #include <stdio.h> // remove
4853: #include <stdlib.h> // system
4854:
4855:
// Command-line driver: parses options, reads and merges the grammar
// module(s), runs the grammar analyses, and emits C++ or OCaml action
// code (plus optional bison-format output).  Returns a process exit
// code: 0 = success, 2 = usage/analysis error, 3 = removed feature.
4856: int inner_entry(int argc, char **argv)
4857: {
// SHIFT consumes one command-line argument (argc--/argv++); written
// without a trailing ';' so call sites supply it
4858:   #define SHIFT argc--; argv++ /* user ; */
4859: 
4860:   char const *progName = argv[0];
4861:   SHIFT;
4862: 
4863:   // disable 'Exception thrown' reports
4864:   xBase::logExceptions = false;
4865: 
4866:   // as long as this remains 0-length, it means to use
4867:   // the default naming scheme
4868:   sm_string prefix;
4869: 
4870:   // true to use ML, false to use C
4871:   bool useML = false;
4872: 
4873:   while (argv[0] && argv[0][0] == '-') {
4874:     char const *op = argv[0]+1;
4875:     if (0==strcmp(op, "tr")) {
4876:       SHIFT;
      // NOTE(review): if "-tr" (or "-o" below) is the final argument,
      // the SHIFT above leaves argv[0] NULL and the next line
      // dereferences it — confirm and add a missing-operand check
4877:       traceAddMultiSys(argv[0]);
4878:       SHIFT;
4879:     }
4880:     else if (0==strcmp(op, "v")) {
4881:       SHIFT;
4882:       traceAddSys("progress");
4883:     }
4884:     else if (0==strcmp(op, "o")) {
4885:       SHIFT;
4886:       prefix = argv[0];
4887:       SHIFT;
4888:     }
4889:     else if (0==strcmp(op, "testRW")) {
4890:       SHIFT;
4891:       std::cout << "The testRW option has been removed because I wasn't using\n"
4892:                    "it, and the code that implements it has bit-rotted.\n";
4893:       exit(3);
4894:     }
4895:     else if (0==strcmp(op, "ocaml")) {
4896:       SHIFT;
4897:       useML = true;
4898:     }
4899:     else {
4900:       std::cout << "unknown option: " << argv[0] << std::endl;
4901:       exit(2);
4902:     }
4903:   }
4904: 
  // no input file given: print usage and exit successfully
4905:   if (!argv[0]) {
4906:     std::cout << "usage: " << progName << " [options] filename.gr [extension.gr [...]]\n"
4907:                  "  Generates parse tables to parse with the given grammar.\n"
4908:                  "  The optional extension modules can add rules, etc.\n"
4909:                  "\n"
4910:                  "options:\n"
4911:                  "  -tr <traceFlags>: turn on some flags (separate with commas):\n"
4912:                  "      conflict  : print LALR(1) conflicts\n"
4913:                  "      prec      : show how prec/assoc are used to resolve conflicts\n"
4914:                  "      lrtable   : print LR parsing tables to <prefix>.out\n"
4915:                  "      nonkernel : include non-kernel items in <prefix>.out\n"
4916:                  "      treebuild : replace given actions with treebuilding actions\n"
4917:                  "      grammar   : echo grammar to stdout (after merging modules)\n"
4918:                  "  -v : print stages of processing\n"
4919:                  "  -o <prefix> : name outputs <prefix>.h and <prefix>.cc\n"
4920:                  "                (default is filename.gen.h, filename.gen.cc)\n"
4921:                  "  -ocaml : generate ocaml parser instead of C++ parser\n"
4922:                  ;
4923:     return 0;
4924:   }
4925: 
4926:   if (!prefix.length()) {
4927:     // default naming scheme
4928:     prefix = replace(argv[0], ".gr", "");
4929:   }
4930: 
4931:   SourceLocManager mgr;
4932: 
4933:   // parse the grammar
4934:   sm_string grammarFname = argv[0];
4935:   SHIFT;
4936:   Owner<GrammarAST> ast(parseGrammarFile(grammarFname, useML));
4937: 
4938:   // parse and merge its extension modules
4939:   while (argv[0]) {
4940:     Owner<GrammarAST> ext(parseGrammarFile(argv[0], useML));
4941: 
4942:     traceProgress() << "merging module: " << argv[0] << std::endl;
4943:     mergeGrammar(ast, ext);
4944: 
4945:     SHIFT;
4946:   }
4947: 
4948:   // parse the AST into a Grammar
4949:   GrammarAnalysis g;
4950:   if (useML) {
4951:     g.targetLang = "OCaml";
4952:   }
4953:   parseGrammarAST(g, ast);
4954:   ast.del();       // done with it
4955: 
4956:   if (tracingSys("treebuild")) {
4957:     std::cout << "replacing given actions with treebuilding actions\n";
4958:     g.addTreebuildingActions();
4959:   }
4960:   g.printProductions(trace("grammar") << std::endl);
4961: 
  // run the LALR analyses; nonzero g.errors aborts before emission
4962:   sm_string setsFname = sm_stringc << prefix << ".out";
4963:   g.runAnalyses(tracingSys("lrtable")? setsFname.pcharc() : NULL);
4964:   if (g.errors) {
4965:     return 2;
4966:   }
4967: 
4968:   if (!useML) {
4969:     // emit some C++ code
4970:     sm_string hFname = sm_stringc << prefix << ".h";
4971:     sm_string ccFname = sm_stringc << prefix << ".cc";
4972:     traceProgress() << "emitting C++ code to " << ccFname
4973:                     << " and " << hFname << " ...\n";
4974: 
4975:     emitActionCode(g, hFname, ccFname, grammarFname);
4976:   }
4977:   else {
4978:     // emit some ML code
4979:     sm_string mliFname = sm_stringc << prefix << ".mli";
4980:     sm_string mlFname = sm_stringc << prefix << ".ml";
4981:     traceProgress() << "emitting OCaml code to " << mlFname
4982:                     << " and " << mliFname << " ...\n";
4983: 
4984:     emitMLActionCode(g, mliFname, mlFname, grammarFname);
4985:   }
4986: 
4987:   // before using 'xfer' we have to tell it about the sm_string table
4988:   flattenStrTable = &grammarStringTable;
4989: 
4990:   // write it in a bison-compatible format as well
4991:   if (tracingSys("bison")) {
4992:     sm_string bisonFname = sm_stringc << prefix << ".y";
4993:     traceProgress() << "writing bison-compatible grammar to " << bisonFname << std::endl;
4994:     std::ofstream out(bisonFname);
4995:     g.printAsBison(out);
4996:   }
4997: 
4998:   traceProgress() << "done\n";
4999: 
5000:   // this doesn't work
5001:   if (tracingSys("explore")) {
5002:     grammarExplorer(g);
5003:   }
5004: 
5005:   return 0;
5006: }
5007:
5008: void entry(int argc, char **argv)
5009: {
5010: int ret = inner_entry(argc, argv);
5011: if (ret != 0) {
5012: exit(ret);
5013: }
5014: }
5015:
5016: ARGS_MAIN
5017:
Start cpp section to elk/elk_gramast.ast.gen.cpp[1
/1
]
1: #line 14048 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
// ===== GrammarAST (astgen-generated boilerplate) =====
// dtor deletes the owned child list; debugPrint renders the node via
// the PRINT_* macros; clone() deep-copies children with cloneASTList.
11: GrammarAST::~GrammarAST()
12: {
13:   forms.deleteAll();
14: }
15: 
16: void GrammarAST::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
17: {
18:   PRINT_HEADER(subtreeName, GrammarAST);
19: 
20:   PRINT_LIST(TopForm, forms);
21: }
22: 
23: GrammarAST *GrammarAST::clone() const
24: {
25:   GrammarAST *ret = new GrammarAST(
26:     cloneASTList(forms)
27:   );
28:   return ret;
29: }
30:
31:
32:
33:
// ===== TopForm hierarchy, part 1 (astgen-generated boilerplate) =====
// The abstract base TopForm plus the TF_context / TF_verbatim /
// TF_option subclasses: each gets a dtor, a debugPrint, and a deep
// clone().  DEFN_AST_DOWNCASTS presumably defines the checked downcast
// helpers declared by the matching macro in the header — confirm in
// the ast library.
34: TopForm::~TopForm()
35: {
36: }
37: 
// maps the kind enum to a printable name, in declaration order
38: char const * const TopForm::kindNames[TopForm::NUM_KINDS] = {
39:   "TF_context",
40:   "TF_verbatim",
41:   "TF_option",
42:   "TF_terminals",
43:   "TF_nonterm",
44: };
45: 
// base-class hook: intentionally empty; each concrete kind prints its
// own fields and calls back up to this
46: void TopForm::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
47: {
48: }
49: 
50: DEFN_AST_DOWNCASTS(TopForm, TF_context, TF_CONTEXT)
51: 
52: TF_context::~TF_context()
53: {
54: }
55: 
56: void TF_context::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
57: {
58:   PRINT_HEADER(subtreeName, TF_context);
59: 
60:   TopForm::debugPrint(os, indent, subtreeName);
61: 
62:   PRINT_GENERIC(body);
63: }
64: 
65: TF_context *TF_context::clone() const
66: {
67:   TF_context *ret = new TF_context(
68:     body.clone()
69:   );
70:   return ret;
71: }
72: 
73: DEFN_AST_DOWNCASTS(TopForm, TF_verbatim, TF_VERBATIM)
74: 
75: TF_verbatim::~TF_verbatim()
76: {
77: }
78: 
79: void TF_verbatim::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
80: {
81:   PRINT_HEADER(subtreeName, TF_verbatim);
82: 
83:   TopForm::debugPrint(os, indent, subtreeName);
84: 
85:   PRINT_BOOL(isImpl);
86:   PRINT_GENERIC(code);
87: }
88: 
89: TF_verbatim *TF_verbatim::clone() const
90: {
91:   TF_verbatim *ret = new TF_verbatim(
92:     isImpl,
93:     code.clone()
94:   );
95:   return ret;
96: }
97: 
98: DEFN_AST_DOWNCASTS(TopForm, TF_option, TF_OPTION)
99: 
100: TF_option::~TF_option()
101: {
102: }
103: 
104: void TF_option::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
105: {
106:   PRINT_HEADER(subtreeName, TF_option);
107: 
108:   TopForm::debugPrint(os, indent, subtreeName);
109: 
110:   PRINT_GENERIC(name);
111:   PRINT_GENERIC(value);
112: }
113: 
114: TF_option *TF_option::clone() const
115: {
// note: 'value' is copied by value (an int-like field), not clone()d
116:   TF_option *ret = new TF_option(
117:     name.clone(),
118:     value
119:   );
120:   return ret;
121: }
122:
123: DEFN_AST_DOWNCASTS(TopForm, TF_terminals, TF_TERMINALS)
124:
// ===== TopForm hierarchy, part 2 (astgen-generated boilerplate) =====
// TF_terminals and TF_nonterm: dtors deleteAll() the owned child
// lists; debugPrint/clone follow the same pattern as the other kinds.
125: TF_terminals::~TF_terminals()
126: {
127:   decls.deleteAll();
128:   types.deleteAll();
129:   prec.deleteAll();
130: }
131: 
132: void TF_terminals::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
133: {
134:   PRINT_HEADER(subtreeName, TF_terminals);
135: 
136:   TopForm::debugPrint(os, indent, subtreeName);
137: 
138:   PRINT_LIST(TermDecl, decls);
139:   PRINT_LIST(TermType, types);
140:   PRINT_LIST(PrecSpec, prec);
141: }
142: 
143: TF_terminals *TF_terminals::clone() const
144: {
145:   TF_terminals *ret = new TF_terminals(
146:     cloneASTList(decls),
147:     cloneASTList(types),
148:     cloneASTList(prec)
149:   );
150:   return ret;
151: }
152: 
153: DEFN_AST_DOWNCASTS(TopForm, TF_nonterm, TF_NONTERM)
154: 
155: TF_nonterm::~TF_nonterm()
156: {
157:   funcs.deleteAll();
158:   productions.deleteAll();
159:   subsets.deleteAll();
160: }
161: 
162: void TF_nonterm::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
163: {
164:   PRINT_HEADER(subtreeName, TF_nonterm);
165: 
166:   TopForm::debugPrint(os, indent, subtreeName);
167: 
168:   PRINT_GENERIC(name);
169:   PRINT_GENERIC(type);
170:   PRINT_LIST(SpecFunc, funcs);
171:   PRINT_LIST(ProdDecl, productions);
172:   PRINT_LIST(LocString, subsets);
173: }
174: 
175: TF_nonterm *TF_nonterm::clone() const
176: {
177:   TF_nonterm *ret = new TF_nonterm(
178:     name.clone(),
179:     type.clone(),
180:     cloneASTList(funcs),
181:     cloneASTList(productions),
182:     cloneASTList(subsets)
183:   );
184:   return ret;
185: }
186:
187:
188:
189:
// ===== TermDecl / TermType / PrecSpec (astgen-generated) =====
// Leaf grammar-AST nodes for the "terminals" section: token
// declarations, per-token types, and precedence specifications.  Same
// dtor/debugPrint/clone pattern as the rest of the file.
190: TermDecl::~TermDecl()
191: {
192: }
193: 
194: void TermDecl::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
195: {
196:   PRINT_HEADER(subtreeName, TermDecl);
197: 
198:   PRINT_GENERIC(code);
199:   PRINT_GENERIC(name);
200:   PRINT_GENERIC(alias);
201: }
202: 
203: TermDecl *TermDecl::clone() const
204: {
// 'code' (the numeric token id) is copied by value
205:   TermDecl *ret = new TermDecl(
206:     code,
207:     name.clone(),
208:     alias.clone()
209:   );
210:   return ret;
211: }
212: 
213: 
214: 
215: 
216: TermType::~TermType()
217: {
218:   funcs.deleteAll();
219: }
220: 
221: void TermType::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
222: {
223:   PRINT_HEADER(subtreeName, TermType);
224: 
225:   PRINT_GENERIC(name);
226:   PRINT_GENERIC(type);
227:   PRINT_LIST(SpecFunc, funcs);
228: }
229: 
230: TermType *TermType::clone() const
231: {
232:   TermType *ret = new TermType(
233:     name.clone(),
234:     type.clone(),
235:     cloneASTList(funcs)
236:   );
237:   return ret;
238: }
239: 
240: 
241: 
242: 
243: PrecSpec::~PrecSpec()
244: {
245:   tokens.deleteAll();
246: }
247: 
248: void PrecSpec::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
249: {
250:   PRINT_HEADER(subtreeName, PrecSpec);
251: 
252:   PRINT_GENERIC(kind);
253:   PRINT_GENERIC(prec);
254:   PRINT_LIST(LocString, tokens);
255: }
256: 
257: PrecSpec *PrecSpec::clone() const
258: {
// 'kind' (associativity) and 'prec' (level) are value fields
259:   PrecSpec *ret = new PrecSpec(
260:     kind,
261:     prec,
262:     cloneASTList(tokens)
263:   );
264:   return ret;
265: }
266:
267:
268:
269:
// ===== SpecFunc / ProdDecl (astgen-generated) =====
// SpecFunc: a named dup/del/merge-style function attached to a symbol;
// ProdDecl: one production (RHS element list plus its action code).
270: SpecFunc::~SpecFunc()
271: {
272:   formals.deleteAll();
273: }
274: 
275: void SpecFunc::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
276: {
277:   PRINT_HEADER(subtreeName, SpecFunc);
278: 
279:   PRINT_GENERIC(name);
280:   PRINT_LIST(LocString, formals);
281:   PRINT_GENERIC(code);
282: }
283: 
284: SpecFunc *SpecFunc::clone() const
285: {
286:   SpecFunc *ret = new SpecFunc(
287:     name.clone(),
288:     cloneASTList(formals),
289:     code.clone()
290:   );
291:   return ret;
292: }
293: 
294: 
295: 
296: 
297: ProdDecl::~ProdDecl()
298: {
299:   rhs.deleteAll();
300: }
301: 
302: void ProdDecl::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
303: {
304:   PRINT_HEADER(subtreeName, ProdDecl);
305: 
306:   PRINT_LIST(RHSElt, rhs);
307:   PRINT_GENERIC(actionCode);
308: }
309: 
310: ProdDecl *ProdDecl::clone() const
311: {
312:   ProdDecl *ret = new ProdDecl(
313:     cloneASTList(rhs),
314:     actionCode.clone()
315:   );
316:   return ret;
317: }
318:
319:
320:
321:
// ===== RHSElt hierarchy (astgen-generated boilerplate) =====
// One element on a production's right-hand side: a symbol reference
// (RH_name), a literal string token (RH_sm_string), or a precedence
// marker (RH_prec).  Same dtor/debugPrint/clone pattern as above.
322: RHSElt::~RHSElt()
323: {
324: }
325: 
// kind enum -> printable name, in declaration order
326: char const * const RHSElt::kindNames[RHSElt::NUM_KINDS] = {
327:   "RH_name",
328:   "RH_sm_string",
329:   "RH_prec",
330: };
331: 
// base-class hook: intentionally empty; subclasses print their fields
332: void RHSElt::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
333: {
334: }
335: 
336: DEFN_AST_DOWNCASTS(RHSElt, RH_name, RH_NAME)
337: 
338: RH_name::~RH_name()
339: {
340: }
341: 
342: void RH_name::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
343: {
344:   PRINT_HEADER(subtreeName, RH_name);
345: 
346:   RHSElt::debugPrint(os, indent, subtreeName);
347: 
348:   PRINT_GENERIC(tag);
349:   PRINT_GENERIC(name);
350: }
351: 
352: RH_name *RH_name::clone() const
353: {
354:   RH_name *ret = new RH_name(
355:     tag.clone(),
356:     name.clone()
357:   );
358:   return ret;
359: }
360: 
361: DEFN_AST_DOWNCASTS(RHSElt, RH_sm_string, RH_STRING)
362: 
363: RH_sm_string::~RH_sm_string()
364: {
365: }
366: 
367: void RH_sm_string::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
368: {
369:   PRINT_HEADER(subtreeName, RH_sm_string);
370: 
371:   RHSElt::debugPrint(os, indent, subtreeName);
372: 
373:   PRINT_GENERIC(tag);
374:   PRINT_GENERIC(str);
375: }
376: 
377: RH_sm_string *RH_sm_string::clone() const
378: {
379:   RH_sm_string *ret = new RH_sm_string(
380:     tag.clone(),
381:     str.clone()
382:   );
383:   return ret;
384: }
385: 
386: DEFN_AST_DOWNCASTS(RHSElt, RH_prec, RH_PREC)
387: 
388: RH_prec::~RH_prec()
389: {
390: }
391: 
392: void RH_prec::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
393: {
394:   PRINT_HEADER(subtreeName, RH_prec);
395: 
396:   RHSElt::debugPrint(os, indent, subtreeName);
397: 
398:   PRINT_GENERIC(tokName);
399: }
400: 
401: RH_prec *RH_prec::clone() const
402: {
403:   RH_prec *ret = new RH_prec(
404:     tokName.clone()
405:   );
406:   return ret;
407: }
408:
409:
410:
411:
Start cpp section to elk/elk_gramlex.yy.cpp[1
/1
]
1: #line 14460 "./lpsrc/elk.pak"
2: /* A lexical scanner generated by flex */
3:
4: /* Scanner skeleton version:
5: * $Header$
6: */
7:
8:
9:
10:
11:
12:
13:
14: /* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26: using namespace std;
27:
28: /* Use prototypes in function declarations. */
29:
30:
31: /* The "const" storage-class-modifier is valid. */
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66: /* Returned upon end-of-file. */
67:
68:
69: /* Promotes a possibly negative, possibly signed char to an unsigned
70: * integer for use as an array index. If the signed char is negative,
71: * we want to instead treat it as an 8-bit unsigned char, hence the
72: * double cast.
73: */
74:
75:
76: /* Enter a start condition. This macro really ought to take a parameter,
77: * but we do it the disgusting crufty way forced on us by the ()-less
78: * definition of BEGIN.
79: */
80:
81:
82: /* Translate the current start state into a value that can be later handed
83: * to BEGIN to return to the state. The YYSTATE alias is for lex
84: * compatibility.
85: */
86:
87:
88:
89: /* Action number for EOF rule of a given start state. */
90:
91:
92: /* Special action meaning "start processing a new file". */
93:
94:
95:
96:
97: /* Size of default input buffer. */
98:
99:
100: typedef struct yy_buffer_state *YY_BUFFER_STATE;
101:
102: extern int yyleng;
103:
104:
105:
106:
107:
108: /* The funky do-while in the following #define is used to turn the definition
109: * int a single C statement (which needs a semi-colon terminator). This
110: * avoids problems with code like:
111: *
112: * if ( condition_holds )
113: * yyless( 5 );
114: * else
115: * do_something_else();
116: *
117: * Prior to using the do-while the compiler would get upset at the
118: * "else" because it interpreted the "if" statement as being all
119: * done when it reached the ';' after the yyless() call.
120: */
121:
122: /* Return all but the first 'n' matched characters back to the input stream. */
123:
124:
125: do \
126: { \
127: /* Undo effects of setting up yytext. */ \
128: *yy_cp = yy_hold_char; \
129: YY_RESTORE_YY_MORE_OFFSET \
130: yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \
131: YY_DO_BEFORE_ACTION; /* set up yytext again */ \
132: } \
133: while ( 0 )
134:
135:
136:
137: /* The following is because we cannot portably get our hands on size_t
138: * (without autoconf's help, which isn't available because we want
139: * flex-generated scanners to compile on their own).
140: */
141: typedef unsigned int yy_size_t;
142:
143:
// flex-generated scanner buffer state: one input source (a C++ istream
// in this build) together with its character buffer and bookkeeping.
// Generated code — do not hand-edit; regenerate with flex instead.
144: struct yy_buffer_state
145: 	{
146: 	std::istream* yy_input_file;
147: 
148: 	char *yy_ch_buf;		/* input buffer */
149: 	char *yy_buf_pos;		/* current position in input buffer */
150: 
151: 	/* Size of input buffer in bytes, not including room for EOB
152: 	 * characters.
153: 	 */
154: 	yy_size_t yy_buf_size;
155: 
156: 	/* Number of characters read into yy_ch_buf, not including EOB
157: 	 * characters.
158: 	 */
159: 	int yy_n_chars;
160: 
161: 	/* Whether we "own" the buffer - i.e., we know we created it,
162: 	 * and can realloc() it to grow it, and should free() it to
163: 	 * delete it.
164: 	 */
165: 	int yy_is_our_buffer;
166: 
167: 	/* Whether this is an "interactive" input source; if so, and
168: 	 * if we're using stdio for input, then we want to use getc()
169: 	 * instead of fread(), to make sure we stop fetching input after
170: 	 * each newline.
171: 	 */
172: 	int yy_is_interactive;
173: 
174: 	/* Whether we're considered to be at the beginning of a line.
175: 	 * If so, '^' rules will be active on the next match, otherwise
176: 	 * not.
177: 	 */
178: 	int yy_at_bol;
179: 
180: 	/* Whether to try to fill the input buffer when we reach the
181: 	 * end of it.
182: 	 */
183: 	int yy_fill_buffer;
184: 
185: 	int yy_buffer_status;
186: #define YY_BUFFER_NEW 0
187: #define YY_BUFFER_NORMAL 1
188: 	/* When an EOF's been seen but there's still some text to process
189: 	 * then we mark the buffer as YY_EOF_PENDING, to indicate that we
190: 	 * shouldn't try reading from the input source any more.  We might
191: 	 * still have a bunch of tokens to match, though, because of
192: 	 * possible backing-up.
193: 	 *
194: 	 * When we actually see the EOF, we change the status to "new"
195: 	 * (via yyrestart()), so that the user can continue scanning by
196: 	 * just pointing yyin at a new input file.
197: 	 */
198: #define YY_BUFFER_EOF_PENDING 2
199: 	};
200:
201:
202: /* We provide macros for accessing buffer states in case in the
203: * future we want to put the buffer states in a more general
204: * "scanner state".
205: */
206:
207:
208:
209:
210: static void *yy_flex_alloc YY_PROTO(( yy_size_t ));
211: static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t ));
212: static void yy_flex_free YY_PROTO(( void * ));
213:
214:
215:
216:
217: { \
218: if ( ! yy_current_buffer ) \
219: yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
220: yy_current_buffer->yy_is_interactive = is_interactive; \
221: }
222:
223:
224: { \
225: if ( ! yy_current_buffer ) \
226: yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
227: yy_current_buffer->yy_at_bol = at_bol; \
228: }
229:
230:
231:
232:
233:
234:
235: typedef unsigned char YY_CHAR;
236:
237:
238:
239:
240: int yyFlexLexer::yylex() /* stub: %option yyclass moves the real yylex() into the user's subclass, so calling this base version is always an error */
241: {
242: LexerError( "yyFlexLexer::yylex invoked but %option yyclass used" );
243: return 0;
244: }
245:
246:
247:
248:
249: /* Done after the current pattern has been matched and before the
250: * corresponding action - sets up yytext.
251: */
252:
253: yytext_ptr = yy_bp; \
254: yyleng = (int) (yy_cp - yy_bp); \
255: yy_hold_char = *yy_cp; \
256: *yy_cp = '\0'; \
257: yy_c_buf_p = yy_cp;
258:
259:
260:
261: static yyconst short int yy_accept[159] = /* rule number accepted in each DFA state; 0 = non-accepting (scanner must back up) */
262: { 0,
263: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
264: 0, 0, 0, 0, 0, 0, 41, 39, 2, 1,
265: 39, 24, 10, 11, 39, 39, 37, 9, 21, 36,
266: 17, 36, 36, 36, 36, 36, 36, 36, 36, 36,
267: 36, 20, 8, 5, 6, 5, 33, 33, 33, 34,
268: 35, 27, 28, 29, 19, 23, 2, 0, 38, 18,
269: 3, 0, 37, 36, 36, 36, 36, 36, 36, 36,
270: 36, 36, 36, 36, 36, 36, 4, 0, 0, 0,
271: 0, 34, 27, 0, 7, 36, 36, 25, 36, 36,
272: 36, 36, 36, 36, 36, 36, 36, 0, 36, 36,
273:
274: 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,
275: 36, 36, 36, 36, 36, 36, 36, 36, 36, 22,
276: 36, 0, 32, 36, 15, 36, 36, 36, 14, 36,
277: 36, 36, 36, 36, 36, 36, 31, 36, 16, 36, 36,
278: 36, 36, 36, 36, 36, 26, 36, 36, 36, 12, 36,
279: 36, 13, 36, 36, 36, 36, 30, 0
280: } ;
281:
282: static yyconst int yy_ec[256] = /* maps each raw input byte onto its character equivalence class */
283: { 0,
284: 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
285: 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
286: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
287: 1, 2, 1, 4, 1, 1, 1, 1, 1, 5,
288: 6, 7, 1, 8, 9, 1, 10, 11, 11, 11,
289: 11, 11, 11, 11, 11, 11, 11, 12, 13, 1,
290: 1, 14, 1, 1, 15, 15, 15, 15, 15, 15,
291: 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
292: 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
293: 16, 17, 18, 1, 19, 1, 20, 21, 22, 23,
294:
295: 24, 25, 15, 15, 26, 15, 27, 28, 29, 30,
296: 31, 32, 15, 33, 34, 35, 36, 37, 15, 38,
297: 15, 15, 39, 1, 40, 1, 1, 1, 1, 1,
298: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
299: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
300: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
301: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
302: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
303: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
304: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
305:
306: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
307: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
308: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
309: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
310: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
311: 1, 1, 1, 1, 1
312: } ;
313:
314: static yyconst int yy_meta[41] = /* meta-equivalence classes: second-level grouping applied when following yy_def default transitions */
315: { 0,
316: 1, 1, 2, 3, 1, 4, 1, 1, 1, 1,
317: 5, 1, 1, 1, 5, 1, 6, 4, 5, 5,
318: 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
319: 5, 5, 5, 5, 5, 5, 5, 5, 1, 4
320: } ;
321:
322: static yyconst short int yy_base[169] = /* per-state base offset into the packed yy_nxt/yy_chk tables */
323: { 0,
324: 0, 0, 38, 39, 42, 46, 224, 223, 46, 47,
325: 186, 185, 30, 41, 50, 53, 223, 228, 220, 228,
326: 217, 228, 228, 228, 206, 49, 208, 228, 228, 0,
327: 228, 187, 179, 180, 31, 184, 182, 180, 176, 43,
328: 187, 228, 228, 228, 228, 200, 228, 66, 68, 0,
329: 228, 0, 228, 228, 228, 228, 207, 204, 228, 228,
330: 228, 204, 195, 0, 175, 172, 173, 170, 179, 170,
331: 164, 174, 176, 163, 168, 161, 228, 71, 73, 77,
332: 0, 0, 0, 190, 228, 157, 167, 0, 162, 161,
333: 153, 161, 164, 151, 155, 159, 161, 177, 156, 157,
334:
335: 159, 141, 152, 144, 150, 149, 146, 141, 150, 76,
336: 131, 133, 130, 143, 132, 134, 140, 127, 131, 0,
337: 125, 82, 228, 124, 0, 134, 133, 127, 0, 131,
338: 120, 133, 126, 132, 117, 0, 119, 0, 120, 118,
339: 124, 124, 120, 107, 0, 103, 109, 99, 0, 98,
340: 56, 0, 56, 59, 49, 33, 0, 228, 92, 98,
341: 104, 110, 116, 52, 121, 127, 133, 139
342: } ;
343:
344: static yyconst short int yy_def[169] = /* default fallback state consulted when the yy_chk check for a state fails */
345: { 0,
346: 158, 1, 159, 159, 160, 160, 161, 161, 162, 162,
347: 1, 1, 1, 1, 1, 1, 158, 158, 158, 158,
348: 163, 158, 158, 158, 158, 158, 158, 158, 158, 164,
349: 158, 164, 164, 164, 164, 164, 164, 164, 164, 164,
350: 164, 158, 158, 158, 158, 158, 158, 158, 158, 165,
351: 158, 166, 158, 158, 158, 158, 158, 163, 158, 158,
352: 158, 167, 158, 164, 164, 164, 164, 164, 164, 164,
353: 164, 164, 164, 164, 164, 164, 158, 158, 158, 158,
354: 168, 165, 166, 167, 158, 164, 164, 164, 164, 164,
355: 164, 164, 164, 164, 164, 164, 164, 168, 164, 164,
356:
357: 164, 164, 164, 164, 164, 164, 164, 164, 164, 158,
358: 164, 164, 164, 164, 164, 164, 164, 164, 164, 164,
359: 164, 158, 158, 164, 164, 164, 164, 164, 164, 164,
360: 164, 164, 164, 164, 164, 164, 164, 164, 164, 164,
361: 164, 164, 164, 164, 164, 164, 164, 164, 164, 164,
362: 164, 164, 164, 164, 164, 164, 164, 0, 158, 158,
363: 158, 158, 158, 158, 158, 158, 158, 158
364: } ;
365:
366: static yyconst short int yy_nxt[269] = /* packed next-state table, indexed by yy_base[state] + equivalence class */
367: { 0,
368: 18, 19, 20, 21, 22, 23, 18, 24, 25, 26,
369: 27, 28, 29, 18, 30, 31, 18, 18, 30, 30,
370: 30, 32, 30, 33, 34, 35, 30, 30, 30, 36,
371: 37, 38, 30, 39, 40, 30, 41, 30, 42, 43,
372: 45, 45, 18, 48, 46, 46, 49, 48, 53, 53,
373: 49, 54, 54, 18, 56, 61, 64, 56, 62, 68,
374: 69, 145, 18, 54, 54, 18, 74, 78, 55, 80,
375: 79, 81, 78, 75, 80, 79, 81, 122, 80, 55,
376: 81, 123, 157, 122, 156, 54, 54, 123, 18, 155,
377: 154, 18, 44, 44, 44, 44, 44, 44, 47, 47,
378:
379: 47, 47, 47, 47, 50, 50, 50, 50, 50, 50,
380: 52, 52, 52, 52, 52, 52, 58, 153, 58, 58,
381: 58, 82, 152, 82, 82, 82, 82, 83, 151, 83,
382: 150, 83, 83, 84, 84, 84, 84, 84, 84, 98,
383: 149, 148, 98, 98, 147, 146, 145, 144, 143, 142,
384: 141, 140, 139, 138, 137, 120, 136, 135, 134, 133,
385: 132, 131, 130, 129, 128, 127, 126, 125, 124, 121,
386: 120, 119, 118, 117, 116, 115, 114, 113, 112, 111,
387: 110, 109, 108, 107, 106, 105, 104, 103, 102, 101,
388: 100, 99, 85, 97, 96, 95, 94, 93, 92, 91,
389:
390: 90, 89, 88, 87, 86, 63, 85, 59, 57, 77,
391: 76, 73, 72, 71, 70, 67, 66, 65, 63, 60,
392: 59, 57, 158, 55, 55, 51, 51, 17, 158, 158,
393: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
394: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
395: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
396: 158, 158, 158, 158, 158, 158, 158, 158
397: } ;
398:
399: static yyconst short int yy_chk[269] = /* check table: validates that a packed yy_nxt entry really belongs to the current state */
400: { 0,
401: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
402: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
403: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
404: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
405: 3, 4, 13, 5, 3, 4, 5, 6, 9, 10,
406: 6, 9, 10, 14, 15, 26, 164, 16, 26, 35,
407: 35, 156, 15, 9, 10, 16, 40, 48, 13, 49,
408: 48, 49, 78, 40, 79, 78, 79, 110, 80, 14,
409: 80, 110, 155, 122, 154, 9, 10, 122, 15, 153,
410: 151, 16, 159, 159, 159, 159, 159, 159, 160, 160,
411:
412: 160, 160, 160, 160, 161, 161, 161, 161, 161, 161,
413: 162, 162, 162, 162, 162, 162, 163, 150, 163, 163,
414: 163, 165, 148, 165, 165, 165, 165, 166, 147, 166,
415: 146, 166, 166, 167, 167, 167, 167, 167, 167, 168,
416: 144, 143, 168, 168, 142, 141, 140, 139, 137, 135,
417: 134, 133, 132, 131, 130, 128, 127, 126, 124, 121,
418: 119, 118, 117, 116, 115, 114, 113, 112, 111, 109,
419: 108, 107, 106, 105, 104, 103, 102, 101, 100, 99,
420: 98, 97, 96, 95, 94, 93, 92, 91, 90, 89,
421: 87, 86, 84, 76, 75, 74, 73, 72, 71, 70,
422:
423: 69, 68, 67, 66, 65, 63, 62, 58, 57, 46,
424: 41, 39, 38, 37, 36, 34, 33, 32, 27, 25,
425: 21, 19, 17, 12, 11, 8, 7, 158, 158, 158,
426: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
427: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
428: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
429: 158, 158, 158, 158, 158, 158, 158, 158
430: } ;
431:
432: /* The intent behind this definition is that it'll catch
433: * any uses of REJECT which flex missed.
434: */
435:
436:
437:
438:
439:
440:
441: /* grammar.lex
442: * lexical analyzer for my grammar input format
443: *
444: * The variety of syntaxes for embedded literal code cause this lexer
445: * to have some of the context sensitivity usually associated with a
446: * parser. This context doesn't nest arbitrarily deeply, so the
447: * language recognized is still regular, but clearly there's some
448: * design tension.
449: */
450: /* ----------------- C definitions -------------------- */
451:
452:
453:
454:
455:
456:
457:
458:
459:
460:
461:
462:
463:
464:
465:
466: fileState.loc = sourceLocManager->advCol(fileState.loc, yyleng) /* user ; */
467:
468:
469: /* -------------------- flex options ------------------ */
470: /* no wrapping is needed; setting this means we don't have to link with libfl.a */
471: /* don't use the default-echo rules */
472: /* generate a c++ lexer */
473: /* and I will define the class */
474: /* ------------------- definitions -------------------- */
475: /* any character, including newline */
476: /* any character except newline */
477: /* starting character in a name */
478: /* starting character in a numeric literal */
479: /* double-quote */
480: /* character that can appear in a quoted string */
481: /* (I currently don't have any backslash codes, but I want to
482: * leave open that possibility, so for now backslashes are illegal) */
483: /* horizontal whitespace */
484: /* --------------- start conditions ------------------- */
485: /* eating a comment delimited by slash-star and star-slash; note
486: * that we remember our current state when entering C_COMMENT,
487: * and restore it on exit */
488:
489:
490: /* looking for the file name in an "include" directive */
491:
492:
493: /* recovering from an error by skipping to the next newline */
494:
495:
496: /* gathering literal embedded code; the delimiter is specified
497: * in the 'embedFinish' variable */
498:
499:
500: /* tokenizing the right-hand side of a production; this one is not
501: * exclusive because tokenization is virtually the same in RHS
502: * mode as in INITIAL mode */
503:
504:
505: /* tokenizing parameter list of a function, leading into the
506: * embedded code that is its body */
507:
508:
509: /* looking for the start of a type that follows "token" or "nonterm",
510: * or the TOK_NAME meaning the type has been omitted */
511:
512:
513: /* ---------------------- rules ----------------------- */
514:
515:
516: /* Macros after this point can all be overridden by user definitions in
517: * section 1.
518: */
519:
520:
521:
522: extern "C" int yywrap YY_PROTO(( void ));
523:
524: extern int yywrap YY_PROTO(( void ));
525:
526:
527:
528:
529:
530: static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int ));
531:
532:
533:
534: static int yy_flex_strlen YY_PROTO(( yyconst char * ));
535:
536:
537:
538:
539:
540:
541: static int yy_start_stack_ptr = 0; /* current depth of the start-condition stack */
542: static int yy_start_stack_depth = 0; /* allocated capacity of yy_start_stack -- presumably grown on demand by yy_push_state; TODO confirm */
543: static int *yy_start_stack = 0; /* heap array of saved start conditions for yy_push_state/yy_pop_state/yy_top_state below */
544:
545: static void yy_push_state YY_PROTO(( int new_state ));
546:
547:
548: static void yy_pop_state YY_PROTO(( void ));
549:
550:
551: static int yy_top_state YY_PROTO(( void ));
552:
553:
554:
555:
556:
557:
558:
559:
560:
561: YY_MALLOC_DECL
562:
563:
564:
565:
566:
567:
568: /* Just try to get by without declaring the routines. This will fail
569: * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int)
570: * or sizeof(void*) != sizeof(int).
571: */
572:
573:
574:
575: /* Amount of stuff to slurp up with each read. */
576:
577:
578:
579:
580: /* Copy whatever the last rule matched to the standard output. */
581:
582:
583:
584:
585:
586: /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
587: * is returned in "result".
588: */
589:
590:
591: if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \
592: YY_FATAL_ERROR( "input in flex scanner failed" );
593:
594:
595: /* No semi-colon after return; correct usage is to write "yyterminate();" -
596: * we don't want an extra ';' after the "return" because that will cause
597: * some compilers to complain about unreachable statements.
598: */
599:
600:
601:
602:
603: /* Number of entries by which start-condition stack grows. */
604:
605:
606:
607:
608: /* Report a fatal error. */
609:
610:
611:
612:
613: /* Default declaration of generated scanner - a define so the user can
614: * easily add parameters.
615: */
616:
617:
618:
619:
620: /* Code executed at the beginning of each rule, after yytext and yyleng
621: * have been set up.
622: */
623:
624:
625:
626:
627: /* Code executed at the end of each rule. */
628:
629:
630:
631:
632:
633: YY_USER_ACTION
634:
635: YY_DECL /* scanner entry point; YY_DECL expands to the yylex() signature (with %option yyclass, a method of the user's lexer class) */
636: {
637: register yy_state_type yy_current_state;
638: register char *yy_cp = NULL, *yy_bp = NULL;
639: register int yy_act;
640:
641: #line 102 "gramlex.lex"
642:
643:
644: /* -------- whitespace ------ */
645: #line 645 "lex.yy.cc"
646:
647: if ( yy_init )
648: {
649: yy_init = 0;
650:
651: #ifdef YY_USER_INIT
652: YY_USER_INIT;
653: #endif
654:
655: if ( ! yy_start )
656: yy_start = 1; /* first start state */
657:
658: if ( ! yyin )
659: yyin = &cin; /* NOTE(review): unqualified cin vs std::cout below -- presumably a using-declaration exists upstream; verify */
660:
661: if ( ! yyout )
662: yyout = &std::cout;
663:
664: if ( ! yy_current_buffer )
665: yy_current_buffer =
666: yy_create_buffer( yyin, YY_BUF_SIZE );
667:
668: yy_load_buffer_state();
669: }
670:
671: while ( 1 ) /* loops until end-of-file is reached */
672: {
673: yy_cp = yy_c_buf_p;
674:
675: /* Support of yytext. */
676: *yy_cp = yy_hold_char;
677:
678: /* yy_bp points to the position in yy_ch_buf of the start of
679: * the current run.
680: */
681: yy_bp = yy_cp;
682:
683: yy_current_state = yy_start;
684: yy_match: /* inner loop: advance the compressed DFA one input character per iteration */
685: do
686: {
687: register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
688: if ( yy_accept[yy_current_state] )
689: {
690: yy_last_accepting_state = yy_current_state;
691: yy_last_accepting_cpos = yy_cp;
692: }
693: while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
694: {
695: yy_current_state = (int) yy_def[yy_current_state];
696: if ( yy_current_state >= 159 )
697: yy_c = yy_meta[(unsigned int) yy_c];
698: }
699: yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
700: ++yy_cp;
701: }
702: while ( yy_base[yy_current_state] != 228 ); /* 228 marks the jam state's base: no further transition possible */
703:
704: yy_find_action: /* map the final DFA state to a rule number, backing up to the last accepting position if needed */
705: yy_act = yy_accept[yy_current_state];
706: if ( yy_act == 0 )
707: { /* have to back up */
708: yy_cp = yy_last_accepting_cpos;
709: yy_current_state = yy_last_accepting_state;
710: yy_act = yy_accept[yy_current_state];
711: }
712:
713: YY_DO_BEFORE_ACTION;
714:
715:
716: do_action: /* This label is used only to access EOF actions. */
717:
718:
719: switch ( yy_act )
720: { /* beginning of action switch */
721: case 0: /* must back up */
722: /* undo the effects of YY_DO_BEFORE_ACTION */
723: *yy_cp = yy_hold_char;
724: yy_cp = yy_last_accepting_cpos;
725: yy_current_state = yy_last_accepting_state;
726: goto yy_find_action;
727:
728: case 1:
729: YY_RULE_SETUP
730: #line 105 "gramlex.lex"
731: {
732: newLine();
733: }
734: YY_BREAK
735: case 2:
736: YY_RULE_SETUP
737: #line 109 "gramlex.lex"
738: {
739: UPD_COL;
740: }
741: YY_BREAK
742: /* -------- comments -------- */
743: case 3:
744: YY_RULE_SETUP
745: #line 114 "gramlex.lex"
746: {
747: /* C-style comments */
748: TOKEN_START;
749: UPD_COL;
750: prevState = YY_START;
751: BEGIN(C_COMMENT);
752: }
753: YY_BREAK
754:
755: case 4:
756: YY_RULE_SETUP
757: #line 123 "gramlex.lex"
758: {
759: /* end of comment */
760: UPD_COL;
761: BEGIN(prevState);
762: }
763: YY_BREAK
764: case 5:
765: YY_RULE_SETUP
766: #line 129 "gramlex.lex"
767: {
768: /* anything but slash-star or newline -- eat it */
769: UPD_COL;
770: }
771: YY_BREAK
772: case 6:
773: YY_RULE_SETUP
774: #line 134 "gramlex.lex"
775: {
776: newLine();
777: }
778: YY_BREAK
779: case YY_STATE_EOF(C_COMMENT):
780: #line 138 "gramlex.lex"
781: {
782: UPD_COL;
783: errorUnterminatedComment();
784: return TOK_EOF;
785: }
786: YY_BREAK
787:
788: case 7:
789: YY_RULE_SETUP
790: #line 146 "gramlex.lex"
791: {
792: /* C++-style comment -- eat it */
793: TOKEN_START;
794: advCol(yyleng-1);
795: newLine();
796: }
797: YY_BREAK
798: /* -------- punctuators, operators, keywords --------- */
799: case 8:
800: YY_RULE_SETUP
801: #line 155 "gramlex.lex"
802: TOK_UPD_COL; return TOK_RBRACE;
803: YY_BREAK
804: case 9:
805: YY_RULE_SETUP
806: #line 156 "gramlex.lex"
807: TOK_UPD_COL; return TOK_COLON;
808: YY_BREAK
809: case 10:
810: YY_RULE_SETUP
811: #line 157 "gramlex.lex"
812: TOK_UPD_COL; return TOK_RPAREN;
813: YY_BREAK
814: case 11:
815: YY_RULE_SETUP
816: #line 158 "gramlex.lex"
817: TOK_UPD_COL; return TOK_COMMA;
818: YY_BREAK
819: case 12:
820: YY_RULE_SETUP
821: #line 160 "gramlex.lex"
822: TOK_UPD_COL; return TOK_TERMINALS;
823: YY_BREAK
824: case 13:
825: YY_RULE_SETUP
826: #line 161 "gramlex.lex"
827: TOK_UPD_COL; return TOK_PRECEDENCE;
828: YY_BREAK
829: case 14:
830: YY_RULE_SETUP
831: #line 162 "gramlex.lex"
832: TOK_UPD_COL; return TOK_OPTION;
833: YY_BREAK
834: case 15:
835: YY_RULE_SETUP
836: #line 163 "gramlex.lex"
837: TOK_UPD_COL; return TOK_EXPECT;
838: YY_BREAK
839: case 16:
840: YY_RULE_SETUP
841: #line 164 "gramlex.lex"
842: TOK_UPD_COL; return TOK_SUBSETS;
843: YY_BREAK
844: /* ----------- sequences that begin literal code ------------ */
845: /* for the time being, a "[" will always start an embedded sequence;
846: * eventually, I'll remove this in favor of the brace- and paren-
847: * delimited embedded sequences */
848: case 17:
849: YY_RULE_SETUP
850: #line 171 "gramlex.lex"
851: {
852: TOK_UPD_COL;
853: BEGIN(LITCODE);
854: beginEmbed(']', TOK_LIT_CODE);
855: }
856: YY_BREAK
857: /* the "->" operator moves us into RHS mode, which is special because
858: * in this mode any "{" is interpreted as the beginning of an embedded
859: * section of literal code */
860: case 18:
861: YY_RULE_SETUP
862: #line 180 "gramlex.lex"
863: {
864: TOK_UPD_COL;
865: BEGIN(RHS);
866: return TOK_ARROW;
867: }
868: YY_BREAK
869: /* "{" in a RHS begins embedded */
870: case 19:
871: YY_RULE_SETUP
872: #line 187 "gramlex.lex"
873: {
874: TOK_UPD_COL;
875: BEGIN(LITCODE);
876: beginEmbed('}', TOK_LIT_CODE);
877: }
878: YY_BREAK
879: /* otherwise it's just a "{" */
880: case 20:
881: YY_RULE_SETUP
882: #line 194 "gramlex.lex"
883: {
884: TOK_UPD_COL;
885: return TOK_LBRACE;
886: }
887: YY_BREAK
888: /* since right-hand-sides can end with either embedded code or a simple
889: * ";", the semicolon gets out of RHS mode */
890: case 21:
891: YY_RULE_SETUP
892: #line 201 "gramlex.lex"
893: {
894: TOK_UPD_COL;
895: BEGIN(INITIAL);
896: return TOK_SEMICOLON;
897: }
898: YY_BREAK
899: /* "token" and "nonterm" are always followed by an optional type,
900: * and then a TOK_NAME. So, until we see a TOK_NAME, "(" will mean
901: * the start of an embedded sequence. */
902: case 22:
903: YY_RULE_SETUP
904: #line 210 "gramlex.lex"
905: {
906: TOK_UPD_COL;
907: BEGIN(OPTIONAL_TYPE);
908: return yytext[0]=='t'? TOK_TOKEN : TOK_NONTERM;
909: }
910: YY_BREAK
911: /* so now this begins embedded */
912: case 23:
913: YY_RULE_SETUP
914: #line 217 "gramlex.lex"
915: {
916: TOK_UPD_COL;
917: BEGIN(LITCODE);
918: beginEmbed(')', TOK_LIT_CODE);
919: }
920: YY_BREAK
921: /* otherwise it's just itself */
922: case 24:
923: YY_RULE_SETUP
924: #line 224 "gramlex.lex"
925: {
926: TOK_UPD_COL;
927: return TOK_LPAREN;
928: }
929: YY_BREAK
930: /* function beginning */
931: case 25:
932: YY_RULE_SETUP
933: #line 230 "gramlex.lex"
934: {
935: TOK_UPD_COL;
936: BEGIN(FUN);
937: return TOK_FUN;
938: }
939: YY_BREAK
940: /* verbatim beginning */
941: case 26:
942: YY_RULE_SETUP
943: #line 237 "gramlex.lex"
944: {
945: TOK_UPD_COL;
946: BEGIN(FUN);
947: return yytext[0]=='v'? TOK_VERBATIM : TOK_IMPL_VERBATIM;
948: }
949: YY_BREAK
950: /* --------- embedded literal code --------- */
951: /* no TOKEN_START here; we'll use the tokenStartLoc that
952: * was computed in the opening punctuation */
953:
954: case 27:
955: YY_RULE_SETUP
956: #line 248 "gramlex.lex"
957: {
958: UPD_COL;
959: embedded->handle(yytext, yyleng, embedFinish);
960: }
961: YY_BREAK
962: case 28:
963: YY_RULE_SETUP
964: #line 253 "gramlex.lex"
965: {
966: newLine();
967: embedded->handle(yytext, yyleng, embedFinish);
968: }
969: YY_BREAK
970: case 29:
971: YY_RULE_SETUP
972: #line 258 "gramlex.lex"
973: {
974: UPD_COL;
975: if (embedded->zeroNesting()) {
976:
977: BEGIN(INITIAL);
978:
979:
980: if (embedFinish != yytext[0]) {
981: err("unbalanced literal code delimiter");
982: }
983:
984:
985: embedded->exprOnly = false;
986:
987:
988: embedded->isDeclaration = false;
989:
990:
991: return embedMode;
992: }
993: else {
994:
995: embedded->handle(yytext, yyleng, embedFinish);
996: }
997: }
998: YY_BREAK
999: case YY_STATE_EOF(LITCODE):
1000: #line 284 "gramlex.lex"
1001: {
1002: err(sm_stringc << "hit end of file while looking for final `"
1003: << embedFinish << "'");
1004: yyterminate();
1005: }
1006: YY_BREAK
1007:
1008: /* embedded *type* description */
1009: case 30:
1010: YY_RULE_SETUP
1011: #line 293 "gramlex.lex"
1012: {
1013: /* caller will get text from yytext and yyleng */
1014: TOK_UPD_COL;
1015:
1016: /* drop into literal-code processing */
1017: BEGIN(LITCODE);
1018:
1019: /* I reset the initial nesting to -1 so that the '{' at the
1020: * beginning of the class body sets nesting to 0, thus when
1021: * I see the final '}' I'll see that at level 0 and stop */
1022: beginEmbed('}', TOK_LIT_CODE, -1);
1023:
1024: return TOK_CONTEXT_CLASS;
1025: }
1026: YY_BREAK
1027: /* ---------- includes ----------- */
1028: case 31:
1029: YY_RULE_SETUP
1030: #line 310 "gramlex.lex"
1031: {
1032: TOK_UPD_COL; /* hence no TOKEN_START in INCLUDE area */
1033: BEGIN(INCLUDE);
1034: }
1035: YY_BREAK
1036:
1037: case 32:
1038: YY_RULE_SETUP
1039: #line 316 "gramlex.lex"
1040: {
1041: /* e.g.: ("filename") */
1042: /* file name to include */
1043: UPD_COL;
1044:
1045: /* find quotes */
1046: char *leftq = strchr(yytext, '"');
1047: char *rightq = strchr(leftq+1, '"');
1048: xassert(leftq && rightq);
1049:
1050: /* extract filename string */
1051: includeFileName = addString(leftq+1, rightq-leftq-1);
1052:
1053: /* go back to normal processing */
1054: BEGIN(INITIAL);
1055: return TOK_INCLUDE;
1056: }
1057: YY_BREAK
1058: case 33:
1059: YY_RULE_SETUP
1060: #line 334 "gramlex.lex"
1061: {
1062: /* anything else: malformed */
1063: UPD_COL;
1064: errorMalformedInclude();
1065:
1066: /* rudimentary error recovery.. */
1067: BEGIN(EAT_TO_NEWLINE);
1068: }
1069: YY_BREAK
1070:
1071:
1072: case 34:
1073: YY_RULE_SETUP
1074: #line 345 "gramlex.lex"
1075: {
1076: UPD_COL;
1077: /* not newline, eat it */
1078: }
1079: YY_BREAK
1080: case 35:
1081: YY_RULE_SETUP
1082: #line 350 "gramlex.lex"
1083: {
1084: /* get out of here */
1085: newLine();
1086: BEGIN(INITIAL);
1087: }
1088: YY_BREAK
1089:
1090: /* -------- name literal --------- */
1091: case 36:
1092: YY_RULE_SETUP
1093: #line 358 "gramlex.lex"
1094: {
1095: /* get text from yytext and yyleng */
1096: TOK_UPD_COL;
1097: if (YY_START == OPTIONAL_TYPE) {
1098: BEGIN(INITIAL);
1099: }
1100: return TOK_NAME;
1101: }
1102: YY_BREAK
1103: /* -------- numeric literal ------ */
1104: case 37:
1105: YY_RULE_SETUP
1106: #line 368 "gramlex.lex"
1107: {
1108: TOK_UPD_COL;
1109: integerLiteral = strtoul(yytext, NULL, 10 /*radix*/);
1110: return TOK_INTEGER;
1111: }
1112: YY_BREAK
1113: /* ----------- string literal ----- */
1114: case 38:
1115: YY_RULE_SETUP
1116: #line 375 "gramlex.lex"
1117: {
1118: TOK_UPD_COL;
1119: sm_stringLiteral = addString(yytext+1, yyleng-2);
1120: return TOK_STRING;
1121: }
1122: YY_BREAK
1123: /* --------- illegal ------------- */
1124: case 39:
1125: YY_RULE_SETUP
1126: #line 382 "gramlex.lex"
1127: {
1128: TOK_UPD_COL;
1129: errorIllegalCharacter(yytext[0]);
1130: }
1131: YY_BREAK
1132: case 40:
1133: YY_RULE_SETUP
1134: #line 388 "gramlex.lex"
1135: YY_FATAL_ERROR( "flex scanner jammed" );
1136: YY_BREAK
1137: #line 1137 "lex.yy.cc"
1138: case YY_STATE_EOF(INITIAL):
1139: case YY_STATE_EOF(INCLUDE):
1140: case YY_STATE_EOF(EAT_TO_NEWLINE):
1141: case YY_STATE_EOF(RHS):
1142: case YY_STATE_EOF(FUN):
1143: case YY_STATE_EOF(OPTIONAL_TYPE):
1144: yyterminate();
1145:
1146: case YY_END_OF_BUFFER:
1147: {
1148: /* Amount of text matched not including the EOB char. */
1149: int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1;
1150:
1151: /* Undo the effects of YY_DO_BEFORE_ACTION. */
1152: *yy_cp = yy_hold_char;
1153: YY_RESTORE_YY_MORE_OFFSET
1154:
1155: if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW )
1156: {
1157: /* We're scanning a new file or input source. It's
1158: * possible that this happened because the user
1159: * just pointed yyin at a new source and called
1160: * yylex(). If so, then we have to assure
1161: * consistency between yy_current_buffer and our
1162: * globals. Here is the right place to do so, because
1163: * this is the first action (other than possibly a
1164: * back-up) that will match for the new input source.
1165: */
1166: yy_n_chars = yy_current_buffer->yy_n_chars;
1167: yy_current_buffer->yy_input_file = yyin;
1168: yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL;
1169: }
1170:
1171: /* Note that here we test for yy_c_buf_p "<=" to the position
1172: * of the first EOB in the buffer, since yy_c_buf_p will
1173: * already have been incremented past the NUL character
1174: * (since all states make transitions on EOB to the
1175: * end-of-buffer state). Contrast this with the test
1176: * in input().
1177: */
1178: if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] )
1179: { /* This was really a NUL. */
1180: yy_state_type yy_next_state;
1181:
1182: yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text;
1183:
1184: yy_current_state = yy_get_previous_state();
1185:
1186: /* Okay, we're now positioned to make the NUL
1187: * transition. We couldn't have
1188: * yy_get_previous_state() go ahead and do it
1189: * for us because it doesn't know how to deal
1190: * with the possibility of jamming (and we don't
1191: * want to build jamming into it because then it
1192: * will run more slowly).
1193: */
1194:
1195: yy_next_state = yy_try_NUL_trans( yy_current_state );
1196:
1197: yy_bp = yytext_ptr + YY_MORE_ADJ;
1198:
1199: if ( yy_next_state )
1200: {
1201: /* Consume the NUL. */
1202: yy_cp = ++yy_c_buf_p;
1203: yy_current_state = yy_next_state;
1204: goto yy_match;
1205: }
1206:
1207: else
1208: {
1209: yy_cp = yy_c_buf_p;
1210: goto yy_find_action;
1211: }
1212: }
1213:
1214: else switch ( yy_get_next_buffer() )
1215: {
1216: case EOB_ACT_END_OF_FILE:
1217: {
1218: yy_did_buffer_switch_on_eof = 0;
1219:
1220: if ( yywrap() )
1221: {
1222: /* Note: because we've taken care in
1223: * yy_get_next_buffer() to have set up
1224: * yytext, we can now set up
1225: * yy_c_buf_p so that if some total
1226: * hoser (like flex itself) wants to
1227: * call the scanner after we return the
1228: * YY_NULL, it'll still work - another
1229: * YY_NULL will get returned.
1230: */
1231: yy_c_buf_p = yytext_ptr + YY_MORE_ADJ;
1232:
1233: yy_act = YY_STATE_EOF(YY_START);
1234: goto do_action;
1235: }
1236:
1237: else
1238: {
1239: if ( ! yy_did_buffer_switch_on_eof )
1240: YY_NEW_FILE;
1241: }
1242: break;
1243: }
1244:
1245: case EOB_ACT_CONTINUE_SCAN:
1246: yy_c_buf_p =
1247: yytext_ptr + yy_amount_of_matched_text;
1248:
1249: yy_current_state = yy_get_previous_state();
1250:
1251: yy_cp = yy_c_buf_p;
1252: yy_bp = yytext_ptr + YY_MORE_ADJ;
1253: goto yy_match;
1254:
1255: case EOB_ACT_LAST_MATCH:
1256: yy_c_buf_p =
1257: &yy_current_buffer->yy_ch_buf[yy_n_chars];
1258:
1259: yy_current_state = yy_get_previous_state();
1260:
1261: yy_cp = yy_c_buf_p;
1262: yy_bp = yytext_ptr + YY_MORE_ADJ;
1263: goto yy_find_action;
1264: }
1265: break;
1266: }
1267:
1268: default:
1269: YY_FATAL_ERROR(
1270: "fatal flex scanner internal error--no action found" );
1271: } /* end of action switch */
1272: } /* end of scanning one token */
1273: } /* end of yylex */
1274:
1275: yyFlexLexer::yyFlexLexer( std::istream* arg_yyin, std::ostream* arg_yyout ) /* resets all scanner state; buffer creation is deferred to the first yylex() call */
1276: {
1277: yyin = arg_yyin;
1278: yyout = arg_yyout;
1279: yy_c_buf_p = 0;
1280: yy_init = 1; /* triggers lazy initialization inside yylex() */
1281: yy_start = 0;
1282: yy_flex_debug = 0;
1283: yylineno = 1;
1284:
1285: yy_did_buffer_switch_on_eof = 0;
1286:
1287: yy_looking_for_trail_begin = 0;
1288: yy_more_flag = 0;
1289: yy_more_len = 0;
1290: yy_more_offset = yy_prev_more_offset = 0;
1291:
1292: yy_start_stack_ptr = yy_start_stack_depth = 0;
1293: yy_start_stack = 0;
1294:
1295: yy_current_buffer = 0;
1296:
1297: #ifdef YY_USES_REJECT
1298: yy_state_buf = new yy_state_type[YY_BUF_SIZE + 2]; /* array allocation: must be released with delete[] in the destructor */
1299: #else
1300: yy_state_buf = 0;
1301: #endif
1302: }
1303:
1304: yyFlexLexer::~yyFlexLexer() /* releases the REJECT state buffer (if any) and the current input buffer */
1305: {
1306: delete [] yy_state_buf; /* fix: array form to match 'new yy_state_type[...]' in the ctor; scalar delete on new[] is undefined behavior (delete[] on null is a safe no-op) */
1307: yy_delete_buffer( yy_current_buffer );
1308: }
1309:
1310: void yyFlexLexer::switch_streams( std::istream* new_in, std::ostream* new_out ) /* repoint the scanner at new streams; a null argument leaves that stream unchanged */
1311: {
1312: if ( new_in )
1313: {
1314: yy_delete_buffer( yy_current_buffer ); /* discard any buffered-but-unscanned input from the old stream */
1315: yy_switch_to_buffer( yy_create_buffer( new_in, YY_BUF_SIZE ) );
1316: }
1317:
1318: if ( new_out )
1319: yyout = new_out;
1320: }
1321:
1322: #ifdef YY_INTERACTIVE /* fix: the two signatures below are #ifdef alternatives from the flex skeleton; without the guards both were emitted over one body, which does not compile (the same macro already guards the body) */
1323: int yyFlexLexer::LexerInput( char* buf, int /* max_size */ )
1324: #else
1325: int yyFlexLexer::LexerInput( char* buf, int max_size )
1326: #endif
1327: {
1328: if ( yyin->eof() || yyin->fail() )
1329: return 0;
1330:
1331: #ifdef YY_INTERACTIVE
1332: yyin->get( buf[0] );
1333:
1334: if ( yyin->eof() )
1335: return 0;
1336:
1337: if ( yyin->bad() )
1338: return -1;
1339:
1340: return 1;
1341:
1342: #else
1343: (void) yyin->read( buf, max_size );
1344:
1345: if ( yyin->bad() )
1346: return -1;
1347: else
1348: return yyin->gcount();
1349: #endif
1350: }
1351:
1352: void yyFlexLexer::LexerOutput( const char* buf, int size ) /* writes 'size' bytes of scanned text verbatim to yyout */
1353: {
1354: (void) yyout->write( buf, size );
1355: }
1356:
1357: /* yy_get_next_buffer - try to read in a new buffer
1358: *
1359: * Returns a code representing an action:
1360: * EOB_ACT_LAST_MATCH -
1361: * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
1362: * EOB_ACT_END_OF_FILE - end of file
1363: */
1364:
1365: int yyFlexLexer::yy_get_next_buffer() /* refills yy_ch_buf, preserving any partially-matched text at the front; see return-code comment above */
1366: {
1367: register char *dest = yy_current_buffer->yy_ch_buf;
1368: register char *source = yytext_ptr;
1369: register int number_to_move, i;
1370: int ret_val;
1371:
1372: if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] )
1373: YY_FATAL_ERROR(
1374: "fatal flex scanner internal error--end of buffer missed" );
1375:
1376: if ( yy_current_buffer->yy_fill_buffer == 0 )
1377: { /* Don't try to fill the buffer, so this is an EOF. */
1378: if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 )
1379: {
1380: /* We matched a single character, the EOB, so
1381: * treat this as a final EOF.
1382: */
1383: return EOB_ACT_END_OF_FILE;
1384: }
1385:
1386: else
1387: {
1388: /* We matched some text prior to the EOB, first
1389: * process it.
1390: */
1391: return EOB_ACT_LAST_MATCH;
1392: }
1393: }
1394:
1395: /* Try to read more data. */
1396:
1397: /* First move last chars to start of buffer. */
1398: number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; /* length of the partial match that must be carried over */
1399:
1400: for ( i = 0; i < number_to_move; ++i )
1401: *(dest++) = *(source++);
1402:
1403: if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING )
1404: /* don't do the read, it's not guaranteed to return an EOF,
1405: * just force an EOF
1406: */
1407: yy_current_buffer->yy_n_chars = yy_n_chars = 0;
1408:
1409: else
1410: {
1411: int num_to_read =
1412: yy_current_buffer->yy_buf_size - number_to_move - 1;
1413:
1414: while ( num_to_read <= 0 )
1415: { /* Not enough room in the buffer - grow it. */
1416: #ifdef YY_USES_REJECT
1417: YY_FATAL_ERROR(
1418: "input buffer overflow, can't enlarge buffer because scanner uses REJECT" );
1419: #else
1420:
1421: /* just a shorter name for the current buffer */
1422: YY_BUFFER_STATE b = yy_current_buffer;
1423:
1424: int yy_c_buf_p_offset =
1425: (int) (yy_c_buf_p - b->yy_ch_buf); /* saved as an offset: realloc may move the buffer */
1426:
1427: if ( b->yy_is_our_buffer )
1428: {
1429: int new_size = b->yy_buf_size * 2;
1430:
1431: if ( new_size <= 0 )
1432: b->yy_buf_size += b->yy_buf_size / 8;
1433: else
1434: b->yy_buf_size *= 2;
1435:
1436: b->yy_ch_buf = (char *)
1437: /* Include room in for 2 EOB chars. */
1438: yy_flex_realloc( (void *) b->yy_ch_buf,
1439: b->yy_buf_size + 2 );
1440: }
1441: else
1442: /* Can't grow it, we don't own it. */
1443: b->yy_ch_buf = 0;
1444:
1445: if ( ! b->yy_ch_buf )
1446: YY_FATAL_ERROR(
1447: "fatal error - scanner input buffer overflow" );
1448:
1449: yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
1450:
1451: num_to_read = yy_current_buffer->yy_buf_size -
1452: number_to_move - 1;
1453: #endif
1454: }
1455:
1456: if ( num_to_read > YY_READ_BUF_SIZE )
1457: num_to_read = YY_READ_BUF_SIZE;
1458:
1459: /* Read in more data. */
1460: YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]),
1461: yy_n_chars, num_to_read );
1462:
1463: yy_current_buffer->yy_n_chars = yy_n_chars;
1464: }
1465:
1466: if ( yy_n_chars == 0 )
1467: {
1468: if ( number_to_move == YY_MORE_ADJ )
1469: {
1470: ret_val = EOB_ACT_END_OF_FILE;
1471: yyrestart( yyin );
1472: }
1473:
1474: else
1475: {
1476: ret_val = EOB_ACT_LAST_MATCH;
1477: yy_current_buffer->yy_buffer_status =
1478: YY_BUFFER_EOF_PENDING;
1479: }
1480: }
1481:
1482: else
1483: ret_val = EOB_ACT_CONTINUE_SCAN;
1484:
1485: yy_n_chars += number_to_move;
1486: yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; /* restore the two end-of-buffer sentinels */
1487: yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
1488:
1489: yytext_ptr = &yy_current_buffer->yy_ch_buf[0];
1490:
1491: return ret_val;
1492: }
1493:
1494:
1495: /* yy_get_previous_state - get the state just before the EOB char was reached */
1496:
1497: yy_state_type yyFlexLexer::yy_get_previous_state()
1498: {
1499: register yy_state_type yy_current_state;
1500: register char *yy_cp;
1501:
1502: yy_current_state = yy_start;
1503:
1504: for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp )
1505: {
1506: register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
1507: if ( yy_accept[yy_current_state] )
1508: {
1509: yy_last_accepting_state = yy_current_state;
1510: yy_last_accepting_cpos = yy_cp;
1511: }
1512: while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
1513: {
1514: yy_current_state = (int) yy_def[yy_current_state];
1515: if ( yy_current_state >= 159 )
1516: yy_c = yy_meta[(unsigned int) yy_c];
1517: }
1518: yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
1519: }
1520:
1521: return yy_current_state;
1522: }
1523:
1524:
1525: /* yy_try_NUL_trans - try to make a transition on the NUL character
1526: *
1527: * synopsis
1528: * next_state = yy_try_NUL_trans( current_state );
1529: */
1530:
1531: yy_state_type yyFlexLexer::yy_try_NUL_trans( yy_state_type yy_current_state )
1532: {
1533: register int yy_is_jam;
1534: register char *yy_cp = yy_c_buf_p;
1535:
1536: register YY_CHAR yy_c = 1;
1537: if ( yy_accept[yy_current_state] )
1538: {
1539: yy_last_accepting_state = yy_current_state;
1540: yy_last_accepting_cpos = yy_cp;
1541: }
1542: while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
1543: {
1544: yy_current_state = (int) yy_def[yy_current_state];
1545: if ( yy_current_state >= 159 )
1546: yy_c = yy_meta[(unsigned int) yy_c];
1547: }
1548: yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
1549: yy_is_jam = (yy_current_state == 158);
1550:
1551: return yy_is_jam ? 0 : yy_current_state;
1552: }
1553:
1554:
1555: void yyFlexLexer::yyunput( int c, register char* yy_bp )
1556: {
1557: register char *yy_cp = yy_c_buf_p;
1558:
1559: /* undo effects of setting up yytext */
1560: *yy_cp = yy_hold_char;
1561:
1562: if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
1563: { /* need to shift things up to make room */
1564: /* +2 for EOB chars. */
1565: register int number_to_move = yy_n_chars + 2;
1566: register char *dest = &yy_current_buffer->yy_ch_buf[
1567: yy_current_buffer->yy_buf_size + 2];
1568: register char *source =
1569: &yy_current_buffer->yy_ch_buf[number_to_move];
1570:
1571: while ( source > yy_current_buffer->yy_ch_buf )
1572: *--dest = *--source;
1573:
1574: yy_cp += (int) (dest - source);
1575: yy_bp += (int) (dest - source);
1576: yy_current_buffer->yy_n_chars =
1577: yy_n_chars = yy_current_buffer->yy_buf_size;
1578:
1579: if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
1580: YY_FATAL_ERROR( "flex scanner push-back overflow" );
1581: }
1582:
1583: *--yy_cp = (char) c;
1584:
1585:
1586: yytext_ptr = yy_bp;
1587: yy_hold_char = *yy_cp;
1588: yy_c_buf_p = yy_cp;
1589: }
1590:
1591:
1592: int yyFlexLexer::yyinput()
1593: {
1594: int c;
1595:
1596: *yy_c_buf_p = yy_hold_char;
1597:
1598: if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
1599: {
1600: /* yy_c_buf_p now points to the character we want to return.
1601: * If this occurs *before* the EOB characters, then it's a
1602: * valid NUL; if not, then we've hit the end of the buffer.
1603: */
1604: if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
1605: /* This was really a NUL. */
1606: *yy_c_buf_p = '\0';
1607:
1608: else
1609: { /* need more input */
1610: int offset = yy_c_buf_p - yytext_ptr;
1611: ++yy_c_buf_p;
1612:
1613: switch ( yy_get_next_buffer() )
1614: {
1615: case EOB_ACT_LAST_MATCH:
1616: /* This happens because yy_g_n_b()
1617: * sees that we've accumulated a
1618: * token and flags that we need to
1619: * try matching the token before
1620: * proceeding. But for input(),
1621: * there's no matching to consider.
1622: * So convert the EOB_ACT_LAST_MATCH
1623: * to EOB_ACT_END_OF_FILE.
1624: */
1625:
1626: /* Reset buffer status. */
1627: yyrestart( yyin );
1628:
1629: /* fall through */
1630:
1631: case EOB_ACT_END_OF_FILE:
1632: {
1633: if ( yywrap() )
1634: return EOF;
1635:
1636: if ( ! yy_did_buffer_switch_on_eof )
1637: YY_NEW_FILE;
1638: #ifdef __cplusplus
1639: return yyinput();
1640: #else
1641: return input();
1642: #endif
1643: }
1644:
1645: case EOB_ACT_CONTINUE_SCAN:
1646: yy_c_buf_p = yytext_ptr + offset;
1647: break;
1648: }
1649: }
1650: }
1651:
1652: c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */
1653: *yy_c_buf_p = '\0'; /* preserve yytext */
1654: yy_hold_char = *++yy_c_buf_p;
1655:
1656:
1657: return c;
1658: }
1659:
1660: void yyFlexLexer::yyrestart( std::istream* input_file )
1661: {
1662: if ( ! yy_current_buffer )
1663: yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );
1664:
1665: yy_init_buffer( yy_current_buffer, input_file );
1666: yy_load_buffer_state();
1667: }
1668:
1669:
1670: void yyFlexLexer::yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
1671: {
1672: if ( yy_current_buffer == new_buffer )
1673: return;
1674:
1675: if ( yy_current_buffer )
1676: {
1677: /* Flush out information for old buffer. */
1678: *yy_c_buf_p = yy_hold_char;
1679: yy_current_buffer->yy_buf_pos = yy_c_buf_p;
1680: yy_current_buffer->yy_n_chars = yy_n_chars;
1681: }
1682:
1683: yy_current_buffer = new_buffer;
1684: yy_load_buffer_state();
1685:
1686: /* We don't actually know whether we did this switch during
1687: * EOF (yywrap()) processing, but the only time this flag
1688: * is looked at is after yywrap() is called, so it's safe
1689: * to go ahead and always set it.
1690: */
1691: yy_did_buffer_switch_on_eof = 1;
1692: }
1693:
1694:
1695: void yyFlexLexer::yy_load_buffer_state()
1696: {
1697: yy_n_chars = yy_current_buffer->yy_n_chars;
1698: yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
1699: yyin = yy_current_buffer->yy_input_file;
1700: yy_hold_char = *yy_c_buf_p;
1701: }
1702:
1703:
1704: YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( std::istream* file, int size )
1705: {
1706: YY_BUFFER_STATE b;
1707:
1708: b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
1709: if ( ! b )
1710: YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
1711:
1712: b->yy_buf_size = size;
1713:
1714: /* yy_ch_buf has to be 2 characters longer than the size given because
1715: * we need to put in 2 end-of-buffer characters.
1716: */
1717: b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 );
1718: if ( ! b->yy_ch_buf )
1719: YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
1720:
1721: b->yy_is_our_buffer = 1;
1722:
1723: yy_init_buffer( b, file );
1724:
1725: return b;
1726: }
1727:
1728:
1729: void yyFlexLexer::yy_delete_buffer( YY_BUFFER_STATE b )
1730: {
1731: if ( ! b )
1732: return;
1733:
1734: if ( b == yy_current_buffer )
1735: yy_current_buffer = (YY_BUFFER_STATE) 0;
1736:
1737: if ( b->yy_is_our_buffer )
1738: yy_flex_free( (void *) b->yy_ch_buf );
1739:
1740: yy_flex_free( (void *) b );
1741: }
1742:
1743:
1744: void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, std::istream* file )
1745:
1746: {
1747: yy_flush_buffer( b );
1748:
1749: b->yy_input_file = file;
1750: b->yy_fill_buffer = 1;
1751:
1752: b->yy_is_interactive = 0;
1753: }
1754:
1755:
1756: void yyFlexLexer::yy_flush_buffer( YY_BUFFER_STATE b )
1757: {
1758: if ( ! b )
1759: return;
1760:
1761: b->yy_n_chars = 0;
1762:
1763: /* We always need two end-of-buffer characters. The first causes
1764: * a transition to the end-of-buffer state. The second causes
1765: * a jam in that state.
1766: */
1767: b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
1768: b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
1769:
1770: b->yy_buf_pos = &b->yy_ch_buf[0];
1771:
1772: b->yy_at_bol = 1;
1773: b->yy_buffer_status = YY_BUFFER_NEW;
1774:
1775: if ( b == yy_current_buffer )
1776: yy_load_buffer_state();
1777: }
1778:
1779:
1780:
1781:
1782:
1783:
1784:
1785:
1786:
1787:
1788:
1789:
1790:
1791:
1792:
1793: void yyFlexLexer::yy_push_state( int new_state )
1794: {
1795: if ( yy_start_stack_ptr >= yy_start_stack_depth )
1796: {
1797: yy_size_t new_size;
1798:
1799: yy_start_stack_depth += YY_START_STACK_INCR;
1800: new_size = yy_start_stack_depth * sizeof( int );
1801:
1802: if ( ! yy_start_stack )
1803: yy_start_stack = (int *) yy_flex_alloc( new_size );
1804:
1805: else
1806: yy_start_stack = (int *) yy_flex_realloc(
1807: (void *) yy_start_stack, new_size );
1808:
1809: if ( ! yy_start_stack )
1810: YY_FATAL_ERROR(
1811: "out of memory expanding start-condition stack" );
1812: }
1813:
1814: yy_start_stack[yy_start_stack_ptr++] = YY_START;
1815:
1816: BEGIN(new_state);
1817: }
1818:
1819:
1820:
1821:
1822: void yyFlexLexer::yy_pop_state()
1823: {
1824: if ( --yy_start_stack_ptr < 0 )
1825: YY_FATAL_ERROR( "start-condition stack underflow" );
1826:
1827: BEGIN(yy_start_stack[yy_start_stack_ptr]);
1828: }
1829:
1830:
1831:
1832:
1833: int yyFlexLexer::yy_top_state()
1834: {
1835: return yy_start_stack[yy_start_stack_ptr - 1];
1836: }
1837:
1838:
1839:
1840:
1841:
1842:
1843:
1844: void yyFlexLexer::LexerError( yyconst char msg[] )
1845: {
1846: cerr << msg << '\n';
1847: exit( YY_EXIT_FAILURE );
1848: }
1849:
1850:
1851: /* Redefine yyless() so it works in section 3 code. */
1852:
1853:
1854:
1855: do \
1856: { \
1857: /* Undo effects of setting up yytext. */ \
1858: yytext[yyleng] = yy_hold_char; \
1859: yy_c_buf_p = yytext + n; \
1860: yy_hold_char = *yy_c_buf_p; \
1861: *yy_c_buf_p = '\0'; \
1862: yyleng = n; \
1863: } \
1864: while ( 0 )
1865:
1866:
1867: /* Internal utility routines. */
1868:
1869:
1870:
1871: static void yy_flex_strncpy( char *s1, yyconst char *s2, int n )
1872:
1873: static void yy_flex_strncpy( s1, s2, n )
1874: char *s1;
1875: yyconst char *s2;
1876: int n;
1877:
1878: {
1879: register int i;
1880: for ( i = 0; i < n; ++i )
1881: s1[i] = s2[i];
1882: }
1883:
1884:
1885:
1886:
1887: static int yy_flex_strlen( yyconst char *s )
1888:
1889: static int yy_flex_strlen( s )
1890: yyconst char *s;
1891:
1892: {
1893: register int n;
1894: for ( n = 0; s[n]; ++n )
1895: ;
1896:
1897: return n;
1898: }
1899:
1900:
1901:
1902:
1903: static void *yy_flex_alloc( yy_size_t size )
1904:
1905: static void *yy_flex_alloc( size )
1906: yy_size_t size;
1907:
1908: {
1909: return (void *) malloc( size );
1910: }
1911:
1912:
1913: static void *yy_flex_realloc( void *ptr, yy_size_t size )
1914:
1915: static void *yy_flex_realloc( ptr, size )
1916: void *ptr;
1917: yy_size_t size;
1918:
1919: {
1920: /* The cast to (char *) in the following accommodates both
1921: * implementations that use char* generic pointers, and those
1922: * that use void* generic pointers. It works with the latter
1923: * because both ANSI C and C++ allow castless assignment from
1924: * any pointer type to void*, and deal with argument conversions
1925: * as though doing an assignment.
1926: */
1927: return (void *) realloc( (char *) ptr, size );
1928: }
1929:
1930:
1931: static void yy_flex_free( void *ptr )
1932:
1933: static void yy_flex_free( ptr )
1934: void *ptr;
1935:
1936: {
1937: free( ptr );
1938: }
1939:
1940:
1941: int main()
1942: {
1943: yylex();
1944: return 0;
1945: }
1946:
1947:
1948:
1949: /* -------------------- additional C code -------------------- */
1950:
1951:
1952: bool isGramlexEmbed(int code)
1953: {
1954: return code == TOK_LIT_CODE;
1955: }
1956:
Start data section to elk/elk_gramlex.yy.cpp.old[1
/1
]
1: /* A lexical scanner generated by flex */
2:
3: /* Scanner skeleton version:
4: * $Header$
5: */
6:
7: #define FLEX_SCANNER
8: #define YY_FLEX_MAJOR_VERSION 2
9: #define YY_FLEX_MINOR_VERSION 5
10:
11:
12:
13: /* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */
14: #ifdef c_plusplus
15: #ifndef __cplusplus
16: #define __cplusplus
17: #endif
18: #endif
19:
20:
21: #ifdef __cplusplus
22:
23: #include <stdlib.h>
24: #include <iostream>
25: using namespace std;
26:
27: /* Use prototypes in function declarations. */
28: #define YY_USE_PROTOS
29:
30: /* The "const" storage-class-modifier is valid. */
31: #define YY_USE_CONST
32:
33: #else /* ! __cplusplus */
34:
35: #if __STDC__
36:
37: #define YY_USE_PROTOS
38: #define YY_USE_CONST
39:
40: #endif /* __STDC__ */
41: #endif /* ! __cplusplus */
42:
43: #ifdef __TURBOC__
44: #pragma warn -rch
45: #pragma warn -use
46: #include <io.h>
47: #include <stdlib.h>
48: #define YY_USE_CONST
49: #define YY_USE_PROTOS
50: #endif
51:
52: #ifdef YY_USE_CONST
53: #define yyconst const
54: #else
55: #define yyconst
56: #endif
57:
58:
59: #ifdef YY_USE_PROTOS
60: #define YY_PROTO(proto) proto
61: #else
62: #define YY_PROTO(proto) ()
63: #endif
64:
65: /* Returned upon end-of-file. */
66: #define YY_NULL 0
67:
68: /* Promotes a possibly negative, possibly signed char to an unsigned
69: * integer for use as an array index. If the signed char is negative,
70: * we want to instead treat it as an 8-bit unsigned char, hence the
71: * double cast.
72: */
73: #define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
74:
75: /* Enter a start condition. This macro really ought to take a parameter,
76: * but we do it the disgusting crufty way forced on us by the ()-less
77: * definition of BEGIN.
78: */
79: #define BEGIN yy_start = 1 + 2 *
80:
81: /* Translate the current start state into a value that can be later handed
82: * to BEGIN to return to the state. The YYSTATE alias is for lex
83: * compatibility.
84: */
85: #define YY_START ((yy_start - 1) / 2)
86: #define YYSTATE YY_START
87:
88: /* Action number for EOF rule of a given start state. */
89: #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
90:
91: /* Special action meaning "start processing a new file". */
92: #define YY_NEW_FILE yyrestart( yyin )
93:
94: #define YY_END_OF_BUFFER_CHAR 0
95:
96: /* Size of default input buffer. */
97: #define YY_BUF_SIZE 16384
98:
99: typedef struct yy_buffer_state *YY_BUFFER_STATE;
100:
101: extern int yyleng;
102:
103: #define EOB_ACT_CONTINUE_SCAN 0
104: #define EOB_ACT_END_OF_FILE 1
105: #define EOB_ACT_LAST_MATCH 2
106:
107: /* The funky do-while in the following #define is used to turn the definition
108: * int a single C statement (which needs a semi-colon terminator). This
109: * avoids problems with code like:
110: *
111: * if ( condition_holds )
112: * yyless( 5 );
113: * else
114: * do_something_else();
115: *
116: * Prior to using the do-while the compiler would get upset at the
117: * "else" because it interpreted the "if" statement as being all
118: * done when it reached the ';' after the yyless() call.
119: */
120:
121: /* Return all but the first 'n' matched characters back to the input stream. */
122:
123: #define yyless(n) \
124: do \
125: { \
126: /* Undo effects of setting up yytext. */ \
127: *yy_cp = yy_hold_char; \
128: YY_RESTORE_YY_MORE_OFFSET \
129: yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \
130: YY_DO_BEFORE_ACTION; /* set up yytext again */ \
131: } \
132: while ( 0 )
133:
134: #define unput(c) yyunput( c, yytext_ptr )
135:
136: /* The following is because we cannot portably get our hands on size_t
137: * (without autoconf's help, which isn't available because we want
138: * flex-generated scanners to compile on their own).
139: */
140: typedef unsigned int yy_size_t;
141:
142:
143: struct yy_buffer_state
144: {
145: std::istream* yy_input_file;
146:
147: char *yy_ch_buf; /* input buffer */
148: char *yy_buf_pos; /* current position in input buffer */
149:
150: /* Size of input buffer in bytes, not including room for EOB
151: * characters.
152: */
153: yy_size_t yy_buf_size;
154:
155: /* Number of characters read into yy_ch_buf, not including EOB
156: * characters.
157: */
158: int yy_n_chars;
159:
160: /* Whether we "own" the buffer - i.e., we know we created it,
161: * and can realloc() it to grow it, and should free() it to
162: * delete it.
163: */
164: int yy_is_our_buffer;
165:
166: /* Whether this is an "interactive" input source; if so, and
167: * if we're using stdio for input, then we want to use getc()
168: * instead of fread(), to make sure we stop fetching input after
169: * each newline.
170: */
171: int yy_is_interactive;
172:
173: /* Whether we're considered to be at the beginning of a line.
174: * If so, '^' rules will be active on the next match, otherwise
175: * not.
176: */
177: int yy_at_bol;
178:
179: /* Whether to try to fill the input buffer when we reach the
180: * end of it.
181: */
182: int yy_fill_buffer;
183:
184: int yy_buffer_status;
185: #define YY_BUFFER_NEW 0
186: #define YY_BUFFER_NORMAL 1
187: /* When an EOF's been seen but there's still some text to process
188: * then we mark the buffer as YY_EOF_PENDING, to indicate that we
189: * shouldn't try reading from the input source any more. We might
190: * still have a bunch of tokens to match, though, because of
191: * possible backing-up.
192: *
193: * When we actually see the EOF, we change the status to "new"
194: * (via yyrestart()), so that the user can continue scanning by
195: * just pointing yyin at a new input file.
196: */
197: #define YY_BUFFER_EOF_PENDING 2
198: };
199:
200:
201: /* We provide macros for accessing buffer states in case in the
202: * future we want to put the buffer states in a more general
203: * "scanner state".
204: */
205: #define YY_CURRENT_BUFFER yy_current_buffer
206:
207:
208:
209: static void *yy_flex_alloc YY_PROTO(( yy_size_t ));
210: static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t ));
211: static void yy_flex_free YY_PROTO(( void * ));
212:
213: #define yy_new_buffer yy_create_buffer
214:
215: #define yy_set_interactive(is_interactive) \
216: { \
217: if ( ! yy_current_buffer ) \
218: yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
219: yy_current_buffer->yy_is_interactive = is_interactive; \
220: }
221:
222: #define yy_set_bol(at_bol) \
223: { \
224: if ( ! yy_current_buffer ) \
225: yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
226: yy_current_buffer->yy_at_bol = at_bol; \
227: }
228:
229: #define YY_AT_BOL() (yy_current_buffer->yy_at_bol)
230:
231:
232: #define yywrap() 1
233: #define YY_SKIP_YYWRAP
234: typedef unsigned char YY_CHAR;
235: #define yytext_ptr yytext
236: #define YY_INTERACTIVE
237:
238: #include "sm_flexlexer.h"
239: int yyFlexLexer::yylex()
240: {
241: LexerError( "yyFlexLexer::yylex invoked but %option yyclass used" );
242: return 0;
243: }
244:
245: #define YY_DECL int GrammarLexer::yylex()
246:
247:
248: /* Done after the current pattern has been matched and before the
249: * corresponding action - sets up yytext.
250: */
251: #define YY_DO_BEFORE_ACTION \
252: yytext_ptr = yy_bp; \
253: yyleng = (int) (yy_cp - yy_bp); \
254: yy_hold_char = *yy_cp; \
255: *yy_cp = '\0'; \
256: yy_c_buf_p = yy_cp;
257:
258: #define YY_NUM_RULES 40
259: #define YY_END_OF_BUFFER 41
260: static yyconst short int yy_accept[159] =
261: { 0,
262: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
263: 0, 0, 0, 0, 0, 0, 41, 39, 2, 1,
264: 39, 24, 10, 11, 39, 39, 37, 9, 21, 36,
265: 17, 36, 36, 36, 36, 36, 36, 36, 36, 36,
266: 36, 20, 8, 5, 6, 5, 33, 33, 33, 34,
267: 35, 27, 28, 29, 19, 23, 2, 0, 38, 18,
268: 3, 0, 37, 36, 36, 36, 36, 36, 36, 36,
269: 36, 36, 36, 36, 36, 36, 4, 0, 0, 0,
270: 0, 34, 27, 0, 7, 36, 36, 25, 36, 36,
271: 36, 36, 36, 36, 36, 36, 36, 0, 36, 36,
272:
273: 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,
274: 36, 36, 36, 36, 36, 36, 36, 36, 36, 22,
275: 36, 0, 32, 36, 15, 36, 36, 36, 14, 36,
276: 36, 36, 36, 36, 36, 31, 36, 16, 36, 36,
277: 36, 36, 36, 36, 26, 36, 36, 36, 12, 36,
278: 36, 13, 36, 36, 36, 36, 30, 0
279: } ;
280:
281: static yyconst int yy_ec[256] =
282: { 0,
283: 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
284: 4, 4, 1, 1, 1, 1, 1, 1, 1, 1,
285: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
286: 1, 2, 1, 5, 1, 1, 1, 1, 1, 6,
287: 7, 8, 1, 9, 10, 1, 11, 12, 12, 12,
288: 12, 12, 12, 12, 12, 12, 12, 13, 14, 1,
289: 1, 15, 1, 1, 16, 16, 16, 16, 16, 16,
290: 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
291: 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
292: 17, 18, 19, 1, 20, 1, 21, 22, 23, 24,
293:
294: 25, 26, 16, 16, 27, 16, 28, 29, 30, 31,
295: 32, 33, 16, 34, 35, 36, 37, 38, 16, 39,
296: 16, 16, 40, 1, 41, 1, 1, 1, 1, 1,
297: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
298: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
299: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
300: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
301: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
302: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
303: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
304:
305: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
306: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
307: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
308: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
309: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
310: 1, 1, 1, 1, 1
311: } ;
312:
313: static yyconst int yy_meta[42] =
314: { 0,
315: 1, 1, 2, 1, 3, 1, 4, 1, 1, 1,
316: 1, 5, 1, 1, 1, 5, 1, 6, 4, 5,
317: 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
318: 5, 5, 5, 5, 5, 5, 5, 5, 5, 1,
319: 4
320: } ;
321:
322: static yyconst short int yy_base[169] =
323: { 0,
324: 0, 0, 39, 40, 43, 44, 227, 226, 48, 49,
325: 188, 187, 30, 39, 48, 51, 226, 231, 56, 231,
326: 220, 231, 231, 231, 209, 53, 211, 231, 231, 0,
327: 231, 190, 182, 183, 41, 187, 185, 183, 179, 34,
328: 190, 231, 231, 231, 231, 203, 231, 67, 72, 0,
329: 231, 0, 231, 231, 231, 231, 74, 208, 231, 231,
330: 231, 209, 199, 0, 179, 176, 177, 174, 183, 174,
331: 168, 178, 180, 167, 172, 165, 231, 78, 80, 81,
332: 0, 0, 0, 195, 231, 161, 171, 0, 166, 165,
333: 157, 165, 168, 155, 159, 163, 165, 181, 160, 161,
334:
335: 163, 145, 156, 148, 154, 153, 150, 145, 154, 85,
336: 135, 137, 134, 147, 136, 138, 144, 131, 135, 0,
337: 129, 91, 231, 128, 0, 138, 137, 131, 0, 135,
338: 124, 137, 130, 136, 121, 0, 123, 0, 124, 122,
339: 128, 126, 124, 102, 0, 106, 108, 99, 0, 76,
340: 60, 0, 60, 67, 46, 45, 0, 231, 98, 104,
341: 110, 116, 122, 58, 127, 133, 139, 145
342: } ;
343:
344: static yyconst short int yy_def[169] =
345: { 0,
346: 158, 1, 159, 159, 160, 160, 161, 161, 162, 162,
347: 1, 1, 1, 1, 1, 1, 158, 158, 158, 158,
348: 163, 158, 158, 158, 158, 158, 158, 158, 158, 164,
349: 158, 164, 164, 164, 164, 164, 164, 164, 164, 164,
350: 164, 158, 158, 158, 158, 158, 158, 158, 158, 165,
351: 158, 166, 158, 158, 158, 158, 158, 163, 158, 158,
352: 158, 167, 158, 164, 164, 164, 164, 164, 164, 164,
353: 164, 164, 164, 164, 164, 164, 158, 158, 158, 158,
354: 168, 165, 166, 167, 158, 164, 164, 164, 164, 164,
355: 164, 164, 164, 164, 164, 164, 164, 168, 164, 164,
356:
357: 164, 164, 164, 164, 164, 164, 164, 164, 164, 158,
358: 164, 164, 164, 164, 164, 164, 164, 164, 164, 164,
359: 164, 158, 158, 164, 164, 164, 164, 164, 164, 164,
360: 164, 164, 164, 164, 164, 164, 164, 164, 164, 164,
361: 164, 164, 164, 164, 164, 164, 164, 164, 164, 164,
362: 164, 164, 164, 164, 164, 164, 164, 0, 158, 158,
363: 158, 158, 158, 158, 158, 158, 158, 158
364: } ;
365:
366: static yyconst short int yy_nxt[273] =
367: { 0,
368: 18, 19, 20, 19, 21, 22, 23, 18, 24, 25,
369: 26, 27, 28, 29, 18, 30, 31, 18, 18, 30,
370: 30, 30, 32, 30, 33, 34, 35, 30, 30, 30,
371: 36, 37, 38, 30, 39, 40, 30, 41, 30, 42,
372: 43, 45, 45, 18, 48, 48, 46, 46, 49, 49,
373: 53, 53, 18, 56, 54, 54, 56, 57, 74, 57,
374: 61, 18, 64, 62, 18, 75, 54, 54, 78, 55,
375: 68, 69, 79, 80, 145, 57, 81, 57, 55, 78,
376: 157, 80, 80, 79, 81, 81, 122, 18, 54, 54,
377: 18, 123, 122, 156, 155, 154, 153, 123, 44, 44,
378:
379: 44, 44, 44, 44, 47, 47, 47, 47, 47, 47,
380: 50, 50, 50, 50, 50, 50, 52, 52, 52, 52,
381: 52, 52, 58, 152, 58, 58, 58, 82, 151, 82,
382: 82, 82, 82, 83, 150, 83, 149, 83, 83, 84,
383: 84, 84, 84, 84, 84, 98, 148, 147, 98, 98,
384: 146, 145, 144, 143, 142, 141, 140, 139, 138, 137,
385: 120, 136, 135, 134, 133, 132, 131, 130, 129, 128,
386: 127, 126, 125, 124, 121, 120, 119, 118, 117, 116,
387: 115, 114, 113, 112, 111, 110, 109, 108, 107, 106,
388: 105, 104, 103, 102, 101, 100, 99, 85, 97, 96,
389:
390: 95, 94, 93, 92, 91, 90, 89, 88, 87, 86,
391: 63, 85, 59, 77, 76, 73, 72, 71, 70, 67,
392: 66, 65, 63, 60, 59, 158, 55, 55, 51, 51,
393: 17, 158, 158, 158, 158, 158, 158, 158, 158, 158,
394: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
395: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
396: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
397: 158, 158
398: } ;
399:
400: static yyconst short int yy_chk[273] =
401: { 0,
402: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
403: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
404: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
405: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
406: 1, 3, 4, 13, 5, 6, 3, 4, 5, 6,
407: 9, 10, 14, 15, 9, 10, 16, 19, 40, 19,
408: 26, 15, 164, 26, 16, 40, 9, 10, 48, 13,
409: 35, 35, 48, 49, 156, 57, 49, 57, 14, 78,
410: 155, 79, 80, 78, 79, 80, 110, 15, 9, 10,
411: 16, 110, 122, 154, 153, 151, 150, 122, 159, 159,
412:
413: 159, 159, 159, 159, 160, 160, 160, 160, 160, 160,
414: 161, 161, 161, 161, 161, 161, 162, 162, 162, 162,
415: 162, 162, 163, 148, 163, 163, 163, 165, 147, 165,
416: 165, 165, 165, 166, 146, 166, 144, 166, 166, 167,
417: 167, 167, 167, 167, 167, 168, 143, 142, 168, 168,
418: 141, 140, 139, 137, 135, 134, 133, 132, 131, 130,
419: 128, 127, 126, 124, 121, 119, 118, 117, 116, 115,
420: 114, 113, 112, 111, 109, 108, 107, 106, 105, 104,
421: 103, 102, 101, 100, 99, 98, 97, 96, 95, 94,
422: 93, 92, 91, 90, 89, 87, 86, 84, 76, 75,
423:
424: 74, 73, 72, 71, 70, 69, 68, 67, 66, 65,
425: 63, 62, 58, 46, 41, 39, 38, 37, 36, 34,
426: 33, 32, 27, 25, 21, 17, 12, 11, 8, 7,
427: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
428: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
429: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
430: 158, 158, 158, 158, 158, 158, 158, 158, 158, 158,
431: 158, 158
432: } ;
433:
434: /* The intent behind this definition is that it'll catch
435: * any uses of REJECT which flex missed.
436: */
437: #define REJECT reject_used_but_not_detected
438: #define yymore() yymore_used_but_not_detected
439: #define YY_MORE_ADJ 0
440: #define YY_RESTORE_YY_MORE_OFFSET
441: #define INITIAL 0
442: /* grammar.lex
443: * lexical analyzer for my grammar input format
444: *
445: * The variety of syntaxes for embedded literal code cause this lexer
446: * to have some of the context sensitivity usually associated with a
447: * parser. This context doesn't nest arbitrarily deeply, so the
448: * language recognized is still regular, but clearly there's some
449: * design tension.
450: */
451: /* ----------------- C definitions -------------------- */
452:
453: // pull in my declaration of the lexer class -- this defines
454: // the additional lexer state, some of which is used in the
455: // action rules below (this is in the ../ast/ directory now)
456: #include "ast_gramlex.h"
457:
458: // pull in the bison-generated token codes
459: #include "elk_grampar.codes.h"
460:
461: #include <string.h> // strchr, strrchr
462:
463: // for maintaining column count
464: #define TOKEN_START tokenStartLoc = fileState.loc /* user ; */
465: #define UPD_COL \
466: fileState.loc = sourceLocManager->advCol(fileState.loc, yyleng) /* user ; */
467: #define TOK_UPD_COL TOKEN_START; UPD_COL /* user ; */
468:
469: /* -------------------- flex options ------------------ */
470: /* no wrapping is needed; setting this means we don't have to link with libfl.a */
471: /* don't use the default-echo rules */
472: /* generate a c++ lexer */
473: /* and I will define the class */
474: /* ------------------- definitions -------------------- */
475: /* any character, including newline */
476: /* any character except newline */
477: /* starting character in a name */
478: /* starting character in a numeric literal */
479: /* double-quote */
480: /* character that can appear in a quoted sm_string */
481: /* (I currently don't have any backslash codes, but I want to
482: * leave open that possibility, so for now backslashes are illegal) */
483: /* horizontal whitespace */
484: /* whitespace that doesn't cross line a boundary */
485: /* --------------- start conditions ------------------- */
486: /* eating a comment delimited by slash-star and star-slash; note
487: * that we remember our current state when entering C_COMMENT,
488: * and restore it on exit */
489: #define C_COMMENT 1
490:
491: /* looking for the file name in an "include" directive */
492: #define INCLUDE 2
493:
494: /* recovering from an error by skipping to the next newline */
495: #define EAT_TO_NEWLINE 3
496:
497: /* gathering literal embedded code; the delimiter is specified
498: * in the 'embedFinish' variable */
499: #define LITCODE 4
500:
501: /* tokenizing the right-hand side of a production; this one is not
502: * exclusive because tokenization is virtually the same in RHS
503: * mode as in INITIAL mode */
504: #define RHS 5
505:
506: /* tokenizing parameter list of a function, leading into the
507: * embedded code that is its body */
508: #define FUN 6
509:
510: /* looking for the start of a type that follows "token" or "nonterm",
511: * or the TOK_NAME meaning the type has been omitted */
512: #define OPTIONAL_TYPE 7
513:
514: /* ---------------------- rules ----------------------- */
515:
516: /* Macros after this point can all be overridden by user definitions in
517: * section 1.
518: */
519:
520: #ifndef YY_SKIP_YYWRAP
521: #ifdef __cplusplus
522: extern "C" int yywrap YY_PROTO(( void ));
523: #else
524: extern int yywrap YY_PROTO(( void ));
525: #endif
526: #endif
527:
528:
529: #ifndef yytext_ptr
530: static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int ));
531: #endif
532:
533: #ifdef YY_NEED_STRLEN
534: static int yy_flex_strlen YY_PROTO(( yyconst char * ));
535: #endif
536:
537: #ifndef YY_NO_INPUT
538: #endif
539:
540: #if YY_STACK_USED
541: static int yy_start_stack_ptr = 0;
542: static int yy_start_stack_depth = 0;
543: static int *yy_start_stack = 0;
544: #ifndef YY_NO_PUSH_STATE
545: static void yy_push_state YY_PROTO(( int new_state ));
546: #endif
547: #ifndef YY_NO_POP_STATE
548: static void yy_pop_state YY_PROTO(( void ));
549: #endif
550: #ifndef YY_NO_TOP_STATE
551: static int yy_top_state YY_PROTO(( void ));
552: #endif
553:
554: #else
555: #define YY_NO_PUSH_STATE 1
556: #define YY_NO_POP_STATE 1
557: #define YY_NO_TOP_STATE 1
558: #endif
559:
560: #ifdef YY_MALLOC_DECL
561: YY_MALLOC_DECL
562: #else
563: #if __STDC__
564: #ifndef __cplusplus
565: #include <stdlib.h>
566: #endif
567: #else
568: /* Just try to get by without declaring the routines. This will fail
569: * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int)
570: * or sizeof(void*) != sizeof(int).
571: */
572: #endif
573: #endif
574:
575: /* Amount of stuff to slurp up with each read. */
576: #ifndef YY_READ_BUF_SIZE
577: #define YY_READ_BUF_SIZE 8192
578: #endif
579:
580: /* Copy whatever the last rule matched to the standard output. */
581:
582: #ifndef ECHO
583: #define ECHO LexerOutput( yytext, yyleng )
584: #endif
585:
586: /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
587: * is returned in "result".
588: */
589: #ifndef YY_INPUT
590: #define YY_INPUT(buf,result,max_size) \
591: if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \
592: YY_FATAL_ERROR( "input in flex scanner failed" );
593: #endif
594:
595: /* No semi-colon after return; correct usage is to write "yyterminate();" -
596: * we don't want an extra ';' after the "return" because that will cause
597: * some compilers to complain about unreachable statements.
598: */
599: #ifndef yyterminate
600: #define yyterminate() return YY_NULL
601: #endif
602:
603: /* Number of entries by which start-condition stack grows. */
604: #ifndef YY_START_STACK_INCR
605: #define YY_START_STACK_INCR 25
606: #endif
607:
608: /* Report a fatal error. */
609: #ifndef YY_FATAL_ERROR
610: #define YY_FATAL_ERROR(msg) LexerError( msg )
611: #endif
612:
613: /* Default declaration of generated scanner - a define so the user can
614: * easily add parameters.
615: */
616: #ifndef YY_DECL
617: #define YY_DECL int yyFlexLexer::yylex()
618: #endif
619:
620: /* Code executed at the beginning of each rule, after yytext and yyleng
621: * have been set up.
622: */
623: #ifndef YY_USER_ACTION
624: #define YY_USER_ACTION
625: #endif
626:
627: /* Code executed at the end of each rule. */
628: #ifndef YY_BREAK
629: #define YY_BREAK break;
630: #endif
631:
632: #define YY_RULE_SETUP \
633: YY_USER_ACTION
634:
635: YY_DECL
636: {
637: register yy_state_type yy_current_state;
638: register char *yy_cp = NULL, *yy_bp = NULL;
639: register int yy_act;
640:
641:
642:
643: /* -------- whitespace ------ */
644:
645: if ( yy_init )
646: {
647: yy_init = 0;
648:
649: #ifdef YY_USER_INIT
650: YY_USER_INIT;
651: #endif
652:
653: if ( ! yy_start )
654: yy_start = 1; /* first start state */
655:
656: if ( ! yyin )
657: yyin = &cin;
658:
659: if ( ! yyout )
660: yyout = &std::cout;
661:
662: if ( ! yy_current_buffer )
663: yy_current_buffer =
664: yy_create_buffer( yyin, YY_BUF_SIZE );
665:
666: yy_load_buffer_state();
667: }
668:
669: while ( 1 ) /* loops until end-of-file is reached */
670: {
671: yy_cp = yy_c_buf_p;
672:
673: /* Support of yytext. */
674: *yy_cp = yy_hold_char;
675:
676: /* yy_bp points to the position in yy_ch_buf of the start of
677: * the current run.
678: */
679: yy_bp = yy_cp;
680:
681: yy_current_state = yy_start;
682: yy_match:
683: do
684: {
685: register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
686: if ( yy_accept[yy_current_state] )
687: {
688: yy_last_accepting_state = yy_current_state;
689: yy_last_accepting_cpos = yy_cp;
690: }
691: while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
692: {
693: yy_current_state = (int) yy_def[yy_current_state];
694: if ( yy_current_state >= 159 )
695: yy_c = yy_meta[(unsigned int) yy_c];
696: }
697: yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
698: ++yy_cp;
699: }
700: while ( yy_base[yy_current_state] != 231 );
701:
702: yy_find_action:
703: yy_act = yy_accept[yy_current_state];
704: if ( yy_act == 0 )
705: { /* have to back up */
706: yy_cp = yy_last_accepting_cpos;
707: yy_current_state = yy_last_accepting_state;
708: yy_act = yy_accept[yy_current_state];
709: }
710:
711: YY_DO_BEFORE_ACTION;
712:
713:
714: do_action: /* This label is used only to access EOF actions. */
715:
716:
717: switch ( yy_act )
718: { /* beginning of action switch */
719: case 0: /* must back up */
720: /* undo the effects of YY_DO_BEFORE_ACTION */
721: *yy_cp = yy_hold_char;
722: yy_cp = yy_last_accepting_cpos;
723: yy_current_state = yy_last_accepting_state;
724: goto yy_find_action;
725:
726: case 1:
727: YY_RULE_SETUP
728: {
729: newLine();
730: }
731: YY_BREAK
732: case 2:
733: YY_RULE_SETUP
734: {
735: UPD_COL;
736: }
737: YY_BREAK
738: /* -------- comments -------- */
739: case 3:
740: YY_RULE_SETUP
741: {
742: /* C-style comments */
743: TOKEN_START;
744: UPD_COL;
745: prevState = YY_START;
746: BEGIN(C_COMMENT);
747: }
748: YY_BREAK
749:
750: case 4:
751: YY_RULE_SETUP
752: {
753: /* end of comment */
754: UPD_COL;
755: BEGIN(prevState);
756: }
757: YY_BREAK
758: case 5:
759: YY_RULE_SETUP
760: {
761: /* anything but slash-star or newline -- eat it */
762: UPD_COL;
763: }
764: YY_BREAK
765: case 6:
766: YY_RULE_SETUP
767: {
768: newLine();
769: }
770: YY_BREAK
771: case YY_STATE_EOF(C_COMMENT):
772: {
773: UPD_COL; // <<EOF>> yyleng is 1!
774: errorUnterminatedComment();
775: return TOK_EOF;
776: }
777: YY_BREAK
778:
779: case 7:
780: YY_RULE_SETUP
781: {
782: /* C++-style comment -- eat it */
783: TOKEN_START;
784: advCol(yyleng-1); // don't count newline
785: newLine(); // count it here
786: }
787: YY_BREAK
788: /* -------- punctuators, operators, keywords --------- */
789: case 8:
790: YY_RULE_SETUP
791: TOK_UPD_COL; return TOK_RBRACE;
792: YY_BREAK
793: case 9:
794: YY_RULE_SETUP
795: TOK_UPD_COL; return TOK_COLON;
796: YY_BREAK
797: case 10:
798: YY_RULE_SETUP
799: TOK_UPD_COL; return TOK_RPAREN;
800: YY_BREAK
801: case 11:
802: YY_RULE_SETUP
803: TOK_UPD_COL; return TOK_COMMA;
804: YY_BREAK
805: case 12:
806: YY_RULE_SETUP
807: TOK_UPD_COL; return TOK_TERMINALS;
808: YY_BREAK
809: case 13:
810: YY_RULE_SETUP
811: TOK_UPD_COL; return TOK_PRECEDENCE;
812: YY_BREAK
813: case 14:
814: YY_RULE_SETUP
815: TOK_UPD_COL; return TOK_OPTION;
816: YY_BREAK
817: case 15:
818: YY_RULE_SETUP
819: TOK_UPD_COL; return TOK_EXPECT;
820: YY_BREAK
821: case 16:
822: YY_RULE_SETUP
823: TOK_UPD_COL; return TOK_SUBSETS;
824: YY_BREAK
825: /* ----------- sequences that begin literal code ------------ */
826: /* for the time being, a "[" will always start an embedded sequence;
827: * eventually, I'll remove this in favor of the brace- and paren-
828: * delimited embedded sequences */
829: case 17:
830: YY_RULE_SETUP
831: {
832: TOK_UPD_COL;
833: BEGIN(LITCODE);
834: beginEmbed(']', TOK_LIT_CODE);
835: }
836: YY_BREAK
837: /* the "->" operator moves us into RHS mode, which is special because
838: * in this mode any "{" is interpreted as the beginning of an embedded
839: * section of literal code */
840: case 18:
841: YY_RULE_SETUP
842: {
843: TOK_UPD_COL;
844: BEGIN(RHS);
845: return TOK_ARROW;
846: }
847: YY_BREAK
848: /* "{" in a RHS begins embedded */
849: case 19:
850: YY_RULE_SETUP
851: {
852: TOK_UPD_COL;
853: BEGIN(LITCODE);
854: beginEmbed('}', TOK_LIT_CODE);
855: }
856: YY_BREAK
857: /* otherwise it's just a "{" */
858: case 20:
859: YY_RULE_SETUP
860: {
861: TOK_UPD_COL;
862: return TOK_LBRACE;
863: }
864: YY_BREAK
865: /* since right-hand-sides can end with either embedded code or a simple
866: * ";", the semicolon gets out of RHS mode */
867: case 21:
868: YY_RULE_SETUP
869: {
870: TOK_UPD_COL;
871: BEGIN(INITIAL); // if in RHS, reset to INITIAL
872: return TOK_SEMICOLON;
873: }
874: YY_BREAK
875: /* "token" and "nonterm" are always followed by an optional type,
876: * and then a TOK_NAME. So, until we see a TOK_NAME, "(" will mean
877: * the start of an embedded sequence. */
878: case 22:
879: YY_RULE_SETUP
880: {
881: TOK_UPD_COL;
882: BEGIN(OPTIONAL_TYPE);
883: return yytext[0]=='t'? TOK_TOKEN : TOK_NONTERM;
884: }
885: YY_BREAK
886: /* so now this begins embedded */
887: case 23:
888: YY_RULE_SETUP
889: {
890: TOK_UPD_COL;
891: BEGIN(LITCODE);
892: beginEmbed(')', TOK_LIT_CODE);
893: }
894: YY_BREAK
895: /* otherwise it's just itself */
896: case 24:
897: YY_RULE_SETUP
898: {
899: TOK_UPD_COL;
900: return TOK_LPAREN;
901: }
902: YY_BREAK
903: /* function beginning */
904: case 25:
905: YY_RULE_SETUP
906: {
907: TOK_UPD_COL;
908: BEGIN(FUN); // treat "{" as beginning literal code
909: return TOK_FUN;
910: }
911: YY_BREAK
912: /* verbatim beginning */
913: case 26:
914: YY_RULE_SETUP
915: {
916: TOK_UPD_COL;
917: BEGIN(FUN); // close enough
918: return yytext[0]=='v'? TOK_VERBATIM : TOK_IMPL_VERBATIM;
919: }
920: YY_BREAK
921: /* --------- embedded literal code --------- */
922: /* no TOKEN_START here; we'll use the tokenStartLoc that
923: * was computed in the opening punctuation */
924:
925: case 27:
926: YY_RULE_SETUP
927: {
928: UPD_COL;
929: embedded->handle(yytext, yyleng, embedFinish);
930: }
931: YY_BREAK
932: case 28:
933: YY_RULE_SETUP
934: {
935: newLine();
936: embedded->handle(yytext, yyleng, embedFinish);
937: }
938: YY_BREAK
939: case 29:
940: YY_RULE_SETUP
941: {
942: UPD_COL;
943: if (embedded->zeroNesting()) {
944: // done
945: BEGIN(INITIAL);
946:
947: // check for balanced delimiter
948: if (embedFinish != yytext[0]) {
949: err("unbalanced literal code delimiter");
950: }
951:
952: // don't add "return" or ";"
953: embedded->exprOnly = false;
954:
955: // can't extract anything
956: embedded->isDeclaration = false;
957:
958: // caller can get text from embedded->text
959: return embedMode;
960: }
961: else {
962: // delimeter paired within the embedded code, mostly ignore it
963: embedded->handle(yytext, yyleng, embedFinish);
964: }
965: }
966: YY_BREAK
967: case YY_STATE_EOF(LITCODE):
968: {
969: err(sm_stringc << "hit end of file while looking for final `"
970: << embedFinish << "'");
971: yyterminate();
972: }
973: YY_BREAK
974:
975: /* embedded *type* description */
976: case 30:
977: YY_RULE_SETUP
978: {
979: /* caller will get text from yytext and yyleng */
980: TOK_UPD_COL;
981:
982: /* drop into literal-code processing */
983: BEGIN(LITCODE);
984:
985: /* I reset the initial nesting to -1 so that the '{' at the
986: * beginning of the class body sets nesting to 0, thus when
987: * I see the final '}' I'll see that at level 0 and stop */
988: beginEmbed('}', TOK_LIT_CODE, -1);
989:
990: return TOK_CONTEXT_CLASS;
991: }
992: YY_BREAK
993: /* ---------- includes ----------- */
994: case 31:
995: YY_RULE_SETUP
996: {
997: TOK_UPD_COL; /* hence no TOKEN_START in INCLUDE area */
998: BEGIN(INCLUDE);
999: }
1000: YY_BREAK
1001:
1002: case 32:
1003: YY_RULE_SETUP
1004: {
1005: /* e.g.: ("filename") */
1006: /* file name to include */
1007: UPD_COL;
1008:
1009: /* find quotes */
1010: char *leftq = strchr(yytext, '"');
1011: char *rightq = strchr(leftq+1, '"');
1012: xassert(leftq && rightq);
1013:
1014: /* extract filename sm_string */
1015: includeFileName = addString(leftq+1, rightq-leftq-1);
1016:
1017: /* go back to normal processing */
1018: BEGIN(INITIAL);
1019: return TOK_INCLUDE;
1020: }
1021: YY_BREAK
1022: case 33:
1023: YY_RULE_SETUP
1024: {
1025: /* anything else: malformed */
1026: UPD_COL;
1027: errorMalformedInclude();
1028:
1029: /* rudimentary error recovery.. */
1030: BEGIN(EAT_TO_NEWLINE);
1031: }
1032: YY_BREAK
1033:
1034:
1035: case 34:
1036: YY_RULE_SETUP
1037: {
1038: UPD_COL;
1039: /* not newline, eat it */
1040: }
1041: YY_BREAK
1042: case 35:
1043: YY_RULE_SETUP
1044: {
1045: /* get out of here */
1046: newLine();
1047: BEGIN(INITIAL);
1048: }
1049: YY_BREAK
1050:
1051: /* -------- name literal --------- */
1052: case 36:
1053: YY_RULE_SETUP
1054: {
1055: /* get text from yytext and yyleng */
1056: TOK_UPD_COL;
1057: if (YY_START == OPTIONAL_TYPE) {
1058: BEGIN(INITIAL); // bail out of OPTIONAL_TYPE mode
1059: }
1060: return TOK_NAME;
1061: }
1062: YY_BREAK
1063: /* -------- numeric literal ------ */
1064: case 37:
1065: YY_RULE_SETUP
1066: {
1067: TOK_UPD_COL;
1068: integerLiteral = strtoul(yytext, NULL, 10 /*radix*/);
1069: return TOK_INTEGER;
1070: }
1071: YY_BREAK
1072: /* ----------- sm_string literal ----- */
1073: case 38:
1074: YY_RULE_SETUP
1075: {
1076: TOK_UPD_COL;
1077: sm_stringLiteral = addString(yytext+1, yyleng-2); // strip quotes
1078: return TOK_STRING;
1079: }
1080: YY_BREAK
1081: /* --------- illegal ------------- */
1082: case 39:
1083: YY_RULE_SETUP
1084: {
1085: TOK_UPD_COL;
1086: errorIllegalCharacter(yytext[0]);
1087: }
1088: YY_BREAK
1089: case 40:
1090: YY_RULE_SETUP
1091: YY_FATAL_ERROR( "flex scanner jammed" );
1092: YY_BREAK
1093: case YY_STATE_EOF(INITIAL):
1094: case YY_STATE_EOF(INCLUDE):
1095: case YY_STATE_EOF(EAT_TO_NEWLINE):
1096: case YY_STATE_EOF(RHS):
1097: case YY_STATE_EOF(FUN):
1098: case YY_STATE_EOF(OPTIONAL_TYPE):
1099: yyterminate();
1100:
1101: case YY_END_OF_BUFFER:
1102: {
1103: /* Amount of text matched not including the EOB char. */
1104: int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1;
1105:
1106: /* Undo the effects of YY_DO_BEFORE_ACTION. */
1107: *yy_cp = yy_hold_char;
1108: YY_RESTORE_YY_MORE_OFFSET
1109:
1110: if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW )
1111: {
1112: /* We're scanning a new file or input source. It's
1113: * possible that this happened because the user
1114: * just pointed yyin at a new source and called
1115: * yylex(). If so, then we have to assure
1116: * consistency between yy_current_buffer and our
1117: * globals. Here is the right place to do so, because
1118: * this is the first action (other than possibly a
1119: * back-up) that will match for the new input source.
1120: */
1121: yy_n_chars = yy_current_buffer->yy_n_chars;
1122: yy_current_buffer->yy_input_file = yyin;
1123: yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL;
1124: }
1125:
1126: /* Note that here we test for yy_c_buf_p "<=" to the position
1127: * of the first EOB in the buffer, since yy_c_buf_p will
1128: * already have been incremented past the NUL character
1129: * (since all states make transitions on EOB to the
1130: * end-of-buffer state). Contrast this with the test
1131: * in input().
1132: */
1133: if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] )
1134: { /* This was really a NUL. */
1135: yy_state_type yy_next_state;
1136:
1137: yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text;
1138:
1139: yy_current_state = yy_get_previous_state();
1140:
1141: /* Okay, we're now positioned to make the NUL
1142: * transition. We couldn't have
1143: * yy_get_previous_state() go ahead and do it
1144: * for us because it doesn't know how to deal
1145: * with the possibility of jamming (and we don't
1146: * want to build jamming into it because then it
1147: * will run more slowly).
1148: */
1149:
1150: yy_next_state = yy_try_NUL_trans( yy_current_state );
1151:
1152: yy_bp = yytext_ptr + YY_MORE_ADJ;
1153:
1154: if ( yy_next_state )
1155: {
1156: /* Consume the NUL. */
1157: yy_cp = ++yy_c_buf_p;
1158: yy_current_state = yy_next_state;
1159: goto yy_match;
1160: }
1161:
1162: else
1163: {
1164: yy_cp = yy_c_buf_p;
1165: goto yy_find_action;
1166: }
1167: }
1168:
1169: else switch ( yy_get_next_buffer() )
1170: {
1171: case EOB_ACT_END_OF_FILE:
1172: {
1173: yy_did_buffer_switch_on_eof = 0;
1174:
1175: if ( yywrap() )
1176: {
1177: /* Note: because we've taken care in
1178: * yy_get_next_buffer() to have set up
1179: * yytext, we can now set up
1180: * yy_c_buf_p so that if some total
1181: * hoser (like flex itself) wants to
1182: * call the scanner after we return the
1183: * YY_NULL, it'll still work - another
1184: * YY_NULL will get returned.
1185: */
1186: yy_c_buf_p = yytext_ptr + YY_MORE_ADJ;
1187:
1188: yy_act = YY_STATE_EOF(YY_START);
1189: goto do_action;
1190: }
1191:
1192: else
1193: {
1194: if ( ! yy_did_buffer_switch_on_eof )
1195: YY_NEW_FILE;
1196: }
1197: break;
1198: }
1199:
1200: case EOB_ACT_CONTINUE_SCAN:
1201: yy_c_buf_p =
1202: yytext_ptr + yy_amount_of_matched_text;
1203:
1204: yy_current_state = yy_get_previous_state();
1205:
1206: yy_cp = yy_c_buf_p;
1207: yy_bp = yytext_ptr + YY_MORE_ADJ;
1208: goto yy_match;
1209:
1210: case EOB_ACT_LAST_MATCH:
1211: yy_c_buf_p =
1212: &yy_current_buffer->yy_ch_buf[yy_n_chars];
1213:
1214: yy_current_state = yy_get_previous_state();
1215:
1216: yy_cp = yy_c_buf_p;
1217: yy_bp = yytext_ptr + YY_MORE_ADJ;
1218: goto yy_find_action;
1219: }
1220: break;
1221: }
1222:
1223: default:
1224: YY_FATAL_ERROR(
1225: "fatal flex scanner internal error--no action found" );
1226: } /* end of action switch */
1227: } /* end of scanning one token */
1228: } /* end of yylex */
1229:
1230: yyFlexLexer::yyFlexLexer( std::istream* arg_yyin, std::ostream* arg_yyout ) /* member-wise init; the input buffer itself is created lazily on the first yylex() call */
1231: {
1232: yyin = arg_yyin;
1233: yyout = arg_yyout;
1234: yy_c_buf_p = 0;
1235: yy_init = 1; /* forces the one-time setup block at the top of yylex() */
1236: yy_start = 0; /* 0 means yylex() will select the first start state */
1237: yy_flex_debug = 0;
1238: yylineno = 1; // this will only get updated if %option yylineno
1239:
1240: yy_did_buffer_switch_on_eof = 0;
1241:
1242: yy_looking_for_trail_begin = 0;
1243: yy_more_flag = 0;
1244: yy_more_len = 0;
1245: yy_more_offset = yy_prev_more_offset = 0;
1246:
1247: yy_start_stack_ptr = yy_start_stack_depth = 0; /* start-condition stack allocated on demand in yy_push_state() */
1248: yy_start_stack = 0;
1249:
1250: yy_current_buffer = 0;
1251:
1252: #ifdef YY_USES_REJECT
1253: yy_state_buf = new yy_state_type[YY_BUF_SIZE + 2]; /* array allocation -- released in the destructor */
1254: #else
1255: yy_state_buf = 0;
1256: #endif
1257: }
1258:
1259: yyFlexLexer::~yyFlexLexer() /* releases the REJECT state buffer (if any) and the current input buffer */
1260: {
1261: delete[] yy_state_buf; /* array form required: ctor allocates with new yy_state_type[...] under YY_USES_REJECT; scalar delete on an array-new pointer is undefined behavior (delete[] on null is a no-op, so the non-REJECT build is unaffected) */
1262: yy_delete_buffer( yy_current_buffer );
1263: }
1264:
1265: void yyFlexLexer::switch_streams( std::istream* new_in, std::ostream* new_out ) /* replace either stream; null leaves that stream unchanged */
1266: {
1267: if ( new_in )
1268: {
1269: yy_delete_buffer( yy_current_buffer ); /* discards unread input; also nulls yy_current_buffer (see yy_delete_buffer) */
1270: yy_switch_to_buffer( yy_create_buffer( new_in, YY_BUF_SIZE ) );
1271: }
1272:
1273: if ( new_out )
1274: yyout = new_out;
1275: }
1276:
1277: #ifdef YY_INTERACTIVE
1278: int yyFlexLexer::LexerInput( char* buf, int /* max_size */ ) /* interactive build: reads exactly one character */
1279: #else
1280: int yyFlexLexer::LexerInput( char* buf, int max_size ) /* returns #chars read, 0 on EOF, -1 on stream error */
1281: #endif
1282: {
1283: if ( yyin->eof() || yyin->fail() )
1284: return 0; /* treated as end-of-input by YY_INPUT */
1285:
1286: #ifdef YY_INTERACTIVE
1287: yyin->get( buf[0] ); /* one char at a time so prompts work */
1288:
1289: if ( yyin->eof() )
1290: return 0;
1291:
1292: if ( yyin->bad() )
1293: return -1;
1294:
1295: return 1;
1296:
1297: #else
1298: (void) yyin->read( buf, max_size ); /* bulk read; short reads are normal */
1299:
1300: if ( yyin->bad() )
1301: return -1;
1302: else
1303: return yyin->gcount(); /* actual number of chars read() obtained */
1304: #endif
1305: }
1306:
1307: void yyFlexLexer::LexerOutput( const char* buf, int size ) /* sink used by the ECHO macro: write matched text to yyout */
1308: {
1309: (void) yyout->write( buf, size );
1310: }
1311:
1312: /* yy_get_next_buffer - try to read in a new buffer
1313: *
1314: * Returns a code representing an action:
1315: * EOB_ACT_LAST_MATCH -
1316: * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
1317: * EOB_ACT_END_OF_FILE - end of file
1318: */
1319:
1320: int yyFlexLexer::yy_get_next_buffer() /* refill yy_ch_buf, preserving any partially-matched token; returns an EOB_ACT_* code */
1321: {
1322: register char *dest = yy_current_buffer->yy_ch_buf;
1323: register char *source = yytext_ptr;
1324: register int number_to_move, i;
1325: int ret_val;
1326:
1327: if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] )
1328: YY_FATAL_ERROR(
1329: "fatal flex scanner internal error--end of buffer missed" );
1330:
1331: if ( yy_current_buffer->yy_fill_buffer == 0 )
1332: { /* Don't try to fill the buffer, so this is an EOF. */
1333: if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 )
1334: {
1335: /* We matched a single character, the EOB, so
1336: * treat this as a final EOF.
1337: */
1338: return EOB_ACT_END_OF_FILE;
1339: }
1340:
1341: else
1342: {
1343: /* We matched some text prior to the EOB, first
1344: * process it.
1345: */
1346: return EOB_ACT_LAST_MATCH;
1347: }
1348: }
1349:
1350: /* Try to read more data. */
1351:
1352: /* First move last chars to start of buffer. */
1353: number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; /* length of partial token to retain for re-scanning */
1354:
1355: for ( i = 0; i < number_to_move; ++i )
1356: *(dest++) = *(source++);
1357:
1358: if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING )
1359: /* don't do the read, it's not guaranteed to return an EOF,
1360: * just force an EOF
1361: */
1362: yy_current_buffer->yy_n_chars = yy_n_chars = 0;
1363:
1364: else
1365: {
1366: int num_to_read =
1367: yy_current_buffer->yy_buf_size - number_to_move - 1;
1368:
1369: while ( num_to_read <= 0 )
1370: { /* Not enough room in the buffer - grow it. */
1371: #ifdef YY_USES_REJECT
1372: YY_FATAL_ERROR(
1373: "input buffer overflow, can't enlarge buffer because scanner uses REJECT" );
1374: #else
1375:
1376: /* just a shorter name for the current buffer */
1377: YY_BUFFER_STATE b = yy_current_buffer;
1378:
1379: int yy_c_buf_p_offset =
1380: (int) (yy_c_buf_p - b->yy_ch_buf); /* saved so the scan pointer can be rebased after realloc */
1381:
1382: if ( b->yy_is_our_buffer )
1383: {
1384: int new_size = b->yy_buf_size * 2;
1385:
1386: if ( new_size <= 0 ) /* doubling overflowed int; grow by 1/8 instead */
1387: b->yy_buf_size += b->yy_buf_size / 8;
1388: else
1389: b->yy_buf_size *= 2;
1390:
1391: b->yy_ch_buf = (char *)
1392: /* Include room in for 2 EOB chars. */
1393: yy_flex_realloc( (void *) b->yy_ch_buf,
1394: b->yy_buf_size + 2 );
1395: }
1396: else
1397: /* Can't grow it, we don't own it. */
1398: b->yy_ch_buf = 0;
1399:
1400: if ( ! b->yy_ch_buf )
1401: YY_FATAL_ERROR(
1402: "fatal error - scanner input buffer overflow" );
1403:
1404: yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
1405:
1406: num_to_read = yy_current_buffer->yy_buf_size -
1407: number_to_move - 1;
1408: #endif
1409: }
1410:
1411: if ( num_to_read > YY_READ_BUF_SIZE )
1412: num_to_read = YY_READ_BUF_SIZE;
1413:
1414: /* Read in more data. */
1415: YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]),
1416: yy_n_chars, num_to_read );
1417:
1418: yy_current_buffer->yy_n_chars = yy_n_chars;
1419: }
1420:
1421: if ( yy_n_chars == 0 ) /* read produced nothing: real EOF or deferred last match */
1422: {
1423: if ( number_to_move == YY_MORE_ADJ )
1424: {
1425: ret_val = EOB_ACT_END_OF_FILE;
1426: yyrestart( yyin );
1427: }
1428:
1429: else
1430: {
1431: ret_val = EOB_ACT_LAST_MATCH;
1432: yy_current_buffer->yy_buffer_status =
1433: YY_BUFFER_EOF_PENDING;
1434: }
1435: }
1436:
1437: else
1438: ret_val = EOB_ACT_CONTINUE_SCAN;
1439:
1440: yy_n_chars += number_to_move;
1441: yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; /* restore the two EOB sentinels */
1442: yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
1443:
1444: yytext_ptr = &yy_current_buffer->yy_ch_buf[0];
1445:
1446: return ret_val;
1447: }
1448:
1449:
1450: /* yy_get_previous_state - get the state just before the EOB char was reached */
1451:
1452: yy_state_type yyFlexLexer::yy_get_previous_state() /* re-run the DFA over yytext_ptr..yy_c_buf_p to recover the state before the EOB char */
1453: {
1454: register yy_state_type yy_current_state;
1455: register char *yy_cp;
1456:
1457: yy_current_state = yy_start;
1458:
1459: for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp )
1460: {
1461: register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); /* NUL maps to equivalence class 1 */
1462: if ( yy_accept[yy_current_state] )
1463: {
1464: yy_last_accepting_state = yy_current_state; /* remember fallback for backing up */
1465: yy_last_accepting_cpos = yy_cp;
1466: }
1467: while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
1468: {
1469: yy_current_state = (int) yy_def[yy_current_state]; /* follow default transitions until a real one is found */
1470: if ( yy_current_state >= 159 )
1471: yy_c = yy_meta[(unsigned int) yy_c];
1472: }
1473: yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
1474: }
1475:
1476: return yy_current_state;
1477: }
1478:
1479:
1480: /* yy_try_NUL_trans - try to make a transition on the NUL character
1481: *
1482: * synopsis
1483: * next_state = yy_try_NUL_trans( current_state );
1484: */
1485:
1486: yy_state_type yyFlexLexer::yy_try_NUL_trans( yy_state_type yy_current_state ) /* attempt a DFA transition on NUL; returns 0 if the state jams */
1487: {
1488: register int yy_is_jam;
1489: register char *yy_cp = yy_c_buf_p;
1490:
1491: register YY_CHAR yy_c = 1; /* 1 is the equivalence class used for NUL (see yy_get_previous_state) */
1492: if ( yy_accept[yy_current_state] )
1493: {
1494: yy_last_accepting_state = yy_current_state;
1495: yy_last_accepting_cpos = yy_cp;
1496: }
1497: while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
1498: {
1499: yy_current_state = (int) yy_def[yy_current_state];
1500: if ( yy_current_state >= 159 )
1501: yy_c = yy_meta[(unsigned int) yy_c];
1502: }
1503: yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
1504: yy_is_jam = (yy_current_state == 158); /* 158 is this scanner's generated jam state */
1505:
1506: return yy_is_jam ? 0 : yy_current_state;
1507: }
1508:
1509:
1510: void yyFlexLexer::yyunput( int c, register char* yy_bp ) /* push character c back onto the input, shifting buffer contents up if there is no room */
1511: {
1512: register char *yy_cp = yy_c_buf_p;
1513:
1514: /* undo effects of setting up yytext */
1515: *yy_cp = yy_hold_char;
1516:
1517: if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
1518: { /* need to shift things up to make room */
1519: /* +2 for EOB chars. */
1520: register int number_to_move = yy_n_chars + 2;
1521: register char *dest = &yy_current_buffer->yy_ch_buf[
1522: yy_current_buffer->yy_buf_size + 2];
1523: register char *source =
1524: &yy_current_buffer->yy_ch_buf[number_to_move];
1525:
1526: while ( source > yy_current_buffer->yy_ch_buf )
1527: *--dest = *--source; /* copy backwards so the ranges may overlap */
1528:
1529: yy_cp += (int) (dest - source); /* rebase both pointers by the shift distance */
1530: yy_bp += (int) (dest - source);
1531: yy_current_buffer->yy_n_chars =
1532: yy_n_chars = yy_current_buffer->yy_buf_size;
1533:
1534: if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
1535: YY_FATAL_ERROR( "flex scanner push-back overflow" );
1536: }
1537:
1538: *--yy_cp = (char) c; /* store the pushed-back character */
1539:
1540:
1541: yytext_ptr = yy_bp;
1542: yy_hold_char = *yy_cp;
1543: yy_c_buf_p = yy_cp;
1544: }
1545:
1546:
1547: int yyFlexLexer::yyinput() /* read one character directly from the buffer, refilling as needed; returns EOF at end of input */
1548: {
1549: int c;
1550:
1551: *yy_c_buf_p = yy_hold_char; /* restore the char hidden by yytext NUL-termination */
1552:
1553: if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
1554: {
1555: /* yy_c_buf_p now points to the character we want to return.
1556: * If this occurs *before* the EOB characters, then it's a
1557: * valid NUL; if not, then we've hit the end of the buffer.
1558: */
1559: if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
1560: /* This was really a NUL. */
1561: *yy_c_buf_p = '\0';
1562:
1563: else
1564: { /* need more input */
1565: int offset = yy_c_buf_p - yytext_ptr; /* saved: refill may move the buffer contents */
1566: ++yy_c_buf_p;
1567:
1568: switch ( yy_get_next_buffer() )
1569: {
1570: case EOB_ACT_LAST_MATCH:
1571: /* This happens because yy_g_n_b()
1572: * sees that we've accumulated a
1573: * token and flags that we need to
1574: * try matching the token before
1575: * proceeding. But for input(),
1576: * there's no matching to consider.
1577: * So convert the EOB_ACT_LAST_MATCH
1578: * to EOB_ACT_END_OF_FILE.
1579: */
1580:
1581: /* Reset buffer status. */
1582: yyrestart( yyin );
1583:
1584: /* fall through */
1585:
1586: case EOB_ACT_END_OF_FILE:
1587: {
1588: if ( yywrap() )
1589: return EOF;
1590:
1591: if ( ! yy_did_buffer_switch_on_eof )
1592: YY_NEW_FILE;
1593: #ifdef __cplusplus
1594: return yyinput(); /* retry against the new input source */
1595: #else
1596: return input();
1597: #endif
1598: }
1599:
1600: case EOB_ACT_CONTINUE_SCAN:
1601: yy_c_buf_p = yytext_ptr + offset;
1602: break;
1603: }
1604: }
1605: }
1606:
1607: c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */
1608: *yy_c_buf_p = '\0'; /* preserve yytext */
1609: yy_hold_char = *++yy_c_buf_p;
1610:
1611:
1612: return c;
1613: }
1614:
1615: void yyFlexLexer::yyrestart( std::istream* input_file ) /* re-aim the scanner at input_file, reusing (or lazily creating) the current buffer */
1616: {
1617: if ( ! yy_current_buffer )
1618: yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );
1619:
1620: yy_init_buffer( yy_current_buffer, input_file ); /* flushes the buffer and binds the new stream */
1621: yy_load_buffer_state();
1622: }
1623:
1624:
1625: void yyFlexLexer::yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) /* make new_buffer current, saving the old buffer's scan position first */
1626: {
1627: if ( yy_current_buffer == new_buffer )
1628: return;
1629:
1630: if ( yy_current_buffer )
1631: {
1632: /* Flush out information for old buffer. */
1633: *yy_c_buf_p = yy_hold_char;
1634: yy_current_buffer->yy_buf_pos = yy_c_buf_p;
1635: yy_current_buffer->yy_n_chars = yy_n_chars;
1636: }
1637:
1638: yy_current_buffer = new_buffer;
1639: yy_load_buffer_state();
1640:
1641: /* We don't actually know whether we did this switch during
1642: * EOF (yywrap()) processing, but the only time this flag
1643: * is looked at is after yywrap() is called, so it's safe
1644: * to go ahead and always set it.
1645: */
1646: yy_did_buffer_switch_on_eof = 1;
1647: }
1648:
1649:
1650: void yyFlexLexer::yy_load_buffer_state() /* copy the current buffer's saved position/stream into the scanner's working globals */
1651: {
1652: yy_n_chars = yy_current_buffer->yy_n_chars;
1653: yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
1654: yyin = yy_current_buffer->yy_input_file;
1655: yy_hold_char = *yy_c_buf_p; /* char that will be overwritten by yytext's NUL terminator */
1656: }
1657:
1658:
1659: YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( std::istream* file, int size ) /* allocate and initialize a buffer of `size` chars for `file`; fatal error on OOM */
1660: {
1661: YY_BUFFER_STATE b;
1662:
1663: b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
1664: if ( ! b )
1665: YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
1666:
1667: b->yy_buf_size = size;
1668:
1669: /* yy_ch_buf has to be 2 characters longer than the size given because
1670: * we need to put in 2 end-of-buffer characters.
1671: */
1672: b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 );
1673: if ( ! b->yy_ch_buf )
1674: YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
1675:
1676: b->yy_is_our_buffer = 1; /* we own the storage, so yy_delete_buffer may free it */
1677:
1678: yy_init_buffer( b, file );
1679:
1680: return b;
1681: }
1682:
1683:
1684: void yyFlexLexer::yy_delete_buffer( YY_BUFFER_STATE b ) /* free buffer b (null-safe); clears yy_current_buffer if b is current */
1685: {
1686: if ( ! b )
1687: return;
1688:
1689: if ( b == yy_current_buffer )
1690: yy_current_buffer = (YY_BUFFER_STATE) 0;
1691:
1692: if ( b->yy_is_our_buffer ) /* only free storage we allocated ourselves */
1693: yy_flex_free( (void *) b->yy_ch_buf );
1694:
1695: yy_flex_free( (void *) b );
1696: }
1697:
1698:
1699: void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, std::istream* file ) /* flush b and bind it to `file`; C++ streams are never treated as interactive */
1700:
1701: {
1702: yy_flush_buffer( b );
1703:
1704: b->yy_input_file = file;
1705: b->yy_fill_buffer = 1; /* allow yy_get_next_buffer() to read from the stream */
1706:
1707: b->yy_is_interactive = 0;
1708: }
1709:
1710:
1711: void yyFlexLexer::yy_flush_buffer( YY_BUFFER_STATE b ) /* discard b's contents and reset it to the freshly-created state (null-safe) */
1712: {
1713: if ( ! b )
1714: return;
1715:
1716: b->yy_n_chars = 0;
1717:
1718: /* We always need two end-of-buffer characters. The first causes
1719: * a transition to the end-of-buffer state. The second causes
1720: * a jam in that state.
1721: */
1722: b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
1723: b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
1724:
1725: b->yy_buf_pos = &b->yy_ch_buf[0];
1726:
1727: b->yy_at_bol = 1;
1728: b->yy_buffer_status = YY_BUFFER_NEW;
1729:
1730: if ( b == yy_current_buffer ) /* keep the scanner globals in sync with the flushed buffer */
1731: yy_load_buffer_state();
1732: }
1733:
1734:
1735: #ifndef YY_NO_SCAN_BUFFER
1736: #endif
1737:
1738:
1739: #ifndef YY_NO_SCAN_STRING
1740: #endif
1741:
1742:
1743: #ifndef YY_NO_SCAN_BYTES
1744: #endif
1745:
1746:
1747: #ifndef YY_NO_PUSH_STATE
1748: void yyFlexLexer::yy_push_state( int new_state ) /* save the current start condition on the stack and BEGIN(new_state); stack grows on demand */
1749: {
1750: if ( yy_start_stack_ptr >= yy_start_stack_depth )
1751: {
1752: yy_size_t new_size;
1753:
1754: yy_start_stack_depth += YY_START_STACK_INCR; /* grow linearly, YY_START_STACK_INCR entries at a time */
1755: new_size = yy_start_stack_depth * sizeof( int );
1756:
1757: if ( ! yy_start_stack )
1758: yy_start_stack = (int *) yy_flex_alloc( new_size );
1759:
1760: else
1761: yy_start_stack = (int *) yy_flex_realloc(
1762: (void *) yy_start_stack, new_size );
1763:
1764: if ( ! yy_start_stack )
1765: YY_FATAL_ERROR(
1766: "out of memory expanding start-condition stack" );
1767: }
1768:
1769: yy_start_stack[yy_start_stack_ptr++] = YY_START;
1770:
1771: BEGIN(new_state);
1772: }
1773: #endif
1774:
1775:
1776: #ifndef YY_NO_POP_STATE
1777: void yyFlexLexer::yy_pop_state() /* restore the start condition saved by the matching yy_push_state(); fatal on underflow */
1778: {
1779: if ( --yy_start_stack_ptr < 0 )
1780: YY_FATAL_ERROR( "start-condition stack underflow" );
1781:
1782: BEGIN(yy_start_stack[yy_start_stack_ptr]);
1783: }
1784: #endif
1785:
1786:
1787: #ifndef YY_NO_TOP_STATE
1788: int yyFlexLexer::yy_top_state() /* peek at the most recently pushed start condition; no emptiness check is performed */
1789: {
1790: return yy_start_stack[yy_start_stack_ptr - 1];
1791: }
1792: #endif
1793:
1794: #ifndef YY_EXIT_FAILURE
1795: #define YY_EXIT_FAILURE 2
1796: #endif
1797:
1798:
1799: void yyFlexLexer::LexerError( yyconst char msg[] ) /* default YY_FATAL_ERROR handler: report and terminate; does not return */
1800: {
1801: cerr << msg << '\n'; /* NOTE(review): unqualified cerr -- presumably a using-declaration appears earlier in the file; confirm */
1802: exit( YY_EXIT_FAILURE );
1803: }
1804:
1805:
1806: /* Redefine yyless() so it works in section 3 code. */
1807:
1808: #undef yyless
1809: #define yyless(n) \
1810: do \
1811: { \
1812: /* Undo effects of setting up yytext. */ \
1813: yytext[yyleng] = yy_hold_char; \
1814: yy_c_buf_p = yytext + n; \
1815: yy_hold_char = *yy_c_buf_p; \
1816: *yy_c_buf_p = '\0'; \
1817: yyleng = n; \
1818: } \
1819: while ( 0 )
1820:
1821:
1822: /* Internal utility routines. */
1823:
1824: #ifndef yytext_ptr
1825: #ifdef YY_USE_PROTOS
1826: static void yy_flex_strncpy( char *s1, yyconst char *s2, int n ) /* copy exactly n chars from s2 to s1; unlike strncpy, no NUL padding or termination */
1827: #else
1828: static void yy_flex_strncpy( s1, s2, n ) /* K&R-style variant for pre-prototype compilers */
1829: char *s1;
1830: yyconst char *s2;
1831: int n;
1832: #endif
1833: {
1834: register int i;
1835: for ( i = 0; i < n; ++i )
1836: s1[i] = s2[i];
1837: }
1838: #endif
1839:
1840: #ifdef YY_NEED_STRLEN
1841: #ifdef YY_USE_PROTOS
1842: static int yy_flex_strlen( yyconst char *s )  // strlen substitute for platforms lacking one
1843: #else
1844: static int yy_flex_strlen( s )
1845: yyconst char *s;
1846: #endif
1847: 	{
1848: 	register int n;
1849: 	for ( n = 0; s[n]; ++n )
1850: 		;
1851: 
1852: 	return n;  // length excluding the terminating NUL
1853: 	}
1854: #endif
1855:
1856:
1857: #ifdef YY_USE_PROTOS
1858: static void *yy_flex_alloc( yy_size_t size )  // thin malloc wrapper; callers check for NULL
1859: #else
1860: static void *yy_flex_alloc( size )
1861: yy_size_t size;
1862: #endif
1863: 	{
1864: 	return (void *) malloc( size );
1865: 	}
1866:
1867: #ifdef YY_USE_PROTOS
1868: static void *yy_flex_realloc( void *ptr, yy_size_t size )  // thin realloc wrapper; callers check for NULL
1869: #else
1870: static void *yy_flex_realloc( ptr, size )
1871: void *ptr;
1872: yy_size_t size;
1873: #endif
1874: 	{
1875: 	/* The cast to (char *) in the following accommodates both
1876: 	 * implementations that use char* generic pointers, and those
1877: 	 * that use void* generic pointers.  It works with the latter
1878: 	 * because both ANSI C and C++ allow castless assignment from
1879: 	 * any pointer type to void*, and deal with argument conversions
1880: 	 * as though doing an assignment.
1881: 	 */
1882: 	return (void *) realloc( (char *) ptr, size );
1883: 	}
1884:
1885: #ifdef YY_USE_PROTOS
1886: static void yy_flex_free( void *ptr )  // thin free wrapper
1887: #else
1888: static void yy_flex_free( ptr )
1889: void *ptr;
1890: #endif
1891: 	{
1892: 	free( ptr );  // free(NULL) is a no-op, so no guard is needed
1893: 	}
1894:
1895: #if YY_MAIN
1896: int main()  // optional default driver: lex all of stdin once
1897: 	{
1898: 	yylex();
1899: 	return 0;
1900: 	}
1901: #endif
1902:
1903: /* -------------------- additional C code -------------------- */
1904:
1905: // identify tokens representing embedded text
1906: bool isGramlexEmbed(int code)
1907: {
1908:   return code == TOK_LIT_CODE;  // only literal code bodies are treated as embedded-language text
1909: }
Start cpp section to elk/elk_grampar.cpp[1
/1
]
1: #line 18328 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26: Environment::Environment(Grammar &G)  // outermost environment: owns the error counter
27:   : g(G),
28:     prevEnv(NULL),
29:     nontermDecls(),
30:     errorCount(0),
31:     errors(errorCount)        // 'errors' aliases this environment's own counter
32: {}
33: 
34: Environment::Environment(Environment &prev)  // nested environment: shares state with its parent
35:   : g(prev.g),
36:     prevEnv(&prev),
37:     nontermDecls(prev.nontermDecls),
38:     errorCount(-1000),        // sentinel: a nested env never uses its own counter
39:     errors(prev.errors)       // errors accumulate in the outermost environment
40: {}
41: 
42: Environment::~Environment()
43: {}
44:
45:
46:
47: STATICDEF sm_string XASTParse::
48:   constructMsg(LocString const &tok, char const *msg)  // format "loc: near tok, msg" when a location is available
49: {
50:   if (tok.validLoc()) {
51:     return sm_stringc << tok.locString() << ": near " << tok
52:                    << ", " << msg;
53:   }
54:   else {
55:     return sm_string(msg);  // no location info: report the bare message
56:   }
57: }
58: 
59: XASTParse::XASTParse(LocString const &tok, char const *m)
60:   : xBase(constructMsg(tok, m)),
61:     failToken(tok),
62:     message(m)
63: {}
64: 
65: 
66: XASTParse::XASTParse(XASTParse const &obj)
67:   : xBase(obj),
68:     DMEMB(failToken),
69:     DMEMB(message)
70: {}
71: 
72: XASTParse::~XASTParse()
73: {}
74:
75:
76:
77:
78: void astParseGrammar(Grammar &g, GrammarAST *treeTop);
79: void astParseTerminals(Environment &env, TF_terminals const &terms);
80: void astParseDDM(Environment &env, Symbol *sym,
81: ASTList<SpecFunc> const &funcs);
82: void astParseNonterm(Environment &env, TF_nonterm const *nt);
83: void astParseProduction(Environment &env, Nonterminal *nonterm,
84: ProdDecl const *prod);
85:
86:
87:
88: void astParseError(LocString const &failToken, char const *msg)  // report a fatal parse error at 'failToken'; never returns
89: {
90:   THROW(XASTParse(failToken, msg));
91: }
92:
93: void astParseError(char const *msg)  // fatal parse error with no source location; never returns
94: {
95:   LocString ls;   // default LocString carries no location
96:   THROW(XASTParse(ls, msg));
97: }
98:
99:
100: void astParseErrorCont(Environment &env, LocString const &failToken,
101:                        char const *msg)  // non-fatal variant: print the error and keep parsing
102: {
103:   XASTParse x(failToken, msg);
104:   std::cout << x.why() << std::endl;
105:   env.errors++;   // recorded so astParseGrammar can halt at the end
106: }
107:
108:
109:
110:
111:
112: catch (XASTParse &x) { \
113: /* leave unchanged */ \
114: throw x; \
115: } \
116: catch (xBase &x) { \
117: /* add context */ \
118: astParseError(tok, x.why()); \
119: throw 0; /* silence warning */ \
120: }
121:
122:
123:
124:
125: void setAnnotations(GrammarAST *ast)  // cache the 'terms' and 'firstNT' shortcuts on the AST, validating uniqueness/presence
126: {
127: 
128:   FOREACH_ASTLIST_NC(TopForm, ast->forms, iter) {
129:     ASTSWITCH(TopForm, iter.data()) {
130:       ASTCASE(TF_terminals, t) {
131:         if (!ast->terms) {
132:           ast->terms = t;
133:         }
134:         else {
135:           astParseError("there is more than one 'Terminals' section");
136:         }
137:       }
138: 
139:       ASTNEXT(TF_nonterm, nt) {
140:         if (!ast->firstNT) {
141:           ast->firstNT = nt;  // the first nonterminal in the file becomes the start symbol
142:         }
143:       }
144: 
145:       ASTENDCASED
146:     }
147:   }
148: 
149:   if (!ast->terms) {
150:     astParseError("'Terminals' specification is missing");
151:   }
152:   if (!ast->firstNT) {
153:     astParseError("you have to have at least one nonterminal");
154:   }
155: }
156:
157:
158: LocString extractActionClassName(LocString const &body)  // pull the leading identifier out of the context_class body text
159: {
160: 
161:   char const *start = body.str;
162:   while (isspace(*start)) start++;      // skip leading whitespace
163: 
164: 
165:   char const *p = start;
166:   while (isspace(*p)) p++;              // NOTE(review): redundant -- 'start' already skipped the whitespace
167:   while (isalnum(*p) || *p=='_') p++;   // scan past the identifier characters
168: 
169: 
170:   return LocString(body.loc, grammarStringTable.add(sm_string(start, p-start)));
171: }
172:
173:
174:
175: void astParseOptions(Grammar &g, GrammarAST *ast)  // transfer context_class, verbatim and option top-forms into 'g'
176: {
177: 
178:   FOREACH_ASTLIST_NC(TopForm, ast->forms, iter) {
179:     ASTSWITCH(TopForm, iter.data()) {
180:       ASTCASE(TF_context, c) {
181: 
182: 
183:         g.actionClassName = extractActionClassName(c->body);
184: 
185: 
186: 
187: 
188: 
189: 
190: 
191: 
192: 
193: 
194: 
195: 
196: 
197: 
198: 
199: 
200: 
201: 
202: 
203: 
204: 
205: 
206: 
207: 
208: 
209: 
210: 
211: 
212: 
213: 
214: 
215: 
216: 
217: 
218: 
219: 
220: 
221: 
222: 
223:         g.actionClasses.deleteAll();   // a later context_class replaces any earlier one
224:         g.actionClasses.append(new LocString(c->body));
225:       }
226: 
227:       ASTNEXT(TF_verbatim, v) {
228:         if (v->isImpl) {
229:           g.implVerbatim.append(new LocString(v->code));
230:         }
231:         else {
232:           g.verbatim.append(new LocString(v->code));
233:         }
234:       }
235: 
236:       ASTNEXT(TF_option, op) {
237:         LocString const &name = op->name;
238:         int value = op->value;
239:         bool boolVal = !!value;
240: 
241:         if (name.equals("useGCDefaults")) {
242:           g.useGCDefaults = boolVal;
243:         }
244:         else if (name.equals("defaultMergeAborts")) {
245:           g.defaultMergeAborts = boolVal;
246:         }
247:         else if (name.equals("shift_reduce_conflicts")) {
248:           g.expectedSR = value;
249:         }
250:         else if (name.equals("reduce_reduce_conflicts")) {
251:           g.expectedRR = value;
252:         }
253:         else if (name.equals("unreachable_nonterminals")) {
254:           g.expectedUNRNonterms = value;
255:         }
256:         else if (name.equals("unreachable_terminals")) {
257:           g.expectedUNRTerms = value;
258:         }
259:         else if (name.equals("lang_OCaml")) {
260: 
261: 
262: 
263: 
264: 
265:           astParseError(name, "The `lang_OCaml' option has been replaced with "
266:                               "the `-ocaml' command-line switch.  Please use "
267:                               "that instead.  (Sorry for the inconvenience.)");
268:         }
269:         else {
270:           astParseError(name, "unknown option name");
271:         }
272:       }
273: 
274:       ASTENDCASED
275:     }
276:   }
277: }
278:
279:
280:
281: void astParseGrammar(Grammar &g, GrammarAST *ast)  // main AST-to-Grammar translation: terminals, then nonterminal decls, then bodies
282: {
283: 
284:   Environment env(g);
285: 
286: 
287:   astParseTerminals(env, *(ast->terms));
288: 
289: 
290:   // first pass: declare every nonterminal so forward references resolve
291:   {
292:     FOREACH_ASTLIST(TopForm, ast->forms, iter) {
293:       if (!iter.data()->isTF_nonterm()) continue;
294:       TF_nonterm const *nt = iter.data()->asTF_nontermC();
295: 
296: 
297:       if (env.nontermDecls.isMapped(nt->name)) {
298:         astParseError(nt->name, "nonterminal already declared");
299:       }
300: 
301: 
302:       env.g.getOrMakeNonterminal(nt->name);
303: 
304: 
305:       env.nontermDecls.add(nt->name, const_cast<TF_nonterm*>(nt));
306:     }
307:   }
308: 
309:   // second pass: parse each nonterminal's productions and spec functions
310:   {
311:     FOREACH_ASTLIST(TopForm, ast->forms, iter) {
312:       if (!iter.data()->isTF_nonterm()) continue;
313:       TF_nonterm const *nt = iter.data()->asTF_nontermC();
314: 
315: 
316: 
317:       Environment newEnv(env);   // nested env shares the parent's error count
318: 
319: 
320:       astParseNonterm(newEnv, nt);
321:     }
322:   }
323: 
324:   if (!g.actionClassName.str) {
325:     astParseError("you must specify a context class; for example:\n"
326:                   "  context_class Context : public UserActions {};\n");
327:   }
328: 
329:   if (env.errors) {
330:     astParseError("halting due to previously reported errors");
331:   }
332: }
333:
334:
335:
336: Terminal *astParseToken(Environment &env, LocString const &name)  // look up a declared terminal; fatal error if absent
337: {
338:   Terminal *t = env.g.findTerminal(name);
339:   if (!t) {
340:     astParseError(name, "undeclared token");
341:   }
342:   return t;   // never NULL: astParseError throws
343: }
344:
345:
346:
347:
348: class InitFalseBool {   // bool that default-constructs to false, for use in GrowArray
349: public:
350:   bool b;
351: public:
352:   InitFalseBool() : b(false) {}
353: };
354:
355:
356: void astParseTerminals(Environment &env, TF_terminals const &terms)  // declare tokens, then attach types/DDM funcs, then precedence specs
357: {
358: 
359:   {
360:     int maxCode = 0;
361:     GrowArray<InitFalseBool> codeHasTerm(200);
362:     FOREACH_ASTLIST(TermDecl, terms.decls, iter) {
363:       TermDecl const &term = *(iter.data());
364: 
365: 
366:       int code = term.code;
367:       StringRef name = term.name;
368:       trace("grampar") << "token: code=" << code
369:                        << ", name=" << name << std::endl;
370: 
371:       if (!env.g.declareToken(term.name, code, term.alias)) {
372:         astParseError(term.name, "token already declared");
373:       }
374: 
375: 
376:       maxCode = max(code, maxCode);
377:       codeHasTerm.ensureIndexDoubler(code);
378:       codeHasTerm[code].b = true;   // remember which codes are occupied
379:     }
380: 
381: 
382:     // fill unused codes below maxCode with dummy tokens so the code space is dense
383:     SourceLoc dummyLoc(HERE_SOURCELOC);
384:     for (int i=0; i<maxCode; i++) {
385:       if (!codeHasTerm[i].b) {
386:         LocString dummy(dummyLoc, grammarStringTable.add(
387:           sm_stringc << "__dummy_filler_token" << i));
388:         env.g.declareToken(dummy, i, dummy);
389:       }
390:     }
391:   }
392: 
393: 
394:   {
395:     FOREACH_ASTLIST(TermType, terms.types, iter) {
396:       TermType const &type = *(iter.data());
397:       trace("grampar") << "token type: name=" << type.name
398:                        << ", type=" << type.type << std::endl;
399: 
400: 
401:       Terminal *t = astParseToken(env, type.name);
402:       if (t->type) {
403:         astParseError(type.name, "this token already has a type");
404:       }
405: 
406: 
407:       t->type = type.type;
408: 
409: 
410:       astParseDDM(env, t, type.funcs);   // dup/del/classify functions for this token
411:     }
412:   }
413: 
414: 
415:   {
416:     FOREACH_ASTLIST(PrecSpec, terms.prec, iter) {
417:       PrecSpec const &spec = *(iter.data());
418: 
419:       FOREACH_ASTLIST(LocString, spec.tokens, tokIter) {
420:         LocString const &tokName = *(tokIter.data());
421:         trace("grampar") << "prec: " << toString(spec.kind)
422:                          << " " << spec.prec << " " << tokName;
423: 
424: 
425:         Terminal *t = astParseToken(env, tokName);
426:         if (t->precedence) {
427:           astParseError(tokName,
428:             sm_stringc << tokName << " already has a specified precedence");
429:         }
430: 
431:         if (spec.prec == 0) {
432:           // 0 is reserved: it means "no precedence assigned"
433:           astParseError(tokName,
434:             "you can't use 0 as a precedence level, because that value "
435:             "is used internally to mean something else");
436:         }
437: 
438: 
439:         t->precedence = spec.prec;
440:         t->associativity = spec.kind;
441:       }
442:     }
443:   }
444: }
445:
446:
447: void astParseDDM(Environment &env, Symbol *sym,
448:                  ASTList<SpecFunc> const &funcs)  // attach dup/del/merge/keep/classify/maximal spec functions to 'sym'
449: {
450:   Terminal *term = sym->ifTerminal();        // exactly one of these is non-NULL
451:   Nonterminal *nonterm = sym->ifNonterminal();
452: 
453:   FOREACH_ASTLIST(SpecFunc, funcs, iter) {
454:     SpecFunc const &func = *(iter.data());
455:     int numFormals = func.formals.count();
456: 
457: 
458:     // dispatch on the spec-function name, validating arity and applicability
459:     if (func.name.equals("dup")) {
460:       if (numFormals != 1) {
461:         astParseError(func.name, "'dup' function must have one formal parameter");
462:       }
463:       sym->dupParam = func.nthFormal(0);
464:       sym->dupCode = func.code;
465:     }
466: 
467:     else if (func.name.equals("del")) {
468:       if (numFormals == 0) {
469: 
470:         // zero formals is allowed: the deleted value is simply ignored
471:         sym->delParam = NULL;
472:       }
473:       else if (numFormals == 1) {
474:         sym->delParam = func.nthFormal(0);
475:       }
476:       else {
477:         astParseError(func.name, "'del' function must have either zero or one formal parameters");
478:       }
479:       sym->delCode = func.code;
480:     }
481: 
482:     else if (func.name.equals("merge")) {
483:       if (nonterm) {
484:         if (numFormals != 2) {
485:           astParseError(func.name, "'merge' function must have two formal parameters");
486:         }
487:         nonterm->mergeParam1 = func.nthFormal(0);
488:         nonterm->mergeParam2 = func.nthFormal(1);
489:         nonterm->mergeCode = func.code;
490:       }
491:       else {
492:         astParseError(func.name, "'merge' can only be applied to nonterminals");
493:       }
494:     }
495: 
496:     else if (func.name.equals("keep")) {
497:       if (nonterm) {
498:         if (numFormals != 1) {
499:           astParseError(func.name, "'keep' function must have one formal parameter");
500:         }
501:         nonterm->keepParam = func.nthFormal(0);
502:         nonterm->keepCode = func.code;
503:       }
504:       else {
505:         astParseError(func.name, "'keep' can only be applied to nonterminals");
506:       }
507:     }
508: 
509:     else if (func.name.equals("classify")) {
510:       if (term) {
511:         if (numFormals != 1) {
512:           astParseError(func.name, "'classify' function must have one formal parameter");
513:         }
514:         term->classifyParam = func.nthFormal(0);
515:         term->classifyCode = func.code;
516:       }
517:       else {
518:         astParseError(func.name, "'classify' can only be applied to terminals");
519:       }
520:     }
521: 
522:     else if (func.name.equals("maximal")) {
523:       if (nonterm) {
524:         nonterm->maximal = true;   // flag only; no code body used
525:       }
526:       else {
527:         astParseError(func.name, "'maximal' can only be applied to nonterminals");
528:       }
529:     }
530: 
531:     else {
532:       astParseError(func.name,
533:         sm_stringc << "unrecognized spec function \"" << func.name << "\"");
534:     }
535:   }
536: }
537:
538:
539: void addDefaultTypesActions(Grammar &g, GrammarAST *ast)  // fill in missing nonterminal types and production actions with language defaults
540: {
541:   // defaults depend on the target language
542:   StringRef defaultType, defaultAction;
543:   if (g.targetLang.equals("OCaml")) {
544:     defaultType = grammarStringTable.add("unit");
545:     defaultAction = grammarStringTable.add("()");
546:   }
547:   else /*C*/ {
548:     defaultType = grammarStringTable.add("void");
549:     defaultAction = grammarStringTable.add("return;");
550:   }
551: 
552: 
553: 
554:   // with this flag, defaults override even explicitly-written types/actions
555:   bool forceDefaults = tracingSys("forceDefaultActions");
556: 
557: 
558:   FOREACH_ASTLIST_NC(TopForm, ast->forms, iter) {
559:     if (!iter.data()->isTF_nonterm()) { continue; }
560:     TF_nonterm *nt = iter.data()->asTF_nonterm();
561: 
562: 
563:     if (forceDefaults || nt->type.isNull()) {
564:       nt->type.str = defaultType;
565:     }
566: 
567: 
568:     FOREACH_ASTLIST_NC(ProdDecl, nt->productions, iter2) {
569:       ProdDecl *pd = iter2.data();
570: 
571: 
572:       if (forceDefaults || pd->actionCode.isNull()) {
573:         pd->actionCode.str = defaultAction;
574:       }
575: 
576:       if (forceDefaults) {
577: 
578: 
579:         // also blank all RHS tags so actions cannot reference symbols
580:         StringRef empty = grammarStringTable.add("");
581:         FOREACH_ASTLIST_NC(RHSElt, pd->rhs, iter3) {
582:           ASTSWITCH(RHSElt, iter3.data()) {
583:             ASTCASE(RH_name, n)
584:               n->tag.str = empty;
585: 
586:             ASTNEXT(RH_sm_string, s)
587:               s->tag.str = empty;
588: 
589:             ASTENDCASED
590:           }
591:         }
592:       }
593:     }
594:   }
595: }
596:
597:
598: void synthesizeStartRule(Grammar &g, GrammarAST *ast)  // prepend "__EarlyStartSymbol -> top:firstNT EOF" to the grammar
599: {
600: 
601:   TF_nonterm *firstNT = ast->firstNT;
602: 
603:   // find the terminal declared with code 0; that is the EOF token
604:   TermDecl const *eof = NULL;
605:   FOREACH_ASTLIST(TermDecl, ast->terms->decls, iter) {
606:     if (iter.data()->code == 0) {
607:       eof = iter.data();
608:       break;
609:     }
610:   }
611:   if (!eof) {
612:     astParseError("you have to have an EOF token, with code 0");
613:   }
614: 
615: 
616:   RHSElt *rhs1 = new RH_name(LIT_STR("top").clone(), firstNT->name.clone());
617:   RHSElt *rhs2 = new RH_name(LIT_STR("").clone(), eof->name.clone());
618:   ASTList<RHSElt> *rhs = new ASTList<RHSElt>();
619:   rhs->append(rhs1);
620:   rhs->append(rhs2);
621:   char const *action = g.targetLang.equals("OCaml")? " top " :
622:                        firstNT->type.equals("void")? " return; " :
623:                                                      " return top; ";
624:   ProdDecl *startProd = new ProdDecl(rhs, LIT_STR(action).clone());
625: 
626:   // the start symbol inherits the first nonterminal's type
627:   TF_nonterm *earlyStartNT
628:     = new TF_nonterm(
629:         LIT_STR("__EarlyStartSymbol").clone(),
630:         firstNT->type.clone(),
631:         NULL,
632:         new ASTList<ProdDecl>(startProd),
633:         NULL
634:       );
635: 
636: 
637:   ast->forms.prepend(earlyStartNT);
638: }
639:
640:
641: void astParseNonterm(Environment &env, TF_nonterm const *nt)  // fill in one nonterminal: type, productions, spec funcs, subsets
642: {
643:   LocString const &name = nt->name;
644: 
645:   // declared in astParseGrammar's first pass, so this must succeed
646:   Nonterminal *nonterm = env.g.findNonterminal(name);
647:   xassert(nonterm);
648: 
649:   nonterm->type = nt->type;
650: 
651: 
652:   FOREACH_ASTLIST(ProdDecl, nt->productions, iter) {
653:     astParseProduction(env, nonterm, iter.data());
654:   }
655: 
656: 
657:   astParseDDM(env, nonterm, nt->funcs);
658: 
659: 
660:   {
661:     FOREACH_ASTLIST(LocString, nt->subsets, iter) {
662:       LocString const *ls = iter.data();
663:       Nonterminal *sub = env.g.findNonterminal(*ls);
664:       if (!sub) {
665:         astParseError(*ls, "nonexistent nonterminal");
666:       }
667: 
668: 
669: 
670:       // prepend reverses source order; presumably order is insignificant here -- TODO confirm
671:       nonterm->subsets.prepend(sub);
672:     }
673:   }
674: }
675:
676:
677: void astParseProduction(Environment &env, Nonterminal *nonterm,
678:                         ProdDecl const *prodDecl)  // translate one production AST node into an env.g Production
679: {
680:   // the synthesized start rule is the only one allowed to mention EOF
681:   bool synthesizedStart = nonterm->name.equals("__EarlyStartSymbol");
682: 
683: 
684:   Production *prod = new Production(nonterm, "this");
685: 
686: 
687:   prod->action = prodDecl->actionCode;
688: 
689: 
690:   FOREACH_ASTLIST(RHSElt, prodDecl->rhs, iter) {
691:     RHSElt const *n = iter.data();
692:     LocString symName;
693:     LocString symTag;
694:     bool isString = false;
695:     bool isPrec = false;
696: 
697: 
698:     ASTSWITCHC(RHSElt, n) {
699:       ASTCASEC(RH_name, tname) {
700:         symName = tname->name;
701:         symTag = tname->tag;
702:       }
703: 
704:       ASTNEXTC(RH_sm_string, ts) {
705:         symName = ts->str;
706:         symTag = ts->tag;
707:         isString = true;
708:       }
709: 
710:       ASTNEXTC(RH_prec, p) {
711:         // explicit precedence marker: copy the token's precedence onto the production
712:         prod->precedence = astParseToken(env, p->tokName)->precedence;
713: 
714: 
715:         iter.adv();
716:         if (!iter.isDone()) {
717:           astParseError(p->tokName,
718:             "precedence spec must be last thing in a production "
719:             "(before the action code)");
720:         }
721:         isPrec = true;
722:       }
723: 
724:       ASTENDCASECD
725:     }
726: 
727:     if (isPrec) {
728:       break;    // nothing can follow a precedence spec
729:     }
730: 
731:     // resolve the symbol name; shadows the outer 'nonterm' parameter deliberately
732:     Terminal *term = env.g.findTerminal(symName);
733:     Nonterminal *nonterm = env.g.findNonterminal(symName);
734:     xassert(!( term && nonterm ));
735: 
736: 
737:     if (isString && !term) {
738:       astParseError(symName, "terminals must be declared");
739:     }
740: 
741:     if (!term && !nonterm) {
742:       astParseErrorCont(env, symName, "undeclared symbol");
743: 
744:       // recover by declaring it, so later references don't re-report
745:       nonterm = env.g.getOrMakeNonterminal(symName);
746:     }
747: 
748:     if (term && term->termIndex==0 && !synthesizedStart) {
749:       astParseError(symName, "you cannot use the EOF token in your rules");
750:     }
751: 
752:     if (symTag.equals("loc")) {
753: 
754:       // "loc" is reserved for the automatic location variable
755:       astParseErrorCont(env, symTag, "cannot use \"loc\" as a tag");
756:     }
757: 
758: 
759: 
760:     // default production precedence: that of its (last seen) terminal
761:     if (term) {
762:       prod->precedence = term->precedence;
763:     }
764: 
765: 
766:     Symbol *s;
767:     if (nonterm) {
768:       s = nonterm;
769:     }
770:     else {
771:       s = term;
772:     }
773: 
774:     if (s->isEmptyString) {
775:       // the empty-string pseudo-symbol contributes nothing to the RHS
776: 
777:     }
778:     else {
779: 
780:       prod->append(s, symTag);
781:     }
782:   }
783: 
784: 
785: 
786: 
787: 
788: 
789: 
790:   env.g.addProduction(prod);   // grammar takes ownership of 'prod'
791: }
792:
793:
794:
795:
796: int grampar_yylex(YYSTYPE *lvalp, void *parseParam)  // bison pure-parser lexer hook: pulls tokens from the GrammarLexer
797: {
798:   ParseParams *par = (ParseParams*)parseParam;
799:   GrammarLexer &lexer = par->lexer;
800: 
801:   int code = lexer.yylexInc();
802: 
803:   try {
804: 
805: 
806:     // attach a semantic value for the token kinds that carry one
807:     switch (code) {
808:       case TOK_INTEGER:
809:         lvalp->num = lexer.integerLiteral;
810:         break;
811: 
812:       case TOK_STRING:
813:         lvalp->str = new LocString(lexer.curLoc(), lexer.sm_stringLiteral);
814:         break;
815: 
816:       case TOK_NAME:
817:         lvalp->str = new LocString(lexer.curLoc(), lexer.curToken());
818:         break;
819: 
820:       case TOK_LIT_CODE:
821:         lvalp->str = new LocString(lexer.curLoc(), lexer.curFuncBody());
822:         break;
823: 
824:       default:
825:         lvalp->str = NULL;
826:     }
827:   }
828:   catch (xBase &x) {
829:     // report the lexical error and recurse to try the next token
830:     std::cout << lexer.curLocStr() << ": " << x << std::endl;
831: 
832: 
833:     return grampar_yylex(lvalp, parseParam);
834:   }
835: 
836:   return code;
837: }
838:
839:
840: void grampar_yyerror(char const *message, void *parseParam)  // bison error hook: print message with the current source location
841: {
842:   ParseParams *par = (ParseParams*)parseParam;
843:   std::cout << par->lexer.curLocStr() << ": " << message << std::endl;
844: }
845:
846:
847:
848: void mergeContext(GrammarAST *base, TF_context * /*owner*/ ext)  // merge an extension's context_class into 'base'
849: {
850:   // current behavior: just append; astParseOptions keeps only the last one
851: 
852:   base->forms.append(ext);
853: 
854: #if 0   // dead code: earlier replace/append strategy, kept for reference
855: 
856:   TF_context *baseContext = NULL;
857:   FOREACH_ASTLIST_NC(TopForm, base->forms, iter) {
858:     if (iter.data()->isTF_context()) {
859:       baseContext = iter.data()->asTF_context();
860:       break;
861:     }
862:   }
863: 
864:   if (!baseContext) {
865: 
866:     base->forms.append(ext);
867:   }
868: 
869:   else if (baseContext->name.str == ext->name.str) {
870: 
871: 
872: 
873:     astParseError(ext->name, "context append not implemented");
874:   }
875: 
876:   else {
877: 
878:     base->forms.removeItem(baseContext);
879:     delete baseContext;
880:     base->forms.append(ext);
881:   }
882: #endif
883: }
884:
885:
886: void mergeOption(GrammarAST *base, TF_option * /*owner*/ ext)  // merge an option: overwrite an existing same-named one, else append
887: {
888: 
889:   FOREACH_ASTLIST_NC(TopForm, base->forms, iter) {
890:     if (!iter.data()->isTF_option()) continue;
891:     TF_option *op = iter.data()->asTF_option();
892: 
893:     if (op->name.str == ext->name.str) {   // StringRef identity compare: interned strings
894: 
895:       op->value = ext->value;
896:       delete ext;    // ownership taken; not appended
897:       return;
898:     }
899:   }
900: 
901: 
902:   base->forms.append(ext);
903: }
904:
905:
906: void mergeTerminals(GrammarAST *base, TF_terminals * /*owner*/ ext)  // merge an extension Terminals section into the base one
907: {
908:   FOREACH_ASTLIST_NC(TopForm, base->forms, iter) {
909:     if (iter.data()->isTF_terminals()) {
910:       TF_terminals *t = iter.data()->asTF_terminals();
911: 
912: 
913: 
914:       // concat moves the extension's lists into the base section
915:       t->decls.concat(ext->decls);
916: 
917: 
918: 
919: 
920:       t->types.concat(ext->types);
921:       t->prec.concat(ext->prec);
922: 
923:       delete ext;    // now empty; contents transferred
924:       return;
925:     }
926:   }
927: 
928:   // base had no Terminals section: adopt the extension's wholesale
929:   base->forms.append(ext);
930: }
931:
932:
933: void mergeSpecFunc(TF_nonterm *base, SpecFunc * /*owner*/ ext)  // replace any same-named spec function, then append the new one
934: {
935: 
936:   FOREACH_ASTLIST_NC(SpecFunc, base->funcs, iter) {
937:     SpecFunc *f = iter.data();
938:     if (f->name.str == ext->name) {   // interned-string identity compare
939: 
940:       base->funcs.removeItem(f);
941:       delete f;
942:       break;
943:     }
944:   }
945: 
946: 
947:   base->funcs.append(ext);
948: }
949:
950:
951: bool equalRHSElt(RHSElt const *elt1, RHSElt const *elt2)  // structural equality of two RHS elements (tags are ignored)
952: {
953:   if (elt1->kind() != elt2->kind()) {
954:     return false;
955:   }
956: 
957: 
958: 
959:   // all name comparisons are interned-StringRef identity compares
960:   if (elt1->isRH_name()) {
961:     return elt1->asRH_nameC()->name.str == elt2->asRH_nameC()->name.str;
962:   }
963:   if (elt1->isRH_sm_string()) {
964:     return elt1->asRH_sm_stringC()->str.str == elt2->asRH_sm_stringC()->str.str;
965:   }
966:   if (elt1->isRH_prec()) {
967: 
968:     return elt1->asRH_precC()->tokName.str == elt2->asRH_precC()->tokName.str;
969:   }
970: 
971:   xfailure("unknown RHSElt kind");
972:   return false;      // silence compiler warning; unreachable after xfailure
973: }
974:
975:
976: bool equalRHS(ProdDecl const *prod1, ProdDecl const *prod2)  // true if both productions have element-wise equal right-hand sides
977: {
978:   if (prod1->rhs.count() != prod2->rhs.count()) {
979:     return false;
980:   }
981: 
982:   // counts match, so only iter1 needs the isDone check
983:   for (ASTListIter<RHSElt> iter1(prod1->rhs), iter2(prod2->rhs);
984:        !iter1.isDone(); iter1.adv(), iter2.adv()) {
985:     if (!equalRHSElt(iter1.data(), iter2.data())) {
986:       return false;
987:     }
988:   }
989:   return true;
990: }
990:
991:
992: void mergeProduction(TF_nonterm *base, ProdDecl *ext)  // replace any production with an identical RHS, then append 'ext'
993: {
994: 
995:   FOREACH_ASTLIST_NC(ProdDecl, base->productions, iter) {
996:     ProdDecl *prod = iter.data();
997: 
998: 
999:     if (equalRHS(prod, ext)) {
1000: 
1001:       base->productions.removeItem(prod);
1002:       delete prod;
1003:       break;   // RHSs are unique within a nonterminal, so at most one match
1004:     }
1005:   }
1006: 
1007: 
1008:   base->productions.append(ext);
1009: }
1010:
1011:
1012: void mergeNonterminal(GrammarAST *base, TF_nonterm * /*owner*/ ext)  // merge an extension nonterminal into the base grammar
1013: {
1014:   // look for an existing nonterminal of the same (interned) name
1015:   TF_nonterm *exist = NULL;
1016:   FOREACH_ASTLIST_NC(TopForm, base->forms, iter) {
1017:     if (iter.data()->isTF_nonterm() &&
1018:         iter.data()->asTF_nonterm()->name.str == ext->name) {
1019:       exist = iter.data()->asTF_nonterm();
1020:     }
1021:   }
1022: 
1023:   if (!exist) {
1024:     // new nonterminal: adopt it wholesale
1025:     base->forms.append(ext);
1026:     return;
1027:   }
1028: 
1029: 
1030:   if (exist->type.str != ext->type) {
1031:     astParseError(ext->type, "cannot redefine the type of a nonterminal");
1032:   }
1033: 
1034:   // merge piecewise; removeFirst transfers ownership of each element
1035:   while (ext->funcs.isNotEmpty()) {
1036:     mergeSpecFunc(exist, ext->funcs.removeFirst());
1037:   }
1038: 
1039: 
1040:   while (ext->productions.isNotEmpty()) {
1041:     mergeProduction(exist, ext->productions.removeFirst());
1042:   }
1043: 
1044:   delete ext;   // now empty shell
1045: }
1046:
1047:
1048: void mergeGrammar(GrammarAST *base, GrammarAST *ext)  // fold every top-form of 'ext' into 'base', dispatching by kind
1049: {
1050: 
1051: 
1052:   // removeFirst transfers ownership of each form to the merge helper
1053:   while (ext->forms.isNotEmpty()) {
1054:     TopForm *form = ext->forms.removeFirst();
1055: 
1056:     ASTSWITCH(TopForm, form) {
1057:       ASTCASE(TF_context, c) {
1058:         mergeContext(base, c);
1059:       }
1060: 
1061:       ASTNEXT(TF_verbatim, v) {
1062:         // verbatim sections simply accumulate
1063:         base->forms.append(v);
1064:       }
1065: 
1066:       ASTNEXT(TF_option, op) {
1067:         mergeOption(base, op);
1068:       }
1069: 
1070:       ASTNEXT(TF_terminals, t) {
1071:         mergeTerminals(base, t);
1072:       }
1073: 
1074:       ASTNEXT(TF_nonterm, n) {
1075:         mergeNonterminal(base, n);
1076:       }
1077: 
1078:       ASTDEFAULT {
1079:         xfailure("doh");   // unknown TopForm kind: internal error
1080:       }
1081: 
1082:       ASTENDCASE
1083:     }
1084:   }
1085: }
1086:
1087:
1088:
1089: bool isGramlexEmbed(int code);
1090:
1091: GrammarAST *parseGrammarFile(char const *fname, bool useML)  // lex+parse a .gr file (NULL fname = stdin); returns owned AST or throws
1092: {
1093: #ifndef NDEBUG
1094:   if (tracingSys("yydebug")) {
1095:     yydebug = true;
1096:   }
1097: #endif
1098: 
1099:   // open the input; a NULL name means read from stdin
1100:   Owner<std::ifstream> in;
1101:   if (fname == NULL) {
1102:     fname = "<stdin>";
1103:   }
1104:   else {
1105:     in = new std::ifstream(fname);
1106:     if (!*in) {
1107:       xsyserror("open", sm_stringc << "error opening input file " << fname);
1108:     }
1109:   }
1110: 
1111:   // select the embedded-language substrate (ML vs default C)
1112:   EmbeddedLang *embed = NULL;
1113:   if (useML) {
1114:     embed = new MLSubstrate;
1115:   }
1116: 
1117: 
1118:   GrammarLexer lexer(isGramlexEmbed,
1119:                      grammarStringTable,
1120:                      fname,
1121:                      in.xfr(),      // lexer takes ownership of the stream
1122:                      embed);
1123:   if (embed) {
1124: 
1125:     embed->err = &lexer.altReporter;
1126:   }
1127: 
1128:   ParseParams params(lexer);
1129: 
1130:   traceProgress() << "parsing grammar source: " << fname << std::endl;
1131:   int retval = grampar_yyparse(&params);
1132:   if (retval==0 && lexer.errors==0) {
1133:     GrammarAST *ret = params.treeTop;
1134: 
1135:     if (tracingSys("printGrammarAST")) {
1136: 
1137:       std::cout << "AST:\n";
1138:       ret->debugPrint(std::cout, 2);
1139:     }
1140: 
1141:     return ret;
1142:   }
1143:   else {
1144:     xbase("parsing finished with an error");   // throws
1145:     return NULL;
1146:   }
1147: }
1148:
1149:
1150: void parseGrammarAST(Grammar &g, GrammarAST *treeTop)  // run all AST-processing phases, then translate and sanity-check 'g'
1151: {
1152:   setAnnotations(treeTop);
1153: 
1154: 
1155:   // order matters: options first (they affect defaults), then defaults, then start rule
1156:   astParseOptions(g, treeTop);
1157: 
1158: 
1159:   addDefaultTypesActions(g, treeTop);
1160: 
1161: 
1162:   synthesizeStartRule(g, treeTop);
1163: 
1164: 
1165:   traceProgress() << "parsing grammar AST..\n";
1166:   astParseGrammar(g, treeTop);
1167: 
1168: 
1169: 
1170:   traceProgress() << "beginning grammar analysis..\n";
1171:   g.checkWellFormed();
1172: }
1173:
1174:
1175: void readGrammarFile(Grammar &g, char const *fname)  // convenience wrapper: parse file to AST, translate into 'g', free the AST
1176: {
1177: 
1178:   Owner<GrammarAST> treeTop(parseGrammarFile(fname, false /*useML*/));
1179: 
1180:   parseGrammarAST(g, treeTop);
1181: 
1182:   treeTop.del();   // explicit early delete; Owner would do this anyway
1183: 
1184: 
1185: 
1186: 
1187: 
1188: }
1189:
1190:
1191:
1192:
1193:
1194:
1195:
1196:
1197: int main(int argc, char **argv)  // self-test driver: parse, print, binary round-trip, and diff the two printouts
1198: {
1199:   if (argc < 2) {
1200:     std::cout << "usage: " << argv[0] << " [-tr flags] filename.gr\n";
1201:     std::cout << "  interesting trace flags:\n";
1202:     std::cout << "    keep-tmp      do not delete the temporary files\n";
1203: 
1204:     return 0;
1205:   }
1206: 
1207:   traceAddSys("progress");
1208:   TRACE_ARGS();
1209: 
1210:   bool printCode = true;
1211: 
1212:   // parse the grammar source into g1
1213:   Grammar g1;
1214:   readGrammarFile(g1, argv[1]);
1215: 
1216:   // print g1's ascii form
1217:   char const g1Fname[] = "grammar.g1.tmp";
1218:   traceProgress() << "printing initial grammar to " << g1Fname << "\n";
1219:   {
1220:     std::ofstream out(g1Fname);
1221:     g1.printSymbolTypes(out);
1222:     g1.printProductions(out, printCode);
1223:   }
1224: 
1225:   // echo it for the user (unix 'cat'; return value ignored)
1226:   system("cat grammar.g1.tmp");
1227: 
1228: 
1229:   // flattening needs access to the grammar's string table
1230:   flattenStrTable = &grammarStringTable;
1231: 
1232:   // write g1 in binary form
1233:   char const binFname[] = "grammar.bin.tmp";
1234:   traceProgress() << "writing initial grammar to " << binFname << "\n";
1235:   {
1236:     BFlatten flat(binFname, false /*reading*/);
1237:     g1.xfer(flat);
1238:   }
1239: 
1240:   // read it back into a fresh Grammar
1241:   traceProgress() << "reading grammar from " << binFname << "\n";
1242:   Grammar g2;
1243:   {
1244:     BFlatten flat(binFname, true /*reading*/);
1245:     g2.xfer(flat);
1246:   }
1247: 
1248:   // print g2's ascii form
1249:   char const g2Fname[] = "grammar.g2.tmp";
1250:   traceProgress() << "printing just-read grammar to " << g2Fname << "\n";
1251:   {
1252:     std::ofstream out(g2Fname);
1253:     g2.printSymbolTypes(out);
1254:     g2.printProductions(out, printCode);
1255:   }
1256: 
1257:   // the round-trip is correct iff the two printouts are identical
1258:   int result = system(sm_stringc << "diff " << g1Fname << " " << g2Fname);
1259:   if (result != 0) {
1260:     std::cout << "the two ascii representations differ!!\n";
1261:     return 4;
1262:   }
1263: 
1264:   // clean up unless the user asked to keep the temp files
1265:   if (!tracingSys("keep-tmp")) {
1266:     remove(g1Fname);
1267:     remove(g2Fname);
1268:     remove(binFname);
1269:   }
1270: 
1271:   std::cout << "successfully parsed, printed, wrote, and read a grammar!\n";
1272:   return 0;
1273: }
1274:
1275:
Start cpp section to elk/elk_grampar.tab.cpp[1
/1
]
1: #line 19604 "./lpsrc/elk.pak"
2: /* A Bison parser, made from grampar.y
3: by GNU bison 1.35. */ /* tweak */
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67: new LocString(SL_UNKNOWN, /* unknown location */ \
68: PARAM->lexer.strtable.add(str))
69:
70:
71:
72: new LocString(SL_UNKNOWN, NULL)
73:
74:
75:
76:
77: new LocString(otherLoc->loc, PARAM->lexer.strtable.add(str))
78:
79:
80: AssocKind whichKind(LocString * /*owner*/ kind);
81:
82:
83:
84: typedef union YYSTYPE {
85: int num;
86: LocString *str;
87:
88: ASTList<TopForm> *topFormList;
89: TopForm *topForm;
90:
91: ASTList<TermDecl> *termDecls;
92: TermDecl *termDecl;
93: ASTList<TermType> *termTypes;
94: TermType *termType;
95: ASTList<PrecSpec> *precSpecs;
96:
97: ASTList<SpecFunc> *specFuncs;
98: SpecFunc *specFunc;
99: ASTList<LocString> *sm_stringList;
100:
101: ASTList<ProdDecl> *prodDecls;
102: ProdDecl *prodDecl;
103: ASTList<RHSElt> *rhsList;
104: RHSElt *rhsElt;
105: } yystype;
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119: /* YYTRANSLATE(YYLEX) -- Bison token number corresponding to YYLEX. */
120:
121:
122: /* YYTRANSLATE[YYLEX] -- Bison token number corresponding to YYLEX. */
123: static const char yytranslate[] =
124: {
125: 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
126: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
127: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
128: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
129: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
133: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
134: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
135: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
136: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
137: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
140: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
144: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
145: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
147: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
148: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
149: 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
150: 2, 2, 2, 2, 2, 2, 1, 3, 4, 5,
151: 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
152: 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
153: };
154:
155:
156: static const short yyprhs[] =
157: {
158: 0, 0, 2, 3, 6, 8, 10, 12, 14, 16,
159: 20, 23, 26, 30, 35, 42, 43, 46, 51, 57,
160: 59, 60, 61, 64, 69, 76, 77, 82, 83, 89,
161: 90, 93, 95, 97, 98, 101, 108, 109, 111, 113,
162: 117, 122, 131, 132, 135, 139, 141, 143, 144, 147,
163: 149, 153, 155, 159, 164, 165
164: };
/* YYRHS -- the right-hand sides of all rules, concatenated as internal
   symbol numbers; each rule's RHS is terminated by a 0 entry (the debug
   loop in yyparse stops when yyrhs[yyi] > 0 fails).  Rule N's RHS
   begins at yyrhs[yyprhs[N]].  Generated table -- do not edit.  */
static const short yyrhs[] =
{
  27, 0, 0, 27, 28, 0, 29, 0, 30, 0,
  31, 0, 32, 0, 46, 0, 24, 6, 10, 0,
  19, 6, 0, 20, 6, 0, 22, 4, 10, 0,
  22, 4, 3, 10, 0, 15, 7, 33, 36, 38,
  8, 0, 0, 33, 34, 0, 3, 9, 4, 10,
  0, 3, 9, 4, 5, 10, 0, 6, 0, 0,
  0, 36, 37, 0, 16, 35, 4, 10, 0, 16,
  35, 4, 7, 42, 8, 0, 0, 21, 7, 39,
  8, 0, 0, 39, 4, 3, 40, 10, 0, 0,
  40, 41, 0, 4, 0, 5, 0, 0, 42, 43,
  0, 18, 4, 12, 44, 13, 6, 0, 0, 45,
  0, 4, 0, 45, 14, 4, 0, 17, 35, 4,
  48, 0, 17, 35, 4, 7, 42, 47, 52, 8,
  0, 0, 47, 48, 0, 11, 50, 49, 0, 6,
  0, 10, 0, 0, 50, 51, 0, 4, 0, 4,
  9, 4, 0, 5, 0, 4, 9, 5, 0, 21,
  12, 41, 13, 0, 0, 25, 45, 10, 0
};
185:
186:
187:
188:
189: /* YYRLINE[YYN] -- source line where rule number YYN was defined. */
/* YYRLINE[YYN] -- source line where rule number YYN was defined.
   Used only by the YYDEBUG reduce trace in yyparse (the line numbers
   refer to the grammar file this parser was generated from).  */
static const short yyrline[] =
{
  0, 158, 163, 164, 168, 169, 170, 171, 172, 176,
  181, 182, 187, 188, 199, 204, 205, 213, 215, 220,
  221, 225, 226, 230, 232, 237, 238, 242, 244, 249,
  250, 254, 255, 261, 262, 266, 271, 272, 276, 277,
  288, 291, 296, 297, 301, 305, 306, 310, 311, 320,
  322, 324, 326, 328, 333, 334
};
199:
200:
201:
202:
203:
204: /* YYTNAME[TOKEN_NUM] -- String name of the token TOKEN_NUM. */
/* YYTNAME[TOKEN_NUM] -- String name of the token TOKEN_NUM.
   Indexed by internal symbol number: tokens first, then nonterminals
   (yyparse prints yytname[yychar1] for tokens and yytname[yyr1[yyn]]
   for the nonterminal a reduce produces).  0-terminated.  */
static const char *const yytname[] =
{
  "$", "error", "$undefined.", "TOK_INTEGER", "TOK_NAME", "TOK_STRING",
  "TOK_LIT_CODE", "\"{\"", "\"}\"", "\":\"", "\";\"", "\"->\"", "\"(\"",
  "\")\"", "\",\"", "\"terminals\"", "\"token\"", "\"nonterm\"",
  "\"fun\"", "\"verbatim\"", "\"impl_verbatim\"", "\"precedence\"",
  "\"option\"", "\"expect\"", "\"context_class\"", "\"subsets\"",
  "StartSymbol", "TopFormList", "TopForm", "ContextClass", "Verbatim",
  "Option", "Terminals", "TermDecls", "TerminalDecl", "Type", "TermTypes",
  "TermType", "Precedence", "PrecSpecs", "NameOrStringList",
  "NameOrString", "SpecFuncs", "SpecFunc", "FormalsOpt", "Formals",
  "Nonterminal", "Productions", "Production", "Action", "RHS", "RHSElt",
  "Subsets", 0
};
219:
220:
221: /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives.
   After a reduce, yyparse uses yyr1[yyn] - YYNTBASE to index the
   goto tables (yypgoto/yydefgoto).  Generated table -- do not edit.  */
static const short yyr1[] =
{
  0, 26, 27, 27, 28, 28, 28, 28, 28, 29,
  30, 30, 31, 31, 32, 33, 33, 34, 34, 35,
  35, 36, 36, 37, 37, 38, 38, 39, 39, 40,
  40, 41, 41, 42, 42, 43, 44, 44, 45, 45,
  46, 46, 47, 47, 48, 49, 49, 50, 50, 51,
  51, 51, 51, 51, 52, 52
};
231:
232: /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN.
   This is the number of entries popped from each stack when rule YYN
   is reduced (yylen in yyparse).  Generated table -- do not edit.  */
static const short yyr2[] =
{
  0, 1, 0, 2, 1, 1, 1, 1, 1, 3,
  2, 2, 3, 4, 6, 0, 2, 4, 5, 1,
  0, 0, 2, 4, 6, 0, 4, 0, 5, 0,
  2, 1, 1, 0, 2, 6, 0, 1, 1, 3,
  4, 8, 0, 2, 3, 1, 1, 0, 2, 1,
  3, 1, 3, 4, 0, 3
};
242:
243: /* YYDEFACT[S] -- default rule to reduce with in state S when YYTABLE
244: doesn't specify something else to do. Zero means the default is an
245: error. */
/* YYDEFACT[S] -- default rule to reduce with in state S when YYTABLE
   doesn't specify something else to do.  Zero means the default is an
   error (yyparse: yyn = yydefact[yystate]; 0 jumps to yyerrlab).
   Generated table -- do not edit by hand.  */
static const short yydefact[] =
{
  2, 1, 0, 20, 0, 0, 0, 0, 3, 4,
  5, 6, 7, 8, 15, 19, 0, 10, 11, 0,
  0, 21, 0, 0, 12, 9, 0, 16, 25, 33,
  47, 40, 13, 0, 20, 0, 22, 0, 42, 0,
  0, 0, 27, 14, 0, 34, 54, 49, 51, 45,
  46, 0, 44, 48, 0, 17, 0, 0, 0, 0,
  43, 0, 0, 0, 18, 33, 23, 0, 26, 36,
  38, 0, 41, 50, 52, 31, 32, 0, 0, 29,
  0, 37, 55, 0, 53, 24, 0, 0, 39, 28,
  30, 35, 0, 0, 0
};
259:
/* YYDEFGOTO[NTERM] -- default state to go to after reducing a rule
   that derives nonterminal NTERM, used when the yypgoto/yytable pair
   has no more specific entry (see the goto dispatch after the reduce
   switch in yyparse).  Generated table -- do not edit.  */
static const short yydefgoto[] =
{
  92, 1, 8, 9, 10, 11, 12, 21, 27, 16,
  28, 36, 37, 57, 86, 77, 38, 45, 80, 71,
  13, 46, 31, 52, 39, 53, 61
};
266:
/* YYPACT[STATE] -- offset into yytable/yycheck for dispatching on the
   lookahead token in STATE.  The -32768 entries match the YYFLAG
   "take the default action" test in yyparse (YYFLAG itself is defined
   outside this chunk).  Generated table -- do not edit by hand.  */
static const short yypact[] =
{
  -32768, -10, 4, 33, 34, 35, 38, 37, -32768, -32768,
  -32768, -32768, -32768, -32768, -32768, -32768, 40, -32768, -32768, 5,
  13, 42, 19, 28, -32768, -32768, 39, -32768, 0, -32768,
  -32768, -32768, -32768, 43, 33, 44, -32768, 41, 36, -4,
  17, 46, -32768, -32768, 48, -32768, -7, 47, -32768, -32768,
  -32768, 45, -32768, -32768, 49, -32768, 22, 20, 50, 51,
  -32768, 52, 29, 32, -32768, -32768, -32768, 55, -32768, 51,
  -32768, 21, -32768, -32768, -32768, -32768, -32768, 53, -5, -32768,
  54, 56, -32768, 57, -32768, -32768, 15, 58, -32768, -32768,
  -32768, -32768, 63, 65, -32768
};
280:
/* YYPGOTO[NTERM] -- per-nonterminal offset into yytable for computing
   the state to enter after a reduce: yyparse adds *yyssp to
   yypgoto[yyr1[yyn] - YYNTBASE] and validates the result via yycheck,
   else falls back to yydefgoto.  Generated table -- do not edit.  */
static const short yypgoto[] =
{
  -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, 12,
  -32768, -32768, -32768, -32768, -32768, -33, 3, -32768, -32768, 2,
  -32768, -32768, 23, -32768, -32768, -32768, -32768
};
287:
288:
289:
290:
291:
/* YYTABLE -- the packed action/goto table, indexed via yypact (actions)
   and yypgoto (gotos).  Per the comments in yyparse: negative entry =>
   reduce by rule -yyn; positive => shift to that state; 0 or YYFLAG =>
   error.  Only valid where yycheck confirms the index.  Generated
   table -- do not edit by hand.  */
static const short yytable[] =
{
  47, 48, 49, 85, 30, 2, 50, 3, 23, 4,
  5, 14, 6, 44, 7, 24, 34, 51, 59, 75,
  76, 35, 54, 25, 67, 89, 29, 55, 68, 65,
  30, 82, 66, 73, 74, 83, 75, 76, 32, 15,
  17, 18, 19, 20, 22, 26, 41, 40, 33, 43,
  56, 42, 58, 90, 44, 70, 62, 63, 79, 64,
  72, 88, 69, 93, 91, 94, 84, 87, 78, 60,
  83, 81
};
303:
/* YYCHECK -- guard array parallel to yytable: a yytable entry at index
   I is valid only if yycheck[I] equals the symbol/state used to compute
   I (yyparse tests yycheck[yyn] != yychar1 for actions and
   yycheck[yystate] != *yyssp for gotos before trusting yytable).
   Generated table -- do not edit by hand.  */
static const short yycheck[] =
{
  4, 5, 6, 8, 11, 15, 10, 17, 3, 19,
  20, 7, 22, 18, 24, 10, 16, 21, 25, 4,
  5, 21, 5, 10, 4, 10, 7, 10, 8, 7,
  11, 10, 10, 4, 5, 14, 4, 5, 10, 6,
  6, 6, 4, 6, 4, 3, 34, 4, 9, 8,
  4, 7, 4, 86, 18, 4, 9, 12, 3, 10,
  8, 4, 12, 0, 6, 0, 13, 13, 65, 46,
  14, 69
};
315:
316:
317: /* -*-C-*- Note some compilers choke on comments on `#line' lines. */
318:
319: /* Skeleton output parser for bison,
320:
321: Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002 Free Software
322: Foundation, Inc.
323:
324: This program is free software; you can redistribute it and/or modify
325: it under the terms of the GNU General Public License as published by
326: the Free Software Foundation; either version 2, or (at your option)
327: any later version.
328:
329: This program is distributed in the hope that it will be useful,
330: but WITHOUT ANY WARRANTY; without even the implied warranty of
331: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
332: GNU General Public License for more details.
333:
334: You should have received a copy of the GNU General Public License
335: along with this program; if not, write to the Free Software
336: Foundation, Inc., 59 Temple Place - Suite 330,
337: Boston, MA 02111-1307, USA. */
338:
339: /* As a special exception, when this file is copied by Bison into a
340: Bison output file, you may use that output file without restriction.
341: This special exception was added by the Free Software Foundation
342: in version 1.24 of Bison. */
343:
344: /* This is the parser code that is written into each bison parser when
345: the %semantic_parser declaration is not specified in the grammar.
346: It was written by Richard Stallman by simplifying the hairy parser
347: used when %semantic_parser is specified. */
348:
349: /* All symbols defined below should begin with yy or YY, to avoid
350: infringing on user name space. This should be done even for local
351: variables, as they might otherwise be expanded by user macros.
352: There are some unavoidable exceptions within include files to
353: define necessary library symbols; they are noted "INFRINGES ON
354: USER NAME SPACE" below. */
355:
356:
357:
358: /* The parser invokes alloca or malloc; define the necessary symbols. */
359:
360:
361:
362:
363:
364:
365:
366:
367:
368:
369:
370:
371:
372:
373:
374:
375: /* Pacify GCC's `empty if-body' warning. */
376:
377:
378:
379:
380:
381:
382:
383:
384:
385:
386:
387:
388:
389: && (! defined (__cplusplus) \
390: || (YYLTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
391:
392: /* A type that is properly aligned for any stack member. */
/* One member per parser stack (state/semantic/location); used only to
   compute alignment and combined size (YYSTACK_BYTES/YYSTACK_GAP_MAX)
   when the stacks are grown by hand.  */
union yyalloc
{
  short yyss;
  YYSTYPE yyvs;
  /* NOTE(review): in the stock bison skeleton this member is wrapped in
     `#if YYLSP_NEEDED'; here it appears unconditional, so YYLTYPE must be
     defined whenever this file compiles -- confirm against the full file.  */
  YYLTYPE yyls;

};
401:
402: /* The size of the maximum gap between one aligned stack and the next. */
403: # define YYSTACK_GAP_MAX (sizeof (union yyalloc) - 1)
404:
/* The size of an array large enough to hold all stacks, each with
   N elements.  */
407: # if YYLSP_NEEDED
408: # define YYSTACK_BYTES(N) \
409: ((N) * (sizeof (short) + sizeof (YYSTYPE) + sizeof (YYLTYPE)) \
410: + 2 * YYSTACK_GAP_MAX)
411: # else
412: # define YYSTACK_BYTES(N) \
413: ((N) * (sizeof (short) + sizeof (YYSTYPE)) \
414: + YYSTACK_GAP_MAX)
415: # endif
416:
417: /* Copy COUNT objects from FROM to TO. The source and destination do
418: not overlap. */
419: # ifndef YYCOPY
420: # if 1 < __GNUC__
421: # define YYCOPY(To, From, Count) \
422: __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
423: # else
424: # define YYCOPY(To, From, Count) \
425: do \
426: { \
427: register YYSIZE_T yyi; \
428: for (yyi = 0; yyi < (Count); yyi++) \
429: (To)[yyi] = (From)[yyi]; \
430: } \
431: while (0)
432: # endif
433: # endif
434:
435: /* Relocate STACK from its old location to the new one. The
436: local variables YYSIZE and YYSTACKSIZE give the old and new number of
437: elements in the stack, and YYPTR gives the new location of the
438: stack. Advance YYPTR to a properly aligned location for the next
439: stack. */
440: # define YYSTACK_RELOCATE(Stack) \
441: do \
442: { \
443: YYSIZE_T yynewbytes; \
444: YYCOPY (&yyptr->Stack, Stack, yysize); \
445: Stack = &yyptr->Stack; \
446: yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAX; \
447: yyptr += yynewbytes / sizeof (*yyptr); \
448: } \
449: while (0)
450:
451: #endif
452:
453:
454: #if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
455: # define YYSIZE_T __SIZE_TYPE__
456: #endif
457: #if ! defined (YYSIZE_T) && defined (size_t)
458: # define YYSIZE_T size_t
459: #endif
460: #if ! defined (YYSIZE_T)
461: # if defined (__STDC__) || defined (__cplusplus)
462: # include <stddef.h> /* INFRINGES ON USER NAME SPACE */
463: # define YYSIZE_T size_t
464: # endif
465: #endif
466: #if ! defined (YYSIZE_T)
467: # define YYSIZE_T unsigned int
468: #endif
469:
470: #define yyerrok (yyerrstatus = 0)
471: #define yyclearin (yychar = YYEMPTY)
472: #define YYEMPTY -2
473: #define YYEOF 0
474: #define YYACCEPT goto yyacceptlab
475: #define YYABORT goto yyabortlab
476: #define YYERROR goto yyerrlab1
477: /* Like YYERROR except do call yyerror. This remains here temporarily
478: to ease the transition to the new meaning of YYERROR, for GCC.
479: Once GCC version 2 has supplanted version 1, this can go. */
480: #define YYFAIL goto yyerrlab
481: #define YYRECOVERING() (!!yyerrstatus)
482: #define YYBACKUP(Token, Value) \
483: do \
484: if (yychar == YYEMPTY && yylen == 1) \
485: { \
486: yychar = (Token); \
487: yylval = (Value); \
488: yychar1 = YYTRANSLATE (yychar); \
489: YYPOPSTACK; \
490: goto yybackup; \
491: } \
492: else \
493: { \
494: yyerror ("syntax error: cannot back up"); \
495: YYERROR; \
496: } \
497: while (0)
498:
499: #define YYTERROR 1
500: #define YYERRCODE 256
501:
502:
503: /* YYLLOC_DEFAULT -- Compute the default location (before the actions
504: are run).
505:
   When YYLLOC_DEFAULT is run, CURRENT is set to the location of the
   first token.  By default, to implement support for ranges, extend
   its range to the last symbol.  */
509:
510: #ifndef YYLLOC_DEFAULT
511: # define YYLLOC_DEFAULT(Current, Rhs, N) \
512: Current.last_line = Rhs[N].last_line; \
513: Current.last_column = Rhs[N].last_column;
514: #endif
515:
516:
517: /* YYLEX -- calling `yylex' with the right arguments. */
518:
519: #if YYPURE
520: # if YYLSP_NEEDED
521: # ifdef YYLEX_PARAM
522: # define YYLEX yylex (&yylval, &yylloc, YYLEX_PARAM)
523: # else
524: # define YYLEX yylex (&yylval, &yylloc)
525: # endif
526: # else /* !YYLSP_NEEDED */
527: # ifdef YYLEX_PARAM
528: # define YYLEX yylex (&yylval, YYLEX_PARAM)
529: # else
530: # define YYLEX yylex (&yylval)
531: # endif
532: # endif /* !YYLSP_NEEDED */
533: #else /* !YYPURE */
534: # define YYLEX yylex ()
535: #endif /* !YYPURE */
536:
537:
538: /* Enable debugging if requested. */
539: #if YYDEBUG
540:
541: # ifndef YYFPRINTF
542: # include <stdio.h> /* INFRINGES ON USER NAME SPACE */
543: # define YYFPRINTF fprintf
544: # endif
545:
546: # define YYDPRINTF(Args) \
547: do { \
548: if (yydebug) \
549: YYFPRINTF Args; \
550: } while (0)
551: /* Nonzero means print parse trace. It is left uninitialized so that
552: multiple parsers can coexist. */
553: int yydebug;
554: #else /* !YYDEBUG */
555: # define YYDPRINTF(Args)
556: #endif /* !YYDEBUG */
557:
558: /* YYINITDEPTH -- initial size of the parser's stacks. */
559: #ifndef YYINITDEPTH
560: # define YYINITDEPTH 200
561: #endif
562:
563: /* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
564: if the built-in stack extension method is used).
565:
566: Do not make this value too large; the results are undefined if
567: SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH)
568: evaluated with infinite-precision integer arithmetic. */
569:
570: #if YYMAXDEPTH == 0
571: # undef YYMAXDEPTH
572: #endif
573:
574: #ifndef YYMAXDEPTH
575: # define YYMAXDEPTH 10000
576: #endif
577:
578: #ifdef YYERROR_VERBOSE
579:
580: # ifndef yystrlen
581: # if defined (__GLIBC__) && defined (_STRING_H)
582: # define yystrlen strlen
583: # else
584: /* Return the length of YYSTR. */
585: static YYSIZE_T
586: # if defined (__STDC__) || defined (__cplusplus)
587: yystrlen (const char *yystr)
588: # else
589: yystrlen (yystr)
590: const char *yystr;
591: # endif
592: {
593: register const char *yys = yystr;
594:
595: while (*yys++ != '\0')
596: continue;
597:
598: return yys - yystr - 1;
599: }
600: # endif
601: # endif
602:
603: # ifndef yystpcpy
604: # if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE)
605: # define yystpcpy stpcpy
606: # else
607: /* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
608: YYDEST. */
static char *
# if defined (__STDC__) || defined (__cplusplus)
yystpcpy (char *yydest, const char *yysrc)
# else
yystpcpy (yydest, yysrc)
     char *yydest;
     const char *yysrc;
# endif
{
  /* Local stpcpy replacement: copy YYSRC (including its NUL) into
     YYDEST and return the address of the terminating NUL in YYDEST.  */
  char *yyout = yydest;
  const char *yyin = yysrc;

  for (;;)
    {
      char yych = *yyin++;
      *yyout = yych;
      if (yych == '\0')
        break;
      ++yyout;
    }

  return yyout;
}
626: # endif
627: # endif
628: #endif
629:
630:
631: /* The user can define YYPARSE_PARAM as the name of an argument to be passed
632: into yyparse. The argument should have type void *.
633: It should actually point to an object.
634: Grammar actions can access the variable by casting it
635: to the proper pointer type. */
636:
637: #ifdef YYPARSE_PARAM
638: # if defined (__STDC__) || defined (__cplusplus)
639: # define YYPARSE_PARAM_ARG void *YYPARSE_PARAM
640: # define YYPARSE_PARAM_DECL
641: # else
642: # define YYPARSE_PARAM_ARG YYPARSE_PARAM
643: # define YYPARSE_PARAM_DECL void *YYPARSE_PARAM;
644: # endif
645: #else /* !YYPARSE_PARAM */
646: # define YYPARSE_PARAM_ARG
647: # define YYPARSE_PARAM_DECL
648: #endif /* !YYPARSE_PARAM */
649:
650: /* Prevent warning if -Wstrict-prototypes. */
651: #ifdef __GNUC__
652: # ifdef YYPARSE_PARAM
653: int yyparse (void *);
654: # else
655: int yyparse (void);
656: # endif
657: #endif
658:
659: /* YY_DECL_VARIABLES -- depending whether we use a pure parser,
660: variables are global, or local to YYPARSE. */
661:
662: #define YY_DECL_NON_LSP_VARIABLES \
663: /* The lookahead symbol. */ \
664: int yychar; \
665:
666: /* The semantic value of the lookahead symbol. */ \
667: YYSTYPE yylval; \
668:
669: /* Number of parse errors so far. */ \
670: int yynerrs;
671:
672: #if YYLSP_NEEDED
673: # define YY_DECL_VARIABLES \
674: YY_DECL_NON_LSP_VARIABLES \
675:
676: /* Location data for the lookahead symbol. */ \
677: YYLTYPE yylloc;
678: #else
679: # define YY_DECL_VARIABLES \
680: YY_DECL_NON_LSP_VARIABLES
681: #endif
682:
683:
684: /* If nonreentrant, generate the variables here. */
685:
686: #if !YYPURE
687: YY_DECL_VARIABLES
688: #endif /* !YYPURE */
689:
690: int
691: yyparse (YYPARSE_PARAM_ARG)
692: YYPARSE_PARAM_DECL
693: {
694: /* If reentrant, generate the variables here. */
695:
696: YY_DECL_VARIABLES
697:
698:
699: register int yystate;
700: register int yyn;
701: int yyresult;
702: /* Number of tokens to shift before error messages enabled. */
703: int yyerrstatus;
704: /* Lookahead token as an internal (translated) token number. */
705: int yychar1 = 0;
706:
707: /* Three stacks and their tools:
708: `yyss': related to states,
709: `yyvs': related to semantic values,
710: `yyls': related to locations.
711:
712: Refer to the stacks thru separate pointers, to allow yyoverflow
713: to reallocate them elsewhere. */
714:
715: /* The state stack. */
716: short yyssa[YYINITDEPTH];
717: short *yyss = yyssa;
718: register short *yyssp;
719:
720: /* The semantic value stack. */
721: YYSTYPE yyvsa[YYINITDEPTH];
722: YYSTYPE *yyvs = yyvsa;
723: register YYSTYPE *yyvsp;
724:
725:
726: /* The location stack. */
727: YYLTYPE yylsa[YYINITDEPTH];
728: YYLTYPE *yyls = yylsa;
729: YYLTYPE *yylsp;
730:
731:
732:
733:
734:
735:
736:
737:
738: YYSIZE_T yystacksize = YYINITDEPTH;
739:
740:
741: /* The variables used to return semantic value and location from the
742: action routines. */
743: YYSTYPE yyval;
744:
745: YYLTYPE yyloc;
746:
747:
748: /* When reducing, the number of symbols on the RHS of the reduced
749: rule. */
750: int yylen;
751:
752: YYDPRINTF ((stderr, "Starting parse\n"));
753:
754: yystate = 0;
755: yyerrstatus = 0;
756: yynerrs = 0;
757: yychar = YYEMPTY; /* Cause a token to be read. */
758:
759: /* Initialize stack pointers.
760: Waste one element of value and location stack
761: so that they stay on the same level as the state stack.
762: The wasted elements are never initialized. */
763:
764: yyssp = yyss;
765: yyvsp = yyvs;
766:
767: yylsp = yyls;
768:
769: goto yysetstate;
770:
771: /*------------------------------------------------------------.
772: | yynewstate -- Push a new state, which is found in yystate. |
773: `------------------------------------------------------------*/
774: yynewstate:
775: /* In all cases, when you get here, the value and location stacks
776: have just been pushed. so pushing a state here evens the stacks.
777: */
778: yyssp++;
779:
780: yysetstate:
781: *yyssp = yystate;
782:
783: if (yyssp >= yyss + yystacksize - 1)
784: {
785: /* Get the current used size of the three stacks, in elements. */
786: YYSIZE_T yysize = yyssp - yyss + 1;
787:
788: #ifdef yyoverflow
789: {
790: /* Give user a chance to reallocate the stack. Use copies of
791: these so that the &'s don't force the real ones into
792: memory. */
793: YYSTYPE *yyvs1 = yyvs;
794: short *yyss1 = yyss;
795:
796: /* Each stack pointer address is followed by the size of the
797: data in use in that stack, in bytes. */
798: # if YYLSP_NEEDED
799: YYLTYPE *yyls1 = yyls;
800: /* This used to be a conditional around just the two extra args,
801: but that might be undefined if yyoverflow is a macro. */
802: yyoverflow ("parser stack overflow",
803: &yyss1, yysize * sizeof (*yyssp),
804: &yyvs1, yysize * sizeof (*yyvsp),
805: &yyls1, yysize * sizeof (*yylsp),
806: &yystacksize);
807: yyls = yyls1;
808: # else
809: yyoverflow ("parser stack overflow",
810: &yyss1, yysize * sizeof (*yyssp),
811: &yyvs1, yysize * sizeof (*yyvsp),
812: &yystacksize);
813: # endif
814: yyss = yyss1;
815: yyvs = yyvs1;
816: }
817: #else /* no yyoverflow */
818: # ifndef YYSTACK_RELOCATE
819: goto yyoverflowlab;
820: # else
821: /* Extend the stack our own way. */
822: if (yystacksize >= YYMAXDEPTH)
823: goto yyoverflowlab;
824: yystacksize *= 2;
825: if (yystacksize > YYMAXDEPTH)
826: yystacksize = YYMAXDEPTH;
827:
828: {
829: short *yyss1 = yyss;
830: union yyalloc *yyptr =
831: (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
832: if (! yyptr)
833: goto yyoverflowlab;
834: YYSTACK_RELOCATE (yyss);
835: YYSTACK_RELOCATE (yyvs);
836: # if YYLSP_NEEDED
837: YYSTACK_RELOCATE (yyls);
838: # endif
839: # undef YYSTACK_RELOCATE
840: if (yyss1 != yyssa)
841: YYSTACK_FREE (yyss1);
842: }
843: # endif
844: #endif /* no yyoverflow */
845:
846: yyssp = yyss + yysize - 1;
847: yyvsp = yyvs + yysize - 1;
848: #if YYLSP_NEEDED
849: yylsp = yyls + yysize - 1;
850: #endif
851:
852: YYDPRINTF ((stderr, "Stack size increased to %lu\n",
853: (unsigned long int) yystacksize));
854:
855: if (yyssp >= yyss + yystacksize - 1)
856: YYABORT;
857: }
858:
859: YYDPRINTF ((stderr, "Entering state %d\n", yystate));
860:
861: goto yybackup;
862:
863:
864: /*-----------.
865: | yybackup. |
866: `-----------*/
867: yybackup:
868:
869: /* Do appropriate processing given the current state. */
870: /* Read a lookahead token if we need one and don't already have one. */
871: /* yyresume: */
872:
873: /* First try to decide what to do without reference to lookahead token. */
874:
875: yyn = yypact[yystate];
876: if (yyn == YYFLAG)
877: goto yydefault;
878:
879: /* Not known => get a lookahead token if don't already have one. */
880:
881: /* yychar is either YYEMPTY or YYEOF
882: or a valid token in external form. */
883:
884: if (yychar == YYEMPTY)
885: {
886: YYDPRINTF ((stderr, "Reading a token: "));
887: yychar = YYLEX;
888: }
889:
890: /* Convert token to internal form (in yychar1) for indexing tables with */
891:
892: if (yychar <= 0) /* This means end of input. */
893: {
894: yychar1 = 0;
895: yychar = YYEOF; /* Don't call YYLEX any more */
896:
897: YYDPRINTF ((stderr, "Now at end of input.\n"));
898: }
899: else
900: {
901: yychar1 = YYTRANSLATE (yychar);
902:
903: #if YYDEBUG
904: /* We have to keep this `#if YYDEBUG', since we use variables
905: which are defined only if `YYDEBUG' is set. */
906: if (yydebug)
907: {
908: YYFPRINTF (stderr, "Next token is %d (%s",
909: yychar, yytname[yychar1]);
910: /* Give the individual parser a way to print the precise
911: meaning of a token, for further debugging info. */
912: # ifdef YYPRINT
913: YYPRINT (stderr, yychar, yylval);
914: # endif
915: YYFPRINTF (stderr, ")\n");
916: }
917: #endif
918: }
919:
920: yyn += yychar1;
921: if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1)
922: goto yydefault;
923:
924: yyn = yytable[yyn];
925:
926: /* yyn is what to do for this token type in this state.
927: Negative => reduce, -yyn is rule number.
928: Positive => shift, yyn is new state.
929: New state is final state => don't bother to shift,
930: just return success.
931: 0, or most negative number => error. */
932:
933: if (yyn < 0)
934: {
935: if (yyn == YYFLAG)
936: goto yyerrlab;
937: yyn = -yyn;
938: goto yyreduce;
939: }
940: else if (yyn == 0)
941: goto yyerrlab;
942:
943: if (yyn == YYFINAL)
944: YYACCEPT;
945:
946: /* Shift the lookahead token. */
947: YYDPRINTF ((stderr, "Shifting token %d (%s), ",
948: yychar, yytname[yychar1]));
949:
950: /* Discard the token being shifted unless it is eof. */
951: if (yychar != YYEOF)
952: yychar = YYEMPTY;
953:
954: *++yyvsp = yylval;
955:
956: *++yylsp = yylloc;
957:
958:
959: /* Count tokens shifted since error; after three, turn off error
960: status. */
961: if (yyerrstatus)
962: yyerrstatus--;
963:
964: yystate = yyn;
965: goto yynewstate;
966:
967:
968: /*-----------------------------------------------------------.
969: | yydefault -- do the default action for the current state. |
970: `-----------------------------------------------------------*/
971: yydefault:
972: yyn = yydefact[yystate];
973: if (yyn == 0)
974: goto yyerrlab;
975: goto yyreduce;
976:
977:
978: /*-----------------------------.
979: | yyreduce -- Do a reduction. |
980: `-----------------------------*/
981: yyreduce:
982: /* yyn is the number of a rule to reduce with. */
983: yylen = yyr2[yyn];
984:
985: /* If YYLEN is nonzero, implement the default value of the action:
986: `$$ = $1'.
987:
988: Otherwise, the following line sets YYVAL to the semantic value of
989: the lookahead token. This behavior is undocumented and Bison
990: users should not rely upon it. Assigning to YYVAL
991: unconditionally makes the parser a bit smaller, and it avoids a
992: GCC warning that YYVAL may be used uninitialized. */
993: yyval = yyvsp[1-yylen];
994:
995:
996: /* Similarly for the default location. Let the user run additional
997: commands if for instance locations are ranges. */
998: yyloc = yylsp[1-yylen];
999: YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);
1000:
1001:
1002:
1003: /* We have to keep this `#if YYDEBUG', since we use variables which
1004: are defined only if `YYDEBUG' is set. */
1005: if (yydebug)
1006: {
1007: int yyi;
1008:
1009: YYFPRINTF (stderr, "Reducing via rule %d (line %d), ",
1010: yyn, yyrline[yyn]);
1011:
1012: /* Print the symbols being reduced, and their result. */
1013: for (yyi = yyprhs[yyn]; yyrhs[yyi] > 0; yyi++)
1014: YYFPRINTF (stderr, "%s ", yytname[yyrhs[yyi]]);
1015: YYFPRINTF (stderr, " -> %s\n", yytname[yyr1[yyn]]);
1016: }
1017:
1018:
1019: switch (yyn) {
1020:
1021: case 1:
1022: { ((ParseParams*)parseParam)->treeTop = new GrammarAST(yyvsp[0].topFormList); yyval.num=0; ;
1023: break;}
1024: case 2:
1025: { yyval.topFormList = new ASTList<TopForm>; ;
1026: break;}
1027: case 3:
1028: { (yyval.topFormList=yyvsp[-1].topFormList)->append(yyvsp[0].topForm); ;
1029: break;}
1030: case 4:
1031: { yyval.topForm = yyvsp[0].topForm; ;
1032: break;}
1033: case 5:
1034: { yyval.topForm = yyvsp[0].topForm; ;
1035: break;}
1036: case 6:
1037: { yyval.topForm = yyvsp[0].topForm; ;
1038: break;}
1039: case 7:
1040: { yyval.topForm = yyvsp[0].topForm; ;
1041: break;}
1042: case 8:
1043: { yyval.topForm = yyvsp[0].topForm; ;
1044: break;}
1045: case 9:
1046: { yyval.topForm = new TF_context(yyvsp[-1].str); ;
1047: break;}
1048: case 10:
1049: { yyval.topForm = new TF_verbatim(false, yyvsp[0].str); ;
1050: break;}
1051: case 11:
1052: { yyval.topForm = new TF_verbatim(true, yyvsp[0].str); ;
1053: break;}
1054: case 12:
1055: { yyval.topForm = new TF_option(yyvsp[-1].str, 1); ;
1056: break;}
1057: case 13:
1058: { yyval.topForm = new TF_option(yyvsp[-2].str, yyvsp[-1].num); ;
1059: break;}
1060: case 14:
1061: { yyval.topForm = new TF_terminals(yyvsp[-3].termDecls, yyvsp[-2].termTypes, yyvsp[-1].precSpecs); ;
1062: break;}
1063: case 15:
1064: { yyval.termDecls = new ASTList<TermDecl>; ;
1065: break;}
1066: case 16:
1067: { (yyval.termDecls=yyvsp[-1].termDecls)->append(yyvsp[0].termDecl); ;
1068: break;}
1069: case 17:
1070: { yyval.termDecl = new TermDecl(yyvsp[-3].num, yyvsp[-1].str, sameloc(yyvsp[-1].str, "")); ;
1071: break;}
1072: case 18:
1073: { yyval.termDecl = new TermDecl(yyvsp[-4].num, yyvsp[-2].str, yyvsp[-1].str); ;
1074: break;}
1075: case 19:
1076: { yyval.str = yyvsp[0].str; ;
1077: break;}
1078: case 20:
1079: { yyval.str = nolocNULL(); ;
1080: break;}
1081: case 21:
1082: { yyval.termTypes = new ASTList<TermType>; ;
1083: break;}
1084: case 22:
1085: { (yyval.termTypes=yyvsp[-1].termTypes)->append(yyvsp[0].termType); ;
1086: break;}
1087: case 23:
1088: { yyval.termType = new TermType(yyvsp[-1].str, yyvsp[-2].str, new ASTList<SpecFunc>); ;
1089: break;}
1090: case 24:
1091: { yyval.termType = new TermType(yyvsp[-3].str, yyvsp[-4].str, yyvsp[-1].specFuncs); ;
1092: break;}
1093: case 25:
1094: { yyval.precSpecs = new ASTList<PrecSpec>; ;
1095: break;}
1096: case 26:
1097: { yyval.precSpecs = yyvsp[-1].precSpecs; ;
1098: break;}
1099: case 27:
1100: { yyval.precSpecs = new ASTList<PrecSpec>; ;
1101: break;}
1102: case 28:
1103: { (yyval.precSpecs=yyvsp[-4].precSpecs)->append(new PrecSpec(whichKind(yyvsp[-3].str), yyvsp[-2].num, yyvsp[-1].sm_stringList)); ;
1104: break;}
1105: case 29:
1106: { yyval.sm_stringList = new ASTList<LocString>; ;
1107: break;}
1108: case 30:
1109: { (yyval.sm_stringList=yyvsp[-1].sm_stringList)->append(yyvsp[0].str); ;
1110: break;}
1111: case 31:
1112: { yyval.str = yyvsp[0].str; ;
1113: break;}
1114: case 32:
1115: { yyval.str = yyvsp[0].str; ;
1116: break;}
1117: case 33:
1118: { yyval.specFuncs = new ASTList<SpecFunc>; ;
1119: break;}
1120: case 34:
1121: { (yyval.specFuncs=yyvsp[-1].specFuncs)->append(yyvsp[0].specFunc); ;
1122: break;}
1123: case 35:
1124: { yyval.specFunc = new SpecFunc(yyvsp[-4].str, yyvsp[-2].sm_stringList, yyvsp[0].str); ;
1125: break;}
1126: case 36:
1127: { yyval.sm_stringList = new ASTList<LocString>; ;
1128: break;}
1129: case 37:
1130: { yyval.sm_stringList = yyvsp[0].sm_stringList; ;
1131: break;}
1132: case 38:
1133: { yyval.sm_stringList = new ASTList<LocString>(yyvsp[0].str); ;
1134: break;}
1135: case 39:
1136: { (yyval.sm_stringList=yyvsp[-2].sm_stringList)->append(yyvsp[0].str); ;
1137: break;}
1138: case 40:
1139: { yyval.topForm = new TF_nonterm(yyvsp[-1].str, yyvsp[-2].str, new ASTList<SpecFunc>,
1140: new ASTList<ProdDecl>(yyvsp[0].prodDecl), NULL); ;
1141: break;}
1142: case 41:
1143: { yyval.topForm = new TF_nonterm(yyvsp[-5].str, yyvsp[-6].str, yyvsp[-3].specFuncs, yyvsp[-2].prodDecls, yyvsp[-1].sm_stringList); ;
1144: break;}
1145: case 42:
1146: { yyval.prodDecls = new ASTList<ProdDecl>; ;
1147: break;}
1148: case 43:
1149: { (yyval.prodDecls=yyvsp[-1].prodDecls)->append(yyvsp[0].prodDecl); ;
1150: break;}
1151: case 44:
1152: { yyval.prodDecl = new ProdDecl(yyvsp[-1].rhsList, yyvsp[0].str); ;
1153: break;}
1154: case 45:
1155: { yyval.str = yyvsp[0].str; ;
1156: break;}
1157: case 46:
1158: { yyval.str = nolocNULL(); ;
1159: break;}
1160: case 47:
1161: { yyval.rhsList = new ASTList<RHSElt>; ;
1162: break;}
1163: case 48:
1164: { (yyval.rhsList=yyvsp[-1].rhsList)->append(yyvsp[0].rhsElt); ;
1165: break;}
1166: case 49:
1167: { yyval.rhsElt = new RH_name(sameloc(yyvsp[0].str, ""), yyvsp[0].str); ;
1168: break;}
1169: case 50:
1170: { yyval.rhsElt = new RH_name(yyvsp[-2].str, yyvsp[0].str); ;
1171: break;}
1172: case 51:
1173: { yyval.rhsElt = new RH_sm_string(sameloc(yyvsp[0].str, ""), yyvsp[0].str); ;
1174: break;}
1175: case 52:
1176: { yyval.rhsElt = new RH_sm_string(yyvsp[-2].str, yyvsp[0].str); ;
1177: break;}
1178: case 53:
1179: { yyval.rhsElt = new RH_prec(yyvsp[-1].str); ;
1180: break;}
1181: case 54:
1182: { yyval.sm_stringList = NULL; ;
1183: break;}
1184: case 55:
1185: { yyval.sm_stringList = yyvsp[-1].sm_stringList; ;
1186: break;}
1187: }
1188:
1189:
1190: yyvsp -= yylen;
1191: yyssp -= yylen;
1192:
1193: yylsp -= yylen;
1194:
1195:
1196:
1197: if (yydebug)
1198: {
1199: short *yyssp1 = yyss - 1;
1200: YYFPRINTF (stderr, "state stack now");
1201: while (yyssp1 != yyssp)
1202: YYFPRINTF (stderr, " %d", *++yyssp1);
1203: YYFPRINTF (stderr, "\n");
1204: }
1205:
1206:
1207: *++yyvsp = yyval;
1208:
1209: *++yylsp = yyloc;
1210:
1211:
1212: /* Now `shift' the result of the reduction. Determine what state
1213: that goes to, based on the state we popped back to and the rule
1214: number reduced by. */
1215:
1216: yyn = yyr1[yyn];
1217:
1218: yystate = yypgoto[yyn - YYNTBASE] + *yyssp;
1219: if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp)
1220: yystate = yytable[yystate];
1221: else
1222: yystate = yydefgoto[yyn - YYNTBASE];
1223:
1224: goto yynewstate;
1225:
1226:
1227: /*------------------------------------.
1228: | yyerrlab -- here on detecting error |
1229: `------------------------------------*/
1230: yyerrlab:
1231: /* If not already recovering from an error, report this error. */
1232: if (!yyerrstatus)
1233: {
1234: ++yynerrs;
1235:
1236: #ifdef YYERROR_VERBOSE
1237: yyn = yypact[yystate];
1238:
1239: if (yyn > YYFLAG && yyn < YYLAST)
1240: {
1241: YYSIZE_T yysize = 0;
1242: char *yymsg;
1243: int yyx, yycount;
1244:
1245: yycount = 0;
1246: /* Start YYX at -YYN if negative to avoid negative indexes in
1247: YYCHECK. */
1248: for (yyx = yyn < 0 ? -yyn : 0;
1249: yyx < (int) (sizeof (yytname) / sizeof (char *)); yyx++)
1250: if (yycheck[yyx + yyn] == yyx)
1251: yysize += yystrlen (yytname[yyx]) + 15, yycount++;
1252: yysize += yystrlen ("parse error, unexpected ") + 1;
1253: yysize += yystrlen (yytname[YYTRANSLATE (yychar)]);
1254: yymsg = (char *) YYSTACK_ALLOC (yysize);
1255: if (yymsg != 0)
1256: {
1257: char *yyp = yystpcpy (yymsg, "parse error, unexpected ");
1258: yyp = yystpcpy (yyp, yytname[YYTRANSLATE (yychar)]);
1259:
1260: if (yycount < 5)
1261: {
1262: yycount = 0;
1263: for (yyx = yyn < 0 ? -yyn : 0;
1264: yyx < (int) (sizeof (yytname) / sizeof (char *));
1265: yyx++)
1266: if (yycheck[yyx + yyn] == yyx)
1267: {
1268: const char *yyq = ! yycount ? ", expecting " : " or ";
1269: yyp = yystpcpy (yyp, yyq);
1270: yyp = yystpcpy (yyp, yytname[yyx]);
1271: yycount++;
1272: }
1273: }
1274: yyerror (yymsg);
1275: YYSTACK_FREE (yymsg);
1276: }
1277: else
1278: yyerror ("parse error; also virtual memory exhausted");
1279: }
1280: else
1281: #endif /* defined (YYERROR_VERBOSE) */
1282: yyerror ("parse error");
1283: }
1284: goto yyerrlab1;
1285:
1286:
1287: /*--------------------------------------------------.
1288: | yyerrlab1 -- error raised explicitly by an action |
1289: `--------------------------------------------------*/
1290: yyerrlab1:
1291: if (yyerrstatus == 3)
1292: {
1293: /* If just tried and failed to reuse lookahead token after an
1294: error, discard it. */
1295:
1296: /* return failure if at end of input */
1297: if (yychar == YYEOF)
1298: YYABORT;
1299: YYDPRINTF ((stderr, "Discarding token %d (%s).\n",
1300: yychar, yytname[yychar1]));
1301: yychar = YYEMPTY;
1302: }
1303:
1304: /* Else will try to reuse lookahead token after shifting the error
1305: token. */
1306:
1307: yyerrstatus = 3; /* Each real token shifted decrements this */
1308:
1309: goto yyerrhandle;
1310:
1311:
1312: /*-------------------------------------------------------------------.
1313: | yyerrdefault -- current state does not do anything special for the |
1314: | error token. |
1315: `-------------------------------------------------------------------*/
1316: yyerrdefault:
1317:
1318: /* This is wrong; only states that explicitly want error tokens
1319: should shift them. */
1320:
1321: /* If its default is to accept any token, ok. Otherwise pop it. */
1322: yyn = yydefact[yystate];
1323: if (yyn)
1324: goto yydefault;
1325:
1326:
1327:
1328: /*---------------------------------------------------------------.
1329: | yyerrpop -- pop the current state because it cannot handle the |
1330: | error token |
1331: `---------------------------------------------------------------*/
1332: yyerrpop:
1333: if (yyssp == yyss)
1334: YYABORT;
1335: yyvsp--;
1336: yystate = *--yyssp;
1337:
1338: yylsp--;
1339:
1340:
1341:
1342: if (yydebug)
1343: {
1344: short *yyssp1 = yyss - 1;
1345: YYFPRINTF (stderr, "Error: state stack now");
1346: while (yyssp1 != yyssp)
1347: YYFPRINTF (stderr, " %d", *++yyssp1);
1348: YYFPRINTF (stderr, "\n");
1349: }
1350:
1351:
1352: /*--------------.
1353: | yyerrhandle. |
1354: `--------------*/
1355: yyerrhandle:
1356: yyn = yypact[yystate];
1357: if (yyn == YYFLAG)
1358: goto yyerrdefault;
1359:
1360: yyn += YYTERROR;
1361: if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR)
1362: goto yyerrdefault;
1363:
1364: yyn = yytable[yyn];
1365: if (yyn < 0)
1366: {
1367: if (yyn == YYFLAG)
1368: goto yyerrpop;
1369: yyn = -yyn;
1370: goto yyreduce;
1371: }
1372: else if (yyn == 0)
1373: goto yyerrpop;
1374:
1375: if (yyn == YYFINAL)
1376: YYACCEPT;
1377:
1378: YYDPRINTF ((stderr, "Shifting error token, "));
1379:
1380: *++yyvsp = yylval;
1381:
1382: *++yylsp = yylloc;
1383:
1384:
1385: yystate = yyn;
1386: goto yynewstate;
1387:
1388:
1389: /*-------------------------------------.
1390: | yyacceptlab -- YYACCEPT comes here. |
1391: `-------------------------------------*/
1392: yyacceptlab:
1393: yyresult = 0;
1394: goto yyreturn;
1395:
1396: /*-----------------------------------.
1397: | yyabortlab -- YYABORT comes here. |
1398: `-----------------------------------*/
1399: yyabortlab:
1400: yyresult = 1;
1401: goto yyreturn;
1402:
1403: /*---------------------------------------------.
1404: | yyoverflowab -- parser overflow comes here. |
1405: `---------------------------------------------*/
1406: yyoverflowlab:
1407: yyerror ("parser stack overflow");
1408: yyresult = 2;
1409: /* Fall through. */
1410:
1411: yyreturn:
1412:
1413: if (yyss != yyssa)
1414: YYSTACK_FREE (yyss);
1415:
1416: return yyresult;
1417: }
1418:
1419: /* ------------------ extra C code ------------------ */
1420: AssocKind whichKind(LocString * /*owner*/ kind)
1421: {
1422:
1423: Owner<LocString> killer(kind);
1424:
1425:
1426: if (kind->equals(syntax)) { \
1427: return value; \
1428: }
1429: CHECK("left", AK_LEFT);
1430: CHECK("right", AK_RIGHT);
1431: CHECK("nonassoc", AK_NONASSOC);
1432: CHECK("prec", AK_NEVERASSOC);
1433: CHECK("assoc_split", AK_SPLIT);
1434:
1435:
1436: xbase(sm_stringc << kind->locString()
1437: << ": invalid associativity kind: " << *kind);
1438: }
Start cpp section to elk/elk_mlsstr.cpp[1
/1
]
1: #line 21043 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15: MLSubstrate::MLSubstrate(ReportError *err)   // construct in the initial scanning state (see reset())
16: : EmbeddedLang(err)
17: {
18: reset();
19: }
20:
21: void MLSubstrate::reset(int initNest)   // return to initial state; initNest seeds the delimiter-nesting counter
22: {
23: state = ST_NORMAL;
24: nesting = initNest;
25: comNesting = 0;   // no enclosing (* *) comments
26: prev = 0;   // no previous character seen yet
27: text.setlength(0);   // discard accumulated text
28: }
29:
30:
31: MLSubstrate::~MLSubstrate()   // no resources owned beyond members
32: {}
33:
34:
35: void MLSubstrate::handle(char const *str, int len, char finalDelim)   // feed 'len' chars into the ML lexing state machine; 'finalDelim' is only used in the error message
36: {
37: text.append(str, len);   // accumulate everything, regardless of state
38:
39: for (; len>0; len--,str++) {
40: switch (state) {
41: case ST_NORMAL:
42: switch (*str) {
43: case '{':
44: case '(':
45: case '[':
46: nesting++;   // any opening delimiter deepens nesting
47: break;
48:
49: case '}':
50: case ')':
51: case ']':
52: if (nesting == 0) {
53: err->reportError(sm_stringc
54: << "unexpected closing delimiter `" << *str
55: << "' -- probably due to missing `" << finalDelim << "'");
56: }
57: else {
58: nesting--;
59: }
60: break;
61:
62: case '\"':
63: state = ST_STRING;
64: break;
65:
66: case '\'':
67: state = ST_CHAR;
68: break;
69:
70: case '*':
71: if (prev == '(') {
72: state = ST_COMMENT;
73: xassert(comNesting == 0);
74: xassert(nesting > 0);
75: nesting--;
76:
77: // the preceding '(' was counted as a paren at line 46; undo that,
78: // since "(*" is really the start of an ML comment
79: prev = 0;   // clear so this '*' can't also pair with a later char
80: continue;   // skip the prev-update at the loop bottom
81: }
82: break;
83: }
84: break;
85:
86: case ST_STRING:
87: case ST_CHAR:
88: if (prev != '\\') {   // a backslash escapes the next character
89: if ((state == ST_STRING && *str == '\"') ||
90: (state == ST_CHAR && *str == '\'')) {
91: state = ST_NORMAL;   // matching close quote ends the literal
92: }
93: else if (*str == '\n') {
94: err->reportError("unterminated sm_string or char literal");
95: }
96: }
97: break;
98:
99: case ST_COMMENT:
100: if (prev == '(' && *str == '*') {
101: comNesting++;   // ML comments nest
102: prev = 0;
103: continue;
104: }
105: else if (prev == '*' && *str == ')') {
106: xassert(comNesting >= 0);
107: if (comNesting == 0) {
108: // closing the outermost "(*": leave comment mode
109: state = ST_NORMAL;
110: }
111: else {
112: // closing a nested "(*": stay in the comment
113: comNesting--;
114: }
115: }
116: break;
117:
118: default:
119: xfailure("unknown state");
120: }
121:
122: prev = *str;
123: }
124: }
125:
126:
127: bool MLSubstrate::zeroNesting() const   // true when outside any delimiter, string, char, or comment
128: {
129: return state == ST_NORMAL && nesting == 0;
130: }
131:
132:
133: sm_string MLSubstrate::getFuncBody() const   // entire accumulated text is the body
134: {
135: return text;
136: }
137:
138:
139:
140:
141: sm_string MLSubstrate::getDeclName() const   // extract the identifier immediately preceding the first '('; xformat on failure
142: {
143: // scan forward to the first '(' in the accumulated text, then back up
144: // over whitespace and identifier characters to isolate the name
145: char const *start = text.pcharc();
146: char const *p = start;
147:
148: // find the first '('
149: while (*p && *p!='(') { p++; }
150: if (!*p) {
151: xformat("missing '('");
152: }
153: if (p == start) {
154: xformat("missing name");
155: }
156:
157: // back up over whitespace between the name and '('
158: p--;
159: while (p>=start && isspace(*p)) { p--; }
160: if (p<start) {
161: xformat("missing name");
162: }
163: char const *nameEnd = p+1;   // one past the last name character
164:
165: // back up over the identifier characters themselves
166: while (p>=start &&
167: (isalnum(*p) || *p=='_'))
168: { p--; }
169: p++;
170:
171: // [p, nameEnd) is the identifier
172: return sm_string(p, nameEnd-p);
173: }
174:
175:
176:
177:
178:
179:
180:
181:
182:
183:
184: class Test {   // self-test driver for the ML substrate scanner
185: public:
186: void feed(ML &ml, char const *src);   // push src through ml in small chunks
187: void test(char const *src, ML::State state, int nesting,
188: int comNesting, char prev);   // feed src, then check the scanner's final state
189: void normal(char const *src, int nesting);   // expect ST_NORMAL with given nesting
190: void str(char const *src, int nesting, bool bs);   // expect string state (and char-literal variant)
191: void yes(char const *src);   // expect zeroNesting() true
192: void no(char const *src);   // expect zeroNesting() false
193: void name(char const *body, char const *n);   // expect getDeclName() == n
194: void badname(char const *body);   // expect getDeclName() to throw
195: int main();
196: };
197:
198:
199:
200:
201: void Test::feed(ML &ml, char const *src)   // feed src in chunks of at most 10 chars to exercise cross-chunk state
202: {
203: std::cout << "trying: " << src << std::endl;
204: while (*src) {
205: // deliberately small chunk size so state persists across handle() calls
206: int len = min(strlen(src), 10);
207: ml.handle(src, len, '}');
208: src += len;
209: }
210: }
211:
212:
213: void Test::test(char const *src, ML::State state, int nesting,
214: int comNesting, char prev)   // feed src and verify the scanner's final state
215: {
216: ML ml;
217: feed(ml, src);
218: // NOTE(review): the 'comNesting' parameter is never compared against ml.comNesting, so callers' comment-nesting expectations go unchecked -- confirm the expected values before enabling such a check
219: if (!( ml.state == state &&
220: ml.nesting == nesting &&
221: ml.prev == prev )) {
222: xfailure(sm_stringc << "failed on src: " << src);
223: }
224: }
225:
226:
227: void Test::normal(char const *src, int nesting)   // expect ST_NORMAL; prev is src's last character
228: {
229: test(src, ML::ST_NORMAL, nesting, 0, src[strlen(src)-1]);
230: }
231:
232: void Test::str(char const *src, int nesting, bool bs)   // expect string state; bs true means src ends mid-escape
233: {
234: char prev = (bs? '\\' : src[strlen(src)-1]);
235: test(src, ML::ST_STRING, nesting, 0, prev);
236:
237: // same input with quotes swapped must land in the char-literal state
238: sm_string another = replace(src, "\"", "\'");
239: test(another, ML::ST_CHAR, nesting, 0, prev);
240: }
241:
242:
243: void Test::yes(char const *src)   // src must leave the scanner at zero nesting
244: {
245: ML ml;
246: feed(ml, src);
247:
248: xassert(ml.zeroNesting());
249: }
250:
251: void Test::no(char const *src)   // src must leave the scanner NOT at zero nesting
252: {
253: ML ml;
254: feed(ml, src);
255:
256: xassert(!ml.zeroNesting());
257: }
258:
259: void Test::name(char const *body, char const *n)   // getDeclName() on body must yield n
260: {
261: ML ml;
262: feed(ml, body);
263: xassert(ml.getDeclName().equals(n));
264: }
265:
266: void Test::badname(char const *body)   // getDeclName() on body must throw (any exception counts as success)
267: {
268: ML ml;
269: feed(ml, body);
270: try {
271: ml.getDeclName();
272: xfailure("got a name when it shouldn't have!");
273: }
274: catch (...)
275: {}
276: }
277:
278:
279: int Test::main()   // run all substrate-scanner self-tests; returns 0 on success
280: {
281: normal("int main()", 0);
282: normal("int main() { hi", 1);
283: normal("int main() { hi {", 2);
284: normal("int main() { hi { foo[5", 3);
285: normal("int main() { hi { foo[5] and ", 2);
286: normal("int main() { hi { foo[5] and } bar ", 1);
287: normal("int main() { hi { foo[5] and } bar } baz ", 0);
288:
289: normal("main() { printf(\"hello \\ world\"); ret", 1);
290:
291: normal("()[]{}([{}])", 0);
292: normal("{ ()[]{}([{}]) } ", 0);
293: normal("( ()[]{}([{}]) )", 0);
294: normal("[ ()[]{}([{}]) ]", 0);
295: normal("\"foo\" ()[]{}([{}])", 0);
296:
297: str("main() { printf(\"hello", 2, false);
298: str("main() { printf(\"hello \\", 2, true);
299: str("main() { printf(\"hello \\ world", 2, false);
300: str("main() { printf(\"hello \\ world\", \"hi", 2, false);
301:
302: test("\"a\" 'b' (", ML::ST_NORMAL, 1, 0, '(');
303:
304: // ML "(* ... *)" comment state tracking, including nesting
305: test("(", ML::ST_NORMAL, 1, 0, '(');
306: test("(*", ML::ST_COMMENT, 0, 0, 0);
307: test("(*)", ML::ST_COMMENT, 0, 0, ')');
308: test("(*)(", ML::ST_COMMENT, 0, 0, '(');
309: test("(*)(*", ML::ST_COMMENT, 0, 1, 0);
310: test("(*)(*)", ML::ST_COMMENT, 0, 1, ')');
311: test("(*)(*)*", ML::ST_COMMENT, 0, 1, '*');
312: test("(*)(*)*)", ML::ST_COMMENT, 0, 0, ')');
313: test("(*)(*)*)*", ML::ST_COMMENT, 0, 0, '*');
314: test("(*)(*)*)*)", ML::ST_NORMAL, 0, 0, ')');
315:
316: test("(*(*(*(*", ML::ST_COMMENT, 0, 4, 0);   // NOTE(review): manual trace gives comNesting 3 (one "(*" enters, three nest); value 4 here is unverified because Test::test ignores comNesting -- confirm before enabling that check
317:
318: yes("main() {}");
319: yes("main() { printf(\"foo\", 3, 4 (*yep{*)); }");
320: yes("some (* junk {\n more*)");
321: yes("'\\''");
322: yes("\"\\\"\"");
323: yes("[][][][][]");
324: yes("\"[[[\"");
325: yes("*");
326: yes("(* [ / * [ *)");
327:
328: no("\"");
329: no("(");
330: no(" ( (* ) *) ");
331:
332: name("int main()", "main");
333: name("int eval(Environment &env)", "eval");
334: name("man()", "man");
335: badname("(");
336: badname(" (");
337: badname(" ");
338: badname("");
339: badname(")");
340: badname("main");
341:
342: std::cout << "\nmlsstr: all tests PASSED\n";
343:
344: return 0;
345: }
346:
347: int main()   // entry point: run the self-test suite
348: {
349: Test t;
350: return t.main();
351: }
352:
353:
Start cpp section to elk/elk_parsetables.cpp[1
/1
]
1: #line 21397 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17: enum { UNASSIGNED = -1 };   // sentinel for not-yet-assigned table indices
18:
19:
20:
21: template <class EltType>
22: void printTable(EltType const *table, int size, int rowLength,
23: char const *typeName, char const *tableName);   // forward declaration; used for debug dumps
24:
25:
26: ParseTables::ParseTables(int t, int nt, int s, int p, StateId start, int final)   // owning constructor: allocate all tables
27: {
28: alloc(t, nt, s, p, start, final);
29: }
30:
31: template <class T>
32: void allocInitArray(T *&arr, int size, T init)   // heap-allocate 'size' elements, all set to 'init'
33: {
34: arr = new T[size];
35: for (int i=0; i<size; i++) {
36: arr[i] = init;
37: }
38: }
39:
40: template <class T>
41: void allocZeroArray(T *&arr, int size)   // heap-allocate 'size' elements, zero-filled; assumes T is memset-safe (POD)
42: {
43: arr = new T[size];
44: memset(arr, 0, sizeof(arr[0]) * size);
45: }
46:
47: void ParseTables::alloc(int t, int nt, int s, int p, StateId start, int final)   // allocate all tables for t terms, nt nonterms, s states, p productions
48: {
49: owning = true;
50:
51: temp = new TempData(s);   // growable staging area; freed by finishTables() or the dtor
52:
53: numTerms = t;
54: numNonterms = nt;
55: numStates = s;
56: numProds = p;
57:
58: actionCols = numTerms;
59: actionRows = numStates;
60:
61: gotoCols = numNonterms;
62: gotoRows = numStates;
63:
64: allocZeroArray(actionTable, actionTableSize());
65:
66: allocZeroArray(gotoTable, gotoTableSize());
67:
68: allocZeroArray(prodInfo, numProds);
69:
70: allocZeroArray(stateSymbol, numStates);
71:
72: // the ambiguity table is built incrementally in 'temp' and only
73: // materialized by finishTables(), so it starts empty here
74:
75: ambigTableSize = 0;
76: ambigTable = NULL;
77:
78: startState = start;
79: finalProductionIndex = final;
80:
81: allocZeroArray(nontermOrder, nontermOrderSize());
82:
83: if (ENABLE_CRS_COMPRESSION) {
84: allocZeroArray(firstWithTerminal, numTerms);
85: allocZeroArray(firstWithNonterminal, numNonterms);
86: }
87: else {
88: firstWithTerminal = NULL;
89: firstWithNonterminal = NULL;
90: }
91:
92: bigProductionListSize = 0;
93: bigProductionList = NULL;
94: if (ENABLE_CRS_COMPRESSION) {
95: allocZeroArray(productionsForState, numStates);
96: }
97: else {
98: productionsForState = NULL;
99: }
100:
101: if (ENABLE_CRS_COMPRESSION) {
102: allocZeroArray(ambigStateTable, numStates);
103: }
104: else {
105: ambigStateTable = NULL;
106: }
107:
108: // one bit per terminal, with each row padded up to a 4-byte multiple
109: errorBitsRowSize = ((numTerms+31) >> 5) * 4;
110:
111: // the compressed structures below are only built on demand
112: uniqueErrorRows = 0;
113: errorBits = NULL;
114: errorBitsPointers = NULL;
115:
116: actionIndexMap = NULL;
117: actionRowPointers = NULL;
118:
119: gotoIndexMap = NULL;
120: gotoRowPointers = NULL;
121: }
122:
123:
124: ParseTables::~ParseTables()   // free staging data, owned tables, and the always-owned pointer arrays
125: {
126: if (temp) {
127: delete temp;   // finishTables() was never called
128: }
129:
130: if (owning) {   // arrays below are only owned in the owning-constructor case
131: delete[] actionTable;
132: delete[] gotoTable;
133: delete[] prodInfo;
134: delete[] stateSymbol;
135:
136: if (ambigTable) {
137: delete[] ambigTable;
138: }
139:
140: delete[] nontermOrder;
141:
142: if (firstWithTerminal) {
143: delete[] firstWithTerminal;
144: }
145: if (firstWithNonterminal) {
146: delete[] firstWithNonterminal;
147: }
148:
149: if (bigProductionList) {
150: delete[] bigProductionList;
151: }
152:
153: if (errorBits) {
154: delete[] errorBits;
155: }
156: if (actionIndexMap) {
157: delete[] actionIndexMap;
158: }
159: if (gotoIndexMap) {
160: delete[] gotoIndexMap;
161: }
162: }
163:
164: // these pointer arrays are freed regardless of 'owning'
165: if (productionsForState) {
166: delete[] productionsForState;
167: }
168: if (ambigStateTable) {
169: delete[] ambigStateTable;
170: }
171: if (errorBitsPointers) {
172: delete[] errorBitsPointers;
173: }
174: if (actionRowPointers) {
175: delete[] actionRowPointers;
176: }
177: if (gotoRowPointers) {
178: delete[] gotoRowPointers;
179: }
180: }
181:
182:
183: ParseTables::TempData::TempData(int numStates)   // growable staging area used while tables are being built
184: : ambigTable(),
185: bigProductionList(),
186: productionsForState(numStates),
187: ambigStateTable(numStates)
188: {
189: productionsForState.setAll(UNASSIGNED);   // no state has an entry yet
190: ambigStateTable.setAll(UNASSIGNED);
191: }
192:
193: ParseTables::TempData::~TempData()   // members clean themselves up
194: {}
195:
196:
197: ActionEntry ParseTables::validateAction(int code) const
198: {
199: // narrow 'code' to the ActionEntry representation, asserting that the
200: // round trip is lossless; this guards against ActionEntry being too
201: // small for the grammar's encoded action values
202:
203:
204:
205: ActionEntry ret = (ActionEntry)code;
206: xassert((int)ret == code);
207: return ret;
208: }
209:
210: GotoEntry ParseTables::validateGoto(int code) const
211: {
212: // narrow losslessly, and also reject collision with the error sentinel
213: GotoEntry ret = (GotoEntry)code;
214: xassert((int)ret == code);
215: xassert(ret != errorGotoEntry);
216: return ret;
217: }
218:
219:
220:
221: ParseTables::ParseTables(bool o)   // non-owning constructor: caller supplies table storage elsewhere
222: : owning(o),
223: temp(NULL)
224: {
225: xassert(owning == false);   // 'o' exists only to select this overload
226: }
227:
228:
229:
230: ActionEntry makeAE(ActionEntryKind k, int index)   // pack kind bits with an index, clamping oversized indices
231: {
232: // indices beyond AE_MAXINDEX cannot be represented in the entry
233: if ((unsigned)index <= AE_MAXINDEX) {
234: // fits: nothing to do
235: }
236: else {
237: // clamp and complain; the resulting tables are wrong, but this at
238: // least makes the problem visible
239: std::cout << "error: index " << index << " truncated!\n";
240: index = AE_MAXINDEX;
241: }
242:
243: if (k == AE_ERROR) {
244: xassert(index == 0);   // the error entry carries no index payload
245: }
246:
247: return k | index;
248: }
249:
250:
251:
252: ActionEntry ParseTables::encodeShift(StateId destState, int shiftedTermId)   // encode a shift action (CRS: as delta from first state shifting this terminal)
253: {
254: #if ENABLE_CRS_COMPRESSION
255: int delta = destState - firstWithTerminal[shiftedTermId];
256: return makeAE(AE_SHIFT, delta);
257: #else
258: return validateAction(+destState+1);   // shifts are encoded as positive state+1
259: #endif
260: }
261:
262:
263: ActionEntry ParseTables::encodeReduce(int prodId, StateId inWhatState)   // encode a reduce action (CRS: as index into the state's production set)
264: {
265: #if ENABLE_CRS_COMPRESSION
266: int begin = temp->productionsForState[inWhatState];
267: int end = temp->bigProductionList.length();
268: if (begin == UNASSIGNED) {
269: // first production recorded for this state
270: temp->productionsForState[inWhatState] = end;
271: temp->bigProductionList.push(prodId);
272: return AE_REDUCE | 0 /*first in set*/;
273: }
274: else {
275: // search this state's existing set for the production
276: int delta;
277: for (int i=begin; i<end; i++) {
278: if (temp->bigProductionList[i] == prodId) {
279: // already present; reuse its offset
280: delta = i-begin;
281: goto encode;
282: }
283: }
284:
285: // not present; append it to the state's set
286: temp->bigProductionList.push(prodId);
287: delta = end-begin;
288:
289: encode:
290: return makeAE(AE_REDUCE, delta);
291: }
292:
293: #else
294: return validateAction(-prodId-1);   // reduces are encoded as negative prodId-1
295: #endif
296: }
297:
298:
299: ActionEntry ParseTables::encodeAmbig   // encode an ambiguous-action set, sharing storage with identical earlier sets
300: (ArrayStack<ActionEntry> const &set, StateId inWhatState)
301: {
302: #if ENABLE_CRS_COMPRESSION
303: int begin = temp->ambigStateTable[inWhatState];
304: int end = temp->ambigTable.length();
305: if (begin == UNASSIGNED) {
306: // first ambiguous set for this state
307: temp->ambigStateTable[inWhatState] = end;
308: appendAmbig(set);
309: return makeAE(AE_AMBIGUOUS, 0 /*first in set*/);
310: }
311: else {
312: // this state already has ambiguous entries in [begin,end); scan them
313: // for a set identical to 'set' so its storage can be reused
314:
315:
316:
317:
318:
319:
320: // each stored set occupies length+1 slots (leading length word)
321: int encodeLen = set.length()+1;
322:
323: for (int i=begin; i+encodeLen <= end; i++) {
324: // compare 'set' against the candidate starting at slot i
325: if (compareAmbig(set, i)) {
326: return makeAE(AE_AMBIGUOUS, i-begin /*delta*/);
327: }
328: }
329:
330: // no match; append a fresh copy
331: appendAmbig(set);
332: return makeAE(AE_AMBIGUOUS, end-begin /*delta*/);
333: }
334:
335: #else
336: int end = temp->ambigTable.length();
337: appendAmbig(set);
338: return validateAction(numStates+end+1);   // ambiguous entries live past the state-id range
339: #endif
340: }
341:
342:
343: void ParseTables::appendAmbig(ArrayStack<ActionEntry> const &set)   // append 'set' to the staging ambig table: length word, then elements
344: {
345: temp->ambigTable.push(set.length());
346: for (int j=0; j < set.length(); j++) {
347: temp->ambigTable.push(set[j]);
348: }
349: }
350:
351: bool ParseTables::compareAmbig(ArrayStack<ActionEntry> const &set,
352: int startIndex)   // true if the stored set at 'startIndex' equals 'set' (length word + elements)
353: {
354: if (temp->ambigTable[startIndex] != set.length()) {
355: return false;
356: }
357: for (int j=0; j < set.length(); j++) {
358: if (temp->ambigTable[startIndex+1+j] != set[j]) {
359: return false;
360: }
361: }
362: return true;
363: }
364:
365:
366: ActionEntry ParseTables::encodeError() const   // the distinguished "no action" entry
367: {
368: #if ENABLE_CRS_COMPRESSION
369: return makeAE(AE_ERROR, 0);
370: #else
371: return validateAction(0);
372: #endif
373: }
374:
375:
376: GotoEntry ParseTables::encodeGoto(StateId destState, int shiftedNontermId) const   // encode a goto (CRS: as delta from first state for this nonterminal)
377: {
378: #if ENABLE_CRS_COMPRESSION
379: xassert(0 <= shiftedNontermId && shiftedNontermId < numNonterms);
380: int delta = destState - firstWithNonterminal[shiftedNontermId];
381: return validateGoto(delta);
382: #else
383: return validateGoto(destState);
384: #endif
385: }
386:
387:
388:
389: template <class T>
390: void copyArray(int &len, T *&dest, ArrayStack<T> const &src)   // snapshot 'src' into a fresh heap array; assumes T is memcpy-safe
391: {
392: len = src.length();
393: dest = new T[len];
394: memcpy(dest, src.getArray(), sizeof(T) * len);
395: }
396:
397:
398:
399: template <class T>
400: void copyIndexPtrArray(int len, T **&dest, T *base, ArrayStack<int> const &src)   // turn staged indices into pointers into 'base'; UNASSIGNED becomes NULL
401: {
402: dest = new T* [len];
403: for (int i=0; i<len; i++) {
404: if (src[i] != UNASSIGNED) {
405: dest[i] = base + src[i];
406: }
407: else {
408: dest[i] = NULL;
409: }
410: }
411: }
412:
413: void ParseTables::finishTables()   // materialize the staged ('temp') data into the final arrays and free the staging area
414: {
415: // copy the ambiguity table out of the staging area
416: copyArray(ambigTableSize, ambigTable, temp->ambigTable);
417:
418: if (ENABLE_CRS_COMPRESSION) {
419: // materialize the per-state production sets
420: copyArray(bigProductionListSize, bigProductionList, temp->bigProductionList);
421:
422: // convert staged indices to direct pointers into bigProductionList
423: copyIndexPtrArray(numStates, productionsForState, bigProductionList,
424: temp->productionsForState);
425:
426: // likewise for the per-state ambiguity entries
427: copyIndexPtrArray(numStates, ambigStateTable, ambigTable,
428: temp->ambigStateTable);
429: }
430:
431: delete temp;
432: temp = NULL;
433: }
434:
435:
436:
437: void ParseTables::computeErrorBits()   // build the per-state error-bit rows, then share identical rows between states
438: {
439: traceProgress() << "computing errorBits[]\n";
440:
441: // must not already be computed
442: xassert(!errorBits);
443:
444: // one bit per terminal, rows padded to a 4-byte multiple
445: int rowSize = ((numTerms+31) >> 5) * 4;
446: allocZeroArray(errorBits, numStates * rowSize);
447:
448: // one row pointer per state
449: allocZeroArray(errorBitsPointers, numStates);
450:
451: // first pass: every state gets its own row
452: fillInErrorBits(true /*setPointers*/);
453:
454: // now deduplicate: any state whose row is byte-identical to an earlier
455: // state's row will share that row's storage; 'compressed[s]' records
456: // which unique row each state maps to
457:
458: int *compressed = new int[numStates];
459: uniqueErrorRows = 0;
460: int s;
461: for (s=0; s < numStates; s++) {
462: // compare against all earlier states' rows
463: for (int t=0; t < s; t++) {
464: // byte-compare full rows
465: if (0==memcmp(errorBitsPointers[s],
466: errorBitsPointers[t],
467: sizeof(ErrorBitsEntry) * errorBitsRowSize)) {
468: // identical: share t's slot
469: compressed[s] = compressed[t];
470: goto next_s;
471: }
472: }
473:
474: // no match: this row gets a fresh slot
475: compressed[s] = uniqueErrorRows;
476: uniqueErrorRows++;
477:
478: next_s:
479: ;
480: }
481:
482: // reallocate at the compressed size
483: delete[] errorBits;
484: allocZeroArray(errorBits, uniqueErrorRows * rowSize);
485:
486: // point each state at its (possibly shared) row
487: for (s=0; s < numStates; s++) {
488: errorBitsPointers[s] = errorBits + (compressed[s] * errorBitsRowSize);
489: }
490: delete[] compressed;
491:
492: // second pass: refill the bits through the shared pointers
493: fillInErrorBits(false /*setPointers*/);
494: }
495:
496:
497: void ParseTables::fillInErrorBits(bool setPointers)   // set bit (s,t) wherever the action for state s on terminal t is an error
498: {
499: for (int s=0; s < numStates; s++) {
500: if (setPointers) {
501: errorBitsPointers[s] = errorBits + (s * errorBitsRowSize);   // one private row per state
502: }
503:
504: for (int t=0; t < numTerms; t++) {
505: if (isErrorAction(actionEntry((StateId)s, t))) {
506: ErrorBitsEntry &b = errorBitsPointers[s][t >> 3];   // byte containing terminal t's bit
507: b |= 1 << (t & 7);
508: }
509: }
510: }
511: }
512:
513:
514: void ParseTables::mergeActionColumns()   // graph-color terminals whose action columns conflict; merge compatible columns
515: {
516: traceProgress() << "merging action columns\n";
517:
518: // error bits must already exist, since merging erases error entries
519: xassert(errorBits);
520:
521: // must not have merged already
522: xassert(!actionIndexMap);
523:
524: if (tracingSys("mergeActionColumnsPre")) {
525: // dump the table before compression
526: printTable(actionTable, actionTableSize(), actionCols,
527: "ActionEntry", "actionTable");
528: }
529:
530: // build a conflict graph over terminals: an edge means two columns
531: // disagree in some state and therefore cannot share storage
532: Bit2d graph(point(numTerms, numTerms));
533: graph.setall(0);
534:
535: // compare every pair of columns
536: for (int t1=0; t1 < numTerms; t1++) {
537: for (int t2=0; t2 < t1; t2++) {
538: // scan all states for a disagreement
539: for (int s=0; s < numStates; s++) {
540: ActionEntry a1 = actionEntry((StateId)s, t1);
541: ActionEntry a2 = actionEntry((StateId)s, t2);
542:
543: if (isErrorAction(a1) ||
544: isErrorAction(a2) ||
545: a1 == a2) {
546: // compatible in this state; keep scanning
547: }
548: else {
549: // conflict: mark both directions and stop scanning
550: graph.set(point(t1, t2));
551: graph.set(point(t2, t1));
552: break;
553: }
554: }
555: }
556: }
557:
558: // assign one color per mergeable group of columns
559: Array<int> color(numTerms);
560: int numColors = colorTheGraph(color, graph);
561:
562: // build the merged table, initialized to all-error so unmerged slots
563: // read as errors
564: ActionEntry *newTable;
565: allocInitArray(newTable, numStates * numColors, errorActionEntry);
566:
567: // fill in the merged columns and record the terminal->column map
568:
569: actionIndexMap = new TermIndex[numTerms];
570: for (int t=0; t<numTerms; t++) {
571: int c = color[t];
572:
573: // merge terminal t's column into color column c
574: for (int s=0; s<numStates; s++) {
575: ActionEntry &dest = newTable[s*numColors + c];
576:
577: ActionEntry src = actionEntry((StateId)s, t);
578: if (!isErrorAction(src)) {
579: // coloring guarantees no two non-error entries collide with
580: // different values
581: xassert(isErrorAction(dest) ||
582: dest == src);
583:
584: // copy the non-error entry into the shared column
585: dest = src;
586: }
587: }
588:
589: // record where terminal t's actions now live
590: TermIndex ti = (TermIndex)c;
591: xassert(ti == c);   // narrowing must be lossless
592: actionIndexMap[t] = ti;
593: }
594:
595: trace("compression")
596: << "action table: from " << (actionTableSize() * sizeof(ActionEntry))
597: << " down to " << (numStates * numColors * sizeof(ActionEntry))
598: << " bytes\n";
599:
600: // swap in the merged table
601: delete[] actionTable;
602: actionTable = newTable;
603: actionCols = numColors;
604: }
605:
606:
607:
608:
609:
610:
611: void ParseTables::mergeActionRows()   // graph-color states whose action rows conflict; merge compatible rows
612: {
613: traceProgress() << "merging action rows\n";
614:
615: // error bits must already exist, since merging erases error entries
616: xassert(errorBits);
617:
618: // must not have merged rows already
619: xassert(!actionRowPointers);
620:
621: // build a conflict graph over states: an edge means two rows disagree
622: // in some column and cannot share storage
623: Bit2d graph(point(numStates, numStates));
624: graph.setall(0);
625:
626: // compare every pair of rows
627: for (int s1=0; s1 < numStates; s1++) {
628: for (int s2=0; s2 < s1; s2++) {
629: // scan all columns for a disagreement
630: for (int t=0; t < actionCols; t++) {
631: ActionEntry a1 = actionTable[s1*actionCols + t];
632: ActionEntry a2 = actionTable[s2*actionCols + t];
633:
634: if (isErrorAction(a1) ||
635: isErrorAction(a2) ||
636: a1 == a2) {
637: // compatible in this column; keep scanning
638: }
639: else {
640: // conflict: mark both directions and stop scanning
641: graph.set(point(s1, s2));
642: graph.set(point(s2, s1));
643: break;
644: }
645: }
646: }
647: }
648:
649: // assign one color per mergeable group of rows
650: Array<int> color(numStates);
651: int numColors = colorTheGraph(color, graph);
652:
653: // merged table, initialized to all-error
654: ActionEntry *newTable;
655: allocInitArray(newTable, numColors * actionCols, errorActionEntry);
656:
657:
658:
659:
660:
661:
662:
663:
664:
665:
666:
667:
668:
669:
670:
671:
672:
673:
674:
675:
676:
677: // fill in the merged rows and record each state's row pointer
678: actionRowPointers = new ActionEntry* [numStates];
679: for (int s=0; s<numStates; s++) {
680: int c = color[s];
681:
682: // merge state s's row into color row c
683: for (int t=0; t<actionCols; t++) {
684: ActionEntry &dest = newTable[c*actionCols + t];
685:
686: ActionEntry src = actionTable[s*actionCols + t];
687: if (!isErrorAction(src)) {
688: // coloring guarantees no two non-error entries collide with
689: // different values
690: xassert(isErrorAction(dest) ||
691: dest == src);
692:
693: // copy the non-error entry into the shared row
694: dest = src;
695: }
696: }
697:
698: // record where state s's actions now live
699: actionRowPointers[s] = newTable + c*actionCols;
700: }
701:
702: trace("compression")
703: << "action table: from " << (numStates * actionCols * sizeof(ActionEntry))
704: << " down to " << (numColors * actionCols * sizeof(ActionEntry))
705: << " bytes\n";
706:
707: // swap in the merged table
708: delete[] actionTable;
709: actionTable = newTable;
710: actionRows = numColors;
711:
712: // diagnostic: count rows whose non-error entries all share one value
713: // (candidates for further compression)
714: {
715: int ct=0;
716: for (int s=0; s<actionRows; s++) {
717: int val = 0;
718: for (int t=0; t<actionCols; t++) {
719: int entry = actionRowPointers[s][t];
720: if (val==0) {
721: val = entry;
722: }
723: else if (entry != 0 && entry != val) {
724: // two distinct non-zero values: not same-valued
725: goto next_s;
726: }
727: }
728:
729: // row is same-valued
730: ct++;
731:
732: next_s:
733: ;
734: }
735: trace("compression") << ct << " same-valued action rows\n";
736: }
737: }
738:
739:
740:
741:
742: void ParseTables::mergeGotoColumns()   // graph-color nonterminals whose goto columns conflict; merge compatible columns
743: {
744: traceProgress() << "merging goto columns\n";
745:
746: // error bits must already exist, since merging erases error entries
747: xassert(errorBits);
748:
749: // must not have merged already
750: xassert(!gotoIndexMap);
751:
752: // conflict graph over nonterminals
753: Bit2d graph(point(numNonterms, numNonterms));
754: graph.setall(0);
755:
756: // compare every pair of columns
757: for (int nt1=0; nt1 < numNonterms; nt1++) {
758: for (int nt2=0; nt2 < nt1; nt2++) {
759: // scan all states for a disagreement
760: for (int s=0; s < numStates; s++) {
761: GotoEntry g1 = gotoEntry((StateId)s, nt1);
762: GotoEntry g2 = gotoEntry((StateId)s, nt2);
763:
764: if (isErrorGoto(g1) ||
765: isErrorGoto(g2) ||
766: g1 == g2) {
767: // compatible in this state; keep scanning
768: }
769: else {
770: // conflict: mark both directions and stop scanning
771: graph.set(point(nt1, nt2));
772: graph.set(point(nt2, nt1));
773: break;
774: }
775: }
776: }
777: }
778:
779: // assign one color per mergeable group of columns
780: Array<int> color(numNonterms);
781: int numColors = colorTheGraph(color, graph);
782:
783: // merged table, initialized so unmerged slots read as the error goto
784:
785: GotoEntry *newTable;
786: allocInitArray(newTable, numStates * numColors, encodeGotoError());
787:
788: // fill in the merged columns and record the nonterminal->column map
789:
790: gotoIndexMap = new NtIndex[numNonterms];
791: for (int nt=0; nt<numNonterms; nt++) {
792: int c = color[nt];
793:
794: // merge nonterminal nt's column into color column c
795: for (int s=0; s<numStates; s++) {
796: GotoEntry &dest = newTable[s*numColors + c];
797:
798: GotoEntry src = gotoEntry((StateId)s, nt);
799: if (!isErrorGoto(src)) {
800: // coloring guarantees no two non-error entries collide with
801: // different values
802: xassert(isErrorGoto(dest) ||
803: dest == src);
804:
805: // copy the non-error entry into the shared column
806: dest = src;
807: }
808: }
809:
810: // record where nonterminal nt's gotos now live
811: NtIndex nti = (NtIndex)c;
812: xassert(nti == c);   // narrowing must be lossless
813: gotoIndexMap[nt] = nti;
814: }
815:
816: trace("compression")
817: << "goto table: from " << (gotoTableSize() * sizeof(GotoEntry))
818: << " down to " << (numStates * numColors * sizeof(GotoEntry))
819: << " bytes\n";
820:
821: // swap in the merged table
822: delete[] gotoTable;
823: gotoTable = newTable;
824: gotoCols = numColors;
825: }
826:
827:
828:
829:
830: void ParseTables::mergeGotoRows()   // graph-color states whose goto rows conflict; merge compatible rows
831: {
832: traceProgress() << "merging goto rows\n";
833:
834: // error bits must already exist, since merging erases error entries
835: xassert(errorBits);
836:
837: // must not have merged rows already
838: xassert(!gotoRowPointers);
839:
840: // conflict graph over states
841: Bit2d graph(point(numStates, numStates));
842: graph.setall(0);
843:
844: // compare every pair of rows
845: for (int s1=0; s1 < numStates; s1++) {
846: for (int s2=0; s2 < s1; s2++) {
847: // scan all columns for a disagreement
848: for (int nt=0; nt < gotoCols; nt++) {
849: GotoEntry g1 = gotoTable[s1*gotoCols + nt];
850: GotoEntry g2 = gotoTable[s2*gotoCols + nt];
851:
852: if (isErrorGoto(g1) ||
853: isErrorGoto(g2) ||
854: g1 == g2) {
855: // compatible in this column; keep scanning
856: }
857: else {
858: // conflict: mark both directions and stop scanning
859: graph.set(point(s1, s2));
860: graph.set(point(s2, s1));
861: break;
862: }
863: }
864: }
865: }
866:
867: // assign one color per mergeable group of rows
868: Array<int> color(numStates);
869: int numColors = colorTheGraph(color, graph);
870:
871: // merged table, initialized to the error goto
872: GotoEntry *newTable;
873: allocInitArray(newTable, numColors * gotoCols, encodeGotoError());
874:
875:
876:
877:
878:
879:
880:
881:
882:
883:
884:
885:
886:
887:
888:
889:
890:
891:
892:
893:
894:
895: // fill in the merged rows and record each state's row pointer
896: gotoRowPointers = new GotoEntry* [numStates];
897: for (int s=0; s<numStates; s++) {
898: int c = color[s];
899:
900: // merge state s's row into color row c
901: for (int nt=0; nt<gotoCols; nt++) {
902: GotoEntry &dest = newTable[c*gotoCols + nt];
903:
904: GotoEntry src = gotoTable[s*gotoCols + nt];
905: if (!isErrorGoto(src)) {
906: // coloring guarantees no two non-error entries collide with
907: // different values
908: xassert(isErrorGoto(dest) ||
909: dest == src);
910:
911: // copy the non-error entry into the shared row
912: dest = src;
913: }
914: }
915:
916: // record where state s's gotos now live
917: gotoRowPointers[s] = newTable + c*gotoCols;
918: }
919:
920: trace("compression")
921: << "goto table: from " << (numStates * gotoCols * sizeof(GotoEntry))
922: << " down to " << (numColors * gotoCols * sizeof(GotoEntry))
923: << " bytes\n";
924:
925: // swap in the merged table
926: delete[] gotoTable;
927: gotoTable = newTable;
928: gotoRows = numColors;
929: }
930:
931:
932: static int intCompare(void const *left, void const *right)   // qsort comparator; subtraction is safe here since inputs are small color indices
933: {
934: return *((int const*)left) - *((int const*)right);
935: }
936:
937: int ParseTables::colorTheGraph(int *color, Bit2d &graph)   // greedy graph coloring; fills color[] and returns the number of colors used
938: {
939: int n = graph.Size().x;
940:
941: if (tracingSys("graphColor") && n < 20) {
942: graph.print();
943: }
944:
945: // degree[i] = number of neighbors of node i
946: Array<int> degree(n);
947: memset((int*)degree, 0, n * sizeof(int));
948:
949: // blocked[i] = number of already-colored neighbors of node i
950: Array<int> blocked(n);
951:
952: // initialize per-node bookkeeping
953: enum { UNASSIGNED = -1 };
954: {
955: for (int i=0; i<n; i++) {
956: // node starts uncolored with no colored neighbors
957: color[i] = UNASSIGNED;
958: blocked[i] = 0;
959: // count neighbors
960: for (int j=0; j<n; j++) {
961: if (graph.get(point(i,j))) {
962: degree[i]++;
963: }
964: }
965: }
966: }
967:
968: // total distinct colors assigned so far
969: int usedColors = 0;
970:
971: for (int numColored=0; numColored < n; numColored++) {
972: // pick the most constrained uncolored node: maximize colored
973: // neighbors (blocked), breaking ties by fewest uncolored neighbors
974:
975:
976:
977: int best = -1;
978: int bestBlocked = 0;
979: int bestUnblocked = 0;
980:
981: for (int choice = 0; choice < n; choice++) {
982: if (color[choice] != UNASSIGNED) continue;
983:
984: int chBlocked = blocked[choice];
985: int chUnblocked = degree[choice] - blocked[choice];
986: if (best == -1 ||
987: chBlocked > bestBlocked ||
988: (chBlocked == bestBlocked &&
989: chUnblocked < bestUnblocked)) {
990: // better candidate
991: best = choice;
992: bestBlocked = chBlocked;
993: bestUnblocked = chUnblocked;
994: }
995: }
996:
997: // collect the colors already used by 'best's neighbors
998: Array<int> adjColor(bestBlocked);
999: int adjIndex = 0;
1000: for (int i=0; i<n; i++) {
1001: if (graph.get(point(best,i)) &&
1002: color[i] != UNASSIGNED) {
1003: adjColor[adjIndex++] = color[i];
1004: }
1005: }
1006: xassert(adjIndex == bestBlocked);
1007:
1008: // sort so the smallest free color can be found by one pass
1009: qsort((int*)adjColor, bestBlocked, sizeof(int), intCompare);
1010:
1011: // find the smallest color not used by any neighbor
1012: int selColor = 0;
1013: for (int j=0; j<bestBlocked; j++) {
1014: if (selColor == adjColor[j]) {
1015: selColor++;
1016: }
1017: else if (selColor < adjColor[j]) {
1018: // gap found: selColor is free
1019: break;
1020: }
1021: else {
1022: // adjColor[j] < selColor: duplicate of an already-passed color;
1023: // keep scanning
1024:
1025: }
1026: }
1027:
1028: // assign and grow the color count if needed
1029: color[best] = selColor;
1030: if (selColor+1 > usedColors) {
1031: usedColors = selColor+1;
1032: }
1033:
1034: // every neighbor of 'best' now has one more colored neighbor
1035: for (int k=0; k<n; k++) {
1036: if (graph.get(point(best,k))) {
1037: // 'best' just became a colored neighbor of k
1038: blocked[k]++;
1039: }
1040: }
1041: }
1042:
1043: std::ostream &os = trace("graphColor") << "colors[]:";
1044:
1045: for (int i=0; i<n; i++) {
1046: // all neighbors must be colored by now
1047: xassert(blocked[i] == degree[i]);
1048:
1049: // every node must have a color
1050: xassert(color[i] != UNASSIGNED);
1051: os << " " << color[i];
1052: }
1053:
1054: os << "\n";
1055:
1056: return usedColors;
1057: }
1058:
1059:
1060:
1061:
1062: template <class EltType>
1063: void emitTable(EmitCode &out, EltType const *table, int size, int rowLength,
1064: char const *typeName, char const *tableName)   // emit 'table' as C source: a static const array definition
1065: {
1066: if (!table || !size) {
1067: out << "  " << typeName << " *" << tableName << " = NULL;\n";
1068: return;
1069: }
1070:
1071: bool printHex = 0==strcmp(typeName, "ErrorBitsEntry") ||
1072: (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "ActionEntry")) ||
1073: (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "GotoEntry")) ;
1074: bool needCast = 0==strcmp(typeName, "StateId");   // StateId entries need an explicit cast in the output
1075:
1076: if (size * sizeof(*table) > 50) {
1077: out << "  // storage size: " << size * sizeof(*table) << " bytes\n";
1078: if (size % rowLength == 0) {
1079: out << "  // rows: " << (size/rowLength) << "  cols: " << rowLength << "\n";
1080: }
1081: }
1082:
1083: int rowNumWidth = sm_stringf("%d", size / rowLength /*round down*/).length();
1084:
1085: // emit the array elements rowLength per line, each line prefixed with
1086: // its row number in a comment; the split "/""*" literals below keep
1087: // this source from containing comment-delimiter sequences itself
1088:
1089:
1090:
1091:
1092: out << "  static " << typeName << " const " << tableName << "[" << size << "] = {";
1093: int row = 0;
1094: for (int i=0; i<size; i++) {
1095: if (i % rowLength == 0) {
1096: out << sm_stringf("\n    /""*%*d*""/ ", rowNumWidth, row++);
1097: }
1098:
1099: if (needCast) {
1100: out << "(" << typeName << ")";
1101: }
1102:
1103: if (printHex) {
1104: out << sm_stringf("0x%02X, ", table[i]);
1105: }
1106: else if (sizeof(table[i]) == 1) {
1107: // single-byte element: print numerically rather than as a char;
1108: // read through an unsigned char so negative/odd bytes don't
1109: // misprint
1110: out << (int)(*((unsigned char*)(table+i))) << ", ";
1111: }
1112: else {
1113: // multi-byte element: rely on the element's own operator<<
1114:
1115: out << table[i] << ", ";
1116: }
1117: }
1118: out << "\n"
1119: << "  };\n";
1120: }
1121:
1122:
1123: sm_stringBuilder& operator<< (sm_stringBuilder &sb, ParseTables::ProdInfo const &info)
1124: {
1125: sb << "{" << (int)info.rhsLen << "," << (int)info.lhsIndex << "}";
1126: return sb;
1127: }
1128:
1129:
1130:
1131: template <class EltType>
1132: void emitTable2(EmitCode &out, EltType const *table, int size, int rowLength,
1133: char const *typeName, char const *tableName)
1134: {
1135: sm_string tempName = sm_stringc << tableName << "_static";
1136: emitTable(out, table, size, rowLength, typeName, tempName);
1137: out << " " << tableName << " = const_cast<" << typeName << "*>("
1138: << tempName << ");\n\n";
1139: }
1140:
1141:
1142: template <class EltType>
1143: void emitOffsetTable(EmitCode &out, EltType **table, EltType *base, int size,
1144: char const *typeName, char const *tableName, char const *baseName)
1145: {
1146: if (!table) {
1147: out << " " << tableName << " = NULL;\n\n";
1148: return;
1149: }
1150:
1151:
1152: Array<int> offsets(size);
1153: bool allUnassigned = true;
1154: for (int i=0; i < size; i++) {
1155: if (table[i]) {
1156: offsets[i] = table[i] - base;
1157: allUnassigned = false;
1158: }
1159: else {
1160: offsets[i] = UNASSIGNED;
1161: }
1162: }
1163:
1164: if (allUnassigned) {
1165:
1166: size = 0;
1167: }
1168:
1169: if (size > 0) {
1170: out << " " << tableName << " = new " << typeName << " [" << size << "];\n";
1171:
1172: emitTable(out, (int*)offsets, size, 16, "int", sm_stringc << tableName << "_offsets");
1173:
1174:
1175: out << " for (int i=0; i < " << size << "; i++) {\n"
1176: << " int ofs = " << tableName << "_offsets[i];\n"
1177: << " if (ofs >= 0) {\n"
1178: << " " << tableName << "[i] = " << baseName << " + ofs;\n"
1179: << " }\n"
1180: << " else {\n"
1181: << " " << tableName << "[i] = NULL;\n"
1182: << " }\n"
1183: << " }\n\n";
1184: }
1185: else {
1186: out << " // offset table is empty\n"
1187: << " " << tableName << " = NULL;\n\n";
1188: }
1189: }
1190:
1191:
1192:
// Debugging aid: print a table to stdout in the same format emitTable
// would write it.  The implementation is currently compiled out with
// "#if 0", so as shipped this function is a no-op.
template <class EltType>
void printTable(EltType const *table, int size, int rowLength,
char const *typeName, char const *tableName)
{
// disabled debug path: emit to a temp file, cat it, then remove it
#if 0
{
EmitCode out("printTable.tmp");
emitTable(out, table, size, rowLength, typeName, tableName);
}

system("cat printTable.tmp; rm printTable.tmp");
#endif
}
1208:
1209:
1210:
1211:
1212:
// Write, to 'out', C++ source defining <className>_ParseTables -- a
// ParseTables subclass whose constructor fills every member from
// literal data -- plus <className>::<funcName>(), a factory returning
// a new instance of it.
void ParseTables::emitConstructionCode(EmitCode &out,
char const *className, char const *funcName)
{
// must not emit code for tables still under construction
xassert(!temp);

// class declaration and constructor header
out << "// this makes a ParseTables from some literal data;\n"
<< "// the code is written by ParseTables::emitConstructionCode()\n"
<< "// in " << __FILE__ << "\n"
<< "class " << className << "_ParseTables : public ParseTables {\n"
<< "public:\n"
<< " " << className << "_ParseTables();\n"
<< "};\n"
<< "\n"
<< className << "_ParseTables::" << className << "_ParseTables()\n"
<< " : ParseTables(false /*owning*/)\n"
<< "{\n"
;

// one generated assignment per scalar member
#define SET_VAR(var) \
out << " " #var " = " << var << ";\n";
SET_VAR(numTerms);
SET_VAR(numNonterms);
SET_VAR(numStates);
SET_VAR(numProds);
SET_VAR(actionCols);
SET_VAR(actionRows);
SET_VAR(gotoCols);
SET_VAR(gotoRows);
SET_VAR(ambigTableSize);
// startState needs an explicit cast to its enum type
out << " startState = (StateId)" << (int)startState << ";\n";
SET_VAR(finalProductionIndex);
SET_VAR(bigProductionListSize);
SET_VAR(errorBitsRowSize);
SET_VAR(uniqueErrorRows);
#undef SET_VAR
out << "\n";

// action and goto tables
emitTable2(out, actionTable, actionTableSize(), actionCols,
"ActionEntry", "actionTable");

emitTable2(out, gotoTable, gotoTableSize(), gotoCols,
"GotoEntry", "gotoTable");

// per-production info (rhs length, lhs index)
emitTable2(out, prodInfo, numProds, 16, "ParseTables::ProdInfo", "prodInfo");

emitTable2(out, stateSymbol, numStates, 16, "SymbolId", "stateSymbol");

emitTable2(out, ambigTable, ambigTableSize, 16, "ActionEntry", "ambigTable");

emitTable2(out, nontermOrder, nontermOrderSize(), 16,
"NtIndex", "nontermOrder");

// error bits, plus the per-state pointers into them
emitTable2(out, errorBits, uniqueErrorRows * errorBitsRowSize, errorBitsRowSize,
"ErrorBitsEntry", "errorBits");

emitOffsetTable(out, errorBitsPointers, errorBits, numStates,
"ErrorBitsEntry*", "errorBitsPointers", "errorBits");

// action index map, plus per-state action row pointers
emitTable2(out, actionIndexMap, numTerms, 16,
"TermIndex", "actionIndexMap");

emitOffsetTable(out, actionRowPointers, actionTable, numStates,
"ActionEntry*", "actionRowPointers", "actionTable");

// goto index map, plus per-state goto row pointers
emitTable2(out, gotoIndexMap, numNonterms, 16,
"NtIndex", "gotoIndexMap");

emitOffsetTable(out, gotoRowPointers, gotoTable, numStates,
"GotoEntry*", "gotoRowPointers", "gotoTable");

if (ENABLE_CRS_COMPRESSION) {
// tables used only by the CRS compression scheme
emitTable2(out, firstWithTerminal, numTerms, 16,
"StateId", "firstWithTerminal");

emitTable2(out, firstWithNonterminal, numNonterms, 16,
"StateId", "firstWithNonterminal");

emitTable2(out, bigProductionList, bigProductionListSize, 16,
"ProdIndex", "bigProductionList");

emitOffsetTable(out, productionsForState, bigProductionList, numStates,
"ProdIndex*", "productionsForState", "bigProductionList");

emitOffsetTable(out, ambigStateTable, ambigTable, numStates,
"ActionEntry*", "ambigStateTable", "ambigTable");
}
else {
// CRS disabled: generated constructor just NULLs these members
out << " firstWithTerminal = NULL;\n"
<< " firstWithNonterminal = NULL;\n"
<< " bigProductionList = NULL;\n"
<< " productionsForState = NULL;\n"
<< " ambigStateTable = NULL;\n"
;
}

// close the constructor, then emit the factory function
out << "}\n"
<< "\n"
<< "\n"
<< "ParseTables *" << className << "::" << funcName << "()\n"
<< "{\n"
<< " return new " << className << "_ParseTables;\n"
<< "}\n"
<< "\n"
;
}
1331:
1332:
1333:
Start cpp section to elk/elk_ptreeact.cpp[1
/1
]
1: #line 22731 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
// Wrap lexer 'u': tokens still come from 'u', but each token's semantic
// value is replaced (in copyFields) by a leaf parse-tree node; 'a'
// supplies terminal names for those nodes.
ParseTreeLexer::ParseTreeLexer(LexerInterface *u, UserActions *a)
  : underlying(u),
    underToken(u->getTokenFunc()),
    actions(a)
{
  // publish the underlying lexer's current token through this object
  copyFields();
}
20:
21: STATICDEF void ParseTreeLexer::nextToken(LexerInterface *lex)
22: {
23: ParseTreeLexer *ths = static_cast<ParseTreeLexer*>(lex);
24:
25:
26: ths->underToken(ths->underlying);
27:
28:
29: ths->copyFields();
30: }
31:
32: void ParseTreeLexer::copyFields()
33: {
34: type = underlying->type;
35: loc = underlying->loc;
36:
37:
38:
39:
40:
41: PTreeNode *ret = new PTreeNode(actions->terminalName(type));
42: sval = (SemanticValue)ret;
43: }
44:
45:
46: sm_string ParseTreeLexer::tokenDesc() const
47: {
48: return underlying->tokenDesc();
49: }
50:
51: sm_string ParseTreeLexer::tokenKindDesc(int kind) const
52: {
53: return underlying->tokenKindDesc(kind);
54: }
55:
56:
57:
58: STATICDEF SemanticValue ParseTreeActions::reduce(
59: UserActions *context,
60: int productionId,
61: SemanticValue const *svals
62: SOURCELOCARG( SourceLoc loc ) )
63: {
64: ParseTreeActions *ths = static_cast<ParseTreeActions*>(context);
65:
66:
67: ParseTables::ProdInfo const &info = ths->tables->getProdInfo(productionId);
68: xassert(info.rhsLen <= PTreeNode::MAXCHILDREN);
69:
70:
71: PTreeNode *ret = new PTreeNode(ths->underlying->nonterminalName(info.lhsIndex));
72:
73:
74: for (int i=0; i < info.rhsLen; i++) {
75: ret->children[i] = (PTreeNode*)svals[i];
76: }
77: ret->numChildren = info.rhsLen;
78:
79: return (SemanticValue)ret;
80: }
81:
82:
83: SemanticValue ParseTreeActions::mergeAlternativeParses(
84: int ntIndex, SemanticValue left, SemanticValue right
85: SOURCELOCARG( SourceLoc loc ) )
86: {
87: trace("ptreeactMerge") << underlying->nonterminalName(ntIndex) << "\n";
88:
89:
90: PTreeNode *L = (PTreeNode*)left;
91: PTreeNode *R = (PTreeNode*)right;
92:
93: L->addAlternative(R);
94: return left;
95: }
96:
97:
98: char const *ParseTreeActions::terminalName(int termId)
99: {
100: return underlying->terminalName(termId);
101: }
102:
103: char const *ParseTreeActions::nonterminalName(int termId)
104: {
105: return underlying->nonterminalName(termId);
106: }
Start cpp section to elk/elk_ptreenode.cpp[1
/1
]
1: #line 22838 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
// total number of PTreeNode objects initialized (bumped in init())
int PTreeNode::allocCount = 0;
// total number of ambiguity merges recorded (bumped in addAlternative())
int PTreeNode::alternativeCount = 0;
14:
15:
16: void PTreeNode::init()
17: {
18: merged = NULL;
19: allocCount++;
20: }
21:
22:
23: TreeCount PTreeNode::countTrees()
24: {
25:
26: if (count != 0) {
27: return count;
28: }
29:
30: else {
31:
32:
33: count = 1;
34: for (int i=0; i<numChildren; i++) {
35: count *= children[i]->countTrees();
36: }
37:
38:
39: if (merged) {
40:
41: count += merged->countTrees();
42: }
43: }
44:
45: return count;
46: }
47:
48:
49: void PTreeNode::printTree(std::ostream &out, PrintFlags pf) const
50: {
51: if (tracingSys("ptreeAddrs")) {
52: pf = (PrintFlags)(pf | PF_ADDRS);
53: }
54: innerPrintTree(out, 0 /*indentation*/, pf);
55: }
56:
57:
58:
59: enum { INDENT_INC = 2 };
60:
// Recursive worker for printTree: print this node -- and, if it has
// been merged with alternatives, every node on the 'merged' list --
// at the given indentation, then recurse into children.
void PTreeNode::innerPrintTree(std::ostream &out, int indentation,
PrintFlags pf) const
{
int alts = 1;      // number of alternatives, this node included
sm_string LHS;     // LHS nonterminal name, used in ambiguity banners

if (merged) {
alts = countMergedList();

// 'type' holds the production text; the word before the first
// space is taken to be the LHS nonterminal
char const *firstSpace = strchr(type, ' ');
if (!firstSpace) {
LHS = type;
}
else {
LHS = sm_string(type, firstSpace-type);
}

// alternatives print one level deeper than their banners
indentation += INDENT_INC;
}

// print each alternative (just 'this' when there is no merge)
int ct=1;
for (PTreeNode const *n = this; n != NULL; n = n->merged) {
if (alts > 1) {
// banner separating this alternative from the previous one
indent(out, indentation - INDENT_INC);
out << "--------- ambiguous " << LHS << ": "
<< ct << " of " << alts << " ---------\n";
}

indent(out, indentation);

out << n->type;
if (pf & PF_EXPAND) {
// also list the children's types on the same line
if (n->numChildren) {
out << " ->";
for (int c=0; c < n->numChildren; c++) {
out << " " << n->children[c]->type;
}
}
}

if (pf & PF_ADDRS) {
// node address, for correlating with debugger output
out << " (" << ((void*)n) << ")";
}
out << "\n";

// recurse into this alternative's children
for (int c=0; c < n->numChildren; c++) {
n->children[c]->innerPrintTree(out, indentation + INDENT_INC, pf);
}

ct++;
}

if (merged) {
// closing banner for the ambiguity region
indentation -= INDENT_INC;
indent(out, indentation);
out << "--------- end of ambiguous " << LHS << " ---------\n";
}
}
129:
130: STATICDEF void PTreeNode::indent(std::ostream &out, int n)
131: {
132: for (int i=0; i<n; i++) {
133: out << " ";
134: }
135: }
136:
137:
138:
139: int PTreeNode::countMergedList() const
140: {
141: int ct = 1;
142: for (PTreeNode const *n = merged; n != NULL; n = n->merged) {
143: ct++;
144: }
145: return ct;
146: }
147:
148:
149: void PTreeNode::addAlternative(PTreeNode *alt)
150: {
151:
152: alt->merged = this->merged;
153: this->merged = alt;
154:
155: alternativeCount++;
156: }
Start cpp section to elk/elk_useract.cpp[1
/1
]
1: #line 22995 "./lpsrc/elk.pak"
2:
3:
4:
5:
6:
7:
8:
9:
// out-of-line definition of the (empty) destructor
UserActions::~UserActions()
{}
12:
13:
14: ParseTables *UserActions::makeTables()
15: {
16: xfailure("this object does not have any tables");
17: return NULL;
18: }
19:
20:
21:
22: UserActions::ReductionActionFunc TrivialUserActions::getReductionAction()
23: {
24: return &TrivialUserActions::doReductionAction;
25: }
26:
27: STATICDEF SemanticValue TrivialUserActions::doReductionAction(
28: UserActions *, int , SemanticValue const *
29: SOURCELOCARG( SourceLoc ) )
30: { return NULL_SVAL; }
31:
32: SemanticValue TrivialUserActions::duplicateTerminalValue(
33: int , SemanticValue sval)
34: { return sval; }
35:
36: SemanticValue TrivialUserActions::duplicateNontermValue(
37: int , SemanticValue sval)
38: { return sval; }
39:
40:
41: void TrivialUserActions::deallocateTerminalValue(
42: int , SemanticValue )
43: {}
44:
45: void TrivialUserActions::deallocateNontermValue(
46: int , SemanticValue )
47: {}
48:
49: SemanticValue TrivialUserActions::mergeAlternativeParses(
50: int , SemanticValue left, SemanticValue
51: SOURCELOCARG( SourceLoc ) )
52: { return left; }
53:
54: bool TrivialUserActions::keepNontermValue(int , SemanticValue )
55: { return true; }
56:
57:
58: UserActions::ReclassifyFunc TrivialUserActions::getReclassifier()
59: {
60: return &TrivialUserActions::reclassifyToken;
61: }
62:
63: STATICDEF int TrivialUserActions::reclassifyToken(UserActions *,
64: int oldTokenType, SemanticValue )
65: { return oldTokenType; }
66:
67: sm_string TrivialUserActions::terminalDescription(int, SemanticValue)
68: { return sm_string(""); }
69:
70: sm_string TrivialUserActions::nonterminalDescription(int, SemanticValue)
71: { return sm_string(""); }
72:
73: char const *TrivialUserActions::terminalName(int)
74: { return ""; }
75: char const *TrivialUserActions::nonterminalName(int)
76: { return ""; }
77: