1. elkhound

Start data section to licences/elk_licence.txt[1 /1 ]
     1: The elkhound software
     2: Copyright (c) 2002, Regents of the University of California
     3: All rights reserved.
     4: 
     5: Redistribution and use in source and binary forms, with or without
     6: modification, are permitted provided that the following conditions are
     7: met:
     8: 
     9:     * Redistributions of source code must retain the above copyright
    10:       notice, this list of conditions and the following disclaimer.
    11: 
    12:     * Redistributions in binary form must reproduce the above
    13:       copyright notice, this list of conditions and the following
    14:       disclaimer in the documentation and/or other materials provided
    15:       with the distribution.
    16: 
    17:     * Neither the name of the University of California, Berkeley nor
    18:       the names of its contributors may be used to endorse or promote
    19:       products derived from this software without specific prior
    20:       written permission.
    21: 
    22: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    23: "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    24: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    25: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    26: OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    27: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    28: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    29: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    30: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    31: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    32: OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    33: 
End data section to licences/elk_licence.txt[1]
Start python section to spkgs/elk.py[1 /1 ]
     1: #line 41 "./lpsrc/elk.pak"
     2: 
     3: # Elkhound stuffs
     4: 
     5: SMBASE = [  # intentionally empty: no smbase-only sources are listed here
     6: ]
     7: 
     8: # These are the files actually needed at run time by Felix
     9: SMRTL = [
    10:   'elk/sm_malloc_stub',
    11:   'elk/sm_nonport',
    12:   'elk/sm_autofile',
    13:   'elk/sm_bflatten',
    14:   'elk/sm_bit2d',
    15:   'elk/sm_bitarray',
    16:   'elk/sm_boxprint',
    17:   'elk/sm_breaker',
    18:   'elk/sm_crc',
    19:   'elk/sm_datablok',
    20:   'elk/sm_flatten',
    21:   'elk/sm_growbuf',
    22:   'elk/sm_gprintf',
    23:   'elk/sm_hashline',
    24:   'elk/sm_hashtbl',
    25:   'elk/sm_missing',
    26:   'elk/sm_point',
    27:   'elk/sm_pprint',
    28:   'elk/sm_strdict',
    29:   'elk/sm_strhash',
    30:   'elk/sm_stringset',
    31:   'elk/sm_strtokp',
    32:   'elk/sm_strutil',
    33:   'elk/sm_svdict',
    34:   'elk/sm_vdtllist',
    35:   'elk/sm_vptrmap',
    36:   'elk/sm_warn',
    37:   'elk/sm_srcloc',
    38:   'elk/sm_syserr',
    39:   'elk/sm_str',
    40:   'elk/sm_trace',
    41:   'elk/sm_trdelete',
    42:   'elk/sm_voidlist',
    43:   'elk/sm_exc',
    44: ]
    45: 
    46: ASTGEN = [  # ast_* support sources used by the grammar tools
    47:   'elk/ast_gramlex',
    48:   'elk/ast_ccsstr',
    49:   'elk/ast_reporterr',
    50:   'elk/ast_embedded',
    51:   'elk/ast_asthelp',
    52:   'elk/ast_strtable',
    53:   'elk/ast_locstr',
    54: ]
    55: 
    56: ELKHOUND = [  # elk_* parser-generator sources (host tool; see host_cpp_cpps below)
    57:   'elk/elk_asockind',
    58:   'elk/elk_grammar',
    59:   'elk/elk_emitcode',
    60:   'elk/elk_mlsstr',
    61:   'elk/elk_genml',
    62:   'elk/elk_gramast.ast.gen',
    63:   'elk/elk_gramlex.yy',
    64:   'elk/elk_grampar',
    65:   'elk/elk_grampar.tab',
    66:   'elk/elk_gramexpl',
    67: ]
    68: 
    69: ELKRTL = [  # GLR runtime sources (also linked into the target via cpp_cpps)
    70:   'elk/elk_glr',
    71:   'elk/elk_parsetables',
    72:   'elk/elk_useract',
    73:   'elk/elk_ptreenode',
    74:   'elk/elk_ptreeact',
    75: ]
    76: 
    77: ELKRTL_INTERFACES = [  # headers published for the runtime (see rtl_interfaces)
    78:   'elk/sm_array.h',
    79:   'elk/sm_objpool.h',
    80:   'elk/sm_sobjlist.h',
    81:   'elk/sm_trdelete.h',
    82:   'elk/sm_voidlist.h',
    83:   'elk/sm_macros.h',
    84:   'elk/sm_srcloc.h',
    85:   'elk/sm_typ.h',
    86:   'elk/sm_xassert.h',
    87:   'elk/sm_objlist.h',
    88:   'elk/sm_str.h',
    89:   'elk/elk_lexerint.h',
    90:   'elk/elk_glrconfig.h',
    91:   'elk/elk_parsetables.h',
    92:   'elk/elk_glr.h',
    93:   'elk/elk_rcptr.h',
    94:   'elk/elk_useract.h',
    95: ]
    96: 
    97: host_cpp_cpps = SMRTL+ASTGEN+ELKHOUND+ELKRTL  # everything compiled for the host-side generator
    98: host_exes = [("elk/elk_gramanl","bin/flx_elkhound")]
    99: host_exes_require_libs = ["elk/libelk_host_static"]
   100: 
   101: rtl_interfaces = ELKRTL_INTERFACES
   102: cpp_cpps = ELKRTL + SMRTL
   103: iscr_source = ['lpsrc/sm.pak','lpsrc/ast.pak','lpsrc/elk.pak']
   104: build_macro = "ELK"
   105: weaver_directory = "doc/elkhound/"
   106: 
End python section to spkgs/elk.py[1]
Start data section to config/elk.fpc[1 /1 ]
     1: Name: elk
     2: Description: Elkhound
     3: Version: 1
     4: provides_dlib: -lelk_dynamic
     5: provides_slib: -lelk_static
End data section to config/elk.fpc[1]
Start cpp section to rtl/flx_target_elk_config.hpp[1 /1 ]
     1: #line 160 "./lpsrc/elk.pak"
     2: #ifndef __FLX_TARGET_ELK_CONFIG_H__
     3: #define __FLX_TARGET_ELK_CONFIG_H__
     4: #include "flx_rtl_config.hpp"
     5: #ifdef BUILD_ELK
     6: #define ELK_EXTERN FLX_EXPORT
     7: #else
     8: #define ELK_EXTERN FLX_IMPORT
     9: #endif
    10: #endif
    11: 
End cpp section to rtl/flx_target_elk_config.hpp[1]
Start cpp section to rtl/flx_host_elk_config.hpp[1 /1 ]
     1: #line 172 "./lpsrc/elk.pak"
     2: #ifndef __FLX_HOST_ELK_CONFIG_H__
     3: #define __FLX_HOST_ELK_CONFIG_H__
     4: #include "flx_host_config.hpp"
     5: #ifdef BUILD_ELK
     6: #define ELK_EXTERN FLX_EXPORT
     7: #else
     8: #define ELK_EXTERN FLX_IMPORT
     9: #endif
    10: #endif
    11: 
End cpp section to rtl/flx_host_elk_config.hpp[1]
Start cpp section to rtl/flx_elk_config.hpp[1 /1 ]
     1: #line 184 "./lpsrc/elk.pak"
     2: #ifndef __FLX_ELK_CONFIG_H__
     3: #define __FLX_ELK_CONFIG_H__
     4: #if defined(HOST_BUILD)
     5: #include "flx_host_elk_config.hpp"
     6: #elif defined(TARGET_BUILD)
     7: #include "flx_target_elk_config.hpp"
     8: #else
     9: #error "ELKHOUND: must specify -DHOST_BUILD or -DTARGET_BUILD"
    10: #endif
    11: #endif
    12: 
End cpp section to rtl/flx_elk_config.hpp[1]
Start cpp section to elk/elk_gramexpl.cpp[1 /1 ]
     1: #line 197 "./lpsrc/elk.pak"
     2: // gramexpl.cc            see license.txt for copyright and terms of use
     3: // interactively query and modify a grammar; primary purpose
     4: // is to assist diagnosing SLR conflict reports
     5: 
     6: #include "elk_gramanl.h"    // GrammarAnalysis
     7: #include "sm_strtokp.h"    // StrtokParse
     8: 
     9: #include <iostream>   // cin/std::cout
    10: 
    11: 
    12: void grammarExplorer(GrammarAnalysis &g)
    13: {
    14:   std::cout << "exploring the grammar:\n";
    15: 
    16:   #if 0
    17:   for (;;) {
    18:     std::cout << "commands:\n"
    19:             "  terminals\n"
    20:             "  nonterminals\n"
    21:             "  productions <nonterm-id>\n"
    22:             "  state <state-id>\n"
    23:             "  suppress-except <term-id> (-1 to disable)\n"
    24:             "  reach <state-id>\n"
    25:             "  track-la <state-id> <prod-id> <term-id>\n"
    26:             "  quit\n";
    27:     std::cout << "command> ";
    28:     std::cout.flush();
    29: 
    30:     char buf[80];
    31:     cin >> buf;     // buffer overrun potential, don't care
    32:     if (cin.eof()) break;
    33: 
    34:     StrtokParse tok(buf, " \n\t");
    35:     if (tok == 0) continue;
    36: 
    37:     try {
    38:       if (0==strcmp(tok[0], "terminals")) {
    39:         for (int i=0; i < g.numTerminals(); i++) {
    40:           Terminal const *t = g.getTerminal(i);
    41:           t->print(std::cout);
    42:         }
    43:       }
    44: 
    45:       else if (0==strcmp(tok[0], "nonterminals")) {
    46:         for (int i=0; i < g.numNonterminals(); i++) {
    47:           Nonterminal const *nt = g.getNonterminal(i);
    48:           nt->print(std::cout);
    49:         }
    50:       }
    51: 
    52:       else if (0==strcmp(tok[0], "productions")) {
    53:         int id = atoi(tok[1]);
    54:         Nonterminal const *nt = g.getNonterminal(i);
    55:         int ct=0;
    56:         FOREACH_PRODUCTION(g.productions, iter) {
    57:           if (iter.data()->left == nt) {
    58:             std::cout << "[" << ct << "] ";   // production id
    59:             iter.data()->print(std::cout);
    60:           }
    61:           ct++;
    62:         }
    63:       }
    64: 
    65:       else if (0==strcmp(tok[0], "state")) {
    66:         ItemSet const *is = g.getItemSet(atoi(tok[1]));
    67:         is->print(std::cout, g);
    68:       }
    69: 
    70:       else if (0==strcmp(tok[0], "suppress-except")) {
    71:         int id = atoi(tok[1]);
    72:         Terminal const *t = (id==-1? NULL : g.getTerminal(atoi(tok[1])));
    73:         DottedProduction::lookaheadSuppressExcept = t;
    74:         if (t) {
    75:           std::cout << "suppressing  " << t->name << std::endl;
    76:         }
    77:         else {
    78:           std::cout << "suppressing nothing\n";
    79:         }
    80:       }
    81: 
    82:       else if (0==strcmp(tok[0], "reach")) {
    83:         int targetId = atoi(tok[1]);
    84: 
    85:         // consider every state..
    86:         for (int i=0; i < g.numItemSets(); i++) {
    87:           ItemSet const *set = g.getItemSet(i);
    88: 
    89:           // iterate over all possible symbols to find transitions
    90:           for (int termId=0; termId < g.numTerminals(); termId++) {
    91:             ItemSet const *dest = set->transitionC(g.getTerminal(termId));
    92:             if (dest && dest->id == targetId) {
    93:               dest->print(std::cout, g);
    94:             }
    95:           }
    96:           for (int nontermId=0; nontermId < g.numNonterminals(); nontermId++) {
    97:             ItemSet const *dest = set->transitionC(g.getNonterminal(nontermId));
    98:             if (dest && dest->id == targetId) {
    99:               dest->print(std::cout, g);
   100:             }
   101:           }
   102:         }
   103:       }
   104: 
   105:       else if (0==strcmp(tok[0], "track-la")) {
   106:         int stateId = atoi(tok[1]);
   107:         ItemSet const *set = g.getItemSet(stateId);
   108: 
   109:         int prodId = atoi(tok[2]);
   110:         Production const *prod = g.productions.nth(prodId);
   111: 
   112:         int termId = atoi(tok[3]);
   113:         Terminal const *term = g.getTerminal(termId);
   114: 
   115: 
   116: 
   117: 
   118: 
   119: 
   120: 
   121: 
   122:       }
   123:       else if (0==strcmp(tok[0], "quit")) {
   124:       }
   125:       else {
   126:         std::cout << "unknown command: " << tok[0] << std::endl;
   127:       }
   128:     }
   129:     catch (xArrayBounds &) {
   130:       std::cout << "too few arguments to " << tok[0] << std::endl;
   131:     }
   132: 
   133: 
   134: 
   135: 
   136: 
   137: 
   138: 
   139: 
   140:   #endif // 0
   141: 
   142: }
   143: 
   144: 
End cpp section to elk/elk_gramexpl.cpp[1]
Start C section to elk/elk_asockind.h[1 /1 ]
     1: #line 342 "./lpsrc/elk.pak"
     2: // asockind.h            see license.txt for copyright and terms of use
     3: // AssocKind; pulled out on its own so I don't have dependency problems
     4: 
     5: #ifndef ASOCKIND_H
     6: #define ASOCKIND_H
     7: 
     8: #include "sm_str.h"
     9: 
    10: // specifies what to do when there is a shift/reduce conflict, and
    11: // the production and token have the same precedence; this is attached
    12: // to the token
     13: enum AssocKind {
     14:   AK_LEFT,            // disambiguate by reducing
     15:   AK_RIGHT,           // disambiguate by shifting
     16:   AK_NONASSOC,        // make it a parse-time syntax error
     17:   AK_NEVERASSOC,      // make it a parser-generation-time specification error
     18:   AK_SPLIT,           // (GLR-specific) fork the parser
     19: 
     20:   NUM_ASSOC_KINDS     // count sentinel; not a valid associativity value
     21: };
     22: 
     23: sm_string toString(AssocKind k);    // human-readable name for a kind
    24: 
    25: #endif // ASOCKIND_H
    26: 
End C section to elk/elk_asockind.h[1]
Start C section to elk/elk_emitcode.h[1 /1 ]
     1: #line 369 "./lpsrc/elk.pak"
     2: // emitcode.h            see license.txt for copyright and terms of use
     3: // track state of emitted code so I can emit #line too
     4: 
     5: #ifndef EMITCODE_H
     6: #define EMITCODE_H
     7: 
     8: #include <fstream>      // std::ofstream
     9: #include "sm_str.h"
    10: #include "sm_srcloc.h"
    11: 
     12: class EmitCode : public sm_stringBuilder {   // buffers emitted code, flushing to 'os' while tracking the output line
     13: private:     // data
     14:   std::ofstream os;         // stream to write to
     15:   sm_string fname;        // filename for emitting #line
     16:   int line;            // current line number
     17: 
     18: public:      // funcs
     19:   EmitCode(char const *fname);
     20:   ~EmitCode();
     21: 
     22:   sm_string const &getFname() const { return fname; }
     23: 
     24:   // get current line number; flushes internally
     25:   int getLine();
     26: 
     27:   // flush data in sm_stringBuffer to 'os'
     28:   void flush();
     29: };
     30: 
     31: 
     32: // return a #line directive for the given location
     33: sm_string lineDirective(SourceLoc loc);
     34: 
     35: // emit a #line directive to restore reporting to the
     36: // EmitCode file itself (the 'sb' argument must be an EmitFile object)
     37: sm_stringBuilder &restoreLine(sm_stringBuilder &sb);   // NOTE(review): 'EmitFile' above likely means EmitCode -- confirm
    38: 
    39: 
    40: #endif // EMITCODE_H
End C section to elk/elk_emitcode.h[1]
Start C section to elk/elk_flatutil.h[1 /1 ]
     1: #line 410 "./lpsrc/elk.pak"
     2: // flatutil.h            see license.txt for copyright and terms of use
     3: // flatten helpers
     4: 
     5: #ifndef FLATUTIL_H
     6: #define FLATUTIL_H
     7: 
     8: #include "sm_flatten.h"
     9: #include "sm_objlist.h"
    10: #include "sm_sobjlist.h"
    11: 
    12: 
    13: // ------------- xfer of owners -----------------
     14: template <class T>
     15: void xferOwnerPtr(Flatten &flat, T *&ptr)   // T must have a T(Flatten&) ctor and an xfer() method
     16: {
     17:   if (flat.reading()) {
     18:     // construct a new, empty object
     19:     ptr = new T(flat);
     20:   }
     21: 
     22:   // read/write it
     23:   ptr->xfer(flat);
     24: 
     25:   // note it so we can have serfs to it
     26:   flat.noteOwner(ptr);
     27: }
     28: 
     29: 
     30: template <class T>
     31: void xferOwnerPtr_readObj(Flatten &flat, T *&ptr)   // T must provide static T* T::readObj(Flatten&)
     32: {
     33:   if (flat.reading()) {
     34:     // construct a new object, *and* read it from file
     35:     ptr = T::readObj(flat);
     36:   }
     37:   else {
     38:     // write it
     39:     ptr->xfer(flat);
     40:   }
     41: 
     42:   // note it so we can have serfs to it
     43:   flat.noteOwner(ptr);
     44: }
     45: 
     46: 
     47: template <class T>
     48: void xferObjList(Flatten &flat, ObjList <T> &list)   // owner list; T needs T(Flatten&) + xfer()
     49: {
     50:   if (flat.writing()) {
     51:     flat.writeInt(list.count());
     52: 
     53:     MUTATE_EACH_OBJLIST(T, list, iter) {
     54:       iter.data()->xfer(flat);
     55:       flat.noteOwner(iter.data());
     56:     }
     57:   }
     58:   else {
     59:     int listLen = flat.readInt();
     60: 
     61:     ObjListMutator<T> mut(list);
     62:     while (listLen--) {
     63:       // construct a new, empty object
     64:       T *obj = new T(flat);
     65: 
     66:       // read it
     67:       obj->xfer(flat);
     68:       flat.noteOwner(obj);
     69: 
     70:       // add it to the list
     71:       mut.append(obj);
     72:     }
     73:   }
     74: }
     75: 
     76: 
     77: // for things like AExprNode which have a readObj
     78: // static method .. it's possible to merge this with
     79: // the above code, but I'm not sure that's a good idea yet
     80: template <class T>
     81: void xferObjList_readObj(Flatten &flat, ObjList <T> &list)   // as above, but elements use T::readObj
     82: {
     83:   if (flat.writing()) {
     84:     flat.writeInt(list.count());
     85: 
     86:     MUTATE_EACH_OBJLIST(T, list, iter) {
     87:       iter.data()->xfer(flat);
     88:       flat.noteOwner(iter.data());
     89:     }
     90:   }
     91:   else {
     92:     int listLen = flat.readInt();
     93: 
     94:     ObjListMutator<T> mut(list);
     95:     while (listLen--) {
     96:       // construct a new object, *and* read its
     97:       // contents from the file
     98:       T *obj = T::readObj(flat);
     99:       flat.noteOwner(obj);
    100: 
    101:       // add it to the list
    102:       mut.append(obj);
    103:     }
    104:   }
    105: }
    106: 
    107: 
    108: // ------------- xfer of serfs -----------------
    109: // xfer a list of serf pointers to objects, each object
    110: // could be in one of several owner lists
    111: template <class T>
    112: void xferSObjList_multi(Flatten &flat, SObjList<T> &list,
    113:                         ObjList<T> **masterLists, int numMasters)   // candidate owner lists
    114: {
    115:   // be sure the same number of master lists are used at
    116:   // read and write time
    117:   flat.checkpoint(numMasters);
    118: 
    119:   if (flat.writing()) {
    120:     flat.writeInt(list.count());
    121: 
    122:     SMUTATE_EACH_OBJLIST(T, list, iter) {
    123:       // determine which master list it's in
    124:       int master;   // index of the list that owns iter.data()
    125:       for (master = 0; master<numMasters; master++) {
    126:         int index = masterLists[master]->indexOf(iter.data());
    127:         if (index != -1) {
    128:           // we found it -- encode the list and its index
    129:           if (numMasters > 1) {
    130:             flat.writeInt(master);    // only do this if multiple masters
    131:           }
    132:           flat.writeInt(index);
    133:           break;
    134:         }
    135:       }
    136: 
    137:       if (master == numMasters) {
    138:         // failed to find the master list
    139:         xfailure("xferSObjList_multi: obj not in any of the lists");
    140:       }
    141:     }
    142:   }
    143: 
    144:   else {
    145:     int listLen = flat.readInt();
    146: 
    147:     SObjListMutator<T> mut(list);
    148:     while (listLen--) {
    149:       int master = 0;               // assume just 1 master
    150:       if (numMasters > 1) {
    151:         master = flat.readInt();    // then refine
    152:       }
    153: 
    154:       mut.append(masterLists[master]->nth(flat.readInt()));
    155:     }
    156:   }
    157: }
    158: 
    159: 
    160: // xfer a list of serf pointers to objects owned by 'masterList'
    161: template <class T>
    162: void xferSObjList(Flatten &flat, SObjList<T> &list, ObjList<T> &masterList)
    163: {
    164:   ObjList<T> *ptr = &masterList;
    165:   xferSObjList_multi(flat, list, &ptr, 1 /*numMasters*/);
    166: }
    167: 
    168: 
    169: // xfer a pointer which points to something in a master list
    170: template <class T>
    171: void xferSerfPtrToList(Flatten &flat, T *&ptr, ObjList<T> &masterList)
    172: {
    173:   if (flat.writing()) {
    174:     flat.writeInt(masterList.indexOfF(ptr));
    175:   }
    176:   else {
    177:     ptr = masterList.nth(flat.readInt());
    178:   }
    179: }
    180: 
    181: 
    182: template <class T>
    183: void xferNullableSerfPtrToList(Flatten &flat, T *&ptr, ObjList<T> &masterList)   // NULL encoded as negative index
    184: {
    185:   if (flat.writing()) {
    186:     flat.writeInt(masterList.indexOf(ptr));
    187:   }
    188:   else {
    189:     int index = flat.readInt();
    190:     if (index >= 0) {
    191:       ptr = masterList.nth(index);
    192:     }
    193:     else {
    194:       ptr = NULL;
    195:     }
    196:   }
    197: }
    198: 
    199: 
    200: template <class T>
    201: void computedValue(Flatten &flat, T &variable, T value)   // asserts variable==value on write; assigns on read
    202: {
    203:   if (flat.writing()) {
    204:     // check it
    205:     xassert(variable == value);
    206:   }
    207:   else {
    208:     // set it
    209:     variable = value;
    210:   }
    211: }
    212: 
    213: 
    214: // void* implementation
    215: //#define Leaf void
    216: //#define Root void
    217: //#define FirstLevel void
    218: template <class Root, class FirstLevel, class Leaf>
    219: void xferSerfPtr_twoLevelAccess(
    220:   Flatten &flat,
    221:   Leaf *&leaf,
    222:   Root *root,
    223:   FirstLevel* (*getNthFirst)(Root *r, int n),
    224:   Leaf* (*getNthLeaf)(FirstLevel *f, int n))
    225: {
    226:   if (flat.writing()) {
    227:     // determine both indices
    228:     for (int index1=0; ; index1++) {
    229:       // get a first-level obj
    230:       FirstLevel *first = getNthFirst(root, index1);
    231:       if (!first) {
    232:         // exhausted first-level objs
    233:         xfailure("xferSerfPtr_twoLevelAccess: couldn't find obj to xfer");
    234:       }
    235: 
    236:       // look for the leaf inside it
    237:       for (int index2=0; ; index2++) {
    238:         Leaf *second = getNthLeaf(first, index2);
    239:         if (second == leaf) {
    240:           // found it; encode both indices
    241:           flat.writeInt(index1);
    242:           flat.writeInt(index2);
    243:           return;
    244:         }
    245:         if (second == NULL) {
    246:           // exhausted this subtree
    247:           break;
    248:         }
    249:       } // end of iter over leaves
    250:     } // end of iter over first-lvl objs
    251:   }
    252: 
    253:   else /*reading*/ {
    254:     // read both indices
    255:     int index1 = flat.readInt();
    256:     int index2 = flat.readInt();
    257: 
    258:     // follow the access path
    259:     FirstLevel *first = getNthFirst(root, index1);
    260:     formatAssert(first != NULL);
    261:     Leaf *second = getNthLeaf(first, index2);
    262:     formatAssert(second != NULL);
    263: 
    264:     // found it
    265:     leaf = second;
    266:   }
    267: }
    268: //#undef Leaf
    269: //#undef Root
    270: //#undef FirstLevel
    271: 
    272: 
    273: #if 0
    274: typedef void *accessFunc_void(void *parent, int childNum);
    275: 
    276: // typesafe interface
    277: template <class Root, class FirstLevel, class Leaf>
    278: inline void xferSerfPtr_twoLevelAccess(
    279:   Flatten &flat,
    280:   Leaf *&leaf,
    281:   Root *root,
    282:   FirstLevel* (*getNthFirst)(Root *r, int n),
    283:   Leaf* (*getNthLeaf)(FirstLevel *f, int n))
    284: {
    285:   xferSerfPtr_twoLevelAccess(
    286:     flat,
    287:     (void*&)leaf,
    288:     (void*)root,
    289:     (accessFunc_void)getNthFirst,
    290:     (accessFunc_void)getNthLeaf);
    291: }
    292: #endif // 0
    293: 
    294: 
    295: template <class Root, class FirstLevel, class Leaf>
    296: void xferSObjList_twoLevelAccess(
    297:   Flatten &flat,
    298:   SObjList<Leaf> &serfList,
    299:   Root *root,
    300:   FirstLevel* (*getNthFirst)(Root *r, int n),
    301:   Leaf* (*getNthLeaf)(FirstLevel *f, int n))
    302: {
    303:   if (flat.writing()) {
    304:     // length of list
    305:     flat.writeInt(serfList.count());
    306: 
    307:     // iterate over list
    308:     SMUTATE_EACH_OBJLIST(Leaf, serfList, iter) {
    309:       // write the obj
    310:       Leaf *leaf = iter.data();
    311:       xferSerfPtr_twoLevelAccess(
    312:         flat, leaf, root,
    313:         getNthFirst, getNthLeaf);
    314:     }
    315:   }
    316:   else {
    317:     int length = flat.readInt();
    318: 
    319:     SObjListMutator<Leaf> mut(serfList);
    320:     while (length--) {
    321:       // read the obj
    322:       Leaf *leaf;
    323:       xferSerfPtr_twoLevelAccess(
    324:         flat, leaf, root,
    325:         getNthFirst, getNthLeaf);
    326: 
    327:       // store it in the list
    328:       mut.append(leaf);
    329:     }
    330:   }
    331: }
    332: 
    333: 
    334: template <class T>
    335: void xferSerfPtr(Flatten &flat, T *&serfPtr)   // serf must be non-NULL
    336: {
    337:   flat.xferSerf((void*&)serfPtr, false /*nullable: no*/);
    338: }
    339: 
    340: template <class T>
    341: void xferNullableSerfPtr(Flatten &flat, T *&serfPtr)   // serf may be NULL
    342: {
    343:   flat.xferSerf((void*&)serfPtr, true /*nullable: yes*/);
    344: }
    345: 
    346: 
    347: #endif // FLATUTIL_H
   345: 
   346: 
   347: #endif // FLATUTIL_H
End C section to elk/elk_flatutil.h[1]
Start C section to elk/elk_genml.h[1 /1 ]
     1: #line 758 "./lpsrc/elk.pak"
     2: // genml.h            see license.txt for copyright and terms of use
     3: // extension to gramanl module that generates ML instead of C
     4: 
     5: #ifndef GENML_H
     6: #define GENML_H
     7: 
     8: class GrammarAnalysis;
     9: 
    10: // entry point
    11: void emitMLActionCode(GrammarAnalysis const &g, char const *mliFname,
    12:                       char const *mlFname, char const *srcFname);
    13: 
    14: #endif // GENML_H
End C section to elk/elk_genml.h[1]
Start C section to elk/elk_glrconfig.h[1 /1 ]
     1: #line 773 "./lpsrc/elk.pak"
     2: // glrconfig.h
     3: // do not edit; generated by ./configure
     4: 
     5: // glrconfig.h.in            see license.txt for copyright and terms of use
     6: // compile-time configuration options which affect the generated
     7: // GLR parser, and the interface to the user actions
     8: 
     9: #ifndef GLRCONFIG_H
    10: #define GLRCONFIG_H
    11: 
    12: 
    13: // when NO_GLR_SOURCELOC is #defined, we disable all support for
    14: // automatically propagating source location information in the
    15: // parser; user actions can still refer to 'loc', but they just get
    16: // a dummy no-location value
    17: #ifndef GLR_SOURCELOC
    18:   #define GLR_SOURCELOC 1        // set by ./configure
    19: #endif
    20: 
    21: #if GLR_SOURCELOC
    22:   #define SOURCELOC(stuff) stuff
    23: 
    24:   // this one adds a leading comma (I can't put that into the
    25:   // argument <stuff>, because then it looks like the macro is
    26:   // being passed 2 arguments)
    27:   #define SOURCELOCARG(stuff) , stuff
    28: 
    29:   #define NOSOURCELOC(stuff)
    30: #else
    31:   #define SOURCELOC(stuff)
    32:   #define SOURCELOCARG(stuff)
    33:   #define NOSOURCELOC(stuff) stuff
    34: #endif
    35: 
    36: 
    37: // when enabled, NODE_COLUMN tracks in each stack node the
    38: // appropriate column to display it for in debugging dump.
    39: // in the new RWL core, this is required to always be 1.
    40: #ifndef ENABLE_NODE_COLUMNS
    41:   #define ENABLE_NODE_COLUMNS 1
    42: #endif
    43: #if ENABLE_NODE_COLUMNS
    44:   #define NODE_COLUMN(stuff) stuff
    45: #else
    46:   #define NODE_COLUMN(stuff)
    47: #endif
    48: 
    49: 
    50: // when enabled, YIELD_COUNT keeps track of the number of times a
    51: // given semantic value is yielded; this is useful for warning the
    52: // user when a merge is performed but one of the merged values has
    53: // already been yielded to another semantic action, which implies
    54: // that the induced parse forest is incomplete
    55: #ifndef ENABLE_YIELD_COUNT
    56:   #define ENABLE_YIELD_COUNT 1
    57: #endif
    58: #if ENABLE_YIELD_COUNT
    59:   #define YIELD_COUNT(stuff) stuff
    60: #else
    61:   #define YIELD_COUNT(stuff)
    62: #endif
    63: 
    64: 
    65: // when true, error entries in the action table are extracted into
    66: // their own bitmap; this then enables compression on the action
    67: // table, since it makes it sparse
    68: #ifndef ENABLE_EEF_COMPRESSION
    69:   #define ENABLE_EEF_COMPRESSION 0
    70: #endif
    71: 
    72: // when true, the action and goto tables are compressed using
    73: // graph coloring
    74: #ifndef ENABLE_GCS_COMPRESSION
    75:   #define ENABLE_GCS_COMPRESSION 0
    76: #endif
    77: 
    78: // when true, action and goto *columns* are merged during GCS;
    79: // otherwise, only rows are merged
    80: #ifndef ENABLE_GCS_COLUMN_COMPRESSION
    81:   #define ENABLE_GCS_COLUMN_COMPRESSION 0
    82: #endif
    83: 
    84: // when true, entries in the action and goto tables are a
    85: // 1-byte index into an appropriate map
    86: #ifndef ENABLE_CRS_COMPRESSION
    87:   #define ENABLE_CRS_COMPRESSION 0
    88: #endif
    89: 
    90: 
    91: 
    92: #endif // GLRCONFIG_H
End C section to elk/elk_glrconfig.h[1]
Start C section to elk/elk_glr.h[1 /1 ]
     1: #line 866 "./lpsrc/elk.pak"
     2: // glr.h            see license.txt for copyright and terms of use
     3: // GLR parsing algorithm
     4: 
     5: /*
     6:  * Author: Scott McPeak, April 2000
     7:  *
     8:  * The fundamental concept in Generalized LR (GLR) parsing
     9:  * is to permit (at least local) ambiguity by "forking" the
    10:  * parse stack.  If the input is actually unambiguous, then
    11:  * all but one of the forked parsers will, at some point,
    12:  * fail to shift a symbol, and die.  If the input is truly
    13:  * ambiguous, forked parsers rejoin at some point, and the
    14:  * parse tree becomes a parse DAG, representing all possible
    15:  * parses.  (In fact, since cyclic grammars are supported,
    16:  * which can have an infinite number of parse trees for
    17:  * some inputs, we may end up with a cyclic parse *graph*.)
    18:  *
    19:  * In the larger scheme of things, this level of support for
    20:  * ambiguity is useful because it lets us use simpler and
    21:  * more intuitive grammars, more sophisticated disambiguation
    22:  * techniques, and parsing in the presence of incomplete
    23:  * or incorrect information (e.g. in an editor).
    24:  *
    25:  * The downside is that parsing is slower, and whatever tool
    26:  * processes the parse graph needs to have ways of dealing
    27:  * with the multiple parse interpretations.
    28:  *
    29:  * references:
    30:  *
    31:  *   [GLR]  J. Rekers.  Parser Generation for Interactive
    32:  *          Environments.  PhD thesis, University of
    33:  *          Amsterdam, 1992.  Available by ftp from
    34:  *          ftp://ftp.cwi.nl/pub/gipe/reports/Rek92.ps.Z .
    35:  *          [Contains a good description of the Generalized
    36:  *          LR (GLR) algorithm.]
    37:  */
    38: 
    39: #ifndef GLR_H
    40: #define GLR_H
    41: 
    42: #include "elk_glrconfig.h"
    43: #include "elk_parsetables.h"
    44: #include "elk_rcptr.h"
    45: #include "elk_useract.h"
    46: #include "sm_objpool.h"
    47: #include "sm_objlist.h"
    48: #include "sm_srcloc.h"
    49: #include "sm_sobjlist.h"
    50: 
    51: #include <stdio.h>         // FILE
    52: #include <iostream>      // std::ostream
    53: #include "flx_elk_config.hpp"
    54: 
    55: // fwds from other files
    56: class LexerInterface;      // lexerint.h
    57: 
    58: // forward decls for things declared below
    59: class StackNode;           // unit of parse state
    60: class SiblingLink;         // connections between stack nodes
    61: class PendingShift;        // for postponing shifts.. may remove
    62: class ELK_EXTERN GLR;                 // main class for GLR parsing
    63: 
    64: 
    65: // a pointer from a stacknode to one 'below' it (in the LR
    66: // parse stack sense); also has a link to the parse graph
    67: // we're constructing
     68: class SiblingLink {
     69: public:
     70:   // the stack node being pointed-at; it was created earlier
     71:   // than the one doing the pointing
     72:   RCPtr<StackNode> sib;
     73: 
     74:   // this is the semantic value associated with this link
     75:   // (parse tree nodes are *not* associated with stack nodes --
     76:   // that's how it was originally, but I figured out the hard
     77:   // way that's wrong (more info in compiler.notes.txt));
     78:   // this is an *owner* pointer
     79:   SemanticValue sval;
     80: 
     81:   // the source location of the left edge of the subtree rooted
     82:   // at this stack node; this is in essence part of the semantic
     83:   // value, but automatically propagated by the parser
     84:   SOURCELOC( SourceLoc loc; )
     85: 
     86:   // number of times this 'sval' has been yielded; this is used
     87:   // to track cases where we yield a value and then merge it
     88:   // (which means the induced parse forest is incomplete)
     89:   YIELD_COUNT( int yieldCount; )
     90: 
     91:   // if you add additional fields, they need to be inited in the
     92:   // constructor *and* in StackNode::addFirstSiblingLink_noRefCt
     93: 
     94: public:
     95:   SiblingLink(StackNode *s, SemanticValue sv
     96:               SOURCELOCARG( SourceLoc L ) );
     97:   ~SiblingLink();
     98: 
     99:   #if GLR_SOURCELOC
    100:     bool validLoc() const { return loc != SL_UNKNOWN; }
    101:   #else
    102:     bool validLoc() const { return false; }
    103:   #endif
    104: };
   105: 
   106: 
   107: // the GLR parse state is primarily made up of a graph of these
   108: // nodes, which play a role analogous to the stack nodes of a
   109: // normal LR parser; GLR nodes form a graph instead of a linear
   110: // stack because choice points (real or potential ambiguities)
   111: // are represented as multiple left-siblings
   112: class StackNode {
   113: public:
   114:   // the LR state the parser is in when this node is at the
   115:   // top ("at the top" means that nothing, besides perhaps itself,
   116:   // is pointing to it)
   117:   //ItemSet const * const state;                 // (serf)
   118:   StateId state;       // now it is an id
   119: 
   120:   // each leftSibling points to a stack node in one possible LR stack.
   121:   // if there is more than one, it means two or more LR stacks have
   122:   // been joined at this point.  this is the parse-time representation
   123:   // of ambiguity (actually, unambiguous grammars or inputs do
   124:   // sometimes lead to multiple siblings)
   125:   ObjList<SiblingLink> leftSiblings;           // this is a set
   126: 
   127:   // the *first* sibling is simply embedded directly into the
   128:   // stack node, to avoid list overhead in the common case of
   129:   // only one sibling; when firstSib.sib==NULL, there are no
   130:   // siblings
   131:   SiblingLink firstSib;
   132: 
   133:   // number of sibling links pointing at 'this', plus the number
   134:   // of worklists on which 'this' appears (some liberty is taken
   135:   // in the mini-LR parser, but it is carefully documented there)
   136:   int referenceCount;
   137: 
   138:   // how many stack nodes can I pop before hitting a nondeterminism?
   139:   // if this node itself has >1 sibling link, determinDepth==0; if
   140:   // this node has 1 sibling, but that sibling has >1 sibling, then
   141:   // determinDepth==1, and so on; if this node has 0 siblings, then
   142:   // determinDepth==1
   143:   int determinDepth;
   144: 
   145:   union {
   146:     // somewhat nonideal: I need access to the 'userActions' to
   147:     // deallocate semantic values when refCt hits zero, and I need
   148:     // to map states to state-symbols for the same reason.
   149:     // update: now I'm also using this to support pool-based
   150:     // deallocation in decRefCt()
   151:     GLR *glr;
   152: 
   153:     // this is used by the ObjectPool which handles allocation of
   154:     // StackNodes
   155:     StackNode *nextInFreeList;
   156:   };
   157: 
   158:   // ordinal position of the token that was being processed
   159:   // when this stack node was created; this information is useful
   160:   // for laying out the nodes when visualizing the GSS, but is
   161:   // not used by the parsing algorithm itself
   162:   NODE_COLUMN( int column; )
   163: 
   164:   // count and high-water for stack nodes
   165:   static int numStackNodesAllocd;
   166:   static int maxStackNodesAllocd;
   167: 
   168: 
   169: private:    // funcs
   170:   SiblingLink *
   171:     addAdditionalSiblingLink(StackNode *leftSib, SemanticValue sval
   172:                              SOURCELOCARG( SourceLoc loc ) );
   173: 
   174: public:     // funcs
   175:   StackNode();
   176:   ~StackNode();
   177: 
   178:   // ctor/dtor from point of view of the object pool user
   179:   void init(StateId state, GLR *glr);
   180:   void deinit();
   181: 
   182:   // internal workings of 'deinit', exposed for performance reasons
   183:   inline void decrementAllocCounter();
   184:   void deallocSemanticValues();
   185: 
   186:   // add a new link with the given tree node; return the link
   187:   SiblingLink *addSiblingLink(StackNode *leftSib, SemanticValue sval
   188:                               SOURCELOCARG( SourceLoc loc ) );
   189: 
   190:   // specialized version for performance-critical sections
   191:   inline void
   192:     addFirstSiblingLink_noRefCt(StackNode *leftSib, SemanticValue sval
   193:                                 SOURCELOCARG( SourceLoc loc ) );
   194: 
   195:   // return the symbol represented by this stack node;  it's
   196:   // the symbol shifted or reduced-to to get to this state
   197:   // (this used to be a data member, but there are at least
   198:   // two ways to compute it, so there's no need to store it)
   199:   SymbolId getSymbolC() const;
   200: 
   201:   // reference count stuff
   202:   void incRefCt() { referenceCount++; }     // no overflow check
   203:   void decRefCt();
   204: 
   205:   // sibling count queries (each one answerable in constant time)
   206:   bool hasZeroSiblings() const { return firstSib.sib==NULL; }
   207:   bool hasOneSibling() const { return firstSib.sib!=NULL && leftSiblings.isEmpty(); }
   208:   bool hasMultipleSiblings() const { return leftSiblings.isNotEmpty(); }
   209: 
   210:   // when you expect there's only one sibling link, get it this way
   211:   SiblingLink const *getUniqueLinkC() const;
   212:   SiblingLink *getUniqueLink() { return const_cast<SiblingLink*>(getUniqueLinkC()); }
   213: 
   214:   // retrieve pointer to the sibling link to a given node, or NULL if none
   215:   SiblingLink *getLinkTo(StackNode *another);
   216: 
   217:   // recompute my determinDepth based on siblings,
   218:   // but don't actually change the state
   219:   int computeDeterminDepth() const;
   220: 
   221:   // debugging
   222:   static void printAllocStats();
   223:   void checkLocalInvariants() const;
   224: };
   225: 
   226: 
   227: // this is a priority queue of stack node paths that are candidates to
   228: // reduce, maintained such that we can select paths in an order which
   229: // will avoid yield-then-merge
   230: class ReductionPathQueue {
   231: public:       // types
   232:   // a single path in the stack
   233:   class Path {
   234:   public:     // data
   235:     // ---- right edge info ----
   236:     // the rightmost state's id; we're reducing in this state
   237:     StateId startStateId;
   238: 
   239:     // id of the production with which we're reducing
   240:     int prodIndex;
   241: 
   242:     // ---- left edge info ----
   243:     // the token column (ordinal position of a token in the token
   244:     // stream) of the leftmost stack node; the smaller the
   245:     // startColumn, the more tokens this reduction spans
   246:     int startColumn;
   247: 
   248:     // stack node at the left edge; our reduction will push a new
   249:     // stack node on top of this one
   250:     StackNode *leftEdgeNode;
   251: 
   252:     // ---- path in between ----
   253:     // array of sibling links, naming the path; 'sibLink[0]' is the
   254:     // leftmost link; array length is given by the rhsLen of
   255:     // prodIndex's production
   256:     GrowArray<SiblingLink*> sibLinks;    // (array of serfs)
   257: 
   258:     // corresponding array of symbol ids so we know how to interpret
   259:     // the semantic values in the links
   260:     GrowArray<SymbolId> symbols;
   261: 
   262:     union {
   263:       // link between nodes for construction of a linked list,
   264:       // kept in sorted order
   265:       Path *next;
   266: 
   267:       // link for free list in the object pool
   268:       Path *nextInFreeList;
   269:     };
   270: 
   271:   public:     // funcs
   272:     Path();
   273:     ~Path();
   274: 
   275:     void init(StateId startStateId, int prodIndex, int rhsLen);
   276:     void deinit() {}     // nothing to release; arrays are retained for reuse
   277:   };
   278: 
   279: private:      // data
   280:   // head of the list
   281:   Path *top;
   282: 
   283:   // allocation pool of Path objects
   284:   ObjectPool<Path> pathPool;
   285: 
   286:   // parse tables, so we can decode prodIndex and also compare
   287:   // production ids for sorting purposes
   288:   ParseTables *tables;
   289: 
   290: private:      // funcs
   291:   bool goesBefore(Path const *p1, Path const *p2) const;
   292: 
   293: public:       // funcs
   294:   ReductionPathQueue(ParseTables *t);
   295:   ~ReductionPathQueue();
   296: 
   297:   // get another Path object, inited with these values
   298:   Path *newPath(StateId startStateId, int prodIndex, int rhsLen);
   299: 
   300:   // make a copy of the prototype 'src', fill in its left-edge
   301:   // fields using 'leftEdge', and insert it into sorted order
   302:   // in the queue
   303:   void insertPathCopy(Path const *src, StackNode *leftEdge);
   304: 
   305:   // true if there are no more paths
   306:   bool isEmpty() const { return top == NULL; }
   307:   bool isNotEmpty() const { return !isEmpty(); }
   308: 
   309:   // remove the next path to reduce from the list, and return it
   310:   Path *dequeue();
   311: 
   312:   // mark a path as not being used, so it will be recycled into the pool
   313:   void deletePath(Path *p);
   314: };
   315: 
   316: 
   317: // each GLR object is a parser for a specific grammar, but can be
   318: // used to parse multiple token streams
   319: class ELK_EXTERN GLR {
   320: public:
   321:   // ---- grammar-wide data ----
   322:   // user-specified actions
   323:   UserActions *userAct;                     // (serf)
   324: 
   325:   // parse tables derived from the grammar
   326:   ParseTables *tables;                      // (serf)
   327: 
   328:   // ---- parser state between tokens ----
   329:   // I keep a pointer to this so I can ask for token descriptions
   330:   // inside some of the helper functions
   331:   LexerInterface *lexerPtr;                 // (serf)
   332: 
   333:   // Every node in this set is (the top of) a parser that might
   334:   // ultimately succeed to parse the input, or might reach a
   335:   // point where it cannot proceed, and therefore dies.  (See
   336:   // comments at top of glr.cc for more details.)
   337:   ArrayStack<StackNode*> topmostParsers;     // (refct list)
   338: 
   339:   // index: StateId -> index in 'topmostParsers' of unique parser
   340:   // with that state, or INDEX_NO_PARSER if none has that state
   341:   typedef unsigned char ParserIndexEntry;
   342:   enum { INDEX_NO_PARSER = 255 };           // sentinel; must fit in ParserIndexEntry
   343:   ParserIndexEntry *parserIndex;            // (owner)
   344: 
   345:   // this is for assigning unique ids to stack nodes
   346:   int nextStackNodeId;
   347:   enum { initialStackNodeId = 1 };
   348: 
   349:   // ---- parser state during each token ----
   350:   // I used to have fields:
   351:   //   int currentTokenType;
   352:   //   SemanticValue currentTokenValue;
   353:   //   SourceLoc currentTokenLoc;
   354:   // but these have been now replaced by, respectively,
   355:   //   lexerPtr->type
   356:   //   lexerPtr->sval
   357:   //   lexerPtr->loc
   358: 
   359:   // ---- scratch space re-used at token-level (or finer) granularity ----
   360:   // to be regarded as a local variable of GLR::rwlProcessWorklist
   361:   GrowArray<SemanticValue> toPass;
   362: 
   363:   // persistent array that I swap with 'topmostParsers' during
   364:   // 'rwlShiftTerminals' to avoid extra copying or allocation;
   365:   // this should be regarded as variable local to that function
   366:   ArrayStack<StackNode*> prevTopmost;        // (refct list)
   367: 
   368:   // ---- allocation pools ----
   369:   // this is a pointer to the same-named local variable in innerGlrParse
   370:   ObjectPool<StackNode> *stackNodePool;
   371: 
   372:   // pool and list for the RWL implementation
   373:   ReductionPathQueue pathQueue;
   374: 
   375:   // ---- user options ----
   376:   // when true, failed parses are accompanied by some rudimentary
   377:   // diagnosis; when false, failed parses are silent (default: true)
   378:   bool noisyFailedParse;
   379: 
   380:   // ---- debugging trace ----
   381:   // these are computed during GLR::GLR since the profiler reports
   382:   // there is significant expense to computing the debug strings
   383:   // (that are then usually not printed)
   384:   bool trParse;                             // tracingSys("parse")
   385:   std::ostream &trsParse;                        // trace("parse")
   386: 
   387:   // track column for new nodes
   388:   NODE_COLUMN( int globalNodeColumn; )
   389: 
   390:   // statistics on parser actions
   391:   int detShift, detReduce, nondetShift, nondetReduce;
   392: 
   393:   // count of # of times yield-then-merge happens
   394:   int yieldThenMergeCt;
   395: 
   396: private:    // funcs
   397:   // comments in glr.cc
   398:   SemanticValue duplicateSemanticValue(SymbolId sym, SemanticValue sval);
   399:   void deallocateSemanticValue(SymbolId sym, SemanticValue sval);
   400:   SemanticValue grabTopSval(StackNode *node);
   401: 
   402:   StackNode *findTopmostParser(StateId state);
   403:   StackNode *makeStackNode(StateId state);
   404:   void writeParseGraph(char const *input) const;
   405:   void clearAllStackNodes();
   406:   void addTopmostParser(StackNode *parser);
   407:   void pullFromTopmostParsers(StackNode *parser);
   408:   bool canMakeProgress(StackNode *parser);
   409:   void dumpGSS(int tokenNumber) const;
   410:   void dumpGSSEdge(FILE *dest, StackNode const *src,
   411:                                StackNode const *target) const;
   412:   void printConfig() const;
   413:   void buildParserIndex();
   414:   void printParseErrorMessage(StateId lastToDie);
   415:   bool cleanupAfterParse(SemanticValue &treeTop);
   416:   bool nondeterministicParseToken();
   417:   static bool innerGlrParse(GLR &glr, LexerInterface &lexer, SemanticValue &treeTop);
   418:   SemanticValue doReductionAction(
   419:     int productionId, SemanticValue const *svals
   420:     SOURCELOCARG( SourceLoc loc ) );
   421: 
   422:   void rwlProcessWorklist();
   423:   SiblingLink *rwlShiftNonterminal(StackNode *leftSibling, int lhsIndex,
   424:                                    SemanticValue /*owner*/ sval
   425:                                    SOURCELOCARG( SourceLoc loc ) );
   426:   int rwlEnqueueReductions(StackNode *parser, ActionEntry action,
   427:                            SiblingLink *sibLink);
   428:   void rwlCollectPathLink(
   429:     ReductionPathQueue::Path *proto, int popsRemaining,
   430:     StackNode *currentNode, SiblingLink *mustUseLink, SiblingLink *linkToAdd);
   431:   void rwlRecursiveEnqueue(
   432:     ReductionPathQueue::Path *proto,
   433:     int popsRemaining,
   434:     StackNode *currentNode,
   435:     SiblingLink *mustUseLink);
   436:   void rwlShiftTerminals();
   437: 
   438:   void configCheck(char const *option, bool core, bool table);
   439: 
   440:   sm_string stackSummary() const;
   441:   void nodeSummary(sm_stringBuilder &sb, StackNode const *node) const;
   442:   void innerStackSummary(sm_stringBuilder &sb,
   443:                          SObjList<StackNode const> &printed,
   444:                          StackNode const *node) const;
   445: 
   446: public:     // funcs
   447:   GLR(UserActions *userAct, ParseTables *tables);
   448:   ~GLR();
   449: 
   450:   // ------- primary interface -------
   451:   // read the named grammar file (.bin extension, typically)
   452:   void readBinaryGrammar(char const *grammarFname);
   453: 
   454:   // parse, using the token stream in 'lexer', and store the final
   455:   // semantic value in 'treeTop'
   456:   bool glrParse(LexerInterface &lexer, SemanticValue &treeTop);
   457: 
   458: };
   459: 
   460: 
   461: #endif // GLR_H
End C section to elk/elk_glr.h[1]
Start C section to elk/elk_gramanl.h[1 /1 ]
     1: #line 1328 "./lpsrc/elk.pak"
     2: // gramanl.h            see license.txt for copyright and terms of use
     3: // grammar analysis module; separated from grammar.h to
     4: //   reduce mixing of representation and algorithm; this
     5: //   module should be entirely algorithm
     6: 
     7: // Author: Scott McPeak, April 2000
     8: // Updates: March 2002
     9: 
    10: // references:
    11: //
    12: //   [ASU]  Aho, Sethi Ullman.  Compilers: Principles,
    13: //          Techniques, and Tools.  Addison-Wesley,
    14: //          Reading, MA.  1986.  Second printing (3/88).
    15: //          [A classic reference for LR parsing.]
    16: 
    17: 
    18: #ifndef __GRAMANL_H
    19: #define __GRAMANL_H
    20: 
    21: #include "elk_grammar.h"
    22: #include "sm_ohashtbl.h"
    23: #include "sm_okhashtbl.h"
    24: #include "sm_okhasharr.h"
    25: #include "elk_glrconfig.h"
    26: #include "elk_parsetables.h"
    27: 
    28: // forward decls
    29: class Bit2d;              // bit2d.h
    30: class BitArray;           // bitarray.h
    31: class EmitCode;           // emitcode.h
    32: 
    33: // this file
    34: class GrammarAnalysis;
    35: 
    36: 
    37: // ---------------- DottedProduction --------------------
    38: // a production, with an indicator that says how much of this
    39: // production has been matched by some part of the input string
    40: // (exactly which part of the input depends on where this appears
    41: // in the algorithm's data structures)
    42: class DottedProduction {
    43: // ------ representation ------
    44: private:    // data
    45:   Production const *prod;        // (serf) the base production
    46:   int dot;                       // 0 means it's before all RHS symbols, 1 means after first, etc.
    47: 
    48: // -------- annotation ----------
    49: private:    // data
    50:   // performance optimization: NULL if dot at end, or else pointer
    51:   // to the symbol right after the dot
    52:   Symbol *afterDot;
    53: 
    54: public:     // data
    55:   // First of the sentential form that follows the dot; this set
    56:   // is computed by GrammarAnalysis::computeDProdFirsts
    57:   TerminalSet firstSet;
    58: 
    59:   // also computed by computeDProdFirsts, this is true if the
    60:   // sentential form can derive epsilon (the empty string)
    61:   bool canDeriveEmpty;
    62: 
    63:   // during item set closure, I need a way to map from dotted prods to
    64:   // the items which use them; so rather than use a hash table, I'll
    65:   // just annotate the dprods themselves with backpointers; these
    66:   // backpointers *must* be maintained as NULL when there's no
    67:   // association
    68:   mutable class LRItem *backPointer;
    69: 
    70: private:    // funcs
    71:   void init();
    72: 
    73: public:     // funcs
    74:   //DottedProduction(DottedProduction const &obj);
    75: 
    76:   // need the grammar passed during creation so we know how big
    77:   // to make 'lookahead'
    78:   //DottedProduction(GrammarAnalysis const &g);       // for later filling-in
    79:   //DottedProduction(/*GrammarAnalysis const &g,*/ Production *p, int d);
    80:   DottedProduction();     // for creating arrays of them
    81:   ~DottedProduction();
    82: 
    83:   // no point to flattening these because they're easily re-computable
    84:   #if 0
    85:   DottedProduction(Flatten&);
    86:   void xfer(Flatten &flat);
    87:   void xferSerfs(Flatten &flat, GrammarAnalysis &g);
    88:   #endif // 0
    89: 
    90:   // simple queries
    91:   Production const *getProd() const { return prod; }
    92:   int getDot() const { return dot; }
    93:   bool isDotAtStart() const { return dot==0; }
    94:   bool isDotAtEnd() const { return afterDot==NULL; }
    95: 
    96:   // no need for equality now, since all DPs with the same
    97:   // prod/dot are shared
    98:   //bool isEqual(DottedProduction const &obj) const;
    99:   //bool operator== (DottedProduction const &obj) const;
   100: 
   101:   // call this to change prod and dot
   102:   void setProdAndDot(Production const *p, int d);
   103: 
   104:   // dot must not be at the start (left edge)
   105:   Symbol const *symbolBeforeDotC() const;
   106:   Symbol *symbolBeforeDot() { return const_cast<Symbol*>(symbolBeforeDotC()); }
   107: 
   108:   // dot must not be at the end (right edge)
   109:   Symbol const *symbolAfterDotC() const { return afterDot; }
   110:   Symbol *symbolAfterDot() { return const_cast<Symbol*>(symbolAfterDotC()); }
   111: 
   112:   // print to std::cout as 'A -> B . c D' (no newline)
   113:   void print(std::ostream &os/*, GrammarAnalysis const &g*/) const;
   114:   OSTREAM_OPERATOR(DottedProduction)
   115: };
   116: 
   117: // convenience typedefs and FOREACH-style macros for lists of dotted productions
   118: typedef ObjList<DottedProduction> DProductionList;
   119: typedef ObjListIter<DottedProduction> DProductionListIter;
   120: typedef SObjList<DottedProduction> SDProductionList;
   121: typedef SObjListIter<DottedProduction> SDProductionListIter;
   122: 
   123: #define FOREACH_DOTTEDPRODUCTION(list, iter) FOREACH_OBJLIST(DottedProduction, list, iter)
   124: #define MUTATE_EACH_DOTTEDPRODUCTION(list, iter) MUTATE_EACH_OBJLIST(DottedProduction, list, iter)
   125: #define SFOREACH_DOTTEDPRODUCTION(list, iter) SFOREACH_OBJLIST(DottedProduction, list, iter)
   126: #define SMUTATE_EACH_DOTTEDPRODUCTION(list, iter) SMUTATE_EACH_OBJLIST(DottedProduction, list, iter)
   127: 
   128: 
   129: // --------------- LRItem ---------------
   130: // a dotted production with a lookahead; whereas each production
   131: // has a fixed number of dotted versions of that production, there
   132: // can be lots of items, because of the differing lookahead sets
   133: // (I prefer the name "LRItem" to simply "Item" because the latter
   134: // easily collides with other uses)
   135: class LRItem {
   136: public:    // data
   137:   DottedProduction const *dprod;  // (serf) production and dot position
   138:   TerminalSet lookahead;          // lookahead symbols
   139: 
   140: public:    // funcs
   141:   LRItem(LRItem const &obj);
   142:   ~LRItem();
   143: 
   144:   // need 'numTerms' to tell how big to make 'lookahead'
   145:   LRItem(int numTerms, DottedProduction const *dp);
   146: 
   147:   LRItem(Flatten&);
   148:   void xfer(Flatten &flat);
   149:   void xferSerfs(Flatten &flat, GrammarAnalysis &g);
   150: 
   151:   // comparison; equalNoLA compares only 'dprod', ignoring the lookahead set
   152:   static int diff(LRItem const *a, LRItem const *b, void*);
   153:   bool equalNoLA(LRItem const &obj) const
   154:     { return dprod == obj.dprod; }
   155: 
   156:   // manipulate the lookahead set
   157:   bool laContains(int terminalId) const
   158:     { return lookahead.contains(terminalId); }
   159:   void laAdd(int terminalId)
   160:     { lookahead.add(terminalId); }
   161:   void laRemove(int terminalId)
   162:     { lookahead.remove(terminalId); }
   163:   void laCopy(LRItem const &obj)
   164:     { lookahead.copy(obj.lookahead); }
   165:   bool laMerge(LRItem const &obj)     // returns true if merging changed lookahead
   166:     { return lookahead.merge(obj.lookahead); }
   167:   bool laIsEqual(LRItem const &obj) const
   168:     { return lookahead.isEqual(obj.lookahead); }
   169: 
   170:   // pass-thru queries into 'dprod'
   171:   Production const *getProd() const
   172:     { return dprod->getProd(); }
   173:   int getDot() const
   174:     { return dprod->getDot(); }
   175:   bool isDotAtStart() const
   176:     { return dprod->isDotAtStart(); }
   177:   bool isDotAtEnd() const
   178:     { return dprod->isDotAtEnd(); }
   179:   Symbol const *symbolBeforeDotC() const
   180:     { return dprod->symbolBeforeDotC(); }
   181:   Symbol const *symbolAfterDotC() const
   182:     { return dprod->symbolAfterDotC(); }
   183: 
   184:   int prodIndex() const
   185:     { return getProd()->prodIndex; }
   186: 
   187:   // stuff for insertion into a hash table
   188:   static unsigned hash(DottedProduction const *key);
   189:   static DottedProduction const *dataToKey(LRItem *dp);
   190:   static bool dpEqual(DottedProduction const *key1, DottedProduction const *key2);
   191: 
   192:   // true if this item is "A -> alpha * t beta"
   193:   bool isExtendingShift(Nonterminal const *A, Terminal const *t) const;
   194: 
   195:   void print(std::ostream &os, GrammarAnalysis const &g) const;
   196: };
   197: 
   198: 
   199: // ---------------- ItemSet -------------------
   200: // a set of dotted productions, and the transitions between
   201: // item sets, as in LR(0) set-of-items construction
   202: class ItemSet {
   203: public:     // intended to be read-only public
   204:   // kernel items: the items that define the set; except for
   205:   // the special case of the initial item in the initial state,
   206:   // the kernel items are distinguished by having the dot *not*
   207:   // at the left edge
   208:   ObjList<LRItem> kernelItems;
   209: 
   210:   // nonkernel items: those derived as the closure of the kernel
   211:   // items by expanding symbols to the right of dots; here I am
   212:   // making the choice to materialize them, rather than derive
   213:   // them on the spot as needed (and may change this decision)
   214:   ObjList<LRItem> nonkernelItems;
   215: 
   216: private:    // data
   217:   // transition function (where we go on shifts); NULL means no transition
   218:   //   Map : (Terminal id or Nonterminal id)  -> ItemSet*
   219:   ItemSet **termTransition;                  // (owner ptr to array of serf ptrs)
   220:   ItemSet **nontermTransition;               // (owner ptr to array of serf ptrs)
   221: 
   222:   // bounds for above
   223:   int terms;
   224:   int nonterms;
   225: 
   226:   // profiler reports I'm spending significant time rifling through
   227:   // the items looking for those that have the dot at the end; so this
   228:   // array will point to all such items
   229:   LRItem const **dotsAtEnd;                  // (owner ptr to array of serf ptrs)
   230:   int numDotsAtEnd;                          // number of elements in 'dotsAtEnd'
   231: 
   232:   // profiler also reports I'm still spending time comparing item sets; this
   233:   // stores a CRC of the numerically sorted kernel item pointer addresses,
   234:   // concatenated into a buffer of sufficient size
   235:   unsigned long kernelItemsCRC;
   236: 
   237:   // need to store this, because I can't compute it once I throw
   238:   // away the items
   239:   Symbol const *stateSymbol;
   240: 
   241: public:     // data
   242:   // numerical state id, should be unique among item sets
   243:   // in a particular grammar's sets
   244:   StateId id;
   245: 
   246:   // it's useful to have a BFS tree superimposed on the transition
   247:   // graph; for example, it makes it easy to generate sample inputs
   248:   // for each state.  so we store the parent pointer; we can derive
   249:   // child pointers by looking at all outgoing transitions, and
   250:   // filtering for those whose targets' parent pointers equal 'this'.
   251:   // the start state's parent is NULL, since it is the root of the
   252:   // BFS tree
   253:   ItemSet *BFSparent;                        // (serf)
   254: 
   255: private:    // funcs
   256:   int bcheckTerm(int index) const;
   257:   int bcheckNonterm(int index) const;
   258:   ItemSet *&refTransition(Symbol const *sym);
   259: 
   260:   void allocateTransitionFunction();
   261:   Symbol const *computeStateSymbolC() const;
   262: 
   263:   void deleteNonReductions(ObjList<LRItem> &list);
   264: 
   265: public:     // funcs
   266:   ItemSet(StateId id, int numTerms, int numNonterms);
   267:   ~ItemSet();
   268: 
   269:   ItemSet(Flatten&);
   270:   void xfer(Flatten &flat);
   271:   void xferSerfs(Flatten &flat, GrammarAnalysis &g);
   272: 
   273:   // ---- item queries ----
   274:   // the set of items names a symbol as the symbol used
   275:   // to reach this state -- namely, the symbol that appears
   276:   // to the left of a dot.  this fn retrieves that symbol
   277:   // (if all items have dots at left edge, returns NULL; this
   278:   // would be true only for the initial state)
   279:   Symbol const *getStateSymbolC() const { return stateSymbol; }
   280: 
   281:   // equality is defined as having the same items (basic set equality)
   282:   bool operator== (ItemSet const &obj) const;
   283: 
   284:   // sometimes it's convenient to have all items mixed together
   285:   // (CONSTNESS: allows modification of items...)
   286:   void getAllItems(SObjList<LRItem> &dest, bool nonkernel=true) const;
   287: 
   288:   // used for sorting by id
   289:   static int diffById(ItemSet const *left, ItemSet const *right, void*);
   290: 
   291:   // ---- transition queries ----
   292:   // query transition fn for an arbitrary symbol; returns
   293:   // NULL if no transition is defined
   294:   ItemSet const *transitionC(Symbol const *sym) const;
   295:   ItemSet *transition(Symbol const *sym)
   296:     { return const_cast<ItemSet*>(transitionC(sym)); }
   297: 
   298:   // alternate interface; also might return NULL
   299:   ItemSet const *getTermTransition(int termId) const
   300:     { return termTransition[bcheckTerm(termId)]; }
   301:   ItemSet const *getNontermTransition(int nontermId) const
   302:     { return nontermTransition[bcheckNonterm(nontermId)]; }
   303: 
   304:   // get the list of productions that are ready to reduce, given
   305:   // that the next input symbol is 'lookahead' (i.e. in the follow
   306:   // of a production's LHS); parsing=true means we are actually
   307:   // parsing input, so certain tracing output is appropriate;
   308:   // 'reductions' is a list of const Productions
   309:   void getPossibleReductions(ProductionList &reductions,
   310:                              Terminal const *lookahead,
   311:                              bool parsing) const;
   312: 
   313: 
   314:   // assuming this itemset has at least one reduction ready (an assertion
   315:   // checks this), retrieve the first one
   316:   Production const *getFirstReduction() const;
   317: 
   318:   // ---- item mutations ----
   319:   // add a kernel item; used while constructing the state
   320:   void addKernelItem(LRItem * /*owner*/ item);
   321: 
   322:   // after adding all kernel items, call this
   323:   void sortKernelItems();
   324: 
   325:   // add a nonkernel item; used while computing closure; this
   326:   // item must not already be in the item set
   327:   void addNonkernelItem(LRItem * /*owner*/ item);
   328: 
   329:   // computes things derived from the item set lists:
   330:   // dotsAtEnd, numDotsAtEnd, kernelItemsCRC, stateSymbol;
   331:   // do this after adding things to the items lists
   332:   void changedItems();
   333: 
   334:   // a part of 'changedItems', this is used in a specialized way
   335:   // during LR item set construction; it leaves 'this' in a somewhat
   336:   // half-baked state (if changedItems is not also called), so some
   337:   // care needs to be taken when using this directly
   338:   void computeKernelCRC(GrowArray<DottedProduction const*> &array);
   339: 
  // remove the reduce using 'prod' on lookahead 'sym';
  // calls 'changedItems' internally
   342:   void removeReduce(Production const *prod, Terminal const *sym);
   343: 
   344:   // throw away information not needed during parsing
   345:   void throwAwayItems();
   346: 
   347:   // 'dest' has already been established to have the same kernel
   348:   // items as 'this' -- so merge all the kernel lookahead items
   349:   // of 'this' into 'dest'; return 'true' if any changes were made
   350:   // to 'dest'
   351:   bool mergeLookaheadsInto(ItemSet &dest) const;
   352: 
   353:   // true if this itemset has an item "A -> alpha * t beta", i.e.
   354:   // one that would extend 'A' by shifting 't'
   355:   bool hasExtendingShift(Nonterminal const *A, Terminal const *t) const;
   356: 
   357:   // ---- transition mutations ----
   358:   // set transition on 'sym' to be 'dest'
   359:   void setTransition(Symbol const *sym, ItemSet *dest);
   360: 
  // remove the shift on 'sym'
   362:   void removeShift(Terminal const *sym);
   363: 
   364:   // ------ hashtable stuff --------
   365:   static ItemSet const *dataToKey(ItemSet *data);
   366:   static unsigned hash(ItemSet const *key);
   367:   static bool equalKey(ItemSet const *key1, ItemSet const *key2);
   368: 
   369:   // ---- debugging ----
   370:   void writeGraph(std::ostream &os, GrammarAnalysis const &g) const;
   371:   void print(std::ostream &os, GrammarAnalysis const &g, bool nonkernel=true) const;
   372: };
   373: 
   374: 
   375: // ---------------------- GrammarAnalysis -------------------
// Grammar plus everything derived from it by analysis: symbol/production
// indexes, derivability and First/Follow sets, the LR item sets, and the
// final parse tables.
class GrammarAnalysis : public Grammar {
protected:  // data
  // nonterminal index 0 is reserved for the emptyString pseudo-symbol
  enum { emptyStringIndex = 0 };

  // if entry i,j is true, then nonterminal i can derive nonterminal j
  // (this is a graph, represented (for now) as an adjacency matrix)
  Bit2d *derivable;                     // (owner)

  // index the symbols on their integer ids
  Nonterminal **indexedNonterms;        // (owner -> serfs) ntIndex -> Nonterminal
  Terminal **indexedTerms;              // (owner -> serfs) termIndex -> Terminal
  // numNonterms==Grammar::numNonterminals(), numTerms==Grammar::numTerminals()
  int numNonterms;                      // length of 'indexedNonterms' array
  int numTerms;                         //   "     "         terms       "

  // during itemSetClosure, profiling reports we spend a lot of time
  // walking the list of productions looking for those that have a given
  // symbol on the LHS; so let's index productions by LHS symbol index;
  // this array has 'numNonterms' elements, mapping each nonterminal to
  // the list of productions with that nonterminal on the LHS
  SObjList<Production> *productionsByLHS;    // (owner ptr to array)

  // map of production x dotPosition -> DottedProduction;
  // each element of the 'dottedProds' array is a pointer to an
  // array of DottedProduction objects
  DottedProduction **dottedProds;       // (owner ptr to array of owners)

  // index of productions by id
  Production **indexedProds;            // (owner -> serfs) prodIndex -> Production
  int numProds;                         // length of 'dottedProds' (and 'indexedProds')

  // only true after initializeAuxData has been called
  bool initialized;

  // used to assign itemset ids while the item sets are being
  // initially constructed; later, they get renumbered into a
  // canonical order
  int nextItemSetId;

  // the LR parsing tables
  ObjList<ItemSet> itemSets;

  // distinguished start state; NOTE: much of the grammar analysis
  // code currently assumes (and checks) that state 0 is the start
  // state, so if you want to do something different, that code might
  // need to be changed
  ItemSet *startState;                  // (serf)

public:     // data
  // true if any nonterminal can derive itself (with no extra symbols
  // surrounding it) in 1 or more steps
  bool cyclic;

  // symbol of interest; various diagnostics are printed when
  // certain things happen with it (e.g. the first application
  // is to print whenever something is added to this sym's
  // follow)
  Symbol const *symOfInterest;

  // incremented each time we encounter an error that we can recover from
  int errors;

  // parse tables
  ParseTables *tables;                  // (owner)

private:    // funcs
  // ---- analysis init ----
  // call this after grammar is completely built
  void initializeAuxData();
  void computeIndexedNonterms();
  void computeIndexedTerms();
  void computeProductionsByLHS();
  void computeReachable();
  void computeReachableDFS(Nonterminal *nt);
  void resetFirstFollow();
  void computeDProdFirsts();
  void computeSupersets();

  // ---- dotted productions ----
  void createDottedProductions();
  void deleteDottedProductions();
  DottedProduction const *getDProd(Production const *prod, int posn) const;
  DottedProduction *getDProd_nc(Production const *prod, int posn)
    { return const_cast<DottedProduction*>(getDProd(prod, posn)); }

  // given a dprod, yield the one obtained by moving the dot one
  // place to the right
  DottedProduction const *nextDProd(DottedProduction const *dp) const
    #ifdef NDEBUG
      { return dp+1; }      // take advantage of physical co-location
    #endif
      ;                     // debug version checks bounds

  // ---- derivability ----
  // iteratively compute every pair A,B such that A can derive B
  void computeWhatCanDeriveWhat();
  void initDerivableRelation();

  // add a derivability relation; returns true if this makes a change
  bool addDerivable(Nonterminal const *left, Nonterminal const *right);
  bool addDerivable(int leftNtIndex, int rightNtIndex);

  // private derivability interface
  bool canDerive(int leftNtIndex, int rightNtIndex) const;
  bool sequenceCanDeriveEmpty(RHSEltList const &list) const;
  bool iterSeqCanDeriveEmpty(RHSEltListIter iter) const;

  // ---- First ----
  void computeFirst();
  //bool addFirst(Nonterminal *NT, Terminal *term);
  void firstOfSequence(TerminalSet &destList, RHSEltList const &sequence);
  void firstOfIterSeq(TerminalSet &destList, RHSEltListIter sym);

  // ---- Follow ----
  void computeFollow();
  //bool addFollow(Nonterminal *NT, Terminal *term);

  // ---- LR item sets ----
  ItemSet *makeItemSet();
  void disposeItemSet(ItemSet *is);
  void moveDotNoClosure(ItemSet const *source, Symbol const *symbol,
                        ItemSet *dest, ObjList<LRItem> &unusedTail,
                        GrowArray<DottedProduction const*> &array);
  ItemSet *findItemSetInList(ObjList<ItemSet> &list,
                             ItemSet const *itemSet);
  static bool itemSetsEqual(ItemSet const *is1, ItemSet const *is2);

  void constructLRItemSets();
  void lrParse(char const *input);

  void handleShiftReduceConflict(
    bool &keepShift, bool &keepReduce, bool &dontWarn,
    ItemSet const *state, Production const *prod, Terminal const *sym);

  void resolveConflicts(
    ItemSet const *state,        // parse state in which the actions are possible
    Terminal const *sym,         // lookahead symbol for these actions
    ItemSet const *&shiftDest,   // (inout) if non-NULL, the state to which we can shift
    ProductionList &reductions,  // (inout) list of possible reductions
    bool allowAmbig,             // if false, always return at most 1 action
    bool &printedConflictHeader, // (inout) true once we've printed the state header
    int &sr, int &rr);           // (inout) counts of S/R and R/R conflicts, resp.
  void computeParseTables(bool allowAmbig);

  int subsetDirectiveResolution(
    ItemSet const *state,        // parse state in which the actions are possible
    Terminal const *sym,         // lookahead symbol for these actions
    ProductionList &reductions); // list to try to cut down

  void renumberStates();
  static int renumberStatesDiff
    (ItemSet const *left, ItemSet const *right, void *vgramanl);
  static int arbitraryProductionOrder
    (Production const *left, Production const *right, void*);
  static int arbitraryRHSEltOrder
    (Production::RHSElt const *left, Production::RHSElt const *right, void*);

  void computeBFSTree();

  // misc
  void computePredictiveParsingTable();
    // non-const because have to add productions to lists

  void topologicalSort(NtIndex *order,  int &nextOrdinal,
                       NtIndex current, BitArray &seen);

  // the inverse of transition: map a target state to the symbol that
  // would transition to that state (from the given source state)
  Symbol const *inverseTransitionC(ItemSet const *source,
                                   ItemSet const *target) const;

  // sample input helpers
  void leftContext(SymbolList &output, ItemSet const *state) const;
  bool rewriteAsTerminals(TerminalList &output, SymbolList const &input) const;
  bool rewriteAsTerminalsHelper(TerminalList &output, SymbolList const &input,
                                ProductionList &reductionStack) const;
  bool rewriteSingleNTAsTerminals(TerminalList &output, Nonterminal const *nonterminal,
                                  ProductionList &reductionStack) const;

  // let's try this .. it needs to access 'itemSets'
  friend void ItemSet::xferSerfs(Flatten &flat, GrammarAnalysis &g);

  void singleItemClosure(OwnerKHashTable<LRItem, DottedProduction> &finished,
                         ArrayStack<LRItem*> &worklist,
                         //OwnerKHashArray<LRItem, DottedProduction> &workhash,
                         LRItem const *item, TerminalSet &scratchSet);

public:     // funcs
  GrammarAnalysis();
  ~GrammarAnalysis();

  // access symbols by index
  Terminal const *getTerminal(int index) const;
  Nonterminal const *getNonterminal(int index) const;
  Production const *getProduction(int index) const;

  ItemSet const *getItemSet(int index) const;
  int numItemSets() const { return nextItemSetId; }

  // faster access to counts
  int numTerminals() const { return numTerms; }
  int numNonterminals() const { return numNonterms; }

  // binary read/write
  void xfer(Flatten &flat);

  // essentially, my 'main()' while experimenting
  void exampleGrammar();

  // overrides base class to add a little bit of the
  // annotated info
  void printProductions(std::ostream &os, bool printCode=true) const;

  // print lots of stuff
  void printProductionsAndItems(std::ostream &os, bool printCode=true) const;

  // when grammar is built, this runs all analyses and stores
  // the results in this object's data fields; write the LR item
  // sets to the given file (or don't, if NULL)
  void runAnalyses(char const *setsFname);

  // print the item sets to a stream (optionally include nonkernel items)
  void printItemSets(std::ostream &os, bool nonkernel) const;

  // given a grammar, replace all of its actions with actions that
  // will build a straightforward parse tree using the facilities
  // of ptreenode.h; the rules will need the user to already have
  // done some necessary work in the verbatim preamble, such as
  // #including ptreenode.h
  void addTreebuildingActions();

  // ---- grammar queries ----
  bool canDerive(Nonterminal const *lhs, Nonterminal const *rhs) const;
  bool canDeriveEmpty(Nonterminal const *lhs) const;

  bool firstIncludes(Nonterminal const *NT, Terminal const *term) const;
  bool followIncludes(Nonterminal const *NT, Terminal const *term) const;

  // ---- sample inputs and contexts ----
  sm_string sampleInput(ItemSet const *state) const;
  sm_string leftContextString(ItemSet const *state) const;

  // ---- moved out of private ----
  void itemSetClosure(ItemSet &itemSet);
  DottedProduction const *getDProdIndex(int prodIndex, int posn) const;
};
   621: 
   622: 
   623: // in gramexpl.cc: interactive grammar experimentation system
   624: void grammarExplorer(GrammarAnalysis &g);
   625: 
   626: 
   627: #endif // __GRAMANL_H
End C section to elk/elk_gramanl.h[1]
Start C section to elk/elk_gramast.ast.gen.h[1 /1 ]
     1: #line 1956 "./lpsrc/elk.pak"
     2: // gramast.ast.gen.h
     3: // *** DO NOT EDIT ***
     4: // generated automatically by astgen, from gramast.ast
     5: 
     6: #ifndef GRAMAST_AST_GEN_H
     7: #define GRAMAST_AST_GEN_H
     8: 
     9: #include "ast_asthelp.h"
    10: 
    11: // fwd decls
    12: class GrammarAST;
    13: class TopForm;
    14: class TF_context;
    15: class TF_verbatim;
    16: class TF_option;
    17: class TF_terminals;
    18: class TF_nonterm;
    19: class TermDecl;
    20: class TermType;
    21: class PrecSpec;
    22: class SpecFunc;
    23: class ProdDecl;
    24: class RHSElt;
    25: class RH_name;
    26: class RH_sm_string;
    27: class RH_prec;
    28: 
    29: 
    30: // *** DO NOT EDIT ***
    31: 
    32: #include "ast_locstr.h"
    33: #include "elk_asockind.h"
    34: 
    35: // *** DO NOT EDIT ***
    36: class GrammarAST {
    37: public:      // data
    38:   ASTList <TopForm > forms;
    39: 
    40: public:      // funcs
    41:   GrammarAST(ASTList <TopForm > *_forms) : forms(_forms) {
    42:      { terms=NULL; firstNT=NULL; };
    43:   }
    44:   ~GrammarAST();
    45: 
    46:   char const *kindName() const { return "GrammarAST"; }
    47: 
    48:   GrammarAST *clone() const;
    49: 
    50:   void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;
    51: 
    52:   public:  TF_terminals *terms;
    53:   public:  TF_nonterm *firstNT;
    54: };
    55: 
    56: 
    57: 
    58: // *** DO NOT EDIT ***
// Abstract base for the top-level forms of a grammar file; concrete
// kinds are enumerated in 'Kind' below, one per subclass.
class TopForm {
public:      // data

public:      // funcs
  TopForm() {
  }
  virtual ~TopForm();

  // one tag per concrete subclass; NUM_KINDS is a count, not a kind
  enum Kind { TF_CONTEXT, TF_VERBATIM, TF_OPTION, TF_TERMINALS, TF_NONTERM, NUM_KINDS };
  virtual Kind kind() const = 0;

  // printable names, indexed by 'kind()'
  static char const * const kindNames[NUM_KINDS];
  char const *kindName() const { return kindNames[kind()]; }

  // checked/unchecked downcasts to each subclass (macro declared in ast_asthelp.h)
  DECL_AST_DOWNCASTS(TF_context, TF_CONTEXT)
  DECL_AST_DOWNCASTS(TF_verbatim, TF_VERBATIM)
  DECL_AST_DOWNCASTS(TF_option, TF_OPTION)
  DECL_AST_DOWNCASTS(TF_terminals, TF_TERMINALS)
  DECL_AST_DOWNCASTS(TF_nonterm, TF_NONTERM)

  virtual TopForm *clone() const=0;

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

};
    84: 
// "context class" top-level form: a block of code text carried verbatim.
class TF_context : public TopForm {
public:      // data
  LocString body;

public:      // funcs
  TF_context(LocString *_body) : TopForm(), body(_body) {
  }
  virtual ~TF_context();

  virtual Kind kind() const { return TF_CONTEXT; }
  enum { TYPE_TAG = TF_CONTEXT };

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

  virtual TF_context *clone() const;

};
   102: 
// verbatim code block; 'isImpl' selects which output file it goes to
// (implementation vs. header -- TODO confirm against the emitter)
class TF_verbatim : public TopForm {
public:      // data
  bool isImpl;
  LocString code;

public:      // funcs
  TF_verbatim(bool _isImpl, LocString *_code) : TopForm(), isImpl(_isImpl), code(_code) {
  }
  virtual ~TF_verbatim();

  virtual Kind kind() const { return TF_VERBATIM; }
  enum { TYPE_TAG = TF_VERBATIM };

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

  virtual TF_verbatim *clone() const;

};
   121: 
// a named integer-valued option setting from the grammar file
class TF_option : public TopForm {
public:      // data
  LocString name;
  int value;

public:      // funcs
  TF_option(LocString *_name, int _value) : TopForm(), name(_name), value(_value) {
  }
  virtual ~TF_option();

  virtual Kind kind() const { return TF_OPTION; }
  enum { TYPE_TAG = TF_OPTION };

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

  virtual TF_option *clone() const;

};
   140: 
// the terminals section: token declarations, their types, and
// precedence/associativity specifications
class TF_terminals : public TopForm {
public:      // data
  ASTList <TermDecl > decls;
  ASTList <TermType > types;
  ASTList <PrecSpec > prec;

public:      // funcs
  TF_terminals(ASTList <TermDecl > *_decls, ASTList <TermType > *_types, ASTList <PrecSpec > *_prec) : TopForm(), decls(_decls), types(_types), prec(_prec) {
  }
  virtual ~TF_terminals();

  virtual Kind kind() const { return TF_TERMINALS; }
  enum { TYPE_TAG = TF_TERMINALS };

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

  virtual TF_terminals *clone() const;

};
   160: 
// one nonterminal definition: its semantic-value type, special
// functions, productions, and subset declarations
class TF_nonterm : public TopForm {
public:      // data
  LocString name;
  LocString type;
  ASTList <SpecFunc > funcs;
  ASTList <ProdDecl > productions;
  ASTList <LocString > subsets;

public:      // funcs
  TF_nonterm(LocString *_name, LocString *_type, ASTList <SpecFunc > *_funcs, ASTList <ProdDecl > *_productions, ASTList <LocString > *_subsets) : TopForm(), name(_name), type(_type), funcs(_funcs), productions(_productions), subsets(_subsets) {
  }
  virtual ~TF_nonterm();

  virtual Kind kind() const { return TF_NONTERM; }
  enum { TYPE_TAG = TF_NONTERM };

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

  virtual TF_nonterm *clone() const;

};
   182: 
   183: 
   184: 
   185: // *** DO NOT EDIT ***
// declaration of one terminal: numeric token code, canonical name,
// and an optional alias (e.g. the quoted spelling)
class TermDecl {
public:      // data
  int code;
  LocString name;
  LocString alias;

public:      // funcs
  TermDecl(int _code, LocString *_name, LocString *_alias) : code(_code), name(_name), alias(_alias) {
  }
  ~TermDecl();

  char const *kindName() const { return "TermDecl"; }

  TermDecl *clone() const;

  void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

};
   204: 
   205: 
   206: 
   207: // *** DO NOT EDIT ***
// associates a semantic-value type (and special functions) with a terminal
class TermType {
public:      // data
  LocString name;
  LocString type;
  ASTList <SpecFunc > funcs;

public:      // funcs
  TermType(LocString *_name, LocString *_type, ASTList <SpecFunc > *_funcs) : name(_name), type(_type), funcs(_funcs) {
  }
  ~TermType();

  char const *kindName() const { return "TermType"; }

  TermType *clone() const;

  void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

};
   226: 
   227: 
   228: 
   229: // *** DO NOT EDIT ***
// a precedence/associativity specification: an associativity kind and
// precedence level applied to a list of tokens
class PrecSpec {
public:      // data
  AssocKind kind;
  int prec;
  ASTList <LocString > tokens;

public:      // funcs
  PrecSpec(AssocKind _kind, int _prec, ASTList <LocString > *_tokens) : kind(_kind), prec(_prec), tokens(_tokens) {
  }
  ~PrecSpec();

  char const *kindName() const { return "PrecSpec"; }

  PrecSpec *clone() const;

  void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

};
   248: 
   249: 
   250: 
   251: // *** DO NOT EDIT ***
// a "special function" attached to a symbol (e.g. dup/del/merge):
// function name, formal parameter names, and the code body
class SpecFunc {
public:      // data
  LocString name;
  ASTList <LocString > formals;
  LocString code;

public:      // funcs
  SpecFunc(LocString *_name, ASTList <LocString > *_formals, LocString *_code) : name(_name), formals(_formals), code(_code) {
  }
  ~SpecFunc();

  char const *kindName() const { return "SpecFunc"; }

  SpecFunc *clone() const;

  void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

  // return a copy of the i-th formal parameter name (no bounds
  // handling here beyond what ASTList::nthC provides)
  public:  LocString nthFormal(int i) const
    { return *( formals.nthC(i) ); };
};
   272: 
   273: 
   274: 
   275: // *** DO NOT EDIT ***
// one production: its right-hand-side elements and the action code
// to run when the production is reduced
class ProdDecl {
public:      // data
  ASTList <RHSElt > rhs;
  LocString actionCode;

public:      // funcs
  ProdDecl(ASTList <RHSElt > *_rhs, LocString *_actionCode) : rhs(_rhs), actionCode(_actionCode) {
  }
  ~ProdDecl();

  char const *kindName() const { return "ProdDecl"; }

  ProdDecl *clone() const;

  void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

};
   293: 
   294: 
   295: 
   296: // *** DO NOT EDIT ***
// Abstract base for one element of a production right-hand side:
// a symbol name, a quoted string, or a precedence marker.
class RHSElt {
public:      // data

public:      // funcs
  RHSElt() {
  }
  virtual ~RHSElt();

  // one tag per concrete subclass; NUM_KINDS is a count, not a kind
  enum Kind { RH_NAME, RH_STRING, RH_PREC, NUM_KINDS };
  virtual Kind kind() const = 0;

  // printable names, indexed by 'kind()'
  static char const * const kindNames[NUM_KINDS];
  char const *kindName() const { return kindNames[kind()]; }

  DECL_AST_DOWNCASTS(RH_name, RH_NAME)
  DECL_AST_DOWNCASTS(RH_sm_string, RH_STRING)
  DECL_AST_DOWNCASTS(RH_prec, RH_PREC)

  virtual RHSElt *clone() const=0;

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

};
   320: 
// RHS element referring to a symbol by name, with an optional tag
// for referring to its semantic value in action code
class RH_name : public RHSElt {
public:      // data
  LocString tag;
  LocString name;

public:      // funcs
  RH_name(LocString *_tag, LocString *_name) : RHSElt(), tag(_tag), name(_name) {
  }
  virtual ~RH_name();

  virtual Kind kind() const { return RH_NAME; }
  enum { TYPE_TAG = RH_NAME };

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

  virtual RH_name *clone() const;

};
   339: 
// RHS element given as a quoted string (terminal alias), with an
// optional tag for its semantic value
class RH_sm_string : public RHSElt {
public:      // data
  LocString tag;
  LocString str;

public:      // funcs
  RH_sm_string(LocString *_tag, LocString *_str) : RHSElt(), tag(_tag), str(_str) {
  }
  virtual ~RH_sm_string();

  virtual Kind kind() const { return RH_STRING; }
  enum { TYPE_TAG = RH_STRING };

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

  virtual RH_sm_string *clone() const;

};
   358: 
// precedence marker on a production: "use the precedence of this token"
class RH_prec : public RHSElt {
public:      // data
  LocString tokName;

public:      // funcs
  RH_prec(LocString *_tokName) : RHSElt(), tokName(_tokName) {
  }
  virtual ~RH_prec();

  virtual Kind kind() const { return RH_PREC; }
  enum { TYPE_TAG = RH_PREC };

  virtual void debugPrint(std::ostream &os, int indent, char const *subtreeName = "tree") const;

  virtual RH_prec *clone() const;

};
   376: 
   377: 
   378: 
   379: #endif // GRAMAST_AST_GEN_H
End C section to elk/elk_gramast.ast.gen.h[1]
Start cpp section to elk/elk_grammar.cpp[1 /1 ]
     1: #line 2336 "./lpsrc/elk.pak"
     2: // grammar.cc            see license.txt for copyright and terms of use
     3: // code for grammar.h
     4: 
     5: #include "elk_grammar.h"   // this module
     6: #include "sm_syserr.h"    // xsyserror
     7: #include "sm_strtokp.h"   // StrtokParse
     8: #include "sm_trace.h"     // trace
     9: #include "sm_exc.h"       // xBase
    10: #include "sm_strutil.h"   // quoted, parseQuotedString
    11: #include "sm_flatten.h"   // Flatten
    12: #include "elk_flatutil.h"  // various xfer helpers
    13: 
    14: #include <stdarg.h>    // variable-args stuff
    15: #include <stdio.h>     // FILE, etc.
    16: #include <ctype.h>     // isupper
    17: #include <stdlib.h>    // atoi
    18: 
    19: 
    20: // print a variable value
    21: #define PVAL(var) os << " " << #var "=" << var;
    22: 
    23: 
    24: StringTable grammarStringTable;
    25: 
    26: 
    27: // ---------------------- Symbol --------------------
// construct a grammar symbol; 't' selects terminal vs. nonterminal,
// 'e' marks the distinguished emptyString pseudo-symbol; all
// dup/del handler fields start out empty
Symbol::Symbol(LocString const &n, bool t, bool e)
  : name(n),
    isTerm(t),
    isEmptyString(e),
    type(NULL),
    dupParam(NULL),
    dupCode(),
    delParam(NULL),
    delCode(),
    reachable(false)
{}
    39: 
// nothing owned directly; members clean themselves up
Symbol::~Symbol()
{}
    42: 
    43: 
// unflattening constructor: 'name' is constructed from the stream;
// the remaining fields get their real values later, in Symbol::xfer()
Symbol::Symbol(Flatten &flat)
  : name(flat),
    isTerm(false),
    isEmptyString(false),
    type(NULL),
    dupParam(NULL),
    delParam(NULL)
{}
    52: 
// read or write this symbol's fields through the flattening stream;
// the sequence of xfer calls defines the serialized layout, so the
// order below must not be changed
void Symbol::xfer(Flatten &flat)
{
  // have to break constness to unflatten
  const_cast<LocString&>(name).xfer(flat);
  flat.xferBool(const_cast<bool&>(isTerm));
  flat.xferBool(const_cast<bool&>(isEmptyString));

  // string-table-interned strings go through the global table
  flattenStrTable->xfer(flat, type);

  flattenStrTable->xfer(flat, dupParam);
  dupCode.xfer(flat);

  flattenStrTable->xfer(flat, delParam);
  delCode.xfer(flat);

  flat.xferBool(reachable);
}
    70: 
    71: 
    72: int Symbol::getTermOrNontermIndex() const
    73: {
    74:   if (isTerminal()) {
    75:     return asTerminalC().termIndex;
    76:   }
    77:   else {
    78:     return asNonterminalC().ntIndex;
    79:   }
    80: }
    81: 
    82: 
    83: void Symbol::print(std::ostream &os) const
    84: {
    85:   os << name;
    86:   if (type) {
    87:     os << "[" << type << "]";
    88:   }
    89:   os << ":";
    90:   PVAL(isTerm);
    91: }
    92: 
    93: 
    94: void Symbol::printDDM(std::ostream &os) const
    95: {
    96:   // don't print anything if no handlers
    97:   if (!anyDDM()) return;
    98: 
    99:   // print with roughly the same syntax as input
   100:   os << "  " << (isTerminal()? "token" : "nonterm");
   101:   if (type) {
   102:     os << "[" << type << "]";
   103:   }
   104:   os << " " << name << " {\n";
   105: 
   106:   internalPrintDDM(os);
   107: 
   108:   os << "  }\n";
   109: }
   110: 
   111: 
   112: void Symbol::internalPrintDDM(std::ostream &os) const
   113: {
   114:   if (dupCode.isNonNull()) {
   115:     os << "    dup(" << dupParam << ") [" << dupCode << "]\n";
   116:   }
   117: 
   118:   if (delCode.isNonNull()) {
   119:     os << "    del(" << (delParam? delParam : "") << ") [" << delCode << "]\n";
   120:   }
   121: }
   122: 
   123: 
   124: bool Symbol::anyDDM() const
   125: {
   126:   return dupCode.isNonNull() ||
   127:          delCode.isNonNull();
   128: }
   129: 
   130: 
   131: Terminal const &Symbol::asTerminalC() const
   132: {
   133:   xassert(isTerminal());
   134:   return (Terminal const &)(*this);
   135: }
   136: 
   137: Nonterminal const &Symbol::asNonterminalC() const
   138: {
   139:   xassert(isNonterminal());
   140:   return (Nonterminal const &)(*this);
   141: }
   142: 
   143: 
   144: Terminal const *Symbol::ifTerminalC() const
   145: {
   146:   return isTerminal()? (Terminal const *)this : NULL;
   147: }
   148: 
   149: Nonterminal const *Symbol::ifNonterminalC() const
   150: {
   151:   return isNonterminal()? (Nonterminal const *)this : NULL;
   152: }
   153: 
   154: 
   155: 
   156: // -------------------- Terminal ------------------------
// unflattening constructor; remaining fields (precedence,
// associativity, termIndex, classifyCode) are filled in by xfer()
Terminal::Terminal(Flatten &flat)
  : Symbol(flat),
    alias(flat),
    classifyParam(NULL)
{}
   162: 
// serialize Terminal-specific fields after the Symbol base fields;
// the call order defines the stream layout -- do not reorder
void Terminal::xfer(Flatten &flat)
{
  Symbol::xfer(flat);

  alias.xfer(flat);

  flat.xferInt(precedence);
  // NOTE(review): punning the enum through 'int&' assumes AssocKind's
  // underlying representation is int-sized -- confirm if this is
  // ever ported to a platform where that doesn't hold
  flat.xferInt((int&)associativity);

  flat.xferInt(termIndex);

  flattenStrTable->xfer(flat, classifyParam);
  classifyCode.xfer(flat);
}
   177: 
   178: 
   179: void Terminal::print(std::ostream &os) const
   180: {
   181:   os << "[" << termIndex << "]";
   182:   if (precedence) {
   183:     os << "(" << ::toString(associativity) << " " << precedence << ")";
   184:   }
   185:   os << " ";
   186:   Symbol::print(os);
   187: }
   188: 
   189: 
   190: void Terminal::internalPrintDDM(std::ostream &os) const
   191: {
   192:   Symbol::internalPrintDDM(os);
   193: 
   194:   if (classifyCode.isNonNull()) {
   195:     os << "    classify(" << classifyParam << ") [" << classifyCode << "]\n";
   196:   }
   197: }
   198: 
   199: 
   200: bool Terminal::anyDDM() const
   201: {
   202:   return Symbol::anyDDM() ||
   203:          classifyCode.isNonNull();
   204: }
   205: 
   206: 
   207: sm_string Terminal::toString(bool quoteAliases) const
   208: {
   209:   if (alias.length() > 0) {
   210:     if (quoteAliases) {
   211:       return sm_stringc << "\"" << ::toString(alias) << "\"";
   212:     }
   213:     else {
   214:       return ::toString(alias);
   215:     }
   216:   }
   217:   else {
   218:     return ::toString(name);
   219:   }
   220: }
   221: 
   222: 
// ----------------- Nonterminal ------------------------
// Construct a nonterminal named 'name'; 'isEmpty' is true only for
// the distinguished emptyString nonterminal.
Nonterminal::Nonterminal(LocString const &name, bool isEmpty)
  : Symbol(name, false /*terminal*/, isEmpty),
    mergeParam1(NULL),
    mergeParam2(NULL),
    mergeCode(),
    keepParam(NULL),
    keepCode(),
    maximal(false),
    subsets(),
    ntIndex(-1),        // -1: index not assigned yet
    cyclic(false),
    first(0),           // empty terminal sets; sized later
    follow(0),
    superset(NULL)
{}

Nonterminal::~Nonterminal()
{}


// Reconstruct from a flattened stream; the remaining fields are
// filled in by xfer()/xferSerfs().
Nonterminal::Nonterminal(Flatten &flat)
  : Symbol(flat),
    mergeParam1(NULL),
    mergeParam2(NULL),
    keepParam(NULL),
    first(flat),
    follow(flat),
    superset(NULL)
{}
   253: 
// Serialize/deserialize owned data; the call order below defines
// the on-disk layout and must not change.
void Nonterminal::xfer(Flatten &flat)
{
  Symbol::xfer(flat);

  flattenStrTable->xfer(flat, mergeParam1);
  flattenStrTable->xfer(flat, mergeParam2);
  mergeCode.xfer(flat);

  flattenStrTable->xfer(flat, keepParam);
  keepCode.xfer(flat);
}

// Serialize/deserialize analysis annotations.
// NOTE(review): 'g' is unused here, apparently kept for signature
// parallelism with other xferSerfs methods -- confirm.
void Nonterminal::xferSerfs(Flatten &flat, Grammar &g)
{
  // annotation
  flat.xferInt(ntIndex);
  flat.xferBool(cyclic);
  first.xfer(flat);
  follow.xfer(flat);
}
   274: 
   275: 
   276: void Nonterminal::print(std::ostream &os, Grammar const *grammar) const
   277: {
   278:   os << "[" << ntIndex << "] ";
   279:   Symbol::print(os);
   280: 
   281:   // cyclic?
   282:   if (cyclic) {
   283:     os << " (cyclic!)";
   284:   }
   285: 
   286:   if (grammar) {
   287:     // first
   288:     os << " first={";
   289:     first.print(os, *grammar);
   290:     os << "}";
   291: 
   292:     // follow
   293:     os << " follow=";
   294:     follow.print(os, *grammar);
   295:     os << "}";
   296:   }
   297: }
   298: 
   299: 
// Print dup/del/merge/keep specifications: extends Symbol's output
// with the nonterminal-specific 'merge' and 'keep' functions.
void Nonterminal::internalPrintDDM(std::ostream &os) const
{
  Symbol::internalPrintDDM(os);

  if (mergeCode.isNonNull()) {
    os << "    merge(" << mergeParam1 << ", " << mergeParam2
       << ") [" << mergeCode << "]\n";
  }

  if (keepCode.isNonNull()) {
    os << "    keep(" << keepParam << ") [" << keepCode << "]\n";
  }
}


// True if this nonterminal carries any dup/del/merge/keep code.
bool Nonterminal::anyDDM() const
{
  return Symbol::anyDDM() ||
         mergeCode.isNonNull() ||
         keepCode.isNonNull();
}
   321: 
   322: 
// -------------------- TerminalSet ------------------------
// Debug aid: when non-NULL, print() shows only this terminal.
STATICDEF Terminal const *TerminalSet::suppressExcept = NULL;

// Create a set able to hold terminal ids [0, numTerms-1]; the set
// starts out empty.
TerminalSet::TerminalSet(int numTerms)
{
  init(numTerms);
}

// Copy constructor: allocate a bitmap of the same byte length, then
// copy the contents.
TerminalSet::TerminalSet(TerminalSet const &obj)
{
  init(obj.bitmapLen * 8);    // close enough; same # of bytes at least
  copy(obj);
}
   336: 
   337: void TerminalSet::init(int numTerms)
   338: {
   339:   if (numTerms != 0) {
   340:     // allocate enough space for one bit per terminal; I assume
   341:     // 8 bits per byte
   342:     bitmapLen = (numTerms + 7) / 8;
   343:     bitmap = new unsigned char[bitmapLen];
   344: 
   345:     // initially the set will be empty
   346:     memset(bitmap, 0, bitmapLen);
   347:   }
   348:   else {
   349:     // intended for situations where reset() will be called later
   350:     // to allocate some space
   351:     bitmapLen = 0;
   352:     bitmap = NULL;
   353:   }
   354: }
   355: 
   356: 
   357: TerminalSet::~TerminalSet()
   358: {
   359:   if (bitmap) {
   360:     delete[] bitmap;
   361:   }
   362: }
   363: 
   364: 
   365: TerminalSet::TerminalSet(Flatten&)
   366:   : bitmap(NULL)
   367: {}
   368: 
// Serialize/deserialize: the byte length first, then the raw bitmap
// bytes.  When reading, the bitmap is allocated here.
void TerminalSet::xfer(Flatten &flat)
{
  flat.xferInt(bitmapLen);

  if (bitmapLen > 0) {
    if (flat.reading()) {
      bitmap = new unsigned char[bitmapLen];
    }
    flat.xferSimple(bitmap, bitmapLen);
  }
}


// Discard current contents and re-create as an empty set sized for
// 'numTerms' terminals.
void TerminalSet::reset(int numTerms)
{
  if (bitmap) {
    delete[] bitmap;
  }
  init(numTerms);
}
   389: 
   390: 
// Return a pointer to the byte that holds terminal 'id's bit.
// The cast to unsigned presumably makes a negative id produce a huge
// offset that trips the bounds assertion rather than indexing before
// the array -- TODO confirm intent.
unsigned char *TerminalSet::getByte(int id) const
{
  int offset = (unsigned)id / 8;
  xassert(offset < bitmapLen);

  return bitmap + offset;
}
   398: 
   399: 
   400: bool TerminalSet::contains(int id) const
   401: {
   402:   unsigned char *p = getByte(id);
   403:   return (*p >> getBit(id)) & 1 == 1;
   404: }
   405: 
   406: 
// True when both sets have identical contents; the sets must have
// been sized for the same number of terminals.
bool TerminalSet::isEqual(TerminalSet const &obj) const
{
  xassert(obj.bitmapLen == bitmapLen);
  return 0==memcmp(bitmap, obj.bitmap, bitmapLen);
}


// Insert terminal 'id' into the set (idempotent).
void TerminalSet::add(int id)
{
  unsigned char *p = getByte(id);
  *p |= (unsigned char)(1 << getBit(id));
}


// Remove terminal 'id' from the set (idempotent).
void TerminalSet::remove(int id)
{
  unsigned char *p = getByte(id);
  *p &= (unsigned char)(~(1 << getBit(id)));
}
   426: 
   427: 
   428: void TerminalSet::clear()
   429: {
   430:   memset(bitmap, 0, bitmapLen);
   431: }
   432: 
   433: 
// Overwrite this set's contents with obj's; both sets must already
// have the same byte length.
void TerminalSet::copy(TerminalSet const &obj)
{
  xassert(obj.bitmapLen == bitmapLen);
  memcpy(bitmap, obj.bitmap, bitmapLen);
}


// Union 'obj' into this set; return true if this set changed.
// (The change flag presumably drives fixpoint iteration in the
// First/Follow computation -- confirm against callers.)
bool TerminalSet::merge(TerminalSet const &obj)
{
  bool changed = false;
  for (int i=0; i<bitmapLen; i++) {
    unsigned before = bitmap[i];
    unsigned after = before | obj.bitmap[i];
    if (after != before) {
      changed = true;
      bitmap[i] = after;
    }
  }
  return changed;
}
   454: 
   455: 
// Print the members as ", t1/t2/..." using each terminal's
// alias-aware toString(); prints nothing at all when the set is
// empty.  When the static 'suppressExcept' is set, only that one
// terminal is shown (debug aid).
void TerminalSet::print(std::ostream &os, Grammar const &g) const
{
  int ct=0;
  FOREACH_TERMINAL(g.terminals, iter) {
    Terminal const *t = iter.data();
    if (!contains(t->termIndex)) continue;

    if (suppressExcept &&                  // suppressing..
        suppressExcept != t) continue;     // and this isn't the exception

    if (ct++ == 0) {
      // by waiting until now to print this, if the set has no symbols
      // (e.g. we're in SLR(1) mode), then the comma won't be printed
      // either
      os << ", ";
    }
    else {
      os << "/";
    }

    os << t->toString();
  }
}
   479: 
   480: 
// -------------------- Production::RHSElt -------------------------
Production::RHSElt::~RHSElt()
{}


// Reconstruct from a flattened stream; 'sym' is a non-owning (serf)
// pointer, restored later by xferSerfs().
Production::RHSElt::RHSElt(Flatten &flat)
  : sym(NULL),
    tag(flat)
{}

// Serialize/deserialize owned data (just the tag).
void Production::RHSElt::xfer(Flatten &flat)
{
  tag.xfer(flat);
}

// Restore the non-owning 'sym' pointer ('g' is unused here).
void Production::RHSElt::xferSerfs(Flatten &flat, Grammar &g)
{
  xferSerfPtr(flat, sym);
}
   500: 
   501: 
   502: 
// -------------------- Production -------------------------
// Construct a production with LHS 'L'; RHS symbols are appended
// afterwards via append(), then finished() completes construction.
// NOTE(review): 'Ltag' is accepted but never stored -- LHS tags
// appear to have been removed (see symbolTag); confirm before use.
Production::Production(Nonterminal *L, char const *Ltag)
  : left(L),
    right(),
    precedence(0),
    rhsLen(-1),       // -1 until computeDerived()/finished() runs
    prodIndex(-1),    // -1 until registered via Grammar::addProduction
    firstSet(0)       // don't allocate bitmap yet
{}

Production::~Production()
{}


// Reconstruct from a flattened stream; the remaining fields are
// filled in by xfer()/xferSerfs().
Production::Production(Flatten &flat)
  : left(NULL),
    action(flat),
    firstSet(flat)
{}
   522: 
// Serialize/deserialize owned data; the call order below defines
// the on-disk layout and must not change.
void Production::xfer(Flatten &flat)
{
  xferObjList(flat, right);
  action.xfer(flat);
  flat.xferInt(precedence);

  flat.xferInt(rhsLen);
  flat.xferInt(prodIndex);
  firstSet.xfer(flat);
}

// Restore non-owning pointers: the LHS (into g.nonterminals) and
// each RHS element's 'sym'; on the reading side, recompute derived
// data afterwards.
void Production::xferSerfs(Flatten &flat, Grammar &g)
{
  // must break constness in xfer

  xferSerfPtrToList(flat, const_cast<Nonterminal*&>(left),
                          g.nonterminals);

  // xfer right's 'sym' pointers
  MUTATE_EACH_OBJLIST(RHSElt, right, iter) {
    iter.data()->xferSerfs(flat, g);
  }

  // compute derived data
  if (flat.reading()) {
    computeDerived();
  }
}
   551: 
   552: 
   553: #if 0   // optimized away, using 'rhsLen' instead
   554: int Production::rhsLength() const
   555: {
   556:   if (!right.isEmpty()) {
   557:     // I used to have code here which handled this situation by returning 0;
   558:     // since it should now never happen, I'll check that here instead
   559:     xassert(!right.nthC(0)->sym->isEmptyString);
   560:   }
   561: 
   562:   return right.count();
   563: }
   564: #endif // 0
   565: 
   566: 
   567: #if 0    // useful for verifying 'finish' is called before rhsLen
   568: int Production::rhsLength() const
   569: {
   570:   xassert(rhsLen != -1);     // otherwise 'finish' wasn't called
   571:   return rhsLen;
   572: }
   573: #endif // 0
   574: 
   575: 
   576: int Production::numRHSNonterminals() const
   577: {
   578:   int ct = 0;
   579:   FOREACH_OBJLIST(RHSElt, right, iter) {
   580:     if (iter.data()->sym->isNonterminal()) {
   581:       ct++;
   582:     }
   583:   }
   584:   return ct;
   585: }
   586: 
   587: 
// True if 'sym' appears anywhere on the right-hand side
// (pointer identity comparison).
bool Production::rhsHasSymbol(Symbol const *sym) const
{
  FOREACH_OBJLIST(RHSElt, right, iter) {
    if (iter.data()->sym == sym) {
      return true;
    }
  }
  return false;
}


// Append every RHS symbol, in order, to 'output'.
void Production::getRHSSymbols(SymbolList &output) const
{
  FOREACH_OBJLIST(RHSElt, right, iter) {
    output.append(iter.data()->sym);
  }
}
   605: 
   606: 
// Append 'sym' (with optional tag) to the right-hand side.
void Production::append(Symbol *sym, LocString const &tag)
{
  // my new design decision (6/26/00 14:24) is to disallow the
  // emptyString nonterminal from explicitly appearing in the
  // productions
  xassert(!sym->isEmptyString);

  right.append(new RHSElt(sym, tag));
}


// Called once the RHS is complete: cache derived data and size the
// (still empty) first set for 'numTerms' terminals.
void Production::finished(int numTerms)
{
  computeDerived();
  firstSet.reset(numTerms);
}

// Cache the RHS length in 'rhsLen'.
void Production::computeDerived()
{
  rhsLen = right.count();
}
   628: 
   629: 
   630: // basically strcmp but without the segfaults when s1 or s2
   631: // is null; return true if sm_strings are equal
   632: // update: now that they're StringRef, simple "==" suffices
   633: bool tagCompare(StringRef s1, StringRef s2)
   634: {
   635:   return s1 == s2;
   636: }
   637: 
   638: 
   639: int Production::findTag(StringRef tag) const
   640: {
   641:   // walk RHS list looking for a match
   642:   ObjListIter<RHSElt> tagIter(right);
   643:   int index=1;
   644:   for(; !tagIter.isDone(); tagIter.adv(), index++) {
   645:     if (tagCompare(tagIter.data()->tag, tag)) {
   646:       return index;
   647:     }
   648:   }
   649: 
   650:   // not found
   651:   return -1;
   652: }
   653: 
   654: 
// assemble a possibly tagged name for printing: "tag:name" when a
// nonempty tag is supplied, otherwise just "name"
sm_string taggedName(char const *name, char const *tag)
{
  if (tag == NULL || tag[0] == 0) {
    return sm_string(name);
  }
  else {
    return sm_stringb(tag << ":" << name);
  }
}
   665: 
   666: 
// Return the tag of the symbol at 'index', where 1..rhsLen address
// the RHS elements; index 0 (the LHS) no longer carries a tag and
// is rejected.
sm_string Production::symbolTag(int index) const
{
  // no longer have tags for LHS
  xassert(index != 0);

  // find index in RHS list
  index--;
  return sm_string(right.nthC(index)->tag);
}


// Return the symbol at 'index': 0 is the LHS, 1..rhsLen are the
// RHS elements.
Symbol const *Production::symbolByIndexC(int index) const
{
  // check LHS
  if (index == 0) {
    return left;
  }

  // find index in RHS list
  index--;
  return right.nthC(index)->sym;
}
   689: 
   690: 
   691: #if 0
   692: DottedProduction const *Production::getDProdC(int dotPlace) const
   693: {
   694:   xassert(0 <= dotPlace && dotPlace < numDotPlaces);
   695:   return &dprods[dotPlace];
   696: }
   697: #endif // 0
   698: 
   699: 
// Stream the default sm_string rendering of this production.
void Production::print(std::ostream &os) const
{
  os << toString();
}
   704: 
   705: 
   706: sm_string Production::toString(bool printType, bool printIndex) const
   707: {
   708:   // LHS "->" RHS
   709:   sm_stringBuilder sb;
   710:   if (printIndex) {
   711:     sb << "[" << prodIndex << "] ";
   712:   }
   713: 
   714:   sb << left->name;
   715:   if (printType && left->type) {
   716:     sb << "[" << left->type << "]";
   717:   }
   718:   sb << " -> " << rhsString();
   719: 
   720:   if (printType && precedence) {
   721:     // take this as licence to print prec too
   722:     sb << " %prec(" << precedence << ")";
   723:   }
   724:   return sb;
   725: }
   726: 
   727: 
// Render the right-hand side as space-separated symbols, or the word
// "empty" for an epsilon production.  Terminals print via their
// alias-aware toString(); when 'printTags', tags render as
// "tag:name".
sm_string Production::rhsString(bool printTags, bool quoteAliases) const
{
  sm_stringBuilder sb;

  if (right.isNotEmpty()) {
    // print the RHS symbols
    int ct=0;
    FOREACH_OBJLIST(RHSElt, right, iter) {
      RHSElt const &elt = *(iter.data());

      if (ct++ > 0) {
        sb << " ";    // separator between symbols
      }

      sm_string symName;
      if (elt.sym->isNonterminal()) {
        symName = elt.sym->name;
      }
      else {
        // print terminals as aliases if possible
        symName = elt.sym->asTerminalC().toString(quoteAliases);
      }

      if (printTags) {
        // print tag if present
        sb << taggedName(symName, elt.tag);
      }
      else {
        sb << symName;
      }
    }
  }

  else {
    // empty RHS
    sb << "empty";
  }

  return sb;
}
   768: 
   769: 
   770: sm_string Production::toStringMore(bool printCode) const
   771: {
   772:   sm_stringBuilder sb;
   773:   sb << toString();
   774: 
   775:   if (printCode && !action.isNull()) {
   776:     sb << "\t\t[" << action.strref() << "]";
   777:   }
   778: 
   779:   sb << "\n";
   780: 
   781:   return sb;
   782: }
   783: 
   784: 
// ------------------ Grammar -----------------
// Construct an empty grammar.  The 'expected' counts default to -1,
// meaning "no expectation specified".
Grammar::Grammar()
  : startSymbol(NULL),
    emptyString(LocString(HERE_SOURCELOC, "empty"),
                true /*isEmptyString*/),
    targetLang("C++"),
    useGCDefaults(false),
    defaultMergeAborts(false),
    expectedSR(-1),
    expectedRR(-1),
    expectedUNRNonterms(-1),
    expectedUNRTerms(-1)
{}


Grammar::~Grammar()
{}
   802: 
   803: 
// Serialize/deserialize the whole grammar: owned lists first, then
// non-owning (serf) pointers.  The checkpoint magic numbers guard
// against reader/writer skew; the call order defines the layout and
// must not change.
void Grammar::xfer(Flatten &flat)
{
  // owners
  flat.checkpoint(0xC7AB4D86);
  xferObjList(flat, nonterminals);
  xferObjList(flat, terminals);
  xferObjList(flat, productions);

  // emptyString is const

  xferObjList(flat, verbatim);

  actionClassName.xfer(flat);
  xferObjList(flat, actionClasses);

  xferObjList(flat, implVerbatim);

  targetLang.xfer(flat);
  flat.xferBool(useGCDefaults);
  flat.xferBool(defaultMergeAborts);

  flat.xferInt(expectedSR);
  flat.xferInt(expectedRR);
  flat.xferInt(expectedUNRNonterms);
  flat.xferInt(expectedUNRTerms);

  // serfs
  flat.checkpoint(0x8580AAD2);

  MUTATE_EACH_OBJLIST(Nonterminal, nonterminals, nt) {
    nt.data()->xferSerfs(flat, *this);
  }
  MUTATE_EACH_OBJLIST(Production, productions, p) {
    p.data()->xferSerfs(flat, *this);
  }

  xferSerfPtrToList(flat, startSymbol, nonterminals);

  flat.checkpoint(0x2874DB95);
}
   844: 
   845: 
// Number of declared terminals.
int Grammar::numTerminals() const
{
  return terminals.count();
}

// Number of nonterminals, counting the implicit emptyString (which
// is not stored in the 'nonterminals' list).
int Grammar::numNonterminals() const
{
  // everywhere, we regard emptyString as a nonterminal
  return nonterminals.count() + 1;
}
   856: 
   857: 
// Dump every terminal's dup/del/classify specs plus any precedence
// declaration, then every nonterminal's dup/del/merge/keep specs.
void Grammar::printSymbolTypes(std::ostream &os) const
{
  os << "Grammar terminals with types or precedence:\n";
  FOREACH_OBJLIST(Terminal, terminals, term) {
    Terminal const &t = *(term.data());
    t.printDDM(os);
    if (t.precedence) {
      os << "  " << t.name << " " << ::toString(t.associativity)
         << " %prec " << t.precedence << std::endl;
    }
  }

  os << "Grammar nonterminals with types:\n";
  FOREACH_OBJLIST(Nonterminal, nonterminals, nt) {
    nt.data()->printDDM(os);
  }
}
   875: 
   876: 
   877: void Grammar::printProductions(std::ostream &os, bool code) const
   878: {
   879:   os << "Grammar productions:\n";
   880:   for (ObjListIter<Production> iter(productions);
   881:        !iter.isDone(); iter.adv()) {
   882:     os << "  " << iter.data()->toStringMore(code);
   883:   }
   884: }
   885: 
   886: 
   887: #if 0
   888: void Grammar::addProduction(Nonterminal *lhs, Symbol *firstRhs, ...)
   889: {
   890:   va_list argptr;                   // state for working through args
   891:   Symbol *arg;
   892:   va_start(argptr, firstRhs);       // initialize 'argptr'
   893: 
   894:   Production *prod = new Production(lhs, NULL /*tag*/);
   895:   prod->append(firstRhs, NULL /*tag*/);
   896:   for(;;) {
   897:     arg = va_arg(argptr, Symbol*);  // get next argument
   898:     if (arg == NULL) {
   899:       break;    // end of list
   900:     }
   901: 
   902:     prod->append(arg, NULL /*tag*/);
   903:   }
   904: 
   905:   addProduction(prod);
   906: }
   907: #endif // 0
   908: 
   909: 
// Register 'prod' with the grammar (taking ownership) and assign
// its production index.
void Grammar::addProduction(Production *prod)
{
  // I used to add emptyString if there were 0 RHS symbols,
  // but I've now switched to not explicitly saying that

  prod->prodIndex = productions.count();
  productions.append(prod);

  // if the start symbol isn't defined yet, we can here
  // implement the convention that the LHS of the first
  // production is the start symbol
  if (startSymbol == NULL) {
    startSymbol = prod->left;
  }
}
   925: 
   926: 
// add a token to those we know about; returns false (after printing
// a diagnostic) if the name was already declared
// NOTE(review): the diagnostic goes to std::cout rather than
// std::cerr -- presumably deliberate for this tool; confirm before
// changing.
bool Grammar::declareToken(LocString const &symbolName, int code,
                           LocString const &alias)
{
  // verify that this token hasn't been declared already
  if (findSymbolC(symbolName)) {
    std::cout << "token " << symbolName << " has already been declared\n";
    return false;
  }

  // create a new terminal class
  Terminal *term = getOrMakeTerminal(symbolName);

  // assign fields specified in %token declaration
  term->termIndex = code;
  term->alias = alias;

  return true;
}
   946: 
   947: 
// well-formedness check; currently a no-op because the properties
// that used to be verified here were removed from the representation
void Grammar::checkWellFormed() const
{
  // after removing some things, now there's nothing to check...
}
   953: 
   954: 
// syntax for identifying tokens in Bison output: the bare terminal
// name (newer Bison rejects the quoted form)
sm_string bisonTokenName(Terminal const *t)
{
  // this worked with older versions of Bison
  //return sm_stringc << "\"" << t->name << "\"";

  // but the newer ones don't like quoted terminal names..
  return sm_string(t->name.str);
}
   964: 
// print the grammar in a form that Bison likes: %token declarations,
// precedence/associativity directives, then all productions grouped
// by their LHS nonterminal, each with a dummy action that sets $$ to
// the production index
void Grammar::printAsBison(std::ostream &os) const
{
  os << "/* automatically generated grammar */\n\n";

  os << "/* -------- tokens -------- */\n";
  FOREACH_TERMINAL(terminals, term) {
    // I'll surround all my tokens with quotes and see how Bison likes it
    // TODO: the latest bison does *not* like it!
    os << "%token " << bisonTokenName(term.data()) << " "
       << term.data()->termIndex << "\n";
  }
  os << "\n\n";

  os << "/* -------- precedence and associativity ---------*/\n"
        "/* low precedence */\n";
  {
    // first, compute the highest precedence used anywhere in the grammar
    int highMark=0;
    FOREACH_TERMINAL(terminals, iter) {
      highMark = max(iter.data()->precedence, highMark);
    }

    // map AssocKind to bison declaration; map stuff bison doesn't
    // have to %nonassoc
    static char const * const kindMap[NUM_ASSOC_KINDS] =
      { "%left", "%right", "%nonassoc", "%nonassoc", "%nonassoc" };

    // now iterate over the precedence levels (level 0 is skipped
    // because it means 'unspecified')
    for (int level=1; level <= highMark; level++) {
      AssocKind kind = NUM_ASSOC_KINDS;   // means we haven't seen any kind yet
      FOREACH_TERMINAL(terminals, iter) {
        Terminal const *t = iter.data();

        if (t->precedence == level) {
          if (kind == NUM_ASSOC_KINDS) {
            // first token at this level
            kind = t->associativity;
            os << kindMap[kind];
          }
          else if (kind != t->associativity) {
            xfailure("different associativities at same precedence?!");
          }

          // print the token itself
          os << " " << bisonTokenName(t);
        }
      }

      // end of the level
      os << "\n";
    }
  }
  os << "/* high precedence */\n"
        "\n\n";

  os << "/* -------- productions ------ */\n"
        "%%\n\n";
  // print every nonterminal's rules
  FOREACH_NONTERMINAL(nonterminals, nt) {
    // look at every rule where this nonterminal is on LHS
    bool first = true;
    FOREACH_PRODUCTION(productions, prod) {
      if (prod.data()->left == nt.data()) {

        if (first) {
          os << nt.data()->name << ":";
        }
        else {
          // continuation alternative: align the "|" under the ":"
          os << "\n";
          INTLOOP(i, 0, nt.data()->name.length()) {
            os << " ";
          }
          os << "|";
        }

        // print RHS symbols
        FOREACH_OBJLIST(Production::RHSElt, prod.data()->right, symIter) {
          Symbol const *sym = symIter.data()->sym;
          if (sym != &emptyString) {
            if (sym->isTerminal()) {
              os << " " << bisonTokenName(&( sym->asTerminalC() ));
            }
            else {
              os << " " << sym->name;
            }
          }
        }

        // or, if empty..
        if (prod.data()->rhsLength() == 0) {
          os << " /* empty */";
        }

        // precedence?
        if (prod.data()->precedence) {
          // search for a terminal with the required precedence level
          bool found=false;
          FOREACH_TERMINAL(terminals, iter) {
            if (iter.data()->precedence == prod.data()->precedence) {
              // found suitable token
              os << " %prec " << bisonTokenName(iter.data());
              found = true;
              break;
            }
          }
          if (!found) {
            std::cout << "warning: cannot find token for precedence level "
                 << prod.data()->precedence << std::endl;
            os << " /* no token precedence level "
               << prod.data()->precedence << " */";
          }
        }

        // dummy action to help while debugging
        os << " { $$=" << prod.data()->prodIndex << "; }";

        first = false;
      }
    }

    if (first) {
      // no rules..
      os << "/* no rules for " << nt.data()->name << " */";
    }
    else {
      // finish the rules with a semicolon
      os << "\n";
      INTLOOP(i, 0, nt.data()->name.length()) {
        os << " ";
      }
      os << ";";
    }

    os << "\n\n";
  }
}
  1103: 
  1104: 
  1105: 
  1106: // ------------------- symbol access -------------------
  1107: Nonterminal const *Grammar::findNonterminalC(char const *name) const
  1108: {
  1109:   // check for empty first, since it's not in the list
  1110:   if (emptyString.name.equals(name)) {
  1111:     return &emptyString;
  1112:   }
  1113: 
  1114:   FOREACH_NONTERMINAL(nonterminals, iter) {
  1115:     if (iter.data()->name.equals(name)) {
  1116:       return iter.data();
  1117:     }
  1118:   }
  1119:   return NULL;
  1120: }
  1121: 
  1122: 
  1123: Terminal const *Grammar::findTerminalC(char const *name) const
  1124: {
  1125:   FOREACH_TERMINAL(terminals, iter) {
  1126:     if (iter.data()->name.equals(name) ||
  1127:         iter.data()->alias.equals(name)) {
  1128:       return iter.data();
  1129:     }
  1130:   }
  1131:   return NULL;
  1132: }
  1133: 
  1134: 
  1135: Symbol const *Grammar::findSymbolC(char const *name) const
  1136: {
  1137:   // try nonterminals
  1138:   Nonterminal const *nt = findNonterminalC(name);
  1139:   if (nt) {
  1140:     return nt;
  1141:   }
  1142: 
  1143:   // now try terminals; if it fails, we fail
  1144:   return findTerminalC(name);
  1145: }
  1146: 
  1147: 
  1148: 
// Return the nonterminal called 'name', creating and registering a
// new one if it doesn't exist yet.
Nonterminal *Grammar::getOrMakeNonterminal(LocString const &name)
{
  Nonterminal *nt = findNonterminal(name);
  if (nt != NULL) {
    return nt;
  }

  nt = new Nonterminal(name);
  nonterminals.append(nt);
  return nt;
}

// Return the terminal called 'name', creating and registering a new
// one if it doesn't exist yet.
Terminal *Grammar::getOrMakeTerminal(LocString const &name)
{
  Terminal *term = findTerminal(name);
  if (term != NULL) {
    return term;
  }

  term = new Terminal(name);
  terminals.append(term);
  return term;
}
  1172: 
  1173: Symbol *Grammar::getOrMakeSymbol(LocString const &name)
  1174: {
  1175:   Symbol *sym = findSymbol(name);
  1176:   if (sym != NULL) {
  1177:     return sym;
  1178:   }
  1179: 
  1180:   // Since name is not already defined, we don't know whether
  1181:   // it will be a nonterminal or a terminal.  For now, I will
  1182:   // use the lexical convention that nonterminals are
  1183:   // capitalized and terminals are not.
  1184:   if (isupper(name[0])) {
  1185:     return getOrMakeNonterminal(name);
  1186:   }
  1187:   else {
  1188:     return getOrMakeTerminal(name);
  1189:   }
  1190: }
  1191: 
  1192: 
  1193: int Grammar::getProductionIndex(Production const *prod) const
  1194: {
  1195:   int ret = productions.indexOf(prod);
  1196:   xassert(ret != -1);
  1197:   return ret;
  1198: }
  1199: 
  1200: 
  1201: sm_string symbolSequenceToString(SymbolList const &list)
  1202: {
  1203:   sm_stringBuilder sb;   // collects output
  1204: 
  1205:   bool first = true;
  1206:   SFOREACH_SYMBOL(list, sym) {
  1207:     if (!first) {
  1208:       sb << " ";
  1209:     }
  1210: 
  1211:     if (sym.data()->isTerminal()) {
  1212:       sb << sym.data()->asTerminalC().toString();
  1213:     }
  1214:     else {
  1215:       sb << sym.data()->name;
  1216:     }
  1217:     first = false;
  1218:   }
  1219: 
  1220:   return sb;
  1221: }
  1222: 
  1223: 
// Same rendering as symbolSequenceToString, for a list of terminals.
sm_string terminalSequenceToString(TerminalList const &list)
{
  // this works because access is read-only
  // (the reinterpret_cast relies on TerminalList and SymbolList
  // having identical layout -- inherited, unchecked idiom)
  return symbolSequenceToString(reinterpret_cast<SymbolList const&>(list));
}
  1229: 
  1230: 
  1231: // ------------------ emitting C++ code ---------------------
  1232: #if 0     // not done
  1233: void Grammar::emitSelfCC(std::ostream &os) const
  1234: {
  1235:   os << "void buildGrammar(Grammar *g)\n"
  1236:         "{\n";
  1237: 
  1238:   FOREACH_OBJLIST(Terminal, terminals, termIter) {
  1239:     Terminal const *term = termIter.data();
  1240: 
  1241:     os << "g->declareToken(" << term->name
  1242:        << ", " << term->termIndex
  1243:        << ", " << quoted(term->alias)
  1244:        << ");\n";
  1245:   }
  1246: 
  1247:   FOREACH_OBJLIST(Nonterminal, nonterminals, ntIter) {
  1248:     Nonterminal const *nt = ntIter.data();
  1249: 
  1250:     os << ...
  1251:   }
  1252: 
  1253:   os << "}\n";
  1254: 
  1255:   // todo: more
  1256: }
  1257: #endif // 0
End cpp section to elk/elk_grammar.cpp[1]
Start C section to elk/elk_grammar.h[1 /1 ]
     1: #line 3594 "./lpsrc/elk.pak"
     2: // grammar.h            see license.txt for copyright and terms of use
     3: // representation and algorithms for context-free grammars
     4: 
     5: // Author: Scott McPeak, April 2000
     6: 
     7: // Unfortunately, representation and algorithm tend to get
     8: // mixed together.  Separating them entirely is possible,
     9: // but syntactically inconvenient.  So, instead, I try to
    10: // document the separation in comments.  Specifically,
    11: // sections beginning with ---- representation ---- are data
    12: // for representation of the underlying concept, while
    13: // sections with ---- annotation ---- are data created by
    14: // algorithms manipulating the data.
    15: 
    16: // Another measure is I've split all grammar-wide algorithm
    17: // stuff into GrammarAnalysis (gramanl.h).  Things should
    18: // only be put into Grammar if they are directly related
    19: // to the grammar representation.  (However, constituent
    20: // objects like Production will continue to be a mix.)
    21: 
    22: #ifndef __GRAMMAR_H
    23: #define __GRAMMAR_H
    24: 
    25: #include <iostream>    // std::ostream
    26: 
    27: #include "sm_str.h"
    28: #include "sm_objlist.h"
    29: #include "sm_sobjlist.h"
    30: #include "elk_util.h"
    31: #include "ast_locstr.h"
    32: #include "sm_strobjdict.h"
    33: #include "sm_owner.h"
    34: #include "elk_asockind.h"
    35: 
    36: class StrtokParse;       // strtokp.h
    37: 
    38: // fwds defined below
    39: class Symbol;
    40: class Terminal;
    41: class Nonterminal;
    42: class Production;
    43: class DottedProduction;
    44: class Grammar;
    45: 
    46: // transitional definitions
    47: typedef StringObjDict<LocString> LitCodeDict;
    48: typedef LocString LiteralCode;
    49: 
    50: 
    51: // everywhere in the Grammar specification we have a StringRef, it
    52: // refers to this string table
    53: extern StringTable grammarStringTable;
    54: 
    55: 
    56: // ---------------- Symbol --------------------
    57: // either a nonterminal or terminal symbol
    58: class Symbol {
    59: // ------ representation ------
    60: public:
    61:   LocString const name;     // symbol's name in grammar
    62:   bool const isTerm;        // true: terminal (only on right-hand sides of productions)
    63:                             // false: nonterminal (can appear on left-hand sides)
    64:   bool const isEmptyString; // true only for the emptyString nonterminal
    65: 
    66:   StringRef type;           // C type of semantic value
    67: 
    68:   StringRef dupParam;       // name of parameter to 'dup'
    69:   LocString dupCode;        // code to duplicate a semantic value
    70: 
    71:   StringRef delParam;       // param name; may be NULL to indicate not used
    72:   LocString delCode;        // code
    73: 
    74: // ----------- annotation ------------
    75: public:
    76:   bool reachable;           // computed by constructLRItemSets; true when this symbol is reachable from the start symbol
    77: 
    78: protected:  // funcs
    79:   virtual void internalPrintDDM(std::ostream &os) const;
    80: 
    81: public:      // funcs
    82:   Symbol(LocString const &n, bool t, bool e = false);
    83:   virtual ~Symbol();
    84: 
    85:   Symbol(Flatten&);           // reconstruct from a Flatten stream
    86:   void xfer(Flatten &flat);   // read/write binary representation
    87: 
    88:   // symmetric selectors
    89:   bool isTerminal() const { return isTerm; }
    90:   bool isNonterminal() const { return !isTerm; }
    91: 
    92:   // both terminals and nonterminals have ids; this gets the
    93:   // id for whichever kind this object happens to be
    94:   int getTermOrNontermIndex() const;
    95: 
    96:   // casting
    97:   Terminal const &asTerminalC() const;       // checks 'isTerminal' for cast safety
    98:   Terminal &asTerminal()
    99:     { return const_cast<Terminal&>(asTerminalC()); }
   100: 
   101:   Nonterminal const &asNonterminalC() const;
   102:   Nonterminal &asNonterminal()
   103:     { return const_cast<Nonterminal&>(asNonterminalC()); }
   104: 
   105:   // cast or NULL
   106:   Terminal const *ifTerminalC() const;
   107:   Terminal *ifTerminal()
   108:     { return const_cast<Terminal*>(ifTerminalC()); }
   109: 
   110:   Nonterminal const *ifNonterminalC() const;
   111:   Nonterminal *ifNonterminal()
   112:     { return const_cast<Nonterminal*>(ifNonterminalC()); }
   113: 
   114:   // debugging
   115:   // print as '$name: isTerminal=$isTerminal' (no newline)
   116:   virtual void print(std::ostream &os) const;
   117:   OSTREAM_OPERATOR(Symbol)
   118: 
   119:   // print 'token[type] name { dup.. del.. merge.. }' (with newlines)
   120:   void printDDM(std::ostream &os) const;
   121: 
   122:   // true if any of the handlers were specified
   123:   virtual bool anyDDM() const;
   124: 
   125:   virtual sm_string toString() const { return sm_string(name); }  // default: just the symbol's name
   126: };
   127: 
   128: // I have several needs for serf lists of symbols, so let's use this for now
   129: typedef SObjList<Symbol> SymbolList;               // serf = non-owning
   130: typedef SObjListIter<Symbol> SymbolListIter;
   131: typedef SObjListMutator<Symbol> SymbolListMutator;
   132: 
   133: #define FOREACH_SYMBOL(list, iter) FOREACH_OBJLIST(Symbol, list, iter)
   134: #define MUTATE_EACH_SYMBOL(list, iter) MUTATE_EACH_OBJLIST(Symbol, list, iter)
   135: #define SFOREACH_SYMBOL(list, iter) SFOREACH_OBJLIST(Symbol, list, iter)
   136: #define SMUTATE_EACH_SYMBOL(list, iter) SMUTATE_EACH_OBJLIST(Symbol, list, iter)
   137: 
   138: // format: "s1 s2 s3"
   139: sm_string symbolSequenceToString(SymbolList const &list);
   140: 
   141: 
   142: // ---------------- Terminal --------------------
   143: // something that only appears on the right-hand side of
   144: // productions, and is an element of the source language
   145: // NOTE:  This is really a terminal *class*, in that it's possible
   146: // for several different tokens to be classified into the same
   147: // terminal class (e.g. "foo" and "bar" are both identifiers)
   148: class Terminal : public Symbol {
   149: // -------- representation ---------
   150: public:     // data
   151:   // whereas 'name' is the canonical name for the terminal class,
   152:   // this field is an alias; for example, if the canonical name is
   153:   // L2_EQUALEQUAL, the alias might be "=="; the alias should *not*
   154:   // include actual double-quote characters
   155:   // if the alias is "", there is no alias
   156:   LocString alias;
   157: 
   158:   // parsgen-time conflict resolution: if a shift/reduce conflict
   159:   // occurs between a production and a symbol, both with specified
   160:   // precedence (not 0), then the one with the numerically higher
   161:   // precedence will be used
   162:   int precedence;
   163: 
   164:   // if, in the above scenario, the precedence values are the same,
   165:   // then the associativity kind will be used to decide which to use
   166:   AssocKind associativity;
   167: 
   168:   StringRef classifyParam;      // name of parameter to 'classify'
   169:   LocString classifyCode;       // code to reclassify a token type
   170: 
   171: // ------ annotation ------
   172: public:     // data
   173:   // terminal class index - this terminal's id; -1 means unassigned
   174:   int termIndex;
   175: 
   176: protected:  // funcs
   177:   virtual void internalPrintDDM(std::ostream &os) const;
   178: 
   179: public:     // funcs
   180:   Terminal(LocString const &name)        // canonical name for terminal class
   181:     : Symbol(name, true /*terminal*/),
   182:       alias(),
   183:       precedence(0),
   184:       associativity(AK_NONASSOC),
   185:       classifyParam(NULL),
   186:       termIndex(-1)
   187:   {}
   188: 
   189:   Terminal(Flatten &flat);    // reconstruct from a Flatten stream
   190:   void xfer(Flatten &flat);   // read/write binary representation
   191: 
   192:   virtual void print(std::ostream &os) const;
   193:   OSTREAM_OPERATOR(Terminal)
   194: 
   195:   virtual bool anyDDM() const;
   196: 
   197:   // return alias if defined, name otherwise
   198:   virtual sm_string toString(bool quoteAliases = false) const;
   199: };
   200: 
   201: typedef SObjList<Terminal> TerminalList;
   202: typedef SObjListIter<Terminal> TerminalListIter;
   203: 
   204: #define FOREACH_TERMINAL(list, iter) FOREACH_OBJLIST(Terminal, list, iter)
   205: #define MUTATE_EACH_TERMINAL(list, iter) MUTATE_EACH_OBJLIST(Terminal, list, iter)
   206: #define SFOREACH_TERMINAL(list, iter) SFOREACH_OBJLIST(Terminal, list, iter)
   207: #define SMUTATE_EACH_TERMINAL(list, iter) SMUTATE_EACH_OBJLIST(Terminal, list, iter)
   208: 
   209: // casting aggregates
   210: inline ObjList<Symbol> const &toObjList(ObjList<Terminal> const &list)
   211:   { return reinterpret_cast< ObjList<Symbol>const& >(list); }   // NOTE(review): assumes ObjList<T> layout is identical for all T -- confirm
   212: 
   213: // format: "t1 t2 t3"
   214: sm_string terminalSequenceToString(TerminalList const &list);
   215: 
   216: 
   217: // ----------------- TerminalSet -------------------
   218: // used for the lookahead sets of LR items, and for the First()
   219: // sets of production RHSs
   220: class TerminalSet {
   221: private:    // data
   222:   unsigned char *bitmap;      // (owner) bitmap of terminals, indexed by
   223:                               // terminal id; lsb of byte 0 is index 0
   224:   int bitmapLen;              // # of bytes in 'bitmap'
   225: 
   226: public:     // data
   227:   // printing customization: when non-NULL only print tokens if
   228:   // it includes this token, and then *only* print this one
   229:   static Terminal const *suppressExcept;
   230: 
   231: private:    // funcs
   232:   void init(int numTerms);
   233:   unsigned char *getByte(int terminalId) const;
   234:   int getBit(int terminalId) const
   235:     { return ((unsigned)terminalId % 8); }   // bit offset within its byte
   236: 
   237: public:     // funcs
   238:   TerminalSet(int numTerms=0);                   // allocate new set, initially empty
   239:   TerminalSet(TerminalSet const &obj);
   240:   ~TerminalSet();
   241: 
   242:   TerminalSet& operator= (TerminalSet const &obj)
   243:     { copy(obj); return *this; }
   244: 
   245:   TerminalSet(Flatten&);
   246:   void xfer(Flatten &flat);
   247: 
   248:   // call this to re-allocate at a new size; set is emptied
   249:   void reset(int numTerms);
   250: 
   251:   // true when the # of symbols is 0; an unfinished state
   252:   bool nullMap() const { return bitmap==NULL; }
   253: 
   254:   bool contains(int terminalId) const;
   255: 
   256:   // NOTE: can only compare sets sized for the same number of
   257:   // terminals (assertion fail otherwise)
   258:   bool isEqual(TerminalSet const &obj) const;
   259: 
   260:   void add(int terminalId);
   261:   void remove(int terminalId);
   262:   void clear();
   263: 
   264:   void copy(TerminalSet const &obj);      // lengths must be the same
   265:   bool merge(TerminalSet const &obj);     // union; returns true if merging changed set
   266: 
   267:   void print(std::ostream &os, Grammar const &g) const;
   268: };
   269: 
   270: 
   271: // ---------------- Nonterminal --------------------
   272: // something that can appear on the left-hand side of a production
   273: // (or, emptyString, since we classify that as a nonterminal also)
   274: class Nonterminal : public Symbol {
   275: // ---------- representation --------
   276: public:
   277:   StringRef mergeParam1;    // param name for first alternative
   278:   StringRef mergeParam2;    // and 2nd alt
   279:   LocString mergeCode;      // code to resolve then
   280: 
   281:   StringRef keepParam;      // name of parameter to 'keep'
   282:   LocString keepCode;       // code to decide whether to keep a reduction
   283: 
   284:   bool maximal;             // if true, use maximal munch disambiguation
   285: 
   286:   SObjList<Nonterminal> subsets;      // preferred subsets (for scannerless)
   287: 
   288: protected:  // funcs
   289:   virtual void internalPrintDDM(std::ostream &os) const;
   290: 
   291: public:     // funcs
   292:   Nonterminal(LocString const &name, bool isEmptyString=false);
   293:   virtual ~Nonterminal();
   294: 
   295:   Nonterminal(Flatten &flat);
   296:   void xfer(Flatten &flat);
   297:   void xferSerfs(Flatten &flat, Grammar &g);   // transfer the serf (non-owning) pointers
   298: 
   299:   virtual void print(std::ostream &os, Grammar const *grammer = NULL) const;
   300:   OSTREAM_OPERATOR(Nonterminal)
   301: 
   302:   virtual bool anyDDM() const;
   303: 
   304: // ------ annotation ------
   305: public:     // data
   306:   int ntIndex;           // nonterminal index; see Grammar::computeWhatCanDeriveWhat
   307:   bool cyclic;           // true if this can derive itself in 1 or more steps
   308:   TerminalSet first;     // set of terminals that can be start of a string derived from 'this'
   309:   TerminalSet follow;    // set of terminals that can follow a string derived from 'this'
   310:   Nonterminal *superset; // inverse of 'subsets'
   311: };
   312: 
   313: typedef SObjList<Nonterminal> NonterminalList;
   314: typedef SObjListIter<Nonterminal> NonterminalListIter;
   315: 
   316: #define FOREACH_NONTERMINAL(list, iter) FOREACH_OBJLIST(Nonterminal, list, iter)
   317: #define MUTATE_EACH_NONTERMINAL(list, iter) MUTATE_EACH_OBJLIST(Nonterminal, list, iter)
   318: #define SFOREACH_NONTERMINAL(list, iter) SFOREACH_OBJLIST(Nonterminal, list, iter)
   319: #define SMUTATE_EACH_NONTERMINAL(list, iter) SMUTATE_EACH_OBJLIST(Nonterminal, list, iter)
   320: 
   321: // casting aggregates
   322: inline ObjList<Symbol> const &toObjList(ObjList<Nonterminal> const &list)
   323:   { return reinterpret_cast< ObjList<Symbol>const& >(list); }   // NOTE(review): assumes ObjList<T> layout is identical for all T -- confirm
   324: 
   325: 
   326: // ---------------- Production --------------------
   327: // a rewrite rule
   328: class Production {
   329: // ------ representation ------
   330: public:     // types
   331:   class RHSElt {
   332:   public:
   333:     Symbol *sym;                // (serf) rhs element symbol
   334: 
   335:     // tags applied to the symbols for purposes of unambiguous naming in
   336:     // actions, and for self-commenting value as role indicators; an
   337:     // empty tag ("") is allowed and means there is no tag
   338:     LocString tag;             // tag for this symbol; can be ""
   339: 
   340:   public:
   341:     RHSElt(Symbol *s, LocString const &t) : sym(s), tag(t) {}
   342:     ~RHSElt();
   343: 
   344:     RHSElt(Flatten&);
   345:     void xfer(Flatten &flat);
   346:     void xferSerfs(Flatten &flat, Grammar &g);
   347:   };
   348: 
   349: public:     // data
   350:   // fundamental context-free grammar (CFG) component
   351:   Nonterminal * const left;     // (serf) left hand side; must be nonterminal
   352:   ObjList<RHSElt> right;        // right hand side; terminals & nonterminals
   353:   int precedence;               // precedence level for disambiguation (0 for none specified)
   354: 
   355:   // user-supplied reduction action code
   356:   LocString action;
   357: 
   358: private:    // funcs
   359:   void computeDerived();        // recompute cached annotation data (e.g. 'rhsLen')
   360: 
   361: public:     // funcs
   362:   Production(Nonterminal *left, char const *leftTag);
   363:   ~Production();
   364: 
   365:   Production(Flatten &flat);
   366:   void xfer(Flatten &flat);
   367:   void xferSerfs(Flatten &flat, Grammar &g);
   368: 
   369:   // length *not* including emptySymbol, if present
   370:   // UPDATE: I'm now disallowing emptySymbol from ever appearing in 'right'
   371:   int rhsLength() const { return rhsLen; }
   372: 
   373:   // number of nonterminals on RHS
   374:   int numRHSNonterminals() const;
   375: 
   376:   // true if the given symbol appears in 'right'
   377:   bool rhsHasSymbol(Symbol const *sym) const;
   378: 
   379:   // retrieve the RHS as a list of symbols, rather than as a list of RHSElts
   380:   void getRHSSymbols(SymbolList &output) const;
   381: 
   382:   // append a RHS symbol
   383:   void append(Symbol *sym, LocString const &tag);
   384: 
   385:   // call this when production is built, so it can compute annotations
   386:   // (this is called by GrammarAnalysis::initializeAuxData, from
   387:   // inside runAnalyses)
   388:   void finished(int numTerms);
   389: 
   390:   // find a symbol by tag; returns 1 for first RHS symbol, 2 for
   391:   // second, etc.; returns -1 if the tag doesn't match anything
   392:   int findTag(StringRef tag) const;
   393: 
   394:   // given an index as returned by 'findTag', translate that
   395:   // back into a tag
   396:   sm_string symbolTag(int symbolIndex) const;
   397: 
   398:   // or translate a symbol index into a symbol
   399:   Symbol const *symbolByIndexC(int symbolIndex) const;
   400:   Symbol *symbolByIndex(int symbolIndex)
   401:     { return const_cast<Symbol*>(symbolByIndexC(symbolIndex)); }
   402: 
   403:   #if 0
   404:   // retrieve an item
   405:   DottedProduction const *getDProdC(int dotPlace) const;
   406:   DottedProduction *getDProd(int dotPlace)
   407:     { return const_cast<DottedProduction*>(getDProdC(dotPlace)); }
   408:   #endif // 0
   409: 
   410:   // print 'A -> B c D' (no newline)
   411:   sm_string toString(bool printType = true, bool printIndex = true) const;
   412: 
   413:   // this one prints 'B c D' for above example rule
   414:   sm_string rhsString(bool printTags = true, bool quoteAliases = false) const;
   415: 
   416:   void print(std::ostream &os) const;
   417:   OSTREAM_OPERATOR(Production)
   418: 
   419:   // print entire input syntax, with newlines, e.g.
   420:   //   A -> B c D { return foo; }
   421:   sm_string toStringMore(bool printCode) const;
   422: 
   423: // ------ annotation ------
   424: private:    // data
   425:   int rhsLen;                   // right.count()
   426: 
   427: public:     // data
   428:   int prodIndex;                // unique production id
   429:   TerminalSet firstSet;         // First(RHS); computed by GrammarAnalysis::computeFirst
   430: };
   431: 
   432: typedef SObjList<Production> ProductionList;
   433: typedef SObjListIter<Production> ProductionListIter;
   434: 
   435: #define FOREACH_PRODUCTION(list, iter) FOREACH_OBJLIST(Production, list, iter)
   436: #define MUTATE_EACH_PRODUCTION(list, iter) MUTATE_EACH_OBJLIST(Production, list, iter)
   437: #define SFOREACH_PRODUCTION(list, iter) SFOREACH_OBJLIST(Production, list, iter)
   438: #define SMUTATE_EACH_PRODUCTION(list, iter) SMUTATE_EACH_OBJLIST(Production, list, iter)
   439: 
   440: typedef ObjList<Production::RHSElt> RHSEltList;
   441: typedef ObjListIter<Production::RHSElt> RHSEltListIter;
   442: typedef ObjListMutator<Production::RHSElt> RHSEltListMutator;
   443: 
   444: 
   445: // ---------------- Grammar --------------------
   446: // represent a grammar: nonterminals, terminals, productions, and start-symbol
   447: class Grammar {
   448: // ------ representation ------
   449: public:     // data
   450:   ObjList<Nonterminal> nonterminals;    // (owner list)
   451:   ObjList<Terminal> terminals;          // (owner list)
   452:   ObjList<Production> productions;      // (owner list)
   453:   Nonterminal *startSymbol;             // (serf) a particular nonterminal
   454: 
   455:   // the special nonterminal for the empty string; does not appear in the
   456:   // list of nonterminals or terminals for a grammar, but can be
   457:   // referenced by productions, etc.; the decision to explicitly have
   458:   // such a symbol, instead of letting it always be implicit, is
   459:   // motivated by things like the derivability relation, where it's
   460:   // nice to treat empty like any other symbol
   461:   Nonterminal emptyString;
   462: 
   463:   // sections of verbatim code emitted into the interface file, before
   464:   // the parser context class body
   465:   ObjList<LocString> verbatim;
   466: 
   467:   // name of the class into which the action functions are placed
   468:   LocString actionClassName;
   469: 
   470:   // verbatim action class declaration, and additional codes from
   471:   // extension modules to append to it (but see note of 11/13/04
   472:   // in grampar.cc)
   473:   ObjList<LocString> actionClasses;
   474: 
   475:   // code emitted into the implementation file at the end
   476:   ObjList<LocString> implVerbatim;
   477: 
   478:   // ---- declarative options ----
   479:   // name of the target language; nominally "C++"
   480:   sm_string targetLang;
   481: 
   482:   // when true, the default dup/del is what's expected for a
   483:   // garbage-collected system: dup() is the identity function,
   484:   // and del() is a no-op
   485:   bool useGCDefaults;
   486: 
   487:   // when true, unspecified merge() functions abort()
   488:   bool defaultMergeAborts;
   489: 
   490:   // expected numbers of various anomalies; -1 means no
   491:   // expectation has been supplied; this information is used
   492:   // to control what is reported after grammar analysis
   493:   int expectedSR;                       // shift/reduce conflicts
   494:   int expectedRR;                       // reduce/reduce conflicts
   495:   int expectedUNRNonterms;              // # unreachable nonterminals
   496:   int expectedUNRTerms;                 // # unreachable terminals
   497: 
   498: public:     // funcs
   499:   Grammar();                            // set everything manually
   500:   ~Grammar();
   501: 
   502:   // read/write as binary file
   503:   void xfer(Flatten &flat);
   504: 
   505:   // simple queries
   506:   int numTerminals() const;
   507:   int numNonterminals() const;
   508: 
   509: 
   510:   // ---- building a grammar ----
   511:   // declare a new token exists, with name and optional alias;
   512:   // return false if it's already declared
   513:   bool declareToken(LocString const &symbolName, int code,
   514:                     LocString const &alias);
   515: 
   516:   // add a new production; the rhs arg list must be terminated with a NULL
   517:   //void addProduction(Nonterminal *lhs, Symbol *rhs, ...);
   518: 
   519:   // add a pre-constructed production
   520:   void addProduction(Production *prod);
   521: 
   522:   // ---------- outputting a grammar --------------
   523:   // print the list of symbols with type annotations
   524:   void printSymbolTypes(std::ostream &os) const;
   525: 
   526:   // print the current list of productions
   527:   void printProductions(std::ostream &os, bool printCode=true) const;
   528: 
   529:   // emit C++ code to construct this grammar later
   530:   void emitSelfCC(std::ostream &os) const;
   531: 
   532:   // ---- whole-grammar stuff ----
   533:   // after adding all rules, check that all nonterminals have
   534:   // at least one rule; also checks referential integrity
   535:   // in actions and conditions; throw exception if there is a
   536:   // problem
   537:   void checkWellFormed() const;
   538: 
   539:   // output grammar in Bison's syntax
   540:   // (coincidentally, when bison dumps its table with '-v', its table
   541:   // dump syntax is similar to my input syntax)
   542:   void printAsBison(std::ostream &os) const;
   543: 
   544:   // ---- symbol access ----
   545:   #define SYMBOL_ACCESS(Thing)                              \
   546:     /* retrieve, return NULL if not there */                \
   547:     Thing const *find##Thing##C(char const *name) const;    \
   548:     Thing *find##Thing(char const *name)                    \
   549:       { return const_cast<Thing*>(find##Thing##C(name)); }  \
   550:                                                             \
   551:     /* retrieve, or create it if not already there */       \
   552:     Thing *getOrMake##Thing(LocString const &name);
   553: 
   554:   SYMBOL_ACCESS(Symbol)        // findSymbolC, findSymbol, getOrMakeSymbol
   555:   SYMBOL_ACCESS(Terminal)      // findTerminal{C,}, getOrMakeTerminal
   556:   SYMBOL_ACCESS(Nonterminal)   // findNonterminal{C,}, getOrMakeNonterminal
   557:   #undef SYMBOL_ACCESS
   558: 
   559:   // map a production to a unique index
   560:   int getProductionIndex(Production const *prod) const;
   561: };
   562: 
   563: 
   564: #endif // __GRAMMAR_H
   565: 
End C section to elk/elk_grammar.h[1]
Start C section to elk/elk_grampar.codes.h[1 /1 ]
     1: #line 4160 "./lpsrc/elk.pak"
     2: # define BISON_GRAMPAR_TAB_H     /* tweak */
     3: # define YYSTYPE yystype
     4: # define YYSTYPE_IS_TRIVIAL 1
     5: # define        TOK_INTEGER     257
     6: # define        TOK_NAME        258
     7: # define        TOK_STRING      259
     8: # define        TOK_LIT_CODE    260
     9: # define        TOK_LBRACE      261
    10: # define        TOK_RBRACE      262
    11: # define        TOK_COLON       263
    12: # define        TOK_SEMICOLON   264
    13: # define        TOK_ARROW       265
    14: # define        TOK_LPAREN      266
    15: # define        TOK_RPAREN      267
    16: # define        TOK_COMMA       268
    17: # define        TOK_TERMINALS   269
    18: # define        TOK_TOKEN       270
    19: # define        TOK_NONTERM     271
    20: # define        TOK_FUN 272
    21: # define        TOK_VERBATIM    273
    22: # define        TOK_IMPL_VERBATIM       274
    23: # define        TOK_PRECEDENCE  275
    24: # define        TOK_OPTION      276
    25: # define        TOK_EXPECT      277
    26: # define        TOK_CONTEXT_CLASS       278
    27: # define        TOK_SUBSETS     279
End C section to elk/elk_grampar.codes.h[1]
Start C section to elk/elk_grampar.h[1 /1 ]
     1: #line 4188 "./lpsrc/elk.pak"
     2: // grampar.h            see license.txt for copyright and terms of use
     3: // declarations for bison-generated grammar parser
     4: 
     5: #ifndef __GRAMPAR_H
     6: #define __GRAMPAR_H
     7: 
     8: #include "sm_typ.h"
     9: #include "sm_sobjlist.h"
    10: #include "sm_exc.h"
    11: #include "sm_strsobjdict.h"
    12: #include "ast_locstr.h"
    13: 
    14: // linkdepend: grampar.tab.cc
    15: 
    16: // fwd decl
    17: class GrammarAST;         // gramast.ast
    18: class TF_nonterm;         // gramast.ast
    19: class GrammarLexer;       // ../ast/gramlex.h
    20: class StringTable;        // strtable.h
    21: 
    22: 
    23: // -------- rest of the program's view of parser ------------
    24: // name of extra parameter to yyparse (i.e. the context in
    25: // which the parser operates, instead of that being stored
    26: // in some collection of globals)
    27: #define YYPARSE_PARAM parseParam
    28: 
    29: // type of thing extra param points at
    30: struct ParseParams {
    31:   GrammarAST *treeTop;    // set when parsing finishes; AST tree top
    32:   GrammarLexer &lexer;    // lexer we're using
    33: 
    34: public:
    35:   ParseParams(GrammarLexer &L) :
    36:     treeTop(NULL),        // filled in when parsing completes
    37:     lexer(L)
    38:   {}
    39: };
    40: 
    41: // caller interface to Bison-generated parser; starts parsing
    42: // (whatever stream lexer is reading) and returns 0 for success and
    43: // 1 for error; the extra parameter is available to actions to use
    44: int grampar_yyparse(void *YYPARSE_PARAM);
    45: 
    46: // when this is set to true, bison parser emits info about
    47: // actions as it's taking them (shared by all instances of
    48: // bison-generated parsers in a given program)
    49: extern int yydebug;
    50: 
    51: 
    52: // ---------- Bison's view of the rest of the program --------
    53: // Bison calls this to get each token; returns token code,
    54: // or 0 for eof; semantic value for returned token can be
    55: // put into '*lvalp'
    56: // TODO: Paul Hilfinger reports there's a problem saying "union
    57: // YYSTYPE"; he's using bison 1.34 I think, so I need to upgrade
    58: // and see what the problem is (suspect my 'sed' pattern isn't
    59: // matching, in the Makefile)
    60: int grampar_yylex(union YYSTYPE *lvalp, void *parseParam);
    61: 
    62: // error printer
    63: void grampar_yyerror(char const *message, void *parseParam);
    64: 
    65: 
    66: // ---------------- grampar's parsing structures ---------------
    67: class Grammar;    // fwd
    68: 
    69: // while walking the AST, we do a kind of recursive evaluation
    70: // to handle things like inherited actions and self-updating
    71: // (eval'd at grammar parse time) action expressions
    72: class Environment {
    73: public:      // data
    74:   // grammar we're playing with (stored here because it's
    75:   // more convenient than passing it to every fn separately)
    76:   Grammar &g;
    77: 
    78:   // env in which we're nested, if any
    79:   Environment *prevEnv;      // (serf)
    80: 
    81:   // maps from a nonterminal name to its declaration, if that
    82:   // nonterminal has in fact been declared already
    83:   StringSObjDict<TF_nonterm /*const*/> nontermDecls;
    84: 
    85:   // count of recoverable errors; only the one in the
    86:   // topmost environment is used
    87:   int errorCount;
    88: 
    89:   // reference to the one we're really using
    90:   int &errors;
    91: 
    92: public:
    93:   Environment(Grammar &G);             // new env
    94:   Environment(Environment &prevEnv);   // nested env (chains via 'prevEnv')
    95:   ~Environment();
    96: };
    97: 
    98: 
    99: // --------------- grampar's external interface -----------
   100: // parse grammar file 'fname' into grammar 'g', throwing exceptions
   101: // if there are problems
   102: void readGrammarFile(Grammar &g, char const *fname);
   103: 
   104: // just do the parsing stage
   105: GrammarAST *parseGrammarFile(char const *fname, bool useML);
   106: 
   107: // merge two grammar descriptions; neither argument is consumed,
   108: // but subtrees of the 2nd argument get moved into the first tree
   109: void mergeGrammar(GrammarAST *base, GrammarAST *ext);
   110: 
   111: // GrammarAST -> Grammar
   112: void parseGrammarAST(Grammar &g, GrammarAST *treeTop);
   113: 
   114: 
   115: // thrown when there is an error parsing the AST
   116: class XASTParse : public xBase {
   117: public:    // data
   118:   // token at or near failure
   119:   LocString failToken;
   120: 
   121:   // what is wrong
   122:   sm_string message;
   123: 
   124: private:   // funcs
   125:   static sm_string constructMsg(LocString const &tok, char const *msg);  // builds the text for 'message'
   126: 
   127: public:    // funcs
   128:   XASTParse(LocString const &tok, char const *msg);
   129:   XASTParse(XASTParse const &obj);   // copy ctor
   130:   ~XASTParse();
   131: };
   132: 
   133: 
   134: #endif // __GRAMPAR_H
End C section to elk/elk_grampar.h[1]
Start C section to elk/elk_grampar.tab.h[1 /1 ]
     1: #line 4323 "./lpsrc/elk.pak"
     2: #ifndef BISON_GRAMPAR_TAB_H    /* tweak */
     3: # define BISON_GRAMPAR_TAB_H
     4: 
     5: #ifndef YYSTYPE
     6: typedef union YYSTYPE {   /* semantic values for the grammar parser */
     7:   int num;
     8:   LocString *str;
     9: 
    10:   ASTList<TopForm> *topFormList;
    11:   TopForm *topForm;
    12: 
    13:   ASTList<TermDecl> *termDecls;
    14:   TermDecl *termDecl;
    15:   ASTList<TermType> *termTypes;
    16:   TermType *termType;
    17:   ASTList<PrecSpec> *precSpecs;
    18: 
    19:   ASTList<SpecFunc> *specFuncs;
    20:   SpecFunc *specFunc;
    21:   ASTList<LocString> *sm_stringList;   /* NOTE(review): 'sm_' prefix looks like a global-rename artifact -- confirm */
    22: 
    23:   ASTList<ProdDecl> *prodDecls;
    24:   ProdDecl *prodDecl;
    25:   ASTList<RHSElt> *rhsList;
    26:   RHSElt *rhsElt;
    27: } yystype;
    28: # define YYSTYPE yystype
    29: # define YYSTYPE_IS_TRIVIAL 1
    30: #endif
    31: # define        TOK_INTEGER     257
    32: # define        TOK_NAME        258
    33: # define        TOK_STRING      259
    34: # define        TOK_LIT_CODE    260
    35: # define        TOK_LBRACE      261
    36: # define        TOK_RBRACE      262
    37: # define        TOK_COLON       263
    38: # define        TOK_SEMICOLON   264
    39: # define        TOK_ARROW       265
    40: # define        TOK_LPAREN      266
    41: # define        TOK_RPAREN      267
    42: # define        TOK_COMMA       268
    43: # define        TOK_TERMINALS   269
    44: # define        TOK_TOKEN       270
    45: # define        TOK_NONTERM     271
    46: # define        TOK_FUN 272
    47: # define        TOK_VERBATIM    273
    48: # define        TOK_IMPL_VERBATIM       274
    49: # define        TOK_PRECEDENCE  275
    50: # define        TOK_OPTION      276
    51: # define        TOK_EXPECT      277
    52: # define        TOK_CONTEXT_CLASS       278
    53: # define        TOK_SUBSETS     279
    54: 
    55: 
    56: #endif /* not BISON_GRAMPAR_TAB_H */
End C section to elk/elk_grampar.tab.h[1]
Start C section to elk/elk_lexerint.h[1 /1 ]
     1: #line 4380 "./lpsrc/elk.pak"
     2: // lexerint.h            see license.txt for copyright and terms of use
     3: // LexerInterface, the interface the GLR parser uses
     4: // to access the lexer's token stream
     5: 
     6: #ifndef LEXERINT_H
     7: #define LEXERINT_H
     8: 
     9: #include "elk_useract.h"
    10: #include "sm_srcloc.h"
    11: #include "sm_str.h"
    12: 
    13: // This 'interface' is a collection of variables describing
    14: // the current token.  I don't use a bunch of pure-virtual
    15: // functions because of the cost of calling them; everything
    16: // here will be in the inner loop of the parser.
    17: class LexerInterface {
    18: public:     // data
    19:   // NOTE: All of these fields are *written* by the lexer, and
    20:   // *read* by the parser.
    21: 
    22:   // token classification; this is what the parser will use to
    23:   // make parsing decisions; this code must correspond to something
    24:   // declared in the 'terminals' section of the grammar; when this
    25:   // is 0, it is the final (end-of-file) token; the parser is allowed
    26:   // to change this for its own purposes, and currently does so for
    27:   // token reclassification
    28:   int type;
    29: 
    30:   // semantic value; this is what will be passed to the reduction
    31:   // actions when this token is on the right hand side of a rule
    32:   SemanticValue sval;
    33: 
    34:   // source location of the token; this will only be used if the
    35:   // parser has been compiled to automatically propagate it
    36:   SourceLoc loc;
    37: 
    38: public:     // funcs
    39:   LexerInterface()
    40:     : type(0),
    41:       sval(0),
    42:       loc(SL_UNKNOWN)
    43:   {}
    44:   virtual ~LexerInterface() {}
    45: 
    46: 
    47:   // retrieve the next token; the lexer should respond by filling in
    48:   // the above fields with new values, to describe the next token; the
    49:   // lexer indicates end of file by putting 0 into 'type'; when the
    50:   // LexerInterface object is first passed to the parser, the above
    51:   // fields should already be set correctly (i.e. the parser will make
    52:   // its first call to 'nextToken' *after* processing the first token)
    53:   typedef void (*NextTokenFunc)(LexerInterface *);
    54: 
    55:   // get the function which we'll call to get the next token
    56:   //
    57:   // Why the two-step approach?  Virtual method calls are more
    58:   // expensive than simple indirect function calls, and this happens
    59:   // in the inner parsing loop.  If C++ had a way to explicitly cache
    60:   // the result of a method lookup this wouldn't be necessary.
    61:   virtual NextTokenFunc getTokenFunc() const=0;
    62: 
    63: 
    64:   // The following functions are called to help create diagnostic
    65:   // reports.  They should describe the current token (the one
    66:   // which the above fields refer to) in more-or-less human-readable
    67:   // terms.
    68: 
    69:   // describe the token; for tokens with multiple spellings (e.g.
    70:   // identifiers), this should include the actual token spelling
    71:   // if possible; note that if the token has been reclassified,
    72:   // then the 'type' field above might have been changed by the
    73:   // parser, in which case this function should ideally print
    74:   // a description which takes the new type into account
    75:   virtual sm_string tokenDesc() const=0;
    76: 
    77:   // describe a token kind; this is different from tokenDesc(), since
    78:   // it need not correspond to the token kind that was just yielded,
    79:   // and hence any related lexeme data cannot be assumed to be
    80:   // available; this is used during error diagnosis
    81:   virtual sm_string tokenKindDesc(int kind) const=0;
    82: };
    83: 
    84: #endif // LEXERINT_H
End C section to elk/elk_lexerint.h[1]
Start C section to elk/elk_mlsstr.h[1 /1 ]
     1: #line 4465 "./lpsrc/elk.pak"
     2: // mlsstr.h            see license.txt for copyright and terms of use
     3: // handles lexically embedded ML
     4: // based on ccsstr.h
     5: 
     6: #ifndef MLSSTR_H
     7: #define MLSSTR_H
     8: 
     9: #include "ast_embedded.h"
    10: 
    11: class MLSubstrateTest;
    12: 
    13: class MLSubstrate : public EmbeddedLang {
    14: private:
    15:   enum State {
    16:     ST_NORMAL,       // normal text
    17:     ST_STRING,       // inside a sm_string literal
    18:     ST_CHAR,         // inside a char literal
    19:     ST_COMMENT,      // inside a comment
    20:     NUM_STATES
    21:   } state;
    22:   int nesting;       // depth of paren/bracket/brace nesting
    23:   int comNesting;    // depth of comment nesting (in ST_COMMENT)
    24:   char prev;         // previous character
    25: 
    26:   // so test code can interrogate internal state
    27:   friend class MLSubstrateTest;
    28: 
    29: public:
    30:   MLSubstrate(ReportError *err = NULL);
    31:   virtual ~MLSubstrate();
    32: 
    33:   // EmbeddedLang entry points (see gramlex.h for description
    34:   // of each function)
    35:   virtual void reset(int initNest = 0);
    36:   virtual void handle(char const *str, int len, char finalDelim);
    37:   virtual bool zeroNesting() const;
    38:   virtual sm_string getFuncBody() const;
    39:   virtual sm_string getDeclName() const;
    40: };
    41: 
    42: #endif // MLSSTR_H
End C section to elk/elk_mlsstr.h[1]
Start C section to elk/elk_ownerspec.h[1 /1 ]
     1: #line 4508 "./lpsrc/elk.pak"
     2: // ownerspec.h            see license.txt for copyright and terms of use
     3: // specification of "owner pointer", as a C++ template class
     4: 
     5: // I made this as an experiment.. it's really part of the
     6: // verifier project...
     7: #error This is not intended to be used
     8: 
     9: template <class T>
    10: class OwnerPtr {
    11: private:
    12:   T *ptr;
    13: 
    14:   enum State { OP_NULL, OP_DEAD, OP_OWNING };
    15:   State state;
    16: 
    17: public:
    18:   OwnerPtr() : ptr(NULL), state(OP_NULL) {}
    19: 
    20:   OwnerPtr(T *src) : ptr(src), state(src? OP_OWNING : OP_NULL) {}
    21: 
    22:   OwnerPtr(OwnerPtr &src) {
    23:     ptr = src.ptr;
    24:     state = src.state;
    25:     src.state = OP_DEAD;
    26:   }
    27: 
    28:   ~OwnerPtr() {
    29:     assert(state != OP_OWNING);
    30:   }
    31: 
    32:   OwnerPtr& operator= (OwnerPtr &src) {
    33:     if (this != &src) {
    34:       assert(state != OP_OWNING);
    35:       ptr = src.ptr;
    36:       state = src.state;
    37:       src.state = OP_DEAD;
    38:     }
    39:     return *this;
    40:   }
    41: 
    42:   OwnerPtr& operator= (T *src) {
    43:     assert(state != OP_OWNING);
    44:     ptr = src;
    45:     state = src? OP_OWNING : OP_NULL;
    46:     return *this;
    47:   }
    48: 
    49:   bool operator== (T *p) {
    50:     assert(state != OP_DEAD);
    51:     return ptr == p;
    52:   }
    53: 
    54:   // yield serf for possible further use
    55:   operator T* () {
    56:     assert(state != OP_DEAD);
    57:     return ptr;
    58:   }
    59: 
    60:   // use directly
    61:   T& operator* () {
    62:     assert(state == OP_OWNING);
    63:     return *ptr;
    64:   }
    65:   T* operator-> () {
    66:     assert(state == OP_OWNING);
    67:     return ptr;
    68:   }
    69: };
    70: 
    71: 
    72: 
    73: 
    74: 
    75: 
End C section to elk/elk_ownerspec.h[1]
Start C section to elk/elk_parsetables.h[1 /1 ]
     1: #line 4584 "./lpsrc/elk.pak"
     2: // parsetables.h            see license.txt for copyright and terms of use
     3: // ParseTables, a class to contain the tables needed by the
     4: // LR/GLR parsing algorithm
     5: 
     6: #ifndef PARSETABLES_H
     7: #define PARSETABLES_H
     8: 
     9: #include "sm_array.h"
    10: #include "elk_glrconfig.h"
    11: #include <iostream>     // std::ostream
    12: 
    13: class Flatten;            // flatten.h
    14: class EmitCode;           // emitcode.h
    15: class Symbol;             // grammar.h
    16: class Bit2d;              // bit2d.h
    17: 
    18: class ELK_EXTERN ParseTables;
    19: 
    20: // integer id for an item-set DFA state; I'm using an 'enum' to
    21: // prevent any other integers from silently flowing into it
    22: enum StateId { STATE_INVALID=-1 };
    23: 
    24: inline std::ostream& operator<< (std::ostream &os, StateId id)
    25:   { return os << (int)id; }
    26: 
    27: 
    28: // encodes an action in 'action' table; see 'actionTable'
    29: #if ENABLE_CRS_COMPRESSION
    30:   // high bits encoding
    31:   enum ActionEntryKind {
    32:     AE_MASK      = 0xC0,    // selection mask
    33:     AE_SHIFT     = 0x00,    // 00 = shift
    34:     AE_REDUCE    = 0x40,    // 01 = reduce
    35:     AE_AMBIGUOUS = 0x80,    // 10 = ambiguous
    36:     AE_ERROR     = 0xC0,    // 11 = error (if EEF is off)
    37:     AE_MAXINDEX  = 63       // maximum value of lower bits
    38:   };
    39: 
    40:   // remaining 6 bits:
    41:   //
    42: //   shift: destination state, encoded as an offset from the
    43:   //   first state that that terminal can reach
    44:   //
    45:   //   reduce: production, encoded as an index into a per-state
    46:   //   array of distinct production indices
    47:   //
    48:   //   ambiguous: for each state, have an array of ActionEntries.
    49:   //   ambiguous entries index into this array.  first indexed
    50:   //   entry is the count of how many actions follow
    51:   typedef unsigned char ActionEntry;
    52:   ActionEntry makeAE(ActionEntryKind k, int index);
    53:   #define errorActionEntry ((ActionEntry)AE_ERROR)
    54: #else
    55:   // each entry is one of:
    56:   //   +N+1, 0 <= N < numStates:         shift, and go to state N
    57:   //   -N-1, 0 <= N < numProds:          reduce using production N
    58:   //   numStates+N+1, 0 <= N < numAmbig: ambiguous, use ambigAction N
    59:   //   0:                                error
    60:   // (there is no 'accept', acceptance is handled outside this table)
    61:   typedef signed short ActionEntry;
    62:   #define errorActionEntry ((ActionEntry)0)
    63: #endif
    64: 
    65: 
    66: // encodes a destination state in 'gotoTable'
    67: #if ENABLE_CRS_COMPRESSION
    68:   // entry is an offset from the first state that can be reached
    69:   // by shifting the nonterminal
    70:   typedef unsigned char GotoEntry;
    71: #else
    72: // entry is the state to go to after shifting the nonterminal
    73:   typedef unsigned short GotoEntry;
    74: #endif
    75: #define errorGotoEntry ((GotoEntry)~0)
    76: 
    77: 
    78: // name a terminal using an index
    79: typedef unsigned char TermIndex;
    80: 
    81: // name a nonterminal using an index
    82: typedef unsigned char NtIndex;
    83: 
    84: // name a production using an index
    85: typedef unsigned short ProdIndex;
    86: 
    87: // an addressed cell in the 'errorBits' table
    88: typedef unsigned char ErrorBitsEntry;
    89: 
    90: 
    91: // encodes either terminal index N (as N+1) or
    92: // nonterminal index N (as -N-1), or 0 for no-symbol
    93: typedef signed short SymbolId;
    94: inline bool symIsTerm(SymbolId id) { return id > 0; }
    95: inline int symAsTerm(SymbolId id) { return id-1; }
    96: inline bool symIsNonterm(SymbolId id) { return id < 0; }
    97: inline NtIndex symAsNonterm(SymbolId id) { return (NtIndex)(-(id+1)); }
    98: SymbolId encodeSymbolId(Symbol const *sym);       // gramanl.cc
    99: 
   100: 
   101: // assign, but check for truncation
   102: template <class DEST, class SRC>
   103: inline void checkAssign(DEST &d, SRC s)
   104: {
   105:   d = (DEST)s;
   106:   xassert(d == s);
   107: }
   108: 
   109: 
   110: // the parse tables are the traditional action/goto, plus the list
   111: // of ambiguous actions, plus any more auxiliary tables useful during
   112: // run-time parsing
   113: class ELK_EXTERN ParseTables {
   114: private:    // types
   115:   // data about an intermediate state of parse table construction;
   116:   // once the table is finished, this data gets consolidated into the
   117:   // actual tables, and then thrown away
   118:   class TempData {
   119:   public:   // data
   120:     // nascent ambigTable
   121:     ArrayStack<ActionEntry> ambigTable;
   122: 
   123:     // nascent bigProductionList
   124:     ArrayStack<ProdIndex> bigProductionList;
   125: 
   126:     // nascent productionsForState, except using integer offsets from
   127:     // start of 'bigProductionList' instead of direct pointers into it
   128:     ArrayStack<int> productionsForState;
   129: 
   130:     // nascent versions of ambig tables, again with integer offsets
   131:     ArrayStack<int> ambigStateTable;
   132: 
   133:   public:   // funcs
   134:     TempData(int numStates);
   135:     ~TempData();
   136:   };
   137: 
   138: public:     // types
   139:   // per-production info
   140:   struct ProdInfo {
   141:     unsigned char rhsLen;                // # of RHS symbols
   142:     NtIndex lhsIndex;                    // 'ntIndex' of LHS
   143:   };
   144: 
   145: protected:  // data
   146:   // when this is false, all of the below "(owner*)" annotations are
   147:   // actually "(serf)", i.e. this object does *not* own any of the
   148:   // tables (see emitConstructionCode())
   149:   bool owning;
   150: 
   151:   // non-NULL during construction
   152:   TempData *temp;                        // (nullable owner)
   153: 
   154:   // # terminals, nonterminals in grammar
   155:   int numTerms;
   156:   int numNonterms;
   157: 
   158:   // # of parse states
   159:   int numStates;
   160: 
   161:   // # of productions in the grammar
   162:   int numProds;
   163: 
   164:   // action table, indexed by (state*actionCols + lookahead)
   165:   int actionCols;
   166:   ActionEntry *actionTable;              // (owner*)
   167: 
   168:   // goto table, indexed by (state*gotoCols + nontermId)
   169:   int gotoCols;
   170:   GotoEntry *gotoTable;                  // (owner*)
   171: 
   172:   // map production id to information about that production
   173:   ProdInfo *prodInfo;                    // (owner*)
   174: 
   175:   // map a state id to the symbol (terminal or nonterminal) which is
   176:   // shifted to arrive at that state
   177:   SymbolId *stateSymbol;                 // (owner*)
   178: 
   179:   // ambiguous actions: one big list, for allocation purposes; then
   180:   // the actions encode indices into this table; the first indexed
   181:   // entry gives the # of actions, and is followed by that many
   182:   // actions, each interpreted the same way ordinary 'actionTable'
   183:   // entries are
   184:   int ambigTableSize;
   185:   ActionEntry *ambigTable;               // (nullable owner*)
   186: 
   187:   // total order on nonterminals for use in choosing which to
   188:   // reduce to in the RWL algorithm; index into this using a
   189:   // nonterminal index, and it yields the ordinal for that
   190:   // nonterminal (so these aren't really NtIndex's, but they're
   191:   // exactly as wide, so I use NtIndex anyway)
   192:   //
   193:   // The order is consistent with the requirement that if
   194:   //   A ->+ B
   195:   // then B will be earlier in the order (assuming acyclicity).
   196:   // That way, we'll do all reductions to B before any to A (for
   197:   // reductions spanning the same set of ground terminals), and
   198:   // therefore will merge all alternatives for B before reducing
   199:   // any of them to A.
   200:   NtIndex *nontermOrder;                 // (owner*)
   201: 
   202:   // --------------------- table compression ----------------------
   203: 
   204:   // table compression techniques taken from:
   205:   //   [DDH] Peter Dencker, Karl Duerre, and Johannes Heuft.
   206:   //   Optimization of Parser Tables for Portable Compilers.
   207:   //   In ACM TOPLAS, 6, 4 (1984) 546-572.
   208:   //   http://citeseer.nj.nec.com/context/27540/0 (not in database)
   209:   //   ~/doc/papers/p546-dencker.pdf (from ACM DL)
   210: 
   211:   // Code Reduction Scheme (CRS):
   212:   //
   213:   // Part (a):  The states are numbered such that all states that
   214:   // are reached by transitions on a given symbol are contiguous.
   215:   // See gramanl.cc, GrammarAnalysis::renumberStates().  Then, we
   216:   // simply need a map from the symbol index to the first state
   217:   // that is reached along that symbol.
   218:   StateId *firstWithTerminal;            // (nullable owner*) termIndex -> state
   219:   StateId *firstWithNonterminal;         // (nullable owner*) ntIndex -> state
   220:   //
   221:   // Part (b):  The production indices that appear on a given row
   222:   // are collected together.  (This is called (c) by [DDH]; I don't
   223:   // have a counterpart to their (b).)
   224:   int bigProductionListSize;
   225:   ProdIndex *bigProductionList;          // (nullable owner*) array into which 'productionsForState' points
   226:   ProdIndex **productionsForState;       // (nullable owner to serf) state -> stateProdIndex -> prodIndex
   227:   //
   228: // Part (c):  Pointers into 'ambigTable' are collected together in
   229:   // per-state lists as well.
   230:   ActionEntry **ambigStateTable;         // (nullable owner) state -> (+ambigStateTableIndex -> ActionEntry*)
   231: 
   232:   // Error Entry Factoring (EEF):
   233:   //
   234:   // Factor out all the error entries into their own bitmap.  Then
   235:   // regard error entries in the original tables as "insignificant".
   236:   //
   237:   // 'errorBits' is a map of where the error actions are in the action
   238:   // table.  It is indexed through 'errorBitsPointers':
   239:   //   byte = errorBitsPointers[stateId][lookahead >> 3];
   240:   //   if ((byte >> (lookahead & 7)) & 1) then ERROR
   241:   int errorBitsRowSize;                  // bytes per row
   242:   int uniqueErrorRows;                   // distinct rows
   243:   ErrorBitsEntry *errorBits;             // (nullable owner*)
   244:   ErrorBitsEntry **errorBitsPointers;    // (nullable owner ptr to serfs)
   245: 
   246:   // Graph Coloring Scheme (GCS):
   247:   //
   248:   // Merge lines and columns that have identical significant entries.
   249:   // This is done as two-pass graph coloring.  They give a specific
   250:   // heuristic.
   251:   //
   252:   // this is a map to be applied to terminal indices before being
   253:   // used to access the compressed action table; it maps the terminal
   254:   // id (as reported by the lexer) to the proper action table column
   255:   TermIndex *actionIndexMap;             // (nullable owner*)
   256:   //
   257:   // this is a map from states to the beginning of the action table
   258:   // row that pertains to that state; it effectively factors the
   259:   // states into equivalence classes
   260:   int actionRows;                        // rows in actionTable[]
   261:   ActionEntry **actionRowPointers;       // (nullable owner ptr to serfs)
   262:   //
   263:   // index map for the goto table
   264:   NtIndex *gotoIndexMap;                 // (nullable owner*)
   265:   //
   266:   // row map for the goto table
   267:   int gotoRows;
   268:   GotoEntry **gotoRowPointers;           // (nullable owner ptr to serfs)
   269: 
   270: public:     // data
   271:   // These are public because if they weren't, I'd just have a stupid
   272:   // getter/setter pattern that exposes them anyway.
   273: 
   274:   // start state id
   275:   StateId startState;
   276: 
   277:   // index of the production which will finish a parse; it's the
   278:   // final reduction executed
   279:   int finalProductionIndex;
   280: 
   281: private:    // funcs
   282:   void alloc(int numTerms, int numNonterms, int numStates, int numProds,
   283:              StateId start, int finalProd);
   284: 
   285:   // index tables
   286:   ActionEntry &actionEntry(StateId stateId, int termId)
   287:     { return actionTable[stateId*actionCols + termId]; }
   288:   int actionTableSize() const
   289:     { return actionRows * actionCols; }
   290: 
   291:   GotoEntry &gotoEntry(StateId stateId, int nontermId)
   292:     { return gotoTable[stateId*gotoCols + nontermId]; }
   293:   int gotoTableSize() const
   294:     { return gotoRows * gotoCols; }
   295: 
   296:   void appendAmbig(ArrayStack<ActionEntry> const &set);
   297:   bool compareAmbig(ArrayStack<ActionEntry> const &set, int startIndex);
   298: 
   299:   void fillInErrorBits(bool setPointers);
   300:   int colorTheGraph(int *color, Bit2d &graph);
   301: 
   302: protected:  // funcs
   303:   // the idea is that 'emitConstructionCode' will emit code that
   304:   // defines a subclass of 'ParseTables'; that's why so many of the
   305:   // data members are protected: the subclass can then access them
   306:   // directly, which is very convenient when trying to construct the
   307:   // tables from static data
   308:   ParseTables(bool owning);    // only legal when owning==false
   309: 
   310: public:     // funcs
   311:   ParseTables(int numTerms, int numNonterms, int numStates, int numProds,
   312:               StateId start, int finalProd);
   313:   ~ParseTables();
   314: 
   315:   // simple queries
   316:   int getNumTerms() const { return numTerms; }
   317:   int getNumNonterms() const { return numNonterms; }
   318:   int getNumStates() const { return numStates; }
   319:   int getNumProds() const { return numProds; }
   320: 
   321:   // finish construction; do this before emitting code
   322:   void finishTables();
   323: 
   324:   // write the tables out as C++ source that can be compiled into
   325:   // the program that will ultimately do the parsing
   326:   void emitConstructionCode(EmitCode &out, char const *className, char const *funcName);
   327: 
   328:   // this does the same thing for ML, and is implemented in genml.cc
   329:   void emitMLConstructionCode(EmitCode &out, char const *className, char const *funcName);
   330: 
   331: 
   332:   // -------------------- table construction ------------------------
   333:   // CRS dest-state origin tables
   334:   void setFirstWithTerminal(int termId, StateId s) {
   335:     xassert((unsigned)termId < (unsigned)numTerms);
   336:     firstWithTerminal[termId] = s;
   337:   }
   338:   void setFirstWithNonterminal(int nontermId, StateId s) {
   339:     xassert((unsigned)nontermId < (unsigned)numNonterms);
   340:     firstWithNonterminal[nontermId] = s;
   341:   }
   342: 
   343:   void setActionEntry(StateId stateId, int termId, ActionEntry act)
   344:     { actionEntry(stateId, termId) = act; }
   345:   void setGotoEntry(StateId stateId, int nontermId, GotoEntry got)
   346:     { gotoEntry(stateId, nontermId) = got; }
   347: 
   348:   // encode actions
   349:   ActionEntry encodeShift(StateId destState, int shiftedTermId);
   350:   ActionEntry encodeReduce(int prodId, StateId inWhatState);
   351:   ActionEntry encodeAmbig(ArrayStack<ActionEntry> const &set,
   352:                           StateId inWhatState);
   353:   ActionEntry encodeError() const;
   354:   ActionEntry validateAction(int code) const;
   355: 
   356:   // encode gotos
   357:   GotoEntry encodeGoto(StateId stateId, int shiftedNontermId) const;
   358:   GotoEntry encodeGotoError() const
   359:     { return errorGotoEntry; }
   360:   GotoEntry validateGoto(int code) const;
   361: 
   362:   // misc
   363:   void setProdInfo(int prodId, int rhsLen, int ntIndex) {
   364:     checkAssign(prodInfo[prodId].rhsLen, rhsLen);
   365:     checkAssign(prodInfo[prodId].lhsIndex, ntIndex);
   366:   }
   367:   void setStateSymbol(StateId state, SymbolId sym) {
   368:     stateSymbol[state] = sym;
   369:   }
   370:   NtIndex *getWritableNontermOrder() {
   371:     // expose this directly, due to the way the algorithm that
   372:     // computes it is written
   373:     return nontermOrder;
   374:   }
   375: 
   376:   // table compressors
   377:   void computeErrorBits();
   378:   void mergeActionColumns();
   379:   void mergeActionRows();
   380:   void mergeGotoColumns();
   381:   void mergeGotoRows();
   382: 
   383: 
   384:   // -------------------- table queries ---------------------------
   385:   // return true if the action is an error
   386:   bool actionEntryIsError(StateId stateId, int termId) {
   387:     #if ENABLE_EEF_COMPRESSION
   388:       // check with the error table
   389:       return ( errorBitsPointers[stateId][termId >> 3]
   390:                  >> (termId & 7) ) & 1;
   391:     #else
   392:       return isErrorAction(actionEntry(stateId, termId));
   393:     #endif
   394:   }
   395: 
   396:   // query action table, without checking the error bitmap
   397:   ActionEntry getActionEntry_noError(StateId stateId, int termId) {
   398:     #if ENABLE_GCS_COMPRESSION
   399:       #if ENABLE_GCS_COLUMN_COMPRESSION
   400:         return actionRowPointers[stateId][actionIndexMap[termId]];
   401:       #else
   402:         return actionRowPointers[stateId][termId];
   403:       #endif
   404:     #else
   405:       return actionEntry(stateId, termId);
   406:     #endif
   407:   }
   408: 
   409:   // query the action table, yielding an action that might be
   410:   // an error action
   411:   ActionEntry getActionEntry(StateId stateId, int termId) {
   412:     #if ENABLE_EEF_COMPRESSION
   413:       if (actionEntryIsError(stateId, termId)) {
   414:         return errorActionEntry;
   415:       }
   416:     #endif
   417: 
   418:     return getActionEntry_noError(stateId, termId);
   419:   }
   420: 
   421:   // decode actions
   422:   #if !ENABLE_CRS_COMPRESSION
   423:     bool isShiftAction(ActionEntry code) const
   424:       { return code > 0 && code <= numStates; }
   425:     static StateId decodeShift(ActionEntry code, int /*shiftedTerminal*/)
   426:       { return (StateId)(code-1); }
   427:     static bool isReduceAction(ActionEntry code)
   428:       { return code < 0; }
   429:     static int decodeReduce(ActionEntry code, StateId /*inState*/)
   430:       { return -(code+1); }
   431:     static bool isErrorAction(ActionEntry code)
   432:       { return code == 0; }
   433: 
   434:     // ambigAction is only other choice; this yields a pointer to
   435:     // an array of actions, the first of which says how many actions
   436:     // there are
   437:     ActionEntry *decodeAmbigAction(ActionEntry code, StateId /*inState*/) const
   438:       { return ambigTable + (code-1-numStates); }
   439: 
   440:   #else
   441:     static bool isShiftAction(ActionEntry code) {
   442:       return (code & AE_MASK) == AE_SHIFT;
   443:     }
   444:     StateId decodeShift(ActionEntry code, int shiftedTerminal) {
   445:       return (StateId)(firstWithTerminal[shiftedTerminal] + (code & AE_MAXINDEX));
   446:     }
   447:     static bool isReduceAction(ActionEntry code) {
   448:       return (code & AE_MASK) == AE_REDUCE;
   449:     }
   450:     int decodeReduce(ActionEntry code, StateId inState) {
   451:       return productionsForState[inState][code & AE_MAXINDEX];
   452:     }
   453:     static bool isErrorAction(ActionEntry code) {
   454:       return code == AE_ERROR;
   455:     }
   456: 
   457:     ActionEntry *decodeAmbigAction(ActionEntry code, StateId inState) const {
   458:       return ambigStateTable[inState] + (code & AE_MAXINDEX);
   459:     }
   460:   #endif
   461: 
   462:   // decode gotos
   463:   GotoEntry getGotoEntry(StateId stateId, int nontermId) {
   464:     #if ENABLE_GCS_COMPRESSION
   465:       #if ENABLE_GCS_COLUMN_COMPRESSION
   466:         return gotoRowPointers[stateId][gotoIndexMap[nontermId]];
   467:       #else
   468:         return gotoRowPointers[stateId][nontermId];
   469:       #endif
   470:     #else
   471:       return gotoEntry(stateId, nontermId);
   472:     #endif
   473:   }
   474: 
   475:   bool isErrorGoto(GotoEntry code)
   476:     { return code == errorGotoEntry; }
   477: 
   478:   StateId decodeGoto(GotoEntry code, int shiftedNonterminal) {
   479:     #if ENABLE_CRS_COMPRESSION
   480:       return (StateId)(firstWithNonterminal[shiftedNonterminal] + code);
   481:     #else
   482:       return (StateId)code;
   483:     #endif
   484:   }
   485: 
   486:   // nonterminal order
   487:   int nontermOrderSize() const
   488:     { return numNonterms; }
   489:   NtIndex getNontermOrdinal(NtIndex idx) const
   490:     { return nontermOrder[idx]; }
   491: 
   492:   // misc
   493:   ProdInfo const &getProdInfo(int prodIndex) const
   494:     { return prodInfo[prodIndex]; }
   495:   int getStateSymbol(StateId id) const
   496:     { return stateSymbol[id]; }
   497: 
   498:   // query compression options based on which fields are not NULL; do
   499:   // *not* use the compile-time flags, because we're trying to detect
   500:   // mismatch between compiler flags used at different times
   501:   bool eef_enabled() const
   502:     { return !!errorBits; }
   503:   bool gcs_enabled() const
   504:     { return !!actionRowPointers; }
   505:   bool gcsc_enabled() const
   506:     { return !!actionIndexMap; }
   507:   bool crs_enabled() const
   508:     { return !!firstWithTerminal; }
   509: };
   510: 
   511: 
   512: // NOTE: At one point (before 7/27/03), I had the ability to read and
   513: // write parse tables to files, *not* using the C++ compiler to store
   514: // tables as static data.  I removed it because I wasn't using it, and
   515: // it was hindering table evolution.  But as the tables stabilize
   516: // again, if the need arises, one could go get (from CVS) the code
   517: // that did it and fix it up to work again.
   518: 
   519: 
   520: #endif // PARSETABLES_H
End C section to elk/elk_parsetables.h[1]
Start C section to elk/elk_ptreeact.h[1 /1 ]
     1: #line 5105 "./lpsrc/elk.pak"
     2: // ptreeact.h            see license.txt for copyright and terms of use
     3: // a generic set of user actions that build parse trees for any grammar
     4: 
     5: #ifndef PTREEACT_H
     6: #define PTREEACT_H
     7: 
     8: #include "elk_lexerint.h"
     9: #include "elk_useract.h"
    10: 
    11: class ParseTables;         // parsetables.h
    12: 
    13: 
    14: // lexer to yield PTreeNodes for tokens
    15: class ParseTreeLexer : public LexerInterface {
    16: private:
    17:   LexerInterface *underlying;   // for getting token descriptions
    18:   NextTokenFunc underToken;     // for getting tokens
    19:   UserActions *actions;         // for getting symbol names
    20: 
    21: private:
    22:   void copyFields();
    23: 
    24: public:
    25:   ParseTreeLexer(LexerInterface *u, UserActions *a);
    26: 
    27:   static void nextToken(LexerInterface *lex);
    28:   virtual NextTokenFunc getTokenFunc() const
    29:     { return &ParseTreeLexer::nextToken; }
    30: 
    31:   virtual sm_string tokenDesc() const;
    32:   virtual sm_string tokenKindDesc(int kind) const;
    33: };
    34: 
    35: 
    36: // layer these actions on top of the generated actions to
    37: // build parse trees for the reductions
    38: class ParseTreeActions : public TrivialUserActions {
    39: private:
    40:   UserActions *underlying;   // for getting symbol names
    41:   ParseTables *tables;       // for finding out production lengths
    42: 
    43: public:
    44:   ParseTreeActions(UserActions *u, ParseTables *t)
    45:     : underlying(u), tables(t) {}
    46: 
    47:   static SemanticValue reduce(
    48:     UserActions *context,
    49:     int productionId,
    50:     SemanticValue const *svals
    51:     SOURCELOCARG( SourceLoc loc ) );
    52:   virtual ReductionActionFunc getReductionAction()
    53:     { return &ParseTreeActions::reduce; }
    54: 
    55:   virtual SemanticValue mergeAlternativeParses(
    56:     int ntIndex, SemanticValue left, SemanticValue right
    57:     SOURCELOCARG( SourceLoc loc ) );
    58: 
    59:   virtual char const *terminalName(int termId);
    60:   virtual char const *nonterminalName(int termId);
    61: 
    62:   ParseTables *getTables() { return tables; }
    63: };
    64: 
    65: 
    66: #endif // PTREEACT_H
End C section to elk/elk_ptreeact.h[1]
Start C section to elk/elk_ptreenode.h[1 /1 ]
     1: #line 5172 "./lpsrc/elk.pak"
     2: // ptreenode.h            see license.txt for copyright and terms of use
     3: // parse tree node for experimental grammars (this isn't something
     4: // Elkhound as a whole knows about--it doesn't make trees unless
     5: // the user actions do)
     6: 
     7: #ifndef PTREENODE_H
     8: #define PTREENODE_H
     9: 
    10: #include <stddef.h>     // NULL
    11: #include <iostream>   // std::ostream
    12: 
    13: // for storing counts of parse trees; I try to make the code work for
    14: // either 'int' or 'double' in this spot (e.g. I assign 0 to it
    15: // instead of 0.0), even though 'int' overflows quickly for the highly
    16: // ambiguous grammars
    17: typedef double TreeCount;
    18: 
// a node in a parse forest: one production application, with links
// to its children and (via 'merged') to alternative derivations of
// the same region of input
class PTreeNode {
public:    // types
  // max # of children (when this is increased, more constructors
  // for PTreeNode should be added)
  enum { MAXCHILDREN = 10 };

  // printing options
  enum PrintFlags {
    PF_NONE    = 0,       // default, print types as-is
    PF_EXPAND  = 1,       // types are just LHS, dig down to find RHSs
    PF_ADDRS   = 2,       // print node virtual addresses to see sharing
  };

public:    // data
  // textual repr. of the production applied; possibly useful for
  // printing the tree, or during debugging
  char const *type;

  // instead of making explicit merge nodes (which runs afoul of the
  // yield-then-merge problem), just link alternatives together using
  // this link; this is NULL when there are no alternatives, or for
  // the last node in a list of alts
  PTreeNode *merged;

  // array of children; these aren't owner pointers because
  // we might have arbitrary sharing for some grammars
  // (only the first 'numChildren' slots are meaningful)
  int numChildren;
  PTreeNode *children[MAXCHILDREN];

  // # of parse trees of which this is the root; effectively this
  // memoizes the result to avoid an exponential blowup counting
  // the trees; when this value is 0, it means the count has not
  // yet been computed (any count must be positive)
  TreeCount count;

  // count of # of allocated nodes; useful for identifying when
  // we're making too many
  static int allocCount;

  // count # of times addAlternative is called; this will tell
  // the total number of local ambiguities that need to be resolved
  static int alternativeCount;

private:     // funcs
  // init fields which don't depend on ctor args (presumably 'merged'
  // and the allocCount bump -- confirm in the corresponding .cc file)
  void init();

  // helpers
  static void indent(std::ostream &out, int n);
  void innerPrintTree(std::ostream &out, int indentation, PrintFlags pf) const;
  int countMergedList() const;

public:      // funcs
  // now lots of constructors so we have one for each possible
  // number of children; the calls are automatically inserted
  // by a perl script ('make-trivparser.pl') or by the grammar
  // transformation GrammarAnalysis::addTreebuildingActions()
  PTreeNode(char const *t)
    : type(t), numChildren(0), count(0) { init(); }
  PTreeNode(char const *t, PTreeNode *ch0)
    : type(t), numChildren(1), count(0) { init(); children[0] = ch0; }
  PTreeNode(char const *t, PTreeNode *ch0, PTreeNode *ch1)
    : type(t), numChildren(2), count(0) { init(); children[0] = ch0; children[1] = ch1; }
  PTreeNode(char const *t, PTreeNode *ch0, PTreeNode *ch1, PTreeNode *ch2)
    : type(t), numChildren(3), count(0) { init(); children[0] = ch0; children[1] = ch1; children[2] = ch2; }
  PTreeNode(char const *t, PTreeNode *ch0, PTreeNode *ch1, PTreeNode *ch2, PTreeNode *ch3)
    : type(t), numChildren(4), count(0) { init(); children[0] = ch0; children[1] = ch1; children[2] = ch2; children[3] = ch3; }
  PTreeNode(char const *t, PTreeNode *ch0, PTreeNode *ch1, PTreeNode *ch2, PTreeNode *ch3, PTreeNode *ch4)
    : type(t), numChildren(5), count(0) { init(); children[0] = ch0; children[1] = ch1; children[2] = ch2; children[3] = ch3; children[4] = ch4; }
  // be sure to update MAXCHILDREN, above, if you add constructors
  // which accept more children

  // note: children are *not* deleted here (they aren't owned; see above)
  ~PTreeNode() { allocCount--; }

  // count the number of trees encoded (taking merge nodes into
  // account) in the tree rooted at 'this'
  TreeCount countTrees();

  // print the entire parse forest using indentation to represent
  // nesting, and duplicating printing of shared subtrees within
  // ambiguous regions
  void printTree(std::ostream &out, PrintFlags pf = PF_NONE) const;

  // add an alternative to the current 'merged' list
  void addAlternative(PTreeNode *alt);
};
   105: 
   106: #endif // PTREENODE_H
End C section to elk/elk_ptreenode.h[1]
Start C section to elk/elk_rcptr.h[1 /1 ]
     1: #line 5279 "./lpsrc/elk.pak"
     2: // rcptr.h            see license.txt for copyright and terms of use
     3: // a stab at a reference-counting pointer
     4: 
     5: // the object pointed-at must support this interface:
     6: //   // increment reference count
     7: //   void incRefCt();
     8: //
     9: //   // decrement refcount, and if it becomes 0, delete yourself
    10: //   void decRefCt();
    11: 
    12: #ifndef __RCPTR_H
    13: #define __RCPTR_H
    14: 
    15: #include "sm_typ.h"
    16: 
    17: #if 0
    18:   #include <stdio.h>    // printf, temporary
    19:   #define DBG(fn) printf("%s(%p)\n", fn, ptr)
    20: #else
    21:   #define DBG(fn)
    22: #endif
    23: 
    24: template <class T>
    25: class RCPtr {
    26: private:    // data
    27:   T *ptr;                // the real pointer
    28: 
    29: private:    // funcs
    30:   void inc() { DBG("inc"); if (ptr) { ptr->incRefCt(); } }
    31:   void dec() { DBG("dec"); if (ptr) { ptr->decRefCt(); ptr=NULL; } }
    32: 
    33: public:     // funcs
    34:   explicit RCPtr(T *p = NULL) : ptr(p) { DBG("ctor"); inc(); }
    35:   explicit RCPtr(RCPtr const &obj) : ptr(obj.ptr) { DBG("cctor"); inc(); }
    36:   ~RCPtr() { DBG("dtor"); dec(); }
    37: 
    38:   // point at something new (setting to NULL is an option)
    39:   void operator= (T *p) { DBG("op=ptr"); dec(); ptr=p; inc(); }
    40:   void operator= (RCPtr<T> const &obj)
    41:     { DBG("op=obj"); dec(); ptr=obj.ptr; inc(); }
    42: 
    43:   // some operators that make Owner behave more or less like
    44:   // a native C++ pointer
    45:   operator T const * () const { DBG("opcT*"); return ptr; }
    46:   T const & operator* () const { DBG("opc*"); return *ptr; }
    47:   T const * operator-> () const { DBG("opc->"); return ptr; }
    48: 
    49:   bool operator==(T *p) const { return ptr == p; }
    50:   bool operator!=(T *p) const { return !this->operator==(p); }
    51: 
    52:   bool operator==(RCPtr<T> const &obj) const { return ptr == obj.ptr; }
    53:   bool operator!=(RCPtr<T> const &obj) const { return !this->operator==(obj); }
    54: 
    55:   operator T* () { DBG("opT*"); return ptr; }
    56:   operator T const * () { DBG("opcT*"); return ptr; }
    57:   T& operator* () { DBG("op*"); return *ptr; }
    58:   T* operator-> () { DBG("op->"); return ptr; }
    59: 
    60:   // escape hatch for when operators flake out on us
    61:   T *get() { DBG("get"); return ptr; }
    62:   T const *getC() const { DBG("getC"); return ptr; }
    63: 
    64:   // sometimes, in performance-critical code, I need fine control
    65:   // over the refcount operations; this lets me change 'ptr', the
    66:   // assumption being I'll update the refct manually
    67:   void setWithoutUpdateRefct(T *p) { ptr=p; }
    68: };
    69: 
    70: 
    71: #endif // __RCPTR_H
End C section to elk/elk_rcptr.h[1]
Start C section to elk/elk_useract.h[1 /1 ]
     1: #line 5351 "./lpsrc/elk.pak"
     2: // useract.h            see license.txt for copyright and terms of use
     3: // interface to an object containing user-defined action functions
     4: 
     5: // the code appears in the .cc file generated by 'gramanl' from
     6: // an associated .gr file
     7: 
     8: // the comments below are guidelines on writing grammar actions, since
     9: // those grammar actions are composed to form the single-entry
    10: // functions documented below
    11: 
    12: #ifndef USERACT_H
    13: #define USERACT_H
    14: 
    15: #include "elk_glrconfig.h"
    16: #include "sm_str.h"
    17: #include "sm_srcloc.h"
    18: 
    19: class ParseTables;         // parsetables.h
    20: class ELK_EXTERN UserActions;
    21: 
    22: // user-supplied semantic values:
    23: //  - Semantic values are an arbitrary word, that the user can then
    24: //    use as a pointer or an integer or whatever.  The parser
    25: //    generator inserts the appropriate casts, so the actual type
    26: //    I use here shouldn't ever be visible to the user.
    27: //  - Usually, SemanticValues that are used as pointers are considered
    28: //    to be owner pointers, but only in the sense that del() will be
    29: //    called.  It's up to the user to decide if del() actually does
    30: //    anything.
    31: typedef unsigned long SemanticValue;
    32: 
    33: // name of a null sval; can't use "NULL" because of __null weirdness in gcc-3...
    34: #define NULL_SVAL 0
    35: 
    36: 
    37: // package of functions; the user will create an instance of a class
    38: // derived from this, and the parser will carry it along to invoke
    39: // the various action functions
class ELK_EXTERN UserActions {
public:
  // allow abstract user to delete
  virtual ~UserActions();

  // user-supplied reduction actions
  //  - production 'id' is being used to reduce
  //  - 'svals' contains an array of semantic values yielded by the RHS
  //    symbols, such that the 0th element is the leftmost RHS element;
  //    the pointers in the array are owner pointers (the array ptr itself
  //    is a serf)
  //  - 'loc' is the location of the left edge of the parse subtree
  //  - this fn returns the semantic value for the reduction; this return
  //    value is an owner pointer
  typedef SemanticValue (*ReductionActionFunc)(
    UserActions *context,         // parser context class object
    int productionId,             // production being used to reduce
    SemanticValue const *svals    // array of semantic values
    SOURCELOCARG( SourceLoc loc ) );

  // get the actual function; two-step to avoid virtual call in inner loop
  virtual ReductionActionFunc getReductionAction()=0;

  // duplication of semantic values:
  //  - the given 'sval' is about to be passed to a reduction action
  //    function.  the user must return a value to be stored in place
  //    of the old one, in case it is needed to pass to another action
  //    function in case of local ambiguity; 'sval' is a serf
  //  - the return value will be yielded (if necessary) to the next
  //    consumer action function, and is an owner ptr
  //  - some possible strategies:
  //    - return NULL, in which case it is probably an error for the
  //      value to be passed to another action (i.e. the grammar needs
  //      to be LALR(1) near this semantic value); in this case, 'del'
  //      will not be called on the NULL value
  //    - increment a reference count and return 'sval'
  //    - do nothing, and rely on some higher-level allocation scheme
  //      such as full GC, or regions
  virtual SemanticValue duplicateTerminalValue(
    int termId, SemanticValue sval)=0;
  virtual SemanticValue duplicateNontermValue(
    int nontermId, SemanticValue sval)=0;

  // a semantic value didn't get passed to an action function, either
  // because it was never used at all (e.g. a semantic value for a
  // punctuator token, which the user can simply ignore), or because we
  // duplicated it in anticipation of a possible local ambiguity, but
  // then that parse turned out not to happen, so we're cancelling
  // the dup now; 'sval' is an owner pointer
  virtual void deallocateTerminalValue(int termId, SemanticValue sval)=0;
  virtual void deallocateNontermValue(int nontermId, SemanticValue sval)=0;

  // this is called when there are two interpretations for the same
  // sequence of ground terminals, culminating in two different reductions
  // deriving the same left-hand-side nonterminal (identified by 'ntIndex');
  // it should return a value to be used in the place where they conflict;
  // both 'left' and 'right' are owner pointers, and the return value
  // is also an owner pointer
  //
  // NOTE: the 'left' value is always the node which came first, and
  // might even have been yielded to another reduction already
  // (depending on the grammar), whereas the 'right' value is always a
  // node which was just created, and has definitely *not* been
  // yielded to anything (this fact is critical to solving the general
  // yield-then-merge problem)
  virtual SemanticValue mergeAlternativeParses(
    int ntIndex, SemanticValue left, SemanticValue right
    SOURCELOCARG( SourceLoc loc )
  )=0;

  // after every reduction, the semantic value is passed to this function,
  // which returns 'false' if the reduction should be cancelled; if it
  // does return false, then 'sval' is an owner pointer (the parser engine
  // will drop the value on the floor)
  virtual bool keepNontermValue(int nontermId, SemanticValue sval)=0;

  // every time a token is pulled from the lexer, this reclassifier is
  // used to give the user a chance to reinterpret the token, before it
  // is used for reduction lookahead comparisons; it returns the
  // reclassified token type, or 'oldTokenType' to leave it unchanged
  typedef int (*ReclassifyFunc)(UserActions *ths, int oldTokenType, SemanticValue sval);

  // get the reclassifier
  virtual ReclassifyFunc getReclassifier()=0;

  // descriptions of symbols with their semantic values; this is useful
  // for the ACTION_TRACE function of the parser
  virtual sm_string terminalDescription(int termId, SemanticValue sval)=0;
  virtual sm_string nonterminalDescription(int nontermId, SemanticValue sval)=0;

  // get static names for all of the symbols
  virtual char const *terminalName(int termId)=0;
  // NOTE(review): despite the parameter name 'termId', this one takes a
  // *nonterminal* id (cf. nonterminalDescription above) -- confirm
  virtual char const *nonterminalName(int termId)=0;

  // get the parse tables for this grammar; the default action
  // complains that no tables are defined
  virtual ParseTables *makeTables();
};
   138: 
   139: 
   140: // for derived classes, the list of functions to be declared
   141: // (this macro is used by the generated code)
   142: #define USER_ACTION_FUNCTIONS                                          \
   143:   virtual ReductionActionFunc getReductionAction();                    \
   144:                                                                        \
   145:   virtual SemanticValue duplicateTerminalValue(                        \
   146:     int termId, SemanticValue sval);                                   \
   147:   virtual SemanticValue duplicateNontermValue(                         \
   148:     int nontermId, SemanticValue sval);                                \
   149:                                                                        \
   150:   virtual void deallocateTerminalValue(                                \
   151:     int termId, SemanticValue sval);                                   \
   152:   virtual void deallocateNontermValue(                                 \
   153:     int nontermId, SemanticValue sval);                                \
   154:                                                                        \
   155:   virtual SemanticValue mergeAlternativeParses(                        \
   156:     int ntIndex, SemanticValue left, SemanticValue right               \
   157:     SOURCELOCARG( SourceLoc loc )                                      \
   158:   );                                                                   \
   159:                                                                        \
   160:   virtual bool keepNontermValue(int nontermId, SemanticValue sval);    \
   161:                                                                        \
   162:   virtual ReclassifyFunc getReclassifier();                            \
   163:                                                                        \
   164:   virtual sm_string terminalDescription(int termId, SemanticValue sval);  \
   165:   virtual sm_string nonterminalDescription(int nontermId, SemanticValue sval);  \
   166:                                                                        \
   167:   virtual char const *terminalName(int termId);                        \
   168:   virtual char const *nonterminalName(int termId);
   169: 
   170: 
// a useraction class which has only trivial actions
// (useful as a base class when only a few actions need real behavior,
// e.g. ParseTreeActions in ptreeact.h)
class TrivialUserActions : public UserActions {
public:
  // declare every UserActions virtual (see macro above)
  USER_ACTION_FUNCTIONS

  // the trivial reduction action returned by getReductionAction()
  // (implementation in the corresponding .cc file)
  static SemanticValue doReductionAction(
    UserActions *ths,
    int productionId, SemanticValue const *svals
    SOURCELOCARG( SourceLoc loc ) );

  // the trivial reclassifier returned by getReclassifier(); presumably
  // yields 'oldTokenType' unchanged -- confirm in the .cc file
  static int reclassifyToken(UserActions *ths,
    int oldTokenType, SemanticValue sval);
};
   184: 
   185: 
   186: #endif // USERACT_H
End C section to elk/elk_useract.h[1]
Start C section to elk/elk_util.h[1 /1 ]
     1: #line 5538 "./lpsrc/elk.pak"
     2: // util.h            see license.txt for copyright and terms of use
     3: // collection of utility macros and functions that are
     4: // candidates for adding to the smbase library
     5: 
     6: #ifndef __UTIL_H
     7: #define __UTIL_H
     8: 
     9: #include "sm_trace.h"
    10: 
    11: // given a method called 'print', define an operator to use it
    12: #define OSTREAM_OPERATOR(MyClass)                                \
    13:   friend std::ostream &operator << (std::ostream &os, MyClass const &ths)  \
    14:     { ths.print(os); return os; }
    15: 
    16: 
    17: // I'm experimenting with the idea of making my control structures
    18: // more declarative
    19: #define INTLOOP(var, start, maxPlusOne) \
    20:   for (int var = start; var < maxPlusOne; var++)
    21: 
    22: 
    23: // experiment: given (a reference to), an owner pointer, yield the pointer
    24: // value after nullifying the given pointer
    25: template <class T>
    26: inline T *transferOwnership(T *&ptr)
    27: {
    28:   T *ret = ptr;
    29:   ptr = NULL;
    30:   return ret;
    31: }
    32: 
    33: 
    34: // print a value under the debug trace (name: Trace VALue)
    35: #define TVAL(expr) \
    36:   trace("debug") << #expr ": " << (expr) << std::endl
    37: 
    38: 
    39: #endif // __UTIL_H
End C section to elk/elk_util.h[1]
Start cpp section to elk/elk_asockind.cpp[1 /1 ]
     1: #line 5578 "./lpsrc/elk.pak"
     2: // asockind.cc            see license.txt for copyright and terms of use
     3: // code for asockind.h
     4: 
     5: #include "elk_asockind.h"
     6: #include "sm_xassert.h"
     7: 
     8: sm_string toString(AssocKind k)
     9: {
    10:   static char const * const arr[NUM_ASSOC_KINDS] = {
    11:     "AK_LEFT", "AK_RIGHT", "AK_NONASSOC"
    12:   };
    13:   xassert((unsigned)k < NUM_ASSOC_KINDS);
    14:   return sm_string(arr[k]);
    15: }
    16: 
    17: 
End cpp section to elk/elk_asockind.cpp[1]
Start cpp section to elk/elk_emitcode.cpp[1 /1 ]
     1: #line 5596 "./lpsrc/elk.pak"
     2: // emitcode.cc            see license.txt for copyright and terms of use
     3: // code for emitcode.h
     4: 
     5: #include "elk_emitcode.h"
     6: #include "sm_syserr.h"
     7: #include "sm_srcloc.h"
     8: #include "sm_trace.h"
     9: 
// open output file 'f'; buffered text accumulates in the
// sm_stringBuilder base and is written by flush()
EmitCode::EmitCode(char const *f)
  : sm_stringBuilder(),
    os(f),
    fname(f),
    line(1)     // nothing flushed yet, so next output lands on line 1
{
  if (!os) {
    // could not open the output file
    xsyserror("open", fname);
  }
}
    20: 
// write any still-buffered text to the file before the stream closes
EmitCode::~EmitCode()
{
  flush();
}
    25: 
    26: 
// return the current output line number; flush first so that 'line'
// accounts for every newline buffered so far
int EmitCode::getLine()
{
  flush();
  return line;
}
    32: 
    33: 
    34: void EmitCode::flush()
    35: {
    36:   // count newlines
    37:   char const *p = pcharc();
    38:   while (*p) {
    39:     if (*p == '\n') {
    40:       line++;
    41:     }
    42:     p++;
    43:   }
    44: 
    45:   os << *this;
    46:   setlength(0);
    47: }
    48: 
    49: 
    50: char const *hashLine()
    51: {
    52:   if (tracingSys("nolines")) {
    53:     // emit with comment to disable its effect
    54:     return "// #line ";
    55:   }
    56:   else {
    57:     return "#line ";
    58:   }
    59: }
    60: 
    61: 
// note that #line must be preceded by a newline
// builds a directive of the form:  #line 42 "some/file.gr"
// pointing at the original source location 'loc'
sm_string lineDirective(SourceLoc loc)
{
  char const *fname;
  int line, col;   // col is decoded but not used in the directive
  sourceLocManager->decodeLineCol(loc, fname, line, col);

  return sm_stringc << hashLine() << line << " \"" << fname << "\"\n";
}
    71: 
// sm_stringBuilder manipulator: emit a #line directive that points back
// at the generated file itself, re-establishing the line map after a
// section emitted under a user-code directive
sm_stringBuilder &restoreLine(sm_stringBuilder &sb)
{
  // little hack.. 'sb' is assumed to really be an EmitCode
  EmitCode &os = (EmitCode&)sb;

  // +1 because we specify what line will be *next*
  int line = os.getLine()+1;
  return os << hashLine() << line
            << " \"" << os.getFname() << "\"\n";
}
End cpp section to elk/elk_emitcode.cpp[1]
Start cpp section to elk/elk_genml.cpp[1 /1 ]
     1: #line 5678 "./lpsrc/elk.pak"
     2: // genml.cc            see license.txt for copyright and terms of use
     3: // code for genml.h
     4: // first half based on 'emitActionCode' and friends from gramanl.cc
     5: // second half based on 'emitConstructionCode' from parsetables.cc
     6: 
     7: #include "elk_genml.h"
     8: #include "elk_gramanl.h"
     9: #include "elk_emitcode.h"
    10: #include "elk_parsetables.h"
    11: #include "sm_exc.h"
    12: #include "sm_strutil.h"
    13: 
    14: 
    15: // NOTE: The following code is largely copied from elsewhere,
    16: // including comments, so the comments may in some places not
    17: // correspond perfectly with the code.
    18: 
    19: 
    20: 
    21: // prototypes for this section; some of them accept Grammar simply
    22: // because that's all they need; there's no problem upgrading them
    23: // to GrammarAnalysis
    24: void emitMLDescriptions(GrammarAnalysis const &g, EmitCode &out);
    25: void emitMLActionCode(GrammarAnalysis const &g, char const *mliFname,
    26:                       char const *mlFname, char const *srcFname);
    27: void emitMLUserCode(EmitCode &out, LocString const &code, bool braces = true);
    28: void emitMLActions(Grammar const &g, EmitCode &out, EmitCode &dcl);
    29: void emitMLDupDelMerge(GrammarAnalysis const &g, EmitCode &out, EmitCode &dcl);
    30: void emitMLFuncDecl(Grammar const &g, EmitCode &out, EmitCode &dcl,
    31:                     char const *rettype, char const *params);
    32: void emitMLDDMInlines(Grammar const &g, EmitCode &out, EmitCode &dcl,
    33:                       Symbol const &sym);
    34: void emitMLSwitchCode(Grammar const &g, EmitCode &out,
    35:                       char const *signature, char const *switchVar,
    36:                       ObjList<Symbol> const &syms, int whichFunc,
    37:                       char const *templateCode, char const *actUpon);
    38: 
    39: 
    40: // ------------- first half: action emission ----------------
    41: #if 0   // not needed
    42: // yield the name of the inline function for this production; naming
    43: // design motivated by desire to make debugging easier
    44: sm_string actionFuncName(Production const &prod)
    45: {
    46:   return sm_stringc << "action" << prod.prodIndex
    47:                  << "_" << prod.left->name;
    48: }
    49: #endif // 0
    50: 
    51: 
// emit the user's action code to a file
//
// 'g'        - analyzed grammar whose actions and tables are emitted
// 'mliFname' - OCaml interface (.mli) file to create
// 'mlFname'  - OCaml implementation (.ml) file to create
// 'srcFname' - name of the original grammar source, used only inside
//              generated "automatically generated" comments
// throws (via throw_XOpen) if either output file cannot be opened
void emitMLActionCode(GrammarAnalysis const &g, char const *mliFname,
                      char const *mlFname, char const *srcFname)
{
  EmitCode dcl(mliFname);
  if (!dcl) {
    throw_XOpen(mliFname);
  }

  // prologue
  dcl << "(* " << mliFname << " *)\n"
      << "(* *** DO NOT EDIT BY HAND *** *)\n"
      << "(* automatically generated by elkhound, from " << srcFname << " *)\n"
      << "\n"
      ;

  // insert the stand-alone verbatim sections
  {FOREACH_OBJLIST(LocString, g.verbatim, iter) {
    emitMLUserCode(dcl, *(iter.data()), false /*braces*/);
  }}

  #if 0    // not implemented
  // insert each of the context class definitions; the last one
  // is the one whose name is 'g.actionClassName' and into which
  // the action functions are inserted as methods
  {
    int ct=0;
    FOREACH_OBJLIST(LocString, g.actionClasses, iter) {
      if (ct++ > 0) {
        // end the previous class; the following body will open
        // another one, and the brace following the action list
        // will close the last one
        dcl << "};\n";
      }

      dcl << "\n"
          << "// parser context class\n"
          << "class ";
      emitUserCode(dcl, *(iter.data()), false /*braces*/);
  }}

  // we end the context class with declarations of the action functions
  dcl << "\n"
      << "private:\n"
      << "  USER_ACTION_FUNCTIONS      // see useract.h\n"
      << "\n"
      << "  // declare the actual action function\n"
      << "  static SemanticValue doReductionAction(\n"
      << "    " << g.actionClassName << " *ths,\n"
      << "    int productionId, SemanticValue const *semanticValues"
         SOURCELOC( << ",\n  SourceLoc loc" )
      << ");\n"
      << "\n"
      << "  // declare the classifier function\n"
      << "  static int reclassifyToken(\n"
      << "    " << g.actionClassName << " *ths,\n"
      << "    int oldTokenType, SemanticValue sval);\n"
      << "\n"
      ;
  #endif // 0

  // all that goes into the interface is the name of the
  // tUserActions and tParseTables objects
  dcl << "val " << g.actionClassName << "ParseTables: Parsetables.tParseTables\n";
  dcl << "val " << g.actionClassName << "UserActions: Useract.tUserActions\n";

  // now the implementation (.ml) file
  EmitCode out(mlFname);
  if (!out) {
    throw_XOpen(mlFname);
  }

  out << "(* " << mlFname << " *)\n";
  out << "(* *** DO NOT EDIT BY HAND *** *)\n";
  out << "(* automatically generated by gramanl, from " << srcFname << " *)\n";
  out << "\n"
      << "open Useract      (* tSemanticValue *)\n"
      << "open Parsetables  (* tParseTables *)\n"
      << "\n"
      << "\n"
      ;

  // stand-alone verbatim sections go into .ml file *also*
  {FOREACH_OBJLIST(LocString, g.verbatim, iter) {
    emitMLUserCode(out, *(iter.data()), false /*braces*/);
  }}

  #if 0   // not implemented and/or not needed
    #ifdef NO_GLR_SOURCELOC
      // we need to make sure the USER_ACTION_FUNCTIONS use
      // the declarations consistent with how we're printing
      // the definitions
      out << "#ifndef NO_GLR_SOURCELOC\n";
      out << "  #define NO_GLR_SOURCELOC\n";
      out << "#endif\n";
    #else
      out << "// GLR source location information is enabled\n";
    #endif
    out << "\n";
    out << "#include \"" << hFname << "\"     // " << g.actionClassName << "\n";
    out << "#include \"elk_parsetables.h\" // ParseTables\n";
    out << "#include \"sm_srcloc.h\"      // SourceLoc\n";
    out << "\n";
    out << "#include <assert.h>      // assert\n";
    out << "#include <iostream>    // std::cout\n";
    out << "#include <stdlib.h>      // abort\n";
    out << "\n";

    NOSOURCELOC(
      out << "// parser-originated location information is disabled by\n"
          << "// NO_GLR_SOURCELOC; any rule which refers to 'loc' will get this one\n"
          << "static SourceLoc loc = SL_UNKNOWN;\n"
          << "\n\n";
    )
  #endif // 0

  emitMLDescriptions(g, out);
  // 'emitMLDescriptions' prints two newlines itself..

  emitMLActions(g, out, dcl);
  out << "\n";
  out << "\n";

  emitMLDupDelMerge(g, out, dcl);
  out << "\n";
  out << "\n";

  // wrap all the action stuff up as a struct
  out << "let " << g.actionClassName << "UserActions = {\n";
  // each record field is bound to the correspondingly-named
  // "<name>Func" value emitted by the helpers above
  #define COPY(name) \
    out << "  " #name " = " #name "Func;\n";
  COPY(reductionAction)
  COPY(duplicateTerminalValue)
  COPY(duplicateNontermValue)
  COPY(deallocateTerminalValue)
  COPY(deallocateNontermValue)
  COPY(mergeAlternativeParses)
  COPY(keepNontermValue)
  COPY(terminalDescription)
  COPY(nonterminalDescription)
  COPY(terminalName)
  COPY(nonterminalName)
  #undef COPY
  out << "}\n"
      << "\n"
      << "\n"
      ;

  // emit the parse tables as OCaml construction code
  g.tables->finishTables();
  g.tables->emitMLConstructionCode(out, g.actionClassName, "makeTables");

  #if 0   // not implemented
    // I put this last in the context class, and make it public
    dcl << "\n"
        << "// the function which makes the parse tables\n"
        << "public:\n"
        << "  virtual ParseTables *makeTables();\n"
        << "};\n"
        << "\n"
        << "#endif // " << latchName << "\n"
        ;
  #endif // 0

  // finish the implementation file with the impl_verbatim sections
  FOREACH_OBJLIST(LocString, g.implVerbatim, iter) {
    emitMLUserCode(out, *(iter.data()), false /*braces*/);
  }
}
   219: 
   220: 
// emit one section of user code to 'out'; when 'braces' is true the
// code is wrapped in parentheses (the ML analogue of the C++ braces);
// the #line-directive machinery is currently disabled -- note the
// 'false/*TODO:fix*/' guards below
void emitMLUserCode(EmitCode &out, LocString const &code, bool braces)
{
  out << "\n";
  if (false/*TODO:fix*/ && code.validLoc()) {
    out << lineDirective(code.loc);
  }

  // 7/27/03: swapped so that braces are inside the line directive
  if (braces) {
    out << "(";
  }

  out << code;

  // the final brace is on the same line so errors reported at the
  // last brace go to user code
  if (braces) {
    out << " )";
  }

  if (false/*TODO:fix*/ && code.validLoc()) {
    out << "\n" << restoreLine;
  }
  else {
    out << "\n";
  }
}
   248: 
   249: 
   250: // bit of a hack: map "void" to "SemanticValue" so that the compiler
   251: // won't mind when I try to declare parameters of that type
   252: static char const *notVoid(char const *type)
   253: {
   254:   if (0==strcmp(type, "void")) {     // ML: Q: should this now be "unit"?
   255:     return "tSemanticValue";
   256:   }
   257:   else {
   258:     return type;
   259:   }
   260: }
   261: 
   262: 
   263: // yield the given type, but if it's NULL, then yield
   264: // something to use instead
   265: static char const *typeString(char const *type, LocString const &tag)
   266: {
   267:   if (!type) {
   268:     std::cout << tag.locString() << ": Production tag \"" << tag
   269:          << "\" on a symbol with no type.\n";
   270:     return "__error_no_type__";     // will make compiler complain
   271:   }
   272:   else {
   273:     return notVoid(type);
   274:   }
   275: }
   276: 
   277: 
// emit the OCaml terminal/nonterminal name arrays, the description
// functions that wrap them, and the plain name-lookup functions
void emitMLDescriptions(GrammarAnalysis const &g, EmitCode &out)
{
  // emit a map of terminal ids to their names
  {
    out << "let termNamesArray: sm_string array = [|\n";
    for (int code=0; code < g.numTerminals(); code++) {
      Terminal const *t = g.getTerminal(code);
      if (!t) {
        // no terminal for that code
        out << "  \"(no terminal)\";  (* " << code << " *)\n";
      }
      else {
        out << "  \"" << t->name << "\";  (* " << code << " *)\n";
      }
    }
    out << "  \"\"   (* dummy final value for ';' separation *)\n"
        << "|]\n"
        << "\n";
  }

  // emit a function to describe terminals; at some point I'd like to
  // extend my grammar format to allow the user to supply
  // token-specific description functions, but for now I will just
  // use the information easily available to synthesize one;
  // I print "sval % 100000" so I get a 5-digit number, which is
  // easy for me to compare for equality without adding much clutter
  //
  // ML: I could do something like this using Obj, but I'd rather
  // not abuse that interface unnecessarily.
  // (note: the emitted function accepts 'sval' but ignores it)
  out << "let terminalDescriptionFunc (termId:int) (sval:tSemanticValue) : sm_string =\n"
      << "begin\n"
      << "  termNamesArray.(termId)\n"
      << "end\n"
      << "\n"
      << "\n"
      ;

  // emit a map of nonterminal ids to their names
  {
    out << "let nontermNamesArray: sm_string array = [|\n";
    for (int code=0; code < g.numNonterminals(); code++) {
      Nonterminal const *nt = g.getNonterminal(code);
      if (!nt) {
        // no nonterminal for that code
        out << "  \"(no nonterminal)\";  (* " << code << " *)\n";
      }
      else {
        out << "  \"" << nt->name << "\";  (* " << code << " *)\n";
      }
    }
    out << "  \"\"   (* dummy final value for ';' separation *)\n"
        << "|]\n"
        << "\n";
  }

  // and a function to describe nonterminals also
  out << "let nonterminalDescriptionFunc (nontermId:int) (sval:tSemanticValue)\n"
      << "  : sm_string =\n"
      << "begin\n"
      << "  nontermNamesArray.(nontermId)\n"
      << "end\n"
      << "\n"
      << "\n"
      ;

  // emit functions to get access to the static maps
  out << "let terminalNameFunc (termId:int) : sm_string =\n"
      << "begin\n"
      << "  termNamesArray.(termId)\n"
      << "end\n"
      << "\n"
      << "let nonterminalNameFunc (nontermId:int) : sm_string =\n"
      << "begin\n"
      << "  nontermNamesArray.(nontermId)\n"
      << "end\n"
      << "\n"
      << "\n"
      ;
}
   357: 
   358: 
// emit the OCaml array of per-production reduction closures, plus the
// dispatching 'reductionActionFunc' that indexes into it
// ('dcl' is unused here; presumably kept for parity with the C++
// backend's signature — confirm against emitcode callers)
void emitMLActions(Grammar const &g, EmitCode &out, EmitCode &dcl)
{
  out << "(* ------------------- actions ------------------ *)\n"
      << "let reductionActionArray : (tSemanticValue array -> tSemanticValue) array = [|\n"
      << "\n"
      ;

  // iterate over productions, emitting action function closures
  {FOREACH_OBJLIST(Production, g.productions, iter) {
    Production const &prod = *(iter.data());

    // there's no syntax for a typeless nonterminal, so this shouldn't
    // be triggerable by the user
    xassert(prod.left->type);

    // put the production in comments above the defn
    out << "(* " << prod.toString() << " *)\n";

    out << "(fun svals ->\n";

    // iterate over RHS elements, emitting bindings for each with a tag
    int index=-1;
    FOREACH_OBJLIST(Production::RHSElt, prod.right, rhsIter) {
      Production::RHSElt const &elt = *(rhsIter.data());
      index++;
      if (elt.tag.length() == 0) continue;    // untagged: no binding emitted

      // example:
      //   let e1 = (Obj.obj svals.(0) : int) in
      out << "  let " << elt.tag << " = (Obj.obj svals.(" << index << ") : "
          << typeString(elt.sym->type, elt.tag) << ") in\n";
    }

    // give a name to the yielded value so we can ensure it conforms to
    // the declared type
    out << "  let __result: " << prod.left->type << " =";

    // now insert the user's code, to execute in this environment of
    // properly-typed semantic values
    emitMLUserCode(out, prod.action, true /*braces*/);

    out << "  in (Obj.repr __result)\n"     // cast to tSemanticValue
        << ");\n"
        << "\n"
        ;
  }}

  // finish the array; one dummy element for ';' separation
  out << "(fun _ -> (failwith \"bad production index\"))   (* no ; *)"
      << "\n"
      << "|]\n"
      << "\n"
      ;

  // main action function; uses the array emitted above
  out << "let reductionActionFunc (productionId:int) (svals: tSemanticValue array)\n"
      << "  : tSemanticValue =\n"
      << "begin\n"
      << "  (reductionActionArray.(productionId) svals)\n"
      << "end\n"
      << "\n"
      ;


  #if 0  // shouldn't be needed
  // NOTE(review): this disabled snippet refers to 'prod', which is out
  // of scope at this point; it would not compile if re-enabled as-is
  if (0==strcmp(prod.left->type, "void")) {
    // cute hack: turn the expression into a comma expression, with
    // the value returned being 0
    out << ", 0";
  }
  #endif // 0
}
   431: 
   432: 
// emit the per-symbol dup/del/merge/keep/classify helper functions
// (via emitMLDDMInlines), then the id-dispatching wrapper functions
// that call them (via emitMLSwitchCode); first for nonterminals, then
// for terminals.  'dcl' is only forwarded to emitMLDDMInlines.
void emitMLDupDelMerge(GrammarAnalysis const &g, EmitCode &out, EmitCode &dcl)
{
  out << "(* ---------------- dup/del/merge/keep nonterminals --------------- *)\n"
      << "\n";

  // emit inlines for dup/del/merge of nonterminals
  FOREACH_OBJLIST(Nonterminal, g.nonterminals, ntIter) {
    emitMLDDMInlines(g, out, dcl, *(ntIter.data()));
  }

  // emit dup-nonterm
  // (the cast treats ObjList<Nonterminal> as ObjList<Symbol>;
  // Nonterminal derives from Symbol, so the list layout matches)
  emitMLSwitchCode(g, out,
    "let duplicateNontermValueFunc (nontermId:int) (sval:tSemanticValue) : tSemanticValue",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    0 /*dupCode*/,
    "      (Obj.repr (dup_$symName ((Obj.obj sval) : $symType)))\n",
    NULL);

  // emit del-nonterm
  emitMLSwitchCode(g, out,
    "let deallocateNontermValueFunc (nontermId:int) (sval:tSemanticValue) : unit",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    1 /*delCode*/,
    "      (del_$symName ((Obj.obj sval) : $symType));\n",
    "deallocate nonterm");

  // emit merge-nonterm
  emitMLSwitchCode(g, out,
    "let mergeAlternativeParsesFunc (nontermId:int) (left:tSemanticValue)\n"
    "                               (right:tSemanticValue) : tSemanticValue",
    // SOURCELOC?
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    2 /*mergeCode*/,
    "      (Obj.repr (merge_$symName ((Obj.obj left) : $symType) ((Obj.obj right) : $symType)))\n",
    "merge nonterm");

  // emit keep-nonterm
  emitMLSwitchCode(g, out,
    "let keepNontermValueFunc (nontermId:int) (sval:tSemanticValue) : bool",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    3 /*keepCode*/,
    "      (keep_$symName ((Obj.obj sval) : $symType))\n",
    NULL);


  out << "\n";
  // NOTE(review): unlike the nonterminal header above, no "\n" follows
  // this header string, so the next emitted text continues on the same
  // output line — possibly intentional, possibly a missing newline
  out << "(* ---------------- dup/del/classify terminals --------------- *)";
  // emit inlines for dup/del of terminals
  FOREACH_OBJLIST(Terminal, g.terminals, termIter) {
    emitMLDDMInlines(g, out, dcl, *(termIter.data()));
  }

  // emit dup-term
  emitMLSwitchCode(g, out,
    "let duplicateTerminalValueFunc (termId:int) (sval:tSemanticValue) : tSemanticValue",
    "termId",
    (ObjList<Symbol> const&)g.terminals,
    0 /*dupCode*/,
    "      (Obj.repr (dup_$symName ((Obj.obj sval) : $symType)))\n",
    NULL);

  // emit del-term
  emitMLSwitchCode(g, out,
    "let deallocateTerminalValueFunc (termId:int) (sval:tSemanticValue) : unit",
    "termId",
    (ObjList<Symbol> const&)g.terminals,
    1 /*delCode*/,
    "      (del_$symName ((Obj.obj sval) : $symType));\n",
    "deallocate terminal");

  // emit classify-term
  emitMLSwitchCode(g, out,
    "let reclassifyTokenFunc (oldTokenType:int) (sval:tSemanticValue) : int",
    "oldTokenType",
    (ObjList<Symbol> const&)g.terminals,
    4 /*classifyCode*/,
    "      (classify_$symName ((Obj.obj sval) : $symType))\n",
    NULL);
}
   516: 
   517: 
// emit both the function decl for the .h file, and the beginning of
// the function definition for the .cc file
// (ML backend: only an OCaml "let" header is written, to 'out';
// 'g' and 'dcl' are unused here — presumably kept so the signature
// mirrors the C++ backend's counterpart)
void emitMLFuncDecl(Grammar const &g, EmitCode &out, EmitCode &dcl,
                    char const *rettype, char const *params)
{
  out << "(*inline*) let " << params << ": " << rettype << " =";
}
   525: 
   526: 
// for one symbol, emit an OCaml helper function for each piece of
// dup/del/merge/keep/classify user code it carries; merge/keep apply
// only to nonterminals, classify only to terminals
void emitMLDDMInlines(Grammar const &g, EmitCode &out, EmitCode &dcl,
                      Symbol const &sym)
{
  // downcast attempts; each yields NULL when 'sym' is not of that kind,
  // gating the kind-specific sections below
  Terminal const *term = sym.ifTerminalC();
  Nonterminal const *nonterm = sym.ifNonterminalC();

  // dup_<name>: takes the value, returns a duplicate
  if (sym.dupCode) {
    emitMLFuncDecl(g, out, dcl, sym.type,
      sm_stringc << "dup_" << sym.name
              << " (" << sym.dupParam << ": " << sym.type << ") ");
    emitMLUserCode(out, sym.dupCode);
    out << "\n";
  }

  // del_<name>: deallocation; "_" when the user named no parameter
  if (sym.delCode) {
    emitMLFuncDecl(g, out, dcl, "unit",
      sm_stringc << "del_" << sym.name
              << " (" << (sym.delParam? sym.delParam : "_")
              << ": " << sym.type << ") ");
    emitMLUserCode(out, sym.delCode);
    out << "\n";
  }

  // merge_<name>: combines two alternative parses (nonterminals only)
  if (nonterm && nonterm->mergeCode) {
    emitMLFuncDecl(g, out, dcl, notVoid(sym.type),
      sm_stringc << "merge_" << sym.name
              << " (" << nonterm->mergeParam1 << ": " << notVoid(sym.type) << ") "
              << " (" << nonterm->mergeParam2 << ": " << notVoid(sym.type) << ") ");
    emitMLUserCode(out, nonterm->mergeCode);
    out << "\n";
  }

  // keep_<name>: decides whether to retain a reduction (nonterminals only)
  if (nonterm && nonterm->keepCode) {
    emitMLFuncDecl(g, out, dcl, "bool",
      sm_stringc << "keep_" << sym.name
              << " (" << nonterm->keepParam << ": " << sym.type << ") ");
    emitMLUserCode(out, nonterm->keepCode);
    out << "\n";
  }

  // classify_<name>: token reclassification (terminals only)
  if (term && term->classifyCode) {
    emitMLFuncDecl(g, out, dcl, "int",
      sm_stringc << "classify_" << sym.name
              << " (" << term->classifyParam << ": " << sym.type << ") ");
    emitMLUserCode(out, term->classifyCode);
    out << "\n";
  }
}
   575: 
   576: void emitMLSwitchCode(Grammar const &g, EmitCode &out,
   577:                       char const *signature, char const *switchVar,
   578:                       ObjList<Symbol> const &syms, int whichFunc,
   579:                       char const *templateCode, char const *actUpon)
   580: {
   581:   out << replace(signature, "$acn", g.actionClassName) << " =\n"
   582:          "begin\n"
   583:          "  match " << switchVar << " with\n"
   584:          ;
   585: 
   586:   FOREACH_OBJLIST(Symbol, syms, symIter) {
   587:     Symbol const &sym = *(symIter.data());
   588: 
   589:     if (whichFunc==0 && sym.dupCode ||
   590:         whichFunc==1 && sym.delCode ||
   591:         whichFunc==2 && sym.asNonterminalC().mergeCode ||
   592:         whichFunc==3 && sym.asNonterminalC().keepCode ||
   593:         whichFunc==4 && sym.asTerminalC().classifyCode) {
   594:       out << "  | " << sym.getTermOrNontermIndex() << " -> (\n";
   595:       out << replace(replace(templateCode,
   596:                "$symName", sym.name),
   597:                "$symType", notVoid(sym.type));
   598:       out << "    )\n";
   599:     }
   600:   }
   601: 
   602:   out << "  | _ -> (\n";
   603:   switch (whichFunc) {
   604:     default:
   605:       xfailure("bad func code");
   606: 
   607:     // in ML it's not such a good idea to yield cNULL_SVAL, since the
   608:     // runtime engine might get more confused than a C program
   609:     // with a NULL pointer.. so always do the gc-defaults thing
   610: 
   611:     case 0:    // unspecified dup
   612:       out << "      sval\n";
   613:       break;
   614: 
   615:     case 1:    // unspecified del
   616:       // ignore del
   617:       out << "      ()\n";
   618:       break;
   619: 
   620:     case 2:    // unspecified merge: warn, but then use left (arbitrarily)
   621:       out << "      (Printf.printf \"WARNING: no action to merge nonterm %s\\n\"\n"
   622:           << "                     nontermNamesArray.(" << switchVar << "));\n"
   623:           << "      (flush stdout);\n"
   624:           << "      left\n"
   625:           ;
   626:       break;
   627: 
   628:     case 3:    // unspecified keep: keep it
   629:       out << "      true\n";
   630:       break;
   631: 
   632:     case 4:    // unspecified classifier: identity map
   633:       out << "      oldTokenType\n";
   634:       break;
   635:   }
   636: 
   637:   out << "    )\n"
   638:          "end\n"
   639:          "\n";
   640: }
   641: 
   642: 
   643: // ----------------- second half: table emission ------------------
   644: // create literal tables
// emit one parse table as an OCaml array-literal record field:
//   "  <tableName> = [| e0; e1; ... |];"
// 'size' is the element count; 'rowLength' elements are printed per
// output line
template <class EltType>
void emitMLTable(EmitCode &out, EltType const *table, int size, int rowLength,
                 char const *tableName)
{
  // missing or empty table: emit an empty OCaml array
  if (!table || !size) {
    out << "  " << tableName << " = [| |];      (* 0 elements *)\n"
        << "\n"
        ;
    return;
  }

  // never set to true here, so the hex branch below is dormant
  bool printHex = false;
  #if 0   // not needed?
                  0==strcmp(typeName, "ErrorBitsEntry") ||
                  (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "ActionEntry")) ||
                  (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "GotoEntry")) ;
  bool needCast = 0==strcmp(typeName, "StateId");
  #endif // 0

  // for larger tables, emit a rows/cols comment header
  if (size * sizeof(*table) > 50) {    // suppress small ones
    //out << "  // storage size: " << size * sizeof(*table) << " bytes\n";
    if (size % rowLength == 0) {
      out << "  (* rows: " << (size/rowLength) << "  cols: " << rowLength << " *)\n";
    }
  }

  // width of the largest row number, used to align the row comments
  int rowNumWidth = sm_stringf("%d", size / rowLength /*round down*/).length();

  out << "  " << tableName << " = [|           (* " << size << " elements *)";
  int row = 0;
  for (int i=0; i<size; i++) {
    if (i % rowLength == 0) {    // one row per state
      out << sm_stringf("\n    (*%*d*) ", rowNumWidth, row++);
    }

    #if 0
    if (needCast) {
      out << "(" << typeName << ")";           // ML: not used
    }
    #endif // 0

    if (printHex) {
      out << sm_stringf("0x%02X", table[i]);    // ML: not used
    }
    else if (sizeof(table[i]) == 1) {
      // little bit of a hack to make sure 'unsigned char' gets
      // printed as an int; the casts are necessary because this
      // code gets compiled even when EltType is ProdInfo
      out << (int)(*((unsigned char*)(table+i)));
    }
    else {
      // print the other int-sized things, or ProdInfo using
      // the overloaded '<<' below
      out << table[i];
    }

    // ';' separates elements; none after the last one
    if (i != size-1) {
      out << "; ";
    }
  }
  out << "\n"
      << "  |];\n"
      << "\n"
      ;
}
   710: 
#if 0   // not used
// NOTE(review): everything down to the matching "#endif // 0" below is
// compiled out; these look like leftover C++-backend table emitters
// (they emit C++ syntax such as const_cast/new/NULL, not OCaml) kept
// for reference.

// used to emit the elements of the prodInfo table
sm_stringBuilder& operator<< (sm_stringBuilder &sb, ParseTables::ProdInfo const &info)
{
  sb << "{" << (int)info.rhsLen << "," << (int)info.lhsIndex << "}";
  return sb;
}


// like 'emitTable', but also set a local called 'tableName'
template <class EltType>
void emitMLTable2(EmitCode &out, EltType const *table, int size, int rowLength,
                  char const *typeName, char const *tableName)
{
  sm_string tempName = sm_stringc << tableName << "_static";
  emitMLTable(out, table, size, rowLength, typeName, tempName);
  out << "  " << tableName << " = const_cast<" << typeName << "*>("
      << tempName << ");\n\n";
}


// emit a table of pointers by encoding each pointer as an offset from
// a base array, so the emitted data can be static
template <class EltType>
void emitMLOffsetTable(EmitCode &out, EltType **table, EltType *base, int size,
                       char const *typeName, char const *tableName, char const *baseName)
{
  if (!table) {
    out << "  " << tableName << " = NULL;\n\n";
    return;
  }

  // make the pointers persist by storing a table of offsets
  Array<int> offsets(size);
  bool allUnassigned = true;
  for (int i=0; i < size; i++) {
    if (table[i]) {
      offsets[i] = table[i] - base;
      allUnassigned = false;
    }
    else {
      offsets[i] = UNASSIGNED;    // codes for a NULL entry
    }
  }

  if (allUnassigned) {
    // for example, an LALR(1) grammar has no ambiguous entries in its tables
    size = 0;
  }

  if (size > 0) {
    out << "  " << tableName << " = new " << typeName << " [" << size << "];\n";

    emitTable(out, (int*)offsets, size, 16, "int", sm_stringc << tableName << "_offsets");

    // at run time, interpret the offsets table
    out << "  for (int i=0; i < " << size << "; i++) {\n"
        << "    int ofs = " << tableName << "_offsets[i];\n"
        << "    if (ofs >= 0) {\n"
        << "      " << tableName << "[i] = " << baseName << " + ofs;\n"
        << "    }\n"
        << "    else {\n"
        << "      " << tableName << "[i] = NULL;\n"
        << "    }\n"
        << "  }\n\n";
  }
  else {
    out << "  // offset table is empty\n"
        << "  " << tableName << " = NULL;\n\n";
  }
}


// for debugging
template <class EltType>
void printMLTable(EltType const *table, int size, int rowLength,
                  char const *typeName, char const *tableName)
{
  // disabled for now since I don't need it anymore, and it adds
  // a link dependency on emitcode.cc ...
  #if 0
  {
    EmitCode out("printTable.tmp");
    emitTable(out, table, size, rowLength, typeName, tableName);
  }

  system("cat printTable.tmp; rm printTable.tmp");
  #endif // 0
}
#endif // 0
   799: 
   800: 
   801: // emit code for a function which, when compiled and executed, will
   802: // construct this same table (except the constructed table won't own
   803: // the table data, since it will point to static program data)
   804: void ParseTables::emitMLConstructionCode
   805:   (EmitCode &out, char const *className, char const *funcName)
   806: {
   807:   // must have already called 'finishTables'
   808:   xassert(!temp);
   809: 
   810:   out << "(* a literal tParseTables;\n"
   811:       << " * the code is written by ParseTables::emitConstructionCode()\n"
   812:       << " * in " << __FILE__ << " *)\n"
   813:       << "let " << className << "ParseTables:tParseTables = {\n";
   814:       ;
   815: 
   816:   #define SET_VAR(var) \
   817:     out << "  " #var " = " << var << ";\n";
   818: 
   819:   SET_VAR(numTerms);
   820:   SET_VAR(numNonterms);
   821:   SET_VAR(numProds);
   822:   out << "\n";
   823: 
   824:   SET_VAR(numStates);
   825:   out << "\n";
   826: 
   827:   SET_VAR(actionCols);
   828:   emitMLTable(out, actionTable, actionTableSize(),
   829:               actionCols, "actionTable");
   830: 
   831:   SET_VAR(gotoCols);
   832:   emitMLTable(out, gotoTable, gotoTableSize(),
   833:               gotoCols, "gotoTable");
   834: 
   835:   // break the prodInfo into two arrays
   836:   {
   837:     Array<int> rhsLen(numProds);
   838:     Array<int> lhsIndex(numProds);
   839: 
   840:     for (int i=0; i < numProds; i++) {
   841:       rhsLen[i] = prodInfo[i].rhsLen;
   842:       lhsIndex[i] = prodInfo[i].lhsIndex;
   843:     }
   844: 
   845:     emitMLTable(out, rhsLen.operator int const *(), numProds,
   846:                 16 /*columns; arbitrary*/, "prodInfo_rhsLen");
   847:     emitMLTable(out, lhsIndex.operator int const *(), numProds,
   848:                 16 /*columns; arbitrary*/, "prodInfo_lhsIndex");
   849:   }
   850: 
   851:   emitMLTable(out, stateSymbol, numStates,
   852:               16, "stateSymbol");
   853: 
   854:   SET_VAR(ambigTableSize);
   855:   emitMLTable(out, ambigTable, ambigTableSize,
   856:               16, "ambigTable");
   857: 
   858:   emitMLTable(out, nontermOrder, nontermOrderSize(),
   859:               16, "nontermOrder");
   860: 
   861:   SET_VAR(startState);
   862: 
   863:   // no semicolon for last one
   864:   out << "  finalProductionIndex = " << finalProductionIndex << "\n";
   865: 
   866:   out << "}\n"
   867:       << "\n"
   868:       ;
   869: }
   870: 
   871: 
   872: // EOF
End cpp section to elk/elk_genml.cpp[1]
Start cpp section to elk/elk_glr.cpp[1 /1 ]
     1: #line 6551 "./lpsrc/elk.pak"
     2: // glr.cc            see license.txt for copyright and terms of use
     3: // code for glr.h
     4: 
     5: /* Implementation Notes
     6:  *
 * A design point: [GLR] uses more 'global's than I do.  My criterion
     8:  * here is that something should be global (stored in class GLR) if
     9:  * it has meaning between processing of tokens.  If something is only
    10:  * used during the processing of a single token, then I make it a
    11:  * parameter where necessary.
    12:  *
    13:  * Update: I've decided to make 'currentToken' and 'parserWorklist'
    14:  * global because they are needed deep inside of 'glrShiftNonterminal',
    15:  * though they are not needed by the intervening levels, and their
    16:  * presence in the argument lists would therefore only clutter them.
    17:  *
    18:  * (OLD) It should be clear that many factors contribute to this
    19:  * implementation being slow, and I'm going to refrain from any
    20:  * optimization for a bit.
    21:  *
    22:  * UPDATE (3/29/02): I'm now trying to optimize it.  The starting
    23:  * implementation is 300x slower than bison.  Ideal goal is 3x, but
    24:  * more realistic is 10x.
    25:  *
    26:  * UPDATE (8/24/02): It's very fast now; within 3% of Bison for
    27:  * deterministic grammars, and 5x when I disable the mini-LR core.
    28:  *
    29:  * Description of the various lists in play here:
    30:  *
    31:  *   topmostParsers
    32:  *   --------------
    33:  *   The active parsers are at the frontier of the parse tree
    34:  *   space.  It *never* contains more than one stack node with
    35:  *   a given parse state; I call this the unique-state property
    36:  *   (USP).  If we're about to add a stack node with the same
    37:  *   state as an existing node, we merge them (if it's a shift,
    38:  *   we add another leftAdjState; if it's a reduction, we add a
    39:  *   rule node *and* another leftAdjState).
    40:  *
    41:  *   Before a token is processed, topmostParsers contains those
    42:  *   parsers that successfully shifted the previous token.  This
    43:  *   list is then walked to make the initial reduction worklist.
    44:  *
    45:  *   Before the shifts are processed, the topmostParsers list is
    46:  *   cleared.  As each shift is processed, the resulting parser is
    47:  *   added to topmostParsers (modulo USP).
    48:  *
    49:  *   [GLR] calls this "active-parsers"
    50:  *
    51:  *
    52:  * Discussion of path re-examination, called do-limited-reductions by
    53:  * [GLR]:
    54:  *
    55:  * After thinking about this for some time, I have reached the conclusion
    56:  * that the only way to handle the problem is to separate the collection
    57:  * of paths from the iteration over them.
    58:  *
    59:  * Here are several alternative schemes, and the reasons they don't
    60:  * work:
    61:  *
    62:  *   1. [GLR]'s approach of limiting re-examination to those involving
    63:  *      the new link
    64:  *
    65:  *      This fails because it does not prevent re-examined paths
    66:  *      from appearing in the normal iteration also.
    67:  *
    68:  *   2. Modify [GLR] so the new link can't be used after the re-examination
    69:  *      is complete
    70:  *
    71:  *      Then if *another* new link is added, paths involving both new
    72:  *      links wouldn't be processed.
    73:  *
    74:  *   3. Further schemes involving controlling which re-examination stage can
    75:  *      use which links
    76:  *
    77:  *      Difficult to reason about, unclear a correct scheme exists, short
    78:  *      of the full-blown path-listing approach I'm going to take.
    79:  *
    80:  *   4. My first "fix" which assumes there is never more than one path to
    81:  *      a given parser
    82:  *
    83:  *      This is WRONG.  There can be more than one path, even as all such
    84:  *      paths are labeled the same (namely, with the RHS symbols).  Consider
    85:  *      grammar "E -> x | E + E" parsing "x+x+x": both toplevel parses use
    86:  *      the "E -> E + E" rule, and both arrive at the root parser
    87:  *
    88:  * So, the solution I will implement is to collect all paths into a list
    89:  * before processing any of them.  During path re-examination, I also will
    90:  * collect paths into a list, this time only those that involve the new
    91:  * link.
    92:  *
    93:  * This scheme is clearly correct, since path collection cannot be disrupted
    94:  * by the process of adding links, and when links are added, exactly the new
    95:  * paths are collected and processed.  It's easy to see that every path is
    96:  * considered exactly once.
    97:  *
    98:  *
    99:  * MAJOR UPDATE (12/06/02):  I've replaced the state worklist (SWL) core
   100:  * used in all previous GLR implementations with a reduction worklist (RWL)
   101:  * core.  This core is just as fast, but can be implemented to always
   102:  * avoid the yield-then-merge problem for acyclic grammars.
   103:  *
   104:  *
   105:  * Below, parse-tree building activity is marked "TREEBUILD".
   106:  */
   107: 
   108: 
   109: #include "elk_glr.h"
   110: #include "sm_strtokp.h"
   111: #include "sm_syserr.h"
   112: #include "sm_trace.h"
   113: #include "sm_strutil.h"
   114: #include "elk_lexerint.h"
   115: #include "sm_test.h"
   116: #include "sm_sobjlist.h"
   117: #include "sm_owner.h"
   118: 
   119: #include <stdio.h>       // FILE
   120: #include <stdlib.h>      // getenv
   121: 
   122: // ACTION(..) is code to execute for action trace diagnostics, i.e. "-tr action"
   123: #ifndef ACTION_TRACE
   124:   #define ACTION_TRACE 0
   125: #endif
   126: #if ACTION_TRACE
   127:   #define ACTION(stmt) stmt
   128:   #define TRSACTION(stuff) if (tracingSys("action")) { std::cout << stuff << std::endl; }
   129: #else
   130:   #define ACTION(stmt)
   131:   #define TRSACTION(stuff)
   132: #endif
   133: 
   134: // TRSPARSE(stuff) traces <stuff> during debugging with -tr parse
   135: #if !defined(NDEBUG)
   136:   #define IF_NDEBUG(stuff)
   137:   #define TRSPARSE(stuff) if (trParse) { trsParse << stuff << std::endl; }
   138:   #define TRSPARSE_DECL(stuff) stuff
   139: #else
   140:   #define IF_NDEBUG(stuff) stuff
   141:   #define TRSPARSE(stuff)
   142:   #define TRSPARSE_DECL(stuff)
   143: #endif
   144: 
   145: // whether to use the ordinary LR core in addition to the GLR core
   146: #ifndef USE_MINI_LR
   147:   #define USE_MINI_LR 1
   148: #endif
   149: 
   150: // these disable features of mini-LR for performance testing
   151: #ifndef USE_ACTIONS
   152:   #define USE_ACTIONS 1
   153: #endif
   154: #ifndef USE_RECLASSIFY
   155:   #define USE_RECLASSIFY 1
   156: #endif
   157: #ifndef USE_KEEP
   158:   #define USE_KEEP 1
   159: #endif
   160: 
   161: // enables tracking of some statistics useful for debugging and profiling
   162: #ifndef DO_ACCOUNTING
   163:   #define DO_ACCOUNTING 1
   164: #endif
   165: #if DO_ACCOUNTING
   166:   #define ACCOUNTING(stuff) stuff
   167: #else
   168:   #define ACCOUNTING(stuff)
   169: #endif
   170: 
   171: // unroll the inner loop; approx. 3% performance improvement
   172: // update: right now, it actually *costs* about 8%..
   173: #ifndef USE_UNROLLED_REDUCE
   174:   #define USE_UNROLLED_REDUCE 0
   175: #endif
   176: 
// some things we track..  (statistics counters; several are printed at
// the end of glrParse when ELKHOUND_DEBUG is set in the environment)
int parserMerges = 0;
int computeDepthIters = 0;
int totalExtracts = 0;
int multipleDelayedExtracts = 0;
   182: 
   183: // can turn this on to experiment.. but right now it
   184: // actually makes things slower.. (!)
   185: //#define USE_PARSER_INDEX
   186: 
   187: 
   188: // Note on inlining generally: Inlining functions is a very important
   189: // way to improve performance, in inner loops.  However it's easy to
   190: // guess wrong about where and what to inline.  So generally I mark
    191: // things as inline whenever the profiler (gprof) reports:
   192: //   - it's showing up in gprof as a function call (i.e. not already
   193: //     being inlined)
   194: //   - the function that calls it takes significant time
   195: //   - the call itself takes significant time
   196: // All this is obvious, but is worth saying, since otherwise the
   197: // tendency is to inline everything, which is a mistake because it
   198: // makes the system as a whole slower (by wasting space in the I-cache)
   199: // without leaving a clear indicator of who is to blame (it's very
   200: // hard to profile for over-aggressive inlining).
   201: 
   202: 
   203: // the transition to array-based implementations requires I specify
   204: // initial sizes
enum {
  // this one does *not* grow as needed (at least not in the mini-LR core);
  // the limit is enforced against all productions in the GLR constructor
  MAX_RHSLEN = 30,

  // ----------
  // the settings below here are for initial sizes of growable arrays,
  // and it should be ok in terms of correctness to set them all to 1,
  // which may be a useful thing during debugging to verify

  // this one grows as needed
  TYPICAL_MAX_REDUCTION_PATHS = 5,

  // this is the length to make arrays which hold rhsLen many items
  // typically, but are growable
  INITIAL_RHSLEN_SIZE = 10,
};
   221: 
   222: 
   223: // ------------- front ends to user code ---------------
   224: // given a symbol id (terminal or nonterminal), and its associated
   225: // semantic value, yield a description sm_string
   226: sm_string symbolDescription(SymbolId sym, UserActions *user,
   227:                          SemanticValue sval)
   228: {
   229:   if (symIsTerm(sym)) {
   230:     return user->terminalDescription(symAsTerm(sym), sval);
   231:   }
   232:   else {
   233:     return user->nonterminalDescription(symAsNonterm(sym), sval);
   234:   }
   235: }
   236: 
   237: SemanticValue GLR::duplicateSemanticValue(SymbolId sym, SemanticValue sval)
   238: {
   239:   xassert(sym != 0);
   240: 
   241:   // 6/23/04: Why did I do this?  Some kind of optimization?  It should
   242:   // at least be documented... and probably removed altogether.
   243:   if (!sval) return sval;
   244: 
   245:   SemanticValue ret;
   246:   if (symIsTerm(sym)) {
   247:     ret = userAct->duplicateTerminalValue(symAsTerm(sym), sval);
   248:   }
   249:   else {
   250:     ret = userAct->duplicateNontermValue(symAsNonterm(sym), sval);
   251:   }
   252: 
   253:   TRSACTION("  " << symbolDescription(sym, userAct, ret) <<
   254:             " is DUP of " <<
   255:             symbolDescription(sym, userAct, sval));
   256: 
   257:   return ret;
   258: }
   259: 
   260: void deallocateSemanticValue(SymbolId sym, UserActions *user,
   261:                              SemanticValue sval)
   262: {
   263:   xassert(sym != 0);
   264:   TRSACTION("  DEL " << symbolDescription(sym, user, sval));
   265: 
   266:   if (!sval) return;
   267: 
   268:   if (symIsTerm(sym)) {
   269:     return user->deallocateTerminalValue(symAsTerm(sym), sval);
   270:   }
   271:   else {
   272:     return user->deallocateNontermValue(symAsNonterm(sym), sval);
   273:   }
   274: }
   275: 
// member-function front end: forwards to the global version above,
// supplying this parser's user-actions object
void GLR::deallocateSemanticValue(SymbolId sym, SemanticValue sval)
{
  ::deallocateSemanticValue(sym, userAct, sval);
}
   280: 
   281: 
   282: // ------------------ SiblingLink ------------------
// construct a link to stack node 's' carrying semantic value 'sv';
// when source-location support is compiled in, 'L' records the value's
// location, and when yield counting is enabled the count starts at zero
inline SiblingLink::SiblingLink(StackNode *s, SemanticValue sv
                                SOURCELOCARG( SourceLoc L ) )
  : sib(s), sval(sv)
    SOURCELOCARG( loc(L) )
{
  YIELD_COUNT( yieldCount = 0; )
}
   290: 
// nothing to release here: the owning StackNode deallocates the link's
// semantic value, since it (not the link) knows the associated symbol
SiblingLink::~SiblingLink()
{}
   293: 
   294: 
   295: // ----------------------- StackNode -----------------------
// allocation statistics (maintained under DO_ACCOUNTING): current count
// of live nodes, and the high-water mark
int StackNode::numStackNodesAllocd=0;
int StackNode::maxStackNodesAllocd=0;
   298: 
   299: 
// nodes are pool-allocated and recycled, so the constructor only sets
// harmless defaults; real setup happens in init() on each reuse
StackNode::StackNode()
  : state(STATE_INVALID),
    leftSiblings(),
    firstSib(NULL, NULL_SVAL  SOURCELOCARG( SL_UNKNOWN ) ),
    referenceCount(0),
    determinDepth(0),
    glr(NULL)
{
  // the interesting stuff happens in init()
}
   310: 
// trivial for the same reason the constructor is: cleanup is done in
// deinit() each time the node returns to the pool
StackNode::~StackNode()
{
  // the interesting stuff happens in deinit()
}
   315: 
   316: 
// (re-)initialize this node for parse state 'st' under parser 'g';
// called each time a node is taken from the object pool
inline void StackNode::init(StateId st, GLR *g)
{
  state = st;
  // pool recycling contract: deinit() must have left these empty
  xassertdb(leftSiblings.isEmpty());
  xassertdb(hasZeroSiblings());
  referenceCount = 0;
  determinDepth = 1;    // 0 siblings now, so this node is unambiguous
  glr = g;

  #if DO_ACCOUNTING
    INC_HIGH_WATER(numStackNodesAllocd, maxStackNodesAllocd);
    //TRACE("nodes", "(!!!) init stack node: num=" << numStackNodesAllocd
    //            << ", max=" << maxStackNodesAllocd);
  #endif
}
   332: 
// bookkeeping counterpart of the INC_HIGH_WATER in init(); factored
// out of deinit() so it can also be called separately
inline void StackNode::decrementAllocCounter()
{
  #if DO_ACCOUNTING
    numStackNodesAllocd--;
    //TRACE("nodes", "(...) deinit stack node: num=" << numStackNodesAllocd
    //            << ", max=" << maxStackNodesAllocd);
  #endif
}
   341: 
// tear the node down before it goes back to the object pool
inline void StackNode::deinit()
{
  decrementAllocCounter();

  // NOTE(review): 'unwinding()' presumably detects stack unwinding in
  // progress, when these invariants may legitimately be violated
  if (!unwinding()) {
    xassert(numStackNodesAllocd >= 0);
    xassert(referenceCount == 0);
  }

  deallocSemanticValues();

  // this is pulled out of 'deallocSemanticValues' since dSV gets
  // called from the mini-LR parser, which sets this to NULL itself
  // (and circumvents the refct decrement)
  firstSib.sib = NULL;
}
   358: 
// return the grammar symbol associated with this node's parse state,
// looked up in the parse tables
inline SymbolId StackNode::getSymbolC() const
{
  xassertdb((unsigned)state < (unsigned)(glr->tables->getNumStates()));
  return glr->tables->getStateSymbol(state);
}
   364: 
   365: 
   366: 
// release the semantic values held by all of this node's sibling links,
// both the embedded 'firstSib' and the overflow list
void StackNode::deallocSemanticValues()
{
  // explicitly deallocate siblings, so I can deallocate their
  // semantic values if necessary (this requires knowing the
  // associated symbol, which the SiblingLinks don't know)
  if (firstSib.sib != NULL) {
    deallocateSemanticValue(getSymbolC(), glr->userAct, firstSib.sval);
  }

  while (leftSiblings.isNotEmpty()) {
    // Owner<> deletes the link itself when it goes out of scope
    Owner<SiblingLink> sib(leftSiblings.removeAt(0));
    deallocateSemanticValue(getSymbolC(), glr->userAct, sib->sval);
  }
}
   381: 
   382: 
// add the very first sibling; uses the embedded 'firstSib' link instead
// of heap-allocating, and deliberately does NOT increment 'leftSib's
// reference count -- the caller must do that
inline void StackNode
  ::addFirstSiblingLink_noRefCt(StackNode *leftSib, SemanticValue sval
                                SOURCELOCARG( SourceLoc loc ) )
{
  xassertdb(hasZeroSiblings());

  // my depth will be my new sibling's depth, plus 1
  determinDepth = leftSib->determinDepth + 1;

  // we don't have any siblings yet; use embedded
  // don't update reference count of 'leftSib', instead caller must do so
  //firstSib.sib = leftSib;
  xassertdb(firstSib.sib == NULL);      // otherwise we'd miss a decRefCt
  firstSib.sib.setWithoutUpdateRefct(leftSib);

  firstSib.sval = sval;

  // initialize some other fields
  SOURCELOC( firstSib.loc = loc; )
  YIELD_COUNT( firstSib.yieldCount = 0; )
}
   405: 
   406: 
// add a new sibling link from this node to 'leftSib', carrying 'sval';
// returns a pointer to the link, which may be the embedded 'firstSib'
inline SiblingLink *StackNode::
  addSiblingLink(StackNode *leftSib, SemanticValue sval
                 SOURCELOCARG( SourceLoc loc ) )
{
  if (hasZeroSiblings()) {
    addFirstSiblingLink_noRefCt(leftSib, sval  SOURCELOCARG( loc ) );

    // manually increment leftSib's refct
    leftSib->incRefCt();

    // sibling link pointers are used to control the reduction
    // process in certain corner cases; an interior pointer
    // should work fine
    return &firstSib;
  }
  else {
    // as best I can tell, x86 static branch prediction is simply
    // "conditional forward branches are assumed not taken", hence
    // the uncommon case belongs in the 'else' branch
    return addAdditionalSiblingLink(leftSib, sval  SOURCELOCARG( loc ) );
  }
}
   430: 
   431: 
// pulled out of 'addSiblingLink' so I can inline addSiblingLink
// without excessive object code bloat; the branch represented by
// the code in this function is much less common
SiblingLink *StackNode::
  addAdditionalSiblingLink(StackNode *leftSib, SemanticValue sval
                           SOURCELOCARG( SourceLoc loc ) )
{
  // there's currently at least one sibling, and now we're adding another;
  // right now, no other stack node should point at this one (if it does,
  // most likely will catch that when we use the stale info)
  determinDepth = 0;    // multiple siblings => ambiguous from here down

  SiblingLink *link = new SiblingLink(leftSib, sval  SOURCELOCARG( loc ) );
  leftSiblings.prepend(link);   // dsw: don't append; it becomes quadratic!
  return link;
}
   448: 
   449: 
// inlined for the GLR part; mini-LR doesn't use this directly;
// gcc will inline the first level, even though it's recursive,
// and the effect is significant (~10%) for GLR-only parser
//
// drop one reference; when the count hits zero the node is returned
// to the stack node pool
inline void StackNode::decRefCt()
{
  xassert(referenceCount > 0);

  //printf("decrementing node %d to %d\n", state, referenceCount-1);

  if (--referenceCount == 0) {
    glr->stackNodePool->dealloc(this);
  }
}
   463: 
   464: 
// return the node's single sibling link; it is an error to call this
// when the node has zero or multiple siblings
SiblingLink const *StackNode::getUniqueLinkC() const
{
  xassert(hasOneSibling());
  return &firstSib;
}
   470: 
   471: 
   472: SiblingLink *StackNode::getLinkTo(StackNode *another)
   473: {
   474:   // check first..
   475:   if (firstSib.sib == another) {
   476:     return &firstSib;
   477:   }
   478: 
   479:   // check rest
   480:   MUTATE_EACH_OBJLIST(SiblingLink, leftSiblings, sibIter) {
   481:     SiblingLink *candidate = sibIter.data();
   482:     if (candidate->sib == another) {
   483:       return candidate;
   484:     }
   485:   }
   486:   return NULL;
   487: }
   488: 
   489: 
// dump the node-allocation statistics (see DO_ACCOUNTING) to stdout
STATICDEF void StackNode::printAllocStats()
{
  std::cout << "stack nodes: " << numStackNodesAllocd
       << ", max stack nodes: " << maxStackNodesAllocd
       << std::endl;
}
   496: 
   497: 
   498: int StackNode::computeDeterminDepth() const
   499: {
   500:   if (hasZeroSiblings()) {
   501:     return 1;
   502:   }
   503:   else if (hasOneSibling()) {
   504:     // it must be equal to sibling's, plus one
   505:     return firstSib.sib->determinDepth + 1;
   506:   }
   507:   else {
   508:     xassert(hasMultipleSiblings());
   509:     return 0;
   510:   }
   511: }
   512: 
   513: 
// I sprinkle calls to this here and there; in NDEBUG mode
// they'll all disappear
//
// verify the cached 'determinDepth' agrees with a from-scratch recount
inline void StackNode::checkLocalInvariants() const
{
  xassertdb(computeDeterminDepth() == determinDepth);
}
   520: 
   521: 
   522: // ------------- stack node list ops ----------------
   523: void decParserList(ArrayStack<StackNode*> &list)
   524: {
   525:   for (int i=0; i < list.length(); i++) {
   526:     list[i]->decRefCt();
   527:   }
   528: }
   529: 
   530: void incParserList(ArrayStack<StackNode*> &list)
   531: {
   532:   for (int i=0; i < list.length(); i++) {
   533:     list[i]->incRefCt();
   534:   }
   535: }
   536: 
   537: // candidate for adding to ArrayStack.. but I'm hesitant for some reason
   538: bool parserListContains(ArrayStack<StackNode*> &list, StackNode *node)
   539: {
   540:   for (int i=0; i < list.length(); i++) {
   541:     if (list[i] == node) {
   542:       return true;
   543:     }
   544:   }
   545:   return false;
   546: }
   547: 
   548: 
   549: // ------------------------- GLR ---------------------------
// construct a parser around the given user actions and parse tables;
// both pointers are borrowed (the destructor does not delete them)
GLR::GLR(UserActions *user, ParseTables *t)
  : userAct(user),
    tables(t),
    lexerPtr(NULL),
    topmostParsers(),
    parserIndex(NULL),
    toPass(MAX_RHSLEN),
    prevTopmost(),
    stackNodePool(NULL),
    pathQueue(t),
    noisyFailedParse(true),
    trParse(tracingSys("parse")),
    trsParse(trace("parse") << "parse tracing enabled\n"),
    detShift(0),
    detReduce(0),
    nondetShift(0),
    nondetReduce(0),
    yieldThenMergeCt(0)
  // some fields (re-)initialized by 'clearAllStackNodes'
{
  // originally I had this inside glrParse() itself, but that
  // made it 25% slower!  gcc register allocator again!
  if (tracingSys("glrConfig")) {
    printConfig();
  }

  // the ordinary GLR core doesn't have this limitation because
  // it uses a growable array
  #if USE_MINI_LR
    // make sure none of the productions have right-hand sides
    // that are too long; I think it's worth doing an iteration
    // here since going over the limit would be really hard to
    // debug, and this ctor is of course outside the main
    // parsing loop
    for (int i=0; i < tables->getNumProds(); i++) {
      if (tables->getProdInfo(i).rhsLen > MAX_RHSLEN) {
        printf("Production %d contains %d right-hand side symbols,\n"
               "but the GLR core has been compiled with a limit of %d.\n"
               "Please adjust MAX_RHSLEN and recompile the GLR core.\n",
               i, tables->getProdInfo(i).rhsLen, MAX_RHSLEN);
        xfailure("cannot continue");
      }
    }
  #endif // USE_MINI_LR

  // check that the parse tables' compression (if any) is the same
  // as this core expects
  configCheck("EEF compression", ENABLE_EEF_COMPRESSION, tables->eef_enabled());
  configCheck("GCS compression", ENABLE_GCS_COMPRESSION, tables->gcs_enabled());
  configCheck("GCS column compression", ENABLE_GCS_COLUMN_COMPRESSION, tables->gcsc_enabled());
  configCheck("CRS compression", ENABLE_CRS_COMPRESSION, tables->crs_enabled());
}
   602: 
   603: void GLR::configCheck(char const *option, bool core, bool table)
   604: {
   605:   if (core != table) {
   606:     xfailure(sm_stringc
   607:       << "The GLR parser core was compiled with " << option
   608:       << (core? " enabled" : " disabled")
   609:       << ", but the parse tables generated by Elkhound have it "
   610:       << (table? "enabled" : "disabled"));
   611:   }
   612: }
   613: 
   614: GLR::~GLR()
   615: {
   616:   if (parserIndex) {
   617:     delete[] parserIndex;
   618:   }
   619: 
   620:   // NOTE: must not delete 'tables' until after the 'decParserList'
   621:   // calls above, because they refer to the tables!
   622: }
   623: 
   624: 
// prepare for a new parse; currently a no-op (see comment inside)
void GLR::clearAllStackNodes()
{
  // the stack nodes themselves are now reference counted, so they
  // should already be cleared if we're between parses (modulo
  // creation of cycles, which I currently just ignore and allow to
  // leak..)
}
   632: 
   633: 
// print compile-time configuration; this is useful for making
// sure a given binary has been compiled the way you think
void GLR::printConfig() const
{
  // Note: expressions of the form "MACRO(1+)0" evaluate to 1 when the
  // macro expands its argument and to 0 when it expands to nothing,
  // so they report whether the corresponding feature is compiled in.
  printf("GLR configuration follows.  Settings marked with an\n"
         "asterisk (*) are the higher-performance settings.\n");

  printf("  source location information: \t\t\t%s\n",
         SOURCELOC(1+)0? "enabled" : "disabled *");

  printf("  stack node columns: \t\t\t\t%s\n",
         NODE_COLUMN(1+)0? "enabled" : "disabled *");

  printf("  semantic value yield count: \t\t\t%s\n",
         YIELD_COUNT(1+)0? "enabled" : "disabled *");

  printf("  ACTION_TRACE (for debugging): \t\t%s\n",
         ACTION(1+)0? "enabled" : "disabled *");

  printf("  NDEBUG: \t\t\t\t\t%s\n",
         IF_NDEBUG(1+)0? "set      *" : "not set");

  printf("  xassert-style assertions: \t\t\t%s\n",
         #ifdef NDEBUG_NO_ASSERTIONS
           "disabled *"
         #else
           "enabled"
         #endif
         );

  printf("  user actions: \t\t\t\t%s\n",
         USE_ACTIONS? "respected" : "ignored  *");

  printf("  token reclassification: \t\t\t%s\n",
         USE_RECLASSIFY? "enabled" : "disabled *");

  printf("  reduction cancellation: \t\t\t%s\n",
         USE_KEEP? "enabled" : "disabled *");

  printf("  mini-LR parser core: \t\t\t\t%s\n",
         USE_MINI_LR? "enabled  *" : "disabled");

  printf("  allocated-node and parse action accounting: \t%s\n",
         ACCOUNTING(1+)0? "enabled" : "disabled *");

  printf("  unrolled reduce loop: \t\t\t%s\n",
         USE_UNROLLED_REDUCE? "enabled  *" : "disabled");

  printf("  parser index: \t\t\t\t%s\n",
         #ifdef USE_PARSER_INDEX
           "enabled"
         #else
           "disabled *"
         #endif
         );

  // checking __OPTIMIZE__ is misleading if preprocessing is entirely
  // divorced from compilation proper, but I still think this printout
  // is useful; also, gcc does not provide a way to tell what level of
  // optimization was applied (as far as I know)
  printf("  C++ compiler's optimizer: \t\t\t%s\n",
         #ifdef __OPTIMIZE__
           "enabled  *"
         #else
           "disabled"
         #endif
         );

  // at the moment, disabling compression makes it fastest
  printf("  Error Entry Factoring (EEF): \t\t\t%s\n",
         ENABLE_EEF_COMPRESSION? "enabled" : "disabled *");
  printf("  Graph Coloring Scheme (GCS): \t\t\t%s\n",
         ENABLE_GCS_COMPRESSION? "enabled" : "disabled *");
  printf("  GCS for columns (GCSC): \t\t\t%s\n",
         ENABLE_GCS_COLUMN_COMPRESSION? "enabled" : "disabled *");
  printf("  Code Reduction Scheme (CRS): \t\t\t%s\n",
         ENABLE_CRS_COMPRESSION? "enabled" : "disabled *");
}
   712: 
   713: 
   714: // used to extract the svals from the nodes just under the
   715: // start symbol reduction
   716: SemanticValue GLR::grabTopSval(StackNode *node)
   717: {
   718:   SiblingLink *sib = node->getUniqueLink();
   719:   SemanticValue ret = sib->sval;
   720:   sib->sval = duplicateSemanticValue(node->getSymbolC(), sib->sval);
   721: 
   722:   TRSACTION("dup'd " << ret << " for top sval, yielded " << sib->sval);
   723: 
   724:   return ret;
   725: }
   726: 
   727: 
// This macro has been pulled out so I can have even finer control
// over the allocation process from the mini-LR core.
//   dest: variable into which the pointer to the new node will be put
//   state: DFA state for this node
//   glr: pointer to the associated GLR object
//   pool: node pool from which to allocate
#define MAKE_STACK_NODE(dest, state, glr, pool)              \
  dest = (pool).alloc();                                     \
  dest->init(state, glr);                                    \
  NODE_COLUMN( dest->column = (glr)->globalNodeColumn; )

// more-friendly inline version, for use outside mini-LR;
// allocates from 'stackNodePool' and initializes for 'state'
inline StackNode *GLR::makeStackNode(StateId state)
{
  StackNode *sn;
  MAKE_STACK_NODE(sn, state, this, *stackNodePool);
  return sn;
}
   746: 
   747: 
// add a new parser to the 'topmostParsers' list, maintaining
// related invariants
inline void GLR::addTopmostParser(StackNode *parser)
{
  parser->checkLocalInvariants();

  topmostParsers.push(parser);
  parser->incRefCt();     // the list itself holds a reference

  // I implemented this index, and then discovered it made no difference
  // (actually, slight degradation) in performance; so for now it will
  // be an optional design choice, off by default
  #ifdef USE_PARSER_INDEX
    // fill in the state id index; if the assertion here ever fails, it
    // means there are more than 255 active parsers; either the grammar
    // is highly ambiguous by mistake, or else ParserIndexEntry needs to
    // be re-typedef'd to something bigger than 'char'
    int index = topmostParsers.length()-1;   // index just used
    xassert(index < INDEX_NO_PARSER);

    xassert(parserIndex[parser->state] == INDEX_NO_PARSER);
    parserIndex[parser->state] = index;
  #endif // USE_PARSER_INDEX
}
   772: 
   773: 
   774: void GLR::buildParserIndex()
   775: {
   776:   if (parserIndex) {
   777:     delete[] parserIndex;
   778:   }
   779:   parserIndex = new ParserIndexEntry[tables->getNumStates()];
   780:   {
   781:     for (int i=0; i < tables->getNumStates(); i++) {
   782:       parserIndex[i] = INDEX_NO_PARSER;
   783:     }
   784:   }
   785: }
   786: 
   787: 
// top-level parse entry point: consume tokens from 'lexer'; on success
// return true and store the final semantic value in 'treeTop'
bool GLR::glrParse(LexerInterface &lexer, SemanticValue &treeTop)
{
  #if !ACTION_TRACE
    // tell the user why "-tr action" doesn't do anything, if
    // they specified that
    trace("action") << "warning: ACTION_TRACE is currently disabled by a\n";
    trace("action") << "compile-time switch, so you won't see parser actions.\n";
  #endif

  #ifdef NDEBUG
    trace("parse") << "warning: Because NDEBUG was specified when elkhound was\n";
    trace("parse") << "         compiled, the 'parse' tracing flag does nothing.\n";
  #endif

  // get ready..
  traceProgress(2) << "parsing...\n";
  clearAllStackNodes();

  // this should be reset to NULL on all exit paths..
  lexerPtr = &lexer;

  // build the parser index (I do this regardless of whether I'm going
  // to use it, because up here it makes no performance difference,
  // and I'd like as little code as possible being #ifdef'd)
  buildParserIndex();

  // call the inner parser core, which is a static member function
  bool ret = innerGlrParse(*this, lexer, treeTop);
  stackNodePool = NULL;     // prevent dangling references
  if (!ret) {
    lexerPtr = NULL;
    return ret;
  }

  // sm: I like to always see these statistics, but dsw doesn't,
  // so I'll just set ELKHOUND_DEBUG in my .bashrc
  if (getenv("ELKHOUND_DEBUG")) {
    #if DO_ACCOUNTING
      StackNode::printAllocStats();
      std::cout << "detShift=" << detShift
           << ", detReduce=" << detReduce
           << ", nondetShift=" << nondetShift
           << ", nondetReduce=" << nondetReduce
           << std::endl;
      //PVAL(parserMerges);
      PVAL(computeDepthIters);

      PVAL(yieldThenMergeCt);
      PVAL(totalExtracts);
      PVAL(multipleDelayedExtracts);
    #endif
  }

  lexerPtr = NULL;
  return ret;
}
   844: 
   845: 
   846: // old note: this function's complexity and/or size is *right* at the
   847: // limit of what gcc-2.95.3 is capable of optimizing well; I've already
   848: // pulled quite a bit of functionality into separate functions to try
   849: // to reduce the register pressure, but it's still near the limit;
   850: // if you do something to cross a pressure threshold, performance drops
   851: // 25% so watch out!
   852: //
   853: // This function is the core of the parser, and its performance is
   854: // critical to the end-to-end performance of the whole system.  It is
   855: // a static member so the accesses to 'glr' (aka 'this') will be
   856: // visible.
   857: STATICDEF bool GLR
   858:   ::innerGlrParse(GLR &glr, LexerInterface &lexer, SemanticValue &treeTop)
   859: {
   860:   #ifndef NDEBUG
   861:     bool doDumpGSS = tracingSys("dumpGSS");
   862:   #endif
   863: 
   864:   // pull a bunch of things out of 'glr' so they'll be accessible from
   865:   // the stack frame instead of having to indirect into the 'glr' object
   866:   UserActions *userAct = glr.userAct;
   867:   ParseTables *tables = glr.tables;
   868:   #if USE_MINI_LR
   869:     ArrayStack<StackNode*> &topmostParsers = glr.topmostParsers;
   870:   #endif
   871: 
   872:   // lexer token function
   873:   LexerInterface::NextTokenFunc nextToken = lexer.getTokenFunc();
   874: 
   875:   #if USE_RECLASSIFY
   876:   // reclassifier
   877:   UserActions::ReclassifyFunc reclassifyToken =
   878:     userAct->getReclassifier();
   879:   #endif
   880: 
   881:   // the stack node pool is a local variable of this function for
    882: // fastest access by the mini-LR core; other parts of the algorithm
   883:   // can access it using a pointer stored in the GLR class (caller
   884:   // nullifies this pointer afterward to prevent dangling references)
   885:   ObjectPool<StackNode> stackNodePool(30);
   886:   glr.stackNodePool = &stackNodePool;
   887: 
   888:   // create an initial ParseTop with grammar-initial-state,
   889:   // set active-parsers to contain just this
   890:   NODE_COLUMN( glr.globalNodeColumn = 0; )
   891:   {
   892:     StackNode *first = glr.makeStackNode(tables->startState);
   893:     glr.addTopmostParser(first);
   894:   }
   895: 
   896:   #if USE_MINI_LR
   897:     // reduction action function
   898:     UserActions::ReductionActionFunc reductionAction =
   899:       userAct->getReductionAction();
   900: 
   901:     // this is *not* a reference to the 'glr' member because it
   902:     // doesn't need to be shared with the rest of the algorithm (it's
   903:     // only used in the Mini-LR core), and by having it directly on
   904:     // the stack another indirection is saved
   905:     //
   906:     // new approach: let's try embedding this directly into the stack
   907:     // (this saves 10% in end-to-end performance!)
   908:     //GrowArray<SemanticValue> toPass(TYPICAL_MAX_RHSLEN);
   909:     SemanticValue toPass[MAX_RHSLEN];
   910:   #endif
   911: 
   912:   // count # of times we use mini LR
   913:   ACCOUNTING( int localDetShift=0; int localDetReduce=0; )
   914: 
   915:   // for each input symbol
   916:   #ifndef NDEBUG
   917:     int tokenNumber = 0;
   918: 
   919:     // some debugging streams so the TRSPARSE etc. macros work
   920:     bool trParse       = glr.trParse;
   921:     std::ostream &trsParse  = glr.trsParse;
   922:   #endif
   923:   for (;;) {
   924:     // debugging
   925:     TRSPARSE(
   926:            "------- "
   927:         << "processing token " << lexer.tokenDesc()
   928:         << ", " << glr.topmostParsers.length() << " active parsers"
   929:         << " -------"
   930:     )
   931:     TRSPARSE("Stack:" << glr.stackSummary())
   932: 
   933:     #ifndef NDEBUG
   934:       if (doDumpGSS) {
   935:         glr.dumpGSS(tokenNumber);
   936:       }
   937:     #endif
   938: 
   939:     // get token type, possibly using token reclassification
   940:     #if USE_RECLASSIFY
   941:       lexer.type = reclassifyToken(userAct, lexer.type, lexer.sval);
   942:     #else     // this is what bccgr does
   943:       //if (lexer.type == 1 /*L2_NAME*/) {
   944:       //  lexer.type = 3 /*L2_VARIABLE_NAME*/;
   945:       //}
   946:     #endif
   947: 
   948:     // alternate debugging; print after reclassification
   949:     TRSACTION("lookahead token: " << lexer.tokenDesc() <<
   950:               " aka " << userAct->terminalDescription(lexer.type, lexer.sval));
   951: 
   952:   #if USE_MINI_LR
   953:     // try to cache a few values in locals (this didn't help any..)
   954:     //ActionEntry const * const actionTable = this->tables->actionTable;
   955:     //int const numTerms = this->tables->numTerms;
   956: 
   957:   tryDeterministic:
   958:     // --------------------- mini-LR parser -------------------------
   959:     // optimization: if there's only one active parser, and the
   960:     // action is unambiguous, and it doesn't involve traversing
   961:     // parts of the stack which are nondeterministic, then do the
   962:     // parse action the way an ordinary LR parser would
   963:     //
   964:     // please note:  The code in this section is cobbled together
   965:     // from various other GLR functions.  Everything here appears in
   966:     // at least one other place, so modifications will usually have
   967:     // to be done in both places.
   968:     //
   969:     // This code is the core of the parsing algorithm, so it's a bit
   970:     // hairy for its performance optimizations.
   971:     if (topmostParsers.length() == 1) {
   972:       StackNode *parser = topmostParsers[0];
   973:       xassertdb(parser->referenceCount==1);     // 'topmostParsers[0]' is referrer
   974: 
   975:       #if ENABLE_EEF_COMPRESSION
   976:         if (tables->actionEntryIsError(parser->state, lexer.type)) {
   977:           return false;    // parse error
   978:         }
   979:       #endif
   980: 
   981:       ActionEntry action =
   982:         tables->getActionEntry_noError(parser->state, lexer.type);
   983: 
   984:       // I decode reductions before shifts because:
   985:       //   - they are 4x more common in my C grammar
   986:       //   - decoding a reduction is one less integer comparison
   987:       // however I can only measure ~1% performance difference
   988:       if (tables->isReduceAction(action)) {
   989:         ACCOUNTING( localDetReduce++; )
   990:         int prodIndex = tables->decodeReduce(action, parser->state);
   991:         ParseTables::ProdInfo const &prodInfo = tables->getProdInfo(prodIndex);
   992:         int rhsLen = prodInfo.rhsLen;
   993:         if (rhsLen <= parser->determinDepth) {
   994:           // can reduce unambiguously
   995: 
   996:           // I need to hide this declaration when debugging is off and
   997:           // optimizer and -Werror are on, because it provokes a warning
   998:           TRSPARSE_DECL( int startStateId = parser->state; )
   999: 
  1000:           // if we're tracing actions, I'm going to build a sm_string
  1001:           // that describes all of the RHS symbols
  1002:           ACTION(
  1003:             sm_string rhsDescription("");
  1004:             if (rhsLen == 0) {
  1005:               // print something anyway
  1006:               rhsDescription = " empty";
  1007:             }
  1008:           )
  1009: 
  1010:           // record location of left edge; defaults to no location
  1011:           // (used for epsilon rules)
  1012:           // update: use location of lookahead token instead, for epsilons
  1013:           SOURCELOC( SourceLoc leftEdge = lexer.loc; )
  1014: 
  1015:           //toPass.ensureIndexDoubler(rhsLen-1);
  1016:           xassertdb(rhsLen <= MAX_RHSLEN);
  1017: 
  1018:           // we will manually sm_string the stack nodes together onto
  1019:           // the free list in 'stackNodePool', and 'prev' will point
  1020:           // to the head of the current list; at the end, we'll
  1021:           // install the final value of 'prev' back into
  1022:           // 'stackNodePool' as the new head of the list
  1023:           StackNode *prev = stackNodePool.private_getHead();
  1024: 
  1025:           #if USE_UNROLLED_REDUCE
  1026:             // What follows is unrollings of the loop below,
  1027:             // labeled "loop for arbitrary rhsLen".  Read that loop
  1028:             // before the unrollings here, since I omit the comments
  1029:             // here.  In general, this program should be correct
  1030:             // whether USE_UNROLLED_REDUCE is set or not.
  1031:             //
  1032:             // To produce the unrolled versions, simply copy all of the
  1033:             // noncomment lines from the general loop, and replace the
  1034:             // occurrence of 'i' with the value of one less than the 'case'
  1035:             // label number.
  1036:             switch ((unsigned)rhsLen) {    // gcc produces slightly better code if I cast to unsigned first
  1037:               case 1: {
  1038:                 SiblingLink &sib = parser->firstSib;
  1039:                 toPass[0] = sib.sval;
  1040:                 ACTION( rhsDescription =
  1041:                   sm_stringc << " "
  1042:                           << symbolDescription(parser->getSymbolC(), userAct, sib.sval)
  1043:                           << rhsDescription; )
  1044:                 SOURCELOC(
  1045:                   if (sib.validLoc()) {
  1046:                     leftEdge = sib.loc;
  1047:                   }
  1048:                 )
  1049:                 parser->nextInFreeList = prev;
  1050:                 prev = parser;
  1051:                 parser = sib.sib;
  1052:                 xassertdb(parser->referenceCount==1);
  1053:                 xassertdb(prev->referenceCount==1);
  1054:                 prev->decrementAllocCounter();
  1055:                 prev->firstSib.sib.setWithoutUpdateRefct(NULL);
  1056:                 xassertdb(parser->referenceCount==1);
  1057:                 // drop through into next case
  1058:               }
  1059: 
  1060:               case 0:
  1061:                 // nothing to do
  1062:                 goto afterGeneralLoop;
  1063:             }
  1064:           #endif // USE_UNROLLED_REDUCE
  1065: 
  1066:           // ------ loop for arbitrary rhsLen ------
  1067:           // pop off 'rhsLen' stack nodes, collecting as many semantic
  1068:           // values into 'toPass'
  1069:           // NOTE: this loop is the innermost inner loop of the entire
  1070:           // parser engine -- even *one* branch inside the loop body
  1071:           // costs about 30% end-to-end performance loss!
  1072:           for (int i = rhsLen-1; i >= 0; i--) {
  1073:             // grab 'parser's only sibling link
  1074:             //SiblingLink *sib = parser->getUniqueLink();
  1075:             SiblingLink &sib = parser->firstSib;
  1076: 
  1077:             // Store its semantic value it into array that will be
  1078:             // passed to user's routine.  Note that there is no need to
  1079:             // dup() this value, since it will never be passed to
  1080:             // another action routine (avoiding that overhead is
  1081:             // another advantage to the LR mode).
  1082:             toPass[i] = sib.sval;
  1083: 
  1084:             // when tracing actions, continue building rhs desc
  1085:             ACTION( rhsDescription =
  1086:               sm_stringc << " "
  1087:                       << symbolDescription(parser->getSymbolC(), userAct, sib.sval)
  1088:                       << rhsDescription; )
  1089: 
  1090:             // not necessary:
  1091:             //   sib.sval = NULL;                  // link no longer owns the value
  1092:             // this assignment isn't necessary because the usual treatment
  1093:             // of NULL is to ignore it, and I manually ignore *any* value
  1094:             // in the inline-expanded code below
  1095: 
  1096:             // if it has a valid source location, grab it
  1097:             SOURCELOC(
  1098:               if (sib.validLoc()) {
  1099:                 leftEdge = sib.loc;
  1100:               }
  1101:             )
  1102: 
  1103:             // pop 'parser' and move to the next one
  1104:             parser->nextInFreeList = prev;
  1105:             prev = parser;
  1106:             parser = sib.sib;
  1107: 
  1108:             // don't actually increment, since I now no longer actually decrement
  1109:             // cancelled(1) effect: parser->incRefCt();    // so 'parser' survives deallocation of 'sib'
  1110:             // cancelled(1) observable: xassertdb(parser->referenceCount==1);       // 'sib' and the fake one
  1111: 
  1112:             // so now it's just the one
  1113:             xassertdb(parser->referenceCount==1);     // just 'sib'
  1114: 
  1115:             xassertdb(prev->referenceCount==1);
  1116:             // expand "prev->decRefCt();"             // deinit 'prev', dealloc 'sib'
  1117:             {
  1118:               // I don't actually decrement the reference count on 'prev'
  1119:               // because it will be reset to 0 anyway when it is inited
  1120:               // the next time it is used
  1121:               //prev->referenceCount = 0;
  1122: 
  1123:               // adjust the global count of stack nodes
  1124:               prev->decrementAllocCounter();
  1125: 
  1126:               // I previously had a test for "prev->firstSib.sval != NULL",
  1127:               // but that can't happen because I set it to NULL above!
  1128:               // (as the alias sib.sval)
  1129:               // update: now I don't even set it to NULL because the code here
  1130:               // has been changed to ignore *any* value
  1131:               //if (prev->firstSib.sval != NULL) {
  1132:               //  std::cout << "I GOT THE ANALYSIS WRONG!\n";
  1133:               //}
  1134: 
  1135:               // cancelled(1) effect: parser->decRefCt();
  1136:               prev->firstSib.sib.setWithoutUpdateRefct(NULL);
  1137: 
  1138:               // possible optimization: I could eliminiate
  1139:               // "prev->firstSib.sib=NULL" if I consistently modified all
  1140:               // creation of stack nodes to treat sib as a dead value:
  1141:               // right after creation I would make sure the new
  1142:               // sibling value *overwrites* sib, and no attempt is
  1143:               // made to decrement a refct on the dead value
  1144: 
  1145:               // this is obviated by the manual construction of the
  1146:               // free list links (nestInFreeList) above
  1147:               //stackNodePool.deallocNoDeinit(prev);
  1148:             }
  1149: 
  1150:             xassertdb(parser->referenceCount==1);     // fake refct only
  1151:           } // end of general rhsLen loop
  1152: 
  1153:         #if USE_UNROLLED_REDUCE    // suppress the warning when not using it..
  1154:         afterGeneralLoop:
  1155:         #endif
  1156:           // having now manually strung the deallocated stack nodes together
  1157:           // on the free list, I need to make the node pool's head point at them
  1158:           stackNodePool.private_setHead(prev);
  1159: 
  1160:           // call the user's action function (TREEBUILD)
  1161:           SemanticValue sval =
  1162:           #if USE_ACTIONS
  1163:             reductionAction(userAct, prodIndex, toPass /*.getArray()*/
  1164:                             SOURCELOCARG( leftEdge ) );
  1165:           #else
  1166:             NULL;
  1167:           #endif
  1168: 
  1169:           // now, push a new state; essentially, shift prodInfo.lhsIndex.
  1170:           // do "glrShiftNonterminal(parser, prodInfo.lhsIndex, sval, leftEdge);",
  1171:           // except avoid interacting with the worklists
  1172: 
  1173:           // this is like a shift -- we need to know where to go; the
  1174:           // 'goto' table has this information
  1175:           StateId newState = tables->decodeGoto(
  1176:             tables->getGotoEntry(parser->state, prodInfo.lhsIndex),
  1177:             prodInfo.lhsIndex);
  1178: 
  1179:           // debugging
  1180:           TRSPARSE("state " << startStateId <<
  1181:                    ", (unambig) reduce by " << prodIndex <<
  1182:                    " (len=" << rhsLen <<
  1183:                    "), back to " << parser->state <<
  1184:                    " then out to " << newState);
  1185: 
  1186:           // 'parser' has refct 1, reflecting the local variable only
  1187:           xassertdb(parser->referenceCount==1);
  1188: 
  1189:           // push new state
  1190:           StackNode *newNode;
  1191:           MAKE_STACK_NODE(newNode, newState, &glr, stackNodePool)
  1192: 
  1193:           newNode->addFirstSiblingLink_noRefCt(
  1194:             parser, sval  SOURCELOCARG( leftEdge ) );
  1195:           // cancelled(3) effect: parser->incRefCt();
  1196: 
  1197:           // cancelled(3) effect: xassertdb(parser->referenceCount==2);
  1198:           // expand:
  1199:           //   "parser->decRefCt();"                 // local variable "parser" about to go out of scope
  1200:           {
  1201:             // cancelled(3) effect: parser->referenceCount = 1;
  1202:           }
  1203:           xassertdb(parser->referenceCount==1);
  1204: 
  1205:           // replace whatever is in 'topmostParsers[0]' with 'newNode'
  1206:           topmostParsers[0] = newNode;
  1207:           newNode->incRefCt();
  1208:           xassertdb(newNode->referenceCount == 1);   // topmostParsers[0] is referrer
  1209: 
  1210:           // emit some trace output
  1211:           TRSACTION("  " <<
  1212:                     symbolDescription(newNode->getSymbolC(), userAct, sval) <<
  1213:                     " ->" << rhsDescription);
  1214: 
  1215:           #if USE_KEEP
  1216:             // see if the user wants to keep this reduction
  1217:             if (!userAct->keepNontermValue(prodInfo.lhsIndex, sval)) {
  1218:               ACTION( sm_string lhsDesc =
  1219:                         userAct->nonterminalDescription(prodInfo.lhsIndex, sval); )
  1220:               TRSACTION("    CANCELLED " << lhsDesc);
  1221:               glr.printParseErrorMessage(newNode->state);
  1222:               ACCOUNTING(
  1223:                 glr.detShift += localDetShift;
  1224:                 glr.detReduce += localDetReduce;
  1225:               )
  1226: 
  1227:               // TODO: I'm pretty sure I'm not properly cleaning
  1228:               // up all of my state here..
  1229:               return false;
  1230:             }
  1231:           #endif // USE_KEEP
  1232: 
  1233:           // after all this, we haven't shifted any tokens, so the token
  1234:           // context remains; let's go back and try to keep acting
  1235:           // determinstically (if at some point we can't be deterministic,
  1236:           // then we drop into full GLR, which always ends by shifting)
  1237:           goto tryDeterministic;
  1238:         }
  1239:       }
  1240: 
  1241:       else if (tables->isShiftAction(action)) {
  1242:         ACCOUNTING( localDetShift++; )
  1243: 
  1244:         // can shift unambiguously
  1245:         StateId newState = tables->decodeShift(action, lexer.type);
  1246: 
  1247:         TRSPARSE("state " << parser->state <<
  1248:                  ", (unambig) shift token " << lexer.tokenDesc() <<
  1249:                  ", to state " << newState);
  1250: 
  1251:         NODE_COLUMN( glr.globalNodeColumn++; )
  1252: 
  1253:         StackNode *rightSibling;
  1254:         MAKE_STACK_NODE(rightSibling, newState, &glr, stackNodePool);
  1255: 
  1256:         rightSibling->addFirstSiblingLink_noRefCt(
  1257:           parser, lexer.sval  SOURCELOCARG( lexer.loc ) );
  1258:         // cancelled(2) effect: parser->incRefCt();
  1259: 
  1260:         // replace 'parser' with 'rightSibling' in the topmostParsers list
  1261:         topmostParsers[0] = rightSibling;
  1262:         // cancelled(2) effect: xassertdb(parser->referenceCount==2);         // rightSibling & topmostParsers[0]
  1263:         // expand "parser->decRefCt();"
  1264:         {
  1265:           // cancelled(2) effect: parser->referenceCount = 1;
  1266:         }
  1267:         xassertdb(parser->referenceCount==1);         // rightSibling
  1268: 
  1269:         xassertdb(rightSibling->referenceCount==0);   // just created
  1270:         // expand "rightSibling->incRefCt();"
  1271:         {
  1272:           rightSibling->referenceCount = 1;
  1273:         }
  1274:         xassertdb(rightSibling->referenceCount==1);   // topmostParsers[0] refers to it
  1275: 
  1276:         // get next token
  1277:         goto getNextToken;
  1278:       }
  1279: 
  1280:       else {
  1281:         // error or ambig; not deterministic
  1282:       }
  1283:     }
  1284:     // ------------------ end of mini-LR parser ------------------
  1285:   #endif // USE_MINI_LR
  1286: 
  1287:     // if we get here, we're dropping into the nondeterministic GLR
  1288:     // algorithm in its full glory
  1289:     if (!glr.nondeterministicParseToken()) {
  1290:       return false;
  1291:     }
  1292: 
  1293:   #if USE_MINI_LR    // silence a warning when it's not enabled
  1294:   getNextToken:
  1295:   #endif
  1296:     // was that the last token?
  1297:     if (lexer.type == 0) {
  1298:       break;
  1299:     }
  1300: 
  1301:     // get the next token
  1302:     nextToken(&lexer);
  1303:     #ifndef NDEBUG
  1304:       tokenNumber++;
  1305:     #endif
  1306:   }
  1307: 
  1308:   // push stats into main object
  1309:   ACCOUNTING(
  1310:     glr.detShift += localDetShift;
  1311:     glr.detReduce += localDetReduce;
  1312:   )
  1313: 
  1314:   // end of parse; note that this function must be called *before*
  1315:   // the stackNodePool is deallocated
  1316:   return glr.cleanupAfterParse(treeTop);
  1317: }
  1318: 
  1319: 
  1320: // diagnostic/debugging function: yield sequence of
  1321: // states represented by 'parser'; in the case of
  1322: // ambiguity, just show one...
  1323: sm_string stackTraceString(StackNode *parser)
  1324: {
  1325:   // hmm.. what to do about cyclic stacks?
  1326:   return sm_string("need to think about this some more..");
  1327: }
  1328: 
  1329: 
  1330: // return false if caller should return false; pulled out of
  1331: // glrParse to reduce register pressure (but didn't help as
  1332: // far as I can tell!)
  1333: bool GLR::nondeterministicParseToken()
  1334: {
  1335:   //std::cout << "not deterministic\n";
  1336: 
  1337:   // ([GLR] called the code from here to the end of
  1338:   // the loop 'parseword')
  1339: 
  1340:   // work through the worklist
  1341:   StateId lastToDie = STATE_INVALID;
  1342: 
  1343:   // do all reduction explicitly first, then all shifts by
  1344:   // re-iterating over topmost parsers
  1345:   int i;
  1346:   for (i=0; i < topmostParsers.length(); i++) {
  1347:     StackNode *parser = topmostParsers[i];
  1348: 
  1349:     ActionEntry action =
  1350:       tables->getActionEntry(parser->state, lexerPtr->type);
  1351:     int actions = rwlEnqueueReductions(parser, action, NULL /*sibLink*/);
  1352: 
  1353:     if (actions == 0) {
  1354:       TRSPARSE("parser in state " << parser->state << " died");
  1355:       lastToDie = parser->state;
  1356:     }
  1357:   }
  1358: 
  1359:   // now that the reductions for all the existing topmost states
  1360:   // have been enqueued, process that worklist
  1361:   rwlProcessWorklist();
  1362: 
  1363:   // finally, do all the shifts that the topmost states can do
  1364:   rwlShiftTerminals();
  1365: 
  1366: 
  1367:   // if all active parsers have died, there was an error
  1368:   if (topmostParsers.isEmpty()) {
  1369:     printParseErrorMessage(lastToDie);
  1370:     return false;
  1371:   }
  1372:   else {
  1373:     return true;
  1374:   }
  1375: }
  1376: 
  1377: 
  1378: // pulled out of glrParse() to reduce register pressure
  1379: void GLR::printParseErrorMessage(StateId lastToDie)
  1380: {
  1381:   if (!noisyFailedParse) {
  1382:     return;
  1383:   }
  1384: 
  1385:   // print which tokens could have allowed progress; this isn't
  1386:   // perfect because I'm only printing this for one state, but in the
  1387:   // nondeterministic algorithm there might have been more than one
  1388:   // state that could have made progress..
  1389:   if (lastToDie != STATE_INVALID) {
  1390:     std::cout << "In state " << lastToDie << ", I expected one of these tokens:\n";
  1391:     std::cout << "  ";
  1392:     for (int i=0; i < tables->getNumTerms(); i++) {
  1393:       ActionEntry act = tables->getActionEntry(lastToDie, i);
  1394:       if (!tables->isErrorAction(act)) {
  1395:         //std::cout << "  [" << i << "] " << lexerPtr->tokenKindDesc(i) << "\n";
  1396:         std::cout << lexerPtr->tokenKindDesc(i) << ", ";
  1397:       }
  1398:     }
  1399:     std::cout << "\n";
  1400:   }
  1401:   else {
  1402:     // this happens because I lose the dead-parser info while processing
  1403:     // the reduction worklist; to implement this I'd need to remember each
  1404:     // state that died while processing the worklist; for now I'll just let
  1405:     // it be, and only have the right info sometimes
  1406:     std::cout << "(expected-token info not available due to nondeterministic mode)\n";
  1407:   }
  1408: 
  1409:   std::cout << toString(lexerPtr->loc)
  1410:        << ": Parse error (state " << lastToDie << ") at "
  1411:        << lexerPtr->tokenDesc()
  1412:        << std::endl;
  1413: 
  1414:   // removing this for now since keeping it would mean putting
  1415:   // sample inputs and left contexts for all states into the
  1416:   // parse tables
  1417:   #if 0
  1418:   if (lastToDie == STATE_INVALID) {
  1419:     // I'm not entirely confident it has to be nonnull..
  1420:     std::cout << "what the?  lastToDie is STATE_INVALID??\n";
  1421:   }
  1422:   else {
  1423:     // print out the context of that parser
  1424:     std::cout << "last parser (state " << lastToDie << ") to die had:\n"
  1425:          << "  sample input: "
  1426:          << sampleInput(getItemSet(lastToDie)) << "\n"
  1427:          << "  left context: "
  1428:          << leftContextString(getItemSet(lastToDie)) << "\n";
  1429:   }
  1430:   #endif // 0
  1431: }
  1432: 
  1433: 
// Invoke the user's reduction action for production 'productionId',
// passing the semantic values of the RHS symbols in 'svals' (and the
// left edge's source location, when location support is compiled in).
// Returns whatever semantic value the user's action produces.
SemanticValue GLR::doReductionAction(
  int productionId, SemanticValue const *svals
  SOURCELOCARG( SourceLoc loc ) )
{
  // get the function pointer and invoke it; possible optimization
  // is to cache the function pointer in the GLR object
  return (userAct->getReductionAction())(userAct, productionId, svals  SOURCELOCARG(loc));
}
  1442: 
  1443: 
  1444: // pulled from glrParse() to reduce register pressure
  1445: bool GLR::cleanupAfterParse(SemanticValue &treeTop)
  1446: {
  1447:   traceProgress() << "done parsing\n";
  1448:   trsParse << "Parse succeeded!\n";
  1449: 
  1450: 
  1451:   // finish the parse by reducing to start symbol
  1452:   if (topmostParsers.length() != 1) {
  1453:     std::cout << "parsing finished with more than one active parser!\n";
  1454:     return false;
  1455:   }
  1456:   StackNode *last = topmostParsers.top();
  1457: 
  1458:   // pull out the semantic values; this assumes the start symbol
  1459:   // always looks like "Start -> Something EOF"; it also assumes
  1460:   // the top of the tree is unambiguous
  1461:   SemanticValue arr[2];
  1462:   StackNode *nextToLast = last->getUniqueLink()->sib;
  1463:   arr[0] = grabTopSval(nextToLast);   // Something's sval
  1464:   arr[1] = grabTopSval(last);         // eof's sval
  1465: 
  1466:   // reduce
  1467:   TRSACTION("handing toplevel sval " << arr[0] <<
  1468:             " and " << arr[1] <<
  1469:             " to top start's reducer");
  1470:   treeTop = doReductionAction(
  1471:               //getItemSet(last->state)->getFirstReduction()->prodIndex,
  1472:               tables->finalProductionIndex,
  1473:               arr
  1474:               SOURCELOCARG( last->getUniqueLinkC()->loc ) );
  1475: 
  1476:   // why do this song-and-dance here, instead of letting the normal
  1477:   // parser engine do the final reduction?  because the GLR algorithm
  1478:   // always finishes its iterations with a shift, and it's not trivial
  1479:   // to add a special exception for the case of the reduce which
  1480:   // finishes the parse
  1481: 
  1482:   // these also must be done before the pool goes away..
  1483:   decParserList(topmostParsers);
  1484: 
  1485:   return true;
  1486: }
  1487: 
  1488: 
  1489: // this used to be code in glrParse(), but its presense disturbs gcc's
  1490: // register allocator to the tune of a 33% performance hit!  so I've
  1491: // pulled it in hopes the allocator will be happier now
  1492: void GLR::pullFromTopmostParsers(StackNode *parser)
  1493: {
  1494:   int last = topmostParsers.length()-1;
  1495:   for (int i=0; i <= last; i++) {
  1496:     if (topmostParsers[i] == parser) {
  1497:       // remove it; if it's not last in the list, swap it with
  1498:       // the last one to maintain contiguity
  1499:       if (i < last) {
  1500:         topmostParsers[i] = topmostParsers[last];
  1501:         // (no need to actually copy 'i' into 'last')
  1502:       }
  1503:       topmostParsers.pop();     // removes a reference to 'parser'
  1504:       parser->decRefCt();       // so decrement reference count
  1505:       break;
  1506:     }
  1507:   }
  1508: }
  1509: 
  1510: 
  1511: // return true if the given parser can either shift or reduce.  NOTE:
  1512: // this isn't really sufficient for its intended purpose, since I
  1513: // don't check to see whether *further* actions after a reduce are
  1514: // possible; moreover, checking that could be very expensive, since
  1515: // there may be many paths along which to consider reducing, and many
  1516: // paths from that reduced node forward..
  1517: bool GLR::canMakeProgress(StackNode *parser)
  1518: {
  1519:   ActionEntry entry =
  1520:     tables->getActionEntry(parser->state, lexerPtr->type);
  1521: 
  1522:   return tables->isShiftAction(entry) ||
  1523:          tables->isReduceAction(entry) ||
  1524:          !tables->isErrorAction(entry);
  1525: }
  1526: 
  1527: 
  1528: // if an active parser is at 'state', return it; otherwise
  1529: // return NULL
  1530: StackNode *GLR::findTopmostParser(StateId state)
  1531: {
  1532:   #ifdef USE_PARSER_INDEX
  1533:     int index = parserIndex[state];
  1534:     if (index != INDEX_NO_PARSER) {
  1535:       return topmostParsers[index];
  1536:     }
  1537:     else {
  1538:       return NULL;
  1539:     }
  1540:   #else
  1541:     for (int i=0; i < topmostParsers.length(); i++) {
  1542:       StackNode *node = topmostParsers[i];
  1543:       if (node->state == state) {
  1544:         return node;
  1545:       }
  1546:     }
  1547:     return NULL;
  1548:   #endif
  1549: }
  1550: 
  1551: 
  1552: // print the graph-structured stack to a file, named according
  1553: // to the current token number, in a format suitable for a
  1554: // graph visualization tool of some sort
  1555: void GLR::dumpGSS(int tokenNumber) const
  1556: {
  1557:   FILE *dest = fopen(sm_stringc << "gss." << tokenNumber << ".g", "w");
  1558: 
  1559:   // list of nodes we've already printed, to avoid printing any
  1560:   // node more than once
  1561:   SObjList<StackNode> printed;
  1562: 
  1563:   // list of nodes to print; might intersect 'printed', in which case
  1564:   // such nodes should be discarded; initially contains all the active
  1565:   // parsers (tops of stacks)
  1566:   SObjList<StackNode> queue;
  1567:   for (int i=0; i < topmostParsers.length(); i++) {
  1568:     queue.append(topmostParsers[i]);
  1569:   }
  1570: 
  1571:   // keep printing nodes while there are still some to print
  1572:   while (queue.isNotEmpty()) {
  1573:     StackNode *node = queue.removeFirst();
  1574:     if (printed.contains(node)) {
  1575:       continue;
  1576:     }
  1577:     printed.append(node);
  1578: 
  1579:     // only edges actually get printed (since the node names
  1580:     // encode all the important information); so iterate over
  1581:     // the sibling links now; while iterating, add the discovered
  1582:     // nodes to the queue so we'll print them too
  1583:     if (node->firstSib.sib != NULL) {
  1584:       dumpGSSEdge(dest, node, node->firstSib.sib);
  1585:       queue.append(node->firstSib.sib);
  1586: 
  1587:       FOREACH_OBJLIST(SiblingLink, node->leftSiblings, iter) {
  1588:         dumpGSSEdge(dest, node, iter.data()->sib);
  1589:         queue.append(const_cast<StackNode*>( iter.data()->sib.getC() ));
  1590:       }
  1591:     }
  1592:   }
  1593: 
  1594:   fclose(dest);
  1595: }
  1596: 
  1597: 
  1598: void GLR::dumpGSSEdge(FILE *dest, StackNode const *src,
  1599:                                   StackNode const *target) const
  1600: {
  1601:   fprintf(dest, "e %d_%p_%d %d_%p_%d\n",
  1602:                 0 NODE_COLUMN( + src->column ), src, src->state,
  1603:                 0 NODE_COLUMN( + target->column ), target, target->state);
  1604: }
  1605: 
  1606: 
  1607: // alternative to above: stack info in a single sm_string
  1608: sm_string GLR::stackSummary() const
  1609: {
  1610:   sm_stringBuilder sb;
  1611: 
  1612:   // list of nodes we've already printed, to avoid printing any
  1613:   // node more than once
  1614:   SObjList<StackNode const> printed;
  1615: 
  1616:   for (int i=0; i < topmostParsers.length(); i++) {
  1617:     sb << " (" << i << ": ";
  1618:     innerStackSummary(sb, printed, topmostParsers[i]);
  1619:     sb << ")";
  1620:   }
  1621: 
  1622:   return sb;
  1623: }
  1624: 
// append a one-node summary to 'sb', formatted as <state>[<refct>]
void GLR::nodeSummary(sm_stringBuilder &sb, StackNode const *node) const
{
  sb << node->state << "[" << node->referenceCount << "]";
}
  1629: 
  1630: void GLR::innerStackSummary(sm_stringBuilder &sb, SObjList<StackNode const> &printed,
  1631:                             StackNode const *node) const
  1632: {
  1633:   if (printed.contains(node)) {
  1634:     sb << "(rep:";
  1635:     nodeSummary(sb, node);
  1636:     sb << ")";
  1637:     return;
  1638:   }
  1639: 
  1640:   nodeSummary(sb, node);
  1641:   printed.append(node);
  1642: 
  1643:   if (!node->firstSib.sib) {
  1644:     return;   // no siblings
  1645:   }
  1646: 
  1647:   sb << "-";
  1648: 
  1649:   if (node->leftSiblings.isEmpty()) {
  1650:     // one sibling
  1651:     innerStackSummary(sb, printed, node->firstSib.sib);
  1652:   }
  1653:   else {
  1654:     // multiple siblings
  1655:     sb << "(";
  1656:     innerStackSummary(sb, printed, node->firstSib.sib);
  1657: 
  1658:     FOREACH_OBJLIST(SiblingLink, node->leftSiblings, iter) {
  1659:       sb << "|";
  1660:       innerStackSummary(sb, printed, iter.data()->sib);
  1661:     }
  1662:     sb << ")";
  1663:   }
  1664: }
  1665: 
  1666: 
#if 0
// DISABLED: never compiled; kept only as a reference for how the
// final semantic value sits in the stack after the last shift
SemanticValue GLR::getParseResult()
{
  // the final topmost parser is the one that shifted the
  // end-of-stream marker, so we want its left sibling, since that
  // will be the reduction(s) to the start symbol
  SemanticValue sv =
    topmostParsers.first()->                    // parser that shifted end-of-stream
      leftSiblings.first()->sib->              // parser that shifted start symbol
      leftSiblings.first()->                   // sibling link with start symbol
      sval;                                    // start symbol tree node
  return sv;
}
#endif // 0
  1682: 
  1683: 
  1684: // -------------- reduction worklist (RWL) algorithm --------------
  1685: // This algorithm is an attempt to avoid the problem where a semantic
  1686: // value is yielded to a reduction action, but then merged with
  1687: // another semantic value, such that the original one yielded is now
  1688: // stale.  It's described in more detail in the tech report.
  1689: 
// construct an empty, not-yet-usable path; real values are supplied
// later via 'init' (and 'insertPathCopy' for the left-edge fields)
ReductionPathQueue::Path::Path()
  : startStateId(STATE_INVALID),    // no reduction recorded yet
    prodIndex(-1),
    startColumn(-1),
    leftEdgeNode(NULL),
    sibLinks(INITIAL_RHSLEN_SIZE),  // pre-size the per-symbol arrays
    symbols(INITIAL_RHSLEN_SIZE)
{
  next = NULL;    // not linked into any queue
}
  1700: 
// trivial destructor: 'sibLinks' and 'symbols' release their own storage
ReductionPathQueue::Path::~Path()
{}
  1703: 
  1704: 
  1705: void ReductionPathQueue::Path::init(StateId ssi, int pi, int rhsLen)
  1706: {
  1707:   startStateId = ssi;
  1708:   prodIndex = pi;
  1709: 
  1710:   sibLinks.ensureIndexDoubler(rhsLen);
  1711:   symbols.ensureIndexDoubler(rhsLen);
  1712: }
  1713: 
  1714: 
// construct an empty queue; paths are drawn from a private pool and
// ordering decisions consult the parse tables 't'
ReductionPathQueue::ReductionPathQueue(ParseTables *t)
  : top(NULL),       // empty priority list
    pathPool(30),    // arbitrary initial pool size
    tables(t)
{}
  1720: 
// destructor: nothing explicit to do here
ReductionPathQueue::~ReductionPathQueue()
{
  // 'pathPool' will automatically array-deallocate all of the
  // paths, which will themselves then delete their internal
  // 'sibLinks' and 'symbols' arrays
}
  1727: 
  1728: 
  1729: ReductionPathQueue::Path *ReductionPathQueue::newPath(
  1730:   StateId startStateId, int prodIndex, int rhsLen)
  1731: {
  1732:   Path *p = pathPool.alloc();
  1733:   p->init(startStateId, prodIndex, rhsLen);
  1734:   return p;
  1735: }
  1736: 
  1737: 
  1738: void ReductionPathQueue::insertPathCopy(Path const *src, StackNode *leftEdge)
  1739: {
  1740:   ParseTables::ProdInfo const &prodInfo = tables->getProdInfo(src->prodIndex);
  1741: 
  1742:   // make a new node
  1743:   Path *p = pathPool.alloc();
  1744:   p->init(src->startStateId, src->prodIndex, prodInfo.rhsLen);
  1745: 
  1746:   // fill in left edge info
  1747:   p->leftEdgeNode = leftEdge;
  1748:   p->startColumn = leftEdge->column;
  1749: 
  1750:   // copy the path info
  1751:   for (int i = prodInfo.rhsLen-1; i>=0; i--) {
  1752:     p->sibLinks[i] = src->sibLinks[i];
  1753:     p->symbols[i] = src->symbols[i];
  1754:   }
  1755: 
  1756:   // find the proper place to insert it
  1757:   if (!top || goesBefore(p, top)) {
  1758:     // prepend
  1759:     p->next = top;
  1760:     top = p;
  1761:   }
  1762:   else {
  1763:     // search
  1764:     Path *prev = top;
  1765:     while (prev->next && !goesBefore(p, prev->next)) {
  1766:       prev = prev->next;
  1767:     }
  1768: 
  1769:     // insert
  1770:     p->next = prev->next;
  1771:     prev->next = p;
  1772:   }
  1773: }
  1774: 
  1775: bool ReductionPathQueue::goesBefore(Path const *p1, Path const *p2) const
  1776: {
  1777:   if (p1->startColumn > p2->startColumn) {
  1778:     // 'p1' spans fewer tokens, so it goes first
  1779:     return true;
  1780:   }
  1781:   else if (p2->startColumn > p1->startColumn) {
  1782:     // same logic
  1783:     return false;
  1784:   }
  1785:   else {
  1786:     // equal start columns, so compare ids of nonterminals
  1787:     // to which we're reducing in each case
  1788:     NtIndex p1NtIndex = tables->getProdInfo(p1->prodIndex).lhsIndex;
  1789:     NtIndex p2NtIndex = tables->getProdInfo(p2->prodIndex).lhsIndex;
  1790: 
  1791:     // consult total order on nonterminals
  1792:     int ord1 = tables->getNontermOrdinal(p1NtIndex);
  1793:     int ord2 = tables->getNontermOrdinal(p2NtIndex);
  1794: 
  1795:     return ord1 < ord2;
  1796:   }
  1797: }
  1798: 
  1799: 
  1800: inline ReductionPathQueue::Path *ReductionPathQueue::dequeue()
  1801: {
  1802:   Path *ret = top;
  1803:   top = top->next;
  1804:   return ret;
  1805: }
  1806: 
  1807: 
// Return a path to the pool so it can be recycled by 'newPath' or
// 'insertPathCopy'.
void ReductionPathQueue::deletePath(Path *p)
{
  pathPool.dealloc(p);
}
  1812: 
  1813: 
// process the reduction worklist: repeatedly dequeue the
// highest-priority reduction path, run the user's reduction action
// over the semantic values along it, and shift the resulting
// nonterminal; that shift may enqueue further reductions, which are
// processed in turn until the queue drains
void GLR::rwlProcessWorklist()
{
  // location of this token
  SOURCELOC( SourceLoc tokenLoc = lexerPtr->loc; )

  while (pathQueue.isNotEmpty()) {
    // process the enabled reductions in priority order
    ReductionPathQueue::Path *path = pathQueue.dequeue();

    // info about the production
    ParseTables::ProdInfo const &prodInfo = tables->getProdInfo(path->prodIndex);
    int rhsLen = prodInfo.rhsLen;

    TRSPARSE("state " << path->startStateId <<
             ", reducing by production " << path->prodIndex <<
             " (rhsLen=" << rhsLen <<
             "), back to state " << path->leftEdgeNode->state);

    ACCOUNTING( nondetReduce++; )

    // record location of left edge; initially is location of
    // the lookahead token
    SOURCELOC( SourceLoc leftEdge = tokenLoc; )

    // build description of rhs for tracing
    ACTION(
      sm_string rhsDescription("");
      if (rhsLen == 0) {
        // print something anyway
        rhsDescription = " empty";
      }
    )

    // before calling the user, duplicate any needed values; this loop
    // goes from right to left backwards so that 'leftEdge' is
    // computed properly
    toPass.ensureIndexDoubler(rhsLen-1);
    for (int i=rhsLen-1; i >= 0; i--) {
      SiblingLink *sib = path->sibLinks[i];

      // we're about to yield sib's 'sval' to the reduction action
      toPass[i] = sib->sval;

      // continue building rhs desc
      ACTION( rhsDescription =
        sm_stringc << symbolDescription(path->symbols[i], userAct, sib->sval)
                << " "
                << rhsDescription;
      )

      // left edge?  or, have all previous tokens failed to yield
      // information?
      SOURCELOC(
        if (sib->loc != SL_UNKNOWN) {
          leftEdge = sib->loc;
        }
      )

      // we inform the user, and the user responds with a value
      // to be kept in this sibling link *instead* of the passed
      // value; if this link yields a value in the future, it will
      // be this replacement
      sib->sval = duplicateSemanticValue(path->symbols[i], sib->sval);

      YIELD_COUNT( sib->yieldCount++; )
    }

    // we've popped the required number of symbols; call the
    // user's code to synthesize a semantic value by combining them
    // (TREEBUILD)
    SemanticValue sval =
      doReductionAction(path->prodIndex, toPass.getArray()
                        SOURCELOCARG( leftEdge ) );

    // emit tracing diagnostics for this reduction
    ACTION( sm_string lhsDesc =
              userAct->nonterminalDescription(prodInfo.lhsIndex, sval); )
    TRSACTION("  " << lhsDesc << " ->" << rhsDescription);

    // see if the user wants to keep this reduction
    if (USE_KEEP &&
        !userAct->keepNontermValue(prodInfo.lhsIndex, sval)) {
      TRSACTION("    CANCELLED " << lhsDesc);
    }
    else {
      // shift the nonterminal with its reduced semantic value
      SiblingLink *newLink =
        rwlShiftNonterminal(path->leftEdgeNode, prodInfo.lhsIndex,
                            sval  SOURCELOCARG( leftEdge ) );

      if (newLink) {
        // a link was added between two *existing* stack nodes, which
        // may enable reductions the finished parsers could not see
        // before, so re-examine all of them

        // for each 'finished' parser ...
        for (int i=0; i < topmostParsers.length(); i++) {
          StackNode *parser = topmostParsers[i];

          // ... do any reduce actions that are now enabled by the new link
          ActionEntry action =
            tables->getActionEntry(parser->state, lexerPtr->type);
          rwlEnqueueReductions(parser, action, newLink);
        }
      }
    }

    // recycle the path object for future reductions
    pathQueue.deletePath(path);
  }
}
  1921: 
  1922: 
// shift reduction onto 'leftSibling' parser, 'lhsIndex' says which
// nonterminal is being shifted; 'sval' is the semantic value of this
// subtree, and 'loc' is the location of the left edge; return value
// is the newly added link, if one was added between existing nodes
// ([GLR] calls this function 'reducer')
//
// exactly one of three possible things happens:
//   - we make a new stack node
//   - we add a new link between existing stack nodes
//   - we merge two semantic values onto an existing link
//
// only the middle case returns non-NULL; the caller uses that signal
// to re-check finished parsers for newly-enabled reductions
SiblingLink *GLR::rwlShiftNonterminal(StackNode *leftSibling, int lhsIndex,
                                      SemanticValue /*owner*/ sval
                                      SOURCELOCARG( SourceLoc loc ) )
{
  // this is like a shift -- we need to know where to go; the
  // 'goto' table has this information
  StateId rightSiblingState = tables->decodeGoto(
    tables->getGotoEntry(leftSibling->state, lhsIndex), lhsIndex);

  // debugging
  TRSPARSE("state " << leftSibling->state <<
           ", shift nonterm " << lhsIndex <<
           ", to state " << rightSiblingState);

  // is there already an active parser with this state?
  StackNode *rightSibling = findTopmostParser(rightSiblingState);
  if (rightSibling) {
    // does it already have a sibling link to 'leftSibling'?
    SiblingLink *sibLink = rightSibling->getLinkTo(leftSibling);
    if (sibLink) {
      // we already have a sibling link, so we don't need to add one

      // +--------------------------------------------------+
      // | it is here that we are bringing the tops of two  |
      // | alternative parses together (TREEBUILD)          |
      // +--------------------------------------------------+

      // sometimes we are trying to merge dead trees--if the
      // 'rightSibling' cannot make progress at all, it would be much
      // better to just drop this alternative than demand the user
      // merge trees when there is not necessarily any ambiguity
      if (!canMakeProgress(rightSibling)) {
        // both trees are dead; deallocate one (the other alternative
        // will be dropped later, when 'rightSibling' is considered
        // for action in the usual way)
        TRSPARSE("avoided a merge by noticing the state was dead");
        deallocateSemanticValue(rightSibling->getSymbolC(), sval);
        return NULL;
      }

      // remember previous value, for yield count warning
      YIELD_COUNT(SemanticValue old2 = sibLink->sval);

      // remember descriptions of the values before they are merged
      ACTION(
        sm_string leftDesc = userAct->nonterminalDescription(lhsIndex, sibLink->sval);
        sm_string rightDesc = userAct->nonterminalDescription(lhsIndex, sval);
      )

      // call the user's code to merge, and replace what we have
      // now with the merged version
      sibLink->sval =
        userAct->mergeAlternativeParses(lhsIndex, sibLink->sval, sval  SOURCELOCARG( loc ) );

      // emit tracing diagnostics for the merge
      TRSACTION("  " <<
                userAct->nonterminalDescription(lhsIndex, sibLink->sval) <<
                " is MERGE of " << leftDesc << " and " << rightDesc);

      YIELD_COUNT(
        if (sibLink->yieldCount > 0) {
          // yield-then-merge (YTM) happened
          yieldThenMergeCt++;
          SOURCELOC( trace("ytm") << "at " << toString(loc) << std::endl; )

          // if merging yielded a new semantic value, then we most likely
          // have a problem; if it yielded the *same* value, then most
          // likely the user has implemented the 'ambiguity' link soln,
          // so we're ok
          if (old2 != sibLink->sval) {
            std::cout << "warning: incomplete parse forest: " << (void*)old2
                 << " has already been yielded, but it now has been "
                 << "merged with " << (void*)sval << " to make "
                 << (void*)(sibLink->sval) << " (lhsIndex="
                 << lhsIndex << ")" << std::endl;
          }
        }
      )

      // ok, done
      return NULL;

      // and since we didn't add a link, there is no potential for new
      // paths
    }

    // we get here if there is no suitable sibling link already
    // existing; so add the link (and keep the ptr for loop below)
    sibLink = rightSibling->addSiblingLink(leftSibling, sval  SOURCELOCARG( loc ) );

    // adding a new sibling link may have introduced additional
    // opportunties to do reductions from parsers we thought
    // we were finished with.
    //
    // what's more, it's not just the parser ('rightSibling') we
    // added the link to -- if rightSibling's itemSet contains 'A ->
    // alpha . B beta' and B ->* empty (so A's itemSet also has 'B
    // -> .'), then we reduced it (if lookahead ok), so
    // 'rightSibling' now has another left sibling with 'A -> alpha
    // B . beta'.  We need to let this sibling re-try its reductions
    // also.
    //
    // so, the strategy is to let all 'finished' parsers re-try
    // reductions, and process those that actually use the just-
    // added link

    // TODO: I think this code path is unusual; confirm by measurement
    // update: it's taken maybe 1 in 10 times through this function..
    parserMerges++;

    // we don't have to recompute if nothing else points at
    // 'rightSibling'; the refct is always at least 1 because we found
    // it on the "active parsers" worklist
    if (rightSibling->referenceCount > 1) {
      // since we added a new link *all* determinDepths might
      // be compromised; iterating more than once should be very
      // rare (and this code path should already be unusual)
      int changes=1, iters=0;
      while (changes) {
        changes = 0;
        for (int i=0; i < topmostParsers.length(); i++) {
          StackNode *parser = topmostParsers[i];
          int newDepth = parser->computeDeterminDepth();
          if (newDepth != parser->determinDepth) {
            changes++;
            parser->determinDepth = newDepth;
          }
        }
        iters++;
        xassert(iters < 1000);    // protect against infinite loop
        computeDepthIters++;
      }
    }

    // inform the caller that a new sibling link was added
    return sibLink;
  }

  else {
    // no, there is not already an active parser with this
    // state.  we must create one; it will become the right
    // sibling of 'leftSibling'
    rightSibling = makeStackNode(rightSiblingState);

    // add the sibling link (and keep ptr for tree stuff)
    rightSibling->addSiblingLink(leftSibling, sval  SOURCELOCARG( loc ) );

    // since this is a new parser top, it needs to become a
    // member of the frontier
    addTopmostParser(rightSibling);

    // here, rather than adding something to the parser worklist,
    // we'll directly expand its reduction paths and add them
    // to the reduction worklist
    ActionEntry action =
      tables->getActionEntry(rightSibling->state, lexerPtr->type);
    rwlEnqueueReductions(rightSibling, action, NULL /*sibLink*/);

    // no need for the elaborate re-checking above, since we
    // just created rightSibling, so no new opportunities
    // for reduction could have arisen
    return NULL;
  }
}
  2097: 
  2098: 
// find and enqueue all the reductions that 'parser' can do; 'action'
// is the parser's action code; we only consider reductions that use
// 'mustUseLink', if that is not NULL
//
// This function will enqueue reduction paths, ordered first by the
// number of terminals spanned and second by the nonterminal
// derivability relation on the nonterminal to which the path reduces
// (if A ->+ B then we will reduce to B before reducing to A, if
// terminal spans are equal).
//
// this function returns the # of actions the parser can take, as
// part of a rather weak error reporting scheme..
int GLR::rwlEnqueueReductions(StackNode *parser, ActionEntry action,
                              SiblingLink *mustUseLink)
{
  parser->checkLocalInvariants();

  if (tables->isShiftAction(action)) {
    // do nothing, we're only interested in reductions
    return 1;
  }
  else if (tables->isReduceAction(action)) {
    // reduce
    int prodIndex = tables->decodeReduce(action, parser->state);

    // get information about the production we'll use
    ParseTables::ProdInfo const &info = tables->getProdInfo(prodIndex);
    int rhsLen = info.rhsLen;
    xassert(rhsLen >= 0);    // paranoia before using this to control recursion

    // initialize a prototype Path which will monitor our progress
    // though the enumeration of all paths
    ReductionPathQueue::Path *proto =
      pathQueue.newPath(parser->state, prodIndex, rhsLen);

    // kick off the recursion; each complete path found gets *copied*
    // into the queue, so the prototype can be reused/discarded
    rwlRecursiveEnqueue(proto, rhsLen, parser, mustUseLink);

    // deallocate the prototype
    pathQueue.deletePath(proto);

    return 1;
  }
  else if (tables->isErrorAction(action)) {
    // the parser dies, we don't do anything
    return 0;
  }
  else {
    // ambiguous; check for reductions
    // (entry[0] holds the count of conflicting actions, which follow
    // it in the array; recurse on each one)
    ActionEntry *entry = tables->decodeAmbigAction(action, parser->state);
    for (int i=0; i<entry[0]; i++) {
      rwlEnqueueReductions(parser, entry[i+1], mustUseLink);
    }

    return entry[0];
  }
}
  2156: 
  2157: 
  2158: // arguments have same meanings as in 'rwlRecursiveEnqueue'
  2159: inline void GLR::rwlCollectPathLink(
  2160:   ReductionPathQueue::Path *proto, int popsRemaining,
  2161:   StackNode *currentNode, SiblingLink *mustUseLink, SiblingLink *linkToAdd)
  2162: {
  2163:   proto->sibLinks[popsRemaining] = linkToAdd;
  2164:   proto->symbols[popsRemaining] = currentNode->getSymbolC();
  2165: 
  2166:   if (linkToAdd == mustUseLink) {
  2167:     rwlRecursiveEnqueue(proto, popsRemaining, linkToAdd->sib,
  2168:                         NULL /*mustUseLink*/);
  2169:   }
  2170:   else {
  2171:     rwlRecursiveEnqueue(proto, popsRemaining, linkToAdd->sib,
  2172:                         mustUseLink);
  2173:   }
  2174: }
  2175: 
  2176: // recursive depth-first enumeration of paths
  2177: void GLR::rwlRecursiveEnqueue(
  2178:   ReductionPathQueue::Path *proto,  // prototype path, with path so far
  2179:   int popsRemaining,                // # of links yet to traverse to find a full path
  2180:   StackNode *currentNode,           // node we're at in the path
  2181:   SiblingLink *mustUseLink)         // link the path must use (if non-NULL)
  2182: {
  2183:   if (popsRemaining == 0) {
  2184:     // we found path of required length
  2185: 
  2186:     // if we have failed to use the required link, ignore this path
  2187:     if (mustUseLink != NULL) {
  2188:       return;
  2189:     }
  2190: 
  2191:     // the prototype path is the one we want; copy it, fill in
  2192:     // the 'startColumn', and insert it into the queue
  2193:     pathQueue.insertPathCopy(proto, currentNode);
  2194:   }
  2195: 
  2196:   else {
  2197:     // explore 'currentNode's siblings
  2198:     rwlCollectPathLink(proto, popsRemaining-1, currentNode, mustUseLink,
  2199:                        &(currentNode->firstSib));
  2200: 
  2201:     // test before dropping into the loop, since profiler reported
  2202:     // some time spent calling VoidListMutator::reset ..
  2203:     if (currentNode->leftSiblings.isNotEmpty()) {
  2204:       FOREACH_OBJLIST_NC(SiblingLink, currentNode->leftSiblings, sibling) {
  2205:         rwlCollectPathLink(proto, popsRemaining-1, currentNode, mustUseLink,
  2206:                            sibling.data());
  2207:       }
  2208:     }
  2209:   }
  2210: }
  2211: 
  2212: 
// final phase in processing of a token: all topmost parsers
// shift the current token, if they can; parsers that cannot shift
// (reduce-only or error) simply drop out of the frontier
void GLR::rwlShiftTerminals()
{
  NODE_COLUMN( globalNodeColumn++; )

  // move all the parsers from 'topmostParsers' into 'prevTopmost'
  xassert(prevTopmost.isEmpty());
  prevTopmost.swapWith(topmostParsers);
  xassert(topmostParsers.isEmpty());

  // to solve the multi-yield problem for tokens, I'll remember
  // the previously-created sibling link (if any), and dup the
  // sval in that link as needed
  SiblingLink *prev = NULL;

  // foreach node in prevTopmost
  while (prevTopmost.isNotEmpty()) {
    // take the node from 'prevTopmost'; the refcount includes both
    // 'leftSibling' and 'prevTopmost', and then we decrement the
    // count to reflect that only 'leftSibling' has it
    // (RCPtr releases its reference when it goes out of scope, e.g.
    // via the 'continue' paths below)
    RCPtr<StackNode> leftSibling(prevTopmost.pop());
    xassertdb(leftSibling->referenceCount >= 2);
    leftSibling->decRefCt();

    // where can this shift, if anyplace?
    ActionEntry action =
      tables->getActionEntry(leftSibling->state, lexerPtr->type);

    // we'll set this if we find a valid shift dest
    StateId newState = STATE_INVALID;

    // consult action table, looking only for shifts
    if (tables->isShiftAction(action)) {
      // unambiguous shift
      newState = tables->decodeShift(action, lexerPtr->type);
    }
    else if (tables->isReduceAction(action) ||
             tables->isErrorAction(action)) {
      // reduce or error; this parser simply does not shift
      continue;
    }
    else {
      // nondeterministic; get actions
      ActionEntry *entry = tables->decodeAmbigAction(action, leftSibling->state);

      // do each one; only the *first* shift among the conflicted
      // actions is used (reductions were handled in earlier phases)
      for (int i=0; i<entry[0]; i++) {
        action = entry[i+1];
        if (tables->isShiftAction(action)) {
          // a shift was among the conflicted actions
          newState = tables->decodeShift(action, lexerPtr->type);
          break;
        }
      }

      // did we find a shift?
      if (newState == STATE_INVALID) {
        continue;    // no
      }
    }

    // found a shift to perform
    ACCOUNTING( nondetShift++; )

    // debugging
    TRSPARSE("state " << leftSibling->state <<
             ", shift token " << lexerPtr->tokenDesc() <<
             ", to state " << newState);

    // if there's already a parser with this state
    StackNode *rightSibling = findTopmostParser(newState);
    if (rightSibling != NULL) {
      // no need to create the node
    }

    else {
      // must make a new stack node
      rightSibling = makeStackNode(newState);

      // and add it to the active parsers
      addTopmostParser(rightSibling);
    }

    SemanticValue sval = lexerPtr->sval;
    if (prev) {
      // the 'sval' we just grabbed has already been claimed by
      // 'prev->sval'; get a fresh one by duplicating the latter
      sval = userAct->duplicateTerminalValue(lexerPtr->type, prev->sval);

      TRSACTION("  " << userAct->terminalDescription(lexerPtr->type, sval) <<
                " is (@lexer) DUP of " <<
                userAct->terminalDescription(lexerPtr->type, prev->sval));
    }

    // either way, add the sibling link now
    //TRSACTION("grabbed token sval " << lexerPtr->sval);
    prev = rightSibling->addSiblingLink(leftSibling, sval
                                        SOURCELOCARG( lexerPtr->loc ) );

    // adding this sibling link cannot violate the determinDepth
    // invariant of some other node, because all of the nodes created
    // or added-to during shifting do not have anything pointing at
    // them, so in particular nothing points to 'rightSibling'; a simple
    // check of this is to check the reference count and verify it is 1,
    // the 1 being for the 'topmostParsers' list it is on
    xassert(rightSibling->referenceCount == 1);
  }
}
  2322: 
  2323: 
// ------------------ stuff for outputting raw graphs ------------------
// NOTE: this entire section is compiled out ('#if 0'); it is retained
// as a reference for how the GSS graph could be dumped for a viewer
#if 0   // disabled for now
// name for graphs (can't have any spaces in the name)
sm_string stackNodeName(StackNode const *sn)
{
  Symbol const *s = sn->getSymbolC();
  char const *symName = (s? s->name.pcharc() : "(null)");
  return sm_stringb(sn->stackNodeId
              << ":col="  << sn->tokenColumn
              << ",st=" << sn->state->id
              << ",sym=" << symName);
}

// name for rules; 'rn' is the 'ruleNo'-th rule in 'sn'
// (again, no spaces allowed)
sm_string reductionName(StackNode const *sn, int ruleNo, Reduction const *red)
{
  return sm_stringb(sn->stackNodeId << "/" << ruleNo << ":"
              << replace(red->production->toString(), " ", "_"));
}


// this prints the graph in my java graph applet format, where
// nodes lines look like
//   n <name> <optional-desc>
// and edges look like
//   e <from> <to>
// unfortunately, the graph applet needs a bit of work before it
// is worthwhile to use this routinely (though it's great for
// quickly verifying a single (small) parse)
//
// however, it's worth noting that the text output is not entirely
// unreadable...
void GLR::writeParseGraph(char const *fname) const
{
  FILE *out = fopen(sm_stringb("graphs/" << fname), "w");
  if (!out) {
    xsyserror("fopen", sm_stringb("opening file `graphs/" << fname << "'"));
  }

  // header info
  fprintf(out, "# parse graph file: %s\n", fname);
  fprintf(out, "# automatically generated\n"
               "\n");

  #if 0    // can't do anymore because allStackNodes is gone ...
  // for each stack node
  FOREACH_OBJLIST(StackNode, allStackNodes, stackNodeIter) {
    StackNode const *stackNode = stackNodeIter.data();
    sm_string myName = stackNodeName(stackNode);

    // visual delimiter
    fputs(sm_stringb("\n# ------ node: " << myName << " ------\n"), out);

    // write info for the node itself
    fputs(sm_stringb("n " << myName << "\n\n"), out);

    // for all sibling links
    int ruleNo=0;
    FOREACH_OBJLIST(SiblingLink, stackNode->leftSiblings, sibIter) {
      SiblingLink const *link = sibIter.data();

      // write the sibling link
      fputs(sm_stringb("e " << myName << " "
                         << stackNodeName(link->sib) << "\n"), out);

      // ideally, we'd attach the reduction nodes directly to the
      // sibling edge.  however, since I haven't developed the
      // graph applet far enough for that, I'll instead attach it
      // to the stack node directly..

      if (link->treeNode->isNonterm()) {
        // for each reduction node
        FOREACH_OBJLIST(Reduction, link->treeNode->asNonterm().reductions,
                        redIter) {
          Reduction const *red = redIter.data();
          ruleNo++;

          sm_string ruleName = reductionName(stackNode, ruleNo, red);

          // write info for the rule node
          fputs(sm_stringb("n " << ruleName << "\n"), out);

          // put the link from the stack node to the rule node
          fputs(sm_stringb("e " << myName << " " << ruleName << "\n"), out);

          // write all child links
          // ACK!  until my graph format is better, this is almost impossible
          #if 0
          SFOREACH_OBJLIST(StackNode, rule->children, child) {
            fputs(sm_stringb("e " << ruleName << " "
                               << stackNodeName(child.data()) << "\n"), out);
          }
          #endif // 0

          // blank line for visual separation
          fputs("\n", out);
        } // for each reduction
      } // if is nonterminal
    } // for each sibling
  } // for each stack node
  #endif // 0

  // done
  if (fclose(out) != 0) {
    xsyserror("fclose");
  }
}
#endif // 0
  2433: 
  2434: 
  2435: // --------------------- testing ------------------------
  2436: // read an entire file into a single sm_string
  2437: // currenty is *not* pipe-frendly because it must seek
  2438: // (candidate for adding to 'str' module)
  2439: sm_string readFileIntoString(char const *fname)
  2440: {
  2441:   // open file
  2442:   FILE *fp = fopen(fname, "r");
  2443:   if (!fp) {
  2444:     xsyserror("fopen", sm_stringb("opening `" << fname << "' for reading"));
  2445:   }
  2446: 
  2447:   // determine file's length
  2448:   if (fseek(fp, 0, SEEK_END) < 0) {
  2449:     xsyserror("fseek");
  2450:   }
  2451:   int len = (int)ftell(fp);      // conceivably problematic cast..
  2452:   if (len < 0) {
  2453:     xsyserror("ftell");
  2454:   }
  2455:   if (fseek(fp, 0, SEEK_SET) < 0) {
  2456:     xsyserror("fseek");
  2457:   }
  2458: 
  2459:   // allocate a sufficiently large buffer
  2460:   sm_string ret(len);
  2461: 
  2462:   // read the file into that buffer
  2463:   if (fread(ret.pchar(), 1, len, fp) < (size_t)len) {
  2464:     xsyserror("fread");
  2465:   }
  2466: 
  2467:   // close file
  2468:   if (fclose(fp) < 0) {
  2469:     xsyserror("fclose");
  2470:   }
  2471: 
  2472:   // return the new sm_string
  2473:   return ret;
  2474: }
  2475: 
  2476: 
  2477: // EOF
End cpp section to elk/elk_glr.cpp[1]
Start data section to elk/elk_gramanl.cxx[1 /1 ]
     1: // gramanl.cc            see license.txt for copyright and terms of use
     2: // code for gramanl.h
     3: 
     4: #include "elk_gramanl.h"
     5: 
     6: #include "sm_bit2d.h"
     7: #include "sm_bitarray.h"
     8: #include "sm_strtokp.h"
     9: #include "sm_syserr.h"
    10: #include "sm_trace.h"
    11: #include "sm_nonport.h"
    12: #include "sm_crc.h"
    13: #include "elk_flatutil.h"
    14: #include "elk_grampar.h"
    15: #include "elk_emitcode.h"
    16: #include "sm_strutil.h"
    17: #include "sm_ckheap.h"
    18: #include "elk_genml.h"
    19: 
    20: #include <fstream>     // std::ofstream
    21: #include <stdlib.h>      // getenv
    22: #include <stdio.h>       // printf
    23: 
    24: // for ParseTables::emitConstructionCode:
    25: //   linkdepend: emittables.cc
    26: 
    27: 
    28: // for now, we'll just have these be global variables; if I later
    29: // decide I actually want more than one at a time, I can move these
    30: // into GrammarAnalysis and push the interfaces to accomodate
    31: 
    32: // NOTE: only LALR(1) has been recently tested; in particular I
    33: // know that LR(1) is broken (3/26/02)
    34: 
    35: // LR(0) does all reductions, regardless of what the next token is
    36: static bool const LR0 = false;
    37: 
    38: // SLR(1) looks at a production's LHS's Follow
    39: static bool const SLR1 = false;
    40: 
    41: // LR(1) computes context-sensitive follow for each item,
    42: // depending on how that item arises in the item-set DFA
    43: static bool const LR1 = false;
    44: 
    45: // LALR(1) is like LR(1), except two states are merged if
    46: // they only differ in their items' lookaheads (so it has
    47: // the same # of states as SLR(1), while having some of the
    48: // context-sensitivity of LR(1))
    49: static bool const LALR1 = true;
    50: 
    51: 
    52: #if !defined(NDEBUG)     // track unauthorized malloc's
    53:   #define TRACK_MALLOC
    54: #endif
    55: 
    56: #ifdef TRACK_MALLOC
  // take initial snapshot
    58:   #define INITIAL_MALLOC_STATS() \
    59:     unsigned mallocCt = numMallocCalls();
    60: 
    61:   // nothing should have been allocated recently; if it has, then
    62:   // print a warning
    63:   #define CHECK_MALLOC_STATS(desc)                                              \
    64:     {                                                                           \
    65:       unsigned newCt = numMallocCalls();                                        \
    66:       if (mallocCt != newCt) {                                                  \
    67:         std::cout << (newCt - mallocCt) << " malloc calls during " << desc << std::endl;  \
    68:         mallocCt = newCt;                                                       \
    69:         breaker();                                                              \
    70:       }                                                                         \
    71:     }
    72: 
    73:   // some unavoidable allocation just happened, so just update counter
    74:   #define UPDATE_MALLOC_STATS() \
    75:     mallocCt = numMallocCalls();
    76: #else
    77:   #define INITIAL_MALLOC_STATS()
    78:   #define CHECK_MALLOC_STATS(desc)
    79:   #define UPDATE_MALLOC_STATS()
    80: #endif
    81: 
    82: 
    83: // ----------------- DottedProduction ------------------
#if 0    // used?
// Disabled copy constructor: copies every field.
// NOTE(review): this references a 'firstSet' member that init() does
// not touch -- if re-enabled, verify the field still exists.
DottedProduction::DottedProduction(DottedProduction const &obj)
{
  prod = obj.prod;
  dot = obj.dot;
  afterDot = obj.afterDot;
  firstSet = obj.firstSet;
  canDeriveEmpty = obj.canDeriveEmpty;
}
#endif // 0
    94: 
    95: 
    96: DottedProduction::DottedProduction()
    97: {
    98:   init();
    99: }
   100: 
   101: void DottedProduction::init()
   102: {
   103:   prod = NULL;
   104:   dot = -1;
   105:   afterDot = NULL;
   106:   canDeriveEmpty = false;
   107:   backPointer = NULL;
   108: }
   109: 
   110: 
// Trivial destructor: this class frees nothing (its pointers are
// presumably non-owning serfs -- nothing here deletes them).
DottedProduction::~DottedProduction()
{}
   113: 
   114: 
   115: // arbitrary integer unique to every symbol and preserved
   116: // across read/write
   117: int symbolIndex(Symbol const *s)
   118: {
   119:   if (s->isTerminal()) {
   120:     // make terminals negative since otherwise they'd
   121:     // collide with nonterminals
   122:     return -( s->asTerminalC().termIndex );
   123:   }
   124:   else {
   125:     return s->asNonterminalC().ntIndex;
   126:   }
   127: }
   128: 
   129: 
#if 0
// Disabled equality test: two items are equal iff they point at the
// same production object and have the dot at the same position.
bool DottedProduction::isEqual(DottedProduction const &obj) const
{
  return dot == obj.dot &&
         prod == obj.prod;
}
#endif // 0
   137: 
   138: 
   139: void DottedProduction::setProdAndDot(Production const *p, int d)
   140: {
   141:   prod = p;
   142:   dot = d;
   143: 
   144:   // computing this each time turned out to be significant
   145:   // according to the profiler, so we store it instead
   146:   bool dotAtEnd = (dot == prod->rhsLength());
   147:   afterDot = dotAtEnd? NULL : prod->right.nthC(dot)->sym;
   148: }
   149: 
   150: Symbol const *DottedProduction::symbolBeforeDotC() const
   151: {
   152:   xassert(!isDotAtStart());
   153:   return prod->right.nthC(dot-1)->sym;
   154: }
   155: 
#if 0
// Disabled: superseded by the 'afterDot' cache maintained in
// setProdAndDot.
Symbol const *DottedProduction::symbolAfterDotC() const
{
  xassert(!isDotAtEnd());
  return prod->right.nthC(dot)->sym;
}
#endif // 0
   163: 
   164: 
   165: void DottedProduction::print(std::ostream &os) const
   166: {
   167:   os << prod->left->name << " ->";
   168: 
   169:   int position = 0;
   170:   for (ObjListIter<Production::RHSElt> iter(prod->right);
   171:        !iter.isDone(); iter.adv(), position++) {
   172:     if (position == dot) {
   173:       os << " .";
   174:     }
   175:     os << " " << iter.data()->sym->toString();
   176:   }
   177:   if (position == dot) {
   178:     os << " .";
   179:   }
   180: }
   181: 
   182: 
// ---------------------- LRItem -------------------
// Construct an item over dotted production 'dp' with a lookahead set
// sized for 'numTerms' terminals.
LRItem::LRItem(int numTerms, DottedProduction const *dp)
  : dprod(dp),
    lookahead(numTerms)
{}

// Copy constructor: shares 'dprod' (a serf pointer) and copies the
// lookahead set.
LRItem::LRItem(LRItem const &obj)
  : dprod(obj.dprod),
    lookahead(obj.lookahead)
{}

LRItem::~LRItem()
{}

// De-serialization constructor: the lookahead set is read here, but
// 'dprod' stays NULL until xferSerfs() resolves it.
LRItem::LRItem(Flatten &flat)
  : dprod(NULL),
    lookahead(flat)
{}

// Read/write the owned data (just the lookahead set).
void LRItem::xfer(Flatten &flat)
{
  lookahead.xfer(flat);
}

// Read/write the serf pointer 'dprod', encoded as a
// (production index, dot position) pair.
void LRItem::xferSerfs(Flatten &flat, GrammarAnalysis &g)
{
  if (flat.writing()) {
    flat.writeInt(prodIndex());
    flat.writeInt(getDot());
  }
  else {
    // originally had these directly in the argument list,
    // but order of eval is undefined!
    int idx = flat.readInt();
    int d = flat.readInt();
    dprod = g.getDProdIndex(idx, d);
  }
}
   221: 
   222: 
   223: // compare two items in an arbitrary (but deterministic) way so that
   224: // sorting will always put a list of items into the same order, for
   225: // comparison purposes; this doesn't consider the lookahead
   226: STATICDEF int LRItem::diff(LRItem const *a, LRItem const *b, void*)
   227: {
   228:   // check the prodIndex first
   229:   int ret = a->prodIndex() - b->prodIndex();
   230:   if (ret) { return ret; }
   231: 
   232:   // 'dot'
   233:   ret = a->getDot() - b->getDot();
   234:   return ret;
   235: }
   236: 
   237: 
   238: bool firstIncludes(Symbol const *sym, Terminal const *t)
   239: {
   240:   if (sym->isTerminal()) {
   241:     return sym == t;
   242:   }
   243:   else {
   244:     // this generalizes 'isExtendingShift'.. and while this did help
   245:     // eliminate one S/R in a grammar I was working on, there were
   246:     // others that could not be eliminated at all (they were not
   247:     // statically decidable), so this generalization might not be
   248:     // useful afterall
   249:     return sym->asNonterminalC().first.contains(t->termIndex);
   250:   }
   251: }
   252: 
   253: bool LRItem::isExtendingShift(Nonterminal const *A, Terminal const *t) const
   254: {
   255:   return !dprod->isDotAtEnd() &&                      // shift
   256:          dprod->getProd()->left == A &&               // extending A
   257:          firstIncludes(dprod->symbolAfterDotC(), t);  // with t
   258: }
   259: 
   260: 
// Print the dotted production followed by its lookahead set.
void LRItem::print(std::ostream &os, GrammarAnalysis const &g) const
{
  dprod->print(os);
  lookahead.print(os, g);      // prints the separating comma, if necessary
}
   266: 
   267: 
// ----------------- ItemSet -------------------
// Construct an empty state with id 'anId', and transition tables
// sized for 'numTerms' terminals and 'numNonterms' nonterminals.
ItemSet::ItemSet(StateId anId, int numTerms, int numNonterms)
  : kernelItems(),
    nonkernelItems(),
    termTransition(NULL),      // inited below
    nontermTransition(NULL),   // inited below
    terms(numTerms),
    nonterms(numNonterms),
    dotsAtEnd(NULL),
    numDotsAtEnd(0),
    stateSymbol(NULL),
    id(anId),
    BFSparent(NULL)
{
  allocateTransitionFunction();
}
   284: 
   285: void ItemSet::allocateTransitionFunction()
   286: {
   287:   termTransition = new ItemSet* [terms];
   288:   nontermTransition = new ItemSet* [nonterms];
   289: 
   290:   INTLOOP(t, 0, terms) {
   291:     termTransition[t] = (ItemSet*)NULL;      // means no transition on t
   292:   }
   293:   INTLOOP(n, 0, nonterms) {
   294:     nontermTransition[n] = (ItemSet*)NULL;
   295:   }
   296: }
   297: 
   298: 
   299: ItemSet::~ItemSet()
   300: {
   301:   delete[] termTransition;
   302:   delete[] nontermTransition;
   303: 
   304:   if (dotsAtEnd) {
   305:     delete[] dotsAtEnd;
   306:   }
   307: }
   308: 
   309: 
// De-serialization constructor: pointer members start NULL; the
// scalar fields (terms, nonterms, id, CRC, ...) are filled in by a
// later xfer() call, and the pointers by xferSerfs().
ItemSet::ItemSet(Flatten &flat)
  : termTransition(NULL),
    nontermTransition(NULL),
    dotsAtEnd(NULL),
    numDotsAtEnd(0),
    stateSymbol(NULL),
    BFSparent(NULL)
{}
   318: 
   319: 
   320: Production *getNthProduction(Grammar *g, int n)
   321: {
   322:   if (0 <= n && n < g->productions.count()) {
   323:     return g->productions.nth(n);
   324:   }
   325:   else {
   326:     // my access path functions' contract is to
   327:     // return NULL on any error (as opposed to, say,
   328:     // an exception or assertion failure); this serves two
   329:     // purposes:
   330:     //   - the writing code can use it to determine the
   331:     //     maximum value of 'n'
   332:     //   - the reading code can use it to validate 'n',
   333:     //     since that comes from the input file
   334:     return NULL;
   335:   }
   336: }
   337: 
#if 0    // not needed, doesn't work
// Second access-path link: dotted production 'n' of 'p', or NULL when
// out of range (a production has rhsLength()+1 dot positions).
DottedProduction *getNthDottedProduction(Production *p, int n)
{
  if (0 <= n && n < (p->rhsLength() + 1)) {
    return p->getDProd(n);
  }
  else {
    return NULL;
  }
}
#endif // 0
   349: 
   350: 
// Read/write the owned parts of the state: the item lists, the
// transition-table dimensions, and the scalar summary fields.
// Serf pointers are handled separately in xferSerfs().
void ItemSet::xfer(Flatten &flat)
{
  xferObjList(flat, kernelItems);
  xferObjList(flat, nonkernelItems);

  flat.xferInt(terms);
  flat.xferInt(nonterms);

  // numDotsAtEnd and kernelItemsCRC are computed from
  // other data
  // NEW: but computing them requires the items, which I'm omitting

  flat.xferInt(numDotsAtEnd);
  flat.xferLong((long&)kernelItemsCRC);    // CRC squeezed through a long-sized slot

  flat.xferInt((int&)id);                  // StateId stored as int
}
   368: 
   369: 
   370: int ticksComputeNonkernel = 0;
   371: 
// Read/write the non-owning (serf) pointers of this state.  Must run
// after xfer(), since it relies on the item lists, 'terms',
// 'nonterms', and 'numDotsAtEnd' already being in place.
void ItemSet::xferSerfs(Flatten &flat, GrammarAnalysis &g)
{
  // xfer the 'prod' fields of the items
  {
    MUTATE_EACH_OBJLIST(LRItem, kernelItems, k) {
      k.data()->xferSerfs(flat, g);
    }
    MUTATE_EACH_OBJLIST(LRItem, nonkernelItems, n) {
      n.data()->xferSerfs(flat, g);
    }
  }


  #if 0
    // 'kernelItems' and 'nonkernelItems': each one accessed as
    //   g.productions.nth(???)->getDProd(???)
    xferSObjList_twoLevelAccess(
      flat,
      kernelItems,               // serf list
      static_cast<Grammar*>(&g), // root of access path
      getNthProduction,          // first access path link
      getNthDottedProduction);   // second access path link

    #if 1
      xferSObjList_twoLevelAccess(
        flat,
        nonkernelItems,            // serf list
        static_cast<Grammar*>(&g), // root of access path
        getNthProduction,          // first access path link
        getNthDottedProduction);   // second access path link
    #else
      // instead of the above, let's try computing the nonkernel items
      if (flat.reading()) {
        int start = getMilliseconds();
        g.itemSetClosure(*this);
        ticksComputeNonkernel += (getMilliseconds() - start);
      }
    #endif
  #endif // 0

  // these need to be sorted for 'changedItems'; but since
  // we're sorting by *address*, that's not necessarily
  // preserved across read/write
  // NEW: it should be stable now
  //kernelItems.insertionSort(LRItem::diff);


  // transition functions
  // (on read, the tables must exist before their entries are filled)
  if (flat.reading()) {
    allocateTransitionFunction();
  }
  INTLOOP(t, 0, terms) {
    //xferNullableSerfPtrToList(flat, termTransition[t], g.itemSets);
    xferNullableSerfPtr(flat, termTransition[t]);
  }
  INTLOOP(n, 0, nonterms) {
    //xferNullableSerfPtrToList(flat, nontermTransition[n], g.itemSets);
    xferNullableSerfPtr(flat, nontermTransition[n]);
  }


  // dotsAtEnd, numDotsAtEnd, kernelItemsCRC
  //if (flat.reading()) {
  //  changedItems();
  //}

  // 'numDotsAtEnd' was read by xfer(), so the array can be sized here
  if (flat.reading()) {
    dotsAtEnd = new LRItem const * [numDotsAtEnd];
  }
  INTLOOP(p, 0, numDotsAtEnd) {
    #if 0
    xferSerfPtr_twoLevelAccess(
      flat,
      const_cast<LRItem*&>(dotsAtEnd[p]),   // serf
      static_cast<Grammar*>(&g), // root of access path
      getNthProduction,          // first access path link
      getNthDottedProduction);   // second access path link
    #endif // 0
    xferSerfPtr(flat, dotsAtEnd[p]);
  }

  xferNullableSerfPtr(flat, stateSymbol);

  xferNullableSerfPtrToList(flat, BFSparent, g.itemSets);
}
   457: 
   458: 
   459: Symbol const *ItemSet::computeStateSymbolC() const
   460: {
   461:   // need only check kernel items since all nonkernel items
   462:   // have their dots at the left side
   463:   FOREACH_OBJLIST(LRItem, kernelItems, item) {
   464:     if (! item.data()->isDotAtStart() ) {
   465:       return item.data()->symbolBeforeDotC();
   466:     }
   467:   }
   468:   return NULL;
   469: }
   470: 
   471: 
// Bounds-check a terminal index before it subscripts
// 'termTransition'; returns the index unchanged.
int ItemSet::bcheckTerm(int index) const
{
  xassert(0 <= index && index < terms);
  return index;
}

// Same, for nonterminal indices / 'nontermTransition'.
int ItemSet::bcheckNonterm(int index) const
{
  xassert(0 <= index && index < nonterms);
  return index;
}
   483: 
   484: ItemSet *&ItemSet::refTransition(Symbol const *sym)
   485: {
   486:   if (sym->isTerminal()) {
   487:     Terminal const &t = sym->asTerminalC();
   488:     return termTransition[bcheckTerm(t.termIndex)];
   489:   }
   490:   else {
   491:     Nonterminal const &nt = sym->asNonterminalC();
   492:     return nontermTransition[bcheckNonterm(nt.ntIndex)];
   493:   }
   494: }
   495: 
   496: 
// Const view of the transition function: the state reached from this
// one on 'sym', or NULL if there is no transition.
ItemSet const *ItemSet::transitionC(Symbol const *sym) const
{
  return const_cast<ItemSet*>(this)->refTransition(sym);
}


// Set the successor state on 'sym' to 'dest'.
void ItemSet::setTransition(Symbol const *sym, ItemSet *dest)
{
  refTransition(sym) = dest;
}


// Remove the shift action on terminal 'sym' (presumably for
// prec/assoc conflict resolution -- see the note in print()).
void ItemSet::removeShift(Terminal const *sym)
{
  refTransition(sym) = NULL;
}
   513: 
   514: 
// Append 'item' to the kernel list; appendUnique skips it if an
// equal item is already present.
void ItemSet::addKernelItem(LRItem *item)
{
  // add it
  kernelItems.appendUnique(item);
}


// Put the kernel items into the canonical order assumed by the
// CRC/equality machinery (see computeKernelCRC).
void ItemSet::sortKernelItems()
{
  // sort the items to facilitate equality checks
  kernelItems.mergeSort(LRItem::diff);

  // note: the caller must call changedItems
}
   529: 
   530: 
   531: bool ItemSet::operator==(ItemSet const &obj) const
   532: {
   533:   // since common case is disequality, check the
   534:   // CRCs first, and only do full check if they
   535:   // match
   536:   if (kernelItemsCRC == obj.kernelItemsCRC) {
   537:     // since nonkernel items are entirely determined by kernel
   538:     // items, and kernel items are sorted, it's sufficient to
   539:     // check for kernel list equality
   540:     // OLD: when pointer equality was sufficient
   541:     //   return kernelItems.equalAsPointerLists(obj.kernelItems);
   542:     // NEW: use deep equality check
   543:     return kernelItems.equalAsLists(obj.kernelItems, LRItem::diff);
   544:   }
   545:   else {
   546:     // can't possibly be equal if CRCs differ
   547:     return false;
   548:   }
   549: }
   550: 
   551: 
// Append 'item' to the nonkernel list (deduplicated).
void ItemSet::addNonkernelItem(LRItem *item)
{
  nonkernelItems.appendUnique(item);

  // note: the caller is supposed to call changedItems
}


// Remove terminal 'sym' from the lookahead of every completed item
// for 'prod', in both item lists, so that reduction is no longer
// possible on that token.
void ItemSet::removeReduce(Production const *prod, Terminal const *sym)
{
  MUTATE_EACH_OBJLIST(LRItem, kernelItems, k) {
    if (k.data()->isDotAtEnd() &&
        k.data()->getProd() == prod) {
      k.data()->laRemove(sym->termIndex);
    }
  }

  MUTATE_EACH_OBJLIST(LRItem, nonkernelItems, n) {
    if (n.data()->isDotAtEnd() &&
        n.data()->getProd() == prod) {
      n.data()->laRemove(sym->termIndex);
    }
  }

  // disabled older approach: physically delete the completed items
  #if 0
  ObjListMutator<LRItem> k(kernelItems);
  while (!k.isDone()) {
    if (k.data()->isDotAtEnd() &&
        k.data()->getProd() == prod) {
      k.deleteIt();
    }
    else {
      k.adv();
    }
  }

  changedItems();
  #endif // 0
}
   591: 
   592: 
   593: void ItemSet::getAllItems(SObjList<LRItem> &dest, bool nonkernel) const
   594: {
   595:   SObjListMutator<LRItem> mut(dest);
   596: 
   597:   FOREACH_OBJLIST(LRItem, kernelItems, k) {
   598:     mut.append(const_cast<LRItem*>(k.data()));
   599:   }
   600:   if (nonkernel) {
   601:     FOREACH_OBJLIST(LRItem, nonkernelItems, n) {
   602:       mut.append(const_cast<LRItem*>(n.data()));
   603:     }
   604:   }
   605: }
   606: 
   607: 
// Comparison callback: order states by ascending id.
STATICDEF int ItemSet::diffById(ItemSet const *left, ItemSet const *right, void*)
{
  return left->id - right->id;
}
   612: 
   613: 
// Discard every item that is not a reduction, to save memory.
void ItemSet::throwAwayItems()
{
  // can't delete the whole lists because I need the
  // reductions; among other things, 'dotsAtEnd' refers to them
  deleteNonReductions(kernelItems);
  deleteNonReductions(nonkernelItems);
}
   621: 
   622: void ItemSet::deleteNonReductions(ObjList<LRItem> &list)
   623: {
   624:   ObjListMutator<LRItem> mut(list);
   625:   while (!mut.isDone()) {
   626:     if (mut.data()->isDotAtEnd()) {
   627:       // keep it
   628:       mut.adv();
   629:     }
   630:     else {
   631:       // trash it
   632:       mut.deleteIt();     // also advances
   633:     }
   634:   }
   635: }
   636: 
   637: 
// return the reductions that are ready in this state, given
// that the next symbol is 'lookahead'
// 'parsing' only enables trace output; it does not change which
// reductions are selected
void ItemSet::getPossibleReductions(ProductionList &reductions,
                                    Terminal const *lookahead,
                                    bool parsing) const
{
  // for each item with dot at end
  loopi(numDotsAtEnd) {
    LRItem const *item = dotsAtEnd[i];

    // filter by the configured LR variant (see the flags at the top
    // of this file)
    if (LR0) {
      // don't check the lookahead
    }
    else if (SLR1) {
      // the follow of its LHS must include 'lookahead'
      if (!item->getProd()->left->follow.contains(lookahead->termIndex)) {    // (constness)
        if (parsing && tracingSys("parse")) {
          trace("parse") << "state " << id
                         << ", not reducing by "
                         << item->getProd()->toString(false /*printType*/)
                         << " because " << lookahead->toString()
                         << " is not in follow of "
                         << item->getProd()->left->name << std::endl;
        }
        continue;
      }
    }
    else if (LALR1 || LR1) {
      // the item's lookahead must include 'lookahead'
      if (!item->laContains(lookahead->termIndex)) {
        if (parsing && tracingSys("parse")) {
          trace("parse") << "state " << id
                         << ", not reducing by "
                         << item->getProd()->toString(false /*printType*/)
                         << " because " << lookahead->toString()
                         << " is not in lookahead" << std::endl;
        }
        continue;
      }
    }
    else {
      xfailure("no LR variant specified?");
    }

    // ok, this one's ready
    reductions.append(const_cast<Production*>(item->getProd()));       // (constness)
  }
}
   686: 
   687: 
   688: bool ItemSet::mergeLookaheadsInto(ItemSet &dest) const
   689: {
   690:   // will return true if any changes made
   691:   bool changes = false;
   692: 
   693:   // iterate over both kernel lists simultaneously
   694:   ObjListIter<LRItem> srcIter(kernelItems);
   695:   ObjListMutator<LRItem> destIter(dest.kernelItems);
   696:   while (!srcIter.isDone() && !destIter.isDone()) {
   697:     LRItem const &srcItem = *(srcIter.data());
   698:     LRItem &destItem = *(destIter.data());
   699: 
   700:     // the caller should already have established equality of the
   701:     // non-lookahead components of the kernel items
   702:     xassert(srcItem.equalNoLA(destItem));
   703: 
   704:     // merge lookaheads
   705:     if (destItem.laMerge(srcItem)) {
   706:       changes = true;
   707:     }
   708: 
   709:     srcIter.adv();
   710:     destIter.adv();
   711:   }
   712: 
   713:   // kernel list lengths are supposed to be the same
   714:   xassert(srcIter.isDone() && destIter.isDone());
   715: 
   716:   return changes;
   717: }
   718: 
   719: 
   720: bool ItemSet::hasExtendingShift(Nonterminal const *A, Terminal const *t) const
   721: {
   722:   FOREACH_OBJLIST(LRItem, kernelItems, iter1) {
   723:     if (iter1.data()->isExtendingShift(A, t)) { return true; }
   724:   }
   725:   FOREACH_OBJLIST(LRItem, nonkernelItems, iter2) {
   726:     if (iter2.data()->isExtendingShift(A, t)) { return true; }
   727:   }
   728:   return false;
   729: }
   730: 
   731: 
// Return the production of the first reduction candidate; requires
// that this state have at least one item with the dot at the end.
Production const *ItemSet::getFirstReduction() const
{
  xassert(numDotsAtEnd >= 1);
  return dotsAtEnd[0]->getProd();
}
   737: 
   738: 
// Recompute everything derived from the item lists: the 'dotsAtEnd'
// array and its length, the kernel-items CRC, and 'stateSymbol'.
// Callers must invoke this after modifying (and sorting) the items.
void ItemSet::changedItems()
{
  // -- recompute dotsAtEnd --
  // collect all items
  SObjList<LRItem> items;      // (constness) 'items' shouldn't be used to modify the elements
  getAllItems(items);

  // count number with dots at end
  int count = 0;
  {
    SFOREACH_OBJLIST(LRItem, items, itemIter) {
      LRItem const *item = itemIter.data();

      if (item->isDotAtEnd()) {
        count++;
      }
    }
  }

  // get array of right size
  if (dotsAtEnd  &&  count == numDotsAtEnd) {
    // no need to reallocate, already correct size
  }
  else {
    // throw old away
    if (dotsAtEnd) {
      delete[] dotsAtEnd;
    }

    // allocate new array
    numDotsAtEnd = count;
    dotsAtEnd = new LRItem const * [numDotsAtEnd];
  }

  // fill array
  int index = 0;
  SFOREACH_OBJLIST(LRItem, items, itemIter) {
    LRItem const *item = itemIter.data();

    if (item->isDotAtEnd()) {
      dotsAtEnd[index] = item;
      index++;
    }
  }

  // verify both loops executed same number of times
  xassert(index == count);

  // compute CRC; in this function, I just allocate here since this
  // function is already allocation-happy
  GrowArray<DottedProduction const*> array(0 /*allocate later*/);
  computeKernelCRC(array);

  // compute this so we can throw away items later if we want to
  stateSymbol = computeStateSymbolC();
}
   795: 
   796: 
// (Re)compute 'kernelItemsCRC' from the kernel items' 'dprod'
// pointers.  'array' is caller-supplied scratch space so repeated
// calls can share one allocation.
void ItemSet::computeKernelCRC(GrowArray<DottedProduction const*> &array)
{
  int numKernelItems = kernelItems.count();

  // expand as necessary, but don't get smaller
  array.ensureAtLeast(numKernelItems);

  // we will crc the prod/dot fields, using the pointer representation
  // of 'dprod'; assumes the items have already been sorted!
  int index = 0;
  FOREACH_OBJLIST(LRItem, kernelItems, kitem) {
    array[index] = kitem.data()->dprod;
    index++;
  }

  // CRC the buffer
  kernelItemsCRC = crc32((unsigned char const*)(array.getArray()),
                         sizeof(array[0]) * numKernelItems);
}
   816: 
   817: 
// Print a human-readable dump of this state: its items (with their
// successor state, when known), the transition function, and the
// list of possible reductions.
void ItemSet::print(std::ostream &os, GrammarAnalysis const &g,
                    bool nonkernel) const
{
  os << "ItemSet " << id << ":\n";

  // collect all items
  SObjList<LRItem> items;     // (constness) don't use 'item' to modify elements
  getAllItems(items, nonkernel);

  // for each item
  SFOREACH_OBJLIST(LRItem, items, itemIter) {
    LRItem const *item = itemIter.data();

    // print its text
    os << "  ";
    item->print(os, g);
    os << "      ";

    // print any transitions on its after-dot symbol
    if (!item->isDotAtEnd()) {
      ItemSet const *is = transitionC(item->symbolAfterDotC());
      if (is == NULL) {
        // this happens if I print the item set before running closure,
        // and also after prec/assoc disambiguation
        os << "(no transition)";
      }
      else {
        os << "--> " << is->id;
      }
    }
    os << std::endl;
  }

  // print transition function directly, since I'm now throwing
  // away items sometimes
  for (int t=0; t<terms; t++) {
    if (termTransition[t]) {
      os << "  on terminal " << g.getTerminal(t)->name
         << " go to " << termTransition[t]->id << std::endl;
    }
  }

  for (int n=0; n<nonterms; n++) {
    if (nontermTransition[n]) {
      os << "  on nonterminal " << g.getNonterminal(n)->name
         << " go to " << nontermTransition[n]->id << std::endl;
    }
  }

  for (int p=0; p<numDotsAtEnd; p++) {
    os << "  can reduce by " << dotsAtEnd[p]->getProd()->toString() << std::endl;
  }
}
   871: 
   872: 
// Emit this state as a graph node plus its outgoing edges, in a
// simple line format: "n <name> <desc>" for nodes ('/' separates
// description lines) and "e <from> <to>" for edges.
void ItemSet::writeGraph(std::ostream &os, GrammarAnalysis const &g) const
{
  // node: n <name> <desc>
  os << "\nn ItemSet" << id << " ItemSet" << id << "/";
    // rest of desc will follow

  // collect all items
  SObjList<LRItem> items;         // (constness) don't use 'items' to modify elements
  getAllItems(items);

  // for each item, print the item text
  SFOREACH_OBJLIST(LRItem, items, itemIter) {
    LRItem const *item = itemIter.data();

    // print its text
    os << "   ";
    item->print(os, g);

    // THIS IS A PROBLEM!  the item's output will include
    // slashes too, if it has >1 lookahead token ... !
    os << "/";      // line separator in my node format
  }
  os << std::endl;

  // print transitions on terminals
  INTLOOP(t, 0, terms) {
    if (termTransition[t] != NULL) {
      os << "e ItemSet" << id
         << " ItemSet" << termTransition[t]->id << std::endl;
    }
  }

  // print transitions on nonterminals
  INTLOOP(nt, 0, nonterms) {
    if (nontermTransition[nt] != NULL) {
      os << "e ItemSet" << id
         << " ItemSet" << nontermTransition[nt]->id << std::endl;
    }
  }
}
   913: 
   914: 
// ------------------------ GrammarAnalysis --------------------
// Construct an empty analysis; every derived structure starts
// NULL/0/false and is filled in by the analysis passes.
GrammarAnalysis::GrammarAnalysis()
  : derivable(NULL),
    indexedNonterms(NULL),
    indexedTerms(NULL),
    numNonterms(0),
    numTerms(0),
    productionsByLHS(NULL),
    dottedProds(NULL),
    indexedProds(NULL),
    numProds(0),
    initialized(false),
    nextItemSetId(0),    // [ASU] starts at 0 too
    itemSets(),
    startState(NULL),
    cyclic(false),
    symOfInterest(NULL),
    errors(0),
    tables(NULL)
{}
   935: 
   936: 
   937: GrammarAnalysis::~GrammarAnalysis()
   938: {
   939:   if (indexedNonterms != NULL) {
   940:     delete indexedNonterms;
   941:   }
   942: 
   943:   if (indexedTerms != NULL) {
   944:     delete indexedTerms;
   945:   }
   946: 
   947:   if (productionsByLHS != NULL) {
   948:     // empties all lists automatically because of "[]"
   949:     delete[] productionsByLHS;
   950:   }
   951: 
   952:   if (indexedProds != NULL) {
   953:     delete[] indexedProds;
   954:   }
   955: 
   956:   deleteDottedProductions();
   957: 
   958:   if (derivable != NULL) {
   959:     delete derivable;
   960:   }
   961: 
   962:   if (tables) {
   963:     delete tables;
   964:   }
   965: }
   966: 
   967: 
// Bounds-checked lookup of terminal 'index' in the indexed array.
Terminal const *GrammarAnalysis::getTerminal(int index) const
{
  xassert((unsigned)index < (unsigned)numTerms);
  return indexedTerms[index];
}

// Bounds-checked lookup of nonterminal 'index'.
Nonterminal const *GrammarAnalysis::getNonterminal(int index) const
{
  xassert((unsigned)index < (unsigned)numNonterms);
  return indexedNonterms[index];
}

// Bounds-checked lookup of production 'index'.
Production const *GrammarAnalysis::getProduction(int index) const
{
  xassert((unsigned)index < (unsigned)numProds);
  return indexedProds[index];
}
   985: 
   986: ItemSet const *GrammarAnalysis::getItemSet(int index) const
   987: {
   988:   // no pretense of efficiency; this is only used interactively
   989:   FOREACH_OBJLIST(ItemSet, itemSets, iter) {
   990:     if (iter.data()->id == index) {
   991:       return iter.data();
   992:     }
   993:   }
   994:   return NULL;
   995: }
   996: 
   997: 
// Read/write the analysis: the base Grammar, the derivability
// relation, and the item sets; then rebuild the index structures,
// which are cheaper to recompute than to store.
void GrammarAnalysis::xfer(Flatten &flat)
{
  Grammar::xfer(flat);

  xferOwnerPtr(flat, derivable);

  // delay indexed[Non]Terms, productionsByLHS,
  // and initialized

  flat.xferInt(nextItemSetId);

  xferObjList(flat, itemSets);
  xferSerfPtrToList(flat, startState, itemSets);

  flat.xferBool(cyclic);

  // don't bother xferring 'symOfInterest', since it's
  // only used for debugging

  // 7/27/03: tables are no longer xferrable
  //xferOwnerPtr(flat, tables);

  // now do the easily-computable stuff
  // NOTE: these functions are also called by initializeAuxData,
  // so they need to serve both callers correctly
  computeIndexedNonterms();
  computeIndexedTerms();
  computeProductionsByLHS();
  createDottedProductions();

  // do serfs after because if I want to compute the
  // nonkernel items instead of storing them, I need
  // the indices
  MUTATE_EACH_OBJLIST(ItemSet, itemSets, iter) {
    iter.data()->xferSerfs(flat, *this);
  }

  flat.xferBool(initialized);
}
  1037: 
  1038: 
  1039: void GrammarAnalysis::
  1040:   printProductions(std::ostream &os, bool printCode) const
  1041: {
  1042:   if (cyclic) {
  1043:     os << "(cyclic!) ";
  1044:   }
  1045:   Grammar::printProductions(os, printCode);
  1046: }
  1047: 
  1048: 
  1049: void GrammarAnalysis::
  1050:   printProductionsAndItems(std::ostream &os, bool printCode) const
  1051: {
  1052:   printProductions(os, printCode);
  1053: 
  1054:   FOREACH_OBJLIST(ItemSet, itemSets, iter) {
  1055:     iter.data()->print(os, *this);
  1056:   }
  1057: }
  1058: 
  1059: 
  1060: void printSymbols(std::ostream &os, ObjList<Symbol> const &list)
  1061: {
  1062:   for (ObjListIter<Symbol> iter(list);
  1063:        !iter.isDone(); iter.adv()) {
  1064:     os << "  " << *(iter.data()) << std::endl;
  1065:   }
  1066: }
  1067: 
  1068: 
  1069: bool GrammarAnalysis::addDerivable(Nonterminal const *left, Nonterminal const *right)
  1070: {
  1071:   return addDerivable(left->ntIndex, right->ntIndex);
  1072: }
  1073: 
// Record that nonterminal #left ->* nonterminal #right.  Returns true
// iff this is new information (the matrix bit was previously 0).
bool GrammarAnalysis::addDerivable(int left, int right)
{
  // Almost as an aside, I'd like to track cyclicity in grammars.
  // It's always true that N ->* N, because 0 steps are allowed.
  // A grammar is cyclic if N ->+ N, i.e. it derives itself in
  // 1 or more steps.
  //
  // We can detect that fairly easily by tracking calls to
  // this fn with left==right.  Since N ->* N in 0 steps is
  // recorded during init (and *not* by calling this fn), the
  // only calls to this with left==right will be when the
  // derivability code detects a nonzero-length path.

  if (left==right) {
    Nonterminal *NT = indexedNonterms[left];    // ==right
    if (!NT->cyclic) {
      trace("derivable")
        << "discovered that " << NT->name << " ->+ "
        << NT->name << " (i.e. is cyclic)\n";
      NT->cyclic = true;
      cyclic = true;     // for grammar as a whole

      // Even though we didn't know this already, it doesn't
      // constitute a change in the ->* relation (which is what the
      // derivability code cares about), so we do *not* report a
      // change for the cyclicty detection.
    }
  }

  // we only made a change, and hence should return true,
  // if there was a 0 here before
  return 0 == derivable->testAndSet(point(left, right));
}
  1107: 
  1108: 
  1109: bool GrammarAnalysis::canDerive(Nonterminal const *left, Nonterminal const *right) const
  1110: {
  1111:   return canDerive(left->ntIndex, right->ntIndex);
  1112: }
  1113: 
  1114: bool GrammarAnalysis::canDerive(int left, int right) const
  1115: {
  1116:   return 1 == derivable->get(point(left, right));
  1117: }
  1118: 
  1119: 
  1120: void GrammarAnalysis::initDerivableRelation()
  1121: {
  1122:   // two-dimensional matrix to represent token derivabilities
  1123:   derivable = new Bit2d(point(numNonterms, numNonterms));
  1124: 
  1125:   // initialize it
  1126:   derivable->setall(0);
  1127:   loopi(numNonterms) {
  1128:     derivable->set(point(i,i));
  1129:       // every nonterminal can derive itself in 0 or more steps
  1130:       // (specifically, in 0 steps, at least)
  1131:       //
  1132:       // NOTE: we do *not* call addDerivable because that would
  1133:       // mess up the cyclicity detection logic
  1134:   }
  1135: }
  1136: 
  1137: 
  1138: bool GrammarAnalysis::canDeriveEmpty(Nonterminal const *nonterm) const
  1139: {
  1140:   return canDerive(nonterm, &emptyString);
  1141: }
  1142: 
  1143: 
  1144: bool GrammarAnalysis::sequenceCanDeriveEmpty(RHSEltList const &list) const
  1145: {
  1146:   RHSEltListIter iter(list);
  1147:   return iterSeqCanDeriveEmpty(iter);
  1148: }
  1149: 
  1150: bool GrammarAnalysis::iterSeqCanDeriveEmpty(RHSEltListIter iter) const
  1151: {
  1152:   // look through the sequence beginning with 'iter'; if any members cannot
  1153:   // derive emptyString, fail
  1154:   for (; !iter.isDone(); iter.adv()) {
  1155:     if (iter.data()->sym->isTerminal()) {
  1156:       return false;    // terminals can't derive emptyString
  1157:     }
  1158: 
  1159:     if (!canDeriveEmpty(&( iter.data()->sym->asNonterminalC() ))) {
  1160:       return false;    // nonterminal that can't derive emptyString
  1161:     }
  1162:   }
  1163: 
  1164:   return true;
  1165: }
  1166: 
  1167: 
  1168: bool GrammarAnalysis::firstIncludes(Nonterminal const *NT, Terminal const *term) const
  1169: {
  1170:   return NT->first.contains(term->termIndex);
  1171: }
  1172: 
#if 0
// DISABLED: not compiled.  The live code apparently treats 'first' as
// a terminal bit set (computeFirst uses merge, firstIncludes uses
// contains), whereas this variant built it as a list via
// prependUnique.
bool GrammarAnalysis::addFirst(Nonterminal *NT, Terminal *term)
{
  return NT->first.prependUnique(term);

  // regarding non-constness of 'term':
  // highly nonideal.. the problem is that by using annotations in
  // the structures themselves, I have a hard time saying that I
  // intend to modify the annotations but not the "key" data...
  // this cast is really a symptom of that too.. (and, perhaps, also
  // that I don't have a List class that promises to never permit
  // modification of the pointed-to data.. but it's not clear I'd
  // be better of using it here even if I had it)
}
#endif // 0
  1188: 
  1189: 
  1190: bool GrammarAnalysis::followIncludes(Nonterminal const *NT, Terminal const *term) const
  1191: {
  1192:   return NT->follow.contains(term->termIndex);
  1193: }
  1194: 
#if 0
// DISABLED: not compiled.  The live code updates 'follow' via bit-set
// merge in computeFollow instead of list prepends.
// returns true if Follow(NT) is changed by adding 'term' to it
bool GrammarAnalysis::addFollow(Nonterminal *NT, Terminal *term)
{
  return NT->follow.prependUnique(term);
}
#endif // 0
  1202: 
  1203: 
  1204: // ----------------- Grammar algorithms --------------------------
  1205: // create and initialize 'indexedNonterms'
  1206: void GrammarAnalysis::computeIndexedNonterms()
  1207: {
  1208:   // map: ntIndex -> Nonterminal*
  1209:   numNonterms = Grammar::numNonterminals();
  1210:   indexedNonterms = new Nonterminal* [numNonterms];
  1211: 
  1212:   // fill it
  1213:   indexedNonterms[emptyStringIndex] = &emptyString;
  1214:   int index = emptyStringIndex;
  1215:   emptyString.ntIndex = index++;
  1216: 
  1217:   for (ObjListMutator<Nonterminal> sym(nonterminals);
  1218:        !sym.isDone(); index++, sym.adv()) {
  1219:     indexedNonterms[index] = sym.data();    // map: index to symbol
  1220:     sym.data()->ntIndex = index;            // map: symbol to index
  1221:   }
  1222: }
  1223: 
  1224: 
  1225: // create and initialize 'indexedTerms'
  1226: void GrammarAnalysis::computeIndexedTerms()
  1227: {
  1228:   // map: termIndex -> Terminal*
  1229:   // the ids have already been assigned; but I'm going to continue
  1230:   // to insist on a contiguous space starting at 0
  1231:   numTerms = Grammar::numTerminals();
  1232:   indexedTerms = new Terminal* [numTerms];
  1233:   loopi(numTerminals()) {
  1234:     indexedTerms[i] = NULL;      // used to track id duplication
  1235:   }
  1236:   for (ObjListMutator<Terminal> sym(terminals);
  1237:        !sym.isDone(); sym.adv()) {
  1238:     int index = sym.data()->termIndex;   // map: symbol to index
  1239:     if (indexedTerms[index] != NULL) {
  1240:       xfailure(sm_stringc << "terminal index collision at index " << index);
  1241:     }
  1242:     indexedTerms[index] = sym.data();    // map: index to symbol
  1243:   }
  1244: }
  1245: 
  1246: 
  1247: // set the first/follow of all nonterminals to the correct size
  1248: void GrammarAnalysis::resetFirstFollow()
  1249: {
  1250:   MUTATE_EACH_NONTERMINAL(nonterminals, sym) {
  1251:     sym.data()->first.reset(numTerminals());
  1252:     sym.data()->follow.reset(numTerminals());
  1253:   }
  1254: }
  1255: 
  1256: 
  1257: // create and initialize 'productionsByLHS' and 'indexedProds'
  1258: void GrammarAnalysis::computeProductionsByLHS()
  1259: {
  1260:   // map: nonterminal -> productions with that nonterm on LHS
  1261:   productionsByLHS = new SObjList<Production> [numNonterms];
  1262: 
  1263:   // map: prodIndex -> production
  1264:   numProds = productions.count();
  1265:   indexedProds = new Production* [numProds];
  1266:   memset(indexedProds, 0, sizeof(*indexedProds) * numProds);
  1267: 
  1268:   // fill in both maps
  1269:   {
  1270:     MUTATE_EACH_PRODUCTION(productions, prod) {        // (constness)
  1271:       int LHSindex = prod.data()->left->ntIndex;
  1272:       xassert(LHSindex < numNonterms);
  1273: 
  1274:       productionsByLHS[LHSindex].append(prod.data());
  1275:       indexedProds[prod.data()->prodIndex] = prod.data();
  1276:     }
  1277:   }
  1278: 
  1279:   // verify we filled the 'prodIndex' map
  1280:   for (int id=0; id<numProds; id++) {
  1281:     xassert(indexedProds[id] != NULL);
  1282:   }
  1283: }
  1284: 
  1285: 
  1286: void GrammarAnalysis::createDottedProductions()
  1287: {
  1288:   // map: prodIndex x dotPosn -> DottedProduction
  1289:   //DottedProduction const **
  1290:   dottedProds = new DottedProduction* [numProds];
  1291:   memset(dottedProds, 0, sizeof(*dottedProds) * numProds);
  1292: 
  1293:   FOREACH_PRODUCTION(productions, iter) {
  1294:     Production const *prod = iter.data();
  1295:     int rhsLen = prod->rhsLength();
  1296:     xassert(rhsLen >= 0);
  1297:     int id = prod->prodIndex;
  1298: 
  1299:     // one dottedproduction for every dot position, which is one
  1300:     // more than the # of RHS elements
  1301:     DottedProduction *array = new DottedProduction[rhsLen + 1];
  1302:     dottedProds[id] = array;
  1303: 
  1304:     // fill in each one
  1305:     for (int posn=0; posn <= rhsLen; posn++) {
  1306:       array[posn].setProdAndDot(prod, posn);
  1307:     }
  1308:   }
  1309: 
  1310:   // verify we filled the whole table, i.e. that the production
  1311:   // indices form a dense map
  1312:   for (int id=0; id<numProds; id++) {
  1313:     xassert(dottedProds[id] != NULL);
  1314:   }
  1315: }
  1316: 
  1317: 
  1318: void GrammarAnalysis::deleteDottedProductions()
  1319: {
  1320:   if (dottedProds != NULL) {
  1321:     for (int id=0; id<numProds; id++) {
  1322:       delete[] dottedProds[id];
  1323:     }
  1324:     delete[] dottedProds;
  1325:     dottedProds = NULL;
  1326:   }
  1327: }
  1328: 
  1329: 
  1330: DottedProduction const *GrammarAnalysis::
  1331:   getDProd(Production const *prod, int posn) const
  1332: {
  1333:   xassert(posn <= prod->rhsLength());
  1334:   return &( dottedProds[prod->prodIndex][posn] );
  1335: }
  1336: 
  1337: DottedProduction const *GrammarAnalysis::
  1338:   getDProdIndex(int prodIndex, int posn) const
  1339: {
  1340:   // go through the other fn to bounds-check 'posn'
  1341:   return getDProd(getProduction(prodIndex), posn);
  1342: }
  1343: 
  1344: 
#ifndef NDEBUG
// Return the item with the dot advanced one position.  Pointer
// arithmetic is valid because createDottedProductions allocates all
// dot positions of a production contiguously, in dot order.
DottedProduction const *GrammarAnalysis::
  nextDProd(DottedProduction const *dp) const
{
  xassert(!dp->isDotAtEnd());    // otherwise dp+1 is out of bounds
  return dp + 1;
}
#endif // !NDEBUG
  1353: 
  1354: 
// NOTE: the sequence of initialization actions in this function
// and the functions it calls must interact properly with the
// sequence in GrammarAnalysis::xfer
void GrammarAnalysis::initializeAuxData()
{
  // at the moment, calling this twice leaks memory
  xassert(!initialized);

  // build the index maps before anything that depends on indices
  computeIndexedNonterms();
  computeIndexedTerms();
  resetFirstFollow();

  computeProductionsByLHS();
  computeReachable();

  // finish the productions before we compute the
  // dotted productions
  MUTATE_EACH_PRODUCTION(productions, prod) {
    prod.data()->finished(numTerminals());
  }

  createDottedProductions();

  // initialize the derivable relation
  initDerivableRelation();

  // mark the grammar as initialized
  initialized = true;
}
  1384: 
  1385: 
// Compute the complete ->* ("can derive") relation among nonterminals,
// alternating production-based discovery with a transitive-closure
// step until a fixed point.  Cyclicity (N ->+ N) is detected as a side
// effect inside addDerivable.
void GrammarAnalysis::computeWhatCanDeriveWhat()
{
  xassert(initialized);


  // iterate: propagate 'true' bits across the derivability matrix
  // (i.e. compute transitive closure on the canDerive relation)
  for (;;) {
    int changes = 0;       // for this iter, # of times we set a matrix bit

    // --------- first part: add new canDerive relations --------
    // loop over all productions
    for (ObjListIter<Production> prodIter(productions);
         !prodIter.isDone(); prodIter.adv()) {
      // convenient alias
      Production const *prod = prodIter.data();

      // since I don't include 'empty' explicitly in my rules, I won't
      // conclude that anything can derive empty, which is a problem;
      // so I special-case it here
      if (prod->right.isEmpty()) {
        addDerivable(prod->left, &emptyString);
        continue;       // no point in looping over RHS symbols since there are none
      }

      // iterate over RHS symbols, seeing if the LHS can derive that
      // RHS symbol (by itself)
      for (RHSEltListIter rightSym(prod->right);
           !rightSym.isDone(); rightSym.adv()) {

        if (rightSym.data()->sym->isTerminal()) {
          // if prod->left derives a sm_string containing a terminal,
          // then it can't derive any nontermial alone (using this
          // production, at least) -- empty is considered a nonterminal
          break;
        }

        // otherwise, it's a nonterminal
        Nonterminal const &rightNT = rightSym.data()->sym->asNonterminalC();

        // check if we already know that LHS derives rightNT
        if (canDerive(prod->left, &rightNT)) {
          // we already know that prod->left derives rightSym,
          // so let's not check it again
        }

        else {
          // we are wondering if prod->left can derive rightSym.. for
          // this to be true, every symbol that comes after rightSym
          // must be able to derive emptySymbol (we've already verified
          // by now that every symbol to the *left* can derive empty)
          RHSEltListIter afterRightSym(rightSym);
          bool restDeriveEmpty = true;
          for (afterRightSym.adv();    // *after* right symbol
               !afterRightSym.isDone(); afterRightSym.adv()) {

            if (afterRightSym.data()->sym->isTerminal()  ||
                  // if it's a terminal, it can't derive emptyString
                !canDeriveEmpty(&( afterRightSym.data()->sym->asNonterminalC() ))) {
                  // this symbol can't derive empty sm_string (or, we don't
                  // yet know that it can), so we conclude that prod->left
                  // can't derive rightSym
              restDeriveEmpty = false;
              break;
            }
          }

          if (restDeriveEmpty) {
            // we have discovered that prod->left can derive rightSym
            bool chgd = addDerivable(prod->left, &rightNT);
            xassert(chgd);    // above, we verified we didn't already know this

            changes++;

            trace("derivable")
              << "discovered (by production): " << prod->left->name
              << " ->* " << rightNT.name << "\n";
          }
        }

        // ok, we've considered prod->left deriving rightSym.  now, we
        // want to consider whether prod->left can derive any of the
        // symbols that follow rightSym in this production.  for this
        // to be true, rightSym itself must derive the emptyString
        if (!canDeriveEmpty(&rightNT)) {
          // it doesn't -- no point in further consideration of
          // this production
          break;
        }
      } // end of loop over RHS symbols
    } // end of loop over productions


    // -------- second part: compute closure over existing relations ------
    // I'll do this by computing R + R^2 -- that is, I'll find all
    // paths of length 2 and add an edge between their endpoints.
    // I do this, rather than computing the entire closure now, since
    // on the next iter I will add more relations and have to re-do
    // a full closure; iterative progress seems a better way.

    // I don't consider edges (u,u) because it messes up my cyclicty
    // detection logic.  (But (u,v) and (v,u) is ok, and in fact is
    // what I want, for detecting cycles.)

    // for each node u (except empty)
    int numNonterms = numNonterminals();
    for (int u=1; u<numNonterms; u++) {
      // for each edge (u,v) where u != v
      for (int v=0; v<numNonterms; v++) {
        if (u==v || !canDerive(u,v)) continue;

        // for each edge (v,w) where v != w
        for (int w=0; w<numNonterms; w++) {
          if (v==w || !canDerive(v,w)) continue;

          // add an edge (u,w), if there isn't one already
          if (addDerivable(u,w)) {
            changes++;
            trace("derivable")
              << "discovered (by closure step): "
              << indexedNonterms[u]->name << " ->* "
              << indexedNonterms[w]->name << "\n";
          }
        }
      }
    }


    // ------ finally: iterate until no changes -------
    if (changes == 0) {
      // didn't make any changes during the last iter, so
      // everything has settled
      break;
    }
  } // end of loop until settles


  // I used to do all closure here and no closure in the loop.
  // But that fails in cases where closure (when it reveals
  // more things that derive emptyString) yields new opportunities
  // for derives-relation discovery.  Therefore I now alternate
  // between them, and at the end, no closure is necessary.
}
  1529: 
  1530: 
  1531: // set Nonterminal::superset to correspond to Nonterminal::subsets
  1532: void GrammarAnalysis::computeSupersets()
  1533: {
  1534:   FOREACH_OBJLIST_NC(Nonterminal, nonterminals, iter1) {
  1535:     Nonterminal *super = iter1.data();
  1536: 
  1537:     SFOREACH_OBJLIST_NC(Nonterminal, super->subsets, iter2) {
  1538:       Nonterminal *sub = iter2.data();
  1539: 
  1540:       // for now, only handle 'super' as a partial function
  1541:       if (sub->superset != NULL) {
  1542:         xfailure(sm_stringc << sub->name << " has more than one superset");
  1543:       }
  1544:       sub->superset = super;
  1545:     }
  1546:   }
  1547: }
  1548: 
  1549: 
  1550: // Compute, for each nonterminal, the "First" set, defined as:
  1551: //
  1552: //   First(N) = { x | N ->* x alpha }, where alpha is any sequence
  1553: //                                     of terminals and nonterminals
  1554: //
  1555: // If N can derive emptyString, I'm going to say that empty is
  1556: // *not* in First, despite what Aho/Sethi/Ullman says.  I do this
  1557: // because I have that information readily as my derivable relation,
  1558: // and because it violates the type system I've devised.
  1559: //
  1560: // I also don't "compute" First for terminals, since they are trivial
  1561: // (First(x) = {x}).
// Iterate to a fixed point, merging First(RHS) into First(LHS) (and
// into the production's own firstSet) for every production.
void GrammarAnalysis::computeFirst()
{
  bool tr = tracingSys("first");
  int numTerms = numTerminals();

  // iterate, looking for new First members, until no changes
  int changes = 1;   // so the loop begins
  while (changes > 0) {
    changes = 0;

    // for each production
    for (ObjListMutator<Production> prodIter(productions);
         !prodIter.isDone(); prodIter.adv()) {
      // convenient aliases
      Production *prod = prodIter.data();
      Nonterminal *LHS = prod->left;
        // the list iter is mutating because I modify LHS's First set

      // compute First(RHS-sequence)
      TerminalSet firstOfRHS(numTerms);
      firstOfSequence(firstOfRHS, prod->right);

      // store this back into 'prod'
      prod->firstSet.merge(firstOfRHS);

      // add everything in First(RHS-sequence) to First(LHS);
      // merge() returns true iff the set grew
      if (LHS->first.merge(firstOfRHS)) {
        changes++;     // grew: need another pass
        if (tr) {
          std::ostream &trs = trace("first");
          trs << "added ";
          firstOfRHS.print(trs, *this);
          trs << " to " << LHS->name << " because of "
              << prod->toString() << std::endl;
        }
      }
    } // for (productions)
  } // while (changes)

  // optionally dump the final First sets
  if (tr) {
    FOREACH_NONTERMINAL(nonterminals, iter) {
      Nonterminal const &nt = *(iter.data());

      std::ostream &trs = trace("first") << " " << nt.name << ": ";
      nt.first.print(trs, *this);
      trs << std::endl;
    }
  }
}
  1611: 
  1612: 
  1613: // 'sequence' isn't const because we need to hand pointers over to
  1614: // the 'destList', which isn't const; similarly for 'this'
  1615: // (what I'd like here is to say that 'sequence' and 'this' are const
  1616: // if 'destList' can't modify the things it contains)
  1617: void GrammarAnalysis::firstOfSequence(TerminalSet &destList,
  1618:                                       RHSEltList const &sequence)
  1619: {
  1620:   RHSEltListIter iter(sequence);
  1621:   firstOfIterSeq(destList, iter);
  1622: }
  1623: 
  1624: // similar to above, 'sym' needs to be a mutator
  1625: void GrammarAnalysis::firstOfIterSeq(TerminalSet &destList,
  1626:                                      RHSEltListIter sym)
  1627: {
  1628:   //int numTerms = numTerminals();
  1629: 
  1630:   // for each sequence member such that all
  1631:   // preceeding members can derive emptyString
  1632:   for (; !sym.isDone(); sym.adv()) {
  1633:     // LHS -> x alpha   means x is in First(LHS)
  1634:     if (sym.data()->sym->isTerminal()) {
  1635:       destList.add(sym.data()->sym->asTerminal().termIndex);
  1636:       break;    // stop considering RHS members since a terminal
  1637:                 // effectively "hides" all further symbols from First
  1638:     }
  1639: 
  1640:     // sym must be a nonterminal
  1641:     Nonterminal const &nt = sym.data()->sym->asNonterminalC();
  1642: 
  1643:     // anything already in nt's First should be added to destList
  1644:     destList.merge(nt.first);
  1645: 
  1646:     // if nt can't derive emptyString, then it blocks further
  1647:     // consideration of right-hand side members
  1648:     if (!canDeriveEmpty(&nt)) {
  1649:       break;
  1650:     }
  1651:   } // for (RHS members)
  1652: }
  1653: 
  1654: 
// For every dotted production, precompute First(symbols after the dot)
// and whether that remainder can derive emptyString.
void GrammarAnalysis::computeDProdFirsts()
{
  // for each production..
  FOREACH_PRODUCTION(productions, prodIter) {
    // for each dot position -- NOTE: this includes the position at
    // the very end (posn == rhsLen), where the remainder is empty
    int rhsLen = prodIter.data()->rhsLength();
    for (int posn=0; posn <= rhsLen; posn++) {
      DottedProduction *dprod = getDProd_nc(prodIter.data(), posn);

      // compute its first
      RHSEltListIter symIter(dprod->getProd()->right, posn);
      dprod->firstSet.reset(numTerms);
      firstOfIterSeq(dprod->firstSet, symIter);

      // can it derive empty?  ('symIter' was passed by value above,
      // so it still points at 'posn' here)
      dprod->canDeriveEmpty = iterSeqCanDeriveEmpty(symIter);
    }
  }
}
  1674: 
  1675: 
// Compute Follow(N) for every nonterminal, iterating to a fixed point
// over the two standard rules ([ASU] sec 4.4):
//   rule 1: A -> alpha B beta          ==> First(beta) <= Follow(B)
//   rule 2: A -> alpha B beta, beta ->* empty
//                                      ==> Follow(A) <= Follow(B)
void GrammarAnalysis::computeFollow()
{
  int numTerms = numTerminals();

  // loop until no changes
  int changes = 1;
  while (changes > 0) {
    changes = 0;

    // 'mutate' is needed because adding 'term' to the follow of 'nt'
    // needs a mutable 'term' and 'nt'

    // for each production
    MUTATE_EACH_PRODUCTION(productions, prodIter) {
      Production *prod = prodIter.data();

      // for each RHS nonterminal member
      MUTATE_EACH_OBJLIST(Production::RHSElt, prod->right, rightSym) {
        if (rightSym.data()->sym->isTerminal()) continue;

        // convenient alias
        Nonterminal &rightNT = rightSym.data()->sym->asNonterminal();

        // I'm not sure what it means to compute Follow(emptyString),
        // so let's just not do so
        if (&rightNT == &emptyString) {
          continue;
        }

        // an iterator pointing to the symbol just after
        // 'rightSym' will be useful below
        RHSEltListMutator afterRightSym(rightSym);
        afterRightSym.adv();    // NOTE: 'isDone()' may be true now

        // rule 1:
        // if there is a production A -> alpha B beta, then
        // everything in First(beta) is in Follow(B)
        {
          // compute First(beta)
          TerminalSet firstOfBeta(numTerms);
          firstOfIterSeq(firstOfBeta, afterRightSym);

          // put those into Follow(rightNT); merge() returns true
          // iff the set grew
          if (rightNT.follow.merge(firstOfBeta)) {
            changes++;
            if (&rightNT == symOfInterest) {
              std::ostream &trs = trace("follow-sym");
              trs << "Follow(" << rightNT.name
                  << "): adding ";
              firstOfBeta.print(trs, *this);
              trs << " by first(RHS-tail) of " << *prod
                  << std::endl;
            }
          }
        }

        // rule 2:
        // if there is a production A -> alpha B, or a
        // production A -> alpha B beta where beta ->* empty ...
        if (iterSeqCanDeriveEmpty(afterRightSym)) {
          // ... then everything in Follow(A) is in Follow(B)
          if (rightNT.follow.merge(prod->left->follow)) {
            changes++;
            if (&rightNT == symOfInterest) {
              std::ostream &trs = trace("follow-sym");
              trs << "Follow(" << rightNT.name
                  << "): adding ";
              prod->left->follow.print(trs, *this);
              trs << " by follow(LHS) of " << *prod
                  << std::endl;
            }
          }
        }

      } // for each RHS nonterminal member
    } // for each production
  } // until no changes
}
  1754: 
  1755: 
  1756: // [ASU] alg 4.4, p.190
  1757: void GrammarAnalysis::computePredictiveParsingTable()
  1758: {
  1759:   int numTerms = numTerminals();
  1760:   int numNonterms = numNonterminals();
  1761: 
  1762:   // the table will be a 2d array of lists of productions
  1763:   ProductionList *table = new ProductionList[numTerms * numNonterms];     // (owner)
  1764:   #define TABLE(term,nt) table[(term) + (nt)*numNonterms]
  1765: 
  1766:   // for each production 'prod' (non-const iter because adding them
  1767:   // to ProductionList, which doesn't promise to not change them)
  1768:   MUTATE_EACH_PRODUCTION(productions, prodIter) {
  1769:     Production *prod = prodIter.data();
  1770: 
  1771:     // for each terminal 'term' in First(RHS)
  1772:     TerminalSet firsts(numTerms);
  1773:     firstOfSequence(firsts, prod->right);
  1774:     for (int termIndex=0; termIndex<numTerms; termIndex++) {
  1775:       if (!firsts.contains(termIndex)) continue;
  1776: 
  1777:       // add 'prod' to table[LHS,term]
  1778:       TABLE(prod->left->ntIndex, termIndex).prependUnique(prod);
  1779:     }
  1780: 
  1781:     // if RHS ->* emptyString, ...
  1782:     if (sequenceCanDeriveEmpty(prod->right)) {
  1783:       // ... then for each terminal 'term' in Follow(LHS), ...
  1784:       for (int termIndex=0; termIndex<numTerms; termIndex++) {
  1785:         if (!firsts.contains(termIndex)) continue;
  1786: 
  1787:         // ... add 'prod' to table[LHS,term]
  1788:         TABLE(prod->left->ntIndex, termIndex).prependUnique(prod);
  1789:       }
  1790:     }
  1791:   }
  1792: 
  1793: 
  1794:   // print the resulting table
  1795:   std::ostream &os = trace("pred-table") << std::endl;
  1796: 
  1797:   // for each nonterminal
  1798:   INTLOOP(nonterm, 0, numNonterms) {
  1799:     os << "Row " << indexedNonterms[nonterm]->name << ":\n";
  1800: 
  1801:     // for each terminal
  1802:     INTLOOP(term, 0, numTerms) {
  1803:       os << "  Column " << indexedTerms[term]->name << ":";
  1804: 
  1805:       // for each production in table[nonterm,term]
  1806:       SFOREACH_PRODUCTION(TABLE(nonterm,term), prod) {
  1807:         os << "   ";
  1808:         prod.data()->print(os);
  1809:       }
  1810: 
  1811:       os << std::endl;
  1812:     }
  1813:   }
  1814: 
  1815:   // cleanup
  1816:   #undef TABLE
  1817:   delete[] table;
  1818: }
  1819: 
  1820: 
  1821: // these hashtables are keyed using the DottedProduction,
  1822: // but yield LRItems as values
  1823: 
  1824: // for storing dotted productions in a hash table, this is
  1825: // the hash function itself
  1826: STATICDEF unsigned LRItem::hash(DottedProduction const *key)
  1827: {
  1828:   //DottedProduction const *dp = (DottedProduction const*)key;
  1829: 
  1830:   // on the assumption few productions have 20 RHS elts..
  1831:   //int val = dp->dot + (20 * dp->prod->prodIndex);
  1832: 
  1833:   // just use the address.. they're all shared..
  1834:   return HashTable::lcprngHashFn((void*)key);
  1835: }
  1836: 
// given the data, yield the key
// (hashtable callback: an LRItem is keyed by its shared
// DottedProduction pointer)
STATICDEF DottedProduction const *LRItem::dataToKey(LRItem *it)
{
  return it->dprod;
}
  1842: 
// compare two dotted production keys for equality; since dotted
// productions are shared (canonical, one object per prod/dot pair),
// pointer equality suffices
STATICDEF bool LRItem::dpEqual(DottedProduction const *key1,
                               DottedProduction const *key2)
{
  return key1 == key2;
}
  1850: 
  1851: 
// based on [ASU] figure 4.33, p.223
// NOTE: sometimes this is called with nonempty nonkernel items...
//
// Compute the LR closure of 'itemSet' in place: starting from its kernel
// items (and any pre-existing nonkernel items), repeatedly expand each
// item "A -> alpha . B beta, LA" by adding "B -> . gamma, First(beta LA)"
// for every production of B, merging lookahead sets whenever the same
// dotted production arises more than once.  The resulting nonkernel
// items end up in itemSet.nonkernelItems.
void GrammarAnalysis::itemSetClosure(ItemSet &itemSet)
{
  bool const tr = tracingSys("closure");
  std::ostream &trs = trace("closure");     // trace stream
  if (tr) {
    trs << "computing closure of ";
    itemSet.print(trs, *this);
  }

  // hashtable, list of items still yet to close; items are
  // simultaneously in both the hash and the list, or not in either
  #if 0
  OwnerKHashArray<LRItem, DottedProduction> workhash(
    &LRItem::dataToKey,
    &LRItem::hash,
    &LRItem::dpEqual, 13);
  #endif // 0

  // every 'item' on the worklist has item->dprod->backPointer == item;
  // every 'dprod' not associated has dprod->backPointer == NULL
  ArrayStack<LRItem*> worklist;

  // scratch terminal set for singleItemClosure
  TerminalSet scratchSet(numTerminals());

  // and another for the items we've finished
  OwnerKHashTable<LRItem, DottedProduction> finished(
    &LRItem::dataToKey,
    &LRItem::hash,
    &LRItem::dpEqual, 13);
  finished.setEnableShrink(false);

  // put all the nonkernels we have into 'finished'
  // (handles the NOTE above: callers may pass a partially-closed set)
  while (itemSet.nonkernelItems.isNotEmpty()) {
    LRItem *dp = itemSet.nonkernelItems.removeFirst();
    finished.add(dp->dprod, dp);
  }

  // first, close the kernel items -> worklist
  FOREACH_OBJLIST(LRItem, itemSet.kernelItems, itemIter) {
    singleItemClosure(finished, worklist, itemIter.data(), scratchSet);
  }

  // drain the worklist to a fixpoint
  while (worklist.isNotEmpty()) {
    // pull the first production
    LRItem *item = worklist.pop();
    xassert(item->dprod->backPointer == item);     // was on worklist
    item->dprod->backPointer = NULL;               // now off of worklist

    // put it into list of 'done' items; this way, if this
    // exact item is generated during closure, it will be
    // seen and re-inserted (instead of duplicated)
    finished.add(item->dprod, item);

    // close it -> worklist
    singleItemClosure(finished, worklist, item, scratchSet);
  }

  // move everything from 'finished' to the nonkernel items list
  try {
    for (OwnerKHashTableIter<LRItem, DottedProduction> iter(finished);
         !iter.isDone(); iter.adv()) {
      // temporarily, the item is owned both by the hashtable
      // and the list
      itemSet.nonkernelItems.prepend(iter.data());
    }
    finished.disownAndForgetAll();
  }
  catch (...) {
    breaker();    // debug breakpoint

    // resolve the multiple ownership by leaking some
    finished.disownAndForgetAll();
    throw;
  }

  // we potentially added a bunch of things
  itemSet.changedItems();

  if (tr) {
    trs << "done with closure of state " << itemSet.id << std::endl;
    itemSet.print(trs, *this);
  }
}
  1938: 
  1939: 
// Expand a single item "A -> alpha . B beta, LA": for each production
// "B -> gamma", ensure an item "B -> . gamma, First(beta LA)" exists,
// either creating it (pushed on 'worklist') or merging its lookahead
// into an existing copy (moving that copy from 'finished' back onto
// 'worklist' if the merge changed it).  'newItemLA' is caller-provided
// scratch so this hot inner loop allocates nothing in the common case;
// the MALLOC_STATS macros enforce that discipline.
void GrammarAnalysis
  ::singleItemClosure(OwnerKHashTable<LRItem, DottedProduction> &finished,
                      ArrayStack<LRItem*> &worklist,
                      //OwnerKHashArray<LRItem, DottedProduction> &workhash,
                      LRItem const *item, TerminalSet &newItemLA)
{
  INITIAL_MALLOC_STATS();

  bool const tr = tracingSys("closure");
  std::ostream &trs = trace("closure");     // trace stream

  if (tr) {
    trs << "  considering item ";
    item->print(trs, *this);
    trs << std::endl;
  }

  // dot at the right end: nothing to expand
  if (item->isDotAtEnd()) {
    if (tr) {
      trs << "    dot is at the end" << std::endl;
    }
    CHECK_MALLOC_STATS("return, dot at end");
    return;
  }

  // in comments that follow, 'item' is broken down as
  //   A -> alpha . B beta, LA

  // get the symbol B (the one right after the dot)
  Symbol const *B = item->symbolAfterDotC();
  if (B->isTerminal()) {
    if (tr) {
      trs << "    symbol after the dot is a terminal" << std::endl;
    }
    CHECK_MALLOC_STATS("return, dot sym is terminal");
    return;
  }
  int nontermIndex = B->asNonterminalC().ntIndex;

  // could pull this out of even this fn, to the caller, but I don't
  // see any difference in time when I make it static (which simulates
  // the effect, though static itself is a bad idea because it makes
  // the size constant through a whole run); but maybe when other things
  // are faster I will be able to notice the difference, so I might
  // revisit this
  //TerminalSet newItemLA(numTerminals());

  // for each production "B -> gamma"
  SMUTATE_EACH_PRODUCTION(productionsByLHS[nontermIndex], prodIter) {    // (constness)
    Production &prod = *(prodIter.data());
    if (tr) {
      trs << "    considering production " << prod << std::endl;
    }

    // key to good performance: do *no* dynamic allocation in this
    // loop (one of two inner loops in the grammar analysis), until a
    // new item is actually *needed* (which is the uncommon case); for
    // example, all debug output statements are guarded by 'if (tr)'
    // because otherwise they would allocate

    // invariant of the indexed productions list
    xassert(prod.left == B);

    // construct "B -> . gamma, First(beta LA)";
    // except, don't actually build it until later; in the meantime,
    // determine which DP and lookahead it would use if created
    DottedProduction const *newDP = getDProd(&prod, 0 /*dot at left*/);

    // get beta (what follows B in 'item')
    DottedProduction const *beta = nextDProd(item->dprod);

    // get First(beta) -> new item's lookahead
    newItemLA = beta->firstSet;

    // if beta ->* epsilon, add LA
    if (beta->canDeriveEmpty) {
      newItemLA.merge(item->lookahead);
    }

    if (tr) {
      trs << "      built item ";
      // this is what LRItem::print would do if I actually
      // constructed the object
      newDP->print(trs);
      trs << ", ";
      newItemLA.print(trs, *this);
      trs << std::endl;
    }

    // is 'newDP' already there?
    // check in working and finished tables
    bool inDoneList = true;
    LRItem *already = newDP->backPointer;   // workhash.lookup(newDP);
    if (already) {
      inDoneList = false;
    }
    else {
      already = finished.get(newDP);
    }

    if (already) {
      // yes, it's already there
      if (tr) {
        trs << "      looks similar to ";
        already->print(trs, *this);
        trs << std::endl;
      }

      // but the new item may have additional lookahead
      // components, so merge them with the old
      if (already->lookahead.merge(newItemLA)) {
        // merging changed 'already'
        if (tr) {
          trs << "      (chg) merged it to make ";
          already->print(trs, *this);
          trs << std::endl;
        }

        if (inDoneList) {
          // pull from the 'done' list and put in worklist, since the
          // lookahead changed
          finished.remove(already->dprod);
          CHECK_MALLOC_STATS("before worklist push");
          worklist.push(already);
          xassert(already->dprod->backPointer == NULL);   // was not on
          already->dprod->backPointer = already;          // now is on worklist
          UPDATE_MALLOC_STATS();     // allow expansion
        }
        else {
          // 'already' is in the worklist, so that's fine
        }
      }
      else {
        if (tr) {
          trs << "      this dprod already existed" << std::endl;
        }
      }
    }
    else {
      CHECK_MALLOC_STATS("bunch of stuff before 'if'");

      // it's not already there, so add it to worklist (but first
      // actually create it!)
      LRItem *newItem = new LRItem(numTerms, newDP);
      newItem->lookahead.copy(newItemLA);
      if (tr) {
        trs << "      this dprod is new, queueing it to add" << std::endl;
      }

      worklist.push(newItem);
      xassert(newItem->dprod->backPointer == NULL);
      newItem->dprod->backPointer = newItem;

      UPDATE_MALLOC_STATS();     // "new LRItem" or expansion of worklist
    }

    CHECK_MALLOC_STATS("processing of production");
  } // for each production

  CHECK_MALLOC_STATS("end of singleItemClosure");
}
  2101: 
  2102: 
  2103: // -------------- START of construct LR item sets -------------------
  2104: ItemSet *GrammarAnalysis::makeItemSet()
  2105: {
  2106:   return new ItemSet((StateId)(nextItemSetId++),
  2107:                      numTerminals(), numNonterminals());
  2108: }
  2109: 
// Undo the most recent makeItemSet(): free 'is' and reclaim its id.
void GrammarAnalysis::disposeItemSet(ItemSet *is)
{
  // we assume we're only doing this right after making it, as the
  // point of this exercise is to avoid fragmenting the id space
  nextItemSetId--;
  xassert(is->id == nextItemSetId);
  delete is;
}
  2118: 
  2119: 
// yield (by filling 'dest') a new itemset by moving the dot across
// the productions in 'source' that have 'symbol' to the right of the
// dot; do *not* compute the closure
//
// unusedTail:
//   since 'dest' comes with a bunch of kernel items, some of which we
//   most likely won't need, put the unused ones into 'unusedTail'
//
// array:
//   since I don't want to allocate anything in here, we need scratch
//   space for computing kernel CRCs
//
// precondition: at least one item in 'source' has 'symbol' after its
// dot (enforced by the xassert on 'appendCt' below)
void GrammarAnalysis::moveDotNoClosure(ItemSet const *source, Symbol const *symbol,
                                       ItemSet *dest, ObjList<LRItem> &unusedTail,
                                       GrowArray<DottedProduction const*> &array)
{
  //ItemSet *ret = makeItemSet();

  // total # of items added
  int appendCt=0;

  // iterator for walking down dest's kernel list
  // (dest's pre-allocated items are overwritten in place, not replaced)
  ObjListMutator<LRItem> destIter(dest->kernelItems);

  // iterator for walking both lists of items; switching from an
  // implementation which used 'getAllItems' for performance reasons
  ObjListIter<LRItem> srcIter(source->kernelItems);
  int passCt=0;    // 0=kernelItems, 1=nonkernelItems
  while (passCt < 2) {
    if (passCt++ == 1) {
      srcIter.reset(source->nonkernelItems);
    }

    // for each item
    for (; !srcIter.isDone(); srcIter.adv()) {
      LRItem const *item = srcIter.data();

      if (item->isDotAtEnd() ||
          item->symbolAfterDotC() != symbol) {
        continue;    // can't move dot
      }

      // need to access destIter; if there are no more items, make more
      if (destIter.isDone()) {
        // the new item becomes the current 'data()'
        destIter.insertBefore(new LRItem(numTerminals(), NULL /*dprod*/));
      }

      // move the dot; write dot-moved item into 'destIter'
      LRItem *dotMoved = destIter.data();
      dotMoved->dprod = nextDProd(item->dprod);
      dotMoved->lookahead = item->lookahead;

      // add the new item to the itemset I'm building
      //ret->addKernelItem(dotMoved);   // UPDATE: it's already in the list
      appendCt++;
      destIter.adv();
    }
  }

  // pull out any unused items into 'unusedItems'; it's important that
  // this action not have to look at each unused item, because I want
  // to be able to make a really big scratch item list and not pay for
  // items I don't end up using
  unusedTail.stealTailAt(appendCt, dest->kernelItems);

  // verify we actually got something
  xassert(appendCt > 0);

  // we added stuff; sorting is needed both for the CRC below, and also
  // for the lookahead merge step that follows a successful lookup
  dest->sortKernelItems();

  // recompute the one thing I need to do hashing
  dest->computeKernelCRC(array);
}
  2195: 
  2196: 
// if 'list' contains something equal to 'itemSet', return that
// equal object; otherwise, return NULL
// 'list' is non-const because might return an element of it
// (equality is kernel-item equality; see itemSetsEqual)
ItemSet *GrammarAnalysis::findItemSetInList(ObjList<ItemSet> &list,
                                            ItemSet const *itemSet)
{
  // inefficiency: using iteration to check set membership (O(n) scan)

  MUTATE_EACH_OBJLIST(ItemSet, list, iter) {
    if (itemSetsEqual(iter.data(), itemSet)) {
      return iter.data();
    }
  }
  return NULL;
}
  2212: 
  2213: 
// Equality predicate used when searching for an existing LR state.
STATICDEF bool GrammarAnalysis::itemSetsEqual(ItemSet const *is1, ItemSet const *is2)
{
  // checks for equality of the kernel items
  return *is1 == *is2;
}
  2219: 
  2220: 
// keys and data are the same
// (hashtable callback: an ItemSet acts as its own key)
STATICDEF ItemSet const *ItemSet::dataToKey(ItemSet *data)
{
  return data;
}
  2226: 
// hashtable callback: hash a state by its precomputed kernel-items CRC
// (computeKernelCRC must have run before the state is used as a key)
STATICDEF unsigned ItemSet::hash(ItemSet const *key)
{
  unsigned crc = key->kernelItemsCRC;
  // NOTE(review): the CRC is cast to void* purely to reuse the
  // pointer-hash mixer; on 64-bit targets this int-to-pointer cast
  // draws a compiler warning — confirm intent before changing
  return HashTable::lcprngHashFn((void*)crc);
}
  2232: 
// hashtable callback: deep (kernel-item) equality, resolving CRC collisions
STATICDEF bool ItemSet::equalKey(ItemSet const *key1, ItemSet const *key2)
{
  return *key1 == *key2;
}
  2237: 
  2238: 
// [ASU] fig 4.34, p.224
// puts the finished parse tables into 'itemSetsDone'
//
// Worklist construction of the LR(1)/LALR(1) automaton: seed with the
// closure of the start production, then repeatedly pop a pending state
// and, for each symbol after a dot, compute the successor state into a
// reusable scratch state.  If an equal state already exists, merge
// lookaheads into it (re-queueing it if that changed anything);
// otherwise copy the scratch kernel into a brand-new state, close it,
// and queue it.  When the worklist drains, all states are moved into
// 'this->itemSets' and sorted by id.
void GrammarAnalysis::constructLRItemSets()
{
  bool tr = tracingSys("lrsets");

  enum { BIG_VALUE = 100 };

  // item sets yet to be processed; item sets are simultaneously in
  // both the hash and the list, or not in either
  OwnerKHashArray<ItemSet, ItemSet> itemSetsPending(
    &ItemSet::dataToKey,
    &ItemSet::hash,
    &ItemSet::equalKey);

  // item sets with all outgoing links processed
  OwnerKHashTable<ItemSet, ItemSet> itemSetsDone(
    &ItemSet::dataToKey,
    &ItemSet::hash,
    &ItemSet::equalKey);
  itemSetsDone.setEnableShrink(false);

  // to avoid allocating in the inner loop, we make a single item set
  // which we'll fill with kernel items every time we think we *might*
  // make a new state, and if it turns out we really do need a new
  // state, then the kernel items in this one will be copied elsewhere
  Owner<ItemSet> scratchState(
    new ItemSet((StateId)-1 /*id*/, numTerms, numNonterms));

  // fill the scratch state with lots of kernel items to start with;
  // since these items will be re-used over and over, filling it now
  // ensures good locality on those accesses (assuming malloc returns
  // objects close together)
  enum { INIT_LIST_LEN = BIG_VALUE };
  for (int i=0; i<INIT_LIST_LEN; i++) {
    // this is a dummy item; it allocates the bitmap for 'lookahead',
    // but those bits and the 'dprod' pointer will be overwritten
    // many times during the algorithm
    LRItem *item = new LRItem(numTerms, NULL /*dottedprod*/);
    scratchState->addKernelItem(item);
  }

  // similar to the scratch state, make a scratch array for the
  // kernel CRC computation
  GrowArray<DottedProduction const*> kernelCRCArray(BIG_VALUE);

  // start by constructing closure of first production
  // (basically assumes first production has start symbol
  // on LHS, and no other productions have the start symbol
  // on LHS)
  {
    ItemSet *is = makeItemSet();              // (owner)
    startState = is;
    LRItem *firstDP
      = new LRItem(numTerms, getDProd(productions.first(), 0 /*dot at left*/));

    // don't add this to the lookahead; we assume EOF is actually
    // mentioned in the production already, and we won't contemplate
    // executing this reduction within the normal parser core
    // (see GLR::cleanupAfterParse)
    //firstDP->laAdd(0 /*EOF token id*/);

    is->addKernelItem(firstDP);
    is->sortKernelItems();                    // redundant, but can't hurt
    itemSetClosure(*is);                      // calls changedItems internally

    // this makes the initial pending itemSet
    itemSetsPending.push(is, is);             // (ownership transfer)
  }

  // track how much allocation we're doing
  INITIAL_MALLOC_STATS();

  // for each pending item set
  while (itemSetsPending.isNotEmpty()) {
    ItemSet *itemSet = itemSetsPending.pop();          // dequeue (owner)

    CHECK_MALLOC_STATS("top of pending list loop");

    // put it in the done set; note that we must do this *before*
    // the processing below, to properly handle self-loops
    itemSetsDone.add(itemSet, itemSet);                // (ownership transfer; 'itemSet' becomes serf)

    // allows for expansion of 'itemSetsDone' hash
    UPDATE_MALLOC_STATS();

    if (tr) {
      trace("lrsets") << "state " << itemSet->id
                      << ", " << itemSet->kernelItems.count()
                      << " kernel items and "
                      << itemSet->nonkernelItems.count()
                      << " nonkernel items" << std::endl;
    }

    // see below; this is part of a fix for a *very* subtle heisenbug
    bool mustCloseMyself = false;

    // for each production in the item set where the
    // dot is not at the right end
    //
    // explicitly iterate over both lists because 'getAllItems'
    // does allocation
    ObjListIter<LRItem> itemIter(itemSet->kernelItems);
    int passCt=0;    // 0=kernelItems, 1=nonkernelItems
    while (passCt < 2) {
      if (passCt++ == 1) {
        itemIter.reset(itemSet->nonkernelItems);
      }

      for (; !itemIter.isDone(); itemIter.adv()) {
        LRItem const *item = itemIter.data();
        if (item->isDotAtEnd()) continue;

        CHECK_MALLOC_STATS("top of item list loop");

        if (tr) {
          std::ostream &trs = trace("lrsets");
          trs << "considering item ";
          item->print(trs, *this);
          trs << std::endl;
        }

        // get the symbol 'sym' after the dot (next to be shifted)
        Symbol const *sym = item->symbolAfterDotC();

        // in LALR(1), two items might have different lookaheads; more
        // likely, re-expansions needs to propagate lookahead that
        // wasn't present from an earlier expansion
        if (!LALR1) {
          // if we already have a transition for this symbol,
          // there's nothing more to be done
          if (itemSet->transitionC(sym) != NULL) {
            continue;
          }
        }

        // compute the itemSet (into 'scratchState') produced by moving
        // the dot across 'sym'; don't take closure yet since we
        // first want to check whether it is already present
        //
        // this call also yields the unused remainder of the kernel items,
        // so we can add them back in at the end
        ObjList<LRItem> unusedTail;
        moveDotNoClosure(itemSet, sym, scratchState,
                         unusedTail, kernelCRCArray);
        ItemSet *withDotMoved = scratchState;    // clarify role from here down

        CHECK_MALLOC_STATS("moveDotNoClosure");

        // see if we already have it, in either set
        ItemSet *already = itemSetsPending.lookup(withDotMoved);
        bool inDoneList = false;
        if (already == NULL) {
          already = itemSetsDone.get(withDotMoved);
          inDoneList = true;    // used if 'already' != NULL
        }

        // have it?
        if (already != NULL) {
          // we already have a state with at least equal kernel items, not
          // considering their lookahead sets; so we have to merge the
          // computed lookaheads with those in 'already'
          if (withDotMoved->mergeLookaheadsInto(*already)) {
            if (tr) {
              trace("lrsets")
                << "from state " << itemSet->id << ", found that the transition "
                << "on " << sym->name << " yielded a state similar to "
                << already->id << ", but with different lookahead" << std::endl;
            }

            CHECK_MALLOC_STATS("mergeLookaheadsInto");

            // this changed 'already'; recompute its closure
            if (already != itemSet) {
              itemSetClosure(*already);
            }
            else {
              // DANGER!  I'm already iterating over 'itemSet's item lists,
              // and if I execute the closure algorithm it will invalidate
              // my iterator.  so, postpone it
              mustCloseMyself = true;
            }

            // and reconsider all of the states reachable from it
            if (!inDoneList) {
              // itemSetsPending contains 'already', it will be processed later
            }
            else {
              // we thought we were done with this
              xassertdb(itemSetsDone.get(already));

              // but we're not: move it back to the 'pending' list
              itemSetsDone.remove(already);
              itemSetsPending.push(already, already);
            }

            // it's ok if closure makes more items, or if
            // the pending list expands
            UPDATE_MALLOC_STATS();
          }

          // we already have it, so throw away one we made
          // UPDATE: we didn't allocate, so don't deallocate
          //disposeItemSet(withDotMoved);     // deletes 'withDotMoved'

          // and use existing one for setting the transition function
          withDotMoved = already;
        }
        else {
          // we don't already have it; need to actually allocate & copy
          withDotMoved = makeItemSet();
          FOREACH_OBJLIST(LRItem, scratchState->kernelItems, iter) {
            withDotMoved->addKernelItem(new LRItem( *(iter.data()) ));
          }

          // finish it by computing its closure
          itemSetClosure(*withDotMoved);

          // then add it to 'pending'
          itemSetsPending.push(withDotMoved, withDotMoved);

          // takes into account:
          //   - creation of 'withDotMoved' state
          //   - creation of items to fill its kernel
          //   - creation of nonkernel items during closure
          //   - possible expansion of the 'itemSetsPending' hash
          UPDATE_MALLOC_STATS();
        }

        // setup the transition function
        itemSet->setTransition(sym, withDotMoved);

        // finally, restore 'scratchState's kernel item list
        scratchState->kernelItems.concat(unusedTail);

        // make sure the link restoration process works as expected
        xassertdb(scratchState->kernelItems.count() >= INIT_LIST_LEN);

        CHECK_MALLOC_STATS("end of item loop");

      } // for each item
    } // 0=kernel, 1=nonkernel

    CHECK_MALLOC_STATS("end of item set loop");

    // now that we're finished iterating over the items, I can do the
    // postponed closure
    if (mustCloseMyself) {
      itemSetClosure(*itemSet);
      UPDATE_MALLOC_STATS();
    }

  } // for each item set

  // we're done constructing item sets, so move all of them out
  // of the 'itemSetsDone' hash and into 'this->itemSets'
  try {
    for (OwnerKHashTableIter<ItemSet, ItemSet> iter(itemSetsDone);
         !iter.isDone(); iter.adv()) {
      itemSets.prepend(iter.data());
    }
    itemSetsDone.disownAndForgetAll();
  }
  catch (...) {
    breaker();
    itemSetsDone.disownAndForgetAll();
    throw;
  }

  // since we sometimes consider a state more than once, the
  // states end up out of order; put them back in order
  itemSets.mergeSort(ItemSet::diffById);


  traceProgress(1) << "done with LR sets: " << itemSets.count()
                   << " states\n";


  // do the BFS now, since we want to print the sample inputs
  // in the loop that follows
  traceProgress(1) << "BFS tree on transition graph...\n";
  computeBFSTree();

  if (tracingSys("itemset-graph")) {
    // write this info to a graph applet file
    std::ofstream out("lrsets.g");
    if (!out) {
      xsyserror("std::ofstream open");
    }
    out << "# lr sets in graph form\n";

    FOREACH_OBJLIST(ItemSet, itemSets, itemSet) {
      itemSet.data()->writeGraph(out, *this);
    }
  }
}
  2535: 
  2536: 
// print each item set
// (one paragraph per state: its id, a sample input that reaches it, its
// left context, then the items; 'nonkernel' selects whether the closure
// items are printed as well)
void GrammarAnalysis::printItemSets(std::ostream &os, bool nonkernel) const
{
  FOREACH_OBJLIST(ItemSet, itemSets, itemSet) {
    os << "State " << itemSet.data()->id
       << ", sample input: " << sampleInput(itemSet.data()) << "\n"
       << "  and left context: " << leftContextString(itemSet.data()) << "\n"
       ;
    itemSet.data()->print(os, *this, nonkernel);
    os << "\n\n";
  }
}
  2549: 
  2550: 
  2551: // --------------- END of construct LR item sets -------------------
  2552: 
  2553: 
  2554: Symbol const *GrammarAnalysis::
  2555:   inverseTransitionC(ItemSet const *source, ItemSet const *target) const
  2556: {
  2557:   // for each symbol..
  2558:   FOREACH_TERMINAL(terminals, t) {
  2559:     // see if it is the one
  2560:     if (source->transitionC(t.data()) == target) {
  2561:       return t.data();
  2562:     }
  2563:   }
  2564: 
  2565:   FOREACH_NONTERMINAL(nonterminals, nt) {
  2566:     if (source->transitionC(nt.data()) == target) {
  2567:       return nt.data();
  2568:     }
  2569:   }
  2570: 
  2571:   xfailure("GrammarAnalysis::inverseTransitionC: no transition from source to target");
  2572:   return NULL;     // silence warning
  2573: }
  2574: 
  2575: 
  2576: void GrammarAnalysis::computeReachable()
  2577: {
  2578:   // start by clearing the reachability flags
  2579:   MUTATE_EACH_NONTERMINAL(nonterminals, iter) {
  2580:     iter.data()->reachable = false;
  2581:   }
  2582: 
  2583:   // do a DFS on the grammar, marking things reachable as
  2584:   // they're encountered
  2585:   computeReachableDFS(startSymbol);
  2586: }
  2587: 
  2588: 
// Mark 'nt' reachable, then walk the RHS of each of its productions:
// terminals are marked directly, nonterminals are visited recursively.
// The 'reachable' flag doubles as the visited set, so grammar cycles
// terminate.
void GrammarAnalysis::computeReachableDFS(Nonterminal *nt)
{
  if (nt->reachable) {
    // already looked at this nonterminal
    return;
  }
  nt->reachable = true;

  // iterate over this nonterminal's rules
  SFOREACH_PRODUCTION(productionsByLHS[nt->ntIndex], iter) {
    // iterate over symbols in the rule RHS
    FOREACH_OBJLIST(Production::RHSElt, iter.data()->right, jter) {
      Production::RHSElt const *elt = jter.data();

      if (elt->sym->isNonterminal()) {
        // recursively analyze nonterminal elements
        computeReachableDFS(elt->sym->ifNonterminal());
      }
      else {
        // just mark terminals
        elt->sym->reachable = true;
      }
    }
  }
}
  2614: 
  2615: 
  2616: // --------------- LR support -------------------
  2617: // decide what to do, and record the result into the two
  2618: // boolean reference parameters
  2619: void GrammarAnalysis::handleShiftReduceConflict(
  2620:   bool &keepShift, bool &keepReduce, bool &dontWarn,
  2621:   ItemSet const *state, Production const *prod, Terminal const *sym)
  2622: {
  2623:   // say that we're considering this conflict
  2624:   trace("prec")
  2625:     << "in state " << state->id << ", S/R conflict on token "
  2626:     << sym->name << " with production " << *prod << std::endl;
  2627: 
  2628:   // look at scannerless directives
  2629:   {
  2630:     // is this nonterm or any of its declared supersets maximal?
  2631:     Nonterminal const *super = prod->left;
  2632:     bool maximal = super->maximal;
  2633:     while (!maximal && super->superset) {
  2634:       super = super->superset;
  2635:       maximal = super->maximal;
  2636:     }
  2637: 
  2638:     if (maximal) {
  2639:       // see if this reduction can be removed due to a 'maximal' spec;
  2640:       // in particular, is the shift going to extend 'super'?
  2641:       if (state->hasExtendingShift(super, sym)) {
  2642:         trace("prec") << "resolved in favor of SHIFT due to maximal munch\n";
  2643:         keepReduce = false;
  2644:         return;
  2645:       }
  2646:     }
  2647:   }
  2648: 
  2649:   if (!( prod->precedence && sym->precedence )) {
  2650:     // one of the two doesn't have a precedence specification,
  2651:     // so we can do nothing
  2652:     trace("prec") << "will SPLIT because no disambiguation spec available" << std::endl;
  2653:     return;
  2654:   }
  2655: 
  2656:   if (prod->precedence > sym->precedence) {
  2657:     // production's precedence is higher, so we choose to reduce
  2658:     // instead of shift
  2659:     trace("prec") << "resolved in favor of REDUCE due to precedence\n";
  2660:     keepShift = false;
  2661:     return;
  2662:   }
  2663: 
  2664:   if (prod->precedence < sym->precedence) {
  2665:     // symbol's precedence is higher, so we shift
  2666:     trace("prec") << "resolved in favor of SHIFT due to precedence\n";
  2667:     keepReduce = false;
  2668:     return;
  2669:   }
  2670: 
  2671:   // precedences are equal, so we look at associativity (of token)
  2672:   switch (sym->associativity) {
  2673:     case AK_LEFT:
  2674:       trace("prec") << "resolved in favor of REDUCE due to associativity\n";
  2675:       keepShift = false;
  2676:       return;
  2677: 
  2678:     case AK_RIGHT:
  2679:       trace("prec") << "resolved in favor of SHIFT due to associativity\n";
  2680:       keepReduce = false;
  2681:       return;
  2682: 
  2683:     case AK_NONASSOC:
  2684:       trace("pred") << "removed BOTH alternatives due to nonassociativity\n";
  2685:       keepShift = false;
  2686:       keepReduce = false;
  2687:       return;
  2688: 
  2689:     case AK_NEVERASSOC:
  2690:       // the user claimed this token would never be involved in a conflict
  2691:       trace("pred") << "neverassoc specification ERROR\n";
  2692:       errors++;
  2693:       std::cout << "token " << sym->name << " was declared 'prec', "
  2694:            << "but it is involved in an associativity conflict with \""
  2695:            << *prod << "\" in state " << state->id << std::endl;
  2696:       return;
  2697: 
  2698:     case AK_SPLIT:
  2699:       // the user does not want disambiguation of this
  2700:       trace("pred") << "will SPLIT because user asked to\n";
  2701:       dontWarn = true;
  2702:       return;
  2703: 
  2704:     default:
  2705:       xfailure("bad assoc code");
  2706:   }
  2707: }
  2708: 
  2709: 
  2710: // given an LR transition graph, compute the BFS tree on top of it
  2711: // and set the parent links to record the tree
  2712: void GrammarAnalysis::computeBFSTree()
  2713: {
  2714:   // for the BFS, we need a queue of states yet to be processed, and a
  2715:   // pile of 'done' states
  2716:   SObjList<ItemSet> queue;
  2717:   SObjList<ItemSet> done;
  2718: 
  2719:   // initial entry in queue is root of BFS tree
  2720:   queue.append(startState);
  2721: 
  2722:   // it will be convenient to have all the symbols in a single list
  2723:   // for iteration purposes
  2724:   SymbolList allSymbols;          // (const list)
  2725:   {
  2726:     FOREACH_TERMINAL(terminals, t) {
  2727:       allSymbols.append(const_cast<Terminal*>(t.data()));
  2728:     }
  2729:     FOREACH_NONTERMINAL(nonterminals, nt) {
  2730:       allSymbols.append(const_cast<Nonterminal*>(nt.data()));
  2731:     }
  2732:   }
  2733: 
  2734:   // loop until the queue is exhausted
  2735:   while (queue.isNotEmpty()) {
  2736:     // dequeue first element
  2737:     ItemSet *source = queue.removeAt(0);
  2738: 
  2739:     // mark it as done so we won't consider any more transitions to it
  2740:     done.append(source);
  2741: 
  2742:     // for each symbol...
  2743:     SFOREACH_SYMBOL(allSymbols, sym) {
  2744:       // get the transition on this symbol
  2745:       ItemSet *target = source->transition(sym.data());
  2746: 
  2747:       // if the target is done or already enqueued, or there is no
  2748:       // transition on this symbol, we don't need to consider it
  2749:       // further
  2750:       if (target == NULL ||
  2751:           done.contains(target) ||
  2752:           queue.contains(target)) {
  2753:         continue;
  2754:       }
  2755: 
  2756:       // the source->target link just examined is the first time
  2757:       // we've encounted 'target', so that link becomes the BFS
  2758:       // parent link
  2759:       target->BFSparent = source;
  2760: 
  2761:       // finally, enqueue the target so we'll explore its targets too
  2762:       queue.append(target);
  2763:     }
  2764:   }
  2765: }
  2766: 
  2767: 
// --------------- parse table construction -------------------
#if 0 // obsolete
// compare two productions by precedence
// (dead code, deliberately disabled: superseded by the inline
// highest-precedence filtering done in resolveConflicts; kept only
// as historical reference)
static int productionPrecCompare(Production const *p1, Production const *p2, void*)
{
  if (p1->precedence && p2->precedence) {
    // I want the low precedence first
    return p1->precedence - p2->precedence;
  }
  else {
    // if one or the other doesn't have a precedence, then there's
    // no basis for distinction
    return 0;
  }
}
#endif
  2784: 
// given some potential parse actions, apply available disambiguation
// to remove some of them; print warnings about conflicts, in some
// situations
//
// Disambiguation proceeds in stages: prec/assoc for S/R conflicts,
// then highest-precedence filtering for R/R conflicts, then
// subset/superset directives; finally, if 'allowAmbig' is false,
// Bison-style forced resolution leaves at most one action.
void GrammarAnalysis::resolveConflicts(
  ItemSet const *state,        // parse state in which the actions are possible
  Terminal const *sym,         // lookahead symbol for these actions
  ItemSet const *&shiftDest,   // (inout) if non-NULL, the state to which we can shift
  ProductionList &reductions,  // (inout) list of possible reductions
  bool allowAmbig,             // if false, always return at most 1 action
  bool &printedConflictHeader, // (inout) true once we've printed the state header
  int &sr, int &rr)            // (inout) counts of S/R and R/R conflicts, resp.
{
  // how many actions are there?
  int actions = (shiftDest? 1 : 0) + reductions.count();
  if (actions <= 1) {
    return;      // no conflict
  }

  // count how many warning suppressions we have
  int dontWarns = 0;

  // static disambiguation for S/R conflicts
  if (shiftDest) {
    // we have (at least) a shift/reduce conflict, which is the
    // situation in which prec/assoc specifications are used; consider
    // all the possible reductions, so we can resolve S/R conflicts
    // even when there are R/R conflicts present too
    SObjListMutator<Production> mut(reductions);
    while (!mut.isDone() && shiftDest != NULL) {
      Production const *prod = mut.data();

      bool keepShift=true, keepReduce=true, dontWarn=false;
      handleShiftReduceConflict(keepShift, keepReduce, dontWarn, state, prod, sym);

      if (!keepShift) {
        actions--;
        shiftDest = NULL;      // remove the shift
      }

      if (!keepReduce) {
        actions--;
        mut.remove();          // remove the reduction
      }
      else {
        mut.adv();
      }

      if (dontWarn) {
        dontWarns++;
      }
    }

    // there is still a potential for misbehavior.. e.g., if there are two
    // possible reductions (R1 and R2), and one shift (S), then the user
    // could have specified prec/assoc to disambiguate, e.g.
    //   R1 < S
    //   S < R2
    // so that R2 is the right choice; but if I consider (S,R2) first,
    // I'll simply drop S, leaving no way to disambiguate R1 and R2 ..
    // for now I'll just note the possibility...
  }

  // static disambiguation for R/R conflicts
  if (reductions.count() > 1) {

// NEW CODE FROM ELKHOUND version 1.156, 2005/02/25 20:10:47
 // find the highest precedence
         int highestPrec = 0;
         SFOREACH_PRODUCTION(reductions, iter) {
           int p = iter.data()->precedence;

           if (p && p>highestPrec) {
             highestPrec = p;
           }
         }

         // remove any productions that are lower than 'highestPrec'
         SObjListMutator<Production> mut(reductions);
         while (!mut.isDone()) {
           int p = mut.data()->precedence;

           if (p && p<highestPrec) {
             trace("prec")
               << "in state " << state->id << ", R/R conflict on token "
               << sym->name << ", removed production " << *(mut.data())
               << " because " << p << "<" << highestPrec << std::endl;
             mut.remove();
           }
           else {
             mut.adv();
           }
         }

         // NOTE(review): unlike the S/R loop above, removals here do
         // not decrement 'actions', so the conflict accounting below
         // may overstate the number of remaining actions -- confirm
         // whether that is intended.

#if 0    // totally wrong
    // sort the reductions so the lowest precedence reductions are
    // first, then higher precedences, and finally reductions that
    // lack any precedence (use insertion sort since I expect that
    // most of the time the list won't require any changes)
    reductions.insertionSort(productionPrecCompare);

    // work through the head of the list, discarding productions
    // that have higher-precedence productions beneath them
    int ct = reductions.count();
    while (ct >= 2) {
      Production *p1 = reductions.nth(0);
      Production *p2 = reductions.nth(1);
      if (!(p1->precedence && p2->precedence)) break;

      // remove first one
      reductions.removeFirst();
      ct--;
      actions--;

      // report
      trace("prec")
        << "in state " << state->id << ", R/R conflict on token "
        << sym->name << ", removed production " << *p1 << std::endl;
    }
#endif
  }

  // additional R/R resolution using subset directives
  if (reductions.count() > 1) {
    actions -= subsetDirectiveResolution(state, sym, reductions);
  }

  // after the disambiguation, maybe now there's no conflicts?
  // or, if conflicts remain, did we get at least that many warning
  // suppressions?
  if ((actions-dontWarns) <= 1) {
    // don't print information about conflicts
  }
  else {
    // print conflict info
    if (!printedConflictHeader) {
      trace("conflict")
        << "--------- state " << state->id << " ----------\n"
        << "left context: " << leftContextString(state)
        << std::endl
        << "sample input: " << sampleInput(state)
        << std::endl
        ;
      printedConflictHeader = true;
    }

    trace("conflict")
      << "conflict for symbol " << sym->name
      << std::endl;

    if (shiftDest) {
      trace("conflict") << "  shift, and move to state " << shiftDest->id << std::endl;
      sr++;                 // shift/reduce conflict
      rr += actions - 2;    // any reduces beyond first are r/r errors
    }
    else {
      rr += actions - 1;    // all reduces beyond first are r/r errors
    }

    SFOREACH_PRODUCTION(reductions, prod) {
      trace("conflict") << "  reduce by rule " << *(prod.data()) << std::endl;
    }
  }

  if (!allowAmbig && actions > 1) {
    // force only one action, using Bison's disambiguation:
    //   - prefer shift to reduce
    //   - prefer the reduction which occurs first in the grammar file
    if (shiftDest) {
      reductions.removeAll();
    }
    else {
      while (reductions.count() >= 2) {
        // compare first and second
        Production const *first = reductions.nth(0);
        Production const *second = reductions.nth(1);

        // production indices happen to be assigned in file order
        if (first->prodIndex < second->prodIndex) {
          reductions.removeItem(second);
        }
        else {
          reductions.removeItem(first);
        }
      }
    }
  }
}
  2972: 
  2973: 
  2974: void reportUnexpected(int value, int expectedValue, char const *desc)
  2975: {
  2976:   if ((expectedValue == -1 && value>0) ||
  2977:       (expectedValue != -1 && expectedValue != value)) {
  2978:     std::cout << value << " " << desc;
  2979:     if (expectedValue != -1) {
  2980:       std::cout << " (expected " << expectedValue << ")";
  2981:     }
  2982:     std::cout << std::endl;
  2983:   }
  2984: }
  2985: 
  2986: 
  2987: // the idea is we might be trying to do scannerless parsing, and
  2988: // someone might say that Identifier has as subsets all the keywords,
  2989: // so competing reductions should favor the subsets (the keywords)
  2990: int GrammarAnalysis::subsetDirectiveResolution(
  2991:   ItemSet const *state,        // parse state in which the actions are possible
  2992:   Terminal const *sym,         // lookahead symbol for these actions
  2993:   ProductionList &reductions)  // list to try to cut down
  2994: {
  2995:   int removed = 0;
  2996: 
  2997:   // make a map of which nonterminals appear on the LHS of one
  2998:   // of the reductions, and has a superset
  2999:   BitArray map(numNonterms);
  3000:   bool anyWithSuper = false;
  3001:   {
  3002:     SFOREACH_PRODUCTION(reductions, iter) {
  3003:       Production const *p = iter.data();
  3004:       if (p->left->superset) {
  3005:         map.set(p->left->ntIndex);
  3006:         anyWithSuper = true;
  3007:       }
  3008:     }
  3009:   }
  3010: 
  3011:   if (!anyWithSuper) {
  3012:     return removed;     // nothing we can do
  3013:   }
  3014: 
  3015:   // walk over the reductions, removing those that have reductions
  3016:   // to subsets also in the list
  3017:   SObjListMutator<Production> mut(reductions);
  3018:   while (!mut.isDone()) {
  3019:     Production const *prod = mut.data();
  3020: 
  3021:     SFOREACH_OBJLIST(Nonterminal, prod->left->subsets, iter) {
  3022:       Nonterminal const *sub = iter.data();
  3023:       if (map.test(sub->ntIndex)) {
  3024:         trace("prec")
  3025:           << "in state " << state->id
  3026:           << ", R/R conflict on token " << sym->name
  3027:           << ", removed production yielding " << prod->left->name
  3028:           << " b/c another yields subset " << sub->name
  3029:           << std::endl;
  3030:         mut.remove();
  3031:         removed++;
  3032:         goto continue_outer_loop;
  3033:       }
  3034:     }
  3035: 
  3036:     // didn't remove, must manually advance
  3037:     mut.adv();
  3038: 
  3039:     continue_outer_loop:;
  3040:   }
  3041: 
  3042:   return removed;
  3043: }
  3044: 
  3045: 
  3046: bool isAmbiguousNonterminal(Symbol const *sym)
  3047: {
  3048:   if (sym->isNonterminal()) {
  3049:     Nonterminal const &nt = sym->asNonterminalC();
  3050:     if (nt.mergeCode) {
  3051:       return true;   // presence of merge() signals potential ambiguity
  3052:     }
  3053:   }
  3054:   return false;
  3055: }
  3056: 
  3057: 
  3058: // The purpose of this function is to number the states (which have up
  3059: // to this point been numbered arbitrarily) in such a way that all
  3060: // states that have a given symbol on incoming arcs will be numbered
  3061: // consecutively.  This is part of the table compression schemes
  3062: // described in the Dencker et. al. paper (see parsetables.h).
  3063: void GrammarAnalysis::renumberStates()
  3064: {
  3065:   // sort them into the right order
  3066:   itemSets.mergeSort(&GrammarAnalysis::renumberStatesDiff, this);
  3067: 
  3068:   // number them in that order
  3069:   int n = 0;
  3070:   FOREACH_OBJLIST_NC(ItemSet, itemSets, iter) {
  3071:     ItemSet *s = iter.data();
  3072:     if (n == 0) {
  3073:       // the first element should always be the start state
  3074:       xassert(s->id == 0);
  3075:     }
  3076:     else {
  3077:       s->id = (StateId)n;
  3078:     }
  3079: 
  3080:     n++;
  3081:   }
  3082: }
  3083: 
// Comparator used by renumberStates' mergeSort.  Orders states first
// by incoming-arc symbol (as the Code Reduction Scheme requires), then
// by a series of tie-breakers chosen only to make the numbering
// canonical and reproducible.  Returns <0, 0, or >0 like strcmp.
STATICDEF int GrammarAnalysis::renumberStatesDiff
  (ItemSet const *left, ItemSet const *right, void *vgramanl)
{
  GrammarAnalysis *gramanl = (GrammarAnalysis*)vgramanl;

  int ret;

  // if for some reason I'm ever asked to compare a state to
  // itself..
  if (left == right) {
    return 0;
  }

  // order them first by their incoming arc symbol; this effects
  // the renumbering that the Code Reduction Scheme demands
  {
    Symbol const *ls = left->getStateSymbolC();
    Symbol const *rs = right->getStateSymbolC();

    // any state with no incoming arcs (start state) is first
    ret = (int)(bool)ls - (int)(bool)rs;
    if (ret) return ret;

    // terminals come before nonterminals
    ret = (int)(ls->isNonterminal()) - (int)(rs->isNonterminal());
    if (ret) return ret;

    // order by id within terms/nonterms
    ret = ls->getTermOrNontermIndex() - rs->getTermOrNontermIndex();
    if (ret) return ret;
  }

  // from this point on, the CRS would be happy with an arbitrary
  // order, but I want the state numbering to be canonical so that
  // I have an easier time debugging and comparing parse traces

  // they have the same incoming arc symbol; now, sort by outgoing
  // arc symbols

  // first up: terminals
  {
    for (int t=0; t < gramanl->numTerminals(); t++) {
      ItemSet const *ldest = left->getTermTransition(t);
      ItemSet const *rdest = right->getTermTransition(t);

      // a state lacking the transition sorts after one that has it
      ret = (int)!ldest - (int)!rdest;
      if (ret) return ret;

      if (ldest && rdest) {
        // both have it: compare destination ids
        ret = ldest->id - rdest->id;
        if (ret) return ret;
      }
    }
  }

  // next: nonterminals
  {
    for (int nt=0; nt < gramanl->numNonterminals(); nt++) {
      ItemSet const *ldest = left->getNontermTransition(nt);
      ItemSet const *rdest = right->getNontermTransition(nt);

      ret = (int)!ldest - (int)!rdest;
      if (ret) return ret;

      if (ldest && rdest) {
        ret = ldest->id - rdest->id;
        if (ret) return ret;
      }
    }
  }

  // I suspect this will never be reached, since usually the
  // transition function will be sufficient
  // update: it happens often enough.. even in the arith grammar
  //std::cout << "using reductions to distinguish states\n";

  // finally, order by possible reductions
  FOREACH_OBJLIST(Terminal, gramanl->terminals, termIter) {
    ProductionList lpl, rpl;
    left->getPossibleReductions(lpl, termIter.data(), false /*parsing*/);
    right->getPossibleReductions(rpl, termIter.data(), false /*parsing*/);

    // sort the productions before we can compare them...
    lpl.insertionSort(&GrammarAnalysis::arbitraryProductionOrder);
    rpl.insertionSort(&GrammarAnalysis::arbitraryProductionOrder);

    ret = lpl.compareAsLists(rpl, &GrammarAnalysis::arbitraryProductionOrder);
    if (ret) return ret;
  }

  // I used to throw an xfailure here, but that causes a problem
  // because the 'itemSets' list is not well-formed, because we
  // are in the middle of sorting it
  std::cout << "two different states have identical transitions and "
          "identical reductions!\n";
  std::cout << "left=" << left->id
       << ", sym is " << left->getStateSymbolC()->toString() << "\n";
  left->print(std::cout, *gramanl);
  std::cout << "right=" << right->id
       << ", sym is " << right->getStateSymbolC()->toString() << "\n";
  right->print(std::cout, *gramanl);

  return 0;
}
  3188: 
  3189: STATICDEF int GrammarAnalysis::arbitraryProductionOrder
  3190:   (Production const *left, Production const *right, void*)
  3191: {
  3192:   // compare LHS
  3193:   int ret = left->left->ntIndex - right->left->ntIndex;
  3194:   if (ret) return ret;
  3195: 
  3196:   // RHS elts one at a time
  3197:   return left->right.compareAsLists(right->right,
  3198:     &GrammarAnalysis::arbitraryRHSEltOrder);
  3199: }
  3200: 
  3201: STATICDEF int GrammarAnalysis::arbitraryRHSEltOrder
  3202:   (Production::RHSElt const *left, Production::RHSElt const *right, void*)
  3203: {
  3204:   int ret = (int)left->sym->isTerminal() - (int)right->sym->isTerminal();
  3205:   if (ret) return ret;
  3206: 
  3207:   return left->sym->getTermOrNontermIndex() - right->sym->getTermOrNontermIndex();
  3208: }
  3209: 
  3210: 
// Build the LR action and goto tables from the computed item sets,
// resolving conflicts along the way, then apply the enabled table
// compression schemes.  'allowAmbig' false forces at most one action
// per cell (Bison-style behavior).
void GrammarAnalysis::computeParseTables(bool allowAmbig)
{
  tables = new ParseTables(numTerms, numNonterms, itemSets.count(), numProds,
                           startState->id,
                           0 /* slight hack: assume it's the first production */);

  if (ENABLE_CRS_COMPRESSION) {
    // first-state info: record, for each symbol, the first state whose
    // incoming arcs carry that symbol; relies on renumberStates having
    // sorted states into runs (terminals first, then nonterminals)
    bool doingTerms = true;
    int prevSymCode = -1;
    FOREACH_OBJLIST(ItemSet, itemSets, iter) {
      ItemSet const *state = iter.data();
      Symbol const *sym = state->getStateSymbolC();
      if (!sym) continue;     // skip start state
      int symCode = sym->getTermOrNontermIndex();

      if (sym->isTerminal() == doingTerms &&
          symCode == prevSymCode) {
        // continuing the current run, do nothing
        continue;
      }

      if (sym->isNonterminal() && doingTerms) {
        // transition from terminals to nonterminals
        doingTerms = false;
      }
      else {
        // continue current phase, with new code; states must
        // already have been sorted into increasing order
        xassert(sym->isTerminal() == doingTerms);
        xassert(prevSymCode < symCode);
      }

      if (doingTerms) {
        tables->setFirstWithTerminal(symCode, state->id);
      }
      else {
        tables->setFirstWithNonterminal(symCode, state->id);
      }

      prevSymCode = symCode;
    }
  }

  // count total number of conflicts of each kind
  int sr=0, rr=0;

  // for each state...
  FOREACH_OBJLIST(ItemSet, itemSets, stateIter) {
    ItemSet const *state = stateIter.data();
    bool printedConflictHeader = false;

    // ---- fill in this row in the action table ----
    // for each possible lookahead...
    for (int termId=0; termId < numTerms; termId++) {
      Terminal const *terminal = getTerminal(termId);

      // can shift?
      ItemSet const *shiftDest = state->transitionC(terminal);

      // can reduce?
      ProductionList reductions;
      state->getPossibleReductions(reductions, terminal,
                                   false /*parsing*/);

      // try to resolve conflicts; this may print warnings about
      // the conflicts, depending on various factors; if 'allowAmbig'
      // is false, this will remove all but one action
      resolveConflicts(state, terminal, shiftDest, reductions,
                       allowAmbig, printedConflictHeader, sr, rr);

      // what to do in this cell
      ActionEntry cellAction;

      // still conflicts?
      int actions = (shiftDest? 1 : 0) + reductions.count();
      if (actions >= 2) {
        // make a new ambiguous-action entry-set
        ArrayStack<ActionEntry> set;

        // fill in the actions
        if (shiftDest) {
          set.push(tables->encodeShift(shiftDest->id, termId));
        }
        SFOREACH_PRODUCTION(reductions, prodIter) {
          set.push(tables->encodeReduce(prodIter.data()->prodIndex, state->id));
        }
        xassert(set.length() == actions);

        cellAction = tables->encodeAmbig(set, state->id);
      }

      else {
        // single action
        if (shiftDest) {
          xassert(reductions.count() == 0);
          cellAction = tables->encodeShift(shiftDest->id, termId);
        }
        else if (reductions.isNotEmpty()) {
          xassert(reductions.count() == 1);
          cellAction = tables->encodeReduce(reductions.first()->prodIndex, state->id);
        }
        else {
          cellAction = tables->encodeError();
        }
      }

      // add this entry to the table
      tables->setActionEntry(state->id, termId, cellAction);

      // based on the contents of 'reductions', decide whether this
      // state is delayed or not; to be delayed, the state must be
      // able to reduce by a production which:
      //   - has an ambiguous nonterminal as the last symbol on its RHS
      //   - is not reducing to the *same* nonterminal as the last symbol
      //     (rationale: eagerly reduce "E -> E + E")
      // UPDATE: removed last condition because it actually makes things
      // worse..
      // NOTE(review): 'delayed' is computed but never consumed in this
      // function -- looks like a remnant of a feature that stored it in
      // the tables; confirm before removing.
      bool delayed = false;
      if (reductions.isNotEmpty()) {    // no reductions: eager (irrelevant, actually)
        SFOREACH_PRODUCTION(reductions, prodIter) {
          Production const &prod = *prodIter.data();
          if (prod.rhsLength() >= 1) {                 // nonempty RHS?
            Symbol const *lastSym = prod.right.lastC()->sym;
            if (isAmbiguousNonterminal(lastSym)        // last RHS ambig?
                /*&& lastSym != prod.left*/) {         // not same as LHS?
              delayed = true;
            }
          }
        }
      }
    }

    // ---- fill in this row in the goto table ----
    // for each nonterminal...
    for (int nontermId=0; nontermId<numNonterms; nontermId++) {
      Nonterminal const *nonterminal = getNonterminal(nontermId);

      // where do we go when we reduce to this nonterminal?
      ItemSet const *gotoDest = state->transitionC(nonterminal);

      GotoEntry cellGoto;
      if (gotoDest) {
        cellGoto = tables->encodeGoto(gotoDest->id, nonterminal->ntIndex);
      }
      else {
        // this should never be accessed at parse time..
        cellGoto = tables->encodeGotoError();
      }

      // fill in entry
      tables->setGotoEntry(state->id, nontermId, cellGoto);
    }

    // get the state symbol
    xassert((unsigned)(state->id) < (unsigned)(tables->getNumStates()));
    tables->setStateSymbol(state->id,
      encodeSymbolId(state->getStateSymbolC()));
  }

  // report on conflict counts
  reportUnexpected(sr, expectedSR, "shift/reduce conflicts");
  reportUnexpected(rr, expectedRR, "reduce/reduce conflicts");

  // report on cyclicity
  for (int nontermId=0; nontermId<numNonterms; nontermId++) {
    Nonterminal const *nonterminal = getNonterminal(nontermId);
    if (nonterminal->cyclic) {
      std::cout << "grammar symbol " << nonterminal->name << " is cyclic\n";
    }
  }

  // fill in 'prodInfo'
  for (int p=0; p<numProds; p++) {
    Production const *prod = getProduction(p);
    tables->setProdInfo(p, prod->rhsLength(), prod->left->ntIndex);
  }

  // use the derivability relation to compute a total order
  // on nonterminals
  BitArray seen(numNonterms);
  int nextOrdinal = numNonterms-1;
  for (int nt=0; nt < numNonterms; nt++) {
    // expand from 'nt' in case it's disconnected; this will be
    // a no-op if we've already 'seen' it
    topologicalSort(tables->getWritableNontermOrder(), nextOrdinal, nt, seen);
  }
  xassert(nextOrdinal == -1);    // should have used them all

  if (ENABLE_EEF_COMPRESSION) {
    tables->computeErrorBits();
  }

  if (ENABLE_GCS_COMPRESSION) {
    if (ENABLE_GCS_COLUMN_COMPRESSION) {
      tables->mergeActionColumns();
    }
    tables->mergeActionRows();

    if (ENABLE_GCS_COLUMN_COMPRESSION) {
      tables->mergeGotoColumns();
    }
    tables->mergeGotoRows();
  }
}
  3416: 
  3417: 
// this is a depth-first traversal of the 'derivable' relation;
// when we reach a nonterminal that can't derive any others not
// already in the order, we give its entry the latest ordinal
// that isn't already taken ('nextOrdinal')
void GrammarAnalysis::topologicalSort(
  NtIndex *order,    // table we're filling with ordinals
  int &nextOrdinal,  // latest ordinal not yet used
  NtIndex current,   // current nonterminal to expand
  BitArray &seen)    // set of nonterminals we've already seen
{
  if (seen.test(current)) {
    // already expanded this one
    return;
  }

  // don't expand this one again
  seen.set(current);

  // consider every nonterminal 'nt' that can derive 'current'
  for (int nt=0; nt < numNonterms; nt++) {
    if (derivable->get(point(nt, current))) {
      // 'nt' can derive 'current'; expand 'nt' first, thus making
      // it later in the order, so we'll reduce to 'current' before
      // reducing to 'nt' (when token spans are equal)
      xassert((NtIndex)nt == nt);    // guard against NtIndex truncation
      topologicalSort(order, nextOrdinal, (NtIndex)nt, seen);
    }
  }

  // finally, put 'current' into the order
  order[current] = nextOrdinal;
  nextOrdinal--;
}
  3451: 
  3452: 
  3453: SymbolId encodeSymbolId(Symbol const *sym)
  3454: {
  3455:   int ret;
  3456:   if (!sym) {
  3457:     ret = 0;
  3458:   }
  3459:   else if (sym->isTerminal()) {
  3460:     ret = sym->asTerminalC().termIndex + 1;
  3461:   }
  3462:   else /*nonterminal*/ {
  3463:     ret = - sym->asNonterminalC().ntIndex - 1;
  3464: 
  3465:     // verify encoding of nonterminals is sufficiently wide
  3466:     int idx = sym->asNonterminalC().ntIndex;
  3467:     xassert((NtIndex)idx == idx);
  3468:   }
  3469: 
  3470:   // verify encoding is lossless
  3471:   SymbolId ret2 = (SymbolId)ret;
  3472:   xassert((int)ret2 == ret);
  3473:   return ret2;
  3474: }
  3475: 
  3476: 
  3477: // --------------- sample inputs -------------------
  3478: // yield a sequence of names of symbols (terminals and nonterminals) that
  3479: // will lead to the given state, from the start state
  3480: sm_string GrammarAnalysis::leftContextString(ItemSet const *state) const
  3481: {
  3482:   SymbolList ctx;
  3483:   leftContext(ctx, state);                // get as list
  3484:   return symbolSequenceToString(ctx);     // convert to sm_string
  3485: }
  3486: 
  3487: 
  3488: // yield the left-context as a sequence of symbols
  3489: // CONSTNESS: want output as list of const pointers
  3490: void GrammarAnalysis::leftContext(SymbolList &output,
  3491:                                   ItemSet const *state) const
  3492: {
  3493:   // since we have the BFS tree, generating sample input (at least, if
  3494:   // it's allowed to contain nonterminals) is a simple matter of walking
  3495:   // the tree towards the root
  3496: 
  3497:   // for each parent..
  3498:   while (state->BFSparent) {
  3499:     // get that parent
  3500:     ItemSet *parent = state->BFSparent;
  3501: 
  3502:     // find a symbol on which we would transition from the parent
  3503:     // to the current state
  3504:     Symbol const *sym = inverseTransitionC(parent, state);
  3505: 
  3506:     // prepend that symbol's name to our current context
  3507:     output.prepend(const_cast<Symbol*>(sym));
  3508: 
  3509:     // move to our parent and repeat
  3510:     state = parent;
  3511:   }
  3512: }
  3513: 
  3514: 
  3515: // compare two-element quantities where one dominates and the other is
  3516: // only for tie-breaking; return <0/=0/>0 if a's quantities are
  3517: // fewer/equal/grearter (this fn is a candidate for adding to a
  3518: // library somewhere)
  3519: int priorityCompare(int a_dominant, int b_dominant,
  3520:                     int a_recessive, int b_recessive)
  3521: {
  3522:   if (a_dominant < b_dominant) return -1;
  3523:   if (a_dominant > b_dominant) return +1;
  3524:   return a_recessive - b_recessive;
  3525: }
  3526: 
  3527: int priorityFewer(int a_dominant, int b_dominant,
  3528:                   int a_recessive, int b_recessive)
  3529: {
  3530:   return priorityCompare(a_dominant, b_dominant,
  3531:                          a_recessive, b_recessive) < 1;
  3532: }
  3533: 
  3534: 
  3535: // sample input (terminals only) that can lead to a state
  3536: sm_string GrammarAnalysis::sampleInput(ItemSet const *state) const
  3537: {
  3538:   // get left-context as terminals and nonterminals
  3539:   SymbolList symbols;
  3540:   leftContext(symbols, state);
  3541: 
  3542:   // reduce the nonterminals to terminals
  3543:   TerminalList terminals;
  3544:   if (!rewriteAsTerminals(terminals, symbols)) {
  3545:     return sm_string("(failed to reduce!!)");
  3546:   }
  3547: 
  3548:   // convert to a sm_string
  3549:   return terminalSequenceToString(terminals);
  3550: }
  3551: 
  3552: 
  3553: // given a sequence of symbols (terminals and nonterminals), use the
  3554: // productions to rewrite it as a (hopefully minimal) sequence of
  3555: // terminals only; return true if it works, false if we get stuck
  3556: // in an infinite loop
  3557: // CONSTNESS: ideally, 'output' would contain const ptrs to terminals
  3558: bool GrammarAnalysis::rewriteAsTerminals(TerminalList &output, SymbolList const &input) const
  3559: {
  3560:   // we detect looping by noticing if we ever reduce via the same
  3561:   // production more than once in a single vertical recursive slice
  3562:   ProductionList reductionStack;      // starts empty
  3563: 
  3564:   // start the recursive version
  3565:   return rewriteAsTerminalsHelper(output, input, reductionStack);
  3566: }
  3567: 
  3568: 
  3569: // (nonterminals and terminals) -> terminals;
  3570: // if this returns false, it's guaranteed to return with 'output'
  3571: // unchanged from when the function was invoked
  3572: bool GrammarAnalysis::
  3573:   rewriteAsTerminalsHelper(TerminalList &output, SymbolList const &input,
  3574:                            ProductionList &reductionStack) const
  3575: {
  3576:   // remember the initial 'output' length so we can restore
  3577:   int origLength = output.count();
  3578: 
  3579:   // walk down the input list, creating the output list by copying
  3580:   // terminals and reducing nonterminals
  3581:   SFOREACH_SYMBOL(input, symIter) {
  3582:     Symbol const *sym = symIter.data();
  3583: 
  3584:     if (sym->isEmptyString) {
  3585:       // easy; no-op
  3586:     }
  3587: 
  3588:     else if (sym->isTerminal()) {
  3589:       // no sweat, just copy it (er, copy the pointer)
  3590:       output.append(const_cast<Terminal*>(&sym->asTerminalC()));
  3591:     }
  3592: 
  3593:     else {
  3594:       // not too bad either, just reduce it, sticking the result
  3595:       // directly into our output list
  3596:       if (!rewriteSingleNTAsTerminals(output, &sym->asNonterminalC(),
  3597:                                       reductionStack)) {
  3598:         // oops.. restore 'output'
  3599:         while (output.count() > origLength) {
  3600:           output.removeAt(origLength);
  3601:         }
  3602:         return false;
  3603:       }
  3604:     }
  3605:   }
  3606: 
  3607:   // ok!
  3608:   return true;
  3609: }
  3610: 
  3611: 
  3612: // for rewriting into sequences of terminals, we prefer rules with
  3613: // fewer nonterminals on the RHS, and then (to break ties) rules with
  3614: // fewer RHS symbols altogether; overriding all of this, if one
  3615: // production's RHS contains a symbol already expanded, and the other
  3616: // does not, then prefer the RHS which hasn't already been expanded
  3617: int compareProductionsForRewriting(Production const *p1, Production const *p2,
  3618:                                    void *extra)
  3619: {
  3620:   ProductionList *reductionStack = (ProductionList*)extra;
  3621: 
  3622:   bool p1RHSSeen=false, p2RHSSeen=false;
  3623:   SFOREACH_PRODUCTION(*reductionStack, iter) {
  3624:     if (p1->rhsHasSymbol( iter.data()->left )) {
  3625:       p1RHSSeen = true;
  3626:     }
  3627:     if (p2->rhsHasSymbol( iter.data()->left )) {
  3628:       p2RHSSeen = true;
  3629:     }
  3630:   }
  3631: 
  3632:   if (p1RHSSeen != p2RHSSeen) {
  3633:     // e.g.: p1RHSSeen=true, so p2 is preferred; this will yield +1,
  3634:     // meaning p1>p2, so p2 comes first in an increasing order sort
  3635:     return (int)p1RHSSeen - (int)p2RHSSeen;
  3636:   }
  3637: 
  3638:   return priorityCompare(p1->numRHSNonterminals(), p2->numRHSNonterminals(),
  3639:                          p1->rhsLength(), p2->rhsLength());
  3640: }
  3641: 
  3642: // nonterminal -> terminals
  3643: // CONSTNESS: want 'reductionStack' to be list of const ptrs
  3644: bool GrammarAnalysis::
  3645:   rewriteSingleNTAsTerminals(TerminalList &output, Nonterminal const *nonterminal,
  3646:                              ProductionList &reductionStack) const
  3647: {
  3648:   // get all of 'nonterminal's productions that are not recursive
  3649:   ProductionList candidates;
  3650:   FOREACH_PRODUCTION(productions, prodIter) {
  3651:     Production const *prod = prodIter.data();
  3652:     if (prod->left != nonterminal) continue;
  3653: 
  3654:     // if 'prod' has 'nonterminal' on RHS, that would certainly
  3655:     // lead to looping (though it's not the only way -- consider
  3656:     // mutual recursion), so don't even consider it
  3657:     if (prod->rhsHasSymbol(nonterminal)) {
  3658:       continue;
  3659:     }
  3660: 
  3661:     // if this production has already been used, don't use it again
  3662:     if (reductionStack.contains(prod)) {
  3663:       continue;
  3664:     }
  3665: 
  3666:     // it's a candidate
  3667:     candidates.prepend(const_cast<Production*>(prod));   // constness
  3668:   }
  3669: 
  3670:   if (candidates.isEmpty()) {
  3671:     // I don't expect this... either the NT doesn't have any rules,
  3672:     // or all of them are recursive (which means the language doesn't
  3673:     // have any finite sentences)
  3674:     trace("rewrite") << "couldn't find any unused, non-recursive rules for "
  3675:                      << nonterminal->name << std::endl;
  3676:     return false;
  3677:   }
  3678: 
  3679:   // sort them into order of preference
  3680:   candidates.mergeSort(compareProductionsForRewriting, &reductionStack);
  3681: 
  3682:   // try each in turn until one succeeds; this effectively uses
  3683:   // backtracking when one fails
  3684:   bool retval = false;
  3685:   SFOREACH_PRODUCTION(candidates, candIter) {
  3686:     Production const *prod = candIter.data();
  3687: 
  3688:     // add chosen production to the stack
  3689:     reductionStack.prepend(const_cast<Production*>(prod));
  3690: 
  3691:     // now, the chosen rule provides a RHS, which is a sequence of
  3692:     // terminals and nonterminals; recursively reduce that sequence
  3693:     SymbolList rhsSymbols;
  3694:     prod->getRHSSymbols(rhsSymbols);
  3695:     retval = rewriteAsTerminalsHelper(output, rhsSymbols, reductionStack);
  3696: 
  3697:     // remove chosen production from stack
  3698:     Production *temp = reductionStack.removeFirst();
  3699:     xassert(temp == prod);
  3700: 
  3701:     if (retval) {
  3702:       // success!
  3703:       break;
  3704:     }
  3705:     else {
  3706:       // failed; try the next production
  3707:     }
  3708:   }
  3709: 
  3710:   // and we succeed only if we found a valid rewriting
  3711:   return retval;
  3712: }
  3713: 
  3714: // --------------- END of sample inputs -------------------
  3715: 
  3716: 
// this is mostly [ASU] algorithm 4.7, p.218-219: an SLR(1) parser
//
// drive this object's parse tables over 'input', a whitespace-
// separated sequence of terminal names; progress, conflicts, and
// the final stack contents are all reported via trace("parse")
void GrammarAnalysis::lrParse(char const *input)
{
  // tokenize the input
  StrtokParse tok(input, " \t");

  // parser state
  int currentToken = 0;               // index of current token
  StateId state = startState->id;     // current parser state
  ArrayStack<StateId> stateStack;     // stack of parser states; top==state
  stateStack.push(state);
  ArrayStack<Symbol const*> symbolStack;    // stack of shifted symbols

  // for each token of input
  // NOTE(review): 'tok' is compared as an int here -- presumably
  // StrtokParse converts to its token count; confirm in strtokp.h
  while (currentToken < tok) {
    // map the token text to a symbol
    Terminal *symbol = findTerminal(tok[currentToken]);     // (constness)

    // consult action table
    ActionEntry action = tables->getActionEntry(state, symbol->termIndex);

    // see what kind of action it is
    if (tables->isShiftAction(action)) {
      // shift
      StateId destState = tables->decodeShift(action, symbol->termIndex);

      // push current state and symbol
      state = destState;
      stateStack.push(state);
      symbolStack.push(symbol);

      // next input symbol
      currentToken++;

      // debugging
      trace("parse")
        << "moving to state " << state
        << " after shifting symbol " << symbol->name << std::endl;
    }

    else if (tables->isReduceAction(action)) {
      // reduce
      int prodIndex = tables->decodeReduce(action, state);
      ParseTables::ProdInfo const &info = tables->getProdInfo(prodIndex);

      // it is here that an action or tree-building step would
      // take place

      // pop as many symbols off stacks as there are symbols on
      // the right-hand side of 'prod'
      stateStack.popMany(info.rhsLen);
      state = stateStack.top();
      symbolStack.popMany(info.rhsLen);

      // find out where to go
      StateId destState = tables->decodeGoto(
        tables->getGotoEntry(state, info.lhsIndex), info.lhsIndex);

      // go there
      state = destState;
      stateStack.push(state);

      // and push the reduced nonterminal
      symbolStack.push(getNonterminal(info.lhsIndex));

      // debugging
      trace("parse")
        << "moving to state " << state
        << " after reducing by rule id " << prodIndex << std::endl;
    }

    else if (tables->isErrorAction(action)) {
      // error: no transition defined; report and give up
      trace("parse")
        << "no actions defined for symbol " << symbol->name
        << " in state " << state << std::endl;
      break;       // stop parsing
    }

    else {
      // conflict: describe the alternatives, then give up
      trace("parse")
        << "conflict for symbol " << symbol->name
        << " in state " << state
        << "; possible actions:\n";

      // get actions
      ActionEntry *entry = tables->decodeAmbigAction(action, state);

      // explain each one; entry[0] holds the number of actions,
      // with the actions themselves following it
      for (int i=0; i<entry[0]; i++) {
        action = entry[i+1];
        if (tables->isShiftAction(action)) {
          trace("parse") << "  shift, and move to state "
                         << tables->decodeShift(action, symbol->termIndex) << std::endl;
        }
        else if (tables->isReduceAction(action)) {
          trace("parse") << "  reduce by rule id "
                         << tables->decodeReduce(action, state) << std::endl;
        }
        else {
          // no other alternative makes sense
          xfailure("bad code in ambiguous action table");
        }
      }

      break;       // stop parsing
    }
  }

  // print final contents of stack; if the parse was successful,
  // I want to see what remains; if not, it's interesting anyway
  trace("parse") << "final contents of stacks (right is top):\n";

  std::ostream &os = trace("parse") << "  state stack:";
  int i;
  for (i=0; i < stateStack.length(); i++) {
    os << " " << stateStack[i];
  }
  os << " <-- current" << std::endl;

  os << "  symbol stack:";
  for (i=0; i < symbolStack.length(); i++) {
    os << " " << symbolStack[i]->name;
  }
  os << std::endl;
}
  3844: 
  3845: 
// ------------------- grammar transformations ------------------
// rewrite the grammar's semantic specifications so that every
// production builds a generic parse tree (PTreeNode) instead of
// running user-supplied action code
void GrammarAnalysis::addTreebuildingActions()
{
  // shorthand: intern 's' and wrap it as a location-free LocString
  #define STR(s) LITERAL_LOCSTRING(grammarStringTable.add(s))

  // prepend an #include to the verbatim
  {
    StringRef extra = grammarStringTable.add(
      "\n#include \"ptreenode.h\"     // PTreeNode\n");
    verbatim.prepend(new LITERAL_LOCSTRING(extra));
  }

  // get handles to the sm_strings we want to emit
  LocString param = STR("n");
  LocString dupCode = STR("return n;");    // dup is identity
  LocString delCode = STR("");             // del is no-op
  LocString svalType = STR("PTreeNode*");

  // merge relies on chaining scheme for alternatives
  LocString mergeParam1 = STR("L");
  LocString mergeParam2 = STR("R");
  LocString mergeCode = STR("L->addAlternative(R); return L;");

  // write dup/del/merge for nonterminals
  MUTATE_EACH_OBJLIST(Nonterminal, nonterminals, ntIter) {
    Nonterminal *nt = ntIter.data();

    nt->dupParam = param;
    nt->dupCode = dupCode;

    nt->delParam = param;
    nt->delCode = delCode;

    // every nonterminal's semantic value is a tree node
    nt->type = svalType;

    nt->mergeParam1 = mergeParam1;
    nt->mergeParam2 = mergeParam2;
    nt->mergeCode = mergeCode;
  }

  // write treebuilding actions for productions
  MUTATE_EACH_OBJLIST(Production, productions, prodIter) {
    Production *p = prodIter.data();

    // build up the code; the node is labeled with the production's
    // printed form so trees are self-describing
    sm_stringBuilder code;
    code << "return new PTreeNode(\"" << p->left->name << " -> "
         << encodeWithEscapes(p->rhsString(false /*printTags*/,
                                           true /*quoteAliases*/))
         << "\"";

    int ct=1;
    MUTATE_EACH_OBJLIST(Production::RHSElt, p->right, rIter) {
      Production::RHSElt *elt = rIter.data();

      // connect nonterminal subtrees; drop lexemes on the floor
      if (elt->sym->isNonterminal()) {
        // use a generic tag ("t1", "t2", ...)
        sm_string tag = sm_stringc << "t" << ct++;
        elt->tag = STR(tag);

        code << ", " << tag;
      }
    }

    code << ");";

    // insert the code into the production
    p->action = LocString(SL_UNKNOWN,
                          grammarStringTable.add(code));
  }

  #undef STR
}
  3920: 
  3921: 
// ---------------------------- main --------------------------------
// no-op varargs sink: passing variables here "uses" them, which
// silences unused-variable warnings without affecting behavior
void pretendUsed(...)
{}
  3925: 
  3926: 
// load the [ASU] example grammar, run the full analysis on it, and
// trace a handful of test parses; retained for historical interest
void GrammarAnalysis::exampleGrammar()
{
  // at one time I was using this to verify my LR item set
  // construction code; this function isn't even called anymore..
  readGrammarFile(*this, "examples/asu419.gr");

  // test sentences, each ending with the '$' marker token
  char const *input[] = {
    " id                 $",
    " id + id            $",
    " id * id            $",
    " id + id * id       $",
    " id * id + id       $",
    " ( id + id ) * id   $",
    " id + id + id       $",
    " id + ( id + id )   $"
  };

  // verify we got what we expected
  printProductions(trace("grammar") << std::endl);


  // run analyses (NULL: no item-set dump file)
  runAnalyses(NULL);


  // do some test parses
  INTLOOP(i, 0, (int)TABLESIZE(input)) {
    trace("parse") << "------ parsing: `" << input[i] << "' -------\n";
    lrParse(input[i]);
  }
}
  3958: 
  3959: 
// run the complete grammar analysis pipeline: well-formedness
// checks, derivability/first/follow computation, LR item set
// construction, and parse table generation; if 'setsFname' is
// non-NULL, also dump item sets and symbol/production info there
void GrammarAnalysis::runAnalyses(char const *setsFname)
{
  // prepare for symbol of interest (debugging aid selected via
  // the SYM_OF_INTEREST environment variable)
  {
    char const *name = getenv("SYM_OF_INTEREST");
    if (name != NULL) {
      symOfInterest = findSymbolC(name);
      if (!symOfInterest) {
        std::cout << "warning: " << name << " isn't in the grammar\n";
      }
    }
  }

  // reset error count so it might be possible to reuse the object
  // for another grammar
  errors = 0;

  checkWellFormed();

  // precomputations
  traceProgress(1) << "init...\n";
  initializeAuxData();

  traceProgress(1) << "derivability relation...\n";
  computeWhatCanDeriveWhat();

  computeSupersets();

  traceProgress(1) << "first...\n";
  computeFirst();
  computeDProdFirsts();

  traceProgress(1) << "follow...\n";
  computeFollow();

  // print results
  {
    std::ostream &tracer = trace("terminals") << "Terminals:\n";
    printSymbols(tracer, toObjList(terminals));
  }
  {
    std::ostream &tracer = trace("nonterminals") << "Nonterminals:\n";
    tracer << "  " << emptyString << std::endl;
    printSymbols(tracer, toObjList(nonterminals));
  }

  if (tracingSys("derivable")) {
    derivable->print();
  }

  // testing closure
  #if 0
  {
    // make a singleton set out of the first production, and
    // with the dot at the start
    ObjList<LRItem> itemSet;
    LRItem *kernel = productions.nth(0)->getDProd(0);  // (serf)
    itemSet.append(kernel);

    // compute its closure
    itemSetClosure(itemSet);

    // print it
    std::cout << "Closure of: ";
    kernel->print(std::cout);
    std::cout << std::endl;

    SFOREACH_OBJLIST(LRItem, itemSet, dprod) {
      std::cout << "  ";
      dprod.data()->print(std::cout);
      std::cout << std::endl;
    }
  }
  #endif // 0


  // LR stuff
  traceProgress(1) << "LR item sets...\n";
  constructLRItemSets();

  traceProgress(1) << "state renumbering...\n";
  renumberStates();

  // "deterministic" tracing flag disables GLR-style table features
  traceProgress(1) << "parse tables...\n";
  computeParseTables(!tracingSys("deterministic"));

  #if 0     // old code; need it for just a while longer
  {
    int sr=0, rr=0;           // numbers of each kind of conflict
    findSLRConflicts(sr, rr);
    if (sr + rr > 0) {
      std::cout << sr << " shift/reduce conflicts and "
           << rr << " reduce/reduce conflicts\n";
    }
  }
  #endif // 0

  // if we want to print, do so before throwing away the items
  if (tracingSys("itemsets")) {
    printProductionsAndItems(std::cout, true /*code*/);
  }

  // open debug output file; a failed open is reported but not
  // fatal -- we simply skip all of the dumps below
  std::ofstream *setsOutput = NULL;
  if (setsFname) {
    setsOutput = new std::ofstream(setsFname);
    if (!*setsOutput) {
      std::cout << "couldn't open " << setsFname << " to write item sets\n";
      delete setsOutput;
      setsOutput = NULL;
    }
  }

  // count the number of unreachable nonterminals & terminals
  {
    if (setsOutput) {
      *setsOutput << "unreachable nonterminals:\n";
    }
    int ct=0;
    FOREACH_NONTERMINAL(nonterminals, iter) {
      if (!iter.data()->reachable) {
        ct++;

        if (setsOutput) {
          *setsOutput << "  " << iter.data()->name << "\n";
        }
      }
    }

    reportUnexpected(ct, expectedUNRNonterms, "unreachable nonterminals");

    // bison also reports the number of productions under all the
    // unreachable nonterminals, but that doesn't seem especially
    // useful to me

    if (setsOutput) {
      *setsOutput << "unreachable terminals:\n";
    }
    ct=0;
    FOREACH_TERMINAL(terminals, jter) {
      if (!jter.data()->reachable) {
        ct++;

        if (setsOutput) {
          *setsOutput << "  " << jter.data()->name << "\n";
        }
      }
    }

    reportUnexpected(ct, expectedUNRTerms, "unreachable terminals");
  }

  // print the item sets
  if (setsOutput) {
    traceProgress() << "printing item sets to " << setsFname << " ..." << std::endl;
    *setsOutput << "NOTE: Item set numbers can change depending on what flags\n"
                << "are passed to 'elkhound'!\n\n\n";
    // only print the nonkernel items if they're explicitly requested,
    // since they are more noise than signal, usually
    printItemSets(*setsOutput, tracingSys("nonkernel"));
  }

  // print information about all tokens
  if (setsOutput) {
    *setsOutput << "terminals:\n";
    FOREACH_TERMINAL(terminals, iter) {
      Terminal const *t = iter.data();
      *setsOutput << "  ";
      t->print(*setsOutput);
      *setsOutput << "\n";
    }

    // and nonterminals
    *setsOutput << "nonterminals:\n";
    FOREACH_NONTERMINAL(nonterminals, ntIter) {
      Nonterminal const *nt = ntIter.data();
      *setsOutput << "  ";
      nt->print(*setsOutput);
      *setsOutput << "\n";
    }

    // and productions
    *setsOutput << "productions:\n";
    for (int p=0; p<numProds; p++) {
      *setsOutput << "  ";
      getProduction(p)->print(*setsOutput);
      *setsOutput << "\n";
    }
  }


  // harmless when NULL (file never opened)
  delete setsOutput;

  // I don't need (most of) the item sets during parsing, so
  // throw them away once I'm done analyzing the grammar
  MUTATE_EACH_OBJLIST(ItemSet, itemSets, iter) {
    iter.data()->throwAwayItems();
  }


  // another analysis
  //computePredictiveParsingTable();

  // silence warnings
  //pretendUsed(a,b,c,d,e, S,A,B,C,D);
}
  4166: 
  4167: 
// ------------------ emitting action code -----------------------
// prototypes for this section; some of them accept Grammar simply
// because that's all they need; there's no problem upgrading them
// to GrammarAnalysis
void emitDescriptions(GrammarAnalysis const &g, EmitCode &out);
void emitActionCode(GrammarAnalysis const &g, char const *hFname,
                    char const *ccFname, char const *srcFname);
void emitUserCode(EmitCode &out, LocString const &code, bool braces = true);
void emitActions(Grammar const &g, EmitCode &out, EmitCode &dcl);
void emitDupDelMerge(GrammarAnalysis const &g, EmitCode &out, EmitCode &dcl);
void emitFuncDecl(Grammar const &g, EmitCode &out, EmitCode &dcl,
                  char const *rettype, char const *params);
void emitDDMInlines(Grammar const &g, EmitCode &out, EmitCode &dcl,
                    Symbol const &sym);
// emits one switch-based dispatcher over 'syms'
void emitSwitchCode(Grammar const &g, EmitCode &out,
                    char const *signature, char const *switchVar,
                    ObjList<Symbol> const &syms, int whichFunc,
                    char const *templateCode, char const *actUpon);
  4186: 
  4187: 
// yield the name of the inline function for this production; naming
// design motivated by desire to make debugging easier
// (e.g. production 12 for nonterminal Expr becomes "action12_Expr")
sm_string actionFuncName(Production const &prod)
{
  return sm_stringc << "action" << prod.prodIndex
                 << "_" << prod.left->name;
}
  4195: 
  4196: 
// emit the user's action code to a file
// 'hFname'/'ccFname' are the header/implementation files to create;
// 'srcFname' is the grammar source file, named in generated comments
void emitActionCode(GrammarAnalysis const &g, char const *hFname,
                    char const *ccFname, char const *srcFname)
{
  EmitCode dcl(hFname);
  if (!dcl) {
    throw_XOpen(hFname);
  }

  // build the include-guard name from the header file name,
  // replacing characters that aren't valid in an identifier
  sm_string latchName = replace(replace(replace(replace(replace(
                       sm_stringToupper(hFname),
                         ".", "_"),
                         ":", "_"),
                         "\\", "_"),
                         "/", "_"),
                         "-", "_");

  // prologue
  dcl << "// " << hFname << "\n"
      << "// *** DO NOT EDIT BY HAND ***\n"
      << "// automatically generated by elkhound, from " << srcFname << "\n"
      << "\n"
      << "#ifndef " << latchName << "\n"
      << "#define " << latchName << "\n"
      << "\n"
      << "#include \"elk_useract.h\"     // UserActions\n"
      << "\n"
      ;

  // insert the stand-alone verbatim sections
  {FOREACH_OBJLIST(LocString, g.verbatim, iter) {
    emitUserCode(dcl, *(iter.data()), false /*braces*/);
  }}

  // insert each of the context class definitions; the last one
  // is the one whose name is 'g.actionClassName' and into which
  // the action functions are inserted as methods
  {
    int ct=0;
    FOREACH_OBJLIST(LocString, g.actionClasses, iter) {
      if (ct++ > 0) {
        // end the previous class; the following body will open
        // another one, and the brace following the action list
        // will close the last one
        dcl << "};\n";
      }

      dcl << "\n"
          << "// parser context class\n"
          << "class ";
      emitUserCode(dcl, *(iter.data()), false /*braces*/);
  }}

  // we end the context class with declarations of the action functions
  dcl << "\n"
      << "private:\n"
      << "  USER_ACTION_FUNCTIONS      // see useract.h\n"
      << "\n"
      << "  // declare the actual action function\n"
      << "  static SemanticValue doReductionAction(\n"
      << "    " << g.actionClassName << " *ths,\n"
      << "    int productionId, SemanticValue const *semanticValues"
         SOURCELOC( << ",\n  SourceLoc loc" )
      << ");\n"
      << "\n"
      << "  // declare the classifier function\n"
      << "  static int reclassifyToken(\n"
      << "    " << g.actionClassName << " *ths,\n"
      << "    int oldTokenType, SemanticValue sval);\n"
      << "\n"
      ;

  // now start the implementation file
  EmitCode out(ccFname);
  if (!out) {
    throw_XOpen(ccFname);
  }

  out << "// " << ccFname << "\n";
  out << "// *** DO NOT EDIT BY HAND ***\n";
  out << "// automatically generated by gramanl, from " << srcFname << "\n";
  out << "\n";
  #ifdef NO_GLR_SOURCELOC
    // we need to make sure the USER_ACTION_FUNCTIONS use
    // the declarations consistent with how we're printing
    // the definitions
    out << "#ifndef NO_GLR_SOURCELOC\n";
    out << "  #define NO_GLR_SOURCELOC\n";
    out << "#endif\n";
  #else
    out << "// GLR source location information is enabled\n";
  #endif
  out << "\n";
  out << "#include \"" << sm_basename(hFname).pchar() << "\"     // " << g.actionClassName << "\n";
  out << "#include \"elk_parsetables.h\" // ParseTables\n";
  out << "#include \"sm_srcloc.h\"      // SourceLoc\n";
  out << "\n";
  out << "#include <assert.h>      // assert\n";
  out << "#include <iostream>    // std::cout\n";
  out << "#include <stdlib.h>      // abort\n";
  out << "\n";

  NOSOURCELOC(
    out << "// parser-originated location information is disabled by\n"
        << "// NO_GLR_SOURCELOC; any rule which refers to 'loc' will get this one\n"
        << "static SourceLoc loc = SL_UNKNOWN;\n"
        << "\n\n";
  )

  emitDescriptions(g, out);
  // 'emitDescriptions' prints two newlines itself..

  emitActions(g, out, dcl);
  out << "\n";
  out << "\n";

  emitDupDelMerge(g, out, dcl);
  out << "\n";
  out << "\n";

  g.tables->finishTables();
  g.tables->emitConstructionCode(out, g.actionClassName, "makeTables");

  // I put this last in the context class, and make it public
  dcl << "\n"
      << "// the function which makes the parse tables\n"
      << "public:\n"
      << "  virtual ParseTables *makeTables();\n"
      << "};\n"
      << "\n"
      << "#endif // " << latchName << "\n"
      ;

  // finish the implementation file with the impl_verbatim sections
  FOREACH_OBJLIST(LocString, g.implVerbatim, iter) {
    emitUserCode(out, *(iter.data()), false /*braces*/);
  }
}
  4334: 
  4335: 
  4336: void emitUserCode(EmitCode &out, LocString const &code, bool braces)
  4337: {
  4338:   out << "\n";
  4339:   if (code.validLoc()) {
  4340:     out << lineDirective(code.loc);
  4341:   }
  4342: 
  4343:   // 7/27/03: swapped so that braces are inside the line directive
  4344:   if (braces) {
  4345:     out << "{";
  4346:   }
  4347: 
  4348:   out << code;
  4349: 
  4350:   // the final brace is on the same line so errors reported at the
  4351:   // last brace go to user code
  4352:   if (braces) {
  4353:     out << " }";
  4354:   }
  4355: 
  4356:   if (code.validLoc()) {
  4357:     out << "\n" << restoreLine;
  4358:   }
  4359:   out << "\n";
  4360: }
  4361: 
  4362: 
  4363: // bit of a hack: map "void" to "SemanticValue" so that the compiler
  4364: // won't mind when I try to declare parameters of that type
  4365: char const *notVoid(char const *type)
  4366: {
  4367:   if (0==strcmp(type, "void")) {
  4368:     return "SemanticValue";
  4369:   }
  4370:   else {
  4371:     return type;
  4372:   }
  4373: }
  4374: 
  4375: // yield the given type, but if it's NULL, then yield
  4376: // something to use instead
  4377: char const *typeString(char const *type, LocString const &tag)
  4378: {
  4379:   if (!type) {
  4380:     xbase(sm_stringc << tag.locString() << ": Production tag \"" << tag
  4381:                   << "\" on a symbol with no type.\n");
  4382:     return NULL;     // silence warning
  4383:   }
  4384:   else {
  4385:     return notVoid(type);
  4386:   }
  4387: }
  4388: 
  4389: 
  4390: // return true if the type starts with the word "enum"
  4391: bool isEnumType(char const *type)
  4392: {
  4393:   return 0==strncmp(type, "enum", 4);
  4394: }
  4395: 
  4396: 
  4397: void emitDescriptions(GrammarAnalysis const &g, EmitCode &out)
  4398: {
  4399:   // emit a map of terminal ids to their names
  4400:   {
  4401:     out << "static char const *termNames[] = {\n";
  4402:     for (int code=0; code < g.numTerminals(); code++) {
  4403:       Terminal const *t = g.getTerminal(code);
  4404:       if (!t) {
  4405:         // no terminal for that code
  4406:         out << "  \"(no terminal)\",  // " << code << "\n";
  4407:       }
  4408:       else {
  4409:         out << "  \"" << t->name << "\",  // " << code << "\n";
  4410:       }
  4411:     }
  4412:     out << "};\n"
  4413:         << "\n";
  4414:   }
  4415: 
  4416:   // emit a function to describe terminals; at some point I'd like to
  4417:   // extend my grammar format to allow the user to supply
  4418:   // token-specific description functions, but for now I will just
  4419:   // use the information easily available the synthesize one;
  4420:   // I print "sval % 100000" so I get a 5-digit number, which is
  4421:   // easy for me to compare for equality without adding much clutter
  4422:   out << "sm_string " << g.actionClassName
  4423:       << "::terminalDescription(int termId, SemanticValue sval)\n"
  4424:       << "{\n"
  4425:       << "  return sm_stringc << termNames[termId]\n"
  4426:       << "                 << \"(\" << (sval % 100000) << \")\";\n"
  4427:       << "}\n"
  4428:       << "\n"
  4429:       << "\n"
  4430:       ;
  4431: 
  4432:   // emit a map of nonterminal ids to their names
  4433:   {
  4434:     out << "static char const *nontermNames[] = {\n";
  4435:     for (int code=0; code < g.numNonterminals(); code++) {
  4436:       Nonterminal const *nt = g.getNonterminal(code);
  4437:       if (!nt) {
  4438:         // no nonterminal for that code
  4439:         out << "  \"(no nonterminal)\",  // " << code << "\n";
  4440:       }
  4441:       else {
  4442:         out << "  \"" << nt->name << "\",  // " << code << "\n";
  4443:       }
  4444:     }
  4445:     out << "};\n"
  4446:         << "\n";
  4447:   }
  4448: 
  4449:   // and a function to describe nonterminals also
  4450:   out << "sm_string " << g.actionClassName
  4451:       << "::nonterminalDescription(int nontermId, SemanticValue sval)\n"
  4452:       << "{\n"
  4453:       << "  return sm_stringc << nontermNames[nontermId]\n"
  4454:       << "                 << \"(\" << (sval % 100000) << \")\";\n"
  4455:       << "}\n"
  4456:       << "\n"
  4457:       << "\n"
  4458:       ;
  4459: 
  4460:   // emit functions to get access to the static maps
  4461:   out << "char const *" << g.actionClassName
  4462:       << "::terminalName(int termId)\n"
  4463:       << "{\n"
  4464:       << "  return termNames[termId];\n"
  4465:       << "}\n"
  4466:       << "\n"
  4467:       << "char const *" << g.actionClassName
  4468:       << "::nonterminalName(int nontermId)\n"
  4469:       << "{\n"
  4470:       << "  return nontermNames[nontermId];\n"
  4471:       << "}\n"
  4472:       << "\n"
  4473:       ;
  4474: }
  4475: 
  4476: 
// Emit, into the .cc stream 'out' and the in-class declaration stream
// 'dcl', one inline action function per production, followed by the
// central doReductionAction dispatcher (a switch on productionId) that
// the parser core invokes, and the accessor that returns its address.
void emitActions(Grammar const &g, EmitCode &out, EmitCode &dcl)
{
  out << "// ------------------- actions ------------------\n";

  // iterate over productions, emitting inline action functions
  {FOREACH_OBJLIST(Production, g.productions, iter) {
    Production const &prod = *(iter.data());

    // there's no syntax for a typeless nonterminal, so this shouldn't
    // be triggerable by the user
    xassert(prod.left->type);

    // put the production in comments above the defn
    out << "// " << prod.toString() << "\n";

    out << "inline " << prod.left->type << " "
        << g.actionClassName << "::" << actionFuncName(prod)
        << "("
        SOURCELOC( << "SourceLoc loc" )
        ;

    dcl << "  " << prod.left->type << " " << actionFuncName(prod) << "("
        SOURCELOC( << "SourceLoc loc" )
        ;

    // number of formals printed so far; decides whether the next one
    // needs a comma separator
    int ct=0;
    SOURCELOC( ct++ );    // if we printed the 'loc' param, count it

    // iterate over RHS elements, emitting formals for each with a tag
    FOREACH_OBJLIST(Production::RHSElt, prod.right, rhsIter) {
      Production::RHSElt const &elt = *(rhsIter.data());
      if (elt.tag.length() == 0) continue;     // untagged: no formal

      if (ct++ > 0) {
        out << ", ";
        dcl << ", ";
      }

      out << typeString(elt.sym->type, elt.tag);
      dcl << typeString(elt.sym->type, elt.tag);

      // the tag becomes the formal parameter's name
      out << " " << elt.tag;
      dcl << " " << elt.tag;
    }

    out << ")";
    dcl << ");\n";

    // now insert the user's code, to execute in this environment of
    // properly-typed semantic values
    emitUserCode(out, prod.action);
  }}

  out << "\n";

  // main action function; calls the inline functions emitted above
  out << "/*static*/ SemanticValue " << g.actionClassName << "::doReductionAction(\n"
      << "  " << g.actionClassName << " *ths,\n"
      << "  int productionId, SemanticValue const *semanticValues"
      SOURCELOC( << ",\n  SourceLoc loc" )
      << ")\n";
  out << "{\n";
  out << "  switch (productionId) {\n";

  // iterate over productions: one case per production, invoking its
  // inline action with properly-cast arguments
  FOREACH_OBJLIST(Production, g.productions, iter) {
    Production const &prod = *(iter.data());

    out << "    case " << prod.prodIndex << ":\n";
    out << "      return (SemanticValue)(ths->" << actionFuncName(prod) << "("
        SOURCELOC( << "loc" )
        ;

    // iterate over RHS elements, emitting arguments for each with a tag
    int index = -1;      // index into 'semanticValues'
    int ct=0;
    SOURCELOC( ct++ );   // count 'loc' if it is passed
    FOREACH_OBJLIST(Production::RHSElt, prod.right, rhsIter) {
      Production::RHSElt const &elt = *(rhsIter.data());

      // we have semantic values in the array for all RHS elements,
      // even if they didn't get a tag
      index++;

      if (elt.tag.length() == 0) continue;

      if (ct++ > 0) {
        out << ", ";
      }

      // cast SemanticValue to proper type
      out << "(" << typeString(elt.sym->type, elt.tag) << ")";
      if (isEnumType(elt.sym->type)) {
        // egcs-1.1.2 complains when I cast from void* to enum, even
        // when there is a cast!  so let's put an intermediate cast
        // to int
        out << "(int)";
      }
      out << "(semanticValues[" << index << "])";
    }

    out << ")";     // end of argument list

    if (0==strcmp(prod.left->type, "void")) {
      // cute hack: turn the expression into a comma expression, with
      // the value returned being 0
      out << ", 0";
    }

    out << ");\n";
  }

  out << "    default:\n";
  out << "      assert(!\"invalid production code\");\n";
  out << "      return (SemanticValue)0;   // silence warning\n";
  out << "  }\n";
  out << "}\n";


  // now emit the UserActions function which returns the doReductionAction
  // function pointer
  out << "\n";
  out << "UserActions::ReductionActionFunc " << g.actionClassName << "::getReductionAction()\n";
  out << "{\n";
  out << "  return (ReductionActionFunc)&" << g.actionClassName << "::doReductionAction;\n";
  out << "}\n";

}
  4606: 
  4607: 
// Emit the per-nonterminal dup/del/merge/keep support and the
// per-terminal dup/del/classify support: first inline helpers for each
// symbol that supplies user code (emitDDMInlines), then one
// switch-based dispatcher per operation (emitSwitchCode; the integer
// argument selects which user-code slot is consulted: 0=dup, 1=del,
// 2=merge, 3=keep, 4=classify).  "$acn", "$symName" and "$symType" in
// the template strings are substituted by emitSwitchCode.
void emitDupDelMerge(GrammarAnalysis const &g, EmitCode &out, EmitCode &dcl)
{
  out << "// ---------------- dup/del/merge/keep nonterminals ---------------\n"
      << "\n";

  // emit inlines for dup/del/merge of nonterminals
  FOREACH_OBJLIST(Nonterminal, g.nonterminals, ntIter) {
    emitDDMInlines(g, out, dcl, *(ntIter.data()));
  }

  // emit dup-nonterm
  emitSwitchCode(g, out,
    "SemanticValue $acn::duplicateNontermValue(int nontermId, SemanticValue sval)",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    0 /*dupCode*/,
    "      return (SemanticValue)dup_$symName(($symType)sval);\n",
    NULL);

  // emit del-nonterm
  emitSwitchCode(g, out,
    "void $acn::deallocateNontermValue(int nontermId, SemanticValue sval)",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    1 /*delCode*/,
    "      del_$symName(($symType)sval);\n"
    "      return;\n",
    "deallocate nonterm");

  // emit merge-nonterm
  emitSwitchCode(g, out,
    "SemanticValue $acn::mergeAlternativeParses(int nontermId, SemanticValue left,\n"
    "                                           SemanticValue right"
    SOURCELOC(",  SourceLoc loc")
    ")",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    2 /*mergeCode*/,
    "      return (SemanticValue)merge_$symName(($symType)left, ($symType)right);\n",
    "merge nonterm");

  // emit keep-nonterm
  emitSwitchCode(g, out,
    "bool $acn::keepNontermValue(int nontermId, SemanticValue sval)",
    "nontermId",
    (ObjList<Symbol> const&)g.nonterminals,
    3 /*keepCode*/,
    "      return keep_$symName(($symType)sval);\n",
    NULL);


  out << "\n";
  out << "// ---------------- dup/del/classify terminals ---------------\n";
  // emit inlines for dup/del of terminals
  FOREACH_OBJLIST(Terminal, g.terminals, termIter) {
    emitDDMInlines(g, out, dcl, *(termIter.data()));
  }

  // emit dup-term
  emitSwitchCode(g, out,
    "SemanticValue $acn::duplicateTerminalValue(int termId, SemanticValue sval)",
    "termId",
    (ObjList<Symbol> const&)g.terminals,
    0 /*dupCode*/,
    "      return (SemanticValue)dup_$symName(($symType)sval);\n",
    NULL);

  // emit del-term
  emitSwitchCode(g, out,
    "void $acn::deallocateTerminalValue(int termId, SemanticValue sval)",
    "termId",
    (ObjList<Symbol> const&)g.terminals,
    1 /*delCode*/,
    "      del_$symName(($symType)sval);\n"
    "      return;\n",
    "deallocate terminal");

  // emit classify-term; note the generated function is static and
  // dispatches through the explicit 'ths' pointer
  emitSwitchCode(g, out,
    "/*static*/ int $acn::reclassifyToken($acn *ths, int oldTokenType, SemanticValue sval)",
    "oldTokenType",
    (ObjList<Symbol> const&)g.terminals,
    4 /*classifyCode*/,
    "      return ths->classify_$symName(($symType)sval);\n",
    NULL);

  // and the virtual method which returns the classifier
  out << "UserActions::ReclassifyFunc " << g.actionClassName << "::getReclassifier()\n"
      << "{\n"
      << "  return (ReclassifyFunc)&" << g.actionClassName << "::reclassifyToken;\n"
      << "}\n";
}
  4700: 
  4701: 
  4702: // emit both the function decl for the .h file, and the beginning of
  4703: // the function definition for the .cc file
  4704: void emitFuncDecl(Grammar const &g, EmitCode &out, EmitCode &dcl,
  4705:                   char const *rettype, char const *params)
  4706: {
  4707:   out << "inline " << rettype << " " << g.actionClassName
  4708:       << "::" << params;
  4709: 
  4710:   dcl << "  inline " << rettype << " " << params << ";\n";
  4711: }
  4712: 
  4713: 
// For one grammar symbol, emit the inline helper function for each
// dup/del/merge/keep/classify handler the user actually supplied.
// Definitions go to 'out' (.cc), declarations to 'dcl' (.h), both via
// emitFuncDecl; the handler body itself is inserted by emitUserCode.
void emitDDMInlines(Grammar const &g, EmitCode &out, EmitCode &dcl,
                    Symbol const &sym)
{
  // downcasts; NULL when 'sym' is not of that kind
  Terminal const *term = sym.ifTerminalC();
  Nonterminal const *nonterm = sym.ifNonterminalC();

  if (sym.dupCode) {
    emitFuncDecl(g, out, dcl, sym.type,
      sm_stringc << "dup_" << sym.name
              << "(" << sym.type << " " << sym.dupParam << ") ");
    emitUserCode(out, sym.dupCode);
  }

  if (sym.delCode) {
    // the del handler's parameter name is optional
    emitFuncDecl(g, out, dcl, "void",
      sm_stringc << "del_" << sym.name
              << "(" << sym.type << " "
              << (sym.delParam? sym.delParam : "") << ") ");
    emitUserCode(out, sym.delCode);
  }

  // merge and keep exist only for nonterminals
  if (nonterm && nonterm->mergeCode) {
    // notVoid: a "void"-typed symbol still needs declarable params
    emitFuncDecl(g, out, dcl, notVoid(sym.type),
      sm_stringc << "merge_" << sym.name
              << "(" << notVoid(sym.type) << " " << nonterm->mergeParam1
              << ", " << notVoid(sym.type) << " " << nonterm->mergeParam2 << ") ");
    emitUserCode(out, nonterm->mergeCode);
  }

  if (nonterm && nonterm->keepCode) {
    emitFuncDecl(g, out, dcl, "bool",
      sm_stringc << "keep_" << sym.name
              << "(" << sym.type << " " << nonterm->keepParam << ") ");
    emitUserCode(out, nonterm->keepCode);
  }

  // classify exists only for terminals
  if (term && term->classifyCode) {
    emitFuncDecl(g, out, dcl, "int",
      sm_stringc << "classify_" << sym.name
              << "(" << sym.type << " " << term->classifyParam << ") ");
    emitUserCode(out, term->classifyCode);
  }
}
  4757: 
  4758: void emitSwitchCode(Grammar const &g, EmitCode &out,
  4759:                     char const *signature, char const *switchVar,
  4760:                     ObjList<Symbol> const &syms, int whichFunc,
  4761:                     char const *templateCode, char const *actUpon)
  4762: {
  4763:   out << replace(signature, "$acn", g.actionClassName) << "\n"
  4764:          "{\n"
  4765:          "  switch (" << switchVar << ") {\n";
  4766: 
  4767:   FOREACH_OBJLIST(Symbol, syms, symIter) {
  4768:     Symbol const &sym = *(symIter.data());
  4769: 
  4770:     if (whichFunc==0 && sym.dupCode ||
  4771:         whichFunc==1 && sym.delCode ||
  4772:         whichFunc==2 && sym.asNonterminalC().mergeCode ||
  4773:         whichFunc==3 && sym.asNonterminalC().keepCode ||
  4774:         whichFunc==4 && sym.asTerminalC().classifyCode) {
  4775:       out << "    case " << sym.getTermOrNontermIndex() << ":\n";
  4776:       out << replace(replace(templateCode,
  4777:                "$symName", sym.name),
  4778:                "$symType", notVoid(sym.type));
  4779:     }
  4780:   }
  4781: 
  4782:   out << "    default:\n";
  4783:   switch (whichFunc) {
  4784:     default:
  4785:       xfailure("bad func code");
  4786: 
  4787:     case 0:    // unspecified dup
  4788:       if (!g.useGCDefaults) {
  4789:         // not using GC, return NULL so silent sharing doesn't happen
  4790:         out << "      return (SemanticValue)0;\n";
  4791:       }
  4792:       else {
  4793:         // using GC, sharing is fine
  4794:         out << "      return sval;\n";
  4795:       }
  4796:       break;
  4797: 
  4798:     case 1:    // unspecified del
  4799:       if (!g.useGCDefaults) {
  4800:         // warn about unspec'd del, since it's probably a memory leak
  4801:         if (syms.firstC()->isNonterminal()) {
  4802:           // use the nonterminal map
  4803:           out << "      std::cout << \"WARNING: there is no action to deallocate nonterm \"\n"
  4804:                  "           << nontermNames[" << switchVar << "] << std::endl;\n";
  4805:         }
  4806:         else {
  4807:           // use the terminal map
  4808:           out << "      std::cout << \"WARNING: there is no action to deallocate terminal \"\n"
  4809:                  "           << termNames[" << switchVar << "] << std::endl;\n";
  4810:         }
  4811:       }
  4812:       else {
  4813:         // in gc mode, just ignore del
  4814:         out << "      break;\n";
  4815:       }
  4816:       break;
  4817: 
  4818:     case 2:    // unspecified merge: warn, but then use left (arbitrarily)
  4819:       out << "      std::cout << toString(loc) \n"
  4820:           << "           << \": WARNING: there is no action to merge nonterm \"\n"
  4821:           << "           << nontermNames[" << switchVar << "] << std::endl;\n";
  4822:       if (g.defaultMergeAborts) {
  4823:         out << "      abort();\n";
  4824:       }
  4825:       else {
  4826:         out << "      return left;\n";
  4827:       }
  4828:       break;
  4829: 
  4830:     case 3:    // unspecified keep: keep it
  4831:       out << "      return true;\n";
  4832:       break;
  4833: 
  4834:     case 4:    // unspecified classifier: identity map
  4835:       out << "      return oldTokenType;\n";
  4836:       break;
  4837:   }
  4838: 
  4839:   out << "  }\n"
  4840:          "}\n"
  4841:          "\n";
  4842: }
  4843: 
  4844: 
  4845: // ------------------------- main --------------------------
  4846: // TODO: split this into its own source file
  4847: 
  4848: #include "sm_bflatten.h"
  4849: #include "sm_test.h"
  4850: #include "elk_gramast.ast.gen.h"
  4851: 
  4852: #include <stdio.h>             // remove
  4853: #include <stdlib.h>            // system
  4854: 
  4855: 
// Command-line driver: parse options, read the main grammar file plus
// any extension modules, run the grammar analyses, and emit parser
// code (C++ by default, OCaml under -ocaml).  Returns the process exit
// status: 0 on success (or when usage is printed), 2 if the analyses
// reported errors; option errors exit() directly.
int inner_entry(int argc, char **argv)
{
  // consume one command-line argument
  #define SHIFT argc--; argv++ /* user ; */

  char const *progName = argv[0];
  SHIFT;

  // disable 'Exception thrown' reports
  xBase::logExceptions = false;

  // as long as this remains 0-length, it means to use
  // the default naming scheme
  sm_string prefix;

  // true to use ML, false to use C
  bool useML = false;

  // option loop: each branch consumes its own argument(s) via SHIFT
  while (argv[0] && argv[0][0] == '-') {
    char const *op = argv[0]+1;
    if (0==strcmp(op, "tr")) {
      SHIFT;
      traceAddMultiSys(argv[0]);
      SHIFT;
    }
    else if (0==strcmp(op, "v")) {
      SHIFT;
      traceAddSys("progress");
    }
    else if (0==strcmp(op, "o")) {
      SHIFT;
      prefix = argv[0];
      SHIFT;
    }
    else if (0==strcmp(op, "testRW")) {
      SHIFT;
      std::cout << "The testRW option has been removed because I wasn't using\n"
              "it, and the code that implements it has bit-rotted.\n";
      exit(3);
    }
    else if (0==strcmp(op, "ocaml")) {
      SHIFT;
      useML = true;
    }
    else {
      std::cout << "unknown option: " << argv[0] << std::endl;
      exit(2);
    }
  }

  // no grammar file given: print usage and return success
  if (!argv[0]) {
    std::cout << "usage: " << progName << " [options] filename.gr [extension.gr [...]]\n"
            "  Generates parse tables to parse with the given grammar.\n"
            "  The optional extension modules can add rules, etc.\n"
            "\n"
            "options:\n"
            "  -tr <traceFlags>: turn on some flags (separate with commas):\n"
            "      conflict    : print LALR(1) conflicts\n"
            "      prec        : show how prec/assoc are used to resolve conflicts\n"
            "      lrtable     : print LR parsing tables to <prefix>.out\n"
            "      nonkernel   : include non-kernel items in <prefix>.out\n"
            "      treebuild   : replace given actions with treebuilding actions\n"
            "      grammar     : echo grammar to stdout (after merging modules)\n"
            "  -v              : print stages of processing\n"
            "  -o <prefix>     : name outputs <prefix>.h and <prefix>.cc\n"
            "                    (default is filename.gen.h, filename.gen.cc)\n"
            "  -ocaml          : generate ocaml parser instead of C++ parser\n"
            ;
    return 0;
  }

  if (!prefix.length()) {
    // default naming scheme
    prefix = replace(argv[0], ".gr", "");
  }

  SourceLocManager mgr;

  // parse the grammar
  sm_string grammarFname = argv[0];
  SHIFT;
  Owner<GrammarAST> ast(parseGrammarFile(grammarFname, useML));

  // parse and merge its extension modules
  while (argv[0]) {
    Owner<GrammarAST> ext(parseGrammarFile(argv[0], useML));

    traceProgress() << "merging module: " << argv[0] << std::endl;
    mergeGrammar(ast, ext);

    SHIFT;
  }

  // parse the AST into a Grammar
  GrammarAnalysis g;
  if (useML) {
    g.targetLang = "OCaml";
  }
  parseGrammarAST(g, ast);
  ast.del();              // done with it

  if (tracingSys("treebuild")) {
    std::cout << "replacing given actions with treebuilding actions\n";
    g.addTreebuildingActions();
  }
  g.printProductions(trace("grammar") << std::endl);

  // run the LR analyses; optionally dump item sets to <prefix>.out
  sm_string setsFname = sm_stringc << prefix << ".out";
  g.runAnalyses(tracingSys("lrtable")? setsFname.pcharc() : NULL);
  if (g.errors) {
    return 2;
  }

  if (!useML) {
    // emit some C++ code
    sm_string hFname = sm_stringc << prefix << ".h";
    sm_string ccFname = sm_stringc << prefix << ".cc";
    traceProgress() << "emitting C++ code to " << ccFname
                    << " and " << hFname << " ...\n";

    emitActionCode(g, hFname, ccFname, grammarFname);
  }
  else {
    // emit some ML code
    sm_string mliFname = sm_stringc << prefix << ".mli";
    sm_string mlFname = sm_stringc << prefix << ".ml";
    traceProgress() << "emitting OCaml code to " << mlFname
                    << " and " << mliFname << " ...\n";

    emitMLActionCode(g, mliFname, mlFname, grammarFname);
  }

  // before using 'xfer' we have to tell it about the sm_string table
  flattenStrTable = &grammarStringTable;

  // write it in a bison-compatible format as well
  if (tracingSys("bison")) {
    sm_string bisonFname = sm_stringc << prefix << ".y";
    traceProgress() << "writing bison-compatible grammar to " << bisonFname << std::endl;
    std::ofstream out(bisonFname);
    g.printAsBison(out);
  }

  traceProgress() << "done\n";

  // this doesn't work
  if (tracingSys("explore")) {
    grammarExplorer(g);
  }

  return 0;
}
  5007: 
  5008: void entry(int argc, char **argv)
  5009: {
  5010:   int ret = inner_entry(argc, argv);
  5011:   if (ret != 0) {
  5012:     exit(ret);
  5013:   }
  5014: }
  5015: 
  5016: ARGS_MAIN
  5017: 
End data section to elk/elk_gramanl.cxx[1]
Start cpp section to elk/elk_gramast.ast.gen.cpp[1 /1 ]
     1: #line 14048 "./lpsrc/elk.pak"
     2: // gramast.ast.gen.cc
     3: // *** DO NOT EDIT ***
     4: // generated automatically by astgen, from gramast.ast
     5: 
     6: #include "elk_gramast.ast.gen.h"
     7: 
     8: 
     9: // ------------------ GrammarAST -------------------
    10: // *** DO NOT EDIT ***
// Destructor: the AST owns its list of top-level forms.
GrammarAST::~GrammarAST()
{
  forms.deleteAll();
}

// Debug dump of this node and its subtree.
void GrammarAST::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
{
  PRINT_HEADER(subtreeName, GrammarAST);

  PRINT_LIST(TopForm, forms);
}

// Deep copy of the entire AST.
GrammarAST *GrammarAST::clone() const
{
  GrammarAST *ret = new GrammarAST(
    cloneASTList(forms)
  );
  return ret;
}
    30: 
    31: 
    32: // ------------------ TopForm -------------------
    33: // *** DO NOT EDIT ***
// Base class: owns no data of its own.
TopForm::~TopForm()
{
}

// printable names for the subclass kinds, in enumeration order
char const * const TopForm::kindNames[TopForm::NUM_KINDS] = {
  "TF_context",
  "TF_verbatim",
  "TF_option",
  "TF_terminals",
  "TF_nonterm",
};

// base-class debugPrint is empty (as generated); each subclass prints
// its own header and fields and calls this for uniformity
void TopForm::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
{
}

DEFN_AST_DOWNCASTS(TopForm, TF_context, TF_CONTEXT)

TF_context::~TF_context()
{
}

void TF_context::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
{
  PRINT_HEADER(subtreeName, TF_context);

  TopForm::debugPrint(os, indent, subtreeName);

  PRINT_GENERIC(body);
}

// deep copy
TF_context *TF_context::clone() const
{
  TF_context *ret = new TF_context(
    body.clone()
  );
  return ret;
}

DEFN_AST_DOWNCASTS(TopForm, TF_verbatim, TF_VERBATIM)

TF_verbatim::~TF_verbatim()
{
}

void TF_verbatim::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
{
  PRINT_HEADER(subtreeName, TF_verbatim);

  TopForm::debugPrint(os, indent, subtreeName);

  PRINT_BOOL(isImpl);
  PRINT_GENERIC(code);
}

// deep copy; 'isImpl' is copied by value
TF_verbatim *TF_verbatim::clone() const
{
  TF_verbatim *ret = new TF_verbatim(
    isImpl,
    code.clone()
  );
  return ret;
}

DEFN_AST_DOWNCASTS(TopForm, TF_option, TF_OPTION)

TF_option::~TF_option()
{
}

void TF_option::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
{
  PRINT_HEADER(subtreeName, TF_option);

  TopForm::debugPrint(os, indent, subtreeName);

  PRINT_GENERIC(name);
  PRINT_GENERIC(value);
}

// deep copy; 'value' is copied by value
TF_option *TF_option::clone() const
{
  TF_option *ret = new TF_option(
    name.clone(),
    value
  );
  return ret;
}

DEFN_AST_DOWNCASTS(TopForm, TF_terminals, TF_TERMINALS)

// owns its declaration, type, and precedence lists
TF_terminals::~TF_terminals()
{
  decls.deleteAll();
  types.deleteAll();
  prec.deleteAll();
}

void TF_terminals::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
{
  PRINT_HEADER(subtreeName, TF_terminals);

  TopForm::debugPrint(os, indent, subtreeName);

  PRINT_LIST(TermDecl, decls);
  PRINT_LIST(TermType, types);
  PRINT_LIST(PrecSpec, prec);
}

// deep copy
TF_terminals *TF_terminals::clone() const
{
  TF_terminals *ret = new TF_terminals(
    cloneASTList(decls),
    cloneASTList(types),
    cloneASTList(prec)
  );
  return ret;
}

DEFN_AST_DOWNCASTS(TopForm, TF_nonterm, TF_NONTERM)

// owns its function, production, and subset lists
TF_nonterm::~TF_nonterm()
{
  funcs.deleteAll();
  productions.deleteAll();
  subsets.deleteAll();
}

void TF_nonterm::debugPrint(std::ostream &os, int indent, char const *subtreeName) const
{
  PRINT_HEADER(subtreeName, TF_nonterm);

  TopForm::debugPrint(os, indent, subtreeName);

  PRINT_GENERIC(name);
  PRINT_GENERIC(type);
  PRINT_LIST(SpecFunc, funcs);
  PRINT_LIST(ProdDecl, productions);
  PRINT_LIST(LocString, subsets);
}

// deep copy
TF_nonterm *TF_nonterm::clone() const
{
  TF_nonterm *ret = new TF_nonterm(
    name.clone(),
    type.clone(),
    cloneASTList(funcs),
    cloneASTList(productions),
    cloneASTList(subsets)
  );
  return ret;
}
   186: 
   187: 
   188: // ------------------ TermDecl -------------------
   189: // *** DO NOT EDIT ***
   190: TermDecl::~TermDecl()  // no owned sublists to release
   191: {
   192: }
   193: 
   194: void TermDecl::debugPrint(std::ostream &os, int indent, char const *subtreeName) const  // debug dump (PRINT_* macros from the AST framework)
   195: {
   196:   PRINT_HEADER(subtreeName, TermDecl);
   197: 
   198:   PRINT_GENERIC(code);
   199:   PRINT_GENERIC(name);
   200:   PRINT_GENERIC(alias);
   201: }
   202: 
   203: TermDecl *TermDecl::clone() const  // deep copy of this node
   204: {
   205:   TermDecl *ret = new TermDecl(
   206:     code,           // scalar field: copied by value
   207:     name.clone(),
   208:     alias.clone()
   209:   );
   210:   return ret;
   211: }
   212: 
   213: 
   214: // ------------------ TermType -------------------
   215: // *** DO NOT EDIT ***
   216: TermType::~TermType()  // owns 'funcs'; release its elements
   217: {
   218:   funcs.deleteAll();
   219: }
   220: 
   221: void TermType::debugPrint(std::ostream &os, int indent, char const *subtreeName) const  // debug dump (PRINT_* macros from the AST framework)
   222: {
   223:   PRINT_HEADER(subtreeName, TermType);
   224: 
   225:   PRINT_GENERIC(name);
   226:   PRINT_GENERIC(type);
   227:   PRINT_LIST(SpecFunc, funcs);
   228: }
   229: 
   230: TermType *TermType::clone() const  // deep copy of this node
   231: {
   232:   TermType *ret = new TermType(
   233:     name.clone(),
   234:     type.clone(),
   235:     cloneASTList(funcs)
   236:   );
   237:   return ret;
   238: }
   239: 
   240: 
   241: // ------------------ PrecSpec -------------------
   242: // *** DO NOT EDIT ***
   243: PrecSpec::~PrecSpec()  // owns 'tokens'; release its elements
   244: {
   245:   tokens.deleteAll();
   246: }
   247: 
   248: void PrecSpec::debugPrint(std::ostream &os, int indent, char const *subtreeName) const  // debug dump (PRINT_* macros from the AST framework)
   249: {
   250:   PRINT_HEADER(subtreeName, PrecSpec);
   251: 
   252:   PRINT_GENERIC(kind);
   253:   PRINT_GENERIC(prec);
   254:   PRINT_LIST(LocString, tokens);
   255: }
   256: 
   257: PrecSpec *PrecSpec::clone() const  // deep copy of this node
   258: {
   259:   PrecSpec *ret = new PrecSpec(
   260:     kind,   // scalar field: copied by value
   261:     prec,   // scalar field: copied by value
   262:     cloneASTList(tokens)
   263:   );
   264:   return ret;
   265: }
   266: 
   267: 
   268: // ------------------ SpecFunc -------------------
   269: // *** DO NOT EDIT ***
   270: SpecFunc::~SpecFunc()  // owns 'formals'; release its elements
   271: {
   272:   formals.deleteAll();
   273: }
   274: 
   275: void SpecFunc::debugPrint(std::ostream &os, int indent, char const *subtreeName) const  // debug dump (PRINT_* macros from the AST framework)
   276: {
   277:   PRINT_HEADER(subtreeName, SpecFunc);
   278: 
   279:   PRINT_GENERIC(name);
   280:   PRINT_LIST(LocString, formals);
   281:   PRINT_GENERIC(code);
   282: }
   283: 
   284: SpecFunc *SpecFunc::clone() const  // deep copy of this node
   285: {
   286:   SpecFunc *ret = new SpecFunc(
   287:     name.clone(),
   288:     cloneASTList(formals),
   289:     code.clone()
   290:   );
   291:   return ret;
   292: }
   293: 
   294: 
   295: // ------------------ ProdDecl -------------------
   296: // *** DO NOT EDIT ***
   297: ProdDecl::~ProdDecl()  // owns 'rhs'; release its elements
   298: {
   299:   rhs.deleteAll();
   300: }
   301: 
   302: void ProdDecl::debugPrint(std::ostream &os, int indent, char const *subtreeName) const  // debug dump (PRINT_* macros from the AST framework)
   303: {
   304:   PRINT_HEADER(subtreeName, ProdDecl);
   305: 
   306:   PRINT_LIST(RHSElt, rhs);
   307:   PRINT_GENERIC(actionCode);
   308: }
   309: 
   310: ProdDecl *ProdDecl::clone() const  // deep copy of this node
   311: {
   312:   ProdDecl *ret = new ProdDecl(
   313:     cloneASTList(rhs),
   314:     actionCode.clone()
   315:   );
   316:   return ret;
   317: }
   318: 
   319: 
   320: // ------------------ RHSElt -------------------
   321: // *** DO NOT EDIT ***
   322: RHSElt::~RHSElt()  // base class: nothing owned here
   323: {
   324: }
   325: 
   326: char const * const RHSElt::kindNames[RHSElt::NUM_KINDS] = {  // printable name for each subclass kind
   327:   "RH_name",
   328:   "RH_sm_string",
   329:   "RH_prec",
   330: };
   331: 
   332: void RHSElt::debugPrint(std::ostream &os, int indent, char const *subtreeName) const  // base prints nothing; each subclass emits its own header and fields (params intentionally unused)
   333: {
   334: }
   335: 
   336: DEFN_AST_DOWNCASTS(RHSElt, RH_name, RH_NAME)  // defines the RHSElt -> RH_name downcast helpers (macro from the AST framework)
   337: 
   338: RH_name::~RH_name()  // no owned sublists to release
   339: {
   340: }
   341: 
   342: void RH_name::debugPrint(std::ostream &os, int indent, char const *subtreeName) const  // debug dump (PRINT_* macros from the AST framework)
   343: {
   344:   PRINT_HEADER(subtreeName, RH_name);
   345: 
   346:   RHSElt::debugPrint(os, indent, subtreeName);  // superclass first (currently a no-op)
   347: 
   348:   PRINT_GENERIC(tag);
   349:   PRINT_GENERIC(name);
   350: }
   351: 
   352: RH_name *RH_name::clone() const  // deep copy of this node
   353: {
   354:   RH_name *ret = new RH_name(
   355:     tag.clone(),
   356:     name.clone()
   357:   );
   358:   return ret;
   359: }
   360: 
   361: DEFN_AST_DOWNCASTS(RHSElt, RH_sm_string, RH_STRING)  // defines the RHSElt -> RH_sm_string downcast helpers (macro from the AST framework)
   362: 
   363: RH_sm_string::~RH_sm_string()  // no owned sublists to release
   364: {
   365: }
   366: 
   367: void RH_sm_string::debugPrint(std::ostream &os, int indent, char const *subtreeName) const  // debug dump (PRINT_* macros from the AST framework)
   368: {
   369:   PRINT_HEADER(subtreeName, RH_sm_string);
   370: 
   371:   RHSElt::debugPrint(os, indent, subtreeName);  // superclass first (currently a no-op)
   372: 
   373:   PRINT_GENERIC(tag);
   374:   PRINT_GENERIC(str);
   375: }
   376: 
   377: RH_sm_string *RH_sm_string::clone() const  // deep copy of this node
   378: {
   379:   RH_sm_string *ret = new RH_sm_string(
   380:     tag.clone(),
   381:     str.clone()
   382:   );
   383:   return ret;
   384: }
   385: 
   386: DEFN_AST_DOWNCASTS(RHSElt, RH_prec, RH_PREC)  // defines the RHSElt -> RH_prec downcast helpers (macro from the AST framework)
   387: 
   388: RH_prec::~RH_prec()  // no owned sublists to release
   389: {
   390: }
   391: 
   392: void RH_prec::debugPrint(std::ostream &os, int indent, char const *subtreeName) const  // debug dump (PRINT_* macros from the AST framework)
   393: {
   394:   PRINT_HEADER(subtreeName, RH_prec);
   395: 
   396:   RHSElt::debugPrint(os, indent, subtreeName);  // superclass first (currently a no-op)
   397: 
   398:   PRINT_GENERIC(tokName);
   399: }
   400: 
   401: RH_prec *RH_prec::clone() const  // deep copy of this node
   402: {
   403:   RH_prec *ret = new RH_prec(
   404:     tokName.clone()
   405:   );
   406:   return ret;
   407: }
   408: 
   409: 
   410: 
   411: 
End cpp section to elk/elk_gramast.ast.gen.cpp[1]
Start cpp section to elk/elk_gramlex.yy.cpp[1 /1 ]
     1: #line 14460 "./lpsrc/elk.pak"
     2: /* A lexical scanner generated by flex */
     3: 
     4: /* Scanner skeleton version:
     5:  * $Header$
     6:  */
     7: 
     8: #define FLEX_SCANNER
     9: #define YY_FLEX_MAJOR_VERSION 2
    10: #define YY_FLEX_MINOR_VERSION 5
    11: 
    12: 
    13: 
    14: /* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */
    15: #ifdef c_plusplus
    16: #ifndef __cplusplus
    17: #define __cplusplus
    18: #endif
    19: #endif
    20: 
    21: 
    22: #ifdef __cplusplus
    23: 
    24: #include <stdlib.h>
    25: #include <iostream>
    26: using namespace std;
    27: 
    28: /* Use prototypes in function declarations. */
    29: #define YY_USE_PROTOS
    30: 
    31: /* The "const" storage-class-modifier is valid. */
    32: #define YY_USE_CONST
    33: 
    34: #else  /* ! __cplusplus */
    35: 
    36: #if __STDC__
    37: 
    38: #define YY_USE_PROTOS
    39: #define YY_USE_CONST
    40: 
    41: #endif  /* __STDC__ */
    42: #endif  /* ! __cplusplus */
    43: 
    44: #ifdef __TURBOC__
    45:  #pragma warn -rch
    46:  #pragma warn -use
    47: #include <io.h>
    48: #include <stdlib.h>
    49: #define YY_USE_CONST
    50: #define YY_USE_PROTOS
    51: #endif
    52: 
    53: #ifdef YY_USE_CONST
    54: #define yyconst const
    55: #else
    56: #define yyconst
    57: #endif
    58: 
    59: 
    60: #ifdef YY_USE_PROTOS
    61: #define YY_PROTO(proto) proto
    62: #else
    63: #define YY_PROTO(proto) ()
    64: #endif
    65: 
    66: /* Returned upon end-of-file. */
    67: #define YY_NULL 0
    68: 
    69: /* Promotes a possibly negative, possibly signed char to an unsigned
    70:  * integer for use as an array index.  If the signed char is negative,
    71:  * we want to instead treat it as an 8-bit unsigned char, hence the
    72:  * double cast.
    73:  */
    74: #define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
    75: 
    76: /* Enter a start condition.  This macro really ought to take a parameter,
    77:  * but we do it the disgusting crufty way forced on us by the ()-less
    78:  * definition of BEGIN.
    79:  */
    80: #define BEGIN yy_start = 1 + 2 *
    81: 
    82: /* Translate the current start state into a value that can be later handed
    83:  * to BEGIN to return to the state.  The YYSTATE alias is for lex
    84:  * compatibility.
    85:  */
    86: #define YY_START ((yy_start - 1) / 2)
    87: #define YYSTATE YY_START
    88: 
    89: /* Action number for EOF rule of a given start state. */
    90: #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
    91: 
    92: /* Special action meaning "start processing a new file". */
    93: #define YY_NEW_FILE yyrestart( yyin )
    94: 
    95: #define YY_END_OF_BUFFER_CHAR 0
    96: 
    97: /* Size of default input buffer. */
    98: #define YY_BUF_SIZE 16384
    99: 
   100: typedef struct yy_buffer_state *YY_BUFFER_STATE;
   101: 
   102: extern int yyleng;
   103: 
   104: #define EOB_ACT_CONTINUE_SCAN 0
   105: #define EOB_ACT_END_OF_FILE 1
   106: #define EOB_ACT_LAST_MATCH 2
   107: 
   108: /* The funky do-while in the following #define is used to turn the definition
   109:  * int a single C statement (which needs a semi-colon terminator).  This
   110:  * avoids problems with code like:
   111:  *
   112:  *   if ( condition_holds )
   113:  *    yyless( 5 );
   114:  *  else
   115:  *    do_something_else();
   116:  *
   117:  * Prior to using the do-while the compiler would get upset at the
   118:  * "else" because it interpreted the "if" statement as being all
   119:  * done when it reached the ';' after the yyless() call.
   120:  */
   121: 
   122: /* Return all but the first 'n' matched characters back to the input stream. */
   123: 
   124: #define yyless(n) \
   125:   do \
   126:     { \
   127:     /* Undo effects of setting up yytext. */ \
   128:     *yy_cp = yy_hold_char; \
   129:     YY_RESTORE_YY_MORE_OFFSET \
   130:     yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \
   131:     YY_DO_BEFORE_ACTION; /* set up yytext again */ \
   132:     } \
   133:   while ( 0 )
   134: 
   135: #define unput(c) yyunput( c, yytext_ptr )
   136: 
   137: /* The following is because we cannot portably get our hands on size_t
   138:  * (without autoconf's help, which isn't available because we want
   139:  * flex-generated scanners to compile on their own).
   140:  */
   141: typedef unsigned int yy_size_t;
   142: 
   143: 
   144: struct yy_buffer_state  /* per-input-stream scanner state: the character buffer plus its bookkeeping flags */
   145:   {
   146:   std::istream* yy_input_file;  /* source stream (C++ scanner variant of flex) */
   147: 
   148:   char *yy_ch_buf;    /* input buffer */
   149:   char *yy_buf_pos;    /* current position in input buffer */
   150: 
   151:   /* Size of input buffer in bytes, not including room for EOB
   152:    * characters.
   153:    */
   154:   yy_size_t yy_buf_size;
   155: 
   156:   /* Number of characters read into yy_ch_buf, not including EOB
   157:    * characters.
   158:    */
   159:   int yy_n_chars;
   160: 
   161:   /* Whether we "own" the buffer - i.e., we know we created it,
   162:    * and can realloc() it to grow it, and should free() it to
   163:    * delete it.
   164:    */
   165:   int yy_is_our_buffer;
   166: 
   167:   /* Whether this is an "interactive" input source; if so, and
   168:    * if we're using stdio for input, then we want to use getc()
   169:    * instead of fread(), to make sure we stop fetching input after
   170:    * each newline.
   171:    */
   172:   int yy_is_interactive;
   173: 
   174:   /* Whether we're considered to be at the beginning of a line.
   175:    * If so, '^' rules will be active on the next match, otherwise
   176:    * not.
   177:    */
   178:   int yy_at_bol;
   179: 
   180:   /* Whether to try to fill the input buffer when we reach the
   181:    * end of it.
   182:    */
   183:   int yy_fill_buffer;
   184: 
   185:   int yy_buffer_status;  /* one of the YY_BUFFER_* values below */
   186: #define YY_BUFFER_NEW 0
   187: #define YY_BUFFER_NORMAL 1
   188:   /* When an EOF's been seen but there's still some text to process
   189:    * then we mark the buffer as YY_EOF_PENDING, to indicate that we
   190:    * shouldn't try reading from the input source any more.  We might
   191:    * still have a bunch of tokens to match, though, because of
   192:    * possible backing-up.
   193:    *
   194:    * When we actually see the EOF, we change the status to "new"
   195:    * (via yyrestart()), so that the user can continue scanning by
   196:    * just pointing yyin at a new input file.
   197:    */
   198: #define YY_BUFFER_EOF_PENDING 2
   199:   };
   200: 
   201: 
   202: /* We provide macros for accessing buffer states in case in the
   203:  * future we want to put the buffer states in a more general
   204:  * "scanner state".
   205:  */
   206: #define YY_CURRENT_BUFFER yy_current_buffer
   207: 
   208: 
   209: 
   210: static void *yy_flex_alloc YY_PROTO(( yy_size_t ));
   211: static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t ));
   212: static void yy_flex_free YY_PROTO(( void * ));
   213: 
   214: #define yy_new_buffer yy_create_buffer
   215: 
   216: #define yy_set_interactive(is_interactive) \
   217:   { \
   218:   if ( ! yy_current_buffer ) \
   219:     yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
   220:   yy_current_buffer->yy_is_interactive = is_interactive; \
   221:   }
   222: 
   223: #define yy_set_bol(at_bol) \
   224:   { \
   225:   if ( ! yy_current_buffer ) \
   226:     yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
   227:   yy_current_buffer->yy_at_bol = at_bol; \
   228:   }
   229: 
   230: #define YY_AT_BOL() (yy_current_buffer->yy_at_bol)
   231: 
   232: 
   233: #define yywrap() 1
   234: #define YY_SKIP_YYWRAP
   235: typedef unsigned char YY_CHAR;
   236: #define yytext_ptr yytext
   237: #define YY_INTERACTIVE
   238: 
   239: #include "sm_flexlexer.h"
   240: int yyFlexLexer::yylex()  // stub: with %option yyclass the real entry point is GrammarLexer::yylex (see YY_DECL below)
   241:   {
   242:   LexerError( "yyFlexLexer::yylex invoked but %option yyclass used" );
   243:   return 0;
   244:   }
   245: 
   246: #define YY_DECL int GrammarLexer::yylex()
   247: 
   248: 
   249: /* Done after the current pattern has been matched and before the
   250:  * corresponding action - sets up yytext.
   251:  */
   252: #define YY_DO_BEFORE_ACTION \
   253:   yytext_ptr = yy_bp; \
   254:   yyleng = (int) (yy_cp - yy_bp); \
   255:   yy_hold_char = *yy_cp; \
   256:   *yy_cp = '\0'; \
   257:   yy_c_buf_p = yy_cp;
   258: 
   259: #define YY_NUM_RULES 40
   260: #define YY_END_OF_BUFFER 41
   261: static yyconst short int yy_accept[159] =
   262:     {   0,
   263:         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
   264:         0,    0,    0,    0,    0,    0,   41,   39,    2,    1,
   265:        39,   24,   10,   11,   39,   39,   37,    9,   21,   36,
   266:        17,   36,   36,   36,   36,   36,   36,   36,   36,   36,
   267:        36,   20,    8,    5,    6,    5,   33,   33,   33,   34,
   268:        35,   27,   28,   29,   19,   23,    2,    0,   38,   18,
   269:         3,    0,   37,   36,   36,   36,   36,   36,   36,   36,
   270:        36,   36,   36,   36,   36,   36,    4,    0,    0,    0,
   271:         0,   34,   27,    0,    7,   36,   36,   25,   36,   36,
   272:        36,   36,   36,   36,   36,   36,   36,    0,   36,   36,
   273: 
   274:        36,   36,   36,   36,   36,   36,   36,   36,   36,    0,
   275:        36,   36,   36,   36,   36,   36,   36,   36,   36,   22,
   276:        36,    0,   32,   36,   15,   36,   36,   36,   14,   36,
   277:        36,   36,   36,   36,   36,   31,   36,   16,   36,   36,
   278:        36,   36,   36,   36,   26,   36,   36,   36,   12,   36,
   279:        36,   13,   36,   36,   36,   36,   30,    0
   280:     } ;
   281: 
   282: static yyconst int yy_ec[256] =
   283:     {   0,
   284:         1,    1,    1,    1,    1,    1,    1,    1,    2,    3,
   285:         2,    2,    2,    1,    1,    1,    1,    1,    1,    1,
   286:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   287:         1,    2,    1,    4,    1,    1,    1,    1,    1,    5,
   288:         6,    7,    1,    8,    9,    1,   10,   11,   11,   11,
   289:        11,   11,   11,   11,   11,   11,   11,   12,   13,    1,
   290:         1,   14,    1,    1,   15,   15,   15,   15,   15,   15,
   291:        15,   15,   15,   15,   15,   15,   15,   15,   15,   15,
   292:        15,   15,   15,   15,   15,   15,   15,   15,   15,   15,
   293:        16,   17,   18,    1,   19,    1,   20,   21,   22,   23,
   294: 
   295:        24,   25,   15,   15,   26,   15,   27,   28,   29,   30,
   296:        31,   32,   15,   33,   34,   35,   36,   37,   15,   38,
   297:        15,   15,   39,    1,   40,    1,    1,    1,    1,    1,
   298:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   299:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   300:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   301:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   302:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   303:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   304:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   305: 
   306:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   307:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   308:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   309:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   310:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   311:         1,    1,    1,    1,    1
   312:     } ;
   313: 
   314: static yyconst int yy_meta[41] =
   315:     {   0,
   316:         1,    1,    2,    3,    1,    4,    1,    1,    1,    1,
   317:         5,    1,    1,    1,    5,    1,    6,    4,    5,    5,
   318:         5,    5,    5,    5,    5,    5,    5,    5,    5,    5,
   319:         5,    5,    5,    5,    5,    5,    5,    5,    1,    4
   320:     } ;
   321: 
   322: static yyconst short int yy_base[169] =
   323:     {   0,
   324:         0,    0,   38,   39,   42,   46,  224,  223,   46,   47,
   325:       186,  185,   30,   41,   50,   53,  223,  228,  220,  228,
   326:       217,  228,  228,  228,  206,   49,  208,  228,  228,    0,
   327:       228,  187,  179,  180,   31,  184,  182,  180,  176,   43,
   328:       187,  228,  228,  228,  228,  200,  228,   66,   68,    0,
   329:       228,    0,  228,  228,  228,  228,  207,  204,  228,  228,
   330:       228,  204,  195,    0,  175,  172,  173,  170,  179,  170,
   331:       164,  174,  176,  163,  168,  161,  228,   71,   73,   77,
   332:         0,    0,    0,  190,  228,  157,  167,    0,  162,  161,
   333:       153,  161,  164,  151,  155,  159,  161,  177,  156,  157,
   334: 
   335:       159,  141,  152,  144,  150,  149,  146,  141,  150,   76,
   336:       131,  133,  130,  143,  132,  134,  140,  127,  131,    0,
   337:       125,   82,  228,  124,    0,  134,  133,  127,    0,  131,
   338:       120,  133,  126,  132,  117,    0,  119,    0,  120,  118,
   339:       124,  124,  120,  107,    0,  103,  109,   99,    0,   98,
   340:        56,    0,   56,   59,   49,   33,    0,  228,   92,   98,
   341:       104,  110,  116,   52,  121,  127,  133,  139
   342:     } ;
   343: 
   344: static yyconst short int yy_def[169] =
   345:     {   0,
   346:       158,    1,  159,  159,  160,  160,  161,  161,  162,  162,
   347:         1,    1,    1,    1,    1,    1,  158,  158,  158,  158,
   348:       163,  158,  158,  158,  158,  158,  158,  158,  158,  164,
   349:       158,  164,  164,  164,  164,  164,  164,  164,  164,  164,
   350:       164,  158,  158,  158,  158,  158,  158,  158,  158,  165,
   351:       158,  166,  158,  158,  158,  158,  158,  163,  158,  158,
   352:       158,  167,  158,  164,  164,  164,  164,  164,  164,  164,
   353:       164,  164,  164,  164,  164,  164,  158,  158,  158,  158,
   354:       168,  165,  166,  167,  158,  164,  164,  164,  164,  164,
   355:       164,  164,  164,  164,  164,  164,  164,  168,  164,  164,
   356: 
   357:       164,  164,  164,  164,  164,  164,  164,  164,  164,  158,
   358:       164,  164,  164,  164,  164,  164,  164,  164,  164,  164,
   359:       164,  158,  158,  164,  164,  164,  164,  164,  164,  164,
   360:       164,  164,  164,  164,  164,  164,  164,  164,  164,  164,
   361:       164,  164,  164,  164,  164,  164,  164,  164,  164,  164,
   362:       164,  164,  164,  164,  164,  164,  164,    0,  158,  158,
   363:       158,  158,  158,  158,  158,  158,  158,  158
   364:     } ;
   365: 
   366: static yyconst short int yy_nxt[269] =
   367:     {   0,
   368:        18,   19,   20,   21,   22,   23,   18,   24,   25,   26,
   369:        27,   28,   29,   18,   30,   31,   18,   18,   30,   30,
   370:        30,   32,   30,   33,   34,   35,   30,   30,   30,   36,
   371:        37,   38,   30,   39,   40,   30,   41,   30,   42,   43,
   372:        45,   45,   18,   48,   46,   46,   49,   48,   53,   53,
   373:        49,   54,   54,   18,   56,   61,   64,   56,   62,   68,
   374:        69,  145,   18,   54,   54,   18,   74,   78,   55,   80,
   375:        79,   81,   78,   75,   80,   79,   81,  122,   80,   55,
   376:        81,  123,  157,  122,  156,   54,   54,  123,   18,  155,
   377:       154,   18,   44,   44,   44,   44,   44,   44,   47,   47,
   378: 
   379:        47,   47,   47,   47,   50,   50,   50,   50,   50,   50,
   380:        52,   52,   52,   52,   52,   52,   58,  153,   58,   58,
   381:        58,   82,  152,   82,   82,   82,   82,   83,  151,   83,
   382:       150,   83,   83,   84,   84,   84,   84,   84,   84,   98,
   383:       149,  148,   98,   98,  147,  146,  145,  144,  143,  142,
   384:       141,  140,  139,  138,  137,  120,  136,  135,  134,  133,
   385:       132,  131,  130,  129,  128,  127,  126,  125,  124,  121,
   386:       120,  119,  118,  117,  116,  115,  114,  113,  112,  111,
   387:       110,  109,  108,  107,  106,  105,  104,  103,  102,  101,
   388:       100,   99,   85,   97,   96,   95,   94,   93,   92,   91,
   389: 
   390:        90,   89,   88,   87,   86,   63,   85,   59,   57,   77,
   391:        76,   73,   72,   71,   70,   67,   66,   65,   63,   60,
   392:        59,   57,  158,   55,   55,   51,   51,   17,  158,  158,
   393:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   394:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   395:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   396:       158,  158,  158,  158,  158,  158,  158,  158
   397:     } ;
   398: 
   399: static yyconst short int yy_chk[269] =
   400:     {   0,
   401:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   402:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   403:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   404:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   405:         3,    4,   13,    5,    3,    4,    5,    6,    9,   10,
   406:         6,    9,   10,   14,   15,   26,  164,   16,   26,   35,
   407:        35,  156,   15,    9,   10,   16,   40,   48,   13,   49,
   408:        48,   49,   78,   40,   79,   78,   79,  110,   80,   14,
   409:        80,  110,  155,  122,  154,    9,   10,  122,   15,  153,
   410:       151,   16,  159,  159,  159,  159,  159,  159,  160,  160,
   411: 
   412:       160,  160,  160,  160,  161,  161,  161,  161,  161,  161,
   413:       162,  162,  162,  162,  162,  162,  163,  150,  163,  163,
   414:       163,  165,  148,  165,  165,  165,  165,  166,  147,  166,
   415:       146,  166,  166,  167,  167,  167,  167,  167,  167,  168,
   416:       144,  143,  168,  168,  142,  141,  140,  139,  137,  135,
   417:       134,  133,  132,  131,  130,  128,  127,  126,  124,  121,
   418:       119,  118,  117,  116,  115,  114,  113,  112,  111,  109,
   419:       108,  107,  106,  105,  104,  103,  102,  101,  100,   99,
   420:        98,   97,   96,   95,   94,   93,   92,   91,   90,   89,
   421:        87,   86,   84,   76,   75,   74,   73,   72,   71,   70,
   422: 
   423:        69,   68,   67,   66,   65,   63,   62,   58,   57,   46,
   424:        41,   39,   38,   37,   36,   34,   33,   32,   27,   25,
   425:        21,   19,   17,   12,   11,    8,    7,  158,  158,  158,
   426:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   427:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   428:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   429:       158,  158,  158,  158,  158,  158,  158,  158
   430:     } ;
   431: 
   432: /* The intent behind this definition is that it'll catch
   433:  * any uses of REJECT which flex missed.
   434:  */
   435: #define REJECT reject_used_but_not_detected
   436: #define yymore() yymore_used_but_not_detected
   437: #define YY_MORE_ADJ 0
   438: #define YY_RESTORE_YY_MORE_OFFSET
   439: #line 1 "gramlex.lex"
   440: #define INITIAL 0
   441: /* grammar.lex
   442:  * lexical analyzer for my grammar input format
   443:  *
   444:  * The variety of syntaxes for embedded literal code cause this lexer
   445:  * to have some of the context sensitivity usually associated with a
   446:  * parser.  This context doesn't nest arbitrarily deeply, so the
   447:  * language recognized is still regular, but clearly there's some
   448:  * design tension.
   449:  */
   450: /* ----------------- C definitions -------------------- */
   451: #line 13 "gramlex.lex"
   452: 
   453: // pull in my declaration of the lexer class -- this defines
   454: // the additional lexer state, some of which is used in the
   455: // action rules below (this is in the ../ast/ directory now)
   456: #include "ast_gramlex.h"
   457: 
   458: // pull in the bison-generated token codes
   459: #include "elk_grampar.codes.h"
   460: 
   461: #include <string.h>     // strchr, strrchr
   462: 
   463: // for maintaining column count
   464: #define TOKEN_START  tokenStartLoc = fileState.loc /* user ; */
   465: #define UPD_COL      \
   466:   fileState.loc = sourceLocManager->advCol(fileState.loc, yyleng)  /* user ; */
   467: #define TOK_UPD_COL  TOKEN_START; UPD_COL  /* user ; */
   468: 
   469: /* -------------------- flex options ------------------ */
   470: /* no wrapping is needed; setting this means we don't have to link with libfl.a */
   471: /* don't use the default-echo rules */
   472: /* generate a c++ lexer */
   473: /* and I will define the class */
   474: /* ------------------- definitions -------------------- */
   475: /* any character, including newline */
   476: /* any character except newline */
   477: /* starting character in a name */
   478: /* starting character in a numeric literal */
   479: /* double-quote */
   480: /* character that can appear in a quoted string */
   481: /* (I currently don't have any backslash codes, but I want to
   482:  * leave open that possibility, so for now backslashes are illegal) */
   483: /* horizontal whitespace */
   484: /* --------------- start conditions ------------------- */
   485: /* eating a comment delimited by slash-star and star-slash; note
   486:  * that we remember our current state when entering C_COMMENT,
   487:  * and restore it on exit */
   488: #define C_COMMENT 1
   489: 
   490: /* looking for the file name in an "include" directive */
   491: #define INCLUDE 2
   492: 
   493: /* recovering from an error by skipping to the next newline */
   494: #define EAT_TO_NEWLINE 3
   495: 
   496: /* gathering literal embedded code; the delimiter is specified
   497:  * in the 'embedFinish' variable */
   498: #define LITCODE 4
   499: 
   500: /* tokenizing the right-hand side of a production; this one is not
   501:  * exclusive because tokenization is virtually the same in RHS
   502:  * mode as in INITIAL mode */
   503: #define RHS 5
   504: 
   505: /* tokenizing parameter list of a function, leading into the
   506:  * embedded code that is its body */
   507: #define FUN 6
   508: 
   509: /* looking for the start of a type that follows "token" or "nonterm",
   510:  * or the TOK_NAME meaning the type has been omitted */
   511: #define OPTIONAL_TYPE 7
   512: 
   513: /* ---------------------- rules ----------------------- */
   514: #line 514 "lex.yy.cc"
   515: 
   516: /* Macros after this point can all be overridden by user definitions in
   517:  * section 1.
   518:  */
   519: 
   520: #ifndef YY_SKIP_YYWRAP
   521: #ifdef __cplusplus
   522: extern "C" int yywrap YY_PROTO(( void ));
   523: #else
   524: extern int yywrap YY_PROTO(( void ));
   525: #endif
   526: #endif
   527: 
   528: 
   529: #ifndef yytext_ptr
   530: static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int ));
   531: #endif
   532: 
   533: #ifdef YY_NEED_STRLEN
   534: static int yy_flex_strlen YY_PROTO(( yyconst char * ));
   535: #endif
   536: 
   537: #ifndef YY_NO_INPUT
   538: #endif
   539: 
   540: #if YY_STACK_USED
   541: static int yy_start_stack_ptr = 0;
   542: static int yy_start_stack_depth = 0;
   543: static int *yy_start_stack = 0;
   544: #ifndef YY_NO_PUSH_STATE
   545: static void yy_push_state YY_PROTO(( int new_state ));
   546: #endif
   547: #ifndef YY_NO_POP_STATE
   548: static void yy_pop_state YY_PROTO(( void ));
   549: #endif
   550: #ifndef YY_NO_TOP_STATE
   551: static int yy_top_state YY_PROTO(( void ));
   552: #endif
   553: 
   554: #else
   555: #define YY_NO_PUSH_STATE 1
   556: #define YY_NO_POP_STATE 1
   557: #define YY_NO_TOP_STATE 1
   558: #endif
   559: 
   560: #ifdef YY_MALLOC_DECL
   561: YY_MALLOC_DECL
   562: #else
   563: #if __STDC__
   564: #ifndef __cplusplus
   565: #include <stdlib.h>
   566: #endif
   567: #else
   568: /* Just try to get by without declaring the routines.  This will fail
   569:  * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int)
   570:  * or sizeof(void*) != sizeof(int).
   571:  */
   572: #endif
   573: #endif
   574: 
   575: /* Amount of stuff to slurp up with each read. */
   576: #ifndef YY_READ_BUF_SIZE
   577: #define YY_READ_BUF_SIZE 8192
   578: #endif
   579: 
   580: /* Copy whatever the last rule matched to the standard output. */
   581: 
   582: #ifndef ECHO
   583: #define ECHO LexerOutput( yytext, yyleng )
   584: #endif
   585: 
   586: /* Gets input and stuffs it into "buf".  number of characters read, or YY_NULL,
   587:  * is returned in "result".
   588:  */
   589: #ifndef YY_INPUT
   590: #define YY_INPUT(buf,result,max_size) \
   591:   if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \
   592:     YY_FATAL_ERROR( "input in flex scanner failed" );
   593: #endif
   594: 
   595: /* No semi-colon after return; correct usage is to write "yyterminate();" -
   596:  * we don't want an extra ';' after the "return" because that will cause
   597:  * some compilers to complain about unreachable statements.
   598:  */
   599: #ifndef yyterminate
   600: #define yyterminate() return YY_NULL
   601: #endif
   602: 
   603: /* Number of entries by which start-condition stack grows. */
   604: #ifndef YY_START_STACK_INCR
   605: #define YY_START_STACK_INCR 25
   606: #endif
   607: 
   608: /* Report a fatal error. */
   609: #ifndef YY_FATAL_ERROR
   610: #define YY_FATAL_ERROR(msg) LexerError( msg )
   611: #endif
   612: 
   613: /* Default declaration of generated scanner - a define so the user can
   614:  * easily add parameters.
   615:  */
   616: #ifndef YY_DECL
   617: #define YY_DECL int yyFlexLexer::yylex()
   618: #endif
   619: 
   620: /* Code executed at the beginning of each rule, after yytext and yyleng
   621:  * have been set up.
   622:  */
   623: #ifndef YY_USER_ACTION
   624: #define YY_USER_ACTION
   625: #endif
   626: 
   627: /* Code executed at the end of each rule. */
   628: #ifndef YY_BREAK
   629: #define YY_BREAK break;
   630: #endif
   631: 
   632: #define YY_RULE_SETUP \
   633:   YY_USER_ACTION
   634: 
   635: YY_DECL
   636:   {
   637:   register yy_state_type yy_current_state;
   638:   register char *yy_cp = NULL, *yy_bp = NULL;
   639:   register int yy_act;
   640: 
   641: #line 102 "gramlex.lex"
   642: 
   643: 
   644:   /* -------- whitespace ------ */
   645: #line 645 "lex.yy.cc"
   646: 
   647:   if ( yy_init )
   648:     {
   649:     yy_init = 0;
   650: 
   651: #ifdef YY_USER_INIT
   652:     YY_USER_INIT;
   653: #endif
   654: 
   655:     if ( ! yy_start )
   656:       yy_start = 1;  /* first start state */
   657: 
   658:     if ( ! yyin )
   659:       yyin = &cin;
   660: 
   661:     if ( ! yyout )
   662:       yyout = &std::cout;
   663: 
   664:     if ( ! yy_current_buffer )
   665:       yy_current_buffer =
   666:         yy_create_buffer( yyin, YY_BUF_SIZE );
   667: 
   668:     yy_load_buffer_state();
   669:     }
   670: 
   671:   while ( 1 )    /* loops until end-of-file is reached */
   672:     {
   673:     yy_cp = yy_c_buf_p;
   674: 
   675:     /* Support of yytext. */
   676:     *yy_cp = yy_hold_char;
   677: 
   678:     /* yy_bp points to the position in yy_ch_buf of the start of
   679:      * the current run.
   680:      */
   681:     yy_bp = yy_cp;
   682: 
   683:     yy_current_state = yy_start;
   684: yy_match:
   685:     do
   686:       {
   687:       register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
   688:       if ( yy_accept[yy_current_state] )
   689:         {
   690:         yy_last_accepting_state = yy_current_state;
   691:         yy_last_accepting_cpos = yy_cp;
   692:         }
   693:       while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
   694:         {
   695:         yy_current_state = (int) yy_def[yy_current_state];
   696:         if ( yy_current_state >= 159 )
   697:           yy_c = yy_meta[(unsigned int) yy_c];
   698:         }
   699:       yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
   700:       ++yy_cp;
   701:       }
   702:     while ( yy_base[yy_current_state] != 228 );
   703: 
   704: yy_find_action:
   705:     yy_act = yy_accept[yy_current_state];
   706:     if ( yy_act == 0 )
   707:       { /* have to back up */
   708:       yy_cp = yy_last_accepting_cpos;
   709:       yy_current_state = yy_last_accepting_state;
   710:       yy_act = yy_accept[yy_current_state];
   711:       }
   712: 
   713:     YY_DO_BEFORE_ACTION;
   714: 
   715: 
   716: do_action:  /* This label is used only to access EOF actions. */
   717: 
   718: 
   719:     switch ( yy_act )
   720:   { /* beginning of action switch */
   721:       case 0: /* must back up */
   722:       /* undo the effects of YY_DO_BEFORE_ACTION */
   723:       *yy_cp = yy_hold_char;
   724:       yy_cp = yy_last_accepting_cpos;
   725:       yy_current_state = yy_last_accepting_state;
   726:       goto yy_find_action;
   727: 
   728: case 1:
   729: YY_RULE_SETUP
   730: #line 105 "gramlex.lex"
   731: {
   732:   newLine();
   733: }
   734:   YY_BREAK
   735: case 2:
   736: YY_RULE_SETUP
   737: #line 109 "gramlex.lex"
   738: {
   739:   UPD_COL;
   740: }
   741:   YY_BREAK
   742: /* -------- comments -------- */
   743: case 3:
   744: YY_RULE_SETUP
   745: #line 114 "gramlex.lex"
   746: {
   747:   /* C-style comments */
   748:   TOKEN_START;
   749:   UPD_COL;
   750:   prevState = YY_START;
   751:   BEGIN(C_COMMENT);
   752: }
   753:   YY_BREAK
   754: 
   755: case 4:
   756: YY_RULE_SETUP
   757: #line 123 "gramlex.lex"
   758: {
   759:     /* end of comment */
   760:     UPD_COL;
   761:     BEGIN(prevState);
   762:   }
   763:   YY_BREAK
   764: case 5:
   765: YY_RULE_SETUP
   766: #line 129 "gramlex.lex"
   767: {
   768:     /* anything but slash-star or newline -- eat it */
   769:     UPD_COL;
   770:   }
   771:   YY_BREAK
   772: case 6:
   773: YY_RULE_SETUP
   774: #line 134 "gramlex.lex"
   775: {
   776:     newLine();
   777:   }
   778:   YY_BREAK
   779: case YY_STATE_EOF(C_COMMENT):
   780: #line 138 "gramlex.lex"
   781: {
   782:     UPD_COL;      // <<EOF>> yyleng is 1!
   783:     errorUnterminatedComment();
   784:     return TOK_EOF;
   785:   }
   786:   YY_BREAK
   787: 
   788: case 7:
   789: YY_RULE_SETUP
   790: #line 146 "gramlex.lex"
   791: {
   792:   /* C++-style comment -- eat it */
   793:   TOKEN_START;
   794:   advCol(yyleng-1);   // don't count newline
   795:   newLine();          // count it here
   796: }
   797:   YY_BREAK
   798: /* -------- punctuators, operators, keywords --------- */
   799: case 8:
   800: YY_RULE_SETUP
   801: #line 155 "gramlex.lex"
   802: TOK_UPD_COL;  return TOK_RBRACE;
   803:   YY_BREAK
   804: case 9:
   805: YY_RULE_SETUP
   806: #line 156 "gramlex.lex"
   807: TOK_UPD_COL;  return TOK_COLON;
   808:   YY_BREAK
   809: case 10:
   810: YY_RULE_SETUP
   811: #line 157 "gramlex.lex"
   812: TOK_UPD_COL;  return TOK_RPAREN;
   813:   YY_BREAK
   814: case 11:
   815: YY_RULE_SETUP
   816: #line 158 "gramlex.lex"
   817: TOK_UPD_COL;  return TOK_COMMA;
   818:   YY_BREAK
   819: case 12:
   820: YY_RULE_SETUP
   821: #line 160 "gramlex.lex"
   822: TOK_UPD_COL;  return TOK_TERMINALS;
   823:   YY_BREAK
   824: case 13:
   825: YY_RULE_SETUP
   826: #line 161 "gramlex.lex"
   827: TOK_UPD_COL;  return TOK_PRECEDENCE;
   828:   YY_BREAK
   829: case 14:
   830: YY_RULE_SETUP
   831: #line 162 "gramlex.lex"
   832: TOK_UPD_COL;  return TOK_OPTION;
   833:   YY_BREAK
   834: case 15:
   835: YY_RULE_SETUP
   836: #line 163 "gramlex.lex"
   837: TOK_UPD_COL;  return TOK_EXPECT;
   838:   YY_BREAK
   839: case 16:
   840: YY_RULE_SETUP
   841: #line 164 "gramlex.lex"
   842: TOK_UPD_COL;  return TOK_SUBSETS;
   843:   YY_BREAK
   844: /* ----------- sequences that begin literal code ------------ */
   845: /* for the time being, a "[" will always start an embedded sequence;
   846:    * eventually, I'll remove this in favor of the brace- and paren-
   847:    * delimited embedded sequences */
   848: case 17:
   849: YY_RULE_SETUP
   850: #line 171 "gramlex.lex"
   851: {
   852:   TOK_UPD_COL;
   853:   BEGIN(LITCODE);
   854:   beginEmbed(']', TOK_LIT_CODE);
   855: }
   856:   YY_BREAK
   857: /* the "->" operator moves us into RHS mode, which is special because
   858:    * in this mode any "{" is interpreted as the beginning of an embedded
   859:    * section of literal code */
   860: case 18:
   861: YY_RULE_SETUP
   862: #line 180 "gramlex.lex"
   863: {
   864:   TOK_UPD_COL;
   865:   BEGIN(RHS);
   866:   return TOK_ARROW;
   867: }
   868:   YY_BREAK
   869: /* "{" in a RHS begins embedded */
   870: case 19:
   871: YY_RULE_SETUP
   872: #line 187 "gramlex.lex"
   873: {
   874:   TOK_UPD_COL;
   875:   BEGIN(LITCODE);
   876:   beginEmbed('}', TOK_LIT_CODE);
   877: }
   878:   YY_BREAK
   879: /* otherwise it's just a "{" */
   880: case 20:
   881: YY_RULE_SETUP
   882: #line 194 "gramlex.lex"
   883: {
   884:   TOK_UPD_COL;
   885:   return TOK_LBRACE;
   886: }
   887:   YY_BREAK
   888: /* since right-hand-sides can end with either embedded code or a simple
   889:    * ";", the semicolon gets out of RHS mode */
   890: case 21:
   891: YY_RULE_SETUP
   892: #line 201 "gramlex.lex"
   893: {
   894:   TOK_UPD_COL;
   895:   BEGIN(INITIAL);     // if in RHS, reset to INITIAL
   896:   return TOK_SEMICOLON;
   897: }
   898:   YY_BREAK
   899: /* "token" and "nonterm" are always followed by an optional type,
   900:    * and then a TOK_NAME.  So, until we see a TOK_NAME, "(" will mean
   901:    * the start of an embedded sequence. */
   902: case 22:
   903: YY_RULE_SETUP
   904: #line 210 "gramlex.lex"
   905: {
   906:   TOK_UPD_COL;
   907:   BEGIN(OPTIONAL_TYPE);
   908:   return yytext[0]=='t'? TOK_TOKEN : TOK_NONTERM;
   909: }
   910:   YY_BREAK
   911: /* so now this begins embedded */
   912: case 23:
   913: YY_RULE_SETUP
   914: #line 217 "gramlex.lex"
   915: {
   916:   TOK_UPD_COL;
   917:   BEGIN(LITCODE);
   918:   beginEmbed(')', TOK_LIT_CODE);
   919: }
   920:   YY_BREAK
   921: /* otherwise it's just itself */
   922: case 24:
   923: YY_RULE_SETUP
   924: #line 224 "gramlex.lex"
   925: {
   926:   TOK_UPD_COL;
   927:   return TOK_LPAREN;
   928: }
   929:   YY_BREAK
   930: /* function beginning */
   931: case 25:
   932: YY_RULE_SETUP
   933: #line 230 "gramlex.lex"
   934: {
   935:   TOK_UPD_COL;
   936:   BEGIN(FUN);            // treat "{" as beginning literal code
   937:   return TOK_FUN;
   938: }
   939:   YY_BREAK
   940: /* verbatim beginning */
   941: case 26:
   942: YY_RULE_SETUP
   943: #line 237 "gramlex.lex"
   944: {
   945:   TOK_UPD_COL;
   946:   BEGIN(FUN);            // close enough
   947:   return yytext[0]=='v'? TOK_VERBATIM : TOK_IMPL_VERBATIM;
   948: }
   949:   YY_BREAK
   950: /* --------- embedded literal code --------- */
   951: /* no TOKEN_START here; we'll use the tokenStartLoc that
   952:    * was computed in the opening punctuation */
   953: 
   954: case 27:
   955: YY_RULE_SETUP
   956: #line 248 "gramlex.lex"
   957: {
   958:     UPD_COL;
   959:     embedded->handle(yytext, yyleng, embedFinish);
   960:   }
   961:   YY_BREAK
   962: case 28:
   963: YY_RULE_SETUP
   964: #line 253 "gramlex.lex"
   965: {
   966:     newLine();
   967:     embedded->handle(yytext, yyleng, embedFinish);
   968:   }
   969:   YY_BREAK
   970: case 29:
   971: YY_RULE_SETUP
   972: #line 258 "gramlex.lex"
   973: {
   974:     UPD_COL;
   975:     if (embedded->zeroNesting()) {
   976:       // done
   977:       BEGIN(INITIAL);
   978: 
   979:       // check for balanced delimiter
   980:       if (embedFinish != yytext[0]) {
   981:         err("unbalanced literal code delimiter");
   982:       }
   983: 
   984:       // don't add "return" or ";"
   985:       embedded->exprOnly = false;
   986: 
   987:       // can't extract anything
   988:       embedded->isDeclaration = false;
   989: 
   990:       // caller can get text from embedded->text
   991:       return embedMode;
   992:     }
   993:     else {
   994:       // delimeter paired within the embedded code, mostly ignore it
   995:       embedded->handle(yytext, yyleng, embedFinish);
   996:     }
   997:   }
   998:   YY_BREAK
   999: case YY_STATE_EOF(LITCODE):
  1000: #line 284 "gramlex.lex"
  1001: {
  1002:     err(sm_stringc << "hit end of file while looking for final `"
  1003:                 << embedFinish << "'");
  1004:     yyterminate();
  1005:   }
  1006:   YY_BREAK
  1007: 
  1008: /* embedded *type* description */
  1009: case 30:
  1010: YY_RULE_SETUP
  1011: #line 293 "gramlex.lex"
  1012: {
  1013:   /* caller will get text from yytext and yyleng */
  1014:   TOK_UPD_COL;
  1015: 
  1016:   /* drop into literal-code processing */
  1017:   BEGIN(LITCODE);
  1018: 
  1019:   /* I reset the initial nesting to -1 so that the '{' at the
  1020:    * beginning of the class body sets nesting to 0, thus when
  1021:    * I see the final '}' I'll see that at level 0 and stop */
  1022:   beginEmbed('}', TOK_LIT_CODE, -1);
  1023: 
  1024:   return TOK_CONTEXT_CLASS;
  1025: }
  1026:   YY_BREAK
  1027: /* ---------- includes ----------- */
  1028: case 31:
  1029: YY_RULE_SETUP
  1030: #line 310 "gramlex.lex"
  1031: {
  1032:   TOK_UPD_COL;    /* hence no TOKEN_START in INCLUDE area */
  1033:   BEGIN(INCLUDE);
  1034: }
  1035:   YY_BREAK
  1036: 
  1037: case 32:
  1038: YY_RULE_SETUP
  1039: #line 316 "gramlex.lex"
  1040: {
  1041:     /* e.g.: ("filename") */
  1042:     /* file name to include */
  1043:     UPD_COL;
  1044: 
  1045:     /* find quotes */
  1046:     char *leftq = strchr(yytext, '"');
  1047:     char *rightq = strchr(leftq+1, '"');
  1048:     xassert(leftq && rightq);
  1049: 
  1050:     /* extract filename string */
  1051:     includeFileName = addString(leftq+1, rightq-leftq-1);
  1052: 
  1053:     /* go back to normal processing */
  1054:     BEGIN(INITIAL);
  1055:     return TOK_INCLUDE;
  1056:   }
  1057:   YY_BREAK
  1058: case 33:
  1059: YY_RULE_SETUP
  1060: #line 334 "gramlex.lex"
  1061: {
  1062:     /* anything else: malformed */
  1063:     UPD_COL;
  1064:     errorMalformedInclude();
  1065: 
  1066:     /* rudimentary error recovery.. */
  1067:     BEGIN(EAT_TO_NEWLINE);
  1068:   }
  1069:   YY_BREAK
  1070: 
  1071: 
  1072: case 34:
  1073: YY_RULE_SETUP
  1074: #line 345 "gramlex.lex"
  1075: {
  1076:     UPD_COL;
  1077:     /* not newline, eat it */
  1078:   }
  1079:   YY_BREAK
  1080: case 35:
  1081: YY_RULE_SETUP
  1082: #line 350 "gramlex.lex"
  1083: {
  1084:     /* get out of here */
  1085:     newLine();
  1086:     BEGIN(INITIAL);
  1087:   }
  1088:   YY_BREAK
  1089: 
  1090: /* -------- name literal --------- */
  1091: case 36:
  1092: YY_RULE_SETUP
  1093: #line 358 "gramlex.lex"
  1094: {
  1095:   /* get text from yytext and yyleng */
  1096:   TOK_UPD_COL;
  1097:   if (YY_START == OPTIONAL_TYPE) {
  1098:     BEGIN(INITIAL);      // bail out of OPTIONAL_TYPE mode
  1099:   }
  1100:   return TOK_NAME;
  1101: }
  1102:   YY_BREAK
  1103: /* -------- numeric literal ------ */
  1104: case 37:
  1105: YY_RULE_SETUP
  1106: #line 368 "gramlex.lex"
  1107: {
  1108:   TOK_UPD_COL;
  1109:   integerLiteral = strtoul(yytext, NULL, 10 /*radix*/);
  1110:   return TOK_INTEGER;
  1111: }
  1112:   YY_BREAK
  1113: /* ----------- string literal ----- */
  1114: case 38:
  1115: YY_RULE_SETUP
  1116: #line 375 "gramlex.lex"
  1117: {
  1118:   TOK_UPD_COL;
  1119:   sm_stringLiteral = addString(yytext+1, yyleng-2);        // strip quotes
  1120:   return TOK_STRING;
  1121: }
  1122:   YY_BREAK
  1123: /* --------- illegal ------------- */
  1124: case 39:
  1125: YY_RULE_SETUP
  1126: #line 382 "gramlex.lex"
  1127: {
  1128:   TOK_UPD_COL;
  1129:   errorIllegalCharacter(yytext[0]);
  1130: }
  1131:   YY_BREAK
  1132: case 40:
  1133: YY_RULE_SETUP
  1134: #line 388 "gramlex.lex"
  1135: YY_FATAL_ERROR( "flex scanner jammed" );
  1136:   YY_BREAK
  1137: #line 1137 "lex.yy.cc"
  1138: case YY_STATE_EOF(INITIAL):
  1139: case YY_STATE_EOF(INCLUDE):
  1140: case YY_STATE_EOF(EAT_TO_NEWLINE):
  1141: case YY_STATE_EOF(RHS):
  1142: case YY_STATE_EOF(FUN):
  1143: case YY_STATE_EOF(OPTIONAL_TYPE):
  1144:   yyterminate();
  1145: 
  1146:   case YY_END_OF_BUFFER:
  1147:     {
  1148:     /* Amount of text matched not including the EOB char. */
  1149:     int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1;
  1150: 
  1151:     /* Undo the effects of YY_DO_BEFORE_ACTION. */
  1152:     *yy_cp = yy_hold_char;
  1153:     YY_RESTORE_YY_MORE_OFFSET
  1154: 
  1155:     if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW )
  1156:       {
  1157:       /* We're scanning a new file or input source.  It's
  1158:        * possible that this happened because the user
  1159:        * just pointed yyin at a new source and called
  1160:        * yylex().  If so, then we have to assure
  1161:        * consistency between yy_current_buffer and our
  1162:        * globals.  Here is the right place to do so, because
  1163:        * this is the first action (other than possibly a
  1164:        * back-up) that will match for the new input source.
  1165:        */
  1166:       yy_n_chars = yy_current_buffer->yy_n_chars;
  1167:       yy_current_buffer->yy_input_file = yyin;
  1168:       yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL;
  1169:       }
  1170: 
  1171:     /* Note that here we test for yy_c_buf_p "<=" to the position
  1172:      * of the first EOB in the buffer, since yy_c_buf_p will
  1173:      * already have been incremented past the NUL character
  1174:      * (since all states make transitions on EOB to the
  1175:      * end-of-buffer state).  Contrast this with the test
  1176:      * in input().
  1177:      */
  1178:     if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] )
  1179:       { /* This was really a NUL. */
  1180:       yy_state_type yy_next_state;
  1181: 
  1182:       yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text;
  1183: 
  1184:       yy_current_state = yy_get_previous_state();
  1185: 
  1186:       /* Okay, we're now positioned to make the NUL
  1187:        * transition.  We couldn't have
  1188:        * yy_get_previous_state() go ahead and do it
  1189:        * for us because it doesn't know how to deal
  1190:        * with the possibility of jamming (and we don't
  1191:        * want to build jamming into it because then it
  1192:        * will run more slowly).
  1193:        */
  1194: 
  1195:       yy_next_state = yy_try_NUL_trans( yy_current_state );
  1196: 
  1197:       yy_bp = yytext_ptr + YY_MORE_ADJ;
  1198: 
  1199:       if ( yy_next_state )
  1200:         {
  1201:         /* Consume the NUL. */
  1202:         yy_cp = ++yy_c_buf_p;
  1203:         yy_current_state = yy_next_state;
  1204:         goto yy_match;
  1205:         }
  1206: 
  1207:       else
  1208:         {
  1209:         yy_cp = yy_c_buf_p;
  1210:         goto yy_find_action;
  1211:         }
  1212:       }
  1213: 
  1214:     else switch ( yy_get_next_buffer() )
  1215:       {
  1216:       case EOB_ACT_END_OF_FILE:
  1217:         {
  1218:         yy_did_buffer_switch_on_eof = 0;
  1219: 
  1220:         if ( yywrap() )
  1221:           {
  1222:           /* Note: because we've taken care in
  1223:            * yy_get_next_buffer() to have set up
  1224:            * yytext, we can now set up
  1225:            * yy_c_buf_p so that if some total
  1226:            * hoser (like flex itself) wants to
  1227:            * call the scanner after we return the
  1228:            * YY_NULL, it'll still work - another
  1229:            * YY_NULL will get returned.
  1230:            */
  1231:           yy_c_buf_p = yytext_ptr + YY_MORE_ADJ;
  1232: 
  1233:           yy_act = YY_STATE_EOF(YY_START);
  1234:           goto do_action;
  1235:           }
  1236: 
  1237:         else
  1238:           {
  1239:           if ( ! yy_did_buffer_switch_on_eof )
  1240:             YY_NEW_FILE;
  1241:           }
  1242:         break;
  1243:         }
  1244: 
  1245:       case EOB_ACT_CONTINUE_SCAN:
  1246:         yy_c_buf_p =
  1247:           yytext_ptr + yy_amount_of_matched_text;
  1248: 
  1249:         yy_current_state = yy_get_previous_state();
  1250: 
  1251:         yy_cp = yy_c_buf_p;
  1252:         yy_bp = yytext_ptr + YY_MORE_ADJ;
  1253:         goto yy_match;
  1254: 
  1255:       case EOB_ACT_LAST_MATCH:
  1256:         yy_c_buf_p =
  1257:         &yy_current_buffer->yy_ch_buf[yy_n_chars];
  1258: 
  1259:         yy_current_state = yy_get_previous_state();
  1260: 
  1261:         yy_cp = yy_c_buf_p;
  1262:         yy_bp = yytext_ptr + YY_MORE_ADJ;
  1263:         goto yy_find_action;
  1264:       }
  1265:     break;
  1266:     }
  1267: 
  1268:   default:
  1269:     YY_FATAL_ERROR(
  1270:       "fatal flex scanner internal error--no action found" );
  1271:   } /* end of action switch */
  1272:     } /* end of scanning one token */
  1273:   } /* end of yylex */
  1274: 
  1275: yyFlexLexer::yyFlexLexer( std::istream* arg_yyin, std::ostream* arg_yyout )
  1276:   {
  1277:   yyin = arg_yyin;
  1278:   yyout = arg_yyout;
  1279:   yy_c_buf_p = 0;
  1280:   yy_init = 1;
  1281:   yy_start = 0;
  1282:   yy_flex_debug = 0;
  1283:   yylineno = 1;  // this will only get updated if %option yylineno
  1284: 
  1285:   yy_did_buffer_switch_on_eof = 0;
  1286: 
  1287:   yy_looking_for_trail_begin = 0;
  1288:   yy_more_flag = 0;
  1289:   yy_more_len = 0;
  1290:   yy_more_offset = yy_prev_more_offset = 0;
  1291: 
  1292:   yy_start_stack_ptr = yy_start_stack_depth = 0;
  1293:   yy_start_stack = 0;
  1294: 
  1295:   yy_current_buffer = 0;
  1296: 
  1297: #ifdef YY_USES_REJECT
  1298:   yy_state_buf = new yy_state_type[YY_BUF_SIZE + 2];
  1299: #else
  1300:   yy_state_buf = 0;
  1301: #endif
  1302:   }
  1303: 
  1304: yyFlexLexer::~yyFlexLexer()
  1305:   {
  1306:   delete yy_state_buf;
  1307:   yy_delete_buffer( yy_current_buffer );
  1308:   }
  1309: 
  1310: void yyFlexLexer::switch_streams( std::istream* new_in, std::ostream* new_out )
  1311:   {
  1312:   if ( new_in )
  1313:     {
  1314:     yy_delete_buffer( yy_current_buffer );
  1315:     yy_switch_to_buffer( yy_create_buffer( new_in, YY_BUF_SIZE ) );
  1316:     }
  1317: 
  1318:   if ( new_out )
  1319:     yyout = new_out;
  1320:   }
  1321: 
  1322: #ifdef YY_INTERACTIVE
  1323: int yyFlexLexer::LexerInput( char* buf, int /* max_size */ )
  1324: #else
  1325: int yyFlexLexer::LexerInput( char* buf, int max_size )
  1326: #endif
  1327:   {
  1328:   if ( yyin->eof() || yyin->fail() )
  1329:     return 0;
  1330: 
  1331: #ifdef YY_INTERACTIVE
  1332:   yyin->get( buf[0] );
  1333: 
  1334:   if ( yyin->eof() )
  1335:     return 0;
  1336: 
  1337:   if ( yyin->bad() )
  1338:     return -1;
  1339: 
  1340:   return 1;
  1341: 
  1342: #else
  1343:   (void) yyin->read( buf, max_size );
  1344: 
  1345:   if ( yyin->bad() )
  1346:     return -1;
  1347:   else
  1348:     return yyin->gcount();
  1349: #endif
  1350:   }
  1351: 
  1352: void yyFlexLexer::LexerOutput( const char* buf, int size )
  1353:   {
  1354:   (void) yyout->write( buf, size );
  1355:   }
  1356: 
  1357: /* yy_get_next_buffer - try to read in a new buffer
  1358:  *
  1359:  * Returns a code representing an action:
  1360:  *  EOB_ACT_LAST_MATCH -
  1361:  *  EOB_ACT_CONTINUE_SCAN - continue scanning from current position
  1362:  *  EOB_ACT_END_OF_FILE - end of file
  1363:  */
  1364: 
  1365: int yyFlexLexer::yy_get_next_buffer()
  1366:   {
  1367:   register char *dest = yy_current_buffer->yy_ch_buf;
  1368:   register char *source = yytext_ptr;
  1369:   register int number_to_move, i;
  1370:   int ret_val;
  1371: 
  1372:   if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] )
  1373:     YY_FATAL_ERROR(
  1374:     "fatal flex scanner internal error--end of buffer missed" );
  1375: 
  1376:   if ( yy_current_buffer->yy_fill_buffer == 0 )
  1377:     { /* Don't try to fill the buffer, so this is an EOF. */
  1378:     if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 )
  1379:       {
  1380:       /* We matched a single character, the EOB, so
  1381:        * treat this as a final EOF.
  1382:        */
  1383:       return EOB_ACT_END_OF_FILE;
  1384:       }
  1385: 
  1386:     else
  1387:       {
  1388:       /* We matched some text prior to the EOB, first
  1389:        * process it.
  1390:        */
  1391:       return EOB_ACT_LAST_MATCH;
  1392:       }
  1393:     }
  1394: 
  1395:   /* Try to read more data. */
  1396: 
  1397:   /* First move last chars to start of buffer. */
  1398:   number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1;
  1399: 
  1400:   for ( i = 0; i < number_to_move; ++i )
  1401:     *(dest++) = *(source++);
  1402: 
  1403:   if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING )
  1404:     /* don't do the read, it's not guaranteed to return an EOF,
  1405:      * just force an EOF
  1406:      */
  1407:     yy_current_buffer->yy_n_chars = yy_n_chars = 0;
  1408: 
  1409:   else
  1410:     {
  1411:     int num_to_read =
  1412:       yy_current_buffer->yy_buf_size - number_to_move - 1;
  1413: 
  1414:     while ( num_to_read <= 0 )
  1415:       { /* Not enough room in the buffer - grow it. */
  1416: #ifdef YY_USES_REJECT
  1417:       YY_FATAL_ERROR(
  1418: "input buffer overflow, can't enlarge buffer because scanner uses REJECT" );
  1419: #else
  1420: 
  1421:       /* just a shorter name for the current buffer */
  1422:       YY_BUFFER_STATE b = yy_current_buffer;
  1423: 
  1424:       int yy_c_buf_p_offset =
  1425:         (int) (yy_c_buf_p - b->yy_ch_buf);
  1426: 
  1427:       if ( b->yy_is_our_buffer )
  1428:         {
  1429:         int new_size = b->yy_buf_size * 2;
  1430: 
  1431:         if ( new_size <= 0 )
  1432:           b->yy_buf_size += b->yy_buf_size / 8;
  1433:         else
  1434:           b->yy_buf_size *= 2;
  1435: 
  1436:         b->yy_ch_buf = (char *)
  1437:           /* Include room in for 2 EOB chars. */
  1438:           yy_flex_realloc( (void *) b->yy_ch_buf,
  1439:                b->yy_buf_size + 2 );
  1440:         }
  1441:       else
  1442:         /* Can't grow it, we don't own it. */
  1443:         b->yy_ch_buf = 0;
  1444: 
  1445:       if ( ! b->yy_ch_buf )
  1446:         YY_FATAL_ERROR(
  1447:         "fatal error - scanner input buffer overflow" );
  1448: 
  1449:       yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
  1450: 
  1451:       num_to_read = yy_current_buffer->yy_buf_size -
  1452:             number_to_move - 1;
  1453: #endif
  1454:       }
  1455: 
  1456:     if ( num_to_read > YY_READ_BUF_SIZE )
  1457:       num_to_read = YY_READ_BUF_SIZE;
  1458: 
  1459:     /* Read in more data. */
  1460:     YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]),
  1461:       yy_n_chars, num_to_read );
  1462: 
  1463:     yy_current_buffer->yy_n_chars = yy_n_chars;
  1464:     }
  1465: 
  1466:   if ( yy_n_chars == 0 )
  1467:     {
  1468:     if ( number_to_move == YY_MORE_ADJ )
  1469:       {
  1470:       ret_val = EOB_ACT_END_OF_FILE;
  1471:       yyrestart( yyin );
  1472:       }
  1473: 
  1474:     else
  1475:       {
  1476:       ret_val = EOB_ACT_LAST_MATCH;
  1477:       yy_current_buffer->yy_buffer_status =
  1478:         YY_BUFFER_EOF_PENDING;
  1479:       }
  1480:     }
  1481: 
  1482:   else
  1483:     ret_val = EOB_ACT_CONTINUE_SCAN;
  1484: 
  1485:   yy_n_chars += number_to_move;
  1486:   yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR;
  1487:   yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
  1488: 
  1489:   yytext_ptr = &yy_current_buffer->yy_ch_buf[0];
  1490: 
  1491:   return ret_val;
  1492:   }
  1493: 
  1494: 
  1495: /* yy_get_previous_state - get the state just before the EOB char was reached */
  1496: 
  1497: yy_state_type yyFlexLexer::yy_get_previous_state()
  1498:   {
  1499:   register yy_state_type yy_current_state;
  1500:   register char *yy_cp;
  1501: 
  1502:   yy_current_state = yy_start;
  1503: 
  1504:   for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp )
  1505:     {
  1506:     register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
  1507:     if ( yy_accept[yy_current_state] )
  1508:       {
  1509:       yy_last_accepting_state = yy_current_state;
  1510:       yy_last_accepting_cpos = yy_cp;
  1511:       }
  1512:     while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
  1513:       {
  1514:       yy_current_state = (int) yy_def[yy_current_state];
  1515:       if ( yy_current_state >= 159 )
  1516:         yy_c = yy_meta[(unsigned int) yy_c];
  1517:       }
  1518:     yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
  1519:     }
  1520: 
  1521:   return yy_current_state;
  1522:   }
  1523: 
  1524: 
  1525: /* yy_try_NUL_trans - try to make a transition on the NUL character
  1526:  *
  1527:  * synopsis
  1528:  *  next_state = yy_try_NUL_trans( current_state );
  1529:  */
  1530: 
  1531: yy_state_type yyFlexLexer::yy_try_NUL_trans( yy_state_type yy_current_state )
  1532:   {
  1533:   register int yy_is_jam;
  1534:   register char *yy_cp = yy_c_buf_p;
  1535: 
  1536:   register YY_CHAR yy_c = 1;
  1537:   if ( yy_accept[yy_current_state] )
  1538:     {
  1539:     yy_last_accepting_state = yy_current_state;
  1540:     yy_last_accepting_cpos = yy_cp;
  1541:     }
  1542:   while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
  1543:     {
  1544:     yy_current_state = (int) yy_def[yy_current_state];
  1545:     if ( yy_current_state >= 159 )
  1546:       yy_c = yy_meta[(unsigned int) yy_c];
  1547:     }
  1548:   yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
  1549:   yy_is_jam = (yy_current_state == 158);
  1550: 
  1551:   return yy_is_jam ? 0 : yy_current_state;
  1552:   }
  1553: 
  1554: 
  1555: void yyFlexLexer::yyunput( int c, register char* yy_bp )
  1556:   {
  1557:   register char *yy_cp = yy_c_buf_p;
  1558: 
  1559:   /* undo effects of setting up yytext */
  1560:   *yy_cp = yy_hold_char;
  1561: 
  1562:   if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
  1563:     { /* need to shift things up to make room */
  1564:     /* +2 for EOB chars. */
  1565:     register int number_to_move = yy_n_chars + 2;
  1566:     register char *dest = &yy_current_buffer->yy_ch_buf[
  1567:           yy_current_buffer->yy_buf_size + 2];
  1568:     register char *source =
  1569:         &yy_current_buffer->yy_ch_buf[number_to_move];
  1570: 
  1571:     while ( source > yy_current_buffer->yy_ch_buf )
  1572:       *--dest = *--source;
  1573: 
  1574:     yy_cp += (int) (dest - source);
  1575:     yy_bp += (int) (dest - source);
  1576:     yy_current_buffer->yy_n_chars =
  1577:       yy_n_chars = yy_current_buffer->yy_buf_size;
  1578: 
  1579:     if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
  1580:       YY_FATAL_ERROR( "flex scanner push-back overflow" );
  1581:     }
  1582: 
  1583:   *--yy_cp = (char) c;
  1584: 
  1585: 
  1586:   yytext_ptr = yy_bp;
  1587:   yy_hold_char = *yy_cp;
  1588:   yy_c_buf_p = yy_cp;
  1589:   }
  1590: 
  1591: 
  1592: int yyFlexLexer::yyinput()
  1593:   {
  1594:   int c;
  1595: 
  1596:   *yy_c_buf_p = yy_hold_char;
  1597: 
  1598:   if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
  1599:     {
  1600:     /* yy_c_buf_p now points to the character we want to return.
  1601:      * If this occurs *before* the EOB characters, then it's a
  1602:      * valid NUL; if not, then we've hit the end of the buffer.
  1603:      */
  1604:     if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
  1605:       /* This was really a NUL. */
  1606:       *yy_c_buf_p = '\0';
  1607: 
  1608:     else
  1609:       { /* need more input */
  1610:       int offset = yy_c_buf_p - yytext_ptr;
  1611:       ++yy_c_buf_p;
  1612: 
  1613:       switch ( yy_get_next_buffer() )
  1614:         {
  1615:         case EOB_ACT_LAST_MATCH:
  1616:           /* This happens because yy_g_n_b()
  1617:            * sees that we've accumulated a
  1618:            * token and flags that we need to
  1619:            * try matching the token before
  1620:            * proceeding.  But for input(),
  1621:            * there's no matching to consider.
  1622:            * So convert the EOB_ACT_LAST_MATCH
  1623:            * to EOB_ACT_END_OF_FILE.
  1624:            */
  1625: 
  1626:           /* Reset buffer status. */
  1627:           yyrestart( yyin );
  1628: 
  1629:           /* fall through */
  1630: 
  1631:         case EOB_ACT_END_OF_FILE:
  1632:           {
  1633:           if ( yywrap() )
  1634:             return EOF;
  1635: 
  1636:           if ( ! yy_did_buffer_switch_on_eof )
  1637:             YY_NEW_FILE;
  1638: #ifdef __cplusplus
  1639:           return yyinput();
  1640: #else
  1641:           return input();
  1642: #endif
  1643:           }
  1644: 
  1645:         case EOB_ACT_CONTINUE_SCAN:
  1646:           yy_c_buf_p = yytext_ptr + offset;
  1647:           break;
  1648:         }
  1649:       }
  1650:     }
  1651: 
  1652:   c = *(unsigned char *) yy_c_buf_p;  /* cast for 8-bit char's */
  1653:   *yy_c_buf_p = '\0';  /* preserve yytext */
  1654:   yy_hold_char = *++yy_c_buf_p;
  1655: 
  1656: 
  1657:   return c;
  1658:   }
  1659: 
  1660: void yyFlexLexer::yyrestart( std::istream* input_file )
  1661:   {
  1662:   if ( ! yy_current_buffer )
  1663:     yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );
  1664: 
  1665:   yy_init_buffer( yy_current_buffer, input_file );
  1666:   yy_load_buffer_state();
  1667:   }
  1668: 
  1669: 
void yyFlexLexer::yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
  {
  /* Make 'new_buffer' the active input buffer, first saving the scan
   * position of the old buffer so scanning can later resume where it
   * left off.
   */
  if ( yy_current_buffer == new_buffer )
    return;

  if ( yy_current_buffer )
    {
    /* Flush out information for old buffer. */
    /* Put back the character that was overwritten when yytext was
     * NUL-terminated in place (it was saved in yy_hold_char).
     */
    *yy_c_buf_p = yy_hold_char;
    yy_current_buffer->yy_buf_pos = yy_c_buf_p;
    yy_current_buffer->yy_n_chars = yy_n_chars;
    }

  yy_current_buffer = new_buffer;
  yy_load_buffer_state();

  /* We don't actually know whether we did this switch during
   * EOF (yywrap()) processing, but the only time this flag
   * is looked at is after yywrap() is called, so it's safe
   * to go ahead and always set it.
   */
  yy_did_buffer_switch_on_eof = 1;
  }
  1693: 
  1694: 
void yyFlexLexer::yy_load_buffer_state()
  {
  /* Copy the cached scan state out of yy_current_buffer into the
   * scanner's working variables; this is the inverse of the "flush"
   * performed in yy_switch_to_buffer().
   */
  yy_n_chars = yy_current_buffer->yy_n_chars;
  yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
  yyin = yy_current_buffer->yy_input_file;
  /* Remember the character at the scan position; it will be replaced
   * by a NUL when yytext is terminated in place.
   */
  yy_hold_char = *yy_c_buf_p;
  }
  1702: 
  1703: 
  1704: YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( std::istream* file, int size )
  1705:   {
  1706:   YY_BUFFER_STATE b;
  1707: 
  1708:   b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
  1709:   if ( ! b )
  1710:     YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
  1711: 
  1712:   b->yy_buf_size = size;
  1713: 
  1714:   /* yy_ch_buf has to be 2 characters longer than the size given because
  1715:    * we need to put in 2 end-of-buffer characters.
  1716:    */
  1717:   b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 );
  1718:   if ( ! b->yy_ch_buf )
  1719:     YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
  1720: 
  1721:   b->yy_is_our_buffer = 1;
  1722: 
  1723:   yy_init_buffer( b, file );
  1724: 
  1725:   return b;
  1726:   }
  1727: 
  1728: 
  1729: void yyFlexLexer::yy_delete_buffer( YY_BUFFER_STATE b )
  1730:   {
  1731:   if ( ! b )
  1732:     return;
  1733: 
  1734:   if ( b == yy_current_buffer )
  1735:     yy_current_buffer = (YY_BUFFER_STATE) 0;
  1736: 
  1737:   if ( b->yy_is_our_buffer )
  1738:     yy_flex_free( (void *) b->yy_ch_buf );
  1739: 
  1740:   yy_flex_free( (void *) b );
  1741:   }
  1742: 
  1743: 
  1744: void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, std::istream* file )
  1745: 
  1746:   {
  1747:   yy_flush_buffer( b );
  1748: 
  1749:   b->yy_input_file = file;
  1750:   b->yy_fill_buffer = 1;
  1751: 
  1752:   b->yy_is_interactive = 0;
  1753:   }
  1754: 
  1755: 
void yyFlexLexer::yy_flush_buffer( YY_BUFFER_STATE b )
  {
  /* Discard any buffered input in 'b' and reset it to the state of a
   * freshly created buffer.  Safe to call with a null pointer.
   */
  if ( ! b )
    return;

  b->yy_n_chars = 0;

  /* We always need two end-of-buffer characters.  The first causes
   * a transition to the end-of-buffer state.  The second causes
   * a jam in that state.
   */
  b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
  b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;

  b->yy_buf_pos = &b->yy_ch_buf[0];

  b->yy_at_bol = 1;
  b->yy_buffer_status = YY_BUFFER_NEW;

  /* If we just flushed the active buffer, resynchronize the
   * scanner's cached state with it.
   */
  if ( b == yy_current_buffer )
    yy_load_buffer_state();
  }
  1778: 
  1779: 
  1780: #ifndef YY_NO_SCAN_BUFFER
  1781: #endif
  1782: 
  1783: 
  1784: #ifndef YY_NO_SCAN_STRING
  1785: #endif
  1786: 
  1787: 
  1788: #ifndef YY_NO_SCAN_BYTES
  1789: #endif
  1790: 
  1791: 
  1792: #ifndef YY_NO_PUSH_STATE
void yyFlexLexer::yy_push_state( int new_state )
  {
  /* Save the current start condition on the start-condition stack and
   * switch the scanner to 'new_state'.  The stack grows on demand in
   * YY_START_STACK_INCR increments.
   */
  if ( yy_start_stack_ptr >= yy_start_stack_depth )
    {
    yy_size_t new_size;

    yy_start_stack_depth += YY_START_STACK_INCR;
    new_size = yy_start_stack_depth * sizeof( int );

    /* First growth allocates; later growths reallocate in place. */
    if ( ! yy_start_stack )
      yy_start_stack = (int *) yy_flex_alloc( new_size );

    else
      yy_start_stack = (int *) yy_flex_realloc(
          (void *) yy_start_stack, new_size );

    if ( ! yy_start_stack )
      YY_FATAL_ERROR(
      "out of memory expanding start-condition stack" );
    }

  yy_start_stack[yy_start_stack_ptr++] = YY_START;

  BEGIN(new_state);
  }
  1818: #endif
  1819: 
  1820: 
  1821: #ifndef YY_NO_POP_STATE
  1822: void yyFlexLexer::yy_pop_state()
  1823:   {
  1824:   if ( --yy_start_stack_ptr < 0 )
  1825:     YY_FATAL_ERROR( "start-condition stack underflow" );
  1826: 
  1827:   BEGIN(yy_start_stack[yy_start_stack_ptr]);
  1828:   }
  1829: #endif
  1830: 
  1831: 
  1832: #ifndef YY_NO_TOP_STATE
  1833: int yyFlexLexer::yy_top_state()
  1834:   {
  1835:   return yy_start_stack[yy_start_stack_ptr - 1];
  1836:   }
  1837: #endif
  1838: 
  1839: #ifndef YY_EXIT_FAILURE
  1840: #define YY_EXIT_FAILURE 2
  1841: #endif
  1842: 
  1843: 
void yyFlexLexer::LexerError( yyconst char msg[] )
  {
  /* Default fatal-error handler: report the message on cerr and
   * terminate the whole process.
   */
  cerr << msg << '\n';
  exit( YY_EXIT_FAILURE );
  }
  1849: 
  1850: 
  1851: /* Redefine yyless() so it works in section 3 code. */
  1852: 
  1853: #undef yyless
  1854: #define yyless(n) \
  1855:   do \
  1856:     { \
  1857:     /* Undo effects of setting up yytext. */ \
  1858:     yytext[yyleng] = yy_hold_char; \
  1859:     yy_c_buf_p = yytext + n; \
  1860:     yy_hold_char = *yy_c_buf_p; \
  1861:     *yy_c_buf_p = '\0'; \
  1862:     yyleng = n; \
  1863:     } \
  1864:   while ( 0 )
  1865: 
  1866: 
  1867: /* Internal utility routines. */
  1868: 
  1869: #ifndef yytext_ptr
  1870: #ifdef YY_USE_PROTOS
static void yy_flex_strncpy( char *s1, yyconst char *s2, int n )
#else
static void yy_flex_strncpy( s1, s2, n )
char *s1;
yyconst char *s2;
int n;
#endif
  {
  /* Copy exactly n characters from s2 to s1.  Unlike strncpy(),
   * no NUL padding or termination is performed.
   */
  register int i;
  for ( i = 0; i < n; ++i )
    s1[i] = s2[i];
  }
  1883: #endif
  1884: 
  1885: #ifdef YY_NEED_STRLEN
  1886: #ifdef YY_USE_PROTOS
static int yy_flex_strlen( yyconst char *s )
#else
static int yy_flex_strlen( s )
yyconst char *s;
#endif
  {
  /* Return the length of NUL-terminated string s (strlen
   * equivalent, kept local so the scanner is self-contained).
   */
  register int n;
  for ( n = 0; s[n]; ++n )
    ;

  return n;
  }
  1899: #endif
  1900: 
  1901: 
  1902: #ifdef YY_USE_PROTOS
static void *yy_flex_alloc( yy_size_t size )
#else
static void *yy_flex_alloc( size )
yy_size_t size;
#endif
  {
  /* Thin wrapper over malloc(); callers check for NULL themselves. */
  return (void *) malloc( size );
  }
  1911: 
  1912: #ifdef YY_USE_PROTOS
static void *yy_flex_realloc( void *ptr, yy_size_t size )
#else
static void *yy_flex_realloc( ptr, size )
void *ptr;
yy_size_t size;
#endif
  {
  /* Thin wrapper over realloc(); callers check for NULL themselves. */
  /* The cast to (char *) in the following accommodates both
   * implementations that use char* generic pointers, and those
   * that use void* generic pointers.  It works with the latter
   * because both ANSI C and C++ allow castless assignment from
   * any pointer type to void*, and deal with argument conversions
   * as though doing an assignment.
   */
  return (void *) realloc( (char *) ptr, size );
  }
  1929: 
  1930: #ifdef YY_USE_PROTOS
static void yy_flex_free( void *ptr )
#else
static void yy_flex_free( ptr )
void *ptr;
#endif
  {
  /* Thin wrapper over free(); pairs with yy_flex_alloc/realloc. */
  free( ptr );
  }
  1939: 
  1940: #if YY_MAIN
/* Trivial standalone driver; only compiled when YY_MAIN is enabled. */
int main()
  {
  yylex();
  return 0;
  }
  1946: #endif
  1947: #line 388 "gramlex.lex"
  1948: 
  1949: /* -------------------- additional C code -------------------- */
  1950: 
  1951: // identify tokens representing embedded text
  1952: bool isGramlexEmbed(int code)
  1953: {
  1954:   return code == TOK_LIT_CODE;
  1955: }
  1956: 
End cpp section to elk/elk_gramlex.yy.cpp[1]
Start data section to elk/elk_gramlex.yy.cpp.old[1 /1 ]
     1: /* A lexical scanner generated by flex */
     2: 
     3: /* Scanner skeleton version:
     4:  * $Header$
     5:  */
     6: 
     7: #define FLEX_SCANNER
     8: #define YY_FLEX_MAJOR_VERSION 2
     9: #define YY_FLEX_MINOR_VERSION 5
    10: 
    11: 
    12: 
    13: /* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */
    14: #ifdef c_plusplus
    15: #ifndef __cplusplus
    16: #define __cplusplus
    17: #endif
    18: #endif
    19: 
    20: 
    21: #ifdef __cplusplus
    22: 
    23: #include <stdlib.h>
    24: #include <iostream>
    25: using namespace std;
    26: 
    27: /* Use prototypes in function declarations. */
    28: #define YY_USE_PROTOS
    29: 
    30: /* The "const" storage-class-modifier is valid. */
    31: #define YY_USE_CONST
    32: 
    33: #else   /* ! __cplusplus */
    34: 
    35: #if __STDC__
    36: 
    37: #define YY_USE_PROTOS
    38: #define YY_USE_CONST
    39: 
    40: #endif  /* __STDC__ */
    41: #endif  /* ! __cplusplus */
    42: 
    43: #ifdef __TURBOC__
    44:  #pragma warn -rch
    45:  #pragma warn -use
    46: #include <io.h>
    47: #include <stdlib.h>
    48: #define YY_USE_CONST
    49: #define YY_USE_PROTOS
    50: #endif
    51: 
    52: #ifdef YY_USE_CONST
    53: #define yyconst const
    54: #else
    55: #define yyconst
    56: #endif
    57: 
    58: 
    59: #ifdef YY_USE_PROTOS
    60: #define YY_PROTO(proto) proto
    61: #else
    62: #define YY_PROTO(proto) ()
    63: #endif
    64: 
    65: /* Returned upon end-of-file. */
    66: #define YY_NULL 0
    67: 
    68: /* Promotes a possibly negative, possibly signed char to an unsigned
    69:  * integer for use as an array index.  If the signed char is negative,
    70:  * we want to instead treat it as an 8-bit unsigned char, hence the
    71:  * double cast.
    72:  */
    73: #define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
    74: 
    75: /* Enter a start condition.  This macro really ought to take a parameter,
    76:  * but we do it the disgusting crufty way forced on us by the ()-less
    77:  * definition of BEGIN.
    78:  */
    79: #define BEGIN yy_start = 1 + 2 *
    80: 
    81: /* Translate the current start state into a value that can be later handed
    82:  * to BEGIN to return to the state.  The YYSTATE alias is for lex
    83:  * compatibility.
    84:  */
    85: #define YY_START ((yy_start - 1) / 2)
    86: #define YYSTATE YY_START
    87: 
    88: /* Action number for EOF rule of a given start state. */
    89: #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
    90: 
    91: /* Special action meaning "start processing a new file". */
    92: #define YY_NEW_FILE yyrestart( yyin )
    93: 
    94: #define YY_END_OF_BUFFER_CHAR 0
    95: 
    96: /* Size of default input buffer. */
    97: #define YY_BUF_SIZE 16384
    98: 
    99: typedef struct yy_buffer_state *YY_BUFFER_STATE;
   100: 
   101: extern int yyleng;
   102: 
   103: #define EOB_ACT_CONTINUE_SCAN 0
   104: #define EOB_ACT_END_OF_FILE 1
   105: #define EOB_ACT_LAST_MATCH 2
   106: 
   107: /* The funky do-while in the following #define is used to turn the definition
   108:  * int a single C statement (which needs a semi-colon terminator).  This
   109:  * avoids problems with code like:
   110:  *
   111:  *      if ( condition_holds )
   112:  *              yyless( 5 );
   113:  *      else
   114:  *              do_something_else();
   115:  *
   116:  * Prior to using the do-while the compiler would get upset at the
   117:  * "else" because it interpreted the "if" statement as being all
   118:  * done when it reached the ';' after the yyless() call.
   119:  */
   120: 
   121: /* Return all but the first 'n' matched characters back to the input stream. */
   122: 
   123: #define yyless(n) \
   124:         do \
   125:                 { \
   126:                 /* Undo effects of setting up yytext. */ \
   127:                 *yy_cp = yy_hold_char; \
   128:                 YY_RESTORE_YY_MORE_OFFSET \
   129:                 yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \
   130:                 YY_DO_BEFORE_ACTION; /* set up yytext again */ \
   131:                 } \
   132:         while ( 0 )
   133: 
   134: #define unput(c) yyunput( c, yytext_ptr )
   135: 
   136: /* The following is because we cannot portably get our hands on size_t
   137:  * (without autoconf's help, which isn't available because we want
   138:  * flex-generated scanners to compile on their own).
   139:  */
   140: typedef unsigned int yy_size_t;
   141: 
   142: 
   143: struct yy_buffer_state
   144:         {
   145:         std::istream* yy_input_file;
   146: 
   147:         char *yy_ch_buf;                /* input buffer */
   148:         char *yy_buf_pos;               /* current position in input buffer */
   149: 
   150:         /* Size of input buffer in bytes, not including room for EOB
   151:          * characters.
   152:          */
   153:         yy_size_t yy_buf_size;
   154: 
   155:         /* Number of characters read into yy_ch_buf, not including EOB
   156:          * characters.
   157:          */
   158:         int yy_n_chars;
   159: 
   160:         /* Whether we "own" the buffer - i.e., we know we created it,
   161:          * and can realloc() it to grow it, and should free() it to
   162:          * delete it.
   163:          */
   164:         int yy_is_our_buffer;
   165: 
   166:         /* Whether this is an "interactive" input source; if so, and
   167:          * if we're using stdio for input, then we want to use getc()
   168:          * instead of fread(), to make sure we stop fetching input after
   169:          * each newline.
   170:          */
   171:         int yy_is_interactive;
   172: 
   173:         /* Whether we're considered to be at the beginning of a line.
   174:          * If so, '^' rules will be active on the next match, otherwise
   175:          * not.
   176:          */
   177:         int yy_at_bol;
   178: 
   179:         /* Whether to try to fill the input buffer when we reach the
   180:          * end of it.
   181:          */
   182:         int yy_fill_buffer;
   183: 
   184:         int yy_buffer_status;
   185: #define YY_BUFFER_NEW 0
   186: #define YY_BUFFER_NORMAL 1
   187:         /* When an EOF's been seen but there's still some text to process
   188:          * then we mark the buffer as YY_EOF_PENDING, to indicate that we
   189:          * shouldn't try reading from the input source any more.  We might
   190:          * still have a bunch of tokens to match, though, because of
   191:          * possible backing-up.
   192:          *
   193:          * When we actually see the EOF, we change the status to "new"
   194:          * (via yyrestart()), so that the user can continue scanning by
   195:          * just pointing yyin at a new input file.
   196:          */
   197: #define YY_BUFFER_EOF_PENDING 2
   198:         };
   199: 
   200: 
   201: /* We provide macros for accessing buffer states in case in the
   202:  * future we want to put the buffer states in a more general
   203:  * "scanner state".
   204:  */
   205: #define YY_CURRENT_BUFFER yy_current_buffer
   206: 
   207: 
   208: 
   209: static void *yy_flex_alloc YY_PROTO(( yy_size_t ));
   210: static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t ));
   211: static void yy_flex_free YY_PROTO(( void * ));
   212: 
   213: #define yy_new_buffer yy_create_buffer
   214: 
   215: #define yy_set_interactive(is_interactive) \
   216:         { \
   217:         if ( ! yy_current_buffer ) \
   218:                 yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
   219:         yy_current_buffer->yy_is_interactive = is_interactive; \
   220:         }
   221: 
   222: #define yy_set_bol(at_bol) \
   223:         { \
   224:         if ( ! yy_current_buffer ) \
   225:                 yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
   226:         yy_current_buffer->yy_at_bol = at_bol; \
   227:         }
   228: 
   229: #define YY_AT_BOL() (yy_current_buffer->yy_at_bol)
   230: 
   231: 
   232: #define yywrap() 1
   233: #define YY_SKIP_YYWRAP
   234: typedef unsigned char YY_CHAR;
   235: #define yytext_ptr yytext
   236: #define YY_INTERACTIVE
   237: 
   238: #include "sm_flexlexer.h"
   239: int yyFlexLexer::yylex()
   240:         {
   241:         LexerError( "yyFlexLexer::yylex invoked but %option yyclass used" );
   242:         return 0;
   243:         }
   244: 
   245: #define YY_DECL int GrammarLexer::yylex()
   246: 
   247: 
   248: /* Done after the current pattern has been matched and before the
   249:  * corresponding action - sets up yytext.
   250:  */
   251: #define YY_DO_BEFORE_ACTION \
   252:         yytext_ptr = yy_bp; \
   253:         yyleng = (int) (yy_cp - yy_bp); \
   254:         yy_hold_char = *yy_cp; \
   255:         *yy_cp = '\0'; \
   256:         yy_c_buf_p = yy_cp;
   257: 
   258: #define YY_NUM_RULES 40
   259: #define YY_END_OF_BUFFER 41
   260: static yyconst short int yy_accept[159] =
   261:     {   0,
   262:         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
   263:         0,    0,    0,    0,    0,    0,   41,   39,    2,    1,
   264:        39,   24,   10,   11,   39,   39,   37,    9,   21,   36,
   265:        17,   36,   36,   36,   36,   36,   36,   36,   36,   36,
   266:        36,   20,    8,    5,    6,    5,   33,   33,   33,   34,
   267:        35,   27,   28,   29,   19,   23,    2,    0,   38,   18,
   268:         3,    0,   37,   36,   36,   36,   36,   36,   36,   36,
   269:        36,   36,   36,   36,   36,   36,    4,    0,    0,    0,
   270:         0,   34,   27,    0,    7,   36,   36,   25,   36,   36,
   271:        36,   36,   36,   36,   36,   36,   36,    0,   36,   36,
   272: 
   273:        36,   36,   36,   36,   36,   36,   36,   36,   36,    0,
   274:        36,   36,   36,   36,   36,   36,   36,   36,   36,   22,
   275:        36,    0,   32,   36,   15,   36,   36,   36,   14,   36,
   276:        36,   36,   36,   36,   36,   31,   36,   16,   36,   36,
   277:        36,   36,   36,   36,   26,   36,   36,   36,   12,   36,
   278:        36,   13,   36,   36,   36,   36,   30,    0
   279:     } ;
   280: 
   281: static yyconst int yy_ec[256] =
   282:     {   0,
   283:         1,    1,    1,    1,    1,    1,    1,    1,    2,    3,
   284:         4,    4,    1,    1,    1,    1,    1,    1,    1,    1,
   285:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   286:         1,    2,    1,    5,    1,    1,    1,    1,    1,    6,
   287:         7,    8,    1,    9,   10,    1,   11,   12,   12,   12,
   288:        12,   12,   12,   12,   12,   12,   12,   13,   14,    1,
   289:         1,   15,    1,    1,   16,   16,   16,   16,   16,   16,
   290:        16,   16,   16,   16,   16,   16,   16,   16,   16,   16,
   291:        16,   16,   16,   16,   16,   16,   16,   16,   16,   16,
   292:        17,   18,   19,    1,   20,    1,   21,   22,   23,   24,
   293: 
   294:        25,   26,   16,   16,   27,   16,   28,   29,   30,   31,
   295:        32,   33,   16,   34,   35,   36,   37,   38,   16,   39,
   296:        16,   16,   40,    1,   41,    1,    1,    1,    1,    1,
   297:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   298:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   299:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   300:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   301:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   302:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   303:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   304: 
   305:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   306:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   307:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   308:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   309:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   310:         1,    1,    1,    1,    1
   311:     } ;
   312: 
   313: static yyconst int yy_meta[42] =
   314:     {   0,
   315:         1,    1,    2,    1,    3,    1,    4,    1,    1,    1,
   316:         1,    5,    1,    1,    1,    5,    1,    6,    4,    5,
   317:         5,    5,    5,    5,    5,    5,    5,    5,    5,    5,
   318:         5,    5,    5,    5,    5,    5,    5,    5,    5,    1,
   319:         4
   320:     } ;
   321: 
   322: static yyconst short int yy_base[169] =
   323:     {   0,
   324:         0,    0,   39,   40,   43,   44,  227,  226,   48,   49,
   325:       188,  187,   30,   39,   48,   51,  226,  231,   56,  231,
   326:       220,  231,  231,  231,  209,   53,  211,  231,  231,    0,
   327:       231,  190,  182,  183,   41,  187,  185,  183,  179,   34,
   328:       190,  231,  231,  231,  231,  203,  231,   67,   72,    0,
   329:       231,    0,  231,  231,  231,  231,   74,  208,  231,  231,
   330:       231,  209,  199,    0,  179,  176,  177,  174,  183,  174,
   331:       168,  178,  180,  167,  172,  165,  231,   78,   80,   81,
   332:         0,    0,    0,  195,  231,  161,  171,    0,  166,  165,
   333:       157,  165,  168,  155,  159,  163,  165,  181,  160,  161,
   334: 
   335:       163,  145,  156,  148,  154,  153,  150,  145,  154,   85,
   336:       135,  137,  134,  147,  136,  138,  144,  131,  135,    0,
   337:       129,   91,  231,  128,    0,  138,  137,  131,    0,  135,
   338:       124,  137,  130,  136,  121,    0,  123,    0,  124,  122,
   339:       128,  126,  124,  102,    0,  106,  108,   99,    0,   76,
   340:        60,    0,   60,   67,   46,   45,    0,  231,   98,  104,
   341:       110,  116,  122,   58,  127,  133,  139,  145
   342:     } ;
   343: 
   344: static yyconst short int yy_def[169] =
   345:     {   0,
   346:       158,    1,  159,  159,  160,  160,  161,  161,  162,  162,
   347:         1,    1,    1,    1,    1,    1,  158,  158,  158,  158,
   348:       163,  158,  158,  158,  158,  158,  158,  158,  158,  164,
   349:       158,  164,  164,  164,  164,  164,  164,  164,  164,  164,
   350:       164,  158,  158,  158,  158,  158,  158,  158,  158,  165,
   351:       158,  166,  158,  158,  158,  158,  158,  163,  158,  158,
   352:       158,  167,  158,  164,  164,  164,  164,  164,  164,  164,
   353:       164,  164,  164,  164,  164,  164,  158,  158,  158,  158,
   354:       168,  165,  166,  167,  158,  164,  164,  164,  164,  164,
   355:       164,  164,  164,  164,  164,  164,  164,  168,  164,  164,
   356: 
   357:       164,  164,  164,  164,  164,  164,  164,  164,  164,  158,
   358:       164,  164,  164,  164,  164,  164,  164,  164,  164,  164,
   359:       164,  158,  158,  164,  164,  164,  164,  164,  164,  164,
   360:       164,  164,  164,  164,  164,  164,  164,  164,  164,  164,
   361:       164,  164,  164,  164,  164,  164,  164,  164,  164,  164,
   362:       164,  164,  164,  164,  164,  164,  164,    0,  158,  158,
   363:       158,  158,  158,  158,  158,  158,  158,  158
   364:     } ;
   365: 
   366: static yyconst short int yy_nxt[273] =
   367:     {   0,
   368:        18,   19,   20,   19,   21,   22,   23,   18,   24,   25,
   369:        26,   27,   28,   29,   18,   30,   31,   18,   18,   30,
   370:        30,   30,   32,   30,   33,   34,   35,   30,   30,   30,
   371:        36,   37,   38,   30,   39,   40,   30,   41,   30,   42,
   372:        43,   45,   45,   18,   48,   48,   46,   46,   49,   49,
   373:        53,   53,   18,   56,   54,   54,   56,   57,   74,   57,
   374:        61,   18,   64,   62,   18,   75,   54,   54,   78,   55,
   375:        68,   69,   79,   80,  145,   57,   81,   57,   55,   78,
   376:       157,   80,   80,   79,   81,   81,  122,   18,   54,   54,
   377:        18,  123,  122,  156,  155,  154,  153,  123,   44,   44,
   378: 
   379:        44,   44,   44,   44,   47,   47,   47,   47,   47,   47,
   380:        50,   50,   50,   50,   50,   50,   52,   52,   52,   52,
   381:        52,   52,   58,  152,   58,   58,   58,   82,  151,   82,
   382:        82,   82,   82,   83,  150,   83,  149,   83,   83,   84,
   383:        84,   84,   84,   84,   84,   98,  148,  147,   98,   98,
   384:       146,  145,  144,  143,  142,  141,  140,  139,  138,  137,
   385:       120,  136,  135,  134,  133,  132,  131,  130,  129,  128,
   386:       127,  126,  125,  124,  121,  120,  119,  118,  117,  116,
   387:       115,  114,  113,  112,  111,  110,  109,  108,  107,  106,
   388:       105,  104,  103,  102,  101,  100,   99,   85,   97,   96,
   389: 
   390:        95,   94,   93,   92,   91,   90,   89,   88,   87,   86,
   391:        63,   85,   59,   77,   76,   73,   72,   71,   70,   67,
   392:        66,   65,   63,   60,   59,  158,   55,   55,   51,   51,
   393:        17,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   394:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   395:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   396:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   397:       158,  158
   398:     } ;
   399: 
   400: static yyconst short int yy_chk[273] =
   401:     {   0,
   402:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   403:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   404:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   405:         1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
   406:         1,    3,    4,   13,    5,    6,    3,    4,    5,    6,
   407:         9,   10,   14,   15,    9,   10,   16,   19,   40,   19,
   408:        26,   15,  164,   26,   16,   40,    9,   10,   48,   13,
   409:        35,   35,   48,   49,  156,   57,   49,   57,   14,   78,
   410:       155,   79,   80,   78,   79,   80,  110,   15,    9,   10,
   411:        16,  110,  122,  154,  153,  151,  150,  122,  159,  159,
   412: 
   413:       159,  159,  159,  159,  160,  160,  160,  160,  160,  160,
   414:       161,  161,  161,  161,  161,  161,  162,  162,  162,  162,
   415:       162,  162,  163,  148,  163,  163,  163,  165,  147,  165,
   416:       165,  165,  165,  166,  146,  166,  144,  166,  166,  167,
   417:       167,  167,  167,  167,  167,  168,  143,  142,  168,  168,
   418:       141,  140,  139,  137,  135,  134,  133,  132,  131,  130,
   419:       128,  127,  126,  124,  121,  119,  118,  117,  116,  115,
   420:       114,  113,  112,  111,  109,  108,  107,  106,  105,  104,
   421:       103,  102,  101,  100,   99,   98,   97,   96,   95,   94,
   422:        93,   92,   91,   90,   89,   87,   86,   84,   76,   75,
   423: 
   424:        74,   73,   72,   71,   70,   69,   68,   67,   66,   65,
   425:        63,   62,   58,   46,   41,   39,   38,   37,   36,   34,
   426:        33,   32,   27,   25,   21,   17,   12,   11,    8,    7,
   427:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   428:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   429:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   430:       158,  158,  158,  158,  158,  158,  158,  158,  158,  158,
   431:       158,  158
   432:     } ;
   433: 
   434: /* The intent behind this definition is that it'll catch
   435:  * any uses of REJECT which flex missed.
   436:  */
   437: #define REJECT reject_used_but_not_detected
   438: #define yymore() yymore_used_but_not_detected
   439: #define YY_MORE_ADJ 0
   440: #define YY_RESTORE_YY_MORE_OFFSET
   441: #define INITIAL 0
   442: /* grammar.lex
   443:  * lexical analyzer for my grammar input format
   444:  *
   445:  * The variety of syntaxes for embedded literal code cause this lexer
   446:  * to have some of the context sensitivity usually associated with a
   447:  * parser.  This context doesn't nest arbitrarily deeply, so the
   448:  * language recognized is still regular, but clearly there's some
   449:  * design tension.
   450:  */
   451: /* ----------------- C definitions -------------------- */
   452: 
   453: // pull in my declaration of the lexer class -- this defines
   454: // the additional lexer state, some of which is used in the
   455: // action rules below (this is in the ../ast/ directory now)
   456: #include "ast_gramlex.h"
   457: 
   458: // pull in the bison-generated token codes
   459: #include "elk_grampar.codes.h"
   460: 
   461: #include <string.h>     // strchr, strrchr
   462: 
   463: // for maintaining column count
   464: #define TOKEN_START  tokenStartLoc = fileState.loc /* user ; */
   465: #define UPD_COL      \
   466:   fileState.loc = sourceLocManager->advCol(fileState.loc, yyleng)  /* user ; */
   467: #define TOK_UPD_COL  TOKEN_START; UPD_COL  /* user ; */
   468: 
   469: /* -------------------- flex options ------------------ */
   470: /* no wrapping is needed; setting this means we don't have to link with libfl.a */
   471: /* don't use the default-echo rules */
   472: /* generate a c++ lexer */
   473: /* and I will define the class */
   474: /* ------------------- definitions -------------------- */
   475: /* any character, including newline */
   476: /* any character except newline */
   477: /* starting character in a name */
   478: /* starting character in a numeric literal */
   479: /* double-quote */
   480: /* character that can appear in a quoted sm_string */
   481: /* (I currently don't have any backslash codes, but I want to
   482:  * leave open that possibility, so for now backslashes are illegal) */
   483: /* horizontal whitespace */
   484: /* whitespace that doesn't cross line a boundary */
   485: /* --------------- start conditions ------------------- */
   486: /* eating a comment delimited by slash-star and star-slash; note
   487:  * that we remember our current state when entering C_COMMENT,
   488:  * and restore it on exit */
   489: #define C_COMMENT 1
   490: 
   491: /* looking for the file name in an "include" directive */
   492: #define INCLUDE 2
   493: 
   494: /* recovering from an error by skipping to the next newline */
   495: #define EAT_TO_NEWLINE 3
   496: 
   497: /* gathering literal embedded code; the delimiter is specified
   498:  * in the 'embedFinish' variable */
   499: #define LITCODE 4
   500: 
   501: /* tokenizing the right-hand side of a production; this one is not
   502:  * exclusive because tokenization is virtually the same in RHS
   503:  * mode as in INITIAL mode */
   504: #define RHS 5
   505: 
   506: /* tokenizing parameter list of a function, leading into the
   507:  * embedded code that is its body */
   508: #define FUN 6
   509: 
   510: /* looking for the start of a type that follows "token" or "nonterm",
   511:  * or the TOK_NAME meaning the type has been omitted */
   512: #define OPTIONAL_TYPE 7
   513: 
   514: /* ---------------------- rules ----------------------- */
   515: 
   516: /* Macros after this point can all be overridden by user definitions in
   517:  * section 1.
   518:  */
   519: 
   520: #ifndef YY_SKIP_YYWRAP
   521: #ifdef __cplusplus
   522: extern "C" int yywrap YY_PROTO(( void ));
   523: #else
   524: extern int yywrap YY_PROTO(( void ));
   525: #endif
   526: #endif
   527: 
   528: 
   529: #ifndef yytext_ptr
   530: static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int ));
   531: #endif
   532: 
   533: #ifdef YY_NEED_STRLEN
   534: static int yy_flex_strlen YY_PROTO(( yyconst char * ));
   535: #endif
   536: 
   537: #ifndef YY_NO_INPUT
   538: #endif
   539: 
   540: #if YY_STACK_USED
   541: static int yy_start_stack_ptr = 0;
   542: static int yy_start_stack_depth = 0;
   543: static int *yy_start_stack = 0;
   544: #ifndef YY_NO_PUSH_STATE
   545: static void yy_push_state YY_PROTO(( int new_state ));
   546: #endif
   547: #ifndef YY_NO_POP_STATE
   548: static void yy_pop_state YY_PROTO(( void ));
   549: #endif
   550: #ifndef YY_NO_TOP_STATE
   551: static int yy_top_state YY_PROTO(( void ));
   552: #endif
   553: 
   554: #else
   555: #define YY_NO_PUSH_STATE 1
   556: #define YY_NO_POP_STATE 1
   557: #define YY_NO_TOP_STATE 1
   558: #endif
   559: 
   560: #ifdef YY_MALLOC_DECL
   561: YY_MALLOC_DECL
   562: #else
   563: #if __STDC__
   564: #ifndef __cplusplus
   565: #include <stdlib.h>
   566: #endif
   567: #else
   568: /* Just try to get by without declaring the routines.  This will fail
   569:  * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int)
   570:  * or sizeof(void*) != sizeof(int).
   571:  */
   572: #endif
   573: #endif
   574: 
   575: /* Amount of stuff to slurp up with each read. */
   576: #ifndef YY_READ_BUF_SIZE
   577: #define YY_READ_BUF_SIZE 8192
   578: #endif
   579: 
   580: /* Copy whatever the last rule matched to the standard output. */
   581: 
   582: #ifndef ECHO
   583: #define ECHO LexerOutput( yytext, yyleng )
   584: #endif
   585: 
/* Gets input and stuffs it into "buf".  The number of characters read,
 * or YY_NULL, is returned in "result".
 */
   589: #ifndef YY_INPUT
   590: #define YY_INPUT(buf,result,max_size) \
   591:         if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \
   592:                 YY_FATAL_ERROR( "input in flex scanner failed" );
   593: #endif
   594: 
   595: /* No semi-colon after return; correct usage is to write "yyterminate();" -
   596:  * we don't want an extra ';' after the "return" because that will cause
   597:  * some compilers to complain about unreachable statements.
   598:  */
   599: #ifndef yyterminate
   600: #define yyterminate() return YY_NULL
   601: #endif
   602: 
   603: /* Number of entries by which start-condition stack grows. */
   604: #ifndef YY_START_STACK_INCR
   605: #define YY_START_STACK_INCR 25
   606: #endif
   607: 
   608: /* Report a fatal error. */
   609: #ifndef YY_FATAL_ERROR
   610: #define YY_FATAL_ERROR(msg) LexerError( msg )
   611: #endif
   612: 
   613: /* Default declaration of generated scanner - a define so the user can
   614:  * easily add parameters.
   615:  */
   616: #ifndef YY_DECL
   617: #define YY_DECL int yyFlexLexer::yylex()
   618: #endif
   619: 
   620: /* Code executed at the beginning of each rule, after yytext and yyleng
   621:  * have been set up.
   622:  */
   623: #ifndef YY_USER_ACTION
   624: #define YY_USER_ACTION
   625: #endif
   626: 
   627: /* Code executed at the end of each rule. */
   628: #ifndef YY_BREAK
   629: #define YY_BREAK break;
   630: #endif
   631: 
   632: #define YY_RULE_SETUP \
   633:         YY_USER_ACTION
   634: 
   635: YY_DECL
   636:         {
   637:         register yy_state_type yy_current_state;
   638:         register char *yy_cp = NULL, *yy_bp = NULL;
   639:         register int yy_act;
   640: 
   641: 
   642: 
   643:   /* -------- whitespace ------ */
   644: 
   645:         if ( yy_init )
   646:                 {
   647:                 yy_init = 0;
   648: 
   649: #ifdef YY_USER_INIT
   650:                 YY_USER_INIT;
   651: #endif
   652: 
   653:                 if ( ! yy_start )
   654:                         yy_start = 1;   /* first start state */
   655: 
   656:                 if ( ! yyin )
   657:                         yyin = &cin;
   658: 
   659:                 if ( ! yyout )
   660:                         yyout = &std::cout;
   661: 
   662:                 if ( ! yy_current_buffer )
   663:                         yy_current_buffer =
   664:                                 yy_create_buffer( yyin, YY_BUF_SIZE );
   665: 
   666:                 yy_load_buffer_state();
   667:                 }
   668: 
   669:         while ( 1 )             /* loops until end-of-file is reached */
   670:                 {
   671:                 yy_cp = yy_c_buf_p;
   672: 
   673:                 /* Support of yytext. */
   674:                 *yy_cp = yy_hold_char;
   675: 
   676:                 /* yy_bp points to the position in yy_ch_buf of the start of
   677:                  * the current run.
   678:                  */
   679:                 yy_bp = yy_cp;
   680: 
   681:                 yy_current_state = yy_start;
   682: yy_match:
   683:                 do
   684:                         {
   685:                         register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
   686:                         if ( yy_accept[yy_current_state] )
   687:                                 {
   688:                                 yy_last_accepting_state = yy_current_state;
   689:                                 yy_last_accepting_cpos = yy_cp;
   690:                                 }
   691:                         while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
   692:                                 {
   693:                                 yy_current_state = (int) yy_def[yy_current_state];
   694:                                 if ( yy_current_state >= 159 )
   695:                                         yy_c = yy_meta[(unsigned int) yy_c];
   696:                                 }
   697:                         yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
   698:                         ++yy_cp;
   699:                         }
   700:                 while ( yy_base[yy_current_state] != 231 );
   701: 
   702: yy_find_action:
   703:                 yy_act = yy_accept[yy_current_state];
   704:                 if ( yy_act == 0 )
   705:                         { /* have to back up */
   706:                         yy_cp = yy_last_accepting_cpos;
   707:                         yy_current_state = yy_last_accepting_state;
   708:                         yy_act = yy_accept[yy_current_state];
   709:                         }
   710: 
   711:                 YY_DO_BEFORE_ACTION;
   712: 
   713: 
   714: do_action:      /* This label is used only to access EOF actions. */
   715: 
   716: 
   717:                 switch ( yy_act )
   718:         { /* beginning of action switch */
   719:                         case 0: /* must back up */
   720:                         /* undo the effects of YY_DO_BEFORE_ACTION */
   721:                         *yy_cp = yy_hold_char;
   722:                         yy_cp = yy_last_accepting_cpos;
   723:                         yy_current_state = yy_last_accepting_state;
   724:                         goto yy_find_action;
   725: 
   726: case 1:
   727: YY_RULE_SETUP
   728: {
   729:   newLine();
   730: }
   731:         YY_BREAK
   732: case 2:
   733: YY_RULE_SETUP
   734: {
   735:   UPD_COL;
   736: }
   737:         YY_BREAK
   738: /* -------- comments -------- */
   739: case 3:
   740: YY_RULE_SETUP
   741: {
   742:   /* C-style comments */
   743:   TOKEN_START;
   744:   UPD_COL;
   745:   prevState = YY_START;
   746:   BEGIN(C_COMMENT);
   747: }
   748:         YY_BREAK
   749: 
   750: case 4:
   751: YY_RULE_SETUP
   752: {
   753:     /* end of comment */
   754:     UPD_COL;
   755:     BEGIN(prevState);
   756:   }
   757:         YY_BREAK
   758: case 5:
   759: YY_RULE_SETUP
   760: {
   761:     /* anything but slash-star or newline -- eat it */
   762:     UPD_COL;
   763:   }
   764:         YY_BREAK
   765: case 6:
   766: YY_RULE_SETUP
   767: {
   768:     newLine();
   769:   }
   770:         YY_BREAK
   771: case YY_STATE_EOF(C_COMMENT):
   772: {
   773:     UPD_COL;      // <<EOF>> yyleng is 1!
   774:     errorUnterminatedComment();
   775:     return TOK_EOF;
   776:   }
   777:         YY_BREAK
   778: 
   779: case 7:
   780: YY_RULE_SETUP
   781: {
   782:   /* C++-style comment -- eat it */
   783:   TOKEN_START;
   784:   advCol(yyleng-1);   // don't count newline
   785:   newLine();          // count it here
   786: }
   787:         YY_BREAK
   788: /* -------- punctuators, operators, keywords --------- */
   789: case 8:
   790: YY_RULE_SETUP
   791: TOK_UPD_COL;  return TOK_RBRACE;
   792:         YY_BREAK
   793: case 9:
   794: YY_RULE_SETUP
   795: TOK_UPD_COL;  return TOK_COLON;
   796:         YY_BREAK
   797: case 10:
   798: YY_RULE_SETUP
   799: TOK_UPD_COL;  return TOK_RPAREN;
   800:         YY_BREAK
   801: case 11:
   802: YY_RULE_SETUP
   803: TOK_UPD_COL;  return TOK_COMMA;
   804:         YY_BREAK
   805: case 12:
   806: YY_RULE_SETUP
   807: TOK_UPD_COL;  return TOK_TERMINALS;
   808:         YY_BREAK
   809: case 13:
   810: YY_RULE_SETUP
   811: TOK_UPD_COL;  return TOK_PRECEDENCE;
   812:         YY_BREAK
   813: case 14:
   814: YY_RULE_SETUP
   815: TOK_UPD_COL;  return TOK_OPTION;
   816:         YY_BREAK
   817: case 15:
   818: YY_RULE_SETUP
   819: TOK_UPD_COL;  return TOK_EXPECT;
   820:         YY_BREAK
   821: case 16:
   822: YY_RULE_SETUP
   823: TOK_UPD_COL;  return TOK_SUBSETS;
   824:         YY_BREAK
   825: /* ----------- sequences that begin literal code ------------ */
   826: /* for the time being, a "[" will always start an embedded sequence;
   827:    * eventually, I'll remove this in favor of the brace- and paren-
   828:    * delimited embedded sequences */
   829: case 17:
   830: YY_RULE_SETUP
   831: {
   832:   TOK_UPD_COL;
   833:   BEGIN(LITCODE);
   834:   beginEmbed(']', TOK_LIT_CODE);
   835: }
   836:         YY_BREAK
   837: /* the "->" operator moves us into RHS mode, which is special because
   838:    * in this mode any "{" is interpreted as the beginning of an embedded
   839:    * section of literal code */
   840: case 18:
   841: YY_RULE_SETUP
   842: {
   843:   TOK_UPD_COL;
   844:   BEGIN(RHS);
   845:   return TOK_ARROW;
   846: }
   847:         YY_BREAK
   848: /* "{" in a RHS begins embedded */
   849: case 19:
   850: YY_RULE_SETUP
   851: {
   852:   TOK_UPD_COL;
   853:   BEGIN(LITCODE);
   854:   beginEmbed('}', TOK_LIT_CODE);
   855: }
   856:         YY_BREAK
   857: /* otherwise it's just a "{" */
   858: case 20:
   859: YY_RULE_SETUP
   860: {
   861:   TOK_UPD_COL;
   862:   return TOK_LBRACE;
   863: }
   864:         YY_BREAK
   865: /* since right-hand-sides can end with either embedded code or a simple
   866:    * ";", the semicolon gets out of RHS mode */
   867: case 21:
   868: YY_RULE_SETUP
   869: {
   870:   TOK_UPD_COL;
   871:   BEGIN(INITIAL);     // if in RHS, reset to INITIAL
   872:   return TOK_SEMICOLON;
   873: }
   874:         YY_BREAK
   875: /* "token" and "nonterm" are always followed by an optional type,
   876:    * and then a TOK_NAME.  So, until we see a TOK_NAME, "(" will mean
   877:    * the start of an embedded sequence. */
   878: case 22:
   879: YY_RULE_SETUP
   880: {
   881:   TOK_UPD_COL;
   882:   BEGIN(OPTIONAL_TYPE);
   883:   return yytext[0]=='t'? TOK_TOKEN : TOK_NONTERM;
   884: }
   885:         YY_BREAK
   886: /* so now this begins embedded */
   887: case 23:
   888: YY_RULE_SETUP
   889: {
   890:   TOK_UPD_COL;
   891:   BEGIN(LITCODE);
   892:   beginEmbed(')', TOK_LIT_CODE);
   893: }
   894:         YY_BREAK
   895: /* otherwise it's just itself */
   896: case 24:
   897: YY_RULE_SETUP
   898: {
   899:   TOK_UPD_COL;
   900:   return TOK_LPAREN;
   901: }
   902:         YY_BREAK
   903: /* function beginning */
   904: case 25:
   905: YY_RULE_SETUP
   906: {
   907:   TOK_UPD_COL;
   908:   BEGIN(FUN);            // treat "{" as beginning literal code
   909:   return TOK_FUN;
   910: }
   911:         YY_BREAK
   912: /* verbatim beginning */
   913: case 26:
   914: YY_RULE_SETUP
   915: {
   916:   TOK_UPD_COL;
   917:   BEGIN(FUN);            // close enough
   918:   return yytext[0]=='v'? TOK_VERBATIM : TOK_IMPL_VERBATIM;
   919: }
   920:         YY_BREAK
   921: /* --------- embedded literal code --------- */
   922: /* no TOKEN_START here; we'll use the tokenStartLoc that
   923:    * was computed in the opening punctuation */
   924: 
   925: case 27:
   926: YY_RULE_SETUP
   927: {
   928:     UPD_COL;
   929:     embedded->handle(yytext, yyleng, embedFinish);
   930:   }
   931:         YY_BREAK
   932: case 28:
   933: YY_RULE_SETUP
   934: {
   935:     newLine();
   936:     embedded->handle(yytext, yyleng, embedFinish);
   937:   }
   938:         YY_BREAK
   939: case 29:
   940: YY_RULE_SETUP
   941: {
   942:     UPD_COL;
   943:     if (embedded->zeroNesting()) {
   944:       // done
   945:       BEGIN(INITIAL);
   946: 
   947:       // check for balanced delimiter
   948:       if (embedFinish != yytext[0]) {
   949:         err("unbalanced literal code delimiter");
   950:       }
   951: 
   952:       // don't add "return" or ";"
   953:       embedded->exprOnly = false;
   954: 
   955:       // can't extract anything
   956:       embedded->isDeclaration = false;
   957: 
   958:       // caller can get text from embedded->text
   959:       return embedMode;
   960:     }
   961:     else {
   962:       // delimeter paired within the embedded code, mostly ignore it
   963:       embedded->handle(yytext, yyleng, embedFinish);
   964:     }
   965:   }
   966:         YY_BREAK
   967: case YY_STATE_EOF(LITCODE):
   968: {
   969:     err(sm_stringc << "hit end of file while looking for final `"
   970:                 << embedFinish << "'");
   971:     yyterminate();
   972:   }
   973:         YY_BREAK
   974: 
   975: /* embedded *type* description */
   976: case 30:
   977: YY_RULE_SETUP
   978: {
   979:   /* caller will get text from yytext and yyleng */
   980:   TOK_UPD_COL;
   981: 
   982:   /* drop into literal-code processing */
   983:   BEGIN(LITCODE);
   984: 
   985:   /* I reset the initial nesting to -1 so that the '{' at the
   986:    * beginning of the class body sets nesting to 0, thus when
   987:    * I see the final '}' I'll see that at level 0 and stop */
   988:   beginEmbed('}', TOK_LIT_CODE, -1);
   989: 
   990:   return TOK_CONTEXT_CLASS;
   991: }
   992:         YY_BREAK
   993: /* ---------- includes ----------- */
   994: case 31:
   995: YY_RULE_SETUP
   996: {
   997:   TOK_UPD_COL;    /* hence no TOKEN_START in INCLUDE area */
   998:   BEGIN(INCLUDE);
   999: }
  1000:         YY_BREAK
  1001: 
  1002: case 32:
  1003: YY_RULE_SETUP
  1004: {
  1005:     /* e.g.: ("filename") */
  1006:     /* file name to include */
  1007:     UPD_COL;
  1008: 
  1009:     /* find quotes */
  1010:     char *leftq = strchr(yytext, '"');
  1011:     char *rightq = strchr(leftq+1, '"');
  1012:     xassert(leftq && rightq);
  1013: 
  1014:     /* extract filename sm_string */
  1015:     includeFileName = addString(leftq+1, rightq-leftq-1);
  1016: 
  1017:     /* go back to normal processing */
  1018:     BEGIN(INITIAL);
  1019:     return TOK_INCLUDE;
  1020:   }
  1021:         YY_BREAK
  1022: case 33:
  1023: YY_RULE_SETUP
  1024: {
  1025:     /* anything else: malformed */
  1026:     UPD_COL;
  1027:     errorMalformedInclude();
  1028: 
  1029:     /* rudimentary error recovery.. */
  1030:     BEGIN(EAT_TO_NEWLINE);
  1031:   }
  1032:         YY_BREAK
  1033: 
  1034: 
  1035: case 34:
  1036: YY_RULE_SETUP
  1037: {
  1038:     UPD_COL;
  1039:     /* not newline, eat it */
  1040:   }
  1041:         YY_BREAK
  1042: case 35:
  1043: YY_RULE_SETUP
  1044: {
  1045:     /* get out of here */
  1046:     newLine();
  1047:     BEGIN(INITIAL);
  1048:   }
  1049:         YY_BREAK
  1050: 
  1051: /* -------- name literal --------- */
  1052: case 36:
  1053: YY_RULE_SETUP
  1054: {
  1055:   /* get text from yytext and yyleng */
  1056:   TOK_UPD_COL;
  1057:   if (YY_START == OPTIONAL_TYPE) {
  1058:     BEGIN(INITIAL);      // bail out of OPTIONAL_TYPE mode
  1059:   }
  1060:   return TOK_NAME;
  1061: }
  1062:         YY_BREAK
  1063: /* -------- numeric literal ------ */
  1064: case 37:
  1065: YY_RULE_SETUP
  1066: {
  1067:   TOK_UPD_COL;
  1068:   integerLiteral = strtoul(yytext, NULL, 10 /*radix*/);
  1069:   return TOK_INTEGER;
  1070: }
  1071:         YY_BREAK
  1072: /* ----------- sm_string literal ----- */
  1073: case 38:
  1074: YY_RULE_SETUP
  1075: {
  1076:   TOK_UPD_COL;
  1077:   sm_stringLiteral = addString(yytext+1, yyleng-2);        // strip quotes
  1078:   return TOK_STRING;
  1079: }
  1080:         YY_BREAK
  1081: /* --------- illegal ------------- */
  1082: case 39:
  1083: YY_RULE_SETUP
  1084: {
  1085:   TOK_UPD_COL;
  1086:   errorIllegalCharacter(yytext[0]);
  1087: }
  1088:         YY_BREAK
  1089: case 40:
  1090: YY_RULE_SETUP
  1091: YY_FATAL_ERROR( "flex scanner jammed" );
  1092:         YY_BREAK
  1093: case YY_STATE_EOF(INITIAL):
  1094: case YY_STATE_EOF(INCLUDE):
  1095: case YY_STATE_EOF(EAT_TO_NEWLINE):
  1096: case YY_STATE_EOF(RHS):
  1097: case YY_STATE_EOF(FUN):
  1098: case YY_STATE_EOF(OPTIONAL_TYPE):
  1099:         yyterminate();
  1100: 
  1101:         case YY_END_OF_BUFFER:
  1102:                 {
  1103:                 /* Amount of text matched not including the EOB char. */
  1104:                 int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1;
  1105: 
  1106:                 /* Undo the effects of YY_DO_BEFORE_ACTION. */
  1107:                 *yy_cp = yy_hold_char;
  1108:                 YY_RESTORE_YY_MORE_OFFSET
  1109: 
  1110:                 if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW )
  1111:                         {
  1112:                         /* We're scanning a new file or input source.  It's
  1113:                          * possible that this happened because the user
  1114:                          * just pointed yyin at a new source and called
  1115:                          * yylex().  If so, then we have to assure
  1116:                          * consistency between yy_current_buffer and our
  1117:                          * globals.  Here is the right place to do so, because
  1118:                          * this is the first action (other than possibly a
  1119:                          * back-up) that will match for the new input source.
  1120:                          */
  1121:                         yy_n_chars = yy_current_buffer->yy_n_chars;
  1122:                         yy_current_buffer->yy_input_file = yyin;
  1123:                         yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL;
  1124:                         }
  1125: 
  1126:                 /* Note that here we test for yy_c_buf_p "<=" to the position
  1127:                  * of the first EOB in the buffer, since yy_c_buf_p will
  1128:                  * already have been incremented past the NUL character
  1129:                  * (since all states make transitions on EOB to the
  1130:                  * end-of-buffer state).  Contrast this with the test
  1131:                  * in input().
  1132:                  */
  1133:                 if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] )
  1134:                         { /* This was really a NUL. */
  1135:                         yy_state_type yy_next_state;
  1136: 
  1137:                         yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text;
  1138: 
  1139:                         yy_current_state = yy_get_previous_state();
  1140: 
  1141:                         /* Okay, we're now positioned to make the NUL
  1142:                          * transition.  We couldn't have
  1143:                          * yy_get_previous_state() go ahead and do it
  1144:                          * for us because it doesn't know how to deal
  1145:                          * with the possibility of jamming (and we don't
  1146:                          * want to build jamming into it because then it
  1147:                          * will run more slowly).
  1148:                          */
  1149: 
  1150:                         yy_next_state = yy_try_NUL_trans( yy_current_state );
  1151: 
  1152:                         yy_bp = yytext_ptr + YY_MORE_ADJ;
  1153: 
  1154:                         if ( yy_next_state )
  1155:                                 {
  1156:                                 /* Consume the NUL. */
  1157:                                 yy_cp = ++yy_c_buf_p;
  1158:                                 yy_current_state = yy_next_state;
  1159:                                 goto yy_match;
  1160:                                 }
  1161: 
  1162:                         else
  1163:                                 {
  1164:                                 yy_cp = yy_c_buf_p;
  1165:                                 goto yy_find_action;
  1166:                                 }
  1167:                         }
  1168: 
  1169:                 else switch ( yy_get_next_buffer() )
  1170:                         {
  1171:                         case EOB_ACT_END_OF_FILE:
  1172:                                 {
  1173:                                 yy_did_buffer_switch_on_eof = 0;
  1174: 
  1175:                                 if ( yywrap() )
  1176:                                         {
  1177:                                         /* Note: because we've taken care in
  1178:                                          * yy_get_next_buffer() to have set up
  1179:                                          * yytext, we can now set up
  1180:                                          * yy_c_buf_p so that if some total
  1181:                                          * hoser (like flex itself) wants to
  1182:                                          * call the scanner after we return the
  1183:                                          * YY_NULL, it'll still work - another
  1184:                                          * YY_NULL will get returned.
  1185:                                          */
  1186:                                         yy_c_buf_p = yytext_ptr + YY_MORE_ADJ;
  1187: 
  1188:                                         yy_act = YY_STATE_EOF(YY_START);
  1189:                                         goto do_action;
  1190:                                         }
  1191: 
  1192:                                 else
  1193:                                         {
  1194:                                         if ( ! yy_did_buffer_switch_on_eof )
  1195:                                                 YY_NEW_FILE;
  1196:                                         }
  1197:                                 break;
  1198:                                 }
  1199: 
  1200:                         case EOB_ACT_CONTINUE_SCAN:
  1201:                                 yy_c_buf_p =
  1202:                                         yytext_ptr + yy_amount_of_matched_text;
  1203: 
  1204:                                 yy_current_state = yy_get_previous_state();
  1205: 
  1206:                                 yy_cp = yy_c_buf_p;
  1207:                                 yy_bp = yytext_ptr + YY_MORE_ADJ;
  1208:                                 goto yy_match;
  1209: 
  1210:                         case EOB_ACT_LAST_MATCH:
  1211:                                 yy_c_buf_p =
  1212:                                 &yy_current_buffer->yy_ch_buf[yy_n_chars];
  1213: 
  1214:                                 yy_current_state = yy_get_previous_state();
  1215: 
  1216:                                 yy_cp = yy_c_buf_p;
  1217:                                 yy_bp = yytext_ptr + YY_MORE_ADJ;
  1218:                                 goto yy_find_action;
  1219:                         }
  1220:                 break;
  1221:                 }
  1222: 
  1223:         default:
  1224:                 YY_FATAL_ERROR(
  1225:                         "fatal flex scanner internal error--no action found" );
  1226:         } /* end of action switch */
  1227:                 } /* end of scanning one token */
  1228:         } /* end of yylex */
  1229: 
  1230: yyFlexLexer::yyFlexLexer( std::istream* arg_yyin, std::ostream* arg_yyout )
  1231:         {
  1232:         yyin = arg_yyin;
  1233:         yyout = arg_yyout;
  1234:         yy_c_buf_p = 0;
  1235:         yy_init = 1;
  1236:         yy_start = 0;
  1237:         yy_flex_debug = 0;
  1238:         yylineno = 1;   // this will only get updated if %option yylineno
  1239: 
  1240:         yy_did_buffer_switch_on_eof = 0;
  1241: 
  1242:         yy_looking_for_trail_begin = 0;
  1243:         yy_more_flag = 0;
  1244:         yy_more_len = 0;
  1245:         yy_more_offset = yy_prev_more_offset = 0;
  1246: 
  1247:         yy_start_stack_ptr = yy_start_stack_depth = 0;
  1248:         yy_start_stack = 0;
  1249: 
  1250:         yy_current_buffer = 0;
  1251: 
  1252: #ifdef YY_USES_REJECT
  1253:         yy_state_buf = new yy_state_type[YY_BUF_SIZE + 2];
  1254: #else
  1255:         yy_state_buf = 0;
  1256: #endif
  1257:         }
  1258: 
  1259: yyFlexLexer::~yyFlexLexer()
  1260:         {
  1261:         delete yy_state_buf;
  1262:         yy_delete_buffer( yy_current_buffer );
  1263:         }
  1264: 
  1265: void yyFlexLexer::switch_streams( std::istream* new_in, std::ostream* new_out )
  1266:         {
  1267:         if ( new_in )
  1268:                 {
  1269:                 yy_delete_buffer( yy_current_buffer );
  1270:                 yy_switch_to_buffer( yy_create_buffer( new_in, YY_BUF_SIZE ) );
  1271:                 }
  1272: 
  1273:         if ( new_out )
  1274:                 yyout = new_out;
  1275:         }
  1276: 
/* LexerInput: supplies bytes from yyin for YY_INPUT.
 * Returns the number of characters read, 0 at end of input (or if the
 * stream has already failed), or -1 on a hard stream error.
 */
#ifdef YY_INTERACTIVE
int yyFlexLexer::LexerInput( char* buf, int /* max_size */ )
#else
int yyFlexLexer::LexerInput( char* buf, int max_size )
#endif
        {
        /* nothing more to read (or the stream is unusable): report EOF */
        if ( yyin->eof() || yyin->fail() )
                return 0;

#ifdef YY_INTERACTIVE
        /* interactive mode: fetch a single character per call */
        yyin->get( buf[0] );

        if ( yyin->eof() )
                return 0;

        if ( yyin->bad() )
                return -1;

        return 1;

#else
        /* block read; gcount() reports how many characters were actually
         * obtained even when the read stopped short of max_size */
        (void) yyin->read( buf, max_size );

        if ( yyin->bad() )
                return -1;
        else
                return yyin->gcount();
#endif
        }
  1306: 
  1307: void yyFlexLexer::LexerOutput( const char* buf, int size )
  1308:         {
  1309:         (void) yyout->write( buf, size );
  1310:         }
  1311: 
  1312: /* yy_get_next_buffer - try to read in a new buffer
  1313:  *
  1314:  * Returns a code representing an action:
 *      EOB_ACT_LAST_MATCH - process the text matched before the end of
 *                           input was seen, then report EOF
  1316:  *      EOB_ACT_CONTINUE_SCAN - continue scanning from current position
  1317:  *      EOB_ACT_END_OF_FILE - end of file
  1318:  */
  1319: 
int yyFlexLexer::yy_get_next_buffer()
        {
        /* destination/source for sliding the not-yet-consumed tail of the
         * old buffer contents down to the front of the buffer */
        register char *dest = yy_current_buffer->yy_ch_buf;
        register char *source = yytext_ptr;
        register int number_to_move, i;
        int ret_val;

        /* sanity check: the scan pointer may be at most one past the
         * second end-of-buffer sentinel */
        if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] )
                YY_FATAL_ERROR(
                "fatal flex scanner internal error--end of buffer missed" );

        if ( yy_current_buffer->yy_fill_buffer == 0 )
                { /* Don't try to fill the buffer, so this is an EOF. */
                if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 )
                        {
                        /* We matched a single character, the EOB, so
                         * treat this as a final EOF.
                         */
                        return EOB_ACT_END_OF_FILE;
                        }

                else
                        {
                        /* We matched some text prior to the EOB, first
                         * process it.
                         */
                        return EOB_ACT_LAST_MATCH;
                        }
                }

        /* Try to read more data. */

        /* First move last chars to start of buffer. */
        number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1;

        for ( i = 0; i < number_to_move; ++i )
                *(dest++) = *(source++);

        if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING )
                /* don't do the read, it's not guaranteed to return an EOF,
                 * just force an EOF
                 */
                yy_current_buffer->yy_n_chars = yy_n_chars = 0;

        else
                {
                int num_to_read =
                        yy_current_buffer->yy_buf_size - number_to_move - 1;

                while ( num_to_read <= 0 )
                        { /* Not enough room in the buffer - grow it. */
#ifdef YY_USES_REJECT
                        YY_FATAL_ERROR(
"input buffer overflow, can't enlarge buffer because scanner uses REJECT" );
#else

                        /* just a shorter name for the current buffer */
                        YY_BUFFER_STATE b = yy_current_buffer;

                        /* remember the scan position as an offset so it can
                         * be re-established after realloc moves the buffer */
                        int yy_c_buf_p_offset =
                                (int) (yy_c_buf_p - b->yy_ch_buf);

                        if ( b->yy_is_our_buffer )
                                {
                                int new_size = b->yy_buf_size * 2;

                                /* if doubling overflowed int, grow by 1/8
                                 * instead of doubling */
                                if ( new_size <= 0 )
                                        b->yy_buf_size += b->yy_buf_size / 8;
                                else
                                        b->yy_buf_size *= 2;

                                b->yy_ch_buf = (char *)
                                        /* Include room in for 2 EOB chars. */
                                        yy_flex_realloc( (void *) b->yy_ch_buf,
                                                         b->yy_buf_size + 2 );
                                }
                        else
                                /* Can't grow it, we don't own it. */
                                b->yy_ch_buf = 0;

                        if ( ! b->yy_ch_buf )
                                YY_FATAL_ERROR(
                                "fatal error - scanner input buffer overflow" );

                        yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];

                        num_to_read = yy_current_buffer->yy_buf_size -
                                                number_to_move - 1;
#endif
                        }

                if ( num_to_read > YY_READ_BUF_SIZE )
                        num_to_read = YY_READ_BUF_SIZE;

                /* Read in more data. */
                YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]),
                        yy_n_chars, num_to_read );

                yy_current_buffer->yy_n_chars = yy_n_chars;
                }

        if ( yy_n_chars == 0 )
                {
                /* the read produced nothing: either a real EOF (no retained
                 * text) or there is retained text to process first */
                if ( number_to_move == YY_MORE_ADJ )
                        {
                        ret_val = EOB_ACT_END_OF_FILE;
                        yyrestart( yyin );
                        }

                else
                        {
                        ret_val = EOB_ACT_LAST_MATCH;
                        yy_current_buffer->yy_buffer_status =
                                YY_BUFFER_EOF_PENDING;
                        }
                }

        else
                ret_val = EOB_ACT_CONTINUE_SCAN;

        /* account for the text carried over from the old buffer contents,
         * then restore the two end-of-buffer sentinel characters */
        yy_n_chars += number_to_move;
        yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR;
        yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;

        yytext_ptr = &yy_current_buffer->yy_ch_buf[0];

        return ret_val;
        }
  1448: 
  1449: 
  1450: /* yy_get_previous_state - get the state just before the EOB char was reached */
  1451: 
/* Re-run the DFA over the text matched so far (yytext_ptr up to the
 * end-of-buffer position) to recover the state the scanner was in just
 * before it hit the EOB character.  Also records the last accepting
 * state/position seen, for backing up on a failed match. */
yy_state_type yyFlexLexer::yy_get_previous_state()
        {
        register yy_state_type yy_current_state;
        register char *yy_cp;

        yy_current_state = yy_start;

        for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp )
                {
                /* NUL bytes get equivalence class 1; others are mapped via yy_ec */
                register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
                if ( yy_accept[yy_current_state] )
                        {
                        /* remember most recent accepting state for backtracking */
                        yy_last_accepting_state = yy_current_state;
                        yy_last_accepting_cpos = yy_cp;
                        }
                /* follow default transitions (yy_def) until the compressed
                 * tables (yy_chk/yy_base) validate a transition on yy_c */
                while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
                        {
                        yy_current_state = (int) yy_def[yy_current_state];
                        if ( yy_current_state >= 159 )
                                yy_c = yy_meta[(unsigned int) yy_c];
                        }
                yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
                }

        return yy_current_state;
        }
  1478: 
  1479: 
  1480: /* yy_try_NUL_trans - try to make a transition on the NUL character
  1481:  *
  1482:  * synopsis
  1483:  *      next_state = yy_try_NUL_trans( current_state );
  1484:  */
  1485: 
/* Attempt a single DFA transition on the NUL character (equivalence
 * class 1) from 'yy_current_state'.  Returns the resulting state, or 0
 * if the transition leads to the jam state (state 158), meaning NUL
 * cannot extend the current match. */
yy_state_type yyFlexLexer::yy_try_NUL_trans( yy_state_type yy_current_state )
        {
        register int yy_is_jam;
        register char *yy_cp = yy_c_buf_p;

        register YY_CHAR yy_c = 1;      /* equivalence class of NUL */
        if ( yy_accept[yy_current_state] )
                {
                /* remember accepting state for possible backtracking */
                yy_last_accepting_state = yy_current_state;
                yy_last_accepting_cpos = yy_cp;
                }
        /* same compressed-table walk as yy_get_previous_state() */
        while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
                {
                yy_current_state = (int) yy_def[yy_current_state];
                if ( yy_current_state >= 159 )
                        yy_c = yy_meta[(unsigned int) yy_c];
                }
        yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
        yy_is_jam = (yy_current_state == 158);

        return yy_is_jam ? 0 : yy_current_state;
        }
  1508: 
  1509: 
/* Push character 'c' back onto the input stream, making it the next
 * character the scanner will read.  'yy_bp' is the start of the current
 * token, which must be adjusted if the buffer contents are shifted. */
void yyFlexLexer::yyunput( int c, register char* yy_bp )
        {
        register char *yy_cp = yy_c_buf_p;

        /* undo effects of setting up yytext */
        *yy_cp = yy_hold_char;

        if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
                { /* need to shift things up to make room */
                /* +2 for EOB chars. */
                register int number_to_move = yy_n_chars + 2;
                register char *dest = &yy_current_buffer->yy_ch_buf[
                                        yy_current_buffer->yy_buf_size + 2];
                register char *source =
                                &yy_current_buffer->yy_ch_buf[number_to_move];

                /* slide the buffer contents to the far end, opening space
                 * at the front for pushed-back characters */
                while ( source > yy_current_buffer->yy_ch_buf )
                        *--dest = *--source;

                /* relocate the scan pointers by the same shift amount */
                yy_cp += (int) (dest - source);
                yy_bp += (int) (dest - source);
                yy_current_buffer->yy_n_chars =
                        yy_n_chars = yy_current_buffer->yy_buf_size;

                if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
                        YY_FATAL_ERROR( "flex scanner push-back overflow" );
                }

        *--yy_cp = (char) c;


        /* re-establish yytext and the held character at the new position */
        yytext_ptr = yy_bp;
        yy_hold_char = *yy_cp;
        yy_c_buf_p = yy_cp;
        }
  1545: 
  1546: 
/* Return the next character of input, refilling the buffer if the
 * scanner has reached the end-of-buffer sentinel.  Returns EOF at true
 * end of input (when yywrap() says there is no more). */
int yyFlexLexer::yyinput()
        {
        int c;

        /* restore the character that yytext setup overwrote with NUL */
        *yy_c_buf_p = yy_hold_char;

        if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
                {
                /* yy_c_buf_p now points to the character we want to return.
                 * If this occurs *before* the EOB characters, then it's a
                 * valid NUL; if not, then we've hit the end of the buffer.
                 */
                if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
                        /* This was really a NUL. */
                        *yy_c_buf_p = '\0';

                else
                        { /* need more input */
                        /* remember how far into the token we were, since
                         * yy_get_next_buffer() may move the buffer contents */
                        int offset = yy_c_buf_p - yytext_ptr;
                        ++yy_c_buf_p;

                        switch ( yy_get_next_buffer() )
                                {
                                case EOB_ACT_LAST_MATCH:
                                        /* This happens because yy_g_n_b()
                                         * sees that we've accumulated a
                                         * token and flags that we need to
                                         * try matching the token before
                                         * proceeding.  But for input(),
                                         * there's no matching to consider.
                                         * So convert the EOB_ACT_LAST_MATCH
                                         * to EOB_ACT_END_OF_FILE.
                                         */

                                        /* Reset buffer status. */
                                        yyrestart( yyin );

                                        /* fall through */

                                case EOB_ACT_END_OF_FILE:
                                        {
                                        if ( yywrap() )
                                                return EOF;

                                        if ( ! yy_did_buffer_switch_on_eof )
                                                YY_NEW_FILE;
#ifdef __cplusplus
                                        return yyinput();
#else
                                        return input();
#endif
                                        }

                                case EOB_ACT_CONTINUE_SCAN:
                                        /* buffer was refilled; re-derive our
                                         * position from the saved offset */
                                        yy_c_buf_p = yytext_ptr + offset;
                                        break;
                                }
                        }
                }

        c = *(unsigned char *) yy_c_buf_p;      /* cast for 8-bit char's */
        *yy_c_buf_p = '\0';     /* preserve yytext */
        yy_hold_char = *++yy_c_buf_p;


        return c;
        }
  1614: 
  1615: void yyFlexLexer::yyrestart( std::istream* input_file )
  1616:         {
  1617:         if ( ! yy_current_buffer )
  1618:                 yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );
  1619: 
  1620:         yy_init_buffer( yy_current_buffer, input_file );
  1621:         yy_load_buffer_state();
  1622:         }
  1623: 
  1624: 
  1625: void yyFlexLexer::yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
  1626:         {
  1627:         if ( yy_current_buffer == new_buffer )
  1628:                 return;
  1629: 
  1630:         if ( yy_current_buffer )
  1631:                 {
  1632:                 /* Flush out information for old buffer. */
  1633:                 *yy_c_buf_p = yy_hold_char;
  1634:                 yy_current_buffer->yy_buf_pos = yy_c_buf_p;
  1635:                 yy_current_buffer->yy_n_chars = yy_n_chars;
  1636:                 }
  1637: 
  1638:         yy_current_buffer = new_buffer;
  1639:         yy_load_buffer_state();
  1640: 
  1641:         /* We don't actually know whether we did this switch during
  1642:          * EOF (yywrap()) processing, but the only time this flag
  1643:          * is looked at is after yywrap() is called, so it's safe
  1644:          * to go ahead and always set it.
  1645:          */
  1646:         yy_did_buffer_switch_on_eof = 1;
  1647:         }
  1648: 
  1649: 
  1650: void yyFlexLexer::yy_load_buffer_state()
  1651:         {
  1652:         yy_n_chars = yy_current_buffer->yy_n_chars;
  1653:         yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
  1654:         yyin = yy_current_buffer->yy_input_file;
  1655:         yy_hold_char = *yy_c_buf_p;
  1656:         }
  1657: 
  1658: 
  1659: YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( std::istream* file, int size )
  1660:         {
  1661:         YY_BUFFER_STATE b;
  1662: 
  1663:         b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
  1664:         if ( ! b )
  1665:                 YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
  1666: 
  1667:         b->yy_buf_size = size;
  1668: 
  1669:         /* yy_ch_buf has to be 2 characters longer than the size given because
  1670:          * we need to put in 2 end-of-buffer characters.
  1671:          */
  1672:         b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 );
  1673:         if ( ! b->yy_ch_buf )
  1674:                 YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
  1675: 
  1676:         b->yy_is_our_buffer = 1;
  1677: 
  1678:         yy_init_buffer( b, file );
  1679: 
  1680:         return b;
  1681:         }
  1682: 
  1683: 
  1684: void yyFlexLexer::yy_delete_buffer( YY_BUFFER_STATE b )
  1685:         {
  1686:         if ( ! b )
  1687:                 return;
  1688: 
  1689:         if ( b == yy_current_buffer )
  1690:                 yy_current_buffer = (YY_BUFFER_STATE) 0;
  1691: 
  1692:         if ( b->yy_is_our_buffer )
  1693:                 yy_flex_free( (void *) b->yy_ch_buf );
  1694: 
  1695:         yy_flex_free( (void *) b );
  1696:         }
  1697: 
  1698: 
  1699: void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, std::istream* file )
  1700: 
  1701:         {
  1702:         yy_flush_buffer( b );
  1703: 
  1704:         b->yy_input_file = file;
  1705:         b->yy_fill_buffer = 1;
  1706: 
  1707:         b->yy_is_interactive = 0;
  1708:         }
  1709: 
  1710: 
  1711: void yyFlexLexer::yy_flush_buffer( YY_BUFFER_STATE b )
  1712:         {
  1713:         if ( ! b )
  1714:                 return;
  1715: 
  1716:         b->yy_n_chars = 0;
  1717: 
  1718:         /* We always need two end-of-buffer characters.  The first causes
  1719:          * a transition to the end-of-buffer state.  The second causes
  1720:          * a jam in that state.
  1721:          */
  1722:         b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
  1723:         b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
  1724: 
  1725:         b->yy_buf_pos = &b->yy_ch_buf[0];
  1726: 
  1727:         b->yy_at_bol = 1;
  1728:         b->yy_buffer_status = YY_BUFFER_NEW;
  1729: 
  1730:         if ( b == yy_current_buffer )
  1731:                 yy_load_buffer_state();
  1732:         }
  1733: 
  1734: 
  1735: #ifndef YY_NO_SCAN_BUFFER
  1736: #endif
  1737: 
  1738: 
  1739: #ifndef YY_NO_SCAN_STRING
  1740: #endif
  1741: 
  1742: 
  1743: #ifndef YY_NO_SCAN_BYTES
  1744: #endif
  1745: 
  1746: 
  1747: #ifndef YY_NO_PUSH_STATE
  1748: void yyFlexLexer::yy_push_state( int new_state )
  1749:         {
  1750:         if ( yy_start_stack_ptr >= yy_start_stack_depth )
  1751:                 {
  1752:                 yy_size_t new_size;
  1753: 
  1754:                 yy_start_stack_depth += YY_START_STACK_INCR;
  1755:                 new_size = yy_start_stack_depth * sizeof( int );
  1756: 
  1757:                 if ( ! yy_start_stack )
  1758:                         yy_start_stack = (int *) yy_flex_alloc( new_size );
  1759: 
  1760:                 else
  1761:                         yy_start_stack = (int *) yy_flex_realloc(
  1762:                                         (void *) yy_start_stack, new_size );
  1763: 
  1764:                 if ( ! yy_start_stack )
  1765:                         YY_FATAL_ERROR(
  1766:                         "out of memory expanding start-condition stack" );
  1767:                 }
  1768: 
  1769:         yy_start_stack[yy_start_stack_ptr++] = YY_START;
  1770: 
  1771:         BEGIN(new_state);
  1772:         }
  1773: #endif
  1774: 
  1775: 
  1776: #ifndef YY_NO_POP_STATE
  1777: void yyFlexLexer::yy_pop_state()
  1778:         {
  1779:         if ( --yy_start_stack_ptr < 0 )
  1780:                 YY_FATAL_ERROR( "start-condition stack underflow" );
  1781: 
  1782:         BEGIN(yy_start_stack[yy_start_stack_ptr]);
  1783:         }
  1784: #endif
  1785: 
  1786: 
  1787: #ifndef YY_NO_TOP_STATE
  1788: int yyFlexLexer::yy_top_state()
  1789:         {
  1790:         return yy_start_stack[yy_start_stack_ptr - 1];
  1791:         }
  1792: #endif
  1793: 
  1794: #ifndef YY_EXIT_FAILURE
  1795: #define YY_EXIT_FAILURE 2
  1796: #endif
  1797: 
  1798: 
/* Report a fatal scanner error on stderr and terminate the process
 * with YY_EXIT_FAILURE.  Does not return. */
void yyFlexLexer::LexerError( yyconst char msg[] )
        {
        cerr << msg << '\n';
        exit( YY_EXIT_FAILURE );
        }
  1804: 
  1805: 
  1806: /* Redefine yyless() so it works in section 3 code. */
  1807: 
  1808: #undef yyless
  1809: #define yyless(n) \
  1810:         do \
  1811:                 { \
  1812:                 /* Undo effects of setting up yytext. */ \
  1813:                 yytext[yyleng] = yy_hold_char; \
  1814:                 yy_c_buf_p = yytext + n; \
  1815:                 yy_hold_char = *yy_c_buf_p; \
  1816:                 *yy_c_buf_p = '\0'; \
  1817:                 yyleng = n; \
  1818:                 } \
  1819:         while ( 0 )
  1820: 
  1821: 
  1822: /* Internal utility routines. */
  1823: 
#ifndef yytext_ptr
/* Bounded copy: copy exactly 'n' characters from s2 to s1.  Unlike
 * strncpy, does no NUL padding or termination.  The #ifdef selects
 * between ANSI and K&R-style parameter declarations. */
#ifdef YY_USE_PROTOS
static void yy_flex_strncpy( char *s1, yyconst char *s2, int n )
#else
static void yy_flex_strncpy( s1, s2, n )
char *s1;
yyconst char *s2;
int n;
#endif
        {
        register int i;
        for ( i = 0; i < n; ++i )
                s1[i] = s2[i];
        }
#endif
  1839: 
#ifdef YY_NEED_STRLEN
/* Portable strlen replacement: count characters up to (not including)
 * the terminating NUL. */
#ifdef YY_USE_PROTOS
static int yy_flex_strlen( yyconst char *s )
#else
static int yy_flex_strlen( s )
yyconst char *s;
#endif
        {
        register int n;
        for ( n = 0; s[n]; ++n )
                ;

        return n;
        }
#endif
  1855: 
  1856: 
/* Allocation wrapper used by the scanner; funnels all buffer/stack
 * allocation through a single point.  Returns NULL on failure (callers
 * check and invoke YY_FATAL_ERROR). */
#ifdef YY_USE_PROTOS
static void *yy_flex_alloc( yy_size_t size )
#else
static void *yy_flex_alloc( size )
yy_size_t size;
#endif
        {
        return (void *) malloc( size );
        }
  1866: 
/* Reallocation wrapper; same role as yy_flex_alloc() but for growing
 * an existing block.  Returns NULL on failure. */
#ifdef YY_USE_PROTOS
static void *yy_flex_realloc( void *ptr, yy_size_t size )
#else
static void *yy_flex_realloc( ptr, size )
void *ptr;
yy_size_t size;
#endif
        {
        /* The cast to (char *) in the following accommodates both
         * implementations that use char* generic pointers, and those
         * that use void* generic pointers.  It works with the latter
         * because both ANSI C and C++ allow castless assignment from
         * any pointer type to void*, and deal with argument conversions
         * as though doing an assignment.
         */
        return (void *) realloc( (char *) ptr, size );
        }
  1884: 
/* Deallocation wrapper matching yy_flex_alloc()/yy_flex_realloc(). */
#ifdef YY_USE_PROTOS
static void yy_flex_free( void *ptr )
#else
static void yy_flex_free( ptr )
void *ptr;
#endif
        {
        free( ptr );
        }
  1894: 
  1895: #if YY_MAIN
/* Optional standalone driver (enabled with YY_MAIN): just run the
 * scanner to completion. */
int main()
        {
        yylex();
        return 0;
        }
  1901: #endif
  1902: 
  1903: /* -------------------- additional C code -------------------- */
  1904: 
  1905: // identify tokens representing embedded text
  1906: bool isGramlexEmbed(int code)
  1907: {
  1908:   return code == TOK_LIT_CODE;
  1909: }
End data section to elk/elk_gramlex.yy.cpp.old[1]
Start cpp section to elk/elk_grampar.cpp[1 /1 ]
     1: #line 18328 "./lpsrc/elk.pak"
     2: // grampar.cc            see license.txt for copyright and terms of use
     3: // additional C++ code for the grammar parser; in essence,
     4: // build the grammar internal representation out of what
     5: // the user supplies in a .gr file
     6: 
     7: #include "elk_grampar.h"
     8: #include "ast_gramlex.h"
     9: #include "sm_trace.h"
    10: #include "elk_gramast.ast.gen.h"
    11: #include "elk_grammar.h"
    12: #include "sm_owner.h"
    13: #include "sm_syserr.h"
    14: #include "sm_strutil.h"
    15: #include "elk_grampar.tab.h"
    16: #include "sm_array.h"
    17: #include "elk_mlsstr.h"
    18: 
    19: #include <fstream>         // std::ifstream
    20: #include <ctype.h>           // isspace, isalnum
    21: 
    22: #define LIT_STR(s) LocString(SL_INIT, grammarStringTable.add(s))
    23: 
    24: 
    25: // ------------------------- Environment ------------------------
// Root environment: no parent scope, fresh nonterminal-declaration map,
// and an error count of zero; 'errors' aliases our own 'errorCount'.
Environment::Environment(Grammar &G)
  : g(G),
    prevEnv(NULL),
    nontermDecls(),
    errorCount(0),
    errors(errorCount)
{}
    33: 
// Nested environment: shares the grammar and nonterminal declarations
// with 'prev'; error reports funnel to the parent's counter via the
// 'errors' reference, so our own 'errorCount' is a poisoned sentinel.
Environment::Environment(Environment &prev)
  : g(prev.g),
    prevEnv(&prev),
    nontermDecls(prev.nontermDecls),
    errorCount(-1000),      // should never be used
    errors(prev.errors)     // copy parent's 'errors' reference
{}
    41: 
// Nothing owned directly; members clean themselves up.
Environment::~Environment()
{}
    44: 
    45: 
    46: // -------------------- XASTParse --------------------
    47: STATICDEF sm_string XASTParse::
    48:   constructMsg(LocString const &tok, char const *msg)
    49: {
    50:   if (tok.validLoc()) {
    51:     return sm_stringc << tok.locString() << ": near " << tok
    52:                    << ", " << msg;
    53:   }
    54:   else {
    55:     return sm_string(msg);
    56:   }
    57: }
    58: 
// Construct with the token nearest the failure and a description;
// the base xBase gets the fully-formatted message.
XASTParse::XASTParse(LocString const &tok, char const *m)
  : xBase(constructMsg(tok, m)),
    failToken(tok),
    message(m)
{}
    64: 
    65: 
// Copy constructor; DMEMB copies the named member from 'obj'.
XASTParse::XASTParse(XASTParse const &obj)
  : xBase(obj),
    DMEMB(failToken),
    DMEMB(message)
{}
    71: 
// Nothing to release beyond what the members' destructors handle.
XASTParse::~XASTParse()
{}
    74: 
    75: 
    76: // -------------------- AST parser support ---------------------
    77: // fwd-decl of parsing fns
    78: void astParseGrammar(Grammar &g, GrammarAST *treeTop);
    79: void astParseTerminals(Environment &env, TF_terminals const &terms);
    80: void astParseDDM(Environment &env, Symbol *sym,
    81:                  ASTList<SpecFunc> const &funcs);
    82: void astParseNonterm(Environment &env, TF_nonterm const *nt);
    83: void astParseProduction(Environment &env, Nonterminal *nonterm,
    84:                         ProdDecl const *prod);
    85: 
    86: 
    87: // really a static semantic error, more than a parse error..
// Raise an XASTParse for a fatal error at 'failToken'; does not return.
void astParseError(LocString const &failToken, char const *msg)
{
  THROW(XASTParse(failToken, msg));
}
    92: 
// Raise an XASTParse with no associated source location; does not return.
void astParseError(char const *msg)
{
  LocString ls;   // no location info
  THROW(XASTParse(ls, msg));
}
    98: 
    99: // print the same message, but keep going anyway
   100: void astParseErrorCont(Environment &env, LocString const &failToken,
   101:                        char const *msg)
   102: {
   103:   XASTParse x(failToken, msg);
   104:   std::cout << x.why() << std::endl;
   105:   env.errors++;
   106: }
   107: 
   108: 
   109: // to put as the catch block; so far it's kind of ad-hoc where
   110: // I actually put 'try' blocks..
   111: #define CATCH_APPLY_CONTEXT(tok)        \
   112:   catch (XASTParse &x) {                \
   113:     /* leave unchanged */               \
   114:     throw x;                            \
   115:   }                                     \
   116:   catch (xBase &x) {                    \
   117:     /* add context */                   \
   118:     astParseError(tok, x.why());        \
   119:     throw 0;     /* silence warning */  \
   120:   }
   121: 
   122: 
   123: // ---------------------- AST "parser" --------------------------
   124: // set the annotation pointers
// Scan the toplevel forms of 'ast' and fill in its annotation pointers:
// 'terms' (the unique Terminals section) and 'firstNT' (the first
// nonterminal, which becomes the start symbol).  Errors out if the
// Terminals section is missing or duplicated, or no nonterminal exists.
void setAnnotations(GrammarAST *ast)
{
  // work through the toplevel forms
  FOREACH_ASTLIST_NC(TopForm, ast->forms, iter) {
    ASTSWITCH(TopForm, iter.data()) {
      ASTCASE(TF_terminals, t) {
        if (!ast->terms) {
          ast->terms = t;
        }
        else {
          astParseError("there is more than one 'Terminals' section");
        }
      }

      ASTNEXT(TF_nonterm, nt) {
        // only the first nonterminal is recorded
        if (!ast->firstNT) {
          ast->firstNT = nt;
        }
      }

      ASTENDCASED
    }
  }

  if (!ast->terms) {
    astParseError("'Terminals' specification is missing");
  }
  if (!ast->firstNT) {
    astParseError("you have to have at least one nonterminal");
  }
}
   156: 
   157: 
   158: LocString extractActionClassName(LocString const &body)
   159: {
   160:   // find start of first token
   161:   char const *start = body.str;
   162:   while (isspace(*start)) start++;
   163: 
   164:   // find end of first token
   165:   char const *p = start;
   166:   while (isspace(*p)) p++;
   167:   while (isalnum(*p) || *p=='_') p++;
   168: 
   169:   // yield that, with the same source location
   170:   return LocString(body.loc, grammarStringTable.add(sm_string(start, p-start)));
   171: }
   172: 
   173: 
   174: // handle TF_verbatim and TF_option
// handle TF_verbatim and TF_option
// Walk the toplevel forms and transfer context-class, verbatim-code,
// and option settings from the AST into the Grammar 'g'.  Unknown
// option names are fatal errors.
void astParseOptions(Grammar &g, GrammarAST *ast)
{
  // handle TF_verbatim and TF_option
  FOREACH_ASTLIST_NC(TopForm, ast->forms, iter) {
    ASTSWITCH(TopForm, iter.data()) {
      ASTCASE(TF_context, c) {
        // overwrite the context class name, and append to
        // its body verbatim list
        g.actionClassName = extractActionClassName(c->body);

        // 11/13/04: There is a subtle problem with keeping the body
        // from the base specification, when the following conditions
        // hold:
        //   - the base spec is compiled on its own (w/o the extension)
        //   - some translation unit "A" sees the resulting .gr.gen.h file
        //   - the extension spec is compiled
        //   - some translation unit "B" sees the resulting .gr.gen.h file
        //   - A and B are linked together in one executable
        // In that case, the context_class from the base will have an
        // inconsistent definition in A and B, since in A it will be
        // whatever the user wrote plus, the declarations for the
        // action functions, whereas in B it will be just what the
        // user wrote, since the action functions end up in the
        // extension context_class.
        //
        // What is even more subtle is the *manifestation* of this
        // problem, which is linking problems with vtables.  C++
        // compilers do not explicitly check that classes declared in
        // multiple translation units have identical declarations
        // (token for token), but they *do* of course rely on them
        // being so.  That reliance shows up in the decisions
        // regarding which module has the vtable, among other places.
        // So this problem did not show up immediately, and was only
        // revealed as initially mysterious portability problems
        // (since my development toolchain happend to be fairly
        // lenient w.r.t. vtable placement).
        //
        // Therefore the new policy is that context_classes from the
        // base are *not* emitted, and consequently it is impossible
        // to inherit from them in subsequent context_classes.  The
        // user must put data/functions that are meant to be shared
        // into a common base class that is *not* the context_class
        // of any grammar or extension.
        //
        // old:
        //g.actionClasses.append(new LocString(c->body));
        //
        // new:
        g.actionClasses.deleteAll();
        g.actionClasses.append(new LocString(c->body));
      }

      ASTNEXT(TF_verbatim, v) {
        // verbatim code destined for the impl (.cc) vs. header (.h) file
        if (v->isImpl) {
          g.implVerbatim.append(new LocString(v->code));
        }
        else {
          g.verbatim.append(new LocString(v->code));
        }
      }

      ASTNEXT(TF_option, op) {
        LocString const &name = op->name;
        int value = op->value;
        bool boolVal = !!value;   // nonzero => true

        // dispatch on the option name
        if (name.equals("useGCDefaults")) {
          g.useGCDefaults = boolVal;
        }
        else if (name.equals("defaultMergeAborts")) {
          g.defaultMergeAborts = boolVal;
        }
        else if (name.equals("shift_reduce_conflicts")) {
          g.expectedSR = value;
        }
        else if (name.equals("reduce_reduce_conflicts")) {
          g.expectedRR = value;
        }
        else if (name.equals("unreachable_nonterminals")) {
          g.expectedUNRNonterms = value;
        }
        else if (name.equals("unreachable_terminals")) {
          g.expectedUNRTerms = value;
        }
        else if (name.equals("lang_OCaml")) {
          //g.targetLang = "OCaml";
          //
          // I'm retarded.. I need to know if we're parsing ocaml *before*
          // we actually parse it, otherwise I can't skip the embedded
          // action fragments properly!
          astParseError(name, "The `lang_OCaml' option has been replaced with "
                              "the `-ocaml' command-line switch.  Please use "
                              "that instead.  (Sorry for the inconvenience.)");
        }
        else {
          astParseError(name, "unknown option name");
        }
      }

      ASTENDCASED
    }
  }
}
   278: 
   279: 
   280: // map the grammar definition AST into a Grammar data structure
// map the grammar definition AST into a Grammar data structure
// Processing order: terminals first, then all nonterminal declarations
// (so forward references resolve), then nonterminal bodies.  Throws
// XASTParse on fatal problems or accumulated continuable errors.
void astParseGrammar(Grammar &g, GrammarAST *ast)
{
  // default, empty environment
  Environment env(g);

  // handle TF_terminals
  astParseTerminals(env, *(ast->terms));

  // process all nonterminal declarations first, so while we're
  // looking at their bodies we can tell if one isn't declared
  {
    FOREACH_ASTLIST(TopForm, ast->forms, iter) {
      if (!iter.data()->isTF_nonterm()) continue;
      TF_nonterm const *nt = iter.data()->asTF_nontermC();

      // check for already declared
      if (env.nontermDecls.isMapped(nt->name)) {
        astParseError(nt->name, "nonterminal already declared");
      }

      // make the Grammar object to represent the new nonterminal
      env.g.getOrMakeNonterminal(nt->name);

      // add this decl to our running list (in the original environment)
      env.nontermDecls.add(nt->name, const_cast<TF_nonterm*>(nt));
    }
  }

  // process nonterminal bodies
  {
    FOREACH_ASTLIST(TopForm, ast->forms, iter) {
      if (!iter.data()->isTF_nonterm()) continue;
      TF_nonterm const *nt = iter.data()->asTF_nontermC();

      // new environment since it can contain a grouping construct
      // (at this very moment it actually can't because there is no syntax..)
      Environment newEnv(env);

      // parse it
      astParseNonterm(newEnv, nt);
    }
  }

  if (!g.actionClassName.str) {
    astParseError("you must specify a context class; for example:\n"
                  "  context_class Context : public UserActions {};\n");
  }

  // errors reported via astParseErrorCont() accumulate here
  if (env.errors) {
    astParseError("halting due to previously reported errors");
  }
}
   333: 
   334: 
   335: // validate 'name'
   336: Terminal *astParseToken(Environment &env, LocString const &name)
   337: {
   338:   Terminal *t = env.g.findTerminal(name);
   339:   if (!t) {
   340:     astParseError(name, "undeclared token");
   341:   }
   342:   return t;
   343: }
   344: 
   345: 
   346: // needed to ensure the GrowArray below has its values initialized
   347: // to false when the array expands
class InitFalseBool {
public:
  bool b;   // the wrapped flag; default-constructed to false
public:
  InitFalseBool() : b(false) {}
};
   354: 
   355: 
// Process the 'terminals' section of the grammar AST: declare every
// token, fill any gaps in the token code space (later analyses need
// it dense), then apply type annotations and precedence specs.
void astParseTerminals(Environment &env, TF_terminals const &terms)
{
  // basic declarations
  {
    int maxCode = 0;     // highest token code declared so far
    GrowArray<InitFalseBool> codeHasTerm(200);     // codeHasTerm[c].b: was code 'c' declared?
    FOREACH_ASTLIST(TermDecl, terms.decls, iter) {
      TermDecl const &term = *(iter.data());

      // process the terminal declaration
      int code = term.code;
      StringRef name = term.name;
      trace("grampar") << "token: code=" << code
                       << ", name=" << name << std::endl;

      // declareToken reports failure on a duplicate declaration
      if (!env.g.declareToken(term.name, code, term.alias)) {
        astParseError(term.name, "token already declared");
      }

      // track what terminals have codes
      maxCode = max(code, maxCode);
      codeHasTerm.ensureIndexDoubler(code);
      codeHasTerm[code].b = true;
    }

    // fill in any gaps in the code space; this is required because
    // later analyses assume the terminal code space is dense
    SourceLoc dummyLoc(HERE_SOURCELOC);
    for (int i=0; i<maxCode; i++) {
      if (!codeHasTerm[i].b) {
        // synthesize a placeholder token for the unused code 'i'
        LocString dummy(dummyLoc, grammarStringTable.add(
          sm_stringc << "__dummy_filler_token" << i));
        env.g.declareToken(dummy, i, dummy);
      }
    }
  }

  // type annotations
  {
    FOREACH_ASTLIST(TermType, terms.types, iter) {
      TermType const &type = *(iter.data());
      trace("grampar") << "token type: name=" << type.name
                       << ", type=" << type.type << std::endl;

      // look up the name
      Terminal *t = astParseToken(env, type.name);
      if (t->type) {
        astParseError(type.name, "this token already has a type");
      }

      // annotate with declared type
      t->type = type.type;

      // parse the dup/del/merge spec
      astParseDDM(env, t, type.funcs);
    }
  }

  // precedence specifications
  {
    FOREACH_ASTLIST(PrecSpec, terms.prec, iter) {
      PrecSpec const &spec = *(iter.data());

      // one spec can name several tokens, all of which share the same
      // precedence level and associativity kind
      FOREACH_ASTLIST(LocString, spec.tokens, tokIter) {
        LocString const &tokName = *(tokIter.data());
        trace("grampar") << "prec: " << toString(spec.kind)
                         << " " << spec.prec << " " << tokName;

        // look up the token
        Terminal *t = astParseToken(env, tokName);
        if (t->precedence) {
          astParseError(tokName,
            sm_stringc << tokName << " already has a specified precedence");
        }

        if (spec.prec == 0) {
          // 0 means precedence isn't specified
          astParseError(tokName,
            "you can't use 0 as a precedence level, because that value "
            "is used internally to mean something else");
        }

        // apply spec
        t->precedence = spec.prec;
        t->associativity = spec.kind;
      }
    }
  }
}
   445: 
   446: 
   447: void astParseDDM(Environment &env, Symbol *sym,
   448:                  ASTList<SpecFunc> const &funcs)
   449: {
   450:   Terminal *term = sym->ifTerminal();
   451:   Nonterminal *nonterm = sym->ifNonterminal();
   452: 
   453:   FOREACH_ASTLIST(SpecFunc, funcs, iter) {
   454:     SpecFunc const &func = *(iter.data());
   455:     int numFormals = func.formals.count();
   456: 
   457:     // decide what to do based on the name
   458: 
   459:     if (func.name.equals("dup")) {
   460:       if (numFormals != 1) {
   461:         astParseError(func.name, "'dup' function must have one formal parameter");
   462:       }
   463:       sym->dupParam = func.nthFormal(0);
   464:       sym->dupCode = func.code;
   465:     }
   466: 
   467:     else if (func.name.equals("del")) {
   468:       if (numFormals == 0) {
   469:         // not specified is ok, since it means the 'del' function
   470:         // doesn't use its parameter
   471:         sym->delParam = NULL;
   472:       }
   473:       else if (numFormals == 1) {
   474:         sym->delParam = func.nthFormal(0);
   475:       }
   476:       else {
   477:         astParseError(func.name, "'del' function must have either zero or one formal parameters");
   478:       }
   479:       sym->delCode = func.code;
   480:     }
   481: 
   482:     else if (func.name.equals("merge")) {
   483:       if (nonterm) {
   484:         if (numFormals != 2) {
   485:           astParseError(func.name, "'merge' function must have two formal parameters");
   486:         }
   487:         nonterm->mergeParam1 = func.nthFormal(0);
   488:         nonterm->mergeParam2 = func.nthFormal(1);
   489:         nonterm->mergeCode = func.code;
   490:       }
   491:       else {
   492:         astParseError(func.name, "'merge' can only be applied to nonterminals");
   493:       }
   494:     }
   495: 
   496:     else if (func.name.equals("keep")) {
   497:       if (nonterm) {
   498:         if (numFormals != 1) {
   499:           astParseError(func.name, "'keep' function must have one formal parameter");
   500:         }
   501:         nonterm->keepParam = func.nthFormal(0);
   502:         nonterm->keepCode = func.code;
   503:       }
   504:       else {
   505:         astParseError(func.name, "'keep' can only be applied to nonterminals");
   506:       }
   507:     }
   508: 
   509:     else if (func.name.equals("classify")) {
   510:       if (term) {
   511:         if (numFormals != 1) {
   512:           astParseError(func.name, "'classify' function must have one formal parameter");
   513:         }
   514:         term->classifyParam = func.nthFormal(0);
   515:         term->classifyCode = func.code;
   516:       }
   517:       else {
   518:         astParseError(func.name, "'classify' can only be applied to terminals");
   519:       }
   520:     }
   521: 
   522:     else if (func.name.equals("maximal")) {
   523:       if (nonterm) {
   524:         nonterm->maximal = true;     // function body has no meaning
   525:       }
   526:       else {
   527:         astParseError(func.name, "'maximal' can only be applied to nonterminals");
   528:       }
   529:     }
   530: 
   531:     else {
   532:       astParseError(func.name,
   533:         sm_stringc << "unrecognized spec function \"" << func.name << "\"");
   534:     }
   535:   }
   536: }
   537: 
   538: 
   539: void addDefaultTypesActions(Grammar &g, GrammarAST *ast)
   540: {
   541:   // language defaults
   542:   StringRef defaultType, defaultAction;
   543:   if (g.targetLang.equals("OCaml")) {
   544:     defaultType = grammarStringTable.add("unit");
   545:     defaultAction = grammarStringTable.add("()");
   546:   }
   547:   else /*C*/ {
   548:     defaultType = grammarStringTable.add("void");
   549:     defaultAction = grammarStringTable.add("return;");
   550:   }
   551: 
   552:   // hook to allow me to force defaults everywhere (this is useful
   553:   // when I want to try a grammar written for one language using
   554:   // another language's core)
   555:   bool forceDefaults = tracingSys("forceDefaultActions");
   556: 
   557:   // iterate over nonterminals
   558:   FOREACH_ASTLIST_NC(TopForm, ast->forms, iter) {
   559:     if (!iter.data()->isTF_nonterm()) { continue; }
   560:     TF_nonterm *nt = iter.data()->asTF_nonterm();
   561: 
   562:     // default type
   563:     if (forceDefaults || nt->type.isNull()) {
   564:       nt->type.str = defaultType;
   565:     }
   566: 
   567:     // iterate over productions
   568:     FOREACH_ASTLIST_NC(ProdDecl, nt->productions, iter2) {
   569:       ProdDecl *pd = iter2.data();
   570: 
   571:       // default action
   572:       if (forceDefaults || pd->actionCode.isNull()) {
   573:         pd->actionCode.str = defaultAction;
   574:       }
   575: 
   576:       if (forceDefaults) {
   577:         // clear RHSElt tags, since otherwise the lack of types
   578:         // will provoke errors; and default actions don't refer to
   579:         // the RHSElts anyway
   580:         StringRef empty = grammarStringTable.add("");
   581:         FOREACH_ASTLIST_NC(RHSElt, pd->rhs, iter3) {
   582:           ASTSWITCH(RHSElt, iter3.data()) {
   583:             ASTCASE(RH_name, n)
   584:               n->tag.str = empty;
   585: 
   586:             ASTNEXT(RH_sm_string, s)
   587:               s->tag.str = empty;
   588: 
   589:             ASTENDCASED
   590:           }
   591:         }
   592:       }
   593:     }
   594:   }
   595: }
   596: 
   597: 
// Wrap the user's grammar with a synthetic start rule
// "__EarlyStartSymbol -> <user's first nonterminal> EOF", prepending
// the new nonterminal to the AST's form list.
void synthesizeStartRule(Grammar &g, GrammarAST *ast)
{
  // get the first nonterminal; this is the user's start symbol
  TF_nonterm *firstNT = ast->firstNT;

  // find the name of the user's EOF token (the one with code 0)
  TermDecl const *eof = NULL;
  FOREACH_ASTLIST(TermDecl, ast->terms->decls, iter) {
    if (iter.data()->code == 0) {
      eof = iter.data();
      break;
    }
  }
  if (!eof) {
    astParseError("you have to have an EOF token, with code 0");
  }

  // build a start production: the user's start symbol tagged "top",
  // followed by the EOF token (untagged)
  RHSElt *rhs1 = new RH_name(LIT_STR("top").clone(), firstNT->name.clone());
  RHSElt *rhs2 = new RH_name(LIT_STR("").clone(), eof->name.clone());
  ASTList<RHSElt> *rhs = new ASTList<RHSElt>();
  rhs->append(rhs1);
  rhs->append(rhs2);
  // action yields the user's start symbol, except when there is
  // nothing to yield (void type) or the target is OCaml
  char const *action = g.targetLang.equals("OCaml")? " top " :
                       firstNT->type.equals("void")? " return; " :
                                                     " return top; ";
  ProdDecl *startProd = new ProdDecl(rhs, LIT_STR(action).clone());

  // build an even earlier start symbol
  TF_nonterm *earlyStartNT
    = new TF_nonterm(
        LIT_STR("__EarlyStartSymbol").clone(),   // name
        firstNT->type.clone(),                   // type
        NULL,                                    // empty list of functions
        new ASTList<ProdDecl>(startProd),        // productions
        NULL                                     // subsets
      );

  // put it into the AST
  ast->forms.prepend(earlyStartNT);
}
   639: 
   640: 
   641: void astParseNonterm(Environment &env, TF_nonterm const *nt)
   642: {
   643:   LocString const &name = nt->name;
   644: 
   645:   // get the Grammar object that represents the nonterminal
   646:   Nonterminal *nonterm = env.g.findNonterminal(name);
   647:   xassert(nonterm);
   648: 
   649:   nonterm->type = nt->type;
   650: 
   651:   // iterate over the productions
   652:   FOREACH_ASTLIST(ProdDecl, nt->productions, iter) {
   653:     astParseProduction(env, nonterm, iter.data());
   654:   }
   655: 
   656:   // parse dup/del/merge
   657:   astParseDDM(env, nonterm, nt->funcs);
   658: 
   659:   // record subsets
   660:   {
   661:     FOREACH_ASTLIST(LocString, nt->subsets, iter) {
   662:       LocString const *ls = iter.data();
   663:       Nonterminal *sub = env.g.findNonterminal(*ls);
   664:       if (!sub) {
   665:         astParseError(*ls, "nonexistent nonterminal");
   666:       }
   667: 
   668:       // note that, since context-free language inclusion is
   669:       // undecidable (Hopcroft/Ullman), we can't actually check that
   670:       // the given nonterminals really are in the subset relation
   671:       nonterm->subsets.prepend(sub);
   672:     }
   673:   }
   674: }
   675: 
   676: 
// Translate one production declaration into a Grammar Production on
// 'nonterm': resolve each RHS element to a terminal or nonterminal,
// apply any precedence spec, and register the result with env.g.
void astParseProduction(Environment &env, Nonterminal *nonterm,
                        ProdDecl const *prodDecl)
{
  // is this the special start symbol I inserted?
  bool synthesizedStart = nonterm->name.equals("__EarlyStartSymbol");

  // build a production; use 'this' as the tag for LHS elements
  Production *prod = new Production(nonterm, "this");

  // put the code into it
  prod->action = prodDecl->actionCode;

  // deal with RHS elements
  FOREACH_ASTLIST(RHSElt, prodDecl->rhs, iter) {
    RHSElt const *n = iter.data();
    LocString symName;
    LocString symTag;
    bool isString = false;
    bool isPrec = false;

    // pull various info out of the AST node
    ASTSWITCHC(RHSElt, n) {
      ASTCASEC(RH_name, tname) {
        symName = tname->name;
        symTag = tname->tag;
      }

      ASTNEXTC(RH_sm_string, ts) {
        symName = ts->str;
        symTag = ts->tag;
        isString = true;
      }

      ASTNEXTC(RH_prec, p) {
        // apply the specified precedence
        prod->precedence = astParseToken(env, p->tokName)->precedence;

        // and require that this is the last RHS element
        iter.adv();
        if (!iter.isDone()) {
          astParseError(p->tokName,
            "precedence spec must be last thing in a production "
            "(before the action code)");
        }
        isPrec = true;
      }

      ASTENDCASECD
    }

    if (isPrec) {
      break;     // last element anyway
    }

    // see which (if either) thing this name already is
    // (NOTE: the local 'nonterm' below deliberately shadows the
    // parameter for the rest of this loop body)
    Terminal *term = env.g.findTerminal(symName);
    Nonterminal *nonterm = env.g.findNonterminal(symName);
    xassert(!( term && nonterm ));     // better not be both!

    // syntax rules
    if (isString  &&  !term) {
      astParseError(symName, "terminals must be declared");
    }

    if (!term && !nonterm) {
      // non-fatal: report the error but keep parsing
      astParseErrorCont(env, symName, "undeclared symbol");

      // synthesize one anyway so we can find more errors
      nonterm = env.g.getOrMakeNonterminal(symName);
    }

    if (term && term->termIndex==0 && !synthesizedStart) {
      astParseError(symName, "you cannot use the EOF token in your rules");
    }

    if (symTag.equals("loc")) {
      // bad because loc is the name of the automatically-propagated
      // source location information
      astParseErrorCont(env, symTag, "cannot use \"loc\" as a tag");
    }

    // whenever we see a terminal, copy its precedence spec to
    // the production; thus, the last symbol appearing in the
    // production will be the one that gives the precedence
    if (term) {
      prod->precedence = term->precedence;
    }

    // decide which symbol to put in the production
    Symbol *s;
    if (nonterm) {
      s = nonterm;            // could do these two with a bitwise OR
    }                         // if I were feeling extra clever today
    else {
      s = term;
    }

    if (s->isEmptyString) {
      // "empty" is a syntactic convenience; it doesn't get
      // added to the production
    }
    else {
      // add it to the production
      prod->append(s, symTag);
    }
  }

  // after constructing the production we need to do this
  // update: no we don't -- GrammarAnalysis takes care of it (and
  // complains if we do)
  //prod->finished();

  // add production to grammar
  env.g.addProduction(prod);
}
   792: 
   793: 
// ----------------------- parser support ---------------------
// Bison parser calls this to get a token
// Returns the token code; 'lvalp' receives the semantic value for
// token kinds that carry one (others get a NULL 'str').
int grampar_yylex(YYSTYPE *lvalp, void *parseParam)
{
  ParseParams *par = (ParseParams*)parseParam;
  GrammarLexer &lexer = par->lexer;

  int code = lexer.yylexInc();

  try {
    // yield semantic values for some things
    // note that the yielded semantic value must be consistent with
    // what is declared for these token types in grampar.y
    switch (code) {
      case TOK_INTEGER:
        lvalp->num = lexer.integerLiteral;
        break;

      case TOK_STRING:
        lvalp->str = new LocString(lexer.curLoc(), lexer.sm_stringLiteral);
        break;

      case TOK_NAME:
        lvalp->str = new LocString(lexer.curLoc(), lexer.curToken());
        break;

      case TOK_LIT_CODE:
        lvalp->str = new LocString(lexer.curLoc(), lexer.curFuncBody());
        break;

      default:
        lvalp->str = NULL;        // any attempt to use will segfault
    }
  }
  catch (xBase &x) {
    // e.g. malformed fundecl
    std::cout << lexer.curLocStr() << ": " << x << std::endl;

    // optimistically try just skipping the bad token
    // (NOTE: recursion depth here is bounded only by the number of
    // consecutive bad tokens in the input)
    return grampar_yylex(lvalp, parseParam);
  }

  return code;
}
   838: 
   839: 
   840: void grampar_yyerror(char const *message, void *parseParam)
   841: {
   842:   ParseParams *par = (ParseParams*)parseParam;
   843:   std::cout << par->lexer.curLocStr() << ": " << message << std::endl;
   844: }
   845: 
   846: 
   847: // ---------------------- merging -----------------------
// Merge an extension grammar's context class into 'base'.
void mergeContext(GrammarAST *base, TF_context * /*owner*/ ext)
{
  // do simple append, since the grammar parser above knows how
  // to handle multiple context classes
  base->forms.append(ext);

  // earlier replace/append logic, retained for reference
  #if 0
  // find 'base' context
  TF_context *baseContext = NULL;
  FOREACH_ASTLIST_NC(TopForm, base->forms, iter) {
    if (iter.data()->isTF_context()) {
      baseContext = iter.data()->asTF_context();
      break;
    }
  }

  if (!baseContext) {
    // base does not have a context class, so 'ext' becomes it
    base->forms.append(ext);
  }

  else if (baseContext->name.str == ext->name.str) {
    // same name; I'd like to append the code to what's already
    // there, but that's tricky because the location won't
    // be right..
    astParseError(ext->name, "context append not implemented");
  }

  else {
    // different name, replace the old
    base->forms.removeItem(baseContext);
    delete baseContext;
    base->forms.append(ext);
  }
  #endif // 0
}
   884: 
   885: 
   886: void mergeOption(GrammarAST *base, TF_option * /*owner*/ ext)
   887: {
   888:   // find option with the same name
   889:   FOREACH_ASTLIST_NC(TopForm, base->forms, iter) {
   890:     if (!iter.data()->isTF_option()) continue;
   891:     TF_option *op = iter.data()->asTF_option();
   892: 
   893:     if (op->name.str == ext->name.str) {
   894:       // replace the old value
   895:       op->value = ext->value;
   896:       delete ext;
   897:       return;
   898:     }
   899:   }
   900: 
   901:   // otherwise, just add the new option
   902:   base->forms.append(ext);
   903: }
   904: 
   905: 
   906: void mergeTerminals(GrammarAST *base, TF_terminals * /*owner*/ ext)
   907: {
   908:   FOREACH_ASTLIST_NC(TopForm, base->forms, iter) {
   909:     if (iter.data()->isTF_terminals()) {
   910:       TF_terminals *t = iter.data()->asTF_terminals();
   911: 
   912:       // there's no point to changing codes, so all the
   913:       // TermDecls just get added (collisions are detected
   914:       // later, during AST parsing)
   915:       t->decls.concat(ext->decls);
   916: 
   917:       // in fact, I'll do the same for the others, even though
   918:       // it might make sense to do some replacement; my immediate
   919:       // needs don't include replacement at this level
   920:       t->types.concat(ext->types);
   921:       t->prec.concat(ext->prec);
   922: 
   923:       delete ext;
   924:       return;
   925:     }
   926:   }
   927: 
   928:   // no TF_terminals in 'base'.. unusual, but easy to handle
   929:   base->forms.append(ext);
   930: }
   931: 
   932: 
   933: void mergeSpecFunc(TF_nonterm *base, SpecFunc * /*owner*/ ext)
   934: {
   935:   // find an existing spec func with the same name
   936:   FOREACH_ASTLIST_NC(SpecFunc, base->funcs, iter) {
   937:     SpecFunc *f = iter.data();
   938:     if (f->name.str == ext->name) {
   939:       // replace the old code with the extension code
   940:       base->funcs.removeItem(f);
   941:       delete f;
   942:       break;
   943:     }
   944:   }
   945: 
   946:   // just add it
   947:   base->funcs.append(ext);
   948: }
   949: 
   950: 
   951: bool equalRHSElt(RHSElt const *elt1, RHSElt const *elt2)
   952: {
   953:   if (elt1->kind() != elt2->kind()) {
   954:     return false;
   955:   }
   956: 
   957:   // if the RHS names a terminal, this isn't perfect because one might
   958:   // use an alias.. but I don't have the necessary information to detect
   959:   // that, since I haven't yet computed the associated Symbols
   960:   if (elt1->isRH_name()) {
   961:     return elt1->asRH_nameC()->name.str == elt2->asRH_nameC()->name.str;
   962:   }
   963:   if (elt1->isRH_sm_string()) {
   964:     return elt1->asRH_sm_stringC()->str.str == elt2->asRH_sm_stringC()->str.str;
   965:   }
   966:   if (elt1->isRH_prec()) {
   967:     // this means you can't change the precedence..
   968:     return elt1->asRH_precC()->tokName.str == elt2->asRH_precC()->tokName.str;
   969:   }
   970: 
   971:   xfailure("unknown RHSElt kind");
   972:   return false;     // silence warning
   973: }
   974: 
   975: 
   976: bool equalRHS(ProdDecl const *prod1, ProdDecl const *prod2)
   977: {
   978:   if (prod1->rhs.count() != prod2->rhs.count()) {
   979:     return false;
   980:   }
   981: 
   982:   for (ASTListIter<RHSElt> iter1(prod1->rhs), iter2(prod2->rhs);
   983:        !iter1.isDone(); iter1.adv(), iter2.adv()) {
   984:     if (!equalRHSElt(iter1.data(), iter2.data())) {
   985:       return false;
   986:     }
   987:   }
   988:   return true;
   989: }
   990: 
   991: 
   992: void mergeProduction(TF_nonterm *base, ProdDecl *ext)
   993: {
   994:   // look for a production with an identical RHS
   995:   FOREACH_ASTLIST_NC(ProdDecl, base->productions, iter) {
   996:     ProdDecl *prod = iter.data();
   997: 
   998:     // check RHSs for equality
   999:     if (equalRHS(prod, ext)) {
  1000:       // replace old with new
  1001:       base->productions.removeItem(prod);
  1002:       delete prod;
  1003:       break;
  1004:     }
  1005:   }
  1006: 
  1007:   // add the production
  1008:   base->productions.append(ext);
  1009: }
  1010: 
  1011: 
  1012: void mergeNonterminal(GrammarAST *base, TF_nonterm * /*owner*/ ext)
  1013: {
  1014:   // find an existing nonterminal with the same name
  1015:   TF_nonterm *exist = NULL;
  1016:   FOREACH_ASTLIST_NC(TopForm, base->forms, iter) {
  1017:     if (iter.data()->isTF_nonterm() &&
  1018:         iter.data()->asTF_nonterm()->name.str == ext->name) {
  1019:       exist = iter.data()->asTF_nonterm();
  1020:     }
  1021:   }
  1022: 
  1023:   if (!exist) {
  1024:     // no pre-existing, just append it
  1025:     base->forms.append(ext);
  1026:     return;
  1027:   }
  1028: 
  1029:   // make sure the types agree
  1030:   if (exist->type.str != ext->type) {
  1031:     astParseError(ext->type, "cannot redefine the type of a nonterminal");
  1032:   }
  1033: 
  1034:   // merge the spec funcs
  1035:   while (ext->funcs.isNotEmpty()) {
  1036:     mergeSpecFunc(exist, ext->funcs.removeFirst());
  1037:   }
  1038: 
  1039:   // merge the productions
  1040:   while (ext->productions.isNotEmpty()) {
  1041:     mergeProduction(exist, ext->productions.removeFirst());
  1042:   }
  1043: 
  1044:   delete ext;
  1045: }
  1046: 
  1047: 
// Merge extension grammar 'ext' into 'base', dispatching each
// top-level form to the appropriate merge routine; 'ext' is left
// empty (its forms are moved or deleted).
void mergeGrammar(GrammarAST *base, GrammarAST *ext)
{
  // work through all the forms in 'ext', removing each
  // one; it will then either be added to 'base', or
  // discarded entirely
  while (ext->forms.isNotEmpty()) {
    TopForm *form = ext->forms.removeFirst();

    ASTSWITCH(TopForm, form) {
      ASTCASE(TF_context, c) {
        mergeContext(base, c);
      }

      ASTNEXT(TF_verbatim, v) {
        // verbatims simply accumulate
        base->forms.append(v);
      }

      ASTNEXT(TF_option, op) {
        mergeOption(base, op);
      }

      ASTNEXT(TF_terminals, t) {
        mergeTerminals(base, t);
      }

      ASTNEXT(TF_nonterm, n) {
        mergeNonterminal(base, n);
      }

      ASTDEFAULT {
        // unexpected TopForm kind
        xfailure("doh");
      }

      ASTENDCASE
    }
  }
}
  1086: 
  1087: 
  1088: // ---------------- external interface -------------------
  1089: bool isGramlexEmbed(int code);     // defined in gramlex.lex
  1090: 
  1091: GrammarAST *parseGrammarFile(char const *fname, bool useML)
  1092: {
  1093:   #ifndef NDEBUG
  1094:   if (tracingSys("yydebug")) {
  1095:     yydebug = true;    // this flag goes away when NDEBUG is specified..
  1096:   }
  1097:   #endif // NDEBUG
  1098: 
  1099:   // open input file
  1100:   Owner<std::ifstream> in;
  1101:   if (fname == NULL) {
  1102:     fname = "<stdin>";
  1103:   }
  1104:   else {
  1105:     in = new std::ifstream(fname);
  1106:     if (!*in) {
  1107:       xsyserror("open", sm_stringc << "error opening input file " << fname);
  1108:     }
  1109:   }
  1110: 
  1111:   // choose embedded language
  1112:   EmbeddedLang *embed = NULL;
  1113:   if (useML) {
  1114:     embed = new MLSubstrate;
  1115:   }
  1116: 
  1117:   // build lexer
  1118:   GrammarLexer lexer(isGramlexEmbed,
  1119:                      grammarStringTable,
  1120:                      fname,
  1121:                      in.xfr(),
  1122:                      embed);
  1123:   if (embed) {
  1124:     // install the refined error reporter
  1125:     embed->err = &lexer.altReporter;
  1126:   }
  1127: 
  1128:   ParseParams params(lexer);
  1129: 
  1130:   traceProgress() << "parsing grammar source: " << fname << std::endl;
  1131:   int retval = grampar_yyparse(¶ms);
  1132:   if (retval==0 && lexer.errors==0) {
  1133:     GrammarAST *ret = params.treeTop;
  1134: 
  1135:     if (tracingSys("printGrammarAST")) {
  1136:       // print AST
  1137:       std::cout << "AST:\n";
  1138:       ret->debugPrint(std::cout, 2);
  1139:     }
  1140: 
  1141:     return ret;
  1142:   }
  1143:   else {
  1144:     xbase("parsing finished with an error");
  1145:     return NULL;     // silence warning
  1146:   }
  1147: }
  1148: 
  1149: 
// Translate a parsed GrammarAST into Grammar 'g'.  The steps run in a
// required order: annotations, options, defaults, synthetic start
// rule, the full AST-to-Grammar parse, then well-formedness checks
// (which throw on failure).
void parseGrammarAST(Grammar &g, GrammarAST *treeTop)
{
  setAnnotations(treeTop);

  // look at TF_options before synthesizing start rule,
  // so we can know what language is the target
  astParseOptions(g, treeTop);

  // fill in default types and actions
  addDefaultTypesActions(g, treeTop);

  // synthesize a rule "TrueStart -> Start EOF"
  synthesizeStartRule(g, treeTop);

  // parse the AST into a Grammar
  traceProgress() << "parsing grammar AST..\n";
  astParseGrammar(g, treeTop);

  // then check grammar properties; throws exception
  // on failure
  traceProgress() << "beginning grammar analysis..\n";
  g.checkWellFormed();
}
  1173: 
  1174: 
// Convenience wrapper: parse grammar file 'fname' (C substrate, not
// ML) and translate it into 'g', deleting the intermediate AST.
void readGrammarFile(Grammar &g, char const *fname)
{
  // make sure the tree gets deleted
  Owner<GrammarAST> treeTop(parseGrammarFile(fname, false /*useML*/));

  parseGrammarAST(g, treeTop);

  treeTop.del();

  // hmm.. I'd like to restore this functionality...
  //if (ASTNode::nodeCount > 0) {
  //  std::cout << "leaked " << ASTNode::nodeCount << " AST nodes\n";
  //}
}
  1189: 
  1190: 
  1191: // ----------------------- test code -----------------------
  1192: #ifdef TEST_GRAMPAR
  1193: 
  1194: #include "sm_bflatten.h"
  1195: #include <stdlib.h>       // system
  1196: 
// Stand-alone test driver: read a grammar, print it to a temp file,
// round-trip it through the binary flatten/unflatten representation,
// print the round-tripped copy, and diff the two printouts.
int main(int argc, char **argv)
{
  if (argc < 2) {
    std::cout << "usage: " << argv[0] << " [-tr flags] filename.gr\n";
    std::cout << "  interesting trace flags:\n";
    std::cout << "    keep-tmp      do not delete the temporary files\n";
    //std::cout << "    cat-grammar   print the ascii rep to the screen\n";
    return 0;
  }

  traceAddSys("progress");
  TRACE_ARGS();

  bool printCode = true;     // include action code bodies in printouts

  // read the file
  Grammar g1;
  readGrammarFile(g1, argv[1]);

  // and print the grammar
  char const g1Fname[] = "grammar.g1.tmp";
  traceProgress() << "printing initial grammar to " << g1Fname << "\n";
  {
    std::ofstream out(g1Fname);
    g1.printSymbolTypes(out);
    g1.printProductions(out, printCode);
  }

  //if (tracingSys("cat-grammar")) {
    system("cat grammar.g1.tmp");
  //}

  // before using 'xfer' we have to tell it about the sm_string table
  flattenStrTable = &grammarStringTable;

  // write it to a binary file
  char const binFname[] = "grammar.bin.tmp";
  traceProgress() << "writing initial grammar to " << binFname << "\n";
  {
    BFlatten flat(binFname, false /*reading*/);
    g1.xfer(flat);
  }

  // read it back
  traceProgress() << "reading grammar from " << binFname << "\n";
  Grammar g2;
  {
    BFlatten flat(binFname, true /*reading*/);
    g2.xfer(flat);
  }

  // print that too
  char const g2Fname[] = "grammar.g2.tmp";
  traceProgress() << "printing just-read grammar to " << g2Fname << "\n";
  {
    std::ofstream out(g2Fname);
    g2.printSymbolTypes(out);
    g2.printProductions(out, printCode);
  }

  // compare the two written files
  int result = system(sm_stringc << "diff " << g1Fname << " " << g2Fname);
  if (result != 0) {
    std::cout << "the two ascii representations differ!!\n";
    return 4;
  }

  // remove the temp files
  if (!tracingSys("keep-tmp")) {
    remove(g1Fname);
    remove(g2Fname);
    remove(binFname);
  }

  std::cout << "successfully parsed, printed, wrote, and read a grammar!\n";
  return 0;
}
  1274: 
  1275: #endif // TEST_GRAMPAR
End cpp section to elk/elk_grampar.cpp[1]
Start cpp section to elk/elk_grampar.tab.cpp[1 /1 ]
     1: #line 19604 "./lpsrc/elk.pak"
     2: /* A Bison parser, made from grampar.y
     3:    by GNU bison 1.35.  */  /* tweak */
     4: 
     5: #define YYBISON 1  /* Identify Bison output.  */
     6: 
     7: # define        TOK_INTEGER     257
     8: # define        TOK_NAME        258
     9: # define        TOK_STRING      259
    10: # define        TOK_LIT_CODE    260
    11: # define        TOK_LBRACE      261
    12: # define        TOK_RBRACE      262
    13: # define        TOK_COLON       263
    14: # define        TOK_SEMICOLON   264
    15: # define        TOK_ARROW       265
    16: # define        TOK_LPAREN      266
    17: # define        TOK_RPAREN      267
    18: # define        TOK_COMMA       268
    19: # define        TOK_TERMINALS   269
    20: # define        TOK_TOKEN       270
    21: # define        TOK_NONTERM     271
    22: # define        TOK_FUN 272
    23: # define        TOK_VERBATIM    273
    24: # define        TOK_IMPL_VERBATIM       274
    25: # define        TOK_PRECEDENCE  275
    26: # define        TOK_OPTION      276
    27: # define        TOK_EXPECT      277
    28: # define        TOK_CONTEXT_CLASS       278
    29: # define        TOK_SUBSETS     279
    30: 
    31: 
    32: 
    33: #include "elk_grampar.h"
    34: #include "elk_gramast.ast.gen.h"
    35: #include "ast_gramlex.h"
    36: #include "sm_owner.h"
    37: 
    38: #include <stdlib.h>         // malloc, free
    39: #include <iostream>       // std::cout
    40: 
// enable debugging the parser (bison's yydebug machinery) in debug builds
#ifndef NDEBUG
  #define YYDEBUG 1
#endif

// name of extra parameter to yylex
#define YYLEX_PARAM parseParam

// make it call my yylex
#define yylex(lv, param) grampar_yylex(lv, param)

// Bison calls yyerror(msg) on error; we need the extra
// parameter too, so the macro shoehorns it in there
#define yyerror(msg) grampar_yyerror(msg, YYPARSE_PARAM)

// rename the externally-visible parsing routine to make it
// specific to this instance, so multiple bison-generated
// parsers can coexist
#define yyparse grampar_yyparse


// grab the parameter (bison threads it through as an opaque pointer)
#define PARAM ((ParseParams*)parseParam)

// return a LocString for 'str' with no location information
#define noloc(str)                                                    \
  new LocString(SL_UNKNOWN,      /* unknown location */               \
                PARAM->lexer.strtable.add(str))

// LocString for NULL, with no location
#define nolocNULL()                                                   \
  new LocString(SL_UNKNOWN, NULL)

// return a LocString with same location info as something else
// (passed as a pointer to a SourceLocation)
#define sameloc(otherLoc, str)                                        \
  new LocString(otherLoc->loc, PARAM->lexer.strtable.add(str))

// interpret the word into an associativity kind specification
AssocKind whichKind(LocString * /*owner*/ kind);
    81: 
    82: 
#ifndef YYSTYPE
/* Semantic-value union for the grammar parser (the bison %union).
   Exactly one member is meaningful for a given token/nonterminal;
   the pointer members are AST fragments built by the rule actions.  */
typedef union YYSTYPE {
  int num;                         /* plain integer value */
  LocString *str;                  /* string/identifier carrying a source location */

  ASTList<TopForm> *topFormList;   /* sequence of top-level forms */
  TopForm *topForm;                /* a single top-level form */

  ASTList<TermDecl> *termDecls;    /* terminal declarations */
  TermDecl *termDecl;
  ASTList<TermType> *termTypes;    /* terminal type annotations */
  TermType *termType;
  ASTList<PrecSpec> *precSpecs;    /* precedence specifications */

  ASTList<SpecFunc> *specFuncs;    /* special-function clauses */
  SpecFunc *specFunc;
  ASTList<LocString> *sm_stringList;  /* list of located strings */

  ASTList<ProdDecl> *prodDecls;    /* production declarations */
  ProdDecl *prodDecl;
  ASTList<RHSElt> *rhsList;        /* right-hand-side element list */
  RHSElt *rhsElt;
} yystype;
# define YYSTYPE yystype
# define YYSTYPE_IS_TRIVIAL 1      /* plain data: bison may memcpy it */
#endif
   109: #ifndef YYDEBUG
   110: # define YYDEBUG 0
   111: #endif
   112: 
   113: 
   114: 
/* YYFINAL -- state number of the termination (accept) state.  */
#define YYFINAL         94
/* YYFLAG -- sentinel used in yypact to mark "default action only".  */
#define YYFLAG          -32768
/* YYNTBASE -- number of terminal symbols; internal symbol numbers
   >= YYNTBASE denote nonterminals.  */
#define YYNTBASE        26

/* YYTRANSLATE(YYLEX) -- Bison token number corresponding to YYLEX.
   279 is the largest external token code (TOK_SUBSETS); anything
   larger maps to 53, which is past the last valid symbol number.  */
#define YYTRANSLATE(x) ((unsigned)(x) <= 279 ? yytranslate[x] : 53)

/* YYTRANSLATE[YYLEX] -- Bison token number corresponding to YYLEX.
   Unused external codes map to 2 ("$undefined." in yytname).  */
static const char yytranslate[] =
{
       0,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
       2,     2,     2,     2,     2,     2,     1,     3,     4,     5,
       6,     7,     8,     9,    10,    11,    12,    13,    14,    15,
      16,    17,    18,    19,    20,    21,    22,    23,    24,    25
};
   154: 
#if YYDEBUG
/* YYPRHS[YYN] -- index in yyrhs where the RHS of rule YYN begins.  */
static const short yyprhs[] =
{
       0,     0,     2,     3,     6,     8,    10,    12,    14,    16,
      20,    23,    26,    30,    35,    42,    43,    46,    51,    57,
      59,    60,    61,    64,    69,    76,    77,    82,    83,    89,
      90,    93,    95,    97,    98,   101,   108,   109,   111,   113,
     117,   122,   131,   132,   135,   139,   141,   143,   144,   147,
     149,   153,   155,   159,   164,   165
};
/* YYRHS -- the RHS symbols of all rules, concatenated; each rule's
   symbol list is terminated by a 0.  */
static const short yyrhs[] =
{
      27,     0,     0,    27,    28,     0,    29,     0,    30,     0,
      31,     0,    32,     0,    46,     0,    24,     6,    10,     0,
      19,     6,     0,    20,     6,     0,    22,     4,    10,     0,
      22,     4,     3,    10,     0,    15,     7,    33,    36,    38,
       8,     0,     0,    33,    34,     0,     3,     9,     4,    10,
       0,     3,     9,     4,     5,    10,     0,     6,     0,     0,
       0,    36,    37,     0,    16,    35,     4,    10,     0,    16,
      35,     4,     7,    42,     8,     0,     0,    21,     7,    39,
       8,     0,     0,    39,     4,     3,    40,    10,     0,     0,
      40,    41,     0,     4,     0,     5,     0,     0,    42,    43,
       0,    18,     4,    12,    44,    13,     6,     0,     0,    45,
       0,     4,     0,    45,    14,     4,     0,    17,    35,     4,
      48,     0,    17,    35,     4,     7,    42,    47,    52,     8,
       0,     0,    47,    48,     0,    11,    50,    49,     0,     6,
       0,    10,     0,     0,    50,    51,     0,     4,     0,     4,
       9,     4,     0,     5,     0,     4,     9,     5,     0,    21,
      12,    41,    13,     0,     0,    25,    45,    10,     0
};

#endif

#if YYDEBUG
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
static const short yyrline[] =
{
       0,   158,   163,   164,   168,   169,   170,   171,   172,   176,
     181,   182,   187,   188,   199,   204,   205,   213,   215,   220,
     221,   225,   226,   230,   232,   237,   238,   242,   244,   249,
     250,   254,   255,   261,   262,   266,   271,   272,   276,   277,
     288,   291,   296,   297,   301,   305,   306,   310,   311,   320,
     322,   324,   326,   328,   333,   334
};
#endif
   200: 
   201: 
#if (YYDEBUG) || defined YYERROR_VERBOSE

/* YYTNAME[TOKEN_NUM] -- String name of the token TOKEN_NUM.
   Indices 0..25 are terminals, 26..52 nonterminals; the array is
   terminated by a null pointer.  */
static const char *const yytname[] =
{
  "$", "error", "$undefined.", "TOK_INTEGER", "TOK_NAME", "TOK_STRING",
  "TOK_LIT_CODE", "\"{\"", "\"}\"", "\":\"", "\";\"", "\"->\"", "\"(\"",
  "\")\"", "\",\"", "\"terminals\"", "\"token\"", "\"nonterm\"",
  "\"fun\"", "\"verbatim\"", "\"impl_verbatim\"", "\"precedence\"",
  "\"option\"", "\"expect\"", "\"context_class\"", "\"subsets\"",
  "StartSymbol", "TopFormList", "TopForm", "ContextClass", "Verbatim",
  "Option", "Terminals", "TermDecls", "TerminalDecl", "Type", "TermTypes",
  "TermType", "Precedence", "PrecSpecs", "NameOrStringList",
  "NameOrString", "SpecFuncs", "SpecFunc", "FormalsOpt", "Formals",
  "Nonterminal", "Productions", "Production", "Action", "RHS", "RHSElt",
  "Subsets", 0
};
#endif
   220: 
/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
static const short yyr1[] =
{
       0,    26,    27,    27,    28,    28,    28,    28,    28,    29,
      30,    30,    31,    31,    32,    33,    33,    34,    34,    35,
      35,    36,    36,    37,    37,    38,    38,    39,    39,    40,
      40,    41,    41,    42,    42,    43,    44,    44,    45,    45,
      46,    46,    47,    47,    48,    49,    49,    50,    50,    51,
      51,    51,    51,    51,    52,    52
};

/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
static const short yyr2[] =
{
       0,     1,     0,     2,     1,     1,     1,     1,     1,     3,
       2,     2,     3,     4,     6,     0,     2,     4,     5,     1,
       0,     0,     2,     4,     6,     0,     4,     0,     5,     0,
       2,     1,     1,     0,     2,     6,     0,     1,     1,     3,
       4,     8,     0,     2,     3,     1,     1,     0,     2,     1,
       3,     1,     3,     4,     0,     3
};

/* YYDEFACT[S] -- default rule to reduce with in state S when YYTABLE
   doesn't specify something else to do.  Zero means the default is an
   error. */
static const short yydefact[] =
{
       2,     1,     0,    20,     0,     0,     0,     0,     3,     4,
       5,     6,     7,     8,    15,    19,     0,    10,    11,     0,
       0,    21,     0,     0,    12,     9,     0,    16,    25,    33,
      47,    40,    13,     0,    20,     0,    22,     0,    42,     0,
       0,     0,    27,    14,     0,    34,    54,    49,    51,    45,
      46,     0,    44,    48,     0,    17,     0,     0,     0,     0,
      43,     0,     0,     0,    18,    33,    23,     0,    26,    36,
      38,     0,    41,    50,    52,    31,    32,     0,     0,    29,
       0,    37,    55,     0,    53,    24,     0,     0,    39,    28,
      30,    35,     0,     0,     0
};

/* YYDEFGOTO[NTERM] -- default state to go to after reducing the
   nonterminal numbered NTERM+YYNTBASE.  */
static const short yydefgoto[] =
{
      92,     1,     8,     9,    10,    11,    12,    21,    27,    16,
      28,    36,    37,    57,    86,    77,    38,    45,    80,    71,
      13,    46,    31,    52,    39,    53,    61
};

/* YYPACT[STATE-NUM] -- offset into yytable/yycheck of the portion
   describing STATE-NUM; YYFLAG (-32768) means only the default
   action from yydefact applies.  */
static const short yypact[] =
{
  -32768,   -10,     4,    33,    34,    35,    38,    37,-32768,-32768,
  -32768,-32768,-32768,-32768,-32768,-32768,    40,-32768,-32768,     5,
      13,    42,    19,    28,-32768,-32768,    39,-32768,     0,-32768,
  -32768,-32768,-32768,    43,    33,    44,-32768,    41,    36,    -4,
      17,    46,-32768,-32768,    48,-32768,    -7,    47,-32768,-32768,
  -32768,    45,-32768,-32768,    49,-32768,    22,    20,    50,    51,
  -32768,    52,    29,    32,-32768,-32768,-32768,    55,-32768,    51,
  -32768,    21,-32768,-32768,-32768,-32768,-32768,    53,    -5,-32768,
      54,    56,-32768,    57,-32768,-32768,    15,    58,-32768,-32768,
  -32768,-32768,    63,    65,-32768
};

/* YYPGOTO[NTERM] -- like yypact, but indexed by nonterminal, for the
   goto transitions.  */
static const short yypgoto[] =
{
  -32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,    12,
  -32768,-32768,-32768,-32768,-32768,   -33,     3,-32768,-32768,     2,
  -32768,-32768,    23,-32768,-32768,-32768,-32768
};
   287: 
   288: 
/* YYLAST -- last valid index into yytable/yycheck.  */
#define YYLAST          71
   290: 
   291: 
   292: static const short yytable[] =
   293: {
   294:       47,    48,    49,    85,    30,     2,    50,     3,    23,     4,
   295:        5,    14,     6,    44,     7,    24,    34,    51,    59,    75,
   296:       76,    35,    54,    25,    67,    89,    29,    55,    68,    65,
   297:       30,    82,    66,    73,    74,    83,    75,    76,    32,    15,
   298:       17,    18,    19,    20,    22,    26,    41,    40,    33,    43,
   299:       56,    42,    58,    90,    44,    70,    62,    63,    79,    64,
   300:       72,    88,    69,    93,    91,    94,    84,    87,    78,    60,
   301:       83,    81
   302: };
   303: 
   304: static const short yycheck[] =
   305: {
   306:        4,     5,     6,     8,    11,    15,    10,    17,     3,    19,
   307:       20,     7,    22,    18,    24,    10,    16,    21,    25,     4,
   308:        5,    21,     5,    10,     4,    10,     7,    10,     8,     7,
   309:       11,    10,    10,     4,     5,    14,     4,     5,    10,     6,
   310:        6,     6,     4,     6,     4,     3,    34,     4,     9,     8,
   311:        4,     7,     4,    86,    18,     4,     9,    12,     3,    10,
   312:        8,     4,    12,     0,     6,     0,    13,    13,    65,    46,
   313:       14,    69
   314: };
/* YYPURE -- reentrant (%pure_parser) parser: yychar/yylval/yynerrs are
   declared inside yyparse and yylex receives &yylval explicitly.  */
#define YYPURE 1
   316: 
   317: /* -*-C-*-  Note some compilers choke on comments on `#line' lines.  */
   318: 
   319: /* Skeleton output parser for bison,
   320: 
   321:    Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002 Free Software
   322:    Foundation, Inc.
   323: 
   324:    This program is free software; you can redistribute it and/or modify
   325:    it under the terms of the GNU General Public License as published by
   326:    the Free Software Foundation; either version 2, or (at your option)
   327:    any later version.
   328: 
   329:    This program is distributed in the hope that it will be useful,
   330:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   331:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   332:    GNU General Public License for more details.
   333: 
   334:    You should have received a copy of the GNU General Public License
   335:    along with this program; if not, write to the Free Software
   336:    Foundation, Inc., 59 Temple Place - Suite 330,
   337:    Boston, MA 02111-1307, USA.  */
   338: 
   339: /* As a special exception, when this file is copied by Bison into a
   340:    Bison output file, you may use that output file without restriction.
   341:    This special exception was added by the Free Software Foundation
   342:    in version 1.24 of Bison.  */
   343: 
   344: /* This is the parser code that is written into each bison parser when
   345:    the %semantic_parser declaration is not specified in the grammar.
   346:    It was written by Richard Stallman by simplifying the hairy parser
   347:    used when %semantic_parser is specified.  */
   348: 
   349: /* All symbols defined below should begin with yy or YY, to avoid
   350:    infringing on user name space.  This should be done even for local
   351:    variables, as they might otherwise be expanded by user macros.
   352:    There are some unavoidable exceptions within include files to
   353:    define necessary library symbols; they are noted "INFRINGES ON
   354:    USER NAME SPACE" below.  */
   355: 
#if ! defined (yyoverflow) || defined (YYERROR_VERBOSE)

/* The parser invokes alloca or malloc; define the necessary symbols.
   Preference order: caller-forced alloca, detected alloca,
   __builtin_alloca under GCC, then plain malloc/free.  */

# if YYSTACK_USE_ALLOCA
#  define YYSTACK_ALLOC alloca
# else
#  ifndef YYSTACK_USE_ALLOCA
#   if defined (alloca) || defined (_ALLOCA_H)
#    define YYSTACK_ALLOC alloca
#   else
#    ifdef __GNUC__
#     define YYSTACK_ALLOC __builtin_alloca
#    endif
#   endif
#  endif
# endif

# ifdef YYSTACK_ALLOC
   /* Pacify GCC's `empty if-body' warning.  alloca'd memory is
      released automatically, so YYSTACK_FREE is a no-op.  */
#  define YYSTACK_FREE(Ptr) do { /* empty */; } while (0)
# else
#  if defined (__STDC__) || defined (__cplusplus)
#   include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
#   define YYSIZE_T size_t
#  endif
#  define YYSTACK_ALLOC malloc
#  define YYSTACK_FREE free
# endif
#endif /* ! defined (yyoverflow) || defined (YYERROR_VERBOSE) */


#if (! defined (yyoverflow) \
     && (! defined (__cplusplus) \
         || (YYLTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))

/* A type that is properly aligned for any stack member.  */
union yyalloc
{
  short yyss;
  YYSTYPE yyvs;
# if YYLSP_NEEDED
  YYLTYPE yyls;
# endif
};

/* The size of the maximum gap between one aligned stack and the next.  */
# define YYSTACK_GAP_MAX (sizeof (union yyalloc) - 1)

/* The size of an array large enough to hold all stacks, each with
   N elements.  */
# if YYLSP_NEEDED
#  define YYSTACK_BYTES(N) \
     ((N) * (sizeof (short) + sizeof (YYSTYPE) + sizeof (YYLTYPE))      \
      + 2 * YYSTACK_GAP_MAX)
# else
#  define YYSTACK_BYTES(N) \
     ((N) * (sizeof (short) + sizeof (YYSTYPE))                         \
      + YYSTACK_GAP_MAX)
# endif

/* Copy COUNT objects from FROM to TO.  The source and destination do
   not overlap.  */
# ifndef YYCOPY
#  if 1 < __GNUC__
#   define YYCOPY(To, From, Count) \
      __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
#  else
#   define YYCOPY(To, From, Count)              \
      do                                        \
        {                                       \
          register YYSIZE_T yyi;                \
          for (yyi = 0; yyi < (Count); yyi++)   \
            (To)[yyi] = (From)[yyi];            \
        }                                       \
      while (0)
#  endif
# endif

/* Relocate STACK from its old location to the new one.  The
   local variables YYSIZE and YYSTACKSIZE give the old and new number of
   elements in the stack, and YYPTR gives the new location of the
   stack.  Advance YYPTR to a properly aligned location for the next
   stack.  Used when the parse stacks outgrow their initial arrays.  */
# define YYSTACK_RELOCATE(Stack)                                        \
    do                                                                  \
      {                                                                 \
        YYSIZE_T yynewbytes;                                            \
        YYCOPY (&yyptr->Stack, Stack, yysize);                          \
        Stack = &yyptr->Stack;                                          \
        yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAX;   \
        yyptr += yynewbytes / sizeof (*yyptr);                          \
      }                                                                 \
    while (0)

#endif


/* Fallback definitions of YYSIZE_T, the unsigned type used for stack
   sizes, tried in order of decreasing reliability.  */
#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
# define YYSIZE_T __SIZE_TYPE__
#endif
#if ! defined (YYSIZE_T) && defined (size_t)
# define YYSIZE_T size_t
#endif
#if ! defined (YYSIZE_T)
# if defined (__STDC__) || defined (__cplusplus)
#  include <stddef.h> /* INFRINGES ON USER NAME SPACE */
#  define YYSIZE_T size_t
# endif
#endif
#if ! defined (YYSIZE_T)
# define YYSIZE_T unsigned int
#endif
   469: 
#define yyerrok         (yyerrstatus = 0)
#define yyclearin       (yychar = YYEMPTY)
#define YYEMPTY         -2      /* "no lookahead token" marker */
#define YYEOF           0       /* internal symbol number for end of input */
#define YYACCEPT        goto yyacceptlab
#define YYABORT         goto yyabortlab
#define YYERROR         goto yyerrlab1
/* Like YYERROR except do call yyerror.  This remains here temporarily
   to ease the transition to the new meaning of YYERROR, for GCC.
   Once GCC version 2 has supplanted version 1, this can go.  */
#define YYFAIL          goto yyerrlab
#define YYRECOVERING()  (!!yyerrstatus)
/* YYBACKUP -- let a rule action supply the next token itself; only
   legal when no lookahead is pending and the rule has one RHS symbol.  */
#define YYBACKUP(Token, Value)                                  \
do                                                              \
  if (yychar == YYEMPTY && yylen == 1)                          \
    {                                                           \
      yychar = (Token);                                         \
      yylval = (Value);                                         \
      yychar1 = YYTRANSLATE (yychar);                           \
      YYPOPSTACK;                                               \
      goto yybackup;                                            \
    }                                                           \
  else                                                          \
    {                                                           \
      yyerror ("syntax error: cannot back up");                 \
      YYERROR;                                                  \
    }                                                           \
while (0)

#define YYTERROR        1       /* internal symbol number of "error" */
#define YYERRCODE       256


/* YYLLOC_DEFAULT -- Compute the default location (before the actions
   are run).

   When YYLLOC_DEFAULT is run, CURRENT is set the location of the
   first token.  By default, to implement support for ranges, extend
   its range to the last symbol.  */

#ifndef YYLLOC_DEFAULT
# define YYLLOC_DEFAULT(Current, Rhs, N)        \
   Current.last_line   = Rhs[N].last_line;      \
   Current.last_column = Rhs[N].last_column;
#endif


/* YYLEX -- calling `yylex' with the right arguments.  In this file
   YYPURE is 1 and YYLEX_PARAM is defined, so (with YYLSP_NEEDED
   apparently unset) YYLEX becomes yylex (&yylval, YYLEX_PARAM).  */

#if YYPURE
# if YYLSP_NEEDED
#  ifdef YYLEX_PARAM
#   define YYLEX                yylex (&yylval, &yylloc, YYLEX_PARAM)
#  else
#   define YYLEX                yylex (&yylval, &yylloc)
#  endif
# else /* !YYLSP_NEEDED */
#  ifdef YYLEX_PARAM
#   define YYLEX                yylex (&yylval, YYLEX_PARAM)
#  else
#   define YYLEX                yylex (&yylval)
#  endif
# endif /* !YYLSP_NEEDED */
#else /* !YYPURE */
# define YYLEX                  yylex ()
#endif /* !YYPURE */


/* Enable debugging if requested.  */
#if YYDEBUG

# ifndef YYFPRINTF
#  include <stdio.h> /* INFRINGES ON USER NAME SPACE */
#  define YYFPRINTF fprintf
# endif

# define YYDPRINTF(Args)                        \
do {                                            \
  if (yydebug)                                  \
    YYFPRINTF Args;                             \
} while (0)
/* Nonzero means print parse trace.  It is left uninitialized so that
   multiple parsers can coexist.  */
int yydebug;
#else /* !YYDEBUG */
# define YYDPRINTF(Args)
#endif /* !YYDEBUG */

/* YYINITDEPTH -- initial size of the parser's stacks.  */
#ifndef YYINITDEPTH
# define YYINITDEPTH 200
#endif

/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
   if the built-in stack extension method is used).

   Do not make this value too large; the results are undefined if
   SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH)
   evaluated with infinite-precision integer arithmetic.  */

#if YYMAXDEPTH == 0
# undef YYMAXDEPTH
#endif

#ifndef YYMAXDEPTH
# define YYMAXDEPTH 10000
#endif
   577: 
   578: #ifdef YYERROR_VERBOSE
   579: 
   580: # ifndef yystrlen
   581: #  if defined (__GLIBC__) && defined (_STRING_H)
   582: #   define yystrlen strlen
   583: #  else
   584: /* Return the length of YYSTR.  */
   585: static YYSIZE_T
   586: #   if defined (__STDC__) || defined (__cplusplus)
   587: yystrlen (const char *yystr)
   588: #   else
   589: yystrlen (yystr)
   590:      const char *yystr;
   591: #   endif
   592: {
   593:   register const char *yys = yystr;
   594: 
   595:   while (*yys++ != '\0')
   596:     continue;
   597: 
   598:   return yys - yystr - 1;
   599: }
   600: #  endif
   601: # endif
   602: 
   603: # ifndef yystpcpy
   604: #  if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE)
   605: #   define yystpcpy stpcpy
   606: #  else
   607: /* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
   608:    YYDEST.  */
   609: static char *
   610: #   if defined (__STDC__) || defined (__cplusplus)
   611: yystpcpy (char *yydest, const char *yysrc)
   612: #   else
   613: yystpcpy (yydest, yysrc)
   614:      char *yydest;
   615:      const char *yysrc;
   616: #   endif
   617: {
   618:   register char *yyd = yydest;
   619:   register const char *yys = yysrc;
   620: 
   621:   while ((*yyd++ = *yys++) != '\0')
   622:     continue;
   623: 
   624:   return yyd - 1;
   625: }
   626: #  endif
   627: # endif
   628: #endif
   629: 
   630: 
/* The user can define YYPARSE_PARAM as the name of an argument to be passed
   into yyparse.  The argument should have type void *.
   It should actually point to an object.
   Grammar actions can access the variable by casting it
   to the proper pointer type.  */

#ifdef YYPARSE_PARAM
# if defined (__STDC__) || defined (__cplusplus)
#  define YYPARSE_PARAM_ARG void *YYPARSE_PARAM
#  define YYPARSE_PARAM_DECL
# else
#  define YYPARSE_PARAM_ARG YYPARSE_PARAM
#  define YYPARSE_PARAM_DECL void *YYPARSE_PARAM;
# endif
#else /* !YYPARSE_PARAM */
# define YYPARSE_PARAM_ARG
# define YYPARSE_PARAM_DECL
#endif /* !YYPARSE_PARAM */

/* Prevent warning if -Wstrict-prototypes.  */
#ifdef __GNUC__
# ifdef YYPARSE_PARAM
int yyparse (void *);
# else
int yyparse (void);
# endif
#endif

/* YY_DECL_VARIABLES -- depending whether we use a pure parser,
   variables are global, or local to YYPARSE.  */

/* NOTE(review): the blank lines below follow lines ending in a
   backslash, so each macro continuation stops at the blank line --
   YY_DECL_NON_LSP_VARIABLES expands to only `int yychar;`, while
   `YYSTYPE yylval;` and `int yynerrs;` become file-scope globals.
   This looks like whitespace mangling of the original bison 1.35
   skeleton; confirm against pristine bison output before changing.  */
#define YY_DECL_NON_LSP_VARIABLES                       \
/* The lookahead symbol.  */                            \
int yychar;                                             \

/* The semantic value of the lookahead symbol. */       \
YYSTYPE yylval;                                         \

/* Number of parse errors so far.  */                   \
int yynerrs;

#if YYLSP_NEEDED
# define YY_DECL_VARIABLES                      \
YY_DECL_NON_LSP_VARIABLES                       \

/* Location data for the lookahead symbol.  */  \
YYLTYPE yylloc;
#else
# define YY_DECL_VARIABLES                      \
YY_DECL_NON_LSP_VARIABLES
#endif


/* If nonreentrant, generate the variables here. */

#if !YYPURE
YY_DECL_VARIABLES
#endif  /* !YYPURE */
   689: 
/* yyparse: the Bison-generated LALR(1) parser driver for the grammar
   file (yacc.c skeleton).  NOTE(review): this function is generated
   output -- prefer editing the grammar source and regenerating rather
   than hand-editing this file. */
int
yyparse (YYPARSE_PARAM_ARG)
     YYPARSE_PARAM_DECL
{
  /* If reentrant, generate the variables here. */
#if YYPURE
  YY_DECL_VARIABLES
#endif  /* !YYPURE */

  register int yystate;
  register int yyn;
  int yyresult;
  /* Number of tokens to shift before error messages enabled.  */
  int yyerrstatus;
  /* Lookahead token as an internal (translated) token number.  */
  int yychar1 = 0;

  /* Three stacks and their tools:
     `yyss': related to states,
     `yyvs': related to semantic values,
     `yyls': related to locations.

     Refer to the stacks thru separate pointers, to allow yyoverflow
     to reallocate them elsewhere.  */

  /* The state stack. */
  short yyssa[YYINITDEPTH];
  short *yyss = yyssa;
  register short *yyssp;

  /* The semantic value stack.  */
  YYSTYPE yyvsa[YYINITDEPTH];
  YYSTYPE *yyvs = yyvsa;
  register YYSTYPE *yyvsp;

#if YYLSP_NEEDED
  /* The location stack.  */
  YYLTYPE yylsa[YYINITDEPTH];
  YYLTYPE *yyls = yylsa;
  YYLTYPE *yylsp;
#endif

#if YYLSP_NEEDED
# define YYPOPSTACK   (yyvsp--, yyssp--, yylsp--)
#else
# define YYPOPSTACK   (yyvsp--, yyssp--)
#endif

  YYSIZE_T yystacksize = YYINITDEPTH;


  /* The variables used to return semantic value and location from the
     action routines.  */
  YYSTYPE yyval;
#if YYLSP_NEEDED
  YYLTYPE yyloc;
#endif

  /* When reducing, the number of symbols on the RHS of the reduced
     rule. */
  int yylen;

  YYDPRINTF ((stderr, "Starting parse\n"));

  yystate = 0;
  yyerrstatus = 0;
  yynerrs = 0;
  yychar = YYEMPTY;             /* Cause a token to be read.  */

  /* Initialize stack pointers.
     Waste one element of value and location stack
     so that they stay on the same level as the state stack.
     The wasted elements are never initialized.  */

  yyssp = yyss;
  yyvsp = yyvs;
#if YYLSP_NEEDED
  yylsp = yyls;
#endif
  goto yysetstate;

/*------------------------------------------------------------.
| yynewstate -- Push a new state, which is found in yystate.  |
`------------------------------------------------------------*/
 yynewstate:
  /* In all cases, when you get here, the value and location stacks
     have just been pushed. so pushing a state here evens the stacks.
     */
  yyssp++;

 yysetstate:
  *yyssp = yystate;

  if (yyssp >= yyss + yystacksize - 1)
    {
      /* Get the current used size of the three stacks, in elements.  */
      YYSIZE_T yysize = yyssp - yyss + 1;

#ifdef yyoverflow
      {
        /* Give user a chance to reallocate the stack. Use copies of
           these so that the &'s don't force the real ones into
           memory.  */
        YYSTYPE *yyvs1 = yyvs;
        short *yyss1 = yyss;

        /* Each stack pointer address is followed by the size of the
           data in use in that stack, in bytes.  */
# if YYLSP_NEEDED
        YYLTYPE *yyls1 = yyls;
        /* This used to be a conditional around just the two extra args,
           but that might be undefined if yyoverflow is a macro.  */
        yyoverflow ("parser stack overflow",
                    &yyss1, yysize * sizeof (*yyssp),
                    &yyvs1, yysize * sizeof (*yyvsp),
                    &yyls1, yysize * sizeof (*yylsp),
                    &yystacksize);
        yyls = yyls1;
# else
        yyoverflow ("parser stack overflow",
                    &yyss1, yysize * sizeof (*yyssp),
                    &yyvs1, yysize * sizeof (*yyvsp),
                    &yystacksize);
# endif
        yyss = yyss1;
        yyvs = yyvs1;
      }
#else /* no yyoverflow */
# ifndef YYSTACK_RELOCATE
      goto yyoverflowlab;
# else
      /* Extend the stack our own way.  */
      if (yystacksize >= YYMAXDEPTH)
        goto yyoverflowlab;
      yystacksize *= 2;
      if (yystacksize > YYMAXDEPTH)
        yystacksize = YYMAXDEPTH;

      {
        short *yyss1 = yyss;
        union yyalloc *yyptr =
          (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
        if (! yyptr)
          goto yyoverflowlab;
        YYSTACK_RELOCATE (yyss);
        YYSTACK_RELOCATE (yyvs);
# if YYLSP_NEEDED
        YYSTACK_RELOCATE (yyls);
# endif
# undef YYSTACK_RELOCATE
        if (yyss1 != yyssa)
          YYSTACK_FREE (yyss1);
      }
# endif
#endif /* no yyoverflow */

      yyssp = yyss + yysize - 1;
      yyvsp = yyvs + yysize - 1;
#if YYLSP_NEEDED
      yylsp = yyls + yysize - 1;
#endif

      YYDPRINTF ((stderr, "Stack size increased to %lu\n",
                  (unsigned long int) yystacksize));

      if (yyssp >= yyss + yystacksize - 1)
        YYABORT;
    }

  YYDPRINTF ((stderr, "Entering state %d\n", yystate));

  goto yybackup;


/*-----------.
| yybackup.  |
`-----------*/
yybackup:

/* Do appropriate processing given the current state.  */
/* Read a lookahead token if we need one and don't already have one.  */
/* yyresume: */

  /* First try to decide what to do without reference to lookahead token.  */

  yyn = yypact[yystate];
  if (yyn == YYFLAG)
    goto yydefault;

  /* Not known => get a lookahead token if don't already have one.  */

  /* yychar is either YYEMPTY or YYEOF
     or a valid token in external form.  */

  if (yychar == YYEMPTY)
    {
      YYDPRINTF ((stderr, "Reading a token: "));
      yychar = YYLEX;
    }

  /* Convert token to internal form (in yychar1) for indexing tables with */

  if (yychar <= 0)              /* This means end of input. */
    {
      yychar1 = 0;
      yychar = YYEOF;           /* Don't call YYLEX any more */

      YYDPRINTF ((stderr, "Now at end of input.\n"));
    }
  else
    {
      yychar1 = YYTRANSLATE (yychar);

#if YYDEBUG
     /* We have to keep this `#if YYDEBUG', since we use variables
        which are defined only if `YYDEBUG' is set.  */
      if (yydebug)
        {
          YYFPRINTF (stderr, "Next token is %d (%s",
                     yychar, yytname[yychar1]);
          /* Give the individual parser a way to print the precise
             meaning of a token, for further debugging info.  */
# ifdef YYPRINT
          YYPRINT (stderr, yychar, yylval);
# endif
          YYFPRINTF (stderr, ")\n");
        }
#endif
    }

  yyn += yychar1;
  if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1)
    goto yydefault;

  yyn = yytable[yyn];

  /* yyn is what to do for this token type in this state.
     Negative => reduce, -yyn is rule number.
     Positive => shift, yyn is new state.
       New state is final state => don't bother to shift,
       just return success.
     0, or most negative number => error.  */

  if (yyn < 0)
    {
      if (yyn == YYFLAG)
        goto yyerrlab;
      yyn = -yyn;
      goto yyreduce;
    }
  else if (yyn == 0)
    goto yyerrlab;

  if (yyn == YYFINAL)
    YYACCEPT;

  /* Shift the lookahead token.  */
  YYDPRINTF ((stderr, "Shifting token %d (%s), ",
              yychar, yytname[yychar1]));

  /* Discard the token being shifted unless it is eof.  */
  if (yychar != YYEOF)
    yychar = YYEMPTY;

  *++yyvsp = yylval;
#if YYLSP_NEEDED
  *++yylsp = yylloc;
#endif

  /* Count tokens shifted since error; after three, turn off error
     status.  */
  if (yyerrstatus)
    yyerrstatus--;

  yystate = yyn;
  goto yynewstate;


/*-----------------------------------------------------------.
| yydefault -- do the default action for the current state.  |
`-----------------------------------------------------------*/
yydefault:
  yyn = yydefact[yystate];
  if (yyn == 0)
    goto yyerrlab;
  goto yyreduce;


/*-----------------------------.
| yyreduce -- Do a reduction.  |
`-----------------------------*/
yyreduce:
  /* yyn is the number of a rule to reduce with.  */
  yylen = yyr2[yyn];

  /* If YYLEN is nonzero, implement the default value of the action:
     `$$ = $1'.

     Otherwise, the following line sets YYVAL to the semantic value of
     the lookahead token.  This behavior is undocumented and Bison
     users should not rely upon it.  Assigning to YYVAL
     unconditionally makes the parser a bit smaller, and it avoids a
     GCC warning that YYVAL may be used uninitialized.  */
  yyval = yyvsp[1-yylen];

#if YYLSP_NEEDED
  /* Similarly for the default location.  Let the user run additional
     commands if for instance locations are ranges.  */
  yyloc = yylsp[1-yylen];
  YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);
#endif

#if YYDEBUG
  /* We have to keep this `#if YYDEBUG', since we use variables which
     are defined only if `YYDEBUG' is set.  */
  if (yydebug)
    {
      int yyi;

      YYFPRINTF (stderr, "Reducing via rule %d (line %d), ",
                 yyn, yyrline[yyn]);

      /* Print the symbols being reduced, and their result.  */
      for (yyi = yyprhs[yyn]; yyrhs[yyi] > 0; yyi++)
        YYFPRINTF (stderr, "%s ", yytname[yyrhs[yyi]]);
      YYFPRINTF (stderr, " -> %s\n", yytname[yyr1[yyn]]);
    }
#endif

  /* The cases below are the semantic actions generated from the
     grammar rules; they build the GrammarAST nodes.  NOTE(review):
     presumably emitted from the elkhound grammar description --
     change them there, not here.  */
  switch (yyn) {

case 1:
{ ((ParseParams*)parseParam)->treeTop = new GrammarAST(yyvsp[0].topFormList); yyval.num=0; ;
    break;}
case 2:
{ yyval.topFormList = new ASTList<TopForm>; ;
    break;}
case 3:
{ (yyval.topFormList=yyvsp[-1].topFormList)->append(yyvsp[0].topForm); ;
    break;}
case 4:
{ yyval.topForm = yyvsp[0].topForm; ;
    break;}
case 5:
{ yyval.topForm = yyvsp[0].topForm; ;
    break;}
case 6:
{ yyval.topForm = yyvsp[0].topForm; ;
    break;}
case 7:
{ yyval.topForm = yyvsp[0].topForm; ;
    break;}
case 8:
{ yyval.topForm = yyvsp[0].topForm; ;
    break;}
case 9:
{ yyval.topForm = new TF_context(yyvsp[-1].str); ;
    break;}
case 10:
{ yyval.topForm = new TF_verbatim(false, yyvsp[0].str); ;
    break;}
case 11:
{ yyval.topForm = new TF_verbatim(true, yyvsp[0].str); ;
    break;}
case 12:
{ yyval.topForm = new TF_option(yyvsp[-1].str, 1); ;
    break;}
case 13:
{ yyval.topForm = new TF_option(yyvsp[-2].str, yyvsp[-1].num); ;
    break;}
case 14:
{ yyval.topForm = new TF_terminals(yyvsp[-3].termDecls, yyvsp[-2].termTypes, yyvsp[-1].precSpecs); ;
    break;}
case 15:
{ yyval.termDecls = new ASTList<TermDecl>; ;
    break;}
case 16:
{ (yyval.termDecls=yyvsp[-1].termDecls)->append(yyvsp[0].termDecl); ;
    break;}
case 17:
{ yyval.termDecl = new TermDecl(yyvsp[-3].num, yyvsp[-1].str, sameloc(yyvsp[-1].str, "")); ;
    break;}
case 18:
{ yyval.termDecl = new TermDecl(yyvsp[-4].num, yyvsp[-2].str, yyvsp[-1].str); ;
    break;}
case 19:
{ yyval.str = yyvsp[0].str; ;
    break;}
case 20:
{ yyval.str = nolocNULL(); ;
    break;}
case 21:
{ yyval.termTypes = new ASTList<TermType>; ;
    break;}
case 22:
{ (yyval.termTypes=yyvsp[-1].termTypes)->append(yyvsp[0].termType); ;
    break;}
case 23:
{ yyval.termType = new TermType(yyvsp[-1].str, yyvsp[-2].str, new ASTList<SpecFunc>); ;
    break;}
case 24:
{ yyval.termType = new TermType(yyvsp[-3].str, yyvsp[-4].str, yyvsp[-1].specFuncs); ;
    break;}
case 25:
{ yyval.precSpecs = new ASTList<PrecSpec>; ;
    break;}
case 26:
{ yyval.precSpecs = yyvsp[-1].precSpecs; ;
    break;}
case 27:
{ yyval.precSpecs = new ASTList<PrecSpec>; ;
    break;}
case 28:
{ (yyval.precSpecs=yyvsp[-4].precSpecs)->append(new PrecSpec(whichKind(yyvsp[-3].str), yyvsp[-2].num, yyvsp[-1].sm_stringList)); ;
    break;}
case 29:
{ yyval.sm_stringList = new ASTList<LocString>; ;
    break;}
case 30:
{ (yyval.sm_stringList=yyvsp[-1].sm_stringList)->append(yyvsp[0].str); ;
    break;}
case 31:
{ yyval.str = yyvsp[0].str; ;
    break;}
case 32:
{ yyval.str = yyvsp[0].str; ;
    break;}
case 33:
{ yyval.specFuncs = new ASTList<SpecFunc>; ;
    break;}
case 34:
{ (yyval.specFuncs=yyvsp[-1].specFuncs)->append(yyvsp[0].specFunc); ;
    break;}
case 35:
{ yyval.specFunc = new SpecFunc(yyvsp[-4].str, yyvsp[-2].sm_stringList, yyvsp[0].str); ;
    break;}
case 36:
{ yyval.sm_stringList = new ASTList<LocString>; ;
    break;}
case 37:
{ yyval.sm_stringList = yyvsp[0].sm_stringList; ;
    break;}
case 38:
{ yyval.sm_stringList = new ASTList<LocString>(yyvsp[0].str); ;
    break;}
case 39:
{ (yyval.sm_stringList=yyvsp[-2].sm_stringList)->append(yyvsp[0].str); ;
    break;}
case 40:
{ yyval.topForm = new TF_nonterm(yyvsp[-1].str, yyvsp[-2].str, new ASTList<SpecFunc>,
                                     new ASTList<ProdDecl>(yyvsp[0].prodDecl), NULL); ;
    break;}
case 41:
{ yyval.topForm = new TF_nonterm(yyvsp[-5].str, yyvsp[-6].str, yyvsp[-3].specFuncs, yyvsp[-2].prodDecls, yyvsp[-1].sm_stringList); ;
    break;}
case 42:
{ yyval.prodDecls = new ASTList<ProdDecl>; ;
    break;}
case 43:
{ (yyval.prodDecls=yyvsp[-1].prodDecls)->append(yyvsp[0].prodDecl); ;
    break;}
case 44:
{ yyval.prodDecl = new ProdDecl(yyvsp[-1].rhsList, yyvsp[0].str); ;
    break;}
case 45:
{ yyval.str = yyvsp[0].str; ;
    break;}
case 46:
{ yyval.str = nolocNULL(); ;
    break;}
case 47:
{ yyval.rhsList = new ASTList<RHSElt>; ;
    break;}
case 48:
{ (yyval.rhsList=yyvsp[-1].rhsList)->append(yyvsp[0].rhsElt); ;
    break;}
case 49:
{ yyval.rhsElt = new RH_name(sameloc(yyvsp[0].str, ""), yyvsp[0].str); ;
    break;}
case 50:
{ yyval.rhsElt = new RH_name(yyvsp[-2].str, yyvsp[0].str); ;
    break;}
case 51:
{ yyval.rhsElt = new RH_sm_string(sameloc(yyvsp[0].str, ""), yyvsp[0].str); ;
    break;}
case 52:
{ yyval.rhsElt = new RH_sm_string(yyvsp[-2].str, yyvsp[0].str); ;
    break;}
case 53:
{ yyval.rhsElt = new RH_prec(yyvsp[-1].str); ;
    break;}
case 54:
{ yyval.sm_stringList = NULL; ;
    break;}
case 55:
{ yyval.sm_stringList = yyvsp[-1].sm_stringList; ;
    break;}
}


  yyvsp -= yylen;
  yyssp -= yylen;
#if YYLSP_NEEDED
  yylsp -= yylen;
#endif

#if YYDEBUG
  if (yydebug)
    {
      short *yyssp1 = yyss - 1;
      YYFPRINTF (stderr, "state stack now");
      while (yyssp1 != yyssp)
        YYFPRINTF (stderr, " %d", *++yyssp1);
      YYFPRINTF (stderr, "\n");
    }
#endif

  *++yyvsp = yyval;
#if YYLSP_NEEDED
  *++yylsp = yyloc;
#endif

  /* Now `shift' the result of the reduction.  Determine what state
     that goes to, based on the state we popped back to and the rule
     number reduced by.  */

  yyn = yyr1[yyn];

  yystate = yypgoto[yyn - YYNTBASE] + *yyssp;
  if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp)
    yystate = yytable[yystate];
  else
    yystate = yydefgoto[yyn - YYNTBASE];

  goto yynewstate;


/*------------------------------------.
| yyerrlab -- here on detecting error |
`------------------------------------*/
yyerrlab:
  /* If not already recovering from an error, report this error.  */
  if (!yyerrstatus)
    {
      ++yynerrs;

#ifdef YYERROR_VERBOSE
      yyn = yypact[yystate];

      if (yyn > YYFLAG && yyn < YYLAST)
        {
          YYSIZE_T yysize = 0;
          char *yymsg;
          int yyx, yycount;

          yycount = 0;
          /* Start YYX at -YYN if negative to avoid negative indexes in
             YYCHECK.  */
          for (yyx = yyn < 0 ? -yyn : 0;
               yyx < (int) (sizeof (yytname) / sizeof (char *)); yyx++)
            if (yycheck[yyx + yyn] == yyx)
              yysize += yystrlen (yytname[yyx]) + 15, yycount++;
          yysize += yystrlen ("parse error, unexpected ") + 1;
          yysize += yystrlen (yytname[YYTRANSLATE (yychar)]);
          yymsg = (char *) YYSTACK_ALLOC (yysize);
          if (yymsg != 0)
            {
              char *yyp = yystpcpy (yymsg, "parse error, unexpected ");
              yyp = yystpcpy (yyp, yytname[YYTRANSLATE (yychar)]);

              if (yycount < 5)
                {
                  yycount = 0;
                  for (yyx = yyn < 0 ? -yyn : 0;
                       yyx < (int) (sizeof (yytname) / sizeof (char *));
                       yyx++)
                    if (yycheck[yyx + yyn] == yyx)
                      {
                        const char *yyq = ! yycount ? ", expecting " : " or ";
                        yyp = yystpcpy (yyp, yyq);
                        yyp = yystpcpy (yyp, yytname[yyx]);
                        yycount++;
                      }
                }
              yyerror (yymsg);
              YYSTACK_FREE (yymsg);
            }
          else
            yyerror ("parse error; also virtual memory exhausted");
        }
      else
#endif /* defined (YYERROR_VERBOSE) */
        yyerror ("parse error");
    }
  goto yyerrlab1;


/*--------------------------------------------------.
| yyerrlab1 -- error raised explicitly by an action |
`--------------------------------------------------*/
yyerrlab1:
  if (yyerrstatus == 3)
    {
      /* If just tried and failed to reuse lookahead token after an
         error, discard it.  */

      /* return failure if at end of input */
      if (yychar == YYEOF)
        YYABORT;
      YYDPRINTF ((stderr, "Discarding token %d (%s).\n",
                  yychar, yytname[yychar1]));
      yychar = YYEMPTY;
    }

  /* Else will try to reuse lookahead token after shifting the error
     token.  */

  yyerrstatus = 3;              /* Each real token shifted decrements this */

  goto yyerrhandle;


/*-------------------------------------------------------------------.
| yyerrdefault -- current state does not do anything special for the |
| error token.                                                       |
`-------------------------------------------------------------------*/
yyerrdefault:
#if 0
  /* This is wrong; only states that explicitly want error tokens
     should shift them.  */

  /* If its default is to accept any token, ok.  Otherwise pop it.  */
  yyn = yydefact[yystate];
  if (yyn)
    goto yydefault;
#endif


/*---------------------------------------------------------------.
| yyerrpop -- pop the current state because it cannot handle the |
| error token                                                    |
`---------------------------------------------------------------*/
yyerrpop:
  if (yyssp == yyss)
    YYABORT;
  yyvsp--;
  yystate = *--yyssp;
#if YYLSP_NEEDED
  yylsp--;
#endif

#if YYDEBUG
  if (yydebug)
    {
      short *yyssp1 = yyss - 1;
      YYFPRINTF (stderr, "Error: state stack now");
      while (yyssp1 != yyssp)
        YYFPRINTF (stderr, " %d", *++yyssp1);
      YYFPRINTF (stderr, "\n");
    }
#endif

/*--------------.
| yyerrhandle.  |
`--------------*/
yyerrhandle:
  yyn = yypact[yystate];
  if (yyn == YYFLAG)
    goto yyerrdefault;

  yyn += YYTERROR;
  if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR)
    goto yyerrdefault;

  yyn = yytable[yyn];
  if (yyn < 0)
    {
      if (yyn == YYFLAG)
        goto yyerrpop;
      yyn = -yyn;
      goto yyreduce;
    }
  else if (yyn == 0)
    goto yyerrpop;

  if (yyn == YYFINAL)
    YYACCEPT;

  YYDPRINTF ((stderr, "Shifting error token, "));

  *++yyvsp = yylval;
#if YYLSP_NEEDED
  *++yylsp = yylloc;
#endif

  yystate = yyn;
  goto yynewstate;


/*-------------------------------------.
| yyacceptlab -- YYACCEPT comes here.  |
`-------------------------------------*/
yyacceptlab:
  yyresult = 0;
  goto yyreturn;

/*-----------------------------------.
| yyabortlab -- YYABORT comes here.  |
`-----------------------------------*/
yyabortlab:
  yyresult = 1;
  goto yyreturn;

/*---------------------------------------------.
| yyoverflowab -- parser overflow comes here.  |
`---------------------------------------------*/
yyoverflowlab:
  yyerror ("parser stack overflow");
  yyresult = 2;
  /* Fall through.  */

yyreturn:
#ifndef yyoverflow
  if (yyss != yyssa)
    YYSTACK_FREE (yyss);
#endif
  return yyresult;
}
  1418: 
  1419: /* ------------------ extra C code ------------------ */
  1420: AssocKind whichKind(LocString * /*owner*/ kind)
  1421: {
  1422:   // delete 'kind' however we exit
  1423:   Owner<LocString> killer(kind);
  1424: 
  1425:   #define CHECK(syntax, value)   \
  1426:     if (kind->equals(syntax)) {  \
  1427:       return value;              \
  1428:     }
  1429:   CHECK("left", AK_LEFT);
  1430:   CHECK("right", AK_RIGHT);
  1431:   CHECK("nonassoc", AK_NONASSOC);
  1432:   CHECK("prec", AK_NEVERASSOC);
  1433:   CHECK("assoc_split", AK_SPLIT);
  1434:   #undef CHECK
  1435: 
  1436:   xbase(sm_stringc << kind->locString()
  1437:                 << ": invalid associativity kind: " << *kind);
  1438: }
End cpp section to elk/elk_grampar.tab.cpp[1]
Start cpp section to elk/elk_mlsstr.cpp[1 /1 ]
     1: #line 21043 "./lpsrc/elk.pak"
     2: // mlsstr.cc            see license.txt for copyright and terms of use
     3: // code for mlsstr.h
     4: // based on ccsstr.cc
     5: 
     6: #include "elk_mlsstr.h"
     7: #include "sm_xassert.h"
     8: #include "sm_exc.h"
     9: #include "sm_strutil.h"
    10: 
    11: #include <iostream>    // std::cout
    12: #include <ctype.h>       // isspace
    13: 
    14: 
    15: MLSubstrate::MLSubstrate(ReportError *err)
    16:   : EmbeddedLang(err)
    17: {
    18:   reset();
    19: }
    20: 
    21: void MLSubstrate::reset(int initNest)
    22: {
    23:   state = ST_NORMAL;
    24:   nesting = initNest;
    25:   comNesting = 0;
    26:   prev = 0;
    27:   text.setlength(0);
    28: }
    29: 
    30: 
    31: MLSubstrate::~MLSubstrate()
    32: {}
    33: 
    34: 
// Consume 'len' characters starting at 'str': append them to the
// accumulated 'text' and advance the scanning state machine, which
// tracks delimiter nesting, sm_string/char literals, and nested
// (* ... *) comments.  'finalDelim' is used only to improve the error
// message when an unmatched closing delimiter is seen.
void MLSubstrate::handle(char const *str, int len, char finalDelim)
{
  // everything fed in becomes part of the captured text, regardless
  // of what state the scanner is in
  text.append(str, len);

  for (; len>0; len--,str++) {
    switch (state) {
      case ST_NORMAL:
        // any opening bracket deepens the nesting count
        switch (*str) {
          case '{':
          case '(':
          case '[':
            nesting++;
            break;

          // note: closers are not matched against the specific opener;
          // any closer balances any opener
          case '}':
          case ')':
          case ']':
            if (nesting == 0) {
              err->reportError(sm_stringc
                << "unexpected closing delimiter `" << *str
                << "' -- probably due to missing `" << finalDelim << "'");
            }
            else {
              nesting--;
            }
            break;

          case '\"':
            state = ST_STRING;
            break;

          case '\'':
            state = ST_CHAR;
            break;

          case '*':
            // "(*" opens a comment; the '(' was already counted as a
            // delimiter, so that count is reverted below
            if (prev == '(') {
              state = ST_COMMENT;
              xassert(comNesting == 0);
              xassert(nesting > 0);
              nesting--;     // undo 'nesting++' from the '('

              // if the next char is ')', i.e. input was "(*)", do
              // not allow it to use this '*' to finish the comment
              prev = 0;
              continue;
            }
            break;
        }
        break;

      case ST_STRING:
      case ST_CHAR:
        // inside a literal; only an unescaped matching quote (or a
        // newline, which is an error) changes state
        if (prev != '\\') {
          if ((state == ST_STRING && *str == '\"') ||
              (state == ST_CHAR && *str == '\'')) {
            state = ST_NORMAL;
          }
          else if (*str == '\n') {
            err->reportError("unterminated sm_string or char literal");
          }
        }
        break;

      case ST_COMMENT:
        // 'comNesting' counts comment opens *beyond* the outermost
        // one; we only leave ST_COMMENT when it is back at 0
        if (prev == '(' && *str == '*') {
          comNesting++;
          prev = 0;      // like above
          continue;
        }
        else if (prev == '*' && *str == ')') {
          xassert(comNesting >= 0);
          if (comNesting == 0) {
            // done with comment
            state = ST_NORMAL;
          }
          else {
            // decrease nesting
            comNesting--;
          }
        }
        break;

      default:
        xfailure("unknown state");
    }

    // remember the previous character for two-char sequences like
    // "(*", "*)" and backslash escapes (cleared via 'continue' above
    // when a char must not participate in the next pair)
    prev = *str;
  }
}
   125: 
   126: 
   127: bool MLSubstrate::zeroNesting() const
   128: {
   129:   return state == ST_NORMAL && nesting == 0;
   130: }
   131: 
   132: 
   133: sm_string MLSubstrate::getFuncBody() const
   134: {
   135:   return text;
   136: }
   137: 
   138: 
   139: // 4/29/04: I have no idea if this is right or not.. this is the
   140: // definition from ccsstr.cc.
   141: sm_string MLSubstrate::getDeclName() const
   142: {
   143:   // go with the rather inelegant heuristic that the word
   144:   // just before the first '(' is the function's name
   145:   char const *start = text.pcharc();
   146:   char const *p = start;
   147: 
   148:   // find first '('
   149:   while (*p && *p!='(') { p++; }
   150:   if (!*p) {
   151:     xformat("missing '('");
   152:   }
   153:   if (p == start) {
   154:     xformat("missing name");
   155:   }
   156: 
   157:   // skip backward past any whitespace before the '('
   158:   p--;
   159:   while (p>=start && isspace(*p)) { p--; }
   160:   if (p<start) {
   161:     xformat("missing name");
   162:   }
   163:   char const *nameEnd = p+1;    // char just past last
   164: 
   165:   // move backward through the name
   166:   while (p>=start &&
   167:          (isalnum(*p) || *p=='_'))
   168:     { p--; }
   169:   p++;    // move back to most recent legal char
   170: 
   171:   // done
   172:   return sm_string(p, nameEnd-p);
   173: }
   174: 
   175: 
   176: // ------------------ test code -------------------
   177: #ifdef TEST_MLSSTR
   178: 
   179: #define ML MLSubstrate
   180: #define Test MLSubstrateTest
   181: 
// test code is put into a class just so that MLSubstrate
// can grant it access to private fields
class Test {
public:
  // feed 'src' into 'ml' in small chunks
  void feed(ML &ml, char const *src);
  // feed 'src' and verify the substrate's final internal state
  void test(char const *src, ML::State state, int nesting,
            int comNesting, char prev);
  // expect ST_NORMAL at the given delimiter depth
  void normal(char const *src, int nesting);
  // expect ST_STRING (and, retried with ticks, ST_CHAR); 'bs' means
  // the input ends mid backslash-escape
  void str(char const *src, int nesting, bool bs);
  // expect balanced (zeroNesting) / unbalanced input
  void yes(char const *src);
  void no(char const *src);
  // expect getDeclName() to yield 'n' / to throw
  void name(char const *body, char const *n);
  void badname(char const *body);
  int main();
};
   197: 
   198: 
   199: #define min(a,b) ((a)<(b)?(a):(b))
   200: 
   201: void Test::feed(ML &ml, char const *src)
   202: {
   203:   std::cout << "trying: " << src << std::endl;
   204:   while (*src) {
   205:     // feed it in 10 char increments, to test split processing too
   206:     int len = min(strlen(src), 10);
   207:     ml.handle(src, len, '}');
   208:     src += len;
   209:   }
   210: }
   211: 
   212: 
// Feed 'src' and verify the substrate's final internal state.
// NOTE(review): the 'comNesting' parameter is accepted but never
// compared; enabling the check would require auditing the callers'
// expectations first -- e.g. main() passes 4 for "(*(*(*(*" while
// the implementation stores the *extra* comment depth (3).
void Test::test(char const *src, ML::State state, int nesting,
                int comNesting, char prev)
{
  ML ml;
  feed(ml, src);

  if (!( ml.state == state &&
         ml.nesting == nesting &&
         ml.prev == prev )) {
    xfailure(sm_stringc << "failed on src: " << src);
  }
}
   225: 
   226: 
   227: void Test::normal(char const *src, int nesting)
   228: {
   229:   test(src, ML::ST_NORMAL, nesting, 0, src[strlen(src)-1]);
   230: }
   231: 
   232: void Test::str(char const *src, int nesting, bool bs)
   233: {
   234:   char prev = (bs? '\\' : src[strlen(src)-1]);
   235:   test(src, ML::ST_STRING, nesting, 0, prev);
   236: 
   237:   // repeat the test with single-tick
   238:   sm_string another = replace(src, "\"", "\'");
   239:   test(another, ML::ST_CHAR, nesting, 0, prev);
   240: }
   241: 
   242: 
   243: void Test::yes(char const *src)
   244: {
   245:   ML ml;
   246:   feed(ml, src);
   247: 
   248:   xassert(ml.zeroNesting());
   249: }
   250: 
   251: void Test::no(char const *src)
   252: {
   253:   ML ml;
   254:   feed(ml, src);
   255: 
   256:   xassert(!ml.zeroNesting());
   257: }
   258: 
   259: void Test::name(char const *body, char const *n)
   260: {
   261:   ML ml;
   262:   feed(ml, body);
   263:   xassert(ml.getDeclName().equals(n));
   264: }
   265: 
   266: void Test::badname(char const *body)
   267: {
   268:   ML ml;
   269:   feed(ml, body);
   270:   try {
   271:     ml.getDeclName();
   272:     xfailure("got a name when it shoudn't have!");
   273:   }
   274:   catch (...)
   275:     {}
   276: }
   277: 
   278: 
// Run the whole self-test suite; each helper aborts via xassert /
// xfailure on mismatch, so reaching the end means everything passed.
int Test::main()
{
  // bracket nesting in normal (non-string, non-comment) state
  normal("int main()", 0);
  normal("int main() { hi", 1);
  normal("int main() { hi {", 2);
  normal("int main() { hi { foo[5", 3);
  normal("int main() { hi { foo[5] and ", 2);
  normal("int main() { hi { foo[5] and } bar ", 1);
  normal("int main() { hi { foo[5] and } bar } baz ", 0);

  // brackets inside a completed string literal don't count
  normal("main() { printf(\"hello \\ world\"); ret", 1);

  // mixed bracket kinds, all balanced
  normal("()[]{}([{}])", 0);
  normal("{ ()[]{}([{}]) } ", 0);
  normal("( ()[]{}([{}]) )", 0);
  normal("[ ()[]{}([{}]) ]", 0);
  normal("\"foo\" ()[]{}([{}])", 0);

  // inputs that end inside a string (and, via str(), a char) literal
  str("main() { printf(\"hello", 2, false);
  str("main() { printf(\"hello \\", 2, true);
  str("main() { printf(\"hello \\ world", 2, false);
  str("main() { printf(\"hello \\ world\", \"hi", 2, false);

  test("\"a\" 'b' (", ML::ST_NORMAL, 1, 0, '(');

  // test comments, particularly testing
  test("(", ML::ST_NORMAL, 1, 0, '(');
  test("(*", ML::ST_COMMENT, 0, 0, 0);
  test("(*)", ML::ST_COMMENT, 0, 0, ')');
  test("(*)(", ML::ST_COMMENT, 0, 0, '(');
  test("(*)(*", ML::ST_COMMENT, 0, 1, 0);
  test("(*)(*)", ML::ST_COMMENT, 0, 1, ')');
  test("(*)(*)*", ML::ST_COMMENT, 0, 1, '*');
  test("(*)(*)*)", ML::ST_COMMENT, 0, 0, ')');
  test("(*)(*)*)*", ML::ST_COMMENT, 0, 0, '*');
  test("(*)(*)*)*)", ML::ST_NORMAL, 0, 0, ')');

  // deeply nested comments
  test("(*(*(*(*", ML::ST_COMMENT, 0, 4, 0);

  // inputs that must report balanced nesting
  yes("main() {}");
  yes("main() { printf(\"foo\", 3, 4 (*yep{*)); }");
  yes("some (* junk {\n more*)");
  yes("'\\''");
  yes("\"\\\"\"");
  yes("[][][][][]");
  yes("\"[[[\"");
  yes("*");
  yes("(* [ / * [ *)");

  // inputs that must report unbalanced nesting
  no("\"");
  no("(");
  no(" ( (* ) *) ");

  // declaration-name extraction
  name("int main()", "main");
  name("int eval(Environment &env)", "eval");
  name("man()", "man");
  badname("(");
  badname("  (");
  badname("  ");
  badname("");
  badname(")");
  badname("main");

  std::cout << "\nmlsstr: all tests PASSED\n";

  return 0;
}
   346: 
   347: int main()
   348: {
   349:   Test t;
   350:   return t.main();
   351: }
   352: 
   353: #endif // TEST_MLSSTR
End cpp section to elk/elk_mlsstr.cpp[1]
Start cpp section to elk/elk_parsetables.cpp[1 /1 ]
     1: #line 21397 "./lpsrc/elk.pak"
     2: // parsetables.cc            see license.txt for copyright and terms of use
     3: // code for parsetables.h
     4: 
     5: #include "elk_parsetables.h"
     6: #include "sm_bflatten.h"
     7: #include "sm_trace.h"
     8: #include "sm_crc.h"
     9: #include "elk_emitcode.h"
    10: #include "sm_bit2d.h"
    11: 
    12: #include <string.h>         // memset
    13: #include <stdlib.h>         // qsort, system
    14: 
    15: 
// sentinel for array-index slots that have not been assigned a value yet
enum { UNASSIGNED = -1 };
    18: 
    19: 
    20: // fwd
    21: template <class EltType>
    22: void printTable(EltType const *table, int size, int rowLength,
    23:                 char const *typeName, char const *tableName);
    24: 
    25: 
// Construct tables for 't' terminals, 'nt' nonterminals, 's' states and
// 'p' productions; 'start' is the start state and 'final' is the index
// of the final production.  All table storage is allocated by alloc().
ParseTables::ParseTables(int t, int nt, int s, int p, StateId start, int final)
{
  alloc(t, nt, s, p, start, final);
}
    30: 
    31: template <class T>
    32: void allocInitArray(T *&arr, int size, T init)
    33: {
    34:   arr = new T[size];
    35:   for (int i=0; i<size; i++) {
    36:     arr[i] = init;
    37:   }
    38: }
    39: 
    40: template <class T>
    41: void allocZeroArray(T *&arr, int size)
    42: {
    43:   arr = new T[size];
    44:   memset(arr, 0, sizeof(arr[0]) * size);
    45: }
    46: 
// Allocate and default-initialize all table storage; see the
// constructor for the meaning of the parameters.  'temp' holds
// growable scratch arrays that finishTables() later freezes into
// their final fixed-size forms.
void ParseTables::alloc(int t, int nt, int s, int p, StateId start, int final)
{
  // tables allocated here are owned (and deleted) by this object
  owning = true;

  temp = new TempData(s);

  numTerms = t;
  numNonterms = nt;
  numStates = s;
  numProds = p;

  // uncompressed layout: one action cell per (state, terminal)
  actionCols = numTerms;
  actionRows = numStates;

  // one goto cell per (state, nonterminal)
  gotoCols = numNonterms;
  gotoRows = numStates;

  allocZeroArray(actionTable, actionTableSize());

  allocZeroArray(gotoTable, gotoTableSize());

  allocZeroArray(prodInfo, numProds);

  allocZeroArray(stateSymbol, numStates);

  // table of ambiguous actions is NULL until someone fills in the
  // whole thing; since we don't know how many there might be, we
  // can't even allocate the storage now
  ambigTableSize = 0;
  ambigTable = NULL;

  startState = start;
  finalProductionIndex = final;

  allocZeroArray(nontermOrder, nontermOrderSize());

  // per-symbol first-state maps exist only under CRS compression
  if (ENABLE_CRS_COMPRESSION) {
    allocZeroArray(firstWithTerminal, numTerms);
    allocZeroArray(firstWithNonterminal, numNonterms);
  }
  else {
    firstWithTerminal = NULL;
    firstWithNonterminal = NULL;
  }

  bigProductionListSize = 0;
  bigProductionList = NULL;
  if (ENABLE_CRS_COMPRESSION) {
    allocZeroArray(productionsForState, numStates);
  }
  else {
    productionsForState = NULL;
  }

  if (ENABLE_CRS_COMPRESSION) {
    allocZeroArray(ambigStateTable, numStates);
  }
  else {
    ambigStateTable = NULL;
  }

  // # of bytes, but rounded up to nearest 32-bit boundary
  errorBitsRowSize = ((numTerms+31) >> 5) * 4;

  // no compressed info
  uniqueErrorRows = 0;
  errorBits = NULL;
  errorBitsPointers = NULL;

  actionIndexMap = NULL;
  actionRowPointers = NULL;

  gotoIndexMap = NULL;
  gotoRowPointers = NULL;
}
   122: 
   123: 
   124: ParseTables::~ParseTables()
   125: {
   126:   if (temp) {
   127:     delete temp;
   128:   }
   129: 
   130:   if (owning) {
   131:     delete[] actionTable;
   132:     delete[] gotoTable;
   133:     delete[] prodInfo;
   134:     delete[] stateSymbol;
   135: 
   136:     if (ambigTable) {
   137:       delete[] ambigTable;
   138:     }
   139: 
   140:     delete[] nontermOrder;
   141: 
   142:     if (firstWithTerminal) {
   143:       delete[] firstWithTerminal;
   144:     }
   145:     if (firstWithNonterminal) {
   146:       delete[] firstWithNonterminal;
   147:     }
   148: 
   149:     if (bigProductionList) {
   150:       delete[] bigProductionList;
   151:     }
   152: 
   153:     if (errorBits) {
   154:       delete[] errorBits;
   155:     }
   156:     if (actionIndexMap) {
   157:       delete[] actionIndexMap;
   158:     }
   159:     if (gotoIndexMap) {
   160:       delete[] gotoIndexMap;
   161:     }
   162:   }
   163: 
   164:   // these are always owned
   165:   if (productionsForState) {
   166:     delete[] productionsForState;
   167:   }
   168:   if (ambigStateTable) {
   169:     delete[] ambigStateTable;
   170:   }
   171:   if (errorBitsPointers) {
   172:     delete[] errorBitsPointers;
   173:   }
   174:   if (actionRowPointers) {
   175:     delete[] actionRowPointers;
   176:   }
   177:   if (gotoRowPointers) {
   178:     delete[] gotoRowPointers;
   179:   }
   180: }
   181: 
   182: 
// Scratch data used while the tables are being built; the growable
// arrays here are copied into their final fixed-size forms by
// ParseTables::finishTables().
ParseTables::TempData::TempData(int numStates)
  : ambigTable(),
    bigProductionList(),
    productionsForState(numStates),
    ambigStateTable(numStates)
{
  // mark every per-state slot as not-yet-assigned
  productionsForState.setAll(UNASSIGNED);
  ambigStateTable.setAll(UNASSIGNED);
}
   192: 
// nothing to do explicitly; members clean themselves up
ParseTables::TempData::~TempData()
{}
   195: 
   196: 
   197: ActionEntry ParseTables::validateAction(int code) const
   198: {
   199:   // make sure that 'code' is representable; if this fails, most likely
   200:   // there are more than 32k states or productions; in turn, the most
   201:   // likely cause of *that* would be the grammar is being generated
   202:   // automatically from some other specification; you can change the
   203:   // typedefs of ActionEntry and GotoEntry in gramanl.h to get more
   204:   // capacity
   205:   ActionEntry ret = (ActionEntry)code;
   206:   xassert((int)ret == code);
   207:   return ret;
   208: }
   209: 
   210: GotoEntry ParseTables::validateGoto(int code) const
   211: {
   212:   // see above
   213:   GotoEntry ret = (GotoEntry)code;
   214:   xassert((int)ret == code);
   215:   xassert(ret != errorGotoEntry);    // otherwise collision with error code
   216:   return ret;
   217: }
   218: 
   219: 
   220: // doesn't init anything; for use by emitConstructionCode's emitted code
// doesn't init anything; for use by emitConstructionCode's emitted code
ParseTables::ParseTables(bool o)
  : owning(o),
    temp(NULL)
{
  // emitted code always constructs non-owning tables
  xassert(owning == false);
}
   227: 
   228: 
   229: #if ENABLE_CRS_COMPRESSION
   230: ActionEntry makeAE(ActionEntryKind k, int index)
   231: {
   232:   // must fit into 6 bits for my encoding
   233:   if ((unsigned)index <= AE_MAXINDEX) {
   234:     // ok
   235:   }
   236:   else {
   237:     // this is just so I can see the resulting truncated table;
   238:     // the parser will *not* work
   239:     std::cout << "error: index " << index << " truncated!\n";
   240:     index = AE_MAXINDEX;
   241:   }
   242: 
   243:   if (k == AE_ERROR) {
   244:     xassert(index == 0);
   245:   }
   246: 
   247:   return k | index;
   248: }
   249: #endif
   250: 
   251: 
// Encode a shift to 'destState' on terminal 'shiftedTermId'.
ActionEntry ParseTables::encodeShift(StateId destState, int shiftedTermId)
{
  #if ENABLE_CRS_COMPRESSION
    // compressed: store the offset from the first state reachable
    // on this terminal
    int delta = destState - firstWithTerminal[shiftedTermId];
    return makeAE(AE_SHIFT, delta);
  #else
    // uncompressed: shifts are encoded as destState+1 (positive codes)
    return validateAction(+destState+1);
  #endif
}
   261: 
   262: 
// Encode a reduction by production 'prodId' taken in state
// 'inWhatState'.  Under CRS compression the per-state production sets
// are accumulated in temp->bigProductionList and the cell stores only
// an offset into the state's set.
ActionEntry ParseTables::encodeReduce(int prodId, StateId inWhatState)
{
  #if ENABLE_CRS_COMPRESSION
    int begin = temp->productionsForState[inWhatState];
    int end = temp->bigProductionList.length();
    if (begin == UNASSIGNED) {
      // starting a new set of per-state productions
      temp->productionsForState[inWhatState] = end;
      temp->bigProductionList.push(prodId);
      return AE_REDUCE | 0 /*first in set*/;
    }
    else {
      // continuing a set; search for existing 'prodId' in that set
      int delta;
      for (int i=begin; i<end; i++) {
        if (temp->bigProductionList[i] == prodId) {
          // re-use this offset
          delta = i-begin;
          goto encode;
        }
      }

      // not found: add another production id to this set
      temp->bigProductionList.push(prodId);
      delta = end-begin;

    encode:
      return makeAE(AE_REDUCE, delta);
    }

  #else
    // uncompressed: reductions are encoded as -(prodId+1) (negative codes)
    return validateAction(-prodId-1);
  #endif
}
   297: 
   298: 
// Encode an ambiguous action: 'set' is the list of conflicting actions
// applicable in state 'inWhatState'.  The set is appended to the shared
// ambiguity table (or an identical existing set is re-used) and the
// cell stores a reference to it.
ActionEntry ParseTables::encodeAmbig
  (ArrayStack<ActionEntry> const &set, StateId inWhatState)
{
  #if ENABLE_CRS_COMPRESSION
    int begin = temp->ambigStateTable[inWhatState];
    int end = temp->ambigTable.length();
    if (begin == UNASSIGNED) {
      // starting a new set of per-state ambiguous actions
      temp->ambigStateTable[inWhatState] = end;
      appendAmbig(set);
      return makeAE(AE_AMBIGUOUS, 0 /*first in set*/);
    }
    else {
      // continuing a set: Look for another ambiguous action set in
      // the same line that has identical contents.  Due to the way
      // sets are constructed, their representation is canonical.
      // This is important because some grammars (cc2) have many
      // ambiguous entries, but they're all the same set of actions;
      // were we to not consolidate like this, the 6-bit cell encoding
      // would not be enough.

      // # of big-table entries that will be used
      int encodeLen = set.length()+1;

      for (int i=begin; i+encodeLen <= end; i++) {
        // does this offset contain the same set of actions?
        if (compareAmbig(set, i)) {
          return makeAE(AE_AMBIGUOUS, i-begin /*delta*/);
        }
      }

      // no match
      appendAmbig(set);
      return makeAE(AE_AMBIGUOUS, end-begin /*delta*/);
    }

  #else
    // uncompressed: cell stores numStates+index+1 to distinguish
    // ambiguous entries from shifts and reductions
    int end = temp->ambigTable.length();
    appendAmbig(set);
    return validateAction(numStates+end+1);
  #endif
}
   341: 
   342: 
   343: void ParseTables::appendAmbig(ArrayStack<ActionEntry> const &set)
   344: {
   345:   temp->ambigTable.push(set.length());
   346:   for (int j=0; j < set.length(); j++) {
   347:     temp->ambigTable.push(set[j]);
   348:   }
   349: }
   350: 
   351: bool ParseTables::compareAmbig(ArrayStack<ActionEntry> const &set,
   352:                                int startIndex)
   353: {
   354:   if (temp->ambigTable[startIndex] != set.length()) {
   355:     return false;           // mismatch in 1st entry
   356:   }
   357:   for (int j=0; j < set.length(); j++) {
   358:     if (temp->ambigTable[startIndex+1+j] != set[j]) {
   359:       return false;         // mismatch in j+2nd entry
   360:     }
   361:   }
   362:   return true;              // match!
   363: }
   364: 
   365: 
// Encode the error action.
ActionEntry ParseTables::encodeError() const
{
  #if ENABLE_CRS_COMPRESSION
    return makeAE(AE_ERROR, 0);
  #else
    // uncompressed tables reserve code 0 for the error action
    return validateAction(0);
  #endif
}
   374: 
   375: 
// Encode a goto to 'destState' on nonterminal 'shiftedNontermId'.
GotoEntry ParseTables::encodeGoto(StateId destState, int shiftedNontermId) const
{
  #if ENABLE_CRS_COMPRESSION
    xassert(0 <= shiftedNontermId && shiftedNontermId < numNonterms);
    // compressed: store the offset from the first state reachable
    // on this nonterminal
    int delta = destState - firstWithNonterminal[shiftedNontermId];
    return validateGoto(delta);
  #else
    return validateGoto(destState);
  #endif
}
   386: 
   387: 
   388: // simple alloc + copy
// simple alloc + copy
// NOTE(review): memcpy assumes T is trivially copyable; that holds for
// the integral entry types used here, but confirm before instantiating
// with anything richer.
template <class T>
void copyArray(int &len, T *&dest, ArrayStack<T> const &src)
{
  len = src.length();
  dest = new T[len];
  memcpy(dest, src.getArray(), sizeof(T) * len);
}
   396: 
   397: // given an array 'src' of indices relative to 'base', allocate the
   398: // array 'dest' and fill it in with actual pointers into 'base'
   399: template <class T>
   400: void copyIndexPtrArray(int len, T **&dest, T *base, ArrayStack<int> const &src)
   401: {
   402:   dest = new T* [len];
   403:   for (int i=0; i<len; i++) {
   404:     if (src[i] != UNASSIGNED) {
   405:       dest[i] = base + src[i];
   406:     }
   407:     else {
   408:       dest[i] = NULL;      // so segfault if deref unassigned entry
   409:     }
   410:   }
   411: }
   412: 
// Freeze the growable scratch arrays in 'temp' into their final
// fixed-size forms, then discard the scratch data.  Must be called
// once, after all encode* calls are done.
void ParseTables::finishTables()
{
  // copy the ambiguous actions
  copyArray(ambigTableSize, ambigTable, temp->ambigTable);

  if (ENABLE_CRS_COMPRESSION) {
    // transfer bigProductionList
    copyArray(bigProductionListSize, bigProductionList, temp->bigProductionList);

    // transfer productionsForState, translating indices into pointers
    copyIndexPtrArray(numStates, productionsForState, bigProductionList,
                      temp->productionsForState);

    // ambigStateTable
    copyIndexPtrArray(numStates, ambigStateTable, ambigTable,
                      temp->ambigStateTable);
  }

  // scratch data no longer needed
  delete temp;
  temp = NULL;
}
   434: 
   435: 
   436: // -------------------- table compression --------------------
// Build the errorBits[] bitmap: one bit per (state, terminal) pair
// that holds the error action.  Identical rows are then shared, so
// errorBitsPointers[] may map several states onto one physical row.
void ParseTables::computeErrorBits()
{
  traceProgress() << "computing errorBits[]\n";

  // should only be done once
  xassert(!errorBits);

  // allocate and clear it
  int rowSize = ((numTerms+31) >> 5) * 4;
  allocZeroArray(errorBits, numStates * rowSize);

  // build the pointer table
  allocZeroArray(errorBitsPointers, numStates);

  // find and set the error bits
  fillInErrorBits(true /*setPointers*/);

  // compute which rows are identical; I only compress the rows (and
  // not the columns) because I can fold the former's compression into
  // the errorBitsPointers[] access, whereas the latter would require
  // yet another table
  int *compressed = new int[numStates];   // row -> new location in errorBits[]
  uniqueErrorRows = 0;
  int s;
  for (s=0; s < numStates; s++) {
    // is 's' the same as any rows that preceded it?
    for (int t=0; t < s; t++) {
      // do 's' and 't' have the same contents?
      if (0==memcmp(errorBitsPointers[s],
                    errorBitsPointers[t],
                    sizeof(ErrorBitsEntry) * errorBitsRowSize)) {
        // yes, map 's' to 't' instead
        compressed[s] = compressed[t];
        goto next_s;
      }
    }

    // not the same as any
    compressed[s] = uniqueErrorRows;
    uniqueErrorRows++;

  next_s:
    ;
  }

  // make a smaller 'errorBits' array
  delete[] errorBits;
  allocZeroArray(errorBits, uniqueErrorRows * rowSize);

  // rebuild 'errorBitsPointers' according to 'compressed'
  for (s=0; s < numStates; s++) {
    errorBitsPointers[s] = errorBits + (compressed[s] * errorBitsRowSize);
  }
  delete[] compressed;

  // fill in the bits again, using the new pointers map
  fillInErrorBits(false /*setPointers*/);
}
   495: 
   496: 
   497: void ParseTables::fillInErrorBits(bool setPointers)
   498: {
   499:   for (int s=0; s < numStates; s++) {
   500:     if (setPointers) {
   501:       errorBitsPointers[s] = errorBits + (s * errorBitsRowSize);
   502:     }
   503: 
   504:     for (int t=0; t < numTerms; t++) {
   505:       if (isErrorAction(actionEntry((StateId)s, t))) {
   506:         ErrorBitsEntry &b = errorBitsPointers[s][t >> 3];
   507:         b |= 1 << (t & 7);
   508:       }
   509:     }
   510:   }
   511: }
   512: 
   513: 
// Merge action-table columns (terminals) that never conflict: two
// columns conflict only if some state has different non-error actions
// in both.  Non-conflicting columns are assigned the same "color" by
// graph coloring, and actionIndexMap[] then maps each terminal to its
// merged column.  Requires computeErrorBits() to have run first.
void ParseTables::mergeActionColumns()
{
  traceProgress() << "merging action columns\n";

  // can only do this if we've already pulled out the errors
  xassert(errorBits);

  // for now I assume we don't have a map yet
  xassert(!actionIndexMap);

  if (tracingSys("mergeActionColumnsPre")) {
    // print the action table before compression
    printTable(actionTable, actionTableSize(), actionCols,
               "ActionEntry", "actionTable");
  }

  // compute graph of conflicting 'action' columns
  // (will be symmetric)
  Bit2d graph(point(numTerms, numTerms));
  graph.setall(0);

  // fill it in
  for (int t1=0; t1 < numTerms; t1++) {
    for (int t2=0; t2 < t1; t2++) {
      // does column 't1' conflict with column 't2'?
      for (int s=0; s < numStates; s++) {
        ActionEntry a1 = actionEntry((StateId)s, t1);
        ActionEntry a2 = actionEntry((StateId)s, t2);

        if (isErrorAction(a1) ||
            isErrorAction(a2) ||
            a1 == a2) {
          // no problem
        }
        else {
          // conflict!
          graph.set(point(t1, t2));
          graph.set(point(t2, t1));
          break;
        }
      }
    }
  }

  // color the graph
  Array<int> color(numTerms);      // terminal -> color
  int numColors = colorTheGraph(color, graph);

  // build a new, compressed action table; the entries are initialized
  // to 'error', meaning every cell starts as don't-care
  ActionEntry *newTable;
  allocInitArray(newTable, numStates * numColors, errorActionEntry);

  // merge columns in 'actionTable' into those in 'newTable'
  // according to the 'color' map
  actionIndexMap = new TermIndex[numTerms];
  for (int t=0; t<numTerms; t++) {
    int c = color[t];

    // merge actionTable[t] into newTable[c]
    for (int s=0; s<numStates; s++) {
      ActionEntry &dest = newTable[s*numColors + c];

      ActionEntry src = actionEntry((StateId)s, t);
      if (!isErrorAction(src)) {
        // make sure there's no conflict (otherwise the graph
        // coloring algorithm screwed up)
        xassert(isErrorAction(dest) ||
                dest == src);

        // merge the entry
        dest = src;
      }
    }

    // fill in the action index map
    TermIndex ti = (TermIndex)c;
    xassert(ti == c);     // otherwise value truncation happened
    actionIndexMap[t] = ti;
  }

  trace("compression")
    << "action table: from " << (actionTableSize() * sizeof(ActionEntry))
    << " down to " << (numStates * numColors * sizeof(ActionEntry))
    << " bytes\n";

  // replace the existing table with the compressed one
  delete[] actionTable;
  actionTable = newTable;
  actionCols = numColors;
}
   605: 
   606: 
   607: // unsurprisingly, this function has considerable structure in common
   608: // with 'mergeActionColumns'; however, my attempts to consolidate them
   609: // have led to code that is harder to understand and debug, so they
   610: // remain separate (at least for now)
// unsurprisingly, this function has considerable structure in common
// with 'mergeActionColumns'; however, my attempts to consolidate them
// have led to code that is harder to understand and debug, so they
// remain separate (at least for now)
//
// Merge action-table rows (states) that never conflict, analogously to
// the column merge; actionRowPointers[] then maps each state to its
// merged row.  Requires computeErrorBits() to have run first.
void ParseTables::mergeActionRows()
{
  traceProgress() << "merging action rows\n";

  // can only do this if we've already pulled out the errors
  xassert(errorBits);

  // for now I assume we don't have a map yet
  xassert(!actionRowPointers);

  // compute graph of conflicting 'action' rows
  // (will be symmetric)
  Bit2d graph(point(numStates, numStates));
  graph.setall(0);

  // fill it in
  for (int s1=0; s1 < numStates; s1++) {
    for (int s2=0; s2 < s1; s2++) {
      // does row 's1' conflict with row 's2'?
      for (int t=0; t < actionCols; t++) {    // t is an equivalence class of terminals
        ActionEntry a1 = actionTable[s1*actionCols + t];
        ActionEntry a2 = actionTable[s2*actionCols + t];

        if (isErrorAction(a1) ||
            isErrorAction(a2) ||
            a1 == a2) {
          // no problem
        }
        else {
          // conflict!
          graph.set(point(s1, s2));
          graph.set(point(s2, s1));
          break;
        }
      }
    }
  }

  // color the graph
  Array<int> color(numStates);      // state -> color (equivalence class)
  int numColors = colorTheGraph(color, graph);

  // build a new, compressed action table
  ActionEntry *newTable;
  allocInitArray(newTable, numColors * actionCols, errorActionEntry);

  // merge rows in 'actionTable' into those in 'newTable'
  // according to the 'color' map

  // actionTable[]:
  //
  //             t0    t1    t2    t3      // terminal equivalence classes
  //   s0
  //   s1
  //   s2
  //    ...
  //   /*states*/

  // newTable[]:
  //
  //             t0    t1    t2    t3      // terminal equivalence classes
  //   c0
  //   c1
  //   c2    < e.g., union of state1 and state4 (color[1]==color[4]==2) >
  //    ...
  //   /*state equivalence classes (colors)*/

  actionRowPointers = new ActionEntry* [numStates];
  for (int s=0; s<numStates; s++) {
    int c = color[s];

    // merge actionTable row 's' into newTable row 'c'
    for (int t=0; t<actionCols; t++) {
      ActionEntry &dest = newTable[c*actionCols + t];

      ActionEntry src = actionTable[s*actionCols + t];
      if (!isErrorAction(src)) {
        // make sure there's no conflict (otherwise the graph
        // coloring algorithm screwed up)
        xassert(isErrorAction(dest) ||
                dest == src);

        // merge the entry
        dest = src;
      }
    }

    // fill in the row pointer map
    actionRowPointers[s] = newTable + c*actionCols;
  }

  trace("compression")
    << "action table: from " << (numStates * actionCols * sizeof(ActionEntry))
    << " down to " << (numColors * actionCols * sizeof(ActionEntry))
    << " bytes\n";

  // replace the existing table with the compressed one
  delete[] actionTable;
  actionTable = newTable;
  actionRows = numColors;

  // how many single-value rows?  I'm investigating some other options
  // for further compression...
  {
    int ct=0;
    for (int s=0; s<actionRows; s++) {
      int val = 0;
      for (int t=0; t<actionCols; t++) {
        int entry = actionRowPointers[s][t];
        if (val==0) {
          val = entry;
        }
        else if (entry != 0 && entry != val) {
          // not all the same
          goto next_s;
        }
      }

      // all same
      ct++;

    next_s:
      ;
    }
    trace("compression") << ct << " same-valued action rows\n";
  }
}
   738: 
   739: 
   740: // created by copying 'mergeGotoRows' and replacing 'action'
   741: // with 'goto', etc.
// created by copying 'mergeGotoRows' and replacing 'action'
// with 'goto', etc.
//
// Merge goto-table columns (nonterminals) that never conflict;
// gotoIndexMap[] then maps each nonterminal to its merged column.
// Requires computeErrorBits() to have run first.
void ParseTables::mergeGotoColumns()
{
  traceProgress() << "merging goto columns\n";

  // can only do this if we've already pulled out the errors
  xassert(errorBits);

  // for now I assume we don't have a map yet
  xassert(!gotoIndexMap);

  // compute graph of conflicting 'goto' columns
  Bit2d graph(point(numNonterms, numNonterms));
  graph.setall(0);

  // fill it in
  for (int nt1=0; nt1 < numNonterms; nt1++) {
    for (int nt2=0; nt2 < nt1; nt2++) {
      // does column 't1' conflict with column 't2'?
      for (int s=0; s < numStates; s++) {
        GotoEntry g1 = gotoEntry((StateId)s, nt1);
        GotoEntry g2 = gotoEntry((StateId)s, nt2);

        if (isErrorGoto(g1) ||
            isErrorGoto(g2) ||
            g1 == g2) {
          // no problem
        }
        else {
          // conflict!
          graph.set(point(nt1, nt2));
          graph.set(point(nt2, nt1));
          break;
        }
      }
    }
  }

  // color the graph
  Array<int> color(numNonterms);      // nonterminal -> color
  int numColors = colorTheGraph(color, graph);

  // build a new, compressed goto table; the entries are initialized
  // to 'error', meaning every cell starts as don't-care
  GotoEntry *newTable;
  allocInitArray(newTable, numStates * numColors, encodeGotoError());

  // merge columns in 'gotoTable' into those in 'newTable'
  // according to the 'color' map
  gotoIndexMap = new NtIndex[numNonterms];
  for (int nt=0; nt<numNonterms; nt++) {
    int c = color[nt];

    // merge gotoTable[nt] into newTable[c]
    for (int s=0; s<numStates; s++) {
      GotoEntry &dest = newTable[s*numColors + c];

      GotoEntry src = gotoEntry((StateId)s, nt);
      if (!isErrorGoto(src)) {
        // make sure there's no conflict (otherwise the graph
        // coloring and/or conflict map algorithms screwed up)
        xassert(isErrorGoto(dest) ||
                dest == src);

        // merge the entry
        dest = src;
      }
    }

    // fill in the goto index map
    NtIndex nti = (NtIndex)c;
    xassert(nti == c);     // otherwise value truncation happened
    gotoIndexMap[nt] = nti;
  }

  trace("compression")
    << "goto table: from " << (gotoTableSize() * sizeof(GotoEntry))
    << " down to " << (numStates * numColors * sizeof(GotoEntry))
    << " bytes\n";

  // replace the existing table with the compressed one
  delete[] gotoTable;
  gotoTable = newTable;
  gotoCols = numColors;
}
   826: 
   827: 
// Merge compatible rows of the goto table, using graph coloring to find
// sets of states whose rows never disagree on a non-error entry; created
// by copying 'mergeActionRows' and replacing 'action' with 'goto', etc.
// Afterward, per-state lookups go through 'gotoRowPointers[state]'
// instead of indexing 'gotoTable' by state directly.
void ParseTables::mergeGotoRows()
{
  traceProgress() << "merging goto rows\n";

  // can only do this if we've already pulled out the errors
  // (otherwise error entries would block almost every merge)
  xassert(errorBits);

  // for now I assume we don't have a map yet
  xassert(!gotoRowPointers);

  // compute graph of conflicting 'goto' rows: vertex = state,
  // edge = "these two rows disagree on some non-error entry"
  Bit2d graph(point(numStates, numStates));
  graph.setall(0);

  // fill it in
  for (int s1=0; s1 < numStates; s1++) {
    for (int s2=0; s2 < s1; s2++) {
      // does row 's1' conflict with row 's2'?
      for (int nt=0; nt < gotoCols; nt++) {    // nt is an equivalence class of nonterminals
        GotoEntry g1 = gotoTable[s1*gotoCols + nt];
        GotoEntry g2 = gotoTable[s2*gotoCols + nt];

        if (isErrorGoto(g1) ||
            isErrorGoto(g2) ||
            g1 == g2) {
          // no problem: error entries merge with anything, and
          // identical entries trivially agree
        }
        else {
          // conflict!  mark both directions; one disagreement suffices
          graph.set(point(s1, s2));
          graph.set(point(s2, s1));
          break;
        }
      }
    }
  }

  // color the graph; states with equal colors get merged
  Array<int> color(numStates);      // state -> color (equivalence class)
  int numColors = colorTheGraph(color, graph);

  // build a new, compressed goto table, initially all-error
  GotoEntry *newTable;
  allocInitArray(newTable, numColors * gotoCols, encodeGotoError());

  // merge rows in 'gotoTable' into those in 'newTable'
  // according to the 'color' map

  // gotoTable[]:
  //
  //             t0    t1    t2    t3      // nonterminal equivalence classes
  //   s0
  //   s1
  //   s2
  //    ...
  //   /*states*/

  // newTable[]:
  //
  //             t0    t1    t2    t3      // nonterminal equivalence classes
  //   c0
  //   c1
  //   c2    < e.g., union of state1 and state4 (color[1]==color[4]==2) >
  //    ...
  //   /*state equivalence classes (colors)*/

  gotoRowPointers = new GotoEntry* [numStates];
  for (int s=0; s<numStates; s++) {
    int c = color[s];

    // merge gotoTable row 's' into newTable row 'c'
    for (int nt=0; nt<gotoCols; nt++) {
      GotoEntry &dest = newTable[c*gotoCols + nt];

      GotoEntry src = gotoTable[s*gotoCols + nt];
      if (!isErrorGoto(src)) {
        // make sure there's no conflict (otherwise the graph
        // coloring algorithm screwed up)
        xassert(isErrorGoto(dest) ||
                dest == src);

        // merge the entry
        dest = src;
      }
    }

    // fill in the row pointer map: state 's' reads its (shared) row
    // through this pointer from now on
    gotoRowPointers[s] = newTable + c*gotoCols;
  }

  trace("compression")
    << "goto table: from " << (numStates * gotoCols * sizeof(GotoEntry))
    << " down to " << (numColors * gotoCols * sizeof(GotoEntry))
    << " bytes\n";

  // replace the existing table with the compressed one
  delete[] gotoTable;
  gotoTable = newTable;
  gotoRows = numColors;
}
   930: 
   931: 
   932: static int intCompare(void const *left, void const *right)
   933: {
   934:   return *((int const*)left) - *((int const*)right);
   935: }
   936: 
// Greedy graph coloring: assign each of the n vertices a color such
// that no two adjacent vertices share one.  'graph' is a square
// adjacency matrix; 'color' (an array of n ints) is filled in here.
// Returns the number of distinct colors used.  Optimal coloring is
// NP-hard, so this uses a most-constrained-vertex-first heuristic to
// keep the color count low.
int ParseTables::colorTheGraph(int *color, Bit2d &graph)
{
  int n = graph.Size().x;  // same as y (matrix is square)

  if (tracingSys("graphColor") && n < 20) {
    graph.print();
  }

  // node -> # of adjacent nodes
  Array<int> degree(n);
  memset((int*)degree, 0, n * sizeof(int));

  // node -> # of adjacent nodes that have colors already
  Array<int> blocked(n);

  // initialize some arrays
  enum { UNASSIGNED = -1 };
  {
    for (int i=0; i<n; i++) {
      // clear the color map
      color[i] = UNASSIGNED;
      blocked[i] = 0;

      // count neighbors to compute the degree
      for (int j=0; j<n; j++) {
        if (graph.get(point(i,j))) {
          degree[i]++;
        }
      }
    }
  }

  // # of colors used
  int usedColors = 0;

  // color exactly one vertex per iteration
  for (int numColored=0; numColored < n; numColored++) {
    // Find a vertex to color.  Prefer nodes that are more constrained
    // (have more blocked colors) to those that are less constrained.
    // Then, prefer those that are least constraining (have least
    // uncolored neighbors) to those that are more constraining.  If
    // ties remain, choose arbitrarily.
    int best = -1;
    int bestBlocked = 0;
    int bestUnblocked = 0;

    for (int choice = 0; choice < n; choice++) {
      if (color[choice] != UNASSIGNED) continue;

      int chBlocked = blocked[choice];
      int chUnblocked = degree[choice] - blocked[choice];
      if (best == -1 ||                          // no choice yet
          chBlocked > bestBlocked ||             // more constrained
          (chBlocked == bestBlocked &&
           chUnblocked < bestUnblocked)) {       // least constraining
        // new best
        best = choice;
        bestBlocked = chBlocked;
        bestUnblocked = chUnblocked;
      }
    }

    // get the assigned colors of the adjacent vertices
    Array<int> adjColor(bestBlocked);
    int adjIndex = 0;
    for (int i=0; i<n; i++) {
      if (graph.get(point(best,i)) &&
          color[i] != UNASSIGNED) {
        adjColor[adjIndex++] = color[i];
      }
    }
    xassert(adjIndex == bestBlocked);

    // sort them, so the scan below can find the first unused color
    qsort((int*)adjColor, bestBlocked, sizeof(int), intCompare);

    // select the lowest-numbered color that won't conflict, i.e. the
    // first gap in the sorted list of neighbor colors
    int selColor = 0;
    for (int j=0; j<bestBlocked; j++) {
      if (selColor == adjColor[j]) {
        selColor++;
      }
      else if (selColor < adjColor[j]) {
        // found one that doesn't conflict
        break;
      }
      else {
        // happens when we have two neighbors that have the same color;
        // that's fine, we'll go around the loop again to see what the
        // next neighbor has to say
      }
    }

    // assign 'selColor' to 'best'
    color[best] = selColor;
    if (selColor+1 > usedColors) {
      usedColors = selColor+1;
    }

    // update 'blocked[]'
    for (int k=0; k<n; k++) {
      if (graph.get(point(best,k))) {
        // every neighbor 'k' of 'best' now has one more colored neighbor
        blocked[k]++;
      }
    }
  }

  std::ostream &os = trace("graphColor") << "colors[]:";

  for (int i=0; i<n; i++) {
    // every node should now have blocked == degree
    xassert(blocked[i] == degree[i]);

    // and have a color assigned
    xassert(color[i] != UNASSIGNED);
    os << " " << color[i];
  }

  os << "\n";

  return usedColors;
}
  1058: 
  1059: 
// --------------------- table emission -------------------
// create literal tables
//
// Emit a C++ initializer for a static array 'tableName' holding 'size'
// elements of 'typeName', formatted 'rowLength' entries per line with
// an aligned row-number comment at the start of each line.  Emits a
// NULL pointer declaration instead when the table is absent or empty.
template <class EltType>
void emitTable(EmitCode &out, EltType const *table, int size, int rowLength,
               char const *typeName, char const *tableName)
{
  if (!table || !size) {
    out << "  " << typeName << " *" << tableName << " = NULL;\n";
    return;
  }

  // these particular element types read better in hexadecimal
  bool printHex = 0==strcmp(typeName, "ErrorBitsEntry") ||
                  (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "ActionEntry")) ||
                  (ENABLE_CRS_COMPRESSION && 0==strcmp(typeName, "GotoEntry")) ;
  bool needCast = 0==strcmp(typeName, "StateId");

  if (size * sizeof(*table) > 50) {    // suppress small ones
    out << "  // storage size: " << size * sizeof(*table) << " bytes\n";
    if (size % rowLength == 0) {
      out << "  // rows: " << (size/rowLength) << "  cols: " << rowLength << "\n";
    }
  }

  // width of the largest row number, for aligned row comments
  int rowNumWidth = sm_stringf("%d", size / rowLength /*round down*/).length();

  // I make tables 'const' because that way the OS loader might be
  // smart enough to share them (on a read-only basis) across multiple
  // processes started from the same executable.  But I immediately
  // cast them to non-const, since ParseTables doesn't declare
  // pointers-to-const (since it also has methods to modify the tables
  // at parser generation time).

  out << "  static " << typeName << " const " << tableName << "[" << size << "] = {";
  int row = 0;
  for (int i=0; i<size; i++) {
    if (i % rowLength == 0) {    // one row per state
      // the split string literals keep this line from containing a
      // comment-closer sequence in this source file itself
      out << sm_stringf("\n    /""*%*d*""/ ", rowNumWidth, row++);
    }

    if (needCast) {
      out << "(" << typeName << ")";
    }

    if (printHex) {
      out << sm_stringf("0x%02X, ", table[i]);
    }
    else if (sizeof(table[i]) == 1) {
      // little bit of a hack to make sure 'unsigned char' gets
      // printed as an int; the casts are necessary because this
      // code gets compiled even when EltType is ProdInfo
      out << (int)(*((unsigned char*)(table+i))) << ", ";
    }
    else {
      // print the other int-sized things, or ProdInfo using
      // the overloaded '<<' below
      out << table[i] << ", ";
    }
  }
  out << "\n"
      << "  };\n";
}
  1121: 
  1122: // used to emit the elements of the prodInfo table
  1123: sm_stringBuilder& operator<< (sm_stringBuilder &sb, ParseTables::ProdInfo const &info)
  1124: {
  1125:   sb << "{" << (int)info.rhsLen << "," << (int)info.lhsIndex << "}";
  1126:   return sb;
  1127: }
  1128: 
  1129: 
  1130: // like 'emitTable', but also set a local called 'tableName'
  1131: template <class EltType>
  1132: void emitTable2(EmitCode &out, EltType const *table, int size, int rowLength,
  1133:                 char const *typeName, char const *tableName)
  1134: {
  1135:   sm_string tempName = sm_stringc << tableName << "_static";
  1136:   emitTable(out, table, size, rowLength, typeName, tempName);
  1137:   out << "  " << tableName << " = const_cast<" << typeName << "*>("
  1138:       << tempName << ");\n\n";
  1139: }
  1140: 
  1141: 
  1142: template <class EltType>
  1143: void emitOffsetTable(EmitCode &out, EltType **table, EltType *base, int size,
  1144:                      char const *typeName, char const *tableName, char const *baseName)
  1145: {
  1146:   if (!table) {
  1147:     out << "  " << tableName << " = NULL;\n\n";
  1148:     return;
  1149:   }
  1150: 
  1151:   // make the pointers persist by storing a table of offsets
  1152:   Array<int> offsets(size);
  1153:   bool allUnassigned = true;
  1154:   for (int i=0; i < size; i++) {
  1155:     if (table[i]) {
  1156:       offsets[i] = table[i] - base;
  1157:       allUnassigned = false;
  1158:     }
  1159:     else {
  1160:       offsets[i] = UNASSIGNED;    // codes for a NULL entry
  1161:     }
  1162:   }
  1163: 
  1164:   if (allUnassigned) {
  1165:     // for example, an LALR(1) grammar has no ambiguous entries in its tables
  1166:     size = 0;
  1167:   }
  1168: 
  1169:   if (size > 0) {
  1170:     out << "  " << tableName << " = new " << typeName << " [" << size << "];\n";
  1171: 
  1172:     emitTable(out, (int*)offsets, size, 16, "int", sm_stringc << tableName << "_offsets");
  1173: 
  1174:     // at run time, interpret the offsets table
  1175:     out << "  for (int i=0; i < " << size << "; i++) {\n"
  1176:         << "    int ofs = " << tableName << "_offsets[i];\n"
  1177:         << "    if (ofs >= 0) {\n"
  1178:         << "      " << tableName << "[i] = " << baseName << " + ofs;\n"
  1179:         << "    }\n"
  1180:         << "    else {\n"
  1181:         << "      " << tableName << "[i] = NULL;\n"
  1182:         << "    }\n"
  1183:         << "  }\n\n";
  1184:   }
  1185:   else {
  1186:     out << "  // offset table is empty\n"
  1187:         << "  " << tableName << " = NULL;\n\n";
  1188:   }
  1189: }
  1190: 
  1191: 
// for debugging: dump a table to stdout in the same format that
// emitTable() writes into generated code.  Currently compiled out,
// so this is a no-op; the parameters are intentionally unused.
template <class EltType>
void printTable(EltType const *table, int size, int rowLength,
                char const *typeName, char const *tableName)
{
  // disabled for now since I don't need it anymore, and it adds
  // a link dependency on emitcode.cc ...
  #if 0
  {
    EmitCode out("printTable.tmp");
    emitTable(out, table, size, rowLength, typeName, tableName);
  }

  system("cat printTable.tmp; rm printTable.tmp");
  #endif // 0
}
  1208: 
  1209: 
// emit code for a function which, when compiled and executed, will
// construct this same table (except the constructed table won't own
// the table data, since it will point to static program data)
//
// The generated code defines a ParseTables subclass named
// '<className>_ParseTables' whose constructor fills in every scalar
// member and points the table members at static arrays, then defines
// '<className>::<funcName>' to instantiate that subclass.
void ParseTables::emitConstructionCode(EmitCode &out,
  char const *className, char const *funcName)
{
  // must have already called 'finishTables'
  xassert(!temp);

  // class declaration and start of the constructor definition
  out << "// this makes a ParseTables from some literal data;\n"
      << "// the code is written by ParseTables::emitConstructionCode()\n"
      << "// in " << __FILE__ << "\n"
      << "class " << className << "_ParseTables : public ParseTables {\n"
      << "public:\n"
      << "  " << className << "_ParseTables();\n"
      << "};\n"
      << "\n"
      << className << "_ParseTables::" << className << "_ParseTables()\n"
      << "  : ParseTables(false /*owning*/)\n"
      << "{\n"
      ;

  // set all the integer-like variables
  #define SET_VAR(var) \
    out << "  " #var " = " << var << ";\n";
  SET_VAR(numTerms);
  SET_VAR(numNonterms);
  SET_VAR(numStates);
  SET_VAR(numProds);
  SET_VAR(actionCols);
  SET_VAR(actionRows);
  SET_VAR(gotoCols);
  SET_VAR(gotoRows);
  SET_VAR(ambigTableSize);
  // startState needs an explicit cast since StateId is an enum
  out << "  startState = (StateId)" << (int)startState << ";\n";
  SET_VAR(finalProductionIndex);
  SET_VAR(bigProductionListSize);
  SET_VAR(errorBitsRowSize);
  SET_VAR(uniqueErrorRows);
  #undef SET_VAR
  out << "\n";

  // action table, one row per state
  emitTable2(out, actionTable, actionTableSize(), actionCols,
             "ActionEntry", "actionTable");

  // goto table, one row per state
  emitTable2(out, gotoTable, gotoTableSize(), gotoCols,
             "GotoEntry", "gotoTable");

  // production info, arbitrarily 16 per row
  emitTable2(out, prodInfo, numProds, 16, "ParseTables::ProdInfo", "prodInfo");

  // state symbol map, arbitrarily 16 per row
  emitTable2(out, stateSymbol, numStates, 16, "SymbolId", "stateSymbol");

  // ambigTable
  emitTable2(out, ambigTable, ambigTableSize, 16, "ActionEntry", "ambigTable");

  // nonterminal order
  emitTable2(out, nontermOrder, nontermOrderSize(), 16,
             "NtIndex", "nontermOrder");

  // errorBits
  emitTable2(out, errorBits, uniqueErrorRows * errorBitsRowSize, errorBitsRowSize,
             "ErrorBitsEntry", "errorBits");

  // pointer tables are serialized as offsets into their base arrays
  emitOffsetTable(out, errorBitsPointers, errorBits, numStates,
                  "ErrorBitsEntry*", "errorBitsPointers", "errorBits");

  // actionIndexMap
  emitTable2(out, actionIndexMap, numTerms, 16,
             "TermIndex", "actionIndexMap");

  // actionRowPointers
  emitOffsetTable(out, actionRowPointers, actionTable, numStates,
                  "ActionEntry*", "actionRowPointers", "actionTable");

  // gotoIndexMap
  emitTable2(out, gotoIndexMap, numNonterms, 16,
             "NtIndex", "gotoIndexMap");

  // gotoRowPointers
  emitOffsetTable(out, gotoRowPointers, gotoTable, numStates,
                  "GotoEntry*", "gotoRowPointers", "gotoTable");

  // CRS-compression tables exist only when that scheme is enabled
  if (ENABLE_CRS_COMPRESSION) {
    emitTable2(out, firstWithTerminal, numTerms, 16,
               "StateId", "firstWithTerminal");

    emitTable2(out, firstWithNonterminal, numNonterms, 16,
               "StateId", "firstWithNonterminal");

    emitTable2(out, bigProductionList, bigProductionListSize, 16,
               "ProdIndex", "bigProductionList");

    emitOffsetTable(out, productionsForState, bigProductionList, numStates,
                    "ProdIndex*", "productionsForState", "bigProductionList");

    emitOffsetTable(out, ambigStateTable, ambigTable, numStates,
                    "ActionEntry*", "ambigStateTable", "ambigTable");
  }
  else {
    out << "  firstWithTerminal = NULL;\n"
        << "  firstWithNonterminal = NULL;\n"
        << "  bigProductionList = NULL;\n"
        << "  productionsForState = NULL;\n"
        << "  ambigStateTable = NULL;\n"
        ;
  }

  // close the constructor and emit the factory function
  out << "}\n"
      << "\n"
      << "\n"
      << "ParseTables *" << className << "::" << funcName << "()\n"
      << "{\n"
      << "  return new " << className << "_ParseTables;\n"
      << "}\n"
      << "\n"
      ;
}
  1331: 
  1332: 
  1333: // EOF
End cpp section to elk/elk_parsetables.cpp[1]
Start cpp section to elk/elk_ptreeact.cpp[1 /1 ]
     1: #line 22731 "./lpsrc/elk.pak"
     2: // ptreeact.cc            see license.txt for copyright and terms of use
     3: // code for ptreeact.h
     4: 
     5: #include "elk_ptreeact.h"
     6: #include "elk_ptreenode.h"
     7: #include "elk_parsetables.h"
     8: #include "sm_trace.h"
     9: 
    10: 
    11: // ------------------- ParseTreeLexer -------------------
// Wrap lexer 'u', keeping its token stream but substituting a fresh
// PTreeNode leaf for each semantic value; 'a' supplies terminal names.
ParseTreeLexer::ParseTreeLexer(LexerInterface *u, UserActions *a)
  : underlying(u),
    underToken(u->getTokenFunc()),    // cache how to advance 'u'
    actions(a)
{
  // the underlying lexer is already primed, so mirror its current token
  copyFields();
}
    20: 
    21: STATICDEF void ParseTreeLexer::nextToken(LexerInterface *lex)
    22: {
    23:   ParseTreeLexer *ths = static_cast<ParseTreeLexer*>(lex);
    24: 
    25:   // call underlying token function
    26:   ths->underToken(ths->underlying);
    27: 
    28:   // grab its fields
    29:   ths->copyFields();
    30: }
    31: 
// Mirror the underlying lexer's current token kind and location into
// this object, replacing the semantic value with a parse-tree leaf.
void ParseTreeLexer::copyFields()
{
  type = underlying->type;
  loc = underlying->loc;

  // leak underlying's 'sval'.. we'll just assume it doesn't matter

  // my sval is always a newly-allocated PTreeNode, with no children,
  // and named according to the name of the token yielded
  PTreeNode *ret = new PTreeNode(actions->terminalName(type));
  sval = (SemanticValue)ret;
}
    44: 
    45: 
// Describe the current token; delegate to the wrapped lexer.
sm_string ParseTreeLexer::tokenDesc() const
{
  return underlying->tokenDesc();
}

// Describe token kind 'kind'; delegate to the wrapped lexer.
sm_string ParseTreeLexer::tokenKindDesc(int kind) const
{
  return underlying->tokenKindDesc(kind);
}
    55: 
    56: 
    57: // ---------------------- ParseTreeActions -------------------
    58: STATICDEF SemanticValue ParseTreeActions::reduce(
    59:   UserActions *context,
    60:   int productionId,
    61:   SemanticValue const *svals
    62:   SOURCELOCARG( SourceLoc loc ) )
    63: {
    64:   ParseTreeActions *ths = static_cast<ParseTreeActions*>(context);
    65: 
    66:   // get info about this production
    67:   ParseTables::ProdInfo const &info = ths->tables->getProdInfo(productionId);
    68:   xassert(info.rhsLen <= PTreeNode::MAXCHILDREN);
    69: 
    70:   // make a bare PTreeNode, labeled with the LHS nonterminal name
    71:   PTreeNode *ret = new PTreeNode(ths->underlying->nonterminalName(info.lhsIndex));
    72: 
    73:   // add the children
    74:   for (int i=0; i < info.rhsLen; i++) {
    75:     ret->children[i] = (PTreeNode*)svals[i];
    76:   }
    77:   ret->numChildren = info.rhsLen;
    78: 
    79:   return (SemanticValue)ret;
    80: }
    81: 
    82: 
    83: SemanticValue ParseTreeActions::mergeAlternativeParses(
    84:   int ntIndex, SemanticValue left, SemanticValue right
    85:   SOURCELOCARG( SourceLoc loc ) )
    86: {
    87:   trace("ptreeactMerge") << underlying->nonterminalName(ntIndex) << "\n";
    88: 
    89:   // link the ambiguities together in the usual way
    90:   PTreeNode *L = (PTreeNode*)left;
    91:   PTreeNode *R = (PTreeNode*)right;
    92: 
    93:   L->addAlternative(R);
    94:   return left;
    95: }
    96: 
    97: 
    98: char const *ParseTreeActions::terminalName(int termId)
    99: {
   100:   return underlying->terminalName(termId);
   101: }
   102: 
   103: char const *ParseTreeActions::nonterminalName(int termId)
   104: {
   105:   return underlying->nonterminalName(termId);
   106: }
End cpp section to elk/elk_ptreeact.cpp[1]
Start cpp section to elk/elk_ptreenode.cpp[1 /1 ]
     1: #line 22838 "./lpsrc/elk.pak"
     2: // ptreenode.cc            see license.txt for copyright and terms of use
     3: // code for ptreenode.h
     4: 
     5: #include "elk_ptreenode.h"
     6: #include "sm_typ.h"
     7: #include "sm_str.h"
     8: #include "sm_trace.h"
     9: 
    10: #include <string.h>         // strchr
    11: 
// static instrumentation counters
int PTreeNode::allocCount = 0;         // # of PTreeNodes ever created
int PTreeNode::alternativeCount = 0;   // # of merged alternatives recorded


// Shared constructor logic: start with an empty ambiguity chain and
// record the allocation.
void PTreeNode::init()
{
  merged = NULL;
  allocCount++;
}
    21: 
    22: 
    23: TreeCount PTreeNode::countTrees()
    24: {
    25:   // memoize to avoid exponential blowup
    26:   if (count != 0) {
    27:     return count;
    28:   }
    29: 
    30:   else {
    31:     // a single tree can have any possibility for each of
    32:     // its children, so the result is their product
    33:     count = 1;
    34:     for (int i=0; i<numChildren; i++) {
    35:       count *= children[i]->countTrees();
    36:     }
    37: 
    38:     // are there alternatives?
    39:     if (merged) {
    40:       // add them too (recurse down the list of alts)
    41:       count += merged->countTrees();
    42:     }
    43:   }
    44: 
    45:   return count;
    46: }
    47: 
    48: 
// Public entry point for printing: honor the "ptreeAddrs" tracing flag
// by adding PF_ADDRS, then recurse starting at indentation level 0.
void PTreeNode::printTree(std::ostream &out, PrintFlags pf) const
{
  if (tracingSys("ptreeAddrs")) {
    pf = (PrintFlags)(pf | PF_ADDRS);
  }
  innerPrintTree(out, 0 /*indentation*/, pf);
}
    56: 
    57: 
// amount to indent per level
enum { INDENT_INC = 2 };

// Recursive printing worker.  Prints this node's production line (and,
// for an ambiguity node, each alternative on the 'merged' chain,
// bracketed by banner lines naming the shared LHS), then recurses into
// the children at increased indentation.
void PTreeNode::innerPrintTree(std::ostream &out, int indentation,
                               PrintFlags pf) const
{
  int alts = 1;          // # of alternatives rooted here
  sm_string LHS;         // shared LHS name, used in ambiguity banners

  if (merged) {
    // this is an ambiguity node
    alts = countMergedList();

    // since all of the alternatives should rewrite the same LHS
    // nonterminal, extract it from the first one
    char const *firstSpace = strchr(type, ' ');
    if (!firstSpace) {
      LHS = type;     // no space, use whole thing
    }
    else {
      LHS = sm_string(type, firstSpace-type);
    }

    // alternatives print one level deeper than the banners
    indentation += INDENT_INC;
  }

  // iterate over interpretations
  int ct=1;
  for (PTreeNode const *n = this; n != NULL; n = n->merged) {
    if (alts > 1) {
      indent(out, indentation - INDENT_INC);
      out << "--------- ambiguous " << LHS << ": "
          << ct << " of " << alts << " ---------\n";
    }

    indent(out, indentation);

    out << n->type;
    if (pf & PF_EXPAND) {
      // the type is just the LHS name; write out the RHS names
      // after an "->"
      if (n->numChildren) {
        out << " ->";
        for (int c=0; c < n->numChildren; c++) {
          out << " " << n->children[c]->type;
        }
      }
    }

    if (pf & PF_ADDRS) {
      // print the parse tree node address, so I can verify proper sharing
      out << " (" << ((void*)n) << ")";
    }
    out << "\n";

    // iterate over children
    for (int c=0; c < n->numChildren; c++) {
      // recursively print children
      n->children[c]->innerPrintTree(out, indentation + INDENT_INC, pf);
    }

    ct++;
  }

  if (merged) {
    // close up ambiguity display
    indentation -= INDENT_INC;
    indent(out, indentation);
    out << "--------- end of ambiguous " << LHS << " ---------\n";
  }
}
   129: 
   130: STATICDEF void PTreeNode::indent(std::ostream &out, int n)
   131: {
   132:   for (int i=0; i<n; i++) {
   133:     out << " ";
   134:   }
   135: }
   136: 
// # of nodes on the 'merged' list; always at least 1 since
// 'this' is considered to be in that list
int PTreeNode::countMergedList() const
{
  int ct = 1;
  // walk the rest of the chain beyond 'this'
  for (PTreeNode const *n = merged; n != NULL; n = n->merged) {
    ct++;
  }
  return ct;
}
   147: 
   148: 
// Splice 'alt' into this node's ambiguity chain, directly after 'this';
// inserting as the 2nd element keeps 'this' as the list head that
// callers already hold.
void PTreeNode::addAlternative(PTreeNode *alt)
{
  // insert as 2nd element
  alt->merged = this->merged;
  this->merged = alt;

  alternativeCount++;
}
End cpp section to elk/elk_ptreenode.cpp[1]
Start cpp section to elk/elk_useract.cpp[1 /1 ]
     1: #line 22995 "./lpsrc/elk.pak"
     2: // useract.cc            see license.txt for copyright and terms of use
     3: // code for useract.h
     4: 
     5: #include "elk_useract.h"
     6: #include "sm_typ.h"
     7: #include "sm_xassert.h"
     8: 
     9: 
// Out-of-line empty destructor for the UserActions base class.
UserActions::~UserActions()
{}


// Default implementation: this base object carries no parse tables,
// so asking for them is a fatal error.
ParseTables *UserActions::makeTables()
{
  xfailure("this object does not have any tables");
  return NULL;   // silence warning
}
    19: 
    20: 
// ----------------- TrivialUserActions --------------------
// The trivial actions compute nothing: every reduction yields NULL_SVAL
// and all other hooks are identity/no-op stubs.
UserActions::ReductionActionFunc TrivialUserActions::getReductionAction()
{
  return &TrivialUserActions::doReductionAction;
}

// Reduction action that ignores all its arguments and yields no value.
STATICDEF SemanticValue TrivialUserActions::doReductionAction(
  UserActions *, int , SemanticValue const *
  SOURCELOCARG( SourceLoc ) )
  { return NULL_SVAL; }
    31: 
// Duplication is the identity: these semantic values carry no
// ownership, so sharing them is fine.
SemanticValue TrivialUserActions::duplicateTerminalValue(
  int , SemanticValue sval)
  { return sval; }

SemanticValue TrivialUserActions::duplicateNontermValue(
  int , SemanticValue sval)
  { return sval; }


// Nothing was allocated for these values, so deallocation is a no-op.
void TrivialUserActions::deallocateTerminalValue(
  int , SemanticValue )
  {}

void TrivialUserActions::deallocateNontermValue(
  int , SemanticValue )
  {}
    48: 
// On an ambiguity, arbitrarily keep the left parse's value.
SemanticValue TrivialUserActions::mergeAlternativeParses(
  int , SemanticValue left, SemanticValue
  SOURCELOCARG( SourceLoc ) )
  { return left; }

// Never ask the parser to cancel a reduction.
bool TrivialUserActions::keepNontermValue(int , SemanticValue )
  { return true; }     // do not cancel
    56: 
    57: 
// Token reclassification is the identity mapping.
UserActions::ReclassifyFunc TrivialUserActions::getReclassifier()
{
  return &TrivialUserActions::reclassifyToken;
}

STATICDEF int TrivialUserActions::reclassifyToken(UserActions *,
  int oldTokenType, SemanticValue )
  { return oldTokenType; }
    66: 
// Descriptions and names are all empty: the trivial actions object has
// no symbol information to report.
sm_string TrivialUserActions::terminalDescription(int, SemanticValue)
  { return sm_string(""); }

sm_string TrivialUserActions::nonterminalDescription(int, SemanticValue)
  { return sm_string(""); }

char const *TrivialUserActions::terminalName(int)
  { return ""; }
char const *TrivialUserActions::nonterminalName(int)
  { return ""; }
    77: 
End cpp section to elk/elk_useract.cpp[1]