5.22. Lexer
Start ocaml section to src/flx_prelex.mli[1 /1 ]
1: # 4 "./lpsrc/flx_lexer.ipk"
2: val src_of_token : Flx_parse.token -> Flx_ast.srcref
3: val string_of_token : Flx_parse.token -> string
4: val name_of_token : Flx_parse.token -> string
5:
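The three functions above are debugging aids: name_of_token names the token
class, string_of_token renders an approximation of its lexeme, and
src_of_token recovers its location. A minimal sketch of a token dumper,
assuming Flx_ast.srcref is the (file, line, startcol, endcol) tuple the
implementation below constructs (dump_tokens is hypothetical, not part of
the build):

    let dump_tokens (toks : Flx_parse.token list) =
      List.iter
        (fun t ->
          let file, line, scol, ecol = Flx_prelex.src_of_token t in
          Printf.printf "%s:%d:%d-%d %s %S\n"
            file line scol ecol
            (Flx_prelex.name_of_token t)
            (Flx_prelex.string_of_token t))
        toks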
Start ocaml section to src/flx_prelex.ml[1 /1 ]
1: # 10 "./lpsrc/flx_lexer.ipk"
2: open Flx_parse
3:
4: let string_of_string s = "\"" ^ Flx_string.c_quote_of_string s ^ "\""
5:
6: let string_of_token (tok :Flx_parse.token): string =
7: match tok with
8: | NAME (sr,s) -> s
9: | INTEGER (sr,t,i) -> Big_int.string_of_big_int i
10: | FLOAT (sr,t,v) -> v
11: | STRING (sr,s) -> Flx_string.c_quote_of_string s
12: | CSTRING (sr,s) -> Flx_string.c_quote_of_string s
13: | FSTRING (sr,s) -> Flx_string.c_quote_of_string s
14: | QSTRING (sr,s) -> Flx_string.c_quote_of_string s
15: | WSTRING (sr,s) -> Flx_string.c_quote_of_string s
16: | USTRING (sr,s) -> Flx_string.c_quote_of_string s
17: | USER10 (sr,op,fn) -> "op10 " ^ op
18: | USERLB (sr,_,lb) -> lb
19: | USERRB (sr,rb) -> rb
20: | USER_KEYWORD (sr,s) -> s
21: | USER_STATEMENT_KEYWORD (sr,s,_,_) -> s
22: | USER_STATEMENT_DRIVER (sr,s,_) -> s
23: | HASH_INCLUDE_FILES fs -> "include_files(" ^ String.concat "," fs ^ ")"
24: | TOKEN_LIST ts -> "<<token list>>"
25: (*
26: | PARSE_ACTION sr -> "=>#"
27: *)
28:
29: | DOLLAR _ -> "$"
30: | QUEST _ -> "?"
31: | EXCLAMATION _ -> "!"
32: | LPAR _ -> "("
33: | RPAR _ -> ")"
34: | LSQB _ -> "["
35: | RSQB _ -> "]"
36: | LBRACE _ -> "{"
37: | RBRACE _ -> "}"
38: | COLON _ -> ":"
39: | COMMA _ -> ","
40: | SEMI _ -> ";"
41: | PLUS _ -> "+"
42: | MINUS _ -> "-"
43: | STAR _ -> "*"
44: | SLASH _ -> "/"
45: | VBAR _ -> "|"
46: | AMPER _ -> "&"
47: | LESS _ -> "<"
48: | GREATER _ -> ">"
49: | EQUAL _ -> "="
50: | DOT _ -> "."
51: | PERCENT _ -> "%"
52: | BACKQUOTE _ -> "`"
53: | TILDE _ -> "~"
54: | CIRCUMFLEX _ -> "^"
55: | HASH _ -> "#"
56: | ANDLESS _ -> "&<"
57: | ANDGREATER _ -> "&>"
58: | EQEQUAL _ -> "=="
59: | NOTEQUAL _ -> "!="
60: | LESSEQUAL _ -> "<="
61: | GREATEREQUAL _ -> ">="
62: | LEFTSHIFT _ -> "<<"
63: | RIGHTSHIFT _ -> ">>"
64: | STARSTAR _ -> "**"
65: | LESSCOLON _ -> "<:"
66: | COLONGREATER _ -> ":>"
67: | DOTDOT _ -> ".."
68: | COLONCOLON _ -> "::"
69: | PLUSPLUS _ -> "++"
70: | MINUSMINUS _ -> "--"
71: | PLUSEQUAL _ -> "+="
72: | MINUSEQUAL _ -> "-="
73: | STAREQUAL _ -> "*="
74: | SLASHEQUAL _ -> "/="
75: | PERCENTEQUAL _ -> "%="
76: | CARETEQUAL _ -> "^="
77: | VBAREQUAL _ -> "|="
78: | AMPEREQUAL _ -> "&="
79: | TILDEEQUAL _ -> "~="
80: | COLONEQUAL _ -> ":="
81: | RIGHTARROW _ -> "->"
82: | EQRIGHTARROW _ -> "=>"
83: | LEFTARROW _ -> "<-"
84: | LSQANGLE _ -> "[<"
85: | RSQANGLE _ -> ">]"
86: | LSQBAR _ -> "[|"
87: | RSQBAR _ -> "|]"
88: | AMPERAMPER _ -> "&&"
89: | VBARVBAR _ -> "||"
90: | SLOSHAMPER _ -> "\\&"
91: | SLOSHVBAR _ -> "\\|"
92: | SLOSHCIRCUMFLEX _ -> "\\^"
93: | HASHBANG _ -> "#!"
94: | LEFTSHIFTEQUAL _ -> "<<="
95: | RIGHTSHIFTEQUAL _ -> ">>="
96: | LEFTRIGHTARROW _ -> "<->"
97: | ANDEQEQUAL _ -> "&=="
98: | ANDNOTEQUAL _ -> "&!="
99: | ANDLESSEQUAL _ -> "&<="
100: | ANDGREATEREQUAL _ -> "&>="
101: | DOTDOTDOT _ -> "..."
102: | DOTRIGHTARROW _ -> ".->"
103: | LONGRIGHTARROW _ -> "-->"
104: | PARSE_ACTION _ -> "=>#"
105: | HASHBANGSLASH _ -> "#!/"
106: | ALL _ -> "all"
107: | ASSERT _ -> "assert"
108: | AXIOM _ -> "axiom"
109: | BODY _ -> "body"
110: | CALL _ -> "call"
111: | CASE _ -> "case"
112: | CASENO _ -> "caseno"
113: | CCLASS _ -> "cclass"
114: | CFUNCTION _ -> "cfun"
115: | CLASS _ -> "class"
116: | COMMENT_KEYWORD _ -> "comment"
117: | COMPOUND _ -> "compound"
118: | CONST _ -> "const"
119: | CPARSE _ -> "cparse"
120: | CPROCEDURE _ -> "cproc"
121: | CSTRUCT _ -> "cstruct"
122: | CTOR _ -> "ctor"
123: | CTYPES _ -> "ctypes"
124: | DEF _ -> "def"
125: | DO _ -> "do"
126: | DONE _ -> "done"
127: | ELIF _ -> "elif"
128: | ELSE _ -> "else"
129: | ENDCASE _ -> "endcase"
130: | ENDIF _ -> "endif"
131: | ENDMATCH _ -> "endmatch"
132: | ENUM _ -> "enum"
133: | EXPECT _ -> "expect"
134: | EXPORT _ -> "export"
135: | FOR _ -> "for"
136: | FORGET _ -> "forget"
137: | FORK _ -> "fork"
138: | FUNCTOR _ -> "functor"
139: | FUNCTION _ -> "fun"
140: | GENERATOR _ -> "gen"
141: | GOTO _ -> "goto"
142: | HALT _ -> "halt"
143: | HEADER _ -> "header"
144: | IDENT _ -> "ident"
145: | INCLUDE _ -> "include"
146: | INCOMPLETE _ -> "incomplete"
147: | INF _ -> "inf"
148: | IN _ -> "in"
149: | INSTANCE _ -> "instance"
150: | IS _ -> "is"
151: | INHERIT _ -> "inherit"
152: | INLINE _ -> "inline"
153: | JUMP _ -> "jump"
154: | LEMMA _ -> "lemma"
155: | LET _ -> "let"
156: | LOOP _ -> "loop"
157: | LVAL _ -> "lval"
158: | MACRO _ -> "macro"
159: | MODULE _ -> "module"
160: | NAMESPACE _ -> "namespace"
161: | NAN _ -> "NaN"
162: | NEW _ -> "new"
163: | NOINLINE _ -> "noinline"
164: | NONTERM _ -> "nonterm"
165: | NORETURN _ -> "noreturn"
166: | NOT _ -> "not"
167: | OBJECT _ -> "obj"
168: | OPEN _ -> "open"
169: | PACKAGE _ -> "package"
170: | POD _ -> "pod"
171: | PRIVATE _ -> "private"
172: | PROCEDURE _ -> "proc"
173: | PROPERTY _ -> "property"
174: | REDUCE _ -> "reduce"
175: | REF _ -> "ref"
176: | RENAME _ -> "rename"
177: | REQUIRES _ -> "requires"
178: | RETURN _ -> "return"
179: | STRUCT _ -> "struct"
180: | THEN _ -> "then"
181: | TODO _ -> "todo"
182: | TO _ -> "to"
183: | TYPEDEF _ -> "typedef"
184: | TYPE _ -> "type"
185: | TYPECLASS _ -> "typeclass"
186: | UNION _ -> "union"
187: | USE _ -> "use"
188: | VAL _ -> "val"
189: | VAR _ -> "var"
190: | VIRTUAL _ -> "virtual"
191: | WHERE _ -> "where"
192: | WHEN _ -> "when"
193: | WITH _ -> "with"
194: | YIELD _ -> "yield"
195: | GC_POINTER _ -> "_gc_pointer"
196: | GC_TYPE _ -> "_gc_type"
197: | SVC _ -> "_svc"
198: | DEREF _ -> "_deref"
199: | AND _ -> "and"
200: | AS _ -> "as"
201: | CALLBACK _ -> "callback"
202: | CODE _ -> "code"
203: | IF _ -> "if"
204: | ISIN _ -> "isin"
205: | MATCH _ -> "match"
206: | NOEXPAND _ -> "noexpand"
207: | OF _ -> "of"
208: | OR _ -> "or"
209: | PARSE _ -> "parse"
210: | REGEXP _ -> "regexp"
211: | REGLEX _ -> "reglex"
212: | REGMATCH _ -> "regmatch"
213: | THE _ -> "the"
214: | TYPEMATCH _ -> "typematch"
215: | TYPECASE _ -> "typecase"
216: | WHENCE _ -> "whence"
217: | UNLESS _ -> "unless"
218: | UNDERSCORE _ -> "_"
219: | EXPRESSION _ -> "expr"
220: | FLOAT_LITERAL _ -> "float_literal"
221: | INTEGER_LITERAL _ -> "integer_literal"
222: | STRING_LITERAL _ -> "string_literal"
223: | STATEMENT _ -> "statement"
224: | STATEMENTS _ -> "statements"
225: # 49 "./lpsrc/flx_lexer.ipk"
226: | COMMENT s -> s (* C style comment, includes the /* */ pair *)
227: | COMMENT_NEWLINE s -> "// " ^ s ^ "<NEWLINE>"
228: | WHITE i -> String.make i ' '
229: | NEWLINE -> "<NEWLINE>"
230: | ENDMARKER -> "<<EOF>>"
231: | ERRORTOKEN (sref,s) -> "<<ERROR '"^ s ^"'>>"
232: | SLOSH -> "\\"
233:
234: let name_of_token (tok :Flx_parse.token): string =
235: match tok with
236: | NAME (sr,s) -> "NAME"
237: | INTEGER (sr,t,i) -> "INTEGER"
238: | FLOAT (sr,t,v) -> "FLOAT"
239: | STRING (sr,s) -> "STRING"
240: | CSTRING (sr,s) -> "CSTRING"
241: | FSTRING (sr,s) -> "FSTRING"
242: | QSTRING (sr,s) -> "QSTRING"
243: | WSTRING (sr,s) -> "WSTRING"
244: | USTRING (sr,s) -> "USTRING"
245: | USER10 (sr,op,f) -> "USER10"
246: | USERLB _ -> "USERLB"
247: | USERRB _ -> "USERRB"
248: | USER_KEYWORD (sr,s) -> s
249: | USER_STATEMENT_KEYWORD (sr,s,_,_) -> s
250: | USER_STATEMENT_DRIVER (sr,s,_) -> s
251: | HASH_INCLUDE_FILES _ -> "HASH_INCLUDE_FILES"
252: | TOKEN_LIST _ -> "TOKEN_LIST"
253: (*
254: | PARSE_ACTION sr -> "PARSE_ACTION"
255: *)
256: | DOLLAR _ -> "DOLLAR"
257: | QUEST _ -> "QUEST"
258: | EXCLAMATION _ -> "EXCLAMATION"
259: | LPAR _ -> "LPAR"
260: | RPAR _ -> "RPAR"
261: | LSQB _ -> "LSQB"
262: | RSQB _ -> "RSQB"
263: | LBRACE _ -> "LBRACE"
264: | RBRACE _ -> "RBRACE"
265: | COLON _ -> "COLON"
266: | COMMA _ -> "COMMA"
267: | SEMI _ -> "SEMI"
268: | PLUS _ -> "PLUS"
269: | MINUS _ -> "MINUS"
270: | STAR _ -> "STAR"
271: | SLASH _ -> "SLASH"
272: | VBAR _ -> "VBAR"
273: | AMPER _ -> "AMPER"
274: | LESS _ -> "LESS"
275: | GREATER _ -> "GREATER"
276: | EQUAL _ -> "EQUAL"
277: | DOT _ -> "DOT"
278: | PERCENT _ -> "PERCENT"
279: | BACKQUOTE _ -> "BACKQUOTE"
280: | TILDE _ -> "TILDE"
281: | CIRCUMFLEX _ -> "CIRCUMFLEX"
282: | HASH _ -> "HASH"
283: | ANDLESS _ -> "ANDLESS"
284: | ANDGREATER _ -> "ANDGREATER"
285: | EQEQUAL _ -> "EQEQUAL"
286: | NOTEQUAL _ -> "NOTEQUAL"
287: | LESSEQUAL _ -> "LESSEQUAL"
288: | GREATEREQUAL _ -> "GREATEREQUAL"
289: | LEFTSHIFT _ -> "LEFTSHIFT"
290: | RIGHTSHIFT _ -> "RIGHTSHIFT"
291: | STARSTAR _ -> "STARSTAR"
292: | LESSCOLON _ -> "LESSCOLON"
293: | COLONGREATER _ -> "COLONGREATER"
294: | DOTDOT _ -> "DOTDOT"
295: | COLONCOLON _ -> "COLONCOLON"
296: | PLUSPLUS _ -> "PLUSPLUS"
297: | MINUSMINUS _ -> "MINUSMINUS"
298: | PLUSEQUAL _ -> "PLUSEQUAL"
299: | MINUSEQUAL _ -> "MINUSEQUAL"
300: | STAREQUAL _ -> "STAREQUAL"
301: | SLASHEQUAL _ -> "SLASHEQUAL"
302: | PERCENTEQUAL _ -> "PERCENTEQUAL"
303: | CARETEQUAL _ -> "CARETEQUAL"
304: | VBAREQUAL _ -> "VBAREQUAL"
305: | AMPEREQUAL _ -> "AMPEREQUAL"
306: | TILDEEQUAL _ -> "TILDEEQUAL"
307: | COLONEQUAL _ -> "COLONEQUAL"
308: | RIGHTARROW _ -> "RIGHTARROW"
309: | EQRIGHTARROW _ -> "EQRIGHTARROW"
310: | LEFTARROW _ -> "LEFTARROW"
311: | LSQANGLE _ -> "LSQANGLE"
312: | RSQANGLE _ -> "RSQANGLE"
313: | LSQBAR _ -> "LSQBAR"
314: | RSQBAR _ -> "RSQBAR"
315: | AMPERAMPER _ -> "AMPERAMPER"
316: | VBARVBAR _ -> "VBARVBAR"
317: | SLOSHAMPER _ -> "SLOSHAMPER"
318: | SLOSHVBAR _ -> "SLOSHVBAR"
319: | SLOSHCIRCUMFLEX _ -> "SLOSHCIRCUMFLEX"
320: | HASHBANG _ -> "HASHBANG"
321: | LEFTSHIFTEQUAL _ -> "LEFTSHIFTEQUAL"
322: | RIGHTSHIFTEQUAL _ -> "RIGHTSHIFTEQUAL"
323: | LEFTRIGHTARROW _ -> "LEFTRIGHTARROW"
324: | ANDEQEQUAL _ -> "ANDEQEQUAL"
325: | ANDNOTEQUAL _ -> "ANDNOTEQUAL"
326: | ANDLESSEQUAL _ -> "ANDLESSEQUAL"
327: | ANDGREATEREQUAL _ -> "ANDGREATEREQUAL"
328: | DOTDOTDOT _ -> "DOTDOTDOT"
329: | DOTRIGHTARROW _ -> "DOTRIGHTARROW"
330: | LONGRIGHTARROW _ -> "LONGRIGHTARROW"
331: | PARSE_ACTION _ -> "PARSE_ACTION"
332: | HASHBANGSLASH _ -> "HASHBANGSLASH"
333: | ALL _ -> "ALL"
334: | ASSERT _ -> "ASSERT"
335: | AXIOM _ -> "AXIOM"
336: | BODY _ -> "BODY"
337: | CALL _ -> "CALL"
338: | CASE _ -> "CASE"
339: | CASENO _ -> "CASENO"
340: | CCLASS _ -> "CCLASS"
341: | CFUNCTION _ -> "CFUNCTION"
342: | CLASS _ -> "CLASS"
343: | COMMENT_KEYWORD _ -> "COMMENT_KEYWORD"
344: | COMPOUND _ -> "COMPOUND"
345: | CONST _ -> "CONST"
346: | CPARSE _ -> "CPARSE"
347: | CPROCEDURE _ -> "CPROCEDURE"
348: | CSTRUCT _ -> "CSTRUCT"
349: | CTOR _ -> "CTOR"
350: | CTYPES _ -> "CTYPES"
351: | DEF _ -> "DEF"
352: | DO _ -> "DO"
353: | DONE _ -> "DONE"
354: | ELIF _ -> "ELIF"
355: | ELSE _ -> "ELSE"
356: | ENDCASE _ -> "ENDCASE"
357: | ENDIF _ -> "ENDIF"
358: | ENDMATCH _ -> "ENDMATCH"
359: | ENUM _ -> "ENUM"
360: | EXPECT _ -> "EXPECT"
361: | EXPORT _ -> "EXPORT"
362: | FOR _ -> "FOR"
363: | FORGET _ -> "FORGET"
364: | FORK _ -> "FORK"
365: | FUNCTOR _ -> "FUNCTOR"
366: | FUNCTION _ -> "FUNCTION"
367: | GENERATOR _ -> "GENERATOR"
368: | GOTO _ -> "GOTO"
369: | HALT _ -> "HALT"
370: | HEADER _ -> "HEADER"
371: | IDENT _ -> "IDENT"
372: | INCLUDE _ -> "INCLUDE"
373: | INCOMPLETE _ -> "INCOMPLETE"
374: | INF _ -> "INF"
375: | IN _ -> "IN"
376: | INSTANCE _ -> "INSTANCE"
377: | IS _ -> "IS"
378: | INHERIT _ -> "INHERIT"
379: | INLINE _ -> "INLINE"
380: | JUMP _ -> "JUMP"
381: | LEMMA _ -> "LEMMA"
382: | LET _ -> "LET"
383: | LOOP _ -> "LOOP"
384: | LVAL _ -> "LVAL"
385: | MACRO _ -> "MACRO"
386: | MODULE _ -> "MODULE"
387: | NAMESPACE _ -> "NAMESPACE"
388: | NAN _ -> "NAN"
389: | NEW _ -> "NEW"
390: | NOINLINE _ -> "NOINLINE"
391: | NONTERM _ -> "NONTERM"
392: | NORETURN _ -> "NORETURN"
393: | NOT _ -> "NOT"
394: | OBJECT _ -> "OBJECT"
395: | OPEN _ -> "OPEN"
396: | PACKAGE _ -> "PACKAGE"
397: | POD _ -> "POD"
398: | PRIVATE _ -> "PRIVATE"
399: | PROCEDURE _ -> "PROCEDURE"
400: | PROPERTY _ -> "PROPERTY"
401: | REDUCE _ -> "REDUCE"
402: | REF _ -> "REF"
403: | RENAME _ -> "RENAME"
404: | REQUIRES _ -> "REQUIRES"
405: | RETURN _ -> "RETURN"
406: | STRUCT _ -> "STRUCT"
407: | THEN _ -> "THEN"
408: | TODO _ -> "TODO"
409: | TO _ -> "TO"
410: | TYPEDEF _ -> "TYPEDEF"
411: | TYPE _ -> "TYPE"
412: | TYPECLASS _ -> "TYPECLASS"
413: | UNION _ -> "UNION"
414: | USE _ -> "USE"
415: | VAL _ -> "VAL"
416: | VAR _ -> "VAR"
417: | VIRTUAL _ -> "VIRTUAL"
418: | WHERE _ -> "WHERE"
419: | WHEN _ -> "WHEN"
420: | WITH _ -> "WITH"
421: | YIELD _ -> "YIELD"
422: | GC_POINTER _ -> "GC_POINTER"
423: | GC_TYPE _ -> "GC_TYPE"
424: | SVC _ -> "SVC"
425: | DEREF _ -> "DEREF"
426: | AND _ -> "AND"
427: | AS _ -> "AS"
428: | CALLBACK _ -> "CALLBACK"
429: | CODE _ -> "CODE"
430: | IF _ -> "IF"
431: | ISIN _ -> "ISIN"
432: | MATCH _ -> "MATCH"
433: | NOEXPAND _ -> "NOEXPAND"
434: | OF _ -> "OF"
435: | OR _ -> "OR"
436: | PARSE _ -> "PARSE"
437: | REGEXP _ -> "REGEXP"
438: | REGLEX _ -> "REGLEX"
439: | REGMATCH _ -> "REGMATCH"
440: | THE _ -> "THE"
441: | TYPEMATCH _ -> "TYPEMATCH"
442: | TYPECASE _ -> "TYPECASE"
443: | WHENCE _ -> "WHENCE"
444: | UNLESS _ -> "UNLESS"
445: | UNDERSCORE _ -> "UNDERSCORE"
446: | EXPRESSION _ -> "EXPRESSION"
447: | FLOAT_LITERAL _ -> "FLOAT_LITERAL"
448: | INTEGER_LITERAL _ -> "INTEGER_LITERAL"
449: | STRING_LITERAL _ -> "STRING_LITERAL"
450: | STATEMENT _ -> "STATEMENT"
451: | STATEMENTS _ -> "STATEMENTS"
452: # 91 "./lpsrc/flx_lexer.ipk"
453:
454: | COMMENT s -> "COMMENT"
455: | COMMENT_NEWLINE s -> "COMMENT_NEWLINE"
456: | WHITE i -> "WHITE"
457: | NEWLINE -> "NEWLINE"
458: | ENDMARKER -> "ENDMARKER"
459: | ERRORTOKEN (sref,s) -> "ERRORTOKEN"
460: | SLOSH -> "SLOSH"
461:
462: let src_of_token t = match t with
463: | NEWLINE
464: | COMMENT _
465: | COMMENT_NEWLINE _
466: | WHITE _
467: | ENDMARKER
468: | SLOSH
469: | HASH_INCLUDE_FILES _
470: | TOKEN_LIST _
471: -> ("",0,0,0)
472:
473: | NAME (s,_)
474: | INTEGER (s,_,_)
475: | FLOAT (s,_,_)
476: | STRING (s,_)
477: | CSTRING (s,_)
478: | FSTRING (s,_)
479: | QSTRING (s,_)
480: | WSTRING (s,_)
481: | USTRING (s,_)
482: | USER10 (s,_,_)
483: | USERLB (s,_,_)
484: | USERRB (s,_)
485: | USER_KEYWORD (s,_)
486: | USER_STATEMENT_KEYWORD (s,_,_,_)
487: | USER_STATEMENT_DRIVER (s,_,_)
488: (*
489: | PARSE_ACTION s
490: *)
491: | ERRORTOKEN (s,_)
492:
493: | DOLLAR s
494: | QUEST s
495: | EXCLAMATION s
496: | LPAR s
497: | RPAR s
498: | LSQB s
499: | RSQB s
500: | LBRACE s
501: | RBRACE s
502: | COLON s
503: | COMMA s
504: | SEMI s
505: | PLUS s
506: | MINUS s
507: | STAR s
508: | SLASH s
509: | VBAR s
510: | AMPER s
511: | LESS s
512: | GREATER s
513: | EQUAL s
514: | DOT s
515: | PERCENT s
516: | BACKQUOTE s
517: | TILDE s
518: | CIRCUMFLEX s
519: | HASH s
520: | ANDLESS s
521: | ANDGREATER s
522: | EQEQUAL s
523: | NOTEQUAL s
524: | LESSEQUAL s
525: | GREATEREQUAL s
526: | LEFTSHIFT s
527: | RIGHTSHIFT s
528: | STARSTAR s
529: | LESSCOLON s
530: | COLONGREATER s
531: | DOTDOT s
532: | COLONCOLON s
533: | PLUSPLUS s
534: | MINUSMINUS s
535: | PLUSEQUAL s
536: | MINUSEQUAL s
537: | STAREQUAL s
538: | SLASHEQUAL s
539: | PERCENTEQUAL s
540: | CARETEQUAL s
541: | VBAREQUAL s
542: | AMPEREQUAL s
543: | TILDEEQUAL s
544: | COLONEQUAL s
545: | RIGHTARROW s
546: | EQRIGHTARROW s
547: | LEFTARROW s
548: | LSQANGLE s
549: | RSQANGLE s
550: | LSQBAR s
551: | RSQBAR s
552: | AMPERAMPER s
553: | VBARVBAR s
554: | SLOSHAMPER s
555: | SLOSHVBAR s
556: | SLOSHCIRCUMFLEX s
557: | HASHBANG s
558: | LEFTSHIFTEQUAL s
559: | RIGHTSHIFTEQUAL s
560: | LEFTRIGHTARROW s
561: | ANDEQEQUAL s
562: | ANDNOTEQUAL s
563: | ANDLESSEQUAL s
564: | ANDGREATEREQUAL s
565: | DOTDOTDOT s
566: | DOTRIGHTARROW s
567: | LONGRIGHTARROW s
568: | PARSE_ACTION s
569: | HASHBANGSLASH s
570: | ALL s
571: | ASSERT s
572: | AXIOM s
573: | BODY s
574: | CALL s
575: | CASE s
576: | CASENO s
577: | CCLASS s
578: | CFUNCTION s
579: | CLASS s
580: | COMMENT_KEYWORD s
581: | COMPOUND s
582: | CONST s
583: | CPARSE s
584: | CPROCEDURE s
585: | CSTRUCT s
586: | CTOR s
587: | CTYPES s
588: | DEF s
589: | DO s
590: | DONE s
591: | ELIF s
592: | ELSE s
593: | ENDCASE s
594: | ENDIF s
595: | ENDMATCH s
596: | ENUM s
597: | EXPECT s
598: | EXPORT s
599: | FOR s
600: | FORGET s
601: | FORK s
602: | FUNCTOR s
603: | FUNCTION s
604: | GENERATOR s
605: | GOTO s
606: | HALT s
607: | HEADER s
608: | IDENT s
609: | INCLUDE s
610: | INCOMPLETE s
611: | INF s
612: | IN s
613: | INSTANCE s
614: | IS s
615: | INHERIT s
616: | INLINE s
617: | JUMP s
618: | LEMMA s
619: | LET s
620: | LOOP s
621: | LVAL s
622: | MACRO s
623: | MODULE s
624: | NAMESPACE s
625: | NAN s
626: | NEW s
627: | NOINLINE s
628: | NONTERM s
629: | NORETURN s
630: | NOT s
631: | OBJECT s
632: | OPEN s
633: | PACKAGE s
634: | POD s
635: | PRIVATE s
636: | PROCEDURE s
637: | PROPERTY s
638: | REDUCE s
639: | REF s
640: | RENAME s
641: | REQUIRES s
642: | RETURN s
643: | STRUCT s
644: | THEN s
645: | TODO s
646: | TO s
647: | TYPEDEF s
648: | TYPE s
649: | TYPECLASS s
650: | UNION s
651: | USE s
652: | VAL s
653: | VAR s
654: | VIRTUAL s
655: | WHERE s
656: | WHEN s
657: | WITH s
658: | YIELD s
659: | GC_POINTER s
660: | GC_TYPE s
661: | SVC s
662: | DEREF s
663: | AND s
664: | AS s
665: | CALLBACK s
666: | CODE s
667: | IF s
668: | ISIN s
669: | MATCH s
670: | NOEXPAND s
671: | OF s
672: | OR s
673: | PARSE s
674: | REGEXP s
675: | REGLEX s
676: | REGMATCH s
677: | THE s
678: | TYPEMATCH s
679: | TYPECASE s
680: | WHENCE s
681: | UNLESS s
682: | UNDERSCORE s
683: | EXPRESSION s
684: | FLOAT_LITERAL s
685: | INTEGER_LITERAL s
686: | STRING_LITERAL s
687: | STATEMENT s
688: | STATEMENTS s
689: # 143 "./lpsrc/flx_lexer.ipk"
690: -> s
691:
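Note that layout and synthetic tokens (comments, whitespace, NEWLINE,
ENDMARKER, SLOSH, HASH_INCLUDE_FILES, TOKEN_LIST) all map to the dummy
source reference. A one-line sanity check, assuming the definitions above:

    let () = assert (Flx_prelex.src_of_token Flx_parse.NEWLINE = ("", 0, 0, 0))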
Start ocaml section to src/flx_lexstate.ml[1 /1 ]
1: # 147 "./lpsrc/flx_lexer.ipk"
2: open Flx_util
3: open Flx_parse
4: open Flx_string
5: open Big_int
6: open Flx_exceptions
7: open Flx_ast
8: open List
9:
10: let special_tokens =
11: [
12: ("$",(fun (sr,s)-> DOLLAR sr));
13: ("?",(fun (sr,s)-> QUEST sr));
14: ("!",(fun (sr,s)-> EXCLAMATION sr));
15: ("(",(fun (sr,s)-> LPAR sr));
16: (")",(fun (sr,s)-> RPAR sr));
17: ("[",(fun (sr,s)-> LSQB sr));
18: ("]",(fun (sr,s)-> RSQB sr));
19: ("{",(fun (sr,s)-> LBRACE sr));
20: ("}",(fun (sr,s)-> RBRACE sr));
21: (":",(fun (sr,s)-> COLON sr));
22: (",",(fun (sr,s)-> COMMA sr));
23: (";",(fun (sr,s)-> SEMI sr));
24: ("+",(fun (sr,s)-> PLUS sr));
25: ("-",(fun (sr,s)-> MINUS sr));
26: ("*",(fun (sr,s)-> STAR sr));
27: ("/",(fun (sr,s)-> SLASH sr));
28: ("|",(fun (sr,s)-> VBAR sr));
29: ("&",(fun (sr,s)-> AMPER sr));
30: ("<",(fun (sr,s)-> LESS sr));
31: (">",(fun (sr,s)-> GREATER sr));
32: ("=",(fun (sr,s)-> EQUAL sr));
33: (".",(fun (sr,s)-> DOT sr));
34: ("%",(fun (sr,s)-> PERCENT sr));
35: ("`",(fun (sr,s)-> BACKQUOTE sr));
36: ("~",(fun (sr,s)-> TILDE sr));
37: ("^",(fun (sr,s)-> CIRCUMFLEX sr));
38: ("#",(fun (sr,s)-> HASH sr));
39: ("&<",(fun (sr,s)-> ANDLESS sr));
40: ("&>",(fun (sr,s)-> ANDGREATER sr));
41: ("==",(fun (sr,s)-> EQEQUAL sr));
42: ("!=",(fun (sr,s)-> NOTEQUAL sr));
43: ("<=",(fun (sr,s)-> LESSEQUAL sr));
44: (">=",(fun (sr,s)-> GREATEREQUAL sr));
45: ("<<",(fun (sr,s)-> LEFTSHIFT sr));
46: (">>",(fun (sr,s)-> RIGHTSHIFT sr));
47: ("**",(fun (sr,s)-> STARSTAR sr));
48: ("<:",(fun (sr,s)-> LESSCOLON sr));
49: (":>",(fun (sr,s)-> COLONGREATER sr));
50: ("..",(fun (sr,s)-> DOTDOT sr));
51: ("::",(fun (sr,s)-> COLONCOLON sr));
52: ("++",(fun (sr,s)-> PLUSPLUS sr));
53: ("--",(fun (sr,s)-> MINUSMINUS sr));
54: ("+=",(fun (sr,s)-> PLUSEQUAL sr));
55: ("-=",(fun (sr,s)-> MINUSEQUAL sr));
56: ("*=",(fun (sr,s)-> STAREQUAL sr));
57: ("/=",(fun (sr,s)-> SLASHEQUAL sr));
58: ("%=",(fun (sr,s)-> PERCENTEQUAL sr));
59: ("^=",(fun (sr,s)-> CARETEQUAL sr));
60: ("|=",(fun (sr,s)-> VBAREQUAL sr));
61: ("&=",(fun (sr,s)-> AMPEREQUAL sr));
62: ("~=",(fun (sr,s)-> TILDEEQUAL sr));
63: (":=",(fun (sr,s)-> COLONEQUAL sr));
64: ("->",(fun (sr,s)-> RIGHTARROW sr));
65: ("=>",(fun (sr,s)-> EQRIGHTARROW sr));
66: ("<-",(fun (sr,s)-> LEFTARROW sr));
67: ("[<",(fun (sr,s)-> LSQANGLE sr));
68: (">]",(fun (sr,s)-> RSQANGLE sr));
69: ("[|",(fun (sr,s)-> LSQBAR sr));
70: ("|]",(fun (sr,s)-> RSQBAR sr));
71: ("&&",(fun (sr,s)-> AMPERAMPER sr));
72: ("||",(fun (sr,s)-> VBARVBAR sr));
73: ("\\&",(fun (sr,s)-> SLOSHAMPER sr));
74: ("\\|",(fun (sr,s)-> SLOSHVBAR sr));
75: ("\\^",(fun (sr,s)-> SLOSHCIRCUMFLEX sr));
76: ("#!",(fun (sr,s)-> HASHBANG sr));
77: ("<<=",(fun (sr,s)-> LEFTSHIFTEQUAL sr));
78: (">>=",(fun (sr,s)-> RIGHTSHIFTEQUAL sr));
79: ("<->",(fun (sr,s)-> LEFTRIGHTARROW sr));
80: ("&==",(fun (sr,s)-> ANDEQEQUAL sr));
81: ("&!=",(fun (sr,s)-> ANDNOTEQUAL sr));
82: ("&<=",(fun (sr,s)-> ANDLESSEQUAL sr));
83: ("&>=",(fun (sr,s)-> ANDGREATEREQUAL sr));
84: ("...",(fun (sr,s)-> DOTDOTDOT sr));
85: (".->",(fun (sr,s)-> DOTRIGHTARROW sr));
86: ("-->",(fun (sr,s)-> LONGRIGHTARROW sr));
87: ("=>#",(fun (sr,s)-> PARSE_ACTION sr));
88: ("#!/",(fun (sr,s)-> HASHBANGSLASH sr));
89: # 166 "./lpsrc/flx_lexer.ipk"
90: ]
91:
92: let mk_std_tokens () =
93: let tk = Array.make 4 [] in
94: iter (fun (s,f) ->
95: let n = String.length s in
96: assert (n >0 && n <= 3);
97: tk.(n) <- (s,f) :: tk.(n)
98: )
99: special_tokens
100: ;
101: tk
102:
103: exception Duplicate_macro of string
104:
105: class comment_control =
106: object (self)
107: val mutable nesting_level = 0
108: val mutable text = ""
109:
110: method set_text s = text <- s; nesting_level <- 1
111: method append s = text <- text ^ s
112: method get_comment = text
113:
114: method incr = nesting_level <- nesting_level + 1
115: method decr = nesting_level <- nesting_level - 1
116: method get_nesting_level = nesting_level
117: end
118:
119: exception Found_file of string
120:
121: type condition_t = [
122: | `Processing
123: | `Skip_to_endif
124: | `Skip_to_else
125: | `Subscan
126: ]
127:
128: type location = {
129: mutable buf_pos : int;
130: mutable last_buf_pos : int;
131: mutable line_no : int;
132: mutable original_line_no : int;
133: }
134:
135: class file_control
136: (filename' : string)
137: (basedir': string)
138: (incdirs': string list)
139: =
140: object(self)
141: val mutable loc : location = { buf_pos = 0; last_buf_pos = 0; line_no = 1; original_line_no = 1; }
142: method get_loc = loc
143: method set_loc loc' = loc <- loc'
144:
145: (* this is the physical filename *)
146: val original_filename = filename'
147: val incdirs = incdirs'
148: val basedir = basedir'
149:
150: (* this is the generator file name, can be set with #line directive *)
151: val mutable filename = filename'
152: val mutable condition:condition_t list = [`Processing]
153: val macros : (string,string list * Flx_parse.token list) Hashtbl.t = Hashtbl.create 97
154:
155: method incr_lex_counters lexbuf =
156: loc.line_no <- loc.line_no + 1;
157: loc.original_line_no <- loc.original_line_no + 1;
158: loc.last_buf_pos <- loc.buf_pos;
159: loc.buf_pos <- Lexing.lexeme_end lexbuf
160:
161: method set_buf_pos x = loc.buf_pos <- x
162: method get_buf_pos = loc.buf_pos
163: method get_srcref lexbuf =
164: filename,
165: loc.line_no,
166: Lexing.lexeme_start lexbuf - loc.buf_pos + 1,
167: Lexing.lexeme_end lexbuf - loc.buf_pos
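(* Illustrative numbers (assumed): buf_pos holds the byte offset of the
   first character of the current line. If that offset is 8 and a lexeme
   occupies bytes 10..13, then Lexing.lexeme_start = 10 and
   Lexing.lexeme_end = 14, so the columns above come out as 3..6:
   a 1-based start column and an inclusive end column. *)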
168:
169: method get_physical_srcref lexbuf =
170: original_filename,
171: loc.original_line_no,
172: Lexing.lexeme_start lexbuf - loc.buf_pos + 1,
173: Lexing.lexeme_end lexbuf - loc.buf_pos
174:
175: method incr n =
176: loc.line_no <- loc.line_no + n;
177: loc.original_line_no <- loc.original_line_no + n
178:
179: method set_line n lexbuf =
180: loc.line_no <- n;
181: loc.last_buf_pos <- loc.buf_pos;
182: loc.buf_pos <- Lexing.lexeme_end lexbuf;
183: (* this is a hack .. *)
184: loc.original_line_no <- loc.original_line_no + 1
185:
186: method set_filename f = filename <- f
187: method get_relative f =
188: let fn = Filename.concat basedir f in
189: if not (Sys.file_exists fn) then
190: failwith ("Relative include file \""^f^ "\" not found in "^basedir)
191: else fn
192:
193: method get_absolute f =
194: try
195: List.iter
196: (fun d ->
197: let f = Filename.concat d f in
198: if Sys.file_exists f
199: then raise (Found_file f)
200: )
201: incdirs
202: ;
203: failwith ("Library File <" ^ f ^ "> not found in path")
204: with Found_file s -> s
205:
206: method store_macro name params body =
207: Hashtbl.add macros name (params,body)
208:
209: method undef_macro name = Hashtbl.remove macros name
210:
211: method get_macro name =
212: try Some (Hashtbl.find macros name)
213: with Not_found -> None
214:
215: method get_macros = macros
216:
217: method get_incdirs = incdirs
218: method get_condition = List.hd condition
219: method push_condition c = condition <- (c :: condition)
220: method pop_condition = condition <- List.tl condition
221: method set_condition c = condition <- (c :: List.tl condition)
222: method condition_stack_length = List.length condition
223: end
224:
225: class lexer_state filename basedir incdirs expand_expr' =
226: object (self)
227: val expand_expr: string -> expr_t -> expr_t = expand_expr'
228:
229: val mutable include_files: string list = []
230:
231: val comment_ctrl = new comment_control
232: val file_ctrl = new file_control filename basedir incdirs
233: val mutable at_line_start = true
234:
235: val mutable keywords:
236: (string * (srcref * string -> Flx_parse.token)) list array
237: = [| [] |]
238:
239: val mutable symbols:
240: (string * (srcref * string -> Flx_parse.token)) list array
241: = mk_std_tokens ()
242:
243: val nonterminals:
244: (string, (token list * ast_term_t) list) Hashtbl.t
245: = Hashtbl.create 97
246:
247: val mutable brackets: ((string * string) * string) list = []
248:
249: method get_expand_expr = expand_expr
250: method get_include_files = include_files
251: method add_include_file f = include_files <- f :: include_files
252:
253: method get_symbols = symbols
254: method get_nonterminals = nonterminals
255: method get_brackets = brackets
256:
257: method is_at_line_start = at_line_start
258:
259: method inbody = at_line_start <- false
260: method get_srcref lexbuf = file_ctrl#get_srcref lexbuf
261: method get_physical_srcref lexbuf = file_ctrl#get_physical_srcref lexbuf
262: method string_of_srcref lexbuf =
263: match self#get_srcref lexbuf with
264: (filename, lineno, scol,ecol) ->
265: "File \"" ^ filename ^ "\"" ^
266: ", Line " ^ string_of_int lineno ^
267: ", Columns " ^ string_of_int scol ^
268: "-" ^ string_of_int ecol
269:
270: (* comments *)
271: method comment_level = comment_ctrl#get_nesting_level
272: method incr_comment = comment_ctrl#incr
273: method decr_comment = comment_ctrl#decr
274:
275: method set_comment text = comment_ctrl#set_text text
276: method append_comment text = comment_ctrl#append text
277: method get_comment = comment_ctrl#get_comment
278:
279: (* line counting *)
280: method newline lexbuf =
281: at_line_start <- true;
282: file_ctrl#incr_lex_counters lexbuf
283:
284: (* string decoders *)
285: method decode decoder (s : string) : string =
286: let lfcount s =
287: let n = ref 0 in
288: for i = 0 to (String.length s) - 1 do
289: if s.[i] = '\n' then incr n
290: done;
291: !n
292: in
293: file_ctrl#incr (lfcount s);
294: decoder s
295:
296: method set_line n lexbuf =
297: file_ctrl#set_line n lexbuf;
298: at_line_start <- true
299:
300: method set_filename f = file_ctrl#set_filename f
301:
302: method get_loc = file_ctrl#get_loc
303: method set_loc loc' = file_ctrl#set_loc loc'
304: method get_incdirs = file_ctrl#get_incdirs
305: method get_relative f = file_ctrl#get_relative f
306: method get_absolute f = file_ctrl#get_absolute f
307:
308: method get_condition = file_ctrl#get_condition
309: method push_condition c = file_ctrl#push_condition c
310: method pop_condition = file_ctrl#pop_condition
311: method set_condition c = file_ctrl#set_condition c
312: method condition_stack_length = file_ctrl#condition_stack_length
313:
314: method store_macro name parms body = file_ctrl#store_macro name parms body
315: method undef_macro name = file_ctrl#undef_macro name
316: method get_macro name = file_ctrl#get_macro name
317: method get_macros = file_ctrl#get_macros
318:
319: method add_macros (s:lexer_state) =
320: let h = self#get_macros in
321: Hashtbl.iter
322: (fun k v ->
323: if Hashtbl.mem h k
324: then raise (Duplicate_macro k)
325: else Hashtbl.add h k v
326: )
327: s#get_macros
328: ;
329:
330: (* append new keywords *)
331: let new_keywords = s#get_keywords in
332: let n = Array.length new_keywords in
333: if n > Array.length keywords then begin
334: let old_keywords = keywords in
335: keywords <- Array.make n [];
336: Array.blit old_keywords 0 keywords 0 (Array.length old_keywords)
337: end;
338: for i = 0 to Array.length new_keywords - 1 do
339: keywords.(i) <- new_keywords.(i) @ keywords.(i)
340: done
341: ;
342:
343: (* append new symbols *)
344: let new_symbols = s#get_symbols in
345: let n = Array.length new_symbols in
346: if n > Array.length symbols then begin
347: let old_symbols = symbols in
348: symbols <- Array.make n [];
349: Array.blit old_symbols 0 symbols 0 (Array.length old_symbols)
350: end;
351: for i = 0 to Array.length new_symbols - 1 do
352: symbols.(i) <- new_symbols.(i) @ symbols.(i)
353: done
354: ;
355:
356: brackets <- s#get_brackets @ brackets
357: ;
358:
359: Hashtbl.iter
360: (fun k ls ->
361: let old = try Hashtbl.find nonterminals k with Not_found -> [] in
362: Hashtbl.replace nonterminals k (ls @ old)
363: )
364: s#get_nonterminals
365:
366: method get_keywords = keywords
367:
368: method adjust_keyword_array n =
369: let m = Array.length keywords in
370: if m <= n then begin
371: let a = Array.make (n+1) [] in
372: Array.blit keywords 0 a 0 m;
373: keywords <- a
374: end
375:
376: method adjust_symbol_array n =
377: let m = Array.length symbols in
378: if m <= n then begin
379: let a = Array.make (n+1) [] in
380: Array.blit symbols 0 a 0 m;
381: symbols <- a
382: end
383:
384: method add_infix_symbol (prec:int) s f =
385: let n = String.length s in
386: self#adjust_symbol_array n;
387: let elt = s,(fun (sr,_) -> Flx_parse.USER10 (sr,s,f)) in
388: symbols.(n) <- elt :: symbols.(n)
389:
390: method add_infix_keyword (prec:int) s f =
391: let n = String.length s in
392: self#adjust_keyword_array n;
393: let elt = s,(fun (sr,_) -> Flx_parse.USER10 (sr,s,f)) in
394: keywords.(n) <- elt :: keywords.(n)
395:
396: method add_keyword (s:string) =
397: let n = String.length s in
398: self#adjust_keyword_array n;
399: let elt = s,(fun (sr,_) -> Flx_parse.USER_KEYWORD (sr,s)) in
400: keywords.(n) <- elt :: keywords.(n)
401:
402: method add_statement_keyword (s:string) (sr:range_srcref) (toks: Flx_parse.token list) (term:ast_term_t) =
403: let n = String.length s in
404: self#adjust_keyword_array n;
405: let tokss =
406: try match (assoc s keywords.(n)) (("",0,0,0), "") with
407: | Flx_parse.USER_STATEMENT_KEYWORD (_,_,tokss,_) -> (toks,term) :: tokss
408: | _ -> clierr sr ("Conflicting meaning of keyword " ^ s)
409: with Not_found -> [toks,term]
410: in
411: let elt = s,(fun (sr,_) -> Flx_parse.USER_STATEMENT_KEYWORD (sr,s,tokss,nonterminals)) in
412: keywords.(n) <- elt :: remove_assoc s keywords.(n)
413:
414:
415: method add_nonterminal (s:string) (sr:range_srcref) (toks: Flx_parse.token list) (term:ast_term_t) =
416: let productions = try Hashtbl.find nonterminals s with Not_found -> [] in
417: Hashtbl.replace nonterminals s ((toks,term)::productions)
418:
419: method add_brackets tok1 tok2 f =
420: let n1 = String.length tok1 in
421: let n2 = String.length tok2 in
422: let n = max n1 n2 in
423: self#adjust_symbol_array n;
424: brackets <- ((tok1,tok2),f) :: brackets;
425: let rbs =
426: let rec aux fnmap brs = match brs with
427: | [] -> rev fnmap
428: | ((l,r),f) :: t ->
429: if l = tok1 then aux ((r,f)::fnmap) t
430: else aux fnmap t
431: in aux [] brackets
432: in
433: let elt = tok1,(fun (sr,_) -> Flx_parse.USERLB (sr,rbs,tok1)) in
434: symbols.(n1) <- elt :: symbols.(n1)
435: ;
436: let elt = tok2,(fun (sr,_) -> Flx_parse.USERRB (sr,tok2)) in
437: symbols.(n2) <- elt :: symbols.(n2)
438:
439: method tokenise_symbols lexbuf (s:string) : token list =
440: (* temporary hack *)
441: let sr = self#get_srcref lexbuf in
442: let rec tk tks s =
443: let m = String.length s in
444: let rec aux n =
445: if n = 0 then (* cannot match even first char *)
446: tk (ERRORTOKEN (sr,String.sub s 0 1)::tks) (String.sub s 1 (m-1))
447: else
448: let f =
449: try Some (assoc (String.sub s 0 n) symbols.(n))
450: with Not_found -> None
451: in
452: match f with
453: | None -> aux (n-1)
454: | Some f ->
455: (* next token *)
456: tk (f (sr,String.sub s 0 n) :: tks) (String.sub s n (m-n))
457: in
458: let n = String.length s in
459: if n = 0 then rev tks
460: else aux (min n (Array.length symbols - 1))
461: in
462: tk [] s
463: end
464:
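Because symbols are bucketed by length and tokenise_symbols tries the
longest feasible bucket first, compound operators always win over their
prefixes (maximal munch). A minimal sketch, assuming an identity
expression expander and a dummy lexbuf for the srcref:

    let () =
      let state = new Flx_lexstate.lexer_state "demo.flx" "" [] (fun _ e -> e) in
      let lexbuf = Lexing.from_string "" in
      let toks = state#tokenise_symbols lexbuf "&<=->" in
      (* prints ANDLESSEQUAL then RIGHTARROW: "&<=" beats "&<" and "&" *)
      List.iter (fun t -> print_endline (Flx_prelex.name_of_token t)) toks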
Start ocaml section to src/flx_lexstate.mli[1 /1 ]
1: # 543 "./lpsrc/flx_lexer.ipk"
2: open Flx_ast
3: open Flx_string
4: open Flx_parse
5:
6: exception Duplicate_macro of string
7:
8: class comment_control :
9: object
10: val mutable nesting_level : int
11: val mutable text : string
12: method append : string -> unit
13: method decr : unit
14: method get_comment : string
15: method get_nesting_level : int
16: method incr : unit
17: method set_text : string -> unit
18: end
19:
20: type condition_t = [
21: | `Processing
22: | `Skip_to_endif
23: | `Skip_to_else
24: | `Subscan
25: ]
26:
27: type location = {
28: mutable buf_pos : int;
29: mutable last_buf_pos : int;
30: mutable line_no : int;
31: mutable original_line_no : int;
32: }
33:
34:
35: class file_control :
36: string ->
37: string ->
38: string list ->
39: object
40: val mutable loc: location
41: val mutable filename : string
42: val mutable condition : condition_t list
43: val macros : (string,string list * token list) Hashtbl.t
44:
45: method get_loc : location
46: method set_loc : location -> unit
47:
48: method get_buf_pos : int
49: method get_srcref : Lexing.lexbuf -> srcref
50: method get_physical_srcref : Lexing.lexbuf -> srcref
51: method incr : int -> unit
52: method incr_lex_counters : Lexing.lexbuf -> unit
53: method set_buf_pos : int -> unit
54: method set_line : int -> Lexing.lexbuf -> unit
55: method set_filename : string -> unit
56: method get_relative : string -> string
57: method get_incdirs : string list
58: method get_absolute : string -> string
59:
60: method get_condition : condition_t
61: method push_condition : condition_t -> unit
62: method pop_condition : unit
63: method set_condition : condition_t -> unit
64: method condition_stack_length : int
65:
66: method store_macro : string -> string list -> token list -> unit
67: method undef_macro : string -> unit
68: method get_macro : string -> (string list * token list) option
69: method get_macros : (string,string list * token list) Hashtbl.t
70: end
71:
72: class lexer_state :
73: string ->
74: string ->
75: string list ->
76: (string -> expr_t->expr_t) ->
77: object
78: val expand_expr : string -> expr_t -> expr_t
79: val comment_ctrl : comment_control
80: val file_ctrl : file_control
81:
82: val mutable symbols :
83: (string * (srcref * string -> token)) list array
84: val mutable keywords:
85: (string * (srcref * string -> token)) list array
86: val mutable brackets: ((string * string) * string) list
87: val nonterminals: (string, (token list * ast_term_t) list) Hashtbl.t
88: val mutable include_files : string list
89:
90: method get_expand_expr : string -> expr_t -> expr_t
91:
92: method add_include_file : string -> unit
93: method get_include_files : string list
94:
95: method append_comment : string -> unit
96: method comment_level : int
97: method decode : (string -> string) -> string -> string
98: method decr_comment : unit
99: method get_comment : string
100: method get_srcref : Lexing.lexbuf -> srcref
101: method get_physical_srcref : Lexing.lexbuf -> srcref
102: method incr_comment : unit
103: method newline : Lexing.lexbuf -> unit
104: method set_comment : string -> unit
105: method is_at_line_start : bool
106: method inbody: unit
107: method string_of_srcref : Lexing.lexbuf -> string
108: method set_line : int -> Lexing.lexbuf-> unit
109: method set_filename : string -> unit
110: method get_incdirs : string list
111: method get_relative : string -> string
112: method get_absolute : string -> string
113:
114: method get_condition : condition_t
115: method push_condition : condition_t -> unit
116: method pop_condition : unit
117: method set_condition : condition_t -> unit
118: method condition_stack_length : int
119: method get_loc : location
120: method set_loc : location -> unit
121:
122: method store_macro : string -> string list -> token list -> unit
123: method undef_macro : string -> unit
124: method get_macro : string -> (string list * token list) option
125: method get_macros : (string,string list * token list) Hashtbl.t
126: method add_macros : lexer_state -> unit
127: method adjust_symbol_array : int -> unit
128: method add_infix_symbol:
129: int -> string -> string -> unit
130:
131: method get_keywords:
132: (string * (srcref * string -> token)) list array
133:
134: method adjust_keyword_array : int -> unit
135:
136: method add_infix_keyword:
137: int -> string -> string -> unit
138: method add_keyword:
139: string -> unit
140:
141: method get_brackets:
142: ((string * string) * string) list
143:
144: method get_nonterminals:
145: (string, (token list * ast_term_t) list) Hashtbl.t
146:
147: method get_symbols:
148: (string * (srcref * string -> token)) list array
149:
150: method add_statement_keyword:
151: string -> range_srcref -> token list -> ast_term_t -> unit
152:
153: method add_nonterminal:
154: string -> range_srcref -> token list -> ast_term_t -> unit
155:
156: method add_brackets: string -> string -> string -> unit
157:
158: method tokenise_symbols : Lexing.lexbuf -> string -> token list
159: end
160:
Start ocaml section to src/flx_preproc.mli[1 /1 ]
1: # 704 "./lpsrc/flx_lexer.ipk"
2: open Flx_ast
3: open Flx_parse
4: open Flx_lexstate
5: open Lexing
6:
7: val is_in_string : string -> char -> bool
8: val is_white : char -> bool
9: val is_digit : char -> bool
10: val strip_us : string -> string
11:
12: val pre_tokens_of_lexbuf :
13: (lexer_state -> lexbuf -> token list) ->
14: lexbuf -> lexer_state ->
15: token list
16:
17: val pre_tokens_of_string :
18: (lexer_state -> lexbuf -> token list) ->
19: string -> string ->
20: (string -> expr_t -> expr_t) ->
21: token list
22:
23: val line_directive :
24: lexer_state -> range_srcref -> string -> lexbuf ->
25: token list
26:
27: val include_directive :
28: bool ->
29: lexer_state -> range_srcref -> string ->
30: (lexer_state -> lexbuf -> token list) ->
31: token list
32:
33: val handle_preprocessor :
34: lexer_state -> lexbuf -> string ->
35: (lexer_state -> lexbuf -> token list) ->
36: location ->
37: Lexing.position ->
38: token list
39:
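A sketch of driving the preprocessor over a string, assuming pre_flx_lex
is the generated lexer entry point from flx_lex.mll (of type
lexer_state -> lexbuf -> token list) and using an identity expression
expander:

    let toks =
      Flx_preproc.pre_tokens_of_string
        pre_flx_lex        (* assumed: the flx_lex.mll entry point *)
        "val x = 1;\n"     (* source text to tokenise *)
        "demo.flx"         (* logical filename used in srcrefs *)
        (fun _ e -> e)     (* identity expander *)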
Start data section to src/flx_preproc.ml[1 /1 ]
1: open Flx_util
2: open Flx_parse
3: open Flx_string
4: open Big_int
5: open Flx_exceptions
6: open Flx_lexstate
7: open List
8:
9: let substr = String.sub
10: let len = String.length
11:
12: let is_in_string s ch =
13: try
14: ignore(String.index s ch);
15: true
16: with Not_found ->
17: false
18:
19: let is_white = is_in_string " \t"
20: let is_digit = is_in_string "0123456789"
21:
22: let strip_us s =
23: let n = String.length s in
24: let x = Buffer.create n in
25: for i=0 to n - 1 do
26: match s.[i] with
27: | '_' -> ()
28: | c -> Buffer.add_char x c
29: done;
30: Buffer.contents x
31:
32:
33: let pre_tokens_of_lexbuf lexer buf state =
34: let rec get lst =
35: let ts = lexer state buf in
36: match ts with
37: | [Flx_parse.ENDMARKER] -> lst
38: | _ ->
39: match state#get_condition with
40: | `Processing | `Subscan ->
41: get (rev_append ts lst)
42: | _ ->
43: get lst
44: in
45: rev (get [])
46:
47: let pre_tokens_of_string lexer s filename expand_expr =
48: let state = new lexer_state filename "" [] expand_expr in
49: pre_tokens_of_lexbuf lexer (Lexing.from_string s) state
50:
51: let line_directive state sr s lexbuf =
52: let i = ref 0 in
53: let a =
54: let a = ref 0 in
55: while is_digit s.[!i] do
56: a := !a * 10 + dec_char2int s.[!i];
57: incr i
58: done;
59: !a
60: in
61: if !i = 0
62: then clierr sr "digits required after #line"
63: else begin
64: while is_white s.[!i] do incr i done;
65: if s.[!i] <> '\n'
66: then begin
67: if s.[!i]<>'"'
68: then clierr sr "double quote required after line number in #line"
69: else begin
70: incr i;
71: let j = !i in
72: while s.[!i]<>'"' && s.[!i]<>'\n' do incr i done;
73:
74: if s.[!i]='\n'
75: then clierr sr "double quote required after filename in #line directive"
76: else begin
77: let filename = String.sub s j (!i-j) in
78: state#set_filename filename;
79: state#set_line a lexbuf
80: end
81: end
82: end else begin
83: (* print_endline ("SETTING LINE " ^ string_of_int a); *)
84: state#set_line a lexbuf
85: end
86: end;
87: [NEWLINE]
88:
89:
90: (* output expansion of input in reverse order with exclusions *)
91: let rec expand' state exclude toks =
92: (* output expansion of input
93: in reverse order
94: with bindings and
95: with exclusions,
96: this function is tail rec and used as a loop
97: *)
98: let rec aux exclude inp out bindings =
99: match inp with
100: | [] -> out
101: | h :: ts ->
102: (* do not expand a symbol recursively *)
103: if mem h exclude
104: then aux exclude ts (h :: out) bindings
105: else
106: (* if it is a parameter name, replace by argument *)
107: let b =
108: try Some (assoc h bindings)
109: with Not_found -> None
110: in match b with
111: | Some x ->
112: (* note binding body is in reverse order *)
113: aux exclude ts (x @ out) bindings
114:
115: | None ->
116: match h with
117: | Flx_parse.NAME (sr,s) ->
118: begin match state#get_macro s with
119: (* not a macro : output it *)
120: | None -> aux exclude ts (h :: out) bindings
121:
122: (* argumentless macro : output expansion of body,
123: current bindings are ignored
124: *)
125: | Some ([], body) ->
126: let body = expand' state (h::exclude) body
127: in aux exclude ts (body @ out) bindings
128:
129: | Some (params,body) ->
130: failwith "Can't handle macros with arguments yet"
131: end
132: | _ -> aux exclude ts (h :: out) bindings
133:
134: in aux exclude toks [] []
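(* Worked example with a hypothetical macro: after  #define ONE 1,
   get_macro "ONE" yields Some ([], [INTEGER ...]), so expanding
   [NAME "x"; NAME "ONE"] produces the reversed [INTEGER ...; NAME "x"].
   While a body is expanded its own name token is pushed onto the
   exclusion list; this is intended to stop a self-referential
   #define ONE ONE  from recursing forever. *)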
135:
136: let eval state toks =
137: let e = Flx_tok.parse_tokens Flx_parse.expression (toks @ [ENDMARKER]) in
138: let e = state#get_expand_expr "PREPROC_EVAL" e in
139: e
140:
141: let expand state toks = rev (expand' state [] toks)
142:
143: let eval_bool state sr toks =
144: let toks = expand state toks in
145: let e = eval state toks in
146: match e with
147: | `AST_typed_case (sr,v,`TYP_unitsum 2) ->
148: v = 1
149:
150: | x ->
151: clierr sr
152: (
153: "Preprocessor constant expression of boolean type required\n" ^
154: "Actually got:\n" ^
155: Flx_print.string_of_expr x
156: )
157:
158: let rec parse_params sr toks = match toks with
159: | NAME (_,id) :: COMMA _ :: ts ->
160: let args, body = parse_params sr ts in
161: id :: args, body
162:
163: | NAME (_,id) :: RPAR _ :: ts ->
164: [id], ts
165:
166: | RPAR _ :: ts -> [], ts
167:
168: | h :: _ ->
169: let sr = Flx_srcref.slift (Flx_prelex.src_of_token h) in
170: clierr sr "Malformed #define directive"
171: | [] ->
172: clierr sr "Malformed #define directive"
173:
174: let parse_macro_function state sr name toks =
175: let args, body = parse_params sr toks in
176: state#store_macro name args body
177:
178: let parse_macro_body state sr name toks =
179: match toks with
180: | LPAR _ :: ts -> parse_macro_function state sr name ts
181: | _ -> state#store_macro name [] toks
182:
183: let undef_directive state sr toks =
184: iter
185: begin function
186: | NAME (sr,name) -> state#undef_macro name
187: | h ->
188: let sr = Flx_srcref.slift (Flx_prelex.src_of_token h) in
189: clierr sr "#undef requires identifier"
190: end
191: toks
192: ;
193: []
194:
195: let define_directive state sr toks =
196: match toks with
197: | NAME (sr,name) :: ts ->
198: let sr = Flx_srcref.slift sr in
199: begin match state#get_macro name with
200: | None ->
201: parse_macro_body state sr name ts;
202: []
203: | Some _ -> clierr sr ("Duplicate Macro definition for " ^ name)
204: end
205:
206: | h :: _ ->
207: let sr = Flx_srcref.slift (Flx_prelex.src_of_token h) in
208: clierr sr "#define requires identifier"
209: | [] ->
210: clierr sr "#define requires identifier"
211:
212: let infix_directive state sr toks =
213: match toks with
214: | [INTEGER (sr1,kind,v); STRING (sr2,tok); NAME (sr3,fn)] ->
215: if kind <> "int" then
216: clierr sr "#infix directive requires plain integer precedence"
217: ;
218: let j = Big_int.int_of_big_int v in
219: state#add_infix_symbol j tok fn;
220: []
221:
222: | [INTEGER (sr1,kind,v); NAME (sr2,tok); NAME (sr3,fn)] ->
223: if kind <> "int" then
224: clierr sr "#infix directive requires plain integer precedence"
225: ;
226: let j = Big_int.int_of_big_int v in
227: state#add_infix_keyword j tok fn;
228: []
229:
230: | _ ->
231: clierr sr "#infix directive has syntax #infix 99 \"..\" fname"
232:
233: let keyword_directive state sr toks =
234: let rec aux toks = match toks with
235: | NAME (sr,tok) :: t ->
236: state#add_keyword tok;
237: aux t
238: | [] -> []
239: | _ ->
240: clierr sr "#keyword directive has syntax #keyword id1 id2 ..."
241: in aux toks
242:
243: let action_split t =
244: let rec aux inp out = match inp with
245: | [] -> rev out, []
246: | PARSE_ACTION _ :: tail -> rev out, tail
247: | h :: t -> aux t (h::out)
248: in aux t []
249:
250: let statement_directive state sr toks =
251: let toks = Flx_keywords.retok_parser_tokens toks in
252: match toks with
253: | NAME (sr,tok) :: t
254: | USER_STATEMENT_KEYWORD (sr,tok,_,_) :: t ->
255: (*
256: print_endline ("Statement directive " ^ tok);
257: *)
258: let t1,t2 = action_split t in
259: let sts,_ =
260: match t2 with
261: | [] -> [],ENDMARKER
262: | _ -> Flx_tok.parse_tokens Flx_parse.statementsx (t2 @ [ENDMARKER])
263: in
264: (*
265: print_endline ("Action Statements " ^ catmap "\n" (Flx_print.string_of_statement 0) sts);
266: *)
267: state#add_statement_keyword tok (Flx_srcref.slift sr) t1 (`Statements_term sts);
268: []
269:
270: | _ ->
271: clierr sr "#statement directive has syntax #statement kw production"
272:
273: let nonterminal_directive state sr toks =
274: let toks = Flx_keywords.retok_parser_tokens toks in
275: match toks with
276: | NAME (sr,tok) :: t ->
277: (*
278: print_endline ("Adding nonterminal .." ^ tok);
279: *)
280: let t1,t2 = action_split t in
281: (*
282: print_endline ("Action Tokens: " ^ catmap ", " Flx_prelex.string_of_token t2);
283: *)
284: let expr = Flx_tok.parse_tokens Flx_parse.expression (t2 @ [ENDMARKER]) in
285: state#add_nonterminal tok (Flx_srcref.slift sr) t1 (`Expression_term expr);
286: []
287:
288: | _ ->
289: clierr sr "#nonterminal has syntax #nonterminal name production"
290:
291: let bracket_directive state sr toks =
292: match toks with
293: | [STRING (sr1,tok1); STRING (sr2,tok2); NAME (sr3,fn)] ->
294: state#add_brackets tok1 tok2 fn;
295: []
296:
297: | _ ->
298: clierr sr "#bracket directive has syntax #bracket \"lb\" \"rb\" fname"
299:
300: let if_directive state sr toks =
301: state#push_condition
302: (
303: match eval_bool state sr toks with
304: | true -> `Processing
305: | false -> `Skip_to_else
306: )
307: ;
308: []
309:
310: let ifdef_directive state sr toks =
311: begin match toks with
312: | NAME (sr,s) :: _ ->
313: begin match state#get_macro s with
314: | None -> state#push_condition `Skip_to_else
315: | Some _ -> state#push_condition `Processing
316: end
317: | _ -> clierr sr "#ifdef requires identifier"
318: end
319: ;
320: []
321:
322: let ifndef_directive state sr toks =
323: begin match toks with
324: | NAME (sr,s) :: _ ->
325: begin match state#get_macro s with
326: | None -> state#push_condition `Processing
327: | Some _ -> state#push_condition `Skip_to_else
328: end
329: | _ -> clierr sr "#ifndef requires identifier"
330: end
331: ;
332: []
333:
334: let else_directive state sr =
335: begin match state#get_condition with
336: | `Processing -> state#set_condition `Skip_to_endif
337: | `Skip_to_endif -> ()
338: | `Skip_to_else -> state#set_condition `Processing
339: | `Subscan -> syserr sr "unexpected else while subscanning"
340: end
341: ;
342: []
343:
344: let elif_directive state sr toks =
345: begin match state#get_condition with
346: | `Processing -> state#set_condition `Skip_to_endif
347: | `Skip_to_endif -> ()
348: | `Skip_to_else ->
349: state#set_condition
350: (
351: match eval_bool state sr toks with
352: | true -> `Processing
353: | false -> `Skip_to_else
354: )
355: | `Subscan -> syserr sr "unexpected elif while subscanning"
356: end
357: ;
358: []
359:
360:
361: let endif_directive state sr =
362: if state#condition_stack_length < 2
363: then
364: clierr sr "Unmatched endif"
365: else
366: state#pop_condition;
367: []
368:
369: let find_include_file state s sr =
370: if s.[0]<>'"' && s.[0]<>'<'
371: then clierr sr "'\"' or '<' required after #include"
372: ;
373: let rquote = if s.[0]='"' then '"' else '>' in
374: let i = ref 1 in
375: let j = !i in
376: while s.[!i]<>rquote && s.[!i]<>'\n' do incr i done
377: ;
378:
379: if s.[!i]='\n'
380: then clierr sr "double quote required after filename in #include directive"
381: ;
382: let filename = String.sub s j (!i-j) in
383: let filename=
384: if rquote = '"'
385: then state#get_relative filename
386: else state#get_absolute filename
387: in
388: (*
389: print_endline (
390: "//Resolved in path: \"" ^ filename ^ "\""
391: );
392: *)
393: filename
394:
395: let include_directive is_import state sr s pre_flx_lex =
396: let filename = find_include_file state s sr in
397: state#add_include_file filename;
398: let pre_tokens_of_filename filename =
399: let incdirs = state#get_incdirs in
400: let basedir = Filename.dirname filename in
401: let state' = new lexer_state filename basedir incdirs state#get_expand_expr in
402: let infile = open_in filename in
403: let src = Lexing.from_channel infile in
404: let toks = pre_tokens_of_lexbuf pre_flx_lex src state' in
405: close_in infile;
406: if is_import then begin
407: try state#add_macros state'
408: with Duplicate_macro k -> clierr sr
409: ("Duplicate Macro " ^ k ^ " imported")
410: end;
411: iter state#add_include_file state'#get_include_files;
412: toks
413: in
414: pre_tokens_of_filename filename
415:
416: let count_newlines s =
417: let n = ref 0 in
418: let len = ref 0 in
419: let last_len = ref 0 in
420: for i = 0 to String.length s - 1 do
421: if s.[i] = '\n' then begin incr n; last_len := !len; len := 0; end
422: else incr len
423: done;
424: !n,!last_len
425:
426: let handle_preprocessor state lexbuf s pre_flx_lex start_location start_position =
427: let linecount,last_line_len = count_newlines s in
428: let file,line1,col1,_ = state#get_srcref lexbuf in
429: let file',line1',_,_ = state#get_physical_srcref lexbuf in
430:
431: let next_line = line1+linecount in
432: let next_line' = line1'+linecount in
433: let sr = file,line1,col1,next_line-1,last_line_len+1 in
434: let sr' = file',line1',col1,next_line'-1,last_line_len+1 in
435: let saved_buf_pos = Lexing.lexeme_end lexbuf in
436: (*
437: print_endline ("PREPROCESSING: " ^ Flx_srcref.long_string_of_src sr);
438: print_endline ("Trailing buf pos = " ^ si saved_buf_pos);
439: *)
440: let ident,s' =
441:
442: (* .. note the string WILL end with a newline .. *)
443:
444: (* skip spaces *)
445: let i = ref 0 in
446: while is_white s.[!i] && (s.[!i] <> '\n') do incr i done;
447:
448: (* scan non-spaces, stop at #, white, or newline *)
449: let n = ref 0 in
450: while
451: not (is_white s.[!i + !n]) &&
452: not (s.[!i + !n]='\n') &&
453: not (s.[!i + !n]='#')
454: do incr n done;
455:
456: (* grab the preprocessor directive name *)
457: let ident = String.sub s !i !n in
458:
459: (* scan for next non-white *)
460: let j = ref (!i + !n) in
461: while is_white s.[!j] && (s.[!j] <> '\n') do incr j done;
462:
463: (* scan back from end of text for last non-white *)
464: n := String.length s - 1;
465: while !n > !j && is_white(s.[!n-1]) do decr n done;
466:
467: (* grab the text from after the directive name to the end *)
468: let ssl = !n - !j in
469: let rest = String.sub s !j ssl in
470: ident,rest
471: in
472:
473: (*
474: print_endline ("PREPRO i=" ^ ident^", t='"^s'^"',\ns='"^s^"'");
475: *)
476: match ident with
477:
478: (* THESE COMMANDS ARE WEIRD HANGOVERS FROM C WHICH
479: CANNOT HANDLE NORMAL TOKENISATION
480: *)
481: (* report an error and abort *)
482: | "error" ->
483: begin match state#get_condition with
484: | `Processing ->
485: print_endline ("#error " ^ s');
486: clierr2 sr sr' ("#error " ^ s')
487: | _ -> []
488: end
489:
490: | "warn" ->
491: let result =
492: match state#get_condition with
493: | `Processing ->
494: let desc = Flx_srcref.short_string_of_src sr in
495: print_endline desc
496: ;
497: if sr <> sr' then begin
498: let desc = Flx_srcref.short_string_of_src sr' in
499: print_endline ("Physical File:\n" ^ desc)
500: end
501: ;
502: print_endline ("#warn " ^ s');
503: print_endline "";
504: [NEWLINE]
505: | _ -> []
506: in
507: for i = 1 to linecount do state#newline lexbuf done;
508: result
509:
510: | "line" ->
511: line_directive state sr s' lexbuf
512:
513: | "include"
514: | "import" ->
515: let result =
516: let is_import = ident = "import" in
517: match state#get_condition with
518: | `Processing ->
519: include_directive is_import state sr s' pre_flx_lex
520: | _ -> []
521: in
522: for i = 1 to linecount do state#newline lexbuf done;
523: result
524:
525: (* THESE ONES USE ORDINARY TOKEN STREAM *)
526: | _ ->
527: let result =
528: let src = Lexing.from_string s in
529: (*
530: print_endline ("Start buf pos = " ^ si (start_position.Lexing.pos_cnum));
531: print_endline ("Start loc = " ^ si (start_location.buf_pos));
532: *)
533: state#push_condition `Subscan;
534:
535: (* hack the location to the start of the line *)
536: let b = start_location.buf_pos - start_position.Lexing.pos_cnum in
537: (*
538: print_endline ("Hacking column position to " ^ si b);
539: *)
540: state#set_loc {
541: buf_pos = b;
542: last_buf_pos = b;
543: line_no = line1;
544: original_line_no = line1';
545: };
546:
547: let toks = pre_tokens_of_lexbuf pre_flx_lex src state in
548:
549: state#pop_condition;
550:
551: (* use the special preprocessor token filter *)
552: let toks = Flx_lex1.translate_preprocessor toks in
553:
554: (*
555: iter (fun tok ->
556: let sr = Flx_srcref.slift (Flx_prelex.src_of_token tok) in
557: print_endline (Flx_srcref.long_string_of_src sr)
558: )
559: toks;
560: *)
561:
562: match toks with
563: | [] -> [] (* DUMMY *)
564: | h :: toks ->
565: let h = Flx_prelex.string_of_token h in
566: if h <> ident then
567: failwith (
568: "WOOPS, mismatch on directive name: ident=" ^
569: ident ^ ", head token = " ^
570: h
571: )
572: ;
573: match h with
574:
575: (* conditional compilation *)
576: | "if" -> if_directive state sr toks
577: | "ifdef" -> ifdef_directive state sr toks
578: | "ifndef" -> ifndef_directive state sr toks
579: | "else" -> else_directive state sr
580: | "elif" -> elif_directive state sr toks
581: | "endif" -> endif_directive state sr
582:
583: | _ -> match state#get_condition with
584: | `Skip_to_else
585: | `Skip_to_endif -> []
586: | `Subscan -> syserr sr "Unexpected preprocessor directive in subscan"
587:
588: (* these ones are only done if in processing mode *)
589: | `Processing ->
590: match h with
591:
592: | "define" ->
593: define_directive state sr toks
594:
595: | "undef" ->
596: undef_directive state sr toks
597:
598:
599: | "infix" ->
600: infix_directive state sr toks
601:
602: | "keyword" ->
603: keyword_directive state sr toks
604:
605: | "statement" ->
606: statement_directive state sr toks
607:
608: | "nonterminal" ->
609: nonterminal_directive state sr toks
610:
611: | "bracket" ->
612: bracket_directive state sr toks
613:
614: | _ ->
615: print_endline (state#string_of_srcref lexbuf);
616: print_endline
617: (
618: "LEXICAL ERROR: IGNORING UNKNOWN PREPROCESSOR DIRECTIVE \"" ^
619: ident ^ "\""
620: );
621: [NEWLINE]
622: in
623:
624: (* restore the location to the start of the next line *)
625: state#set_loc {
626: buf_pos = saved_buf_pos;
627: last_buf_pos = saved_buf_pos;
628: line_no = next_line;
629: original_line_no = next_line'
630: };
631: result
632:
633:
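For reference, the line_directive defined above accepts the C-style form

    #line 42 "foo.flx"

which sets the logical line number to 42 and the logical filename to
foo.flx (the separately tracked physical filename and line are unchanged);
the directive itself is replaced by a single NEWLINE token.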
Start data section to src/flx_lex.mll[1 /1 ]
1: {
2: open Flx_util
3: open Flx_parse
4: open Flx_string
5: open Big_int
6: open Flx_exceptions
7: open Flx_lexstate
8: open Flx_preproc
9:
10: let lexeme = Lexing.lexeme
11: let lexeme_start = Lexing.lexeme_start
12: let lexeme_end = Lexing.lexeme_end
13:
14: let substr = String.sub
15: let len = String.length
16:
17: (* string parsers *)
18: let decode_qstring s = let n = len s in unescape (substr s 0 (n-1))
19: let decode_dstring s = let n = len s in unescape (substr s 0 (n-1))
20: let decode_qqqstring s = let n = len s in unescape (substr s 0 (n-3))
21: let decode_dddstring s = let n = len s in unescape (substr s 0 (n-3))
22:
23: let decode_raw_qstring s = let n = len s in substr s 0 (n-1)
24: let decode_raw_dstring s = let n = len s in substr s 0 (n-1)
25: let decode_raw_qqqstring s = let n = len s in substr s 0 (n-3)
26: let decode_raw_dddstring s = let n = len s in substr s 0 (n-3)
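(* A hedged worked example of the decoders above: ocamllex hands each
   decoder the lexeme *including* the closing delimiter (the opening
   delimiter was already consumed), so the single-delimiter forms strip
   one trailing character and the triple-quoted forms strip three.
   Assuming unescape is the identity on plain text,
     decode_qstring "abc'" = "abc"
     decode_dddstring "abc\"\"\"" = "abc"
   and the raw variants strip the same delimiters without unescaping. *)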
27:
28: exception Ok of int
29: exception SlashSlash of int
30: exception SlashAst of int
31:
32: (* WARNING: hackery: adjust this when the lex expression 'white'
33: is adjusted
34: *)
35:
36: }
37:
38: (* ====================== REGULAR DEFINITIONS ============================ *)
39: (* special characters *)
40: let quote = '\''
41: let dquote = '"'
42: let slosh = '\\'
43: let linefeed = '\n'
44: let tab = '\t'
45: let space = ' '
46: let formfeed = '\012'
47: let vtab = '\011'
48: let carriage_return = '\013'
49: let underscore = '_'
50:
51: (* character sets *)
52: let bindigit = ['0'-'1']
53: let octdigit = ['0'-'7']
54: let digit = ['0'-'9']
55: let hexdigit = digit | ['A'-'F'] | ['a'-'f']
56: let lower = ['a'-'z']
57: let upper = ['A'-'Z']
58: (* let letter = lower | upper *)
59: let letter = lower | upper
60: let hichar = ['\128'-'\255']
61: let white = space | tab
62:
63: (* nasty: form control characters *)
64: let form_control = linefeed | carriage_return | vtab | formfeed
65: let newline_prefix = linefeed | carriage_return
66: let newline = formfeed | linefeed | carriage_return linefeed
67: let hash = '#'
68:
69: let ordinary = letter | digit | hichar |
70: '!' | '$' | '%' | '&' | '(' | ')' | '*' |
71: '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' |
72: '=' | '>' | '?' | '@' | '[' | ']' | '^' | '_' |
73: '`' | '{' | '|' | '}' | '~'
74:
75: (* any sequence of these characters makes one or more tokens *)
76: (* MISSING: # should be in here, but it can't be supported at the
77: moment: the preprocessor's # handling uses a conditional and simply
78: errors out if the # isn't at the start of a line. This needs
79: fixing, but it's not clear how.
80: *)
81:
82: let symchar =
83: '!' | '$' | '%' | '&' | '(' | ')' | '*' |
84: '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' |
85: '=' | '>' | '?' | '@' | '[' | ']' | '^' |
86: '`' | '{' | '|' | '}' | '~' | '#' | '\\'
87:
88: let printable = ordinary | quote | dquote | slosh | hash
89:
90: (* identifiers *)
91: let ucn =
92: "\\u" hexdigit hexdigit hexdigit hexdigit
93: | "\\U" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit
94:
95: let prime = '\''
96: let idletter = letter | underscore | hichar | ucn
97: let identifier = idletter (idletter | digit | prime )*
98:
99: (* integers *)
100: let bin_lit = '0' ('b' | 'B') (underscore? bindigit) +
101: let oct_lit = '0' ('o' | 'O') (underscore? octdigit) +
102: let dec_lit = ('0' ('d' | 'D'| "d_" | "D_"))? digit (underscore? digit) *
103: let hex_lit = '0' ('x' | 'X') (underscore? hexdigit) +
104: let fastint_type_suffix = 't'|'T'|'s'|'S'|'i'|'I'|'l'|'L'|'v'|'V'|"ll"|"LL"
105: let exactint_type_suffix =
106: "i8" | "i16" | "i32" | "i64"
107: | "u8" | "u16" | "u32" | "u64"
108: | "I8" | "I16" | "I32" | "I64"
109: | "U8" | "U16" | "U32" | "U64"
110:
111: let signind = 'u' | 'U'
112:
113: let suffix =
114: '_'? exactint_type_suffix
115: | ('_'? fastint_type_suffix)? ('_'? signind)?
116: | ('_'? signind)? ('_'? fastint_type_suffix)?
117:
118: let int_lit = (bin_lit | oct_lit | dec_lit | hex_lit) suffix
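(* Some literals matched by int_lit as defined above (a hedged reading;
   the suffix selects the Felix type):
     0b1010_u8    binary with exact-width suffix u8
     123_456      plain decimal with an underscore separator
     0xFFu32      hexadecimal with exact-width suffix u32
     99uL         decimal, unsigned long via the fast suffixes *)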
119:
120: (* floats: Follows ISO C89, except that we allow underscores *)
121: let decimal_string = digit (underscore? digit) *
122: let hexadecimal_string = hexdigit (underscore? hexdigit) *
123:
124: let decimal_fractional_constant =
125: decimal_string '.' decimal_string?
126: | '.' decimal_string
127:
128: let hexadecimal_fractional_constant =
129: ("0x" |"0X")
130: (hexadecimal_string '.' hexadecimal_string?
131: | '.' hexadecimal_string)
132:
133: let decimal_exponent = ('E'|'e') ('+'|'-')? decimal_string
134: let binary_exponent = ('P'|'p') ('+'|'-')? decimal_string
135:
136: let floating_suffix = 'L' | 'l' | 'F' | 'f' | 'D' | 'd'
137: let floating_literal =
138: (
139: decimal_fractional_constant decimal_exponent? |
140: hexadecimal_fractional_constant binary_exponent?
141: )
142: floating_suffix?
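(* Some literals matched by floating_literal as defined above
   (a hedged reading; the type names are those used by the rule
   in pre_flx_lex):
     3.14        double
     .5e-3       double, leading-dot form with a decimal exponent
     1_000.0f    float, with an underscore separator
     0x1.8p3L    ldouble, hex fraction with a binary exponent *)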
143:
144: (* Python strings *)
145: let qqq = quote quote quote
146: let ddd = dquote dquote dquote
147:
148: let escape = slosh _
149:
150: let dddnormal = ordinary | hash | quote | escape | white | newline
151: let dddspecial = dddnormal | dquote dddnormal | dquote dquote dddnormal
152:
153: let qqqnormal = ordinary | hash | dquote | escape | white | newline
154: let qqqspecial = qqqnormal | quote qqqnormal | quote quote qqqnormal
155:
156: let raw_dddnormal = ordinary | hash | quote | slosh | white | newline
157: let raw_dddspecial = raw_dddnormal | dquote raw_dddnormal | dquote dquote raw_dddnormal
158:
159: let raw_qqqnormal = ordinary | hash | dquote | slosh | white | newline
160: let raw_qqqspecial = raw_qqqnormal | quote raw_qqqnormal | quote quote raw_qqqnormal
161:
162: let qstring = (ordinary | hash | dquote | escape | white) * quote
163: let dstring = (ordinary | hash | quote | escape | white) * dquote
164: let qqqstring = qqqspecial * qqq
165: let dddstring = dddspecial * ddd
166:
167: let raw = 'r' | 'R'
168: let see = 'c' | 'C'
169: let rqc = raw see | see raw
170:
171: let raw_qstring = (ordinary | hash | dquote | escape | white) * quote
172: let raw_dstring = (ordinary | hash | quote | escape | white) * dquote
173:
174: let raw_qqqstring = raw_qqqspecial * qqq
175: let raw_dddstring = raw_dddspecial * ddd
176:
177: let not_hash_or_newline = ordinary | quote | dquote | white | slosh
178: let not_newline = not_hash_or_newline | hash
179: let quoted_filename = dquote (ordinary | hash | quote | white | slosh)+ dquote
180:
181: (* ====================== PARSERS ============================ *)
182: (* string lexers *)
183:
184: (* ----------- BASIC STRING -----------------------------------*)
185:
186: rule parse_qstring state = parse
187: | qstring {
188: state#inbody;
189: [STRING (
190: state#get_srcref lexbuf,
191: state#decode decode_qstring (lexeme lexbuf)
192: )]
193: }
194: | _ {
195: [ERRORTOKEN (
196: state#get_srcref lexbuf,
197: "' string"
198: )]
199: }
200:
201: and parse_dstring state = parse
202: | dstring {
203: state#inbody;
204: [STRING (
205: state#get_srcref lexbuf,
206: state#decode decode_dstring (lexeme lexbuf)
207: )]
208: }
209: | _ {
210: state#inbody;
211: [ERRORTOKEN (
212: state#get_srcref lexbuf,
213: "\" string"
214: )]
215: }
216:
217: and parse_qqqstring state = parse
218: | qqqstring {
219: state#inbody;
220: [STRING (
221: state#get_srcref lexbuf,
222: state#decode decode_qqqstring (lexeme lexbuf)
223: )]
224: }
225: | _ {
226: state#inbody;
227: [ERRORTOKEN (
228: state#get_srcref lexbuf,
229: "''' string"
230: )]
231: }
232:
233: and parse_dddstring state = parse
234: | dddstring {
235: state#inbody;
236: [STRING (
237: state#get_srcref lexbuf,
238: state#decode decode_dddstring (lexeme lexbuf)
239: )]
240: }
241: | _ {
242: state#inbody;
243: [ERRORTOKEN (
244: state#get_srcref lexbuf,
245: "\"\"\" string"
246: )]
247: }
248:
249: (* ----------- FORMAT STRING -----------------------------------*)
250: and parse_fqstring state = parse
251: | qstring {
252: state#inbody;
253: [FSTRING (
254: state#get_srcref lexbuf,
255: state#decode decode_qstring (lexeme lexbuf)
256: )]
257: }
258: | _ {
259: [ERRORTOKEN (
260: state#get_srcref lexbuf,
261: "' string"
262: )]
263: }
264:
265: and parse_fdstring state = parse
266: | dstring {
267: state#inbody;
268: [FSTRING (
269: state#get_srcref lexbuf,
270: state#decode decode_dstring (lexeme lexbuf)
271: )]
272: }
273: | _ {
274: state#inbody;
275: [ERRORTOKEN (
276: state#get_srcref lexbuf,
277: "\" string"
278: )]
279: }
280:
281: and parse_fqqqstring state = parse
282: | qqqstring {
283: state#inbody;
284: [FSTRING (
285: state#get_srcref lexbuf,
286: state#decode decode_qqqstring (lexeme lexbuf)
287: )]
288: }
289: | _ {
290: state#inbody;
291: [ERRORTOKEN (
292: state#get_srcref lexbuf,
293: "''' string"
294: )]
295: }
296:
297: and parse_fdddstring state = parse
298: | dddstring {
299: state#inbody;
300: [FSTRING (
301: state#get_srcref lexbuf,
302: state#decode decode_dddstring (lexeme lexbuf)
303: )]
304: }
305: | _ {
306: state#inbody;
307: [ERRORTOKEN (
308: state#get_srcref lexbuf,
309: "\"\"\" string"
310: )]
311: }
312:
313: (* ----------- INTERPOLATION STRING -----------------------------------*)
314: and parse_Qqstring state = parse
315: | qstring {
316: state#inbody;
317: [QSTRING (
318: state#get_srcref lexbuf,
319: state#decode decode_qstring (lexeme lexbuf)
320: )]
321: }
322: | _ {
323: [ERRORTOKEN (
324: state#get_srcref lexbuf,
325: "' string"
326: )]
327: }
328:
329: and parse_Qdstring state = parse
330: | dstring {
331: state#inbody;
332: [QSTRING (
333: state#get_srcref lexbuf,
334: state#decode decode_dstring (lexeme lexbuf)
335: )]
336: }
337: | _ {
338: state#inbody;
339: [ERRORTOKEN (
340: state#get_srcref lexbuf,
341: "\" string"
342: )]
343: }
344:
345: and parse_Qqqqstring state = parse
346: | qqqstring {
347: state#inbody;
348: [QSTRING (
349: state#get_srcref lexbuf,
350: state#decode decode_qqqstring (lexeme lexbuf)
351: )]
352: }
353: | _ {
354: state#inbody;
355: [ERRORTOKEN (
356: state#get_srcref lexbuf,
357: "''' string"
358: )]
359: }
360:
361: and parse_Qdddstring state = parse
362: | dddstring {
363: state#inbody;
364: [QSTRING (
365: state#get_srcref lexbuf,
366: state#decode decode_dddstring (lexeme lexbuf)
367: )]
368: }
369: | _ {
370: state#inbody;
371: [ERRORTOKEN (
372: state#get_srcref lexbuf,
373: "\"\"\" string"
374: )]
375: }
376:
377: (* ----------- C STRING -----------------------------------*)
378: and parse_cqstring state = parse
379: | qstring {
380: state#inbody;
381: [CSTRING (
382: state#get_srcref lexbuf,
383: state#decode decode_qstring (lexeme lexbuf)
384: )]
385: }
386: | _ {
387: [ERRORTOKEN (
388: state#get_srcref lexbuf,
389: "' string"
390: )]
391: }
392:
393: and parse_cdstring state = parse
394: | dstring {
395: state#inbody;
396: [CSTRING (
397: state#get_srcref lexbuf,
398: state#decode decode_dstring (lexeme lexbuf)
399: )]
400: }
401: | _ {
402: state#inbody;
403: [ERRORTOKEN (
404: state#get_srcref lexbuf,
405: "\" string"
406: )]
407: }
408:
409: and parse_cqqqstring state = parse
410: | qqqstring {
411: state#inbody;
412: [CSTRING (
413: state#get_srcref lexbuf,
414: state#decode decode_qqqstring (lexeme lexbuf)
415: )]
416: }
417: | _ {
418: state#inbody;
419: [ERRORTOKEN (
420: state#get_srcref lexbuf,
421: "''' string"
422: )]
423: }
424:
425: and parse_cdddstring state = parse
426: | dddstring {
427: state#inbody;
428: [CSTRING (
429: state#get_srcref lexbuf,
430: state#decode decode_dddstring (lexeme lexbuf)
431: )]
432: }
433: | _ {
434: state#inbody;
435: [ERRORTOKEN (
436: state#get_srcref lexbuf,
437: "\"\"\" string"
438: )]
439: }
440:
441: (* ----------- WIDE STRING -----------------------------------*)
442: and parse_wqstring state = parse
443: | qstring {
444: state#inbody;
445: [WSTRING (
446: state#get_srcref lexbuf,
447: state#decode decode_qstring (lexeme lexbuf)
448: )]
449: }
450: | _ {
451: [ERRORTOKEN (
452: state#get_srcref lexbuf,
453: "' string"
454: )]
455: }
456:
457: and parse_wdstring state = parse
458: | dstring {
459: state#inbody;
460: [WSTRING (
461: state#get_srcref lexbuf,
462: state#decode decode_dstring (lexeme lexbuf)
463: )]
464: }
465: | _ {
466: state#inbody;
467: [ERRORTOKEN (
468: state#get_srcref lexbuf,
469: "\" string"
470: )]
471: }
472:
473: and parse_wqqqstring state = parse
474: | qqqstring {
475: state#inbody;
476: [WSTRING (
477: state#get_srcref lexbuf,
478: state#decode decode_qqqstring (lexeme lexbuf)
479: )]
480: }
481: | _ {
482: state#inbody;
483: [ERRORTOKEN (
484: state#get_srcref lexbuf,
485: "''' string"
486: )]
487: }
488:
489: and parse_wdddstring state = parse
490: | dddstring {
491: state#inbody;
492: [WSTRING (
493: state#get_srcref lexbuf,
494: state#decode decode_dddstring (lexeme lexbuf)
495: )]
496: }
497: | _ {
498: state#inbody;
499: [ERRORTOKEN (
500: state#get_srcref lexbuf,
501: "\"\"\" string"
502: )]
503: }
504:
505: (* ----------- UNICODE STRING -----------------------------------*)
506: and parse_uqstring state = parse
507: | qstring {
508: state#inbody;
509: [USTRING (
510: state#get_srcref lexbuf,
511: state#decode decode_qstring (lexeme lexbuf)
512: )]
513: }
514: | _ {
515: [ERRORTOKEN (
516: state#get_srcref lexbuf,
517: "' string"
518: )]
519: }
520:
521: and parse_udstring state = parse
522: | dstring {
523: state#inbody;
524: [USTRING (
525: state#get_srcref lexbuf,
526: state#decode decode_dstring (lexeme lexbuf)
527: )]
528: }
529: | _ {
530: state#inbody;
531: [ERRORTOKEN (
532: state#get_srcref lexbuf,
533: "\" string"
534: )]
535: }
536:
537: and parse_uqqqstring state = parse
538: | qqqstring {
539: state#inbody;
540: [USTRING (
541: state#get_srcref lexbuf,
542: state#decode decode_qqqstring (lexeme lexbuf)
543: )]
544: }
545: | _ {
546: state#inbody;
547: [ERRORTOKEN (
548: state#get_srcref lexbuf,
549: "''' string"
550: )]
551: }
552:
553: and parse_udddstring state = parse
554: | dddstring {
555: state#inbody;
556: [USTRING (
557: state#get_srcref lexbuf,
558: state#decode decode_dddstring (lexeme lexbuf)
559: )]
560: }
561: | _ {
562: state#inbody;
563: [ERRORTOKEN (
564: state#get_srcref lexbuf,
565: "\"\"\" string"
566: )]
567: }
568:
569: (* ----------- RAW STRING -----------------------------------*)
570: and parse_raw_qstring state = parse
571: | raw_qstring {
572: state#inbody;
573: [STRING (
574: state#get_srcref lexbuf,
575: state#decode decode_raw_qstring (lexeme lexbuf)
576: )]
577: }
578: | _ {
579: state#inbody;
580: [ERRORTOKEN (
581: state#get_srcref lexbuf,
582: "raw ' string")]
583: }
584:
585: and parse_raw_dstring state = parse
586: | raw_dstring {
587: state#inbody;
588: [STRING (
589: state#get_srcref lexbuf,
590: state#decode decode_raw_dstring (lexeme lexbuf)
591: )]
592: }
593: | _ {
594: state#inbody;
595: [ERRORTOKEN (
596: state#get_srcref lexbuf,
597: "raw \" string"
598: )]
599: }
600:
601: and parse_raw_qqqstring state = parse
602: | raw_qqqstring {
603: state#inbody;
604: [STRING (
605: state#get_srcref lexbuf,
606: state#decode decode_raw_qqqstring (lexeme lexbuf)
607: )]
608: }
609: | _ { state#inbody;
610: [ERRORTOKEN (
611: state#get_srcref lexbuf,
612: "raw ''' string")] }
613:
614: and parse_raw_dddstring state = parse
615: | raw_dddstring {
616: state#inbody;
617: [STRING (
618: state#get_srcref lexbuf,
619: state#decode decode_raw_dddstring (lexeme lexbuf)
620: )]
621: }
622: | _ {
623: [ERRORTOKEN (
624: state#get_srcref lexbuf,
625: lexeme lexbuf)
626: ]
627: }
628:
629: and parse_raw_cqstring state = parse
630: | raw_qstring {
631: state#inbody;
632: [CSTRING (
633: state#get_srcref lexbuf,
634: state#decode decode_raw_qstring (lexeme lexbuf)
635: )]
636: }
637: | _ {
638: state#inbody;
639: [ERRORTOKEN (
640: state#get_srcref lexbuf,
641: "raw ' cstring")]
642: }
643:
644: and parse_raw_cdstring state = parse
645: | raw_dstring {
646: state#inbody;
647: [CSTRING (
648: state#get_srcref lexbuf,
649: state#decode decode_raw_dstring (lexeme lexbuf)
650: )]
651: }
652: | _ {
653: state#inbody;
654: [ERRORTOKEN (
655: state#get_srcref lexbuf,
656: "raw \" cstring"
657: )]
658: }
659:
660: and parse_raw_cqqqstring state = parse
661: | raw_qqqstring {
662: state#inbody;
663: [CSTRING (
664: state#get_srcref lexbuf,
665: state#decode decode_raw_qqqstring (lexeme lexbuf)
666: )]
667: }
668: | _ { state#inbody;
669: [ERRORTOKEN (
670: state#get_srcref lexbuf,
671: "raw ''' cstring")] }
672:
673: and parse_raw_cdddstring state = parse
674: | raw_dddstring {
675: state#inbody;
676: [CSTRING (
677: state#get_srcref lexbuf,
678: state#decode decode_raw_dddstring (lexeme lexbuf)
679: )]
680: }
681: | _ {
682: [ERRORTOKEN (
683: state#get_srcref lexbuf,
684: lexeme lexbuf)
685: ]
686: }
687:
688: and parse_hashbang state = parse
689: | not_newline * newline {
690: begin
691: state#newline lexbuf;
692: let lex = lexeme lexbuf in
693: let n = String.length lex in
694: [COMMENT_NEWLINE (String.sub lex 0 (n-1))]
695: end
696: }
697: | _ { [ERRORTOKEN (
698: state#get_srcref lexbuf,
699: lexeme lexbuf)] }
700:
701: and parse_C_comment state = parse
702: | "/*" {
703: state#append_comment (lexeme lexbuf);
704: state#incr_comment;
705: parse_C_comment state lexbuf
706: }
707: | newline {
708: state#newline lexbuf;
709: state#append_comment (lexeme lexbuf);
710: parse_C_comment state lexbuf
711: }
712: | "*/" {
713: state#append_comment (lexeme lexbuf);
714: state#decr_comment;
715: if state#comment_level > 0
716: then parse_C_comment state lexbuf
717: else ()
718: ;
719: state#inbody
720: }
721: | _ {
722: state#append_comment (lexeme lexbuf);
723: parse_C_comment state lexbuf
724: }
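(* A hedged note on the incr/decr pair above: it makes C comments nest.
   Assuming set_comment initialises the nesting level to 1, the input
   "/* a /* b */ c */" is consumed as a single comment: the inner open
   raises the level to 2, the first close drops it back to 1 (so the
   scan continues), and the second close ends the comment at level 0. *)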
725:
726: and parse_line state = parse
727: | not_newline * (newline | eof)
728: {
729: state#newline lexbuf;
730: lexeme lexbuf
731: }
732:
733: and parse_preprocessor state start_location start_position = parse
734: | ( not_newline* slosh space* newline)* not_newline* newline
735: | ( not_newline* hash space* newline) (not_hash_or_newline not_newline* newline)+
736: {
737: let toks = handle_preprocessor state lexbuf
738: (lexeme lexbuf) pre_flx_lex start_location start_position
739: in
740: toks
741: }
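(* A hedged reading of the two patterns above: the first matches a
   directive whose physical lines are glued together by a trailing
   slosh, i.e. each line ends in a backslash plus optional spaces and
   the last one ends normally; the second matches a line whose last
   non-space character is a hash, followed by one or more lines that
   do not begin with one.  Either way the whole lexeme is handed to
   handle_preprocessor for rescanning. *)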
742:
743:
744: and pre_flx_lex state = parse
745: (* eof is not eaten up, so parent will find eof and emit ENDMARKER *)
746: | "//" not_newline * (newline | eof) {
747: state#newline lexbuf;
748: let lex = lexeme lexbuf in
749: let n = String.length lex in
750: [COMMENT_NEWLINE (String.sub lex 2 (n-3))]
751: }
752:
753: | "/*" {
754: state#set_comment (lexeme lexbuf);
755: parse_C_comment state lexbuf;
756: [COMMENT (state#get_comment)]
757: }
758:
759: | int_lit {
760: state#inbody;
761: let sr = state#get_srcref lexbuf in
762: let s = lexeme lexbuf in
763: let n = String.length s in
764: let converter, first =
765: if n>1 && s.[0]='0'
766: then
767: match s.[1] with
768: | 'b' | 'B' -> binbig_int_of_string,2
769: | 'o' | 'O' -> octbig_int_of_string,2
770: | 'd' | 'D' -> decbig_int_of_string,2
771: | 'x' | 'X' -> hexbig_int_of_string,2
772: | _ -> decbig_int_of_string,0
773: else decbig_int_of_string,0
774: in
775: let k = ref (n-1) in
776: let t =
777: if n >= 2 && s.[n-2]='i' && s.[n-1]='8'
778: then (k:=n-2; "int8")
779: else if n >= 2 && s.[n-2]='u' && s.[n-1]='8'
780: then (k:=n-2; "uint8")
781: else if n >= 3 && s.[n-3]='i' && s.[n-2]='1' && s.[n-1]='6'
782: then (k:=n-3; "int16")
783: else if n >= 3 && s.[n-3]='u' && s.[n-2]='1' && s.[n-1]='6'
784: then (k:=n-3; "uint16")
785:
786: else if n >= 3 && s.[n-3]='i' && s.[n-2]='3' && s.[n-1]='2'
787: then (k:=n-3; "int32")
788: else if n >= 3 && s.[n-3]='u' && s.[n-2]='3' && s.[n-1]='2'
789: then (k:=n-3; "uint32")
790:
791: else if n >= 3 && s.[n-3]='i' && s.[n-2]='6' && s.[n-1]='4'
792: then (k:=n-3; "int64")
793: else if n >= 3 && s.[n-3]='u' && s.[n-2]='6' && s.[n-1]='4'
794: then (k:=n-3; "uint64")
795:
796: else begin
797: let sign = ref "" in
798: let typ = ref "int" in
799: begin try while !k>first do
800: (match s.[!k] with
801: | 'u' | 'U' -> sign := "u"
802: | 't' | 'T' -> typ := "tiny"
803: | 's' | 'S' -> typ := "short"
804: | 'i' | 'I' -> typ := "int"
805: | 'l' | 'L' ->
806: typ :=
807: if !typ = "long" then "vlong" else "long"
808: | 'v' | 'V' -> typ := "vlong"
809: | _ -> raise Not_found
810: );
811: decr k
812: done with _ -> () end;
813: incr k;
814: !sign ^ !typ
815: end
816: in
817: let d = String.sub s first (!k-first) in
818: let v = (converter d) in
819: [INTEGER (sr, t, v)]
820: }
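(* A hedged trace of the suffix scan above for the lexeme "0xFFu32":
   converter = hexbig_int_of_string and first = 2; the exact-width
   test finds 'u','3','2' at the tail, so k := n-3 and t = "uint32";
   then d = "FF" and the rule yields INTEGER (sr, "uint32", 255). *)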
821:
822: | floating_literal {
823: state#inbody;
824: let str = lexeme lexbuf in
825: let n = String.length str in
826: let last_char = str.[n-1] in
827: match last_char with
828: | 'l'|'L' ->
829: [FLOAT (state#get_srcref lexbuf,"ldouble", strip_us (String.sub str 0 (n-1)))]
830: | 'f'|'F' ->
831: [FLOAT (state#get_srcref lexbuf,"float",strip_us (String.sub str 0 (n-1)))]
832: | _ ->
833: [FLOAT (state#get_srcref lexbuf,"double",strip_us str)]
834: }
835:
836: (* Python strings *)
837: | quote { state#inbody; parse_qstring state lexbuf }
838: | qqq { state#inbody; parse_qqqstring state lexbuf }
839: | dquote { state#inbody; parse_dstring state lexbuf }
840: | ddd { state#inbody; parse_dddstring state lexbuf }
841:
842: (* C strings: type char* *)
843: | ('c'|'C') quote { state#inbody; parse_cqstring state lexbuf }
844: | ('c'|'C') qqq { state#inbody; parse_cqqqstring state lexbuf }
845: | ('c'|'C') dquote { state#inbody; parse_cdstring state lexbuf }
846: | ('c'|'C') ddd { state#inbody; parse_cdddstring state lexbuf }
847:
848: (* Format strings *)
849: | ('f'|'F') quote { state#inbody; parse_fqstring state lexbuf }
850: | ('f'|'F') qqq { state#inbody; parse_fqqqstring state lexbuf }
851: | ('f'|'F') dquote { state#inbody; parse_fdstring state lexbuf }
852: | ('f'|'F') ddd { state#inbody; parse_fdddstring state lexbuf }
853:
854: (* Interpolation strings *)
855: | ('q'|'Q') quote { state#inbody; parse_Qqstring state lexbuf }
856: | ('q'|'Q') qqq { state#inbody; parse_Qqqqstring state lexbuf }
857: | ('q'|'Q') dquote { state#inbody; parse_Qdstring state lexbuf }
858: | ('q'|'Q') ddd { state#inbody; parse_Qdddstring state lexbuf }
859:
860: (* wide strings *)
861: | ('w' | 'W') quote { state#inbody; parse_wqstring state lexbuf }
862: | ('w' | 'W') qqq { state#inbody; parse_wqqqstring state lexbuf }
863: | ('w' | 'W') dquote { state#inbody; parse_wdstring state lexbuf }
864: | ('w' | 'W') ddd { state#inbody; parse_wdddstring state lexbuf }
865:
866: (* UTF32 strings *)
867: | ('u' | 'U') quote { state#inbody; parse_uqstring state lexbuf }
868: | ('u' | 'U') qqq { state#inbody; parse_uqqqstring state lexbuf }
869: | ('u' | 'U') dquote { state#inbody; parse_udstring state lexbuf }
870: | ('u' | 'U') ddd { state#inbody; parse_udddstring state lexbuf }
871:
872: (* Python raw strings *)
873: | ('r'|'R') quote { state#inbody; parse_raw_qstring state lexbuf }
874: | ('r'|'R') qqq { state#inbody; parse_raw_qqqstring state lexbuf }
875: | ('r'|'R') dquote { state#inbody; parse_raw_dstring state lexbuf }
876: | ('r'|'R') ddd { state#inbody; parse_raw_dddstring state lexbuf }
877:
878: (* raw C strings: type char* *)
879: | rqc quote { state#inbody; parse_raw_cqstring state lexbuf }
880: | rqc qqq { state#inbody; parse_raw_cqqqstring state lexbuf }
881: | rqc dquote { state#inbody; parse_raw_cdstring state lexbuf }
882: | rqc ddd { state#inbody; parse_raw_cdddstring state lexbuf }
883:
884: (* this MUST come after the string rules, so that raw strings take
885: precedence over identifiers: e.g. r'x' is a string, not an
886: identifier, but x'x' is an identifier .. yucky ..
887: *)
888: | identifier {
889: state#inbody;
890: let s = lexeme lexbuf in
891: let s' = Flx_id.utf8_to_ucn s in
892: let src = state#get_srcref lexbuf in
893: try [
894: let keywords = state#get_keywords in
895: let n = String.length s' in
896: if n >= Array.length keywords then raise Not_found;
897: let keywords = keywords.(n) in
898: (List.assoc s' keywords) (src,s')
899: ]
900: with Not_found ->
901: [Flx_keywords.map_flx_keywords src s']
902: }
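(* A hedged reading of the keyword lookup above: user keywords live in
   an array indexed by identifier length, each slot an assoc list, so a
   3-character identifier only searches keywords.(3).  Anything not
   found there falls through to the built-in Flx_keywords table. *)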
903:
904: (* whitespace *)
905: | white + {
906: (* we do NOT say 'inbody' here: we want to accept
907: #directives with leading spaces
908: *)
909: let spaces=lexeme lexbuf in
910: let column = ref 0 in
911: let n = String.length spaces in
912: for i=0 to n-1 do match spaces.[i] with
913: | '\t' -> column := ((!column + 8) / 8) * 8
914: | ' ' -> incr column
915: | _ -> raise (Failure "Error in lexer, bad white space character")
916: done;
917: [WHITE (!column)]
918: }
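(* A hedged worked example of the column computation above: a tab
   advances to the next multiple of 8, a space adds one.  For the
   lexeme " \t " the column goes 0 -> 1 (space) -> 8 (tab, via
   ((1+8)/8)*8) -> 9 (space), so the rule yields [WHITE 9]. *)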
919:
920: | slosh { [SLOSH] }
921:
922: | symchar + {
923: let s = lexeme lexbuf in
924: let n = String.length s in
925: let s',con,lim =
926: try
927: for i = 0 to n - 1 do
928: if s.[i] = '/' && i+1<n then begin
929: if s.[i+1] = '/' then raise (SlashSlash i);
930: if s.[i+1] = '*' then raise (SlashAst i)
931: end
932: done;
933: raise (Ok n)
934: with
935: | SlashSlash i -> String.sub s 0 i,`SlashSlash,i
936: | SlashAst i -> String.sub s 0 i,`SlashAst,i
937: | Ok i -> String.sub s 0 i,`Ok,i
938: in
939: let atstart = state#is_at_line_start in
940: state#inbody;
941: let toks = state#tokenise_symbols lexbuf s' in
942: let toks =
943: match toks,atstart with
944: | [HASH _],true ->
945: let x = state#get_loc in
946: let y = lexbuf.Lexing.lex_curr_p in
947: parse_preprocessor state
948: { x with buf_pos = x.buf_pos } (* identity update: pass a fresh copy *)
949: { y with Lexing.pos_fname = y.Lexing.pos_fname } (* likewise a fresh copy *)
950: lexbuf
951: | [HASHBANG _ | HASHBANGSLASH _ ],true ->
952: (*
953: print_endline "IGNORING HASHBANG";
954: *)
955: parse_hashbang state lexbuf
956: | _ when con = `SlashSlash ->
957: (*
958: print_endline "EMBEDDED //";
959: *)
960: let lead = String.sub s (lim+2) (n-lim-2) in
961: let lex = parse_line state lexbuf in
962: let m = String.length lex in
963: toks @ [COMMENT_NEWLINE (lead ^ String.sub lex 0 (m-1))]
964:
965: | _ when con = `SlashAst ->
966: (*
967: print_endline "EMBEDDED /*";
968: *)
969: (* NOTE: this will not handle /**/ or any other sequence
970: x/*xxxx*/ where the x's are symbol characters; in
971: particular x/***************/ will fail.
972: *)
973: let lead = String.sub s (lim+2) (n-lim-2) in
974: state#set_comment lead;
975: parse_C_comment state lexbuf;
976: toks @ [COMMENT (state#get_comment)]
977:
978: | _ -> toks
979: in toks
980: }
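(* A hedged example of the splitting above: for the lexeme "==//x" the
   scan raises SlashSlash at i = 2, so s' = "==" is tokenised normally,
   lead = "x", and parse_line supplies the rest of the physical line,
   which is folded into a trailing COMMENT_NEWLINE token. *)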
981:
982: (* end of line *)
983: | newline {
984: state#newline lexbuf;
985: [NEWLINE ]
986: }
987:
988: (* end of file *)
989: | eof {
990: if state#get_condition = `Subscan then [ENDMARKER] else
991: if state#condition_stack_length <> 1
992: then
993: let sr = state#get_srcref lexbuf in
994: let sr = Flx_srcref.slift sr in
995: Flx_exceptions.clierr sr "Unmatched #if at end of file"
996: else
997: [ENDMARKER]
998: }
999:
1000: (* Anything else is an error *)
1001: | _ {
1002: state#inbody;
1003: [
1004: ERRORTOKEN
1005: (
1006: state#get_srcref lexbuf, lexeme lexbuf
1007: )
1008: ]
1009: }
1010:
1011: {
1012: }
1013:
Start ocaml section to src/flx_lex.mli[1
/1
]
1: # 2394 "./lpsrc/flx_lexer.ipk"
2: val pre_flx_lex :
3: Flx_lexstate.lexer_state ->
4: Lexing.lexbuf ->
5: Flx_parse.token list
6:
7: val parse_line :
8: Flx_lexstate.lexer_state ->
9: Lexing.lexbuf ->
10: string
11:
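For orientation, a hedged sketch of how the interface above might be
driven; only Lexing.from_string, the pre_flx_lex signature, and the
ENDMARKER token are taken from the code, while the construction of the
Flx_lexstate.lexer_state value is elided, since its constructor lives
elsewhere in this package:

  (* collect token lists from pre_flx_lex until the end marker appears *)
  let tokenize (state : Flx_lexstate.lexer_state) (text : string)
      : Flx_parse.token list =
    let lexbuf = Lexing.from_string text in
    let rec loop acc =
      match Flx_lex.pre_flx_lex state lexbuf with
      | [Flx_parse.ENDMARKER] -> List.rev acc
      | toks -> loop (List.rev_append toks acc)
    in
    loop []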