5.23. Pre token filters

Start ocaml section to src/flx_lex1.ml[1 /1 ]
     1: # 39 "./lpsrc/flx_tokeniser.ipk"
     2: 
     3: open Flx_parse
     4: open Flx_exceptions
     5: open List
     6: open Flx_srcref
     7: 
     8: (* remove comments, whitespace, newlines *)
     9: 
    10: let filter_comments x =
    11:   let rec filter x' result  =
    12:     match x' with
    13:     | COMMENT_NEWLINE _ :: t
    14:     | COMMENT _ :: t
    15:     | NEWLINE :: t
    16:     | WHITE _ :: t -> filter t result
    17:     | h :: t -> filter t (h::result)
    18:     | [] -> rev result
    19:   in filter x []
    20: 
    21: (* remove comments, whitespace, newlines, trailing sloshes,
    22:   and a trailing hash on the first line
    23: *)
    24: let filter_preprocessor x =
    25:   let rec filter first_line x' result  =
    26:     match x' with
    27:     | WHITE _ :: t
    28:     | COMMENT _ :: t
    29:       -> filter first_line t result
    30: 
    31:     | COMMENT_NEWLINE _ :: t
    32:     | NEWLINE :: t
    33:     | SLOSH :: NEWLINE :: t
    34:     | SLOSH :: WHITE _ :: NEWLINE :: t
    35:       -> filter false t result
    36: 
    37:     | HASH _ :: NEWLINE :: t
    38:     | HASH _ :: WHITE _ :: NEWLINE :: t
    39:       when first_line  -> filter false t result
    40: 
    41:     | h :: t -> filter first_line t (h::result)
    42:     | [] -> rev result
    43:   in filter true x []
    44: 
    45: 
    46: let compress_ctypes x =
  (* Fuse multi-token C scalar type spellings (e.g. "unsigned long long
     int") into single Felix type-name tokens (e.g. "uvlong").  The
     source reference of the FIRST token of the spelling is kept for
     the fused NAME. *)
    47:   let rec filter x' result =
    48:     match x' with
    (* Generated arms: longest spellings come first so that, e.g.,
       "unsigned long long int" is not eaten by the shorter
       "unsigned long" rule. *)
    49: # 89 "./lpsrc/flx_tokeniser.ipk"
    50: | NAME(sr,"unsigned") :: NAME(_,"long") :: NAME(_,"long") :: NAME(_,"int") :: t -> 
    51: # 89 "./lpsrc/flx_tokeniser.ipk"
    52:     filter t (NAME (sr, "uvlong") :: result)
    53: # 89 "./lpsrc/flx_tokeniser.ipk"
    54: | NAME(sr,"signed") :: NAME(_,"long") :: NAME(_,"long") :: NAME(_,"int") :: t -> 
    55: # 89 "./lpsrc/flx_tokeniser.ipk"
    56:     filter t (NAME (sr, "vlong") :: result)
    57: # 89 "./lpsrc/flx_tokeniser.ipk"
    58: | NAME(sr,"unsigned") :: NAME(_,"long") :: NAME(_,"long") :: t -> 
    59: # 89 "./lpsrc/flx_tokeniser.ipk"
    60:     filter t (NAME (sr, "uvlong") :: result)
    61: # 89 "./lpsrc/flx_tokeniser.ipk"
    62: | NAME(sr,"unsigned") :: NAME(_,"long") :: NAME(_,"int") :: t -> 
    63: # 89 "./lpsrc/flx_tokeniser.ipk"
    64:     filter t (NAME (sr, "ulong") :: result)
    65: # 89 "./lpsrc/flx_tokeniser.ipk"
    66: | NAME(sr,"signed") :: NAME(_,"long") :: NAME(_,"long") :: t -> 
    67: # 89 "./lpsrc/flx_tokeniser.ipk"
    68:     filter t (NAME (sr, "vlong") :: result)
    69: # 89 "./lpsrc/flx_tokeniser.ipk"
    70: | NAME(sr,"signed") :: NAME(_,"long") :: NAME(_,"int") :: t -> 
    71: # 89 "./lpsrc/flx_tokeniser.ipk"
    72:     filter t (NAME (sr, "long") :: result)
    73: # 89 "./lpsrc/flx_tokeniser.ipk"
    74: | NAME(sr,"long") :: NAME(_,"long") :: NAME(_,"int") :: t -> 
    75: # 89 "./lpsrc/flx_tokeniser.ipk"
    76:     filter t (NAME (sr, "vlong") :: result)
    77: # 89 "./lpsrc/flx_tokeniser.ipk"
    78: | NAME(sr,"long") :: NAME(_,"double") :: NAME(_,"float") :: t -> 
    79: # 89 "./lpsrc/flx_tokeniser.ipk"
    80:     filter t (NAME (sr, "ldouble") :: result)
    81: # 89 "./lpsrc/flx_tokeniser.ipk"
    82: | NAME(sr,"unsigned") :: NAME(_,"long") :: t -> 
    83: # 89 "./lpsrc/flx_tokeniser.ipk"
    84:     filter t (NAME (sr, "ulong") :: result)
    85: # 89 "./lpsrc/flx_tokeniser.ipk"
    86: | NAME(sr,"unsigned") :: NAME(_,"int") :: t -> 
    87: # 89 "./lpsrc/flx_tokeniser.ipk"
    88:     filter t (NAME (sr, "uint") :: result)
    89: # 89 "./lpsrc/flx_tokeniser.ipk"
    90: | NAME(sr,"unsigned") :: NAME(_,"char") :: t -> 
    91: # 89 "./lpsrc/flx_tokeniser.ipk"
    92:     filter t (NAME (sr, "utiny") :: result)
    93: # 89 "./lpsrc/flx_tokeniser.ipk"
    94: | NAME(sr,"signed") :: NAME(_,"long") :: t -> 
    95: # 89 "./lpsrc/flx_tokeniser.ipk"
    96:     filter t (NAME (sr, "long") :: result)
    97: # 89 "./lpsrc/flx_tokeniser.ipk"
    98: | NAME(sr,"signed") :: NAME(_,"int") :: t -> 
    99: # 89 "./lpsrc/flx_tokeniser.ipk"
   100:     filter t (NAME (sr, "int") :: result)
   101: # 89 "./lpsrc/flx_tokeniser.ipk"
   102: | NAME(sr,"signed") :: NAME(_,"char") :: t -> 
   103: # 89 "./lpsrc/flx_tokeniser.ipk"
   104:     filter t (NAME (sr, "tiny") :: result)
   105: # 89 "./lpsrc/flx_tokeniser.ipk"
   106: | NAME(sr,"long") :: NAME(_,"long") :: t -> 
   107: # 89 "./lpsrc/flx_tokeniser.ipk"
   108:     filter t (NAME (sr, "vlong") :: result)
   109: # 89 "./lpsrc/flx_tokeniser.ipk"
   110: | NAME(sr,"long") :: NAME(_,"int") :: t -> 
   111: # 89 "./lpsrc/flx_tokeniser.ipk"
   112:     filter t (NAME (sr, "long") :: result)
    (* NOTE(review): "float double" / "double float" and "long double
       float" look like Felix-specific spellings for double precision,
       not C ones -- confirm against the Felix lexer. *)
   113: # 89 "./lpsrc/flx_tokeniser.ipk"
   114: | NAME(sr,"float") :: NAME(_,"double") :: t -> 
   115: # 89 "./lpsrc/flx_tokeniser.ipk"
   116:     filter t (NAME (sr, "double") :: result)
   117: # 89 "./lpsrc/flx_tokeniser.ipk"
   118: | NAME(sr,"double") :: NAME(_,"float") :: t -> 
   119: # 89 "./lpsrc/flx_tokeniser.ipk"
   120:     filter t (NAME (sr, "double") :: result)
   121: # 89 "./lpsrc/flx_tokeniser.ipk"
   122: | NAME(sr,"unsigned") :: t -> 
   123: # 89 "./lpsrc/flx_tokeniser.ipk"
   124:     filter t (NAME (sr, "uint") :: result)
   125: # 89 "./lpsrc/flx_tokeniser.ipk"
   126: | NAME(sr,"long") :: t -> 
   127: # 89 "./lpsrc/flx_tokeniser.ipk"
   128:     filter t (NAME (sr, "long") :: result)
    (* anything else passes through unchanged *)
   129:     | h :: t -> filter t (h::result)
   130:     | [] -> rev result
   131:   in filter x []
   132: 
   133: let unkeyword ts =
   134:   let rec filter inp out = match inp with
   135: # 99 "./lpsrc/flx_tokeniser.ipk"
   136: | (COLONCOLON _ as cc) :: (USER_KEYWORD (sr,s) as u) :: tail 
   137: # 99 "./lpsrc/flx_tokeniser.ipk"
   138: | (COLONCOLON _ as cc) :: (USER_STATEMENT_KEYWORD (sr,s,_,_) as u) :: tail 
   139: # 99 "./lpsrc/flx_tokeniser.ipk"
   140: | (DOT _ as cc) :: (USER_KEYWORD (sr,s) as u) :: tail 
   141: # 99 "./lpsrc/flx_tokeniser.ipk"
   142: | (DOT _ as cc) :: (USER_STATEMENT_KEYWORD (sr,s,_,_) as u) :: tail 
   143: # 99 "./lpsrc/flx_tokeniser.ipk"
   144: | (RIGHTARROW _ as cc) :: (USER_KEYWORD (sr,s) as u) :: tail 
   145: # 99 "./lpsrc/flx_tokeniser.ipk"
   146: | (RIGHTARROW _ as cc) :: (USER_STATEMENT_KEYWORD (sr,s,_,_) as u) :: tail 
   147: # 99 "./lpsrc/flx_tokeniser.ipk"
   148: | (STRUCT _ as cc) :: (USER_KEYWORD (sr,s) as u) :: tail 
   149: # 99 "./lpsrc/flx_tokeniser.ipk"
   150: | (STRUCT _ as cc) :: (USER_STATEMENT_KEYWORD (sr,s,_,_) as u) :: tail 
   151: # 99 "./lpsrc/flx_tokeniser.ipk"
   152: | (UNION _ as cc) :: (USER_KEYWORD (sr,s) as u) :: tail 
   153: # 99 "./lpsrc/flx_tokeniser.ipk"
   154: | (UNION _ as cc) :: (USER_STATEMENT_KEYWORD (sr,s,_,_) as u) :: tail 
   155: # 99 "./lpsrc/flx_tokeniser.ipk"
   156: | (CLASS _ as cc) :: (USER_KEYWORD (sr,s) as u) :: tail 
   157: # 99 "./lpsrc/flx_tokeniser.ipk"
   158: | (CLASS _ as cc) :: (USER_STATEMENT_KEYWORD (sr,s,_,_) as u) :: tail 
   159: # 99 "./lpsrc/flx_tokeniser.ipk"
   160: | (FUNCTION _ as cc) :: (USER_KEYWORD (sr,s) as u) :: tail 
   161: # 99 "./lpsrc/flx_tokeniser.ipk"
   162: | (FUNCTION _ as cc) :: (USER_STATEMENT_KEYWORD (sr,s,_,_) as u) :: tail 
   163: # 99 "./lpsrc/flx_tokeniser.ipk"
   164: | (PROCEDURE _ as cc) :: (USER_KEYWORD (sr,s) as u) :: tail 
   165: # 99 "./lpsrc/flx_tokeniser.ipk"
   166: | (PROCEDURE _ as cc) :: (USER_STATEMENT_KEYWORD (sr,s,_,_) as u) :: tail 
   167: # 99 "./lpsrc/flx_tokeniser.ipk"
   168: | (GENERATOR _ as cc) :: (USER_KEYWORD (sr,s) as u) :: tail 
   169: # 99 "./lpsrc/flx_tokeniser.ipk"
   170: | (GENERATOR _ as cc) :: (USER_STATEMENT_KEYWORD (sr,s,_,_) as u) :: tail 
   171:   ->
   172:     let sr = Flx_prelex.src_of_token  u in
   173:     let s = Flx_prelex.string_of_token u in
   174:     let u = NAME (sr,s) in
   175:     filter tail (u :: cc :: out)
   176:   | h :: t -> filter t (h::out)
   177:   | [] -> rev out
   178:   in filter ts []
   179: 
   180: let token_packer ts =
   181:   let rec aux n o i = match i with
   182:     | [] ->
   183:       if n = 0 then rev o,[]
   184:       else failwith "At end of file, unterminated token group"
   185: 
   186:     | NAME (sr,"_tok") :: t ->
   187:       let h,t = aux (n+1) [] t in
   188:       aux n (TOKEN_LIST h :: o) t
   189: 
   190:     | NAME (sr,"_etok") :: t ->
   191:       if n = 0 then failwith "Unmatched _etok"
   192:       else rev o,t
   193: 
   194:     | h :: t -> aux n (h::o) t
   195:   in
   196:   fst (aux 0 [] ts)
   197: 
   198: type state = {
  (* token macro table: name -> (formal parameter names, body tokens);
     most recent definition first *)
   199:   macs: (string * (string list * token list)) list ;
  (* one flag per open _ifdef/_elifdef/_else, innermost first *)
   200:   cstack : bool list;
  (* cached conjunction of cstack: true when tokens are being emitted *)
   201:   cond : bool;
   202: }
   203: 
   204: let cond ls = fold_left (fun x y -> x && y) true ls
   205: 
   206: let token_expander ts =
   207:   let rec aux s o i = match i with
   208:   | TOKEN_LIST ts :: t -> aux s o (ts @ t)
   209: 
   210:   | NAME (sr,name) as h :: t ->
   211:     let err x = clierr (slift sr) x in
   212:     begin match name with
   213:     | "_ifdef" ->
   214:        begin match t with
   215:        | NAME (sr2,name) :: NAME(_,"_then" ) :: t ->
   216:          let cs = mem_assoc name s.macs :: s.cstack in
   217:          aux { s with cond=cond cs; cstack=cs} o t
   218:        | _ -> err "usage: _ifdef token _then .. _endif"
   219:        end
   220: 
   221:     | "_elifdef" ->
   222:        begin match t with
   223:        | NAME (sr2,name) :: NAME(_,"_then" ) :: t ->
   224:          if length s.cstack > 0 then
   225:            let cs = mem_assoc name s.macs :: tl s.cstack in
   226:            aux { s with cond = cond cs; cstack=cs} o t
   227:          else
   228:           err "Unmatch _elif"
   229: 
   230:        | _ -> err "usage: _elifdef token _then .. _endif"
   231:        end
   232: 
   233:     | "_endif" ->
   234:       if length s.cstack > 0 then
   235:         let cs = tl s.cstack in
   236:         aux { s with cond = cond cs; cstack=cs} o t
   237:       else
   238:         err "Unmatch _endif"
   239: 
   240:     | "_else" ->
   241:       if length s.cstack > 0 then
   242:         let cs = not (hd s.cstack) :: tl s.cstack in
   243:         aux { s with cond = cond cs; cstack=cs} o t
   244:       else
   245:         err "Unmatch _else"
   246: 
   247:     | _ when not (s.cond) -> aux s o t
   248: 
   249:     | "_tokdef" ->
   250:       let rec grabdef n o i = match i with
   251:         | NAME (sr,"_tokdef") as  h :: t ->
   252:           grabdef (n+1) (h::o) t
   253: 
   254:         | NAME (sr,"_edef") as h :: t ->
   255:           if n = 0 then rev o,t
   256:           else grabdef (n-1) (h::o) t
   257: 
   258:         | NAME (sr,"_quote") :: h :: t ->
   259:           grabdef n (h::o) t
   260: 
   261:         | h::t -> grabdef n (h::o) t
   262:         | [] -> err "unterminated token macro substream"
   263:       in
   264:       begin match t with
   265:       | NAME (sr2,name) :: t ->
   266:         let rec grabp n o i : string list * token list  =
   267:           if n = 0 then err "too many macro args, runaway?";
   268:           match i with
   269:           | [] -> err "unterminated macro definition"
   270:           | EQUAL _ :: t -> rev o, t
   271:           | NAME (_,s) :: t -> grabp (n-1) (s::o) t
   272:           | _ -> err "macro arg must be identifier"
   273:         in
   274:         let params,t = grabp 10 [] t in
   275:         let mac,t = grabdef 0 [] t in
   276:          aux {s with macs=(name,(params,mac))::s.macs} o t
   277:       | _ -> err "usage: _tokdef name = stream"
   278:       end
   279: 
   280:     | "_undef" ->
   281:        begin match t with
   282:        | NAME (sr2,name) :: t ->
   283:          let rec strip flag inp out = match inp with
   284:          | [] -> rev out
   285:          | (n,_) :: t when flag && n = name ->
   286:            strip false t out
   287:          | h :: t -> strip flag t (h::out)
   288:          in
   289:          let macs = strip true s.macs [] in
   290:          aux {s with macs=macs} o t
   291:        | _ -> err "usage: _undef name"
   292:        end
   293: 
   294:     | "_popto" ->
   295:        begin match t with
   296:        | NAME (sr2,name) :: t ->
   297:          let rec strip inp = match inp with
   298:          | [] -> err ("_popto can't find macro " ^ name);
   299:          | (n,_) :: t when n = name -> t
   300:          | h :: t -> strip t
   301:          in
   302:          let macs = strip s.macs in
   303:          aux {s with macs=macs} o t
   304:        | _ -> err "usage: _popto name"
   305:        end
   306: 
   307:     | _ when mem_assoc name s.macs ->
   308:       let rec graba n o i =
   309:         if n = 0 then rev o,i else
   310:         match i with
   311:         | [] -> err ("Not enough args for macro " ^ name)
   312:         | h :: t -> graba (n-1) (h::o) t
   313:       in
   314:       let params,body = assoc name s.macs in
   315:       let args, t = graba (length params) [] t in
   316:       let pas =
   317:         fold_left2
   318:         (fun m p a -> (p,a) :: m)
   319:         [] params args
   320:       in
   321:       let body =
   322:         map
   323:         (fun t -> match t with
   324:           | NAME(_,s) ->
   325:             (try assoc s pas with Not_found -> t)
   326:           | _ -> t
   327:         )
   328:         body
   329:       in
   330:       aux s o (body @ t)
   331:     | _ -> aux s (h::o) t
   332:     end (* name handling *)
   333: 
   334:   | h :: t when not s.cond -> aux s o t
   335:   | h :: t -> aux s (h::o) t
   336:   | [] -> rev o
   337:   in aux {macs=[]; cond=true; cstack=[]} [] ts
   338: 
   339: 
   340: let translate ts =
   341:   let filters = [
   342:     (* 1 *) filter_comments ;
   343:     (* 2 *) compress_ctypes ;
   344:     (* 3 *) unkeyword ;
   345:     (* 4 *) token_packer;
   346:     (* 5 *) token_expander;
   347:     ]
   348:   and reverse_apply dat fn = fn dat
   349:   in List.fold_left reverse_apply ts filters
   350: 
   351: let translate_preprocessor ts =
   352:   let filters = [
   353:     (* 1 *) filter_preprocessor ;
   354:     (* 2 *) compress_ctypes ;
   355:     ]
   356:   and reverse_apply dat fn = fn dat
   357:   in List.fold_left reverse_apply ts filters
   358: 
End ocaml section to src/flx_lex1.ml[1]
Start ocaml section to src/flx_lex1.mli[1 /1 ]
     1: # 290 "./lpsrc/flx_tokeniser.ipk"
(** [translate ts] runs the full token filter pipeline on a lexed
    stream: comment/whitespace stripping, C type-name compression,
    user-keyword demotion, [_tok]/[_etok] group packing, and token
    macro expansion. *)
     2: val translate : Flx_parse.token list -> Flx_parse.token list
(** [translate_preprocessor ts] runs the reduced pipeline used for
    preprocessor lines: layout stripping (including a first-line
    trailing hash) and C type-name compression only. *)
     3: val translate_preprocessor : Flx_parse.token list -> Flx_parse.token list
     4: 
End ocaml section to src/flx_lex1.mli[1]