Files
rust/src/boot/fe/lexer.mll
Graydon Hoare d6b7c96c3e Populate tree.
2010-06-23 21:03:09 -07:00

363 lines
12 KiB
OCaml

{
open Token;;
open Common;;
exception Lex_err of (string * Common.pos);;
let fail lexbuf s =
let p = lexbuf.Lexing.lex_start_p in
let pos =
(p.Lexing.pos_fname,
p.Lexing.pos_lnum ,
(p.Lexing.pos_cnum) - (p.Lexing.pos_bol))
in
raise (Lex_err (s, pos))
;;
let bump_line p = { p with
Lexing.pos_lnum = p.Lexing.pos_lnum + 1;
Lexing.pos_bol = p.Lexing.pos_cnum }
;;
let keyword_table = Hashtbl.create 100
let _ =
List.iter (fun (kwd, tok) -> Common.htab_put keyword_table kwd tok)
[ ("mod", MOD);
("use", USE);
("meta", META);
("auth", AUTH);
("syntax", SYNTAX);
("if", IF);
("else", ELSE);
("while", WHILE);
("do", DO);
("alt", ALT);
("case", CASE);
("for", FOR);
("each", EACH);
("put", PUT);
("ret", RET);
("be", BE);
("fail", FAIL);
("drop", DROP);
("type", TYPE);
("check", CHECK);
("claim", CLAIM);
("prove", PROVE);
("io", IO);
("state", STATE);
("unsafe", UNSAFE);
("native", NATIVE);
("mutable", MUTABLE);
("auto", AUTO);
("fn", FN);
("iter", ITER);
("import", IMPORT);
("export", EXPORT);
("let", LET);
("log", LOG);
("spawn", SPAWN);
("thread", THREAD);
("yield", YIELD);
("join", JOIN);
("bool", BOOL);
("int", INT);
("uint", UINT);
("char", CHAR);
("str", STR);
("rec", REC);
("tup", TUP);
("tag", TAG);
("vec", VEC);
("any", ANY);
("obj", OBJ);
("port", PORT);
("chan", CHAN);
("task", TASK);
("true", LIT_BOOL true);
("false", LIT_BOOL false);
("in", IN);
("as", AS);
("with", WITH);
("bind", BIND);
("u8", MACH TY_u8);
("u16", MACH TY_u16);
("u32", MACH TY_u32);
("u64", MACH TY_u64);
("i8", MACH TY_i8);
("i16", MACH TY_i16);
("i32", MACH TY_i32);
("i64", MACH TY_i64);
("f32", MACH TY_f32);
("f64", MACH TY_f64)
]
;;
}
let hexdig = ['0'-'9' 'a'-'f' 'A'-'F']
let bin = "0b" ['0' '1']['0' '1' '_']*
let hex = "0x" hexdig ['0'-'9' 'a'-'f' 'A'-'F' '_']*
let dec = ['0'-'9']+
let exp = ['e''E']['-''+']? dec
let flo = (dec '.' dec (exp?)) | (dec exp)
let ws = [ ' ' '\t' '\r' ]
let id = ['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '0'-'9' '_']*
rule token = parse
ws+ { token lexbuf }
| '\n' { lexbuf.Lexing.lex_curr_p
<- (bump_line lexbuf.Lexing.lex_curr_p);
token lexbuf }
| "//" [^'\n']* { token lexbuf }
| '+' { PLUS }
| '-' { MINUS }
| '*' { STAR }
| '/' { SLASH }
| '%' { PERCENT }
| '=' { EQ }
| '<' { LT }
| "<=" { LE }
| "==" { EQEQ }
| "!=" { NE }
| ">=" { GE }
| '>' { GT }
| '!' { NOT }
| '&' { AND }
| "&&" { ANDAND }
| '|' { OR }
| "||" { OROR }
| "<<" { LSL }
| ">>" { LSR }
| ">>>" { ASR }
| '~' { TILDE }
| '{' { LBRACE }
| '_' (dec as n) { IDX (int_of_string n) }
| '_' { UNDERSCORE }
| '}' { RBRACE }
| "+=" { OPEQ (PLUS) }
| "-=" { OPEQ (MINUS) }
| "*=" { OPEQ (STAR) }
| "/=" { OPEQ (SLASH) }
| "%=" { OPEQ (PERCENT) }
| "&=" { OPEQ (AND) }
| "|=" { OPEQ (OR) }
| "<<=" { OPEQ (LSL) }
| ">>=" { OPEQ (LSR) }
| ">>>=" { OPEQ (ASR) }
| "^=" { OPEQ (CARET) }
| '#' { POUND }
| '@' { AT }
| '^' { CARET }
| '.' { DOT }
| ',' { COMMA }
| ';' { SEMI }
| ':' { COLON }
| "<-" { LARROW }
| "<|" { SEND }
| "->" { RARROW }
| '(' { LPAREN }
| ')' { RPAREN }
| '[' { LBRACKET }
| ']' { RBRACKET }
| id as i
{ try
Hashtbl.find keyword_table i
with
Not_found -> IDENT (i)
}
| bin as n { LIT_INT (Int64.of_string n, n) }
| hex as n { LIT_INT (Int64.of_string n, n) }
| dec as n { LIT_INT (Int64.of_string n, n) }
| flo as n { LIT_FLO n }
| '\'' { char lexbuf }
| '"' { let buf = Buffer.create 32 in
str buf lexbuf }
| eof { EOF }
and str buf = parse
_ as ch
{
match ch with
'"' -> LIT_STR (Buffer.contents buf)
| '\\' -> str_escape buf lexbuf
| _ ->
Buffer.add_char buf ch;
let c = Char.code ch in
if bounds 0 c 0x7f
then str buf lexbuf
else
if ((c land 0b1110_0000) == 0b1100_0000)
then ext_str 1 buf lexbuf
else
if ((c land 0b1111_0000) == 0b1110_0000)
then ext_str 2 buf lexbuf
else
if ((c land 0b1111_1000) == 0b1111_0000)
then ext_str 3 buf lexbuf
else
if ((c land 0b1111_1100) == 0b1111_1000)
then ext_str 4 buf lexbuf
else
if ((c land 0b1111_1110) == 0b1111_1100)
then ext_str 5 buf lexbuf
else fail lexbuf "bad initial utf-8 byte"
}
and str_escape buf = parse
'x' ((hexdig hexdig) as h)
| 'u' ((hexdig hexdig hexdig hexdig) as h)
| 'U'
((hexdig hexdig hexdig hexdig
hexdig hexdig hexdig hexdig) as h)
{
Buffer.add_string buf (char_as_utf8 (int_of_string ("0x" ^ h)));
str buf lexbuf
}
| 'n' { Buffer.add_char buf '\n'; str buf lexbuf }
| 'r' { Buffer.add_char buf '\r'; str buf lexbuf }
| 't' { Buffer.add_char buf '\t'; str buf lexbuf }
| '\\' { Buffer.add_char buf '\\'; str buf lexbuf }
| '"' { Buffer.add_char buf '"'; str buf lexbuf }
| _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) }
and ext_str n buf = parse
_ as ch
{
let c = Char.code ch in
if ((c land 0b1100_0000) == (0b1000_0000))
then
begin
Buffer.add_char buf ch;
if n = 1
then str buf lexbuf
else ext_str (n-1) buf lexbuf
end
else
fail lexbuf "bad trailing utf-8 byte"
}
and char = parse
'\\' { char_escape lexbuf }
| _ as c
{
let c = Char.code c in
if bounds 0 c 0x7f
then end_char c lexbuf
else
if ((c land 0b1110_0000) == 0b1100_0000)
then ext_char 1 (c land 0b0001_1111) lexbuf
else
if ((c land 0b1111_0000) == 0b1110_0000)
then ext_char 2 (c land 0b0000_1111) lexbuf
else
if ((c land 0b1111_1000) == 0b1111_0000)
then ext_char 3 (c land 0b0000_0111) lexbuf
else
if ((c land 0b1111_1100) == 0b1111_1000)
then ext_char 4 (c land 0b0000_0011) lexbuf
else
if ((c land 0b1111_1110) == 0b1111_1100)
then ext_char 5 (c land 0b0000_0001) lexbuf
else fail lexbuf "bad initial utf-8 byte"
}
and char_escape = parse
'x' ((hexdig hexdig) as h)
| 'u' ((hexdig hexdig hexdig hexdig) as h)
| 'U'
((hexdig hexdig hexdig hexdig
hexdig hexdig hexdig hexdig) as h)
{
end_char (int_of_string ("0x" ^ h)) lexbuf
}
| 'n' { end_char (Char.code '\n') lexbuf }
| 'r' { end_char (Char.code '\r') lexbuf }
| 't' { end_char (Char.code '\t') lexbuf }
| '\\' { end_char (Char.code '\\') lexbuf }
| '\'' { end_char (Char.code '\'') lexbuf }
| _ as c { fail lexbuf ("bad escape: \\" ^ (Char.escaped c)) }
and ext_char n accum = parse
_ as c
{
let c = Char.code c in
if ((c land 0b1100_0000) == (0b1000_0000))
then
let accum = (accum lsl 6) lor (c land 0b0011_1111) in
if n = 1
then end_char accum lexbuf
else ext_char (n-1) accum lexbuf
else
fail lexbuf "bad trailing utf-8 byte"
}
and end_char accum = parse
'\'' { LIT_CHAR accum }
and bracequote buf depth = parse
'\\' '{' { Buffer.add_char buf '{';
bracequote buf depth lexbuf }
| '{' { Buffer.add_char buf '{';
bracequote buf (depth+1) lexbuf }
| '\\' '}' { Buffer.add_char buf '}';
bracequote buf depth lexbuf }
| '}' { if depth = 1
then BRACEQUOTE (Buffer.contents buf)
else
begin
Buffer.add_char buf '}';
bracequote buf (depth-1) lexbuf
end }
| '\\' [^'{' '}'] { let s = Lexing.lexeme lexbuf in
Buffer.add_string buf s;
bracequote buf depth lexbuf }
| [^'\\' '{' '}']+ { let s = Lexing.lexeme lexbuf in
Buffer.add_string buf s;
bracequote buf depth lexbuf }