(* Simple table-driven lexer that runs a DFA over the characters of a string.
   It recognises integer and real literals, the operators + - * /,
   lower-case identifiers built from a..c (ID) and upper-case names built
   from A..C (VAR). *)

(* Raised by checkEndstate; carries the text of the offending input. *)
exception LEXER_ERROR of string;

(* States of the automaton.  START is the initial state; ERROR has no
   outgoing transitions in the table below; the remaining states name the
   token class currently being recognised. *)
datatype statename = ERROR | START | ID | VAR | INT | REAL | OP;

type inputtype = char;

(* Characters skipped between tokens. *)
val whitespaces = [#" ", #"\t", #"\n"];

(* One transition: (source state, input symbol, target state). *)
type state = statename * inputtype * statename;
type statetable = state list;

local
  (* One transition src --symbol--> dst per character of `symbols`,
     in the order the characters are written. *)
  fun edges (src, symbols, dst) =
    map (fn symbol => (src, symbol, dst)) (explode symbols)

  val digits = "1234567890"
in
  (* Transition table of the DFA.
     NOTE: input such as "1." is accepted as REAL, because the dot moves
     INT to REAL and REAL is itself an accepting state. *)
  val automaton : statetable =
    List.concat
      [ [ (START, #" ", START), (START, #"x", ERROR) ],
        edges (START, "+-*/", OP),
        edges (START, "ABC", VAR),
        edges (VAR, "ABC", VAR),
        edges (START, "abc", ID),
        edges (ID, "abc", ID),
        edges (START, digits, INT),
        edges (INT, digits, INT),
        [ (INT, #".", REAL) ],
        edges (REAL, digits, REAL) ]
end;

(* Accepting states: stopping in one of these yields a token. *)
val endstates = [VAR, ID, INT, REAL, OP];

(* Look up the transition for (state, symbol) in a table.
   Returns (true, target) for the first matching entry, or (false, ERROR)
   when the table contains none. *)
fun findTransition _ [] = (false, ERROR)
  | findTransition (key as (thisState, thisSymbol)) ((state, symbol, follow) :: rest) =
      if thisState = state andalso thisSymbol = symbol
      then (true, follow)
      else findTransition key rest;

(* member x xs is true iff x occurs in xs. *)
fun member _ [] = false
  | member x (y :: ys) = (x = y) orelse member x ys;

(* reverse1 xs acc prepends the reversal of xs to acc. *)
fun reverse1 [] ys = ys
  | reverse1 (x :: xs) ys = reverse1 xs (x :: ys);

(* reverse xs is xs in reverse order. *)
fun reverse xs = reverse1 xs [];

(* Drop leading whitespace characters from a character list. *)
fun skip_ws [] = []
  | skip_ws (all as c :: cs) =
      if member c whitespaces then skip_ws cs else all;

(* Turn the characters collected so far (stored newest first) into a token.
   Returns (stateName, text) when stateName is accepting; otherwise raises
   LEXER_ERROR with the lookahead character c followed by the text. *)
fun checkEndstate stateName chars c =
  let
    val token = implode (reverse chars)
  in
    if member stateName endstates
    then (stateName, token)
    else raise LEXER_ERROR (str c ^ token)
  end;

(* lex1 Cs automaton: skip whitespace, then recognise tokens from START.
   lex2 currentState soFar Cs automaton: run the DFA from currentState,
   where soFar holds the characters of the current token, newest first.

   Both return the list of (state, text) tokens found.  Illegal input is
   reported with print and skipped; no exception escapes. *)
fun lex1 Cs automaton = lex2 START nil (skip_ws Cs) automaton
and lex2 currentState soFar (C :: Cs) automaton =
      (case findTransition (currentState, C) automaton of
         (true, followState) =>
           (* Genuine tail call: no handler is installed around it. *)
           lex2 followState (C :: soFar) Cs automaton
       | (false, _) =>
           if member currentState endstates then
             (* Token complete; C starts the next token and is re-examined. *)
             checkEndstate currentState soFar C :: lex1 (C :: Cs) automaton
           else if null soFar then
             (* Nothing consumed yet, so C itself cannot start a token. *)
             (print ("Illegal token: " ^ str C ^ "\n");
              lex1 Cs automaton)
           else
             (* Stuck in a non-accepting state: the collected text is the
                illegal token.  C is not part of it, so it must not be
                dropped; lexing resumes at C. *)
             (print ("Illegal token: " ^ implode (reverse soFar) ^ "\n");
              lex1 (C :: Cs) automaton))
  | lex2 currentState soFar [] automaton =
      if member currentState endstates then
        [checkEndstate currentState soFar (chr 0)]
      else if null soFar then
        (* Input ended between tokens (empty input or trailing whitespace):
           nothing was cut short, so this is not an error. *)
        []
      else
        (print "Unexpected end of input\n"; []);

(* Tokenise a string with the default automaton. *)
fun lex S = lex1 (explode S) automaton;