(* Simple example of a recursive descent parser for expressions *) (* Available under Gnu General Public Licence, GPL. *) (* L.A., Dept Comp Sci, Monash, 1988 and CSSE, Monash .au 2005 *) (* http://www.csse.monash.edu.au/~lloyd/tildeFP/SML/1997/ *) (* Modified for WFF by Cameron McCormack, 25 August 2006 *) datatype Symbol = (* lexical items *) IdentSy of string | (* x *) notSy | andSy | orSy | (* not and or *) impliesSy | (* implies *) openSy | closeSy | (* ( ) *) eofSy | symbolErr of string; (* ------------------------------------------------------------------------- *) (* Abstract Syntax: WFF ::= string | WFF Bopr WFF | Uopr WFF *) datatype Uopr = knot; (* operators, unary *) datatype Bopr = conj | disj | imp; (* operators, binary *) datatype WFF = binexp of WFF * Bopr * WFF | (* expressions *) unexp of Uopr * WFF | varid of string | expErr of string; (* =======================================LA==csse==monash==.au==14/4/2005== *) (* lexical analysis *) fun member y [] = false | member y (x::xs) = x=y orelse member y xs; fun starts str xs = let fun s [] xs = true | s ys [] = false | s (y::ys) (x::xs) = if y=x then s ys xs else false; in s (explode str) xs end; fun word ip = (* reserved words and also identifiers *) let fun w r "and" = (andSy, r) | w r "or" = (orSy, r) | w r "not" = (notSy, r) | w r name = (IdentSy name, r) and id [] name = w [] name | id (ip as (x::xs)) name = if x >= #"a" andalso x <= #"z" then id xs (name ^ str(x)) else w ip name in id ip "" end (* of word *); fun insymbol [] = (eofSy, []) (* symbols *) | insymbol (ip as (x::xs)) = if member x [#" ", #"\n", #"\t"] then insymbol xs else if starts "(" ip then (openSy, xs) else if starts ")" ip then (closeSy, xs) else if starts "=>" ip then (impliesSy, tl xs) else if x >= #"a" andalso x <= #"z" then word ip else (symbolErr ("insymbol: bad symbol:" ^ str(x) ^ "..."), []) ; fun lexical [] = [eofSy] (* lexical : char list -> Symbol list *) | lexical ip = let val (s,ip2) = insymbol ip in s :: (lexical ip2) end; (* =======================================LA==csse==monash==.au==14/4/2005== *) (* Concrete Syntax: WFF ::= WFF => disjunction disjunction ::= disjunction or conjunction conjunction ::= conjunction and operand operand ::= string | not operand | "(" WFF ")" *) exception ParseError of string * Symbol list; (* (message, rest of tokens) *) fun expression ip = let fun operand (openSy :: rest) = let val (subexp, (s::rest2)) = expression rest in if s=closeSy then (subexp, rest2) else (expErr "subexpression: missing )", rest) end | operand (IdentSy name :: rest) = (varid name, rest) | operand (notSy :: rest) = unExp knot rest | operand ip = (expErr "bad expression", ip) and unExp opr ip = let val (subopd, rest) = operand ip in (unexp (opr, subopd), rest) end and oprSeq nonTerminal separators operators ip = (* parse one or more nonTerminals separated by *) (* separators which stand for operators. *) let val (t, rest) = nonTerminal ip; (* the first *) fun oSeq t1 (s::ss) (opr::oprs) (ip as (x::xs)) = if s = x then (* NB. left associative... *) let val (t2, rest2) = nonTerminal xs in oSeq (binexp(t1,opr,t2)) separators operators rest2 end else (* s not = x *) oSeq t1 ss oprs ip | oSeq t1 _ _ ip = (t1, ip) in oSeq t separators operators rest end (* of oprSeq *) and conjunction ip = oprSeq operand [andSy] [conj] ip and disjunction ip = oprSeq conjunction [orSy] [disj] ip in oprSeq disjunction [impliesSy] [imp] ip end (* of expression *) (* ------------------------------------------------------------------------- *) (* parser driver *) and parseExpression ip = case expression ip of (expErr m, r) => expErr m | (e, eofSy::_) => e | (e, r) => expErr "junk on end of input" ; (* ========================================================================= *)