(* Recursive descent parser for use with "direct" semantics *) (* of simple imperative features expressed in SML (SML97) *) (* See "A Practical Introduction to Denotational Semantics" *) (* Cambridge University Press *) (* L.A., Dept Comp Sci, Monash, 1988 and CSSE, Monash .au 2005 *) type Ide = string; datatype Symbol = (* lexical items *) IdentSy of string | NumSy of int | (* x, 7 *) plusSy | minusSy | timesSy | divSy | (* + * - / *) eqSy | neSy | ltSy | leSy | gtSy | geSy | (* = <> < <= > >= *) notSy | andSy | orSy | (* not and or *) becomesSy | openSy | closeSy | semiSy | (* := ( ) ; *) varSy | (* var *) ifSy | thenSy | elseSy | fiSy | (* if then else fi *) whileSy | doSy | odSy | (* while do od *) beginSy | endSy | (* begin end *) writeSy | (* write *) eofSy | symbolErr of string; (* ------------------------------------------------------------------------- *) (* abstract syntax *) datatype Uopr = uminus | knot; (* operators, unary *) datatype Bopr = plus | minus | times | divide | (* operators, binary *) eq | ne | le | lt | ge | gt | conj | disj ; datatype Exp = binexp of Exp * Bopr * Exp | (* expressions *) unexp of Uopr * Exp | varid of Ide | numeral of int | expErr of string and Cmd = assign of Ide * Exp | (* commands *) ifcmd of Exp * Cmd * Cmd | whilecmd of Exp * Cmd | cmdlist of Cmd * Cmd | proccall of Ide | write of Exp | block of Dec * Cmd | cmdErr of string and Dec = vardec of Ide | (* declarations *) declist of Dec*Dec | procdec of Ide*Cmd | decErr of string; (* =======================================LA==csse==monash==.au==14/4/2005== *) (* lexical analysis *) fun member y [] = false | member y (x::xs) = x=y orelse member y xs; fun starts str xs = let fun s [] xs = true | s ys [] = false | s (y::ys) (x::xs) = if y=x then s ys xs else false; in s (explode str) xs end; fun number ip = let fun nu [] n = (NumSy n, []) | nu (ip as (x::xs)) n = if x >= #"0" andalso x <= #"9" then nu xs (n*10+ord(x)-ord(#"0")) else (NumSy n, ip) in nu ip 0 end; fun word ip = (* reserved words and also identifiers *) let fun w r "if" = (ifSy, r) | w r "then" = (thenSy, r) | w r "else" = (elseSy, r) | w r "fi" = (fiSy, r) | w r "while" = (whileSy, r) | w r "do" = (doSy, r) | w r "od" = (odSy, r) | w r "begin" = (beginSy, r) | w r "end" = (endSy, r) | w r "and" = (andSy, r) | w r "or" = (orSy, r) | w r "not" = (notSy, r) | w r "write" = (writeSy, r) | w r "var" = (varSy, r) | w r name = (IdentSy name, r) and id [] name = w [] name | id (ip as (x::xs)) name = if x >= #"a" andalso x <= #"z" then id xs (name ^ str(x)) else w ip name in id ip "" end (* of word *); fun insymbol [] = (eofSy, []) | insymbol (ip as (x::xs)) = if member x [#" ", #"\n", #"\t"] then insymbol xs else if starts ":=" ip then (becomesSy, tl xs) else if starts ";" ip then (semiSy, xs) else if starts "(" ip then (openSy, xs) else if starts ")" ip then (closeSy, xs) else if starts "+" ip then (plusSy, xs) else if starts "-" ip then (minusSy, xs) else if starts "*" ip then (timesSy, xs) else if starts "/" ip then (divSy, xs) else if starts "<>" ip then (neSy, tl xs) else if starts "<=" ip then (leSy, tl xs) else if starts ">=" ip then (geSy, tl xs) else if starts "=" ip then (eqSy, xs) else if starts "<" ip then (ltSy, xs) else if starts ">" ip then (gtSy, xs) else if x >= #"0" andalso x <= #"9" then number ip else if x >= #"a" andalso x <= #"z" then word ip else (symbolErr ("insymbol: bad symbol:" ^ str(x) ^ "..."), []) ; fun lexical [] = [eofSy] | lexical ip = let val (s,ip2) = insymbol ip in s :: (lexical ip2) end; (* =======================================LA==csse==monash==.au==14/4/2005== *) (* parser *) fun expression ip = let fun operand (openSy :: rest) = let val (subexp, (s::rest2)) = expression rest in if s=closeSy then (subexp, rest2) else (expErr "subexpression: missing )", rest) end | operand (IdentSy name :: rest) = (varid name, rest) | operand (NumSy n :: rest) = (numeral n, rest) | operand (minusSy :: rest) = let val (subopd, rest2) = operand rest in (unexp (uminus, subopd), rest2) end | operand ip = (expErr "bad expression", ip) and oprSeq nonTerminal separators operators ip = let val (t, rest) = nonTerminal ip; fun oSeq t1 (s::ss) (opr::oprs) (ip as (x::xs)) = if s = x then (* NB. left associative... *) let val (t2, rest2) = nonTerminal xs in oSeq (binexp(t1,opr,t2)) separators operators rest2 end else (* s not = x *) oSeq t1 ss oprs ip | oSeq t1 _ _ ip = (t1, ip) in oSeq t separators operators rest end (* of oprSeq *) and term ip = oprSeq operand [timesSy, divSy] [times, divide] ip and arithExp ip = oprSeq term [plusSy, minusSy] [plus, minus] ip and relExp ip = oprSeq arithExp [eqSy,neSy,ltSy,leSy,gtSy,geSy] [eq, ne, lt, le, gt, ge ] ip in oprSeq relExp [andSy, orSy] [conj, disj] ip end (* of expression *) (* ------------------------------------------------------------------------- *) and declarations ip = let fun declaration (varSy :: IdentSy name :: rest) = (vardec name, rest) | declaration r = (decErr "bad declaration", r) in case declaration ip of (decErr m, r) => (decErr m, r) | (d1, semiSy::rest) => (* e.g. var x; ... *) (case declarations rest of (decErr m, r) => (d1, rest) (* e.g. var x; y:= ... *) | (d2, rest) => (declist(d1,d2), rest) ) | (d1, r) => (decErr "decs, no ;", r) end (* of declarations *) (* ------------------------------------------------------------------------- *) and command ip = (* the various kinds of command *) let fun ifCommand ip = (* if ... then ... else ... fi *) case expression ip of (expErr m, r) => (cmdErr ("bad if-expression, " ^ m), r) | (e, thenSy :: r2) => (* then ... *) (case commands r2 of (cmdErr m, r) => (cmdErr ("bad then-command, " ^ m), r) | (cTrue, elseSy :: r3) => (* else ... *) (case commands r3 of (cmdErr m, r) => (cmdErr ("bad else-command, " ^ m), r) | (cFalse, fiSy :: r4) => (ifcmd(e,cTrue,cFalse), r4) | (* ... fi *) (_, r) => (cmdErr "no fi in if command", r) ) | (_, r) => (cmdErr "no else in if command", r) ) | (_, r) => (cmdErr "no then in if command", r) and whileCommand ip = (* while... *) case expression ip of (expErr m, r) => (cmdErr ("bad while expression, " ^ m), r) | (e, doSy :: r2) => (* do... *) (case commands r2 of (cmdErr m, r) => (cmdErr ("do... " ^ m), r) | (body, odSy :: r) => (whilecmd(e, body), r) | (* ...od *) (_, r) => (cmdErr "no od in while", r) ) | (_, r) => (cmdErr "no do in while", r) and blockCommand ip = (* begin ... end *) case declarations ip of (* optional declarations *) (decErr m, r) => (case commands ip of (c, endSy :: r2) => (c, r2) | (* cmds end *) (_, r) => (cmdErr "no end to block", r) ) | (d, r) => (* var x; ... *) (case commands r of (c, endSy :: r2) => (block(d, c), r2) | (* .... cmds end *) (_, r) => (cmdErr "no end to block", r) ) and writeCommand ip = case expression ip of (expErr m, r) => (cmdErr ("in write, " ^ m), r) | (e, r) => (write e, r) and assignment ip = (* x := exp *) case expression ip of (expErr m, r) => (cmdErr ("bad command, " ^ m), r) | (eLeft as (varid v), becomesSy :: r2) => (case expression r2 of (expErr m, r) => (cmdErr ("bad RHS in assignment, " ^ m), r) | (eRight, r3) => (assign(v, eRight), r3) ) | (_, r) => (cmdErr "bad ident:= in assignment", r) and commands ip = (* cmd; cmd; ... *) case command ip of (cmdErr m, r) => (cmdErr m, r) | (c1, semiSy :: r2) => (case commands r2 of (cmdErr m, r) => (cmdErr m, r) | (c2, r3) => (cmdlist(c1, c2), r3) ) | (c1, r) => (c1, r) in case ip of (ifSy :: r) => ifCommand r | (whileSy :: r) => whileCommand r | (beginSy :: r) => blockCommand r | (writeSy :: r) => writeCommand r | _ => assignment ip end (* of command *) (* ------------------------------------------------------------------------- *) and parse ip = case command ip of (cmdErr m, r) => cmdErr m | (c, eofSy::_) => c | (c, r) => cmdErr "junk on end of input" ; (* ========================================================================= *) (* some simple syntax tests... *) (* e.g. expression(lexical(explode "(1+2)*3-4/5+x")); parse(lexical(explode "x := 1 + x")); parse(lexical(explode "if x then x:=1 else x:=2 fi")); parse(lexical(explode "while x > 1 do x:=x-1 od")); parse(lexical(explode "begin var x; var y; x:=y; y:=x end")); parse(lexical(explode "write 7")); *)