module StringUtil where

import Data.Char

{-
 - doTokenise:
 -
 - Worker for tokenise. Walks the input one character at a time while
 - maintaining two accumulators:
 -
 -   cTok    - the characters of the token read so far
 -   cBuffer - a sliding window holding the most recent (length sep) characters
 -
 - Whenever the window equals the separator the current token is complete; it
 - is emitted and both accumulators are reset. Otherwise, once the window is
 - full, its oldest character is moved onto the end of cTok and the next input
 - character is appended to the window.
 -
 - Arguments: separator, remaining input, current token, window buffer.
 - Returns the list of tokens.
 -
 - An empty separator can never delimit anything, so in that case everything
 - seen so far plus the remaining input is returned as a single token (the
 - previous code never consumed input in this case and produced an endless
 - list of empty strings).
 -}
doTokenise :: String -> String -> String -> String -> [String]
doTokenise [] rest cTok cBuffer = [cTok ++ cBuffer ++ rest]
doTokenise sep [] cTok cBuffer
    -- End of input with a token in progress: the window holds the tail of the
    -- token unless it is exactly a trailing separator, which is dropped.
    | not (null cTok) && cTok /= sep =
        if cBuffer /= sep then [cTok ++ cBuffer] else [cTok]
    -- No token in progress: whatever is in the window is the final token.
    | cBuffer /= sep = [cBuffer]
    -- The input ended directly on a separator with nothing before it.
    | otherwise = [""]
doTokenise sep input@(x:xs) cTok cBuffer
    -- Window matches the separator: emit the token and start afresh on the
    -- same input (x has not been consumed yet).
    | cBuffer == sep = cTok : doTokenise sep input "" []
    -- Window is full but is not the separator: slide it along by one.
    | (oldest:newer) <- cBuffer
    , length cBuffer == length sep =
        doTokenise sep xs (cTok ++ [oldest]) (newer ++ [x])
    -- Window is still filling. When driven by tokenise, cTok is always empty
    -- here because characters only reach cTok once the window is full.
    | otherwise = doTokenise sep xs "" (cBuffer ++ [x])

{-
 - tokenise:
 -
 - Returns a list of the tokens in oString separated by sep. For example, with
 - the arguments:
 -
 -     tokenise "--" "One--Two--Three--Four"
 -
 - the return value would be
 -
 -     ["One", "Two", "Three", "Four"]
 -
 - A single trailing separator after a non-empty token is dropped
 - ("One--" gives ["One"]). An empty separator yields the whole of oString as
 - one token.
 -}
tokenise :: String -> String -> [String]
tokenise sep oString = doTokenise sep oString [] []

{-
 - doGetFieldType:
 -
 - Worker for getFieldType. Scans the field one character at a time, carrying
 - the type name deduced so far in current:
 -
 -   - a digit keeps "Float" if a '.' has already been seen, otherwise "Int"
 -   - the first '.' switches the type to "Float"
 -   - a second '.', or any other character, makes the field a "String"
 -
 - When the input is exhausted the current type name is returned.
 -}
doGetFieldType :: String -> String -> String
doGetFieldType [] current = current
doGetFieldType (x:xs) current
    | isDigit x = doGetFieldType xs (if current /= "Float" then "Int" else "Float")
    -- Only one decimal point is allowed in a number.
    | x == '.' && current /= "Float" = doGetFieldType xs "Float"
    | otherwise = "String"

{-
 - getFieldType:
 -
 - "Brute force" extraction of basic field type; the result is one of "Int",
 - "Float" or "String". Starts off doGetFieldType with the type "Int" as it is
 - the type with the fewest possible number of valid symbols.
 -
 - A field containing no digits at all (including the empty field and a lone
 - ".") is not a number and is reported as "String".
 -}
getFieldType :: String -> String
getFieldType x
    | any isDigit x = doGetFieldType x "Int"
    | otherwise = "String"

{-
 - nonAlphaNumericSub:
 -
 - Takes a character and, if it is one of the punctuation characters listed
 - below, replaces it with a string naming that character. Any character that
 - is not listed (letters, digits, underscore, and anything else) is returned
 - unchanged as a one-character string.
 -}
nonAlphaNumericSub :: Char -> String
nonAlphaNumericSub x =
    case x of
        '`'  -> "Grave"
        '~'  -> "Tilde"
        '!'  -> "Exclamation"
        '@'  -> "At"
        '#'  -> "Hash"
        '$'  -> "Dollar"
        '%'  -> "Percentage"
        '^'  -> "Carat"
        '&'  -> "Ampersand"
        '*'  -> "Asterisk"
        '('  -> "OpenBracket"
        ')'  -> "CloseBracket"
        '-'  -> "Minus"
        '+'  -> "Plus"
        '='  -> "Equals"
        '{'  -> "OpenBrace"
        '}'  -> "CloseBrace"
        '['  -> "OpenSquare"
        ']'  -> "CloseSquare"
        '|'  -> "Pipe"
        '\\' -> "Backslash"
        ':'  -> "Colon"
        ';'  -> "Semicolon"
        '"'  -> "Quotation"
        '\'' -> "Apostrophe"
        ','  -> "Comma"
        '<'  -> "Lessthan"
        '.'  -> "Fullstop"
        '>'  -> "Greaterthan"
        '?'  -> "Question"
        '/'  -> "Slash"
        _    -> [x]