SQ-HAL Parser Grammar

my $grammar = q{
############################################################################

translate   :    learned | count | sum | average | select | { unknown_msg() }

############################################################################

select  :    (ask|question) qualifier(?) /(our)?/ table eol
                 { Select_T1_F0_C0($item[4]) }

        |    (ask|question) qualifier(?) field /of/ qualifier(?) table eol check[$item[3],$item[6]]
                 { Select_T1_F1_C0($item[3],$item[6]) }

        |    (ask|question) qualifier(?) /(our)?/ table /'?/ qualifier(?) field eol check[$item[7],$item[4]]
                 { Select_T1_F1_C0($item[7],$item[4]) }

        |    (ask|question) qualifier(?) field /and/ field /of/ qualifier(?) table eol check[$item[3],$item[8]] check[$item[5],$item[8]]
                 { Select_T1_F2_C0($item[3],$item[5],$item[8]) }

        |    (ask|question) qualifier(?) /(our)?/ table /'?/ qualifier(?) field /and/ field eol check[$item[7],$item[4]] check[$item[9],$item[4]]
                 { Select_T1_F2_C0($item[7],$item[9],$item[4]) }

        |    (ask|question) qualifier(?) /(our)?/table ignore_words condition eol
                 { Select_T1_F0_C1($item[4], $item[6]) }

        |    (ask|question) qualifier(?) field /of/ qualifier(?) table ignore_words condition eol check[$item[3],$item[6]]
                 { Select_T1_F1_C1($item[3],$item[6],$item[8]) }

        |    (ask|question) qualifier(?) /(our)?/ table /'?/ qualifier(?) field ignore_words condition eol check[$item[7],$item[4]]
                 { Select_T1_F1_C1($item[7],$item[4],$item[9]) }

        |    (ask|question) qualifier(?) field /and/ field /of/ qualifier(?) table ignore_words condition  eol check[$item[3],$item[8]] check[$item[5],$item[8]]
                 { Select_T1_F2_C1($item[3],$item[5],$item[8],$item[10]) }

        |    (ask|question) qualifier(?) /(our)?/ table /'?/ qualifier(?) field /and/ field ignore_words condition  eol check[$item[7],$item[4]] check[$item[9],$item[4]]
                 { Select_T1_F2_C1($item[7],$item[9],$item[4],$item[11]) }

        |    (ask|question) qualifier(?) /(our)?/ table /and/ table eol related[$item[4],$item[6]]
                 { Select_T2_F0_C0($item[4], $item[6]) }

        |    (ask|question) ignore_words table ignore_words table ignore_words eol related[$item[3],$item[5]]
                 { Select_T2_F0_C0($item[3], $item[5]) }

############################################################################
learned : {}

############################################################################

count   :    ask_count table /((is|are) there)|((do)? we have)/ eol
                 { Count_T1_F0_C0( $item[2] ) }

        |    ask qualifier(?) table /count/ eol
                 { Count_T1_F0_C0( $item[3] ) }

        |    ask_count table prep /(the)?/ field (prep)(?) value /((is|are) there)|((do )?we have)/ eol
                 { Count_T1_F0_C1( $item[2], "WHERE $item[5] = " . format_val($item[7])) }

        |    ask_count table ignore_words condition ignore_words eol
                 { Count_T1_F0_C1( $item[2], $item[4]) }

############################################################################

sum     :    (ask|whats) qualifier(?) total field ignore_words table ignore_words eol check[$item[4],$item[6]]
                 { Sum_T1_F1_C0( $item[4], $item[6] ) }

        |    (ask|whats) qualifier(?) total field ignore_words table ignore_words condition ignore_words eol check[$item[4],$item[6]]
                 { Sum_T1_F1_C1( $item[4], $item[6], $item[8] ) }

############################################################################

average :    (ask|whats) qualifier(?) /average/ field ignore_words table ignore_words eol check[$item[4],$item[6]]
                 { Average_T1_F1_C0($ item[4], $item[6] ) }

        |    (ask|whats) qualifier(?) /average/ field ignore_words table ignore_words condition ignore_words eol check[$item[4],$item[6]]
                 { Average_T1_F1_C1( $item[4], $item[6], $item[8] ) }

############################################################################

field   :    FIELDS     ### column names ###

table   :    TABLES     ### table names  ###

############################################################################

qualifier   :    /the|every|all( the)?|any|our/

ask         :    reply to_me

ask_count   :    ask(?) /how (many|much)/|/what number of|count the number of/

total       :    /total|sum of( all)( the)/

pre_val     :    /(by|of|for|to|from|with|is|are)*/

reply       :    /tell|show|list|display/

value       :    date|number|word

number      :    /(\$?)(-?)\d+(\.?)\d*/

word        :    /\w+|\"[\S\s]*\"/               { qq{ $item[1] } }

to_me       :    /((to )?(me|us))?/

question    :    whats|whos

whats       :    /what's|what're|what (is|are)?/

whos        :    /who's|who're|who (is|are)|who/

prep        :    /for|of|with|by/

table_verb  :    table|field

ignore_words :   junk(s?)

junk        :    ...!table_verb /\S+/

eol         :    /\s*/ /[.?]?/ /\s*/ /\z/

############################################################################
### date strings ###

date        :    /today|tomorrow|yesterday/              { parse_date($item[1]) }
            |    day                                     { parse_date($item[1]) }
            |    /(last|previous|next) /(date_period)    { parse_date($item[1].' '.$item[2]) }
            |    (date_format)

date_period :    /week|month/|month|day|/(financial )?year/

month       :    long_month|short_month

long_month  :    /january|february|march|april|may|june|july|august|september|october|november|december/

short_month :    /jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec/

day         :    long_day|short_day

long_day    :    /sunday|monday|tuesday|wednesday|thursday|friday|saturday/

short_day   :    /sun|mon|tue|wed|thu|fri|sat/

date_format :    DD date_sep (MM|month) date_sep (YYYY|YY)   { parse_date($item[3].'/'.$item[1].'/'.$item[5]) }
            |    (MM|month) date_sep DD date_sep (YYYY|YY)   { parse_date($item[1].'/'.$item[3].'/'.$item[5]) }
            |    month DD /,?/(YYYY|YY)                      { parse_date($item[1].'/'.$item[2].'/'.$item[4]) }
            |    (YYYY|YY) date_sep (MM|month) date_sep DD   { parse_date($item[3].'/'.$item[5].'/'.$item[1]) }
            |    DD date_sep (MM|month)                      { parse_date($item[3].'/'.$item[1]) }
            |    (MM|month) date_sep DD                      { parse_date($item[1].'/'.$item[3]) }
            |    month DD                                    { parse_date($item[1].'/'.$item[2]) }

date_sep    :    /-|\//

DD          :    /\d{1,2}/  # 31 || $item[1] < 1}>

MM          :    /\d{1,2}/  # 12 || $item[1] < 1}>

YYYY        :    /\d{4}/

YY          :    /\d{2}/

############################################################################
### Conditions ###
condition   :    greater_than|less_than|between|like|equal

equal       :    field pre_equal value
                     { "WHERE $item[1] = " . format_val($item[3]) }

pre_equal   :    /by|of|for|to|from|with|is|are|equal( to)/

greater_than:    field pre_greater value
                     { "WHERE $item[1] > " . format_val($item[3]) }

pre_greater :    /(is |are )?/ /more|greater|higher|expensive|taller|bigger/ /than/

less_than   :    field pre_less value
                     { "WHERE $item[1] < " . format_val($item[3]) }

pre_less    :    /(is |are )?/ /less|lower|cheaper|shorter|smaller/ /than/

between     :    field pre_between value /and/ value
                     { "WHERE $item[1] >= " . format_val($item[3]) . "\n\tAND $item[1] <= " . format_val($item[5]) }

pre_between :    /(is |are )?/ /between/

like        :    field /starts?/ /with/ value
                     { $item[4] =~ s/ //g;"WHERE $item[1] LIKE \"$item[4]%\"" }

            |    field /ends?|ended/ /in/ value
                     { $item[4] =~ s/ //g;"WHERE $item[1] LIKE \"%$item[4]\"" }

            |    field /contains?/ value
                     { $item[3] =~ s/ //g; "WHERE $item[1] LIKE \"%$item[3]%\"" }

############################################################################
### check if the given field ($arg[0]) is found in the given table ($arg[1]) ###
check       :    

### check if there is any relationship between two given tables ($arg[0] and $arg[1] ###
related     :    

};   ### end of pre-defined grammar ###

### messages to display if SQ-HAL do not understand grammar ###
my @messages = (
    "Could you rephrase that?",
    "What do you mean by that?",
    "What are your trying to say?",
    "I don't understand what you saying?",
);

### month numbers into month string ###
my %month  =
(
    "01"    =>    "Jan",
    "02"    =>    "Feb",
    "03"    =>    "Mar",
    "04"    =>    "Apr",
    "05"    =>    "May",
    "06"    =>    "Jun",
    "07"    =>    "Jul",
    "08"    =>    "Aug",
    "09"    =>    "Sep",
    "10"    =>    "Oct",
    "11"    =>    "Nov",
    "12"    =>    "Dec"
);


############################################################################
# return random message to be outputed for unmactched grammer
sub unknown_msg
############################################################################
{
    ### return a random message from the messages list ###
    return "${messages[int(rand (1+$#messages))]}\n";

}   ## unknown_msg


############################################################################
# check whether the given field name exist in the table
# arguments arg0 = field, arg1 = table
sub check_field
############################################################################
{
    ### return 1 if the field belongs to the specified table ###
    ### otherwise retrun undef                               ###
    return $main::table_columns {$_[1]} {$_[0]};

}   ## check_field


############################################################################
# convert the string into the date format
# return the date in the "#DD-MMM-YYYY#" format
sub parse_date
############################################################################
{
    use Date::Manip;

    eval
    {
        ### convert date string to a proper date ###
        my $date = &ParseDate($_[0]) || return;

        ### return the date as US format date ###
        ### i.e. in the form of DD-MMM-YYYY   ###
        return substr($date,6,2)."-".$month{substr($date,4,2)}."-".substr($date,0,4);
    };
}   ## parse_date


############################################################################
# check the relationship between two tables and if there is a relationship
# then return the corresponding relationship
# arguments - arg0 - table1, arg1 - table 2
sub check_relationship
############################################################################
{
    ### return the relationship between the two tables ###
    return ($main::table_relationships{$_[0]}{$_[1]} || $main::table_relationships{$_[1]}{$_[0]});

}   ## check_relationship

############################################################################
# check for the type of the data and put appropriate quotes or
# hash (#) around it
sub format_val
############################################################################
{
    my $val = $_[0];

    ### remove fornt and end spaces from the input ###
    $val =~ s/^ //;
    $val =~ s/ $//;

    ### remove any quotation marks ###
    $val =~ s/^"//;
    $val =~ s/"$//;

    ### remove the dollar size - these will be interpreted as numbers ###
    $val =~ s/^\$//;

    ### check if the input is a value ###
    if($val =~ m/^(-?)\d+\.*\d*$/)   ### value is a number ###
    {
        return $val;
    }
    elsif ($val =~ m/^\d{1,2}-[A-Z|a-z]{3}-\d{4}$/)    ### date string ###
    {
        ### put hashes around the value ###
        return "#${val}#";
    }
    else    ### string value ###
    {
        ### put quotes around the value ###
        return "\"${val}\"";
    }
}


############################################################################
### various subroutines to define SQL statments ###
###=========================================================================
# SELECT QUERIES

sub Select_T1_F0_C0 { return "SELECT *\nFROM ${_[0]}\n"; }

sub Select_T1_F1_C0 { return "SELECT DISTINCT ${_[0]}\nFROM ${_[1]}\n"; }

sub Select_T1_F2_C0 { return "SELECT DISTINCT ${_[0]}, ${_[1]}\nFROM ${_[2]}\n"; }

sub Select_T1_F0_C1 { return "SELECT *\nFROM ${_[0]}\n${_[1]}\n"; }

sub Select_T1_F1_C1 { return "SELECT DISTINCT ${_[0]}\nFROM ${_[1]}\n${_[2]}\n"; }

sub Select_T1_F2_C1 { return "SELECT DISTINCT ${_[0]}, ${_[1]}\nFROM ${_[2]}\n${_[3]}\n"; }

sub Select_T2_F0_C0 {
    my $relationship = check_relationship($_[0], $_[1]);
    return "SELECT ${_[0]}.*, ${_[1]}.*\nFROM ${_[0]}, ${_[1]} WHERE $relationship\n";
}

sub Select_T2_F1_C0 {
    my $relationship = check_relationship($_[2], $_[3]);
    return "SELECT DISTINCT $_[0].$_[1]\nFROM $_[2], $_[3]\nWHERE $relationship\n";
}

sub Select_T2_F2_C0 {
    my $relationship = check_relationship($_[4], $_[5]);
    return "SELECT DISTINCT $_[0].$_[1], $_[2].$_[3]\nFROM $_[4], $_[5]\nWHERE $relationship\n";
}

sub Select_T2_F0_C1 {
    my $relationship = check_relationship($_[0], $_[1]);
    return "SELECT ${_[0]}.*, ${_[1]}.*\nFROM ${_[0]}, ${_[1]} WHERE $relationship AND $_[2].$_[3]=$_[4]\n";
}

sub Select_T2_F1_C1 {
    my $relationship = check_relationship($_[2], $_[3]);
    return "SELECT DISTINCT $_[0].$_[1]\nFROM $_[2], $_[3]\nWHERE $relationship AND $_[4].$_[5]=$_[6]\n";
}

sub Select_T2_F2_C1 {
    my $relationship = check_relationship($_[4], $_[5]);
    return "SELECT DISTINCT $_[0].$_[1], $_[2].$_[3]\nFROM $_[4], $_[5]\nWHERE $relationship AND $_[6].$_[7]=$_[8]\n";
}

###=========================================================================
# COUNT QUERIES
sub Count_T1_F0_C0 { return "SELECT COUNT(*) AS number_of_$_[0]\nFROM $_[0]\n"; }

sub Count_T1_F1_C0 { return "SELECT DISTINCT COUNT($_[0]) AS number_of_$_[0]\nFROM $_[1]\n"; }

sub Count_T1_F0_C1 { return "SELECT COUNT(*) AS number_of_$_[0]\nFROM $_[0]\n$_[1]\n"; }

sub Count_T1_F1_C1 { return "SELECT COUNT($_[0]) AS number_of_$_[0]\nFROM $_[1]\n$_[2]\n"; }

###=========================================================================
# SUM QUERIES
sub Sum_T1_F1_C0 { return "SELECT SUM($_[0]) AS total_$_[0]\nFROM $_[1]\n"; }

sub Sum_T1_F1_C1 { return "SELECT SUM($_[0]) AS total_$_[0]\nFROM $_[1]\n$_[2]\n"; }

###=========================================================================
# AVERAGE QUERIES
sub Average_T1_F1_C0 { return "SELECT AVG($_[0]) AS average_$_[0]\nFROM $_[1]\n"; }

sub Average_T1_F1_C1 { return "SELECT AVG($_[0]) AS average_$_[0]\nFROM $_[1]\n$_[2]\n";}