/*
 * Java 1.0.2 Grammar for ANTLR parser generator.
 *
 * Developed by MageLang Institute (www.MageLang.com)
 * Authors:
 *  Terence Parr (parrt@magelang.com)
 *  John Mitchell of Non, Inc. (john@non.net)
 *  Jim Coker (jcoker@magelang.com)
 *
 * The grammar looks best at tabs = 4.
 *
 * SOFTWARE RIGHTS
 *
 * This file is a Java language grammar and is free software.  We do not
 * restrict its use or distribution, but you may NOT claim ownership or
 * authorship of this grammar or support code.  An individual or company
 * may otherwise do whatever they wish with the grammar distributed
 * herewith including the incorporation of the grammar or the output
 * generated by ANTLR into commerical software.  You may redistribute in
 * source or binary form without payment of royalties to us as long as
 * this header remains in all source distributions.
 *
 * We encourage users to develop parsers/tools using this grammar.
 * In return, we ask that credit is given to us for developing this
 * grammar.  By "credit", we mean that if you incorporate our grammar or
 * the generated code into one of your programs (commercial product,
 * research project, or otherwise) that you acknowledge this fact in the
 * documentation, research report, etc....  In addition, you should say nice
 * things about us at every opportunity.
 *
 * As long as these guidelines are kept, we expect to continue enhancing
 * this grammar.  Feel free to send us enhancements, fixes, bug reports,
 * suggestions, or general words of encouragement at parrt@magelang.com.
 *
 * DISCLAIMER: We make no guarantees that this grammar works, makes sense,
 *             or can be used to do anything useful.
 *
 * HISTORY:
 *
 * 1.00
 *  Initial release
 *
 * 1.10
 *  Modified grammar to use rule names that are closer to Arthur
 *  Van Hoff's rule names in his JDK compiler.
 *  Fixed the grammar so that it accepts all files in hotjava.src
 *  except for a few that look like errors in java source.
 *  
 * 1.20
 *  Various modifications to match the grammar in _The Java Language
 *  Specification_.  Note that the language spec uses "declaration" instead
 *  of the term "definition".  In the old days, a declaration a la C++ was
 *  "class A;" and the definition was "class A {...};".  We use definition
 *  in this grammar.
 *
 * KNOWN PROBLEMS:
 *
 *  It doesn't handle some escape sequences.
 *
 *  It doesn't handle inner classes in the 1.1 language spec.
 */

/* Header action: pulls in the token and token-pointer declarations and
 * makes ANTLRToken an alias for ANTLRCommonToken.  ANTLRToken is the token
 * type handed to ParserBlackBox in main().
 */
#header <<
#include "AToken.h"
#include "ATokPtr.h"
typedef ANTLRCommonToken ANTLRToken;
>>

<<
#include "PBlackBox.h"
#include "DLGLexer.h"

/* Command-line driver for the tag generator.
 *
 * usage: prog [-trace] [-in javafile] [-out tagfile]
 *
 * The options are recognized only in the order shown; anything else is
 * silently ignored.  Input defaults to stdin and output to stdout.
 *
 * Every argv access is checked against argc first.  argv[argc] is a null
 * pointer, so an unchecked look at the next argument (for example after a
 * lone "-trace", or after "-in file" with nothing following) would hand
 * NULL to strcmp or fopen.
 *
 * Exits with -1 when a file cannot be opened or an option is missing its
 * file name; returns 0 after the parse otherwise.
 */
int main(int argc, char *argv[])
{
    FILE *in=stdin, *out=stdout;
    int trace = 0;
    int i = 1;

    if ( i<argc && strcmp("-trace", argv[i])==0 ) {
        trace = 1;
        i++;
    }
    if ( i<argc && strcmp("-in", argv[i])==0 ) {
        if ( i+1 >= argc ) {
            fprintf(stderr, "missing file name after -in\n");
            exit(-1);
        }
        in = fopen(argv[i+1], "r");
        if ( in == NULL ) {
            fprintf(stderr, "cannot open java file %s\n", argv[i+1]);
            exit(-1);
        }
        i += 2;
    }
    if ( i<argc && strcmp("-out", argv[i])==0 ) {
        if ( i+1 >= argc ) {
            fprintf(stderr, "missing file name after -out\n");
            exit(-1);
        }
        out = fopen(argv[i+1], "w");
        if ( out == NULL ) {
            fprintf(stderr, "cannot open tag file %s\n", argv[i+1]);
            exit(-1);
        }
    }

    /* The black box wires lexer, token buffer and parser together. */
    ParserBlackBox<DLGLexer, JavaParser, ANTLRToken> p(in);
    if ( trace ) {
        p.parser()->traceOn();
    }
    p.parser()->compilationUnit(out);
    return 0;
}
>>

/* Lexical class used while inside a slash-star comment (START switches
 * here when it sees the comment opener).  Everything is discarded with
 * skip(); only the line count is maintained.
 *
 *   star-slash       end of comment, return to START
 *   newline          discarded, newline() is called
 *   lone star        a star that is not part of the terminator
 *   anything else    runs of characters other than star and newline
 */
#lexclass COMMENTS

#token "\*/"            << mode(START); skip(); >>
#token "\n"             << skip(); newline(); >>
#token "\*"             << skip(); >>
#token "~[\*\n]+"       << skip(); >>

/* Lexical class used between the opening and closing double quote of a
 * string literal.  Each fragment calls more() so the pieces accumulate
 * into one token; the closing quote finally yields STRINGVAL and switches
 * back to START.
 *
 * Escapes handled here: \n, \r, \t, backslash-newline (replaced by the
 * empty string; note that newline() is not called for it), \\ and \".
 * No rule in this class matches any other backslash sequence (\b, \f, \',
 * octal, \uXXXX) -- see KNOWN PROBLEMS in the file header.
 */
#lexclass STRINGS

#token STRINGVAL "\"" << mode (START); >>
#token "\\n"            << replchar('\n'); more(); >>
#token "\\r"            << replchar('\r'); more(); >>
#token "\\t"            << replchar('\t'); more(); >>
#token "\\\n"           << replstr(""); more(); >>
#token "\\\\"           << replchar('\\'); more(); >>
#token "\\\""           << replchar('"'); more(); >>
#token "~[\"\\]+"       << more(); >>

/* Default lexical class.
 *
 *   slash-star       enter COMMENTS, discard
 *   tabs / blanks    discarded
 *   newline          counted via newline(), discarded
 *   // comment       discarded up to and including the newline, which is
 *                    counted; the pattern requires the terminating newline
 *   double quote     enter STRINGS; more() keeps the quote as the start of
 *                    the STRINGVAL token text
 *   CHARVAL          a quoted character: any non-backslash character, a
 *                    backslash followed by any single character, or
 *                    \u followed by exactly four hex digits
 */
#lexclass START

#token "/\*"            << mode(COMMENTS); skip(); >>
#token "[\t\ ]+"        << skip(); >>
#token "\n"             << newline(); skip(); >>
#token "// ~[\n]* \n"   << newline(); skip(); >>
#token "\""             << mode(STRINGS); more(); >>
#token CHARVAL "'(~[\\]|\\~[]|\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])'"

class JavaParser {
<<
/* Parser Members */
protected:
    int traceIndentLevel;   /* nesting depth used to indent trace output */
    int doTracing;          /* nonzero once traceOn() has been called */
    void tracein(char *r);
    void traceout(char *r);
public:
    /* Reset the parser and all tag-generation state.  Every member that a
     * rule action reads is given a defined value here, so that a block
     * parsed before any method head (a static initializer, for example)
     * does not read or increment an indeterminate counter. */
    void init()
        {
            ANTLRParser::init();
            traceIndentLevel = 0;
            doTracing = 0;
            currentClassOrInterface = NULL;
            currentMethod = NULL;
            currentBlockNumber = 0;
            numBlocks = -1;     /* same start value methodHead uses */
            out = stdout;       /* compilationUnit replaces this */
        }
    /* Enable the enter/exit trace written by tracein() and traceout(). */
    void traceOn() { doTracing=1; }

    /* Name of the class or interface being parsed; set from the text of
     * the name token by classDefinition and interfaceDefinition. */
    char *currentClassOrInterface;
    /* Name token of the method or constructor being parsed; NULL outside
     * of one, which makes variableDeclarator report a field. */
    ANTLRTokenPtr currentMethod;
    /* Number of the innermost open block within the current method. */
    int currentBlockNumber;
    /* Number most recently handed out to a block; methodHead resets it
     * to -1 so the method body becomes block 0. */
    int numBlocks;

    /* Stream that receives the tag records. */
    FILE *out;

    /* Syntax-error hook, intentionally empty: nothing is reported. */
    void
    syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset,
        ANTLRTokenType etok, int k) {
        // print nothing out as we don't have anyplace to display it.!!!!
    }
>>

                /* F I L E S  /  P A C K A G E S */

/* Start rule: an optional package statement, any number of imports, any
 * number of type definitions, then end of file.  The stream passed in is
 * stored in the parser member "out", which every tag-emitting action
 * writes to.
 */
compilationUnit[FILE *output]   /* pass in the output stream */
    :   <<out = output;>>
        { package }
        ( import )*
        ( typeDefinition )*
        "@" // Eof
    ;

/* "package a.b.c;" */
package
    :   "package" qualifiedName ";"
    ;

/* "import a.b.C;" or "import a.b.*;" */
import
    :   "import" qualifiedNameStar ";"
    ;

/* A top-level class or interface preceded by any modifiers, or a stray
 * ";" between definitions.
 */
typeDefinition
    :   (modifier)*
        (   classDefinition
        |   interfaceDefinition
        )
    |   ";"
    ;


                /* T Y P E S / D E C L S */

/* A class/interface name with optional array brackets; used as the
 * right-hand side of "instanceof".
 */
referenceType
    :   qualifiedName ("\[" "\]")*
    ;

/* Any type (named or built-in) with optional array brackets. */
typeSpec
    :   type ("\[" "\]")*
    ;

/* A type without array brackets: a (qualified) name or a built-in type. */
type:   qualifiedName
    |   builtInType
    ;

/* The primitive type keywords.  "void" is included so that method return
 * types can be matched through typeSpec.
 */
builtInType
    :   "void"
    |   "boolean"
    |   "byte"
    |   "char"
    |   "short"
    |   "int"
    |   "float"
    |   "long"
    |   "double"
    ;

/* One or more identifiers separated by dots, e.g. java.lang.String. */
qualifiedName
    :   IDENT ("." IDENT)*
    ;

/* A qualified name optionally followed by ".*" (on-demand import). */
qualifiedNameStar
    :   qualifiedName { "." "\*" }
    ;

/* A single modifier keyword.  The same set is accepted in front of types,
 * fields, methods and constructors; no check is made that a modifier is
 * meaningful in its context.  "threadsafe" and "const" are accepted too.
 */
modifier
    :   "private"
    |   "public"
    |   "protected"
    |   "static"
    |   "transient"
    |   "final"
    |   "abstract"
    |   "native"
    |   "threadsafe"
    |   "synchronized"
    |   "const"
    ;


                /* C L A S S E S */

/* A class header followed by its body.  Before the body is parsed the
 * class name is remembered in currentClassOrInterface and a record
 *     class <name> <line>
 * is written to out.
 */
classDefinition
    :   "class" id:IDENT extends implements
        <<
        currentClassOrInterface = id->getText();
        fprintf(out,"class %s %d\n", id->getText(), id->getLine());
        >>
        classBlock
    ;

/* An interface header followed by its body.  The name is remembered in
 * currentClassOrInterface and a record
 *     interface <name> <line>
 * is written to out.  Note that an "implements" clause is accepted here
 * as well, and that the body is the same classBlock used for classes.
 */
interfaceDefinition
    :   "interface" id:IDENT interfaceExtends implements
        <<
        currentClassOrInterface = id->getText();
        fprintf(out,"interface %s %d\n", id->getText(), id->getLine());
        >>
        classBlock
    ;

/* The brace-enclosed body of a class or interface: zero or more fields. */
classBlock
    :   "\{"
            ( field )*
        "\}"
    ;

/* Optional superclass clause of a class (single name); may be empty. */
extends
    :   "extends" qualifiedName
    |
    ;

/* Optional super-interface clause of an interface (comma-separated list
 * of names); may be empty.
 */
interfaceExtends
    :   "extends" qualifiedName ("," qualifiedName)*
    |
    ;

/* Optional "implements" clause (comma-separated list of names); may be
 * empty.
 */
implements
    :   "implements" qualifiedName ( "," qualifiedName )*
    |
    ;

/** One member of a class or interface body.
 *
 * Two syntactic predicates (the expressions in parens followed by '?')
 * are used to resolve lookahead issues between constructors and method
 * defs and between method defs and variable defs.
 * Without the predicates, the rule would be:
 *
 * field
 *  :   constructorDefinition
 *  |   methodDefinition
 *  |   variableDefinitions
 *  |   "static" compoundStatement
 *  |   ";"
 *  ;
 *
 *  We could left-factor out the modifiers and typespec, but it does
 *  not cost us much to backtrack over these few tokens and the grammar
 *  is more readable with the predicate.
 *
 *  The order of the alternatives matters: the constructor predicate is
 *  tried first, then the method predicate, and only then a variable
 *  definition.  variableDefinitions does not consume the terminating ";"
 *  itself; that ";" is matched by the last alternative on the next
 *  iteration of ( field )* in classBlock.
 */
field
    :   ( (modifier)* methodHead "\{" )?
        constructorDefinition

    |   ( (modifier)* typeSpec methodHead ( "\{" | ";" ) )?
        methodDefinition

    |   variableDefinitions
    |   "static" compoundStatement  // "static { ... }" initializer
    |   ";"
    ;

/* A local variable declaration without modifiers and without the
 * trailing ";" (the caller in statement matches the ";").
 */
localVariableDefinitions
    :   typeSpec variableDeclarator ( "," variableDeclarator )*
    ;

/* A field declaration: modifiers, a type and one or more declarators.
 * The trailing ";" is not part of this rule.
 */
variableDefinitions
    :   (modifier)* typeSpec variableDeclarator
        (   "," variableDeclarator
        )*
    ;

/* One declared name with optional array brackets and initializer.
 *
 * After the declarator (including any initializer) has been matched, one
 * record is written to out:
 *   - while a method or constructor is being parsed (currentMethod set):
 *         local <class> <method> <name> <block-number> <line>
 *   - otherwise:
 *         variable <class> <name> <line>
 * currentMethod is also unset inside a "static { ... }" initializer, so
 * locals declared there are reported with the "variable" form.
 */
variableDeclarator
    :   id:IDENT ("\[" "\]")* { "=" initializer }
        <<
        if ( currentMethod!=NULL ) {
            fprintf(out,"local %s %s %s %d %d\n",
                currentClassOrInterface,
                currentMethod->getText(),
                id->getText(),
                currentBlockNumber,
                id->getLine());
        }
        else {
            fprintf(out,"variable %s %s %d\n",
                currentClassOrInterface,
                id->getText(),
                id->getLine());
        }
        >>
    ;

/* The value after "=" in a declarator: an expression or a brace-enclosed
 * array initializer.
 */
initializer
    :   assignmentExpression
    |   arrayInitializer
    ;

/* "{ a, b, { c, d }, }" -- possibly empty, possibly nested, with an
 * optional trailing comma.
 */
arrayInitializer
    :   "\{" { initializer ( "," initializer )* {","} } "\}"
    ;

                /* M E T H O D S */

/* The part of a method or constructor from its name up to (not including)
 * the body: name, parenthesized parameter list, optional array brackets
 * after the parameter list, optional throws clause.
 *
 * Returns the name token.  As a side effect the token is stored in
 * currentMethod and numBlocks is reset to -1, so that the block counter
 * restarts for every method.
 */
methodHead > [ANTLRTokenPtr id]
    :   i:IDENT <<$id=i; currentMethod=$id; numBlocks = -1;>>
        "\(" {parameterDefinitionList} "\)" ("\[" "\]")* {throwsClause}
    ;

/* "throws A, b.C, ..." */
throwsClause
    :   "throws" qualifiedName ("," qualifiedName)*
    ;
    
/* A method: modifiers, return type, head, then either a body or ";".
 *
 * Once the head has been matched a record
 *     method <class> <name> <line>
 * is written to out.  currentMethod (set by methodHead) is cleared again
 * after the body, so later declarators are reported as fields.
 */
methodDefinition
    :   <<ANTLRTokenPtr id;>>
        (modifier)* typeSpec methodHead>[id]
        <<
        fprintf(out,"method %s %s %d\n",
                currentClassOrInterface,
                id->getText(),
                id->getLine());
        >>
        ( compoundStatement | ";" )
        <<
        currentMethod=NULL;
        >>
    ;

/* A constructor: modifiers, head (no return type) and a mandatory body.
 *
 * Constructors are reported with the same record as methods:
 *     method <class> <name> <line>
 * currentMethod is cleared after the body.
 */
constructorDefinition
    :   <<ANTLRTokenPtr id;>>
        (modifier)* methodHead > [id]
        <<
        fprintf(out,"method %s %s %d\n",
                currentClassOrInterface,
                id->getText(),
                id->getLine());
        >>
        compoundStatement
        <<
        currentMethod=NULL;
        >>
    ;

/* One or more formal parameters separated by commas. */
parameterDefinitionList
    :   parameterDefinition ( "," parameterDefinition )*
    ;

/* A formal parameter: type, name and optional array brackets after the
 * name.  No tag record is written for parameters.
 */
parameterDefinition
    :   typeSpec IDENT ("\[" "\]")*
    ;


                /* S T A T E M E N T S */

/* A brace-enclosed block of statements.
 *
 * Blocks are numbered in the order they are opened: numBlocks is bumped
 * on entry and becomes currentBlockNumber for the statements inside, and
 * the enclosing block's number is restored just before the closing brace.
 * variableDeclarator prints currentBlockNumber in its "local" records.
 */
compoundStatement
    :   <<
        numBlocks++;
        int saveBlock = currentBlockNumber;
        currentBlockNumber = numBlocks;
        >>
        "\{"
            (statement)*
            <<
            currentBlockNumber = saveBlock;
            >>
        "\}"
    ;

/* Any single statement: labeled statement, block, local variable
 * declaration, expression statement, control-flow statement, try block,
 * or the empty statement.  "goto IDENT;" is accepted as well.
 */
statement
    :   IDENT ":" statement
    |   compoundStatement

        /* distinguishing between a local variable definition and
         * an expression requires k>2 lookahead.  Rather than increase
         * the lookahead of the overall parser, we use backtracking to
         * ensure we match local variables.  If a local variable declaration
         * is not found, an expression (the next alternative) is attempted.
         * Consider that after having seen "t[" you don't know if it's
         * an assignment to an array "t[3]=4;" or a variable def "t[] b;"
         */
    |   (localVariableDefinitions ";")?

    |   expression ";"

    |   "if" "\(" expression "\)" statement
        /* the {"else" statement} optional clause is a language ambiguity
         * that results in a parser nondeterminism.  The parser's default
         * response of simply matching the "else" if it sees it, resolves
         * the problem.  We use a #pragma to tell the parser that its
         * approximate lookahead is sufficient to handle the problem--
         * the desired side effect is that ANTLR doesn't warn us about
         * this ambiguity with the #pragma in place.
         */
        #pragma approx
        { "else" statement }

        /* As with locals versus expressions at the statement level,
         * loop variables must be distinguished from expressions.
         */
    |   "for" "\("
            ( (localVariableDefinitions ";")? | expressionList ";" | ";" )
            {expression} ";"
            {expressionList}
        "\)"
        statement

    |   "while" "\(" expression "\)" statement
    |   "do" statement "while" "\(" expression "\)" ";"
    |   "break" {IDENT} ";"
    |   "continue" {IDENT} ";"
    |   "return" {expression} ";"
    |   "switch" "\(" expression "\)" "\{"
            (   "case" expression ":" (statement)*
            |   "default" ":" (statement)*
            )*
        "\}"
    |   tryBlock
    |   "throw" expression ";"
    |   "goto" IDENT ";"
    |   "synchronized" "\(" expression "\)" compoundStatement
    |   ";"
    ;

/* A try statement: a block followed by any number of catch handlers and
 * an optional finally block (this rule does not require at least one of
 * them to be present).
 *
 * "catch" and "finally" clauses cause ambiguity that is resolved
 * correctly by ANTLR; this is similar to the dangling-else ambiguity.
 * Again, the #pragma is used to turn off a warning message from ANTLR
 * during grammar analysis.  See the statement rule.
 */
tryBlock
    :   "try" compoundStatement
        #pragma approx ( handler )*
        #pragma approx { "finally" compoundStatement }
    ;

/* One "catch ( Type name ) { ... }" clause. */
handler
    :   "catch" "\(" parameterDefinition "\)" compoundStatement
    ;


                /* E X P R E S S I O N S */

/* Comma-separated expressions: call arguments and the init/update parts
 * of a "for" statement.
 */
expressionList
    :   assignmentExpression ("," assignmentExpression)*
    ;

/* Top of the expression hierarchy; simply an assignmentExpression. */
expression
    :   assignmentExpression
    ;

/* right-to-left for assignment op -> use tail recursion.
 * The left operand is parsed as a conditionalExpression; the grammar does
 * not check that it is something assignable.
 */
assignmentExpression
    :   conditionalExpression
        {   assignmentOp
            assignmentExpression
        }
    ;

/* Simple assignment and the compound assignment operators. */
assignmentOp
    :   "="
    |   "\+="
    |   "\-="
    |   "\*="
    |   "/="
    |   "\%="
    |   "\>\>="
    |   "\>\>\>="
    |   "\<\<="
    |   "&="
    |   "^="
    |   "\|="
    ;

/* The ternary operator "c ? a : b".
 *
 * The middle operand is delimited by "?" and ":" and may therefore be any
 * expression, including an assignment ("c ? x = 1 : y"), as in the Java
 * Language Specification; it is parsed as an assignmentExpression.  The
 * last operand is a conditionalExpression, which makes the operator
 * right-associative.
 */
conditionalExpression
    :   logicalOrExpression
        { "?" assignmentExpression ":" conditionalExpression }
    ;

/* a || b || ... */
logicalOrExpression
    :   logicalAndExpression ("\|\|" logicalAndExpression)*
    ;

/* a && b && ... */
logicalAndExpression
    :   inclusiveOrExpression ("&&" inclusiveOrExpression)*
    ;

/* a | b | ... */
inclusiveOrExpression
    :   exclusiveOrExpression ("\|" exclusiveOrExpression)*
    ;

/* a ^ b ^ ... */
exclusiveOrExpression
    :   andExpression ("^" andExpression)*
    ;

/* a & b & ... */
andExpression
    :   equalityExpression ("&" equalityExpression)*
    ;

/* a == b, a != b */
equalityExpression
    :   relationalExpression (("!=" | "==") relationalExpression)*
    ;

/* Comparisons with the four ordering operators.  "instanceof" is not
 * handled at this level; it is matched in unaryExpression.
 */
relationalExpression
    :   shiftExpression
        (   (   "<"
            |   ">"
            |   "<="
            |   ">="
            )
            shiftExpression
        )*
    ;

/* Left shift, signed right shift and unsigned right shift. */
shiftExpression
    :   additiveExpression (("\<\<" | "\>\>" | "\>\>\>") additiveExpression)*
    ;

/* Binary + and - */
additiveExpression
    :   multiplicativeExpression (("\+" | "\-") multiplicativeExpression)*
    ;

/* Binary *, / and %; the operands are cast expressions. */
multiplicativeExpression
    :   castExpression (("\*" | "/" | "\%" ) castExpression)*
    ;

/*
 *  A cast "(type) operand" or, failing that, a unary expression.
 *
 *  This is the way castExpression should look if I had a symbol table:
 *
 * castExpression
 *  :   unaryExpression
 *  |   <<isType(LT(2)->getText())>>? "\(" typeSpec "\)" castExpression
 *  ;
 *
 *  I use a syntactic pred (...)? here to just check the lookahead arbitrarily
 *  ahead; slower, but it works.
 *
 *  Because the decision is purely syntactic, any parenthesized name that
 *  is followed by something that parses as a castExpression is taken as a
 *  cast; "(a) - b" is matched by the first alternative, for instance.
 */
castExpression
    :   ( "\(" typeSpec "\)" castExpression )?
    |   unaryExpression
    ;

/* Prefix operators applied to a cast expression, or a postfix expression
 * with an optional "instanceof" test.
 *
 * Both sign operators are accepted: unary plus ("+x") is legal Java just
 * like unary minus, so it has its own alternative.  The "\+" and "\-"
 * tokens are the same ones additiveExpression uses as binary operators.
 */
unaryExpression
    :   "\+\+" castExpression
    |   "\-\-" castExpression
    |   "\+" castExpression
    |   "\-" castExpression
    |   "\~" castExpression
    |   "!" castExpression
    |   postfixExpression { "instanceof" referenceType }
    ;

/* The dimension part of an array creation: one or more sized dimensions
 * "[expr]" followed by any number of unsized ones "[]".
 *
 * The ambiguity warning is turned off with the pragma.  The ambiguity is
 * "new T[n]" parsed with "new T" returning from newExpression versus
 * "new T[n]" returning from newExpression: the [..] part could also be
 * matched as a subscript by postfixExpression.
 */
newArray
    :   #pragma approx
        ( "\[" expression "\]" )+ ( "\[" "\]" )*
    ;

/* A primary expression followed by any mix of subscripts, call argument
 * lists, member selections and postfix ++ / --.  Member selection takes a
 * full primaryExpression on its right-hand side, so more is accepted
 * after "." than a plain identifier.
 */
postfixExpression
    :   primaryExpression
        (   "\[" expression "\]"
        |   "\(" { expressionList } "\)"
        |   "." primaryExpression
        |   "\+\+"
        |   "\-\-"
        )*
    ;

/*
 * Object or array creation.
 *
 * Valid new expressions:
 *      new Class(...)
 *      new type[n][m][]...
 *      new Package.Class(...)
 *
 * NOTE: This binding differs from C++.
 */
newExpression
    :   "new" type
        (   "\(" { expressionList } "\)"
        |   newArray
        )
    ;

/* The atoms of an expression: identifiers, creation expressions, literal
 * constants, the keywords super/this/true/false/null, string literals and
 * parenthesized expressions.
 */
primaryExpression
    :   IDENT
    |   newExpression
    |   constant
    |   "super"
    |   "this"
    |   "true"
    |   "false"
    |   "null"
    |   STRINGVAL
    |   "\(" expression "\)"
    ;

/* Numeric and character literals (string literals are handled directly
 * in primaryExpression).
 */
constant
    :   OCTALINT
    |   DECIMALINT
    |   HEXADECIMALINT
    |   CHARVAL
    |   FLOATONE
    |   FLOATTWO
    ;

}

/* Numeric literal and identifier tokens.
 *
 *   OCTALINT        a leading 0 followed by octal digits; also matches a
 *                   lone "0"
 *   DECIMALINT      a non-zero digit followed by digits
 *   HEXADECIMALINT  0x / 0X followed by hex digits
 *   FLOATONE        digits with a dot (digits may be missing on one side),
 *                   optional exponent, optional suffix
 *   FLOATTWO        digits with a mandatory exponent and no dot
 *   IDENT           letter or underscore, then letters, digits, underscores
 *
 * The integer patterns accept one optional suffix from [uUlL] and the
 * float patterns one from [fFlLdD].
 *
 * NOTE(review): the blanks inside the FLOATONE/FLOATTWO patterns are
 * presumably ignored by DLG (the START class escapes its blank as "\ "),
 * and IDENT presumably loses to the keyword literals because it is defined
 * after them -- confirm against the DLG documentation.
 */
#token OCTALINT "0[0-7]*{[uUlL]}"
#token DECIMALINT "[1-9][0-9]*{[uUlL]}"
#token HEXADECIMALINT "(0x|0X)[0-9a-fA-F]+{[uUlL]}"
#token FLOATONE "([0-9]+.[0-9]* | [0-9]*.[0-9]+) {[eE]{[\-\+]}[0-9]+} {[fFlLdD]}"
#token FLOATTWO "[0-9]+ [eE]{[\-\+]}[0-9]+ {[fFlLdD]}"

#token IDENT "[a-zA-Z_][a-zA-Z0-9_]*" <<;>>

<<
void JavaParser::
tracein(char *r)
{
    if ( !doTracing ) return;
    for (int i=1; i<=traceIndentLevel; i++) fprintf(stderr, " ");
    traceIndentLevel++;
    fprintf(stderr, "enter %s('%s %s')%s line %d\n",
            r,
            LT(1)->getText(),
            LT(2)->getText(),
            guessing?" [guessing]":"",
            LT(1)->getLine());
}

void JavaParser::
traceout(char *r)
{
    if ( !doTracing ) return;
    traceIndentLevel--;
    for (int i=1; i<=traceIndentLevel; i++) fprintf(stderr, " ");
    fprintf(stderr, "exit %s('%s %s')%s line %d\n",
            r,
            LT(1)->getText(),
            LT(2)->getText(),
            guessing?" [guessing]":"",
            LT(1)->getLine());
}
>>
