The key thing is the BNF production declaration is on the right hand side of the colon ':'. The BNF production declaration format is :
<Java method return type> JavaMethodName(Java Method Parameters) :
{
//Any variable used in the method is declared here.
}
{
// this section is the method body. Any java code should be surrounded by {}
}
Using the above format lets write a BNF production which looks for different types of sql statements like 'create' and 'select'
List<SQLExpression>parseInput():{
List<SQLExpression>sqlExprs = new LinkedList<SQLExpression>();
SQLExpression sqlExpr;
}
{
(sqlExpr = createSQLExpression(){
sqlExprs.add(sqlExpr);
}[<SEMICOLON>])+<EOF>
{
return sqlExprs;
}
}
In the above code snippet, the name of the BNF production is parseInput. The method doesn't have any input parameters and the return type is a list of SQLExpression objects.
The first {} block contains the declaration of all the variables used in the method.
The second {} block is the Method body. Inside the method body, you can call any other BNF production and also can make use any of the defined tokens like
Any java code should be enclosed between {}. So, sqlExprs.add(sqlExpr); is written inside {} block. Remember this should be a valid Java statement which would compile without any problem. All these BNF productions are converted into Java methods and are added to the Parser file that we declared between the PARSER_BEGIN and PARSER_END section of the grammar file. Therefore, any global variable that is declared in the Java Compilation unit can be used here.
For grouping regular expressions, we can use (). This is seen here:
(sqlExpr = createSQLExpression(){
sqlExprs.add(sqlExpr);
}[<SEMICOLON>])+
Here the call to the createSQLExpression BNF production is grouped with
The whole grammar file which can parse create table statement is given below. Compile the below .jj file and copy it into appropriate java package and the supporting classes like SQLExpression, BlitzSQLParserException, CreateAtom, etc should also be provided for proper working of the parser. Disclaimer: Few of the helper BNF production methods used here are taken from Axion sql grammar file.
options{
IGNORE_CASE = true;
STATIC = false;
UNICODE_INPUT = true;
// some performance optimizations
OPTIMIZE_TOKEN_MANAGER = true;
ERROR_REPORTING = false;
}
PARSER_BEGIN(BlitzSQLParser)
package net.java.blitz.db.sql.parser;
import java.util.List;
import java.util.LinkedList;
import net.java.blitz.db.sql.expression.*;
/**
* @author karthikeyan subramanian
* Do not edit this file directly. This file is generated from BlitzSQLGrammar.jj
*
*/
public class BlitzSQLParser{}PARSER_END(BlitzSQLParser)
// ----------------------------------------------------------------------------
// TOKENS
// ----------------------------------------------------------------------------
SKIP:{
" "
| "\n"
| "\r"
| "\t"
}
SKIP:{
<LINE_COMMENT:"--"(~["\n", "\r"])*("\n"
| "\r"
| "\r\n")>
}
SKIP:{
<BLOCK_COMMENT:"/*"(~["*"])*"*"("*"
| (~["*", "/"](~["*"])*"*"))*"/">
}
TOKEN:// KEYWORDS
{
<ADD:"add">
| <ALL:"all">
| <ALTER:"alter">
| <ALWAYS:"always">
| <AND:"and">
| <AS:"as">
| <ASC:"asc">
| <BEGIN:"begin">
| <BETWEEN:"between">
| <BOTH:"both">
| <BY:"by">
| <CAST:"cast">
| <CASCADE:"cascade">
| <CASE:"case">
| <CHECK:"check">
| <CREATE:"create">
| <COLUMN:"column">
| <CONSTRAINT:"constraint">
| <CURRENT_TIMESTAMP:"current_timestamp">
| <CURRENT_DATE:"current_date">
| <CURRENT_TIME:"current_time">
| <CYCLE:"cycle">
| <DATABASE:"database">
| <DATA:"data">
| <DAY:"day">
| <DEFAULT_:"default">
| <DEFERRED:"deferred">
| <DEFERRABLE:"deferrable">
| <DEFRAG:"defrag">
| <DELETE:"delete">
| <DESC:"desc">
| <DISTINCT:"distinct">
| <DROP:"drop">
| <ELSE:"else">
| <END:"end">
| <ESCAPE:"escape">
| <EXCEPTION:"exception">
| <EXISTS:"exists">
| <EXPLAIN:"explain">
| <EXTRACT:"extract">
| <EXTERNAL:"external">
| <FALSE:"false">
| <FIRST:"first">
| <FOR:"for">
| <FOREIGN:"foreign">
| <FROM:"from">
| <FULL:"full">
| <GENERATED:"generated">
| <GROUP:"group">
| <HAVING:"having">
| <HOUR:"hour">
| <IDENTITY:"identity">
| <IF:"if">
| <INCREMENT:"increment">
| <IMMEDIATE:"immediate">
| <IN:"in">
| <INITIALLY:"initially">
| <INDEX:"index">
| <INNER:"inner">
| <INSERT:"insert">
| <INTO:"into">
| <IS:"is">
| <JOIN:"join">
| <KEY:"key">
| <LEADING:"leading">
| <LEFT:"left">
| <LIKE:"like">
| <LIMIT:"limit">
| <LINK:"link">
| <MAXVALUE:"maxvalue">
| <MATCHED:"matched">
| <MERGE:"merge">
| <MINUTE:"minute">
| <MINVALUE:"minvalue">
| <MILLISECOND:"millisecond">
| <MONTH:"month">
| <NEXT:"next">
| <NO:"no">
| <NOT:"not">
| <NULL:"null">
| <OFFSET:"offset">
| <ON:"on">
| <OR:"or">
| <ORDER:"order">
| <ORGANIZATION:"organization">
| <OUTER:"outer">
| <POSITION:"position">
| <PRIMARY:"primary">
| <QUARTER:"quarter">
| <RIGHT:"right">
| <REFERENCES:"references">
| <RENAME:"rename">
| <RESTART:"restart">
| <SECOND:"second">
| <SELECT:"select">
| <SEQUENCE:"sequence">
| <SET:"set">
| <SOUNDS:"sounds">
| <START:"start">
| <SUBSTRING:"substring">
| <SYSDATE:"sysdate">
| <TABLE:"table">
| <THEN:"then">
| <TO:"to">
| <TRAILING:"trailing">
| <TRIM:"trim">
| <TRUE:"true">
| <TRUNCATE:"truncate">
| <UNIQUE:"unique">
| <UPDATE:"update">
| <UPSERT:"upsert">
| <USER:"user">
| <USING:"using">
| <VALUES:"values">
| <VALUE:"value">
| <VIEW:"view">
| <WEEK:"week">
| <WHEN:"when">
| <WHERE:"where">
| <WITH:"with">
| <YEAR:"year">
}
TOKEN:// DATA TYPES
{
<BIT:"bit">
| <BYTE:"byte">
| <INT:"int">
| <REAL:"real">
| <CLOB:"clob">
| <BLOB:"blob">
| <CHAR:"char">
| <CHARACTER:"character">
| <DATE:"date">
| <TIME:"time">
| <FLOAT:"float">
| <BIGINT:"bigint">
| <LONG:"long">
| <RAW:"raw">
| <STRING:"string">
| <BINARY:"binary">
| <NUMERIC:"numeric">
| <NUMBER:"number">
| <DECIMAL:"decimal">
| <DEC:"dec">
| <BOOLEAN:"boolean">
| <TINYINT:"tinyint">
| <INTEGER:"integer">
| <VARCHAR:"varchar">
| <VARCHAR2:"varchar2">
| <LONGVARCHAR:"longvarchar">
| <TEXT:"text">
| <SMALLINT:"smallint">
| <SHORT:"short">
| <VARBINARY:"varbinary">
| <LONGVARBINARY:"longvarbinary">
| <IMAGE:"image">
| <VARYING:"varying">
| <LARGE:"large">
| <TIMESTAMP:"timestamp">
| <OBJECT:"object">
| <JAVA_OBJECT:"java_object">
| <DOUBLE:"double">
}
TOKEN:// LITERALS
{
<INTEGER_LITERAL:(["0"-"9"])+>
| <FLOATING_POINT_LITERAL:(["0"-"9"])+"."(["0"-"9"])+(<EXPONENT>)?
| "."(["0"-"9"])+(<EXPONENT>)?
| (["0"-"9"])+<EXPONENT>
| (["0"-"9"])+(<EXPONENT>)?>
| <#EXPONENT:["e", "E"](["+", "-"])?(["0"-"9"])+>
| <STRING_LITERAL:"'"(~["'"])*("''"(~["'"])*)*"'">
}
TOKEN:// IDENTIFIERS
{
<ID:(<LETTER>)+("_"
| "$"
| "#"
| <DIGIT>
| <LETTER>)*>
| <#LETTER:["A"-"Z", "a"-"z"]>
| <#DIGIT:["0"-"9"]>
}
TOKEN:// SEPARATORS AND OPERATORS
{
<ASSIGN:":=">
| <COMMA:",">
| <CONCAT:"||">
| <SEMICOLON:";">
| <DOT:".">
| <LESS:"<">
| <LESSEQUAL:"<=">
| <GREATER:">">
| <GREATEREQUAL:">=">
| <EQUAL:"=">
| <NOTEQUAL:"!=">
| <NOTEQUAL2:"<>">
| <JOINPLUS:"(+)">
| <OPENPAREN:"(">
| <CLOSEPAREN:")">
| <ASTERISK:"*">
| <SLASH:"/">
| <PLUS:"+">
| <MINUS:"-">
| <QUESTIONMARK:"?">
}
TOKEN:// START QUOTED IDENTIFIER
{
<START_QUOTED_IDENTIFIER:"\"">:STATE_QuotedIdentStart
}
<STATE_QuotedIdentStart>TOKEN:{
<QUOTED_IDENTIFIER:<ID>>:STATE_QuotedIdentEnd
}
<STATE_QuotedIdentEnd>TOKEN:// IDENTIFIER ESCAPE CHAR
{
<END_QUOTED_IDENTIFIER:"\"">:DEFAULT
}
// --------------------------------------------------------------
// SQL Parsing methods.
// --------------------------------------------------------------
// --------------------------------------------------------------
// Call this method to create a List of SQLExpression from the
// given input.
// --------------------------------------------------------------
List<SQLExpression>parseInput():{
List<SQLExpression>sqlExprs = new LinkedList<SQLExpression>();
SQLExpression sqlExpr;
}
{
(sqlExpr = createSQLExpression(){
sqlExprs.add(sqlExpr);
}[<SEMICOLON>])+<EOF>
{
return sqlExprs;
}
}
SQLExpression createSQLExpression():{
SQLExpression sqlExpr = new SQLExpression();
}
{
(parseSelectStatement()
| parseCreateStatement()){
return sqlExpr;
}
}
// --------------------------------------------------------------
// Helper methods
// --------------------------------------------------------------
String SqlIdentifier():{
Token t = null;
}
{
t = SqlQuotedId(){
return t.image;
}
}
Token SqlQuotedId():{
Token t = null;
}
{
(t = <ID>
| <START_QUOTED_IDENTIFIER>t = <QUOTED_IDENTIFIER><END_QUOTED_IDENTIFIER>){
return t;
}
}
Object[]SqlColumnDef():{
Object[]tuple = new Object[6];
}
{
tuple[0] = SqlIdentifier()(SqlExactNumericType(tuple)
| (LOOKAHEAD(2)SqlCharStringType(tuple)
| SqlBinaryStringType(tuple)
| SqlApproximateNumericType(tuple)
| SqlBooleanType(tuple)
| SqlDataTimeType(tuple))) {
return tuple;
}
}
void SqlCharStringType(Object[]tuple):{}{
LOOKAHEAD(2)((<CHAR>
| <CHARACTER>){
tuple[1] = "char";
tuple[2] = "1";
}
[SqlCharLength(tuple)])
| LOOKAHEAD(2)((<CHAR><VARYING>
| <CHARACTER><VARYING>
| <VARCHAR>
| <VARCHAR2>
| <STRING>){
tuple[1] = "varchar";
tuple[2] = "1";
}
[SqlCharLength(tuple)])
| ((LOOKAHEAD(2)<LONG>
| <LONG><VARCHAR>
| <LONGVARCHAR>
| <TEXT>){
tuple[1] = "varchar";
tuple[2] = ""+Integer.MAX_VALUE;
}
)
| ((<CHAR><LARGE><OBJECT>
| <CHARACTER><LARGE><OBJECT>
| <CLOB>){
tuple[1] = "clob";
}
[SqlPrecision(tuple)])
}
void SqlBinaryStringType(Object[]tuple):{}{
(<BYTE>{
tuple[1] = "byte";
}
[SqlPrecision(tuple)])
| LOOKAHEAD(2)((<BINARY>
| <VARBINARY>
| <RAW>){
tuple[1] = "varbinary";
}
[SqlPrecision(tuple)])
| ((LOOKAHEAD(2)<LONG><VARBINARY>
| <LONGVARBINARY>
| <LONG><RAW>
| <IMAGE>){
tuple[1] = "varbinary";
tuple[2] = ""+Integer.MAX_VALUE;
}
[SqlCharLength(tuple)])
| ((<BINARY><LARGE><OBJECT>
| <BLOB>){
tuple[1] = "blob";
}
[SqlPrecision(tuple)])
}
void SqlExactNumericType(Object[]tuple):{
Integer precision = new Integer(BigDecimalType.DEFAULT_PRECISION);
Integer scale = new Integer(BigDecimalType.DEFAULT_SCALE);
}
{
LOOKAHEAD(3)((<NUMERIC>
| <DECIMAL>
| <DEC>
| <NUMBER>){
tuple[1] = "numeric";
tuple[2] = precision.toString();
tuple[3] = scale.toString();
}
[SqlPrecisionAndScale(tuple)]{
precision = Integer.valueOf(tuple[2].toString());
scale = Integer.valueOf(tuple[3].toString());
if (scale.compareTo(precision)>0){
throw new BlitzParserException("scale value exceeds precision value");
}
else if (precision.compareTo(new Integer(BigDecimalType.MAX_PRECISION))>0){
throw new BlitzParserException("precision value exceeds maximum ("+BigDecimalType.MAX_PRECISION+")");
}
}
)
| ((<INTEGER>
| <INT>){
tuple[1] = "integer";
}
)
| ((<SMALLINT>
| <SHORT>){
tuple[1] = "short";
}
)
| ((<BIGINT>){
tuple[1] = "bigint";
}
)
}
void SqlApproximateNumericType(Object[]tuple):{}{
LOOKAHEAD(3)(<FLOAT>{
tuple[1] = "float";
}
[SqlPrecision(tuple)])
| (<REAL>{
tuple[1] = "float";
}
)
| (<DOUBLE>{
tuple[1] = "double";
}
)
}
void SqlBooleanType(Object[]tuple):{}{
((<BOOLEAN>
| <BIT>){
tuple[1] = "boolean";
}
)
}
void SqlDataTimeType(Object[]tuple):{}{
(<DATE>{
tuple[1] = "date";
}
)
| (<TIME>{
tuple[1] = "time";
}
[SqlPrecision(tuple)])
| (<TIMESTAMP>{
tuple[1] = "timestamp";
}
[SqlPrecision(tuple)])
}
void SqlCharLength(Object[]tuple):{}{
// If length is omitted, then a length of 1 (one) is implicit.
<OPENPAREN>tuple[2] = SqlPositiveInteger()<CLOSEPAREN>
}
void SqlPrecisionAndScale(Object[]tuple):{}{
<OPENPAREN>tuple[2] = SqlPositiveInteger()(<COMMA>tuple[3] = SqlUnsignedInteger())*<CLOSEPAREN>
}
void SqlPrecision(Object[]tuple):{}{
<OPENPAREN>tuple[2] = SqlUnsignedInteger()<CLOSEPAREN>
}
String SqlPositiveInteger():{
String valStr = null;
}
{
valStr = SqlUnsignedInteger(){
int val = (valStr != null)?Integer.valueOf(valStr).intValue():-1;
if (val == 0){
throw new BlitzParserException("value must be a positive integer");
}
return valStr;
}
}
String SqlUnsignedInteger():{
Token t = null;
}
{
t = <INTEGER_LITERAL>{
if (t != null){
return t.image;
}
return null;
}
}
CreateAtom createTable():{
String tblName;
String schemaName;
CreateAtom createAtom = new CreateAtom();
Object[] obj;
}
{
<CREATE><TABLE>[SqlIdentifier()<DOT>{
schemaName = token.image;
}
]SqlIdentifier(){
tblName = token.image;
}
[<IF><NOT><EXISTS>{
createAtom.setIfNotExists(true);
}]
<OPENPAREN>[(obj = SqlColumnDef(){
createAtom.addColumnDefinition(new ColumnDefinition(obj));
}
)](<COMMA>obj = SqlColumnDef(){
createAtom.addColumnDefinition(new ColumnDefinition(obj));
}
)*<CLOSEPAREN>
}