// Modified SQLTokenMaker.java from RText - http://rtext.sourceforge.net/

package sdoc.lexers;

import sdoc.lexers.tokens.Token;
import java.util.ArrayList;
import java.util.List;
import javax.swing.text.Segment;
import java.io.CharArrayReader;
import java.io.IOException;
import sdoc.lexers.tokens.TokenFactory;

%%
/* Scanner generation options: the generated class is the public class SqlLexer, */
/* declared to implement Lexer, and its yylex() method is typed to return a List. */
%public
%class SqlLexer
%implements Lexer
%unicode
%pack
%buffer 128
/* Literals in the rules below (e.g. the keyword list) are matched without regard to case. */
%ignorecase
%type List



%{

	/**
	 * Creates a lexer with no input attached. The text to scan is supplied
	 * on each call to getTokens, which resets the scanner onto a fresh reader.
	 */
	public SqlLexer()
	{
		super();
	}

	// Tokens collected for the text currently being scanned. The same list
	// instance is cleared and refilled by every getTokens call.
	private List tokens = new ArrayList();

	/**
	 * Appends the factory's null token to the token list. The rules call this
	 * right before returning, so it marks the end of the scanned text.
	 */
	private void addNullToken()
	{
		final Token endMarker = TokenFactory.createNullToken();
		tokens.add(endMarker);
	}
		
	/**
	 * Scans the given text and reports the type of the last token produced.
	 * The result can be fed back as <code>initialTokenType</code> for the
	 * following line, so that an unterminated string or character literal
	 * continues there.
	 *
	 * @param text the text to scan
	 * @param initialTokenType the token type the previous line ended with
	 * @return the type of the last token, or <code>Token.NULL</code> when
	 *         scanning produced no tokens at all
	 */
	public int getLastTokenTypeOnLine(Segment text , int initialTokenType)
	{
		// Work on the list getTokens hands back instead of peeking at the field:
		// on an I/O failure getTokens returns an empty list.
		List lineTokens = getTokens(text , initialTokenType , 0);

		if(lineTokens == null || lineTokens.isEmpty())
		{
			// Nothing was scanned; avoid indexing at -1 and report that no
			// multi-line construct is open.
			return Token.NULL;
		}

		return ((Token)lineTokens.get(lineTokens.size() - 1)).type;
	}
	
	/**
	 * Builds a token of the given type from the currently matched text and
	 * appends it to the token list.
	 *
	 * @param type the token type to assign to the matched text
	 */
	private void addToken(int type) 
	{
		final Token current = TokenFactory.createToken(type , yytext());

		// If the list holds nothing but a single NULL token, drop it so the
		// real token does not end up behind it.
		final boolean onlyNullToken =
			tokens.size() == 1 && ((Token)tokens.get(0)).type == Token.NULL;
		if(onlyNullToken)
		{
			tokens.clear();
		}

		tokens.add(current);
	}
	
	
	/**
	 * Scans the given text and returns its tokens.
	 *
	 * The returned list is this lexer's internal list; it is cleared and
	 * refilled by the next call, so callers that need to keep the tokens
	 * must copy them.
	 *
	 * @param text the text to scan (only <code>array</code>, <code>offset</code>
	 *        and <code>count</code> are read)
	 * @param initialTokenType the token type the previous line ended with;
	 *        a string or character literal type resumes scanning inside that
	 *        literal, anything else starts in the normal state
	 * @param startOffset not used by this implementation
	 * @return the list of tokens, or an empty list if reading the text failed
	 */
	public List getTokens(Segment text, int initialTokenType, int startOffset) 
	{
		tokens.clear();

		// Translate the carried-over token type into a lexical state. The
		// default must be the scanner's own YYINITIAL state constant, not a
		// token type constant.
		int state;
		switch (initialTokenType) {
			case Token.LITERAL_STRING_DOUBLE_QUOTE:
				state = STRING;
				break;
			case Token.LITERAL_CHAR:
				state = CHAR;
				break;
			default:
				state = YYINITIAL;
		}

		try 
		{
			yyreset(new CharArrayReader(text.array , text.offset , text.count));
			// yybegin comes after yyreset so the chosen state is the one in effect when scanning starts.
			yybegin(state);
			return yylex();			
		} 
		catch (IOException ioe) 
		{
			ioe.printStackTrace();
			return new ArrayList();
		}
	}
	
	
%}

/* Basic character classes. */
LineTerminator		= ([\n])
Letter			= ([A-Za-z])
Digit			= ([0-9])
Whitespace		= ([ \t]+)

/* Identifiers start with a letter and may continue with letters, digits or   */
/* underscores, so a name such as my_table is one token and its "table" part  */
/* is not picked up by the keyword rules.                                     */
IdentifierStart	= ({Letter})
IdentifierPart		= ({IdentifierStart}|{Digit}|[_])
Identifier		= ({IdentifierStart}{IdentifierPart}*)

Operator			= (">="|"<="|"<>"|">"|"<"|"="|"+"|"-"|"*"|"/")
Separator			= ([\(\)])

/* Named parameter, e.g. :customer_id */
Parameter			= ([:]{Identifier})

/* Numeric literals. A leading-dot form needs at least one digit after the    */
/* dot, so a lone "." (as in table.column) is not treated as a number.        */
Integer			= ({Digit}+)
Float			= (({Digit}+[.]{Digit}*)|([.]{Digit}+))
ApproxNum			= (({Digit}+[eE][+-]?{Digit}+)|({Digit}+[.]{Digit}*[eE][+-]?{Digit}+)|([.]{Digit}+[eE][+-]?{Digit}+))

/* A comment runs from "--" to the end of the line. */
CommentBegin		= ("--")
Comment			= ({CommentBegin}.*)

/* Additional lexical states: STRING is entered on an opening double quote, */
/* CHAR on an opening single quote; both return to YYINITIAL on the closing quote. */
%state STRING
%state CHAR

%%

/* Keywords (matched case-insensitively because of %ignorecase).            */
/* Each literal must be exactly one word: a literal such as "BY CREATE"     */
/* would only match the two words written together with a single space.    */
<YYINITIAL> "ADD"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "ALL"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "ALTER"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "AND"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "ANY"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "AS"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "ASC"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "AUTOINCREMENT"			{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "AVG"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "BETWEEN"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "BINARY"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "BIT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "BOOLEAN"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "BY"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "BYTE"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "CHAR"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "CHARACTER"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "COLUMN"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "CONSTRAINT"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "COUNT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "COUNTER"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "CREATE"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "CURRENCY"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DATABASE"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DATE"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DATETIME"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DELETE"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DESC"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DISALLOW"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DISTINCT"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DISTINCTROW"			{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DOUBLE"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "DROP"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "EXISTS"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "FLOAT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "FLOAT4"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "FLOAT8"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "FOREIGN"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "FROM"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "GENERAL"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "GROUP"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "GUID"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "HAVING"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "INNER"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "INSERT"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "IGNORE"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "IMP"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "IN"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "INDEX"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "INT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "INTEGER"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "INTEGER1"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "INTEGER2"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "INTEGER4"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "INTO"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "IS"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "JOIN"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "KEY"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "LEFT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "LEVEL"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "LIKE"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "LOGICAL"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "LONG"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "LONGBINARY"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "LONGTEXT"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "MAX"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "MEMO"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "MIN"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "MOD"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "MONEY"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "NOT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "NULL"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "NUMBER"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "NUMERIC"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "OLEOBJECT"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "ON"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "OPTION"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "OR"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "ORDER"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "OUTER"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "OWNERACCESS"			{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "PARAMETERS"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "PERCENT"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "PIVOT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "PRIMARY"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "REAL"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "REFERENCES"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "RIGHT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "SELECT"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "SET"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "SHORT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "SINGLE"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "SMALLINT"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "SOME"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "STDEV"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "STDEVP"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "STRING"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "SUM"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "TABLE"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "TABLEID"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "TEXT"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "TIME"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "TIMESTAMP"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "TOP"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "TRANSFORM"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "UNION"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "UNIQUE"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "UPDATE"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "VALUE"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "VALUES"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "VAR"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "VARBINARY"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "VARCHAR"				{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "VARP"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "WHERE"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "WITH"					{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "YESNO"					{ addToken(Token.RESERVED_WORD); }


<YYINITIAL> {

	/* End of line: close the token list with a null token and hand it back. */
	{LineTerminator}				{ addNullToken(); return tokens; }

	/* Names, the ";" statement terminator and :parameters all get the identifier type. */
	{Identifier}|";"|{Parameter}		{ addToken(Token.IDENTIFIER); }

	/* "--" comment running to the end of the line. */
	{Comment}						{ addToken(Token.COMMENT_EOL); }

	{Whitespace}					{ addToken(Token.WHITESPACE); }

	{Operator}					{ addToken(Token.OPERATOR); }
	{Separator}					{ addToken(Token.SEPARATOR); }

	/* Numeric literals: integers first, then both floating-point notations. */
	{Integer}						{ addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
	{Float}|{ApproxNum}				{ addToken(Token.LITERAL_NUMBER_FLOAT); }

	/* An opening quote is emitted as its own token and switches to the matching literal state. */
	"\""							{ yybegin(STRING); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE);}
	"\'"							{ yybegin(CHAR); addToken(Token.LITERAL_CHAR);}

	/* Bracketed name: complete "[...]" first; an unclosed "[" swallows the rest */
	/* of the input, is flagged as an error, and ends the scan. */
	"["[^\]]*"]"					{ addToken(Token.PREPROCESSOR); }
	"["[^\]]*						{ addToken(Token.ERROR_IDENTIFIER); addNullToken(); return tokens; }

	/* End of input: same handling as an end of line. */
	<<EOF>>						{ addNullToken(); return tokens; }

	/* Fallback, kept last so every rule above wins a tie: any other single */
	/* character is accepted as an identifier rather than reported as an error. */
	.							{ addToken(Token.IDENTIFIER); }

}


<STRING> {

	/* Literal text, or a doubled quote (which does not end the literal): stay in STRING. */
	[^\n\"]+|"\"\""		{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/* A single quote closes the literal and returns to normal scanning. */
	"\""					{ yybegin(YYINITIAL); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/* Line end or end of input inside the literal: emit a string token last, so */
	/* the final token type on the line shows that the literal is still open. */
	\n					{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); return tokens; }
	<<EOF>>				{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); return tokens; }

}

<CHAR> {

	/* Literal text, or a doubled single quote (which does not end the literal): stay in CHAR. */
	[^\n\']+|"\'\'"		{ addToken(Token.LITERAL_CHAR); }

	/* A lone single quote closes the literal and returns to normal scanning. */
	"\'"					{ yybegin(YYINITIAL); addToken(Token.LITERAL_CHAR); }

	/* Line end or end of input inside the literal: emit a char token last, so */
	/* the final token type on the line shows that the literal is still open. */
	\n					{ addToken(Token.LITERAL_CHAR); return tokens; }
	<<EOF>>				{ addToken(Token.LITERAL_CHAR); return tokens; }

}


























