#include "pch.h"
#include "lexer.h"
#include "stdio.h"
#include <cstring>
#include <fstream>
#include <streambuf>
#include <sstream>
// Scanner helper macros. They expand textually and rely on the names
// `source`, `offset`, `stash`, `tok`, `error` and (for clear_token) `buf`
// being visible at the point of use.
//
// `next` and `undo` are object-like macros: every later occurrence of those
// identifiers in this file is replaced, so they must not be used as ordinary
// variable or function names.

// Reads the character at the current offset into `stash`, advances the offset
// by one and yields the character that was read.
#define next (stash=source[offset++])
// Steps the offset back by one, i.e. un-reads the most recently read character.
#define undo (--offset)
// Stores `t` as the type of the current token; the expression yields the assigned value.
#define token(t) (tok.token_type=t)
// Stores `message` in `error` and marks the current token as JAVA_TOKEN_ILLEGAL.
// Function-like, so uses such as `error.clear()` or a label named `error` are not expanded.
#define error(message) (error=message, token(JAVA_TOKEN_ILLEGAL))
// Empties the local text buffer `buf`, then stores the given token type via token().
#define clear_token(tok) (buf.clear(), token(tok))
// Constructs a lexer whose `filename` is initialised from `f`.
// The scanner starts at offset 0 on line 1 with no pending error, the current
// token's value fields are zeroed/emptied, and `outfilename` is set to "scan_out".
lexer::lexer(string f) :filename(f)
{
    // Scanner position and diagnostics.
    offset = 0;
    lineno = 1;
    error.clear();

    // Name used for the scan output.
    outfilename = "scan_out";

    // Value fields of the current token (assignment order kept as-is).
    auto &payload = tok.value;
    payload.as_string.clear();
    payload.as_real = 0;
    payload.as_int = 0;
    payload.as_float = 0;
    payload.as_long = 0;
}
// Default constructor: same initial state as the file-name constructor, but
// with an empty `filename`. The scanner starts at offset 0 on line 1 with no
// pending error, zeroed token value fields and `outfilename` set to "scan_out".
lexer::lexer() {
    // No input file name yet.
    filename = "";

    // Scanner position and diagnostics.
    offset = 0;
    lineno = 1;
    error.clear();

    // Name used for the scan output.
    outfilename = "scan_out";

    // Value fields of the current token (assignment order kept as-is).
    auto &payload = tok.value;
    payload.as_string.clear();
    payload.as_real = 0;
    payload.as_int = 0;
    payload.as_float = 0;
    payload.as_long = 0;
}
// Destructor: performs no explicit work of its own.
lexer::~lexer() = default;
// Clears the value fields of the current token.
// The error text, line counter and read offset are left untouched: the
// statements that would reset them are disabled in this function.
void lexer::java_lexer_reset() {
    auto &payload = tok.value;

    // Text payload first, then every numeric view of the value.
    payload.as_string.clear();
    payload.as_int = 0;
    payload.as_real = 0;
    payload.as_float = 0;
    payload.as_long = 0;
}
//程序中心法 扫描昂
int lexer::java_scan() {
//char c;
int c;
scan:
switch (c = next) {
case '\n':
lineno += 1;
case '\t':
case ' ':
goto scan;
case '(':
return token(JAVA_TOKEN_LPAREN);
case ')':
return token(JAVA_TOKEN_RPAREN);
case '{':
return token(JAVA_TOKEN_LBRACE);
case '}':
return token(JAVA_TOKEN_RBRACE);
case '[':
return token(JAVA_TOKEN_LBRACK);
case ']':
return token(JAVA_TOKEN_RBRACK);
case ',':
return token(JAVA_TOKEN_COMMA);
case '.':
return token(JAVA_TOKEN_OP_DOT);
case '%':
return token(JAVA_TOKEN_OP_MOD);
case '^':
return next == '=' ? token(JAVA_TOKEN_OP_BIT_NOT_ASSIGN) : \
(undo, token(JAVA_TOKEN_OP_NOT));
case '~':
return token(JAVA_TOKEN_OP_BIT_NOT);
case '?':
return ':' == next ? token(JAVA_TOKEN_OP_COND) : \
(undo, token(JAVA_TOKEN_OP_COND));
case ':':
return token(JAVA_TOKEN_COLON);
case ';':
return token(JAVA_TOKEN_SEMICOLON);
case '+':
switch (c = next) {
case '+':
return token(JAVA_TOKEN_OP_INCR);
case '=':
return token(JAVA_TOKEN_OP_PLUS_ASSIGN);
default:
return undo, token(JAVA_TOKEN_OP_PLUS);
}
case '-':
switch (c = next) {
case '-':
return token(JAVA_TOKEN_OP_DECR);
case '=':
return token(JAVA_TOKEN_OP_MINUS_ASSIGN);
default:
return undo, token(JAVA_TOKEN_OP_MINUS);
}
case '*':
switch (c = next) {
case '=':
return token(JAVA_TOKEN_OP_MUL_ASSIGN);
default:
return undo, token(JAVA_TOKEN_OP_MUL);
}
case '/':
switch (c = next) {
case '/':
while ((c = next) != '\n' &&c);
undo;
goto scan;
case '=':
return token(JAVA_TOKEN_OP_DIV_ASSIGN);
case '*'://注释
while (1) {
c = next;
if (c == '\n') lineno++;
if (c == '*') {
//注释完
if ((c = next) == '/') {
goto scan;
}
else {
undo;
}
}
if (c == 0) {
error("Unclose comment");
return token(JAVA_TOKEN_ILLEGAL);
}
}
}
case '!':
if (next == '=') {
return token(JAVA_TOKEN_OP_NEQ);
}
else {
return token(JAVA_TOKEN_OP_NOT);
}
case '>':
switch (c = next) {
case '=':return token(JAVA_TOKEN_OP_GTE);
case '>':switch (c = next) {
case '=':return token(JAVA_TOKEN_OP_BIT_SHR_ASSIGN);
case '>': return '=' == next ? token(JAVA_TOKEN_OP_BIT_SHR_ZERO_ASSIGN) : \
(undo, token(JAVA_TOKEN_OP_BIT_SHR_ZERO));
default: return undo, token(JAVA_TOKEN_OP_BIT_SHR);
}
default:
return undo, token(JAVA_TOKEN_OP_GT);
}
case '<':
switch (c = next) {
case '=':return token(JAVA_TOKEN_OP_LTE);
case '<':
return '=' == next ? token(JAVA_TOKEN_OP_BIT_SHL_ASSIGN)\
: (undo, token(JAVA_TOKEN_OP_BIT_SHL));
default:
return undo, token(JAVA_TOKEN_OP_LT);
}
case '=':
return '=' == next ? token(JAVA_TOKEN_OP_EQ) : (undo, token(JAVA_TOKEN_OP_ASSIGN));
case '|':
switch (c = next) {
case '|':return token(JAVA_TOKEN_OP_OR);
case '=':return token(JAVA_TOKEN_OP_BIT_OR_ASSIGN);
default:return (undo, token(JAVA_TOKEN_OP_BIT_OR));
}
case '&':
switch (c = next) {
case '&': return token(JAVA_TOKEN_OP_AND);
case '=': return token(JAVA_TOKEN_OP_BIT_AND_ASSIGN);
default: return undo, token(JAVA_TOKEN_OP_BIT_AND);
}
case '"':
//搜索字符串
return scan_string(c);
case 0:
return token(JAVA_TOKEN_EOS);
default:
//标识符
if (isalpha(c) || c == '_' || c == '$') return scan_identity(c);
if (isdigit(c) || c == '.') return scan_number(c);
error("illegal Token");
return 0;
}
}
// Maps an ASCII digit or letter to its numeric value in bases up to 36:
// '0'..'9' -> 0..9, 'a'..'z' and 'A'..'Z' -> 10..35.
// Returns -1 for any other character. Callers that need a specific base
// must range-check the result themselves.
static int CharToNum(const char c) {
    if ('0' <= c && c <= '9') {
        return c - '0';
    }
    const bool lower = ('a' <= c && c <= 'z');
    const bool upper = ('A' <= c && c <= 'Z');
    if (lower || upper) {
        return 10 + (c - (lower ? 'a' : 'A'));
    }
    return -1;
}
int lexer::hex_literal() {
int a = CharToNum(next);
int b = CharToNum(next);
int c = CharToNum(next);
int d = CharToNum(next);
if (a > -1 and b > -1 and c > -1 and d > -1) return a << 12 | b << 8 | c << 4 | d;
error("string hex literal \\uxxxx contains invalid digits");
return -1;
}
int lexer::oct_literal() {
int a = CharToNum(next);
int b = CharToNum(next);
int c = CharToNum(next);
if (a > -1 and b > -1 and c > -1) return a << 6 | b << 3 | c;
error("String oct literal \\ddd contains invalid digits");
return -1;
}
int lexer::scan_string(int quate) {
int c, len = 0;
string buf;
token(JAVA_TOKEN_CONSTANT_STRING);
//遇见下一个引号字符串停止
while (quate != (c = next)) {
switch (c) {
case '\n':
lineno++;
break;
//反斜杠字符,转义,此处 C++'\\'代表 输入的'\', 易错
case '\\':
switch (c = next) {
case '\'':
c = '\'';
break;
case'\\':
c = '\\';
break;
case 'a':
c = '\a';
break;
case 'b':
c = '\b';
break;
case 'r':
c = '\r';
break;
case 'n':
c = '\n';
break;
case 'f':
c = '\f';
break;
case 't':
c = '\t';
break;
case 'u':
//16进制表示的字符;
c = hex_literal();
if (-1 == c) {
goto error;
}
// /ddd八进制字符 unfinished
//case 'k'://
// c = oct_literal();
// if (-1 == c) {
// goto error;
// }
// //八进制表示字符
default:
error("error in using \\");
return 0;
}
break;
}
buf += c;
}
tok.value.as_string = buf;
buf.clear();
//成功输出 字符串
return 1;
error:
buf.clear();
return 0;
}
int lexer::scan_identity(int c) {
//默认为标识符
string buf;
token(JAVA_TOKEN_ID);
//字母或美元符号“$”或下划线开头,连接字母或美元符号“$”或下划线或数字字符的串。
do {
buf += c;
} while (isalpha(c = next) || c == '_' || c == '$');
undo;
//检查 是否为关键字/ 若是返回关键字属性值
if (buf == "if") return clear_token(JAVA_TOKEN_IF);
if (buf == "do") return clear_token(JAVA_TOKEN_DO);
if (buf == "for")return clear_token(JAVA_TOKEN_FOR);
if (buf == "int")return clear_token(JAVA_TOKEN_INT);
if (buf == "new")return clear_token(JAVA_TOKEN_NEW);
if (buf == "try")return clear_token(JAVA_TOKEN_TRY);
if (buf == "void")return clear_token(JAVA_TOKEN_VOID);
if (buf == "this")return clear_token(JAVA_TOKEN_THIS);
if (buf == "char") return clear_token(JAVA_TOKEN_CHAR);
if (buf == "byte")return clear_token(JAVA_TOKEN_BYTE);
if (buf == "case")return clear_token(JAVA_TOKEN_CASE);
if (buf == "else")return clear_token(JAVA_TOKEN_ELSE);
if (buf == "long")return clear_token(JAVA_TOKEN_LONG);
if (buf == "enum")return clear_token(JAVA_TOKEN_ENUM);
if (buf == "goto")return clear_token(JAVA_TOKEN_GOTO);
if (buf == "true")return clear_token(JAVA_TOKEN_CONSTANT_TRUE);
if (buf == "null")return clear_token(JAVA_TOKEN_NULLS);
if (buf == "false")return clear_token(JAVA_TOKEN_CONSTANT_FALSE);
if (buf == "const")return clear_token(JAVA_TOKEN_CONST);
if (buf == "short")return clear_token(JAVA_TOKEN_SHORT);
if (buf == "break")return clear_token(JAVA_TOKEN_BREAK);
if (buf == "catch")return clear_token(JAVA_TOKEN_CATCH);
if (buf == "class")return clear_token(JAVA_TOKEN_CLASS);
if (buf == "final")return clear_token(JAVA_TOKEN_FINAL);
if (buf == "float")return clear_token(JAVA_TOK