#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "y86asm.h"
/* Forward declaration; the definition is not in this part of the file. */
bool_t is_end(line_t *p);
/* Head of the y86 binary code line list (NULL until a line is added). */
line_t *y86bin_listhead = NULL;
/* Tail of the y86 binary code line list (NULL until a line is added). */
line_t *y86bin_listtail = NULL;
/*
 * Current line number of the y86 assembly source.
 * err_print() includes it in its "[L<n>]: " prefix, and prints "[--]: "
 * instead when this value is negative.
 */
int y86asm_lineno = 0;
/*
 * err_print: write one diagnostic line to stderr.
 * args
 *     _s: printf-style format; must be a string literal, because it is
 *         concatenated with the prefix and the trailing newline
 *     _a: optional arguments consumed by the format
 *
 * The line is prefixed with "[L<n>]: " where <n> is y86asm_lineno, or with
 * "[--]: " when y86asm_lineno is negative.
 *
 * The expansion deliberately does NOT end with a semicolon: the caller's own
 * ';' completes the do/while statement, so the macro behaves like a single
 * statement and can be used as the body of an if/else without braces.
 */
#define err_print(_s, _a ...) do { \
    if (y86asm_lineno < 0) \
        fprintf(stderr, "[--]: " _s "\n", ## _a); \
    else \
        fprintf(stderr, "[L%d]: " _s "\n", y86asm_lineno, ## _a); \
} while (0)
/*
 * Current virtual-machine address. add_symbol() stores this value as the
 * address of every symbol it creates.
 */
int vmaddr = 0; /* vm addr */
/*
 * Register table: pairs each register's textual name with its id.
 * find_register() scans the entries in order and compares the first
 * 4 characters of its argument, so every name here is exactly
 * 4 characters long ('%' followed by three letters).
 */
reg_t reg_table[REG_CNT] = {
{"%eax", REG_EAX},
{"%ecx", REG_ECX},
{"%edx", REG_EDX},
{"%ebx", REG_EBX},
{"%esp", REG_ESP},
{"%ebp", REG_EBP},
{"%esi", REG_ESI},
{"%edi", REG_EDI},
};
/*
 * find_register: look up a register by its textual name
 * args
 *     name: text starting at the register name (e.g. "%eax")
 *
 * return
 *     the id of the first reg_table entry whose name matches 'name'
 *     REG_ERR: no entry matches
 *
 * At most the first 4 characters are compared, so any text following a
 * valid 4-character register name is ignored.
 */
regid_t find_register(char *name)
{
    int idx = 0;

    while (idx < REG_CNT) {
        if (strncmp(name, reg_table[idx].name, 4) == 0)
            return reg_table[idx].id;
        idx++;
    }
    return REG_ERR;
}
/*
 * Instruction set: one entry per mnemonic or assembler directive.
 *
 * Columns, in order:
 *   1. the mnemonic text
 *   2. the number of characters in the mnemonic; find_instr() compares
 *      exactly this many characters
 *   3. HPACK(...) of an I_* constant and an F_, C_, A_ or D_ constant --
 *      presumably the instruction code and its function code packed
 *      together (TODO confirm against y86asm.h)
 *   4. a size value -- presumably the encoded size in bytes (TODO confirm
 *      against instr_t in y86asm.h)
 *
 * Order matters: find_instr() returns the FIRST entry whose mnemonic is a
 * prefix of the input text, so a longer mnemonic must appear before any
 * shorter mnemonic that is a prefix of it ("cmovle" before "cmovl",
 * "jle" before "jl", "jge" before "jg").
 *
 * The table is terminated by an entry whose name is NULL.
 */
instr_t instr_set[] = {
{"nop", 3, HPACK(I_NOP, F_NONE), 1 },
{"halt", 4, HPACK(I_HALT, F_NONE), 1 },
{"rrmovl", 6,HPACK(I_RRMOVL, F_NONE), 2 },
{"cmovle", 6,HPACK(I_RRMOVL, C_LE), 2 },
{"cmovl", 5, HPACK(I_RRMOVL, C_L), 2 },
{"cmove", 5, HPACK(I_RRMOVL, C_E), 2 },
{"cmovne", 6,HPACK(I_RRMOVL, C_NE), 2 },
{"cmovge", 6,HPACK(I_RRMOVL, C_GE), 2 },
{"cmovg", 5, HPACK(I_RRMOVL, C_G), 2 },
{"irmovl", 6,HPACK(I_IRMOVL, F_NONE), 6 },
{"rmmovl", 6,HPACK(I_RMMOVL, F_NONE), 6 },
{"mrmovl", 6,HPACK(I_MRMOVL, F_NONE), 6 },
{"addl", 4, HPACK(I_ALU, A_ADD), 2 },
{"subl", 4, HPACK(I_ALU, A_SUB), 2 },
{"andl", 4, HPACK(I_ALU, A_AND), 2 },
{"xorl", 4, HPACK(I_ALU, A_XOR), 2 },
{"jmp", 3, HPACK(I_JMP, C_YES), 5 },
{"jle", 3, HPACK(I_JMP, C_LE), 5 },
{"jl", 2, HPACK(I_JMP, C_L), 5 },
{"je", 2, HPACK(I_JMP, C_E), 5 },
{"jne", 3, HPACK(I_JMP, C_NE), 5 },
{"jge", 3, HPACK(I_JMP, C_GE), 5 },
{"jg", 2, HPACK(I_JMP, C_G), 5 },
{"call", 4, HPACK(I_CALL, F_NONE), 5 },
{"ret", 3, HPACK(I_RET, F_NONE), 1 },
{"pushl", 5, HPACK(I_PUSHL, F_NONE), 2 },
{"popl", 4, HPACK(I_POPL, F_NONE), 2 },
{".byte", 5, HPACK(I_DIRECTIVE, D_DATA), 1 },
{".word", 5, HPACK(I_DIRECTIVE, D_DATA), 2 },
{".long", 5, HPACK(I_DIRECTIVE, D_DATA), 4 },
{".pos", 4, HPACK(I_DIRECTIVE, D_POS), 0 },
{".align", 6,HPACK(I_DIRECTIVE, D_ALIGN), 0 },
{NULL, 1, 0 , 0 } /* sentinel: the NULL name stops find_instr() */
};
instr_t *find_instr(char *name)
{
int i;
for (i = 0; instr_set[i].name; i++)
if (strncmp(instr_set[i].name, name, instr_set[i].len) == 0)
return &instr_set[i];
return NULL;
}
/*
 * Symbol table (don't forget to init and finit it).
 * find_symbol() and add_symbol() dereference symtab->next without a NULL
 * check and never read the first node's own name/addr, so symtab must
 * point at an allocated head node before either of them is called.
 */
symbol_t *symtab = NULL;
/*
 * find_symbol: search the symbol table for a symbol by exact name
 * args
 *     name: the name of the symbol to look for
 *
 * return
 *     symbol_t *: the entry whose name equals 'name'
 *     NULL: no such symbol
 *
 * The search starts at symtab->next, i.e. the first node of the list is
 * skipped; symtab itself must not be NULL.
 */
symbol_t *find_symbol(char *name)
{
    symbol_t *node;

    for (node = symtab->next; node != NULL; node = node->next) {
        if (strcmp(node->name, name) == 0)
            return node;
    }
    return NULL;
}
/*
 * add_symbol: add a new symbol to the symbol table
 * args
 *     name: the name of the symbol; the pointer itself is stored (no copy
 *           is made), so the string must stay valid while the table is used
 *
 * return
 *     0: success; the symbol is linked in right after the head node with
 *        its address set to the current value of vmaddr
 *     -1: error; either a symbol with this name already exists, or the
 *         new entry could not be allocated (a message is then written to
 *         stderr so the two cases can be told apart)
 *
 * symtab must already point at an allocated head node.
 */
int add_symbol(char *name)
{
    symbol_t *cur;
    symbol_t *np;

    /* check duplicate */
    for (cur = symtab->next; cur != NULL; cur = cur->next) {
        if (strcmp(cur->name, name) == 0)
            return -1;
    }

    /* create new symbol_t (don't forget to free it) */
    np = malloc(sizeof *np);
    if (np == NULL) {
        /* fail cleanly instead of writing through a NULL pointer */
        fprintf(stderr, "add_symbol: out of memory\n");
        return -1;
    }
    np->name = name;
    np->addr = vmaddr;

    /* add the new symbol_t to the front of the symbol table */
    np->next = symtab->next;
    symtab->next = np;
    return 0;
}
/*
 * Relocation table (don't forget to init and finit it).
 * add_reloc() links new entries in at reltab->next without a NULL check,
 * so reltab must point at an allocated head node before it is called.
 */
reloc_t *reltab = NULL;
/*
 * add_reloc: add a new relocation to the relocation table
 * args
 *     name: the name of the symbol the relocation refers to; the pointer
 *           itself is stored (no copy is made)
 *     bin: the binary code record stored with the relocation
 *
 * return
 *     nothing. The function has no way to report failure to its caller,
 *     so if the new entry cannot be allocated it prints a message to
 *     stderr and terminates the program rather than dropping the
 *     relocation silently.
 *
 * reltab must already point at an allocated head node; the new entry is
 * linked in right after it.
 */
void add_reloc(char *name, bin_t *bin)
{
    /* create new reloc_t (don't forget to free it) */
    reloc_t *nr = malloc(sizeof *nr);

    if (nr == NULL) {
        fprintf(stderr, "add_reloc: out of memory\n");
        exit(EXIT_FAILURE);
    }
    nr->y86bin = bin;
    nr->name = name;

    /* add the new reloc_t to the front of the relocation table */
    nr->next = reltab->next;
    reltab->next = nr;
}
/*
 * Macros for parsing y86 assembly code.
 *
 * Each IS_xxx(s) takes a pointer to a character and tests the character it
 * points at. The argument is evaluated more than once, so it must not have
 * side effects.
 *
 *   IS_DIGIT   - a decimal digit, or a '-' / '+' sign character
 *   IS_LETTER  - an ASCII letter, either case
 *   IS_COMMENT - '#'
 *   IS_REG     - '%'
 *   IS_IMM     - '$'
 *   IS_BLANK   - a space or a tab
 *   IS_END     - the terminating '\0'
 *   IS_HEX     - the two characters "0x"
 */
#define IS_DIGIT(s) ((*(s)>='0' && *(s)<='9') || *(s)=='-' || *(s)=='+')
#define IS_LETTER(s) ((*(s)>='a' && *(s)<='z') || (*(s)>='A' && *(s)<='Z'))
#define IS_COMMENT(s) (*(s)=='#')
#define IS_REG(s) (*(s)=='%')
#define IS_IMM(s) (*(s)=='$')
#define IS_BLANK(s) (*(s)==' ' || *(s)=='\t')
#define IS_END(s) (*(s)=='\0')
#define IS_HEX(s) (*(s) == '0' && *((s) + 1) == 'x')
/*
 * SKIP_BLANK: advance the pointer variable 's' past spaces and tabs,
 * stopping at the first other character or at the end of the string.
 *
 * The expansion deliberately does NOT end with a semicolon: the caller's
 * own ';' completes the do/while statement, so the macro behaves like a
 * single statement and can be used as the body of an if/else without braces.
 */
#define SKIP_BLANK(s) do { \
    while (!IS_END(s) && IS_BLANK(s)) \
        (s)++; \
} while (0)
/*
 * Result codes shared by the parse_xxx functions: PARSE_ERR signals
 * failure, every other value names the kind of token that was parsed.
 */
typedef enum {
    PARSE_ERR    = -1,
    PARSE_REG    = 0,
    PARSE_DIGIT  = 1,
    PARSE_SYMBOL = 2,
    PARSE_MEM    = 3,
    PARSE_DELIM  = 4,
    PARSE_INSTR  = 5,
    PARSE_LABEL  = 6
} parse_t;
/*
 * parse_instr: parse an expected instruction token (e.g., 'rrmovl')
 * args
 *     ptr: in/out, points to the current position in the line
 *     inst: out, receives the matching entry of instr_set
 *
 * return
 *     PARSE_INSTR: success; '*ptr' is moved to the first char after the
 *                  mnemonic and '*inst' points to the instruction
 *     PARSE_ERR: error; only blanks were left, no mnemonic matched, or the
 *                mnemonic was not followed by a blank or the end of the
 *                string. '*ptr' and '*inst' are not written in this case.
 */
parse_t parse_instr(char **ptr, instr_t **inst)
{
    char *pos = *ptr;
    instr_t *found;

    /* leading blanks are not part of the token */
    SKIP_BLANK(pos);
    if (IS_END(pos))
        return PARSE_ERR;

    found = find_instr(pos);
    if (found == NULL)
        return PARSE_ERR;

    /* find_instr only matched a prefix: the mnemonic must end here */
    pos += found->len;
    if (!(IS_END(pos) || IS_BLANK(pos)))
        return PARSE_ERR;

    *inst = found;
    *ptr = pos;
    return PARSE_INSTR;
}
/*
 * parse_delim: parse an expected delimiter token (e.g., ',')
 * args
 *     ptr: in/out, points to the current position in the line
 *     delim: the delimiter character that must come next
 *
 * return
 *     PARSE_DELIM: success; '*ptr' is moved to the first char after the
 *                  delimiter
 *     PARSE_ERR: error; the next non-blank character is not 'delim', or
 *                the string ended first. '*ptr' is not written in this case.
 */
parse_t parse_delim(char **ptr, char delim)
{
    char *pos = *ptr;

    /* leading blanks are not part of the token */
    SKIP_BLANK(pos);
    if (IS_END(pos) || *pos != delim)
        return PARSE_ERR;

    /* consume the delimiter */
    *ptr = pos + 1;
    return PARSE_DELIM;
}
/*
 * parse_reg: parse an expected register token (e.g., '%eax')
 * args
 *     ptr: in/out, points to the current position in the line
 *     regid: out, receives the id of the register
 *
 * return
 *     PARSE_REG: success; '*ptr' is moved to the first char after the
 *                4-character register name and '*regid' holds its id
 *     PARSE_ERR: error; the next non-blank character is not '%', the
 *                string ended first, or the name is not a known register.
 *                '*ptr' and '*regid' are not written in this case.
 */
parse_t parse_reg(char **ptr, regid_t *regid)
{
    char *pos = *ptr;
    regid_t id;

    /* leading blanks are not part of the token */
    SKIP_BLANK(pos);
    if (IS_END(pos))
        return PARSE_ERR;
    if (!IS_REG(pos))
        return PARSE_ERR;

    id = find_register(pos);
    if (id == REG_ERR)
        return PARSE_ERR;

    /* every register name is 4 characters long */
    *regid = id;
    *ptr = pos + 4;
    return PARSE_REG;
}
/*
* parse_symbol: parse an expected symbol token (e.g., 'Main')
* args
* ptr: point to the start of string
* name: point to the name of symbol (should be allocated in this function)
*
* return
* PARSE_SYMBOL: success, move 'ptr' to the first char after token,
* and allocate and store name to 'name'
* PARSE_ERR: error, the value of 'ptr' and 'name' are undefined
*/
parse_t parse_symbol(char **ptr, char **name)
{
char *cur = *ptr;
char *n;
/* skip the blank and check */
SKIP_BLANK(cur);
if (IS_END(cur) || !IS_LETTER(cur)) return PARSE_ERR;
/* allocate name and copy to it */
int len;
for(len = 0 ; IS_LETTER(cur + len) || IS_DIGIT(cur + len); len++);
n = (char *)mal