/*
* picasm -- token.c
*
* Copyright 1995-2004 Timo Rossi, <trossi@iki.fi>
* See the file LICENSE for license terms.
*
* Include handling, macro expansion, lexical analysis
*
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
#include "picasm.h"
#include "util.h"
#include "token.h"
#include "symtab.h"
/*
* keyword table for tokenizer
*
* this must be in sync with the token definitions in picasm.h
*
* Layout: the keywords are stored back to back, each terminated by its
* own NUL, and the whole table is closed by an empty string.
* get_token() walks the entries in order, starting with token_type set
* to FIRST_KW and adding one per entry skipped, so the position of a
* keyword in this table determines the token type it produces.
* The comparison in get_token() is case-insensitive (strcasecmp).
*/
static char Keyword_Table[] = {
"include\0"
"macro\0"
"endm\0"
"exitm\0"
"if\0"
"else\0"
"endif\0"
"equ\0"
"set\0"
"end\0"
"org\0"
"ds\0"
"edata\0"
"dt\0"
"data\0"
"cblock\0"
"endc\0"
"config\0"
"picid\0"
"device\0"
"defined\0"
"hibyte\0"
"streq\0"
"streqcase\0"
"isstr\0"
"chrval\0"
"opt\0"
"local\0"
"endlocal\0"
"error\0"
/* 12/14-bit PIC instruction mnemonics */
"addlw\0"
"addwf\0"
"andlw\0"
"andwf\0"
"bcf\0"
"bsf\0"
"btfsc\0"
"btfss\0"
"call\0"
"clrf\0"
"clrw\0"
"clrwdt\0"
"comf\0"
"decf\0"
"decfsz\0"
"goto\0"
"incf\0"
"incfsz\0"
"iorlw\0"
"iorwf\0"
"movlw\0"
"movf\0"
"movwf\0"
"nop\0"
"option\0"
"retfie\0"
"retlw\0"
"return\0"
"rlf\0"
"rrf\0"
"sleep\0"
"sublw\0"
"subwf\0"
"swapf\0"
"tris\0"
"xorlw\0"
"xorwf\0"
/* 16-bit PIC instruction mnemonics */
"addwfc\0"
"btg\0"
"cpfseq\0"
"cpfsgt\0"
"cpfslt\0"
"daw\0"
"dcfsnz\0"
"infsnz\0"
"lcall\0"
"movfp\0"
"movpf\0"
"movlb\0"
"movlr\0"
"mullw\0"
"mulwf\0"
"negw\0"
"rlcf\0"
"rlncf\0"
"rrcf\0"
"rrncf\0"
"setf\0"
"subwfb\0"
"tablrd\0"
"tablwt\0"
"tlrd\0"
"tlwt\0"
"tstfsz\0"
"set_pic_type\0"
/* empty string: end-of-table marker tested by the lookup loop */
"\0"
};
/* tokenizer definitions & variables */
/*
* Current lookahead character, set by read_src_char(): an unsigned char
* value, '\0' at the end of a source line, or EOF when no input is left.
* skip_eol() sets it to ' ' to force the next read to fetch a new line.
*/
int tok_char;
/*
* token_type: type of the token most recently produced by get_token().
* line_buf_off: set by get_token() for identifiers/keywords and local
* symbols; offset into line_buffer of the first character of the token
* (for a local symbol, of its leading '=').
*/
int token_type, line_buf_off;
/* text of the most recent token, NUL-terminated */
char token_string[TOKSIZE];
/* numeric value, valid when get_token() produced TOK_INTCONST */
long token_int_val;
int ifskip_mode; /* TRUE when skipping code inside if..endif */
/*
* One directory of the include search path (singly linked list).
* The directory name is stored in place: add_include_path() allocates
* the node with extra room after the struct for the full name, which it
* stores so that it ends with a path separator.
*/
struct incpath_entry {
struct incpath_entry *next;
char dirname[1]; /* struct hack: really strlen(name)+1 bytes */
};
/* include_path: head of the list, inc_path_next: its last node (tail) */
static struct incpath_entry *include_path, *inc_path_next;
/*
 * Reset the include search path to an empty list.
 * Nodes already on the list are not freed here.
 */
void
init_includes(void)
{
    inc_path_next = NULL;
    include_path = NULL;
}
/*
 * include file handling
 */

/*
 * Append a directory to the end of the include search path.
 *
 * The name is copied into a freshly allocated list node.  If it does not
 * already end with a path separator for the host system, a '/' is
 * appended so that a file name can be concatenated directly.
 * An empty or NULL name is reported through fatal_error().
 */
void
add_include_path(char *dirname)
{
    struct incpath_entry *node;
    size_t name_len;
    int need_sep;
    char last;

    if(dirname == NULL || *dirname == '\0')
        fatal_error("Empty directory path");

    name_len = strlen(dirname);
    last = dirname[name_len - 1];

    /* which characters count as a separator depends on the host system */
#if defined(AMIGA)
    need_sep = (last != ':' && last != '/');
#elif defined(MSDOS) || defined(_WIN32)
    need_sep = (last != ':' && last != '/' && last != '\\');
#else
    /* Unix etc. */
    need_sep = (last != '/');
#endif

    /* the struct already provides one byte, used for the terminating NUL */
    node = mem_alloc(sizeof(struct incpath_entry) + name_len + need_sep);
    node->next = NULL;
    memcpy(node->dirname, dirname, name_len);
    if(need_sep)
        node->dirname[name_len++] = '/';
    node->dirname[name_len] = '\0';

    /* link at the tail so directories are searched in the order given */
    if(include_path != NULL)
        inc_path_next->next = node;
    else
        include_path = node;
    inc_path_next = node;
}
void
begin_include(char *fname, int dopath, int err_fatal)
{
struct inc_file *p;
struct incpath_entry *ent;
FILE *fp;
char fnamebuf[512];
int err;
strcpy(fnamebuf, fname);
fp = fopen(fnamebuf, "r");
err = errno;
if(fp == NULL && dopath)
{
for(ent = include_path; ent != NULL; ent = ent->next)
{
int elen = strlen(ent->dirname);
if(elen + strlen(fname) >= sizeof(fnamebuf)-1)
{
warning("Include directory/file name too long");
continue;
}
strcpy(fnamebuf, ent->dirname);
strcpy(fnamebuf+elen, fname);
if((fp = fopen(fnamebuf, "r")) != NULL)
break;
err = errno;
} /*for*/
} /*if*/
if(fp == NULL)
{
if(err_fatal)
{
fatal_error("Can't open '%s': %s", fnamebuf, strerror(err));
}
else
{
error(0, "Can't open include file '%s': %s", fnamebuf, strerror(err));
}
line_buf_ptr = NULL;
tok_char = ' ';
return;
}
p = mem_alloc(sizeof(struct inc_file));
p->type = INC_FILE;
p->v.f.fname = mem_alloc(strlen(fnamebuf)+1);
strcpy(p->v.f.fname, fnamebuf);
p->linenum = 0;
p->cond_nest_count = cond_nest_count;
p->config_flag = 0;
p->v.f.fp = fp;
p->next = current_file;
current_file = p;
line_buf_ptr = NULL;
tok_char = ' ';
}
/*
* Move to previous level of include/macro
*/
void
end_include(void)
{
struct inc_file *p;
struct macro_arg *arg1, *arg2;
int prev_config_flag;
if(current_file != NULL)
{
if(cond_nest_count != current_file->cond_nest_count)
{
error(0, "conditional assembly not terminated by ENDIF");
cond_nest_count = current_file->cond_nest_count;
}
p = current_file->next;
prev_config_flag = current_file->config_flag;
if(current_file->type == INC_FILE)
{
fclose(current_file->v.f.fp);
mem_free(current_file->v.f.fname);
}
else
{ /* free macro arguments */
arg1 = current_file->v.m.args;
while(arg1 != NULL)
{
arg2 = arg1->next;
mem_free(arg1);
arg1 = arg2;
}
}
mem_free(current_file);
current_file = p;
if(current_file != NULL &&
current_file->config_flag == 0 && prev_config_flag != 0)
{
config_done();
}
}
}
/*
* Expand a macro
*/
void
expand_macro(struct symbol *sym)
{
struct inc_file *minc;
struct macro_arg *arg;
char *cp;
int narg;
int parcnt, d_char;
int nch;
write_listing_line(0); /* list the macro call line */
minc = mem_alloc(sizeof(struct inc_file));
minc->type = INC_MACRO;
minc->v.m.sym = sym;
minc->v.m.ml = sym->v.text;
minc->linenum = 0;
minc->cond_nest_count = cond_nest_count;
minc->config_flag = 0;
minc->v.m.args = NULL;
minc->v.m.uniq_id = unique_id_count++;
arg = NULL;
for(narg = 1;;narg++)
{
while(isspace(tok_char)) /* skip whitespace */
read_src_char();
if(tok_char == '\0' || tok_char == ';' || tok_char == EOF)
break;
cp = line_buf_ptr-1;
nch = 0;
/*
* Macro parameters are separated by commas. However, strings and
* character constants (using double and single quotes)
* can be used even if they contain commas. Also commas
* inside parenthesis (such as function parameter delimiters)
* don't count as macro parameter separators.
*
*/
parcnt = 0; /* parenthesis nesting count */
while(!isspace(tok_char) && tok_char != '\0' &&
tok_char != ';' && tok_char != EOF)
{
if(parcnt == 0 && tok_char == ',')
break;
if(tok_char == '(')
{
parcnt++;
}
else if(tok_char == ')')
{
parcnt--;
}
else if(tok_char == '"' || tok_char == '\'')
{
/* quoted string or character constant */
d_char = tok_char;
do
{
read_src_char();
nch++;
} while(tok_char != d_char && tok_char != '\0' &&
tok_char != EOF);
if(tok_char != d_char)
break;
}
read_src_char();
nch++;
}
if(narg >= 10)
warning("Too many macro arguments (max. 9)");
if(arg == NULL)
{
arg = mem_alloc(sizeof(struct macro_arg) + nch);
minc->v.m.args = arg;
}
else
{
arg->next = mem_alloc(sizeof(struct macro_arg) + nch);
arg = arg->next;
}
strncpy(arg->text, cp, nch);
arg->text[nch] = '\0';
arg->next = NULL;
/* skip whitespace */
while(isspace(tok_char))
read_src_char();
if(tok_char != ',')
break;
read_src_char();
}
if(tok_char != ';' && tok_char != '\0' && tok_char != EOF)
error(0, "Extraneous characters after a valid source line");
minc->next = current_file;
current_file = minc;
line_buf_ptr = NULL;
tok_char = ' ';
get_token();
}
/*
 * This is used by the new config system
 *
 * Pushes a macro entry for 'sym' on the include stack, with the argument
 * list built from copies of the 'nargs' strings in 'args' and with the
 * given config_flag.
 *
 * Unlike expand_macro(), this must not touch the tokenizer state at the
 * end (no line buffer reset, no get_token() call).
 */
void
expand_macro_with_args(struct symbol *sym, char *args[], int nargs,
                       int config_flag)
{
    struct inc_file *frame;
    struct macro_arg *node;
    struct macro_arg **link;
    int n;

    frame = mem_alloc(sizeof(struct inc_file));
    frame->type = INC_MACRO;
    frame->v.m.sym = sym;
    frame->v.m.ml = sym->v.text;
    frame->linenum = 0;
    frame->cond_nest_count = cond_nest_count;
    frame->config_flag = config_flag;
    frame->v.m.args = NULL;
    frame->v.m.uniq_id = unique_id_count++;

    /* 'link' always points at the pointer that must receive the next node */
    link = &frame->v.m.args;
    for(n = 0; n < nargs; n++)
    {
        node = mem_alloc(sizeof(struct macro_arg) + strlen(args[n]));
        strcpy(node->text, args[n]);
        node->next = NULL;

        *link = node;
        link = &node->next;
    }

    frame->next = current_file;
    current_file = frame;
}
/*
 * Copy the NUL-terminated string 'text' to 'dst', stopping early when
 * 'dst' reaches 'limit'.  Returns the position after the last byte
 * written.  No terminating NUL is stored.
 */
static char *
copy_into_line(char *dst, char *limit, const char *text)
{
    while(*text != '\0' && dst < limit)
        *dst++ = *text++;

    return dst;
}

/*
 * Read a character from source file.
 * Handles includes and macros.
 *
 * The result is left in tok_char: the next character of the current
 * line, '\0' at the end of a line, or EOF when there is no more input.
 * When the current line is used up, the next one is fetched into
 * line_buffer, either from the file on top of the include stack or by
 * expanding the next line of the macro on top of it.  Finished macros
 * and finished nested files are popped with end_include().
 *
 * Escapes handled while expanding a macro line:
 *   \1 .. \9   text of the corresponding argument (nothing if absent)
 *   \0 or \@   the expansion's unique id, printed with "%03d"
 *   \#         number of arguments passed
 */
void
read_src_char(void)
{
    char numbuf[12];

    if(line_buf_ptr == NULL)
    {
        /* current line exhausted: fetch the next one */
        if(current_file == NULL)
        {
            tok_char = EOF;
            return;
        }

        for(;;)
        {
            if(current_file->type == INC_MACRO)
            {
                const char *src;
                char *out;
                char *limit;

                if(current_file->v.m.ml == NULL)
                {
                    /* macro body finished: resume whatever invoked it */
                    end_include();
                    continue;
                }

                src = current_file->v.m.ml->text;
                out = line_buffer;
                limit = line_buffer + sizeof(line_buffer);

                while(*src != '\0' && out < limit)
                {
                    if(*src != '\\')
                    {
                        *out++ = *src++;
                        continue;
                    }

                    src++; /* look at the character after the backslash */
                    if(*src >= '1' && *src <= '9')
                    { /* macro arg */
                        struct macro_arg *arg = current_file->v.m.args;
                        int skip = *src - '1'; /* arg #, starting from 0 */

                        while(arg != NULL && skip > 0)
                        {
                            arg = arg->next;
                            skip--;
                        }
                        if(arg != NULL)
                            out = copy_into_line(out, limit, arg->text);
                        src++;
                    }
                    else if(*src == '0' || *src == '@')
                    {
                        p_snprintf(numbuf, sizeof numbuf,
                                   "%03d", current_file->v.m.uniq_id);
                        out = copy_into_line(out, limit, numbuf);
                        src++;
                    }
                    else if(*src == '#')
                    { /* number of arguments */
                        struct macro_arg *arg;
                        int count = 0;

                        for(arg = current_file->v.m.args; arg != NULL;
                            arg = arg->next)
                            count++;

                        p_snprintf(numbuf, sizeof numbuf, "%d", count);
                        out = copy_into_line(out, limit, numbuf);
                        src++;
                    }
                    else
                    {
                        /*
                         * Any other character: the backslash is dropped
                         * and the character is stored.  src is not
                         * advanced, so the next iteration sees the same
                         * character again.
                         * NOTE(review): this stores an ordinary character
                         * twice -- looks unintended, confirm.
                         */
                        *out++ = *src;
                    }
                }

                if(out == limit)
                {
                    error(0, "Line buffer overflow");
                    out--; /* make room for the terminator */
                }
                *out = '\0'; /* NUL-terminate the line */

                current_file->v.m.ml = current_file->v.m.ml->next;
                break;
            }

            if(fgets(line_buffer, sizeof(line_buffer)-1,
                     current_file->v.f.fp) != NULL)
            {
                /* strip the trailing newline, if there is one */
                if(line_buffer[0] != '\0')
                {
                    char *eol = line_buffer + strlen(line_buffer) - 1;

                    if(*eol == '\n')
                        *eol = '\0';
                }
                break;
            }

            /* end of this file */
            if(current_file->next == NULL)
            {
                /* outermost file: keep it on the stack and report EOF */
                tok_char = EOF;
                return;
            }
            end_include();
        }

        current_file->linenum++;
        line_buf_ptr = line_buffer;
    }

    tok_char = (unsigned char)(*line_buf_ptr++);
    if(tok_char == '\0')
        line_buf_ptr = NULL; /* line used up, next call fetches a new one */
}
/*
 * Lexical analyzer
 * Returns the next token from the source file
 *
 * Results are left in globals:
 *   token_type    - the token type (TOK_xxx, or FIRST_KW + index into
 *                   Keyword_Table for keywords and mnemonics)
 *   token_string  - the token text
 *   token_int_val - the value, for TOK_INTCONST
 *   line_buf_off  - offset of the token in line_buffer, for identifiers,
 *                   keywords and local symbols
 *
 * tok_char holds the lookahead character on entry and on return.
 * End of line and comments produce TOK_NEWLINE, end of input TOK_EOF.
 * Unrecognized input produces TOK_INVALID (with an error message unless
 * ifskip_mode is set).
 */
void
get_token(void)
{
    int tp, base;
    char *cp;

    /*
     * skip spaces
     */
    while(isspace(tok_char))
        read_src_char();

    if(tok_char == EOF)
    {
        token_type = TOK_EOF;
        token_string[0] = '\0';
        return;
    }

    if(tok_char == ';')
    {
        /* comment */
        skip_eol();
        token_type = TOK_NEWLINE;
        strcpy(token_string, "\\n");
        return;
    }

    /*
     * character constant (integer)
     * (does not currently handle the quote character)
     */
    if(tok_char == '\'')
    {
        read_src_char();
        token_string[0] = tok_char;
        read_src_char();
        if(tok_char != '\'')
            goto invalid_token;
        read_src_char();
        token_string[1] = '\0';
        token_int_val = (long)((unsigned char)token_string[0]);
        token_type = TOK_INTCONST;
        return;
    }

    if(tok_char == '"')
    { /* string constant (include filename) */
        read_src_char();
        tp = 0;
        /*
         * A string ends at the closing quote and never extends past the
         * end of the line; otherwise an unterminated string would
         * swallow the following source lines.
         */
        while(tp < TOKSIZE-1 && tok_char != '"' &&
              tok_char != '\0' && tok_char != EOF)
        {
            token_string[tp++] = tok_char;
            read_src_char();
        }

        if(tok_char != '\"' && !ifskip_mode)
            error(0, "String not terminated");

        token_string[tp] = '\0';
        /* leave an end-of-line in place so it still yields TOK_NEWLINE */
        if(tok_char != '\0')
            read_src_char();
        token_type = TOK_STRCONST;
        return;
    }

    /*
     * integer number
     */
    if(isdigit(tok_char))
    {
        token_type = TOK_INTCONST;
        token_string[0] = tok_char;
        tp = 1;
        read_src_char();
        if(token_string[0] == '0')
        {
            if(tok_char == 'x' || tok_char == 'X')
            { /* hex number */
                token_string[tp++] = tok_char;
                read_src_char();
                while(tp < TOKSIZE-1 && isxdigit(tok_char))
                {
                    token_string[tp++] = tok_char;
                    read_src_char();
                }
                token_string[tp] = '\0';
                token_int_val = strtoul(&token_string[2], NULL, 16);
                /* should put range check here */
                return;
            }
        }

        /*
         * Collect hex digits; this also picks up a 'b'/'d' radix letter
         * (both are hex digits), which is sorted out below.
         * One extra byte is kept free for a radix suffix.
         */
        while(tp < TOKSIZE-2 && isxdigit(tok_char))
        {
            token_string[tp++] = tok_char;
            read_src_char();
        }

        base = default_radix;
        switch(tok_char)
        {
            case 'H': /* hex */
            case 'h':
                base = 16; /* hex */
                token_string[tp++] = tok_char;
                read_src_char();
                break;

            case 'O': /* octal */
            case 'o':
                base = 8; /* octal */
                token_string[tp++] = tok_char;
                read_src_char();
                break;

            default:
                if(token_string[0] == '0' &&
                   (token_string[1] == 'b' || token_string[1] == 'B'))
                {
                    /* 0b... binary prefix */
                    token_string[tp] = '\0';
                    token_int_val = strtoul(&token_string[2], &cp, 2);
                    if(cp != &token_string[tp] && !ifskip_mode)
                        error(0, "Invalid digit in a number");
                    /* should put range check here */
                    return;
                }
                else if(token_string[tp-1] == 'B' || token_string[tp-1] == 'b')
                {
                    base = 2;
                }
                else
                {
                    /*
                     * No radix suffix (other than a trailing 'd'): add a
                     * dummy one, so that in every case the character at
                     * tp-1 is where the conversion below must stop.
                     */
                    if(token_string[tp-1] != 'D' && token_string[tp-1] != 'd')
                        token_string[tp++] = '\0';
                }
                break;
        }

        token_string[tp] = '\0';
        token_int_val = strtoul(token_string, &cp, base);
        if(cp != &token_string[tp-1] && !ifskip_mode)
            error(0, "Invalid digit in a number");
        /* should put range check here */
        return;
    }

    /*
     * Handle B'10010100' binary etc.
     */
    if((tok_char == 'b' || tok_char == 'B' ||
        tok_char == 'd' || tok_char == 'D' ||
        tok_char == 'h' || tok_char == 'H' ||
        tok_char == 'o' || tok_char == 'O') &&
       line_buf_ptr != NULL && *line_buf_ptr == '\'')
    {
        token_string[0] = tok_char;
        read_src_char();
        token_string[1] = tok_char;
        read_src_char();
        tp = 2;
        while(tp < TOKSIZE-1 && isxdigit(tok_char))
        {
            token_string[tp++] = tok_char;
            read_src_char();
        }

        if(tok_char != '\'')
            goto invalid_token;

        token_string[tp++] = tok_char;
        read_src_char();
        token_string[tp] = '\0';

        switch(token_string[0])
        {
            case 'b':
            case 'B':
                base = 2;
                break;

            case 'o':
            case 'O':
                base = 8;
                break;

            case 'h':
            case 'H':
                base = 16;
                break;

            case 'd':
            case 'D':
            default:
                base = 10;
                break;
        }

        /* the conversion must stop exactly at the closing quote */
        token_int_val = strtoul(&token_string[2], &cp, base);
        if(cp != &token_string[tp-1] && !ifskip_mode)
            error(0, "Invalid digit in a number");
        /* should put range check here */
        token_type = TOK_INTCONST;
        return;
    }

    /*
     * keyword or identifier
     */
    if(tok_char == '_' || tok_char == '.' || isalpha(tok_char))
    {
        line_buf_off = (line_buf_ptr - &line_buffer[1]);

        token_string[0] = tok_char;
        tp = 1;
        read_src_char();

        /* a period that does not start an identifier is its own token */
        if(token_string[0] == '.' &&
           tok_char != '_' && !isalnum(tok_char))
        {
            token_string[1] = '\0';
            token_type = TOK_PERIOD;
            return;
        }

        while(tp < TOKSIZE-1 &&
              (tok_char == '_' || tok_char == '.' || isalnum(tok_char)))
        {
            token_string[tp++] = tok_char;
            read_src_char();
        }
        token_string[tp] = '\0';

        /* linear search of the keyword table, case-insensitive */
        token_type = FIRST_KW;
        cp = Keyword_Table;
        while(*cp)
        {
            if(strcasecmp(token_string, cp) == 0)
                return;

            while(*cp++)
                ;

            token_type++;
        }

        token_type = TOK_IDENTIFIER;
        return;
    }

    /*
     * non-numeric & non-alpha tokens
     */
    switch(tok_char)
    {
        case '\0':
            token_type = TOK_NEWLINE;
            strcpy(token_string, "\\n");
            skip_eol();
            return;

        case '<':
            token_string[0] = tok_char;
            read_src_char();
            if(tok_char == '<')
            {
                token_string[1] = tok_char;
                token_string[2] = '\0';
                token_type = TOK_LSHIFT;
                read_src_char();
                return;
            }
            if(tok_char == '=')
            {
                token_string[1] = tok_char;
                token_string[2] = '\0';
                token_type = TOK_LESS_EQ;
                read_src_char();
                return;
            }
            if(tok_char == '>')
            {
                token_string[1] = tok_char;
                token_string[2] = '\0';
                token_type = TOK_NOT_EQ;
                read_src_char();
                return;
            }
            token_type = TOK_LESS;
            token_string[1] = '\0';
            return;

        case '>':
            token_string[0] = tok_char;
            read_src_char();
            if(tok_char == '>')
            {
                token_string[1] = tok_char;
                token_string[2] = '\0';
                token_type = TOK_RSHIFT;
                read_src_char();
                return;
            }
            if(tok_char == '=')
            {
                token_string[1] = tok_char;
                token_string[2] = '\0';
                token_type = TOK_GT_EQ;
                read_src_char();
                return;
            }
            token_string[1] = '\0';
            token_type = TOK_GREATER;
            return;

        case '!':
            token_string[0] = tok_char;
            read_src_char();
            if(tok_char != '=')
                goto invalid_token;
            token_string[1] = tok_char;
            token_string[2] = '\0';
            read_src_char();
            token_type = TOK_NOT_EQ;
            return;

        case '=':
            token_string[0] = tok_char;
            read_src_char();
            if(tok_char == '=')
            {
                token_string[1] = tok_char;
                read_src_char();
                token_string[2] = '\0';
                token_type = TOK_EQ;
                return;
            }
            if(tok_char == '<')
            {
                token_string[1] = tok_char;
                read_src_char();
                token_string[2] = '\0';
                token_type = TOK_LESS_EQ;
                return;
            }
            if(tok_char == '>')
            {
                token_string[1] = tok_char;
                read_src_char();
                token_string[2] = '\0';
                token_type = TOK_GT_EQ;
                return;
            }
            if(tok_char == '_' || tok_char == '.' ||
               isalpha(tok_char))
            { /* local symbol */
                /* offset of the '=' that introduces the local symbol */
                line_buf_off = (line_buf_ptr - &line_buffer[2]);

                /* the '=' is not part of the token text */
                token_string[0] = tok_char;
                tp = 1;
                read_src_char();
                while(tp < TOKSIZE-1 &&
                      (tok_char == '_' || tok_char == '.' || isalnum(tok_char)))
                {
                    token_string[tp++] = tok_char;
                    read_src_char();
                }
                token_string[tp] = '\0';
                token_type = TOK_LOCAL_ID;
                return;
            }
            token_string[1] = '\0';
            token_type = TOK_EQUAL;
            return;

        case '$':
            read_src_char();
            if(!isxdigit(tok_char))
            {
                token_string[0] = '$';
                token_string[1] = '\0';
                token_type = TOK_DOLLAR;
                return;
            }
            /* $xxxx hex number; the '$' is not stored in token_string */
            tp = 0;
            do
            {
                token_string[tp++] = tok_char;
                read_src_char();
            } while(tp < TOKSIZE-1 && isxdigit(tok_char));
            token_string[tp] = '\0';
            token_int_val = strtoul(&token_string[0], NULL, 16);
            token_type = TOK_INTCONST;
            /* should put range check here */
            return;

        /*
         * "#include" and "include" both generate KW_INCLUDE
         */
        case '#':
            if(strncasecmp(line_buf_ptr, "include", 7) != 0)
                goto invalid_token;

            strcpy(token_string, "#include");
            line_buf_ptr += 7;
            /*
             * Consume the character after "include" as the new
             * lookahead.  line_buf_ptr has to move past it, otherwise
             * the next read_src_char() returns the same character a
             * second time (which breaks e.g. #include"file").
             */
            tok_char = (unsigned char)(*line_buf_ptr++);
            if(tok_char == '\0')
            {
                line_buf_ptr = NULL;
                tok_char = ' ';
            }
            token_type = KW_INCLUDE;
            return;

        /* single character tokens: type set here, text stored below */
        case '\\':
            token_type = TOK_BACKSLASH;
            break;

        case ',':
            token_type = TOK_COMMA;
            break;

        case '(':
            token_type = TOK_LEFTPAR;
            break;

        case ')':
            token_type = TOK_RIGHTPAR;
            break;

        case '+':
            token_type = TOK_PLUS;
            break;

        case '-':
            token_type = TOK_MINUS;
            break;

        case '&':
            token_type = TOK_BITAND;
            break;

        case '|':
            token_type = TOK_BITOR;
            break;

        case '^':
            token_type = TOK_BITXOR;
            break;

        case '~':
            token_type = TOK_BITNOT;
            break;

        case '*':
            token_type = TOK_ASTERISK;
            break;

        case '/':
            token_type = TOK_SLASH;
            break;

        case '%':
            token_type = TOK_PERCENT;
            break;

        case ':':
            token_type = TOK_COLON;
            break;

        case '[':
            token_type = TOK_LEFTBRAK;
            break;

        case ']':
            token_type = TOK_RIGHTBRAK;
            break;

        default:
            goto invalid_token;
    }

    token_string[0] = tok_char;
    token_string[1] = '\0';
    read_src_char();
    return;

invalid_token:
    /* messages are suppressed while skipping code inside if..endif */
    if(!ifskip_mode)
        error(0, "Invalid token");
    token_string[0] = '\0';
    token_type = TOK_INVALID;
}
/*
 * skip to the next line
 *
 * Discards the rest of the current line.  The lookahead becomes a
 * space, so the next read_src_char() (reached when get_token() skips
 * whitespace) fetches a new line.
 */
void
skip_eol(void)
{
    tok_char = ' ';
    line_buf_ptr = NULL;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */