#!/usr/bin/env python
# ------------------------------------------------------------
# fractlexer.py
#
# tokenizer for UltraFractal formula files
# ------------------------------------------------------------

import lex
import sys
import re
import string

# set to True to pass through all tokens. This breaks the parser but
# is useful for pretty-printing
keep_all = False

# List of token names. This is always required
tokens = (
    'NUMBER',
    'COMPLEX',
    'ID',
    'PLUS',
    'MINUS',
    'TIMES',
    'DIVIDE',
    'MOD',
    'LPAREN',
    'RPAREN',
    'LARRAY',
    'RARRAY',
    'MAG',
    'POWER',
    'BOOL_NEG',
    'BOOL_OR',
    'BOOL_AND',
    'EQ',
    'NEQ',
    'LT',
    'LTE',
    'GT',
    'GTE',
    'ASSIGN',
    'COMMENT',
    'NEWLINE',
    'ESCAPED_NL',
    'COMMA',
    'STRING',
    'COMMENT_FORMULA',
    'FORM_ID',
    'FORM_END',
    'SECT_SET',
    'SECT_PARMS',
    'SECT_STM',
    # keywords
    'ELSE',
    'ELSEIF',
    'ENDFUNC',
    'ENDHEADING',
    'ENDIF',
    'ENDPARAM',
    'ENDWHILE',
    'FUNC',
    'HEADING',
    'IF',
    'PARAM',
    'REPEAT',
    'UNTIL',
    'WHILE',
    'TYPE',
    'CONST'
)

# lookup table to convert IDs into keywords
keywords = [
    "else",
    "elseif",
    "endfunc",
    "endheading",
    "endif",
    "endparam",
    "endwhile",
    "func",
    "heading",
    "if",
    "param",
    "repeat",
    "until",
    "while"]

types = ["bool", "color", "complex", "float", "hyper", "grad", "int"]

consts = ["true", "false", "yes", "no"]

# Maps a lower-cased identifier to the token type it should be reported as:
# each keyword maps to its own upper-cased name, every type name maps to
# TYPE and every constant name maps to CONST.
lookup = {}
for k in keywords:
    # str.upper() rather than string.upper(): the module-level function is
    # deprecated and does not exist on Python 3.
    lookup[k] = k.upper()
for t in types:
    lookup[t] = "TYPE"
for c in consts:
    lookup[c] = "CONST"

# Regular expression rules for simple tokens
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LARRAY = r'\['
t_RARRAY = r'\]'
t_MAG = r'\|'
t_POWER = r'\^'
t_BOOL_NEG = r'!'
t_BOOL_OR = r'\|\|'
t_BOOL_AND = r'&&'
t_EQ = r'=='
t_NEQ = r'!='
t_LT = r'<'
t_LTE = r'<='
t_GT = r'>'
t_GTE = r'>='
t_ASSIGN = r'='
t_COMMA = r','
t_FORM_END = r'\}'

# NOTE: in every rule function below the leading string literal is the
# token's regular expression, not documentation, so it must stay the first
# statement and explanatory text has to live in '#' comments.
# NOTE(review): the rules adjust t.lineno directly; this assumes the bundled
# lex module carries that value forward to later tokens - confirm.


# handle stupid "comment" formula blocks specially
# match ; and Comment because some uf repository files do this
def t_COMMENT_FORMULA(t):
    r';?[Cc]omment\s*{[^}]*}'
    # Nothing is returned, so no token is produced for the block, but the
    # line counter still has to advance past every line it spans.
    t.lineno += t.value.count("\n")


# may seem weird, but this includes the starting {
# this is to ensure that the generous pattern match doesn't
# trigger all the time mid-formula (eg, z = "z^2 + c" is a valid formid)
def t_FORM_ID(t):
    r'[^\r\n;"\{]+{'
    if not keep_all:
        # remove trailing whitespace and {
        t.value = re.sub(r"\s*{$", "", t.value)
    return t


def t_NUMBER(t):
    r'(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?i?'
    if t.value.endswith("i"):
        # a complex constant: drop the suffix and retag the token
        t.value = t.value[:-1]
        t.type = "COMPLEX"
    return t


# these have to be functions to give them higher precedence than ID
# the gnarly regexp syntax is so we are case-insensitive (ick)
# and don't match things like "x = pixel:"

# default, switch, builtin
def t_SECT_SET(t):
    r'(([Dd][Ee][Ff][Aa][Uu][Ll][Tt])|([Ss][Ww][Ii][Tt][Cc][Hh])|([Bb][Uu][Ii][Ll][Tt][Ii][Nn])):'
    if not keep_all:
        # report only the section name, without the trailing colon
        t.value = re.sub(r":$", "", t.value)
    return t


# a section containing parameter settings, as found in .ugr and .upr files
# gradient, fractal, layer, mapping, formula, inside, outside, alpha, opacity
def t_SECT_PARMS(t):
    r'(([Gg][Rr][Aa][Dd][Ii][Ee][Nn][Tt])|([Ff][Rr][Aa][Cc][Tt][Aa][Ll])|([Ll][Aa][Yy][Ee][Rr])|([Mm][Aa][Pp][Pp][Ii][Nn][Gg])|([Ff][Oo][Rr][Mm][Uu][Ll][Aa])|([Ii][Nn][Ss][Ii][Dd][Ee])|([Oo][Uu][Tt][Ss][Ii][Dd][Ee])|([Aa][Ll][Pp][Hh][Aa])|([Oo][Pp][Aa][Cc][Ii][Tt][Yy])):'
    if not keep_all:
        # report only the section name, without the trailing colon
        t.value = re.sub(r":$", "", t.value)
    return t


# global, transform, init, loop, final, bailout
# the name group is optional, so a bare ":" is also matched by this rule
def t_SECT_STM(t):
    r'(([Gg][Ll][Oo][Bb][Aa][Ll])|([Tt][Rr][Aa][Nn][Ss][Ff][Oo][Rr][Mm])|([Ii][Nn][Ii][Tt])|([Ll][Oo][Oo][Pp])|([Ff][Ii][Nn][Aa][Ll])|([Bb][Aa][Ii][Ll][Oo][Uu][Tt]))?:'
    if not keep_all:
        # report only the section name, without the trailing colon
        t.value = re.sub(r":$", "", t.value)
    return t


def t_ID(t):
    r'[@#]?[a-zA-Z_][a-zA-Z0-9_]*'
    lookfor = t.value.lower()  # case insensitive lookup
    # Keywords, type names and constants are lexed as identifiers first and
    # retagged here; t.value keeps the spelling used in the input.
    if lookfor in lookup:
        t.type = lookup[lookfor]
    return t


# don't produce tokens for newlines preceded by \
def t_ESCAPED_NL(t):
    r'\\\r?\s*\n'
    # \s* can swallow further blank lines, so count the newlines that were
    # actually consumed instead of assuming exactly one.
    t.lineno += t.value.count("\n")


def t_COMMENT(t):
    r';[^\n]*'
    # comments only become tokens in pass-through mode
    if keep_all:
        return t


def t_NEWLINE(t):
    r'\r*\n'
    t.lineno += 1  # track line numbers
    return t


def t_STRING(t):
    r'"[^"]*"'
    # embedded quotes not supported in UF?
    if not keep_all:
        # remove trailing and leading "
        t.value = re.sub(r'(^")|("$)', "", t.value)
    # Count lines before the split-line markers (and their newlines) are
    # removed below, so multi-line strings still advance the line counter.
    t.lineno += t.value.count("\n")
    # hide \-split lines
    t.value = re.sub(r'\\\r?\n[ \t\v]*', "", t.value)
    return t


# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t\r'


# Error handling rule
def t_error(t):
    # print "Illegal character '%s' on line %d" % (t.value[0], t.lineno)
    # Cut the value down to the single offending character, skip past it
    # and hand the token back to the caller rather than dropping it.
    t.value = t.value[0]
    t.skip(1)
    return t


# Build the lexer
lexer = lex.lex(optimize=1)


def get_lexer():
    # Builds a new lexer on each call, rebinds the module-level `lexer`
    # to it and returns it.
    global lexer
    lexer = lex.lex(optimize=1)
    return lexer


# debugging
if __name__ == '__main__':  # pragma: no cover
    # Test it out: tokenize the file named on the command line and print
    # every token.
    source_file = open(sys.argv[1], "r")
    try:
        data = source_file.read()
    finally:
        # close the handle even if the read fails
        source_file.close()

    # Give the lexer some input
    lex.input(data)

    # Tokenize
    while 1:
        tok = lex.token()
        if not tok:
            break  # No more input
        print(tok)