eumandy/tools/WEE/parser.e
2016-11-25 00:27:22 -07:00

2960 lines
86 KiB
Plaintext

-- parser.e
--
-- Copyright (c) 2015 Pete Eberlein
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-- copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in
-- all copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-- THE SOFTWARE.
-- todo:
-- OE4 while "with entry" and "label"
-- OE4 loop until
-- OE4 labels and goto
-- get_declarations with namespace shouldn't include local symbols
include std/filesys.e
include std/os.e
include std/text.e
include std/map.e
constant
OE4 = 1, -- enable OpenEuphoria 4 syntax
ERROR_ABORT = 0, -- enable abort on errors?
ERROR_PRINT = 0 -- enable printing errors?
sequence defined_words
defined_words = {"OE", "OE4"}
if platform() = WIN32 then
defined_words &= {"WINDOWS", "WIN32"}
elsif platform() = LINUX then
defined_words &= {"LINUX", "UNIX"}
elsif platform() = OSX then
defined_words &= {"OSX", "UNIX"}
end if
ifdef WINDOWS then
-- don't try to open these as files
constant dos_devices = {"CON","PRN","AUX","CLOCK$","NUL","COM1","LPT1","LPT2","LPT3","COM2","COM3","COM4"}
end ifdef
-- where to search for standard include files
global sequence include_search_paths
include_search_paths = include_paths(0)
if sequence(getenv("EUDIR")) then
include_search_paths = append(include_search_paths, getenv("EUDIR") & SLASH & "include")
end if
-- ast node types that are also opcodes
global constant
END = 0,
LOAD = 1,
LOADHI = 2,
MOV = 3,
ADD = 4,
ADDU8 = 5,
MUL = 6,
DIV = 7,
REM = 8,
JL = 9,
JLE = 10,
JE = 11,
JNE = 12,
JMP = 13,
EQ = 14,
NEQ = 15,
LT = 16,
GTE = 17,
LTE = 18,
GT = 19,
QPRINT = 20,
SUB = 21,
SUBU8 = 22,
NOT = 23,
NEG = 24,
AND = 25,
OR = 26,
XOR = 27,
CAT = 28 -- {CAT, expr, expr}
global constant
DECL_ATOM = 1,
DECL_CONSTANT = 2,
DECL_ENUM = 3,
DECL_FUNCTION = 4,
DECL_INTEGER = 5,
DECL_NAMESPACE = 6,
DECL_OBJECT = 7,
DECL_PROCEDURE = 8,
DECL_SEQUENCE = 9,
DECL_TYPE = 10
-- ast node types that are not opcodes
global constant
VAR_DECL = 256, -- {VAR_DECL, "type", pos, {"name", pos, scope-start, [expr]}...}
ASSIGN = 257, -- {ASSIGN, "name", pos, expr}
FUNC = 258, -- {FUNC, "name", pos, [args...]}
PROC = 259, -- {PROC, "name", pos, [args...]}
VARIABLE = 260, -- {VARIABLE, "name", pos}
SUBSCRIPT = 261, -- {SUBSCRIPT, expr, index-expr}
SLICE = 262, -- {SLICE, expr, start-expr, end-expr}
CONST_DECL = 263, -- {CONST_DECL, {"name", pos, scope-start, expr}... }
RETURN = 265, -- {RETURN, [expr]}
EXIT = 266, -- {EXIT}
SEQ = 267, -- {SEQ, [expr,]...}
NUMBER = 268,
WHILE = 269, -- {WHILE, expr, scope-start, scope-end, stmts...}
IF = 270, -- {IF, expr, {scope-start, scope-end, stmts...},
-- [expr, {scope-start, scope-end, elsif-stmts...},]...
-- [{scope-start, scope-end, else-stmts...}]}
ELSE = 271,
FOR = 272, -- {FOR, name, pos, expr, expr, by, scope-start, scope-end, stmts...}
FUNC_DECL = 273, -- {FUNC_DECL, "name", pos,
-- {{"arg-type", "arg-name", pos, scope-start, [expr]}...},
-- scope-start, scope-end, stmts...}
PROC_DECL = 274, -- {PROC_DECL, ...}
TYPE_DECL = 275, -- {TYPE_DECL, ...}
SEQ_ASSIGN = 276, -- {SEQ_ASSIGN, ["name1", pos1,]... expr}
ADDTO = 277, -- {ADDTO, "name", pos, expr}
SUBTO = 278,
MULTO = 279,
DIVTO = 280,
CATTO = 281,
STRING = 282, -- {STRING, "string-literal"}
SUB_ASSIGN = 283, -- {SUB_ASSIGN, "name", pos, index-expr..., expr}
SUB_ADDTO = 284,
SUB_SUBTO = 285,
SUB_MULTO = 286,
SUB_DIVTO = 287,
SUB_CATTO = 288,
SLICE_ASSIGN = 289, -- {SLICE_ASSIGN, "name", pos, index-expr..., start-expr, end-expr, expr}
SLICE_ADDTO = 290,
SLICE_SUBTO = 291,
SLICE_MULTO = 292,
SLICE_DIVTO = 293,
SLICE_CATTO = 294,
SEQ_LEN = 295, -- {SEQ_LEN}, shorthand for length of enclosing sequence in SUBSCRIPT, SLICE, SUB_*, SLICE_*
ENUM_DECL = 296, -- {ENUM_DECL, "typename"|"", pos, '+'|'-'|'*'|'/', expr,
-- {"name", pos, scope-start, [expr]}...}
INCLUDE = 297, -- {INCLUDE, includes-idx, scope-start, ["namespace"]}
GLOBAL = 298, -- {GLOBAL, decl...}
PUBLIC = 299, -- {PUBLIC, decl...}
EXPORT = 300, -- {EXPORT, decl...}
NAMESPACE = 301, -- {NAMESPACE, "name"}
IFDEF = 302, -- same layout as IF
ELSEDEF = 303,
SWITCH = 304, -- {SWITCH, expr, bool-fallthru, label-string,
-- [{case-values...}, {scope-start, scope-end, stmts...},]... }
-- ("case else" will have case-values={} )
BREAK = 305, -- {BREAK, [label-string]}
CONTINUE = 306, -- {CONTINUE, [label-string]}
DEFAULT = 307, -- {DEFAULT}, used for default arguments in subroutine calls
ENTRY = 308, -- {ENTRY}, used with while loops
RETRY = 309, -- {RETRY}, used with while loops
LABEL = 310, -- {LABEL, "label"}
GOTO = 311, -- {GOTO, "label"}
ELSIF = 312, -- only for lookup table
ELSIFDEF = 313, -- only for lookup table
CASE = 314, -- {CASE, prev-scope-end, scope-start, [values, ...]} (case else will have no values)
KEYWORD = 315,
WITH = 316,
WITHOUT = 317,
SYNTAX_ERROR = 318, -- {SYNTAX_ERROR, pos, len, "message"}
LOOP = 319, -- {LOOP, scope-start, scope-end, stmts...}
UNTIL = 320 -- {UNTIL, expr}
-- keep a copy of parsed files, reparsing if timestamp changes
sequence cache -- { {"path", timestamp, stmts...} ...}
cache = {}
-- a sequence of maps makes identifier lookups fast
sequence maps -- map "decl-name" -> { locator... }
maps = {}
-- functions, procedures, types, enum types, namespace, include-as:
-- locator = ast-index
-- variables, constants, enum values:
-- locator = { ast-index, pos, scope-start, [scope-end] }
-- returns a if test is true, otherwise b
function choose(integer test, object a, object b)
if test then
return a
end if
return b
end function
-- use lookup table for faster keyword checking
map:map lookup_table = map:new_from_kvpairs({
{"?", QPRINT}, {"&", CAT}, {"+", ADD}, {"-",SUB}, {"*", MUL}, {"/", DIV},
{"=", EQ}, {"!=", NEQ}, {"<", LT}, {"<=", LTE}, {">", GT}, {">=", GTE},
{"not", NOT}, {"or", OR}, {"and", AND}, {"xor", XOR},
{"global", GLOBAL}, {"constant", CONST_DECL}, {"return", RETURN},
{"while", WHILE}, {"if", IF}, {"else", ELSE}, {"elsif", ELSIF}, {"end", END},
{"for", FOR}, {"to", KEYWORD}, {"by", KEYWORD}, {"do", KEYWORD}, {"exit", EXIT},
{"function", FUNC_DECL}, {"procedure", PROC_DECL}, {"type", TYPE_DECL},
{"+=", ADDTO}, {"-=", SUBTO}, {"*=", MULTO}, {"/=", DIVTO}, {"&=", CATTO},
{"\"", '"'}, {"$", '$'}, {"'", '\''}, {"(", '('}, {"{", '{'},
{"include", INCLUDE}, {"with", WITH}, {"without", WITHOUT}
} & choose(OE4, {
{"namespace", NAMESPACE}, {"public", PUBLIC}, {"export", EXPORT},
{"ifdef", IFDEF}, {"elsifdef", ELSIFDEF}, {"elsedef", ELSEDEF},
{"switch", SWITCH}, {"case", CASE}, {"break", BREAK},
{"continue", CONTINUE}, {"as", KEYWORD}, {"enum", ENUM_DECL},
{"default", DEFAULT}, {"entry", ENTRY}, {"retry", RETRY}, {"label", LABEL},
{"goto", GOTO}, {"routine", KEYWORD}, {"fallthru", KEYWORD}, {"`", '`'}
},{}))
-- returns text from file, else -1
function read_file(sequence file_name)
integer f
object line
sequence text
ifdef WINDOWS then
if find(upper(filename(file_name)), dos_devices) then
return -1
end if
end ifdef
f = open(file_name, "rb")
if f = -1 then
return -1
end if
line = gets(f)
text = {}
while sequence(line) do
text &= line
line = gets(f)
end while
close(f)
return text
end function
sequence text, source_filename, tok, errors
text = ""
source_filename = "<none>"
tok = ""
errors = {} -- { {SYNTAX_ERROR, pos, len, msg}... }
integer idx, tok_idx, ifdef_ok, ast_idx
idx = 1
tok_idx = 1
ifdef_ok = 1
ast_idx = 1 -- current top-level ast index used for declaring stuff
map:map cur_map -- the current maps[] during parsing
-- declare name, with name = "name" or {"name", pos, scope-start, [scope-end]}
procedure declare(sequence name)
object loc = ast_idx
if length(name) = 0 or length(name[1]) = 0 then
return
end if
if sequence(name[1]) then
loc = name
-- if length(value) < 3 or
-- not sequence(value[1]) or
-- not atom(value[2]) or
-- not atom(value[3]) or
-- (length(value) >= 4 and (not atom(value[4]) or value[4] <= value[3])) then
-- puts(1, "invalid declaration\n")
-- pretty_print(1, value)
-- return
-- end if
name = name[1]
loc[1] = ast_idx
end if
map:put(cur_map, name, append(map:get(cur_map, name, {}), loc))
end procedure
function prefixed(sequence s)
return s[1] = GLOBAL or s[1] = PUBLIC or s[1] = EXPORT
end function
-- update the declarations in this ast to the end of scope
procedure declare_ast(sequence ast, integer start_idx, integer scope_end, integer top = 0)
for j = start_idx to length(ast) do
sequence s = ast[j]
integer n = prefixed(s)
integer decl = s[n+1]
if top then
ast_idx = j
if decl = NAMESPACE then
-- {NAMESPACE, "name"}
declare(s[n+2])
elsif decl = INCLUDE and n+4 <= length(s) then
-- {INCLUDE, includes-idx, scope-start, ["namespace"]}
declare(s[n+4]) -- include as
elsif decl = FUNC_DECL or decl = PROC_DECL or decl = TYPE_DECL then
-- {FUNC_DECL, "name", pos,
-- {{"arg-type", "arg-name", pos, scope-start, [expr]}...},
-- scope-start, scope-end, stmts...}
declare(s[n+2])
sequence args = s[n+4]
integer sub_scope_end = s[n+6]
for i = 1 to length(args) do
declare(args[i][2..4] & sub_scope_end)
end for
declare_ast(s, n+7, sub_scope_end)
elsif decl = CONST_DECL then
-- {CONST_DECL, {"name", pos, scope-start, expr}... }
for i = n+2 to length(s) do
declare(s[i][1..3] & scope_end)
end for
elsif decl = ENUM_DECL then
-- {ENUM_DECL, "typename"|"", pos, '+'|'-'|'*'|'/', expr,
-- {"name", pos, scope-start, [expr]}...}
if length(s[n+2]) then
declare(s[n+2])
end if
for i = n+6 to length(s) do
declare(s[i][1..3] & scope_end)
end for
elsif decl = NAMESPACE then
declare({s[1],1,1})
end if
end if
if decl = VAR_DECL then
-- {VAR_DECL, "type", pos, {"name", pos, scope-start, [expr]}...}
for i = n+4 to length(s) do
declare(s[i][1..3] & scope_end)
end for
elsif decl = WHILE then
-- {WHILE, expr, scope-start, scope-end, stmts...}
declare_ast(s, n+5, s[n+4])
elsif decl = IF then
-- {IF, expr, {scope-start, scope-end, stmts...},
-- [expr, {scope-start, scope-end, elsif-stmts...},]...
-- [{scope-start, scope-end, else-stmts...}]}
for i = n+2 to length(s) by 2 do
if i = length(s) then
declare_ast(s[i], 3, s[i][2])
else
declare_ast(s[i+1], 3, s[i+1][2])
end if
end for
elsif decl = FOR then
-- {FOR, "name", pos, expr, expr, by, scope-start, scope-end, stmts...}
declare(s[n+2..n+3] & s[n+7..n+8])
declare_ast(s, n+9, s[n+8])
elsif decl = SWITCH then
-- {SWITCH, expr, bool-fallthru, label-string,
-- [{case-values...}, {scope-start, scope-end, stmts...},]... }
-- ("case else" will have case-values={} )
for i = n+6 to length(s) by 2 do
declare_ast(s[i], 3, s[i][2])
end for
elsif decl = LOOP then
-- {LOOP, scope-start, scope-end, stmts...}
declare_ast(s, n+4, s[n+3])
end if
end for
end procedure
-- prints an error message indicated at tok_idx
procedure error(sequence msg)
integer line, start
if length(tok) then
errors = append(errors, {SYNTAX_ERROR, tok_idx, length(tok), msg})
end if
if not ERROR_PRINT then return end if
line = 1
start = 1
if tok_idx > length(text) then
tok_idx = length(text)
end if
for i = 1 to tok_idx do
if text[i] = '\n' then
line += 1
start = i+1
end if
end for
for i = tok_idx to length(text) do
if text[i] = '\n' then
--? {start, idx, tok_idx}
printf(2, "%s:%d\n%s\n%s%s\n", {
source_filename,
line,
msg,
text[start..i],
repeat(' ', tok_idx - start) & '^'})
if ERROR_ABORT then abort(1) end if
--wait_key()
return
end if
end for
puts(2, "unexpected end of file\n")
if ERROR_ABORT then abort(1) end if
end procedure
constant ALPHA = 1, NUM = 2, HEX = 4, WS = 8, PUNCT = 16, ALPHANUM = 32
sequence chars = repeat(0, 255)
chars['A'..'Z'] = ALPHA + ALPHANUM
chars['a'..'z'] = ALPHA + ALPHANUM
chars['_'] = ALPHA + choose(OE4, NUM + HEX + ALPHANUM, 0)
chars['0'..'9'] = NUM + HEX + ALPHANUM
chars['A'..'F'] += HEX
chars['a'..'f'] += choose(OE4, HEX, 0)
chars[':'] = choose(OE4, ALPHANUM, 0)
chars['\n'] = WS
chars['\t'] = WS
chars['\r'] = WS
chars[' '] = WS
chars['<'] = PUNCT
chars['>'] = PUNCT
chars['+'] = PUNCT
chars['-'] = PUNCT
chars['*'] = PUNCT
chars['/'] = PUNCT
chars['&'] = PUNCT
chars['!'] = PUNCT
procedure skip_hashbang()
integer c = 0
if length(text) >= 2 and text[1] = '#' and text[2] = '!' then
-- skip special comment for shell interpreter
while not find(c, "\r\n") do
idx += 1
if idx > length(text) then return end if
c = text[idx]
end while
end if
end procedure
procedure skip_whitespace()
integer c
if idx > length(text) then return end if
c = text[idx]
while find(c, "\t\r\n -") or (OE4 and c='/') do
if c = '-' then
if idx >= length(text) or text[idx+1] != '-' then
exit
end if
-- skip comment
while not find(c, "\r\n") do
idx += 1
if idx > length(text) then return end if
c = text[idx]
end while
elsif OE4 and c = '/' then
if idx >= length(text) or text[idx+1] != '*' then
return
end if
idx += 2
-- skip multiline comment
while idx <= length(text) and not equal(text[idx-1..idx], "*/") do
idx += 1
end while
end if
idx += 1
if idx > length(text) then return end if
c = text[idx]
end while
end procedure
function isalpha(integer c)
return and_bits(chars[c], ALPHA)
--return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or (OE4 and c = '_')
end function
function isnum(integer c)
return and_bits(chars[c], NUM)
--return (c >= '0' and c <= '9') or (OE4 and c = '_')
end function
function isalphanum(integer c)
return and_bits(chars[c], ALPHANUM)
--return isalpha(c) or isnum(c) or c = '_' or (OE4 and c = ':')
end function
function ishex(integer c)
return and_bits(chars[c], HEX)
--return isnum(c) or (c >= 'A' and c <= 'F')
end function
function ispunct(integer c)
return and_bits(chars[c], PUNCT)
end function
procedure num_token()
-- parse new hex/binary/octal format
if OE4 and idx <= length(text) and text[tok_idx] = '0' and
idx = tok_idx+1 and find(text[idx], "xXbBoO") then
idx += 1
while idx <= length(text) and ishex(text[idx]) do
idx += 1
end while
return
end if
-- parse digits if not starting with '.'
if text[tok_idx] != '.' then
while idx <= length(text) and isnum(text[idx]) do
idx += 1
end while
if idx < length(text) and text[idx] = '.' and text[idx+1] != '.' then
idx += 1
end if
end if
-- parse fractional digits
while idx <= length(text) and isnum(text[idx]) do
idx += 1
end while
-- parse exponent
if idx <= length(text) and (text[idx] = 'e' or text[idx] = 'E') then
idx += 1
while idx <= length(text) and isnum(text[idx]) do
idx += 1
end while
end if
end procedure
function token(sequence try)
if length(tok) = 0 then
skip_whitespace()
if idx > length(text) then
return equal(tok, try)
end if
tok_idx = idx
idx += 1
switch text[tok_idx] do
case 'a','b','c','d','e','f','g','h','i','j','k','l','m','n',
'o','p','q','r','s','t','u','v','w','x','y','z','_',
'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
'O','P','Q','R','S','T','U','V','W','X','Y','Z' then
while idx <= length(text) and (isalphanum(text[idx]) or (OE4 and text[idx] = ':')) do
idx += 1
end while
case '0','1','2','3','4','5','6','7','8','9' then
num_token()
case '.' then
if idx <= length(text) and text[idx] = '.' then
idx += 1
else
num_token()
end if
case '+','-','*','/','&','<','>','!' then
if idx <= length(text) and text[idx] = '=' then
idx += 1
end if
case '#' then
while idx <= length(text) and ishex(text[idx]) do
idx += 1
end while
end switch
tok = text[tok_idx..idx-1]
-- printf(1, "token: %s\n", {tok})
end if
if equal(tok, try) then
tok = ""
return 1
end if
return 0
end function
procedure expect(sequence try)
if not token(try) then
error("expected '" & try & "', not '"&tok&"'")
tok = ""
end if
end procedure
function identifier()
if length(tok) = 0 then
if token("") then
return 0
end if
end if
-- note: namespace is the only keyword than can also be used as an identifier
return isalpha(tok[1]) and map:get(lookup_table, tok, NAMESPACE) = NAMESPACE
end function
function get_token()
sequence result
if length(tok) = 0 then
token("")
end if
result = tok
tok = ""
return result
end function
function escape_character(integer c)
integer i
sequence s
i = find(c, "trn\\\'\"")
if i = 0 then
error("unknown escape character")
return c
end if
s = "\t\r\n\\\'\""
return s[i]
end function
function string_literal()
sequence s
s = ""
-- check for triple-quoted string
if idx+1 <= length(text) and text[idx] = '"' and text[idx+1] = '"' then
-- triple-quoted string
integer start_idx = idx+2
idx += 2
while text[idx] != '"' or text[idx+1] != '"' or text[idx+2] != '"' do
idx += 1
if idx+2 > length(text) then
error("unexpected end of file")
exit
end if
end while
idx += 3
return text[start_idx..idx-4]
end if
while idx <= length(text) and text[idx] != '"' do
if text[idx] = '\n' or text[idx] = '\r' then
error("unterminated string literal")
return s
end if
if text[idx] = '\\' then
idx += 1
if idx <= length(text) then
s &= escape_character(text[idx])
end if
else
s &= text[idx]
end if
idx += 1
if idx > length(text) then
error("unexpected end of file")
return s
end if
end while
idx += 1
return s
end function
function multiline_string_literal()
integer start_idx
start_idx = idx
while text[idx] != '`' do
idx += 1
if idx > length(text) then
error("unexpected end of file")
return text[start_idx..idx-1]
end if
end while
idx += 1
return text[start_idx..idx-2]
end function
function character_literal()
integer c = 0
if idx <= length(text) then
c = text[idx]
if c = '\n' or c = '\r' then
error("unterminated character literal")
end if
idx += 1
if c = '\\' and idx <= length(text) then
c = escape_character(text[idx])
idx += 1
end if
end if
if idx > length(text) then
error("unexpected end of file")
return c
end if
if text[idx] != '\'' then
tok_idx = idx
error("expected '''")
end if
idx += 1
return c
end function
-- returns a bare or quoted filename following an include statement
-- when quoted, backslashes must be escaped
function include_filename()
sequence s
skip_whitespace()
if idx <= length(text) and text[idx] = '\"' then
idx += 1
tok_idx = idx
return string_literal()
end if
s = ""
tok_idx = idx
while idx <= length(text) and not find(text[idx], "\t\r\n ") do
s &= text[idx]
idx += 1
end while
return s
end function
-- returns a sequence of paths from the eu.cfg
-- name is path to eu.cfg file, mode can be "interpret", "translate", "bind"
global function parse_eu_cfg(sequence name, sequence mode = "interpret")
integer fd, section_ok = 1
object line
sequence result = {}
sequence allowed_sections = {"[all]", "["&mode&"]"}
ifdef WINDOWS then
allowed_sections &= {"[windows]", "["&mode&":windows]"}
end ifdef
ifdef UNIX then
allowed_sections &= {"[unix]", "["&mode&":unix]"}
end ifdef
fd = open(name, "r")
if fd = -1 then return {} end if
line = gets(fd)
while sequence(line) do
line = trim(line)
if length(line) then
if line[1] = '-' then
-- comment or compiler option, ignore it
elsif line[1] = '[' then
-- section
section_ok = find(line, allowed_sections)
elsif section_ok then
result = append(result, line)
end if
end if
line = gets(fd)
end while
close(fd)
return result
end function
-- returns a unique timestamp for filename, or -1 if doesn't exist or is a directory
global function get_timestamp(sequence file_name)
object info
ifdef WINDOWS then
if find(upper(filename(file_name)), dos_devices) then
return -1
end if
end ifdef
info = dir(file_name)
if atom(info) or length(info) = 0 or length(info[1]) < 9 then
return -1
end if
info = info[1]
if find('d', info[D_ATTRIBUTES]) or find('D', info[D_ATTRIBUTES]) then
return -1 -- don't work on directories or devices
end if
-- timestamp is contrived (unlike seconds since epoch)
-- just needs to be unique so we can tell if a file was changed.
-- there will be gaps since not all months have 31 days.
return info[D_SECOND] + 60 * (
info[D_MINUTE] + 60 * (
info[D_HOUR] + 24 * (
info[D_DAY] + 31 * (
info[D_MONTH] + 12 *
info[D_YEAR]))))
end function
-- returns index of new/existing cache entry
function cache_entry(sequence filename)
object tmp
integer f
-- find an existing entry
for i = 1 to length(cache) do
if equal(cache[i][1], filename) then
return i
end if
end for
-- create new cache and map entries
cache = append(cache, {filename, 0})
maps = append(maps, map:new())
return length(cache)
end function
-- returns -1 if not found, or index of cache entry
function include_file(sequence filename)
sequence state, tmp, paths
atom ts = -1
integer f
tmp = filename
ts = get_timestamp(tmp)
if ts = -1 and not equal(source_filename, "<none>") then
-- checks for the include file in the same directory as the parent
tmp = dirname(source_filename) & SLASH & tmp
ts = get_timestamp(tmp)
--printf(1, "%s %d\n", {tmp, ts})
if ts = -1 then
-- search for a eu.cfg in the same directory as the parent?
tmp = dirname(source_filename) & SLASH & "eu.cfg"
paths = parse_eu_cfg(tmp)
for i = 1 to length(paths) do
tmp = paths[i]
if tmp[$] != SLASH then
tmp &= SLASH
end if
tmp &= filename
ts = get_timestamp(tmp)
if ts != -1 then
exit
end if
end for
end if
end if
if ts = -1 then
-- search standard include paths (of the editor interpreter instance)
paths = include_search_paths
-- try EUDIR too
for i = 1 to length(paths) do
tmp = paths[i]
if tmp[$] != SLASH then
tmp &= SLASH
end if
tmp &= filename
ts = get_timestamp(tmp)
--printf(1, "%s %d\n", {tmp, ts})
if ts != -1 then
exit
end if
end for
end if
if ts = -1 then
return -1 -- file not found
end if
filename = canonical_path(tmp, 0, CORRECT)
-- verify that the file can be opened
f = open(filename, "rb")
if f = -1 then
puts(2, "Warning: unable to read file: "&filename&"\n")
return -1 -- unable to read file
end if
close(f)
--printf(1, "%s %d\n", {tmp, ts})
return cache_entry(filename)
end function
function to_number(sequence s)
atom x, y
integer base, start
x = 0
y = 0
base = 10
start = 1
if s[1] = '#' then
base = 16
start = 2
elsif OE4 and length(s) >= 2 and s[1] = '0' then
if s[2] = 'b' or s[2] = 'B' then
base = 2
start = 2
elsif s[2] = 'x' or s[2] = 'X' then
base = 16
start = 2
elsif s[2] = 'o' or s[2] = 'O' then
base = 8
start = 2
end if
end if
for i = start to length(s) do
if s[i] = '.' then
y = 1
elsif s[i] != '_' then
if y = 0 then
x = x * base + (s[i] - '0')
if s[i] >= 'A' then
x += (10 - 'A')
end if
else
-- fractional part
y /= base
x += y * (s[i] - '0')
-- FIXME, this is probably not accurate
if s[i] >= 'A' then
x += y * (10 - 'A')
end if
end if
end if
end for
return x
end function
function variable_or_function_call()
sequence e = {VARIABLE, get_token(), tok_idx}
--printf(1, "identifier %s\n", {e[2]})
if token("(") then
-- function call
e[1] = FUNC
if not token(")") then
--printf(1, "function call %s\n", {e[2]})
integer ok = 1
while ok do
if OE4 and token(",") then
e = append(e, {DEFAULT})
else
e = append(e, expr(1))
ok = token(",")
end if
end while
expect(")")
end if
else
while token("[") do
e = {SUBSCRIPT, e, expr(1)}
if token("..") then
e[1] = SLICE
e = append(e, expr(1))
expect("]")
exit
end if
expect("]")
end while
end if
return e
end function
constant precedence = {
0, -- LOAD
0, -- LOADHI
0, -- MOV
4, -- ADD
0, -- ADDU8
5, -- MUL
5, -- DIV
0, -- REM
2, -- JL
2, -- JLE
2, -- JE
2, -- JNE
0, -- JMP
2, -- EQ
2, -- NEQ
2, -- LT
2, -- GTE
2, -- LTE
2, -- GT
0, -- QPRINT
4, -- SUB
0, -- SUBU8
0, -- NOT
0, -- NEG
1, -- AND
1, -- OR
1, -- XOR
3 -- CAT
}
function expr(integer depth)
sequence e, t
t = get_token()
switch t do
case "not" then
e = {NOT, expr(100)}
case "(" then
e = expr(1)
expect(")")
case "-" then
e = {NEG, expr(100)}
if length(e[2]) and e[2][1] = NUMBER then
e = {NUMBER, -e[2][2]}
end if
case "+" then
e = expr(100)
case "\"" then
e = {STRING, string_literal()}
case "`" then
e = {STRING, multiline_string_literal()}
case "'" then
e = {NUMBER, character_literal()}
case "$" then
e = {SEQ_LEN}
case "{" then
e = {SEQ}
if not token("}") then
e = append(e, expr(1))
while token(",") do
if OE4 and token("$") then exit end if
e = append(e, expr(1))
end while
expect("}")
end if
case else
tok = t
if identifier() then
e = variable_or_function_call()
elsif length(tok) and (isnum(tok[1]) or tok[1] = '#' or tok[1] = '.') then
e = {NUMBER, to_number(get_token())}
else
return {SYNTAX_ERROR, tok_idx, length(tok), "expected an expression"}
end if
end switch
while 1 do
t = get_token()
switch t do
case "and","or","xor","<","<=",">",">=","=","!=","&","+","-","*","/" then
integer op = map:get(lookup_table, t)
if depth > precedence[op] then
exit
end if
e = {op, e, expr(precedence[op])}
case else
exit
end switch
end while
tok = t
return e
end function
-- returns a boolean
function ifdef_reduce(sequence e)
if length(e) = 0 then
return 0
elsif e[1] = AND then
return ifdef_reduce(e[2]) and ifdef_reduce(e[3])
elsif e[1] = OR then
return ifdef_reduce(e[2]) or ifdef_reduce(e[3])
elsif e[1] = XOR then
return ifdef_reduce(e[2]) xor ifdef_reduce(e[3])
elsif e[1] = NOT then
return not ifdef_reduce(e[2])
elsif e[1] = VARIABLE then
return find(e[2], defined_words) != 0
end if
return 0
end function
function variable_declaration()
sequence result, tmp
integer save_idx
save_idx = idx
result = {VAR_DECL, get_token(), tok_idx}
if not identifier() then
-- restore the last token
tok = result[2]
tok_idx = result[3]
idx = save_idx
return {}
end if
while 1 do
tmp = {get_token(), tok_idx, 0}
if OE4 and token("=") then
tmp = append(tmp, expr(1))
end if
tmp[3] = tok_idx -- scope-start
result = append(result, tmp)
if not token(",") or (OE4 and token("$")) then
exit
end if
if not identifier() then
error("expected an identifier")
exit
end if
end while
return result
end function
function constant_declaration()
sequence result, tmp
result = {CONST_DECL}
while identifier() do
tmp = {get_token(), tok_idx, 0, 0}
expect("=")
tmp[4] = expr(1)
tmp[3] = tok_idx -- scope-start
result = append(result, tmp)
if not token(",") or (OE4 and token("$")) then
return result
end if
end while
error("expected an identifier name")
return result
end function
function assignment_or_procedure_call()
sequence result, ops
integer ok, save_idx = idx
if not identifier() then
return {}
end if
result = {0, get_token(), tok_idx}
if token("(") then
-- procedure call
result[1] = PROC
if not token(")") then
ok = 1
while ok do
if OE4 and token(",") then
result = append(result, {DEFAULT})
else
result = append(result, expr(1))
ok = token(",")
end if
end while
expect(")")
end if
return result
elsif token("[") then
ops = {SUB_ASSIGN, SUB_ADDTO, SUB_SUBTO, SUB_MULTO, SUB_DIVTO, SUB_CATTO}
tok = "["
while token("[") do
result = append(result, expr(1))
if token("..") then
result = append(result, expr(1))
expect("]")
ops = {SLICE_ASSIGN, SLICE_ADDTO, SLICE_SUBTO, SLICE_MULTO, SLICE_DIVTO, SLICE_CATTO}
exit
end if
expect("]")
end while
else
ops = {ASSIGN, ADDTO, SUBTO, MULTO, DIVTO, CATTO}
end if
if token("=") then
result[1] = ops[1]
elsif token("+=") then
result[1] = ops[2]
elsif token("-=") then
result[1] = ops[3]
elsif token("*=") then
result[1] = ops[4]
elsif token("/=") then
result[1] = ops[5]
elsif token("&=") then
result[1] = ops[6]
else
tok = result[2]
tok_idx = result[3]
idx = save_idx
return {}
end if
return append(result, expr(1))
end function
function subroutine_declaration(integer subroutine)
sequence result, args, tmp
result = {subroutine, "", 0}
if identifier() then
result[2] = get_token()
result[3] = tok_idx
else
error("expected an identifier name")
end if
expect("(")
args = {}
while not token(")") do
if length(args) and not token(",") then
error("expected ',' or ')'")
exit
end if
if identifier() then
tmp = {get_token(), 0, 0, 0}
else
error("expected a type name")
exit
end if
if identifier() then
tmp[2] = get_token()
tmp[3] = tok_idx
else
error("expected an argument name")
exit
end if
if OE4 and token("=") then
tmp = append(tmp, expr(1))
end if
tmp[4] = tok_idx -- scope-start
args = append(args, tmp)
end while
result = append(result, args)
return result
end function
function enum_declaration()
sequence result, tmp
result = {ENUM_DECL, "", 0, '+', {NUMBER, 1}}
if token("type") then
if identifier() then
result[2] = get_token()
result[3] = tok_idx -- scope-start
else
error("expected an identifier name")
end if
end if
if token("by") then
if token("+") then
elsif token("-") then
result[4] = '-'
elsif token("*") then
result[4] = '*'
elsif token("/") then
result[4] = '/'
else
error("expected one of: + - * /")
end if
result[5] = expr(1)
end if
while identifier() do
tmp = {get_token(), tok_idx, 0}
if token("=") then
tmp = append(tmp, expr(1))
end if
tmp[3] = tok_idx -- scope-start
result = append(result, tmp)
if not token(",") or (OE4 and token("$")) then
exit
end if
end while
if length(result[2]) then
expect("end")
expect("type")
end if
return result
end function
function for_declaration()
sequence result
result = {FOR, "", 0}
if identifier() then
result[2] = get_token()
result[3] = tok_idx
else
error("expected an identifier name")
end if
expect("=")
result = append(result, expr(1))
expect("to")
result = append(result, expr(1))
if token("by") then
result = append(result, expr(1))
else
result = append(result, {NUMBER, 1})
end if
if OE4 and token("label") then
if token("\"") and length(string_literal()) then
else
error("expected a label string")
end if
end if
expect("do")
return result
end function
function filter_case_statements(sequence result, sequence ast)
-- {SWITCH, expr, bool-fallthru, label-string,
-- [{case-values...}, {scope-start, scope-end, stmts...},]... }
-- ("case else" will have case-values={} )
integer case_idx = 0, scope_start, scope_end
scope_start = ast[1]
for i = 3 to length(ast) do
sequence s = ast[i]
if s[1] = CASE then
scope_end = s[2]
if case_idx then
result = append(result, scope_start & scope_end & ast[case_idx+1..i-1])
end if
result = append(result, s[4..$])
case_idx = i
scope_start = s[3]
end if
end for
if case_idx then
scope_end = ast[2]
result = append(result, scope_start & scope_end & ast[case_idx+1..$])
end if
return result
end function
procedure with_or_without(integer mode)
if token("type_check") then
elsif token("warning") then
if not OE4 then return end if
if token("save") or token("restore") or token("strict") then
return
end if
if token("=") or token("&=") or token("+=") then
end if
if token("{") then
while tok_idx < length(text) and not token("}") do
get_token()
end while
end if
elsif token("trace") then
elsif token("profile") then
elsif token("profile_time") then
elsif OE4 and token("batch") then
elsif OE4 and token("indirect_includes") then
elsif OE4 and token("inline") then
else
error("unknown with/without option")
end if
end procedure
constant NONE = 0
function check_mode(integer mode, sequence token)
if mode = NONE then
return 0
elsif mode = IF then
tok = "if"
elsif mode = WHILE then
tok = "while"
elsif mode = FOR then
tok = "for"
elsif mode = SWITCH then
tok = "switch"
elsif mode = FUNC_DECL then
tok = "function"
elsif mode = PROC_DECL then
tok = "procedure"
elsif mode = TYPE_DECL then
tok = "type"
elsif mode = IFDEF or mode = ELSEDEF then
return 0
else
tok = "unknown"
end if
error("expected 'end "&tok&"' not '"&token&"'")
tok = token
return 1
end function
constant
PROC_FLAG = 1, -- return is allowed
FUNC_FLAG = 2, -- return with expr is allowed
LOOP_FLAG = 4, -- exit is allowed
CASE_FLAG = 8, -- case statement allowed
UNTIL_FLAG = 16 -- until statement is allowed
function return_statement(integer flags)
if and_bits(flags, FUNC_FLAG) then
return {RETURN, expr(1)}
elsif and_bits(flags, PROC_FLAG) then
return {RETURN}
end if
error("'return' is not allowed here")
return {}
end function
-- mode is NONE, FUNC_DEC, PROC_DECL, TYPE_DECL, IF, IFDEF, SWITCH, WHILE, FOR
-- sub is 0, FUNC, PROC (used to determine if return needs expr)
function statements(integer mode, integer flags)
sequence ast, s, t
integer var_decl_ok, prefix, prefix_idx, saved_ifdef_ok
integer case_ok = and_bits(flags, CASE_FLAG)
flags -= case_ok -- clear CASE_FLAG if set
s = {}
ast = {idx, 0} -- scope-start, scope-end
var_decl_ok = OE4 or find(mode, {NONE,FUNC_DECL,PROC_DECL,TYPE_DECL})
prefix = 0
while idx <= length(text) do
t = get_token()
switch t do
case "elsif", "else" then
tok = t
exit
case "elsifdef", "elsedef" then
tok = t
exit
case "end" then
if mode != NONE then
exit
end if
case "global" then
prefix = GLOBAL
prefix_idx = tok_idx
case "public" then
prefix = PUBLIC
prefix_idx = tok_idx
case "export" then
prefix = EXPORT
prefix_idx = tok_idx
case "while" then
s = {WHILE, expr(1)}
if OE4 and token("with") then
expect("entry")
elsif OE4 and token("entry") then
-- weird early syntax? appears in std includes
end if
if OE4 and token("label") then
if token("\"") and length(string_literal()) then
-- optional label string
else
error("expected a label string")
end if
end if
expect("do")
s &= statements(WHILE, or_bits(flags, LOOP_FLAG))
expect("while")
case "loop" then
s = {LOOP}
if OE4 and token("with") then
expect("entry")
end if
if OE4 and token("label") then
if token("\"") and length(string_literal()) then
-- optional label string
else
error("expected a label string")
end if
end if
expect("do")
s &= statements(LOOP, or_bits(flags, LOOP_FLAG + UNTIL_FLAG))
expect("loop")
case "until" then
if not and_bits(flags, LOOP_FLAG + UNTIL_FLAG) = LOOP_FLAG + UNTIL_FLAG then
error("until must be inside a loop")
end if
s = {UNTIL, expr(1)}
case "entry" then
if not and_bits(flags, LOOP_FLAG) then
error("entry must be inside a loop")
end if
s = {ENTRY}
case "label" then
if token("\"") then
s = {LABEL, string_literal()}-- optional label string
else
error("expected a label string")
end if
case "for" then
s = for_declaration()
s &= statements(FOR, or_bits(flags, LOOP_FLAG))
-- {FOR, name, pos, expr, expr, by, scope-start, scope-end, stmts...}
expect("for")
case "exit" then
if not and_bits(flags, LOOP_FLAG) then
tok = t
error("'exit' must be inside a loop")
tok = ""
end if
s = {EXIT}
if OE4 and token("\"") then
s &= {string_literal()} -- optional label string
end if
case "if" then
s = {IF, expr(1)}
expect("then")
s = append(s, statements(IF, flags))
while token("elsif") do
s = append(s, expr(1))
expect("then")
s = append(s, statements(IF, flags))
end while
if token("else") then
s = append(s, statements(ELSE, flags))
end if
expect("if")
case "ifdef" then
saved_ifdef_ok = ifdef_ok
flags += case_ok
ifdef_ok = ifdef_reduce(expr(1)) and saved_ifdef_ok
expect("then")
s = choose(ifdef_ok, statements(IFDEF, flags), {})
while token("elsifdef") do
ifdef_ok = ifdef_reduce(expr(1)) and length(s) = 0 and saved_ifdef_ok
expect("then")
s = choose(ifdef_ok, statements(IFDEF, flags), s)
end while
if token("elsedef") then
ifdef_ok = length(s) = 0 and saved_ifdef_ok
s = choose(ifdef_ok, statements(ELSEDEF, flags), s)
end if
expect("ifdef")
ifdef_ok = saved_ifdef_ok
flags -= case_ok
if length(s) then
-- splice statements into ast
ast &= s[3..$]
s = {}
end if
case "switch" then
s = {SWITCH, expr(1), 0, ""}
if token("with") then
expect("fallthru")
s[3] = 1 -- enable fallthru
end if
if token("label") then
if token("\"") then
s[4] = string_literal() -- optional label string
else
error("expected a label string")
end if
end if
expect("do")
s = filter_case_statements(s, statements(SWITCH, or_bits(flags, CASE_FLAG)))
expect("switch")
case "case" then
s = {CASE, tok_idx, 0}
if not case_ok then
tok = t
error("'case' must be inside 'switch'")
tok = ""
end if
if not token("else") then
while identifier() or find(tok[1], "\"'0123456789#") do
if token("\"") then
s = append(s, '"'&string_literal()&'"') -- case values
elsif token("'") then
s = append(s, '\''&character_literal()&'\'') -- case values
else
s = append(s, get_token()) -- case values
end if
if not token(",") then exit end if
end while
expect("then")
end if
s[3] = tok_idx -- scope-start
case "break" then
s = {BREAK}
if token("\"") then
s = append(s, string_literal()) -- optional label string
end if
case "continue" then
s = {CONTINUE}
if token("\"") then
s = append(s, string_literal()) -- optional label string
end if
case "goto" then
s = {GOTO}
expect("\"")
s = append(s, string_literal()) -- label string
case "?" then
s = {QPRINT, expr(1)}
case "with" then
if check_mode(mode, "with") then exit end if
with_or_without(1)
case "without" then
if check_mode(mode, "without") then exit end if
with_or_without(0)
case "include" then
if check_mode(mode, "include") then exit end if
tok = include_filename()
s = {INCLUDE, -1, idx}
if ifdef_ok then
s[2] = include_file(tok)
if s[2] = -1 then
error("can't find '"&tok&"'")
end if
end if
tok = ""
if OE4 and token("as") then
if identifier() then
s = append(s, get_token())
s[3] = tok_idx
else
error("expected an identifier")
end if
end if
if prefix = PUBLIC then
s = PUBLIC & s
prefix = 0
end if
if not ifdef_ok then
s = {}
end if
case "constant" then
if check_mode(mode, "constant") then exit end if
s = constant_declaration()
case "function" then
if check_mode(mode, "function") then exit end if
s = subroutine_declaration(FUNC_DECL)
s &= statements(FUNC_DECL, or_bits(flags, FUNC_FLAG))
expect("function")
case "procedure" then
if check_mode(mode, "procedure") then exit end if
s = subroutine_declaration(PROC_DECL)
s &= statements(PROC_DECL, or_bits(flags, PROC_FLAG))
expect("procedure")
case "type" then
if check_mode(mode, "type") then exit end if
s = subroutine_declaration(TYPE_DECL)
s &= statements(TYPE_DECL, or_bits(flags, FUNC_FLAG))
expect("type")
case "return" then
s = return_statement(flags)
case "enum" then
if check_mode(mode, "enum") then exit end if
s = enum_declaration()
case "namespace" then
if mode != NONE or length(ast) > 2 then
error("namespace must be the first statement")
elsif not identifier() then
error("expected namespace identifier")
end if
s = {NAMESPACE, get_token()}
case "{" then
s = {SEQ_ASSIGN}
while identifier() do
s &= {get_token(), tok_idx}
if not token(",") then
exit
end if
end while
expect("}")
expect("=")
s &= {expr(1)}
case else
tok = t
if identifier() then
if var_decl_ok then
s = variable_declaration()
end if
if length(s) = 0 then
s = assignment_or_procedure_call()
end if
if length(s) = 0 then
error("expected statement, not '"&tok&"'")
tok = ""
end if
elsif length(tok) then
error("expected statement, not '"&tok&"'")
tok = ""
end if
end switch
if length(s) then
if prefix then
if (length(s) > 0 and find(s[1], {VAR_DECL, CONST_DECL, ENUM_DECL,
PROC_DECL, FUNC_DECL, TYPE_DECL})) then
s = prefix & s
prefix = 0
else
tok_idx = prefix_idx
error("scope prefix wasn't expected here")
tok = ""
end if
end if
ast = append(ast, s)
s = {}
end if
if length(errors) then
ast &= errors
errors = {}
end if
end while
ast[2] = idx -- scope-end
return ast
end function
global function parse(sequence source_text, sequence file_name)
integer cache_idx
sequence ast
file_name = canonical_path(file_name, 0, CORRECT)
cache_idx = cache_entry(file_name)
source_filename = file_name
text = source_text
idx = 1
tok_idx = 1
tok = ""
ifdef_ok = 1
ast_idx = 3
cur_map = maps[cache_idx]
map:clear(cur_map)
skip_hashbang()
ast = statements(NONE, 0)
declare_ast(ast, 3, length(text), 1)
ast[1..2] = cache[cache_idx][1..2]
cache[cache_idx] = ast
return ast
end function
global function parse_file(sequence file_name)
object text = read_file(file_name)
if atom(text) then
return {} -- unable to read file
end if
return parse(text, file_name)
end function
-- during get_decls we might need to reparse a file if its timestamp changed
procedure check_cache_timestamp(integer idx)
sequence ast, file_name
atom ts
file_name = cache[idx][1]
ts = get_timestamp(file_name)
if cache[idx][2] != ts then
cache[idx][2] = ts
ast = parse_file(file_name)
if length(ast) >= 2 then
ast[1] = file_name
ast[2] = ts
cache[idx] = ast
end if
end if
end procedure
constant
F = "function",
P = "procedure",
T = "type",
I = "integer",
O = "object",
S = "sequence",
A = "atom",
builtins = {
{F, "abort", I, "errcode", 0},
{F, "and_bits", O, "a", 0, O, "b", 0},
{F, "append", S, "target", 0, O, "x", 0},
{F, "arctan", O, "tangent", 0},
{T, A, O, "x", 0},
{F, "c_func", I, "rid", 0, S, "args", 1},
{P, "c_proc", I, "rid", 0, S, "args", 1},
{F, "call", I, "id", 0, S, "args", 1},
{F, "call_func", I, "id", 0, S, "args", 1},
{P, "call_proc", I, "id", 0, S, "args", 1},
{P, "clear_screen"},
{P, "close", A, "fn", 0},
{F, "command_line"},
{F, "compare", O, "compared", 0, O, "reference", 0},
{F, "cos", O, "angle", 0},
{F, "date"},
{P, "delete", O, "x", 0},
{F, "delete_routine", O, "x", 0, I, "rid", 0},
{F, "equal", O, "left", 0, O, "right", 0},
{F, "find", O, "needle", 0, S, "haystack", 0, I, "start", 1},
{F, "floor", O, "value", 0},
{F, "get_key"},
{F, "getc", I, "fn", 0},
{F, "getenv", S, "var_name", 0},
{F, "gets", I, "fn", 0},
{F, "hash", O, "source", 0, A, "algo", 0},
{F, "head", S, "source", 0, A, "size", 1},
{F, "include_paths", I, "convert", 0},
{F, "insert", S, "target", 0, O, "what", 0, I, "index", 0},
{T, I, O, "x", 0},
{F, "length", O, "target", 0},
{F, "log", O, "value", 0},
{F, "machine_func", I, "machine_id", 0, O, "args", 1},
{P, "machine_proc", I, "machine_id", 0, O, "args", 1},
{F, "match", S, "needle", 0, S, "haystack", 0, I, "start", 1},
{P, "mem_copy", A, "destination", 0, A, "origin", 0, I, "len", 0},
{P, "mem_set", A, "destination", 0, I, "byte_value", 0, I, "how_many", 0},
{F, "not_bits", O, "a", 0},
{T, O, O, "x", 0},
{F, "open", S, "path", 0, S, "mode", 0, I, "cleanup", 1},
{F, "option_switches"},
{F, "or_bits", O, "a", 0, O, "b", 0},
{F, "peek", O, "addr_n_length", 0},
{F, "peek2s", O, "addr_n_length", 0},
{F, "peek2u", O, "addr_n_length", 0},
{F, "peek4s", O, "addr_n_length", 0},
{F, "peek4u", O, "addr_n_length", 0},
{F, "peek8s", O, "addr_n_length", 0},
{F, "peek8u", O, "addr_n_length", 0},
{F, "peek_string", A, "addr", 0},
{F, "peeks", O, "addr_n_length", 0},
{F, "pixel"},
{F, "platform"},
{P, "poke", A, "addr", 0, O, "x", 0},
{P, "poke2", A, "addr", 0, O, "x", 0},
{P, "poke4", A, "addr", 0, O, "x", 0},
{P, "poke8", A, "addr", 0, O, "x", 0},
{P, "position", I, "row", 0, I, "column", 0},
{F, "power", O, "base", 0, O, "exponent", 0},
{F, "prepend", S, "target", 0, O, "x", 0},
{P, "print", I, "fn", 0, O, "x", 0},
{P, "printf", I, "fn", 0, S, "format", 0, O, "values", 0},
{P, "puts", I, "fn", 0, O, "text", 0},
{F, "rand", O, "maximum", 0},
{F, "remainder", O, "dividend", 0, O, "divisor", 0},
{F, "remove", S, "target", 0, A, "start", 0, A, "stop", 1},
{F, "repeat", O, "item", 0, A, "count", 0},
{F, "replace", S, "target", 0, O, "replacement", 0, I, "start", 0, I, "stop", 1},
{F, "routine_id", S, "routine_name", 0},
{T, S, O, "x", 0},
{F, "sin", O, "angle", 0},
{F, "splice", S, "target", 0, O, "what", 0, I, "index", 0},
{F, "sprintf", S, "format", 0, O, "values", 0},
{F, "sqrt", O, "value", 0},
{P, "system", S, "command", 0, I, "mode", 1},
{F, "system_exec", S, "command", 0, I, "mode", 1},
{F, "tail", S, "source", 0, A, "size", 1},
{F, "tan", O, "angle", 0},
{P, "task_clock_start"},
{P, "task_clock_stop"},
{F, "task_create", I, "rid", 0, S, "args", 0},
{F, "task_list"},
{F, "task_schedule", A, "task_id", 0, O, "schedule", 0},
{F, "task_self"},
{F, "task_schedule", A, "task_id", 0},
{F, "task_suspend", A, "task_id", 0},
{F, "task_yield"},
{F, "time"},
{P, "trace", I, "mode", 0},
{F, "xor_bits", O, "a", 0, O, "b", 0}
}
map:map builtins_map = map:new()
for i = 1 to length(builtins) do
end for
global function get_builtins()
sequence s
s = {}
for i = 1 to length(builtins) do
if i > 1 then s &= ' ' end if
s &= builtins[i][2]
end for
return s
end function
function is_namespace(sequence ast, sequence name_space)
if length(ast) >= 3 and ast[3][1] = NAMESPACE and equal(ast[3][2], name_space) then
return 1
end if
return 0
end function
sequence include_ids, include_flags
constant
FILTER_LOCAL = 1, -- #01
FILTER_GLOBAL = 2, -- #02
FILTER_PUBLIC = 4, -- #04
FILTER_EXPORT = 8, -- #08
FILTER_INCLUDE = 16, -- #10
FILTER_INCLUDE_AS = 32, -- #20
FILTER_ALL = 63 -- #3F
function get_include_filter(sequence s, sequence name_space, integer filter, integer prefix)
integer idx, include_filter
idx = s[2] -- cache[] index
check_cache_timestamp(idx)
if length(name_space) then
--if length(s) >= 3 then
-- printf(1, "filter=%x namespace=%s include as %s\n", {filter, name_space, s[3]})
--end if
if filter = FILTER_GLOBAL + FILTER_PUBLIC + FILTER_EXPORT then
-- include as namespace -> include
include_filter = FILTER_PUBLIC
elsif and_bits(filter, FILTER_PUBLIC) and prefix = FILTER_PUBLIC then
-- a public include from nested include
include_filter = FILTER_PUBLIC
elsif and_bits(filter, FILTER_INCLUDE_AS) and length(cache[idx]) >= 3 and
equal(cache[idx][3], {NAMESPACE, name_space}) then
-- include has same namespace
include_filter = FILTER_GLOBAL + FILTER_PUBLIC + FILTER_EXPORT
else
include_filter = 0
end if
elsif and_bits(filter, FILTER_INCLUDE_AS) then
-- top-level include
include_filter = FILTER_GLOBAL + FILTER_PUBLIC + FILTER_EXPORT
elsif and_bits(filter, FILTER_PUBLIC) and prefix = FILTER_PUBLIC then
-- public sub-include
include_filter = FILTER_GLOBAL + FILTER_PUBLIC
else
-- sub-include
include_filter = FILTER_GLOBAL
end if
idx = find(s[2], include_ids)
if idx = 0 then
-- new entry
include_ids &= s[2]
include_flags &= 0
idx = length(include_ids)
elsif and_bits(include_flags[idx], include_filter) = include_filter then
-- avoid adding the same symbols again
return -1
end if
include_filter = and_bits(include_filter, not_bits(include_flags[idx]))
include_flags[idx] = or_bits(include_flags[idx], include_filter)
return include_filter + FILTER_INCLUDE
end function
-- returns {{"subroutine-type", "name", ["arg1-type", "arg1-name", is_default]... }... }
function get_args(sequence ast, sequence word, sequence name_space, integer filter)
sequence result, s
integer x, decl, prefix, include_filter
if length(name_space) and is_namespace(ast, name_space) then
filter = and_bits(filter, FILTER_INCLUDE + FILTER_INCLUDE_AS + FILTER_PUBLIC)
if filter = 0 then
return {} -- no namespace or mismatch
end if
end if
result = {}
for i = 3 to length(ast) do
s = ast[i]
prefix = power(2, find(s[1], {GLOBAL, PUBLIC, EXPORT}))
if prefix > 1 then
s = s[2..$] -- remove prefix
end if
decl = s[1]
if and_bits(filter, prefix) = 0 and decl != INCLUDE then
decl = 0
end if
if decl = INCLUDE and and_bits(filter, FILTER_INCLUDE) then
-- {INCLUDE, includes-index, scope-start, ["namespace"]}
x = s[2]
if x != -1 and and_bits(filter, FILTER_INCLUDE) then
include_filter = get_include_filter(s, name_space, filter, prefix)
if include_filter != -1 then
result &= get_args(cache[x], word, name_space, include_filter)
end if
end if
elsif decl = FUNC_DECL or decl = PROC_DECL or decl = TYPE_DECL and equal(s[2], word) then
-- {FUNC_DECL, "name", pos,
-- {{"arg-type", "arg-name", pos, scope-start, [expr]}...},
-- scope-start, scope-end, stmts...}
if decl = FUNC_DECL then
result &= {{"function", s[2]}}
elsif decl = PROC_DECL then
result &= {{"procedure", s[2]}}
elsif decl = TYPE_DECL then
result &= {{"type", s[2]}}
end if
for j = 1 to length(s[4]) do -- display arguments
result[$] &= {s[4][j][1], s[4][j][2], length(s[4][j]) = 5} -- {"type", "name", has-default}
end for
end if
end for
-- scan builtins
if length(result) = 0 then
for i = 1 to length(builtins) do
if equal(word, builtins[i][2]) then
result &= {builtins[i]}
exit
end if
end for
end if
return result
end function
function type_to_decl(sequence name)
if equal(name, "atom") then
return DECL_ATOM
elsif equal(name, "sequence") then
return DECL_SEQUENCE
elsif equal(name, "integer") then
return DECL_INTEGER
end if
return DECL_OBJECT
end function
-- to prevent recursion, symbols may be included from a file
-- only once for each type of flag: global, public, export
-- namespace matches:
-- top-level file has same namespace
-- top-level file has include as same namespace
-- included file has same namespace
-- included file is publicly included by file with same namespace
-- get a list of declarations from ast in scope at pos
-- returns {{"name1", pos1, type1}, {"name2", pos2, type2}...}
-- pos may be be an integer for the position in the current file,
-- or {pos, "include-path"} for included files.
function get_decls(sequence ast, integer pos, sequence name_space, integer filter)
sequence result, s
integer x, decl, prefix, include_filter
if length(name_space) and is_namespace(ast, name_space) then
filter = and_bits(filter, FILTER_INCLUDE + FILTER_INCLUDE_AS + FILTER_PUBLIC)
if filter = 0 then
return {} -- no namespace or mismatch
end if
end if
result = {}
if length(name_space) = 0 and length(ast) >= 3 and ast[3][1] = NAMESPACE then
if and_bits(filter, FILTER_PUBLIC) then
result = {{ast[3][2] & ':', 1, DECL_NAMESPACE}}
end if
end if
for i = 3 to length(ast) do
s = ast[i]
prefix = power(2, find(s[1], {GLOBAL, PUBLIC, EXPORT}))
if prefix > 1 then
s = s[2..$] -- remove prefix
end if
decl = s[1]
if and_bits(prefix, filter) = 0 and decl != INCLUDE then
-- the scope modifier didn't pass the filter
decl = 0
end if
if decl = INCLUDE and and_bits(filter, FILTER_INCLUDE) then
-- {INCLUDE, includes-index, scope-start, ["namespace"]}
x = s[2] -- includes-index into cache
if x != -1 then
--printf(1, "include %s filter=%x\n", {cache[x][1], filter})
if length(name_space) and and_bits(filter, FILTER_INCLUDE_AS) and
length(s) >= 4 and equal(s[4], name_space) and pos >= s[3] then
-- found a matching "include as"
filter = 0
include_filter = FILTER_GLOBAL+FILTER_PUBLIC+FILTER_EXPORT+FILTER_INCLUDE
result = {}
name_space = {}
else
include_filter = get_include_filter(s, name_space, filter, prefix)
end if
if include_filter != -1 then
check_cache_timestamp(x)
s = get_decls(cache[x], 0, name_space, include_filter)
--printf(1, "%s: %d\n", {cache[x][1], length(cache[x])})
for j = 1 to length(s) do
--printf(1, "%s: %d\n", {s[j-1], s[j]})
if not sequence(s[j][2]) then
s[j][2] = {cache[x][1], s[j][2]} -- is {filename, pos}
end if
end for
result &= s
end if
end if
elsif decl = CONST_DECL then
-- {CONST_DECL, {"name", pos, scope-start, expr}... }
--printf(1, "constant\n", {})
for j = 2 to length(s) do
--printf(1, " %s: %d\n", {s[j][1], s[j][2]})
if length(s[j]) >= 3 and (pos >= s[j][3] or filter) then -- in scope?
result = append(result, {s[j][1], s[j][2], DECL_CONSTANT})
end if
end for
elsif decl = VAR_DECL then
-- {VAR_DECL, "type", pos, {"name", pos, scope-start, [expr]}...}
--printf(1, s[2] & "\n", {})
for j = 4 to length(s) do
--printf(1, " %s: %d\n", {s[j][1], s[j][2]})
if length(s[j]) >= 3 and (pos >= s[j][3] or filter) then -- in scope?
result = append(result, {s[j][1], s[j][2], type_to_decl(s[2])})
end if
end for
elsif decl = FUNC_DECL or decl = PROC_DECL or decl = TYPE_DECL then
-- {FUNC_DECL, "name", pos,
-- {{"arg-type", "arg-name", pos, scope-start, [expr]}...},
-- scope-start, scope-end, stmts...}
if decl = FUNC_DECL then
--printf(1, "function %s: %d scope=%d..%d\n", {s[2], s[3], s[5], s[6]})
result = append(result, {s[2], s[3], DECL_FUNCTION})
elsif decl = PROC_DECL then
--printf(1, "procedure %s: %d scope=%d..%d\n", {s[2], s[3], s[5], s[6]})
result = append(result, {s[2], s[3], DECL_PROCEDURE})
elsif decl = TYPE_DECL then
--printf(1, "type %s: %d scope=%d..%d\n", {s[2], s[3], s[5], s[6]})
result = append(result, {s[2], s[3], DECL_TYPE})
end if
if length(s) >= 6 and pos >= s[5] and pos <= s[6] then -- in scope?
for j = 1 to length(s[4]) do -- display arguments
if length(s[4][j]) >= 4 and pos >= s[4][j][4] then
--printf(1, " %s %s: %d\n", {s[4][j][1], s[4][j][2], s[4][j][3]})
result = append(result, {s[4][j][2], s[4][j][3], type_to_decl(s[4][j][1])})
end if
end for
result &= get_decls(s[5..$], pos, name_space, filter)
end if
elsif decl = FOR then
-- {FOR, name, pos, expr, expr, by, scope-start, scope-end, stmts...}
if length(s) >= 8 and pos >= s[7] and pos <= s[8] then -- in scope?
--printf(1, "for %s: %d\n", {s[2], s[3]})
result = append(result, {s[2], s[3], DECL_ATOM})
result &= get_decls(s[7..$], pos, name_space, filter)
end if
elsif decl = WHILE then
-- {WHILE, expr, scope-start, scope-end, stmts...}
if length(s) >= 4 and pos >= s[3] and pos <= s[4] then -- in scope?
result &= get_decls(s[3..$], pos, name_space, filter)
end if
elsif decl = LOOP then
-- {LOOP, scope-start, scope-end, stmts...}
if length(s) >= 3 and pos >= s[2] and pos <= s[3] then -- in scope?
result &= get_decls(s[2..$], pos, name_space, filter)
end if
elsif decl = IF then
-- {IF, expr, {scope-start, scope-end, stmts...},
-- [expr, {scope-start, scope-end, elsif-stmts...},]...
-- [{scope-start, scope-end, else-stmts...}]}
for j = 2 to length(s) by 2 do
x = (j != length(s))
if length(s[j+x]) >= 2 and pos >= s[j+x][1] and pos <= s[j+x][2] then -- in scope?
result &= get_decls(s[j+x], pos, name_space, filter)
end if
end for
elsif decl = ENUM_DECL then
-- {ENUM_DECL, "typename"|"", pos, '+'|'-'|'*'|'/', expr,
-- {"name", pos, scope-start, [expr]}...}
if length(s[2]) then -- has typename
result = append(result, {s[2], s[3], DECL_TYPE})
end if
for j = 6 to length(s) do
if length(s[j]) >= 3 and pos >= s[j][3] then -- in scope?
result = append(result, {s[j][1], s[j][2], DECL_ENUM})
end if
end for
end if
end for
return result
end function
global function get_declarations(sequence ast, integer pos, sequence name_space)
include_ids = {}
include_flags = {}
return get_decls(ast, pos, name_space, choose(length(name_space), FILTER_INCLUDE + FILTER_INCLUDE_AS, FILTER_ALL))
end function
global function get_subroutines(sequence ast)
sequence result, s
integer decl, n
result = {}
for i = 3 to length(ast) do
s = ast[i]
n = prefixed(s)
decl = s[n+1]
if decl = FUNC_DECL or decl = PROC_DECL or decl = TYPE_DECL then
-- {FUNC_DECL, "name", pos,
-- {{"arg-type", "arg-name", pos, scope-start, [expr]}...},
-- scope-start, scope-end, stmts...}
result = append(result, {s[n+2], s[n+3]})
end if
end for
return result
end function
-- returns {["subroutine-type", {["arg1-type", "arg1-name", has-default]...}, ]...}
global function get_subroutine_arguments(sequence ast, sequence word, sequence namespace)
include_ids = {}
include_flags = {}
return get_args(ast, word, namespace, FILTER_ALL)
end function
-- returns {word, namespace, start, end} otherwise {""}
global function word_pos(sequence text, integer pos)
if pos > length(text) then
return {""}
end if
for i = pos+1 to 1 by -1 do
-- find the start of the word
if i = 1 or not isalphanum(text[i-1]) then
-- find the end of the word
while pos < length(text) and isalphanum(text[pos+1]) do
pos += 1
end while
-- words must start with a letter
if i <= length(text) and isalpha(text[i]) then
-- look for a colon
for j = i to pos do
if text[j] = ':' then
-- found namespace and word
return {text[j+1..pos], text[i..j-1], j+1, pos}
end if
end for
-- found word only
return {text[i..pos], "", i, pos}
end if
exit
end if
end for
return {""}
end function
sequence suggested_includes, suggested_word, suggested_namespace, suggested_path
function walk_include(sequence path_name, sequence dirent)
sequence decls
path_name &= SLASH & dirent[D_NAME]
if length(path_name) < 2 or (path_name[$] != 'e' and path_name[$] != 'E') or path_name[$-1] != '.' then
-- path_name doesn't end with .e or .E
return 0
end if
integer cache_idx = cache_entry(canonical_path(path_name, 0, CORRECT))
if cache_idx > 0 then
check_cache_timestamp(cache_idx)
include_ids = {}
include_flags = {}
sequence ast = cache[cache_idx]
if length(suggested_namespace) = 0 or is_namespace(ast, suggested_namespace) then
integer filter = FILTER_GLOBAL + FILTER_PUBLIC + FILTER_EXPORT
if length(suggested_namespace) then
filter += FILTER_INCLUDE
end if
decls = get_decls(ast, 0, suggested_namespace, filter)
for i = 1 to length(decls) do
--puts(1, " "&decls[i]&"\n")
if length(decls[i][1]) >= length(suggested_word) and
equal(decls[i][1][1..length(suggested_word)], suggested_word) then
--puts(1, dirent[D_NAME]&" matched!\n")
suggested_includes = append(suggested_includes,
{decls[i][1] & " --include "& path_name[length(suggested_path)+2..$],
{ast[1], decls[i][2]}, decls[i][3]})
end if
end for
end if
end if
return 0 -- keep searching all files
end function
constant walk_include_id = routine_id("walk_include")
-- returns a list of include files which contain a declaration decl
global function suggest_includes(sequence word, sequence name_space)
sequence paths, path, result
suggested_includes = {}
suggested_word = word
suggested_namespace = name_space
paths = include_search_paths
for i = 1 to length(paths) do
path = paths[i]
--puts(1, "include_dir="&paths[i]&"\n")
if path[$] = SLASH then
path = path[1..$-1]
end if
if length(path) > 8 and equal(path[$-7..$], SLASH & "include") then
suggested_path = path
if walk_dir(path, walk_include_id, 1) = 0 then
-- success!
end if
end if
end for
result = suggested_includes
suggested_includes = {}
return result
end function
-- parse argument expressions, returning the last argument position
global function parse_argument_position(sequence source_text)
integer arg, old_idx
sequence e
text = source_text
idx = 1
tok_idx = 1
tok = ""
arg = 1
for i = 1 to 1000 do
if tok_idx > length(text) then
return arg
end if
if token(")") then
return 0
end if
if token(",") then
arg += 1
end if
old_idx = tok_idx
e = expr(1)
--? {e, tok, tok_idx}
--if length(e) = 0 and length(tok) = 0 then
if old_idx = tok_idx then
return arg
end if
end for
printf(1, "stuck parsing argument position for \"%s\"\n", {text})
? e
? tok
return arg
end function
-- cur_ast: {cache-idx...}
-- cur_scope: {FILTER_* or'd...}
sequence cur_ast, cur_scope, check_result = {}
integer cur_sub = 0 -- whether or not inside subroutine
-- scan ast for a declaration at pos, returns 0 if not found, otherwise
-- one of FUNC_DECL, PROC_DECL, TYPE_DEC, VAR_DECL, CONST_DECL or FOR
function decl_kind(sequence ast, integer start_idx, integer pos)
for j = start_idx to length(ast) do
sequence s = ast[j]
integer decl = s[1]
if decl = FUNC_DECL or decl = PROC_DECL or decl = TYPE_DECL then
-- {FUNC_DECL, "name", pos,
-- {{"arg-type", "arg-name", pos, scope-start, [expr]}...},
-- scope-start, scope-end, stmts...}
if pos = s[3] then
return decl
end if
sequence args = s[4]
for i = 1 to length(args) do
if pos = args[i][3] then
return VAR_DECL
end if
end for
decl = decl_kind(s, 7, pos)
if decl then
return decl
end if
elsif decl = VAR_DECL then
-- {VAR_DECL, "type", pos, {"name", pos, scope-start, [expr]}...}
for i = 4 to length(s) do
if pos = s[i][2] then
return decl
end if
end for
elsif decl = CONST_DECL then
-- {CONST_DECL, {"name", pos, scope-start, expr}... }
for i = 2 to length(s) do
if pos = s[i][2] then
return decl
end if
end for
elsif decl = WHILE then
-- {WHILE, expr, scope-start, scope-end, stmts...}
if pos >= s[3] and pos <= s[4] then
decl = decl_kind(s, 5, pos)
if decl then
return decl
end if
end if
elsif decl = LOOP then
-- {LOOP, scope-start, scope-end, stmts...}
if pos >= s[2] and pos <= s[3] then
decl = decl_kind(s, 4, pos)
if decl then
return decl
end if
end if
elsif decl = IF then
-- {IF, expr, {scope-start, scope-end, stmts...},
-- [expr, {scope-start, scope-end, elsif-stmts...},]...
-- [{scope-start, scope-end, else-stmts...}]}
decl = 0
for i = 2 to length(s) by 2 do
if i = length(s) then
if pos >= s[i][1] and pos <= s[i][2] then
decl = decl_kind(s[i], 3, pos)
end if
else
if pos >= s[i+1][1] and pos <= s[i+1][2] then
decl = decl_kind(s[i+1], 3, pos)
end if
end if
if decl then
return decl
end if
end for
elsif decl = FOR then
-- {FOR, "name", pos, expr, expr, by, scope-start, scope-end, stmts...}
if pos = s[3] then
return decl
end if
if pos >= s[7] and pos <= s[8] then
decl = decl_kind(s, 9, pos)
if decl then
return decl
end if
end if
elsif decl = ENUM_DECL then
-- {ENUM_DECL, "typename"|"", pos, '+'|'-'|'*'|'/', expr,
-- {"name", pos, scope-start, [expr]}...}
if pos = s[3] then
return TYPE_DECL
end if
for i = 6 to length(s) do
if pos = s[i][2] then
return CONST_DECL
end if
end for
elsif decl = SWITCH then
-- {SWITCH, expr, bool-fallthru, label-string,
-- [{case-values...}, {scope-start, scope-end, stmts...},]... }
-- ("case else" will have case-values={} )
for i = 6 to length(s) by 2 do
if pos >= s[i][1] and pos <= s[i][2] then
decl = decl_kind(s[i], 3, pos)
if decl then
return decl
end if
end if
end for
end if
end for
return 0
end function
-- returns the decl kind if pos in scope of loc d, otherwise 0
function decl_check(sequence ast, object d, integer pos, integer filter)
sequence s
if sequence(d) then
-- variable, constant, enum value or for-variable declaration
s = ast[d[1]]
else
-- function, procedure, type or enum-type declaration
s = ast[d]
end if
-- get scope modifier bits
integer prefix = power(2, find(s[1], {GLOBAL, PUBLIC, EXPORT}))
if and_bits(prefix, filter) = 0 then
return 0
end if
if prefix > 1 then
s = s[2..$] -- remove scope modifier
end if
if atom(d) then
return s[1]
end if
-- d: { ast-index, pos, scope-start, [scope-end] }
if s[1] = VAR_DECL or s[1] = CONST_DECL or s[1] = ENUM_DECL then
-- top level always in scope
if and_bits(filter, FILTER_LOCAL) and cur_sub = 0 and pos < d[3] and cur_ast[1] = d[1] then
-- euphoria currently doesn't allow top-level forward references in the same file
return 0
end if
return s[1]
end if
if and_bits(filter, FILTER_LOCAL) and pos >= d[3] and (length(d) < 4 or pos <= d[4]) then
-- in scope
return decl_kind({s}, 1, d[2])
end if
return 0
end function
-- return a sequence of cache_idx
function public_includes(integer cache_idx, sequence result)
sequence ast = cache[cache_idx]
if cache_idx > 0 and not find(cache_idx, result) then
result &= cache_idx
for i = 3 to length(ast) do
sequence s = ast[i]
if s[1] = PUBLIC and s[2] = INCLUDE and s[3] != -1 then
result = public_includes(s[3], result)
end if
end for
end if
return result
end function
-- returns 1 if the type of declaration "name" at pos is in the list of decls
-- otherwise 0
function check_name(sequence name, integer pos, sequence decls)
integer ns = find(':', name)
sequence asts = cur_ast
sequence name_space = ""
if ns then
name_space = name[1..ns-1]
name = name[ns+1..$]
asts = {}
sequence ast = cache[cur_ast[1]]
-- scan for include as
for i = 3 to length(ast) do
sequence s = ast[i]
integer n = prefixed(s)
if s[n+1] = INCLUDE and s[n+2] != -1 and length(s) >= n+4 and equal(s[n+4], name_space) and pos >= s[n+3] then
asts = public_includes(s[n+2], asts)
exit
end if
end for
if length(asts) = 0 then
if equal(name_space, "eu") then
ns = 0 -- special builtins namespace
else
-- search includes for a matching namespace
for i = 1 to length(cur_ast) do
integer cache_idx = cur_ast[i]
ast = cache[cache_idx]
-- does it have a matching namespace at the top
if and_bits(cur_scope[i], FILTER_PUBLIC) and is_namespace(ast, name_space) then
asts = public_includes(cache_idx, asts)
end if
end for
end if
end if
end if
for j = 1 to length(asts) do
integer cache_idx = asts[j]
sequence entries = map:get(maps[cache_idx], name, {})
sequence ast = cache[cache_idx]
integer filter = cur_scope[find(asts[j], cur_ast)]
for i = 1 to length(entries) do
integer decl = decl_check(ast, entries[i], pos, filter)
if find(decl, decls) then
return 1
end if
end for
end for
if ns = 0 then
sequence sub_decls = {PROC_DECL, FUNC_DECL, TYPE_DECL}
-- check builtins
for i = 1 to length(builtins) do
if equal(builtins[i][2], name) then
return find(sub_decls[find(builtins[i][1], {P, F, T})], decls)
end if
end for
end if
return 0
end function
-- scan for variables only, which may be used for assignment
procedure check_var(sequence name, integer pos)
if not check_name(name, pos, {VAR_DECL}) then
check_result &= {pos, length(name), "variable '"&name&"' has not been declared"}
end if
end procedure
-- scan for variables, constants, enum values, or for-variables
procedure check_identifier(sequence name, integer pos)
if not check_name(name, pos, {VAR_DECL, CONST_DECL, ENUM_DECL, FOR}) then
check_result &= {pos, length(name), "identifier '"&name&"' has not been declared"}
end if
end procedure
-- scan for functions or types, used for function call
procedure check_func(sequence name, integer pos)
if not check_name(name, pos, {FUNC_DECL, TYPE_DECL, ENUM_DECL}) then
check_result &= {pos, length(name), "function '"&name&"' has not been declared"}
end if
end procedure
-- scan for types or typed enum, used for variable declaration
procedure check_type(sequence name, integer pos)
if not check_name(name, pos, {TYPE_DECL, ENUM_DECL}) then
check_result &= {pos, length(name), "type '"&name&"' has not been declared"}
end if
end procedure
constant proc_list = choose(OE4, {PROC_DECL, FUNC_DECL, TYPE_DECL, ENUM_DECL}, {PROC_DECL})
-- scan for procedures (OE4: or functions or types)
procedure check_proc(sequence name, integer pos)
if not check_name(name, pos, proc_list) then
check_result &= {pos, length(name), "procedure '"&name&"' has not been declared"}
end if
end procedure
procedure check_expr(sequence expr)
integer decl
if length(expr) = 0 then return end if
decl = expr[1]
if decl = VARIABLE then
-- {VARIABLE, "name", pos}
check_identifier(expr[2], expr[3])
elsif decl = FUNC then
-- {FUNC, "name", pos, [args...]}
check_func(expr[2], expr[3])
for i = 4 to length(expr) do
check_expr(expr[i])
end for
elsif find(decl, {ADD, SUB, MUL, DIV, NEG, NOT, GT, LT, GTE, LTE, EQ,
NEQ, OR, XOR, AND, SUBSCRIPT, CAT, SLICE, SEQ}) then
for i = 2 to length(expr) do
check_expr(expr[i])
end for
elsif decl = SYNTAX_ERROR then
-- {SYNTAX_ERROR, pos, len, "message"}
check_result &= expr[2..4]
end if
end procedure
procedure attempt_redefine(sequence name, integer pos)
check_result &= {pos, length(name), "attempt to redefine '"&name&"'"}
end procedure
-- scan "name" at pos for redefinitions
procedure check_redefinition(sequence name, integer pos)
integer cache_idx = cur_ast[1]
sequence ast = cache[cache_idx]
sequence entries = map:get(maps[cache_idx], name, {})
for i = 1 to length(entries) do
object d = entries[i]
if atom(d) then
sequence s = ast[d]
-- d: ast-index (func/proc/type/enum-type)
if not cur_sub then
integer n = prefixed(s)
if find(s[n+1], {NAMESPACE, INCLUDE}) then
-- it must be a namespace or include-as
if s[n+1] = NAMESPACE or s[n+3] < pos then
-- check if pos is another include-as with the same name
for j = i+1 to length(entries) do
if atom(entries[j]) then
s = ast[entries[j]]
n = prefixed(s)
if s[n+1] = INCLUDE and n+3 <= length(s) and s[n+3] = pos then
attempt_redefine(name, pos)
return
end if
end if
end for
end if
elsif not find(s[n+1], {FUNC_DECL, PROC_DECL, TYPE_DECL, ENUM_DECL}) then
-- not a subroutine? something is wrong with the parser
printf(1, "%s %d %d\n", {name, s[n+1], d})
? entries
elsif n+3 <= length(s) and s[n+3] < pos then
--printf(1, "%s at %d with %d\n", {name, pos, s[n+3]})
attempt_redefine(name, pos)
return
end if
end if
elsif d[2] < pos then
-- d: {ast-index, pos, scope-start, [scope-end]}
sequence s = ast[d[1]]
integer n = prefixed(s)
integer top_level = 1
if find(s[n+1], {FUNC_DECL, PROC_DECL, TYPE_DECL}) and s[n+3] != d[2] then
top_level = 0
end if
if cur_sub != top_level and decl_check(ast, d, pos, FILTER_LOCAL) then
attempt_redefine(name, pos)
return
end if
end if
end for
end procedure
-- add an include to the cur_ast and calculate it's visibility/scope
procedure check_include(sequence s, integer filter)
integer n = s[1] != INCLUDE
integer cache_idx = s[n+2]
integer cur_idx = find(cache_idx, cur_ast)
if cache_idx = -1 then
return
end if
if cur_idx = 0 then
-- create a new entry
check_cache_timestamp(cache_idx)
cur_ast = append(cur_ast, cache_idx)
cur_scope = append(cur_scope, filter)
else
-- existing entry, filter bits already set?
if and_bits(cur_scope[cur_idx], filter) = filter then
return
end if
-- update the filter bits and rescan the ast
cur_scope[cur_idx] = or_bits(cur_scope[cur_idx], filter)
end if
-- scan the ast for sub-includes
sequence ast = cache[cache_idx]
for i = 3 to length(ast) do
s = ast[i]
if s[1] = INCLUDE then
if and_bits(filter, FILTER_LOCAL) then
check_include(s, FILTER_GLOBAL+FILTER_PUBLIC+FILTER_EXPORT)
else
check_include(s, FILTER_GLOBAL)
end if
elsif s[1] = PUBLIC and s[2] = INCLUDE then
if and_bits(filter, FILTER_LOCAL) then
check_include(s, FILTER_GLOBAL+FILTER_PUBLIC+FILTER_EXPORT)
else
check_include(s, and_bits(filter, FILTER_GLOBAL+FILTER_PUBLIC))
end if
end if
end for
end procedure
procedure check_ast(sequence ast, integer start_idx)
for j = start_idx to length(ast) do
sequence s = ast[j]
integer n = prefixed(s)
integer decl = s[n+1]
if decl = INCLUDE then
-- {INCLUDE, includes-idx, scope-start, ["namespace"]}
if n+4 <= length(s) then
check_redefinition(s[n+4], s[n+3])
end if
elsif decl = FUNC_DECL or decl = PROC_DECL or decl = TYPE_DECL then
-- {FUNC_DECL, "name", pos,
-- {{"arg-type", "arg-name", pos, scope-start, [expr]}...},
-- scope-start, scope-end, stmts...}
sequence args = s[n+4]
check_redefinition(s[n+2], s[n+3])
cur_sub = 1
for i = 1 to length(args) do
check_redefinition(args[i][2], args[i][3])
if length(args[i]) >= 5 then
check_expr(args[i][5])
end if
end for
check_ast(s, n+7)
cur_sub = 0
elsif decl = PROC then
-- {PROC, "name", pos, [args...]}
check_proc(s[n+2], s[n+3])
for i = n+4 to length(s) do
check_expr(s[i])
end for
elsif decl = VAR_DECL then
-- {VAR_DECL, "type", pos, {"name", pos, scope-start, [expr]}...}
check_type(s[n+2], s[n+3])
for i = n+4 to length(s) do
check_redefinition(s[i][1], s[i][2])
if length(s[i]) >= 4 then
check_expr(s[i][4])
end if
end for
elsif decl = CONST_DECL then
-- {CONST_DECL, {"name", pos, scope-start, expr}... }
for i = n+2 to length(s) do
check_redefinition(s[i][1], s[i][2])
check_expr(s[i][4])
end for
elsif decl = WHILE then
-- {WHILE, expr, scope-start, scope-end, stmts...}
check_expr(s[n+2])
check_ast(s, n+5)
elsif decl = LOOP then
-- {LOOP, scope-start, scope-end, stmts...}
check_ast(s, n+4)
elsif decl = IF then
-- {IF, expr, {scope-start, scope-end, stmts...},
-- [expr, {scope-start, scope-end, elsif-stmts...},]...
-- [{scope-start, scope-end, else-stmts...}]}
for i = n+2 to length(s) by 2 do
if i = length(s) then
check_ast(s[i], 3)
else
check_expr(s[i])
check_ast(s[i+1], 3)
end if
end for
elsif decl = FOR then
-- {FOR, "name", pos, expr, expr, by, scope-start, scope-end, stmts...}
check_redefinition(s[n+2], s[n+3])
check_expr(s[n+4]) -- first
check_expr(s[n+5]) -- last
check_expr(s[n+6]) -- by
check_ast(s, n+9)
elsif decl = ENUM_DECL then
-- {ENUM_DECL, "typename"|"", pos, '+'|'-'|'*'|'/', expr,
-- {"name", pos, scope-start, [expr]}...}
if length(s[n+2]) then
check_redefinition(s[n+2], s[n+3])
end if
check_expr(s[n+5])
for i = n+6 to length(s) do
check_redefinition(s[i][1], s[i][2])
if length(s[i]) >= 4 then
check_expr(s[i][4])
end if
end for
elsif decl = SWITCH then
-- {SWITCH, expr, bool-fallthru, label-string,
-- [{case-values...}, {scope-start, scope-end, stmts...},]... }
-- ("case else" will have case-values={} )
check_expr(s[n+2])
for i = n+6 to length(s) by 2 do
check_ast(s[i], 3)
end for
elsif find(decl, {ASSIGN, SUB_ASSIGN, SUB_ADDTO, SUB_SUBTO, SUB_MULTO,
SUB_DIVTO, SUB_CATTO, SLICE_ASSIGN, SLICE_ADDTO,
SLICE_SUBTO, SLICE_MULTO, SLICE_DIVTO, SLICE_CATTO}) then
-- {ASSIGN, "name", pos, expr}
-- {SUB_ASSIGN, "name", pos, index-expr..., expr}
-- {SLICE_ASSIGN, "name", pos, index-expr..., start-expr, end-expr, expr}
check_var(s[n+2], s[n+3])
for i = n+4 to length(s) do
check_expr(s[i])
end for
elsif decl = SEQ_ASSIGN then
-- {SEQ_ASSIGN, ["name1", pos1,]... expr}
for i = n+2 to length(s)-1 by 2 do
check_var(s[i], s[i+1])
end for
check_expr(s[$])
elsif decl = QPRINT or decl = UNTIL then
check_expr(s[2])
elsif decl = SYNTAX_ERROR then
-- {SYNTAX_ERROR, pos, len, "message"}
check_result &= s[2..4]
end if
end for
end procedure
-- returns sequence of error positions, lengths, and messages:
-- {pos1, len1, msg1, pos2, len2, msg2, ...}
global function parse_errors(sequence source_text, sequence file_name)
integer cache_idx
sequence result, ast
file_name = canonical_path(file_name, 0, CORRECT)
--atom t0 = time()
ast = parse(source_text, file_name)
--? time() - t0
cache_idx = cache_entry(file_name)
cur_ast = {}
cur_scope = {}
-- check includes up front, so forward references work
check_include({INCLUDE, cache_idx}, FILTER_ALL)
check_ast(ast, 3)
result = check_result
cur_ast = {}
cur_scope = {}
check_result = {}
return result
end function