#!/usr/bin/env lua
--[[--------------------------------------------------------------------

  LuaSrcDiet
  Compresses Lua source code by removing unnecessary characters.
  For Lua 5.1.x source code.

  Copyright (c) 2008,2011,2012 Kein-Hong Man
  The COPYRIGHT file describes the conditions
  under which this software may be distributed.

----------------------------------------------------------------------]]

--[[--------------------------------------------------------------------
-- NOTES:
-- * Remember to update version and date information below (MSG_TITLE)
-- * TODO: passing data tables around is a horrific mess
-- * TODO: to implement pcall() to properly handle lexer etc. errors
-- * TODO: need some automatic testing for a semblance of sanity
-- * TODO: the plugin module is highly experimental and unstable
----------------------------------------------------------------------]]

-- standard libraries and library functions, cached in locals
local string, math, table = string, math, table
local require, print = require, print
local sub, gmatch, match = string.sub, string.gmatch, string.match

-- modules incorporated as preload functions follow; 'base' keeps a
-- handle on the global table for code that runs inside module()
local preload, base = package.preload, _G

-- one description line per plugin, keyed by plugin name
local plugin_info = {
  sloc = "sloc calculates SLOC for given source file",
  html = "html generates a HTML file for checking globals",
}

-- list of plugin names (same names as the keys of plugin_info)
local p_embedded = { "html", "sloc" }

-- preload function for module llex
preload.llex = function()
--start of inserted module
module "llex"

local string = base.require "string"
local find, match, sub = string.find, string.match, string.sub

----------------------------------------------------------------------
-- initialize keyword list, variables
-- * kw[word] is true for each reserved word listed below
----------------------------------------------------------------------

local kw = {}
do
  local reserved = [[
and break do else elseif end false for function if in
local nil not or repeat return then true until while]]
  for word in string.gmatch(reserved, "%S+") do
    kw[word] = true
  end
end

-- see init() for the module variables that are externally visible:
-- externally visible module variables (assigned in init()):
-- tok, seminfo, tokln

-- lexer state private to this module
local z,                -- source stream
      sourceid,         -- name of source
      I,                -- position of lexer
      buff,             -- buffer for strings
      ln                -- line number

----------------------------------------------------------------------
-- add information to token listing
-- * appends one entry to each of the parallel lists tok, seminfo and
--   tokln; the line recorded is the current value of ln
----------------------------------------------------------------------

local function addtoken(token, info)
  local n = #tok + 1
  tok[n], seminfo[n], tokln[n] = token, info, ln
end

----------------------------------------------------------------------
-- handles line number incrementation and end-of-line characters
-- * i is the position of a '\n' or '\r'; a following, different
--   end-of-line character is consumed as part of the same line break
-- * when is_tok is set, the line break text is added as a TK_EOL token
--   (recorded against the line number before it is incremented)
-- * updates I and returns the position just after the line break
----------------------------------------------------------------------

local function inclinenumber(i, is_tok)
  local first = sub(z, i, i)
  local second = sub(z, i + 1, i + 1)
  local eol, after = first, i + 1
  if second ~= first and (second == "\n" or second == "\r") then
    eol, after = first..second, i + 2   -- '\n\r' or '\r\n' pair
  end
  if is_tok then addtoken("TK_EOL", eol) end
  ln = ln + 1
  I = after
  return after
end

----------------------------------------------------------------------
-- initialize lexer for given source _z and source name _sourceid
-- * resets the position and line counters and creates fresh token
--   lists tok, seminfo, tokln (externally visible thru' module)
-- * a first line starting with '#' (shbang) is emitted as a TK_COMMENT
--   token, followed by a TK_EOL token if a line break follows it
----------------------------------------------------------------------

function init(_z, _sourceid)
  z, sourceid = _z, _sourceid
  I, ln = 1, 1
  tok, seminfo, tokln = {}, {}, {}
  -- initial processing (shbang handling)
  local shbang, eol = match(z, "^(#[^\r\n]*)(\r?\n?)")
  if shbang then                        -- skip first line
    I = I + #shbang
    addtoken("TK_COMMENT", shbang)
    if #eol > 0 then inclinenumber(I, true) end
  end
end

----------------------------------------------------------------------
-- returns a chunk name or id, no truncation for long names
----------------------------------------------------------------------

function chunkid()
  if sourceid and match(sourceid, "^[=@]") then
    return sub(sourceid, 2)  -- remove first char
  end
  return "[string]"
end

----------------------------------------------------------------------
-- formats error message and throws error
-- * a simplified version, does not report what token was responsible
-- * s is the message text; line defaults to the current lexer line
----------------------------------------------------------------------

function errorline(s, line)
  -- use base.error when 'error' is not visible in this environment
  local e = error or base.error
  e(string.format("%s:%d: %s", chunkid(), line or ln, s))
end
local errorline = errorline

------------------------------------------------------------------------
-- count separators ("=") in a long string delimiter
-- * i is the position of the opening '[' or closing ']'
-- * leaves I on the character after the run of '='
-- * returns the number of '=' if that character repeats the bracket
--   at i, otherwise (-count) - 1
------------------------------------------------------------------------

local function skip_sep(i)
  local sub = sub
  local s = sub(z, i, i)
  i = i + 1
  local count = #match(z, "=*", i)
  i = i + count
  I = i
  return (sub(z, i, i) == s) and count or (-count) - 1
end

----------------------------------------------------------------------
-- reads a long string or long comment
-- * is_str only selects the wording of the "unfinished" error
-- * sep is the separator count of the opening delimiter
-- * buff must hold the start position of the token on entry; the
--   token text from there to the closing delimiter is returned
-- * buff is kept numeric until the final sub() call: the previous
--   code appended "\n" to it on every line break, turning the start
--   position into a string that only worked through implicit
--   string-to-number coercion
----------------------------------------------------------------------

local function read_long_string(is_str, sep)
  local i = I + 1  -- skip 2nd '['
  local sub = sub
  local c = sub(z, i, i)
  if c == "\r" or c == "\n" then  -- string starts with a newline?
    i = inclinenumber(i)  -- skip it
  end
  while true do
    local p, _, r = find(z, "([\r\n%]])", i) -- (long range match)
    if not p then
      errorline(is_str and "unfinished long string" or
                "unfinished long comment")
    end
    i = p
    if r == "]" then                    -- delimiter test
      if skip_sep(i) == sep then
        buff = sub(z, buff, I)
        I = I + 1  -- skip 2nd ']'
        return buff
      end
      i = I
    else                                -- newline
      i = inclinenumber(i)
    end
  end--while
end

----------------------------------------------------------------------
-- reads a string
-- * del is the opening quote character; I is just past it on entry
-- * buff must hold the position of the opening quote; the returned
--   text includes both quotes
-- * raises "unfinished string" on a bare line break or end of input
----------------------------------------------------------------------

local function read_string(del)
  local i = I
  local find = find
  local sub = sub
  while true do
    local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range match)
    if p then
      if r == "\n" or r == "\r" then
        errorline("unfinished string")
      end
      i = p
      if r == "\\" then                         -- handle escapes
        i = i + 1
        r = sub(z, i, i)
        if r == "" then break end -- (EOZ error)
        p = find("abfnrtv\n\r", r, 1, true)
        ------------------------------------------------------
        if p then                               -- special escapes
          if p > 7 then                         -- escaped line break
            i = inclinenumber(i)
          else
            i = i + 1
          end
        ------------------------------------------------------
        elseif find(r, "%D") then               -- other non-digits
          i = i + 1
        ------------------------------------------------------
        else                                    -- \xxx sequence
          local p, q, s = find(z, "^(%d%d?%d?)", i)
          i = q + 1
          if s + 1 > 256 then -- UCHAR_MAX
            errorline("escape sequence too large")
          end
        ------------------------------------------------------
        end--if p
      else
        i = i + 1
        if r == del then                        -- ending delimiter
          I = i
          return sub(z, buff, i - 1)            -- return string
        end
      end--if r
    else
      break -- (error)
    end--if p
  end--while
  errorline("unfinished string")
end

------------------------------------------------------------------------
-- main lexer function
-- * lexes the whole source given to init(), appending tokens through
--   addtoken(); returns after adding the final TK_EOS token
-- * each pass of the outer loop lexes one token starting at I; the
--   inner loop exists only so that 'break' can act as 'continue'
------------------------------------------------------------------------

function llex()
  local find = find
  local match = match
  while true do--outer
    local i = I
    -- inner loop allows break to be used to nicely section tests
    while true do--inner
      ----------------------------------------------------------------
      -- identifiers and keywords
      local p, _, r = find(z, "^([_%a][_%w]*)", i)
      if p then
        I = i + #r
        if kw[r] then
          addtoken("TK_KEYWORD", r)             -- reserved word (keyword)
        else
          addtoken("TK_NAME", r)                -- identifier
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      local p, _, r = find(z, "^(%.?)%d", i)
      if p then                                 -- numeral
        if r == "." then i = i + 1 end
        local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
        i = q + 1
        if #r == 1 then                         -- optional exponent
          if match(z, "^[%+%-]", i) then        -- optional sign
            i = i + 1
          end
        end
        local _, q = find(z, "^[_%w]*", i)
        I = q + 1
        local v = sub(z, p, q)                  -- string equivalent
        if not base.tonumber(v) then            -- handles hex test also
          errorline("malformed number")
        end
        addtoken("TK_NUMBER", v)
        break -- (continue)
      end
      ----------------------------------------------------------------
      -- whitespace; a leading line break becomes its own TK_EOL token
      local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
      if p then
        if t == "\n" or t == "\r" then          -- newline
          inclinenumber(i, true)
        else
          I = q + 1                             -- whitespace
          addtoken("TK_SPACE", r)
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      local r = match(z, "^%p", i)
      if r then
        buff = i                                -- token start position
        local p = find("-[\"\'.=<>~", r, 1, true)
        if p then
          -- two-level if block for punctuation/symbols
          --------------------------------------------------------
          if p <= 2 then
            if p == 1 then                      -- minus
              local c = match(z, "^%-%-(%[?)", i)
              if c then
                i = i + 2
                local sep = -1
                if c == "[" then
                  sep = skip_sep(i)
                end
                if sep >= 0 then                -- long comment
                  addtoken("TK_LCOMMENT", read_long_string(false, sep))
                else                            -- short comment
                  I = find(z, "[\n\r]", i) or (#z + 1)
                  addtoken("TK_COMMENT", sub(z, buff, I - 1))
                end
                break -- (continue)
              end
              -- (fall through for "-")
            else                                -- [ or long string
              local sep = skip_sep(i)
              if sep >= 0 then
                addtoken("TK_LSTRING", read_long_string(true, sep))
              elseif sep == -1 then
                addtoken("TK_OP", "[")
              else
                errorline("invalid long string delimiter")
              end
              break -- (continue)
            end
          --------------------------------------------------------
          elseif p <= 5 then
            if p < 5 then                       -- strings
              I = i + 1
              addtoken("TK_STRING", read_string(r))
              break -- (continue)
            end
            r = match(z, "^%.%.?%.?", i)        -- .|..|... dots
            -- (fall through)
          --------------------------------------------------------
          else                                  -- relational
            r = match(z, "^%p=?", i)
            -- (fall through)
          end
        end
        I = i + #r
        addtoken("TK_OP", r)  -- for other symbols, fall through
        break -- (continue)
      end
      ----------------------------------------------------------------
      local r = sub(z, i, i)
      if r ~= "" then
        I = i + 1
        addtoken("TK_OP", r)                    -- other single-char tokens
        break
      end
      addtoken("TK_EOS", "")                    -- end of stream,
      return                                    -- exit here
      ----------------------------------------------------------------
    end--while inner
  end--while outer
end

--end of inserted module
end

-- preload function for module lparser
preload.lparser =
function()
--start of inserted module
module "lparser"

local string = base.require "string"

--[[--------------------------------------------------------------------
-- variable and data structure initialization
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- initialization: main variables
----------------------------------------------------------------------

local toklist,                  -- grammar-only token tables (token table,
      seminfolist,              -- semantic information table, line number
      toklnlist,                -- table, cross-reference table)
      xreflist,
      tpos,                     -- token position

      line,                     -- start line # for error messages
      lastln,                   -- last line # for ambiguous syntax chk
      tok, seminfo, ln, xref,   -- token, semantic info, line
      nameref,                  -- proper position of name token
      fs,                       -- current function state
      top_fs,                   -- top-level function state

      globalinfo,               -- global variable information table
      globallookup,             -- global variable name lookup table
      localinfo,                -- local variable information table
      ilocalinfo,               -- inactive locals (prior to activation)
      ilocalrefs,               -- corresponding references to activate
      statinfo                  -- statements labeled by type

-- forward references for local functions
local explist1, expr, block, exp1, body, chunk

----------------------------------------------------------------------
-- initialization: data structures
----------------------------------------------------------------------

local gmatch = string.gmatch

-- tokens that end a block; lookahead check in chunk(), returnstat()
-- NOTE(review): the list previously ended in a blank where the
-- end-of-stream tag belongs; "<eof>" follows the Lua 5.1 lexer's
-- naming -- confirm it matches the tag given to TK_EOS when the
-- grammar-only token tables are built (not visible in this chunk)
local block_follow = {}
for v in gmatch("else elseif end until <eof>", "%S+") do
  block_follow[v] = true
end

-- each {op left right} entry gives an operator's two priorities
local binopr_left = {}          -- binary operators, left priority
local binopr_right = {}         -- binary operators, right priority
for op, lt, rt in gmatch([[
{+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7}
{^ 10 9}{.. 5 4}
{~= 3 3}{== 3 3}
{< 3 3}{<= 3 3}{> 3 3}{>= 3 3}
{and 2 2}{or 1 1}
]], "{(%S+)%s(%d+)%s(%d+)}") do
  binopr_left[op] = lt + 0      -- "+ 0" converts the capture to a number
  binopr_right[op] = rt + 0
end

local unopr = { ["not"] = true, ["-"] = true,
                ["#"] = true, } -- unary operators
local UNARY_PRIORITY = 8        -- priority for unary operators

--[[--------------------------------------------------------------------
-- support functions
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- formats error message and throws error (duplicated from llex)
-- * a simplified version, does not report what token was responsible
-- * line defaults to the line of the current token (ln)
----------------------------------------------------------------------

local function errorline(s, line)
  local e = error or base.error
  e(string.format("(source):%d: %s", line or ln, s))
end

----------------------------------------------------------------------
-- handles incoming token, semantic information pairs
-- * NOTE: 'nextt' is named 'next' originally
----------------------------------------------------------------------

-- reads in next token
-- NOTE(review): lastln receives the line of the token being read, so
-- it always equals ln afterwards and the 'line ~= lastln' test in
-- funcargs() cannot fire; lparser.c keeps the previous token's line
-- instead -- confirm the intent before changing this
local function nextt()
  lastln = toklnlist[tpos]
  tok, seminfo, ln, xref =
    toklist[tpos], seminfolist[tpos], toklnlist[tpos], xreflist[tpos]
  tpos = tpos + 1
end

--
-- peek at next token (single lookahead for table constructor)
local function lookahead()
  return toklist[tpos]
end

----------------------------------------------------------------------
-- throws a syntax error, or if token expected is not there
-- * number and string tokens are reported by their tag, a name token
--   by its text (seminfo), any other token by itself; the latter two
--   are wrapped in single quotes
-- * NOTE(review): the tag literals had been reduced to "" (both
--   comparisons in the first test were identical and the name case
--   was unreachable); "<number>", "<string>" and "<name>" follow the
--   Lua 5.1 lexer's naming -- confirm they match the tags assigned
--   when the grammar-only token tables are built (not visible here)
----------------------------------------------------------------------

local function syntaxerror(msg)
  local tok = tok
  if tok ~= "<number>" and tok ~= "<string>" then
    if tok == "<name>" then tok = seminfo end
    tok = "'"..tok.."'"
  end
  errorline(msg.." near "..tok)
end

local function error_expected(token)
  syntaxerror("'"..token.."' expected")
end

----------------------------------------------------------------------
-- tests for a token, returns outcome
-- * skips the token and returns true on a match, else returns nothing
----------------------------------------------------------------------

local function testnext(c)
  if tok == c then nextt(); return true end
end

----------------------------------------------------------------------
-- check for existence of a token, throws error if not found
----------------------------------------------------------------------

local function check(c)
  if tok ~= c then error_expected(c) end
end

----------------------------------------------------------------------
-- verify existence of a token, then skip it
----------------------------------------------------------------------

local function checknext(c)
  check(c); nextt()
end

----------------------------------------------------------------------
-- throws error if condition not matched
----------------------------------------------------------------------

local function check_condition(c, msg)
  if not c then syntaxerror(msg) end
end

----------------------------------------------------------------------
-- verifies token conditions are met or else throw error
-- * what is the closing token wanted, who the opening token and where
--   the line of the opening token; the longer message is used when
--   the opening token is on a different line from the current token
----------------------------------------------------------------------

local function check_match(what, who, where)
  if not testnext(what) then
    if where == ln then
      error_expected(what)
    else
      syntaxerror("'"..what.."' expected (to close '"..who..
                  "' at line "..where..")")
    end
  end
end

----------------------------------------------------------------------
-- expect that token is a name, return the name
-- * also records the token's cross-reference position in nameref
-- * NOTE(review): "<name>" restores a tag that had been reduced to
--   ""; confirm against the token table builder (not visible here)
----------------------------------------------------------------------

local function str_checkname()
  check("<name>")
  local ts = seminfo
  nameref = xref
  nextt()
  return ts
end

----------------------------------------------------------------------
-- adds given string s in string pool, sets e as VK
-- * in this skeleton only the expression kind is recorded; s is unused
----------------------------------------------------------------------

local function codestring(e, s)
  e.k = "VK"
end

----------------------------------------------------------------------
-- consume a name token, adds it to string pool
----------------------------------------------------------------------

local function checkname(e)
  codestring(e, str_checkname())
end

--[[--------------------------------------------------------------------
-- variable (global|local|upvalue) handling
-- * to track locals and globals, variable management code needed
-- * entry point is singlevar() for variable lookups
-- * lookup tables (bl.locallist) are maintained awkwardly in the basic
--   block data structures, PLUS the function data structure (this is
--   an inelegant hack, since bl is nil for the top level of a function)
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- returns the name lookup table of the innermost open scope: that of
-- the current block if there is one, else that of the function root
-- * used in new_localvar(), removevars()
----------------------------------------------------------------------

local function current_locallist()
  local bl = fs.bl
  if bl then return bl.locallist end
  return fs.locallist
end

----------------------------------------------------------------------
-- register a local variable, create local variable object, set in
-- to-activate variable list
-- * the declaration position is taken from nameref, so a name token
--   must have been consumed by str_checkname() beforehand
-- * used in new_localvarliteral(), parlist(), fornum(), forlist(),
--   localfunc(), localstat()
----------------------------------------------------------------------

local function new_localvar(name, special)
  local locallist = current_locallist()
  -- build local variable information object and set localinfo
  local id = #localinfo + 1
  localinfo[id] = {             -- new local variable object
    name = name,                -- local variable name
    xref = { nameref },         -- xref, first value is declaration
    decl = nameref,             -- location of declaration, = xref[1]
  }
  if special then               -- "self" must be not be changed
    localinfo[id].isself = true
  end
  -- this can override a local with the same name in the same scope
  -- but first, keep it inactive until it gets activated
  local i = #ilocalinfo + 1
  ilocalinfo[i] = id
  ilocalrefs[i] = locallist
end

----------------------------------------------------------------------
-- actually activate the variables so that they are visible
-- * activates the last nvars entries of the to-activate list and
--   removes them from that list
-- * remember Lua semantics, e.g. RHS is evaluated first, then LHS
-- * used in parlist(), forbody(), localfunc(), localstat(), body()
----------------------------------------------------------------------

local function adjustlocalvars(nvars)
  local sz = #ilocalinfo
  -- i goes from left to right, in order of local allocation, because
  -- of something like: local a,a,a = 1,2,3 which gives a = 3
  while nvars > 0 do
    nvars = nvars - 1
    local i = sz - nvars
    local id = ilocalinfo[i]            -- local's id
    local obj = localinfo[id]
    local name = obj.name               -- name of local
    obj.act = xref                      -- set activation location
    ilocalinfo[i] = nil
    local locallist = ilocalrefs[i]     -- ref to lookup table to update
    ilocalrefs[i] = nil
    local existing = locallist[name]    -- if existing, remove old first!
    if existing then                    -- do not overlap, set special
      obj = localinfo[existing]         -- form of rem, as -id
      obj.rem = -id
    end
    locallist[name] = id                -- activate, now visible to Lua
  end
end

----------------------------------------------------------------------
-- remove (deactivate) variables in current scope (before scope exits)
-- * only stamps the deactivation position (xref) on each local; the
--   locallist table itself is dropped by the caller
-- * used in leaveblock(), close_func()
----------------------------------------------------------------------

local function removevars()
  -- enumerate the local list at current scope and deactivate 'em
  for name, id in base.pairs(current_locallist()) do
    local obj = localinfo[id]
    obj.rem = xref                      -- set deactivation location
  end
end

----------------------------------------------------------------------
-- creates a new local variable given a name
-- * skips internal locals (those starting with '('), so internal
--   locals never needs a corresponding adjustlocalvars() call
-- * special is true for "self" which must not be optimized
-- * used in fornum(), forlist(), parlist(), body()
----------------------------------------------------------------------

local function new_localvarliteral(name, special)
  if string.sub(name, 1, 1) == "(" then  -- can skip internal locals
    return
  end
  new_localvar(name, special)
end

----------------------------------------------------------------------
-- search the local variable namespace of the given fs for a match
-- * walks the chain of open blocks from the innermost outwards, then
--   the function root
-- * returns localinfo index, or -1 if the name is not found
-- * used only in singlevaraux()
----------------------------------------------------------------------

local function searchvar(fs, n)
  local bl = fs.bl
  local locallist
  if bl then
    locallist = bl.locallist
    while locallist do
      if locallist[n] then return locallist[n] end  -- found
      bl = bl.prev
      locallist = bl and bl.locallist
    end
  end
  locallist = fs.locallist
  return locallist[n] or -1  -- found or not found (-1)
end

----------------------------------------------------------------------
-- handle locals, globals and upvalues and related processing
-- * search mechanism is recursive, calls itself to search parents
-- * sets var.k (and var.id when a local is found at some level) and
--   returns the same kind string
-- * used only in singlevar()
----------------------------------------------------------------------

local function singlevaraux(fs, n, var)
  if fs == nil then  -- no more levels?
    var.k = "VGLOBAL"  -- default is global variable
    return "VGLOBAL"
  else
    local v = searchvar(fs, n)  -- look up at current level
    if v >= 0 then
      var.k = "VLOCAL"
      var.id = v
      -- codegen may need to deal with upvalue here
      return "VLOCAL"
    else  -- not found at current level; try upper one
      if singlevaraux(fs.prev, n, var) == "VGLOBAL" then
        return "VGLOBAL"
      end
      -- else was LOCAL or UPVAL, handle here
      var.k = "VUPVAL"  -- upvalue in this level
      return "VUPVAL"
    end--if v
  end--if fs
end

----------------------------------------------------------------------
-- consume a name token, creates a variable (global|local|upvalue)
-- * also appends the name's position to the xref list of the matching
--   global or local object, creating the global object on first use
-- * used in prefixexp(), funcname()
----------------------------------------------------------------------

local function singlevar(v)
  local name = str_checkname()
  singlevaraux(fs, name, v)
  ------------------------------------------------------------------
  -- variable tracking
  ------------------------------------------------------------------
  if v.k == "VGLOBAL" then
    -- if global being accessed, keep track of it by creating an object
    local id = globallookup[name]
    if not id then
      id = #globalinfo + 1
      globalinfo[id] = {                -- new global variable object
        name = name,                    -- global variable name
        xref = { nameref },             -- xref, first value is declaration
      }
      globallookup[name] = id           -- remember it
    else
      local obj = globalinfo[id].xref
      obj[#obj + 1] = nameref           -- add xref
    end
  else
    -- local/upvalue is being accessed, keep track of it
    local id = v.id
    local obj = localinfo[id].xref
    obj[#obj + 1] = nameref             -- add xref
  end
end
--[[--------------------------------------------------------------------
-- state management functions with open/close pairs
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- enters a code unit, initializes elements
-- * pushes a new block record, with an empty locallist, on top of the
--   current function's chain of blocks
----------------------------------------------------------------------

local function enterblock(isbreakable)
  fs.bl = {                     -- per-block state
    isbreakable = isbreakable,
    prev = fs.bl,               -- enclosing block, nil at function root
    locallist = {},
  }
end

----------------------------------------------------------------------
-- leaves a code unit, close any upvalues
-- * deactivates the block's locals, then pops the block record
----------------------------------------------------------------------

local function leaveblock()
  local enclosing = fs.bl.prev
  removevars()
  fs.bl = enclosing
end

----------------------------------------------------------------------
-- opening of a function
-- * top_fs is only for anchoring the top fs, so that parser() can
--   return it to the caller function along with useful output
-- * used in parser() and body()
----------------------------------------------------------------------

local function open_func()
  -- the outermost function reuses top_fs, which is created early;
  -- nested functions get a fresh state table
  local state = fs and {} or top_fs
  state.prev = fs               -- linked list of function states
  state.bl = nil
  state.locallist = {}
  fs = state
end

----------------------------------------------------------------------
-- closing of a function
-- * deactivates the function-level locals and returns to the
--   enclosing function state
-- * used in parser() and body()
----------------------------------------------------------------------

local function close_func()
  removevars()
  fs = fs.prev
end

--[[--------------------------------------------------------------------
-- other parsing functions
-- * for table constructor, parameter list, argument list
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- parse a function name suffix, for function call specifications
--
-- * used in primaryexp(), funcname()
----------------------------------------------------------------------
-- NOTE(review) for this section: several token tags had been reduced
-- to "" (e.g. an 'elseif c == ""' directly after 'if c == ""');
-- "<name>", "<number>" and "<string>" follow the Lua 5.1 lexer's
-- naming -- confirm they match the tags assigned when the
-- grammar-only token tables are built (not visible in this chunk)

local function field(v)
  -- field -> ['.' | ':'] NAME
  local key = {}
  nextt()  -- skip the dot or colon
  checkname(key)
  v.k = "VINDEXED"
end

----------------------------------------------------------------------
-- parse a table indexing suffix, for constructors, expressions
-- * v receives the description of the index expression
-- * used in recfield(), primaryexp()
----------------------------------------------------------------------

local function yindex(v)
  -- index -> '[' expr ']'
  nextt()  -- skip the '['
  expr(v)
  checknext("]")
end

----------------------------------------------------------------------
-- parse a table record (hash) field
-- * used in constructor()
----------------------------------------------------------------------

local function recfield(cc)
  -- recfield -> (NAME | '['exp1']') = exp1
  local key, val = {}, {}
  if tok == "<name>" then
    checkname(key)
  else-- tok == '['
    yindex(key)
  end
  checknext("=")
  expr(val)
end

----------------------------------------------------------------------
-- emit a set list instruction if enough elements (LFIELDS_PER_FLUSH)
-- * note: retained in this skeleton because it modifies cc.v.k
-- * used in constructor()
----------------------------------------------------------------------

local function closelistfield(cc)
  if cc.v.k == "VVOID" then return end  -- there is no list item
  cc.v.k = "VVOID"
end

----------------------------------------------------------------------
-- parse a table list (array) field
-- * used in constructor()
----------------------------------------------------------------------

local function listfield(cc)
  expr(cc.v)
end

----------------------------------------------------------------------
-- parse a table constructor
-- * t receives the expression kind "VRELOCABLE"
-- * used in funcargs(), simpleexp()
----------------------------------------------------------------------

local function constructor(t)
  -- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}'
  -- field -> recfield | listfield
  -- fieldsep -> ',' | ';'
  local line = ln               -- line of '{', for the error message
  local cc = {}
  cc.v = {}
  cc.t = t
  t.k = "VRELOCABLE"
  cc.v.k = "VVOID"
  checknext("{")
  repeat
    if tok == "}" then break end
    -- closelistfield(cc) here
    local c = tok
    if c == "<name>" then  -- may be listfields or recfields
      if lookahead() ~= "=" then  -- look ahead: expression?
        listfield(cc)
      else
        recfield(cc)
      end
    elseif c == "[" then  -- constructor_item -> recfield
      recfield(cc)
    else  -- constructor_part -> listfield
      listfield(cc)
    end
  until not testnext(",") and not testnext(";")
  check_match("}", "{", line)
  -- lastlistfield(cc) here
end

----------------------------------------------------------------------
-- parse the arguments (parameters) of a function declaration
-- * each parameter name is registered as a local; all of them are
--   activated together once the list has been read
-- * '...' sets fs.is_vararg and ends the list
-- * used in body()
----------------------------------------------------------------------

local function parlist()
  -- parlist -> [ param { ',' param } ]
  local nparams = 0
  if tok ~= ")" then  -- is 'parlist' not empty?
    repeat
      local c = tok
      if c == "<name>" then  -- param -> NAME
        new_localvar(str_checkname())
        nparams = nparams + 1
      elseif c == "..." then
        nextt()
        fs.is_vararg = true
      else
        syntaxerror("<name> or '...' expected")
      end
    until fs.is_vararg or not testnext(",")
  end--if
  adjustlocalvars(nparams)
end

----------------------------------------------------------------------
-- parse the parameters of a function call
-- * contrast with parlist(), used in function declarations
-- * f receives the expression kind "VCALL"
-- * used in primaryexp()
----------------------------------------------------------------------

local function funcargs(f)
  local args = {}
  local line = ln
  local c = tok
  if c == "(" then  -- funcargs -> '(' [ explist1 ] ')'
    if line ~= lastln then
      syntaxerror("ambiguous syntax (function call x new statement)")
    end
    nextt()
    if tok == ")" then  -- arg list is empty?
      args.k = "VVOID"
    else
      explist1(args)
    end
    check_match(")", "(", line)
  elseif c == "{" then  -- funcargs -> constructor
    constructor(args)
  elseif c == "<string>" then  -- funcargs -> STRING
    codestring(args, seminfo)
    nextt()  -- must use 'seminfo' before 'next'
  else
    syntaxerror("function arguments expected")
    return
  end--if c
  f.k = "VCALL"
end

--[[--------------------------------------------------------------------
-- mostly expression functions
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- parses an expression in parentheses or a single variable
-- * used in primaryexp()
----------------------------------------------------------------------

local function prefixexp(v)
  -- prefixexp -> NAME | '(' expr ')'
  local c = tok
  if c == "(" then
    local line = ln
    nextt()
    expr(v)
    check_match(")", "(", line)
  elseif c == "<name>" then
    singlevar(v)
  else
    syntaxerror("unexpected symbol")
  end--if c
end

----------------------------------------------------------------------
-- parses a prefixexp (an expression in parentheses or a single
-- variable) or a function call specification
-- * an index suffix '[' exp ']' now marks v as "VINDEXED", the same
--   as field() does for '.' NAME; before, the kind set by an earlier
--   suffix (e.g. "VCALL") was left in place, which assignment()
--   does not accept as an assignment target
-- * used in simpleexp(), assignment(), expr_stat()
----------------------------------------------------------------------

local function primaryexp(v)
  -- primaryexp ->
  --    prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
  prefixexp(v)
  while true do
    local c = tok
    if c == "." then  -- field
      field(v)
    elseif c == "[" then  -- '[' exp1 ']'
      local key = {}
      yindex(key)
      v.k = "VINDEXED"
    elseif c == ":" then  -- ':' NAME funcargs
      local key = {}
      nextt()
      checkname(key)
      funcargs(v)
    elseif c == "(" or c == "<string>" or c == "{" then  -- funcargs
      funcargs(v)
    else
      return
    end--if c
  end--while
end

----------------------------------------------------------------------
-- parses general expression types, constants handled here
-- * the constant cases fall out of the if-chain to the final nextt();
--   the constructor, function and primaryexp cases consume their own
--   tokens and return early
-- * used in subexpr()
----------------------------------------------------------------------

local function simpleexp(v)
  -- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
  --              constructor | FUNCTION body | primaryexp
  local c = tok
  if c == "<number>" then
    v.k = "VKNUM"
  elseif c == "<string>" then
    codestring(v, seminfo)
  elseif c == "nil" then
    v.k = "VNIL"
  elseif c == "true" then
    v.k = "VTRUE"
  elseif c == "false" then
    v.k = "VFALSE"
  elseif c == "..." then  -- vararg
    check_condition(fs.is_vararg == true,
                    "cannot use '...' outside a vararg function");
    v.k = "VVARARG"
  elseif c == "{" then  -- constructor
    constructor(v)
    return
  elseif c == "function" then
    nextt()
    body(v, false, ln)
    return
  else
    primaryexp(v)
    return
  end--if c
  nextt()
end

------------------------------------------------------------------------
-- Parse subexpressions. Includes handling of unary operators and binary
-- operators. A subexpr is given the rhs priority level of the operator
-- immediately left of it, if any (limit is 0 when entered from expr(),)
-- and if a binop is found, limit is compared with the lhs priority
-- level of the binop in order to determine which executes first.
-- * recursively called
-- * used in expr()
------------------------------------------------------------------------

local function subexpr(v, limit)
  -- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
  --   * where 'binop' is any binary operator with a priority
  --     higher than 'limit'
  if unopr[tok] then            -- unary operator: parse its operand
    nextt()
    subexpr(v, UNARY_PRIORITY)
  else
    simpleexp(v)
  end
  -- keep consuming binary operators whose left priority exceeds 'limit'
  local op = tok
  while true do
    local left = binopr_left[op]
    if not left or left <= limit then break end
    nextt()
    -- the right operand is read with the operator's right priority;
    -- the call hands back the first operator it did not consume
    op = subexpr({}, binopr_right[op])
  end
  return op  -- return first untreated operator
end

----------------------------------------------------------------------
-- Expression parsing starts here. Function subexpr is entered with a
-- limit of 0, which is lower than the priority of every binary
-- operator. Expr information is returned in parm v.
--   * used in cond(), explist1(), index(), recfield(), listfield(),
--     prefixexp(), while_stat(), exp1()
----------------------------------------------------------------------
-- this is a forward-referenced local
function expr(v)
  -- expr -> subexpr
  subexpr(v, 0)
end

--[[--------------------------------------------------------------------
-- third level parsing functions
----------------------------------------------------------------------]]

------------------------------------------------------------------------
-- parse a variable assignment sequence
-- * v is a table whose 'v' field is the expression descriptor of the
--   left-hand side just parsed; its kind must be assignable
-- * recursively called
-- * used in expr_stat()
------------------------------------------------------------------------
local function assignment(v)
  local c = v.v.k
  check_condition(c == "VLOCAL" or c == "VUPVAL" or c == "VGLOBAL"
                  or c == "VINDEXED", "syntax error")
  if testnext(",") then  -- assignment -> ',' primaryexp assignment
    local nv = { v = {} }  -- expdesc for the next target
    primaryexp(nv.v)
    -- lparser.c deals with some register usage conflict here
    assignment(nv)
  else  -- assignment -> '=' explist1
    checknext("=")
    explist1({})
  end
end

----------------------------------------------------------------------
-- parse a for loop body for both versions of the for loop
-- * nvars is the number of declared loop variables to activate
-- * isnum is accepted for call compatibility but is not used here
-- * used in fornum(), forlist()
----------------------------------------------------------------------
local function forbody(nvars, isnum)
  -- forbody -> DO block
  checknext("do")
  enterblock(false)  -- scope for declared variables
  adjustlocalvars(nvars)
  block()
  leaveblock()  -- end of scope for declared variables
end

----------------------------------------------------------------------
-- parse a numerical for loop, calls forbody()
-- * varname is the already-read name of the loop variable
-- * used in for_stat()
----------------------------------------------------------------------
local function fornum(varname)
  -- fornum -> NAME = exp1, exp1 [, exp1] DO body
  -- hidden control variables come first, then the declared variable
  new_localvarliteral("(for index)")
  new_localvarliteral("(for limit)")
  new_localvarliteral("(for step)")
  new_localvar(varname)
  checknext("=")
  exp1()  -- initial value
  checknext(",")
  exp1()  -- limit
  if testnext(",") then
    exp1()  -- optional step; when absent nothing needs to be parsed
  end
  forbody(1, true)
end

----------------------------------------------------------------------
-- parse a generic for loop, calls forbody()
-- * indexname is the already-read name of the first loop variable
-- * used in for_stat()
----------------------------------------------------------------------
local function forlist(indexname)
  -- forlist -> NAME {, NAME} IN explist1 DO body
  -- create control variables
  new_localvarliteral("(for generator)")
  new_localvarliteral("(for state)")
  new_localvarliteral("(for control)")
  -- create declared variables
  new_localvar(indexname)
  local nvars = 1
  while testnext(",") do
    new_localvar(str_checkname())
    nvars = nvars + 1
  end
  checknext("in")
  explist1({})
  forbody(nvars, false)
end

----------------------------------------------------------------------
-- parse a function name specification
-- * returns true when the name ends in a ':' method field
-- * used in func_stat()
----------------------------------------------------------------------
local function funcname(v)
  -- funcname -> NAME {field} [':' NAME]
  local needself = false
  singlevar(v)
  while tok == "." do
    field(v)
  end
  if tok == ":" then
    needself = true
    field(v)
  end
  return needself
end

----------------------------------------------------------------------
-- parse the single expressions needed in numerical for loops
-- * used in fornum()
----------------------------------------------------------------------
-- this is a forward-referenced local
function exp1()
  -- exp1 -> expr
  expr({})
end

----------------------------------------------------------------------
-- parse condition in a repeat statement or an if control structure
-- * used in repeat_stat(), test_then_block()
----------------------------------------------------------------------
local function cond()
  -- cond -> expr
  expr({})  -- read condition
end

----------------------------------------------------------------------
-- parse part of an if control structure, including the condition
-- * used in if_stat()
----------------------------------------------------------------------
local function test_then_block()
  -- test_then_block -> [IF | ELSEIF] cond THEN block
  nextt()  -- skip IF or ELSEIF
  cond()
  checknext("then")
  block()  -- 'then' part
end

----------------------------------------------------------------------
-- parse a local function statement
-- * the function's name is declared and activated before its body is
--   parsed
-- * used in local_stat()
----------------------------------------------------------------------
local function localfunc()
  -- localfunc -> NAME body
  new_localvar(str_checkname())
  adjustlocalvars(1)
  -- pass a real (empty) expression descriptor, as function_stat() does;
  -- previously an uninitialised nil was handed to body()
  body({}, false, ln)
end

----------------------------------------------------------------------
-- parse a local variable declaration statement
-- * the new variables are activated only after the optional
--   initialiser list has been parsed
-- * used in local_stat()
----------------------------------------------------------------------
local function localstat()
  -- localstat -> NAME {',' NAME} ['=' explist1]
  local nvars = 0
  repeat
    new_localvar(str_checkname())
    nvars = nvars + 1
  until not testnext(",")
  if testnext("=") then
    explist1({})
  end
  adjustlocalvars(nvars)
end
----------------------------------------------------------------------
-- parse a list of comma-separated expressions
-- * every expression in the list is parsed into the same descriptor e
-- * used in return_stat(), localstat(), funcargs(), assignment(),
--   forlist()
----------------------------------------------------------------------
-- this is a forward-referenced local
explist1 = function(e)
  -- explist1 -> expr { ',' expr }
  repeat
    expr(e)
  until not testnext(",")
end

----------------------------------------------------------------------
-- parse function declaration body
-- * e is accepted but not used here; needself adds an implicit "self"
--   parameter; line is the line reported if the closing END is missing
-- * used in simpleexp(), localfunc(), func_stat()
----------------------------------------------------------------------
-- this is a forward-referenced local
body = function(e, needself, line)
  -- body ->  '(' parlist ')' chunk END
  open_func()

  -- parameter list, with the hidden receiver declared first
  checknext("(")
  if needself then
    new_localvarliteral("self", true)
    adjustlocalvars(1)
  end
  parlist()
  checknext(")")

  -- statements up to the matching END
  chunk()
  check_match("end", "function", line)

  close_func()
end

----------------------------------------------------------------------
-- parse a code block or unit
-- * the chunk is parsed inside its own non-loop scope
-- * used in do_stat(), while_stat(), forbody(), test_then_block(),
--   if_stat()
----------------------------------------------------------------------
-- this is a forward-referenced local
block = function()
  -- block -> chunk
  enterblock(false)
  chunk()
  leaveblock()
end

--[[--------------------------------------------------------------------
-- second level parsing functions, all with '_stat' suffix
-- * since they are called via a table lookup, they cannot be local
--   functions (a lookup table of local functions might be smaller...)
-- * stat() -> *_stat()
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- initial parsing for a for loop, calls fornum() or forlist()
-- * removed 'line' parameter (used to set debug information only)
-- * the token after the first variable name selects the loop form:
--   "=" is numeric, "," or "in" is generic, anything else is an error
-- * used in stat()
----------------------------------------------------------------------
local function for_stat()
  -- stat -> for_stat -> FOR (fornum | forlist) END
  local line = line  -- remember the statement line for check_match()
  enterblock(true)  -- scope for loop and control variables
  nextt()  -- skip 'for'
  local varname = str_checkname()  -- first variable name
  local c = tok
  if c == "=" then
    fornum(varname)
  elseif c == "," or c == "in" then
    forlist(varname)
  else
    syntaxerror("'=' or 'in' expected")
  end
  check_match("end", "for", line)
  leaveblock()  -- loop scope (`break' jumps to this point)
end

----------------------------------------------------------------------
-- parse a while-do control structure, body processed by block()
-- * the condition is parsed before the breakable loop block is entered
-- * used in stat()
----------------------------------------------------------------------
local function while_stat()
  -- stat -> while_stat -> WHILE cond DO block END
  local line = line
  nextt()  -- skip WHILE
  cond()  -- parse condition
  enterblock(true)
  checknext("do")
  block()
  check_match("end", "while", line)
  leaveblock()
end

----------------------------------------------------------------------
-- parse a repeat-until control structure, body parsed by chunk()
-- * originally, repeatstat() calls breakstat() too if there is an
--   upvalue in the scope block; nothing is actually lexed, it is
--   actually the common code in breakstat() for closing of upvalues
-- * the condition is parsed while the inner scope block is still open
-- * used in stat()
----------------------------------------------------------------------
local function repeat_stat()
  -- stat -> repeat_stat -> REPEAT block UNTIL cond
  local line = line
  enterblock(true)  -- loop block
  enterblock(false)  -- scope block
  nextt()  -- skip REPEAT
  chunk()
  check_match("until", "repeat", line)
  cond()
  -- close upvalues at scope level below
  leaveblock()  -- finish scope
  leaveblock()  -- finish loop
end

----------------------------------------------------------------------
-- parse an if control structure
-- * used in stat()
----------------------------------------------------------------------
local function if_stat()
  -- stat -> if_stat -> IF cond THEN block
  --                    {ELSEIF cond THEN block} [ELSE block] END
  local line = line
  local v = {}  -- not referenced again in this function
  test_then_block()  -- IF cond THEN block
  while tok == "elseif" do
    test_then_block()  -- ELSEIF cond THEN block
  end
  if tok == "else" then
    nextt()  -- skip ELSE
    block()  -- 'else' part
  end
  check_match("end", "if", line)
end

----------------------------------------------------------------------
-- parse a return statement
-- * the expression list is skipped when the next token ends the block
--   or is ';'
-- * used in stat()
----------------------------------------------------------------------
local function return_stat()
  -- stat -> return_stat -> RETURN explist
  local e = {}
  nextt()  -- skip RETURN
  local c = tok
  if block_follow[c] or c == ";" then
    -- return no values
  else
    explist1(e)  -- optional return values
  end
end

----------------------------------------------------------------------
-- parse a break statement
-- * walks the chain of enclosing blocks (fs.bl, then .prev) looking
--   for one flagged isbreakable; raises a syntax error if none exists
-- * used in stat()
----------------------------------------------------------------------
local function break_stat()
  -- stat -> break_stat -> BREAK
  local bl = fs.bl
  nextt()  -- skip BREAK
  while bl and not bl.isbreakable do -- find a breakable block
    bl = bl.prev
  end
  if not bl then
    syntaxerror("no loop to break")
  end
end

----------------------------------------------------------------------
-- parse a function call with no returns or an assignment statement
-- * the struct with .prev is used for name searching in lparse.c,
--   so it is retained for now; present in assignment() also
-- * records "call" or "assign" in statinfo at the statement's first
--   token position
-- * used in stat()
----------------------------------------------------------------------
local function expr_stat()
  local id = tpos - 1  -- index used for the statinfo entry below
  -- stat -> expr_stat -> func | assignment
  local v = {}
  v.v = {}
  primaryexp(v.v)
  if v.v.k == "VCALL" then  -- stat -> func
    -- call statement uses no results
    statinfo[id] = "call"
  else  -- stat -> assignment
    v.prev = nil
    assignment(v)
    statinfo[id] = "assign"
  end
end

----------------------------------------------------------------------
-- parse a function statement
-- * used in stat()
----------------------------------------------------------------------
local function function_stat()
  -- stat -> function_stat -> FUNCTION funcname body
  local line = line
  local v, b = {}, {}
  nextt()  -- skip FUNCTION
  local needself = funcname(v)  -- true for 'a:b' style method names
  body(b, needself, line)
end

----------------------------------------------------------------------
-- parse a simple block enclosed by a DO..END pair
-- * used in stat()
----------------------------------------------------------------------
local function do_stat()
  -- stat -> do_stat -> DO block END
  local line = line
  nextt()  -- skip DO
  block()
  check_match("end", "do", line)
end

----------------------------------------------------------------------
-- parse a statement starting with LOCAL
-- * used in stat()
----------------------------------------------------------------------
local function local_stat()
  -- stat -> local_stat -> LOCAL FUNCTION localfunc
  --                    -> LOCAL localstat
  nextt()  -- skip LOCAL
  if testnext("function") then  -- local function?
    localfunc()
  else
    localstat()
  end
end

--[[--------------------------------------------------------------------
-- main functions, top level parsing functions
-- * accessible functions are: init(lexer), parser()
-- * [entry] -> parser() -> chunk() -> stat()
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- initial parsing for statements, calls '_stat' suffixed functions
-- * used in chunk()
----------------------------------------------------------------------

-- maps the leading keyword of a statement to its parsing function
local stat_call = {  -- lookup for calls in stat()
  ["if"] = if_stat,
  ["while"] = while_stat,
  ["do"] = do_stat,
  ["for"] = for_stat,
  ["repeat"] = repeat_stat,
  ["function"] = function_stat,
  ["local"] = local_stat,
  ["return"] = return_stat,
  ["break"] = break_stat,
}

-- parses one statement; returns true when the statement was a 'return'
-- or 'break' (which must end its block), false otherwise
local function stat()
  -- stat -> if_stat while_stat do_stat for_stat repeat_stat
  --         function_stat local_stat return_stat break_stat
  --         expr_stat
  line = ln  -- may be needed for error messages
  local c = tok
  local fn = stat_call[c]
  -- handles: if while do for repeat function local return break
  if fn then
    statinfo[tpos - 1] = c  -- record the statement keyword
    fn()
    -- return or break must be last statement
    if c == "return" or c == "break" then return true end
  else
    expr_stat()
  end
  return false
end

----------------------------------------------------------------------
-- parse a chunk, which consists of a bunch of statements
-- * stops after a 'return'/'break' statement or when the current token
--   is listed in block_follow; an optional ';' after each statement is
--   consumed
-- * used in parser(), body(), block(), repeat_stat()
----------------------------------------------------------------------
-- this is a forward-referenced local
function chunk()
  -- chunk -> { stat [';'] }
  local islast = false
  while not islast and not block_follow[tok] do
    islast = stat()
    testnext(";")
  end
end

----------------------------------------------------------------------
-- performs parsing, returns parsed data structure
-- * the returned table exposes the tables populated by init() and by
--   the parsing run (globalinfo, localinfo, statinfo, toklist,
--   seminfolist, toklnlist, xreflist)
----------------------------------------------------------------------
function parser()
  open_func()
  fs.is_vararg = true  -- main func. is always vararg
  nextt()  -- read first token
  chunk()
  check("")  -- the token stream must now be at the end-of-stream token
  close_func()
  return {  -- return everything
    globalinfo = globalinfo,
    localinfo = localinfo,
    statinfo = statinfo,
    toklist = toklist,
    seminfolist = seminfolist,
    toklnlist = toklnlist,
    xreflist = xreflist,
  }
end

----------------------------------------------------------------------
-- initialization function
-- * tokorig, seminfoorig, toklnorig are the lexer's parallel lists of
--   token types, token texts and line numbers
-- * builds grammar-only parallel lists (toklist, seminfolist,
--   toklnlist) plus xreflist, which maps each kept token back to its
--   index in the original lists
----------------------------------------------------------------------
function init(tokorig, seminfoorig, toklnorig)
  tpos = 1                      -- token position
  top_fs = {}                   -- reset top level function state
  ------------------------------------------------------------------
  -- set up grammar-only token tables; impedance-matching...
  -- note that constants returned by the lexer is source-level, so
  -- for now, fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING)
  ------------------------------------------------------------------
  -- NOTE(review): names, numbers, strings and end-of-stream are all
  -- mapped to the same empty string below, which makes them
  -- indistinguishable to the parser; presumably distinct placeholder
  -- names were intended -- confirm against the checks in the parser
  local j = 1
  toklist, seminfolist, toklnlist, xreflist = {}, {}, {}, {}
  for i = 1, #tokorig do
    local tok = tokorig[i]
    local yep = true  -- false for tokens that are dropped
    if tok == "TK_KEYWORD" or tok == "TK_OP" then
      tok = seminfoorig[i]  -- keywords/operators stand for themselves
    elseif tok == "TK_NAME" then
      tok = ""
      seminfolist[j] = seminfoorig[i]
    elseif tok == "TK_NUMBER" then
      tok = ""
      seminfolist[j] = 0  -- fake!
    elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
      tok = ""
      seminfolist[j] = ""  -- fake!
    elseif tok == "TK_EOS" then
      tok = ""
    else
      -- non-grammar tokens; ignore them
      yep = false
    end
    if yep then  -- set rest of the information
      toklist[j] = tok
      toklnlist[j] = toklnorig[i]
      xreflist[j] = i
      j = j + 1
    end
  end--for
  ------------------------------------------------------------------
  -- initialize data structures for variable tracking
  ------------------------------------------------------------------
  globalinfo, globallookup, localinfo = {}, {}, {}
  ilocalinfo, ilocalrefs = {}, {}
  statinfo = {}  -- experimental
end
--end of inserted module
end

-- preload function for module optlex
preload.optlex =
function()
--start of inserted module
module "optlex"

local string = base.require "string"
local match = string.match
local sub = string.sub
local find = string.find
local rep = string.rep
local print  -- assigned in optimize()

------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------

-- error function, can override by setting own function into module
error = base.error

warn = {}                       -- table for warning flags

local stoks, sinfos, stoklns    -- source lists

local is_realtoken = {          -- significant (grammar) tokens
  TK_KEYWORD = true,
  TK_NAME = true,
  TK_NUMBER = true,
  TK_STRING = true,
  TK_LSTRING = true,
  TK_OP = true,
  TK_EOS = true,
}
local is_faketoken = {          -- whitespace (non-grammar) tokens
  TK_COMMENT = true,
  TK_LCOMMENT = true,
  TK_EOL = true,
  TK_SPACE = true,
}

local opt_details               -- for extra information

------------------------------------------------------------------------
-- true if current token is at the start of a line
-- * i is an index into stoks; an empty-string entry marks a deleted
--   token
-- * skips over deleted tokens via recursion
------------------------------------------------------------------------
local function atlinestart(i)
  local tok = stoks[i - 1]
  if i <= 1 or tok == "TK_EOL" then
    return true
  elseif tok == "" then
    return atlinestart(i - 1)
  end
  return false
end

------------------------------------------------------------------------
-- true if
-- current token is at the end of a line
-- * i is an index into stoks; an empty-string entry marks a deleted
--   token
-- * skips over deleted tokens by walking forward
------------------------------------------------------------------------
local function atlineend(i)
  while true do
    local following = stoks[i + 1]
    if i >= #stoks or following == "TK_EOL" or following == "TK_EOS" then
      return true
    end
    if following ~= "" then
      return false  -- a live token follows on the same line
    end
    i = i + 1  -- deleted token: look one position further
  end
end

------------------------------------------------------------------------
-- counts comment EOLs inside a long comment
-- * lcomment is the full comment text including its delimiters
-- * a CRLF or LFCR pair counts as a single EOL
-- * in order to keep line numbering, EOLs need to be reinserted
------------------------------------------------------------------------
local function commenteols(lcomment)
  local opener = #match(lcomment, "^%-%-%[=*%[")
  -- the closing delimiter is two characters shorter than the opener
  local text = sub(lcomment, opener + 1, -(opener - 1))
  local count, pos = 0, 1
  local at, _, first, second = find(text, "([\r\n])([\r\n]?)", pos)
  while at do
    count = count + 1
    pos = at + 1
    if #second > 0 and first ~= second then  -- skip CRLF or LFCR
      pos = pos + 1
    end
    at, _, first, second = find(text, "([\r\n])([\r\n]?)", pos)
  end
  return count
end

------------------------------------------------------------------------
-- compares two tokens (i, j) and returns the whitespace required
-- * see documentation for a reference table of interactions
-- * only two grammar/real tokens are being considered
-- * if "", no separation is needed
-- * if " ", then at least one whitespace (or EOL) is required
-- * NOTE: this doesn't work at the start or the end or for EOS!
------------------------------------------------------------------------
local function checkpair(i, j)
  local left, right = stoks[i], stoks[j]

  -- a string on either side never needs separating
  if left == "TK_STRING" or left == "TK_LSTRING" or
     right == "TK_STRING" or right == "TK_LSTRING" then
    return ""
  end

  local left_is_op = (left == "TK_OP")
  local right_is_op = (right == "TK_OP")

  -- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" next to each other
  if not (left_is_op or right_is_op) then
    return " "
  end

  -- an operator beside a keyword or a name can always be joined
  if left_is_op and (right == "TK_KEYWORD" or right == "TK_NAME") then
    return ""
  end
  if right_is_op and (left == "TK_KEYWORD" or left == "TK_NAME") then
    return ""
  end

  if left_is_op and right_is_op then
    -- for TK_OP/TK_OP pairs, see notes in technotes.txt
    local a, b = sinfos[i], sinfos[j]
    local would_merge =
      (match(a, "^%.%.?$") and match(b, "^%.")) or
      (match(a, "^[~=<>]$") and b == "=") or
      (a == "[" and (b == "[" or b == "="))
    if would_merge then
      return " "
    end
    return ""
  end

  -- "TK_OP" + "TK_NUMBER" case: a dot-like operator would fuse with
  -- the number, so pick whichever side holds the operator
  local op
  if right_is_op then
    op = sinfos[j]
  else
    op = sinfos[i]
  end
  if match(op, "^%.%.?%.?$") then
    return " "
  end
  return ""
end

------------------------------------------------------------------------
-- repack tokens, removing deletions caused by optimization process
-- * entries whose token type is the empty string are dropped; the
--   three source lists are replaced by the compacted copies
------------------------------------------------------------------------
local function repack_tokens()
  local kept_toks, kept_infos, kept_lns = {}, {}, {}
  local n = 0
  for src = 1, #stoks do
    local kind = stoks[src]
    if kind ~= "" then
      n = n + 1
      kept_toks[n] = kind
      kept_infos[n] = sinfos[src]
      kept_lns[n] = stoklns[src]
    end
  end
  stoks, sinfos, stoklns = kept_toks, kept_infos, kept_lns
end

------------------------------------------------------------------------
-- number optimization
-- * optimization using string formatting functions is one way of doing
--   this, but here, we consider all cases and handle them separately
--   (possibly an idiotic
approach...) -- * scientific notation being generated is not in canonical form, this -- may or may not be a bad thing -- * note: intermediate portions need to fit into a normal number range -- * optimizations can be divided based on number patterns: -- * hexadecimal: -- (1) no need to remove leading zeros, just skip to (2) -- (2) convert to integer if size equal or smaller -- * change if equal size -> lose the 'x' to reduce entropy -- (3) number is then processed as an integer -- (4) note: does not make 0[xX] consistent -- * integer: -- (1) note: includes anything with trailing ".", ".0", ... -- (2) remove useless fractional part, if present, e.g. 123.000 -- (3) remove leading zeros, e.g. 000123 -- (4) switch to scientific if shorter, e.g. 123000 -> 123e3 -- * with fraction: -- (1) split into digits dot digits -- (2) if no integer portion, take as zero (can omit later) -- (3) handle degenerate .000 case, after which the fractional part -- must be non-zero (if zero, it's matched as an integer) -- (4) remove trailing zeros for fractional portion -- (5) p.q where p > 0 and q > 0 cannot be shortened any more -- (6) otherwise p == 0 and the form is .q, e.g. .000123 -- (7) if scientific shorter, convert, e.g. 
--       .000123 -> 123e-6
-- * scientific:
--   (1) split into (digits dot digits) [eE] ([+-] digits)
--   (2) if significand has ".", shift it out so it becomes an integer
--   (3) if significand is zero, just use zero
--   (4) remove leading zeros for significand
--   (5) shift out trailing zeros for significand
--   (6) examine exponent and determine which format is best:
--       integer, with fraction, scientific
-- * i is the index of the number token; sinfos[i] is rewritten in
--   place when a different representation is chosen, and the change
--   is printed when opt_details is set
------------------------------------------------------------------------
local function do_number(i)
  local before = sinfos[i]      -- 'before'
  local z = before              -- working representation
  local y                       -- 'after', if better
  --------------------------------------------------------------------
  if match(z, "^0[xX]") then            -- hexadecimal number
    local v = base.tostring(base.tonumber(z))
    -- only switch to decimal when the conversion is a plain run of
    -- digits; large values are rendered by tostring() as a rounded
    -- float (e.g. 2.9514790517935e+20), which would silently change
    -- the constant if it were accepted here
    if #v <= #z and match(v, "^%d+$") then
      z = v  -- change to integer, AND continue
    else
      return  -- no change; stick to hex
    end
  end
  --------------------------------------------------------------------
  if match(z, "^%d+%.?0*$") then        -- integer or has useless frac
    z = match(z, "^(%d+)%.?0*$")  -- int portion only
    if z + 0 > 0 then
      z = match(z, "^0*([1-9]%d*)$")  -- remove leading zeros
      local v = #match(z, "0*$")      -- number of trailing zeros
      local nv = base.tostring(v)
      if v > #nv + 1 then  -- scientific is shorter
        z = sub(z, 1, #z - v).."e"..nv
      end
      y = z
    else
      y = "0"  -- basic zero
    end
  --------------------------------------------------------------------
  elseif not match(z, "[eE]") then      -- number with fraction part
    local p, q = match(z, "^(%d*)%.(%d+)$")  -- split
    if p == "" then p = 0 end  -- int part zero
    if q + 0 == 0 and p == 0 then
      y = "0"  -- degenerate .000 case
    else
      -- now, q > 0 holds and p is a number
      local v = #match(q, "0*$")  -- remove trailing zeros
      if v > 0 then
        q = sub(q, 1, #q - v)
      end
      -- if p > 0, nothing else we can do to simplify p.q case
      if p + 0 > 0 then
        y = p.."."..q
      else
        y = "."..q  -- tentative, e.g. .000123
        local v = #match(q, "^0*")  -- # leading zeros
        local w = #q - v            -- # significant digits
        local nv = base.tostring(#q)
        -- e.g. compare 123e-6 versus .000123
        if w + 2 + #nv < 1 + #q then
          y = sub(q, -w).."e-"..nv
        end
      end
    end
  --------------------------------------------------------------------
  else                                  -- scientific number
    local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
    ex = base.tonumber(ex)
    -- if got ".", shift out fractional portion of significand
    local p, q = match(sig, "^(%d*)%.(%d*)$")
    if p then
      ex = ex - #q
      sig = p..q
    end
    if sig + 0 == 0 then
      y = "0"  -- basic zero
    else
      local v = #match(sig, "^0*")  -- remove leading zeros
      sig = sub(sig, v + 1)
      v = #match(sig, "0*$") -- shift out trailing zeros
      if v > 0 then
        sig = sub(sig, 1, #sig - v)
        ex = ex + v
      end
      -- examine exponent and determine which format is best
      local nex = base.tostring(ex)
      if ex == 0 then  -- it's just an integer
        y = sig
      elseif ex > 0 and (ex <= 1 + #nex) then  -- a number
        y = sig..rep("0", ex)
      elseif ex < 0 and (ex >= -#sig) then  -- fraction, e.g. .123
        v = #sig + ex
        y = sub(sig, 1, v).."."..sub(sig, v + 1)
      elseif ex < 0 and (#nex >= -ex - #sig) then
        -- e.g. compare 1234e-5 versus .01234
        -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
        --     -> #nex >= -ex - #sig
        v = -ex - #sig
        y = "."..rep("0", v)..sig
      else  -- non-canonical scientific representation
        y = sig.."e"..ex
      end
    end--if sig
  end
  --------------------------------------------------------------------
  if y and y ~= before then
    if opt_details then
      print(" (line "..stoklns[i]..") "..before.." -> "..y)
      opt_details = opt_details + 1
    end
    sinfos[i] = y
  end
end

------------------------------------------------------------------------
-- string optimization
-- * note: works on well-formed strings only!
-- * optimizations on characters can be summarized as follows:
--   \a\b\f\n\r\t\v -- no change
--   \\ -- no change
--   \"\' -- depends on delim, other can remove \
--   \[\] -- remove \
--   \(other char) -- general escape, remove \
--   \(end of line) -- normalize the EOL only
--   \ddd -- if \a\b\f\n\r\t\v, change to latter
--     if other < ascii 32, keep ddd but zap leading zeros
--       but cannot have following digits
--     if >= ascii 32, translate it into the literal, then also
--       do escapes for \\,\",\' cases
--   (other char) -- no change
-- * switch delimiters if string becomes shorter
-- * I is the index of the string token; sinfos[I] is rewritten in
--   place, and the change is printed when opt_details is set
------------------------------------------------------------------------
local function do_string(I)
  local info = sinfos[I]
  local delim = sub(info, 1, 1)                 -- delimiter used
  local ndelim = (delim == "'") and '"' or "'"  -- opposite " <-> '
  local z = sub(info, 2, -2)                    -- actual string
  local i = 1
  local c_delim, c_ndelim = 0, 0                -- "/' counts
  --------------------------------------------------------------------
  while i <= #z do
    local c = sub(z, i, i)
    ----------------------------------------------------------------
    if c == "\\" then                           -- escaped stuff
      local j = i + 1
      local d = sub(z, j, j)
      -- position of the escaped character in this list selects the
      -- handling below (1-8, 9-10, 11-12, then digits)
      local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
      ------------------------------------------------------------
      if not p then                             -- other char: remove \
        z = sub(z, 1, i - 1)..sub(z, j)
        i = i + 1
      ------------------------------------------------------------
      elseif p <= 8 then                        -- \a\b\f\n\r\t\v\\
        i = i + 2                               -- no change
      ------------------------------------------------------------
      elseif p <= 10 then                       -- escaped EOL: normalize
        local eol = sub(z, j, j + 1)
        if eol == "\r\n" or eol == "\n\r" then
          z = sub(z, 1, i).."\n"..sub(z, j + 2)
        elseif p == 10 then  -- \r case
          z = sub(z, 1, i).."\n"..sub(z, j + 1)
        end
        i = i + 2
      ------------------------------------------------------------
      elseif p <= 12 then                       -- \"\' -- remove \ for ndelim
        if d == delim then
          c_delim = c_delim + 1
          i = i + 2
        else
          c_ndelim = c_ndelim + 1
          z = sub(z, 1, i - 1)..sub(z, j)
          i = i + 1
        end
      ------------------------------------------------------------
      else                                      -- \ddd -- various steps
        local s = match(z, "^(%d%d?%d?)", j)
        j = i + 1 + #s                          -- skip to location
        local cv = s + 0
        local cc = string.char(cv)
        local p = find("\a\b\f\n\r\t\v", cc, 1, true)
        if p then                               -- special escapes
          s = "\\"..sub("abfnrtv", p, p)
        elseif cv < 32 then                     -- normalized \ddd
          if match(sub(z, j, j), "%d") then
            -- if a digit follows, \ddd cannot be shortened
            s = "\\"..s
          else
            s = "\\"..cv
          end
        elseif cc == delim then                 -- must stay escaped
          s = "\\"..cc
          c_delim = c_delim + 1
        elseif cc == "\\" then                  -- \\
          s = "\\\\"
        else                                    -- literal character
          s = cc
          if cc == ndelim then
            c_ndelim = c_ndelim + 1
          end
        end
        z = sub(z, 1, i - 1)..s..sub(z, j)
        i = i + #s
      ------------------------------------------------------------
      end--if p
    ----------------------------------------------------------------
    else-- c ~= "\\"                            -- other char: no change
      i = i + 1
      if c == ndelim then  -- count ndelim, for switching delimiters
        c_ndelim = c_ndelim + 1
      end
    ----------------------------------------------------------------
    end--if c
  end--while
  --------------------------------------------------------------------
  -- switching delimiters, a long-winded derivation:
  -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
  -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
  -- simplifying the condition (1)>(2) --> c_delim > c_ndelim
  if c_delim > c_ndelim then
    i = 1
    while i <= #z do
      local p, _, r = find(z, "([\'\"])", i)
      if not p then break end
      if r == delim then                        -- drop the escaping \
        z = sub(z, 1, p - 2)..sub(z, p)
        i = p
      else-- r == ndelim                        -- add an escaping \
        z = sub(z, 1, p - 1).."\\"..sub(z, p)
        i = p + 2
      end
    end--while
    delim = ndelim  -- actually change delimiters
  end
  --------------------------------------------------------------------
  z = delim..z..delim
  if z ~= sinfos[I] then
    if opt_details then
      print(" (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
      opt_details = opt_details + 1
    end
    sinfos[I] = z
  end
end

------------------------------------------------------------------------
-- helper for do_lstring() and do_lcomment()
-- * y is the text between the long-bracket delimiters; sep is the size
--   of the current closing delimiter (2 for "]]", 3 for "]=]", ...)
-- * returns the smallest delimiter size below sep whose closing
--   bracket cannot occur, or nil if no shorter delimiter is usable
-- * the text is tested with a "]" appended, because a text that ends
--   in "]" (plus '=' signs) would otherwise combine with the first
--   character of the new closing delimiter and end the string early,
--   e.g. [=[a]]=] must not become [[a]]]
------------------------------------------------------------------------
local function shorter_delim(y, sep)
  local probe = y.."]"
  local okay
  for chk = sep - 1, 2, -1 do
    if not find(probe, "]"..rep("=", chk - 2).."]", 1, true) then
      okay = chk
    end
  end
  return okay
end

------------------------------------------------------------------------
-- long string optimization
-- * note: warning flagged if trailing whitespace found, not trimmed
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
-- * the first newline after the opening delimiter is kept
-- * I is the index of the long string token; sinfos[I] is rewritten
------------------------------------------------------------------------
local function do_lstring(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%[=*%[")  -- cut out delimiters
  local sep = #delim1
  local delim2 = sub(info, -sep, -1)
  local z = sub(info, sep + 1, -(sep + 1))  -- lstring without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, _, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line
    local ln
    if p then
      ln = sub(z, i, p - 1)
    else
      ln = sub(z, i)
    end
    if ln ~= "" then
      -- flag a warning if there are trailing spaces, won't optimize!
      if match(ln, "%s+$") then
        warn.LSTRING = "trailing whitespace in long string near line "..stoklns[I]
      end
      y = y..ln
    end
    if not p then  -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if #s > 0 and r ~= s then  -- skip CRLF or LFCR
      i = i + 1
    end
    y = y.."\n"
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  local okay = shorter_delim(y, sep)
  if okay then  -- change delimiters
    local eq = rep("=", okay - 2)
    delim1, delim2 = "["..eq.."[", "]"..eq.."]"
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end

------------------------------------------------------------------------
-- long comment optimization
-- * note: does not remove first optional newline
-- * trim trailing whitespace
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
-- * I is the index of the long comment token; sinfos[I] is rewritten
------------------------------------------------------------------------
local function do_lcomment(I)
  local info = sinfos[I]
  local delim1 = match(info, "^%-%-%[=*%[")  -- cut out delimiters
  local sep = #delim1
  -- the closing delimiter lacks the leading "--", hence sep - 2
  local delim2 = sub(info, -(sep - 2), -1)
  local z = sub(info, sep + 1, -(sep - 1))  -- comment without delims
  local y = ""
  local i = 1
  --------------------------------------------------------------------
  while true do
    local p, _, r, s = find(z, "([\r\n])([\r\n]?)", i)
    -- deal with a single line, extract and check trailing whitespace
    local ln
    if p then
      ln = sub(z, i, p - 1)
    else
      ln = sub(z, i)
    end
    if ln ~= "" then
      -- trim trailing whitespace if non-empty line; the trim length is
      -- the LENGTH of the whitespace run (arithmetic on the string
      -- itself raises an error)
      local ws = match(ln, "%s*$")
      if #ws > 0 then
        ln = sub(ln, 1, -(#ws + 1))
      end
      y = y..ln
    end
    if not p then  -- done if no more EOLs
      break
    end
    -- deal with line endings, normalize them
    i = p + 1
    if #s > 0 and r ~= s then  -- skip CRLF or LFCR
      i = i + 1
    end
    y = y.."\n"
  end--while
  --------------------------------------------------------------------
  -- handle possible deletion of one or more '=' separators
  local okay = shorter_delim(y, sep - 2)
  if okay then  -- change delimiters
    local eq = rep("=", okay - 2)
    delim1, delim2 = "--["..eq.."[", "]"..eq.."]"
  end
  --------------------------------------------------------------------
  sinfos[I] = delim1..y..delim2
end

------------------------------------------------------------------------
-- short comment optimization
-- * trim trailing whitespace
-- * i is the index of the comment token; sinfos[i] is rewritten
------------------------------------------------------------------------
local function do_comment(i)
  local info = sinfos[i]
  local ws = match(info, "%s*$")        -- just look from end of string
  if #ws > 0 then
    -- cut by the length of the whitespace run, not the string itself
    info = sub(info, 1, -(#ws + 1))     -- trim trailing whitespace
  end
  sinfos[i] = info
end

------------------------------------------------------------------------
-- returns true if string found in long comment
-- * this is a feature to keep copyright or license texts
-- * opt_keep is searched for as plain text (no pattern matching)
-- * returns false when opt_keep is not set, and nothing (nil) when the
--   text is not found
------------------------------------------------------------------------
local function keep_lcomment(opt_keep, info)
  if not opt_keep then return false end  -- option not set
  local sep = #match(info, "^%-%-%[=*%[")  -- size of opening delimiter
  local z = sub(info, sep + 1, -(sep - 1))  -- comment without delims
  if find(z, opt_keep, 1, true) then  -- try to match
    return true
  end
end

------------------------------------------------------------------------
-- main entry point
-- * currently, lexer processing has 2 passes
-- * processing is
--   done on a line-oriented basis, which is easier to
--   grok due to the next point...
-- * since there are various options that can be enabled or disabled,
--   processing is a little messy or convoluted
-- * option is a table of flags; the keys read here are "opt-comments",
--   "opt-whitespace", "opt-emptylines", "opt-eols", "opt-strings",
--   "opt-numbers", "opt-experimental", KEEP and DETAILS
-- * toklist, semlist, toklnlist are installed as the working lists
--   stoks, sinfos, stoklns; the function returns stoks, sinfos, stoklns
--   after processing
-- * helpers such as repack_tokens, atlinestart, atlineend, checkpair,
--   commenteols, do_number and do_string are defined outside this
--   part of the file
------------------------------------------------------------------------

function optimize(option, toklist, semlist, toklnlist)
  --------------------------------------------------------------------
  -- set option flags
  --------------------------------------------------------------------
  local opt_comments = option["opt-comments"]
  local opt_whitespace = option["opt-whitespace"]
  local opt_emptylines = option["opt-emptylines"]
  local opt_eols = option["opt-eols"]
  local opt_strings = option["opt-strings"]
  local opt_numbers = option["opt-numbers"]
  local opt_x = option["opt-experimental"]
  local opt_keep = option.KEEP
  -- opt_details is not declared here; it becomes 0 when DETAILS is
  -- set, otherwise it keeps the falsy value of option.DETAILS
  opt_details = option.DETAILS and 0  -- upvalues for details display
  print = print or base.print
  if opt_eols then  -- forced settings, otherwise won't work properly
    opt_comments = true
    opt_whitespace = true
    opt_emptylines = true
  elseif opt_x then
    opt_whitespace = true
  end
  --------------------------------------------------------------------
  -- variable initialization
  --------------------------------------------------------------------
  stoks, sinfos, stoklns                -- set source lists
    = toklist, semlist, toklnlist
  local i = 1                           -- token position
  local tok, info                       -- current token
  local prev    -- position of last grammar token
                -- on same line (for TK_SPACE stuff)
  --------------------------------------------------------------------
  -- changes a token, info pair
  -- * I defaults to the current position i; omitted tok/info are
  --   stored as "", which is how a token is marked as deleted
  --------------------------------------------------------------------
  local function settoken(tok, info, I)
    I = I or i
    stoks[I] = tok or ""
    sinfos[I] = info or ""
  end
  --------------------------------------------------------------------
  -- experimental optimization for ';' operator
  --------------------------------------------------------------------
  if opt_x then
    while true do
      tok, info = stoks[i], sinfos[i]
      if tok == "TK_EOS" then           -- end of stream/pass
        break
      elseif tok == "TK_OP" and info == ";" then
        -- ';' operator found, since it is entirely optional, set it
        -- as a space to let whitespace optimization do the rest
        settoken("TK_SPACE", " ")
      end
      i = i + 1
    end
    repack_tokens()
  end
  --------------------------------------------------------------------
  -- processing loop (PASS 1)
  --------------------------------------------------------------------
  i = 1
  while true do
    tok, info = stoks[i], sinfos[i]
    ----------------------------------------------------------------
    local atstart = atlinestart(i)      -- set line begin flag
    if atstart then prev = nil end      -- new line: no grammar token yet
    ----------------------------------------------------------------
    if tok == "TK_EOS" then             -- end of stream/pass
      break
    ----------------------------------------------------------------
    elseif tok == "TK_KEYWORD" or       -- keywords, identifiers,
           tok == "TK_NAME" or          -- operators
           tok == "TK_OP" then
      -- TK_KEYWORD and TK_OP can't be optimized without a big
      -- optimization framework; it would be more of an optimizing
      -- compiler, not a source code compressor
      -- TK_NAME that are locals needs parser to analyze/optimize
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_NUMBER" then      -- numbers
      if opt_numbers then
        do_number(i)  -- optimize
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_STRING" or        -- strings, long strings
           tok == "TK_LSTRING" then
      if opt_strings then
        if tok == "TK_STRING" then
          do_string(i)  -- optimize
        else
          do_lstring(i)  -- optimize
        end
      end
      prev = i
    ----------------------------------------------------------------
    elseif tok == "TK_COMMENT" then     -- short comments
      if opt_comments then
        if i == 1 and sub(info, 1, 1) == "#" then
          -- keep shbang comment, trim whitespace
          do_comment(i)
        else
          -- safe to delete, as a TK_EOL (or TK_EOS) always follows
          settoken()  -- remove entirely
        end
      elseif opt_whitespace then        -- trim whitespace only
        do_comment(i)
      end
    ----------------------------------------------------------------
    elseif tok == "TK_LCOMMENT" then    -- long comments
      if keep_lcomment(opt_keep, info) then
        ------------------------------------------------------------
        -- if --keep, we keep a long comment if the given text is
        -- found in it; this is a feature to keep copyright or
        -- license texts
        if opt_whitespace then          -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      elseif opt_comments then
        local eols = commenteols(info)
        ------------------------------------------------------------
        -- prepare opt_emptylines case first, if a disposable token
        -- follows, current one is safe to dump, else keep a space;
        -- it is implied that the operation is safe for '-', because
        -- current is a TK_LCOMMENT, and must be separate from a '-'
        if is_faketoken[stoks[i + 1]] then
          settoken()  -- remove entirely
          tok = ""    -- remembered below: nothing left to reinterpret
        else
          settoken("TK_SPACE", " ")
        end
        ------------------------------------------------------------
        -- if there are embedded EOLs to keep and opt_emptylines is
        -- disabled, then switch the token into one or more EOLs
        if not opt_emptylines and eols > 0 then
          settoken("TK_EOL", rep("\n", eols))
        end
        ------------------------------------------------------------
        -- if optimizing whitespaces, force reinterpretation of the
        -- token to give a chance for the space to be optimized away
        -- (the i = i + 1 at the bottom of the loop brings us back to
        -- this same position)
        if opt_whitespace and tok ~= "" then
          i = i - 1  -- to reinterpret
        end
        ------------------------------------------------------------
      else  -- disabled case
        if opt_whitespace then          -- trim whitespace only
          do_lcomment(i)
        end
        prev = i
      end
    ----------------------------------------------------------------
    elseif tok == "TK_EOL" then         -- line endings
      if atstart and opt_emptylines then
        settoken()  -- remove entirely
      elseif info == "\r\n" or info == "\n\r" then
        -- normalize the rest of the EOLs for CRLF/LFCR only
        -- (note that TK_LCOMMENT can change into several EOLs)
        settoken("TK_EOL", "\n")
      end
    ----------------------------------------------------------------
    elseif tok == "TK_SPACE" then       -- whitespace
      if opt_whitespace then
        if atstart or atlineend(i) then
          -- delete leading and trailing whitespace
          settoken()  -- remove entirely
        else
          ------------------------------------------------------------
          -- at this point, since leading whitespace have been removed,
          -- there should be a either a real token or a TK_LCOMMENT
          -- prior to hitting this whitespace; the TK_LCOMMENT case
          -- only happens if opt_comments is disabled; so prev ~= nil
          local ptok = stoks[prev]
          if ptok == "TK_LCOMMENT" then
            -- previous TK_LCOMMENT can abut with anything
            settoken()  -- remove entirely
          else
            -- prev must be a grammar token; consecutive TK_SPACE
            -- tokens is impossible when optimizing whitespace
            local ntok = stoks[i + 1]
            if is_faketoken[ntok] then
              -- handle special case where a '-' cannot abut with
              -- either a short comment or a long comment
              if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
                 ptok == "TK_OP" and sinfos[prev] == "-" then
                -- keep token
              else
                settoken()  -- remove entirely
              end
            else--is_realtoken
              -- check a pair of grammar tokens, if can abut, then
              -- delete space token entirely, otherwise keep one space
              local s = checkpair(prev, i + 1)
              if s == "" then
                settoken()  -- remove entirely
              else
                settoken("TK_SPACE", " ")
              end
            end
          end
          ------------------------------------------------------------
        end
      end
    ----------------------------------------------------------------
    else
      error("unidentified token encountered")
    end
    ----------------------------------------------------------------
    i = i + 1
  end--while
  repack_tokens()
  --------------------------------------------------------------------
  -- processing loop (PASS 2)
  --------------------------------------------------------------------
  if opt_eols then
    i = 1
    -- aggressive EOL removal only works with most non-grammar tokens
    -- optimized away because it is a rather simple scheme -- basically
    -- it just checks 'real' token pairs around EOLs
    if stoks[1] == "TK_COMMENT" then
      -- first comment still existing must be shbang, skip whole line
      i = 3
    end
    while true do
      tok, info = stoks[i], sinfos[i]
      --------------------------------------------------------------
      if tok == "TK_EOS" then           -- end of stream/pass
        break
      --------------------------------------------------------------
      elseif tok == "TK_EOL" then       -- consider each TK_EOL
        local t1, t2 = stoks[i - 1], stoks[i + 1]
        if is_realtoken[t1] and is_realtoken[t2] then  -- sanity check
          local s = checkpair(i - 1, i + 1)
          -- an EOL directly before the end of stream is always dropped
          if s == "" or t2 == "TK_EOS" then
            settoken()  -- remove entirely
          end
        end
      end--if tok
      --------------------------------------------------------------
      i = i + 1
    end--while
    repack_tokens()
  end
  --------------------------------------------------------------------
  if opt_details and opt_details > 0 then print() end -- spacing
  return stoks, sinfos, stoklns
end
--end of inserted module
end

-- preload function for module optparser
preload.optparser =
function()
--start of inserted module
module "optparser"

local string = base.require "string"
local table = base.require "table"

----------------------------------------------------------------------
-- Letter frequencies for reducing symbol entropy (fixed version)
-- * Might help a wee bit when the output file is compressed
-- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies
-- * We use letter frequencies according to a Linotype keyboard, plus
--   the underscore, and both lower case and upper case letters.
-- * The arrangement below (LC, underscore, %d, UC) is arbitrary.
-- * This is certainly not optimal, but is quick-and-dirty and the
--   process has no significant overhead
----------------------------------------------------------------------

local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ"
local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ"

-- names that the allocator must never hand out
-- * all of them are keywords, except the trailing 'self'
local SKIP_NAME = {}
for v in string.gmatch([[ and break do else elseif end false for function if in local nil not or repeat return then true until while self]], "%S+") do
  SKIP_NAME[v] = true
end

------------------------------------------------------------------------
-- module state shared by the functions below
------------------------------------------------------------------------

local toklist, seminfolist,             -- token lists (lexer output)
      tokpar, seminfopar, xrefpar,      -- token lists (parser output)
      globalinfo, localinfo,            -- variable information tables
      statinfo,                         -- statement type table
      globaluniq, localuniq,            -- unique name tables
      var_new,                          -- index of new variable names
      varlist                           -- list of output variables

----------------------------------------------------------------------
-- builds a table of per-name totals from a variable information table
-- * result is keyed by name; each entry counts declarations (decl),
--   cross-referenced tokens (token) and bytes used by the name (size)
-- * entries of infotable with a 'decl' field (locals) are annotated
--   with id, xcount and, if accessed, first/last; otherwise (globals)
--   the per-name entry gets a back reference 'id' to the info entry
----------------------------------------------------------------------

local function preprocess(infotable)
  local uniqtable = {}
  for idx = 1, #infotable do
    local entry = infotable[idx]
    local name, refs = entry.name, entry.xref
    local nrefs = #refs
    ------------------------------------------------------------------
    local uniq = uniqtable[name]
    if not uniq then                    -- first sighting of this name
      uniq = { decl = 0, token = 0, size = 0, }
      uniqtable[name] = uniq
    end
    uniq.decl = uniq.decl + 1           -- accumulate the totals
    uniq.token = uniq.token + nrefs
    uniq.size = uniq.size + nrefs * #name
    ------------------------------------------------------------------
    if not entry.decl then              -- global: add a back ref
      uniq.id = idx
    else                                -- local: note the access range
      entry.id = idx
      entry.xcount = nrefs
      if nrefs > 1 then                 -- a single xref means the
        entry.first = refs[2]           -- local was never accessed
        entry.last = refs[nrefs]
      end
    end
  end
  return uniqtable
end

----------------------------------------------------------------------
-- re-orders LETTERS and ALPHANUM by the symbol frequencies actually
-- found in the source, in order to reduce entropy
-- * this may help further reduce the size of compressed sources
-- * the table is not exact, because parser optimizations run before
--   lexer optimizations; --keep block comments are missed as well
-- * comment tokens are only counted if "opt-comments" is not set
----------------------------------------------------------------------

local function recalc_for_entropy(option)
  local byte, char = string.byte, string.char
  -- token classes that contribute to the symbol frequencies
  local ACCEPT = {
    TK_KEYWORD = true,
    TK_NAME = true,
    TK_NUMBER = true,
    TK_STRING = true,
    TK_LSTRING = true,
  }
  if not option["opt-comments"] then
    ACCEPT.TK_COMMENT = true
    ACCEPT.TK_LCOMMENT = true
  end
  --------------------------------------------------------------------
  -- copy the semantic info list, blanking every token of a local
  -- (those names are about to be replaced anyway)
  --------------------------------------------------------------------
  local filtered = {}
  for pos = 1, #toklist do
    filtered[pos] = seminfolist[pos]
  end
  for k = 1, #localinfo do
    local lobj = localinfo[k]
    local refs = lobj.xref
    for n = 1, lobj.xcount do
      filtered[refs[n]] = ""
    end
  end
  --------------------------------------------------------------------
  -- count the bytes of all accepted tokens
  --------------------------------------------------------------------
  local freq = {}
  for code = 0, 255 do
    freq[code] = 0
  end
  for pos = 1, #toklist do
    if ACCEPT[toklist[pos]] then
      local text = filtered[pos]
      for n = 1, #text do
        local code = byte(text, n)
        freq[code] = freq[code] + 1
      end
    end
  end
  --------------------------------------------------------------------
  -- returns the given symbols re-sorted, most frequent first
  --------------------------------------------------------------------
  local function resort(symbols)
    local ranked = {}
    for n = 1, #symbols do
      local code = byte(symbols, n)
      ranked[n] = { c = code, freq = freq[code], }
    end
    table.sort(ranked,
      function(a, b)
        return a.freq > b.freq
      end
    )
    local out = {}
    for n = 1, #ranked do
      out[n] = char(ranked[n].c)
    end
    return table.concat(out)
  end
  --------------------------------------------------------------------
  LETTERS = resort(LETTERS)
  ALPHANUM = resort(ALPHANUM)
end

----------------------------------------------------------------------
-- allocates the next local variable name; returns the name and a flag
-- telling whether a global variable of the same name exists
-- * the first character comes from LETTERS, the rest from ALPHANUM
-- * keywords and names like 'self' are filtered by the caller
----------------------------------------------------------------------

local function new_var_name()
  local nletters, nalnum = #LETTERS, #ALPHANUM
  local seq = var_new
  var_new = seq + 1                     -- advance the allocator
  local name
  if seq < nletters then                -- single character name
    name = string.sub(LETTERS, seq + 1, seq + 1)
  else                                  -- longer name
    local span, width = nletters, 1     -- find how many chars needed
    repeat
      seq = seq - span
      span = span * nalnum
      width = width + 1
    until span > seq
    local digit = seq % nletters        -- leftmost char cycles fastest
    seq = (seq - digit) / nletters
    local chars = { string.sub(LETTERS, digit + 1, digit + 1) }
    for k = 2, width do                 -- remaining chars
      digit = seq % nalnum
      seq = (seq - digit) / nalnum
      chars[k] = string.sub(ALPHANUM, digit + 1, digit + 1)
    end
    name = table.concat(chars)
  end
  return name, globaluniq[name] ~= nil
end

----------------------------------------------------------------------
-- calculate and print some statistics
-- * probably better in main source, put here for now ---------------------------------------------------------------------- local function stats_summary(globaluniq, localuniq, afteruniq, option) local print = print or base.print local fmt = string.format local opt_details = option.DETAILS if option.QUIET then return end local uniq_g , uniq_li, uniq_lo, uniq_ti, uniq_to, -- stats needed decl_g, decl_li, decl_lo, decl_ti, decl_to, token_g, token_li, token_lo, token_ti, token_to, size_g, size_li, size_lo, size_ti, size_to = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 local function avg(c, l) -- safe average function if c == 0 then return 0 end return l / c end -------------------------------------------------------------------- -- collect statistics (note: globals do not have declarations!) -------------------------------------------------------------------- for name, uniq in base.pairs(globaluniq) do uniq_g = uniq_g + 1 token_g = token_g + uniq.token size_g = size_g + uniq.size end for name, uniq in base.pairs(localuniq) do uniq_li = uniq_li + 1 decl_li = decl_li + uniq.decl token_li = token_li + uniq.token size_li = size_li + uniq.size end for name, uniq in base.pairs(afteruniq) do uniq_lo = uniq_lo + 1 decl_lo = decl_lo + uniq.decl token_lo = token_lo + uniq.token size_lo = size_lo + uniq.size end uniq_ti = uniq_g + uniq_li decl_ti = decl_g + decl_li token_ti = token_g + token_li size_ti = size_g + size_li uniq_to = uniq_g + uniq_lo decl_to = decl_g + decl_lo token_to = token_g + token_lo size_to = size_g + size_lo -------------------------------------------------------------------- -- detailed stats: global list -------------------------------------------------------------------- if opt_details then local sorted = {} -- sort table of unique global names by size for name, uniq in base.pairs(globaluniq) do uniq.name = name sorted[#sorted + 1] = uniq end table.sort(sorted, function(v1, v2) return v1.size > v2.size end ) local tabf1, tabf2 = 
"%8s%8s%10s %s", "%8d%8d%10.2f %s" local hl = string.rep("-", 44) print("*** global variable list (sorted by size) ***\n"..hl) print(fmt(tabf1, "Token", "Input", "Input", "Global")) print(fmt(tabf1, "Count", "Bytes", "Average", "Name")) print(hl) for i = 1, #sorted do local uniq = sorted[i] print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name)) end print(hl) print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL")) print(hl.."\n") -------------------------------------------------------------------- -- detailed stats: local list -------------------------------------------------------------------- local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s" local hl = string.rep("-", 70) print("*** local variable list (sorted by allocation order) ***\n"..hl) print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global")) print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name")) print(hl) for i = 1, #varlist do -- iterate according to order assigned local name = varlist[i] local uniq = afteruniq[name] local old_t, old_s = 0, 0 for j = 1, #localinfo do -- find corresponding old names and calculate local obj = localinfo[j] if obj.name == name then old_t = old_t + obj.xcount old_s = old_s + obj.xcount * #obj.oldname end end print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s), uniq.size, avg(uniq.token, uniq.size), name)) end print(hl) print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li), size_lo, avg(token_lo, size_lo), "TOTAL")) print(hl.."\n") end--if opt_details -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f" local hl = string.rep("-", 58) print("*** local variable optimization summary ***\n"..hl) print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", 
"Average")) print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes")) print(hl) print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g))) print(hl) print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li))) print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti))) print(hl) print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo))) print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to))) print(hl.."\n") end ---------------------------------------------------------------------- -- experimental optimization for f("string") statements -- * safe to delete parentheses without adding whitespace, as both -- kinds of strings can abut with anything else ---------------------------------------------------------------------- local function optimize_func1() ------------------------------------------------------------------ local function is_strcall(j) -- find f("string") pattern local t1 = tokpar[j + 1] or "" local t2 = tokpar[j + 2] or "" local t3 = tokpar[j + 3] or "" if t1 == "(" and t2 == "" and t3 == ")" then return true end end ------------------------------------------------------------------ local del_list = {} -- scan for function pattern, local i = 1 -- tokens to be deleted are marked while i <= #tokpar do local id = statinfo[i] if id == "call" and is_strcall(i) then -- found & mark () del_list[i + 1] = true -- '(' del_list[i + 3] = true -- ')' i = i + 3 end i = i + 1 end ------------------------------------------------------------------ -- delete a token and adjust all relevant tables -- * currently invalidates globalinfo and localinfo (not updated), -- so any other optimization is done after processing locals -- (of course, we can also lex the source data again...) 
-- * faster one-pass token deletion ------------------------------------------------------------------ local i, dst, idend = 1, 1, #tokpar local del_list2 = {} while dst <= idend do -- process parser tables if del_list[i] then -- found a token to delete? del_list2[xrefpar[i]] = true i = i + 1 end if i > dst then if i <= idend then -- shift table items lower tokpar[dst] = tokpar[i] seminfopar[dst] = seminfopar[i] xrefpar[dst] = xrefpar[i] - (i - dst) statinfo[dst] = statinfo[i] else -- nil out excess entries tokpar[dst] = nil seminfopar[dst] = nil xrefpar[dst] = nil statinfo[dst] = nil end end i = i + 1 dst = dst + 1 end local i, dst, idend = 1, 1, #toklist while dst <= idend do -- process lexer tables if del_list2[i] then -- found a token to delete? i = i + 1 end if i > dst then if i <= idend then -- shift table items lower toklist[dst] = toklist[i] seminfolist[dst] = seminfolist[i] else -- nil out excess entries toklist[dst] = nil seminfolist[dst] = nil end end i = i + 1 dst = dst + 1 end end ---------------------------------------------------------------------- -- local variable optimization ---------------------------------------------------------------------- local function optimize_locals(option) var_new = 0 -- reset variable name allocator varlist = {} ------------------------------------------------------------------ -- preprocess global/local tables, handle entropy reduction ------------------------------------------------------------------ globaluniq = preprocess(globalinfo) localuniq = preprocess(localinfo) if option["opt-entropy"] then -- for entropy improvement recalc_for_entropy(option) end ------------------------------------------------------------------ -- build initial declared object table, then sort according to -- token count, this might help assign more tokens to more common -- variable names such as 'e' thus possibly reducing entropy -- * an object knows its localinfo index via its 'id' field -- * special handling for "self" special local 
(parameter) here ------------------------------------------------------------------ local object = {} for i = 1, #localinfo do object[i] = localinfo[i] end table.sort(object, -- sort largest first function(v1, v2) return v1.xcount > v2.xcount end ) ------------------------------------------------------------------ -- the special "self" function parameters must be preserved -- * the allocator below will never use "self", so it is safe to -- keep those implicit declarations as-is ------------------------------------------------------------------ local temp, j, gotself = {}, 1, false for i = 1, #object do local obj = object[i] if not obj.isself then temp[j] = obj j = j + 1 else gotself = true end end object = temp ------------------------------------------------------------------ -- a simple first-come first-served heuristic name allocator, -- note that this is in no way optimal... -- * each object is a local variable declaration plus existence -- * the aim is to assign short names to as many tokens as possible, -- so the following tries to maximize name reuse -- * note that we preserve sort order ------------------------------------------------------------------ local nobject = #object while nobject > 0 do local varname, gcollide repeat varname, gcollide = new_var_name() -- collect a variable name until not SKIP_NAME[varname] -- skip all special names varlist[#varlist + 1] = varname -- keep a list local oleft = nobject ------------------------------------------------------------------ -- if variable name collides with an existing global, the name -- cannot be used by a local when the name is accessed as a global -- during which the local is alive (between 'act' to 'rem'), so -- we drop objects that collides with the corresponding global ------------------------------------------------------------------ if gcollide then -- find the xref table of the global local gref = globalinfo[globaluniq[varname].id].xref local ngref = #gref -- enumerate for all current objects; 
all are valid at this point for i = 1, nobject do local obj = object[i] local act, rem = obj.act, obj.rem -- 'live' range of local -- if rem < 0, it is a -id to a local that had the same name -- so follow rem to extend it; does this make sense? while rem < 0 do rem = localinfo[-rem].rem end local drop for j = 1, ngref do local p = gref[j] if p >= act and p <= rem then drop = true end -- in range? end if drop then obj.skip = true oleft = oleft - 1 end end--for end--if gcollide ------------------------------------------------------------------ -- now the first unassigned local (since it's sorted) will be the -- one with the most tokens to rename, so we set this one and then -- eliminate all others that collides, then any locals that left -- can then reuse the same variable name; this is repeated until -- all local declaration that can use this name is assigned -- * the criteria for local-local reuse/collision is: -- A is the local with a name already assigned -- B is the unassigned local under consideration -- => anytime A is accessed, it cannot be when B is 'live' -- => to speed up things, we have first/last accesses noted ------------------------------------------------------------------ while oleft > 0 do local i = 1 while object[i].skip do -- scan for first object i = i + 1 end ------------------------------------------------------------------ -- first object is free for assignment of the variable name -- [first,last] gives the access range for collision checking ------------------------------------------------------------------ oleft = oleft - 1 local obja = object[i] i = i + 1 obja.newname = varname obja.skip = true obja.done = true local first, last = obja.first, obja.last local xref = obja.xref ------------------------------------------------------------------ -- then, scan all the rest and drop those colliding -- if A was never accessed then it'll never collide with anything -- otherwise trivial skip if: -- * B was activated after A's last access (last < 
act) -- * B was removed before A's first access (first > rem) -- if not, see detailed skip below... ------------------------------------------------------------------ if first and oleft > 0 then -- must have at least 1 access local scanleft = oleft while scanleft > 0 do while object[i].skip do -- next valid object i = i + 1 end scanleft = scanleft - 1 local objb = object[i] i = i + 1 local act, rem = objb.act, objb.rem -- live range of B -- if rem < 0, extend range of rem thru' following local while rem < 0 do rem = localinfo[-rem].rem end -------------------------------------------------------- if not(last < act or first > rem) then -- possible collision -------------------------------------------------------- -- B is activated later than A or at the same statement, -- this means for no collision, A cannot be accessed when B -- is alive, since B overrides A (or is a peer) -------------------------------------------------------- if act >= obja.act then for j = 1, obja.xcount do -- ... then check every access local p = xref[j] if p >= act and p <= rem then -- A accessed when B live! 
oleft = oleft - 1 objb.skip = true break end end--for -------------------------------------------------------- -- A is activated later than B, this means for no collision, -- A's access is okay since it overrides B, but B's last -- access need to be earlier than A's activation time -------------------------------------------------------- else if objb.last and objb.last >= obja.act then oleft = oleft - 1 objb.skip = true end end end -------------------------------------------------------- if oleft == 0 then break end end end--if first ------------------------------------------------------------------ end--while ------------------------------------------------------------------ -- after assigning all possible locals to one variable name, the -- unassigned locals/objects have the skip field reset and the table -- is compacted, to hopefully reduce iteration time ------------------------------------------------------------------ local temp, j = {}, 1 for i = 1, nobject do local obj = object[i] if not obj.done then obj.skip = false temp[j] = obj j = j + 1 end end object = temp -- new compacted object table nobject = #object -- objects left to process ------------------------------------------------------------------ end--while ------------------------------------------------------------------ -- after assigning all locals with new variable names, we can -- patch in the new names, and reprocess to get 'after' stats ------------------------------------------------------------------ for i = 1, #localinfo do -- enumerate all locals local obj = localinfo[i] local xref = obj.xref if obj.newname then -- if got new name, patch it in for j = 1, obj.xcount do local p = xref[j] -- xrefs indexes the token list seminfolist[p] = obj.newname end obj.name, obj.oldname -- adjust names = obj.newname, obj.name else obj.oldname = obj.name -- for cases like 'self' end end ------------------------------------------------------------------ -- deal with statistics output 
------------------------------------------------------------------
  if gotself then  -- add 'self' to end of list
    varlist[#varlist + 1] = "self"
  end
  -- re-run preprocess() on localinfo to get the 'after' figures, then
  -- hand the before/after figures to stats_summary()
  local afteruniq = preprocess(localinfo)
  stats_summary(globaluniq, localuniq, afteruniq, option)
end

----------------------------------------------------------------------
-- main entry point
-- * option: option lookup table; only the keys "opt-locals" and
--   "opt-experimental" are read directly in this function
-- * _toklist, _seminfolist: token list and semantic info list from
--   the lexer
-- * xinfo: table from the parser; its toklist, seminfolist, xreflist,
--   globalinfo, localinfo and statinfo fields are copied out
-- * the values are assigned to variables that are not declared in
--   this function (declarations are outside the visible range)
-- * no value is returned
----------------------------------------------------------------------

function optimize(option, _toklist, _seminfolist, xinfo)
  -- set tables
  toklist, seminfolist                  -- from lexer
    = _toklist, _seminfolist
  tokpar, seminfopar, xrefpar           -- from parser
    = xinfo.toklist, xinfo.seminfolist, xinfo.xreflist
  globalinfo, localinfo, statinfo       -- from parser
    = xinfo.globalinfo, xinfo.localinfo, xinfo.statinfo
  ------------------------------------------------------------------
  -- optimize locals
  ------------------------------------------------------------------
  if option["opt-locals"] then
    optimize_locals(option)
  end
  ------------------------------------------------------------------
  -- other optimizations
  ------------------------------------------------------------------
  if option["opt-experimental"] then    -- experimental
    optimize_func1()
    -- WARNING globalinfo and localinfo now invalidated!
end end
--end of inserted module
end

-- preload function for module equiv
preload.equiv =
function()
--start of inserted module
module "equiv"

local string = base.require "string"
local loadstring = base.loadstring
local sub = string.sub
local match = string.match
local dump = string.dump
local byte = string.byte

--[[--------------------------------------------------------------------
-- variable and data initialization
----------------------------------------------------------------------]]

local is_realtoken = {          -- significant (grammar) tokens
  TK_KEYWORD = true,
  TK_NAME = true,
  TK_NUMBER = true,
  TK_STRING = true,
  TK_LSTRING = true,
  TK_OP = true,
  TK_EOS = true,
}

local option, llex, warn

--[[--------------------------------------------------------------------
-- functions
----------------------------------------------------------------------]]

------------------------------------------------------------------------
-- initialization function
-- * _option: option table (DETAILS and "opt-locals" are read later)
-- * _llex: lexer module, used through init(), llex(), tok, seminfo
-- * _warn: table in which SRC_EQUIV / BIN_EQUIV flags are set
------------------------------------------------------------------------

function init(_option, _llex, _warn)
  option = _option
  llex = _llex
  warn = _warn
end

------------------------------------------------------------------------
-- function to build lists containing a 'normal' lexer stream
-- * lexes s and returns two parallel lists (token types, seminfo)
--   holding only the entries whose type is in is_realtoken
------------------------------------------------------------------------

local function build_stream(s)
  llex.init(s)
  llex.llex()
  local stok, sseminfo  -- source list (with whitespace elements)
    = llex.tok, llex.seminfo
  local tok, seminfo    -- processed list (real elements only)
    = {}, {}
  for i = 1, #stok do
    local t = stok[i]
    if is_realtoken[t] then
      tok[#tok + 1] = t
      seminfo[#seminfo + 1] = sseminfo[i]
    end
  end--for
  return tok, seminfo
end

------------------------------------------------------------------------
-- test source (lexer stream) equivalence
-- * z is the original source, dat is the compressed source
-- * on a mismatch, warn.SRC_EQUIV is set (and a message is printed if
--   option.DETAILS is set); nothing is returned
------------------------------------------------------------------------

function source(z, dat)
  --------------------------------------------------------------------
  -- function to return a dumped string for seminfo compares
  -- * returns nil if the literal does not compile
  --------------------------------------------------------------------
  local function dumpsem(s)
    local sf = loadstring("return "..s, "z")
    if sf then
      return dump(sf)
    end
  end
  --------------------------------------------------------------------
  -- mark and optionally report non-equivalence
  --------------------------------------------------------------------
  local function bork(msg)
    if option.DETAILS then base.print("SRCEQUIV: "..msg) end
    warn.SRC_EQUIV = true
  end
  --------------------------------------------------------------------
  -- get lexer streams for both source strings, compare
  --------------------------------------------------------------------
  local tok1, seminfo1 = build_stream(z)        -- original
  local tok2, seminfo2 = build_stream(dat)      -- compressed
  --------------------------------------------------------------------
  -- compare shbang lines ignoring EOL
  --------------------------------------------------------------------
  local sh1 = match(z, "^(#[^\r\n]*)")
  local sh2 = match(dat, "^(#[^\r\n]*)")
  if sh1 or sh2 then
    if not sh1 or not sh2 or sh1 ~= sh2 then
      bork("shbang lines different")
    end
  end
  --------------------------------------------------------------------
  -- compare by simple count
  --------------------------------------------------------------------
  if #tok1 ~= #tok2 then
    bork("count "..#tok1.." "..#tok2)
    return
  end
  --------------------------------------------------------------------
  -- compare each element the best we can
  --------------------------------------------------------------------
  for i = 1, #tok1 do
    local t1, t2 = tok1[i], tok2[i]
    local s1, s2 = seminfo1[i], seminfo2[i]
    if t1 ~= t2 then  -- by type
      bork("type ["..i.."] "..t1.." "..t2)
      break
    end
    if t1 == "TK_KEYWORD" or t1 == "TK_NAME" or t1 == "TK_OP" then
      if t1 == "TK_NAME" and option["opt-locals"] then
        -- can't compare identifiers of locals that are optimized
      elseif s1 ~= s2 then  -- by semantic info (simple)
        bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
        break
      end
    elseif t1 == "TK_EOS" then
      -- no seminfo to compare
    else-- "TK_NUMBER" or "TK_STRING" or "TK_LSTRING"
      -- compare 'binary' form, so dump a function
      local s1b,s2b = dumpsem(s1), dumpsem(s2)
      if not s1b or not s2b or s1b ~= s2b then
        bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
        break
      end
    end
  end--for
  --------------------------------------------------------------------
  -- successful comparison if end is reached with no borks
  --------------------------------------------------------------------
end

------------------------------------------------------------------------
-- test binary chunk equivalence
-- * z is the original source, dat is the compressed source; both are
--   compiled with loadstring() and dumped with string.dump()
-- * on a mismatch, warn.BIN_EQUIV is set (and a message is printed if
--   option.DETAILS is set); nothing is returned
-- * every failure path returns right after reporting, so the walk
--   never continues with missing data
------------------------------------------------------------------------

function binary(z, dat)
  local TNIL     = 0
  local TBOOLEAN = 1
  local TNUMBER  = 3
  local TSTRING  = 4
  --------------------------------------------------------------------
  -- mark and optionally report non-equivalence
  --------------------------------------------------------------------
  local function bork(msg)
    if option.DETAILS then base.print("BINEQUIV: "..msg) end
    warn.BIN_EQUIV = true
  end
  --------------------------------------------------------------------
  -- function to remove shbang line so that loadstring runs
  --------------------------------------------------------------------
  local function zap_shbang(s)
    local shbang = match(s, "^(#[^\r\n]*\r?\n?)")
    if shbang then  -- cut out shbang
      s = sub(s, #shbang + 1)
    end
    return s
  end
  --------------------------------------------------------------------
  -- attempt to compile, then dump to get binary chunk string
  --------------------------------------------------------------------
  local cz = loadstring(zap_shbang(z), "z")
  if not cz then
    bork("failed to compile original sources for binary chunk comparison")
    return
  end
  local cdat = loadstring(zap_shbang(dat), "z")
  if not cdat then
    bork("failed to compile compressed result for binary chunk comparison")
    return  -- dump(nil) below would raise an error
  end
  -- if loadstring() works, dump assuming string.dump() is error-free
  local c1 = { i = 1, dat = dump(cz) }
  c1.len = #c1.dat
  local c2 = { i = 1, dat = dump(cdat) }
  c2.len = #c2.dat
  --------------------------------------------------------------------
  -- support functions to handle binary chunk reading
  -- * a reader c is a table: dat (chunk string), i (next position),
  --   len (length of dat)
  -- * readers return nil when not enough bytes are left
  --------------------------------------------------------------------
  local endian,
        sz_int, sz_sizet,               -- sizes of data types
        sz_inst, sz_number,
        getint, getsizet
  --------------------------------------------------------------------
  local function ensure(c, sz)          -- check if bytes exist
    if c.i + sz - 1 > c.len then return end
    return true
  end
  --------------------------------------------------------------------
  local function skip(c, sz)            -- skip some bytes
    if not sz then sz = 1 end
    c.i = c.i + sz
  end
  --------------------------------------------------------------------
  local function getbyte(c)             -- return a byte value
    local i = c.i
    if i > c.len then return end
    local d = sub(c.dat, i, i)
    c.i = i + 1
    return byte(d)
  end
  --------------------------------------------------------------------
  local function getint_l(c)            -- return an int value (little-endian)
    local n, scale = 0, 1
    if not ensure(c, sz_int) then return end
    for j = 1, sz_int do
      n = n + scale * getbyte(c)
      scale = scale * 256
    end
    return n
  end
  --------------------------------------------------------------------
  local function getint_b(c)            -- return an int value (big-endian)
    local n = 0
    if not ensure(c, sz_int) then return end
    for j = 1, sz_int do
      n = n * 256 + getbyte(c)
    end
    return n
  end
  --------------------------------------------------------------------
  local function getsizet_l(c)          -- return a size_t value (little-endian)
    local n, scale = 0, 1
    if not ensure(c, sz_sizet) then return end
    for j = 1, sz_sizet do
      n = n + scale * getbyte(c)
      scale = scale * 256
    end
    return n
  end
  --------------------------------------------------------------------
  local function getsizet_b(c)          -- return a size_t value (big-endian)
    local n = 0
    if not ensure(c, sz_sizet) then return end
    for j = 1, sz_sizet do
      n = n * 256 + getbyte(c)
    end
    return n
  end
  --------------------------------------------------------------------
  local function getblock(c, sz)        -- return a block (as a string)
    local i = c.i
    local j = i + sz - 1
    if j > c.len then return end
    local d = sub(c.dat, i, j)
    c.i = i + sz
    return d
  end
  --------------------------------------------------------------------
  local function getstring(c)           -- return a string
    local n = getsizet(c)
    if not n then return end
    if n == 0 then return "" end
    return getblock(c, n)
  end
  --------------------------------------------------------------------
  local function goodbyte(c1, c2)       -- compare byte value
    local b1, b2 = getbyte(c1), getbyte(c2)
    if not b1 or not b2 or b1 ~= b2 then
      return
    end
    return b1
  end
  --------------------------------------------------------------------
  local function badbyte(c1, c2)        -- compare byte value
    local b = goodbyte(c1, c2)
    if not b then return true end
  end
  --------------------------------------------------------------------
  local function goodint(c1, c2)        -- compare int value
    local i1, i2 = getint(c1), getint(c2)
    if not i1 or not i2 or i1 ~= i2 then
      return
    end
    return i1
  end
  --------------------------------------------------------------------
  -- recursively-called function to compare function prototypes
  -- * returns true if both prototypes were read and agree, otherwise
  --   reports via bork() and returns nil
  --------------------------------------------------------------------
  local function getfunc(c1, c2)
    -- source name (ignored)
    if not getstring(c1) or not getstring(c2) then
      bork("bad source name"); return
    end
    -- linedefined (ignored)
    if not getint(c1) or not getint(c2) then
      bork("bad linedefined"); return
    end
    -- lastlinedefined (ignored)
    if not getint(c1) or not getint(c2) then
      bork("bad lastlinedefined"); return
    end
    if not (ensure(c1, 4) and ensure(c2, 4)) then
      bork("prototype header broken"); return
    end
    -- nups (compared)
    if badbyte(c1, c2) then
      bork("bad nups"); return
    end
    -- numparams (compared)
    if badbyte(c1, c2) then
      bork("bad numparams"); return
    end
    -- is_vararg (compared)
    if badbyte(c1, c2) then
      bork("bad is_vararg"); return
    end
    -- maxstacksize (compared)
    if badbyte(c1, c2) then
      bork("bad maxstacksize"); return
    end
    -- code (compared)
    local ncode = goodint(c1, c2)
    if not ncode then
      bork("bad ncode"); return
    end
    local code1 = getblock(c1, ncode * sz_inst)
    local code2 = getblock(c2, ncode * sz_inst)
    if not code1 or not code2 or code1 ~= code2 then
      bork("bad code block"); return
    end
    -- constants (compared)
    local nconst = goodint(c1, c2)
    if not nconst then
      bork("bad nconst"); return
    end
    for i = 1, nconst do
      local ctype = goodbyte(c1, c2)
      if not ctype then
        bork("bad const type"); return
      end
      if ctype == TBOOLEAN then
        if badbyte(c1, c2) then
          bork("bad boolean value"); return
        end
      elseif ctype == TNUMBER then
        local num1 = getblock(c1, sz_number)
        local num2 = getblock(c2, sz_number)
        if not num1 or not num2 or num1 ~= num2 then
          bork("bad number value"); return
        end
      elseif ctype == TSTRING then
        local str1 = getstring(c1)
        local str2 = getstring(c2)
        if not str1 or not str2 or str1 ~= str2 then
          bork("bad string value"); return
        end
      end
    end
    -- prototypes (compared recursively)
    local nproto = goodint(c1, c2)
    if not nproto then
      bork("bad nproto"); return
    end
    for i = 1, nproto do
      if not getfunc(c1, c2) then
        bork("bad function prototype"); return
      end
    end
    -- debug information (ignored)
    -- lineinfo (ignored)
    local sizelineinfo1 = getint(c1)
    if not sizelineinfo1 then
      bork("bad sizelineinfo1"); return
    end
    local sizelineinfo2 = getint(c2)
    if not sizelineinfo2 then
      bork("bad sizelineinfo2"); return
    end
    if not getblock(c1, sizelineinfo1 * sz_int) then
      bork("bad lineinfo1"); return
    end
    if not getblock(c2, sizelineinfo2 * sz_int) then
      bork("bad lineinfo2"); return
    end
    -- locvars (ignored)
    local sizelocvars1 = getint(c1)
    if not sizelocvars1 then
      bork("bad sizelocvars1"); return
    end
    local sizelocvars2 = getint(c2)
    if not sizelocvars2 then
      bork("bad sizelocvars2"); return
    end
    for i = 1, sizelocvars1 do
      if not getstring(c1) or not getint(c1) or not getint(c1) then
        bork("bad locvars1"); return
      end
    end
    for i = 1, sizelocvars2 do
      if not getstring(c2) or not getint(c2) or not getint(c2) then
        bork("bad locvars2"); return
      end
    end
    -- upvalues (ignored)
    local sizeupvalues1 = getint(c1)
    if not sizeupvalues1 then
      bork("bad sizeupvalues1"); return
    end
    local sizeupvalues2 = getint(c2)
    if not sizeupvalues2 then
      bork("bad sizeupvalues2"); return
    end
    for i = 1, sizeupvalues1 do
      if not getstring(c1) then bork("bad upvalues1"); return end
    end
    for i = 1, sizeupvalues2 do
      if not getstring(c2) then bork("bad upvalues2"); return end
    end
    return true
  end
  --------------------------------------------------------------------
  -- parse binary chunks to verify equivalence
  -- * for headers, handle sizes to allow a degree of flexibility
  -- * assume a valid binary chunk is generated, since it was not
  --   generated via external means
  --------------------------------------------------------------------
  if not (ensure(c1, 12) and ensure(c2, 12)) then
    bork("header broken")
    return  -- the size fields below cannot be read
  end
  skip(c1, 6)                   -- skip signature(4), version, format
  endian    = getbyte(c1)       -- 1 = little endian
  sz_int    = getbyte(c1)       -- get data type sizes
  sz_sizet  = getbyte(c1)
  sz_inst   = getbyte(c1)
  sz_number = getbyte(c1)
  skip(c1)                      -- skip integral flag
  skip(c2, 12)                  -- skip other header (assume similar)
  if endian == 1 then           -- set for endian sensitive data we need
    getint   = getint_l
    getsizet = getsizet_l
  else
    getint   = getint_b
    getsizet = getsizet_b
  end
  -- get prototype at root; a failure has already been reported, so do
  -- not add a second "inconsistent" report on top of it
  if not getfunc(c1, c2) then
    return
  end
  if c1.i ~= c1.len + 1 then
    bork("inconsistent binary chunk1"); return
  elseif c2.i ~= c2.len + 1 then
    bork("inconsistent binary chunk2"); return
  end
  --------------------------------------------------------------------
  -- successful comparison if end is reached with no borks
  --------------------------------------------------------------------
end
--end of inserted module
end

-- preload function for module plugin/html
preload["plugin/html"] =
function()
--start of inserted module
module "plugin/html"

local string = base.require "string"
local table = base.require "table"
local io = base.require "io"
------------------------------------------------------------------------
-- constants and configuration
------------------------------------------------------------------------

local HTML_EXT = ".html"
-- characters that must not appear literally in HTML text
local ENTITIES = {
  ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;",
  ["'"] = "&apos;", ["\""] = "&quot;",
}

-- simple headers and footers
-- * HEADER takes two string.format() values: page title, stylesheet
-- * NOTE(review): markup rebuilt to match the STYLESHEET selectors
--   (pre.code, span.<class>); compare with the upstream plugin
local HEADER = [[
<html>
<head>
<title>%s</title>
<style type="text/css">
%s</style>
</head>
<body>
<pre class="code">
]]
local FOOTER = [[
</pre>
</body>
</html>
]]
-- for more, please see wikimain.css from the Lua wiki site
local STYLESHEET = [[
BODY {
    background: white;
    color: navy;
}
pre.code { color: black; }
span.comment { color: #00a000; }
span.string  { color: #009090; }
span.keyword { color: black; font-weight: bold; }
span.number { color: #993399; }
span.operator { }
span.name { }
span.global { color: #ff0000; font-weight: bold; }
span.local { color: #0000ff; font-weight: bold; }
]]

------------------------------------------------------------------------
-- option handling, plays nice with --quiet option
------------------------------------------------------------------------

local option                    -- local reference to list of options
local srcfl, destfl             -- filenames
local toklist, seminfolist, toklnlist  -- token data

local function print(...)       -- handle quiet option
  if option.QUIET then return end
  base.print(...)
end

------------------------------------------------------------------------
-- initialization
-- * the output name is the source name with its extension replaced
--   by HTML_EXT, unless option.OUTPUT_FILE is set
-- * raises an error if output and input names are identical
------------------------------------------------------------------------

function init(_option, _srcfl, _destfl)
  option = _option
  srcfl = _srcfl
  local extb = string.find(srcfl, "%.[^%.%\\%/]*$")
  local basename = srcfl
  if extb and extb > 1 then     -- a leading dot is not an extension
    basename = string.sub(srcfl, 1, extb - 1)
  end
  destfl = basename..HTML_EXT
  if option.OUTPUT_FILE then
    destfl = option.OUTPUT_FILE
  end
  if srcfl == destfl then
    base.error("output filename identical to input filename")
  end
end

------------------------------------------------------------------------
-- message display, post-load processing
------------------------------------------------------------------------

function post_load(z)
  print([[
HTML plugin module for LuaSrcDiet
]])
  print("Exporting: "..srcfl.." -> "..destfl.."\n")
end

------------------------------------------------------------------------
-- post-lexing processing, can work on lexer table output
------------------------------------------------------------------------

function post_lex(_toklist, _seminfolist, _toklnlist)
  toklist, seminfolist, toklnlist
    = _toklist, _seminfolist, _toklnlist
end

------------------------------------------------------------------------
-- escape the usual suspects for HTML/XML
-- * every key of ENTITIES found in z is replaced by its entity
------------------------------------------------------------------------

local function do_entities(z)
  -- parentheses drop gsub's second result (the match count)
  return (string.gsub(z, "[&<>'\"]", ENTITIES))
end

------------------------------------------------------------------------
-- save source code to file
-- * raises an error if the file cannot be opened or written
------------------------------------------------------------------------

local function save_file(fname, dat)
  local OUTF = io.open(fname, "wb")
  if not OUTF then base.error("cannot open \""..fname.."\" for writing") end
  local status = OUTF:write(dat)
  OUTF:close()                  -- close before reporting a failed write
  if not status then base.error("cannot write to \""..fname.."\"") end
end

------------------------------------------------------------------------
-- post-parsing processing, gives globalinfo, localinfo
-- * retags identifier tokens in toklist as TK_GLOBAL / TK_LOCAL using
--   the xref lists, writes the HTML file, then sets option.EXIT
------------------------------------------------------------------------

function post_parse(globalinfo, localinfo)
  local html = {}
  local function add(s)         -- html helpers
    html[#html + 1] = s
  end
  local function span(class, s)
    add('<span class="'..class..'">'..s..'</span>')
  end
  ----------------------------------------------------------------------
  for i = 1, #globalinfo do     -- mark global identifiers as TK_GLOBAL
    local obj = globalinfo[i]
    local xref = obj.xref
    for j = 1, #xref do
      local p = xref[j]
      toklist[p] = "TK_GLOBAL"
    end
  end--for
  ----------------------------------------------------------------------
  for i = 1, #localinfo do      -- mark local identifiers as TK_LOCAL
    local obj = localinfo[i]
    local xref = obj.xref
    for j = 1, #xref do
      local p = xref[j]
      toklist[p] = "TK_LOCAL"
    end
  end--for
  ----------------------------------------------------------------------
  add(string.format(HEADER,     -- header and leading stuff
    do_entities(srcfl),
    STYLESHEET))
  for i = 1, #toklist do        -- enumerate token list
    local tok, info = toklist[i], seminfolist[i]
    if tok == "TK_KEYWORD" then
      span("keyword", info)
    elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
      span("string", do_entities(info))
    elseif tok == "TK_COMMENT" or tok == "TK_LCOMMENT" then
      span("comment", do_entities(info))
    elseif tok == "TK_GLOBAL" then
      span("global", info)
    elseif tok == "TK_LOCAL" then
      span("local", info)
    elseif tok == "TK_NAME" then
      span("name", info)
    elseif tok == "TK_NUMBER" then
      span("number", info)
    elseif tok == "TK_OP" then
      span("operator", do_entities(info))
    elseif tok ~= "TK_EOS" then  -- TK_EOL, TK_SPACE
      add(info)
    end
  end--for
  add(FOOTER)
  save_file(destfl, table.concat(html))
  option.EXIT = true
end
--end of inserted module
end

-- preload function for module plugin/sloc
preload["plugin/sloc"] =
function()
--start of inserted module
module "plugin/sloc"

local string = base.require "string"
local table = base.require "table"

------------------------------------------------------------------------
-- initialization
-- * forces option.QUIET to true
------------------------------------------------------------------------

local option                    -- local reference to list of options
local srcfl                     -- source file name

function init(_option, _srcfl, _destfl)
  option = _option
  option.QUIET = true
  srcfl = _srcfl
end

------------------------------------------------------------------------
-- splits a block into a table of lines (minus EOLs)
------------------------------------------------------------------------

local function split(blk)
  local lines = {}
  local i, nblk = 1, #blk
  while i <= nblk do
    local p, q, r, s = string.find(blk, "([\r\n])([\r\n]?)", i)
    if not p then
      p = nblk + 1
    end
    lines[#lines + 1] = string.sub(blk, i, p - 1)
    i = p + 1
    if p < nblk and q > p and r ~= s then  -- handle Lua-style CRLF, LFCR
      i = i + 1
    end
  end
  return lines
end

------------------------------------------------------------------------
-- post-lexing processing, can work on lexer table output
-- * counts the distinct lines that carry a significant token, prints
--   "<source name>: <count>" and sets option.EXIT
------------------------------------------------------------------------

function post_lex(toklist, seminfolist, toklnlist)
  local lnow, sloc = 0, 0
  local function chk(ln)        -- if a new line, count it as an SLOC
    if ln > lnow then           -- new line # must be > old line #
      sloc = sloc + 1; lnow = ln
    end
  end
  for i = 1, #toklist do        -- enumerate over all tokens
    local tok, info, ln
      = toklist[i], seminfolist[i], toklnlist[i]
    --------------------------------------------------------------------
    if tok == "TK_KEYWORD" or tok == "TK_NAME" or       -- significant
       tok == "TK_NUMBER" or tok == "TK_OP" then
      chk(ln)
    --------------------------------------------------------------------
    -- Both TK_STRING and TK_LSTRING may be multi-line, hence, a loop
    -- is needed in order to mark off lines one-by-one. Since llex.lua
    -- currently returns the line number of the last part of the string,
    -- we must subtract in order to get the starting line number.
    --------------------------------------------------------------------
    elseif tok == "TK_STRING" then      -- possible multi-line
      local t = split(info)
      ln = ln - #t + 1
      for j = 1, #t do
        chk(ln); ln = ln + 1
      end
    --------------------------------------------------------------------
    elseif tok == "TK_LSTRING" then     -- possible multi-line
      local t = split(info)
      ln = ln - #t + 1
      for j = 1, #t do
        if t[j] ~= "" then chk(ln) end  -- blank lines are not counted
        ln = ln + 1
      end
    --------------------------------------------------------------------
    -- other tokens are comments or whitespace and are ignored
    --------------------------------------------------------------------
    end
  end--for
  base.print(srcfl..": "..sloc) -- display result
  option.EXIT = true
end
--end of inserted module
end

-- support modules
local llex = require "llex"
local lparser = require "lparser"
local optlex = require "optlex"
local optparser = require "optparser"
local equiv = require "equiv"
local plugin

--[[--------------------------------------------------------------------
-- messages and textual data
----------------------------------------------------------------------]]

local MSG_TITLE = [[ LuaSrcDiet: Puts your Lua 5.1 source code on a diet Version 0.12.1 (20120407) Copyright (c) 2012 Kein-Hong Man The COPYRIGHT file describes the conditions under which this software may be distributed.
]]

-- usage screen; the two %s slots are filled later with the feature
-- list and the default settings
-- NOTE(review): the <file>, <suffix>, <msg> and <module> placeholders
-- were missing from this text and are reconstructed here; compare
-- with the upstream help screen
local MSG_USAGE = [[
usage: LuaSrcDiet [options] [filenames]

example:
  >LuaSrcDiet myscript.lua -o myscript_.lua

options:
  -v, --version       prints version information
  -h, --help          prints usage information
  -o <file>           specify file name to write output
  -s <suffix>         suffix for output files (default '_')
  --keep <msg>        keep block comment with <msg> inside
  --plugin <module>   run <module> in plugin/ directory
  -                   stop handling arguments

  (optimization levels)
  --none              all optimizations off (normalizes EOLs only)
  --basic             lexer-based optimizations only
  --maximum           maximize reduction of source

  (informational)
  --quiet             process files quietly
  --read-only         read file and print token stats only
  --dump-lexer        dump raw tokens from lexer to stdout
  --dump-parser       dump variable tracking tables from parser
  --details           extra info (strings, numbers, locals)

features (to disable, insert 'no' prefix like --noopt-comments):
%s
default settings:
%s]]

------------------------------------------------------------------------
-- optimization options, for ease of switching on and off
-- * positive to enable optimization, negative (no) to disable
-- * these options should follow --opt-* and --noopt-* style for now
-- * each entry is  option,'description'
------------------------------------------------------------------------

local OPTION = [[
--opt-comments,'remove comments and block comments'
--opt-whitespace,'remove whitespace excluding EOLs'
--opt-emptylines,'remove empty lines'
--opt-eols,'all above, plus remove unnecessary EOLs'
--opt-strings,'optimize strings and long strings'
--opt-numbers,'optimize numbers'
--opt-locals,'optimize local variable names'
--opt-entropy,'tries to reduce symbol entropy of locals'
--opt-srcequiv,'insist on source (lexer stream) equivalence'
--opt-binequiv,'insist on binary chunk equivalence'
--opt-experimental,'apply experimental optimizations'
]]

-- preset configuration
local DEFAULT_CONFIG = [[
  --opt-comments --opt-whitespace --opt-emptylines
  --opt-numbers --opt-locals
  --opt-srcequiv --opt-binequiv
]]
-- override configurations
-- * MUST explicitly enable/disable
--   everything for total option replacement
local BASIC_CONFIG = [[ --opt-comments --opt-whitespace --opt-emptylines --noopt-eols --noopt-strings --noopt-numbers --noopt-locals --noopt-entropy --opt-srcequiv --opt-binequiv ]]
local MAXIMUM_CONFIG = [[ --opt-comments --opt-whitespace --opt-emptylines --opt-eols --opt-strings --opt-numbers --opt-locals --opt-entropy --opt-srcequiv --opt-binequiv ]]
local NONE_CONFIG = [[ --noopt-comments --noopt-whitespace --noopt-emptylines --noopt-eols --noopt-strings --noopt-numbers --noopt-locals --noopt-entropy --opt-srcequiv --opt-binequiv ]]

local DEFAULT_SUFFIX = "_"      -- default suffix for file renaming
local PLUGIN_SUFFIX = "plugin/" -- relative location of plugins

--[[--------------------------------------------------------------------
-- startup and initialize option list handling
----------------------------------------------------------------------]]

-- simple error message handler; change to error if traceback wanted
-- * prints the message and exits the process with status 1
local function die(msg)
  print("LuaSrcDiet (error): "..msg); os.exit(1)
end
--die = error--DEBUG

-- sanity check; plain find, so that the '.' in "5.1" is a literal dot
if not string.find(_VERSION, "5.1", 1, true) then
  die("requires Lua 5.1 to run")
end

------------------------------------------------------------------------
-- prepares text for list of optimizations, prepare lookup table
-- * afterwards OPTION is a set holding both the --opt-* name and the
--   matching --noopt-* name of every feature
------------------------------------------------------------------------

local MSG_OPTIONS = ""
do
  local WIDTH = 24
  local o = {}
  for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
    local msg = " "..op
    msg = msg..string.rep(" ", WIDTH - #msg)..desc.."\n"
    MSG_OPTIONS = MSG_OPTIONS..msg
    o[op] = true
    o["--no"..sub(op, 3)] = true
  end
  OPTION = o  -- replace OPTION with lookup table
end

MSG_USAGE = string.format(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)

if p_embedded then              -- embedded plugins
  local EMBED_INFO = "\nembedded plugins:\n"
  for i = 1, #p_embedded do
    local p = p_embedded[i]
    EMBED_INFO = EMBED_INFO.." "..plugin_info[p].."\n"
  end
  MSG_USAGE = MSG_USAGE..EMBED_INFO
end

------------------------------------------------------------------------
-- global variable initialization, option set handling
------------------------------------------------------------------------

local suffix = DEFAULT_SUFFIX   -- file suffix
local option = {}               -- program options
local stat_c, stat_l            -- statistics tables

-- function to set option lookup table based on a text list of options
-- note: additional forced settings for --opt-eols is done in optlex.lua
-- * "--noXXX" clears option[XXX] when "--XXX" is a known feature
-- * any other "--YYY" sets option[YYY] to true
local function set_options(CONFIG)
  for op in gmatch(CONFIG, "(%-%-%S+)") do
    if sub(op, 3, 4) == "no" and        -- handle negative options
       OPTION["--"..sub(op, 5)] then
      option[sub(op, 5)] = false
    else
      option[sub(op, 3)] = true
    end
  end
end

--[[--------------------------------------------------------------------
-- support functions
----------------------------------------------------------------------]]

-- list of token types, parser-significant types are up to TTYPE_GRAMMAR
-- while the rest are not used by parsers; arranged for stats display
local TTYPES = {
  "TK_KEYWORD", "TK_NAME", "TK_NUMBER",         -- grammar
  "TK_STRING", "TK_LSTRING", "TK_OP",
  "TK_EOS",
  "TK_COMMENT", "TK_LCOMMENT",                  -- non-grammar
  "TK_EOL", "TK_SPACE",
}
local TTYPE_GRAMMAR = 7

local EOLTYPES = {                      -- EOL names for token dump
  ["\n"] = "LF", ["\r"] = "CR",
  ["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
}

------------------------------------------------------------------------
-- read source code from file
-- * returns the whole file content as a string
-- * calls die() if the file cannot be opened or read
------------------------------------------------------------------------

local function load_file(fname)
  local INF = io.open(fname, "rb")
  if not INF then die('cannot open "'..fname..'" for reading') end
  local dat = INF:read("*a")
  INF:close()                   -- close before reporting a failed read
  if not dat then die('cannot read from "'..fname..'"') end
  return dat
end

------------------------------------------------------------------------
-- save source code to file
------------------------------------------------------------------------

local
function save_file(fname, dat) local OUTF = io.open(fname, "wb") if not OUTF then die('cannot open "'..fname..'" for writing') end local status = OUTF:write(dat) if not status then die('cannot write to "'..fname..'"') end OUTF:close() end ------------------------------------------------------------------------ -- functions to deal with statistics ------------------------------------------------------------------------ -- initialize statistics table local function stat_init() stat_c, stat_l = {}, {} for i = 1, #TTYPES do local ttype = TTYPES[i] stat_c[ttype], stat_l[ttype] = 0, 0 end end -- add a token to statistics table local function stat_add(tok, seminfo) stat_c[tok] = stat_c[tok] + 1 stat_l[tok] = stat_l[tok] + #seminfo end -- do totals for statistics table, return average table local function stat_calc() local function avg(c, l) -- safe average function if c == 0 then return 0 end return l / c end local stat_a = {} local c, l = 0, 0 for i = 1, TTYPE_GRAMMAR do -- total grammar tokens local ttype = TTYPES[i] c = c + stat_c[ttype]; l = l + stat_l[ttype] end stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l stat_a.TOTAL_TOK = avg(c, l) c, l = 0, 0 for i = 1, #TTYPES do -- total all tokens local ttype = TTYPES[i] c = c + stat_c[ttype]; l = l + stat_l[ttype] stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype]) end stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l stat_a.TOTAL_ALL = avg(c, l) return stat_a end --[[-------------------------------------------------------------------- -- main tasks ----------------------------------------------------------------------]] ------------------------------------------------------------------------ -- a simple token dumper, minimal translation of seminfo data ------------------------------------------------------------------------ local function dump_tokens(srcfl) -------------------------------------------------------------------- -- load file and process source input into tokens 
-------------------------------------------------------------------- local z = load_file(srcfl) llex.init(z) llex.llex() local toklist, seminfolist = llex.tok, llex.seminfo -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- for i = 1, #toklist do local tok, seminfo = toklist[i], seminfolist[i] if tok == "TK_OP" and string.byte(seminfo) < 32 then seminfo = "(".. string.byte(seminfo)..")" elseif tok == "TK_EOL" then seminfo = EOLTYPES[seminfo] else seminfo = "'"..seminfo.."'" end print(tok.." "..seminfo) end--for end ---------------------------------------------------------------------- -- parser dump; dump globalinfo and localinfo tables ---------------------------------------------------------------------- local function dump_parser(srcfl) local print = print -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z = load_file(srcfl) llex.init(z) llex.llex() local toklist, seminfolist, toklnlist = llex.tok, llex.seminfo, llex.tokln -------------------------------------------------------------------- -- do parser optimization here -------------------------------------------------------------------- lparser.init(toklist, seminfolist, toklnlist) local xinfo = lparser.parser() local globalinfo, localinfo = xinfo.globalinfo, xinfo.localinfo -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- local hl = string.rep("-", 72) print("*** Local/Global Variable Tracker Tables ***") print(hl.."\n GLOBALS\n"..hl) -- global tables have a list of xref numbers only for i = 1, #globalinfo do local obj = globalinfo[i] local msg = "("..i..") '"..obj.name.."' -> " local xref = obj.xref for j = 1, #xref do msg = msg..xref[j].." 
" end print(msg) end -- local tables have xref numbers and a few other special -- numbers that are specially named: decl (declaration xref), -- act (activation xref), rem (removal xref) print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl) for i = 1, #localinfo do local obj = localinfo[i] local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl.. " act:"..obj.act.." rem:"..obj.rem if obj.isself then msg = msg.." isself" end msg = msg.." -> " local xref = obj.xref for j = 1, #xref do msg = msg..xref[j].." " end print(msg) end print(hl.."\n") end ------------------------------------------------------------------------ -- reads source file(s) and reports some statistics ------------------------------------------------------------------------ local function read_only(srcfl) local print = print -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z = load_file(srcfl) llex.init(z) llex.llex() local toklist, seminfolist = llex.tok, llex.seminfo print(MSG_TITLE) print("Statistics for: "..srcfl.."\n") -------------------------------------------------------------------- -- collect statistics -------------------------------------------------------------------- stat_init() for i = 1, #toklist do local tok, seminfo = toklist[i], seminfolist[i] stat_add(tok, seminfo) end--for local stat_a = stat_calc() -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- local fmt = string.format local function figures(tt) return stat_c[tt], stat_l[tt], stat_a[tt] end local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f" local hl = string.rep("-", 42) print(fmt(tabf1, "Lexical", "Input", "Input", "Input")) print(fmt(tabf1, "Elements", "Count", "Bytes", "Average")) print(hl) for i = 1, #TTYPES do local ttype = TTYPES[i] 
print(fmt(tabf2, ttype, figures(ttype))) if ttype == "TK_EOS" then print(hl) end end print(hl) print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL"))) print(hl) print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK"))) print(hl.."\n") end ------------------------------------------------------------------------ -- process source file(s), write output and reports some statistics ------------------------------------------------------------------------ local function process_file(srcfl, destfl) local function print(...) -- handle quiet option if option.QUIET then return end _G.print(...) end if plugin and plugin.init then -- plugin init option.EXIT = false plugin.init(option, srcfl, destfl) if option.EXIT then return end end print(MSG_TITLE) -- title message -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z = load_file(srcfl) if plugin and plugin.post_load then -- plugin post-load z = plugin.post_load(z) or z if option.EXIT then return end end llex.init(z) llex.llex() local toklist, seminfolist, toklnlist = llex.tok, llex.seminfo, llex.tokln if plugin and plugin.post_lex then -- plugin post-lex plugin.post_lex(toklist, seminfolist, toklnlist) if option.EXIT then return end end -------------------------------------------------------------------- -- collect 'before' statistics -------------------------------------------------------------------- stat_init() for i = 1, #toklist do local tok, seminfo = toklist[i], seminfolist[i] stat_add(tok, seminfo) end--for local stat1_a = stat_calc() local stat1_c, stat1_l = stat_c, stat_l -------------------------------------------------------------------- -- do parser optimization here -------------------------------------------------------------------- optparser.print = print -- hack lparser.init(toklist, seminfolist, toklnlist) local xinfo = lparser.parser() if plugin and plugin.post_parse 
then -- plugin post-parse plugin.post_parse(xinfo.globalinfo, xinfo.localinfo) if option.EXIT then return end end optparser.optimize(option, toklist, seminfolist, xinfo) if plugin and plugin.post_optparse then -- plugin post-optparse plugin.post_optparse() if option.EXIT then return end end -------------------------------------------------------------------- -- do lexer optimization here, save output file -------------------------------------------------------------------- local warn = optlex.warn -- use this as a general warning lookup optlex.print = print -- hack toklist, seminfolist, toklnlist = optlex.optimize(option, toklist, seminfolist, toklnlist) if plugin and plugin.post_optlex then -- plugin post-optlex plugin.post_optlex(toklist, seminfolist, toklnlist) if option.EXIT then return end end local dat = table.concat(seminfolist) -- depending on options selected, embedded EOLs in long strings and -- long comments may not have been translated to \n, tack a warning if string.find(dat, "\r\n", 1, 1) or string.find(dat, "\n\r", 1, 1) then warn.MIXEDEOL = true end -------------------------------------------------------------------- -- test source and binary chunk equivalence -------------------------------------------------------------------- equiv.init(option, llex, warn) equiv.source(z, dat) equiv.binary(z, dat) local smsg = "before and after lexer streams are NOT equivalent!" local bmsg = "before and after binary chunks are NOT equivalent!" 
-- for reporting, die if option was selected, else just warn if warn.SRC_EQUIV then if option["opt-srcequiv"] then die(smsg) end else print("*** SRCEQUIV: token streams are sort of equivalent") if option["opt-locals"] then print("(but no identifier comparisons since --opt-locals enabled)") end print() end if warn.BIN_EQUIV then if option["opt-binequiv"] then die(bmsg) end else print("*** BINEQUIV: binary chunks are sort of equivalent") print() end -------------------------------------------------------------------- -- save optimized source stream to output file -------------------------------------------------------------------- save_file(destfl, dat) -------------------------------------------------------------------- -- collect 'after' statistics -------------------------------------------------------------------- stat_init() for i = 1, #toklist do local tok, seminfo = toklist[i], seminfolist[i] stat_add(tok, seminfo) end--for local stat_a = stat_calc() -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- print("Statistics for: "..srcfl.." 
-> "..destfl.."\n") local fmt = string.format local function figures(tt) return stat1_c[tt], stat1_l[tt], stat1_a[tt], stat_c[tt], stat_l[tt], stat_a[tt] end local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s", "%-16s%8d%8d%10.2f%8d%8d%10.2f" local hl = string.rep("-", 68) print("*** lexer-based optimizations summary ***\n"..hl) print(fmt(tabf1, "Lexical", "Input", "Input", "Input", "Output", "Output", "Output")) print(fmt(tabf1, "Elements", "Count", "Bytes", "Average", "Count", "Bytes", "Average")) print(hl) for i = 1, #TTYPES do local ttype = TTYPES[i] print(fmt(tabf2, ttype, figures(ttype))) if ttype == "TK_EOS" then print(hl) end end print(hl) print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL"))) print(hl) print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK"))) print(hl) -------------------------------------------------------------------- -- report warning flags from optimizing process -------------------------------------------------------------------- if warn.LSTRING then print("* WARNING: "..warn.LSTRING) elseif warn.MIXEDEOL then print("* WARNING: ".."output still contains some CRLF or LFCR line endings") elseif warn.SRC_EQUIV then print("* WARNING: "..smsg) elseif warn.BIN_EQUIV then print("* WARNING: "..bmsg) end print() end --[[-------------------------------------------------------------------- -- main functions ----------------------------------------------------------------------]] local arg = {...} -- program arguments local fspec = {} set_options(DEFAULT_CONFIG) -- set to default options at beginning ------------------------------------------------------------------------ -- per-file handling, ship off to tasks ------------------------------------------------------------------------ local function do_files(fspec) for i = 1, #fspec do local srcfl = fspec[i] local destfl ------------------------------------------------------------------ -- find and replace extension for filenames ------------------------------------------------------------------ 
local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$") local basename, extension = srcfl, "" if extb and extb > 1 then basename = sub(srcfl, 1, extb - 1) extension = sub(srcfl, extb, exte) end destfl = basename..suffix..extension if #fspec == 1 and option.OUTPUT_FILE then destfl = option.OUTPUT_FILE end if srcfl == destfl then die("output filename identical to input filename") end ------------------------------------------------------------------ -- perform requested operations ------------------------------------------------------------------ if option.DUMP_LEXER then dump_tokens(srcfl) elseif option.DUMP_PARSER then dump_parser(srcfl) elseif option.READ_ONLY then read_only(srcfl) else process_file(srcfl, destfl) end end--for end ------------------------------------------------------------------------ -- main function (entry point is after this definition) ------------------------------------------------------------------------ local function main() local argn, i = #arg, 1 if argn == 0 then option.HELP = true end -------------------------------------------------------------------- -- handle arguments -------------------------------------------------------------------- while i <= argn do local o, p = arg[i], arg[i + 1] local dash = match(o, "^%-%-?") if dash == "-" then -- single-dash options if o == "-h" then option.HELP = true; break elseif o == "-v" then option.VERSION = true; break elseif o == "-s" then if not p then die("-s option needs suffix specification") end suffix = p i = i + 1 elseif o == "-o" then if not p then die("-o option needs a file name") end option.OUTPUT_FILE = p i = i + 1 elseif o == "-" then break -- ignore rest of args else die("unrecognized option "..o) end elseif dash == "--" then -- double-dash options if o == "--help" then option.HELP = true; break elseif o == "--version" then option.VERSION = true; break elseif o == "--keep" then if not p then die("--keep option needs a string to match for") end option.KEEP = p i = i + 1 elseif o == 
"--plugin" then if not p then die("--plugin option needs a module name") end if option.PLUGIN then die("only one plugin can be specified") end option.PLUGIN = p plugin = require(PLUGIN_SUFFIX..p) i = i + 1 elseif o == "--quiet" then option.QUIET = true elseif o == "--read-only" then option.READ_ONLY = true elseif o == "--basic" then set_options(BASIC_CONFIG) elseif o == "--maximum" then set_options(MAXIMUM_CONFIG) elseif o == "--none" then set_options(NONE_CONFIG) elseif o == "--dump-lexer" then option.DUMP_LEXER = true elseif o == "--dump-parser" then option.DUMP_PARSER = true elseif o == "--details" then option.DETAILS = true elseif OPTION[o] then -- lookup optimization options set_options(o) else die("unrecognized option "..o) end else fspec[#fspec + 1] = o -- potential filename end i = i + 1 end--while if option.HELP then print(MSG_TITLE..MSG_USAGE); return true elseif option.VERSION then print(MSG_TITLE); return true end if #fspec > 0 then if #fspec > 1 and option.OUTPUT_FILE then die("with -o, only one source file can be specified") end do_files(fspec) return true else die("nothing to do!") end end -- entry point -> main() -> do_files() if not main() then die("Please run with option -h or --help for usage information") end -- end of script