#!/usr/bin/env lua --[[-------------------------------------------------------------------- LuaSrcDiet Compresses Lua source code by removing unnecessary characters. For Lua 5.1.x source code. Copyright (c) 2008,2011,2012 Kein-Hong Man The COPYRIGHT file describes the conditions under which this software may be distributed. ----------------------------------------------------------------------]] --[[-------------------------------------------------------------------- -- NOTES: -- * Remember to update version and date information below (MSG_TITLE) -- * TODO: passing data tables around is a horrific mess -- * TODO: to implement pcall() to properly handle lexer etc. errors -- * TODO: need some automatic testing for a semblance of sanity -- * TODO: the plugin module is highly experimental and unstable ----------------------------------------------------------------------]] -- standard libraries, functions local string=string local math=math local table=table local require=require local print=print local sub=string.sub local gmatch=string.gmatch local match=string.match -- modules incorporated as preload functions follows local preload=package.preload local base=_G local plugin_info={ html="html generates a HTML file for checking globals", sloc="sloc calculates SLOC for given source file", } local p_embedded={ 'html', 'sloc', } -- preload function for module llex preload.llex= function() --start of inserted module module"llex" local string=base.require"string" local find=string.find local match=string.match local sub=string.sub ---------------------------------------------------------------------- -- initialize keyword list, variables ---------------------------------------------------------------------- local kw={} for v in string.gmatch([[ and break do else elseif end false for function if in local nil not or repeat return then true until while]],"%S+")do kw[v]=true end -- see init() for module variables (externally visible): -- tok, seminfo, tokln local z,-- source stream sourceid,-- name of source I,-- position of lexer buff,-- buffer for strings ln-- line number ---------------------------------------------------------------------- -- add information to token listing ---------------------------------------------------------------------- local function addtoken(token,info) local i=#tok+1 tok[i]=token seminfo[i]=info tokln[i]=ln end ---------------------------------------------------------------------- -- handles line number incrementation and end-of-line characters ---------------------------------------------------------------------- local function inclinenumber(i,is_tok) local sub=sub local old=sub(z,i,i) i=i+1-- skip '\n' or '\r' local c=sub(z,i,i) if(c=="\n"or c=="\r")and(c~=old)then i=i+1-- skip '\n\r' or '\r\n' old=old..c end if is_tok then addtoken("TK_EOL",old)end ln=ln+1 I=i return i end ---------------------------------------------------------------------- -- initialize lexer for given source _z and source name _sourceid ---------------------------------------------------------------------- function init(_z,_sourceid) z=_z-- source sourceid=_sourceid-- name of source I=1-- lexer's position in source ln=1-- line number tok={}-- lexed token list* seminfo={}-- lexed semantic information list* tokln={}-- line numbers for messages* -- (*) externally visible thru' module -------------------------------------------------------------------- -- initial processing (shbang handling) -------------------------------------------------------------------- local p,_,q,r=find(z,"^(#[^\r\n]*)(\r?\n?)") if p then-- skip first line I=I+#q addtoken("TK_COMMENT",q) if#r>0 then inclinenumber(I,true)end end end ---------------------------------------------------------------------- -- returns a chunk name or id, no truncation for long names ---------------------------------------------------------------------- function chunkid() if sourceid and match(sourceid,"^[=@]")then return sub(sourceid,2)-- remove first char end return"[string]" end ---------------------------------------------------------------------- -- formats error message and throws error -- * a simplified version, does not report what token was responsible ---------------------------------------------------------------------- function errorline(s,line) local e=error or base.error e(string.format("%s:%d: %s",chunkid(),line or ln,s)) end local errorline=errorline ------------------------------------------------------------------------ -- count separators ("=") in a long string delimiter ------------------------------------------------------------------------ local function skip_sep(i) local sub=sub local s=sub(z,i,i) i=i+1 local count=#match(z,"=*",i) i=i+count I=i return(sub(z,i,i)==s)and count or(-count)-1 end ---------------------------------------------------------------------- -- reads a long string or long comment ---------------------------------------------------------------------- local function read_long_string(is_str,sep) local i=I+1-- skip 2nd '[' local sub=sub local c=sub(z,i,i) if c=="\r"or c=="\n"then-- string starts with a newline? i=inclinenumber(i)-- skip it end while true do local p,q,r=find(z,"([\r\n%]])",i)-- (long range match) if not p then errorline(is_str and"unfinished long string"or "unfinished long comment") end i=p if r=="]"then-- delimiter test if skip_sep(i)==sep then buff=sub(z,buff,I) I=I+1-- skip 2nd ']' return buff end i=I else-- newline buff=buff.."\n" i=inclinenumber(i) end end--while end ---------------------------------------------------------------------- -- reads a string ---------------------------------------------------------------------- local function read_string(del) local i=I local find=find local sub=sub while true do local p,q,r=find(z,"([\n\r\\\"\'])",i)-- (long range match) if p then if r=="\n"or r=="\r"then errorline("unfinished string") end i=p if r=="\\"then-- handle escapes i=i+1 r=sub(z,i,i) if r==""then break end-- (EOZ error) p=find("abfnrtv\n\r",r,1,true) ------------------------------------------------------ if p then-- special escapes if p>7 then i=inclinenumber(i) else i=i+1 end ------------------------------------------------------ elseif find(r,"%D")then-- other non-digits i=i+1 ------------------------------------------------------ else-- \xxx sequence local p,q,s=find(z,"^(%d%d?%d?)",i) i=q+1 if s+1>256 then-- UCHAR_MAX errorline("escape sequence too large") end ------------------------------------------------------ end--if p else i=i+1 if r==del then-- ending delimiter I=i return sub(z,buff,i-1)-- return string end end--if r else break-- (error) end--if p end--while errorline("unfinished string") end ------------------------------------------------------------------------ -- main lexer function ------------------------------------------------------------------------ function llex() local find=find local match=match while true do--outer local i=I -- inner loop allows break to be used to nicely section tests while true do--inner ---------------------------------------------------------------- local p,_,r=find(z,"^([_%a][_%w]*)",i) if p then I=i+#r if kw[r]then addtoken("TK_KEYWORD",r)-- reserved word (keyword) else addtoken("TK_NAME",r)-- identifier end break-- (continue) end ---------------------------------------------------------------- local p,_,r=find(z,"^(%.?)%d",i) if p then-- numeral if r=="."then i=i+1 end local _,q,r=find(z,"^%d*[%.%d]*([eE]?)",i) i=q+1 if#r==1 then-- optional exponent if match(z,"^[%+%-]",i)then-- optional sign i=i+1 end end local _,q=find(z,"^[_%w]*",i) I=q+1 local v=sub(z,p,q)-- string equivalent if not base.tonumber(v)then-- handles hex test also errorline("malformed number") end addtoken("TK_NUMBER",v) break-- (continue) end ---------------------------------------------------------------- local p,q,r,t=find(z,"^((%s)[ \t\v\f]*)",i) if p then if t=="\n"or t=="\r"then-- newline inclinenumber(i,true) else I=q+1-- whitespace addtoken("TK_SPACE",r) end break-- (continue) end ---------------------------------------------------------------- local r=match(z,"^%p",i) if r then buff=i local p=find("-[\"\'.=<>~",r,1,true) if p then -- two-level if block for punctuation/symbols -------------------------------------------------------- if p<=2 then if p==1 then-- minus local c=match(z,"^%-%-(%[?)",i) if c then i=i+2 local sep=-1 if c=="["then sep=skip_sep(i) end if sep>=0 then-- long comment addtoken("TK_LCOMMENT",read_long_string(false,sep)) else-- short comment I=find(z,"[\n\r]",i)or(#z+1) addtoken("TK_COMMENT",sub(z,buff,I-1)) end break-- (continue) end -- (fall through for "-") else-- [ or long string local sep=skip_sep(i) if sep>=0 then addtoken("TK_LSTRING",read_long_string(true,sep)) elseif sep==-1 then addtoken("TK_OP","[") else errorline("invalid long string delimiter") end break-- (continue) end -------------------------------------------------------- elseif p<=5 then if p<5 then-- strings I=i+1 addtoken("TK_STRING",read_string(r)) break-- (continue) end r=match(z,"^%.%.?%.?",i)-- .|..|... dots -- (fall through) -------------------------------------------------------- else-- relational r=match(z,"^%p=?",i) -- (fall through) end end I=i+#r addtoken("TK_OP",r)-- for other symbols, fall through break-- (continue) end ---------------------------------------------------------------- local r=sub(z,i,i) if r~=""then I=i+1 addtoken("TK_OP",r)-- other single-char tokens break end addtoken("TK_EOS","")-- end of stream, return-- exit here ---------------------------------------------------------------- end--while inner end--while outer end --end of inserted module end -- preload function for module lparser preload.lparser= function() --start of inserted module module"lparser" local string=base.require"string" --[[-------------------------------------------------------------------- -- variable and data structure initialization ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- initialization: main variables ---------------------------------------------------------------------- local toklist,-- grammar-only token tables (token table, seminfolist,-- semantic information table, line number toklnlist,-- table, cross-reference table) xreflist, tpos,-- token position line,-- start line # for error messages lastln,-- last line # for ambiguous syntax chk tok,seminfo,ln,xref,-- token, semantic info, line nameref,-- proper position of token fs,-- current function state top_fs,-- top-level function state globalinfo,-- global variable information table globallookup,-- global variable name lookup table localinfo,-- local variable information table ilocalinfo,-- inactive locals (prior to activation) ilocalrefs,-- corresponding references to activate statinfo-- statements labeled by type -- forward references for local functions local explist1,expr,block,exp1,body,chunk ---------------------------------------------------------------------- -- initialization: data structures ---------------------------------------------------------------------- local gmatch=string.gmatch local block_follow={}-- lookahead check in chunk(), returnstat() for v in gmatch("else elseif end until ","%S+")do block_follow[v]=true end local binopr_left={}-- binary operators, left priority local binopr_right={}-- binary operators, right priority for op,lt,rt in gmatch([[ {+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7} {^ 10 9}{.. 5 4} {~= 3 3}{== 3 3} {< 3 3}{<= 3 3}{> 3 3}{>= 3 3} {and 2 2}{or 1 1} ]],"{(%S+)%s(%d+)%s(%d+)}")do binopr_left[op]=lt+0 binopr_right[op]=rt+0 end local unopr={["not"]=true,["-"]=true, ["#"]=true,}-- unary operators local UNARY_PRIORITY=8-- priority for unary operators --[[-------------------------------------------------------------------- -- support functions ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- formats error message and throws error (duplicated from llex) -- * a simplified version, does not report what token was responsible ---------------------------------------------------------------------- local function errorline(s,line) local e=error or base.error e(string.format("(source):%d: %s",line or ln,s)) end ---------------------------------------------------------------------- -- handles incoming token, semantic information pairs -- * NOTE: 'nextt' is named 'next' originally ---------------------------------------------------------------------- -- reads in next token local function nextt() lastln=toklnlist[tpos] tok,seminfo,ln,xref =toklist[tpos],seminfolist[tpos],toklnlist[tpos],xreflist[tpos] tpos=tpos+1 end -- peek at next token (single lookahead for table constructor) local function lookahead() return toklist[tpos] end ---------------------------------------------------------------------- -- throws a syntax error, or if token expected is not there ---------------------------------------------------------------------- local function syntaxerror(msg) local tok=tok if tok~=""and tok~=""then if tok==""then tok=seminfo end tok="'"..tok.."'" end errorline(msg.." near "..tok) end local function error_expected(token) syntaxerror("'"..token.."' expected") end ---------------------------------------------------------------------- -- tests for a token, returns outcome -- * return value changed to boolean ---------------------------------------------------------------------- local function testnext(c) if tok==c then nextt();return true end end ---------------------------------------------------------------------- -- check for existence of a token, throws error if not found ---------------------------------------------------------------------- local function check(c) if tok~=c then error_expected(c)end end ---------------------------------------------------------------------- -- verify existence of a token, then skip it ---------------------------------------------------------------------- local function checknext(c) check(c);nextt() end ---------------------------------------------------------------------- -- throws error if condition not matched ---------------------------------------------------------------------- local function check_condition(c,msg) if not c then syntaxerror(msg)end end ---------------------------------------------------------------------- -- verifies token conditions are met or else throw error ---------------------------------------------------------------------- local function check_match(what,who,where) if not testnext(what)then if where==ln then error_expected(what) else syntaxerror("'"..what.."' expected (to close '"..who.."' at line "..where..")") end end end ---------------------------------------------------------------------- -- expect that token is a name, return the name ---------------------------------------------------------------------- local function str_checkname() check("") local ts=seminfo nameref=xref nextt() return ts end ---------------------------------------------------------------------- -- adds given string s in string pool, sets e as VK ---------------------------------------------------------------------- local function codestring(e,s) e.k="VK" end ---------------------------------------------------------------------- -- consume a name token, adds it to string pool ---------------------------------------------------------------------- local function checkname(e) codestring(e,str_checkname()) end --[[-------------------------------------------------------------------- -- variable (global|local|upvalue) handling -- * to track locals and globals, variable management code needed -- * entry point is singlevar() for variable lookups -- * lookup tables (bl.locallist) are maintained awkwardly in the basic -- block data structures, PLUS the function data structure (this is -- an inelegant hack, since bl is nil for the top level of a function) ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- register a local variable, create local variable object, set in -- to-activate variable list -- * used in new_localvarliteral(), parlist(), fornum(), forlist(), -- localfunc(), localstat() ---------------------------------------------------------------------- local function new_localvar(name,special) local bl=fs.bl local locallist -- locate locallist in current block object or function root object if bl then locallist=bl.locallist else locallist=fs.locallist end -- build local variable information object and set localinfo local id=#localinfo+1 localinfo[id]={-- new local variable object name=name,-- local variable name xref={nameref},-- xref, first value is declaration decl=nameref,-- location of declaration, = xref[1] } if special then-- "self" must be not be changed localinfo[id].isself=true end -- this can override a local with the same name in the same scope -- but first, keep it inactive until it gets activated local i=#ilocalinfo+1 ilocalinfo[i]=id ilocalrefs[i]=locallist end ---------------------------------------------------------------------- -- actually activate the variables so that they are visible -- * remember Lua semantics, e.g. RHS is evaluated first, then LHS -- * used in parlist(), forbody(), localfunc(), localstat(), body() ---------------------------------------------------------------------- local function adjustlocalvars(nvars) local sz=#ilocalinfo -- i goes from left to right, in order of local allocation, because -- of something like: local a,a,a = 1,2,3 which gives a = 3 while nvars>0 do nvars=nvars-1 local i=sz-nvars local id=ilocalinfo[i]-- local's id local obj=localinfo[id] local name=obj.name-- name of local obj.act=xref-- set activation location ilocalinfo[i]=nil local locallist=ilocalrefs[i]-- ref to lookup table to update ilocalrefs[i]=nil local existing=locallist[name]-- if existing, remove old first! if existing then-- do not overlap, set special obj=localinfo[existing]-- form of rem, as -id obj.rem=-id end locallist[name]=id-- activate, now visible to Lua end end ---------------------------------------------------------------------- -- remove (deactivate) variables in current scope (before scope exits) -- * zap entire locallist tables since we are not allocating registers -- * used in leaveblock(), close_func() ---------------------------------------------------------------------- local function removevars() local bl=fs.bl local locallist -- locate locallist in current block object or function root object if bl then locallist=bl.locallist else locallist=fs.locallist end -- enumerate the local list at current scope and deactivate 'em for name,id in base.pairs(locallist)do local obj=localinfo[id] obj.rem=xref-- set deactivation location end end ---------------------------------------------------------------------- -- creates a new local variable given a name -- * skips internal locals (those starting with '('), so internal -- locals never needs a corresponding adjustlocalvars() call -- * special is true for "self" which must not be optimized -- * used in fornum(), forlist(), parlist(), body() ---------------------------------------------------------------------- local function new_localvarliteral(name,special) if string.sub(name,1,1)=="("then-- can skip internal locals return end new_localvar(name,special) end ---------------------------------------------------------------------- -- search the local variable namespace of the given fs for a match -- * returns localinfo index -- * used only in singlevaraux() ---------------------------------------------------------------------- local function searchvar(fs,n) local bl=fs.bl local locallist if bl then locallist=bl.locallist while locallist do if locallist[n]then return locallist[n]end-- found bl=bl.prev locallist=bl and bl.locallist end end locallist=fs.locallist return locallist[n]or-1-- found or not found (-1) end ---------------------------------------------------------------------- -- handle locals, globals and upvalues and related processing -- * search mechanism is recursive, calls itself to search parents -- * used only in singlevar() ---------------------------------------------------------------------- local function singlevaraux(fs,n,var) if fs==nil then-- no more levels? var.k="VGLOBAL"-- default is global variable return"VGLOBAL" else local v=searchvar(fs,n)-- look up at current level if v>=0 then var.k="VLOCAL" var.id=v -- codegen may need to deal with upvalue here return"VLOCAL" else-- not found at current level; try upper one if singlevaraux(fs.prev,n,var)=="VGLOBAL"then return"VGLOBAL" end -- else was LOCAL or UPVAL, handle here var.k="VUPVAL"-- upvalue in this level return"VUPVAL" end--if v end--if fs end ---------------------------------------------------------------------- -- consume a name token, creates a variable (global|local|upvalue) -- * used in prefixexp(), funcname() ---------------------------------------------------------------------- local function singlevar(v) local name=str_checkname() singlevaraux(fs,name,v) ------------------------------------------------------------------ -- variable tracking ------------------------------------------------------------------ if v.k=="VGLOBAL"then -- if global being accessed, keep track of it by creating an object local id=globallookup[name] if not id then id=#globalinfo+1 globalinfo[id]={-- new global variable object name=name,-- global variable name xref={nameref},-- xref, first value is declaration } globallookup[name]=id-- remember it else local obj=globalinfo[id].xref obj[#obj+1]=nameref-- add xref end else -- local/upvalue is being accessed, keep track of it local id=v.id local obj=localinfo[id].xref obj[#obj+1]=nameref-- add xref end end --[[-------------------------------------------------------------------- -- state management functions with open/close pairs ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- enters a code unit, initializes elements ---------------------------------------------------------------------- local function enterblock(isbreakable) local bl={}-- per-block state bl.isbreakable=isbreakable bl.prev=fs.bl bl.locallist={} fs.bl=bl end ---------------------------------------------------------------------- -- leaves a code unit, close any upvalues ---------------------------------------------------------------------- local function leaveblock() local bl=fs.bl removevars() fs.bl=bl.prev end ---------------------------------------------------------------------- -- opening of a function -- * top_fs is only for anchoring the top fs, so that parser() can -- return it to the caller function along with useful output -- * used in parser() and body() ---------------------------------------------------------------------- local function open_func() local new_fs-- per-function state if not fs then-- top_fs is created early new_fs=top_fs else new_fs={} end new_fs.prev=fs-- linked list of function states new_fs.bl=nil new_fs.locallist={} fs=new_fs end ---------------------------------------------------------------------- -- closing of a function -- * used in parser() and body() ---------------------------------------------------------------------- local function close_func() removevars() fs=fs.prev end --[[-------------------------------------------------------------------- -- other parsing functions -- * for table constructor, parameter list, argument list ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- parse a function name suffix, for function call specifications -- * used in primaryexp(), funcname() ---------------------------------------------------------------------- local function field(v) -- field -> ['.' | ':'] NAME local key={} nextt()-- skip the dot or colon checkname(key) v.k="VINDEXED" end ---------------------------------------------------------------------- -- parse a table indexing suffix, for constructors, expressions -- * used in recfield(), primaryexp() ---------------------------------------------------------------------- local function yindex(v) -- index -> '[' expr ']' nextt()-- skip the '[' expr(v) checknext("]") end ---------------------------------------------------------------------- -- parse a table record (hash) field -- * used in constructor() ---------------------------------------------------------------------- local function recfield(cc) -- recfield -> (NAME | '['exp1']') = exp1 local key,val={},{} if tok==""then checkname(key) else-- tok == '[' yindex(key) end checknext("=") expr(val) end ---------------------------------------------------------------------- -- emit a set list instruction if enough elements (LFIELDS_PER_FLUSH) -- * note: retained in this skeleton because it modifies cc.v.k -- * used in constructor() ---------------------------------------------------------------------- local function closelistfield(cc) if cc.v.k=="VVOID"then return end-- there is no list item cc.v.k="VVOID" end ---------------------------------------------------------------------- -- parse a table list (array) field -- * used in constructor() ---------------------------------------------------------------------- local function listfield(cc) expr(cc.v) end ---------------------------------------------------------------------- -- parse a table constructor -- * used in funcargs(), simpleexp() ---------------------------------------------------------------------- local function constructor(t) -- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}' -- field -> recfield | listfield -- fieldsep -> ',' | ';' local line=ln local cc={} cc.v={} cc.t=t t.k="VRELOCABLE" cc.v.k="VVOID" checknext("{") repeat if tok=="}"then break end -- closelistfield(cc) here local c=tok if c==""then-- may be listfields or recfields if lookahead()~="="then-- look ahead: expression? listfield(cc) else recfield(cc) end elseif c=="["then-- constructor_item -> recfield recfield(cc) else-- constructor_part -> listfield listfield(cc) end until not testnext(",")and not testnext(";") check_match("}","{",line) -- lastlistfield(cc) here end ---------------------------------------------------------------------- -- parse the arguments (parameters) of a function declaration -- * used in body() ---------------------------------------------------------------------- local function parlist() -- parlist -> [ param { ',' param } ] local nparams=0 if tok~=")"then-- is 'parlist' not empty? repeat local c=tok if c==""then-- param -> NAME new_localvar(str_checkname()) nparams=nparams+1 elseif c=="..."then nextt() fs.is_vararg=true else syntaxerror(" or '...' expected") end until fs.is_vararg or not testnext(",") end--if adjustlocalvars(nparams) end ---------------------------------------------------------------------- -- parse the parameters of a function call -- * contrast with parlist(), used in function declarations -- * used in primaryexp() ---------------------------------------------------------------------- local function funcargs(f) local args={} local line=ln local c=tok if c=="("then-- funcargs -> '(' [ explist1 ] ')' if line~=lastln then syntaxerror("ambiguous syntax (function call x new statement)") end nextt() if tok==")"then-- arg list is empty? args.k="VVOID" else explist1(args) end check_match(")","(",line) elseif c=="{"then-- funcargs -> constructor constructor(args) elseif c==""then-- funcargs -> STRING codestring(args,seminfo) nextt()-- must use 'seminfo' before 'next' else syntaxerror("function arguments expected") return end--if c f.k="VCALL" end --[[-------------------------------------------------------------------- -- mostly expression functions ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- parses an expression in parentheses or a single variable -- * used in primaryexp() ---------------------------------------------------------------------- local function prefixexp(v) -- prefixexp -> NAME | '(' expr ')' local c=tok if c=="("then local line=ln nextt() expr(v) check_match(")","(",line) elseif c==""then singlevar(v) else syntaxerror("unexpected symbol") end--if c end ---------------------------------------------------------------------- -- parses a prefixexp (an expression in parentheses or a single -- variable) or a function call specification -- * used in simpleexp(), assignment(), expr_stat() ---------------------------------------------------------------------- local function primaryexp(v) -- primaryexp -> -- prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs } prefixexp(v) while true do local c=tok if c=="."then-- field field(v) elseif c=="["then-- '[' exp1 ']' local key={} yindex(key) elseif c==":"then-- ':' NAME funcargs local key={} nextt() checkname(key) funcargs(v) elseif c=="("or c==""or c=="{"then-- funcargs funcargs(v) else return end--if c end--while end ---------------------------------------------------------------------- -- parses general expression types, constants handled here -- * used in subexpr() ---------------------------------------------------------------------- local function simpleexp(v) -- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... | -- constructor | FUNCTION body | primaryexp local c=tok if c==""then v.k="VKNUM" elseif c==""then codestring(v,seminfo) elseif c=="nil"then v.k="VNIL" elseif c=="true"then v.k="VTRUE" elseif c=="false"then v.k="VFALSE" elseif c=="..."then-- vararg check_condition(fs.is_vararg==true, "cannot use '...' outside a vararg function"); v.k="VVARARG" elseif c=="{"then-- constructor constructor(v) return elseif c=="function"then nextt() body(v,false,ln) return else primaryexp(v) return end--if c nextt() end ------------------------------------------------------------------------ -- Parse subexpressions. Includes handling of unary operators and binary -- operators. A subexpr is given the rhs priority level of the operator -- immediately left of it, if any (limit is -1 if none,) and if a binop -- is found, limit is compared with the lhs priority level of the binop -- in order to determine which executes first. -- * recursively called -- * used in expr() ------------------------------------------------------------------------ local function subexpr(v,limit) -- subexpr -> (simpleexp | unop subexpr) { binop subexpr } -- * where 'binop' is any binary operator with a priority -- higher than 'limit' local op=tok local uop=unopr[op] if uop then nextt() subexpr(v,UNARY_PRIORITY) else simpleexp(v) end -- expand while operators have priorities higher than 'limit' op=tok local binop=binopr_left[op] while binop and binop>limit do local v2={} nextt() -- read sub-expression with higher priority local nextop=subexpr(v2,binopr_right[op]) op=nextop binop=binopr_left[op] end return op-- return first untreated operator end ---------------------------------------------------------------------- -- Expression parsing starts here. Function subexpr is entered with the -- left operator (which is non-existent) priority of -1, which is lower -- than all actual operators. Expr information is returned in parm v. -- * used in cond(), explist1(), index(), recfield(), listfield(), -- prefixexp(), while_stat(), exp1() ---------------------------------------------------------------------- -- this is a forward-referenced local function expr(v) -- expr -> subexpr subexpr(v,0) end --[[-------------------------------------------------------------------- -- third level parsing functions ----------------------------------------------------------------------]] ------------------------------------------------------------------------ -- parse a variable assignment sequence -- * recursively called -- * used in expr_stat() ------------------------------------------------------------------------ local function assignment(v) local e={} local c=v.v.k check_condition(c=="VLOCAL"or c=="VUPVAL"or c=="VGLOBAL" or c=="VINDEXED","syntax error") if testnext(",")then-- assignment -> ',' primaryexp assignment local nv={}-- expdesc nv.v={} primaryexp(nv.v) -- lparser.c deals with some register usage conflict here assignment(nv) else-- assignment -> '=' explist1 checknext("=") explist1(e) return-- avoid default end e.k="VNONRELOC" end ---------------------------------------------------------------------- -- parse a for loop body for both versions of the for loop -- * used in fornum(), forlist() ---------------------------------------------------------------------- local function forbody(nvars,isnum) -- forbody -> DO block checknext("do") enterblock(false)-- scope for declared variables adjustlocalvars(nvars) block() leaveblock()-- end of scope for declared variables end ---------------------------------------------------------------------- -- parse a numerical for loop, calls forbody() -- * used in for_stat() ---------------------------------------------------------------------- local function fornum(varname) -- fornum -> NAME = exp1, exp1 [, exp1] DO body local line=line new_localvarliteral("(for index)") new_localvarliteral("(for limit)") new_localvarliteral("(for step)") new_localvar(varname) checknext("=") exp1()-- initial value checknext(",") exp1()-- limit if testnext(",")then exp1()-- optional step else -- default step = 1 end forbody(1,true) end ---------------------------------------------------------------------- -- parse a generic for loop, calls forbody() -- * used in for_stat() ---------------------------------------------------------------------- local function forlist(indexname) -- forlist -> NAME {, NAME} IN explist1 DO body local e={} -- create control variables new_localvarliteral("(for generator)") new_localvarliteral("(for state)") new_localvarliteral("(for control)") -- create declared variables new_localvar(indexname) local nvars=1 while testnext(",")do new_localvar(str_checkname()) nvars=nvars+1 end checknext("in") local line=line explist1(e) forbody(nvars,false) end ---------------------------------------------------------------------- -- parse a function name specification -- * used in func_stat() ---------------------------------------------------------------------- local function funcname(v) -- funcname -> NAME {field} [':' NAME] local needself=false singlevar(v) while tok=="."do field(v) end if tok==":"then needself=true field(v) end return needself end ---------------------------------------------------------------------- -- parse the single expressions needed in numerical for loops -- * used in fornum() ---------------------------------------------------------------------- -- this is a forward-referenced local function exp1() -- exp1 -> expr local e={} expr(e) end ---------------------------------------------------------------------- -- parse condition in a repeat statement or an if control structure -- * used in repeat_stat(), test_then_block() ---------------------------------------------------------------------- local function cond() -- cond -> expr local v={} expr(v)-- read condition end ---------------------------------------------------------------------- -- parse part of an if control structure, including the condition -- * used in if_stat() ---------------------------------------------------------------------- local function test_then_block() -- test_then_block -> [IF | ELSEIF] cond THEN block nextt()-- skip IF or ELSEIF cond() checknext("then") block()-- 'then' part end ---------------------------------------------------------------------- -- parse a local function statement -- * used in local_stat() ---------------------------------------------------------------------- local function localfunc() -- localfunc -> NAME body local v,b={} new_localvar(str_checkname()) v.k="VLOCAL" adjustlocalvars(1) body(b,false,ln) end ---------------------------------------------------------------------- -- parse a local variable declaration statement -- * used in local_stat() ---------------------------------------------------------------------- local function localstat() -- localstat -> NAME {',' NAME} ['=' explist1] local nvars=0 local e={} repeat new_localvar(str_checkname()) nvars=nvars+1 until not testnext(",") if testnext("=")then explist1(e) else e.k="VVOID" end adjustlocalvars(nvars) end ---------------------------------------------------------------------- -- parse a list of comma-separated expressions -- * used in return_stat(), localstat(), funcargs(), assignment(), -- forlist() ---------------------------------------------------------------------- -- this is a forward-referenced local function explist1(e) -- explist1 -> expr { ',' expr } expr(e) while testnext(",")do expr(e) end end ---------------------------------------------------------------------- -- parse function declaration body -- * used in simpleexp(), localfunc(), func_stat() ---------------------------------------------------------------------- -- this is a forward-referenced local function body(e,needself,line) -- body -> '(' parlist ')' chunk END open_func() checknext("(") if needself then new_localvarliteral("self",true) adjustlocalvars(1) end parlist() checknext(")") chunk() check_match("end","function",line) close_func() end ---------------------------------------------------------------------- -- parse a code block or unit -- * used in do_stat(), while_stat(), forbody(), test_then_block(), -- if_stat() ---------------------------------------------------------------------- -- this is a forward-referenced local function block() -- block -> chunk enterblock(false) chunk() leaveblock() end --[[-------------------------------------------------------------------- -- second level parsing functions, all with '_stat' suffix -- * since they are called via a table lookup, they cannot be local -- functions (a lookup table of local functions might be smaller...) -- * stat() -> *_stat() ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- initial parsing for a for loop, calls fornum() or forlist() -- * removed 'line' parameter (used to set debug information only) -- * used in stat() ---------------------------------------------------------------------- local function for_stat() -- stat -> for_stat -> FOR (fornum | forlist) END local line=line enterblock(true)-- scope for loop and control variables nextt()-- skip 'for' local varname=str_checkname()-- first variable name local c=tok if c=="="then fornum(varname) elseif c==","or c=="in"then forlist(varname) else syntaxerror("'=' or 'in' expected") end check_match("end","for",line) leaveblock()-- loop scope (`break' jumps to this point) end ---------------------------------------------------------------------- -- parse a while-do control structure, body processed by block() -- * used in stat() ---------------------------------------------------------------------- local function while_stat() -- stat -> while_stat -> WHILE cond DO block END local line=line nextt()-- skip WHILE cond()-- parse condition enterblock(true) checknext("do") block() check_match("end","while",line) leaveblock() end ---------------------------------------------------------------------- -- parse a repeat-until control structure, body parsed by chunk() -- * originally, repeatstat() calls breakstat() too if there is an -- upvalue in the scope block; nothing is actually lexed, it is -- actually the common code in breakstat() for closing of upvalues -- * used in stat() ---------------------------------------------------------------------- local function repeat_stat() -- stat -> repeat_stat -> REPEAT block UNTIL cond local line=line enterblock(true)-- loop block enterblock(false)-- scope block nextt()-- skip REPEAT chunk() check_match("until","repeat",line) cond() -- close upvalues at scope level below leaveblock()-- finish scope leaveblock()-- finish loop end ---------------------------------------------------------------------- -- parse an if control structure -- * used in stat() ---------------------------------------------------------------------- local function if_stat() -- stat -> if_stat -> IF cond THEN block -- {ELSEIF cond THEN block} [ELSE block] END local line=line local v={} test_then_block()-- IF cond THEN block while tok=="elseif"do test_then_block()-- ELSEIF cond THEN block end if tok=="else"then nextt()-- skip ELSE block()-- 'else' part end check_match("end","if",line) end ---------------------------------------------------------------------- -- parse a return statement -- * used in stat() ---------------------------------------------------------------------- local function return_stat() -- stat -> return_stat -> RETURN explist local e={} nextt()-- skip RETURN local c=tok if block_follow[c]or c==";"then -- return no values else explist1(e)-- optional return values end end ---------------------------------------------------------------------- -- parse a break statement -- * used in stat() ---------------------------------------------------------------------- local function break_stat() -- stat -> break_stat -> BREAK local bl=fs.bl nextt()-- skip BREAK while bl and not bl.isbreakable do-- find a breakable block bl=bl.prev end if not bl then syntaxerror("no loop to break") end end ---------------------------------------------------------------------- -- parse a function call with no returns or an assignment statement -- * the struct with .prev is used for name searching in lparse.c, -- so it is retained for now; present in assignment() also -- * used in stat() ---------------------------------------------------------------------- local function expr_stat() local id=tpos-1 -- stat -> expr_stat -> func | assignment local v={} v.v={} primaryexp(v.v) if v.v.k=="VCALL"then-- stat -> func -- call statement uses no results statinfo[id]="call" else-- stat -> assignment v.prev=nil assignment(v) statinfo[id]="assign" end end ---------------------------------------------------------------------- -- parse a function statement -- * used in stat() ---------------------------------------------------------------------- local function function_stat() -- stat -> function_stat -> FUNCTION funcname body local line=line local v,b={},{} nextt()-- skip FUNCTION local needself=funcname(v) body(b,needself,line) end ---------------------------------------------------------------------- -- parse a simple block enclosed by a DO..END pair -- * used in stat() ---------------------------------------------------------------------- local function do_stat() -- stat -> do_stat -> DO block END local line=line nextt()-- skip DO block() check_match("end","do",line) end ---------------------------------------------------------------------- -- parse a statement starting with LOCAL -- * used in stat() ---------------------------------------------------------------------- local function local_stat() -- stat -> local_stat -> LOCAL FUNCTION localfunc -- -> LOCAL localstat nextt()-- skip LOCAL if testnext("function")then-- local function? localfunc() else localstat() end end --[[-------------------------------------------------------------------- -- main functions, top level parsing functions -- * accessible functions are: init(lexer), parser() -- * [entry] -> parser() -> chunk() -> stat() ----------------------------------------------------------------------]] ---------------------------------------------------------------------- -- initial parsing for statements, calls '_stat' suffixed functions -- * used in chunk() ---------------------------------------------------------------------- local stat_call={-- lookup for calls in stat() ["if"]=if_stat, ["while"]=while_stat, ["do"]=do_stat, ["for"]=for_stat, ["repeat"]=repeat_stat, ["function"]=function_stat, ["local"]=local_stat, ["return"]=return_stat, ["break"]=break_stat, } local function stat() -- stat -> if_stat while_stat do_stat for_stat repeat_stat -- function_stat local_stat return_stat break_stat -- expr_stat line=ln-- may be needed for error messages local c=tok local fn=stat_call[c] -- handles: if while do for repeat function local return break if fn then statinfo[tpos-1]=c fn() -- return or break must be last statement if c=="return"or c=="break"then return true end else expr_stat() end return false end ---------------------------------------------------------------------- -- parse a chunk, which consists of a bunch of statements -- * used in parser(), body(), block(), repeat_stat() ---------------------------------------------------------------------- -- this is a forward-referenced local function chunk() -- chunk -> { stat [';'] } local islast=false while not islast and not block_follow[tok]do islast=stat() testnext(";") end end ---------------------------------------------------------------------- -- performs parsing, returns parsed data structure ---------------------------------------------------------------------- function parser() open_func() fs.is_vararg=true-- main func. is always vararg nextt()-- read first token chunk() check("") close_func() return{-- return everything globalinfo=globalinfo, localinfo=localinfo, statinfo=statinfo, toklist=toklist, seminfolist=seminfolist, toklnlist=toklnlist, xreflist=xreflist, } end ---------------------------------------------------------------------- -- initialization function ---------------------------------------------------------------------- function init(tokorig,seminfoorig,toklnorig) tpos=1-- token position top_fs={}-- reset top level function state ------------------------------------------------------------------ -- set up grammar-only token tables; impedance-matching... -- note that constants returned by the lexer is source-level, so -- for now, fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING) ------------------------------------------------------------------ local j=1 toklist,seminfolist,toklnlist,xreflist={},{},{},{} for i=1,#tokorig do local tok=tokorig[i] local yep=true if tok=="TK_KEYWORD"or tok=="TK_OP"then tok=seminfoorig[i] elseif tok=="TK_NAME"then tok="" seminfolist[j]=seminfoorig[i] elseif tok=="TK_NUMBER"then tok="" seminfolist[j]=0-- fake! elseif tok=="TK_STRING"or tok=="TK_LSTRING"then tok="" seminfolist[j]=""-- fake! elseif tok=="TK_EOS"then tok="" else -- non-grammar tokens; ignore them yep=false end if yep then-- set rest of the information toklist[j]=tok toklnlist[j]=toklnorig[i] xreflist[j]=i j=j+1 end end--for ------------------------------------------------------------------ -- initialize data structures for variable tracking ------------------------------------------------------------------ globalinfo,globallookup,localinfo={},{},{} ilocalinfo,ilocalrefs={},{} statinfo={}-- experimental end --end of inserted module end -- preload function for module optlex preload.optlex= function() --start of inserted module module"optlex" local string=base.require"string" local match=string.match local sub=string.sub local find=string.find local rep=string.rep local print ------------------------------------------------------------------------ -- variables and data structures ------------------------------------------------------------------------ -- error function, can override by setting own function into module error=base.error warn={}-- table for warning flags local stoks,sinfos,stoklns-- source lists local is_realtoken={-- significant (grammar) tokens TK_KEYWORD=true, TK_NAME=true, TK_NUMBER=true, TK_STRING=true, TK_LSTRING=true, TK_OP=true, TK_EOS=true, } local is_faketoken={-- whitespace (non-grammar) tokens TK_COMMENT=true, TK_LCOMMENT=true, TK_EOL=true, TK_SPACE=true, } local opt_details-- for extra information ------------------------------------------------------------------------ -- true if current token is at the start of a line -- * skips over deleted tokens via recursion ------------------------------------------------------------------------ local function atlinestart(i) local tok=stoks[i-1] if i<=1 or tok=="TK_EOL"then return true elseif tok==""then return atlinestart(i-1) end return false end ------------------------------------------------------------------------ -- true if current token is at the end of a line -- * skips over deleted tokens via recursion ------------------------------------------------------------------------ local function atlineend(i) local tok=stoks[i+1] if i>=#stoks or tok=="TK_EOL"or tok=="TK_EOS"then return true elseif tok==""then return atlineend(i+1) end return false end ------------------------------------------------------------------------ -- counts comment EOLs inside a long comment -- * in order to keep line numbering, EOLs need to be reinserted ------------------------------------------------------------------------ local function commenteols(lcomment) local sep=#match(lcomment,"^%-%-%[=*%[") local z=sub(lcomment,sep+1,-(sep-1))-- remove delims local i,c=1,0 while true do local p,q,r,s=find(z,"([\r\n])([\r\n]?)",i) if not p then break end-- if no matches, done i=p+1 c=c+1 if#s>0 and r~=s then-- skip CRLF or LFCR i=i+1 end end return c end ------------------------------------------------------------------------ -- compares two tokens (i, j) and returns the whitespace required -- * see documentation for a reference table of interactions -- * only two grammar/real tokens are being considered -- * if "", no separation is needed -- * if " ", then at least one whitespace (or EOL) is required -- * NOTE: this doesn't work at the start or the end or for EOS! ------------------------------------------------------------------------ local function checkpair(i,j) local match=match local t1,t2=stoks[i],stoks[j] -------------------------------------------------------------------- if t1=="TK_STRING"or t1=="TK_LSTRING"or t2=="TK_STRING"or t2=="TK_LSTRING"then return"" -------------------------------------------------------------------- elseif t1=="TK_OP"or t2=="TK_OP"then if(t1=="TK_OP"and(t2=="TK_KEYWORD"or t2=="TK_NAME"))or (t2=="TK_OP"and(t1=="TK_KEYWORD"or t1=="TK_NAME"))then return"" end if t1=="TK_OP"and t2=="TK_OP"then -- for TK_OP/TK_OP pairs, see notes in technotes.txt local op,op2=sinfos[i],sinfos[j] if(match(op,"^%.%.?$")and match(op2,"^%."))or (match(op,"^[~=<>]$")and op2=="=")or (op=="["and(op2=="["or op2=="="))then return" " end return"" end -- "TK_OP" + "TK_NUMBER" case local op=sinfos[i] if t2=="TK_OP"then op=sinfos[j]end if match(op,"^%.%.?%.?$")then return" " end return"" -------------------------------------------------------------------- else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then return" " -------------------------------------------------------------------- end end ------------------------------------------------------------------------ -- repack tokens, removing deletions caused by optimization process ------------------------------------------------------------------------ local function repack_tokens() local dtoks,dinfos,dtoklns={},{},{} local j=1 for i=1,#stoks do local tok=stoks[i] if tok~=""then dtoks[j],dinfos[j],dtoklns[j]=tok,sinfos[i],stoklns[i] j=j+1 end end stoks,sinfos,stoklns=dtoks,dinfos,dtoklns end ------------------------------------------------------------------------ -- number optimization -- * optimization using string formatting functions is one way of doing -- this, but here, we consider all cases and handle them separately -- (possibly an idiotic approach...) -- * scientific notation being generated is not in canonical form, this -- may or may not be a bad thing -- * note: intermediate portions need to fit into a normal number range -- * optimizations can be divided based on number patterns: -- * hexadecimal: -- (1) no need to remove leading zeros, just skip to (2) -- (2) convert to integer if size equal or smaller -- * change if equal size -> lose the 'x' to reduce entropy -- (3) number is then processed as an integer -- (4) note: does not make 0[xX] consistent -- * integer: -- (1) note: includes anything with trailing ".", ".0", ... -- (2) remove useless fractional part, if present, e.g. 123.000 -- (3) remove leading zeros, e.g. 000123 -- (4) switch to scientific if shorter, e.g. 123000 -> 123e3 -- * with fraction: -- (1) split into digits dot digits -- (2) if no integer portion, take as zero (can omit later) -- (3) handle degenerate .000 case, after which the fractional part -- must be non-zero (if zero, it's matched as an integer) -- (4) remove trailing zeros for fractional portion -- (5) p.q where p > 0 and q > 0 cannot be shortened any more -- (6) otherwise p == 0 and the form is .q, e.g. .000123 -- (7) if scientific shorter, convert, e.g. .000123 -> 123e-6 -- * scientific: -- (1) split into (digits dot digits) [eE] ([+-] digits) -- (2) if significand has ".", shift it out so it becomes an integer -- (3) if significand is zero, just use zero -- (4) remove leading zeros for significand -- (5) shift out trailing zeros for significand -- (6) examine exponent and determine which format is best: -- integer, with fraction, scientific ------------------------------------------------------------------------ local function do_number(i) local before=sinfos[i]-- 'before' local z=before-- working representation local y-- 'after', if better -------------------------------------------------------------------- if match(z,"^0[xX]")then-- hexadecimal number local v=base.tostring(base.tonumber(z)) if#v<=#z then z=v-- change to integer, AND continue else return-- no change; stick to hex end end -------------------------------------------------------------------- if match(z,"^%d+%.?0*$")then-- integer or has useless frac z=match(z,"^(%d+)%.?0*$")-- int portion only if z+0>0 then z=match(z,"^0*([1-9]%d*)$")-- remove leading zeros local v=#match(z,"0*$") local nv=base.tostring(v) if v>#nv+1 then-- scientific is shorter z=sub(z,1,#z-v).."e"..nv end y=z else y="0"-- basic zero end -------------------------------------------------------------------- elseif not match(z,"[eE]")then-- number with fraction part local p,q=match(z,"^(%d*)%.(%d+)$")-- split if p==""then p=0 end-- int part zero if q+0==0 and p==0 then y="0"-- degenerate .000 case else -- now, q > 0 holds and p is a number local v=#match(q,"0*$")-- remove trailing zeros if v>0 then q=sub(q,1,#q-v) end -- if p > 0, nothing else we can do to simplify p.q case if p+0>0 then y=p.."."..q else y="."..q-- tentative, e.g. .000123 local v=#match(q,"^0*")-- # leading spaces local w=#q-v-- # significant digits local nv=base.tostring(#q) -- e.g. compare 123e-6 versus .000123 if w+2+#nv<1+#q then y=sub(q,-w).."e-"..nv end end end -------------------------------------------------------------------- else-- scientific number local sig,ex=match(z,"^([^eE]+)[eE]([%+%-]?%d+)$") ex=base.tonumber(ex) -- if got ".", shift out fractional portion of significand local p,q=match(sig,"^(%d*)%.(%d*)$") if p then ex=ex-#q sig=p..q end if sig+0==0 then y="0"-- basic zero else local v=#match(sig,"^0*")-- remove leading zeros sig=sub(sig,v+1) v=#match(sig,"0*$")-- shift out trailing zeros if v>0 then sig=sub(sig,1,#sig-v) ex=ex+v end -- examine exponent and determine which format is best local nex=base.tostring(ex) if ex==0 then-- it's just an integer y=sig elseif ex>0 and(ex<=1+#nex)then-- a number y=sig..rep("0",ex) elseif ex<0 and(ex>=-#sig)then-- fraction, e.g. .123 v=#sig+ex y=sub(sig,1,v).."."..sub(sig,v+1) elseif ex<0 and(#nex>=-ex-#sig)then -- e.g. compare 1234e-5 versus .01234 -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig -- -> #nex >= -ex - #sig v=-ex-#sig y="."..rep("0",v)..sig else-- non-canonical scientific representation y=sig.."e"..ex end end--if sig end -------------------------------------------------------------------- if y and y~=sinfos[i]then if opt_details then print(" (line "..stoklns[i]..") "..sinfos[i].." -> "..y) opt_details=opt_details+1 end sinfos[i]=y end end ------------------------------------------------------------------------ -- string optimization -- * note: works on well-formed strings only! -- * optimizations on characters can be summarized as follows: -- \a\b\f\n\r\t\v -- no change -- \\ -- no change -- \"\' -- depends on delim, other can remove \ -- \[\] -- remove \ -- \ -- general escape, remove \ -- \ -- normalize the EOL only -- \ddd -- if \a\b\f\n\r\t\v, change to latter -- if other < ascii 32, keep ddd but zap leading zeros -- but cannot have following digits -- if >= ascii 32, translate it into the literal, then also -- do escapes for \\,\",\' cases -- -- no change -- * switch delimiters if string becomes shorter ------------------------------------------------------------------------ local function do_string(I) local info=sinfos[I] local delim=sub(info,1,1)-- delimiter used local ndelim=(delim=="'")and'"'or"'"-- opposite " <-> ' local z=sub(info,2,-2)-- actual string local i=1 local c_delim,c_ndelim=0,0-- "/' counts -------------------------------------------------------------------- while i<=#z do local c=sub(z,i,i) ---------------------------------------------------------------- if c=="\\"then-- escaped stuff local j=i+1 local d=sub(z,j,j) local p=find("abfnrtv\\\n\r\"\'0123456789",d,1,true) ------------------------------------------------------------ if not p then-- \ -- remove \ z=sub(z,1,i-1)..sub(z,j) i=i+1 ------------------------------------------------------------ elseif p<=8 then-- \a\b\f\n\r\t\v\\ i=i+2-- no change ------------------------------------------------------------ elseif p<=10 then-- \ -- normalize EOL local eol=sub(z,j,j+1) if eol=="\r\n"or eol=="\n\r"then z=sub(z,1,i).."\n"..sub(z,j+2) elseif p==10 then-- \r case z=sub(z,1,i).."\n"..sub(z,j+1) end i=i+2 ------------------------------------------------------------ elseif p<=12 then-- \"\' -- remove \ for ndelim if d==delim then c_delim=c_delim+1 i=i+2 else c_ndelim=c_ndelim+1 z=sub(z,1,i-1)..sub(z,j) i=i+1 end ------------------------------------------------------------ else-- \ddd -- various steps local s=match(z,"^(%d%d?%d?)",j) j=i+1+#s-- skip to location local cv=s+0 local cc=string.char(cv) local p=find("\a\b\f\n\r\t\v",cc,1,true) if p then-- special escapes s="\\"..sub("abfnrtv",p,p) elseif cv<32 then-- normalized \ddd if match(sub(z,j,j),"%d")then -- if a digit follows, \ddd cannot be shortened s="\\"..s else s="\\"..cv end elseif cc==delim then-- \ s="\\"..cc c_delim=c_delim+1 elseif cc=="\\"then-- \\ s="\\\\" else-- literal character s=cc if cc==ndelim then c_ndelim=c_ndelim+1 end end z=sub(z,1,i-1)..s..sub(z,j) i=i+#s ------------------------------------------------------------ end--if p ---------------------------------------------------------------- else-- c ~= "\\" -- -- no change i=i+1 if c==ndelim then-- count ndelim, for switching delimiters c_ndelim=c_ndelim+1 end ---------------------------------------------------------------- end--if c end--while -------------------------------------------------------------------- -- switching delimiters, a long-winded derivation: -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes -- simplifying the condition (1)>(2) --> c_delim > c_ndelim if c_delim>c_ndelim then i=1 while i<=#z do local p,q,r=find(z,"([\'\"])",i) if not p then break end if r==delim then-- \ -> z=sub(z,1,p-2)..sub(z,p) i=p else-- r == ndelim -- -> \ z=sub(z,1,p-1).."\\"..sub(z,p) i=p+2 end end--while delim=ndelim-- actually change delimiters end -------------------------------------------------------------------- z=delim..z..delim if z~=sinfos[I]then if opt_details then print(" (line "..stoklns[I]..") "..sinfos[I].." -> "..z) opt_details=opt_details+1 end sinfos[I]=z end end ------------------------------------------------------------------------ -- long string optimization -- * note: warning flagged if trailing whitespace found, not trimmed -- * remove first optional newline -- * normalize embedded newlines -- * reduce '=' separators in delimiters if possible ------------------------------------------------------------------------ local function do_lstring(I) local info=sinfos[I] local delim1=match(info,"^%[=*%[")-- cut out delimiters local sep=#delim1 local delim2=sub(info,-sep,-1) local z=sub(info,sep+1,-(sep+1))-- lstring without delims local y="" local i=1 -------------------------------------------------------------------- while true do local p,q,r,s=find(z,"([\r\n])([\r\n]?)",i) -- deal with a single line local ln if not p then ln=sub(z,i) elseif p>=i then ln=sub(z,i,p-1) end if ln~=""then -- flag a warning if there are trailing spaces, won't optimize! if match(ln,"%s+$")then warn.LSTRING="trailing whitespace in long string near line "..stoklns[I] end y=y..ln end if not p then-- done if no more EOLs break end -- deal with line endings, normalize them i=p+1 if p then if#s>0 and r~=s then-- skip CRLF or LFCR i=i+1 end -- skip first newline, which can be safely deleted if not(i==1 and i==p)then y=y.."\n" end end end--while -------------------------------------------------------------------- -- handle possible deletion of one or more '=' separators if sep>=3 then local chk,okay=sep-1 -- loop to test ending delimiter with less of '=' down to zero while chk>=2 do local delim="%]"..rep("=",chk-2).."%]" if not match(y,delim)then okay=chk end chk=chk-1 end if okay then-- change delimiters sep=rep("=",okay-2) delim1,delim2="["..sep.."[","]"..sep.."]" end end -------------------------------------------------------------------- sinfos[I]=delim1..y..delim2 end ------------------------------------------------------------------------ -- long comment optimization -- * note: does not remove first optional newline -- * trim trailing whitespace -- * normalize embedded newlines -- * reduce '=' separators in delimiters if possible ------------------------------------------------------------------------ local function do_lcomment(I) local info=sinfos[I] local delim1=match(info,"^%-%-%[=*%[")-- cut out delimiters local sep=#delim1 local delim2=sub(info,-(sep-2),-1) local z=sub(info,sep+1,-(sep-1))-- comment without delims local y="" local i=1 -------------------------------------------------------------------- while true do local p,q,r,s=find(z,"([\r\n])([\r\n]?)",i) -- deal with a single line, extract and check trailing whitespace local ln if not p then ln=sub(z,i) elseif p>=i then ln=sub(z,i,p-1) end if ln~=""then -- trim trailing whitespace if non-empty line local ws=match(ln,"%s*$") if#ws>0 then ln=sub(ln,1,-(ws+1))end y=y..ln end if not p then-- done if no more EOLs break end -- deal with line endings, normalize them i=p+1 if p then if#s>0 and r~=s then-- skip CRLF or LFCR i=i+1 end y=y.."\n" end end--while -------------------------------------------------------------------- -- handle possible deletion of one or more '=' separators sep=sep-2 if sep>=3 then local chk,okay=sep-1 -- loop to test ending delimiter with less of '=' down to zero while chk>=2 do local delim="%]"..rep("=",chk-2).."%]" if not match(y,delim)then okay=chk end chk=chk-1 end if okay then-- change delimiters sep=rep("=",okay-2) delim1,delim2="--["..sep.."[","]"..sep.."]" end end -------------------------------------------------------------------- sinfos[I]=delim1..y..delim2 end ------------------------------------------------------------------------ -- short comment optimization -- * trim trailing whitespace ------------------------------------------------------------------------ local function do_comment(i) local info=sinfos[i] local ws=match(info,"%s*$")-- just look from end of string if#ws>0 then info=sub(info,1,-(ws+1))-- trim trailing whitespace end sinfos[i]=info end ------------------------------------------------------------------------ -- returns true if string found in long comment -- * this is a feature to keep copyright or license texts ------------------------------------------------------------------------ local function keep_lcomment(opt_keep,info) if not opt_keep then return false end-- option not set local delim1=match(info,"^%-%-%[=*%[")-- cut out delimiters local sep=#delim1 local delim2=sub(info,-sep,-1) local z=sub(info,sep+1,-(sep-1))-- comment without delims if find(z,opt_keep,1,true)then-- try to match return true end end ------------------------------------------------------------------------ -- main entry point -- * currently, lexer processing has 2 passes -- * processing is done on a line-oriented basis, which is easier to -- grok due to the next point... -- * since there are various options that can be enabled or disabled, -- processing is a little messy or convoluted ------------------------------------------------------------------------ function optimize(option,toklist,semlist,toklnlist) -------------------------------------------------------------------- -- set option flags -------------------------------------------------------------------- local opt_comments=option["opt-comments"] local opt_whitespace=option["opt-whitespace"] local opt_emptylines=option["opt-emptylines"] local opt_eols=option["opt-eols"] local opt_strings=option["opt-strings"] local opt_numbers=option["opt-numbers"] local opt_x=option["opt-experimental"] local opt_keep=option.KEEP opt_details=option.DETAILS and 0-- upvalues for details display print=print or base.print if opt_eols then-- forced settings, otherwise won't work properly opt_comments=true opt_whitespace=true opt_emptylines=true elseif opt_x then opt_whitespace=true end -------------------------------------------------------------------- -- variable initialization -------------------------------------------------------------------- stoks,sinfos,stoklns-- set source lists =toklist,semlist,toklnlist local i=1-- token position local tok,info-- current token local prev-- position of last grammar token -- on same line (for TK_SPACE stuff) -------------------------------------------------------------------- -- changes a token, info pair -------------------------------------------------------------------- local function settoken(tok,info,I) I=I or i stoks[I]=tok or"" sinfos[I]=info or"" end -------------------------------------------------------------------- -- experimental optimization for ';' operator -------------------------------------------------------------------- if opt_x then while true do tok,info=stoks[i],sinfos[i] if tok=="TK_EOS"then-- end of stream/pass break elseif tok=="TK_OP"and info==";"then -- ';' operator found, since it is entirely optional, set it -- as a space to let whitespace optimization do the rest settoken("TK_SPACE"," ") end i=i+1 end repack_tokens() end -------------------------------------------------------------------- -- processing loop (PASS 1) -------------------------------------------------------------------- i=1 while true do tok,info=stoks[i],sinfos[i] ---------------------------------------------------------------- local atstart=atlinestart(i)-- set line begin flag if atstart then prev=nil end ---------------------------------------------------------------- if tok=="TK_EOS"then-- end of stream/pass break ---------------------------------------------------------------- elseif tok=="TK_KEYWORD"or-- keywords, identifiers, tok=="TK_NAME"or-- operators tok=="TK_OP"then -- TK_KEYWORD and TK_OP can't be optimized without a big -- optimization framework; it would be more of an optimizing -- compiler, not a source code compressor -- TK_NAME that are locals needs parser to analyze/optimize prev=i ---------------------------------------------------------------- elseif tok=="TK_NUMBER"then-- numbers if opt_numbers then do_number(i)-- optimize end prev=i ---------------------------------------------------------------- elseif tok=="TK_STRING"or-- strings, long strings tok=="TK_LSTRING"then if opt_strings then if tok=="TK_STRING"then do_string(i)-- optimize else do_lstring(i)-- optimize end end prev=i ---------------------------------------------------------------- elseif tok=="TK_COMMENT"then-- short comments if opt_comments then if i==1 and sub(info,1,1)=="#"then -- keep shbang comment, trim whitespace do_comment(i) else -- safe to delete, as a TK_EOL (or TK_EOS) always follows settoken()-- remove entirely end elseif opt_whitespace then-- trim whitespace only do_comment(i) end ---------------------------------------------------------------- elseif tok=="TK_LCOMMENT"then-- long comments if keep_lcomment(opt_keep,info)then ------------------------------------------------------------ -- if --keep, we keep a long comment if is found; -- this is a feature to keep copyright or license texts if opt_whitespace then-- trim whitespace only do_lcomment(i) end prev=i elseif opt_comments then local eols=commenteols(info) ------------------------------------------------------------ -- prepare opt_emptylines case first, if a disposable token -- follows, current one is safe to dump, else keep a space; -- it is implied that the operation is safe for '-', because -- current is a TK_LCOMMENT, and must be separate from a '-' if is_faketoken[stoks[i+1]]then settoken()-- remove entirely tok="" else settoken("TK_SPACE"," ") end ------------------------------------------------------------ -- if there are embedded EOLs to keep and opt_emptylines is -- disabled, then switch the token into one or more EOLs if not opt_emptylines and eols>0 then settoken("TK_EOL",rep("\n",eols)) end ------------------------------------------------------------ -- if optimizing whitespaces, force reinterpretation of the -- token to give a chance for the space to be optimized away if opt_whitespace and tok~=""then i=i-1-- to reinterpret end ------------------------------------------------------------ else-- disabled case if opt_whitespace then-- trim whitespace only do_lcomment(i) end prev=i end ---------------------------------------------------------------- elseif tok=="TK_EOL"then-- line endings if atstart and opt_emptylines then settoken()-- remove entirely elseif info=="\r\n"or info=="\n\r"then -- normalize the rest of the EOLs for CRLF/LFCR only -- (note that TK_LCOMMENT can change into several EOLs) settoken("TK_EOL","\n") end ---------------------------------------------------------------- elseif tok=="TK_SPACE"then-- whitespace if opt_whitespace then if atstart or atlineend(i)then -- delete leading and trailing whitespace settoken()-- remove entirely else ------------------------------------------------------------ -- at this point, since leading whitespace have been removed, -- there should be a either a real token or a TK_LCOMMENT -- prior to hitting this whitespace; the TK_LCOMMENT case -- only happens if opt_comments is disabled; so prev ~= nil local ptok=stoks[prev] if ptok=="TK_LCOMMENT"then -- previous TK_LCOMMENT can abut with anything settoken()-- remove entirely else -- prev must be a grammar token; consecutive TK_SPACE -- tokens is impossible when optimizing whitespace local ntok=stoks[i+1] if is_faketoken[ntok]then -- handle special case where a '-' cannot abut with -- either a short comment or a long comment if(ntok=="TK_COMMENT"or ntok=="TK_LCOMMENT")and ptok=="TK_OP"and sinfos[prev]=="-"then -- keep token else settoken()-- remove entirely end else--is_realtoken -- check a pair of grammar tokens, if can abut, then -- delete space token entirely, otherwise keep one space local s=checkpair(prev,i+1) if s==""then settoken()-- remove entirely else settoken("TK_SPACE"," ") end end end ------------------------------------------------------------ end end ---------------------------------------------------------------- else error("unidentified token encountered") end ---------------------------------------------------------------- i=i+1 end--while repack_tokens() -------------------------------------------------------------------- -- processing loop (PASS 2) -------------------------------------------------------------------- if opt_eols then i=1 -- aggressive EOL removal only works with most non-grammar tokens -- optimized away because it is a rather simple scheme -- basically -- it just checks 'real' token pairs around EOLs if stoks[1]=="TK_COMMENT"then -- first comment still existing must be shbang, skip whole line i=3 end while true do tok,info=stoks[i],sinfos[i] -------------------------------------------------------------- if tok=="TK_EOS"then-- end of stream/pass break -------------------------------------------------------------- elseif tok=="TK_EOL"then-- consider each TK_EOL local t1,t2=stoks[i-1],stoks[i+1] if is_realtoken[t1]and is_realtoken[t2]then-- sanity check local s=checkpair(i-1,i+1) if s==""or t2=="TK_EOS"then settoken()-- remove entirely end end end--if tok -------------------------------------------------------------- i=i+1 end--while repack_tokens() end -------------------------------------------------------------------- if opt_details and opt_details>0 then print()end-- spacing return stoks,sinfos,stoklns end --end of inserted module end -- preload function for module optparser preload.optparser= function() --start of inserted module module"optparser" local string=base.require"string" local table=base.require"table" ---------------------------------------------------------------------- -- Letter frequencies for reducing symbol entropy (fixed version) -- * Might help a wee bit when the output file is compressed -- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies -- * We use letter frequencies according to a Linotype keyboard, plus -- the underscore, and both lower case and upper case letters. -- * The arrangement below (LC, underscore, %d, UC) is arbitrary. -- * This is certainly not optimal, but is quick-and-dirty and the -- process has no significant overhead ---------------------------------------------------------------------- local LETTERS="etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ" local ALPHANUM="etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ" -- names or identifiers that must be skipped -- * the first two lines are for keywords local SKIP_NAME={} for v in string.gmatch([[ and break do else elseif end false for function if in local nil not or repeat return then true until while self]],"%S+")do SKIP_NAME[v]=true end ------------------------------------------------------------------------ -- variables and data structures ------------------------------------------------------------------------ local toklist,seminfolist,-- token lists (lexer output) tokpar,seminfopar,xrefpar,-- token lists (parser output) globalinfo,localinfo,-- variable information tables statinfo,-- statment type table globaluniq,localuniq,-- unique name tables var_new,-- index of new variable names varlist-- list of output variables ---------------------------------------------------------------------- -- preprocess information table to get lists of unique names ---------------------------------------------------------------------- local function preprocess(infotable) local uniqtable={} for i=1,#infotable do-- enumerate info table local obj=infotable[i] local name=obj.name -------------------------------------------------------------------- if not uniqtable[name]then-- not found, start an entry uniqtable[name]={ decl=0,token=0,size=0, } end -------------------------------------------------------------------- local uniq=uniqtable[name]-- count declarations, tokens, size uniq.decl=uniq.decl+1 local xref=obj.xref local xcount=#xref uniq.token=uniq.token+xcount uniq.size=uniq.size+xcount*#name -------------------------------------------------------------------- if obj.decl then-- if local table, create first,last pairs obj.id=i obj.xcount=xcount if xcount>1 then-- if ==1, means local never accessed obj.first=xref[2] obj.last=xref[xcount] end -------------------------------------------------------------------- else-- if global table, add a back ref uniq.id=i end -------------------------------------------------------------------- end--for return uniqtable end ---------------------------------------------------------------------- -- calculate actual symbol frequencies, in order to reduce entropy -- * this may help further reduce the size of compressed sources -- * note that since parsing optimizations is put before lexing -- optimizations, the frequency table is not exact! -- * yes, this will miss --keep block comments too... ---------------------------------------------------------------------- local function recalc_for_entropy(option) local byte=string.byte local char=string.char -- table of token classes to accept in calculating symbol frequency local ACCEPT={ TK_KEYWORD=true,TK_NAME=true,TK_NUMBER=true, TK_STRING=true,TK_LSTRING=true, } if not option["opt-comments"]then ACCEPT.TK_COMMENT=true ACCEPT.TK_LCOMMENT=true end -------------------------------------------------------------------- -- create a new table and remove any original locals by filtering -------------------------------------------------------------------- local filtered={} for i=1,#toklist do filtered[i]=seminfolist[i] end for i=1,#localinfo do-- enumerate local info table local obj=localinfo[i] local xref=obj.xref for j=1,obj.xcount do local p=xref[j] filtered[p]=""-- remove locals end end -------------------------------------------------------------------- local freq={}-- reset symbol frequency table for i=0,255 do freq[i]=0 end for i=1,#toklist do-- gather symbol frequency local tok,info=toklist[i],filtered[i] if ACCEPT[tok]then for j=1,#info do local c=byte(info,j) freq[c]=freq[c]+1 end end--if end--for -------------------------------------------------------------------- -- function to re-sort symbols according to actual frequencies -------------------------------------------------------------------- local function resort(symbols) local symlist={} for i=1,#symbols do-- prepare table to sort local c=byte(symbols,i) symlist[i]={c=c,freq=freq[c],} end table.sort(symlist,-- sort selected symbols function(v1,v2) return v1.freq>v2.freq end ) local charlist={}-- reconstitute the string for i=1,#symlist do charlist[i]=char(symlist[i].c) end return table.concat(charlist) end -------------------------------------------------------------------- LETTERS=resort(LETTERS)-- change letter arrangement ALPHANUM=resort(ALPHANUM) end ---------------------------------------------------------------------- -- returns a string containing a new local variable name to use, and -- a flag indicating whether it collides with a global variable -- * trapping keywords and other names like 'self' is done elsewhere ---------------------------------------------------------------------- local function new_var_name() local var local cletters,calphanum=#LETTERS,#ALPHANUM local v=var_new if vv local n=v%cletters-- left side cycles faster v=(v-n)/cletters-- do first char first n=n+1 var=string.sub(LETTERS,n,n) while sz>1 do local m=v%calphanum v=(v-m)/calphanum m=m+1 var=var..string.sub(ALPHANUM,m,m) sz=sz-1 end end var_new=var_new+1 return var,globaluniq[var]~=nil end ---------------------------------------------------------------------- -- calculate and print some statistics -- * probably better in main source, put here for now ---------------------------------------------------------------------- local function stats_summary(globaluniq,localuniq,afteruniq,option) local print=print or base.print local fmt=string.format local opt_details=option.DETAILS if option.QUIET then return end local uniq_g,uniq_li,uniq_lo,uniq_ti,uniq_to,-- stats needed decl_g,decl_li,decl_lo,decl_ti,decl_to, token_g,token_li,token_lo,token_ti,token_to, size_g,size_li,size_lo,size_ti,size_to =0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0 local function avg(c,l)-- safe average function if c==0 then return 0 end return l/c end -------------------------------------------------------------------- -- collect statistics (note: globals do not have declarations!) -------------------------------------------------------------------- for name,uniq in base.pairs(globaluniq)do uniq_g=uniq_g+1 token_g=token_g+uniq.token size_g=size_g+uniq.size end for name,uniq in base.pairs(localuniq)do uniq_li=uniq_li+1 decl_li=decl_li+uniq.decl token_li=token_li+uniq.token size_li=size_li+uniq.size end for name,uniq in base.pairs(afteruniq)do uniq_lo=uniq_lo+1 decl_lo=decl_lo+uniq.decl token_lo=token_lo+uniq.token size_lo=size_lo+uniq.size end uniq_ti=uniq_g+uniq_li decl_ti=decl_g+decl_li token_ti=token_g+token_li size_ti=size_g+size_li uniq_to=uniq_g+uniq_lo decl_to=decl_g+decl_lo token_to=token_g+token_lo size_to=size_g+size_lo -------------------------------------------------------------------- -- detailed stats: global list -------------------------------------------------------------------- if opt_details then local sorted={}-- sort table of unique global names by size for name,uniq in base.pairs(globaluniq)do uniq.name=name sorted[#sorted+1]=uniq end table.sort(sorted, function(v1,v2) return v1.size>v2.size end ) local tabf1,tabf2="%8s%8s%10s %s","%8d%8d%10.2f %s" local hl=string.rep("-",44) print("*** global variable list (sorted by size) ***\n"..hl) print(fmt(tabf1,"Token","Input","Input","Global")) print(fmt(tabf1,"Count","Bytes","Average","Name")) print(hl) for i=1,#sorted do local uniq=sorted[i] print(fmt(tabf2,uniq.token,uniq.size,avg(uniq.token,uniq.size),uniq.name)) end print(hl) print(fmt(tabf2,token_g,size_g,avg(token_g,size_g),"TOTAL")) print(hl.."\n") -------------------------------------------------------------------- -- detailed stats: local list -------------------------------------------------------------------- local tabf1,tabf2="%8s%8s%8s%10s%8s%10s %s","%8d%8d%8d%10.2f%8d%10.2f %s" local hl=string.rep("-",70) print("*** local variable list (sorted by allocation order) ***\n"..hl) print(fmt(tabf1,"Decl.","Token","Input","Input","Output","Output","Global")) print(fmt(tabf1,"Count","Count","Bytes","Average","Bytes","Average","Name")) print(hl) for i=1,#varlist do-- iterate according to order assigned local name=varlist[i] local uniq=afteruniq[name] local old_t,old_s=0,0 for j=1,#localinfo do-- find corresponding old names and calculate local obj=localinfo[j] if obj.name==name then old_t=old_t+obj.xcount old_s=old_s+obj.xcount*#obj.oldname end end print(fmt(tabf2,uniq.decl,uniq.token,old_s,avg(old_t,old_s), uniq.size,avg(uniq.token,uniq.size),name)) end print(hl) print(fmt(tabf2,decl_lo,token_lo,size_li,avg(token_li,size_li), size_lo,avg(token_lo,size_lo),"TOTAL")) print(hl.."\n") end--if opt_details -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- local tabf1,tabf2="%-16s%8s%8s%8s%8s%10s","%-16s%8d%8d%8d%8d%10.2f" local hl=string.rep("-",58) print("*** local variable optimization summary ***\n"..hl) print(fmt(tabf1,"Variable","Unique","Decl.","Token","Size","Average")) print(fmt(tabf1,"Types","Names","Count","Count","Bytes","Bytes")) print(hl) print(fmt(tabf2,"Global",uniq_g,decl_g,token_g,size_g,avg(token_g,size_g))) print(hl) print(fmt(tabf2,"Local (in)",uniq_li,decl_li,token_li,size_li,avg(token_li,size_li))) print(fmt(tabf2,"TOTAL (in)",uniq_ti,decl_ti,token_ti,size_ti,avg(token_ti,size_ti))) print(hl) print(fmt(tabf2,"Local (out)",uniq_lo,decl_lo,token_lo,size_lo,avg(token_lo,size_lo))) print(fmt(tabf2,"TOTAL (out)",uniq_to,decl_to,token_to,size_to,avg(token_to,size_to))) print(hl.."\n") end ---------------------------------------------------------------------- -- experimental optimization for f("string") statements -- * safe to delete parentheses without adding whitespace, as both -- kinds of strings can abut with anything else ---------------------------------------------------------------------- local function optimize_func1() ------------------------------------------------------------------ local function is_strcall(j)-- find f("string") pattern local t1=tokpar[j+1]or"" local t2=tokpar[j+2]or"" local t3=tokpar[j+3]or"" if t1=="("and t2==""and t3==")"then return true end end ------------------------------------------------------------------ local del_list={}-- scan for function pattern, local i=1-- tokens to be deleted are marked while i<=#tokpar do local id=statinfo[i] if id=="call"and is_strcall(i)then-- found & mark () del_list[i+1]=true-- '(' del_list[i+3]=true-- ')' i=i+3 end i=i+1 end ------------------------------------------------------------------ -- delete a token and adjust all relevant tables -- * currently invalidates globalinfo and localinfo (not updated), -- so any other optimization is done after processing locals -- (of course, we can also lex the source data again...) -- * faster one-pass token deletion ------------------------------------------------------------------ local i,dst,idend=1,1,#tokpar local del_list2={} while dst<=idend do-- process parser tables if del_list[i]then-- found a token to delete? del_list2[xrefpar[i]]=true i=i+1 end if i>dst then if i<=idend then-- shift table items lower tokpar[dst]=tokpar[i] seminfopar[dst]=seminfopar[i] xrefpar[dst]=xrefpar[i]-(i-dst) statinfo[dst]=statinfo[i] else-- nil out excess entries tokpar[dst]=nil seminfopar[dst]=nil xrefpar[dst]=nil statinfo[dst]=nil end end i=i+1 dst=dst+1 end local i,dst,idend=1,1,#toklist while dst<=idend do-- process lexer tables if del_list2[i]then-- found a token to delete? i=i+1 end if i>dst then if i<=idend then-- shift table items lower toklist[dst]=toklist[i] seminfolist[dst]=seminfolist[i] else-- nil out excess entries toklist[dst]=nil seminfolist[dst]=nil end end i=i+1 dst=dst+1 end end ---------------------------------------------------------------------- -- local variable optimization ---------------------------------------------------------------------- local function optimize_locals(option) var_new=0-- reset variable name allocator varlist={} ------------------------------------------------------------------ -- preprocess global/local tables, handle entropy reduction ------------------------------------------------------------------ globaluniq=preprocess(globalinfo) localuniq=preprocess(localinfo) if option["opt-entropy"]then-- for entropy improvement recalc_for_entropy(option) end ------------------------------------------------------------------ -- build initial declared object table, then sort according to -- token count, this might help assign more tokens to more common -- variable names such as 'e' thus possibly reducing entropy -- * an object knows its localinfo index via its 'id' field -- * special handling for "self" special local (parameter) here ------------------------------------------------------------------ local object={} for i=1,#localinfo do object[i]=localinfo[i] end table.sort(object,-- sort largest first function(v1,v2) return v1.xcount>v2.xcount end ) ------------------------------------------------------------------ -- the special "self" function parameters must be preserved -- * the allocator below will never use "self", so it is safe to -- keep those implicit declarations as-is ------------------------------------------------------------------ local temp,j,gotself={},1,false for i=1,#object do local obj=object[i] if not obj.isself then temp[j]=obj j=j+1 else gotself=true end end object=temp ------------------------------------------------------------------ -- a simple first-come first-served heuristic name allocator, -- note that this is in no way optimal... -- * each object is a local variable declaration plus existence -- * the aim is to assign short names to as many tokens as possible, -- so the following tries to maximize name reuse -- * note that we preserve sort order ------------------------------------------------------------------ local nobject=#object while nobject>0 do local varname,gcollide repeat varname,gcollide=new_var_name()-- collect a variable name until not SKIP_NAME[varname]-- skip all special names varlist[#varlist+1]=varname-- keep a list local oleft=nobject ------------------------------------------------------------------ -- if variable name collides with an existing global, the name -- cannot be used by a local when the name is accessed as a global -- during which the local is alive (between 'act' to 'rem'), so -- we drop objects that collides with the corresponding global ------------------------------------------------------------------ if gcollide then -- find the xref table of the global local gref=globalinfo[globaluniq[varname].id].xref local ngref=#gref -- enumerate for all current objects; all are valid at this point for i=1,nobject do local obj=object[i] local act,rem=obj.act,obj.rem-- 'live' range of local -- if rem < 0, it is a -id to a local that had the same name -- so follow rem to extend it; does this make sense? while rem<0 do rem=localinfo[-rem].rem end local drop for j=1,ngref do local p=gref[j] if p>=act and p<=rem then drop=true end-- in range? end if drop then obj.skip=true oleft=oleft-1 end end--for end--if gcollide ------------------------------------------------------------------ -- now the first unassigned local (since it's sorted) will be the -- one with the most tokens to rename, so we set this one and then -- eliminate all others that collides, then any locals that left -- can then reuse the same variable name; this is repeated until -- all local declaration that can use this name is assigned -- * the criteria for local-local reuse/collision is: -- A is the local with a name already assigned -- B is the unassigned local under consideration -- => anytime A is accessed, it cannot be when B is 'live' -- => to speed up things, we have first/last accesses noted ------------------------------------------------------------------ while oleft>0 do local i=1 while object[i].skip do-- scan for first object i=i+1 end ------------------------------------------------------------------ -- first object is free for assignment of the variable name -- [first,last] gives the access range for collision checking ------------------------------------------------------------------ oleft=oleft-1 local obja=object[i] i=i+1 obja.newname=varname obja.skip=true obja.done=true local first,last=obja.first,obja.last local xref=obja.xref ------------------------------------------------------------------ -- then, scan all the rest and drop those colliding -- if A was never accessed then it'll never collide with anything -- otherwise trivial skip if: -- * B was activated after A's last access (last < act) -- * B was removed before A's first access (first > rem) -- if not, see detailed skip below... ------------------------------------------------------------------ if first and oleft>0 then-- must have at least 1 access local scanleft=oleft while scanleft>0 do while object[i].skip do-- next valid object i=i+1 end scanleft=scanleft-1 local objb=object[i] i=i+1 local act,rem=objb.act,objb.rem-- live range of B -- if rem < 0, extend range of rem thru' following local while rem<0 do rem=localinfo[-rem].rem end -------------------------------------------------------- if not(lastrem)then-- possible collision -------------------------------------------------------- -- B is activated later than A or at the same statement, -- this means for no collision, A cannot be accessed when B -- is alive, since B overrides A (or is a peer) -------------------------------------------------------- if act>=obja.act then for j=1,obja.xcount do-- ... then check every access local p=xref[j] if p>=act and p<=rem then-- A accessed when B live! oleft=oleft-1 objb.skip=true break end end--for -------------------------------------------------------- -- A is activated later than B, this means for no collision, -- A's access is okay since it overrides B, but B's last -- access need to be earlier than A's activation time -------------------------------------------------------- else if objb.last and objb.last>=obja.act then oleft=oleft-1 objb.skip=true end end end -------------------------------------------------------- if oleft==0 then break end end end--if first ------------------------------------------------------------------ end--while ------------------------------------------------------------------ -- after assigning all possible locals to one variable name, the -- unassigned locals/objects have the skip field reset and the table -- is compacted, to hopefully reduce iteration time ------------------------------------------------------------------ local temp,j={},1 for i=1,nobject do local obj=object[i] if not obj.done then obj.skip=false temp[j]=obj j=j+1 end end object=temp-- new compacted object table nobject=#object-- objects left to process ------------------------------------------------------------------ end--while ------------------------------------------------------------------ -- after assigning all locals with new variable names, we can -- patch in the new names, and reprocess to get 'after' stats ------------------------------------------------------------------ for i=1,#localinfo do-- enumerate all locals local obj=localinfo[i] local xref=obj.xref if obj.newname then-- if got new name, patch it in for j=1,obj.xcount do local p=xref[j]-- xrefs indexes the token list seminfolist[p]=obj.newname end obj.name,obj.oldname-- adjust names =obj.newname,obj.name else obj.oldname=obj.name-- for cases like 'self' end end ------------------------------------------------------------------ -- deal with statistics output ------------------------------------------------------------------ if gotself then-- add 'self' to end of list varlist[#varlist+1]="self" end local afteruniq=preprocess(localinfo) stats_summary(globaluniq,localuniq,afteruniq,option) end ---------------------------------------------------------------------- -- main entry point ---------------------------------------------------------------------- function optimize(option,_toklist,_seminfolist,xinfo) -- set tables toklist,seminfolist-- from lexer =_toklist,_seminfolist tokpar,seminfopar,xrefpar-- from parser =xinfo.toklist,xinfo.seminfolist,xinfo.xreflist globalinfo,localinfo,statinfo-- from parser =xinfo.globalinfo,xinfo.localinfo,xinfo.statinfo ------------------------------------------------------------------ -- optimize locals ------------------------------------------------------------------ if option["opt-locals"]then optimize_locals(option) end ------------------------------------------------------------------ -- other optimizations ------------------------------------------------------------------ if option["opt-experimental"]then-- experimental optimize_func1() -- WARNING globalinfo and localinfo now invalidated! end end --end of inserted module end -- preload function for module equiv preload.equiv= function() --start of inserted module module"equiv" local string=base.require"string" local loadstring=base.loadstring local sub=string.sub local match=string.match local dump=string.dump local byte=string.byte --[[-------------------------------------------------------------------- -- variable and data initialization ----------------------------------------------------------------------]] local is_realtoken={-- significant (grammar) tokens TK_KEYWORD=true, TK_NAME=true, TK_NUMBER=true, TK_STRING=true, TK_LSTRING=true, TK_OP=true, TK_EOS=true, } local option,llex,warn --[[-------------------------------------------------------------------- -- functions ----------------------------------------------------------------------]] ------------------------------------------------------------------------ -- initialization function ------------------------------------------------------------------------ function init(_option,_llex,_warn) option=_option llex=_llex warn=_warn end ------------------------------------------------------------------------ -- function to build lists containing a 'normal' lexer stream ------------------------------------------------------------------------ local function build_stream(s) llex.init(s) llex.llex() local stok,sseminfo-- source list (with whitespace elements) =llex.tok,llex.seminfo local tok,seminfo-- processed list (real elements only) ={},{} for i=1,#stok do local t=stok[i] if is_realtoken[t]then tok[#tok+1]=t seminfo[#seminfo+1]=sseminfo[i] end end--for return tok,seminfo end ------------------------------------------------------------------------ -- test source (lexer stream) equivalence ------------------------------------------------------------------------ function source(z,dat) -------------------------------------------------------------------- -- function to return a dumped string for seminfo compares -------------------------------------------------------------------- local function dumpsem(s) local sf=loadstring("return "..s,"z") if sf then return dump(sf) end end -------------------------------------------------------------------- -- mark and optionally report non-equivalence -------------------------------------------------------------------- local function bork(msg) if option.DETAILS then base.print("SRCEQUIV: "..msg)end warn.SRC_EQUIV=true end -------------------------------------------------------------------- -- get lexer streams for both source strings, compare -------------------------------------------------------------------- local tok1,seminfo1=build_stream(z)-- original local tok2,seminfo2=build_stream(dat)-- compressed -------------------------------------------------------------------- -- compare shbang lines ignoring EOL -------------------------------------------------------------------- local sh1=match(z,"^(#[^\r\n]*)") local sh2=match(dat,"^(#[^\r\n]*)") if sh1 or sh2 then if not sh1 or not sh2 or sh1~=sh2 then bork("shbang lines different") end end -------------------------------------------------------------------- -- compare by simple count -------------------------------------------------------------------- if#tok1~=#tok2 then bork("count "..#tok1.." "..#tok2) return end -------------------------------------------------------------------- -- compare each element the best we can -------------------------------------------------------------------- for i=1,#tok1 do local t1,t2=tok1[i],tok2[i] local s1,s2=seminfo1[i],seminfo2[i] if t1~=t2 then-- by type bork("type ["..i.."] "..t1.." "..t2) break end if t1=="TK_KEYWORD"or t1=="TK_NAME"or t1=="TK_OP"then if t1=="TK_NAME"and option["opt-locals"]then -- can't compare identifiers of locals that are optimized elseif s1~=s2 then-- by semantic info (simple) bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2) break end elseif t1=="TK_EOS"then -- no seminfo to compare else-- "TK_NUMBER" or "TK_STRING" or "TK_LSTRING" -- compare 'binary' form, so dump a function local s1b,s2b=dumpsem(s1),dumpsem(s2) if not s1b or not s2b or s1b~=s2b then bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2) break end end end--for -------------------------------------------------------------------- -- successful comparison if end is reached with no borks -------------------------------------------------------------------- end ------------------------------------------------------------------------ -- test binary chunk equivalence ------------------------------------------------------------------------ function binary(z,dat) local TNIL=0 local TBOOLEAN=1 local TNUMBER=3 local TSTRING=4 -------------------------------------------------------------------- -- mark and optionally report non-equivalence -------------------------------------------------------------------- local function bork(msg) if option.DETAILS then base.print("BINEQUIV: "..msg)end warn.BIN_EQUIV=true end -------------------------------------------------------------------- -- function to remove shbang line so that loadstring runs -------------------------------------------------------------------- local function zap_shbang(s) local shbang=match(s,"^(#[^\r\n]*\r?\n?)") if shbang then-- cut out shbang s=sub(s,#shbang+1) end return s end -------------------------------------------------------------------- -- attempt to compile, then dump to get binary chunk string -------------------------------------------------------------------- local cz=loadstring(zap_shbang(z),"z") if not cz then bork("failed to compile original sources for binary chunk comparison") return end local cdat=loadstring(zap_shbang(dat),"z") if not cdat then bork("failed to compile compressed result for binary chunk comparison") end -- if loadstring() works, dump assuming string.dump() is error-free local c1={i=1,dat=dump(cz)} c1.len=#c1.dat local c2={i=1,dat=dump(cdat)} c2.len=#c2.dat -------------------------------------------------------------------- -- support functions to handle binary chunk reading -------------------------------------------------------------------- local endian, sz_int,sz_sizet,-- sizes of data types sz_inst,sz_number, getint,getsizet -------------------------------------------------------------------- local function ensure(c,sz)-- check if bytes exist if c.i+sz-1>c.len then return end return true end -------------------------------------------------------------------- local function skip(c,sz)-- skip some bytes if not sz then sz=1 end c.i=c.i+sz end -------------------------------------------------------------------- local function getbyte(c)-- return a byte value local i=c.i if i>c.len then return end local d=sub(c.dat,i,i) c.i=i+1 return byte(d) end -------------------------------------------------------------------- local function getint_l(c)-- return an int value (little-endian) local n,scale=0,1 if not ensure(c,sz_int)then return end for j=1,sz_int do n=n+scale*getbyte(c) scale=scale*256 end return n end -------------------------------------------------------------------- local function getint_b(c)-- return an int value (big-endian) local n=0 if not ensure(c,sz_int)then return end for j=1,sz_int do n=n*256+getbyte(c) end return n end -------------------------------------------------------------------- local function getsizet_l(c)-- return a size_t value (little-endian) local n,scale=0,1 if not ensure(c,sz_sizet)then return end for j=1,sz_sizet do n=n+scale*getbyte(c) scale=scale*256 end return n end -------------------------------------------------------------------- local function getsizet_b(c)-- return a size_t value (big-endian) local n=0 if not ensure(c,sz_sizet)then return end for j=1,sz_sizet do n=n*256+getbyte(c) end return n end -------------------------------------------------------------------- local function getblock(c,sz)-- return a block (as a string) local i=c.i local j=i+sz-1 if j>c.len then return end local d=sub(c.dat,i,j) c.i=i+sz return d end -------------------------------------------------------------------- local function getstring(c)-- return a string local n=getsizet(c) if not n then return end if n==0 then return""end return getblock(c,n) end -------------------------------------------------------------------- local function goodbyte(c1,c2)-- compare byte value local b1,b2=getbyte(c1),getbyte(c2) if not b1 or not b2 or b1~=b2 then return end return b1 end -------------------------------------------------------------------- local function badbyte(c1,c2)-- compare byte value local b=goodbyte(c1,c2) if not b then return true end end -------------------------------------------------------------------- local function goodint(c1,c2)-- compare int value local i1,i2=getint(c1),getint(c2) if not i1 or not i2 or i1~=i2 then return end return i1 end -------------------------------------------------------------------- -- recursively-called function to compare function prototypes -------------------------------------------------------------------- local function getfunc(c1,c2) -- source name (ignored) if not getstring(c1)or not getstring(c2)then bork("bad source name");return end -- linedefined (ignored) if not getint(c1)or not getint(c2)then bork("bad linedefined");return end -- lastlinedefined (ignored) if not getint(c1)or not getint(c2)then bork("bad lastlinedefined");return end if not(ensure(c1,4)and ensure(c2,4))then bork("prototype header broken") end -- nups (compared) if badbyte(c1,c2)then bork("bad nups");return end -- numparams (compared) if badbyte(c1,c2)then bork("bad numparams");return end -- is_vararg (compared) if badbyte(c1,c2)then bork("bad is_vararg");return end -- maxstacksize (compared) if badbyte(c1,c2)then bork("bad maxstacksize");return end -- code (compared) local ncode=goodint(c1,c2) if not ncode then bork("bad ncode");return end local code1=getblock(c1,ncode*sz_inst) local code2=getblock(c2,ncode*sz_inst) if not code1 or not code2 or code1~=code2 then bork("bad code block");return end -- constants (compared) local nconst=goodint(c1,c2) if not nconst then bork("bad nconst");return end for i=1,nconst do local ctype=goodbyte(c1,c2) if not ctype then bork("bad const type");return end if ctype==TBOOLEAN then if badbyte(c1,c2)then bork("bad boolean value");return end elseif ctype==TNUMBER then local num1=getblock(c1,sz_number) local num2=getblock(c2,sz_number) if not num1 or not num2 or num1~=num2 then bork("bad number value");return end elseif ctype==TSTRING then local str1=getstring(c1) local str2=getstring(c2) if not str1 or not str2 or str1~=str2 then bork("bad string value");return end end end -- prototypes (compared recursively) local nproto=goodint(c1,c2) if not nproto then bork("bad nproto");return end for i=1,nproto do if not getfunc(c1,c2)then bork("bad function prototype");return end end -- debug information (ignored) -- lineinfo (ignored) local sizelineinfo1=getint(c1) if not sizelineinfo1 then bork("bad sizelineinfo1");return end local sizelineinfo2=getint(c2) if not sizelineinfo2 then bork("bad sizelineinfo2");return end if not getblock(c1,sizelineinfo1*sz_int)then bork("bad lineinfo1");return end if not getblock(c2,sizelineinfo2*sz_int)then bork("bad lineinfo2");return end -- locvars (ignored) local sizelocvars1=getint(c1) if not sizelocvars1 then bork("bad sizelocvars1");return end local sizelocvars2=getint(c2) if not sizelocvars2 then bork("bad sizelocvars2");return end for i=1,sizelocvars1 do if not getstring(c1)or not getint(c1)or not getint(c1)then bork("bad locvars1");return end end for i=1,sizelocvars2 do if not getstring(c2)or not getint(c2)or not getint(c2)then bork("bad locvars2");return end end -- upvalues (ignored) local sizeupvalues1=getint(c1) if not sizeupvalues1 then bork("bad sizeupvalues1");return end local sizeupvalues2=getint(c2) if not sizeupvalues2 then bork("bad sizeupvalues2");return end for i=1,sizeupvalues1 do if not getstring(c1)then bork("bad upvalues1");return end end for i=1,sizeupvalues2 do if not getstring(c2)then bork("bad upvalues2");return end end return true end -------------------------------------------------------------------- -- parse binary chunks to verify equivalence -- * for headers, handle sizes to allow a degree of flexibility -- * assume a valid binary chunk is generated, since it was not -- generated via external means -------------------------------------------------------------------- if not(ensure(c1,12)and ensure(c2,12))then bork("header broken") end skip(c1,6)-- skip signature(4), version, format endian=getbyte(c1)-- 1 = little endian sz_int=getbyte(c1)-- get data type sizes sz_sizet=getbyte(c1) sz_inst=getbyte(c1) sz_number=getbyte(c1) skip(c1)-- skip integral flag skip(c2,12)-- skip other header (assume similar) if endian==1 then-- set for endian sensitive data we need getint=getint_l getsizet=getsizet_l else getint=getint_b getsizet=getsizet_b end getfunc(c1,c2)-- get prototype at root if c1.i~=c1.len+1 then bork("inconsistent binary chunk1");return elseif c2.i~=c2.len+1 then bork("inconsistent binary chunk2");return end -------------------------------------------------------------------- -- successful comparison if end is reached with no borks -------------------------------------------------------------------- end --end of inserted module end -- preload function for module plugin/html preload["plugin/html"]= function() --start of inserted module module"plugin/html" local string=base.require"string" local table=base.require"table" local io=base.require"io" ------------------------------------------------------------------------ -- constants and configuration ------------------------------------------------------------------------ local HTML_EXT=".html" local ENTITIES={ ["&"]="&",["<"]="<",[">"]=">", ["'"]="'",["\""]=""", } -- simple headers and footers local HEADER=[[ %s
]]
local FOOTER=[[
]] -- for more, please see wikimain.css from the Lua wiki site local STYLESHEET=[[ BODY { background: white; color: navy; } pre.code { color: black; } span.comment { color: #00a000; } span.string { color: #009090; } span.keyword { color: black; font-weight: bold; } span.number { color: #993399; } span.operator { } span.name { } span.global { color: #ff0000; font-weight: bold; } span.local { color: #0000ff; font-weight: bold; } ]] ------------------------------------------------------------------------ -- option handling, plays nice with --quiet option ------------------------------------------------------------------------ local option-- local reference to list of options local srcfl,destfl-- filenames local toklist,seminfolist,toklnlist-- token data local function print(...)-- handle quiet option if option.QUIET then return end base.print(...) end ------------------------------------------------------------------------ -- initialization ------------------------------------------------------------------------ function init(_option,_srcfl,_destfl) option=_option srcfl=_srcfl local extb,exte=string.find(srcfl,"%.[^%.%\\%/]*$") local basename,extension=srcfl,"" if extb and extb>1 then basename=string.sub(srcfl,1,extb-1) extension=string.sub(srcfl,extb,exte) end destfl=basename..HTML_EXT if option.OUTPUT_FILE then destfl=option.OUTPUT_FILE end if srcfl==destfl then base.error("output filename identical to input filename") end end ------------------------------------------------------------------------ -- message display, post-load processing ------------------------------------------------------------------------ function post_load(z) print([[ HTML plugin module for LuaSrcDiet ]]) print("Exporting: "..srcfl.." -> "..destfl.."\n") end ------------------------------------------------------------------------ -- post-lexing processing, can work on lexer table output ------------------------------------------------------------------------ function post_lex(_toklist,_seminfolist,_toklnlist) toklist,seminfolist,toklnlist =_toklist,_seminfolist,_toklnlist end ------------------------------------------------------------------------ -- escape the usual suspects for HTML/XML ------------------------------------------------------------------------ local function do_entities(z) local i=1 while i<=#z do local c=string.sub(z,i,i) local d=ENTITIES[c] if d then c=d z=string.sub(z,1,i-1)..c..string.sub(z,i+1) end i=i+#c end--while return z end ------------------------------------------------------------------------ -- save source code to file ------------------------------------------------------------------------ local function save_file(fname,dat) local OUTF=io.open(fname,"wb") if not OUTF then base.error("cannot open \""..fname.."\" for writing")end local status=OUTF:write(dat) if not status then base.error("cannot write to \""..fname.."\"")end OUTF:close() end ------------------------------------------------------------------------ -- post-parsing processing, gives globalinfo, localinfo ------------------------------------------------------------------------ function post_parse(globalinfo,localinfo) local html={} local function add(s)-- html helpers html[#html+1]=s end local function span(class,s) add(''..s..'') end ---------------------------------------------------------------------- for i=1,#globalinfo do-- mark global identifiers as TK_GLOBAL local obj=globalinfo[i] local xref=obj.xref for j=1,#xref do local p=xref[j] toklist[p]="TK_GLOBAL" end end--for ---------------------------------------------------------------------- for i=1,#localinfo do-- mark local identifiers as TK_LOCAL local obj=localinfo[i] local xref=obj.xref for j=1,#xref do local p=xref[j] toklist[p]="TK_LOCAL" end end--for ---------------------------------------------------------------------- add(string.format(HEADER,-- header and leading stuff do_entities(srcfl), STYLESHEET)) for i=1,#toklist do-- enumerate token list local tok,info=toklist[i],seminfolist[i] if tok=="TK_KEYWORD"then span("keyword",info) elseif tok=="TK_STRING"or tok=="TK_LSTRING"then span("string",do_entities(info)) elseif tok=="TK_COMMENT"or tok=="TK_LCOMMENT"then span("comment",do_entities(info)) elseif tok=="TK_GLOBAL"then span("global",info) elseif tok=="TK_LOCAL"then span("local",info) elseif tok=="TK_NAME"then span("name",info) elseif tok=="TK_NUMBER"then span("number",info) elseif tok=="TK_OP"then span("operator",do_entities(info)) elseif tok~="TK_EOS"then-- TK_EOL, TK_SPACE add(info) end end--for add(FOOTER) save_file(destfl,table.concat(html)) option.EXIT=true end --end of inserted module end -- preload function for module plugin/sloc preload["plugin/sloc"]= function() --start of inserted module module"plugin/sloc" local string=base.require"string" local table=base.require"table" ------------------------------------------------------------------------ -- initialization ------------------------------------------------------------------------ local option-- local reference to list of options local srcfl-- source file name function init(_option,_srcfl,_destfl) option=_option option.QUIET=true srcfl=_srcfl end ------------------------------------------------------------------------ -- splits a block into a table of lines (minus EOLs) ------------------------------------------------------------------------ local function split(blk) local lines={} local i,nblk=1,#blk while i<=nblk do local p,q,r,s=string.find(blk,"([\r\n])([\r\n]?)",i) if not p then p=nblk+1 end lines[#lines+1]=string.sub(blk,i,p-1) i=p+1 if pp and r~=s then-- handle Lua-style CRLF, LFCR i=i+1 end end return lines end ------------------------------------------------------------------------ -- post-lexing processing, can work on lexer table output ------------------------------------------------------------------------ function post_lex(toklist,seminfolist,toklnlist) local lnow,sloc=0,0 local function chk(ln)-- if a new line, count it as an SLOC if ln>lnow then-- new line # must be > old line # sloc=sloc+1;lnow=ln end end for i=1,#toklist do-- enumerate over all tokens local tok,info,ln =toklist[i],seminfolist[i],toklnlist[i] -------------------------------------------------------------------- if tok=="TK_KEYWORD"or tok=="TK_NAME"or-- significant tok=="TK_NUMBER"or tok=="TK_OP"then chk(ln) -------------------------------------------------------------------- -- Both TK_STRING and TK_LSTRING may be multi-line, hence, a loop -- is needed in order to mark off lines one-by-one. Since llex.lua -- currently returns the line number of the last part of the string, -- we must subtract in order to get the starting line number. -------------------------------------------------------------------- elseif tok=="TK_STRING"then-- possible multi-line local t=split(info) ln=ln-#t+1 for j=1,#t do chk(ln);ln=ln+1 end -------------------------------------------------------------------- elseif tok=="TK_LSTRING"then-- possible multi-line local t=split(info) ln=ln-#t+1 for j=1,#t do if t[j]~=""then chk(ln)end ln=ln+1 end -------------------------------------------------------------------- -- other tokens are comments or whitespace and are ignored -------------------------------------------------------------------- end end--for base.print(srcfl..": "..sloc)-- display result option.EXIT=true end --end of inserted module end -- support modules local llex=require"llex" local lparser=require"lparser" local optlex=require"optlex" local optparser=require"optparser" local equiv=require"equiv" local plugin --[[-------------------------------------------------------------------- -- messages and textual data ----------------------------------------------------------------------]] local MSG_TITLE=[[ LuaSrcDiet: Puts your Lua 5.1 source code on a diet Version 0.12.1 (20120407) Copyright (c) 2012 Kein-Hong Man The COPYRIGHT file describes the conditions under which this software may be distributed. ]] local MSG_USAGE=[[ usage: LuaSrcDiet [options] [filenames] example: >LuaSrcDiet myscript.lua -o myscript_.lua options: -v, --version prints version information -h, --help prints usage information -o specify file name to write output -s suffix for output files (default '_') --keep keep block comment with inside --plugin run in plugin/ directory - stop handling arguments (optimization levels) --none all optimizations off (normalizes EOLs only) --basic lexer-based optimizations only --maximum maximize reduction of source (informational) --quiet process files quietly --read-only read file and print token stats only --dump-lexer dump raw tokens from lexer to stdout --dump-parser dump variable tracking tables from parser --details extra info (strings, numbers, locals) features (to disable, insert 'no' prefix like --noopt-comments): %s default settings: %s]] ------------------------------------------------------------------------ -- optimization options, for ease of switching on and off -- * positive to enable optimization, negative (no) to disable -- * these options should follow --opt-* and --noopt-* style for now ------------------------------------------------------------------------ local OPTION=[[ --opt-comments,'remove comments and block comments' --opt-whitespace,'remove whitespace excluding EOLs' --opt-emptylines,'remove empty lines' --opt-eols,'all above, plus remove unnecessary EOLs' --opt-strings,'optimize strings and long strings' --opt-numbers,'optimize numbers' --opt-locals,'optimize local variable names' --opt-entropy,'tries to reduce symbol entropy of locals' --opt-srcequiv,'insist on source (lexer stream) equivalence' --opt-binequiv,'insist on binary chunk equivalence' --opt-experimental,'apply experimental optimizations' ]] -- preset configuration local DEFAULT_CONFIG=[[ --opt-comments --opt-whitespace --opt-emptylines --opt-numbers --opt-locals --opt-srcequiv --opt-binequiv ]] -- override configurations -- * MUST explicitly enable/disable everything for -- total option replacement local BASIC_CONFIG=[[ --opt-comments --opt-whitespace --opt-emptylines --noopt-eols --noopt-strings --noopt-numbers --noopt-locals --noopt-entropy --opt-srcequiv --opt-binequiv ]] local MAXIMUM_CONFIG=[[ --opt-comments --opt-whitespace --opt-emptylines --opt-eols --opt-strings --opt-numbers --opt-locals --opt-entropy --opt-srcequiv --opt-binequiv ]] local NONE_CONFIG=[[ --noopt-comments --noopt-whitespace --noopt-emptylines --noopt-eols --noopt-strings --noopt-numbers --noopt-locals --noopt-entropy --opt-srcequiv --opt-binequiv ]] local DEFAULT_SUFFIX="_"-- default suffix for file renaming local PLUGIN_SUFFIX="plugin/"-- relative location of plugins --[[-------------------------------------------------------------------- -- startup and initialize option list handling ----------------------------------------------------------------------]] -- simple error message handler; change to error if traceback wanted local function die(msg) print("LuaSrcDiet (error): "..msg);os.exit(1) end --die = error--DEBUG if not match(_VERSION,"5.1",1,1)then-- sanity check die("requires Lua 5.1 to run") end ------------------------------------------------------------------------ -- prepares text for list of optimizations, prepare lookup table ------------------------------------------------------------------------ local MSG_OPTIONS="" do local WIDTH=24 local o={} for op,desc in gmatch(OPTION,"%s*([^,]+),'([^']+)'")do local msg=" "..op msg=msg..string.rep(" ",WIDTH-#msg)..desc.."\n" MSG_OPTIONS=MSG_OPTIONS..msg o[op]=true o["--no"..sub(op,3)]=true end OPTION=o-- replace OPTION with lookup table end MSG_USAGE=string.format(MSG_USAGE,MSG_OPTIONS,DEFAULT_CONFIG) if p_embedded then-- embedded plugins local EMBED_INFO="\nembedded plugins:\n" for i=1,#p_embedded do local p=p_embedded[i] EMBED_INFO=EMBED_INFO.." "..plugin_info[p].."\n" end MSG_USAGE=MSG_USAGE..EMBED_INFO end ------------------------------------------------------------------------ -- global variable initialization, option set handling ------------------------------------------------------------------------ local suffix=DEFAULT_SUFFIX-- file suffix local option={}-- program options local stat_c,stat_l-- statistics tables -- function to set option lookup table based on a text list of options -- note: additional forced settings for --opt-eols is done in optlex.lua local function set_options(CONFIG) for op in gmatch(CONFIG,"(%-%-%S+)")do if sub(op,3,4)=="no"and-- handle negative options OPTION["--"..sub(op,5)]then option[sub(op,5)]=false else option[sub(op,3)]=true end end end --[[-------------------------------------------------------------------- -- support functions ----------------------------------------------------------------------]] -- list of token types, parser-significant types are up to TTYPE_GRAMMAR -- while the rest are not used by parsers; arranged for stats display local TTYPES={ "TK_KEYWORD","TK_NAME","TK_NUMBER",-- grammar "TK_STRING","TK_LSTRING","TK_OP", "TK_EOS", "TK_COMMENT","TK_LCOMMENT",-- non-grammar "TK_EOL","TK_SPACE", } local TTYPE_GRAMMAR=7 local EOLTYPES={-- EOL names for token dump ["\n"]="LF",["\r"]="CR", ["\n\r"]="LFCR",["\r\n"]="CRLF", } ------------------------------------------------------------------------ -- read source code from file ------------------------------------------------------------------------ local function load_file(fname) local INF=io.open(fname,"rb") if not INF then die('cannot open "'..fname..'" for reading')end local dat=INF:read("*a") if not dat then die('cannot read from "'..fname..'"')end INF:close() return dat end ------------------------------------------------------------------------ -- save source code to file ------------------------------------------------------------------------ local function save_file(fname,dat) local OUTF=io.open(fname,"wb") if not OUTF then die('cannot open "'..fname..'" for writing')end local status=OUTF:write(dat) if not status then die('cannot write to "'..fname..'"')end OUTF:close() end ------------------------------------------------------------------------ -- functions to deal with statistics ------------------------------------------------------------------------ -- initialize statistics table local function stat_init() stat_c,stat_l={},{} for i=1,#TTYPES do local ttype=TTYPES[i] stat_c[ttype],stat_l[ttype]=0,0 end end -- add a token to statistics table local function stat_add(tok,seminfo) stat_c[tok]=stat_c[tok]+1 stat_l[tok]=stat_l[tok]+#seminfo end -- do totals for statistics table, return average table local function stat_calc() local function avg(c,l)-- safe average function if c==0 then return 0 end return l/c end local stat_a={} local c,l=0,0 for i=1,TTYPE_GRAMMAR do-- total grammar tokens local ttype=TTYPES[i] c=c+stat_c[ttype];l=l+stat_l[ttype] end stat_c.TOTAL_TOK,stat_l.TOTAL_TOK=c,l stat_a.TOTAL_TOK=avg(c,l) c,l=0,0 for i=1,#TTYPES do-- total all tokens local ttype=TTYPES[i] c=c+stat_c[ttype];l=l+stat_l[ttype] stat_a[ttype]=avg(stat_c[ttype],stat_l[ttype]) end stat_c.TOTAL_ALL,stat_l.TOTAL_ALL=c,l stat_a.TOTAL_ALL=avg(c,l) return stat_a end --[[-------------------------------------------------------------------- -- main tasks ----------------------------------------------------------------------]] ------------------------------------------------------------------------ -- a simple token dumper, minimal translation of seminfo data ------------------------------------------------------------------------ local function dump_tokens(srcfl) -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z=load_file(srcfl) llex.init(z) llex.llex() local toklist,seminfolist=llex.tok,llex.seminfo -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- for i=1,#toklist do local tok,seminfo=toklist[i],seminfolist[i] if tok=="TK_OP"and string.byte(seminfo)<32 then seminfo="("..string.byte(seminfo)..")" elseif tok=="TK_EOL"then seminfo=EOLTYPES[seminfo] else seminfo="'"..seminfo.."'" end print(tok.." "..seminfo) end--for end ---------------------------------------------------------------------- -- parser dump; dump globalinfo and localinfo tables ---------------------------------------------------------------------- local function dump_parser(srcfl) local print=print -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z=load_file(srcfl) llex.init(z) llex.llex() local toklist,seminfolist,toklnlist =llex.tok,llex.seminfo,llex.tokln -------------------------------------------------------------------- -- do parser optimization here -------------------------------------------------------------------- lparser.init(toklist,seminfolist,toklnlist) local xinfo=lparser.parser() local globalinfo,localinfo= xinfo.globalinfo,xinfo.localinfo -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- local hl=string.rep("-",72) print("*** Local/Global Variable Tracker Tables ***") print(hl.."\n GLOBALS\n"..hl) -- global tables have a list of xref numbers only for i=1,#globalinfo do local obj=globalinfo[i] local msg="("..i..") '"..obj.name.."' -> " local xref=obj.xref for j=1,#xref do msg=msg..xref[j].." "end print(msg) end -- local tables have xref numbers and a few other special -- numbers that are specially named: decl (declaration xref), -- act (activation xref), rem (removal xref) print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl) for i=1,#localinfo do local obj=localinfo[i] local msg="("..i..") '"..obj.name.."' decl:"..obj.decl.. " act:"..obj.act.." rem:"..obj.rem if obj.isself then msg=msg.." isself" end msg=msg.." -> " local xref=obj.xref for j=1,#xref do msg=msg..xref[j].." "end print(msg) end print(hl.."\n") end ------------------------------------------------------------------------ -- reads source file(s) and reports some statistics ------------------------------------------------------------------------ local function read_only(srcfl) local print=print -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z=load_file(srcfl) llex.init(z) llex.llex() local toklist,seminfolist=llex.tok,llex.seminfo print(MSG_TITLE) print("Statistics for: "..srcfl.."\n") -------------------------------------------------------------------- -- collect statistics -------------------------------------------------------------------- stat_init() for i=1,#toklist do local tok,seminfo=toklist[i],seminfolist[i] stat_add(tok,seminfo) end--for local stat_a=stat_calc() -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- local fmt=string.format local function figures(tt) return stat_c[tt],stat_l[tt],stat_a[tt] end local tabf1,tabf2="%-16s%8s%8s%10s","%-16s%8d%8d%10.2f" local hl=string.rep("-",42) print(fmt(tabf1,"Lexical","Input","Input","Input")) print(fmt(tabf1,"Elements","Count","Bytes","Average")) print(hl) for i=1,#TTYPES do local ttype=TTYPES[i] print(fmt(tabf2,ttype,figures(ttype))) if ttype=="TK_EOS"then print(hl)end end print(hl) print(fmt(tabf2,"Total Elements",figures("TOTAL_ALL"))) print(hl) print(fmt(tabf2,"Total Tokens",figures("TOTAL_TOK"))) print(hl.."\n") end ------------------------------------------------------------------------ -- process source file(s), write output and reports some statistics ------------------------------------------------------------------------ local function process_file(srcfl,destfl) local function print(...)-- handle quiet option if option.QUIET then return end _G.print(...) end if plugin and plugin.init then-- plugin init option.EXIT=false plugin.init(option,srcfl,destfl) if option.EXIT then return end end print(MSG_TITLE)-- title message -------------------------------------------------------------------- -- load file and process source input into tokens -------------------------------------------------------------------- local z=load_file(srcfl) if plugin and plugin.post_load then-- plugin post-load z=plugin.post_load(z)or z if option.EXIT then return end end llex.init(z) llex.llex() local toklist,seminfolist,toklnlist =llex.tok,llex.seminfo,llex.tokln if plugin and plugin.post_lex then-- plugin post-lex plugin.post_lex(toklist,seminfolist,toklnlist) if option.EXIT then return end end -------------------------------------------------------------------- -- collect 'before' statistics -------------------------------------------------------------------- stat_init() for i=1,#toklist do local tok,seminfo=toklist[i],seminfolist[i] stat_add(tok,seminfo) end--for local stat1_a=stat_calc() local stat1_c,stat1_l=stat_c,stat_l -------------------------------------------------------------------- -- do parser optimization here -------------------------------------------------------------------- optparser.print=print-- hack lparser.init(toklist,seminfolist,toklnlist) local xinfo=lparser.parser() if plugin and plugin.post_parse then-- plugin post-parse plugin.post_parse(xinfo.globalinfo,xinfo.localinfo) if option.EXIT then return end end optparser.optimize(option,toklist,seminfolist,xinfo) if plugin and plugin.post_optparse then-- plugin post-optparse plugin.post_optparse() if option.EXIT then return end end -------------------------------------------------------------------- -- do lexer optimization here, save output file -------------------------------------------------------------------- local warn=optlex.warn-- use this as a general warning lookup optlex.print=print-- hack toklist,seminfolist,toklnlist =optlex.optimize(option,toklist,seminfolist,toklnlist) if plugin and plugin.post_optlex then-- plugin post-optlex plugin.post_optlex(toklist,seminfolist,toklnlist) if option.EXIT then return end end local dat=table.concat(seminfolist) -- depending on options selected, embedded EOLs in long strings and -- long comments may not have been translated to \n, tack a warning if string.find(dat,"\r\n",1,1)or string.find(dat,"\n\r",1,1)then warn.MIXEDEOL=true end -------------------------------------------------------------------- -- test source and binary chunk equivalence -------------------------------------------------------------------- equiv.init(option,llex,warn) equiv.source(z,dat) equiv.binary(z,dat) local smsg="before and after lexer streams are NOT equivalent!" local bmsg="before and after binary chunks are NOT equivalent!" -- for reporting, die if option was selected, else just warn if warn.SRC_EQUIV then if option["opt-srcequiv"]then die(smsg)end else print("*** SRCEQUIV: token streams are sort of equivalent") if option["opt-locals"]then print("(but no identifier comparisons since --opt-locals enabled)") end print() end if warn.BIN_EQUIV then if option["opt-binequiv"]then die(bmsg)end else print("*** BINEQUIV: binary chunks are sort of equivalent") print() end -------------------------------------------------------------------- -- save optimized source stream to output file -------------------------------------------------------------------- save_file(destfl,dat) -------------------------------------------------------------------- -- collect 'after' statistics -------------------------------------------------------------------- stat_init() for i=1,#toklist do local tok,seminfo=toklist[i],seminfolist[i] stat_add(tok,seminfo) end--for local stat_a=stat_calc() -------------------------------------------------------------------- -- display output -------------------------------------------------------------------- print("Statistics for: "..srcfl.." -> "..destfl.."\n") local fmt=string.format local function figures(tt) return stat1_c[tt],stat1_l[tt],stat1_a[tt], stat_c[tt],stat_l[tt],stat_a[tt] end local tabf1,tabf2="%-16s%8s%8s%10s%8s%8s%10s", "%-16s%8d%8d%10.2f%8d%8d%10.2f" local hl=string.rep("-",68) print("*** lexer-based optimizations summary ***\n"..hl) print(fmt(tabf1,"Lexical", "Input","Input","Input", "Output","Output","Output")) print(fmt(tabf1,"Elements", "Count","Bytes","Average", "Count","Bytes","Average")) print(hl) for i=1,#TTYPES do local ttype=TTYPES[i] print(fmt(tabf2,ttype,figures(ttype))) if ttype=="TK_EOS"then print(hl)end end print(hl) print(fmt(tabf2,"Total Elements",figures("TOTAL_ALL"))) print(hl) print(fmt(tabf2,"Total Tokens",figures("TOTAL_TOK"))) print(hl) -------------------------------------------------------------------- -- report warning flags from optimizing process -------------------------------------------------------------------- if warn.LSTRING then print("* WARNING: "..warn.LSTRING) elseif warn.MIXEDEOL then print("* WARNING: ".."output still contains some CRLF or LFCR line endings") elseif warn.SRC_EQUIV then print("* WARNING: "..smsg) elseif warn.BIN_EQUIV then print("* WARNING: "..bmsg) end print() end --[[-------------------------------------------------------------------- -- main functions ----------------------------------------------------------------------]] local arg={...}-- program arguments local fspec={} set_options(DEFAULT_CONFIG)-- set to default options at beginning ------------------------------------------------------------------------ -- per-file handling, ship off to tasks ------------------------------------------------------------------------ local function do_files(fspec) for i=1,#fspec do local srcfl=fspec[i] local destfl ------------------------------------------------------------------ -- find and replace extension for filenames ------------------------------------------------------------------ local extb,exte=string.find(srcfl,"%.[^%.%\\%/]*$") local basename,extension=srcfl,"" if extb and extb>1 then basename=sub(srcfl,1,extb-1) extension=sub(srcfl,extb,exte) end destfl=basename..suffix..extension if#fspec==1 and option.OUTPUT_FILE then destfl=option.OUTPUT_FILE end if srcfl==destfl then die("output filename identical to input filename") end ------------------------------------------------------------------ -- perform requested operations ------------------------------------------------------------------ if option.DUMP_LEXER then dump_tokens(srcfl) elseif option.DUMP_PARSER then dump_parser(srcfl) elseif option.READ_ONLY then read_only(srcfl) else process_file(srcfl,destfl) end end--for end ------------------------------------------------------------------------ -- main function (entry point is after this definition) ------------------------------------------------------------------------ local function main() local argn,i=#arg,1 if argn==0 then option.HELP=true end -------------------------------------------------------------------- -- handle arguments -------------------------------------------------------------------- while i<=argn do local o,p=arg[i],arg[i+1] local dash=match(o,"^%-%-?") if dash=="-"then-- single-dash options if o=="-h"then option.HELP=true;break elseif o=="-v"then option.VERSION=true;break elseif o=="-s"then if not p then die("-s option needs suffix specification")end suffix=p i=i+1 elseif o=="-o"then if not p then die("-o option needs a file name")end option.OUTPUT_FILE=p i=i+1 elseif o=="-"then break-- ignore rest of args else die("unrecognized option "..o) end elseif dash=="--"then-- double-dash options if o=="--help"then option.HELP=true;break elseif o=="--version"then option.VERSION=true;break elseif o=="--keep"then if not p then die("--keep option needs a string to match for")end option.KEEP=p i=i+1 elseif o=="--plugin"then if not p then die("--plugin option needs a module name")end if option.PLUGIN then die("only one plugin can be specified")end option.PLUGIN=p plugin=require(PLUGIN_SUFFIX..p) i=i+1 elseif o=="--quiet"then option.QUIET=true elseif o=="--read-only"then option.READ_ONLY=true elseif o=="--basic"then set_options(BASIC_CONFIG) elseif o=="--maximum"then set_options(MAXIMUM_CONFIG) elseif o=="--none"then set_options(NONE_CONFIG) elseif o=="--dump-lexer"then option.DUMP_LEXER=true elseif o=="--dump-parser"then option.DUMP_PARSER=true elseif o=="--details"then option.DETAILS=true elseif OPTION[o]then-- lookup optimization options set_options(o) else die("unrecognized option "..o) end else fspec[#fspec+1]=o-- potential filename end i=i+1 end--while if option.HELP then print(MSG_TITLE..MSG_USAGE);return true elseif option.VERSION then print(MSG_TITLE);return true end if#fspec>0 then if#fspec>1 and option.OUTPUT_FILE then die("with -o, only one source file can be specified") end do_files(fspec) return true else die("nothing to do!") end end -- entry point -> main() -> do_files() if not main()then die("Please run with option -h or --help for usage information") end -- end of script