--
-- lolcode.lpeg is a lolcode-to-lua translator written in Lua using the
-- LPeg pattern-matching library by Roberto Ierusalimschy.
--
-- More information on Lua is available at http://www.lua.org/ and on
-- LPeg at http://www.inf.puc-rio.br/~roberto/lpeg.html
--
-- Information on the PEG - Parsing Expression Grammar on which LPeg is
-- based can be found on Wikipedia:
-- http://en.wikipedia.org/wiki/Parsing_expression_grammar
--
-- Based on the 1.0 recommendation:
-- http://lolcode-ctools.googlecode.com/svn/doc/spec/lolcode-v1.0-rec.txt
--
-- Not implemented:
-- -the [OUTTA ] clause on GIMMEH :TODO:
-- -bitwise operators (AND and OR are logical only) :TODO:
-- -XOR (either logical or bitwise) :TODO:
-- -CANHAS "file" does a dofile, so it expects Lua code, not LOLCode :TODO:
--
-- Implemented:
-- +everything else in 1.0
-- +BTW comment syntax (translates to Lua comments)
-- +GTFO loop break
-- +translator errors - basically there is only one: DO NOT WANT linenumber
-- + UP, NERF, TIEMZ, and OVAR are alternative assignment ops to UPZ, NERFZ, TIEMZD, and OVARZ
--   (they work in prefix form as assignment ops, and infix as plain operators)
-- + YA RLY and YARLY are both acceptable, as are NO WAI and NOWAI, O RLY? and ORLY?, YR and UR
-- + the ITZ clause of I HAS A is implemented
-- +lots of bugs (let me know what they are!)
--
-- Which of the examples work?
-- - http://lolcode.com/examples/hai-world works
-- - http://lolcode.com/examples/gimmeh works
-- - http://lolcode.com/examples/count-1 (second version) works
-- - http://lolcode.com/examples/little-number works (after I replaced BIGGER with BIGR)
-- - http://lolcode.com/examples/caturday does not work - ON not implemented
-- - http://lolcode.com/examples/filezorz does not work - PLZ/AWESOME THX/O NOES/OPEN FILE not implemented
--
-- http://lolcode-ctools.googlecode.com/svn/trunk/examples/lolcode-v1.0-rec.lol
--
-- Copyright © 2007 Brian Casiello, bcasiello@gmail.com
--
-- Released under the MIT license
--
-- Permission is hereby granted, free of charge, to any person obtaining
-- a copy of this software and associated documentation files (the
-- "Software"), to deal in the Software without restriction, including
-- without limitation the rights to use, copy, modify, merge, publish,
-- distribute, sublicense, and/or sell copies of the Software, and to
-- permit persons to whom the Software is furnished to do so, subject to
-- the following conditions:
--
-- The above copyright notice and this permission notice shall be
-- included in all copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-- The statement must sit on its own line: when it shares a line with the
-- license comment above it is swallowed by the "--" and never executed.
-- The result is bound to a name because newer Lua/LPeg combinations do not
-- create a global "lpeg" as a side effect of require; a file-level local is
-- visible to every pattern definition below.
local lpeg = require "lpeg"

-- parser globals
--
-- Eew! Globals! Encapsulate into an object!
-- :TODO:
local lineno = 1       -- the LOL script's line number, for error reporting
local level = 0        -- indentation level, for prettyprinting the lua code
local success = false  -- pessimism - assume failure, updated on a good parse

-- utility functions
--

-- return a string that indents to the current level
-- (one tab per level; string.rep yields "" when level is 0 or negative)
function indent()
  return string.rep("\t", level)
end

-- helper function for BYES and DIAF, which are very similar
--   which   - the keyword itself ("BYES" or "DIAF")
--   default - exit code to use when the script gave none
--   code    - exit code expression; when the statement had no exit info the
--             handler receives the matched keyword instead, hence the
--             comparison against 'which'
--   message - optional expression written to stderr before exiting
-- returns the generated lua source as a string
function byesdiaf(which, default, code, message)
  local s = ""
  if message ~= nil then
    s = s .. indent() .. "io.stderr:write(" .. message .. ")\n"
  end
  if code == nil or code == "" or code == which then
    code = default
  end
  return s .. indent() .. "os.exit(" .. code .. ")\n"
end

-- helper function for assignment operators
-- generates "lvalue = lvalue <operator> factor"; a missing factor means 1
function assignop(lvalue, factor, operator)
  if factor == nil or factor == '' then
    factor = 1
  end
  return indent() .. lvalue .. " = " .. lvalue .. operator .. factor .. "\n"
end

-- helper functions to parse single- and double-quoted strings,
-- and text-to-the-end-of-the-line, for BTW

-- match an opening quote pattern q at position i of s, then everything up
-- to and including the next q; returns the position just past the closing
-- quote, or nil when there is no opening or no closing quote
function parsestring(s, i, q)
  local l = lpeg.match(q, s, i)
  if not l then
    return nil
  end
  local p = (1 - q)^0 * q
  return lpeg.match(p, s, l)
end

-- SQUOTE and DQUOTE are looked up when these are called, not when they are
-- defined, so it is fine that the patterns are created further down
function parsesqstring(s, i)
  return parsestring(s, i, SQUOTE)
end

function parsedqstring(s, i)
  return parsestring(s, i, DQUOTE)
end

-- matches everything up to (but not including) the next newline;
-- fails when there is no newline left in the input
TEXT = lpeg.P(function (s, i)
  local l, r = string.find(s, "\n", i)
  if not l then
    return nil
  end
  return r
end)

-- capture handlers
--
-- These functions get called when the parser hits various points in the grammar
-- and get passed whatever was captured by that part of the grammar
-- for any function foo, look for / foo (slash foo) in the grammar to find where it's called

-- initialize a new parse
-- called by a match at the beginning of the parse, to initialize state
-- line number, indentation level, success flag are reset
-- dump the lolcode internal library
--
-- The library text has to keep its line breaks: it starts with lua comments,
-- so on a single line the whole library would be commented out.
-- readword stops at end of input as well as at whitespace; without the nil
-- check string.find would be called with nil once io.read hits EOF.
function startup()
  lineno = 1
  level = 0
  success = false
  return [[
-- the lolcode internal library
--
-- calls to these functions are generated by the translator
_lolcode_ = {}
function _lolcode_.readword()
  local temp = io.read(1)
  local s = ""
  while temp ~= nil and nil == string.find(temp, "%s") do
    s = s .. temp
    temp = io.read(1)
  end
  return s
end
]]
end

-- if success is set, we have a good parse
-- called by a match at the end of the grammar
-- the arguments are collected from '...'; the implicit 'arg' table is a
-- Lua 5.0 relic and in later versions names the command-line arguments
function result(...)
  if not success then
    print("DO NOT WANT line " .. lineno)
  end
  -- return everything matched by the grammar
  return table.concat({...}, "")
end

-- called for HAI
-- not used now since HAI is basically a noise word
-- (I don't check for balanced HAI/KTHXBYE)
function hai()
end

-- called for CAN HAS STDIO?
-- in case we want to dump an i/o library sometime
function stdio()
end

-- a debugging helper - add / foo in the grammar to dump what gets captured
function foo(...)
  print("[" .. table.concat({...}, "]\t[") .. "]")
end

-- called for a newline
function bumpline()
  lineno = lineno + 1
end

-- called for BTW
function comment(s)
  return indent() .. "--" .. s .. "\n"
end

-- called for GTFO
function gtfo()
  return indent() .. "break\n"
end

-- called for GIMMEH
-- :TODO: OUTTA is ignored
-- Interesting bit: GIMMEH WORD generates a call to the 'internal library'
-- which is dumped by the startup handler
--   what - "LINE", "LETTAR" or "WORD"; nil or empty means LINE
--   var  - the variable to read into
--   outa - unused
function gimmeh(what, var, outa)
  -- the grammar captures the type together with the whitespace that follows
  -- it ("LINE "), so strip that before comparing; otherwise every typed
  -- GIMMEH falls through to the WORD branch
  what = string.gsub(what or "", "%s+$", "")
  if what == "" then
    what = "LINE"
  end
  if what == "LINE" then
    return indent() .. var .. " = io.read()\n"
  elseif what == "LETTAR" then
    return indent() .. var .. " = io.read(1)\n"
  else
    return indent() .. var .. " = _lolcode_.readword()\n"
  end
end

-- called for kthx
-- closes the current block, so the 'end' is emitted one level out
function kthx()
  level = level - 1
  return indent() .. "end\n"
end

-- called for IM IN YR
-- loop identifier is ignored
-- generates an infinite loop, must be broken with GTFO, DIAF, or BYES
function loop()
  local s = indent() .. "while true do\n"
  level = level + 1
  return s
end

-- called for KTHXBYE
-- if we parse this, we consider it a successful parse
-- (even though stuff could follow it in the input file!)
function kthxbye()
  -- reaching KTHXBYE is the only thing that marks a parse as good;
  -- the flag is a boolean everywhere else, so keep it one here too
  success = true
  return indent() .. "os.exit(0)\n"
end

-- called for DIAF
-- default code is 1
function diaf(code, message)
  return byesdiaf("DIAF", 1, code, message)
end

-- called for BYES
-- default code is 0
function byes(code, message)
  return byesdiaf("BYES", 0, code, message)
end

-- called for CAN HAS
-- (except for CAN HAS STDIO)
-- :TODO: currently does a dofile, so expects lua code
-- should perform a parse of a lol file and dump its output inline
-- s is the file name exactly as written in the script, quotes included
function canhas(s)
  return indent() .. "dofile(" .. s .. ")\n"
end

-- called for I HAS A
-- if no ITZ clause, variable defaults to 0
function ihasa(s, value)
  if value == nil then
    value = 0
  end
  return indent() .. "local " .. s .. " = " .. value .. "\n"
end

-- shared body of VISIBLE and INVISIBLE
--   writer - the lua function to call in the generated code
--   value  - the expression to print
--   bang   - non-empty when the statement ended in '!', which suppresses
--            the trailing newline
local function emitwrite(writer, value, bang)
  local close = ' .. "\\n")'
  if bang ~= nil and bang ~= '' then
    close = ')'
  end
  return indent() .. writer .. "(" .. value .. close .. "\n"
end

-- called for VISIBLE (writes to stdout)
function visible(value, bang)
  return emitwrite("io.write", value, bang)
end

-- called for INVISIBLE (writes to stderr)
function invisible(value, bang)
  return emitwrite("io.stderr:write", value, bang)
end

-- called for UPZ
function upz(lvalue, factor)
  return assignop(lvalue, factor, " + ")
end

-- called for NERFZ
function nerfz(lvalue, factor)
  return assignop(lvalue, factor, " - ")
end

-- called for TIEMZD
function tiemz(lvalue, factor)
  return assignop(lvalue, factor, " * ")
end

-- called for OVARZ
function ovarz(lvalue, factor)
  return assignop(lvalue, factor, " / ")
end

-- called for IZ
-- emits only the 'if'; the condition and 'then' are added by thenz
function iz()
  local s = indent() .. "if"
  level = level + 1
  return s
end

-- called after the expression is parsed for IZ
-- (there is no THENZ, but 'then' is a lua keyword)
-- the arguments are collected from '...'; the implicit 'arg' table is a
-- Lua 5.0 relic and in later versions names the command-line arguments
function thenz(...)
  return table.concat({...}, " ") .. " then\n"
end

-- called for NOWAI
-- the 'else' lines up with its 'if', so step out one level to emit it
-- and back in for the statements that follow
function nowai()
  level = level - 1
  local s = indent() .. "else\n"
  level = level + 1
  return s
end

-- called for comparison operators
-- with a leading NOT the operator is inverted; without one the grammar
-- produces a single capture, so 'negate' holds the operator itself
function cmpop(negate, cond)
  if negate == "NOT" then
    if cond == ">" then
      return "<="
    elseif cond == "<" then
      return ">="
    else
      return "~="
    end
  else
    -- there is no not, negate actually holds the condition
    return negate
  end
end

-- called for various types of expressions
-- each joins its captured pieces with single spaces
function factors(...)
  return table.concat({...}, " ")
end

function expr(...)
  return table.concat({...}, " ")
end

function cfactors(...)
  return table.concat({...}, " ")
end

-- called for l-values
function lvalue(l)
  return l
end

-- called for LOL
-- generates the assignment, preceded by a guard for every table along the
-- path of an array reference so that indexing never hits a non-table
function lol(lhs, rhs)
  local i = 1
  local n
  local s = ""
  local t = ""
  -- the base variable: everything before the first '['
  n = string.find(lhs, "[", 1, true)
  if n ~= nil then
    t = string.sub(lhs, 1, n - 1)
    s = s .. indent() .. "if type(" .. t .. ") ~= 'table' then " .. t .. " = {} end\n"
  end
  -- then every prefix that ends in ']'
  while true do
    n = string.find(lhs, "]", i, true)
    if n == nil then
      break
    end
    i = n + 1
    t = string.sub(lhs, 1, n)
    s = s .. indent() .. "if type(" .. t .. ") ~= 'table' then " .. t .. " = {} end\n"
  end
  return s .. indent() .. lhs .. " = " .. rhs .. "\n"
end

-- called for "index IN MAH array"
--   lhs - the index as written
--   rhs - the array being indexed
-- A constant index is captured twice by the grammar (once as the index text
-- and once as the constant itself), which pushes the array into a later
-- argument; the array is always the last capture, so take it from there.
function arrayref(lhs, rhs, ...)
  local extra = select("#", ...)
  if extra > 0 then
    rhs = (select(extra, ...))
  end
  return rhs .. "[" .. lhs .. "]"
end

-- Syntactic primitives
BANG = lpeg.P"!"
BANGS = lpeg.P"!!"
DIGIT = lpeg.R"09"
DQUOTE = lpeg.P'"'
EOL = lpeg.P"\n" / bumpline             -- End of line
EOS = EOL + lpeg.P"." + lpeg.P"?"       -- End of statement
LETTER = lpeg.R"az" + lpeg.R"AZ"
LPAREN = lpeg.P"("
DECIMAL = lpeg.P"." * DIGIT^1
NUMBER = lpeg.P"-"^-1 * DIGIT^1 * DECIMAL^-1
OWS = lpeg.S" \t"^0                     -- Optional whitespace
QUERY = lpeg.P"?"
RPAREN = lpeg.P")"
SQUOTE = lpeg.P"'"
VARPUNCT = lpeg.S"_"
WS = lpeg.S" \t"^1                      -- Mandatory whitespace

-- Variables start with a letter or underbar, and can contain letters, underbars, or digits
-- (a single DIGIT, not NUMBER: NUMBER also accepts a leading '-' and a
-- decimal part, which would let "x-1" or "x1.5" through as identifiers)
VARSTART = LETTER + VARPUNCT
VARCHAR = VARSTART + DIGIT
VAR = VARSTART * VARCHAR^0

-- Keywords
-- Where one keyword is a prefix of another the longer spelling is listed
-- first inside an alternation, since LPeg's choice is ordered.
A = lpeg.P"A"
AND = lpeg.P"AND"
BIGR = lpeg.P"BIGR" + lpeg.P"BIGGER"
BTW = lpeg.P"BTW"
BYES = lpeg.P"BYES"
CAN = lpeg.P"CAN"
DIAF = lpeg.P"DIAF"
GIMMEH = lpeg.P"GIMMEH"
GTFO = lpeg.P"GTFO"
HAI = lpeg.P"HAI"
HAS = lpeg.P"HAS"
I = lpeg.P"I"
IM = lpeg.P"IM"
INVISIBLE = lpeg.P"INVISIBLE"
IN = lpeg.P"IN"
ITZ = lpeg.P"ITZ"
IZ = lpeg.P"IZ"
KTHX = lpeg.P"KTHX"
KTHXBYE = lpeg.P"KTHXBYE"
LETTAR = lpeg.P"LETTAR"
LIEK = lpeg.P"LIEK"
LINE = lpeg.P"LINE"
LOL = lpeg.P"LOL"
MAH = lpeg.P"MAH"
NERF = lpeg.P"NERF"
NERFZ = lpeg.P"NERFZ" + lpeg.P"NERF"
NOT = lpeg.P"NOT"
N = lpeg.P"N"
NOWAI = lpeg.P"NO" * OWS * lpeg.P"WAI"
OR = lpeg.P"OR"
ORLY = lpeg.P"O" * OWS * lpeg.P"RLY" * QUERY
OUTTA = lpeg.P"OUTTA"
OVAR = lpeg.P"OVAR"
OVARZ = lpeg.P"OVARZ" + lpeg.P"OVAR"
R = lpeg.P"R"
SMALR = lpeg.P"SMALR" + lpeg.P"SMALLER"
STDIO = lpeg.P"STDIO"
THAN = lpeg.P"THAN"
TIEMZ = lpeg.P"TIEMZ"
TIEMZD = lpeg.P"TIEMZD" + lpeg.P"TIEMZ"
UP = lpeg.P"UP"
UPZ = lpeg.P"UPZ" + lpeg.P"UP"
VISIBLE = lpeg.P"VISIBLE"
WORD = lpeg.P"WORD"
XOR = lpeg.P"XOR"
YARLY = lpeg.P"YA" * OWS * lpeg.P"RLY"
YR = lpeg.P"YR" + lpeg.P"UR"

-- primitives that are handled by their own mini-parsers
SSTRING = lpeg.P(parsesqstring)
DSTRING = lpeg.P(parsedqstring)
STRING = SSTRING + DSTRING
FILENAME = STRING

-- the lolcode grammar itself
-- see the LPEG documentation for more info, but it shouldn't be too bad
-- if you're used to reading BNF, or yacc input
--
-- The Quickie Intro
--   lpeg.P converts its argument into a pattern
--   lpeg.V creates a nonterminal (a Variable) for a grammar
--     it's a reference to its own rule in the grammar
--   p1 * p2 means 'match a p1 followed by a p2'
--   p1 + p2 means 'match a p1 or a p2'
--   p^n for non-negative n means 'match at
--   least n occurrences of p'
--     for negative n means 'match at most -n occurrences of p'
--     so p^0 means 0 or more, p^1 means 1 or more, p^-1 means optional
--   lpeg.C(p) creates a 'capture' for a pattern - it returns whatever p matches
--   p / "string" captures "string" with %n's (1-9) replaced by whatever captures p has made
--   p / function calls the function with whatever captures p has made as arguments
--     the resulting capture is whatever the function returns
--
-- Note that '*' and '/' have the same precedence and group left to right,
-- so "a * b / f" applies f to the captures of the whole sequence "a * b".
lolcode = lpeg.P{
  -- start token is the first listed in the grammar
  "PROGRAM",

  -- a program is 0 or more lines
  PROGRAM = lpeg.P"" / startup * lpeg.V"LINE"^0 / result,

  -- a line is a statement followed by an EOS
  LINE = OWS * lpeg.V"STATEMENT" * OWS * EOS,

  -- a statement is one of these
  -- (order matters where one keyword is a prefix of another, e.g. KTHXBYE
  -- before KTHX; the trailing empty pattern allows blank lines)
  STATEMENT = lpeg.V"BTW"
            + lpeg.V"BYES"
            + lpeg.V"CANHAS"
            + lpeg.V"DIAF"
            + lpeg.V"GIMMEH"
            + lpeg.V"GTFO"
            + lpeg.V"HAI"
            + lpeg.V"IHASA"
            + lpeg.V"IMINYR"
            + lpeg.V"INVISIBLE"
            + lpeg.V"IZ"
            + lpeg.V"KTHXBYE"
            + lpeg.V"KTHX"
            + lpeg.V"LOL"
            + lpeg.V"NERFZ"
            + lpeg.V"NOWAI"
            + lpeg.V"OVARZ"
            + lpeg.V"TIEMZD"
            + lpeg.V"UPZ"
            + lpeg.V"VISIBLE"
            + lpeg.V"YARLY"
            + lpeg.P"",

  -- the individual statements
  --
  BTW = BTW * lpeg.V"COMMENT"^0,
  BYES = BYES * lpeg.V"EXITINFO"^-1 / byes,
  CANHAS = CAN * WS * HAS * WS * lpeg.V"MODULE_OR_FILE" * WS^0 * QUERY,
  DIAF = DIAF * lpeg.V"EXITINFO"^-1 / diaf,
  GIMMEH = GIMMEH * WS * lpeg.C(lpeg.V"GIMMEHTYPE"^-1) * lpeg.C(lpeg.V"LVALUE") * lpeg.C(lpeg.V"OUTTA"^-1) / gimmeh,
  GTFO = GTFO / gtfo,
  HAI = HAI / hai,
  IHASA = I * WS * HAS * WS * A * WS * lpeg.C(lpeg.V"DECL") * lpeg.V"ITZ"^-1 / ihasa,
  IMINYR = IM * WS * IN * WS * YR * WS * lpeg.V"LOOPLABEL",
  KTHX = KTHX / kthx,
  INVISIBLE = INVISIBLE * WS * lpeg.V"EXPR" * OWS * lpeg.C(BANG^-1) / invisible,
  IZ = IZ / iz * WS * lpeg.V"CEXPR" / thenz * OWS * ORLY^-1,
  KTHXBYE = KTHXBYE / kthxbye,
  LOL = LOL * WS * lpeg.V"LVALUE" * WS * R * WS * lpeg.V"EXPR" / lol,
  UPZ = UPZ * WS * lpeg.V"LVALUE" * BANGS * lpeg.V"EXPR"^-1 / upz,
  -- same shape as the other three assignment operators: LVALUE and EXPR
  -- already produce one capture each, and wrapping them in lpeg.C adds the
  -- raw text as an extra capture, so the handler would receive the lvalue
  -- twice and subtract the variable from itself
  NERFZ = NERFZ * WS * lpeg.V"LVALUE" * BANGS * lpeg.V"EXPR"^-1 / nerfz,
  NOWAI = NOWAI / nowai,
  TIEMZD = TIEMZD * WS * lpeg.V"LVALUE" * BANGS * lpeg.V"EXPR"^-1 / tiemz,
  OVARZ = OVARZ * WS * lpeg.V"LVALUE" * BANGS * lpeg.V"EXPR"^-1 / ovarz,
  VISIBLE = VISIBLE * WS * lpeg.V"EXPR" * OWS * lpeg.C(BANG^-1) / visible,
  YARLY = YARLY,

  -- expressions
  --
  -- conditional (boolean) expressions
  CEXPR = lpeg.V"CFACTORS"^0 * lpeg.V"CFACTOR",
  CFACTORS = lpeg.V"CFACTOR" * OWS * lpeg.V"BOOLOP" * OWS / cfactors,
  CFACTOR = lpeg.V"EXPR" * WS * lpeg.V"CMPOP" * WS * lpeg.V"EXPR" + lpeg.V"EXPR",

  -- expression
  EXPR = lpeg.V"FACTORS"^0 * lpeg.V"FACTOR" / expr,
  FACTORS = lpeg.V"FACTOR" * OWS * lpeg.V"ADDOP" * OWS / factors,
  FACTOR = lpeg.V"TERMS"^0 * lpeg.V"TERM",
  TERMS = lpeg.V"TERM" * OWS * lpeg.V"MULOP" * OWS,
  TERM = lpeg.V"ARRAYREF"
       + lpeg.V"CONSTANT"
       + lpeg.C(VAR)
       + lpeg.V"ARRAYREF"
       + LPAREN * OWS * lpeg.V"EXPR" * OWS * RPAREN,
  ARRAYREF = lpeg.C(lpeg.V"INDEX") * WS * IN * WS * MAH * WS * lpeg.V"ARRAYREF" / arrayref
           + VAR / lvalue,
  INDEX = lpeg.V"CONSTANT" + VAR,

  -- operators
  ADDOP = UP / "+" + NERF / "-" + N * WS / "..",
  BOOLOP = AND / "and" + OR / "or",
  CMPOP = lpeg.V"NOTOP"^-1 * lpeg.V"CONDITION" / cmpop,
  CONDITION = BIGR / ">" * WS * THAN + SMALR / "<" * WS * THAN + LIEK / "==",
  MULOP = TIEMZ / "*" + OVAR / "/",
  NOTOP = NOT * WS / "NOT",

  -- other tokens
  COMMENT = WS * TEXT / comment,
  CONSTANT = lpeg.C(NUMBER) + lpeg.C(STRING),
  DECL = VAR,
  EXITINFO = WS * lpeg.V"EXITCODE" * lpeg.V"EXITTEXT"^-1,
  EXITCODE = lpeg.V"EXPR",
  EXITTEXT = WS * lpeg.V"EXPR",
  GIMMEHNAME = LINE + WORD + LETTAR,
  GIMMEHTYPE = lpeg.V"GIMMEHNAME" * OWS,
  LOOPLABEL = VAR / loop,
  LVALUE = lpeg.V"ARRAYREF",
  MODULE_OR_FILE = STDIO / stdio + FILENAME / canhas,
  ITZ = WS * ITZ * OWS * lpeg.V"EXPR",
  OUTTA = WS * OUTTA * OWS * FILENAME,
}

-- and at long last, the mainline of the program!
-- we process all the args, and then do the translation
-- so it really can only handle one file
-- :TODO: handle more than one input file
-- you can specify a file on the command line, otherwise it reads stdin
-- if you specify '-o filename' it'll write the lua code to that file,
-- otherwise no output is written
-- if you specify '-r', the lua code is run after the lol is translated
-- (you can specify both -o filename and -r and the output will be both written and executed)
local argv = arg or {} -- 'arg' is only set by the standalone interpreter
local i = 1            -- command line arg counter
local run = false      -- true if we want to execute the lua output
local outfile = nil    -- non-nil to save output to the named file
local infile = nil     -- non-nil to read from the named file

-- process args
while i <= #argv do
  local a = argv[i]
  if a == "-r" then
    run = true
  elseif a == "-o" then
    -- a trailing '-o' with no file name after it is silently ignored
    if i < #argv then
      i = i + 1
      outfile = argv[i]
    end
  else
    infile = a
  end
  i = i + 1
end

-- suck in the entire input file
local ifp = nil
if infile ~= nil then
  -- assert turns a failed open into a readable error message instead of
  -- an "attempt to index a nil value" on the read below
  ifp = assert(io.open(infile, "r"))
else
  ifp = io.stdin
end
local s = ifp:read("*a")
-- only close what we opened; stdin belongs to the process
if ifp ~= io.stdin then
  ifp:close()
end
ifp = nil

-- actually run the parser!
local t = lpeg.match(lolcode, s)

-- write the output
if outfile ~= nil then
  local ofp = assert(io.open(outfile, "w"))
  ofp:write(t)
  ofp:close()
end

-- execute the lua code
if run then
  -- loadstring exists in Lua 5.1 only; later versions compile strings with load
  local compile = loadstring or load
  assert(compile(t))()
end