--------------------------------------------------------------------------- -- Copyright (c) 2013, Pantor Engineering AB -- All rights reserved. -- -- Redistribution and use in source and binary forms, with or -- without modification, are permitted provided that the following -- conditions are met: -- -- * Redistributions of source code must retain the above copyright -- notice, this list of conditions and the following disclaimer. -- -- * Redistributions in binary form must reproduce the above -- copyright notice, this list of conditions and the following -- disclaimer in the documentation and/or other materials -- provided with the distribution. -- -- * Neither the name of Pantor Engineering AB nor the names of its -- contributors may be used to endorse or promote products -- derived from this software without specific prior written -- permission. -- -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND -- CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, -- INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS -- BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -- ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -- POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------- -- A Blink schema parser -- The purpose of this implementation is to illustrate a possible way -- of parsing a Blink schema and to serve as a basis or inspiration -- for other, more developed, implementations. 
-- It supports the full schema grammar, but the error reporting
-- facilities could be improved. Performance was not a primary goal.
--
-- It is implemented as a recursive descent parser. Parsed components are
-- passed to an event observer. The event observer will be called with
-- the following methods:
--
--   obs:onNsDecl (name)
--
--   obs:onStartGroupDef (name, id, super, annots, line, col, src)
--   obs:onEndGroupDef ()
--
--   obs:onStartField (line, col, src)
--   obs:onEndField (name, id, pres, annots)
--
--   obs:onStartDefine (name, id, annots, line, col, src)
--   obs:onEndDefine ()
--
--   obs:onStartEnum (line, col, src)
--   obs:onEndEnum ()
--
--   obs:onTypeRef (name, layout, rank, annots, line, col, src)
--   obs:onStringType (rank, maxSize, annots, line, col, src)
--   obs:onBinaryType (rank, maxSize, annots, line, col, src)
--   obs:onFixedType (rank, size, annots, line, col, src)
--   obs:onPrimType (type, rank, annots, line, col, src)
--   obs:onEnumSym (name, val, annots, line, col, src)
--
--   obs:onSchemaAnnot (annots, line, col, src)
--   obs:onIncrAnnot (name, substep, endsWith, id, annots, line, col, src)
--
-- Components that can have substructures are represented by matching
-- start and end events.
--
-- The event arguments:
--
--   name     - name string
--   super    - super type string
--   type     - the name of a type ("i8", "u8" ... "object")
--   layout   - "dynamic" or "static"
--   val      - enum symbol value string
--   id       - id string
--   rank     - "single" or "sequence"
--   pres     - "optional" or "required"
--   substep  - component reference substep string
--   endsWith - "name" or "type"
--   annots   - is a table where each entry corresponds to an annotation
--   line     - line number
--   col      - column number
--   src      - file name string
--   maxSize  - string or binary max size
--   size     - fixed size
--
-- The two main functions are blink.parseSchemaFile and
-- blink.parseSchema.
-- The former just calls the latter with the whole
-- content of the specified file as a string

-- You may run the parser directly from the command line like such:
--
--   lua blink.lua my-schema.blink
--   lua blink.lua --silent my-schema.blink

---------------------------------------------------------------------------
-- Namespace

blink = { }

---------------------------------------------------------------------------
-- Parses a Blink schema from the specified file. An optional observer may
-- be specified in the second argument.
--
-- Raises an error if the file cannot be opened or read.

function blink.parseSchemaFile (f, obs)
   local is = assert (io.open (f, "rb"))
   local s, readErr = is:read ("*all")
   is:close ()
   if not s then
      error (tostring (f) .. ": " .. tostring (readErr), 0)
   end
   blink.parseSchema (s, f, obs)
end

---------------------------------------------------------------------------
-- Parses a Blink schema from the specified string. The second
-- argument is an optional file name used in error reporting. The
-- third argument is an optional observer. If no observer is
-- specified, a default observer is used that simply dumps each event
-- as a line of text.
--
-- Syntax errors are raised through error () with a message of the form
-- "<src>:<line>:<col>: error: <text>".

local tokenize -- defined further down, after the parser

function blink.parseSchema (s, src, obs)
   obs = obs or blink.createDumpObs ()
   local tok = tokenize (s, src or "-")

   -- State shared between the grammar functions: the most recently read
   -- definition name and id, and the annotations collected so far
   local pendName
   local pendId = ""
   local annotations = { }

   -- Returns the collected annotations and starts a fresh collection
   local function consumeAnnots ()
      local tmp = annotations
      annotations = { }
      return tmp
   end

   -- Returns the pending id and resets it to the empty string
   local function consumeId ()
      local id = pendId
      pendId = ""
      return id
   end

   -- annot ::=
   --    "@" qNameOrKeyword "=" literal
   --
   -- Adjacent string literals are concatenated into a single value

   local function annot ()
      local name = tok.nextNameOrKeyword () or tok.expected ("annotation name")
      tok.require ("=")
      local val = tok.require ("STRING")
      while true do
         local cont = tok.next ("STRING")
         if cont then
            val = val .. cont
         else
            break
         end
      end
      annotations [name] = val
   end

   -- annots ::=
   --    e
   --  | annot annots

   local function annots ()
      while tok.next ("@") do
         annot ()
      end
   end

   -- nsDecl ::=
   --    "namespace" name
   --
   -- The "namespace" keyword itself has been consumed by the caller

   local function nsDecl ()
      obs:onNsDecl (tok.require ("NAME", "namespace name"))
   end

   -- nameWithId ::=
   --    name id
   --
   -- Stores the result in pendName and pendId

   local function nameWithId (what)
      pendName = tok.require ("NAME", what)
      pendId = ""
      if tok.next ("/") then
         pendId = tok.next ("UINT", "HEX")
            or tok.expected ("unsigned integer or hex number")
      end
   end

   -- sequence ::=
   --    single "[" "]"

   local function rank ()
      if tok.next ("[") then
         tok.require ("]")
         return "sequence"
      else
         return "single"
      end
   end

   -- ref ::=
   --    qName
   --  | qName "*"

   local function ref ()
      local name = tok.next ()
      local kind = tok.next ("*") and "dynamic" or "static"
      local r = rank ()
      obs:onTypeRef (name, kind, r, consumeAnnots (), tok.lastLoc ())
   end

   -- string ::=
   --    "string"
   --  | "string" "(" uInt ")"

   local function string_ ()
      tok.next ()
      local maxSize
      if tok.next ("(") then
         maxSize = tok.require ("UINT", "string max size")
         tok.require (")")
      end
      local r = rank ()
      obs:onStringType (r, maxSize, consumeAnnots (), tok.lastLoc ())
   end

   -- binary ::=
   --    "binary"
   --  | "binary" "(" uInt ")"

   local function binary ()
      tok.next ()
      local maxSize
      if tok.next ("(") then
         maxSize = tok.require ("UINT", "binary max size")
         tok.require (")")
      end
      local r = rank ()
      obs:onBinaryType (r, maxSize, consumeAnnots (), tok.lastLoc ())
   end

   -- fixed ::=
   --    "fixed" "(" uInt ")"

   local function fixed ()
      tok.next ()
      tok.require ("(")
      local size = tok.require ("UINT", "fixed size")
      tok.require (")")
      local r = rank ()
      obs:onFixedType (r, size, consumeAnnots (), tok.lastLoc ())
   end

   -- "i8" ... "object"

   local function primType ()
      local t = tok.next ()
      local r = rank ()
      obs:onPrimType (t, r, consumeAnnots (), tok.lastLoc ())
   end

   -- type ::=
   --    single | sequence
   --
   -- single ::=
   --    ref | time | number | string | binary | fixed | "bool" | "object"

   local function type_ ()
      if tok.match ("NAME", "QNAME") then
         ref ()
      elseif tok.match ("string") then
         string_ ()
      elseif tok.match ("binary") then
         binary ()
      elseif tok.match ("fixed") then
         fixed ()
      elseif tok.match ("namespace", "schema", "type") then
         -- These are keywords but not type names
         tok.expected ("type specifier")
      elseif tok.matchAnyKeyword () then
         primType ()
      else
         tok.expected ("type specifier")
      end
   end

   -- field ::=
   --    annots type annots nameWithId opt
   --
   -- opt ::=
   --    e | "?"

   local function field ()
      annots ()
      obs:onStartField (tok.loc ())
      type_ ()
      annots ()
      nameWithId ("field name")
      local pres = tok.next ("?") and "optional" or "required"
      obs:onEndField (pendName, consumeId (), pres, consumeAnnots ())
   end

   -- groupDef ::=
   --    nameWithId super body
   --
   -- super ::=
   --    e
   --  | ":" qName
   --
   -- body ::=
   --    e
   --  | "->" fields
   --
   -- fields ::=
   --    field
   --  | field "," fields
   --
   -- The nameWithId part has been consumed by the caller

   local function groupDef ()
      local sup = ""
      if tok.next (":") then
         sup = tok.next ("NAME", "QNAME") or tok.expected ("supertype name")
      end

      obs:onStartGroupDef (pendName, consumeId (), sup, consumeAnnots (),
                           tok.lastLoc ())

      if tok.next ("->") then
         repeat
            field ()
         until not tok.next (",")
      end

      obs:onEndGroupDef ()
   end

   -- sym ::=
   --    annots name val
   --
   -- val ::=
   --    e
   --  | "/" (int | hexNum)

   local function sym ()
      annots ()
      local name = tok.require ("NAME", "enum symbol name")
      local val = ""
      if tok.next ("/") then
         val = tok.next ("UINT", "INT", "HEX")
            or tok.expected ("integer or hex number")
      end
      obs:onEnumSym (name, val, consumeAnnots (), tok.lastLoc ())
   end

   -- enum ::=
   --    "|" sym
   --  | sym "|" syms
   --
   -- syms ::=
   --    sym
   --  | sym "|" syms

   local function enumeration ()
      obs:onStartEnum (tok.lastLoc ())
      if tok.next ("|") then
         sym ()
      else
         repeat
            sym ()
         until not tok.next ("|")
      end
      obs:onEndEnum ()
   end

   -- define ::=
   --    nameWithId "=" (enum | (annots type))
   --
   -- The nameWithId and "=" parts have been consumed by the caller

   local function define ()
      obs:onStartDefine (pendName, consumeId (), consumeAnnots (),
                         tok.lastLoc ())
      annots ()
      -- A name followed by "/" or "|" can only be an enum symbol
      if tok.match ("|")
         or (tok.match ("NAME") and tok.matchPend ("/", "|")) then
         enumeration ()
      else
         type_ ()
      end
      obs:onEndDefine ()
   end

   -- incrAnnotList ::=
   --    incrAnnotItem
   --  | incrAnnotItem "<-" incrAnnotList

   local function incrAnnotList ()
      if not tok.match ("<-") then
         tok.expected ("'<-'")
      end
      while tok.next ("<-") do
         if tok.next ("@") then
            annot ()
         else
            pendId = tok.next ("INT", "UINT", "HEX")
            if not pendId then
               tok.expected ("incremental annotation, integer or hex number")
            end
         end
      end
   end

   -- incrAnnot ::=
   --    compRef "<-" incrAnnotList
   --
   -- compRef ::=
   --    "schema"
   --  | qName
   --  | qName "." "type"
   --  | qName "." name
   --  | qName "." name "." "type"
   --
   -- Unless the reference is "schema", its leading name has been consumed
   -- by the caller and is available in pendName

   local function incrAnnot ()
      -- Keep the location in plain locals; line and col may be nil for the
      -- very first token, which would make a packed table unsafe to unpack
      local line, col, file = tok.lastLoc ()

      if next (annotations) then
         tok.err ("An incremental annotation clause cannot be preceded" ..
                  " by annotations")
      end

      if #pendId > 0 then
         tok.err ("An incremental annotation clause cannot set an" ..
                  " ID using the slash notation. Use '<- id' instead")
      end

      if tok.next ("schema") then
         incrAnnotList ()
         consumeId ()
         obs:onSchemaAnnot (consumeAnnots (), line, col, file)
      else
         local endsWith = "name"
         local substep = ""
         if tok.next (".") then
            if tok.next ("type") then
               endsWith = "type"
            else
               substep = tok.require ("NAME", "field or symbol name")
               if tok.next (".") then
                  tok.require ("type")
                  endsWith = "type"
               end
            end
         end
         incrAnnotList ()
         obs:onIncrAnnot (pendName, substep, endsWith, consumeId (),
                          consumeAnnots (), line, col, file)
      end
   end

   -- def ::=
   --    annots define
   --  | annots groupDef
   --  | incrAnnot

   local function def ()
      annots ()
      if tok.match ("QNAME") then
         pendName = tok.next ()
         incrAnnot ()
      elseif tok.match ("schema") then
         incrAnnot ()
      else
         nameWithId ("group or type definition name, or an incremental " ..
                     "annotation")
         if tok.match ("<-", ".") then
            incrAnnot ()
         elseif tok.next ("=") then
            define ()
         else
            groupDef ()
         end
      end
   end

   -- schema ::=
   --    defs
   --  | nsDecl defs
   --
   -- nsDecl ::=
   --    "namespace" name

   if tok.next ("namespace") then
      nsDecl ()
   end

   -- defs ::=
   --    e
   --  | def defs

   while not tok.match ("END") do
      def ()
   end
end

---------------------------------------------------------------------------
-- Creates a tokenizer over the string s. src is the source name used in
-- error messages and returned by the location functions.
--
-- The tokenizer keeps two tokens: the current token and a pending
-- (lookahead) token. The returned table exposes match, matchPend, next,
-- require, err, expected, matchAnyKeyword, nextNameOrKeyword, loc and
-- lastLoc as plain functions (called with ".", not ":").

function tokenize (s, src)
   local line = 1
   local col = 0
   local lastLine = line
   local lastCol = col

   -- Token type -> description used in "Expected ... but got ..." messages
   local tokens = {
      INT = "integer", UINT = "unsigned integer", STRING = "string literal",
      HEX = "hex number", QNAME = "qualified name", NAME = "name",
      END = "end of schema", ["->"] = "'->'", ["<-"] = "'<-'"
   }

   -- Single character tokens; the token type is the character itself
   local single = { }
   for t in string.gmatch (",.=/[]():?*|@", ".") do
      single [t] = t
      tokens [t] = "'" .. t .. "'"
   end

   -- Keywords; the token type is the keyword itself
   local keywords = { }
   for _, v in ipairs {
      "i8", "u8", "i16", "u16", "i32", "u32", "i64", "u64", "f64", "decimal",
      "date", "timeOfDayMilli", "timeOfDayNano", "nanotime", "millitime",
      "bool", "string", "binary", "fixed", "object", "namespace", "type",
      "schema"
   } do
      keywords [v] = v
      tokens [v] = "keyword '" .. v .. "'"
   end

   local take = 1        -- index of the next character to read
   local e = #s          -- index of the last character
   local done = take > e
   local cur = { }       -- current token: type, val, line, col
   local pend = { }      -- pending (lookahead) token

   -- Raises an error prefixed with the source name and the current read
   -- position. The arguments are passed to string.format.
   local function err (...)
      error (src .. ":" .. tostring (line) .. ":" .. tostring (col) ..
             ": error: " .. string.format (...), 0)
   end

   -- Raises an error saying that "what" was expected instead of the
   -- current token
   local function expected (what)
      err ("Expected %s but got %s", what, tokens [cur.type])
   end

   -- Consumes and returns the next character, or nil at end of input
   local function get ()
      if not done then
         local c = s:sub (take, take)
         take = take + 1
         done = take > e
         if c == '\n' then
            -- The newline ends its line; the next character read will be
            -- column 1 of the following line
            line = line + 1
            col = 0
         else
            col = col + 1
         end
         return c
      end
   end

   -- Returns the next character without consuming it, or nil at end of
   -- input
   local function peek ()
      if not done then
         return s:sub (take, take)
      end
   end

   -- Consumes and returns the next character if it equals c
   local function lookahead (c)
      if peek () == c then
         return get ()
      end
   end

   -- Skips to the end of the line or the end of input
   local function skipComment ()
      while true do
         local c = get ()
         if not c or c == '\n' then
            return
         end
      end
   end

   local function isWs (c) return c and c:match ("%s") end
   local function isDigit (c) return c and c:match ("%d") end
   local function isHexDigit (c) return c and c:match ("%x") end
   local function isNameStartChar (c) return c and c:match ("[a-zA-Z_]") end
   local function isNameChar (c) return isNameStartChar (c) or isDigit (c) end

   -- Helpers for building the text of the pending token
   local function clearText () pend.val = "" end
   local function setText (v) pend.val = v end
   local function appendText (v) pend.val = pend.val .. v end

   -- Skips whitespace and '#' comments
   local function skipWsAndComments ()
      while true do
         local c = peek ()
         if isWs (c) then
            get ()
         elseif c == '#' then
            get ()
            skipComment ()
         else
            return
         end
      end
   end

   -- Sets the type of the pending token and stamps it with the current
   -- read position
   local function setToken (t)
      pend.type = t
      pend.line, pend.col = line, col
   end

   -- Reads the remaining digits of a number that starts with "first"
   local function readUInt (first)
      setText (first)
      while isDigit (peek ()) do
         appendText (get ())
      end
      if isNameStartChar (peek ()) then
         err ("A number must end in digits")
      end
   end

   -- Reads the digits of a hex number; the "0x" prefix has been consumed
   local function readHex ()
      setToken ("HEX")
      setText ("0x")
      while isHexDigit (peek ()) do
         appendText (get ())
      end
      if isNameStartChar (peek ()) then
         err ("A number must end in hex digits")
      end
   end

   -- Reads a string literal up to the closing quote q; the opening quote
   -- has been consumed
   local function readStr (q)
      clearText ()
      while true do
         local c = get ()
         if not c then
            err ("Literal not terminated at end of schema, expected %s", q)
         elseif c == '\n' then
            err ("Multiline literals are not allowed")
         elseif c ~= q then
            appendText (c)
         else
            return
         end
      end
   end

   -- Raises an error unless c can start a name
   local function requireNameStart (c, what)
      if not isNameStartChar (c) then
         if isWs (c) then
            err ("Missing %s", what)
         else
            err ("Character not allowed at start of %s", what)
         end
      end
   end

   -- Appends name characters to the pending token text
   local function readNcName ()
      while isNameChar (peek ()) do
         appendText (get ())
      end
   end

   -- Reads the rest of a name, qualified name or keyword whose first
   -- character is already in the pending token text
   local function readNameOrKeyword ()
      readNcName ()
      if peek () == ':' then
         get ()
         setToken ("QNAME")
         appendText (':')
         if not done then
            requireNameStart (peek (), "name part in qualified name")
         else
            err ("Missing name part after colon at end of file")
         end
         readNcName ()
      else
         setToken (keywords [pend.val] or "NAME")
      end
   end

   -- Moves the pending token to the current token, and parses a new token
   -- into the pending token

   local function advance ()
      lastLine, lastCol = cur.line, cur.col -- save loc
      cur, pend = pend, cur                 -- swap tokens
      if cur.type == "END" then
         pend.type = cur.type
         return
      end

      skipWsAndComments ()

      local c = get ()
      if c then
         -- Yields a nil type unless c is a single character token
         setToken (single [c])
         if pend.type then
            return
         end

         if c == '-' then
            if lookahead ('>') then
               setToken ("->")
            else
               if not isDigit (peek ()) then
                  err ("Expected digit or '>' after '-'")
               else
                  setToken ("INT")
                  readUInt (c)
               end
            end
         elseif c == '"' or c == '\'' then
            setToken ("STRING")
            readStr (c)
         elseif c == '<' then
            if lookahead ('-') then
               setToken ("<-")
            else
               err ("Expected dash after '<'")
            end
         elseif c == '\\' then
            -- A backslash makes what follows a plain name, even if it is
            -- spelled like a keyword
            clearText ()
            setToken ("NAME")
            if done then
               err ("Missing name after backslash at end of schema")
            else
               requireNameStart (peek (), "name after backslash")
            end
            readNcName ()
         elseif c == '0' then
            if lookahead ('x') then
               readHex ()
            else
               setToken ("UINT")
               readUInt ('0')
            end
         elseif isDigit (c) then
            setToken ("UINT")
            readUInt (c)
         elseif isNameStartChar (c) then
            setText (c)
            readNameOrKeyword ()
         else
            err ("Character not allowed here: %q", c)
         end
      else
         pend.type = "END"
      end
   end

   -- Tests if the token type t equals any of the remaining arguments
   local function isOneOf (t, ...)
      if t ~= nil then
         for i = 1, select ("#", ...) do
            if t == (select (i, ...)) then
               return true
            end
         end
      end
      return false
   end

   -- Tests if the current token matches any of the arguments
   local function match (...)
      return isOneOf (cur.type, ...)
   end

   -- Tests if the pending token matches any of the arguments
   local function matchPend (...)
      return isOneOf (pend.type, ...)
   end

   -- Advances to the next token if the current token matches any of
   -- the arguments, or unconditionally if called with no arguments.
   -- Returns the text of the consumed token, or nothing if it did not
   -- match.
   local function next_ (t, ...)
      if not t or match (t, ...) then
         local val = cur.val or ""
         advance ()
         return val
      end
   end

   -- Tests if the current token is a keyword
   local function matchAnyKeyword ()
      return keywords [cur.type]
   end

   -- Advances to the next token if the current token is a name or keyword
   local function nextNameOrKeyword ()
      if match ("NAME", "QNAME") or matchAnyKeyword () then
         local val = cur.val or ""
         advance ()
         return val
      end
   end

   -- Advances to the next token if the current token matches t, otherwise
   -- raises an error
   local function require_ (t, what)
      return next_ (t) or expected (what or tokens [t])
   end

   -- Returns the location of the current token
   local function loc ()
      return cur.line, cur.col, src
   end

   -- Returns the location of the token that was replaced by the
   -- current token
   local function lastLoc ()
      return lastLine, lastCol, src
   end

   next_ () -- Init current token
   next_ () -- Init lookahead token

   local interface = {
      match = match,
      matchPend = matchPend,
      next = next_,
      require = require_,
      err = err,
      expected = expected,
      matchAnyKeyword = matchAnyKeyword,
      nextNameOrKeyword = nextNameOrKeyword,
      loc = loc,
      lastLoc = lastLoc
   }

   return interface
end

---------------------------------------------------------------------------
-- Creates a default observer that will print a line for each event.
--
-- Handlers are created on first use: the event name minus its two-character
-- "on" prefix becomes the label of the printed line. Every argument is
-- printed, including nil ones, and annotation tables are printed with
-- their keys in sorted order.

function blink.createDumpObs ()
   local obs = { }

   local function byText (a, b)
      return tostring (a) < tostring (b)
   end

   local function fwd (t, evt)
      local label = evt:sub (3)
      local function on (self, ...)
         local props = { }
         -- Use the true argument count so that a nil argument (such as an
         -- absent max size) does not cut off the arguments after it
         for i = 1, select ("#", ...) do
            local v = (select (i, ...))
            if type (v) == "table" then
               local keys = { }
               for k in pairs (v) do
                  keys [#keys + 1] = k
               end
               table.sort (keys, byText)
               local annots = { }
               for j, k in ipairs (keys) do
                  annots [j] = k .. "=" .. string.format ("%q", v [k])
               end
               props [i] = "[" .. table.concat (annots, ", ") .. "]"
            elseif type (v) == "string" then
               props [i] = string.format ("%q", v)
            else
               props [i] = tostring (v)
            end
         end
         print (label .. " (" .. table.concat (props, ", ") .. ")")
      end
      t [evt] = on -- cache the handler for subsequent events
      return on
   end

   setmetatable (obs, { __index = fwd })
   return obs
end

---------------------------------------------------------------------------
-- Creates a silent observer: every event handler is a function that does
-- nothing

function blink.createSilentObs ()
   local obs = { }
   local function thunk () end
   setmetatable (obs, { __index = function () return thunk end })
   return obs
end

---------------------------------------------------------------------------
-- Command line mode:
--
--   lua blink.lua my-schema.blink
--   lua blink.lua --silent my-schema.blink
--
-- Only runs when there is no stack frame at level 3 and at least one
-- command line argument was given

if not debug.getinfo (3) and arg and arg [1] then
   local file = arg [1]
   local cliObs
   if file == "--silent" then
      file = arg [2]
      cliObs = blink.createSilentObs ()
   end
   if not file then
      io.stderr:write ("usage: lua blink.lua [--silent] <schema-file>\n")
      os.exit (1)
   end
   blink.parseSchemaFile (file, cliObs)
end