-- MIT License
--
-- Copyright (c) 2018 LoganDark
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-- copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all
-- copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- SOFTWARE.

--- Build (or extend) a set-like lookup table.
-- @param src  a string (every single character becomes a key) or an array
--             (every element becomes a key); any other type adds nothing
-- @param list optional table to add the keys to; a new table is used if omitted
-- @return the table, with `true` stored under every key taken from `src`
local function lookupify(src, list)
	list = list or {}

	if type(src) == 'string' then
		for i = 1, src:len() do
			list[src:sub(i, i)] = true
		end
	elseif type(src) == 'table' then
		for i = 1, #src do
			list[src[i]] = true
		end
	end

	return list
end

local base_ident = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
local base_digits = '0123456789'
local base_operators = '+-*/^%#'

-- Character classes. Each class is a lookup keyed by single characters; some
-- classes also carry named sub-classes (e.g. `chars.digits.hex`) stored in the
-- same table under a multi-character key, which can never collide with a
-- single-character lookup.
local chars = {
	whitespace = lookupify(' \n\t\r'),
	-- Single-character escapes accepted after a backslash. `z` is included
	-- alongside the `\xXX` form that the string scanner also accepts.
	validEscapes = lookupify('abfnrtvz"\'\\'),
	ident = lookupify(
		base_ident .. base_digits,
		{
			start = lookupify(base_ident),
		}
	),
	digits = lookupify(
		base_digits,
		{
			hex = lookupify(base_digits .. 'abcdefABCDEF')
		}
	),
	symbols = lookupify(
		base_operators .. ',{}[]();.:',
		{
			equality = lookupify('~=><'),
			operators = lookupify(base_operators)
		}
	)
}

local keywords = {
	structure = lookupify({
		'and', 'break', 'do', 'else', 'elseif', 'end', 'for', 'function',
		'goto', 'if', 'in', 'local', 'not', 'or', 'repeat', 'return', 'then',
		'until', 'while'
	}),
	values = lookupify({
		'true', 'false', 'nil'
	})
}

--- Split Lua source text into per-line token lists.
--
-- Each token is a table `{ type, data, posFirst, posLast }` where `data` is the
-- token text and `posFirst`/`posLast` are 1-based, inclusive columns within the
-- token's own line. Adjacent tokens of the same type on a line are merged into
-- one token. Newline characters end a line and are not emitted as tokens.
--
-- Token types produced: whitespace, comment, string_start, string, escape,
-- string_end, keyword, value, ident, number, symbol, vararg, operator,
-- label_start, label, label_end, unidentified.
--
-- @param text the source text to tokenize (string)
-- @return an array of lines; each line is an array of tokens
return function(text)
	local pos = 1    -- index of the next unread character
	local start = 1  -- index of the first character of the pending token
	local buffer = {} -- tokens of the line currently being built
	local lines = {}

	-- Character at `pos + delta` without consuming it ('' when out of range).
	local function look(delta)
		local index = pos + (delta or 0)
		return text:sub(index, index)
	end

	-- Consume one character and return it ('' at end of input; `pos` still
	-- advances in that case, so callers may end up past the end of `text`).
	local function get()
		pos = pos + 1
		return look(-1)
	end

	-- With `pos` just after a '[': if the upcoming characters are `=*[`,
	-- consume them and return the number of '=' signs (the long-bracket
	-- level). Otherwise consume nothing and return nil.
	local function getDataLevel()
		local num = 0

		while look(num) == '=' do
			num = num + 1
		end

		if look(num) == '[' then
			pos = pos + num + 1
			return num
		end
	end

	local function getCurrentTokenText()
		return text:sub(start, pos - 1)
	end

	local currentLineLength = 0 -- characters pushed so far on this line
	local lineoffset = 0        -- total length of all completed lines

	-- Emit the pending text (or `data`, if given) as a token of type `kind`.
	-- Merges into the previous token when it has the same type; empty tokens
	-- are never stored. Always resets `start` to `pos`.
	local function pushToken(kind, data)
		data = data or getCurrentTokenText()
		local length = data:len()
		local tk = buffer[#buffer]

		if tk and tk.type == kind then
			tk.data = tk.data .. data
			tk.posLast = tk.posLast + length
		else
			local first = start - lineoffset
			tk = {
				type = kind,
				data = data,
				posFirst = first,
				-- Derived from the real text length rather than from `pos`,
				-- because `pos` can run past the end of the input (e.g. in an
				-- unterminated long string or comment).
				posLast = first + length - 1
			}

			if length > 0 then
				buffer[#buffer + 1] = tk
			end
		end

		currentLineLength = currentLineLength + length
		start = pos

		return tk
	end

	-- Finish the current line. Expects `pos` to be on a '\n' with no pending
	-- token text. The newline is pushed only so that it is counted in the
	-- line length; it is then dropped from the fresh buffer.
	local function newline()
		lines[#lines + 1] = buffer
		buffer = {}

		get()
		pushToken('newline')
		buffer[1] = nil

		lineoffset = lineoffset + currentLineLength
		currentLineLength = 0
	end

	-- Scan the body of a long string/comment of the given bracket level,
	-- emitting one `kind` token per completed line. On success `pos` is left
	-- on the first ']' of the closing bracket; at end of input it simply
	-- returns with `pos` past the end.
	local function getData(level, kind)
		while true do
			local char = get()

			if char == '' then
				return
			elseif char == '\n' then
				pos = pos - 1
				pushToken(kind)
				newline()
			elseif char == ']' then
				local valid = true

				for _ = 1, level do
					if look() == '=' then
						pos = pos + 1
					else
						valid = false
						break
					end
				end

				if valid and look() == ']' then
					pos = pos - level - 1
					return
				end
			end
		end
	end

	-- Consume whitespace (including line breaks) and emit it.
	local function chompWhitespace()
		while true do
			local char = look()

			if char == '\n' then
				pushToken('whitespace')
				newline()
			elseif chars.whitespace[char] then
				pos = pos + 1
			else
				break
			end
		end

		pushToken('whitespace')
	end

	-- Consume the rest of a single-line comment and emit it, ending the line
	-- if the comment was terminated by a newline rather than end of input.
	local function chompLineComment()
		while true do
			local char = get()

			if char == '' or char == '\n' then
				pos = pos - 1
				pushToken('comment')

				if char == '\n' then
					newline()
				end

				return
			end
		end
	end

	while true do
		chompWhitespace()

		local char = get()

		if char == '' then
			break
		elseif char == '-' and look() == '-' then
			pos = pos + 1

			-- `--[` followed by `=*[` opens a long comment; anything else is
			-- a line comment.
			local level
			if look() == '[' then
				pos = pos + 1
				level = getDataLevel()
			end

			if level then
				getData(level, 'comment')
				pos = pos + level + 2 -- step over the closing bracket
				pushToken('comment')
			else
				chompLineComment()
			end
		elseif char == '\'' or char == '"' then
			pushToken('string_start')

			while true do
				local char2 = get()

				if char2 == '\\' then
					-- Flush the plain text before the backslash, then re-read
					-- the backslash as the start of the escape token.
					pos = pos - 1
					pushToken('string')
					get()

					local char3 = get()

					if chars.digits[char3] then
						-- Decimal escape: up to three digits in total.
						for _ = 1, 2 do
							if chars.digits[look()] then
								pos = pos + 1
							end
						end
					elseif char3 == 'x' then
						if chars.digits.hex[look()] and chars.digits.hex[look(1)] then
							pos = pos + 2
						else
							pushToken('unidentified')
						end
					elseif char3 == '\n' then
						-- Escaped line break: the backslash is the escape and
						-- the string continues on the next line.
						pos = pos - 1
						pushToken('escape')
						newline()
					elseif not chars.validEscapes[char3] then
						pushToken('unidentified')
					end

					pushToken('escape')
				elseif char2 == '\n' then
					-- Unescaped line break ends the string token.
					pos = pos - 1
					pushToken('string')
					newline()
					break
				elseif char2 == char or char2 == '' then
					pos = pos - 1
					pushToken('string')
					get()
					break
				end
			end

			pushToken('string_end')
		elseif chars.ident.start[char] then
			while chars.ident[look()] do
				pos = pos + 1
			end

			local word = getCurrentTokenText()

			if keywords.structure[word] then
				pushToken('keyword')
			elseif keywords.values[word] then
				pushToken('value')
			else
				pushToken('ident')
			end
		elseif chars.digits[char] or (char == '.' and chars.digits[look()]) then
			if char == '0' and look():lower() == 'x' then
				-- Hexadecimal literal; the prefix may be `0x` or `0X`.
				pos = pos + 1

				while chars.digits.hex[look()] do
					pos = pos + 1
				end
			else
				while chars.digits[look()] do
					pos = pos + 1
				end

				if look() == '.' then
					pos = pos + 1

					while chars.digits[look()] do
						pos = pos + 1
					end
				end

				if look():lower() == 'e' then
					pos = pos + 1

					-- The exponent may carry an explicit sign of either kind.
					local sign = look()
					if sign == '-' or sign == '+' then
						pos = pos + 1
					end

					while chars.digits[look()] do
						pos = pos + 1
					end
				end
			end

			pushToken('number')
		elseif char == '[' then
			local level = getDataLevel()

			if level then
				pushToken('string_start')

				getData(level, 'string')
				pushToken('string')

				pos = pos + level + 2 -- step over the closing bracket
				pushToken('string_end')
			else
				pushToken('symbol')
			end
		elseif char == '.' then
			if look() == '.' then
				pos = pos + 1

				if look() == '.' then
					pos = pos + 1
				end
			end

			if getCurrentTokenText():len() == 3 then
				pushToken('vararg')
			else
				pushToken('symbol')
			end
		elseif char == ':' and look() == ':' then
			get()
			pushToken('label_start')

			chompWhitespace()

			if chars.ident.start[look()] then
				get()

				while chars.ident[look()] do
					get()
				end

				pushToken('label')

				chompWhitespace()

				if look() == ':' and look(1) == ':' then
					get()
					get()
					pushToken('label_end')
				end
			end
		elseif chars.symbols.equality[char] then
			if look() == '=' then
				pos = pos + 1
			end

			pushToken('operator')
		elseif chars.symbols[char] then
			if chars.symbols.operators[char] then
				pushToken('operator')
			else
				pushToken('symbol')
			end
		else
			pushToken('unidentified')
		end
	end

	lines[#lines + 1] = buffer

	return lines
end