mirror of
https://github.com/1bardesign/batteries.git
synced 2024-11-22 14:14:36 +00:00
66a6c5a50e
As a new user, there were things I was skeptical about; after digging in, these are my conclusions.

Compared to the simple and obvious lua wiki solutions, batteries' string functions are slightly faster. GC is the same.

Test

local str = "hello world"
local fn = function()
	local x = 0
	if stringx.ends_with(str, "h") then x = x + 1 end
	if stringx.ends_with(str, "helll") then x = x + 1 end
	if stringx.ends_with(str, "helicopter") then x = x + 1 end
end
local pretty = require "inspect"
print("stringx =", pretty({ time_taken = {measure.time_taken(fn, 10000)}, memory_taken = {measure.memory_taken(fn, 10000)} }))

local function starts_with(str, prefix) return str:find(prefix, 1, true) == 1 end
local function ends_with(str, ending) return ending == "" or str:sub(-#ending) == ending end
local fn = function()
	local x = 0
	if ends_with(str, "h") then x = x + 1 end
	if ends_with(str, "helll") then x = x + 1 end
	if ends_with(str, "helicopter") then x = x + 1 end
end
print("find =", pretty({ time_taken = {measure.time_taken(fn, 10000)}, memory_taken = {measure.memory_taken(fn, 10000)} }))

starts_with
===========
stringx = { memory_taken = { 0, 0, 0 }, time_taken = { 1.5098012518138e-007, 9.988434612751e-008, 2.1699932403862e-005 } }
find = { memory_taken = { 0, 0, 0 }, time_taken = { 2.7349997544661e-007, 1.9988510757685e-007, 9.1999536380172e-006 } }

ends_with
=========
stringx = { memory_taken = { 0, 0, 0 }, time_taken = { 9.0479978825897e-008, 0, 2.5199959054589e-005 } }
find = { memory_taken = { 0, 0, 0 }, time_taken = { 2.1833006758243e-007, 1.9988510757685e-007, 6.1000464484096e-006 } }
276 lines
5.7 KiB
Lua
276 lines
5.7 KiB
Lua
--[[
|
|
extra string routines
|
|
]]
|
|
|
|
local path = (...):gsub("stringx", "")
|
|
local assert = require(path .. "assert")
|
|
local pretty = require(path .. "pretty")
|
|
|
|
--the module table
--any key not defined on it is looked up in the standard string library,
--so the built-in string functions are reachable through stringx as well
local stringx = setmetatable({}, {__index = string})
|
|
|
|
--split a string on a delimiter into an ordered table
--	self  - the string to split
--	delim - the delimiter, compared byte-for-byte (not a pattern)
--	        defaults to "", which splits into individual single-byte strings
--	        (the limit is not applied in that case)
--	limit - the maximum number of splits to perform; defaults to unlimited
--	        whatever follows the last split is returned whole as the final element
--returns a new sequential table of the pieces, in order
--	ie stringx.split("a,b,,c", ",") gives {"a", "b", "", "c"}
function stringx.split(self, delim, limit)
	delim = delim or ""
	limit = (limit ~= nil and limit) or math.huge

	assert:type(self, "string", "stringx.split - self", 1)
	assert:type(delim, "string", "stringx.split - delim", 1)
	assert:type(limit, "number", "stringx.split - limit", 1)

	--limit has always been defaulted by this point, so it can be checked directly
	assert(limit >= 0, "max_split must be positive!")

	--we try to create as little garbage as possible!
	--only one table to contain the result, plus the split strings.
	--so we do two passes, and work with the bytes underlying the string
	--partly because string.find is not compiled on older luajit :)
	local res = {}
	local length = self:len()
	local delim_length = delim:len()

	--empty delim? split to individual characters
	if delim_length == 0 then
		for i = 1, length do
			res[i] = self:sub(i, i)
		end
		return res
	end

	local delim_start = delim:byte(1)

	--pass 1
	--collect split sites (the index each delimiter starts at)
	local i = 1
	while i <= length do
		--check the first byte, then the rest of the delimiter
		local is_delim = self:byte(i) == delim_start
		if is_delim then
			for j = 2, delim_length do
				--reading past the end of self gives nil, which never matches
				if self:byte(i + j - 1) ~= delim:byte(j) then
					is_delim = false
					break
				end
			end
		end

		if is_delim then
			--out of splits; the rest of the string becomes the final section
			if #res >= limit then
				break
			end
			table.insert(res, i)
			--step over the whole delimiter so matches never overlap
			i = i + delim_length
		else
			--step a single byte only, even after a partial match:
			--a real delimiter may begin inside the bytes we just compared
			--(eg splitting "aab" on "ab")
			i = i + 1
		end
	end

	--pass 2
	--replace each split site with the substring that precedes it
	i = 1
	for si, j in ipairs(res) do
		res[si] = self:sub(i, j - 1)
		i = j + delim_length
	end
	--add the final section
	table.insert(res, self:sub(i, -1))

	--return the collection
	return res
end
|
|
|
|
--re-export pretty.string under the stringx namespace
stringx.pretty = pretty.string

--set of the byte values the trim functions treat as whitespace
--(space, tab, newline and carriage return), keyed by byte value
local _whitespace_bytes = {}
for _, byte in ipairs({(" \t\n\r"):byte(1, -1)}) do
	_whitespace_bytes[byte] = true
end
|
|
|
|
--trim whitespace off both ends of a string
--	only space, tab, newline and carriage return count as whitespace;
--	form feeds, vertical tabs and backspaces are left alone
--
--	returns s itself when there is nothing to trim, and "" when s holds
--	nothing but whitespace, so at most one new string is created
function stringx.trim(s)
	local len = s:len()

	--walk in from the front until we hit some content
	local head = 1
	while head <= len and _whitespace_bytes[s:byte(head)] do
		head = head + 1
	end

	--ran off the end: empty, or whitespace all the way through
	if head > len then
		return ""
	end

	--walk in from the back; there is content at head, so this always stops
	local tail = len
	while _whitespace_bytes[s:byte(tail)] do
		tail = tail - 1
	end

	--content already spans the whole string, so hand it back untouched
	if head == 1 and tail == len then
		return s
	end

	return s:sub(head, tail)
end
|
|
|
|
--trim whitespace off the start of a string
--	only space, tab, newline and carriage return count as whitespace
--	returns s itself when there is nothing to trim,
--	and "" when s holds nothing but whitespace
function stringx.ltrim(s)
	local len = #s
	--until we find some content, the head sits one past the end of the string,
	--so a string that is entirely whitespace trims down to ""
	local head = len + 1
	for i = 1, len do
		if not _whitespace_bytes[s:byte(i)] then
			head = i
			break
		end
	end
	--content starts at the first byte (or s is empty); nothing to trim
	if head == 1 then
		return s
	end
	return s:sub(head)
end
|
|
|
|
--trim whitespace off the end of a string
--	only space, tab, newline and carriage return count as whitespace
--	returns s itself when there is nothing to trim,
--	and "" when s holds nothing but whitespace
function stringx.rtrim(s)
	local len = #s
	--until we find some content, the tail sits before the start of the string,
	--so a string that is entirely whitespace trims down to ""
	local tail = 0
	for i = len, 1, -1 do
		if not _whitespace_bytes[s:byte(i)] then
			tail = i
			break
		end
	end

	--content runs right to the last byte (or s is empty); nothing to trim
	if tail == len then
		return s
	end

	return s:sub(1, tail)
end
|
|
|
|
--strip the indent of the first non-empty line off every line of a string
--	s                   - the string to de-indent
--	keep_trailing_empty - if truthy, a trailing empty line is kept in the
--	                      result instead of being dropped
--
--	leading empty lines are always dropped
--	the newline style is detected from the input (any "\r\n" means windows
--	newlines, otherwise "\n") and used again when re-joining the lines
--	if the first remaining line has no indent, the lines are re-joined as-is
--	(in that case a trailing empty line is kept regardless of the flag)
function stringx.deindent(s, keep_trailing_empty)
	--detect windows or unix newlines
	local newline = "\n"
	if s:find("\r\n", nil, true) then
		newline = "\r\n"
	end

	local lines = stringx.split(s, newline)

	--strip any leading empty lines
	while lines[1] == "" do
		table.remove(lines, 1)
	end

	--nothing left to work on
	if #lines == 0 then
		return ""
	end

	--the first remaining line dictates the indent (any run of spaces and tabs)
	local indent = lines[1]:match("^([ \t]*)")
	local indent_len = indent and indent:len() or 0

	--not indented
	if indent_len == 0 then
		return table.concat(lines, newline)
	end

	--de-indent the lines in place
	for i = 1, #lines do
		local line = lines[i]
		local line_start = line:sub(1, indent_len)
		local start_len = line_start:len()

		--strip when the line carries the full indent, or when the line is
		--shorter than the indent and consists only of a prefix of it
		local strip = line_start == indent
		if not strip and start_len < indent_len then
			strip = line_start == indent:sub(1, start_len)
		end

		if strip then
			lines[i] = line:sub(start_len + 1)
		end
	end

	--drop a single trailing empty line unless asked to keep it
	if not keep_trailing_empty and lines[#lines] == "" then
		table.remove(lines)
	end

	return table.concat(lines, newline)
end

--shorter alias for deindent
stringx.dedent = stringx.deindent
|
|
|
|
--substitute $name style placeholders in a string
--	a placeholder is a $ followed by one or more alphanumeric or underscore characters
--	sub is passed to gsub as the replacement, so it can be a table
--	keyed by placeholder name, or a function that is called with the name
--	ie stringx.apply_template("hello $name", {name = "tom"}) == "hello tom"
function stringx.apply_template(s, sub)
	--the parentheses keep only the resulting string, dropping gsub's match count
	return (s:gsub("%$([%w_]+)", sub))
end
|
|
|
|
--check whether needle occurs anywhere inside haystack
--compares the underlying bytes directly, so no temporary strings are created
--an empty needle is contained in every string
function stringx.contains(haystack, needle)
	local needle_len = #needle
	--the last position the needle could start at and still fit
	local last_start = #haystack - needle_len + 1
	for start = 1, last_start do
		--count how many bytes match from this position
		local matched = 0
		while
			matched < needle_len
			and haystack:byte(start + matched) == needle:byte(matched + 1)
		do
			matched = matched + 1
		end
		if matched == needle_len then
			return true
		end
	end
	return false
end
|
|
|
|
--check whether s begins with prefix
--compares the underlying bytes directly, so no temporary strings are created
--(looping over the bytes is actually faster than string.find!)
--an empty prefix always matches
function stringx.starts_with(s, prefix)
	local prefix_len = #prefix
	local i = 1
	while i <= prefix_len do
		--past the end of s the byte reads as nil, which never matches
		if s:byte(i) ~= prefix:byte(i) then
			return false
		end
		i = i + 1
	end
	return true
end
|
|
|
|
--check whether s finishes with suffix
--compares the underlying bytes directly, so no temporary strings are created
--an empty suffix always matches
function stringx.ends_with(s, suffix)
	--walk both strings backwards from their last byte
	local s_pos = #s
	local suffix_pos = #suffix
	while suffix_pos >= 1 do
		--once s runs out (position 0) the byte reads as nil, which never matches
		if s:byte(s_pos) ~= suffix:byte(suffix_pos) then
			return false
		end
		s_pos = s_pos - 1
		suffix_pos = suffix_pos - 1
	end
	return true
end
|
|
|
|
return stringx
|