2020-04-17 00:35:00 +00:00
|
|
|
--[[
|
|
|
|
extra string routines
|
|
|
|
]]
|
|
|
|
|
2021-07-05 06:12:16 +00:00
|
|
|
local path = (...):gsub("stringx", "")
|
2020-05-19 02:03:45 +00:00
|
|
|
local assert = require(path .. "assert")
|
2021-07-05 06:12:16 +00:00
|
|
|
local pretty = require(path .. "pretty")
|
2020-05-19 02:03:45 +00:00
|
|
|
|
2020-04-17 00:35:00 +00:00
|
|
|
--the module table
--any key not defined on it falls through to the standard string library,
--so the stock string functions are reachable as stringx.upper etc.
local stringx = {}
setmetatable(stringx, { __index = string })
|
|
|
|
|
|
|
|
--split a string on a delimiter into an ordered table
--
--	self  - the string to split
--	delim - the delimiter string; defaults to "", which splits the string
--	        into its individual bytes (limit is not applied in that case)
--	limit - the maximum number of splits to perform; defaults to unlimited.
--	        once that many splits have been made, whatever is left of the
--	        string becomes the final element
--
--returns a new table holding the pieces in order; it always has at least
--one element unless the delimiter is empty and the string is too
function stringx.split(self, delim, limit)
	delim = delim or ""
	limit = (limit ~= nil and limit) or math.huge

	assert:type(self, "string", "stringx.split - self", 1)
	assert:type(delim, "string", "stringx.split - delim", 1)
	assert:type(limit, "number", "stringx.split - limit", 1)

	--limit has been defaulted above, so there is always something to check
	assert(limit >= 0, "max_split must be positive!")

	--we try to create as little garbage as possible!
	--only one table to contain the result, plus the split strings.
	--so we do two passes, and work with the bytes underlying the string
	--partly because string.find is not compiled on older luajit :)
	local res = {}
	local length = self:len()
	local delim_length = delim:len()

	--empty delim? split to individual characters
	if delim_length == 0 then
		for i = 1, length do
			table.insert(res, self:sub(i, i))
		end
		return res
	end

	local delim_start = delim:byte(1)

	--pass 1
	--collect split sites (the index each delimiter occurrence starts at)
	local i = 1
	while i <= length do
		--does a whole delimiter start at i?
		local has_whole_delim = self:byte(i) == delim_start
		if has_whole_delim then
			for j = 2, delim_length do
				--(reading past the end of self gives nil, which never matches)
				if self:byte(i + j - 1) ~= delim:byte(j) then
					has_whole_delim = false
					break
				end
			end
		end

		if has_whole_delim then
			if #res >= limit then
				--out of splits; the remainder is emitted as the final section
				break
			end
			table.insert(res, i)
			--iterate forward the whole delimiter
			i = i + delim_length
		else
			--only ever step a single byte after a failed match:
			--a delimiter can begin inside the bytes of a failed partial match
			--(eg "aab" split on "ab"), so skipping further would miss it
			i = i + 1
		end
	end

	--pass 2
	--collect substrings, overwriting each split site with the text before it
	i = 1
	for si, j in ipairs(res) do
		res[si] = self:sub(i, j - 1)
		i = j + delim_length
	end
	--add the final section
	table.insert(res, self:sub(i, -1))

	--return the collection
	return res
end
|
|
|
|
|
2021-07-05 06:12:16 +00:00
|
|
|
--re-export the pretty module's string function as stringx.pretty
stringx.pretty = pretty.string
|
2020-04-17 00:45:15 +00:00
|
|
|
|
2020-11-12 05:36:06 +00:00
|
|
|
--lookup of the byte values treated as whitespace by the trim functions
--keyed by byte value, true for space, tab, newline and carriage return
local _whitespace_bytes = {}
for _, byte in ipairs({ (" \t\n\r"):byte(1, -1) }) do
	_whitespace_bytes[byte] = true
end
|
|
|
|
|
|
|
|
--strip whitespace from both ends of a string
-- whitespace means space, tab, newline and carriage return only;
-- form feeds, vertical tabs and backspaces are left in place
--
-- the input string itself is returned when there is nothing to strip,
-- so a new string is only built when something is actually removed
function stringx.trim(s)
	local len = s:len()

	--walk in from the front to the first byte that isn't whitespace
	--(head stays 0 if we never find one)
	local head = 0
	local i = 1
	while i <= len do
		if not _whitespace_bytes[s:byte(i)] then
			head = i
			break
		end
		i = i + 1
	end

	--empty, or nothing but whitespace
	if head == 0 then
		return ""
	end

	--walk in from the back; the byte at head stops this loop at the latest
	local tail = len
	while _whitespace_bytes[s:byte(tail)] do
		tail = tail - 1
	end

	--content already spans the whole string, hand it back untouched
	if head == 1 and tail == len then
		return s
	end

	return s:sub(head, tail)
end
|
|
|
|
|
2021-04-14 07:06:31 +00:00
|
|
|
--trim whitespace off the start of a string
-- trims space, tab, newline and carriage return, same as stringx.trim
--
-- returns the input string itself if it has no leading whitespace,
-- and "" if the string is empty or consists only of whitespace
function stringx.ltrim(s)
	for i = 1, #s do
		if not _whitespace_bytes[s:byte(i)] then
			if i == 1 then
				--nothing to trim
				return s
			end
			return s:sub(i)
		end
	end
	--no content found at all: everything (if anything) was whitespace
	return ""
end
|
|
|
|
|
2021-04-14 07:06:31 +00:00
|
|
|
--trim whitespace off the end of a string
-- trims space, tab, newline and carriage return, same as stringx.trim
--
-- returns the input string itself if it has no trailing whitespace,
-- and "" if the string is empty or consists only of whitespace
function stringx.rtrim(s)
	local len = #s
	for i = len, 1, -1 do
		if not _whitespace_bytes[s:byte(i)] then
			if i == len then
				--nothing to trim
				return s
			end
			return s:sub(1, i)
		end
	end
	--no content found at all: everything (if anything) was whitespace
	return ""
end
|
|
|
|
|
2020-11-12 03:32:56 +00:00
|
|
|
--remove a common leading indent from a block of text
--
--	s                   - the text; split on "\r\n" if it contains any, else "\n"
--	keep_trailing_empty - if truthy, empty lines at the end are kept
--
-- leading empty lines are always dropped. the indent is whatever run of
-- spaces and tabs starts the first remaining line; it is stripped from every
-- non-empty line that begins with it. a line shorter than the indent that
-- consists of a prefix of it is emptied, and any other line is left as-is.
-- if the first line isn't indented, the lines are re-joined unchanged
-- (trailing empty lines included).
function stringx.deindent(s, keep_trailing_empty)
	--pick the newline style in use
	local newline = "\n"
	if s:find("\r\n", nil, true) then
		newline = "\r\n"
	end

	local lines = stringx.split(s, newline)

	--discard blank lines at the top
	while lines[1] == "" do
		table.remove(lines, 1)
	end
	if #lines == 0 then
		return ""
	end

	--the first line sets the indent for the whole block
	local indent = lines[1]:match("^[ \t]*")
	local indent_len = indent:len()
	if indent_len == 0 then
		--not indented, nothing to strip
		return table.concat(lines, newline)
	end

	local res = {}
	for n = 1, #lines do
		local line = lines[n]
		if line ~= "" then
			local line_start = line:sub(1, indent_len)
			--line_start is never longer than the indent, so this single
			--comparison covers both "starts with the full indent" and
			--"is shorter than the indent but matches its beginning"
			if line_start == indent:sub(1, line_start:len()) then
				line = line:sub(line_start:len() + 1)
			end
		end
		res[n] = line
	end

	--drop empty lines off the end unless asked to keep them
	if not keep_trailing_empty then
		for n = #res, 1, -1 do
			if res[n] ~= "" then
				break
			end
			res[n] = nil
		end
	end

	return table.concat(res, newline)
end

--alias
stringx.dedent = stringx.deindent
|
|
|
|
|
2020-11-19 06:18:38 +00:00
|
|
|
--fill in the $name style placeholders of a template string
--	s   - the template; a placeholder is "$" followed by letters, digits
--	      and/or underscores
--	sub - where values come from; handed straight to string.gsub as the
--	      replacement, with the placeholder name (minus the "$") as the key
--	      or argument
--returns just the resulting string (gsub's match count is dropped)
-- ie stringx.apply_template("hello $name", {name = "tom"}) == "hello tom"
function stringx.apply_template(s, sub)
	--parenthesised to truncate gsub's results to the string alone
	return (s:gsub("%$([%w_]+)", sub))
end
|
|
|
|
|
2021-07-05 22:29:30 +00:00
|
|
|
--test whether needle occurs anywhere inside haystack
--compares bytes in place rather than building substrings, so no garbage
--an empty needle is contained in every string, including the empty one
function stringx.contains(haystack, needle)
	local needle_len = #needle
	--the last index a full-length match could begin at
	local last_start = #haystack - needle_len + 1

	local start = 1
	while start <= last_start do
		--count how many bytes agree from this starting point
		local matched = 0
		while matched < needle_len
			and haystack:byte(start + matched) == needle:byte(matched + 1)
		do
			matched = matched + 1
		end
		if matched == needle_len then
			return true
		end
		start = start + 1
	end

	return false
end
|
|
|
|
|
2021-03-02 00:47:28 +00:00
|
|
|
--check if a given string starts with another
|
|
|
|
--(without garbage)
|
Add comments for things that surprised me
As a new user, I was skeptical about a few things; after digging in,
these were my conclusions.
Compared to the simple and obvious Lua wiki solutions, batteries' string
functions are slightly faster. GC usage is the same.
Test:
local str = "hello world"
local fn = function()
local x = 0
if stringx.ends_with(str, "h") then
x = x + 1
end
if stringx.ends_with(str, "helll") then
x = x + 1
end
if stringx.ends_with(str, "helicopter") then
x = x + 1
end
end
local pretty = require "inspect"
print("stringx =", pretty({
time_taken = {measure.time_taken(fn, 10000)},
memory_taken = {measure.memory_taken(fn, 10000)}
}))
local function starts_with(str, prefix)
return str:find(prefix, 1, true) == 1
end
local function ends_with(str, ending)
return ending == "" or str:sub(-#ending) == ending
end
local fn = function()
local x = 0
if ends_with(str, "h") then
x = x + 1
end
if ends_with(str, "helll") then
x = x + 1
end
if ends_with(str, "helicopter") then
x = x + 1
end
end
print("find =", pretty({
time_taken = {measure.time_taken(fn, 10000)},
memory_taken = {measure.memory_taken(fn, 10000)}
}))
starts_with
===========
stringx = {
memory_taken = { 0, 0, 0 },
time_taken = { 1.5098012518138e-007, 9.988434612751e-008, 2.1699932403862e-005 }
}
find = {
memory_taken = { 0, 0, 0 },
time_taken = { 2.7349997544661e-007, 1.9988510757685e-007, 9.1999536380172e-006 }
}
ends_with
=========
stringx = {
memory_taken = { 0, 0, 0 },
time_taken = { 9.0479978825897e-008, 0, 2.5199959054589e-005 }
}
find = {
memory_taken = { 0, 0, 0 },
time_taken = { 2.1833006758243e-007, 1.9988510757685e-007, 6.1000464484096e-006 }
}
2022-03-02 18:30:52 +00:00
|
|
|
--test whether s begins with prefix
--done as a plain byte-by-byte loop, which measured faster than going
--through string.find
--an empty prefix matches every string
function stringx.starts_with(s, prefix)
	local prefix_len = #prefix
	local i = 1
	--advance while the bytes agree; running off the end of s reads nil,
	--which never equals a prefix byte
	while i <= prefix_len and s:byte(i) == prefix:byte(i) do
		i = i + 1
	end
	--we only get past the end of the prefix if every byte matched
	return i > prefix_len
end
|
|
|
|
|
2021-04-14 07:06:31 +00:00
|
|
|
--test whether s finishes with suffix
--compares bytes from the back of both strings, so no garbage is created
--an empty suffix matches every string
function stringx.ends_with(s, suffix)
	local s_pos = #s
	local suffix_pos = #suffix
	while suffix_pos >= 1 do
		--if s runs out first, s_pos reaches 0 and byte() gives nil there,
		--which fails the comparison
		if s:byte(s_pos) ~= suffix:byte(suffix_pos) then
			return false
		end
		s_pos = s_pos - 1
		suffix_pos = suffix_pos - 1
	end
	return true
end
|
|
|
|
|
2022-10-24 05:08:39 +00:00
|
|
|
--split on a delimiter, trim every piece, and drop the pieces that
--end up empty
--handy for forgiving, hand-typed lists
-- "a,b, c, " -> {"a", "b", "c"}
function stringx.split_and_trim(s, delim)
	local kept = {}
	for _, piece in ipairs(stringx.split(s, delim)) do
		local trimmed = stringx.trim(piece)
		if trimmed ~= "" then
			kept[#kept + 1] = trimmed
		end
	end
	return kept
end
|
|
|
|
|
2023-02-12 22:00:04 +00:00
|
|
|
--title-case a string
--upper-cases a lowercase letter at the very start of the string, and any
--lowercase letter that directly follows a whitespace character
--"quick brown fox" becomes "Quick Brown Fox"
function stringx.title_case(s)
	--first the letters after whitespace (the match includes the whitespace
	--character, which upper() leaves unchanged), then the leading letter;
	--parenthesised to return only the string, not gsub's match count
	return (s:gsub("%s%l", string.upper):gsub("^%l", string.upper))
end
|
|
|
|
|
2020-04-17 00:35:00 +00:00
|
|
|
return stringx
|