2020-04-17 00:35:00 +00:00
|
|
|
--[[
|
|
|
|
extra string routines
|
|
|
|
]]
|
|
|
|
|
2021-07-05 06:12:16 +00:00
|
|
|
local path = (...):gsub("stringx", "")
|
2020-05-19 02:03:45 +00:00
|
|
|
local assert = require(path .. "assert")
|
2021-07-05 06:12:16 +00:00
|
|
|
local pretty = require(path .. "pretty")
|
2020-05-19 02:03:45 +00:00
|
|
|
|
2020-04-17 00:35:00 +00:00
|
|
|
--the module table
--any key not defined on stringx itself is looked up in the standard string library
local stringx = {}
setmetatable(stringx, { __index = string })
|
|
|
|
|
|
|
|
--split a string on a delimiter into an ordered table
--	self      - the string to split
--	delim     - the delimiter string (defaults to "")
--	            an empty delimiter splits into single-byte strings,
--	            and max_split is not applied in that case
--	max_split - the maximum number of split sites to use (defaults to no limit)
--	            must be a number greater than zero
--	            the remainder of the string after the last used site is
--	            returned as the final element
--returns a new sequence containing the pieces in order
function stringx.split(self, delim, max_split)
	delim = delim or ""
	max_split = max_split ~= nil and max_split or math.huge

	assert:type(self, "string", "stringx.split - self", 1)
	assert:type(delim, "string", "stringx.split - delim", 1)
	assert:type(max_split, "number", "stringx.split - max_split", 1)

	--(max_split always has a value by this point, so no guard is needed)
	assert(max_split > 0, "max_split must be non-zero and positive!")

	--we try to create as little garbage as possible!
	--only one table to contain the result, plus the split strings.
	--so we do two passes, and work with the bytes underlying the string
	--partly because string.find is not compiled on older luajit :)
	local res = {}
	local length = self:len()
	local delim_length = delim:len()

	--empty delim? split to individual characters
	if delim_length == 0 then
		for i = 1, length do
			res[i] = self:sub(i, i)
		end
		return res
	end

	local delim_start = delim:byte(1)

	--pass 1
	--collect split sites (the index where each used delimiter starts)
	local i = 1
	while i <= length do
		--cheap check on the first byte, then confirm the rest of the delimiter
		--(reading past the end of the string yields nil, which never matches)
		local has_whole_delim = self:byte(i) == delim_start
		if has_whole_delim then
			for j = 2, delim_length do
				if self:byte(i + j - 1) ~= delim:byte(j) then
					has_whole_delim = false
					break
				end
			end
		end

		if has_whole_delim then
			--out of allowed splits; the rest of the string is the final piece
			if #res >= max_split then
				break
			end
			res[#res + 1] = i
			--resume scanning after the delimiter we just consumed
			i = i + delim_length
		else
			--only step a single byte on a failed match:
			--a real delimiter may begin inside the bytes we just compared
			--(eg "ab" within "aab"), and a bigger step would jump over it
			i = i + 1
		end
	end

	--pass 2
	--collect substrings, overwriting each split site with the piece before it
	local piece_start = 1
	for si = 1, #res do
		local site = res[si]
		res[si] = self:sub(piece_start, site - 1)
		piece_start = site + delim_length
	end
	--add the final section
	res[#res + 1] = self:sub(piece_start, -1)

	--return the collection
	return res
end
|
|
|
|
|
2021-07-05 06:12:16 +00:00
|
|
|
--expose the `string` member of the required pretty module as stringx.pretty
stringx.pretty = pretty.string
|
2020-04-17 00:45:15 +00:00
|
|
|
|
2020-11-12 05:36:06 +00:00
|
|
|
--lookup table of whitespace byte values
--keyed by the byte value of space, tab, newline and carriage return; each maps to true
local _whitespace_bytes = {}
for _, byte_value in ipairs({ (" \t\n\r"):byte(1, -1) }) do
	_whitespace_bytes[byte_value] = true
end
|
|
|
|
|
|
|
|
--strip whitespace from both ends of a string
--	whitespace here means space, tab, newline and carriage return only;
--	form feeds, vertical tabs and backspaces are left alone
--
--	returns the input string itself when there is nothing to trim,
--	so a new string is only created when something is actually removed
function stringx.trim(s)
	local len = s:len()

	--walk in from the front to the first byte that isn't whitespace
	local first = 1
	while first <= len and _whitespace_bytes[s:byte(first)] do
		first = first + 1
	end

	--ran off the end: empty, or whitespace all the way through
	if first > len then
		return ""
	end

	--walk in from the back; we know a non-whitespace byte exists,
	--so this stops at or before `first`
	local last = len
	while _whitespace_bytes[s:byte(last)] do
		last = last - 1
	end

	--nothing to remove at either end
	if first == 1 and last == len then
		return s
	end

	return s:sub(first, last)
end
|
|
|
|
|
2021-04-14 07:06:31 +00:00
|
|
|
--trim whitespace off the start of a string
--	uses the same whitespace set as the _whitespace_bytes table
--	(space, tab, newline, carriage return)
--
--	returns the input string itself when there is nothing to trim,
--	and "" when the string is entirely whitespace
function stringx.ltrim(s)
	local len = #s
	--index of the first non-whitespace byte
	--starts one past the end, so "none found" trims everything
	local head = len + 1
	for i = 1, len do
		if not _whitespace_bytes[s:byte(i)] then
			head = i
			break
		end
	end
	--nothing to trim (this also covers the empty string, where len + 1 == 1)
	if head == 1 then
		return s
	end
	return s:sub(head)
end
|
|
|
|
|
2021-04-14 07:06:31 +00:00
|
|
|
--trim whitespace off the end of a string
--	uses the same whitespace set as the _whitespace_bytes table
--	(space, tab, newline, carriage return)
--
--	returns the input string itself when there is nothing to trim,
--	and "" when the string is entirely whitespace
function stringx.rtrim(s)
	local len = #s
	--index of the last non-whitespace byte
	--starts at zero, so "none found" trims everything
	local tail = 0
	for i = len, 1, -1 do
		if not _whitespace_bytes[s:byte(i)] then
			tail = i
			break
		end
	end
	--nothing to trim (this also covers the empty string, where len == 0)
	if tail == len then
		return s
	end
	return s:sub(1, tail)
end
|
|
|
|
|
2020-11-12 03:32:56 +00:00
|
|
|
--remove a common leading indent from a multi-line string
--	the indent is whatever run of spaces/tabs starts the first non-blank line;
--	blank lines at the start of the string are dropped
--	lines that start with that indent have it removed;
--	lines shorter than the indent that consist only of a prefix of it become empty;
--	any other line is left as-is
--
--	s                   - the string to process
--	keep_trailing_empty - if falsy, a single empty final line is dropped
--	                      (only applies when an indent was actually found)
--
--	the result is joined with "\r\n" if s contains any "\r\n", otherwise "\n"
function stringx.deindent(s, keep_trailing_empty)
	--pick the newline convention from whether any CRLF pair is present
	local newline = "\n"
	if s:find("\r\n", nil, true) then
		newline = "\r\n"
	end

	local lines = stringx.split(s, newline)

	--discard blank lines from the front
	while lines[1] == "" do
		table.remove(lines, 1)
	end

	--nothing left at all
	if #lines == 0 then
		return ""
	end

	--the first remaining line decides the indent
	local indent = lines[1]:match("^([ \t]*)")
	local indent_len = indent and #indent or 0

	--first line isn't indented; hand the lines back untouched
	if indent_len == 0 then
		return table.concat(lines, newline)
	end

	local res = {}
	for idx = 1, #lines do
		local line = lines[idx]
		local line_start = line:sub(1, indent_len)
		local start_len = #line_start

		--strip on a full indent match...
		local strip = line_start == indent
		--...or when the whole (short) line is just the beginning of the indent
		if not strip and start_len < indent_len then
			strip = line_start == indent:sub(1, start_len)
		end

		if strip then
			line = line:sub(start_len + 1)
		end
		res[idx] = line
	end

	--drop one empty line from the end unless asked to keep it
	if not keep_trailing_empty and res[#res] == "" then
		res[#res] = nil
	end

	return table.concat(res, newline)
end
|
|
|
|
|
|
|
|
--alias
|
|
|
|
--stringx.dedent is a second name for the same function as stringx.deindent
stringx.dedent = stringx.deindent
|
|
|
|
|
2020-11-19 06:18:38 +00:00
|
|
|
--fill in $name style placeholders in a string
--	a placeholder is a "$" followed by one or more letters, digits or underscores
--	sub is handed straight to gsub as the replacement, receiving the name
--	without the "$"; typically a table or a function
--	ie stringx.apply_template("hello $name", {name = "tom"}) == "hello tom"
--returns only the resulting string (gsub's match count is discarded)
function stringx.apply_template(s, sub)
	return (s:gsub("%$([%w_]+)", sub))
end
|
|
|
|
|
2021-07-05 22:29:30 +00:00
|
|
|
--test whether needle occurs anywhere inside haystack
--compares bytes in place, so no intermediate strings are created
--an empty needle is always considered to be contained
function stringx.contains(haystack, needle)
	local needle_len = #needle
	--the last index at which needle could still fit inside haystack
	local last_start = #haystack - needle_len + 1

	local start = 1
	while start <= last_start do
		--count how many leading bytes of needle match at this position
		local matched = 0
		while matched < needle_len
			and haystack:byte(start + matched) == needle:byte(matched + 1)
		do
			matched = matched + 1
		end
		if matched == needle_len then
			return true
		end
		start = start + 1
	end

	return false
end
|
|
|
|
|
2021-03-02 00:47:28 +00:00
|
|
|
--test whether s begins with prefix
--compares bytes in place, so no intermediate strings are created
--an empty prefix always matches
function stringx.starts_with(s, prefix)
	local prefix_len = #prefix
	local at = 1
	while at <= prefix_len do
		--if s is shorter than prefix, s:byte yields nil here and the test fails
		if prefix:byte(at) ~= s:byte(at) then
			return false
		end
		at = at + 1
	end
	return true
end
|
|
|
|
|
2021-04-14 07:06:31 +00:00
|
|
|
--test whether s finishes with suffix
--compares bytes in place, so no intermediate strings are created
--an empty suffix always matches
function stringx.ends_with(s, suffix)
	local suffix_len = #suffix
	--adding this to an index in suffix gives the lined-up index in s
	local shift = #s - suffix_len
	--compare from the last byte backwards
	for k = suffix_len, 1, -1 do
		--if s is shorter than suffix, the index into s reaches 0 (no byte)
		--before it can go negative, so the comparison fails there
		if s:byte(shift + k) ~= suffix:byte(k) then
			return false
		end
	end
	return true
end
|
|
|
|
|
2020-04-17 00:35:00 +00:00
|
|
|
--hand the module table back to whoever loaded this file
return stringx
|