--batteries/stringx.lua (264 lines, 5.4 KiB, Lua)

--[[
extra string routines
]]
local path = (...):gsub("stringx", "")
local assert = require(path .. "assert")
local pretty = require(path .. "pretty")
--the module table
--any key that is not defined on it is looked up in the standard string library,
--so the built-in string functions are reachable through stringx as well
local stringx = {}
setmetatable(stringx, { __index = string })
--split a string on a delimiter into an ordered table
--	self  - the string to split
--	delim - the delimiter string; defaults to "", which splits into single bytes
--returns a new sequence holding the pieces in order; adjacent delimiters produce
--empty strings, and a string without any delimiter comes back as one element
function stringx.split(self, delim)
	delim = delim or ""
	--argument type checks via the project assert module
	assert:type(self, "string", "stringx.split - self", 1)
	assert:type(delim, "string", "stringx.split - delim", 1)

	--we try to create as little garbage as possible!
	--only one table to contain the result, plus the split strings.
	--so we do two passes, and work with the bytes underlying the string
	--partly because string.find is not compiled on older luajit :)
	local res = {}
	local length = self:len()
	local delim_length = delim:len()

	--empty delim? split to individual characters
	if delim_length == 0 then
		for i = 1, length do
			res[i] = self:sub(i, i)
		end
		return res
	end

	local delim_start = delim:byte(1)

	--pass 1
	--collect split sites (the index at which each delimiter starts)
	local i = 1
	while i <= length do
		local has_whole_delim = self:byte(i) == delim_start
		if has_whole_delim then
			--first byte matched, compare the rest of the delimiter
			--(reading past the end of self yields nil, which never matches)
			for j = 2, delim_length do
				if self:byte(i + j - 1) ~= delim:byte(j) then
					has_whole_delim = false
					break
				end
			end
		end
		if has_whole_delim then
			res[#res + 1] = i
			--jump over the whole delimiter so matches never overlap
			i = i + delim_length
		else
			--step a single byte after a miss or a partial match; a real
			--delimiter may start inside the bytes that were just compared
			i = i + 1
		end
	end

	--pass 2
	--swap each split site for the substring that ends just before it
	i = 1
	for si, j in ipairs(res) do
		res[si] = self:sub(i, j - 1)
		i = j + delim_length
	end
	--add the final section
	res[#res + 1] = self:sub(i, -1)
	--return the collection
	return res
end
--expose the string function of the sibling pretty module as stringx.pretty
stringx.pretty = pretty.string
--(generate a map of whitespace byte values)
--keys are the byte values of space, tab, newline and carriage return
--each maps to true, so a plain index tells whether a byte counts as whitespace
local _whitespace_bytes = {}
for _, char in ipairs({ " ", "\t", "\n", "\r" }) do
	_whitespace_bytes[char:byte()] = true
end
--trim all whitespace off the head and tail of a string
--	specifically trims space, tab, newline, and carriage return characters
--	ignores form feeds, vertical tabs, and backspaces
--
--returns s itself when there is nothing to trim, "" when s is empty or made up
--entirely of whitespace, and otherwise a single new substring
function stringx.trim(s)
	--cache
	local len = s:len()
	--find the first byte that is not whitespace
	local head = 0
	for i = 1, len do
		if not _whitespace_bytes[s:byte(i)] then
			head = i
			break
		end
	end
	--no content at all (empty or all whitespace), so there is no tail to find
	if head == 0 then
		return ""
	end
	--find the last byte that is not whitespace
	--the byte at head is known to be content, so this search cannot come up empty
	local tail = len
	for i = len, head, -1 do
		if not _whitespace_bytes[s:byte(i)] then
			tail = i
			break
		end
	end
	--limit ranges means no trim
	if head == 1 and tail == len then
		return s
	end
	--pull out the content
	return s:sub(head, tail)
end
--trim the start of a string
--	strips leading space, tab, newline, and carriage return characters
--returns s itself when it has no leading whitespace, and "" when s is empty
--or made up entirely of whitespace
function stringx.ltrim(s)
	for i = 1, #s do
		if not _whitespace_bytes[s:byte(i)] then
			--content starts at i; hand back the original when nothing was skipped
			if i == 1 then
				return s
			end
			return s:sub(i)
		end
	end
	--never found a content byte, so everything (if anything) was whitespace
	return ""
end
--trim the end of a string
--	strips trailing space, tab, newline, and carriage return characters
--returns s itself when it has no trailing whitespace, and "" when s is empty
--or made up entirely of whitespace
function stringx.rtrim(s)
	local len = #s
	for i = len, 1, -1 do
		if not _whitespace_bytes[s:byte(i)] then
			--content ends at i; hand back the original when nothing was skipped
			if i == len then
				return s
			end
			return s:sub(1, i)
		end
	end
	--never found a content byte, so everything (if anything) was whitespace
	return ""
end
--remove the indentation of the first line from every line of a block of text
--	s                   - the text; lines are split on "\r\n" if that sequence
--	                      appears anywhere in s, otherwise on "\n"
--	keep_trailing_empty - when truthy, a final empty line is kept instead of dropped
--
--leading empty lines are always removed. the run of spaces and tabs at the start
--of the first remaining line is taken as the indent and stripped from every line
--that starts with it; a shorter line that consists only of the beginning of that
--indent becomes empty, and any other line is left untouched.
--if the first remaining line is not indented, the lines are joined back together
--as they are (keep_trailing_empty has no effect on that path).
function stringx.deindent(s, keep_trailing_empty)
	--detect windows or unix newlines
	local newline = s:find("\r\n", nil, true) and "\r\n" or "\n"
	--split along newlines
	local lines = stringx.split(s, newline)
	--strip any leading blank lines
	while lines[1] == "" do
		table.remove(lines, 1)
	end
	--nothing to do
	if #lines == 0 then
		return ""
	end
	--detect indent; the pattern can match zero characters, so this is never nil
	local indent = lines[1]:match("^[ \t]*")
	local indent_len = indent:len()
	--not indented
	if indent_len == 0 then
		return table.concat(lines, newline)
	end
	--de-indent the lines
	local res = {}
	for index, line in ipairs(lines) do
		local line_start = line:sub(1, indent_len)
		local start_len = line_start:len()
		if
			--the line carries the full indent
			line_start == indent
			--or the whole line is just a shorter piece of the indent
			or (
				start_len < indent_len
				and line_start == indent:sub(1, start_len)
			)
		then
			line = line:sub(start_len + 1)
		end
		res[index] = line
	end
	--drop a single trailing empty line unless asked to keep it
	if not keep_trailing_empty and res[#res] == "" then
		table.remove(res)
	end
	return table.concat(res, newline)
end
--alias: stringx.dedent refers to the same function as stringx.deindent
stringx.dedent = stringx.deindent
--fill in the $placeholders of a template string
--a placeholder is "$" followed by one or more alphanumeric characters; the name
--(without the "$") is what gets substituted through string.gsub using `sub`
--	sub - a table (the name is used as the key) or a function (called with the name)
--	      when the lookup or call gives nil or false the placeholder is left as written
--	ie stringx.apply_template("hello $name", {name = "tom"}) == "hello tom"
--returns only the resulting string (the match count from gsub is dropped)
function stringx.apply_template(s, sub)
	return (s:gsub("%$(%w+)", sub))
end
--check if a given string contains another
--(without garbage)
function stringx.contains(haystack, needle)
	--returns true if needle occurs anywhere inside haystack, false otherwise
	--an empty needle is found in any haystack, including an empty one
	--compares byte by byte, so no intermediate strings are created
	local needle_len = #needle
	local last_start = #haystack - needle_len + 1
	for start = 1, last_start do
		--walk forward for as long as the bytes keep agreeing
		local offset = 1
		while
			offset <= needle_len
			and haystack:byte(start + offset - 1) == needle:byte(offset)
		do
			offset = offset + 1
		end
		--walked past the end of the needle, so every byte matched
		if offset > needle_len then
			return true
		end
	end
	return false
end
--check if a given string starts with another
--(without garbage)
--returns true when the first #prefix bytes of s equal prefix, false otherwise
--an empty prefix always matches
function stringx.starts_with(s, prefix)
	local prefix_len = #prefix
	--a prefix longer than the string can never match
	if prefix_len > #s then
		return false
	end
	for i = 1, prefix_len do
		if s:byte(i) ~= prefix:byte(i) then
			return false
		end
	end
	return true
end
--check if a given string ends with another
--(without garbage)
--returns true when the last #suffix bytes of s equal suffix, false otherwise
--an empty suffix always matches
function stringx.ends_with(s, suffix)
	local len = #s
	local suffix_len = #suffix
	--a suffix longer than the string can never match; bailing out here also
	--keeps every index below at 1 or above, so byte() is never asked for
	--position zero or a negative (counted-from-the-end) position
	if suffix_len > len then
		return false
	end
	--compare backwards from the last byte of each string
	for i = 0, suffix_len - 1 do
		if s:byte(len - i) ~= suffix:byte(suffix_len - i) then
			return false
		end
	end
	return true
end
return stringx