501 lines
13 KiB
Julia
501 lines
13 KiB
Julia
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||
|
||
# starts with and ends with predicates
|
||
|
||
"""
|
||
startswith(s::AbstractString, prefix::AbstractString)
|
||
|
||
Returns `true` if `s` starts with `prefix`. If `prefix` is a vector or set
|
||
of characters, tests whether the first character of `s` belongs to that set.
|
||
|
||
See also [`endswith`](@ref).
|
||
|
||
```jldoctest
|
||
julia> startswith("JuliaLang", "Julia")
|
||
true
|
||
```
|
||
"""
|
||
function startswith(a::AbstractString, b::AbstractString)
    # `a` and `b` may be different string types whose iteration states advance
    # at different rates, so each string is tracked with its own state:
    # `i` belongs to `a`, `j` belongs to `b`.
    i = start(a)
    j = start(b)
    while !done(a,i) && !done(b,j)
        c, i = next(a,i)
        d, j = next(b,j)
        (c != d) && (return false)
    end
    # `b` is a prefix of `a` only if every character of `b` was consumed;
    # if `a` ran out first, `b` is longer than `a` and cannot be a prefix.
    done(b,j)
end
|
||
# Character-set variant: an empty string never matches; otherwise the first
# character of `str` is tested for membership in `chars`.
function startswith(str::AbstractString, chars::Chars)
    isempty(str) && return false
    return first(str) in chars
end
|
||
|
||
"""
|
||
endswith(s::AbstractString, suffix::AbstractString)
|
||
|
||
Returns `true` if `s` ends with `suffix`. If `suffix` is a vector or set of
|
||
characters, tests whether the last character of `s` belongs to that set.
|
||
|
||
See also [`startswith`](@ref).
|
||
|
||
```jldoctest
|
||
julia> endswith("Sunday", "day")
|
||
true
|
||
```
|
||
"""
|
||
function endswith(a::AbstractString, b::AbstractString)
    # Walk both strings backwards from their last character, comparing as we go.
    firsta, firstb = start(a), start(b)
    posa, posb = endof(a), endof(b)
    while posa >= firsta && posb >= firstb
        if a[posa] != b[posb]
            return false
        end
        posa = prevind(a, posa)
        posb = prevind(b, posb)
    end
    # The suffix matched only if we stepped past the beginning of `b`.
    return posb < firstb
end
|
||
# Character-set variant: an empty string never matches; otherwise the last
# character of `str` is tested for membership in `chars`.
function endswith(str::AbstractString, chars::Chars)
    isempty(str) && return false
    return last(str) in chars
end
|
||
|
||
# Fast path for `String`: compare the first `b.len` bytes of both strings with `memcmp`.
function startswith(a::String, b::String)
    nb = b.len
    a.len >= nb || return false
    return ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, nb) == 0
end
|
||
# Byte-vector variant: compare the first `length(b)` bytes of both vectors with `memcmp`.
function startswith(a::Vector{UInt8}, b::Vector{UInt8})
    nb = length(b)
    length(a) >= nb || return false
    return ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, nb) == 0
end
|
||
|
||
# TODO: fast endswith
|
||
|
||
"""
|
||
chop(s::AbstractString)
|
||
|
||
Remove the last character from `s`.
|
||
|
||
```jldoctest
|
||
julia> a = "March"
|
||
"March"
|
||
|
||
julia> chop(a)
|
||
"Marc"
|
||
```
|
||
"""
|
||
function chop(s::AbstractString)
    # The substring stops one index before the index of the last character.
    stop = endof(s) - 1
    return SubString(s, 1, stop)
end
|
||
|
||
"""
|
||
chomp(s::AbstractString)
|
||
|
||
Remove a single trailing newline from a string.
|
||
|
||
```jldoctest
|
||
julia> chomp("Hello\\n")
|
||
"Hello"
|
||
```
|
||
"""
|
||
function chomp(s::AbstractString)
    last_idx = endof(s)
    # Nothing to remove when the string is empty or does not end in '\n'.
    if last_idx < 1 || s[last_idx] != '\n'
        return SubString(s, 1, last_idx)
    end
    before = prevind(s, last_idx)
    # A lone '\n': drop just that character.
    if before < 1 || s[before] != '\r'
        return SubString(s, 1, last_idx - 1)
    end
    # A "\r\n" pair: drop both characters.
    return SubString(s, 1, before - 1)
end
|
||
# `String` specialization of `chomp` that inspects code units directly:
# 0x0a is '\n' and 0x0d is '\r'.
function chomp(s::String)
    n = endof(s)
    # No trailing newline: keep everything.
    (n < 1 || codeunit(s, n) != 0x0a) && return SubString(s, 1, n)
    # Trailing "\n" not preceded by "\r": drop one code unit.
    (n < 2 || codeunit(s, n-1) != 0x0d) && return SubString(s, 1, n-1)
    # Trailing "\r\n": drop two code units.
    return SubString(s, 1, n-2)
end
|
||
|
||
# NOTE: use with caution -- breaks the immutable string convention!
|
||
# TODO: this is hard to provide with the new representation
|
||
#function chomp!(s::String)
|
||
# if !isempty(s) && codeunit(s,s.len) == 0x0a
|
||
# n = (endof(s) < 2 || s.data[end-1] != 0x0d) ? 1 : 2
|
||
# ccall(:jl_array_del_end, Void, (Any, UInt), s.data, n)
|
||
# end
|
||
# return s
|
||
#end
|
||
# copying fallback for other string types: simply forwards to the non-mutating `chomp`
function chomp!(s::AbstractString)
    return chomp(s)
end
|
||
|
||
# Default delimiter characters (space, tab, newline, vertical tab, form feed,
# carriage return) used by `lstrip`/`rstrip` when no `chars` is given and by
# the one-argument `split`.
const _default_delims = [' ','\t','\n','\v','\f','\r']
|
||
|
||
"""
|
||
lstrip(s::AbstractString[, chars::Chars])
|
||
|
||
Return `s` with any leading whitespace and delimiters removed.
|
||
The default delimiters to remove are `' '`, `\\t`, `\\n`, `\\v`,
|
||
`\\f`, and `\\r`.
|
||
If `chars` (a character, or vector or set of characters) is provided,
|
||
instead remove characters contained in it.
|
||
|
||
```jldoctest
|
||
julia> a = lpad("March", 20)
|
||
" March"
|
||
|
||
julia> lstrip(a)
|
||
"March"
|
||
```
|
||
"""
|
||
function lstrip(s::AbstractString, chars::Chars=_default_delims)
    pos = start(s)
    while !done(s, pos)
        ch, after = next(s, pos)
        # First character not in `chars`: everything from here on is kept.
        ch in chars || return s[pos:end]
        pos = after
    end
    # Every character was a delimiter (or `s` is empty): return an empty slice.
    return s[end+1:end]
end
|
||
|
||
"""
|
||
rstrip(s::AbstractString[, chars::Chars])
|
||
|
||
Return `s` with any trailing whitespace and delimiters removed.
|
||
The default delimiters to remove are `' '`, `\\t`, `\\n`, `\\v`,
|
||
`\\f`, and `\\r`.
|
||
If `chars` (a character, or vector or set of characters) is provided,
|
||
instead remove characters contained in it.
|
||
|
||
```jldoctest
|
||
julia> a = rpad("March", 20)
|
||
"March "
|
||
|
||
julia> rstrip(a)
|
||
"March"
|
||
```
|
||
"""
|
||
function rstrip(s::AbstractString, chars::Chars=_default_delims)
    # Scan the string back-to-front through a reversed view.
    rev = RevString(s)
    pos = start(rev)
    while !done(rev, pos)
        ch, after = next(rev, pos)
        # First character (from the end) not in `chars`: map the reversed
        # index back to an index into `s` and keep everything up to it.
        ch in chars || return s[1:end-pos+1]
        pos = after
    end
    # Every character was a delimiter (or `s` is empty): return an empty slice.
    return s[1:0]
end
|
||
|
||
"""
|
||
strip(s::AbstractString, [chars::Chars])
|
||
|
||
Return `s` with any leading and trailing whitespace removed.
|
||
If `chars` (a character, or vector or set of characters) is provided,
|
||
instead remove characters contained in it.
|
||
|
||
```jldoctest
|
||
julia> strip("{3, 5}\\n", ['{', '}', '\\n'])
|
||
"3, 5"
|
||
```
|
||
"""
|
||
function strip(s::AbstractString)
    # Trim the right end first, then the left end, using the default delimiters.
    return lstrip(rstrip(s))
end
function strip(s::AbstractString, chars::Chars)
    # Same order as above, but removing only characters contained in `chars`.
    trimmed_right = rstrip(s, chars)
    return lstrip(trimmed_right, chars)
end
|
||
|
||
## string padding functions ##
|
||
|
||
# Left-pad `s` with copies of `p` until it is `n` columns wide (as measured by
# `strwidth`). `s` is returned unchanged when it is already wide enough.
function lpad(s::AbstractString, n::Integer, p::AbstractString=" ")
    deficit = n - strwidth(s)
    deficit <= 0 && return s
    padwidth = strwidth(p)
    # Single-column padding: one copy of `p` per missing column.
    padwidth == 1 && return string(p^deficit, s)
    # Wider padding: as many whole copies as fit, then a leading piece of `p`.
    reps = div(deficit, padwidth)
    leftover = deficit - reps*padwidth
    stop = -1    # makes `p[1:stop]` empty when nothing is left over
    if leftover != 0
        stop = chr2ind(p, leftover)
    end
    return string(p^reps, p[1:stop], s)
end
|
||
|
||
# Right-pad `s` with copies of `p` until it is `n` columns wide (as measured by
# `strwidth`). `s` is returned unchanged when it is already wide enough.
function rpad(s::AbstractString, n::Integer, p::AbstractString=" ")
    deficit = n - strwidth(s)
    deficit <= 0 && return s
    padwidth = strwidth(p)
    # Single-column padding: one copy of `p` per missing column.
    padwidth == 1 && return string(s, p^deficit)
    # Wider padding: as many whole copies as fit, then a leading piece of `p`.
    reps = div(deficit, padwidth)
    leftover = deficit - reps*padwidth
    stop = -1    # makes `p[1:stop]` empty when nothing is left over
    if leftover != 0
        stop = chr2ind(p, leftover)
    end
    return string(s, p^reps, p[1:stop])
end
|
||
|
||
"""
|
||
lpad(s, n::Integer, p::AbstractString=" ")
|
||
|
||
Make a string at least `n` columns wide when printed by padding `s` on the left
|
||
with copies of `p`.
|
||
|
||
```jldoctest
|
||
julia> lpad("March",10)
|
||
" March"
|
||
```
|
||
"""
|
||
function lpad(s, n::Integer, p=" ")
    # Convert both the value and the padding to strings, then defer to the
    # string method.
    return lpad(string(s), n, string(p))
end
|
||
|
||
"""
|
||
rpad(s, n::Integer, p::AbstractString=" ")
|
||
|
||
Make a string at least `n` columns wide when printed by padding `s` on the right
|
||
with copies of `p`.
|
||
|
||
```jldoctest
|
||
julia> rpad("March",20)
|
||
"March "
|
||
```
|
||
"""
|
||
function rpad(s, n::Integer, p=" ")
    # Convert both the value and the padding to strings, then defer to the
    # string method.
    return rpad(string(s), n, string(p))
end
|
||
# Center `s` in a field of `n` columns: left-pad to the midpoint width
# `div(n + strwidth(s), 2)`, then right-pad the result to `n`.
function cpad(s, n::Integer, p=" ")
    left_width = div(n + strwidth(s), 2)
    return rpad(lpad(s, left_width, p), n, p)
end
|
||
|
||
# splitter can be a Char, Vector{Char}, AbstractString, Regex, ...
|
||
# any splitter that provides search(s::AbstractString, splitter)
|
||
# When the input is already a `SubString`, the pieces are collected into a
# vector of that same type `T`.
function split(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:SubString}
    pieces = T[]
    return _split(str, splitter, limit, keep, pieces)
end
|
||
|
||
"""
|
||
split(s::AbstractString, [chars]; limit::Integer=0, keep::Bool=true)
|
||
|
||
Return an array of substrings by splitting the given string on occurrences of the given
|
||
character delimiters, which may be specified in any of the formats allowed by `search`'s
|
||
second argument (i.e. a single character, collection of characters, string, or regular
|
||
expression). If `chars` is omitted, it defaults to the set of all space characters, and
|
||
`keep` is taken to be `false`. The two keyword arguments are optional: they are a
|
||
maximum size for the result and a flag determining whether empty fields should be kept in
|
||
the result.
|
||
|
||
```jldoctest
|
||
julia> a = "Ma.rch"
|
||
"Ma.rch"
|
||
|
||
julia> split(a,".")
|
||
2-element Array{SubString{String},1}:
|
||
"Ma"
|
||
"rch"
|
||
```
|
||
"""
|
||
function split(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:AbstractString}
    # Pieces are collected as `SubString`s of the input string type.
    pieces = SubString{T}[]
    return _split(str, splitter, limit, keep, pieces)
end
|
||
# Worker for `split`: scans `str` left to right with `search`, pushing each
# field onto `strs` and returning `strs`.
#   fieldstart - index where the field currently being collected begins
#   hitfirst   - first index of the most recent match (0 when there is none)
#   resume     - index just after the most recent match
function _split(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, strs::Array)
    fieldstart = start(str)
    lastidx = endof(str)
    hit = search(str, splitter, fieldstart)
    hitfirst, resume = first(hit), nextind(str, last(hit))
    # Stop once no match is left or `limit-1` fields have been collected
    # (the remainder of the string becomes the final field below).
    while 0 < hitfirst <= lastidx && length(strs) != limit-1
        if fieldstart < resume
            if keep_empty || fieldstart < hitfirst
                push!(strs, SubString(str, fieldstart, prevind(str, hitfirst)))
            end
            fieldstart = resume
        end
        # Guarantee forward progress when the match did not advance past `hitfirst`.
        if resume <= hitfirst
            resume = nextind(str, hitfirst)
        end
        hit = search(str, splitter, resume)
        hitfirst, resume = first(hit), nextind(str, last(hit))
    end
    # Trailing field: whatever remains after the last processed match.
    if keep_empty || !done(str, fieldstart)
        push!(strs, SubString(str, fieldstart))
    end
    return strs
end
|
||
|
||
# a bit oddball, but standard behavior in Perl, Ruby & Python:
|
||
function split(str::AbstractString)
    # With no splitter given: split on the default delimiters, no limit,
    # and drop empty fields.
    return split(str, _default_delims; limit=0, keep=false)
end
|
||
|
||
# When the input is already a `SubString`, the pieces are collected into a
# vector of that same type `T`.
function rsplit(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:SubString}
    pieces = T[]
    return _rsplit(str, splitter, limit, keep, pieces)
end
|
||
|
||
"""
|
||
rsplit(s::AbstractString, [chars]; limit::Integer=0, keep::Bool=true)
|
||
|
||
Similar to [`split`](@ref), but starting from the end of the string.
|
||
|
||
```jldoctest
|
||
julia> a = "M.a.r.c.h"
|
||
"M.a.r.c.h"
|
||
|
||
julia> rsplit(a,".")
|
||
5-element Array{SubString{String},1}:
|
||
"M"
|
||
"a"
|
||
"r"
|
||
"c"
|
||
"h"
|
||
|
||
julia> rsplit(a,".";limit=1)
|
||
1-element Array{SubString{String},1}:
|
||
"M.a.r.c.h"
|
||
|
||
julia> rsplit(a,".";limit=2)
|
||
2-element Array{SubString{String},1}:
|
||
"M.a.r.c"
|
||
"h"
|
||
```
|
||
"""
|
||
function rsplit(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:AbstractString}
    # Pieces are collected as `SubString`s of the input string type.
    pieces = SubString{T}[]
    return _rsplit(str, splitter, limit, keep, pieces)
end
|
||
# Worker for `rsplit`: scans `str` right to left with `rsearch`, prepending
# each field to `strs` and returning `strs`.
#   n - last index of the field currently being collected
#   j - index just before the most recent match (first(r)-1)
#   k - last index of the most recent match
function _rsplit(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, strs::Array)
    i = start(str)
    n = endof(str)
    r = rsearch(str,splitter)
    j = first(r)-1
    k = last(r)
    # Stop once no match is left or `limit-1` fields have been collected
    # (the remainder of the string becomes the leading field below).
    while((0 <= j < n) && (length(strs) != limit-1))
        if i <= k
            # The field begins at the character following the match. `k` is
            # the index of the match's last character, so `nextind` is needed:
            # `k+1` points inside that character when it is multi-byte and is
            # then not a valid start index for `SubString`.
            (keep_empty || (k < n)) && unshift!(strs, SubString(str,nextind(str,k),n))
            n = j
        end
        # Guarantee backward progress when the match did not move past `j`.
        (k <= j) && (j = prevind(str,j))
        r = rsearch(str,splitter,j)
        j = first(r)-1
        k = last(r)
    end
    # Leading field: whatever remains before the last processed match.
    (keep_empty || (n > 0)) && unshift!(strs, SubString(str,1,n))
    return strs
end
|
||
#rsplit(str::AbstractString) = rsplit(str, _default_delims, 0, false)
|
||
|
||
# Write the replacement for one match to `io`.
# Generic case: the replacement value itself is printed.
function _replace(io, repl, str, r, pattern)
    print(io, repl)
end
# Function case: `repl` is called with the matched substring (the range `r`
# of `str`) and its result is printed.
function _replace(io, repl::Function, str, r, pattern)
    matched = SubString(str, first(r), last(r))
    print(io, repl(matched))
end
|
||
|
||
# Core implementation of `replace` for `String`: copies unmatched stretches of
# `str` verbatim into a buffer and emits a replacement for each match of
# `pattern`, stopping after `limit` matches when `limit` is positive.
#   copyfrom - index of the first byte not yet written to the output
#   hitfirst - first index of the current match (0 when there is none)
#   hitlast  - last index of the current match, reused as the search resume index
function replace(str::String, pattern, repl, limit::Integer)
    nrepl = 1
    lastidx = endof(str)
    origin = start(str)
    copyfrom = origin
    hit = search(str, pattern, copyfrom)
    hitfirst, hitlast = first(hit), last(hit)
    # Output buffer pre-sized to 1.2x the input, then reset to empty.
    capacity = floor(Int, 1.2sizeof(str))
    out = IOBuffer(StringVector(capacity), true, true)
    out.size = 0
    out.ptr = 1
    while hitfirst != 0
        if copyfrom == origin || copyfrom <= hitlast
            # Copy the bytes between the previous match and this one, then the replacement.
            unsafe_write(out, pointer(str, copyfrom), UInt(hitfirst - copyfrom))
            _replace(out, repl, str, hit, pattern)
        end
        if hitlast < hitfirst
            # Empty match: keep the character at `hitfirst` for copying and
            # resume the search after it.
            copyfrom = hitfirst
            hitlast = nextind(str, hitfirst)
        else
            hitlast = nextind(str, hitlast)
            copyfrom = hitlast
        end
        hitfirst > lastidx && break
        hit = search(str, pattern, hitlast)
        hitfirst, hitlast = first(hit), last(hit)
        nrepl == limit && break
        nrepl += 1
    end
    # Copy whatever follows the last processed match.
    write(out, SubString(str, copyfrom))
    return String(take!(out))
end
|
||
|
||
"""
|
||
replace(string::AbstractString, pat, r[, n::Integer=0])
|
||
|
||
Search for the given pattern `pat`, and replace each occurrence with `r`. If `n` is
|
||
provided, replace at most `n` occurrences. As with search, the second argument may be a
|
||
single character, a vector or a set of characters, a string, or a regular expression. If `r`
|
||
is a function, each occurrence is replaced with `r(s)` where `s` is the matched substring.
|
||
If `pat` is a regular expression and `r` is a `SubstitutionString`, then capture group
|
||
references in `r` are replaced with the corresponding matched text.
|
||
"""
|
||
function replace(s::AbstractString, pat, f, n::Integer)
    # Generic strings are converted to `String` and handled by that method.
    return replace(String(s), pat, f, n)
end
function replace(s::AbstractString, pat, r)
    # Without an explicit count, pass 0 as the limit.
    return replace(s, pat, r, 0)
end
|
||
|
||
# hex <-> bytes conversion
|
||
|
||
"""
|
||
hex2bytes(s::AbstractString)
|
||
|
||
Convert an arbitrarily long hexadecimal string to its binary representation. Returns an
|
||
`Array{UInt8,1}`, i.e. an array of bytes.
|
||
|
||
```jldoctest
|
||
julia> a = hex(12345)
|
||
"3039"
|
||
|
||
julia> hex2bytes(a)
|
||
2-element Array{UInt8,1}:
|
||
0x30
|
||
0x39
|
||
```
|
||
"""
|
||
function hex2bytes(s::AbstractString)
    bytes = zeros(UInt8, div(endof(s), 2))
    pos, nbytes = start(s), 0
    while !done(s, pos)
        # High nibble.
        hi, pos = next(s, pos)
        if '0' <= hi <= '9'
            val = hi - '0'
        elseif 'a' <= hi <= 'f'
            val = hi - 'a' + 10
        elseif 'A' <= hi <= 'F'
            val = hi - 'A' + 10
        else
            throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
        end
        # Digits come in pairs; a dangling high nibble is an error.
        if done(s, pos)
            throw(ArgumentError("string length must be even: length($(repr(s))) == $(length(s))"))
        end
        # Low nibble, combined with the shifted high nibble.
        lo, pos = next(s, pos)
        if '0' <= lo <= '9'
            val = val << 4 + lo - '0'
        elseif 'a' <= lo <= 'f'
            val = val << 4 + lo - 'a' + 10
        elseif 'A' <= lo <= 'F'
            val = val << 4 + lo - 'A' + 10
        else
            throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
        end
        nbytes += 1
        bytes[nbytes] = val
    end
    resize!(bytes, nbytes)
    return bytes
end
|
||
|
||
"""
|
||
bytes2hex(bin_arr::Array{UInt8, 1}) -> String
|
||
|
||
Convert an array of bytes to its hexadecimal representation.
|
||
All characters are in lower-case.
|
||
|
||
```jldoctest
|
||
julia> a = hex(12345)
|
||
"3039"
|
||
|
||
julia> b = hex2bytes(a)
|
||
2-element Array{UInt8,1}:
|
||
0x30
|
||
0x39
|
||
|
||
julia> bytes2hex(b)
|
||
"3039"
|
||
```
|
||
"""
|
||
function bytes2hex(a::AbstractArray{UInt8})
    # Two output characters per input byte.
    out = Vector{UInt8}(2*length(a))
    pos = 1
    for byte in a
        hi = byte >> 4      # upper four bits
        lo = byte & 0xf     # lower four bits
        # `hex_chars` is 1-indexed, hence the `1 +` offset.
        out[pos] = hex_chars[1 + hi]
        out[pos + 1] = hex_chars[1 + lo]
        pos += 2
    end
    return String(out)
end
|
||
|
||
# check for pure ASCII-ness
|
||
|
||
# Validate that every byte of `s` is below 0x80 and return `s` itself;
# the first offending byte position is reported in the thrown `ArgumentError`.
function ascii(s::String)
    bytes = Vector{UInt8}(s)
    for i in eachindex(bytes)
        if bytes[i] >= 0x80
            throw(ArgumentError("invalid ASCII at index $i in $(repr(s))"))
        end
    end
    return s
end
|
||
|
||
"""
|
||
ascii(s::AbstractString)
|
||
|
||
Convert a string to `String` type and check that it contains only ASCII data, otherwise
|
||
throwing an `ArgumentError` indicating the position of the first non-ASCII byte.
|
||
|
||
```jldoctest
|
||
julia> ascii("abcdeγfgh")
|
||
ERROR: ArgumentError: invalid ASCII at index 6 in "abcdeγfgh"
|
||
Stacktrace:
|
||
[1] ascii(::String) at ./strings/util.jl:479
|
||
|
||
julia> ascii("abcdefgh")
|
||
"abcdefgh"
|
||
```
|
||
"""
|
||
function ascii(x::AbstractString)
    # Convert to `String` first, then run the byte-level check on the result.
    return ascii(convert(String, x))
end
|