Add: julia-0.6.2
Former-commit-id: ccc667cf67d569f3fb3df39aa57c2134755a7551
This commit is contained in:
500
julia-0.6.2/share/julia/base/strings/util.jl
Normal file
500
julia-0.6.2/share/julia/base/strings/util.jl
Normal file
@@ -0,0 +1,500 @@
|
||||
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||||
|
||||
# starts with and ends with predicates
|
||||
|
||||
"""
|
||||
startswith(s::AbstractString, prefix::AbstractString)
|
||||
|
||||
Returns `true` if `s` starts with `prefix`. If `prefix` is a vector or set
|
||||
of characters, tests whether the first character of `s` belongs to that set.
|
||||
|
||||
See also [`endswith`](@ref).
|
||||
|
||||
```jldoctest
|
||||
julia> startswith("JuliaLang", "Julia")
|
||||
true
|
||||
```
|
||||
"""
|
||||
function startswith(a::AbstractString, b::AbstractString)
    # `a` and `b` may be different string types whose indices do not line up,
    # so each string is walked with its own iteration state (`i` for `a`,
    # `j` for `b`). Testing `b` with `a`'s state gives wrong answers as soon
    # as the two index spaces diverge.
    i = start(a)
    j = start(b)
    while !done(a, i) && !done(b, j)
        c, i = next(a, i)
        d, j = next(b, j)
        # First mismatching character: `b` cannot be a prefix of `a`.
        (c != d) && (return false)
    end
    # `b` is a prefix only if it was consumed completely; if `a` ran out
    # first while `b` still has characters left, the result is `false`.
    return done(b, j)
end
|
||||
# Character-set form: true when `str` is non-empty and its first character
# is contained in `chars`.
function startswith(str::AbstractString, chars::Chars)
    isempty(str) && return false
    return first(str) in chars
end
|
||||
|
||||
"""
|
||||
endswith(s::AbstractString, suffix::AbstractString)
|
||||
|
||||
Returns `true` if `s` ends with `suffix`. If `suffix` is a vector or set of
|
||||
characters, tests whether the last character of `s` belongs to that set.
|
||||
|
||||
See also [`startswith`](@ref).
|
||||
|
||||
```jldoctest
|
||||
julia> endswith("Sunday", "day")
|
||||
true
|
||||
```
|
||||
"""
|
||||
function endswith(a::AbstractString, b::AbstractString)
    # Walk both strings backwards, starting at their last characters.
    ia, ib = endof(a), endof(b)
    firsta, firstb = start(a), start(b)
    while ia >= firsta && ib >= firstb
        a[ia] == b[ib] || return false
        ia = prevind(a, ia)
        ib = prevind(b, ib)
    end
    # `b` is a suffix only when its index moved past its first character,
    # i.e. every character of `b` was compared and matched.
    return ib < firstb
end
|
||||
# Character-set form: true when `str` is non-empty and its last character
# is contained in `chars`.
function endswith(str::AbstractString, chars::Chars)
    isempty(str) && return false
    return last(str) in chars
end
|
||||
|
||||
# `String` fast path: when `a` holds at least `b.len` bytes, compare the
# first `b.len` bytes of both strings with `memcmp`.
function startswith(a::String, b::String)
    nbytes = b.len
    a.len < nbytes && return false
    return ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, nbytes) == 0
end
|
||||
# Byte-vector form: `b` is a prefix when `a` is at least as long as `b` and
# their first `length(b)` bytes compare equal under `memcmp`.
function startswith(a::Vector{UInt8}, b::Vector{UInt8})
    nb = length(b)
    length(a) < nb && return false
    return ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, nb) == 0
end
|
||||
|
||||
# TODO: fast endswith
|
||||
|
||||
"""
|
||||
chop(s::AbstractString)
|
||||
|
||||
Remove the last character from `s`.
|
||||
|
||||
```jldoctest
|
||||
julia> a = "March"
|
||||
"March"
|
||||
|
||||
julia> chop(a)
|
||||
"Marc"
|
||||
```
|
||||
"""
|
||||
function chop(s::AbstractString)
    # Stop the substring one index short of the last character's index.
    stop = endof(s) - 1
    return SubString(s, 1, stop)
end
|
||||
|
||||
"""
|
||||
chomp(s::AbstractString)
|
||||
|
||||
Remove a single trailing newline from a string.
|
||||
|
||||
```jldoctest
|
||||
julia> chomp("Hello\\n")
|
||||
"Hello"
|
||||
```
|
||||
"""
|
||||
function chomp(s::AbstractString)
    last_idx = endof(s)
    # Empty string, or the last character is not '\n': keep everything.
    if last_idx < 1 || s[last_idx] != '\n'
        return SubString(s, 1, last_idx)
    end
    before = prevind(s, last_idx)
    # The newline is not preceded by '\r': drop just the newline.
    if before < 1 || s[before] != '\r'
        return SubString(s, 1, last_idx - 1)
    end
    # The string ends in "\r\n": drop both characters.
    return SubString(s, 1, before - 1)
end
|
||||
function chomp(s::String)
    n = endof(s)
    # Empty string, or the last code unit is not LF (0x0a): keep everything.
    (n < 1 || codeunit(s, n) != 0x0a) && return SubString(s, 1, n)
    # The LF is not preceded by CR (0x0d): drop only the LF.
    (n < 2 || codeunit(s, n - 1) != 0x0d) && return SubString(s, 1, n - 1)
    # The string ends in CR LF: drop both code units.
    return SubString(s, 1, n - 2)
end
|
||||
|
||||
# NOTE: use with caution -- breaks the immutable string convention!
|
||||
# TODO: this is hard to provide with the new representation
|
||||
#function chomp!(s::String)
|
||||
# if !isempty(s) && codeunit(s,s.len) == 0x0a
|
||||
# n = (endof(s) < 2 || s.data[end-1] != 0x0d) ? 1 : 2
|
||||
# ccall(:jl_array_del_end, Void, (Any, UInt), s.data, n)
|
||||
# end
|
||||
# return s
|
||||
#end
|
||||
# Copying fallback for other string types: simply delegates to the
# non-mutating `chomp`.
function chomp!(s::AbstractString)
    return chomp(s)
end
|
||||
|
||||
# Characters used as the default `chars` argument of `lstrip`/`rstrip`
# and as the splitter of the one-argument `split`.
const _default_delims = Char[' ', '\t', '\n', '\v', '\f', '\r']
|
||||
|
||||
"""
|
||||
lstrip(s::AbstractString[, chars::Chars])
|
||||
|
||||
Return `s` with any leading whitespace and delimiters removed.
|
||||
The default delimiters to remove are `' '`, `\\t`, `\\n`, `\\v`,
|
||||
`\\f`, and `\\r`.
|
||||
If `chars` (a character, or vector or set of characters) is provided,
|
||||
instead remove characters contained in it.
|
||||
|
||||
```jldoctest
|
||||
julia> a = lpad("March", 20)
|
||||
"               March"
|
||||
|
||||
julia> lstrip(a)
|
||||
"March"
|
||||
```
|
||||
"""
|
||||
function lstrip(s::AbstractString, chars::Chars=_default_delims)
    pos = start(s)
    while !done(s, pos)
        ch, nxt = next(s, pos)
        # First character that is not in `chars`: keep everything from here on.
        ch in chars || return s[pos:end]
        pos = nxt
    end
    # Every character was in `chars` (or `s` is empty): return an empty slice.
    return s[end+1:end]
end
|
||||
|
||||
"""
|
||||
rstrip(s::AbstractString[, chars::Chars])
|
||||
|
||||
Return `s` with any trailing whitespace and delimiters removed.
|
||||
The default delimiters to remove are `' '`, `\\t`, `\\n`, `\\v`,
|
||||
`\\f`, and `\\r`.
|
||||
If `chars` (a character, or vector or set of characters) is provided,
|
||||
instead remove characters contained in it.
|
||||
|
||||
```jldoctest
|
||||
julia> a = rpad("March", 20)
|
||||
"March               "
|
||||
|
||||
julia> rstrip(a)
|
||||
"March"
|
||||
```
|
||||
"""
|
||||
function rstrip(s::AbstractString, chars::Chars=_default_delims)
    # Scan from the end by iterating a reversed view of `s`.
    rev = RevString(s)
    pos = start(rev)
    while !done(rev, pos)
        ch, nxt = next(rev, pos)
        # First character (from the end) that is not in `chars`: cut `s` there.
        # `end-pos+1` presumably maps the reversed position back onto `s`.
        ch in chars || return s[1:end-pos+1]
        pos = nxt
    end
    # Every character was in `chars` (or `s` is empty): return an empty slice.
    return s[1:0]
end
|
||||
|
||||
"""
|
||||
strip(s::AbstractString, [chars::Chars])
|
||||
|
||||
Return `s` with any leading and trailing whitespace removed.
|
||||
If `chars` (a character, or vector or set of characters) is provided,
|
||||
instead remove characters contained in it.
|
||||
|
||||
```jldoctest
|
||||
julia> strip("{3, 5}\\n", ['{', '}', '\\n'])
|
||||
"3, 5"
|
||||
```
|
||||
"""
|
||||
function strip(s::AbstractString)
    # Trim the right side first, then the left, using the default delimiters.
    return lstrip(rstrip(s))
end
function strip(s::AbstractString, chars::Chars)
    # Same order as above, but removing only characters found in `chars`.
    right_trimmed = rstrip(s, chars)
    return lstrip(right_trimmed, chars)
end
|
||||
|
||||
## string padding functions ##
|
||||
|
||||
function lpad(s::AbstractString, n::Integer, p::AbstractString=" ")
    # Number of columns still to be filled on the left.
    gap = n - strwidth(s)
    gap <= 0 && return s
    pad_width = strwidth(p)
    # Single-column pad: repeat it once per missing column.
    pad_width == 1 && return string(p^gap, s)
    # Wider pad: `reps` whole copies plus a leading piece of `p` for the rest.
    reps = div(gap, pad_width)
    leftover = gap - reps * pad_width
    # -1 makes `p[1:stop]` an empty slice when nothing is left over.
    stop = leftover != 0 ? chr2ind(p, leftover) : -1
    return string(p^reps, p[1:stop], s)
end
|
||||
|
||||
function rpad(s::AbstractString, n::Integer, p::AbstractString=" ")
    # Number of columns still to be filled on the right.
    gap = n - strwidth(s)
    gap <= 0 && return s
    pad_width = strwidth(p)
    # Single-column pad: repeat it once per missing column.
    pad_width == 1 && return string(s, p^gap)
    # Wider pad: `reps` whole copies plus a leading piece of `p` for the rest.
    reps = div(gap, pad_width)
    leftover = gap - reps * pad_width
    # -1 makes `p[1:stop]` an empty slice when nothing is left over.
    stop = leftover != 0 ? chr2ind(p, leftover) : -1
    return string(s, p^reps, p[1:stop])
end
|
||||
|
||||
"""
|
||||
lpad(s, n::Integer, p::AbstractString=" ")
|
||||
|
||||
Make a string at least `n` columns wide when printed by padding `s` on the left
|
||||
with copies of `p`.
|
||||
|
||||
```jldoctest
|
||||
julia> lpad("March",10)
|
||||
"     March"
|
||||
```
|
||||
"""
|
||||
function lpad(s, n::Integer, p=" ")
    # Generic entry point: stringify both the value and the pad, then pad.
    text = string(s)
    pad = string(p)
    return lpad(text, n, pad)
end
|
||||
|
||||
"""
|
||||
rpad(s, n::Integer, p::AbstractString=" ")
|
||||
|
||||
Make a string at least `n` columns wide when printed by padding `s` on the right
|
||||
with copies of `p`.
|
||||
|
||||
```jldoctest
|
||||
julia> rpad("March",20)
|
||||
"March               "
|
||||
```
|
||||
"""
|
||||
function rpad(s, n::Integer, p=" ")
    # Generic entry point: stringify both the value and the pad, then pad.
    text = string(s)
    pad = string(p)
    return rpad(text, n, pad)
end
|
||||
# Centre `s` in `n` columns: left-pad to the midpoint of `n` and the width
# of `s`, then right-pad the result out to `n`.
function cpad(s, n::Integer, p=" ")
    left_target = div(n + strwidth(s), 2)
    return rpad(lpad(s, left_target, p), n, p)
end
|
||||
|
||||
# splitter can be a Char, Vector{Char}, AbstractString, Regex, ...
|
||||
# any splitter that provides search(s::AbstractString, splitter)
|
||||
function split(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:SubString}
    # The input is already a SubString type, so the pieces reuse that type.
    pieces = T[]
    return _split(str, splitter, limit, keep, pieces)
end
|
||||
|
||||
"""
|
||||
split(s::AbstractString, [chars]; limit::Integer=0, keep::Bool=true)
|
||||
|
||||
Return an array of substrings by splitting the given string on occurrences of the given
|
||||
character delimiters, which may be specified in any of the formats allowed by `search`'s
|
||||
second argument (i.e. a single character, collection of characters, string, or regular
|
||||
expression). If `chars` is omitted, it defaults to the set of all space characters, and
|
||||
`keep` is taken to be `false`. The two keyword arguments are optional: they are a
|
||||
maximum size for the result and a flag determining whether empty fields should be kept in
|
||||
the result.
|
||||
|
||||
```jldoctest
|
||||
julia> a = "Ma.rch"
|
||||
"Ma.rch"
|
||||
|
||||
julia> split(a,".")
|
||||
2-element Array{SubString{String},1}:
|
||||
"Ma"
|
||||
"rch"
|
||||
```
|
||||
"""
|
||||
function split(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:AbstractString}
    # Pieces are collected as substrings of the input's string type.
    pieces = SubString{T}[]
    return _split(str, splitter, limit, keep, pieces)
end
|
||||
# Worker behind `split`: scans `str` left to right with `search`, pushing the
# text between successive matches of `splitter` onto `strs`, and returns `strs`.
# Scanning stops early once `strs` holds `limit-1` pieces; the rest of the
# string then becomes the final piece. Empty pieces are dropped unless
# `keep_empty` is true.
function _split(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, strs::Array)
    field_start = start(str)
    last_idx = endof(str)
    hit = search(str, splitter, field_start)
    hit_start, resume = first(hit), nextind(str, last(hit))
    while 0 < hit_start <= last_idx && length(strs) != limit - 1
        if field_start < resume
            if keep_empty || field_start < hit_start
                push!(strs, SubString(str, field_start, prevind(str, hit_start)))
            end
            field_start = resume
        end
        # When the match does not extend past its own start (e.g. an empty
        # match), move one character forward so the search makes progress.
        if resume <= hit_start
            resume = nextind(str, hit_start)
        end
        hit = search(str, splitter, resume)
        hit_start, resume = first(hit), nextind(str, last(hit))
    end
    # Whatever follows the last consumed match is the final piece.
    if keep_empty || !done(str, field_start)
        push!(strs, SubString(str, field_start))
    end
    return strs
end
|
||||
|
||||
# a bit oddball, but standard behavior in Perl, Ruby & Python:
|
||||
function split(str::AbstractString)
    # With no splitter given, split on the default delimiters and drop
    # empty fields.
    return split(str, _default_delims; limit=0, keep=false)
end
|
||||
|
||||
function rsplit(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:SubString}
    # The input is already a SubString type, so the pieces reuse that type.
    pieces = T[]
    return _rsplit(str, splitter, limit, keep, pieces)
end
|
||||
|
||||
"""
|
||||
rsplit(s::AbstractString, [chars]; limit::Integer=0, keep::Bool=true)
|
||||
|
||||
Similar to [`split`](@ref), but starting from the end of the string.
|
||||
|
||||
```jldoctest
|
||||
julia> a = "M.a.r.c.h"
|
||||
"M.a.r.c.h"
|
||||
|
||||
julia> rsplit(a,".")
|
||||
5-element Array{SubString{String},1}:
|
||||
"M"
|
||||
"a"
|
||||
"r"
|
||||
"c"
|
||||
"h"
|
||||
|
||||
julia> rsplit(a,".";limit=1)
|
||||
1-element Array{SubString{String},1}:
|
||||
"M.a.r.c.h"
|
||||
|
||||
julia> rsplit(a,".";limit=2)
|
||||
2-element Array{SubString{String},1}:
|
||||
"M.a.r.c"
|
||||
"h"
|
||||
```
|
||||
"""
|
||||
function rsplit(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:AbstractString}
    # Pieces are collected as substrings of the input's string type.
    pieces = SubString{T}[]
    return _rsplit(str, splitter, limit, keep, pieces)
end
|
||||
# Worker behind `rsplit`: scans `str` right to left with `rsearch`, prepending
# the text after each match of `splitter` to `strs`, and returns `strs`.
# Scanning stops early once `strs` holds `limit-1` pieces; the remaining
# head of the string then becomes the first piece. Empty pieces are dropped
# unless `keep_empty` is true.
function _rsplit(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, strs::Array)
    first_idx = start(str)
    field_end = endof(str)
    hit = rsearch(str, splitter)
    before_hit = first(hit) - 1
    hit_end = last(hit)
    while 0 <= before_hit < field_end && length(strs) != limit - 1
        if first_idx <= hit_end
            if keep_empty || hit_end < field_end
                unshift!(strs, SubString(str, hit_end + 1, field_end))
            end
            field_end = before_hit
        end
        # When the match ends at or before the position preceding it (e.g. an
        # empty match), step one character back so the search makes progress.
        if hit_end <= before_hit
            before_hit = prevind(str, before_hit)
        end
        hit = rsearch(str, splitter, before_hit)
        before_hit = first(hit) - 1
        hit_end = last(hit)
    end
    # Whatever precedes the last consumed match is the first piece.
    if keep_empty || field_end > 0
        unshift!(strs, SubString(str, 1, field_end))
    end
    return strs
end
|
||||
#rsplit(str::AbstractString) = rsplit(str, _default_delims, 0, false)
|
||||
|
||||
# Write the replacement for one match to `io`.
# Generic case: `repl` itself is printed.
function _replace(io, repl, str, r, pattern)
    return print(io, repl)
end
# Function case: `repl` is called with the matched substring of `str`
# (the range `r`) and its result is printed.
function _replace(io, repl::Function, str, r, pattern)
    matched = SubString(str, first(r), last(r))
    return print(io, repl(matched))
end
|
||||
|
||||
# Core of `replace` for `String`: copies `str` into an output buffer,
# substituting each match of `pattern` via `_replace`. The loop stops after
# the match numbered `limit`; a `limit` of 0 never equals the counter, so
# every match is replaced.
function replace(str::String, pattern, repl, limit::Integer)
    nth = 1
    last_idx = endof(str)
    copy_from = origin = start(str)
    hit = search(str, pattern, copy_from)
    hit_start, hit_end = first(hit), last(hit)
    # Buffer pre-sized to 1.2x the input's byte size, then reset so that
    # writing starts from an empty buffer.
    out = IOBuffer(StringVector(floor(Int, 1.2sizeof(str))), true, true)
    out.size = 0
    out.ptr = 1
    while hit_start != 0
        if copy_from == origin || copy_from <= hit_end
            # Copy the untouched bytes before the match, then the replacement.
            unsafe_write(out, pointer(str, copy_from), UInt(hit_start - copy_from))
            _replace(out, repl, str, hit, pattern)
        end
        if hit_end < hit_start
            # Match range is empty: continue copying from the match position
            # and resume the search one character later.
            copy_from = hit_start
            hit_end = nextind(str, hit_start)
        else
            copy_from = hit_end = nextind(str, hit_end)
        end
        hit_start > last_idx && break
        hit = search(str, pattern, hit_end)
        hit_start, hit_end = first(hit), last(hit)
        nth == limit && break
        nth += 1
    end
    # Append whatever follows the last replaced match.
    write(out, SubString(str, copy_from))
    return String(take!(out))
end
|
||||
|
||||
"""
|
||||
replace(string::AbstractString, pat, r[, n::Integer=0])
|
||||
|
||||
Search for the given pattern `pat`, and replace each occurrence with `r`. If `n` is
|
||||
provided, replace at most `n` occurrences. As with search, the second argument may be a
|
||||
single character, a vector or a set of characters, a string, or a regular expression. If `r`
|
||||
is a function, each occurrence is replaced with `r(s)` where `s` is the matched substring.
|
||||
If `pat` is a regular expression and `r` is a `SubstitutionString`, then capture group
|
||||
references in `r` are replaced with the corresponding matched text.
|
||||
"""
|
||||
function replace(s::AbstractString, pat, f, n::Integer)
    # Convert to `String` so the `String` method does the work.
    converted = String(s)
    return replace(converted, pat, f, n)
end
function replace(s::AbstractString, pat, r)
    # No count given: pass 0 as the limit.
    return replace(s, pat, r, 0)
end
|
||||
|
||||
# hex <-> bytes conversion
|
||||
|
||||
"""
|
||||
hex2bytes(s::AbstractString)
|
||||
|
||||
Convert an arbitrarily long hexadecimal string to its binary representation. Returns an
|
||||
`Array{UInt8,1}`, i.e. an array of bytes.
|
||||
|
||||
```jldoctest
|
||||
julia> a = hex(12345)
|
||||
"3039"
|
||||
|
||||
julia> hex2bytes(a)
|
||||
2-element Array{UInt8,1}:
|
||||
0x30
|
||||
0x39
|
||||
```
|
||||
"""
|
||||
function hex2bytes(s::AbstractString)
    bytes = zeros(UInt8, div(endof(s), 2))
    pos, nbytes = start(s), 0
    while !done(s, pos)
        # First hex digit of the pair: the high nibble.
        hi_char, pos = next(s, pos)
        if '0' <= hi_char <= '9'
            hi = hi_char - '0'
        elseif 'a' <= hi_char <= 'f'
            hi = hi_char - 'a' + 10
        elseif 'A' <= hi_char <= 'F'
            hi = hi_char - 'A' + 10
        else
            throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
        end
        # A digit without a partner means the input has odd length.
        if done(s, pos)
            throw(ArgumentError("string length must be even: length($(repr(s))) == $(length(s))"))
        end
        # Second hex digit of the pair: the low nibble.
        lo_char, pos = next(s, pos)
        if '0' <= lo_char <= '9'
            lo = lo_char - '0'
        elseif 'a' <= lo_char <= 'f'
            lo = lo_char - 'a' + 10
        elseif 'A' <= lo_char <= 'F'
            lo = lo_char - 'A' + 10
        else
            throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
        end
        nbytes += 1
        bytes[nbytes] = (hi << 4) + lo
    end
    resize!(bytes, nbytes)
    return bytes
end
|
||||
|
||||
"""
|
||||
bytes2hex(bin_arr::Array{UInt8, 1}) -> String
|
||||
|
||||
Convert an array of bytes to its hexadecimal representation.
|
||||
All characters are in lower-case.
|
||||
|
||||
```jldoctest
|
||||
julia> a = hex(12345)
|
||||
"3039"
|
||||
|
||||
julia> b = hex2bytes(a)
|
||||
2-element Array{UInt8,1}:
|
||||
0x30
|
||||
0x39
|
||||
|
||||
julia> bytes2hex(b)
|
||||
"3039"
|
||||
```
|
||||
"""
|
||||
function bytes2hex(a::AbstractArray{UInt8})
    # Two output code units per input byte.
    out = Vector{UInt8}(2*length(a))
    pos = 0
    for byte in a
        # `hex_chars` is indexed from 1, hence the `1 +` on each nibble.
        out[pos + 1] = hex_chars[1 + byte >> 4]    # high nibble
        out[pos + 2] = hex_chars[1 + byte & 0xf]   # low nibble
        pos += 2
    end
    return String(out)
end
|
||||
|
||||
# check for pure ASCII-ness
|
||||
|
||||
function ascii(s::String)
    # Check every code unit; anything with the high bit set is not ASCII.
    for i in 1:sizeof(s)
        if codeunit(s, i) >= 0x80
            throw(ArgumentError("invalid ASCII at index $i in $(repr(s))"))
        end
    end
    return s
end
|
||||
|
||||
"""
|
||||
ascii(s::AbstractString)
|
||||
|
||||
Convert a string to `String` type and check that it contains only ASCII data, otherwise
|
||||
throwing an `ArgumentError` indicating the position of the first non-ASCII byte.
|
||||
|
||||
```jldoctest
|
||||
julia> ascii("abcdeγfgh")
|
||||
ERROR: ArgumentError: invalid ASCII at index 6 in "abcdeγfgh"
|
||||
Stacktrace:
|
||||
[1] ascii(::String) at ./strings/util.jl:479
|
||||
|
||||
julia> ascii("abcdefgh")
|
||||
"abcdefgh"
|
||||
```
|
||||
"""
|
||||
function ascii(x::AbstractString)
    # Convert to `String` first; the `String` method performs the check.
    converted = convert(String, x)
    return ascii(converted)
end
|
||||
Reference in New Issue
Block a user