501 lines
10 KiB
Julia
501 lines
10 KiB
Julia
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||
|
||
## core string functions ##
|
||
|
||
# Fallbacks for the two primitives a concrete string type has to supply.
# Reaching either one means the subtype did not define the method itself.
function endof(s::AbstractString)
    return error("you must implement endof(", typeof(s), ")")
end

function next(s::AbstractString, i::Int)
    return error("you must implement next(", typeof(s), ",Int)")
end

# Direct-index strings: the element at `i` paired with the following index.
function next(s::DirectIndexString, i::Int)
    return (s[i], i + 1)
end

# Any other integer index is converted to `Int` and re-dispatched.
function next(s::AbstractString, i::Integer)
    return next(s, Int(i))
end
|
||
|
||
# With no arguments the result is the empty string.
function string()
    return ""
end

# A single string argument is handed back unchanged (the same object).
function string(s::AbstractString)
    return s
end
|
||
|
||
"""
|
||
String(s::AbstractString)
|
||
|
||
Convert a string to a contiguous byte array representation encoded as UTF-8 bytes.
|
||
This representation is often appropriate for passing strings to C.
|
||
"""
|
||
function String(s::AbstractString)
    # Generic path: build the result by printing `s` via `print_to_string`.
    return print_to_string(s)
end
|
||
|
||
# Byte-array targets: go through `String(s)` first, then convert that.
function convert(::Type{Vector{UInt8}}, s::AbstractString)
    return convert(Vector{UInt8}, String(s))
end

# `Array{UInt8}` is forwarded to the `Vector{UInt8}` method above.
function convert(::Type{Array{UInt8}}, s::AbstractString)
    return convert(Vector{UInt8}, s)
end

function convert(::Type{String}, s::AbstractString)
    return String(s)
end

# Character-vector target: collect the characters of `s`.
function convert(::Type{Vector{Char}}, s::AbstractString)
    return collect(s)
end

function convert(::Type{Symbol}, s::AbstractString)
    return Symbol(s)
end

# Symbol to String: read the symbol's name through a `Cstring`.
function convert(::Type{String}, s::Symbol)
    return unsafe_string(Cstring(s))
end
|
||
|
||
## generic supplied functions ##
|
||
|
||
# Iteration over a string begins at index 1 ...
function start(s::AbstractString)
    return 1
end

# ... and is finished once the state has moved past `endof(s)`.
function done(s::AbstractString, i)
    return i > endof(s)
end

# Scalar indexing: the character component of `next(s, i)`.
function getindex(s::AbstractString, i::Int)
    return next(s, i)[1]
end

function getindex(s::AbstractString, i::Integer)
    return s[Int(i)]
end

# `s[:]` is the string itself.
function getindex(s::AbstractString, i::Colon)
    return s
end

# Unit ranges of any integer type are re-indexed with `Int` endpoints.
function getindex(s::AbstractString, r::UnitRange{<:Integer})
    return s[Int(first(r)):Int(last(r))]
end

# TODO: handle other ranges with stride ±1 specially?
# Index-vector form: write `s[i]` for each `i` in `v` into a new string.
function getindex(s::AbstractString, v::AbstractVector{<:Integer})
    emit = function (io)
        for i in v
            write(io, s[i])
        end
    end
    return sprint(length(v), emit)
end

function getindex(s::AbstractString, v::AbstractVector{Bool})
    throw(ArgumentError("logical indexing not supported for strings"))
end

# Symbols are built from the `String` form of `s`.
function Symbol(s::AbstractString)
    return Symbol(String(s))
end
|
||
|
||
"""
|
||
sizeof(s::AbstractString)
|
||
|
||
The number of bytes in string `s`.
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> sizeof("❤")
|
||
3
|
||
```
|
||
"""
|
||
function sizeof(s::AbstractString)
    # Generic fallback: always raises, naming the offending type.
    return error("type $(typeof(s)) has no canonical binary representation")
end
|
||
|
||
# The element type of every string type is `Char`.
function eltype(::Type{<:AbstractString})
    return Char
end
|
||
|
||
"""
|
||
```
|
||
*(s::AbstractString, t::AbstractString)
|
||
```
|
||
|
||
Concatenate strings. Equivalent to calling `string(s, t)`.
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> "Hello " * "world"
|
||
"Hello world"
|
||
```
|
||
"""
|
||
function *(s1::AbstractString, ss::AbstractString...)
    # Concatenation is delegated to `string` with all operands in order.
    return string(s1, ss...)
end
|
||
|
||
# Given a string value or a string type `T`, return the empty string
# converted to `T`.
function one(::Union{T,Type{T}}) where {T<:AbstractString}
    return convert(T, "")
end
|
||
|
||
# For direct-index strings the length is simply `endof(s)`.
function length(s::DirectIndexString)
    return endof(s)
end
|
||
|
||
"""
|
||
length(s::AbstractString)
|
||
|
||
The number of characters in string `s`.
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> length("jμΛIα")
|
||
5
|
||
```
|
||
"""
|
||
function length(s::AbstractString)
    # Walk the iteration protocol and count how many times `next` is taken
    # before `done` reports the end; an empty string yields 0.
    count = 0
    state = start(s)
    while !done(s, state)
        state = next(s, state)[2]
        count += 1
    end
    return count
end
|
||
|
||
## string comparison functions ##
|
||
|
||
function cmp(a::AbstractString, b::AbstractString)
    # Identical objects compare equal without walking any characters.
    a === b && return 0
    ia, ib = start(a), start(b)
    while !done(a, ia)
        # `b` is exhausted while `a` still has characters: `a` sorts after `b`.
        done(b, ib) && return +1
        ca, ia = next(a, ia)
        cb, ib = next(b, ib)
        # The first differing pair of characters decides the ordering.
        if ca != cb
            return ca < cb ? -1 : +1
        end
    end
    # `a` is exhausted: equal if `b` is too, otherwise `a` is a proper prefix.
    return done(b, ib) ? 0 : -1
end
|
||
|
||
# Equality and ordering of strings are both defined in terms of `cmp`.
function ==(a::AbstractString, b::AbstractString)
    return cmp(a, b) == 0
end

function isless(a::AbstractString, b::AbstractString)
    return cmp(a, b) < 0
end
|
||
|
||
# faster comparisons for symbols

function cmp(a::Symbol, b::Symbol)
    # Compare the symbol names with C `strcmp`, then reduce the raw result
    # to its sign as an `Int`.
    raw = ccall(:strcmp, Int32, (Cstring, Cstring), a, b)
    return Int(sign(raw))
end

function isless(a::Symbol, b::Symbol)
    return cmp(a, b) < 0
end
|
||
|
||
## Generic validation functions ##
|
||
|
||
# For direct-index strings, every index from `start(s)` through `endof(s)`
# is valid.
function isvalid(s::DirectIndexString, i::Integer)
    return start(s) <= i <= endof(s)
end
|
||
|
||
"""
|
||
isvalid(str::AbstractString, i::Integer)
|
||
|
||
Tells whether index `i` is valid for the given string.
|
||
|
||
# Examples
|
||
|
||
```jldoctest
|
||
julia> str = "αβγdef";
|
||
|
||
julia> isvalid(str, 1)
|
||
true
|
||
|
||
julia> str[1]
|
||
'α': Unicode U+03b1 (category Ll: Letter, lowercase)
|
||
|
||
julia> isvalid(str, 2)
|
||
false
|
||
|
||
julia> str[2]
|
||
ERROR: UnicodeError: invalid character index
|
||
[...]
|
||
```
|
||
"""
|
||
function isvalid(s::AbstractString, i::Integer)
    # Indices below 1 or at/after the end of iteration are never valid.
    (i < 1 || done(s, i)) && return false
    # Otherwise the index is valid exactly when `next` succeeds at `i`;
    # any exception it raises is reported as "not valid".
    try
        next(s, i)
        return true
    catch
        return false
    end
end
|
||
|
||
## Generic indexing functions ##
|
||
|
||
# Direct-index strings and arrays: neighbouring indices are one step away,
# returned as `Int`.
function prevind(s::DirectIndexString, i::Integer)
    return Int(i) - 1
end

function prevind(s::AbstractArray, i::Integer)
    return Int(i) - 1
end

function nextind(s::DirectIndexString, i::Integer)
    return Int(i) + 1
end

function nextind(s::AbstractArray, i::Integer)
    return Int(i) + 1
end
|
||
|
||
"""
|
||
prevind(str::AbstractString, i::Integer)
|
||
|
||
Get the previous valid string index before `i`.
|
||
Returns a value less than `1` at the beginning of the string.
|
||
|
||
# Examples
|
||
|
||
```jldoctest
|
||
julia> prevind("αβγdef", 3)
|
||
1
|
||
|
||
julia> prevind("αβγdef", 1)
|
||
0
|
||
```
|
||
"""
|
||
function prevind(s::AbstractString, i::Integer)
    lastidx = endof(s)
    # Anything past the end steps back to the last index of the string.
    i > lastidx && return lastidx
    # Scan downwards from `i - 1` for the nearest valid index.
    for k in Int(i)-1:-1:1
        isvalid(s, k) && return k
    end
    return 0 # out of range
end
|
||
|
||
"""
|
||
nextind(str::AbstractString, i::Integer)
|
||
|
||
Get the next valid string index after `i`.
|
||
Returns a value greater than `endof(str)` at or after the end of the string.
|
||
|
||
# Examples
|
||
|
||
```jldoctest
|
||
julia> str = "αβγdef";
|
||
|
||
julia> nextind(str, 1)
|
||
3
|
||
|
||
julia> endof(str)
|
||
9
|
||
|
||
julia> nextind(str, 9)
|
||
10
|
||
```
|
||
"""
|
||
function nextind(s::AbstractString, i::Integer)
    lastidx = endof(s)
    # Below the first index, the next index is 1.
    i < 1 && return 1
    # Already past the end: just step by one.
    i > lastidx && return Int(i) + 1
    # Scan upwards from `i + 1` for the nearest valid index.
    k = Int(i) + 1
    while k <= lastidx
        isvalid(s, k) && return k
        k += 1
    end
    # No valid index remains; report the state that follows the last one.
    return next(s, lastidx)[2] # out of range
end
|
||
|
||
# Scalar index: `true` when `i` lies in `start(s):endof(s)`, otherwise a
# `BoundsError` is thrown.
function checkbounds(s::AbstractString, i::Integer)
    return start(s) <= i <= endof(s) || throw(BoundsError(s, i))
end

# Integer range: an empty range always passes; otherwise both extremes must
# lie inside the string.
function checkbounds(s::AbstractString, r::Range{<:Integer})
    isempty(r) && return true
    inside = minimum(r) >= start(s) && maximum(r) <= endof(s)
    return inside || throw(BoundsError(s, r))
end

# The following will end up using a deprecated checkbounds, when the covariant parameter is not Integer
function checkbounds(s::AbstractString, I::AbstractArray{<:Real})
    return all(i -> checkbounds(s, i), I)
end

# Integer index arrays: every element is checked individually.
function checkbounds(s::AbstractString, I::AbstractArray{<:Integer})
    return all(i -> checkbounds(s, i), I)
end
|
||
|
||
# For direct-index strings both conversions return `i` unchanged after a
# bounds check.
function ind2chr(s::DirectIndexString, i::Integer)
    checkbounds(s, i)
    return i
end

function chr2ind(s::DirectIndexString, i::Integer)
    checkbounds(s, i)
    return i
end
|
||
|
||
|
||
"""
|
||
ind2chr(s::AbstractString, i::Integer)
|
||
|
||
Convert a byte index `i` to a character index with
|
||
respect to string `s`.
|
||
|
||
See also [`chr2ind`](@ref).
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> str = "αβγdef";
|
||
|
||
julia> ind2chr(str, 3)
|
||
2
|
||
|
||
julia> chr2ind(str, 2)
|
||
3
|
||
```
|
||
"""
|
||
function ind2chr(s::AbstractString, i::Integer)
    s[i] # throws error if invalid
    # Step through the string one character at a time, counting characters
    # until the iteration index reaches `i`.
    nchars = 1
    idx = start(s)
    while true
        following = next(s, idx)[2]
        i <= idx && return nchars
        nchars += 1
        idx = following
    end
end
|
||
|
||
"""
|
||
chr2ind(s::AbstractString, i::Integer)
|
||
|
||
Convert a character index `i` to a byte index.
|
||
|
||
See also [`ind2chr`](@ref).
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> str = "αβγdef";
|
||
|
||
julia> chr2ind(str, 2)
|
||
3
|
||
|
||
julia> ind2chr(str, 3)
|
||
2
|
||
```
|
||
"""
|
||
function chr2ind(s::AbstractString, i::Integer)
    # Character positions below the starting index are out of bounds.
    i < start(s) && throw(BoundsError(s, i))
    # Step through the string one character at a time until the running
    # character count equals `i`, then report the index reached.
    nchars = 1
    idx = start(s)
    while true
        following = next(s, idx)[2]
        i == nchars && return idx
        nchars += 1
        idx = following
    end
end
|
||
|
||
# Iterator over the indices of a string, as returned by `eachindex(s)`.
# It wraps the string and walks it with `nextind`, so each produced value is
# an index of the wrapped string rather than a character.
struct EachStringIndex{T<:AbstractString}
    s::T
end
eachindex(s::AbstractString) = EachStringIndex(s)

# The number of indices is the length of the wrapped string.
length(e::EachStringIndex) = length(e.s)
# Iteration protocol: the state is the current index, which is also the
# value produced; the following state comes from `nextind`.
start(e::EachStringIndex) = start(e.s)
next(e::EachStringIndex, state) = (state, nextind(e.s, state))
done(e::EachStringIndex, state) = done(e.s, state)
# `Type{<:EachStringIndex}` is required here: `Type{EachStringIndex}` would
# only match the unparameterized type, so concrete instantiations such as
# `EachStringIndex{String}` would miss this method entirely.
eltype(::Type{<:EachStringIndex}) = Int
|
||
|
||
## character column width function ##
|
||
|
||
"""
|
||
strwidth(s::AbstractString)
|
||
|
||
Gives the number of columns needed to print a string.
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> strwidth("March")
|
||
5
|
||
```
|
||
"""
|
||
function strwidth(s::AbstractString)
    # Accumulate `charwidth` over every character, starting from zero.
    total = 0
    for c in s
        total += charwidth(c)
    end
    return total
end
|
||
|
||
"""
|
||
isascii(c::Union{Char,AbstractString}) -> Bool
|
||
|
||
Tests whether a character belongs to the ASCII character set, or whether this is true for
|
||
all elements of a string.
|
||
"""
|
||
function isascii(c::Char)
    # ASCII characters are those ordered before code point 0x80.
    return c < Char(0x80)
end

function isascii(s::AbstractString)
    # A string is ASCII when every one of its characters is.
    return all(isascii, s)
end
|
||
|
||
## string promotion rules ##
|
||
|
||
# Any two string types promote to `String`.
function promote_rule(::Type{<:AbstractString}, ::Type{<:AbstractString})
    return String
end
|
||
|
||
"""
|
||
isxdigit(c::Char) -> Bool
|
||
|
||
Tests whether a character is a valid hexadecimal digit. Note that this does not
|
||
include `x` (as in the standard `0x` prefix).
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> isxdigit('a')
|
||
true
|
||
|
||
julia> isxdigit('x')
|
||
false
|
||
```
|
||
"""
|
||
function isxdigit(c::Char)
    # Decimal digits, then lowercase a-f, then uppercase A-F.
    '0' <= c <= '9' && return true
    'a' <= c <= 'f' && return true
    return 'A' <= c <= 'F'
end
|
||
|
||
## uppercase, lowercase, and titlecase transformations ##
|
||
|
||
"""
|
||
uppercase(s::AbstractString)
|
||
|
||
Returns `s` with all characters converted to uppercase.
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> uppercase("Julia")
|
||
"JULIA"
|
||
```
|
||
"""
|
||
function uppercase(s::AbstractString)
    # Apply the character-level `uppercase` to each character of `s`.
    return map(uppercase, s)
end
|
||
|
||
"""
|
||
lowercase(s::AbstractString)
|
||
|
||
Returns `s` with all characters converted to lowercase.
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> lowercase("STRINGS AND THINGS")
|
||
"strings and things"
|
||
```
|
||
"""
|
||
function lowercase(s::AbstractString)
    # Apply the character-level `lowercase` to each character of `s`.
    return map(lowercase, s)
end
|
||
|
||
"""
|
||
titlecase(s::AbstractString)
|
||
|
||
Capitalizes the first character of each word in `s`.
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> titlecase("the julia programming language")
|
||
"The Julia Programming Language"
|
||
```
|
||
"""
|
||
function titlecase(s::AbstractString)
    buf = IOBuffer()
    # True at the beginning of the string and right after any whitespace.
    atwordstart = true
    for c in s
        inspace = isspace(c)
        # Whitespace is copied verbatim; the first non-space character of a
        # word is title-cased, the rest are copied unchanged.
        print(buf, (!inspace && atwordstart) ? titlecase(c) : c)
        atwordstart = inspace
    end
    return String(take!(buf))
end
|
||
|
||
"""
|
||
ucfirst(s::AbstractString)
|
||
|
||
Returns `s` with the first character converted to uppercase.
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> ucfirst("python")
|
||
"Python"
|
||
```
|
||
"""
|
||
function ucfirst(s::AbstractString)
    # Nothing to change for an empty string or one whose first character
    # is already uppercase: hand back `s` itself.
    if isempty(s) || isupper(s[1])
        return s
    end
    # Otherwise rebuild: uppercased first character plus the remainder.
    return string(uppercase(s[1]), s[nextind(s, 1):end])
end
|
||
|
||
"""
|
||
lcfirst(s::AbstractString)
|
||
|
||
Returns `s` with the first character converted to lowercase.
|
||
|
||
# Example
|
||
|
||
```jldoctest
|
||
julia> lcfirst("Julia")
|
||
"julia"
|
||
```
|
||
"""
|
||
function lcfirst(s::AbstractString)
    # Nothing to change for an empty string or one whose first character
    # is already lowercase: hand back `s` itself.
    if isempty(s) || islower(s[1])
        return s
    end
    # Otherwise rebuild: lowercased first character plus the remainder.
    return string(lowercase(s[1]), s[nextind(s, 1):end])
end
|
||
|
||
## string map, filter, has ##
|
||
|
||
function map(f, s::AbstractString)
    # Back the buffer with storage sized by `endof(s)`, then truncate it to
    # zero so the writes below start from an empty buffer.
    buf = IOBuffer(StringVector(endof(s)), true, true)
    truncate(buf, 0)
    for c in s
        mapped = f(c)
        # Only `Char` results can be written back into a string.
        isa(mapped, Char) || throw(ArgumentError("map(f,s::AbstractString) requires f to return Char; try map(f,collect(s)) or a comprehension instead"))
        write(buf, mapped::Char)
    end
    return String(take!(buf))
end
|
||
|
||
function filter(f, s::AbstractString)
    # Back the buffer with storage sized by `endof(s)`, then truncate it to
    # zero so the writes below start from an empty buffer.
    buf = IOBuffer(StringVector(endof(s)), true, true)
    truncate(buf, 0)
    for c in s
        # Keep only the characters for which the predicate holds.
        f(c) && write(buf, c)
    end
    return String(take!(buf))
end
|