# This file is a part of Julia. License is MIT: https://julialang.org/license

# starts with and ends with predicates

"""
    startswith(s::AbstractString, prefix::AbstractString)

Returns `true` if `s` starts with `prefix`. If `prefix` is a vector or set
of characters, tests whether the first character of `s` belongs to that set.

See also [`endswith`](@ref).

```jldoctest
julia> startswith("JuliaLang", "Julia")
true
```
"""
function startswith(a::AbstractString, b::AbstractString)
    i = start(a)
    j = start(b)
    # Each string is walked with its own iteration state: `i` belongs to `a`,
    # `j` belongs to `b`. They must not be mixed, since the two strings may be
    # of different types with different index spaces.
    while !done(a,i) && !done(b,j)
        c, i = next(a,i)
        d, j = next(b,j)
        (c != d) && (return false)
    end
    # `b` is a prefix of `a` only if `b` has been consumed completely.
    done(b,j)
end
# Character-set form: true when `str` is non-empty and its first character is in `chars`.
startswith(str::AbstractString, chars::Chars) = !isempty(str) && first(str) in chars

"""
    endswith(s::AbstractString, suffix::AbstractString)

Returns `true` if `s` ends with `suffix`. If `suffix` is a vector or set of
characters, tests whether the last character of `s` belongs to that set.

See also [`startswith`](@ref).

```jldoctest
julia> endswith("Sunday", "day")
true
```
"""
function endswith(a::AbstractString, b::AbstractString)
    i = endof(a)
    j = endof(b)
    a1 = start(a)
    b1 = start(b)
    # Compare characters from the back of both strings towards the front.
    while a1 <= i && b1 <= j
        c = a[i]
        d = b[j]
        (c != d) && (return false)
        i = prevind(a,i)
        j = prevind(b,j)
    end
    # `b` is a suffix of `a` only if `j` has moved past the start of `b`.
    j < b1
end
# Character-set form: true when `str` is non-empty and its last character is in `chars`.
endswith(str::AbstractString, chars::Chars) = !isempty(str) && last(str) in chars

# Specializations that compare the leading `b.len` / `length(b)` bytes with `memcmp`.
startswith(a::String, b::String) =
    (a.len >= b.len && ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, b.len) == 0)
startswith(a::Vector{UInt8}, b::Vector{UInt8}) =
    (length(a) >= length(b) && ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, length(b)) == 0)

# TODO: fast endswith

"""
    chop(s::AbstractString)

Remove the last character from `s`.

```jldoctest
julia> a = "March"
"March"

julia> chop(a)
"Marc"
```
"""
chop(s::AbstractString) = SubString(s, 1, endof(s)-1)

"""
    chomp(s::AbstractString)

Remove a single trailing newline from a string.

```jldoctest
julia> chomp("Hello\\n")
"Hello"
```
"""
function chomp(s::AbstractString)
    i = endof(s)
    # No trailing '\n': return the whole string as a SubString.
    (i < 1 || s[i] != '\n') && (return SubString(s, 1, i))
    j = prevind(s,i)
    # Trailing "\n" not preceded by '\r': drop only the '\n'.
    (j < 1 || s[j] != '\r') && (return SubString(s, 1, i-1))
    # Trailing "\r\n": drop both characters.
    return SubString(s, 1, j-1)
end
# Code-unit version of `chomp` for `String`; 0x0a is '\n' and 0x0d is '\r'.
function chomp(s::String)
    i = endof(s)
    if i < 1 || codeunit(s,i) != 0x0a
        SubString(s, 1, i)
    elseif i < 2 || codeunit(s,i-1) != 0x0d
        SubString(s, 1, i-1)
    else
        SubString(s, 1, i-2)
    end
end

# NOTE: use with caution -- breaks the immutable string convention!
# TODO: this is hard to provide with the new representation
#function chomp!(s::String)
#    if !isempty(s) && codeunit(s,s.len) == 0x0a
#        n = (endof(s) < 2 || s.data[end-1] != 0x0d) ? 1 : 2
#        ccall(:jl_array_del_end, Void, (Any, UInt), s.data, n)
#    end
#    return s
#end
chomp!(s::AbstractString) = chomp(s) # copying fallback for other string types

# Characters stripped by `lstrip`/`rstrip` when no `chars` argument is given;
# also used as the delimiter set by the one-argument `split`.
const _default_delims = [' ','\t','\n','\v','\f','\r']

"""
    lstrip(s::AbstractString[, chars::Chars])

Return `s` with any leading whitespace and delimiters removed.
The default delimiters to remove are `' '`, `\\t`, `\\n`, `\\v`,
`\\f`, and `\\r`.
If `chars` (a character, or vector or set of characters) is provided,
instead remove characters contained in it.

```jldoctest
julia> a = lpad("March", 20)
"               March"

julia> lstrip(a)
"March"
```
"""
function lstrip(s::AbstractString, chars::Chars=_default_delims)
    i = start(s)
    while !done(s,i)
        c, j = next(s,i)
        # `i` is the index of the first character that is not in `chars`.
        if !(c in chars)
            return s[i:end]
        end
        i = j
    end
    # Every character was stripped: return an empty slice of `s`.
    s[end+1:end]
end

"""
    rstrip(s::AbstractString[, chars::Chars])

Return `s` with any trailing whitespace and delimiters removed.
The default delimiters to remove are `' '`, `\\t`, `\\n`, `\\v`,
`\\f`, and `\\r`.
If `chars` (a character, or vector or set of characters) is provided,
instead remove characters contained in it.

```jldoctest
julia> a = rpad("March", 20)
"March               "

julia> rstrip(a)
"March"
```
"""
function rstrip(s::AbstractString, chars::Chars=_default_delims)
    # Iterate over the reversed view so the scan starts at the last character.
    r = RevString(s)
    i = start(r)
    while !done(r,i)
        c, j = next(r,i)
        if !(c in chars)
            # `i` indexes into the reversed string; `end-i+1` maps it back onto `s`.
            return s[1:end-i+1]
        end
        i = j
    end
    # Every character was stripped: return an empty slice of `s`.
    s[1:0]
end

"""
    strip(s::AbstractString, [chars::Chars])

Return `s` with any leading and trailing whitespace removed.
If `chars` (a character, or vector or set of characters) is provided,
instead remove characters contained in it.

```jldoctest
julia> strip("{3, 5}\\n", ['{', '}', '\\n'])
"3, 5"
```
"""
strip(s::AbstractString) = lstrip(rstrip(s))
strip(s::AbstractString, chars::Chars) = lstrip(rstrip(s, chars), chars)

## string padding functions ##

function lpad(s::AbstractString, n::Integer, p::AbstractString=" ")
    # `m` is the number of columns still to be filled.
    m = n - strwidth(s)
    (m <= 0) && (return s)
    l = strwidth(p)
    if l==1
        return string(p^m, s)
    end
    # Wider pad string: `q` whole copies of `p` plus a partial copy covering `r`.
    q = div(m,l)
    r = m - q*l
    # `-1` makes `p[1:i]` an empty range when no partial copy is needed.
    i = r != 0 ? chr2ind(p, r) : -1
    string(p^q, p[1:i], s)
end

function rpad(s::AbstractString, n::Integer, p::AbstractString=" ")
    # `m` is the number of columns still to be filled.
    m = n - strwidth(s)
    (m <= 0) && (return s)
    l = strwidth(p)
    if l==1
        return string(s, p^m)
    end
    # Wider pad string: `q` whole copies of `p` plus a partial copy covering `r`.
    q = div(m,l)
    r = m - q*l
    # `-1` makes `p[1:i]` an empty range when no partial copy is needed.
    i = r != 0 ? chr2ind(p, r) : -1
    string(s, p^q, p[1:i])
end

"""
    lpad(s, n::Integer, p::AbstractString=" ")

Make a string at least `n` columns wide when printed by padding `s` on the left
with copies of `p`.

```jldoctest
julia> lpad("March",10)
"     March"
```
"""
lpad(s, n::Integer, p=" ") = lpad(string(s),n,string(p))

"""
    rpad(s, n::Integer, p::AbstractString=" ")

Make a string at least `n` columns wide when printed by padding `s` on the right
with copies of `p`.

```jldoctest
julia> rpad("March",20)
"March               "
```
"""
rpad(s, n::Integer, p=" ") = rpad(string(s),n,string(p))

# Pad on both sides: left-pad `s` to `div(n+strwidth(s),2)` columns, then
# right-pad the result to `n` columns.
cpad(s, n::Integer, p=" ") = rpad(lpad(s,div(n+strwidth(s),2),p),n,p)

# splitter can be a Char, Vector{Char}, AbstractString, Regex, ...
# any splitter that provides search(s::AbstractString, splitter)

# When `str` is already a `SubString`, the result reuses that type as element type.
split(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:SubString} =
    _split(str, splitter, limit, keep, T[])

"""
    split(s::AbstractString, [chars]; limit::Integer=0, keep::Bool=true)

Return an array of substrings by splitting the given string on occurrences of the given
character delimiters, which may be specified in any of the formats allowed by `search`'s
second argument (i.e. a single character, collection of characters, string, or regular
expression). If `chars` is omitted, it defaults to the set of all space characters, and
`keep` is taken to be `false`. The two keyword arguments are optional: they are a
maximum size for the result and a flag determining whether empty fields should be kept in
the result.

```jldoctest
julia> a = "Ma.rch"
"Ma.rch"

julia> split(a,".")
2-element Array{SubString{String},1}:
 "Ma"
 "rch"
```
"""
split(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:AbstractString} =
    _split(str, splitter, limit, keep, SubString{T}[])

# Shared implementation of `split`: pushes the pieces of `str` onto `strs` and returns it.
function _split(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, strs::Array)
    # `i` is the start of the pending field, `j` the first index of the current
    # match and `k` the index following the match.
    i = start(str)
    n = endof(str)
    r = search(str,splitter,i)
    j, k = first(r), nextind(str,last(r))
    # Stop when `j` falls outside `1:n` or when only one more piece may be
    # added without exceeding `limit`.
    while 0 < j <= n && length(strs) != limit-1
        if i < k
            if keep_empty || i < j
                push!(strs, SubString(str,i,prevind(str,j)))
            end
            i = k
        end
        # Keep the next search position strictly after `j`
        # (presumably to make progress after an empty match).
        (k <= j) && (k = nextind(str,j))
        r = search(str,splitter,k)
        j, k = first(r), nextind(str,last(r))
    end
    # Whatever is left after the last delimiter forms the final field.
    if keep_empty || !done(str,i)
        push!(strs, SubString(str,i))
    end
    return strs
end

# a bit oddball, but standard behavior in Perl, Ruby & Python:
split(str::AbstractString) = split(str, _default_delims; limit=0, keep=false)

# When `str` is already a `SubString`, the result reuses that type as element type.
rsplit(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:SubString} =
    _rsplit(str, splitter, limit, keep, T[])

"""
    rsplit(s::AbstractString, [chars]; limit::Integer=0, keep::Bool=true)

Similar to [`split`](@ref), but starting from the end of the string.

```jldoctest
julia> a = "M.a.r.c.h"
"M.a.r.c.h"

julia> rsplit(a,".")
5-element Array{SubString{String},1}:
 "M"
 "a"
 "r"
 "c"
 "h"

julia> rsplit(a,".";limit=1)
1-element Array{SubString{String},1}:
 "M.a.r.c.h"

julia> rsplit(a,".";limit=2)
2-element Array{SubString{String},1}:
 "M.a.r.c"
 "h"
```
"""
rsplit(str::T, splitter; limit::Integer=0, keep::Bool=true) where {T<:AbstractString} =
    _rsplit(str, splitter, limit, keep, SubString{T}[])

# Shared implementation of `rsplit`: scans backwards with `rsearch` and prepends
# the pieces to `strs`, so the result is in left-to-right order.
function _rsplit(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, strs::Array)
    i = start(str)
    # `n` is the end of the pending field, `j` the index before the current
    # match and `k` the last index of the match.
    n = endof(str)
    r = rsearch(str,splitter)
    j = first(r)-1
    k = last(r)
    while((0 <= j < n) && (length(strs) != limit-1))
        if i <= k
            (keep_empty || (k < n)) && unshift!(strs, SubString(str,k+1,n))
            n = j
        end
        # Keep the next search position strictly before the current one
        # (presumably to make progress after an empty match).
        (k <= j) && (j = prevind(str,j))
        r = rsearch(str,splitter,j)
        j = first(r)-1
        k = last(r)
    end
    # Whatever is left before the first delimiter forms the leading field.
    (keep_empty || (n > 0)) && unshift!(strs, SubString(str,1,n))
    return strs
end
#rsplit(str::AbstractString) = rsplit(str, _default_delims, 0, false)

# Write the replacement for the match at range `r` to `io`: either `repl`
# itself, or the result of calling `repl` on the matched substring.
_replace(io, repl, str, r, pattern) = print(io, repl)
_replace(io, repl::Function, str, r, pattern) =
    print(io, repl(SubString(str, first(r), last(r))))

# Worker for `replace` on `String`: copies the text between matches into an
# `IOBuffer` and writes each replacement through `_replace`.
function replace(str::String, pattern, repl, limit::Integer)
    n = 1
    e = endof(str)
    # `i` is the start of the text not yet copied to `out`; `a` remembers the
    # initial position so that a match at the very beginning is still handled.
    i = a = start(str)
    r = search(str,pattern,i)
    j, k = first(r), last(r)
    # The buffer is created with capacity 1.2x the input size, then reset to empty.
    out = IOBuffer(StringVector(floor(Int, 1.2sizeof(str))), true, true)
    out.size = 0
    out.ptr = 1
    while j != 0
        if i == a || i <= k
            # Copy the unmatched bytes in front of the match, then the replacement.
            unsafe_write(out, pointer(str, i), UInt(j-i))
            _replace(out, repl, str, r, pattern)
        end
        if k<j
            # Empty match (`last(r) < first(r)`): the character at `j` still has
            # to be copied, and the next search starts after it.
            i = j
            k = nextind(str, j)
        else
            # Non-empty match: continue right after the matched text.
            i = k = nextind(str, k)
        end
        if j > e
            # The match started past the last index of `str`; nothing more to search.
            break
        end
        r = search(str,pattern,k)
        j, k = first(r), last(r)
        # `limit == 0` never equals `n` (which starts at 1), so it means "no limit".
        n == limit && break
        n += 1
    end
    # Append the remainder of the string after the last replacement.
    write(out, SubString(str,i))
    String(take!(out))
end

"""
    replace(string::AbstractString, pat, r[, n::Integer=0])

Search for the given pattern `pat`, and replace each occurrence with `r`. If `n` is
provided, replace at most `n` occurrences. As with search, the second argument may be a
single character, a vector or a set of characters, a string, or a regular expression. If `r`
is a function, each occurrence is replaced with `r(s)` where `s` is the matched substring.
If `pat` is a regular expression and `r` is a `SubstitutionString`, then capture group
references in `r` are replaced with the corresponding matched text.
"""
replace(s::AbstractString, pat, f, n::Integer) = replace(String(s), pat, f, n)
replace(s::AbstractString, pat, r) = replace(s, pat, r, 0)

# hex <-> bytes conversion

"""
    hex2bytes(s::AbstractString)

Convert an arbitrarily long hexadecimal string to its binary representation. Returns an
`Array{UInt8,1}`, i.e. an array of bytes.

```jldoctest
julia> a = hex(12345)
"3039"

julia> hex2bytes(a)
2-element Array{UInt8,1}:
 0x30
 0x39
```
"""
function hex2bytes(s::AbstractString)
    # Sized from `endof(s)`; trimmed to the number of bytes actually written below.
    a = zeros(UInt8, div(endof(s), 2))
    i, j = start(s), 0
    while !done(s, i)
        # First hex digit of the pair.
        c, i = next(s, i)
        n = '0' <= c <= '9' ? c - '0' :
            'a' <= c <= 'f' ? c - 'a' + 10 :
            'A' <= c <= 'F' ? c - 'A' + 10 :
                throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
        done(s, i) &&
            throw(ArgumentError("string length must be even: length($(repr(s))) == $(length(s))"))
        # Second hex digit: shift the first one up by four bits and add this one.
        c, i = next(s, i)
        n = '0' <= c <= '9' ? n << 4 + c - '0' :
            'a' <= c <= 'f' ? n << 4 + c - 'a' + 10 :
            'A' <= c <= 'F' ? n << 4 + c - 'A' + 10 :
                throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
        a[j += 1] = n
    end
    resize!(a, j)
    return a
end

"""
    bytes2hex(bin_arr::Array{UInt8, 1}) -> String

Convert an array of bytes to its hexadecimal representation.
All characters are in lower-case.

```jldoctest
julia> a = hex(12345)
"3039"

julia> b = hex2bytes(a)
2-element Array{UInt8,1}:
 0x30
 0x39

julia> bytes2hex(b)
"3039"
```
"""
function bytes2hex(a::AbstractArray{UInt8})
    # Two output bytes per input byte.
    b = Vector{UInt8}(2*length(a))
    i = 0
    for x in a
        # High four bits first, then low four bits; `hex_chars` is defined
        # outside this section and is indexed from 1.
        b[i += 1] = hex_chars[1 + x >> 4]
        b[i += 1] = hex_chars[1 + x & 0xf]
    end
    return String(b)
end

# check for pure ASCII-ness

function ascii(s::String)
    # Any byte >= 0x80 is outside the ASCII range; report its 1-based position.
    for (i, b) in enumerate(Vector{UInt8}(s))
        b < 0x80 || throw(ArgumentError("invalid ASCII at index $i in $(repr(s))"))
    end
    return s
end

"""
    ascii(s::AbstractString)

Convert a string to `String` type and check that it contains only ASCII data, otherwise
throwing an `ArgumentError` indicating the position of the first non-ASCII byte.

```jldoctest
julia> ascii("abcdeγfgh")
ERROR: ArgumentError: invalid ASCII at index 6 in "abcdeγfgh"
Stacktrace:
 [1] ascii(::String) at ./strings/util.jl:479

julia> ascii("abcdefgh")
"abcdefgh"
```
"""
ascii(x::AbstractString) = ascii(convert(String, x))