fix incorrect folder name for julia-0.6.x
Former-commit-id: ef2c7401e0876f22d2f7762d182cfbcd5a7d9c70
This commit is contained in:
380
julia-0.6.3/share/julia/base/strings/search.jl
Normal file
380
julia-0.6.3/share/julia/base/strings/search.jl
Normal file
@@ -0,0 +1,380 @@
|
||||
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||||
|
||||
# Everything accepted as a "set of characters" by the character-search methods:
# a lone `Char`, a tuple of `Char`s, a vector of `Char`s, or a `Set` of `Char`s.
const Chars = Union{Char, Tuple{Vararg{Char}}, AbstractVector{Char}, Set{Char}}
|
||||
"""
    search(string::AbstractString, chars::Chars, [start::Integer])

Find the first occurrence of the given characters in `string`. The pattern may be a single
character, a vector or a set of characters, a string, or a regular expression (regular
expressions are only allowed on contiguous strings, such as ASCII or UTF-8 strings). The
optional third argument is the index at which to begin searching. The return value is a
range of indexes where the matching sequence is found, such that `s[search(s,x)] == x`:

`search(string, "substring")` = `start:end` such that `string[start:end] == "substring"`, or
`0:-1` if unmatched.

`search(string, 'c')` = `index` such that `string[index] == 'c'`, or `0` if unmatched.

```jldoctest
julia> search("Hello to the world", "z")
0:-1

julia> search("JuliaLang","Julia")
1:5
```
"""
function search(s::AbstractString, c::Chars, i::Integer)
    # Valid starting points run from 1 to one position past the last index.
    (1 <= i <= nextind(s, endof(s))) || throw(BoundsError(s, i))

    # An empty character collection matches right at the starting index.
    isempty(c) && return i

    pos = i
    while !done(s, pos)
        ch, following = next(s, pos)
        ch in c && return pos
        pos = following
    end
    return 0
end
|
||||
# Two-argument form: begin scanning at the string's initial iteration state.
function search(s::AbstractString, c::Chars)
    return search(s, c, start(s))
end
|
||||
|
||||
# A character is "in" a string exactly when `search` reports a nonzero index for it.
function in(c::Char, s::AbstractString)
    return search(s, c) != 0
end
|
||||
|
||||
# Generic forward substring search built on the start/next/done iteration protocol.
# Returns the index in `s` at which the first occurrence of `t` (at or after `i`) begins,
# or `0` when there is none. For an empty `t` the start index itself is returned, after
# checking that it lies between 1 and one past the last index of `s` (otherwise a
# `BoundsError` is thrown).
function _searchindex(s, t, i)
    if isempty(t)
        (1 <= i <= nextind(s, endof(s))) || throw(BoundsError(s, i))
        return i
    end

    # First element of the needle, and the iteration state of the needle's remainder.
    head, rest_state = next(t, start(t))

    pos = i
    while true
        # Jump to the next place where the needle's first element occurs.
        pos = search(s, head, pos)
        pos == 0 && return 0

        after_head = next(s, pos)[2]
        tstate = rest_state
        sstate = after_head
        matched = true

        # Walk haystack and needle in lock-step over the rest of the needle.
        while !done(t, tstate)
            if done(s, sstate)
                # haystack ran out before the needle did
                matched = false
                break
            end
            sc, sstate = next(s, sstate)
            tc, tstate = next(t, tstate)
            if sc != tc
                matched = false
                break
            end
        end

        matched && return pos

        # Resume just after the candidate's first element.
        pos = after_head
    end
end
|
||||
|
||||
# 64-bit mask with a single bit set; the bit position is the low six bits of `c`.
_search_bloom_mask(c) = one(UInt64) << (c & 63)
|
||||
|
||||
# Uniform byte access for the byte-level searches:
# a `String` yields its i-th code unit, a byte array its i-th element.
function _nthbyte(s::String, i)
    return codeunit(s, i)
end

function _nthbyte(a::ByteArray, i)
    return a[i]
end
|
||||
|
||||
# Byte-level forward search of `t` inside `s`, starting at byte index `i`.
# Returns the 1-based byte index where the first match begins, or `0` if there is none.
# A 64-bit mask of the needle's bytes (see `_search_bloom_mask`) lets the scan jump a whole
# needle length whenever the byte just past the current window cannot belong to the needle.
function _searchindex(s::Union{String,ByteArray}, t::Union{String,ByteArray}, i)
    n = sizeof(t)   # needle size in bytes
    m = sizeof(s)   # haystack size in bytes

    # Degenerate sizes are answered without scanning.
    if n == 0
        return (1 <= i <= m + 1) ? max(1, i) : 0
    end
    m == 0 && return 0
    n == 1 && return search(s, _nthbyte(t, 1), i)

    # Largest zero-based offset at which the needle still fits in the haystack.
    maxoff = m - n
    (maxoff < 0 || i - 1 > maxoff) && return 0

    # Preprocess the needle: accumulate the byte mask, and derive the shift used after a
    # failed candidate from the last earlier position holding the needle's final byte.
    mask = UInt64(0)
    shift = n - 1
    lastbyte = _nthbyte(t, n)
    for idx in 1:n
        b = _nthbyte(t, idx)
        mask |= _search_bloom_mask(b)
        if b == lastbyte && idx < n
            shift = n - idx - 1
        end
    end

    off = i - 1     # zero-based offset of the current window
    while off <= maxoff
        if _nthbyte(s, off + n) == lastbyte
            # Final byte lines up: compare the first n-1 bytes of the window.
            same = 0
            while same < n - 1 && _nthbyte(s, off + same + 1) == _nthbyte(t, same + 1)
                same += 1
            end

            # All leading bytes agreed, so the whole window matches.
            same == n - 1 && return off + 1

            # Mismatch: skip a full needle length if the byte after the window is not in
            # the needle at all, otherwise fall back to the precomputed shift.
            if off < maxoff && (mask & _search_bloom_mask(_nthbyte(s, off + n + 1))) == 0
                off += n
            else
                off += shift
            end
        elseif off < maxoff && (mask & _search_bloom_mask(_nthbyte(s, off + n + 1))) == 0
            off += n
        end
        off += 1
    end

    return 0
end
|
||||
|
||||
# Byte arrays go straight to the byte-level implementation.
function searchindex(s::ByteArray, t::ByteArray, i)
    return _searchindex(s, t, i)
end
|
||||
"""
    searchindex(s::AbstractString, substring, [start::Integer])

Like [`search`](@ref), except that only the index at which the substring begins is
returned, or `0` if it is not found.

```jldoctest
julia> searchindex("Hello to the world", "z")
0

julia> searchindex("JuliaLang","Julia")
1

julia> searchindex("JuliaLang","Lang")
6
```
"""
function searchindex(s::AbstractString, t::AbstractString, i::Integer)
    return _searchindex(s, t, i)
end

# Without a start index, search from the string's initial iteration state.
function searchindex(s::AbstractString, t::AbstractString)
    return searchindex(s, t, start(s))
end

# Single-character needle, with and without an explicit start index.
function searchindex(s::AbstractString, c::Char, i::Integer)
    return _searchindex(s, c, i)
end

function searchindex(s::AbstractString, c::Char)
    return searchindex(s, c, start(s))
end
|
||||
|
||||
# `String`-in-`String` search. A needle whose last index is 1 is looked up as the single
# character `t[1]`; every other needle is handled by the byte-level `_searchindex`.
function searchindex(s::String, t::String, i::Integer=1)
    return endof(t) == 1 ? search(s, t[1], i) : _searchindex(s, t, i)
end
|
||||
|
||||
# Turn the start index reported by `searchindex` into a range.
#   empty `t`      -> the empty range `idx:idx-1`
#   match at `idx` -> `idx:idx+endof(t)-1`
#   no match       -> `0:-1`
function _search(s, t, i::Integer)
    from = searchindex(s, t, i)
    if isempty(t)
        return from:(from - 1)
    end
    upto = from > 0 ? from + endof(t) - 1 : -1
    return from:upto
end
|
||||
|
||||
# Range-returning substring search for strings and for byte arrays; both delegate to
# `_search`, starting at `start(s)` unless a start index is supplied.
function search(s::AbstractString, t::AbstractString, i::Integer=start(s))
    return _search(s, t, i)
end

function search(s::ByteArray, t::ByteArray, i::Integer=start(s))
    return _search(s, t, i)
end
|
||||
|
||||
# Last occurrence of any of the characters `c` in `s`, or `0` if there is none.
# The lookup runs forward over `RevString(s)`; the hit is then mapped back to an
# index of `s`.
function rsearch(s::AbstractString, c::Chars)
    revpos = search(RevString(s), c)
    if revpos == 0
        return 0
    end
    return endof(s) - revpos + 1
end
|
||||
"""
    rsearch(s::AbstractString, chars::Chars, [start::Integer])

Like [`search`](@ref), but return the last occurrence of the given characters within the
given string, searching in reverse from `start`.

```jldoctest
julia> rsearch("aaabbb","b")
6:6
```
"""
function rsearch(s::AbstractString, c::Chars, i::Integer)
    lastidx = endof(s)
    # Search forward in the reversed string from the position that mirrors `i`.
    revpos = search(RevString(s), c, lastidx - i + 1)
    if revpos == 0
        return 0
    end
    # Mirror the hit back into an index of `s`.
    return lastidx - revpos + 1
end
|
||||
|
||||
# Generic reverse substring search. Both needle and haystack are walked through
# `RevString` wrappers, so a candidate is verified from the needle's last element back
# towards its first. Returns the index in `s` at which the match begins, or `0` when
# there is none. For an empty `t` the index `i` itself is returned, after checking that it
# lies between 1 and one past the last index of `s` (otherwise a `BoundsError` is thrown).
function _rsearchindex(s, t, i)
    if isempty(t)
        (1 <= i <= nextind(s, endof(s))) || throw(BoundsError(s, i))
        return i
    end

    rt = RevString(t)
    rs = RevString(s)
    send = endof(s)

    # First element of the reversed needle, and the state of its remainder.
    head, rest_state = next(rt, start(rt))

    pos = i
    while true
        # Previous place (at or before `pos`) where that element occurs in `s`.
        pos = rsearch(s, head, pos)
        pos == 0 && return 0

        # State in the reversed haystack just past the matched element.
        after_head = next(rs, send - pos + 1)[2]
        tstate = rest_state
        sstate = after_head
        matched = true

        # Compare the remaining needle elements, moving backwards through `s`.
        while !done(rt, tstate)
            if done(rs, sstate)
                # reached the front of `s` with needle elements left over
                matched = false
                break
            end
            sc, sstate = next(rs, sstate)
            tc, tstate = next(rt, tstate)
            if sc != tc
                matched = false
                break
            end
        end

        # Map the final reversed-haystack state back to an index of `s`.
        matched && return nextind(s, send - sstate + 1)

        # Continue searching before the rejected candidate.
        pos = send - after_head + 1
    end
end
|
||||
|
||||
# Byte-level reverse search of `t` inside `s`: the match has to end at or before byte
# index `k`. Returns the 1-based byte index where the match begins, or `0` if there is
# none. Mirrors the forward byte search: a 64-bit mask of the needle's bytes (see
# `_search_bloom_mask`) allows jumping a whole needle length when the byte just before the
# current window cannot belong to the needle.
function _rsearchindex(s::Union{String,ByteArray}, t::Union{String,ByteArray}, k)
    n = sizeof(t)   # needle size in bytes
    m = sizeof(s)   # haystack size in bytes

    # Degenerate sizes are answered without scanning.
    if n == 0
        return (0 <= k <= m) ? max(k, 1) : 0
    end
    m == 0 && return 0
    n == 1 && return rsearch(s, _nthbyte(t, 1), k)

    # `room + 1` is the right-most index at which the needle still fits.
    room = m - n
    (room < 0 || k <= 0) && return 0

    # Preprocess the needle back to front: accumulate the byte mask, and derive the shift
    # used after a failed candidate from the smallest position > 1 holding the first byte.
    mask = UInt64(0)
    shift = n - 1
    firstbyte = _nthbyte(t, 1)
    for idx in n:-1:1
        b = _nthbyte(t, idx)
        mask |= _search_bloom_mask(b)
        if b == firstbyte && idx > 1
            shift = idx - 2
        end
    end

    pos = min(k - n + 1, room + 1)   # candidate start index, moving towards 1
    while pos > 0
        if _nthbyte(s, pos) == firstbyte
            # First byte lines up: compare the remaining n-1 bytes of the window.
            same = 1
            while same < n && _nthbyte(s, pos + same) == _nthbyte(t, same + 1)
                same += 1
            end

            # Every byte agreed, so the whole window matches.
            same == n && return pos

            # Mismatch: skip a full needle length if the byte before the window is not in
            # the needle at all, otherwise fall back to the precomputed shift.
            if pos > 1 && (mask & _search_bloom_mask(_nthbyte(s, pos - 1))) == 0
                pos -= n
            else
                pos -= shift
            end
        elseif pos > 1 && (mask & _search_bloom_mask(_nthbyte(s, pos - 1))) == 0
            pos -= n
        end
        pos -= 1
    end

    return 0
end
|
||||
|
||||
# Byte arrays go straight to the byte-level reverse implementation.
function rsearchindex(s::ByteArray, t::ByteArray, i::Integer)
    return _rsearchindex(s, t, i)
end
|
||||
"""
    rsearchindex(s::AbstractString, substring, [start::Integer])

Like [`rsearch`](@ref), except that only the index at which the substring begins is
returned, or `0` if it is not found.

```jldoctest
julia> rsearchindex("aaabbb","b")
6

julia> rsearchindex("aaabbb","a")
3
```
"""
function rsearchindex(s::AbstractString, t::AbstractString, i::Integer)
    return _rsearchindex(s, t, i)
end

# Without a start index the search begins at the last index of `s`. Two empty strings
# are answered with 1 directly instead of calling the three-argument method.
function rsearchindex(s::AbstractString, t::AbstractString)
    if isempty(s) && isempty(t)
        return 1
    end
    return rsearchindex(s, t, endof(s))
end
|
||||
|
||||
# Reverse `String`-in-`String` search over the whole haystack. A needle whose last index
# is 1 is looked up as the single character `t[1]`; every other needle is handled by the
# byte-level `_rsearchindex`, starting from the final byte of `s`.
function rsearchindex(s::String, t::String)
    return endof(t) == 1 ? rsearch(s, t[1]) : _rsearchindex(s, t, sizeof(s))
end
|
||||
|
||||
# Reverse `String`-in-`String` search that starts from index `i`.
function rsearchindex(s::String, t::String, i::Integer)
    tend = endof(t)

    # A needle whose last index is 1 is looked up as the single character `t[1]`.
    tend == 1 && return rsearch(s, t[1], i)

    # Any other non-empty needle: byte-level search, bounded by the byte just before the
    # index that follows `i`.
    tend != 0 && return _rsearchindex(s, t, nextind(s, i) - 1)

    # Empty needle: 0 when `i` is past the end of `s`, 1 when `i` is 0, otherwise `i`.
    i > sizeof(s) && return 0
    return i == 0 ? 1 : i
end
|
||||
|
||||
# Turn the start index reported by `rsearchindex` into a range.
#   empty `t`      -> the empty range `idx:idx-1`
#   match at `idx` -> `idx:idx+endof(t)-1`
#   no match       -> `0:-1`
function _rsearch(s, t, i::Integer)
    from = rsearchindex(s, t, i)
    if isempty(t)
        return from:(from - 1)
    end
    upto = from > 0 ? from + endof(t) - 1 : -1
    return from:upto
end
|
||||
|
||||
# Range-returning reverse substring search for strings and for byte arrays; both delegate
# to `_rsearch`, starting at `endof(s)` unless a start index is supplied.
function rsearch(s::AbstractString, t::AbstractString, i::Integer=endof(s))
    return _rsearch(s, t, i)
end

function rsearch(s::ByteArray, t::ByteArray, i::Integer=endof(s))
    return _rsearch(s, t, i)
end
|
||||
"""
    contains(haystack::AbstractString, needle::AbstractString)

Return `true` if `needle` occurs as a substring of `haystack`.

```jldoctest
julia> contains("JuliaLang is pretty cool!", "Julia")
true
```
"""
function contains(haystack::AbstractString, needle::AbstractString)
    # `searchindex` reports 0 exactly when the needle is absent.
    return searchindex(haystack, needle) != 0
end
|
||||
|
||||
# `in` is deliberately not a substring test: calling it with two strings raises an error
# that points the caller at `contains`.
function in(::AbstractString, ::AbstractString)
    return error("use contains(x,y) for string containment")
end
|
||||
Reference in New Issue
Block a user