485 lines
15 KiB
Julia
485 lines
15 KiB
Julia
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||
|
||
# constructors: a String can be built from raw bytes or from another String
for src in ([0x61, 0x62, 0x63, 0x21], "abc!")
    @test String(src) == "abc!"
end

# basic properties: empty `string()`, element type, first iteration state, ordering
@test isempty(string())
@test eltype(GenericString) == Char
@test start("abc") == 1
@test cmp("ab", "abc") == -1
|
||
# {starts,ends}with
# prefixes may be given as a Char or as a string
for prefix in ('a', "a", "ab")
    @test startswith("abcd", prefix)
end
@test !startswith("ab", "abcd")
@test !startswith("abcd", "bc")
# suffixes may be given as a Char or as a string
for suffix in ('d', "d", "cd")
    @test endswith("abcd", suffix)
end
@test !endswith("abcd", "dc")
@test !endswith("cd", "abcd")
# the comparison continues past an embedded NUL
@test startswith("ab\0cd", "ab\0c")
@test !startswith("ab\0cd", "ab\0d")
|
||
|
||
# filtering a string with a predicate keeps the matching characters, in order
@test filter(ch -> in(ch, ['f', 'o']), "foobar") == "foo"
|
||
|
||
# string iteration, and issue #1454:
# splatting a one-character string into vcat must give exactly one element
str = "é"
str_a = vcat(str...)
@test length(str_a) == 1
@test str_a[1] == str[1]

# slicing the full range of a string with a multi-byte character returns the string
str = "s\u2200"
@test str[1:end] == str
|
||
|
||
# sizeof counts bytes: three ASCII characters and one 3-byte character both give 3
for text in ("abc", "\u2222")
    @test sizeof(text) == 3
end

# issue #3597
@test string(GenericString("Test")[1:1], "X") == "TX"
|
||
|
||
# Round trip: printing a random integer in base `b` and parsing it back gives the
# same value, for every base 2..62 and every integer type (10 samples each).
for T in (UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64, Int64, UInt128, Int128, BigInt)
    for b in 2:62
        for _ in 1:10
            # BigInt has no rand method here, so sample an Int128 and widen it
            n = if T == BigInt
                BigInt(rand(Int128))
            else
                rand(T)
            end
            @test parse(T, base(b, n), b) == n
        end
    end
end
|
||
|
||
# issue #6027
let bad = Char(0xdcdb)
    # make symbol with invalid char
    sym = Symbol(bad)
    expected = string(bad)
    @test string(sym) == expected
    @test String(sym) == expected
    @test expand(sym) === sym
    # parsing source text that contains the invalid char yields an :error expression
    res = string(first(parse(string(bad, " = 1"), 1, raise=false)))
    @test res == """\$(Expr(:error, "invalid character \\\"\\udcdb\\\"\"))"""
end
|
||
|
||
# Symbol construction from one or several pieces, and symbol ordering
@test Symbol("asdf") === :asdf
@test Symbol(:abc, "def", 'g', "hi", 0) === :abcdefghi0
@test :a < :b

# gensym: generated names start with "##" and differ on every call
@test startswith(string(gensym("asdf")), "##asdf#")
@test gensym("asdf") != gensym("asdf")
@test gensym() != gensym()
@test startswith(string(gensym()), "##")

# names containing an embedded NUL are rejected
for make in (Symbol, gensym)
    @test_throws ArgumentError make("ab\0")
end
|
||
|
||
# issue #6949: writing an array of substrings writes their bytes back to back
let buf = IOBuffer(), parts = split("1 2 3")
    @test write(buf, parts) == 3
    @test String(take!(buf)) == "123"
    # same result when the generic AbstractArray method is invoked explicitly
    @test invoke(write, Tuple{IO, AbstractArray}, buf, parts) == 3
    @test String(take!(buf)) == "123"
end
|
||
|
||
# issue #7248
# index conversions reject positions before the start and past the end,
# for both an ASCII string and one ending in a multi-byte character
for i in (-1, 10), word in ("hello", "hellø"), conv in (ind2chr, chr2ind)
    @test_throws BoundsError conv(word, i)
end
# checkbounds rejects scalars, ranges and vectors that leave 1:5 ...
for idx in (0, 6, 0:3, 4:6, [0:3;], [4:6;])
    @test_throws BoundsError checkbounds("hello", idx)
end
# ... and accepts the ones that stay inside
for idx in (2, 1:5, [1:5;])
    @test checkbounds("hello", idx)
end
|
||
|
||
# issue #15624 (indexing with out of bounds empty range)
for text in ("", "hello", "hellø")
    @test text[10:9] == ""
end
# same for SubStrings, whether their end offset is past the parent or empty
for base_str in ("hello", "hellø", ""), stop in (6, 0)
    @test SubString(base_str, 1, stop)[10:9] == ""
end
|
||
|
||
|
||
#=
# issue #7764
let
    srep = repeat("Σβ",2)
    s="Σβ"
    ss=SubString(s,1,endof(s))

    @test repeat(ss,2) == "ΣβΣβ"

    @test endof(srep) == 7

    @test next(srep, 3) == ('β',5)
    @test next(srep, 7) == ('β',9)

    @test srep[7] == 'β'
    @test_throws BoundsError srep[8]
end
=#
|
||
|
||
# This caused JuliaLang/JSON.jl#82
let ascii_chars = '\x00':'\x7f'
    @test first(ascii_chars) === '\x00'
    @test last(ascii_chars) === '\x7f'
end

# make sure substrings handle last code unit even if not start of codepoint
let combined = "x\u0302"
    @test combined[1:3] == combined
end
|
||
|
||
# issue #9781
# float(SubString) wasn't tolerant of trailing whitespace, unlike "normal"
# strings. This also checks that parsing isn't too tolerant: arbitrary trailing
# characters must still be rejected.
for T in (Float64, Float32)
    @test parse(T, "1\n") == 1.0
    @test [parse(T, field) for field in split("0,1\n", ",")][2] == 1.0
    @test_throws ArgumentError parse(T, split("0,1 X\n", ",")[2])
end
|
||
|
||
# ucfirst upper-cases only the first character; empty and non-letter input is unchanged
for (input, expected) in (("Hola", "Hola"), ("hola", "Hola"), ("", ""), ("*", "*"))
    @test ucfirst(input) == expected
end

# lcfirst lower-cases only the first character; empty and non-letter input is unchanged
for (input, expected) in (("Hola", "hola"), ("hola", "hola"), ("", ""), ("*", "*"))
    @test lcfirst(input) == expected
end
|
||
|
||
# test AbstractString functions at beginning of string.jl
# Minimal AbstractString subtype that defines no string methods of its own, so
# the calls below reach the generic AbstractString fallbacks.
struct tstStringType <: AbstractString
    data::Vector{UInt8}
end
tstr = tstStringType("12")
# the fallbacks are expected to raise ErrorException
@test_throws ErrorException endof(tstr)
@test_throws ErrorException next(tstr, true)
|
||
|
||
# generic (non-String) AbstractString behaviour, exercised through GenericString
gstr = GenericString("12")
@test typeof(string(gstr)) == GenericString

# conversions to byte vector, Char vector and Symbol
@test convert(Array{UInt8}, gstr) == [49, 50]
@test convert(Array{Char,1}, gstr) == ['1', '2']
@test convert(Symbol, gstr) == Symbol("12")

# indexing with a scalar, a range and a vector
@test gstr[1] == '1'
@test gstr[1:1] == "1"
@test gstr[[1]] == "1"

@test done(eachindex("foobar"), 7)
@test eltype(Base.EachStringIndex) == Int
@test map(uppercase, "foó") == "FOÓ"
@test chr2ind("fóobar", 3) == 4

@test Symbol(gstr) == Symbol("12")

@test_throws ErrorException sizeof(gstr)

@test length(GenericString("")) == 0

@test nextind(1:1, 1) == 2
@test nextind([1], 1) == 2

@test ind2chr(gstr, 2) == 2
|
||
|
||
# issue #10307
@test typeof(map(Int16, AbstractString[])) == Vector{Int16}

# the extreme values of every fixed-width integer type survive print + tryparse
for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128),
        extreme in (typemax(T), typemin(T))
    @test get(tryparse(T, string(extreme))) == extreme
end

# appending a digit to an extreme signed value no longer parses as T
for T in (Int8, Int16, Int32, Int64, Int128), extreme in (typemax(T), typemin(T))
    @test isnull(tryparse(T, string(extreme, "0")))
end
|
||
|
||
# issue #11142
# ASCII data: read back through a raw pointer, whole and by explicit length
s = "abcdefghij"
sp = pointer(s)
@test unsafe_string(sp) == s
@test unsafe_string(sp, 5) == "abcde"
@test isa(unsafe_string(sp), String)
# the same checks with 2-, 3- and 4-byte characters after the ASCII prefix
s = "abcde\uff\u2000\U1f596"
sp = pointer(s)
@test unsafe_string(sp) == s
@test unsafe_string(sp, 5) == "abcde"
@test isa(unsafe_string(sp), String)
|
||
|
||
# tryparse returns the parsed value for well-formed input and a null result
# when a stray character follows the number
for (T, good, value, bad) in ((BigInt, "1234567890", BigInt(1234567890), "1234567890-"),
                              (Float64, "64", 64.0, "64o"),
                              (Float32, "32", 32.0f0, "32o"))
    @test get(tryparse(T, good)) == value
    @test isnull(tryparse(T, bad))
end
|
||
|
||
# issue #10994: handle embedded NUL chars for string parsing
let inttypes = (BigInt, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128)
    # parse throws on a trailing NUL ...
    for T in inttypes
        @test_throws ArgumentError parse(T, "1\0")
    end
    # ... and tryparse reports failure, for the float types as well
    for T in (inttypes..., Float64, Float32)
        @test isnull(tryparse(T, "1\0"))
    end
end
|
||
# a normalized non-ASCII string, and a Symbol made from it, round-trip through Cstring
let s = normalize_string("tést", :NFKC), cs = Base.cconvert(Cstring, s)
    @test unsafe_string(Base.unsafe_convert(Cstring, cs)) == s
    @test unsafe_string(convert(Cstring, Symbol(s))) == s
end
# a string with an embedded NUL cannot be converted to a Cstring
@test_throws ArgumentError Base.unsafe_convert(Cstring, Base.cconvert(Cstring, "ba\0d"))
|
||
|
||
# Duplicate `s` with the C library's strdup (`_strdup` on Windows) and return the
# copy as a Cstring.
function cstrdup(s)
    @static if is_windows()
        return ccall(:_strdup, Cstring, (Cstring,), s)
    else
        return ccall(:strdup, Cstring, (Cstring,), s)
    end
end
|
||
# a C-side copy of a string reads back unchanged; free the copy afterwards
let dup = cstrdup("hello")
    @test unsafe_string(dup) == "hello"
    Libc.free(dup)
end
|
||
|
||
# iteration over characters, over byte indices, and over (count, character) pairs
let word = "ḟøøƀäṙ"
    @test [ch for ch in word] == ['ḟ', 'ø', 'ø', 'ƀ', 'ä', 'ṙ']
    @test [idx for idx in eachindex(word)] == [1, 4, 6, 8, 10, 12]
    @test [pair for pair in enumerate(word)] ==
          [(1, 'ḟ'), (2, 'ø'), (3, 'ø'), (4, 'ƀ'), (5, 'ä'), (6, 'ṙ')]
end
|
||
|
||
# test all edge conditions
# code points on either side of the surrogate block (0xd800-0xdfff) and of the
# upper limit 0x10ffff
for (codepoint, expected) in ((0, true), (0xd7ff, true),
                              (0xd800, false), (0xdfff, false),
                              (0xe000, true), (0xffff, true),
                              (0x10000, true), (0x10ffff, true),
                              (0x110000, false))
    @test isvalid(Char, codepoint) == expected
end
|
||
# byte sequences paired with whether they are valid UTF-8; checked both on the
# raw bytes and on a String built from them
for (bytes, expected) in (
        (b"\x00", true), (b"\x7f", true),
        (b"\x80", false), (b"\xbf", false), (b"\xc0", false), (b"\xff", false),
        (b"\xc0\x80", false), (b"\xc1\x80", false),
        (b"\xc2\x80", true), (b"\xc2\xc0", false),
        (b"\xed\x9f\xbf", true), (b"\xed\xa0\x80", false), (b"\xed\xbf\xbf", false),
        (b"\xee\x80\x80", true), (b"\xef\xbf\xbf", true),
        (b"\xf0\x90\x80\x80", true), (b"\xf4\x8f\xbf\xbf", true),
        (b"\xf4\x90\x80\x80", false), (b"\xf5\x80\x80\x80", false),
        (b"\ud800\udc00", false), (b"\udbff\udfff", false),
        (b"\ud800\u0100", false), (b"\udc00\u0100", false), (b"\udc00\ud800", false))
    @test isvalid(String, bytes) == expected == isvalid(String(bytes))
end

# Issue #11203
@test isvalid(String, UInt8[]) == true == isvalid("")
|
||
|
||
# Check UTF-8 characters
# Check ASCII range (true),
# then single continuation bytes and lead bytes with no following continuation bytes (false)
for (rng, ok) in ((0:0x7f, true), (0x80:0xff, false)), single in rng
    @test isvalid(String, UInt8[single]) == ok
end
# Check overlong lead bytes for 2-character sequences (false)
for lead in 0xc0:0xc1
    @test !isvalid(String, UInt8[lead, 0x80])
end
# Check valid lead-in to two-byte sequences (true only with a continuation byte)
for lead in 0xc2:0xdf
    for (rng, ok) in ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false)), second in rng
        @test isvalid(String, UInt8[lead, second]) == ok
    end
end
|
||
# Check three-byte sequences
for lead_range in (0xe0:0xec, 0xee:0xef), lead in lead_range
    # Check for short sequence
    @test !isvalid(String, UInt8[lead])
    for (rng, ok) in ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false)), second in rng
        @test !isvalid(String, UInt8[lead, second])
        @test isvalid(String, UInt8[lead, second, 0x80]) == ok
    end
end
# Lead byte 0xed: second bytes 0x80-0x9f give code points 0xd000-0xd7ff (Hangul)
# and are valid; second bytes from 0xa0 up would start a surrogate and are rejected.
# Two-byte prefixes are short sequences and always invalid.
for (rng, ok) in ((0x00:0x7f, false), (0x80:0x9f, true), (0xa0:0xff, false)), second in rng
    @test !isvalid(String, UInt8[0xed, second])
    @test isvalid(String, UInt8[0xed, second, 0x80]) == ok
end
|
||
# Check valid four-byte sequences
for lead in 0xf0:0xf4
    # accepted second-byte range depends on the lead byte:
    # 0xf0 accepts 0x90-0xbf, 0xf4 accepts 0x80-0x8f, the rest accept 0x80-0xbf
    second_byte_cases = if lead == 0xf0
        ((0x00:0x8f, false), (0x90:0xbf, true), (0xc0:0xff, false))
    elseif lead == 0xf4
        ((0x00:0x7f, false), (0x80:0x8f, true), (0x90:0xff, false))
    else
        ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false))
    end
    for (rng, ok) in second_byte_cases, second in rng
        # two- and three-byte prefixes are short sequences, always invalid
        @test !isvalid(String, UInt8[lead, second])
        @test !isvalid(String, UInt8[lead, second, 0x80])
        @test isvalid(String, UInt8[lead, second, 0x80, 0x80]) == ok
    end
end
|
||
# Check five-byte sequences, should be invalid
for byt = 0xf8:0xfb
    @test isvalid(String, UInt8[byt, 0x80, 0x80, 0x80, 0x80]) == false
end
# Check six-byte sequences, should be invalid
for byt = 0xfc:0xfd
    @test isvalid(String, UInt8[byt, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
end
# Check seven-byte sequences, should be invalid.
# A seven-byte sequence is the lead byte 0xfe followed by six continuation bytes.
# The shorter form (five continuation bytes) was the only one tested before; it is
# kept because it must be rejected as well.
@test isvalid(String, UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
@test isvalid(String, UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
|
||
|
||
# 11482
|
||
|
||
# lower and upper
# each row is (function, input, expected result)
for (f, input, expected) in ((uppercase, "aBc", "ABC"),
                             (uppercase, 'A', 'A'),
                             (uppercase, 'a', 'A'),
                             (lowercase, "AbC", "abc"),
                             (lowercase, 'A', 'a'),
                             (lowercase, 'a', 'a'),
                             (uppercase, 'α', '\u0391'),
                             (lowercase, 'Δ', 'δ'),
                             (lowercase, '\U118bf', '\U118df'),
                             (uppercase, '\U1044d', '\U10425'),
                             (ucfirst, "Abc", "Abc"),
                             (ucfirst, "abc", "Abc"),
                             (lcfirst, "ABC", "aBC"),
                             (lcfirst, "aBC", "aBC"))
    @test f(input) == expected
end
# the same first-letter functions on a generic string type, including empty input
for (f, input, expected) in ((ucfirst, "", ""),
                             (lcfirst, "", ""),
                             (ucfirst, "a", "A"),
                             (lcfirst, "A", "a"),
                             (lcfirst, "a", "a"),
                             (ucfirst, "A", "A"))
    @test f(GenericString(input)) == expected
end
|
||
|
||
# titlecase
# a character whose title-case form differs from its upper-case form
@test titlecase('lj') == 'Lj'
# only the first letter of each whitespace-separated word changes
for (input, expected) in (("ljubljana", "Ljubljana"),
                          ("aBc ABC", "ABc ABC"),
                          ("abcD EFG\n\thij", "AbcD EFG\n\tHij"))
    @test titlecase(input) == expected
end
|
||
|
||
# issue #11464: uppercase/lowercase of GenericString becomes a String
str = "abcdef\uff\uffff\u10ffffABCDEF"
for (f, plain) in ((uppercase, "abcdef"), (lowercase, "ABCDEF"))
    @test typeof(f(plain)) == String
    @test typeof(f(GenericString(str))) == String
end
|
||
|
||
# Mapping functions used to probe `map` over strings:
# foomap returns a Bool rather than a Char
foomap(ch) = ch > 'A'
# foobar ignores its argument and returns the surrogate code point 0xd800
foobar(ch) = Char(0xd800)
# foobaz ignores its argument and returns the Char with all 32 bits set
foobaz(ch) = reinterpret(Char, typemax(UInt32))
|
||
# map over a string must produce characters: a Bool-valued function is rejected
@test_throws ArgumentError map(foomap, GenericString(str))
# for each Char-valued function, the result equals 17 copies of the given byte
# pattern, one per character of `str`
for (f, unit) in ((foobar, b"\ud800"), (foobaz, b"\ufffd"))
    @test map(f, GenericString(str)) == String(repeat(unit, outer=[17]))
end
|
||
|
||
# element-wise concatenation: scalar-vector, vector-scalar, and column-by-row
@test "a" .* ["b", "c"] == ["ab", "ac"]
@test ["b", "c"] .* "a" == ["ba", "ca"]
@test ["a", "b"] .* ["c" "d"] == ["ac" "ad"; "bc" "bd"]

# the multiplicative identity for strings is "", so an empty product is "" too
@test one(String) == ""
@test prod(fill("*", 3)) == "***"
@test prod(fill("*", 0)) == ""
|
||
|
||
# Make sure NULL pointers are handled consistently by String
let null = Ptr{UInt8}(0)
    @test_throws ArgumentError unsafe_string(null)
    @test_throws ArgumentError unsafe_string(null, 10)
end

# ascii works on ASCII strings and fails on non-ASCII strings,
# for a plain String and for a generic string type alike
for wrap in (identity, GenericString)
    @test ascii(wrap("Hello, world")) == "Hello, world"
    @test typeof(ascii(wrap("Hello, world"))) == String
end
for wrap in (identity, GenericString)
    @test_throws ArgumentError ascii(wrap("Hello, ∀"))
end
|
||
|
||
# issue #17271: endof() doesn't throw an error even with invalid strings
for (bytes, last_index) in ((b"\x90", 0), (b"\xce", 1))
    @test endof(String(bytes)) == last_index
end

# issue #17624, missing getindex method for String
@test "abc"[:] == "abc"

# issue #18280: next/nextind must return past String's underlying data
for text in ("Hello", "Σ", "こんにちは", "😊😁")
    last_idx = endof(text)
    nbytes = sizeof(text)
    @test next(text, last_idx)[2] > nbytes
    @test nextind(text, last_idx) > nbytes
end
|
||
|
||
# Test cmp with AbstractStrings that don't index the same as UTF-8, which would include
# (LegacyString.)UTF16String and (LegacyString.)UTF32String, among others.

# String type backed by a vector of characters, so its indices count characters.
mutable struct CharStr <: AbstractString
    chars::Vector{Char}
    function CharStr(x)
        return new(collect(x))
    end
end

# Iteration and `endof` are forwarded to the underlying character vector.
function Base.start(x::CharStr)
    return start(x.chars)
end
function Base.next(x::CharStr, i::Int)
    return next(x.chars, i)
end
function Base.done(x::CharStr, i::Int)
    return done(x.chars, i)
end
function Base.endof(x::CharStr)
    return endof(x.chars)
end

# Simple case, with just ANSI Latin 1 characters
@test "áB" != CharStr("áá") # returns false with bug
@test cmp("áB", CharStr("áá")) == -1 # returns 0 with bug

# Case with Unicode characters
@test cmp("\U1f596\U1f596", CharStr("\U1f596")) == 1 # Gives BoundsError with bug
@test cmp(CharStr("\U1f596"), "\U1f596\U1f596") == -1
|
||
|
||
# repeat function: repeating a pair three times equals repeating the unit six times
for (pair, unit, expected) in (("xx", "x", "xxxxxx"), ("αα", "α", "αααααα"))
    @test repeat(pair, 3) == repeat(unit, 6) == expected
end

# issue #12495: check that logical indexing attempt raises ArgumentError
for mask in ([true, false, true], BitArray([true, false, true]))
    @test_throws ArgumentError "abc"[mask]
end
|
||
|
||
@testset "invalid code point" begin
    # 0xba is a continuation byte with no lead byte before it
    s = String([0x61, 0xba, 0x41])
    @test !isvalid(s)
    @test_throws UnicodeError s[2]
    # capture the exception itself to check how it is displayed
    err = try
        s[2]
    catch ex
        ex
    end
    @test sprint(show, err) == "UnicodeError: invalid character index 2 (0xba is a continuation byte)"
end
|