mollusk 0e4acfb8f2 fix incorrect folder name for julia-0.6.x
Former-commit-id: ef2c7401e0876f22d2f7762d182cfbcd5a7d9c70
2018-06-11 03:28:36 -07:00

485 lines
15 KiB
Julia
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# This file is a part of Julia. License is MIT: https://julialang.org/license
# constructors and a few basic string properties
# a String built from a vector of byte values compares equal to the matching literal
@test String([0x61,0x62,0x63,0x21]) == "abc!"
# String applied to an existing string compares equal to that string
@test String("abc!") == "abc!"
# string() called without arguments produces an empty string
@test isempty(string())
@test eltype(GenericString) == Char
# the initial iteration state of a string is 1
@test start("abc") == 1
# a proper prefix compares as smaller than the longer string
@test cmp("ab","abc") == -1
# {starts,ends}with
# Both Char and String needles are exercised, including needles longer than
# the haystack and strings containing an embedded NUL.
@test startswith("abcd", 'a')
@test startswith("abcd", "a")
@test startswith("abcd", "ab")
@test !startswith("ab", "abcd")
@test !startswith("abcd", "bc")
@test endswith("abcd", 'd')
@test endswith("abcd", "d")
@test endswith("abcd", "cd")
@test !endswith("abcd", "dc")
@test !endswith("cd", "abcd")
@test startswith("ab\0cd", "ab\0c")
@test !startswith("ab\0cd", "ab\0d")
# filter on a string keeps only the characters satisfying the predicate.
# The membership operator was missing from the predicate (`x ['f', 'o']`),
# so it is spelled out with the ASCII keyword `in`.
@test filter(x -> x in ['f', 'o'], "foobar") == "foo"
# string iteration, and issue #1454
str = "é"
# splat the characters of the string into vcat; a one-character string must
# produce exactly one element, equal to the string's first character
str_a = vcat(str...)
@test length(str_a)==1
@test str_a[1] == str[1]
str = "s\u2200"
# slicing from the first index to `end` must give back the whole string
@test str[1:end] == str
# sizeof
# the expected values below are 3 for three ASCII characters and 3 for the
# single character \u2222
@test sizeof("abc") == 3
@test sizeof("\u2222") == 3
# issue #3597
# a range-indexed GenericString can be passed to string() together with a String
@test string(GenericString("Test")[1:1], "X") == "TX"
# Round-trip check: for every listed integer type and every base from 2 to 62,
# ten random values are formatted with `base` and parsed back; the parsed
# result must equal the original value.
for T in (UInt8,Int8,UInt16,Int16,UInt32,Int32,UInt64,Int64,UInt128,Int128,BigInt)
    for b in 2:62
        for _ in 1:10
            # BigInt values are drawn from the Int128 range
            n = T == BigInt ? BigInt(rand(Int128)) : rand(T)
            @test parse(T, base(b, n), b) == n
        end
    end
end
# issue #6027
let
# make symbol with invalid char
sym = Symbol(Char(0xdcdb))
# both string() and String() of the symbol must match the string form of the character
@test string(sym) == string(Char(0xdcdb))
@test String(sym) == string(Char(0xdcdb))
# expanding the bare symbol must return the identical symbol
@test expand(sym) === sym
# parse with raise=false returns an expression instead of throwing; its first
# element is rendered to a string and compared with the expected error expression
res = string(parse(string(Char(0xdcdb)," = 1"),1,raise=false)[1])
@test res == """\$(Expr(:error, "invalid character \\\"\\udcdb\\\"\"))"""
end
# Symbol construction from a string and from mixed arguments
@test Symbol("asdf") === :asdf
@test Symbol(:abc,"def",'g',"hi",0) === :abcdefghi0
@test :a < :b
# gensym results carry a "##" prefix (plus the tag when one is given) and
# successive calls must not compare equal
@test startswith(string(gensym("asdf")),"##asdf#")
@test gensym("asdf") != gensym("asdf")
@test gensym() != gensym()
@test startswith(string(gensym()),"##")
# names containing an embedded NUL are rejected
@test_throws ArgumentError Symbol("ab\0")
@test_throws ArgumentError gensym("ab\0")
# issue #6949
# Writing an array of (sub)strings to an IO: the value returned by write is
# expected to be 3 for the three one-character pieces, and the buffer must
# contain their concatenation.
let f =IOBuffer(),
x = split("1 2 3")
@test write(f, x) == 3
@test String(take!(f)) == "123"
# same check, forcing the (IO, AbstractArray) method through invoke
@test invoke(write, Tuple{IO, AbstractArray}, f, x) == 3
@test String(take!(f)) == "123"
end
# issue #7248
# ind2chr / chr2ind must throw BoundsError for positions below and above the
# string, for an ASCII string and for one ending in a non-ASCII character
@test_throws BoundsError ind2chr("hello", -1)
@test_throws BoundsError chr2ind("hello", -1)
@test_throws BoundsError ind2chr("hellø", -1)
@test_throws BoundsError chr2ind("hellø", -1)
@test_throws BoundsError ind2chr("hello", 10)
@test_throws BoundsError chr2ind("hello", 10)
@test_throws BoundsError ind2chr("hellø", 10)
@test_throws BoundsError chr2ind("hellø", 10)
# checkbounds throws for scalar, range and vector indices that leave 1:5 ...
@test_throws BoundsError checkbounds("hello", 0)
@test_throws BoundsError checkbounds("hello", 6)
@test_throws BoundsError checkbounds("hello", 0:3)
@test_throws BoundsError checkbounds("hello", 4:6)
@test_throws BoundsError checkbounds("hello", [0:3;])
@test_throws BoundsError checkbounds("hello", [4:6;])
# ... and returns true for indices inside it
@test checkbounds("hello", 2)
@test checkbounds("hello", 1:5)
@test checkbounds("hello", [1:5;])
# issue #15624 (indexing with out of bounds empty range)
# an empty range such as 10:9 yields "" even when its endpoints lie outside the
# string, for String and for SubString (including empty SubStrings)
@test ""[10:9] == ""
@test "hello"[10:9] == ""
@test "hellø"[10:9] == ""
@test SubString("hello", 1, 6)[10:9] == ""
@test SubString("hello", 1, 0)[10:9] == ""
@test SubString("hellø", 1, 6)[10:9] == ""
@test SubString("hellø", 1, 0)[10:9] == ""
@test SubString("", 1, 6)[10:9] == ""
@test SubString("", 1, 0)[10:9] == ""
#=
# issue #7764
let
srep = repeat("Σβ",2)
s="Σβ"
ss=SubString(s,1,endof(s))
@test repeat(ss,2) == "ΣβΣβ"
@test endof(srep) == 7
@test next(srep, 3) == ('β',5)
@test next(srep, 7) == ('β',9)
@test srep[7] == 'β'
@test_throws BoundsError srep[8]
end
=#
# This caused JuliaLang/JSON.jl#82
# first/last of a Char range must return the endpoints as Chars (=== comparison)
@test first('\x00':'\x7f') === '\x00'
@test last('\x00':'\x7f') === '\x7f'
# make sure substrings handle last code unit even if not start of codepoint
let s = "x\u0302"
# index 3 is the last code unit of the string, not the start of a character
@test s[1:3] == s
end
# issue #9781
# float(SubString) wasn't tolerant of trailing whitespace, which was different
# to "normal" strings. This also checks we aren't being too tolerant and allowing
# any arbitrary trailing characters.
# Float64: String input, SubString input from split, and rejected trailing garbage
@test parse(Float64,"1\n") == 1.0
@test [parse(Float64,x) for x in split("0,1\n",",")][2] == 1.0
@test_throws ArgumentError parse(Float64,split("0,1 X\n",",")[2])
# same three checks for Float32
@test parse(Float32,"1\n") == 1.0
@test [parse(Float32,x) for x in split("0,1\n",",")][2] == 1.0
@test_throws ArgumentError parse(Float32,split("0,1 X\n",",")[2])
# ucfirst / lcfirst change only the case of the first character; the empty
# string and a string starting with a non-letter come back unchanged
@test ucfirst("Hola")=="Hola"
@test ucfirst("hola")=="Hola"
@test ucfirst("")==""
@test ucfirst("*")=="*"
@test lcfirst("Hola")=="hola"
@test lcfirst("hola")=="hola"
@test lcfirst("")==""
@test lcfirst("*")=="*"
# Exercise the generic AbstractString functions defined at the beginning of
# string.jl. `tstStringType` is a bare AbstractString subtype holding a byte
# vector; no string interface methods are defined for it in this section.
struct tstStringType <: AbstractString
    data::Vector{UInt8}
end
# tstStringType has no endof/next methods of its own, so the generic
# fallbacks are expected to raise ErrorException
tstr = tstStringType("12")
@test_throws ErrorException endof(tstr)
@test_throws ErrorException next(tstr, Bool(1))
# GenericString: conversions and indexing through the generic code paths
gstr = GenericString("12")
@test typeof(string(gstr))==GenericString
@test convert(Array{UInt8}, gstr) ==[49;50]
@test convert(Array{Char,1}, gstr) ==['1';'2']
@test convert(Symbol, gstr)==Symbol("12")
# scalar, range and vector indexing
@test gstr[1] == '1'
@test gstr[1:1] == "1"
@test gstr[[1]] == "1"
# state 7 is past the end of the index iterator of a six-character ASCII string
@test done(eachindex("foobar"),7)
@test eltype(Base.EachStringIndex) == Int
@test map(uppercase, "foó") == "FOÓ"
# the third character starts at index 4 because the second one is non-ASCII
@test chr2ind("fóobar",3) == 4
@test Symbol(gstr)==Symbol("12")
# sizeof is expected to raise ErrorException for GenericString
@test_throws ErrorException sizeof(gstr)
@test length(GenericString(""))==0
# nextind on non-string indexables (a range and a vector)
@test nextind(1:1, 1) == 2
@test nextind([1], 1) == 2
@test ind2chr(gstr,2)==2
# issue #10307
# mapping a type constructor over an empty string vector must still produce a
# vector with the requested element type
@test typeof(map(Int16, AbstractString[])) === Vector{Int16}
# The largest and smallest value of each fixed-width integer type must
# survive being printed and parsed back.
for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128)
    for bound in (typemax(T), typemin(T))
        printed = string(bound)
        @test get(tryparse(T, printed)) == bound
    end
end
# Appending a "0" to an extreme signed value takes it out of range, so
# tryparse must return a null result.
for T in (Int8, Int16, Int32, Int64, Int128)
    for bound in (typemax(T), typemin(T))
        too_big = string(bound, "0")
        @test isnull(tryparse(T, too_big))
    end
end
# issue #11142
# unsafe_string from a raw pointer must reproduce the string; with an explicit
# length of 5 it must yield the first five characters; the result is a String
s = "abcdefghij"
sp = pointer(s)
@test unsafe_string(sp) == s
@test unsafe_string(sp,5) == "abcde"
@test typeof(unsafe_string(sp)) == String
# same checks with non-ASCII characters following the ASCII prefix
s = "abcde\uff\u2000\U1f596"
sp = pointer(s)
@test unsafe_string(sp) == s
@test unsafe_string(sp,5) == "abcde"
@test typeof(unsafe_string(sp)) == String
# tryparse: valid input gives a value retrievable with get, input with a
# trailing junk character gives a null result
@test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890)
@test isnull(tryparse(BigInt, "1234567890-"))
@test get(tryparse(Float64, "64")) == 64.0
@test isnull(tryparse(Float64, "64o"))
@test get(tryparse(Float32, "32")) == 32.0f0
@test isnull(tryparse(Float32, "32o"))
# issue #10994: handle embedded NUL chars for string parsing
# A trailing NUL must make `parse` throw for every integer type and make
# `tryparse` return a null result for every integer and float type.
let integer_types = (BigInt, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128)
    for T in integer_types
        @test_throws ArgumentError parse(T, "1\0")
    end
    for T in (integer_types..., Float64, Float32)
        @test isnull(tryparse(T, "1\0"))
    end
end
# Conversion to Cstring and back must reproduce a normalized non-ASCII string,
# both from the string itself and from a Symbol made out of it
let s = normalize_string("tést",:NFKC)
@test unsafe_string(Base.unsafe_convert(Cstring, Base.cconvert(Cstring, s))) == s
@test unsafe_string(convert(Cstring, Symbol(s))) == s
end
# a string with an embedded NUL cannot be converted to Cstring
@test_throws ArgumentError Base.unsafe_convert(Cstring, Base.cconvert(Cstring, "ba\0d"))
# Duplicate a C string through the C runtime: `_strdup` on Windows, `strdup`
# on every other platform. The branch is selected at macro-expansion time.
function cstrdup(s)
    @static if is_windows()
        return ccall(:_strdup, Cstring, (Cstring,), s)
    else
        return ccall(:strdup, Cstring, (Cstring,), s)
    end
end
# The duplicate must read back as the original text; it is released with
# Libc.free afterwards.
let dup = cstrdup("hello")
    @test unsafe_string(dup) == "hello"
    Libc.free(dup)
end
# iteration
# iterating a string yields its characters
@test [c for c in "ḟøøƀäṙ"] == ['ḟ', 'ø', 'ø', 'ƀ', 'ä', 'ṙ']
# eachindex yields the starting index of each character; the gaps between
# consecutive indices reflect the multi-byte characters
@test [i for i in eachindex("ḟøøƀäṙ")] == [1, 4, 6, 8, 10, 12]
# enumerate pairs a 1-based counter (not the string index) with each character
@test [x for x in enumerate("ḟøøƀäṙ")] == [(1, 'ḟ'), (2, 'ø'), (3, 'ø'), (4, 'ƀ'), (5, 'ä'), (6, 'ṙ')]
# Code-point validity at each boundary: values on either side of the
# 0xd800-0xdfff block and on either side of 0x10ffff are checked against the
# expected result of isvalid(Char, value).
for (codepoint, expected) in (
        (0, true),
        (0xd7ff, true),
        (0xd800, false),
        (0xdfff, false),
        (0xe000, true),
        (0xffff, true),
        (0x10000, true),
        (0x10ffff, true),
        (0x110000, false),
    )
    @test isvalid(Char, codepoint) == expected
end
# Table of byte sequences paired with the expected validity of each sequence
for (val, pass) in (
# single bytes
(b"\x00", true),
(b"\x7f", true),
(b"\x80", false),
(b"\xbf", false),
(b"\xc0", false),
(b"\xff", false),
# two-byte sequences
(b"\xc0\x80", false),
(b"\xc1\x80", false),
(b"\xc2\x80", true),
(b"\xc2\xc0", false),
# three-byte sequences around the 0xed lead byte
(b"\xed\x9f\xbf", true),
(b"\xed\xa0\x80", false),
(b"\xed\xbf\xbf", false),
(b"\xee\x80\x80", true),
(b"\xef\xbf\xbf", true),
# four-byte sequences
(b"\xf0\x90\x80\x80", true),
(b"\xf4\x8f\xbf\xbf", true),
(b"\xf4\x90\x80\x80", false),
(b"\xf5\x80\x80\x80", false),
# byte strings written with \u escapes in the 0xd800-0xdfff range
(b"\ud800\udc00", false),
(b"\udbff\udfff", false),
(b"\ud800\u0100", false),
(b"\udc00\u0100", false),
(b"\udc00\ud800", false)
)
# chained comparison: the check on the byte vector, the expected flag and the
# check on the String built from the bytes must all agree
@test isvalid(String, val) == pass == isvalid(String(val))
end
# Issue #11203
# an empty byte vector and an empty string both count as valid
@test isvalid(String, UInt8[]) == true == isvalid("")
# UTF-8 validity, one- and two-byte inputs.
# A lone byte is valid only in the ASCII range; continuation bytes and lead
# bytes standing alone are invalid.
for (bytes, expected) in ((0:0x7f, true), (0x80:0xff, false)), b in bytes
    @test isvalid(String, UInt8[b]) == expected
end
# Lead bytes 0xc0 and 0xc1 (overlong two-byte forms) are always invalid.
for lead in 0xc0:0xc1
    @test !isvalid(String, UInt8[lead, 0x80])
end
# Lead bytes 0xc2-0xdf form a valid pair only with a second byte in 0x80-0xbf.
for lead in 0xc2:0xdf,
    (seconds, expected) in ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false)),
    second in seconds

    @test isvalid(String, UInt8[lead, second]) == expected
end
# UTF-8 validity, three-byte inputs for lead bytes 0xe0-0xec and 0xee-0xef.
# The lead byte alone and any two-byte prefix are invalid; the full triple
# follows the expected flag of the second byte's range.
for leads in (0xe0:0xec, 0xee:0xef), lead in leads
    # truncated after the lead byte
    @test !isvalid(String, UInt8[lead])
    for (seconds, expected) in ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false)),
        second in seconds

        @test !isvalid(String, UInt8[lead, second])
        @test isvalid(String, UInt8[lead, second, 0x80]) == expected
    end
end
# Lead byte 0xed (hangul characters, 0xd000-0xd7ff): a two-byte prefix is
# always invalid, and the triple is expected valid only while the second byte
# stays in 0x80-0x9f; larger second bytes would start a surrogate.
for (seconds, expected) in ((0x00:0x7f, false), (0x80:0x9f, true), (0xa0:0xff, false)),
    second in seconds

    @test !isvalid(String, UInt8[0xed, second])
    @test isvalid(String, UInt8[0xed, second, 0x80]) == expected
end
# UTF-8 validity, four-byte inputs for lead bytes 0xf0-0xf4.
# The accepted range of the second byte depends on the lead byte: 0xf0 needs
# at least 0x90, 0xf4 allows at most 0x8f, the others take 0x80-0xbf.
# Two- and three-byte prefixes are always invalid.
for lead in 0xf0:0xf4
    second_byte_cases =
        lead == 0xf0 ? ((0x00:0x8f, false), (0x90:0xbf, true), (0xc0:0xff, false)) :
        lead == 0xf4 ? ((0x00:0x7f, false), (0x80:0x8f, true), (0x90:0xff, false)) :
                       ((0x00:0x7f, false), (0x80:0xbf, true), (0xc0:0xff, false))
    for (seconds, expected) in second_byte_cases, second in seconds
        @test !isvalid(String, UInt8[lead, second])
        @test !isvalid(String, UInt8[lead, second, 0x80])
        @test isvalid(String, UInt8[lead, second, 0x80, 0x80]) == expected
    end
end
# Sequences longer than four bytes are never valid UTF-8.
# Check five-byte sequences, should be invalid
for byt = 0xf8:0xfb
    @test isvalid(String, UInt8[byt, 0x80, 0x80, 0x80, 0x80]) == false
end
# Check six-byte sequences, should be invalid
for byt = 0xfc:0xfd
    @test isvalid(String, UInt8[byt, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
end
# Check seven-byte sequences, should be invalid
# The earlier form of this check supplied only six bytes (lead byte plus five
# continuation bytes), i.e. a truncated sequence. It is kept, and the
# full-length seven-byte case that the heading describes is added.
@test isvalid(String, UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
@test isvalid(String, UInt8[0xfe, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80]) == false
# 11482 (presumably an issue or pull-request number; TODO confirm)
# lower and upper
# ASCII strings and characters
@test uppercase("aBc") == "ABC"
@test uppercase('A') == 'A'
@test uppercase('a') == 'A'
@test lowercase("AbC") == "abc"
@test lowercase('A') == 'a'
@test lowercase('a') == 'a'
# non-ASCII characters, including ones written as \U escapes
@test uppercase('α') == '\u0391'
@test lowercase('Δ') == 'δ'
@test lowercase('\U118bf') == '\U118df'
@test uppercase('\U1044d') == '\U10425'
# ucfirst / lcfirst touch only the first character
@test ucfirst("Abc") == "Abc"
@test ucfirst("abc") == "Abc"
@test lcfirst("ABC") == "aBC"
@test lcfirst("aBC") == "aBC"
# same functions on GenericString, including the empty string
@test ucfirst(GenericString("")) == ""
@test lcfirst(GenericString("")) == ""
@test ucfirst(GenericString("a")) == "A"
@test lcfirst(GenericString("A")) == "a"
@test lcfirst(GenericString("a")) == "a"
@test ucfirst(GenericString("A")) == "A"
# titlecase
# The digraph letter U+01C9 has a dedicated titlecase form, U+01C8, distinct
# from its uppercase form. The characters are written as escapes: the previous
# literals had been decomposed into the two ASCII letters 'lj' / 'Lj', which
# is not a valid Char literal.
@test titlecase('\u01c9') == '\u01c8'
@test titlecase("\u01c9ubljana") == "\u01c8ubljana"
# plain ASCII spelling of the same word
@test titlecase("ljubljana") == "Ljubljana"
# only the first letter of each word is changed; the remaining letters keep
# their case, and newline/tab count as word separators
@test titlecase("aBc ABC") == "ABc ABC"
@test titlecase("abcD EFG\n\thij") == "AbcD EFG\n\tHij"
# issue #11464: uppercase/lowercase of GenericString becomes a String
# `\u` consumes at most four hex digits, so "\u10ffff" is the character U+10FF
# followed by the two letters "ff"; the whole string has 17 characters, which
# is the repeat count used by the map checks further down
str = "abcdef\uff\uffff\u10ffffABCDEF"
@test typeof(uppercase("abcdef")) == String
@test typeof(uppercase(GenericString(str))) == String
@test typeof(lowercase("ABCDEF")) == String
@test typeof(lowercase(GenericString(str))) == String
# Helper mappings passed to `map` over strings.
# foomap answers with a Bool (is the character greater than 'A', code 65?)
# instead of a Char.
function foomap(ch)
    return ch > 'A'
end
# foobar ignores its argument and always returns the Char with code 0xd800.
function foobar(ch)
    return Char(0xd800)
end
# foobaz ignores its argument and always returns the Char whose bits are
# typemax(UInt32).
function foobaz(ch)
    return reinterpret(Char, typemax(UInt32))
end
# map over a string: a mapping that returns a non-Char (foomap) must raise
# ArgumentError; the other two mappings give 17 repetitions of the bytes
# written as b"\ud800" and b"\ufffd" respectively
@test_throws ArgumentError map(foomap, GenericString(str))
@test map(foobar, GenericString(str)) == String(repeat(b"\ud800", outer=[17]))
@test map(foobaz, GenericString(str)) == String(repeat(b"\ufffd", outer=[17]))
# broadcast string concatenation: scalar with vector, vector with scalar,
# and column vector with row matrix
@test "a".*["b","c"] == ["ab","ac"]
@test ["b","c"].*"a" == ["ba","ca"]
@test ["a","b"].*["c" "d"] == ["ac" "ad"; "bc" "bd"]
# the multiplicative identity for String is the empty string, so the product
# of an empty collection of strings is ""
@test one(String) == ""
@test prod(["*" for i in 1:3]) == "***"
@test prod(["*" for i in 1:0]) == ""
# Make sure NULL pointers are handled consistently by String
@test_throws ArgumentError unsafe_string(Ptr{UInt8}(0))
@test_throws ArgumentError unsafe_string(Ptr{UInt8}(0), 10)
# ascii works on ASCII strings and fails on non-ASCII strings
@test ascii("Hello, world") == "Hello, world"
@test typeof(ascii("Hello, world")) == String
@test ascii(GenericString("Hello, world")) == "Hello, world"
@test typeof(ascii(GenericString("Hello, world"))) == String
@test_throws ArgumentError ascii("Hello, ∀")
@test_throws ArgumentError ascii(GenericString("Hello, ∀"))
# issue #17271: endof() doesn't throw an error even with invalid strings
# expected results: 0 for a lone 0x90 byte, 1 for a lone 0xce byte
@test endof(String(b"\x90")) == 0
@test endof(String(b"\xce")) == 1
# issue #17624, missing getindex method for String
@test "abc"[:] == "abc"
# issue #18280: next/nextind must return past String's underlying data
# Stepping on from the last valid index must give an index strictly greater
# than the byte size, for ASCII and for multi-byte text alike.
for text in ("Hello", "Σ", "こんにちは", "😊😁")
    nbytes = sizeof(text)
    following = next(text, endof(text))[2]
    @test following > nbytes
    @test nextind(text, endof(text)) > nbytes
end
# cmp must also work for AbstractStrings whose indices do not line up with
# UTF-8 byte offsets; this would include (LegacyString.)UTF16String and
# (LegacyString.)UTF32String, among others.
# CharStr stores its characters in a Vector{Char}, so it is indexed by
# character position.
mutable struct CharStr <: AbstractString
    chars::Vector{Char}
    function CharStr(x)
        return new(collect(x))
    end
end
# The iteration protocol and endof are forwarded to the underlying vector.
function Base.start(cs::CharStr)
    return start(cs.chars)
end
function Base.next(cs::CharStr, i::Int)
    return next(cs.chars, i)
end
function Base.done(cs::CharStr, i::Int)
    return done(cs.chars, i)
end
function Base.endof(cs::CharStr)
    return endof(cs.chars)
end
# Comparisons between a String and a CharStr; the trailing notes record what
# each expression produced while the bug was present
# Simple case, with just ANSI Latin 1 characters
@test "áB" != CharStr("áá") # returns false with bug
@test cmp("áB", CharStr("áá")) == -1 # returns 0 with bug
# Case with Unicode characters
@test cmp("\U1f596\U1f596", CharStr("\U1f596")) == 1 # Gives BoundsError with bug
# same pair with the argument order swapped
@test cmp(CharStr("\U1f596"), "\U1f596\U1f596") == -1
# repeat function
# repeating a two-character string 3 times equals repeating one character 6
# times, for ASCII and non-ASCII characters
@test repeat("xx",3) == repeat("x",6) == "xxxxxx"
@test repeat("αα",3) == repeat("α",6) == "αααααα"
# issue #12495: check that logical indexing attempt raises ArgumentError
# covers both a Vector{Bool} and a BitArray index
@test_throws ArgumentError "abc"[[true, false, true]]
@test_throws ArgumentError "abc"[BitArray([true, false, true])]
@testset "invalid code point" begin
    # 0xba sits between two ASCII bytes with no lead byte in front of it
    bad = String([0x61, 0xba, 0x41])
    @test !isvalid(bad)
    @test_throws UnicodeError bad[2]
    # capture the exception object so its printed form can be checked
    err = try
        bad[2]
    catch ex
        ex
    end
    @test sprint(show, err) == "UnicodeError: invalid character index 2 (0xba is a continuation byte)"
end