Add: julia-0.6.2
Former-commit-id: ccc667cf67d569f3fb3df39aa57c2134755a7551
This commit is contained in:
9
julia-0.6.2/share/julia/test/unicode/UnicodeError.jl
Normal file
9
julia-0.6.2/share/julia/test/unicode/UnicodeError.jl
Normal file
@@ -0,0 +1,9 @@
|
||||
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||||
|
||||
@testset "invalid utf8" begin
|
||||
let io = IOBuffer()
|
||||
show(io, UnicodeError(Base.UTF_ERR_SHORT, 1, 10))
|
||||
check = "UnicodeError: invalid UTF-8 sequence starting at index 1 (0xa missing one or more continuation bytes)"
|
||||
@test String(take!(io)) == check
|
||||
end
|
||||
end
|
||||
47
julia-0.6.2/share/julia/test/unicode/utf8.jl
Normal file
47
julia-0.6.2/share/julia/test/unicode/utf8.jl
Normal file
@@ -0,0 +1,47 @@
|
||||
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||||
|
||||
@testset "cesu8 input" begin
|
||||
let ch = 0x10000
|
||||
for hi = 0xd800:0xdbff
|
||||
for lo = 0xdc00:0xdfff
|
||||
@test convert(String, Vector{UInt8}(String(Char[hi, lo]))) == string(Char(ch))
|
||||
ch += 1
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@testset "string indexing" begin
|
||||
let str = String(b"this is a test\xed\x80")
|
||||
@test next(str, 15) == ('\ufffd', 16)
|
||||
@test_throws BoundsError getindex(str, 0:3)
|
||||
@test_throws BoundsError getindex(str, 17:18)
|
||||
@test_throws BoundsError getindex(str, 2:17)
|
||||
@test_throws UnicodeError getindex(str, 16:17)
|
||||
@test string(Char(0x110000)) == "\ufffd"
|
||||
end
|
||||
end
|
||||
|
||||
@testset "string reverse" begin
|
||||
@test reverse("") == ""
|
||||
@test reverse("a") == "a"
|
||||
@test reverse("abc") == "cba"
|
||||
@test reverse("xyz\uff\u800\uffff\U10ffff") == "\U10ffff\uffff\u800\uffzyx"
|
||||
for str in [
|
||||
b"xyz\xc1",
|
||||
b"xyz\xd0",
|
||||
b"xyz\xe0",
|
||||
b"xyz\xed\x80",
|
||||
b"xyz\xf0",
|
||||
b"xyz\xf0\x80",
|
||||
b"xyz\xf0\x80\x80"
|
||||
]
|
||||
@test_throws UnicodeError reverse(String(str))
|
||||
end
|
||||
end
|
||||
|
||||
@testset "string convert" begin
|
||||
@test convert(String, b"this is a test\xed\x80\x80") == "this is a test\ud000"
|
||||
## Specifically check UTF-8 string whose lead byte is same as a surrogate
|
||||
@test convert(String, b"\xed\x9f\xbf") == "\ud7ff"
|
||||
end
|
||||
322
julia-0.6.2/share/julia/test/unicode/utf8proc.jl
Normal file
322
julia-0.6.2/share/julia/test/unicode/utf8proc.jl
Normal file
@@ -0,0 +1,322 @@
|
||||
# This file is a part of Julia. License is MIT: https://julialang.org/license
|
||||
|
||||
@testset "string normalization" begin
|
||||
# normalize_string (Unicode normalization etc.):
|
||||
@test normalize_string("\u006e\u0303", :NFC) == "\u00f1"
|
||||
@test "\u006e\u0303" == normalize_string("\u00f1", :NFD)
|
||||
@test normalize_string("\ufb00", :NFC) != "ff"
|
||||
@test normalize_string("\ufb00", :NFKC) == "ff"
|
||||
@test normalize_string("\u006e\u0303\ufb00", :NFKC) == "\u00f1"*"ff"
|
||||
@test normalize_string("\u00f1\ufb00", :NFKD) == "\u006e\u0303"*"ff"
|
||||
@test normalize_string("\u006e\u0303", compose=true) == "\u00f1"
|
||||
@test "\u006e\u0303" == normalize_string("\u00f1", decompose=true)
|
||||
@test normalize_string("\u006e\u0303\u00b5",compat=true) == "\u00f1\u03bc"
|
||||
@test normalize_string("Σσς",casefold=true) == "σσσ"
|
||||
@test normalize_string("∕⁄", lump=true) == "//"
|
||||
@test normalize_string("\ua\n\r\r\ua", newline2lf=true) == "\ua\ua\ua\ua"
|
||||
@test normalize_string("\ua\n\r\r\ua", newline2ls=true) == "\u2028\u2028\u2028\u2028"
|
||||
@test normalize_string("\ua\n\r\r\ua", newline2ps=true) == "\u2029\u2029\u2029\u2029"
|
||||
@test normalize_string("\u00f1", stripmark=true) == "n"
|
||||
@test isempty(normalize_string("\u00ad", stripignore=true))
|
||||
@test normalize_string("\t\r", stripcc=true) == " "
|
||||
@test normalize_string("\t\r", stripcc=true, newline2ls=true) == " \u2028"
|
||||
end
|
||||
|
||||
@testset "unicode sa#15" begin
|
||||
#Tests from Unicode SA#15, "Unicode normalization forms"
|
||||
#http://www.unicode.org/reports/tr15/
|
||||
|
||||
@testset "canonical equivalence" begin
|
||||
let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFC)==normalize_string(string(b...), :NFC)
|
||||
==(a,b) = Base.:(==)(a,b)
|
||||
@test ['C', '̧'] == ['Ç']
|
||||
@test ['q', '̇', '̣'] == ['q', '̣', '̇']
|
||||
@test ['가'] == ['ᄀ', 'ᅡ']
|
||||
@test ['Ω'] == ['Ω']
|
||||
end
|
||||
end
|
||||
|
||||
@testset "compatibility equivalence" begin
|
||||
let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFKC)==normalize_string(string(b...), :NFKC)
|
||||
==(a,b) = Base.:(==)(a,b)
|
||||
@test ['ℌ'] == ['ℍ'] == ['H']
|
||||
@test ['ﻨ'] == ['ﻧ'] == ['ﻦ'] == ['ﻥ']
|
||||
@test ['①'] == ['1']
|
||||
@test ['カ'] == ['カ']
|
||||
@test ['︷'] == ['{']
|
||||
@test ['⁹'] == ['₉']
|
||||
@test ['㌀'] == ['ア', 'パ', 'ー', 'ト']
|
||||
@test ['¼'] == ['1', '⁄', '4']
|
||||
@test ['dž'] == ['d', 'ž']
|
||||
end
|
||||
end
|
||||
|
||||
@testset "singletons" begin
|
||||
@test normalize_string("\U212b", :NFD) == "A\U030a"
|
||||
@test normalize_string("\U212b", :NFC) == "\U00c5"
|
||||
@test normalize_string("\U2126", :NFC) == normalize_string("\U2126", :NFD) == "\U03a9"
|
||||
end
|
||||
|
||||
@testset "canonical composites" begin
|
||||
@test normalize_string("\U00c5", :NFC) == "\U00c5"
|
||||
@test normalize_string("\U00c5", :NFD) == "A\U030a"
|
||||
@test normalize_string("\U00f4", :NFC) == "\U00f4"
|
||||
@test normalize_string("\U00f4", :NFD) == "o\U0302"
|
||||
end
|
||||
|
||||
@testset "multiple combining marks" begin
|
||||
@test normalize_string("\U1e69", :NFD) == "s\U0323\U0307"
|
||||
@test normalize_string("\U1e69", :NFC) == "\U1e69"
|
||||
@test normalize_string("\U1e0b\U0323", :NFD) == "d\U0323\U0307"
|
||||
@test normalize_string("\U1e0b\U0323", :NFC) == "\U1e0d\U0307"
|
||||
@test normalize_string("q\U0307\U0323", :NFC) == "q\U0323\U0307"
|
||||
@test normalize_string("q\U0307\U0323", :NFD) == "q\U0323\U0307"
|
||||
end
|
||||
|
||||
@testset "compatibility composites" begin
|
||||
@test normalize_string("\Ufb01", :NFD) == normalize_string("\Ufb01", :NFC) == "\Ufb01"
|
||||
@test normalize_string("\Ufb01", :NFKD) == normalize_string("\Ufb01", :NFKC) == "fi"
|
||||
@test normalize_string("2\U2075", :NFD) == normalize_string("2\U2075", :NFC) == "2\U2075"
|
||||
@test normalize_string("2\U2075", :NFKD) == normalize_string("2\U2075", :NFKC) == "25"
|
||||
@test normalize_string("\U1e9b\U0323", :NFD) == "\U017f\U0323\U0307"
|
||||
@test normalize_string("\U1e9b\U0323", :NFC) == "\U1e9b\U0323"
|
||||
@test normalize_string("\U1e9b\U0323", :NFKD) == "s\U0323\U0307"
|
||||
@test normalize_string("\U1e9b\U0323", :NFKC) == "\U1e69"
|
||||
end
|
||||
end
|
||||
|
||||
@testset "#5939 uft8proc character predicates" begin
|
||||
alower=['a', 'd', 'j', 'y', 'z']
|
||||
ulower=['α', 'β', 'γ', 'δ', 'ф', 'я']
|
||||
for c in vcat(alower,ulower)
|
||||
@test islower(c) == true
|
||||
@test isupper(c) == false
|
||||
@test isdigit(c) == false
|
||||
@test isnumber(c) == false
|
||||
end
|
||||
|
||||
aupper=['A', 'D', 'J', 'Y', 'Z']
|
||||
uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Dž', 'Ж', 'Д']
|
||||
|
||||
for c in vcat(aupper,uupper)
|
||||
@test islower(c) == false
|
||||
@test isupper(c) == true
|
||||
@test isdigit(c) == false
|
||||
@test isnumber(c) == false
|
||||
end
|
||||
|
||||
nocase=['א','ﺵ']
|
||||
alphas=vcat(alower,ulower,aupper,uupper,nocase)
|
||||
|
||||
for c in alphas
|
||||
@test isalpha(c) == true
|
||||
@test isnumber(c) == false
|
||||
end
|
||||
|
||||
anumber=['0', '1', '5', '9']
|
||||
unumber=['٣', '٥', '٨', '¹', 'ⅳ' ]
|
||||
|
||||
for c in anumber
|
||||
@test isdigit(c) == true
|
||||
@test isnumber(c) == true
|
||||
end
|
||||
for c in unumber
|
||||
@test isdigit(c) == false
|
||||
@test isnumber(c) == true
|
||||
end
|
||||
|
||||
alnums=vcat(alphas,anumber,unumber)
|
||||
for c in alnums
|
||||
@test isalnum(c) == true
|
||||
@test ispunct(c) == false
|
||||
end
|
||||
|
||||
asymbol = ['(',')', '~', '$' ]
|
||||
usymbol = ['∪', '∩', '⊂', '⊃', '√', '€', '¥', '↰', '△', '§']
|
||||
|
||||
apunct =['.',',',';',':','&']
|
||||
upunct =['‡', '؟', '჻' ]
|
||||
|
||||
for c in vcat(apunct,upunct)
|
||||
@test ispunct(c) == true
|
||||
@test isalnum(c) == false
|
||||
end
|
||||
|
||||
for c in vcat(alnums,asymbol,usymbol,apunct,upunct)
|
||||
@test isprint(c) == true
|
||||
@test isgraph(c) == true
|
||||
@test isspace(c) == false
|
||||
@test iscntrl(c) == false
|
||||
end
|
||||
|
||||
NBSP = Char(0x0000A0)
|
||||
ENSPACE = Char(0x002002)
|
||||
EMSPACE = Char(0x002003)
|
||||
THINSPACE = Char(0x002009)
|
||||
ZWSPACE = Char(0x002060)
|
||||
|
||||
uspace = [ENSPACE, EMSPACE, THINSPACE]
|
||||
aspace = [' ']
|
||||
acntrl_space = ['\t', '\n', '\v', '\f', '\r']
|
||||
for c in vcat(aspace,uspace)
|
||||
@test isspace(c) == true
|
||||
@test isprint(c) == true
|
||||
@test isgraph(c) == false
|
||||
end
|
||||
|
||||
for c in vcat(acntrl_space)
|
||||
@test isspace(c) == true
|
||||
@test isprint(c) == false
|
||||
@test isgraph(c) == false
|
||||
end
|
||||
|
||||
@test isspace(ZWSPACE) == false # zero-width space
|
||||
|
||||
acontrol = [ Char(0x001c), Char(0x001d), Char(0x001e), Char(0x001f)]
|
||||
latincontrol = [ Char(0x0080), Char(0x0085) ]
|
||||
ucontrol = [ Char(0x200E), Char(0x202E) ]
|
||||
|
||||
for c in vcat(acontrol, acntrl_space, latincontrol)
|
||||
@test iscntrl(c) == true
|
||||
@test isalnum(c) == false
|
||||
@test isprint(c) == false
|
||||
@test isgraph(c) == false
|
||||
end
|
||||
|
||||
for c in ucontrol #non-latin1 controls
|
||||
if c!=Char(0x0085)
|
||||
@test iscntrl(c) == false
|
||||
@test isspace(c) == false
|
||||
@test isalnum(c) == false
|
||||
@test isprint(c) == false
|
||||
@test isgraph(c) == false
|
||||
end
|
||||
end
|
||||
|
||||
@test all(isspace," \t \n \r ")
|
||||
@test !all(isgraph," \t \n \r ")
|
||||
@test !all(isprint," \t \n \r ")
|
||||
@test !all(isalpha," \t \n \r ")
|
||||
@test !all(isnumber," \t \n \r ")
|
||||
@test !all(ispunct," \t \n \r ")
|
||||
|
||||
@test !all(isspace,"ΣβΣβ")
|
||||
@test all(isalpha,"ΣβΣβ")
|
||||
@test all(isgraph,"ΣβΣβ")
|
||||
@test all(isprint,"ΣβΣβ")
|
||||
@test !all(isupper,"ΣβΣβ")
|
||||
@test !all(islower,"ΣβΣβ")
|
||||
@test !all(isnumber,"ΣβΣβ")
|
||||
@test !all(iscntrl,"ΣβΣβ")
|
||||
@test !all(ispunct,"ΣβΣβ")
|
||||
|
||||
@test all(isnumber,"23435")
|
||||
@test all(isdigit,"23435")
|
||||
@test all(isalnum,"23435")
|
||||
@test !all(isalpha,"23435")
|
||||
@test all(iscntrl,string(Char(0x0080)))
|
||||
@test all(ispunct, "‡؟჻")
|
||||
|
||||
@test isxdigit('0')
|
||||
@test isxdigit('a')
|
||||
@test !isxdigit('x')
|
||||
@test !isxdigit('g')
|
||||
end
|
||||
|
||||
@testset "utf8proc" begin
|
||||
# check utf8proc handling of CN category constants
|
||||
let c_ll = 'β', c_cn = '\u038B'
|
||||
@test Base.UTF8proc.category_code(c_ll) == Base.UTF8proc.UTF8PROC_CATEGORY_LL
|
||||
# check codepoint with category code CN
|
||||
@test Base.UTF8proc.category_code(c_cn) == Base.UTF8proc.UTF8PROC_CATEGORY_CN
|
||||
end
|
||||
end
|
||||
|
||||
@testset "graphemes" begin
|
||||
let grphtest = (("b\u0300lahβlahb\u0302láh", ["b\u0300","l","a","h",
|
||||
"β","l","a","h",
|
||||
"b\u0302","l","á","h"]),
|
||||
("", String[]),
|
||||
("x\u0302", ["x\u0302"]),
|
||||
("\U1d4c1\u0302", ["\U1d4c1\u0302"]),
|
||||
("\U1d4c1\u0302\U1d4c1\u0300", ["\U1d4c1\u0302",
|
||||
"\U1d4c1\u0300"]),
|
||||
("x",["x"]),
|
||||
("abc",["a","b","c"]))
|
||||
for T in (String,GenericString)
|
||||
for nf in (:NFC, :NFD)
|
||||
for (s, g) in grphtest
|
||||
s_ = T(normalize_string(s, nf))
|
||||
g_ = map(s -> normalize_string(s, nf), g)
|
||||
# #9261
|
||||
if length(s_) > 0
|
||||
@test typeof(first(graphemes(s_))) == SubString{typeof(s_)}
|
||||
end
|
||||
grph = collect(graphemes(s_))
|
||||
@test eltype(grph) == SubString{typeof(s_)}
|
||||
@test grph == g_
|
||||
@test length(graphemes(s_)) == length(grph)
|
||||
end
|
||||
S = [T(normalize_string(s)) for (s,g) in grphtest]
|
||||
G = map(graphemes, S)
|
||||
@test map(graphemes, sort!(S)) == sort!(G)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@testset "#3721, #6939 up-to-date character widths" begin
|
||||
@test charwidth('\U1f355') == 2
|
||||
@test strwidth("\U1f355") == 2
|
||||
@test strwidth(GenericString("\U1f355")) == 2
|
||||
@test strwidth("\U1f355\u0302") == 2
|
||||
@test strwidth(GenericString("\U1f355\u0302")) == 2
|
||||
end
|
||||
|
||||
@testset "#10958 handling of embedded NUL chars" begin
|
||||
@test length("\0w") == length("\0α") == 2
|
||||
@test strwidth("\0w") == strwidth("\0α") == 1
|
||||
@test normalize_string("\0W", casefold=true) == "\0w"
|
||||
end
|
||||
|
||||
@testset "ut8proc_map with GenericString" begin
|
||||
@test normalize_string(GenericString("\u006e\u0303"), :NFC) == "\u00f1"
|
||||
end
|
||||
|
||||
@testset "normalize_string keywords" begin
|
||||
@test_throws ArgumentError normalize_string("\u006e\u0303", compose=false, compat=true)
|
||||
@test_throws ArgumentError normalize_string("\u006e\u0303", compose=false, stripmark=true)
|
||||
end
|
||||
|
||||
@testset "fastplus" begin
|
||||
@test lowercase('A') == 'a'
|
||||
@test uppercase('a') == 'A'
|
||||
|
||||
@test is_assigned_char('A')
|
||||
end
|
||||
|
||||
@testset "isspace" begin
|
||||
@test isspace(' ')
|
||||
@test isspace('\t')
|
||||
@test isspace('\r')
|
||||
@test isspace('\u85')
|
||||
@test isspace('\ua0')
|
||||
@test !isspace('\ufffd')
|
||||
@test !isspace('\U10ffff')
|
||||
end
|
||||
|
||||
@testset "grapheme iterators" begin
|
||||
let str = ascii("This is a test")
|
||||
g = graphemes(str)
|
||||
h = hash(str)
|
||||
@test hash(g) == h
|
||||
@test convert(GenericString, g) == str
|
||||
@test repr(g) == "length-14 GraphemeIterator{String} for \"$str\""
|
||||
end
|
||||
end
|
||||
|
||||
@testset "#22693: substring graphemes" begin
|
||||
g = graphemes(SubString("123α56789", 1, 6))
|
||||
@test eltype(g) == SubString{String}
|
||||
@test collect(g) == ["1","2","3","α","5"]
|
||||
end
|
||||
Reference in New Issue
Block a user