mollusk 0e4acfb8f2 fix incorrect folder name for julia-0.6.x
Former-commit-id: ef2c7401e0876f22d2f7762d182cfbcd5a7d9c70
2018-06-11 03:28:36 -07:00

324 lines
11 KiB
Julia
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# This file is a part of Julia. License is MIT: https://julialang.org/license
@testset "string normalization" begin
    # normalize_string (Unicode normalization etc.):

    # Normalization form selected by symbol: canonical (NFC/NFD) leaves the
    # "ff" ligature U+FB00 alone, compatibility (NFKC/NFKD) expands it.
    @test normalize_string("\u006e\u0303", :NFC) == "\u00f1"
    @test "\u006e\u0303" == normalize_string("\u00f1", :NFD)
    @test normalize_string("\ufb00", :NFC) != "ff"
    @test normalize_string("\ufb00", :NFKC) == "ff"
    @test normalize_string("\u006e\u0303\ufb00", :NFKC) == "\u00f1"*"ff"
    @test normalize_string("\u00f1\ufb00", :NFKD) == "\u006e\u0303"*"ff"

    # The same kind of transformations requested through keyword options.
    @test normalize_string("\u006e\u0303", compose=true) == "\u00f1"
    @test "\u006e\u0303" == normalize_string("\u00f1", decompose=true)
    @test normalize_string("\u006e\u0303\u00b5",compat=true) == "\u00f1\u03bc"
    @test normalize_string("Σσς",casefold=true) == "σσσ"

    # lump: DIVISION SLASH (U+2215) and FRACTION SLASH (U+2044) are both lumped
    # into ASCII '/'. Spelled as escapes so the input cannot be lost or altered
    # by an encoding round-trip (an empty input can never produce "//").
    @test normalize_string("\u2215\u2044", lump=true) == "//"

    # "\ua" is LF, so the input is LF, LF, CR, CR LF: four newline sequences,
    # each replaced by the requested line terminator.
    @test normalize_string("\ua\n\r\r\ua", newline2lf=true) == "\ua\ua\ua\ua"
    @test normalize_string("\ua\n\r\r\ua", newline2ls=true) == "\u2028\u2028\u2028\u2028"
    @test normalize_string("\ua\n\r\r\ua", newline2ps=true) == "\u2029\u2029\u2029\u2029"

    @test normalize_string("\u00f1", stripmark=true) == "n"
    @test isempty(normalize_string("\u00ad", stripignore=true))

    # stripcc alone turns the tab into a space and the CR (a newline sequence)
    # into a space as well, so two spaces are expected. Written as `" "^2` so the
    # count survives whitespace-collapsing tools.
    @test normalize_string("\t\r", stripcc=true) == " "^2
    # With newline2ls the CR becomes LINE SEPARATOR instead; the tab is still a space.
    @test normalize_string("\t\r", stripcc=true, newline2ls=true) == " \u2028"

    @test normalize_string("\u0072\u0307\u0323", :NFC) == "\u1E5B\u0307" #26917
end
@testset "unicode sa#15" begin
    #Tests from Unicode SA#15, "Unicode normalization forms"
    #http://www.unicode.org/reports/tr15/
    #
    # Operands that are combining marks, look-alikes of another code point, or
    # compatibility variants are written as \u escapes, so each test pins an exact
    # code point and does not depend on how the file was encoded or rendered.

    @testset "canonical equivalence" begin
        # Inside this `let`, `==` on two Char arrays compares their NFC forms; the
        # second method forwards every other comparison to Base.
        let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFC)==normalize_string(string(b...), :NFC)
            ==(a,b) = Base.:(==)(a,b)
            # C + COMBINING CEDILLA vs. precomposed C WITH CEDILLA
            @test ['C', '\u0327'] == ['\u00c7']
            # COMBINING DOT ABOVE and COMBINING DOT BELOW in either order
            @test ['q', '\u0307', '\u0323'] == ['q', '\u0323', '\u0307']
            # HANGUL SYLLABLE GA vs. its two conjoining jamo
            @test ['\uac00'] == ['\u1100', '\u1161']
            # OHM SIGN vs. GREEK CAPITAL LETTER OMEGA
            @test ['\u2126'] == ['\u03a9']
        end
    end

    @testset "compatibility equivalence" begin
        # Same trick as above, but equality is taken after NFKC.
        let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFKC)==normalize_string(string(b...), :NFKC)
            ==(a,b) = Base.:(==)(a,b)
            # font variants: BLACK-LETTER CAPITAL H and DOUBLE-STRUCK CAPITAL H
            @test ['\u210c'] == ['\u210d'] == ['H']
            # positional forms of ARABIC LETTER NOON: medial, initial, final, isolated
            @test ['\ufee8'] == ['\ufee7'] == ['\ufee6'] == ['\ufee5']
            # CIRCLED DIGIT ONE
            @test ['\u2460'] == ['1']
            # HALFWIDTH KATAKANA LETTER KA vs. KATAKANA LETTER KA
            @test ['\uff76'] == ['\u30ab']
            # PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
            @test ['\ufe37'] == ['{']
            # SUPERSCRIPT NINE vs. SUBSCRIPT NINE
            @test ['\u2079'] == ['\u2089']
            # SQUARE APAATO vs. the four katakana it stands for
            @test ['\u3300'] == ['\u30a2', '\u30d1', '\u30fc', '\u30c8']
            # VULGAR FRACTION ONE QUARTER vs. 1, FRACTION SLASH, 4
            @test ['\u00bc'] == ['1', '\u2044', '4']
            # LATIN SMALL LETTER DZ WITH CARON (one code point) vs. d + z WITH CARON
            @test ['\u01c6'] == ['d', '\u017e']
        end
    end

    @testset "singletons" begin
        # ANGSTROM SIGN and OHM SIGN never survive normalization in any form.
        @test normalize_string("\U212b", :NFD) == "A\U030a"
        @test normalize_string("\U212b", :NFC) == "\U00c5"
        @test normalize_string("\U2126", :NFC) == normalize_string("\U2126", :NFD) == "\U03a9"
    end

    @testset "canonical composites" begin
        @test normalize_string("\U00c5", :NFC) == "\U00c5"
        @test normalize_string("\U00c5", :NFD) == "A\U030a"
        @test normalize_string("\U00f4", :NFC) == "\U00f4"
        @test normalize_string("\U00f4", :NFD) == "o\U0302"
    end

    @testset "multiple combining marks" begin
        # Marks are reordered canonically (dot below before dot above) and only
        # composed with the base where a precomposed character exists.
        @test normalize_string("\U1e69", :NFD) == "s\U0323\U0307"
        @test normalize_string("\U1e69", :NFC) == "\U1e69"
        @test normalize_string("\U1e0b\U0323", :NFD) == "d\U0323\U0307"
        @test normalize_string("\U1e0b\U0323", :NFC) == "\U1e0d\U0307"
        @test normalize_string("q\U0307\U0323", :NFC) == "q\U0323\U0307"
        @test normalize_string("q\U0307\U0323", :NFD) == "q\U0323\U0307"
    end

    @testset "compatibility composites" begin
        # Canonical forms keep compatibility characters; only NFKC/NFKD expand them.
        @test normalize_string("\Ufb01", :NFD) == normalize_string("\Ufb01", :NFC) == "\Ufb01"
        @test normalize_string("\Ufb01", :NFKD) == normalize_string("\Ufb01", :NFKC) == "fi"
        @test normalize_string("2\U2075", :NFD) == normalize_string("2\U2075", :NFC) == "2\U2075"
        @test normalize_string("2\U2075", :NFKD) == normalize_string("2\U2075", :NFKC) == "25"
        @test normalize_string("\U1e9b\U0323", :NFD) == "\U017f\U0323\U0307"
        @test normalize_string("\U1e9b\U0323", :NFC) == "\U1e9b\U0323"
        @test normalize_string("\U1e9b\U0323", :NFKD) == "s\U0323\U0307"
        @test normalize_string("\U1e9b\U0323", :NFKC) == "\U1e69"
    end
end
@testset "#5939 uft8proc character predicates" begin
    # Each section builds an ASCII list (a*) and a non-ASCII list (u*) for one
    # character class and checks the predicates that must / must not hold.

    # Lowercase letters.
    alower=['a', 'd', 'j', 'y', 'z']
    ulower=['α', 'β', 'γ', 'δ', 'ф', 'я']
    for c in vcat(alower,ulower)
        @test islower(c)
        @test !isupper(c)
        @test !isdigit(c)
        @test !isnumber(c)
    end

    # Uppercase letters. '\u01c5' is the titlecase digraph CAPITAL D WITH SMALL
    # LETTER Z WITH CARON; it is escaped because it is a single code point that
    # looks like a two-letter sequence. The test expects `isupper` to accept it.
    aupper=['A', 'D', 'J', 'Y', 'Z']
    uupper= ['Δ', 'Γ', 'Π', 'Ψ', '\u01c5', 'Ж', 'Д']
    for c in vcat(aupper,uupper)
        @test !islower(c)
        @test isupper(c)
        @test !isdigit(c)
        @test !isnumber(c)
    end

    # Letters without case, then every letter collected so far.
    nocase=['א','ﺵ']
    alphas=vcat(alower,ulower,aupper,uupper,nocase)
    for c in alphas
        @test isalpha(c)
        @test !isnumber(c)
    end

    # Numbers: ASCII digits satisfy both predicates, the non-ASCII numerics
    # only `isnumber`.
    anumber=['0', '1', '5', '9']
    unumber=['٣', '٥', '٨', '¹', 'ⅳ' ]
    for c in anumber
        @test isdigit(c)
        @test isnumber(c)
    end
    for c in unumber
        @test !isdigit(c)
        @test isnumber(c)
    end

    alnums=vcat(alphas,anumber,unumber)
    for c in alnums
        @test isalnum(c)
        @test !ispunct(c)
    end

    # Symbols and punctuation. The first non-ASCII symbol is UNION (U+222A),
    # escaped so the literal cannot degrade into an empty (invalid) char literal.
    asymbol = ['(',')', '~', '$' ]
    usymbol = ['\u222a', '∩', '⊂', '⊃', '√', '€', '¥', '↰', '△', '§']
    apunct =['.',',',';',':','&']
    upunct =['‡', '؟', '჻' ]
    for c in vcat(apunct,upunct)
        @test ispunct(c)
        @test !isalnum(c)
    end

    # Everything above is visible: printable, graphic, not space, not control.
    for c in vcat(alnums,asymbol,usymbol,apunct,upunct)
        @test isprint(c)
        @test isgraph(c)
        @test !isspace(c)
        @test !iscntrl(c)
    end

    # Whitespace.
    NBSP = Char(0x0000A0)
    ENSPACE = Char(0x002002)
    EMSPACE = Char(0x002003)
    THINSPACE = Char(0x002009)
    # NOTE: U+2060 is WORD JOINER (a zero-width, non-space format character);
    # the historical name of this variable is kept.
    ZWSPACE = Char(0x002060)
    uspace = [ENSPACE, EMSPACE, THINSPACE]
    aspace = [' ']
    acntrl_space = ['\t', '\n', '\v', '\f', '\r']
    for c in vcat(aspace,uspace)
        @test isspace(c)
        @test isprint(c)
        @test !isgraph(c)
    end
    for c in vcat(acntrl_space)
        @test isspace(c)
        @test !isprint(c)
        @test !isgraph(c)
    end
    # NBSP was previously defined but never exercised; '\ua0' is expected to be
    # whitespace by the "isspace" testset of this file as well.
    @test isspace(NBSP)
    @test !isspace(ZWSPACE) # zero-width, but not a space

    # Control characters.
    acontrol = [ Char(0x001c), Char(0x001d), Char(0x001e), Char(0x001f)]
    latincontrol = [ Char(0x0080), Char(0x0085) ]
    ucontrol = [ Char(0x200E), Char(0x202E) ]
    for c in vcat(acontrol, acntrl_space, latincontrol)
        @test iscntrl(c)
        @test !isalnum(c)
        @test !isprint(c)
        @test !isgraph(c)
    end
    # Non-latin1 format controls. The former `c != Char(0x0085)` guard was always
    # true for the members of `ucontrol`, so it has been dropped.
    for c in ucontrol
        @test !iscntrl(c)
        @test !isspace(c)
        @test !isalnum(c)
        @test !isprint(c)
        @test !isgraph(c)
    end

    # Predicates applied across whole strings.
    @test all(isspace," \t \n \r ")
    @test !all(isgraph," \t \n \r ")
    @test !all(isprint," \t \n \r ")
    @test !all(isalpha," \t \n \r ")
    @test !all(isnumber," \t \n \r ")
    @test !all(ispunct," \t \n \r ")

    @test !all(isspace,"ΣβΣβ")
    @test all(isalpha,"ΣβΣβ")
    @test all(isgraph,"ΣβΣβ")
    @test all(isprint,"ΣβΣβ")
    @test !all(isupper,"ΣβΣβ")
    @test !all(islower,"ΣβΣβ")
    @test !all(isnumber,"ΣβΣβ")
    @test !all(iscntrl,"ΣβΣβ")
    @test !all(ispunct,"ΣβΣβ")

    @test all(isnumber,"23435")
    @test all(isdigit,"23435")
    @test all(isalnum,"23435")
    @test !all(isalpha,"23435")
    @test all(iscntrl,string(Char(0x0080)))
    @test all(ispunct, "‡؟჻")

    # Hexadecimal digits.
    @test isxdigit('0')
    @test isxdigit('a')
    @test !isxdigit('x')
    @test !isxdigit('g')
end
@testset "utf8proc" begin
    # check utf8proc handling of CN category constants
    utf8proc = Base.UTF8proc

    # a lowercase letter reports category LL
    @test utf8proc.category_code('β') == utf8proc.UTF8PROC_CATEGORY_LL

    # check codepoint with category code CN
    @test utf8proc.category_code('\u038B') == utf8proc.UTF8PROC_CATEGORY_CN
end
@testset "graphemes" begin
    # Each case pairs an input string with the grapheme clusters it must split into.
    cases = (("b\u0300lahβlahb\u0302láh", ["b\u0300","l","a","h",
                                           "β","l","a","h",
                                           "b\u0302","l","á","h"]),
             ("", String[]),
             ("x\u0302", ["x\u0302"]),
             ("\U1d4c1\u0302", ["\U1d4c1\u0302"]),
             ("\U1d4c1\u0302\U1d4c1\u0300", ["\U1d4c1\u0302",
                                            "\U1d4c1\u0300"]),
             ("x",["x"]),
             ("abc",["a","b","c"]))

    # Run every case for both string types and both canonical normalization forms.
    for StrT in (String,GenericString), form in (:NFC, :NFD)
        for (text, clusters) in cases
            subject = StrT(normalize_string(text, form))
            expected = map(piece -> normalize_string(piece, form), clusters)

            # #9261
            if length(subject) > 0
                @test typeof(first(graphemes(subject))) == SubString{typeof(subject)}
            end

            found = collect(graphemes(subject))
            @test eltype(found) == SubString{typeof(subject)}
            @test found == expected
            @test length(graphemes(subject)) == length(found)
        end

        # Sorting the strings and sorting their grapheme iterators must agree.
        strs = [StrT(normalize_string(text)) for (text, clusters) in cases]
        iters = map(graphemes, strs)
        @test map(graphemes, sort!(strs)) == sort!(iters)
    end
end
@testset "#3721, #6939 up-to-date character widths" begin
    # U+1F355 must be reported as two columns wide, as a Char and as a string.
    @test charwidth('\U1f355') == 2

    # Same width with or without a trailing combining mark, for String and
    # GenericString alike.
    wide = "\U1f355"
    for s in (wide, wide * "\u0302"), wrap in (identity, GenericString)
        @test strwidth(wrap(s)) == 2
    end
end
@testset "#10958 handling of embedded NUL chars" begin
    # A leading NUL counts towards the length but adds nothing to the width,
    # and it does not stop case folding of what follows.
    nul_ascii = "\0w"
    nul_greek = "\0α"
    @test length(nul_ascii) == length(nul_greek) == 2
    @test strwidth(nul_ascii) == strwidth(nul_greek) == 1
    @test normalize_string("\0W", casefold=true) == nul_ascii
end
@testset "ut8proc_map with GenericString" begin
    # Normalization must accept a non-String AbstractString as input.
    decomposed = GenericString("\u006e\u0303")
    @test normalize_string(decomposed, :NFC) == "\u00f1"
end
@testset "normalize_string keywords" begin
    # compat and stripmark are each rejected when combined with compose=false.
    decomposed = "\u006e\u0303"
    @test_throws ArgumentError normalize_string(decomposed, compose=false, compat=true)
    @test_throws ArgumentError normalize_string(decomposed, compose=false, stripmark=true)
end
@testset "fastplus" begin
    # Case mapping and the assigned-codepoint check on a plain ASCII letter.
    upper, lower = 'A', 'a'
    @test lowercase(upper) == lower
    @test uppercase(lower) == upper
    @test is_assigned_char(upper)
end
@testset "isspace" begin
    # Characters that must be classified as whitespace.
    for c in (' ', '\t', '\r', '\u85', '\ua0')
        @test isspace(c)
    end
    # Characters that must not be.
    for c in ('\ufffd', '\U10ffff')
        @test !isspace(c)
    end
end
@testset "grapheme iterators" begin
    # A grapheme iterator hashes like the string it was built from, converts
    # back to it, and prints its length, type and contents.
    str = ascii("This is a test")
    iter = graphemes(str)
    @test hash(iter) == hash(str)
    @test convert(GenericString, iter) == str
    @test repr(iter) == "length-14 GraphemeIterator{String} for \"$str\""
end
@testset "#22693: substring graphemes" begin
    # Graphemes of a SubString are SubStrings of the parent String type.
    piece = SubString("123α56789", 1, 6)
    iter = graphemes(piece)
    @test eltype(iter) == SubString{String}
    @test collect(iter) == ["1","2","3","α","5"]
end