Add: julia-0.6.2
Former-commit-id: ccc667cf67d569f3fb3df39aa57c2134755a7551
This commit is contained in:
		
							
								
								
									
										9
									
								
								julia-0.6.2/share/julia/test/unicode/UnicodeError.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								julia-0.6.2/share/julia/test/unicode/UnicodeError.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| # This file is a part of Julia. License is MIT: https://julialang.org/license | ||||
|  | ||||
| @testset "invalid utf8" begin | ||||
|     # `show` on a UnicodeError is expected to print the error description with the | ||||
|     # start index (1) and the offending value in hexadecimal (10 is rendered as 0xa). | ||||
|     let exc = UnicodeError(Base.UTF_ERR_SHORT, 1, 10) | ||||
|         expected = "UnicodeError: invalid UTF-8 sequence starting at index 1 (0xa missing one or more continuation bytes)" | ||||
|         @test sprint(show, exc) == expected | ||||
|     end | ||||
| end | ||||
							
								
								
									
										47
									
								
								julia-0.6.2/share/julia/test/unicode/utf8.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								julia-0.6.2/share/julia/test/unicode/utf8.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| # This file is a part of Julia. License is MIT: https://julialang.org/license | ||||
|  | ||||
| @testset "cesu8 input" begin | ||||
|     # Walk every (high, low) surrogate combination in order. A String built from | ||||
|     # the two surrogate Chars, taken as raw bytes and converted back to String, | ||||
|     # must equal the single character whose code point counts up from 0x10000. | ||||
|     let codepoint = 0x10000 | ||||
|         for hi = 0xd800:0xdbff, lo = 0xdc00:0xdfff | ||||
|             surrogate_bytes = Vector{UInt8}(String(Char[hi, lo])) | ||||
|             @test convert(String, surrogate_bytes) == string(Char(codepoint)) | ||||
|             codepoint += 1 | ||||
|         end | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "string indexing" begin | ||||
|     # 14 ASCII bytes followed by the two bytes 0xed 0x80, i.e. 16 bytes in total. | ||||
|     let truncated = String(b"this is a test\xed\x80") | ||||
|         # Iterating from index 15 is expected to yield U+FFFD and advance to index 16. | ||||
|         @test next(truncated, 15) == ('\ufffd', 16) | ||||
|         # Each of these ranges is expected to be rejected with a BoundsError. | ||||
|         for outside in (0:3, 17:18, 2:17) | ||||
|             @test_throws BoundsError truncated[outside] | ||||
|         end | ||||
|         # A range starting at index 16 is expected to raise a UnicodeError instead. | ||||
|         @test_throws UnicodeError truncated[16:17] | ||||
|         # A value one past 0x10ffff is expected to print as the replacement character. | ||||
|         @test string(Char(0x110000)) == "\ufffd" | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "string reverse" begin | ||||
|     # Well-formed inputs paired with their expected reversal; the last case mixes | ||||
|     # ASCII with the escapes \uff, \u800, \uffff and \U10ffff. | ||||
|     for (input, expected) in (("", ""), | ||||
|                               ("a", "a"), | ||||
|                               ("abc", "cba"), | ||||
|                               ("xyz\uff\u800\uffff\U10ffff", "\U10ffff\uffff\u800\uffzyx")) | ||||
|         @test reverse(input) == expected | ||||
|     end | ||||
|  | ||||
|     # Byte strings for which reversing the resulting String is expected to throw | ||||
|     # a UnicodeError. | ||||
|     malformed = [ | ||||
|         b"xyz\xc1", | ||||
|         b"xyz\xd0", | ||||
|         b"xyz\xe0", | ||||
|         b"xyz\xed\x80", | ||||
|         b"xyz\xf0", | ||||
|         b"xyz\xf0\x80", | ||||
|         b"xyz\xf0\x80\x80" | ||||
|     ] | ||||
|     for bytes in malformed | ||||
|         @test_throws UnicodeError reverse(String(bytes)) | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "string convert" begin | ||||
|     # (raw bytes, expected String) pairs. The second pair specifically checks a | ||||
|     # UTF-8 sequence whose lead byte (0xed) is the same as that of a surrogate. | ||||
|     for (bytes, expected) in ((b"this is a test\xed\x80\x80", "this is a test\ud000"), | ||||
|                               (b"\xed\x9f\xbf", "\ud7ff")) | ||||
|         @test convert(String, bytes) == expected | ||||
|     end | ||||
| end | ||||
							
								
								
									
										322
									
								
								julia-0.6.2/share/julia/test/unicode/utf8proc.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										322
									
								
								julia-0.6.2/share/julia/test/unicode/utf8proc.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,322 @@ | ||||
| # This file is a part of Julia. License is MIT: https://julialang.org/license | ||||
|  | ||||
| @testset "string normalization" begin | ||||
|     # Exercises normalize_string both through the named forms (:NFC, :NFD, :NFKC, | ||||
|     # :NFKD) and through its individual keyword options. | ||||
|     # Fixtures: U+00F1 and the two-character sequence U+006E U+0303 that the tests | ||||
|     # treat as its decomposition, U+FB00 (expected to fold to "ff" under the | ||||
|     # compatibility forms), and a string of mixed line terminators. | ||||
|     let composed = "\u00f1", decomposed = "\u006e\u0303", ligature = "\ufb00", newlines = "\ua\n\r\r\ua" | ||||
|         # Named normalization forms. | ||||
|         @test normalize_string(decomposed, :NFC) == composed | ||||
|         @test normalize_string(composed, :NFD) == decomposed | ||||
|         @test normalize_string(ligature, :NFC) != "ff" | ||||
|         @test normalize_string(ligature, :NFKC) == "ff" | ||||
|         @test normalize_string("\u006e\u0303\ufb00", :NFKC) == "\u00f1"*"ff" | ||||
|         @test normalize_string("\u00f1\ufb00", :NFKD) == "\u006e\u0303"*"ff" | ||||
|  | ||||
|         # Keyword-driven variants. | ||||
|         @test normalize_string(decomposed, compose=true) == composed | ||||
|         @test normalize_string(composed, decompose=true) == decomposed | ||||
|         @test normalize_string("\u006e\u0303\u00b5",compat=true) == "\u00f1\u03bc" | ||||
|         @test normalize_string("Σσς",casefold=true) == "σσσ" | ||||
|         @test normalize_string("∕⁄", lump=true) == "//" | ||||
|  | ||||
|         # The five line-terminator characters are expected to collapse into four | ||||
|         # newlines of the requested kind. | ||||
|         @test normalize_string(newlines, newline2lf=true) == "\ua\ua\ua\ua" | ||||
|         @test normalize_string(newlines, newline2ls=true) == "\u2028\u2028\u2028\u2028" | ||||
|         @test normalize_string(newlines, newline2ps=true) == "\u2029\u2029\u2029\u2029" | ||||
|  | ||||
|         # Stripping options. | ||||
|         @test normalize_string(composed, stripmark=true) == "n" | ||||
|         @test normalize_string("\u00ad", stripignore=true) == "" | ||||
|         @test normalize_string("\t\r", stripcc=true) == "  " | ||||
|         @test normalize_string("\t\r", stripcc=true, newline2ls=true) == " \u2028" | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "unicode sa#15" begin | ||||
|     # Tests from Unicode Standard Annex #15 (UAX #15), "Unicode Normalization Forms" | ||||
|     #http://www.unicode.org/reports/tr15/ | ||||
|  | ||||
|     @testset "canonical equivalence" begin | ||||
|         # Inside this `let`, `==` on two `Array{Char}` values is redefined to compare | ||||
|         # the NFC-normalized strings built from their characters; any other argument | ||||
|         # combination is forwarded to `Base.:(==)`. Each `@test` below therefore | ||||
|         # checks that the two character sequences are equal after NFC normalization. | ||||
|         let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFC)==normalize_string(string(b...), :NFC) | ||||
|             ==(a,b) = Base.:(==)(a,b) | ||||
|             @test ['C', '̧'] == ['Ç'] | ||||
|             @test ['q', '̇', '̣'] == ['q', '̣', '̇'] | ||||
|             @test ['가'] == ['ᄀ', 'ᅡ'] | ||||
|             @test ['Ω'] == ['Ω'] | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     @testset "compatibility equivalence" begin | ||||
|         # Inside this `let`, `==` on two `Array{Char}` values is redefined to compare | ||||
|         # the NFKC-normalized strings built from their characters; any other argument | ||||
|         # combination is forwarded to `Base.:(==)`. Chained comparisons such as | ||||
|         # `a == b == c` go through the same local `==` for every adjacent pair. | ||||
|         let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFKC)==normalize_string(string(b...), :NFKC) | ||||
|             ==(a,b) = Base.:(==)(a,b) | ||||
|             @test ['ℌ'] == ['ℍ'] == ['H'] | ||||
|             @test ['ﻨ'] == ['ﻧ'] == ['ﻦ'] == ['ﻥ'] | ||||
|             @test ['①'] == ['1'] | ||||
|             @test ['カ'] == ['カ'] | ||||
|             @test ['︷'] == ['{'] | ||||
|             @test ['⁹'] == ['₉'] | ||||
|             @test ['㌀'] == ['ア', 'パ', 'ー', 'ト'] | ||||
|             @test ['¼'] == ['1', '⁄', '4'] | ||||
|             @test ['dž'] == ['d', 'ž'] | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     @testset "singletons" begin | ||||
|         # U+212B is expected to decompose to "A" + U+030A and compose to U+00C5; | ||||
|         # U+2126 is expected to map to U+03A9 under both NFC and NFD. | ||||
|         let angstrom = "\U212b", ohm = "\U2126" | ||||
|             @test normalize_string(angstrom, :NFD) == "A\U030a" | ||||
|             @test normalize_string(angstrom, :NFC) == "\U00c5" | ||||
|             @test normalize_string(ohm, :NFC) == normalize_string(ohm, :NFD) == "\U03a9" | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     @testset "canonical composites" begin | ||||
|         # (precomposed character, expected NFD decomposition) pairs: NFC must leave | ||||
|         # the precomposed form untouched, NFD must split it into base + mark. | ||||
|         for (composed, decomposed) in (("\U00c5", "A\U030a"), ("\U00f4", "o\U0302")) | ||||
|             @test normalize_string(composed, :NFC) == composed | ||||
|             @test normalize_string(composed, :NFD) == decomposed | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     @testset "multiple combining marks" begin | ||||
|         # Inputs carrying two combining marks; the expected outputs list U+0323 | ||||
|         # before U+0307 whenever both marks remain separate characters. | ||||
|         let s_marks = "\U1e69", d_marks = "\U1e0b\U0323", q_marks = "q\U0307\U0323" | ||||
|             @test normalize_string(s_marks, :NFD) == "s\U0323\U0307" | ||||
|             @test normalize_string(s_marks, :NFC) == "\U1e69" | ||||
|             @test normalize_string(d_marks, :NFD) == "d\U0323\U0307" | ||||
|             @test normalize_string(d_marks, :NFC) == "\U1e0d\U0307" | ||||
|             @test normalize_string(q_marks, :NFC) == "q\U0323\U0307" | ||||
|             @test normalize_string(q_marks, :NFD) == "q\U0323\U0307" | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     @testset "compatibility composites" begin | ||||
|         # (input, expected compatibility form) pairs: the canonical forms NFD/NFC | ||||
|         # must leave the input unchanged, while NFKD/NFKC must both produce the | ||||
|         # compatibility replacement. | ||||
|         for (input, compat) in (("\Ufb01", "fi"), ("2\U2075", "25")) | ||||
|             @test normalize_string(input, :NFD) == normalize_string(input, :NFC) == input | ||||
|             @test normalize_string(input, :NFKD) == normalize_string(input, :NFKC) == compat | ||||
|         end | ||||
|  | ||||
|         # U+1E9B followed by U+0323: all four forms give a different expected result. | ||||
|         let dotted = "\U1e9b\U0323" | ||||
|             @test normalize_string(dotted, :NFD) == "\U017f\U0323\U0307" | ||||
|             @test normalize_string(dotted, :NFC) == "\U1e9b\U0323" | ||||
|             @test normalize_string(dotted, :NFKD) == "s\U0323\U0307" | ||||
|             @test normalize_string(dotted, :NFKC) == "\U1e69" | ||||
|         end | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "#5939 utf8proc character predicates" begin | ||||
|     # Checks the character-class predicates (islower, isupper, isdigit, isnumber, | ||||
|     # isalpha, isalnum, ispunct, isprint, isgraph, isspace, iscntrl, isxdigit) | ||||
|     # against hand-picked fixtures. Fixture names starting with `a` hold ASCII | ||||
|     # characters; names starting with `u` hold non-ASCII ones. | ||||
|  | ||||
|     # Lowercase letters. | ||||
|     alower=['a', 'd', 'j', 'y', 'z'] | ||||
|     ulower=['α', 'β', 'γ', 'δ', 'ф', 'я'] | ||||
|     for c in vcat(alower,ulower) | ||||
|         @test islower(c) == true | ||||
|         @test isupper(c) == false | ||||
|         @test isdigit(c) == false | ||||
|         @test isnumber(c) == false | ||||
|     end | ||||
|  | ||||
|     # Uppercase letters. | ||||
|     aupper=['A', 'D', 'J', 'Y', 'Z'] | ||||
|     uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Dž', 'Ж', 'Д'] | ||||
|  | ||||
|     for c in vcat(aupper,uupper) | ||||
|         @test islower(c) == false | ||||
|         @test isupper(c) == true | ||||
|         @test isdigit(c) == false | ||||
|         @test isnumber(c) == false | ||||
|     end | ||||
|  | ||||
|     # Letters that are neither lowercase nor uppercase, plus the union of all letters. | ||||
|     nocase=['א','ﺵ'] | ||||
|     alphas=vcat(alower,ulower,aupper,uupper,nocase) | ||||
|  | ||||
|     for c in alphas | ||||
|         @test isalpha(c) == true | ||||
|         @test isnumber(c) == false | ||||
|     end | ||||
|  | ||||
|     # Numbers: the ASCII ones are digits, the non-ASCII ones are numbers but not digits. | ||||
|     anumber=['0', '1', '5', '9'] | ||||
|     unumber=['٣', '٥', '٨', '¹', 'ⅳ' ] | ||||
|  | ||||
|     for c in anumber | ||||
|         @test isdigit(c) == true | ||||
|         @test isnumber(c) == true | ||||
|     end | ||||
|     for c in unumber | ||||
|         @test isdigit(c) == false | ||||
|         @test isnumber(c) == true | ||||
|     end | ||||
|  | ||||
|     alnums=vcat(alphas,anumber,unumber) | ||||
|     for c in alnums | ||||
|         @test isalnum(c) == true | ||||
|         @test ispunct(c) == false | ||||
|     end | ||||
|  | ||||
|     # Symbols and punctuation. | ||||
|     asymbol = ['(',')', '~', '$' ] | ||||
|     usymbol = ['∪', '∩', '⊂', '⊃', '√', '€', '¥', '↰', '△', '§'] | ||||
|  | ||||
|     apunct =['.',',',';',':','&'] | ||||
|     upunct =['‡', '؟', '჻' ] | ||||
|  | ||||
|     for c in vcat(apunct,upunct) | ||||
|         @test ispunct(c) == true | ||||
|         @test isalnum(c) == false | ||||
|     end | ||||
|  | ||||
|     # Everything collected so far is printable, graphical, non-space and non-control. | ||||
|     for c in vcat(alnums,asymbol,usymbol,apunct,upunct) | ||||
|         @test isprint(c) == true | ||||
|         @test isgraph(c) == true | ||||
|         @test isspace(c) == false | ||||
|         @test iscntrl(c) == false | ||||
|     end | ||||
|  | ||||
|     # Space characters. U+200B is ZERO WIDTH SPACE and U+2060 is WORD JOINER; | ||||
|     # neither is expected to count as a space. | ||||
|     ENSPACE = Char(0x002002) | ||||
|     EMSPACE = Char(0x002003) | ||||
|     THINSPACE = Char(0x002009) | ||||
|     ZWSPACE = Char(0x00200B) | ||||
|     WORDJOINER = Char(0x002060) | ||||
|  | ||||
|     uspace = [ENSPACE, EMSPACE, THINSPACE] | ||||
|     aspace = [' '] | ||||
|     acntrl_space = ['\t', '\n', '\v', '\f', '\r'] | ||||
|     for c in vcat(aspace,uspace) | ||||
|         @test isspace(c) == true | ||||
|         @test isprint(c) == true | ||||
|         @test isgraph(c) == false | ||||
|     end | ||||
|  | ||||
|     for c in acntrl_space | ||||
|         @test isspace(c) == true | ||||
|         @test isprint(c) == false | ||||
|         @test isgraph(c) == false | ||||
|     end | ||||
|  | ||||
|     @test isspace(ZWSPACE) == false # zero-width space | ||||
|     @test isspace(WORDJOINER) == false # word joiner | ||||
|  | ||||
|     # Control characters. | ||||
|     acontrol = [ Char(0x001c), Char(0x001d), Char(0x001e), Char(0x001f)] | ||||
|     latincontrol = [ Char(0x0080), Char(0x0085) ] | ||||
|     ucontrol = [ Char(0x200E), Char(0x202E) ] | ||||
|  | ||||
|     for c in vcat(acontrol, acntrl_space, latincontrol) | ||||
|         @test iscntrl(c) == true | ||||
|         @test isalnum(c) == false | ||||
|         @test isprint(c) == false | ||||
|         @test isgraph(c) == false | ||||
|     end | ||||
|  | ||||
|     # Non-latin1 controls: expected to fail every predicate checked here, | ||||
|     # including iscntrl. | ||||
|     for c in ucontrol | ||||
|         @test iscntrl(c) == false | ||||
|         @test isspace(c) == false | ||||
|         @test isalnum(c) == false | ||||
|         @test isprint(c) == false | ||||
|         @test isgraph(c) == false | ||||
|     end | ||||
|  | ||||
|     # Predicates applied to whole strings through `all`. | ||||
|     @test  all(isspace,"  \t   \n   \r  ") | ||||
|     @test !all(isgraph,"  \t   \n   \r  ") | ||||
|     @test !all(isprint,"  \t   \n   \r  ") | ||||
|     @test !all(isalpha,"  \t   \n   \r  ") | ||||
|     @test !all(isnumber,"  \t   \n   \r  ") | ||||
|     @test !all(ispunct,"  \t   \n   \r  ") | ||||
|  | ||||
|     @test !all(isspace,"ΣβΣβ") | ||||
|     @test  all(isalpha,"ΣβΣβ") | ||||
|     @test  all(isgraph,"ΣβΣβ") | ||||
|     @test  all(isprint,"ΣβΣβ") | ||||
|     @test !all(isupper,"ΣβΣβ") | ||||
|     @test !all(islower,"ΣβΣβ") | ||||
|     @test !all(isnumber,"ΣβΣβ") | ||||
|     @test !all(iscntrl,"ΣβΣβ") | ||||
|     @test !all(ispunct,"ΣβΣβ") | ||||
|  | ||||
|     @test  all(isnumber,"23435") | ||||
|     @test  all(isdigit,"23435") | ||||
|     @test  all(isalnum,"23435") | ||||
|     @test !all(isalpha,"23435") | ||||
|     @test  all(iscntrl,string(Char(0x0080))) | ||||
|     @test  all(ispunct, "‡؟჻") | ||||
|  | ||||
|     # Hexadecimal digits. | ||||
|     @test  isxdigit('0') | ||||
|     @test  isxdigit('a') | ||||
|     @test !isxdigit('x') | ||||
|     @test !isxdigit('g') | ||||
| end | ||||
|  | ||||
| @testset "utf8proc" begin | ||||
|     # check utf8proc handling of category constants: an LL character and a | ||||
|     # codepoint whose category code is CN | ||||
|     let utf8proc = Base.UTF8proc | ||||
|         for (ch, expected) in (('β', utf8proc.UTF8PROC_CATEGORY_LL), | ||||
|                                ('\u038B', utf8proc.UTF8PROC_CATEGORY_CN)) | ||||
|             @test utf8proc.category_code(ch) == expected | ||||
|         end | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "graphemes" begin | ||||
|     # Each case pairs an input string with the grapheme clusters it is expected | ||||
|     # to split into. Every case is checked for both String and GenericString and | ||||
|     # under both NFC and NFD normalization. | ||||
|     let cases = (("b\u0300lahβlahb\u0302láh", ["b\u0300","l","a","h", | ||||
|                                                "β","l","a","h", | ||||
|                                                "b\u0302","l","á","h"]), | ||||
|                  ("", String[]), | ||||
|                  ("x\u0302", ["x\u0302"]), | ||||
|                  ("\U1d4c1\u0302", ["\U1d4c1\u0302"]), | ||||
|                  ("\U1d4c1\u0302\U1d4c1\u0300", ["\U1d4c1\u0302", | ||||
|                                                  "\U1d4c1\u0300"]), | ||||
|                  ("x",["x"]), | ||||
|                  ("abc",["a","b","c"])) | ||||
|         for T in (String,GenericString), form in (:NFC, :NFD) | ||||
|             for (text, clusters) in cases | ||||
|                 normalized = T(normalize_string(text, form)) | ||||
|                 expected = [normalize_string(cluster, form) for cluster in clusters] | ||||
|                 piece_type = SubString{typeof(normalized)} | ||||
|                 # #9261 | ||||
|                 if !isempty(normalized) | ||||
|                     @test typeof(first(graphemes(normalized))) == piece_type | ||||
|                 end | ||||
|                 pieces = collect(graphemes(normalized)) | ||||
|                 @test eltype(pieces) == piece_type | ||||
|                 @test pieces == expected | ||||
|                 @test length(graphemes(normalized)) == length(pieces) | ||||
|             end | ||||
|             # Sorting the strings and sorting their grapheme iterators must agree. | ||||
|             # NOTE(review): this check uses the default normalization and does not | ||||
|             # depend on `form`, so it runs identically for :NFC and :NFD. | ||||
|             strings = [T(normalize_string(text)) for (text, clusters) in cases] | ||||
|             iterators = map(graphemes, strings) | ||||
|             @test map(graphemes, sort!(strings)) == sort!(iterators) | ||||
|         end | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "#3721, #6939 up-to-date character widths" begin | ||||
|     # U+1F355 is expected to be two columns wide, both alone and when followed by | ||||
|     # the combining mark U+0302, for String and GenericString alike. | ||||
|     @test charwidth('\U1f355') == 2 | ||||
|     for text in ("\U1f355", "\U1f355\u0302") | ||||
|         @test strwidth(text) == 2 | ||||
|         @test strwidth(GenericString(text)) == 2 | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "#10958 handling of embedded NUL chars" begin | ||||
|     # Strings that start with a NUL are expected to be two characters long but | ||||
|     # only one column wide. | ||||
|     let ascii_tail = "\0w", greek_tail = "\0α" | ||||
|         @test length(ascii_tail) == length(greek_tail) == 2 | ||||
|         @test strwidth(ascii_tail) == strwidth(greek_tail) == 1 | ||||
|     end | ||||
|     # Case folding must keep the embedded NUL and lowercase the rest. | ||||
|     @test normalize_string("\0W", casefold=true) == "\0w" | ||||
| end | ||||
|  | ||||
| @testset "utf8proc_map with GenericString" begin | ||||
|     # normalize_string must accept a GenericString, not only a String: the | ||||
|     # sequence U+006E U+0303 is expected to compose to U+00F1 under NFC. | ||||
|     @test normalize_string(GenericString("\u006e\u0303"), :NFC) == "\u00f1" | ||||
| end | ||||
|  | ||||
| @testset "normalize_string keywords" begin | ||||
|     # Combining `compose=false` with `compat=true` or with `stripmark=true` is | ||||
|     # expected to be rejected with an ArgumentError. | ||||
|     let sample = "\u006e\u0303" | ||||
|         @test_throws ArgumentError normalize_string(sample, compose=false, compat=true) | ||||
|         @test_throws ArgumentError normalize_string(sample, compose=false, stripmark=true) | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "fastplus" begin | ||||
|     # Case conversion round trip on an ASCII letter, plus a check that the | ||||
|     # letter is reported as an assigned character. | ||||
|     let upper = 'A', lower = 'a' | ||||
|         @test lowercase(upper) == lower | ||||
|         @test uppercase(lower) == upper | ||||
|  | ||||
|         @test is_assigned_char(upper) | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "isspace" begin | ||||
|     # Characters expected to count as whitespace. | ||||
|     for c in (' ', '\t', '\r', '\u85', '\ua0') | ||||
|         @test isspace(c) | ||||
|     end | ||||
|     # Characters expected not to count as whitespace. | ||||
|     for c in ('\ufffd', '\U10ffff') | ||||
|         @test !isspace(c) | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "grapheme iterators" begin | ||||
|     # A grapheme iterator over an ASCII string must hash like the string itself, | ||||
|     # convert back to an equal GenericString, and print its length and source text. | ||||
|     let str = ascii("This is a test") | ||||
|         iter = graphemes(str) | ||||
|         @test hash(iter) == hash(str) | ||||
|         @test convert(GenericString, iter) == str | ||||
|         @test repr(iter) == "length-14 GraphemeIterator{String} for \"$str\"" | ||||
|     end | ||||
| end | ||||
|  | ||||
| @testset "#22693: substring graphemes" begin | ||||
|     # Graphemes of a SubString ending at index 6 of the parent: the expected | ||||
|     # result stops after "5", and each element is a SubString{String}. | ||||
|     let piece = SubString("123α56789", 1, 6) | ||||
|         iter = graphemes(piece) | ||||
|         @test eltype(iter) == SubString{String} | ||||
|         @test collect(iter) == ["1","2","3","α","5"] | ||||
|     end | ||||
| end | ||||
		Reference in New Issue
	
	Block a user