mollusk 019f8e3064 Add: julia-0.6.2
Former-commit-id: ccc667cf67d569f3fb3df39aa57c2134755a7551
2018-02-10 10:27:19 -07:00

312 lines
11 KiB
Julia

# This file is a part of Julia. License is MIT: https://julialang.org/license
### Parsing utilities
# Return the type parameters of `T`, the second type parameter of a `DateFormat{S,T}` type.
function _directives(::Type{DateFormat{S,T}}) where {S,T}
    return T.parameters
end
# Character codes for a `DateFormat` type: fetch its directive types and delegate to the
# `SimpleVector` method.
function character_codes(df::Type{DateFormat{S,T}}) where {S,T}
    directive_types = _directives(df)
    return character_codes(directive_types)
end
# Collect, in order, the first type parameter of every directive type in `directives`
# that is a subtype of `DatePart`. Other directive types are skipped.
function character_codes(directives::SimpleVector)
    codes = Char[]
    sizehint!(codes, length(directives))
    for directive in directives
        directive <: DatePart || continue
        push!(codes, first(directive.parameters))
    end
    return codes
end
# Build a symbol from the lowercased name of the data type `t`.
function genvar(t::DataType)
    type_name = string(Base.datatype_name(t))
    return Symbol(lowercase(type_name))
end
"""
    tryparsenext_core(str::AbstractString, pos::Int, len::Int, df::DateFormat, raise=false)

Parse `str` by applying the directives of the `DateFormat` `df` one after another, beginning
at character index `pos`. Parsing ends once every directive has been applied or the end of
the string, `len`, has been passed. If a directive cannot be parsed, the returned value
tuple is null when `raise` is false; otherwise an `ArgumentError` is thrown.

Returns a 3-element tuple `(values, pos, num_parsed)`:

* `values::Nullable{Tuple}`: one value for each `DatePart` of the `DateFormat`, in order of
  occurrence. Values for directives that were not reached because the string ended early
  are filled in with defaults.
* `pos::Int`: the character index at which parsing stopped.
* `num_parsed::Int`: how many entries of `values` were actually parsed, which makes it
  possible to tell parsed values apart from defaults.
"""
@generated function tryparsenext_core(str::AbstractString, pos::Int, len::Int,
                                      df::DateFormat, raise::Bool=false)
    directive_types = _directives(df)
    codes = character_codes(directive_types)

    part_types = Type[CONVERSION_SPECIFIERS[code] for code in codes]
    slot_names = Symbol[genvar(t) for t in part_types]
    slot_defaults = Tuple(CONVERSION_DEFAULTS[t] for t in part_types)
    R = typeof(slot_defaults)

    # Give every value variable its default up front so that `@goto done` can never reach
    # the return statement with an unassigned variable.
    assign_defaults = Expr[]
    for (name, default) in zip(slot_names, slot_defaults)
        push!(assign_defaults, quote
            $name = $default
        end)
    end

    # One parsing step per directive. `DatePart` directives store their result in the next
    # value variable; every other directive is only consumed.
    parsers = Expr[]
    slot = 1  # index of the next unused entry of `slot_names`
    for i in 1:length(directive_types)
        if directive_types[i] <: DatePart
            name = slot_names[slot]
            nullable = Symbol(:nullable_, name)
            slot += 1
            push!(parsers, quote
                pos > len && @goto done
                $nullable, next_pos = tryparsenext(directives[$i], str, pos, len, locale)
                isnull($nullable) && @goto error
                $name = unsafe_get($nullable)
                pos = next_pos
                num_parsed += 1
                directive_index += 1
            end)
        else
            push!(parsers, quote
                pos > len && @goto done
                nullable_delim, next_pos = tryparsenext(directives[$i], str, pos, len, locale)
                isnull(nullable_delim) && @goto error
                pos = next_pos
                directive_index += 1
            end)
        end
    end

    quote
        directives = df.tokens
        locale::DateLocale = df.locale

        num_parsed = 0
        directive_index = 1

        $(assign_defaults...)
        $(parsers...)

        # All directives were applied; anything left in the string is an error.
        pos > len || @goto error

        @label done
        return Nullable{$R}($(Expr(:tuple, slot_names...))), pos, num_parsed

        @label error
        if raise
            directive_index > length(directives) &&
                throw(ArgumentError("Found extra characters at the end of date time string"))
            d = directives[directive_index]
            throw(ArgumentError("Unable to parse date time. Expected directive $d at char $pos"))
        end
        return Nullable{$R}(), pos, 0
    end
end
"""
    tryparsenext_internal(::Type{<:TimeType}, str, pos, len, df::DateFormat, raise=false)

Parse `str` according to the directives of the `DateFormat` `df`. The requested `TimeType`
decides which tokens are returned and in what order. A required token that is supplied by
neither the `DateFormat` nor the string takes its default value. If the string cannot be
parsed, the returned value tuple is null when `raise` is false; otherwise an exception is
thrown.

Returns a 2-element tuple `(values, pos)`:

* `values::Nullable{Tuple}`: one value for each token required by the passed-in type.
* `pos::Int`: the character index at which parsing stopped.
"""
@generated function tryparsenext_internal(::Type{T}, str::AbstractString, pos::Int, len::Int,
                                          df::DateFormat, raise::Bool=false) where T<:TimeType
    codes = character_codes(df)
    parsed_types = Type[CONVERSION_SPECIFIERS[code] for code in codes]
    parsed_names = Symbol[genvar(t) for t in parsed_types]

    output_tokens = CONVERSION_TRANSLATIONS[T]
    output_names = Symbol[genvar(t) for t in output_tokens]
    output_defaults = Tuple(CONVERSION_DEFAULTS[t] for t in output_tokens)
    R = typeof(output_defaults)

    # Every output variable starts at its default, because the tuple coming back from
    # `tryparsenext_core` may not cover all of the variables the output needs.
    assign_defaults = Expr[]
    for (name, default) in zip(output_names, output_defaults)
        push!(assign_defaults, quote
            $name = $default
        end)
    end

    # Destructuring target for the tuple returned by `tryparsenext_core`.
    unpack_target = Expr(:tuple, parsed_names...)
    result_tuple = Expr(:tuple, output_names...)

    quote
        values, pos, num_parsed = tryparsenext_core(str, pos, len, df, raise)
        if isnull(values)
            return Nullable{$R}(), pos
        end
        $(assign_defaults...)
        $unpack_target = unsafe_get(values)
        return Nullable{$R}($result_tuple), pos
    end
end
# Read a run of decimal digits from `str` starting at index `i`, looking no further than
# index `len` and, when `max_width > 0`, at no more than `max_width` digits.
# Returns `(value, next_index)`. `value` is null when `i > len` or when the index did not
# advance past `i + min_width - 1` (past `i` itself when `min_width <= 0`).
@inline function tryparsenext_base10(str::AbstractString, i::Int, len::Int,
                                     min_width::Int=1, max_width::Int=0)
    if i > len
        return Nullable{Int64}(), i
    end
    # The index must end up beyond `required_pos` for enough digits to have been read.
    required_pos = min_width > 0 ? i + min_width - 1 : i
    last_pos = max_width > 0 ? min(i + max_width - 1, len) : len
    acc::Int64 = 0
    @inbounds while i <= last_pos
        ch, following = next(str, i)
        ('0' <= ch <= '9') || break
        acc = acc * 10 + (ch - '0')
        i = following
    end
    return i > required_pos ? (Nullable{Int64}(acc), i) : (Nullable{Int64}(), i)
end
# Read a run of alphabetic characters from `str` starting at index `i`, looking no further
# than index `len` and, when `maxchars > 0`, at no more than `maxchars` characters.
# `locale` is accepted for signature compatibility and is not used here.
# Returns `(word, next_index)`, where `word` is a `Nullable{SubString}` that is null when
# no alphabetic character was found at `i`.
@inline function tryparsenext_word(str::AbstractString, i, len, locale, maxchars=0)
    word_start, word_end = i, 0

    # Index of the last character that may be inspected. It is found by stepping forward
    # from `i` rather than by converting to a character count and back, so asking for more
    # characters than the string has left is clamped to `len` instead of raising a
    # `BoundsError`, and the part of the string before `i` is never rescanned.
    max_pos = len
    if maxchars > 0
        max_pos = i
        remaining = maxchars - 1
        while remaining > 0 && max_pos < len
            max_pos = nextind(str, max_pos)
            remaining -= 1
        end
        max_pos = min(max_pos, len)
    end

    @inbounds while i <= max_pos
        c, ii = next(str, i)
        if isalpha(c)
            word_end = i
        else
            break
        end
        i = ii
    end

    if word_end == 0
        return Nullable{SubString}(), i
    else
        return Nullable{SubString}(SubString(str, word_start, word_end)), i
    end
end
# Hand-written parser for the `ISODateTimeFormat` layout: year-month-dayThour:minute:second.ms
# The string may stop after any field or delimiter from the first '-' onwards; fields that
# were not reached keep their defaults. Throws an `ArgumentError` for any other input.
function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeFormat))
    i, end_pos = start(s), endof(s)

    # Defaults for the fields the string is allowed to leave out.
    dm = dd = Int64(1)
    th = tm = ts = tms = Int64(0)

    # Year: no upper limit on the digit count, and a '-' has to follow it.
    nv, i = tryparsenext_base10(s, i, end_pos, 1)
    isnull(nv) && @goto error
    dy = unsafe_get(nv)
    i > end_pos && @goto error

    c, i = next(s, i)
    c == '-' || @goto error
    i > end_pos && @goto done

    # Month: one or two digits.
    nv, i = tryparsenext_base10(s, i, end_pos, 1, 2)
    isnull(nv) && @goto error
    dm = unsafe_get(nv)
    i > end_pos && @goto done

    c, i = next(s, i)
    c == '-' || @goto error
    i > end_pos && @goto done

    # Day: one or two digits.
    nv, i = tryparsenext_base10(s, i, end_pos, 1, 2)
    isnull(nv) && @goto error
    dd = unsafe_get(nv)
    i > end_pos && @goto done

    c, i = next(s, i)
    c == 'T' || @goto error
    i > end_pos && @goto done

    # Hour: one or two digits.
    nv, i = tryparsenext_base10(s, i, end_pos, 1, 2)
    isnull(nv) && @goto error
    th = unsafe_get(nv)
    i > end_pos && @goto done

    c, i = next(s, i)
    c == ':' || @goto error
    i > end_pos && @goto done

    # Minute: one or two digits.
    nv, i = tryparsenext_base10(s, i, end_pos, 1, 2)
    isnull(nv) && @goto error
    tm = unsafe_get(nv)
    i > end_pos && @goto done

    c, i = next(s, i)
    c == ':' || @goto error
    i > end_pos && @goto done

    # Second: one or two digits.
    nv, i = tryparsenext_base10(s, i, end_pos, 1, 2)
    isnull(nv) && @goto error
    ts = unsafe_get(nv)
    i > end_pos && @goto done

    c, i = next(s, i)
    c == '.' || @goto error
    i > end_pos && @goto done

    # Millisecond: one to three digits, scaled by the number of digits read so that a
    # shorter fraction is treated as if it were padded with trailing zeros.
    nv, j = tryparsenext_base10(s, i, end_pos, 1, 3)
    isnull(nv) && @goto error
    tms = unsafe_get(nv) * 10 ^ (3 - (j - i))

    # Nothing may follow the millisecond field.
    j > end_pos || @goto error

    @label done
    return DateTime(dy, dm, dd, th, tm, ts, tms)

    @label error
    throw(ArgumentError("Invalid DateTime string"))
end
# Parse `str` into a `T` using the format `df`. The internal parser is called with
# `raise` set to `true`, so its result is unwrapped without a null check.
function Base.parse(::Type{T}, str::AbstractString, df::DateFormat=default_format(T)) where T<:TimeType
    first_pos, last_pos = start(str), endof(str)
    values, stop_pos = tryparsenext_internal(T, str, first_pos, last_pos, df, true)
    args = unsafe_get(values)
    return T(args...)
end
# Non-throwing counterpart of `parse`: returns a `Nullable{T}` that is null when `str`
# cannot be parsed with `df` or when `validargs` returns a non-null value for the parsed
# arguments.
function Base.tryparse(::Type{T}, str::AbstractString, df::DateFormat=default_format(T)) where T<:TimeType
    first_pos, last_pos = start(str), endof(str)
    values, stop_pos = tryparsenext_internal(T, str, first_pos, last_pos, df, false)
    isnull(values) && return Nullable{T}()

    args = unsafe_get(values)
    # A null result from `validargs` is what allows the value to be constructed.
    if isnull(validargs(T, args...))
        # TODO: validargs gets called twice, since it's called again in the T constructor
        return Nullable{T}(T(args...))
    end
    return Nullable{T}()
end
"""
    parse_components(str::AbstractString, df::DateFormat) -> Array{Any}

Split `str` into its components by applying the directives of the `DateFormat` `df`.

Each component has its own type, typically a subtype of `Period`. Components are returned
in the order of the `DatePart` directives within the `DateFormat`. There may be fewer
components than there are `DatePart` directives.
"""
@generated function parse_components(str::AbstractString, df::DateFormat)
    codes = character_codes(df)
    component_types = Type[CONVERSION_SPECIFIERS[code] for code in codes]
    types_tuple = Expr(:tuple, component_types...)

    quote
        pos, len = start(str), endof(str)
        values, pos, num_parsed = tryparsenext_core(str, pos, len, df, true)
        t = unsafe_get(values)
        types = $types_tuple
        result = Vector{Any}(num_parsed)
        # Only the values that were actually parsed are converted; defaults are left out.
        for i in 1:min(num_parsed, length(types))
            result[i] = types[i](t[i])  # Constructing types takes most of the time
        end
        return result
    end
end