# This file is a part of Julia. License is MIT: https://julialang.org/license module HigherOrderFns # This module provides higher order functions specialized for sparse arrays, # particularly map[!]/broadcast[!] for SparseVectors and SparseMatrixCSCs at present. import Base: map, map!, broadcast, broadcast! import Base.Broadcast: _containertype, promote_containertype, broadcast_indices, broadcast_c, broadcast_c! using Base: front, tail, to_shape using ..SparseArrays: SparseVector, SparseMatrixCSC, AbstractSparseVector, AbstractSparseMatrix, AbstractSparseArray, indtype # This module is organized as follows: # (1) Define a common interface to SparseVectors and SparseMatrixCSCs sufficient for # map[!]/broadcast[!]'s purposes. The methods below are written against this interface. # (2) Define entry points for map[!] (short children of _map_[not]zeropres!). # (3) Define entry points for broadcast[!] (short children of _broadcast_[not]zeropres!). # (4) Define _map_[not]zeropres! specialized for a single (input) sparse vector/matrix. # (5) Define _map_[not]zeropres! specialized for a pair of (input) sparse vectors/matrices. # (6) Define general _map_[not]zeropres! capable of handling >2 (input) sparse vectors/matrices. # (7) Define _broadcast_[not]zeropres! specialized for a single (input) sparse vector/matrix. # (8) Define _broadcast_[not]zeropres! specialized for a pair of (input) sparse vectors/matrices. # (9) Define general _broadcast_[not]zeropres! capable of handling >2 (input) sparse vectors/matrices. # (10) Define (broadcast[!]) methods handling combinations of broadcast scalars and sparse vectors/matrices. # (11) Define (broadcast[!]) methods handling combinations of scalars, sparse vectors/matrices, # structured matrices, and one- and two-dimensional Arrays. # (12) Define (map[!]) methods handling combinations of sparse and structured matrices. 
# (1) A common interface to sparse vectors and matrices sufficient for the purposes of
# map[!]/broadcast[!]. This interface treats sparse vectors as n-by-one sparse matrices
# which, though technically incorrect, is how broadcast[!] views sparse vectors in practice.
SparseVecOrMat = Union{SparseVector,SparseMatrixCSC}

# numrows and numcols respectively yield size(x, 1) and size(x, 2), but avoid a branch.
@inline numrows(x::SparseVector) = x.n
@inline numrows(x::SparseMatrixCSC) = x.m
@inline numcols(x::SparseVector) = 1
@inline numcols(x::SparseMatrixCSC) = x.n

# Iteration space over the (virtual) columns of the argument; a vector is one column.
@inline columns(x::SparseVector) = 1
@inline columns(x::SparseMatrixCSC) = 1:x.n

# Range of positions in the stored-entry arrays belonging to column j.
@inline colrange(x::SparseVector, j) = 1:length(x.nzind)
@inline colrange(x::SparseMatrixCSC, j) = nzrange(x, j)

# First stored position of column j, and the first position beyond column j,
# in the argument's index type.
@inline colstartind(x::SparseVector, j) = one(indtype(x))
@inline colboundind(x::SparseVector, j) = convert(indtype(x), length(x.nzind) + 1)
@inline colstartind(x::SparseMatrixCSC, j) = x.colptr[j]
@inline colboundind(x::SparseMatrixCSC, j) = x.colptr[j + 1]

# Raw stored-index and stored-value arrays backing the argument.
@inline storedinds(x::SparseVector) = x.nzind
@inline storedinds(x::SparseMatrixCSC) = x.rowval
@inline storedvals(x::SparseVecOrMat) = x.nzval

# Record the start position of column j; a no-op (returning val) for vectors,
# a colptr write for matrices.
@inline setcolptr!(x::SparseVector, j, val) = val
@inline setcolptr!(x::SparseMatrixCSC, j, val) = x.colptr[j] = val

# Shrink x's stored-entry arrays to exactly maxstored entries; returns maxstored.
function trimstorage!(x::SparseVecOrMat, maxstored)
    resize!(storedinds(x), maxstored)
    resize!(storedvals(x), maxstored)
    return maxstored
end

# Grow x's stored-entry arrays to hold at least maxstored entries; returns maxstored.
function expandstorage!(x::SparseVecOrMat, maxstored)
    if length(storedinds(x)) < maxstored
        resize!(storedinds(x), maxstored)
    end
    if length(storedvals(x)) < maxstored
        resize!(storedvals(x), maxstored)
    end
    return maxstored
end

# (2) map[!]
# (2) map[!] entry points.
# One- and multi-argument map over sparse vectors/matrices. The homogeneous
# SparseVector/SparseMatrixCSC methods exist alongside the SparseVecOrMat methods
# for dispatch-precision; all funnel into the _noshapecheck_* drivers.
map(f::Tf, A::SparseVector) where {Tf} = _noshapecheck_map(f, A)
map(f::Tf, A::SparseMatrixCSC) where {Tf} = _noshapecheck_map(f, A)
function map(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) where {Tf,N}
    _checksameshape(A, Bs...)
    return _noshapecheck_map(f, A, Bs...)
end
function map(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
    _checksameshape(A, Bs...)
    return _noshapecheck_map(f, A, Bs...)
end
function map!(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) where {Tf,N}
    _checksameshape(C, A, Bs...)
    return _noshapecheck_map!(f, C, A, Bs...)
end
function map!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
    _checksameshape(C, A, Bs...)
    return _noshapecheck_map!(f, C, A, Bs...)
end

# Core of map!, assuming argument shapes were already verified. Chooses the
# zero-preserving kernel when f maps all-zero arguments to zero, and the
# densifying kernel otherwise.
function _noshapecheck_map!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat,
                            Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
    fz = f(_zeros_eltypes(A, Bs...)...)
    if _iszero(fz)
        return _map_zeropres!(f, C, A, Bs...)
    else
        return _map_notzeropres!(f, fz, C, A, Bs...)
    end
end

# Core of map: allocates a result of the appropriate index/entry type and nnz
# bound, then dispatches to the zero-preserving or densifying kernel.
function _noshapecheck_map(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
    fz = f(_zeros_eltypes(A, Bs...)...)
    zeropreserving = _iszero(fz)
    # A zero-preserving f stores at most the union of the inputs' stored entries;
    # otherwise the result is (potentially) dense.
    nnzbound = zeropreserving ? min(length(A), _sumnnzs(A, Bs...)) : length(A)
    Ti = _promote_indtype(A, Bs...)
    Tv = Base.Broadcast._broadcast_eltype(f, A, Bs...)
    dest = _allocres(size(A), Ti, Tv, nnzbound)
    if zeropreserving
        return _map_zeropres!(f, dest, A, Bs...)
    else
        return _map_notzeropres!(f, fz, dest, A, Bs...)
    end
end
# (3) broadcast[!]
# (3) broadcast[!] entry points
broadcast(f::Tf, A::SparseVector) where {Tf} = _noshapecheck_map(f, A)
broadcast(f::Tf, A::SparseMatrixCSC) where {Tf} = _noshapecheck_map(f, A)

# Zero-argument broadcast!: fills C from independent calls to f().
function broadcast!(f::Tf, C::SparseVecOrMat) where Tf
    isempty(C) && return _finishempty!(C)
    fofnoargs = f()
    if _iszero(fofnoargs) # f() is zero, so empty C
        trimstorage!(C, 0)
        _finishempty!(C)
    else # f() is nonzero, so densify C and fill with independent calls to f()
        _densestructure!(C)
        storedvals(C)[1] = fofnoargs
        broadcast!(f, view(storedvals(C), 2:length(storedvals(C))))
    end
    return C
end

# In-place broadcast over sparse arguments. Same-shape combinations reduce to map!;
# mixed shapes go through the zero-preserving / densifying broadcast kernels.
function broadcast!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
    _aresameshape(C, A, Bs...) && return _noshapecheck_map!(f, C, A, Bs...)
    Base.Broadcast.check_broadcast_indices(indices(C), A, Bs...)
    fofzeros = f(_zeros_eltypes(A, Bs...)...)
    fpreszeros = _iszero(fofzeros)
    return fpreszeros ? _broadcast_zeropres!(f, C, A, Bs...) :
                        _broadcast_notzeropres!(f, fofzeros, C, A, Bs...)
end

# the following three similar defs are necessary for type stability in the mixed vector/matrix case
broadcast(f::Tf, A::SparseVector, Bs::Vararg{SparseVector,N}) where {Tf,N} =
    _aresameshape(A, Bs...) ? _noshapecheck_map(f, A, Bs...) : _diffshape_broadcast(f, A, Bs...)
broadcast(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) where {Tf,N} =
    _aresameshape(A, Bs...) ? _noshapecheck_map(f, A, Bs...) : _diffshape_broadcast(f, A, Bs...)
broadcast(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N} =
    _diffshape_broadcast(f, A, Bs...)

# Allocating broadcast over arguments of differing shape: computes the broadcast
# shape, bounds the result's nnz, allocates, and dispatches to a kernel.
function _diffshape_broadcast(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
    fofzeros = f(_zeros_eltypes(A, Bs...)...)
    fpreszeros = _iszero(fofzeros)
    indextypeC = _promote_indtype(A, Bs...)
    entrytypeC = Base.Broadcast._broadcast_eltype(f, A, Bs...)
    shapeC = to_shape(Base.Broadcast.broadcast_indices(A, Bs...))
    maxnnzC = fpreszeros ? _checked_maxnnzbcres(shapeC, A, Bs...) : _densennz(shapeC)
    C = _allocres(shapeC, indextypeC, entrytypeC, maxnnzC)
    return fpreszeros ? _broadcast_zeropres!(f, C, A, Bs...) :
                        _broadcast_notzeropres!(f, fofzeros, C, A, Bs...)
end

# helper functions for map[!]/broadcast[!] entry points (and related methods below)
@inline _sumnnzs(A) = nnz(A)
@inline _sumnnzs(A, Bs...) = nnz(A) + _sumnnzs(Bs...)
@inline _iszero(x) = x == 0
@inline _iszero(x::Number) = Base.iszero(x)
@inline _iszero(x::AbstractArray) = Base.iszero(x)
# Tuple of one zero per argument eltype, used to probe whether f is zero-preserving.
@inline _zeros_eltypes(A) = (zero(eltype(A)),)
@inline _zeros_eltypes(A, Bs...) = (zero(eltype(A)), _zeros_eltypes(Bs...)...)
@inline _promote_indtype(A) = indtype(A)
@inline _promote_indtype(A, Bs...) = promote_type(indtype(A), _promote_indtype(Bs...))
@inline _aresameshape(A) = true
@inline _aresameshape(A, B) = size(A) == size(B)
@inline _aresameshape(A, B, Cs...) = _aresameshape(A, B) ? _aresameshape(B, Cs...) : false
@inline _checksameshape(As...) = _aresameshape(As...) || throw(DimensionMismatch("argument shapes must match"))
# Number of entries a fully dense result of the given shape would store.
@inline _densennz(shape::NTuple{1}) = shape[1]
@inline _densennz(shape::NTuple{2}) = shape[1] * shape[2]
# Upper bound on the number of stored entries A can contribute to a broadcast
# result of the given shape (each stored entry replicated per expanded dimension).
_maxnnzfrom(shape::NTuple{1}, A) = nnz(A) * div(shape[1], A.n)
_maxnnzfrom(shape::NTuple{2}, A::SparseVector) = nnz(A) * div(shape[1], A.n) * shape[2]
_maxnnzfrom(shape::NTuple{2}, A::SparseMatrixCSC) = nnz(A) * div(shape[1], A.m) * div(shape[2], A.n)
@inline _maxnnzfrom_each(shape, ::Tuple{}) = ()
@inline _maxnnzfrom_each(shape, As) = (_maxnnzfrom(shape, first(As)), _maxnnzfrom_each(shape, tail(As))...)
@inline _unchecked_maxnnzbcres(shape, As::Tuple) = min(_densennz(shape), sum(_maxnnzfrom_each(shape, As)))
@inline _unchecked_maxnnzbcres(shape, As...) = _unchecked_maxnnzbcres(shape, As)
# Checked variants guard against division in _maxnnzfrom when the result is empty.
@inline _checked_maxnnzbcres(shape::NTuple{1}, As...) = shape[1] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
@inline _checked_maxnnzbcres(shape::NTuple{2}, As...) = shape[1] != 0 && shape[2] != 0 ?
    _unchecked_maxnnzbcres(shape, As) : 0
# Allocate an uninitialized sparse vector (1-tuple shape) or matrix (2-tuple shape)
# with storage for maxnnz entries.
@inline function _allocres(shape::NTuple{1}, indextype, entrytype, maxnnz)
    storedinds = Vector{indextype}(maxnnz)
    storedvals = Vector{entrytype}(maxnnz)
    return SparseVector(shape..., storedinds, storedvals)
end
@inline function _allocres(shape::NTuple{2}, indextype, entrytype, maxnnz)
    pointers = Vector{indextype}(shape[2] + 1)
    storedinds = Vector{indextype}(maxnnz)
    storedvals = Vector{entrytype}(maxnnz)
    return SparseMatrixCSC(shape..., pointers, storedinds, storedvals)
end

# Ambiguity killers, TODO: nix conflicting specializations
ambiguityfunnel(f::Tf, x, y) where {Tf} = _aresameshape(x, y) ? _noshapecheck_map(f, x, y) : _diffshape_broadcast(f, x, y)
broadcast(::typeof(+), x::SparseVector, y::SparseVector) = ambiguityfunnel(+, x, y) # base/sparse/sparsevectors.jl:1266
broadcast(::typeof(-), x::SparseVector, y::SparseVector) = ambiguityfunnel(-, x, y) # base/sparse/sparsevectors.jl:1266
broadcast(::typeof(*), x::SparseVector, y::SparseVector) = ambiguityfunnel(*, x, y) # base/sparse/sparsevectors.jl:1266

# (4) _map_zeropres!/_map_notzeropres! specialized for a single sparse vector/matrix
"Stores only the nonzero entries of `map(f, Array(A))` in `C`."
function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
    Ck = 1
    @inbounds for j in columns(C)
        setcolptr!(C, j, Ck)
        for Ak in colrange(A, j)
            Cx = f(storedvals(A)[Ak])
            if !_iszero(Cx)
                # Grow C's storage on demand; the bound counts C's entries so far
                # plus A's remaining stored entries.
                Ck > spaceC && (spaceC = expandstorage!(C, Ck + nnz(A) - (Ak - 1)))
                storedinds(C)[Ck] = storedinds(A)[Ak]
                storedvals(C)[Ck] = Cx
                Ck += 1
            end
        end
    end
    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
    trimstorage!(C, Ck - 1)
    return C
end
"""
Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and
`f(A[i])`/`f(A[i,j])` in place of each stored entry `A[i]`/`A[i,j]` in `A`.
"""
function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
    # Build dense matrix structure in C, expanding storage if necessary
    _densestructure!(C)
    # Populate values
    fill!(storedvals(C), fillvalue)
    @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
        for Ak in colrange(A, j)
            Cx = f(storedvals(A)[Ak])
            Cx != fillvalue && (storedvals(C)[jo + storedinds(A)[Ak]] = Cx)
        end
    end
    # NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over /
    # nonsequential access of storedvals(C) does not appear to improve performance.
    return C
end

# helper functions for these methods and some of those below
# Linear-index offset of the start of each (virtual) column in a densified result.
@inline _densecoloffsets(A::SparseVector) = 0
@inline _densecoloffsets(A::SparseMatrixCSC) = 0:A.m:(A.m*(A.n - 1))
# Give A a fully dense stored-entry structure (every position stored), leaving
# the stored values untouched/uninitialized.
function _densestructure!(A::SparseVector)
    expandstorage!(A, A.n)
    copy!(A.nzind, 1:A.n)
    return A
end
function _densestructure!(A::SparseMatrixCSC)
    nnzA = A.m * A.n
    expandstorage!(A, nnzA)
    copy!(A.colptr, 1:A.m:(nnzA + 1))
    for k in _densecoloffsets(A)
        copy!(A.rowval, k + 1, 1:A.m)
    end
    return A
end

# (5) _map_zeropres!/_map_notzeropres! specialized for a pair of sparse vectors/matrices
# Two-pointer merge over the stored entries of A and B in each column.
function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
    # Sentinel row value (one past the last row) marks an exhausted column.
    rowsentinelA = convert(indtype(A), numrows(C) + 1)
    rowsentinelB = convert(indtype(B), numrows(C) + 1)
    Ck = 1
    @inbounds for j in columns(C)
        setcolptr!(C, j, Ck)
        Ak, stopAk = colstartind(A, j), colboundind(A, j)
        Bk, stopBk = colstartind(B, j), colboundind(B, j)
        Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
        Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
        while true
            if Ai == Bi
                Ai == rowsentinelA && break # column complete
                Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai
                Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
            elseif Ai < Bi
                Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai
                Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
            else # Bi < Ai
                Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi
                Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
            end
            # NOTE: The ordering of the conditional chain above impacts which matrices this
            # method performs best for. In the map situation (arguments have same shape, and
            # likely same or similar stored entry pattern), the Ai == Bi and termination
            # cases are equally or more likely than the Ai < Bi and Bi < Ai cases. Hence
            # the ordering of the conditional chain above differs from that in the
            # corresponding broadcast code (below).
            if !_iszero(Cx)
                Ck > spaceC && (spaceC = expandstorage!(C, Ck + (nnz(A) - (Ak - 1)) + (nnz(B) - (Bk - 1))))
                storedinds(C)[Ck] = Ci
                storedvals(C)[Ck] = Cx
                Ck += 1
            end
        end
    end
    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
    trimstorage!(C, Ck - 1)
    return C
end
function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
    # Build dense matrix structure in C, expanding storage if necessary
    _densestructure!(C)
    # Populate values
    fill!(storedvals(C), fillvalue)
    # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over /
    # nonsequential access of storedvals(C) does not appear to improve performance.
    rowsentinelA = convert(indtype(A), numrows(A) + 1)
    rowsentinelB = convert(indtype(B), numrows(B) + 1)
    @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
        Ak, stopAk = colstartind(A, j), colboundind(A, j)
        Bk, stopBk = colstartind(B, j), colboundind(B, j)
        Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
        Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
        while true
            if Ai == Bi
                Ai == rowsentinelA && break # column complete
                Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai
                Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
            elseif Ai < Bi
                Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai
                Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
            else # Bi < Ai
                Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi
                Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
            end
            Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
        end
    end
    return C
end

# (6) _map_zeropres!/_map_notzeropres! for more than two sparse matrices / vectors
# N-way merge: tracks per-argument cursors (ks), bounds (stopks), and the current
# stored row of each argument (rows); activerow is the smallest pending row.
function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
    rowsentinel = numrows(C) + 1
    Ck = 1
    stopks = _colstartind_all(1, As)
    @inbounds for j in columns(C)
        setcolptr!(C, j, Ck)
        ks = stopks
        stopks = _colboundind_all(j, As)
        rows = _rowforind_all(rowsentinel, ks, stopks, As)
        activerow = min(rows...)
        while activerow < rowsentinel
            # activerows = _isactiverow_all(activerow, rows)
            # Cx = f(_gatherargs(activerows, ks, As)...)
            # ks = _updateind_all(activerows, ks)
            # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
            vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As)
            Cx = f(vals...)
            if !_iszero(Cx)
                Ck > spaceC && (spaceC = expandstorage!(C, Ck + min(length(C), _sumnnzs(As...)) - (sum(ks) - N)))
                storedinds(C)[Ck] = activerow
                storedvals(C)[Ck] = Cx
                Ck += 1
            end
            activerow = min(rows...)
        end
    end
    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
    trimstorage!(C, Ck - 1)
    return C
end
function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
    # Build dense matrix structure in C, expanding storage if necessary
    _densestructure!(C)
    # Populate values
    fill!(storedvals(C), fillvalue)
    # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over /
    # nonsequential access of C.nzval does not appear to improve performance.
    rowsentinel = numrows(C) + 1
    stopks = _colstartind_all(1, As)
    @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
        ks = stopks
        stopks = _colboundind_all(j, As)
        rows = _rowforind_all(rowsentinel, ks, stopks, As)
        activerow = min(rows...)
        while activerow < rowsentinel
            # activerows = _isactiverow_all(activerow, rows)
            # Cx = f(_gatherargs(activerows, ks, As)...)
            # ks = _updateind_all(activerows, ks)
            # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
            vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As)
            Cx = f(vals...)
            Cx != fillvalue && (storedvals(C)[jo + activerow] = Cx)
            activerow = min(rows...)
        end
    end
    return C
end

# helper methods for map/map! methods just above
# Tuple-recursive per-argument lookups (one element per argument in As).
@inline _colstartind(j, A) = colstartind(A, j)
@inline _colstartind_all(j, ::Tuple{}) = ()
@inline _colstartind_all(j, As) = (
    _colstartind(j, first(As)),
    _colstartind_all(j, tail(As))...)
@inline _colboundind(j, A) = colboundind(A, j)
@inline _colboundind_all(j, ::Tuple{}) = ()
@inline _colboundind_all(j, As) = (
    _colboundind(j, first(As)),
    _colboundind_all(j, tail(As))...)
@inline _rowforind(rowsentinel, k, stopk, A) =
    k < stopk ? storedinds(A)[k] : convert(indtype(A), rowsentinel)
@inline _rowforind_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
@inline _rowforind_all(rowsentinel, ks, stopks, As) = (
    _rowforind(rowsentinel, first(ks), first(stopks), first(As)),
    _rowforind_all(rowsentinel, tail(ks), tail(stopks), tail(As))...)

# fusing the following defs. avoids a few branches, yielding 5-30% runtime reduction
# @inline _isactiverow(activerow, row) = row == activerow
# @inline _isactiverow_all(activerow, ::Tuple{}) = ()
# @inline _isactiverow_all(activerow, rows) = (
#     _isactiverow(activerow, first(rows)),
#     _isactiverow_all(activerow, tail(rows))...)
# @inline _gatherarg(isactiverow, k, A) = isactiverow ?
#     storedvals(A)[k] : zero(eltype(A))
# @inline _gatherargs(::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
# @inline _gatherargs(activerows, ks, As) = (
#     _gatherarg(first(activerows), first(ks), first(As)),
#     _gatherargs(tail(activerows), tail(ks), tail(As))...)
# @inline _updateind(isactiverow, k) = isactiverow ? (k + oneunit(k)) : k
# @inline _updateind_all(::Tuple{}, ::Tuple{}) = ()
# @inline _updateind_all(activerows, ks) = (
#     _updateind(first(activerows), first(ks)),
#     _updateind_all(tail(activerows), tail(ks))...)
# @inline _updaterow(rowsentinel, isrowactive, presrow, k, stopk, A) =
#     isrowactive ? (k < stopk ? storedinds(A)[k] : oftype(presrow, rowsentinel)) : presrow
# @inline _updaterow_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
# @inline _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) = (
#     _updaterow(rowsentinel, first(activerows), first(rows), first(ks), first(stopks), first(As)),
#     _updaterow_all(rowsentinel, tail(activerows), tail(rows), tail(ks), tail(stopks), tail(As))...)

# For one argument: fetch its value at activerow (or a zero if it has no stored
# entry there) and advance its cursor/row accordingly.
@inline function _fusedupdate(rowsentinel, activerow, row, k, stopk, A)
    # returns (val, nextk, nextrow)
    if row == activerow
        nextk = k + oneunit(k)
        (storedvals(A)[k], nextk, (nextk < stopk ? storedinds(A)[nextk] : oftype(row, rowsentinel)))
    else
        (zero(eltype(A)), k, row)
    end
end
# Apply _fusedupdate across all arguments, accumulating (vals, nextks, nextrows).
@inline _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As) =
    _fusedupdate_all((#=vals=#), (#=nextks=#), (#=nextrows=#), rowsentinel, activerow, rows, ks, stopks, As)
@inline _fusedupdate_all(vals, nextks, nextrows, rowsent, activerow, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) =
    (vals, nextks, nextrows)
@inline function _fusedupdate_all(vals, nextks, nextrows, rowsentinel, activerow, rows, ks, stopks, As)
    val, nextk, nextrow = _fusedupdate(rowsentinel, activerow, first(rows), first(ks), first(stopks), first(As))
    return _fusedupdate_all((vals..., val), (nextks..., nextk), (nextrows..., nextrow),
                            rowsentinel, activerow, tail(rows), tail(ks), tail(stopks), tail(As))
end

# (7) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a single (input) sparse vector/matrix
function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
    isempty(C) && return _finishempty!(C)
    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
    # C and A cannot have the same shape, as we directed that case to map in broadcast's
    # entry point; here we need to efficiently handle only heterogeneous C-A combinations
    # where one or both of C and A has at least one singleton dimension.
    #
    # We first divide the cases into two groups: those in which the input argument does not
    # expand vertically, and those in which the input argument expands vertically.
    #
    # Cases without vertical expansion
    Ck = 1
    if numrows(A) == numrows(C)
        @inbounds for j in columns(C)
            setcolptr!(C, j, Ck)
            # When A is a single (virtual) column, broadcast it across all of C's columns.
            bccolrangejA = numcols(A) == 1 ? colrange(A, 1) : colrange(A, j)
            for Ak in bccolrangejA
                Cx = f(storedvals(A)[Ak])
                if !_iszero(Cx)
                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A)))
                    storedinds(C)[Ck] = storedinds(A)[Ak]
                    storedvals(C)[Ck] = Cx
                    Ck += 1
                end
            end
        end
    # Cases with vertical expansion
    else # numrows(A) != numrows(C) (=> numrows(A) == 1)
        @inbounds for j in columns(C)
            setcolptr!(C, j, Ck)
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
            fofAx = f(Ax)
            # if fofAx is zero, then either A's jth column is empty, or A's jth column
            # contains a nonzero value x but f(Ax) is nonetheless zero, so we need store
            # nothing in C's jth column. if to the contrary fofAx is nonzero, then we must
            # densely populate C's jth column with fofAx.
            if !_iszero(fofAx)
                for Ci::indtype(C) in 1:numrows(C)
                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A)))
                    storedinds(C)[Ck] = Ci
                    storedvals(C)[Ck] = fofAx
                    Ck += 1
                end
            end
        end
    end
    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
    trimstorage!(C, Ck - 1)
    return C
end
function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
    # For information on this code, see comments in similar code in _broadcast_zeropres! above
    # Build dense matrix structure in C, expanding storage if necessary
    _densestructure!(C)
    # Populate values
    fill!(storedvals(C), fillvalue)
    # Cases without vertical expansion
    if numrows(A) == numrows(C)
        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
            bccolrangejA = numcols(A) == 1 ? colrange(A, 1) : colrange(A, j)
            for Ak in bccolrangejA
                Cx, Ci = f(storedvals(A)[Ak]), storedinds(A)[Ak]
                Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
            end
        end
    # Cases with vertical expansion
    else # numrows(A) != numrows(C) (=> numrows(A) == 1)
        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
            fofAx = f(Ax)
            fofAx != fillvalue && (storedvals(C)[(jo + 1):(jo + numrows(C))] = fofAx)
        end
    end
    return C
end

# (8) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse vectors/matrices
function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
    isempty(C) && return _finishempty!(C)
    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
    rowsentinelA = convert(indtype(A), numrows(C) + 1)
    rowsentinelB = convert(indtype(B), numrows(C) + 1)
    # C, A, and B cannot all have the same shape, as we directed that case to map in broadcast's
    # entry point; here we need to efficiently handle only heterogeneous combinations of mats/vecs
    # with no singleton dimensions, one singleton dimension, and two singleton dimensions.
    # Cases involving objects with two singleton dimensions should be rare and optimizing
    # that case complicates the code appreciably, so we largely ignore that case's
    # performance below.
    #
    # We first divide the cases into two groups: those in which neither input argument
    # expands vertically, and those in which at least one argument expands vertically.
    #
    # NOTE: Placing the loops over columns outside the conditional chain segregating
    # argument shape combinations eliminates some code replication but unfortunately
    # hurts performance appreciably in some cases.
    #
    # Cases without vertical expansion
    Ck = 1
    if numrows(A) == numrows(B) == numrows(C)
        @inbounds for j in columns(C)
            setcolptr!(C, j, Ck)
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) :
                                           (colstartind(B, j), colboundind(B, j))
            # Restructuring this k/stopk code to avoid unnecessary colptr retrievals does
            # not improve performance significantly. Leave in this less complex form.
            Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
            Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
            while true
                if Ai != Bi
                    if Ai < Bi
                        Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai
                        Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                    else # Ai > Bi
                        Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi
                        Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
                    end
                elseif #= Ai == Bi && =# Ai == rowsentinelA
                    break # column complete
                else #= Ai == Bi != rowsentinel =#
                    Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai
                    Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                    Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
                end
                # NOTE: The ordering of the conditional chain above impacts which matrices
                # this method performs best for. In contrast to the map situation (arguments
                # have same shape, and likely same or similar stored entry pattern), where
                # the Ai == Bi and termination cases are equally or more likely than the
                # Ai < Bi and Bi < Ai cases, in the broadcast situation (arguments have
                # different shape, and likely largely disjoint expanded stored entry
                # pattern) the Ai < Bi and Bi < Ai cases are equally or more likely than the
                # Ai == Bi and termination cases. Hence the ordering of the conditional
                # chain above differs from that in the corresponding map code.
                if !_iszero(Cx)
                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                    storedinds(C)[Ck] = Ci
                    storedvals(C)[Ck] = Cx
                    Ck += 1
                end
            end
        end
    # Cases with vertical expansion
    elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B
        @inbounds for j in columns(C)
            setcolptr!(C, j, Ck)
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) :
                                           (colstartind(B, j), colboundind(B, j))
            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
            Cx = f(Ax, Bx)
            if !_iszero(Cx)
                for Ci::indtype(C) in 1:numrows(C)
                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                    storedinds(C)[Ck] = Ci
                    storedvals(C)[Ck] = Cx
                    Ck += 1
                end
            end
        end
    elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1 , vertically expand only A
        @inbounds for j in columns(C)
            setcolptr!(C, j, Ck)
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) :
                                           (colstartind(B, j), colboundind(B, j))
            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
            fvAzB = f(Ax, zero(eltype(B)))
            if _iszero(fvAzB)
                # either A's jth column is empty, or A's jth column contains a nonzero value
                # Ax but f(Ax, zero(eltype(B))) is nonetheless zero, so we can scan through
                # B's jth column without storing every entry in C's jth column
                while Bk < stopBk
                    Cx = f(Ax, storedvals(B)[Bk])
                    if !_iszero(Cx)
                        Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                        storedinds(C)[Ck] = storedinds(B)[Bk]
                        storedvals(C)[Ck] = Cx
                        Ck += 1
                    end
                    Bk += oneunit(Bk)
                end
            else
                # A's jth column is nonempty and f(Ax, zero(eltype(B))) is not zero, so
                # we must store (likely) every entry in C's jth column
                Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
                for Ci::indtype(C) in 1:numrows(C)
                    if Bi == Ci
                        Cx = f(Ax, storedvals(B)[Bk])
                        Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
                    else
                        Cx = fvAzB
                    end
                    if !_iszero(Cx)
                        Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                        storedinds(C)[Ck] = Ci
                        storedvals(C)[Ck] = Cx
                        Ck += 1
                    end
                end
            end
        end
    else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B
        @inbounds for j in columns(C)
            setcolptr!(C, j, Ck)
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) :
                                           (colstartind(B, j), colboundind(B, j))
            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
            fzAvB = f(zero(eltype(A)), Bx)
            if _iszero(fzAvB)
                # either B's jth column is empty, or B's jth column contains a nonzero value
                # Bx but f(zero(eltype(A)), Bx) is nonetheless zero, so we can scan through
                # A's jth column without storing every entry in C's jth column
                while Ak < stopAk
                    Cx = f(storedvals(A)[Ak], Bx)
                    if !_iszero(Cx)
                        Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                        storedinds(C)[Ck] = storedinds(A)[Ak]
                        storedvals(C)[Ck] = Cx
                        Ck += 1
                    end
                    Ak += oneunit(Ak)
                end
            else
                # B's jth column is nonempty and f(zero(eltype(A)), Bx) is not zero, so
                # we must store (likely) every entry in C's jth column
                Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                for Ci::indtype(C) in 1:numrows(C)
                    if Ai == Ci
                        Cx = f(storedvals(A)[Ak], Bx)
                        Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                    else
                        Cx = fzAvB
                    end
                    if !_iszero(Cx)
                        Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                        storedinds(C)[Ck] = Ci
                        storedvals(C)[Ck] = Cx
                        Ck += 1
                    end
                end
            end
        end
    end
    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
    trimstorage!(C, Ck - 1)
    return C
end
function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
    # For information on this code, see comments in similar code in _broadcast_zeropres! above
    # Build dense matrix structure in C, expanding storage if necessary
    _densestructure!(C)
    # Populate values
    fill!(storedvals(C), fillvalue)
    rowsentinelA = convert(indtype(A), numrows(C) + 1)
    rowsentinelB = convert(indtype(B), numrows(C) + 1)
    # Cases without vertical expansion
    if numrows(A) == numrows(B) == numrows(C)
        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) :
                                           (colstartind(B, j), colboundind(B, j))
            Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
            Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
            while true
                if Ai < Bi
                    Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai
                    Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                elseif Ai > Bi
                    Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi
                    Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
                elseif #= Ai == Bi && =# Ai == rowsentinelA
                    break # column complete
                else #= Ai == Bi != rowsentinel =#
                    Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai
                    Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                    Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
                end
                Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
            end
        end
    # Cases with vertical expansion
    elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B
        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) :
                                           (colstartind(B, j), colboundind(B, j))
            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
            Cx = f(Ax, Bx)
            if Cx != fillvalue
                for Ck::Int in (jo + 1):(jo + numrows(C))
                    storedvals(C)[Ck] = Cx
                end
            end
        end
    elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1, vertically expand only A
        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) :
                                           (colstartind(B, j), colboundind(B, j))
            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
            fvAzB = f(Ax, zero(eltype(B)))
            if _iszero(fvAzB)
                while Bk < stopBk
                    Cx = f(Ax, storedvals(B)[Bk])
                    Cx != fillvalue && (storedvals(C)[jo + storedinds(B)[Bk]] = Cx)
                    Bk += oneunit(Bk)
                end
            else
                Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
                for Ci::indtype(C) in 1:numrows(C)
                    if Bi == Ci
                        Cx = f(Ax, storedvals(B)[Bk])
                        Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
                    else
                        Cx = fvAzB
                    end
                    Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
                end
            end
        end
    else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B
        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) :
                                           (colstartind(A, j), colboundind(A, j))
            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) :
                                           (colstartind(B, j), colboundind(B, j))
            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
            fzAvB = f(zero(eltype(A)), Bx)
            if _iszero(fzAvB)
                while Ak < stopAk
                    Cx = f(storedvals(A)[Ak], Bx)
                    Cx != fillvalue && (storedvals(C)[jo + storedinds(A)[Ak]] = Cx)
                    Ak += oneunit(Ak)
                end
            else
                Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                for Ci::indtype(C) in 1:numrows(C)
                    if Ai == Ci
                        Cx = f(storedvals(A)[Ak], Bx)
                        Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
                    else
                        Cx = fzAvB
                    end
                    Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
                end
            end
        end
    end
    return C
end
# Finish an empty result: a vector needs nothing; a matrix needs a valid colptr.
_finishempty!(C::SparseVector) = C
_finishempty!(C::SparseMatrixCSC) = (fill!(C.colptr, 1); C)

# (9) _broadcast_zeropres!/_broadcast_notzeropres!
# for more than two (input) sparse vectors/matrices

# N-ary analogue of the binary kernels above: per broadcast column, the per-argument
# column cursors/defaults are carried in tuples built by the `_*_all` tuple-recursion
# helpers below, and only nonzero results are stored in C.
function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
    isempty(C) && return _finishempty!(C)
    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
    # per-argument flags: does argument k expand vertically/horizontally to C's shape?
    expandsverts = _expandsvert_all(C, As)
    expandshorzs = _expandshorz_all(C, As)
    # sentinel row index (one past the last valid row) marking an exhausted column stream
    rowsentinel = numrows(C) + 1
    Ck = 1
    @inbounds for j in columns(C)
        setcolptr!(C, j, Ck)
        ks = _startindforbccol_all(j, expandshorzs, As)
        stopks = _stopindforbccol_all(j, expandshorzs, As)
        # Neither fusing ks and stopks construction, nor restructuring them to avoid repeated
        # colptr lookups, improves performance significantly. So keep the less complex approach here.
        isemptys = _isemptycol_all(ks, stopks)
        defargs = _defargforcol_all(j, isemptys, expandsverts, ks, As)
        rows = _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As)
        # defaultCx is f applied to each argument's default (non-stored) value for this column
        defaultCx = f(defargs...)
        activerow = min(rows...)
        if _iszero(defaultCx) # zero-preserving column scan
            # visit only rows where at least one argument has a stored entry
            while activerow < rowsentinel
                # activerows = _isactiverow_all(activerow, rows)
                # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...)
                # ks = _updateind_all(activerows, ks)
                # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
                args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
                Cx = f(args...)
                if !_iszero(Cx)
                    # grow C's storage lazily, only when the next write would overflow it
                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), As)))
                    storedinds(C)[Ck] = activerow
                    storedvals(C)[Ck] = Cx
                    Ck += 1
                end
                activerow = min(rows...)
            end
        else # zero-non-preserving column scan
            # every row may produce a nonzero, so walk the full column
            for Ci in 1:numrows(C)
                if Ci == activerow
                    # activerows = _isactiverow_all(activerow, rows)
                    # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...)
                    # ks = _updateind_all(activerows, ks)
                    # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
                    args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
                    Cx = f(args...)
                    activerow = min(rows...)
                else
                    Cx = defaultCx
                end
                if !_iszero(Cx)
                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), As)))
                    storedinds(C)[Ck] = Ci
                    storedvals(C)[Ck] = Cx
                    Ck += 1
                end
            end
        end
    end
    # finalize C: record the bound of the last column, then discard unused tail storage
    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
    trimstorage!(C, Ck - 1)
    return C
end

# N-ary analogue of the binary _broadcast_notzeropres! above: C is made dense, prefilled
# with `fillvalue`, and only results differing from `fillvalue` are written.
function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
    isempty(C) && return _finishempty!(C)
    # Build dense matrix structure in C, expanding storage if necessary
    _densestructure!(C)
    # Populate values
    fill!(storedvals(C), fillvalue)
    expandsverts = _expandsvert_all(C, As)
    expandshorzs = _expandshorz_all(C, As)
    rowsentinel = numrows(C) + 1
    @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
        ks = _startindforbccol_all(j, expandshorzs, As)
        stopks = _stopindforbccol_all(j, expandshorzs, As)
        # Neither fusing ks and stopks construction, nor restructuring them to avoid repeated
        # colptr lookups, improves performance significantly. So keep the less complex approach here.
        isemptys = _isemptycol_all(ks, stopks)
        defargs = _defargforcol_all(j, isemptys, expandsverts, ks, As)
        rows = _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As)
        defaultCx = f(defargs...)
        activerow = min(rows...)
        if defaultCx == fillvalue # fillvalue-preserving column scan
            while activerow < rowsentinel
                # activerows = _isactiverow_all(activerow, rows)
                # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...)
                # ks = _updateind_all(activerows, ks)
                # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
                args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
                Cx = f(args...)
                Cx != fillvalue && (storedvals(C)[jo + activerow] = Cx)
                activerow = min(rows...)
            end
        else # fillvalue-non-preserving column scan
            for Ci in 1:numrows(C)
                if Ci == activerow
                    # activerows = _isactiverow_all(activerow, rows)
                    # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...)
                    # ks = _updateind_all(activerows, ks)
                    # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
                    args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
                    Cx = f(args...)
                    activerow = min(rows...)
                else
                    Cx = defaultCx
                end
                Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
            end
        end
    end
    return C
end

# helper method for broadcast/broadcast! methods just above
# Each `_foo_all` below maps `_foo` over a tuple of arguments by tuple recursion
# (first/tail), producing a tuple of per-argument results without allocation.
@inline _expandsvert(C, A) = numrows(A) != numrows(C)
@inline _expandsvert_all(C, ::Tuple{}) = ()
@inline _expandsvert_all(C, As) = (_expandsvert(C, first(As)), _expandsvert_all(C, tail(As))...)
@inline _expandshorz(C, A) = numcols(A) != numcols(C)
@inline _expandshorz_all(C, ::Tuple{}) = ()
@inline _expandshorz_all(C, As) = (_expandshorz(C, first(As)), _expandshorz_all(C, tail(As))...)
# start/bound of A's stored entries for broadcast column j (column 1 if A expands horizontally)
@inline _startindforbccol(j, expandshorz, A) = expandshorz ? colstartind(A, 1) : colstartind(A, j)
@inline _startindforbccol_all(j, ::Tuple{}, ::Tuple{}) = ()
@inline _startindforbccol_all(j, expandshorzs, As) = (
    _startindforbccol(j, first(expandshorzs), first(As)),
    _startindforbccol_all(j, tail(expandshorzs), tail(As))...)
@inline _stopindforbccol(j, expandshorz, A) = expandshorz ? colboundind(A, 1) : colboundind(A, j)
@inline _stopindforbccol_all(j, ::Tuple{}, ::Tuple{}) = ()
@inline _stopindforbccol_all(j, expandshorzs, As) = (
    _stopindforbccol(j, first(expandshorzs), first(As)),
    _stopindforbccol_all(j, tail(expandshorzs), tail(As))...)
@inline _isemptycol(k, stopk) = k == stopk
@inline _isemptycol_all(::Tuple{}, ::Tuple{}) = ()
@inline _isemptycol_all(ks, stopks) = (
    _isemptycol(first(ks), first(stopks)),
    _isemptycol_all(tail(ks), tail(stopks))...)
# initial stored-row cursor for A's column, or the sentinel when A expands vertically
# or the column is empty
@inline _initrowforcol(j, rowsentinel, isempty, expandsvert, k, A) = expandsvert || isempty ?
    convert(indtype(A), rowsentinel) : storedinds(A)[k]
@inline _initrowforcol_all(j, rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
@inline _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As) = (
    _initrowforcol(j, rowsentinel, first(isemptys), first(expandsverts), first(ks), first(As)),
    _initrowforcol_all(j, rowsentinel, tail(isemptys), tail(expandsverts), tail(ks), tail(As))...)
# default argument value for A in this column: its (single) stored value when A expands
# vertically from a nonempty column, otherwise zero
@inline _defargforcol(j, isempty, expandsvert, k, A) = expandsvert && !isempty ?
    storedvals(A)[k] : zero(eltype(A))
@inline _defargforcol_all(j, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
@inline _defargforcol_all(j, isemptys, expandsverts, ks, As) = (
    _defargforcol(j, first(isemptys), first(expandsverts), first(ks), first(As)),
    _defargforcol_all(j, tail(isemptys), tail(expandsverts), tail(ks), tail(As))...)
# fusing the following defs. avoids a few branches and construction of a tuple, yielding 1-20% runtime reduction
# @inline _isactiverow(activerow, row) = row == activerow
# @inline _isactiverow_all(activerow, ::Tuple{}) = ()
# @inline _isactiverow_all(activerow, rows) = (
#     _isactiverow(activerow, first(rows)),
#     _isactiverow_all(activerow, tail(rows))...)
# @inline _gatherbcarg(isactiverow, defarg, k, A) = isactiverow ? storedvals(A)[k] : defarg
# @inline _gatherbcargs(::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
# @inline _gatherbcargs(activerows, defargs, ks, As) = (
#     _gatherbcarg(first(activerows), first(defargs), first(ks), first(As)),
#     _gatherbcargs(tail(activerows), tail(defargs), tail(ks), tail(As))...)
# @inline _updateind(isactiverow, k) = isactiverow ? (k + oneunit(k)) : k
# @inline _updateind_all(::Tuple{}, ::Tuple{}) = ()
# @inline _updateind_all(activerows, ks) = (
#     _updateind(first(activerows), first(ks)),
#     _updateind_all(tail(activerows), tail(ks))...)
# @inline _updaterow(rowsentinel, isrowactive, presrow, k, stopk, A) =
#     isrowactive ? (k < stopk ? storedinds(A)[k] : oftype(presrow, rowsentinel)) : presrow
# @inline _updaterow_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
# @inline _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) = (
#     _updaterow(rowsentinel, first(activerows), first(rows), first(ks), first(stopks), first(As)),
#     _updaterow_all(rowsentinel, tail(activerows), tail(rows), tail(ks), tail(stopks), tail(As))...)
# Fused per-argument step: if this argument's cursor sits on the active row, consume its
# stored value and advance; otherwise contribute the column default and stand still.
@inline function _fusedupdatebc(rowsentinel, activerow, row, defarg, k, stopk, A)
    # returns (val, nextk, nextrow)
    if row == activerow
        nextk = k + oneunit(k)
        (storedvals(A)[k], nextk, (nextk < stopk ? storedinds(A)[nextk] : oftype(row, rowsentinel)))
    else
        (defarg, k, row)
    end
end
@inline _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As) =
    _fusedupdatebc_all((#=vals=#), (#=nextks=#), (#=nextrows=#), rowsentinel, activerow, rows, defargs, ks, stopks, As)
@inline _fusedupdatebc_all(vals, nextks, nextrows, rowsent, activerow, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) =
    (vals, nextks, nextrows)
@inline function _fusedupdatebc_all(vals, nextks, nextrows, rowsentinel, activerow, rows, defargs, ks, stopks, As)
    val, nextk, nextrow = _fusedupdatebc(rowsentinel, activerow, first(rows), first(defargs), first(ks), first(stopks), first(As))
    return _fusedupdatebc_all((vals..., val), (nextks..., nextk), (nextrows..., nextrow),
        rowsentinel, activerow, tail(rows), tail(defargs), tail(ks), tail(stopks), tail(As))
end

# (10) broadcast[!]
# over combinations of broadcast scalars and sparse vectors/matrices

# broadcast shape promotion for combinations of sparse arrays and other types
broadcast_indices(::Type{AbstractSparseArray}, A) = indices(A)
# broadcast container type promotion for combinations of sparse arrays and other types
_containertype(::Type{<:SparseVecOrMat}) = AbstractSparseArray
# combinations of sparse arrays with broadcast scalars should yield sparse arrays
promote_containertype(::Type{Any}, ::Type{AbstractSparseArray}) = AbstractSparseArray
promote_containertype(::Type{AbstractSparseArray}, ::Type{Any}) = AbstractSparseArray
# combinations of sparse arrays with tuples should divert to the generic AbstractArray broadcast code
# (we handle combinations involving dense vectors/matrices below)
promote_containertype(::Type{Tuple}, ::Type{AbstractSparseArray}) = Array
promote_containertype(::Type{AbstractSparseArray}, ::Type{Tuple}) = Array

# broadcast[!] entry points for combinations of sparse arrays and other (scalar) types:
# strip the scalar arguments into a partially evaluated function, then broadcast that
# function over the remaining sparse arguments only.
@inline function broadcast_c(f, ::Type{AbstractSparseArray}, mixedargs::Vararg{Any,N}) where N
    parevalf, passedargstup = capturescalars(f, mixedargs)
    return broadcast(parevalf, passedargstup...)
end
@inline function broadcast_c!(f, ::Type{AbstractSparseArray}, dest::SparseVecOrMat, mixedsrcargs::Vararg{Any,N}) where N
    parevalf, passedsrcargstup = capturescalars(f, mixedsrcargs)
    return broadcast!(parevalf, dest, passedsrcargstup...)
end
# capturescalars takes a function (f) and a tuple of mixed sparse vectors/matrices and
# broadcast scalar arguments (mixedargs), and returns a function (parevalf, i.e. partially
# evaluated f) and a reduced argument tuple (passedargstup) containing only the sparse
# vectors/matrices in mixedargs in their original order, and such that the result of
# broadcast(parevalf, passedargstup...) is broadcast(f, mixedargs...)
#
# The recursion threads two tuples through the wrapper closures: `passed` (values coming
# from the surviving sparse arguments) and `tofill` (the argument list being rebuilt,
# with captured scalars reinserted at their original positions).
@inline capturescalars(f, mixedargs) =
    capturescalars((passed, tofill) -> f(tofill...), (), mixedargs...)
# Recursion cases for capturescalars
@inline capturescalars(f, passedargstup, scalararg, mixedargs...) =
    capturescalars(capturescalar(f, scalararg), passedargstup, mixedargs...)
@inline capturescalars(f, passedargstup, nonscalararg::SparseVecOrMat, mixedargs...) =
    capturescalars(passnonscalar(f), (passedargstup..., nonscalararg), mixedargs...)
# shift the most recently passed (sparse-derived) value from `passed` into `tofill`
@inline passnonscalar(f) = (passed, tofill) -> f(Base.front(passed), (last(passed), tofill...))
# reinsert a captured scalar at the front of `tofill`
@inline capturescalar(f, scalararg) = (passed, tofill) -> f(passed, (scalararg, tofill...))
# Base cases for capturescalars
@inline capturescalars(f, passedargstup, scalararg) =
    (capturelastscalar(f, scalararg), passedargstup)
@inline capturescalars(f, passedargstup, nonscalararg::SparseVecOrMat) =
    (passlastnonscalar(f), (passedargstup..., nonscalararg))
@inline passlastnonscalar(f) = (passed...) -> f(Base.front(passed), (last(passed),))
@inline capturelastscalar(f, scalararg) = (passed...) -> f(passed, (scalararg,))

# NOTE: The following two method definitions work around #19096.
broadcast(f::Tf, ::Type{T}, A::SparseMatrixCSC) where {Tf,T} = broadcast(y -> f(T, y), A)
broadcast(f::Tf, A::SparseMatrixCSC, ::Type{T}) where {Tf,T} = broadcast(x -> f(x, T), A)

# (11) broadcast[!] over combinations of scalars, sparse vectors/matrices, structured matrices,
# and one- and two-dimensional Arrays (via promotion of structured matrices and Arrays)
#
# for combinations involving only scalars, sparse arrays, structured matrices, and dense
# vectors/matrices, promote all structured matrices and dense vectors/matrices to sparse
# and rebroadcast. otherwise, divert to generic AbstractArray broadcast code.
#
# this requires three steps: segregate combinations to promote to sparse via Broadcast's
# containertype promotion and dispatch layer (broadcast_c[!], containertype,
# promote_containertype), separate ambiguous cases from the preceding dispatch
# layer in sparse broadcast's internal containertype promotion and dispatch layer
# (spbroadcast_c[!], spcontainertype, promote_spcontainertype), and then promote
# arguments to sparse as appropriate and rebroadcast.

# first (Broadcast containertype) dispatch layer's promotion logic
# Marker type funneling promotable argument combinations toward sparse broadcast.
struct PromoteToSparse end

# broadcast containertype definitions for structured matrices
StructuredMatrix = Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal}
_containertype(::Type{<:StructuredMatrix}) = PromoteToSparse
broadcast_indices(::Type{PromoteToSparse}, A) = indices(A)

# combinations explicitly involving Tuples and PromoteToSparse collections
# divert to the generic AbstractArray broadcast code
promote_containertype(::Type{PromoteToSparse}, ::Type{Tuple}) = Array
promote_containertype(::Type{Tuple}, ::Type{PromoteToSparse}) = Array
# combinations involving scalars and PromoteToSparse collections continue in the promote-to-sparse funnel
promote_containertype(::Type{PromoteToSparse}, ::Type{Any}) = PromoteToSparse
promote_containertype(::Type{Any}, ::Type{PromoteToSparse}) = PromoteToSparse
# combinations involving sparse arrays and PromoteToSparse collections continue in the promote-to-sparse funnel
promote_containertype(::Type{PromoteToSparse}, ::Type{AbstractSparseArray}) = PromoteToSparse
promote_containertype(::Type{AbstractSparseArray}, ::Type{PromoteToSparse}) = PromoteToSparse
# combinations involving Arrays and PromoteToSparse collections continue in the promote-to-sparse funnel
promote_containertype(::Type{PromoteToSparse}, ::Type{Array}) = PromoteToSparse
promote_containertype(::Type{Array}, ::Type{PromoteToSparse}) = PromoteToSparse
# combinations involving Arrays and sparse arrays continue in the promote-to-sparse funnel
promote_containertype(::Type{AbstractSparseArray}, ::Type{Array}) = PromoteToSparse
promote_containertype(::Type{Array}, ::Type{AbstractSparseArray}) = PromoteToSparse

# second (internal sparse broadcast containertype) dispatch layer's promotion logic
# mostly just disambiguates Array from the main containertype promotion mechanism
# AbstractArray serves as a marker to shunt to the generic AbstractArray broadcast code
_spcontainertype(x) = _containertype(x)
_spcontainertype(::Type{<:Vector}) = Vector
_spcontainertype(::Type{<:Matrix}) = Matrix
_spcontainertype(::Type{<:RowVector}) = Matrix
_spcontainertype(::Type{<:Ref}) = AbstractArray
_spcontainertype(::Type{<:AbstractArray}) = AbstractArray
# need the following two methods to override the immediately preceding method
_spcontainertype(::Type{<:StructuredMatrix}) = PromoteToSparse
_spcontainertype(::Type{<:SparseVecOrMat}) = AbstractSparseArray
spcontainertype(x) = _spcontainertype(typeof(x))
spcontainertype(ct1, ct2) = promote_spcontainertype(spcontainertype(ct1), spcontainertype(ct2))
@inline spcontainertype(ct1, ct2, cts...) =
    promote_spcontainertype(spcontainertype(ct1), spcontainertype(ct2, cts...))

promote_spcontainertype(::Type{T}, ::Type{T}) where {T} = T
# combinations involving AbstractArrays and/or Tuples divert to the generic AbstractArray broadcast code
DivertToAbsArrayBC = Union{Type{AbstractArray},Type{Tuple}}
promote_spcontainertype(::DivertToAbsArrayBC, ct) = AbstractArray
promote_spcontainertype(ct, ::DivertToAbsArrayBC) = AbstractArray
promote_spcontainertype(::DivertToAbsArrayBC, ::DivertToAbsArrayBC) = AbstractArray
# combinations involving scalars, sparse arrays, structured matrices (PromoteToSparse),
# dense vectors/matrices, and PromoteToSparse collections continue in the promote-to-sparse funnel
FunnelToSparseBC = Union{Type{Any},Type{Vector},Type{Matrix},Type{PromoteToSparse},Type{AbstractSparseArray}}
promote_spcontainertype(::FunnelToSparseBC, ::FunnelToSparseBC) = PromoteToSparse

# first (Broadcast containertype) dispatch layer
# (broadcast_c[!], containertype, promote_containertype)
@inline broadcast_c(f, ::Type{PromoteToSparse}, As::Vararg{Any,N}) where {N} =
    spbroadcast_c(f, spcontainertype(As...), As...)
@inline broadcast_c!(f, ::Type{AbstractSparseArray}, ::Type{PromoteToSparse}, C, B, As::Vararg{Any,N}) where {N} =
    spbroadcast_c!(f, AbstractSparseArray, spcontainertype(B, As...), C, B, As...)
# where destination C is not an AbstractSparseArray, divert to generic AbstractArray broadcast code
@inline broadcast_c!(f, CT::Type, ::Type{PromoteToSparse}, C, B, As::Vararg{Any,N}) where {N} =
    broadcast_c!(f, CT, Array, C, B, As...)

# second (internal sparse broadcast containertype) dispatch layer
# (spbroadcast_c[!], spcontainertype, promote_spcontainertype)
@inline spbroadcast_c(f, ::Type{PromoteToSparse}, As::Vararg{Any,N}) where {N} =
    broadcast(f, map(_sparsifystructured, As)...)
@inline spbroadcast_c(f, ::Type{AbstractArray}, As::Vararg{Any,N}) where {N} =
    broadcast_c(f, Array, As...)
@inline spbroadcast_c!(f, ::Type{AbstractSparseArray}, ::Type{PromoteToSparse}, C, B, As::Vararg{Any,N}) where {N} =
    broadcast!(f, C, _sparsifystructured(B), map(_sparsifystructured, As)...)
@inline spbroadcast_c!(f, ::Type{AbstractSparseArray}, ::Type{AbstractArray}, C, B, As::Vararg{Any,N}) where {N} =
    broadcast_c!(f, Array, Array, C, B, As...)

# Convert promotable arguments to their sparse counterparts; pass sparse arguments and
# non-array arguments (scalars) through unchanged.
@inline _sparsifystructured(M::AbstractMatrix) = SparseMatrixCSC(M)
@inline _sparsifystructured(V::AbstractVector) = SparseVector(V)
@inline _sparsifystructured(M::AbstractSparseMatrix) = SparseMatrixCSC(M)
@inline _sparsifystructured(V::AbstractSparseVector) = SparseVector(V)
@inline _sparsifystructured(S::SparseVecOrMat) = S
@inline _sparsifystructured(x) = x

# (12) map[!] over combinations of sparse and structured matrices
# NOTE: reuses the StructuredMatrix alias bound in section (11) above; the redundant
# re-assignment of that alias formerly found here has been removed.
SparseOrStructuredMatrix = Union{SparseMatrixCSC,StructuredMatrix}
# Promote structured arguments to sparse, then defer to the sparse map[!] machinery;
# shapes are validated by _checksameshape before dispatching to the no-check kernels.
map(f::Tf, A::StructuredMatrix) where {Tf} =
    _noshapecheck_map(f, _sparsifystructured(A))
map(f::Tf, A::SparseOrStructuredMatrix, Bs::Vararg{SparseOrStructuredMatrix,N}) where {Tf,N} =
    (_checksameshape(A, Bs...); _noshapecheck_map(f, _sparsifystructured(A), map(_sparsifystructured, Bs)...))
map!(f::Tf, C::SparseMatrixCSC, A::SparseOrStructuredMatrix, Bs::Vararg{SparseOrStructuredMatrix,N}) where {Tf,N} =
    (_checksameshape(C, A, Bs...); _noshapecheck_map!(f, C, _sparsifystructured(A), map(_sparsifystructured, Bs)...))
end # module HigherOrderFns