From 9af87622e58c851aaf267c26866534103d8e585e Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Sun, 4 Jan 2026 22:58:48 +0100 Subject: [PATCH] Add an option decompression_uplo for symmetric results --- ext/SparseMatrixColoringsCUDAExt.jl | 16 +++-- src/decompression.jl | 50 +++++++++++++++- src/interface.jl | 35 +++++++---- src/result.jl | 90 +++++++++++++++++++---------- 4 files changed, 142 insertions(+), 49 deletions(-) diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index 2750964d..7213e033 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -47,13 +47,15 @@ function SMC.StarSetColoringResult( A::CuSparseMatrixCSC, ag::SMC.AdjacencyGraph{T}, color::Vector{<:Integer}, - star_set::SMC.StarSet{<:Integer}, + star_set::SMC.StarSet{<:Integer}; + decompression_uplo::Symbol=:F, ) where {T<:Integer} + @assert decompression_uplo == :F group = SMC.group_by_color(T, color) - compressed_indices = SMC.star_csc_indices(ag, color, star_set) + compressed_indices = SMC.star_csc_indices(ag, color, star_set, decompression_uplo) additional_info = (; compressed_indices_gpu_csc=CuVector(compressed_indices)) return SMC.StarSetColoringResult( - A, ag, color, group, compressed_indices, additional_info + A, ag, color, group, compressed_indices, decompression_uplo, additional_info ) end @@ -85,13 +87,15 @@ function SMC.StarSetColoringResult( A::CuSparseMatrixCSR, ag::SMC.AdjacencyGraph{T}, color::Vector{<:Integer}, - star_set::SMC.StarSet{<:Integer}, + star_set::SMC.StarSet{<:Integer}; + decompression_uplo::Symbol=:F, ) where {T<:Integer} + @assert decompression_uplo == :F group = SMC.group_by_color(T, color) - compressed_indices = SMC.star_csc_indices(ag, color, star_set) + compressed_indices = SMC.star_csc_indices(ag, color, star_set, decompression_uplo) additional_info = (; compressed_indices_gpu_csr=CuVector(compressed_indices)) return SMC.StarSetColoringResult( - A, ag, color, group, compressed_indices, additional_info + A, ag, color, group, compressed_indices, decompression_uplo, additional_info ) end diff --git a/src/decompression.jl b/src/decompression.jl index 1528c885..70ea898e 100644 --- a/src/decompression.jl +++ b/src/decompression.jl @@ -453,12 +453,18 @@ function decompress!( check_compatible_pattern(A, ag, uplo) fill!(A, zero(eltype(A))) + l = 0 rvS = rowvals(S) for j in axes(S, 2) for k in nzrange(S, j) i = rvS[k] if in_triangle(i, j, uplo) - A[i, j] = B[compressed_indices[k]] + if result.decompression_uplo == :F + A[i, j] = B[compressed_indices[k]] + else + l += 1 + A[i, j] = B[compressed_indices[l]] + end end end end @@ -472,6 +478,7 @@ function decompress_single_color!( result::StarSetColoringResult, uplo::Symbol=:F, ) + @assert result.decompression_uplo == :F (; ag, compressed_indices, group) = result (; S) = ag check_compatible_pattern(A, ag, uplo) @@ -509,11 +516,12 @@ function decompress!( (; S) = ag nzA = nonzeros(A) check_compatible_pattern(A, ag, uplo) - if uplo == :F + if result.decompression_uplo == uplo for k in eachindex(nzA, compressed_indices) nzA[k] = B[compressed_indices[k]] end else + @assert result.decompression_uplo == :F rvS = rowvals(S) l = 0 # assume A has the same pattern as the triangle for j in axes(S, 2) @@ -529,6 +537,44 @@ function decompress!( return A end +function decompress_single_color!( + A::SparseMatrixCSC, + b::AbstractVector, + c::Integer, + result::StarSetColoringResult, + uplo::Symbol=:F, +) + (; ag, compressed_indices) = result + (; S) = ag + lower_index = (c - 1) * S.n + 1 + upper_index = c * S.n + nzA = nonzeros(A) + if result.decompression_uplo == uplo + uplo == :F && check_same_pattern(A, S) + for k in eachindex(nzA, compressed_indices) + if lower_index <= compressed_indices[k] <= upper_index + nzA[k] = b[compressed_indices[k] - lower_index + 1] + end + end + else + @assert result.decompression_uplo == :F + rvS = rowvals(S) + l = 0 # assume A has the same pattern as the triangle + for j in axes(S, 2) + for k in nzrange(S, j) + i = rvS[k] + if in_triangle(i, j, uplo) + l += 1 + if lower_index <= compressed_indices[k] <= upper_index + nzA[l] = b[i] + end + end + end + end + end + return A +end + ## TreeSetColoringResult function decompress!( diff --git a/src/interface.jl b/src/interface.jl index 936b974c..228c5064 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -190,10 +190,11 @@ function coloring( A::AbstractMatrix, problem::ColoringProblem, algo::GreedyColoringAlgorithm; - decompression_eltype::Type{R}=Float64, symmetric_pattern::Bool=false, + decompression_eltype::Type{R}=Float64, + decompression_uplo::Symbol=:F, ) where {R} - return _coloring(WithResult(), A, problem, algo, R, symmetric_pattern) + return _coloring(WithResult(), A, problem, algo, symmetric_pattern, R, decompression_uplo) end """ @@ -229,8 +230,9 @@ function _coloring( A::AbstractMatrix, ::ColoringProblem{:nonsymmetric,:column}, algo::GreedyColoringAlgorithm, + symmetric_pattern::Bool, decompression_eltype::Type, - symmetric_pattern::Bool; + decompression_uplo::Symbol; forced_colors::Union{AbstractVector{<:Integer},Nothing}=nothing, ) symmetric_pattern = symmetric_pattern || A isa Union{Symmetric,Hermitian} @@ -252,8 +254,9 @@ function _coloring( A::AbstractMatrix, ::ColoringProblem{:nonsymmetric,:row}, algo::GreedyColoringAlgorithm, + symmetric_pattern::Bool, decompression_eltype::Type, - symmetric_pattern::Bool; + decompression_uplo::Symbol; forced_colors::Union{AbstractVector{<:Integer},Nothing}=nothing, ) symmetric_pattern = symmetric_pattern || A isa Union{Symmetric,Hermitian} @@ -275,8 +278,9 @@ function _coloring( A::AbstractMatrix, ::ColoringProblem{:symmetric,:column}, algo::GreedyColoringAlgorithm{:direct}, + symmetric_pattern::Bool, decompression_eltype::Type, - symmetric_pattern::Bool; + decompression_uplo::Symbol; forced_colors::Union{AbstractVector{<:Integer},Nothing}=nothing, ) ag = AdjacencyGraph(A; augmented_graph=false) @@ -286,7 +290,7 @@ function _coloring( end color, star_set = argmin(maximum ∘ first, color_and_star_set_by_order) if speed_setting isa WithResult - return StarSetColoringResult(A, ag, color, star_set) + return StarSetColoringResult(A, ag, color, star_set; decompression_uplo) else return color end @@ -297,8 +301,9 @@ function _coloring( A::AbstractMatrix, ::ColoringProblem{:symmetric,:column}, algo::GreedyColoringAlgorithm{:substitution}, - decompression_eltype::Type{R}, symmetric_pattern::Bool, + decompression_eltype::Type{R}, + decompression_uplo::Symbol, ) where {R} ag = AdjacencyGraph(A; augmented_graph=false) color_and_tree_set_by_order = map(algo.orders) do order @@ -307,7 +312,7 @@ function _coloring( end color, tree_set = argmin(maximum ∘ first, color_and_tree_set_by_order) if speed_setting isa WithResult - return TreeSetColoringResult(A, ag, color, tree_set, R) + return TreeSetColoringResult(A, ag, color, tree_set, R; decompression_uplo) else return color end @@ -318,8 +323,9 @@ function _coloring( A::AbstractMatrix, ::ColoringProblem{:nonsymmetric,:bidirectional}, algo::GreedyColoringAlgorithm{:direct}, + symmetric_pattern::Bool, decompression_eltype::Type{R}, - symmetric_pattern::Bool; + decompression_uplo::Symbol; forced_colors::Union{AbstractVector{<:Integer},Nothing}=nothing, ) where {R} A_and_Aᵀ, edge_to_index = bidirectional_pattern(A; symmetric_pattern) @@ -345,7 +351,9 @@ function _coloring( t -> maximum(t[3]) + maximum(t[4]), outputs_by_order ) # can't use ncolors without computing the full result if speed_setting isa WithResult - symmetric_result = StarSetColoringResult(A_and_Aᵀ, ag, color, star_set) + symmetric_result = StarSetColoringResult( + A_and_Aᵀ, ag, color, star_set; decompression_uplo=:L + ) return BicoloringResult( A, ag, @@ -366,8 +374,9 @@ function _coloring( A::AbstractMatrix, ::ColoringProblem{:nonsymmetric,:bidirectional}, algo::GreedyColoringAlgorithm{:substitution}, - decompression_eltype::Type{R}, symmetric_pattern::Bool, + decompression_eltype::Type{R}, + decompression_uplo::Symbol, ) where {R} A_and_Aᵀ, edge_to_index = bidirectional_pattern(A; symmetric_pattern) ag = AdjacencyGraph(A_and_Aᵀ, edge_to_index, 0; augmented_graph=true) @@ -390,7 +399,9 @@ function _coloring( t -> maximum(t[3]) + maximum(t[4]), outputs_by_order ) # can't use ncolors without computing the full result if speed_setting isa WithResult - symmetric_result = TreeSetColoringResult(A_and_Aᵀ, ag, color, tree_set, R) + symmetric_result = TreeSetColoringResult( + A_and_Aᵀ, ag, color, tree_set, R; decompression_uplo=:L + ) return BicoloringResult( A, ag, diff --git a/src/result.jl b/src/result.jl index c74ceaa1..4544709a 100644 --- a/src/result.jl +++ b/src/result.jl @@ -309,6 +309,7 @@ struct StarSetColoringResult{ color::CT group::GT compressed_indices::VT + decompression_uplo::Symbol additional_info::A end @@ -316,48 +317,66 @@ function StarSetColoringResult( A::AbstractMatrix, ag::AdjacencyGraph{T}, color::Vector{<:Integer}, - star_set::StarSet{<:Integer}, + star_set::StarSet{<:Integer}; + decompression_uplo::Symbol=:F, ) where {T<:Integer} group = group_by_color(T, color) - compressed_indices = star_csc_indices(ag, color, star_set) - return StarSetColoringResult(A, ag, color, group, compressed_indices, nothing) + compressed_indices = star_csc_indices(ag, color, star_set, decompression_uplo) + return StarSetColoringResult( + A, ag, color, group, compressed_indices, decompression_uplo, nothing + ) end function star_csc_indices( - ag::AdjacencyGraph{T}, color::Vector{<:Integer}, star_set + ag::AdjacencyGraph{T}, + color::Vector{<:Integer}, + star_set::StarSet{<:Integer}, + decompression_uplo::Symbol, ) where {T} (; star, hub) = star_set S = pattern(ag) edge_to_index = edge_indices(ag) n = S.n rvS = rowvals(S) - compressed_indices = zeros(T, nnz(S)) # needs to be independent from the storage in the graph, in case the graph gets reused + nb_indices = nnz(S) + if decompression_uplo != :F + nb_indices = nb_edges(ag) + nb_self_loops + end + compressed_indices = zeros(T, nb_indices) # needs to be independent from the storage in the graph, in case the graph gets reused + l = 0 for j in axes(S, 2) for k in nzrange(S, j) i = rvS[k] if i == j # diagonal coefficients + l += 1 c = color[i] - compressed_indices[k] = (c - 1) * n + i + compressed_indices[l] = (c - 1) * n + i else - # off-diagonal coefficients - index_ij = edge_to_index[k] - s = star[index_ij] - h = abs(hub[s]) - - # Assign the non-hub vertex (spoke) to the correct position in spokes - if i == h - # i is the hub and j is the spoke - c = color[i] - compressed_indices[k] = (c - 1) * n + j - else # j == h - # j is the hub and i is the spoke - c = color[j] - compressed_indices[k] = (c - 1) * n + i + if in_triangle(i, j, decompression_uplo) + # off-diagonal coefficients + l += 1 + index_ij = edge_to_index[k] + s = star[index_ij] + h = abs(hub[s]) + + # Assign the non-hub vertex (spoke) to the correct position in spokes + if i == h + # i is the hub and j is the spoke + c = color[i] + compressed_indices[l] = (c - 1) * n + j + else # j == h + # j is the hub and i is the spoke + c = color[j] + compressed_indices[l] = (c - 1) * n + i + end end end end end + if !augmented_graph(ag) && (decompression_uplo != :F) + resize!(compressed_indices, l) + end return compressed_indices end @@ -391,6 +410,7 @@ struct TreeSetColoringResult{ lower_triangle_offsets::Vector{T} upper_triangle_offsets::Vector{T} buffer::Vector{R} + decompression_uplo::Symbol end function TreeSetColoringResult( @@ -398,7 +418,8 @@ function TreeSetColoringResult( ag::AdjacencyGraph{T}, color::Vector{<:Integer}, tree_set::TreeSet{<:Integer}, - decompression_eltype::Type{R}, + decompression_eltype::Type{R}; + decompression_uplo::Symbol=:F, ) where {T<:Integer,R} (; reverse_bfs_orders, tree_edge_indices, nt) = tree_set (; S, nb_self_loops) = ag @@ -408,7 +429,7 @@ function TreeSetColoringResult( # Vector for the decompression of the diagonal coefficients diagonal_indices = Vector{T}(undef, nb_self_loops) - diagonal_nzind = Vector{T}(undef, nb_self_loops) + diagonal_nzind = (decompression_uplo == :F) ? Vector{T}(undef, nb_self_loops) : T[] if !augmented_graph(ag) l = 0 @@ -418,7 +439,9 @@ function TreeSetColoringResult( if i == j l += 1 diagonal_indices[l] = i - diagonal_nzind[l] = k + if decompression_uplo == :F + diagonal_nzind[l] = k + end end end end @@ -426,8 +449,8 @@ function TreeSetColoringResult( # Vectors for the decompression of the off-diagonal coefficients nedges = nb_edges(ag) - lower_triangle_offsets = Vector{T}(undef, nedges) - upper_triangle_offsets = Vector{T}(undef, nedges) + lower_triangle_offsets = decompression_uplo == :U ? T[] : Vector{T}(undef, nedges) + upper_triangle_offsets = decompression_uplo == :L ? T[] : Vector{T}(undef, nedges) # Index in lower_triangle_offsets and upper_triangle_offsets index_offsets = 0 @@ -451,21 +474,29 @@ function TreeSetColoringResult( if in_triangle(i, j, :L) # uplo = :L or uplo = :F # S[i,j] is stored at index_ij = (S.colptr[j+1] - offset_L) in S.nzval - lower_triangle_offsets[index_offsets] = length(col_j) - searchsortedfirst(col_j, i) + 1 + if decompression_uplo != :U + lower_triangle_offsets[index_offsets] = length(col_j) - searchsortedfirst(col_j, i) + 1 + end # uplo = :U or uplo = :F # S[j,i] is stored at index_ji = (S.colptr[i] + offset_U) in S.nzval - upper_triangle_offsets[index_offsets] = searchsortedfirst(col_i, j)::Int - 1 + if decompression_uplo != :L + upper_triangle_offsets[index_offsets] = searchsortedfirst(col_i, j)::Int - 1 + end # S[i,j] is in the upper triangular part of S else # uplo = :U or uplo = :F # S[i,j] is stored at index_ij = (S.colptr[j] + offset_U) in S.nzval - upper_triangle_offsets[index_offsets] = searchsortedfirst(col_j, i)::Int - 1 + if decompression_uplo != :L + upper_triangle_offsets[index_offsets] = searchsortedfirst(col_j, i)::Int - 1 + end # uplo = :L or uplo = :F # S[j,i] is stored at index_ji = (S.colptr[i+1] - offset_L) in S.nzval - lower_triangle_offsets[index_offsets] = length(col_i) - searchsortedfirst(col_i, j) + 1 + if decompression_uplo != :U + lower_triangle_offsets[index_offsets] = length(col_i) - searchsortedfirst(col_i, j) + 1 + end end #! format: on end @@ -488,6 +519,7 @@ function TreeSetColoringResult( lower_triangle_offsets, upper_triangle_offsets, buffer, + decompression_uplo, ) end