diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl
index 2750964d..f58bdd66 100644
--- a/ext/SparseMatrixColoringsCUDAExt.jl
+++ b/ext/SparseMatrixColoringsCUDAExt.jl
@@ -48,12 +48,13 @@ function SMC.StarSetColoringResult(
     ag::SMC.AdjacencyGraph{T},
     color::Vector{<:Integer},
     star_set::SMC.StarSet{<:Integer},
+    decompression_uplo::Symbol,
 ) where {T<:Integer}
     group = SMC.group_by_color(T, color)
-    compressed_indices = SMC.star_csc_indices(ag, color, star_set)
+    compressed_indices = SMC.star_csc_indices(ag, color, star_set, decompression_uplo)
     additional_info = (; compressed_indices_gpu_csc=CuVector(compressed_indices))
     return SMC.StarSetColoringResult(
-        A, ag, color, group, compressed_indices, additional_info
+        A, ag, color, group, compressed_indices, decompression_uplo, additional_info
     )
 end
 
@@ -86,12 +87,18 @@ function SMC.StarSetColoringResult(
     ag::SMC.AdjacencyGraph{T},
     color::Vector{<:Integer},
     star_set::SMC.StarSet{<:Integer},
+    decompression_uplo::Symbol,
 ) where {T<:Integer}
     group = SMC.group_by_color(T, color)
-    compressed_indices = SMC.star_csc_indices(ag, color, star_set)
+    reversed_uplo = if (decompression_uplo == :L)
+        :U
+    else
+        (decompression_uplo == :U ? :L : decompression_uplo)
+    end
+    compressed_indices = SMC.star_csc_indices(ag, color, star_set, reversed_uplo)
     additional_info = (; compressed_indices_gpu_csr=CuVector(compressed_indices))
     return SMC.StarSetColoringResult(
-        A, ag, color, group, compressed_indices, additional_info
+        A, ag, color, group, compressed_indices, decompression_uplo, additional_info
     )
 end
 
@@ -120,15 +127,7 @@ function SMC.decompress!(
     A::CuSparseMatrixCSC,
     B::CuMatrix,
     result::SMC.StarSetColoringResult{<:CuSparseMatrixCSC},
-    uplo::Symbol=:F,
 )
-    if uplo != :F
-        throw(
-            SMC.UnsupportedDecompressionError(
-                "Single-triangle decompression is not supported on GPU matrices"
-            ),
-        )
-    end
     compressed_indices = result.additional_info.compressed_indices_gpu_csc
     copyto!(A.nzVal, view(B, compressed_indices))
     return A
@@ -138,15 +137,7 @@ function SMC.decompress!(
     A::CuSparseMatrixCSR,
     B::CuMatrix,
     result::SMC.StarSetColoringResult{<:CuSparseMatrixCSR},
-    uplo::Symbol=:F,
 )
-    if uplo != :F
-        throw(
-            SMC.UnsupportedDecompressionError(
-                "Single-triangle decompression is not supported on GPU matrices"
-            ),
-        )
-    end
     compressed_indices = result.additional_info.compressed_indices_gpu_csr
     copyto!(A.nzVal, view(B, compressed_indices))
     return A
diff --git a/src/SparseMatrixColorings.jl b/src/SparseMatrixColorings.jl
index 481c8584..c8b232c3 100644
--- a/src/SparseMatrixColorings.jl
+++ b/src/SparseMatrixColorings.jl
@@ -26,7 +26,9 @@ using LinearAlgebra:
     issymmetric,
     ldiv!,
     parent,
-    transpose
+    transpose,
+    tril,
+    triu
 using PrecompileTools: @compile_workload
 using Random: Random, AbstractRNG, default_rng, randperm
 using SparseArrays:
diff --git a/src/coloring.jl b/src/coloring.jl
index cf3d8f79..dc9ca78c 100644
--- a/src/coloring.jl
+++ b/src/coloring.jl
@@ -81,7 +81,7 @@ end
 """
     star_coloring(
         g::AdjacencyGraph, vertices_in_order::AbstractVector, postprocessing::Bool;
-        forced_colors::Union{AbstractVector,Nothing}=nothing
+        postprocessing_minimizes::Symbol=:all_colors, forced_colors::Union{AbstractVector,Nothing}=nothing
     )
 
 Compute a star coloring of all vertices in the adjacency graph `g` and return a tuple `(color, star_set)`, where
@@ -110,6 +110,7 @@ function star_coloring(
     g::AdjacencyGraph{T},
     vertices_in_order::AbstractVector{<:Integer},
     postprocessing::Bool;
+    postprocessing_minimizes::Symbol=:all_colors,
     forced_colors::Union{AbstractVector{<:Integer},Nothing}=nothing,
 ) where {T<:Integer}
     # Initialize data structures
@@ -168,7 +169,7 @@ function star_coloring(
     if postprocessing
         # Reuse the vector forbidden_colors to compute offsets during post-processing
         offsets = forbidden_colors
-        postprocess!(color, star_set, g, offsets)
+        postprocess!(color, star_set, g, offsets, postprocessing_minimizes)
     end
     return color, star_set
 end
@@ -250,7 +251,8 @@ struct StarSet{T}
 end
 
 """
-    acyclic_coloring(g::AdjacencyGraph, vertices_in_order::AbstractVector, postprocessing::Bool)
+    acyclic_coloring(g::AdjacencyGraph, vertices_in_order::AbstractVector, postprocessing::Bool;
+                     postprocessing_minimizes::Symbol=:all_colors)
 
 Compute an acyclic coloring of all vertices in the adjacency graph `g` and return a tuple `(color, tree_set)`, where
 
@@ -273,7 +275,10 @@ If `postprocessing=true`, some colors might be replaced with `0` (the "neutral"
 > [_New Acyclic and Star Coloring Algorithms with Application to Computing Hessians_](https://epubs.siam.org/doi/abs/10.1137/050639879), Gebremedhin et al. (2007), Algorithm 3.1
 """
 function acyclic_coloring(
-    g::AdjacencyGraph{T}, vertices_in_order::AbstractVector{<:Integer}, postprocessing::Bool
+    g::AdjacencyGraph{T},
+    vertices_in_order::AbstractVector{<:Integer},
+    postprocessing::Bool;
+    postprocessing_minimizes::Symbol=:all_colors,
 ) where {T<:Integer}
     # Initialize data structures
     nv = nb_vertices(g)
@@ -345,7 +350,7 @@ function acyclic_coloring(
     if postprocessing
         # Reuse the vector forbidden_colors to compute offsets during post-processing
         offsets = forbidden_colors
-        postprocess!(color, tree_set, g, offsets)
+        postprocess!(color, tree_set, g, offsets, postprocessing_minimizes)
     end
     return color, tree_set
 end
diff --git a/src/decompression.jl b/src/decompression.jl
index 1f8e88db..24dad132 100644
--- a/src/decompression.jl
+++ b/src/decompression.jl
@@ -175,19 +175,32 @@ function decompress(B::AbstractMatrix, result::AbstractColoringResult)
     return decompress!(A, B, result)
 end
 
+function decompress(B::AbstractMatrix, result::AbstractColoringResult{:symmetric,:column})
+    A = respectful_similar(result.A, eltype(B))
+    if A isa SparseMatrixCSC && result.uplo != :F
+        (result.uplo == :L) && (A = tril(A))
+        (result.uplo == :U) && (A = triu(A))
+    end
+    return decompress!(A, B, result)
+end
+
 function decompress(
     Br::AbstractMatrix,
     Bc::AbstractMatrix,
     result::AbstractColoringResult{structure,:bidirectional},
 ) where {structure}
     A = respectful_similar(result.A, Base.promote_eltype(Br, Bc))
+    if A isa SparseMatrixCSC && result.symmetric_result.uplo != :F
+        (result.symmetric_result.uplo == :L) && (A = tril(A))
+        (result.symmetric_result.uplo == :U) && (A = triu(A))
+    end
     return decompress!(A, Br, Bc, result)
 end
 
 """
     decompress!(
         A::AbstractMatrix, B::AbstractMatrix,
-        result::AbstractColoringResult{_,:column/:row}, [uplo=:F]
+        result::AbstractColoringResult{_,:column/:row},
     )
 
     decompress!(
@@ -204,9 +217,6 @@ The out-of-place alternative is [`decompress`](@ref).
 Compression means summing either the columns or the rows of `A` which share the same color.
 It is done by calling [`compress`](@ref).
 
-For `:symmetric` coloring results (and for those only), an optional positional argument `uplo in (:U, :L, :F)` can be passed to specify which part of the matrix `A` should be updated: the Upper triangle, the Lower triangle, or the Full matrix.
-When `A isa SparseMatrixCSC`, using the `uplo` argument requires a target matrix which only stores the relevant triangle(s).
-
 !!! warning
     For some coloring variants, the `result` object is mutated during decompression.
 
@@ -260,7 +270,7 @@ function decompress! end
 """
     decompress_single_color!(
         A::AbstractMatrix, b::AbstractVector, c::Integer,
-        result::AbstractColoringResult, [uplo=:F]
+        result::AbstractColoringResult,
     )
 
 Decompress the vector `b` corresponding to color `c` in-place into `A`, given a `:direct` coloring `result` of the sparsity pattern of `A` (it will not work with a `:substitution` coloring).
@@ -272,9 +282,6 @@ Decompress the vector `b` corresponding to color `c` in-place into `A`, given a
 !!! warning
     This function will only update some coefficients of `A`, without resetting the rest to zero.
 
-For `:symmetric` coloring results (and for those only), an optional positional argument `uplo in (:U, :L, :F)` can be passed to specify which part of the matrix `A` should be updated: the Upper triangle, the Lower triangle, or the Full matrix.
-When `A isa SparseMatrixCSC`, using the `uplo` argument requires a target matrix which only stores the relevant triangle(s).
-
 !!! warning
     For some coloring variants, the `result` object is mutated during decompression.
 
@@ -445,20 +452,20 @@ end
 
 ## StarSetColoringResult
 
-function decompress!(
-    A::AbstractMatrix, B::AbstractMatrix, result::StarSetColoringResult, uplo::Symbol=:F
-)
-    (; ag, compressed_indices) = result
+function decompress!(A::AbstractMatrix, B::AbstractMatrix, result::StarSetColoringResult)
+    (; ag, compressed_indices, uplo) = result
     (; S) = ag
     uplo == :F && check_same_pattern(A, S)
     fill!(A, zero(eltype(A)))
 
-    rvS = rowvals(S)
+    l = 0
+    rvS = rowvals(A)
     for j in axes(S, 2)
         for k in nzrange(S, j)
             i = rvS[k]
             if in_triangle(i, j, uplo)
-                A[i, j] = B[compressed_indices[k]]
+                l += 1
+                A[i, j] = B[compressed_indices[l]]
             end
         end
     end
@@ -466,76 +473,45 @@ function decompress!(
 end
 
 function decompress_single_color!(
-    A::AbstractMatrix,
-    b::AbstractVector,
-    c::Integer,
-    result::StarSetColoringResult,
-    uplo::Symbol=:F,
+    A::SparseMatrixCSC, b::AbstractVector, c::Integer, result::StarSetColoringResult
 )
-    (; ag, compressed_indices, group) = result
+    (; ag, compressed_indices, group, uplo) = result
     (; S) = ag
     uplo == :F && check_same_pattern(A, S)
 
     lower_index = (c - 1) * S.n + 1
     upper_index = c * S.n
-    rvS = rowvals(S)
+    rvA = rowvals(A)
+    nzA = nonzeros(A)
     for j in group[c]
-        for k in nzrange(S, j)
-            # Check if the color c is used to recover A[i,j] / A[j,i]
+        for k in nzrange(A, j)
+            # Check if the color c is used to recover A[i,j]
             if lower_index <= compressed_indices[k] <= upper_index
-                i = rvS[k]
-                if i == j
-                    # Recover the diagonal coefficients of A
-                    A[i, i] = b[i]
-                else
-                    # Recover the off-diagonal coefficients of A
-                    if in_triangle(i, j, uplo)
-                        A[i, j] = b[i]
-                    end
-                    if in_triangle(j, i, uplo)
-                        A[j, i] = b[i]
-                    end
-                end
+                i = rvA[k]
+                nzA[k] = b[i]
             end
         end
     end
     return A
 end
 
-function decompress!(
-    A::SparseMatrixCSC, B::AbstractMatrix, result::StarSetColoringResult, uplo::Symbol=:F
-)
-    (; ag, compressed_indices) = result
+function decompress!(A::SparseMatrixCSC, B::AbstractMatrix, result::StarSetColoringResult)
+    (; ag, compressed_indices, uplo) = result
     (; S) = ag
     nzA = nonzeros(A)
-    if uplo == :F
-        check_same_pattern(A, S)
-        for k in eachindex(nzA, compressed_indices)
-            nzA[k] = B[compressed_indices[k]]
-        end
-    else
-        rvS = rowvals(S)
-        l = 0  # assume A has the same pattern as the triangle
-        for j in axes(S, 2)
-            for k in nzrange(S, j)
-                i = rvS[k]
-                if in_triangle(i, j, uplo)
-                    l += 1
-                    nzA[l] = B[compressed_indices[k]]
-                end
-            end
-        end
+    uplo == :F && check_same_pattern(A, S)
+    for k in eachindex(nzA, compressed_indices)
+        nzA[k] = B[compressed_indices[k]]
     end
     return A
 end
 
 ## TreeSetColoringResult
 
-function decompress!(
-    A::AbstractMatrix, B::AbstractMatrix, result::TreeSetColoringResult, uplo::Symbol=:F
-)
-    (; ag, color, reverse_bfs_orders, tree_edge_indices, nt, diagonal_indices, buffer) =
-        result
+function decompress!(A::AbstractMatrix, B::AbstractMatrix, result::TreeSetColoringResult)
+    (;
+        ag, color, reverse_bfs_orders, tree_edge_indices, nt, diagonal_indices, buffer, uplo
+    ) = result
     (; S) = ag
     uplo == :F && check_same_pattern(A, S)
     R = eltype(A)
@@ -586,10 +562,7 @@ function decompress!(
 end
 
 function decompress!(
-    A::SparseMatrixCSC{R},
-    B::AbstractMatrix{R},
-    result::TreeSetColoringResult,
-    uplo::Symbol=:F,
+    A::SparseMatrixCSC{R}, B::AbstractMatrix{R}, result::TreeSetColoringResult
 ) where {R<:Real}
     (;
         ag,
@@ -602,6 +575,7 @@ function decompress!(
         lower_triangle_offsets,
         upper_triangle_offsets,
         buffer,
+        uplo,
     ) = result
     (; S) = ag
     A_colptr = A.colptr
@@ -702,12 +676,10 @@ end
 ## MatrixInverseColoringResult
 
 function decompress!(
-    A::AbstractMatrix,
-    B::AbstractMatrix,
-    result::LinearSystemColoringResult,
-    uplo::Symbol=:F,
+    A::AbstractMatrix, B::AbstractMatrix, result::LinearSystemColoringResult
 )
-    (; color, strict_upper_nonzero_inds, M_factorization, strict_upper_nonzeros_A) = result
+    (; color, strict_upper_nonzero_inds, M_factorization, strict_upper_nonzeros_A, uplo) =
+        result
     S = result.ag.S
     uplo == :F && check_same_pattern(A, S)
 
@@ -776,7 +748,7 @@ function decompress!(
     nzval = Vector{R}(undef, length(large_rowval))
     A_and_noAᵀ = SparseMatrixCSC(m + n, m + n, large_colptr, large_rowval, nzval)
     Br_and_Bc = _join_compressed!(result, Br, Bc)
-    decompress!(A_and_noAᵀ, Br_and_Bc, symmetric_result, :L)
+    decompress!(A_and_noAᵀ, Br_and_Bc, symmetric_result)
     rvA = rowvals(A_and_noAᵀ)
     nzA = nonzeros(A_and_noAᵀ)
     for j in 1:n
@@ -797,6 +769,6 @@ function decompress!(
     A_and_noAᵀ = SparseMatrixCSC(m + n, m + n, large_colptr, large_rowval, A.nzval)
     # decompress lower triangle only
     Br_and_Bc = _join_compressed!(result, Br, Bc)
-    decompress!(A_and_noAᵀ, Br_and_Bc, symmetric_result, :L)
+    decompress!(A_and_noAᵀ, Br_and_Bc, symmetric_result)
     return A
 end
diff --git a/src/graph.jl b/src/graph.jl
index 84c3353f..ee8bb932 100644
--- a/src/graph.jl
+++ b/src/graph.jl
@@ -227,6 +227,7 @@ The adjacency graph of a symmetric matrix `A ∈ ℝ^{n × n}` is `G(A) = (V, E)
 struct AdjacencyGraph{T<:Integer,augmented_graph}
     S::SparsityPatternCSC{T}
     edge_to_index::Vector{T}
+    original_size::Tuple{Int,Int}
 end
 
 Base.eltype(::AdjacencyGraph{T}) where {T} = T
@@ -235,15 +236,20 @@ function AdjacencyGraph(
     S::SparsityPatternCSC{T},
     edge_to_index::Vector{T}=build_edge_to_index(S);
     augmented_graph::Bool=false,
+    original_size::Tuple{Int,Int}=size(S),
 ) where {T}
-    return AdjacencyGraph{T,augmented_graph}(S, edge_to_index)
+    return AdjacencyGraph{T,augmented_graph}(S, edge_to_index, original_size)
 end
 
-function AdjacencyGraph(A::SparseMatrixCSC; augmented_graph::Bool=false)
+function AdjacencyGraph(
+    A::SparseMatrixCSC; augmented_graph::Bool=false, original_size::Tuple{Int,Int}=size(A)
+)
     return AdjacencyGraph(SparsityPatternCSC(A); augmented_graph)
 end
 
-function AdjacencyGraph(A::AbstractMatrix; augmented_graph::Bool=false)
+function AdjacencyGraph(
+    A::AbstractMatrix; augmented_graph::Bool=false, original_size::Tuple{Int,Int}=size(A)
+)
     return AdjacencyGraph(SparseMatrixCSC(A); augmented_graph)
 end
 
diff --git a/src/interface.jl b/src/interface.jl
index cc57bcc7..fd4855ca 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -69,12 +69,14 @@ It is passed as an argument to the main function [`coloring`](@ref).
 
 # Constructors
 
-    GreedyColoringAlgorithm{decompression}(order=NaturalOrder(); postprocessing=false)
-    GreedyColoringAlgorithm(order=NaturalOrder(); postprocessing=false, decompression=:direct)
+    GreedyColoringAlgorithm{decompression}(order=NaturalOrder(); postprocessing=false, postprocessing_minimizes=:all_colors, decompression_uplo=:F)
+    GreedyColoringAlgorithm(order=NaturalOrder(); postprocessing=false, postprocessing_minimizes=:all_colors, decompression=:direct, decompression_uplo=:F)
 
 - `order::Union{AbstractOrder,Tuple}`: the order in which the columns or rows are colored, which can impact the number of colors. Can also be a tuple of different orders to try out, from which the best order (the one with the lowest total number of colors) will be used.
-- `postprocessing::Bool`: whether or not the coloring will be refined by assigning the neutral color `0` to some vertices.
+- `postprocessing::Bool`: whether or not the coloring will be refined by assigning the neutral color `0` to some vertices. This option does not affect row or column colorings.
+- `postprocessing_minimizes::Symbol`: which number of distinct colors is heuristically minimized by postprocessing, either `:all_colors`, `:row_colors` or `:column_colors`. This option only affects bidirectional colorings.
 - `decompression::Symbol`: either `:direct` or `:substitution`. Usually `:substitution` leads to fewer colors, at the cost of a more expensive coloring (and decompression). When `:substitution` is not applicable, it falls back on `:direct` decompression.
+- `decompression_uplo::Symbol`: either `:L` (lower triangle), `:U` (upper triangle), or `:F` (full matrix). This option only affects symmetric colorings.
 
 !!! warning
     The second constructor (based on keyword arguments) is type-unstable.
@@ -98,10 +100,14 @@ struct GreedyColoringAlgorithm{decompression,N,O<:NTuple{N,AbstractOrder}} <:
        ADTypes.AbstractColoringAlgorithm
     orders::O
     postprocessing::Bool
+    postprocessing_minimizes::Symbol
+    decompression_uplo::Symbol
 
     function GreedyColoringAlgorithm{decompression}(
         order_or_orders::Union{AbstractOrder,Tuple}=NaturalOrder();
         postprocessing::Bool=false,
+        postprocessing_minimizes::Symbol=:all_colors,
+        decompression_uplo::Symbol=:F,
     ) where {decompression}
         check_valid_algorithm(decompression)
         if order_or_orders isa AbstractOrder
@@ -109,16 +115,22 @@ struct GreedyColoringAlgorithm{decompression,N,O<:NTuple{N,AbstractOrder}} <:
         else
             orders = order_or_orders
         end
-        return new{decompression,length(orders),typeof(orders)}(orders, postprocessing)
+        return new{decompression,length(orders),typeof(orders)}(
+            orders, postprocessing, postprocessing_minimizes, decompression_uplo
+        )
     end
 end
 
 function GreedyColoringAlgorithm(
     order_or_orders::Union{AbstractOrder,Tuple}=NaturalOrder();
     postprocessing::Bool=false,
+    postprocessing_minimizes::Symbol=:all_colors,
     decompression::Symbol=:direct,
+    decompression_uplo::Symbol=:F,
 )
-    return GreedyColoringAlgorithm{decompression}(order_or_orders; postprocessing)
+    return GreedyColoringAlgorithm{decompression}(
+        order_or_orders; postprocessing, postprocessing_minimizes, decompression_uplo
+    )
 end
 
 ## Coloring
@@ -279,14 +291,14 @@ function _coloring(
     symmetric_pattern::Bool;
     forced_colors::Union{AbstractVector{<:Integer},Nothing}=nothing,
 )
-    ag = AdjacencyGraph(A; augmented_graph=false)
+    ag = AdjacencyGraph(A; augmented_graph=false, original_size=size(A))
     color_and_star_set_by_order = map(algo.orders) do order
         vertices_in_order = vertices(ag, order)
         return star_coloring(ag, vertices_in_order, algo.postprocessing; forced_colors)
     end
     color, star_set = argmin(maximum ∘ first, color_and_star_set_by_order)
     if speed_setting isa WithResult
-        return StarSetColoringResult(A, ag, color, star_set)
+        return StarSetColoringResult(A, ag, color, star_set, algo.decompression_uplo)
     else
         return color
     end
@@ -300,14 +312,14 @@ function _coloring(
     decompression_eltype::Type{R},
     symmetric_pattern::Bool,
 ) where {R}
-    ag = AdjacencyGraph(A; augmented_graph=false)
+    ag = AdjacencyGraph(A; augmented_graph=false, original_size=size(A))
     color_and_tree_set_by_order = map(algo.orders) do order
         vertices_in_order = vertices(ag, order)
         return acyclic_coloring(ag, vertices_in_order, algo.postprocessing)
     end
     color, tree_set = argmin(maximum ∘ first, color_and_tree_set_by_order)
     if speed_setting isa WithResult
-        return TreeSetColoringResult(A, ag, color, tree_set, R)
+        return TreeSetColoringResult(A, ag, color, tree_set, algo.decompression_uplo, R)
     else
         return color
     end
@@ -323,11 +335,18 @@ function _coloring(
     forced_colors::Union{AbstractVector{<:Integer},Nothing}=nothing,
 ) where {R}
     A_and_Aᵀ, edge_to_index = bidirectional_pattern(A; symmetric_pattern)
-    ag = AdjacencyGraph(A_and_Aᵀ, edge_to_index; augmented_graph=true)
+    ag = AdjacencyGraph(
+        A_and_Aᵀ, edge_to_index; augmented_graph=true, original_size=size(A)
+    )
+    postprocessing_minimizes = algo.postprocessing_minimizes
     outputs_by_order = map(algo.orders) do order
         vertices_in_order = vertices(ag, order)
         _color, _star_set = star_coloring(
-            ag, vertices_in_order, algo.postprocessing; forced_colors
+            ag,
+            vertices_in_order,
+            algo.postprocessing;
+            postprocessing_minimizes,
+            forced_colors,
         )
         (_row_color, _column_color, _symmetric_to_row, _symmetric_to_column) = remap_colors(
             eltype(ag), _color, maximum(_color), size(A)...
@@ -345,7 +364,7 @@ function _coloring(
         t -> maximum(t[3]) + maximum(t[4]), outputs_by_order
     )  # can't use ncolors without computing the full result
     if speed_setting isa WithResult
-        symmetric_result = StarSetColoringResult(A_and_Aᵀ, ag, color, star_set)
+        symmetric_result = StarSetColoringResult(A_and_Aᵀ, ag, color, star_set, :L)
         return BicoloringResult(
             A,
             ag,
@@ -370,10 +389,15 @@ function _coloring(
     symmetric_pattern::Bool,
 ) where {R}
     A_and_Aᵀ, edge_to_index = bidirectional_pattern(A; symmetric_pattern)
-    ag = AdjacencyGraph(A_and_Aᵀ, edge_to_index; augmented_graph=true)
+    ag = AdjacencyGraph(
+        A_and_Aᵀ, edge_to_index; augmented_graph=true, original_size=size(A)
+    )
+    postprocessing_minimizes = algo.postprocessing_minimizes
     outputs_by_order = map(algo.orders) do order
         vertices_in_order = vertices(ag, order)
-        _color, _tree_set = acyclic_coloring(ag, vertices_in_order, algo.postprocessing)
+        _color, _tree_set = acyclic_coloring(
+            ag, vertices_in_order, algo.postprocessing; postprocessing_minimizes
+        )
         (_row_color, _column_color, _symmetric_to_row, _symmetric_to_column) = remap_colors(
             eltype(ag), _color, maximum(_color), size(A)...
         )
@@ -390,7 +414,7 @@ function _coloring(
         t -> maximum(t[3]) + maximum(t[4]), outputs_by_order
     )  # can't use ncolors without computing the full result
     if speed_setting isa WithResult
-        symmetric_result = TreeSetColoringResult(A_and_Aᵀ, ag, color, tree_set, R)
+        symmetric_result = TreeSetColoringResult(A_and_Aᵀ, ag, color, tree_set, :L, R)
         return BicoloringResult(
             A,
             ag,
diff --git a/src/postprocessing.jl b/src/postprocessing.jl
index 7b2b58f5..a6a445a5 100644
--- a/src/postprocessing.jl
+++ b/src/postprocessing.jl
@@ -5,15 +5,16 @@ function postprocess!(
     star_or_tree_set::Union{StarSet,TreeSet},
     g::AdjacencyGraph,
     offsets::AbstractVector{<:Integer},
+    postprocessing_minimizes::Symbol,
 )
-    S = pattern(g)
-    edge_to_index = edge_indices(g)
     # flag which colors are actually used during decompression
     nb_colors = maximum(color)
     color_used = zeros(Bool, nb_colors)
 
     # nonzero diagonal coefficients force the use of their respective color (there can be no neutral colors if the diagonal is fully nonzero)
-    if !augmented_graph(g)
+    bicoloring = augmented_graph(g)
+    if !bicoloring
+        S = pattern(g)
         for i in axes(S, 1)
             if !iszero(S[i, i])
                 color_used[color[i]] = true
@@ -21,31 +22,64 @@ function postprocess!(
         end
     end
 
+    # When bicoloring is false, column_color_used and row_color_used point to the same memory
+    row_color_used = bicoloring ? zeros(Bool, nb_colors) : color_used
+    column_color_used = color_used
+
     if star_or_tree_set isa StarSet
         # star_or_tree_set is a StarSet
-        postprocess_with_star_set!(g, color_used, color, star_or_tree_set)
+        postprocess_with_star_set!(
+            bicoloring,
+            g,
+            row_color_used,
+            column_color_used,
+            color,
+            star_or_tree_set,
+            postprocessing_minimizes,
+        )
     else
         # star_or_tree_set is a TreeSet
-        postprocess_with_tree_set!(color_used, color, star_or_tree_set)
+        postprocess_with_tree_set!(
+            bicoloring,
+            row_color_used,
+            column_color_used,
+            color,
+            star_or_tree_set,
+            postprocessing_minimizes,
+        )
     end
 
-    # if at least one of the colors is useless, modify the color assignments of vertices
-    if any(!, color_used)
-        num_colors_useless = 0
+    # if at least one of the colors is not used, modify the color assignments of vertices
+    has_neutral_color = if bicoloring
+        any(!, row_color_used) || any(!, column_color_used)
+    else
+        any(!, color_used)
+    end
 
-        # determine what are the useless colors and compute the offsets
+    if has_neutral_color
+        # size of the original matrix on which we want to perform coloring or bicoloring
+        (m, n) = g.original_size
+
+        # count the number of unused colors
+        num_unused_colors = 0
+
+        # count how many color indices are skipped before each color,
+        # in order to compact the color indexing after removing unused colors
         for ci in 1:nb_colors
-            if color_used[ci]
-                offsets[ci] = num_colors_useless
+            ci_required =
+                bicoloring ? row_color_used[ci] || column_color_used[ci] : color_used[ci]
+            if ci_required
+                offsets[ci] = num_unused_colors
             else
-                num_colors_useless += 1
+                num_unused_colors += 1
             end
         end
 
-        # assign the neutral color to every vertex with a useless color and remap the colors
+        # replace unused colors by the neutral color and compact the remaining color indices
         for i in eachindex(color)
             ci = color[i]
-            if !color_used[ci]
+            ci_used = (i ≤ n) ? column_color_used[ci] : row_color_used[ci]
+            if !ci_used
                 # assign the neutral color
                 color[i] = 0
             else
@@ -58,10 +92,13 @@ function postprocess!(
 end
 
 function postprocess_with_star_set!(
+    bicoloring::Bool,
     g::AdjacencyGraph,
-    color_used::Vector{Bool},
+    row_color_used::Vector{Bool},
+    column_color_used::Vector{Bool},
     color::AbstractVector{<:Integer},
     star_set::StarSet,
+    postprocessing_minimizes::Symbol=:all_colors,
 )
     S = pattern(g)
     edge_to_index = edge_indices(g)
@@ -70,11 +107,18 @@ function postprocess_with_star_set!(
     (; star, hub) = star_set
     nb_trivial_stars = 0
 
+    # size of the original matrix on which we want to perform bicoloring
+    (m, n) = g.original_size
+
     # Iterate through all non-trivial stars
     for s in eachindex(hub)
         h = hub[s]
         if h > 0
-            color_used[color[h]] = true
+            if h ≤ n
+                column_color_used[color[h]] = true
+            else
+                row_color_used[color[h]] = true
+            end
         else
             nb_trivial_stars += 1
         end
@@ -82,6 +126,14 @@ function postprocess_with_star_set!(
 
     # Process the trivial stars (if any)
     if nb_trivial_stars > 0
+        # When bicoloring is false, row_color_counts and column_color_counts point to the same memory
+        nv = length(color)
+        nb_colors = length(row_color_used)
+        visited_vertices = zeros(Bool, nv)
+        row_color_counts = zeros(Int, nb_colors)
+        column_color_counts = bicoloring ? zeros(Int, nb_colors) : row_color_counts
+        all_trivial_stars_treated = true
+
         rvS = rowvals(S)
         for j in axes(S, 2)
             for k in nzrange(S, j)
@@ -91,25 +143,106 @@ function postprocess_with_star_set!(
                     s = star[index_ij]
                     h = hub[s]
                     if h < 0
-                        h = abs(h)
-                        spoke = h == j ? i : j
-                        if color_used[color[spoke]]
-                            # Switch the hub and the spoke to possibly avoid adding one more used color
-                            hub[s] = spoke
+                        if row_color_used[color[i]]
+                            # The vertex i is already a hub in a non-trivial star
+                            hub[s] = i
                         else
-                            # Keep the current hub
-                            color_used[color[h]] = true
+                            if column_color_used[color[j]]
+                                # The vertex j is already a hub in a non-trivial star
+                                hub[s] = j
+                            else
+                                all_trivial_stars_treated = false
+                                # Count how many vertices of each color appear among the remaining trivial stars.
+                                # Each vertex is counted at most once, using `visited_vertices` to avoid duplicates
+                                # when a vertex belongs to multiple trivial stars.
+                                if !visited_vertices[i]
+                                    visited_vertices[i] = true
+                                    row_color_counts[color[i]] += 1
+                                end
+                                if !visited_vertices[j]
+                                    visited_vertices[j] = true
+                                    column_color_counts[color[j]] += 1
+                                end
+                            end
+                        end
+                    end
+                end
+            end
+        end
+
+        # Only trivial stars, where both vertices can be promoted as hubs, remain.
+        # In the context of bicoloring, if we aim to minimize either the number of row colors or the number of column colors,
+        # we can achieve optimal post-processing by choosing as hubs the vertices from the opposite partition.
+        # This is optimal because we never increase the number of colors in the target partition during this phase,
+        # and all preceding steps of the post-processing are deterministic.
+        if !all_trivial_stars_treated
+            rvS = rowvals(S)
+            for j in axes(S, 2)
+                for k in nzrange(S, j)
+                    i = rvS[k]
+                    if i > j
+                        index_ij = edge_to_index[k]
+                        s = star[index_ij]
+                        h = hub[s]
+                        # The hub of this trivial star is still unknown
+                        if h < 0
+                            # We need to decide who is the hub
+                            if !row_color_used[color[i]] && !column_color_used[color[j]]
+                                if !bicoloring || postprocessing_minimizes == :all_colors
+                                    # Choose as hub the vertex whose color is most frequent among the trivial stars.
+                                    # Colors with smaller `color_counts` are easier to keep unused
+                                    # if their vertices remain spokes instead of hubs.
+                                    # This is an heuristic to try to reduce the number of colors used.
+                                    #
+                                    # In case of a tie, we prefer to preserve colors in the row partition.
+                                    #
+                                    # Note that this heuristic also depends on the order in which
+                                    # the trivial stars are processed, especially when there are ties in `color_counts`.
+                                    if row_color_counts[color[i]] >
+                                        column_color_counts[color[j]]
+                                        row_color_used[color[i]] = true
+                                        hub[s] = i
+                                    else
+                                        column_color_used[color[j]] = true
+                                        hub[s] = j
+                                    end
+                                elseif postprocessing_minimizes == :row_colors
+                                    # j belongs to a column partition in the context of bicoloring
+                                    hub[s] = j
+                                    column_color_used[color[j]] = true
+                                elseif postprocessing_minimizes == :column_colors
+                                    # i belongs to a row partition in the context of bicoloring
+                                    hub[s] = i
+                                    row_color_used[color[i]] = true
+                                else
+                                    error(
+                                        "The value postprocessing_minimizes = :$postprocessing_minimizes is not supported.",
+                                    )
+                                end
+                            else
+                                # Previously processed trivial stars determined the hub vertex for this star
+                                if row_color_used[color[i]]
+                                    hub[s] = i
+                                else
+                                    hub[s] = j
+                                end
+                            end
                         end
                     end
                 end
             end
         end
     end
-    return color_used
+    return nothing
 end
 
 function postprocess_with_tree_set!(
-    color_used::Vector{Bool}, color::AbstractVector{<:Integer}, tree_set::TreeSet
+    bicoloring::Bool,
+    row_color_used::Vector{Bool},
+    column_color_used::Vector{Bool},
+    color::AbstractVector{<:Integer},
+    tree_set::TreeSet,
+    postprocessing_minimizes::Symbol=:all_colors,
 )
     # only the colors of non-leaf vertices are used
     (; reverse_bfs_orders, is_star, tree_edge_indices, nt) = tree_set
@@ -128,13 +261,19 @@ function postprocess_with_tree_set!(
             # Determine if the tree is a star
             if is_star[k]
                 # It is a non-trivial star and only the color of the hub is needed
-                (_, hub) = reverse_bfs_orders[first]
-                color_used[color[hub]] = true
+                (leaf, hub) = reverse_bfs_orders[first]
+                if hub < leaf
+                    column_color_used[color[hub]] = true
+                else
+                    row_color_used[color[hub]] = true
+                end
             else
                 # It is not a star and both colors are needed during the decompression
                 (i, j) = reverse_bfs_orders[first]
-                color_used[color[i]] = true
-                color_used[color[j]] = true
+                v_col = min(i, j)
+                v_row = max(i, j)
+                row_color_used[color[v_row]] = true
+                column_color_used[color[v_col]] = true
             end
         else
             nb_trivial_trees += 1
@@ -143,6 +282,14 @@ function postprocess_with_tree_set!(
 
     # Process the trivial trees (if any)
     if nb_trivial_trees > 0
+        # When bicoloring is false, row_color_counts and column_color_counts point to the same memory
+        nv = length(color)
+        nb_colors = length(row_color_used)
+        visited_vertices = zeros(Bool, nv)
+        row_color_counts = zeros(Int, nb_colors)
+        column_color_counts = bicoloring ? zeros(Int, nb_colors) : row_color_counts
+        all_trivial_trees_treated = true
+
         for k in 1:nt
             # Position of the first edge in the tree
             first = tree_edge_indices[k]
@@ -153,16 +300,96 @@ function postprocess_with_tree_set!(
             # Check if we have exactly one edge in the tree
             if ne_tree == 1
                 (i, j) = reverse_bfs_orders[first]
-                if color_used[color[i]]
-                    # Make i the root to avoid possibly adding one more used color
-                    # Switch it with the (only) leaf
-                    reverse_bfs_orders[first] = (j, i)
+                v_col = min(i, j)
+                v_row = max(i, j)
+                if column_color_used[color[v_col]]
+                    # The vertex v_col is already an internal node in a non-trivial tree
+                    reverse_bfs_orders[first] = (v_row, v_col)
                 else
-                    # Keep j as the root
-                    color_used[color[j]] = true
+                    if row_color_used[color[v_row]]
+                        # The vertex v_row is already an internal node in a non-trivial tree
+                        reverse_bfs_orders[first] = (v_col, v_row)
+                    else
+                        all_trivial_trees_treated = false
+                        # Count how many vertices of each color appear among the remaining trivial trees.
+                        # Each vertex is counted at most once, using `visited_vertices` to avoid duplicates
+                        # when a vertex belongs to multiple trivial trees.
+                        if !visited_vertices[v_row]
+                            visited_vertices[v_row] = true
+                            row_color_counts[color[v_row]] += 1
+                        end
+                        if !visited_vertices[v_col]
+                            visited_vertices[v_col] = true
+                            column_color_counts[color[v_col]] += 1
+                        end
+                    end
+                end
+            end
+        end
+
+        # Only trivial trees, where both vertices can be promoted as roots, remain.
+        # In the context of bicoloring, if we aim to minimize either the number of row colors or the number of column colors,
+        # we can achieve optimal post-processing by choosing as roots the vertices from the opposite partition.
+        # This is optimal because we never increase the number of colors in the target partition during this phase,
+        # and all preceding steps of the post-processing are deterministic.
+        if !all_trivial_trees_treated
+            for k in 1:nt
+                # Position of the first edge in the tree
+                first = tree_edge_indices[k]
+
+                # Total number of edges in the tree
+                ne_tree = tree_edge_indices[k + 1] - first
+
+                # Check if we have exactly one edge in the tree
+                if ne_tree == 1
+                    (i, j) = reverse_bfs_orders[first]
+                    v_col = min(i, j)
+                    v_row = max(i, j)
+                    if !column_color_used[color[v_col]] && !row_color_used[color[v_row]]
+                        if !bicoloring || postprocessing_minimizes == :all_colors
+                            # Choose as root the vertex whose color is most frequent among the trivial trees.
+                            # Colors with smaller `color_counts` are easier to keep unused
+                            # if their vertices remain leaves instead of roots.
+                            # This is an heuristic to try to reduce the number of colors used.
+                            #
+                            # In case of a tie, we prefer to preserve colors in the row partition.
+                            #
+                            # Note that this heuristic also depends on the order in which
+                            # the trivial trees are processed, especially when there are ties in `color_counts`.
+                            if row_color_counts[color[v_row]] >
+                                column_color_counts[color[v_col]]
+                                row_color_used[color[v_row]] = true
+                                reverse_bfs_orders[first] = (v_col, v_row)
+                            else
+                                column_color_used[color[v_col]] = true
+                                reverse_bfs_orders[first] = (v_row, v_col)
+                            end
+                        elseif postprocessing_minimizes == :row_colors
+                            # v_col belongs to a column partition in the context of bicoloring
+                            column_color_used[color[v_col]] = true
+                            reverse_bfs_orders[first] = (v_row, v_col)
+                        elseif postprocessing_minimizes == :column_colors
+                            # v_row belongs to a row partition in the context of bicoloring
+                            row_color_used[color[v_row]] = true
+                            reverse_bfs_orders[first] = (v_col, v_row)
+                        else
+                            error(
+                                "The value postprocessing_minimizes = :$postprocessing_minimizes is not supported.",
+                            )
+                        end
+                    else
+                        # Previously processed trivial trees determined the root vertex for this tree
+                        # Ensure that the root vertex has a used color for decompression
+                        if column_color_used[color[v_col]] && !row_color_used[color[v_row]]
+                            reverse_bfs_orders[first] = (v_row, v_col)
+                        end
+                        if !column_color_used[color[v_col]] && row_color_used[color[v_row]]
+                            reverse_bfs_orders[first] = (v_col, v_row)
+                        end
+                    end
                 end
             end
         end
     end
-    return color_used
+    return nothing
 end
diff --git a/src/result.jl b/src/result.jl
index 89da22d6..f9995ad9 100644
--- a/src/result.jl
+++ b/src/result.jl
@@ -309,6 +309,7 @@ struct StarSetColoringResult{
     color::CT
     group::GT
     compressed_indices::VT
+    uplo::Symbol
     additional_info::A
 end
 
@@ -317,43 +318,69 @@ function StarSetColoringResult(
     ag::AdjacencyGraph{T},
     color::Vector{<:Integer},
     star_set::StarSet{<:Integer},
+    decompression_uplo::Symbol,
 ) where {T<:Integer}
     group = group_by_color(T, color)
-    compressed_indices = star_csc_indices(ag, color, star_set)
-    return StarSetColoringResult(A, ag, color, group, compressed_indices, nothing)
+    compressed_indices = star_csc_indices(ag, color, star_set, decompression_uplo)
+    return StarSetColoringResult(
+        A, ag, color, group, compressed_indices, decompression_uplo, nothing
+    )
 end
 
 function star_csc_indices(
-    ag::AdjacencyGraph{T}, color::Vector{<:Integer}, star_set
+    ag::AdjacencyGraph{T},
+    color::Vector{<:Integer},
+    star_set::StarSet{<:Integer},
+    uplo::Symbol,
 ) where {T}
     (; star, hub) = star_set
     S = pattern(ag)
     edge_to_index = edge_indices(ag)
     n = S.n
     rvS = rowvals(S)
-    compressed_indices = zeros(T, nnz(S))  # needs to be independent from the storage in the graph, in case the graph gets reused
+    l = 0
+    nnzS = nnz(S)
+    if uplo != :F
+        ndiag = 0
+        if !augmented_graph(ag)
+            for j in axes(S, 2)
+                for k in nzrange(S, j)
+                    i = rvS[k]
+                    if i == j
+                        ndiag += 1
+                    end
+                end
+            end
+        end
+        nnzS = (nnzS - ndiag) ÷ 2 + ndiag
+    end
+    compressed_indices = zeros(T, nnzS)
     for j in axes(S, 2)
         for k in nzrange(S, j)
             i = rvS[k]
             if i == j
                 # diagonal coefficients
+                l += 1
                 c = color[i]
-                compressed_indices[k] = (c - 1) * n + i
+                compressed_indices[l] = (c - 1) * n + i
             else
-                # off-diagonal coefficients
-                index_ij = edge_to_index[k]
-                s = star[index_ij]
-                h = abs(hub[s])
-
-                # Assign the non-hub vertex (spoke) to the correct position in spokes
-                if i == h
-                    # i is the hub and j is the spoke
-                    c = color[i]
-                    compressed_indices[k] = (c - 1) * n + j
-                else  # j == h
-                    # j is the hub and i is the spoke
-                    c = color[j]
-                    compressed_indices[k] = (c - 1) * n + i
+                if in_triangle(i, j, uplo)
+                    # off-diagonal coefficients
+                    l += 1
+                    index_ij = edge_to_index[k]
+                    s = star[index_ij]
+                    h = abs(hub[s])
+
+                    # Assign the non-hub vertex (spoke) to the correct position in spokes
+                    if i == h
+                        # i is the hub and j is the spoke
+                        c = color[i]
+                        compressed_indices[l] = (c - 1) * n + j
+                    else  # j == h
+                        # j is the hub and i is the spoke
+                        c = color[j]
+                        compressed_indices[l] = (c - 1) * n + i
+                    end
                 end
             end
         end
@@ -391,6 +418,7 @@ struct TreeSetColoringResult{
     lower_triangle_offsets::Vector{T}
     upper_triangle_offsets::Vector{T}
     buffer::Vector{R}
+    uplo::Symbol
 end
 
 function TreeSetColoringResult(
@@ -398,36 +426,51 @@ function TreeSetColoringResult(
     ag::AdjacencyGraph{T},
     color::Vector{<:Integer},
     tree_set::TreeSet{<:Integer},
+    decompression_uplo::Symbol,
     decompression_eltype::Type{R},
 ) where {T<:Integer,R}
     (; reverse_bfs_orders, tree_edge_indices, nt) = tree_set
     (; S) = ag
     nvertices = length(color)
     group = group_by_color(T, color)
-    rv = rowvals(S)
+    rvS = rowvals(S)
 
-    # Vector for the decompression of the diagonal coefficients
-    diagonal_indices = T[]
-    diagonal_nzind = T[]
     ndiag = 0
-
     if !augmented_graph(ag)
         for j in axes(S, 2)
             for k in nzrange(S, j)
-                i = rv[k]
+                i = rvS[k]
                 if i == j
-                    push!(diagonal_indices, i)
-                    push!(diagonal_nzind, k)
                     ndiag += 1
                 end
             end
         end
     end
 
+    # Vector for the decompression of the diagonal coefficients
+    diagonal_indices = Vector{T}(undef, ndiag)
+    diagonal_nzind = decompression_uplo == :F ? Vector{T}(undef, ndiag) : T[]
+
+    if !augmented_graph(ag)
+        l = 0
+        for j in axes(S, 2)
+            for k in nzrange(S, j)
+                i = rvS[k]
+                if i == j
+                    l += 1
+                    diagonal_indices[l] = i
+                    if decompression_uplo == :F
+                        diagonal_nzind[l] = k
+                    end
+                end
+            end
+        end
+    end
+
     # Vectors for the decompression of the off-diagonal coefficients
     nedges = (nnz(S) - ndiag) ÷ 2
-    lower_triangle_offsets = Vector{T}(undef, nedges)
-    upper_triangle_offsets = Vector{T}(undef, nedges)
+    lower_triangle_offsets = decompression_uplo == :U ? T[] : Vector{T}(undef, nedges)
+    upper_triangle_offsets = decompression_uplo == :L ? T[] : Vector{T}(undef, nedges)
 
     # Index in lower_triangle_offsets and upper_triangle_offsets
     index_offsets = 0
@@ -442,8 +485,8 @@ function TreeSetColoringResult(
             # Update lower_triangle_offsets and upper_triangle_offsets
             i = leaf
             j = neighbor
-            col_i = view(rv, nzrange(S, i))
-            col_j = view(rv, nzrange(S, j))
+            col_i = view(rvS, nzrange(S, i))
+            col_j = view(rvS, nzrange(S, j))
             index_offsets += 1
 
             #! format: off
@@ -451,21 +494,29 @@ function TreeSetColoringResult(
             if in_triangle(i, j, :L)
                 # uplo = :L or uplo = :F
                 # S[i,j] is stored at index_ij = (S.colptr[j+1] - offset_L) in S.nzval
-                lower_triangle_offsets[index_offsets] = length(col_j) - searchsortedfirst(col_j, i) + 1
+                if decompression_uplo != :U
+                    lower_triangle_offsets[index_offsets] = length(col_j) - searchsortedfirst(col_j, i) + 1
+                end
 
                 # uplo = :U or uplo = :F
                 # S[j,i] is stored at index_ji = (S.colptr[i] + offset_U) in S.nzval
-                upper_triangle_offsets[index_offsets] = searchsortedfirst(col_i, j)::Int - 1
+                if decompression_uplo != :L
+                    upper_triangle_offsets[index_offsets] = searchsortedfirst(col_i, j)::Int - 1
+                end
 
             # S[i,j] is in the upper triangular part of S
             else
                 # uplo = :U or uplo = :F
                 # S[i,j] is stored at index_ij = (S.colptr[j] + offset_U) in S.nzval
-                upper_triangle_offsets[index_offsets] = searchsortedfirst(col_j, i)::Int - 1
+                if decompression_uplo != :L
+                    upper_triangle_offsets[index_offsets] = searchsortedfirst(col_j, i)::Int - 1
+                end
 
                 # uplo = :L or uplo = :F
                 # S[j,i] is stored at index_ji = (S.colptr[i+1] - offset_L) in S.nzval
-                lower_triangle_offsets[index_offsets] = length(col_i) - searchsortedfirst(col_i, j) + 1
+                if decompression_uplo != :U
+                    lower_triangle_offsets[index_offsets] = length(col_i) - searchsortedfirst(col_i, j) + 1
+                end
             end
             #! format: on
         end
@@ -488,6 +539,7 @@ function TreeSetColoringResult(
         lower_triangle_offsets,
         upper_triangle_offsets,
         buffer,
+        decompression_uplo,
     )
 end
 
@@ -518,12 +570,14 @@ struct LinearSystemColoringResult{
     strict_upper_nonzero_inds::Vector{Tuple{T,T}}
     strict_upper_nonzeros_A::Vector{R}  # TODO: adjust type
     M_factorization::F  # TODO: adjust type
+    uplo::Symbol
 end
 
 function LinearSystemColoringResult(
     A::AbstractMatrix,
     ag::AdjacencyGraph{T},
     color::Vector{<:Integer},
+    decompression_uplo::Symbol,
     decompression_eltype::Type{R},
 ) where {T<:Integer,R<:Real}
     group = group_by_color(T, color)
@@ -569,6 +623,7 @@ function LinearSystemColoringResult(
         strict_upper_nonzero_inds,
         strict_upper_nonzeros_A,
         M_factorization,
+        decompression_uplo,
     )
 end
 
diff --git a/test/random.jl b/test/random.jl
index 406dfea9..f91b085d 100644
--- a/test/random.jl
+++ b/test/random.jl
@@ -84,7 +84,22 @@ end;
             RandomOrder(StableRNG(0), 0); postprocessing=false, decompression=:direct
         ),
         GreedyColoringAlgorithm(
-            RandomOrder(StableRNG(0), 0); postprocessing=true, decompression=:direct
+            RandomOrder(StableRNG(0), 0);
+            postprocessing=true,
+            postprocessing_minimizes=:all_colors,
+            decompression=:direct,
+        ),
+        GreedyColoringAlgorithm(
+            RandomOrder(StableRNG(0), 0);
+            postprocessing=true,
+            postprocessing_minimizes=:row_colors,
+            decompression=:direct,
+        ),
+        GreedyColoringAlgorithm(
+            RandomOrder(StableRNG(0), 0);
+            postprocessing=true,
+            postprocessing_minimizes=:column_colors,
+            decompression=:direct,
         ),
     )
         @testset "$((; m, n, p))" for (m, n, p) in asymmetric_params
@@ -105,7 +120,22 @@ end;
             RandomOrder(StableRNG(0), 0); postprocessing=false, decompression=:substitution
         ),
         GreedyColoringAlgorithm(
-            RandomOrder(StableRNG(0), 0); postprocessing=true, decompression=:substitution
+            RandomOrder(StableRNG(0), 0);
+            postprocessing=true,
+            postprocessing_minimizes=:all_colors,
+            decompression=:substitution,
+        ),
+        GreedyColoringAlgorithm(
+            RandomOrder(StableRNG(0), 0);
+            postprocessing=true,
+            postprocessing_minimizes=:row_colors,
+            decompression=:substitution,
+        ),
+        GreedyColoringAlgorithm(
+            RandomOrder(StableRNG(0), 0);
+            postprocessing=true,
+            postprocessing_minimizes=:column_colors,
+            decompression=:substitution,
         ),
     )
         @testset "$((; m, n, p))" for (m, n, p) in asymmetric_params
diff --git a/test/small.jl b/test/small.jl
index 2e12372c..f66bdfa3 100644
--- a/test/small.jl
+++ b/test/small.jl
@@ -88,111 +88,267 @@ end;
 end;
 
 @testset "Bidirectional coloring" begin
-    problem = ColoringProblem(; structure=:nonsymmetric, partition=:bidirectional)
-    order = RandomOrder(StableRNG(0), 0)
-
-    @testset "Anti-diagonal" begin
-        A = sparse([0 0 0 1; 0 0 1 0; 0 1 0 0; 1 0 0 0])
+    color_stats = Dict(
+        "Anti-diagonal" => Dict(
+            :direct => Dict(
+                :all_colors => (0, 0),
+                :row_colors => (0, 0),
+                :column_colors => (0, 0),
+            ),
+            :substitution => Dict(
+                :all_colors => (0, 0),
+                :row_colors => (0, 0),
+                :column_colors => (0, 0),
+            ),
+        ),
+        "Triangle" => Dict(
+            :direct => Dict(
+                :all_colors => (0, 0),
+                :row_colors => (0, 0),
+                :column_colors => (0, 0),
+            ),
+            :substitution => Dict(
+                :all_colors => (0, 0),
+                :row_colors => (0, 0),
+                :column_colors => (0, 0),
+            ),
+        ),
+        "Rectangle" => Dict(
+            :direct => Dict(
+                :all_colors => (0, 0),
+                :row_colors => (0, 0),
+                :column_colors => (0, 0),
+            ),
+            :substitution => Dict(
+                :all_colors => (0, 0),
+                :row_colors => (0, 0),
+                :column_colors => (0, 0),
+            ),
+        ),
+        "Arrowhead" => Dict(
+            :direct => Dict(
+                :all_colors => (0, 0),
+                :row_colors => (0, 0),
+                :column_colors => (0, 0),
+            ),
+            :substitution => Dict(
+                :all_colors => (0, 0),
+                :row_colors => (0, 0),
+                :column_colors => (0, 0),
+            ),
+        ),
+    )
 
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:direct}(; postprocessing=false)
+    @testset "coverage postprocessing for acyclic bicoloring" begin
+        problem = ColoringProblem(; structure=:nonsymmetric, partition=:bidirectional)
+        order = RandomOrder(StableRNG(4), 4)
+        algo = GreedyColoringAlgorithm(
+            order; decompression=:substitution, postprocessing=true
         )
-        @test ncolors(result) == 2
+        A = sparse([1 1 0; 0 0 1; 0 1 0])
+        result = coloring(A, problem, algo)
+    end
 
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:direct}(; postprocessing=true)
-        )
-        @test ncolors(result) == 1
+    @testset "postprocessing_minimizes = :sym_colors" begin
+        problem = ColoringProblem(; structure=:nonsymmetric, partition=:bidirectional)
+        order = NaturalOrder()
+        A = sparse([1 0 0 0; 0 1 0 0; 0 0 1 0; 0 0 0 1])
 
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:substitution}(; postprocessing=false)
+        star_bicoloring_algorithm = GreedyColoringAlgorithm{:direct}(;
+            postprocessing=true, postprocessing_minimizes=:sym_colors
+        )
+        @test_throws "The value postprocessing_minimizes = :sym_colors is not supported." coloring(
+            A, problem, star_bicoloring_algorithm
         )
-        @test ncolors(result) == 2
 
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:substitution}(; postprocessing=true)
+        acyclic_bicoloring_algorithm = GreedyColoringAlgorithm{:substitution}(;
+            postprocessing=true, postprocessing_minimizes=:sym_colors
+        )
+        @test_throws "The value postprocessing_minimizes = :sym_colors is not supported." coloring(
+            A, problem, acyclic_bicoloring_algorithm
         )
-        @test ncolors(result) == 1
     end
 
-    @testset "Triangle" begin
-        A = sparse([1 1 0; 0 1 1; 1 0 1])
+    @testset "postprocessing_minimizes = $target" for target in (
+        :all_colors, :row_colors, :column_colors
+    )
+        problem = ColoringProblem(; structure=:nonsymmetric, partition=:bidirectional)
+        order = RandomOrder(StableRNG(0), 0)
 
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:direct}(; postprocessing=true)
-        )
-        @test ncolors(result) == 3
+        @testset "Anti-diagonal" begin
+            A = sparse([0 0 0 1; 0 0 1 0; 0 1 0 0; 1 0 0 0])
 
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:substitution}(; postprocessing=true)
-        )
-        @test ncolors(result) == 3
-    end
+            result = coloring(
+                A, problem, GreedyColoringAlgorithm{:direct}(; postprocessing=false)
+            )
+            @test ncolors(result) == 2
 
-    @testset "Rectangle" begin
-        A = spzeros(Bool, 10, 20)
-        A[:, 1] .= 1
-        A[:, end] .= 1
-        A[1, :] .= 1
-        A[end, :] .= 1
+            result = coloring(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:direct}(;
+                    postprocessing=true, postprocessing_minimizes=target
+                ),
+            )
+            @test ncolors(result) == 1
+            color_stats["Anti-diagonal"][:direct][target] = (
+                maximum(row_colors(result)), maximum(column_colors(result))
+            )
 
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:direct}(order; postprocessing=false)
-        )
-        @test ncolors(result) == 6  # two more than necessary
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:direct}(order; postprocessing=true)
-        )
-        @test ncolors(result) == 4  # optimal number
+            result = coloring(
+                A, problem, GreedyColoringAlgorithm{:substitution}(; postprocessing=false)
+            )
+            @test ncolors(result) == 2
 
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:substitution}(order; postprocessing=false)
-        )
-        @test ncolors(result) == 6  # two more than necessary
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:substitution}(order; postprocessing=true)
-        )
-        @test ncolors(result) == 4  # optimal number
-    end
+            result = coloring(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:substitution}(;
+                    postprocessing=true, postprocessing_minimizes=target
+                ),
+            )
+            @test ncolors(result) == 1
+            color_stats["Anti-diagonal"][:substitution][target] = (
+                maximum(row_colors(result)), maximum(column_colors(result))
+            )
+        end
 
-    @testset "Arrowhead" begin
-        A = spzeros(Bool, 10, 10)
-        for i in axes(A, 1)
-            A[1, i] = 1
-            A[i, 1] = 1
-            A[i, i] = 1
+        @testset "Triangle" begin
+            A = sparse([1 1 0; 0 1 1; 1 0 1])
+
+            result = coloring(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:direct}(;
+                    postprocessing=true, postprocessing_minimizes=target
+                ),
+            )
+            @test ncolors(result) == 3
+            color_stats["Triangle"][:direct][target] = (
+                maximum(row_colors(result)), maximum(column_colors(result))
+            )
+
+            result = coloring(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:substitution}(;
+                    postprocessing=true, postprocessing_minimizes=target
+                ),
+            )
+            @test ncolors(result) == 3
+            color_stats["Triangle"][:substitution][target] = (
+                maximum(row_colors(result)), maximum(column_colors(result))
+            )
         end
 
-        result = coloring(
-            A, problem, GreedyColoringAlgorithm{:direct}(order; postprocessing=true)
-        )
-        @test ncolors(coloring(A, problem, GreedyColoringAlgorithm{:substitution}(order))) <
-            ncolors(coloring(A, problem, GreedyColoringAlgorithm{:direct}(order)))
+        @testset "Rectangle" begin
+            A = spzeros(Bool, 10, 20)
+            A[:, 1] .= 1
+            A[:, end] .= 1
+            A[1, :] .= 1
+            A[end, :] .= 1
 
-        @test ncolors(
-            coloring(
+            result = coloring(
+                A, problem, GreedyColoringAlgorithm{:direct}(order; postprocessing=false)
+            )
+            @test ncolors(result) == 6  # two more than necessary
+            result = coloring(
                 A,
                 problem,
-                GreedyColoringAlgorithm{:substitution}(order; postprocessing=true),
-            ),
-        ) < ncolors(
-            coloring(
-                A, problem, GreedyColoringAlgorithm{:direct}(order; postprocessing=true)
-            ),
-        )
+                GreedyColoringAlgorithm{:direct}(
+                    order; postprocessing=true, postprocessing_minimizes=target
+                ),
+            )
+            @test ncolors(result) == 4  # optimal number
+            color_stats["Rectangle"][:direct][target] = (
+                maximum(row_colors(result)), maximum(column_colors(result))
+            )
 
-        test_bicoloring_decompression(
-            A,
-            problem,
-            GreedyColoringAlgorithm{:direct}(order; postprocessing=true);
-            test_fast=true,
-        )
+            result = coloring(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:substitution}(order; postprocessing=false),
+            )
+            @test ncolors(result) == 6  # two more than necessary
+            result = coloring(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:substitution}(
+                    order; postprocessing=true, postprocessing_minimizes=target
+                ),
+            )
+            @test ncolors(result) == 4  # optimal number
+            color_stats["Rectangle"][:substitution][target] = (
+                maximum(row_colors(result)), maximum(column_colors(result))
+            )
+        end
 
-        test_bicoloring_decompression(
-            A,
-            problem,
-            GreedyColoringAlgorithm{:substitution}(order; postprocessing=true);
-            test_fast=true,
-        )
+        @testset "Arrowhead" begin
+            A = spzeros(Bool, 10, 10)
+            for i in axes(A, 1)
+                A[1, i] = 1
+                A[i, 1] = 1
+                A[i, i] = 1
+            end
+
+            @test ncolors(
+                coloring(A, problem, GreedyColoringAlgorithm{:substitution}(order))
+            ) < ncolors(coloring(A, problem, GreedyColoringAlgorithm{:direct}(order)))
+
+            result_direct = coloring(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:direct}(
+                    order; postprocessing=true, postprocessing_minimizes=target
+                ),
+            )
+            color_stats["Arrowhead"][:direct][target] = (
+                maximum(row_colors(result_direct)), maximum(column_colors(result_direct))
+            )
+
+            result_substitution = coloring(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:substitution}(
+                    order; postprocessing=true, postprocessing_minimizes=target
+                ),
+            )
+            color_stats["Arrowhead"][:substitution][target] = (
+                maximum(row_colors(result_substitution)),
+                maximum(column_colors(result_substitution)),
+            )
+
+            @test ncolors(result_substitution) < ncolors(result_direct)
+
+            test_bicoloring_decompression(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:direct}(
+                    order; postprocessing=true, postprocessing_minimizes=target
+                );
+                test_fast=true,
+            )
+
+            test_bicoloring_decompression(
+                A,
+                problem,
+                GreedyColoringAlgorithm{:substitution}(
+                    order; postprocessing=true, postprocessing_minimizes=target
+                );
+                test_fast=true,
+            )
+        end
+    end
+
+    @testset "Variants of post-processing" begin
+        for problem in ("Anti-diagonal", "Triangle", "Rectangle", "Arrowhead")
+            for mode in (:direct, :substitution)
+                (num_row_colors1, num_column_colors1) = color_stats[problem][mode][:row_colors]
+                (num_row_colors2, num_column_colors2) = color_stats[problem][mode][:all_colors]
+                (num_row_colors3, num_column_colors3) = color_stats[problem][mode][:column_colors]
+                @test num_row_colors1 ≤ num_row_colors2 ≤ num_row_colors3
+                @test num_column_colors3 ≤ num_column_colors2 ≤ num_column_colors1
+            end
+        end
     end
 end;
diff --git a/test/utils.jl b/test/utils.jl
index 2462f8c1..0199a8b9 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -98,7 +98,7 @@ function test_coloring_decompression(
         end
 
         @testset "Single-color decompression" begin
-            if decompression == :direct  # TODO: implement for :substitution too
+            if decompression == :direct
                 A2 = respectful_similar(A, eltype(B))
                 A2 .= zero(eltype(A2))
                 for c in unique(color)
@@ -122,9 +122,9 @@ function test_coloring_decompression(
                 A3lower .= zero(eltype(A))
                 A3both .= zero(eltype(A))
 
-                decompress!(A3upper, B, result, :U)
-                decompress!(A3lower, B, result, :L)
-                decompress!(A3both, B, result, :F)
+                # decompress!(A3upper, B, result)
+                # decompress!(A3lower, B, result)
+                decompress!(A3both, B, result)
 
                 @test A3upper ≈ triu(A0)
                 @test A3lower ≈ tril(A0)
@@ -134,6 +134,7 @@ function test_coloring_decompression(
 
         @testset "Single-color triangle decompression" begin
             if structure == :symmetric && decompression == :direct
+                A isa SparseMatrixCSC || continue
                 A4upper = respectful_similar(triu(A), eltype(B))
                 A4lower = respectful_similar(tril(A), eltype(B))
                 A4both = respectful_similar(A, eltype(B))
@@ -143,9 +144,9 @@ function test_coloring_decompression(
 
                 for c in unique(color)
                     c == 0 && continue
-                    decompress_single_color!(A4upper, B[:, c], c, result, :U)
-                    decompress_single_color!(A4lower, B[:, c], c, result, :L)
-                    decompress_single_color!(A4both, B[:, c], c, result, :F)
+                    # decompress_single_color!(A4upper, B[:, c], c, result)
+                    # decompress_single_color!(A4lower, B[:, c], c, result)
+                    decompress_single_color!(A4both, B[:, c], c, result)
                 end
 
                 @test A4upper ≈ triu(A0)
@@ -240,11 +241,12 @@ function test_bicoloring_decompression(
     @testset "More orders is better" begin
         more_orders = (algo.orders..., _ALL_ORDERS...)
         better_algo = GreedyColoringAlgorithm{decompression}(
-            more_orders; algo.postprocessing
+            more_orders; algo.postprocessing, algo.postprocessing_minimizes
         )
         all_algos = [
-            GreedyColoringAlgorithm{decompression}(order; algo.postprocessing) for
-            order in more_orders
+            GreedyColoringAlgorithm{decompression}(
+                order; algo.postprocessing, algo.postprocessing_minimizes
+            ) for order in more_orders
         ]
         result = coloring(A0, problem, algo)
         better_result = coloring(A0, problem, better_algo)
@@ -267,10 +269,6 @@ function test_structured_coloring_decompression(A::AbstractMatrix)
     @test D == A
     @test nameof(typeof(D)) == nameof(typeof(A))
     @test structurally_orthogonal_columns(A, color)
-    if VERSION >= v"1.10" || A isa Union{Diagonal,Bidiagonal,Tridiagonal}
-        # banded matrices not supported by ArrayInterface on Julia 1.6
-        # @test color == ArrayInterface.matrix_colors(A)  # TODO: uncomment
-    end
 
     # Row
     result = coloring(A, row_problem, algo)