From a651c3b8a8d35708ce7828926a8e87774a800c8b Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Sun, 1 Mar 2026 11:40:30 -0500 Subject: [PATCH 01/20] agents: archive completed planning documents --- agents/{ => archive}/art_pts.md | 0 agents/{ => archive}/biconn_comp.md | 0 agents/{ => archive}/doc_refactor_plan.md | 0 agents/{ => archive}/doc_refactor_strategy.md | 0 agents/{ => archive}/dynamic_edge_refactor_plan.md | 0 agents/{ => archive}/jaccard_coef.md | 0 agents/{ => archive}/lbl_prop.md | 0 agents/{ => archive}/vertex_id_const_ref_plan.md | 0 agents/{ => archive}/vertex_id_const_ref_strategy.md | 0 agents/{ => archive}/view_signature_goal.md | 0 agents/{ => archive}/view_signature_plan.md | 0 11 files changed, 0 insertions(+), 0 deletions(-) rename agents/{ => archive}/art_pts.md (100%) rename agents/{ => archive}/biconn_comp.md (100%) rename agents/{ => archive}/doc_refactor_plan.md (100%) rename agents/{ => archive}/doc_refactor_strategy.md (100%) rename agents/{ => archive}/dynamic_edge_refactor_plan.md (100%) rename agents/{ => archive}/jaccard_coef.md (100%) rename agents/{ => archive}/lbl_prop.md (100%) rename agents/{ => archive}/vertex_id_const_ref_plan.md (100%) rename agents/{ => archive}/vertex_id_const_ref_strategy.md (100%) rename agents/{ => archive}/view_signature_goal.md (100%) rename agents/{ => archive}/view_signature_plan.md (100%) diff --git a/agents/art_pts.md b/agents/archive/art_pts.md similarity index 100% rename from agents/art_pts.md rename to agents/archive/art_pts.md diff --git a/agents/biconn_comp.md b/agents/archive/biconn_comp.md similarity index 100% rename from agents/biconn_comp.md rename to agents/archive/biconn_comp.md diff --git a/agents/doc_refactor_plan.md b/agents/archive/doc_refactor_plan.md similarity index 100% rename from agents/doc_refactor_plan.md rename to agents/archive/doc_refactor_plan.md diff --git a/agents/doc_refactor_strategy.md b/agents/archive/doc_refactor_strategy.md similarity index 100% rename from 
agents/doc_refactor_strategy.md rename to agents/archive/doc_refactor_strategy.md diff --git a/agents/dynamic_edge_refactor_plan.md b/agents/archive/dynamic_edge_refactor_plan.md similarity index 100% rename from agents/dynamic_edge_refactor_plan.md rename to agents/archive/dynamic_edge_refactor_plan.md diff --git a/agents/jaccard_coef.md b/agents/archive/jaccard_coef.md similarity index 100% rename from agents/jaccard_coef.md rename to agents/archive/jaccard_coef.md diff --git a/agents/lbl_prop.md b/agents/archive/lbl_prop.md similarity index 100% rename from agents/lbl_prop.md rename to agents/archive/lbl_prop.md diff --git a/agents/vertex_id_const_ref_plan.md b/agents/archive/vertex_id_const_ref_plan.md similarity index 100% rename from agents/vertex_id_const_ref_plan.md rename to agents/archive/vertex_id_const_ref_plan.md diff --git a/agents/vertex_id_const_ref_strategy.md b/agents/archive/vertex_id_const_ref_strategy.md similarity index 100% rename from agents/vertex_id_const_ref_strategy.md rename to agents/archive/vertex_id_const_ref_strategy.md diff --git a/agents/view_signature_goal.md b/agents/archive/view_signature_goal.md similarity index 100% rename from agents/view_signature_goal.md rename to agents/archive/view_signature_goal.md diff --git a/agents/view_signature_plan.md b/agents/archive/view_signature_plan.md similarity index 100% rename from agents/view_signature_plan.md rename to agents/archive/view_signature_plan.md From 63b253814acccf0c80d962890528097524aae632 Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Sun, 1 Mar 2026 12:33:44 -0500 Subject: [PATCH 02/20] refactor: simplify index_vertex_range, add underlying_iterator alias to vertex_descriptor_view MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - index_vertex_range now checks std::integral::storage_type> instead of digging through vertex_desc::iterator_type — reflects the true semantic intent (vertices addressable by integral index) - Add 
underlying_iterator = VertexIter alias on vertex_descriptor_view for direct access to the underlying container iterator type --- .../adj_list/adjacency_list_concepts.hpp | 42 +++++++++---------- .../graph/adj_list/vertex_descriptor_view.hpp | 4 +- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/include/graph/adj_list/adjacency_list_concepts.hpp b/include/graph/adj_list/adjacency_list_concepts.hpp index 52f8d58..6f9c882 100644 --- a/include/graph/adj_list/adjacency_list_concepts.hpp +++ b/include/graph/adj_list/adjacency_list_concepts.hpp @@ -100,10 +100,11 @@ concept out_edge_range = std::ranges::forward_range && edge -concept vertex = is_vertex_descriptor_v> && requires(G& g, const V& u, const vertex_id_t& uid) { - vertex_id(g, u); - find_vertex(g, uid); -}; +concept vertex = is_vertex_descriptor_v> && // + requires(G& g, const V& u, const vertex_id_t& uid) { + vertex_id(g, u); + find_vertex(g, uid); + }; // ============================================================================= // Vertex Range Concepts @@ -147,11 +148,12 @@ concept vertex_range = std::ranges::forward_range && std::ranges::sized_range * Requirements: * - The vertex ID type must be integral * - Must satisfy vertex_range - * - The underlying iterator of the vertex_descriptor_view must be a random_access_iterator + * - The underlying container must be integral, allowing random access by index * - * Note: We check the underlying iterator type, not the view itself, because - * vertex_descriptor_view is always a forward_range (synthesizes descriptors on-the-fly) - * but the underlying container may still support random access. + * Note: We check the underlying iterator type via `vertex_range_t::underlying_iterator`, + * not the view's own iterator, because vertex_descriptor_view always models forward_range + * (it synthesizes descriptors on-the-fly) while the underlying container may still support + * random access. 
`underlying_iterator` is exposed directly on vertex_descriptor_view. * * Examples: * - vertex_descriptor_view over std::vector (index-based) @@ -163,11 +165,11 @@ concept vertex_range = std::ranges::forward_range && std::ranges::sized_range * @tparam G Graph type */ template -concept index_vertex_range = - requires(G& g) { - { vertices(g) } -> vertex_range; - } && std::integral> && - std::random_access_iterator::vertex_desc::iterator_type>; +concept index_vertex_range = std::integral> && // + std::integral::storage_type> && // + requires(G& g) { + { vertices(g) } -> vertex_range; + }; // ============================================================================= // Adjacency List Concepts @@ -296,12 +298,10 @@ concept in_edge_range = std::ranges::forward_range && edge -concept bidirectional_adjacency_list = - adjacency_list && - requires(G& g, vertex_t u, in_edge_t ie) { - { in_edges(g, u) } -> in_edge_range; - { source_id(g, ie) } -> std::convertible_to>; - }; +concept bidirectional_adjacency_list = adjacency_list && requires(G& g, vertex_t u, in_edge_t ie) { + { in_edges(g, u) } -> in_edge_range; + { source_id(g, ie) } -> std::convertible_to>; +}; /** * @brief Concept for bidirectional graphs with index-based vertex access @@ -316,9 +316,7 @@ concept bidirectional_adjacency_list = * @tparam G Graph type */ template -concept index_bidirectional_adjacency_list = - bidirectional_adjacency_list && index_vertex_range; - +concept index_bidirectional_adjacency_list = bidirectional_adjacency_list && index_vertex_range; } // namespace graph::adj_list diff --git a/include/graph/adj_list/vertex_descriptor_view.hpp b/include/graph/adj_list/vertex_descriptor_view.hpp index 695ba19..c41cb79 100644 --- a/include/graph/adj_list/vertex_descriptor_view.hpp +++ b/include/graph/adj_list/vertex_descriptor_view.hpp @@ -23,8 +23,8 @@ namespace graph::adj_list { template class vertex_descriptor_view : public std::ranges::view_interface> { public: - using vertex_desc = 
vertex_descriptor; - using storage_type = typename vertex_desc::storage_type; + using vertex_desc = vertex_descriptor; + using storage_type = typename vertex_desc::storage_type; /** * @brief Forward iterator that yields vertex_descriptor values From 0ed22f406f0f48a4030a4bc9e13bbeca4163da87 Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Sun, 1 Mar 2026 14:29:32 -0500 Subject: [PATCH 03/20] docs: add map-based container strategy and implementation plan - map_container_strategy.md: design strategy with concepts, vertex_map, algorithm generalization patterns, and settled design decisions - map_container_plan.md: phased implementation plan (10 phases, 55 sub-phases) with review gates and progress tracking table --- agents/map_container_plan.md | 803 +++++++++++++++++++++++++++++ agents/map_container_strategy.md | 832 +++++++++++++++++++++++++++++++ 2 files changed, 1635 insertions(+) create mode 100644 agents/map_container_plan.md create mode 100644 agents/map_container_strategy.md diff --git a/agents/map_container_plan.md b/agents/map_container_plan.md new file mode 100644 index 0000000..6595bca --- /dev/null +++ b/agents/map_container_plan.md @@ -0,0 +1,803 @@ +# Map-Based Container Strategy — Implementation Plan + +This plan derives from [map_container_strategy.md](map_container_strategy.md) and is +structured so that an agent can execute each phase or sub-phase independently. Every +sub-phase is self-contained: it states what to read, what to create/modify, how to +verify, and what to commit. + +**Branch:** `mapped` + +**Invariant:** After every sub-phase, `ctest` passes all existing tests (4343+). No +sub-phase may break backward compatibility. 
+ +--- + +## Conventions + +| Symbol | Meaning | +|--------|---------| +| **File** | Path relative to the repo root | +| **Read** | Files the agent must read for context before editing | +| **Create** | New files to create | +| **Modify** | Existing files to edit | +| **Verify** | Commands to run and expected outcomes | +| **Commit** | Git commit message (conventional-commit style) | + +--- + +## Phase 0 — Preparation (no code changes) + +### 0.1 Verify Baseline + +| Item | Detail | +|------|--------| +| **Action** | Run the full test suite to confirm the starting baseline is green. | +| **Verify** | `cd build/linux-clang-debug && ctest --output-on-failure` — all tests pass | + +### 0.2 Create Reference Directory + +| Item | Detail | +|------|--------| +| **Action** | Create `include/graph/algorithm/index/` and copy all 14 algorithm `.hpp` files into it as byte-identical snapshots. | +| **Create** | `include/graph/algorithm/index/` containing: | + +``` +traversal_common.hpp +breadth_first_search.hpp +depth_first_search.hpp +topological_sort.hpp +dijkstra_shortest_paths.hpp +bellman_ford_shortest_paths.hpp +connected_components.hpp +articulation_points.hpp +biconnected_components.hpp +label_propagation.hpp +mis.hpp +jaccard.hpp +tc.hpp +mst.hpp +``` + +| **Verify** | `diff include/graph/algorithm/breadth_first_search.hpp include/graph/algorithm/index/breadth_first_search.hpp` — no diff (repeat for each file) | + +### 0.3 Commit + +| Item | Detail | +|------|--------| +| **Commit** | `chore: snapshot index-only algorithm implementations for reference` | + +--- + +## Phase 1 — Concepts & Vertex Map Infrastructure + +All changes in this phase are **additive only** — no existing files are modified in a +way that changes behavior. Existing tests must still pass. 
+ +### 1.1 Add `mapped_vertex_range` and Composed Concepts + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/adj_list/adjacency_list_concepts.hpp` — understand current concept hierarchy | +| **Modify** | `include/graph/adj_list/adjacency_list_concepts.hpp` | +| **Changes** | Add after `index_vertex_range` / `index_adjacency_list` / `index_bidirectional_adjacency_list`: | + +```cpp +/// Satisfied by graphs whose vertex IDs are hashable keys (map/unordered_map). +/// Vertex IDs are sparse; lookup is via find_vertex(g, uid). +template +concept hashable_vertex_id = requires(const vertex_id_t& uid) { + { std::hash>{}(uid) } -> std::convertible_to; +}; + +template +concept mapped_vertex_range = + !index_vertex_range && + hashable_vertex_id && + requires(G& g) { + { vertices(g) } -> std::ranges::forward_range; + } && + requires(G& g, const vertex_id_t& uid) { + find_vertex(g, uid); + }; + +template +concept mapped_adjacency_list = adjacency_list && mapped_vertex_range; + +template +concept mapped_bidirectional_adjacency_list = + bidirectional_adjacency_list && mapped_vertex_range; +``` + +| **Verify** | Build succeeds; `ctest` — all existing tests pass | + +### 1.2 Create `vertex_map.hpp` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/adj_list/adjacency_list_concepts.hpp` (for `index_vertex_range`, `vertex_id_t`); `include/graph/algorithm/traversal_common.hpp` (for `num_vertices`, `views::vertexlist` usage patterns) | +| **Create** | `include/graph/adj_list/vertex_map.hpp` | +| **Contents** | | + +```cpp +// vertex_map type alias +// make_vertex_map — eager (with init value) and lazy (capacity only) overloads +// vertex_map_contains(map, uid) — always true for vector, calls contains() for unordered_map +// vertex_map_get(map, uid, default_val) — no-insertion read with fallback +``` + +Key implementation details: +- `vertex_map` = `conditional_t, vector, unordered_map, T>>` +- Eager `make_vertex_map(g, init_value)`: vector(N, init) 
for index, iterate vertexlist + insert for mapped +- Lazy `make_vertex_map(g)`: vector(N) for index, empty reserved map for mapped +- `vertex_map_contains`: dispatch on `random_access_range` — always `true` vs `m.contains(uid)` +- `vertex_map_get`: dispatch on `random_access_range` — `m[size_t(uid)]` vs `find()`+default + +| **Verify** | Build succeeds | + +### 1.3 Add `is_null_range_v` Trait + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/traversal_common.hpp` — locate `_null_range_type` | +| **Modify** | `include/graph/algorithm/traversal_common.hpp` | +| **Changes** | Add near `_null_range_type`: | + +```cpp +template +inline constexpr bool is_null_range_v = + std::is_same_v, _null_range_type>; +``` + +| **Verify** | Build succeeds; `ctest` — all existing tests pass | + +### 1.4 Create Map-Based Graph Fixtures for Algorithms + +| Item | Detail | +|------|--------| +| **Read** | `tests/common/map_graph_test_data.hpp` (existing sparse data); `tests/common/graph_fixtures.hpp` (existing index-graph fixtures — Dijkstra CLRS, BFS tree, etc.); `tests/common/algorithm_test_types.hpp` (SPARSE_VERTEX_TYPES macro) | +| **Create** | `tests/common/map_graph_fixtures.hpp` | +| **Contents** | Helper functions that build small map-based graphs matching the same topologies used in existing algorithm tests: | + +- `make_clrs_dijkstra_graph()` — the 5-vertex weighted graph used by Dijkstra tests, with sparse vertex IDs (e.g., 10, 20, 30, 40, 50) for map-based types, standard IDs for index types +- `make_bfs_tree()` — small BFS-testable graph +- `make_dfs_graph()` — small graph for DFS/topological sort +- `make_connected_components_graph()` — graph with 2–3 components + +Uses `fixture_selector::use_sparse` to pick ID scheme. 
+ +| **Verify** | Build succeeds | + +### 1.5 Create Vertex Map Unit Tests + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/adj_list/vertex_map.hpp` (from 1.2) | +| **Create** | `tests/adj_list/test_vertex_map.cpp` | +| **Modify** | `tests/CMakeLists.txt` — add the new test file to the build | +| **Tests** | | + +- `vertex_map` resolves to `vector` for `vov_weighted` and `unordered_map` for `mov_weighted` +- `make_vertex_map(g, 0)` (eager) — correct size, all values initialized +- `make_vertex_map(g)` (lazy) — empty map for mapped, sized vector for index +- `vertex_map_contains` — true for all index UIDs; false for absent keys in mapped +- `vertex_map_get(m, uid, default_val)` — returns mapped value if present, default if absent (no insertion) + +| **Verify** | `ctest` — all existing + new tests pass | + +### 1.6 Create Concept Static-Assert Tests + +| Item | Detail | +|------|--------| +| **Create** | `tests/adj_list/test_mapped_concepts.cpp` | +| **Modify** | `tests/CMakeLists.txt` — add the new test file | +| **Tests** | | + +- `static_assert(index_adjacency_list)` — index types satisfy index concept +- `static_assert(!mapped_vertex_range)` — index types do NOT satisfy mapped concept +- `static_assert(mapped_adjacency_list)` — map types satisfy mapped concept +- `static_assert(!index_vertex_range)` — map types do NOT satisfy index concept +- `static_assert(adjacency_list)` — both satisfy the base concept +- `static_assert(adjacency_list)` — both satisfy the base concept +- `static_assert(hashable_vertex_id)` — map types have hashable IDs + +| **Verify** | `ctest` — all tests pass | + +### 1.7 Commit + +| Item | Detail | +|------|--------| +| **Commit** | `feat: add mapped_adjacency_list concepts and vertex_map infrastructure` | + +### 1.R Review Gate + +Confirm before proceeding to Phase 2: +- `mapped_vertex_range` and `index_vertex_range` are mutually exclusive +- `vertex_map` produces correct types for both families +- `make_vertex_map` 
eager/lazy work correctly +- `vertex_map_contains` and `vertex_map_get` have no insertion side-effects for mapped +- All 4343+ existing tests still pass + +--- + +## Phase 2 — Traversal Algorithms + +Generalize BFS, DFS, and topological sort. These are the simplest algorithms — only +internal state arrays, no user-supplied property map parameters. + +**Workflow per algorithm:** +1. Read the algorithm header + its index/ snapshot + its test file +2. Relax concept from `index_adjacency_list` to `adjacency_list` +3. Replace internal `vector` with `make_vertex_map` (lazy where possible) +4. Replace `vertex_id_t` in containers with `vertex_id_store_t` +5. Replace any `for(uid=0; uid` → `adjacency_list` +- `vector visited(num_vertices(g), false)` → `auto visited = make_vertex_map(g, false);` (eager init — but see note: could be lazy+contains for large sparse graphs; start with eager for correctness parity) +- Queue element type: `vertex_id_t` → `vertex_id_store_t` +- Any `for(uid = 0; uid < N; ++uid)` → `for(auto&& [uid, u] : views::vertexlist(g))` +- Include `vertex_map.hpp` + +| **Modify** | `tests/algorithms/test_breadth_first_search.cpp` | +| **Changes** | Add `TEMPLATE_TEST_CASE` sections using `SPARSE_VERTEX_TYPES` for BFS with map-based graph fixtures | +| **Verify** | `ctest` — all existing + new tests pass | + +### 2.2 Review Gate — BFS + +Review diff: `diff include/graph/algorithm/breadth_first_search.hpp include/graph/algorithm/index/breadth_first_search.hpp` + +Confirm: +- Index-graph behavior is identical +- Map-graph BFS discovers all reachable vertices +- No `static_cast` remains +- No `for(uid=0; uid` → `adjacency_list` +- `vector color(num_vertices(g), White)` → lazy `vertex_map` with default=White via `vertex_map_get` +- Stack frame vertex_id field → `vertex_id_store_t` +- Include `vertex_map.hpp` + +| **Modify** | `tests/algorithms/test_depth_first_search.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test sections | +| **Verify** | `ctest` — all 
existing + new tests pass | + +### 2.4 Review Gate — DFS + +Review diff against `index/depth_first_search.hpp`. Confirm: +- Stack management works with `vertex_id_store_t` (reference_wrapper for map graphs) +- Color defaults work via lazy pattern + +### 2.5 Generalize `topological_sort` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/topological_sort.hpp`, `include/graph/algorithm/index/topological_sort.hpp`, `tests/algorithms/test_topological_sort.cpp` | +| **Modify** | `include/graph/algorithm/topological_sort.hpp` | +| **Changes** | | + +- Relax concept: `index_adjacency_list` → `adjacency_list` +- Same color pattern as DFS +- `vector> finish_order` → `vector>` +- Include `vertex_map.hpp` + +| **Modify** | `tests/algorithms/test_topological_sort.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test sections | +| **Verify** | `ctest` — all existing + new tests pass | + +### 2.6 Review Gate — Topological Sort + +Review diff against `index/topological_sort.hpp`. + +### 2.7 Commit + +| Item | Detail | +|------|--------| +| **Commit** | `feat: generalize BFS, DFS, topological_sort for mapped graphs` | + +--- + +## Phase 3 — Shortest Path Infrastructure + +Generalize `init_shortest_paths` and `_null_range_type` detection. No algorithm logic +changes yet — only the shared infrastructure. + +### 3.1 Add Graph-Parameterized `init_shortest_paths` Overloads + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/traversal_common.hpp` — locate existing `init_shortest_paths` overloads | +| **Modify** | `include/graph/algorithm/traversal_common.hpp` | +| **Changes** | | + +Keep existing overloads unchanged for backward compatibility. 
Add new overloads that +take `const G& g` as the first parameter: + +```cpp +// New overload — distances only +template +constexpr void init_shortest_paths(const G& g, Distances& distances); + +// New overload — distances + predecessors +template +constexpr void init_shortest_paths(const G& g, Distances& distances, Predecessors& predecessors); +``` + +For index graphs (`random_access_range`): same fill/iota behavior as today. +For mapped graphs: if map is empty (lazy), no-op. If pre-populated, fill existing entries. +Use `is_null_range_v` to skip predecessor handling. + +| **Verify** | Build succeeds; `ctest` — all existing tests pass | + +### 3.2 Verify `_null_range_type` Detection + +| Item | Detail | +|------|--------| +| **Action** | Add a static_assert or small unit test confirming `is_null_range_v<_null_range_type>` is `true` and `is_null_range_v>` is `false` | +| **Modify** | Can add to `tests/adj_list/test_vertex_map.cpp` or a new test file | +| **Verify** | Test passes | + +### 3.3 Commit + +| Item | Detail | +|------|--------| +| **Commit** | `feat: generalize init_shortest_paths for mapped graphs` | + +### 3.R Review Gate + +Confirm init functions handle: +- Index graph with pre-sized vector → fill + iota (unchanged) +- Mapped graph with empty map → no-op +- Mapped graph with pre-populated map → fill existing entries +- `_null_range_type` → skipped via `is_null_range_v` + +--- + +## Phase 4 — Shortest Path Algorithms + +Generalize Dijkstra and Bellman-Ford. These have the deepest index coupling: user-provided +`Distances`/`Predecessors`, priority queue, `static_cast`. 
+ +### 4.1 Generalize `dijkstra_shortest_paths` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/dijkstra_shortest_paths.hpp`, `include/graph/algorithm/index/dijkstra_shortest_paths.hpp`, `tests/algorithms/test_dijkstra_shortest_paths.cpp`, `include/graph/algorithm/traversal_common.hpp` | +| **Modify** | `include/graph/algorithm/dijkstra_shortest_paths.hpp` | +| **Changes** | | + +Two approaches (per strategy §3 — separate overloads): + +**Option A (simpler first pass):** Use `if constexpr` within existing functions: +- Relax concept to `adjacency_list` (if parameter types are generic enough) +- Replace `static_cast(uid)` → direct `[uid]` +- Queue element → `vertex_id_store_t` +- Use graph-parameterized `init_shortest_paths(g, distances, predecessors)` + +**Option B (full separate overloads — per strategy decision):** +- Keep existing `index_adjacency_list` + `random_access_range` overloads **unchanged** +- Add new overloads: `mapped_adjacency_list` + `vertex_id_map` + `vertex_id_map` +- Both overloads can share a private `_impl` function + +Start with Option A if the parameter concepts can be generalized without breaking +existing callers. Fall back to Option B if needed. + +| **Modify** | `tests/algorithms/test_dijkstra_shortest_paths.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test sections using map-based CLRS Dijkstra fixture | +| **Verify** | `ctest` — all existing + new tests pass | + +### 4.2 Review Gate — Dijkstra + +Review diff against `index/dijkstra_shortest_paths.hpp`. 
Confirm: +- Priority queue comparator works with `vertex_id_store_t` +- Distance reads use `vertex_map_get` with infinity default (no spurious insertion) +- Predecessor writes only occur during relaxation +- Existing index-graph performance is unchanged (no extra branching in hot path) + +### 4.3 Generalize `bellman_ford_shortest_paths` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/bellman_ford_shortest_paths.hpp`, `include/graph/algorithm/index/bellman_ford_shortest_paths.hpp`, `tests/algorithms/test_bellman_ford_shortest_paths.cpp` | +| **Modify** | `include/graph/algorithm/bellman_ford_shortest_paths.hpp` | +| **Changes** | | + +- Same Distances/Predecessors treatment as Dijkstra +- Convert `for(k = 0; k < N; ++k)` relaxation loop to `views::vertexlist` iteration +- Replace `static_cast()` → direct `[uid]` +- Use graph-parameterized `init_shortest_paths` + +| **Modify** | `tests/algorithms/test_bellman_ford_shortest_paths.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test sections | +| **Verify** | `ctest` — all existing + new tests pass | + +### 4.4 Review Gate — Bellman-Ford + +Review diff against `index/bellman_ford_shortest_paths.hpp`. + +### 4.5 Commit + +| Item | Detail | +|------|--------| +| **Commit** | `feat: generalize dijkstra, bellman_ford for mapped graphs` | + +--- + +## Phase 5 — Component Algorithms + +Generalize `connected_components` (3 functions: `connected_components()`, `kosaraju()`, +`afforest()`). Each function is a separate sub-task. 
+ +### 5.1 Generalize `connected_components()` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/connected_components.hpp`, `include/graph/algorithm/index/connected_components.hpp`, `tests/algorithms/test_connected_components.cpp` | +| **Modify** | `include/graph/algorithm/connected_components.hpp` — the `connected_components()` function only | +| **Changes** | | + +- Relax `Component` from `random_access_range` to accept vertex maps +- `for(uid = 0; uid < N; ++uid)` → `views::vertexlist` +- `iota` for component init → vertex iteration +- Internal `visited` → `vertex_map` + +| **Modify** | `tests/algorithms/test_connected_components.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section for `connected_components()` | +| **Verify** | `ctest` — all existing + new tests pass | + +### 5.2 Review Gate — `connected_components()` + +### 5.3 Generalize `kosaraju()` + +| Item | Detail | +|------|--------| +| **Modify** | `include/graph/algorithm/connected_components.hpp` — `kosaraju()` overloads only | +| **Changes** | | + +- `vector visited` → `vertex_map` +- `vector> order` → `vector>` +- Relax concept as needed + +| **Modify** | `tests/algorithms/test_scc_bidirectional.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section | +| **Verify** | `ctest` — all existing + new tests pass | + +### 5.4 Review Gate — `kosaraju()` + +### 5.5 Generalize `afforest()` + +| Item | Detail | +|------|--------| +| **Modify** | `include/graph/algorithm/connected_components.hpp` — `afforest()` overloads only | +| **Changes** | | + +- `iota(component)` → vertex iteration +- `static_cast>` adjustments +- Relax concept as needed + +| **Modify** | `tests/algorithms/test_connected_components.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section for `afforest()` | +| **Verify** | `ctest` — all existing + new tests pass | + +### 5.6 Review Gate — `afforest()` + +### 5.7 Commit + +| Item | Detail | +|------|--------| +| **Commit** | `feat: 
generalize connected_components for mapped graphs` | + +--- + +## Phase 6 — Simple Algorithms (batch, low risk) + +Four algorithms with minimal index coupling — at most one internal array, no user +property map parameters. + +### 6.1 Generalize `jaccard_coefficient` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/jaccard.hpp`, `include/graph/algorithm/index/jaccard.hpp`, `tests/algorithms/test_jaccard.cpp` | +| **Modify** | `include/graph/algorithm/jaccard.hpp` | +| **Changes** | | + +- `vector> nbrs` → `vertex_map>>` +- Relax concept to `adjacency_list` + +| **Modify** | `tests/algorithms/test_jaccard.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section | +| **Verify** | `ctest` — all existing + new tests pass | + +### 6.2 Generalize `mis` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/mis.hpp`, `include/graph/algorithm/index/mis.hpp`, `tests/algorithms/test_mis.cpp` | +| **Modify** | `include/graph/algorithm/mis.hpp` | +| **Changes** | | + +- `vector removed(N)` → lazy `vertex_map` + `vertex_map_contains` +- Relax concept to `adjacency_list` + +| **Modify** | `tests/algorithms/test_mis.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section | +| **Verify** | `ctest` — all existing + new tests pass | + +### 6.3 Generalize `triangle_count` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/tc.hpp`, `include/graph/algorithm/index/tc.hpp`, `tests/algorithms/test_triangle_count.cpp` | +| **Modify** | `include/graph/algorithm/tc.hpp` | +| **Changes** | | + +- `for(uid = 0; uid < N; ++uid)` → `views::vertexlist` +- Relax concept to `adjacency_list` + +| **Modify** | `tests/algorithms/test_triangle_count.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section | +| **Verify** | `ctest` — all existing + new tests pass | + +### 6.4 Generalize `label_propagation` + +| Item | Detail | +|------|--------| +| **Read** | 
`include/graph/algorithm/label_propagation.hpp`, `include/graph/algorithm/index/label_propagation.hpp`, `tests/algorithms/test_label_propagation.cpp` | +| **Modify** | `include/graph/algorithm/label_propagation.hpp` | +| **Changes** | | + +- `vector> order` → `vector>` +- `iota` → vertex iteration +- Relax `Label` parameter from `random_access_range` to accept vertex maps +- Relax concept to `adjacency_list` + +| **Modify** | `tests/algorithms/test_label_propagation.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section | +| **Verify** | `ctest` — all existing + new tests pass | + +### 6.5 Review Gate — All Four + +Review diffs for jaccard, mis, triangle_count, label_propagation against their +`index/` snapshots. + +### 6.6 Commit + +| Item | Detail | +|------|--------| +| **Commit** | `feat: generalize jaccard, mis, triangle_count, label_propagation for mapped graphs` | + +--- + +## Phase 7 — Structural Algorithms (highest effort) + +Articulation points (5 internal arrays) and biconnected components (3 internal arrays). 
+ +### 7.1 Generalize `articulation_points` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/articulation_points.hpp`, `include/graph/algorithm/index/articulation_points.hpp`, `tests/algorithms/test_articulation_points.cpp` | +| **Modify** | `include/graph/algorithm/articulation_points.hpp` | +| **Changes** | | + +- 5 internal arrays → vertex_maps: + - `disc` → `make_vertex_map(g)` (or appropriate type) + - `low` → `make_vertex_map(g)` + - `parent` → `make_vertex_map>(g)` + - `child_count` → `make_vertex_map(g)` + - `emitted` → `make_vertex_map(g, false)` +- `static_cast(N)` sentinel → `vertex_map_contains` or a dedicated sentinel +- Relax concept to `adjacency_list` + +| **Modify** | `tests/algorithms/test_articulation_points.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section | +| **Verify** | `ctest` — all existing + new tests pass | + +### 7.2 Review Gate — Articulation Points + +Most complex transformation. Carefully review: +- Sentinel values work correctly for mapped graphs +- All 5 vertex_maps are correctly initialized +- No `for(uid=0; uid` + +| **Modify** | `tests/algorithms/test_biconnected_components.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section | +| **Verify** | `ctest` — all existing + new tests pass | + +### 7.4 Review Gate — Biconnected Components + +### 7.5 Commit + +| Item | Detail | +|------|--------| +| **Commit** | `feat: generalize articulation_points, biconnected_components for mapped graphs` | + +--- + +## Phase 8 — MST Algorithms + +### 8.1 Generalize `prim` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/mst.hpp`, `include/graph/algorithm/index/mst.hpp`, `tests/algorithms/test_mst.cpp` | +| **Modify** | `include/graph/algorithm/mst.hpp` — `prim()` function only | +| **Changes** | | + +- Internal `vector distance` → `vertex_map` +- Relax `Predecessor`/`Weight` from `random_access_range` to accept vertex maps +- `for(v = 0; v < N; ++v)` → vertex 
iteration +- Drop `static_cast()` +- Relax concept to `adjacency_list` + +| **Modify** | `tests/algorithms/test_mst.cpp` | +| **Changes** | Add `SPARSE_VERTEX_TYPES` test section for `prim()` | +| **Verify** | `ctest` — all existing + new tests pass | + +### 8.2 Review Gate — Prim + +### 8.3 Evaluate `kruskal` / `inplace_kruskal` + +| Item | Detail | +|------|--------| +| **Read** | `include/graph/algorithm/mst.hpp` — `kruskal()` and `inplace_kruskal()` functions | +| **Action** | Evaluate whether map support makes sense for edge-list-centric algorithms. These use `x_index_edgelist_range` and `disjoint_vector`. | +| **Decision** | If feasible with reasonable effort, generalize. If the `disjoint_vector` coupling is too deep, document as deferred and skip. | + +### 8.4 Review Gate — Kruskal Decision + +### 8.5 Commit (if applicable) + +| Item | Detail | +|------|--------| +| **Commit** | `feat: generalize prim MST for mapped graphs` (and kruskal if included) | + +--- + +## Phase 9 — Cleanup & Documentation + +### 9.1 Final Full Test Run + +| Item | Detail | +|------|--------| +| **Action** | Run tests across multiple presets for confidence: | + +```bash +# Debug build +cd build/linux-clang-debug && cmake --build . && ctest --output-on-failure + +# Release build +cd build/linux-clang-release && cmake --build . && ctest --output-on-failure + +# Address sanitizer +cd build/linux-gcc-asan && cmake --build . 
&& ctest --output-on-failure +``` + +| **Verify** | All pass in all presets | + +### 9.2 Remove Reference Directory + +| Item | Detail | +|------|--------| +| **Action** | `rm -rf include/graph/algorithm/index/` | +| **Verify** | Directory removed; build still succeeds (no code references it) | + +### 9.3 Update Documentation + +| Item | Detail | +|------|--------| +| **Modify** | `docs/user-guide/` — add section on using algorithms with map-based graphs | +| **Modify** | `docs/reference/` — update algorithm reference to document map-graph support | +| **Modify** | `CHANGELOG.md` — add entries for map-based algorithm support | +| **Verify** | Docs build/render correctly | + +### 9.4 Commit + +| Item | Detail | +|------|--------| +| **Commit** | `chore: remove index-only reference copies, update docs for mapped graph support` | + +--- + +## Reference: Key Files + +| File | Role | +|------|------| +| `include/graph/adj_list/adjacency_list_concepts.hpp` | Concept definitions — modified in Phase 1 | +| `include/graph/adj_list/vertex_map.hpp` | New — vertex_map type alias + helpers — created in Phase 1 | +| `include/graph/algorithm/traversal_common.hpp` | Shared infrastructure — modified in Phases 1 & 3 | +| `tests/common/algorithm_test_types.hpp` | `SPARSE_VERTEX_TYPES` macro — already exists | +| `tests/common/map_graph_test_data.hpp` | Sparse test data — already exists | +| `tests/common/map_graph_fixtures.hpp` | New — map-based algorithm fixtures — created in Phase 1 | +| `tests/common/graph_fixtures.hpp` | Existing index-graph fixtures — read for reference | +| `include/graph/algorithm/index/` | Reference snapshots — created in Phase 0, removed in Phase 9 | + +--- + +## Progress Tracking + +| Phase | Sub-Phase | Description | Status | +|-------|-----------|-------------|--------| +| **0** | **0.1** | Verify baseline (all tests pass) | Not Started | +| **0** | **0.2** | Create `index/` directory, copy 14 algorithm files | Not Started | +| **0** | **0.3** | Commit 
snapshot | Not Started | +| **1** | **1.1** | Add `mapped_vertex_range` + composed concepts | Not Started | +| **1** | **1.2** | Create `vertex_map.hpp` (type alias + helpers) | Not Started | +| **1** | **1.3** | Add `is_null_range_v` trait | Not Started | +| **1** | **1.4** | Create `map_graph_fixtures.hpp` | Not Started | +| **1** | **1.5** | Create vertex_map unit tests | Not Started | +| **1** | **1.6** | Create concept static_assert tests | Not Started | +| **1** | **1.7** | Commit concepts + vertex_map infrastructure | Not Started | +| **1** | **1.R** | Review gate: concept mutual exclusivity, vertex_map correctness | Not Started | +| **2** | **2.1** | Generalize `breadth_first_search` | Not Started | +| **2** | **2.2** | Review gate: BFS diff | Not Started | +| **2** | **2.3** | Generalize `depth_first_search` | Not Started | +| **2** | **2.4** | Review gate: DFS diff | Not Started | +| **2** | **2.5** | Generalize `topological_sort` | Not Started | +| **2** | **2.6** | Review gate: topological sort diff | Not Started | +| **2** | **2.7** | Commit BFS, DFS, topological_sort | Not Started | +| **3** | **3.1** | Add graph-parameterized `init_shortest_paths` overloads | Not Started | +| **3** | **3.2** | Verify `_null_range_type` detection with `is_null_range_v` | Not Started | +| **3** | **3.3** | Commit init_shortest_paths generalization | Not Started | +| **3** | **3.R** | Review gate: init functions handle index/mapped/null correctly | Not Started | +| **4** | **4.1** | Generalize `dijkstra_shortest_paths` | Not Started | +| **4** | **4.2** | Review gate: Dijkstra diff | Not Started | +| **4** | **4.3** | Generalize `bellman_ford_shortest_paths` | Not Started | +| **4** | **4.4** | Review gate: Bellman-Ford diff | Not Started | +| **4** | **4.5** | Commit Dijkstra + Bellman-Ford | Not Started | +| **5** | **5.1** | Generalize `connected_components()` | Not Started | +| **5** | **5.2** | Review gate: connected_components() diff | Not Started | +| **5** | 
**5.3** | Generalize `kosaraju()` | Not Started | +| **5** | **5.4** | Review gate: kosaraju() diff | Not Started | +| **5** | **5.5** | Generalize `afforest()` | Not Started | +| **5** | **5.6** | Review gate: afforest() diff | Not Started | +| **5** | **5.7** | Commit connected_components | Not Started | +| **6** | **6.1** | Generalize `jaccard_coefficient` | Not Started | +| **6** | **6.2** | Generalize `mis` | Not Started | +| **6** | **6.3** | Generalize `triangle_count` | Not Started | +| **6** | **6.4** | Generalize `label_propagation` | Not Started | +| **6** | **6.5** | Review gate: all four diffs | Not Started | +| **6** | **6.6** | Commit jaccard, mis, triangle_count, label_propagation | Not Started | +| **7** | **7.1** | Generalize `articulation_points` (5 internal arrays) | Not Started | +| **7** | **7.2** | Review gate: articulation_points diff (most complex) | Not Started | +| **7** | **7.3** | Generalize `biconnected_components` (3 internal arrays) | Not Started | +| **7** | **7.4** | Review gate: biconnected_components diff | Not Started | +| **7** | **7.5** | Commit articulation_points + biconnected_components | Not Started | +| **8** | **8.1** | Generalize `prim` MST | Not Started | +| **8** | **8.2** | Review gate: prim diff | Not Started | +| **8** | **8.3** | Evaluate `kruskal` / `inplace_kruskal` feasibility | Not Started | +| **8** | **8.4** | Review gate: kruskal decision | Not Started | +| **8** | **8.5** | Commit MST (prim, and kruskal if applicable) | Not Started | +| **9** | **9.1** | Final full test run (clang-debug, clang-release, gcc-asan) | Not Started | +| **9** | **9.2** | Remove `include/graph/algorithm/index/` directory | Not Started | +| **9** | **9.3** | Update documentation for mapped graph support | Not Started | +| **9** | **9.4** | Commit cleanup + docs | Not Started | diff --git a/agents/map_container_strategy.md b/agents/map_container_strategy.md new file mode 100644 index 0000000..c2a4103 --- /dev/null +++ 
b/agents/map_container_strategy.md @@ -0,0 +1,832 @@ +# Map-Based Vertex Container Strategy + +## Goal + +Expand the graph library's concept hierarchy, algorithm infrastructure, and data structure +support so that algorithms work with both **index-based** (vector, deque) and **key-based** +(std::map, std::unordered_map) vertex containers. Currently every algorithm requires +`index_adjacency_list`, which gates on `std::integral>` and +`std::integral`. Map-based graphs already compile at the container layer but +cannot be used with any algorithm. + +--- + +## Current State + +### Concept Hierarchy + +``` +vertex_range ← forward + sized range of vertex descriptors + └─ index_vertex_range ← adds: integral vertex_id_t, integral storage_type + +adjacency_list ← vertex_range + out_edge_range + └─ index_adjacency_list ← adds: index_vertex_range + +bidirectional_adjacency_list ← adjacency_list + in_edge_range + └─ index_bidirectional_adjacency_list ← adds: index_vertex_range +``` + +Every algorithm constrains `G` with `index_adjacency_list` or +`index_bidirectional_adjacency_list`. No algorithm uses the un-prefixed +`adjacency_list` concept. + +### Vertex ID Types + +| Container | `storage_type` | `vertex_id_t` | `raw_vertex_id_t` | +|--------------------|-------------------------|-------------------|-----------------------| +| `std::vector` | `size_t` | `size_t` | `size_t` | +| `std::deque` | `size_t` | `size_t` | `size_t` | +| `std::map` | `map::iterator` | `K` | `const K&` | +| `std::unordered_map` | `unordered_map::iterator` | `K` | `const K&` | + +For index-based graphs, `vertex_id_t` is `size_t` and IDs form a dense range `[0, N)`. +For map-based graphs, `vertex_id_t` is the key type (`VId` template parameter, default +`uint32_t`), and IDs are **sparse** — not necessarily contiguous. 
+ +### Algorithm Index Assumptions + +All 14 algorithm files exhibit five categories of index dependency: + +| Category | Pattern | Files | +|----------|---------|-------| +| **Internal state arrays** | `vector visited(N)`, `vector color(N)`, `vector disc(N)`, etc. sized by `num_vertices(g)` and indexed by `arr[uid]` | 13 of 14 | +| **Parameter arrays** | `random_access_range Distances`, `random_access_range Predecessors`, `random_access_range Component` — accessed as `distances[uid]` | 7 of 14 | +| **Sequential iteration** | `for (id_type uid = 0; uid < N; ++uid)` | 6 of 14 | +| **Sequential fill** | `std::iota(pred.begin(), pred.end(), 0)` | 3 of 14 | +| **`static_cast<size_t>(uid)`** | Explicit cast to index into arrays | 3 of 14 | + +### Existing Map-Based Infrastructure + +The container layer already supports map-based graphs: + +- **7 map-based traits**: `mofl`, `mol`, `mov`, `mod`, `mos`, `mous`, `mom` — all use + `std::map` for vertices +- **`find_vertex` CPO**: Has an `_associative` path that calls `g.find(uid)` — O(log n) + for `std::map` +- **`vertex_descriptor`**: Already branches on iterator category — stores iterator for + bidirectional, stores index for random-access +- **`vertex_id_store_t`**: In `traversal_common.hpp`, conditionally uses + `reference_wrapper` for non-trivial IDs — shows awareness of the duality + +--- + +## Design Strategy + +### 1. New Concept: `mapped_vertex_range` + +Add a concept that captures key-based vertex access — the map analog of +`index_vertex_range`: + +```cpp +/// Satisfied by graphs whose vertex IDs are hashable keys (map/unordered_map). +/// Vertex IDs are sparse; lookup is via find_vertex(g, uid). 
+template +concept mapped_vertex_range = + !index_vertex_range && // mutually exclusive with index + requires(G& g) { + { vertices(g) } -> vertex_range; + } && + requires(G& g, const vertex_id_t& uid) { + find_vertex(g, uid); // key-based lookup must exist + }; +``` + +And the composed concepts: + +```cpp +template +concept mapped_adjacency_list = adjacency_list && mapped_vertex_range; + +template +concept mapped_bidirectional_adjacency_list = + bidirectional_adjacency_list && mapped_vertex_range; +``` + +The hierarchy becomes: + +``` +vertex_range + ├─ index_vertex_range (integral IDs, dense [0,N)) + └─ mapped_vertex_range (key-based IDs, sparse) + +adjacency_list + ├─ index_adjacency_list + └─ mapped_adjacency_list + +bidirectional_adjacency_list + ├─ index_bidirectional_adjacency_list + └─ mapped_bidirectional_adjacency_list +``` + +### 2. Vertex Property Map: `vertex_map` + +The core abstraction that allows algorithms to store per-vertex data regardless of +container type. This replaces the hard-coded `vector` + `arr[uid]` pattern. + +```cpp +/// A per-vertex associative container: vector for index graphs, +/// unordered_map, T> for mapped graphs. +template +using vertex_map = std::conditional_t< + index_vertex_range, + std::vector, + std::unordered_map, T> +>; +``` + +#### Access Pattern + +Algorithms currently write `distances[static_cast(uid)]`. With `vertex_map`, we +need a uniform access function: + +```cpp +/// O(1) for vector, O(1) amortized for unordered_map. +template +constexpr auto& vertex_map_get(Map& m, const Key& uid) { + if constexpr (std::ranges::random_access_range) { + return m[static_cast(uid)]; + } else { + return m[uid]; // unordered_map::operator[] + } +} +``` + +Or, preferably, a lighter approach using `operator[]` directly — since `size_t` already +works as an index for `vector` and the key type works for `unordered_map`, we can simply +write `distances[uid]` if the map's key type matches `vertex_id_t`. 
This may require +a thin wrapper type rather than a bare type alias, to ensure `operator[]` works uniformly. + +#### Read vs Write: Guarding Against Spurious Insertion + +For `unordered_map`, `operator[]` **inserts** a default-constructed entry when the key is +absent. This is correct for write-first patterns (`distances[uid] = 0`) but dangerous for +read/test patterns (`if (visited[uid])`) — it would silently populate the map with every +queried vertex, wasting memory and masking logic errors. + +The correct access pattern depends on the algorithm's intent: + +| Intent | vector (index) | unordered_map (mapped) | Helper | +|--------|---------------|----------------------|--------| +| **Write (set value)** | `m[uid] = val` | `m[uid] = val` | `operator[]` — same for both | +| **Read (known present)** | `m[uid]` | `m[uid]` | `operator[]` — safe only after eager init | +| **Test existence** | `m[uid]` (always valid) | `m.contains(uid)` | `vertex_map_contains(m, uid)` | +| **Read-or-default** | `m[uid]` | `m.contains(uid) ? m[uid] : default_val` | `vertex_map_get(m, uid, default_val)` | + +This is **situational** — each algorithm must choose the right pattern: + +- **Eager-initialized maps** (e.g. `make_vertex_map(g, false)` pre-populates all keys): + `operator[]` is safe for both reads and writes since every vertex has an entry. + Appropriate for distances, colors, component labels — where every vertex needs an + initial value anyway. + +- **Lazy / sparse maps** (e.g. `visited` where only discovered vertices are inserted): + Must use `contains()` to test and `insert()`/`emplace()` to write. This is more + memory-efficient when the algorithm only touches a fraction of vertices (e.g. + single-source BFS on a large graph). + +Recommended access helpers: + +```cpp +/// Test whether a vertex ID has an entry. Always true for vector (index graphs). 
+template +constexpr bool vertex_map_contains(const Map& m, const Key& uid) { + if constexpr (std::ranges::random_access_range) { + return true; // vector: all indices in [0, size) are valid + } else { + return m.contains(uid); + } +} + +/// Read with a default fallback. No insertion for unordered_map. +template +constexpr auto vertex_map_get(const Map& m, const Key& uid, const T& default_val) { + if constexpr (std::ranges::random_access_range) { + return m[static_cast(uid)]; + } else { + auto it = m.find(uid); + return it != m.end() ? it->second : default_val; + } +} +``` + +**Algorithm-specific guidance:** + +| Algorithm | Map Pattern | Rationale | +|-----------|------------|-----------| +| BFS `visited` | **Lazy + `contains()`** | Only discovered vertices need tracking; single-source may touch a small fraction | +| DFS `color` | **Lazy + default=White** | Undiscovered vertices are implicitly White; only touched vertices get Gray/Black entries | +| Dijkstra `distances` | **Lazy + default=∞** | Absent key means infinite distance; entries created only as vertices are discovered/relaxed | +| Dijkstra `predecessor` | **Lazy** | Entries created during relaxation; absent means "no predecessor" (unreached) | +| Dijkstra priority queue | N/A | Stores IDs, not a property map | +| Connected components `component` | **Eager** | User-provided, all vertices need a label | +| Articulation points `disc`/`low` | **Eager** | Iterates all vertices in outer loop | +| Label propagation `label` | **Eager** | User-provided, all vertices need a label | +| MIS `removed` | **Lazy + `contains()`** | Only removed vertices need tracking | + +**Key principle for mapped graphs:** If an algorithm's semantics assign a meaningful +default to absent vertices (infinity for distances, White for DFS color, false for +visited), the map can start **empty** and treat absence as that default. 
This avoids +O(V) initialization cost and O(V) memory — critical when a single-source algorithm on +a large mapped graph may only reach a small fraction of vertices. + +#### Factory Functions + +Two overloads cover the eager/lazy spectrum: + +```cpp +/// Eager: create a vertex_map with every vertex pre-populated to init_value. +/// For index graphs: vector(N, init_value). O(V) always. +/// For mapped graphs: unordered_map with all keys inserted. O(V). +/// Use when the algorithm reads all vertices before writing (e.g. component labels). +template +constexpr auto make_vertex_map(const G& g, const T& init_value) { + if constexpr (index_vertex_range) { + return std::vector(num_vertices(g), init_value); + } else { + std::unordered_map, T> m; + m.reserve(num_vertices(g)); + for (auto&& [uid, u] : graph::adj_list::views::vertexlist(g)) { + m[uid] = init_value; + } + return m; + } +} + +/// Lazy: create an empty vertex_map with capacity hint. +/// For index graphs: vector(N) with default-constructed values (dense, O(V)). +/// For mapped graphs: empty unordered_map with reserved buckets (O(1) until use). +/// Use with vertex_map_get(m, uid, default_val) when absence has a semantic meaning +/// (e.g. infinity for distances, White for DFS color, false for visited). +template +constexpr auto make_vertex_map(const G& g) { + if constexpr (index_vertex_range) { + return std::vector(num_vertices(g)); + } else { + std::unordered_map, T> m; + m.reserve(num_vertices(g)); + return m; + } +} +``` + +**Note:** For index graphs, both overloads are O(V) — vectors must be sized upfront. +The lazy advantage is exclusively for mapped graphs, where `make_vertex_map(g)` +returns an empty map and entries are created only as the algorithm discovers vertices. + +### 3. Algorithm Generalization Pattern + +#### Step 1: Dual-Concept Constraint + +Each algorithm needs to accept both `index_adjacency_list` and `mapped_adjacency_list`. 
+ +**Decision: Separate overloads (Option B)** + +Keep the existing `index_adjacency_list` overloads **unchanged** — 100% backward +compatible, no risk to existing callers. Add parallel overloads for `mapped_adjacency_list` +where the property map parameters use a `vertex_id_map` concept instead of +`random_access_range`: + +```cpp +// Existing — unchanged, Distances is random_access_range +template +void dijkstra_shortest_paths(G&&, Sources&, Distances&, ...); + +// New — Distances satisfies vertex_id_map +template +requires vertex_id_map && ... +void dijkstra_shortest_paths(G&&, Sources&, Distances&, ...); +``` + +Both overloads can share a common implementation function (private `_impl`) that uses +`if constexpr (index_vertex_range)` for the few places that genuinely differ +(e.g. `vertex_map_get` vs direct `operator[]`). The public overloads are thin wrappers +that dispatch to the shared impl. + +For algorithms with **no user property map parameters** (BFS, DFS, topological sort, +triangle count), the two overloads are identical except for the concept constraint on `G`, +so they can be merged into a single template constrained with `adjacency_list` +if preferred — the separate-overload rule applies specifically to algorithms that +accept `Distances`, `Predecessors`, `Component`, `Label`, etc. + +**Future enhancement:** Once the separate overloads are stable, consider unifying them +into a single template per algorithm using a `vertex_property_map` concept that +both `vector` (index) and `unordered_map` (mapped) satisfy. This would eliminate +the overload duplication while preserving type safety. Deferred to avoid complexity +during the initial migration. + +#### Step 2: Replace Internal State Arrays + +Transform every internal `vector(N, init)` + `arr[uid]` into +`make_vertex_map(g, init)` + `vmap[uid]`: + +**Before (BFS):** +```cpp +std::vector visited(num_vertices(g), false); +// ... 
+visited[uid] = true; +``` + +**After (BFS):** +```cpp +auto visited = make_vertex_map(g, false); +// ... +visited[uid] = true; // works for both vector and unordered_map +``` + +Note: `vector` is a special case — it uses proxy references and bit-packing. For +unordered_map we'd use `unordered_map`. The `make_vertex_map` approach +handles this naturally since `conditional_t` picks the right type. + +#### Step 3: Replace Sequential Loops + +**Before:** +```cpp +for (id_type uid = 0; uid < N; ++uid) { ... } +``` + +**After:** +```cpp +for (auto&& [uid, u] : views::vertexlist(g)) { ... } +``` + +Most algorithms already use `views::vertexlist` or `views::basic_vertexlist` for outer +loops. The `for (uid = 0; uid < N)` pattern appears in 6 files and must be converted. + +#### Step 4: Replace `std::iota` for Predecessors + +**Before:** +```cpp +std::iota(predecessors.begin(), predecessors.end(), 0); +``` + +**After:** +```cpp +for (auto&& [uid, u] : views::vertexlist(g)) { + predecessors[uid] = uid; // each vertex is its own predecessor +} +``` + +#### Step 5: Replace `static_cast(uid)` + +For index graphs, `uid` is already `size_t`, so the cast is a no-op. For map graphs, +`uid` is the key type and should be used directly. The `vertex_map` abstraction makes +casts unnecessary: + +**Before:** `distances[static_cast(uid)]` +**After:** `distances[uid]` + +### 4. Distances & Predecessors Parameter Types + +Currently `Distances` and `Predecessors` are constrained as `random_access_range`. For +map-based graphs, they'll be `unordered_map, Distance>` or similar. + +**Decision: `vertex_id_map` concept for mapped overloads** + +The index overloads keep `random_access_range` unchanged. The new mapped overloads +use this concept: + +```cpp +/// Satisfied by any associative container that maps vertex IDs to values. +template +concept vertex_id_map = + requires(M& m, const vertex_id_t& uid) { + { m[uid] } -> std::convertible_to; + }; +``` + +`unordered_map` satisfies this. 
Users can also pass custom property map types +as long as they support `operator[vertex_id_t]`. + +### 5. Vertex ID Storage: `vertex_id_store_t` + +Algorithms store vertex IDs in queues, stacks, vectors, and other transient containers. +For index-based graphs `vertex_id_t` is `size_t` — trivially copyable. But for +map-based graphs it can be an expensive type (e.g. `std::string`). The existing +`vertex_id_store_t` in `traversal_common.hpp` already solves this: + +```cpp +template +using vertex_id_store_t = std::conditional_t< + std::is_reference_v>, + std::reference_wrapper>>, + adj_list::vertex_id_t>; +``` + +- **Index graphs:** `vertex_id_store_t` = `size_t` (same as `vertex_id_t`, zero cost) +- **Map graphs:** `vertex_id_store_t` = `reference_wrapper` (8 bytes, trivially + copyable, references the stable key in the map node — no copy of the key itself) + +Every algorithm that stores vertex IDs in transient containers must use +`vertex_id_store_t` instead of `vertex_id_t`: + +| Algorithm | Current ID Storage | Change To | +|-----------|-------------------|-----------| +| BFS | `std::queue` | `std::queue>` | +| DFS | stack frame with `vertex_id` field | field type → `vertex_id_store_t` | +| Dijkstra | `std::priority_queue` | `std::priority_queue>` | +| Bellman-Ford | iterates `id_type` loop vars | loop vars → `vertex_id_store_t` | +| Topological sort | `std::vector finish_order` | `std::vector>` | +| Connected components | `std::vector> order` | `std::vector>` | +| Label propagation | `std::vector> order` | `std::vector>` | + +**Note:** `reference_wrapper` implicitly converts to `const K&`, so comparisons +and use as map keys work transparently. The priority queue comparator, `unordered_map` +lookups, and `operator[]` on vertex maps all accept it without casts. + +### 6. Priority Queue Adaptation + +Dijkstra uses `std::priority_queue` with `id_type` elements compared via +`distances[static_cast(lhs)] > distances[static_cast(rhs)]`. 
+ +For map-based graphs, the queue stores `vertex_id_store_t` values (cheap reference +wrappers). The comparator changes to: + +```cpp +auto cmp = [&distances](const auto& lhs, const auto& rhs) { + return distances[lhs] > distances[rhs]; +}; +``` + +No structural change needed — drop `static_cast()` and use `vertex_id_store_t` +as the queue element type. + +### 7. `_null_range_type` Adaptation + +The `_null_predecessors` type currently derives from `vector`. For mapped graphs, +it should be detected at compile time so that predecessor tracking is skipped entirely. +The algorithm already checks `if constexpr (is_same_v)` +(or similar). + +**Strategy:** Make `_null_range_type` satisfy both the `random_access_range` concept (as +now) and any new `vertex_id_map` concept, or detect it with a simple type trait: + +```cpp +template +inline constexpr bool is_null_range_v = std::is_same_v, _null_range_type>; +``` + +Algorithms use `if constexpr (is_null_range_v)` to skip predecessor writes. + +### 8. Hashability Requirement for Map-Based Graphs + +`unordered_map, T>` requires that `vertex_id_t` is hashable. For +`std::map`-based graphs where `VId` might be a custom type, we need: + +```cpp +template +concept hashable_vertex_id = requires(const vertex_id_t& uid) { + { std::hash>{}(uid) } -> std::convertible_to; +}; +``` + +This should be part of the `mapped_vertex_range` concept or required by `make_vertex_map`. + +### 9. `init_shortest_paths` Generalization + +The utility function in `traversal_common.hpp` needs dual-path support. + +For **index graphs**, the current behavior is preserved: fill the pre-sized vector with +infinity values and iota-fill predecessors. 
+ +For **mapped graphs**, initialization can be a **no-op** if the algorithm treats absent +keys as "infinite distance" / "no predecessor": + +```cpp +template +constexpr void init_shortest_paths(const G& g, Distances& distances) { + if constexpr (std::ranges::random_access_range) { + // Index graph: fill the pre-sized vector + std::ranges::fill(distances, shortest_path_infinite_distance>()); + } else { + // Mapped graph: if distances map is empty (lazy pattern), nothing to fill — + // the algorithm uses vertex_map_get(distances, uid, infinity) for reads. + // If the caller pre-populated keys, set them all to infinity. + for (auto& [key, val] : distances) { + val = shortest_path_infinite_distance(); + } + } +} + +template +constexpr void init_shortest_paths(const G& g, Distances& distances, Predecessors& predecessors) { + init_shortest_paths(g, distances); + if constexpr (is_null_range_v) { + // No-op: predecessor tracking disabled + } else if constexpr (std::ranges::random_access_range) { + // Index graph: iota-fill [0, 1, 2, ...] + std::iota(predecessors.begin(), predecessors.end(), 0); + } else { + // Mapped graph: leave empty — absent key means "self-predecessor" (unreached). + // The algorithm inserts entries only for vertices whose predecessor changes. + // Callers can check: if (!vertex_map_contains(pred, uid)) => uid is a root/unreached. + } +} +``` + +This means for a mapped graph, the typical call sequence is: +```cpp +auto distances = make_vertex_map(g); // empty map, O(1) +auto predecessor = make_vertex_map>(g); // empty map, O(1) +init_shortest_paths(g, distances, predecessor); // no-op for empty maps +// Algorithm populates entries only for reached vertices. +``` + +--- + +## Transition Strategy: Preserving Existing Implementations + +Before generalizing any algorithm, copy the current index-only implementation to a +reference location so it can be reviewed side-by-side with the new generalized code. 
+ +**Approach:** For each algorithm file being generalized, copy the original to +`include/graph/algorithm/index/`: + +``` +include/graph/algorithm/ + breadth_first_search.hpp ← generalized (new) + depth_first_search.hpp ← generalized (new) + dijkstra_shortest_paths.hpp ← generalized (new) + ... + index/ ← originals preserved for reference + breadth_first_search.hpp + depth_first_search.hpp + dijkstra_shortest_paths.hpp + ... +``` + +**Workflow per algorithm:** +1. Copy `algorithm/foo.hpp` → `algorithm/index/foo.hpp` (unchanged original) +2. Modify `algorithm/foo.hpp` in place to support both index and mapped graphs +3. Verify all existing index-based tests still pass (backward compatibility) +4. Add new tests with map-based graphs +5. Review diff between `algorithm/foo.hpp` and `algorithm/index/foo.hpp` to verify + the generalization is correct and no index-graph performance was lost + +The `index/` directory is a **temporary review aid** — once all algorithms are +generalized and verified, it can be removed. It is not included in the public API +or installed headers. + +--- + +## Implementation Plan + +Each phase below is a self-contained unit of work that compiles, passes all existing tests, +and can be committed independently. Phases are ordered so that each builds on the previous +one. Algorithm conversions within a phase are **one-at-a-time** — each algorithm is a +separate sub-task for review before proceeding to the next. + +### Notation + +- **Existing tests** = the 4343+ tests in `tests/algorithms/` that use index-based graphs. + These must continue to pass after every sub-task. Run `ctest` after each change. +- **Preserve** = copy the original file to `include/graph/algorithm/index/` before editing. +- **Review gate** = stop and review the diff before starting the next sub-task. + +--- + +### Phase 0: Preparation (no code changes) + +**Goal:** Set up the reference directory and verify the starting baseline. 
+ +| Step | Action | Verification | +|------|--------|-------------| +| 0.1 | Run full test suite, confirm all tests pass | `ctest` — all green | +| 0.2 | Create `include/graph/algorithm/index/` directory | Directory exists | +| 0.3 | Copy all 14 algorithm `.hpp` files to `index/` | Files are byte-identical copies | +| 0.4 | Commit: `"chore: snapshot index-only algorithm implementations for reference"` | Clean commit on `mapped` branch | + +**Files created:** +``` +include/graph/algorithm/index/ + traversal_common.hpp + breadth_first_search.hpp + depth_first_search.hpp + topological_sort.hpp + dijkstra_shortest_paths.hpp + bellman_ford_shortest_paths.hpp + connected_components.hpp + articulation_points.hpp + biconnected_components.hpp + label_propagation.hpp + mis.hpp + jaccard.hpp + triangle_count.hpp + mst.hpp +``` + +--- + +### Phase 1: Concepts & Vertex Map Infrastructure + +**Goal:** Add new concepts and the `vertex_map` utility. No algorithm files touched. +All existing tests must still pass (the new code is additive only). + +| Step | Action | Verification | +|------|--------|-------------| +| 1.1 | Add `mapped_vertex_range`, `mapped_adjacency_list`, `mapped_bidirectional_adjacency_list` concepts to `adjacency_list_concepts.hpp` | Compiles, existing tests pass | +| 1.2 | Create `include/graph/adj_list/vertex_map.hpp` with: `vertex_map` alias, `make_vertex_map` (eager + lazy overloads), `vertex_map_contains`, `vertex_map_get` | Compiles | +| 1.3 | Add `is_null_range_v` trait to `traversal_common.hpp` | Compiles, existing tests pass | +| 1.4 | Create `tests/common/map_graph_fixtures.hpp` with helper functions that build small map-based graphs (`mov_graph_traits`, `mol_graph_traits`) matching the same topologies as existing fixtures (e.g. 
the CLRS Dijkstra graph, a simple BFS tree) | Compiles | +| 1.5 | Create `tests/adj_list/test_vertex_map.cpp` — unit tests for `vertex_map`, `make_vertex_map`, `vertex_map_contains`, `vertex_map_get` on both index and mapped graph types | All new + existing tests pass | +| 1.6 | Create `tests/adj_list/test_mapped_concepts.cpp` — static_assert tests verifying: map-based graphs satisfy `mapped_adjacency_list` and `adjacency_list` but NOT `index_adjacency_list`; index-based graphs satisfy `index_adjacency_list` but NOT `mapped_vertex_range` | All tests pass | +| 1.7 | Commit: `"feat: add mapped_adjacency_list concepts and vertex_map infrastructure"` | Clean commit | + +**Review gate:** Confirm concept mutual exclusivity and vertex_map behavior before +proceeding to algorithm changes. + +--- + +### Phase 2: Traversal Algorithms (one at a time) + +**Goal:** Generalize BFS, DFS, and topological sort. These are the simplest algorithms +(only internal state arrays, no user-supplied property map parameters). + +Each sub-task follows this workflow: +1. 
Edit `algorithm/foo.hpp` — relax concept from `index_adjacency_list` to `adjacency_list`, + replace internal `vector` with `make_vertex_map`, replace `vertex_id_t` in + containers with `vertex_id_store_t`, replace any `for(uid=0;uid visited` → lazy `vertex_map` + `contains()`; queue element → `vertex_id_store_t` | `test_breadth_first_search.cpp` | +| 2.2 | **Review gate** | Review diff of BFS against `index/breadth_first_search.hpp` | — | +| 2.3 | `depth_first_search` | `vector color` → lazy `vertex_map` + default=White; stack frame ID → `vertex_id_store_t` | `test_depth_first_search.cpp` | +| 2.4 | **Review gate** | Review diff of DFS | — | +| 2.5 | `topological_sort` | Same color pattern as DFS; `vector finish_order` → `vector>` | `test_topological_sort.cpp` | +| 2.6 | **Review gate** | Review diff of topological sort | — | +| 2.7 | Commit: `"feat: generalize BFS, DFS, topological_sort for mapped graphs"` | All tests pass | + +--- + +### Phase 3: Shortest Path Infrastructure + +**Goal:** Generalize `init_shortest_paths`, `_null_range_type`, and the weight function +concepts to work with both container families. No algorithm logic changes yet. + +| Step | Action | Verification | +|------|--------|-------------| +| 3.1 | Add graph-parameterized overloads of `init_shortest_paths` to `traversal_common.hpp` (keep old overloads for backward compat; new ones take `const G& g` first param) | Existing tests pass | +| 3.2 | Verify `_null_range_type` detection works with `is_null_range_v` in both index and mapped contexts | Unit test | +| 3.3 | Commit: `"feat: generalize init_shortest_paths for mapped graphs"` | All tests pass | + +**Review gate:** Confirm init functions work before touching Dijkstra/Bellman-Ford. + +--- + +### Phase 4: Shortest Path Algorithms (one at a time) + +**Goal:** Generalize Dijkstra and Bellman-Ford. These have the deepest index coupling +(user-provided `Distances`/`Predecessors` parameters, priority queue, `static_cast`). 
+ +| Step | Algorithm | Key Changes | Test File | +|------|-----------|------------|-----------| +| 4.1 | `dijkstra_shortest_paths` | Relax concept to `adjacency_list`; relax `Distances`/`Predecessors` from `random_access_range` to accept vertex maps; drop `static_cast()` in favor of direct `distances[uid]`; queue element → `vertex_id_store_t`; use `vertex_map_get` with infinity default for distance reads; use graph-param `init_shortest_paths` | `test_dijkstra_shortest_paths.cpp` | +| 4.2 | **Review gate** | Review diff against `index/dijkstra_shortest_paths.hpp` | — | +| 4.3 | `bellman_ford_shortest_paths` | Same treatment as Dijkstra; additionally convert `for(k=0; k visited` → vertex_map; `vector order` → `vector>` | `test_scc_bidirectional.cpp` | +| 5.4 | **Review gate** | Diff review | — | +| 5.5 | `connected_components` — `afforest()` (both overloads) | `iota(component)` → vertex iteration; `static_cast>` adjustments | `test_connected_components.cpp` | +| 5.6 | **Review gate** | Diff review | — | +| 5.7 | Commit: `"feat: generalize connected_components for mapped graphs"` | All tests pass | + +--- + +### Phase 6: Simple Algorithms (batch, low risk) + +**Goal:** Generalize algorithms with minimal index coupling — each has at most one +internal array and no user property map parameters. 
+ +| Step | Algorithm | Key Changes | Test File | +|------|-----------|------------|-----------| +| 6.1 | `jaccard_coefficient` | `vector> nbrs` → `vertex_map>` | `test_jaccard.cpp` | +| 6.2 | `mis` | `vector removed` → lazy vertex_map + `contains()` | `test_mis.cpp` | +| 6.3 | `triangle_count` | `for(uid=0; uid order` → `vector>`; `iota` → vertex iteration; relax `Label` param | `test_label_propagation.cpp` | +| 6.5 | **Review gate** | Diff review of all four | — | +| 6.6 | Commit: `"feat: generalize jaccard, mis, triangle_count, label_propagation for mapped graphs"` | All tests pass | + +--- + +### Phase 7: Structural Algorithms (highest effort) + +**Goal:** Generalize articulation points and biconnected components, which have the most +internal arrays (5 and 3 respectively). + +| Step | Algorithm | Key Changes | Test File | +|------|-----------|------------|-----------| +| 7.1 | `articulation_points` | 5 internal arrays (`disc`, `low`, `parent`, `child_count`, `emitted`) → vertex_maps; `static_cast(N)` sentinel → use `vertex_map_contains` | `test_articulation_points.cpp` | +| 7.2 | **Review gate** | Diff review — most complex transformation | — | +| 7.3 | `biconnected_components` | 3 internal arrays (`disc`, `low`, `parent`) → vertex_maps; same sentinel change | `test_biconnected_components.cpp` | +| 7.4 | **Review gate** | Diff review | — | +| 7.5 | Commit: `"feat: generalize articulation_points, biconnected_components for mapped graphs"` | All tests pass | + +--- + +### Phase 8: MST Algorithms + +**Goal:** Evaluate and generalize Prim and Kruskal. + +| Step | Algorithm | Key Changes | Test File | +|------|-----------|------------|-----------| +| 8.1 | `prim` | Internal `vector distance` → vertex_map; relax `Predecessor`/`Weight` from `random_access_range`; `for(v=0;v` | `test_mst.cpp` | +| 8.2 | **Review gate** | Diff review | — | +| 8.3 | `kruskal` / `inplace_kruskal` | Uses `x_index_edgelist_range` and `disjoint_vector`. 
Evaluate whether map support makes sense for edge-list-centric algorithms. May defer. | `test_mst.cpp` | +| 8.4 | **Review gate** | Decide continue or defer | — | +| 8.5 | Commit if applicable | All tests pass | + +--- + +### Phase 9: Cleanup + +**Goal:** Remove temporary reference copies and finalize. + +| Step | Action | Verification | +|------|--------|-------------| +| 9.1 | Final full test run across all presets (clang-debug, gcc-release, gcc-asan) | All pass | +| 9.2 | Remove `include/graph/algorithm/index/` directory | Clean tree | +| 9.3 | Update documentation to reflect that algorithms now support mapped graphs | Docs review | +| 9.4 | Commit: `"chore: remove index-only reference copies, update docs"` | Clean commit | + +--- + +### Phase Summary + +| Phase | Scope | Risk | Gate | +|-------|-------|------|------| +| 0 | Snapshot originals | None | Baseline tests pass | +| 1 | Concepts + vertex_map | Low — additive only | Concept tests pass | +| 2 | BFS, DFS, topo sort | Low — internal arrays only | Per-algorithm diff review | +| 3 | init_shortest_paths | Low — additive overloads | Init tests pass | +| 4 | Dijkstra, Bellman-Ford | Medium — parameter types change | Per-algorithm diff review | +| 5 | Connected components | Medium — 4 function variants | Per-function diff review | +| 6 | Jaccard, MIS, TC, LP | Low — simple patterns | Batch diff review | +| 7 | Art. points, biconn. 
| High — many internal arrays | Per-algorithm diff review | +| 8 | Prim, Kruskal | Medium — may defer Kruskal | Per-algorithm diff review | +| 9 | Cleanup | None | Final full test suite | + +--- + +## Algorithm Impact Summary + +| Algorithm | Internal Arrays | Parameter Arrays | Sequential Loops | Effort | +|-----------|----------------|-----------------|------------------|--------| +| `breadth_first_search` | `visited` (1) | none | none | Low | +| `depth_first_search` | `color` (1) | none | none | Low | +| `topological_sort` | `color`, `finish_order` (2) | output iter | none | Low | +| `dijkstra_shortest_paths` | none | `Distances`, `Predecessors` (2) | `uid < N` (1) | Medium | +| `bellman_ford_shortest_paths` | none | `Distances`, `Predecessors` (2) | `k < N` (1) | Medium | +| `connected_components` | `visited` (1) | `Component` (1) | `uid < N` (1) | Medium | +| `articulation_points` | 5 arrays | none | none | High | +| `biconnected_components` | 3 arrays | none | none | High | +| `label_propagation` | `order` (1) | `Label` (1) | none | Medium | +| `jaccard_coefficient` | `nbrs` (1) | none | none | Low | +| `mis` | `removed` (1) | none | none | Low | +| `triangle_count` | none | none | `uid < N` (1) | Low | +| `prim` (MST) | `distance` (1) | `Predecessor`, `Weight` (2) | `v < N` (1) | Medium | +| `kruskal` (MST) | `disjoint_set` (1) | none | `uid <= N` (1) | Separate | + +--- + +## Design Decisions + +1. **`vertex_map` is a type alias.** Zero-cost, no wrapper overhead. If uniform + `operator[]` semantics or debug bounds-checking prove necessary later, it can be + promoted to a class — but start simple. + +2. **Prefer lazy over eager population.** For mapped graphs, start with an empty map + and treat absence as the default value (infinity, White, false, etc.) wherever the + algorithm's semantics allow it. Only use eager pre-population when the algorithm + truly requires every vertex to have an entry before the first read (e.g. user-supplied + component labels). 
See the per-algorithm guidance table in §2. + +3. **`vertex_map` uses `unordered_map` internally, not `std::map`.** Algorithm + bookkeeping (visited, colors, distances) needs O(1) access — `unordered_map` is + the right choice. `std::map` would be O(log n) per access, too slow for inner loops. + The graph's vertex container may be `std::map` (ordered), but the algorithm's scratch + data need not be. + +4. **`mapped_vertex_range` requires `hashable_vertex_id`.** Algorithms universally need + hashable IDs for their internal `unordered_map` usage, so bake this into the concept + rather than requiring each algorithm to add it separately. + +5. **Add new `init_shortest_paths` overloads with graph parameter.** The current + signatures `(Distances&)` and `(Distances&, Predecessors&)` are preserved for backward + compatibility. New overloads taking `const G& g` as the first parameter are added + alongside them. The old overloads can be deprecated once migration is complete. From afd3132d8c60fd7b25d2fef5337117449b71dcb2 Mon Sep 17 00:00:00 2001 From: Phil Ratzloff Date: Sun, 1 Mar 2026 14:33:30 -0500 Subject: [PATCH 04/20] chore: snapshot index-only algorithm implementations for reference Copy all 14 algorithm .hpp files to include/graph/algorithm/index/ as byte-identical reference snapshots before map-based generalization. This directory is a temporary review aid and not part of the public API. 
--- .../algorithm/index/articulation_points.hpp | 238 +++++ .../index/bellman_ford_shortest_paths.hpp | 412 ++++++++ .../index/biconnected_components.hpp | 270 +++++ .../algorithm/index/breadth_first_search.hpp | 348 +++++++ .../algorithm/index/connected_components.hpp | 873 ++++++++++++++++ .../algorithm/index/depth_first_search.hpp | 364 +++++++ .../index/dijkstra_shortest_paths.hpp | 415 ++++++++ include/graph/algorithm/index/jaccard.hpp | 181 ++++ .../algorithm/index/label_propagation.hpp | 282 +++++ include/graph/algorithm/index/mis.hpp | 201 ++++ include/graph/algorithm/index/mst.hpp | 982 ++++++++++++++++++ include/graph/algorithm/index/tc.hpp | 298 ++++++ .../algorithm/index/topological_sort.hpp | 588 +++++++++++ .../algorithm/index/traversal_common.hpp | 344 ++++++ 14 files changed, 5796 insertions(+) create mode 100644 include/graph/algorithm/index/articulation_points.hpp create mode 100644 include/graph/algorithm/index/bellman_ford_shortest_paths.hpp create mode 100644 include/graph/algorithm/index/biconnected_components.hpp create mode 100644 include/graph/algorithm/index/breadth_first_search.hpp create mode 100644 include/graph/algorithm/index/connected_components.hpp create mode 100644 include/graph/algorithm/index/depth_first_search.hpp create mode 100644 include/graph/algorithm/index/dijkstra_shortest_paths.hpp create mode 100644 include/graph/algorithm/index/jaccard.hpp create mode 100644 include/graph/algorithm/index/label_propagation.hpp create mode 100755 include/graph/algorithm/index/mis.hpp create mode 100644 include/graph/algorithm/index/mst.hpp create mode 100644 include/graph/algorithm/index/tc.hpp create mode 100644 include/graph/algorithm/index/topological_sort.hpp create mode 100644 include/graph/algorithm/index/traversal_common.hpp diff --git a/include/graph/algorithm/index/articulation_points.hpp b/include/graph/algorithm/index/articulation_points.hpp new file mode 100644 index 0000000..9d3b5d8 --- /dev/null +++ 
b/include/graph/algorithm/index/articulation_points.hpp @@ -0,0 +1,238 @@ +/** + * @file articulation_points.hpp + * + * @brief Articulation Points (cut vertices) algorithm for graphs. + * + * @copyright Copyright (c) 2024 + * + * SPDX-License-Identifier: BSL-1.0 + * + * @authors + * Andrew Lumsdaine + * Phil Ratzloff + */ + +#include "graph/graph.hpp" + +#ifndef GRAPH_ARTICULATION_POINTS_HPP +# define GRAPH_ARTICULATION_POINTS_HPP + +# include +# include +# include + +namespace graph { + +// Using declarations for new namespace structure +using adj_list::index_adjacency_list; +using adj_list::vertex_id_t; +using adj_list::vertices; +using adj_list::edges; +using adj_list::target_id; +using adj_list::vertex_id; +using adj_list::num_vertices; +using adj_list::find_vertex; + +/** + * @ingroup graph_algorithms + * @brief Find articulation points (cut vertices) of a graph. + * + * An articulation point is a vertex whose removal (along with its incident edges) + * disconnects the graph into two or more connected components. This implementation + * uses the iterative Hopcroft-Tarjan algorithm based on DFS discovery times and + * low-link values. + * + * The algorithm maintains two arrays: + * - `disc[v]`: DFS discovery time of vertex `v`. + * - `low[v]`: minimum discovery time reachable from the subtree rooted at `v` + * via back-edges. + * + * A vertex `u` is an articulation point if: + * - **Root rule:** `u` is the root of a DFS tree and has two or more DFS children. + * - **Non-root rule:** `u` is not a root and has a child `v` with `low[v] >= disc[u]`. + * + * ## Complexity Analysis + * + * **Time Complexity:** O(|V| + |E|) where V is the number of vertices and E is + * the number of edges. Each vertex and edge is visited exactly once during the DFS. + * + * **Space Complexity:** O(V) for the discovery time, low-link, parent, child count, + * and emitted arrays, plus O(V) for the DFS stack. 
+ * + * ## Supported Graph Properties + * + * ### Directedness + * - ✅ Directed graphs (caller must store both {u,v} and {v,u} for undirected semantics) + * + * ### Edge Properties + * - ✅ Unweighted edges + * - ✅ Weighted edges (weights ignored) + * - ✅ Multi-edges (only the first reverse edge to the DFS parent is skipped as the tree edge; + * additional parallel edges are treated as back-edges that update low-link values) + * - ✅ Self-loops (ignored — do not affect articulation point detection) + * - ✅ Cycles + * + * ### Graph Structure + * - ✅ Connected graphs + * - ✅ Disconnected graphs (processes all components via outer loop) + * - ✅ Empty graphs (returns immediately) + * + * ### Container Requirements + * - Requires: `index_adjacency_list` concept (contiguous vertex IDs) + * - Requires: `std::output_iterator>` + * - Works with: All `dynamic_graph` container combinations with contiguous IDs + * + * @tparam G The graph type. Must satisfy index_adjacency_list concept, + * which implies contiguous vertex IDs from 0 to num_vertices(g)-1. + * @tparam Iter The output iterator type. Must be output_iterator>. + * + * @param g The graph. Callers must supply both directions of each undirected edge. + * @param cut_vertices The output iterator where articulation point vertex IDs will be written. + * No ordering guarantee on the emitted vertices. + * + * @pre g must have contiguous vertex IDs [0, num_vertices(g)) + * @pre For undirected semantics, each edge {u,v} must be stored as both (u,v) and (v,u). + * + * @post Output contains all articulation points, each emitted exactly once. + * @post The graph g is not modified. + * + * **Exception Safety:** Basic exception safety. May throw std::bad_alloc if internal + * vector allocation fails. The graph g remains unchanged; output iterator may be + * partially written. 
+ * + * ## Example Usage + * + * ```cpp + * #include + * #include + * #include + * #include + * + * using namespace graph; + * + * int main() { + * using Graph = container::dynamic_graph>; + * + * // Path graph: 0 - 1 - 2 - 3 (bidirectional) + * Graph g({{0,1},{1,0},{1,2},{2,1},{2,3},{3,2}}); + * + * std::vector> result; + * articulation_points(g, std::back_inserter(result)); + * // result contains {1, 2} (in some order) + * } + * ``` + */ +template +requires std::output_iterator> +void articulation_points(G&& g, Iter cut_vertices) { + using vid_t = vertex_id_t; + + const size_t N = num_vertices(g); + if (N == 0) { + return; + } + + constexpr size_t UNVISITED = std::numeric_limits::max(); + const vid_t NO_PARENT = static_cast(N); // sentinel for "no parent" + + std::vector disc(N, UNVISITED); + std::vector low(N, UNVISITED); + std::vector parent(N, NO_PARENT); + std::vector child_count(N, 0); // DFS tree children count (for root rule) + std::vector emitted(N, false); // deduplication guard + + size_t timer = 0; + + // Frame for iterative DFS: (vertex_id, edge_index, parent_edge_skipped) + // edge_index tracks how far we've iterated through edges(g, uid) + // parent_edge_skipped ensures only the first reverse edge to the DFS parent + // is treated as the tree edge; subsequent parallel edges update low-link. 
+ struct dfs_frame { + vid_t uid; + size_t edge_idx; + bool parent_edge_skipped; + }; + + std::stack stk; + + // Outer loop: handle disconnected graphs + for (auto [start] : views::basic_vertexlist(g)) { + if (disc[start] != UNVISITED) { + continue; + } + + disc[start] = low[start] = timer++; + stk.push({start, 0, false}); + + while (!stk.empty()) { + auto& [uid, edge_idx, parent_skipped] = stk.top(); + + // Collect edges into a temporary to allow indexed access + // We advance through edges one at a time using edge_idx + auto edge_range = views::incidence(g, uid); + auto it = std::ranges::begin(edge_range); + auto it_end = std::ranges::end(edge_range); + + // Advance iterator to edge_idx position + for (size_t i = 0; i < edge_idx && it != it_end; ++i, ++it) { + } + + if (it == it_end) { + // All edges processed — backtrack + stk.pop(); + if (!stk.empty()) { + auto& [par_uid, par_edge_idx, par_skipped] = stk.top(); + // Update low-link of parent + if (low[uid] < low[par_uid]) { + low[par_uid] = low[uid]; + } + + // Check articulation point condition for non-root + if (parent[par_uid] != NO_PARENT) { + // Non-root rule: child v has low[v] >= disc[u] + if (low[uid] >= disc[par_uid] && !emitted[par_uid]) { + *cut_vertices++ = par_uid; + emitted[par_uid] = true; + } + } + } + continue; + } + + vid_t vid = target_id(g, *it); + ++edge_idx; // advance for next iteration + + // Skip self-loops + if (vid == uid) { + continue; + } + + if (disc[vid] == UNVISITED) { + // Tree edge: vid is a new DFS child of uid + parent[vid] = uid; + child_count[uid]++; + disc[vid] = low[vid] = timer++; + stk.push({vid, 0, false}); + } else if (vid == parent[uid] && !parent_skipped) { + // First reverse edge to DFS parent — this is the tree edge; skip it + parent_skipped = true; + } else { + // Back edge (or additional parallel edge to parent): update low-link + if (disc[vid] < low[uid]) { + low[uid] = disc[vid]; + } + } + } + + // Root rule: root is an articulation point iff it has >= 2 DFS 
children + if (child_count[start] >= 2 && !emitted[start]) { + *cut_vertices++ = start; + emitted[start] = true; + } + } +} + +} // namespace graph + +#endif // GRAPH_ARTICULATION_POINTS_HPP diff --git a/include/graph/algorithm/index/bellman_ford_shortest_paths.hpp b/include/graph/algorithm/index/bellman_ford_shortest_paths.hpp new file mode 100644 index 0000000..d6a6ecc --- /dev/null +++ b/include/graph/algorithm/index/bellman_ford_shortest_paths.hpp @@ -0,0 +1,412 @@ +/** + * @file bellman_ford_shortest_paths.hpp + * + * @brief Single-Source Shortest paths and shortest distances algorithms using Bellman-Ford's algorithm. + * + * @copyright Copyright (c) 2024 + * + * SPDX-License-Identifier: BSL-1.0 + * + * @authors + * Andrew Lumsdaine + * Phil Ratzloff + */ + +#include "graph/graph.hpp" +#include "graph/views/edgelist.hpp" +#include "graph/algorithm/traversal_common.hpp" + +#include +#include +#include +#include + +#ifndef GRAPH_BELLMAN_SHORTEST_PATHS_HPP +# define GRAPH_BELLMAN_SHORTEST_PATHS_HPP + +namespace graph { + +// Using declarations for CPO-based architecture +using adj_list::index_adjacency_list; +using adj_list::vertex_id_t; +using adj_list::edge_t; +using adj_list::num_vertices; +using adj_list::vertices; +using adj_list::find_vertex; +using adj_list::source_id; +using adj_list::target_id; + +/** + * @brief Get the vertex ids in a negative weight cycle. + * + * If a negative weight cycle exists, the vertex ids in the cycle are output to the output iterator. + * If no negative weight cycle exists, the output iterator is not modified. + * + * @tparam G The graph type. + * @tparam Predecessors The predecessor range type. + * @tparam OutputIterator The output iterator type. + * + * @param g The graph. + * @param predecessor The predecessor range. + * @param cycle_vertex_id A vertex id in the negative weight cycle. If no negative weight cycle exists + * then there will be no vertex id defined. 
+ * @param out_cycle The output iterator that the vertex ids in the cycle are output to. + */ +template +requires output_iterator> +void find_negative_cycle(G& g, + const Predecessors& predecessor, + const optional>& cycle_vertex_id, + OutputIterator out_cycle) { + // Does a negative weight cycle exist? + if (cycle_vertex_id.has_value()) { + vertex_id_t uid = cycle_vertex_id.value(); + do { + *out_cycle++ = uid; + uid = predecessor[uid]; + } while (uid != cycle_vertex_id.value()); + } +} + + +/** + * @brief Multi-source shortest paths using Bellman-Ford algorithm. + * + * Finds shortest paths from one or more source vertices to all other vertices in a weighted graph. + * Unlike Dijkstra's algorithm, Bellman-Ford can handle negative edge weights and detects negative + * weight cycles. Returns an optional vertex ID indicating whether a negative cycle was detected. + * + * @tparam G The graph type. Must satisfy index_adjacency_list concept. + * @tparam Sources Input range of source vertex IDs. + * @tparam Distances Random access range for storing distances. Value type must be arithmetic. + * @tparam Predecessors Random access range for storing predecessor information. Can use _null_predecessors + * if path reconstruction is not needed. + * @tparam WF Edge weight function. Defaults to returning 1 for all edges (unweighted). + * @tparam Visitor Visitor type with callbacks for algorithm events. Defaults to empty_visitor. + * Visitor calls are optimized away if not used. + * @tparam Compare Comparison function for distance values. Defaults to less<>. + * @tparam Combine Function to combine distances and weights. Defaults to plus<>. + * + * @param g The graph to process. + * @param sources Range of source vertex IDs to start from. + * @param distances [out] Shortest distances from sources. Must be sized >= num_vertices(g). + * @param predecessor [out] Predecessor information for path reconstruction. Must be sized >= num_vertices(g). 
+ * @param weight Edge weight function: (const edge_t&) -> Distance. + * @param visitor Visitor for algorithm events (examine, relax, not_relaxed, minimized, not_minimized). + * @param compare Distance comparison function: (Distance, Distance) -> bool. + * @param combine Distance combination function: (Distance, Weight) -> Distance. + * + * @return optional>. Returns empty if no negative cycle detected. Returns a vertex ID + * in the negative cycle if one exists. Use find_negative_cycle() to extract all cycle vertices. + * + * **Complexity:** + * - Time: O(V * E) - iterates over all edges V times + * - Space: O(1) auxiliary space (excluding output parameters) + * + * **Mandates:** + * - G must satisfy index_adjacency_list (integral vertex IDs) + * - Sources must be input_range with values convertible to vertex_id_t + * - Distances must be random_access_range with arithmetic value type + * - Predecessors must be random_access_range with values convertible from vertex_id_t + * - WF must satisfy basic_edge_weight_function + * + * **Preconditions:** + * - All source vertices must be valid: source < num_vertices(g) for vector-based containers + * - distances.size() >= num_vertices(g) + * - predecessor.size() >= num_vertices(g) (unless using _null_predecessors) + * - Weight function must not throw or modify graph state + * + * **Postconditions:** + * - distances[s] == 0 for all sources s + * - If no negative cycle: For reachable v, distances[v] contains shortest distance from nearest source + * - If no negative cycle: For reachable v, predecessor[v] contains predecessor in shortest path tree + * - If negative cycle detected: distances and predecessor may contain intermediate values + * - For unreachable vertices v: distances[v] == numeric_limits::max() + * + * **Effects:** + * - Modifies distances: Sets distances[v] for all vertices v + * - Modifies predecessor: Sets predecessor[v] for all processed edges + * - Does not modify the graph g + * + * **Exception Safety:** + 
* Basic guarantee. If an exception is thrown: + * - Graph g remains unchanged + * - distances and predecessor may be partially modified (indeterminate state) + * + * **Throws:** + * - std::out_of_range if a source vertex ID is out of range + * - std::out_of_range if distances or predecessor are undersized + * + * **Remarks:** + * - Use Bellman-Ford when: graph has negative weights, need cycle detection, or edges processed sequentially + * - Use Dijkstra when: all weights non-negative and need better performance O((V+E) log V) + * - Negative cycle detection: Algorithm performs V iterations. If any edge relaxes on iteration V, a + * negative cycle exists. The returned vertex ID can be used with find_negative_cycle() to extract + * all vertices in the cycle. + * - Based on Boost.Graph bellman_ford_shortest_paths implementation + * + * @see find_negative_cycle() to extract vertices in detected negative cycle + * @see dijkstra_shortest_paths() for faster algorithm with non-negative weights + */ +// Note on std::remove_reference_t: +// These templates declare G&& (forwarding reference), so for lvalue arguments G deduces as a +// reference type (e.g. vector<…>&). Writing "const G&" when G is already a reference triggers +// reference collapsing: const (vector<…>&) & → vector<…>& — the const is silently discarded. +// We use std::remove_reference_t in WF default types, invoke_result_t, and concept +// constraints so that "const std::remove_reference_t&" always means a true const ref. +// Default lambdas use "const auto&" instead of "const G&" to sidestep the issue entirely. 
+template (const std::remove_reference_t&, const edge_t&)>, + class Visitor = empty_visitor, + class Compare = less>, + class Combine = plus>> +requires convertible_to, vertex_id_t> && // + is_arithmetic_v> && // + convertible_to, range_value_t> && // + sized_range && // + sized_range && // + basic_edge_weight_function, Compare, Combine> +[[nodiscard]] constexpr optional> bellman_ford_shortest_paths( + G&& g, + const Sources& sources, + Distances& distances, + Predecessors& predecessor, + WF&& weight = [](const auto&, + const edge_t& uv) { return range_value_t(1); }, // default weight(g, uv) -> 1 + Visitor&& visitor = empty_visitor(), + Compare&& compare = less>(), + Combine&& combine = plus>()) { + using id_type = vertex_id_store_t; + static_assert(std::is_same_v>, + "vertex_id_store_t should equal vertex_id_t for index_adjacency_list"); + using DistanceValue = range_value_t; + using weight_type = invoke_result_t&, edge_t>; + using return_type = optional>; + + // relaxing the target is the function of reducing the distance from the source to the target + auto relax_target = [&g, &predecessor, &distances, &compare, &combine] // + (const edge_t& e, const vertex_id_t& uid, const weight_type& w_e) -> bool { + id_type vid = target_id(g, e); + const DistanceValue d_u = distances[static_cast(uid)]; + const DistanceValue d_v = distances[static_cast(vid)]; + + if (compare(combine(d_u, w_e), d_v)) { + distances[static_cast(vid)] = combine(d_u, w_e); + if constexpr (!is_same_v) { + predecessor[static_cast(vid)] = uid; + } + return true; + } + return false; + }; + + if (size(distances) < num_vertices(g)) { + throw std::out_of_range( + std::format("bellman_ford_shortest_paths: size of distances of {} is less than the number of vertices {}", + size(distances), num_vertices(g))); + } + + if constexpr (!is_same_v) { + if (size(predecessor) < num_vertices(g)) { + throw std::out_of_range( + std::format("bellman_ford_shortest_paths: size of predecessor of {} is less than the number 
of vertices {}", + size(predecessor), num_vertices(g))); + } + } + + constexpr auto zero = shortest_path_zero(); + constexpr auto infinite = shortest_path_infinite_distance(); + + const id_type N = static_cast(num_vertices(g)); + + // Seed the queue with the initial vertice(s) + for (auto&& source : sources) { + if (source >= N || source < 0) { + throw std::out_of_range( + std::format("bellman_ford_shortest_paths: source vertex id '{}' is out of range", source)); + } + distances[static_cast(source)] = zero; // mark source as discovered + if constexpr (has_on_discover_vertex) { + visitor.on_discover_vertex(g, *find_vertex(g, source)); + } else if constexpr (has_on_discover_vertex_id) { + visitor.on_discover_vertex(g, source); + } + } + + // Evaluate the shortest paths + bool at_least_one_edge_relaxed = false; + for (id_type k = 0; k < N; ++k) { + at_least_one_edge_relaxed = false; + for (auto&& [uid, vid, uv, w] : views::edgelist(g, weight)) { + if constexpr (has_on_examine_edge) { + visitor.on_examine_edge(g, uv); + } + if (relax_target(uv, uid, w)) { + at_least_one_edge_relaxed = true; + if constexpr (has_on_edge_relaxed) { + visitor.on_edge_relaxed(g, uv); + } + } else if constexpr (has_on_edge_not_relaxed) { + visitor.on_edge_not_relaxed(g, uv); + } + } + if (!at_least_one_edge_relaxed) + break; + } + + // Check for negative weight cycles + if (at_least_one_edge_relaxed) { + for (auto&& [uid, vid, uv, w] : views::edgelist(g, weight)) { + if (compare(combine(distances[uid], w), distances[vid])) { + if constexpr (!is_same_v) { + predecessor[vid] = uid; // close the cycle + } + if constexpr (has_on_edge_not_minimized) { + visitor.on_edge_not_minimized(g, uv); + } + return return_type(uid); + } else { + if constexpr (has_on_edge_minimized) { + visitor.on_edge_minimized(g, uv); + } + } + } + } + + return return_type(); +} + +/** + * @brief Single-source shortest paths using Bellman-Ford algorithm. + * + * Convenience overload for single source vertex. 
See multi-source version for full documentation. + * + * @param source Single source vertex ID instead of range. + * + * @return optional> indicating negative cycle detection. + * + * @see bellman_ford_shortest_paths(G&&, const Sources&, Distances&, Predecessors&, WF&&, Visitor&&, Compare&&, Combine&&) + */ +template (const std::remove_reference_t&, const edge_t&)>, + class Visitor = empty_visitor, + class Compare = less>, + class Combine = plus>> +requires is_arithmetic_v> && // + convertible_to, range_value_t> && // + sized_range && // + sized_range && // + basic_edge_weight_function, Compare, Combine> +[[nodiscard]] constexpr optional> bellman_ford_shortest_paths( + G&& g, + const vertex_id_t& source, + Distances& distances, + Predecessors& predecessor, + WF&& weight = [](const auto&, + const edge_t& uv) { return range_value_t(1); }, // default weight(g, uv) -> 1 + Visitor&& visitor = empty_visitor(), + Compare&& compare = less>(), + Combine&& combine = plus>()) { + return bellman_ford_shortest_paths(g, subrange(&source, (&source + 1)), distances, predecessor, weight, + forward(visitor), forward(compare), forward(combine)); +} + + +/** + * @brief Multi-source shortest distances using Bellman-Ford algorithm (no predecessor tracking). + * + * Computes shortest distances without tracking predecessor information. More efficient when + * path reconstruction is not needed. Can detect negative weight cycles. + * + * @tparam G The graph type. Must satisfy index_adjacency_list concept. + * @tparam Sources Input range of source vertex IDs. + * @tparam Distances Random access range for storing distances. Value type must be arithmetic. + * @tparam WF Edge weight function. Defaults to returning 1 for all edges (unweighted). + * @tparam Visitor Visitor type with callbacks for algorithm events. Defaults to empty_visitor. + * @tparam Compare Comparison function for distance values. Defaults to less<>. + * @tparam Combine Function to combine distances and weights. 
Defaults to plus<>. + * + * @param g The graph to process. + * @param sources Range of source vertex IDs to start from. + * @param distances [out] Shortest distances from sources. Must be sized >= num_vertices(g). + * @param weight Edge weight function: (const edge_t&) -> Distance. + * @param visitor Visitor for algorithm events. + * @param compare Distance comparison function: (Distance, Distance) -> bool. + * @param combine Distance combination function: (Distance, Weight) -> Distance. + * + * @return optional>. Returns empty if no negative cycle. Returns vertex ID in cycle if detected. + * + * **Effects:** + * - Modifies distances: Sets distances[v] for all vertices v + * - Does not modify the graph g + * - Internally uses _null_predecessors to skip predecessor tracking + * + * @see bellman_ford_shortest_paths() for full documentation and complexity analysis. + * @see find_negative_cycle() to extract cycle vertices (requires predecessor tracking version). + */ +template (const std::remove_reference_t&, const edge_t&)>, + class Visitor = empty_visitor, + class Compare = less>, + class Combine = plus>> +requires convertible_to, vertex_id_t> && // + is_arithmetic_v> && // + sized_range && // + basic_edge_weight_function, Compare, Combine> +[[nodiscard]] constexpr optional> bellman_ford_shortest_distances( + G&& g, + const Sources& sources, + Distances& distances, + WF&& weight = [](const auto&, + const edge_t& uv) { return range_value_t(1); }, // default weight(g, uv) -> 1 + Visitor&& visitor = empty_visitor(), + Compare&& compare = less>(), + Combine&& combine = plus>()) { + return bellman_ford_shortest_paths(g, sources, distances, _null_predecessors, forward(weight), + forward(visitor), forward(compare), forward(combine)); +} + +/** + * @brief Single-source shortest distances using Bellman-Ford algorithm (no predecessor tracking). + * + * Convenience overload for single source vertex without predecessor tracking. 
+ * + * @param source Single source vertex ID instead of range. + * + * @return optional> indicating negative cycle detection. + * + * @see bellman_ford_shortest_distances(G&&, const Sources&, Distances&, WF&&, Visitor&&, Compare&&, Combine&&) + */ +template (const std::remove_reference_t&, const edge_t&)>, + class Visitor = empty_visitor, + class Compare = less>, + class Combine = plus>> +requires is_arithmetic_v> && // + sized_range && // + basic_edge_weight_function, Compare, Combine> +[[nodiscard]] constexpr optional> bellman_ford_shortest_distances( + G&& g, + const vertex_id_t& source, + Distances& distances, + WF&& weight = [](const auto&, + const edge_t& uv) { return range_value_t(1); }, // default weight(g, uv) -> 1 + Visitor&& visitor = empty_visitor(), + Compare&& compare = less>(), + Combine&& combine = plus>()) { + return bellman_ford_shortest_paths(g, subrange(&source, (&source + 1)), distances, _null_predecessors, + forward(weight), forward(visitor), forward(compare), + forward(combine)); +} + +} // namespace graph + +#endif // GRAPH_BELLMAN_SHORTEST_PATHS_HPP diff --git a/include/graph/algorithm/index/biconnected_components.hpp b/include/graph/algorithm/index/biconnected_components.hpp new file mode 100644 index 0000000..46110be --- /dev/null +++ b/include/graph/algorithm/index/biconnected_components.hpp @@ -0,0 +1,270 @@ +/** + * @file biconnected_components.hpp + * + * @brief Biconnected Components algorithm for graphs. 
+ * + * @copyright Copyright (c) 2024 + * + * SPDX-License-Identifier: BSL-1.0 + * + * @authors + * Andrew Lumsdaine + * Phil Ratzloff + */ + +#include "graph/graph.hpp" + +#ifndef GRAPH_BICONNECTED_COMPONENTS_HPP +# define GRAPH_BICONNECTED_COMPONENTS_HPP + +# include +# include +# include +# include + +namespace graph { + +// Using declarations for new namespace structure +using adj_list::index_adjacency_list; +using adj_list::vertex_id_t; +using adj_list::vertices; +using adj_list::edges; +using adj_list::target_id; +using adj_list::vertex_id; +using adj_list::num_vertices; +using adj_list::find_vertex; + +/** + * @ingroup graph_algorithms + * @brief Find the biconnected components of a graph. + * + * A biconnected component (also called a 2-connected component) is a maximal + * biconnected subgraph — one that is connected and has no articulation points. + * Equivalently, any two vertices in a biconnected component lie on a common + * simple cycle. + * + * This implementation uses the iterative Hopcroft-Tarjan algorithm extended + * with an explicit edge stack. During the DFS, each tree edge and back edge is + * pushed onto the edge stack. Whenever an articulation-point boundary is + * detected on backtrack (i.e., `low[v] >= disc[u]` for child v and parent u), + * the edge stack is popped down to and including the edge (u, v) and the unique + * vertex IDs from those edges form one biconnected component. + * + * Isolated vertices (degree 0) are emitted as trivial single-vertex components. + * Articulation-point vertices appear in more than one component. + * + * ## Complexity Analysis + * + * **Time Complexity:** O(|V| + |E|) where V is the number of vertices and E is + * the number of edges. Each vertex and edge is visited exactly once during the DFS. + * + * **Space Complexity:** O(V + E) for the discovery time and low-link arrays (O(V)), + * the DFS stack (O(V)), and the edge stack (O(E)). 
+ * + * ## Supported Graph Properties + * + * ### Directedness + * - ✅ Directed graphs (caller must store both {u,v} and {v,u} for undirected semantics) + * + * ### Edge Properties + * - ✅ Unweighted edges + * - ✅ Weighted edges (weights ignored) + * - ✅ Multi-edges (only the first reverse edge to the DFS parent is skipped as the tree edge; + * additional parallel edges are treated as back-edges) + * - ✅ Self-loops (ignored — do not affect biconnected component detection) + * - ✅ Cycles + * + * ### Graph Structure + * - ✅ Connected graphs + * - ✅ Disconnected graphs (processes all components via outer loop) + * - ✅ Empty graphs (returns immediately) + * + * ### Container Requirements + * - Requires: `index_adjacency_list` concept (contiguous vertex IDs) + * - Works with: All `dynamic_graph` container combinations with contiguous IDs + * + * @tparam G The graph type. Must satisfy index_adjacency_list concept, + * which implies contiguous vertex IDs from 0 to num_vertices(g)-1. + * @tparam OuterContainer A container of containers for the output components. + * Typically `std::vector>>`. + * + * @param g The graph. Callers must supply both directions of each undirected edge. + * @param components Output container; one inner container is push_back'd per biconnected + * component found. Articulation-point vertices appear in multiple inner + * containers. No ordering guarantee on the order of components or vertex + * IDs within a component. + * + * @pre g must have contiguous vertex IDs [0, num_vertices(g)) + * @pre For undirected semantics, each edge {u,v} must be stored as both (u,v) and (v,u). + * + * @post Every vertex appears in at least one component. + * @post Articulation-point vertices appear in more than one component. + * @post Each component's induced subgraph is biconnected. + * @post The graph g is not modified. + * + * **Exception Safety:** Basic exception safety. May throw std::bad_alloc if internal + * vector or set allocation fails. 
The graph g remains unchanged; components may be + * partially written. + * + * ## Example Usage + * + * ```cpp + * #include + * #include + * #include + * #include + * + * using namespace graph; + * + * int main() { + * using Graph = container::dynamic_graph>; + * + * // Two triangles joined by bridge 2-3 (bidirectional) + * Graph g({{0,1},{1,0},{1,2},{2,1},{0,2},{2,0}, + * {3,4},{4,3},{4,5},{5,4},{3,5},{5,3}, + * {2,3},{3,2}}); + * + * std::vector>> components; + * biconnected_components(g, components); + * // components contains 3 entries: {0,1,2}, {2,3}, {3,4,5} (in some order) + * } + * ``` + */ +template +void biconnected_components(G&& g, OuterContainer& components) { + using vid_t = vertex_id_t; + using inner_type = typename OuterContainer::value_type; + + const size_t N = num_vertices(g); + if (N == 0) { + return; + } + + constexpr size_t UNVISITED = std::numeric_limits::max(); + const vid_t NO_PARENT = static_cast(N); // sentinel for "no parent" + + std::vector disc(N, UNVISITED); + std::vector low(N, UNVISITED); + std::vector parent(N, NO_PARENT); + + size_t timer = 0; + + // Edge stack: stores (source, target) pairs for edges traversed during DFS. + // When a biconnected component boundary is detected, edges are popped to + // extract the vertex set of that component. + using edge_pair = std::pair; + std::stack edge_stk; + + // Frame for iterative DFS: (vertex_id, edge_index, parent_edge_skipped) + struct dfs_frame { + vid_t uid; + size_t edge_idx; + bool parent_edge_skipped; + }; + + std::stack stk; + + // Helper: pop edges from edge_stk until (u, v) is popped (inclusive). + // Collect unique vertex IDs and push_back as a new component. 
+ auto flush_component = [&](vid_t u, vid_t v) { + std::set vset; + while (true) { + auto [eu, ev] = edge_stk.top(); + edge_stk.pop(); + vset.insert(eu); + vset.insert(ev); + if (eu == u && ev == v) { + break; + } + } + inner_type comp(vset.begin(), vset.end()); + components.push_back(std::move(comp)); + }; + + // Outer loop: handle disconnected graphs + for (auto sv : vertices(g)) { + vid_t start = vertex_id(g, sv); + if (disc[start] != UNVISITED) { + continue; + } + + // Check for isolated vertex (no edges, or only self-loops, which the DFS below ignores) + auto start_edges = edges(g, sv); + if ([&] { for (auto&& e : start_edges) { if (target_id(g, e) != start) return false; } return true; }()) { + // Isolated vertex — trivial biconnected component + components.push_back(inner_type{static_cast(start)}); + disc[start] = timer++; // mark as visited + continue; + } + + disc[start] = low[start] = timer++; + stk.push({start, 0, false}); + + while (!stk.empty()) { + auto& [uid, edge_idx, parent_skipped] = stk.top(); + + auto edge_range = edges(g, uid); + auto it = std::ranges::begin(edge_range); + auto it_end = std::ranges::end(edge_range); + + // Advance iterator to edge_idx position, stopping at it_end (O(1) for random-access edge ranges) + std::ranges::advance(it, static_cast<std::iter_difference_t<decltype(it)>>(edge_idx), + it_end); + + if (it == it_end) { + // All edges processed — backtrack + vid_t backtrack_uid = uid; + stk.pop(); + if (!stk.empty()) { + auto& [par_uid, par_edge_idx, par_skipped] = stk.top(); + // Update low-link of parent + if (low[backtrack_uid] < low[par_uid]) { + low[par_uid] = low[backtrack_uid]; + } + + // Check biconnected component boundary: + // If low[child] >= disc[parent], then parent is an articulation point + // (or is the root), and we flush a component. 
+ if (low[backtrack_uid] >= disc[par_uid]) { + flush_component(par_uid, backtrack_uid); + } + } + continue; + } + + vid_t vid = target_id(g, *it); + ++edge_idx; // advance for next iteration + + // Skip self-loops + if (vid == uid) { + continue; + } + + if (disc[vid] == UNVISITED) { + // Tree edge: vid is a new DFS child of uid + parent[vid] = uid; + disc[vid] = low[vid] = timer++; + edge_stk.push({uid, vid}); + stk.push({vid, 0, false}); + } else if (vid == parent[uid] && !parent_skipped) { + // First reverse edge to DFS parent — this is the tree edge; skip it + parent_skipped = true; + } else { + // Back edge (or additional parallel edge to parent): update low-link + // Only push back edges going to an ancestor (disc[vid] < disc[uid]) + // to avoid pushing forward-direction duplicates. + if (disc[vid] < disc[uid]) { + edge_stk.push({uid, vid}); + } + if (disc[vid] < low[uid]) { + low[uid] = disc[vid]; + } + } + } + } +} + +} // namespace graph + +#endif // GRAPH_BICONNECTED_COMPONENTS_HPP diff --git a/include/graph/algorithm/index/breadth_first_search.hpp b/include/graph/algorithm/index/breadth_first_search.hpp new file mode 100644 index 0000000..c9bed55 --- /dev/null +++ b/include/graph/algorithm/index/breadth_first_search.hpp @@ -0,0 +1,348 @@ +/** + * @file breadth_first_search.hpp + * + * @brief Breadth-first search traversal algorithms for graphs. + * + * Breadth-first search (BFS) is a fundamental graph traversal algorithm that explores + * vertices in order of their distance from the source vertex(es). It visits all vertices + * at distance k before visiting any vertex at distance k+1, making it ideal for finding + * unweighted shortest paths, level-order traversal, and testing graph connectivity. + * + * This implementation provides both single-source and multi-source variants with + * customizable visitor callbacks for tracking traversal events. 
+ * + * @copyright Copyright (c) 2024 + * + * SPDX-License-Identifier: BSL-1.0 + * + * @authors Andrew Lumsdaine, Phil Ratzloff + */ + +#include "graph/graph.hpp" +#include "graph/views/incidence.hpp" +#include "graph/algorithm/traversal_common.hpp" + +#include +#include +#include +#include + +#ifndef GRAPH_BREADTH_FIRST_SEARCH_HPP +# define GRAPH_BREADTH_FIRST_SEARCH_HPP + +namespace graph { + +// Using declarations for new namespace structure +using adj_list::index_adjacency_list; +using adj_list::vertex_id_t; +using adj_list::find_vertex; + +/** + * @brief Multi-source breadth-first search with visitor pattern. + * + * Performs breadth-first traversal starting from multiple source vertices simultaneously, + * calling visitor methods at key points during traversal. This is the fundamental BFS + * implementation that supports custom event callbacks for tracking algorithm progress. + * + * BFS explores vertices in waves: all vertices at distance k from any source are visited + * before any vertex at distance k+1. When multiple sources are provided, vertices reachable + * from any source are discovered in the first wave, making this useful for multi-source + * shortest path problems and parallel/concurrent reachability analysis. 
+ * + * @par Complexity Analysis + * + * | Case | Time | Space | + * |------|------|-------| + * | All cases | O(V + E) | O(V) | + * + * Where V = number of vertices, E = number of edges + * + * **Time Complexity:** + * - Each vertex is visited exactly once: O(V) + * - Each edge is examined exactly once: O(E) + * - Queue operations (push/pop) are O(1) each + * - Total: O(V + E) for all cases + * + * **Space Complexity:** + * - Visited array: O(V) + * - Queue: O(V) worst case (all vertices at same level) + * - Total auxiliary space: O(V) + * + * @par Supported Graph Properties + * + * **Directedness:** + * - ✅ Directed graphs + * - ✅ Undirected graphs + * - ✅ Mixed (with edge direction semantics) + * + * **Edge Properties:** + * - ✅ Unweighted edges (BFS finds shortest paths) + * - ✅ Weighted edges (weights ignored, treats as unweighted) + * - ✅ Uniform weights (optimal shortest path algorithm) + * - ✅ Multi-edges: All edges examined, vertices visited once + * - ✅ Self-loops: Examined but don't affect traversal + * + * **Graph Structure:** + * - ✅ Connected graphs + * - ✅ Disconnected graphs (visits reachable component) + * - ✅ Acyclic graphs (DAG) + * - ✅ Cyclic graphs (visited tracking prevents infinite loops) + * - ✅ Trees (optimal level-order traversal) + * + * **Container Requirements:** + * - Requires: `index_adjacency_list` (vertex IDs are indices) + * - Requires: `input_range` with convertible elements + * - Works with: All `dynamic_graph` container combinations + * - Works with: Vector-based containers (vov, vol, vofl, etc.) 
+ * - Limitations: Requires contiguous vertex IDs for visited tracking + * + * @tparam G Graph type satisfying index_adjacency_list concept + * @tparam Sources Input range of source vertex IDs + * @tparam Visitor Visitor type with optional callback methods + * + * @param g The graph to traverse (forwarding reference) + * @param sources Range of starting vertex IDs + * @param visitor Visitor object to receive traversal events (default: empty_visitor) + * + * @pre `g` must not be modified during traversal + * @pre All vertex IDs in `sources` must be valid: `source < num_vertices(g)` + * @pre `Visitor` methods must not modify graph structure + * + * @post All vertices reachable from any source are visited exactly once (provided `sources` contains no duplicate IDs) + * @post `visitor` callbacks invoked in BFS order + * @post Graph `g` is unchanged + * + * @par Exception Safety + * + * **Guarantee:** Basic exception safety + * + * **Throws:** + * - May throw `std::bad_alloc` if visited array or queue cannot allocate memory + * - May propagate exceptions from visitor callbacks + * - May propagate exceptions from container operations + * + * **State after exception:** + * - Graph `g` remains unchanged + * - Visitor state depends on implementation + * - Partial traversal may have occurred + * + * @par Visitor Callbacks + * + * The visitor can optionally implement any of these methods; each receives the graph `g` first, and `v` is a vertex or a vertex ID depending on which form the visitor provides: + * + * - `on_initialize_vertex(g, v)`: Called for each vertex in the initial sources + * - `on_discover_vertex(g, v)`: Called when vertex is first encountered + * - `on_examine_vertex(g, v)`: Called when vertex is dequeued for processing + * - `on_examine_edge(g, edge)`: Called for each outgoing edge examined + * - `on_finish_vertex(g, v)`: Called after all edges examined + * + * All callbacks are optional: each is invoked only when the matching `has_on_*` check 
holds (`if constexpr`). 
+ * + * @par Example Usage + * + * **Basic traversal:** + * @code + * using Graph = container::dynamic_graph<...>; + * Graph g({{0,1}, {1,2}, {2,3}}); + * + * std::vector sources = {0}; + * breadth_first_search(g, sources); // Traverses 0->1->2->3 + * @endcode + * + * **With custom visitor:** + * @code + * struct PrintVisitor { + * void on_discover_vertex(auto v) { + * std::cout << "Discovered: " << v << "\n"; + * } + * }; + * + * PrintVisitor visitor; + * std::vector sources = {0}; + * breadth_first_search(g, sources, visitor); + * @endcode + * + * **Multi-source BFS:** + * @code + * std::vector sources = {0, 5, 10}; // Start from 3 vertices + * breadth_first_search(g, sources); // Explores from all simultaneously + * @endcode + * + * @par Implementation Notes + * + * **Data Structures:** + * - Queue: `std::queue` for FIFO vertex processing + * - Visited: `std::vector` for O(1) lookup (space-efficient) + * - No distance tracking (use BFS views for distances) + * + * **Design Decisions:** + * 1. **Why visitor pattern?** + * - Flexibility: Clients customize behavior without modifying algorithm + * - Performance: Callbacks inlined via template, zero overhead + * - Extensibility: Easy to add tracking, statistics, early termination + * + * 2. **Why multi-source as primary interface?** + * - Generality: Single-source is special case + * - Efficiency: No overhead vs separate single-source implementation + * - Use cases: Multi-source shortest paths, reachability from sets + * + * 3. 
**Why std::vector for visited tracking?** + * - Space efficiency: 1 bit per vertex (8x smaller than std::vector) + * - Performance: Modern std::vector optimizations competitive with bitset + * - Flexibility: Size determined at runtime + * + * **Optimization Opportunities:** + * - For small graphs: Use std::bitset if vertex count known at compile time + * - For sparse visitation: Use std::unordered_set (map-based containers) + * - For parallel BFS: Use concurrent queue and atomic visited flags + * + * @see breadth_first_search(G&&, vertex_id_t, Visitor&&) Single-source convenience wrapper + * @see views::vertices_bfs BFS view for range-based traversal + * @see connected_components For component detection using BFS + * + * @par References + * + * - Moore, E. F. (1959). "The shortest path through a maze". *Proceedings of the International Symposium on the Theory of Switching*. Harvard University Press. + * - Cormen et al. (2009). *Introduction to Algorithms* (3rd ed.). MIT Press. Section 22.2. 
+ */ +template +requires std::convertible_to, vertex_id_t> +void breadth_first_search(G&& g, // graph + const Sources& sources, + Visitor&& visitor = empty_visitor()) { + using id_type = vertex_id_store_t; + static_assert(std::is_same_v>, + "vertex_id_store_t should equal vertex_id_t for index_adjacency_list"); + + // Initialize BFS data structures + std::queue Q; // FIFO queue for level-order traversal + std::vector visited(num_vertices(g), false); // Track visited vertices to prevent cycles + + // Initialize all source vertices + for (auto uid : sources) { + // Notify visitor of initialization + if constexpr (has_on_initialize_vertex) { + visitor.on_initialize_vertex(g, *find_vertex(g, uid)); + } else if constexpr (has_on_initialize_vertex_id) { + visitor.on_initialize_vertex(g, uid); + } + if constexpr (has_on_discover_vertex) { + visitor.on_discover_vertex(g, *find_vertex(g, uid)); + } else if constexpr (has_on_discover_vertex_id) { + visitor.on_discover_vertex(g, uid); + } + // Mark source as visited and add to queue + visited[uid] = true; + Q.push(uid); + } + + // Main BFS loop: process vertices in level-order + while (!Q.empty()) { + // Dequeue next vertex to examine + id_type uid = Q.front(); + Q.pop(); + + // Notify visitor that we're examining this vertex + if constexpr (has_on_examine_vertex) { + visitor.on_examine_vertex(g, *find_vertex(g, uid)); + } else if constexpr (has_on_examine_vertex_id) { + visitor.on_examine_vertex(g, uid); + } + + // Explore all edges from current vertex + for (auto&& [vid, uv] : views::incidence(g, *find_vertex(g, uid))) { + // Notify visitor about this edge + if constexpr (has_on_examine_edge) { + visitor.on_examine_edge(g, uv); + } + + // If target vertex not yet visited, discover it + if (!visited[vid]) { + visited[vid] = true; // Mark as visited before queueing + if constexpr (has_on_discover_vertex) { + visitor.on_discover_vertex(g, *find_vertex(g, vid)); + } else if constexpr (has_on_discover_vertex_id) { + 
visitor.on_discover_vertex(g, vid); + } + Q.push(vid); // Add to queue for later examination + } + } + + // Notify visitor that we've finished examining all edges from this vertex + if constexpr (has_on_finish_vertex) { + visitor.on_finish_vertex(g, *find_vertex(g, uid)); + } else if constexpr (has_on_finish_vertex_id) { + visitor.on_finish_vertex(g, uid); + } + } +} + +/** + * @brief Single-source breadth-first search with visitor pattern. + * + * Convenience wrapper for BFS starting from a single source vertex. + * This function delegates to the multi-source version by wrapping the + * source in a std::array, providing the same visitor pattern capabilities + * with simpler API for the common single-source case. + * + * @par Complexity Analysis + * + * | Case | Time | Space | + * |------|------|-------| + * | All cases | O(V + E) | O(V) | + * + * Identical to multi-source version since delegation overhead is negligible. + * + * @tparam G Graph type satisfying index_adjacency_list concept + * @tparam Visitor Visitor type with optional callback methods + * + * @param g The graph to traverse (forwarding reference) + * @param source Starting vertex ID + * @param visitor Visitor object to receive traversal events (default: empty_visitor) + * + * @pre `source` must be valid: `source < num_vertices(g)` + * @post All vertices reachable from `source` are visited exactly once + * + * @par Exception Safety + * Basic exception safety (same as multi-source version) + * + * @par Example Usage + * + * **Simple traversal:** + * @code + * using Graph = container::dynamic_graph<...>; + * Graph g({{0,1}, {1,2}, {2,3}}); + * breadth_first_search(g, 0); // Start from vertex 0 + * @endcode + * + * **With visitor:** + * @code + * struct DepthTracker { + * std::unordered_map depths; + * int current_depth = 0; + * + * void on_discover_vertex(auto v) { + * depths[v] = current_depth; + * } + * }; + * + * DepthTracker tracker; + * breadth_first_search(g, 0, tracker); + * // tracker.depths 
now contains BFS depths from vertex 0 + * @endcode + * + * @see breadth_first_search(G&&, Sources&&, Visitor&&) Multi-source version (implementation) + * @see views::vertices_bfs BFS view for range-based traversal + */ +template +void breadth_first_search(G&& g, // graph + const vertex_id_t& source, // starting vertex_id + Visitor&& visitor = empty_visitor()) { + // Wrap single source in array and delegate to multi-source version + std::array, 1> sources{source}; + breadth_first_search(std::forward(g), sources, std::forward(visitor)); +} + +} // namespace graph + +#endif // GRAPH_BREADTH_FIRST_SEARCH_HPP diff --git a/include/graph/algorithm/index/connected_components.hpp b/include/graph/algorithm/index/connected_components.hpp new file mode 100644 index 0000000..9ab3a47 --- /dev/null +++ b/include/graph/algorithm/index/connected_components.hpp @@ -0,0 +1,873 @@ +/** + * @file connected_components.hpp + * + * @brief Connected components algorithms for undirected and directed graphs. + * + * This file provides three algorithms for finding connected components: + * - kosaraju: Finds strongly connected components in directed graphs (requires transpose) + * - connected_components: Finds connected components in undirected graphs + * - afforest: Fast parallel-friendly connected components using neighbor sampling + * + * @copyright Copyright (c) 2024 + * + * SPDX-License-Identifier: BSL-1.0 + * + * @authors + * Andrew Lumsdaine + * Phil Ratzloff + * Kevin Deweese + */ + +#include "graph/graph.hpp" +#include "graph/views/vertexlist.hpp" +#include "graph/views/dfs.hpp" +#include "graph/views/bfs.hpp" +#include +#include +#include + +#ifndef GRAPH_CC_HPP +# define GRAPH_CC_HPP + +namespace graph { + +// Using declarations for new namespace structure +using adj_list::index_adjacency_list; +using adj_list::index_bidirectional_adjacency_list; +using adj_list::adjacency_list; +using adj_list::vertex_id_t; +using adj_list::edge_t; +using adj_list::vertex_range_t; +using 
adj_list::vertices; +using adj_list::edges; +using adj_list::target_id; + +//============================================================================= +// kosaraju - Strongly Connected Components (Directed Graphs) +//============================================================================= + +/** + * @brief Finds strongly connected components in a directed graph using Kosaraju's algorithm. + * + * A strongly connected component (SCC) is a maximal set of vertices where every vertex + * is reachable from every other vertex in the set via directed paths. Kosaraju's algorithm + * performs two depth-first searches: one on the original graph to determine finish times, + * and one on the transpose graph to identify components. + * + * @par Complexity Analysis + * + * | Case | Time | Space | + * |------|------|-------| + * | All cases | O(V + E) | O(V) | + * + * - Time: Two DFS traversals on graph and transpose + * - Space: O(V) for visited array, finish order, and component assignment + * + * @par Supported Graph Properties + * + * - ✅ Directed graphs (required) + * - ❌ Undirected graphs (use connected_components instead) + * - ✅ Weighted edges (weights ignored) + * - ✅ Self-loops (handled correctly) + * - ✅ Multi-edges (treated as single edge) + * - ✅ Disconnected graphs + * - ✅ Cyclic graphs + * + * @par Container Requirements + * + * - Requires: `index_adjacency_list` + * - Requires: `index_adjacency_list` (transpose) + * - Requires: `random_access_range` + * - Works with: All `dynamic_graph` container combinations + * + * @tparam G Graph type (must satisfy index_adjacency_list concept) + * @tparam GT Graph transpose type (must satisfy index_adjacency_list concept) + * @tparam Component Random access range for component IDs + * + * @param g The directed graph to analyze + * @param g_t The transpose of graph g (edges reversed) + * @param component Output: component[v] = component ID for vertex v + * + * @pre `component.size() >= num_vertices(g)` + * @pre 
`num_vertices(g) == num_vertices(g_t)` + * @pre `g_t` is the transpose of `g` (all edges reversed) + * + * @post `component[v]` contains the SCC ID for vertex v + * @post Component IDs are assigned 0, 1, 2, ..., num_components-1 + * @post Vertices in the same SCC have the same component ID + * + * @throws May throw std::bad_alloc if internal allocations fail + * + * @par Example + * @code + * // Create directed graph: 0->1->2->0 (cycle), 2->3 + * using Graph = container::dynamic_graph<...>; + * Graph g({{0,1}, {1,2}, {2,0}, {2,3}}); + * Graph g_t = transpose(g); // Transpose the graph + * + * std::vector component(num_vertices(g)); + * kosaraju(g, g_t, component); + * + * // component = {0, 0, 0, 1} // Vertices 0,1,2 in SCC 0; vertex 3 in SCC 1 + * @endcode + * + * @par Algorithm Overview + * + * 1. **First DFS Pass (on g):** + * - Visit all vertices and compute finish times + * - Store vertices in post-order (finish time order) + * + * 2. 
**Second DFS Pass (on g_t):** + * - Process vertices in reverse finish time order + * - Each DFS tree in this pass is one SCC + * - Assign component IDs to vertices as they're visited + * + * @par Implementation Notes + * + * - Uses iterative DFS (stack-based) to avoid recursion stack overflow; vertices are marked visited when expanded (popped), so the stack may hold duplicate pending entries (up to O(E)) + * - Finish times tracked via explicit ordering vector + * - Uses numeric_limits::max() as unvisited marker + * - Component IDs assigned in topological order of SCCs + * + * @see connected_components For undirected graphs + * @see afforest For faster parallel-friendly alternative + */ +template +void kosaraju(G&& g, // graph + GT&& g_t, // graph transpose + Component& component // out: strongly connected component assignment + +) { + size_t N(num_vertices(g)); + std::vector visited(N, false); + using CT = typename std::decay::type; + std::fill(component.begin(), component.end(), std::numeric_limits::max()); + std::vector> order; + + // Store a reference to avoid forwarding reference issues in lambda + auto& g_ref = g; + + // Helper: 
iterative DFS to compute finish times (post-order) + // This creates reverse topological ordering for SCC discovery + auto dfs_finish_order = [&](const vertex_id_t& start) { + std::stack, bool>> stack; // (vertex, children_visited) + stack.push({start, false}); + // Vertices are marked visited when first expanded (popped), not when pushed; see below + + while (!stack.empty()) { + auto [uid, children_visited] = stack.top(); + stack.pop(); + + if (children_visited) { + // All children have been visited, add to finish order (post-order) + // This ensures children finish before parents in topological sort + order.push_back(uid); + } else if (!visited[uid]) { + // First expansion of uid; stale duplicate entries fail the test above and are dropped + visited[uid] = true; + stack.push({uid, true}); // re-push with children_visited=true to record finish time later + + // Push all unvisited neighbors onto stack + // They will be processed (and finish) before this vertex + auto uid_vertex = *find_vertex(g_ref, uid); + for (auto&& [vid, e] : views::incidence(g_ref, uid_vertex)) { + if (!visited[vid]) { + // Do not mark vid here: marking on push breaks post-order (e.g. 
edges 0->1, 0->2, 2->1) + stack.push({vid, false}); + } + } + } + } + }; + + // First pass: compute finish times on original graph + // Visit all vertices and create reverse topological ordering + for (auto&& vinfo : views::vertexlist(g_ref)) { + auto uid = vertex_id(g_ref, vinfo.vertex); + if (!visited[uid]) { + dfs_finish_order(uid); + } + } + + // Second pass: DFS on transpose graph in reverse finish order + // Each DFS tree in this pass corresponds to exactly one SCC + size_t cid = 0; + std::ranges::reverse_view reverse{order}; + for (auto& uid : reverse) { + if (component[uid] == std::numeric_limits::max()) { + // Use DFS view on transpose to find all vertices in this SCC + // In transpose: if u->v in original, v->u in transpose + // So we find all vertices that can reach this root in original graph + graph::views::vertices_dfs_view> dfs(g_t, uid); + for (auto&& [v] : dfs) { + auto vid = vertex_id(g_t, v); + if (component[vid] != std::numeric_limits::max()) { + // Already assigned to SCC, skip this branch 
+ dfs.cancel(graph::views::cancel_search::cancel_branch); + } else { + component[vid] = cid; // Assign to current SCC + } + } + ++cid; // Move to next SCC + } + } +} + +//============================================================================= +// kosaraju (bidirectional) - Single-graph SCC using in_edges +//============================================================================= + +/** + * @brief Finds strongly connected components using in_edges (no transpose needed). + * + * When the graph satisfies `index_bidirectional_adjacency_list`, the second DFS + * pass can traverse incoming edges directly instead of requiring a separate + * transpose graph. This eliminates the O(V + E) cost of constructing and + * storing the transpose. + * + * @par Complexity Analysis + * + * | Case | Time | Space | + * |------|------|-------| + * | All cases | O(V + E) | O(V) | + * + * Same asymptotic cost as the two-graph overload, but with lower constant + * factor due to avoided transpose construction. 
+ * + * @par Container Requirements + * + * - Requires: `index_bidirectional_adjacency_list` (in_edges + index vertices) + * - Requires: `random_access_range` + * - Works with: All bidirectional `dynamic_graph` container combinations + * + * @tparam G Graph type (must satisfy index_bidirectional_adjacency_list concept) + * @tparam Component Random access range for component IDs + * + * @param g The directed bidirectional graph to analyze + * @param component Output: component[v] = component ID for vertex v + * + * @pre `component.size() >= num_vertices(g)` + * + * @post `component[v]` contains the SCC ID for vertex v + * @post Component IDs are assigned 0, 1, 2, ..., num_components-1 + * @post Vertices in the same SCC have the same component ID + * + * @par Example + * @code + * // Create bidirectional directed graph: 0->1->2->0 (cycle), 2->3 + * using Traits = container::vov_graph_traits; + * container::dynamic_graph g({{0,1}, {1,2}, {2,0}, {2,3}}); + * + * std::vector component(num_vertices(g)); + * kosaraju(g, component); // No transpose needed! 
+ * @endcode + * + * @see kosaraju(G&&, GT&&, Component&) For non-bidirectional graphs + */
+template <adj_list::index_bidirectional_adjacency_list G, std::ranges::random_access_range Component>
+void kosaraju(G&&        g,        // bidirectional graph
+              Component& component // out: strongly connected component assignment
+) {
+  using id_type = vertex_id_t<G>;
+  using CT      = typename std::decay<decltype(*component.begin())>::type;
+
+  // Sentinel meaning "not yet assigned to any SCC".
+  const CT unassigned = std::numeric_limits<CT>::max();
+  std::fill(component.begin(), component.end(), unassigned);
+
+  const size_t         N(num_vertices(g));
+  std::vector<bool>    visited(N, false);
+  std::vector<id_type> order; // vertices in DFS finish (post-order) sequence
+  order.reserve(N);
+
+  // First pass: iterative DFS over outgoing edges that records finish order.
+  //
+  // A vertex is marked visited when it is popped (actually entered), NOT when it
+  // is pushed. Marking at push time lets a vertex be claimed by a shallow parent
+  // before a deeper path reaches it, which yields an order that is not a DFS
+  // post-order. Example: edges 0->1, 0->2, 2->1 would finish as [2, 1, 0], and
+  // the second pass would then merge vertices 1 and 2 into one SCC although all
+  // three vertices are singleton SCCs.
+  //
+  // The price is that a vertex may sit on the stack several times (once per
+  // incoming tree/cross edge), so the stack is bounded by the number of edges
+  // rather than the number of vertices.
+  auto dfs_finish_order = [&](const id_type& start) {
+    std::stack<std::pair<id_type, bool>> stack; // (vertex, children_expanded)
+    stack.push({start, false});
+
+    while (!stack.empty()) {
+      auto [uid, children_expanded] = stack.top();
+      stack.pop();
+
+      if (children_expanded) {
+        order.push_back(uid); // every descendant has already finished
+        continue;
+      }
+      if (visited[uid]) {
+        continue; // stale entry: vertex was entered through another path
+      }
+      visited[uid] = true;
+      stack.push({uid, true}); // finish marker, popped after all descendants
+
+      auto uid_vertex = *find_vertex(g, uid);
+      for (auto&& [vid, uv] : views::incidence(g, uid_vertex)) {
+        if (!visited[vid]) {
+          stack.push({vid, false});
+        }
+      }
+    }
+  };
+
+  for (auto&& vinfo : views::vertexlist(g)) {
+    auto uid = vertex_id(g, vinfo.vertex);
+    if (!visited[uid]) {
+      dfs_finish_order(uid);
+    }
+  }
+
+  // Second pass: walk incoming edges (via in_edges + source_id) from each still
+  // unassigned vertex, taking roots in reverse finish order. Everything reached
+  // from one root forms exactly one SCC. Only reachability matters here, so
+  // assigning the component at push time is sufficient.
+  size_t cid = 0;
+  for (auto root_it = order.rbegin(); root_it != order.rend(); ++root_it) {
+    const id_type root = *root_it;
+    if (component[root] != unassigned) {
+      continue; // already absorbed into an earlier SCC
+    }
+
+    std::stack<id_type> dfs_stack;
+    dfs_stack.push(root);
+    component[root] = cid;
+
+    while (!dfs_stack.empty()) {
+      auto current = dfs_stack.top();
+      dfs_stack.pop();
+
+      auto v = *adj_list::find_vertex(g, current);
+      for (auto&& ie : adj_list::in_edges(g, v)) {
+        auto src = adj_list::source_id(g, ie);
+        if (component[src] == unassigned) {
+          component[src] = cid;
+          dfs_stack.push(src);
+        }
+      }
+    }
+    ++cid; // next SCC
+  }
+}
+ +//============================================================================= +// connected_components - Connected Components (Undirected Graphs) +//============================================================================= + +/** + * @brief Finds connected components in an undirected graph using iterative DFS. + * + * A connected component is a maximal set of vertices where there exists a path + * between any pair of vertices in the set. This algorithm uses depth-first search + * with an explicit stack to identify all connected components in the graph. 
+ * + * @par Complexity Analysis + * + * | Case | Time | Space | + * |------|------|-------| + * | All cases | O(V + E) | O(V) | + * + * - Time: Single traversal visiting each vertex and edge once + * - Space: O(V) for component assignment and DFS stack + * + * @par Supported Graph Properties + * + * - ✅ Undirected graphs (every edge is reachable from both of its endpoints) + * - ⚠️ Directed graphs: only outgoing edges are followed, so the result matches the + * undirected interpretation only when each edge is also stored in the reverse direction + * - ✅ Weighted edges (weights ignored) + * - ✅ Self-loops (handled correctly, counted as component) + * - ✅ Multi-edges (treated as single edge) + * - ✅ Disconnected graphs (primary use case) + * - ✅ Acyclic graphs + * - ✅ Cyclic graphs + * + * @par Container Requirements + * + * - Requires: `index_adjacency_list` (vertex IDs are indices) + * - Requires: `random_access_range` + * - Works with: All `dynamic_graph` container combinations + * - Works with: Vector-based containers (vov, vol, vofl, etc.) + * + * @tparam G Graph type (must satisfy index_adjacency_list concept) + * @tparam Component Random access range for component IDs + * + * @param g The graph to analyze (treated as undirected) + * @param component Output: component[v] = component ID for vertex v + * + * @return Number of connected components found + * + * @pre `component.size() >= num_vertices(g)` + * + * @post `component[v]` contains the component ID for vertex v + * @post Component IDs are assigned 0, 1, 2, ..., num_components-1 + * @post Vertices in the same component have the same component ID + * @post Return value equals the number of distinct component IDs + * @post Isolated vertices (no edges) are assigned unique component IDs + * + * @throws May throw std::bad_alloc if internal allocations fail + * + * @par Example + * @code + * // Create undirected graph with 2 components: {0,1,2} and {3,4} + * using Graph = container::dynamic_graph<...>; + * Graph g(5); + * // Component 1: 0-1-2 + * g.add_edge(0, 1); + * g.add_edge(1, 2); + * // Component 2: 3-4 + * g.add_edge(3, 4); + * + * 
std::vector component(num_vertices(g)); + * size_t num_components = connected_components(g, component); + * + * // num_components = 2 + * // component = {0, 0, 0, 1, 1} + * @endcode + * + * @par Algorithm Overview + * + * 1. Initialize all components to unvisited (numeric_limits::max) + * 2. For each unvisited vertex: + * - Start new component with unique ID + * - Use DFS to visit all reachable vertices + * - Assign component ID to all visited vertices + * 3. Return total number of components found + * + * @par Implementation Notes + * + * - Uses iterative DFS with explicit stack (no recursion) + * - Isolated vertices (degree 0) get unique component IDs + * - Handles vertices with no edges specially for efficiency + * - Uses numeric_limits::max() as unvisited marker + * + * @par Special Cases + * + * - **Isolated vertices:** Each gets its own component ID + * - **Empty graph:** Returns 0 (no components) + * - **Single vertex:** Returns 1 (one component) + * - **Fully connected:** Returns 1 (one component) + * + * @see kosaraju For strongly connected components in directed graphs + * @see afforest For faster parallel-friendly alternative + */ +template +size_t connected_components(G&& g, // graph + Component& component // out: connected component assignment +) { + size_t N(num_vertices(g)); + using CT = typename std::decay::type; + // Initialize all components as unvisited + std::fill(component.begin(), component.end(), std::numeric_limits::max()); + + std::stack> S; + CT cid = 0; // Current component ID + for (vertex_id_t uid = 0; uid < N; ++uid) { + if (component[uid] < std::numeric_limits::max()) { + continue; // Already assigned to a component + } + + // Handle isolated vertices (no edges) + if (!num_edges(g, uid)) { + component[uid] = cid++; + continue; + } + + // Start DFS for new component + component[uid] = cid; + S.push(uid); + while (!S.empty()) { + auto vid = S.top(); + S.pop(); + // Visit all unvisited neighbors and add to same component + for (auto&& 
einfo : views::basic_incidence(g, vid)) { + auto wid = einfo.target_id; + if (component[wid] == std::numeric_limits::max()) { + component[wid] = cid; // Same component as parent + S.push(wid); + } + } + } + ++cid; // Move to next component + } + return cid; +} + +//============================================================================= +// Helper Functions for afforest Algorithm +//============================================================================= + +/** + * @brief Links two vertices into the same component using union-find. + * + * Internal helper for afforest algorithm. Performs path compression while + * linking two components together. + * + * @tparam vertex_id_t Vertex ID type + * @tparam Component Random access range for component IDs + * @param u First vertex ID + * @param v Second vertex ID + * @param component Component assignment array (modified in-place) + */ +template +static void link(vertex_id_t u, vertex_id_t v, Component& component) { + vertex_id_t p1 = component[u]; // Parent of u + vertex_id_t p2 = component[v]; // Parent of v + + // Follow parent pointers with path compression until roots converge + while (p1 != p2) { + vertex_id_t high = std::max(p1, p2); // Higher ID + vertex_id_t low = p1 + (p2 - high); // Lower ID (clever: avoids branch) + vertex_id_t p_high = component[high]; // Parent of higher ID + + // Already linked: high points to low + if (p_high == low) + break; + + if (p_high == high) { + // high is a root (points to itself) + if (component[high] == high) { + component[high] = low; // Link high root to low + break; + } else { + // Race condition: another thread changed it; retry with low + high = low; + } + } + + // Path compression: follow parent links and try again + p1 = component[p_high]; + p2 = component[low]; + } +} + +/** + * @brief Compresses component paths for improved query performance. + * + * Internal helper for afforest algorithm. Performs path compression to + * flatten the component tree structure. 
+ * + * @tparam Component Random access range for component IDs + * @param component Component assignment array (modified in-place) + */
+template <std::ranges::random_access_range Component>
+static void compress(Component& component) {
+  // One ascending pass of pointer jumping: every entry is re-pointed at its
+  // parent's parent. When entries only ever point at indices no larger than
+  // their own (link() re-points an entry at a smaller ID only), a parent has
+  // already been processed by the time its children are reached, so the pass
+  // leaves each entry pointing at the root of its tree.
+  for (size_t i = 0; i < component.size(); ++i) {
+    const auto parent      = component[i];
+    const auto grandparent = component[parent];
+    if (parent != grandparent) {
+      component[i] = grandparent;
+    }
+  }
+}
+
+/**
+ * @brief Estimates the most frequent component ID by random sampling.
+ *
+ * Internal helper for afforest algorithm. Draws @p num_samples indices uniformly
+ * at random (with replacement), tallies the component ID stored at each sampled
+ * index and returns the ID seen most often. A large component is likely to
+ * dominate the tally, so this identifies it without scanning every vertex.
+ *
+ * The generator is default-constructed on every call, so the sampled sequence,
+ * and therefore the result, is the same for identical input.
+ *
+ * @tparam vertex_id_t Vertex ID type
+ * @tparam Component   Random access range for component IDs
+ * @param component   Component assignment array
+ * @param num_samples Number of random samples to take (default: 1024)
+ * @return The most frequently occurring component ID in the sample, or a
+ *         value-initialized ID when @p component is empty or @p num_samples is 0
+ */
+template <typename vertex_id_t, std::ranges::random_access_range Component>
+static vertex_id_t sample_frequent_element(Component& component, size_t num_samples = 1024) {
+  // Nothing to sample: without this guard `size() - 1` would wrap around and the
+  // empty tally below would be dereferenced.
+  const size_t size = component.size();
+  if (size == 0 || num_samples == 0) {
+    return vertex_id_t{};
+  }
+
+  std::unordered_map<vertex_id_t, size_t>    counts(32);
+  std::mt19937                               gen;
+  std::uniform_int_distribution<vertex_id_t> distribution(0, static_cast<vertex_id_t>(size - 1));
+
+  // Tally the component ID found at each sampled index.
+  for (size_t i = 0; i < num_samples; ++i) {
+    const vertex_id_t sample = distribution(gen);
+    ++counts[component[sample]];
+  }
+
+  // The tally holds at least one entry here, so max_element is dereferenceable.
+  const auto most_frequent = std::max_element(counts.begin(), counts.end(),
+                                              [](const auto& a, const auto& b) { return a.second < b.second; });
+  return most_frequent->first;
+}
+ +//============================================================================= +// 
afforest - Fast Parallel-Friendly Connected Components +//============================================================================= + +/** + * @brief Finds connected components using the Afforest algorithm (neighbor sampling). + * + * Afforest is a fast, parallel-friendly algorithm for finding connected components that + * uses neighbor sampling and union-find with path compression. It processes edges in + * rounds, linking vertices through their first few neighbors, then samples to identify + * the largest component before processing remaining edges. This approach is particularly + * effective for large graphs and can be parallelized efficiently. + * + * @par Complexity Analysis + * + * | Case | Time | Space | + * |------|------|-------| + * | Best case | O(V) | O(V) | + * | Average case | O(V + E·α(V)) | O(V) | + * | Worst case | O(V + E·α(V)) | O(V) | + * + * Where α(V) is the inverse Ackermann function (effectively constant). + * + * - Time: Nearly linear due to union-find with path compression + * - Space: O(V) for component array only (no additional structures) + * - Practical performance: Often faster than DFS-based algorithms for large graphs + * + * @par Supported Graph Properties + * + * - ✅ Undirected graphs (primary use case) + * - ✅ Directed graphs (treats as undirected) + * - ✅ Weighted edges (weights ignored) + * - ✅ Self-loops (handled correctly) + * - ✅ Multi-edges (all edges processed) + * - ✅ Disconnected graphs + * - ✅ Large-scale graphs (designed for performance) + * - ✅ Parallel execution friendly (this implementation is serial) + * + * @par Container Requirements + * + * - Requires: `index_adjacency_list` (vertex IDs are indices) + * - Requires: `random_access_range` + * - Requires: Bidirectional conversion between vertex_id_t and Component value type + * - Works with: All `dynamic_graph` container combinations + * - Works with: Vector-based containers for best performance + * + * @tparam G Graph type (must satisfy index_adjacency_list 
concept) + * @tparam Component Random access range for component IDs + * + * @param g The graph to analyze (treated as undirected) + * @param component Output: component[v] = component ID for vertex v + * @param neighbor_rounds Number of neighbor sampling rounds (default: 2) + * + * @pre `component.size() >= num_vertices(g)` + * @pre `neighbor_rounds >= 0` + * + * @post `component[v]` contains the component ID for vertex v + * @post Vertices in the same component have the same component ID + * @post Component IDs form a union-find forest (may need compression for queries) + * + * @throws May throw std::bad_alloc if internal allocations fail + * + * @par Example + * @code + * using Graph = container::dynamic_graph<...>; + * Graph g({{0,1}, {1,2}, {3,4}, {4,5}}); // Two components + * + * std::vector component(num_vertices(g)); + * afforest(g, component); + * + * // Compress to get canonical component IDs + * compress(component); + * @endcode + * + * @par Algorithm Overview + * + * 1. **Initialization:** Each vertex is its own component + * 2. **Neighbor Rounds:** For r = 0 to neighbor_rounds-1: + * - Link each vertex to its r-th neighbor + * - Compress paths + * 3. **Sampling:** Identify most frequent component (largest) + * 4. **Remaining Edges:** Process edges beyond neighbor_rounds for non-largest components + * 5. **Final Compression:** Flatten union-find structure + * + * @par Implementation Notes + * + * - Uses union-find with path compression for near-constant time operations + * - Neighbor sampling reduces total edge processing for many graphs + * - Sampling step identifies largest component to skip redundant work + * - More efficient than DFS for graphs with large components + * - **Serial implementation**: Current code is single-threaded + * + * @par Parallelization Potential + * + * Afforest is designed to be highly parallelizable and is based on the parallel + * algorithm by Sutton et al. (2018). 
To implement a multi-threaded version: + * + * - **Atomic operations in `link()`**: Replace regular reads/writes to `component[]` + * with atomic compare-and-swap operations to ensure thread-safe union-find merging + * - **Parallel loop constructs**: Use OpenMP, TBB, or C++17 parallel algorithms + * to parallelize the vertex/edge processing loops + * - **Lock-free union-find**: The algorithm's union-find operations can resolve + * conflicts through atomic CAS, allowing concurrent execution without locks + * - **Phase synchronization**: Barrier synchronization after neighbor rounds + * and compress operations to ensure consistency + * + * The algorithm's design (neighbor sampling + union-find) makes it particularly + * well-suited for parallelization compared to DFS-based approaches, as different + * threads can independently process edges and conflicts are naturally resolved + * by the union-find structure + * + * @par Performance Tuning + * + * - `neighbor_rounds=1`: Fastest, good for dense graphs + * - `neighbor_rounds=2`: Default, balanced performance + * - `neighbor_rounds>2`: More thorough initial linking, diminishing returns + * - For sparse graphs: Lower values perform better + * - For dense graphs: Higher values may improve early component formation + * + * @see connected_components For simpler DFS-based alternative + * @see kosaraju For directed graph strongly connected components + * + * @par References + * + * - Sutton et al. (2018). 
"Afforest: A Fast Parallel Connected Components Algorithm" + * International Conference on Parallel Processing (ICPP) + */
+template <adj_list::index_adjacency_list G, std::ranges::random_access_range Component>
+requires std::convertible_to<range_value_t<Component>, vertex_id_t<G>> &&
+         std::convertible_to<vertex_id_t<G>, range_value_t<Component>>
+void afforest(G&&          g,         // graph
+              Component&   component, // out: connected component assignment
+              const size_t neighbor_rounds = 2) {
+  using id_type = vertex_id_t<G>;
+
+  // Initialize: each vertex is its own component. The seed has the range's value
+  // type so the running counter is not limited to the range of `int`.
+  std::iota(component.begin(), component.end(), range_value_t<Component>{});
+
+  // Nothing to link or sample; sampling an empty range would read out of bounds.
+  if (component.begin() == component.end()) {
+    return;
+  }
+
+  // Phase 1: neighbor sampling. In round r every vertex that has an r-th outgoing
+  // edge is linked with that edge's target, then the forest is flattened.
+  for (size_t r = 0; r < neighbor_rounds; ++r) {
+    for (auto&& [uid, u] : views::vertexlist(g)) {
+      auto&& out_edges = edges(g, u);
+      if (r < size(out_edges)) {
+        auto it = out_edges.begin();
+        std::advance(it, r); // r-th neighbor
+        link(static_cast<id_type>(uid), static_cast<id_type>(target_id(g, *it)), component);
+      }
+    }
+    compress(component);
+  }
+
+  // Phase 2: estimate which component ID is the most common one.
+  const id_type c = sample_frequent_element<id_type>(component);
+
+  // Phase 3: for vertices outside that component, link through the edges that
+  // the sampling rounds did not look at (positions neighbor_rounds and beyond).
+  for (auto&& [uid, u] : views::vertexlist(g)) {
+    if (component[uid] == c) {
+      continue; // vertex already belongs to the sampled component
+    }
+    auto&& out_edges = edges(g, u);
+    if (neighbor_rounds < size(out_edges)) {
+      auto it = out_edges.begin();
+      std::advance(it, neighbor_rounds); // skip edges handled in phase 1
+      for (; it != out_edges.end(); ++it) {
+        link(static_cast<id_type>(uid), static_cast<id_type>(target_id(g, *it)), component);
+      }
+    }
+  }
+
+  compress(component); // final flattening
+}
+ +/** + * @brief Finds connected components using Afforest with bidirectional edge processing. 
+ * + * This overload of afforest processes edges in both directions (forward and reverse) + * by accepting both the original graph and its transpose. This can improve convergence + * for directed graphs when treating them as undirected, and may find components faster + * in some graph structures. + * + * @par Complexity Analysis + * + * Same as single-graph afforest, but processes edges in both directions: + * - Time: O(V + (E + E_t)·α(V)) where E_t is edges in transpose + * - Space: O(V) (transpose not counted) + * + * @par Additional Requirements + * + * All requirements from single-graph afforest, plus: + * - `g_t` must be transpose of `g` (edges reversed) OR contain additional edges + * - Can be used for bidirectional edge processing in undirected graphs represented as directed + * + * @tparam G Graph type (must satisfy index_adjacency_list concept) + * @tparam GT Graph transpose type (must satisfy adjacency_list concept) + * @tparam Component Random access range for component IDs + * + * @param g The graph to analyze + * @param g_t The transpose of g (or additional edges to process) + * @param component Output: component[v] = component ID for vertex v + * @param neighbor_rounds Number of neighbor sampling rounds (default: 2) + * + * @pre All preconditions from single-graph afforest + * @pre `num_vertices(g) == num_vertices(g_t)` + * + * @post Same postconditions as single-graph afforest + * + * @par Example + * @code + * using Graph = container::dynamic_graph<...>; + * Graph g({{0,1}, {2,3}}); + * Graph g_t = transpose(g); // g_t: {{1,0}, {3,2}} + * + * std::vector component(num_vertices(g)); + * afforest(g, g_t, component); // Process edges in both directions + * + * compress(component); + * @endcode + * + * @par Algorithm Differences + * + * Same as single-graph afforest, with additional step: + * - After processing remaining edges from g, also processes all edges from g_t + * - This ensures bidirectional reachability for vertices not in largest component + 
* + * @par Use Cases + * + * - Directed graphs represented as undirected (process both edge directions) + * - Graphs where transpose is already available + * - Improving convergence speed for certain graph topologies + * + * @see afforest(G&&, Component&, size_t) For single-graph version + */
+template <adj_list::index_adjacency_list G, adj_list::adjacency_list GT, std::ranges::random_access_range Component>
+requires std::convertible_to<range_value_t<Component>, vertex_id_t<G>> &&
+         std::convertible_to<vertex_id_t<G>, range_value_t<Component>>
+void afforest(G&&          g,         // graph
+              GT&&         g_t,       // graph transpose
+              Component&   component, // out: connected component assignment
+              const size_t neighbor_rounds = 2) {
+  using id_type = vertex_id_t<G>;
+
+  // Initialize: each vertex is its own component. The seed has the range's value
+  // type so the running counter is not limited to the range of `int`.
+  std::iota(component.begin(), component.end(), range_value_t<Component>{});
+
+  // Nothing to link or sample; sampling an empty range would read out of bounds.
+  if (component.begin() == component.end()) {
+    return;
+  }
+
+  // Phase 1: neighbor sampling over the forward edges of g. In round r every
+  // vertex that has an r-th outgoing edge is linked with that edge's target.
+  for (size_t r = 0; r < neighbor_rounds; ++r) {
+    for (auto&& [uid, u] : views::vertexlist(g)) {
+      auto&& out_edges = edges(g, u);
+      if (r < size(out_edges)) {
+        auto it = out_edges.begin();
+        std::advance(it, r); // r-th neighbor
+        link(static_cast<id_type>(uid), static_cast<id_type>(target_id(g, *it)), component);
+      }
+    }
+    compress(component);
+  }
+
+  // Phase 2: estimate which component ID is the most common one.
+  const id_type c = sample_frequent_element<id_type>(component);
+
+  // Phase 3: for vertices outside that component, link through the remaining
+  // forward edges and through every edge recorded for the vertex in g_t.
+  for (auto&& [uid, u] : views::vertexlist(g)) {
+    if (component[uid] == c) {
+      continue; // vertex already belongs to the sampled component
+    }
+
+    // Forward edges of g that phase 1 did not look at.
+    auto&& out_edges = edges(g, u);
+    if (neighbor_rounds < size(out_edges)) {
+      auto it = out_edges.begin();
+      std::advance(it, neighbor_rounds); // skip edges handled in phase 1
+      for (; it != out_edges.end(); ++it) {
+        link(static_cast<id_type>(uid), static_cast<id_type>(target_id(g, *it)), component);
+      }
+    }
+
+    // Edges of g_t. The vertex is looked up in g_t by id: `u` was obtained from
+    // g and is not used to address a different graph object.
+    auto u_t = *find_vertex(g_t, uid);
+    for (auto&& vu : edges(g_t, u_t)) {
+      link(static_cast<id_type>(uid), static_cast<id_type>(target_id(g_t, vu)), component);
+    }
+  }
+
+  compress(component); // Final compression
+}
+ +} // namespace graph + +#endif //GRAPH_CC_HPP diff --git a/include/graph/algorithm/index/depth_first_search.hpp b/include/graph/algorithm/index/depth_first_search.hpp new file mode 100644 index 0000000..2057eeb --- /dev/null +++ b/include/graph/algorithm/index/depth_first_search.hpp @@ -0,0 +1,364 @@ +/** + * @file depth_first_search.hpp + * + * @brief Depth-first search traversal algorithm for graphs. + * + * Depth-first search (DFS) is a fundamental graph traversal algorithm that explores + * vertices by going as deep as possible along each branch before backtracking. It + * classifies edges into tree, back, forward, and cross edges, making it the basis + * for cycle detection, topological sorting, strongly connected components, and many + * other graph algorithms. + * + * This implementation provides a single-source variant with customizable visitor + * callbacks for tracking traversal events and edge classification. + * + * @copyright Copyright (c) 2024 + * + * SPDX-License-Identifier: BSL-1.0 + * + * @authors Andrew Lumsdaine, Phil Ratzloff + */ + +#include "graph/graph.hpp" +#include "graph/views/incidence.hpp" +#include "graph/algorithm/traversal_common.hpp" + +#include +#include +#include +#include + +#ifndef GRAPH_DFS_ALGORITHM_HPP +# define GRAPH_DFS_ALGORITHM_HPP + +namespace graph { + +// Using declarations for new namespace structure +using adj_list::index_adjacency_list; +using adj_list::vertex_id_t; +using adj_list::find_vertex; + +/** + * @brief Single-source depth-first search with visitor pattern. + * + * Performs depth-first traversal starting from a single source vertex, calling + * visitor methods at key points during traversal. This implementation precisely + * simulates recursive DFS using an explicit stack with stored edge iterators, + * ensuring correct edge classification (tree/back/forward/cross) and proper + * vertex finish ordering. 
+ * + * DFS explores vertices by following edges as deep as possible before backtracking. + * Each vertex transitions through three color states: White (undiscovered) -> + * Gray (discovered, in progress) -> Black (finished). This three-color scheme + * enables precise classification of every edge encountered during traversal. + * + * @par Complexity Analysis + * + * | Case | Time | Space | + * |------|------|-------| + * | All cases | O(V + E) | O(V) | + * + * Where V = number of vertices, E = number of edges + * + * **Time Complexity:** + * - Each vertex is discovered and finished exactly once: O(V) + * - Each edge is examined exactly once: O(E) + * - Edge iterator advancement is O(1) per edge (stored iterators) + * - Total: O(V + E) for all cases + * + * **Space Complexity:** + * - Color array: O(V) + * - Stack: O(V) worst case (linear chain graph) + * - Each stack frame stores a vertex ID and edge iterators + * - Total auxiliary space: O(V) + * + * @par Supported Graph Properties + * + * **Directedness:** + * - ✅ Directed graphs (full edge classification: tree/back/forward/cross) + * - ✅ Undirected graphs (tree/back edges only) + * + * **Edge Properties:** + * - ✅ Unweighted edges + * - ✅ Weighted edges (weights ignored during traversal) + * - ✅ Multi-edges: All edges examined, classified independently + * - ✅ Self-loops: Classified as back edges (vertex is Gray when revisited) + * + * **Graph Structure:** + * - ✅ Connected graphs (visits all vertices) + * - ✅ Disconnected graphs (visits reachable component from source) + * - ✅ Acyclic graphs (DAGs) - no back edges, yields topological order + * - ✅ Cyclic graphs - back edges indicate cycles + * - ✅ Trees - all edges classified as tree edges + * + * **Container Requirements:** + * - Requires: `index_adjacency_list` (vertex IDs are contiguous indices) + * - Works with: All `dynamic_graph` container combinations + * - Works with: Vector-based containers (vov, vol, vofl, etc.) 
+ * - Limitations: Requires contiguous vertex IDs for color array tracking + * + * @tparam G Graph type satisfying index_adjacency_list concept + * @tparam Visitor Visitor type with optional callback methods + * + * @param g The graph to traverse (forwarding reference) + * @param source Starting vertex ID + * @param visitor Visitor object to receive traversal events (default: empty_visitor) + * + * @pre `source` must be valid: `source < num_vertices(g)` + * @pre `g` must not be modified during traversal + * @pre `Visitor` methods must not modify graph structure + * + * @post All vertices reachable from `source` are visited exactly once + * @post `visitor` callbacks invoked in DFS order + * @post `on_finish_vertex` called in reverse topological order for DAGs + * @post Graph `g` is unchanged + * + * @par Exception Safety + * + * **Guarantee:** Basic exception safety + * + * **Throws:** + * - May throw `std::bad_alloc` if color array or stack cannot allocate memory + * - May propagate exceptions from visitor callbacks + * - May propagate exceptions from container operations + * + * **State after exception:** + * - Graph `g` remains unchanged + * - Visitor state depends on implementation + * - Partial traversal may have occurred + * + * @par Visitor Callbacks + * + * The visitor can optionally implement any of these methods: + * + * - `on_initialize_vertex(g, vertex)`: Called for the source vertex before traversal begins + * - `on_start_vertex(g, vertex)`: Called when traversal begins from the source + * - `on_discover_vertex(g, vertex)`: Called when a vertex is first discovered (colored Gray) + * - `on_examine_edge(g, edge)`: Called for each outgoing edge examined + * - `on_tree_edge(g, edge)`: Called when edge leads to an undiscovered (White) vertex + * - `on_back_edge(g, edge)`: Called when edge leads to an ancestor (Gray) vertex — indicates cycle + * - `on_forward_or_cross_edge(g, edge)`: Called when edge leads to a finished (Black) vertex + * - `on_finish_edge(g, 
edge)`: Called after an edge has been fully classified + * - `on_finish_vertex(g, vertex)`: Called when all edges from a vertex are processed (colored Black) + * + * All callbacks are optional via SFINAE (`has_on_*` concept checks). + * + * @par Edge Classification + * + * Edge (u, v) is classified by the color of vertex v when the edge is examined: + * - **Tree edge**: v is White — part of the DFS tree + * - **Back edge**: v is Gray — v is an ancestor of u (cycle indicator) + * - **Forward/Cross edge**: v is Black — v was fully processed before this examination + * + * @par Example Usage + * + * **Basic traversal:** + * @code + * using Graph = container::dynamic_graph<...>; + * Graph g({{0,1}, {1,2}, {2,3}}); + * depth_first_search(g, 0); // DFS from vertex 0 + * @endcode + * + * **With visitor for cycle detection:** + * @code + * struct CycleDetector { + * bool has_cycle = false; + * void on_back_edge(auto& g, auto& edge) { + * has_cycle = true; + * } + * }; + * + * CycleDetector detector; + * depth_first_search(g, 0, detector); + * if (detector.has_cycle) { ... // graph contains a cycle } + * @endcode + * + * **Topological sort via finish ordering:** + * @code + * struct TopoVisitor { + * std::vector topo_order; + * void on_finish_vertex(auto& g, auto v) { + * topo_order.push_back(v); + * } + * }; + * + * TopoVisitor topo; + * depth_first_search(g, 0, topo); + * std::ranges::reverse(topo.topo_order); // Reverse finish order = topological order + * @endcode + * + * @par Implementation Notes + * + * **Algorithm Overview:** + * The algorithm uses an explicit stack to simulate recursive DFS. Each stack frame + * stores a vertex ID and iterators into its incidence (edge) range, precisely + * mirroring the state of a recursive DFS call frame. When a frame's edge iterator + * reaches its sentinel, the vertex is finished and the frame is popped, just as a + * recursive call would return. 
+ * + * **Data Structures:** + * - Stack: `std::stack` where each frame holds `{vertex_id, it, end}` + * - Color: `std::vector` with three states (White/Gray/Black) using `uint8_t` + * - Edge iterators: Stored in stack frames for O(1) resume after backtracking + * + * **Design Decisions:** + * 1. **Why iterative with explicit stack instead of recursive?** + * - Avoids stack overflow on deep graphs (limited system call stack) + * - Same asymptotic complexity as recursive version + * - Precise control over traversal state for edge classification + * + * 2. **Why store edge iterators in stack frames?** + * - Enables O(1) edge advancement when resuming a vertex after backtracking + * - Without stored iterators, resuming would require O(degree) re-scanning + * - Maintains overall O(V + E) time complexity + * + * 3. **Why three-color scheme instead of two-color (visited/unvisited)?** + * - Distinguishes ancestors (Gray) from completed vertices (Black) + * - Enables precise edge classification (back vs forward/cross) + * - Required for cycle detection (back edge = edge to Gray vertex) + * + * 4. **Why single-source only (no multi-source variant)?** + * - DFS from multiple sources can be achieved by calling repeatedly + * - Multi-source DFS has less well-defined semantics than multi-source BFS + * - Keeps the interface simple for the common case + * + * **Optimization Opportunities:** + * - For parallel DFS: Requires careful synchronization of color states + * - For early termination: Add return value or exception-based mechanism to visitor + * - For iterative deepening: Combine with depth limit for IDDFS + * + * @par References + * + * - Tarjan, R. E. (1972). "Depth-first search and linear graph algorithms". + * *SIAM Journal on Computing*, 1(2), 146-160. + * - Cormen, T. H., Leiserson, C. E., Rivest, R. L., & Stein, C. (2009). + * *Introduction to Algorithms* (3rd ed.). MIT Press. Section 22.3. 
+ * + * @see views::vertices_dfs DFS view for range-based traversal + * @see breadth_first_search BFS algorithm for shortest-path traversal + */ + +template +void depth_first_search(G&& g, // graph + const vertex_id_t& source, // starting vertex_id + Visitor&& visitor = empty_visitor()) { + using id_type = vertex_id_store_t; + static_assert(std::is_same_v>, + "vertex_id_store_t should equal vertex_id_t for index_adjacency_list"); + + // Vertex color states for DFS + enum class Color : uint8_t { + White, // Undiscovered + Gray, // Discovered but not finished + Black // Finished + }; + + std::vector color(num_vertices(g), Color::White); // one entry per vertex, indexed by vertex id; all start White + + // Initialize source vertex (only the source receives on_initialize_vertex in this function) + if constexpr (has_on_initialize_vertex) { + visitor.on_initialize_vertex(g, *find_vertex(g, source)); + } else if constexpr (has_on_initialize_vertex_id) { + visitor.on_initialize_vertex(g, source); + } + + // Notify visitor that we're starting from this source + if constexpr (has_on_start_vertex) { + auto src_vertex = *find_vertex(g, source); + visitor.on_start_vertex(g, src_vertex); + } else if constexpr (has_on_start_vertex_id) { + visitor.on_start_vertex(g, source); + } + + // Each stack frame stores a vertex and iterators into its incidence range, + // simulating the call stack of recursive DFS for correct edge classification. 
+ + using inc_range_t = decltype(views::incidence(g, *find_vertex(g, source))); + using inc_iterator_t = std::ranges::iterator_t; + using inc_sentinel_t = std::ranges::sentinel_t; + + struct StackFrame { + id_type vertex_id; // vertex whose outgoing edges this frame walks + inc_iterator_t it; // next edge of vertex_id to examine + inc_sentinel_t end; // end of vertex_id's incidence range + }; + + // Discover source and push its stack frame (NOTE(review): `source` indexes `color` without a range check) + color[source] = Color::Gray; + if constexpr (has_on_discover_vertex) { + visitor.on_discover_vertex(g, *find_vertex(g, source)); + } else if constexpr (has_on_discover_vertex_id) { + visitor.on_discover_vertex(g, source); + } + + std::stack S; + { + auto inc = views::incidence(g, *find_vertex(g, source)); + S.push({source, std::ranges::begin(inc), std::ranges::end(inc)}); + } + + while (!S.empty()) { + auto& frame = S.top(); // top frame = vertex currently being explored + + if (frame.it == frame.end) { + // All edges exhausted: mark vertex finished (Black) and pop + color[frame.vertex_id] = Color::Black; + if constexpr (has_on_finish_vertex) { + visitor.on_finish_vertex(g, *find_vertex(g, frame.vertex_id)); + } else if constexpr (has_on_finish_vertex_id) { + visitor.on_finish_vertex(g, frame.vertex_id); + } + S.pop(); + continue; + } + + // Process next edge from this vertex + auto&& [vid, uv] = *frame.it; // structured binding extracts target_id and edge + ++frame.it; // advance iterator before potential push (simulates recursion past this edge) + + if constexpr (has_on_examine_edge) { + visitor.on_examine_edge(g, uv); + } + + if (color[vid] == Color::White) { + // Tree edge: target is undiscovered + if constexpr (has_on_tree_edge) { + visitor.on_tree_edge(g, uv); + } + // Finish this edge before "recursing" into the target vertex + if constexpr (has_on_finish_edge) { + visitor.on_finish_edge(g, uv); + } + // Discover target and push its frame (equivalent to recursive call) + color[vid] = Color::Gray; + if constexpr (has_on_discover_vertex) { + visitor.on_discover_vertex(g, *find_vertex(g, vid)); + } else if constexpr (has_on_discover_vertex_id) { + visitor.on_discover_vertex(g, vid); + } + 
auto inc = views::incidence(g, *find_vertex(g, vid)); + S.push({vid, std::ranges::begin(inc), std::ranges::end(inc)}); // vid's frame is now on top; `frame` is not used again in this iteration + } else if (color[vid] == Color::Gray) { + // Back edge: target is an ancestor still being processed (cycle) + if constexpr (has_on_back_edge) { + visitor.on_back_edge(g, uv); + } + if constexpr (has_on_finish_edge) { + visitor.on_finish_edge(g, uv); + } + } else { + // Forward or cross edge: target is already finished (Black) + if constexpr (has_on_forward_or_cross_edge) { + visitor.on_forward_or_cross_edge(g, uv); + } + if constexpr (has_on_finish_edge) { + visitor.on_finish_edge(g, uv); + } + } + } +} + + +} // namespace graph + +#endif // GRAPH_DFS_ALGORITHM_HPP diff --git a/include/graph/algorithm/index/dijkstra_shortest_paths.hpp b/include/graph/algorithm/index/dijkstra_shortest_paths.hpp new file mode 100644 index 0000000..ede2b58 --- /dev/null +++ b/include/graph/algorithm/index/dijkstra_shortest_paths.hpp @@ -0,0 +1,415 @@ +/** + * @file dijkstra_shortest_paths.hpp + * + * @brief Single-Source & multi-source shortest paths & shortest distances algorithms using Dijkstra's algorithm. + * + * @copyright Copyright (c) 2024 + * + * SPDX-License-Identifier: BSL-1.0 + * + * @authors + * Andrew Lumsdaine + * Phil Ratzloff + */ + +#include "graph/graph.hpp" +#include "graph/algorithm/traversal_common.hpp" + +#include +#include +#include +#include + +#ifndef GRAPH_DIJKSTRA_SHORTEST_PATHS_HPP +# define GRAPH_DIJKSTRA_SHORTEST_PATHS_HPP + +namespace graph { + +// Import CPOs and types for use in algorithms +using adj_list::vertices; +using adj_list::num_vertices; +using adj_list::find_vertex; +using adj_list::target_id; +using adj_list::vertex_id_t; +using adj_list::edge_t; +using adj_list::index_adjacency_list; + +/** + * @brief Multi-source shortest paths using Dijkstra's algorithm. + * + * Finds shortest paths from one or more source vertices to all other vertices in a weighted graph + * with non-negative edge weights. 
Supports custom weight functions, comparison operators, and + * visitor callbacks for algorithm events. + * + * @tparam G The graph type. Must satisfy index_adjacency_list concept. + * @tparam Sources Input range of source vertex IDs. + * @tparam Distances Random access range for storing distances. Value type must be arithmetic. + * @tparam Predecessors Random access range for storing predecessor information. Can use _null_predecessors + * if path reconstruction is not needed. + * @tparam WF Edge weight function. Defaults to returning 1 for all edges (unweighted). + * @tparam Visitor Visitor type with callbacks for algorithm events. Defaults to empty_visitor. + * Visitor calls are optimized away if not used. + * @tparam Compare Comparison function for distance values. Defaults to less<>. + * @tparam Combine Function to combine distances and weights. Defaults to plus<>. + * + * @param g The graph to process. + * @param sources Range of source vertex IDs to start from. + * @param distances [in,out] Shortest distances from sources. Must be sized >= num_vertices(g). Entries are expected to hold the infinite distance on entry; this function does not initialize them. + * @param predecessor [out] Predecessor information for path reconstruction. Must be sized >= num_vertices(g). + * @param weight Edge weight function, invoked as weight(g, uv) -> Distance. + * @param visitor Visitor for algorithm events (discover, examine, relax, finish). + * @param compare Distance comparison function: (Distance, Distance) -> bool. + * @param combine Distance combination function: (Distance, Weight) -> Distance. + * + * @return void. Results are stored in the distances and predecessor output parameters. 
+ * + * **Complexity:** + * - Time: O((V + E) log V) using binary heap priority queue + * - Space: O(V) for internal bookkeeping; the priority queue uses lazy deletion, so it can hold more than V entries (a vertex is pushed again on each successful relaxation) + * + * **Mandates:** + * - G must satisfy index_adjacency_list (integral vertex IDs) + * - Sources must be input_range with values convertible to vertex_id_t + * - Distances must be random_access_range with arithmetic value type + * - Predecessors must be random_access_range with values convertible from vertex_id_t + * - WF must satisfy basic_edge_weight_function + * + * **Preconditions:** + * - All source vertices must be valid: source < num_vertices(g) for vector-based containers + * - distances.size() >= num_vertices(g) + * - distances[v] must already hold the infinite distance for every vertex v; the algorithm does not initialize distances and treats that value as "not yet discovered" + * - predecessor.size() >= num_vertices(g) (unless using _null_predecessors) + * - All edge weights must be non-negative + * - Weight function must not throw or modify graph state + * + * **Postconditions:** + * - distances[s] == 0 for all sources s + * - For reachable vertices v: distances[v] contains shortest distance from nearest source + * - For reachable non-source vertices v: predecessor[v] contains predecessor in shortest path tree + * - For unreachable vertices v: distances[v] is left at the value the caller initialized it to (expected to be the infinite distance, numeric_limits::max()) + * + * **Effects:** + * - Modifies distances: Sets distances[v] for every source and every vertex reached from a source; other entries are left untouched + * - Modifies predecessor: Sets predecessor[v] for every vertex reached through a relaxed edge + * - Does not modify the graph g + * + * **Exception Safety:** + * Basic guarantee. 
If an exception is thrown: + * - Graph g remains unchanged + * - distances and predecessor may be partially modified (indeterminate state) + * + * **Throws:** + * - std::out_of_range if a source vertex ID is out of range + * - std::out_of_range if distances or predecessor are undersized + * - std::out_of_range if a negative edge weight is encountered (for signed weight types) + * - std::logic_error if internal invariant violation detected + * + * **Remarks:** + * - Uses std::priority_queue with lazy deletion (vertices can be re-inserted) + * - For unweighted graphs, use default weight function (equivalent to BFS) + * - For single target, consider A* with admissible heuristic + * - Implementation based on Boost.Graph dijkstra_shortest_paths_no_init + */ +// Note on std::remove_reference_t: +// These templates declare G&& (forwarding reference), so for lvalue arguments G deduces as a +// reference type (e.g. vector<…>&). Writing "const G&" when G is already a reference triggers +// reference collapsing: const (vector<…>&) & → vector<…>& — the const is silently discarded. +// We use std::remove_reference_t in WF default types, invoke_result_t, and concept +// constraints so that "const std::remove_reference_t&" always means a true const ref. +// Default lambdas use "const auto&" instead of "const G&" to sidestep the issue entirely. 
+template (const std::remove_reference_t&, const edge_t&)>, + class Visitor = empty_visitor, + class Compare = less>, + class Combine = plus>> +requires convertible_to, vertex_id_t> && // + is_arithmetic_v> && // + sized_range && // + sized_range && // + convertible_to, range_value_t> && + basic_edge_weight_function, Compare, Combine> +constexpr void dijkstra_shortest_paths( + G&& g, + const Sources& sources, + Distances& distances, + Predecessors& predecessor, + WF&& weight = [](const auto&, + const edge_t& uv) { return range_value_t(1); }, // default weight(g, uv) -> 1 + Visitor&& visitor = empty_visitor(), + Compare&& compare = less>(), + Combine&& combine = plus>()) { + using id_type = vertex_id_store_t; + static_assert(std::is_same_v>, + "vertex_id_store_t should equal vertex_id_t for index_adjacency_list"); + using distance_type = range_value_t; + using weight_type = invoke_result_t&, edge_t>; + + // Relax edge e = (u -> v): if combine(d_u, w_e) compares better than the current distance to v, store it (and u as v's predecessor). Returns true when distances[v] was updated. + auto relax_target = [&g, &predecessor, &distances, &compare, &combine] // + (const edge_t& e, const vertex_id_t& uid, const weight_type& w_e) -> bool { + const id_type vid = target_id(g, e); + const distance_type d_u = distances[static_cast(uid)]; + const distance_type d_v = distances[static_cast(vid)]; + + if (compare(combine(d_u, w_e), d_v)) { + distances[static_cast(vid)] = combine(d_u, w_e); + if constexpr (!is_same_v) { // predecessor write is compiled out when tracking is disabled (presumably the _null_predecessors case; confirm) + predecessor[static_cast(vid)] = uid; + } + return true; + } + return false; + }; + + if (size(distances) < num_vertices(g)) { // distances is indexed by vertex id below, so reject undersized ranges up front + throw std::out_of_range( + std::format("dijkstra_shortest_paths: size of distances of {} is less than the number of vertices {}", + size(distances), num_vertices(g))); + } + if constexpr (!is_same_v) { + if (size(predecessor) < num_vertices(g)) { + throw std::out_of_range( + std::format("dijkstra_shortest_paths: size of predecessor of {} is less than the number of vertices {}", + size(predecessor), num_vertices(g))); + } + 
} + + constexpr auto zero = shortest_path_zero(); + constexpr auto infinite = shortest_path_infinite_distance(); // value meaning "not reached yet"; NOTE(review): never written into distances here, so the caller appears responsible for pre-filling it + + const id_type N = static_cast(num_vertices(g)); + + auto qcompare = [&distances](id_type a, id_type b) { + return distances[static_cast(a)] > distances[static_cast(b)]; // '>' puts the smallest distance on top; NOTE(review): reads live distances, which change while ids are queued + }; + using Queue = std::priority_queue, decltype(qcompare)>; + Queue queue(qcompare); + + // (The optimizer removes this loop if on_initialize_vertex() is empty.) + if constexpr (has_on_initialize_vertex) { + for (id_type uid = 0; uid < N; ++uid) { + visitor.on_initialize_vertex(g, *find_vertex(g, uid)); + } + } else if constexpr (has_on_initialize_vertex_id) { + for (id_type uid = 0; uid < N; ++uid) { + visitor.on_initialize_vertex(g, uid); + } + } + + // Seed the queue with the initial vertex(es) + for (auto&& source : sources) { + if (source >= N || source < 0) { + throw std::out_of_range(std::format("dijkstra_shortest_paths: source vertex id '{}' is out of range", source)); + } + queue.push(source); + distances[static_cast(source)] = zero; // mark source as discovered + if constexpr (has_on_discover_vertex) { + visitor.on_discover_vertex(g, *find_vertex(g, source)); + } else if constexpr (has_on_discover_vertex_id) { + visitor.on_discover_vertex(g, source); + } + } + + // Main loop to process the queue + while (!queue.empty()) { + const id_type uid = queue.top(); + queue.pop(); + if constexpr (has_on_examine_vertex) { + visitor.on_examine_vertex(g, *find_vertex(g, uid)); + } else if constexpr (has_on_examine_vertex_id) { + visitor.on_examine_vertex(g, uid); + } + + // Process all outgoing edges from the current vertex + for (auto&& [vid, uv, w] : views::incidence(g, *find_vertex(g, uid), weight)) { + if constexpr (has_on_examine_edge) { + visitor.on_examine_edge(g, uv); + } + + // Negative weights are not allowed for Dijkstra's algorithm + if constexpr (is_signed_v) { + if (w < zero) { + throw std::out_of_range( + std::format("dijkstra_shortest_paths: invalid negative edge 
weight of '{}' encountered", w)); + } + } + + const bool is_neighbor_undiscovered = (distances[static_cast(vid)] == infinite); // sampled before relaxing, because relax_target may overwrite distances[vid] + const bool was_edge_relaxed = relax_target(uv, uid, w); + + if (is_neighbor_undiscovered) { + // tree_edge + if (was_edge_relaxed) { + if constexpr (has_on_edge_relaxed) { + visitor.on_edge_relaxed(g, uv); + } + if constexpr (has_on_discover_vertex) { + visitor.on_discover_vertex(g, *find_vertex(g, vid)); + } else if constexpr (has_on_discover_vertex_id) { + visitor.on_discover_vertex(g, vid); + } + queue.push(vid); + } else { + // This is an indicator of a bug in the algorithm and should be investigated. + throw std::logic_error( + "dijkstra_shortest_paths: unexpected state where an edge to a new vertex was not relaxed"); + } + } else { + // non-tree edge + if (was_edge_relaxed) { + if constexpr (has_on_edge_relaxed) { + visitor.on_edge_relaxed(g, uv); + } + queue.push(vid); // re-enqueue vid to re-evaluate its neighbors with a shorter path + } else { + if constexpr (has_on_edge_not_relaxed) { + visitor.on_edge_not_relaxed(g, uv); + } + } + } + } + + // Note: while we *think* we're done with this vertex, we may not be. If the graph is unbalanced + // and another path to this vertex has a lower accumulated weight, we'll process it again. + // A consequence is that on_examine_vertex and on_finish_vertex could be called twice (or more) on the same vertex. + if constexpr (has_on_finish_vertex) { + visitor.on_finish_vertex(g, *find_vertex(g, uid)); + } else if constexpr (has_on_finish_vertex_id) { + visitor.on_finish_vertex(g, uid); + } + } // while(!queue.empty()) +} + +/** + * @brief Single-source shortest paths using Dijkstra's algorithm. + * + * Convenience overload for single source vertex. See multi-source version for full documentation. + * + * @param source Single source vertex ID instead of range. 
+ * + * @see dijkstra_shortest_paths(G&&, const Sources&, Distances&, Predecessors&, WF&&, Visitor&&, Compare&&, Combine&&) + */ +template (const std::remove_reference_t&, const edge_t&)>, + class Visitor = empty_visitor, + class Compare = less>, + class Combine = plus>> +requires is_arithmetic_v> && // + sized_range && // + sized_range && // + convertible_to, range_value_t> && + basic_edge_weight_function, Compare, Combine> +constexpr void dijkstra_shortest_paths( + G&& g, + const vertex_id_t& source, + Distances& distances, + Predecessors& predecessor, + WF&& weight = [](const auto&, + const edge_t& uv) { return range_value_t(1); }, // default weight(g, uv) -> 1 + Visitor&& visitor = empty_visitor(), + Compare&& compare = less>(), + Combine&& combine = plus>()) { + dijkstra_shortest_paths(g, subrange(&source, (&source + 1)), distances, predecessor, weight, // `source` is viewed as the one-element range [&source, &source + 1); NOTE(review): `weight` is passed as an lvalue here, while visitor/compare/combine are forwarded + forward(visitor), forward(compare), forward(combine)); +} + +/** + * @brief Multi-source shortest distances using Dijkstra's algorithm (no predecessor tracking). + * + * Computes shortest distances without tracking predecessor information. More efficient when + * path reconstruction is not needed. + * + * @tparam G The graph type. Must satisfy index_adjacency_list concept. + * @tparam Sources Input range of source vertex IDs. + * @tparam Distances Random access range for storing distances. Value type must be arithmetic. + * @tparam WF Edge weight function. Defaults to returning 1 for all edges (unweighted). + * @tparam Visitor Visitor type with callbacks for algorithm events. Defaults to empty_visitor. + * @tparam Compare Comparison function for distance values. Defaults to less<>. + * @tparam Combine Function to combine distances and weights. Defaults to plus<>. + * + * @param g The graph to process. + * @param sources Range of source vertex IDs to start from. + * @param distances [out] Shortest distances from sources. Must be sized >= num_vertices(g). 
+ * @param weight Edge weight function, invoked as weight(g, uv) -> Distance. + * @param visitor Visitor for algorithm events (discover, examine, relax, finish). + * @param compare Distance comparison function: (Distance, Distance) -> bool. + * @param combine Distance combination function: (Distance, Weight) -> Distance. + * + * @return void. Results are stored in the distances output parameter. + * + * **Effects:** + * - Modifies distances: Sets distances[v] for every source and every vertex reached through a relaxed edge + * - Does not modify the graph g + * - Internally uses _null_predecessors to skip predecessor tracking + * + * @see dijkstra_shortest_paths() for full documentation and complexity analysis. + */ +template (const std::remove_reference_t&, const edge_t&)>, + class Visitor = empty_visitor, + class Compare = less>, + class Combine = plus>> +requires convertible_to, vertex_id_t> && // + sized_range && // + is_arithmetic_v> && // + basic_edge_weight_function, Compare, Combine> +constexpr void dijkstra_shortest_distances( + G&& g, + const Sources& sources, + Distances& distances, + WF&& weight = [](const auto&, + const edge_t& uv) { return range_value_t(1); }, // default weight(g, uv) -> 1 + Visitor&& visitor = empty_visitor(), + Compare&& compare = less>(), + Combine&& combine = plus>()) { + dijkstra_shortest_paths(g, sources, distances, _null_predecessors, forward(weight), forward(visitor), // delegates to dijkstra_shortest_paths, passing _null_predecessors in the predecessor slot + forward(compare), forward(combine)); +} + +/** + * @brief Single-source shortest distances using Dijkstra's algorithm (no predecessor tracking). + * + * Convenience overload for single source vertex without predecessor tracking. + * + * @param source Single source vertex ID instead of range. 
+ * + * @see dijkstra_shortest_distances(G&&, const Sources&, Distances&, WF&&, Visitor&&, Compare&&, Combine&&) + */ +template (const std::remove_reference_t&, const edge_t&)>, + class Visitor = empty_visitor, + class Compare = less>, + class Combine = plus>> +requires is_arithmetic_v> && // + sized_range && // + basic_edge_weight_function, Compare, Combine> +constexpr void dijkstra_shortest_distances( + G&& g, + const vertex_id_t& source, + Distances& distances, + WF&& weight = [](const auto&, + const edge_t& uv) { return range_value_t(1); }, // default weight(g, uv) -> 1 + Visitor&& visitor = empty_visitor(), + Compare&& compare = less>(), + Combine&& combine = plus>()) { + dijkstra_shortest_paths(g, subrange(&source, (&source + 1)), distances, _null_predecessors, forward(weight), // `source` is viewed as the one-element range [&source, &source + 1); _null_predecessors fills the predecessor slot + forward(visitor), forward(compare), forward(combine)); +} + +} // namespace graph + +#endif // GRAPH_DIJKSTRA_SHORTEST_PATHS_HPP diff --git a/include/graph/algorithm/index/jaccard.hpp b/include/graph/algorithm/index/jaccard.hpp new file mode 100644 index 0000000..518cf47 --- /dev/null +++ b/include/graph/algorithm/index/jaccard.hpp @@ -0,0 +1,181 @@ +/** + * @file jaccard.hpp + * + * @brief Jaccard Coefficient algorithm for graphs. + * + * @copyright Copyright (c) 2024 + * + * SPDX-License-Identifier: BSL-1.0 + * + * @authors + * Andrew Lumsdaine + * Phil Ratzloff + */ + +#include "graph/graph.hpp" +#include "graph/views/incidence.hpp" +#include "graph/views/vertexlist.hpp" + +#ifndef GRAPH_JACCARD_HPP +# define GRAPH_JACCARD_HPP + +# include +# include +# include + +namespace graph { + +// Using declarations for new namespace structure +using adj_list::index_adjacency_list; +using adj_list::vertex_id_t; +using adj_list::edge_t; +using adj_list::num_vertices; + +/** + * @ingroup graph_algorithms + * @brief Calculate the Jaccard coefficient for every edge in a graph. 
+ * + * For each directed edge (u, v) in the graph, the Jaccard coefficient is: + * + * J(u,v) = |N(u) ∩ N(v)| / |N(u) ∪ N(v)| + * + * where N(x) is the open neighborhood of vertex x (the set of all vertices + * adjacent to x, excluding x itself). The coefficient lies in [0, 1] and + * measures the similarity of two vertices based on their shared neighbors. + * + * The callback `out` is invoked once per stored directed edge, except self-loops + * (which are skipped), with the two endpoint IDs, a reference to the edge, and the + * computed coefficient. For an undirected + * graph stored bidirectionally, `out` is called for both (u,v) and (v,u). + * + * ## Complexity Analysis + * + * **Time Complexity:** O(V + E × d_min) where d_min is the minimum degree of + * the two endpoints per edge. Worst case O(|V|³) when the graph is dense. + * The precomputation of neighbor sets costs O(V + E). + * + * **Space Complexity:** O(V + E) for the precomputed neighbor sets. + * + * ## Supported Graph Properties + * + * ### Directedness + * - ✅ Directed graphs + * - ✅ Undirected graphs (stored bidirectionally — callback fires for both directions) + * + * ### Edge Properties + * - ✅ Unweighted edges + * - ✅ Weighted edges (weights ignored) + * - ✅ Multi-edges (deduplicated into neighbor sets, but the callback still fires once per parallel edge; callers should prefer simple graphs) + * - ❌ Self-loops (skipped — do not affect Jaccard computation) + * + * ### Graph Structure + * - ✅ Connected graphs + * - ✅ Disconnected graphs (processes all components; isolated vertices produce no callbacks) + * - ✅ Empty graphs (returns immediately) + * + * ### Container Requirements + * - Requires: `index_adjacency_list` concept (contiguous vertex IDs) + * - Works with: All `dynamic_graph` container combinations with contiguous IDs + * + * @tparam G The graph type. Must satisfy index_adjacency_list concept. + * @tparam OutOp Callback invoked as `out(uid, vid, uv, val)` for each directed edge. + * @tparam T Floating-point type for the coefficient (default: double). + * + * @param g The graph. 
+ * @param out Callback receiving (vertex_id_t uid, vertex_id_t vid, + * edge_t& uv, T val) for every directed edge that is not a self-loop. + * + * @pre g must have contiguous vertex IDs [0, num_vertices(g)). + * @pre For undirected semantics, each edge {u,v} must be stored as both (u,v) and (v,u). + * + * @post `out` is called exactly once per stored directed edge, excluding self-loops (which are skipped). + * @post All reported coefficient values lie in [0.0, 1.0]. + * @post The graph g is not modified. + * + * **Exception Safety:** Basic exception safety. May throw std::bad_alloc if internal + * container allocation fails. The graph g remains unchanged; `out` may have been + * partially invoked. + * + * @note T = double is the recommended default. Using integral types will truncate + * results to 0 or 1. + * + * ## Example Usage + * + * ```cpp + * #include + * #include + * #include + * + * using namespace graph; + * + * int main() { + * using Graph = container::dynamic_graph>; + * + * // Triangle: 0-1-2 (bidirectional) + * Graph g({{0,1},{1,0},{1,2},{2,1},{0,2},{2,0}}); + * + * jaccard_coefficient(g, [](auto uid, auto vid, auto& uv, double val) { + * std::cout << uid << " - " << vid << " : " << val << "\n"; + * }); + * // Each edge prints J ≈ 0.333 (1 shared neighbor out of 3 total) + * } + * ``` + */ +template +requires std::invocable, vertex_id_t, edge_t&, T> +void jaccard_coefficient(G&& g, OutOp out) { + using vid_t = vertex_id_t; + + const size_t N = num_vertices(g); + if (N == 0) { + return; + } + + // ============================================================================ + // Phase 1: Build neighbor sets for every vertex (self-loops excluded) + // ============================================================================ + std::vector> nbrs(N); // nbrs[u] collects the neighbor ids of u, indexed by vertex id + + for (auto [uid] : views::basic_vertexlist(g)) { + for (auto [tid] : views::basic_incidence(g, uid)) { + if (tid != uid) { // skip self-loops + nbrs[uid].insert(tid); + } + } + } + + // ============================================================================ 
+ // Phase 2: For every directed edge (self-loops skipped), compute and report the Jaccard coefficient + // ============================================================================ + for (auto [uid] : views::basic_vertexlist(g)) { + for (auto&& [vid, uv] : views::incidence(g, uid)) { + // Skip self-loops + if (vid == uid) { + continue; + } + + // Compute |N(u) ∩ N(v)| by iterating the smaller set and probing the larger + const auto& set_a = (nbrs[uid].size() <= nbrs[vid].size()) ? nbrs[uid] : nbrs[vid]; + const auto& set_b = (nbrs[uid].size() <= nbrs[vid].size()) ? nbrs[vid] : nbrs[uid]; + + size_t intersect_size = 0; + for (auto x : set_a) { + if (set_b.count(x)) { + ++intersect_size; + } + } + + // |N(u) ∪ N(v)| = |N(u)| + |N(v)| - |N(u) ∩ N(v)| + size_t union_size = nbrs[uid].size() + nbrs[vid].size() - intersect_size; + + T val = (union_size == 0) ? T{0} // defensive guard: Phase 1 put vid into nbrs[uid] for any edge reaching this point, so union_size is normally >= 1 + : static_cast(intersect_size) / static_cast(union_size); + + out(uid, vid, uv, val); // runs once per stored edge, so parallel edges each produce a callback + } + } +} + +} // namespace graph + +#endif // GRAPH_JACCARD_HPP diff --git a/include/graph/algorithm/index/label_propagation.hpp b/include/graph/algorithm/index/label_propagation.hpp new file mode 100644 index 0000000..168c35a --- /dev/null +++ b/include/graph/algorithm/index/label_propagation.hpp @@ -0,0 +1,282 @@ +/** + * @file label_propagation.hpp + * + * @brief Label Propagation algorithm for community detection in graphs. 
+ * + * @copyright Copyright (c) 2024 + * + * SPDX-License-Identifier: BSL-1.0 + * + * @authors + * Andrew Lumsdaine + * Phil Ratzloff + */ + +#include "graph/graph.hpp" + +#ifndef GRAPH_LABEL_PROPAGATION_HPP +# define GRAPH_LABEL_PROPAGATION_HPP + +# include +# include +# include +# include +# include +# include +# include + +namespace graph { + +// Using declarations for new namespace structure +using adj_list::index_adjacency_list; +using adj_list::vertex_id_t; +using adj_list::vertices; +using adj_list::edges; +using adj_list::target_id; +using adj_list::vertex_id; +using adj_list::num_vertices; + +/** + * @ingroup graph_algorithms + * @brief Propagate vertex labels by majority voting among neighbours. + * + * Each iteration shuffles the vertex processing order, then sets every vertex's label + * to the most popular label among its neighbours. Ties are broken randomly using the + * supplied random-number generator. The algorithm iterates until no label changes + * (convergence) or until @p max_iters iterations have been performed. + * + * ## Complexity Analysis + * + * **Time Complexity:** O(M) per iteration, where M = |E|. The number of iterations + * required for convergence is typically small relative to graph size. + * + * **Space Complexity:** O(V) for the shuffled vertex-ID vector and frequency map. + * + * ## Supported Graph Properties + * + * ### Directedness + * - ✅ Directed graphs + * + * ### Edge Properties + * - ✅ Unweighted edges + * - ✅ Weighted edges (weights ignored) + * - ✅ Multi-edges (all edges counted in tally) + * - ✅ Self-loops (counted in tally) + * - ✅ Cycles + * + * ### Container Requirements + * - Requires: `index_adjacency_list` concept (contiguous vertex IDs) + * - Requires: `std::ranges::random_access_range