diff --git a/AGENTS.md b/AGENTS.md index b6088f6..7620acb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -24,6 +24,9 @@ pathrex/ │ │ ├── mod.rs # Core traits (GraphBuilder, GraphDecomposition, GraphSource, │ │ │ # Backend, Graph), error types, RAII wrappers, GrB init │ │ └── inmemory.rs # InMemory marker, InMemoryBuilder, InMemoryGraph +│ ├── rpq/ +│ │ ├── mod.rs # RpqEvaluator (assoc. Result), RpqQuery, Endpoint, PathExpr, RpqError +│ │ └── rpqmatrix.rs # Matrix-plan RPQ evaluator │ ├── sparql/ │ │ └── mod.rs # parse_rpq / extract_rpq → RpqQuery (spargebra) │ └── formats/ @@ -32,7 +35,8 @@ pathrex/ │ └── mm.rs # MatrixMarket directory loader (vertices.txt, edges.txt, *.txt) ├── tests/ │ ├── inmemory_tests.rs # Integration tests for InMemoryBuilder / InMemoryGraph -│ └── mm_tests.rs # Integration tests for MatrixMarket format +│ ├── mm_tests.rs # Integration tests for MatrixMarket format +│ └── rpqmatrix_tests.rs # Integration tests for matrix-plan RPQ evaluator ├── deps/ │ └── LAGraph/ # Git submodule (SparseLinearAlgebra/LAGraph) └── .github/workflows/ci.yml # CI: build GraphBLAS + LAGraph, cargo build & test @@ -274,6 +278,42 @@ and the parsed query contains full IRIs sharing a common prefix. The module handles spargebra's desugaring of sequence paths (`?x // ?y`) from a chain of BGP triples back into a single path expression. +### RPQ evaluation (`src/rpq/`) + +The [`rpq`](src/rpq/mod.rs) module provides an abstraction for evaluating +Regular Path Queries (RPQs) over edge-labeled graphs using GraphBLAS/LAGraph. + +Key public items: + +- [`Endpoint`](src/rpq/mod.rs) — `Variable(String)` or `Named(String)` (IRI string). +- [`PathExpr`](src/rpq/mod.rs) — `Label`, `Sequence`, `Alternative`, `ZeroOrMore`, + `OneOrMore`, `ZeroOrOne`. +- [`RpqQuery`](src/rpq/mod.rs) — `{ subject, path, object }` using the types above; + [`strip_base(&mut self, base)`](src/rpq/mod.rs) removes a shared IRI prefix from + named endpoints and labels. 
+- [`RpqEvaluator`](src/rpq/mod.rs) — trait with associated type `Result` and
+  [`evaluate(query, graph)`](src/rpq/mod.rs) taking `&RpqQuery` and
+  [`GraphDecomposition`], returning `Result<Self::Result, RpqError>`.
+  Each concrete evaluator exposes its own output type (see below).
+- [`RpqError`](src/rpq/mod.rs) — unified error type for RPQ parsing and evaluation:
+  `Parse` (SPARQL syntax), `Extract` (query extraction), `UnsupportedPath`,
+  `VertexNotFound`, and `Graph` (wraps [`GraphError`](src/graph/mod.rs) for
+  label-not-found and GraphBLAS/LAGraph failures).
+
+[`NfaRpqResult`](src/rpq/nfarpq.rs) wraps a [`GraphblasVector`] of reachable **target**
+vertices. When the subject is a variable, every vertex is used as a source and
+`LAGraph_RegularPathQuery` returns the union of targets — individual `(source, target)`
+pairs are not preserved.
+
+#### `RpqMatrixEvaluator` (`src/rpq/rpqmatrix.rs`)
+
+[`RpqMatrixEvaluator`](src/rpq/rpqmatrix.rs) compiles [`PathExpr`] into a Boolean matrix plan
+over label adjacency matrices and runs [`LAGraph_RPQMatrix`]. It returns
+[`RpqMatrixResult`](src/rpq/rpqmatrix.rs): the result relation's `nnz` plus the
+[`GraphblasMatrix`] result matrix, whose ownership is taken from the root plan node.
+A named subject or object restricts the relation to that vertex; an unknown vertex yields
+[`RpqError::VertexNotFound`]. `ZeroOrOne` (`?`) paths are not supported yet ([`RpqError::UnsupportedPath`]).
+ ### FFI layer [`lagraph_sys`](src/lagraph_sys.rs) exposes raw C bindings for GraphBLAS and diff --git a/Cargo.toml b/Cargo.toml index 2420d28..9f24b40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" [dependencies] csv = "1.4.0" +egg = "0.10.0" libc = "0.2" oxrdf = "0.3.3" oxttl = "0.2.3" diff --git a/build.rs b/build.rs index 475046e..9b12997 100644 --- a/build.rs +++ b/build.rs @@ -66,6 +66,7 @@ fn regenerate_bindings() { .allowlist_function("GrB_Matrix_new") .allowlist_function("GrB_Matrix_nvals") .allowlist_function("GrB_Matrix_free") + .allowlist_function("GrB_Matrix_extractElement_BOOL") .allowlist_function("GrB_Matrix_build_BOOL") .allowlist_function("GrB_Vector_new") .allowlist_function("GrB_Vector_free") @@ -74,6 +75,8 @@ fn regenerate_bindings() { .allowlist_function("GrB_Vector_extractTuples_BOOL") .allowlist_function("GrB_vxm") .allowlist_item("LAGRAPH_MSG_LEN") + .allowlist_item("RPQMatrixOp") + .allowlist_type("RPQMatrixPlan") .allowlist_type("LAGraph_Graph") .allowlist_type("LAGraph_Kind") .allowlist_function("LAGraph_CheckGraph") @@ -83,6 +86,11 @@ fn regenerate_bindings() { .allowlist_function("LAGraph_Delete") .allowlist_function("LAGraph_Cached_AT") .allowlist_function("LAGraph_MMRead") + .allowlist_function("LAGraph_RPQMatrix") + .allowlist_function("LAGraph_DestroyRpqMatrixPlan") + .allowlist_function("LAGraph_RPQMatrix_label") + .allowlist_function("LAGraph_RPQMatrix_Free") + .allowlist_function("LAGraph_RegularPathQuery") .default_enum_style(bindgen::EnumVariation::Rust { non_exhaustive: false, }) diff --git a/src/graph/mod.rs b/src/graph/mod.rs index fda7928..514cbb0 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -125,6 +125,7 @@ impl Drop for LagraphGraph { unsafe impl Send for LagraphGraph {} unsafe impl Sync for LagraphGraph {} +#[derive(Debug)] pub struct GraphblasVector { pub inner: GrB_Vector, } diff --git a/src/lagraph_sys_generated.rs b/src/lagraph_sys_generated.rs index 3201d28..c1a6193 100644 --- 
a/src/lagraph_sys_generated.rs +++ b/src/lagraph_sys_generated.rs @@ -155,6 +155,9 @@ unsafe extern "C" { ncols: GrB_Index, ) -> GrB_Info; } +unsafe extern "C" { + pub fn GrB_Matrix_dup(C: *mut GrB_Matrix, A: GrB_Matrix) -> GrB_Info; +} unsafe extern "C" { pub fn GrB_Matrix_nvals(nvals: *mut GrB_Index, A: GrB_Matrix) -> GrB_Info; } @@ -168,6 +171,14 @@ unsafe extern "C" { dup: GrB_BinaryOp, ) -> GrB_Info; } +unsafe extern "C" { + pub fn GrB_Matrix_extractElement_BOOL( + x: *mut bool, + A: GrB_Matrix, + i: GrB_Index, + j: GrB_Index, + ) -> GrB_Info; +} unsafe extern "C" { pub fn GrB_vxm( w: GrB_Vector, @@ -261,3 +272,43 @@ unsafe extern "C" { msg: *mut ::std::os::raw::c_char, ) -> ::std::os::raw::c_int; } +#[repr(u32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum RPQMatrixOp { + RPQ_MATRIX_OP_LABEL = 0, + RPQ_MATRIX_OP_LOR = 1, + RPQ_MATRIX_OP_CONCAT = 2, + RPQ_MATRIX_OP_KLEENE = 3, + RPQ_MATRIX_OP_KLEENE_L = 4, + RPQ_MATRIX_OP_KLEENE_R = 5, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct RPQMatrixPlan { + pub op: RPQMatrixOp, + pub lhs: *mut RPQMatrixPlan, + pub rhs: *mut RPQMatrixPlan, + pub mat: GrB_Matrix, + pub res_mat: GrB_Matrix, +} +unsafe extern "C" { + pub fn LAGraph_RPQMatrix( + nnz: *mut GrB_Index, + plan: *mut RPQMatrixPlan, + msg: *mut ::std::os::raw::c_char, + ) -> GrB_Info; +} +unsafe extern "C" { + pub fn LAGraph_RPQMatrix_label( + mat: *mut GrB_Matrix, + x: GrB_Index, + i: GrB_Index, + j: GrB_Index, + ) -> GrB_Info; +} +unsafe extern "C" { + pub fn LAGraph_DestroyRpqMatrixPlan(plan: *mut RPQMatrixPlan) -> GrB_Info; +} +unsafe extern "C" { + pub fn LAGraph_RPQMatrix_Free(mat: *mut GrB_Matrix) -> GrB_Info; +} diff --git a/src/rpq/mod.rs b/src/rpq/mod.rs index 6a8a546..c2db0d1 100644 --- a/src/rpq/mod.rs +++ b/src/rpq/mod.rs @@ -10,6 +10,8 @@ //! let result: NfaRpqResult = NfaRpqEvaluator.evaluate(&query, &graph)?; //! 
``` +pub mod rpqmatrix; + use crate::graph::{GraphDecomposition, GraphError}; use crate::sparql::ExtractError; use spargebra::SparqlSyntaxError; diff --git a/src/rpq/rpqmatrix.rs b/src/rpq/rpqmatrix.rs new file mode 100644 index 0000000..8587674 --- /dev/null +++ b/src/rpq/rpqmatrix.rs @@ -0,0 +1,248 @@ +//! Plan-based RPQ evaluation using `LAGraph_RPQMatrix`. + +use std::ptr::null_mut; + +use egg::{Id, RecExpr, define_language}; + +use crate::graph::{GraphDecomposition, GraphblasMatrix, ensure_grb_init}; +use crate::lagraph_sys::*; +use crate::rpq::{Endpoint, PathExpr, RpqError, RpqEvaluator, RpqQuery}; +use crate::{grb_ok, la_ok}; + +define_language! { + pub enum RpqPlan { + Label(String), + NamedVertex(String), + "/" = Seq([Id; 2]), + "|" = Alt([Id; 2]), + "*" = Star([Id; 1]), + } +} + +fn to_expr_aux(path: &PathExpr, expr: &mut RecExpr) -> Result { + match path { + PathExpr::Label(label) => Ok(expr.add(RpqPlan::Label(label.clone()))), + + PathExpr::Sequence(lhs, rhs) => { + let l = to_expr_aux(lhs, expr)?; + let r = to_expr_aux(rhs, expr)?; + Ok(expr.add(RpqPlan::Seq([l, r]))) + } + + PathExpr::Alternative(lhs, rhs) => { + let l = to_expr_aux(lhs, expr)?; + let r = to_expr_aux(rhs, expr)?; + Ok(expr.add(RpqPlan::Alt([l, r]))) + } + + PathExpr::ZeroOrMore(inner) => { + let i = to_expr_aux(inner, expr)?; + Ok(expr.add(RpqPlan::Star([i]))) + } + + PathExpr::OneOrMore(inner) => { + let e = to_expr_aux(inner, expr)?; + let s = expr.add(RpqPlan::Star([e])); + Ok(expr.add(RpqPlan::Seq([e, s]))) + } + + PathExpr::ZeroOrOne(_) => Err(RpqError::UnsupportedPath( + "ZeroOrOne (?) is not supported by RPQMatrix".into(), + )), + } +} + +/// Compile a [`RpqQuery`] into +/// [`RecExpr`]. 
+pub fn query_to_expr(query: &RpqQuery) -> Result, RpqError> { + let mut expr = RecExpr::default(); + let path_root = to_expr_aux(&query.path, &mut expr)?; + + let _root = match (&query.subject, &query.object) { + (Endpoint::Variable(_), Endpoint::Variable(_)) => path_root, + (Endpoint::Named(name), Endpoint::Variable(_)) => { + let diag = expr.add(RpqPlan::NamedVertex(name.clone())); + expr.add(RpqPlan::Seq([diag, path_root])) + } + (Endpoint::Variable(_), Endpoint::Named(name)) => { + let diag = expr.add(RpqPlan::NamedVertex(name.clone())); + expr.add(RpqPlan::Seq([path_root, diag])) + } + (Endpoint::Named(sub), Endpoint::Named(obj)) => { + let diag_sub = expr.add(RpqPlan::NamedVertex(sub.clone())); + let seq1 = expr.add(RpqPlan::Seq([diag_sub, path_root])); + let diag_obj = expr.add(RpqPlan::NamedVertex(obj.clone())); + expr.add(RpqPlan::Seq([seq1, diag_obj])) + } + }; + + Ok(expr) +} + +/// Convert a [`RecExpr`] into the flat [`RPQMatrixPlan`] array that +/// `LAGraph_RPQMatrix` expects. +/// +/// Returns the plan array and a list of owned diagonal matrices that must be +/// freed after evaluation. +pub fn materialize( + expr: &RecExpr, + graph: &G, +) -> Result<(Vec, Vec), RpqError> { + let null_plan = RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LABEL, + lhs: null_mut(), + rhs: null_mut(), + mat: null_mut(), + res_mat: null_mut(), + }; + let mut plans = vec![null_plan; expr.len()]; + let mut owned_matrices: Vec = Vec::new(); + let n = graph.num_nodes() as GrB_Index; + + for (id, node) in expr.as_ref().iter().enumerate() { + plans[id] = match node { + RpqPlan::Label(label) => { + let lg = graph.get_graph(label)?; + let mat = unsafe { (*lg.inner).A }; + RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LABEL, + lhs: null_mut(), + rhs: null_mut(), + mat, + res_mat: null_mut(), + } + } + + RpqPlan::NamedVertex(name) => { + let vertex_id = graph + .get_node_id(name) + .ok_or_else(|| RpqError::VertexNotFound(name.clone()))? 
+ as GrB_Index; + let mut mat: GrB_Matrix = null_mut(); + grb_ok!(LAGraph_RPQMatrix_label(&mut mat, vertex_id, n, n,))?; + if mat.is_null() { + return Err(RpqError::Graph(crate::graph::GraphError::GraphBlas( + GrB_Info::GrB_INVALID_VALUE, + ))); + } + owned_matrices.push(mat); + RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LABEL, + lhs: null_mut(), + rhs: null_mut(), + mat, + res_mat: null_mut(), + } + } + + RpqPlan::Seq([l, r]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_CONCAT, + lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + + RpqPlan::Alt([l, r]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LOR, + lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + + RpqPlan::Star([i]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_KLEENE, + lhs: null_mut(), + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*i)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + }; + } + + Ok((plans, owned_matrices)) +} + +/// Output of [`RpqMatrixEvaluator`]: full path relation matrix and its nnz. +#[derive(Debug)] +pub struct RpqMatrixResult { + pub nnz: u64, + pub matrix: GraphblasMatrix, +} + +/// RPQ evaluator backed by `LAGraph_RPQMatrix`. 
+pub struct RpqMatrixEvaluator; + +impl RpqEvaluator for RpqMatrixEvaluator { + type Result = RpqMatrixResult; + + fn evaluate( + &self, + query: &RpqQuery, + graph: &G, + ) -> Result { + ensure_grb_init()?; + + let expr = query_to_expr(query)?; + let (mut plans, owned_matrices) = materialize(&expr, graph)?; + + let root_ptr = unsafe { plans.as_mut_ptr().add(plans.len() - 1) }; + + let mut nnz: GrB_Index = 0; + la_ok!(LAGraph_RPQMatrix(&mut nnz, root_ptr))?; + + let matrix = unsafe { + let mat = (*root_ptr).res_mat; + (*root_ptr).res_mat = null_mut(); + GraphblasMatrix { inner: mat } + }; + + grb_ok!(LAGraph_DestroyRpqMatrixPlan(root_ptr))?; + + // Free diagonal matrices created for named vertices. + for mut mat in owned_matrices { + unsafe { + LAGraph_RPQMatrix_Free(&mut mat); + } + } + + Ok(RpqMatrixResult { + nnz: nnz as u64, + matrix, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::rpq::{Endpoint, PathExpr, RpqQuery}; + use crate::utils::build_graph; + + #[test] + fn evaluate_single_edge_nnz() { + let graph = build_graph(&[("A", "B", "p")]); + let q = RpqQuery { + subject: Endpoint::Variable("x".into()), + path: PathExpr::Label("p".into()), + object: Endpoint::Variable("y".into()), + }; + let result = RpqMatrixEvaluator.evaluate(&q, &graph).expect("evaluate"); + assert_eq!(result.nnz, 1); + } + + #[test] + fn evaluate_named_subject_no_match_nnz() { + // Graph: A --p--> B + // Query: p ?y -> C has no outgoing p edges, nnz=0 + let graph = build_graph(&[("A", "B", "p"), ("C", "D", "q")]); + let q = RpqQuery { + subject: Endpoint::Named("C".into()), + path: PathExpr::Label("p".into()), + object: Endpoint::Variable("y".into()), + }; + let result = RpqMatrixEvaluator.evaluate(&q, &graph).expect("evaluate"); + assert_eq!(result.nnz, 0, "C has no outgoing p edges"); + } +} diff --git a/tests/rpqmatrix_tests.rs b/tests/rpqmatrix_tests.rs new file mode 100644 index 0000000..353c727 --- /dev/null +++ b/tests/rpqmatrix_tests.rs @@ -0,0 +1,466 @@ +use 
std::fs::File; +use std::io::{BufRead, BufReader}; +use std::path::Path; +use std::sync::LazyLock; + +use pathrex::formats::mm::MatrixMarket; +use pathrex::graph::{Graph, GraphDecomposition, GraphError, InMemory, InMemoryGraph}; +use pathrex::lagraph_sys::{GrB_Index, GrB_Info, GrB_Matrix_extractElement_BOOL}; +use pathrex::rpq::rpqmatrix::{RpqMatrixEvaluator, RpqMatrixResult}; +use pathrex::rpq::{Endpoint, PathExpr, RpqError, RpqEvaluator, RpqQuery}; +use pathrex::sparql::parse_rpq; +use pathrex::utils::build_graph; + +const GRAPH_DIR: &str = "tests/testdata/mm_graph"; +const CASES_DIR: &str = "tests/testdata/cases"; +const BASE_IRI: &str = "http://example.org/"; + +static LA_N_EGG_GRAPH: LazyLock = LazyLock::new(|| { + let mm = MatrixMarket::from_dir(GRAPH_DIR).with_base_iri(BASE_IRI); + Graph::::try_from(mm).expect("Failed to load la-n-egg-rpq graph") +}); + +fn convert_query_line(line: &str) -> RpqQuery { + let query_str = line + .splitn(2, ',') + .nth(1) + .unwrap_or_else(|| panic!("query line has no comma: {line:?}")) + .trim(); + + let sparql = format!("BASE <{BASE_IRI}> SELECT * WHERE {{ {query_str} . 
}}"); + + let query = + parse_rpq(&sparql).unwrap_or_else(|e| panic!("failed to parse query {line:?}: {e}")); + query +} + +fn load_queries(case_dir: &Path) -> Vec { + let path = case_dir.join("queries.txt"); + let reader = BufReader::new( + File::open(&path).unwrap_or_else(|e| panic!("cannot open {}: {e}", path.display())), + ); + reader + .lines() + .map(|l| l.expect("I/O error reading queries.txt")) + .filter(|l| !l.trim().is_empty()) + .map(|l| convert_query_line(&l)) + .collect() +} + +fn load_expected_nnz(case_dir: &Path) -> Vec { + let path = case_dir.join("expected.txt"); + let reader = BufReader::new( + File::open(&path).unwrap_or_else(|e| panic!("cannot open {}: {e}", path.display())), + ); + reader + .lines() + .map(|l| l.expect("I/O error reading expected.txt")) + .filter(|l| !l.trim().is_empty()) + .map(|l| { + // Format: ";;" + let mut parts = l.splitn(3, ';'); + let _id = parts.next().expect("missing id field"); + parts + .next() + .expect("missing nnz field") + .parse::() + .unwrap_or_else(|e| panic!("bad nnz in {l:?}: {e}")) + }) + .collect() +} + +fn run_la_n_egg_case(case_name: &str) { + let case_dir = Path::new(CASES_DIR).join(case_name); + let queries = load_queries(&case_dir); + let expected = load_expected_nnz(&case_dir); + + assert_eq!( + queries.len(), + expected.len(), + "case '{case_name}': queries.txt and expected.txt have different line counts" + ); + + let graph = &*LA_N_EGG_GRAPH; + let evaluator = RpqMatrixEvaluator; + + for (i, (query, expected_nnz)) in queries.iter().zip(expected.iter()).enumerate() { + let result = evaluator.evaluate(query, graph).unwrap_or_else(|e| { + panic!("case '{case_name}' query #{i} evaluation failed: {e}\n query: {query:?}") + }); + + assert_eq!( + result.nnz, + *expected_nnz, + "case '{case_name}' query #{i} nnz mismatch\n query: {query:?}\n expected: {expected_nnz}\n actual: {nnz}", + nnz = result.nnz, + ); + } +} + +fn label(s: &str) -> PathExpr { + PathExpr::Label(s.to_string()) +} + +fn var(name: 
&str) -> Endpoint { + Endpoint::Variable(name.to_string()) +} + +fn named_ep(s: &str) -> Endpoint { + Endpoint::Named(s.to_string()) +} + +fn rq(subject: Endpoint, path: PathExpr, object: Endpoint) -> RpqQuery { + RpqQuery { + subject, + path, + object, + } +} + +fn matrix_entry_set(result: &RpqMatrixResult, row: GrB_Index, col: GrB_Index) -> bool { + unsafe { + let mut x = false; + let info = GrB_Matrix_extractElement_BOOL(&mut x, result.matrix.inner, row, col); + info == GrB_Info::GrB_SUCCESS && x + } +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ?x ?y +#[test] +fn test_single_label_variable_variable() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator + .evaluate(&rq(var("x"), label("knows"), var("y")), &graph) + .expect("evaluate should succeed"); + + assert_eq!(result.nnz, 2); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ?y → only A→B, nnz=1 +#[test] +fn test_single_label_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator + .evaluate(&rq(named_ep("A"), label("knows"), var("y")), &graph) + .expect("evaluate should succeed"); + + assert_eq!(result.nnz, 1, "only A→B should be in result"); + let a_id = graph.get_node_id("A").expect("A should exist") as GrB_Index; + let b_id = graph.get_node_id("B").expect("B should exist") as GrB_Index; + assert!( + matrix_entry_set(&result, a_id, b_id), + "B should be reachable from A via 'knows'" + ); +} + +/// Graph: A --knows--> B --likes--> C +/// Query: ?x / ?y (two-hop sequence) +#[test] +fn test_sequence_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]); + let evaluator = RpqMatrixEvaluator; + + let path = PathExpr::Sequence(Box::new(label("knows")), Box::new(label("likes"))); + + let result = evaluator + .evaluate(&rq(var("x"), path, var("y")), &graph) + .expect("evaluate should 
succeed"); + + assert_eq!(result.nnz, 1); +} + +/// Graph: A --knows--> B --likes--> C +/// Query: / ?y → only A→C, nnz=1 +#[test] +fn test_sequence_path_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]); + let evaluator = RpqMatrixEvaluator; + + let path = PathExpr::Sequence(Box::new(label("knows")), Box::new(label("likes"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + assert_eq!(result.nnz, 1, "only A→C should be in result"); + let a_id = graph.get_node_id("A").expect("A should exist") as GrB_Index; + let c_id = graph.get_node_id("C").expect("C should exist") as GrB_Index; + assert!( + matrix_entry_set(&result, a_id, c_id), + "C should be reachable from A via knows/likes" + ); +} + +/// Graph: A --knows--> B, A --likes--> C +/// Query: | ?y → A→B and A→C, nnz=2 +#[test] +fn test_alternative_path() { + let graph = build_graph(&[("A", "B", "knows"), ("A", "C", "likes")]); + let evaluator = RpqMatrixEvaluator; + + let path = PathExpr::Alternative(Box::new(label("knows")), Box::new(label("likes"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + assert_eq!(result.nnz, 2, "A→B and A→C should be in result"); + let a_id = graph.get_node_id("A").expect("A should exist") as GrB_Index; + let b_id = graph.get_node_id("B").expect("B should exist") as GrB_Index; + let c_id = graph.get_node_id("C").expect("C should exist") as GrB_Index; + assert!( + matrix_entry_set(&result, a_id, b_id), + "B should be reachable via knows|likes" + ); + assert!( + matrix_entry_set(&result, a_id, c_id), + "C should be reachable via knows|likes" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: * ?y → A→A, A→B, A→C, nnz=3 +#[test] +fn test_zero_or_more_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let path = 
PathExpr::ZeroOrMore(Box::new(label("knows"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + assert_eq!(result.nnz, 3, "A, B, C all reachable from A via knows*"); + let a_id = graph.get_node_id("A").expect("A should exist") as GrB_Index; + let b_id = graph.get_node_id("B").expect("B should exist") as GrB_Index; + let c_id = graph.get_node_id("C").expect("C should exist") as GrB_Index; + + assert!( + matrix_entry_set(&result, a_id, a_id), + "A should be reachable (zero hops)" + ); + assert!( + matrix_entry_set(&result, a_id, b_id), + "B should be reachable (one hop)" + ); + assert!( + matrix_entry_set(&result, a_id, c_id), + "C should be reachable (two hops)" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: + ?y → A→B, A→C (not A→A), nnz=2 +#[test] +fn test_one_or_more_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let path = PathExpr::OneOrMore(Box::new(label("knows"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + assert_eq!(result.nnz, 2, "B and C reachable from A via knows+"); + let a_id = graph.get_node_id("A").expect("A should exist") as GrB_Index; + let b_id = graph.get_node_id("B").expect("B should exist") as GrB_Index; + let c_id = graph.get_node_id("C").expect("C should exist") as GrB_Index; + + assert!( + !matrix_entry_set(&result, a_id, a_id), + "A shouldn't be reachable (non-zero length)" + ); + assert!( + matrix_entry_set(&result, a_id, b_id), + "B should be reachable (one hop)" + ); + assert!( + matrix_entry_set(&result, a_id, c_id), + "C should be reachable (two hops)" + ); +} + +#[test] +fn test_zero_or_one_unsupported() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let path = PathExpr::ZeroOrOne(Box::new(label("knows"))); + let result = 
evaluator.evaluate(&rq(var("x"), path, var("y")), &graph); + + assert!( + matches!(result, Err(RpqError::UnsupportedPath(_))), + "expected UnsupportedPath for ZeroOrOne, got: {result:?}" + ); +} + +#[test] +fn test_label_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator.evaluate(&rq(var("x"), label("nonexistent"), var("y")), &graph); + + assert!( + matches!(result, Err(RpqError::Graph(GraphError::LabelNotFound(ref l))) if l == "nonexistent"), + "expected LabelNotFound error, got: {result:?}" + ); +} + +#[test] +fn test_vertex_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator.evaluate(&rq(named_ep("Z"), label("knows"), var("y")), &graph); + + assert!( + matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"), + "expected VertexNotFound error, got: {result:?}" + ); +} + +/// Graph: A --knows--> B, C --knows--> D +/// Query: ?x → only A→B, nnz=1 +#[test] +fn test_bound_object() { + let graph = build_graph(&[("A", "B", "knows"), ("C", "D", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator + .evaluate(&rq(var("x"), label("knows"), named_ep("B")), &graph) + .expect("bound object should be supported"); + + assert_eq!(result.nnz, 1, "only A→B should be in result"); +} + +/// Graph: A --knows--> B, C --knows--> D +/// Query: → nnz=1 +#[test] +fn test_bound_subject_and_object() { + let graph = build_graph(&[("A", "B", "knows"), ("C", "D", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator + .evaluate(&rq(named_ep("A"), label("knows"), named_ep("B")), &graph) + .expect("bound subject+object should be supported"); + + assert_eq!(result.nnz, 1, "only A→B should be in result"); +} + +#[test] +fn test_negated_property_set_rejected_by_sparql_conversion() { + let sparql = "BASE SELECT ?x ?y WHERE { ?x !() ?y . 
}"; + let r = pathrex::sparql::parse_rpq(sparql); + assert!(matches!(r, Err(RpqError::UnsupportedPath(_)))); +} + +/// Graph: A --knows--> B --knows--> C --knows--> A (cycle) +/// Query: * ?y → all 3 nodes reachable, nnz=3 +#[test] +fn test_cycle_graph_star() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "knows"), + ("C", "A", "knows"), + ]); + let evaluator = RpqMatrixEvaluator; + + let path = PathExpr::ZeroOrMore(Box::new(label("knows"))); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + assert_eq!( + result.nnz, 3, + "all 3 nodes should be reachable from A in a cycle" + ); + let a_id = graph.get_node_id("A").expect("A should exist") as GrB_Index; + let targets = { + let n = graph.num_nodes(); + let mut out = Vec::new(); + for j in 0..n as GrB_Index { + if matrix_entry_set(&result, a_id, j) { + out.push(j); + } + } + out + }; + assert_eq!( + targets.len(), + 3, + "all 3 nodes should be reachable from A in a cycle" + ); +} + +/// Graph: A --knows--> B --likes--> C --knows--> D +/// Query: /*/ ?y → A→D, nnz=1 +#[test] +fn test_complex_path() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "likes"), + ("C", "D", "knows"), + ]); + let evaluator = RpqMatrixEvaluator; + + // knows / likes* / knows + let path = PathExpr::Sequence( + Box::new(PathExpr::Sequence( + Box::new(label("knows")), + Box::new(PathExpr::ZeroOrMore(Box::new(label("likes")))), + )), + Box::new(label("knows")), + ); + + let result = evaluator + .evaluate(&rq(named_ep("A"), path, var("y")), &graph) + .expect("evaluate should succeed"); + + assert_eq!(result.nnz, 1, "only A→D should be in result"); + let a_id = graph.get_node_id("A").expect("A should exist") as GrB_Index; + let d_id = graph.get_node_id("D").expect("D should exist") as GrB_Index; + assert!( + matrix_entry_set(&result, a_id, d_id), + "D should be reachable via knows/likes*/knows" + ); +} + +#[test] +fn 
test_no_matching_path() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let path = PathExpr::Sequence(Box::new(label("knows")), Box::new(label("likes"))); + + let result = evaluator.evaluate(&rq(var("x"), path, var("y")), &graph); + + assert!( + matches!(result, Err(RpqError::Graph(GraphError::LabelNotFound(ref l))) if l == "likes"), + "expected LabelNotFound for 'likes', got: {result:?}" + ); +} + +#[test] +fn test_la_n_egg_any_any() { + run_la_n_egg_case("any-any"); +} + +#[test] +fn test_la_n_egg_any_con() { + run_la_n_egg_case("any-con"); +} + +#[test] +fn test_la_n_egg_con_any() { + run_la_n_egg_case("con-any"); +} diff --git a/tests/testdata/cases/any-any/expected.txt b/tests/testdata/cases/any-any/expected.txt new file mode 100644 index 0000000..9cfb38b --- /dev/null +++ b/tests/testdata/cases/any-any/expected.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd430ce12e3720c7ab8c2a86a45f0c2e62bcba3a0f4556941df10c5cf42de9ac +size 153 diff --git a/tests/testdata/cases/any-any/queries.txt b/tests/testdata/cases/any-any/queries.txt new file mode 100644 index 0000000..40b75f6 --- /dev/null +++ b/tests/testdata/cases/any-any/queries.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf443a9d86806f615c5f80bfd5abcd3e876b0585236830e25e79e8facc08730 +size 483 diff --git a/tests/testdata/cases/any-con/expected.txt b/tests/testdata/cases/any-con/expected.txt new file mode 100644 index 0000000..4f84684 --- /dev/null +++ b/tests/testdata/cases/any-con/expected.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44441db091960cdc7549a6f1c204dc3da93879f7f2325b12df575a46d38ae6b +size 207 diff --git a/tests/testdata/cases/any-con/queries.txt b/tests/testdata/cases/any-con/queries.txt new file mode 100644 index 0000000..d16ebd8 --- /dev/null +++ b/tests/testdata/cases/any-con/queries.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c05d3be471b0ba767b1d0257690a34fbbe81c7ef6bfef51c5077447ecfcc1ada +size 1129 diff --git a/tests/testdata/cases/con-any/expected.txt b/tests/testdata/cases/con-any/expected.txt new file mode 100644 index 0000000..4e0b9bc --- /dev/null +++ b/tests/testdata/cases/con-any/expected.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d739c90dca840fea5a023c838cea2246c1ea2275efaeb11c59c04c6a323ef7c +size 219 diff --git a/tests/testdata/cases/con-any/queries.txt b/tests/testdata/cases/con-any/queries.txt new file mode 100644 index 0000000..abb74cb --- /dev/null +++ b/tests/testdata/cases/con-any/queries.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6742818b050bdcb7f5f75d5b43267017456163bfe548302e942e2266740e5e8e +size 1057