Skip to main content

gram_codec/
lib.rs

1//! # Gram Codec
2//!
3//! Bidirectional codec between Gram notation (human-readable text format) and Pattern data structures.
4//!
5//! This crate provides:
6//! - **Parsing**: Transform Gram notation text into Pattern structures
7//! - **Serialization**: Transform Pattern structures into valid Gram notation
8//!
9//! ## Features
10//!
11//! - Full support for all Gram syntax forms (nodes, relationships, subject patterns, annotations)
12//! - Round-trip correctness (parse → serialize → parse produces equivalent pattern)
13//! - Error recovery (reports all syntax errors, not just the first)
14//! - Multi-platform support (native Rust, WebAssembly, Python)
15//!
16//! ## Example Usage
17//!
18//! ```rust,no_run
19//! use gram_codec::{parse_gram_notation, to_gram_pattern};
20//!
21//! // Parse gram notation into patterns
22//! let gram_text = "(alice:Person {name: \"Alice\"})-[:KNOWS]->(bob:Person {name: \"Bob\"})";
23//! let patterns = parse_gram_notation(gram_text)?;
24//!
25//! // Serialize patterns back to gram notation
26//! for pattern in &patterns {
27//!     let output = to_gram_pattern(pattern)?;
28//!     println!("{}", output);
29//! }
30//! # Ok::<(), Box<dyn std::error::Error>>(())
31//! ```
32//!
33//! ## Grammar Authority
34//!
35//! This codec uses [`tree-sitter-gram`](https://github.com/gram-data/tree-sitter-gram) as the
36//! authoritative grammar specification. The parser implementation is pure Rust using nom parser
37//! combinators, validated for 100% conformance with the tree-sitter-gram test corpus.
38
39// Module declarations
40pub mod ast;
41mod error;
42pub mod json;
43mod serializer;
44pub mod standard_graph;
45mod value;
46
47// TODO: Temporarily commented out during migration to nom parser
48// Old tree-sitter parser (will be replaced)
49// mod parser;
50// pub(crate) mod transform;
51
52// New nom-based parser module (under development)
53mod parser;
54
55#[cfg(feature = "cst")]
56pub mod cst;
57
58// Optional platform-specific modules
59#[cfg(feature = "wasm")]
60mod wasm;
61
62#[cfg(feature = "python")]
63mod python;
64
65// Public API exports
66pub use ast::{AstPattern, AstSubject, ParseWithHeaderResult};
67pub use error::{Location, SerializeError};
68pub use json::{gram_parse_to_json, gram_stringify_from_json, gram_validate_to_json};
69// Use the new nom-based ParseError from the parser module
70#[cfg(feature = "cst")]
71pub use cst::{lower, parse_gram_cst, CstParseResult};
72pub use parser::ParseError;
73pub use serializer::{to_gram, to_gram_pattern, to_gram_with_header};
74pub use value::Value;
75
76// Re-export Pattern and Subject from pattern-core for convenience
77pub use pattern_core::{Pattern, PropertyRecord as Record, Subject};
78
79// --- New nom-based parser API ---
80
81/// Parse gram notation text into a collection of Pattern structures.
82///
83/// This is the foundational parser for gram notation. It returns all top-level elements,
84/// including any leading record (which appears as a bare pattern with properties but
85/// no identity, labels, or elements).
86///
87/// # Arguments
88///
89/// * `input` - Gram notation text to parse
90///
91/// # Returns
92///
93/// * `Ok(Vec<Pattern<Subject>>)` - Successfully parsed patterns
94/// * `Err(ParseError)` - Parse error with location information
95pub fn parse_gram(input: &str) -> Result<Vec<Pattern<Subject>>, ParseError> {
96    // Handle empty/whitespace-only input
97    if input.trim().is_empty() {
98        return Ok(vec![]);
99    }
100
101    // Parse using nom parser
102    match parser::gram_patterns(input) {
103        Ok((remaining, patterns)) => {
104            // Check if all input was consumed
105            if !remaining.trim().is_empty() {
106                let offset = input.len() - remaining.len();
107                let location = parser::Location::from_offset(input, offset);
108                return Err(ParseError::UnexpectedInput {
109                    location,
110                    snippet: remaining.chars().take(20).collect(),
111                });
112            }
113            Ok(patterns)
114        }
115        Err(e) => Err(parser::ParseError::from_nom_error(input, e)),
116    }
117}
118
119/// Parse gram notation, separating an optional header record from the patterns.
120///
121/// If the first element is a bare record (identity and labels are empty, and it has no elements),
122/// it is returned separately as the header.
123///
124/// # Arguments
125///
126/// * `input` - Gram notation text to parse
127///
128/// # Returns
129///
130/// * `Ok((Option<Record>, Vec<Pattern<Subject>>))` - Successfully parsed header and patterns
131/// * `Err(ParseError)` - If parsing fails
132pub fn parse_gram_with_header(
133    input: &str,
134) -> Result<(Option<Record>, Vec<Pattern<Subject>>), ParseError> {
135    let mut patterns = parse_gram(input)?;
136
137    if patterns.is_empty() {
138        return Ok((None, vec![]));
139    }
140
141    // Check if the first pattern is a bare record
142    let first = &patterns[0];
143    if first.value.identity.0.is_empty()
144        && first.value.labels.is_empty()
145        && first.elements.is_empty()
146        && !first.value.properties.is_empty()
147    {
148        let header_record = patterns.remove(0).value.properties;
149        Ok((Some(header_record), patterns))
150    } else {
151        Ok((None, patterns))
152    }
153}
154
155/// Parse gram notation to AST (Abstract Syntax Tree).
156///
157/// Returns a single AstPattern representing the file-level pattern.
158///
159/// For the stable cross-language boundary used by the native TypeScript and
160/// Python packages, prefer the JSON array helpers in `crate::json`
161/// (`gram_parse_to_json`, `gram_stringify_from_json`, `gram_validate_to_json`).
162///
163/// # Why AST?
164///
165/// - **Language-agnostic**: Pure JSON, works everywhere.
166/// - **Complete**: No information loss.
167/// - **Simple**: Just patterns and subjects (no graph concepts).
168///
169/// # Arguments
170///
171/// * `input` - Gram notation text to parse
172///
173/// # Returns
174///
175/// * `Ok(AstPattern)` - The parsed pattern as AST
176/// * `Err(ParseError)` - If parsing fails
177pub fn parse_to_ast(input: &str) -> Result<AstPattern, ParseError> {
178    let patterns = parse_gram(input)?;
179
180    if patterns.is_empty() {
181        return Ok(AstPattern::empty());
182    }
183
184    // Maintain "single file-level pattern" contract for AST
185    // If there's exactly one pattern and it's not a bare record, return it.
186    // Otherwise, wrap everything in a file-level pattern.
187    let document_pattern = wrap_as_document(patterns);
188    Ok(AstPattern::from_pattern(&document_pattern))
189}
190
191/// Internal helper to wrap multiple patterns into a single document-level pattern.
192fn wrap_as_document(mut patterns: Vec<Pattern<Subject>>) -> Pattern<Subject> {
193    if patterns.len() == 1 {
194        let first = &patterns[0];
195        // If it's a "real" pattern (has identity or labels or elements), return it.
196        // Also return it if it has properties but no other fields (a bare record),
197        // because as a single pattern it represents the whole document.
198        if !first.value.identity.0.is_empty()
199            || !first.value.labels.is_empty()
200            || !first.elements.is_empty()
201            || !first.value.properties.is_empty()
202        {
203            return patterns.remove(0);
204        }
205    }
206
207    // Otherwise wrap everything (including the bare record if present)
208    // Actually, if the first is a bare record, it becomes the document's properties
209    let mut properties = Record::new();
210    if !patterns.is_empty() {
211        let first = &patterns[0];
212        if first.value.identity.0.is_empty()
213            && first.value.labels.is_empty()
214            && first.elements.is_empty()
215            && !first.value.properties.is_empty()
216        {
217            properties = patterns.remove(0).value.properties;
218        }
219    }
220
221    let subject = Subject {
222        identity: pattern_core::Symbol(String::new()),
223        labels: std::collections::HashSet::new(),
224        properties,
225    };
226    Pattern::pattern(subject, patterns)
227}
228
229/// Validate gram notation syntax without constructing patterns.
230pub fn validate_gram(input: &str) -> Result<(), ParseError> {
231    parse_gram(input).map(|_| ())
232}
233
234/// Parse a single Gram pattern from text.
235pub fn parse_single_pattern(input: &str) -> Result<Pattern<Subject>, ParseError> {
236    let patterns = parse_gram(input)?;
237
238    match patterns.len() {
239        0 => Err(ParseError::UnexpectedInput {
240            location: parser::Location::start(),
241            snippet: "Input contains no patterns".to_string(),
242        }),
243        1 => Ok(patterns.into_iter().next().unwrap()),
244        n => Err(ParseError::UnexpectedInput {
245            location: parser::Location::start(),
246            snippet: format!("Input contains {} patterns, expected exactly 1", n),
247        }),
248    }
249}
250
251// Backward compatibility aliases
252pub use parse_gram as parse_gram_notation;
253pub use standard_graph::FromGram;