gram_codec/lib.rs
1//! # Gram Codec
2//!
3//! Bidirectional codec between Gram notation (human-readable text format) and Pattern data structures.
4//!
5//! This crate provides:
6//! - **Parsing**: Transform Gram notation text into Pattern structures
7//! - **Serialization**: Transform Pattern structures into valid Gram notation
8//!
9//! ## Features
10//!
11//! - Full support for all Gram syntax forms (nodes, relationships, subject patterns, annotations)
12//! - Round-trip correctness (parse → serialize → parse produces equivalent pattern)
13//! - Error recovery (reports all syntax errors, not just the first)
14//! - Multi-platform support (native Rust, WebAssembly, Python)
15//!
16//! ## Example Usage
17//!
18//! ```rust,no_run
19//! use gram_codec::{parse_gram_notation, to_gram_pattern};
20//!
21//! // Parse gram notation into patterns
22//! let gram_text = "(alice:Person {name: \"Alice\"})-[:KNOWS]->(bob:Person {name: \"Bob\"})";
23//! let patterns = parse_gram_notation(gram_text)?;
24//!
25//! // Serialize patterns back to gram notation
26//! for pattern in &patterns {
27//! let output = to_gram_pattern(pattern)?;
28//! println!("{}", output);
29//! }
30//! # Ok::<(), Box<dyn std::error::Error>>(())
31//! ```
32//!
33//! ## Grammar Authority
34//!
35//! This codec uses [`tree-sitter-gram`](https://github.com/gram-data/tree-sitter-gram) as the
36//! authoritative grammar specification. The parser implementation is pure Rust using nom parser
37//! combinators, validated for 100% conformance with the tree-sitter-gram test corpus.
38
39// Module declarations
40pub mod ast;
41mod error;
42pub mod json;
43mod serializer;
44pub mod standard_graph;
45mod value;
46
47// TODO: Temporarily commented out during migration to nom parser
48// Old tree-sitter parser (will be replaced)
49// mod parser;
50// pub(crate) mod transform;
51
52// New nom-based parser module (under development)
53mod parser;
54
55#[cfg(feature = "cst")]
56pub mod cst;
57
58// Optional platform-specific modules
59#[cfg(feature = "wasm")]
60mod wasm;
61
62#[cfg(feature = "python")]
63mod python;
64
65// Public API exports
66pub use ast::{AstPattern, AstSubject, ParseWithHeaderResult};
67pub use error::{Location, SerializeError};
68pub use json::{gram_parse_to_json, gram_stringify_from_json, gram_validate_to_json};
// `ParseError` (re-exported from `parser` below) is the new nom-based error type
70#[cfg(feature = "cst")]
71pub use cst::{lower, parse_gram_cst, CstParseResult};
72pub use parser::ParseError;
73pub use serializer::{to_gram, to_gram_pattern, to_gram_with_header};
74pub use value::Value;
75
76// Re-export Pattern and Subject from pattern-core for convenience
77pub use pattern_core::{Pattern, PropertyRecord as Record, Subject};
78
79// --- New nom-based parser API ---
80
81/// Parse gram notation text into a collection of Pattern structures.
82///
83/// This is the foundational parser for gram notation. It returns all top-level elements,
84/// including any leading record (which appears as a bare pattern with properties but
85/// no identity, labels, or elements).
86///
87/// # Arguments
88///
89/// * `input` - Gram notation text to parse
90///
91/// # Returns
92///
93/// * `Ok(Vec<Pattern<Subject>>)` - Successfully parsed patterns
94/// * `Err(ParseError)` - Parse error with location information
95pub fn parse_gram(input: &str) -> Result<Vec<Pattern<Subject>>, ParseError> {
96 // Handle empty/whitespace-only input
97 if input.trim().is_empty() {
98 return Ok(vec![]);
99 }
100
101 // Parse using nom parser
102 match parser::gram_patterns(input) {
103 Ok((remaining, patterns)) => {
104 // Check if all input was consumed
105 if !remaining.trim().is_empty() {
106 let offset = input.len() - remaining.len();
107 let location = parser::Location::from_offset(input, offset);
108 return Err(ParseError::UnexpectedInput {
109 location,
110 snippet: remaining.chars().take(20).collect(),
111 });
112 }
113 Ok(patterns)
114 }
115 Err(e) => Err(parser::ParseError::from_nom_error(input, e)),
116 }
117}
118
119/// Parse gram notation, separating an optional header record from the patterns.
120///
121/// If the first element is a bare record (identity and labels are empty, and it has no elements),
122/// it is returned separately as the header.
123///
124/// # Arguments
125///
126/// * `input` - Gram notation text to parse
127///
128/// # Returns
129///
130/// * `Ok((Option<Record>, Vec<Pattern<Subject>>))` - Successfully parsed header and patterns
131/// * `Err(ParseError)` - If parsing fails
132pub fn parse_gram_with_header(
133 input: &str,
134) -> Result<(Option<Record>, Vec<Pattern<Subject>>), ParseError> {
135 let mut patterns = parse_gram(input)?;
136
137 if patterns.is_empty() {
138 return Ok((None, vec![]));
139 }
140
141 // Check if the first pattern is a bare record
142 let first = &patterns[0];
143 if first.value.identity.0.is_empty()
144 && first.value.labels.is_empty()
145 && first.elements.is_empty()
146 && !first.value.properties.is_empty()
147 {
148 let header_record = patterns.remove(0).value.properties;
149 Ok((Some(header_record), patterns))
150 } else {
151 Ok((None, patterns))
152 }
153}
154
155/// Parse gram notation to AST (Abstract Syntax Tree).
156///
157/// Returns a single AstPattern representing the file-level pattern.
158///
159/// For the stable cross-language boundary used by the native TypeScript and
160/// Python packages, prefer the JSON array helpers in `crate::json`
161/// (`gram_parse_to_json`, `gram_stringify_from_json`, `gram_validate_to_json`).
162///
163/// # Why AST?
164///
165/// - **Language-agnostic**: Pure JSON, works everywhere.
166/// - **Complete**: No information loss.
167/// - **Simple**: Just patterns and subjects (no graph concepts).
168///
169/// # Arguments
170///
171/// * `input` - Gram notation text to parse
172///
173/// # Returns
174///
175/// * `Ok(AstPattern)` - The parsed pattern as AST
176/// * `Err(ParseError)` - If parsing fails
177pub fn parse_to_ast(input: &str) -> Result<AstPattern, ParseError> {
178 let patterns = parse_gram(input)?;
179
180 if patterns.is_empty() {
181 return Ok(AstPattern::empty());
182 }
183
184 // Maintain "single file-level pattern" contract for AST
185 // If there's exactly one pattern and it's not a bare record, return it.
186 // Otherwise, wrap everything in a file-level pattern.
187 let document_pattern = wrap_as_document(patterns);
188 Ok(AstPattern::from_pattern(&document_pattern))
189}
190
191/// Internal helper to wrap multiple patterns into a single document-level pattern.
192fn wrap_as_document(mut patterns: Vec<Pattern<Subject>>) -> Pattern<Subject> {
193 if patterns.len() == 1 {
194 let first = &patterns[0];
195 // If it's a "real" pattern (has identity or labels or elements), return it.
196 // Also return it if it has properties but no other fields (a bare record),
197 // because as a single pattern it represents the whole document.
198 if !first.value.identity.0.is_empty()
199 || !first.value.labels.is_empty()
200 || !first.elements.is_empty()
201 || !first.value.properties.is_empty()
202 {
203 return patterns.remove(0);
204 }
205 }
206
207 // Otherwise wrap everything (including the bare record if present)
208 // Actually, if the first is a bare record, it becomes the document's properties
209 let mut properties = Record::new();
210 if !patterns.is_empty() {
211 let first = &patterns[0];
212 if first.value.identity.0.is_empty()
213 && first.value.labels.is_empty()
214 && first.elements.is_empty()
215 && !first.value.properties.is_empty()
216 {
217 properties = patterns.remove(0).value.properties;
218 }
219 }
220
221 let subject = Subject {
222 identity: pattern_core::Symbol(String::new()),
223 labels: std::collections::HashSet::new(),
224 properties,
225 };
226 Pattern::pattern(subject, patterns)
227}
228
229/// Validate gram notation syntax without constructing patterns.
230pub fn validate_gram(input: &str) -> Result<(), ParseError> {
231 parse_gram(input).map(|_| ())
232}
233
234/// Parse a single Gram pattern from text.
235pub fn parse_single_pattern(input: &str) -> Result<Pattern<Subject>, ParseError> {
236 let patterns = parse_gram(input)?;
237
238 match patterns.len() {
239 0 => Err(ParseError::UnexpectedInput {
240 location: parser::Location::start(),
241 snippet: "Input contains no patterns".to_string(),
242 }),
243 1 => Ok(patterns.into_iter().next().unwrap()),
244 n => Err(ParseError::UnexpectedInput {
245 location: parser::Location::start(),
246 snippet: format!("Input contains {} patterns, expected exactly 1", n),
247 }),
248 }
249}
250
251// Backward compatibility aliases
252pub use parse_gram as parse_gram_notation;
253pub use standard_graph::FromGram;