Skip to main content

gram_codec/
serializer.rs

1//! Serializer for Pattern structures to Gram notation
2
3use crate::{SerializeError, Value};
4use pattern_core::{Pattern, Subject};
5use std::collections::HashMap;
6
7/// Serialize a Pattern structure to Gram notation
8pub fn to_gram_pattern(pattern: &Pattern<Subject>) -> Result<String, SerializeError> {
9    let format = select_format(pattern);
10
11    match format {
12        GramFormat::Node => serialize_node_pattern(pattern),
13        GramFormat::Relationship => serialize_relationship_pattern(pattern),
14        GramFormat::SubjectPattern => serialize_subject_pattern(pattern),
15        GramFormat::Annotation => serialize_annotation_pattern(pattern),
16        GramFormat::BareRecord => serialize_record(&pattern.value.properties),
17    }
18}
19
20/// Serialize a sequence of patterns to gram notation.
21///
22/// Writes each pattern in sequence, joined by newlines.
23///
24/// # Arguments
25///
26/// * `patterns` - Patterns to serialize
27///
28/// # Returns
29///
30/// * `Ok(String)` - Valid Gram notation
31pub fn to_gram(patterns: &[Pattern<Subject>]) -> Result<String, SerializeError> {
32    patterns
33        .iter()
34        .map(to_gram_pattern)
35        .collect::<Result<Vec<_>, _>>()
36        .map(|lines| lines.join("\n"))
37}
38
39/// Serializes patterns with a leading header record.
40///
41/// Emits the header as a top-level record followed by the patterns,
42/// joined by newlines.
43///
44/// # Arguments
45///
46/// * `header` - Header record to serialize
47/// * `patterns` - Patterns to serialize
48///
49/// # Returns
50///
51/// * `Ok(String)` - Valid Gram notation with header
52pub fn to_gram_with_header(
53    header: crate::Record,
54    patterns: &[Pattern<Subject>],
55) -> Result<String, SerializeError> {
56    let header_str = serialize_record(&header)?;
57    let patterns_str = to_gram(patterns)?;
58
59    if patterns_str.is_empty() {
60        Ok(header_str)
61    } else if header_str.is_empty() {
62        Ok(patterns_str)
63    } else {
64        Ok(format!("{}\n{}", header_str, patterns_str))
65    }
66}
67
/// The notations a pattern can be written in.
///
/// `select_format` picks one of these from the number of elements a pattern
/// has and from which parts of its subject are populated.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum GramFormat {
    /// `(subject)` - a pattern without elements.
    Node,
    /// `(left)-->(right)` or `(left)-[subject]->(right)` - exactly two
    /// elements, neither of which has elements of its own.
    Relationship,
    /// `[subject | elements]` - the fallback for every other shape.
    SubjectPattern,
    /// `@@id:Label @key(value) element` - exactly one element, wrapped by a
    /// subject that has an identity, labels, or properties.
    Annotation,
    /// `{key: value}` - no elements, no identity, no labels, but at least one
    /// property.
    BareRecord,
}
82
83/// Select appropriate gram notation format for a pattern
84fn select_format(pattern: &Pattern<Subject>) -> GramFormat {
85    let elem_count = pattern.elements.len();
86
87    if elem_count == 0 {
88        if pattern.value.identity.0.is_empty()
89            && pattern.value.labels.is_empty()
90            && !pattern.value.properties.is_empty()
91        {
92            GramFormat::BareRecord
93        } else {
94            GramFormat::Node
95        }
96    } else if elem_count == 1 {
97        // Check if this is an annotation (anonymous subject with properties)
98        if is_annotation_pattern(pattern) {
99            GramFormat::Annotation
100        } else {
101            GramFormat::SubjectPattern
102        }
103    } else if elem_count == 2 {
104        // Check if both elements are atomic (relationship notation)
105        if is_relationship_pattern(pattern) {
106            GramFormat::Relationship
107        } else {
108            GramFormat::SubjectPattern
109        }
110    } else {
111        GramFormat::SubjectPattern
112    }
113}
114
115/// Check if pattern qualifies for relationship notation
116///
117/// Relationship notation `(a)-->(b)` or `(a)-[edge]->(b)` is used when:
118/// - Exactly 2 elements
119/// - Both elements are atomic (0 elements each)
120/// - Root is compatible with current parser capabilities
121///
122/// Subject pattern notation `[root | elements]` is used when:
123/// - Root has labels (not yet supported in relationship notation by parser)
124/// - Root has identifier without labels/properties (container pattern)
125///
126/// Examples:
127/// - `(a)-->(b)` - anonymous root → relationship
128/// - `(a)-[r {prop: val}]->(b)` - root with properties → relationship (if supported by parser)
129/// - `[team | (a), (b)]` - root "team" without labels/props → subject pattern
130/// - `[team:Group | (a), (b)]` - root with labels → subject pattern (parser doesn't support `-[:Label]->` yet)
131fn is_relationship_pattern(pattern: &Pattern<Subject>) -> bool {
132    // Must have exactly 2 atomic elements
133    if pattern.elements.len() != 2
134        || !pattern.elements[0].elements.is_empty()
135        || !pattern.elements[1].elements.is_empty()
136    {
137        return false;
138    }
139
140    // The parser NOW supports:
141    // - `(a)-->(b)` (anonymous)
142    // - `(a)-[id]->(b)` (identifier only)
143    // - `(a)-[:Label]->(b)` (labels only)
144    // - `(a)-[{prop: val}]->(b)` (properties only)
145    // - `(a)-[id:Label {prop: val}]->(b)` (all combined)
146    //
147    // So we can use relationship notation for all relationships!
148    true
149}
150
151/// Check if pattern is an annotation
152///
153/// True if:
154/// - Exactly 1 element
155/// - Subject carries any annotation metadata
156fn is_annotation_pattern(pattern: &Pattern<Subject>) -> bool {
157    pattern.elements.len() == 1
158        && (!pattern.value.identity.0.is_empty()
159            || !pattern.value.labels.is_empty()
160            || !pattern.value.properties.is_empty())
161}
162
163/// Serialize as node pattern: `(subject)`
164fn serialize_node_pattern(pattern: &Pattern<Subject>) -> Result<String, SerializeError> {
165    let subject_str = serialize_subject(&pattern.value)?;
166    Ok(format!("({})", subject_str))
167}
168
169/// Serialize as relationship pattern: `(left)-->(right)`
170fn serialize_relationship_pattern(pattern: &Pattern<Subject>) -> Result<String, SerializeError> {
171    if pattern.elements.len() != 2 {
172        return Err(SerializeError::invalid_structure(
173            "Relationship pattern requires exactly 2 elements",
174        ));
175    }
176
177    let left = serialize_node_pattern(&pattern.elements[0])?;
178    let right = serialize_node_pattern(&pattern.elements[1])?;
179
180    // Serialize the edge (relationship) subject if present
181    let edge = if pattern.value.identity.0.is_empty()
182        && pattern.value.labels.is_empty()
183        && pattern.value.properties.is_empty()
184    {
185        // Empty edge: (a)-->(b)
186        String::new()
187    } else {
188        // Edge with labels/properties: (a)-[:KNOWS {since: 2020}]->(b)
189        let edge_str = serialize_subject(&pattern.value)?;
190        format!("[{}]", edge_str)
191    };
192
193    Ok(format!("{}-{}->{}", left, edge, right))
194}
195
196/// Serialize as subject pattern: `[subject | elements]`
197fn serialize_subject_pattern(pattern: &Pattern<Subject>) -> Result<String, SerializeError> {
198    let subject_str = serialize_subject(&pattern.value)?;
199
200    let elements_str = pattern
201        .elements
202        .iter()
203        .map(to_gram_pattern)
204        .collect::<Result<Vec<_>, _>>()?
205        .join(", ");
206
207    Ok(format!("[{} | {}]", subject_str, elements_str))
208}
209
210/// Serialize as annotation pattern: `@@id:Label @key(value) element`
211fn serialize_annotation_pattern(pattern: &Pattern<Subject>) -> Result<String, SerializeError> {
212    if pattern.elements.len() != 1 {
213        return Err(SerializeError::invalid_structure(
214            "Annotation pattern requires exactly 1 element",
215        ));
216    }
217
218    let mut annotations = Vec::new();
219
220    if !pattern.value.identity.0.is_empty() || !pattern.value.labels.is_empty() {
221        let mut identified = String::from("@@");
222
223        if !pattern.value.identity.0.is_empty() {
224            identified.push_str(&quote_identifier(&pattern.value.identity.0));
225        }
226
227        if !pattern.value.labels.is_empty() {
228            let mut labels: Vec<_> = pattern.value.labels.iter().collect();
229            labels.sort();
230            for label in labels {
231                identified.push(':');
232                identified.push_str(&quote_identifier(label));
233            }
234        }
235
236        annotations.push(identified);
237    }
238
239    let mut property_annotations: Vec<String> = pattern
240        .value
241        .properties
242        .iter()
243        .map(|(key, value)| {
244            let gram_value = value_from_pattern_value(value)?;
245            let value_str = gram_value.to_gram_notation();
246            Ok(format!("@{}({})", quote_identifier(key), value_str))
247        })
248        .collect::<Result<Vec<_>, SerializeError>>()?;
249
250    property_annotations.sort();
251    annotations.extend(property_annotations);
252
253    let element_str = to_gram_pattern(&pattern.elements[0])?;
254
255    Ok(format!("{} {}", annotations.join(" "), element_str))
256}
257
258/// Serialize a Subject (identifier + labels + properties)
259fn serialize_subject(subject: &Subject) -> Result<String, SerializeError> {
260    let mut parts = Vec::new();
261
262    // Build identifier with labels (no spaces between them)
263    let mut id_with_labels = String::new();
264
265    // Serialize identifier
266    if !subject.identity.0.is_empty() {
267        id_with_labels.push_str(&quote_identifier(&subject.identity.0));
268    }
269
270    // Serialize labels (concatenate directly without spaces)
271    if !subject.labels.is_empty() {
272        let mut labels: Vec<_> = subject.labels.iter().collect();
273        labels.sort(); // Consistent ordering
274        for label in labels {
275            id_with_labels.push(':');
276            id_with_labels.push_str(&quote_identifier(label));
277        }
278    }
279
280    // Add identifier+labels as a single part
281    if !id_with_labels.is_empty() {
282        parts.push(id_with_labels);
283    }
284
285    // Serialize properties (this goes as a separate part, with space before it)
286    if !subject.properties.is_empty() {
287        let record_str = serialize_record(&subject.properties)?;
288        parts.push(record_str);
289    }
290
291    Ok(parts.join(" "))
292}
293
294/// Serialize property record: `{key1: value1, key2: value2}`
295fn serialize_record(
296    properties: &HashMap<String, pattern_core::Value>,
297) -> Result<String, SerializeError> {
298    if properties.is_empty() {
299        return Ok(String::new());
300    }
301
302    let mut props: Vec<_> = properties.iter().collect();
303    props.sort_by_key(|(k, _)| *k); // Consistent ordering
304
305    let prop_strs: Vec<String> = props
306        .iter()
307        .map(|(key, value)| {
308            // Convert pattern_core::Value to gram_codec::Value
309            let gram_value = value_from_pattern_value(value)?;
310            let value_str = gram_value.to_gram_notation();
311            Ok(format!("{}: {}", quote_identifier(key), value_str))
312        })
313        .collect::<Result<Vec<_>, SerializeError>>()?;
314
315    Ok(format!("{{{}}}", prop_strs.join(", ")))
316}
317
318/// Convert pattern_core::Value to gram_codec::Value
319fn value_from_pattern_value(value: &pattern_core::Value) -> Result<Value, SerializeError> {
320    match value {
321        pattern_core::Value::VString(s) => Ok(Value::String(s.clone())),
322        pattern_core::Value::VSymbol(s) => Ok(Value::String(s.clone())),
323        pattern_core::Value::VInteger(i) => Ok(Value::Integer(*i)),
324        pattern_core::Value::VDecimal(d) => Ok(Value::Decimal(*d)),
325        pattern_core::Value::VBoolean(b) => Ok(Value::Boolean(*b)),
326        pattern_core::Value::VArray(arr) => {
327            let values = arr
328                .iter()
329                .map(value_from_pattern_value)
330                .collect::<Result<Vec<_>, _>>()?;
331            Ok(Value::Array(values))
332        }
333        pattern_core::Value::VRange(range) => {
334            // Convert Option<f64> to i64 bounds
335            // For now, only support bounded integer ranges
336            let lower = range.lower.ok_or_else(|| {
337                SerializeError::invalid_structure("Unbounded lower range not supported")
338            })? as i64;
339            let upper = range.upper.ok_or_else(|| {
340                SerializeError::invalid_structure("Unbounded upper range not supported")
341            })? as i64;
342            Ok(Value::Range { lower, upper })
343        }
344        pattern_core::Value::VTaggedString { tag, content } => Ok(Value::TaggedString {
345            tag: tag.clone(),
346            content: content.clone(),
347        }),
348        pattern_core::Value::VMap(_map) => {
349            // Maps are not supported in gram notation property values
350            // They would need to be serialized as nested patterns
351            Err(SerializeError::invalid_structure(
352                "Map values not supported in gram notation properties",
353            ))
354        }
355        pattern_core::Value::VMeasurement { .. } => {
356            // Measurements are not supported in basic gram notation
357            Err(SerializeError::invalid_structure(
358                "Measurement values not supported in gram notation",
359            ))
360        }
361    }
362}
363
364/// Quote identifier if needed (contains spaces, special chars, or starts with digit)
365/// Uses backtick quoting per grammar: identifiers, labels, and keys use quoted_name (`)
366fn quote_identifier(s: &str) -> String {
367    if needs_quoting(s) {
368        format!("`{}`", escape_backtick_string(s))
369    } else {
370        s.to_string()
371    }
372}
373
/// Reports whether `s` must be backtick-quoted to be used as an identifier.
///
/// Two unquoted forms are accepted, following the grammar:
///   symbol:  /[a-zA-Z_][0-9a-zA-Z_.\-@]*/
///   integer: /-?(0|[1-9]\d*)/
///
/// Everything else needs quoting, including the empty string, a lone `-`,
/// integers with leading zeros, and text starting with any other character.
fn needs_quoting(s: &str) -> bool {
    let mut rest = s.chars();
    let first = match rest.next() {
        Some(ch) => ch,
        None => return true,
    };

    if first.is_ascii_alphabetic() || first == '_' {
        // Symbol: every following character must belong to the symbol set.
        let is_symbol =
            rest.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '.' | '-' | '@'));
        return !is_symbol;
    }

    if first.is_ascii_digit() || first == '-' {
        // Integer: optional sign, then either a single `0` or digits that do
        // not start with `0`.
        let digits = if first == '-' { rest.as_str() } else { s };
        let is_integer = !digits.is_empty()
            && digits.bytes().all(|b| b.is_ascii_digit())
            && (digits.len() == 1 || !digits.starts_with('0'));
        return !is_integer;
    }

    // Any other leading character (non-ASCII, `@`, punctuation, ...).
    true
}
410
/// Escapes text for placement inside a backtick-quoted identifier.
///
/// Backslashes and backticks are prefixed with a backslash, and newline,
/// carriage return and tab are written as `\n`, `\r` and `\t`. All other
/// characters are copied unchanged.
fn escape_backtick_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());

    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '`' => escaped.push_str("\\`"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            other => escaped.push(other),
        }
    }

    escaped
}