Skip to main content

bynk_grammar/
lib.rs

1//! Render the `tree-sitter-bynk` grammar to EBNF.
2//!
3//! This crate is the single source of the grammar reference. It takes the
4//! compiled grammar JSON (`tree-sitter-bynk/src/grammar.json`) as input and is
5//! otherwise location-agnostic, so the same renderer feeds both the full
6//! appendix page ([`render_appendix`]) and the per-rule includes embedded in
7//! the curated reference page ([`render_production`] / [`render_rule`]). Because
8//! both come from one implementation, an embedded production cannot drift from
9//! the appendix.
10//!
11//! **Display names.** Grammar rule names are parser-internal (`_type_ref`,
12//! `_expression`, …). For the reference we render *readable* names via
13//! [`display_name`]: a trivial `_x ::= y` wrapper collapses to its target, an
14//! optional override applies, otherwise a single leading underscore is stripped.
15//! The transform is applied to both rule heads and the nonterminal references
16//! inside productions, so the whole reference reads as language, not internals.
17//!
18//! See `bynkc/tests/grammar_reference.rs` (the appendix generator) and
19//! `site/src/plugins/remark-bynk-directives.mjs` (the `{{#grammar <rule>}}`
20//! include directive the Book renders with).
21
22use std::error::Error;
23use std::fmt;
24
25use serde_json::{Map, Value};
26
/// Failure modes of the grammar renderer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GrammarError {
    /// The input was not valid JSON; carries the parser's message.
    Parse(String),
    /// The named rule is not a top-level rule of the grammar (also reported
    /// when the grammar has no `rules` object at all); carries the name.
    UnknownRule(String),
}

impl fmt::Display for GrammarError {
    /// Write the human-readable message for this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Every message is "<prefix><payload><suffix>"; pick the pieces per
        // variant and emit them in one go.
        let (prefix, payload, suffix) = match self {
            Self::Parse(message) => ("could not parse grammar JSON: ", message, ""),
            Self::UnknownRule(rule) => (
                "unknown grammar rule `",
                rule,
                "` (not a top-level production)",
            ),
        };
        write!(f, "{prefix}{payload}{suffix}")
    }
}

// No underlying cause is exposed, so the default `Error` methods suffice.
impl Error for GrammarError {}
51
/// Display-name overrides: `(rule name, display name)` pairs for rules whose
/// mechanically derived name reads badly. The table is consulted with the rule
/// name as the key and, on a match, the paired display name is used verbatim.
///
/// Each key must be a real top-level rule (checked in tests). Keep this tiny —
/// most rules read fine after collapsing wrappers and stripping the leading
/// underscore. It is currently empty.
const OVERRIDES: &[(&str, &str)] = &[];
56
57fn rules_of(grammar: &Value) -> Option<&Map<String, Value>> {
58    grammar.get("rules").and_then(Value::as_object)
59}
60
61/// If `name`'s body is a single `SYMBOL` (a trivial `_x ::= y` wrapper), return
62/// the target rule name. Such wrappers are collapsed: they never appear as their
63/// own production, and references to them render as the target's display name.
64fn trivial_wrapper_target<'a>(rules: &'a Map<String, Value>, name: &str) -> Option<&'a str> {
65    let body = rules.get(name)?;
66    if body.get("type").and_then(Value::as_str) == Some("SYMBOL") {
67        body.get("name").and_then(Value::as_str)
68    } else {
69        None
70    }
71}
72
73/// The readable display name of a grammar rule: collapse a trivial-wrapper chain
74/// to its target, apply any override, else strip a single leading underscore.
75fn display_name_in(rules: &Map<String, Value>, name: &str) -> String {
76    if let Some(target) = trivial_wrapper_target(rules, name) {
77        return display_name_in(rules, target);
78    }
79    if let Some((_, disp)) = OVERRIDES.iter().find(|(k, _)| *k == name) {
80        return (*disp).to_string();
81    }
82    name.strip_prefix('_').unwrap_or(name).to_string()
83}
84
85/// Render a grammar node to EBNF text plus a precedence level:
86/// 0 = choice (`a | b`), 1 = sequence (`a b`), 2 = atom / postfix (`x`, `(…)*`).
87/// Nonterminal (`SYMBOL`) references are rendered with their display name.
88fn render(rules: &Map<String, Value>, node: &Value) -> (String, u8) {
89    match node.get("type").and_then(Value::as_str).unwrap_or("") {
90        "SYMBOL" => (
91            display_name_in(rules, node["name"].as_str().unwrap_or("?")),
92            2,
93        ),
94        "STRING" => (format!("\"{}\"", node["value"].as_str().unwrap_or("")), 2),
95        "PATTERN" => (format!("/{}/", node["value"].as_str().unwrap_or("")), 2),
96        "BLANK" => ("ε".to_string(), 2),
97        // Wrappers that don't affect the surface grammar: render their content.
98        "PREC" | "PREC_LEFT" | "PREC_RIGHT" | "PREC_DYNAMIC" | "TOKEN" | "IMMEDIATE_TOKEN"
99        | "FIELD" | "ALIAS" => render(rules, &node["content"]),
100        "REPEAT" => (format!("{}*", wrap_atom(rules, &node["content"])), 2),
101        "REPEAT1" => (format!("{}+", wrap_atom(rules, &node["content"])), 2),
102        "SEQ" => {
103            let parts: Vec<String> = members(node).iter().map(|m| wrap(rules, m, 1)).collect();
104            (parts.join(" "), 1)
105        }
106        "CHOICE" => {
107            let all = members(node);
108            let has_blank = all
109                .iter()
110                .any(|m| m.get("type").and_then(Value::as_str) == Some("BLANK"));
111            let non_blank: Vec<&Value> = all
112                .iter()
113                .filter(|m| m.get("type").and_then(Value::as_str) != Some("BLANK"))
114                .collect();
115            if has_blank {
116                // An optional: `X?`.
117                if non_blank.len() == 1 {
118                    (format!("{}?", wrap_atom(rules, non_blank[0])), 2)
119                } else {
120                    let inner: Vec<String> = non_blank.iter().map(|m| render(rules, m).0).collect();
121                    (format!("({})?", inner.join(" | ")), 2)
122                }
123            } else {
124                let inner: Vec<String> = non_blank.iter().map(|m| render(rules, m).0).collect();
125                (inner.join(" | "), 0)
126            }
127        }
128        other => (format!("/* {other} */"), 2),
129    }
130}
131
132fn members(node: &Value) -> Vec<Value> {
133    node["members"].as_array().cloned().unwrap_or_default()
134}
135
136/// Wrap so the result can be a postfix operand (`*`, `+`, `?`): needs an atom.
137fn wrap_atom(rules: &Map<String, Value>, node: &Value) -> String {
138    wrap(rules, node, 2)
139}
140
141/// Wrap `node`'s rendering in parens if its level is below `min`.
142fn wrap(rules: &Map<String, Value>, node: &Value, min: u8) -> String {
143    let (s, level) = render(rules, node);
144    if level < min { format!("({s})") } else { s }
145}
146
147fn render_extra(node: &Value) -> String {
148    match node.get("type").and_then(Value::as_str).unwrap_or("") {
149        "SYMBOL" => format!("`{}`", node["name"].as_str().unwrap_or("?")),
150        "PATTERN" => format!("`/{}/`", node["value"].as_str().unwrap_or("")),
151        "STRING" => format!("`\"{}\"`", node["value"].as_str().unwrap_or("")),
152        _ => "?".to_string(),
153    }
154}
155
156/// Render the complete grammar reference appendix
157/// (`site/src/content/docs/book/reference/grammar-appendix.md`): the
158/// generated-file header, the
159/// notation note, the full `ebnf` block of every production (display names,
160/// trivial wrappers collapsed), and the Tokens & trivia section.
161pub fn render_appendix(grammar_json: &str) -> String {
162    let grammar: Value = serde_json::from_str(grammar_json).expect("grammar.json parses");
163
164    let mut out = String::new();
165    out.push_str("# Complete grammar (appendix)\n\n");
166    out.push_str(
167        "<!-- GENERATED FILE — do not edit by hand.\n     \
168         Source: tree-sitter-bynk/src/grammar.json, via bynkc/tests/grammar_reference.rs.\n     \
169         Regenerate with: BYNK_BLESS=1 cargo test -p bynkc --test grammar_reference -->\n\n",
170    );
171    out.push_str(
172        "The complete Bynk grammar, generated from the `tree-sitter-bynk` grammar. \
173         For the annotated, per-construct reference see [Syntax & grammar](grammar.md).\n\n",
174    );
175    out.push_str("**Notation.** ");
176    out.push_str(
177        "`\"x\"` a literal token · `/x/` a regular expression · `( … )?` optional · \
178         `( … )*` zero or more · `( … )+` one or more · `a | b` choice · `ε` empty. \
179         Rule names are the readable display names (a leading `_` denotes an \
180         internal helper rule; trivial wrappers are collapsed). `doc_block` is an \
181         external token — a `--- … ---` documentation block.\n\n",
182    );
183
184    out.push_str("```ebnf\n");
185    if let Some(rules) = rules_of(&grammar) {
186        for (name, body) in rules {
187            // Trivial wrappers are collapsed into their target.
188            if trivial_wrapper_target(rules, name).is_some() {
189                continue;
190            }
191            let (rendered, _) = render(rules, body);
192            out.push_str(&format!(
193                "{} ::= {rendered}\n",
194                display_name_in(rules, name)
195            ));
196        }
197    }
198    out.push_str("```\n\n");
199
200    out.push_str("## Tokens & trivia\n\n");
201    if let Some(word) = grammar.get("word").and_then(Value::as_str) {
202        out.push_str(&format!("- **Word token:** `{word}`\n"));
203    }
204    if let Some(extras) = grammar.get("extras").and_then(Value::as_array) {
205        let rendered: Vec<String> = extras.iter().map(render_extra).collect();
206        out.push_str(&format!(
207            "- **Ignored between tokens:** {}\n",
208            rendered.join(", ")
209        ));
210    }
211    if let Some(externals) = grammar.get("externals").and_then(Value::as_array) {
212        let rendered: Vec<String> = externals.iter().map(render_extra).collect();
213        out.push_str(&format!("- **External tokens:** {}\n", rendered.join(", ")));
214    }
215
216    out
217}
218
219/// Look up a rule body by name, erroring if the grammar is unparseable or the
220/// rule is not a top-level production.
221fn rule_body<'a>(grammar: &'a Value, name: &str) -> Result<&'a Value, GrammarError> {
222    grammar
223        .get("rules")
224        .and_then(Value::as_object)
225        .and_then(|rules| rules.get(name))
226        .ok_or_else(|| GrammarError::UnknownRule(name.to_string()))
227}
228
229/// Render a single production's right-hand side (display names applied), exactly
230/// as it appears after `<name> ::= ` in the appendix's EBNF block.
231///
232/// Errors if the grammar JSON cannot be parsed, or if `name` is not a top-level
233/// rule of the grammar.
234pub fn render_rule(grammar_json: &str, name: &str) -> Result<String, GrammarError> {
235    let grammar: Value =
236        serde_json::from_str(grammar_json).map_err(|e| GrammarError::Parse(e.to_string()))?;
237    let rules = grammar
238        .get("rules")
239        .and_then(Value::as_object)
240        .ok_or_else(|| GrammarError::UnknownRule(name.to_string()))?;
241    let body = rule_body(&grammar, name)?;
242    Ok(render(rules, body).0)
243}
244
245/// Render a complete production line, `<display name> ::= <rhs>`, as it appears
246/// in the appendix (no surrounding fence). This is what `{{#grammar <rule>}}`
247/// embeds.
248pub fn render_production(grammar_json: &str, name: &str) -> Result<String, GrammarError> {
249    let grammar: Value =
250        serde_json::from_str(grammar_json).map_err(|e| GrammarError::Parse(e.to_string()))?;
251    let rules = grammar
252        .get("rules")
253        .and_then(Value::as_object)
254        .ok_or_else(|| GrammarError::UnknownRule(name.to_string()))?;
255    let body = rule_body(&grammar, name)?;
256    Ok(format!(
257        "{} ::= {}",
258        display_name_in(rules, name),
259        render(rules, body).0
260    ))
261}
262
263/// Every top-level rule that should have exactly one `{{#grammar}}` entry in the
264/// annotated reference: all rules **except** the trivial wrappers the display
265/// layer collapses (so this can never disagree with what is rendered). Grammar
266/// rule order is preserved. Returns an empty vector if the JSON is unparseable.
267pub fn embeddable_rules(grammar_json: &str) -> Vec<String> {
268    let Ok(grammar) = serde_json::from_str::<Value>(grammar_json) else {
269        return Vec::new();
270    };
271    let Some(rules) = rules_of(&grammar) else {
272        return Vec::new();
273    };
274    rules
275        .keys()
276        .filter(|name| trivial_wrapper_target(rules, name.as_str()).is_none())
277        .cloned()
278        .collect()
279}
280
281/// The readable display name for a top-level rule. Errors if the grammar is
282/// unparseable or `name` is not a top-level rule.
283pub fn display_name(grammar_json: &str, name: &str) -> Result<String, GrammarError> {
284    let grammar: Value =
285        serde_json::from_str(grammar_json).map_err(|e| GrammarError::Parse(e.to_string()))?;
286    let rules = grammar
287        .get("rules")
288        .and_then(Value::as_object)
289        .ok_or_else(|| GrammarError::UnknownRule(name.to_string()))?;
290    if !rules.contains_key(name) {
291        return Err(GrammarError::UnknownRule(name.to_string()));
292    }
293    Ok(display_name_in(rules, name))
294}
295
296/// Render the grammar as the JSON document the documentation site consumes. The
297/// `{{#grammar}}` remark directive looks each rule up in `productions` (the same
298/// `<name> ::= <rhs>` line [`render_production`] embeds, over [`embeddable_rules`]),
299/// and the full-grammar page reads `appendix` ([`render_appendix`]). Because both
300/// come from the same renderer as the mdBook preprocessor, the site cannot drift
301/// from the book. Object keys preserve grammar order (serde_json `preserve_order`).
302///
303/// The committed artifact (`site/src/generated/grammar.json`) is drift-guarded by
304/// `bynk-grammar/tests/generated_grammar_json.rs`; regenerate with `BYNK_BLESS=1`.
305pub fn render_site_json(grammar_json: &str) -> String {
306    let mut productions = Map::new();
307    for rule in embeddable_rules(grammar_json) {
308        if let Ok(production) = render_production(grammar_json, &rule) {
309            productions.insert(rule, Value::String(production));
310        }
311    }
312    let mut doc = Map::new();
313    doc.insert(
314        "_generated".into(),
315        Value::String(
316            "GENERATED from tree-sitter-bynk/src/grammar.json. Do not edit by hand. \
317             Regenerate with: BYNK_BLESS=1 cargo test -p bynk-grammar --test generated_grammar_json"
318                .into(),
319        ),
320    );
321    doc.insert("productions".into(), Value::Object(productions));
322    doc.insert(
323        "appendix".into(),
324        Value::String(render_appendix(grammar_json)),
325    );
326    let mut out =
327        serde_json::to_string_pretty(&Value::Object(doc)).expect("serialise grammar JSON");
328    out.push('\n');
329    out
330}
331
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::fs;
    use std::path::PathBuf;

    /// Read the compiled grammar from the sibling `tree-sitter-bynk` crate.
    fn grammar_json() -> String {
        let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        path.push("../tree-sitter-bynk/src/grammar.json");
        fs::read_to_string(path).expect("read grammar.json")
    }

    /// The `rules` object of a parsed grammar; panics if it is missing.
    fn rules(grammar: &Value) -> &Map<String, Value> {
        rules_of(grammar).unwrap()
    }

    #[test]
    fn render_rule_uses_display_names() {
        let json = grammar_json();
        let rhs = |rule: &str| render_rule(&json, rule).unwrap();

        // `_pattern`/`_expression` render without their leading underscore.
        assert_eq!(rhs("match_arm"), r#"pattern "=>" expression ","?"#);
        // `http_method` is a plain choice, unchanged.
        assert_eq!(
            rhs("http_method"),
            r#""GET" | "POST" | "PUT" | "PATCH" | "DELETE""#
        );
        // `http_handler` references `_type_ref` and `block` — display, not raw.
        let http = rhs("http_handler");
        assert!(http.contains("type_ref"), "{http}");
        assert!(!http.contains("_type_ref"), "{http}");
        assert!(http.contains("block"), "{http}");
    }

    #[test]
    fn embeddable_rules_excludes_trivial_wrappers() {
        let names = embeddable_rules(&grammar_json());
        let listed = |rule: &str| names.iter().any(|name| name == rule);

        // History of the pinned count:
        // v0.17 added: adapter_decl, _adapter_body_item, binding_decl,
        // binding_requirement. v0.20a added: function_type_ref, lambda_expr,
        // lambda_param. v0.20b added: list_literal. v0.21 added:
        // float_literal. v0.43 added: string_interpolation. v0.44 added:
        // service_protocol. v0.45 added: actor_decl, scheme, by_clause. v0.51
        // added: scheme_config, scheme_arg. v0.79 added: effect_send_stmt. v0.80
        // added: invariant_decl. v0.81 added: store_field, store_kind,
        // assign_stmt. v0.85 added: store_annotation, annotation_arg. v0.96
        // removed: state_decl, commit_stmt (parity cutover, ADR 0123).
        // v0.103 added: ws_open_handler, ws_close_handler (the `from WebSocket`
        // lifecycle handlers; `on message` reuses queue_handler). v0.114 added:
        // property_decl, for_all, for_all_binding (generative tests); mock_expr/
        // mock_arg renamed to val_expr/val_arg (no count change).
        assert_eq!(names.len(), 128);
        assert!(listed("http_handler"));
        assert!(listed("_type_ref"));
        // The two trivial wrappers the display layer collapses are excluded.
        assert!(!listed("_base_type"));
        assert!(!listed("pred_atom"));
        // Unparseable JSON yields no rules rather than panicking.
        assert!(embeddable_rules("not json").is_empty());
    }

    #[test]
    fn render_production_includes_display_head() {
        let production = render_production(&grammar_json(), "match_arm").unwrap();
        assert_eq!(production, r#"match_arm ::= pattern "=>" expression ","?"#);
    }

    #[test]
    fn display_name_collapses_and_strips() {
        let json = grammar_json();
        let shown = |rule: &str| display_name(&json, rule).unwrap();

        // Trivial wrapper `_base_type ::= base_type` collapses to its target.
        assert_eq!(shown("_base_type"), "base_type");
        // Helper rules strip the leading underscore.
        assert_eq!(shown("_expression"), "expression");
        assert_eq!(shown("_type_ref"), "type_ref");
        // An ordinary rule is unchanged.
        assert_eq!(shown("http_handler"), "http_handler");
    }

    #[test]
    fn render_rule_unknown_rule_errors() {
        let outcome = render_rule(&grammar_json(), "no_such_rule");
        let expected = GrammarError::UnknownRule("no_such_rule".to_string());
        assert_eq!(outcome, Err(expected));
    }

    #[test]
    fn render_rule_invalid_json_errors() {
        let outcome = render_rule("not json", "match_arm");
        assert!(matches!(outcome, Err(GrammarError::Parse(_))));
    }

    #[test]
    fn override_keys_are_real_rules() {
        let json = grammar_json();
        for &(key, _) in OVERRIDES {
            assert!(
                display_name(&json, key).is_ok(),
                "override key `{key}` is not a top-level rule"
            );
        }
    }

    /// Two displayed productions must never share a display name — that would
    /// make the reference ambiguous. Trivial wrappers are collapsed into their
    /// target and therefore not considered.
    #[test]
    fn display_names_are_unique() {
        let grammar: Value = serde_json::from_str(&grammar_json()).unwrap();
        let all_rules = rules(&grammar);

        // display name -> the rule that first claimed it
        let mut claimed: HashMap<String, String> = HashMap::new();
        let displayed = all_rules
            .keys()
            .filter(|name| trivial_wrapper_target(all_rules, name).is_none());
        for name in displayed {
            let disp = display_name_in(all_rules, name);
            if let Some(prev) = claimed.get(&disp) {
                panic!("display name `{disp}` for `{name}` collides with `{prev}`");
            }
            claimed.insert(disp, name.clone());
        }
    }

    /// Pins the two renderers to one implementation: the production line of
    /// every displayed rule must occur verbatim in the appendix, and the
    /// appendix must hold exactly one production per non-wrapper rule
    /// (wrappers are collapsed, nothing is duplicated).
    #[test]
    fn every_displayed_rule_matches_the_appendix() {
        let json = grammar_json();
        let appendix = render_appendix(&json);
        let grammar: Value = serde_json::from_str(&json).unwrap();
        let all_rules = rules(&grammar);

        let displayed: Vec<&String> = all_rules
            .keys()
            .filter(|name| trivial_wrapper_target(all_rules, name).is_none())
            .collect();
        for name in &displayed {
            let line = render_production(&json, name).unwrap();
            assert!(
                appendix.contains(&line),
                "production for `{name}` not found in appendix:\n{line}"
            );
        }

        // One `::=` per displayed rule — collapsed wrappers added no lines and
        // no production is duplicated. (No grammar token contains `::=`.)
        assert_eq!(appendix.matches("::=").count(), displayed.len());
    }
}