1use std::error::Error;
23use std::fmt;
24
25use serde_json::{Map, Value};
26
/// Failure modes of the grammar-rendering entry points in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GrammarError {
    /// The grammar JSON text could not be parsed; carries the parser's message.
    Parse(String),
    /// The requested name is not a rule in the grammar; carries that name.
    UnknownRule(String),
}

impl fmt::Display for GrammarError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::UnknownRule(name) => write!(
                f,
                "unknown grammar rule `{name}` (not a top-level production)"
            ),
            Self::Parse(e) => write!(f, "could not parse grammar JSON: {e}"),
        }
    }
}

// No underlying source error is exposed: `Parse` stores only the message text.
impl Error for GrammarError {}
51
/// `(rule name, display name)` pairs that `display_name_in` checks before it falls back to
/// stripping a leading `_` from the rule name. The table is currently empty.
const OVERRIDES: &[(&str, &str)] = &[];
56
57fn rules_of(grammar: &Value) -> Option<&Map<String, Value>> {
58 grammar.get("rules").and_then(Value::as_object)
59}
60
61fn trivial_wrapper_target<'a>(rules: &'a Map<String, Value>, name: &str) -> Option<&'a str> {
65 let body = rules.get(name)?;
66 if body.get("type").and_then(Value::as_str) == Some("SYMBOL") {
67 body.get("name").and_then(Value::as_str)
68 } else {
69 None
70 }
71}
72
73fn display_name_in(rules: &Map<String, Value>, name: &str) -> String {
76 if let Some(target) = trivial_wrapper_target(rules, name) {
77 return display_name_in(rules, target);
78 }
79 if let Some((_, disp)) = OVERRIDES.iter().find(|(k, _)| *k == name) {
80 return (*disp).to_string();
81 }
82 name.strip_prefix('_').unwrap_or(name).to_string()
83}
84
85fn render(rules: &Map<String, Value>, node: &Value) -> (String, u8) {
89 match node.get("type").and_then(Value::as_str).unwrap_or("") {
90 "SYMBOL" => (
91 display_name_in(rules, node["name"].as_str().unwrap_or("?")),
92 2,
93 ),
94 "STRING" => (format!("\"{}\"", node["value"].as_str().unwrap_or("")), 2),
95 "PATTERN" => (format!("/{}/", node["value"].as_str().unwrap_or("")), 2),
96 "BLANK" => ("ε".to_string(), 2),
97 "PREC" | "PREC_LEFT" | "PREC_RIGHT" | "PREC_DYNAMIC" | "TOKEN" | "IMMEDIATE_TOKEN"
99 | "FIELD" | "ALIAS" => render(rules, &node["content"]),
100 "REPEAT" => (format!("{}*", wrap_atom(rules, &node["content"])), 2),
101 "REPEAT1" => (format!("{}+", wrap_atom(rules, &node["content"])), 2),
102 "SEQ" => {
103 let parts: Vec<String> = members(node).iter().map(|m| wrap(rules, m, 1)).collect();
104 (parts.join(" "), 1)
105 }
106 "CHOICE" => {
107 let all = members(node);
108 let has_blank = all
109 .iter()
110 .any(|m| m.get("type").and_then(Value::as_str) == Some("BLANK"));
111 let non_blank: Vec<&Value> = all
112 .iter()
113 .filter(|m| m.get("type").and_then(Value::as_str) != Some("BLANK"))
114 .collect();
115 if has_blank {
116 if non_blank.len() == 1 {
118 (format!("{}?", wrap_atom(rules, non_blank[0])), 2)
119 } else {
120 let inner: Vec<String> = non_blank.iter().map(|m| render(rules, m).0).collect();
121 (format!("({})?", inner.join(" | ")), 2)
122 }
123 } else {
124 let inner: Vec<String> = non_blank.iter().map(|m| render(rules, m).0).collect();
125 (inner.join(" | "), 0)
126 }
127 }
128 other => (format!("/* {other} */"), 2),
129 }
130}
131
132fn members(node: &Value) -> Vec<Value> {
133 node["members"].as_array().cloned().unwrap_or_default()
134}
135
136fn wrap_atom(rules: &Map<String, Value>, node: &Value) -> String {
138 wrap(rules, node, 2)
139}
140
141fn wrap(rules: &Map<String, Value>, node: &Value, min: u8) -> String {
143 let (s, level) = render(rules, node);
144 if level < min { format!("({s})") } else { s }
145}
146
147fn render_extra(node: &Value) -> String {
148 match node.get("type").and_then(Value::as_str).unwrap_or("") {
149 "SYMBOL" => format!("`{}`", node["name"].as_str().unwrap_or("?")),
150 "PATTERN" => format!("`/{}/`", node["value"].as_str().unwrap_or("")),
151 "STRING" => format!("`\"{}\"`", node["value"].as_str().unwrap_or("")),
152 _ => "?".to_string(),
153 }
154}
155
156pub fn render_appendix(grammar_json: &str) -> String {
162 let grammar: Value = serde_json::from_str(grammar_json).expect("grammar.json parses");
163
164 let mut out = String::new();
165 out.push_str("# Complete grammar (appendix)\n\n");
166 out.push_str(
167 "<!-- GENERATED FILE — do not edit by hand.\n \
168 Source: tree-sitter-bynk/src/grammar.json, via bynkc/tests/grammar_reference.rs.\n \
169 Regenerate with: BYNK_BLESS=1 cargo test -p bynkc --test grammar_reference -->\n\n",
170 );
171 out.push_str(
172 "The complete Bynk grammar, generated from the `tree-sitter-bynk` grammar. \
173 For the annotated, per-construct reference see [Syntax & grammar](grammar.md).\n\n",
174 );
175 out.push_str("**Notation.** ");
176 out.push_str(
177 "`\"x\"` a literal token · `/x/` a regular expression · `( … )?` optional · \
178 `( … )*` zero or more · `( … )+` one or more · `a | b` choice · `ε` empty. \
179 Rule names are the readable display names (a leading `_` denotes an \
180 internal helper rule; trivial wrappers are collapsed). `doc_block` is an \
181 external token — a `--- … ---` documentation block.\n\n",
182 );
183
184 out.push_str("```ebnf\n");
185 if let Some(rules) = rules_of(&grammar) {
186 for (name, body) in rules {
187 if trivial_wrapper_target(rules, name).is_some() {
189 continue;
190 }
191 let (rendered, _) = render(rules, body);
192 out.push_str(&format!(
193 "{} ::= {rendered}\n",
194 display_name_in(rules, name)
195 ));
196 }
197 }
198 out.push_str("```\n\n");
199
200 out.push_str("## Tokens & trivia\n\n");
201 if let Some(word) = grammar.get("word").and_then(Value::as_str) {
202 out.push_str(&format!("- **Word token:** `{word}`\n"));
203 }
204 if let Some(extras) = grammar.get("extras").and_then(Value::as_array) {
205 let rendered: Vec<String> = extras.iter().map(render_extra).collect();
206 out.push_str(&format!(
207 "- **Ignored between tokens:** {}\n",
208 rendered.join(", ")
209 ));
210 }
211 if let Some(externals) = grammar.get("externals").and_then(Value::as_array) {
212 let rendered: Vec<String> = externals.iter().map(render_extra).collect();
213 out.push_str(&format!("- **External tokens:** {}\n", rendered.join(", ")));
214 }
215
216 out
217}
218
219fn rule_body<'a>(grammar: &'a Value, name: &str) -> Result<&'a Value, GrammarError> {
222 grammar
223 .get("rules")
224 .and_then(Value::as_object)
225 .and_then(|rules| rules.get(name))
226 .ok_or_else(|| GrammarError::UnknownRule(name.to_string()))
227}
228
229pub fn render_rule(grammar_json: &str, name: &str) -> Result<String, GrammarError> {
235 let grammar: Value =
236 serde_json::from_str(grammar_json).map_err(|e| GrammarError::Parse(e.to_string()))?;
237 let rules = grammar
238 .get("rules")
239 .and_then(Value::as_object)
240 .ok_or_else(|| GrammarError::UnknownRule(name.to_string()))?;
241 let body = rule_body(&grammar, name)?;
242 Ok(render(rules, body).0)
243}
244
245pub fn render_production(grammar_json: &str, name: &str) -> Result<String, GrammarError> {
249 let grammar: Value =
250 serde_json::from_str(grammar_json).map_err(|e| GrammarError::Parse(e.to_string()))?;
251 let rules = grammar
252 .get("rules")
253 .and_then(Value::as_object)
254 .ok_or_else(|| GrammarError::UnknownRule(name.to_string()))?;
255 let body = rule_body(&grammar, name)?;
256 Ok(format!(
257 "{} ::= {}",
258 display_name_in(rules, name),
259 render(rules, body).0
260 ))
261}
262
263pub fn embeddable_rules(grammar_json: &str) -> Vec<String> {
268 let Ok(grammar) = serde_json::from_str::<Value>(grammar_json) else {
269 return Vec::new();
270 };
271 let Some(rules) = rules_of(&grammar) else {
272 return Vec::new();
273 };
274 rules
275 .keys()
276 .filter(|name| trivial_wrapper_target(rules, name.as_str()).is_none())
277 .cloned()
278 .collect()
279}
280
281pub fn display_name(grammar_json: &str, name: &str) -> Result<String, GrammarError> {
284 let grammar: Value =
285 serde_json::from_str(grammar_json).map_err(|e| GrammarError::Parse(e.to_string()))?;
286 let rules = grammar
287 .get("rules")
288 .and_then(Value::as_object)
289 .ok_or_else(|| GrammarError::UnknownRule(name.to_string()))?;
290 if !rules.contains_key(name) {
291 return Err(GrammarError::UnknownRule(name.to_string()));
292 }
293 Ok(display_name_in(rules, name))
294}
295
296pub fn render_site_json(grammar_json: &str) -> String {
306 let mut productions = Map::new();
307 for rule in embeddable_rules(grammar_json) {
308 if let Ok(production) = render_production(grammar_json, &rule) {
309 productions.insert(rule, Value::String(production));
310 }
311 }
312 let mut doc = Map::new();
313 doc.insert(
314 "_generated".into(),
315 Value::String(
316 "GENERATED from tree-sitter-bynk/src/grammar.json. Do not edit by hand. \
317 Regenerate with: BYNK_BLESS=1 cargo test -p bynk-grammar --test generated_grammar_json"
318 .into(),
319 ),
320 );
321 doc.insert("productions".into(), Value::Object(productions));
322 doc.insert(
323 "appendix".into(),
324 Value::String(render_appendix(grammar_json)),
325 );
326 let mut out =
327 serde_json::to_string_pretty(&Value::Object(doc)).expect("serialise grammar JSON");
328 out.push('\n');
329 out
330}
331
// Unit tests. They read the grammar from `../tree-sitter-bynk/src/grammar.json` relative to
// this crate's manifest directory, so the expected values below are tied to that file.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::fs;
    use std::path::PathBuf;

    /// Reads the grammar JSON fixture; panics if the file cannot be read.
    fn grammar_json() -> String {
        let path =
            PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../tree-sitter-bynk/src/grammar.json");
        fs::read_to_string(path).expect("read grammar.json")
    }

    /// Returns the `rules` object of an already-parsed grammar; panics if it is absent.
    fn rules(grammar: &Value) -> &Map<String, Value> {
        grammar.get("rules").and_then(Value::as_object).unwrap()
    }

    /// `render_rule` output uses display names (no leading `_`) for referenced rules.
    #[test]
    fn render_rule_uses_display_names() {
        let g = grammar_json();
        assert_eq!(
            render_rule(&g, "match_arm").unwrap(),
            "pattern \"=>\" expression \",\"?"
        );
        assert_eq!(
            render_rule(&g, "http_method").unwrap(),
            "\"GET\" | \"POST\" | \"PUT\" | \"PATCH\" | \"DELETE\""
        );
        let http = render_rule(&g, "http_handler").unwrap();
        // `type_ref` must appear, but never in its underscore-prefixed form.
        assert!(http.contains("type_ref"), "{http}");
        assert!(!http.contains("_type_ref"), "{http}");
        assert!(http.contains("block"), "{http}");
    }

    /// `embeddable_rules` keeps ordinary rules, drops trivial wrappers, and returns an empty
    /// list for input that is not JSON.
    #[test]
    fn embeddable_rules_excludes_trivial_wrappers() {
        let g = grammar_json();
        let rules = embeddable_rules(&g);
        // Pinned count for the current fixture.
        assert_eq!(rules.len(), 128);
        assert!(rules.iter().any(|r| r == "http_handler"));
        assert!(rules.iter().any(|r| r == "_type_ref"));
        assert!(!rules.iter().any(|r| r == "_base_type"));
        assert!(!rules.iter().any(|r| r == "pred_atom"));
        assert!(embeddable_rules("not json").is_empty());
    }

    /// `render_production` prefixes the rendered rule with `display_name ::=`.
    #[test]
    fn render_production_includes_display_head() {
        let g = grammar_json();
        assert_eq!(
            render_production(&g, "match_arm").unwrap(),
            "match_arm ::= pattern \"=>\" expression \",\"?"
        );
    }

    /// `display_name` strips the leading `_` and leaves plain names untouched.
    #[test]
    fn display_name_collapses_and_strips() {
        let g = grammar_json();
        assert_eq!(display_name(&g, "_base_type").unwrap(), "base_type");
        assert_eq!(display_name(&g, "_expression").unwrap(), "expression");
        assert_eq!(display_name(&g, "_type_ref").unwrap(), "type_ref");
        assert_eq!(display_name(&g, "http_handler").unwrap(), "http_handler");
    }

    /// Asking for a rule that does not exist yields `UnknownRule` carrying that name.
    #[test]
    fn render_rule_unknown_rule_errors() {
        let g = grammar_json();
        assert_eq!(
            render_rule(&g, "no_such_rule"),
            Err(GrammarError::UnknownRule("no_such_rule".to_string()))
        );
    }

    /// Input that is not JSON yields a `Parse` error.
    #[test]
    fn render_rule_invalid_json_errors() {
        assert!(matches!(
            render_rule("not json", "match_arm"),
            Err(GrammarError::Parse(_))
        ));
    }

    /// Every key in `OVERRIDES` must name a rule that exists in the grammar.
    #[test]
    fn override_keys_are_real_rules() {
        let g = grammar_json();
        for (key, _) in OVERRIDES {
            assert!(
                display_name(&g, key).is_ok(),
                "override key `{key}` is not a top-level rule"
            );
        }
    }

    /// No two non-wrapper rules may share a display name.
    #[test]
    fn display_names_are_unique() {
        let g = grammar_json();
        let grammar: Value = serde_json::from_str(&g).unwrap();
        let rules = rules(&grammar);
        // display name -> the rule that first produced it
        let mut seen: HashMap<String, String> = HashMap::new();
        for name in rules.keys() {
            if trivial_wrapper_target(rules, name).is_some() {
                continue;
            }
            let disp = display_name_in(rules, name);
            if let Some(prev) = seen.insert(disp.clone(), name.clone()) {
                panic!("display name `{disp}` for `{name}` collides with `{prev}`");
            }
        }
    }

    /// Each non-wrapper rule's production appears verbatim in the appendix, and the appendix
    /// contains exactly as many `::=` occurrences as there are such rules.
    #[test]
    fn every_displayed_rule_matches_the_appendix() {
        let g = grammar_json();
        let appendix = render_appendix(&g);
        let grammar: Value = serde_json::from_str(&g).unwrap();
        let rules = rules(&grammar);

        let mut displayed = 0;
        for name in rules.keys() {
            if trivial_wrapper_target(rules, name).is_some() {
                continue;
            }
            displayed += 1;
            let line = render_production(&g, name).unwrap();
            assert!(
                appendix.contains(&line),
                "production for `{name}` not found in appendix:\n{line}"
            );
        }

        assert_eq!(appendix.matches("::=").count(), displayed);
    }
}