Skip to main content

bynkc_lsp/
position.rs

//! Byte-offset ↔ LSP position conversion.
//!
//! Bynk source spans are byte offsets into the UTF-8 source. LSP positions
//! count UTF-16 code units (the protocol's default position encoding).
//! For ASCII-only Bynk sources the two agree, but we walk the source one code
//! point at a time so that multi-byte characters in identifiers and strings
//! are handled correctly.
7
8use bynk_syntax::span::Span;
9use tower_lsp::lsp_types::{Position, Range};
10
11/// Convert a byte offset into the source string into an LSP position.
12pub fn offset_to_position(source: &str, offset: usize) -> Position {
13    let mut line: u32 = 0;
14    let mut column: u32 = 0;
15    let bytes = source.as_bytes();
16    let limit = offset.min(bytes.len());
17    let mut i = 0;
18    while i < limit {
19        let b = bytes[i];
20        if b == b'\n' {
21            line += 1;
22            column = 0;
23            i += 1;
24            continue;
25        }
26        // Move to next UTF-8 code point boundary.
27        let cp_len = utf8_char_len(b);
28        // LSP default encoding is UTF-16; count UTF-16 code units.
29        // For ASCII (1 byte) and 2/3-byte UTF-8 (1 code unit) we increment
30        // column by 1; for 4-byte UTF-8 (supplementary plane) it's 2 code
31        // units.
32        column += if cp_len == 4 { 2 } else { 1 };
33        i += cp_len;
34    }
35    Position {
36        line,
37        character: column,
38    }
39}
40
41/// Convert an LSP position into a byte offset. Returns None if the position
42/// is past the end of the source.
43pub fn position_to_offset(source: &str, position: Position) -> Option<usize> {
44    let target_line = position.line;
45    let target_char = position.character;
46    let mut line: u32 = 0;
47    let mut character: u32 = 0;
48    let bytes = source.as_bytes();
49    let mut i = 0;
50    while i < bytes.len() {
51        if line == target_line && character == target_char {
52            return Some(i);
53        }
54        let b = bytes[i];
55        if b == b'\n' {
56            if line == target_line {
57                // Position is past end of this line; clamp to line end.
58                return Some(i);
59            }
60            line += 1;
61            character = 0;
62            i += 1;
63            continue;
64        }
65        let cp_len = utf8_char_len(b);
66        character += if cp_len == 4 { 2 } else { 1 };
67        i += cp_len;
68    }
69    if line == target_line && character >= target_char {
70        Some(i)
71    } else {
72        None
73    }
74}
75
/// Number of bytes to advance for a UTF-8 sequence whose first byte is `first`.
///
/// Every byte value maps to a length, so the result can always be used as a
/// step size: bytes below `0xC0` (ASCII and continuation bytes) yield 1, and
/// every byte at or above `0xF0` yields 4.
fn utf8_char_len(first: u8) -> usize {
    match first {
        // ASCII, plus continuation bytes (0x80..=0xBF), which should not
        // appear as the first byte of a character; step over them singly.
        0x00..=0xBF => 1,
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}
90
91/// Convert a compiler [`Span`] into an LSP [`Range`].
92pub fn span_to_range(source: &str, span: Span) -> Range {
93    Range {
94        start: offset_to_position(source, span.start),
95        end: offset_to_position(source, span.end),
96    }
97}
98
99/// The position one past the end of the source — used for "replace whole
100/// document" formatting edits.
101pub fn end_position(source: &str) -> Position {
102    offset_to_position(source, source.len())
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// In pure-ASCII text, byte offsets and UTF-16 columns coincide.
    #[test]
    fn ascii_offsets_match_columns() {
        let src = "abc\ndef";
        assert_eq!(offset_to_position(src, 0), Position::new(0, 0));
        assert_eq!(offset_to_position(src, 2), Position::new(0, 2));
        assert_eq!(offset_to_position(src, 4), Position::new(1, 0));
        assert_eq!(offset_to_position(src, 6), Position::new(1, 2));
    }

    /// Position -> offset -> position is the identity for an in-range position.
    #[test]
    fn position_round_trip() {
        let src = "alpha\n  beta\ngamma";
        let p = Position::new(1, 4);
        let off = position_to_offset(src, p).unwrap();
        assert_eq!(offset_to_position(src, off), p);
    }

    /// 2- and 3-byte characters occupy one UTF-16 unit; 4-byte characters
    /// occupy two.
    #[test]
    fn multibyte_columns_count_utf16_units() {
        // Byte layout: é = 0..2, 😀 = 2..6, x = 6, \n = 7, € = 8..11, y = 11.
        let src = "é😀x\n€y";
        assert_eq!(offset_to_position(src, 0), Position::new(0, 0));
        assert_eq!(offset_to_position(src, 2), Position::new(0, 1));
        assert_eq!(offset_to_position(src, 6), Position::new(0, 3));
        assert_eq!(offset_to_position(src, 7), Position::new(0, 4));
        assert_eq!(offset_to_position(src, 8), Position::new(1, 0));
        assert_eq!(offset_to_position(src, 11), Position::new(1, 1));
        assert_eq!(offset_to_position(src, 12), Position::new(1, 2));

        assert_eq!(position_to_offset(src, Position::new(0, 3)), Some(6));
        assert_eq!(position_to_offset(src, Position::new(1, 1)), Some(11));
    }

    /// Offsets beyond the source are clamped to its end.
    #[test]
    fn offset_past_end_is_clamped() {
        let src = "abc\ndef";
        assert_eq!(offset_to_position(src, 1000), Position::new(1, 3));
    }

    /// A column beyond a newline-terminated line clamps to that line's end.
    #[test]
    fn position_past_line_end_clamps() {
        let src = "ab\ncd";
        assert_eq!(position_to_offset(src, Position::new(0, 10)), Some(2));
    }

    /// Positions beyond the last line, or beyond the end of the last line,
    /// have no offset; the exact end of the source does.
    #[test]
    fn position_past_source_end_is_none() {
        let src = "ab\ncd";
        assert_eq!(position_to_offset(src, Position::new(5, 0)), None);
        assert_eq!(position_to_offset(src, Position::new(1, 9)), None);
        assert_eq!(position_to_offset(src, Position::new(1, 2)), Some(5));
    }

    /// `end_position` reports the position just after the final character.
    #[test]
    fn end_position_is_after_last_character() {
        assert_eq!(end_position("abc\ndef"), Position::new(1, 3));
        assert_eq!(end_position(""), Position::new(0, 0));
    }
}