flux_syntax/
lexer.rs

1use std::{collections::VecDeque, fmt, iter::Peekable};
2
3pub use rustc_ast::token::{Delimiter, Lit, LitKind};
4use rustc_ast::{
5    token::InvisibleOrigin,
6    tokenstream::{TokenStream, TokenStreamIter, TokenTree},
7};
8use rustc_span::{BytePos, Symbol};
9
10use crate::symbols::kw;
11
/// The kinds of token produced by the lexer's [`Cursor`].
///
/// Most variants correspond one-to-one to a rustc token kind; the exceptions are noted on the
/// individual variants.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum TokenKind {
    /// Produced for rustc's `Or` token; its textual description is `|`.
    Caret,
    EqEq,
    Eq,
    AndAnd,
    OrOr,
    Plus,
    Minus,
    Slash,
    Bang,
    Star,
    Colon,
    Comma,
    Semi,
    RArrow,
    Dot,
    Le,
    Ne,
    /// First half of a rustc `>>` token; the cursor always emits a [`TokenKind::Gt`] right after.
    GtFollowedByGt,
    Gt,
    /// First half of a rustc `<<` token; the cursor always emits a [`TokenKind::Lt`] right after.
    LtFollowedByLt,
    Lt,
    Ge,
    At,
    Pound,
    /// `<=>`, built by the cursor from a `<=` token immediately followed by a `>` token.
    Iff,
    FatArrow,
    /// A literal. The identifiers `true` and `false` are also lexed as (boolean) literals.
    Literal(Lit),
    /// This is used to represent both keywords and (non-reserved) identifiers
    Ident(Symbol),
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    OpenBracket,
    CloseBracket,
    OpenInvisible(InvisibleOrigin),
    CloseInvisible(InvisibleOrigin),
    /// Any rustc token that has no counterpart in this enum.
    Invalid,
    And,
    Percent,
    PathSep,
    DotDot,
    /// Synthesized by the cursor when looking past the last token.
    Eof,
}
58
/// A lexed token: its kind together with the byte positions where it starts and ends.
#[derive(Clone, Copy)]
pub struct Token {
    /// What kind of token this is.
    pub kind: TokenKind,
    /// Starting byte position of the token.
    pub lo: BytePos,
    /// Ending byte position of the token.
    pub hi: BytePos,
}
65
66impl Token {
67    pub fn new(kind: TokenKind, lo: BytePos, hi: BytePos) -> Self {
68        Self { kind, lo, hi }
69    }
70}
71
/// Convenience module so we can refer to token kinds as `token::*`
/// (for example `token::OpenParen` instead of `TokenKind::OpenParen`).
pub mod token {
    pub use super::TokenKind::*;
}
76
77impl TokenKind {
78    pub fn open_delim(delim: Delimiter) -> TokenKind {
79        match delim {
80            Delimiter::Parenthesis => token::OpenParen,
81            Delimiter::Bracket => token::OpenBracket,
82            Delimiter::Brace => token::OpenBrace,
83            Delimiter::Invisible(origin) => token::OpenInvisible(origin),
84        }
85    }
86
87    pub fn close_delim(delim: Delimiter) -> TokenKind {
88        match delim {
89            Delimiter::Parenthesis => token::CloseParen,
90            Delimiter::Bracket => token::CloseBracket,
91            Delimiter::Brace => token::CloseBrace,
92            Delimiter::Invisible(origin) => token::CloseInvisible(origin),
93        }
94    }
95
96    pub fn descr(&self) -> &'static str {
97        match self {
98            TokenKind::Caret => "|",
99            TokenKind::EqEq => "==",
100            TokenKind::Eq => "=",
101            TokenKind::AndAnd => "&&",
102            TokenKind::OrOr => "||",
103            TokenKind::Plus => "+",
104            TokenKind::Minus => "-",
105            TokenKind::Slash => "/",
106            TokenKind::Bang => "!",
107            TokenKind::Star => "*",
108            TokenKind::Colon => ":",
109            TokenKind::Comma => ",",
110            TokenKind::Semi => ";",
111            TokenKind::RArrow => "->",
112            TokenKind::Dot => ".",
113            TokenKind::Le => "<=",
114            TokenKind::Ne => ">=",
115            TokenKind::GtFollowedByGt => ">",
116            TokenKind::Gt => ">",
117            TokenKind::LtFollowedByLt => "<",
118            TokenKind::Lt => "<",
119            TokenKind::Ge => ">=",
120            TokenKind::At => "@",
121            TokenKind::Pound => "#",
122            TokenKind::Iff => "<=>",
123            TokenKind::FatArrow => "=>",
124            TokenKind::Literal(_) => "literal",
125            TokenKind::Ident(_) => "identifier",
126            TokenKind::OpenParen => "(",
127            TokenKind::OpenBrace => "{",
128            TokenKind::OpenBracket => "[",
129            TokenKind::CloseParen => ")",
130            TokenKind::CloseBrace => "}",
131            TokenKind::CloseBracket => "]",
132            TokenKind::OpenInvisible(_) => "",
133            TokenKind::CloseInvisible(_) => "",
134            TokenKind::And => "&",
135            TokenKind::Percent => "%",
136            TokenKind::PathSep => "::",
137            TokenKind::DotDot => "..",
138            TokenKind::Eof => "<eof>",
139            TokenKind::Invalid => "<invalid>",
140        }
141    }
142
143    pub fn is_keyword(self, kw: Symbol) -> bool {
144        matches!(self, TokenKind::Ident(sym) if sym == kw)
145    }
146
147    pub fn is_eof(self) -> bool {
148        matches!(self, TokenKind::Eof)
149    }
150}
151
152impl fmt::Display for TokenKind {
153    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
154        match self {
155            TokenKind::Literal(lit) => write!(f, "{lit}"),
156            TokenKind::Ident(sym) => write!(f, "{sym}"),
157            _ => write!(f, "{}", self.descr()),
158        }
159    }
160}
161
/// A cursor that flattens a rustc token stream into a sequence of [`Token`]s with arbitrary
/// lookahead.
pub struct Cursor<'t> {
    /// Token streams currently being iterated; entering a delimited group pushes a frame and
    /// exhausting it pops the frame.
    stack: Vec<Frame<'t>>,
    /// Tokens that have been fetched from the stream but not yet consumed.
    tokens: VecDeque<Token>,
    /// End position of the last consumed token (initially the offset given at construction).
    hi: BytePos,
}
167
/// Iteration state for one (possibly nested) token stream.
struct Frame<'t> {
    /// Iterator over the token trees of this stream, with one tree of lookahead.
    cursor: Peekable<TokenStreamIter<'t>>,
    /// Closing delimiter token to emit once the stream is exhausted; `None` for the root stream
    /// and for invisible-delimited groups.
    close: Option<Token>,
}
172
173impl<'t> Cursor<'t> {
174    pub(crate) fn new(stream: &'t TokenStream, offset: BytePos) -> Self {
175        let mut cursor = Cursor {
176            stack: vec![Frame { cursor: stream.iter().peekable(), close: None }],
177            tokens: VecDeque::new(),
178            hi: offset,
179        };
180        cursor.fetch_tokens();
181        cursor
182    }
183
184    #[must_use]
185    pub fn at(&mut self, pos: usize) -> Token {
186        while self.tokens.len() <= pos && self.fetch_tokens() {}
187        if pos < self.tokens.len() {
188            self.tokens[pos]
189        } else {
190            Token::new(TokenKind::Eof, self.hi, self.hi)
191        }
192    }
193
194    pub fn debug(&mut self, size: usize) -> String {
195        let mut s = String::new();
196        for i in 0..size {
197            s = format!("{s} {}", self.at(i).kind);
198        }
199        s
200    }
201
202    pub fn advance(&mut self) {
203        if let Some(tok) = self.tokens.pop_front() {
204            if self.tokens.is_empty() {
205                self.fetch_tokens();
206            }
207            self.hi = tok.hi;
208        }
209    }
210
211    pub fn advance_by(&mut self, n: usize) {
212        for _ in 0..n {
213            self.advance();
214        }
215    }
216
217    /// Returns the starting byte position of the next token
218    pub fn lo(&self) -> BytePos {
219        if let Some(tok) = self.tokens.front() { tok.lo } else { self.hi }
220    }
221
222    /// Returns the highest byte position the cursor has yielded. You could also think of this as
223    /// the ending position of the last yielded token.
224    pub fn hi(&self) -> BytePos {
225        self.hi
226    }
227
228    fn map_token(&mut self, token: &rustc_ast::token::Token) {
229        let span = token.span;
230        let kind = match token.kind {
231            rustc_ast::token::Lt => TokenKind::Lt,
232            rustc_ast::token::Le => TokenKind::Le,
233            rustc_ast::token::EqEq => TokenKind::EqEq,
234            rustc_ast::token::Eq => TokenKind::Eq,
235            rustc_ast::token::Ne => TokenKind::Ne,
236            rustc_ast::token::AndAnd => TokenKind::AndAnd,
237            rustc_ast::token::OrOr => TokenKind::OrOr,
238            rustc_ast::token::FatArrow => TokenKind::FatArrow,
239            rustc_ast::token::Gt => TokenKind::Gt,
240            rustc_ast::token::Ge => TokenKind::Ge,
241            rustc_ast::token::At => TokenKind::At,
242            rustc_ast::token::Pound => TokenKind::Pound,
243            rustc_ast::token::Comma => TokenKind::Comma,
244            rustc_ast::token::Colon => TokenKind::Colon,
245            rustc_ast::token::Semi => TokenKind::Semi,
246            rustc_ast::token::RArrow => TokenKind::RArrow,
247            rustc_ast::token::Dot => TokenKind::Dot,
248            rustc_ast::token::OpenParen => TokenKind::OpenParen,
249            rustc_ast::token::OpenBrace => TokenKind::OpenBrace,
250            rustc_ast::token::OpenBracket => TokenKind::OpenBracket,
251            rustc_ast::token::CloseParen => TokenKind::CloseParen,
252            rustc_ast::token::CloseBrace => TokenKind::CloseBrace,
253            rustc_ast::token::CloseBracket => TokenKind::CloseBracket,
254            rustc_ast::token::OpenInvisible(origin) => TokenKind::OpenInvisible(origin),
255            rustc_ast::token::CloseInvisible(origin) => TokenKind::CloseInvisible(origin),
256            rustc_ast::token::Literal(lit) => TokenKind::Literal(lit),
257            rustc_ast::token::Ident(symb, _) if symb == kw::True || symb == kw::False => {
258                TokenKind::Literal(Lit { kind: LitKind::Bool, symbol: symb, suffix: None })
259            }
260            rustc_ast::token::Ident(symb, _) => TokenKind::Ident(symb),
261            rustc_ast::token::NtIdent(ident, _) => TokenKind::Ident(ident.name),
262            rustc_ast::token::Or => TokenKind::Caret,
263            rustc_ast::token::Plus => TokenKind::Plus,
264            rustc_ast::token::Slash => TokenKind::Slash,
265            rustc_ast::token::Minus => TokenKind::Minus,
266            rustc_ast::token::And => TokenKind::And,
267            rustc_ast::token::Percent => TokenKind::Percent,
268            rustc_ast::token::Star => TokenKind::Star,
269            rustc_ast::token::Shl => {
270                self.tokens.push_back(Token::new(
271                    TokenKind::LtFollowedByLt,
272                    span.lo(),
273                    span.hi() - BytePos(1),
274                ));
275                self.tokens
276                    .push_back(Token::new(TokenKind::Lt, span.lo() + BytePos(1), span.hi()));
277                return;
278            }
279            rustc_ast::token::Shr => {
280                self.tokens.push_back(Token::new(
281                    TokenKind::GtFollowedByGt,
282                    span.lo(),
283                    span.hi() - BytePos(1),
284                ));
285                self.tokens
286                    .push_back(Token::new(TokenKind::Gt, span.lo() + BytePos(1), span.hi()));
287                return;
288            }
289            rustc_ast::token::Bang => TokenKind::Bang,
290            rustc_ast::token::PathSep => TokenKind::PathSep,
291            rustc_ast::token::DotDot => TokenKind::DotDot,
292            _ => TokenKind::Invalid,
293        };
294        self.tokens
295            .push_back(Token::new(kind, span.lo(), span.hi()));
296    }
297
298    fn fetch_tokens(&mut self) -> bool {
299        let Some(top) = self.stack.last_mut() else { return false };
300
301        match top.cursor.next() {
302            Some(TokenTree::Token(token, _)) => {
303                if let Some(TokenTree::Token(next, _)) = top.cursor.peek() {
304                    match (&token.kind, &next.kind) {
305                        (rustc_ast::token::Le, rustc_ast::token::Gt)
306                            if token.span.hi() == next.span.lo() =>
307                        {
308                            top.cursor.next();
309                            self.tokens.push_back(Token::new(
310                                TokenKind::Iff,
311                                token.span.lo(),
312                                next.span.hi(),
313                            ));
314                            return true;
315                        }
316                        _ => {}
317                    }
318                }
319                self.map_token(token);
320                true
321            }
322            Some(TokenTree::Delimited(_, _spacing, Delimiter::Invisible(..), tokens)) => {
323                self.stack
324                    .push(Frame { cursor: tokens.iter().peekable(), close: None });
325                self.fetch_tokens()
326            }
327            Some(TokenTree::Delimited(span, _spacing, delim, tokens)) => {
328                let close_kind = match delim {
329                    Delimiter::Parenthesis => TokenKind::CloseParen,
330                    Delimiter::Brace => TokenKind::CloseBrace,
331                    Delimiter::Bracket => TokenKind::CloseBracket,
332                    Delimiter::Invisible(origin) => TokenKind::CloseInvisible(*origin),
333                };
334                let close = Token::new(close_kind, span.close.lo(), span.close.hi());
335
336                self.stack
337                    .push(Frame { cursor: tokens.iter().peekable(), close: Some(close) });
338
339                let kind = match delim {
340                    Delimiter::Parenthesis => rustc_ast::token::OpenParen,
341                    Delimiter::Brace => rustc_ast::token::OpenBrace,
342                    Delimiter::Bracket => rustc_ast::token::OpenBracket,
343                    Delimiter::Invisible(origin) => rustc_ast::token::OpenInvisible(*origin),
344                };
345
346                let token = rustc_ast::token::Token { kind, span: span.open };
347                self.map_token(&token);
348                true
349            }
350            None => {
351                let Some(frame) = self.stack.pop() else { return false };
352                if let Some(token) = frame.close {
353                    self.tokens.push_back(token);
354                    true
355                } else {
356                    self.fetch_tokens()
357                }
358            }
359        }
360    }
361}