flux_syntax/
lexer.rs

1use std::{collections::VecDeque, iter::Peekable};
2
3use rustc_ast::tokenstream::{TokenStream, TokenStreamIter, TokenTree};
4use rustc_span::BytePos;
5
6use crate::{
7    symbols::kw,
8    token::{self, Delimiter, Token, TokenKind},
9};
10
11pub struct Cursor<'t> {
12    stack: Vec<Frame<'t>>,
13    tokens: VecDeque<Token>,
14    hi: BytePos,
15}
16
17struct Frame<'t> {
18    cursor: Peekable<TokenStreamIter<'t>>,
19    close: Option<Token>,
20}
21
22impl<'t> Cursor<'t> {
23    pub(crate) fn new(stream: &'t TokenStream, offset: BytePos) -> Self {
24        let mut cursor = Cursor {
25            stack: vec![Frame { cursor: stream.iter().peekable(), close: None }],
26            tokens: VecDeque::new(),
27            hi: offset,
28        };
29        cursor.fetch_tokens();
30        cursor
31    }
32
33    #[must_use]
34    pub fn at(&mut self, pos: usize) -> Token {
35        while self.tokens.len() <= pos && self.fetch_tokens() {}
36        if pos < self.tokens.len() {
37            self.tokens[pos]
38        } else {
39            Token::new(token::Eof, self.hi, self.hi)
40        }
41    }
42
43    pub fn debug(&mut self, size: usize) -> String {
44        let mut s = String::new();
45        for i in 0..size {
46            s = format!("{s} {}", self.at(i).kind);
47        }
48        s
49    }
50
51    pub fn advance(&mut self) {
52        if let Some(tok) = self.tokens.pop_front() {
53            if self.tokens.is_empty() {
54                self.fetch_tokens();
55            }
56            self.hi = tok.hi;
57        }
58    }
59
60    pub fn advance_by(&mut self, n: usize) {
61        for _ in 0..n {
62            self.advance();
63        }
64    }
65
66    /// Returns the starting byte position of the next token
67    pub fn lo(&self) -> BytePos {
68        if let Some(tok) = self.tokens.front() { tok.lo } else { self.hi }
69    }
70
71    /// Returns the highest byte position the cursor has yielded. You could also think of this as
72    /// the ending position of the last yielded token.
73    pub fn hi(&self) -> BytePos {
74        self.hi
75    }
76
77    fn map_token(&mut self, token: &rustc_ast::token::Token) {
78        let span = token.span;
79        let kind = match token.kind {
80            rustc_ast::token::Lt => TokenKind::Lt,
81            rustc_ast::token::Le => TokenKind::Le,
82            rustc_ast::token::EqEq => TokenKind::EqEq,
83            rustc_ast::token::Eq => TokenKind::Eq,
84            rustc_ast::token::Ne => TokenKind::Ne,
85            rustc_ast::token::AndAnd => TokenKind::AndAnd,
86            rustc_ast::token::OrOr => TokenKind::OrOr,
87            rustc_ast::token::FatArrow => TokenKind::FatArrow,
88            rustc_ast::token::Gt => TokenKind::Gt,
89            rustc_ast::token::Ge => TokenKind::Ge,
90            rustc_ast::token::At => TokenKind::At,
91            rustc_ast::token::Pound => TokenKind::Pound,
92            rustc_ast::token::Comma => TokenKind::Comma,
93            rustc_ast::token::Colon => TokenKind::Colon,
94            rustc_ast::token::Semi => TokenKind::Semi,
95            rustc_ast::token::RArrow => TokenKind::RArrow,
96            rustc_ast::token::Dot => TokenKind::Dot,
97            rustc_ast::token::OpenParen => TokenKind::OpenParen,
98            rustc_ast::token::OpenBrace => TokenKind::OpenBrace,
99            rustc_ast::token::OpenBracket => TokenKind::OpenBracket,
100            rustc_ast::token::CloseParen => TokenKind::CloseParen,
101            rustc_ast::token::CloseBrace => TokenKind::CloseBrace,
102            rustc_ast::token::CloseBracket => TokenKind::CloseBracket,
103            rustc_ast::token::OpenInvisible(origin) => TokenKind::OpenInvisible(origin),
104            rustc_ast::token::CloseInvisible(origin) => TokenKind::CloseInvisible(origin),
105            rustc_ast::token::Literal(lit) => TokenKind::Literal(lit),
106            rustc_ast::token::Ident(symb, _) if symb == kw::True || symb == kw::False => {
107                TokenKind::Literal(token::Lit {
108                    kind: token::LitKind::Bool,
109                    symbol: symb,
110                    suffix: None,
111                })
112            }
113            rustc_ast::token::Ident(symb, is_raw) => TokenKind::Ident(symb, is_raw),
114            rustc_ast::token::NtIdent(ident, is_raw) => TokenKind::Ident(ident.name, is_raw),
115            rustc_ast::token::Caret => TokenKind::Caret,
116            rustc_ast::token::Or => TokenKind::Or,
117            rustc_ast::token::Plus => TokenKind::Plus,
118            rustc_ast::token::Slash => TokenKind::Slash,
119            rustc_ast::token::Minus => TokenKind::Minus,
120            rustc_ast::token::And => TokenKind::And,
121            rustc_ast::token::Percent => TokenKind::Percent,
122            rustc_ast::token::Star => TokenKind::Star,
123            rustc_ast::token::Shl => {
124                self.tokens.push_back(Token::new(
125                    TokenKind::LtFollowedByLt,
126                    span.lo(),
127                    span.hi() - BytePos(1),
128                ));
129                self.tokens
130                    .push_back(Token::new(TokenKind::Lt, span.lo() + BytePos(1), span.hi()));
131                return;
132            }
133            rustc_ast::token::Shr => {
134                self.tokens.push_back(Token::new(
135                    TokenKind::GtFollowedByGt,
136                    span.lo(),
137                    span.hi() - BytePos(1),
138                ));
139                self.tokens
140                    .push_back(Token::new(TokenKind::Gt, span.lo() + BytePos(1), span.hi()));
141                return;
142            }
143            rustc_ast::token::Bang => TokenKind::Bang,
144            rustc_ast::token::PathSep => TokenKind::PathSep,
145            rustc_ast::token::DotDot => TokenKind::DotDot,
146            _ => TokenKind::Invalid,
147        };
148        self.tokens
149            .push_back(Token::new(kind, span.lo(), span.hi()));
150    }
151
152    fn fetch_tokens(&mut self) -> bool {
153        let Some(top) = self.stack.last_mut() else { return false };
154
155        match top.cursor.next() {
156            Some(TokenTree::Token(token, _)) => {
157                if let Some(TokenTree::Token(next, _)) = top.cursor.peek() {
158                    match (&token.kind, &next.kind) {
159                        (rustc_ast::token::Le, rustc_ast::token::Gt)
160                            if token.span.hi() == next.span.lo() =>
161                        {
162                            top.cursor.next();
163                            self.tokens.push_back(Token::new(
164                                TokenKind::Iff,
165                                token.span.lo(),
166                                next.span.hi(),
167                            ));
168                            return true;
169                        }
170                        _ => {}
171                    }
172                }
173                self.map_token(token);
174                true
175            }
176            Some(TokenTree::Delimited(_, _spacing, Delimiter::Invisible(..), tokens)) => {
177                self.stack
178                    .push(Frame { cursor: tokens.iter().peekable(), close: None });
179                self.fetch_tokens()
180            }
181            Some(TokenTree::Delimited(span, _spacing, delim, tokens)) => {
182                let close_kind = match delim {
183                    Delimiter::Parenthesis => TokenKind::CloseParen,
184                    Delimiter::Brace => TokenKind::CloseBrace,
185                    Delimiter::Bracket => TokenKind::CloseBracket,
186                    Delimiter::Invisible(origin) => TokenKind::CloseInvisible(*origin),
187                };
188                let close = Token::new(close_kind, span.close.lo(), span.close.hi());
189
190                self.stack
191                    .push(Frame { cursor: tokens.iter().peekable(), close: Some(close) });
192
193                let kind = match delim {
194                    Delimiter::Parenthesis => rustc_ast::token::OpenParen,
195                    Delimiter::Brace => rustc_ast::token::OpenBrace,
196                    Delimiter::Bracket => rustc_ast::token::OpenBracket,
197                    Delimiter::Invisible(origin) => rustc_ast::token::OpenInvisible(*origin),
198                };
199
200                let token = rustc_ast::token::Token { kind, span: span.open };
201                self.map_token(&token);
202                true
203            }
204            None => {
205                let Some(frame) = self.stack.pop() else { return false };
206                if let Some(token) = frame.close {
207                    self.tokens.push_back(token);
208                    true
209                } else {
210                    self.fetch_tokens()
211                }
212            }
213        }
214    }
215}