flux_syntax/
lexer.rs

1use std::{collections::VecDeque, fmt, iter::Peekable};
2
3pub use rustc_ast::token::{Delimiter, Lit, LitKind};
4use rustc_ast::{
5    token::{self, InvisibleOrigin, TokenKind},
6    tokenstream::{TokenStream, TokenStreamIter, TokenTree},
7};
8use rustc_span::{BytePos, Symbol, symbol::kw};
9
10#[derive(Copy, Clone, Debug, PartialEq)]
11pub enum Token {
12    Caret,
13    EqEq,
14    Eq,
15    AndAnd,
16    OrOr,
17    Plus,
18    Minus,
19    Slash,
20    Bang,
21    Star,
22    Colon,
23    Comma,
24    Semi,
25    RArrow,
26    Dot,
27    Le,
28    Ne,
29    GtFollowedByGt,
30    Gt,
31    LtFollowedByLt,
32    Lt,
33    Ge,
34    At,
35    Pound,
36    Underscore,
37    Fn,
38    Async,
39    Iff,
40    FatArrow,
41    Let,
42    Mut,
43    Where,
44    Forall,
45    Exists,
46    In,
47    Impl,
48    Requires,
49    Ensures,
50    Literal(Lit),
51    Ident(Symbol),
52    OpenParen,
53    CloseParen,
54    OpenBrace,
55    CloseBrace,
56    OpenBracket,
57    CloseBracket,
58    OpenInvisible(InvisibleOrigin),
59    CloseInvisible(InvisibleOrigin),
60    Invalid,
61    Ref,
62    And,
63    Percent,
64    Strg,
65    Type,
66    If,
67    Else,
68    PathSep,
69    Qualifier,
70    Property,
71    Sort,
72    Opaque,
73    Local,
74    BitVec,
75    As,
76    Hrn,
77    Hdl,
78    DotDot,
79    Eof,
80}
81
82impl Token {
83    pub fn descr(&self) -> &'static str {
84        match self {
85            Token::Caret => "|",
86            Token::EqEq => "==",
87            Token::Eq => "=",
88            Token::AndAnd => "&&",
89            Token::OrOr => "||",
90            Token::Plus => "+",
91            Token::Minus => "-",
92            Token::Slash => "/",
93            Token::Bang => "!",
94            Token::Star => "*",
95            Token::Colon => ":",
96            Token::Comma => ",",
97            Token::Semi => ";",
98            Token::RArrow => "->",
99            Token::Dot => ".",
100            Token::Le => "<=",
101            Token::Ne => ">=",
102            Token::GtFollowedByGt => ">",
103            Token::Gt => ">",
104            Token::LtFollowedByLt => "<",
105            Token::Lt => "<",
106            Token::Ge => ">=",
107            Token::At => "@",
108            Token::Pound => "#",
109            Token::Underscore => "_",
110            Token::Fn => "fn",
111            Token::Async => "async",
112            Token::Iff => "<=>",
113            Token::FatArrow => "=>",
114            Token::Let => "let",
115            Token::Mut => "mut",
116            Token::Where => "where",
117            Token::Forall => "forall",
118            Token::Exists => "exists",
119            Token::In => "in",
120            Token::Impl => "impl",
121            Token::Requires => "requires",
122            Token::Ensures => "ensures",
123            Token::Literal(_) => "literal",
124            Token::Ident(_) => "identifier",
125            Token::OpenParen => "(",
126            Token::OpenBrace => "{",
127            Token::OpenBracket => "[",
128            Token::CloseParen => ")",
129            Token::CloseBrace => "}",
130            Token::CloseBracket => "]",
131            Token::OpenInvisible(_) => "",
132            Token::CloseInvisible(_) => "",
133            Token::Invalid => "<invalid>",
134            Token::Ref => "ref",
135            Token::And => "&",
136            Token::Percent => "%",
137            Token::Strg => "strg",
138            Token::Type => "type",
139            Token::If => "if",
140            Token::Else => "else",
141            Token::PathSep => "::",
142            Token::Qualifier => "qualifier",
143            Token::Property => "property",
144            Token::Sort => "sort",
145            Token::Opaque => "opaque",
146            Token::Local => "local",
147            Token::BitVec => "bitvec",
148            Token::As => "as",
149            Token::Hrn => "rn",
150            Token::Hdl => "hdl",
151            Token::DotDot => "..",
152            Token::Eof => "<eof>",
153        }
154    }
155}
156
157impl fmt::Display for Token {
158    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
159        match self {
160            Token::Literal(lit) => write!(f, "{lit}"),
161            Token::Ident(sym) => write!(f, "{sym}"),
162            _ => write!(f, "{}", self.descr()),
163        }
164    }
165}
166
167pub struct Cursor<'t> {
168    stack: Vec<Frame<'t>>,
169    symbs: Symbols,
170    tokens: VecDeque<(BytePos, Token, BytePos)>,
171    hi: BytePos,
172}
173
174struct Symbols {
175    requires: Symbol,
176    ensures: Symbol,
177    strg: Symbol,
178    qualifier: Symbol,
179    property: Symbol,
180    sort: Symbol,
181    opaque: Symbol,
182    local: Symbol,
183    bitvec: Symbol,
184    hrn: Symbol,
185    hdl: Symbol,
186    forall: Symbol,
187    exists: Symbol,
188}
189
190struct Frame<'t> {
191    cursor: Peekable<TokenStreamIter<'t>>,
192    close: Option<(BytePos, Token, BytePos)>,
193}
194
195impl<'t> Cursor<'t> {
196    pub(crate) fn new(stream: &'t TokenStream, offset: BytePos) -> Self {
197        let mut cursor = Cursor {
198            stack: vec![Frame { cursor: stream.iter().peekable(), close: None }],
199            tokens: VecDeque::new(),
200            symbs: Symbols {
201                strg: Symbol::intern("strg"),
202                requires: Symbol::intern("requires"),
203                ensures: Symbol::intern("ensures"),
204                qualifier: Symbol::intern("qualifier"),
205                property: Symbol::intern("property"),
206                sort: Symbol::intern("sort"),
207                bitvec: Symbol::intern("bitvec"),
208                opaque: Symbol::intern("opaque"),
209                local: Symbol::intern("local"),
210                hrn: Symbol::intern("hrn"),
211                hdl: Symbol::intern("hdl"),
212                forall: Symbol::intern("forall"),
213                exists: Symbol::intern("exists"),
214            },
215            hi: offset,
216        };
217        cursor.fetch_tokens();
218        cursor
219    }
220
221    #[must_use]
222    pub fn at(&mut self, pos: usize) -> (BytePos, Token, BytePos) {
223        while self.tokens.len() <= pos && self.fetch_tokens() {}
224        if pos < self.tokens.len() { self.tokens[pos] } else { (self.hi, Token::Eof, self.hi) }
225    }
226
227    pub fn debug(&mut self, size: usize) -> String {
228        let mut s = String::new();
229        for i in 0..size {
230            s = format!("{s} {}", self.at(i).1);
231        }
232        s
233    }
234
235    pub fn advance(&mut self) {
236        if let Some(tok) = self.tokens.pop_front() {
237            if self.tokens.is_empty() {
238                self.fetch_tokens();
239            }
240            self.hi = tok.2;
241        }
242    }
243
244    pub fn advance_by(&mut self, n: usize) {
245        for _ in 0..n {
246            self.advance();
247        }
248    }
249
250    /// Returns the starting byte position of the next token
251    pub fn lo(&self) -> BytePos {
252        if let Some((lo, ..)) = self.tokens.front() { *lo } else { self.hi }
253    }
254
255    /// Returns the highest byte position the cursor has yielded. You could also think of this as
256    /// the ending position of the last yielded token.
257    pub fn hi(&self) -> BytePos {
258        self.hi
259    }
260
261    fn map_token(&mut self, token: &token::Token) {
262        let span = token.span;
263        let token = match token.kind {
264            TokenKind::Lt => Token::Lt,
265            TokenKind::Le => Token::Le,
266            TokenKind::EqEq => Token::EqEq,
267            TokenKind::Eq => Token::Eq,
268            TokenKind::Ne => Token::Ne,
269            TokenKind::AndAnd => Token::AndAnd,
270            TokenKind::OrOr => Token::OrOr,
271            TokenKind::FatArrow => Token::FatArrow,
272            TokenKind::Gt => Token::Gt,
273            TokenKind::Ge => Token::Ge,
274            TokenKind::At => Token::At,
275            TokenKind::Pound => Token::Pound,
276            TokenKind::Comma => Token::Comma,
277            TokenKind::Colon => Token::Colon,
278            TokenKind::Semi => Token::Semi,
279            TokenKind::RArrow => Token::RArrow,
280            TokenKind::Dot => Token::Dot,
281            TokenKind::OpenParen => Token::OpenParen,
282            TokenKind::OpenBrace => Token::OpenBrace,
283            TokenKind::OpenBracket => Token::OpenBracket,
284            TokenKind::CloseParen => Token::CloseParen,
285            TokenKind::CloseBrace => Token::CloseBrace,
286            TokenKind::CloseBracket => Token::CloseBracket,
287            TokenKind::OpenInvisible(origin) => Token::OpenInvisible(origin),
288            TokenKind::CloseInvisible(origin) => Token::CloseInvisible(origin),
289            TokenKind::Literal(lit) => Token::Literal(lit),
290            TokenKind::Ident(symb, _) if symb == kw::True || symb == kw::False => {
291                Token::Literal(Lit { kind: LitKind::Bool, symbol: symb, suffix: None })
292            }
293            TokenKind::Ident(symb, _) if symb == self.symbs.strg => Token::Strg,
294            TokenKind::Ident(symb, _) if symb == self.symbs.requires => Token::Requires,
295            TokenKind::Ident(symb, _) if symb == self.symbs.ensures => Token::Ensures,
296            TokenKind::Ident(symb, _) if symb == self.symbs.qualifier => Token::Qualifier,
297            TokenKind::Ident(symb, _) if symb == self.symbs.property => Token::Property,
298            TokenKind::Ident(symb, _) if symb == self.symbs.sort => Token::Sort,
299            TokenKind::Ident(symb, _) if symb == self.symbs.opaque => Token::Opaque,
300            TokenKind::Ident(symb, _) if symb == self.symbs.local => Token::Local,
301            TokenKind::Ident(symb, _) if symb == self.symbs.bitvec => Token::BitVec,
302            TokenKind::Ident(symb, _) if symb == self.symbs.hrn => Token::Hrn,
303            TokenKind::Ident(symb, _) if symb == self.symbs.hdl => Token::Hdl,
304            TokenKind::Ident(symb, _) if symb == self.symbs.forall => Token::Forall,
305            TokenKind::Ident(symb, _) if symb == self.symbs.exists => Token::Exists,
306            TokenKind::Ident(symb, _) if symb == kw::Let => Token::Let,
307            TokenKind::Ident(symb, _) if symb == kw::In => Token::In,
308            TokenKind::Ident(symb, _) if symb == kw::Ref => Token::Ref,
309            TokenKind::Ident(symb, _) if symb == kw::Fn => Token::Fn,
310            TokenKind::Ident(symb, _) if symb == kw::Mut => Token::Mut,
311            TokenKind::Ident(symb, _) if symb == kw::Where => Token::Where,
312            TokenKind::Ident(symb, _) if symb == kw::Impl => Token::Impl,
313            TokenKind::Ident(symb, _) if symb == kw::Type => Token::Type,
314            TokenKind::Ident(symb, _) if symb == kw::If => Token::If,
315            TokenKind::Ident(symb, _) if symb == kw::Else => Token::Else,
316            TokenKind::Ident(symb, _) if symb == kw::Async => Token::Async,
317            TokenKind::Ident(symb, _) if symb == kw::As => Token::As,
318            TokenKind::Ident(symb, _) if symb == kw::Underscore => Token::Underscore,
319            TokenKind::Ident(symb, _) => Token::Ident(symb),
320            TokenKind::NtIdent(ident, _) => Token::Ident(ident.name),
321            TokenKind::Or => Token::Caret,
322            TokenKind::Plus => Token::Plus,
323            TokenKind::Slash => Token::Slash,
324            TokenKind::Minus => Token::Minus,
325            TokenKind::And => Token::And,
326            TokenKind::Percent => Token::Percent,
327            TokenKind::Star => Token::Star,
328            TokenKind::Shl => {
329                self.tokens
330                    .push_back((span.lo(), Token::LtFollowedByLt, span.hi() - BytePos(1)));
331                self.tokens
332                    .push_back((span.lo() + BytePos(1), Token::Lt, span.hi()));
333                return;
334            }
335            TokenKind::Shr => {
336                self.tokens
337                    .push_back((span.lo(), Token::GtFollowedByGt, span.hi() - BytePos(1)));
338                self.tokens
339                    .push_back((span.lo() + BytePos(1), Token::Gt, span.hi()));
340                return;
341            }
342            TokenKind::Bang => Token::Bang,
343            TokenKind::PathSep => Token::PathSep,
344            TokenKind::DotDot => Token::DotDot,
345            _ => Token::Invalid,
346        };
347        self.tokens.push_back((span.lo(), token, span.hi()));
348    }
349
350    fn fetch_tokens(&mut self) -> bool {
351        let Some(top) = self.stack.last_mut() else { return false };
352
353        match top.cursor.next() {
354            Some(TokenTree::Token(token, _)) => {
355                if let Some(TokenTree::Token(next, _)) = top.cursor.peek() {
356                    match (&token.kind, &next.kind) {
357                        (TokenKind::Le, TokenKind::Gt) if token.span.hi() == next.span.lo() => {
358                            top.cursor.next();
359                            self.tokens
360                                .push_back((token.span.lo(), Token::Iff, next.span.hi()));
361                            return true;
362                        }
363                        _ => {}
364                    }
365                }
366                self.map_token(token);
367                true
368            }
369            Some(TokenTree::Delimited(_, _spacing, Delimiter::Invisible(..), tokens)) => {
370                self.stack
371                    .push(Frame { cursor: tokens.iter().peekable(), close: None });
372                self.fetch_tokens()
373            }
374            Some(TokenTree::Delimited(span, _spacing, delim, tokens)) => {
375                let close_token = match delim {
376                    Delimiter::Parenthesis => Token::CloseParen,
377                    Delimiter::Brace => Token::CloseBrace,
378                    Delimiter::Bracket => Token::CloseBracket,
379                    Delimiter::Invisible(origin) => Token::CloseInvisible(*origin),
380                };
381                let close = (span.close.lo(), close_token, span.close.hi());
382
383                self.stack
384                    .push(Frame { cursor: tokens.iter().peekable(), close: Some(close) });
385
386                let kind = match delim {
387                    Delimiter::Parenthesis => TokenKind::OpenParen,
388                    Delimiter::Brace => TokenKind::OpenBrace,
389                    Delimiter::Bracket => TokenKind::OpenBracket,
390                    Delimiter::Invisible(origin) => TokenKind::OpenInvisible(*origin),
391                };
392
393                let token = token::Token { kind, span: span.open };
394                self.map_token(&token);
395                true
396            }
397            None => {
398                let Some(frame) = self.stack.pop() else { return false };
399                if let Some(token) = frame.close {
400                    self.tokens.push_back(token);
401                    true
402                } else {
403                    self.fetch_tokens()
404                }
405            }
406        }
407    }
408}