flux_syntax/
lexer.rs

1use std::{collections::VecDeque, fmt, iter::Peekable};
2
3pub use rustc_ast::token::{BinOpToken, Delimiter, Lit, LitKind};
4use rustc_ast::{
5    token::{self, TokenKind},
6    tokenstream::{TokenStream, TokenStreamIter, TokenTree},
7};
8use rustc_span::{BytePos, Symbol, symbol::kw};
9
10#[derive(Copy, Clone, Debug, PartialEq)]
11pub enum Token {
12    Caret,
13    EqEq,
14    Eq,
15    AndAnd,
16    OrOr,
17    Plus,
18    Minus,
19    Slash,
20    Not,
21    Star,
22    Colon,
23    Comma,
24    Semi,
25    RArrow,
26    Dot,
27    Le,
28    Ne,
29    GtFollowedByGt,
30    Gt,
31    LtFollowedByLt,
32    Lt,
33    Ge,
34    At,
35    Pound,
36    Underscore,
37    Fn,
38    Async,
39    Iff,
40    FatArrow,
41    Let,
42    Mut,
43    Where,
44    Forall,
45    Exists,
46    In,
47    Impl,
48    Requires,
49    Ensures,
50    Literal(Lit),
51    Ident(Symbol),
52    OpenDelim(Delimiter),
53    CloseDelim(Delimiter),
54    Invalid,
55    Ref,
56    And,
57    Percent,
58    Strg,
59    Type,
60    If,
61    Else,
62    PathSep,
63    Qualifier,
64    Sort,
65    Opaque,
66    Local,
67    BitVec,
68    As,
69    Hrn,
70    Hdl,
71    DotDot,
72    Eof,
73}
74
75impl Token {
76    pub fn descr(&self) -> &'static str {
77        match self {
78            Token::Caret => "|",
79            Token::EqEq => "==",
80            Token::Eq => "=",
81            Token::AndAnd => "&&",
82            Token::OrOr => "||",
83            Token::Plus => "+",
84            Token::Minus => "-",
85            Token::Slash => "/",
86            Token::Not => "!",
87            Token::Star => "*",
88            Token::Colon => ":",
89            Token::Comma => ",",
90            Token::Semi => ";",
91            Token::RArrow => "->",
92            Token::Dot => ".",
93            Token::Le => "<=",
94            Token::Ne => ">=",
95            Token::GtFollowedByGt => ">",
96            Token::Gt => ">",
97            Token::LtFollowedByLt => "<",
98            Token::Lt => "<",
99            Token::Ge => ">=",
100            Token::At => "@",
101            Token::Pound => "#",
102            Token::Underscore => "_",
103            Token::Fn => "fn",
104            Token::Async => "async",
105            Token::Iff => "<=>",
106            Token::FatArrow => "=>",
107            Token::Let => "let",
108            Token::Mut => "mut",
109            Token::Where => "where",
110            Token::Forall => "forall",
111            Token::Exists => "exists",
112            Token::In => "in",
113            Token::Impl => "impl",
114            Token::Requires => "requires",
115            Token::Ensures => "ensures",
116            Token::Literal(_) => "literal",
117            Token::Ident(_) => "identifier",
118            Token::OpenDelim(Delimiter::Parenthesis) => "(",
119            Token::OpenDelim(Delimiter::Brace) => "{",
120            Token::OpenDelim(Delimiter::Bracket) => "[",
121            Token::OpenDelim(Delimiter::Invisible(_)) => "",
122            Token::CloseDelim(Delimiter::Parenthesis) => ")",
123            Token::CloseDelim(Delimiter::Brace) => "}",
124            Token::CloseDelim(Delimiter::Bracket) => "]",
125            Token::CloseDelim(Delimiter::Invisible(_)) => "",
126            Token::Invalid => "<invalid>",
127            Token::Ref => "ref",
128            Token::And => "&",
129            Token::Percent => "%",
130            Token::Strg => "strg",
131            Token::Type => "type",
132            Token::If => "if",
133            Token::Else => "else",
134            Token::PathSep => "::",
135            Token::Qualifier => "qualifier",
136            Token::Sort => "sort",
137            Token::Opaque => "opaque",
138            Token::Local => "local",
139            Token::BitVec => "bitvec",
140            Token::As => "as",
141            Token::Hrn => "rn",
142            Token::Hdl => "hdl",
143            Token::DotDot => "..",
144            Token::Eof => "<eof>",
145        }
146    }
147}
148
149impl fmt::Display for Token {
150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151        match self {
152            Token::Literal(lit) => write!(f, "{lit}"),
153            Token::Ident(sym) => write!(f, "{sym}"),
154            _ => write!(f, "{}", self.descr()),
155        }
156    }
157}
158
159pub struct Cursor<'t> {
160    stack: Vec<Frame<'t>>,
161    symbs: Symbols,
162    tokens: VecDeque<(BytePos, Token, BytePos)>,
163    hi: BytePos,
164}
165
166struct Symbols {
167    requires: Symbol,
168    ensures: Symbol,
169    strg: Symbol,
170    qualifier: Symbol,
171    sort: Symbol,
172    opaque: Symbol,
173    local: Symbol,
174    bitvec: Symbol,
175    hrn: Symbol,
176    hdl: Symbol,
177    forall: Symbol,
178    exists: Symbol,
179}
180
181struct Frame<'t> {
182    cursor: Peekable<TokenStreamIter<'t>>,
183    close: Option<(BytePos, Token, BytePos)>,
184}
185
186impl<'t> Cursor<'t> {
187    pub(crate) fn new(stream: &'t TokenStream, offset: BytePos) -> Self {
188        let mut cursor = Cursor {
189            stack: vec![Frame { cursor: stream.iter().peekable(), close: None }],
190            tokens: VecDeque::new(),
191            symbs: Symbols {
192                strg: Symbol::intern("strg"),
193                requires: Symbol::intern("requires"),
194                ensures: Symbol::intern("ensures"),
195                qualifier: Symbol::intern("qualifier"),
196                sort: Symbol::intern("sort"),
197                bitvec: Symbol::intern("bitvec"),
198                opaque: Symbol::intern("opaque"),
199                local: Symbol::intern("local"),
200                hrn: Symbol::intern("hrn"),
201                hdl: Symbol::intern("hdl"),
202                forall: Symbol::intern("forall"),
203                exists: Symbol::intern("exists"),
204            },
205            hi: offset,
206        };
207        cursor.fetch_tokens();
208        cursor
209    }
210
211    #[must_use]
212    pub fn at(&mut self, pos: usize) -> (BytePos, Token, BytePos) {
213        while self.tokens.len() <= pos && self.fetch_tokens() {}
214        if pos < self.tokens.len() { self.tokens[pos] } else { (self.hi, Token::Eof, self.hi) }
215    }
216
217    pub fn debug(&mut self, size: usize) -> String {
218        let mut s = String::new();
219        for i in 0..size {
220            s = format!("{s} {}", self.at(i).1);
221        }
222        s
223    }
224
225    pub fn advance(&mut self) {
226        if let Some(tok) = self.tokens.pop_front() {
227            if self.tokens.is_empty() {
228                self.fetch_tokens();
229            }
230            self.hi = tok.2;
231        }
232    }
233
234    pub fn advance_by(&mut self, n: usize) {
235        for _ in 0..n {
236            self.advance();
237        }
238    }
239
240    /// Returns the starting byte position of the next token
241    pub fn lo(&self) -> BytePos {
242        if let Some((lo, ..)) = self.tokens.front() { *lo } else { self.hi }
243    }
244
245    /// Returns the highest byte position the cursor has yielded. You could also think of this as
246    /// the ending position of the last yielded token.
247    pub fn hi(&self) -> BytePos {
248        self.hi
249    }
250
251    fn map_token(&mut self, token: &token::Token) {
252        let span = token.span;
253        let token = match token.kind {
254            TokenKind::Lt => Token::Lt,
255            TokenKind::Le => Token::Le,
256            TokenKind::EqEq => Token::EqEq,
257            TokenKind::Eq => Token::Eq,
258            TokenKind::Ne => Token::Ne,
259            TokenKind::AndAnd => Token::AndAnd,
260            TokenKind::OrOr => Token::OrOr,
261            TokenKind::FatArrow => Token::FatArrow,
262            TokenKind::Gt => Token::Gt,
263            TokenKind::Ge => Token::Ge,
264            TokenKind::At => Token::At,
265            TokenKind::Pound => Token::Pound,
266            TokenKind::Comma => Token::Comma,
267            TokenKind::Colon => Token::Colon,
268            TokenKind::Semi => Token::Semi,
269            TokenKind::RArrow => Token::RArrow,
270            TokenKind::Dot => Token::Dot,
271            TokenKind::OpenDelim(delim) => Token::OpenDelim(delim),
272            TokenKind::CloseDelim(delim) => Token::CloseDelim(delim),
273            TokenKind::Literal(lit) => Token::Literal(lit),
274            TokenKind::Ident(symb, _) if symb == kw::True || symb == kw::False => {
275                Token::Literal(Lit { kind: LitKind::Bool, symbol: symb, suffix: None })
276            }
277            TokenKind::Ident(symb, _) if symb == self.symbs.strg => Token::Strg,
278            TokenKind::Ident(symb, _) if symb == self.symbs.requires => Token::Requires,
279            TokenKind::Ident(symb, _) if symb == self.symbs.ensures => Token::Ensures,
280            TokenKind::Ident(symb, _) if symb == self.symbs.qualifier => Token::Qualifier,
281            TokenKind::Ident(symb, _) if symb == self.symbs.sort => Token::Sort,
282            TokenKind::Ident(symb, _) if symb == self.symbs.opaque => Token::Opaque,
283            TokenKind::Ident(symb, _) if symb == self.symbs.local => Token::Local,
284            TokenKind::Ident(symb, _) if symb == self.symbs.bitvec => Token::BitVec,
285            TokenKind::Ident(symb, _) if symb == self.symbs.hrn => Token::Hrn,
286            TokenKind::Ident(symb, _) if symb == self.symbs.hdl => Token::Hdl,
287            TokenKind::Ident(symb, _) if symb == self.symbs.forall => Token::Forall,
288            TokenKind::Ident(symb, _) if symb == self.symbs.exists => Token::Exists,
289            TokenKind::Ident(symb, _) if symb == kw::Let => Token::Let,
290            TokenKind::Ident(symb, _) if symb == kw::In => Token::In,
291            TokenKind::Ident(symb, _) if symb == kw::Ref => Token::Ref,
292            TokenKind::Ident(symb, _) if symb == kw::Fn => Token::Fn,
293            TokenKind::Ident(symb, _) if symb == kw::Mut => Token::Mut,
294            TokenKind::Ident(symb, _) if symb == kw::Where => Token::Where,
295            TokenKind::Ident(symb, _) if symb == kw::Impl => Token::Impl,
296            TokenKind::Ident(symb, _) if symb == kw::Type => Token::Type,
297            TokenKind::Ident(symb, _) if symb == kw::If => Token::If,
298            TokenKind::Ident(symb, _) if symb == kw::Else => Token::Else,
299            TokenKind::Ident(symb, _) if symb == kw::Async => Token::Async,
300            TokenKind::Ident(symb, _) if symb == kw::As => Token::As,
301            TokenKind::Ident(symb, _) if symb == kw::Underscore => Token::Underscore,
302            TokenKind::Ident(symb, _) => Token::Ident(symb),
303            TokenKind::BinOp(BinOpToken::Or) => Token::Caret,
304            TokenKind::BinOp(BinOpToken::Plus) => Token::Plus,
305            TokenKind::BinOp(BinOpToken::Slash) => Token::Slash,
306            TokenKind::BinOp(BinOpToken::Minus) => Token::Minus,
307            TokenKind::BinOp(BinOpToken::And) => Token::And,
308            TokenKind::BinOp(BinOpToken::Percent) => Token::Percent,
309            TokenKind::BinOp(BinOpToken::Star) => Token::Star,
310            TokenKind::BinOp(BinOpToken::Shl) => {
311                self.tokens
312                    .push_back((span.lo(), Token::LtFollowedByLt, span.hi() - BytePos(1)));
313                self.tokens
314                    .push_back((span.lo() + BytePos(1), Token::Lt, span.hi()));
315                return;
316            }
317            TokenKind::BinOp(BinOpToken::Shr) => {
318                self.tokens
319                    .push_back((span.lo(), Token::GtFollowedByGt, span.hi() - BytePos(1)));
320                self.tokens
321                    .push_back((span.lo() + BytePos(1), Token::Gt, span.hi()));
322                return;
323            }
324            TokenKind::Not => Token::Not,
325            TokenKind::PathSep => Token::PathSep,
326            TokenKind::DotDot => Token::DotDot,
327            _ => Token::Invalid,
328        };
329        self.tokens.push_back((span.lo(), token, span.hi()));
330    }
331
332    fn fetch_tokens(&mut self) -> bool {
333        let Some(top) = self.stack.last_mut() else { return false };
334
335        match top.cursor.next() {
336            Some(TokenTree::Token(token, _)) => {
337                if let Some(TokenTree::Token(next, _)) = top.cursor.peek() {
338                    match (&token.kind, &next.kind) {
339                        (TokenKind::Le, TokenKind::Gt) if token.span.hi() == next.span.lo() => {
340                            top.cursor.next();
341                            self.tokens
342                                .push_back((token.span.lo(), Token::Iff, next.span.hi()));
343                            return true;
344                        }
345                        _ => {}
346                    }
347                }
348                self.map_token(token);
349                true
350            }
351            Some(TokenTree::Delimited(_, _spacing, Delimiter::Invisible(..), tokens)) => {
352                self.stack
353                    .push(Frame { cursor: tokens.iter().peekable(), close: None });
354                self.fetch_tokens()
355            }
356            Some(TokenTree::Delimited(span, _spacing, delim, tokens)) => {
357                let close = (span.close.lo(), Token::CloseDelim(*delim), span.close.hi());
358
359                self.stack
360                    .push(Frame { cursor: tokens.iter().peekable(), close: Some(close) });
361
362                let token = token::Token { kind: TokenKind::OpenDelim(*delim), span: span.open };
363                self.map_token(&token);
364                true
365            }
366            None => {
367                let Some(frame) = self.stack.pop() else { return false };
368                if let Some(token) = frame.close {
369                    self.tokens.push_back(token);
370                    true
371                } else {
372                    self.fetch_tokens()
373                }
374            }
375        }
376    }
377}