1use std::{collections::VecDeque, fmt, iter::Peekable};
2
3pub use rustc_ast::token::{Delimiter, Lit, LitKind};
4use rustc_ast::{
5 token::InvisibleOrigin,
6 tokenstream::{TokenStream, TokenStreamIter, TokenTree},
7};
8use rustc_span::{BytePos, Symbol};
9
10use crate::symbols::kw;
11
12#[derive(Copy, Clone, Debug, PartialEq)]
13pub enum TokenKind {
14 Caret,
15 EqEq,
16 Eq,
17 AndAnd,
18 OrOr,
19 Plus,
20 Minus,
21 Slash,
22 Bang,
23 Star,
24 Colon,
25 Comma,
26 Semi,
27 RArrow,
28 Dot,
29 Le,
30 Ne,
31 GtFollowedByGt,
32 Gt,
33 LtFollowedByLt,
34 Lt,
35 Ge,
36 At,
37 Pound,
38 Iff,
39 FatArrow,
40 Literal(Lit),
41 Ident(Symbol),
43 OpenParen,
44 CloseParen,
45 OpenBrace,
46 CloseBrace,
47 OpenBracket,
48 CloseBracket,
49 OpenInvisible(InvisibleOrigin),
50 CloseInvisible(InvisibleOrigin),
51 Invalid,
52 And,
53 Percent,
54 PathSep,
55 DotDot,
56 Eof,
57}
58
59#[derive(Clone, Copy)]
60pub struct Token {
61 pub kind: TokenKind,
62 pub lo: BytePos,
63 pub hi: BytePos,
64}
65
66impl Token {
67 pub fn new(kind: TokenKind, lo: BytePos, hi: BytePos) -> Self {
68 Self { kind, lo, hi }
69 }
70}
71
72pub mod token {
74 pub use super::TokenKind::*;
75}
76
77impl TokenKind {
78 pub fn open_delim(delim: Delimiter) -> TokenKind {
79 match delim {
80 Delimiter::Parenthesis => token::OpenParen,
81 Delimiter::Bracket => token::OpenBracket,
82 Delimiter::Brace => token::OpenBrace,
83 Delimiter::Invisible(origin) => token::OpenInvisible(origin),
84 }
85 }
86
87 pub fn close_delim(delim: Delimiter) -> TokenKind {
88 match delim {
89 Delimiter::Parenthesis => token::CloseParen,
90 Delimiter::Bracket => token::CloseBracket,
91 Delimiter::Brace => token::CloseBrace,
92 Delimiter::Invisible(origin) => token::CloseInvisible(origin),
93 }
94 }
95
96 pub fn descr(&self) -> &'static str {
97 match self {
98 TokenKind::Caret => "|",
99 TokenKind::EqEq => "==",
100 TokenKind::Eq => "=",
101 TokenKind::AndAnd => "&&",
102 TokenKind::OrOr => "||",
103 TokenKind::Plus => "+",
104 TokenKind::Minus => "-",
105 TokenKind::Slash => "/",
106 TokenKind::Bang => "!",
107 TokenKind::Star => "*",
108 TokenKind::Colon => ":",
109 TokenKind::Comma => ",",
110 TokenKind::Semi => ";",
111 TokenKind::RArrow => "->",
112 TokenKind::Dot => ".",
113 TokenKind::Le => "<=",
114 TokenKind::Ne => ">=",
115 TokenKind::GtFollowedByGt => ">",
116 TokenKind::Gt => ">",
117 TokenKind::LtFollowedByLt => "<",
118 TokenKind::Lt => "<",
119 TokenKind::Ge => ">=",
120 TokenKind::At => "@",
121 TokenKind::Pound => "#",
122 TokenKind::Iff => "<=>",
123 TokenKind::FatArrow => "=>",
124 TokenKind::Literal(_) => "literal",
125 TokenKind::Ident(_) => "identifier",
126 TokenKind::OpenParen => "(",
127 TokenKind::OpenBrace => "{",
128 TokenKind::OpenBracket => "[",
129 TokenKind::CloseParen => ")",
130 TokenKind::CloseBrace => "}",
131 TokenKind::CloseBracket => "]",
132 TokenKind::OpenInvisible(_) => "",
133 TokenKind::CloseInvisible(_) => "",
134 TokenKind::And => "&",
135 TokenKind::Percent => "%",
136 TokenKind::PathSep => "::",
137 TokenKind::DotDot => "..",
138 TokenKind::Eof => "<eof>",
139 TokenKind::Invalid => "<invalid>",
140 }
141 }
142
143 pub fn is_keyword(self, kw: Symbol) -> bool {
144 matches!(self, TokenKind::Ident(sym) if sym == kw)
145 }
146
147 pub fn is_eof(self) -> bool {
148 matches!(self, TokenKind::Eof)
149 }
150}
151
152impl fmt::Display for TokenKind {
153 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
154 match self {
155 TokenKind::Literal(lit) => write!(f, "{lit}"),
156 TokenKind::Ident(sym) => write!(f, "{sym}"),
157 _ => write!(f, "{}", self.descr()),
158 }
159 }
160}
161
162pub struct Cursor<'t> {
163 stack: Vec<Frame<'t>>,
164 tokens: VecDeque<Token>,
165 hi: BytePos,
166}
167
168struct Frame<'t> {
169 cursor: Peekable<TokenStreamIter<'t>>,
170 close: Option<Token>,
171}
172
173impl<'t> Cursor<'t> {
174 pub(crate) fn new(stream: &'t TokenStream, offset: BytePos) -> Self {
175 let mut cursor = Cursor {
176 stack: vec![Frame { cursor: stream.iter().peekable(), close: None }],
177 tokens: VecDeque::new(),
178 hi: offset,
179 };
180 cursor.fetch_tokens();
181 cursor
182 }
183
184 #[must_use]
185 pub fn at(&mut self, pos: usize) -> Token {
186 while self.tokens.len() <= pos && self.fetch_tokens() {}
187 if pos < self.tokens.len() {
188 self.tokens[pos]
189 } else {
190 Token::new(TokenKind::Eof, self.hi, self.hi)
191 }
192 }
193
194 pub fn debug(&mut self, size: usize) -> String {
195 let mut s = String::new();
196 for i in 0..size {
197 s = format!("{s} {}", self.at(i).kind);
198 }
199 s
200 }
201
202 pub fn advance(&mut self) {
203 if let Some(tok) = self.tokens.pop_front() {
204 if self.tokens.is_empty() {
205 self.fetch_tokens();
206 }
207 self.hi = tok.hi;
208 }
209 }
210
211 pub fn advance_by(&mut self, n: usize) {
212 for _ in 0..n {
213 self.advance();
214 }
215 }
216
217 pub fn lo(&self) -> BytePos {
219 if let Some(tok) = self.tokens.front() { tok.lo } else { self.hi }
220 }
221
222 pub fn hi(&self) -> BytePos {
225 self.hi
226 }
227
228 fn map_token(&mut self, token: &rustc_ast::token::Token) {
229 let span = token.span;
230 let kind = match token.kind {
231 rustc_ast::token::Lt => TokenKind::Lt,
232 rustc_ast::token::Le => TokenKind::Le,
233 rustc_ast::token::EqEq => TokenKind::EqEq,
234 rustc_ast::token::Eq => TokenKind::Eq,
235 rustc_ast::token::Ne => TokenKind::Ne,
236 rustc_ast::token::AndAnd => TokenKind::AndAnd,
237 rustc_ast::token::OrOr => TokenKind::OrOr,
238 rustc_ast::token::FatArrow => TokenKind::FatArrow,
239 rustc_ast::token::Gt => TokenKind::Gt,
240 rustc_ast::token::Ge => TokenKind::Ge,
241 rustc_ast::token::At => TokenKind::At,
242 rustc_ast::token::Pound => TokenKind::Pound,
243 rustc_ast::token::Comma => TokenKind::Comma,
244 rustc_ast::token::Colon => TokenKind::Colon,
245 rustc_ast::token::Semi => TokenKind::Semi,
246 rustc_ast::token::RArrow => TokenKind::RArrow,
247 rustc_ast::token::Dot => TokenKind::Dot,
248 rustc_ast::token::OpenParen => TokenKind::OpenParen,
249 rustc_ast::token::OpenBrace => TokenKind::OpenBrace,
250 rustc_ast::token::OpenBracket => TokenKind::OpenBracket,
251 rustc_ast::token::CloseParen => TokenKind::CloseParen,
252 rustc_ast::token::CloseBrace => TokenKind::CloseBrace,
253 rustc_ast::token::CloseBracket => TokenKind::CloseBracket,
254 rustc_ast::token::OpenInvisible(origin) => TokenKind::OpenInvisible(origin),
255 rustc_ast::token::CloseInvisible(origin) => TokenKind::CloseInvisible(origin),
256 rustc_ast::token::Literal(lit) => TokenKind::Literal(lit),
257 rustc_ast::token::Ident(symb, _) if symb == kw::True || symb == kw::False => {
258 TokenKind::Literal(Lit { kind: LitKind::Bool, symbol: symb, suffix: None })
259 }
260 rustc_ast::token::Ident(symb, _) => TokenKind::Ident(symb),
261 rustc_ast::token::NtIdent(ident, _) => TokenKind::Ident(ident.name),
262 rustc_ast::token::Or => TokenKind::Caret,
263 rustc_ast::token::Plus => TokenKind::Plus,
264 rustc_ast::token::Slash => TokenKind::Slash,
265 rustc_ast::token::Minus => TokenKind::Minus,
266 rustc_ast::token::And => TokenKind::And,
267 rustc_ast::token::Percent => TokenKind::Percent,
268 rustc_ast::token::Star => TokenKind::Star,
269 rustc_ast::token::Shl => {
270 self.tokens.push_back(Token::new(
271 TokenKind::LtFollowedByLt,
272 span.lo(),
273 span.hi() - BytePos(1),
274 ));
275 self.tokens
276 .push_back(Token::new(TokenKind::Lt, span.lo() + BytePos(1), span.hi()));
277 return;
278 }
279 rustc_ast::token::Shr => {
280 self.tokens.push_back(Token::new(
281 TokenKind::GtFollowedByGt,
282 span.lo(),
283 span.hi() - BytePos(1),
284 ));
285 self.tokens
286 .push_back(Token::new(TokenKind::Gt, span.lo() + BytePos(1), span.hi()));
287 return;
288 }
289 rustc_ast::token::Bang => TokenKind::Bang,
290 rustc_ast::token::PathSep => TokenKind::PathSep,
291 rustc_ast::token::DotDot => TokenKind::DotDot,
292 _ => TokenKind::Invalid,
293 };
294 self.tokens
295 .push_back(Token::new(kind, span.lo(), span.hi()));
296 }
297
298 fn fetch_tokens(&mut self) -> bool {
299 let Some(top) = self.stack.last_mut() else { return false };
300
301 match top.cursor.next() {
302 Some(TokenTree::Token(token, _)) => {
303 if let Some(TokenTree::Token(next, _)) = top.cursor.peek() {
304 match (&token.kind, &next.kind) {
305 (rustc_ast::token::Le, rustc_ast::token::Gt)
306 if token.span.hi() == next.span.lo() =>
307 {
308 top.cursor.next();
309 self.tokens.push_back(Token::new(
310 TokenKind::Iff,
311 token.span.lo(),
312 next.span.hi(),
313 ));
314 return true;
315 }
316 _ => {}
317 }
318 }
319 self.map_token(token);
320 true
321 }
322 Some(TokenTree::Delimited(_, _spacing, Delimiter::Invisible(..), tokens)) => {
323 self.stack
324 .push(Frame { cursor: tokens.iter().peekable(), close: None });
325 self.fetch_tokens()
326 }
327 Some(TokenTree::Delimited(span, _spacing, delim, tokens)) => {
328 let close_kind = match delim {
329 Delimiter::Parenthesis => TokenKind::CloseParen,
330 Delimiter::Brace => TokenKind::CloseBrace,
331 Delimiter::Bracket => TokenKind::CloseBracket,
332 Delimiter::Invisible(origin) => TokenKind::CloseInvisible(*origin),
333 };
334 let close = Token::new(close_kind, span.close.lo(), span.close.hi());
335
336 self.stack
337 .push(Frame { cursor: tokens.iter().peekable(), close: Some(close) });
338
339 let kind = match delim {
340 Delimiter::Parenthesis => rustc_ast::token::OpenParen,
341 Delimiter::Brace => rustc_ast::token::OpenBrace,
342 Delimiter::Bracket => rustc_ast::token::OpenBracket,
343 Delimiter::Invisible(origin) => rustc_ast::token::OpenInvisible(*origin),
344 };
345
346 let token = rustc_ast::token::Token { kind, span: span.open };
347 self.map_token(&token);
348 true
349 }
350 None => {
351 let Some(frame) = self.stack.pop() else { return false };
352 if let Some(token) = frame.close {
353 self.tokens.push_back(token);
354 true
355 } else {
356 self.fetch_tokens()
357 }
358 }
359 }
360 }
361}