Lexer.h
/// \file
/// \brief Lexer declaration. The lexer is a "stateful" lexer and is highly
/// tied to the parser.
///
/// This should be considered an implementation detail of the parser, so the
/// header is explicitly made private. Unit tests should be placed in the
/// lib/Parse/test directory.
#pragma once

#include "Token.h"

#include "nixf/Basic/Diagnostic.h"
#include "nixf/Basic/Range.h"

#include <cassert>
#include <optional>
#include <string_view>
#include <vector>

namespace nixf {

class Lexer {
  const std::string_view Src;
  std::vector<Diagnostic> &Diags;

  LexerCursor Cur;

  void consume(std::size_t N = 1) {
    assert(Cur.Offset + N <= Src.length());
    // Update Line & Column & Offset
    for (std::size_t I = 0; I < N; ++I) {
      if (Src[Cur.Offset + I] == '\n') {
        ++Cur.Line;
        Cur.Column = 0;
      } else {
        ++Cur.Column;
      }
    }
    Cur.Offset += N;
  }
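
  // Illustrative walk-through of the bookkeeping above (assuming a fresh
  // cursor at Offset 0, Line 0, Column 0): with Src = "a\nb", consume(3)
  // visits 'a' (Column -> 1), '\n' (Line -> 1, Column -> 0) and 'b'
  // (Column -> 1), then advances Offset to 3.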

  // Token recorder
  LexerCursor TokStartPtr;
  tok::TokenKind Tok;
  void startToken() {
    Tok = tok::tok_unknown;
    TokStartPtr = Cur;
  }
  Token finishToken() {
    return {
        Tok,
        {TokStartPtr, Cur},
        Src.substr(TokStartPtr.Offset, Cur.Offset - TokStartPtr.Offset),
    };
  }
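
  // Typical use of the recorder in a lexing routine (illustrative sketch only;
  // tok_id is assumed here and is declared in Token.h, not in this header):
  //
  //   startToken();            // remember where the token begins
  //   while (consumeOne('a'))  // consume the token's characters
  //     ;
  //   Tok = tok::tok_id;       // decide the kind
  //   return finishToken();    // kind + range + spelling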

  void consumeTrivia();

  bool consumeWhitespaces();
  bool consumeComments();

  [[nodiscard]] bool eof(std::size_t Offset) const {
    return Offset >= Src.length();
  }

  [[nodiscard]] bool eof() const { return eof(Cur.Offset); }

  bool consumeEOL() { return consumePrefix("\r\n") || consumePrefix("\n"); }

  bool lexFloatExp();

  // Advance the cursor if the remaining input starts with \p Prefix,
  // otherwise do nothing.
  std::optional<LexerCursorRange> consumePrefix(std::string_view Prefix);
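  // For example, consumePrefix("''") would advance past an indented-string
  // opener and return the range it covered, while a non-matching prefix leaves
  // the cursor untouched (illustrative; actual call sites are in Lexer.cpp).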

  bool consumeOne(char C);

  std::optional<char> consumeOneOf(std::string_view Chars);

  std::optional<LexerCursorRange> consumeManyOf(std::string_view Chars);

  std::optional<LexerCursorRange> consumeManyDigits() {
    return consumeManyOf("0123456789");
  }

  std::optional<LexerCursorRange> consumeManyPathChar();

  /// Look ahead and check if we have \p Prefix
  bool peekPrefix(std::string_view Prefix);

  bool consumePathStart();

  bool consumeURI();

  bool consumeSPath();

  /// Should be called after lexing a "raw" identifier; checks whether it is a
  /// keyword and, if so, makes the assignment `Tok <- tok_kw_*`.
  void maybeKW();
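  // For example, after lexing the raw identifier "if", maybeKW() would
  // reassign Tok to the matching keyword kind (e.g. a tok_kw_if enumerator of
  // the tok_kw_* family; the exact names live in Token.h).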

  void lexIdentifier();

  void lexNumbers();

  [[nodiscard]] std::string_view tokStr() const {
    return Src.substr(TokStartPtr.Offset, Cur.Offset - TokStartPtr.Offset);
  }

  [[nodiscard]] std::string_view remain() const {
    return Src.substr(Cur.Offset);
  }

  [[nodiscard]] LexerCursorRange curRange() const { return {Cur, Cur}; }

  [[nodiscard]] char peekUnwrap() const { return Src[Cur.Offset]; }

  [[nodiscard]] std::optional<char> peek() const {
    if (eof())
      return std::nullopt;
    return peekUnwrap();
  }

public:
  Lexer(std::string_view Src, std::vector<Diagnostic> &Diags)
      : Src(Src), Diags(Diags), Cur() {}

  /// Reset the cursor to \p NewCur (its offset is zero-based into the source).
  void setCur(const LexerCursor &NewCur) {
    assert(Src.begin() + NewCur.Offset <= Src.end());
    Cur = NewCur;
  }

  [[nodiscard]] const LexerCursor &cur() const { return Cur; }

  Token lex();
  Token lexString();
  Token lexIndString();
  Token lexPath();
};

} // namespace nixf
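
A minimal sketch of how the parser might drive this class (not code from the repository): construct the lexer over the source buffer and a shared diagnostics vector, then pull tokens until end of file. Token::kind() and a tok::tok_eof kind are assumed here; both would come from Token.h, not from this header.

#include "Lexer.h" // private header; only reachable from within lib/Parse

#include <string_view>
#include <vector>

static void consumeAll(std::string_view Src) {
  std::vector<nixf::Diagnostic> Diags; // lexing diagnostics accumulate here
  nixf::Lexer L(Src, Diags);
  for (;;) {
    nixf::Token T = L.lex();            // skips trivia, then lexes one token
    if (T.kind() == nixf::tok::tok_eof) // assumed end-of-file kind
      break;
    // A real parser would dispatch on T here, switching to lexString(),
    // lexIndString(), or lexPath() when the grammar requires it.
  }
}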