/// Returns true if \p Ch is accepted inside a URI scheme: an alphanumeric
/// character, or one of '+', '-', '.'.
bool isUriSchemeChar(char Ch) {
  // The <cctype> classifiers take an int whose value must be representable as
  // unsigned char (or be EOF). A plain char holding a non-ASCII byte can be
  // negative, which is undefined behavior, so convert it first.
  return std::isalnum(static_cast<unsigned char>(Ch)) || Ch == '+' ||
         Ch == '-' || Ch == '.';
}
/// Returns true if \p Ch is accepted in the part of a URI that follows the
/// scheme: an alphanumeric character, or one of
/// % / ? : @ & = + $ , - _ . ! ~ * '
bool isUriPathChar(char Ch) {
  switch (Ch) {
  case '%':
  case '/':
  case '?':
  case ':':
  case '@':
  case '&':
  case '=':
  case '+':
  case '$':
  case ',':
  case '-':
  case '_':
  case '.':
  case '!':
  case '~':
  case '*':
  case '\'':
    return true;
  default:
    // The <cctype> classifiers require a value representable as unsigned char
    // (or EOF); a negative plain char would be undefined behavior.
    return std::isalnum(static_cast<unsigned char>(Ch)) != 0;
  }
}
/// Returns true if \p Ch is accepted inside a path component: a digit, a
/// letter, or one of '.', '_', '-', '+'. Note that '/' is not included.
bool isPathChar(char Ch) {
  // std::isalnum is true exactly when std::isalpha or std::isdigit is true.
  // The argument is converted to unsigned char first because the <cctype>
  // classifiers are undefined for negative values other than EOF.
  return std::isalnum(static_cast<unsigned char>(Ch)) || Ch == '.' ||
         Ch == '_' || Ch == '-' || Ch == '+';
}
/// Returns true if \p Ch may appear in an identifier: a digit, a letter, '_'
/// or '\''.
/// NOTE(review): the expression is cut off after the trailing `||` in this
/// excerpt, so further accepted characters may follow — confirm against the
/// full file.
/// NOTE(review): std::isdigit/std::isalpha receive a plain char here; their
/// behavior is undefined for negative values, so a cast to unsigned char
/// would be safer.
32bool isIdentifierChar(
char Ch) {
33 return std::isdigit(Ch) || std::isalpha(Ch) || Ch ==
'_' || Ch ==
'\'' ||
39using DK = Diagnostic::DiagnosticKind;
/// Consume \p Prefix when peekPrefix() reports that it is next in the input.
/// NOTE(review): the lines that build the returned range and handle the
/// no-match case are not visible in this excerpt.
42std::optional<LexerCursorRange> Lexer::consumePrefix(std::string_view Prefix) {
44 if (peekPrefix(Prefix)) {
// Consume exactly as many characters as the prefix is long.
45 consume(Prefix.length());
/// Consume a run of characters that each occur in \p Chars and return the
/// range from Start to the current cursor.
/// NOTE(review): the declaration of Start, the loop body and the no-match
/// return are not visible in this excerpt.
51std::optional<LexerCursorRange> Lexer::consumeManyOf(std::string_view Chars) {
// Membership test: is the current character one of Chars?
54 if (Chars.find(peekUnwrap()) != std::string_view::npos) {
// Continue while input remains and the current character is still in Chars.
56 while (!eof() && Chars.find(peekUnwrap()) != std::string_view::npos) {
59 return LexerCursorRange{Start, Cur};
/// Check whether the current character is one of \p Chars and, if so, capture
/// it in Ret.
/// NOTE(review): presumably the character is then consumed and returned, with
/// an empty optional on no match — those lines are not visible here.
64std::optional<char> Lexer::consumeOneOf(std::string_view Chars) {
67 if (Chars.find(peekUnwrap()) != std::string_view::npos) {
// Remember the matched character.
68 char Ret = peekUnwrap();
75bool Lexer::consumeOne(
char C) {
/// Consume a run of characters accepted by isPathChar() and return the range
/// from Start to the current cursor.
/// NOTE(review): the declaration of Start, the loop body and the no-match
/// return are not visible in this excerpt.
85std::optional<LexerCursorRange> Lexer::consumeManyPathChar() {
// Only enter when a character is available and it is a path character.
88 if (
auto Ch = peek(); Ch && isPathChar(*Ch)) {
// Tail of a do-while: repeat while Ch holds a path character.
93 }
while (Ch && isPathChar(*Ch));
94 return LexerCursorRange{Start, Cur};
/// Test, without consuming, whether the remaining input starts with \p Prefix.
/// NOTE(review): the return statements are not visible in this excerpt.
99bool Lexer::peekPrefix(std::string_view Prefix) {
// Guard: the prefix would extend past the end of Src.
100 if (Cur.Offset + Prefix.length() > Src.length())
102 if (remain().starts_with(Prefix)) {
/// Consume consecutive whitespace characters as classified by std::isspace.
/// NOTE(review): the return statements and the loop body are not visible in
/// this excerpt; presumably the result tells whether anything was consumed.
/// NOTE(review): std::isspace is called with a plain char; its behavior is
/// undefined for negative values, so a cast to unsigned char would be safer.
108bool Lexer::consumeWhitespaces() {
// Guard for the case where a character is available but is not whitespace.
109 if (
auto Ch = peek(); Ch && !std::isspace(*Ch))
// Tail of a do-while: repeat while input remains and it is still whitespace.
113 }
while (!eof() && std::isspace(peekUnwrap()));
/// Consume a comment at the cursor: a block comment opened by "/*" and closed
/// by "*/", or a line comment introduced by "#".
/// NOTE(review): several lines, including loops and return statements, are not
/// visible in this excerpt.
117bool Lexer::consumeComments() {
// Block comment: keep the range of the opening "/*" for diagnostics.
120 if (std::optional<LexerCursorRange> BeginRange = consumePrefix(
"/*")) {
// Record an unterminated-block-comment diagnostic at the current cursor, with
// a note that points back at the opening "/*".
// NOTE(review): presumably reached when input ends before "*/" — the guarding
// condition is not visible here.
125 Diagnostic &Diag = Diags.emplace_back(DK::DK_UnterminatedBComment,
126 LexerCursorRange{
cur()});
127 Diag.
note(NK::NK_BCommentBegin, *BeginRange);
// Closing delimiter of the block comment.
131 if (consumePrefix(
"*/"))
136 }
else if (consumePrefix(
"#")) {
// A line comment stops at end of input or at an end-of-line sequence.
139 if (eof() || consumeEOL()) {
/// Consume trivia, i.e. whitespace and comments, at the cursor.
/// NOTE(review): the surrounding loop is not visible in this excerpt;
/// presumably this repeats until neither helper makes progress.
148void Lexer::consumeTrivia() {
// Try whitespace first; comments are only attempted if that consumed nothing.
152 if (consumeWhitespaces() || consumeComments())
/// Lex the exponent part of a floating-point literal: 'E' or 'e' followed by
/// digits.
/// NOTE(review): sign handling and the return statements are not visible in
/// this excerpt.
158bool Lexer::lexFloatExp() {
// The exponent marker may be upper or lower case; keep which one was seen.
160 if (std::optional<char> ECh = consumeOneOf(
"Ee")) {
164 if (!consumeManyDigits()) {
// No digits after the marker: report it, passing the marker character as the
// diagnostic argument.
166 Diags.emplace_back(DK::DK_FloatNoExp, curRange()) << std::string(1, *ECh);
/// Lex a numeric literal. The caller must ensure the cursor is on a digit.
/// NOTE(review): most of the body, including how integer and float tokens are
/// distinguished, is not visible in this excerpt.
174void Lexer::lexNumbers() {
// Leading digit run; Ch is the range it covers.
194 auto Ch = consumeManyDigits();
195 assert(Ch.has_value() &&
"lexNumbers() must be called with a digit start");
// Inspect the first two characters of the literal.
204 std::string_view Prefix = Src.substr(Ch->lCur().Offset, 2);
// Starts with '0' but is not "0.": report a leading zero, passing the two
// inspected characters as the diagnostic argument.
205 if (Prefix.starts_with(
"0") && Prefix !=
"0.")
206 Diags.emplace_back(DK::DK_FloatLeadingZero, *Ch) << std::string(Prefix);
/// Try to consume the beginning of a path literal.
/// Two shapes are visible: one starting with "~/" and one containing a '/';
/// in both, the '/' must be followed by a path character or by "${".
/// NOTE(review): return statements and the restore logic are not visible in
/// this excerpt.
212bool Lexer::consumePathStart() {
// Remember the cursor — presumably to rewind when no path start is found.
222 LexerCursor Saved =
cur();
// Home-relative form: '~' followed by '/'.
227 if (consumeOne(
'~')) {
228 if (consumeOne(
'/')) {
229 if (
auto Ch = peek(); Ch && isPathChar(*Ch))
231 if (peekPrefix(
"${"))
// General form: optional leading path characters, then '/'.
239 consumeManyPathChar();
243 if (consumeOne(
'/')) {
245 if (
auto Ch = peek(); Ch && isPathChar(*Ch))
249 if (peekPrefix(
"${"))
/// Try to consume a URI: scheme characters, a ':', then at least one URI path
/// character.
/// NOTE(review): loop bodies, return statements and the restore logic are not
/// visible in this excerpt.
258bool Lexer::consumeURI() {
// Remember the cursor — presumably to rewind when this is not a URI.
263 LexerCursor Saved =
cur();
// Scheme part.
268 while (!eof() && isUriSchemeChar(peekUnwrap()))
// The scheme must be followed by ':'.
273 if (!eof() && peekUnwrap() ==
':') {
// At least one path character is required after the ':'.
275 if (!eof() && isUriPathChar(peekUnwrap())) {
// Tail of a do-while over the remaining URI path characters.
278 while (!eof() && isUriPathChar(peekUnwrap()));
/// Try to consume a search path ("SPath").
/// NOTE(review): only the runs of path characters are visible in this
/// excerpt; delimiters, return statements and the restore logic are not shown.
287bool Lexer::consumeSPath() {
// Remember the cursor — presumably to rewind on failure.
289 LexerCursor Saved =
cur();
// First run of path characters (at least one required).
294 if (!eof() && isPathChar(peekUnwrap())) {
296 while (!eof() && isPathChar(peekUnwrap()))
// A further run of path characters (at least one required).
304 if (!eof() && isPathChar(peekUnwrap())) {
305 while (!eof() && isPathChar(peekUnwrap()))
/// Lex an identifier by scanning characters accepted by isIdentifierChar().
/// NOTE(review): the loop body and any preceding statements are not visible in
/// this excerpt.
322void Lexer::lexIdentifier() {
// Continue while input remains and the current character is an identifier
// character.
325 while (!eof() && isIdentifierChar(peekUnwrap()))
/// Set Tok to the matching keyword kind when the current token text is a
/// keyword.
/// NOTE(review): lines are missing from this excerpt, including the end of the
/// macro; presumably non-keyword text leaves Tok unchanged.
329void Lexer::maybeKW() {
// X-macro: for every TOK_KEYWORD(NAME) entry expanded from "TokenKinds.inc",
// compare the token text with the stringized NAME and, on a match, assign
// tok_kw_NAME.
333#define TOK_KEYWORD(NAME) \
334 if (tokStr() == #NAME) { \
335 Tok = tok_kw_##NAME; \
338#include "TokenKinds.inc"
351 return finishToken();
354 if (consumePrefix(
"${")) {
355 Tok = tok_dollar_curly;
356 return finishToken();
359 if (peekPrefix(
"~/")) {
360 Tok = tok_path_fragment;
362 while (!eof() && (isPathChar(peekUnwrap()) || peekUnwrap() ==
'/')) {
364 if (peekPrefix(
"${"))
368 return finishToken();
371 if (isPathChar(peekUnwrap()) || peekUnwrap() ==
'/') {
372 Tok = tok_path_fragment;
373 while (!eof() && (isPathChar(peekUnwrap()) || peekUnwrap() ==
'/')) {
375 if (peekPrefix(
"${"))
379 return finishToken();
381 return finishToken();
389 return finishToken();
391 switch (peekUnwrap()) {
401 Tok = tok_string_escape;
404 if (consumePrefix(
"${")) {
405 Tok = tok_dollar_curly;
412 Tok = tok_string_part;
415 if (peekUnwrap() ==
'\\')
417 if (peekUnwrap() ==
'"')
421 if (consumePrefix(
"$${"))
424 if (peekPrefix(
"${"))
429 return finishToken();
436 return finishToken();
438 if (consumePrefix(
"''")) {
439 if (consumePrefix(
"$") || consumePrefix(
"'")) {
440 Tok = tok_string_escape;
441 }
else if (consumePrefix(
"\\")) {
444 Tok = tok_string_escape;
448 return finishToken();
451 if (consumePrefix(
"${")) {
452 Tok = tok_dollar_curly;
453 return finishToken();
456 Tok = tok_string_part;
458 if (peekPrefix(
"''"))
462 if (consumePrefix(
"$${"))
465 if (peekPrefix(
"${"))
469 return finishToken();
477 std::optional<char> Ch = peek();
481 return finishToken();
486 if (isPathChar(*Ch) || *Ch ==
'/' || *Ch ==
'~') {
487 if (consumePathStart()) {
489 Tok = tok_path_fragment;
490 return finishToken();
495 if (std::isalpha(*Ch)) {
498 return finishToken();
502 if (std::isdigit(*Ch)) {
504 return finishToken();
507 if (std::isalpha(*Ch) || *Ch ==
'_') {
513 return finishToken();
520 if (consumeSPath()) {
522 return finishToken();
528 if (consumePrefix(
"''"))
532 if (consumePrefix(
"++")) {
540 if (consumePrefix(
"->")) {
552 if (consumePrefix(
"//")) {
560 if (consumePrefix(
"||"))
562 if (consumePrefix(
"|>"))
563 Tok = tok_op_pipe_into;
566 if (consumePrefix(
"!=")) {
574 if (consumePrefix(
"<=")) {
576 }
else if (consumePrefix(
"<|")) {
577 Tok = tok_op_pipe_from;
584 if (consumePrefix(
">=")) {
592 if (consumePrefix(
"&&")) {
606 if (consumePrefix(
"...")) {
628 Tok = tok_semi_colon;
631 if (consumePrefix(
"==")) {
664 if (consumePrefix(
"${")) {
665 Tok = tok_dollar_curly;
670 if (Tok == tok_unknown)
672 return finishToken();
Diagnostic::DiagnosticKind DK
Lexer declaration. The lexer is "stateful" and tightly coupled to the parser.
Note & note(Note::NoteKind Kind, LexerCursorRange Range)
Fix & fix(std::string Message)
Fix & edit(TextEdit Edit)
A point in the source file.
const LexerCursor & cur() const
static TextEdit mkInsertion(LexerCursor P, std::string NewText)
A token, with its kind and its range in the source code.