/// Returns true if \p Ch is accepted as part of a URI scheme by this lexer:
/// an alphanumeric character, or one of '+', '-', '.'.
///
/// \param Ch the character to classify; any byte value is allowed.
bool isUriSchemeChar(char Ch) {
  // The <cctype> classifiers require a value representable as unsigned char
  // (or EOF). A plain `char` may be negative for non-ASCII bytes, which would
  // be undefined behaviour, so convert before classifying.
  const auto Byte = static_cast<unsigned char>(Ch);
  if (std::isalnum(Byte))
    return true;
  return Ch == '+' || Ch == '-' || Ch == '.';
}
/// Returns true if \p Ch is accepted in the part of a URI that follows the
/// scheme separator: an alphanumeric character, or one of
/// % / ? : @ & = + $ , - _ . ! ~ * '
///
/// \param Ch the character to classify; any byte value is allowed.
bool isUriPathChar(char Ch) {
  // The <cctype> classifiers require a value representable as unsigned char
  // (or EOF). A plain `char` may be negative for non-ASCII bytes, which would
  // be undefined behaviour, so convert before classifying.
  if (std::isalnum(static_cast<unsigned char>(Ch)))
    return true;

  switch (Ch) {
  case '%':
  case '/':
  case '?':
  case ':':
  case '@':
  case '&':
  case '=':
  case '+':
  case '$':
  case ',':
  case '-':
  case '_':
  case '.':
  case '!':
  case '~':
  case '*':
  case '\'':
    return true;
  default:
    return false;
  }
}
/// Returns true if \p Ch is accepted as a path character by this lexer:
/// a digit, a letter, or one of '.', '_', '-', '+'.
///
/// Note that '/' is not accepted here; callers that allow a separator test
/// for it explicitly.
///
/// \param Ch the character to classify; any byte value is allowed.
bool isPathChar(char Ch) {
  // The <cctype> classifiers require a value representable as unsigned char
  // (or EOF). A plain `char` may be negative for non-ASCII bytes, which would
  // be undefined behaviour, so convert before classifying.
  const auto Byte = static_cast<unsigned char>(Ch);
  if (std::isdigit(Byte) || std::isalpha(Byte))
    return true;
  return Ch == '.' || Ch == '_' || Ch == '-' || Ch == '+';
}
32bool isIdentifierChar(
char Ch) {
33 return std::isdigit(Ch) || std::isalpha(Ch) || Ch ==
'_' || Ch ==
'\'' ||
39using DK = Diagnostic::DiagnosticKind;
42std::optional<LexerCursorRange> Lexer::consumePrefix(std::string_view Prefix) {
44 if (peekPrefix(Prefix)) {
45 consume(Prefix.length());
51std::optional<LexerCursorRange> Lexer::consumeManyOf(std::string_view Chars) {
54 if (Chars.find(peekUnwrap()) != std::string_view::npos) {
56 while (!eof() && Chars.find(peekUnwrap()) != std::string_view::npos) {
64std::optional<char> Lexer::consumeOneOf(std::string_view Chars) {
67 if (Chars.find(peekUnwrap()) != std::string_view::npos) {
68 char Ret = peekUnwrap();
75bool Lexer::consumeOne(
char C) {
85std::optional<LexerCursorRange> Lexer::consumeManyPathChar() {
88 if (
auto Ch = peek(); Ch && isPathChar(*Ch)) {
93 }
while (Ch && isPathChar(*Ch));
99bool Lexer::peekPrefix(std::string_view Prefix) {
100 if (Cur.Offset + Prefix.length() > Src.length())
102 if (remain().starts_with(Prefix)) {
108bool Lexer::consumeWhitespaces() {
109 if (
auto Ch = peek(); Ch && !std::isspace(*Ch))
113 }
while (!eof() && std::isspace(peekUnwrap()));
117bool Lexer::consumeComments() {
120 if (std::optional<LexerCursorRange> BeginRange = consumePrefix(
"/*")) {
125 Diagnostic &Diag = Diags.emplace_back(DK::DK_UnterminatedBComment,
127 Diag.
note(NK::NK_BCommentBegin, *BeginRange);
131 if (consumePrefix(
"*/"))
136 }
else if (consumePrefix(
"#")) {
139 if (eof() || consumeEOL()) {
148void Lexer::consumeTrivia() {
152 if (consumeWhitespaces() || consumeComments())
158bool Lexer::lexFloatExp() {
160 if (std::optional<char> ECh = consumeOneOf(
"Ee")) {
164 if (!consumeManyDigits()) {
166 Diags.emplace_back(DK::DK_FloatNoExp, curRange()) << std::string(1, *ECh);
174void Lexer::lexNumbers() {
194 auto Ch = consumeManyDigits();
195 assert(Ch.has_value() &&
"lexNumbers() must be called with a digit start");
204 std::string_view Prefix = Src.substr(Ch->lCur().Offset, 2);
205 if (Prefix.starts_with(
"0") && Prefix !=
"0.")
206 Diags.emplace_back(DK::DK_FloatLeadingZero, *Ch) << std::string(Prefix);
212bool Lexer::consumePathStart() {
224 consumeManyPathChar();
228 if (consumeOne(
'/')) {
230 if (
auto Ch = peek(); Ch && isPathChar(*Ch))
234 if (peekPrefix(
"${"))
243bool Lexer::consumeURI() {
253 while (!eof() && isUriSchemeChar(peekUnwrap()))
258 if (!eof() && peekUnwrap() ==
':') {
260 if (!eof() && isUriPathChar(peekUnwrap())) {
263 while (!eof() && isUriPathChar(peekUnwrap()));
272bool Lexer::consumeSPath() {
279 if (!eof() && isPathChar(peekUnwrap())) {
281 while (!eof() && isPathChar(peekUnwrap()))
289 if (!eof() && isPathChar(peekUnwrap())) {
290 while (!eof() && isPathChar(peekUnwrap()))
307void Lexer::lexIdentifier() {
310 while (!eof() && isIdentifierChar(peekUnwrap()))
314void Lexer::maybeKW() {
318#define TOK_KEYWORD(NAME) \
319 if (tokStr() == #NAME) { \
320 Tok = tok_kw_##NAME; \
336 return finishToken();
339 if (consumePrefix(
"${")) {
340 Tok = tok_dollar_curly;
341 return finishToken();
344 if (isPathChar(peekUnwrap()) || peekUnwrap() ==
'/') {
345 Tok = tok_path_fragment;
346 while (!eof() && (isPathChar(peekUnwrap()) || peekUnwrap() ==
'/')) {
348 if (peekPrefix(
"${"))
352 return finishToken();
354 return finishToken();
362 return finishToken();
364 switch (peekUnwrap()) {
374 Tok = tok_string_escape;
377 if (consumePrefix(
"${")) {
378 Tok = tok_dollar_curly;
385 Tok = tok_string_part;
388 if (peekUnwrap() ==
'\\')
390 if (peekUnwrap() ==
'"')
394 if (consumePrefix(
"$${"))
397 if (peekPrefix(
"${"))
402 return finishToken();
409 return finishToken();
411 if (consumePrefix(
"''")) {
412 if (consumePrefix(
"$") || consumePrefix(
"'")) {
413 Tok = tok_string_escape;
414 }
else if (consumePrefix(
"\\")) {
417 Tok = tok_string_escape;
421 return finishToken();
424 if (consumePrefix(
"${")) {
425 Tok = tok_dollar_curly;
426 return finishToken();
429 Tok = tok_string_part;
431 if (peekPrefix(
"''"))
435 if (consumePrefix(
"$${"))
438 if (peekPrefix(
"${"))
442 return finishToken();
450 std::optional<char> Ch = peek();
454 return finishToken();
459 if (isPathChar(*Ch) || *Ch ==
'/') {
460 if (consumePathStart()) {
462 Tok = tok_path_fragment;
463 return finishToken();
468 if (std::isalpha(*Ch)) {
471 return finishToken();
475 if (std::isdigit(*Ch)) {
477 return finishToken();
480 if (std::isalpha(*Ch) || *Ch ==
'_') {
486 return finishToken();
493 if (consumeSPath()) {
495 return finishToken();
501 if (consumePrefix(
"''"))
505 if (consumePrefix(
"++")) {
513 if (consumePrefix(
"->")) {
525 if (consumePrefix(
"//")) {
533 if (consumePrefix(
"||"))
535 if (consumePrefix(
"|>"))
536 Tok = tok_op_pipe_into;
539 if (consumePrefix(
"!=")) {
547 if (consumePrefix(
"<=")) {
549 }
else if (consumePrefix(
"<|")) {
550 Tok = tok_op_pipe_from;
557 if (consumePrefix(
">=")) {
565 if (consumePrefix(
"&&")) {
579 if (consumePrefix(
"...")) {
601 Tok = tok_semi_colon;
604 if (consumePrefix(
"==")) {
637 if (consumePrefix(
"${")) {
638 Tok = tok_dollar_curly;
643 if (Tok == tok_unknown)
645 return finishToken();
Diagnostic::DiagnosticKind DK
Lexer declaration. The lexer is "stateful" and tightly coupled to the parser.
Note & note(Note::NoteKind Kind, LexerCursorRange Range)
Fix & fix(std::string Message)
Fix & edit(TextEdit Edit)
A point in the source file.
const LexerCursor & cur() const
static TextEdit mkInsertion(LexerCursor P, std::string NewText)
A token, with its kind and its range in the source code.