nixd
Loading...
Searching...
No Matches
SourceCode.cpp
Go to the documentation of this file.
2#include "lspserver/Logger.h"
3#include <llvm/Support/Errc.h>
4
5namespace lspserver {
6
7/// TODO: support more encodings (from clangd, using Context)
8static OffsetEncoding lspEncoding() { return OffsetEncoding::UTF16; }
9
10template <typename Callback>
11static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) {
12 bool LoggedInvalid = false;
13 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
14 // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
15 for (size_t I = 0; I < U8.size();) {
16 unsigned char C = static_cast<unsigned char>(U8[I]);
17 if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character.
18 if (CB(1, 1))
19 return true;
20 ++I;
21 continue;
22 }
23 // This convenient property of UTF-8 holds for all non-ASCII characters.
24 size_t UTF8Length = llvm::countl_one(C);
25 // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
26 // 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*.
28 if (!LoggedInvalid) {
29 elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8));
30 LoggedInvalid = true;
31 }
32 // We can't give a correct result, but avoid returning something wild.
33 // Pretend this is a valid ASCII byte, for lack of better options.
34 // (Too late to get ISO-8859-* right, we've skipped some bytes already).
35 if (CB(1, 1))
36 return true;
37 ++I;
38 continue;
39 }
40 I += UTF8Length; // Skip over all trailing bytes.
41 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
42 // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
43 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
45 }
46 return false;
47}
48
49// Like most strings in clangd, the input is UTF-8 encoded.
50size_t lspLength(llvm::StringRef Code) {
51 size_t Count = 0;
52 switch (lspEncoding()) {
54 Count = Code.size();
55 break;
57 iterateCodepoints(Code, [&](int U8Len, int U16Len) {
58 Count += U16Len;
59 return false;
60 });
61 break;
63 iterateCodepoints(Code, [&](int U8Len, int U16Len) {
64 ++Count;
65 return false;
66 });
67 break;
69 llvm_unreachable("unsupported encoding");
70 }
71 return Count;
72}
73
74// Returns the byte offset into the string that is an offset of \p Units in
75// the specified encoding.
76// Conceptually, this converts to the encoding, truncates to CodeUnits,
77// converts back to UTF-8, and returns the length in bytes.
78static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc,
79 bool &Valid) {
80 Valid = Units >= 0;
81 if (Units <= 0)
82 return 0;
83 size_t Result = 0;
84 switch (Enc) {
86 Result = Units;
87 break;
89 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
90 Result += U8Len;
91 Units -= U16Len;
92 return Units <= 0;
93 });
94 if (Units < 0) // Offset in the middle of a surrogate pair.
95 Valid = false;
96 break;
98 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
99 Result += U8Len;
100 Units--;
101 return Units <= 0;
102 });
103 break;
105 llvm_unreachable("unsupported encoding");
106 }
107 // Don't return an out-of-range index if we overran.
108 if (Result > U8.size()) {
109 Valid = false;
110 return U8.size();
111 }
112 return Result;
113}
114
115llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
117 if (P.line < 0)
118 return error(llvm::errc::invalid_argument,
119 "Line value can't be negative ({0})", P.line);
120 if (P.character < 0)
121 return error(llvm::errc::invalid_argument,
122 "Character value can't be negative ({0})", P.character);
123 size_t StartOfLine = 0;
124 for (int I = 0; I != P.line; ++I) {
125 size_t NextNL = Code.find('\n', StartOfLine);
126 if (NextNL == llvm::StringRef::npos)
127 return error(llvm::errc::invalid_argument,
128 "Line value is out of range ({0})", P.line);
129 StartOfLine = NextNL + 1;
130 }
131 llvm::StringRef Line =
132 Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; });
133
134 // P.character may be in UTF-16, transcode if necessary.
135 bool Valid;
136 size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
138 return error(llvm::errc::invalid_argument,
139 "{0} offset {1} is invalid for line {2}", lspEncoding(),
140 P.character, P.line);
141 return StartOfLine + ByteInLine;
142}
143
144Position offsetToPosition(llvm::StringRef Code, size_t Offset) {
145 Offset = std::min(Code.size(), Offset);
146 llvm::StringRef Before = Code.substr(0, Offset);
147 int Lines = Before.count('\n');
148 size_t PrevNL = Before.rfind('\n');
149 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
150 Position Pos;
151 Pos.line = Lines;
152 Pos.character = lspLength(Before.substr(StartOfLine));
153 return Pos;
154}
155
156// Workaround for editors that have buggy handling of newlines at end of file.
157//
158// The editor is supposed to expose document contents over LSP as an exact
159// string, with whitespace and newlines well-defined. But internally many
160// editors treat text as an array of lines, and there can be ambiguity over
161// whether the last line ends with a newline or not.
162//
163// This confusion can lead to incorrect edits being sent. Failing to apply them
164// is catastrophic: we're desynced, LSP has no mechanism to get back in sync.
165// We apply a heuristic to avoid this state.
166//
167// If our current view of an N-line file does *not* end in a newline, but the
168// editor refers to the start of the next line (an impossible location), then
169// we silently add a newline to make this valid.
170// We will still validate that the rangeLength is correct, *including* the
171// inferred newline.
172//
173// See https://github.com/neovim/neovim/issues/17085
174static void inferFinalNewline(llvm::Expected<size_t> &Err,
175 std::string &Contents, const Position &Pos) {
176 if (Err)
177 return;
178 if (!Contents.empty() && Contents.back() == '\n')
179 return;
180 if (Pos.character != 0)
181 return;
182 if (Pos.line != llvm::count(Contents, '\n') + 1)
183 return;
184 log("Editor sent invalid change coordinates, inferring newline at EOF");
185 Contents.push_back('\n');
186 consumeError(Err.takeError());
187 Err = Contents.size();
188}
189
190llvm::Error applyChange(std::string &Contents,
192 if (!Change.range) {
193 Contents = Change.text;
194 return llvm::Error::success();
195 }
196
197 const Position &Start = Change.range->start;
198 llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false);
199 inferFinalNewline(StartIndex, Contents, Start);
200 if (!StartIndex)
201 return StartIndex.takeError();
202
203 const Position &End = Change.range->end;
204 llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false);
205 inferFinalNewline(EndIndex, Contents, End);
206 if (!EndIndex)
207 return EndIndex.takeError();
208
209 if (*EndIndex < *StartIndex)
210 return error(llvm::errc::invalid_argument,
211 "Range's end position ({0}) is before start position ({1})",
212 End, Start);
213
214 // Since the range length between two LSP positions is dependent on the
215 // contents of the buffer we compute the range length between the start and
216 // end position ourselves and compare it to the range length of the LSP
217 // message to verify the buffers of the client and server are in sync.
218
219 // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16
220 // code units.
222 lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex));
223
224 if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength)
225 return error(llvm::errc::invalid_argument,
226 "Change's rangeLength ({0}) doesn't match the "
227 "computed range length ({1}).",
228 *Change.rangeLength, ComputedRangeLength);
229
230 Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.text);
231
232 return llvm::Error::success();
233}
234} // namespace lspserver
Whether current platform treats paths case insensitively.
Definition Connection.h:11
llvm::unique_function< void(llvm::Expected< T >)> Callback
Definition Function.h:14
llvm::Error error(std::error_code EC, const char *Fmt, Ts &&...Vals)
Definition Logger.h:70
llvm::Error applyChange(std::string &Contents, const TextDocumentContentChangeEvent &Change)
Apply an incremental update to a text document.
bool fromJSON(const llvm::json::Value &, URIForFile &, llvm::json::Path)
size_t lspLength(llvm::StringRef Code)
void elog(const char *Fmt, Ts &&...Vals)
Definition Logger.h:52
Position offsetToPosition(llvm::StringRef Code, size_t Offset)
llvm::Expected< size_t > positionToOffset(llvm::StringRef Code, Position P, bool AllowColumnsBeyondLineLength=true)
void log(const char *Fmt, Ts &&...Vals)
Definition Logger.h:58
int line
Line position in a document (zero-based).