blob: 6219cd3b5d198501e6d078603897615dcd0e3576 [file] [log] [blame]
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef V8_JSON_JSON_PARSER_H_
6#define V8_JSON_JSON_PARSER_H_
7
8#include "src/execution/isolate.h"
9#include "src/heap/factory.h"
10#include "src/objects/objects.h"
11#include "src/zone/zone-containers.h"
12
13namespace v8 {
14namespace internal {
15
// Two-state result code: an element was either found or not found.
enum ParseElementResult {
  kElementFound,
  kElementNotFound
};
17
18class JsonString final {
19 public:
20 JsonString()
21 : start_(0),
22 length_(0),
23 needs_conversion_(false),
24 internalize_(false),
25 has_escape_(false),
26 is_index_(false) {}
27
28 explicit JsonString(uint32_t index)
29 : index_(index),
30 length_(0),
31 needs_conversion_(false),
32 internalize_(false),
33 has_escape_(false),
34 is_index_(true) {}
35
36 JsonString(int start, int length, bool needs_conversion,
37 bool needs_internalization, bool has_escape)
38 : start_(start),
39 length_(length),
40 needs_conversion_(needs_conversion),
41 internalize_(needs_internalization ||
42 length_ <= kMaxInternalizedStringValueLength),
43 has_escape_(has_escape),
44 is_index_(false) {}
45
46 bool internalize() const {
47 DCHECK(!is_index_);
48 return internalize_;
49 }
50
51 bool needs_conversion() const {
52 DCHECK(!is_index_);
53 return needs_conversion_;
54 }
55
56 bool has_escape() const {
57 DCHECK(!is_index_);
58 return has_escape_;
59 }
60
61 int start() const {
62 DCHECK(!is_index_);
63 return start_;
64 }
65
66 int length() const {
67 DCHECK(!is_index_);
68 return length_;
69 }
70
71 uint32_t index() const {
72 DCHECK(is_index_);
73 return index_;
74 }
75
76 bool is_index() const { return is_index_; }
77
78 private:
79 static const int kMaxInternalizedStringValueLength = 10;
80
81 union {
82 const int start_;
83 const uint32_t index_;
84 };
85 const int length_;
86 const bool needs_conversion_ : 1;
87 const bool internalize_ : 1;
88 const bool has_escape_ : 1;
89 const bool is_index_ : 1;
90};
91
92struct JsonProperty {
93 JsonProperty() { UNREACHABLE(); }
94 explicit JsonProperty(const JsonString& string) : string(string) {}
95
96 JsonString string;
97 Handle<Object> value;
98};
99
100class JsonParseInternalizer {
101 public:
102 static MaybeHandle<Object> Internalize(Isolate* isolate,
103 Handle<Object> object,
104 Handle<Object> reviver);
105
106 private:
107 JsonParseInternalizer(Isolate* isolate, Handle<JSReceiver> reviver)
108 : isolate_(isolate), reviver_(reviver) {}
109
110 MaybeHandle<Object> InternalizeJsonProperty(Handle<JSReceiver> holder,
111 Handle<String> key);
112
113 bool RecurseAndApply(Handle<JSReceiver> holder, Handle<String> name);
114
115 Isolate* isolate_;
116 Handle<JSReceiver> reviver_;
117};
118
// Token categories used by JsonParser; JsonParser::peek() returns the current
// one. Stored in a single byte. Enumerator order is significant to any code
// that relies on the numeric values, so new entries must not be inserted in
// the middle without auditing users.
enum class JsonToken : uint8_t {
  // Values.
  NUMBER,
  STRING,
  // Structural brackets.
  LBRACE,
  RBRACE,
  LBRACK,
  RBRACK,
  // Literals.
  TRUE_LITERAL,
  FALSE_LITERAL,
  NULL_LITERAL,
  // Separators and filler.
  WHITESPACE,
  COLON,
  COMMA,
  // Error and end-of-input markers.
  ILLEGAL,
  EOS
};
135
136// A simple json parser.
137template <typename Char>
138class JsonParser final {
139 public:
140 using SeqString = typename CharTraits<Char>::String;
141 using SeqExternalString = typename CharTraits<Char>::ExternalString;
142
143 V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Parse(
144 Isolate* isolate, Handle<String> source, Handle<Object> reviver) {
145 Handle<Object> result;
146 ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
147 JsonParser(isolate, source).ParseJson(), Object);
148 if (reviver->IsCallable()) {
149 return JsonParseInternalizer::Internalize(isolate, result, reviver);
150 }
151 return result;
152 }
153
154 static constexpr uc32 kEndOfString = static_cast<uc32>(-1);
155 static constexpr uc32 kInvalidUnicodeCharacter = static_cast<uc32>(-1);
156
157 private:
158 struct JsonContinuation {
159 enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement };
160 JsonContinuation(Isolate* isolate, Type type, size_t index)
161 : scope(isolate),
162 type_(type),
163 index(static_cast<uint32_t>(index)),
164 max_index(0),
165 elements(0) {}
166
167 Type type() const { return static_cast<Type>(type_); }
168 void set_type(Type type) { type_ = static_cast<uint8_t>(type); }
169
170 HandleScope scope;
171 // Unfortunately GCC doesn't like packing Type in two bits.
172 uint32_t type_ : 2;
173 uint32_t index : 30;
174 uint32_t max_index;
175 uint32_t elements;
176 };
177
178 JsonParser(Isolate* isolate, Handle<String> source);
179 ~JsonParser();
180
181 // Parse a string containing a single JSON value.
182 MaybeHandle<Object> ParseJson();
183
184 void advance() { ++cursor_; }
185
186 uc32 CurrentCharacter() {
187 if (V8_UNLIKELY(is_at_end())) return kEndOfString;
188 return *cursor_;
189 }
190
191 uc32 NextCharacter() {
192 advance();
193 return CurrentCharacter();
194 }
195
196 void AdvanceToNonDecimal();
197
198 V8_INLINE JsonToken peek() const { return next_; }
199
200 void Consume(JsonToken token) {
201 DCHECK_EQ(peek(), token);
202 advance();
203 }
204
205 void Expect(JsonToken token) {
206 if (V8_LIKELY(peek() == token)) {
207 advance();
208 } else {
209 ReportUnexpectedToken(peek());
210 }
211 }
212
213 void ExpectNext(JsonToken token) {
214 SkipWhitespace();
215 Expect(token);
216 }
217
218 bool Check(JsonToken token) {
219 SkipWhitespace();
220 if (next_ != token) return false;
221 advance();
222 return true;
223 }
224
225 template <size_t N>
226 void ScanLiteral(const char (&s)[N]) {
227 DCHECK(!is_at_end());
228 // There's at least 1 character, we always consume a character and compare
229 // the next character. The first character was compared before we jumped
230 // to ScanLiteral.
231 STATIC_ASSERT(N > 2);
232 size_t remaining = static_cast<size_t>(end_ - cursor_);
233 if (V8_LIKELY(remaining >= N - 1 &&
234 CompareChars(s + 1, cursor_ + 1, N - 2) == 0)) {
235 cursor_ += N - 1;
236 return;
237 }
238
239 cursor_++;
240 for (size_t i = 0; i < Min(N - 2, remaining - 1); i++) {
241 if (*(s + 1 + i) != *cursor_) {
242 ReportUnexpectedCharacter(*cursor_);
243 return;
244 }
245 cursor_++;
246 }
247
248 DCHECK(is_at_end());
249 ReportUnexpectedToken(JsonToken::EOS);
250 }
251
252 // The JSON lexical grammar is specified in the ECMAScript 5 standard,
253 // section 15.12.1.1. The only allowed whitespace characters between tokens
254 // are tab, carriage-return, newline and space.
255 void SkipWhitespace();
256
257 // A JSON string (production JSONString) is subset of valid JavaScript string
258 // literals. The string must only be double-quoted (not single-quoted), and
259 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
260 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
261 JsonString ScanJsonString(bool needs_internalization);
262 JsonString ScanJsonPropertyKey(JsonContinuation* cont);
263 uc32 ScanUnicodeCharacter();
264 Handle<String> MakeString(const JsonString& string,
265 Handle<String> hint = Handle<String>());
266
267 template <typename SinkChar>
268 void DecodeString(SinkChar* sink, int start, int length);
269
270 template <typename SinkSeqString>
271 Handle<String> DecodeString(const JsonString& string,
272 Handle<SinkSeqString> intermediate,
273 Handle<String> hint);
274
275 // A JSON number (production JSONNumber) is a subset of the valid JavaScript
276 // decimal number literals.
277 // It includes an optional minus sign, must have at least one
278 // digit before and after a decimal point, may not have prefixed zeros (unless
279 // the integer part is zero), and may include an exponent part (e.g., "e-10").
280 // Hexadecimal and octal numbers are not allowed.
281 Handle<Object> ParseJsonNumber();
282
283 // Parse a single JSON value from input (grammar production JSONValue).
284 // A JSON value is either a (double-quoted) string literal, a number literal,
285 // one of "true", "false", or "null", or an object or array literal.
286 MaybeHandle<Object> ParseJsonValue();
287
288 Handle<Object> BuildJsonObject(
289 const JsonContinuation& cont,
290 const std::vector<JsonProperty>& property_stack, Handle<Map> feedback);
291 Handle<Object> BuildJsonArray(
292 const JsonContinuation& cont,
293 const std::vector<Handle<Object>>& element_stack);
294
295 // Mark that a parsing error has happened at the current character.
296 void ReportUnexpectedCharacter(uc32 c);
297 // Mark that a parsing error has happened at the current token.
298 void ReportUnexpectedToken(JsonToken token);
299
300 inline Isolate* isolate() { return isolate_; }
301 inline Factory* factory() { return isolate_->factory(); }
302 inline Handle<JSFunction> object_constructor() { return object_constructor_; }
303
304 static const int kInitialSpecialStringLength = 32;
305
306 static void UpdatePointersCallback(v8::Isolate* v8_isolate, v8::GCType type,
307 v8::GCCallbackFlags flags, void* parser) {
308 reinterpret_cast<JsonParser<Char>*>(parser)->UpdatePointers();
309 }
310
311 void UpdatePointers() {
312 DisallowHeapAllocation no_gc;
313 const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc);
314 if (chars_ != chars) {
315 size_t position = cursor_ - chars_;
316 size_t length = end_ - chars_;
317 chars_ = chars;
318 cursor_ = chars_ + position;
319 end_ = chars_ + length;
320 }
321 }
322
323 private:
324 static const bool kIsOneByte = sizeof(Char) == 1;
325
326 bool is_at_end() const {
327 DCHECK_LE(cursor_, end_);
328 return cursor_ == end_;
329 }
330
331 int position() const { return static_cast<int>(cursor_ - chars_); }
332
333 Isolate* isolate_;
334 const uint64_t hash_seed_;
335 JsonToken next_;
336 // Indicates whether the bytes underneath source_ can relocate during GC.
337 bool chars_may_relocate_;
338 Handle<JSFunction> object_constructor_;
339 const Handle<String> original_source_;
340 Handle<String> source_;
341
342 // Cached pointer to the raw chars in source. In case source is on-heap, we
343 // register an UpdatePointers callback. For this reason, chars_, cursor_ and
344 // end_ should never be locally cached across a possible allocation. The scope
345 // in which we cache chars has to be guarded by a DisallowHeapAllocation
346 // scope.
347 const Char* cursor_;
348 const Char* end_;
349 const Char* chars_;
350};
351
352// Explicit instantiation declarations.
353extern template class JsonParser<uint8_t>;
354extern template class JsonParser<uint16_t>;
355
356} // namespace internal
357} // namespace v8
358
359#endif // V8_JSON_JSON_PARSER_H_