Rubin Xu | 6e1e26a | 2021-02-10 00:04:48 +0000 | [diff] [blame^] | 1 | // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #ifndef V8_JSON_JSON_PARSER_H_ |
| 6 | #define V8_JSON_JSON_PARSER_H_ |
| 7 | |
| 8 | #include "src/execution/isolate.h" |
| 9 | #include "src/heap/factory.h" |
| 10 | #include "src/objects/objects.h" |
| 11 | #include "src/zone/zone-containers.h" |
| 12 | |
| 13 | namespace v8 { |
| 14 | namespace internal { |
| 15 | |
// Two-valued, unscoped result code. The enumerators keep their implicit
// values: kElementFound is 0 and kElementNotFound is 1.
enum ParseElementResult {
  kElementFound,
  kElementNotFound
};
| 17 | |
| 18 | class JsonString final { |
| 19 | public: |
| 20 | JsonString() |
| 21 | : start_(0), |
| 22 | length_(0), |
| 23 | needs_conversion_(false), |
| 24 | internalize_(false), |
| 25 | has_escape_(false), |
| 26 | is_index_(false) {} |
| 27 | |
| 28 | explicit JsonString(uint32_t index) |
| 29 | : index_(index), |
| 30 | length_(0), |
| 31 | needs_conversion_(false), |
| 32 | internalize_(false), |
| 33 | has_escape_(false), |
| 34 | is_index_(true) {} |
| 35 | |
| 36 | JsonString(int start, int length, bool needs_conversion, |
| 37 | bool needs_internalization, bool has_escape) |
| 38 | : start_(start), |
| 39 | length_(length), |
| 40 | needs_conversion_(needs_conversion), |
| 41 | internalize_(needs_internalization || |
| 42 | length_ <= kMaxInternalizedStringValueLength), |
| 43 | has_escape_(has_escape), |
| 44 | is_index_(false) {} |
| 45 | |
| 46 | bool internalize() const { |
| 47 | DCHECK(!is_index_); |
| 48 | return internalize_; |
| 49 | } |
| 50 | |
| 51 | bool needs_conversion() const { |
| 52 | DCHECK(!is_index_); |
| 53 | return needs_conversion_; |
| 54 | } |
| 55 | |
| 56 | bool has_escape() const { |
| 57 | DCHECK(!is_index_); |
| 58 | return has_escape_; |
| 59 | } |
| 60 | |
| 61 | int start() const { |
| 62 | DCHECK(!is_index_); |
| 63 | return start_; |
| 64 | } |
| 65 | |
| 66 | int length() const { |
| 67 | DCHECK(!is_index_); |
| 68 | return length_; |
| 69 | } |
| 70 | |
| 71 | uint32_t index() const { |
| 72 | DCHECK(is_index_); |
| 73 | return index_; |
| 74 | } |
| 75 | |
| 76 | bool is_index() const { return is_index_; } |
| 77 | |
| 78 | private: |
| 79 | static const int kMaxInternalizedStringValueLength = 10; |
| 80 | |
| 81 | union { |
| 82 | const int start_; |
| 83 | const uint32_t index_; |
| 84 | }; |
| 85 | const int length_; |
| 86 | const bool needs_conversion_ : 1; |
| 87 | const bool internalize_ : 1; |
| 88 | const bool has_escape_ : 1; |
| 89 | const bool is_index_ : 1; |
| 90 | }; |
| 91 | |
| 92 | struct JsonProperty { |
| 93 | JsonProperty() { UNREACHABLE(); } |
| 94 | explicit JsonProperty(const JsonString& string) : string(string) {} |
| 95 | |
| 96 | JsonString string; |
| 97 | Handle<Object> value; |
| 98 | }; |
| 99 | |
| 100 | class JsonParseInternalizer { |
| 101 | public: |
| 102 | static MaybeHandle<Object> Internalize(Isolate* isolate, |
| 103 | Handle<Object> object, |
| 104 | Handle<Object> reviver); |
| 105 | |
| 106 | private: |
| 107 | JsonParseInternalizer(Isolate* isolate, Handle<JSReceiver> reviver) |
| 108 | : isolate_(isolate), reviver_(reviver) {} |
| 109 | |
| 110 | MaybeHandle<Object> InternalizeJsonProperty(Handle<JSReceiver> holder, |
| 111 | Handle<String> key); |
| 112 | |
| 113 | bool RecurseAndApply(Handle<JSReceiver> holder, Handle<String> name); |
| 114 | |
| 115 | Isolate* isolate_; |
| 116 | Handle<JSReceiver> reviver_; |
| 117 | }; |
| 118 | |
// Token kinds tracked by JsonParser (see JsonParser::peek()). Stored in a
// single byte; enumerators keep their implicit values 0..13 in the order
// listed, so the order must not be changed casually.
enum class JsonToken : uint8_t {
  // Scalar value starts.
  NUMBER,
  STRING,

  // Object and array delimiters.
  LBRACE,
  RBRACE,
  LBRACK,
  RBRACK,

  // Keyword literals.
  TRUE_LITERAL,
  FALSE_LITERAL,
  NULL_LITERAL,

  // Separators and filler.
  WHITESPACE,
  COLON,
  COMMA,

  // Anything not recognised, and end of the source string.
  ILLEGAL,
  EOS
};
| 135 | |
| 136 | // A simple json parser. |
| 137 | template <typename Char> |
| 138 | class JsonParser final { |
| 139 | public: |
| 140 | using SeqString = typename CharTraits<Char>::String; |
| 141 | using SeqExternalString = typename CharTraits<Char>::ExternalString; |
| 142 | |
| 143 | V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Parse( |
| 144 | Isolate* isolate, Handle<String> source, Handle<Object> reviver) { |
| 145 | Handle<Object> result; |
| 146 | ASSIGN_RETURN_ON_EXCEPTION(isolate, result, |
| 147 | JsonParser(isolate, source).ParseJson(), Object); |
| 148 | if (reviver->IsCallable()) { |
| 149 | return JsonParseInternalizer::Internalize(isolate, result, reviver); |
| 150 | } |
| 151 | return result; |
| 152 | } |
| 153 | |
| 154 | static constexpr uc32 kEndOfString = static_cast<uc32>(-1); |
| 155 | static constexpr uc32 kInvalidUnicodeCharacter = static_cast<uc32>(-1); |
| 156 | |
| 157 | private: |
| 158 | struct JsonContinuation { |
| 159 | enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement }; |
| 160 | JsonContinuation(Isolate* isolate, Type type, size_t index) |
| 161 | : scope(isolate), |
| 162 | type_(type), |
| 163 | index(static_cast<uint32_t>(index)), |
| 164 | max_index(0), |
| 165 | elements(0) {} |
| 166 | |
| 167 | Type type() const { return static_cast<Type>(type_); } |
| 168 | void set_type(Type type) { type_ = static_cast<uint8_t>(type); } |
| 169 | |
| 170 | HandleScope scope; |
| 171 | // Unfortunately GCC doesn't like packing Type in two bits. |
| 172 | uint32_t type_ : 2; |
| 173 | uint32_t index : 30; |
| 174 | uint32_t max_index; |
| 175 | uint32_t elements; |
| 176 | }; |
| 177 | |
| 178 | JsonParser(Isolate* isolate, Handle<String> source); |
| 179 | ~JsonParser(); |
| 180 | |
| 181 | // Parse a string containing a single JSON value. |
| 182 | MaybeHandle<Object> ParseJson(); |
| 183 | |
| 184 | void advance() { ++cursor_; } |
| 185 | |
| 186 | uc32 CurrentCharacter() { |
| 187 | if (V8_UNLIKELY(is_at_end())) return kEndOfString; |
| 188 | return *cursor_; |
| 189 | } |
| 190 | |
| 191 | uc32 NextCharacter() { |
| 192 | advance(); |
| 193 | return CurrentCharacter(); |
| 194 | } |
| 195 | |
| 196 | void AdvanceToNonDecimal(); |
| 197 | |
| 198 | V8_INLINE JsonToken peek() const { return next_; } |
| 199 | |
| 200 | void Consume(JsonToken token) { |
| 201 | DCHECK_EQ(peek(), token); |
| 202 | advance(); |
| 203 | } |
| 204 | |
| 205 | void Expect(JsonToken token) { |
| 206 | if (V8_LIKELY(peek() == token)) { |
| 207 | advance(); |
| 208 | } else { |
| 209 | ReportUnexpectedToken(peek()); |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | void ExpectNext(JsonToken token) { |
| 214 | SkipWhitespace(); |
| 215 | Expect(token); |
| 216 | } |
| 217 | |
| 218 | bool Check(JsonToken token) { |
| 219 | SkipWhitespace(); |
| 220 | if (next_ != token) return false; |
| 221 | advance(); |
| 222 | return true; |
| 223 | } |
| 224 | |
| 225 | template <size_t N> |
| 226 | void ScanLiteral(const char (&s)[N]) { |
| 227 | DCHECK(!is_at_end()); |
| 228 | // There's at least 1 character, we always consume a character and compare |
| 229 | // the next character. The first character was compared before we jumped |
| 230 | // to ScanLiteral. |
| 231 | STATIC_ASSERT(N > 2); |
| 232 | size_t remaining = static_cast<size_t>(end_ - cursor_); |
| 233 | if (V8_LIKELY(remaining >= N - 1 && |
| 234 | CompareChars(s + 1, cursor_ + 1, N - 2) == 0)) { |
| 235 | cursor_ += N - 1; |
| 236 | return; |
| 237 | } |
| 238 | |
| 239 | cursor_++; |
| 240 | for (size_t i = 0; i < Min(N - 2, remaining - 1); i++) { |
| 241 | if (*(s + 1 + i) != *cursor_) { |
| 242 | ReportUnexpectedCharacter(*cursor_); |
| 243 | return; |
| 244 | } |
| 245 | cursor_++; |
| 246 | } |
| 247 | |
| 248 | DCHECK(is_at_end()); |
| 249 | ReportUnexpectedToken(JsonToken::EOS); |
| 250 | } |
| 251 | |
| 252 | // The JSON lexical grammar is specified in the ECMAScript 5 standard, |
| 253 | // section 15.12.1.1. The only allowed whitespace characters between tokens |
| 254 | // are tab, carriage-return, newline and space. |
| 255 | void SkipWhitespace(); |
| 256 | |
| 257 | // A JSON string (production JSONString) is subset of valid JavaScript string |
| 258 | // literals. The string must only be double-quoted (not single-quoted), and |
| 259 | // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
| 260 | // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
| 261 | JsonString ScanJsonString(bool needs_internalization); |
| 262 | JsonString ScanJsonPropertyKey(JsonContinuation* cont); |
| 263 | uc32 ScanUnicodeCharacter(); |
| 264 | Handle<String> MakeString(const JsonString& string, |
| 265 | Handle<String> hint = Handle<String>()); |
| 266 | |
| 267 | template <typename SinkChar> |
| 268 | void DecodeString(SinkChar* sink, int start, int length); |
| 269 | |
| 270 | template <typename SinkSeqString> |
| 271 | Handle<String> DecodeString(const JsonString& string, |
| 272 | Handle<SinkSeqString> intermediate, |
| 273 | Handle<String> hint); |
| 274 | |
| 275 | // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
| 276 | // decimal number literals. |
| 277 | // It includes an optional minus sign, must have at least one |
| 278 | // digit before and after a decimal point, may not have prefixed zeros (unless |
| 279 | // the integer part is zero), and may include an exponent part (e.g., "e-10"). |
| 280 | // Hexadecimal and octal numbers are not allowed. |
| 281 | Handle<Object> ParseJsonNumber(); |
| 282 | |
| 283 | // Parse a single JSON value from input (grammar production JSONValue). |
| 284 | // A JSON value is either a (double-quoted) string literal, a number literal, |
| 285 | // one of "true", "false", or "null", or an object or array literal. |
| 286 | MaybeHandle<Object> ParseJsonValue(); |
| 287 | |
| 288 | Handle<Object> BuildJsonObject( |
| 289 | const JsonContinuation& cont, |
| 290 | const std::vector<JsonProperty>& property_stack, Handle<Map> feedback); |
| 291 | Handle<Object> BuildJsonArray( |
| 292 | const JsonContinuation& cont, |
| 293 | const std::vector<Handle<Object>>& element_stack); |
| 294 | |
| 295 | // Mark that a parsing error has happened at the current character. |
| 296 | void ReportUnexpectedCharacter(uc32 c); |
| 297 | // Mark that a parsing error has happened at the current token. |
| 298 | void ReportUnexpectedToken(JsonToken token); |
| 299 | |
| 300 | inline Isolate* isolate() { return isolate_; } |
| 301 | inline Factory* factory() { return isolate_->factory(); } |
| 302 | inline Handle<JSFunction> object_constructor() { return object_constructor_; } |
| 303 | |
| 304 | static const int kInitialSpecialStringLength = 32; |
| 305 | |
| 306 | static void UpdatePointersCallback(v8::Isolate* v8_isolate, v8::GCType type, |
| 307 | v8::GCCallbackFlags flags, void* parser) { |
| 308 | reinterpret_cast<JsonParser<Char>*>(parser)->UpdatePointers(); |
| 309 | } |
| 310 | |
| 311 | void UpdatePointers() { |
| 312 | DisallowHeapAllocation no_gc; |
| 313 | const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc); |
| 314 | if (chars_ != chars) { |
| 315 | size_t position = cursor_ - chars_; |
| 316 | size_t length = end_ - chars_; |
| 317 | chars_ = chars; |
| 318 | cursor_ = chars_ + position; |
| 319 | end_ = chars_ + length; |
| 320 | } |
| 321 | } |
| 322 | |
| 323 | private: |
| 324 | static const bool kIsOneByte = sizeof(Char) == 1; |
| 325 | |
| 326 | bool is_at_end() const { |
| 327 | DCHECK_LE(cursor_, end_); |
| 328 | return cursor_ == end_; |
| 329 | } |
| 330 | |
| 331 | int position() const { return static_cast<int>(cursor_ - chars_); } |
| 332 | |
| 333 | Isolate* isolate_; |
| 334 | const uint64_t hash_seed_; |
| 335 | JsonToken next_; |
| 336 | // Indicates whether the bytes underneath source_ can relocate during GC. |
| 337 | bool chars_may_relocate_; |
| 338 | Handle<JSFunction> object_constructor_; |
| 339 | const Handle<String> original_source_; |
| 340 | Handle<String> source_; |
| 341 | |
| 342 | // Cached pointer to the raw chars in source. In case source is on-heap, we |
| 343 | // register an UpdatePointers callback. For this reason, chars_, cursor_ and |
| 344 | // end_ should never be locally cached across a possible allocation. The scope |
| 345 | // in which we cache chars has to be guarded by a DisallowHeapAllocation |
| 346 | // scope. |
| 347 | const Char* cursor_; |
| 348 | const Char* end_; |
| 349 | const Char* chars_; |
| 350 | }; |
| 351 | |
| 352 | // Explicit instantiation declarations. |
| 353 | extern template class JsonParser<uint8_t>; |
| 354 | extern template class JsonParser<uint16_t>; |
| 355 | |
| 356 | } // namespace internal |
| 357 | } // namespace v8 |
| 358 | |
| 359 | #endif // V8_JSON_JSON_PARSER_H_ |