blob: 3e20b5f80c631e5fe37cdee226a5d24c4b5e6479 [file] [log] [blame]
Rubin Xu6e1e26a2021-02-10 00:04:48 +00001// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_REGEXP_REGEXP_H_
6#define V8_REGEXP_REGEXP_H_
7
8#include "src/objects/js-regexp.h"
9#include "src/regexp/regexp-error.h"
10
11namespace v8 {
12namespace internal {
13
14class RegExpNode;
15class RegExpTree;
16
// Identifies what a regexp is compiled to: interpreter bytecode or native
// code. The underlying type is int; enumerators take the values 0 and 1.
enum class RegExpCompilationTarget : int {
  kBytecode,
  kNative,
};
18
19// TODO(jgruber): Do not expose in regexp.h.
20// TODO(jgruber): Consider splitting between ParseData and CompileData.
21struct RegExpCompileData {
22 // The parsed AST as produced by the RegExpParser.
23 RegExpTree* tree = nullptr;
24
25 // The compiled Node graph as produced by RegExpTree::ToNode methods.
26 RegExpNode* node = nullptr;
27
28 // Either the generated code as produced by the compiler or a trampoline
29 // to the interpreter.
30 Handle<Object> code;
31
32 // True, iff the pattern is a 'simple' atom with zero captures. In other
33 // words, the pattern consists of a string with no metacharacters and special
34 // regexp features, and can be implemented as a standard string search.
35 bool simple = true;
36
37 // True, iff the pattern is anchored at the start of the string with '^'.
38 bool contains_anchor = false;
39
40 // Only use if the pattern contains named captures. If so, this contains a
41 // mapping of capture names to capture indices.
42 Handle<FixedArray> capture_name_map;
43
44 // The error message. Only used if an error occurred during parsing or
45 // compilation.
46 RegExpError error = RegExpError::kNone;
47
48 // The position at which the error was detected. Only used if an
49 // error occurred.
50 int error_pos = 0;
51
52 // The number of capture groups, without the global capture \0.
53 int capture_count = 0;
54
55 // The number of registers used by the generated code.
56 int register_count = 0;
57
58 // The compilation target (bytecode or native code).
59 RegExpCompilationTarget compilation_target;
60};
61
62class RegExp final : public AllStatic {
63 public:
64 // Whether the irregexp engine generates interpreter bytecode.
65 static bool CanGenerateBytecode() {
66 return FLAG_regexp_interpret_all || FLAG_regexp_tier_up;
67 }
68
69 // Parses the RegExp pattern and prepares the JSRegExp object with
70 // generic data and choice of implementation - as well as what
71 // the implementation wants to store in the data field.
72 // Returns false if compilation fails.
73 V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile(
74 Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
75 JSRegExp::Flags flags, uint32_t backtrack_limit);
76
77 // Ensures that a regexp is fully compiled and ready to be executed on a
78 // subject string. Returns true on success. Return false on failure, and
79 // then an exception will be pending.
80 V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
81 Handle<JSRegExp> re,
82 Handle<String> subject);
83
84 enum CallOrigin : int {
85 kFromRuntime = 0,
86 kFromJs = 1,
87 };
88
89 // See ECMA-262 section 15.10.6.2.
90 // This function calls the garbage collector if necessary.
91 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
92 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
93 int index, Handle<RegExpMatchInfo> last_match_info);
94
95 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object>
96 ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp,
97 Handle<String> subject, int index,
98 Handle<RegExpMatchInfo> last_match_info);
99
100 // Integral return values used throughout regexp code layers.
101 static constexpr int kInternalRegExpFailure = 0;
102 static constexpr int kInternalRegExpSuccess = 1;
103 static constexpr int kInternalRegExpException = -1;
104 static constexpr int kInternalRegExpRetry = -2;
105 static constexpr int kInternalRegExpFallbackToExperimental = -3;
106 static constexpr int kInternalRegExpSmallestResult = -3;
107
108 enum IrregexpResult : int32_t {
109 RE_FAILURE = kInternalRegExpFailure,
110 RE_SUCCESS = kInternalRegExpSuccess,
111 RE_EXCEPTION = kInternalRegExpException,
112 RE_RETRY = kInternalRegExpRetry,
113 RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental,
114 };
115
116 // Set last match info. If match is nullptr, then setting captures is
117 // omitted.
118 static Handle<RegExpMatchInfo> SetLastMatchInfo(
119 Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
120 Handle<String> subject, int capture_count, int32_t* match);
121
122 V8_EXPORT_PRIVATE static bool CompileForTesting(Isolate* isolate, Zone* zone,
123 RegExpCompileData* input,
124 JSRegExp::Flags flags,
125 Handle<String> pattern,
126 Handle<String> sample_subject,
127 bool is_one_byte);
128
129 V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,
130 RegExpNode* node);
131
132 static const int kRegExpTooLargeToOptimize = 20 * KB;
133
134 V8_WARN_UNUSED_RESULT
135 static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
136 Handle<JSRegExp> re,
137 Handle<String> pattern,
138 RegExpError error);
139 static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
140 RegExpError error_text);
141
142 static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp);
143};
144
145// Uses a special global mode of irregexp-generated code to perform a global
146// search and return multiple results at once. As such, this is essentially an
147// iterator over multiple results (retrieved batch-wise in advance).
148class RegExpGlobalCache final {
149 public:
150 RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject,
151 Isolate* isolate);
152
153 ~RegExpGlobalCache();
154
155 // Fetch the next entry in the cache for global regexp match results.
156 // This does not set the last match info. Upon failure, nullptr is
157 // returned. The cause can be checked with Result(). The previous result is
158 // still in available in memory when a failure happens.
159 int32_t* FetchNext();
160
161 int32_t* LastSuccessfulMatch();
162
163 bool HasException() { return num_matches_ < 0; }
164
165 private:
166 int AdvanceZeroLength(int last_index);
167
168 int num_matches_;
169 int max_matches_;
170 int current_match_index_;
171 int registers_per_match_;
172 // Pointer to the last set of captures.
173 int32_t* register_array_;
174 int register_array_size_;
175 Handle<JSRegExp> regexp_;
176 Handle<String> subject_;
177 Isolate* isolate_;
178};
179
180// Caches results for specific regexp queries on the isolate. At the time of
181// writing, this is used during global calls to RegExp.prototype.exec and
182// @@split.
183class RegExpResultsCache final : public AllStatic {
184 public:
185 enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };
186
187 // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi.
188 // On success, the returned result is guaranteed to be a COW-array.
189 static Object Lookup(Heap* heap, String key_string, Object key_pattern,
190 FixedArray* last_match_out, ResultsCacheType type);
191 // Attempt to add value_array to the cache specified by type. On success,
192 // value_array is turned into a COW-array.
193 static void Enter(Isolate* isolate, Handle<String> key_string,
194 Handle<Object> key_pattern, Handle<FixedArray> value_array,
195 Handle<FixedArray> last_match_cache, ResultsCacheType type);
196 static void Clear(FixedArray cache);
197
198 static constexpr int kRegExpResultsCacheSize = 0x100;
199
200 private:
201 static constexpr int kStringOffset = 0;
202 static constexpr int kPatternOffset = 1;
203 static constexpr int kArrayOffset = 2;
204 static constexpr int kLastMatchOffset = 3;
205 static constexpr int kArrayEntriesPerCacheEntry = 4;
206};
207
208} // namespace internal
209} // namespace v8
210
211#endif // V8_REGEXP_REGEXP_H_