Blame - src/regexp/regexp.cc - platform/external/v8

blob: b62ad1fff816c416ae2018e3843147f30f0c0f06 [file] [log] [blame]

Rubin Xu	6e1e26a	2021-02-10 00:04:48 +0000	[diff] [blame^]	1	// Copyright 2012 the V8 project authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "src/regexp/regexp.h"
				6
				7	#include "src/codegen/compilation-cache.h"
				8	#include "src/diagnostics/code-tracer.h"
				9	#include "src/heap/heap-inl.h"
				10	#include "src/objects/js-regexp-inl.h"
				11	#include "src/regexp/experimental/experimental.h"
				12	#include "src/regexp/regexp-bytecode-generator.h"
				13	#include "src/regexp/regexp-bytecodes.h"
				14	#include "src/regexp/regexp-compiler.h"
				15	#include "src/regexp/regexp-dotprinter.h"
				16	#include "src/regexp/regexp-interpreter.h"
				17	#include "src/regexp/regexp-macro-assembler-arch.h"
				18	#include "src/regexp/regexp-macro-assembler-tracer.h"
				19	#include "src/regexp/regexp-parser.h"
				20	#include "src/regexp/regexp-utils.h"
				21	#include "src/strings/string-search.h"
				22	#include "src/utils/ostreams.h"
				23
				24	namespace v8 {
				25	namespace internal {
				26
				27	using namespace regexp_compiler_constants; // NOLINT(build/namespaces)
				28
				29	class RegExpImpl final : public AllStatic {
				30	public:
				31	// Returns a string representation of a regular expression.
				32	// Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
				33	// This function calls the garbage collector if necessary.
				34	static Handle<String> ToString(Handle<Object> value);
				35
				36	// Prepares a JSRegExp object with Irregexp-specific data.
				37	static void IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
				38	Handle<String> pattern, JSRegExp::Flags flags,
				39	int capture_count, uint32_t backtrack_limit);
				40
				41	// Prepare a RegExp for being executed one or more times (using
				42	// IrregexpExecOnce) on the subject.
				43	// This ensures that the regexp is compiled for the subject, and that
				44	// the subject is flat.
				45	// Returns the number of integer spaces required by IrregexpExecOnce
				46	// as its "registers" argument. If the regexp cannot be compiled,
				47	// an exception is set as pending, and this function returns negative.
				48	static int IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
				49	Handle<String> subject);
				50
				51	static void AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
				52	Handle<String> pattern, JSRegExp::Flags flags,
				53	Handle<String> match_pattern);
				54
				55	static int AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
				56	Handle<String> subject, int index, int32_t* output,
				57	int output_size);
				58
				59	static Handle<Object> AtomExec(Isolate* isolate, Handle<JSRegExp> regexp,
				60	Handle<String> subject, int index,
				61	Handle<RegExpMatchInfo> last_match_info);
				62
				63	// Execute a regular expression on the subject, starting from index.
				64	// If matching succeeds, return the number of matches. This can be larger
				65	// than one in the case of global regular expressions.
				66	// The captures and subcaptures are stored into the registers vector.
				67	// If matching fails, returns RE_FAILURE.
				68	// If execution fails, sets a pending exception and returns RE_EXCEPTION.
				69	static int IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
				70	Handle<String> subject, int index, int32_t* output,
				71	int output_size);
				72
				73	// Execute an Irregexp bytecode pattern.
				74	// On a successful match, the result is a JSArray containing
				75	// captured positions. On a failure, the result is the null value.
				76	// Returns an empty handle in case of an exception.
				77	V8_WARN_UNUSED_RESULT static MaybeHandle<Object> IrregexpExec(
				78	Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
				79	int index, Handle<RegExpMatchInfo> last_match_info);
				80
				81	static bool CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
				82	Handle<String> sample_subject, bool is_one_byte);
				83	static inline bool EnsureCompiledIrregexp(Isolate* isolate,
				84	Handle<JSRegExp> re,
				85	Handle<String> sample_subject,
				86	bool is_one_byte);
				87
				88	// Returns true on success, false on failure.
				89	static bool Compile(Isolate* isolate, Zone* zone, RegExpCompileData* input,
				90	JSRegExp::Flags flags, Handle<String> pattern,
				91	Handle<String> sample_subject, bool is_one_byte,
				92	uint32_t& backtrack_limit);
				93
				94	// For acting on the JSRegExp data FixedArray.
				95	static int IrregexpMaxRegisterCount(FixedArray re);
				96	static void SetIrregexpMaxRegisterCount(FixedArray re, int value);
				97	static int IrregexpNumberOfCaptures(FixedArray re);
				98	static ByteArray IrregexpByteCode(FixedArray re, bool is_one_byte);
				99	static Code IrregexpNativeCode(FixedArray re, bool is_one_byte);
				100	};
				101
				102	MaybeHandle<Object> RegExp::ThrowRegExpException(Isolate* isolate,
				103	Handle<JSRegExp> re,
				104	Handle<String> pattern,
				105	RegExpError error) {
				106	Vector<const char> error_data = CStrVector(RegExpErrorString(error));
				107	Handle<String> error_text =
				108	isolate->factory()
				109	->NewStringFromOneByte(Vector<const uint8_t>::cast(error_data))
				110	.ToHandleChecked();
				111	THROW_NEW_ERROR(
				112	isolate,
				113	NewSyntaxError(MessageTemplate::kMalformedRegExp, pattern, error_text),
				114	Object);
				115	}
				116
				117	void RegExp::ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
				118	RegExpError error_text) {
				119	USE(ThrowRegExpException(isolate, re, Handle<String>(re->Pattern(), isolate),
				120	error_text));
				121	}
				122
				123	bool RegExp::IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp) {
				124	return RegExpUtils::IsUnmodifiedRegExp(isolate, regexp);
				125	}
				126
				127	// Identifies the sort of regexps where the regexp engine is faster
				128	// than the code used for atom matches.
				129	static bool HasFewDifferentCharacters(Handle<String> pattern) {
				130	int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
				131	if (length <= kPatternTooShortForBoyerMoore) return false;
				132	const int kMod = 128;
				133	bool character_found[kMod];
				134	int different = 0;
				135	memset(&character_found[0], 0, sizeof(character_found));
				136	for (int i = 0; i < length; i++) {
				137	int ch = (pattern->Get(i) & (kMod - 1));
				138	if (!character_found[ch]) {
				139	character_found[ch] = true;
				140	different++;
				141	// We declare a regexp low-alphabet if it has at least 3 times as many
				142	// characters as it has different characters.
				143	if (different * 3 > length) return false;
				144	}
				145	}
				146	return true;
				147	}
				148
				149	// Generic RegExp methods. Dispatches to implementation specific methods.
				150
				151	// static
				152	MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
				153	Handle<String> pattern,
				154	JSRegExp::Flags flags,
				155	uint32_t backtrack_limit) {
				156	DCHECK(pattern->IsFlat());
				157
				158	// Caching is based only on the pattern and flags, but code also differs when
				159	// a backtrack limit is set. A present backtrack limit is very much not the
				160	// common case, so just skip the cache for these.
				161	const bool is_compilation_cache_enabled =
				162	(backtrack_limit == JSRegExp::kNoBacktrackLimit);
				163
				164	Zone zone(isolate->allocator(), ZONE_NAME);
				165	CompilationCache* compilation_cache = nullptr;
				166	if (is_compilation_cache_enabled) {
				167	compilation_cache = isolate->compilation_cache();
				168	MaybeHandle<FixedArray> maybe_cached =
				169	compilation_cache->LookupRegExp(pattern, flags);
				170	Handle<FixedArray> cached;
				171	if (maybe_cached.ToHandle(&cached)) {
				172	re->set_data(*cached);
				173	return re;
				174	}
				175	}
				176
				177	PostponeInterruptsScope postpone(isolate);
				178	RegExpCompileData parse_result;
				179	FlatStringReader reader(isolate, pattern);
				180	DCHECK(!isolate->has_pending_exception());
				181	if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
				182	&parse_result)) {
				183	// Throw an exception if we fail to parse the pattern.
				184	return RegExp::ThrowRegExpException(isolate, re, pattern,
				185	parse_result.error);
				186	}
				187
				188	bool has_been_compiled = false;
				189
				190	if (FLAG_default_to_experimental_regexp_engine &&
				191	ExperimentalRegExp::CanBeHandled(parse_result.tree, flags,
				192	parse_result.capture_count)) {
				193	DCHECK(FLAG_enable_experimental_regexp_engine);
				194	ExperimentalRegExp::Initialize(isolate, re, pattern, flags,
				195	parse_result.capture_count);
				196	has_been_compiled = true;
				197	} else if (flags & JSRegExp::kLinear) {
				198	DCHECK(FLAG_enable_experimental_regexp_engine);
				199	if (!ExperimentalRegExp::CanBeHandled(parse_result.tree, flags,
				200	parse_result.capture_count)) {
				201	// TODO(mbid): The error could provide a reason for why the regexp can't
				202	// be executed in linear time (e.g. due to back references).
				203	return RegExp::ThrowRegExpException(isolate, re, pattern,
				204	RegExpError::kNotLinear);
				205	}
				206	ExperimentalRegExp::Initialize(isolate, re, pattern, flags,
				207	parse_result.capture_count);
				208	has_been_compiled = true;
				209	} else if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
				210	!HasFewDifferentCharacters(pattern)) {
				211	// Parse-tree is a single atom that is equal to the pattern.
				212	RegExpImpl::AtomCompile(isolate, re, pattern, flags, pattern);
				213	has_been_compiled = true;
				214	} else if (parse_result.tree->IsAtom() && !IsSticky(flags) &&
				215	parse_result.capture_count == 0) {
				216	RegExpAtom* atom = parse_result.tree->AsAtom();
				217	// The pattern source might (?) contain escape sequences, but they're
				218	// resolved in atom_string.
				219	Vector<const uc16> atom_pattern = atom->data();
				220	Handle<String> atom_string;
				221	ASSIGN_RETURN_ON_EXCEPTION(
				222	isolate, atom_string,
				223	isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
				224	if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) {
				225	RegExpImpl::AtomCompile(isolate, re, pattern, flags, atom_string);
				226	has_been_compiled = true;
				227	}
				228	}
				229	if (!has_been_compiled) {
				230	RegExpImpl::IrregexpInitialize(isolate, re, pattern, flags,
				231	parse_result.capture_count, backtrack_limit);
				232	}
				233	DCHECK(re->data().IsFixedArray());
				234	// Compilation succeeded so the data is set on the regexp
				235	// and we can store it in the cache.
				236	Handle<FixedArray> data(FixedArray::cast(re->data()), isolate);
				237	if (is_compilation_cache_enabled) {
				238	compilation_cache->PutRegExp(pattern, flags, data);
				239	}
				240
				241	return re;
				242	}
				243
				244	// static
				245	bool RegExp::EnsureFullyCompiled(Isolate* isolate, Handle<JSRegExp> re,
				246	Handle<String> subject) {
				247	switch (re->TypeTag()) {
				248	case JSRegExp::NOT_COMPILED:
				249	UNREACHABLE();
				250	case JSRegExp::ATOM:
				251	return true;
				252	case JSRegExp::IRREGEXP:
				253	if (RegExpImpl::IrregexpPrepare(isolate, re, subject) == -1) {
				254	DCHECK(isolate->has_pending_exception());
				255	return false;
				256	}
				257	return true;
				258	case JSRegExp::EXPERIMENTAL:
				259	if (!ExperimentalRegExp::IsCompiled(re, isolate) &&
				260	!ExperimentalRegExp::Compile(isolate, re)) {
				261	DCHECK(isolate->has_pending_exception());
				262	return false;
				263	}
				264	return true;
				265	}
				266	}
				267
				268	// static
				269	MaybeHandle<Object> RegExp::ExperimentalOneshotExec(
				270	Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
				271	int index, Handle<RegExpMatchInfo> last_match_info) {
				272	return ExperimentalRegExp::OneshotExec(isolate, regexp, subject, index,
				273	last_match_info);
				274	}
				275
				276	// static
				277	MaybeHandle<Object> RegExp::Exec(Isolate* isolate, Handle<JSRegExp> regexp,
				278	Handle<String> subject, int index,
				279	Handle<RegExpMatchInfo> last_match_info) {
				280	switch (regexp->TypeTag()) {
				281	case JSRegExp::NOT_COMPILED:
				282	UNREACHABLE();
				283	case JSRegExp::ATOM:
				284	return RegExpImpl::AtomExec(isolate, regexp, subject, index,
				285	last_match_info);
				286	case JSRegExp::IRREGEXP:
				287	return RegExpImpl::IrregexpExec(isolate, regexp, subject, index,
				288	last_match_info);
				289	case JSRegExp::EXPERIMENTAL:
				290	return ExperimentalRegExp::Exec(isolate, regexp, subject, index,
				291	last_match_info);
				292	}
				293	}
				294
				295	// RegExp Atom implementation: Simple string search using indexOf.
				296
				297	void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
				298	Handle<String> pattern, JSRegExp::Flags flags,
				299	Handle<String> match_pattern) {
				300	isolate->factory()->SetRegExpAtomData(re, pattern, flags, match_pattern);
				301	}
				302
				303	static void SetAtomLastCapture(Isolate* isolate,
				304	Handle<RegExpMatchInfo> last_match_info,
				305	String subject, int from, int to) {
				306	SealHandleScope shs(isolate);
				307	last_match_info->SetNumberOfCaptureRegisters(2);
				308	last_match_info->SetLastSubject(subject);
				309	last_match_info->SetLastInput(subject);
				310	last_match_info->SetCapture(0, from);
				311	last_match_info->SetCapture(1, to);
				312	}
				313
				314	int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
				315	Handle<String> subject, int index, int32_t* output,
				316	int output_size) {
				317	DCHECK_LE(0, index);
				318	DCHECK_LE(index, subject->length());
				319
				320	subject = String::Flatten(isolate, subject);
				321	DisallowHeapAllocation no_gc; // ensure vectors stay valid
				322
				323	String needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
				324	int needle_len = needle.length();
				325	DCHECK(needle.IsFlat());
				326	DCHECK_LT(0, needle_len);
				327
				328	if (index + needle_len > subject->length()) {
				329	return RegExp::RE_FAILURE;
				330	}
				331
				332	for (int i = 0; i < output_size; i += 2) {
				333	String::FlatContent needle_content = needle.GetFlatContent(no_gc);
				334	String::FlatContent subject_content = subject->GetFlatContent(no_gc);
				335	DCHECK(needle_content.IsFlat());
				336	DCHECK(subject_content.IsFlat());
				337	// dispatch on type of strings
				338	index =
				339	(needle_content.IsOneByte()
				340	? (subject_content.IsOneByte()
				341	? SearchString(isolate, subject_content.ToOneByteVector(),
				342	needle_content.ToOneByteVector(), index)
				343	: SearchString(isolate, subject_content.ToUC16Vector(),
				344	needle_content.ToOneByteVector(), index))
				345	: (subject_content.IsOneByte()
				346	? SearchString(isolate, subject_content.ToOneByteVector(),
				347	needle_content.ToUC16Vector(), index)
				348	: SearchString(isolate, subject_content.ToUC16Vector(),
				349	needle_content.ToUC16Vector(), index)));
				350	if (index == -1) {
				351	return i / 2; // Return number of matches.
				352	} else {
				353	output[i] = index;
				354	output[i + 1] = index + needle_len;
				355	index += needle_len;
				356	}
				357	}
				358	return output_size / 2;
				359	}
				360
				361	Handle<Object> RegExpImpl::AtomExec(Isolate* isolate, Handle<JSRegExp> re,
				362	Handle<String> subject, int index,
				363	Handle<RegExpMatchInfo> last_match_info) {
				364	static const int kNumRegisters = 2;
				365	STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize);
				366	int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
				367
				368	int res =
				369	AtomExecRaw(isolate, re, subject, index, output_registers, kNumRegisters);
				370
				371	if (res == RegExp::RE_FAILURE) return isolate->factory()->null_value();
				372
				373	DCHECK_EQ(res, RegExp::RE_SUCCESS);
				374	SealHandleScope shs(isolate);
				375	SetAtomLastCapture(isolate, last_match_info, *subject, output_registers[0],
				376	output_registers[1]);
				377	return last_match_info;
				378	}
				379
				380	// Irregexp implementation.
				381
				382	// Ensures that the regexp object contains a compiled version of the
				383	// source for either one-byte or two-byte subject strings.
				384	// If the compiled version doesn't already exist, it is compiled
				385	// from the source pattern.
				386	// If compilation fails, an exception is thrown and this function
				387	// returns false.
				388	bool RegExpImpl::EnsureCompiledIrregexp(Isolate* isolate, Handle<JSRegExp> re,
				389	Handle<String> sample_subject,
				390	bool is_one_byte) {
				391	Object compiled_code = re->Code(is_one_byte);
				392	Object bytecode = re->Bytecode(is_one_byte);
				393	bool needs_initial_compilation =
				394	compiled_code == Smi::FromInt(JSRegExp::kUninitializedValue);
				395	// Recompile is needed when we're dealing with the first execution of the
				396	// regexp after the decision to tier up has been made. If the tiering up
				397	// strategy is not in use, this value is always false.
				398	bool needs_tier_up_compilation =
				399	re->MarkedForTierUp() && bytecode.IsByteArray();
				400
				401	if (FLAG_trace_regexp_tier_up && needs_tier_up_compilation) {
				402	PrintF("JSRegExp object %p needs tier-up compilation\n",
				403	reinterpret_cast<void*>(re->ptr()));
				404	}
				405
				406	if (!needs_initial_compilation && !needs_tier_up_compilation) {
				407	DCHECK(compiled_code.IsCode());
				408	DCHECK_IMPLIES(FLAG_regexp_interpret_all, bytecode.IsByteArray());
				409	return true;
				410	}
				411
				412	DCHECK_IMPLIES(needs_tier_up_compilation, bytecode.IsByteArray());
				413
				414	return CompileIrregexp(isolate, re, sample_subject, is_one_byte);
				415	}
				416
				417	#ifdef DEBUG
				418	namespace {
				419
				420	bool RegExpCodeIsValidForPreCompilation(Handle<JSRegExp> re, bool is_one_byte) {
				421	Object entry = re->Code(is_one_byte);
				422	Object bytecode = re->Bytecode(is_one_byte);
				423	// If we're not using the tier-up strategy, entry can only be a smi
				424	// representing an uncompiled regexp here. If we're using the tier-up
				425	// strategy, entry can still be a smi representing an uncompiled regexp, when
				426	// compiling the regexp before the tier-up, or it can contain a trampoline to
				427	// the regexp interpreter, in which case the bytecode field contains compiled
				428	// bytecode, when recompiling the regexp after the tier-up. If the
				429	// tier-up was forced, which happens for global replaces, entry is a smi
				430	// representing an uncompiled regexp, even though we're "recompiling" after
				431	// the tier-up.
				432	if (re->ShouldProduceBytecode()) {
				433	DCHECK(entry.IsSmi());
				434	DCHECK(bytecode.IsSmi());
				435	int entry_value = Smi::ToInt(entry);
				436	int bytecode_value = Smi::ToInt(bytecode);
				437	DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value);
				438	DCHECK_EQ(JSRegExp::kUninitializedValue, bytecode_value);
				439	} else {
				440	DCHECK(entry.IsSmi() \|\| (entry.IsCode() && bytecode.IsByteArray()));
				441	}
				442
				443	return true;
				444	}
				445
				446	} // namespace
				447	#endif
				448
				449	bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
				450	Handle<String> sample_subject,
				451	bool is_one_byte) {
				452	// Compile the RegExp.
				453	Zone zone(isolate->allocator(), ZONE_NAME);
				454	PostponeInterruptsScope postpone(isolate);
				455
				456	DCHECK(RegExpCodeIsValidForPreCompilation(re, is_one_byte));
				457
				458	JSRegExp::Flags flags = re->GetFlags();
				459
				460	Handle<String> pattern(re->Pattern(), isolate);
				461	pattern = String::Flatten(isolate, pattern);
				462	RegExpCompileData compile_data;
				463	FlatStringReader reader(isolate, pattern);
				464	if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
				465	&compile_data)) {
				466	// Throw an exception if we fail to parse the pattern.
				467	// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
				468	USE(RegExp::ThrowRegExpException(isolate, re, pattern, compile_data.error));
				469	return false;
				470	}
				471	// The compilation target is a kBytecode if we're interpreting all regexp
				472	// objects, or if we're using the tier-up strategy but the tier-up hasn't
				473	// happened yet. The compilation target is a kNative if we're using the
				474	// tier-up strategy and we need to recompile to tier-up, or if we're producing
				475	// native code for all regexp objects.
				476	compile_data.compilation_target = re->ShouldProduceBytecode()
				477	? RegExpCompilationTarget::kBytecode
				478	: RegExpCompilationTarget::kNative;
				479	uint32_t backtrack_limit = re->BacktrackLimit();
				480	const bool compilation_succeeded =
				481	Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject,
				482	is_one_byte, backtrack_limit);
				483	if (!compilation_succeeded) {
				484	DCHECK(compile_data.error != RegExpError::kNone);
				485	RegExp::ThrowRegExpException(isolate, re, compile_data.error);
				486	return false;
				487	}
				488
				489	Handle<FixedArray> data =
				490	Handle<FixedArray>(FixedArray::cast(re->data()), isolate);
				491	if (compile_data.compilation_target == RegExpCompilationTarget::kNative) {
				492	data->set(JSRegExp::code_index(is_one_byte), *compile_data.code);
				493	// Reset bytecode to uninitialized. In case we use tier-up we know that
				494	// tier-up has happened this way.
				495	data->set(JSRegExp::bytecode_index(is_one_byte),
				496	Smi::FromInt(JSRegExp::kUninitializedValue));
				497	} else {
				498	DCHECK_EQ(compile_data.compilation_target,
				499	RegExpCompilationTarget::kBytecode);
				500	// Store code generated by compiler in bytecode and trampoline to
				501	// interpreter in code.
				502	data->set(JSRegExp::bytecode_index(is_one_byte), *compile_data.code);
				503	Handle<Code> trampoline =
				504	BUILTIN_CODE(isolate, RegExpInterpreterTrampoline);
				505	data->set(JSRegExp::code_index(is_one_byte), *trampoline);
				506	}
				507	re->SetCaptureNameMap(compile_data.capture_name_map);
				508	int register_max = IrregexpMaxRegisterCount(*data);
				509	if (compile_data.register_count > register_max) {
				510	SetIrregexpMaxRegisterCount(*data, compile_data.register_count);
				511	}
				512	data->set(JSRegExp::kIrregexpBacktrackLimit, Smi::FromInt(backtrack_limit));
				513
				514	if (FLAG_trace_regexp_tier_up) {
				515	PrintF("JSRegExp object %p %s size: %d\n",
				516	reinterpret_cast<void*>(re->ptr()),
				517	re->ShouldProduceBytecode() ? "bytecode" : "native code",
				518	re->ShouldProduceBytecode()
				519	? IrregexpByteCode(*data, is_one_byte).Size()
				520	: IrregexpNativeCode(*data, is_one_byte).Size());
				521	}
				522
				523	return true;
				524	}
				525
				526	int RegExpImpl::IrregexpMaxRegisterCount(FixedArray re) {
				527	return Smi::ToInt(re.get(JSRegExp::kIrregexpMaxRegisterCountIndex));
				528	}
				529
				530	void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray re, int value) {
				531	re.set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
				532	}
				533
				534	int RegExpImpl::IrregexpNumberOfCaptures(FixedArray re) {
				535	return Smi::ToInt(re.get(JSRegExp::kIrregexpCaptureCountIndex));
				536	}
				537
				538	ByteArray RegExpImpl::IrregexpByteCode(FixedArray re, bool is_one_byte) {
				539	return ByteArray::cast(re.get(JSRegExp::bytecode_index(is_one_byte)));
				540	}
				541
				542	Code RegExpImpl::IrregexpNativeCode(FixedArray re, bool is_one_byte) {
				543	return Code::cast(re.get(JSRegExp::code_index(is_one_byte)));
				544	}
				545
				546	void RegExpImpl::IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
				547	Handle<String> pattern,
				548	JSRegExp::Flags flags, int capture_count,
				549	uint32_t backtrack_limit) {
				550	// Initialize compiled code entries to null.
				551	isolate->factory()->SetRegExpIrregexpData(re, pattern, flags, capture_count,
				552	backtrack_limit);
				553	}
				554
				555	// static
				556	int RegExpImpl::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
				557	Handle<String> subject) {
				558	DCHECK(subject->IsFlat());
				559
				560	// Check representation of the underlying storage.
				561	bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
				562	if (!RegExpImpl::EnsureCompiledIrregexp(isolate, regexp, subject,
				563	is_one_byte)) {
				564	return -1;
				565	}
				566
				567	// Only reserve room for output captures. Internal registers are allocated by
				568	// the engine.
				569	return JSRegExp::RegistersForCaptureCount(regexp->CaptureCount());
				570	}
				571
				572	int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
				573	Handle<String> subject, int index,
				574	int32_t* output, int output_size) {
				575	DCHECK_LE(0, index);
				576	DCHECK_LE(index, subject->length());
				577	DCHECK(subject->IsFlat());
				578	DCHECK_GE(output_size,
				579	JSRegExp::RegistersForCaptureCount(regexp->CaptureCount()));
				580
				581	bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
				582
				583	if (!regexp->ShouldProduceBytecode()) {
				584	do {
				585	EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
				586	// The stack is used to allocate registers for the compiled regexp code.
				587	// This means that in case of failure, the output registers array is left
				588	// untouched and contains the capture results from the previous successful
				589	// match. We can use that to set the last match info lazily.
				590	int res = NativeRegExpMacroAssembler::Match(regexp, subject, output,
				591	output_size, index, isolate);
				592	if (res != NativeRegExpMacroAssembler::RETRY) {
				593	DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION \|\|
				594	isolate->has_pending_exception());
				595	STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) ==
				596	RegExp::RE_SUCCESS);
				597	STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::FAILURE) ==
				598	RegExp::RE_FAILURE);
				599	STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) ==
				600	RegExp::RE_EXCEPTION);
				601	return res;
				602	}
				603	// If result is RETRY, the string has changed representation, and we
				604	// must restart from scratch.
				605	// In this case, it means we must make sure we are prepared to handle
				606	// the, potentially, different subject (the string can switch between
				607	// being internal and external, and even between being Latin1 and UC16,
				608	// but the characters are always the same).
				609	is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
				610	} while (true);
				611	UNREACHABLE();
				612	} else {
				613	DCHECK(regexp->ShouldProduceBytecode());
				614
				615	do {
				616	IrregexpInterpreter::Result result =
				617	IrregexpInterpreter::MatchForCallFromRuntime(
				618	isolate, regexp, subject, output, output_size, index);
				619	DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
				620	isolate->has_pending_exception());
				621
				622	switch (result) {
				623	case IrregexpInterpreter::SUCCESS:
				624	case IrregexpInterpreter::EXCEPTION:
				625	case IrregexpInterpreter::FAILURE:
				626	case IrregexpInterpreter::FALLBACK_TO_EXPERIMENTAL:
				627	return result;
				628	case IrregexpInterpreter::RETRY:
				629	// The string has changed representation, and we must restart the
				630	// match.
				631	// We need to reset the tier up to start over with compilation.
				632	if (FLAG_regexp_tier_up) regexp->ResetLastTierUpTick();
				633	is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
				634	EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
				635	break;
				636	}
				637	} while (true);
				638	UNREACHABLE();
				639	}
				640	}
				641
				642	MaybeHandle<Object> RegExpImpl::IrregexpExec(
				643	Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
				644	int previous_index, Handle<RegExpMatchInfo> last_match_info) {
				645	DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
				646
				647	subject = String::Flatten(isolate, subject);
				648
				649	#ifdef DEBUG
				650	if (FLAG_trace_regexp_bytecodes && regexp->ShouldProduceBytecode()) {
				651	String pattern = regexp->Pattern();
				652	PrintF("\n\nRegexp match: /%s/\n\n", pattern.ToCString().get());
				653	PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
				654	}
				655	#endif
				656
				657	// For very long subject strings, the regexp interpreter is currently much
				658	// slower than the jitted code execution. If the tier-up strategy is turned
				659	// on, we want to avoid this performance penalty so we eagerly tier-up if the
				660	// subject string length is equal or greater than the given heuristic value.
				661	if (FLAG_regexp_tier_up &&
				662	subject->length() >= JSRegExp::kTierUpForSubjectLengthValue) {
				663	regexp->MarkTierUpForNextExec();
				664	if (FLAG_trace_regexp_tier_up) {
				665	PrintF(
				666	"Forcing tier-up for very long strings in "
				667	"RegExpImpl::IrregexpExec\n");
				668	}
				669	}
				670
				671	// Prepare space for the return values.
				672	int required_registers =
				673	RegExpImpl::IrregexpPrepare(isolate, regexp, subject);
				674	if (required_registers < 0) {
				675	// Compiling failed with an exception.
				676	DCHECK(isolate->has_pending_exception());
				677	return MaybeHandle<Object>();
				678	}
				679
				680	int32_t* output_registers = nullptr;
				681	if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
				682	output_registers = NewArray<int32_t>(required_registers);
				683	}
				684	std::unique_ptr<int32_t[]> auto_release(output_registers);
				685	if (output_registers == nullptr) {
				686	output_registers = isolate->jsregexp_static_offsets_vector();
				687	}
				688
				689	int res =
				690	RegExpImpl::IrregexpExecRaw(isolate, regexp, subject, previous_index,
				691	output_registers, required_registers);
				692
				693	if (res == RegExp::RE_SUCCESS) {
				694	int capture_count = regexp->CaptureCount();
				695	return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
				696	capture_count, output_registers);
				697	} else if (res == RegExp::RE_FALLBACK_TO_EXPERIMENTAL) {
				698	return ExperimentalRegExp::OneshotExec(isolate, regexp, subject,
				699	previous_index, last_match_info);
				700	} else if (res == RegExp::RE_EXCEPTION) {
				701	DCHECK(isolate->has_pending_exception());
				702	return MaybeHandle<Object>();
				703	} else {
				704	DCHECK(res == RegExp::RE_FAILURE);
				705	return isolate->factory()->null_value();
				706	}
				707	}
				708
				709	// static
				710	Handle<RegExpMatchInfo> RegExp::SetLastMatchInfo(
				711	Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
				712	Handle<String> subject, int capture_count, int32_t* match) {
				713	// This is the only place where match infos can grow. If, after executing the
				714	// regexp, RegExpExecStub finds that the match info is too small, it restarts
				715	// execution in RegExpImpl::Exec, which finally grows the match info right
				716	// here.
				717	Handle<RegExpMatchInfo> result =
				718	RegExpMatchInfo::ReserveCaptures(isolate, last_match_info, capture_count);
				719	if (result != last_match_info) {
				720	if (last_match_info == isolate->regexp_last_match_info()) {
				721	// This inner condition is only needed for special situations like the
				722	// regexp fuzzer, where we pass our own custom RegExpMatchInfo to
				723	// RegExpImpl::Exec; there actually want to bypass the Isolate's match
				724	// info and execute the regexp without side effects.
				725	isolate->native_context()->set_regexp_last_match_info(*result);
				726	}
				727	}
				728
				729	int capture_register_count =
				730	JSRegExp::RegistersForCaptureCount(capture_count);
				731	DisallowHeapAllocation no_allocation;
				732	if (match != nullptr) {
				733	for (int i = 0; i < capture_register_count; i += 2) {
				734	result->SetCapture(i, match[i]);
				735	result->SetCapture(i + 1, match[i + 1]);
				736	}
				737	}
				738	result->SetLastSubject(*subject);
				739	result->SetLastInput(*subject);
				740	return result;
				741	}
				742
				743	// static
				744	void RegExp::DotPrintForTesting(const char* label, RegExpNode* node) {
				745	DotPrinter::DotPrint(label, node);
				746	}
				747
				748	namespace {
				749
				750	// Returns true if we've either generated too much irregex code within this
				751	// isolate, or the pattern string is too long.
				752	bool TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern) {
				753	// Limit the space regexps take up on the heap. In order to limit this we
				754	// would like to keep track of the amount of regexp code on the heap. This
				755	// is not tracked, however. As a conservative approximation we track the
				756	// total regexp code compiled including code that has subsequently been freed
				757	// and the total executable memory at any point.
				758	static constexpr size_t kRegExpExecutableMemoryLimit = 16 * MB;
				759	static constexpr size_t kRegExpCompiledLimit = 1 * MB;
				760
				761	Heap* heap = isolate->heap();
				762	if (pattern->length() > RegExp::kRegExpTooLargeToOptimize) return true;
				763	return (isolate->total_regexp_code_generated() > kRegExpCompiledLimit &&
				764	heap->CommittedMemoryExecutable() > kRegExpExecutableMemoryLimit);
				765	}
				766
				767	} // namespace
				768
				769	// static
				770	bool RegExp::CompileForTesting(Isolate* isolate, Zone* zone,
				771	RegExpCompileData* data, JSRegExp::Flags flags,
				772	Handle<String> pattern,
				773	Handle<String> sample_subject,
				774	bool is_one_byte) {
				775	uint32_t backtrack_limit = JSRegExp::kNoBacktrackLimit;
				776	return RegExpImpl::Compile(isolate, zone, data, flags, pattern,
				777	sample_subject, is_one_byte, backtrack_limit);
				778	}
				779
				780	bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
				781	JSRegExp::Flags flags, Handle<String> pattern,
				782	Handle<String> sample_subject, bool is_one_byte,
				783	uint32_t& backtrack_limit) {
				784	if (JSRegExp::RegistersForCaptureCount(data->capture_count) >
				785	RegExpMacroAssembler::kMaxRegisterCount) {
				786	data->error = RegExpError::kTooLarge;
				787	return false;
				788	}
				789
				790	RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte);
				791
				792	if (compiler.optimize()) {
				793	compiler.set_optimize(!TooMuchRegExpCode(isolate, pattern));
				794	}
				795
				796	// Sample some characters from the middle of the string.
				797	static const int kSampleSize = 128;
				798
				799	sample_subject = String::Flatten(isolate, sample_subject);
				800	int chars_sampled = 0;
				801	int half_way = (sample_subject->length() - kSampleSize) / 2;
				802	for (int i = Max(0, half_way);
				803	i < sample_subject->length() && chars_sampled < kSampleSize;
				804	i++, chars_sampled++) {
				805	compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
				806	}
				807
				808	data->node = compiler.PreprocessRegExp(data, flags, is_one_byte);
				809	data->error = AnalyzeRegExp(isolate, is_one_byte, data->node);
				810	if (data->error != RegExpError::kNone) {
				811	return false;
				812	}
				813
				814	// Create the correct assembler for the architecture.
				815	std::unique_ptr<RegExpMacroAssembler> macro_assembler;
				816	if (data->compilation_target == RegExpCompilationTarget::kNative) {
				817	// Native regexp implementation.
				818	DCHECK(!FLAG_jitless);
				819
				820	NativeRegExpMacroAssembler::Mode mode =
				821	is_one_byte ? NativeRegExpMacroAssembler::LATIN1
				822	: NativeRegExpMacroAssembler::UC16;
				823
				824	const int output_register_count =
				825	JSRegExp::RegistersForCaptureCount(data->capture_count);
				826	#if V8_TARGET_ARCH_IA32
				827	macro_assembler.reset(new RegExpMacroAssemblerIA32(isolate, zone, mode,
				828	output_register_count));
				829	#elif V8_TARGET_ARCH_X64
				830	macro_assembler.reset(new RegExpMacroAssemblerX64(isolate, zone, mode,
				831	output_register_count));
				832	#elif V8_TARGET_ARCH_ARM
				833	macro_assembler.reset(new RegExpMacroAssemblerARM(isolate, zone, mode,
				834	output_register_count));
				835	#elif V8_TARGET_ARCH_ARM64
				836	macro_assembler.reset(new RegExpMacroAssemblerARM64(isolate, zone, mode,
				837	output_register_count));
				838	#elif V8_TARGET_ARCH_S390
				839	macro_assembler.reset(new RegExpMacroAssemblerS390(isolate, zone, mode,
				840	output_register_count));
				841	#elif V8_TARGET_ARCH_PPC \|\| V8_TARGET_ARCH_PPC64
				842	macro_assembler.reset(new RegExpMacroAssemblerPPC(isolate, zone, mode,
				843	output_register_count));
				844	#elif V8_TARGET_ARCH_MIPS
				845	macro_assembler.reset(new RegExpMacroAssemblerMIPS(isolate, zone, mode,
				846	output_register_count));
				847	#elif V8_TARGET_ARCH_MIPS64
				848	macro_assembler.reset(new RegExpMacroAssemblerMIPS(isolate, zone, mode,
				849	output_register_count));
				850	#else
				851	#error "Unsupported architecture"
				852	#endif
				853	} else {
				854	DCHECK_EQ(data->compilation_target, RegExpCompilationTarget::kBytecode);
				855	// Interpreted regexp implementation.
				856	macro_assembler.reset(new RegExpBytecodeGenerator(isolate, zone));
				857	}
				858
				859	macro_assembler->set_slow_safe(TooMuchRegExpCode(isolate, pattern));
				860	if (FLAG_enable_experimental_regexp_engine_on_excessive_backtracks &&
				861	ExperimentalRegExp::CanBeHandled(data->tree, flags,
				862	data->capture_count)) {
				863	if (backtrack_limit == JSRegExp::kNoBacktrackLimit) {
				864	backtrack_limit = FLAG_regexp_backtracks_before_fallback;
				865	} else {
				866	backtrack_limit =
				867	std::min(backtrack_limit, FLAG_regexp_backtracks_before_fallback);
				868	}
				869	macro_assembler->set_backtrack_limit(backtrack_limit);
				870	macro_assembler->set_can_fallback(true);
				871	} else {
				872	macro_assembler->set_backtrack_limit(backtrack_limit);
				873	macro_assembler->set_can_fallback(false);
				874	}
				875
				876	// Inserted here, instead of in Assembler, because it depends on information
				877	// in the AST that isn't replicated in the Node structure.
				878	bool is_end_anchored = data->tree->IsAnchoredAtEnd();
				879	bool is_start_anchored = data->tree->IsAnchoredAtStart();
				880	int max_length = data->tree->max_match();
				881	static const int kMaxBacksearchLimit = 1024;
				882	if (is_end_anchored && !is_start_anchored && !IsSticky(flags) &&
				883	max_length < kMaxBacksearchLimit) {
				884	macro_assembler->SetCurrentPositionFromEnd(max_length);
				885	}
				886
				887	if (IsGlobal(flags)) {
				888	RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
				889	if (data->tree->min_match() > 0) {
				890	mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
				891	} else if (IsUnicode(flags)) {
				892	mode = RegExpMacroAssembler::GLOBAL_UNICODE;
				893	}
				894	macro_assembler->set_global_mode(mode);
				895	}
				896
				897	RegExpMacroAssembler* macro_assembler_ptr = macro_assembler.get();
				898	#ifdef DEBUG
				899	std::unique_ptr<RegExpMacroAssembler> tracer_macro_assembler;
				900	if (FLAG_trace_regexp_assembler) {
				901	tracer_macro_assembler.reset(
				902	new RegExpMacroAssemblerTracer(isolate, macro_assembler_ptr));
				903	macro_assembler_ptr = tracer_macro_assembler.get();
				904	}
				905	#endif
				906
				907	RegExpCompiler::CompilationResult result = compiler.Assemble(
				908	isolate, macro_assembler_ptr, data->node, data->capture_count, pattern);
				909
				910	// Code / bytecode printing.
				911	{
				912	#ifdef ENABLE_DISASSEMBLER
				913	if (FLAG_print_regexp_code &&
				914	data->compilation_target == RegExpCompilationTarget::kNative) {
				915	CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
				916	OFStream os(trace_scope.file());
				917	Handle<Code> c = Handle<Code>::cast(result.code);
				918	auto pattern_cstring = pattern->ToCString();
				919	c->Disassemble(pattern_cstring.get(), os, isolate);
				920	}
				921	#endif
				922	if (FLAG_print_regexp_bytecode &&
				923	data->compilation_target == RegExpCompilationTarget::kBytecode) {
				924	Handle<ByteArray> bytecode = Handle<ByteArray>::cast(result.code);
				925	auto pattern_cstring = pattern->ToCString();
				926	RegExpBytecodeDisassemble(bytecode->GetDataStartAddress(),
				927	bytecode->length(), pattern_cstring.get());
				928	}
				929	}
				930
				931	if (result.error != RegExpError::kNone) {
				932	if (FLAG_correctness_fuzzer_suppressions &&
				933	result.error == RegExpError::kStackOverflow) {
				934	FATAL("Aborting on stack overflow");
				935	}
				936	data->error = result.error;
				937	}
				938
				939	data->code = result.code;
				940	data->register_count = result.num_registers;
				941
				942	return result.Succeeded();
				943	}
				944
				945	RegExpGlobalCache::RegExpGlobalCache(Handle<JSRegExp> regexp,
				946	Handle<String> subject, Isolate* isolate)
				947	: register_array_(nullptr),
				948	register_array_size_(0),
				949	regexp_(regexp),
				950	subject_(subject),
				951	isolate_(isolate) {
				952	DCHECK(IsGlobal(regexp->GetFlags()));
				953
				954	switch (regexp_->TypeTag()) {
				955	case JSRegExp::NOT_COMPILED:
				956	UNREACHABLE();
				957	case JSRegExp::ATOM: {
				958	// ATOM regexps do not have a global loop, so we search for one match at
				959	// a time.
				960	static const int kAtomRegistersPerMatch = 2;
				961	registers_per_match_ = kAtomRegistersPerMatch;
				962	register_array_size_ = registers_per_match_;
				963	break;
				964	}
				965	case JSRegExp::IRREGEXP: {
				966	registers_per_match_ =
				967	RegExpImpl::IrregexpPrepare(isolate_, regexp_, subject_);
				968	if (registers_per_match_ < 0) {
				969	num_matches_ = -1; // Signal exception.
				970	return;
				971	}
				972	if (regexp->ShouldProduceBytecode()) {
				973	// Global loop in interpreted regexp is not implemented. We choose the
				974	// size of the offsets vector so that it can only store one match.
				975	register_array_size_ = registers_per_match_;
				976	max_matches_ = 1;
				977	} else {
				978	register_array_size_ = Max(registers_per_match_,
				979	Isolate::kJSRegexpStaticOffsetsVectorSize);
				980	}
				981	break;
				982	}
				983	case JSRegExp::EXPERIMENTAL: {
				984	if (!ExperimentalRegExp::IsCompiled(regexp, isolate_) &&
				985	!ExperimentalRegExp::Compile(isolate_, regexp)) {
				986	DCHECK(isolate->has_pending_exception());
				987	num_matches_ = -1; // Signal exception.
				988	return;
				989	}
				990	registers_per_match_ =
				991	JSRegExp::RegistersForCaptureCount(regexp->CaptureCount());
				992	register_array_size_ =
				993	Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
				994	break;
				995	}
				996	}
				997
				998	max_matches_ = register_array_size_ / registers_per_match_;
				999
				1000	if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
				1001	register_array_ = NewArray<int32_t>(register_array_size_);
				1002	} else {
				1003	register_array_ = isolate->jsregexp_static_offsets_vector();
				1004	}
				1005
				1006	// Set state so that fetching the results the first time triggers a call
				1007	// to the compiled regexp.
				1008	current_match_index_ = max_matches_ - 1;
				1009	num_matches_ = max_matches_;
				1010	DCHECK_LE(2, registers_per_match_); // Each match has at least one capture.
				1011	DCHECK_GE(register_array_size_, registers_per_match_);
				1012	int32_t* last_match =
				1013	&register_array_[current_match_index_ * registers_per_match_];
				1014	last_match[0] = -1;
				1015	last_match[1] = 0;
				1016	}
				1017
				1018	RegExpGlobalCache::~RegExpGlobalCache() {
				1019	// Deallocate the register array if we allocated it in the constructor
				1020	// (as opposed to using the existing jsregexp_static_offsets_vector).
				1021	if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
				1022	DeleteArray(register_array_);
				1023	}
				1024	}
				1025
				1026	int RegExpGlobalCache::AdvanceZeroLength(int last_index) {
				1027	if (IsUnicode(regexp_->GetFlags()) && last_index + 1 < subject_->length() &&
				1028	unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) &&
				1029	unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) {
				1030	// Advance over the surrogate pair.
				1031	return last_index + 2;
				1032	}
				1033	return last_index + 1;
				1034	}
				1035
				1036	int32_t* RegExpGlobalCache::FetchNext() {
				1037	current_match_index_++;
				1038
				1039	if (current_match_index_ >= num_matches_) {
				1040	// Current batch of results exhausted.
				1041	// Fail if last batch was not even fully filled.
				1042	if (num_matches_ < max_matches_) {
				1043	num_matches_ = 0; // Signal failed match.
				1044	return nullptr;
				1045	}
				1046
				1047	int32_t* last_match =
				1048	&register_array_[(current_match_index_ - 1) * registers_per_match_];
				1049	int last_end_index = last_match[1];
				1050
				1051	switch (regexp_->TypeTag()) {
				1052	case JSRegExp::NOT_COMPILED:
				1053	UNREACHABLE();
				1054	case JSRegExp::ATOM:
				1055	num_matches_ =
				1056	RegExpImpl::AtomExecRaw(isolate_, regexp_, subject_, last_end_index,
				1057	register_array_, register_array_size_);
				1058	break;
				1059	case JSRegExp::EXPERIMENTAL: {
				1060	DCHECK(ExperimentalRegExp::IsCompiled(regexp_, isolate_));
				1061	DisallowHeapAllocation no_gc;
				1062	num_matches_ = ExperimentalRegExp::ExecRaw(
				1063	isolate_, RegExp::kFromRuntime, regexp_, subject_,
				1064	register_array_, register_array_size_, last_end_index);
				1065	break;
				1066	}
				1067	case JSRegExp::IRREGEXP: {
				1068	int last_start_index = last_match[0];
				1069	if (last_start_index == last_end_index) {
				1070	// Zero-length match. Advance by one code point.
				1071	last_end_index = AdvanceZeroLength(last_end_index);
				1072	}
				1073	if (last_end_index > subject_->length()) {
				1074	num_matches_ = 0; // Signal failed match.
				1075	return nullptr;
				1076	}
				1077	num_matches_ = RegExpImpl::IrregexpExecRaw(
				1078	isolate_, regexp_, subject_, last_end_index, register_array_,
				1079	register_array_size_);
				1080	break;
				1081	}
				1082	}
				1083
				1084	// Fall back to experimental engine if needed and possible.
				1085	if (num_matches_ == RegExp::kInternalRegExpFallbackToExperimental) {
				1086	num_matches_ = ExperimentalRegExp::OneshotExecRaw(
				1087	isolate_, regexp_, subject_, register_array_, register_array_size_,
				1088	last_end_index);
				1089	}
				1090
				1091	if (num_matches_ <= 0) {
				1092	return nullptr;
				1093	}
				1094	current_match_index_ = 0;
				1095	return register_array_;
				1096	} else {
				1097	return &register_array_[current_match_index_ * registers_per_match_];
				1098	}
				1099	}
				1100
				1101	int32_t* RegExpGlobalCache::LastSuccessfulMatch() {
				1102	int index = current_match_index_ * registers_per_match_;
				1103	if (num_matches_ == 0) {
				1104	// After a failed match we shift back by one result.
				1105	index -= registers_per_match_;
				1106	}
				1107	return &register_array_[index];
				1108	}
				1109
				1110	Object RegExpResultsCache::Lookup(Heap* heap, String key_string,
				1111	Object key_pattern,
				1112	FixedArray* last_match_cache,
				1113	ResultsCacheType type) {
				1114	FixedArray cache;
				1115	if (!key_string.IsInternalizedString()) return Smi::zero();
				1116	if (type == STRING_SPLIT_SUBSTRINGS) {
				1117	DCHECK(key_pattern.IsString());
				1118	if (!key_pattern.IsInternalizedString()) return Smi::zero();
				1119	cache = heap->string_split_cache();
				1120	} else {
				1121	DCHECK(type == REGEXP_MULTIPLE_INDICES);
				1122	DCHECK(key_pattern.IsFixedArray());
				1123	cache = heap->regexp_multiple_cache();
				1124	}
				1125
				1126	uint32_t hash = key_string.Hash();
				1127	uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
				1128	~(kArrayEntriesPerCacheEntry - 1));
				1129	if (cache.get(index + kStringOffset) != key_string \|\|
				1130	cache.get(index + kPatternOffset) != key_pattern) {
				1131	index =
				1132	((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
				1133	if (cache.get(index + kStringOffset) != key_string \|\|
				1134	cache.get(index + kPatternOffset) != key_pattern) {
				1135	return Smi::zero();
				1136	}
				1137	}
				1138
				1139	*last_match_cache = FixedArray::cast(cache.get(index + kLastMatchOffset));
				1140	return cache.get(index + kArrayOffset);
				1141	}
				1142
				1143	void RegExpResultsCache::Enter(Isolate* isolate, Handle<String> key_string,
				1144	Handle<Object> key_pattern,
				1145	Handle<FixedArray> value_array,
				1146	Handle<FixedArray> last_match_cache,
				1147	ResultsCacheType type) {
				1148	Factory* factory = isolate->factory();
				1149	Handle<FixedArray> cache;
				1150	if (!key_string->IsInternalizedString()) return;
				1151	if (type == STRING_SPLIT_SUBSTRINGS) {
				1152	DCHECK(key_pattern->IsString());
				1153	if (!key_pattern->IsInternalizedString()) return;
				1154	cache = factory->string_split_cache();
				1155	} else {
				1156	DCHECK(type == REGEXP_MULTIPLE_INDICES);
				1157	DCHECK(key_pattern->IsFixedArray());
				1158	cache = factory->regexp_multiple_cache();
				1159	}
				1160
				1161	uint32_t hash = key_string->Hash();
				1162	uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
				1163	~(kArrayEntriesPerCacheEntry - 1));
				1164	if (cache->get(index + kStringOffset) == Smi::zero()) {
				1165	cache->set(index + kStringOffset, *key_string);
				1166	cache->set(index + kPatternOffset, *key_pattern);
				1167	cache->set(index + kArrayOffset, *value_array);
				1168	cache->set(index + kLastMatchOffset, *last_match_cache);
				1169	} else {
				1170	uint32_t index2 =
				1171	((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
				1172	if (cache->get(index2 + kStringOffset) == Smi::zero()) {
				1173	cache->set(index2 + kStringOffset, *key_string);
				1174	cache->set(index2 + kPatternOffset, *key_pattern);
				1175	cache->set(index2 + kArrayOffset, *value_array);
				1176	cache->set(index2 + kLastMatchOffset, *last_match_cache);
				1177	} else {
				1178	cache->set(index2 + kStringOffset, Smi::zero());
				1179	cache->set(index2 + kPatternOffset, Smi::zero());
				1180	cache->set(index2 + kArrayOffset, Smi::zero());
				1181	cache->set(index2 + kLastMatchOffset, Smi::zero());
				1182	cache->set(index + kStringOffset, *key_string);
				1183	cache->set(index + kPatternOffset, *key_pattern);
				1184	cache->set(index + kArrayOffset, *value_array);
				1185	cache->set(index + kLastMatchOffset, *last_match_cache);
				1186	}
				1187	}
				1188	// If the array is a reasonably short list of substrings, convert it into a
				1189	// list of internalized strings.
				1190	if (type == STRING_SPLIT_SUBSTRINGS && value_array->length() < 100) {
				1191	for (int i = 0; i < value_array->length(); i++) {
				1192	Handle<String> str(String::cast(value_array->get(i)), isolate);
				1193	Handle<String> internalized_str = factory->InternalizeString(str);
				1194	value_array->set(i, *internalized_str);
				1195	}
				1196	}
				1197	// Convert backing store to a copy-on-write array.
				1198	value_array->set_map_no_write_barrier(
				1199	ReadOnlyRoots(isolate).fixed_cow_array_map());
				1200	}
				1201
				1202	void RegExpResultsCache::Clear(FixedArray cache) {
				1203	for (int i = 0; i < kRegExpResultsCacheSize; i++) {
				1204	cache.set(i, Smi::zero());
				1205	}
				1206	}
				1207
				1208	} // namespace internal
				1209	} // namespace v8