Upgrade V8 to 7.0.276.40
Bug: 117554758
Bug: 117555811
Bug: 117556606
Bug: 117556220
Bug: 117607414
Bug: 117606285
Test: atest com.google.android.gts.devicepolicy.DeviceOwnerTest#testProxyPacProxyTest
Test: atest proxy_resolver_v8_unittest
Change-Id: I2e02d994f107e64e4f465b4d8a02d4159a95240e
diff --git a/src/regexp/OWNERS b/src/regexp/OWNERS
index c493afa..7f916e12 100644
--- a/src/regexp/OWNERS
+++ b/src/regexp/OWNERS
@@ -2,3 +2,5 @@
[email protected]
[email protected]
+
+# COMPONENT: Blink>JavaScript>Runtime
diff --git a/src/regexp/arm/OWNERS b/src/regexp/arm/OWNERS
deleted file mode 100644
index 906a5ce..0000000
--- a/src/regexp/arm/OWNERS
+++ /dev/null
@@ -1 +0,0 @@
[email protected]
diff --git a/src/regexp/arm/regexp-macro-assembler-arm.cc b/src/regexp/arm/regexp-macro-assembler-arm.cc
index 351d34c..f77d521 100644
--- a/src/regexp/arm/regexp-macro-assembler-arm.cc
+++ b/src/regexp/arm/regexp-macro-assembler-arm.cc
@@ -6,9 +6,12 @@
#include "src/regexp/arm/regexp-macro-assembler-arm.h"
+#include "src/assembler-inl.h"
#include "src/code-stubs.h"
+#include "src/heap/factory.h"
#include "src/log.h"
#include "src/macro-assembler.h"
+#include "src/objects-inl.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
@@ -38,14 +41,13 @@
* Each call to a public method should retain this convention.
*
* The stack will have the following structure:
- * - fp[56] Isolate* isolate (address of the current isolate)
- * - fp[52] direct_call (if 1, direct call from JavaScript code,
+ * - fp[52] Isolate* isolate (address of the current isolate)
+ * - fp[48] direct_call (if 1, direct call from JavaScript code,
* if 0, call through the runtime system).
- * - fp[48] stack_area_base (high end of the memory area to use as
+ * - fp[44] stack_area_base (high end of the memory area to use as
* backtracking stack).
- * - fp[44] capture array size (may fit multiple sets of matches)
- * - fp[40] int* capture_array (int[num_saved_registers_], for output).
- * - fp[36] secondary link/return address used by native call.
+ * - fp[40] capture array size (may fit multiple sets of matches)
+ * - fp[36] int* capture_array (int[num_saved_registers_], for output).
* --- sp when called ---
* - fp[32] return address (lr).
* - fp[28] old frame pointer (r11).
@@ -78,17 +80,13 @@
* int start_index,
* Address start,
* Address end,
- * Address secondary_return_address, // Only used by native call.
* int* capture_output_array,
+ * int num_capture_registers,
* byte* stack_area_base,
- * bool direct_call = false)
+ * bool direct_call = false,
+ * Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in arm/simulator-arm.h.
- * When calling as a non-direct call (i.e., from C++ code), the return address
- * area is overwritten with the LR register by the RegExp code. When doing a
- * direct call from generated code, the return address is placed there by
- * the calling code, as in a normal exit frame.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -97,7 +95,7 @@
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
@@ -140,8 +138,8 @@
void RegExpMacroAssemblerARM::AdvanceRegister(int reg, int by) {
- DCHECK(reg >= 0);
- DCHECK(reg < num_registers_);
+ DCHECK_LE(0, reg);
+ DCHECK_GT(num_registers_, reg);
if (by != 0) {
__ ldr(r0, register_location(reg));
__ add(r0, r0, Operand(by));
@@ -291,7 +289,7 @@
} else {
DCHECK(mode_ == UC16);
int argument_count = 4;
- __ PrepareCallCFunction(argument_count, r2);
+ __ PrepareCallCFunction(argument_count);
// r0 - offset of start of capture
// r1 - length of capture
@@ -315,11 +313,11 @@
__ sub(r1, r1, r4);
}
// Isolate.
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (unicode) {
__ mov(r3, Operand(0));
} else // NOLINT
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
{
__ mov(r3, Operand(ExternalReference::isolate_address(isolate())));
}
@@ -368,7 +366,7 @@
__ ldr(r3, MemOperand(frame_pointer(), kStringStartMinusOne));
__ add(r3, r3, r1);
__ cmp(current_input_offset(), r3);
- BranchOrBacktrack(lt, on_no_match);
+ BranchOrBacktrack(le, on_no_match);
} else {
__ cmn(r1, Operand(current_input_offset()));
BranchOrBacktrack(gt, on_no_match);
@@ -449,7 +447,7 @@
uc16 minus,
uc16 mask,
Label* on_not_equal) {
- DCHECK(minus < String::kMaxUtf16CodeUnit);
+ DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
__ sub(r0, current_character(), Operand(minus));
__ and_(r0, r0, Operand(mask));
__ cmp(r0, Operand(c));
@@ -507,12 +505,12 @@
Label success;
__ cmp(current_character(), Operand(' '));
__ b(eq, &success);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ sub(r0, current_character(), Operand('\t'));
__ cmp(r0, Operand('\r' - '\t'));
__ b(ls, &success);
// \u00a0 (NBSP).
- __ cmp(r0, Operand(0x00a0 - '\t'));
+ __ cmp(r0, Operand(0x00A0 - '\t'));
BranchOrBacktrack(ne, on_no_match);
__ bind(&success);
return true;
@@ -534,37 +532,37 @@
BranchOrBacktrack(ls, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ eor(r0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(r0, r0, Operand(0x0b));
- __ cmp(r0, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ sub(r0, r0, Operand(0x0B));
+ __ cmp(r0, Operand(0x0C - 0x0B));
BranchOrBacktrack(ls, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(r0, r0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ sub(r0, r0, Operand(0x2028 - 0x0B));
__ cmp(r0, Operand(1));
BranchOrBacktrack(ls, on_no_match);
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ eor(r0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(r0, r0, Operand(0x0b));
- __ cmp(r0, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ sub(r0, r0, Operand(0x0B));
+ __ cmp(r0, Operand(0x0C - 0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(hi, on_no_match);
} else {
Label done;
__ b(ls, &done);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(r0, r0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ sub(r0, r0, Operand(0x2028 - 0x0B));
__ cmp(r0, Operand(1));
BranchOrBacktrack(hi, on_no_match);
__ bind(&done);
@@ -577,7 +575,7 @@
__ cmp(current_character(), Operand('z'));
BranchOrBacktrack(hi, on_no_match);
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map = ExternalReference::re_word_character_map(isolate());
__ mov(r0, Operand(map));
__ ldrb(r0, MemOperand(r0, current_character()));
__ cmp(r0, Operand::Zero());
@@ -591,7 +589,7 @@
__ cmp(current_character(), Operand('z'));
__ b(hi, &done);
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map = ExternalReference::re_word_character_map(isolate());
__ mov(r0, Operand(map));
__ ldrb(r0, MemOperand(r0, current_character()));
__ cmp(r0, Operand::Zero());
@@ -666,7 +664,7 @@
__ jmp(&return_r0);
__ bind(&stack_limit_hit);
- CallCheckStackGuardState(r0);
+ CallCheckStackGuardState();
__ cmp(r0, Operand::Zero());
// If returned value is non-zero, we exit with the returned value as result.
__ b(ne, &return_r0);
@@ -842,7 +840,7 @@
if (check_preempt_label_.is_linked()) {
SafeCallTarget(&check_preempt_label_);
- CallCheckStackGuardState(r0);
+ CallCheckStackGuardState();
__ cmp(r0, Operand::Zero());
// If returning non-zero, we should end execution with the given
// result as return value.
@@ -861,14 +859,14 @@
// Call GrowStack(backtrack_stackpointer(), &stack_base)
static const int num_arguments = 3;
- __ PrepareCallCFunction(num_arguments, r0);
+ __ PrepareCallCFunction(num_arguments);
__ mov(r0, backtrack_stackpointer());
__ add(r1, frame_pointer(), Operand(kStackHighEnd));
__ mov(r2, Operand(ExternalReference::isolate_address(isolate())));
ExternalReference grow_stack =
ExternalReference::re_grow_stack(isolate());
__ CallCFunction(grow_stack, num_arguments);
- // If return NULL, we have failed to grow the stack, and
+ // If return nullptr, we have failed to grow the stack, and
// must exit with a stack-overflow exception.
__ cmp(r0, Operand::Zero());
__ b(eq, &exit_with_exception);
@@ -887,9 +885,9 @@
}
CodeDesc code_desc;
- masm_->GetCode(&code_desc);
- Handle<Code> code = isolate()->factory()->NewCode(
- code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
+ masm_->GetCode(isolate(), &code_desc);
+ Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
+ masm_->CodeObject());
PROFILE(masm_->isolate(),
RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
return Handle<HeapObject>::cast(code);
@@ -1047,8 +1045,8 @@
// Private methods:
-void RegExpMacroAssemblerARM::CallCheckStackGuardState(Register scratch) {
- __ PrepareCallCFunction(3, scratch);
+void RegExpMacroAssemblerARM::CallCheckStackGuardState() {
+ __ PrepareCallCFunction(3);
// RegExp code frame pointer.
__ mov(r2, frame_pointer());
@@ -1072,7 +1070,7 @@
// Drop the return address from the stack.
__ add(sp, sp, Operand(stack_alignment));
- DCHECK(stack_alignment != 0);
+ DCHECK_NE(0, stack_alignment);
__ ldr(sp, MemOperand(sp, 0));
__ mov(code_pointer(), Operand(masm_->CodeObject()));
@@ -1082,7 +1080,7 @@
// Helper function for reading a value out of a stack frame.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
- return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
+ return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
}
@@ -1132,14 +1130,14 @@
void RegExpMacroAssemblerARM::BranchOrBacktrack(Condition condition,
Label* to) {
if (condition == al) { // Unconditional.
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
__ jmp(to);
return;
}
- if (to == NULL) {
+ if (to == nullptr) {
__ b(condition, &backtrack_label_);
return;
}
@@ -1166,14 +1164,14 @@
void RegExpMacroAssemblerARM::Push(Register source) {
- DCHECK(!source.is(backtrack_stackpointer()));
+ DCHECK(source != backtrack_stackpointer());
__ str(source,
MemOperand(backtrack_stackpointer(), kPointerSize, NegPreIndex));
}
void RegExpMacroAssemblerARM::Pop(Register target) {
- DCHECK(!target.is(backtrack_stackpointer()));
+ DCHECK(target != backtrack_stackpointer());
__ ldr(target,
MemOperand(backtrack_stackpointer(), kPointerSize, PostIndex));
}
@@ -1213,7 +1211,7 @@
// If unaligned load/stores are not supported then this function must only
// be used to load a single character at a time.
if (!CanReadUnaligned()) {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
}
if (mode_ == LATIN1) {
@@ -1222,7 +1220,7 @@
} else if (characters == 2) {
__ ldrh(current_character(), MemOperand(end_of_input_address(), offset));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ ldrb(current_character(), MemOperand(end_of_input_address(), offset));
}
} else {
@@ -1230,7 +1228,7 @@
if (characters == 2) {
__ ldr(current_character(), MemOperand(end_of_input_address(), offset));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ ldrh(current_character(), MemOperand(end_of_input_address(), offset));
}
}
diff --git a/src/regexp/arm/regexp-macro-assembler-arm.h b/src/regexp/arm/regexp-macro-assembler-arm.h
index 6c91064..7c988e9 100644
--- a/src/regexp/arm/regexp-macro-assembler-arm.h
+++ b/src/regexp/arm/regexp-macro-assembler-arm.h
@@ -103,9 +103,8 @@
static const int kStoredRegisters = kFramePointer;
// Return address (stored from link register, read into pc on return).
static const int kReturnAddress = kStoredRegisters + 8 * kPointerSize;
- static const int kSecondaryReturnAddress = kReturnAddress + kPointerSize;
// Stack parameters placed by caller.
- static const int kRegisterOutput = kSecondaryReturnAddress + kPointerSize;
+ static const int kRegisterOutput = kReturnAddress + kPointerSize;
static const int kNumOutputRegisters = kRegisterOutput + kPointerSize;
static const int kStackHighEnd = kNumOutputRegisters + kPointerSize;
static const int kDirectCall = kStackHighEnd + kPointerSize;
@@ -141,7 +140,7 @@
// Generate a call to CheckStackGuardState.
- void CallCheckStackGuardState(Register scratch);
+ void CallCheckStackGuardState();
// The ebp-relative location of a regexp register.
MemOperand register_location(int register_index);
@@ -171,7 +170,7 @@
inline int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
+ // is nullptr, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to);
// Call and return internally in the generated code in a way that
diff --git a/src/regexp/arm64/OWNERS b/src/regexp/arm64/OWNERS
deleted file mode 100644
index 906a5ce..0000000
--- a/src/regexp/arm64/OWNERS
+++ /dev/null
@@ -1 +0,0 @@
[email protected]
diff --git a/src/regexp/arm64/regexp-macro-assembler-arm64.cc b/src/regexp/arm64/regexp-macro-assembler-arm64.cc
index 49a81a7..bf68d80 100644
--- a/src/regexp/arm64/regexp-macro-assembler-arm64.cc
+++ b/src/regexp/arm64/regexp-macro-assembler-arm64.cc
@@ -6,9 +6,11 @@
#include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
+#include "src/arm64/macro-assembler-arm64-inl.h"
#include "src/code-stubs.h"
#include "src/log.h"
#include "src/macro-assembler.h"
+#include "src/objects-inl.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
@@ -39,7 +41,7 @@
* - x29/fp : Frame pointer. Used to access arguments, local variables and
* RegExp registers.
* - x16/x17 : IP registers, used by assembler. Very volatile.
- * - csp : Points to tip of C stack.
+ * - sp : Points to tip of C stack.
*
* - x0-x7 : Used as a cache to store 32 bit capture registers. These
* registers need to be retained every time a call to C code
@@ -54,11 +56,8 @@
* (as referred to in
* the code)
*
- * - fp[104] isolate Address of the current isolate.
- * - fp[96] return_address Secondary link/return address
- * used by an exit frame if this is a
- * native call.
- * ^^^ csp when called ^^^
+ * - fp[96] isolate Address of the current isolate.
+ * ^^^ sp when called ^^^
* - fp[88] lr Return from the RegExp code.
* - fp[80] r29 Old frame pointer (CalleeSaved).
* - fp[0..72] r19-r28 Backup of CalleeSaved registers.
@@ -78,7 +77,7 @@
* - ... num_saved_registers_ registers.
* - ...
* - register N + num_registers - 1
- * ^^^^^^^^^ csp ^^^^^^^^^
+ * ^^^^^^^^^ sp ^^^^^^^^^
*
* The first num_saved_registers_ registers are initialized to point to
* "character -1" in the string (i.e., char_size() bytes before the first
@@ -87,23 +86,17 @@
* The data up to the return address must be placed there by the calling
* code and the remaining arguments are passed in registers, e.g. by calling the
* code entry as cast to a function with the signature:
- * int (*match)(String* input,
- * int start_offset,
- * Address input_start,
- * Address input_end,
- * int* output,
- * int output_size,
- * Address stack_base,
+ * int (*match)(String* input_string,
+ * int start_index,
+ * Address start,
+ * Address end,
+ * int* capture_output_array,
+ * int num_capture_registers,
+ * byte* stack_area_base,
* bool direct_call = false,
- * Address secondary_return_address, // Only used by native call.
- * Isolate* isolate)
+ * Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in arm64/simulator-arm64.h.
- * When calling as a non-direct call (i.e., from C++ code), the return address
- * area is overwritten with the LR register by the RegExp code. When doing a
- * direct call from generated code, the return address is placed there by
- * the calling code, as in a normal exit frame.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -112,7 +105,7 @@
Zone* zone, Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
@@ -122,7 +115,6 @@
success_label_(),
backtrack_label_(),
exit_label_() {
- __ SetStackPointer(csp);
DCHECK_EQ(0, registers_to_save % 2);
// We can cache at most 16 W registers in x0-x7.
STATIC_ASSERT(kNumCachedRegisters <= 16);
@@ -160,7 +152,6 @@
void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
DCHECK((reg >= 0) && (reg < num_registers_));
if (by != 0) {
- Register to_advance;
RegisterState register_state = GetRegisterState(reg);
switch (register_state) {
case STACKED:
@@ -168,15 +159,17 @@
__ Add(w10, w10, by);
__ Str(w10, register_location(reg));
break;
- case CACHED_LSW:
- to_advance = GetCachedRegister(reg);
+ case CACHED_LSW: {
+ Register to_advance = GetCachedRegister(reg);
__ Add(to_advance, to_advance, by);
break;
- case CACHED_MSW:
- to_advance = GetCachedRegister(reg);
+ }
+ case CACHED_MSW: {
+ Register to_advance = GetCachedRegister(reg);
__ Add(to_advance, to_advance,
static_cast<int64_t>(by) << kWRegSizeInBits);
break;
+ }
default:
UNREACHABLE();
break;
@@ -253,7 +246,7 @@
for (int i = 0; i < str.length(); i++) {
if (mode_ == LATIN1) {
__ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
- DCHECK(str[i] <= String::kMaxOneByteCharCode);
+ DCHECK_GE(String::kMaxOneByteCharCode, str[i]);
} else {
__ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
}
@@ -283,7 +276,7 @@
DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
// Find length of back-referenced capture.
- DCHECK((start_reg % 2) == 0);
+ DCHECK_EQ(0, start_reg % 2);
if (start_reg < kNumCachedRegisters) {
__ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
__ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
@@ -371,7 +364,7 @@
__ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
__ Ccmp(current_input_offset(), 0, NoFlag, eq);
// The current input offset should be <= 0, and fit in a W register.
- __ Check(le, kOffsetOutOfRange);
+ __ Check(le, AbortReason::kOffsetOutOfRange);
}
} else {
DCHECK(mode_ == UC16);
@@ -379,7 +372,7 @@
// The cached registers need to be retained.
CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
- DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
+ DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
__ PushCPURegList(cached_registers);
// Put arguments into arguments registers.
@@ -399,11 +392,11 @@
__ Sub(x1, x1, Operand(capture_length, SXTW));
}
// Isolate.
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (unicode) {
__ Mov(x3, Operand(0));
} else // NOLINT
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
{
__ Mov(x3, ExternalReference::isolate_address(isolate()));
}
@@ -444,7 +437,7 @@
Register capture_length = w15;
// Find length of back-referenced capture.
- DCHECK((start_reg % 2) == 0);
+ DCHECK_EQ(0, start_reg % 2);
if (start_reg < kNumCachedRegisters) {
__ Mov(x10, GetCachedRegister(start_reg));
__ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
@@ -508,7 +501,7 @@
__ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
__ Ccmp(current_input_offset(), 0, NoFlag, eq);
// The current input offset should be <= 0, and fit in a W register.
- __ Check(le, kOffsetOutOfRange);
+ __ Check(le, AbortReason::kOffsetOutOfRange);
}
__ Bind(&fallthrough);
}
@@ -541,7 +534,7 @@
uc16 minus,
uc16 mask,
Label* on_not_equal) {
- DCHECK(minus < String::kMaxUtf16CodeUnit);
+ DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
__ Sub(w10, current_character(), minus);
__ And(w10, w10, mask);
CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
@@ -593,11 +586,11 @@
if (mode_ == LATIN1) {
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
- // Check for ' ' or 0x00a0.
+ // Check for ' ' or 0x00A0.
__ Cmp(current_character(), ' ');
- __ Ccmp(current_character(), 0x00a0, ZFlag, ne);
+ __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
__ B(eq, &success);
- // Check range 0x09..0x0d.
+ // Check range 0x09..0x0D.
__ Sub(w10, current_character(), '\t');
CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
__ Bind(&success);
@@ -618,12 +611,12 @@
CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
// Here we emit the conditional branch only once at the end to make branch
// prediction more efficient, even though we could branch out of here
// as soon as a character matches.
- __ Cmp(current_character(), 0x0a);
- __ Ccmp(current_character(), 0x0d, ZFlag, ne);
+ __ Cmp(current_character(), 0x0A);
+ __ Ccmp(current_character(), 0x0D, ZFlag, ne);
if (mode_ == UC16) {
__ Sub(w10, current_character(), 0x2028);
// If the Z flag was set we clear the flags to force a branch.
@@ -636,11 +629,11 @@
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
// We have to check all 4 newline characters before emitting
// the conditional branch.
- __ Cmp(current_character(), 0x0a);
- __ Ccmp(current_character(), 0x0d, ZFlag, ne);
+ __ Cmp(current_character(), 0x0A);
+ __ Ccmp(current_character(), 0x0D, ZFlag, ne);
if (mode_ == UC16) {
__ Sub(w10, current_character(), 0x2028);
// If the Z flag was set we clear the flags to force a fall-through.
@@ -657,7 +650,7 @@
// Table is 256 entries, so all Latin1 characters can be tested.
CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map = ExternalReference::re_word_character_map(isolate());
__ Mov(x10, map);
__ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
@@ -670,7 +663,7 @@
__ Cmp(current_character(), 'z');
__ B(hi, &done);
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map = ExternalReference::re_word_character_map(isolate());
__ Mov(x10, map);
__ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
@@ -711,9 +704,8 @@
// x6: Address stack_base
// x7: int direct_call
- // The stack pointer should be csp on entry.
- // csp[8]: address of the current isolate
- // csp[0]: secondary link/return address used by native call
+ // sp[0]: address of the current isolate (the only stack-passed argument;
+ // the secondary link/return address slot no longer exists)
// Tell the system that we have a stack frame. Because the type is MANUAL, no
// code is generated.
@@ -723,15 +715,14 @@
CPURegList argument_registers(x0, x5, x6, x7);
CPURegList registers_to_retain = kCalleeSaved;
- DCHECK(kCalleeSaved.Count() == 11);
+ DCHECK_EQ(11, kCalleeSaved.Count());
registers_to_retain.Combine(lr);
- DCHECK(csp.Is(__ StackPointer()));
__ PushCPURegList(registers_to_retain);
__ PushCPURegList(argument_registers);
// Set frame pointer in place.
- __ Add(frame_pointer(), csp, argument_registers.Count() * kPointerSize);
+ __ Add(frame_pointer(), sp, argument_registers.Count() * kPointerSize);
// Initialize callee-saved registers.
__ Mov(start_offset(), w1);
@@ -762,7 +753,7 @@
ExternalReference::address_of_stack_limit(isolate());
__ Mov(x10, stack_limit);
__ Ldr(x10, MemOperand(x10));
- __ Subs(x10, csp, x10);
+ __ Subs(x10, sp, x10);
// Handle it if the stack pointer is already below the stack limit.
__ B(ls, &stack_limit_hit);
@@ -793,10 +784,10 @@
// Find negative length (offset of start relative to end).
__ Sub(x10, input_start(), input_end());
if (masm_->emit_debug_code()) {
- // Check that the input string length is < 2^30.
+ // Check that the size of the input string chars is in range.
__ Neg(x11, x10);
- __ Cmp(x11, (1<<30) - 1);
- __ Check(ls, kInputStringTooLong);
+ __ Cmp(x11, SeqTwoByteString::kMaxCharsSize);
+ __ Check(ls, AbortReason::kInputStringTooLong);
}
__ Mov(current_input_offset(), w10);
@@ -858,9 +849,9 @@
// Get string length.
__ Sub(x10, input_end(), input_start());
if (masm_->emit_debug_code()) {
- // Check that the input string length is < 2^30.
- __ Cmp(x10, (1<<30) - 1);
- __ Check(ls, kInputStringTooLong);
+ // Check that the size of the input string chars is in range.
+ __ Cmp(x10, SeqTwoByteString::kMaxCharsSize);
+ __ Check(ls, AbortReason::kInputStringTooLong);
}
// input_start has a start_offset offset on entry. We need to include
// it when computing the length of the whole string.
@@ -1022,9 +1013,7 @@
__ Bind(&return_w0);
// Set stack pointer back to first register to retain
- DCHECK(csp.Is(__ StackPointer()));
- __ Mov(csp, fp);
- __ AssertStackConsistency();
+ __ Mov(sp, fp);
// Restore registers.
__ PopCPURegList(registers_to_retain);
@@ -1035,7 +1024,7 @@
// Registers x0 to x7 are used to store the first captures, they need to be
// retained over calls to C++ code.
CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
- DCHECK((cached_registers.Count() * 2) == kNumCachedRegisters);
+ DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
if (check_preempt_label_.is_linked()) {
__ Bind(&check_preempt_label_);
@@ -1043,7 +1032,7 @@
// The cached registers need to be retained.
__ PushCPURegList(cached_registers);
CallCheckStackGuardState(x10);
- // Returning from the regexp code restores the stack (csp <- fp)
+ // Returning from the regexp code restores the stack (sp <- fp)
// so we don't need to drop the link register from it before exiting.
__ Cbnz(w0, &return_w0);
// Reset the cached registers.
@@ -1064,9 +1053,9 @@
ExternalReference grow_stack =
ExternalReference::re_grow_stack(isolate());
__ CallCFunction(grow_stack, 3);
- // If return NULL, we have failed to grow the stack, and
+ // If return nullptr, we have failed to grow the stack, and
// must exit with a stack-overflow exception.
- // Returning from the regexp code restores the stack (csp <- fp)
+ // Returning from the regexp code restores the stack (sp <- fp)
// so we don't need to drop the link register from it before exiting.
__ Cbz(w0, &exit_with_exception);
// Otherwise use return value as new stack pointer.
@@ -1084,9 +1073,9 @@
}
CodeDesc code_desc;
- masm_->GetCode(&code_desc);
- Handle<Code> code = isolate()->factory()->NewCode(
- code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
+ masm_->GetCode(isolate(), &code_desc);
+ Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
+ masm_->CodeObject());
PROFILE(masm_->isolate(),
RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
return Handle<HeapObject>::cast(code);
@@ -1163,7 +1152,7 @@
if (masm_->emit_debug_code()) {
__ Cmp(x10, kWRegMask);
// The code offset has to fit in a W register.
- __ Check(ls, kOffsetOutOfRange);
+ __ Check(ls, AbortReason::kOffsetOutOfRange);
}
}
Push(w10);
@@ -1185,19 +1174,17 @@
void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
- Register cached_register;
RegisterState register_state = GetRegisterState(reg);
switch (register_state) {
case STACKED:
__ Ldr(current_input_offset(), register_location(reg));
break;
case CACHED_LSW:
- cached_register = GetCachedRegister(reg);
- __ Mov(current_input_offset(), cached_register.W());
+ __ Mov(current_input_offset(), GetCachedRegister(reg).W());
break;
case CACHED_MSW:
- cached_register = GetCachedRegister(reg);
- __ Lsr(current_input_offset().X(), cached_register, kWRegSizeInBits);
+ __ Lsr(current_input_offset().X(), GetCachedRegister(reg),
+ kWRegSizeInBits);
break;
default:
UNREACHABLE();
@@ -1282,7 +1269,7 @@
if (num_registers > 0) {
// If there are some remaining registers, they are stored on the stack.
- DCHECK(reg_from >= kNumCachedRegisters);
+ DCHECK_LE(kNumCachedRegisters, reg_from);
// Move down the indexes of the registers on stack to get the correct offset
// in memory.
@@ -1321,7 +1308,7 @@
if (masm_->emit_debug_code()) {
__ Cmp(x10, Operand(w10, SXTW));
// The stack offset needs to fit in a W register.
- __ Check(eq, kOffsetOutOfRange);
+ __ Check(eq, AbortReason::kOffsetOutOfRange);
}
StoreRegister(reg, w10);
}
@@ -1375,14 +1362,13 @@
int align_mask = (alignment / kXRegSize) - 1;
int xreg_to_claim = (3 + align_mask) & ~align_mask;
- DCHECK(csp.Is(__ StackPointer()));
__ Claim(xreg_to_claim);
// CheckStackGuardState needs the end and start addresses of the input string.
__ Poke(input_end(), 2 * kPointerSize);
- __ Add(x5, csp, 2 * kPointerSize);
+ __ Add(x5, sp, 2 * kPointerSize);
__ Poke(input_start(), kPointerSize);
- __ Add(x4, csp, kPointerSize);
+ __ Add(x4, sp, kPointerSize);
__ Mov(w3, start_offset());
// RegExp code frame pointer.
@@ -1393,7 +1379,7 @@
// We need to pass a pointer to the return address as first argument.
// The DirectCEntry stub will place the return address on the stack before
// calling so the stack pointer will point to it.
- __ Mov(x0, csp);
+ __ Mov(x0, sp);
ExternalReference check_stack_guard_state =
ExternalReference::re_check_stack_guard_state(isolate());
@@ -1405,7 +1391,6 @@
__ Peek(input_start(), kPointerSize);
__ Peek(input_end(), 2 * kPointerSize);
- DCHECK(csp.Is(__ StackPointer()));
__ Drop(xreg_to_claim);
// Reload the Code pointer.
@@ -1415,14 +1400,14 @@
void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
Label* to) {
if (condition == al) { // Unconditional.
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
__ B(to);
return;
}
- if (to == NULL) {
+ if (to == nullptr) {
to = &backtrack_label_;
}
__ B(condition, to);
@@ -1433,7 +1418,7 @@
Condition condition,
Label* to) {
if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
- if (to == NULL) {
+ if (to == nullptr) {
to = &backtrack_label_;
}
if (condition == eq) {
@@ -1454,8 +1439,7 @@
ExternalReference::address_of_stack_limit(isolate());
__ Mov(x10, stack_limit);
__ Ldr(x10, MemOperand(x10));
- DCHECK(csp.Is(__ StackPointer()));
- __ Cmp(csp, x10);
+ __ Cmp(sp, x10);
CallIf(&check_preempt_label_, ls);
}
@@ -1489,7 +1473,7 @@
Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
- DCHECK(register_index < kNumCachedRegisters);
+ DCHECK_GT(kNumCachedRegisters, register_index);
return Register::Create(register_index / 2, kXRegSizeInBits);
}
@@ -1497,11 +1481,11 @@
Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
Register maybe_result) {
DCHECK(maybe_result.Is32Bits());
- DCHECK(register_index >= 0);
+ DCHECK_LE(0, register_index);
if (num_registers_ <= register_index) {
num_registers_ = register_index + 1;
}
- Register result;
+ Register result = NoReg;
RegisterState register_state = GetRegisterState(register_index);
switch (register_state) {
case STACKED:
@@ -1528,27 +1512,28 @@
void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
Register source) {
DCHECK(source.Is32Bits());
- DCHECK(register_index >= 0);
+ DCHECK_LE(0, register_index);
if (num_registers_ <= register_index) {
num_registers_ = register_index + 1;
}
- Register cached_register;
RegisterState register_state = GetRegisterState(register_index);
switch (register_state) {
case STACKED:
__ Str(source, register_location(register_index));
break;
- case CACHED_LSW:
- cached_register = GetCachedRegister(register_index);
+ case CACHED_LSW: {
+ Register cached_register = GetCachedRegister(register_index);
if (!source.Is(cached_register.W())) {
__ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
}
break;
- case CACHED_MSW:
- cached_register = GetCachedRegister(register_index);
+ }
+ case CACHED_MSW: {
+ Register cached_register = GetCachedRegister(register_index);
__ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
break;
+ }
default:
UNREACHABLE();
break;
@@ -1565,14 +1550,12 @@
void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
- DCHECK(csp.Is(__ StackPointer()));
__ Pop(lr, xzr);
__ Add(lr, lr, Operand(masm_->CodeObject()));
}
void RegExpMacroAssemblerARM64::SaveLinkRegister() {
- DCHECK(csp.Is(__ StackPointer()));
__ Sub(lr, lr, Operand(masm_->CodeObject()));
__ Push(xzr, lr);
}
@@ -1580,7 +1563,7 @@
MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
DCHECK(register_index < (1<<30));
- DCHECK(register_index >= kNumCachedRegisters);
+ DCHECK_LE(kNumCachedRegisters, register_index);
if (num_registers_ <= register_index) {
num_registers_ = register_index + 1;
}
@@ -1593,7 +1576,7 @@
Register scratch) {
DCHECK(register_index < (1<<30));
DCHECK(register_index < num_saved_registers_);
- DCHECK(register_index >= kNumCachedRegisters);
+ DCHECK_LE(kNumCachedRegisters, register_index);
DCHECK_EQ(register_index % 2, 0);
register_index -= kNumCachedRegisters;
int offset = kFirstCaptureOnStack - register_index * kWRegSize;
@@ -1620,7 +1603,7 @@
// disable it.
// TODO(pielan): See whether or not we should disable unaligned accesses.
if (!CanReadUnaligned()) {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
}
if (cp_offset != 0) {
@@ -1629,7 +1612,7 @@
__ Add(x10, x10, Operand(current_input_offset(), SXTW));
__ Cmp(x10, Operand(w10, SXTW));
// The offset needs to fit in a W register.
- __ Check(eq, kOffsetOutOfRange);
+ __ Check(eq, AbortReason::kOffsetOutOfRange);
} else {
__ Add(w10, current_input_offset(), cp_offset * char_size());
}
@@ -1642,7 +1625,7 @@
} else if (characters == 2) {
__ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
}
} else {
@@ -1650,7 +1633,7 @@
if (characters == 2) {
__ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
}
}
@@ -1661,4 +1644,6 @@
} // namespace internal
} // namespace v8
+#undef __
+
#endif // V8_TARGET_ARCH_ARM64
diff --git a/src/regexp/arm64/regexp-macro-assembler-arm64.h b/src/regexp/arm64/regexp-macro-assembler-arm64.h
index 5db220e..42a41ba 100644
--- a/src/regexp/arm64/regexp-macro-assembler-arm64.h
+++ b/src/regexp/arm64/regexp-macro-assembler-arm64.h
@@ -109,9 +109,8 @@
// Return address.
// It is placed above the 11 callee-saved registers.
static const int kReturnAddress = kCalleeSavedRegisters + 11 * kPointerSize;
- static const int kSecondaryReturnAddress = kReturnAddress + kPointerSize;
// Stack parameter placed by caller.
- static const int kIsolate = kSecondaryReturnAddress + kPointerSize;
+ static const int kIsolate = kReturnAddress + kPointerSize;
// Below the frame pointer.
// Register parameters stored by setup code.
@@ -200,7 +199,7 @@
int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
+ // is nullptr, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to);
// Compares reg against immmediate before calling BranchOrBacktrack.
@@ -233,7 +232,7 @@
};
RegisterState GetRegisterState(int register_index) {
- DCHECK(register_index >= 0);
+ DCHECK_LE(0, register_index);
if (register_index >= kNumCachedRegisters) {
return STACKED;
} else {
diff --git a/src/regexp/ia32/regexp-macro-assembler-ia32.cc b/src/regexp/ia32/regexp-macro-assembler-ia32.cc
index 6b4ea24..0d479ca 100644
--- a/src/regexp/ia32/regexp-macro-assembler-ia32.cc
+++ b/src/regexp/ia32/regexp-macro-assembler-ia32.cc
@@ -6,8 +6,10 @@
#include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
+#include "src/assembler-inl.h"
#include "src/log.h"
#include "src/macro-assembler.h"
+#include "src/objects-inl.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
@@ -69,9 +71,10 @@
* Address start,
* Address end,
* int* capture_output_array,
- * bool at_start,
+ * int num_capture_registers,
* byte* stack_area_base,
- * bool direct_call)
+ * bool direct_call = false,
+ * Isolate* isolate);
*/
#define __ ACCESS_MASM(masm_)
@@ -80,7 +83,7 @@
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
@@ -122,8 +125,8 @@
void RegExpMacroAssemblerIA32::AdvanceRegister(int reg, int by) {
- DCHECK(reg >= 0);
- DCHECK(reg < num_registers_);
+ DCHECK_LE(0, reg);
+ DCHECK_GT(num_registers_, reg);
if (by != 0) {
__ add(register_location(reg), Immediate(by));
}
@@ -298,11 +301,11 @@
// Isolate* isolate or 0 if unicode flag.
// Set isolate.
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (unicode) {
__ mov(Operand(esp, 3 * kPointerSize), Immediate(0));
} else // NOLINT
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
{
__ mov(Operand(esp, 3 * kPointerSize),
Immediate(ExternalReference::isolate_address(isolate())));
@@ -468,7 +471,7 @@
uc16 minus,
uc16 mask,
Label* on_not_equal) {
- DCHECK(minus < String::kMaxUtf16CodeUnit);
+ DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
__ lea(eax, Operand(current_character(), -minus));
if (c == 0) {
__ test(eax, Immediate(mask));
@@ -528,12 +531,12 @@
Label success;
__ cmp(current_character(), ' ');
__ j(equal, &success, Label::kNear);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ lea(eax, Operand(current_character(), -'\t'));
__ cmp(eax, '\r' - '\t');
__ j(below_equal, &success, Label::kNear);
// \u00a0 (NBSP).
- __ cmp(eax, 0x00a0 - '\t');
+ __ cmp(eax, 0x00A0 - '\t');
BranchOrBacktrack(not_equal, on_no_match);
__ bind(&success);
return true;
@@ -555,18 +558,18 @@
BranchOrBacktrack(below_equal, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ mov(eax, current_character());
__ xor_(eax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(eax, Immediate(0x0b));
- __ cmp(eax, 0x0c - 0x0b);
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ sub(eax, Immediate(0x0B));
+ __ cmp(eax, 0x0C - 0x0B);
BranchOrBacktrack(below_equal, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(eax, Immediate(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ sub(eax, Immediate(0x2028 - 0x0B));
__ cmp(eax, 0x2029 - 0x2028);
BranchOrBacktrack(below_equal, on_no_match);
}
@@ -579,9 +582,10 @@
BranchOrBacktrack(above, on_no_match);
}
DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
- ExternalReference word_map = ExternalReference::re_word_character_map();
+ ExternalReference word_map =
+ ExternalReference::re_word_character_map(isolate());
__ test_b(current_character(),
- Operand::StaticArray(current_character(), times_1, word_map));
+ masm_->StaticArray(current_character(), times_1, word_map));
BranchOrBacktrack(zero, on_no_match);
return true;
}
@@ -593,9 +597,10 @@
__ j(above, &done);
}
DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
- ExternalReference word_map = ExternalReference::re_word_character_map();
+ ExternalReference word_map =
+ ExternalReference::re_word_character_map(isolate());
__ test_b(current_character(),
- Operand::StaticArray(current_character(), times_1, word_map));
+ masm_->StaticArray(current_character(), times_1, word_map));
BranchOrBacktrack(not_zero, on_no_match);
if (mode_ != LATIN1) {
__ bind(&done);
@@ -607,13 +612,13 @@
// Match any character.
return true;
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 or 0x2029).
// The opposite of '.'.
__ mov(eax, current_character());
__ xor_(eax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(eax, Immediate(0x0b));
- __ cmp(eax, 0x0c - 0x0b);
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ sub(eax, Immediate(0x0B));
+ __ cmp(eax, 0x0C - 0x0B);
if (mode_ == LATIN1) {
BranchOrBacktrack(above, on_no_match);
} else {
@@ -621,9 +626,9 @@
BranchOrBacktrack(below_equal, &done);
DCHECK_EQ(UC16, mode_);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(eax, Immediate(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ sub(eax, Immediate(0x2028 - 0x0B));
__ cmp(eax, 1);
BranchOrBacktrack(above, on_no_match);
__ bind(&done);
@@ -676,7 +681,7 @@
ExternalReference stack_limit =
ExternalReference::address_of_stack_limit(isolate());
__ mov(ecx, esp);
- __ sub(ecx, Operand::StaticVariable(stack_limit));
+ __ sub(ecx, masm_->StaticVariable(stack_limit));
// Handle it if the stack pointer is already below the stack limit.
__ j(below_equal, &stack_limit_hit);
// Check if there is room for the variable number of registers above
@@ -910,7 +915,7 @@
ExternalReference grow_stack =
ExternalReference::re_grow_stack(isolate());
__ CallCFunction(grow_stack, num_arguments);
- // If return NULL, we have failed to grow the stack, and
+ // If return nullptr, we have failed to grow the stack, and
// must exit with a stack-overflow exception.
__ or_(eax, eax);
__ j(equal, &exit_with_exception);
@@ -931,11 +936,9 @@
}
CodeDesc code_desc;
- masm_->GetCode(&code_desc);
- Handle<Code> code =
- isolate()->factory()->NewCode(code_desc,
- Code::ComputeFlags(Code::REGEXP),
- masm_->CodeObject());
+ masm_->GetCode(masm_->isolate(), &code_desc);
+ Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
+ masm_->CodeObject());
PROFILE(masm_->isolate(),
RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
return Handle<HeapObject>::cast(code);
@@ -1105,7 +1108,7 @@
// Helper function for reading a value out of a stack frame.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
- return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
+ return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
}
@@ -1153,14 +1156,14 @@
void RegExpMacroAssemblerIA32::BranchOrBacktrack(Condition condition,
Label* to) {
if (condition < 0) { // No condition
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
__ jmp(to);
return;
}
- if (to == NULL) {
+ if (to == nullptr) {
__ j(condition, &backtrack_label_);
return;
}
@@ -1189,7 +1192,7 @@
void RegExpMacroAssemblerIA32::Push(Register source) {
- DCHECK(!source.is(backtrack_stackpointer()));
+ DCHECK(source != backtrack_stackpointer());
// Notice: This updates flags, unlike normal Push.
__ sub(backtrack_stackpointer(), Immediate(kPointerSize));
__ mov(Operand(backtrack_stackpointer(), 0), source);
@@ -1204,7 +1207,7 @@
void RegExpMacroAssemblerIA32::Pop(Register target) {
- DCHECK(!target.is(backtrack_stackpointer()));
+ DCHECK(target != backtrack_stackpointer());
__ mov(target, Operand(backtrack_stackpointer(), 0));
// Notice: This updates flags, unlike normal Pop.
__ add(backtrack_stackpointer(), Immediate(kPointerSize));
@@ -1216,7 +1219,7 @@
Label no_preempt;
ExternalReference stack_limit =
ExternalReference::address_of_stack_limit(isolate());
- __ cmp(esp, Operand::StaticVariable(stack_limit));
+ __ cmp(esp, masm_->StaticVariable(stack_limit));
__ j(above, &no_preempt);
SafeCall(&check_preempt_label_);
@@ -1229,7 +1232,7 @@
Label no_stack_overflow;
ExternalReference stack_limit =
ExternalReference::address_of_regexp_stack_limit(isolate());
- __ cmp(backtrack_stackpointer(), Operand::StaticVariable(stack_limit));
+ __ cmp(backtrack_stackpointer(), masm_->StaticVariable(stack_limit));
__ j(above, &no_stack_overflow);
SafeCall(&stack_overflow_label_);
@@ -1246,7 +1249,7 @@
} else if (characters == 2) {
__ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
}
} else {
@@ -1255,7 +1258,7 @@
__ mov(current_character(),
Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ movzx_w(current_character(),
Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
}
diff --git a/src/regexp/ia32/regexp-macro-assembler-ia32.h b/src/regexp/ia32/regexp-macro-assembler-ia32.h
index fa17413..02afc99 100644
--- a/src/regexp/ia32/regexp-macro-assembler-ia32.h
+++ b/src/regexp/ia32/regexp-macro-assembler-ia32.h
@@ -152,7 +152,7 @@
inline int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
+ // is nullptr, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to);
// Call and return internally in the generated code in a way that
diff --git a/src/regexp/interpreter-irregexp.cc b/src/regexp/interpreter-irregexp.cc
index 4f8f96a..2c1b890 100644
--- a/src/regexp/interpreter-irregexp.cc
+++ b/src/regexp/interpreter-irregexp.cc
@@ -16,9 +16,9 @@
#include "src/unicode.h"
#include "src/utils.h"
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
#include "unicode/uchar.h"
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
namespace v8 {
namespace internal {
@@ -114,13 +114,13 @@
static int32_t Load32Aligned(const byte* pc) {
- DCHECK((reinterpret_cast<intptr_t>(pc) & 3) == 0);
+ DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
return *reinterpret_cast<const int32_t *>(pc);
}
static int32_t Load16Aligned(const byte* pc) {
- DCHECK((reinterpret_cast<intptr_t>(pc) & 1) == 0);
+ DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
return *reinterpret_cast<const uint16_t *>(pc);
}
@@ -175,7 +175,6 @@
switch (insn & BYTECODE_MASK) {
BYTECODE(BREAK)
UNREACHABLE();
- return RegExpImpl::RE_FAILURE;
BYTECODE(PUSH_CP)
if (--backtrack_stack_space < 0) {
return RegExpImpl::RE_EXCEPTION;
@@ -301,7 +300,7 @@
break;
}
BYTECODE(LOAD_4_CURRENT_CHARS) {
- DCHECK(sizeof(Char) == 1);
+ DCHECK_EQ(1, sizeof(Char));
int pos = current + (insn >> BYTECODE_SHIFT);
if (pos + 4 > subject.length() || pos < 0) {
pc = code_base + Load32Aligned(pc + 4);
@@ -318,7 +317,7 @@
break;
}
BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
- DCHECK(sizeof(Char) == 1);
+ DCHECK_EQ(1, sizeof(Char));
int pos = current + (insn >> BYTECODE_SHIFT);
Char next1 = subject[pos + 1];
Char next2 = subject[pos + 2];
@@ -520,6 +519,7 @@
break;
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
+ V8_FALLTHROUGH;
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
bool unicode =
(insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
@@ -538,6 +538,7 @@
break;
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
+ V8_FALLTHROUGH;
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
bool unicode = (insn & BYTECODE_MASK) ==
BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
diff --git a/src/regexp/jsregexp-inl.h b/src/regexp/jsregexp-inl.h
index 4bcda43..756210b 100644
--- a/src/regexp/jsregexp-inl.h
+++ b/src/regexp/jsregexp-inl.h
@@ -7,8 +7,8 @@
#define V8_REGEXP_JSREGEXP_INL_H_
#include "src/allocation.h"
-#include "src/heap/heap.h"
#include "src/objects.h"
+#include "src/objects/js-regexp-inl.h"
#include "src/regexp/jsregexp.h"
namespace v8 {
@@ -31,7 +31,7 @@
// Fail if last batch was not even fully filled.
if (num_matches_ < max_matches_) {
num_matches_ = 0; // Signal failed match.
- return NULL;
+ return nullptr;
}
int32_t* last_match =
@@ -39,11 +39,9 @@
int last_end_index = last_match[1];
if (regexp_->TypeTag() == JSRegExp::ATOM) {
- num_matches_ = RegExpImpl::AtomExecRaw(regexp_,
- subject_,
- last_end_index,
- register_array_,
- register_array_size_);
+ num_matches_ =
+ RegExpImpl::AtomExecRaw(isolate_, regexp_, subject_, last_end_index,
+ register_array_, register_array_size_);
} else {
int last_start_index = last_match[0];
if (last_start_index == last_end_index) {
@@ -52,16 +50,14 @@
}
if (last_end_index > subject_->length()) {
num_matches_ = 0; // Signal failed match.
- return NULL;
+ return nullptr;
}
- num_matches_ = RegExpImpl::IrregexpExecRaw(regexp_,
- subject_,
- last_end_index,
- register_array_,
- register_array_size_);
+ num_matches_ = RegExpImpl::IrregexpExecRaw(
+ isolate_, regexp_, subject_, last_end_index, register_array_,
+ register_array_size_);
}
- if (num_matches_ <= 0) return NULL;
+ if (num_matches_ <= 0) return nullptr;
current_match_index_ = 0;
return register_array_;
} else {
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index 0ed3086..3fdc3d9 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -5,12 +5,14 @@
#include "src/regexp/jsregexp.h"
#include <memory>
+#include <vector>
#include "src/base/platform/platform.h"
+#include "src/code-tracer.h"
#include "src/compilation-cache.h"
#include "src/elements.h"
#include "src/execution.h"
-#include "src/factory.h"
+#include "src/heap/factory.h"
#include "src/isolate-inl.h"
#include "src/messages.h"
#include "src/ostreams.h"
@@ -25,11 +27,12 @@
#include "src/splay-tree-inl.h"
#include "src/string-search.h"
#include "src/unicode-decoder.h"
+#include "src/unicode-inl.h"
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
#include "unicode/uniset.h"
#include "unicode/utypes.h"
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
#ifndef V8_INTERPRETED_REGEXP
#if V8_TARGET_ARCH_IA32
@@ -48,8 +51,6 @@
#include "src/regexp/mips/regexp-macro-assembler-mips.h"
#elif V8_TARGET_ARCH_MIPS64
#include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
-#elif V8_TARGET_ARCH_X87
-#include "src/regexp/x87/regexp-macro-assembler-x87.h"
#else
#error Unsupported target architecture.
#endif
@@ -59,19 +60,19 @@
namespace v8 {
namespace internal {
-MUST_USE_RESULT
+V8_WARN_UNUSED_RESULT
static inline MaybeHandle<Object> ThrowRegExpException(
- Handle<JSRegExp> re, Handle<String> pattern, Handle<String> error_text) {
- Isolate* isolate = re->GetIsolate();
+ Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
+ Handle<String> error_text) {
THROW_NEW_ERROR(isolate, NewSyntaxError(MessageTemplate::kMalformedRegExp,
pattern, error_text),
Object);
}
-
-inline void ThrowRegExpException(Handle<JSRegExp> re,
+inline void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> error_text) {
- USE(ThrowRegExpException(re, Handle<String>(re->Pattern()), error_text));
+ USE(ThrowRegExpException(isolate, re, Handle<String>(re->Pattern(), isolate),
+ error_text));
}
@@ -79,8 +80,8 @@
const int* ranges,
int ranges_length,
Interval new_range) {
- DCHECK((ranges_length & 1) == 1);
- DCHECK(ranges[ranges_length - 1] == String::kMaxCodePoint + 1);
+ DCHECK_EQ(1, ranges_length & 1);
+ DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1]);
if (containment == kLatticeUnknown) return containment;
bool inside = false;
int last = 0;
@@ -98,14 +99,12 @@
return containment;
}
-
// More makes code generation slower, less makes V8 benchmark score lower.
const int kMaxLookaheadForBoyerMoore = 8;
// In a 3-character pattern you can maximally step forwards 3 characters
// at a time, which is not always enough to pay for the extra logic.
const int kPatternTooShortForBoyerMoore = 2;
-
// Identifies the sort of regexps where the regexp engine is faster
// than the code used for atom matches.
static bool HasFewDifferentCharacters(Handle<String> pattern) {
@@ -128,14 +127,13 @@
return true;
}
-
// Generic RegExp methods. Dispatches to implementation specific methods.
-
-MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
+MaybeHandle<Object> RegExpImpl::Compile(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags) {
- Isolate* isolate = re->GetIsolate();
+ DCHECK(pattern->IsFlat());
+
Zone zone(isolate->allocator(), ZONE_NAME);
CompilationCache* compilation_cache = isolate->compilation_cache();
MaybeHandle<FixedArray> maybe_cached =
@@ -145,82 +143,77 @@
re->set_data(*cached);
return re;
}
- pattern = String::Flatten(pattern);
+
PostponeInterruptsScope postpone(isolate);
RegExpCompileData parse_result;
FlatStringReader reader(isolate, pattern);
- if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader, flags,
+ DCHECK(!isolate->has_pending_exception());
+ if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
&parse_result)) {
// Throw an exception if we fail to parse the pattern.
- return ThrowRegExpException(re, pattern, parse_result.error);
+ return ThrowRegExpException(isolate, re, pattern, parse_result.error);
}
bool has_been_compiled = false;
- if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) &&
- !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) {
+ if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
+ !HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern.
- AtomCompile(re, pattern, flags, pattern);
+ AtomCompile(isolate, re, pattern, flags, pattern);
has_been_compiled = true;
- } else if (parse_result.tree->IsAtom() && !(flags & JSRegExp::kIgnoreCase) &&
- !(flags & JSRegExp::kSticky) && parse_result.capture_count == 0) {
+ } else if (parse_result.tree->IsAtom() && !IsSticky(flags) &&
+ parse_result.capture_count == 0) {
RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data();
Handle<String> atom_string;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, atom_string,
- isolate->factory()->NewStringFromTwoByte(atom_pattern),
- Object);
- if (!HasFewDifferentCharacters(atom_string)) {
- AtomCompile(re, pattern, flags, atom_string);
+ isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
+ if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) {
+ AtomCompile(isolate, re, pattern, flags, atom_string);
has_been_compiled = true;
}
}
if (!has_been_compiled) {
- IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
+ IrregexpInitialize(isolate, re, pattern, flags, parse_result.capture_count);
}
DCHECK(re->data()->IsFixedArray());
// Compilation succeeded so the data is set on the regexp
// and we can store it in the cache.
- Handle<FixedArray> data(FixedArray::cast(re->data()));
+ Handle<FixedArray> data(FixedArray::cast(re->data()), isolate);
compilation_cache->PutRegExp(pattern, flags, data);
return re;
}
-MaybeHandle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
+MaybeHandle<Object> RegExpImpl::Exec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info) {
switch (regexp->TypeTag()) {
case JSRegExp::ATOM:
- return AtomExec(regexp, subject, index, last_match_info);
+ return AtomExec(isolate, regexp, subject, index, last_match_info);
case JSRegExp::IRREGEXP: {
- return IrregexpExec(regexp, subject, index, last_match_info);
+ return IrregexpExec(isolate, regexp, subject, index, last_match_info);
}
default:
UNREACHABLE();
- return MaybeHandle<Object>();
}
}
// RegExp Atom implementation: Simple string search using indexOf.
-
-void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags,
+void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
+ Handle<String> pattern, JSRegExp::Flags flags,
Handle<String> match_pattern) {
- re->GetIsolate()->factory()->SetRegExpAtomData(re,
- JSRegExp::ATOM,
- pattern,
- flags,
- match_pattern);
+ isolate->factory()->SetRegExpAtomData(re, JSRegExp::ATOM, pattern, flags,
+ match_pattern);
}
-static void SetAtomLastCapture(Handle<RegExpMatchInfo> last_match_info,
+static void SetAtomLastCapture(Isolate* isolate,
+ Handle<RegExpMatchInfo> last_match_info,
String* subject, int from, int to) {
- SealHandleScope shs(last_match_info->GetIsolate());
+ SealHandleScope shs(isolate);
last_match_info->SetNumberOfCaptureRegisters(2);
last_match_info->SetLastSubject(subject);
last_match_info->SetLastInput(subject);
@@ -228,18 +221,13 @@
last_match_info->SetCapture(1, to);
}
-
-int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
- Handle<String> subject,
- int index,
- int32_t* output,
+int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
+ Handle<String> subject, int index, int32_t* output,
int output_size) {
- Isolate* isolate = regexp->GetIsolate();
+ DCHECK_LE(0, index);
+ DCHECK_LE(index, subject->length());
- DCHECK(0 <= index);
- DCHECK(index <= subject->length());
-
- subject = String::Flatten(subject);
+ subject = String::Flatten(isolate, subject);
DisallowHeapAllocation no_gc; // ensure vectors stay valid
String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
@@ -280,22 +268,21 @@
return output_size / 2;
}
-Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, Handle<String> subject,
- int index,
+Handle<Object> RegExpImpl::AtomExec(Isolate* isolate, Handle<JSRegExp> re,
+ Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info) {
- Isolate* isolate = re->GetIsolate();
-
static const int kNumRegisters = 2;
STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize);
int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
- int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters);
+ int res =
+ AtomExecRaw(isolate, re, subject, index, output_registers, kNumRegisters);
if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value();
DCHECK_EQ(res, RegExpImpl::RE_SUCCESS);
SealHandleScope shs(isolate);
- SetAtomLastCapture(last_match_info, *subject, output_registers[0],
+ SetAtomLastCapture(isolate, last_match_info, *subject, output_registers[0],
output_registers[1]);
return last_match_info;
}
@@ -309,7 +296,7 @@
// from the source pattern.
// If compilation fails, an exception is thrown and this function
// returns false.
-bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
+bool RegExpImpl::EnsureCompiledIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte) {
Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));
@@ -318,74 +305,54 @@
#else // V8_INTERPRETED_REGEXP (RegExp native code)
if (compiled_code->IsCode()) return true;
#endif
- // We could potentially have marked this as flushable, but have kept
- // a saved version if we did not flush it yet.
- Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
- if (saved_code->IsCode()) {
- // Reinstate the code in the original place.
- re->SetDataAt(JSRegExp::code_index(is_one_byte), saved_code);
- DCHECK(compiled_code->IsSmi());
- return true;
- }
- return CompileIrregexp(re, sample_subject, is_one_byte);
+ return CompileIrregexp(isolate, re, sample_subject, is_one_byte);
}
-
-bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
+bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte) {
// Compile the RegExp.
- Isolate* isolate = re->GetIsolate();
Zone zone(isolate->allocator(), ZONE_NAME);
PostponeInterruptsScope postpone(isolate);
- // If we had a compilation error the last time this is saved at the
- // saved code index.
+#ifdef DEBUG
Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte));
- // When arriving here entry can only be a smi, either representing an
- // uncompiled regexp, a previous compilation error, or code that has
- // been flushed.
+ // When arriving here entry can only be a smi representing an uncompiled
+ // regexp.
DCHECK(entry->IsSmi());
- int entry_value = Smi::cast(entry)->value();
- DCHECK(entry_value == JSRegExp::kUninitializedValue ||
- entry_value == JSRegExp::kCompilationErrorValue ||
- (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0));
-
- if (entry_value == JSRegExp::kCompilationErrorValue) {
- // A previous compilation failed and threw an error which we store in
- // the saved code index (we store the error message, not the actual
- // error). Recreate the error object and throw it.
- Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
- DCHECK(error_string->IsString());
- Handle<String> error_message(String::cast(error_string));
- ThrowRegExpException(re, error_message);
- return false;
- }
+ int entry_value = Smi::ToInt(entry);
+ DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value);
+#endif
JSRegExp::Flags flags = re->GetFlags();
- Handle<String> pattern(re->Pattern());
- pattern = String::Flatten(pattern);
+ Handle<String> pattern(re->Pattern(), isolate);
+ pattern = String::Flatten(isolate, pattern);
RegExpCompileData compile_data;
FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
&compile_data)) {
// Throw an exception if we fail to parse the pattern.
// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
- USE(ThrowRegExpException(re, pattern, compile_data.error));
+ USE(ThrowRegExpException(isolate, re, pattern, compile_data.error));
return false;
}
RegExpEngine::CompilationResult result =
RegExpEngine::Compile(isolate, &zone, &compile_data, flags, pattern,
sample_subject, is_one_byte);
- if (result.error_message != NULL) {
+ if (result.error_message != nullptr) {
// Unable to compile regexp.
+ if (FLAG_abort_on_stack_or_string_length_overflow &&
+ strncmp(result.error_message, "Stack overflow", 15) == 0) {
+ FATAL("Aborting on stack overflow");
+ }
Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
CStrVector(result.error_message)).ToHandleChecked();
- ThrowRegExpException(re, error_message);
+ ThrowRegExpException(isolate, re, error_message);
return false;
}
- Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
+ Handle<FixedArray> data =
+ Handle<FixedArray>(FixedArray::cast(re->data()), isolate);
data->set(JSRegExp::code_index(is_one_byte), result.code);
SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map);
int register_max = IrregexpMaxRegisterCount(*data);
@@ -417,12 +384,12 @@
}
int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
- return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
+ return Smi::ToInt(re->get(JSRegExp::kIrregexpCaptureCountIndex));
}
int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
- return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
+ return Smi::ToInt(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex));
}
@@ -435,27 +402,21 @@
return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));
}
-
-void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
+void RegExpImpl::IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern,
- JSRegExp::Flags flags,
- int capture_count) {
+ JSRegExp::Flags flags, int capture_count) {
// Initialize compiled code entries to null.
- re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
- JSRegExp::IRREGEXP,
- pattern,
- flags,
- capture_count);
+ isolate->factory()->SetRegExpIrregexpData(re, JSRegExp::IRREGEXP, pattern,
+ flags, capture_count);
}
-
-int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
+int RegExpImpl::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject) {
DCHECK(subject->IsFlat());
// Check representation of the underlying storage.
bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
- if (!EnsureCompiledIrregexp(regexp, subject, is_one_byte)) return -1;
+ if (!EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte)) return -1;
#ifdef V8_INTERPRETED_REGEXP
// Byte-code regexp needs space allocated for all its registers.
@@ -471,18 +432,13 @@
#endif // V8_INTERPRETED_REGEXP
}
-
-int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
- Handle<String> subject,
- int index,
- int32_t* output,
- int output_size) {
- Isolate* isolate = regexp->GetIsolate();
-
+int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
+ Handle<String> subject, int index,
+ int32_t* output, int output_size) {
Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
- DCHECK(index >= 0);
- DCHECK(index <= subject->length());
+ DCHECK_LE(0, index);
+ DCHECK_LE(index, subject->length());
DCHECK(subject->IsFlat());
bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
@@ -490,7 +446,7 @@
#ifndef V8_INTERPRETED_REGEXP
DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
- EnsureCompiledIrregexp(regexp, subject, is_one_byte);
+ EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
// The stack is used to allocate registers for the compiled regexp code.
// This means that in case of failure, the output registers array is left
@@ -520,11 +476,10 @@
// the, potentially, different subject (the string can switch between
// being internal and external, and even between being Latin1 and UC16,
// but the characters are always the same).
- IrregexpPrepare(regexp, subject);
+ IrregexpPrepare(isolate, regexp, subject);
is_one_byte = subject->IsOneByteRepresentationUnderneath();
} while (true);
UNREACHABLE();
- return RE_EXCEPTION;
#else // V8_INTERPRETED_REGEXP
DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));
@@ -560,12 +515,11 @@
}
MaybeHandle<Object> RegExpImpl::IrregexpExec(
- Handle<JSRegExp> regexp, Handle<String> subject, int previous_index,
- Handle<RegExpMatchInfo> last_match_info) {
- Isolate* isolate = regexp->GetIsolate();
+ Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
+ int previous_index, Handle<RegExpMatchInfo> last_match_info) {
DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
- subject = String::Flatten(subject);
+ subject = String::Flatten(isolate, subject);
// Prepare space for the return values.
#if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG)
@@ -575,29 +529,31 @@
PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
}
#endif
- int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
+ int required_registers =
+ RegExpImpl::IrregexpPrepare(isolate, regexp, subject);
if (required_registers < 0) {
// Compiling failed with an exception.
DCHECK(isolate->has_pending_exception());
return MaybeHandle<Object>();
}
- int32_t* output_registers = NULL;
+ int32_t* output_registers = nullptr;
if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
output_registers = NewArray<int32_t>(required_registers);
}
std::unique_ptr<int32_t[]> auto_release(output_registers);
- if (output_registers == NULL) {
+ if (output_registers == nullptr) {
output_registers = isolate->jsregexp_static_offsets_vector();
}
- int res = RegExpImpl::IrregexpExecRaw(
- regexp, subject, previous_index, output_registers, required_registers);
+ int res =
+ RegExpImpl::IrregexpExecRaw(isolate, regexp, subject, previous_index,
+ output_registers, required_registers);
if (res == RE_SUCCESS) {
int capture_count =
IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
- return SetLastMatchInfo(
- last_match_info, subject, capture_count, output_registers);
+ return SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
+ output_registers);
}
if (res == RE_EXCEPTION) {
DCHECK(isolate->has_pending_exception());
@@ -608,22 +564,21 @@
}
Handle<RegExpMatchInfo> RegExpImpl::SetLastMatchInfo(
- Handle<RegExpMatchInfo> last_match_info, Handle<String> subject,
- int capture_count, int32_t* match) {
+ Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
+ Handle<String> subject, int capture_count, int32_t* match) {
// This is the only place where match infos can grow. If, after executing the
// regexp, RegExpExecStub finds that the match info is too small, it restarts
// execution in RegExpImpl::Exec, which finally grows the match info right
// here.
int capture_register_count = (capture_count + 1) * 2;
- Handle<RegExpMatchInfo> result =
- RegExpMatchInfo::ReserveCaptures(last_match_info, capture_register_count);
+ Handle<RegExpMatchInfo> result = RegExpMatchInfo::ReserveCaptures(
+ isolate, last_match_info, capture_register_count);
result->SetNumberOfCaptureRegisters(capture_register_count);
if (*result != *last_match_info) {
// The match info has been reallocated, update the corresponding reference
// on the native context.
- Isolate* isolate = last_match_info->GetIsolate();
if (*last_match_info == *isolate->regexp_last_match_info()) {
isolate->native_context()->set_regexp_last_match_info(*result);
} else if (*last_match_info == *isolate->regexp_internal_match_info()) {
@@ -632,7 +587,7 @@
}
DisallowHeapAllocation no_allocation;
- if (match != NULL) {
+ if (match != nullptr) {
for (int i = 0; i < capture_register_count; i += 2) {
result->SetCapture(i, match[i]);
result->SetCapture(i + 1, match[i + 1]);
@@ -643,14 +598,13 @@
return result;
}
-
RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
- Handle<String> subject,
- Isolate* isolate)
- : register_array_(NULL),
- register_array_size_(0),
- regexp_(regexp),
- subject_(subject) {
+ Handle<String> subject, Isolate* isolate)
+ : register_array_(nullptr),
+ register_array_size_(0),
+ regexp_(regexp),
+ subject_(subject),
+ isolate_(isolate) {
#ifdef V8_INTERPRETED_REGEXP
bool interpreted = true;
#else
@@ -663,14 +617,15 @@
// There is no distinction between interpreted and native for atom regexps.
interpreted = false;
} else {
- registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_);
+ registers_per_match_ =
+ RegExpImpl::IrregexpPrepare(isolate_, regexp_, subject_);
if (registers_per_match_ < 0) {
num_matches_ = -1; // Signal exception.
return;
}
}
- DCHECK_NE(0, regexp->GetFlags() & JSRegExp::kGlobal);
+ DCHECK(IsGlobal(regexp->GetFlags()));
if (!interpreted) {
register_array_size_ =
Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
@@ -692,7 +647,7 @@
// to the compiled regexp.
current_match_index_ = max_matches_ - 1;
num_matches_ = max_matches_;
- DCHECK(registers_per_match_ >= 2); // Each match has at least one capture.
+ DCHECK_LE(2, registers_per_match_); // Each match has at least one capture.
DCHECK_GE(register_array_size_, registers_per_match_);
int32_t* last_match =
&register_array_[current_match_index_ * registers_per_match_];
@@ -701,8 +656,7 @@
}
int RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) {
- if ((regexp_->GetFlags() & JSRegExp::kUnicode) != 0 &&
- last_index + 1 < subject_->length() &&
+ if (IsUnicode(regexp_->GetFlags()) && last_index + 1 < subject_->length() &&
unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) &&
unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) {
// Advance over the surrogate pair.
@@ -901,12 +855,11 @@
return 1;
}
UNREACHABLE();
- return 0;
}
DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
- if (table_ == NULL) {
+ if (table_ == nullptr) {
table_ = new(zone()) DispatchTable(zone());
DispatchTableConstructor cons(table_, ignore_case, zone());
cons.BuildTable(this);
@@ -965,7 +918,7 @@
class RegExpCompiler {
public:
RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
- JSRegExp::Flags flags, bool is_one_byte);
+ bool is_one_byte);
int AllocateRegister() {
if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
@@ -991,15 +944,15 @@
return unicode_lookaround_position_register_;
}
- RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
- RegExpNode* start,
- int capture_count,
+ RegExpEngine::CompilationResult Assemble(Isolate* isolate,
+ RegExpMacroAssembler* assembler,
+ RegExpNode* start, int capture_count,
Handle<String> pattern);
inline void AddWork(RegExpNode* node) {
if (!node->on_work_list() && !node->label()->is_bound()) {
node->set_on_work_list(true);
- work_list_->Add(node);
+ work_list_->push_back(node);
}
}
@@ -1017,8 +970,6 @@
void SetRegExpTooBig() { reg_exp_too_big_ = true; }
- inline bool ignore_case() { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
- inline bool unicode() { return (flags_ & JSRegExp::kUnicode) != 0; }
inline bool one_byte() { return one_byte_; }
inline bool optimize() { return optimize_; }
inline void set_optimize(bool value) { optimize_ = value; }
@@ -1045,10 +996,9 @@
int next_register_;
int unicode_lookaround_stack_register_;
int unicode_lookaround_position_register_;
- List<RegExpNode*>* work_list_;
+ std::vector<RegExpNode*>* work_list_;
int recursion_depth_;
RegExpMacroAssembler* macro_assembler_;
- JSRegExp::Flags flags_;
bool one_byte_;
bool reg_exp_too_big_;
bool limiting_recursion_;
@@ -1080,13 +1030,12 @@
// Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded.
RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
- JSRegExp::Flags flags, bool one_byte)
+ bool one_byte)
: next_register_(2 * (capture_count + 1)),
unicode_lookaround_stack_register_(kNoRegister),
unicode_lookaround_position_register_(kNoRegister),
- work_list_(NULL),
+ work_list_(nullptr),
recursion_depth_(0),
- flags_(flags),
one_byte_(one_byte),
reg_exp_too_big_(false),
limiting_recursion_(false),
@@ -1097,17 +1046,12 @@
isolate_(isolate),
zone_(zone) {
accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
- DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
+ DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1);
}
-
RegExpEngine::CompilationResult RegExpCompiler::Assemble(
- RegExpMacroAssembler* macro_assembler,
- RegExpNode* start,
- int capture_count,
- Handle<String> pattern) {
- Isolate* isolate = pattern->GetHeap()->isolate();
-
+ Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start,
+ int capture_count, Handle<String> pattern) {
#ifdef DEBUG
if (FLAG_trace_regexp_assembler)
macro_assembler_ = new RegExpMacroAssemblerTracer(isolate, macro_assembler);
@@ -1115,7 +1059,7 @@
#endif
macro_assembler_ = macro_assembler;
- List <RegExpNode*> work_list(0);
+ std::vector<RegExpNode*> work_list;
work_list_ = &work_list;
Label fail;
macro_assembler_->PushBacktrack(&fail);
@@ -1123,8 +1067,9 @@
start->Emit(this, &new_trace);
macro_assembler_->Bind(&fail);
macro_assembler_->Fail();
- while (!work_list.is_empty()) {
- RegExpNode* node = work_list.RemoveLast();
+ while (!work_list.empty()) {
+ RegExpNode* node = work_list.back();
+ work_list.pop_back();
node->set_on_work_list(false);
if (!node->label()->is_bound()) node->Emit(this, &new_trace);
}
@@ -1135,8 +1080,8 @@
Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
isolate->IncreaseTotalRegexpCodeGenerated(code->Size());
- work_list_ = NULL;
-#ifdef ENABLE_DISASSEMBLER
+ work_list_ = nullptr;
+#if defined(ENABLE_DISASSEMBLER) && !defined(V8_INTERPRETED_REGEXP)
if (FLAG_print_code) {
CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
OFStream os(trace_scope.file());
@@ -1163,8 +1108,7 @@
bool Trace::mentions_reg(int reg) {
- for (DeferredAction* action = actions_;
- action != NULL;
+ for (DeferredAction* action = actions_; action != nullptr;
action = action->next()) {
if (action->Mentions(reg))
return true;
@@ -1175,8 +1119,7 @@
bool Trace::GetStoredPosition(int reg, int* cp_offset) {
DCHECK_EQ(0, *cp_offset);
- for (DeferredAction* action = actions_;
- action != NULL;
+ for (DeferredAction* action = actions_; action != nullptr;
action = action->next()) {
if (action->Mentions(reg)) {
if (action->action_type() == ActionNode::STORE_POSITION) {
@@ -1194,8 +1137,7 @@
int Trace::FindAffectedRegisters(OutSet* affected_registers,
Zone* zone) {
int max_register = RegExpCompiler::kNoRegister;
- for (DeferredAction* action = actions_;
- action != NULL;
+ for (DeferredAction* action = actions_; action != nullptr;
action = action->next()) {
if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
Interval range = static_cast<DeferredClearCaptures*>(action)->range();
@@ -1259,8 +1201,7 @@
int store_position = kNoStore;
// This is a little tricky because we are scanning the actions in reverse
// historical order (newest first).
- for (DeferredAction* action = actions_;
- action != NULL;
+ for (DeferredAction* action = actions_; action != nullptr;
action = action->next()) {
if (action->Mentions(reg)) {
switch (action->action_type()) {
@@ -1273,7 +1214,7 @@
}
// SET_REGISTER is currently only used for newly introduced loop
// counters. They can have a significant previous value if they
- // occour in a loop. TODO(lrn): Propagate this information, so
+ // occur in a loop. TODO(lrn): Propagate this information, so
// we can set undo_action to IGNORE if we know there is no value to
// restore.
undo_action = RESTORE;
@@ -1368,7 +1309,7 @@
DCHECK(!is_trivial());
- if (actions_ == NULL && backtrack() == NULL) {
+ if (actions_ == nullptr && backtrack() == nullptr) {
// Here we just have some deferred cp advances to fix and we are back to
// a normal situation. We may also have to forget some information gained
// through a quick check that was already performed.
@@ -1382,7 +1323,7 @@
// Generate deferred actions here along with code to undo them again.
OutSet affected_registers;
- if (backtrack() != NULL) {
+ if (backtrack() != nullptr) {
// Here we have a concrete backtrack location. These are set up by choice
// nodes and so they indicate that we have a deferred save of the current
// position which we may need to emit here.
@@ -1420,7 +1361,7 @@
max_register,
registers_to_pop,
registers_to_clear);
- if (backtrack() == NULL) {
+ if (backtrack() == nullptr) {
assembler->Backtrack();
} else {
assembler->PopCurrentPosition();
@@ -1481,8 +1422,7 @@
void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
- if (guards_ == NULL)
- guards_ = new(zone) ZoneList<Guard*>(1, zone);
+ if (guards_ == nullptr) guards_ = new (zone) ZoneList<Guard*>(1, zone);
guards_->Add(guard, zone);
}
@@ -1753,7 +1693,7 @@
macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
}
Label ok;
- DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
+ DCHECK_EQ(4, unibrow::Ecma262UnCanonicalize::kMaxWidth);
switch (length) {
case 2: {
if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
@@ -1767,7 +1707,7 @@
}
case 4:
macro_assembler->CheckCharacter(chars[3], &ok);
- // Fall through!
+ V8_FALLTHROUGH;
case 3:
macro_assembler->CheckCharacter(chars[0], &ok);
macro_assembler->CheckCharacter(chars[1], &ok);
@@ -1974,13 +1914,12 @@
}
}
-
// Gets a series of segment boundaries representing a character class. If the
// character is in the range between an even and an odd boundary (counting from
// start_index) then go to even_label, otherwise go to odd_label. We already
// know that the character is in the range of min_char to max_char inclusive.
-// Either label can be NULL indicating backtracking. Either label can also be
-// equal to the fall_through label.
+// Either label can be nullptr indicating backtracking. Either label can also
+// be equal to the fall_through label.
static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
int start_index, int end_index, uc32 min_char,
uc32 max_char, Label* fall_through,
@@ -2237,7 +2176,7 @@
RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
Trace* trace) {
// If we are generating a greedy loop then don't stop and don't reuse code.
- if (trace->stop_node() != NULL) {
+ if (trace->stop_node() != nullptr) {
return CONTINUE;
}
@@ -2295,9 +2234,7 @@
void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
- if (action_type_ == BEGIN_SUBMATCH) {
- bm->SetRest(offset);
- } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
+ if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
}
SaveBMInfo(bm, not_at_start, offset);
@@ -2407,10 +2344,7 @@
int ChoiceNode::EatsAtLeast(int still_to_find,
int budget,
bool not_at_start) {
- return EatsAtLeastHelper(still_to_find,
- budget,
- NULL,
- not_at_start);
+ return EatsAtLeastHelper(still_to_find, budget, nullptr, not_at_start);
}
@@ -2497,8 +2431,8 @@
} else {
// For 2-character preloads in one-byte mode or 1-character preloads in
// two-byte mode we also use a 16 bit load with zero extend.
- static const uint32_t kTwoByteMask = 0xffff;
- static const uint32_t kFourByteMask = 0xffffffff;
+ static const uint32_t kTwoByteMask = 0xFFFF;
+ static const uint32_t kFourByteMask = 0xFFFFFFFF;
if (details->characters() == 2 && compiler->one_byte()) {
if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
} else if (details->characters() == 1 && !compiler->one_byte()) {
@@ -2557,7 +2491,7 @@
QuickCheckDetails::Position* pos =
details->positions(characters_filled_in);
uc16 c = quarks[i];
- if (compiler->ignore_case()) {
+ if (elm.atom()->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
int length = GetCaseIndependentLetters(isolate, c,
compiler->one_byte(), chars);
@@ -2618,6 +2552,7 @@
details->positions(characters_filled_in);
RegExpCharacterClass* tree = elm.char_class();
ZoneList<CharacterRange>* ranges = tree->ranges(zone());
+ DCHECK(!ranges->is_empty());
if (tree->is_negated()) {
// A quick check uses multi-character mask and compare. There is no
// useful way to incorporate a negative char class into this scheme
@@ -2765,29 +2700,26 @@
NodeInfo* info_;
};
-
-RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {
+RegExpNode* SeqRegExpNode::FilterOneByte(int depth) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
VisitMarker marker(info());
- return FilterSuccessor(depth - 1, ignore_case);
+ return FilterSuccessor(depth - 1);
}
-
-RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
- RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
- if (next == NULL) return set_replacement(NULL);
+RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {
+ RegExpNode* next = on_success_->FilterOneByte(depth - 1);
+ if (next == nullptr) return set_replacement(nullptr);
on_success_ = next;
return set_replacement(this);
}
-
-// We need to check for the following characters: 0x39c 0x3bc 0x178.
+// We need to check for the following characters: 0x39C 0x3BC 0x178.
static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
// TODO(dcarney): this could be a lot more efficient.
- return range.Contains(0x39c) ||
- range.Contains(0x3bc) || range.Contains(0x178);
+ return range.Contains(0x039C) || range.Contains(0x03BC) ||
+ range.Contains(0x0178);
}
@@ -2799,8 +2731,7 @@
return false;
}
-
-RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
+RegExpNode* TextNode::FilterOneByte(int depth) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
@@ -2812,16 +2743,13 @@
Vector<const uc16> quarks = elm.atom()->data();
for (int j = 0; j < quarks.length(); j++) {
uint16_t c = quarks[j];
- if (c <= String::kMaxOneByteCharCode) continue;
- if (!ignore_case) return set_replacement(NULL);
- // Here, we need to check for characters whose upper and lower cases
- // are outside the Latin-1 range.
- uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
- // Character is outside Latin-1 completely
- if (converted == 0) return set_replacement(NULL);
- // Convert quark to Latin-1 in place.
- uint16_t* copy = const_cast<uint16_t*>(quarks.start());
- copy[j] = converted;
+ if (elm.atom()->ignore_case()) {
+ c = unibrow::Latin1::TryConvertToLatin1(c);
+ }
+ if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
+ // Replace quark in case we converted to Latin-1.
+ uint16_t* writable_quarks = const_cast<uint16_t*>(quarks.start());
+ writable_quarks[j] = c;
}
} else {
DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
@@ -2835,42 +2763,41 @@
ranges->at(0).from() == 0 &&
ranges->at(0).to() >= String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
- if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
- return set_replacement(NULL);
+ if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
+ continue;
+ return set_replacement(nullptr);
}
} else {
if (range_count == 0 ||
ranges->at(0).from() > String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
- if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
- return set_replacement(NULL);
+ if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
+ continue;
+ return set_replacement(nullptr);
}
}
}
}
- return FilterSuccessor(depth - 1, ignore_case);
+ return FilterSuccessor(depth - 1);
}
-
-RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
+RegExpNode* LoopChoiceNode::FilterOneByte(int depth) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
{
VisitMarker marker(info());
- RegExpNode* continue_replacement =
- continue_node_->FilterOneByte(depth - 1, ignore_case);
+ RegExpNode* continue_replacement = continue_node_->FilterOneByte(depth - 1);
// If we can't continue after the loop then there is no sense in doing the
// loop.
- if (continue_replacement == NULL) return set_replacement(NULL);
+ if (continue_replacement == nullptr) return set_replacement(nullptr);
}
- return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
+ return ChoiceNode::FilterOneByte(depth - 1);
}
-
-RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
+RegExpNode* ChoiceNode::FilterOneByte(int depth) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2879,20 +2806,20 @@
for (int i = 0; i < choice_count; i++) {
GuardedAlternative alternative = alternatives_->at(i);
- if (alternative.guards() != NULL && alternative.guards()->length() != 0) {
+ if (alternative.guards() != nullptr &&
+ alternative.guards()->length() != 0) {
set_replacement(this);
return this;
}
}
int surviving = 0;
- RegExpNode* survivor = NULL;
+ RegExpNode* survivor = nullptr;
for (int i = 0; i < choice_count; i++) {
GuardedAlternative alternative = alternatives_->at(i);
- RegExpNode* replacement =
- alternative.node()->FilterOneByte(depth - 1, ignore_case);
+ RegExpNode* replacement = alternative.node()->FilterOneByte(depth - 1);
DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
- if (replacement != NULL) {
+ if (replacement != nullptr) {
alternatives_->at(i).set_node(replacement);
surviving++;
survivor = replacement;
@@ -2910,8 +2837,8 @@
new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
for (int i = 0; i < choice_count; i++) {
RegExpNode* replacement =
- alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);
- if (replacement != NULL) {
+ alternatives_->at(i).node()->FilterOneByte(depth - 1);
+ if (replacement != nullptr) {
alternatives_->at(i).set_node(replacement);
new_alternatives->Add(alternatives_->at(i), zone());
}
@@ -2920,9 +2847,7 @@
return this;
}
-
-RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
- bool ignore_case) {
+RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2930,15 +2855,15 @@
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
RegExpNode* node = alternatives_->at(1).node();
- RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
- if (replacement == NULL) return set_replacement(NULL);
+ RegExpNode* replacement = node->FilterOneByte(depth - 1);
+ if (replacement == nullptr) return set_replacement(nullptr);
alternatives_->at(1).set_node(replacement);
RegExpNode* neg_node = alternatives_->at(0).node();
- RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
+ RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1);
// If the negative lookahead is always going to fail then
// we don't need to check it.
- if (neg_replacement == NULL) return set_replacement(replacement);
+ if (neg_replacement == nullptr) return set_replacement(replacement);
alternatives_->at(0).set_node(neg_replacement);
return set_replacement(this);
}
@@ -2975,7 +2900,7 @@
bool not_at_start) {
not_at_start = (not_at_start || not_at_start_);
int choice_count = alternatives_->length();
- DCHECK(choice_count > 0);
+ DCHECK_LT(0, choice_count);
alternatives_->at(0).node()->GetQuickCheckDetails(details,
compiler,
characters_filled_in,
@@ -3043,7 +2968,7 @@
new_trace.backtrack())) {
// Newline means \n, \r, 0x2028 or 0x2029.
if (!compiler->one_byte()) {
- assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
+ assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
}
assembler->CheckCharacter('\n', &ok);
assembler->CheckNotCharacter('\r', new_trace.backtrack());
@@ -3060,7 +2985,7 @@
Trace::TriBool next_is_word_character = Trace::UNKNOWN;
bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
BoyerMooreLookahead* lookahead = bm_info(not_at_start);
- if (lookahead == NULL) {
+ if (lookahead == nullptr) {
int eats_at_least =
Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore,
kRecursionBudget,
@@ -3191,7 +3116,7 @@
static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
- if (quick_check == NULL) return false;
+ if (quick_check == nullptr) return false;
if (offset >= quick_check->characters()) return false;
return quick_check->positions(offset)->determines_perfectly;
}
@@ -3250,15 +3175,23 @@
TextElement elm = elements()->at(i);
int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
if (elm.text_type() == TextElement::ATOM) {
+ if (SkipPass(pass, elm.atom()->ignore_case())) continue;
Vector<const uc16> quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
if (first_element_checked && i == 0 && j == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
- EmitCharacterFunction* emit_function = NULL;
+ EmitCharacterFunction* emit_function = nullptr;
+ uc16 quark = quarks[j];
+ if (elm.atom()->ignore_case()) {
+ // Everywhere else we assume that a non-Latin-1 character cannot match
+ // a Latin-1 character. Avoid the cases where this assumption is
+ // invalid by using the Latin1 equivalent instead.
+ quark = unibrow::Latin1::TryConvertToLatin1(quark);
+ }
switch (pass) {
case NON_LATIN1_MATCH:
DCHECK(one_byte);
- if (quarks[j] > String::kMaxOneByteCharCode) {
+ if (quark > String::kMaxOneByteCharCode) {
assembler->GoTo(backtrack);
return;
}
@@ -3275,11 +3208,11 @@
default:
break;
}
- if (emit_function != NULL) {
+ if (emit_function != nullptr) {
bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
bool bound_checked =
- emit_function(isolate, compiler, quarks[j], backtrack,
- cp_offset + j, bounds_check, preloaded);
+ emit_function(isolate, compiler, quark, backtrack, cp_offset + j,
+ bounds_check, preloaded);
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
}
}
@@ -3301,13 +3234,11 @@
int TextNode::Length() {
TextElement elm = elements()->last();
- DCHECK(elm.cp_offset() >= 0);
+ DCHECK_LE(0, elm.cp_offset());
return elm.cp_offset() + elm.length();
}
-
-bool TextNode::SkipPass(int int_pass, bool ignore_case) {
- TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
+bool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) {
if (ignore_case) {
return pass == SIMPLE_CHARACTER_MATCH;
} else {
@@ -3315,32 +3246,32 @@
}
}
-
TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
ZoneList<CharacterRange>* ranges,
bool read_backward,
- RegExpNode* on_success) {
+ RegExpNode* on_success,
+ JSRegExp::Flags flags) {
DCHECK_NOT_NULL(ranges);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
- elms->Add(
- TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)),
- zone);
+ elms->Add(TextElement::CharClass(
+ new (zone) RegExpCharacterClass(zone, ranges, flags)),
+ zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
-
TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
CharacterRange trail,
bool read_backward,
- RegExpNode* on_success) {
+ RegExpNode* on_success,
+ JSRegExp::Flags flags) {
ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(lead_ranges, false)),
+ new (zone) RegExpCharacterClass(zone, lead_ranges, flags)),
zone);
elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(trail_ranges, false)),
+ new (zone) RegExpCharacterClass(zone, trail_ranges, flags)),
zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
@@ -3375,27 +3306,15 @@
// check that now.
if (trace->characters_preloaded() == 1) {
for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
- if (!SkipPass(pass, compiler->ignore_case())) {
- TextEmitPass(compiler,
- static_cast<TextEmitPassType>(pass),
- true,
- trace,
- false,
- &bound_checked_to);
- }
+ TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
+ false, &bound_checked_to);
}
first_elt_done = true;
}
for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
- if (!SkipPass(pass, compiler->ignore_case())) {
- TextEmitPass(compiler,
- static_cast<TextEmitPassType>(pass),
- false,
- trace,
- first_elt_done,
- &bound_checked_to);
- }
+ TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
+ first_elt_done, &bound_checked_to);
}
Trace successor_trace(*trace);
@@ -3438,11 +3357,20 @@
TextElement elm = elements()->at(i);
if (elm.text_type() == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.char_class();
- // None of the standard character classes is different in the case
- // independent case and it slows us down if we don't know that.
- if (cc->is_standard(zone())) continue;
- ZoneList<CharacterRange>* ranges = cc->ranges(zone());
- CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
+#ifdef V8_INTL_SUPPORT
+ bool case_equivalents_already_added =
+ NeedsUnicodeCaseEquivalents(cc->flags());
+#else
+ bool case_equivalents_already_added = false;
+#endif
+ if (IgnoreCase(cc->flags()) && !case_equivalents_already_added) {
+ // None of the standard character classes is different in the case
+ // independent case and it slows us down if we don't know that.
+ if (cc->is_standard(zone())) continue;
+ ZoneList<CharacterRange>* ranges = cc->ranges(zone());
+ CharacterRange::AddCaseEquivalents(isolate, zone(), ranges,
+ is_one_byte);
+ }
}
}
}
@@ -3453,24 +3381,24 @@
RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler) {
- if (read_backward()) return NULL;
- if (elements()->length() != 1) return NULL;
+ if (read_backward()) return nullptr;
+ if (elements()->length() != 1) return nullptr;
TextElement elm = elements()->at(0);
- if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
+ if (elm.text_type() != TextElement::CHAR_CLASS) return nullptr;
RegExpCharacterClass* node = elm.char_class();
ZoneList<CharacterRange>* ranges = node->ranges(zone());
CharacterRange::Canonicalize(ranges);
if (node->is_negated()) {
- return ranges->length() == 0 ? on_success() : NULL;
+ return ranges->length() == 0 ? on_success() : nullptr;
}
- if (ranges->length() != 1) return NULL;
+ if (ranges->length() != 1) return nullptr;
uint32_t max_char;
if (compiler->one_byte()) {
max_char = String::kMaxOneByteCharCode;
} else {
max_char = String::kMaxUtf16CodeUnit;
}
- return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;
+ return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
}
@@ -3521,7 +3449,7 @@
// Back edge of greedy optimized loop node graph.
int text_length =
GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
- DCHECK(text_length != kNodeIsTooComplexForGreedyLoops);
+ DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length);
// Update the counter-based backtracking info on the stack. This is an
// optimization for greedy loops (see below).
DCHECK(trace->cp_offset() == text_length);
@@ -3541,10 +3469,10 @@
int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
int eats_at_least) {
int preload_characters = Min(4, eats_at_least);
+ DCHECK_LE(preload_characters, 4);
if (compiler->macro_assembler()->CanReadUnaligned()) {
bool one_byte = compiler->one_byte();
if (one_byte) {
- if (preload_characters > 4) preload_characters = 4;
// We can't preload 3 characters because there is no machine instruction
// to do that. We can't just load 4 because we could be reading
// beyond the end of the string, which could cause a memory fault.
@@ -3591,7 +3519,7 @@
~AlternativeGenerationList() {
for (int i = kAFew; i < alt_gens_.length(); i++) {
delete alt_gens_[i];
- alt_gens_[i] = NULL;
+ alt_gens_[i] = nullptr;
}
}
@@ -3612,8 +3540,8 @@
// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
// which include WhiteSpace (7.2) or LineTerminator (7.3) values.
static const int kSpaceRanges[] = {
- '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681,
- 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030,
+ '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680,
+ 0x1681, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030,
0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, kRangeEndMarker};
static const int kSpaceRangeCount = arraysize(kSpaceRanges);
@@ -3842,7 +3770,7 @@
Handle<ByteArray> boolean_skip_table = factory->NewByteArray(kSize, TENURED);
int skip_distance = GetSkipTable(
min_lookahead, max_lookahead, boolean_skip_table);
- DCHECK(skip_distance != 0);
+ DCHECK_NE(0, skip_distance);
Label cont, again;
masm->Bind(&again);
@@ -3940,7 +3868,7 @@
for (int i = 0; i < choice_count - 1; i++) {
GuardedAlternative alternative = alternatives_->at(i);
ZoneList<Guard*>* guards = alternative.guards();
- int guard_count = (guards == NULL) ? 0 : guards->length();
+ int guard_count = (guards == nullptr) ? 0 : guards->length();
for (int j = 0; j < guard_count; j++) {
DCHECK(!trace->mentions_reg(guards->at(j)->reg()));
}
@@ -3970,7 +3898,7 @@
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
int choice_count = alternatives_->length();
- if (choice_count == 1 && alternatives_->at(0).guards() == NULL) {
+ if (choice_count == 1 && alternatives_->at(0).guards() == nullptr) {
alternatives_->at(0).node()->Emit(compiler, trace);
return;
}
@@ -3983,7 +3911,7 @@
// For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
// other choice nodes we only flush if we are out of code size budget.
- if (trace->flush_budget() == 0 && trace->actions() != NULL) {
+ if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
trace->Flush(compiler, this);
return;
}
@@ -4029,7 +3957,7 @@
// If there are actions to be flushed we have to limit how many times
// they are flushed. Take the budget of the parent trace and distribute
// it fairly amongst the children.
- if (new_trace.actions() != NULL) {
+ if (new_trace.actions() != nullptr) {
new_trace.set_flush_budget(new_flush_budget);
}
bool next_expects_preload =
@@ -4058,7 +3986,7 @@
// and check it against the pushed value. This avoids pushing backtrack
// information for each iteration of the loop, which could take up a lot of
// space.
- DCHECK(trace->stop_node() == NULL);
+ DCHECK(trace->stop_node() == nullptr);
macro_assembler->PushCurrentPosition();
Label greedy_match_failed;
Trace greedy_match_trace;
@@ -4097,7 +4025,7 @@
if (alternatives_->length() != 2) return eats_at_least;
GuardedAlternative alt1 = alternatives_->at(1);
- if (alt1.guards() != NULL && alt1.guards()->length() != 0) {
+ if (alt1.guards() != nullptr && alt1.guards()->length() != 0) {
return eats_at_least;
}
RegExpNode* eats_anything_node = alt1.node();
@@ -4123,7 +4051,7 @@
// not be atoms, they can be any reasonably limited character class or
// small alternation.
BoyerMooreLookahead* bm = bm_info(false);
- if (bm == NULL) {
+ if (bm == nullptr) {
eats_at_least = Min(kMaxLookaheadForBoyerMoore,
EatsAtLeast(kMaxLookaheadForBoyerMoore,
kRecursionBudget,
@@ -4136,7 +4064,7 @@
alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
}
}
- if (bm != NULL) {
+ if (bm != nullptr) {
bm->EmitSkipInstructions(macro_assembler);
}
return eats_at_least;
@@ -4164,7 +4092,7 @@
AlternativeGeneration* alt_gen = alt_gens->at(i);
alt_gen->quick_check_details.set_characters(preload->preload_characters_);
ZoneList<Guard*>* guards = alternative.guards();
- int guard_count = (guards == NULL) ? 0 : guards->length();
+ int guard_count = (guards == nullptr) ? 0 : guards->length();
Trace new_trace(*trace);
new_trace.set_characters_preloaded(preload->preload_is_current_ ?
preload->preload_characters_ :
@@ -4215,7 +4143,7 @@
generate_full_check_inline = true;
}
if (generate_full_check_inline) {
- if (new_trace.actions() != NULL) {
+ if (new_trace.actions() != nullptr) {
new_trace.set_flush_budget(new_flush_budget);
}
for (int j = 0; j < guard_count; j++) {
@@ -4244,7 +4172,7 @@
out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
ZoneList<Guard*>* guards = alternative.guards();
- int guard_count = (guards == NULL) ? 0 : guards->length();
+ int guard_count = (guards == nullptr) ? 0 : guards->length();
if (next_expects_preload) {
Label reload_current_char;
out_of_line_trace.set_backtrack(&reload_current_char);
@@ -4256,9 +4184,7 @@
// Reload the current character, since the next quick check expects that.
// We don't need to check bounds here because we only get into this
// code through a quick check which already did the checked load.
- macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
- NULL,
- false,
+ macro_assembler->LoadCurrentCharacter(trace->cp_offset(), nullptr, false,
preload_characters);
macro_assembler->GoTo(&(alt_gen->after));
} else {
@@ -4383,7 +4309,7 @@
int clear_registers_to = clear_registers_from + clear_register_count - 1;
assembler->ClearRegisters(clear_registers_from, clear_registers_to);
- DCHECK(trace->backtrack() == NULL);
+ DCHECK(trace->backtrack() == nullptr);
assembler->Backtrack();
return;
}
@@ -4407,9 +4333,9 @@
RecursionCheck rc(compiler);
DCHECK_EQ(start_reg_ + 1, end_reg_);
- if (compiler->ignore_case()) {
+ if (IgnoreCase(flags_)) {
assembler->CheckNotBackReferenceIgnoreCase(
- start_reg_, read_backward(), compiler->unicode(), trace->backtrack());
+ start_reg_, read_backward(), IsUnicode(flags_), trace->backtrack());
} else {
assembler->CheckNotBackReference(start_reg_, read_backward(),
trace->backtrack());
@@ -4418,7 +4344,7 @@
if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
// Check that the back reference does not end inside a surrogate pair.
- if (compiler->unicode() && !compiler->one_byte()) {
+ if (IsUnicode(flags_) && !compiler->one_byte()) {
assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
}
on_success()->Emit(compiler, trace);
@@ -4764,7 +4690,7 @@
void RegExpEngine::DotPrint(const char* label,
RegExpNode* node,
bool ignore_case) {
- OFStream os(stdout);
+ StdoutStream os;
DotPrinter printer(os, ignore_case);
printer.PrintNode(label, node);
}
@@ -4797,10 +4723,10 @@
const int* special_class,
int length) {
length--; // Remove final marker.
- DCHECK(special_class[length] == kRangeEndMarker);
- DCHECK(ranges->length() != 0);
- DCHECK(length != 0);
- DCHECK(special_class[0] != 0);
+ DCHECK_EQ(kRangeEndMarker, special_class[length]);
+ DCHECK_NE(0, ranges->length());
+ DCHECK_NE(0, length);
+ DCHECK_NE(0, special_class[0]);
if (ranges->length() != (length >> 1) + 1) {
return false;
}
@@ -4828,7 +4754,7 @@
const int* special_class,
int length) {
length--; // Remove final marker.
- DCHECK(special_class[length] == kRangeEndMarker);
+ DCHECK_EQ(kRangeEndMarker, special_class[length]);
if (ranges->length() * 2 != length) {
return false;
}
@@ -4846,7 +4772,7 @@
bool RegExpCharacterClass::is_standard(Zone* zone) {
// TODO(lrn): Remove need for this function, by not throwing away information
// along the way.
- if (is_negated_) {
+ if (is_negated()) {
return false;
}
if (set_.is_standard()) {
@@ -4925,7 +4851,7 @@
void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {
OutSet* outset = entry.out_set();
if (!outset->Get(kBase)) return;
- ZoneList<CharacterRange>** target = NULL;
+ ZoneList<CharacterRange>** target = nullptr;
if (outset->Get(kBmpCodePoints)) {
target = &bmp_;
} else if (outset->Get(kLeadSurrogates)) {
@@ -4936,28 +4862,29 @@
DCHECK(outset->Get(kNonBmpCodePoints));
target = &non_bmp_;
}
- if (*target == NULL) *target = new (zone_) ZoneList<CharacterRange>(2, zone_);
+ if (*target == nullptr)
+ *target = new (zone_) ZoneList<CharacterRange>(2, zone_);
(*target)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_);
}
-
void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* bmp = splitter->bmp();
if (bmp == nullptr) return;
+ JSRegExp::Flags default_flags = JSRegExp::Flags();
result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
- compiler->zone(), bmp, compiler->read_backward(), on_success)));
+ compiler->zone(), bmp, compiler->read_backward(), on_success,
+ default_flags)));
}
-
void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* non_bmp = splitter->non_bmp();
if (non_bmp == nullptr) return;
- DCHECK(compiler->unicode());
DCHECK(!compiler->one_byte());
Zone* zone = compiler->zone();
+ JSRegExp::Flags default_flags = JSRegExp::Flags();
CharacterRange::Canonicalize(non_bmp);
for (int i = 0; i < non_bmp->length(); i++) {
// Match surrogate pair.
@@ -4977,7 +4904,7 @@
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, to_t), compiler->read_backward(),
- on_success)));
+ on_success, default_flags)));
} else {
if (from_t != kTrailSurrogateStart) {
// Add [from_l][from_t-\udfff]
@@ -4985,7 +4912,7 @@
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, kTrailSurrogateEnd),
- compiler->read_backward(), on_success)));
+ compiler->read_backward(), on_success, default_flags)));
from_l++;
}
if (to_t != kTrailSurrogateEnd) {
@@ -4994,7 +4921,7 @@
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(to_l),
CharacterRange::Range(kTrailSurrogateStart, to_t),
- compiler->read_backward(), on_success)));
+ compiler->read_backward(), on_success, default_flags)));
to_l--;
}
if (from_l <= to_l) {
@@ -5003,49 +4930,47 @@
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Range(from_l, to_l),
CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
- compiler->read_backward(), on_success)));
+ compiler->read_backward(), on_success, default_flags)));
}
}
}
}
-
RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
- ZoneList<CharacterRange>* match, RegExpNode* on_success,
- bool read_backward) {
+ ZoneList<CharacterRange>* match, RegExpNode* on_success, bool read_backward,
+ JSRegExp::Flags flags) {
Zone* zone = compiler->zone();
RegExpNode* match_node = TextNode::CreateForCharacterRanges(
- zone, match, read_backward, on_success);
+ zone, match, read_backward, on_success, flags);
int stack_register = compiler->UnicodeLookaroundStackRegister();
int position_register = compiler->UnicodeLookaroundPositionRegister();
RegExpLookaround::Builder lookaround(false, match_node, stack_register,
position_register);
RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
- zone, lookbehind, !read_backward, lookaround.on_match_success());
+ zone, lookbehind, !read_backward, lookaround.on_match_success(), flags);
return lookaround.ForMatch(negative_match);
}
-
RegExpNode* MatchAndNegativeLookaroundInReadDirection(
RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
- bool read_backward) {
+ bool read_backward, JSRegExp::Flags flags) {
Zone* zone = compiler->zone();
int stack_register = compiler->UnicodeLookaroundStackRegister();
int position_register = compiler->UnicodeLookaroundPositionRegister();
RegExpLookaround::Builder lookaround(false, on_success, stack_register,
position_register);
RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
- zone, lookahead, read_backward, lookaround.on_match_success());
+ zone, lookahead, read_backward, lookaround.on_match_success(), flags);
return TextNode::CreateForCharacterRanges(
- zone, match, read_backward, lookaround.ForMatch(negative_match));
+ zone, match, read_backward, lookaround.ForMatch(negative_match), flags);
}
-
void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) {
+ JSRegExp::Flags default_flags = JSRegExp::Flags();
ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates();
if (lead_surrogates == nullptr) return;
Zone* zone = compiler->zone();
@@ -5058,20 +4983,22 @@
// Reading backward. Assert that reading forward, there is no trail
// surrogate, and then backward match the lead surrogate.
match = NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, trail_surrogates, lead_surrogates, on_success, true);
+ compiler, trail_surrogates, lead_surrogates, on_success, true,
+ default_flags);
} else {
// Reading forward. Forward match the lead surrogate and assert that
// no trail surrogate follows.
match = MatchAndNegativeLookaroundInReadDirection(
- compiler, lead_surrogates, trail_surrogates, on_success, false);
+ compiler, lead_surrogates, trail_surrogates, on_success, false,
+ default_flags);
}
result->AddAlternative(GuardedAlternative(match));
}
-
void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) {
+ JSRegExp::Flags default_flags = JSRegExp::Flags();
ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates();
if (trail_surrogates == nullptr) return;
Zone* zone = compiler->zone();
@@ -5084,12 +5011,14 @@
// Reading backward. Backward match the trail surrogate and assert that no
// lead surrogate precedes it.
match = MatchAndNegativeLookaroundInReadDirection(
- compiler, trail_surrogates, lead_surrogates, on_success, true);
+ compiler, trail_surrogates, lead_surrogates, on_success, true,
+ default_flags);
} else {
// Reading forward. Assert that reading backward, there is no lead
// surrogate, and then forward match the trail surrogate.
match = NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, lead_surrogates, trail_surrogates, on_success, false);
+ compiler, lead_surrogates, trail_surrogates, on_success, false,
+ default_flags);
}
result->AddAlternative(GuardedAlternative(match));
}
@@ -5105,16 +5034,24 @@
// the associated trail surrogate.
ZoneList<CharacterRange>* range = CharacterRange::List(
zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
- return TextNode::CreateForCharacterRanges(zone, range, false, on_success);
+ JSRegExp::Flags default_flags = JSRegExp::Flags();
+ return TextNode::CreateForCharacterRanges(zone, range, false, on_success,
+ default_flags);
}
+void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
+#ifdef V8_INTL_SUPPORT
+ DCHECK(CharacterRange::IsCanonical(ranges));
-void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
- ZoneList<CharacterRange>* ranges) {
-#ifdef V8_I18N_SUPPORT
+ // Micro-optimization to avoid passing large ranges to UnicodeSet::closeOver.
+ // See also https://crbug.com/v8/6727.
+ // TODO(jgruber): This only covers the special case of the {0,0x10FFFF} range,
+ // which we use frequently internally. But large ranges can also easily be
+ // created by the user. We might want to have a more general caching mechanism
+ // for such ranges.
+ if (ranges->length() == 1 && ranges->at(0).IsEverything(kNonBmpEnd)) return;
+
// Use ICU to compute the case fold closure over the ranges.
- DCHECK(compiler->unicode());
- DCHECK(compiler->ignore_case());
icu::UnicodeSet set;
for (int i = 0; i < ranges->length(); i++) {
set.add(ranges->at(i).from(), ranges->at(i).to());
@@ -5125,18 +5062,13 @@
// Those are represented as strings in the set. Remove them so that
// we end up with only simple and common case mappings.
set.removeAllStrings();
- Zone* zone = compiler->zone();
for (int i = 0; i < set.getRangeCount(); i++) {
ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
zone);
}
// No errors and everything we collected have been ranges.
-#else
- // Fallback if ICU is not included.
- CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(),
- ranges, compiler->one_byte());
-#endif // V8_I18N_SUPPORT
CharacterRange::Canonicalize(ranges);
+#endif // V8_INTL_SUPPORT
}
@@ -5145,10 +5077,11 @@
set_.Canonicalize();
Zone* zone = compiler->zone();
ZoneList<CharacterRange>* ranges = this->ranges(zone);
- if (compiler->unicode() && compiler->ignore_case()) {
- AddUnicodeCaseEquivalents(compiler, ranges);
+ if (NeedsUnicodeCaseEquivalents(flags_)) {
+ AddUnicodeCaseEquivalents(ranges, zone);
}
- if (compiler->unicode() && !compiler->one_byte()) {
+ if (IsUnicode(flags_) && !compiler->one_byte() &&
+ !contains_split_surrogate()) {
if (is_negated()) {
ZoneList<CharacterRange>* negated =
new (zone) ZoneList<CharacterRange>(2, zone);
@@ -5156,9 +5089,9 @@
ranges = negated;
}
if (ranges->length() == 0) {
- ranges->Add(CharacterRange::Everything(), zone);
+ JSRegExp::Flags default_flags;
RegExpCharacterClass* fail =
- new (zone) RegExpCharacterClass(ranges, true);
+ new (zone) RegExpCharacterClass(zone, ranges, default_flags);
return new (zone) TextNode(fail, compiler->read_backward(), on_success);
}
if (standard_type() == '*') {
@@ -5233,10 +5166,12 @@
// i is length or it is the index of an atom.
if (i == length) break;
int first_atom = i;
+ JSRegExp::Flags flags = alternatives->at(i)->AsAtom()->flags();
i++;
while (i < length) {
RegExpTree* alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
+ if (alternative->AsAtom()->flags() != flags) break;
i++;
}
// Sort atoms to get ones with common prefixes together.
@@ -5248,7 +5183,7 @@
DCHECK_LT(first_atom, alternatives->length());
DCHECK_LE(i, alternatives->length());
DCHECK_LE(first_atom, i);
- if (compiler->ignore_case()) {
+ if (IgnoreCase(flags)) {
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
compiler->isolate()->regexp_macro_assembler_canonicalize();
auto compare_closure =
@@ -5280,7 +5215,8 @@
i++;
continue;
}
- RegExpAtom* atom = alternative->AsAtom();
+ RegExpAtom* const atom = alternative->AsAtom();
+ JSRegExp::Flags flags = atom->flags();
unibrow::uchar common_prefix = atom->data().at(0);
int first_with_prefix = i;
int prefix_length = atom->length();
@@ -5288,10 +5224,11 @@
while (i < length) {
alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
- atom = alternative->AsAtom();
+ RegExpAtom* const atom = alternative->AsAtom();
+ if (atom->flags() != flags) break;
unibrow::uchar new_prefix = atom->data().at(0);
if (new_prefix != common_prefix) {
- if (!compiler->ignore_case()) break;
+ if (!IgnoreCase(flags)) break;
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
compiler->isolate()->regexp_macro_assembler_canonicalize();
new_prefix = Canonical(canonicalize, new_prefix);
@@ -5308,7 +5245,7 @@
// common prefix if the terms were similar or presorted in the input.
// Find out how long the common prefix is.
int run_length = i - first_with_prefix;
- atom = alternatives->at(first_with_prefix)->AsAtom();
+ RegExpAtom* const atom = alternatives->at(first_with_prefix)->AsAtom();
for (int j = 1; j < run_length && prefix_length > 1; j++) {
RegExpAtom* old_atom =
alternatives->at(j + first_with_prefix)->AsAtom();
@@ -5319,8 +5256,8 @@
}
}
}
- RegExpAtom* prefix =
- new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length));
+ RegExpAtom* prefix = new (zone)
+ RegExpAtom(atom->data().SubVector(0, prefix_length), flags);
ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
pair->Add(prefix, zone);
ZoneList<RegExpTree*>* suffixes =
@@ -5333,7 +5270,8 @@
suffixes->Add(new (zone) RegExpEmpty(), zone);
} else {
RegExpTree* suffix = new (zone) RegExpAtom(
- old_atom->data().SubVector(prefix_length, old_atom->length()));
+ old_atom->data().SubVector(prefix_length, old_atom->length()),
+ flags);
suffixes->Add(suffix, zone);
}
}
@@ -5366,19 +5304,31 @@
i++;
continue;
}
- RegExpAtom* atom = alternative->AsAtom();
+ RegExpAtom* const atom = alternative->AsAtom();
if (atom->length() != 1) {
alternatives->at(write_posn++) = alternatives->at(i);
i++;
continue;
}
+ JSRegExp::Flags flags = atom->flags();
+ DCHECK_IMPLIES(IsUnicode(flags),
+ !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
+ bool contains_trail_surrogate =
+ unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
int first_in_run = i;
i++;
+ // Find a run of single-character atom alternatives that have identical
+ // flags (case independence and unicode-ness).
while (i < length) {
alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
- atom = alternative->AsAtom();
+ RegExpAtom* const atom = alternative->AsAtom();
if (atom->length() != 1) break;
+ if (atom->flags() != flags) break;
+ DCHECK_IMPLIES(IsUnicode(flags),
+ !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
+ contains_trail_surrogate |=
+ unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
i++;
}
if (i > first_in_run + 1) {
@@ -5391,8 +5341,12 @@
DCHECK_EQ(old_atom->length(), 1);
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
}
- alternatives->at(write_posn++) =
- new (zone) RegExpCharacterClass(ranges, false);
+ RegExpCharacterClass::CharacterClassFlags character_class_flags;
+ if (IsUnicode(flags) && contains_trail_surrogate) {
+ character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
+ }
+ alternatives->at(write_posn++) = new (zone)
+ RegExpCharacterClass(zone, ranges, flags, character_class_flags);
} else {
// Just copy any trivial alternatives.
for (int j = first_in_run; j < i; j++) {
@@ -5450,7 +5404,7 @@
: compiler_(compiler),
saved_expansion_factor_(compiler->current_expansion_factor()),
ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
- DCHECK(factor > 0);
+ DCHECK_LT(0, factor);
if (ok_to_expand_) {
if (factor > kMaxExpansionFactor) {
// Avoid integer overflow of the current expansion factor.
@@ -5539,7 +5493,7 @@
}
}
if (max <= kMaxUnrolledMaxMatches && min == 0) {
- DCHECK(max > 0); // Due to the 'if' above.
+ DCHECK_LT(0, max); // Due to the 'if' above.
RegExpExpansionLimiter limiter(compiler, max);
if (limiter.ok_to_expand()) {
// Unroll the optional matches up to max.
@@ -5619,6 +5573,43 @@
}
}
+namespace {
+// Desugar \b to (?<=\w)(?=\W)|(?<=\W)(?=\w) and
+// \B to (?<=\w)(?=\w)|(?<=\W)(?=\W). Returns a two-alternative ChoiceNode; the \w set used here has Unicode case equivalents added.
+RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpAssertion::AssertionType type,
+ JSRegExp::Flags flags) {
+ DCHECK(NeedsUnicodeCaseEquivalents(flags));  // RegExpAssertion::ToNode only takes this path when this predicate holds.
+ Zone* zone = compiler->zone();
+ ZoneList<CharacterRange>* word_range =
+ new (zone) ZoneList<CharacterRange>(2, zone);
+ CharacterRange::AddClassEscape('w', word_range, true, zone);  // true: add_unicode_case_equivalents.
+ int stack_register = compiler->UnicodeLookaroundStackRegister();
+ int position_register = compiler->UnicodeLookaroundPositionRegister();
+ ChoiceNode* result = new (zone) ChoiceNode(2, zone);
+ // Add two choices. The (non-)boundary could start with a word or
+ // a non-word-character.
+ for (int i = 0; i < 2; i++) {
+ bool lookbehind_for_word = i == 0;  // First alternative expects a word character on the left.
+ bool lookahead_for_word =
+ (type == RegExpAssertion::BOUNDARY) ^ lookbehind_for_word;  // BOUNDARY: the two sides differ; NON_BOUNDARY: they agree.
+ // Look to the left. NOTE(review): the first Builder argument presumably selects a positive (true) vs. negative (false) lookaround - confirm.
+ RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
+ stack_register, position_register);
+ RegExpNode* backward = TextNode::CreateForCharacterRanges(
+ zone, word_range, true, lookbehind.on_match_success(), flags);  // true: read_backward.
+ // Look to the right. The lookbehind built above is passed as this lookaround's continuation.
+ RegExpLookaround::Builder lookahead(lookahead_for_word,
+ lookbehind.ForMatch(backward),
+ stack_register, position_register);
+ RegExpNode* forward = TextNode::CreateForCharacterRanges(
+ zone, word_range, false, lookahead.on_match_success(), flags);  // false: read forward.
+ result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
+ }
+ return result;
+}
+} // anonymous namespace
RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
@@ -5631,9 +5622,15 @@
case START_OF_INPUT:
return AssertionNode::AtStart(on_success);
case BOUNDARY:
- return AssertionNode::AtBoundary(on_success);
+ return NeedsUnicodeCaseEquivalents(flags_)
+ ? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
+ flags_)
+ : AssertionNode::AtBoundary(on_success);
case NON_BOUNDARY:
- return AssertionNode::AtNonBoundary(on_success);
+ return NeedsUnicodeCaseEquivalents(flags_)
+ ? BoundaryAssertionAsLookaround(compiler, on_success,
+ NON_BOUNDARY, flags_)
+ : AssertionNode::AtNonBoundary(on_success);
case END_OF_INPUT:
return AssertionNode::AtEnd(on_success);
case END_OF_LINE: {
@@ -5647,8 +5644,10 @@
// Create a newline atom.
ZoneList<CharacterRange>* newline_ranges =
new(zone) ZoneList<CharacterRange>(3, zone);
- CharacterRange::AddClassEscape('n', newline_ranges, zone);
- RegExpCharacterClass* newline_atom = new (zone) RegExpCharacterClass('n');
+ CharacterRange::AddClassEscape('n', newline_ranges, false, zone);
+ JSRegExp::Flags default_flags = JSRegExp::Flags();
+ RegExpCharacterClass* newline_atom =
+ new (zone) RegExpCharacterClass('n', default_flags);
TextNode* newline_matcher = new (zone) TextNode(
newline_atom, false, ActionNode::PositiveSubmatchSuccess(
stack_pointer_register, position_register,
@@ -5678,7 +5677,7 @@
RegExpNode* on_success) {
return new (compiler->zone())
BackReferenceNode(RegExpCapture::StartRegister(index()),
- RegExpCapture::EndRegister(index()),
+ RegExpCapture::EndRegister(index()), flags_,
compiler->read_backward(), on_success);
}
@@ -5795,7 +5794,7 @@
ZoneList<CharacterRange>* ranges,
Zone* zone) {
elmc--;
- DCHECK(elmv[elmc] == kRangeEndMarker);
+ DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
for (int i = 0; i < elmc; i += 2) {
DCHECK(elmv[i] < elmv[i + 1]);
ranges->Add(CharacterRange::Range(elmv[i], elmv[i + 1] - 1), zone);
@@ -5808,9 +5807,9 @@
ZoneList<CharacterRange>* ranges,
Zone* zone) {
elmc--;
- DCHECK(elmv[elmc] == kRangeEndMarker);
- DCHECK(elmv[0] != 0x0000);
- DCHECK(elmv[elmc - 1] != String::kMaxCodePoint);
+ DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
+ DCHECK_NE(0x0000, elmv[0]);
+ DCHECK_NE(String::kMaxCodePoint, elmv[elmc - 1]);
uc16 last = 0x0000;
for (int i = 0; i < elmc; i += 2) {
DCHECK(last <= elmv[i] - 1);
@@ -5821,9 +5820,30 @@
ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone);
}
+void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
+ bool add_unicode_case_equivalents,
+ Zone* zone) {
+ if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {  // Only \w and \W are handled specially here.
+ // See #sec-runtime-semantics-wordcharacters-abstract-operation
+ // In case of unicode and ignore_case, we need to create the closure over
+ // case equivalent characters before negating. The result is built in a scratch list and then appended to |ranges|.
+ ZoneList<CharacterRange>* new_ranges =
+ new (zone) ZoneList<CharacterRange>(2, zone);
+ AddClass(kWordRanges, kWordRangeCount, new_ranges, zone);  // Start from the plain word-character ranges.
+ AddUnicodeCaseEquivalents(new_ranges, zone);  // Its body is guarded by V8_INTL_SUPPORT, so this does nothing without it.
+ if (type == 'W') {  // \W: complement of the case-closed \w set.
+ ZoneList<CharacterRange>* negated =
+ new (zone) ZoneList<CharacterRange>(2, zone);
+ CharacterRange::Negate(new_ranges, negated, zone);
+ new_ranges = negated;
+ }
+ ranges->AddAll(*new_ranges, zone);  // Appends; existing entries in |ranges| are kept.
+ return;
+ }
+ AddClassEscape(type, ranges, zone);  // Every other case defers to the three-argument overload.
+}
-void CharacterRange::AddClassEscape(uc16 type,
- ZoneList<CharacterRange>* ranges,
+void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
Zone* zone) {
switch (type) {
case 's':
@@ -5874,7 +5894,7 @@
return Vector<const int>(kWordRanges, kWordRangeCount - 1);
}
-
+// static
void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
ZoneList<CharacterRange>* ranges,
bool is_one_byte) {
@@ -5883,12 +5903,12 @@
for (int i = 0; i < range_count; i++) {
CharacterRange range = ranges->at(i);
uc32 bottom = range.from();
- if (bottom > String::kMaxUtf16CodeUnit) return;
+ if (bottom > String::kMaxUtf16CodeUnit) continue;
uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
// Nothing to be done for surrogates.
- if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;
+ if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
- if (bottom > String::kMaxOneByteCharCode) return;
+ if (bottom > String::kMaxOneByteCharCode) continue;
if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
}
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
@@ -5963,9 +5983,9 @@
ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
- if (ranges_ == NULL) {
+ if (ranges_ == nullptr) {
ranges_ = new(zone) ZoneList<CharacterRange>(2, zone);
- CharacterRange::AddClassEscape(standard_set_type_, ranges_, zone);
+ CharacterRange::AddClassEscape(standard_set_type_, ranges_, false, zone);
}
return ranges_;
}
@@ -6051,7 +6071,7 @@
void CharacterSet::Canonicalize() {
// Special/default classes are always considered canonical. The result
// of calling ranges() will be sorted.
- if (ranges_ == NULL) return;
+ if (ranges_ == nullptr) return;
CharacterRange::Canonicalize(ranges_);
}
@@ -6125,7 +6145,7 @@
OutSet* OutSet::Extend(unsigned value, Zone* zone) {
if (Get(value))
return this;
- if (successors(zone) != NULL) {
+ if (successors(zone) != nullptr) {
for (int i = 0; i < successors(zone)->length(); i++) {
OutSet* successor = successors(zone)->at(i);
if (successor->Get(value))
@@ -6145,7 +6165,7 @@
if (value < kFirstLimit) {
first_ |= (1 << value);
} else {
- if (remaining_ == NULL)
+ if (remaining_ == nullptr)
remaining_ = new(zone) ZoneList<unsigned>(1, zone);
if (remaining_->is_empty() || !remaining_->Contains(value))
remaining_->Add(value, zone);
@@ -6156,7 +6176,7 @@
bool OutSet::Get(unsigned value) const {
if (value < kFirstLimit) {
return (first_ & (1 << value)) != 0;
- } else if (remaining_ == NULL) {
+ } else if (remaining_ == nullptr) {
return false;
} else {
return remaining_->Contains(value);
@@ -6313,9 +6333,7 @@
void Analysis::VisitText(TextNode* that) {
- if (ignore_case()) {
- that->MakeCaseIndependent(isolate(), is_one_byte_);
- }
+ that->MakeCaseIndependent(isolate(), is_one_byte_);
EnsureAnalyzed(that->on_success());
if (!has_failed()) {
that->CalculateOffsets();
@@ -6396,7 +6414,7 @@
budget = (budget - 1) / alts->length();
for (int i = 0; i < alts->length(); i++) {
GuardedAlternative& alt = alts->at(i);
- if (alt.guards() != NULL && alt.guards()->length() != 0) {
+ if (alt.guards() != nullptr && alt.guards()->length() != 0) {
bm->SetRest(offset); // Give up trying to fill in info.
SaveBMInfo(bm, not_at_start, offset);
return;
@@ -6426,7 +6444,7 @@
return;
}
uc16 character = atom->data()[j];
- if (bm->compiler()->ignore_case()) {
+ if (IgnoreCase(atom->flags())) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
int length = GetCaseIndependentLetters(
isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
@@ -6578,9 +6596,9 @@
target->Accept(this);
}
-
RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
- RegExpNode* on_success) {
+ RegExpNode* on_success,
+ JSRegExp::Flags flags) {
// If the regexp matching starts within a surrogate pair, step back
// to the lead surrogate and start matching from there.
DCHECK(!compiler->read_backward());
@@ -6595,11 +6613,11 @@
int stack_register = compiler->UnicodeLookaroundStackRegister();
int position_register = compiler->UnicodeLookaroundPositionRegister();
RegExpNode* step_back = TextNode::CreateForCharacterRanges(
- zone, lead_surrogates, true, on_success);
+ zone, lead_surrogates, true, on_success, flags);
RegExpLookaround::Builder builder(true, step_back, stack_register,
position_register);
RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
- zone, trail_surrogates, false, builder.on_match_success());
+ zone, trail_surrogates, false, builder.on_match_success(), flags);
optional_step_back->AddAlternative(
GuardedAlternative(builder.ForMatch(match_trail)));
@@ -6616,19 +6634,18 @@
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
return IrregexpRegExpTooBig(isolate);
}
- bool ignore_case = flags & JSRegExp::kIgnoreCase;
- bool is_sticky = flags & JSRegExp::kSticky;
- bool is_global = flags & JSRegExp::kGlobal;
- bool is_unicode = flags & JSRegExp::kUnicode;
- RegExpCompiler compiler(isolate, zone, data->capture_count, flags,
- is_one_byte);
+ bool is_sticky = IsSticky(flags);
+ bool is_global = IsGlobal(flags);
+ bool is_unicode = IsUnicode(flags);
+ RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte);
- if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));
+ if (compiler.optimize())
+ compiler.set_optimize(!TooMuchRegExpCode(isolate, pattern));
// Sample some characters from the middle of the string.
static const int kSampleSize = 128;
- sample_subject = String::Flatten(sample_subject);
+ sample_subject = String::Flatten(isolate, sample_subject);
int chars_sampled = 0;
int half_way = (sample_subject->length() - kSampleSize) / 2;
for (int i = Max(0, half_way);
@@ -6649,9 +6666,11 @@
if (!is_start_anchored && !is_sticky) {
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning or sticky.
+ JSRegExp::Flags default_flags = JSRegExp::Flags();
RegExpNode* loop_node = RegExpQuantifier::ToNode(
- 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
- &compiler, captured_body, data->contains_anchor);
+ 0, RegExpTree::kInfinity, false,
+ new (zone) RegExpCharacterClass('*', default_flags), &compiler,
+ captured_body, data->contains_anchor);
if (data->contains_anchor) {
// Unroll loop once, to take care of the case that might start
@@ -6659,26 +6678,27 @@
ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
first_step_node->AddAlternative(GuardedAlternative(captured_body));
first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
- new (zone) RegExpCharacterClass('*'), false, loop_node)));
+ new (zone) RegExpCharacterClass('*', default_flags), false,
+ loop_node)));
node = first_step_node;
} else {
node = loop_node;
}
}
if (is_one_byte) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
// Do it again to propagate the new nodes to places where they were not
// put because they had not been calculated yet.
- if (node != NULL) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
+ if (node != nullptr) {
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
}
- } else if (compiler.unicode() && (is_global || is_sticky)) {
- node = OptionallyStepBackToLeadSurrogate(&compiler, node);
+ } else if (is_unicode && (is_global || is_sticky)) {
+ node = OptionallyStepBackToLeadSurrogate(&compiler, node, flags);
}
- if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
+ if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
data->node = node;
- Analysis analysis(isolate, flags, is_one_byte);
+ Analysis analysis(isolate, is_one_byte);
analysis.EnsureAnalyzed(node);
if (analysis.has_failed()) {
const char* error_message = analysis.error_message();
@@ -6717,9 +6737,6 @@
#elif V8_TARGET_ARCH_MIPS64
RegExpMacroAssemblerMIPS macro_assembler(isolate, zone, mode,
(data->capture_count + 1) * 2);
-#elif V8_TARGET_ARCH_X87
- RegExpMacroAssemblerX87 macro_assembler(isolate, zone, mode,
- (data->capture_count + 1) * 2);
#else
#error "Unsupported architecture"
#endif
@@ -6730,7 +6747,7 @@
RegExpMacroAssemblerIrregexp macro_assembler(isolate, codes, zone);
#endif // V8_INTERPRETED_REGEXP
- macro_assembler.set_slow_safe(TooMuchRegExpCode(pattern));
+ macro_assembler.set_slow_safe(TooMuchRegExpCode(isolate, pattern));
// Inserted here, instead of in Assembler, because it depends on information
// in the AST that isn't replicated in the Node structure.
@@ -6750,15 +6767,12 @@
macro_assembler.set_global_mode(mode);
}
- return compiler.Assemble(&macro_assembler,
- node,
- data->capture_count,
+ return compiler.Assemble(isolate, &macro_assembler, node, data->capture_count,
pattern);
}
-
-bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) {
- Heap* heap = pattern->GetHeap();
+bool RegExpEngine::TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern) {
+ Heap* heap = isolate->heap();
bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;
if (heap->isolate()->total_regexp_code_generated() >
RegExpImpl::kRegExpCompiledLimit &&
@@ -6859,7 +6873,8 @@
}
}
// Convert backing store to a copy-on-write array.
- value_array->set_map_no_write_barrier(isolate->heap()->fixed_cow_array_map());
+ value_array->set_map_no_write_barrier(
+ ReadOnlyRoots(isolate).fixed_cow_array_map());
}
diff --git a/src/regexp/jsregexp.h b/src/regexp/jsregexp.h
index 77d61ae..fd2a905 100644
--- a/src/regexp/jsregexp.h
+++ b/src/regexp/jsregexp.h
@@ -7,6 +7,8 @@
#include "src/allocation.h"
#include "src/assembler.h"
+#include "src/isolate.h"
+#include "src/objects/js-regexp.h"
#include "src/regexp/regexp-ast.h"
#include "src/regexp/regexp-macro-assembler.h"
@@ -20,6 +22,36 @@
class RegExpTree;
class BoyerMooreLookahead;
+inline bool IgnoreCase(JSRegExp::Flags flags) {
+ return (flags & JSRegExp::kIgnoreCase) != 0;
+}
+
+inline bool IsUnicode(JSRegExp::Flags flags) {
+ return (flags & JSRegExp::kUnicode) != 0;
+}
+
+inline bool IsSticky(JSRegExp::Flags flags) {
+ return (flags & JSRegExp::kSticky) != 0;
+}
+
+inline bool IsGlobal(JSRegExp::Flags flags) {
+ return (flags & JSRegExp::kGlobal) != 0;
+}
+
+inline bool DotAll(JSRegExp::Flags flags) {
+ return (flags & JSRegExp::kDotAll) != 0;
+}
+
+inline bool Multiline(JSRegExp::Flags flags) {
+ return (flags & JSRegExp::kMultiline) != 0;
+}
+
+inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
+ // Both unicode and ignore_case flags are set. We need to use ICU to find
+ // the closure over case equivalents.
+ return IsUnicode(flags) && IgnoreCase(flags);
+}
+
class RegExpImpl {
public:
// Whether V8 is compiled with native regexp support or not.
@@ -40,36 +72,30 @@
// generic data and choice of implementation - as well as what
// the implementation wants to store in the data field.
// Returns false if compilation fails.
- MUST_USE_RESULT static MaybeHandle<Object> Compile(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags);
+ V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile(
+ Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
+ JSRegExp::Flags flags);
// See ECMA-262 section 15.10.6.2.
// This function calls the garbage collector if necessary.
- V8_EXPORT_PRIVATE MUST_USE_RESULT static MaybeHandle<Object> Exec(
- Handle<JSRegExp> regexp, Handle<String> subject, int index,
- Handle<RegExpMatchInfo> last_match_info);
+ V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
+ Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
+ int index, Handle<RegExpMatchInfo> last_match_info);
// Prepares a JSRegExp object with Irregexp-specific data.
- static void IrregexpInitialize(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags,
+ static void IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
+ Handle<String> pattern, JSRegExp::Flags flags,
int capture_register_count);
-
- static void AtomCompile(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags,
+ static void AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
+ Handle<String> pattern, JSRegExp::Flags flags,
Handle<String> match_pattern);
-
- static int AtomExecRaw(Handle<JSRegExp> regexp,
- Handle<String> subject,
- int index,
- int32_t* output,
+ static int AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
+ Handle<String> subject, int index, int32_t* output,
int output_size);
- static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
+ static Handle<Object> AtomExec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info);
@@ -82,7 +108,7 @@
// Returns the number of integer spaces required by IrregexpExecOnce
// as its "registers" argument. If the regexp cannot be compiled,
// an exception is set as pending, and this function returns negative.
- static int IrregexpPrepare(Handle<JSRegExp> regexp,
+ static int IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject);
// Execute a regular expression on the subject, starting from index.
@@ -91,24 +117,23 @@
// The captures and subcaptures are stored into the registers vector.
// If matching fails, returns RE_FAILURE.
// If execution fails, sets a pending exception and returns RE_EXCEPTION.
- static int IrregexpExecRaw(Handle<JSRegExp> regexp,
- Handle<String> subject,
- int index,
- int32_t* output,
+ static int IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
+ Handle<String> subject, int index, int32_t* output,
int output_size);
// Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
- MUST_USE_RESULT static MaybeHandle<Object> IrregexpExec(
- Handle<JSRegExp> regexp, Handle<String> subject, int index,
- Handle<RegExpMatchInfo> last_match_info);
+ V8_WARN_UNUSED_RESULT static MaybeHandle<Object> IrregexpExec(
+ Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
+ int index, Handle<RegExpMatchInfo> last_match_info);
- // Set last match info. If match is NULL, then setting captures is omitted.
+ // Set last match info. If match is nullptr, then setting captures is
+ // omitted.
static Handle<RegExpMatchInfo> SetLastMatchInfo(
- Handle<RegExpMatchInfo> last_match_info, Handle<String> subject,
- int capture_count, int32_t* match);
+ Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
+ Handle<String> subject, int capture_count, int32_t* match);
class GlobalCache {
public:
@@ -116,17 +141,17 @@
Handle<String> subject,
Isolate* isolate);
- INLINE(~GlobalCache());
+ V8_INLINE ~GlobalCache();
// Fetch the next entry in the cache for global regexp match results.
- // This does not set the last match info. Upon failure, NULL is returned.
- // The cause can be checked with Result(). The previous
- // result is still in available in memory when a failure happens.
- INLINE(int32_t* FetchNext());
+ // This does not set the last match info. Upon failure, nullptr is
+ // returned. The cause can be checked with Result(). The previous result is
+ // still available in memory when a failure happens.
+ V8_INLINE int32_t* FetchNext();
- INLINE(int32_t* LastSuccessfulMatch());
+ V8_INLINE int32_t* LastSuccessfulMatch();
- INLINE(bool HasException()) { return num_matches_ < 0; }
+ V8_INLINE bool HasException() { return num_matches_ < 0; }
private:
int AdvanceZeroLength(int last_index);
@@ -140,6 +165,7 @@
int register_array_size_;
Handle<JSRegExp> regexp_;
Handle<String> subject_;
+ Isolate* isolate_;
};
// For acting on the JSRegExp data FixedArray.
@@ -162,9 +188,10 @@
static const int kRegExpTooLargeToOptimize = 20 * KB;
private:
- static bool CompileIrregexp(Handle<JSRegExp> re,
+ static bool CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject, bool is_one_byte);
- static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re,
+ static inline bool EnsureCompiledIrregexp(Isolate* isolate,
+ Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte);
};
@@ -184,7 +211,7 @@
// integers (< 32). May do zone-allocation.
class OutSet: public ZoneObject {
public:
- OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
+ OutSet() : first_(0), remaining_(nullptr), successors_(nullptr) {}
OutSet* Extend(unsigned value, Zone* zone);
bool Get(unsigned value) const;
static const unsigned kFirstLimit = 32;
@@ -201,7 +228,7 @@
ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
OutSet(uint32_t first, ZoneList<unsigned>* remaining)
- : first_(first), remaining_(remaining), successors_(NULL) { }
+ : first_(first), remaining_(remaining), successors_(nullptr) {}
uint32_t first_;
ZoneList<unsigned>* remaining_;
ZoneList<OutSet*>* successors_;
@@ -217,7 +244,7 @@
class Entry {
public:
- Entry() : from_(0), to_(0), out_set_(NULL) { }
+ Entry() : from_(0), to_(0), out_set_(nullptr) {}
Entry(uc32 from, uc32 to, OutSet* out_set)
: from_(from), to_(to), out_set_(out_set) {
DCHECK(from <= to);
@@ -406,8 +433,8 @@
int characters() { return characters_; }
void set_characters(int characters) { characters_ = characters; }
Position* positions(int index) {
- DCHECK(index >= 0);
- DCHECK(index < characters_);
+ DCHECK_LE(0, index);
+ DCHECK_GT(characters_, index);
return positions_ + index;
}
uint32_t mask() { return mask_; }
@@ -433,8 +460,11 @@
class RegExpNode: public ZoneObject {
public:
explicit RegExpNode(Zone* zone)
- : replacement_(NULL), on_work_list_(false), trace_count_(0), zone_(zone) {
- bm_info_[0] = bm_info_[1] = NULL;
+ : replacement_(nullptr),
+ on_work_list_(false),
+ trace_count_(0),
+ zone_(zone) {
+ bm_info_[0] = bm_info_[1] = nullptr;
}
virtual ~RegExpNode();
virtual void Accept(NodeVisitor* visitor) = 0;
@@ -472,7 +502,7 @@
// character and that has no guards on it.
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler) {
- return NULL;
+ return nullptr;
}
// Collects information on the possible code units (mod 128) that can match if
@@ -489,10 +519,8 @@
// If we know that the input is one-byte then there are some nodes that can
// never match. This method returns a node that can be substituted for
- // itself, or NULL if the node can never match.
- virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) {
- return this;
- }
+ // itself, or nullptr if the node can never match.
+ virtual RegExpNode* FilterOneByte(int depth) { return this; }
// Helper for FilterOneByte.
RegExpNode* replacement() {
DCHECK(info()->replacement_calculated);
@@ -564,7 +592,7 @@
: RegExpNode(on_success->zone()), on_success_(on_success) { }
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
- virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
+ virtual RegExpNode* FilterOneByte(int depth);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
@@ -572,7 +600,7 @@
}
protected:
- RegExpNode* FilterSuccessor(int depth, bool ignore_case);
+ RegExpNode* FilterSuccessor(int depth);
private:
RegExpNode* on_success_;
@@ -677,13 +705,15 @@
static TextNode* CreateForCharacterRanges(Zone* zone,
ZoneList<CharacterRange>* ranges,
bool read_backward,
- RegExpNode* on_success);
+ RegExpNode* on_success,
+ JSRegExp::Flags flags);
// Create TextNode for a surrogate pair with a range given for the
// lead and the trail surrogate each.
static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
CharacterRange trail,
bool read_backward,
- RegExpNode* on_success);
+ RegExpNode* on_success,
+ JSRegExp::Flags flags);
virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
@@ -700,7 +730,7 @@
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start);
void CalculateOffsets();
- virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
+ virtual RegExpNode* FilterOneByte(int depth);
private:
enum TextEmitPassType {
@@ -710,7 +740,7 @@
CASE_CHARACTER_MATCH, // Case-independent single character check.
CHARACTER_CLASS_MATCH // Character class.
};
- static bool SkipPass(int pass, bool ignore_case);
+ static bool SkipPass(TextEmitPassType pass, bool ignore_case);
static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
static const int kLastPass = CHARACTER_CLASS_MATCH;
void TextEmitPass(RegExpCompiler* compiler,
@@ -774,11 +804,12 @@
class BackReferenceNode: public SeqRegExpNode {
public:
- BackReferenceNode(int start_reg, int end_reg, bool read_backward,
- RegExpNode* on_success)
+ BackReferenceNode(int start_reg, int end_reg, JSRegExp::Flags flags,
+ bool read_backward, RegExpNode* on_success)
: SeqRegExpNode(on_success),
start_reg_(start_reg),
end_reg_(end_reg),
+ flags_(flags),
read_backward_(read_backward) {}
virtual void Accept(NodeVisitor* visitor);
int start_register() { return start_reg_; }
@@ -800,6 +831,7 @@
private:
int start_reg_;
int end_reg_;
+ JSRegExp::Flags flags_;
bool read_backward_;
};
@@ -873,7 +905,8 @@
class GuardedAlternative {
public:
- explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
+ explicit GuardedAlternative(RegExpNode* node)
+ : node_(node), guards_(nullptr) {}
void AddGuard(Guard* guard, Zone* zone);
RegExpNode* node() { return node_; }
void set_node(RegExpNode* node) { node_ = node; }
@@ -892,11 +925,11 @@
public:
explicit ChoiceNode(int expected_size, Zone* zone)
: RegExpNode(zone),
- alternatives_(new(zone)
- ZoneList<GuardedAlternative>(expected_size, zone)),
- table_(NULL),
+ alternatives_(new (zone)
+ ZoneList<GuardedAlternative>(expected_size, zone)),
+ table_(nullptr),
not_at_start_(false),
- being_calculated_(false) { }
+ being_calculated_(false) {}
virtual void Accept(NodeVisitor* visitor);
void AddAlternative(GuardedAlternative node) {
alternatives()->Add(node, zone());
@@ -923,7 +956,7 @@
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return true;
}
- virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
+ virtual RegExpNode* FilterOneByte(int depth);
virtual bool read_backward() { return false; }
protected:
@@ -995,7 +1028,7 @@
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return !is_first;
}
- virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
+ virtual RegExpNode* FilterOneByte(int depth);
};
@@ -1003,8 +1036,8 @@
public:
LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, Zone* zone)
: ChoiceNode(2, zone),
- loop_node_(NULL),
- continue_node_(NULL),
+ loop_node_(nullptr),
+ continue_node_(nullptr),
body_can_be_zero_length_(body_can_be_zero_length),
read_backward_(read_backward) {}
void AddLoopAlternative(GuardedAlternative alt);
@@ -1022,7 +1055,7 @@
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
- virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
+ virtual RegExpNode* FilterOneByte(int depth);
private:
// AddAlternative is made private for loop nodes because alternatives
@@ -1201,7 +1234,7 @@
class DeferredAction {
public:
DeferredAction(ActionNode::ActionType action_type, int reg)
- : action_type_(action_type), reg_(reg), next_(NULL) { }
+ : action_type_(action_type), reg_(reg), next_(nullptr) {}
DeferredAction* next() { return next_; }
bool Mentions(int reg);
int reg() { return reg_; }
@@ -1255,14 +1288,14 @@
Trace()
: cp_offset_(0),
- actions_(NULL),
- backtrack_(NULL),
- stop_node_(NULL),
- loop_label_(NULL),
+ actions_(nullptr),
+ backtrack_(nullptr),
+ stop_node_(nullptr),
+ loop_label_(nullptr),
characters_preloaded_(0),
bound_checked_up_to_(0),
flush_budget_(100),
- at_start_(UNKNOWN) { }
+ at_start_(UNKNOWN) {}
// End the trace. This involves flushing the deferred actions in the trace
// and pushing a backtrack location onto the backtrack stack. Once this is
@@ -1282,13 +1315,9 @@
// a trivial trace is recorded in a label in the node so that gotos can be
// generated to that code.
bool is_trivial() {
- return backtrack_ == NULL &&
- actions_ == NULL &&
- cp_offset_ == 0 &&
- characters_preloaded_ == 0 &&
- bound_checked_up_to_ == 0 &&
- quick_check_performed_.characters() == 0 &&
- at_start_ == UNKNOWN;
+ return backtrack_ == nullptr && actions_ == nullptr && cp_offset_ == 0 &&
+ characters_preloaded_ == 0 && bound_checked_up_to_ == 0 &&
+ quick_check_performed_.characters() == 0 && at_start_ == UNKNOWN;
}
TriBool at_start() { return at_start_; }
void set_at_start(TriBool at_start) { at_start_ = at_start; }
@@ -1307,7 +1336,7 @@
// These set methods and AdvanceCurrentPositionInTrace should be used only on
// new traces - the intention is that traces are immutable after creation.
void add_action(DeferredAction* new_action) {
- DCHECK(new_action->next_ == NULL);
+ DCHECK(new_action->next_ == nullptr);
new_action->next_ = actions_;
actions_ = new_action;
}
@@ -1433,11 +1462,8 @@
// +-------+ +------------+
class Analysis: public NodeVisitor {
public:
- Analysis(Isolate* isolate, JSRegExp::Flags flags, bool is_one_byte)
- : isolate_(isolate),
- flags_(flags),
- is_one_byte_(is_one_byte),
- error_message_(NULL) {}
+ Analysis(Isolate* isolate, bool is_one_byte)
+ : isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {}
void EnsureAnalyzed(RegExpNode* node);
#define DECLARE_VISIT(Type) \
@@ -1446,9 +1472,9 @@
#undef DECLARE_VISIT
virtual void VisitLoopChoice(LoopChoiceNode* that);
- bool has_failed() { return error_message_ != NULL; }
+ bool has_failed() { return error_message_ != nullptr; }
const char* error_message() {
- DCHECK(error_message_ != NULL);
+ DCHECK(error_message_ != nullptr);
return error_message_;
}
void fail(const char* error_message) {
@@ -1457,12 +1483,8 @@
Isolate* isolate() const { return isolate_; }
- bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
- bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
-
private:
Isolate* isolate_;
- JSRegExp::Flags flags_;
bool is_one_byte_;
const char* error_message_;
@@ -1472,11 +1494,11 @@
struct RegExpCompileData {
RegExpCompileData()
- : tree(NULL),
- node(NULL),
- simple(true),
- contains_anchor(false),
- capture_count(0) { }
+ : tree(nullptr),
+ node(nullptr),
+ simple(true),
+ contains_anchor(false),
+ capture_count(0) {}
RegExpTree* tree;
RegExpNode* node;
bool simple;
@@ -1492,10 +1514,10 @@
struct CompilationResult {
CompilationResult(Isolate* isolate, const char* error_message)
: error_message(error_message),
- code(isolate->heap()->the_hole_value()),
+ code(ReadOnlyRoots(isolate).the_hole_value()),
num_registers(0) {}
CompilationResult(Object* code, int registers)
- : error_message(NULL), code(code), num_registers(registers) {}
+ : error_message(nullptr), code(code), num_registers(registers) {}
const char* error_message;
Object* code;
int num_registers;
@@ -1508,7 +1530,7 @@
Handle<String> sample_subject,
bool is_one_byte);
- static bool TooMuchRegExpCode(Handle<String> pattern);
+ static bool TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern);
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};
diff --git a/src/regexp/mips/OWNERS b/src/regexp/mips/OWNERS
index 89455a4..8bbcab4 100644
--- a/src/regexp/mips/OWNERS
+++ b/src/regexp/mips/OWNERS
@@ -1,6 +1,2 @@
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
\ No newline at end of file
diff --git a/src/regexp/mips/regexp-macro-assembler-mips.cc b/src/regexp/mips/regexp-macro-assembler-mips.cc
index 062d661..36ac932 100644
--- a/src/regexp/mips/regexp-macro-assembler-mips.cc
+++ b/src/regexp/mips/regexp-macro-assembler-mips.cc
@@ -6,9 +6,11 @@
#include "src/regexp/mips/regexp-macro-assembler-mips.h"
+#include "src/assembler-inl.h"
#include "src/code-stubs.h"
#include "src/log.h"
#include "src/macro-assembler.h"
+#include "src/objects-inl.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
@@ -38,14 +40,13 @@
*
* The stack will have the following structure:
*
- * - fp[64] Isolate* isolate (address of the current isolate)
- * - fp[60] direct_call (if 1, direct call from JavaScript code,
+ * - fp[60] Isolate* isolate (address of the current isolate)
+ * - fp[56] direct_call (if 1, direct call from JavaScript code,
* if 0, call through the runtime system).
- * - fp[56] stack_area_base (High end of the memory area to use as
+ * - fp[52] stack_area_base (High end of the memory area to use as
* backtracking stack).
- * - fp[52] capture array size (may fit multiple sets of matches)
- * - fp[48] int* capture_array (int[num_saved_registers_], for output).
- * - fp[44] secondary link/return address used by native call.
+ * - fp[48] capture array size (may fit multiple sets of matches)
+ * - fp[44] int* capture_array (int[num_saved_registers_], for output).
* --- sp when called ---
* - fp[40] return address (lr).
* - fp[36] old frame pointer (r11).
@@ -78,17 +79,13 @@
* int start_index,
* Address start,
* Address end,
- * Address secondary_return_address, // Only used by native call.
* int* capture_output_array,
+ * int num_capture_registers,
* byte* stack_area_base,
- * bool direct_call = false)
+ * bool direct_call = false,
+ * Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in mips/simulator-mips.h.
- * When calling as a non-direct call (i.e., from C++ code), the return address
- * area is overwritten with the ra register by the RegExp code. When doing a
- * direct call from generated code, the return address is placed there by
- * the calling code, as in a normal exit frame.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -97,7 +94,7 @@
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
@@ -147,8 +144,8 @@
void RegExpMacroAssemblerMIPS::AdvanceRegister(int reg, int by) {
- DCHECK(reg >= 0);
- DCHECK(reg < num_registers_);
+ DCHECK_LE(0, reg);
+ DCHECK_GT(num_registers_, reg);
if (by != 0) {
__ lw(a0, register_location(reg));
__ Addu(a0, a0, Operand(by));
@@ -293,7 +290,7 @@
__ Subu(current_input_offset(), current_input_offset(), Operand(t5));
}
} else {
- DCHECK(mode_ == UC16);
+ DCHECK_EQ(UC16, mode_);
// Put regexp engine registers on stack.
RegList regexp_registers_to_retain = current_input_offset().bit() |
current_character().bit() | backtrack_stackpointer().bit();
@@ -324,11 +321,11 @@
__ Subu(a1, a1, Operand(s3));
}
// Isolate.
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (unicode) {
__ mov(a3, zero_reg);
} else // NOLINT
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
{
__ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
}
@@ -457,7 +454,7 @@
uc16 minus,
uc16 mask,
Label* on_not_equal) {
- DCHECK(minus < String::kMaxUtf16CodeUnit);
+ DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
__ Subu(a0, current_character(), Operand(minus));
__ And(a0, a0, Operand(mask));
BranchOrBacktrack(on_not_equal, ne, a0, Operand(c));
@@ -511,11 +508,11 @@
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ Branch(&success, eq, current_character(), Operand(' '));
- // Check range 0x09..0x0d.
+ // Check range 0x09..0x0D.
__ Subu(a0, current_character(), Operand('\t'));
__ Branch(&success, ls, a0, Operand('\r' - '\t'));
// \u00a0 (NBSP).
- BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00a0 - '\t'));
+ BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
__ bind(&success);
return true;
}
@@ -534,34 +531,34 @@
BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0'));
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029).
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c.
- __ Subu(a0, a0, Operand(0x0b));
- BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+ __ Subu(a0, a0, Operand(0x0B));
+ BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B));
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ Subu(a0, a0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ Subu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(1));
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029).
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c.
- __ Subu(a0, a0, Operand(0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+ __ Subu(a0, a0, Operand(0x0B));
if (mode_ == LATIN1) {
- BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0c - 0x0b));
+ BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B));
} else {
Label done;
- BranchOrBacktrack(&done, ls, a0, Operand(0x0c - 0x0b));
+ BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B));
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ Subu(a0, a0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ Subu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, hi, a0, Operand(1));
__ bind(&done);
}
@@ -572,7 +569,7 @@
// Table is 256 entries, so all Latin1 characters can be tested.
BranchOrBacktrack(on_no_match, hi, current_character(), Operand('z'));
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map = ExternalReference::re_word_character_map(isolate());
__ li(a0, Operand(map));
__ Addu(a0, a0, current_character());
__ lbu(a0, MemOperand(a0, 0));
@@ -585,7 +582,7 @@
// Table is 256 entries, so all Latin1 characters can be tested.
__ Branch(&done, hi, current_character(), Operand('z'));
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map = ExternalReference::re_word_character_map(isolate());
__ li(a0, Operand(map));
__ Addu(a0, a0, current_character());
__ lbu(a0, MemOperand(a0, 0));
@@ -881,7 +878,7 @@
__ CallCFunction(grow_stack, num_arguments);
// Restore regexp registers.
__ MultiPop(regexp_registers);
- // If return NULL, we have failed to grow the stack, and
+ // If return nullptr, we have failed to grow the stack, and
// must exit with a stack-overflow exception.
__ Branch(&exit_with_exception, eq, v0, Operand(zero_reg));
// Otherwise use return value as new stack pointer.
@@ -902,9 +899,9 @@
}
CodeDesc code_desc;
- masm_->GetCode(&code_desc);
- Handle<Code> code = isolate()->factory()->NewCode(
- code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
+ masm_->GetCode(isolate(), &code_desc);
+ Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
+ masm_->CodeObject());
LOG(masm_->isolate(),
RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
return Handle<HeapObject>::cast(code);
@@ -912,7 +909,7 @@
void RegExpMacroAssemblerMIPS::GoTo(Label* to) {
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
@@ -1095,7 +1092,7 @@
// Align the stack pointer and save the original sp value on the stack.
__ mov(scratch, sp);
__ Subu(sp, sp, Operand(kPointerSize));
- DCHECK(base::bits::IsPowerOfTwo32(stack_alignment));
+ DCHECK(base::bits::IsPowerOfTwo(stack_alignment));
__ And(sp, sp, Operand(-stack_alignment));
__ sw(scratch, MemOperand(sp));
@@ -1146,7 +1143,7 @@
// Helper function for reading a value out of a stack frame.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
- return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
+ return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
}
@@ -1197,14 +1194,14 @@
Register rs,
const Operand& rt) {
if (condition == al) { // Unconditional.
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
__ jmp(to);
return;
}
- if (to == NULL) {
+ if (to == nullptr) {
__ Branch(&backtrack_label_, condition, rs, rt);
return;
}
@@ -1235,7 +1232,7 @@
void RegExpMacroAssemblerMIPS::Push(Register source) {
- DCHECK(!source.is(backtrack_stackpointer()));
+ DCHECK(source != backtrack_stackpointer());
__ Addu(backtrack_stackpointer(),
backtrack_stackpointer(),
Operand(-kPointerSize));
@@ -1244,7 +1241,7 @@
void RegExpMacroAssemblerMIPS::Pop(Register target) {
- DCHECK(!target.is(backtrack_stackpointer()));
+ DCHECK(target != backtrack_stackpointer());
__ lw(target, MemOperand(backtrack_stackpointer()));
__ Addu(backtrack_stackpointer(), backtrack_stackpointer(), kPointerSize);
}
@@ -1280,12 +1277,12 @@
}
// We assume that we cannot do unaligned loads on MIPS, so this function
// must only be used to load a single character at a time.
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ Addu(t5, end_of_input_address(), Operand(offset));
if (mode_ == LATIN1) {
__ lbu(current_character(), MemOperand(t5, 0));
} else {
- DCHECK(mode_ == UC16);
+ DCHECK_EQ(UC16, mode_);
__ lhu(current_character(), MemOperand(t5, 0));
}
}
diff --git a/src/regexp/mips/regexp-macro-assembler-mips.h b/src/regexp/mips/regexp-macro-assembler-mips.h
index 6dedb1e..6d61601 100644
--- a/src/regexp/mips/regexp-macro-assembler-mips.h
+++ b/src/regexp/mips/regexp-macro-assembler-mips.h
@@ -103,9 +103,8 @@
static const int kStoredRegisters = kFramePointer;
// Return address (stored from link register, read into pc on return).
static const int kReturnAddress = kStoredRegisters + 9 * kPointerSize;
- static const int kSecondaryReturnAddress = kReturnAddress + kPointerSize;
// Stack frame header.
- static const int kStackFrameHeader = kReturnAddress + kPointerSize;
+ static const int kStackFrameHeader = kReturnAddress;
// Stack parameters placed by caller.
static const int kRegisterOutput = kStackFrameHeader + 20;
static const int kNumOutputRegisters = kRegisterOutput + kPointerSize;
@@ -171,7 +170,7 @@
inline int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
+ // is nullptr, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Label* to,
Condition condition,
Register rs,
diff --git a/src/regexp/mips64/OWNERS b/src/regexp/mips64/OWNERS
index 89455a4..8bbcab4 100644
--- a/src/regexp/mips64/OWNERS
+++ b/src/regexp/mips64/OWNERS
@@ -1,6 +1,2 @@
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
\ No newline at end of file
diff --git a/src/regexp/mips64/regexp-macro-assembler-mips64.cc b/src/regexp/mips64/regexp-macro-assembler-mips64.cc
index e0317de..17a8ce8 100644
--- a/src/regexp/mips64/regexp-macro-assembler-mips64.cc
+++ b/src/regexp/mips64/regexp-macro-assembler-mips64.cc
@@ -6,9 +6,11 @@
#include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
+#include "src/assembler-inl.h"
#include "src/code-stubs.h"
#include "src/log.h"
#include "src/macro-assembler.h"
+#include "src/objects-inl.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-stack.h"
#include "src/unicode.h"
@@ -17,7 +19,9 @@
namespace internal {
#ifndef V8_INTERPRETED_REGEXP
-/*
+
+/* clang-format off
+ *
* This assembler uses the following register assignment convention
* - t3 : Temporarily stores the index of capture start after a matching pass
* for a global regexp.
@@ -41,15 +45,14 @@
*
* The O32 stack will have the following structure:
*
- * - fp[76] Isolate* isolate (address of the current isolate)
- * - fp[72] direct_call (if 1, direct call from JavaScript code,
+ * - fp[72] Isolate* isolate (address of the current isolate)
+ * - fp[68] direct_call (if 1, direct call from JavaScript code,
* if 0, call through the runtime system).
- * - fp[68] stack_area_base (High end of the memory area to use as
+ * - fp[64] stack_area_base (High end of the memory area to use as
* backtracking stack).
- * - fp[64] capture array size (may fit multiple sets of matches)
- * - fp[60] int* capture_array (int[num_saved_registers_], for output).
+ * - fp[60] capture array size (may fit multiple sets of matches)
* - fp[44..59] MIPS O32 four argument slots
- * - fp[40] secondary link/return address used by native call.
+ * - fp[40] int* capture_array (int[num_saved_registers_], for output).
* --- sp when called ---
* - fp[36] return address (lr).
* - fp[32] old frame pointer (r11).
@@ -74,9 +77,8 @@
*
* The N64 stack will have the following structure:
*
- * - fp[88] Isolate* isolate (address of the current isolate) kIsolate
- * - fp[80] secondary link/return address used by exit frame on native call. kSecondaryReturnAddress
- kStackFrameHeader
+ * - fp[80] Isolate* isolate (address of the current isolate) kIsolate
+ * kStackFrameHeader
* --- sp when called ---
* - fp[72] ra Return from RegExp code (ra). kReturnAddress
* - fp[64] s9, old-fp Old fp, callee saved(s9).
@@ -112,19 +114,15 @@
* int start_index,
* Address start,
* Address end,
- * Address secondary_return_address, // Only used by native call.
* int* capture_output_array,
+ * int num_capture_registers,
* byte* stack_area_base,
* bool direct_call = false,
- * void* return_address,
* Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in mips/simulator-mips.h.
- * When calling as a non-direct call (i.e., from C++ code), the return address
- * area is overwritten with the ra register by the RegExp code. When doing a
- * direct call from generated code, the return address is placed there by
- * the calling code, as in a normal exit frame.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
+ *
+ * clang-format on
*/
#define __ ACCESS_MASM(masm_)
@@ -133,7 +131,7 @@
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
@@ -183,12 +181,12 @@
void RegExpMacroAssemblerMIPS::AdvanceRegister(int reg, int by) {
- DCHECK(reg >= 0);
- DCHECK(reg < num_registers_);
+ DCHECK_LE(0, reg);
+ DCHECK_GT(num_registers_, reg);
if (by != 0) {
- __ ld(a0, register_location(reg));
+ __ Ld(a0, register_location(reg));
__ Daddu(a0, a0, Operand(by));
- __ sd(a0, register_location(reg));
+ __ Sd(a0, register_location(reg));
}
}
@@ -218,7 +216,7 @@
void RegExpMacroAssemblerMIPS::CheckAtStart(Label* on_at_start) {
- __ ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
__ Daddu(a0, current_input_offset(), Operand(-char_size()));
BranchOrBacktrack(on_at_start, eq, a0, Operand(a1));
}
@@ -226,7 +224,7 @@
void RegExpMacroAssemblerMIPS::CheckNotAtStart(int cp_offset,
Label* on_not_at_start) {
- __ ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
__ Daddu(a0, current_input_offset(),
Operand(-char_size() + cp_offset * char_size()));
BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1));
@@ -240,7 +238,7 @@
void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
Label backtrack_non_equal;
- __ lw(a0, MemOperand(backtrack_stackpointer(), 0));
+ __ Lw(a0, MemOperand(backtrack_stackpointer(), 0));
__ Branch(&backtrack_non_equal, ne, current_input_offset(), Operand(a0));
__ Daddu(backtrack_stackpointer(),
backtrack_stackpointer(),
@@ -253,8 +251,8 @@
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
- __ ld(a0, register_location(start_reg)); // Index of start of capture.
- __ ld(a1, register_location(start_reg + 1)); // Index of end of capture.
+ __ Ld(a0, register_location(start_reg)); // Index of start of capture.
+ __ Ld(a1, register_location(start_reg + 1)); // Index of end of capture.
__ Dsubu(a1, a1, a0); // Length of capture.
// At this point, the capture registers are either both set or both cleared.
@@ -263,7 +261,7 @@
__ Branch(&fallthrough, eq, a1, Operand(zero_reg));
if (read_backward) {
- __ ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
__ Daddu(t1, t1, a1);
BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
} else {
@@ -292,9 +290,9 @@
Label loop;
__ bind(&loop);
- __ lbu(a3, MemOperand(a0, 0));
+ __ Lbu(a3, MemOperand(a0, 0));
__ daddiu(a0, a0, char_size());
- __ lbu(a4, MemOperand(a2, 0));
+ __ Lbu(a4, MemOperand(a2, 0));
__ daddiu(a2, a2, char_size());
__ Branch(&loop_check, eq, a4, Operand(a3));
@@ -323,8 +321,8 @@
// Compute new value of character position after the matched part.
__ Dsubu(current_input_offset(), a2, end_of_input_address());
if (read_backward) {
- __ ld(t1, register_location(start_reg)); // Index of start of capture.
- __ ld(a2, register_location(start_reg + 1)); // Index of end of capture.
+ __ Ld(t1, register_location(start_reg)); // Index of start of capture.
+ __ Ld(a2, register_location(start_reg + 1)); // Index of end of capture.
__ Daddu(current_input_offset(), current_input_offset(), Operand(t1));
__ Dsubu(current_input_offset(), current_input_offset(), Operand(a2));
}
@@ -360,11 +358,11 @@
__ Dsubu(a1, a1, Operand(s3));
}
// Isolate.
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (unicode) {
__ mov(a3, zero_reg);
} else // NOLINT
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
{
__ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
}
@@ -379,7 +377,7 @@
// Restore regexp engine registers.
__ MultiPop(regexp_registers_to_retain);
__ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
- __ ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
+ __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
// Check if function returned non-zero for success or zero for failure.
BranchOrBacktrack(on_no_match, eq, v0, Operand(zero_reg));
@@ -402,8 +400,8 @@
Label success;
// Find length of back-referenced capture.
- __ ld(a0, register_location(start_reg));
- __ ld(a1, register_location(start_reg + 1));
+ __ Ld(a0, register_location(start_reg));
+ __ Ld(a1, register_location(start_reg + 1));
__ Dsubu(a1, a1, a0); // Length to check.
// At this point, the capture registers are either both set or both cleared.
@@ -412,7 +410,7 @@
__ Branch(&fallthrough, eq, a1, Operand(zero_reg));
if (read_backward) {
- __ ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
__ Daddu(t1, t1, a1);
BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
} else {
@@ -432,15 +430,15 @@
Label loop;
__ bind(&loop);
if (mode_ == LATIN1) {
- __ lbu(a3, MemOperand(a0, 0));
+ __ Lbu(a3, MemOperand(a0, 0));
__ daddiu(a0, a0, char_size());
- __ lbu(a4, MemOperand(a2, 0));
+ __ Lbu(a4, MemOperand(a2, 0));
__ daddiu(a2, a2, char_size());
} else {
DCHECK(mode_ == UC16);
- __ lhu(a3, MemOperand(a0, 0));
+ __ Lhu(a3, MemOperand(a0, 0));
__ daddiu(a0, a0, char_size());
- __ lhu(a4, MemOperand(a2, 0));
+ __ Lhu(a4, MemOperand(a2, 0));
__ daddiu(a2, a2, char_size());
}
BranchOrBacktrack(on_no_match, ne, a3, Operand(a4));
@@ -449,8 +447,8 @@
// Move current character position to position after match.
__ Dsubu(current_input_offset(), a2, end_of_input_address());
if (read_backward) {
- __ ld(t1, register_location(start_reg)); // Index of start of capture.
- __ ld(a2, register_location(start_reg + 1)); // Index of end of capture.
+ __ Ld(t1, register_location(start_reg)); // Index of start of capture.
+ __ Ld(a2, register_location(start_reg + 1)); // Index of end of capture.
__ Daddu(current_input_offset(), current_input_offset(), Operand(t1));
__ Dsubu(current_input_offset(), current_input_offset(), Operand(a2));
}
@@ -487,7 +485,7 @@
uc16 minus,
uc16 mask,
Label* on_not_equal) {
- DCHECK(minus < String::kMaxUtf16CodeUnit);
+ DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
__ Dsubu(a0, current_character(), Operand(minus));
__ And(a0, a0, Operand(mask));
BranchOrBacktrack(on_not_equal, ne, a0, Operand(c));
@@ -525,7 +523,7 @@
__ Daddu(a0, a0, current_character());
}
- __ lbu(a0, FieldMemOperand(a0, ByteArray::kHeaderSize));
+ __ Lbu(a0, FieldMemOperand(a0, ByteArray::kHeaderSize));
BranchOrBacktrack(on_bit_set, ne, a0, Operand(zero_reg));
}
@@ -541,11 +539,11 @@
// One byte space characters are '\t'..'\r', ' ' and \u00a0.
Label success;
__ Branch(&success, eq, current_character(), Operand(' '));
- // Check range 0x09..0x0d.
+ // Check range 0x09..0x0D.
__ Dsubu(a0, current_character(), Operand('\t'));
__ Branch(&success, ls, a0, Operand('\r' - '\t'));
// \u00a0 (NBSP).
- BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00a0 - '\t'));
+ BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
__ bind(&success);
return true;
}
@@ -564,34 +562,34 @@
BranchOrBacktrack(on_no_match, ls, a0, Operand('9' - '0'));
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029).
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c.
- __ Dsubu(a0, a0, Operand(0x0b));
- BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+ __ Dsubu(a0, a0, Operand(0x0B));
+ BranchOrBacktrack(on_no_match, ls, a0, Operand(0x0C - 0x0B));
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ Dsubu(a0, a0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ Dsubu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, ls, a0, Operand(1));
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029).
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
__ Xor(a0, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c.
- __ Dsubu(a0, a0, Operand(0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
+ __ Dsubu(a0, a0, Operand(0x0B));
if (mode_ == LATIN1) {
- BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0c - 0x0b));
+ BranchOrBacktrack(on_no_match, hi, a0, Operand(0x0C - 0x0B));
} else {
Label done;
- BranchOrBacktrack(&done, ls, a0, Operand(0x0c - 0x0b));
+ BranchOrBacktrack(&done, ls, a0, Operand(0x0C - 0x0B));
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ Dsubu(a0, a0, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ Dsubu(a0, a0, Operand(0x2028 - 0x0B));
BranchOrBacktrack(on_no_match, hi, a0, Operand(1));
__ bind(&done);
}
@@ -602,10 +600,10 @@
// Table is 256 entries, so all Latin1 characters can be tested.
BranchOrBacktrack(on_no_match, hi, current_character(), Operand('z'));
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map = ExternalReference::re_word_character_map(isolate());
__ li(a0, Operand(map));
__ Daddu(a0, a0, current_character());
- __ lbu(a0, MemOperand(a0, 0));
+ __ Lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
return true;
}
@@ -615,10 +613,10 @@
// Table is 256 entries, so all Latin1 characters can be tested.
__ Branch(&done, hi, current_character(), Operand('z'));
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map = ExternalReference::re_word_character_map(isolate());
__ li(a0, Operand(map));
__ Daddu(a0, a0, current_character());
- __ lbu(a0, MemOperand(a0, 0));
+ __ Lbu(a0, MemOperand(a0, 0));
BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg));
if (mode_ != LATIN1) {
__ bind(&done);
@@ -689,7 +687,7 @@
ExternalReference stack_limit =
ExternalReference::address_of_stack_limit(masm_->isolate());
__ li(a0, Operand(stack_limit));
- __ ld(a0, MemOperand(a0));
+ __ Ld(a0, MemOperand(a0));
__ Dsubu(a0, sp, a0);
// Handle it if the stack pointer is already below the stack limit.
__ Branch(&stack_limit_hit, le, a0, Operand(zero_reg));
@@ -710,20 +708,20 @@
// Allocate space on stack for registers.
__ Dsubu(sp, sp, Operand(num_registers_ * kPointerSize));
// Load string end.
- __ ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
+ __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
// Load input start.
- __ ld(a0, MemOperand(frame_pointer(), kInputStart));
+ __ Ld(a0, MemOperand(frame_pointer(), kInputStart));
// Find negative length (offset of start relative to end).
__ Dsubu(current_input_offset(), a0, end_of_input_address());
// Set a0 to address of char before start of the input string
// (effectively string position -1).
- __ ld(a1, MemOperand(frame_pointer(), kStartIndex));
+ __ Ld(a1, MemOperand(frame_pointer(), kStartIndex));
__ Dsubu(a0, current_input_offset(), Operand(char_size()));
__ dsll(t1, a1, (mode_ == UC16) ? 1 : 0);
__ Dsubu(a0, a0, t1);
// Store this value in a local variable, for use when clearing
// position registers.
- __ sd(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Sd(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
// Initialize code pointer register
__ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
@@ -749,19 +747,19 @@
__ li(a2, Operand(num_saved_registers_));
Label init_loop;
__ bind(&init_loop);
- __ sd(a0, MemOperand(a1));
+ __ Sd(a0, MemOperand(a1));
__ Daddu(a1, a1, Operand(-kPointerSize));
__ Dsubu(a2, a2, Operand(1));
__ Branch(&init_loop, ne, a2, Operand(zero_reg));
} else {
for (int i = 0; i < num_saved_registers_; i++) {
- __ sd(a0, register_location(i));
+ __ Sd(a0, register_location(i));
}
}
}
// Initialize backtrack stack pointer.
- __ ld(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackHighEnd));
+ __ Ld(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackHighEnd));
__ jmp(&start_label_);
@@ -772,9 +770,9 @@
__ bind(&success_label_);
if (num_saved_registers_ > 0) {
// Copy captures to output.
- __ ld(a1, MemOperand(frame_pointer(), kInputStart));
- __ ld(a0, MemOperand(frame_pointer(), kRegisterOutput));
- __ ld(a2, MemOperand(frame_pointer(), kStartIndex));
+ __ Ld(a1, MemOperand(frame_pointer(), kInputStart));
+ __ Ld(a0, MemOperand(frame_pointer(), kRegisterOutput));
+ __ Ld(a2, MemOperand(frame_pointer(), kStartIndex));
__ Dsubu(a1, end_of_input_address(), a1);
// a1 is length of input in bytes.
if (mode_ == UC16) {
@@ -789,8 +787,8 @@
// unroll the loop once to add an operation between a load of a register
// and the following use of that register.
for (int i = 0; i < num_saved_registers_; i += 2) {
- __ ld(a2, register_location(i));
- __ ld(a3, register_location(i + 1));
+ __ Ld(a2, register_location(i));
+ __ Ld(a3, register_location(i + 1));
if (i == 0 && global_with_zero_length_check()) {
// Keep capture start in a4 for the zero-length check later.
__ mov(t3, a2);
@@ -805,21 +803,21 @@
__ Daddu(a3, a1, Operand(a3));
}
// V8 expects the output to be an int32_t array.
- __ sw(a2, MemOperand(a0));
+ __ Sw(a2, MemOperand(a0));
__ Daddu(a0, a0, kIntSize);
- __ sw(a3, MemOperand(a0));
+ __ Sw(a3, MemOperand(a0));
__ Daddu(a0, a0, kIntSize);
}
}
if (global()) {
// Restart matching if the regular expression is flagged as global.
- __ ld(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
- __ ld(a1, MemOperand(frame_pointer(), kNumOutputRegisters));
- __ ld(a2, MemOperand(frame_pointer(), kRegisterOutput));
+ __ Ld(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ __ Ld(a1, MemOperand(frame_pointer(), kNumOutputRegisters));
+ __ Ld(a2, MemOperand(frame_pointer(), kRegisterOutput));
// Increment success counter.
__ Daddu(a0, a0, 1);
- __ sd(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ __ Sd(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
// Capture results have been stored, so the number of remaining global
// output registers is reduced by the number of stored captures.
__ Dsubu(a1, a1, num_saved_registers_);
@@ -827,13 +825,13 @@
__ mov(v0, a0);
__ Branch(&return_v0, lt, a1, Operand(num_saved_registers_));
- __ sd(a1, MemOperand(frame_pointer(), kNumOutputRegisters));
+ __ Sd(a1, MemOperand(frame_pointer(), kNumOutputRegisters));
// Advance the location for output.
__ Daddu(a2, a2, num_saved_registers_ * kIntSize);
- __ sd(a2, MemOperand(frame_pointer(), kRegisterOutput));
+ __ Sd(a2, MemOperand(frame_pointer(), kRegisterOutput));
// Prepare a0 to initialize registers with its value in the next run.
- __ ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
if (global_with_zero_length_check()) {
// Special case for zero-length matches.
@@ -861,7 +859,7 @@
// Exit and return v0.
__ bind(&exit_label_);
if (global()) {
- __ ld(v0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ __ Ld(v0, MemOperand(frame_pointer(), kSuccessfulCaptures));
}
__ bind(&return_v0);
@@ -893,7 +891,7 @@
__ Branch(&return_v0, ne, v0, Operand(zero_reg));
// String might have moved: Reload end of string from frame.
- __ ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
+ __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
__ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
SafeReturn();
}
@@ -918,14 +916,14 @@
__ CallCFunction(grow_stack, num_arguments);
// Restore regexp registers.
__ MultiPop(regexp_registers);
- // If return NULL, we have failed to grow the stack, and
+ // If return nullptr, we have failed to grow the stack, and
// must exit with a stack-overflow exception.
__ Branch(&exit_with_exception, eq, v0, Operand(zero_reg));
// Otherwise use return value as new stack pointer.
__ mov(backtrack_stackpointer(), v0);
// Restore saved registers and continue.
__ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
- __ ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
+ __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
SafeReturn();
}
@@ -939,9 +937,9 @@
}
CodeDesc code_desc;
- masm_->GetCode(&code_desc);
- Handle<Code> code = isolate()->factory()->NewCode(
- code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
+ masm_->GetCode(isolate(), &code_desc);
+ Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
+ masm_->CodeObject());
LOG(masm_->isolate(),
RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
return Handle<HeapObject>::cast(code);
@@ -949,7 +947,7 @@
void RegExpMacroAssemblerMIPS::GoTo(Label* to) {
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
@@ -961,22 +959,22 @@
void RegExpMacroAssemblerMIPS::IfRegisterGE(int reg,
int comparand,
Label* if_ge) {
- __ ld(a0, register_location(reg));
- BranchOrBacktrack(if_ge, ge, a0, Operand(comparand));
+ __ Ld(a0, register_location(reg));
+ BranchOrBacktrack(if_ge, ge, a0, Operand(comparand));
}
void RegExpMacroAssemblerMIPS::IfRegisterLT(int reg,
int comparand,
Label* if_lt) {
- __ ld(a0, register_location(reg));
+ __ Ld(a0, register_location(reg));
BranchOrBacktrack(if_lt, lt, a0, Operand(comparand));
}
void RegExpMacroAssemblerMIPS::IfRegisterEqPos(int reg,
Label* if_eq) {
- __ ld(a0, register_location(reg));
+ __ Ld(a0, register_location(reg));
BranchOrBacktrack(if_eq, eq, a0, Operand(current_input_offset()));
}
@@ -1010,7 +1008,7 @@
void RegExpMacroAssemblerMIPS::PopRegister(int register_index) {
Pop(a0);
- __ sd(a0, register_location(register_index));
+ __ Sd(a0, register_location(register_index));
}
@@ -1028,10 +1026,10 @@
masm_->label_at_put(label, offset);
__ bind(&after_constant);
if (is_int16(cp_offset)) {
- __ lwu(a0, MemOperand(code_pointer(), cp_offset));
+ __ Lwu(a0, MemOperand(code_pointer(), cp_offset));
} else {
__ Daddu(a0, code_pointer(), cp_offset);
- __ lwu(a0, MemOperand(a0, 0));
+ __ Lwu(a0, MemOperand(a0, 0));
}
}
Push(a0);
@@ -1046,20 +1044,20 @@
void RegExpMacroAssemblerMIPS::PushRegister(int register_index,
StackCheckFlag check_stack_limit) {
- __ ld(a0, register_location(register_index));
+ __ Ld(a0, register_location(register_index));
Push(a0);
if (check_stack_limit) CheckStackLimit();
}
void RegExpMacroAssemblerMIPS::ReadCurrentPositionFromRegister(int reg) {
- __ ld(current_input_offset(), register_location(reg));
+ __ Ld(current_input_offset(), register_location(reg));
}
void RegExpMacroAssemblerMIPS::ReadStackPointerFromRegister(int reg) {
- __ ld(backtrack_stackpointer(), register_location(reg));
- __ ld(a0, MemOperand(frame_pointer(), kStackHighEnd));
+ __ Ld(backtrack_stackpointer(), register_location(reg));
+ __ Ld(a0, MemOperand(frame_pointer(), kStackHighEnd));
__ Daddu(backtrack_stackpointer(), backtrack_stackpointer(), Operand(a0));
}
@@ -1082,7 +1080,7 @@
void RegExpMacroAssemblerMIPS::SetRegister(int register_index, int to) {
DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
__ li(a0, Operand(to));
- __ sd(a0, register_location(register_index));
+ __ Sd(a0, register_location(register_index));
}
@@ -1095,27 +1093,27 @@
void RegExpMacroAssemblerMIPS::WriteCurrentPositionToRegister(int reg,
int cp_offset) {
if (cp_offset == 0) {
- __ sd(current_input_offset(), register_location(reg));
+ __ Sd(current_input_offset(), register_location(reg));
} else {
__ Daddu(a0, current_input_offset(), Operand(cp_offset * char_size()));
- __ sd(a0, register_location(reg));
+ __ Sd(a0, register_location(reg));
}
}
void RegExpMacroAssemblerMIPS::ClearRegisters(int reg_from, int reg_to) {
DCHECK(reg_from <= reg_to);
- __ ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
for (int reg = reg_from; reg <= reg_to; reg++) {
- __ sd(a0, register_location(reg));
+ __ Sd(a0, register_location(reg));
}
}
void RegExpMacroAssemblerMIPS::WriteStackPointerToRegister(int reg) {
- __ ld(a1, MemOperand(frame_pointer(), kStackHighEnd));
+ __ Ld(a1, MemOperand(frame_pointer(), kStackHighEnd));
__ Dsubu(a0, backtrack_stackpointer(), a1);
- __ sd(a0, register_location(reg));
+ __ Sd(a0, register_location(reg));
}
@@ -1132,9 +1130,9 @@
// Align the stack pointer and save the original sp value on the stack.
__ mov(scratch, sp);
__ Dsubu(sp, sp, Operand(kPointerSize));
- DCHECK(base::bits::IsPowerOfTwo32(stack_alignment));
+ DCHECK(base::bits::IsPowerOfTwo(stack_alignment));
__ And(sp, sp, Operand(-stack_alignment));
- __ sd(scratch, MemOperand(sp));
+ __ Sd(scratch, MemOperand(sp));
__ mov(a2, frame_pointer());
// Code* of self.
@@ -1174,7 +1172,7 @@
// [sp + 2] - C argument slot.
// [sp + 1] - C argument slot.
// [sp + 0] - C argument slot.
- __ ld(sp, MemOperand(sp, stack_alignment + kCArgsSlotsSize));
+ __ Ld(sp, MemOperand(sp, stack_alignment + kCArgsSlotsSize));
__ li(code_pointer(), Operand(masm_->CodeObject()));
}
@@ -1183,7 +1181,7 @@
// Helper function for reading a value out of a stack frame.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
- return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
+ return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
}
@@ -1222,7 +1220,7 @@
BranchOrBacktrack(on_outside_input, ge, current_input_offset(),
Operand(-cp_offset * char_size()));
} else {
- __ ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
+ __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
__ Daddu(a0, current_input_offset(), Operand(cp_offset * char_size()));
BranchOrBacktrack(on_outside_input, le, a0, Operand(a1));
}
@@ -1234,14 +1232,14 @@
Register rs,
const Operand& rt) {
if (condition == al) { // Unconditional.
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
__ jmp(to);
return;
}
- if (to == NULL) {
+ if (to == nullptr) {
__ Branch(&backtrack_label_, condition, rs, rt);
return;
}
@@ -1272,17 +1270,17 @@
void RegExpMacroAssemblerMIPS::Push(Register source) {
- DCHECK(!source.is(backtrack_stackpointer()));
+ DCHECK(source != backtrack_stackpointer());
__ Daddu(backtrack_stackpointer(),
backtrack_stackpointer(),
Operand(-kIntSize));
- __ sw(source, MemOperand(backtrack_stackpointer()));
+ __ Sw(source, MemOperand(backtrack_stackpointer()));
}
void RegExpMacroAssemblerMIPS::Pop(Register target) {
- DCHECK(!target.is(backtrack_stackpointer()));
- __ lw(target, MemOperand(backtrack_stackpointer()));
+ DCHECK(target != backtrack_stackpointer());
+ __ Lw(target, MemOperand(backtrack_stackpointer()));
__ Daddu(backtrack_stackpointer(), backtrack_stackpointer(), kIntSize);
}
@@ -1292,7 +1290,7 @@
ExternalReference stack_limit =
ExternalReference::address_of_stack_limit(masm_->isolate());
__ li(a0, Operand(stack_limit));
- __ ld(a0, MemOperand(a0));
+ __ Ld(a0, MemOperand(a0));
SafeCall(&check_preempt_label_, ls, sp, Operand(a0));
}
@@ -1302,7 +1300,7 @@
ExternalReference::address_of_regexp_stack_limit(masm_->isolate());
__ li(a0, Operand(stack_limit));
- __ ld(a0, MemOperand(a0));
+ __ Ld(a0, MemOperand(a0));
SafeCall(&stack_overflow_label_, ls, backtrack_stackpointer(), Operand(a0));
}
@@ -1317,13 +1315,13 @@
}
// We assume that we cannot do unaligned loads on MIPS, so this function
// must only be used to load a single character at a time.
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ Daddu(t1, end_of_input_address(), Operand(offset));
if (mode_ == LATIN1) {
- __ lbu(current_character(), MemOperand(t1, 0));
+ __ Lbu(current_character(), MemOperand(t1, 0));
} else {
DCHECK(mode_ == UC16);
- __ lhu(current_character(), MemOperand(t1, 0));
+ __ Lhu(current_character(), MemOperand(t1, 0));
}
}
diff --git a/src/regexp/mips64/regexp-macro-assembler-mips64.h b/src/regexp/mips64/regexp-macro-assembler-mips64.h
index df2c6c5..37c1d3f 100644
--- a/src/regexp/mips64/regexp-macro-assembler-mips64.h
+++ b/src/regexp/mips64/regexp-macro-assembler-mips64.h
@@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#ifndef V8_REGEXP_MIPS_REGEXP_MACRO_ASSEMBLER_MIPS_H_
-#define V8_REGEXP_MIPS_REGEXP_MACRO_ASSEMBLER_MIPS_H_
+#ifndef V8_REGEXP_MIPS64_REGEXP_MACRO_ASSEMBLER_MIPS64_H_
+#define V8_REGEXP_MIPS64_REGEXP_MACRO_ASSEMBLER_MIPS64_H_
#include "src/macro-assembler.h"
#include "src/mips64/assembler-mips64.h"
@@ -107,9 +107,8 @@
// TODO(plind): This 9 - is 8 s-regs (s0..s7) plus fp.
static const int kReturnAddress = kStoredRegisters + 9 * kPointerSize;
- static const int kSecondaryReturnAddress = kReturnAddress + kPointerSize;
// Stack frame header.
- static const int kStackFrameHeader = kSecondaryReturnAddress;
+ static const int kStackFrameHeader = kReturnAddress;
// Stack parameters placed by caller.
static const int kIsolate = kStackFrameHeader + kPointerSize;
@@ -175,7 +174,7 @@
inline int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
+ // is nullptr, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Label* to,
Condition condition,
Register rs,
@@ -229,4 +228,4 @@
} // namespace internal
} // namespace v8
-#endif // V8_REGEXP_MIPS_REGEXP_MACRO_ASSEMBLER_MIPS_H_
+#endif // V8_REGEXP_MIPS64_REGEXP_MACRO_ASSEMBLER_MIPS64_H_
diff --git a/src/regexp/ppc/OWNERS b/src/regexp/ppc/OWNERS
index 752e8e3..cf60da5 100644
--- a/src/regexp/ppc/OWNERS
+++ b/src/regexp/ppc/OWNERS
@@ -3,4 +3,5 @@
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
diff --git a/src/regexp/ppc/regexp-macro-assembler-ppc.cc b/src/regexp/ppc/regexp-macro-assembler-ppc.cc
index 531eac1..4944220 100644
--- a/src/regexp/ppc/regexp-macro-assembler-ppc.cc
+++ b/src/regexp/ppc/regexp-macro-assembler-ppc.cc
@@ -6,6 +6,7 @@
#include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
+#include "src/assembler-inl.h"
#include "src/base/bits.h"
#include "src/code-stubs.h"
#include "src/log.h"
@@ -38,8 +39,7 @@
* Each call to a public method should retain this convention.
*
* The stack will have the following structure:
- * - fp[44] Isolate* isolate (address of the current isolate)
- * - fp[40] secondary link/return address used by native call.
+ * - fp[40] Isolate* isolate (address of the current isolate)
* - fp[36] lr save area (currently unused)
* - fp[32] backchain (currently unused)
* --- sp when called ---
@@ -81,16 +81,12 @@
* Address start,
* Address end,
* int* capture_output_array,
+ * int num_capture_registers,
* byte* stack_area_base,
- * Address secondary_return_address, // Only used by native call.
- * bool direct_call = false)
+ * bool direct_call = false,
+ * Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in ppc/simulator-ppc.h.
- * When calling as a non-direct call (i.e., from C++ code), the return address
- * area is overwritten with the LR register by the RegExp code. When doing a
- * direct call from generated code, the return address is placed there by
- * the calling code, as in a normal exit frame.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -99,7 +95,7 @@
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
@@ -153,8 +149,8 @@
void RegExpMacroAssemblerPPC::AdvanceRegister(int reg, int by) {
- DCHECK(reg >= 0);
- DCHECK(reg < num_registers_);
+ DCHECK_LE(0, reg);
+ DCHECK_GT(num_registers_, reg);
if (by != 0) {
__ LoadP(r3, register_location(reg), r0);
__ mov(r0, Operand(by));
@@ -334,11 +330,11 @@
__ sub(r4, r4, r25);
}
// Isolate.
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (unicode) {
__ li(r6, Operand::Zero());
} else // NOLINT
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
{
__ mov(r6, Operand(ExternalReference::isolate_address(isolate())));
}
@@ -387,7 +383,7 @@
__ LoadP(r6, MemOperand(frame_pointer(), kStringStartMinusOne));
__ add(r6, r6, r4);
__ cmp(current_input_offset(), r6);
- BranchOrBacktrack(lt, on_no_match);
+ BranchOrBacktrack(le, on_no_match);
} else {
__ add(r0, r4, current_input_offset(), LeaveOE, SetRC);
BranchOrBacktrack(gt, on_no_match, cr0);
@@ -470,7 +466,7 @@
void RegExpMacroAssemblerPPC::CheckNotCharacterAfterMinusAnd(
uc16 c, uc16 minus, uc16 mask, Label* on_not_equal) {
- DCHECK(minus < String::kMaxUtf16CodeUnit);
+ DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
__ subi(r3, current_character(), Operand(minus));
__ mov(r0, Operand(mask));
__ and_(r3, r3, r0);
@@ -525,12 +521,12 @@
Label success;
__ cmpi(current_character(), Operand(' '));
__ beq(&success);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ subi(r3, current_character(), Operand('\t'));
__ cmpli(r3, Operand('\r' - '\t'));
__ ble(&success);
// \u00a0 (NBSP).
- __ cmpi(r3, Operand(0x00a0 - '\t'));
+ __ cmpi(r3, Operand(0x00A0 - '\t'));
BranchOrBacktrack(ne, on_no_match);
__ bind(&success);
return true;
@@ -552,37 +548,37 @@
BranchOrBacktrack(le, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ xori(r3, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ subi(r3, r3, Operand(0x0b));
- __ cmpli(r3, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ subi(r3, r3, Operand(0x0B));
+ __ cmpli(r3, Operand(0x0C - 0x0B));
BranchOrBacktrack(le, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ subi(r3, r3, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ subi(r3, r3, Operand(0x2028 - 0x0B));
__ cmpli(r3, Operand(1));
BranchOrBacktrack(le, on_no_match);
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ xori(r3, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ subi(r3, r3, Operand(0x0b));
- __ cmpli(r3, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ subi(r3, r3, Operand(0x0B));
+ __ cmpli(r3, Operand(0x0C - 0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(gt, on_no_match);
} else {
Label done;
__ ble(&done);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ subi(r3, r3, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ subi(r3, r3, Operand(0x2028 - 0x0B));
__ cmpli(r3, Operand(1));
BranchOrBacktrack(gt, on_no_match);
__ bind(&done);
@@ -595,7 +591,8 @@
__ cmpi(current_character(), Operand('z'));
BranchOrBacktrack(gt, on_no_match);
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map =
+ ExternalReference::re_word_character_map(isolate());
__ mov(r3, Operand(map));
__ lbzx(r3, MemOperand(r3, current_character()));
__ cmpli(r3, Operand::Zero());
@@ -609,7 +606,8 @@
__ cmpli(current_character(), Operand('z'));
__ bgt(&done);
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map =
+ ExternalReference::re_word_character_map(isolate());
__ mov(r3, Operand(map));
__ lbzx(r3, MemOperand(r3, current_character()));
__ cmpli(r3, Operand::Zero());
@@ -916,7 +914,7 @@
ExternalReference grow_stack =
ExternalReference::re_grow_stack(isolate());
__ CallCFunction(grow_stack, num_arguments);
- // If return NULL, we have failed to grow the stack, and
+ // If return nullptr, we have failed to grow the stack, and
// must exit with a stack-overflow exception.
__ cmpi(r3, Operand::Zero());
__ beq(&exit_with_exception);
@@ -936,9 +934,9 @@
}
CodeDesc code_desc;
- masm_->GetCode(&code_desc);
- Handle<Code> code = isolate()->factory()->NewCode(
- code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
+ masm_->GetCode(isolate(), &code_desc);
+ Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
+ masm_->CodeObject());
PROFILE(masm_->isolate(),
RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
return Handle<HeapObject>::cast(code);
@@ -1105,7 +1103,7 @@
// -- preserving original value of sp.
__ mr(scratch, sp);
__ addi(sp, sp, Operand(-(stack_passed_arguments + 1) * kPointerSize));
- DCHECK(base::bits::IsPowerOfTwo32(frame_alignment));
+ DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
__ ClearRightImm(sp, sp, Operand(WhichPowerOf2(frame_alignment)));
__ StoreP(scratch, MemOperand(sp, stack_passed_arguments * kPointerSize));
} else {
@@ -1145,7 +1143,7 @@
// Helper function for reading a value out of a stack frame.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
- return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
+ return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
}
@@ -1195,14 +1193,14 @@
void RegExpMacroAssemblerPPC::BranchOrBacktrack(Condition condition, Label* to,
CRegister cr) {
if (condition == al) { // Unconditional.
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
__ b(to);
return;
}
- if (to == NULL) {
+ if (to == nullptr) {
__ b(condition, &backtrack_label_, cr);
return;
}
@@ -1235,13 +1233,13 @@
void RegExpMacroAssemblerPPC::Push(Register source) {
- DCHECK(!source.is(backtrack_stackpointer()));
+ DCHECK(source != backtrack_stackpointer());
__ StorePU(source, MemOperand(backtrack_stackpointer(), -kPointerSize));
}
void RegExpMacroAssemblerPPC::Pop(Register target) {
- DCHECK(!target.is(backtrack_stackpointer()));
+ DCHECK(target != backtrack_stackpointer());
__ LoadP(target, MemOperand(backtrack_stackpointer()));
__ addi(backtrack_stackpointer(), backtrack_stackpointer(),
Operand(kPointerSize));
@@ -1290,7 +1288,7 @@
} else if (characters == 2) {
__ lhz(current_character(), MemOperand(current_character()));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ lbz(current_character(), MemOperand(current_character()));
}
} else {
@@ -1298,7 +1296,7 @@
if (characters == 2) {
__ lwz(current_character(), MemOperand(current_character()));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ lhz(current_character(), MemOperand(current_character()));
}
}
@@ -1309,7 +1307,7 @@
} else if (characters == 2) {
__ lhbrx(current_character(), MemOperand(r0, current_character()));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ lbz(current_character(), MemOperand(current_character()));
}
} else {
@@ -1318,7 +1316,7 @@
__ lwz(current_character(), MemOperand(current_character()));
__ rlwinm(current_character(), current_character(), 16, 0, 31);
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ lhz(current_character(), MemOperand(current_character()));
}
}
diff --git a/src/regexp/ppc/regexp-macro-assembler-ppc.h b/src/regexp/ppc/regexp-macro-assembler-ppc.h
index 9151bf7..1e65600 100644
--- a/src/regexp/ppc/regexp-macro-assembler-ppc.h
+++ b/src/regexp/ppc/regexp-macro-assembler-ppc.h
@@ -7,7 +7,6 @@
#include "src/macro-assembler.h"
#include "src/ppc/assembler-ppc.h"
-#include "src/ppc/frames-ppc.h"
#include "src/regexp/regexp-macro-assembler.h"
namespace v8 {
@@ -96,9 +95,8 @@
static const int kReturnAddress = kStoredRegisters + 7 * kPointerSize;
static const int kCallerFrame = kReturnAddress + kPointerSize;
// Stack parameters placed by caller.
- static const int kSecondaryReturnAddress =
+ static const int kIsolate =
kCallerFrame + kStackFrameExtraParamSlot * kPointerSize;
- static const int kIsolate = kSecondaryReturnAddress + kPointerSize;
// Below the frame pointer.
// Register parameters stored by setup code.
@@ -162,7 +160,7 @@
inline int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
+ // is nullptr, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to, CRegister cr = cr7);
// Call and return internally in the generated code in a way that
diff --git a/src/regexp/regexp-ast.cc b/src/regexp/regexp-ast.cc
index 85babb1..782c9c9 100644
--- a/src/regexp/regexp-ast.cc
+++ b/src/regexp/regexp-ast.cc
@@ -15,8 +15,8 @@
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
#undef MAKE_ACCEPT
-#define MAKE_TYPE_CASE(Name) \
- RegExp##Name* RegExpTree::As##Name() { return NULL; } \
+#define MAKE_TYPE_CASE(Name) \
+ RegExp##Name* RegExpTree::As##Name() { return nullptr; } \
bool RegExpTree::Is##Name() { return false; }
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
#undef MAKE_TYPE_CASE
@@ -156,7 +156,7 @@
that->alternatives()->at(i)->Accept(this, data);
}
os_ << ")";
- return NULL;
+ return nullptr;
}
@@ -167,7 +167,7 @@
that->nodes()->at(i)->Accept(this, data);
}
os_ << ")";
- return NULL;
+ return nullptr;
}
@@ -188,7 +188,7 @@
VisitCharacterRange(that->ranges(zone_)->at(i));
}
os_ << "]";
- return NULL;
+ return nullptr;
}
@@ -213,7 +213,7 @@
os_ << "@B";
break;
}
- return NULL;
+ return nullptr;
}
@@ -224,7 +224,7 @@
os_ << AsUC16(chardata[i]);
}
os_ << "'";
- return NULL;
+ return nullptr;
}
@@ -239,7 +239,7 @@
}
os_ << ")";
}
- return NULL;
+ return nullptr;
}
@@ -253,7 +253,7 @@
os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
that->body()->Accept(this, data);
os_ << ")";
- return NULL;
+ return nullptr;
}
@@ -261,14 +261,14 @@
os_ << "(^ ";
that->body()->Accept(this, data);
os_ << ")";
- return NULL;
+ return nullptr;
}
void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
os_ << "(?: ";
that->body()->Accept(this, data);
os_ << ")";
- return NULL;
+ return nullptr;
}
void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
@@ -277,33 +277,33 @@
os_ << (that->is_positive() ? " + " : " - ");
that->body()->Accept(this, data);
os_ << ")";
- return NULL;
+ return nullptr;
}
void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
void* data) {
os_ << "(<- " << that->index() << ")";
- return NULL;
+ return nullptr;
}
void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
os_ << '%';
- return NULL;
+ return nullptr;
}
std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT
RegExpUnparser unparser(os, zone);
- Accept(&unparser, NULL);
+ Accept(&unparser, nullptr);
return os;
}
RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
: alternatives_(alternatives) {
- DCHECK(alternatives->length() > 1);
+ DCHECK_LT(1, alternatives->length());
RegExpTree* first_alternative = alternatives->at(0);
min_match_ = first_alternative->min_match();
max_match_ = first_alternative->max_match();
@@ -326,7 +326,7 @@
RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
: nodes_(nodes) {
- DCHECK(nodes->length() > 1);
+ DCHECK_LT(1, nodes->length());
min_match_ = 0;
max_match_ = 0;
for (int i = 0; i < nodes->length(); i++) {
diff --git a/src/regexp/regexp-ast.h b/src/regexp/regexp-ast.h
index a45d083..1a94832 100644
--- a/src/regexp/regexp-ast.h
+++ b/src/regexp/regexp-ast.h
@@ -6,6 +6,8 @@
#define V8_REGEXP_REGEXP_AST_H_
#include "src/objects.h"
+#include "src/objects/js-regexp.h"
+#include "src/objects/string.h"
#include "src/utils.h"
#include "src/zone/zone-containers.h"
#include "src/zone/zone.h"
@@ -79,8 +81,11 @@
CharacterRange() : from_(0), to_(0) {}
// For compatibility with the CHECK_OK macro
CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
- static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
+ static void AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
Zone* zone);
+ // Add class escapes. Add case equivalent closure for \w and \W if necessary.
+ static void AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
+ bool add_unicode_case_equivalents, Zone* zone);
static Vector<const int> GetWordBounds();
static inline CharacterRange Singleton(uc32 value) {
return CharacterRange(value, value);
@@ -106,7 +111,7 @@
uc32 to() const { return to_; }
void set_to(uc32 value) { to_ = value; }
bool is_valid() { return from_ <= to_; }
- bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
+ bool IsEverything(uc32 max) { return from_ == 0 && to_ >= max; }
bool IsSingleton() { return (from_ == to_); }
static void AddCaseEquivalents(Isolate* isolate, Zone* zone,
ZoneList<CharacterRange>* ranges,
@@ -136,11 +141,11 @@
class CharacterSet final BASE_EMBEDDED {
public:
explicit CharacterSet(uc16 standard_set_type)
- : ranges_(NULL), standard_set_type_(standard_set_type) {}
+ : ranges_(nullptr), standard_set_type_(standard_set_type) {}
explicit CharacterSet(ZoneList<CharacterRange>* ranges)
: ranges_(ranges), standard_set_type_(0) {}
ZoneList<CharacterRange>* ranges(Zone* zone);
- uc16 standard_set_type() { return standard_set_type_; }
+ uc16 standard_set_type() const { return standard_set_type_; }
void set_standard_set_type(uc16 special_set_type) {
standard_set_type_ = special_set_type;
}
@@ -270,7 +275,8 @@
BOUNDARY,
NON_BOUNDARY
};
- explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {}
+ RegExpAssertion(AssertionType type, JSRegExp::Flags flags)
+ : assertion_type_(type), flags_(flags) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpAssertion* AsAssertion() override;
@@ -282,15 +288,39 @@
AssertionType assertion_type() { return assertion_type_; }
private:
- AssertionType assertion_type_;
+ const AssertionType assertion_type_;
+ const JSRegExp::Flags flags_;
};
class RegExpCharacterClass final : public RegExpTree {
public:
- RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
- : set_(ranges), is_negated_(is_negated) {}
- explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {}
+ // NEGATED: The character class is negated and should match everything but
+ // the specified ranges.
+ // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
+ // surrogate and should not be unicode-desugared (crbug.com/641091).
+ enum Flag {
+ NEGATED = 1 << 0,
+ CONTAINS_SPLIT_SURROGATE = 1 << 1,
+ };
+ typedef base::Flags<Flag> CharacterClassFlags;
+
+ RegExpCharacterClass(
+ Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
+ CharacterClassFlags character_class_flags = CharacterClassFlags())
+ : set_(ranges),
+ flags_(flags),
+ character_class_flags_(character_class_flags) {
+ // Convert the empty set of ranges to the negated Everything() range.
+ if (ranges->is_empty()) {
+ ranges->Add(CharacterRange::Everything(), zone);
+ character_class_flags_ ^= NEGATED;
+ }
+ }
+ RegExpCharacterClass(uc16 type, JSRegExp::Flags flags)
+ : set_(type),
+ flags_(flags),
+ character_class_flags_(CharacterClassFlags()) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpCharacterClass* AsCharacterClass() override;
@@ -317,19 +347,25 @@
// D : non-ASCII digit
// . : non-newline
// * : All characters, for advancing unanchored regexp
- uc16 standard_type() { return set_.standard_set_type(); }
+ uc16 standard_type() const { return set_.standard_set_type(); }
ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
- bool is_negated() { return is_negated_; }
+ bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; }
+ JSRegExp::Flags flags() const { return flags_; }
+ bool contains_split_surrogate() const {
+ return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
+ }
private:
CharacterSet set_;
- bool is_negated_;
+ const JSRegExp::Flags flags_;
+ CharacterClassFlags character_class_flags_;
};
class RegExpAtom final : public RegExpTree {
public:
- explicit RegExpAtom(Vector<const uc16> data) : data_(data) {}
+ explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags)
+ : data_(data), flags_(flags) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpAtom* AsAtom() override;
@@ -340,9 +376,12 @@
void AppendToText(RegExpText* text, Zone* zone) override;
Vector<const uc16> data() { return data_; }
int length() { return data_.length(); }
+ JSRegExp::Flags flags() const { return flags_; }
+ bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
private:
Vector<const uc16> data_;
+ const JSRegExp::Flags flags_;
};
@@ -414,7 +453,7 @@
class RegExpCapture final : public RegExpTree {
public:
explicit RegExpCapture(int index)
- : body_(NULL), index_(index), name_(nullptr) {}
+ : body_(nullptr), index_(index), name_(nullptr) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
static RegExpNode* ToNode(RegExpTree* body, int index,
@@ -514,9 +553,10 @@
class RegExpBackReference final : public RegExpTree {
public:
- RegExpBackReference() : capture_(nullptr), name_(nullptr) {}
- explicit RegExpBackReference(RegExpCapture* capture)
- : capture_(capture), name_(nullptr) {}
+ explicit RegExpBackReference(JSRegExp::Flags flags)
+ : capture_(nullptr), name_(nullptr), flags_(flags) {}
+ RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags)
+ : capture_(capture), name_(nullptr), flags_(flags) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpBackReference* AsBackReference() override;
@@ -534,6 +574,7 @@
private:
RegExpCapture* capture_;
const ZoneVector<uc16>* name_;
+ const JSRegExp::Flags flags_;
};
diff --git a/src/regexp/regexp-macro-assembler-irregexp.cc b/src/regexp/regexp-macro-assembler-irregexp.cc
index 3316c33..eb8d474 100644
--- a/src/regexp/regexp-macro-assembler-irregexp.cc
+++ b/src/regexp/regexp-macro-assembler-irregexp.cc
@@ -54,7 +54,7 @@
void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
- if (l == NULL) l = &backtrack_;
+ if (l == nullptr) l = &backtrack_;
if (l->is_bound()) {
Emit32(l->pos());
} else {
@@ -69,8 +69,8 @@
void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_POP_REGISTER, register_index);
}
@@ -78,16 +78,16 @@
void RegExpMacroAssemblerIrregexp::PushRegister(
int register_index,
StackCheckFlag check_stack_limit) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_PUSH_REGISTER, register_index);
}
void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
int register_index, int cp_offset) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_SET_REGISTER_TO_CP, register_index);
Emit32(cp_offset); // Current position offset.
}
@@ -103,24 +103,24 @@
void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister(
int register_index) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_SET_CP_TO_REGISTER, register_index);
}
void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister(
int register_index) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_SET_REGISTER_TO_SP, register_index);
}
void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister(
int register_index) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_SET_SP_TO_REGISTER, register_index);
}
@@ -132,16 +132,16 @@
void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_SET_REGISTER, register_index);
Emit32(to);
}
void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_ADVANCE_REGISTER, register_index);
Emit32(by);
}
@@ -195,8 +195,8 @@
void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
- DCHECK(by >= kMinCPOffset);
- DCHECK(by <= kMaxCPOffset);
+ DCHECK_LE(kMinCPOffset, by);
+ DCHECK_GE(kMaxCPOffset, by);
advance_current_start_ = pc_;
advance_current_offset_ = by;
Emit(BC_ADVANCE_CP, by);
@@ -215,8 +215,8 @@
Label* on_failure,
bool check_bounds,
int characters) {
- DCHECK(cp_offset >= kMinCPOffset);
- DCHECK(cp_offset <= kMaxCPOffset);
+ DCHECK_LE(kMinCPOffset, cp_offset);
+ DCHECK_GE(kMaxCPOffset, cp_offset);
int bytecode;
if (check_bounds) {
if (characters == 4) {
@@ -224,7 +224,7 @@
} else if (characters == 2) {
bytecode = BC_LOAD_2_CURRENT_CHARS;
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
bytecode = BC_LOAD_CURRENT_CHAR;
}
} else {
@@ -233,7 +233,7 @@
} else if (characters == 2) {
bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
}
}
@@ -373,8 +373,8 @@
void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
bool read_backward,
Label* on_not_equal) {
- DCHECK(start_reg >= 0);
- DCHECK(start_reg <= kMaxRegister);
+ DCHECK_LE(0, start_reg);
+ DCHECK_GE(kMaxRegister, start_reg);
Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
start_reg);
EmitOrLink(on_not_equal);
@@ -383,8 +383,8 @@
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, bool unicode, Label* on_not_equal) {
- DCHECK(start_reg >= 0);
- DCHECK(start_reg <= kMaxRegister);
+ DCHECK_LE(0, start_reg);
+ DCHECK_GE(kMaxRegister, start_reg);
Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
: BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
: (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
@@ -397,8 +397,8 @@
void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index,
int comparand,
Label* on_less_than) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_CHECK_REGISTER_LT, register_index);
Emit32(comparand);
EmitOrLink(on_less_than);
@@ -408,8 +408,8 @@
void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
int comparand,
Label* on_greater_or_equal) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_CHECK_REGISTER_GE, register_index);
Emit32(comparand);
EmitOrLink(on_greater_or_equal);
@@ -418,8 +418,8 @@
void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index,
Label* on_eq) {
- DCHECK(register_index >= 0);
- DCHECK(register_index <= kMaxRegister);
+ DCHECK_LE(0, register_index);
+ DCHECK_GE(kMaxRegister, register_index);
Emit(BC_CHECK_REGISTER_EQ_POS, register_index);
EmitOrLink(on_eq);
}
@@ -439,8 +439,7 @@
return pc_;
}
-
-void RegExpMacroAssemblerIrregexp::Copy(Address a) {
+void RegExpMacroAssemblerIrregexp::Copy(byte* a) {
MemCopy(a, buffer_.start(), length());
}
diff --git a/src/regexp/regexp-macro-assembler-irregexp.h b/src/regexp/regexp-macro-assembler-irregexp.h
index dad2e9a..74c8526 100644
--- a/src/regexp/regexp-macro-assembler-irregexp.h
+++ b/src/regexp/regexp-macro-assembler-irregexp.h
@@ -20,14 +20,15 @@
// relocation information starting from the end of the buffer. See CodeDesc
// for a detailed comment on the layout (globals.h).
//
- // If the provided buffer is NULL, the assembler allocates and grows its own
- // buffer, and buffer_size determines the initial buffer size. The buffer is
- // owned by the assembler and deallocated upon destruction of the assembler.
+ // If the provided buffer is nullptr, the assembler allocates and grows its
+ // own buffer, and buffer_size determines the initial buffer size. The buffer
+ // is owned by the assembler and deallocated upon destruction of the
+ // assembler.
//
- // If the provided buffer is not NULL, the assembler uses the provided buffer
- // for code generation and assumes its size to be buffer_size. If the buffer
- // is too small, a fatal error occurs. No deallocation of the buffer is done
- // upon destruction of the assembler.
+ // If the provided buffer is not nullptr, the assembler uses the provided
+ // buffer for code generation and assumes its size to be buffer_size. If the
+ // buffer is too small, a fatal error occurs. No deallocation of the buffer is
+ // done upon destruction of the assembler.
RegExpMacroAssemblerIrregexp(Isolate* isolate, Vector<byte> buffer,
Zone* zone);
virtual ~RegExpMacroAssemblerIrregexp();
@@ -104,7 +105,7 @@
inline void Emit(uint32_t bc, uint32_t arg);
// Bytecode buffer.
int length();
- void Copy(Address a);
+ void Copy(byte* a);
// The buffer into which code and relocation info are generated.
Vector<byte> buffer_;
diff --git a/src/regexp/regexp-macro-assembler.cc b/src/regexp/regexp-macro-assembler.cc
index 2e3a8a2..77e8847 100644
--- a/src/regexp/regexp-macro-assembler.cc
+++ b/src/regexp/regexp-macro-assembler.cc
@@ -8,10 +8,11 @@
#include "src/isolate-inl.h"
#include "src/regexp/regexp-stack.h"
#include "src/simulator.h"
+#include "src/unicode-inl.h"
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
#include "unicode/uchar.h"
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
namespace v8 {
namespace internal {
@@ -36,12 +37,12 @@
// This function is not allowed to cause a garbage collection.
// A GC might move the calling generated code and invalidate the
// return address on the stack.
- DCHECK(byte_length % 2 == 0);
+ DCHECK_EQ(0, byte_length % 2);
uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
size_t length = byte_length >> 1;
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (isolate == nullptr) {
for (size_t i = 0; i < length; i++) {
uc32 c1 = substring1[i];
@@ -67,7 +68,7 @@
}
return 1;
}
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
DCHECK_NOT_NULL(isolate);
for (size_t i = 0; i < length; i++) {
unibrow::uchar c1 = substring1[i];
@@ -137,8 +138,8 @@
if (subject->IsThinString()) {
subject = ThinString::cast(subject)->actual();
}
- DCHECK(start_index >= 0);
- DCHECK(start_index <= subject->length());
+ DCHECK_LE(0, start_index);
+ DCHECK_LE(start_index, subject->length());
if (subject->IsSeqOneByteString()) {
return reinterpret_cast<const byte*>(
SeqOneByteString::cast(subject)->GetChars() + start_index);
@@ -160,25 +161,28 @@
Isolate* isolate, int start_index, bool is_direct_call,
Address* return_address, Code* re_code, String** subject,
const byte** input_start, const byte** input_end) {
- DCHECK(re_code->instruction_start() <= *return_address);
- DCHECK(*return_address <= re_code->instruction_end());
+ DCHECK(re_code->raw_instruction_start() <= *return_address);
+ DCHECK(*return_address <= re_code->raw_instruction_end());
int return_value = 0;
// Prepare for possible GC.
HandleScope handles(isolate);
- Handle<Code> code_handle(re_code);
- Handle<String> subject_handle(*subject);
+ Handle<Code> code_handle(re_code, isolate);
+ Handle<String> subject_handle(*subject, isolate);
bool is_one_byte = subject_handle->IsOneByteRepresentationUnderneath();
StackLimitCheck check(isolate);
- if (check.JsHasOverflowed()) {
+ bool js_has_overflowed = check.JsHasOverflowed();
+
+ if (is_direct_call) {
+ // Direct calls from JavaScript can be interrupted in two ways:
+ // 1. A real stack overflow, in which case we let the caller throw the
+ // exception.
+ // 2. The stack guard was used to interrupt execution for another purpose,
+ // forcing the call through the runtime system.
+ return_value = js_has_overflowed ? EXCEPTION : RETRY;
+ } else if (js_has_overflowed) {
isolate->StackOverflow();
return_value = EXCEPTION;
- } else if (is_direct_call) {
- // If not real stack overflow the stack guard was used to interrupt
- // execution for another purpose. If this is a direct call from JavaScript
- // retry the RegExp forcing the call through the runtime system.
- // Currently the direct call cannot handle a GC.
- return_value = RETRY;
} else {
Object* result = isolate->stack_guard()->HandleInterrupts();
if (result->IsException(isolate)) return_value = EXCEPTION;
@@ -220,8 +224,8 @@
Isolate* isolate) {
DCHECK(subject->IsFlat());
- DCHECK(previous_index >= 0);
- DCHECK(previous_index <= subject->length());
+ DCHECK_LE(0, previous_index);
+ DCHECK_LE(previous_index, subject->length());
// No allocations before calling the regexp, but we can't use
// DisallowHeapAllocation, since regexps might be preempted, and another
@@ -282,9 +286,15 @@
Address stack_base = stack_scope.stack()->stack_base();
int direct_call = 0;
- int result = CALL_GENERATED_REGEXP_CODE(
- isolate, code->entry(), input, start_offset, input_start, input_end,
- output, output_size, stack_base, direct_call, isolate);
+
+ using RegexpMatcherSig = int(
+ String * input, int start_offset, // NOLINT(readability/casting)
+ const byte* input_start, const byte* input_end, int* output,
+ int output_size, Address stack_base, int direct_call, Isolate* isolate);
+
+ auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code);
+ int result = fn.Call(input, start_offset, input_start, input_end, output,
+ output_size, stack_base, direct_call, isolate);
DCHECK(result >= RETRY);
if (result == EXCEPTION && !isolate->has_pending_exception()) {
@@ -295,7 +305,7 @@
return static_cast<Result>(result);
}
-
+// clang-format off
const byte NativeRegExpMacroAssembler::word_character_map[] = {
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
@@ -304,18 +314,18 @@
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
- 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // '0' - '7'
+ 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
- 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
- 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
+ 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'A' - 'G'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'H' - 'O'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'P' - 'W'
+ 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0xFFu, // 'X' - 'Z', '_'
- 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
- 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
- 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
+ 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'a' - 'g'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'h' - 'o'
+ 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'p' - 'w'
+ 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
// Latin-1 range
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
@@ -337,7 +347,7 @@
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
};
-
+// clang-format on
Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
Address* stack_base,
@@ -349,8 +359,8 @@
DCHECK(stack_pointer <= old_stack_base);
DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
- if (new_stack_base == NULL) {
- return NULL;
+ if (new_stack_base == kNullAddress) {
+ return kNullAddress;
}
*stack_base = new_stack_base;
intptr_t stack_content_size = old_stack_base - stack_pointer;
diff --git a/src/regexp/regexp-macro-assembler.h b/src/regexp/regexp-macro-assembler.h
index 76efdf9..65da431 100644
--- a/src/regexp/regexp-macro-assembler.h
+++ b/src/regexp/regexp-macro-assembler.h
@@ -88,8 +88,8 @@
Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
- // matches. If the label is NULL then we should pop a backtrack address off
- // the stack and go to that.
+ // matches. If the label is nullptr then we should pop a backtrack address
+ // off the stack and go to that.
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0;
virtual void CheckNotCharacterAfterAnd(unsigned c,
unsigned and_with,
@@ -123,10 +123,10 @@
virtual Handle<HeapObject> GetCode(Handle<String> source) = 0;
virtual void GoTo(Label* label) = 0;
// Check whether a register is >= a given constant and go to a label if it
- // is. Backtracks instead if the label is NULL.
+ // is. Backtracks instead if the label is nullptr.
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
// Check whether a register is < a given constant and go to a label if it is.
- // Backtracks instead if the label is NULL.
+ // Backtracks instead if the label is nullptr.
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
// Check whether a register is == to the current position and go to a
// label if it is.
@@ -244,7 +244,7 @@
static const byte word_character_map[256];
static Address word_character_map_address() {
- return const_cast<Address>(&word_character_map[0]);
+ return reinterpret_cast<Address>(&word_character_map[0]);
}
static Result Execute(Code* code,
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
index 3621f7d..c1d2c7d 100644
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -4,17 +4,22 @@
#include "src/regexp/regexp-parser.h"
+#include <vector>
+
#include "src/char-predicates-inl.h"
-#include "src/factory.h"
+#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/objects-inl.h"
#include "src/ostreams.h"
#include "src/regexp/jsregexp.h"
#include "src/utils.h"
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
#include "unicode/uniset.h"
-#endif // V8_I18N_SUPPORT
+// TODO(mathias): Remove this when we no longer need to check
+// `U_ICU_VERSION_MAJOR_NUM`.
+#include "unicode/uvernum.h"
+#endif // V8_INTL_SUPPORT
namespace v8 {
namespace internal {
@@ -24,14 +29,12 @@
: isolate_(isolate),
zone_(zone),
error_(error),
- captures_(NULL),
- named_captures_(NULL),
- named_back_references_(NULL),
+ captures_(nullptr),
+ named_captures_(nullptr),
+ named_back_references_(nullptr),
in_(in),
current_(kEndMarker),
- ignore_case_(flags & JSRegExp::kIgnoreCase),
- multiline_(flags & JSRegExp::kMultiline),
- unicode_(flags & JSRegExp::kUnicode),
+ top_level_flags_(flags),
next_pos_(0),
captures_started_(0),
capture_count_(0),
@@ -39,6 +42,7 @@
simple_(false),
contains_anchor_(false),
is_scanned_for_captures_(false),
+ has_named_captures_(false),
failed_(false) {
Advance();
}
@@ -70,12 +74,13 @@
}
}
-
void RegExpParser::Advance() {
if (has_next()) {
StackLimitCheck check(isolate());
if (check.HasOverflowed()) {
- if (FLAG_abort_on_stack_overflow) FATAL("Aborting on stack overflow");
+ if (FLAG_abort_on_stack_or_string_length_overflow) {
+ FATAL("Aborting on stack overflow");
+ }
ReportError(CStrVector(
MessageTemplate::TemplateString(MessageTemplate::kStackOverflow)));
} else if (zone()->excess_allocation()) {
@@ -99,7 +104,6 @@
Advance();
}
-
void RegExpParser::Advance(int dist) {
next_pos_ += dist - 1;
Advance();
@@ -134,21 +138,22 @@
RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
- if (failed_) return NULL; // Do not overwrite any existing error.
+ if (failed_) return nullptr; // Do not overwrite any existing error.
failed_ = true;
- *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();
+ *error_ = isolate()
+ ->factory()
+ ->NewStringFromOneByte(Vector<const uint8_t>::cast(message))
+ .ToHandleChecked();
// Zip to the end to make sure the no more input is read.
current_ = kEndMarker;
next_pos_ = in()->length();
- return NULL;
+ return nullptr;
}
-
-#define CHECK_FAILED /**/); \
- if (failed_) return NULL; \
+#define CHECK_FAILED /**/); \
+ if (failed_) return nullptr; \
((void)0
-
// Pattern ::
// Disjunction
RegExpTree* RegExpParser::ParsePattern() {
@@ -176,8 +181,8 @@
// Atom Quantifier
RegExpTree* RegExpParser::ParseDisjunction() {
// Used to store current state while parsing subexpressions.
- RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,
- nullptr, ignore_case(), unicode(), zone());
+ RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD,
+ 0, nullptr, top_level_flags_, zone());
RegExpParserState* state = &initial_state;
// Cache the builder in a local variable for quick access.
RegExpBuilder* builder = initial_state.builder();
@@ -232,7 +237,7 @@
builder = state->builder();
builder->AddAtom(body);
- // For compatability with JSC and ES3, we allow quantifiers after
+ // For compatibility with JSC and ES3, we allow quantifiers after
// lookaheads, and break in all cases.
break;
}
@@ -247,12 +252,12 @@
return ReportError(CStrVector("Nothing to repeat"));
case '^': {
Advance();
- if (multiline()) {
- builder->AddAssertion(
- new (zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE));
+ if (builder->multiline()) {
+ builder->AddAssertion(new (zone()) RegExpAssertion(
+ RegExpAssertion::START_OF_LINE, builder->flags()));
} else {
- builder->AddAssertion(
- new (zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT));
+ builder->AddAssertion(new (zone()) RegExpAssertion(
+ RegExpAssertion::START_OF_INPUT, builder->flags()));
set_contains_anchor();
}
continue;
@@ -260,89 +265,37 @@
case '$': {
Advance();
RegExpAssertion::AssertionType assertion_type =
- multiline() ? RegExpAssertion::END_OF_LINE
- : RegExpAssertion::END_OF_INPUT;
- builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type));
+ builder->multiline() ? RegExpAssertion::END_OF_LINE
+ : RegExpAssertion::END_OF_INPUT;
+ builder->AddAssertion(
+ new (zone()) RegExpAssertion(assertion_type, builder->flags()));
continue;
}
case '.': {
Advance();
- // everything except \x0a, \x0d, \u2028 and \u2029
ZoneList<CharacterRange>* ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
- CharacterRange::AddClassEscape('.', ranges, zone());
+
+ if (builder->dotall()) {
+ // Everything.
+ CharacterRange::AddClassEscape('*', ranges, false, zone());
+ } else {
+ // Everything except \x0A, \x0D, \u2028 and \u2029
+ CharacterRange::AddClassEscape('.', ranges, false, zone());
+ }
+
RegExpCharacterClass* cc =
- new (zone()) RegExpCharacterClass(ranges, false);
+ new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags());
builder->AddCharacterClass(cc);
break;
}
case '(': {
- SubexpressionType subexpr_type = CAPTURE;
- RegExpLookaround::Type lookaround_type = state->lookaround_type();
- bool is_named_capture = false;
- Advance();
- if (current() == '?') {
- switch (Next()) {
- case ':':
- subexpr_type = GROUPING;
- Advance(2);
- break;
- case '=':
- lookaround_type = RegExpLookaround::LOOKAHEAD;
- subexpr_type = POSITIVE_LOOKAROUND;
- Advance(2);
- break;
- case '!':
- lookaround_type = RegExpLookaround::LOOKAHEAD;
- subexpr_type = NEGATIVE_LOOKAROUND;
- Advance(2);
- break;
- case '<':
- Advance();
- if (FLAG_harmony_regexp_lookbehind) {
- if (Next() == '=') {
- subexpr_type = POSITIVE_LOOKAROUND;
- lookaround_type = RegExpLookaround::LOOKBEHIND;
- Advance(2);
- break;
- } else if (Next() == '!') {
- subexpr_type = NEGATIVE_LOOKAROUND;
- lookaround_type = RegExpLookaround::LOOKBEHIND;
- Advance(2);
- break;
- }
- }
- if (FLAG_harmony_regexp_named_captures && unicode()) {
- is_named_capture = true;
- Advance();
- break;
- }
- // Fall through.
- default:
- return ReportError(CStrVector("Invalid group"));
- }
- }
-
- const ZoneVector<uc16>* capture_name = nullptr;
- if (subexpr_type == CAPTURE) {
- if (captures_started_ >= kMaxCaptures) {
- return ReportError(CStrVector("Too many captures"));
- }
- captures_started_++;
-
- if (is_named_capture) {
- capture_name = ParseCaptureGroupName(CHECK_FAILED);
- }
- }
- // Store current state and begin new disjunction parsing.
- state = new (zone()) RegExpParserState(
- state, subexpr_type, lookaround_type, captures_started_,
- capture_name, ignore_case(), unicode(), zone());
+ state = ParseOpenParenthesis(state CHECK_FAILED);
builder = state->builder();
continue;
}
case '[': {
- RegExpTree* cc = ParseCharacterClass(CHECK_FAILED);
+ RegExpTree* cc = ParseCharacterClass(builder CHECK_FAILED);
builder->AddCharacterClass(cc->AsCharacterClass());
break;
}
@@ -354,13 +307,13 @@
return ReportError(CStrVector("\\ at end of pattern"));
case 'b':
Advance(2);
- builder->AddAssertion(
- new (zone()) RegExpAssertion(RegExpAssertion::BOUNDARY));
+ builder->AddAssertion(new (zone()) RegExpAssertion(
+ RegExpAssertion::BOUNDARY, builder->flags()));
continue;
case 'B':
Advance(2);
- builder->AddAssertion(
- new (zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY));
+ builder->AddAssertion(new (zone()) RegExpAssertion(
+ RegExpAssertion::NON_BOUNDARY, builder->flags()));
continue;
// AtomEscape ::
// CharacterClassEscape
@@ -377,9 +330,10 @@
Advance(2);
ZoneList<CharacterRange>* ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
- CharacterRange::AddClassEscape(c, ranges, zone());
- RegExpCharacterClass* cc =
- new (zone()) RegExpCharacterClass(ranges, false);
+ CharacterRange::AddClassEscape(
+ c, ranges, unicode() && builder->ignore_case(), zone());
+ RegExpCharacterClass* cc = new (zone())
+ RegExpCharacterClass(zone(), ranges, builder->flags());
builder->AddCharacterClass(cc);
break;
}
@@ -388,20 +342,15 @@
uc32 p = Next();
Advance(2);
if (unicode()) {
- if (FLAG_harmony_regexp_property) {
- ZoneList<CharacterRange>* ranges =
- new (zone()) ZoneList<CharacterRange>(2, zone());
- if (!ParsePropertyClass(ranges, p == 'P')) {
- return ReportError(CStrVector("Invalid property name"));
- }
- RegExpCharacterClass* cc =
- new (zone()) RegExpCharacterClass(ranges, false);
- builder->AddCharacterClass(cc);
- } else {
- // With /u, no identity escapes except for syntax characters
- // are allowed. Otherwise, all identity escapes are allowed.
- return ReportError(CStrVector("Invalid escape"));
+ ZoneList<CharacterRange>* ranges =
+ new (zone()) ZoneList<CharacterRange>(2, zone());
+ if (!ParsePropertyClass(ranges, p == 'P')) {
+ return ReportError(CStrVector("Invalid property name"));
}
+ RegExpCharacterClass* cc = new (zone())
+ RegExpCharacterClass(zone(), ranges, builder->flags());
+ builder->AddCharacterClass(cc);
+
} else {
builder->AddCharacter(p);
}
@@ -428,7 +377,8 @@
builder->AddEmpty();
} else {
RegExpCapture* capture = GetCapture(index);
- RegExpTree* atom = new (zone()) RegExpBackReference(capture);
+ RegExpTree* atom =
+ new (zone()) RegExpBackReference(capture, builder->flags());
builder->AddAtom(atom);
}
break;
@@ -444,8 +394,8 @@
Advance(2);
break;
}
+ V8_FALLTHROUGH;
}
- // Fall through.
case '0': {
Advance();
if (unicode() && Next() >= '0' && Next() <= '9') {
@@ -486,9 +436,9 @@
uc32 letter = controlLetter & ~('a' ^ 'A');
if (letter < 'A' || 'Z' < letter) {
// controlLetter is not in range 'A'-'Z' or 'a'-'z'.
- // This is outside the specification. We match JSC in
- // reading the backslash as a literal character instead
- // of as starting an escape.
+ // Read the backslash as a literal character instead of as
+ // starting an escape.
+ // ES#prod-annexB-ExtendedPatternCharacter
if (unicode()) {
// With /u, invalid escapes are not treated as identity escapes.
return ReportError(CStrVector("Invalid unicode escape"));
@@ -496,7 +446,7 @@
builder->AddCharacter('\\');
} else {
Advance(2);
- builder->AddCharacter(controlLetter & 0x1f);
+ builder->AddCharacter(controlLetter & 0x1F);
}
break;
}
@@ -522,17 +472,22 @@
builder->AddCharacter('u');
} else {
// With /u, invalid escapes are not treated as identity escapes.
- return ReportError(CStrVector("Invalid unicode escape"));
+ return ReportError(CStrVector("Invalid Unicode escape"));
}
break;
}
case 'k':
- if (FLAG_harmony_regexp_named_captures && unicode()) {
+ // Either an identity escape or a named back-reference. The two
+ // interpretations are mutually exclusive: '\k' is interpreted as
+ // an identity escape for non-Unicode patterns without named
+ // capture groups, and as the beginning of a named back-reference
+ // in all other cases.
+ if (unicode() || HasNamedCaptures()) {
Advance(2);
ParseNamedBackReference(builder, state CHECK_FAILED);
break;
}
- // Fall through.
+ V8_FALLTHROUGH;
default:
Advance();
// With /u, no identity escapes except for syntax characters
@@ -550,14 +505,14 @@
int dummy;
bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED);
if (parsed) return ReportError(CStrVector("Nothing to repeat"));
- // Fall through.
+ V8_FALLTHROUGH;
}
case '}':
case ']':
if (unicode()) {
return ReportError(CStrVector("Lone quantifier brackets"));
}
- // Fall through.
+ V8_FALLTHROUGH;
default:
builder->AddUnicodeCharacter(current());
Advance();
@@ -617,6 +572,132 @@
}
}
+RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
+ RegExpParserState* state) {
+ RegExpLookaround::Type lookaround_type = state->lookaround_type();
+ bool is_named_capture = false;
+ JSRegExp::Flags switch_on = JSRegExp::kNone;
+ JSRegExp::Flags switch_off = JSRegExp::kNone;
+ const ZoneVector<uc16>* capture_name = nullptr;
+ SubexpressionType subexpr_type = CAPTURE;
+ Advance();
+ if (current() == '?') {
+ switch (Next()) {
+ case ':':
+ Advance(2);
+ subexpr_type = GROUPING;
+ break;
+ case '=':
+ Advance(2);
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = POSITIVE_LOOKAROUND;
+ break;
+ case '!':
+ Advance(2);
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = NEGATIVE_LOOKAROUND;
+ break;
+ case '-':
+ case 'i':
+ case 's':
+ case 'm': {
+ if (!FLAG_regexp_mode_modifiers) {
+ ReportError(CStrVector("Invalid group"));
+ return nullptr;
+ }
+ Advance();
+ bool flags_sense = true; // Switching on flags.
+ while (subexpr_type != GROUPING) {
+ switch (current()) {
+ case '-':
+ if (!flags_sense) {
+ ReportError(CStrVector("Multiple dashes in flag group"));
+ return nullptr;
+ }
+ flags_sense = false;
+ Advance();
+ continue;
+ case 's':
+ case 'i':
+ case 'm': {
+ JSRegExp::Flags bit = JSRegExp::kUnicode;
+ if (current() == 'i') bit = JSRegExp::kIgnoreCase;
+ if (current() == 'm') bit = JSRegExp::kMultiline;
+ if (current() == 's') bit = JSRegExp::kDotAll;
+ if (((switch_on | switch_off) & bit) != 0) {
+ ReportError(CStrVector("Repeated flag in flag group"));
+ return nullptr;
+ }
+ if (flags_sense) {
+ switch_on |= bit;
+ } else {
+ switch_off |= bit;
+ }
+ Advance();
+ continue;
+ }
+ case ')': {
+ Advance();
+ state->builder()
+ ->FlushText(); // Flush pending text using old flags.
+ // These (?i)-style flag switches don't put us in a subexpression
+ // at all; they just modify the flags in the rest of the current
+ // subexpression.
+ JSRegExp::Flags flags =
+ (state->builder()->flags() | switch_on) & ~switch_off;
+ state->builder()->set_flags(flags);
+ return state;
+ }
+ case ':':
+ Advance();
+ subexpr_type = GROUPING; // Will break us out of the outer loop.
+ continue;
+ default:
+ ReportError(CStrVector("Invalid flag group"));
+ return nullptr;
+ }
+ }
+ break;
+ }
+ case '<':
+ Advance();
+ if (Next() == '=') {
+ Advance(2);
+ lookaround_type = RegExpLookaround::LOOKBEHIND;
+ subexpr_type = POSITIVE_LOOKAROUND;
+ break;
+ } else if (Next() == '!') {
+ Advance(2);
+ lookaround_type = RegExpLookaround::LOOKBEHIND;
+ subexpr_type = NEGATIVE_LOOKAROUND;
+ break;
+ }
+ is_named_capture = true;
+ has_named_captures_ = true;
+ Advance();
+ break;
+ default:
+ ReportError(CStrVector("Invalid group"));
+ return nullptr;
+ }
+ }
+ if (subexpr_type == CAPTURE) {
+ if (captures_started_ >= kMaxCaptures) {
+ ReportError(CStrVector("Too many captures"));
+ return nullptr;
+ }
+ captures_started_++;
+
+ if (is_named_capture) {
+ capture_name = ParseCaptureGroupName(CHECK_FAILED);
+ }
+ }
+ JSRegExp::Flags flags = (state->builder()->flags() | switch_on) & ~switch_off;
+ // Store current state and begin new disjunction parsing.
+ return new (zone())
+ RegExpParserState(state, subexpr_type, lookaround_type, captures_started_,
+ capture_name, flags, zone());
+}
#ifdef DEBUG
// Currently only used in an DCHECK.
@@ -643,6 +724,8 @@
// noncapturing parentheses and can skip character classes and backslash-escaped
// characters.
void RegExpParser::ScanForCaptures() {
+ DCHECK(!is_scanned_for_captures_);
+ const int saved_position = position();
// Start with captures started previous to current position
int capture_count = captures_started();
// Add count of captures after this position.
@@ -666,12 +749,32 @@
break;
}
case '(':
- if (current() != '?') capture_count++;
+ if (current() == '?') {
+ // At this point we could be in
+ // * a non-capturing group '(?:',
+ // * a lookbehind assertion '(?<=' '(?<!'
+ // * or a named capture '(?<'.
+ //
+ // Of these, only named captures are capturing groups.
+
+ Advance();
+ if (current() != '<') break;
+
+ Advance();
+ if (current() == '=' || current() == '!') break;
+
+ // Found a possible named capture. It could turn out to be a syntax
+ // error (e.g. an unterminated or invalid name), but that distinction
+ // does not matter for our purposes.
+ has_named_captures_ = true;
+ }
+ capture_count++;
break;
}
}
capture_count_ = capture_count;
is_scanned_for_captures_ = true;
+ Reset(saved_position);
}
@@ -697,11 +800,7 @@
}
}
if (value > captures_started()) {
- if (!is_scanned_for_captures_) {
- int saved_position = position();
- ScanForCaptures();
- Reset(saved_position);
- }
+ if (!is_scanned_for_captures_) ScanForCaptures();
if (value > capture_count_) {
Reset(start);
return false;
@@ -721,9 +820,6 @@
}
const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {
- DCHECK(FLAG_harmony_regexp_named_captures);
- DCHECK(unicode());
-
ZoneVector<uc16>* name =
new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone());
@@ -741,6 +837,12 @@
}
}
+ // The backslash char is misclassified as both ID_Start and ID_Continue.
+ if (c == '\\') {
+ ReportError(CStrVector("Invalid capture group name"));
+ return nullptr;
+ }
+
if (at_start) {
if (!IdentifierStart::Is(c)) {
ReportError(CStrVector("Invalid capture group name"));
@@ -765,8 +867,6 @@
bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,
int index) {
- DCHECK(FLAG_harmony_regexp_named_captures);
- DCHECK(unicode());
DCHECK(0 < index && index <= captures_started_);
DCHECK_NOT_NULL(name);
@@ -774,6 +874,7 @@
named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone());
} else {
// Check for duplicates and bail if we find any.
+ // TODO(jgruber): O(n^2).
for (const auto& named_capture : *named_captures_) {
if (*named_capture->name() == *name) {
ReportError(CStrVector("Duplicate capture group name"));
@@ -783,7 +884,7 @@
}
RegExpCapture* capture = GetCapture(index);
- DCHECK(capture->name() == nullptr);
+ DCHECK_NULL(capture->name());
capture->set_name(name);
named_captures_->Add(capture, zone());
@@ -808,7 +909,8 @@
if (state->IsInsideCaptureGroup(name)) {
builder->AddEmpty();
} else {
- RegExpBackReference* atom = new (zone()) RegExpBackReference();
+ RegExpBackReference* atom =
+ new (zone()) RegExpBackReference(builder->flags());
atom->set_name(name);
builder->AddAtom(atom);
@@ -860,7 +962,7 @@
int know_captures =
is_scanned_for_captures_ ? capture_count_ : captures_started_;
DCHECK(index <= know_captures);
- if (captures_ == NULL) {
+ if (captures_ == nullptr) {
captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
}
while (captures_->length() < know_captures) {
@@ -888,8 +990,18 @@
return array;
}
+bool RegExpParser::HasNamedCaptures() {
+ if (has_named_captures_ || is_scanned_for_captures_) {
+ return has_named_captures_;
+ }
+
+ ScanForCaptures();
+ DCHECK(is_scanned_for_captures_);
+ return has_named_captures_;
+}
+
bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {
- for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
+ for (RegExpParserState* s = this; s != nullptr; s = s->previous_state()) {
if (s->group_type() != CAPTURE) continue;
// Return true if we found the matching capture index.
if (index == s->capture_index()) return true;
@@ -902,7 +1014,7 @@
bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(
const ZoneVector<uc16>* name) {
DCHECK_NOT_NULL(name);
- for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
+ for (RegExpParserState* s = this; s != nullptr; s = s->previous_state()) {
if (s->capture_name() == nullptr) continue;
if (*s->capture_name() == *name) return true;
}
@@ -980,6 +1092,7 @@
DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker);
// For compatibility with some other browsers (not all), we parse
// up to three octal digits with a value below 256.
+ // ES#prod-annexB-LegacyOctalEscapeSequence
uc32 value = current() - '0';
Advance();
if ('0' <= current() && current() <= '7') {
@@ -1019,7 +1132,7 @@
if (current() == '{' && unicode()) {
int start = position();
Advance();
- if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) {
+ if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) {
if (current() == '}') {
Advance();
return true;
@@ -1049,17 +1162,18 @@
return result;
}
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
namespace {
bool IsExactPropertyAlias(const char* property_name, UProperty property) {
const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
- if (short_name != NULL && strcmp(property_name, short_name) == 0) return true;
+ if (short_name != nullptr && strcmp(property_name, short_name) == 0)
+ return true;
for (int i = 0;; i++) {
const char* long_name = u_getPropertyName(
property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
- if (long_name == NULL) break;
+ if (long_name == nullptr) break;
if (strcmp(property_name, long_name) == 0) return true;
}
return false;
@@ -1069,14 +1183,14 @@
UProperty property, int32_t property_value) {
const char* short_name =
u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
- if (short_name != NULL && strcmp(property_value_name, short_name) == 0) {
+ if (short_name != nullptr && strcmp(property_value_name, short_name) == 0) {
return true;
}
for (int i = 0;; i++) {
const char* long_name = u_getPropertyValueName(
property, property_value,
static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
- if (long_name == NULL) break;
+ if (long_name == nullptr) break;
if (strcmp(property_value_name, long_name) == 0) return true;
}
return false;
@@ -1128,10 +1242,15 @@
ZoneList<CharacterRange>* result,
bool negate, Zone* zone) {
if (NameEquals(name, "Any")) {
- if (!negate) result->Add(CharacterRange::Everything(), zone);
+ if (negate) {
+ // Leave the list of character ranges empty, since the negation of 'Any'
+ // is the empty set.
+ } else {
+ result->Add(CharacterRange::Everything(), zone);
+ }
} else if (NameEquals(name, "ASCII")) {
result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint)
- : CharacterRange::Range(0x0, 0x7f),
+ : CharacterRange::Range(0x0, 0x7F),
zone);
} else if (NameEquals(name, "Assigned")) {
return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
@@ -1142,6 +1261,89 @@
return true;
}
+// Explicitly whitelist supported binary properties. The spec forbids supporting
+// properties outside of this set to ensure interoperability.
+bool IsSupportedBinaryProperty(UProperty property) {
+ switch (property) {
+ case UCHAR_ALPHABETIC:
+ // 'Any' is not supported by ICU. See LookupSpecialPropertyValueName.
+ // 'ASCII' is not supported by ICU. See LookupSpecialPropertyValueName.
+ case UCHAR_ASCII_HEX_DIGIT:
+ // 'Assigned' is not supported by ICU. See LookupSpecialPropertyValueName.
+ case UCHAR_BIDI_CONTROL:
+ case UCHAR_BIDI_MIRRORED:
+ case UCHAR_CASE_IGNORABLE:
+ case UCHAR_CASED:
+ case UCHAR_CHANGES_WHEN_CASEFOLDED:
+ case UCHAR_CHANGES_WHEN_CASEMAPPED:
+ case UCHAR_CHANGES_WHEN_LOWERCASED:
+ case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
+ case UCHAR_CHANGES_WHEN_TITLECASED:
+ case UCHAR_CHANGES_WHEN_UPPERCASED:
+ case UCHAR_DASH:
+ case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
+ case UCHAR_DEPRECATED:
+ case UCHAR_DIACRITIC:
+ case UCHAR_EMOJI:
+#if U_ICU_VERSION_MAJOR_NUM >= 60
+ case UCHAR_EMOJI_COMPONENT:
+#endif
+ case UCHAR_EMOJI_MODIFIER_BASE:
+ case UCHAR_EMOJI_MODIFIER:
+ case UCHAR_EMOJI_PRESENTATION:
+#if U_ICU_VERSION_MAJOR_NUM >= 62
+ case UCHAR_EXTENDED_PICTOGRAPHIC:
+#endif
+ case UCHAR_EXTENDER:
+ case UCHAR_GRAPHEME_BASE:
+ case UCHAR_GRAPHEME_EXTEND:
+ case UCHAR_HEX_DIGIT:
+ case UCHAR_ID_CONTINUE:
+ case UCHAR_ID_START:
+ case UCHAR_IDEOGRAPHIC:
+ case UCHAR_IDS_BINARY_OPERATOR:
+ case UCHAR_IDS_TRINARY_OPERATOR:
+ case UCHAR_JOIN_CONTROL:
+ case UCHAR_LOGICAL_ORDER_EXCEPTION:
+ case UCHAR_LOWERCASE:
+ case UCHAR_MATH:
+ case UCHAR_NONCHARACTER_CODE_POINT:
+ case UCHAR_PATTERN_SYNTAX:
+ case UCHAR_PATTERN_WHITE_SPACE:
+ case UCHAR_QUOTATION_MARK:
+ case UCHAR_RADICAL:
+#if U_ICU_VERSION_MAJOR_NUM >= 60
+ case UCHAR_REGIONAL_INDICATOR:
+#endif
+ case UCHAR_S_TERM:
+ case UCHAR_SOFT_DOTTED:
+ case UCHAR_TERMINAL_PUNCTUATION:
+ case UCHAR_UNIFIED_IDEOGRAPH:
+ case UCHAR_UPPERCASE:
+ case UCHAR_VARIATION_SELECTOR:
+ case UCHAR_WHITE_SPACE:
+ case UCHAR_XID_CONTINUE:
+ case UCHAR_XID_START:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool IsUnicodePropertyValueCharacter(char c) {
+ // https://tc39.github.io/proposal-regexp-unicode-property-escapes/
+ //
+ // Note that using this to validate each parsed char is quite conservative.
+ // A possible alternative solution would be to only ensure the parsed
+ // property name/value candidate string does not contain '\0' characters and
+ // let ICU lookups trigger the final failure.
+ if ('a' <= c && c <= 'z') return true;
+ if ('A' <= c && c <= 'Z') return true;
+ if ('0' <= c && c <= '9') return true;
+ return (c == '_');
+}
+
} // anonymous namespace
bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
@@ -1154,30 +1356,36 @@
// and 'value' is interpreted as one of the available property value names.
// - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used.
// - Loose matching is not applied.
- List<char> first_part;
- List<char> second_part;
+ std::vector<char> first_part;
+ std::vector<char> second_part;
if (current() == '{') {
// Parse \p{[PropertyName=]PropertyNameValue}
for (Advance(); current() != '}' && current() != '='; Advance()) {
+ if (!IsUnicodePropertyValueCharacter(current())) return false;
if (!has_next()) return false;
- first_part.Add(static_cast<char>(current()));
+ first_part.push_back(static_cast<char>(current()));
}
if (current() == '=') {
for (Advance(); current() != '}'; Advance()) {
+ if (!IsUnicodePropertyValueCharacter(current())) return false;
if (!has_next()) return false;
- second_part.Add(static_cast<char>(current()));
+ second_part.push_back(static_cast<char>(current()));
}
- second_part.Add(0); // null-terminate string.
+ second_part.push_back(0); // null-terminate string.
}
} else {
return false;
}
Advance();
- first_part.Add(0); // null-terminate string.
+ first_part.push_back(0); // null-terminate string.
- if (second_part.is_empty()) {
+ DCHECK(first_part.size() - 1 == std::strlen(first_part.data()));
+ DCHECK(second_part.empty() ||
+ second_part.size() - 1 == std::strlen(second_part.data()));
+
+ if (second_part.empty()) {
// First attempt to interpret as general category property value name.
- const char* name = first_part.ToConstVector().start();
+ const char* name = first_part.data();
if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
result, zone())) {
return true;
@@ -1188,16 +1396,15 @@
}
// Then attempt to interpret as binary property name with value name 'Y'.
UProperty property = u_getPropertyEnum(name);
- if (property < UCHAR_BINARY_START) return false;
- if (property >= UCHAR_BINARY_LIMIT) return false;
+ if (!IsSupportedBinaryProperty(property)) return false;
if (!IsExactPropertyAlias(name, property)) return false;
return LookupPropertyValueName(property, negate ? "N" : "Y", false, result,
zone());
} else {
// Both property name and value name are specified. Attempt to interpret
// the property name as enumerated property.
- const char* property_name = first_part.ToConstVector().start();
- const char* value_name = second_part.ToConstVector().start();
+ const char* property_name = first_part.data();
+ const char* value_name = second_part.data();
UProperty property = u_getPropertyEnum(property_name);
if (!IsExactPropertyAlias(property_name, property)) return false;
if (property == UCHAR_GENERAL_CATEGORY) {
@@ -1212,14 +1419,14 @@
}
}
-#else // V8_I18N_SUPPORT
+#else // V8_INTL_SUPPORT
bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
bool negate) {
return false;
}
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
uc32 x = 0;
@@ -1241,7 +1448,7 @@
uc32 RegExpParser::ParseClassCharacterEscape() {
- DCHECK(current() == '\\');
+ DCHECK_EQ('\\', current());
DCHECK(has_next() && !IsSpecialClassEscape(Next()));
Advance();
switch (current()) {
@@ -1268,13 +1475,14 @@
case 'c': {
uc32 controlLetter = Next();
uc32 letter = controlLetter & ~('A' ^ 'a');
- // For compatibility with JSC, inside a character class. We also accept
- // digits and underscore as control characters, unless with /u.
+ // Inside a character class, we also accept digits and underscore as
+ // control characters, unless with /u. See Annex B:
+ // ES#prod-annexB-ClassControlLetter
if (letter >= 'A' && letter <= 'Z') {
Advance(2);
// Control letters mapped to ASCII control characters in the range
- // 0x00-0x1f.
- return controlLetter & 0x1f;
+ // 0x00-0x1F.
+ return controlLetter & 0x1F;
}
if (unicode()) {
// With /u, invalid escapes are not treated as identity escapes.
@@ -1284,10 +1492,11 @@
if ((controlLetter >= '0' && controlLetter <= '9') ||
controlLetter == '_') {
Advance(2);
- return controlLetter & 0x1f;
+ return controlLetter & 0x1F;
}
// We match JSC in reading the backslash as a literal
// character instead of as starting an escape.
+ // TODO(v8:6201): Not yet covered by the spec.
return '\\';
}
case '0':
@@ -1296,7 +1505,7 @@
Advance();
return 0;
}
- // Fall through.
+ V8_FALLTHROUGH;
case '1':
case '2':
case '3':
@@ -1307,6 +1516,7 @@
// For compatibility, we interpret a decimal escape that isn't
// a back reference (and therefore either \0 or not valid according
// to the specification) as a 1..3 digit octal character code.
+ // ES#prod-annexB-LegacyOctalEscapeSequence
if (unicode()) {
// With /u, decimal escape is not interpreted as octal character code.
ReportError(CStrVector("Invalid class escape"));
@@ -1354,11 +1564,12 @@
return 0;
}
-
-CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
- DCHECK_EQ(0, *char_class);
- uc32 first = current();
- if (first == '\\') {
+void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
+ Zone* zone,
+ bool add_unicode_case_equivalents,
+ uc32* char_out, bool* is_class_escape) {
+ uc32 current_char = current();
+ if (current_char == '\\') {
switch (Next()) {
case 'w':
case 'W':
@@ -1366,58 +1577,40 @@
case 'D':
case 's':
case 'S': {
- *char_class = Next();
+ CharacterRange::AddClassEscape(static_cast<char>(Next()), ranges,
+ add_unicode_case_equivalents, zone);
Advance(2);
- return CharacterRange::Singleton(0); // Return dummy value.
+ *is_class_escape = true;
+ return;
}
case kEndMarker:
- return ReportError(CStrVector("\\ at end of pattern"));
+ ReportError(CStrVector("\\ at end of pattern"));
+ return;
+ case 'p':
+ case 'P':
+ if (unicode()) {
+ bool negate = Next() == 'P';
+ Advance(2);
+ if (!ParsePropertyClass(ranges, negate)) {
+ ReportError(CStrVector("Invalid property name in character class"));
+ }
+ *is_class_escape = true;
+ return;
+ }
+ break;
default:
- first = ParseClassCharacterEscape(CHECK_FAILED);
+ break;
}
+ *char_out = ParseClassCharacterEscape();
+ *is_class_escape = false;
} else {
Advance();
- }
-
- return CharacterRange::Singleton(first);
-}
-
-static const uc16 kNoCharClass = 0;
-
-// Adds range or pre-defined character class to character ranges.
-// If char_class is not kInvalidClass, it's interpreted as a class
-// escape (i.e., 's' means whitespace, from '\s').
-static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
- uc16 char_class, CharacterRange range,
- Zone* zone) {
- if (char_class != kNoCharClass) {
- CharacterRange::AddClassEscape(char_class, ranges, zone);
- } else {
- ranges->Add(range, zone);
+ *char_out = current_char;
+ *is_class_escape = false;
}
}
-bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) {
- if (!FLAG_harmony_regexp_property) return false;
- if (!unicode()) return false;
- if (current() != '\\') return false;
- uc32 next = Next();
- bool parse_success = false;
- if (next == 'p') {
- Advance(2);
- parse_success = ParsePropertyClass(ranges, false);
- } else if (next == 'P') {
- Advance(2);
- parse_success = ParsePropertyClass(ranges, true);
- } else {
- return false;
- }
- if (!parse_success)
- ReportError(CStrVector("Invalid property name in character class"));
- return parse_success;
-}
-
-RegExpTree* RegExpParser::ParseCharacterClass() {
+RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
static const char* kUnterminated = "Unterminated character class";
static const char* kRangeInvalid = "Invalid character class";
static const char* kRangeOutOfOrder = "Range out of order in character class";
@@ -1431,11 +1624,12 @@
}
ZoneList<CharacterRange>* ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
+ bool add_unicode_case_equivalents = unicode() && builder->ignore_case();
while (has_more() && current() != ']') {
- bool parsed_property = ParseClassProperty(ranges CHECK_FAILED);
- if (parsed_property) continue;
- uc16 char_class = kNoCharClass;
- CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
+ uc32 char_1, char_2;
+ bool is_class_1, is_class_2;
+ ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_1,
+ &is_class_1 CHECK_FAILED);
if (current() == '-') {
Advance();
if (current() == kEndMarker) {
@@ -1443,41 +1637,40 @@
// following code report an error.
break;
} else if (current() == ']') {
- AddRangeOrEscape(ranges, char_class, first, zone());
+ if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
ranges->Add(CharacterRange::Singleton('-'), zone());
break;
}
- uc16 char_class_2 = kNoCharClass;
- CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
- if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
+ ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_2,
+ &is_class_2 CHECK_FAILED);
+ if (is_class_1 || is_class_2) {
// Either end is an escaped character class. Treat the '-' verbatim.
if (unicode()) {
// ES2015 21.2.2.15.1 step 1.
return ReportError(CStrVector(kRangeInvalid));
}
- AddRangeOrEscape(ranges, char_class, first, zone());
+ if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
ranges->Add(CharacterRange::Singleton('-'), zone());
- AddRangeOrEscape(ranges, char_class_2, next, zone());
+ if (!is_class_2) ranges->Add(CharacterRange::Singleton(char_2), zone());
continue;
}
// ES2015 21.2.2.15.1 step 6.
- if (first.from() > next.to()) {
+ if (char_1 > char_2) {
return ReportError(CStrVector(kRangeOutOfOrder));
}
- ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());
+ ranges->Add(CharacterRange::Range(char_1, char_2), zone());
} else {
- AddRangeOrEscape(ranges, char_class, first, zone());
+ if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
}
}
if (!has_more()) {
return ReportError(CStrVector(kUnterminated));
}
Advance();
- if (ranges->length() == 0) {
- ranges->Add(CharacterRange::Everything(), zone());
- is_negated = !is_negated;
- }
- return new (zone()) RegExpCharacterClass(ranges, is_negated);
+ RegExpCharacterClass::CharacterClassFlags character_class_flags;
+ if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED;
+ return new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags(),
+ character_class_flags);
}
@@ -1487,17 +1680,17 @@
bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
FlatStringReader* input, JSRegExp::Flags flags,
RegExpCompileData* result) {
- DCHECK(result != NULL);
+ DCHECK(result != nullptr);
RegExpParser parser(input, &result->error, flags, isolate, zone);
RegExpTree* tree = parser.ParsePattern();
if (parser.failed()) {
- DCHECK(tree == NULL);
+ DCHECK(tree == nullptr);
DCHECK(!result->error.is_null());
} else {
- DCHECK(tree != NULL);
+ DCHECK(tree != nullptr);
DCHECK(result->error.is_null());
if (FLAG_trace_regexp_parser) {
- OFStream os(stdout);
+ StdoutStream os;
tree->Print(os, zone);
os << "\n";
}
@@ -1511,12 +1704,11 @@
return !parser.failed();
}
-RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode)
+RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags)
: zone_(zone),
pending_empty_(false),
- ignore_case_(ignore_case),
- unicode_(unicode),
- characters_(NULL),
+ flags_(flags),
+ characters_(nullptr),
pending_surrogate_(kNoPendingSurrogate),
terms_(),
alternatives_()
@@ -1551,7 +1743,7 @@
surrogate_pair.Add(lead_surrogate, zone());
surrogate_pair.Add(trail_surrogate, zone());
RegExpAtom* atom =
- new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
+ new (zone()) RegExpAtom(surrogate_pair.ToConstVector(), flags_);
AddAtom(atom);
}
} else {
@@ -1574,9 +1766,10 @@
void RegExpBuilder::FlushCharacters() {
FlushPendingSurrogate();
pending_empty_ = false;
- if (characters_ != NULL) {
- RegExpTree* atom = new (zone()) RegExpAtom(characters_->ToConstVector());
- characters_ = NULL;
+ if (characters_ != nullptr) {
+ RegExpTree* atom =
+ new (zone()) RegExpAtom(characters_->ToConstVector(), flags_);
+ characters_ = nullptr;
text_.Add(atom, zone());
LAST(ADD_ATOM);
}
@@ -1605,7 +1798,7 @@
if (NeedsDesugaringForIgnoreCase(c)) {
AddCharacterClassForDesugaring(c);
} else {
- if (characters_ == NULL) {
+ if (characters_ == nullptr) {
characters_ = new (zone()) ZoneList<uc16>(4, zone());
}
characters_->Add(c, zone());
@@ -1651,7 +1844,8 @@
void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {
AddTerm(new (zone()) RegExpCharacterClass(
- CharacterRange::List(zone(), CharacterRange::Singleton(c)), false));
+ zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c)),
+ flags_));
}
@@ -1680,6 +1874,12 @@
void RegExpBuilder::AddAssertion(RegExpTree* assert) {
FlushText();
+ if (terms_.length() > 0 && terms_.last()->IsAssertion()) {
+ // Omit repeated assertions of the same type.
+ RegExpAssertion* last = terms_.last()->AsAssertion();
+ RegExpAssertion* next = assert->AsAssertion();
+ if (last->assertion_type() == next->assertion_type()) return;
+ }
terms_.Add(assert, zone());
LAST(ADD_ASSERT);
}
@@ -1726,7 +1926,7 @@
bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (unicode() && ignore_case()) {
icu::UnicodeSet set(c, c);
set.closeOver(USET_CASE_INSENSITIVE);
@@ -1735,7 +1935,7 @@
}
// In the case where ICU is not included, we act as if the unicode flag is
// not set, and do not desugar.
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
return false;
}
@@ -1756,18 +1956,18 @@
return true;
}
RegExpTree* atom;
- if (characters_ != NULL) {
+ if (characters_ != nullptr) {
DCHECK(last_added_ == ADD_CHAR);
// Last atom was character.
Vector<const uc16> char_vector = characters_->ToConstVector();
int num_chars = char_vector.length();
if (num_chars > 1) {
Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
- text_.Add(new (zone()) RegExpAtom(prefix), zone());
+ text_.Add(new (zone()) RegExpAtom(prefix, flags_), zone());
char_vector = char_vector.SubVector(num_chars - 1, num_chars);
}
- characters_ = NULL;
- atom = new (zone()) RegExpAtom(char_vector);
+ characters_ = nullptr;
+ atom = new (zone()) RegExpAtom(char_vector, flags_);
FlushText();
} else if (text_.length() > 0) {
DCHECK(last_added_ == ADD_ATOM);
@@ -1776,8 +1976,14 @@
} else if (terms_.length() > 0) {
DCHECK(last_added_ == ADD_ATOM);
atom = terms_.RemoveLast();
- // With /u, lookarounds are not quantifiable.
- if (unicode() && atom->IsLookaround()) return false;
+ if (atom->IsLookaround()) {
+ // With /u, lookarounds are not quantifiable.
+ if (unicode()) return false;
+ // Lookbehinds are not quantifiable.
+ if (atom->AsLookaround()->type() == RegExpLookaround::LOOKBEHIND) {
+ return false;
+ }
+ }
if (atom->max_match() == 0) {
// Guaranteed to only match an empty string.
LAST(ADD_TERM);
@@ -1790,7 +1996,6 @@
} else {
// Only call immediately after adding an atom or character!
UNREACHABLE();
- return false;
}
terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
zone());
diff --git a/src/regexp/regexp-parser.h b/src/regexp/regexp-parser.h
index 2cf937f..56d4ac8 100644
--- a/src/regexp/regexp-parser.h
+++ b/src/regexp/regexp-parser.h
@@ -6,6 +6,7 @@
#define V8_REGEXP_REGEXP_PARSER_H_
#include "src/objects.h"
+#include "src/objects/js-regexp.h"
#include "src/regexp/regexp-ast.h"
#include "src/zone/zone.h"
@@ -14,24 +15,23 @@
struct RegExpCompileData;
-
// A BufferedZoneList is an automatically growing list, just like (and backed
// by) a ZoneList, that is optimized for the case of adding and removing
// a single element. The last element added is stored outside the backing list,
// and if no more than one element is ever added, the ZoneList isn't even
// allocated.
-// Elements must not be NULL pointers.
+// Elements must not be null pointers.
template <typename T, int initial_size>
class BufferedZoneList {
public:
- BufferedZoneList() : list_(NULL), last_(NULL) {}
+ BufferedZoneList() : list_(nullptr), last_(nullptr) {}
// Adds element at end of list. This element is buffered and can
// be read using last() or removed using RemoveLast until a new Add or until
// RemoveLast or GetList has been called.
void Add(T* value, Zone* zone) {
- if (last_ != NULL) {
- if (list_ == NULL) {
+ if (last_ != nullptr) {
+ if (list_ == nullptr) {
list_ = new (zone) ZoneList<T*>(initial_size, zone);
}
list_->Add(last_, zone);
@@ -40,28 +40,28 @@
}
T* last() {
- DCHECK(last_ != NULL);
+ DCHECK(last_ != nullptr);
return last_;
}
T* RemoveLast() {
- DCHECK(last_ != NULL);
+ DCHECK(last_ != nullptr);
T* result = last_;
- if ((list_ != NULL) && (list_->length() > 0))
+ if ((list_ != nullptr) && (list_->length() > 0))
last_ = list_->RemoveLast();
else
- last_ = NULL;
+ last_ = nullptr;
return result;
}
T* Get(int i) {
DCHECK((0 <= i) && (i < length()));
- if (list_ == NULL) {
+ if (list_ == nullptr) {
DCHECK_EQ(0, i);
return last_;
} else {
if (i == list_->length()) {
- DCHECK(last_ != NULL);
+ DCHECK(last_ != nullptr);
return last_;
} else {
return list_->at(i);
@@ -70,22 +70,22 @@
}
void Clear() {
- list_ = NULL;
- last_ = NULL;
+ list_ = nullptr;
+ last_ = nullptr;
}
int length() {
- int length = (list_ == NULL) ? 0 : list_->length();
- return length + ((last_ == NULL) ? 0 : 1);
+ int length = (list_ == nullptr) ? 0 : list_->length();
+ return length + ((last_ == nullptr) ? 0 : 1);
}
ZoneList<T*>* GetList(Zone* zone) {
- if (list_ == NULL) {
+ if (list_ == nullptr) {
list_ = new (zone) ZoneList<T*>(initial_size, zone);
}
- if (last_ != NULL) {
+ if (last_ != nullptr) {
list_->Add(last_, zone);
- last_ = NULL;
+ last_ = nullptr;
}
return list_;
}
@@ -99,7 +99,7 @@
// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
class RegExpBuilder : public ZoneObject {
public:
- RegExpBuilder(Zone* zone, bool ignore_case, bool unicode);
+ RegExpBuilder(Zone* zone, JSRegExp::Flags flags);
void AddCharacter(uc16 character);
void AddUnicodeCharacter(uc32 character);
void AddEscapedUnicodeCharacter(uc32 character);
@@ -114,7 +114,14 @@
void NewAlternative(); // '|'
bool AddQuantifierToAtom(int min, int max,
RegExpQuantifier::QuantifierType type);
+ void FlushText();
RegExpTree* ToRegExp();
+ JSRegExp::Flags flags() const { return flags_; }
+ void set_flags(JSRegExp::Flags flags) { flags_ = flags; }
+
+ bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
+ bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; }
+ bool dotall() const { return (flags_ & JSRegExp::kDotAll) != 0; }
private:
static const uc16 kNoPendingSurrogate = 0;
@@ -122,18 +129,15 @@
void AddTrailSurrogate(uc16 trail_surrogate);
void FlushPendingSurrogate();
void FlushCharacters();
- void FlushText();
void FlushTerms();
bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
bool NeedsDesugaringForIgnoreCase(uc32 c);
Zone* zone() const { return zone_; }
- bool ignore_case() const { return ignore_case_; }
- bool unicode() const { return unicode_; }
+ bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
Zone* zone_;
bool pending_empty_;
- bool ignore_case_;
- bool unicode_;
+ JSRegExp::Flags flags_;
ZoneList<uc16>* characters_;
uc16 pending_surrogate_;
BufferedZoneList<RegExpTree, 2> terms_;
@@ -159,7 +163,6 @@
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
RegExpTree* ParseGroup();
- RegExpTree* ParseCharacterClass();
// Parses a {...,...} quantifier and stores the range in the given
// out parameters.
@@ -175,6 +178,7 @@
bool ParseUnicodeEscape(uc32* value);
bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
bool ParsePropertyClass(ZoneList<CharacterRange>* result, bool negate);
+ RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
uc32 ParseOctalLiteral();
@@ -184,8 +188,14 @@
// can be reparsed.
bool ParseBackReferenceIndex(int* index_out);
- bool ParseClassProperty(ZoneList<CharacterRange>* result);
- CharacterRange ParseClassAtom(uc16* char_class);
+ // Parse inside a class. Either add escaped class to the range and set
+ // |*is_class_escape|, or clear it and pass the character via |char_out|.
+ void ParseClassEscape(ZoneList<CharacterRange>* ranges, Zone* zone,
+ bool add_unicode_case_equivalents, uc32* char_out,
+ bool* is_class_escape);
+
+ char ParseClassEscape();
+
RegExpTree* ReportError(Vector<const char> message);
void Advance();
void Advance(int dist);
@@ -199,9 +209,9 @@
int captures_started() { return captures_started_; }
int position() { return next_pos_ - 1; }
bool failed() { return failed_; }
- bool ignore_case() const { return ignore_case_; }
- bool multiline() const { return multiline_; }
- bool unicode() const { return unicode_; }
+ // The Unicode flag can't be changed using in-regexp syntax, so it's OK to
+ // just read the initial flag value here.
+ bool unicode() const { return (top_level_flags_ & JSRegExp::kUnicode) != 0; }
static bool IsSyntaxCharacterOrSlash(uc32 c);
@@ -219,34 +229,35 @@
class RegExpParserState : public ZoneObject {
public:
+ // Push a state on the stack.
RegExpParserState(RegExpParserState* previous_state,
SubexpressionType group_type,
RegExpLookaround::Type lookaround_type,
int disjunction_capture_index,
- const ZoneVector<uc16>* capture_name, bool ignore_case,
- bool unicode, Zone* zone)
+ const ZoneVector<uc16>* capture_name,
+ JSRegExp::Flags flags, Zone* zone)
: previous_state_(previous_state),
- builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)),
+ builder_(new (zone) RegExpBuilder(zone, flags)),
group_type_(group_type),
lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index),
capture_name_(capture_name) {}
// Parser state of containing expression, if any.
- RegExpParserState* previous_state() { return previous_state_; }
- bool IsSubexpression() { return previous_state_ != NULL; }
+ RegExpParserState* previous_state() const { return previous_state_; }
+ bool IsSubexpression() { return previous_state_ != nullptr; }
// RegExpBuilder building this regexp's AST.
- RegExpBuilder* builder() { return builder_; }
+ RegExpBuilder* builder() const { return builder_; }
// Type of regexp being parsed (parenthesized group or entire regexp).
- SubexpressionType group_type() { return group_type_; }
+ SubexpressionType group_type() const { return group_type_; }
// Lookahead or Lookbehind.
- RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
+ RegExpLookaround::Type lookaround_type() const { return lookaround_type_; }
// Index in captures array of first capture in this sub-expression, if any.
// Also the capture index of this sub-expression itself, if group_type
// is CAPTURE.
- int capture_index() { return disjunction_capture_index_; }
+ int capture_index() const { return disjunction_capture_index_; }
// The name of the current sub-expression, if group_type is CAPTURE. Only
// used for named captures.
- const ZoneVector<uc16>* capture_name() { return capture_name_; }
+ const ZoneVector<uc16>* capture_name() const { return capture_name_; }
bool IsNamedCapture() const { return capture_name_ != nullptr; }
@@ -257,17 +268,17 @@
private:
// Linked list implementation of stack of states.
- RegExpParserState* previous_state_;
+ RegExpParserState* const previous_state_;
// Builder for the stored disjunction.
- RegExpBuilder* builder_;
+ RegExpBuilder* const builder_;
// Stored disjunction type (capture, look-ahead or grouping), if any.
- SubexpressionType group_type_;
+ const SubexpressionType group_type_;
// Stored read direction.
- RegExpLookaround::Type lookaround_type_;
+ const RegExpLookaround::Type lookaround_type_;
// Stored disjunction's capture index (if any).
- int disjunction_capture_index_;
+ const int disjunction_capture_index_;
// Stored capture name (if any).
- const ZoneVector<uc16>* capture_name_;
+ const ZoneVector<uc16>* const capture_name_;
};
// Return the 1-indexed RegExpCapture object, allocate if necessary.
@@ -284,6 +295,7 @@
bool ParseNamedBackReference(RegExpBuilder* builder,
RegExpParserState* state);
+ RegExpParserState* ParseOpenParenthesis(RegExpParserState* state);
// After the initial parsing pass, patch corresponding RegExpCapture objects
// into all RegExpBackReferences. This is done after initial parsing in order
@@ -292,6 +304,10 @@
Handle<FixedArray> CreateCaptureNameMap();
+ // Returns true iff the pattern contains named captures. May call
+ // ScanForCaptures to look ahead at the remaining pattern.
+ bool HasNamedCaptures();
+
Isolate* isolate() { return isolate_; }
Zone* zone() const { return zone_; }
@@ -312,17 +328,18 @@
ZoneList<RegExpBackReference*>* named_back_references_;
FlatStringReader* in_;
uc32 current_;
- bool ignore_case_;
- bool multiline_;
- bool unicode_;
+ // These are the flags specified outside the regexp syntax, i.e. after the
+ // terminating '/' or in the second argument to the constructor. The current
+ // flags are stored on the RegExpBuilder.
+ JSRegExp::Flags top_level_flags_;
int next_pos_;
int captures_started_;
- // The capture count is only valid after we have scanned for captures.
- int capture_count_;
+ int capture_count_; // Only valid after we have scanned for captures.
bool has_more_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;
+ bool has_named_captures_; // Only valid after we have scanned for captures.
bool failed_;
};
diff --git a/src/regexp/regexp-stack.cc b/src/regexp/regexp-stack.cc
index 34f9127..8e9d576 100644
--- a/src/regexp/regexp-stack.cc
+++ b/src/regexp/regexp-stack.cc
@@ -21,11 +21,7 @@
regexp_stack_->Reset();
}
-
-RegExpStack::RegExpStack()
- : isolate_(NULL) {
-}
-
+RegExpStack::RegExpStack() : isolate_(nullptr) {}
RegExpStack::~RegExpStack() {
thread_local_.Free();
@@ -64,23 +60,23 @@
Address RegExpStack::EnsureCapacity(size_t size) {
- if (size > kMaximumStackSize) return NULL;
+ if (size > kMaximumStackSize) return kNullAddress;
if (size < kMinimumStackSize) size = kMinimumStackSize;
if (thread_local_.memory_size_ < size) {
- Address new_memory = NewArray<byte>(static_cast<int>(size));
+ byte* new_memory = NewArray<byte>(size);
if (thread_local_.memory_size_ > 0) {
// Copy original memory into top of new memory.
- MemCopy(reinterpret_cast<void*>(new_memory + size -
- thread_local_.memory_size_),
- reinterpret_cast<void*>(thread_local_.memory_),
- thread_local_.memory_size_);
+ MemCopy(new_memory + size - thread_local_.memory_size_,
+ thread_local_.memory_, thread_local_.memory_size_);
DeleteArray(thread_local_.memory_);
}
thread_local_.memory_ = new_memory;
thread_local_.memory_size_ = size;
- thread_local_.limit_ = new_memory + kStackLimitSlack * kPointerSize;
+ thread_local_.limit_ =
+ reinterpret_cast<Address>(new_memory) + kStackLimitSlack * kPointerSize;
}
- return thread_local_.memory_ + thread_local_.memory_size_;
+ return reinterpret_cast<Address>(thread_local_.memory_) +
+ thread_local_.memory_size_;
}
diff --git a/src/regexp/regexp-stack.h b/src/regexp/regexp-stack.h
index aea46cf..37cecd3 100644
--- a/src/regexp/regexp-stack.h
+++ b/src/regexp/regexp-stack.h
@@ -45,8 +45,9 @@
// Gives the top of the memory used as stack.
Address stack_base() {
- DCHECK(thread_local_.memory_size_ != 0);
- return thread_local_.memory_ + thread_local_.memory_size_;
+ DCHECK_NE(0, thread_local_.memory_size_);
+ return reinterpret_cast<Address>(thread_local_.memory_) +
+ thread_local_.memory_size_;
}
// The total size of the memory allocated for the stack.
@@ -76,7 +77,8 @@
~RegExpStack();
// Artificial limit used when no memory has been allocated.
- static const uintptr_t kMemoryTop = static_cast<uintptr_t>(-1);
+ static const Address kMemoryTop =
+ static_cast<Address>(static_cast<uintptr_t>(-1));
// Minimal size of allocated stack area.
static const size_t kMinimumStackSize = 1 * KB;
@@ -87,14 +89,14 @@
// Structure holding the allocated memory, size and limit.
struct ThreadLocal {
ThreadLocal() { Clear(); }
- // If memory_size_ > 0 then memory_ must be non-NULL.
- Address memory_;
+ // If memory_size_ > 0 then memory_ must be non-null.
+ byte* memory_;
size_t memory_size_;
Address limit_;
void Clear() {
- memory_ = NULL;
+ memory_ = nullptr;
memory_size_ = 0;
- limit_ = reinterpret_cast<Address>(kMemoryTop);
+ limit_ = kMemoryTop;
}
void Free();
};
diff --git a/src/regexp/regexp-utils.cc b/src/regexp/regexp-utils.cc
index 570a348..c787a50 100644
--- a/src/regexp/regexp-utils.cc
+++ b/src/regexp/regexp-utils.cc
@@ -4,9 +4,10 @@
#include "src/regexp/regexp-utils.h"
-#include "src/factory.h"
+#include "src/heap/factory.h"
#include "src/isolate.h"
#include "src/objects-inl.h"
+#include "src/objects/js-regexp-inl.h"
#include "src/regexp/jsregexp.h"
namespace v8 {
@@ -29,7 +30,7 @@
}
if (ok != nullptr) *ok = true;
- Handle<String> last_subject(match_info->LastSubject());
+ Handle<String> last_subject(match_info->LastSubject(), isolate);
return isolate->factory()->NewSubString(last_subject, match_start, match_end);
}
@@ -43,22 +44,26 @@
MaybeHandle<Object> RegExpUtils::SetLastIndex(Isolate* isolate,
Handle<JSReceiver> recv,
- int value) {
+ uint64_t value) {
+ Handle<Object> value_as_object =
+ isolate->factory()->NewNumberFromInt64(value);
if (HasInitialRegExpMap(isolate, recv)) {
- JSRegExp::cast(*recv)->SetLastIndex(value);
+ JSRegExp::cast(*recv)->set_last_index(*value_as_object, SKIP_WRITE_BARRIER);
return recv;
} else {
- return Object::SetProperty(recv, isolate->factory()->lastIndex_string(),
- handle(Smi::FromInt(value), isolate), STRICT);
+ return Object::SetProperty(isolate, recv,
+ isolate->factory()->lastIndex_string(),
+ value_as_object, LanguageMode::kStrict);
}
}
MaybeHandle<Object> RegExpUtils::GetLastIndex(Isolate* isolate,
Handle<JSReceiver> recv) {
if (HasInitialRegExpMap(isolate, recv)) {
- return handle(JSRegExp::cast(*recv)->LastIndex(), isolate);
+ return handle(JSRegExp::cast(*recv)->last_index(), isolate);
} else {
- return Object::GetProperty(recv, isolate->factory()->lastIndex_string());
+ return Object::GetProperty(isolate, recv,
+ isolate->factory()->lastIndex_string());
}
}
@@ -72,7 +77,8 @@
if (exec->IsUndefined(isolate)) {
ASSIGN_RETURN_ON_EXCEPTION(
isolate, exec,
- Object::GetProperty(regexp, isolate->factory()->exec_string()), Object);
+ Object::GetProperty(isolate, regexp, isolate->factory()->exec_string()),
+ Object);
}
if (exec->IsCallable()) {
@@ -121,10 +127,11 @@
Handle<Object> match;
ASSIGN_RETURN_ON_EXCEPTION_VALUE(
isolate, match,
- JSObject::GetProperty(receiver, isolate->factory()->match_symbol()),
+ JSObject::GetProperty(isolate, receiver,
+ isolate->factory()->match_symbol()),
Nothing<bool>());
- if (!match->IsUndefined(isolate)) return Just(match->BooleanValue());
+ if (!match->IsUndefined(isolate)) return Just(match->BooleanValue(isolate));
return Just(object->IsJSRegExp());
}
@@ -132,6 +139,10 @@
// TODO(ishell): Update this check once map changes for constant field
// tracking are landing.
+#ifdef V8_ENABLE_FORCE_SLOW_PATH
+ if (isolate->force_slow_path()) return false;
+#endif
+
if (!obj->IsJSReceiver()) return false;
JSReceiver* recv = JSReceiver::cast(*obj);
@@ -151,16 +162,19 @@
// The smi check is required to omit ToLength(lastIndex) calls with possible
// user-code execution on the fast path.
- Object* last_index = JSRegExp::cast(recv)->LastIndex();
- return last_index->IsSmi() && Smi::cast(last_index)->value() >= 0;
+ Object* last_index = JSRegExp::cast(recv)->last_index();
+ return last_index->IsSmi() && Smi::ToInt(last_index) >= 0;
}
-int RegExpUtils::AdvanceStringIndex(Isolate* isolate, Handle<String> string,
- int index, bool unicode) {
- if (unicode && index < string->length()) {
- const uint16_t first = string->Get(index);
- if (first >= 0xD800 && first <= 0xDBFF && string->length() > index + 1) {
- const uint16_t second = string->Get(index + 1);
+uint64_t RegExpUtils::AdvanceStringIndex(Handle<String> string, uint64_t index,
+ bool unicode) {
+ DCHECK_LE(static_cast<double>(index), kMaxSafeInteger);
+ const uint64_t string_length = static_cast<uint64_t>(string->length());
+ if (unicode && index < string_length) {
+ const uint16_t first = string->Get(static_cast<uint32_t>(index));
+ if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string_length) {
+ DCHECK_LT(index, std::numeric_limits<uint64_t>::max());
+ const uint16_t second = string->Get(static_cast<uint32_t>(index + 1));
if (second >= 0xDC00 && second <= 0xDFFF) {
return index + 2;
}
@@ -176,14 +190,15 @@
Handle<Object> last_index_obj;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, last_index_obj,
- Object::GetProperty(regexp, isolate->factory()->lastIndex_string()),
+ Object::GetProperty(isolate, regexp,
+ isolate->factory()->lastIndex_string()),
Object);
ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
Object::ToLength(isolate, last_index_obj), Object);
- const int last_index = PositiveNumberToUint32(*last_index_obj);
- const int new_last_index =
- AdvanceStringIndex(isolate, string, last_index, unicode);
+ const uint64_t last_index = PositiveNumberToUint64(*last_index_obj);
+ const uint64_t new_last_index =
+ AdvanceStringIndex(string, last_index, unicode);
return SetLastIndex(isolate, regexp, new_last_index);
}
diff --git a/src/regexp/regexp-utils.h b/src/regexp/regexp-utils.h
index eb5f85c..8fc6607 100644
--- a/src/regexp/regexp-utils.h
+++ b/src/regexp/regexp-utils.h
@@ -21,13 +21,13 @@
int capture, bool* ok = nullptr);
// Last index (RegExp.lastIndex) accessors.
- static MUST_USE_RESULT MaybeHandle<Object> SetLastIndex(
- Isolate* isolate, Handle<JSReceiver> regexp, int value);
- static MUST_USE_RESULT MaybeHandle<Object> GetLastIndex(
+ static V8_WARN_UNUSED_RESULT MaybeHandle<Object> SetLastIndex(
+ Isolate* isolate, Handle<JSReceiver> regexp, uint64_t value);
+ static V8_WARN_UNUSED_RESULT MaybeHandle<Object> GetLastIndex(
Isolate* isolate, Handle<JSReceiver> recv);
// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
- static MUST_USE_RESULT MaybeHandle<Object> RegExpExec(
+ static V8_WARN_UNUSED_RESULT MaybeHandle<Object> RegExpExec(
Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string,
Handle<Object> exec);
@@ -41,9 +41,9 @@
// ES#sec-advancestringindex
// AdvanceStringIndex ( S, index, unicode )
- static int AdvanceStringIndex(Isolate* isolate, Handle<String> string,
- int index, bool unicode);
- static MUST_USE_RESULT MaybeHandle<Object> SetAdvancedStringIndex(
+ static uint64_t AdvanceStringIndex(Handle<String> string, uint64_t index,
+ bool unicode);
+ static V8_WARN_UNUSED_RESULT MaybeHandle<Object> SetAdvancedStringIndex(
Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string,
bool unicode);
};
diff --git a/src/regexp/s390/OWNERS b/src/regexp/s390/OWNERS
index 752e8e3..cf60da5 100644
--- a/src/regexp/s390/OWNERS
+++ b/src/regexp/s390/OWNERS
@@ -3,4 +3,5 @@
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
diff --git a/src/regexp/s390/regexp-macro-assembler-s390.cc b/src/regexp/s390/regexp-macro-assembler-s390.cc
index e5fd90f..3db1ebc 100644
--- a/src/regexp/s390/regexp-macro-assembler-s390.cc
+++ b/src/regexp/s390/regexp-macro-assembler-s390.cc
@@ -6,6 +6,7 @@
#if V8_TARGET_ARCH_S390
+#include "src/assembler-inl.h"
#include "src/base/bits.h"
#include "src/code-stubs.h"
#include "src/log.h"
@@ -39,8 +40,7 @@
* Each call to a public method should retain this convention.
*
* The stack will have the following structure:
- * - fp[112] Isolate* isolate (address of the current isolate)
- * - fp[108] secondary link/return address used by native call.
+ * - fp[108] Isolate* isolate (address of the current isolate)
* - fp[104] direct_call (if 1, direct call from JavaScript code,
* if 0, call through the runtime system).
* - fp[100] stack_area_base (high end of the memory area to use as
@@ -83,16 +83,12 @@
* Address start,
* Address end,
* int* capture_output_array,
+ * int num_capture_registers,
* byte* stack_area_base,
- * Address secondary_return_address, // Only used by native call.
- * bool direct_call = false)
+ * bool direct_call = false,
+ * Isolate* isolate);
* The call is performed by NativeRegExpMacroAssembler::Execute()
- * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro
- * in s390/simulator-s390.h.
- * When calling as a non-direct call (i.e., from C++ code), the return address
- * area is overwritten with the LR register by the RegExp code. When doing a
- * direct call from generated code, the return address is placed there by
- * the calling code, as in a normal exit frame.
+ * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
#define __ ACCESS_MASM(masm_)
@@ -101,7 +97,7 @@
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
+ masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
CodeObjectRequired::kYes)),
mode_(mode),
num_registers_(registers_to_save),
@@ -147,8 +143,8 @@
}
void RegExpMacroAssemblerS390::AdvanceRegister(int reg, int by) {
- DCHECK(reg >= 0);
- DCHECK(reg < num_registers_);
+ DCHECK_LE(0, reg);
+ DCHECK_GT(num_registers_, reg);
if (by != 0) {
if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT) && is_int8(by)) {
__ AddMI(register_location(reg), Operand(by));
@@ -324,11 +320,11 @@
__ SubP(r3, r3, r6);
}
// Isolate.
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (unicode) {
__ LoadImmP(r5, Operand::Zero());
} else // NOLINT
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
{
__ mov(r5, Operand(ExternalReference::isolate_address(isolate())));
}
@@ -376,7 +372,7 @@
__ LoadP(r5, MemOperand(frame_pointer(), kStringStartMinusOne));
__ AddP(r5, r5, r3);
__ CmpP(current_input_offset(), r5);
- BranchOrBacktrack(lt, on_no_match);
+ BranchOrBacktrack(le, on_no_match);
} else {
__ AddP(r0, r3, current_input_offset());
BranchOrBacktrack(gt, on_no_match, cr0);
@@ -447,7 +443,7 @@
void RegExpMacroAssemblerS390::CheckNotCharacterAfterMinusAnd(
uc16 c, uc16 minus, uc16 mask, Label* on_not_equal) {
- DCHECK(minus < String::kMaxUtf16CodeUnit);
+ DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
__ lay(r2, MemOperand(current_character(), -minus));
__ And(r2, Operand(mask));
if (c != 0) {
@@ -496,12 +492,12 @@
Label success;
__ CmpP(current_character(), Operand(' '));
__ beq(&success);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ SubP(r2, current_character(), Operand('\t'));
__ CmpLogicalP(r2, Operand('\r' - '\t'));
__ ble(&success);
// \u00a0 (NBSP).
- __ CmpLogicalP(r2, Operand(0x00a0 - '\t'));
+ __ CmpLogicalP(r2, Operand(0x00A0 - '\t'));
BranchOrBacktrack(ne, on_no_match);
__ bind(&success);
return true;
@@ -523,37 +519,37 @@
BranchOrBacktrack(le, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ XorP(r2, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ SubP(r2, Operand(0x0b));
- __ CmpLogicalP(r2, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ SubP(r2, Operand(0x0B));
+ __ CmpLogicalP(r2, Operand(0x0C - 0x0B));
BranchOrBacktrack(le, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ SubP(r2, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ SubP(r2, Operand(0x2028 - 0x0B));
__ CmpLogicalP(r2, Operand(1));
BranchOrBacktrack(le, on_no_match);
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ XorP(r2, current_character(), Operand(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ SubP(r2, Operand(0x0b));
- __ CmpLogicalP(r2, Operand(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ SubP(r2, Operand(0x0B));
+ __ CmpLogicalP(r2, Operand(0x0C - 0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(gt, on_no_match);
} else {
Label done;
__ ble(&done);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ SubP(r2, Operand(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ SubP(r2, Operand(0x2028 - 0x0B));
__ CmpLogicalP(r2, Operand(1));
BranchOrBacktrack(gt, on_no_match);
__ bind(&done);
@@ -566,7 +562,8 @@
__ CmpP(current_character(), Operand('z'));
BranchOrBacktrack(gt, on_no_match);
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map =
+ ExternalReference::re_word_character_map(isolate());
__ mov(r2, Operand(map));
__ LoadlB(r2, MemOperand(r2, current_character()));
__ CmpLogicalP(r2, Operand::Zero());
@@ -580,7 +577,8 @@
__ CmpLogicalP(current_character(), Operand('z'));
__ bgt(&done);
}
- ExternalReference map = ExternalReference::re_word_character_map();
+ ExternalReference map =
+ ExternalReference::re_word_character_map(isolate());
__ mov(r2, Operand(map));
__ LoadlB(r2, MemOperand(r2, current_character()));
__ CmpLogicalP(r2, Operand::Zero());
@@ -776,7 +774,7 @@
// and the following use of that register.
__ lay(r2, MemOperand(r2, num_saved_registers_ * kIntSize));
for (int i = 0; i < num_saved_registers_;) {
- if (false && i < num_saved_registers_ - 4) {
+ if ((false) && i < num_saved_registers_ - 4) {
// TODO(john.yan): Can be optimized by SIMD instructions
__ LoadMultipleP(r3, r6, register_location(i + 3));
if (mode_ == UC16) {
@@ -913,7 +911,7 @@
__ mov(r4, Operand(ExternalReference::isolate_address(isolate())));
ExternalReference grow_stack = ExternalReference::re_grow_stack(isolate());
__ CallCFunction(grow_stack, num_arguments);
- // If return NULL, we have failed to grow the stack, and
+ // If return nullptr, we have failed to grow the stack, and
// must exit with a stack-overflow exception.
__ CmpP(r2, Operand::Zero());
__ beq(&exit_with_exception);
@@ -932,9 +930,9 @@
}
CodeDesc code_desc;
- masm_->GetCode(&code_desc);
- Handle<Code> code = isolate()->factory()->NewCode(
- code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
+ masm_->GetCode(isolate(), &code_desc);
+ Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
+ masm_->CodeObject());
PROFILE(masm_->isolate(),
RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
return Handle<HeapObject>::cast(code);
@@ -1089,11 +1087,11 @@
// Helper function for reading a value out of a stack frame.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
- DCHECK(sizeof(T) == kPointerSize);
+ DCHECK_EQ(kPointerSize, sizeof(T));
#ifdef V8_TARGET_ARCH_S390X
- return reinterpret_cast<T&>(Memory::uint64_at(re_frame + frame_offset));
+ return reinterpret_cast<T&>(Memory<uint64_t>(re_frame + frame_offset));
#else
- return reinterpret_cast<T&>(Memory::uint32_at(re_frame + frame_offset));
+ return reinterpret_cast<T&>(Memory<uint32_t>(re_frame + frame_offset));
#endif
}
@@ -1139,14 +1137,14 @@
void RegExpMacroAssemblerS390::BranchOrBacktrack(Condition condition, Label* to,
CRegister cr) {
if (condition == al) { // Unconditional.
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
__ b(to);
return;
}
- if (to == NULL) {
+ if (to == nullptr) {
__ b(condition, &backtrack_label_);
return;
}
@@ -1178,14 +1176,14 @@
}
void RegExpMacroAssemblerS390::Push(Register source) {
- DCHECK(!source.is(backtrack_stackpointer()));
+ DCHECK(source != backtrack_stackpointer());
__ lay(backtrack_stackpointer(),
MemOperand(backtrack_stackpointer(), -kPointerSize));
__ StoreP(source, MemOperand(backtrack_stackpointer()));
}
void RegExpMacroAssemblerS390::Pop(Register target) {
- DCHECK(!target.is(backtrack_stackpointer()));
+ DCHECK(target != backtrack_stackpointer());
__ LoadP(target, MemOperand(backtrack_stackpointer()));
__ la(backtrack_stackpointer(),
MemOperand(backtrack_stackpointer(), kPointerSize));
@@ -1211,7 +1209,7 @@
void RegExpMacroAssemblerS390::CallCFunctionUsingStub(
ExternalReference function, int num_arguments) {
// Must pass all arguments in registers. The stub pushes on the stack.
- DCHECK(num_arguments <= 8);
+ DCHECK_GE(8, num_arguments);
__ mov(code_pointer(), Operand(function));
Label ret;
__ larl(r14, &ret);
@@ -1252,7 +1250,7 @@
cp_offset * char_size()));
#endif
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ LoadlB(current_character(),
MemOperand(current_input_offset(), end_of_input_address(),
cp_offset * char_size()));
@@ -1268,7 +1266,7 @@
__ rll(current_character(), current_character(), Operand(16));
#endif
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ LoadLogicalHalfWordP(
current_character(),
MemOperand(current_input_offset(), end_of_input_address(),
diff --git a/src/regexp/s390/regexp-macro-assembler-s390.h b/src/regexp/s390/regexp-macro-assembler-s390.h
index 755bc89..40ba5ec 100644
--- a/src/regexp/s390/regexp-macro-assembler-s390.h
+++ b/src/regexp/s390/regexp-macro-assembler-s390.h
@@ -2,13 +2,12 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#ifndef V8_S390_REGEXP_MACRO_ASSEMBLER_S390_H_
-#define V8_S390_REGEXP_MACRO_ASSEMBLER_S390_H_
+#ifndef V8_REGEXP_S390_REGEXP_MACRO_ASSEMBLER_S390_H_
+#define V8_REGEXP_S390_REGEXP_MACRO_ASSEMBLER_S390_H_
#include "src/macro-assembler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/s390/assembler-s390.h"
-#include "src/s390/frames-s390.h"
namespace v8 {
namespace internal {
@@ -97,8 +96,7 @@
static const int kCaptureArraySize = kCallerFrame;
static const int kStackAreaBase = kCallerFrame + kPointerSize;
// kDirectCall again
- static const int kSecondaryReturnAddress = kStackAreaBase + 2 * kPointerSize;
- static const int kIsolate = kSecondaryReturnAddress + kPointerSize;
+ static const int kIsolate = kStackAreaBase + 2 * kPointerSize;
// Below the frame pointer.
// Register parameters stored by setup code.
@@ -162,7 +160,7 @@
inline int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
+ // is nullptr, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to, CRegister cr = cr7);
// Call and return internally in the generated code in a way that
@@ -212,4 +210,4 @@
} // namespace internal
} // namespace v8
-#endif // V8_S390_REGEXP_MACRO_ASSEMBLER_S390_H_
+#endif // V8_REGEXP_S390_REGEXP_MACRO_ASSEMBLER_S390_H_
diff --git a/src/regexp/x64/regexp-macro-assembler-x64.cc b/src/regexp/x64/regexp-macro-assembler-x64.cc
index 54dc341..43f8076 100644
--- a/src/regexp/x64/regexp-macro-assembler-x64.cc
+++ b/src/regexp/x64/regexp-macro-assembler-x64.cc
@@ -6,7 +6,7 @@
#include "src/regexp/x64/regexp-macro-assembler-x64.h"
-#include "src/factory.h"
+#include "src/heap/factory.h"
#include "src/log.h"
#include "src/macro-assembler.h"
#include "src/objects-inl.h"
@@ -85,9 +85,10 @@
* Address start,
* Address end,
* int* capture_output_array,
- * bool at_start,
+ * int num_capture_registers,
* byte* stack_area_base,
- * bool direct_call)
+ * bool direct_call = false,
+ * Isolate* isolate);
*/
#define __ ACCESS_MASM((&masm_))
@@ -96,9 +97,9 @@
Mode mode,
int registers_to_save)
: NativeRegExpMacroAssembler(isolate, zone),
- masm_(isolate, NULL, kRegExpCodeSize, CodeObjectRequired::kYes),
+ masm_(isolate, nullptr, kRegExpCodeSize, CodeObjectRequired::kYes),
no_root_array_scope_(&masm_),
- code_relative_fixup_positions_(4, zone),
+ code_relative_fixup_positions_(zone),
mode_(mode),
num_registers_(registers_to_save),
num_saved_registers_(registers_to_save),
@@ -138,8 +139,8 @@
void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) {
- DCHECK(reg >= 0);
- DCHECK(reg < num_registers_);
+ DCHECK_LE(0, reg);
+ DCHECK_GT(num_registers_, reg);
if (by != 0) {
__ addp(register_location(reg), Immediate(by));
}
@@ -236,7 +237,7 @@
if (mode_ == LATIN1) {
Label loop_increment;
- if (on_no_match == NULL) {
+ if (on_no_match == nullptr) {
on_no_match = &backtrack_label_;
}
@@ -311,8 +312,8 @@
// size_t byte_length - length of capture in bytes(!)
// Isolate* isolate or 0 if unicode flag.
#ifdef _WIN64
- DCHECK(rcx.is(arg_reg_1));
- DCHECK(rdx.is(arg_reg_2));
+ DCHECK(rcx == arg_reg_1);
+ DCHECK(rdx == arg_reg_2);
// Compute and set byte_offset1 (start of capture).
__ leap(rcx, Operand(rsi, rdx, times_1, 0));
// Set byte_offset2.
@@ -321,8 +322,8 @@
__ subq(rdx, rbx);
}
#else // AMD64 calling convention
- DCHECK(rdi.is(arg_reg_1));
- DCHECK(rsi.is(arg_reg_2));
+ DCHECK(rdi == arg_reg_1);
+ DCHECK(rsi == arg_reg_2);
// Compute byte_offset2 (current position = rsi+rdi).
__ leap(rax, Operand(rsi, rdi, times_1, 0));
// Compute and set byte_offset1 (start of capture).
@@ -337,11 +338,11 @@
// Set byte_length.
__ movp(arg_reg_3, rbx);
// Isolate.
-#ifdef V8_I18N_SUPPORT
+#ifdef V8_INTL_SUPPORT
if (unicode) {
__ movp(arg_reg_4, Immediate(0));
} else // NOLINT
-#endif // V8_I18N_SUPPORT
+#endif // V8_INTL_SUPPORT
{
__ LoadAddress(arg_reg_4, ExternalReference::isolate_address(isolate()));
}
@@ -492,7 +493,7 @@
uc16 minus,
uc16 mask,
Label* on_not_equal) {
- DCHECK(minus < String::kMaxUtf16CodeUnit);
+ DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
__ leap(rax, Operand(current_character(), -minus));
__ andp(rax, Immediate(mask));
__ cmpl(rax, Immediate(c));
@@ -550,12 +551,12 @@
Label success;
__ cmpl(current_character(), Immediate(' '));
__ j(equal, &success, Label::kNear);
- // Check range 0x09..0x0d
+ // Check range 0x09..0x0D
__ leap(rax, Operand(current_character(), -'\t'));
__ cmpl(rax, Immediate('\r' - '\t'));
__ j(below_equal, &success, Label::kNear);
// \u00a0 (NBSP).
- __ cmpl(rax, Immediate(0x00a0 - '\t'));
+ __ cmpl(rax, Immediate(0x00A0 - '\t'));
BranchOrBacktrack(not_equal, on_no_match);
__ bind(&success);
return true;
@@ -577,39 +578,39 @@
BranchOrBacktrack(below_equal, on_no_match);
return true;
case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ movl(rax, current_character());
__ xorp(rax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ subl(rax, Immediate(0x0b));
- __ cmpl(rax, Immediate(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ subl(rax, Immediate(0x0B));
+ __ cmpl(rax, Immediate(0x0C - 0x0B));
BranchOrBacktrack(below_equal, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ subl(rax, Immediate(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ subl(rax, Immediate(0x2028 - 0x0B));
__ cmpl(rax, Immediate(0x2029 - 0x2028));
BranchOrBacktrack(below_equal, on_no_match);
}
return true;
}
case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
__ movl(rax, current_character());
__ xorp(rax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ subl(rax, Immediate(0x0b));
- __ cmpl(rax, Immediate(0x0c - 0x0b));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
+ __ subl(rax, Immediate(0x0B));
+ __ cmpl(rax, Immediate(0x0C - 0x0B));
if (mode_ == LATIN1) {
BranchOrBacktrack(above, on_no_match);
} else {
Label done;
BranchOrBacktrack(below_equal, &done);
// Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ subl(rax, Immediate(0x2028 - 0x0b));
+ // computed (current_char ^ 0x01 - 0x0B). I.e., check for
+ // 0x201D (0x2028 - 0x0B) or 0x201E.
+ __ subl(rax, Immediate(0x2028 - 0x0B));
__ cmpl(rax, Immediate(0x2029 - 0x2028));
BranchOrBacktrack(above, on_no_match);
__ bind(&done);
@@ -622,7 +623,7 @@
__ cmpl(current_character(), Immediate('z'));
BranchOrBacktrack(above, on_no_match);
}
- __ Move(rbx, ExternalReference::re_word_character_map());
+ __ Move(rbx, ExternalReference::re_word_character_map(isolate()));
DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
__ testb(Operand(rbx, current_character(), times_1, 0),
current_character());
@@ -636,7 +637,7 @@
__ cmpl(current_character(), Immediate('z'));
__ j(above, &done);
}
- __ Move(rbx, ExternalReference::re_word_character_map());
+ __ Move(rbx, ExternalReference::re_word_character_map(isolate()));
DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
__ testb(Operand(rbx, current_character(), times_1, 0),
current_character());
@@ -978,7 +979,7 @@
ExternalReference grow_stack =
ExternalReference::re_grow_stack(isolate());
__ CallCFunction(grow_stack, num_arguments);
- // If return NULL, we have failed to grow the stack, and
+ // If return nullptr, we have failed to grow the stack, and
// must exit with a stack-overflow exception.
__ testp(rax, rax);
__ j(equal, &exit_with_exception);
@@ -1004,11 +1005,10 @@
FixupCodeRelativePositions();
CodeDesc code_desc;
- masm_.GetCode(&code_desc);
Isolate* isolate = this->isolate();
- Handle<Code> code = isolate->factory()->NewCode(
- code_desc, Code::ComputeFlags(Code::REGEXP),
- masm_.CodeObject());
+ masm_.GetCode(isolate, &code_desc);
+ Handle<Code> code =
+ isolate->factory()->NewCode(code_desc, Code::REGEXP, masm_.CodeObject());
PROFILE(isolate, RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
return Handle<HeapObject>::cast(code);
}
@@ -1208,7 +1208,7 @@
// Helper function for reading a value out of a stack frame.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
- return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
+ return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
}
@@ -1256,14 +1256,14 @@
void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition,
Label* to) {
if (condition < 0) { // No condition
- if (to == NULL) {
+ if (to == nullptr) {
Backtrack();
return;
}
__ jmp(to);
return;
}
- if (to == NULL) {
+ if (to == nullptr) {
__ j(condition, &backtrack_label_);
return;
}
@@ -1289,7 +1289,7 @@
void RegExpMacroAssemblerX64::Push(Register source) {
- DCHECK(!source.is(backtrack_stackpointer()));
+ DCHECK(source != backtrack_stackpointer());
// Notice: This updates flags, unlike normal Push.
__ subp(backtrack_stackpointer(), Immediate(kIntSize));
__ movl(Operand(backtrack_stackpointer(), 0), source);
@@ -1304,8 +1304,7 @@
void RegExpMacroAssemblerX64::FixupCodeRelativePositions() {
- for (int i = 0, n = code_relative_fixup_positions_.length(); i < n; i++) {
- int position = code_relative_fixup_positions_[i];
+ for (int position : code_relative_fixup_positions_) {
// The position succeeds a relative label offset from position.
// Patch the relative offset to be relative to the Code object pointer
// instead.
@@ -1317,7 +1316,7 @@
+ Code::kHeaderSize
- kHeapObjectTag);
}
- code_relative_fixup_positions_.Clear();
+ code_relative_fixup_positions_.Rewind(0);
}
@@ -1329,7 +1328,7 @@
void RegExpMacroAssemblerX64::Pop(Register target) {
- DCHECK(!target.is(backtrack_stackpointer()));
+ DCHECK(target != backtrack_stackpointer());
__ movsxlq(target, Operand(backtrack_stackpointer(), 0));
// Notice: This updates flags, unlike normal Pop.
__ addp(backtrack_stackpointer(), Immediate(kIntSize));
@@ -1378,7 +1377,7 @@
} else if (characters == 2) {
__ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
}
} else {
@@ -1387,7 +1386,7 @@
__ movl(current_character(),
Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
} else {
- DCHECK(characters == 1);
+ DCHECK_EQ(1, characters);
__ movzxwl(current_character(),
Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
}
diff --git a/src/regexp/x64/regexp-macro-assembler-x64.h b/src/regexp/x64/regexp-macro-assembler-x64.h
index 4c37771..1cf2f73 100644
--- a/src/regexp/x64/regexp-macro-assembler-x64.h
+++ b/src/regexp/x64/regexp-macro-assembler-x64.h
@@ -8,6 +8,7 @@
#include "src/macro-assembler.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/x64/assembler-x64.h"
+#include "src/zone/zone-chunk-list.h"
namespace v8 {
namespace internal {
@@ -211,11 +212,11 @@
inline int char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
+ // is nullptr, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to);
void MarkPositionForCodeRelativeFixup() {
- code_relative_fixup_positions_.Add(masm_.pc_offset(), zone());
+ code_relative_fixup_positions_.push_back(masm_.pc_offset());
}
void FixupCodeRelativePositions();
@@ -252,9 +253,9 @@
Isolate* isolate() const { return masm_.isolate(); }
MacroAssembler masm_;
- MacroAssembler::NoRootArrayScope no_root_array_scope_;
+ NoRootArrayScope no_root_array_scope_;
- ZoneList<int> code_relative_fixup_positions_;
+ ZoneChunkList<int> code_relative_fixup_positions_;
// Which mode to generate code for (LATIN1 or UC16).
Mode mode_;
diff --git a/src/regexp/x87/OWNERS b/src/regexp/x87/OWNERS
deleted file mode 100644
index 61245ae..0000000
--- a/src/regexp/x87/OWNERS
+++ /dev/null
@@ -1,2 +0,0 @@
[email protected]
[email protected]
diff --git a/src/regexp/x87/regexp-macro-assembler-x87.cc b/src/regexp/x87/regexp-macro-assembler-x87.cc
deleted file mode 100644
index 4a1c3a8..0000000
--- a/src/regexp/x87/regexp-macro-assembler-x87.cc
+++ /dev/null
@@ -1,1272 +0,0 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#if V8_TARGET_ARCH_X87
-
-#include "src/regexp/x87/regexp-macro-assembler-x87.h"
-
-#include "src/log.h"
-#include "src/macro-assembler.h"
-#include "src/regexp/regexp-macro-assembler.h"
-#include "src/regexp/regexp-stack.h"
-#include "src/unicode.h"
-
-namespace v8 {
-namespace internal {
-
-#ifndef V8_INTERPRETED_REGEXP
-/*
- * This assembler uses the following register assignment convention
- * - edx : Current character. Must be loaded using LoadCurrentCharacter
- * before using any of the dispatch methods. Temporarily stores the
- * index of capture start after a matching pass for a global regexp.
- * - edi : Current position in input, as negative offset from end of string.
- * Please notice that this is the byte offset, not the character offset!
- * - esi : end of input (points to byte after last character in input).
- * - ebp : Frame pointer. Used to access arguments, local variables and
- * RegExp registers.
- * - esp : Points to tip of C stack.
- * - ecx : Points to tip of backtrack stack
- *
- * The registers eax and ebx are free to use for computations.
- *
- * Each call to a public method should retain this convention.
- * The stack will have the following structure:
- * - Isolate* isolate (address of the current isolate)
- * - direct_call (if 1, direct call from JavaScript code, if 0
- * call through the runtime system)
- * - stack_area_base (high end of the memory area to use as
- * backtracking stack)
- * - capture array size (may fit multiple sets of matches)
- * - int* capture_array (int[num_saved_registers_], for output).
- * - end of input (address of end of string)
- * - start of input (address of first character in string)
- * - start index (character index of start)
- * - String* input_string (location of a handle containing the string)
- * --- frame alignment (if applicable) ---
- * - return address
- * ebp-> - old ebp
- * - backup of caller esi
- * - backup of caller edi
- * - backup of caller ebx
- * - success counter (only for global regexps to count matches).
- * - Offset of location before start of input (effectively character
- * string start - 1). Used to initialize capture registers to a
- * non-position.
- * - register 0 ebp[-4] (only positions must be stored in the first
- * - register 1 ebp[-8] num_saved_registers_ registers)
- * - ...
- *
- * The first num_saved_registers_ registers are initialized to point to
- * "character -1" in the string (i.e., char_size() bytes before the first
- * character of the string). The remaining registers starts out as garbage.
- *
- * The data up to the return address must be placed there by the calling
- * code, by calling the code entry as cast to a function with the signature:
- * int (*match)(String* input_string,
- * int start_index,
- * Address start,
- * Address end,
- * int* capture_output_array,
- * bool at_start,
- * byte* stack_area_base,
- * bool direct_call)
- */
-
-#define __ ACCESS_MASM(masm_)
-
-RegExpMacroAssemblerX87::RegExpMacroAssemblerX87(Isolate* isolate, Zone* zone,
- Mode mode,
- int registers_to_save)
- : NativeRegExpMacroAssembler(isolate, zone),
- masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
- CodeObjectRequired::kYes)),
- mode_(mode),
- num_registers_(registers_to_save),
- num_saved_registers_(registers_to_save),
- entry_label_(),
- start_label_(),
- success_label_(),
- backtrack_label_(),
- exit_label_() {
- DCHECK_EQ(0, registers_to_save % 2);
- __ jmp(&entry_label_); // We'll write the entry code later.
- __ bind(&start_label_); // And then continue from here.
-}
-
-
-RegExpMacroAssemblerX87::~RegExpMacroAssemblerX87() {
- delete masm_;
- // Unuse labels in case we throw away the assembler without calling GetCode.
- entry_label_.Unuse();
- start_label_.Unuse();
- success_label_.Unuse();
- backtrack_label_.Unuse();
- exit_label_.Unuse();
- check_preempt_label_.Unuse();
- stack_overflow_label_.Unuse();
-}
-
-
-int RegExpMacroAssemblerX87::stack_limit_slack() {
- return RegExpStack::kStackLimitSlack;
-}
-
-
-void RegExpMacroAssemblerX87::AdvanceCurrentPosition(int by) {
- if (by != 0) {
- __ add(edi, Immediate(by * char_size()));
- }
-}
-
-
-void RegExpMacroAssemblerX87::AdvanceRegister(int reg, int by) {
- DCHECK(reg >= 0);
- DCHECK(reg < num_registers_);
- if (by != 0) {
- __ add(register_location(reg), Immediate(by));
- }
-}
-
-
-void RegExpMacroAssemblerX87::Backtrack() {
- CheckPreemption();
- // Pop Code* offset from backtrack stack, add Code* and jump to location.
- Pop(ebx);
- __ add(ebx, Immediate(masm_->CodeObject()));
- __ jmp(ebx);
-}
-
-
-void RegExpMacroAssemblerX87::Bind(Label* label) {
- __ bind(label);
-}
-
-
-void RegExpMacroAssemblerX87::CheckCharacter(uint32_t c, Label* on_equal) {
- __ cmp(current_character(), c);
- BranchOrBacktrack(equal, on_equal);
-}
-
-
-void RegExpMacroAssemblerX87::CheckCharacterGT(uc16 limit, Label* on_greater) {
- __ cmp(current_character(), limit);
- BranchOrBacktrack(greater, on_greater);
-}
-
-
-void RegExpMacroAssemblerX87::CheckAtStart(Label* on_at_start) {
- __ lea(eax, Operand(edi, -char_size()));
- __ cmp(eax, Operand(ebp, kStringStartMinusOne));
- BranchOrBacktrack(equal, on_at_start);
-}
-
-
-void RegExpMacroAssemblerX87::CheckNotAtStart(int cp_offset,
- Label* on_not_at_start) {
- __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size()));
- __ cmp(eax, Operand(ebp, kStringStartMinusOne));
- BranchOrBacktrack(not_equal, on_not_at_start);
-}
-
-
-void RegExpMacroAssemblerX87::CheckCharacterLT(uc16 limit, Label* on_less) {
- __ cmp(current_character(), limit);
- BranchOrBacktrack(less, on_less);
-}
-
-
-void RegExpMacroAssemblerX87::CheckGreedyLoop(Label* on_equal) {
- Label fallthrough;
- __ cmp(edi, Operand(backtrack_stackpointer(), 0));
- __ j(not_equal, &fallthrough);
- __ add(backtrack_stackpointer(), Immediate(kPointerSize)); // Pop.
- BranchOrBacktrack(no_condition, on_equal);
- __ bind(&fallthrough);
-}
-
-void RegExpMacroAssemblerX87::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
- Label fallthrough;
- __ mov(edx, register_location(start_reg)); // Index of start of capture
- __ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
- __ sub(ebx, edx); // Length of capture.
-
- // At this point, the capture registers are either both set or both cleared.
- // If the capture length is zero, then the capture is either empty or cleared.
- // Fall through in both cases.
- __ j(equal, &fallthrough);
-
- // Check that there are sufficient characters left in the input.
- if (read_backward) {
- __ mov(eax, Operand(ebp, kStringStartMinusOne));
- __ add(eax, ebx);
- __ cmp(edi, eax);
- BranchOrBacktrack(less_equal, on_no_match);
- } else {
- __ mov(eax, edi);
- __ add(eax, ebx);
- BranchOrBacktrack(greater, on_no_match);
- }
-
- if (mode_ == LATIN1) {
- Label success;
- Label fail;
- Label loop_increment;
- // Save register contents to make the registers available below.
- __ push(edi);
- __ push(backtrack_stackpointer());
- // After this, the eax, ecx, and edi registers are available.
-
- __ add(edx, esi); // Start of capture
- __ add(edi, esi); // Start of text to match against capture.
- if (read_backward) {
- __ sub(edi, ebx); // Offset by length when matching backwards.
- }
- __ add(ebx, edi); // End of text to match against capture.
-
- Label loop;
- __ bind(&loop);
- __ movzx_b(eax, Operand(edi, 0));
- __ cmpb_al(Operand(edx, 0));
- __ j(equal, &loop_increment);
-
- // Mismatch, try case-insensitive match (converting letters to lower-case).
- __ or_(eax, 0x20); // Convert match character to lower-case.
- __ lea(ecx, Operand(eax, -'a'));
- __ cmp(ecx, static_cast<int32_t>('z' - 'a')); // Is eax a lowercase letter?
- Label convert_capture;
- __ j(below_equal, &convert_capture); // In range 'a'-'z'.
- // Latin-1: Check for values in range [224,254] but not 247.
- __ sub(ecx, Immediate(224 - 'a'));
- __ cmp(ecx, Immediate(254 - 224));
- __ j(above, &fail); // Weren't Latin-1 letters.
- __ cmp(ecx, Immediate(247 - 224)); // Check for 247.
- __ j(equal, &fail);
- __ bind(&convert_capture);
- // Also convert capture character.
- __ movzx_b(ecx, Operand(edx, 0));
- __ or_(ecx, 0x20);
-
- __ cmp(eax, ecx);
- __ j(not_equal, &fail);
-
- __ bind(&loop_increment);
- // Increment pointers into match and capture strings.
- __ add(edx, Immediate(1));
- __ add(edi, Immediate(1));
- // Compare to end of match, and loop if not done.
- __ cmp(edi, ebx);
- __ j(below, &loop);
- __ jmp(&success);
-
- __ bind(&fail);
- // Restore original values before failing.
- __ pop(backtrack_stackpointer());
- __ pop(edi);
- BranchOrBacktrack(no_condition, on_no_match);
-
- __ bind(&success);
- // Restore original value before continuing.
- __ pop(backtrack_stackpointer());
- // Drop original value of character position.
- __ add(esp, Immediate(kPointerSize));
- // Compute new value of character position after the matched part.
- __ sub(edi, esi);
- if (read_backward) {
- // Subtract match length if we matched backward.
- __ add(edi, register_location(start_reg));
- __ sub(edi, register_location(start_reg + 1));
- }
- } else {
- DCHECK(mode_ == UC16);
- // Save registers before calling C function.
- __ push(esi);
- __ push(edi);
- __ push(backtrack_stackpointer());
- __ push(ebx);
-
- static const int argument_count = 4;
- __ PrepareCallCFunction(argument_count, ecx);
- // Put arguments into allocated stack area, last argument highest on stack.
- // Parameters are
- // Address byte_offset1 - Address captured substring's start.
- // Address byte_offset2 - Address of current character position.
- // size_t byte_length - length of capture in bytes(!)
-// Isolate* isolate or 0 if unicode flag.
-
- // Set isolate.
-#ifdef V8_I18N_SUPPORT
- if (unicode) {
- __ mov(Operand(esp, 3 * kPointerSize), Immediate(0));
- } else // NOLINT
-#endif // V8_I18N_SUPPORT
- {
- __ mov(Operand(esp, 3 * kPointerSize),
- Immediate(ExternalReference::isolate_address(isolate())));
- }
- // Set byte_length.
- __ mov(Operand(esp, 2 * kPointerSize), ebx);
- // Set byte_offset2.
- // Found by adding negative string-end offset of current position (edi)
- // to end of string.
- __ add(edi, esi);
- if (read_backward) {
- __ sub(edi, ebx); // Offset by length when matching backwards.
- }
- __ mov(Operand(esp, 1 * kPointerSize), edi);
- // Set byte_offset1.
- // Start of capture, where edx already holds string-end negative offset.
- __ add(edx, esi);
- __ mov(Operand(esp, 0 * kPointerSize), edx);
-
- {
- AllowExternalCallThatCantCauseGC scope(masm_);
- ExternalReference compare =
- ExternalReference::re_case_insensitive_compare_uc16(isolate());
- __ CallCFunction(compare, argument_count);
- }
- // Pop original values before reacting on result value.
- __ pop(ebx);
- __ pop(backtrack_stackpointer());
- __ pop(edi);
- __ pop(esi);
-
- // Check if function returned non-zero for success or zero for failure.
- __ or_(eax, eax);
- BranchOrBacktrack(zero, on_no_match);
- // On success, advance position by length of capture.
- if (read_backward) {
- __ sub(edi, ebx);
- } else {
- __ add(edi, ebx);
- }
- }
- __ bind(&fallthrough);
-}
-
-
-void RegExpMacroAssemblerX87::CheckNotBackReference(int start_reg,
- bool read_backward,
- Label* on_no_match) {
- Label fallthrough;
- Label success;
- Label fail;
-
- // Find length of back-referenced capture.
- __ mov(edx, register_location(start_reg));
- __ mov(eax, register_location(start_reg + 1));
- __ sub(eax, edx); // Length to check.
-
- // At this point, the capture registers are either both set or both cleared.
- // If the capture length is zero, then the capture is either empty or cleared.
- // Fall through in both cases.
- __ j(equal, &fallthrough);
-
- // Check that there are sufficient characters left in the input.
- if (read_backward) {
- __ mov(ebx, Operand(ebp, kStringStartMinusOne));
- __ add(ebx, eax);
- __ cmp(edi, ebx);
- BranchOrBacktrack(less_equal, on_no_match);
- } else {
- __ mov(ebx, edi);
- __ add(ebx, eax);
- BranchOrBacktrack(greater, on_no_match);
- }
-
- // Save register to make it available below.
- __ push(backtrack_stackpointer());
-
- // Compute pointers to match string and capture string
- __ add(edx, esi); // Start of capture.
- __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match.
- if (read_backward) {
- __ sub(ebx, eax); // Offset by length when matching backwards.
- }
- __ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match
-
- Label loop;
- __ bind(&loop);
- if (mode_ == LATIN1) {
- __ movzx_b(eax, Operand(edx, 0));
- __ cmpb_al(Operand(ebx, 0));
- } else {
- DCHECK(mode_ == UC16);
- __ movzx_w(eax, Operand(edx, 0));
- __ cmpw_ax(Operand(ebx, 0));
- }
- __ j(not_equal, &fail);
- // Increment pointers into capture and match string.
- __ add(edx, Immediate(char_size()));
- __ add(ebx, Immediate(char_size()));
- // Check if we have reached end of match area.
- __ cmp(ebx, ecx);
- __ j(below, &loop);
- __ jmp(&success);
-
- __ bind(&fail);
- // Restore backtrack stackpointer.
- __ pop(backtrack_stackpointer());
- BranchOrBacktrack(no_condition, on_no_match);
-
- __ bind(&success);
- // Move current character position to position after match.
- __ mov(edi, ecx);
- __ sub(edi, esi);
- if (read_backward) {
- // Subtract match length if we matched backward.
- __ add(edi, register_location(start_reg));
- __ sub(edi, register_location(start_reg + 1));
- }
- // Restore backtrack stackpointer.
- __ pop(backtrack_stackpointer());
-
- __ bind(&fallthrough);
-}
-
-
-void RegExpMacroAssemblerX87::CheckNotCharacter(uint32_t c,
- Label* on_not_equal) {
- __ cmp(current_character(), c);
- BranchOrBacktrack(not_equal, on_not_equal);
-}
-
-
-void RegExpMacroAssemblerX87::CheckCharacterAfterAnd(uint32_t c,
- uint32_t mask,
- Label* on_equal) {
- if (c == 0) {
- __ test(current_character(), Immediate(mask));
- } else {
- __ mov(eax, mask);
- __ and_(eax, current_character());
- __ cmp(eax, c);
- }
- BranchOrBacktrack(equal, on_equal);
-}
-
-
-void RegExpMacroAssemblerX87::CheckNotCharacterAfterAnd(uint32_t c,
- uint32_t mask,
- Label* on_not_equal) {
- if (c == 0) {
- __ test(current_character(), Immediate(mask));
- } else {
- __ mov(eax, mask);
- __ and_(eax, current_character());
- __ cmp(eax, c);
- }
- BranchOrBacktrack(not_equal, on_not_equal);
-}
-
-
-void RegExpMacroAssemblerX87::CheckNotCharacterAfterMinusAnd(
- uc16 c,
- uc16 minus,
- uc16 mask,
- Label* on_not_equal) {
- DCHECK(minus < String::kMaxUtf16CodeUnit);
- __ lea(eax, Operand(current_character(), -minus));
- if (c == 0) {
- __ test(eax, Immediate(mask));
- } else {
- __ and_(eax, mask);
- __ cmp(eax, c);
- }
- BranchOrBacktrack(not_equal, on_not_equal);
-}
-
-
-void RegExpMacroAssemblerX87::CheckCharacterInRange(
- uc16 from,
- uc16 to,
- Label* on_in_range) {
- __ lea(eax, Operand(current_character(), -from));
- __ cmp(eax, to - from);
- BranchOrBacktrack(below_equal, on_in_range);
-}
-
-
-void RegExpMacroAssemblerX87::CheckCharacterNotInRange(
- uc16 from,
- uc16 to,
- Label* on_not_in_range) {
- __ lea(eax, Operand(current_character(), -from));
- __ cmp(eax, to - from);
- BranchOrBacktrack(above, on_not_in_range);
-}
-
-
-void RegExpMacroAssemblerX87::CheckBitInTable(
- Handle<ByteArray> table,
- Label* on_bit_set) {
- __ mov(eax, Immediate(table));
- Register index = current_character();
- if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
- __ mov(ebx, kTableSize - 1);
- __ and_(ebx, current_character());
- index = ebx;
- }
- __ cmpb(FieldOperand(eax, index, times_1, ByteArray::kHeaderSize),
- Immediate(0));
- BranchOrBacktrack(not_equal, on_bit_set);
-}
-
-
-bool RegExpMacroAssemblerX87::CheckSpecialCharacterClass(uc16 type,
- Label* on_no_match) {
- // Range checks (c in min..max) are generally implemented by an unsigned
- // (c - min) <= (max - min) check
- switch (type) {
- case 's':
- // Match space-characters
- if (mode_ == LATIN1) {
- // One byte space characters are '\t'..'\r', ' ' and \u00a0.
- Label success;
- __ cmp(current_character(), ' ');
- __ j(equal, &success, Label::kNear);
- // Check range 0x09..0x0d
- __ lea(eax, Operand(current_character(), -'\t'));
- __ cmp(eax, '\r' - '\t');
- __ j(below_equal, &success, Label::kNear);
- // \u00a0 (NBSP).
- __ cmp(eax, 0x00a0 - '\t');
- BranchOrBacktrack(not_equal, on_no_match);
- __ bind(&success);
- return true;
- }
- return false;
- case 'S':
- // The emitted code for generic character classes is good enough.
- return false;
- case 'd':
- // Match ASCII digits ('0'..'9')
- __ lea(eax, Operand(current_character(), -'0'));
- __ cmp(eax, '9' - '0');
- BranchOrBacktrack(above, on_no_match);
- return true;
- case 'D':
- // Match non ASCII-digits
- __ lea(eax, Operand(current_character(), -'0'));
- __ cmp(eax, '9' - '0');
- BranchOrBacktrack(below_equal, on_no_match);
- return true;
- case '.': {
- // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
- __ mov(eax, current_character());
- __ xor_(eax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(eax, Immediate(0x0b));
- __ cmp(eax, 0x0c - 0x0b);
- BranchOrBacktrack(below_equal, on_no_match);
- if (mode_ == UC16) {
- // Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(eax, Immediate(0x2028 - 0x0b));
- __ cmp(eax, 0x2029 - 0x2028);
- BranchOrBacktrack(below_equal, on_no_match);
- }
- return true;
- }
- case 'w': {
- if (mode_ != LATIN1) {
- // Table is 256 entries, so all Latin1 characters can be tested.
- __ cmp(current_character(), Immediate('z'));
- BranchOrBacktrack(above, on_no_match);
- }
- DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
- ExternalReference word_map = ExternalReference::re_word_character_map();
- __ test_b(current_character(),
- Operand::StaticArray(current_character(), times_1, word_map));
- BranchOrBacktrack(zero, on_no_match);
- return true;
- }
- case 'W': {
- Label done;
- if (mode_ != LATIN1) {
- // Table is 256 entries, so all Latin1 characters can be tested.
- __ cmp(current_character(), Immediate('z'));
- __ j(above, &done);
- }
- DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
- ExternalReference word_map = ExternalReference::re_word_character_map();
- __ test_b(current_character(),
- Operand::StaticArray(current_character(), times_1, word_map));
- BranchOrBacktrack(not_zero, on_no_match);
- if (mode_ != LATIN1) {
- __ bind(&done);
- }
- return true;
- }
- // Non-standard classes (with no syntactic shorthand) used internally.
- case '*':
- // Match any character.
- return true;
- case 'n': {
- // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
- // The opposite of '.'.
- __ mov(eax, current_character());
- __ xor_(eax, Immediate(0x01));
- // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(eax, Immediate(0x0b));
- __ cmp(eax, 0x0c - 0x0b);
- if (mode_ == LATIN1) {
- BranchOrBacktrack(above, on_no_match);
- } else {
- Label done;
- BranchOrBacktrack(below_equal, &done);
- DCHECK_EQ(UC16, mode_);
- // Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(eax, Immediate(0x2028 - 0x0b));
- __ cmp(eax, 1);
- BranchOrBacktrack(above, on_no_match);
- __ bind(&done);
- }
- return true;
- }
- // No custom implementation (yet): s(UC16), S(UC16).
- default:
- return false;
- }
-}
-
-
-void RegExpMacroAssemblerX87::Fail() {
- STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
- if (!global()) {
- __ Move(eax, Immediate(FAILURE));
- }
- __ jmp(&exit_label_);
-}
-
-
-Handle<HeapObject> RegExpMacroAssemblerX87::GetCode(Handle<String> source) {
- Label return_eax;
- // Finalize code - write the entry point code now we know how many
- // registers we need.
-
- // Entry code:
- __ bind(&entry_label_);
-
- // Tell the system that we have a stack frame. Because the type is MANUAL, no
- // code is generated.
- FrameScope scope(masm_, StackFrame::MANUAL);
-
- // Actually emit code to start a new stack frame.
- __ push(ebp);
- __ mov(ebp, esp);
- // Save callee-save registers. Order here should correspond to order of
- // kBackup_ebx etc.
- __ push(esi);
- __ push(edi);
- __ push(ebx); // Callee-save on MacOS.
- __ push(Immediate(0)); // Number of successful matches in a global regexp.
- __ push(Immediate(0)); // Make room for "string start - 1" constant.
-
- // Check if we have space on the stack for registers.
- Label stack_limit_hit;
- Label stack_ok;
-
- ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
- __ mov(ecx, esp);
- __ sub(ecx, Operand::StaticVariable(stack_limit));
- // Handle it if the stack pointer is already below the stack limit.
- __ j(below_equal, &stack_limit_hit);
- // Check if there is room for the variable number of registers above
- // the stack limit.
- __ cmp(ecx, num_registers_ * kPointerSize);
- __ j(above_equal, &stack_ok);
- // Exit with OutOfMemory exception. There is not enough space on the stack
- // for our working registers.
- __ mov(eax, EXCEPTION);
- __ jmp(&return_eax);
-
- __ bind(&stack_limit_hit);
- CallCheckStackGuardState(ebx);
- __ or_(eax, eax);
- // If returned value is non-zero, we exit with the returned value as result.
- __ j(not_zero, &return_eax);
-
- __ bind(&stack_ok);
- // Load start index for later use.
- __ mov(ebx, Operand(ebp, kStartIndex));
-
- // Allocate space on stack for registers.
- __ sub(esp, Immediate(num_registers_ * kPointerSize));
- // Load string length.
- __ mov(esi, Operand(ebp, kInputEnd));
- // Load input position.
- __ mov(edi, Operand(ebp, kInputStart));
- // Set up edi to be negative offset from string end.
- __ sub(edi, esi);
-
- // Set eax to address of char before start of the string.
- // (effectively string position -1).
- __ neg(ebx);
- if (mode_ == UC16) {
- __ lea(eax, Operand(edi, ebx, times_2, -char_size()));
- } else {
- __ lea(eax, Operand(edi, ebx, times_1, -char_size()));
- }
- // Store this value in a local variable, for use when clearing
- // position registers.
- __ mov(Operand(ebp, kStringStartMinusOne), eax);
-
-#if V8_OS_WIN
- // Ensure that we write to each stack page, in order. Skipping a page
- // on Windows can cause segmentation faults. Assuming page size is 4k.
- const int kPageSize = 4096;
- const int kRegistersPerPage = kPageSize / kPointerSize;
- for (int i = num_saved_registers_ + kRegistersPerPage - 1;
- i < num_registers_;
- i += kRegistersPerPage) {
- __ mov(register_location(i), eax); // One write every page.
- }
-#endif // V8_OS_WIN
-
- Label load_char_start_regexp, start_regexp;
- // Load newline if index is at start, previous character otherwise.
- __ cmp(Operand(ebp, kStartIndex), Immediate(0));
- __ j(not_equal, &load_char_start_regexp, Label::kNear);
- __ mov(current_character(), '\n');
- __ jmp(&start_regexp, Label::kNear);
-
- // Global regexp restarts matching here.
- __ bind(&load_char_start_regexp);
- // Load previous char as initial value of current character register.
- LoadCurrentCharacterUnchecked(-1, 1);
- __ bind(&start_regexp);
-
- // Initialize on-stack registers.
- if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
- // Fill saved registers with initial value = start offset - 1
- // Fill in stack push order, to avoid accessing across an unwritten
- // page (a problem on Windows).
- if (num_saved_registers_ > 8) {
- __ mov(ecx, kRegisterZero);
- Label init_loop;
- __ bind(&init_loop);
- __ mov(Operand(ebp, ecx, times_1, 0), eax);
- __ sub(ecx, Immediate(kPointerSize));
- __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize);
- __ j(greater, &init_loop);
- } else { // Unroll the loop.
- for (int i = 0; i < num_saved_registers_; i++) {
- __ mov(register_location(i), eax);
- }
- }
- }
-
- // Initialize backtrack stack pointer.
- __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
-
- __ jmp(&start_label_);
-
- // Exit code:
- if (success_label_.is_linked()) {
- // Save captures when successful.
- __ bind(&success_label_);
- if (num_saved_registers_ > 0) {
- // copy captures to output
- __ mov(ebx, Operand(ebp, kRegisterOutput));
- __ mov(ecx, Operand(ebp, kInputEnd));
- __ mov(edx, Operand(ebp, kStartIndex));
- __ sub(ecx, Operand(ebp, kInputStart));
- if (mode_ == UC16) {
- __ lea(ecx, Operand(ecx, edx, times_2, 0));
- } else {
- __ add(ecx, edx);
- }
- for (int i = 0; i < num_saved_registers_; i++) {
- __ mov(eax, register_location(i));
- if (i == 0 && global_with_zero_length_check()) {
- // Keep capture start in edx for the zero-length check later.
- __ mov(edx, eax);
- }
- // Convert to index from start of string, not end.
- __ add(eax, ecx);
- if (mode_ == UC16) {
- __ sar(eax, 1); // Convert byte index to character index.
- }
- __ mov(Operand(ebx, i * kPointerSize), eax);
- }
- }
-
- if (global()) {
- // Restart matching if the regular expression is flagged as global.
- // Increment success counter.
- __ inc(Operand(ebp, kSuccessfulCaptures));
- // Capture results have been stored, so the number of remaining global
- // output registers is reduced by the number of stored captures.
- __ mov(ecx, Operand(ebp, kNumOutputRegisters));
- __ sub(ecx, Immediate(num_saved_registers_));
- // Check whether we have enough room for another set of capture results.
- __ cmp(ecx, Immediate(num_saved_registers_));
- __ j(less, &exit_label_);
-
- __ mov(Operand(ebp, kNumOutputRegisters), ecx);
- // Advance the location for output.
- __ add(Operand(ebp, kRegisterOutput),
- Immediate(num_saved_registers_ * kPointerSize));
-
- // Prepare eax to initialize registers with its value in the next run.
- __ mov(eax, Operand(ebp, kStringStartMinusOne));
-
- if (global_with_zero_length_check()) {
- // Special case for zero-length matches.
- // edx: capture start index
- __ cmp(edi, edx);
- // Not a zero-length match, restart.
- __ j(not_equal, &load_char_start_regexp);
- // edi (offset from the end) is zero if we already reached the end.
- __ test(edi, edi);
- __ j(zero, &exit_label_, Label::kNear);
- // Advance current position after a zero-length match.
- Label advance;
- __ bind(&advance);
- if (mode_ == UC16) {
- __ add(edi, Immediate(2));
- } else {
- __ inc(edi);
- }
- if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
- }
- __ jmp(&load_char_start_regexp);
- } else {
- __ mov(eax, Immediate(SUCCESS));
- }
- }
-
- __ bind(&exit_label_);
- if (global()) {
- // Return the number of successful captures.
- __ mov(eax, Operand(ebp, kSuccessfulCaptures));
- }
-
- __ bind(&return_eax);
- // Skip esp past regexp registers.
- __ lea(esp, Operand(ebp, kBackup_ebx));
- // Restore callee-save registers.
- __ pop(ebx);
- __ pop(edi);
- __ pop(esi);
- // Exit function frame, restore previous one.
- __ pop(ebp);
- __ ret(0);
-
- // Backtrack code (branch target for conditional backtracks).
- if (backtrack_label_.is_linked()) {
- __ bind(&backtrack_label_);
- Backtrack();
- }
-
- Label exit_with_exception;
-
- // Preempt-code
- if (check_preempt_label_.is_linked()) {
- SafeCallTarget(&check_preempt_label_);
-
- __ push(backtrack_stackpointer());
- __ push(edi);
-
- CallCheckStackGuardState(ebx);
- __ or_(eax, eax);
- // If returning non-zero, we should end execution with the given
- // result as return value.
- __ j(not_zero, &return_eax);
-
- __ pop(edi);
- __ pop(backtrack_stackpointer());
- // String might have moved: Reload esi from frame.
- __ mov(esi, Operand(ebp, kInputEnd));
- SafeReturn();
- }
-
- // Backtrack stack overflow code.
- if (stack_overflow_label_.is_linked()) {
- SafeCallTarget(&stack_overflow_label_);
- // Reached if the backtrack-stack limit has been hit.
-
- Label grow_failed;
- // Save registers before calling C function
- __ push(esi);
- __ push(edi);
-
- // Call GrowStack(backtrack_stackpointer())
- static const int num_arguments = 3;
- __ PrepareCallCFunction(num_arguments, ebx);
- __ mov(Operand(esp, 2 * kPointerSize),
- Immediate(ExternalReference::isolate_address(isolate())));
- __ lea(eax, Operand(ebp, kStackHighEnd));
- __ mov(Operand(esp, 1 * kPointerSize), eax);
- __ mov(Operand(esp, 0 * kPointerSize), backtrack_stackpointer());
- ExternalReference grow_stack =
- ExternalReference::re_grow_stack(isolate());
- __ CallCFunction(grow_stack, num_arguments);
- // If return NULL, we have failed to grow the stack, and
- // must exit with a stack-overflow exception.
- __ or_(eax, eax);
- __ j(equal, &exit_with_exception);
- // Otherwise use return value as new stack pointer.
- __ mov(backtrack_stackpointer(), eax);
- // Restore saved registers and continue.
- __ pop(edi);
- __ pop(esi);
- SafeReturn();
- }
-
- if (exit_with_exception.is_linked()) {
- // If any of the code above needed to exit with an exception.
- __ bind(&exit_with_exception);
- // Exit with Result EXCEPTION(-1) to signal thrown exception.
- __ mov(eax, EXCEPTION);
- __ jmp(&return_eax);
- }
-
- CodeDesc code_desc;
- masm_->GetCode(&code_desc);
- Handle<Code> code =
- isolate()->factory()->NewCode(code_desc,
- Code::ComputeFlags(Code::REGEXP),
- masm_->CodeObject());
- PROFILE(masm_->isolate(),
- RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
- return Handle<HeapObject>::cast(code);
-}
-
-
-void RegExpMacroAssemblerX87::GoTo(Label* to) {
- BranchOrBacktrack(no_condition, to);
-}
-
-
-void RegExpMacroAssemblerX87::IfRegisterGE(int reg,
- int comparand,
- Label* if_ge) {
- __ cmp(register_location(reg), Immediate(comparand));
- BranchOrBacktrack(greater_equal, if_ge);
-}
-
-
-void RegExpMacroAssemblerX87::IfRegisterLT(int reg,
- int comparand,
- Label* if_lt) {
- __ cmp(register_location(reg), Immediate(comparand));
- BranchOrBacktrack(less, if_lt);
-}
-
-
-void RegExpMacroAssemblerX87::IfRegisterEqPos(int reg,
- Label* if_eq) {
- __ cmp(edi, register_location(reg));
- BranchOrBacktrack(equal, if_eq);
-}
-
-
-RegExpMacroAssembler::IrregexpImplementation
- RegExpMacroAssemblerX87::Implementation() {
- return kX87Implementation;
-}
-
-
-void RegExpMacroAssemblerX87::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters) {
- DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
- if (check_bounds) {
- if (cp_offset >= 0) {
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
- } else {
- CheckPosition(cp_offset, on_end_of_input);
- }
- }
- LoadCurrentCharacterUnchecked(cp_offset, characters);
-}
-
-
-void RegExpMacroAssemblerX87::PopCurrentPosition() {
- Pop(edi);
-}
-
-
-void RegExpMacroAssemblerX87::PopRegister(int register_index) {
- Pop(eax);
- __ mov(register_location(register_index), eax);
-}
-
-
-void RegExpMacroAssemblerX87::PushBacktrack(Label* label) {
- Push(Immediate::CodeRelativeOffset(label));
- CheckStackLimit();
-}
-
-
-void RegExpMacroAssemblerX87::PushCurrentPosition() {
- Push(edi);
-}
-
-
-void RegExpMacroAssemblerX87::PushRegister(int register_index,
- StackCheckFlag check_stack_limit) {
- __ mov(eax, register_location(register_index));
- Push(eax);
- if (check_stack_limit) CheckStackLimit();
-}
-
-
-void RegExpMacroAssemblerX87::ReadCurrentPositionFromRegister(int reg) {
- __ mov(edi, register_location(reg));
-}
-
-
-void RegExpMacroAssemblerX87::ReadStackPointerFromRegister(int reg) {
- __ mov(backtrack_stackpointer(), register_location(reg));
- __ add(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
-}
-
-void RegExpMacroAssemblerX87::SetCurrentPositionFromEnd(int by) {
- Label after_position;
- __ cmp(edi, -by * char_size());
- __ j(greater_equal, &after_position, Label::kNear);
- __ mov(edi, -by * char_size());
- // On RegExp code entry (where this operation is used), the character before
- // the current position is expected to be already loaded.
- // We have advanced the position, so it's safe to read backwards.
- LoadCurrentCharacterUnchecked(-1, 1);
- __ bind(&after_position);
-}
-
-
-void RegExpMacroAssemblerX87::SetRegister(int register_index, int to) {
- DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
- __ mov(register_location(register_index), Immediate(to));
-}
-
-
-bool RegExpMacroAssemblerX87::Succeed() {
- __ jmp(&success_label_);
- return global();
-}
-
-
-void RegExpMacroAssemblerX87::WriteCurrentPositionToRegister(int reg,
- int cp_offset) {
- if (cp_offset == 0) {
- __ mov(register_location(reg), edi);
- } else {
- __ lea(eax, Operand(edi, cp_offset * char_size()));
- __ mov(register_location(reg), eax);
- }
-}
-
-
-void RegExpMacroAssemblerX87::ClearRegisters(int reg_from, int reg_to) {
- DCHECK(reg_from <= reg_to);
- __ mov(eax, Operand(ebp, kStringStartMinusOne));
- for (int reg = reg_from; reg <= reg_to; reg++) {
- __ mov(register_location(reg), eax);
- }
-}
-
-
-void RegExpMacroAssemblerX87::WriteStackPointerToRegister(int reg) {
- __ mov(eax, backtrack_stackpointer());
- __ sub(eax, Operand(ebp, kStackHighEnd));
- __ mov(register_location(reg), eax);
-}
-
-
-// Private methods:
-
-void RegExpMacroAssemblerX87::CallCheckStackGuardState(Register scratch) {
- static const int num_arguments = 3;
- __ PrepareCallCFunction(num_arguments, scratch);
- // RegExp code frame pointer.
- __ mov(Operand(esp, 2 * kPointerSize), ebp);
- // Code* of self.
- __ mov(Operand(esp, 1 * kPointerSize), Immediate(masm_->CodeObject()));
- // Next address on the stack (will be address of return address).
- __ lea(eax, Operand(esp, -kPointerSize));
- __ mov(Operand(esp, 0 * kPointerSize), eax);
- ExternalReference check_stack_guard =
- ExternalReference::re_check_stack_guard_state(isolate());
- __ CallCFunction(check_stack_guard, num_arguments);
-}
-
-
-// Helper function for reading a value out of a stack frame.
-template <typename T>
-static T& frame_entry(Address re_frame, int frame_offset) {
- return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
-}
-
-
-template <typename T>
-static T* frame_entry_address(Address re_frame, int frame_offset) {
- return reinterpret_cast<T*>(re_frame + frame_offset);
-}
-
-
-int RegExpMacroAssemblerX87::CheckStackGuardState(Address* return_address,
- Code* re_code,
- Address re_frame) {
- return NativeRegExpMacroAssembler::CheckStackGuardState(
- frame_entry<Isolate*>(re_frame, kIsolate),
- frame_entry<int>(re_frame, kStartIndex),
- frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
- frame_entry_address<String*>(re_frame, kInputString),
- frame_entry_address<const byte*>(re_frame, kInputStart),
- frame_entry_address<const byte*>(re_frame, kInputEnd));
-}
-
-
-Operand RegExpMacroAssemblerX87::register_location(int register_index) {
- DCHECK(register_index < (1<<30));
- if (num_registers_ <= register_index) {
- num_registers_ = register_index + 1;
- }
- return Operand(ebp, kRegisterZero - register_index * kPointerSize);
-}
-
-
-void RegExpMacroAssemblerX87::CheckPosition(int cp_offset,
- Label* on_outside_input) {
- if (cp_offset >= 0) {
- __ cmp(edi, -cp_offset * char_size());
- BranchOrBacktrack(greater_equal, on_outside_input);
- } else {
- __ lea(eax, Operand(edi, cp_offset * char_size()));
- __ cmp(eax, Operand(ebp, kStringStartMinusOne));
- BranchOrBacktrack(less_equal, on_outside_input);
- }
-}
-
-
-void RegExpMacroAssemblerX87::BranchOrBacktrack(Condition condition,
- Label* to) {
- if (condition < 0) { // No condition
- if (to == NULL) {
- Backtrack();
- return;
- }
- __ jmp(to);
- return;
- }
- if (to == NULL) {
- __ j(condition, &backtrack_label_);
- return;
- }
- __ j(condition, to);
-}
-
-
-void RegExpMacroAssemblerX87::SafeCall(Label* to) {
- Label return_to;
- __ push(Immediate::CodeRelativeOffset(&return_to));
- __ jmp(to);
- __ bind(&return_to);
-}
-
-
-void RegExpMacroAssemblerX87::SafeReturn() {
- __ pop(ebx);
- __ add(ebx, Immediate(masm_->CodeObject()));
- __ jmp(ebx);
-}
-
-
-void RegExpMacroAssemblerX87::SafeCallTarget(Label* name) {
- __ bind(name);
-}
-
-
-void RegExpMacroAssemblerX87::Push(Register source) {
- DCHECK(!source.is(backtrack_stackpointer()));
- // Notice: This updates flags, unlike normal Push.
- __ sub(backtrack_stackpointer(), Immediate(kPointerSize));
- __ mov(Operand(backtrack_stackpointer(), 0), source);
-}
-
-
-void RegExpMacroAssemblerX87::Push(Immediate value) {
- // Notice: This updates flags, unlike normal Push.
- __ sub(backtrack_stackpointer(), Immediate(kPointerSize));
- __ mov(Operand(backtrack_stackpointer(), 0), value);
-}
-
-
-void RegExpMacroAssemblerX87::Pop(Register target) {
- DCHECK(!target.is(backtrack_stackpointer()));
- __ mov(target, Operand(backtrack_stackpointer(), 0));
- // Notice: This updates flags, unlike normal Pop.
- __ add(backtrack_stackpointer(), Immediate(kPointerSize));
-}
-
-
-void RegExpMacroAssemblerX87::CheckPreemption() {
- // Check for preemption.
- Label no_preempt;
- ExternalReference stack_limit =
- ExternalReference::address_of_stack_limit(isolate());
- __ cmp(esp, Operand::StaticVariable(stack_limit));
- __ j(above, &no_preempt);
-
- SafeCall(&check_preempt_label_);
-
- __ bind(&no_preempt);
-}
-
-
-void RegExpMacroAssemblerX87::CheckStackLimit() {
- Label no_stack_overflow;
- ExternalReference stack_limit =
- ExternalReference::address_of_regexp_stack_limit(isolate());
- __ cmp(backtrack_stackpointer(), Operand::StaticVariable(stack_limit));
- __ j(above, &no_stack_overflow);
-
- SafeCall(&stack_overflow_label_);
-
- __ bind(&no_stack_overflow);
-}
-
-
-void RegExpMacroAssemblerX87::LoadCurrentCharacterUnchecked(int cp_offset,
- int characters) {
- if (mode_ == LATIN1) {
- if (characters == 4) {
- __ mov(current_character(), Operand(esi, edi, times_1, cp_offset));
- } else if (characters == 2) {
- __ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset));
- } else {
- DCHECK(characters == 1);
- __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
- }
- } else {
- DCHECK(mode_ == UC16);
- if (characters == 2) {
- __ mov(current_character(),
- Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
- } else {
- DCHECK(characters == 1);
- __ movzx_w(current_character(),
- Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
- }
- }
-}
-
-
-#undef __
-
-#endif // V8_INTERPRETED_REGEXP
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_TARGET_ARCH_X87
diff --git a/src/regexp/x87/regexp-macro-assembler-x87.h b/src/regexp/x87/regexp-macro-assembler-x87.h
deleted file mode 100644
index 2f68961..0000000
--- a/src/regexp/x87/regexp-macro-assembler-x87.h
+++ /dev/null
@@ -1,204 +0,0 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_X87_REGEXP_MACRO_ASSEMBLER_X87_H_
-#define V8_REGEXP_X87_REGEXP_MACRO_ASSEMBLER_X87_H_
-
-#include "src/macro-assembler.h"
-#include "src/regexp/regexp-macro-assembler.h"
-#include "src/x87/assembler-x87.h"
-
-namespace v8 {
-namespace internal {
-
-#ifndef V8_INTERPRETED_REGEXP
-class RegExpMacroAssemblerX87: public NativeRegExpMacroAssembler {
- public:
- RegExpMacroAssemblerX87(Isolate* isolate, Zone* zone, Mode mode,
- int registers_to_save);
- virtual ~RegExpMacroAssemblerX87();
- virtual int stack_limit_slack();
- virtual void AdvanceCurrentPosition(int by);
- virtual void AdvanceRegister(int reg, int by);
- virtual void Backtrack();
- virtual void Bind(Label* label);
- virtual void CheckAtStart(Label* on_at_start);
- virtual void CheckCharacter(uint32_t c, Label* on_equal);
- virtual void CheckCharacterAfterAnd(uint32_t c,
- uint32_t mask,
- Label* on_equal);
- virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
- virtual void CheckCharacterLT(uc16 limit, Label* on_less);
- // A "greedy loop" is a loop that is both greedy and with a simple
- // body. It has a particularly simple implementation.
- virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
- virtual void CheckNotBackReference(int start_reg, bool read_backward,
- Label* on_no_match);
- virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward, bool unicode,
- Label* on_no_match);
- virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
- virtual void CheckNotCharacterAfterAnd(uint32_t c,
- uint32_t mask,
- Label* on_not_equal);
- virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
- uc16 minus,
- uc16 mask,
- Label* on_not_equal);
- virtual void CheckCharacterInRange(uc16 from,
- uc16 to,
- Label* on_in_range);
- virtual void CheckCharacterNotInRange(uc16 from,
- uc16 to,
- Label* on_not_in_range);
- virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
-
- // Checks whether the given offset from the current position is before
- // the end of the string.
- virtual void CheckPosition(int cp_offset, Label* on_outside_input);
- virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match);
- virtual void Fail();
- virtual Handle<HeapObject> GetCode(Handle<String> source);
- virtual void GoTo(Label* label);
- virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
- virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
- virtual void IfRegisterEqPos(int reg, Label* if_eq);
- virtual IrregexpImplementation Implementation();
- virtual void LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds = true,
- int characters = 1);
- virtual void PopCurrentPosition();
- virtual void PopRegister(int register_index);
- virtual void PushBacktrack(Label* label);
- virtual void PushCurrentPosition();
- virtual void PushRegister(int register_index,
- StackCheckFlag check_stack_limit);
- virtual void ReadCurrentPositionFromRegister(int reg);
- virtual void ReadStackPointerFromRegister(int reg);
- virtual void SetCurrentPositionFromEnd(int by);
- virtual void SetRegister(int register_index, int to);
- virtual bool Succeed();
- virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
- virtual void ClearRegisters(int reg_from, int reg_to);
- virtual void WriteStackPointerToRegister(int reg);
-
- // Called from RegExp if the stack-guard is triggered.
- // If the code object is relocated, the return address is fixed before
- // returning.
- static int CheckStackGuardState(Address* return_address,
- Code* re_code,
- Address re_frame);
-
- private:
- // Offsets from ebp of function parameters and stored registers.
- static const int kFramePointer = 0;
- // Above the frame pointer - function parameters and return address.
- static const int kReturn_eip = kFramePointer + kPointerSize;
- static const int kFrameAlign = kReturn_eip + kPointerSize;
- // Parameters.
- static const int kInputString = kFrameAlign;
- static const int kStartIndex = kInputString + kPointerSize;
- static const int kInputStart = kStartIndex + kPointerSize;
- static const int kInputEnd = kInputStart + kPointerSize;
- static const int kRegisterOutput = kInputEnd + kPointerSize;
- // For the case of global regular expression, we have room to store at least
- // one set of capture results. For the case of non-global regexp, we ignore
- // this value.
- static const int kNumOutputRegisters = kRegisterOutput + kPointerSize;
- static const int kStackHighEnd = kNumOutputRegisters + kPointerSize;
- static const int kDirectCall = kStackHighEnd + kPointerSize;
- static const int kIsolate = kDirectCall + kPointerSize;
- // Below the frame pointer - local stack variables.
- // When adding local variables remember to push space for them in
- // the frame in GetCode.
- static const int kBackup_esi = kFramePointer - kPointerSize;
- static const int kBackup_edi = kBackup_esi - kPointerSize;
- static const int kBackup_ebx = kBackup_edi - kPointerSize;
- static const int kSuccessfulCaptures = kBackup_ebx - kPointerSize;
- static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
- // First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
-
- // Initial size of code buffer.
- static const size_t kRegExpCodeSize = 1024;
-
- // Load a number of characters at the given offset from the
- // current position, into the current-character register.
- void LoadCurrentCharacterUnchecked(int cp_offset, int character_count);
-
- // Check whether preemption has been requested.
- void CheckPreemption();
-
- // Check whether we are exceeding the stack limit on the backtrack stack.
- void CheckStackLimit();
-
- // Generate a call to CheckStackGuardState.
- void CallCheckStackGuardState(Register scratch);
-
- // The ebp-relative location of a regexp register.
- Operand register_location(int register_index);
-
- // The register containing the current character after LoadCurrentCharacter.
- inline Register current_character() { return edx; }
-
- // The register containing the backtrack stack top. Provides a meaningful
- // name to the register.
- inline Register backtrack_stackpointer() { return ecx; }
-
- // Byte size of chars in the string to match (decided by the Mode argument)
- inline int char_size() { return static_cast<int>(mode_); }
-
- // Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
- void BranchOrBacktrack(Condition condition, Label* to);
-
- // Call and return internally in the generated code in a way that
- // is GC-safe (i.e., doesn't leave absolute code addresses on the stack)
- inline void SafeCall(Label* to);
- inline void SafeReturn();
- inline void SafeCallTarget(Label* name);
-
- // Pushes the value of a register on the backtrack stack. Decrements the
- // stack pointer (ecx) by a word size and stores the register's value there.
- inline void Push(Register source);
-
- // Pushes a value on the backtrack stack. Decrements the stack pointer (ecx)
- // by a word size and stores the value there.
- inline void Push(Immediate value);
-
- // Pops a value from the backtrack stack. Reads the word at the stack pointer
- // (ecx) and increments it by a word size.
- inline void Pop(Register target);
-
- Isolate* isolate() const { return masm_->isolate(); }
-
- MacroAssembler* masm_;
-
- // Which mode to generate code for (LATIN1 or UC16).
- Mode mode_;
-
- // One greater than maximal register index actually used.
- int num_registers_;
-
- // Number of registers to output at the end (the saved registers
- // are always 0..num_saved_registers_-1)
- int num_saved_registers_;
-
- // Labels used internally.
- Label entry_label_;
- Label start_label_;
- Label success_label_;
- Label backtrack_label_;
- Label exit_label_;
- Label check_preempt_label_;
- Label stack_overflow_label_;
-};
-#endif // V8_INTERPRETED_REGEXP
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_X87_REGEXP_MACRO_ASSEMBLER_X87_H_