Cache Autofill heuristics regexes

Runtime for FormStructureBrowserTest.DataDrivenHeuristics decreased from 2.4s to 1.5s.
In particular, the test now spends 61ms in AutofillManager::ParseForms, down from 738ms.

BUG=72242
TEST=unit_tests --gtest_filter=Autofill*:FormStructure*, browser_tests --gtest_filter=Autofill*:FormStructure*

Review URL: http://codereview.chromium.org/7066043

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@87710 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/chrome/browser/autofill/autofill_regexes.cc b/chrome/browser/autofill/autofill_regexes.cc
new file mode 100644
index 0000000..5329958
--- /dev/null
+++ b/chrome/browser/autofill/autofill_regexes.cc
@@ -0,0 +1,84 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/autofill/autofill_regexes.h"
+
+#include <map>
+#include <utility>
+
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/memory/singleton.h"
+#include "base/stl_util-inl.h"
+#include "base/string16.h"
+#include "unicode/regex.h"
+
+namespace {
+
+// Singleton cache of compiled ICU regex matchers, keyed by pattern string.
+class AutofillRegexes {
+ public:
+  static AutofillRegexes* GetInstance();  // Returns the singleton instance.
+
+  // Returns the matcher for |pattern|, compiling and caching it on first use.
+  icu::RegexMatcher* GetMatcher(const string16& pattern);
+
+ private:
+  AutofillRegexes();
+  ~AutofillRegexes();
+  friend struct DefaultSingletonTraits<AutofillRegexes>;
+
+  // Maps patterns to their compiled matchers; the destructor deletes them.
+  std::map<string16, icu::RegexMatcher*> matchers_;
+
+  DISALLOW_COPY_AND_ASSIGN(AutofillRegexes);
+};
+
+// static -- returns the instance provided by Singleton<AutofillRegexes>.
+AutofillRegexes* AutofillRegexes::GetInstance() {
+  return Singleton<AutofillRegexes>::get();
+}
+
+AutofillRegexes::AutofillRegexes() {  // |matchers_| starts out empty.
+}
+
+AutofillRegexes::~AutofillRegexes() {  // Frees every cached matcher.
+  STLDeleteContainerPairSecondPointers(matchers_.begin(),
+                                       matchers_.end());
+}
+
+icu::RegexMatcher* AutofillRegexes::GetMatcher(const string16& pattern) {
+  if (!matchers_.count(pattern)) {  // Cache miss: compile |pattern| once.
+    const icu::UnicodeString icu_pattern(pattern.data(), pattern.length());
+
+    UErrorCode status = U_ZERO_ERROR;
+    icu::RegexMatcher* matcher = new icu::RegexMatcher(icu_pattern,
+                                                       UREGEX_CASE_INSENSITIVE,
+                                                       status);
+    DCHECK(U_SUCCESS(status));  // NOTE(review): cached below even on failure.
+
+    matchers_.insert(std::make_pair(pattern, matcher));
+  }
+
+  return matchers_[pattern];  // Still owned by the cache; do not delete.
+}
+
+}  // namespace
+
+namespace autofill {
+
+bool MatchesPattern(const string16& input, const string16& pattern) {
+  icu::RegexMatcher* matcher =
+      AutofillRegexes::GetInstance()->GetMatcher(pattern);  // Shared, cached.
+  icu::UnicodeString icu_input(input.data(), input.length());
+  matcher->reset(icu_input);  // NOTE(review): matcher outlives |icu_input|.
+
+  UErrorCode status = U_ZERO_ERROR;
+  UBool match = matcher->find(0, status);  // Search starting at offset 0.
+  DCHECK(U_SUCCESS(status));
+  return !!match;  // UBool -> bool.
+}
+
+}  // namespace autofill
+
diff --git a/chrome/browser/autofill/autofill_regexes.h b/chrome/browser/autofill/autofill_regexes.h
new file mode 100644
index 0000000..21490355
--- /dev/null
+++ b/chrome/browser/autofill/autofill_regexes.h
@@ -0,0 +1,20 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_AUTOFILL_AUTOFILL_REGEXES_H_
+#define CHROME_BROWSER_AUTOFILL_AUTOFILL_REGEXES_H_
+#pragma once
+
+#include "base/string16.h"
+
+// Parsing utilities.
+namespace autofill {
+
+// Case-insensitive regular expression search; |pattern| is an ICU regex.
+// Returns true if |pattern| is found anywhere in |input| (not a full match).
+bool MatchesPattern(const string16& input, const string16& pattern);
+
+}  // namespace autofill
+
+#endif  // CHROME_BROWSER_AUTOFILL_AUTOFILL_REGEXES_H_
diff --git a/chrome/browser/autofill/autofill_scanner.cc b/chrome/browser/autofill/autofill_scanner.cc
index 6526fcc..1fd8a7ec 100644
--- a/chrome/browser/autofill/autofill_scanner.cc
+++ b/chrome/browser/autofill/autofill_scanner.cc
@@ -6,7 +6,6 @@
 
 #include "base/logging.h"
 #include "chrome/browser/autofill/autofill_field.h"
-#include "unicode/regex.h"
 
 AutofillScanner::AutofillScanner(
     const std::vector<const AutofillField*>& fields)
@@ -44,21 +43,3 @@
 void AutofillScanner::SaveCursor() {
   saved_cursors_.push_back(cursor_);
 }
-
-namespace autofill {
-
-bool MatchString(const string16& input, const string16& pattern) {
-  UErrorCode status = U_ZERO_ERROR;
-  icu::UnicodeString icu_pattern(pattern.data(), pattern.length());
-  icu::UnicodeString icu_input(input.data(), input.length());
-  icu::RegexMatcher matcher(icu_pattern, icu_input,
-                            UREGEX_CASE_INSENSITIVE, status);
-  DCHECK(U_SUCCESS(status));
-
-  UBool match = matcher.find(0, status);
-  DCHECK(U_SUCCESS(status));
-  return !!match;
-}
-
-}  // namespace autofill
-
diff --git a/chrome/browser/autofill/autofill_scanner.h b/chrome/browser/autofill/autofill_scanner.h
index 7eca0ce7..789889b 100644
--- a/chrome/browser/autofill/autofill_scanner.h
+++ b/chrome/browser/autofill/autofill_scanner.h
@@ -49,13 +49,4 @@
   DISALLOW_COPY_AND_ASSIGN(AutofillScanner);
 };
 
-// Parsing utilities.
-namespace autofill {
-
-// Case-insensitive regular expression matching.  Returns true if |pattern| is
-// found in |input|.
-bool MatchString(const string16& input, const string16& pattern);
-
-}  // namespace autofill
-
 #endif  // CHROME_BROWSER_AUTOFILL_AUTOFILL_SCANNER_H_
diff --git a/chrome/browser/autofill/credit_card.cc b/chrome/browser/autofill/credit_card.cc
index 4e487da..8abf7fa 100644
--- a/chrome/browser/autofill/credit_card.cc
+++ b/chrome/browser/autofill/credit_card.cc
@@ -14,7 +14,7 @@
 #include "base/string_split.h"
 #include "base/string_util.h"
 #include "base/utf_string_conversions.h"
-#include "chrome/browser/autofill/autofill_scanner.h"
+#include "chrome/browser/autofill/autofill_regexes.h"
 #include "chrome/browser/autofill/autofill_type.h"
 #include "chrome/browser/autofill/field_types.h"
 #include "chrome/common/guid.h"
@@ -320,7 +320,7 @@
 
 void CreditCard::SetInfoForMonthInputType(const string16& value) {
   // Check if |text| is "yyyy-mm" format first, and check normal month format.
-  if (!autofill::MatchString(value, UTF8ToUTF16("^[0-9]{4}-[0-9]{1,2}$")))
+  if (!autofill::MatchesPattern(value, UTF8ToUTF16("^[0-9]{4}-[0-9]{1,2}$")))
     return;
 
   std::vector<string16> year_month;
diff --git a/chrome/browser/autofill/form_field.cc b/chrome/browser/autofill/form_field.cc
index 01f2ee1..eaffc16 100644
--- a/chrome/browser/autofill/form_field.cc
+++ b/chrome/browser/autofill/form_field.cc
@@ -16,6 +16,7 @@
 #include "chrome/browser/autofill/autofill_ecml.h"
 #include "chrome/browser/autofill/address_field.h"
 #include "chrome/browser/autofill/autofill_field.h"
+#include "chrome/browser/autofill/autofill_regexes.h"
 #include "chrome/browser/autofill/autofill_scanner.h"
 #include "chrome/browser/autofill/credit_card_field.h"
 #include "chrome/browser/autofill/email_field.h"
@@ -151,12 +152,12 @@
                       const string16& pattern,
                       int match_type) {
   if ((match_type & FormField::MATCH_LABEL) &&
-      autofill::MatchString(field->label, pattern)) {
+      autofill::MatchesPattern(field->label, pattern)) {
     return true;
   }
 
   if ((match_type & FormField::MATCH_NAME) &&
-      autofill::MatchString(field->name, pattern)) {
+      autofill::MatchesPattern(field->name, pattern)) {
     return true;
   }
 
diff --git a/chrome/browser/autofill/personal_data_manager.cc b/chrome/browser/autofill/personal_data_manager.cc
index d3d78ad..d62c8ba 100644
--- a/chrome/browser/autofill/personal_data_manager.cc
+++ b/chrome/browser/autofill/personal_data_manager.cc
@@ -13,7 +13,7 @@
 #include "chrome/browser/autofill/autofill-inl.h"
 #include "chrome/browser/autofill/autofill_field.h"
 #include "chrome/browser/autofill/autofill_metrics.h"
-#include "chrome/browser/autofill/autofill_scanner.h"
+#include "chrome/browser/autofill/autofill_regexes.h"
 #include "chrome/browser/autofill/form_structure.h"
 #include "chrome/browser/autofill/phone_number.h"
 #include "chrome/browser/autofill/phone_number_i18n.h"
@@ -76,14 +76,14 @@
   // This regex is more permissive than the official rfc2822 spec on the
   // subject, but it does reject obvious non-email addresses.
   const string16 kEmailPattern = ASCIIToUTF16("^[^@]+@[^@]+\\.[a-z]{2,6}$");
-  return autofill::MatchString(value, kEmailPattern);
+  return autofill::MatchesPattern(value, kEmailPattern);
 }
 
 // Valid for US zip codes only.
 bool IsValidZip(const string16& value) {
   // Basic US zip code matching.
   const string16 kZipPattern = ASCIIToUTF16("^\\d{5}(-\\d{4})?$");
-  return autofill::MatchString(value, kZipPattern);
+  return autofill::MatchesPattern(value, kZipPattern);
 }
 
 // Returns true if minimum requirements for import of a given |profile| have
diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi
index c80cfd3..a505502 100644
--- a/chrome/chrome_browser.gypi
+++ b/chrome/chrome_browser.gypi
@@ -164,6 +164,8 @@
         'browser/autofill/autofill_metrics.h',
         'browser/autofill/autofill_profile.cc',
         'browser/autofill/autofill_profile.h',
+        'browser/autofill/autofill_regexes.cc',
+        'browser/autofill/autofill_regexes.h',
         'browser/autofill/autofill_scanner.cc',
         'browser/autofill/autofill_scanner.h',
         'browser/autofill/autofill_type.cc',