// blob: 037896d0a4450143e55a8de13b6bac2a7c7960c4 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/browser/address_field.h"
#include <stddef.h>
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/string16.h"
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
#include "components/autofill/browser/autofill_field.h"
#include "components/autofill/browser/autofill_regex_constants.h"
#include "components/autofill/browser/autofill_scanner.h"
#include "components/autofill/browser/field_types.h"
#include "ui/base/l10n/l10n_util.h"
namespace autofill {
// Tries to parse a group of address fields beginning at the scanner's current
// position.  The individual components (address lines, city, state, zip,
// country, company) may appear in any order.
//
// Returns a heap-allocated AddressField, released from its scoped_ptr to the
// caller, if at least one address component was matched.  Otherwise the
// scanner is rewound to where it started and NULL is returned.
FormField* AddressField::Parse(AutofillScanner* scanner) {
  if (scanner->IsEnd())
    return NULL;

  scoped_ptr<AddressField> result(new AddressField);
  AddressField* const fields = result.get();

  // Remember where parsing started, both as a field pointer (to detect that
  // nothing has been consumed yet) and as a cursor (to rewind on failure).
  const AutofillField* const first_field = scanner->Cursor();
  const size_t start_cursor = scanner->SaveCursor();

  const base::string16 attention_pattern =
      UTF8ToUTF16(autofill::kAttentionIgnoredRe);
  const base::string16 region_pattern =
      UTF8ToUTF16(autofill::kRegionIgnoredRe);

  // Tracks a run of non-labeled fields that has not (yet) been followed by a
  // recognized address component, so that the run can be given back to the
  // scanner if it turns out to be at the end of the address.
  bool in_trailing_unlabeled_run = false;
  size_t trailing_unlabeled_start = 0;

  while (!scanner->IsEnd()) {
    const size_t iteration_start = scanner->SaveCursor();

    // The parsers are tried in this fixed order; the first one that matches
    // wins and the remaining ones are not attempted for this iteration.
    const bool matched_component =
        ParseAddressLines(scanner, fields) ||
        ParseCity(scanner, fields) ||
        ParseState(scanner, fields) ||
        ParseZipCode(scanner, fields) ||
        ParseCountry(scanner, fields) ||
        ParseCompany(scanner, fields);
    if (matched_component) {
      in_trailing_unlabeled_run = false;
      continue;
    }

    // Fields that are matched but deliberately not recorded:
    //   * Attention.
    //   * Province/Region/Other.
    if (ParseField(scanner, attention_pattern, NULL) ||
        ParseField(scanner, region_pattern, NULL)) {
      continue;
    }

    // Non-labeled fields inside an address are skipped (the page
    // "MapQuest Driving Directions North America.html" contains one), but
    // only once the scanner has moved past the first field.  Otherwise
    // address parsing would effectively take precedence over other field
    // types following a non-labeled field, and email address fields must win
    // because some pages label them "Email address".
    if (scanner->Cursor() == first_field || !ParseEmptyLabel(scanner, NULL)) {
      // Nothing recognizable here; stop scanning.
      break;
    }

    if (!in_trailing_unlabeled_run) {
      in_trailing_unlabeled_run = true;
      trailing_unlabeled_start = iteration_start;
    }
  }

  const bool found_any_component =
      fields->company_ != NULL ||
      fields->address1_ != NULL ||
      fields->address2_ != NULL ||
      fields->city_ != NULL ||
      fields->state_ != NULL ||
      fields->zip_ != NULL ||
      fields->zip4_ != NULL ||
      fields->country_ != NULL;
  if (!found_any_component) {
    // Not an address after all; leave the scanner as we found it.
    scanner->RewindTo(start_cursor);
    return NULL;
  }

  // Don't slurp non-labeled fields at the end into the address.
  if (in_trailing_unlabeled_run)
    scanner->RewindTo(trailing_unlabeled_start);

  fields->type_ = fields->FindType();
  return result.release();
}
// Derives the address type (generic / billing / shipping) from the name of the
// first matched field, since field names will sometimes contain "bill" or
// "ship".  Fields are consulted in the order company, address line 1, address
// line 2, city, zip, state, country; only the first non-NULL one is examined.
// zip4_ is not consulted.  Returns kGenericAddress when none of them is set.
AddressField::AddressType AddressField::FindType() const {
  const AutofillField* const by_priority[] = {
    company_,
    address1_,
    address2_,
    city_,
    zip_,
    state_,
    country_,
  };
  const size_t field_count = sizeof(by_priority) / sizeof(by_priority[0]);

  for (size_t i = 0; i < field_count; ++i) {
    const AutofillField* const field = by_priority[i];
    if (!field)
      continue;

    // The first field present decides the type, even if its name carries no
    // billing/shipping hint.
    const base::string16 lowered_name = StringToLowerASCII(field->name);
    return AddressTypeFromText(lowered_name);
  }

  return kGenericAddress;
}
// Constructs an AddressField with no matched components: every field pointer
// starts out NULL and the address type starts out as kGenericAddress.
AddressField::AddressField()
: company_(NULL),
address1_(NULL),
address2_(NULL),
city_(NULL),
state_(NULL),
zip_(NULL),
zip4_(NULL),
country_(NULL),
type_(kGenericAddress) {
}
// Records a field type in |map| for each address component held by this
// object, choosing between the "home" and "billing" families of types based on
// type_.  Returns true only if every AddClassification() call returns true;
// the first call that returns false stops the sequence.  Returns false without
// touching |map| when type_ has an unexpected value.  zip4_ is not classified.
bool AddressField::ClassifyField(FieldTypeMap* map) const {
  // Start from the home-address types, which serve generic and shipping
  // addresses alike.  The company type is the same for every address type.
  AutofillFieldType company_type = COMPANY_NAME;
  AutofillFieldType line1_type = ADDRESS_HOME_LINE1;
  AutofillFieldType line2_type = ADDRESS_HOME_LINE2;
  AutofillFieldType city_type = ADDRESS_HOME_CITY;
  AutofillFieldType state_type = ADDRESS_HOME_STATE;
  AutofillFieldType zip_type = ADDRESS_HOME_ZIP;
  AutofillFieldType country_type = ADDRESS_HOME_COUNTRY;

  switch (type_) {
    case kGenericAddress:
    case kShippingAddress:
      // Autofill does not support shipping addresses, so they are classified
      // exactly like generic ones.
      break;

    case kBillingAddress:
      line1_type = ADDRESS_BILLING_LINE1;
      line2_type = ADDRESS_BILLING_LINE2;
      city_type = ADDRESS_BILLING_CITY;
      state_type = ADDRESS_BILLING_STATE;
      zip_type = ADDRESS_BILLING_ZIP;
      country_type = ADDRESS_BILLING_COUNTRY;
      break;

    default:
      NOTREACHED();
      return false;
  }

  // Short-circuits on the first failure, so later components are not added
  // once one classification has been rejected.
  return AddClassification(company_, company_type, map) &&
         AddClassification(address1_, line1_type, map) &&
         AddClassification(address2_, line2_type, map) &&
         AddClassification(city_, city_type, map) &&
         AddClassification(state_, state_type, map) &&
         AddClassification(zip_, zip_type, map) &&
         AddClassification(country_, country_type, map);
}
// static
// Tries to match a company-name field at the scanner's current position and
// store it in |address_field|.  Returns false without parsing when a company
// field is already recorded and that field's IsEmpty() is false; otherwise
// returns the result of ParseField().
bool AddressField::ParseCompany(AutofillScanner* scanner,
                                AddressField* address_field) {
  const bool already_have_company =
      address_field->company_ && !address_field->company_->IsEmpty();
  if (already_have_company)
    return false;

  const base::string16 company_pattern = UTF8ToUTF16(autofill::kCompanyRe);
  return ParseField(scanner, company_pattern, &address_field->company_);
}
// static
// Tries to match the street-address lines at the scanner's current position.
// Address line 1 is mandatory; a second line is optional and, when found, a
// third line is also looked for but not stored.  Returns false when line 1 is
// already recorded or cannot be matched, and true otherwise.
bool AddressField::ParseAddressLines(AutofillScanner* scanner,
                                     AddressField* address_field) {
  if (address_field->address1_)
    return false;

  const int label_or_text = MATCH_LABEL | MATCH_TEXT;

  // The string "address" is only matched in page text, not in element names,
  // because sometimes every element in a group of address fields has a name
  // containing "address"; for example, on the page
  // Kohl's - Register Billing Address.html the text element labeled "city"
  // has the name "BILL_TO_ADDRESS<>city".  Address labels such as "address1"
  // are matched, since they appear as element names on various pages (e.g.
  // AmericanGirl-Registration.html, BloomingdalesBilling.html,
  // EBay Registration Enter Information.html).
  const base::string16 line1_pattern =
      UTF8ToUTF16(autofill::kAddressLine1Re);
  const base::string16 line1_label_pattern =
      UTF8ToUTF16(autofill::kAddressLine1LabelRe);
  const bool found_line1 =
      ParseField(scanner, line1_pattern, &address_field->address1_) ||
      ParseFieldSpecifics(scanner, line1_label_pattern, label_or_text,
                          &address_field->address1_);
  if (!found_line1)
    return false;

  // Optionally parse more address lines, which may have empty labels.
  // Some pages have 3 address lines (e.g. SharperImageModifyAccount.html);
  // some even have 4 (e.g. uk/ShoesDirect2.html)!
  // Attempts are made in order: empty label, line-2 pattern, then the line-2
  // label pattern restricted to label/text matching.
  const base::string16 line2_pattern =
      UTF8ToUTF16(autofill::kAddressLine2Re);
  const base::string16 line2_label_pattern =
      UTF8ToUTF16(autofill::kAddressLine2LabelRe);
  if (!(ParseEmptyLabel(scanner, &address_field->address2_) ||
        ParseField(scanner, line2_pattern, &address_field->address2_))) {
    ParseFieldSpecifics(scanner, line2_label_pattern, label_or_text,
                        &address_field->address2_);
  }

  // Try for a third line, which is promptly discarded (no output pointer).
  if (address_field->address2_ != NULL) {
    const base::string16 line3_pattern =
        UTF8ToUTF16(autofill::kAddressLine3Re);
    ParseField(scanner, line3_pattern, NULL);
  }

  return true;
}
// static
// Tries to match a country field at the scanner's current position; select
// elements are allowed in addition to the default match targets.  The
// occasional page (e.g. Travelocity_New Member Information1.html) calls this
// a "location".  Returns false without parsing when a country field is
// already recorded and that field's IsEmpty() is false.
bool AddressField::ParseCountry(AutofillScanner* scanner,
                                AddressField* address_field) {
  const bool already_have_country =
      address_field->country_ && !address_field->country_->IsEmpty();
  if (already_have_country)
    return false;

  const base::string16 country_pattern = UTF8ToUTF16(autofill::kCountryRe);
  const int match_type = MATCH_DEFAULT | MATCH_SELECT;
  return ParseFieldSpecifics(scanner, country_pattern, match_type,
                             &address_field->country_);
}
// static
// Tries to match a zip code field at the scanner's current position and, if
// one is found, an optional zip+4 field right after it.  On some UK pages
// (e.g. The China Shop2.html) the zip code is called a "post code".
//
// HACK: Just for the MapQuest driving directions page the exact name "1z",
// which MapQuest uses to label its zip code field, is matched.  Hopefully
// before long we'll be smart enough to find the zip code on that page
// automatically.
//
// Returns true when a zip code field was matched; false when one is already
// recorded or none matches here.
bool AddressField::ParseZipCode(AutofillScanner* scanner,
                                AddressField* address_field) {
  if (address_field->zip_)
    return false;

  const base::string16 zip_pattern = UTF8ToUTF16(autofill::kZipCodeRe);
  const bool found_zip =
      ParseField(scanner, zip_pattern, &address_field->zip_);
  if (found_zip) {
    address_field->type_ = kGenericAddress;

    // Look for a zip+4, whose field name will also often contain the
    // substring "zip".  Its absence is not an error, so the result is ignored.
    const base::string16 zip4_pattern = UTF8ToUTF16(autofill::kZip4Re);
    ParseField(scanner, zip4_pattern, &address_field->zip4_);
  }
  return found_zip;
}
// static
// Tries to match a city field at the scanner's current position.  Some UK
// pages (e.g. The China Shop2.html) use the term "town".  Select elements are
// allowed here; this occurs on top-100 site rediff.com.  Returns false without
// parsing when a city field is already recorded.
bool AddressField::ParseCity(AutofillScanner* scanner,
                             AddressField* address_field) {
  const bool already_have_city = address_field->city_ != NULL;
  if (already_have_city)
    return false;

  const base::string16 city_pattern = UTF8ToUTF16(autofill::kCityRe);
  const int match_type = MATCH_DEFAULT | MATCH_SELECT;
  return ParseFieldSpecifics(scanner, city_pattern, match_type,
                             &address_field->city_);
}
// static
// Tries to match a state field at the scanner's current position; select
// elements are allowed in addition to the default match targets.  Returns
// false without parsing when a state field is already recorded.
bool AddressField::ParseState(AutofillScanner* scanner,
                              AddressField* address_field) {
  const bool already_have_state = address_field->state_ != NULL;
  if (already_have_state)
    return false;

  const base::string16 state_pattern = UTF8ToUTF16(autofill::kStateRe);
  const int match_type = MATCH_DEFAULT | MATCH_SELECT;
  return ParseFieldSpecifics(scanner, state_pattern, match_type,
                             &address_field->state_);
}
// Classifies |text| as describing a billing, shipping or generic address by
// plain substring search (find/rfind) for the designator constants.
//
//   * Text containing the "same as" / "use my" markers is always generic.
//   * Text with only a billing (or only a shipping) designator gets that type.
//   * Text with both gets the type whose last occurrence comes later.
//   * Text with neither is generic.
AddressField::AddressType AddressField::AddressTypeFromText(
    const base::string16& text) {
  const size_t not_found = base::string16::npos;

  // This text could be a checkbox label such as "same as my billing address"
  // or "use my shipping address".
  // ++ It would help if we generally skipped all text that appears after a
  // check box.
  const bool has_same_as =
      text.find(UTF8ToUTF16(autofill::kAddressTypeSameAsRe)) != not_found;
  const bool has_use_my =
      text.find(UTF8ToUTF16(autofill::kAddressTypeUseMyRe)) != not_found;
  if (has_same_as || has_use_my)
    return kGenericAddress;

  // Not all pages say "billing address" and "shipping address" explicitly;
  // for example, Craft Catalog1.html has "Bill-to Address" and
  // "Ship-to Address".
  const size_t bill_pos =
      text.rfind(UTF8ToUTF16(autofill::kBillingDesignatorRe));
  const size_t ship_pos =
      text.rfind(UTF8ToUTF16(autofill::kShippingDesignatorRe));
  const bool has_bill = bill_pos != not_found;
  const bool has_ship = ship_pos != not_found;

  if (!has_bill && !has_ship)
    return kGenericAddress;

  if (has_bill && has_ship) {
    // Both designators appear; the one occurring last in the text wins.
    return bill_pos > ship_pos ? kBillingAddress : kShippingAddress;
  }

  return has_bill ? kBillingAddress : kShippingAddress;
}
} // namespace autofill