[sql] Validate database files before enabling memory-mapping.

With regular I/O, filesystem corruption would cause SQLite to return
SQLITE_IOERR codes.  With memory-mapping, filesystem errors can cause a
crash when accessed.  There are databases with filesystem corruption,
this CL only enables memory-mapped I/O for parts of the database which
have been successfully read at some point using regular I/O.

[Relands https://codereview.chromium.org/1426743006 which was reverted at
 https://codereview.chromium.org/1432953002 .  CL is identical, fix to
 sql::Connection was landed at https://codereview.chromium.org/1426743006 .]

BUG=537742
[email protected], [email protected]

Review URL: https://codereview.chromium.org/1442753004

Cr-Commit-Position: refs/heads/master@{#359478}
diff --git a/sql/connection.cc b/sql/connection.cc
index 5385f49f..825d3192 100644
--- a/sql/connection.cc
+++ b/sql/connection.cc
@@ -554,6 +554,8 @@
   scoped_ptr<char[]> buf(new char[page_size]);
   for (sqlite3_int64 pos = 0; pos < preload_size; pos += page_size) {
     rc = file->pMethods->xRead(file, buf.get(), page_size, pos);
+
+    // TODO(shess): Consider calling OnSqliteError().
     if (rc != SQLITE_OK)
       return;
   }
@@ -878,6 +880,144 @@
   return debug_info;
 }
 
+size_t Connection::GetAppropriateMmapSize() {
+  AssertIOAllowed();
+
+  // TODO(shess): Using sql::MetaTable seems indicated, but mixing
+  // sql::MetaTable and direct access seems error-prone.  It might make sense to
+  // simply integrate sql::MetaTable functionality into sql::Connection.
+
+#if defined(OS_IOS)
+  // iOS SQLite does not support memory mapping.
+  return 0;
+#endif
+
+  // If the database doesn't have a place to track progress, assume the worst.
+  // This will happen when new databases are created.
+  if (!DoesTableExist("meta")) {
+    RecordOneEvent(EVENT_MMAP_META_MISSING);
+    return 0;
+  }
+
+  // Key into meta table to get status from a previous run.  The value
+  // represents how much data in bytes has successfully been read from the
+  // database.  |kMmapFailure| indicates that there was a read error and the
+  // database should not be memory-mapped, while |kMmapSuccess| indicates that
+  // the entire file was read at some point and can be memory-mapped without
+  // constraint.
+  const char* kMmapStatusKey = "mmap_status";
+  static const sqlite3_int64 kMmapFailure = -2;
+  static const sqlite3_int64 kMmapSuccess = -1;
+
+  // Start reading from 0 unless status is found in meta table.
+  sqlite3_int64 mmap_ofs = 0;
+
+  // Retrieve the current status.  It is fine for the status to be missing
+  // entirely, but any error prevents memory-mapping.
+  {
+    const char* kMmapStatusSql = "SELECT value FROM meta WHERE key = ?";
+    Statement s(GetUniqueStatement(kMmapStatusSql));
+    s.BindString(0, kMmapStatusKey);
+    if (s.Step()) {
+      mmap_ofs = s.ColumnInt64(0);
+    } else if (!s.Succeeded()) {
+      RecordOneEvent(EVENT_MMAP_META_FAILURE_READ);
+      return 0;
+    }
+  }
+
+  // Database read failed in the past, don't memory map.
+  if (mmap_ofs == kMmapFailure) {
+    RecordOneEvent(EVENT_MMAP_FAILED);
+    return 0;
+  } else if (mmap_ofs != kMmapSuccess) {
+    // Continue reading from previous offset.
+    DCHECK_GE(mmap_ofs, 0);
+
+    // TODO(shess): Could this reading code be shared with Preload()?  It would
+    // require locking twice (this code wouldn't be able to access |db_size| so
+    // the helper would have to return amount read).
+
+    // Read more of the database looking for errors.  The VFS interface is used
+    // to assure that the reads are valid for SQLite.  |g_reads_allowed| is used
+    // to limit checking to 20MB per run of Chromium.
+    sqlite3_file* file = NULL;
+    sqlite3_int64 db_size = 0;
+    if (SQLITE_OK != GetSqlite3FileAndSize(db_, &file, &db_size)) {
+      RecordOneEvent(EVENT_MMAP_VFS_FAILURE);
+      return 0;
+    }
+
+    // Read the data left, or |g_reads_allowed|, whichever is smaller.
+    // |g_reads_allowed| limits the total amount of I/O to spend verifying data
+    // in a single Chromium run.
+    sqlite3_int64 amount = db_size - mmap_ofs;
+    if (amount < 0)
+      amount = 0;
+    if (amount > 0) {
+      base::AutoLock lock(g_sqlite_init_lock.Get());
+      static sqlite3_int64 g_reads_allowed = 20 * 1024 * 1024;
+      if (g_reads_allowed < amount)
+        amount = g_reads_allowed;
+      g_reads_allowed -= amount;
+    }
+
+    // |amount| can be <= 0 if |g_reads_allowed| ran out of quota, or if the
+    // database was truncated after a previous pass.
+    if (amount <= 0 && mmap_ofs < db_size) {
+      DCHECK_EQ(0, amount);
+      RecordOneEvent(EVENT_MMAP_SUCCESS_NO_PROGRESS);
+    } else {
+      static const int kPageSize = 4096;
+      char buf[kPageSize];
+      while (amount > 0) {
+        int rc = file->pMethods->xRead(file, buf, sizeof(buf), mmap_ofs);
+        if (rc == SQLITE_OK) {
+          mmap_ofs += sizeof(buf);
+          amount -= sizeof(buf);
+        } else if (rc == SQLITE_IOERR_SHORT_READ) {
+          // Reached EOF for a database with page size < |kPageSize|.
+          mmap_ofs = db_size;
+          break;
+        } else {
+          // TODO(shess): Consider calling OnSqliteError().
+          mmap_ofs = kMmapFailure;
+          break;
+        }
+      }
+
+      // Log these events after update to distinguish meta update failure.
+      Events event;
+      if (mmap_ofs >= db_size) {
+        mmap_ofs = kMmapSuccess;
+        event = EVENT_MMAP_SUCCESS_NEW;
+      } else if (mmap_ofs > 0) {
+        event = EVENT_MMAP_SUCCESS_PARTIAL;
+      } else {
+        DCHECK_EQ(kMmapFailure, mmap_ofs);
+        event = EVENT_MMAP_FAILED_NEW;
+      }
+
+      const char* kMmapUpdateStatusSql = "REPLACE INTO meta VALUES (?, ?)";
+      Statement s(GetUniqueStatement(kMmapUpdateStatusSql));
+      s.BindString(0, kMmapStatusKey);
+      s.BindInt64(1, mmap_ofs);
+      if (!s.Run()) {
+        RecordOneEvent(EVENT_MMAP_META_FAILURE_UPDATE);
+        return 0;
+      }
+
+      RecordOneEvent(event);
+    }
+  }
+
+  if (mmap_ofs == kMmapFailure)
+    return 0;
+  if (mmap_ofs == kMmapSuccess)
+    return 256 * 1024 * 1024;
+  return mmap_ofs;
+}
+
 void Connection::TrimMemory(bool aggressively) {
   if (!db_)
     return;
@@ -1699,14 +1839,14 @@
   }
 
   // Enable memory-mapped access.  The explicit-disable case is because SQLite
-  // can be built to default-enable mmap.  This value will be capped by
-  // SQLITE_MAX_MMAP_SIZE, which could be different between 32-bit and 64-bit
-  // platforms.
-  if (mmap_disabled_) {
-    ignore_result(Execute("PRAGMA mmap_size = 0"));
-  } else {
-    ignore_result(Execute("PRAGMA mmap_size = 268435456"));  // 256MB.
-  }
+  // can be built to default-enable mmap.  GetAppropriateMmapSize() calculates a
+  // safe range to memory-map based on past regular I/O.  This value will be
+  // capped by SQLITE_MAX_MMAP_SIZE, which could be different between 32-bit and
+  // 64-bit platforms.
+  size_t mmap_size = mmap_disabled_ ? 0 : GetAppropriateMmapSize();
+  std::string mmap_sql =
+      base::StringPrintf("PRAGMA mmap_size = %" PRIuS, mmap_size);
+  ignore_result(Execute(mmap_sql.c_str()));
 
   // Determine if memory-mapping has actually been enabled.  The Execute() above
   // can succeed without changing the amount mapped.