# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
Implements a task builder for benchmarking effects of NoState Prefetch.
Noticeable steps of the task pipeline:
* Save a WPR archive
* Process the WPR archive to make all resources cacheable
* Process cache archive to patch response headers back to their original
values.
* Find out which resources are discoverable by NoState Prefetch
(HTMLPreloadScanner)
* Load pages with empty/full/prefetched cache
* Extract most important metrics to a CSV
"""
import csv
import json
import logging
import os
import re
import shutil
from urlparse import urlparse
import chrome_cache
import common_util
import loading_trace
from prefetch_view import PrefetchSimulationView
from request_dependencies_lens import RequestDependencyLens
import sandwich_metrics
import sandwich_runner
import task_manager
import wpr_backend
class Discoverer(object):
# Do not prefetch anything.
EmptyCache = 'empty-cache'
# Prefetches everything to load fully from cache (impossible in practice).
FullCache = 'full-cache'
# Prefetches the first resource following the redirection chain.
MainDocument = 'main-document'
# All resources which are fetched from the main document and their
# redirections.
Parser = 'parser'
# Simulation of HTMLPreloadScanner on the main document and its
# redirection chain, and the following subset:
#   Store: only resources that don't have Cache-Control: no-store.
HTMLPreloadScanner = 'html-scanner'
HTMLPreloadScannerStore = 'html-scanner-store'
# Set of all available sub-resource discoverers.
SUBRESOURCE_DISCOVERERS = set([
Discoverer.EmptyCache,
Discoverer.FullCache,
Discoverer.MainDocument,
Discoverer.Parser,
Discoverer.HTMLPreloadScanner,
Discoverer.HTMLPreloadScannerStore,
])
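# net::HttpCache::GenerateCacheKey() prefixes the cache entry keys of
# requests that have an upload data stream (e.g. POST requests) with a
# session-unique identifier, giving keys such as '1/https://a.com/submit'
# (illustrative example). This regex extracts the URL part of such keys.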
_UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$')
def _PatchWpr(wpr_archive):
"""Patches a WPR archive to get all resources into the HTTP cache and avoid
invalidation and revalidations.
Args:
wpr_archive: wpr_backend.WprArchiveBackend WPR archive to patch.
"""
# Sets the resources cache max-age to 10 years.
MAX_AGE = 10 * 365 * 24 * 60 * 60
CACHE_CONTROL = 'public, max-age={}'.format(MAX_AGE)
logging.info('number of entries: %d', len(wpr_archive.ListUrlEntries()))
patched_entry_count = 0
for url_entry in wpr_archive.ListUrlEntries():
response_headers = url_entry.GetResponseHeadersDict()
if response_headers.get('cache-control') == CACHE_CONTROL:
continue
# Override the cache-control header to set the resource's max-age to MAX_AGE.
#
# Important note: Some resources holding sensitive information might have
# cache-control set to no-store, which allows the resource to be cached in
# memory but not in the file system. NoState-Prefetch takes care of this
# case. But here, to simulate NoState-Prefetch, we have no other choice but
# to save absolutely all cached resources on disk so that they survive
# killing Chrome between the cache save, modification and push steps.
url_entry.SetResponseHeader('cache-control', CACHE_CONTROL)
# TODO(gabadie): May need to extend Vary blacklist (referer?)
#
# All of these Vary and Pragma directives need to be removed from the
# response headers in order for Chrome to store a resource in the HTTP
# cache and not invalidate it.
url_entry.RemoveResponseHeaderDirectives('vary', {'*', 'cookie'})
url_entry.RemoveResponseHeaderDirectives('pragma', {'no-cache'})
patched_entry_count += 1
logging.info('number of entries patched: %d', patched_entry_count)
def _FilterOutDataAndIncompleteRequests(requests):
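"""Generator filtering out data requests and requests that never received
any response.
Args:
requests: Iterable of request events from a loading trace's request track.
Yields:
Request events that have a known HTTP/1.x protocol.
"""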
for request in filter(lambda r: not r.IsDataRequest(), requests):
# The protocol is only known once the response has been received. But the
# trace recording might have been stopped while some JavaScript-originated
# requests had not received any response yet.
if request.protocol is None:
assert not request.HasReceivedResponse()
continue
if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}:
raise RuntimeError('Unknown request protocol {}'.format(request.protocol))
yield request
def _PatchCacheArchive(cache_archive_path, loading_trace_path,
cache_archive_dest_path):
"""Patch the cache archive.
Note: This method updates the raw response headers of the cache entries to
restore the ones, such as Set-Cookie, that were pruned by
net::HttpCacheTransaction, and removes stream index 2, which holds the
resource's compiled meta data.
Args:
cache_archive_path: Input archive's path to patch.
loading_trace_path: Path of the loading trace recorded while building the
cache archive <cache_archive_path>.
cache_archive_dest_path: Archive destination's path.
"""
trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path:
cache_path = os.path.join(tmp_path, 'cache')
chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path)
cache_backend = chrome_cache.CacheBackend(cache_path, 'simple')
cache_entries = set(cache_backend.ListKeys())
logging.info('Original cache size: %d bytes' % cache_backend.GetSize())
for request in _FilterOutDataAndIncompleteRequests(
trace.request_track.GetEvents()):
# On requests having an upload data stream such as POST requests,
# net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
# the upload data stream's session unique identifier.
#
# It is fine not to patch these requests: when reopening Chrome, there is
# no way the entry can be reused since the upload data stream's identifier
# will be different.
#
# The fact that these entries are kept in the cache even after closing
# Chrome properly, by closing the Chrome tab as ChromeControler.SetSlowDeath()
# does, is a known Chrome bug (crbug.com/610725).
if request.url not in cache_entries:
continue
# Chrome prunes Set-Cookie from response headers before storing them in the
# disk cache. It also adds an implicit "Vary: cookie" header to all redirect
# response headers. Sandwich manages the cache, but between recording the
# cache and benchmarking, the cookie jar is invalidated, which leads to the
# invalidation of all cacheable redirects.
raw_headers = request.GetRawResponseHeaders()
cache_backend.UpdateRawResponseHeaders(request.url, raw_headers)
# NoState-Prefetch would only fetch the resources, but not parse them.
cache_backend.DeleteStreamForKey(request.url, 2)
chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path)
logging.info('Patched cache size: %d bytes' % cache_backend.GetSize())
def _DiscoverRequests(dependencies_lens, subresource_discoverer):
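"""Lists the requests discovered by a given sub-resource discoverer.
Args:
dependencies_lens: RequestDependencyLens of the trace recorded at cache
creation time.
subresource_discoverer: Any member of SUBRESOURCE_DISCOVERERS except
Discoverer.HTMLPreloadScannerStore, which is handled in
_ExtractDiscoverableUrls().
Returns:
A list of request events.
"""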
trace = dependencies_lens.loading_trace
first_resource_request = trace.request_track.GetFirstResourceRequest()
if subresource_discoverer == Discoverer.EmptyCache:
requests = []
elif subresource_discoverer == Discoverer.FullCache:
requests = dependencies_lens.loading_trace.request_track.GetEvents()
elif subresource_discoverer == Discoverer.MainDocument:
requests = [dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
elif subresource_discoverer == Discoverer.Parser:
requests = PrefetchSimulationView.ParserDiscoverableRequests(
first_resource_request, dependencies_lens)
elif subresource_discoverer == Discoverer.HTMLPreloadScanner:
requests = PrefetchSimulationView.PreloadedRequests(
first_resource_request, dependencies_lens, trace)
else:
assert False
logging.info('number of requests discovered by %s: %d',
subresource_discoverer, len(requests))
return requests
def _PruneOutOriginalNoStoreRequests(original_headers_path, requests):
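"""Removes requests whose original response headers have a Cache-Control
header containing no-store.
Args:
original_headers_path: Path of the JSON file mapping URLs to their original
response headers.
requests: List of request events.
Returns:
The list of request events that are allowed to be stored in the cache.
"""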
with open(original_headers_path) as file_input:
original_headers = json.load(file_input)
pruned_requests = set()
for request in requests:
request_original_headers = original_headers[request.url]
if ('cache-control' in request_original_headers and
'no-store' in request_original_headers['cache-control'].lower()):
pruned_requests.add(request)
return [r for r in requests if r not in pruned_requests]
def _ExtractDiscoverableUrls(
original_headers_path, loading_trace_path, subresource_discoverer):
"""Extracts discoverable resource urls from a loading trace according to a
sub-resource discoverer.
Args:
original_headers_path: Path of JSON containing the original headers.
loading_trace_path: Path of the loading trace recorded at original cache
creation.
subresource_discoverer: The sub-resource discoverer that should white-list
the resources to keep in cache for the NoState-Prefetch benchmarks.
Returns:
A set of urls.
"""
assert subresource_discoverer in SUBRESOURCE_DISCOVERERS, \
'unknown prefetch simulation {}'.format(subresource_discoverer)
logging.info('loading %s', loading_trace_path)
trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path)
dependencies_lens = RequestDependencyLens(trace)
# Build the list of discovered requests according to the desired simulation.
discovered_requests = []
if subresource_discoverer == Discoverer.HTMLPreloadScannerStore:
requests = _DiscoverRequests(
dependencies_lens, Discoverer.HTMLPreloadScanner)
discovered_requests = _PruneOutOriginalNoStoreRequests(
original_headers_path, requests)
else:
discovered_requests = _DiscoverRequests(
dependencies_lens, subresource_discoverer)
whitelisted_urls = set()
for request in _FilterOutDataAndIncompleteRequests(discovered_requests):
logging.debug('white-listing %s', request.url)
whitelisted_urls.add(request.url)
logging.info('number of white-listed resources: %d', len(whitelisted_urls))
return whitelisted_urls
def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
"""Compare URL sets and log the diffs.
Args:
ref_url_set: Set of reference urls.
url_set: Set of urls to compare to the reference.
url_set_name: The set name for logging purposes.
"""
assert type(ref_url_set) == set
assert type(url_set) == set
if ref_url_set == url_set:
logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))
return
missing_urls = ref_url_set.difference(url_set)
unexpected_urls = url_set.difference(ref_url_set)
logging.error(' %s are not matching (expected %d, had %d)' % \
(url_set_name, len(ref_url_set), len(url_set)))
logging.error(' List of %d missing resources:' % len(missing_urls))
for url in sorted(missing_urls):
logging.error('- ' + url)
logging.error(' List of %d unexpected resources:' % len(unexpected_urls))
for url in sorted(unexpected_urls):
logging.error('+ ' + url)
class _RequestOutcome(object):
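"""Lists the kinds of request subsets that _ListUrlRequests() can extract."""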
All, ServedFromCache, NotServedFromCache, Post = range(4)
def _ListUrlRequests(trace, request_kind):
"""Lists requested URLs from a trace.
Args:
trace: (loading_trace.LoadingTrace) loading trace.
request_kind: _RequestOutcome indicating the subset of requests to output.
Returns:
set([str])
"""
urls = set()
for request_event in _FilterOutDataAndIncompleteRequests(
trace.request_track.GetEvents()):
if (request_kind == _RequestOutcome.ServedFromCache and
request_event.from_disk_cache):
urls.add(request_event.url)
elif (request_kind == _RequestOutcome.Post and
request_event.method.upper().strip() == 'POST'):
urls.add(request_event.url)
elif (request_kind == _RequestOutcome.NotServedFromCache and
not request_event.from_disk_cache):
urls.add(request_event.url)
elif request_kind == _RequestOutcome.All:
urls.add(request_event.url)
return urls
class _RunOutputVerifier(object):
"""Object to verify benchmark run from traces and WPR log stored in the
runner output directory.
"""
def __init__(self, cache_validation_result, benchmark_setup):
"""Constructor.
Args:
cache_validation_result: JSON of the cache validation task.
benchmark_setup: JSON of the benchmark setup.
"""
self._cache_whitelist = set(benchmark_setup['cache_whitelist'])
self._original_requests = set(
cache_validation_result['effective_encoded_data_lengths'].keys())
self._original_post_requests = set(
cache_validation_result['effective_post_requests'])
self._original_cached_requests = self._original_requests.intersection(
self._cache_whitelist)
self._original_uncached_requests = self._original_requests.difference(
self._cache_whitelist)
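# URLs requested over the network (i.e. not served from Chrome's cache)
# across all traces verified so far, compared against the WPR log in
# VerifyWprLog().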
self._all_sent_url_requests = set()
def VerifyTrace(self, trace):
"""Verifies a trace with the cache validation result and the benchmark
setup.
"""
effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
effective_cached_requests = \
_ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
effective_uncached_requests = \
_ListUrlRequests(trace, _RequestOutcome.NotServedFromCache)
missing_requests = self._original_requests.difference(effective_requests)
unexpected_requests = effective_requests.difference(self._original_requests)
expected_cached_requests = \
self._original_cached_requests.difference(missing_requests)
expected_uncached_requests = self._original_uncached_requests.union(
unexpected_requests).difference(missing_requests)
# POST requests are known to be unable to use the cache.
expected_cached_requests.difference_update(effective_post_requests)
expected_uncached_requests.update(effective_post_requests)
_PrintUrlSetComparison(self._original_requests, effective_requests,
'All resources')
_PrintUrlSetComparison(set(), effective_post_requests, 'POST resources')
_PrintUrlSetComparison(expected_cached_requests, effective_cached_requests,
'Cached resources')
_PrintUrlSetComparison(expected_uncached_requests,
effective_uncached_requests, 'Non cached resources')
self._all_sent_url_requests.update(effective_uncached_requests)
def VerifyWprLog(self, wpr_log_path):
"""Verifies WPR log with previously verified traces."""
all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path)
all_wpr_urls = set()
unserved_wpr_urls = set()
wpr_command_colliding_urls = set()
for request in all_wpr_requests:
if request.is_wpr_host:
continue
if urlparse(request.url).path.startswith('/web-page-replay'):
wpr_command_colliding_urls.add(request.url)
elif request.is_served is False:
unserved_wpr_urls.add(request.url)
all_wpr_urls.add(request.url)
_PrintUrlSetComparison(set(), unserved_wpr_urls,
'Distinct unserved resources from WPR')
_PrintUrlSetComparison(set(), wpr_command_colliding_urls,
'Distinct resources colliding to WPR commands')
_PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests,
'Distinct resource requests to WPR')
def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path):
"""Validates a cache archive content.
Args:
cache_build_trace_path: Path of the generated trace at the cache build time.
cache_archive_path: Cache archive's path to validate.
Returns:
{
'effective_encoded_data_lengths':
{URL of all requests: encoded_data_length},
'effective_post_requests': [URLs of POST requests],
'expected_cached_resources': [URLs of resources expected to be cached],
'successfully_cached_resources': [URLs of cached sub-resources]
}
"""
# TODO(gabadie): What's the best way of propagating errors happening in here?
logging.info('listing cached urls from %s', cache_archive_path)
with common_util.TemporaryDirectory() as cache_directory:
chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
cache_keys = set(
chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys())
trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path)
effective_requests = _ListUrlRequests(trace, _RequestOutcome.All)
effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post)
effective_encoded_data_lengths = {}
for request in _FilterOutDataAndIncompleteRequests(
trace.request_track.GetEvents()):
if request.from_disk_cache or request.served_from_cache:
# At cache archive creation time, a request might be issued several times;
# skip the ones served from the cache to avoid recording an encoded data
# length of 0.
continue
if request.url in effective_encoded_data_lengths:
effective_encoded_data_lengths[request.url] = max(
effective_encoded_data_lengths[request.url],
request.GetEncodedDataLength())
else:
effective_encoded_data_lengths[request.url] = (
request.GetEncodedDataLength())
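# Cache entries generated by requests with an upload data stream have their
# keys prefixed with a session-unique identifier (see
# _UPLOAD_DATA_STREAM_REQUESTS_REGEX); separate them from the regular
# entries.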
upload_data_stream_cache_entry_keys = set()
upload_data_stream_requests = set()
for cache_entry_key in cache_keys:
match = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(cache_entry_key)
if not match:
continue
upload_data_stream_cache_entry_keys.add(cache_entry_key)
upload_data_stream_requests.add(match.group('url'))
expected_cached_requests = effective_requests.difference(
effective_post_requests)
effective_cache_keys = cache_keys.difference(
upload_data_stream_cache_entry_keys)
_PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests,
'POST resources')
_PrintUrlSetComparison(expected_cached_requests, effective_cache_keys,
'Cached resources')
return {
'effective_encoded_data_lengths': effective_encoded_data_lengths,
'effective_post_requests': [url for url in effective_post_requests],
'expected_cached_resources': [url for url in expected_cached_requests],
'successfully_cached_resources': [url for url in effective_cache_keys]
}
def _ProcessRunOutputDir(
cache_validation_result, benchmark_setup, runner_output_dir):
"""Process benchmark's run output directory.
Args:
cache_validation_result: Same as for _RunOutputVerifier
benchmark_setup: Same as for _RunOutputVerifier
runner_output_dir: Same as for SandwichRunner.output_dir
Returns:
List of run metrics dictionaries, one per repeated run.
"""
run_metrics_list = []
run_output_verifier = _RunOutputVerifier(
cache_validation_result, benchmark_setup)
cached_encoded_data_lengths = (
cache_validation_result['effective_encoded_data_lengths'])
for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
runner_output_dir):
trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
logging.info('loading trace: %s', trace_path)
trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
logging.info('verifying trace: %s', trace_path)
run_output_verifier.VerifyTrace(trace)
logging.info('extracting metrics from trace: %s', trace_path)
served_from_network_bytes = 0
served_from_cache_bytes = 0
urls_hitting_network = set()
for request in _FilterOutDataAndIncompleteRequests(
trace.request_track.GetEvents()):
# Ignore requests served from Blink's cache.
if request.served_from_cache:
continue
urls_hitting_network.add(request.url)
if request.from_disk_cache:
served_from_cache_bytes += cached_encoded_data_lengths[request.url]
else:
served_from_network_bytes += request.GetEncodedDataLength()
# Make sure that requests served from Blink's cache have at least one
# corresponding request that was not served from Blink's cache.
for request in _FilterOutDataAndIncompleteRequests(
trace.request_track.GetEvents()):
assert (request.url in urls_hitting_network or
not request.served_from_cache)
run_metrics = {
'url': trace.url,
'repeat_id': repeat_id,
'subresource_discoverer': benchmark_setup['subresource_discoverer'],
'cache_recording.subresource_count':
len(cache_validation_result['effective_encoded_data_lengths']),
'cache_recording.cached_subresource_count_theoretic':
len(cache_validation_result['successfully_cached_resources']),
'cache_recording.cached_subresource_count':
len(cache_validation_result['expected_cached_resources']),
'benchmark.subresource_count': len(_ListUrlRequests(
trace, _RequestOutcome.All)),
'benchmark.served_from_cache_count_theoretic':
len(benchmark_setup['cache_whitelist']),
'benchmark.served_from_cache_count': len(_ListUrlRequests(
trace, _RequestOutcome.ServedFromCache)),
'benchmark.served_from_network_bytes': served_from_network_bytes,
'benchmark.served_from_cache_bytes': served_from_cache_bytes
}
run_metrics.update(
sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
repeat_dir, trace))
run_metrics_list.append(run_metrics)
run_metrics_list.sort(key=lambda e: e['repeat_id'])
wpr_log_path = os.path.join(
runner_output_dir, sandwich_runner.WPR_LOG_FILENAME)
logging.info('verifying wpr log: %s', wpr_log_path)
run_output_verifier.VerifyWprLog(wpr_log_path)
return run_metrics_list
class PrefetchBenchmarkBuilder(task_manager.Builder):
"""A builder for a graph of tasks for NoState-Prefetch emulated benchmarks."""
def __init__(self, common_builder):
task_manager.Builder.__init__(self,
common_builder.output_directory,
common_builder.output_subdirectory)
self._common_builder = common_builder
self._original_headers_path = None
self._wpr_archive_path = None
self._cache_path = None
self._trace_from_grabbing_reference_cache = None
self._cache_validation_task = None
self._PopulateCommonPipelines()
def _PopulateCommonPipelines(self):
"""Creates necessary tasks to produce initial cache archive.
Also creates a task for producing a json file with a mapping of URLs to
subresources (urls-resources.json).
Here is the full dependency tree for the returned task:
common/patched-cache-validation.json
depends on: common/patched-cache.zip
depends on: common/original-cache.zip
depends on: common/webpages-patched.wpr
depends on: common/webpages.wpr
"""
self._original_headers_path = self.RebaseOutputPath(
'common/response-headers.json')
@self.RegisterTask('common/webpages-patched.wpr',
dependencies=[self._common_builder.original_wpr_task])
def BuildPatchedWpr():
common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path)
shutil.copyfile(
self._common_builder.original_wpr_task.path, BuildPatchedWpr.path)
wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path)
# Save the original response headers.
original_response_headers = {e.url: e.GetResponseHeadersDict() \
for e in wpr_archive.ListUrlEntries()}
with open(self._original_headers_path, 'w') as file_output:
json.dump(original_response_headers, file_output)
# Patch WPR.
_PatchWpr(wpr_archive)
wpr_archive.Persist()
@self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr])
def BuildOriginalCache():
runner = self._common_builder.CreateSandwichRunner()
runner.wpr_archive_path = BuildPatchedWpr.path
runner.cache_archive_path = BuildOriginalCache.path
runner.cache_operation = sandwich_runner.CacheOperation.SAVE
runner.output_dir = BuildOriginalCache.run_path
runner.Run()
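# Derive the runner output directory from the archive path by replacing the
# '.zip' suffix with '-run', e.g. (illustrative)
# '<output>/common/original-cache.zip' -> '<output>/common/original-cache-run'.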
BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run'
original_cache_trace_path = os.path.join(
BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME)
@self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache])
def BuildPatchedCache():
_PatchCacheArchive(BuildOriginalCache.path,
original_cache_trace_path, BuildPatchedCache.path)
@self.RegisterTask('common/patched-cache-validation.json',
[BuildPatchedCache])
def ValidatePatchedCache():
cache_validation_result = _ValidateCacheArchiveContent(
original_cache_trace_path, BuildPatchedCache.path)
with open(ValidatePatchedCache.path, 'w') as output:
json.dump(cache_validation_result, output)
self._wpr_archive_path = BuildPatchedWpr.path
self._trace_from_grabbing_reference_cache = original_cache_trace_path
self._cache_path = BuildPatchedCache.path
self._cache_validation_task = ValidatePatchedCache
self._common_builder.default_final_tasks.append(ValidatePatchedCache)
def PopulateLoadBenchmark(self, subresource_discoverer,
transformer_list_name, transformer_list):
"""Populate benchmarking tasks from its setup tasks.
Args:
subresource_discoverer: Name of a sub-resource discoverer.
transformer_list_name: A string describing the transformers, used in task
names (prefer names without spaces or special characters).
transformer_list: An ordered list of functions that take a SandwichRunner
instance as parameter; they are applied in the given order immediately
before SandwichRunner.Run().
Here is the full dependency tree of the tasks added for the returned task:
<transformer_list_name>/<subresource_discoverer>-metrics.csv
depends on: <transformer_list_name>/<subresource_discoverer>-run/
depends on: common/<subresource_discoverer>-cache.zip
depends on: common/<subresource_discoverer>-setup.json
depends on: common/patched-cache-validation.json
"""
additional_column_names = [
'url',
'repeat_id',
'subresource_discoverer',
'cache_recording.subresource_count',
'cache_recording.cached_subresource_count_theoretic',
'cache_recording.cached_subresource_count',
'benchmark.subresource_count',
'benchmark.served_from_cache_count_theoretic',
'benchmark.served_from_cache_count',
'benchmark.served_from_network_bytes',
'benchmark.served_from_cache_bytes']
assert subresource_discoverer in SUBRESOURCE_DISCOVERERS
assert 'common' not in SUBRESOURCE_DISCOVERERS
shared_task_prefix = os.path.join('common', subresource_discoverer)
task_prefix = os.path.join(transformer_list_name, subresource_discoverer)
@self.RegisterTask(shared_task_prefix + '-setup.json', merge=True,
dependencies=[self._cache_validation_task])
def SetupBenchmark():
whitelisted_urls = _ExtractDiscoverableUrls(
original_headers_path=self._original_headers_path,
loading_trace_path=self._trace_from_grabbing_reference_cache,
subresource_discoverer=subresource_discoverer)
common_util.EnsureParentDirectoryExists(SetupBenchmark.path)
with open(SetupBenchmark.path, 'w') as output:
json.dump({
'cache_whitelist': [url for url in whitelisted_urls],
'subresource_discoverer': subresource_discoverer,
}, output)
@self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True,
dependencies=[SetupBenchmark])
def BuildBenchmarkCacheArchive():
with open(SetupBenchmark.path) as setup_file:
benchmark_setup = json.load(setup_file)
chrome_cache.ApplyUrlWhitelistToCacheArchive(
cache_archive_path=self._cache_path,
whitelisted_urls=benchmark_setup['cache_whitelist'],
output_cache_archive_path=BuildBenchmarkCacheArchive.path)
@self.RegisterTask(task_prefix + '-run/',
dependencies=[BuildBenchmarkCacheArchive])
def RunBenchmark():
runner = self._common_builder.CreateSandwichRunner()
for transformer in transformer_list:
transformer(runner)
runner.wpr_archive_path = self._wpr_archive_path
runner.wpr_out_log_path = os.path.join(
RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME)
runner.cache_archive_path = BuildBenchmarkCacheArchive.path
runner.cache_operation = sandwich_runner.CacheOperation.PUSH
runner.output_dir = RunBenchmark.path
runner.Run()
@self.RegisterTask(task_prefix + '-metrics.csv',
dependencies=[RunBenchmark])
def ProcessRunOutputDir():
with open(SetupBenchmark.path) as setup_file:
benchmark_setup = json.load(setup_file)
with open(self._cache_validation_task.path) as validation_file:
cache_validation_result = json.load(validation_file)
run_metrics_list = _ProcessRunOutputDir(
cache_validation_result, benchmark_setup, RunBenchmark.path)
with open(ProcessRunOutputDir.path, 'w') as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names +
sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
writer.writeheader()
for trace_metrics in run_metrics_list:
writer.writerow(trace_metrics)
self._common_builder.default_final_tasks.append(ProcessRunOutputDir)