| # Copyright 2016 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """ |
| Implements a task builder for benchmarking effects of NoState Prefetch. |
Notable steps of the task pipeline:
| * Save a WPR archive |
| * Process the WPR archive to make all resources cacheable |
* Process the cache archive to patch response headers back to their original
| values. |
| * Find out which resources are discoverable by NoState Prefetch |
| (HTMLPreloadScanner) |
| * Load pages with empty/full/prefetched cache |
* Extract the most important metrics to a CSV
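
Illustrative usage (a sketch: `common_builder` is assumed to be provided by
the surrounding sandwich task framework, and the transformer list name below
is hypothetical):

  builder = PrefetchBenchmarkBuilder(common_builder)
  builder.PopulateLoadBenchmark(Discoverer.HTMLPreloadScanner,
                                'no-network-emulation',
                                transformer_list=[])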
| """ |
| |
| import csv |
import json
import logging
| import os |
| import re |
| import shutil |
| from urlparse import urlparse |
| |
| import chrome_cache |
| import common_util |
| import loading_trace |
| from prefetch_view import PrefetchSimulationView |
| from request_dependencies_lens import RequestDependencyLens |
| import sandwich_metrics |
| import sandwich_runner |
| import task_manager |
| import wpr_backend |
| |
| |
| class Discoverer(object): |
| # Do not prefetch anything. |
| EmptyCache = 'empty-cache' |
| |
| # Prefetches everything to load fully from cache (impossible in practice). |
| FullCache = 'full-cache' |
| |
  # Prefetches only the main document, after following its redirection chain.
| MainDocument = 'main-document' |
| |
  # All resources that are fetched from the main document, and their
  # redirections.
| Parser = 'parser' |
| |
  # Simulation of the HTMLPreloadScanner on the main document and its
  # redirections, and a subset of it:
  #   Store: keeps only resources that don't have Cache-Control: no-store.
| HTMLPreloadScanner = 'html-scanner' |
| HTMLPreloadScannerStore = 'html-scanner-store' |
| |
| |
# Set of all available sub-resource discoverers.
| SUBRESOURCE_DISCOVERERS = set([ |
| Discoverer.EmptyCache, |
| Discoverer.FullCache, |
| Discoverer.MainDocument, |
| Discoverer.Parser, |
| Discoverer.HTMLPreloadScanner, |
| Discoverer.HTMLPreloadScannerStore, |
| ]) |
| |
| |
| _UPLOAD_DATA_STREAM_REQUESTS_REGEX = re.compile(r'^\d+/(?P<url>.*)$') |
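# Illustrative (assumed) example of a cache entry key matched by
# _UPLOAD_DATA_STREAM_REQUESTS_REGEX: '1/https://example.com/submit', where
# the leading integer is the upload data stream's session-unique identifier
# and the 'url' group captures the remaining URL.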
| |
| |
| def _PatchWpr(wpr_archive): |
| """Patches a WPR archive to get all resources into the HTTP cache and avoid |
| invalidation and revalidations. |
| |
| Args: |
| wpr_archive: wpr_backend.WprArchiveBackend WPR archive to patch. |
| """ |
  # Sets the resources' cache max-age to 10 years.
| MAX_AGE = 10 * 365 * 24 * 60 * 60 |
| CACHE_CONTROL = 'public, max-age={}'.format(MAX_AGE) |
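  # For reference, CACHE_CONTROL evaluates to 'public, max-age=315360000'
  # (315360000 seconds = 10 years).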
| |
| logging.info('number of entries: %d', len(wpr_archive.ListUrlEntries())) |
| patched_entry_count = 0 |
| for url_entry in wpr_archive.ListUrlEntries(): |
| response_headers = url_entry.GetResponseHeadersDict() |
    if ('cache-control' in response_headers and
        response_headers['cache-control'] == CACHE_CONTROL):
| continue |
    # Override the cache-control header to set the resources' max-age to
    # MAX_AGE.
    #
    # Important note: Some resources holding sensitive information might have
    # cache-control set to no-store, which allows the resource to be cached in
    # memory but not in the file system. NoState-Prefetch takes care of this
    # case, but here, to simulate NoState-Prefetch, we have no choice but to
    # save absolutely all cached resources on disk so that they survive
    # killing Chrome for cache saving, modification and pushing.
| url_entry.SetResponseHeader('cache-control', CACHE_CONTROL) |
| |
| # TODO(gabadie): May need to extend Vary blacklist (referer?) |
| # |
    # All of these Vary and Pragma possibilities need to be removed from the
    # response headers in order for Chrome to store a resource in the HTTP
    # cache and not invalidate it.
| url_entry.RemoveResponseHeaderDirectives('vary', {'*', 'cookie'}) |
| url_entry.RemoveResponseHeaderDirectives('pragma', {'no-cache'}) |
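    # For illustration (assuming the directive-removal semantics of
    # wpr_backend): a header such as 'Vary: Accept-Encoding, Cookie' would be
    # reduced to 'Vary: Accept-Encoding'.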
| patched_entry_count += 1 |
| logging.info('number of entries patched: %d', patched_entry_count) |
| |
| |
| def _FilterOutDataAndIncompleteRequests(requests): |
| for request in filter(lambda r: not r.IsDataRequest(), requests): |
    # The protocol is only known once the response has been received. But the
    # trace recording might have been stopped while some JavaScript-originated
    # requests had not received any response yet.
| if request.protocol is None: |
| assert not request.HasReceivedResponse() |
| continue |
| if request.protocol not in {'http/0.9', 'http/1.0', 'http/1.1'}: |
| raise RuntimeError('Unknown request protocol {}'.format(request.protocol)) |
| yield request |
| |
| |
| def _PatchCacheArchive(cache_archive_path, loading_trace_path, |
| cache_archive_dest_path): |
| """Patch the cache archive. |
| |
| Note: This method update the raw response headers of cache entries' to store |
| the ones such as Set-Cookie that were pruned by the |
| net::HttpCacheTransaction, and remove the stream index 2 holding resource's |
| compile meta data. |
| |
| Args: |
| cache_archive_path: Input archive's path to patch. |
| loading_trace_path: Path of the loading trace that have recorded the cache |
| archive <cache_archive_path>. |
| cache_archive_dest_path: Archive destination's path. |
| """ |
| trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path) |
| with common_util.TemporaryDirectory(prefix='sandwich_tmp') as tmp_path: |
| cache_path = os.path.join(tmp_path, 'cache') |
| chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_path) |
| cache_backend = chrome_cache.CacheBackend(cache_path, 'simple') |
| cache_entries = set(cache_backend.ListKeys()) |
    logging.info('Original cache size: %d bytes', cache_backend.GetSize())
| for request in _FilterOutDataAndIncompleteRequests( |
| trace.request_track.GetEvents()): |
      # For requests that have an upload data stream, such as POST requests,
      # net::HttpCache::GenerateCacheKey() prefixes the cache entry's key with
      # the upload data stream's session-unique identifier.
      #
      # It is fine not to patch these requests since, when reopening Chrome,
      # the entry cannot be reused because the upload data stream's identifier
      # will be different.
      #
      # The fact that these entries are kept in the cache after closing Chrome
      # properly (by closing the Chrome tab, as ChromeControler.SetSlowDeath()
      # does) is a known Chrome bug (crbug.com/610725).
| if request.url not in cache_entries: |
| continue |
      # Chrome prunes Set-Cookie from response headers before storing them in
      # the disk cache. It also adds an implicit "Vary: cookie" header to all
      # redirect response headers. Sandwich manages the cache, but between
      # recording the cache and benchmarking, the cookie jar is invalidated,
      # which leads to the invalidation of all cacheable redirects.
| raw_headers = request.GetRawResponseHeaders() |
| cache_backend.UpdateRawResponseHeaders(request.url, raw_headers) |
| # NoState-Prefetch would only fetch the resources, but not parse them. |
| cache_backend.DeleteStreamForKey(request.url, 2) |
| chrome_cache.ZipDirectoryContent(cache_path, cache_archive_dest_path) |
    logging.info('Patched cache size: %d bytes', cache_backend.GetSize())
| |
| |
| def _DiscoverRequests(dependencies_lens, subresource_discoverer): |
| trace = dependencies_lens.loading_trace |
| first_resource_request = trace.request_track.GetFirstResourceRequest() |
| |
| if subresource_discoverer == Discoverer.EmptyCache: |
| requests = [] |
| elif subresource_discoverer == Discoverer.FullCache: |
| requests = dependencies_lens.loading_trace.request_track.GetEvents() |
| elif subresource_discoverer == Discoverer.MainDocument: |
| requests = [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] |
| elif subresource_discoverer == Discoverer.Parser: |
| requests = PrefetchSimulationView.ParserDiscoverableRequests( |
| first_resource_request, dependencies_lens) |
| elif subresource_discoverer == Discoverer.HTMLPreloadScanner: |
| requests = PrefetchSimulationView.PreloadedRequests( |
| first_resource_request, dependencies_lens, trace) |
  else:
    assert False, 'unknown sub-resource discoverer: {}'.format(
        subresource_discoverer)
| logging.info('number of requests discovered by %s: %d', |
| subresource_discoverer, len(requests)) |
| return requests |
| |
| |
| def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): |
| with open(original_headers_path) as file_input: |
| original_headers = json.load(file_input) |
| pruned_requests = set() |
| for request in requests: |
| request_original_headers = original_headers[request.url] |
| if ('cache-control' in request_original_headers and |
| 'no-store' in request_original_headers['cache-control'].lower()): |
| pruned_requests.add(request) |
| return [r for r in requests if r not in pruned_requests] |
| |
| |
| def _ExtractDiscoverableUrls( |
| original_headers_path, loading_trace_path, subresource_discoverer): |
| """Extracts discoverable resource urls from a loading trace according to a |
| sub-resource discoverer. |
| |
| Args: |
| original_headers_path: Path of JSON containing the original headers. |
| loading_trace_path: Path of the loading trace recorded at original cache |
| creation. |
    subresource_discoverer: The sub-resource discoverer that should white-list
| the resources to keep in cache for the NoState-Prefetch benchmarks. |
| |
| Returns: |
| A set of urls. |
| """ |
| assert subresource_discoverer in SUBRESOURCE_DISCOVERERS, \ |
| 'unknown prefetch simulation {}'.format(subresource_discoverer) |
| logging.info('loading %s', loading_trace_path) |
| trace = loading_trace.LoadingTrace.FromJsonFile(loading_trace_path) |
| dependencies_lens = RequestDependencyLens(trace) |
| |
| # Build the list of discovered requests according to the desired simulation. |
| discovered_requests = [] |
| if subresource_discoverer == Discoverer.HTMLPreloadScannerStore: |
| requests = _DiscoverRequests( |
| dependencies_lens, Discoverer.HTMLPreloadScanner) |
| discovered_requests = _PruneOutOriginalNoStoreRequests( |
| original_headers_path, requests) |
| else: |
| discovered_requests = _DiscoverRequests( |
| dependencies_lens, subresource_discoverer) |
| |
| whitelisted_urls = set() |
| for request in _FilterOutDataAndIncompleteRequests(discovered_requests): |
| logging.debug('white-listing %s', request.url) |
| whitelisted_urls.add(request.url) |
| logging.info('number of white-listed resources: %d', len(whitelisted_urls)) |
| return whitelisted_urls |
| |
| |
| def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): |
| """Compare URL sets and log the diffs. |
| |
| Args: |
| ref_url_set: Set of reference urls. |
| url_set: Set of urls to compare to the reference. |
| url_set_name: The set name for logging purposes. |
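
  Example (illustrative):
    _PrintUrlSetComparison({'http://a/', 'http://b/'}, {'http://a/'},
                           'sub-resources')
    # Logs an error that 'sub-resources' are not matching (expected 2, had 1)
    # and lists 'http://b/' as missing.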
| """ |
| assert type(ref_url_set) == set |
| assert type(url_set) == set |
| if ref_url_set == url_set: |
    logging.info('  %d %s are matching.', len(ref_url_set), url_set_name)
| return |
| missing_urls = ref_url_set.difference(url_set) |
| unexpected_urls = url_set.difference(ref_url_set) |
  logging.error('  %s are not matching (expected %d, had %d)',
                url_set_name, len(ref_url_set), len(url_set))
  logging.error('  List of %d missing resources:', len(missing_urls))
  for url in sorted(missing_urls):
    logging.error('- ' + url)
  logging.error('  List of %d unexpected resources:', len(unexpected_urls))
  for url in sorted(unexpected_urls):
    logging.error('+ ' + url)
| |
| |
class _RequestOutcome(object):
| All, ServedFromCache, NotServedFromCache, Post = range(4) |
| |
| |
| def _ListUrlRequests(trace, request_kind): |
| """Lists requested URLs from a trace. |
| |
| Args: |
| trace: (loading_trace.LoadingTrace) loading trace. |
| request_kind: _RequestOutcome indicating the subset of requests to output. |
| |
| Returns: |
| set([str]) |
| """ |
| urls = set() |
| for request_event in _FilterOutDataAndIncompleteRequests( |
| trace.request_track.GetEvents()): |
| if (request_kind == _RequestOutcome.ServedFromCache and |
| request_event.from_disk_cache): |
| urls.add(request_event.url) |
| elif (request_kind == _RequestOutcome.Post and |
| request_event.method.upper().strip() == 'POST'): |
| urls.add(request_event.url) |
| elif (request_kind == _RequestOutcome.NotServedFromCache and |
| not request_event.from_disk_cache): |
| urls.add(request_event.url) |
| elif request_kind == _RequestOutcome.All: |
| urls.add(request_event.url) |
| return urls |
| |
| |
| class _RunOutputVerifier(object): |
| """Object to verify benchmark run from traces and WPR log stored in the |
| runner output directory. |
| """ |
| |
| def __init__(self, cache_validation_result, benchmark_setup): |
| """Constructor. |
| |
| Args: |
| cache_validation_result: JSON of the cache validation task. |
| benchmark_setup: JSON of the benchmark setup. |
| """ |
| self._cache_whitelist = set(benchmark_setup['cache_whitelist']) |
| self._original_requests = set( |
| cache_validation_result['effective_encoded_data_lengths'].keys()) |
| self._original_post_requests = set( |
| cache_validation_result['effective_post_requests']) |
| self._original_cached_requests = self._original_requests.intersection( |
| self._cache_whitelist) |
| self._original_uncached_requests = self._original_requests.difference( |
| self._cache_whitelist) |
| self._all_sent_url_requests = set() |
| |
| def VerifyTrace(self, trace): |
| """Verifies a trace with the cache validation result and the benchmark |
| setup. |
| """ |
| effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
| effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
    effective_cached_requests = _ListUrlRequests(
        trace, _RequestOutcome.ServedFromCache)
    effective_uncached_requests = _ListUrlRequests(
        trace, _RequestOutcome.NotServedFromCache)
| |
| missing_requests = self._original_requests.difference(effective_requests) |
| unexpected_requests = effective_requests.difference(self._original_requests) |
    expected_cached_requests = self._original_cached_requests.difference(
        missing_requests)
| expected_uncached_requests = self._original_uncached_requests.union( |
| unexpected_requests).difference(missing_requests) |
| |
| # POST requests are known to be unable to use the cache. |
| expected_cached_requests.difference_update(effective_post_requests) |
| expected_uncached_requests.update(effective_post_requests) |
| |
| _PrintUrlSetComparison(self._original_requests, effective_requests, |
| 'All resources') |
| _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources') |
| _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, |
| 'Cached resources') |
| _PrintUrlSetComparison(expected_uncached_requests, |
| effective_uncached_requests, 'Non cached resources') |
| |
| self._all_sent_url_requests.update(effective_uncached_requests) |
| |
| def VerifyWprLog(self, wpr_log_path): |
| """Verifies WPR log with previously verified traces.""" |
| all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) |
| all_wpr_urls = set() |
| unserved_wpr_urls = set() |
| wpr_command_colliding_urls = set() |
| |
| for request in all_wpr_requests: |
| if request.is_wpr_host: |
| continue |
| if urlparse(request.url).path.startswith('/web-page-replay'): |
| wpr_command_colliding_urls.add(request.url) |
| elif request.is_served is False: |
| unserved_wpr_urls.add(request.url) |
| all_wpr_urls.add(request.url) |
| |
| _PrintUrlSetComparison(set(), unserved_wpr_urls, |
| 'Distinct unserved resources from WPR') |
| _PrintUrlSetComparison(set(), wpr_command_colliding_urls, |
                           'Distinct resources colliding with WPR commands')
| _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests, |
| 'Distinct resource requests to WPR') |
| |
| |
| def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): |
| """Validates a cache archive content. |
| |
| Args: |
| cache_build_trace_path: Path of the generated trace at the cache build time. |
| cache_archive_path: Cache archive's path to validate. |
| |
| Returns: |
| { |
| 'effective_encoded_data_lengths': |
| {URL of all requests: encoded_data_length}, |
| 'effective_post_requests': [URLs of POST requests], |
| 'expected_cached_resources': [URLs of resources expected to be cached], |
    'successfully_cached_resources': [URLs of cached sub-resources]
| } |
| """ |
| # TODO(gabadie): What's the best way of propagating errors happening in here? |
  logging.info('Listing cached urls from %s', cache_archive_path)
| with common_util.TemporaryDirectory() as cache_directory: |
| chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| cache_keys = set( |
| chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) |
| trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |
| effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
| effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
| effective_encoded_data_lengths = {} |
| for request in _FilterOutDataAndIncompleteRequests( |
| trace.request_track.GetEvents()): |
| if request.from_disk_cache or request.served_from_cache: |
      # At cache archive creation time, a request might be loaded several
      # times; skip the ones served from cache, since their
      # request.encoded_data_length would be 0.
| continue |
| if request.url in effective_encoded_data_lengths: |
| effective_encoded_data_lengths[request.url] = max( |
| effective_encoded_data_lengths[request.url], |
| request.GetEncodedDataLength()) |
| else: |
| effective_encoded_data_lengths[request.url] = ( |
| request.GetEncodedDataLength()) |
| |
| upload_data_stream_cache_entry_keys = set() |
| upload_data_stream_requests = set() |
| for cache_entry_key in cache_keys: |
| match = _UPLOAD_DATA_STREAM_REQUESTS_REGEX.match(cache_entry_key) |
| if not match: |
| continue |
| upload_data_stream_cache_entry_keys.add(cache_entry_key) |
| upload_data_stream_requests.add(match.group('url')) |
| |
| expected_cached_requests = effective_requests.difference( |
| effective_post_requests) |
| effective_cache_keys = cache_keys.difference( |
| upload_data_stream_cache_entry_keys) |
| |
| _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, |
| 'POST resources') |
| _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, |
| 'Cached resources') |
| |
| return { |
| 'effective_encoded_data_lengths': effective_encoded_data_lengths, |
| 'effective_post_requests': [url for url in effective_post_requests], |
| 'expected_cached_resources': [url for url in expected_cached_requests], |
| 'successfully_cached_resources': [url for url in effective_cache_keys] |
| } |
| |
| |
| def _ProcessRunOutputDir( |
| cache_validation_result, benchmark_setup, runner_output_dir): |
| """Process benchmark's run output directory. |
| |
| Args: |
| cache_validation_result: Same as for _RunOutputVerifier |
| benchmark_setup: Same as for _RunOutputVerifier |
| runner_output_dir: Same as for SandwichRunner.output_dir |
| |
| Returns: |
    List of dictionaries.
| """ |
| run_metrics_list = [] |
| run_output_verifier = _RunOutputVerifier( |
| cache_validation_result, benchmark_setup) |
| cached_encoded_data_lengths = ( |
| cache_validation_result['effective_encoded_data_lengths']) |
| for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( |
| runner_output_dir): |
| trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) |
| |
| logging.info('loading trace: %s', trace_path) |
| trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) |
| |
| logging.info('verifying trace: %s', trace_path) |
| run_output_verifier.VerifyTrace(trace) |
| |
| logging.info('extracting metrics from trace: %s', trace_path) |
| served_from_network_bytes = 0 |
| served_from_cache_bytes = 0 |
| urls_hitting_network = set() |
| for request in _FilterOutDataAndIncompleteRequests( |
| trace.request_track.GetEvents()): |
      # Ignore requests served from Blink's cache.
| if request.served_from_cache: |
| continue |
| urls_hitting_network.add(request.url) |
| if request.from_disk_cache: |
| served_from_cache_bytes += cached_encoded_data_lengths[request.url] |
| else: |
| served_from_network_bytes += request.GetEncodedDataLength() |
| |
    # Make sure that requests served from Blink's cache have at least one
    # corresponding request that was not served from Blink's cache.
| for request in _FilterOutDataAndIncompleteRequests( |
| trace.request_track.GetEvents()): |
| assert (request.url in urls_hitting_network or |
| not request.served_from_cache) |
| |
| run_metrics = { |
| 'url': trace.url, |
| 'repeat_id': repeat_id, |
| 'subresource_discoverer': benchmark_setup['subresource_discoverer'], |
| 'cache_recording.subresource_count': |
| len(cache_validation_result['effective_encoded_data_lengths']), |
| 'cache_recording.cached_subresource_count_theoretic': |
| len(cache_validation_result['successfully_cached_resources']), |
| 'cache_recording.cached_subresource_count': |
| len(cache_validation_result['expected_cached_resources']), |
| 'benchmark.subresource_count': len(_ListUrlRequests( |
| trace, _RequestOutcome.All)), |
| 'benchmark.served_from_cache_count_theoretic': |
| len(benchmark_setup['cache_whitelist']), |
| 'benchmark.served_from_cache_count': len(_ListUrlRequests( |
| trace, _RequestOutcome.ServedFromCache)), |
| 'benchmark.served_from_network_bytes': served_from_network_bytes, |
| 'benchmark.served_from_cache_bytes': served_from_cache_bytes |
| } |
| run_metrics.update( |
| sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( |
| repeat_dir, trace)) |
| run_metrics_list.append(run_metrics) |
| run_metrics_list.sort(key=lambda e: e['repeat_id']) |
| |
| wpr_log_path = os.path.join( |
| runner_output_dir, sandwich_runner.WPR_LOG_FILENAME) |
| logging.info('verifying wpr log: %s', wpr_log_path) |
| run_output_verifier.VerifyWprLog(wpr_log_path) |
| return run_metrics_list |
| |
| |
| class PrefetchBenchmarkBuilder(task_manager.Builder): |
| """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" |
| |
| def __init__(self, common_builder): |
| task_manager.Builder.__init__(self, |
| common_builder.output_directory, |
| common_builder.output_subdirectory) |
| self._common_builder = common_builder |
| |
| self._original_headers_path = None |
| self._wpr_archive_path = None |
| self._cache_path = None |
| self._trace_from_grabbing_reference_cache = None |
| self._cache_validation_task = None |
| self._PopulateCommonPipelines() |
| |
| def _PopulateCommonPipelines(self): |
| """Creates necessary tasks to produce initial cache archive. |
| |
| Also creates a task for producing a json file with a mapping of URLs to |
| subresources (urls-resources.json). |
| |
| Here is the full dependency tree for the returned task: |
| common/patched-cache-validation.json |
| depends on: common/patched-cache.zip |
| depends on: common/original-cache.zip |
| depends on: common/webpages-patched.wpr |
| depends on: common/webpages.wpr |
| """ |
| self._original_headers_path = self.RebaseOutputPath( |
| 'common/response-headers.json') |
| |
| @self.RegisterTask('common/webpages-patched.wpr', |
| dependencies=[self._common_builder.original_wpr_task]) |
| def BuildPatchedWpr(): |
| common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
| shutil.copyfile( |
| self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
| wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) |
| |
      # Save the original response headers.
      original_response_headers = {
          e.url: e.GetResponseHeadersDict()
          for e in wpr_archive.ListUrlEntries()}
| with open(self._original_headers_path, 'w') as file_output: |
| json.dump(original_response_headers, file_output) |
| |
| # Patch WPR. |
| _PatchWpr(wpr_archive) |
| wpr_archive.Persist() |
| |
| @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
| def BuildOriginalCache(): |
| runner = self._common_builder.CreateSandwichRunner() |
| runner.wpr_archive_path = BuildPatchedWpr.path |
| runner.cache_archive_path = BuildOriginalCache.path |
| runner.cache_operation = sandwich_runner.CacheOperation.SAVE |
| runner.output_dir = BuildOriginalCache.run_path |
| runner.Run() |
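    # Derive the run output directory from the archive path by stripping the
    # '.zip' suffix and appending '-run'.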
| BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' |
| original_cache_trace_path = os.path.join( |
| BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) |
| |
| @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) |
| def BuildPatchedCache(): |
| _PatchCacheArchive(BuildOriginalCache.path, |
| original_cache_trace_path, BuildPatchedCache.path) |
| |
| @self.RegisterTask('common/patched-cache-validation.json', |
| [BuildPatchedCache]) |
| def ValidatePatchedCache(): |
| cache_validation_result = _ValidateCacheArchiveContent( |
| original_cache_trace_path, BuildPatchedCache.path) |
| with open(ValidatePatchedCache.path, 'w') as output: |
| json.dump(cache_validation_result, output) |
| |
| self._wpr_archive_path = BuildPatchedWpr.path |
| self._trace_from_grabbing_reference_cache = original_cache_trace_path |
| self._cache_path = BuildPatchedCache.path |
| self._cache_validation_task = ValidatePatchedCache |
| |
| self._common_builder.default_final_tasks.append(ValidatePatchedCache) |
| |
| def PopulateLoadBenchmark(self, subresource_discoverer, |
| transformer_list_name, transformer_list): |
| """Populate benchmarking tasks from its setup tasks. |
| |
| Args: |
| subresource_discoverer: Name of a subresources discoverer. |
      transformer_list_name: A string describing the transformers; it will be
        used in task names (prefer names without spaces or special characters).
      transformer_list: An ordered list of functions that take an instance of
        SandwichRunner as a parameter; they are applied immediately before
        SandwichRunner.Run(), in the given order.
| |
    Here is the full dependency tree added for the returned task:
| <transformer_list_name>/<subresource_discoverer>-metrics.csv |
| depends on: <transformer_list_name>/<subresource_discoverer>-run/ |
| depends on: common/<subresource_discoverer>-cache.zip |
| depends on: common/<subresource_discoverer>-setup.json |
| depends on: common/patched-cache-validation.json |
| """ |
| additional_column_names = [ |
| 'url', |
| 'repeat_id', |
| 'subresource_discoverer', |
| 'cache_recording.subresource_count', |
| 'cache_recording.cached_subresource_count_theoretic', |
| 'cache_recording.cached_subresource_count', |
| 'benchmark.subresource_count', |
| 'benchmark.served_from_cache_count_theoretic', |
| 'benchmark.served_from_cache_count', |
| 'benchmark.served_from_network_bytes', |
| 'benchmark.served_from_cache_bytes'] |
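    # The final CSV header is this list followed by
    # sandwich_metrics.COMMON_CSV_COLUMN_NAMES (see ProcessRunOutputDir below).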
| |
| assert subresource_discoverer in SUBRESOURCE_DISCOVERERS |
| assert 'common' not in SUBRESOURCE_DISCOVERERS |
| shared_task_prefix = os.path.join('common', subresource_discoverer) |
| task_prefix = os.path.join(transformer_list_name, subresource_discoverer) |
| |
| @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, |
| dependencies=[self._cache_validation_task]) |
| def SetupBenchmark(): |
| whitelisted_urls = _ExtractDiscoverableUrls( |
| original_headers_path=self._original_headers_path, |
| loading_trace_path=self._trace_from_grabbing_reference_cache, |
| subresource_discoverer=subresource_discoverer) |
| |
| common_util.EnsureParentDirectoryExists(SetupBenchmark.path) |
| with open(SetupBenchmark.path, 'w') as output: |
| json.dump({ |
| 'cache_whitelist': [url for url in whitelisted_urls], |
| 'subresource_discoverer': subresource_discoverer, |
| }, output) |
| |
| @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, |
| dependencies=[SetupBenchmark]) |
| def BuildBenchmarkCacheArchive(): |
      with open(SetupBenchmark.path) as setup_file:
        benchmark_setup = json.load(setup_file)
| chrome_cache.ApplyUrlWhitelistToCacheArchive( |
| cache_archive_path=self._cache_path, |
| whitelisted_urls=benchmark_setup['cache_whitelist'], |
| output_cache_archive_path=BuildBenchmarkCacheArchive.path) |
| |
| @self.RegisterTask(task_prefix + '-run/', |
| dependencies=[BuildBenchmarkCacheArchive]) |
| def RunBenchmark(): |
| runner = self._common_builder.CreateSandwichRunner() |
| for transformer in transformer_list: |
| transformer(runner) |
| runner.wpr_archive_path = self._wpr_archive_path |
| runner.wpr_out_log_path = os.path.join( |
| RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) |
| runner.cache_archive_path = BuildBenchmarkCacheArchive.path |
| runner.cache_operation = sandwich_runner.CacheOperation.PUSH |
| runner.output_dir = RunBenchmark.path |
| runner.Run() |
| |
| @self.RegisterTask(task_prefix + '-metrics.csv', |
| dependencies=[RunBenchmark]) |
| def ProcessRunOutputDir(): |
      with open(SetupBenchmark.path) as setup_file:
        benchmark_setup = json.load(setup_file)
      with open(self._cache_validation_task.path) as validation_file:
        cache_validation_result = json.load(validation_file)
| |
| run_metrics_list = _ProcessRunOutputDir( |
| cache_validation_result, benchmark_setup, RunBenchmark.path) |
| with open(ProcessRunOutputDir.path, 'w') as csv_file: |
| writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
| sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
| writer.writeheader() |
| for trace_metrics in run_metrics_list: |
| writer.writerow(trace_metrics) |
| |
| self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |