| # Copyright 2016 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import copy |
| import unittest |
| |
| from content_classification_lens import (ContentClassificationLens, |
| _RulesMatcher) |
| from request_track import Request |
| import test_utils |
| |
| |
class ContentClassificationLensTestCase(unittest.TestCase):
  """Tests ContentClassificationLens against small synthetic loading traces."""
  _DOCUMENT_URL = 'http://bla.com'
  _MAIN_FRAME_ID = '123.1'
  # Main-frame document request shared by every test. The ad/tracking tests
  # below rely on its URL being matched by _RULES.
  _REQUEST = Request.FromJsonDict({'url': _DOCUMENT_URL,
                                   'document_url': _DOCUMENT_URL,
                                   'request_id': '1234.1',
                                   'frame_id': _MAIN_FRAME_ID,
                                   'initiator': {'type': 'other'},
                                   'timestamp': 2,
                                   'status': 200,
                                   'timing': {},
                                   'resource_type': 'Document'})
  # Page events handed to every trace: the main frame starts loading and one
  # child frame is attached to it.
  _PAGE_EVENTS = [{'method': 'Page.frameStartedLoading',
                   'frame_id': _MAIN_FRAME_ID},
                  {'method': 'Page.frameAttached',
                   'frame_id': '123.13', 'parent_frame_id': _MAIN_FRAME_ID}]
  _RULES = ['bla.com']

  def _MakeLens(self, requests, ad_rules, tracking_rules):
    """Builds a lens over a trace made of |requests| and _PAGE_EVENTS.

    Args:
      requests: ([Request]) requests to put in the trace, in order.
      ad_rules: ([str]) rules passed as the lens' second argument.
      tracking_rules: ([str]) rules passed as the lens' third argument.

    Returns:
      A ContentClassificationLens instance.
    """
    trace = test_utils.LoadingTraceFromEvents(requests, self._PAGE_EVENTS)
    return ContentClassificationLens(trace, ad_rules, tracking_rules)

  def testGetDocumentUrl(self):
    """The document URL is _DOCUMENT_URL, with or without a leading 302."""
    lens = self._MakeLens([self._REQUEST], [], [])
    self.assertEqual(self._DOCUMENT_URL, lens._GetDocumentUrl())
    # Don't be fooled by redirects.
    request = copy.deepcopy(self._REQUEST)
    request.status = 302
    request.document_url = 'http://www.bla.com'
    lens = self._MakeLens([request, self._REQUEST], [], [])
    self.assertEqual(self._DOCUMENT_URL, lens._GetDocumentUrl())

  def testGetDocumentUrlSeveralChanges(self):
    """With several 200 responses, the last request's document URL wins."""
    request = copy.deepcopy(self._REQUEST)
    request.status = 200
    request.document_url = 'http://www.blabla.com'
    request2 = copy.deepcopy(request)
    request2.document_url = 'http://www.blablabla.com'
    lens = self._MakeLens([self._REQUEST, request, request2], [], [])
    self.assertEqual(request2.document_url, lens._GetDocumentUrl())

  def testNoRules(self):
    """Without any rule, a request is neither an ad nor a tracking one."""
    lens = self._MakeLens([self._REQUEST], [], [])
    self.assertFalse(lens.IsAdRequest(self._REQUEST))
    self.assertFalse(lens.IsTrackingRequest(self._REQUEST))

  def testAdRequest(self):
    """A request matched by the ad rules is an ad, not a tracking request."""
    lens = self._MakeLens([self._REQUEST], self._RULES, [])
    self.assertTrue(lens.IsAdRequest(self._REQUEST))
    self.assertFalse(lens.IsTrackingRequest(self._REQUEST))

  def testTrackingRequest(self):
    """A request matched by the tracking rules is tracking, not an ad."""
    lens = self._MakeLens([self._REQUEST], [], self._RULES)
    self.assertFalse(lens.IsAdRequest(self._REQUEST))
    self.assertTrue(lens.IsTrackingRequest(self._REQUEST))

  def testMainFrameIsNotAnAdFrame(self):
    """The main frame is not an ad frame even if its request matches."""
    lens = self._MakeLens([self._REQUEST], self._RULES, [])
    self.assertFalse(lens.IsAdOrTrackingFrame(self._MAIN_FRAME_ID))

  def testAdFrame(self):
    """A non-main frame holding a matching request is an ad/tracking frame."""
    request = copy.deepcopy(self._REQUEST)
    request.request_id = '1234.2'
    request.frame_id = '123.123'
    lens = self._MakeLens([self._REQUEST, request], self._RULES, [])
    self.assertTrue(lens.IsAdOrTrackingFrame(request.frame_id))

  def testAdAndTrackingRequests(self):
    """AdAndTrackingRequests() includes non-matching requests in ad frames."""
    ad_request = copy.deepcopy(self._REQUEST)
    ad_request.request_id = '1234.2'
    ad_request.frame_id = '123.123'
    # URL not matched by the rules, in a frame with no matching request.
    non_ad_request_non_ad_frame = copy.deepcopy(self._REQUEST)
    non_ad_request_non_ad_frame.request_id = '1234.3'
    non_ad_request_non_ad_frame.url = 'http://www.example.com'
    non_ad_request_non_ad_frame.frame_id = '123.456'
    # URL not matched by the rules, but in the same frame as |ad_request|.
    non_ad_request_ad_frame = copy.deepcopy(self._REQUEST)
    non_ad_request_ad_frame.request_id = '1234.4'
    non_ad_request_ad_frame.url = 'http://www.example.com'
    non_ad_request_ad_frame.frame_id = ad_request.frame_id

    lens = self._MakeLens(
        [self._REQUEST, ad_request, non_ad_request_non_ad_frame,
         non_ad_request_ad_frame], self._RULES, [])
    self.assertSetEqual(
        {self._REQUEST, ad_request, non_ad_request_ad_frame},
        set(lens.AdAndTrackingRequests()))
| |
| |
class _MatcherTestCase(unittest.TestCase):
  """Tests for _RulesMatcher."""
  # One plain rule followed by two '@@'-prefixed entries.
  _RULES_WITH_WHITELIST = ['/thisisanad.', '@@myadvertisingdomain.com/*',
                           '@@||www.mydomain.com/ads/$elemhide']
  _SCRIPT_RULE = 'domainwithscripts.com/*$script'
  _THIRD_PARTY_RULE = 'domainwithscripts.com/*$third-party'
  _SCRIPT_REQUEST = Request.FromJsonDict(
      {'url': 'http://domainwithscripts.com/bla.js',
       'resource_type': 'Script',
       'request_id': '1234.1',
       'frame_id': '123.1',
       'initiator': {'type': 'other'},
       'timestamp': 2,
       'timing': {}})

  def testRemovesWhitelistRules(self):
    """The second constructor argument controls how many rules are kept.

    With False all three rules are kept; with True only one is (presumably the
    two '@@' whitelist entries are the ones dropped).
    """
    matcher = _RulesMatcher(self._RULES_WITH_WHITELIST, False)
    self.assertEqual(3, len(matcher._rules))
    matcher = _RulesMatcher(self._RULES_WITH_WHITELIST, True)
    self.assertEqual(1, len(matcher._rules))

  def testScriptRule(self):
    """A '$script' rule matches a Script request but not a Stylesheet one."""
    matcher = _RulesMatcher([self._SCRIPT_RULE], False)
    document_url = ContentClassificationLensTestCase._DOCUMENT_URL
    # Same URL as the script request, only the resource type differs.
    request = copy.deepcopy(self._SCRIPT_REQUEST)
    request.resource_type = 'Stylesheet'
    self.assertFalse(matcher.Matches(request, document_url))
    self.assertTrue(matcher.Matches(self._SCRIPT_REQUEST, document_url))

  def testGetTldPlusOne(self):
    """_GetTldPlusOne() handles single and two-label suffixes such as co.uk."""
    self.assertEqual(
        'easy.com',
        _RulesMatcher._GetTldPlusOne('http://www.easy.com/hello/you'))
    self.assertEqual(
        'not-so-easy.co.uk',
        _RulesMatcher._GetTldPlusOne('http://www.not-so-easy.co.uk/hello/you'))
    self.assertEqual(
        'hard.co.uk',
        _RulesMatcher._GetTldPlusOne('http://hard.co.uk/'))

  def testThirdPartyRule(self):
    """A '$third-party' rule matches only when the document domain differs."""
    matcher = _RulesMatcher([self._THIRD_PARTY_RULE], False)
    request = copy.deepcopy(self._SCRIPT_REQUEST)
    # Document on www.domainwithscripts.com, request on domainwithscripts.com:
    # must not match.
    document_url = 'http://www.domainwithscripts.com/good-morning'
    self.assertFalse(matcher.Matches(request, document_url))
    # Document on an unrelated domain: must match.
    document_url = 'http://anotherdomain.com/good-morning'
    self.assertTrue(matcher.Matches(request, document_url))
| |
| |
# Run all the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  unittest.main()