# blob: 18025b0aea68a59e1034fe3aab12f271da28fa40 [file] [log] [blame]
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import copy
import unittest
from content_classification_lens import (ContentClassificationLens,
_RulesMatcher)
from request_track import Request
import test_utils
class ContentClassificationLensTestCase(unittest.TestCase):
  """Tests for ContentClassificationLens built from small synthetic traces."""

  # URL of the main document; also used as the URL of the base request.
  _DOCUMENT_URL = 'http://bla.com'
  _MAIN_FRAME_ID = '123.1'
  # Base request, issued from the main frame. Tests deep-copy it and tweak
  # individual fields rather than mutating this shared instance.
  _REQUEST = Request.FromJsonDict({'url': _DOCUMENT_URL,
                                   'document_url': _DOCUMENT_URL,
                                   'request_id': '1234.1',
                                   'frame_id': _MAIN_FRAME_ID,
                                   'initiator': {'type': 'other'},
                                   'timestamp': 2,
                                   'status': 200,
                                   'timing': {},
                                   'resource_type': 'Document'})
  # Page events: the main frame starts loading, then a child frame is
  # attached to it.
  _PAGE_EVENTS = [{'method': 'Page.frameStartedLoading',
                   'frame_id': _MAIN_FRAME_ID},
                  {'method': 'Page.frameAttached',
                   'frame_id': '123.13',
                   'parent_frame_id': _MAIN_FRAME_ID}]
  # A single rule that matches the URL of _REQUEST.
  _RULES = ['bla.com']

  def testGetDocumentUrl(self):
    """The document URL is found, including when a 302 precedes it."""
    trace = test_utils.LoadingTraceFromEvents(
        [self._REQUEST], self._PAGE_EVENTS)
    lens = ContentClassificationLens(trace, [], [])
    self.assertEqual(self._DOCUMENT_URL, lens._GetDocumentUrl())
    # Don't be fooled by redirects.
    request = copy.deepcopy(self._REQUEST)
    request.status = 302
    request.document_url = 'http://www.bla.com'
    trace = test_utils.LoadingTraceFromEvents(
        [request, self._REQUEST], self._PAGE_EVENTS)
    lens = ContentClassificationLens(trace, [], [])
    self.assertEqual(self._DOCUMENT_URL, lens._GetDocumentUrl())

  def testGetDocumentUrlSeveralChanges(self):
    """With several 200 requests, the last document URL is reported."""
    request = copy.deepcopy(self._REQUEST)
    request.status = 200
    request.document_url = 'http://www.blabla.com'
    request2 = copy.deepcopy(request)
    request2.document_url = 'http://www.blablabla.com'
    trace = test_utils.LoadingTraceFromEvents(
        [self._REQUEST, request, request2], self._PAGE_EVENTS)
    lens = ContentClassificationLens(trace, [], [])
    self.assertEqual(request2.document_url, lens._GetDocumentUrl())

  def testNoRules(self):
    """Without any rules, a request is neither an ad nor tracking."""
    trace = test_utils.LoadingTraceFromEvents(
        [self._REQUEST], self._PAGE_EVENTS)
    lens = ContentClassificationLens(trace, [], [])
    self.assertFalse(lens.IsAdRequest(self._REQUEST))
    self.assertFalse(lens.IsTrackingRequest(self._REQUEST))

  def testAdRequest(self):
    """A request matching an ad rule is an ad request only."""
    trace = test_utils.LoadingTraceFromEvents(
        [self._REQUEST], self._PAGE_EVENTS)
    lens = ContentClassificationLens(trace, self._RULES, [])
    self.assertTrue(lens.IsAdRequest(self._REQUEST))
    self.assertFalse(lens.IsTrackingRequest(self._REQUEST))

  def testTrackingRequest(self):
    """A request matching a tracking rule is a tracking request only."""
    trace = test_utils.LoadingTraceFromEvents(
        [self._REQUEST], self._PAGE_EVENTS)
    lens = ContentClassificationLens(trace, [], self._RULES)
    self.assertFalse(lens.IsAdRequest(self._REQUEST))
    self.assertTrue(lens.IsTrackingRequest(self._REQUEST))

  def testMainFrameIsNotAnAdFrame(self):
    """The main frame is not an ad frame even if its request matches."""
    trace = test_utils.LoadingTraceFromEvents(
        [self._REQUEST], self._PAGE_EVENTS)
    lens = ContentClassificationLens(trace, self._RULES, [])
    self.assertFalse(lens.IsAdOrTrackingFrame(self._MAIN_FRAME_ID))

  def testAdFrame(self):
    """A non-main frame holding a matching request is an ad frame."""
    request = copy.deepcopy(self._REQUEST)
    request.request_id = '1234.2'
    request.frame_id = '123.123'
    trace = test_utils.LoadingTraceFromEvents(
        [self._REQUEST, request], self._PAGE_EVENTS)
    lens = ContentClassificationLens(trace, self._RULES, [])
    self.assertTrue(lens.IsAdOrTrackingFrame(request.frame_id))

  def testAdAndTrackingRequests(self):
    """Matching requests and requests in an ad frame are all returned."""
    ad_request = copy.deepcopy(self._REQUEST)
    ad_request.request_id = '1234.2'
    ad_request.frame_id = '123.123'
    # Does not match the rule and lives in a frame with no matching request.
    non_ad_request_non_ad_frame = copy.deepcopy(self._REQUEST)
    non_ad_request_non_ad_frame.request_id = '1234.3'
    non_ad_request_non_ad_frame.url = 'http://www.example.com'
    non_ad_request_non_ad_frame.frame_id = '123.456'
    # Does not match the rule itself, but shares the frame of ad_request.
    non_ad_request_ad_frame = copy.deepcopy(self._REQUEST)
    non_ad_request_ad_frame.request_id = '1234.4'
    non_ad_request_ad_frame.url = 'http://www.example.com'
    non_ad_request_ad_frame.frame_id = ad_request.frame_id
    trace = test_utils.LoadingTraceFromEvents(
        [self._REQUEST, ad_request, non_ad_request_non_ad_frame,
         non_ad_request_ad_frame], self._PAGE_EVENTS)
    lens = ContentClassificationLens(trace, self._RULES, [])
    self.assertSetEqual(
        {self._REQUEST, ad_request, non_ad_request_ad_frame},
        set(lens.AdAndTrackingRequests()))
class _MatcherTestCase(unittest.TestCase):
  """Tests for the _RulesMatcher helper of content_classification_lens."""

  # One plain rule followed by two '@@'-prefixed rules.
  _RULES_WITH_WHITELIST = ['/thisisanad.', '@@myadvertisingdomain.com/*',
                           '@@||www.mydomain.com/ads/$elemhide']
  _SCRIPT_RULE = 'domainwithscripts.com/*$script'
  _THIRD_PARTY_RULE = 'domainwithscripts.com/*$third-party'
  # Script request on the domain targeted by the two rules above. Tests
  # deep-copy it before changing any field.
  _SCRIPT_REQUEST = Request.FromJsonDict(
      {'url': 'http://domainwithscripts.com/bla.js',
       'resource_type': 'Script',
       'request_id': '1234.1',
       'frame_id': '123.1',
       'initiator': {'type': 'other'},
       'timestamp': 2,
       'timing': {}})

  def testRemovesWhitelistRules(self):
    """The second constructor argument controls dropping of '@@' rules."""
    # With False all three rules are kept.
    matcher = _RulesMatcher(self._RULES_WITH_WHITELIST, False)
    self.assertEqual(3, len(matcher._rules))
    # With True only one rule remains (presumably the non-'@@' one).
    matcher = _RulesMatcher(self._RULES_WITH_WHITELIST, True)
    self.assertEqual(1, len(matcher._rules))

  def testScriptRule(self):
    """A '$script' rule matches a Script request but not a Stylesheet."""
    matcher = _RulesMatcher([self._SCRIPT_RULE], False)
    request = copy.deepcopy(self._SCRIPT_REQUEST)
    request.resource_type = 'Stylesheet'
    self.assertFalse(matcher.Matches(
        request, ContentClassificationLensTestCase._DOCUMENT_URL))
    self.assertTrue(matcher.Matches(
        self._SCRIPT_REQUEST,
        ContentClassificationLensTestCase._DOCUMENT_URL))

  def testGetTldPlusOne(self):
    """_GetTldPlusOne handles single-label and two-label suffixes."""
    self.assertEqual(
        'easy.com',
        _RulesMatcher._GetTldPlusOne('http://www.easy.com/hello/you'))
    self.assertEqual(
        'not-so-easy.co.uk',
        _RulesMatcher._GetTldPlusOne('http://www.not-so-easy.co.uk/hello/you'))
    self.assertEqual(
        'hard.co.uk',
        _RulesMatcher._GetTldPlusOne('http://hard.co.uk/'))

  def testThirdPartyRule(self):
    """A '$third-party' rule matches only when the document site differs."""
    matcher = _RulesMatcher([self._THIRD_PARTY_RULE], False)
    request = copy.deepcopy(self._SCRIPT_REQUEST)
    # Same site as the request: the rule must not match.
    document_url = 'http://www.domainwithscripts.com/good-morning'
    self.assertFalse(matcher.Matches(request, document_url))
    # Different site: the rule must match.
    document_url = 'http://anotherdomain.com/good-morning'
    self.assertTrue(matcher.Matches(request, document_url))
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
  unittest.main()