#!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2018/10/15 15:03 # @Author : zhangzhe # @File : day4_yanzhengma.py # @Software: PyCharm import requests from lxml import etree import chaojiying as cj import MySQLdb as mdb conn = mdb.connect(host='localhost', port=3306, user='root', password='123456', db='db131', charset='utf8') cursor = conn.cursor() # 广度优先 class BFS(): def __init__(self, url): self.__visited_list = [] # 存放已经访问过的url self.__unvisited_list = [url] # 存放未访问的url # 向未访问的列表中添加url def add_unvisited(self, url): # 判断新的url不存在于已经访问过的列表中,也不存在于未访问列表中 if url not in self.__unvisited_list and url not in self.__visited_list and url.startswith('http'): self.__unvisited_list.append(url) # 向已访问的列表中添加url def add_visited(self, url): self.__visited_list.append(url) # 从未访问的列表中取出一个url,并从中删除 def get_url_from_unvisited(self): print(self.__unvisited_list) # 取出最后添加的url return s