import requests from bs4 import BeautifulSoup # 爬取飞猪IP免费代理 class SpiderApp: # 初始化属性 def __init__(self): # 地址、头部、请求对象、解析对象 self.url = "https://www.feizhuip.com/?source=baidu&keyword=feizhuIP" self.head = {"user-agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36"} self.req = requests self.bs = BeautifulSoup # 存储page1中的路径和标题 self.href_list = [] # 响应对象 self.response = "" # 请求第一级页面:获取二级页面的路径和标题 def sendReqPage1(self): # 1 发起请求 self.response = self.req.get(url=self.url,headers=self.head) # 2 查看状态 print("code-",self.response.status_code) # 3 解析数据 解析器:html5lib需要安装 soup = sel
飞猪爬虫项目
最新推荐文章于 2025-07-13 00:36:29 发布