web-爬虫

这篇博客介绍了如何在使用Selenium遇到动态渲染页面时,通过execute_script方法模拟JavaScript点击事件。文章详细分析了一个特定的jQuery AJAX请求,并探讨了如何理解和转换这个请求为Request形式,以便在没有浏览器环境中执行。内容涉及到设置User-Agent、处理动态加载的数据以及错误处理。此外,还展示了如何解析和处理返回的数据节点。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

selenium

很多页面是在加载完成后才动态渲染的,此时通过 driver 的 find_element 系列方法拿到的元素对象,往往不能直接执行点击等操作。
此时可以通过 execute_script('js_script', params) 来完成
但这种执行方式不会返回页面的结果,可能是完成了一次ajax请求,动态改造了页面。
例如

from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Chrome switches are written with a leading "--"; an argument without a dash
# prefix is not parsed as a switch, so the language and user-agent settings
# below would otherwise never take effect.
options = wd.ChromeOptions()
# NOTE(review): the original value was "gbk", which is a character encoding
# rather than a locale code; "zh-CN" is assumed to be the intent -- confirm.
options.add_argument('--lang=zh-CN')
options.add_argument('--user-agent=Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36')
# options.add_argument('--headless')  # uncomment to run without a visible browser window
options.add_argument('--ignore-certificate-errors')
# The original flag contained a stray space ('-ignore -ssl-errors').
options.add_argument('--ignore-ssl-errors')

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = wd.Chrome(options=options)

dataurl = 'https://data.stats.gov.cn/easyquery.htm'
driver.get(dataurl)

# Element id of the tree node to click. The original snippet also assigned an
# unused variable named `id` (shadowing the builtin) holding 'treeZhiBiao_12_a';
# the id that was actually clicked is kept here.
node_id = 'treeZhiBiao_2_a'

# An implicit wait only affects element lookups, not execute_script, so wait
# explicitly until the dynamically rendered node is present in the DOM before
# triggering the click.
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.ID, node_id)))

# Trigger the click through the page's own jQuery ($). The id is passed as a
# script argument (arguments[0]) instead of being hard-coded in the JS string.
# execute_script returns only the script's own return value, not the result of
# any AJAX request the click starts.
driver.execute_script("$('#' + arguments[0]).click()", node_id)

调试,发现ajax请求的发起和返回如下:
一次Ajax请求的发起
分析js,理解请求的发生,尽量转换为request请求
在这里插入图片描述

// Asynchronous node-loading request (excerpt). `setting`, `node`, `tmpParam`,
// `_tmpV`, `isSilent`, `callback`, `tools`, `view`, `data` and `consts` all come
// from the enclosing scope, which is not shown here.
$.ajax({
				// Content type, HTTP method and response data type are all read from setting.async.
				contentType: setting.async.contentType,
				type: setting.async.type,
				// NOTE(review): tools.apply presumably calls setting.async.url when it is a
				// function and otherwise falls back to the third argument -- confirm.
				url: tools.apply(setting.async.url, [setting.treeId, node], setting.async.url),
				data: tmpParam,
				dataType: setting.async.dataType,
				success: function(msg) {
					// Drop the response when the root's _ver no longer equals the value
					// held in _tmpV (presumably a stale request -- confirm).
					if (_tmpV != data.getRoot(setting)._ver) {
						return;
					}
					// Normalise the response into newNodes: empty -> [], string -> evaluated,
					// anything else is used as-is.
					var newNodes = [];
					try {
						if (!msg || msg.length == 0) {
							newNodes = [];
						} else if (typeof msg == "string") {
							// NOTE(review): eval executes the response text as JavaScript;
							// this is only safe when the endpoint is trusted.
							newNodes = eval("(" + msg + ")");
						} else {
							newNodes = msg;
						}
					} catch(err) {
						// If evaluation throws, fall back to the raw response.
						newNodes = msg;
					}

					// Clear the in-flight flag and mark the node as asynchronously loaded.
					if (node) {
						node.isAjaxing = null;
						node.zAsync = true;
					}
					view.setNodeLineIcos(setting, node);
					if (newNodes && newNodes !== "") {
						// Pass the nodes through setting.async.dataFilter (with newNodes as the
						// fallback argument), then add a clone of the result under `node`.
						newNodes = tools.apply(setting.async.dataFilter, [setting.treeId, node, newNodes], newNodes);
						view.addNodes(setting, node, !!newNodes ? tools.clone(newNodes) : [], !!isSilent);
					} else {
						view.addNodes(setting, node, [], !!isSilent);
					}
					// Fire the ASYNC_SUCCESS event with the raw response, then invoke callback.
					setting.treeObj.trigger(consts.event.ASYNC_SUCCESS, [setting.treeId, node, msg]);
					tools.apply(callback);
				},
				error: function(XMLHttpRequest, textStatus, errorThrown) {
					// Same version check as in the success handler.
					if (_tmpV != data.getRoot(setting)._ver) {
						return;
					}
					if (node) node.isAjaxing = null;
					view.setNodeLineIcos(setting, node);
					// Fire the ASYNC_ERROR event with the jQuery error details.
					setting.treeObj.trigger(consts.event.ASYNC_ERROR, [setting.treeId, node, XMLHttpRequest, textStatus, errorThrown]);
				}
			});
//https://data.stats.gov.cn/easyquery.htm?id=zb&dbcode=hgyd&wdcode=zb&m=getTree
var data = [{"dbcode":"hgyd","id":"A01","isParent":true,"name":"价格指数","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A02","isParent":true,"name":"工业","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A03","isParent":true,"name":"能源","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A04","isParent":true,"name":"固定资产投资(不含农户)","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A05","isParent":true,"name":"服务业生产指数","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0E","isParent":true,"name":"城镇调查失业率","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A06","isParent":true,"name":"房地产","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A07","isParent":true,"name":"国内贸易","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A08","isParent":true,"name":"对外经济","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A09","isParent":true,"name":"交通运输","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0A","isParent":true,"name":"邮电通信","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0B","isParent":true,"name":"采购经理指数","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0C","isParent":true,"name":"财政","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0D","isParent":true,"name":"金融","pid":"","wdcode":"zb"}]
//id=A01&dbcode=hgyd&wdcode=zb&m=getTree  获取细项
//id=A0106&dbcode=hgyd&wdcode=zb&m=getTree
/*  url编码后的数据:
m=QueryData&dbcode=hgyd&rowcode=zb&colcode=sj&wds=%5B%5D&dfwds=%5B%7B%22wdcode%22%3A%22zb%22%2C%22valuecode%22%3A%22A010701%22%7D%5D&k1=1629453895767&h=1

m: QueryData
dbcode: hgyd
rowcode: zb
colcode: sj
wds: [] ==%5B%5D
dfwds: [{"wdcode":"zb","valuecode":"A010701"}] == %5B%7B%22wdcode%22%3A%22zb%22%2C%22valuecode%22%3A%22A010701%22%7D%5D
k1: 1629453895767
h: 1
*/
var data =
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值