selenium
很多页面的内容是在加载完成后动态渲染的,通过 driver 的 find_element 系列方法获得的元素对象,有时不能直接执行操作(例如点击)。
此时可以通过 driver.execute_script('js_script', *args) 在页面中执行 JavaScript 来完成。
但这种执行方式不会直接返回页面变化后的结果:脚本可能只是触发了一次 ajax 请求,随后由回调动态改造了页面。
例如
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Example: open the stats.gov.cn query page and click a node of the indicator
# tree through the page's own jQuery, so that the page fires its ajax loader.
options = wd.ChromeOptions()
# Chrome only treats arguments with a leading dash as switches; the original
# 'lang=gbk' and 'User-Agent=...' were passed as positional arguments and were
# therefore never applied. --lang expects a locale ("gbk" is a charset name).
options.add_argument('--lang=zh-CN')
# NOTE(review): this mobile user agent is now actually sent; confirm the site
# still serves the same page layout (tree ids) to a mobile UA.
options.add_argument('--user-agent=Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36')
# options.add_argument('--headless')  # run without a visible browser window
options.add_argument('--ignore-certificate-errors')
# The original switch was '-ignore -ssl-errors' (stray space inside the name).
# NOTE(review): --ignore-ssl-errors may not be a switch Chrome recognises;
# --ignore-certificate-errors above is the documented one. Kept for parity.
options.add_argument('--ignore-ssl-errors')
# `chrome_options=` is the deprecated spelling; current Selenium uses `options=`.
driver = wd.Chrome(options=options)
# Implicit wait only affects later find_element* lookups, so set it up front.
driver.implicitly_wait(5)
dataurl = 'https://data.stats.gov.cn/easyquery.htm'
driver.get(dataurl)
# Id of the tree node that gets clicked. The original notes also assigned
# 'treeZhiBiao_12_a' to an unused variable named `id` (shadowing the builtin);
# the id actually clicked was 'treeZhiBiao_2_a', which is kept here.
node_id = 'treeZhiBiao_2_a'
# execute_script runs immediately and is not delayed by the implicit wait, so
# wait explicitly until the dynamically rendered node exists before clicking.
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.ID, node_id)))
# $() here is resolved inside the page, i.e. it relies on the page having
# loaded jQuery; the node id is passed in as arguments[0].
driver.execute_script("$('#' + arguments[0]).click()", node_id)
调试,发现ajax请求的发起和返回如下:
分析 js,理解请求是如何发起的,尽量转换为用 requests(Python 库)直接发送的等价请求
// Excerpt of the page's tree-loading code: issues the ajax request that fetches
// child nodes for `node` and merges the response into the tree.
// Every free variable here (setting, tools, data, view, consts, node, tmpParam,
// _tmpV, isSilent, callback) is defined outside this excerpt.
$.ajax({
// Request options are taken from the tree's `setting.async` configuration.
contentType: setting.async.contentType,
type: setting.async.type,
// NOTE(review): presumably tools.apply calls setting.async.url with
// (treeId, node) when it is a function and otherwise falls back to the third
// argument (the plain url value) - confirm against tools.apply.
url: tools.apply(setting.async.url, [setting.treeId, node], setting.async.url),
data: tmpParam,
dataType: setting.async.dataType,
success: function(msg) {
// Ignore the response when the root's _ver no longer equals _tmpV
// (presumably the version captured when the request was issued).
if (_tmpV != data.getRoot(setting)._ver) {
return;
}
var newNodes = [];
try {
// Normalise the response: empty -> [], string -> evaluated, otherwise as-is.
if (!msg || msg.length == 0) {
newNodes = [];
} else if (typeof msg == "string") {
// SECURITY NOTE: the response text is evaluated as a JavaScript expression,
// not parsed as strict JSON; this executes whatever the server returns.
newNodes = eval("(" + msg + ")");
} else {
newNodes = msg;
}
} catch(err) {
// If evaluation throws, fall back to the raw response.
newNodes = msg;
}
// Clear the in-flight marker on the node and flag it as asynchronously loaded.
if (node) {
node.isAjaxing = null;
node.zAsync = true;
}
view.setNodeLineIcos(setting, node);
if (newNodes && newNodes !== "") {
// Pass the nodes through setting.async.dataFilter (third argument is the
// fallback value handed to tools.apply), then add a clone of the result;
// a falsy filter result adds an empty list instead.
newNodes = tools.apply(setting.async.dataFilter, [setting.treeId, node, newNodes], newNodes);
view.addNodes(setting, node, !!newNodes ? tools.clone(newNodes) : [], !!isSilent);
} else {
view.addNodes(setting, node, [], !!isSilent);
}
// Fire the ASYNC_SUCCESS event with the raw response, then call `callback`
// through tools.apply.
setting.treeObj.trigger(consts.event.ASYNC_SUCCESS, [setting.treeId, node, msg]);
tools.apply(callback);
},
error: function(XMLHttpRequest, textStatus, errorThrown) {
// Same version check as in the success handler.
if (_tmpV != data.getRoot(setting)._ver) {
return;
}
// Clear the in-flight marker, refresh the node's line icons and fire the
// ASYNC_ERROR event with the jQuery error details.
if (node) node.isAjaxing = null;
view.setNodeLineIcos(setting, node);
setting.treeObj.trigger(consts.event.ASYNC_ERROR, [setting.treeId, node, XMLHttpRequest, textStatus, errorThrown]);
}
});
// getTree request for the top level of the tree (id=zb):
//https://data.stats.gov.cn/easyquery.htm?id=zb&dbcode=hgyd&wdcode=zb&m=getTree
// Sample node list - presumably the response captured for the request above.
// Each entry carries dbcode, id, isParent, name, pid and wdcode.
var data = [{"dbcode":"hgyd","id":"A01","isParent":true,"name":"价格指数","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A02","isParent":true,"name":"工业","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A03","isParent":true,"name":"能源","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A04","isParent":true,"name":"固定资产投资(不含农户)","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A05","isParent":true,"name":"服务业生产指数","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0E","isParent":true,"name":"城镇调查失业率","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A06","isParent":true,"name":"房地产","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A07","isParent":true,"name":"国内贸易","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A08","isParent":true,"name":"对外经济","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A09","isParent":true,"name":"交通运输","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0A","isParent":true,"name":"邮电通信","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0B","isParent":true,"name":"采购经理指数","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0C","isParent":true,"name":"财政","pid":"","wdcode":"zb"},{"dbcode":"hgyd","id":"A0D","isParent":true,"name":"金融","pid":"","wdcode":"zb"}]
// Fetch the sub-items of a node by passing that node's id:
//id=A01&dbcode=hgyd&wdcode=zb&m=getTree
//id=A0106&dbcode=hgyd&wdcode=zb&m=getTree
/* url编码后的数据:
m=QueryData&dbcode=hgyd&rowcode=zb&colcode=sj&wds=%5B%5D&dfwds=%5B%7B%22wdcode%22%3A%22zb%22%2C%22valuecode%22%3A%22A010701%22%7D%5D&k1=1629453895767&h=1
m: QueryData
dbcode: hgyd
rowcode: zb
colcode: sj
wds: [] ==%5B%5D
dfwds: [{"wdcode":"zb","valuecode":"A010701"}] == %5B%7B%22wdcode%22%3A%22zb%22%2C%22valuecode%22%3A%22A010701%22%7D%5D
k1: 1629453895767
h: 1
*/
var data =