一、通过selenium来使用phantomjs:selenium封装了phantomjs的一部分功能,同时又提供了python的接口模块,因此在python语言中可以很方便地使用selenium,从而间接地使用phantomjs。
例如:
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# Command-line switches passed to the PhantomJS process.
SERVICE_ARGS = [
    '--load-images=false',
    '--disk-cache=true',
]

# A single module-level PhantomJS session; login_newrank() drives this object.
driver = webdriver.PhantomJS(
    executable_path=r"E:\phantomjs-2.1.1-windows\bin\phantomjs.exe",
    service_args=SERVICE_ARGS,
)
def login_newrank(url, max_retries=3):
    """Log in through the module-level PhantomJS ``driver`` and print the page.

    Opens ``url``, clicks the element with class ``login-normal-tap``
    (presumably the account/password tab -- confirm against the page), types
    the account and password, clicks the confirm button, then sleeps three
    seconds and prints ``driver.page_source``.

    Earlier this function retried by calling itself from the ``except``
    branch, so a page that kept failing recursed without any bound.  Retries
    are now an explicit, bounded loop.

    Args:
        url: Address of the login page to open.
        max_retries: Number of additional attempts made after the first one
            fails.  Negative values are treated as 0.

    Returns:
        None.  The page source is printed, not returned.
    """
    attempts = max(int(max_retries), 0) + 1
    # One waiter is enough: each until() call gets its own 10 second budget.
    wait = WebDriverWait(driver, 10)

    for _ in range(attempts):
        try:
            driver.get(url)
            login = wait.until(EC.presence_of_element_located((By.XPATH, '//div[@class="login-normal-tap"]')))
            login.click()
            print('登陆界面')
            user = wait.until(EC.presence_of_element_located((By.XPATH, '//input[@id="account_input"]')))
            user.send_keys('13500000000')
            pwd = wait.until(EC.presence_of_element_located((By.XPATH, '//input[@id="password_input"]')))
            pwd.send_keys('abc123')
            confirm = wait.until(EC.element_to_be_clickable((By.XPATH, '//div[@id="pwd_confirm"]')))
            confirm.click()
            print('登陆')
        except Exception as e:
            # Deliberately broad, as in the original: any failure in the flow
            # (timeout, missing element, driver error) just triggers a retry.
            print(e)
        else:
            # Give the post-login page a moment to load before dumping it.
            time.sleep(3)
            print(driver.page_source)
            return

    print('login failed after %d attempts' % attempts)
if __name__ == '__main__':
    # Script entry point: run the login flow against the newrank login page.
    login_newrank(
        'http://www.newrank.cn/public/login/login.html?back=http%3A//www.newrank.cn/'
    )
二、把phantomjs的Webservice(webserver模块)作为一种web服务(api)来使用,使其与其他语言(比如python)分离开来。例如:
service.js
// Start-up section of the PhantomJS rendering service: pick the port and
// create the web server object.
var system = require('system');
var args = system.args;

// Exactly two arguments means a port was supplied as args[1]; in every other
// case the service uses 8080.
var port = (args.length === 2) ? Number(args[1]) : 8080;

var webserver = require('webserver');
var server = webserver.create();
// Process-wide PhantomJS error hook.  Installed once at start-up instead of
// being re-assigned inside every request; what it does is unchanged: log the
// message and trace, then exit with code 1.
phantom.onError = function(msg, trace) {
    console.log("[Warning]This is phantom.onError");
    var msgStack = ['PHANTOM ERROR: ' + msg];
    if (trace && trace.length) {
        msgStack.push('TRACE:');
        trace.forEach(function(t) {
            msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line + (t.function ? ' (in function ' + t.function +')' : ''));
        });
    }
    console.log(msgStack.join('\n'));
    phantom.exit(1);
};

// Body returned whenever the requested page could not be rendered.
var FAILURE_BODY = "1";

// Request handler.  The client POSTs one form pair "<url-encoded url>=<md5>";
// the key is the page to render.  The reply body is the rendered page content
// on success and FAILURE_BODY otherwise.
var service = server.listen(port, function(request, response) {
    var page = null;
    var finished = false;

    // Send exactly one reply and release the page.  onLoadFinished may fire
    // more than once for a single page, and the catch branch below may also
    // run, so later calls are ignored.
    function reply(statusCode, body) {
        if (finished) {
            return;
        }
        finished = true;
        // The webserver response object exposes `statusCode`; assigning to
        // `status` (as this script used to) has no effect on the reply.
        response.statusCode = statusCode;
        response.write(body);
        if (page) {
            page.close();
            page = null;
        }
        response.close();
    }

    try {
        var postRaw = request.postRaw;
        if (!postRaw) {
            // No POST body (e.g. a GET request): nothing to render.
            reply(400, FAILURE_BODY);
            return;
        }

        // Only the key (everything before the first "=") is used.
        var sep = postRaw.indexOf("=");
        var encodedUrl = (sep === -1) ? postRaw : postRaw.substring(0, sep);
        // Form encoding writes a space as "+", which decodeURIComponent
        // leaves untouched; a literal "+" arrives as %2B, so this is safe.
        var url = decodeURIComponent(encodedUrl.replace(/\+/g, '%20'));

        var webPage = require('webpage');
        page = webPage.create();
        page.settings.userAgent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Mobile Safari/537.36';
        page.settings.resourceTimeout = 20000;//timeout is 20s

        // Script errors raised inside the loaded page: log them and carry on.
        page.onError = function(msg, trace) {
            console.log("[Warning]This is page.onError");
            var msgStack = ['ERROR: ' + msg];
            if (trace && trace.length) {
                msgStack.push('TRACE:');
                trace.forEach(function(t) {
                    msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function +'")' : ''));
                });
            }
            // The stack used to be built and then thrown away.
            console.log(msgStack.join('\n'));
        };

        page.onLoadFinished = function(status) {
            console.log('Status: ' + status);
            if (status == 'success') {
                reply(200, page.content);
            } else {
                // Load failures keep the original protocol: HTTP 200 with the
                // failure body.
                reply(200, FAILURE_BODY);
            }
        };

        page.open(url, function (status) {
            console.log('Target_url is ' + url);
        });
    }
    catch (e) {
        console.log('[Error]'+e.message+'happen'+e.lineNumber+'line');
        // Always answer, otherwise the client blocks until its own timeout.
        reply(500, FAILURE_BODY);
    }
});

// listen() reports failure (for example, port already in use) through its
// return value.
if (!service) {
    console.log('[Error]could not start web server on port ' + port);
    phantom.exit(1);
}
requests_test.py
import requests
import hashlib
import base64,re
from multiprocessing.dummy import Pool
class http_request:
    """Small client for the local PhantomJS rendering service."""

    def __init__(self, port="8080"):
        """Store the service endpoint.

        Args:
            port: Port the service listens on, as a string or an int.
        """
        # str() lets callers pass an int; plain concatenation raised
        # TypeError for anything that was not already a string.
        self.url = "http://localhost:" + str(port)

    def getwebbody(self, domain):
        """Fetch the page source of ``domain`` through the service.

        The request is a form POST with a single pair: the key is the target
        URL and the value is the MD5 hex digest of the base64-encoded URL.

        Args:
            domain: URL of the page to render (a ``str``).

        Returns:
            The raw response body as ``bytes``, or ``None`` when the request
            failed (the error is printed).
        """
        base_domain = base64.b64encode(domain.encode('utf8'))
        md5_domain = hashlib.md5(base_domain).hexdigest()
        payload = {domain: md5_domain}
        try:
            return requests.post(self.url, data=payload, timeout=30).content
        except requests.exceptions.ConnectionError:
            print("requests connection error")
        except Exception as e:
            # Best-effort by design: report the problem and fall through.
            print(e)
        return None
if __name__ == "__main__":
    # Demo: render one page through the local service and pull the first
    # m3u8 address out of its <video> tag.
    port = "8080"
    cur = http_request(port)
    # Other pages this was tried against:
    # domain="http://app.cntv.cn/special/cportal/newlive/index.html?id=LiveRZy6XP4F1Z2DERFogaLe170917&fromapp=cctvnews&from=singlemessage&isappinstalled=1&btime=1505612965&bauth=4ef2309698028ea2f53824d6bc707cae"
    # domain="http://izhibo.ifeng.com/live.html?liveid=110695&c_from_app=ifengnews&aman=06o208R4ecqe0b8f56fe88Ve54Ub9d1ec9x515aa08"
    # domain="http://wap-live.myzaker.com/?wap_open_type=wap&live_id=21840"
    # domain="https://c.m.163.com/news/l/154715.html?spss=newsapp&spsw=1&from=singlemessage&isappinstalled=1"
    domain = "http://www.newscctv.net/219h5/#/article?videoId=ACBEF932-87FB-A8FB-179D-9BE3CCCEF9DA"
    ctn = cur.getwebbody(domain)
    print(ctn)
    if ctn is None:
        # getwebbody() returns None on a failed request; calling .decode()
        # on it raised AttributeError.
        print("no response from the rendering service")
    else:
        # search().group(1) yields the same value as findall()[0], but a page
        # without an m3u8 link no longer raises IndexError.
        found = re.search("<video.*?src=\"(http.*?m3u8)", ctn.decode('utf8'))
        if found is None:
            print("no m3u8 address found in the page")
        else:
            m3u8 = found.group(1)
            print(m3u8)
但在这个例子中,有些页面获取不到m3u8地址,具体原因暂时未知。
参考:
https://thief.one/2017/03/31/Phantomjs%E6%AD%A3%E7%A1%AE%E6%89%93%E5%BC%80%E6%96%B9%E5%BC%8F/
https://thief.one/2017/03/01/Phantomjs%E7%88%AC%E8%BF%87%E7%9A%84%E9%82%A3%E4%BA%9B%E5%9D%91/