最近在学习 Selenium 相关内容，写个小例子练练手。
#!/usr/bin/python
#coding=utf-8
'''
selenium spider
https://movie.douban.com/top250
'''
from selenium import webdriver
import time
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )
# Entry page of the Douban Top 250 chart (the site named in the module
# docstring).
url = 'https://movie.douban.com/top250'
# The backslash is escaped explicitly so the path does not rely on '\d'
# being passed through as an unknown escape sequence.
output_path = 'D:\\douban_movie.txt'
# Number of result pages to walk through.
page_count = 10

# Launch the browser and open the chart.
driver = webdriver.Firefox()
try:
    driver.get(url)
    # The implicit wait is a session-wide element-lookup timeout, not a
    # sleep, so it is configured once rather than after every page turn.
    driver.implicitly_wait(5)
    driver.maximize_window()
    # The context manager closes the file even if scraping fails midway.
    with open(output_path, 'w') as f:
        for page in range(page_count):
            # Each movie entry on the current page is an element with the
            # class name 'info'.
            for entry in driver.find_elements_by_class_name('info'):
                # The first line of the entry text is the movie title.
                title = entry.text.split('\n')[0]
                print(title)
                # Writing unicode text to this byte-mode file relies on the
                # sys.setdefaultencoding("utf-8") call made at import time.
                f.write(title + '\n')
            # Go to the next page and scrape again; after the last page
            # there is nothing further to visit, so the click is skipped.
            if page < page_count - 1:
                driver.find_element_by_class_name('next').click()
    # Keep the window visible for a moment before shutting down.
    time.sleep(3)
finally:
    # quit() ends the whole WebDriver session (close() would only close the
    # current window), and running it in `finally` releases the browser
    # even when an earlier step raised.
    driver.quit()