写了一个小爬虫,同时把数据存到MySQL库里,源码如下:
import requests
from bs4 import BeautifulSoup
import pymysql
headers = {"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
" (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"}
url ='http://ring.itools.cn/'
html = requests.get(url)
soup = BeautifulSoup(html.text,'lxml')
#print(soup)
orderNumbers = soup.select('.sound b')
songNames = soup.select('.sound h2')
playAddrs = soup.select('.sound_play')
conn = pymysql.connect(
host='localhost',
port=3306,
user='scuser',
password='scrapy',
db='csdndb',
charset='utf8'
)
for orderNumber,songName,playAddr in zip(orderNumbers,songNames,playAddrs):
data = {
'orderNumber':orderNumber.string,
'musicName':songName.text,
'musicAddr':playAddr.get('lurl')
}
#print(data['orderNumber'],data['orderNumber'],data['orderNumber'])
coursor = conn.cursor()
table = 'music_name'
values = ','.join(['%s']*len(data))
sql = 'insert into {table}(orderNumber,musicName,musicAddr) values({values})'.\
format(table=table,values=values)
try:
if coursor.execute(sql,tuple(data.values())):
print("插入成功")
conn.commit()
except:
print("插入失败")
conn.rollback()
conn.close()
运行报错:
Traceback (most recent call last):
File "H:/Python_workspace/Study_work/Music_2019.03.19.py", line 42, in <module>
if coursor.execute(sql,tuple(data.values())):
File "C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python36_64\Lib\site-packages\pymysql\cursors.py", line 170, in execute
result = self._query(query)
File "C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python36_64\Lib\site-packages\pymysql\cursors.py", line 328, in _query
conn.query(q)
File "C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python36_64\Lib\site-packages\pymysql\connections.py", line 515, in query
self._execute_command(COMMAND.COM_QUERY, sql)
File "C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python36_64\Lib\site-packages\pymysql\connections.py", line 745, in _execute_command
raise err.InterfaceError("(0, '')")
pymysql.err.InterfaceError: (0, '')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "H:/Python_workspace/Study_work/Music_2019.03.19.py", line 47, in <module>
conn.rollback()
File "C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python36_64\Lib\site-packages\pymysql\connections.py", line 431, in rollback
self._execute_command(COMMAND.COM_QUERY, "ROLLBACK")
File "C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python36_64\Lib\site-packages\pymysql\connections.py", line 745, in _execute_command
raise err.InterfaceError("(0, '')")
pymysql.err.InterfaceError: (0, '')
插入失败
注意最后: raise err.InterfaceError("(0, '')")
pymysql.err.InterfaceError: (0, '')
经过各种尝试发现,把MySQL的连接信息写到for循环里就可以正常运行了,至于原因,我也不知道,希望有大佬可以给出解释。
改正后的代码:
import requests
from bs4 import BeautifulSoup
import pymysql
headers = {"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
" (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"}
url ='http://ring.itools.cn/'
html = requests.get(url)
soup = BeautifulSoup(html.text,'lxml')
#print(soup)
orderNumbers = soup.select('.sound b')
songNames = soup.select('.sound h2')
playAddrs = soup.select('.sound_play')
for orderNumber,songName,playAddr in zip(orderNumbers,songNames,playAddrs):
data = {
'orderNumber':orderNumber.string,
'musicName':songName.text,
'musicAddr':playAddr.get('lurl')
}
#print(data['orderNumber'],data['orderNumber'],data['orderNumber'])
conn = pymysql.connect(
host='localhost',
port=3306,
user='scuser',
password='scrapy',
db='csdndb',
charset='utf8'
)
coursor = conn.cursor()
table = 'music_name'
values = ','.join(['%s']*len(data))
sql = 'insert into {table}(orderNumber,musicName,musicAddr) values({values})'.\
format(table=table,values=values)
try:
if coursor.execute(sql,tuple(data.values())):
print("插入成功")
conn.commit()
except:
print("插入失败")
conn.rollback()
conn.close()