[爬虫]爬取5173网站上QQ幻想世界的帐号销售情况(MySQL)
要求
爬取的数据写入MySQL数据库
代码
#单 位:常州旺龙
#作 者:OLDNI
#开发日期:2023/11/14
'''
爬取5173网站QQ幻想世界的销售情况
数据写入到了办公室虚拟机192.168.1.124数据库
'''
import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By
# Build the Chrome options first, then launch the browser with them.
options = webdriver.ChromeOptions()
# Command-line switch handed to Chrome at start-up.
options.add_argument('--disable-blink-features=AutomationControlled')
for option_name, option_value in (
    # Original author's note: works around the browser window closing by itself.
    ('detach', True),
    # Original author's note: hides the "controlled by automated test
    # software" banner.
    ("excludeSwitches", ["enable-automation"]),
    ('useAutomationExtension', False),
):
    options.add_experimental_option(option_name, option_value)

browser = webdriver.Chrome(options=options)
# Load the site's home page; the search pages are opened later in new tabs.
browser.get('http://www.5173.com/')
# Element lookups will wait up to 3 seconds before giving up.
browser.implicitly_wait(3)
# Connect to the MySQL database (the database, table and columns must have
# been created beforehand).
# NOTE(review): the host and credentials are hard-coded in this script;
# consider loading them from configuration instead of source code.
db = pymysql.connect(
    host='192.168.1.124',
    port=3306,
    user='oldboy',
    password='www.123.nyc',
    # 'database' is the supported keyword; 'db' is only a deprecated alias.
    database='hxsj',
    charset='utf8',
)
# Cursor used for every INSERT issued by the scraping loop.
cur = db.cursor()
# Search-result URLs to scrape, keyed by '<area>--<server>'.
# Every URL shares the same prefix and suffix and differs only in two path
# codes (one per area, one per server), so the table below stores just those
# codes and the full URLs are generated from the template.
_SEARCH_URL_TEMPLATE = (
    'http://s.5173.com/search/97af17b6b3fb4f8c95dda37c2421c90b-xptjnl-'
    '{area_code}-{server_code}'
    '-0-funuxn-0-0-0-a-a-a-a-a-0-0-0-0.shtml'
)
# (area name, area code, ((server name, server code), ...)) in scrape order.
_AREAS = (
    ('双5', 'vvprso', (
        ('状元', 'pydicn'),
        ('天地', 'fxviao'),
        ('铁血', 'rveqyy'),
        ('海天', 'xu0npx'),
    )),
    ('唯吾', 'n0afld', (
        ('广寒', 'kxfc5x'),
        ('千重', 'frqszf'),
        ('凤凰', 'zipc44'),
    )),
    ('牛气', 'e421u5', (
        ('蒸蒸', 'jsxddp'),
        ('红红', '3whel5'),
    )),
    ('虎年', 'bqbiyo', (
        ('如虎', '5l33as'),
        ('虎虎', 'fcf1jp'),
    )),
    ('召唤', 'mbryti', (
        ('神秘', 'dpc4jg'),
    )),
    ('玉兔', 'gnhyta', (
        ('玉兔', 'j3uc4g'),
        ('嫦娥', 'wfodye'),
    )),
)
urls_d = {
    f'{area_name}--{server_name}': _SEARCH_URL_TEMPLATE.format(
        area_code=area_code, server_code=server_code
    )
    for area_name, area_code, servers in _AREAS
    for server_name, server_code in servers
}
# Visit every search page in its own tab, read each listing on it and insert
# one row per listing into table a1.
#
# The scraped text comes from a third-party web page, so the values are passed
# to the driver as query parameters instead of being pasted into the SQL
# string: a quote inside a title can then neither break nor alter the INSERT.
insert_sql = (
    'insert into a1(server,title,price,saletime,atribute,detail) '
    'values(%s,%s,%s,%s,%s,%s)'
)
try:
    for name, url in urls_d.items():
        # Open the search page in a new tab and switch to it (the newest
        # window handle is the last one in the list).
        browser.execute_script(f'window.open("{url}")')
        browser.switch_to.window(browser.window_handles[-1])

        # Each '.sin_pdlbox' element is handled as one listing.
        for div in browser.find_elements(By.CSS_SELECTOR, '.sin_pdlbox'):
            title = div.find_element(By.CSS_SELECTOR, 'h2').text
            url_account = div.find_element(
                By.CSS_SELECTOR, 'h2>a'
            ).get_attribute('href')
            # Keep only the text after the last ':' of the 4th <li>.
            atribute = div.find_element(
                By.CSS_SELECTOR, 'li:nth-child(4)'
            ).text.split(':')[-1]
            # The price text is a decimal number string: convert it to float
            # first, then truncate to int.
            price = int(float(
                div.find_element(By.CSS_SELECTOR, 'ul.pdlist_price').text
            ))
            sale_time = div.find_element(
                By.CSS_SELECTOR, 'ul.pdlist_num'
            ).text

            row = (name, title, price, sale_time, atribute, url_account)
            try:
                cur.execute(insert_sql, row)
            except pymysql.Error as e:
                # Best effort: report the failed row and carry on with the
                # remaining listings.
                print(f'报错了:{e}')

        # Close this page's tab and return to the first window.
        browser.close()
        browser.switch_to.window(browser.window_handles[0])

    # Commit all inserted rows once every page has been processed.
    db.commit()
finally:
    # Always release the browser and the database connection, even when
    # scraping aborts part-way through.
    try:
        browser.quit()
    finally:
        db.close()