[爬虫]爬取5173网站上QQ幻想世界的帐号销售情况(excel)
要求
将爬取到的数据写入 Excel 表格。
代码
#单 位:常州旺龙
#作 者:OLDNI
#开发日期:2023/11/13
'''
爬取5173网站QQ幻想世界的销售情况
数据写入到了excel表格
'''
import os
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
from selenium import webdriver
from selenium.webdriver.common.by import By
# Build the Chrome options before launching the browser.
options = webdriver.ChromeOptions()

# Experimental options.
# 'detach': per the original note, works around the browser window closing
# right away.
options.add_experimental_option('detach', True)
# Hides the "is being controlled by automated software" notice.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

# Command-line switch.
# NOTE(review): presumably makes the automation harder to detect - confirm.
options.add_argument('--disable-blink-features=AutomationControlled')

# Launch Chrome, open the site's home page, then set the implicit wait (3)
# that applies to the element lookups performed later in the script.
browser = webdriver.Chrome(options=options)
browser.get('http://www.5173.com/')
browser.implicitly_wait(3)
# Column titles written as the first row of the worksheet.
header = ['服务器', '标题', '价格', '已售时间', '属性', '详情页']

# Create the workbook and use its active sheet as the output table.
wb = Workbook()
sheet = wb.active
sheet.append(header)
# Search-result pages to open and scrape, keyed by a "<group>--<server>" label.
# Every URL shares the same prefix and suffix; only two slugs differ, so the
# mapping is generated from a template plus a table of (label, slug, slug).
# NOTE(review): the two slugs presumably identify the server group and the
# individual server on 5173 - confirm against the site.
_SEARCH_URL_TEMPLATE = (
    'http://s.5173.com/search/97af17b6b3fb4f8c95dda37c2421c90b-xptjnl-'
    '{group}-{server}-0-funuxn-0-0-0-a-a-a-a-a-0-0-0-0.shtml'
)

_SERVER_SLUGS = (
    ('双5--状元', 'vvprso', 'pydicn'),
    ('双5--天地', 'vvprso', 'fxviao'),
    ('双5--铁血', 'vvprso', 'rveqyy'),
    ('双5--海天', 'vvprso', 'xu0npx'),
    ('唯吾--广寒', 'n0afld', 'kxfc5x'),
    ('唯吾--千重', 'n0afld', 'frqszf'),
    ('唯吾--凤凰', 'n0afld', 'zipc44'),
    ('牛气--蒸蒸', 'e421u5', 'jsxddp'),
    ('牛气--红红', 'e421u5', '3whel5'),
    ('虎年--如虎', 'bqbiyo', '5l33as'),
    ('虎年--虎虎', 'bqbiyo', 'fcf1jp'),
    ('召唤--神秘', 'mbryti', 'dpc4jg'),
    ('玉兔--玉兔', 'gnhyta', 'j3uc4g'),
    ('玉兔--嫦娥', 'gnhyta', 'wfodye'),
)

# Insertion order follows the table above, so iteration order is unchanged.
urls_d = {
    label: _SEARCH_URL_TEMPLATE.format(group=group_slug, server=server_slug)
    for label, group_slug, server_slug in _SERVER_SLUGS
}
# Open each search page in its own tab, append one worksheet row per listing,
# then close the tab and return to the first window.
# The try/finally guarantees browser.quit() runs even if a lookup raises
# part-way through; without it an error would skip the shutdown entirely.
try:
    for name, url in urls_d.items():
        # Pass the URL as a script argument instead of splicing it into the
        # JavaScript source, so quotes or backslashes in a URL cannot break it.
        browser.execute_script('window.open(arguments[0])', url)
        # The tab that was just opened is the last handle; make it active.
        browser.switch_to.window(browser.window_handles[-1])
        div_list = browser.find_elements(By.CSS_SELECTOR, '.sin_pdlbox')
        for div in div_list:
            title = div.find_element(By.CSS_SELECTOR, 'h2').text
            url_account = div.find_element(By.CSS_SELECTOR, 'h2>a').get_attribute('href')
            # Keep only the text after the last ':' of the fourth <li>.
            atribute = div.find_element(By.CSS_SELECTOR, 'li:nth-child(4)').text.split(':')[-1]
            price_text = div.find_element(By.CSS_SELECTOR, 'ul.pdlist_price').text
            # The price is expected to be a numeric string with decimals:
            # convert to float first, then truncate to int.
            try:
                price = int(float(price_text))
            except ValueError:
                # Unexpected price format: keep the raw text so that a single
                # odd listing does not abort the whole run.
                price = price_text
            sale_time = div.find_element(By.CSS_SELECTOR, 'ul.pdlist_num').text
            # Column order must match the header row:
            # server, title, price, sold time, attributes, detail page.
            data = [name, title, price, sale_time, atribute, url_account]
            sheet.append(data)
        # Close the finished tab and go back to the first window before
        # opening the next search page.
        browser.close()
        browser.switch_to.window(browser.window_handles[0])
finally:
    browser.quit()
# ---- Worksheet styling, save, and open ----

# Column widths, keyed by column letter.
column_number_and_dimensions = {'A': 12, 'B': 69, 'C': 12, 'D': 12, 'E': 60, 'F': 40}
for n, d in column_number_and_dimensions.items():
    sheet.column_dimensions[n].width = d

# Freeze panes at B2: per openpyxl, the rows above and the columns to the left
# of this cell stay fixed, i.e. the header row and column A.
sheet.freeze_panes = 'B2'

# Header row style: bold text, solid FFFF00 fill, horizontally centered.
cells = sheet[1]
font = Font(bold=True)
patternfill = PatternFill(fill_type='solid', fgColor='FFFF00')
alignment = Alignment(horizontal='center')
for cell in cells:
    cell.font = font
    cell.fill = patternfill
    cell.alignment = alignment

# Keep the output file name in one place so save and open cannot drift apart.
output_path = '5173-QQ幻想世界帐号销售情况.xlsx'
wb.save(output_path)

# os.startfile only exists on Windows; on other platforms report where the
# file was written instead of failing with AttributeError after the save.
if hasattr(os, 'startfile'):
    os.startfile(output_path)
else:
    print(f'Saved to {os.path.abspath(output_path)}')