[爬虫]爬取5173网站上QQ幻想世界的帐号销售情况(MySQL)

作者: oldboy 分类: Python 发布时间: 2023-11-14 21:42

要求

爬取的数据写入MySQL数据库

代码

#单   位:常州旺龙
#作   者:OLDNI
#开发日期:2023/11/14

'''
爬取5173网站QQ幻想世界的销售情况
数据写入到了办公室虚拟机192.168.1.124数据库
'''

import pymysql
from selenium import webdriver
from selenium.webdriver.common.by import By

# Build the Chrome driver used for the whole scraping session.
options = webdriver.ChromeOptions()
for option_name, option_value in (
    # Works around the browser window closing immediately.
    ('detach', True),
    # Hides the "Chrome is being controlled by automated test software" notice.
    ('excludeSwitches', ['enable-automation']),
    ('useAutomationExtension', False),
):
    options.add_experimental_option(option_name, option_value)
options.add_argument('--disable-blink-features=AutomationControlled')

# Open the site's home page first; later pages are opened in extra tabs.
browser = webdriver.Chrome(options=options)
browser.get('http://www.5173.com/')
browser.implicitly_wait(3)

# Connect to MySQL. The database, table and columns must be created beforehand.
# NOTE(review): host and credentials are hard-coded in this script — consider
# loading them from configuration instead.
connection_settings = {
    'host': '192.168.1.124',
    'port': 3306,
    'user': 'oldboy',
    'password': 'www.123.nyc',
    'db': 'hxsj',
    'charset': 'utf8',
}
db = pymysql.connect(**connection_settings)
cur = db.cursor()

# Search-result pages to scrape, keyed by a "<area>--<server>" label.
# Every URL shares the same prefix and suffix; only the "<area>-<server>"
# code pair in the middle differs, so the mapping is built from a template.
_SEARCH_URL_TEMPLATE = (
    'http://s.5173.com/search/97af17b6b3fb4f8c95dda37c2421c90b-xptjnl-'
    '{codes}'
    '-0-funuxn-0-0-0-a-a-a-a-a-0-0-0-0.shtml'
)
_SERVER_CODES = (
    ('双5--状元', 'vvprso-pydicn'),
    ('双5--天地', 'vvprso-fxviao'),
    ('双5--铁血', 'vvprso-rveqyy'),
    ('双5--海天', 'vvprso-xu0npx'),
    ('唯吾--广寒', 'n0afld-kxfc5x'),
    ('唯吾--千重', 'n0afld-frqszf'),
    ('唯吾--凤凰', 'n0afld-zipc44'),
    ('牛气--蒸蒸', 'e421u5-jsxddp'),
    ('牛气--红红', 'e421u5-3whel5'),
    ('虎年--如虎', 'bqbiyo-5l33as'),
    ('虎年--虎虎', 'bqbiyo-fcf1jp'),
    ('召唤--神秘', 'mbryti-dpc4jg'),
    ('玉兔--玉兔', 'gnhyta-j3uc4g'),
    ('玉兔--嫦娥', 'gnhyta-wfodye'),
)
urls_d = {
    label: _SEARCH_URL_TEMPLATE.format(codes=codes)
    for label, codes in _SERVER_CODES
}
# Visit every listing page, store each listed account in MySQL, then commit
# once and release the browser and the database connection.
#
# The INSERT uses %s placeholders so pymysql escapes the scraped text itself:
# a quote inside a listing title can neither break the statement nor inject
# SQL. The column name "atribute" is spelled as it is in the existing table.
insert_sql = (
    "insert into a1(server,title,price,saletime,atribute,detail) "
    "values(%s,%s,%s,%s,%s,%s)"
)
try:
    for name, url in urls_d.items():
        # Open the listing page in a new tab and make that tab the active one.
        browser.execute_script(f'window.open("{url}")')
        browser.switch_to.window(browser.window_handles[-1])
        for div in browser.find_elements(By.CSS_SELECTOR, '.sin_pdlbox'):
            title = div.find_element(By.CSS_SELECTOR, 'h2').text
            url_account = div.find_element(
                By.CSS_SELECTOR, 'h2>a').get_attribute('href')
            # Keep only the value after the last ':' of the fourth list item.
            attribute = div.find_element(
                By.CSS_SELECTOR, 'li:nth-child(4)').text.split(':')[-1]
            price_text = div.find_element(
                By.CSS_SELECTOR, 'ul.pdlist_price').text
            sale_time = div.find_element(
                By.CSS_SELECTOR, 'ul.pdlist_num').text
            # The price is a numeric string with decimals: convert it to
            # float first, then truncate to int. A row whose price cannot be
            # parsed is skipped instead of aborting the whole run.
            try:
                price = int(float(price_text))
            except ValueError as e:
                print(f'报错了:{e}')
                continue
            # Best effort: a failed insert is reported and the loop goes on.
            try:
                cur.execute(
                    insert_sql,
                    (name, title, price, sale_time, attribute, url_account),
                )
            except pymysql.Error as e:
                print(f'报错了:{e}')
        # Close this tab and return to the first window before the next page.
        browser.close()
        browser.switch_to.window(browser.window_handles[0])
    # Persist all inserted rows in a single commit.
    db.commit()
finally:
    # Always release the browser and the connection, even if scraping failed.
    browser.quit()
    db.close()

效果

如果觉得我的文章对您有用,请随意打赏。您的支持将鼓励我继续创作!

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注

标签云