[爬虫]根据excel表格里的数据爬取后台数据并写入excel表格

作者: oldboy 分类: Python 发布时间: 2024-02-07 17:24

1、要求:根据已经存在的IP表C列里的IP到鸟云平台上查询订单状态并写入的后面对应的D列

2、代码:

#单   位:常州旺龙
#作   者:OLDNI
#开发日期:2024/2/1
import os
import re
from datetime import datetime

from bs4 import BeautifulSoup
from openpyxl import load_workbook
import requests
from openpyxl.styles import PatternFill

url_ori='http://w.yhyun.com/index.php/meihao2021/baremetal/index.html?highSearch=&buyTime1=&buyTime2=&endTime1=&endTime2=&lockStatus=&_field=&_order=&page=1&pagesize=10&keywordType=ip&keyword=61.160.212.130&lineid=&runStatus=&password=&buyTime1=&buyTime2=&endTime1=&endTime2=&lockStatus='
lst= re.split('keyword=|&lineid', url_ori)
headers={
    #这里的cookie是已经登录帐户的
    'Cookie':'idcStockTask=off; PHPSESSID=e53pqbra071u9di5gbj1usk3s3; 116yunzkeys_language=zh-CN; 116yun__referer__=L2luZGV4LnBocC9tZWloYW8yMDIxL0JhcmVtZXRhbFN0b2NrL2luZGV4Lmh0bWw%2Fa2V5d29yZFR5cGU9aXBtaWlwJmtleXdvcmQ9JnBhZ2U9MSZwYWdlc2l6ZT0xMCZsaW5lSWQ9MTc1MTM0NzQzMzI5ODY3OCZjYWJpbmV0SWQ9JnN0YXR1cz0wJmNoZWNrX3N0YXR1cz0mZW5hYmxlPQ%3D%3D',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0',
}

wb=load_workbook('./check_ip_state.xlsx') #把要查询的机柜或整个IP表的数据粘贴到check_ip_state.xlsx表里
sheet=wb.active
cells=sheet['C']  #获取C列的所有数据
for cell in cells:
    #获取单元格的值
    ip=cell.value
    #获取单元格的坐标
    coordinate=cell.coordinate
    # 获取当前ip单元格的前面一个单元格的坐标(比如C1单元格的前面一个单元格是B1)
    row_index = cell.row
    before_coordinate = 'B' + str(row_index)
    #获取机柜号
    cabinet=sheet[before_coordinate].value
    #获取当前ip单元格的后面一个单元格的坐标(比如C1单元格的后面一个单元格是D1)
    row_index=cell.row
    after_coordinate='D'+str(row_index)

    #如果单元格为空就跳过本次循环
    if not ip:
        continue
    url=lst[0]+'keyword='+ip+'&lineid'+lst[2]
    resp = requests.get(url, headers=headers)
    soup = BeautifulSoup(resp.text, 'html.parser')
    try:
        tbody = soup.select('#server_list_table > tbody > tr')[0]
        line = tbody.select('td')[1].text
        model = tbody.select('td')[2].text
        ip = tbody.select('td')[3].text.split()[0]
        ipmi_ip = tbody.select('td')[4].text.strip()
        id = tbody.select('td')[7].text
        start_time = tbody.select('td')[8].text
        stop_time = tbody.select('td')[9].text
        # print(line, model, ip, ipmi_ip, id, start_time, stop_time)
    except:
        msg = '闲置'
        print(cabinet,ip, coordinate,after_coordinate,msg)
        sheet[after_coordinate]=msg
        #单元格无值的话再跳过本次循环,不在执行下面的命令
        continue
    print(cabinet,ip,coordinate,after_coordinate, stop_time)
    sheet[after_coordinate] = stop_time
#服务器的状态为闲置里填充绿色(0000FF00)
patternfill=PatternFill(patternType='solid',fgColor='0000FF00')
cells=sheet['D']
for cell in cells:
    if cell.value == '闲置':
        cell.fill=patternfill
now_time=datetime.now().strftime('%Y-%m-%d')
file_name='./check_ip_state' + now_time + '.xlsx'
wb.save(file_name)
#因为os.startfile只能打开绝对路径的文件,先通过os.path.abspath获取绝对路径
file_path=os.path.abspath(file_name)
os.startfile(file_path)

3、效果

如果觉得我的文章对您有用,请随意打赏。您的支持将鼓励我继续创作!

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注

标签云