[爬虫]根据excel表格里的数据爬取后台数据并写入excel表格
1、要求:根据已经存在的IP表C列里的IP到鸟云平台上查询订单状态并写入的后面对应的D列
2、代码:
#单 位:常州旺龙
#作 者:OLDNI
#开发日期:2024/2/1
import os
import re
from datetime import datetime
from bs4 import BeautifulSoup
from openpyxl import load_workbook
import requests
from openpyxl.styles import PatternFill
url_ori='http://w.yhyun.com/index.php/meihao2021/baremetal/index.html?highSearch=&buyTime1=&buyTime2=&endTime1=&endTime2=&lockStatus=&_field=&_order=&page=1&pagesize=10&keywordType=ip&keyword=61.160.212.130&lineid=&runStatus=&password=&buyTime1=&buyTime2=&endTime1=&endTime2=&lockStatus='
lst= re.split('keyword=|&lineid', url_ori)
headers={
#这里的cookie是已经登录帐户的
'Cookie':'idcStockTask=off; PHPSESSID=e53pqbra071u9di5gbj1usk3s3; 116yunzkeys_language=zh-CN; 116yun__referer__=L2luZGV4LnBocC9tZWloYW8yMDIxL0JhcmVtZXRhbFN0b2NrL2luZGV4Lmh0bWw%2Fa2V5d29yZFR5cGU9aXBtaWlwJmtleXdvcmQ9JnBhZ2U9MSZwYWdlc2l6ZT0xMCZsaW5lSWQ9MTc1MTM0NzQzMzI5ODY3OCZjYWJpbmV0SWQ9JnN0YXR1cz0wJmNoZWNrX3N0YXR1cz0mZW5hYmxlPQ%3D%3D',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0',
}
wb=load_workbook('./check_ip_state.xlsx') #把要查询的机柜或整个IP表的数据粘贴到check_ip_state.xlsx表里
sheet=wb.active
cells=sheet['C'] #获取C列的所有数据
for cell in cells:
#获取单元格的值
ip=cell.value
#获取单元格的坐标
coordinate=cell.coordinate
# 获取当前ip单元格的前面一个单元格的坐标(比如C1单元格的前面一个单元格是B1)
row_index = cell.row
before_coordinate = 'B' + str(row_index)
#获取机柜号
cabinet=sheet[before_coordinate].value
#获取当前ip单元格的后面一个单元格的坐标(比如C1单元格的后面一个单元格是D1)
row_index=cell.row
after_coordinate='D'+str(row_index)
#如果单元格为空就跳过本次循环
if not ip:
continue
url=lst[0]+'keyword='+ip+'&lineid'+lst[2]
resp = requests.get(url, headers=headers)
soup = BeautifulSoup(resp.text, 'html.parser')
try:
tbody = soup.select('#server_list_table > tbody > tr')[0]
line = tbody.select('td')[1].text
model = tbody.select('td')[2].text
ip = tbody.select('td')[3].text.split()[0]
ipmi_ip = tbody.select('td')[4].text.strip()
id = tbody.select('td')[7].text
start_time = tbody.select('td')[8].text
stop_time = tbody.select('td')[9].text
# print(line, model, ip, ipmi_ip, id, start_time, stop_time)
except:
msg = '闲置'
print(cabinet,ip, coordinate,after_coordinate,msg)
sheet[after_coordinate]=msg
#单元格无值的话再跳过本次循环,不在执行下面的命令
continue
print(cabinet,ip,coordinate,after_coordinate, stop_time)
sheet[after_coordinate] = stop_time
#服务器的状态为闲置里填充绿色(0000FF00)
patternfill=PatternFill(patternType='solid',fgColor='0000FF00')
cells=sheet['D']
for cell in cells:
if cell.value == '闲置':
cell.fill=patternfill
now_time=datetime.now().strftime('%Y-%m-%d')
file_name='./check_ip_state' + now_time + '.xlsx'
wb.save(file_name)
#因为os.startfile只能打开绝对路径的文件,先通过os.path.abspath获取绝对路径
file_path=os.path.abspath(file_name)
os.startfile(file_path)
3、效果