有个客户的机器基数很大,但是故障率很高,客户要求进行筛查,把带外的风险项挑出来,并提前更换风险件。公司工具可以批量收集服务器日志,但是信息显示还是不够直观和简洁,于是用Python重新做了一个脚本,选择了内存、PCIe的CE和UCE四种类型来进行筛选,并输出到csv文件里。Python还是很好用的,解压压缩包以及按时间戳进行一定的筛选都可以轻松做到,代码如下:
import os
import csv
import tarfile
import sys
from datetime import datetime
#import pythoncopy
#import codeimport
def get_serversn(file_path):
    """Return the chassis serial number recorded in a FRU info file.

    Every line containing ``Chassis Serial Number`` is inspected and the text
    between the first and second ``=`` (or the end of the line) is taken as
    the value. When several lines match, the last one wins.

    Args:
        file_path: Path of the text file to scan. It is decoded as UTF-8 and
            undecodable bytes are ignored.

    Returns:
        The serial number with surrounding whitespace removed, or ``None``
        when no matching ``key=value`` line exists.
    """
    server_sn = None
    with open(file_path, encoding='utf-8', errors='ignore') as f:
        for line in f:
            if "Chassis Serial Number" not in line:
                continue
            parts = line.split("=")
            # A matching line without "=" carries no value; skip it instead
            # of failing with IndexError.
            if len(parts) > 1:
                server_sn = parts[1].strip()
    return server_sn
def check_pcie_uce(txt_path,ssn,slot,ucn,uce_sensor_counts):
    """Count "Bus Uncorrectable Error" events in one event log file.

    Each matching line is split on ``,``; the field at index 0 is used as the
    event time and the field at index 14 as the sensor name.

    Args:
        txt_path: Path of the log file (decoded as UTF-8, bad bytes ignored).
        ssn: Not used; kept so existing callers keep working.
        slot: Not used; kept so existing callers keep working.
        ucn: Running event count to continue from.
        uce_sensor_counts: Dict mapping sensor name to event count. It is
            updated in place and also returned.

    Returns:
        A tuple ``(ucn, uce_sensor_counts, uce_warn_time)`` where
        ``uce_warn_time`` is the time field of the last counted line in this
        file, or ``None`` when the file contributed no events.
    """
    uce_warn_time = None
    with open(txt_path, encoding='utf-8', errors='ignore') as f:
        for line in f:
            if "Bus Uncorrectable Error" not in line:
                continue
            fields = line.strip().split(',')
            # Only the time and sensor columns are needed. A truncated line
            # that lacks the sensor column cannot be attributed to a sensor,
            # so it is skipped rather than raising IndexError.
            if len(fields) <= 14:
                continue
            ucn += 1
            sensor_name = fields[14].strip()
            uce_warn_time = fields[0].strip()
            uce_sensor_counts[sensor_name] = uce_sensor_counts.get(sensor_name, 0) + 1
    return ucn, uce_sensor_counts, uce_warn_time
def check_pcie_ce(txt_path,ssn,slot,cn,ce_sensor_counts):
    """Count "Bus Correctable Error" events in one event log file.

    Each matching line is split on ``,``; the field at index 0 is used as the
    event time and the field at index 14 as the sensor name.

    Args:
        txt_path: Path of the log file (decoded as UTF-8, bad bytes ignored).
        ssn: Not used; kept so existing callers keep working.
        slot: Not used; kept so existing callers keep working.
        cn: Running event count to continue from.
        ce_sensor_counts: Dict mapping sensor name to event count. It is
            updated in place and also returned.

    Returns:
        A tuple ``(cn, ce_sensor_counts, ce_warn_time)`` where
        ``ce_warn_time`` is the time field of the last counted line in this
        file, or ``None`` when the file contributed no events.
    """
    ce_warn_time = None
    with open(txt_path, encoding='utf-8', errors='ignore') as f:
        for line in f:
            if "Bus Correctable Error" not in line:
                continue
            fields = line.strip().split(',')
            # Only the time and sensor columns are needed. A truncated line
            # that lacks the sensor column cannot be attributed to a sensor,
            # so it is skipped rather than raising IndexError.
            if len(fields) <= 14:
                continue
            cn += 1
            sensor_name = fields[14].strip()
            ce_warn_time = fields[0].strip()
            ce_sensor_counts[sensor_name] = ce_sensor_counts.get(sensor_name, 0) + 1
    return cn, ce_sensor_counts, ce_warn_time
def check_mem_ce(txt_path,ssn,slot,cn,sensor_counts):
    """Count "Correctable ECC" events in one event log file.

    Each matching line is split on ``,``; the field at index 0 is used as the
    event time and the field at index 14 as the sensor name.

    Args:
        txt_path: Path of the log file (decoded as UTF-8, bad bytes ignored).
        ssn: Not used; kept so existing callers keep working.
        slot: Not used; kept so existing callers keep working.
        cn: Running event count to continue from.
        sensor_counts: Dict mapping sensor name to event count. It is updated
            in place and also returned.

    Returns:
        A tuple ``(cn, sensor_counts, warn_time)`` where ``warn_time`` is the
        time field of the last counted line in this file, or ``None`` when
        the file contributed no events.
    """
    warn_time = None
    with open(txt_path, encoding='utf-8', errors='ignore') as f:
        for line in f:
            if "Correctable ECC" not in line:
                continue
            fields = line.strip().split(',')
            # Only the time and sensor columns are needed. A truncated line
            # that lacks the sensor column cannot be attributed to a sensor,
            # so it is skipped rather than raising IndexError.
            if len(fields) <= 14:
                continue
            cn += 1
            sensor_name = fields[14].strip()
            warn_time = fields[0].strip()
            sensor_counts[sensor_name] = sensor_counts.get(sensor_name, 0) + 1
    return cn, sensor_counts, warn_time
def main(folder_path):
    """Screen every machine folder under *folder_path* and append findings to result.csv.

    Expected layout, per machine sub-folder::

        <machine>/event/*.tar.gz      archives, extracted in place
        <machine>/event/<...YYYYMMDD> extracted log folders (date = last 8 chars)
        <machine>/static/FruInfo.ini  source of the chassis serial number

    For each machine, every ``*.csv`` file whose name contains ``000`` in a
    log folder dated on or after the module-level ``inputdate`` is scanned for
    PCIe CE, PCIe UCE and memory CE events. A row is appended to
    ``result.csv`` (current working directory) for each event type that
    occurred and whose last event date is later than the module-level
    ``changewarndate``. Rows are ``[serial, type, per-sensor counts,
    last warn time, highest per-sensor count]``.

    Reads the module globals ``inputdate`` (``YYYYMMDD`` string),
    ``changewarndate`` (``datetime``) and ``tnum`` (machine counter, must be
    initialised by the caller). Blocks on ``input()`` before returning.

    Args:
        folder_path: Directory containing one sub-folder per machine.
    """
    # These names were module-level state in the original script; they are
    # still published as globals so anything inspecting them after a run
    # keeps working.
    global Server_sn
    global ce_num, uce_num, memce_num
    global sensor_counts, ce_sensor_counts, uce_sensor_counts
    global lastwarntime, ce_lastwarntime, uce_lastwarntime
    global tnum
    global ce_turelasttime, uce_turelasttime

    # Loop-invariant: parse the pre-filter date once.
    input_date = datetime.strptime(inputdate, '%Y%m%d')

    for machine_folder in os.listdir(folder_path):
        machine_folder_path = os.path.join(folder_path, machine_folder)
        if not os.path.isdir(machine_folder_path):
            continue

        # Fresh per-machine state, so nothing leaks over from the previous
        # machine (including the "last warn time" trackers).
        sensor_counts = {}
        ce_sensor_counts = {}
        uce_sensor_counts = {}
        tnum += 1
        ce_num = 0
        uce_num = 0
        memce_num = 0
        lastwarntime = ""
        ce_lastwarntime = ""
        uce_lastwarntime = ""
        Server_sn = ""
        turelasttime = ""
        uce_turelasttime = ""
        ce_turelasttime = ""

        event_path = os.path.join(machine_folder_path, "event")
        if not os.path.isdir(event_path):
            # One machine without collected logs must not abort the batch.
            print("no event folder, skip", machine_folder)
            continue

        # NOTE(review): extractall() trusts the member paths stored in the
        # archive. If archives can come from an untrusted source, use an
        # extraction filter (available in newer Python versions).
        for tfile in [f for f in os.listdir(event_path) if f.endswith('.tar.gz')]:
            with tarfile.open(os.path.join(event_path, tfile), "r:gz") as tar:
                tar.extractall(event_path)

        fru_path = os.path.join(machine_folder_path, "static", "FruInfo.ini")

        for folder_name, smartdata_folder_path in _dated_log_folders(event_path, input_date):
            print("after,begin check", folder_name)
            Server_sn = get_serversn(fru_path)
            for file in sorted(os.listdir(smartdata_folder_path)):
                if not (file.endswith('.csv') and "000" in file):
                    continue
                file_path = os.path.join(smartdata_folder_path, file)
                ce_num, ce_sensor_counts, ce_lastwarntime = check_pcie_ce(
                    file_path, Server_sn, smartdata_folder_path, ce_num, ce_sensor_counts)
                uce_num, uce_sensor_counts, uce_lastwarntime = check_pcie_uce(
                    file_path, Server_sn, smartdata_folder_path, uce_num, uce_sensor_counts)
                memce_num, sensor_counts, lastwarntime = check_mem_ce(
                    file_path, Server_sn, smartdata_folder_path, memce_num, sensor_counts)
                # A file without events reports no time; keep the most recent
                # non-empty one seen so far.
                if lastwarntime:
                    turelasttime = lastwarntime
                if ce_lastwarntime:
                    ce_turelasttime = ce_lastwarntime
                if uce_lastwarntime:
                    uce_turelasttime = uce_lastwarntime

        # Same row order as before: PCIe UCE, memory CE, PCIe CE.
        findings = (
            ('Pcie UCE', uce_num, uce_sensor_counts, uce_turelasttime),
            ("Memory CE", memce_num, sensor_counts, turelasttime),
            ('Pcie CE', ce_num, ce_sensor_counts, ce_turelasttime),
        )
        for kind, count, per_sensor, last_time in findings:
            if count != 0 and _warned_after(last_time, changewarndate):
                _append_result_row(
                    [Server_sn, kind, per_sensor, last_time, max(per_sensor.values())])

        print("this is %s server, Sn: %s" % (tnum, Server_sn))
    input('enter to exit')


def _dated_log_folders(event_path, input_date):
    """Return ``(name, path)`` of log folders dated >= *input_date*, oldest first."""
    dated = []
    for name in os.listdir(event_path):
        if "tar.gz" in name:
            continue
        path = os.path.join(event_path, name)
        if not os.path.isdir(path):
            continue
        try:
            folder_date = datetime.strptime(name[-8:], '%Y%m%d')
        except ValueError:
            # Not a dated log folder; ignore it.
            continue
        if folder_date >= input_date:
            dated.append((folder_date, name, path))
    # Chronological order makes the "last warn time" the latest one instead
    # of depending on directory listing order.
    dated.sort(key=lambda item: (item[0], item[1]))
    return [(name, path) for _, name, path in dated]


def _warned_after(warn_time, threshold):
    """Return True when the date part of *warn_time* is later than *threshold*."""
    return datetime.strptime(warn_time.split(' ')[0], '%Y-%m-%d') > threshold


def _append_result_row(row):
    """Append one row to result.csv in the current working directory."""
    with open("result.csv", "a", newline='') as f:
        csv.writer(f).writerow(row)
if __name__ == "__main__":
    # Root folder holding one sub-folder per machine. It may be given as the
    # first command-line argument and defaults to the D: drive root.
    # A raw string cannot end in a single backslash, so the default is
    # written with an escaped backslash instead of r"D:\".
    folder_path = sys.argv[1] if len(sys.argv) > 1 else "D:\\"
    # Coarse pre-filter: log folders dated before this are skipped, which
    # speeds up the scan.
    inputdate = '20240101'
    # Only event types whose last event is later than this date are reported.
    warndate = '2024-02-01'
    changewarndate = datetime.strptime(warndate, '%Y-%m-%d')
    # Truncate result.csv and write the header row.
    with open("result.csv", "w", newline='') as f:
        write = csv.writer(f)
        write.writerow(['Server SN', 'Type', 'Sensor and Num', 'Last Warn Time', 'Max num'])
    tnum = 0
    main(folder_path)