import pandas as pd
import numpy as np
import os
from concurrent.futures import ProcessPoolExecutor

# ========== Configuration ==========
D_TYPE = {0: np.float32, 1: np.float32}
CHUNK_SIZE = 10000
# Half-width of the accepted window around the target value in column A.
TOLERANCE = 0.003


# ========== Data processing ==========
def process_sheet(df: pd.DataFrame, target: float,
                  tolerance: float = TOLERANCE) -> float:
    """Sum column B over the rows whose column A is within target ± tolerance.

    Rows in which either of the first two columns is missing are ignored.

    Args:
        df: Sheet contents; only the first two columns (A and B) are read.
        target: Centre of the accepted window for column A.
        tolerance: Half-width of the window; both ends are inclusive.

    Returns:
        The sum of the matching column-B values. 0.0 when the sheet has
        fewer than two columns or when no row matches.
    """
    if df.shape[1] < 2:
        return 0.0

    # ``iloc`` refuses a boolean Series as a row mask, so pass the underlying
    # NumPy array instead.
    valid_mask = df.iloc[:, [0, 1]].notna().all(axis=1).to_numpy()
    a_col = df.iloc[valid_mask, 0].astype(np.float32)
    b_col = df.iloc[valid_mask, 1].astype(np.float32)

    in_window = a_col.between(
        target - tolerance,
        target + tolerance,
        inclusive='both',
    )
    return float(b_col[in_window.to_numpy()].sum())


def process_excel(file_path: str, target: float) -> float:
    """Return the per-sheet average of ``process_sheet`` for one workbook.

    This is the unit of work submitted to the process pool.

    Args:
        file_path: Path of the ``.xlsx`` workbook.
        target: Target value forwarded to ``process_sheet``.

    Returns:
        Sum of the per-sheet results divided by the number of sheets, or 0.0
        when the workbook cannot be processed (the error is printed).
    """
    try:
        all_sheets = pd.read_excel(
            file_path,
            sheet_name=None,
            skiprows=5,
            usecols="A:B",
            header=None,
            dtype=D_TYPE,
            engine='openpyxl',
        )
        if not all_sheets:
            return 0.0
        total = sum(process_sheet(df, target) for df in all_sheets.values())
        return total / len(all_sheets)
    except Exception as e:
        # Deliberate best-effort: one unreadable workbook must not abort the
        # whole batch, so report it and contribute 0.0.
        print(f"处理文件 {os.path.basename(file_path)} 时出错: {str(e)}")
        return 0.0


# ========== Main flow ==========
def main() -> None:
    """Prompt for inputs, process every workbook in parallel and plot the result."""
    # Only the parent process plots and shows progress; importing here keeps
    # the spawned worker processes from loading these packages.
    import matplotlib.pyplot as plt
    from tqdm import tqdm

    # The style sheet must be applied BEFORE the font settings: the
    # seaborn-v0_8 styles reset ``font.sans-serif``, which silently discards a
    # CJK font configured earlier and produces "Glyph ... missing from current
    # font" warnings.
    plt.style.use('seaborn-v0_8-darkgrid')
    # Use the font family names matplotlib matches on (SimHei / KaiTi).
    plt.rcParams['font.sans-serif'] = ['Microsoft YaHei', 'SimHei', 'KaiTi']
    plt.rcParams['axes.unicode_minus'] = False

    # User input
    target_value = float(input("请输入要筛选的A列目标值："))
    input_dir = input("请输入Excel文件所在的文件夹路径：").strip()
    output_dir = input("请输入结果图表输出路径：").strip()
    os.makedirs(output_dir, exist_ok=True)

    # Collect workbooks, skipping the "~$" lock files Excel creates while a
    # workbook is open (they are not readable workbooks).
    files = [
        os.path.join(input_dir, f)
        for f in os.listdir(input_dir)
        if f.lower().endswith(".xlsx") and not f.startswith("~$")
    ]
    if not files:
        print("未找到任何 .xlsx 文件")
        return

    # Parallel processing
    results = []
    with ProcessPoolExecutor() as executor:
        futures = {
            executor.submit(process_excel, file, target_value): os.path.basename(file)
            for file in files
        }
        with tqdm(total=len(files), desc="处理进度") as pbar:
            # Iterate in submission order so the bars keep the file order.
            for future, filename in futures.items():
                try:
                    results.append((filename, future.result()))
                except Exception as e:
                    print(f"文件 {filename} 处理异常: {str(e)}")
                finally:
                    pbar.update(1)

    # ========== Visualisation ==========
    plt.figure(figsize=(12, 6), dpi=100)
    plt.barh(
        [name for name, _ in results],
        [value for _, value in results],
        color='#4C72B0',
    )
    plt.xlabel("B列总和平均值", fontsize=12, fontweight='bold')
    plt.ylabel("文件名", fontsize=12, fontweight='bold')
    plt.title(
        f"目标值 {target_value}±{TOLERANCE} 数据统计\n({len(files)}个文件)",
        fontsize=14,
        pad=20,
    )
    plt.grid(axis='x', linestyle='--', alpha=0.7)

    # Layout and save
    plt.tight_layout()
    output_path = os.path.join(output_dir, "optimized_result.png")
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"\n处理完成！优化结果图表已保存至：{output_path}")


if __name__ == "__main__":
    main()

# Question posted with this snippet (translated): this code produces
#   Warning (from warnings module):
#     File "C:\Users\asus\AppData\Local\Programs\Python\Python38\lib\tkinter\__init__.py", line 804
#       func(*args)
#   UserWarning: Glyph 21644 (\N{CJK UNIFIED IDEOGRAPH-548C}) missing from current font.
#   UserWarning: Glyph 24179 (\N{CJK UNIFIED IDEOGRAPH-5E73}) missing from current font.
#   UserWarning: Glyph 22343 (\N{CJK UNIFIED IDEOGRAPH-5747}) missing from current font.
# How can this be fixed? Please give the complete code.

python数据分析与可视化 import pandas as pd import numpy as np import m

import matplotlib.pyplot as plt # 创建示例数据 data = pd.DataFrame({ 'A': np.random.randn(1000), 'B': np.random.randn(1000), 'C': np.random.randn(1000), }) # 计算每列的平均值 data_mean = data....

import pandas as pd.docx

import pandas as pd import numpy as np - **Pandas**: 一个强大的数据分析与处理库。 - **NumPy**: 用于进行数值计算的 Python 库。 #### 2. 读取 CSV 文件到 DataFrame python wine_data = pd.read_csv('...

import pandas as pd
import numpy as np
import os
from concurrent.futures import ProcessPoolExecutor

# ========== Configuration ==========
D_TYPE = {0: np.float32, 1: np.float32}
CHUNK_SIZE = 10000
# Half-width of the accepted window around the target value in column A.
TOLERANCE = 0.003


# ========== Data processing ==========
def process_sheet(df: pd.DataFrame, target: float,
                  tolerance: float = TOLERANCE) -> float:
    """Sum column B over the rows whose column A is within target ± tolerance.

    Rows in which either of the first two columns is missing are ignored.

    Args:
        df: Sheet contents; only the first two columns (A and B) are read.
        target: Centre of the accepted window for column A.
        tolerance: Half-width of the window; both ends are inclusive.

    Returns:
        The sum of the matching column-B values. 0.0 when the sheet has
        fewer than two columns or when no row matches.
    """
    if df.shape[1] < 2:
        return 0.0

    # ``iloc`` refuses a boolean Series as a row mask, so pass the underlying
    # NumPy array instead.
    valid_mask = df.iloc[:, [0, 1]].notna().all(axis=1).to_numpy()
    a_col = df.iloc[valid_mask, 0].astype(np.float32)
    b_col = df.iloc[valid_mask, 1].astype(np.float32)

    in_window = a_col.between(
        target - tolerance,
        target + tolerance,
        inclusive='both',
    )
    return float(b_col[in_window.to_numpy()].sum())


def process_excel(file_path: str, target: float) -> float:
    """Return the per-sheet average of ``process_sheet`` for one workbook.

    This is the unit of work submitted to the process pool.

    Args:
        file_path: Path of the ``.xlsx`` workbook.
        target: Target value forwarded to ``process_sheet``.

    Returns:
        Sum of the per-sheet results divided by the number of sheets, or 0.0
        when the workbook cannot be processed (the error is printed).
    """
    try:
        all_sheets = pd.read_excel(
            file_path,
            sheet_name=None,
            skiprows=5,
            usecols="A:B",
            header=None,
            dtype=D_TYPE,
            engine='openpyxl',
        )
        if not all_sheets:
            return 0.0
        total = sum(process_sheet(df, target) for df in all_sheets.values())
        return total / len(all_sheets)
    except Exception as e:
        # Deliberate best-effort: one unreadable workbook must not abort the
        # whole batch, so report it and contribute 0.0.
        print(f"处理文件 {os.path.basename(file_path)} 时出错: {str(e)}")
        return 0.0


# ========== Main flow ==========
def main() -> None:
    """Prompt for inputs, process every workbook in parallel and plot the result."""
    # Only the parent process plots and shows progress; importing here keeps
    # the spawned worker processes from loading these packages.
    import matplotlib.pyplot as plt
    from tqdm import tqdm

    # The style sheet must be applied BEFORE the font settings: the
    # seaborn-v0_8 styles reset ``font.sans-serif``, which would silently
    # discard a CJK font configured earlier.
    plt.style.use('seaborn-v0_8-darkgrid')
    plt.rcParams['font.sans-serif'] = ['Microsoft YaHei', 'SimHei', 'KaiTi']
    plt.rcParams['axes.unicode_minus'] = False

    # User input. The chart's output location is whatever is typed at the
    # third prompt; the file name is fixed further down ("optimized_result.png").
    target_value = float(input("请输入要筛选的A列目标值："))
    input_dir = input("请输入Excel文件所在的文件夹路径：").strip()
    output_dir = input("请输入结果图表输出路径：").strip()
    os.makedirs(output_dir, exist_ok=True)

    # Collect workbooks, skipping the "~$" lock files Excel creates while a
    # workbook is open (they are not readable workbooks).
    files = [
        os.path.join(input_dir, f)
        for f in os.listdir(input_dir)
        if f.lower().endswith(".xlsx") and not f.startswith("~$")
    ]
    if not files:
        print("未找到任何 .xlsx 文件")
        return

    # Parallel processing
    results = []
    with ProcessPoolExecutor() as executor:
        futures = {
            executor.submit(process_excel, file, target_value): os.path.basename(file)
            for file in files
        }
        with tqdm(total=len(files), desc="处理进度") as pbar:
            # Iterate in submission order so the bars keep the file order.
            for future, filename in futures.items():
                try:
                    results.append((filename, future.result()))
                except Exception as e:
                    print(f"文件 {filename} 处理异常: {str(e)}")
                finally:
                    pbar.update(1)

    # ========== Visualisation ==========
    plt.figure(figsize=(12, 6), dpi=100)
    plt.barh(
        [name for name, _ in results],
        [value for _, value in results],
        color='#4C72B0',
    )
    plt.xlabel("B列总和平均值", fontsize=12, fontweight='bold')
    plt.ylabel("文件名", fontsize=12, fontweight='bold')
    plt.title(
        f"目标值 {target_value}±{TOLERANCE} 数据统计\n({len(files)}个文件)",
        fontsize=14,
        pad=20,
    )
    plt.grid(axis='x', linestyle='--', alpha=0.7)

    # Layout and save
    plt.tight_layout()
    output_path = os.path.join(output_dir, "optimized_result.png")
    plt.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"\n处理完成！优化结果图表已保存至：{output_path}")


# The guard needs the double underscores; a bare ``name == "main"`` raises
# NameError and is also required for ProcessPoolExecutor on Windows.
if __name__ == "__main__":
    main()

# Question posted with this snippet (translated): where in this code is the
# text / output path changed?

from datetime import datetime date_str = datetime.now().strftime("%Y%m%d") output_dir = os.path.join(r"E:\reports", f"report_{date_str}") #### 方案3：配置文件读取（适合复杂项目） python # 创建...

import datetime
import numpy as np
import oracledb
import pandas as pd
import matplotlib.pyplot as plt
from pptx import Presentation
from pptx.util import Inches
import os
import matplotlib.colors
from matplotlib.patches import Patch
import time
import logging
import test
import concurrent.futures

# Current time
now = datetime.datetime.now()
# Date stamp in YYYY-MM-DD form, used in the log file name below.
timestamp = now.strftime("%Y-%m-%d")

logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Named ``log_format`` so the builtin ``format`` is not shadowed.
log_format = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
logFile = f'{timestamp}-log.txt'
fh = logging.FileHandler(logFile, mode='a', encoding='utf-8')
fh.setLevel(logging.INFO)
fh.setFormatter(log_format)
logger.addHandler(fh)

start_time1 = time.time()

# Database connection details
dsn = (
    "(DESCRIPTION="
    "(ADDRESS_LIST=(ADDRESS=(PROTOCOL=TCP)(HOST=10.10.164.60)(PORT=1521)))"
    "(CONNECT_DATA=(SERVICE_NAME=YMSDB01)))"
)
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store.
username = 'acme'
password = 'acme'
oracledb.init_oracle_client(lib_dir=r"C:\Users\Q06412\Downloads\instantclient-basic-windows.x64-21.17.0.0.0dbru\instantclient_21_17")

# Pre-bind everything the ``finally`` clause and the later code read, so a
# failed connect() neither raises NameError nor hides the real error.
lotresults = []
connection = None
cursor = None
try:
    # Open the connection and a cursor
    connection = oracledb.connect(user=username, password=password, dsn=dsn)
    cursor = connection.cursor()

    logging.info("SQL查询5个状态为0的lot任务---开始")
    # Select the DRB_IDs among the 5 oldest rows whose STATUS is 0.
    sql_query = """
        SELECT DISTINCT DRB_ID
        FROM (
            SELECT DISTINCT DRB_ID, LOT_ID,
                   ROW_NUMBER () OVER (ORDER BY CREATE_TIME ASC) AS RN
            FROM DRB_INFO_CONFIG dic
            WHERE STATUS=0
        ) DRB_CNT
        WHERE RN <=5
    """
    cursor.execute(sql_query)
    lotresults = cursor.fetchall()
    logging.info("SQL查询5个状态为0的lot任务---结束")

    # Mark every selected lot as STATUS=2 (in progress) straight away.
    update_sql = """
        update DRB_INFO_CONFIG
        set STATUS = :1
        where DRB_ID = :2
    """
    for lot_row in lotresults:
        cursor.execute(update_sql, (2, lot_row[0]))
    # One commit for the whole batch, so the lots are claimed together.
    connection.commit()
except Exception as e:
    # Log at ERROR level with the traceback instead of a bare INFO line.
    logging.exception(e)
finally:
    # Close only what was actually opened.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()


def execute_query(sql_query):
    """Run ``sql_query`` on a fresh connection and return all rows.

    A new connection is opened per call so that concurrent worker threads
    never share one.

    Args:
        sql_query: SQL text passed to ``cursor.execute``.

    Returns:
        The list returned by ``cursor.fetchall()``, or ``None`` when executing
        or fetching fails (the error is logged).
    """
    connection = oracledb.connect(user=username, password=password, dsn=dsn)
    try:
        cursor = connection.cursor()
        cursor.execute(sql_query)
        return cursor.fetchall()
    except Exception as ee:
        logging.exception(ee)
        return None
    finally:
        connection.close()  # always release the connection


with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:  # cap concurrency
    # The loop variable must not be called ``test``: that shadows the imported
    # ``test`` module, and ``test.sql_query`` would then be looked up on a
    # result row (a tuple) and raise AttributeError.
    # NOTE(review): every task runs the same ``test.sql_query``; confirm
    # whether the query is meant to be parameterised by the lot's DRB_ID.
    future_to_query = {
        executor.submit(execute_query, test.sql_query): lot_row
        for lot_row in lotresults
    }

    # Collect results in completion order
    results = []
    for future in concurrent.futures.as_completed(future_to_query):
        try:
            results.append(future.result())
        except Exception as e:
            print(f"Query {test.sql_query} failed: {e}")

end_time1 = time.time()
run_time = end_time1 - start_time1
logging.info("代码运行时间为： %s 秒", run_time)

import pandas as pd # 数据处理 from concurrent.futures import ThreadPoolExecutor # 线程池 2. 初始化配置 python # 日志配置（采用标准logging模块） logger = logging.getLogger() logger.setLevel...

import os import glob import pandas as pd import numpy as np import torch import torch.nn as nn import torch.optim as optim import random import matplotlib.pyplot as plt # 設定 matplotlib 以支援中文（若系統有中文字型，請根據實際狀況修改） plt.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei'] plt.rcParams['axes.unicode_minus'] = False # ============================================================================= # (A) 從 station_data 檔案讀取資料 # ============================================================================= def load_station_data(folder_path): """ 從指定資料夾中讀取檔名符合 station_data_out_20250313_.txt 的檔案，並解析每行資料，整理成符合模型需求的 DataFrame。每行範例： ba:92:7a:b0:b7:50 -71 39.0 MBit/s 24.0 MBit/s 10 0 256 17.211Mbps 145356 將轉換成以下欄位 (依模型需求)： - channel：預設填入 0（環境 reset 時會重新分配） - total_throughput：從 throughput 欄位取得（去掉 "Mbps"） - pdr：這裡暫用 parts[6]（例如 10） - snr：這裡暫用接收速率 parts[4]（例如 24.0） - rssi：直接取 signal 值 (parts[1]) - noise：取 parts[7]（例如 0） """ pattern = os.path.join(folder_path, "station_data_out_20250313_.txt") files = glob.glob(pattern) data = [] for file in files: with open(file, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue parts = line.split() if len(parts) != 11: # 欄位數不正確時略過此行 continue try: mac = parts[0] signal = int(parts[1]) # throughput 從 parts[9] 去除 "Mbps" throughput = float(parts[9].rstrip("Mbps")) channel = 0 # 初始值，reset 時會重新分配 AP pdr = float(parts[6]) snr = float(parts[4]) rssi = signal noise = float(parts[7]) except Exception as e: print(f"解析錯誤: {line}\n{e}") contin

import pandas as pd import glob def load_station_data(file_pattern): """ 读取匹配指定模式的所有文本文件到DataFrame 参数： file_pattern: 文件路径模式，如"station_data_out_20250313_*.txt" 返回： ...

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from concurrent.futures import ThreadPoolExecutor
import os


def plot_histogram(group_data, group_name, plot_col, output_dir='histograms', bins=30):
    """Draw and save the histogram of one group.

    Only values between the 5% and 95% quantiles of ``plot_col`` are plotted.
    The image is written to ``{output_dir}/{plot_col}_{group_name}.png``, with
    path separators in the group name replaced by ``_``. Any failure is
    printed and swallowed so that one bad group does not stop a batch.

    Args:
        group_data: DataFrame holding the rows of this group.
        group_name: Label of the group (used in the title and file name).
        plot_col: Name of the column to plot.
        output_dir: Directory for the image; created when missing.
        bins: Number of histogram bins.
    """
    try:
        values = group_data[plot_col]

        # Quantile bounds
        lower = values.quantile(0.05)
        upper = values.quantile(0.95)

        # Keep the central 90% of the data
        kept = values[(values >= lower) & (values <= upper)]
        if kept.empty:
            print(f"分组 {group_name} 无有效数据")
            return

        # Build the figure directly instead of through pyplot: this function
        # runs in worker threads, and pyplot's shared "current figure" state
        # (plt.subplots / plt.savefig / plt.close) lets one thread save or
        # close another thread's figure. A stand-alone Figure is also simply
        # garbage-collected if anything below raises.
        fig = Figure(figsize=(10, 6))
        ax = fig.subplots()

        # Histogram
        ax.hist(kept, bins=bins, edgecolor='black', alpha=0.7)

        # Summary statistics
        mean = kept.mean()
        median = kept.median()
        std = kept.std()

        # Annotation box
        textstr = '\n'.join((
            f'均值: {mean:.2f}',
            f'中位数: {median:.2f}',
            f'标准差: {std:.2f}'))
        ax.text(0.75, 0.95, textstr, transform=ax.transAxes,
                verticalalignment='top',
                bbox=dict(boxstyle='round', alpha=0.5))

        # Title and axis labels
        ax.set_title(f'{plot_col} 分布 - 分组: {group_name}\n(显示5%-95%分位数数据)')
        ax.set_xlabel(plot_col)
        ax.set_ylabel('频数')

        # Save the image
        os.makedirs(output_dir, exist_ok=True)
        safe_name = str(group_name).replace("/", "_").replace("\\", "_")  # strip path separators
        fig.savefig(f'{output_dir}/{plot_col}_{safe_name}.png')
    except Exception as e:
        print(f"分组 {group_name} 绘图失败: {str(e)}")


def batch_histogram(df, group_col, plot_col, max_workers=4,
                    output_dir='histograms', bins=30):
    """Generate one histogram per distinct value of ``group_col``.

    Args:
        df: Source DataFrame.
        group_col: Column whose distinct values define the groups.
        plot_col: Column to plot for each group.
        max_workers: Size of the thread pool.
        output_dir: Directory forwarded to ``plot_histogram``.
        bins: Number of bins forwarded to ``plot_histogram``.
    """
    groups = df[group_col].unique()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(
                plot_histogram,
                group_data=df[df[group_col] == group],
                group_name=group,
                plot_col=plot_col,
                output_dir=output_dir,
                bins=bins,
            )
            for group in groups
        ]

        # Wait for every task to finish
        for future in futures:
            future.result()


# Usage example
if __name__ == "__main__":
    # Generate test data
    data = {
        'group': np.random.choice(['A', 'B', 'C'], 1000),
        'value': np.concatenate([
            np.random.normal(50, 10, 700),
            np.random.normal(150, 50, 300)
        ])
    }
    df = pd.DataFrame(data)

    # Run the batch
    batch_histogram(df=df, group_col='group', plot_col='value', max_workers=16)

# Question posted with this snippet (translated): what problems does this
# plotting script have?

这个脚本的主要功能是使用pandas、numpy、matplotlib和concurrent.futures来批量生成不同分组的直方图。主函数batch_histogram利用线程池来并行处理各个分组的数据，每个线程调用plot_histogram函数来绘制直方图，并...

2022年学校网络安全保障工作总结报告.docx

2022Java程序员面试题集.docx

2022代理软件销售协议书.docx

相关推荐

python数据分析与可视化 import pandas as pd import numpy as np import m

import pandas as pd.docx

数据科学与tqdm：掌握与Pandas和NumPy的高效协作

【Python数据分析三剑客】：Pandas、NumPy与Anaconda的完美协同

【Python科学计算宝典】：NumPy与Pandas的高效应用

Numpy.Testing测试报告：生成和解读测试结果（详细解析）

【NumPy进阶应用】：NumPy高级特性和技巧的深入探索

高级应用：在Matplotlib中实现自定义图表动画

Matplotlib性能优化圣手：大数据集处理技巧大公开

提升matplotlib绘图效率：性能优化的6个秘诀

【Python进度条魔法：tqdm终极指南】：提升工作效率的7个技巧

实时数据可视化新境界：Matplotlib与网络数据源无缝集成教程

【Python科学计算诀窍】：如何利用NumPy和SciPy加速计算

2022年学校网络安全保障工作总结报告.docx

2022Java程序员面试题集.docx

2022代理软件销售协议书.docx

大家在看

最新VISIO各种图形图标大集合.

TreeComboBox控件

matlab的欧拉方法代码-BEM_flow_simulation:计算流体力学：使用边界元方法模拟障碍物周围/附近的流动

SDCC簡明手冊

01.WS 445-2014 电子病历基本数据集.rar

最新推荐

2022年学校网络安全保障工作总结报告.docx

2022Java程序员面试题集.docx

2022代理软件销售协议书.docx

2022光环项目管理流程.docx

2022常用网络爱情语句.docx

ChmDecompiler 3.60：批量恢复CHM电子书源文件工具

【数据融合技术】：甘肃土壤类型空间分析中的专业性应用

redistemplate.opsForValue()返回值

ktorrent 2.2.4版本Linux客户端发布

【空间分布规律】：甘肃土壤类型与农业生产的关联性研究