当前位置: 首页 > news >正文

简单的智能数据分析程序

利用DS制作了一个简易的数据分析程序

支持加载 Excel 或 CSV 文件中的数据,并进行常见的数据分析及可视化展示。

支持常见数据清洗:移除重复值、处理缺失值、类型转换、规整小数、修改列名等

支持常见的统计分析、查看基本的数据信息

支持自定义XY轴数据的图表展示,包括柱状图、折线图、直方图、饼图、散点图、箱线图、热力图、雷达图,并可以保存图表到指定位置

支持将清洗后的数据保存到 Excel 文件。

# -*- coding: utf-8 -*-
"""
Created on 2025-11-7 14:30:28

@author: oldhen
"""

import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import seaborn as sns
import numpy as np
import requests
import threading
import time
from datetime import datetime
#import json
import matplotlib.font_manager as fm  # 添加字体管理器class DataAnalysisApp:# 设置图形样式sns.set_style("whitegrid")def __init__(self, root):self.root = rootself.root.title("智能数据分析助手")self.root.geometry("1200x800")# 初始化数据self.df = Noneself.original_df = Noneself.api_url = Noneself.api_auto_update = Falseself.api_update_interval = 60  # 默认60秒# 用于列排序的状态跟踪self.sort_states = {}  # 记录每列的排序状态# 设置中文字体 - 新增self.setup_chinese_font()# 设置样式self.setup_styles()# 创建界面self.create_widgets()def setup_chinese_font(self):"""设置中文字体,解决字体缺失问题"""try:# 方法1:尝试使用系统字体plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']plt.rcParams['axes.unicode_minus'] = False# 方法2:检查字体是否真正可用available_fonts = [f.name for f in fm.fontManager.ttflist]chinese_fonts = ['SimHei', 'Microsoft YaHei', 'KaiTi', 'SimSun']for font in chinese_fonts:if font in available_fonts:print(f"找到中文字体: {font}")plt.rcParams['font.sans-serif'] = [font, 'DejaVu Sans']breakelse:print("未找到中文字体,使用默认字体")# 方法3:强制重新加载字体缓存fm._rebuild()except Exception as e:print(f"字体设置失败: {e}")# 如果所有方法都失败,使用英文标签return Falsereturn Truedef setup_styles(self):# 设置主题颜色self.bg_color = "#f0f0f0"self.frame_bg = "#ffffff"self.accent_color = "#4a6fa5"self.highlight_color = "#6b8cbc"# 配置样式style = ttk.Style()style.configure("TFrame", background=self.bg_color)style.configure("TButton", background=self.accent_color, foreground="black", font=("Arial", 10))style.configure("TLabel", background=self.bg_color, font=("Arial", 10))style.configure("Title.TLabel", background=self.bg_color, font=("Arial", 12, "bold"))style.configure("TRadiobutton", background=self.bg_color, font=("Arial", 10))def center_window(self, window, parent=None):"""将窗口居中显示在父窗口中间"""window.update_idletasks()if parent is None:parent = self.root# 获取父窗口位置和尺寸parent_x = parent.winfo_x()parent_y = parent.winfo_y()parent_width = parent.winfo_width()parent_height = parent.winfo_height()# 获取子窗口尺寸width = window.winfo_width()height = window.winfo_height()# 计算居中位置x = parent_x + (parent_width - width) // 2y 
= parent_y + (parent_height - height) // 2# 设置窗口位置window.geometry(f"+{x}+{y}")def create_widgets(self):# 主框架main_frame = ttk.Frame(self.root, padding="10")main_frame.pack(fill=tk.BOTH, expand=True)# 区域1: 数据控制self.create_data_control_frame(main_frame)# 区域2: 数据清洗和简单分析 - 左右分布的两个区域analysis_cleaning_frame = ttk.Frame(main_frame)analysis_cleaning_frame.pack(fill=tk.X, pady=(0, 10))# 左侧: 数据清洗self.create_data_cleaning_frame(analysis_cleaning_frame)# 右侧: 简单分析self.create_simple_analysis_frame(analysis_cleaning_frame)# 区域3: 可视化选项和预览self.create_visualization_frame(main_frame)def create_data_control_frame(self, parent):control_frame = ttk.LabelFrame(parent, text="数据控制", padding="10")control_frame.pack(fill=tk.X, pady=(0, 10))# 按钮buttons = [("导入CSV文件", self.load_csv),("导入Excel文件", self.load_excel),("从API获取数据", self.get_api_data),("设置API自动更新", self.set_api_auto_update),("停止自动更新", self.stop_auto_update),("保存到Excel文件", self.save_to_excel) ]for text, command in buttons:btn = ttk.Button(control_frame, text=text, command=command)btn.pack(side=tk.LEFT, padx=5)def create_data_cleaning_frame(self, parent):# 左侧分布的数据清洗区域cleaning_frame = ttk.LabelFrame(parent, text="数据清洗", padding="10")cleaning_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=False, padx=(0, 5))# 按钮cleaning_buttons = [("移除重复值", self.remove_duplicates),("处理缺失值", self.handle_missing_values),("数据类型转换", self.convert_data_types),("规整小数位", self.round_decimal_places),("修改列名", self.rename_column),("重置数据", self.reset_data)]for text, command in cleaning_buttons:btn = ttk.Button(cleaning_frame, text=text, command=command)btn.pack(side=tk.LEFT, padx=2)def create_simple_analysis_frame(self, parent):# 右侧分布的简单分析区域analysis_frame = ttk.LabelFrame(parent, text="简单分析", padding="10")analysis_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True, padx=(5, 0))# 按钮analysis_buttons = [("显示数据信息", self.show_data_info),("查看统计信息", self.show_statistics),("相关分析", self.correlation_analysis)  ]for text, command in analysis_buttons:btn = 
ttk.Button(analysis_frame, text=text, command=command)btn.pack(side=tk.LEFT, padx=2)def create_visualization_frame(self, parent):viz_frame = ttk.Frame(parent)viz_frame.pack(fill=tk.BOTH, expand=True)# 左侧: 可视化选项self.create_viz_options_frame(viz_frame)# 右侧: 数据预览和图表展示self.create_preview_frame(viz_frame)def create_viz_options_frame(self, parent):options_frame = ttk.LabelFrame(parent, text="可视化选项", padding="10")options_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(0, 10))# 图表类型选择chart_type_label = ttk.Label(options_frame, text="图表类型:", style="Title.TLabel")chart_type_label.pack(anchor=tk.W, pady=(0, 5))self.chart_type = tk.StringVar(value="柱状图")chart_types = ["柱状图", "折线图", "散点图", "饼图", "箱线图", "热力图", "直方图","雷达图"]for chart in chart_types:rb = ttk.Radiobutton(options_frame, text=chart, variable=self.chart_type, value=chart)rb.pack(anchor=tk.W)# X轴数据选择x_axis_label = ttk.Label(options_frame, text="X轴数据:", style="Title.TLabel")x_axis_label.pack(anchor=tk.W, pady=(10, 5))self.x_axis_var = tk.StringVar()self.x_axis_combo = ttk.Combobox(options_frame, textvariable=self.x_axis_var, state="readonly")self.x_axis_combo.pack(fill=tk.X, pady=(0, 10))# Y轴数据选择y_axis_label = ttk.Label(options_frame, text="Y轴数据:", style="Title.TLabel")y_axis_label.pack(anchor=tk.W, pady=(0, 5))self.y_axis_var = tk.StringVar()self.y_axis_combo = ttk.Combobox(options_frame, textvariable=self.y_axis_var, state="readonly")self.y_axis_combo.pack(fill=tk.X, pady=(0, 10))# 分组数据选择group_label = ttk.Label(options_frame, text="分组数据(可选):", style="Title.TLabel")group_label.pack(anchor=tk.W, pady=(0, 5))self.group_var = tk.StringVar(value="无")self.group_combo = ttk.Combobox(options_frame, textvariable=self.group_var, state="readonly")self.group_combo.pack(fill=tk.X, pady=(0, 10))# 显示选项框架display_frame = ttk.LabelFrame(options_frame, text="显示选项", padding="5")display_frame.pack(fill=tk.X, pady=10)# 限制X轴类别数量选项self.limit_categories = tk.BooleanVar(value=True)limit_cb = ttk.Checkbutton(display_frame, 
text="限制X轴类别数量 (最多10个)", variable=self.limit_categories)limit_cb.pack(anchor=tk.W, pady=2)# 显示所有数据选项self.show_all_data = tk.BooleanVar(value=False)all_data_cb = ttk.Checkbutton(display_frame, text="显示所有数据 (可能影响性能)", variable=self.show_all_data,command=self.update_data_preview  # 当选项改变时更新预览)all_data_cb.pack(anchor=tk.W, pady=2)# 生成和保存按钮button_frame = ttk.Frame(options_frame)button_frame.pack(fill=tk.X, pady=10)generate_btn = ttk.Button(button_frame, text="生成图表", command=self.generate_chart)generate_btn.pack(side=tk.LEFT, padx=(0, 10))save_btn = ttk.Button(button_frame, text="保存图表", command=self.save_chart)save_btn.pack(side=tk.LEFT)def create_preview_frame(self, parent):preview_frame = ttk.Frame(parent)preview_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True)# 数据预览data_preview_frame = ttk.LabelFrame(preview_frame, text="数据预览", padding="10")data_preview_frame.pack(fill=tk.BOTH, expand=True, pady=(0, 10))# 创建Treeview显示数据self.create_data_treeview(data_preview_frame)# 图表展示chart_frame = ttk.LabelFrame(preview_frame, text="图表展示", padding="10")chart_frame.pack(fill=tk.BOTH, expand=True)# 创建图表区域self.create_chart_area(chart_frame)def create_data_treeview(self, parent):# 创建滚动条tree_scroll = ttk.Scrollbar(parent)tree_scroll.pack(side=tk.RIGHT, fill=tk.Y)# 创建Treeviewself.data_tree = ttk.Treeview(parent, yscrollcommand=tree_scroll.set)self.data_tree.pack(fill=tk.BOTH, expand=True)# 配置滚动条tree_scroll.config(command=self.data_tree.yview)def create_chart_area(self, parent):# 创建图表框架self.chart_frame = ttk.Frame(parent)self.chart_frame.pack(fill=tk.BOTH, expand=True)# 初始时显示空图表self.fig, self.ax = plt.subplots(figsize=(8, 6))self.ax.text(0.5, 0.5, "请导入数据并选择图表类型", horizontalalignment='center', verticalalignment='center',transform=self.ax.transAxes, fontsize=14)self.ax.set_xticks([])self.ax.set_yticks([])# 创建画布self.canvas = FigureCanvasTkAgg(self.fig, self.chart_frame)self.canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)self.canvas.draw()# 新增功能:相关分析def 
correlation_analysis(self):"""执行相关分析并显示相关性矩阵"""if self.df is None:messagebox.showwarning("警告", "请先导入数据")return# 检查是否有数值列numeric_columns = self.df.select_dtypes(include=[np.number]).columnsif len(numeric_columns) < 2:messagebox.showwarning("警告", "相关分析需要至少两个数值列")return# 创建相关分析窗口self.create_correlation_window()def create_correlation_window(self):"""创建相关分析窗口"""# 创建新窗口corr_window = tk.Toplevel(self.root)corr_window.title("相关分析")corr_window.geometry("600x500")corr_window.resizable(True, True)# 设置窗口居中和模态corr_window.transient(self.root)corr_window.grab_set()self.center_window(corr_window)# 主框架main_frame = ttk.Frame(corr_window, padding="10")main_frame.pack(fill=tk.BOTH, expand=True)# 标题#title_label = ttk.Label(main_frame, text="相关性矩阵", style="Title.TLabel")#title_label.pack(pady=(0, 10))# 说明文本info_label = ttk.Label(main_frame, text="以下显示各数值列之间的相关系数矩阵:", wraplength=550)info_label.pack(pady=(0, 10))# 创建Treeview框架tree_frame = ttk.Frame(main_frame)tree_frame.pack(fill=tk.BOTH, expand=True, pady=10)# 创建滚动条v_scrollbar = ttk.Scrollbar(tree_frame)v_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)h_scrollbar = ttk.Scrollbar(tree_frame, orient=tk.HORIZONTAL)h_scrollbar.pack(side=tk.BOTTOM, fill=tk.X)# 创建Treeview显示相关性矩阵corr_tree = ttk.Treeview(tree_frame, yscrollcommand=v_scrollbar.set,xscrollcommand=h_scrollbar.set)corr_tree.pack(fill=tk.BOTH, expand=True)# 配置滚动条v_scrollbar.config(command=corr_tree.yview)h_scrollbar.config(command=corr_tree.xview)# 计算相关性矩阵numeric_df = self.df.select_dtypes(include=[np.number])correlation_matrix = numeric_df.corr()# 设置Treeview列columns = ['列名'] + list(correlation_matrix.columns)corr_tree["columns"] = columnscorr_tree["show"] = "headings"# 设置列标题for col in columns:corr_tree.heading(col, text=col)corr_tree.column(col, width=80, anchor=tk.CENTER)# 添加数据行for idx, row in correlation_matrix.iterrows():row_values = [idx] + [f"{val:.4f}" for val in row]corr_tree.insert("", "end", values=row_values)# 按钮框架button_frame = ttk.Frame(main_frame)button_frame.pack(fill=tk.X, 
pady=10)# 关闭按钮close_btn = ttk.Button(button_frame, text="关闭", command=corr_window.destroy)close_btn.pack(side=tk.RIGHT, padx=5)# 保存按钮save_btn = ttk.Button(button_frame, text="保存相关性矩阵", command=lambda: self.save_correlation_matrix(correlation_matrix))save_btn.pack(side=tk.RIGHT, padx=5)def save_correlation_matrix(self, correlation_matrix):"""保存相关性矩阵到文件"""file_path = filedialog.asksaveasfilename(defaultextension=".csv",filetypes=[("CSV files", "*.csv"), ("Excel files", "*.xlsx")])if file_path:try:if file_path.endswith('.csv'):correlation_matrix.to_csv(file_path)else:correlation_matrix.to_excel(file_path)messagebox.showinfo("成功", f"相关性矩阵已保存到: {file_path}")except Exception as e:messagebox.showerror("错误", f"保存文件时出错: {str(e)}")# 数据控制功能def load_csv(self):file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])if file_path:try:self.df = pd.read_csv(file_path)self.original_df = self.df.copy()self.update_data_preview()self.update_column_combos()messagebox.showinfo("成功", "CSV文件导入成功!")except Exception as e:messagebox.showerror("错误", f"导入CSV文件时出错: {str(e)}")def load_excel(self):file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx *.xls")])if file_path:try:self.df = pd.read_excel(file_path)self.original_df = self.df.copy()self.update_data_preview()self.update_column_combos()messagebox.showinfo("成功", "Excel文件导入成功!")except Exception as e:messagebox.showerror("错误", f"导入Excel文件时出错: {str(e)}")def get_api_data(self):# 创建自定义对话框而不是使用simpledialogdialog = tk.Toplevel(self.root)dialog.title("API数据")dialog.geometry("400x150")dialog.resizable(False, False)dialog.transient(self.root)dialog.grab_set()# 设置窗口居中self.center_window(dialog)# 创建内容ttk.Label(dialog, text="请输入API URL:").pack(pady=10)url_var = tk.StringVar()url_entry = ttk.Entry(dialog, textvariable=url_var, width=50)url_entry.pack(pady=5, padx=20, fill=tk.X)def on_ok():url = url_var.get()dialog.destroy()if url:self.api_url = urltry:response = requests.get(url)if response.status_code == 200:# 
假设API返回JSON格式数据data = response.json()self.df = pd.DataFrame(data)self.original_df = self.df.copy()self.update_data_preview()self.update_column_combos()messagebox.showinfo("成功", "API数据获取成功!")else:messagebox.showerror("错误", f"API请求失败,状态码: {response.status_code}")except Exception as e:messagebox.showerror("错误", f"获取API数据时出错: {str(e)}")def on_cancel():dialog.destroy()# 按钮框架button_frame = ttk.Frame(dialog)button_frame.pack(pady=10)ttk.Button(button_frame, text="确定", command=on_ok).pack(side=tk.LEFT, padx=5)ttk.Button(button_frame, text="取消", command=on_cancel).pack(side=tk.LEFT, padx=5)# 绑定回车键dialog.bind('<Return>', lambda e: on_ok())url_entry.focus_set()def set_api_auto_update(self):if not self.api_url:messagebox.showwarning("警告", "请先设置API URL")return# 创建自定义对话框dialog = tk.Toplevel(self.root)dialog.title("自动更新设置")dialog.geometry("400x150")dialog.resizable(False, False)dialog.transient(self.root)dialog.grab_set()# 设置窗口居中self.center_window(dialog)# 创建内容ttk.Label(dialog, text="请输入更新间隔(秒):").pack(pady=10)interval_var = tk.StringVar(value="60")interval_entry = ttk.Entry(dialog, textvariable=interval_var, width=20)interval_entry.pack(pady=5)def on_ok():try:interval = int(interval_var.get())dialog.destroy()if interval > 0:self.api_update_interval = intervalself.api_auto_update = Trueself.auto_update_thread = threading.Thread(target=self.auto_update_data, daemon=True)self.auto_update_thread.start()messagebox.showinfo("成功", f"已设置自动更新,间隔: {interval}秒")else:messagebox.showerror("错误", "更新间隔必须大于0")except ValueError:messagebox.showerror("错误", "请输入有效的数字")def on_cancel():dialog.destroy()# 按钮框架button_frame = ttk.Frame(dialog)button_frame.pack(pady=10)ttk.Button(button_frame, text="确定", command=on_ok).pack(side=tk.LEFT, padx=5)ttk.Button(button_frame, text="取消", command=on_cancel).pack(side=tk.LEFT, padx=5)# 绑定回车键dialog.bind('<Return>', lambda e: on_ok())interval_entry.focus_set()interval_entry.select_range(0, tk.END)def stop_auto_update(self):self.api_auto_update = 
Falsemessagebox.showinfo("成功", "已停止自动更新")def auto_update_data(self):while self.api_auto_update:time.sleep(self.api_update_interval)try:response = requests.get(self.api_url)if response.status_code == 200:data = response.json()self.df = pd.DataFrame(data)self.update_data_preview()self.update_column_combos()# 在主线程中更新UIself.root.after(0, lambda: messagebox.showinfo("自动更新", f"数据已自动更新于 {datetime.now().strftime('%H:%M:%S')}"))except Exception as e:self.root.after(0, lambda: messagebox.showerror("自动更新错误", f"自动更新数据时出错: {str(e)}"))def save_to_excel(self):"""保存数据到Excel文件"""if self.df is None:messagebox.showwarning("警告", "没有数据可保存")returntry:# 弹出保存文件对话框file_path = filedialog.asksaveasfilename(defaultextension=".xlsx",filetypes=[("Excel files", "*.xlsx"),("Excel 97-2003 files", "*.xls"),("All files", "*.*")],title="保存数据到Excel")if file_path:# 显示保存进度progress_window = tk.Toplevel(self.root)progress_window.title("保存中...")progress_window.geometry("300x100")progress_window.resizable(False, False)progress_window.transient(self.root)# 设置窗口居中self.center_window(progress_window)ttk.Label(progress_window, text="正在保存数据,请稍候...").pack(pady=20)progress_window.update()# 保存数据到Excelself.df.to_excel(file_path, index=False)# 关闭进度窗口progress_window.destroy()# 显示成功消息messagebox.showinfo("成功", f"数据已成功保存到:\n{file_path}")except Exception as e:messagebox.showerror("错误", f"保存Excel文件时出错: {str(e)}")    # 数据清洗功能def show_data_info(self):if self.df is None:messagebox.showwarning("警告", "请先导入数据")returninfo = f"数据形状: {self.df.shape}\n\n"info += "列信息:\n"for col in self.df.columns:info += f"- {col}: {self.df[col].dtype}\n"info += f"\n缺失值统计:\n{self.df.isnull().sum()}"messagebox.showinfo("数据信息", info)# 新增功能:查看均值等统计信息def show_statistics(self):"""显示统计信息窗口"""if self.df is None:messagebox.showwarning("警告", "请先导入数据")return# 创建统计信息窗口stats_window = tk.Toplevel(self.root)stats_window.title("统计信息")stats_window.geometry("500x450")stats_window.resizable(False, False)stats_window.transient(self.root)stats_window.grab_set()# 
设置窗口居中self.center_window(stats_window)# 主框架main_frame = ttk.Frame(stats_window, padding="15")main_frame.pack(fill=tk.BOTH, expand=True)# 标题#title_label = ttk.Label(main_frame, text="选择列查看统计信息", style="Title.TLabel")#title_label.pack(pady=(0, 15))# 列选择框架column_frame = ttk.Frame(main_frame)column_frame.pack(fill=tk.X, pady=10)ttk.Label(column_frame, text="选择列:").pack(side=tk.LEFT, padx=(0, 10))self.selected_column_stats = tk.StringVar()column_combo = ttk.Combobox(column_frame, textvariable=self.selected_column_stats, state="readonly", width=30)column_combo.pack(side=tk.LEFT, fill=tk.X, expand=True)# 设置列选项if self.df is not None:columns = list(self.df.columns)column_combo['values'] = columnsif columns:column_combo.set(columns[0])# 统计信息显示框架stats_frame = ttk.LabelFrame(main_frame, text="统计信息", padding="10")stats_frame.pack(fill=tk.BOTH, expand=True, pady=10)# 创建统计信息显示标签self.stats_labels = {}stats_config = [("均值", "mean"),("最小值", "min"),("最大值", "max"),("中位数", "median"),("标准差", "std"),("方差", "var"),("第一四分位数 (Q1)", "q1"),("第三四分位数 (Q3)", "q3")]for stat_name, stat_key in stats_config:row_frame = ttk.Frame(stats_frame)row_frame.pack(fill=tk.X, pady=5)ttk.Label(row_frame, text=f"{stat_name}:").pack(side=tk.LEFT, padx=(0, 10))result_label = ttk.Label(row_frame, text="")result_label.pack(side=tk.LEFT)self.stats_labels[stat_key] = result_label# 关闭按钮close_btn = ttk.Button(main_frame, text="关闭", command=stats_window.destroy)close_btn.pack(pady=10)# 绑定列选择事件def on_column_selected(*args):column_name = self.selected_column_stats.get()if column_name:self.update_statistics_display(column_name)self.selected_column_stats.trace("w", on_column_selected)# 初始化显示on_column_selected()def is_numeric_column(self, column_name):"""检查列是否为数值型"""if self.df is None or column_name not in self.df.columns:return Falsereturn pd.api.types.is_numeric_dtype(self.df[column_name])def update_statistics_display(self, column_name):"""更新统计信息显示"""if not self.is_numeric_column(column_name):# 清空所有统计信息显示for label in 
self.stats_labels.values():label.config(text="非数值列")returntry:# 计算所有统计信息mean_value = self.df[column_name].mean()min_value = self.df[column_name].min()max_value = self.df[column_name].max()median_value = self.df[column_name].median()std_value = self.df[column_name].std()var_value = self.df[column_name].var()q1_value = self.df[column_name].quantile(0.25)q3_value = self.df[column_name].quantile(0.75)# 更新显示self.stats_labels["mean"].config(text=f"{mean_value:.4f}")self.stats_labels["min"].config(text=f"{min_value:.4f}")self.stats_labels["max"].config(text=f"{max_value:.4f}")self.stats_labels["median"].config(text=f"{median_value:.4f}")self.stats_labels["std"].config(text=f"{std_value:.4f}")self.stats_labels["var"].config(text=f"{var_value:.4f}")self.stats_labels["q1"].config(text=f"{q1_value:.4f}")self.stats_labels["q3"].config(text=f"{q3_value:.4f}")except Exception as e:# 如果计算出错,显示错误信息for label in self.stats_labels.values():label.config(text="计算错误")def remove_duplicates(self):if self.df is None:messagebox.showwarning("警告", "请先导入数据")returnbefore_count = len(self.df)self.df = self.df.drop_duplicates()after_count = len(self.df)self.update_data_preview()messagebox.showinfo("成功", f"已移除 {before_count - after_count} 个重复值")def handle_missing_values(self):if self.df is None:messagebox.showwarning("警告", "请先导入数据")return# 创建自定义对话框dialog = tk.Toplevel(self.root)dialog.title("处理缺失值")dialog.geometry("500x200")dialog.resizable(False, False)dialog.transient(self.root)dialog.grab_set()# 设置窗口居中self.center_window(dialog)# 创建内容ttk.Label(dialog, text="选择处理方式:", style="Title.TLabel").pack(pady=10)option_var = tk.StringVar()options = [("删除包含缺失值的行", "1"),("用均值填充(数值列)", "2"),("用众数填充(分类列)", "3")]for text, value in options:rb = ttk.Radiobutton(dialog, text=text, variable=option_var, value=value)rb.pack(anchor=tk.W, padx=20)def on_ok():option = option_var.get()dialog.destroy()if option:if option == "1":before_count = self.df.isnull().sum().sum()self.df = self.df.dropna()after_count = 
self.df.isnull().sum().sum()messagebox.showinfo("成功", f"已删除包含缺失值的行,处理了 {before_count - after_count} 个缺失值")elif option == "2":for col in self.df.select_dtypes(include=[np.number]).columns:self.df[col].fillna(self.df[col].mean(), inplace=True)messagebox.showinfo("成功", "已用均值填充数值列的缺失值")elif option == "3":for col in self.df.select_dtypes(include=['object']).columns:self.df[col].fillna(self.df[col].mode()[0] if not self.df[col].mode().empty else "未知", inplace=True)messagebox.showinfo("成功", "已用众数填充分类列的缺失值")self.update_data_preview()else:messagebox.showwarning("警告", "请选择处理方式")def on_cancel():dialog.destroy()# 按钮框架button_frame = ttk.Frame(dialog)button_frame.pack(pady=10)ttk.Button(button_frame, text="确定", command=on_ok).pack(side=tk.LEFT, padx=5)ttk.Button(button_frame, text="取消", command=on_cancel).pack(side=tk.LEFT, padx=5)# 设置默认选项option_var.set("1")def convert_data_types(self):if self.df is None:messagebox.showwarning("警告", "请先导入数据")return# 创建数据类型转换窗口self.create_data_type_conversion_window()def create_data_type_conversion_window(self):"""创建数据类型转换窗口"""# 创建新窗口conversion_window = tk.Toplevel(self.root)conversion_window.title("数据类型转换")conversion_window.geometry("500x350")conversion_window.resizable(True, True)# 设置窗口居中和模态conversion_window.transient(self.root)conversion_window.grab_set()# 设置窗口居中self.center_window(conversion_window)# 主框架main_frame = ttk.Frame(conversion_window, padding="15")main_frame.pack(fill=tk.BOTH, expand=True)# 列选择框架column_frame = ttk.Frame(main_frame)column_frame.pack(fill=tk.X, pady=10)ttk.Label(column_frame, text="选择列:").pack(side=tk.LEFT, padx=(0, 10))self.selected_column = tk.StringVar()column_combo = ttk.Combobox(column_frame, textvariable=self.selected_column, state="readonly", width=30)column_combo.pack(side=tk.LEFT, fill=tk.X, expand=True)# 设置列选项if self.df is not None:columns = list(self.df.columns)column_combo['values'] = columnsif columns:column_combo.set(columns[0])# 当前数据类型显示current_type_frame = 
ttk.Frame(main_frame)current_type_frame.pack(fill=tk.X, pady=10)ttk.Label(current_type_frame, text="当前数据类型:").pack(side=tk.LEFT, padx=(0, 10))self.current_type_var = tk.StringVar(value="未选择")current_type_label = ttk.Label(current_type_frame, textvariable=self.current_type_var)current_type_label.pack(side=tk.LEFT)# 更新当前数据类型显示def update_current_type(*args):col = self.selected_column.get()if col and self.df is not None:self.current_type_var.set(str(self.df[col].dtype))self.selected_column.trace("w", update_current_type)# 目标数据类型选择框架target_type_frame = ttk.Frame(main_frame)target_type_frame.pack(fill=tk.X, pady=10)ttk.Label(target_type_frame, text="目标数据类型:").pack(side=tk.LEFT, padx=(0, 10))self.target_type = tk.StringVar()type_combo = ttk.Combobox(target_type_frame, textvariable=self.target_type, state="readonly", width=30)type_combo.pack(side=tk.LEFT, fill=tk.X, expand=True)# 定义支持的数据类型data_types = ["字符串 (str)","整数 (int)","浮点数 (float)","布尔值 (bool)","日期时间 (datetime)","分类数据 (category)","时间差 (timedelta)","复数 (complex)","对象 (object)"]type_combo['values'] = data_typestype_combo.set(data_types[0])# 转换选项框架options_frame = ttk.LabelFrame(main_frame, text="转换选项", padding="10")options_frame.pack(fill=tk.X, pady=10)self.ignore_errors = tk.BooleanVar(value=True)ignore_cb = ttk.Checkbutton(options_frame, text="忽略转换错误 (将无法转换的值设为NaN)", variable=self.ignore_errors)ignore_cb.pack(anchor=tk.W)self.downcast_int = tk.BooleanVar(value=False)downcast_cb = ttk.Checkbutton(options_frame, text="向下转换整数 (使用最小可能的整数类型)", variable=self.downcast_int)downcast_cb.pack(anchor=tk.W, pady=(5, 0))# 按钮框架button_frame = ttk.Frame(main_frame)button_frame.pack(fill=tk.X, pady=20)# 预览按钮preview_btn = ttk.Button(button_frame, text="预览转换", command=lambda: self.preview_conversion(self.selected_column.get(), self.target_type.get()))preview_btn.pack(side=tk.LEFT, padx=(0, 10))# 应用按钮apply_btn = ttk.Button(button_frame, text="应用转换", command=lambda: self.apply_conversion(self.selected_column.get(), 
self.target_type.get()))apply_btn.pack(side=tk.LEFT, padx=(0, 10))# 关闭按钮close_btn = ttk.Button(button_frame, text="关闭", command=conversion_window.destroy)close_btn.pack(side=tk.LEFT)# 初始化当前数据类型显示update_current_type()def preview_conversion(self, column, target_type):"""预览数据类型转换结果"""if not column:messagebox.showwarning("警告", "请选择要转换的列")returntry:# 获取转换前的数据样本original_sample = self.df[column].head(10).copy()original_info = f"原始数据类型: {self.df[column].dtype}\n\n前10行数据:\n{original_sample}"# 尝试转换converted_series = self.convert_column_type(column, target_type, preview=True)if converted_series is not None:converted_sample = converted_series.head(10)converted_info = f"转换后数据类型: {converted_series.dtype}\n\n前10行数据:\n{converted_sample}"# 显示预览结果preview_text = f"转换预览:\n\n{original_info}\n\n{converted_info}"messagebox.showinfo("转换预览", preview_text)else:messagebox.showerror("错误", "无法执行转换预览")except Exception as e:messagebox.showerror("错误", f"预览转换时出错: {str(e)}")def apply_conversion(self, column, target_type):"""应用数据类型转换"""if not column:messagebox.showwarning("警告", "请选择要转换的列")returntry:# 执行转换result = self.convert_column_type(column, target_type, preview=False)if result is not None:self.update_data_preview()self.update_column_combos()messagebox.showinfo("成功", f"已成功将列 '{column}' 转换为 {target_type}")else:messagebox.showerror("错误", "数据类型转换失败")except Exception as e:messagebox.showerror("错误", f"应用转换时出错: {str(e)}")def convert_column_type(self, column, target_type, preview=False):"""执行具体的数据类型转换"""if self.df is None or column not in self.df.columns:return Nonetry:# 根据用户选择的目标类型进行转换if target_type == "字符串 (str)":result = self.df[column].astype(str)elif target_type == "整数 (int)":if self.ignore_errors.get():result = pd.to_numeric(self.df[column], errors='coerce').astype('Int64')  # 可空整数类型else:result = pd.to_numeric(self.df[column], errors='raise').astype(int)elif target_type == "浮点数 (float)":if self.ignore_errors.get():result = pd.to_numeric(self.df[column], errors='coerce').astype(float)else:result = 
pd.to_numeric(self.df[column], errors='raise').astype(float)elif target_type == "布尔值 (bool)":# 尝试常见布尔值转换bool_map = {'true': True, 'false': False,'是': True, '否': False,'yes': True, 'no': False,'1': True, '0': False}if self.ignore_errors.get():result = self.df[column].map(bool_map).fillna(self.df[column]).astype(bool)else:result = self.df[column].astype(bool)elif target_type == "日期时间 (datetime)":if self.ignore_errors.get():result = pd.to_datetime(self.df[column], errors='coerce')else:result = pd.to_datetime(self.df[column], errors='raise')elif target_type == "分类数据 (category)":result = self.df[column].astype('category')elif target_type == "时间差 (timedelta)":if self.ignore_errors.get():result = pd.to_timedelta(self.df[column], errors='coerce')else:result = pd.to_timedelta(self.df[column], errors='raise')elif target_type == "复数 (complex)":if self.ignore_errors.get():result = self.df[column].apply(lambda x: complex(x) if self.is_complex_convertible(x) else np.nan)else:result = self.df[column].apply(complex)elif target_type == "对象 (object)":result = self.df[column].astype(object)else:messagebox.showerror("错误", f"不支持的数据类型: {target_type}")return None# 如果不是预览模式,则实际更新数据框if not preview:self.df[column] = resultreturn resultexcept Exception as e:if not preview:  # 只在实际应用时显示错误messagebox.showerror("转换错误", f"将列 '{column}' 转换为 {target_type} 时出错:\n{str(e)}")raisedef is_complex_convertible(self, value):"""检查值是否可以转换为复数"""try:complex(value)return Trueexcept (ValueError, TypeError):return False# 新增方法:规整小数位功能def round_decimal_places(self):"""规整小数位数"""if self.df is None:messagebox.showwarning("警告", "请先导入数据")return# 检查数据中是否有数值列numeric_columns = self.df.select_dtypes(include=[np.number]).columnsif len(numeric_columns) == 0:messagebox.showwarning("警告", "数据中没有数值列可供规整")return# 创建规整小数位窗口self.create_round_decimal_window()def create_round_decimal_window(self):"""创建规整小数位窗口"""# 创建新窗口round_window = 
tk.Toplevel(self.root)round_window.title("规整小数位")round_window.geometry("400x220")round_window.resizable(False,False)# 设置窗口居中和模态round_window.transient(self.root)round_window.grab_set()self.center_window(round_window)# 主框架main_frame = ttk.Frame(round_window, padding="20")main_frame.pack(fill=tk.BOTH, expand=True)# 说明文本info_label = ttk.Label(main_frame, text="请选择要规整的列和要保留的小数位数 (0-6):", wraplength=350)info_label.pack(pady=(0, 10))# 列选择框架column_frame = ttk.Frame(main_frame)column_frame.pack(fill=tk.X, pady=10)ttk.Label(column_frame, text="选择列:").pack(side=tk.LEFT, padx=(0, 10))# 获取数值列numeric_columns = self.df.select_dtypes(include=[np.number]).columns.tolist()self.selected_column_round = tk.StringVar()column_combo = ttk.Combobox(column_frame, textvariable=self.selected_column_round, state="readonly", width=20)column_combo.pack(side=tk.LEFT, fill=tk.X, expand=True)column_combo['values'] = numeric_columnsif numeric_columns:column_combo.set(numeric_columns[0])# 输入框架input_frame = ttk.Frame(main_frame)input_frame.pack(fill=tk.X,pady=10)ttk.Label(input_frame, text="小数位数:").pack(side=tk.LEFT, padx=(0, 10))# 创建验证函数,只允许输入0-6的数字def validate_input(new_value):if new_value == "":return Truetry:value = int(new_value)return 0 <= value <= 6except ValueError:return Falsevcmd = (round_window.register(validate_input), '%P')self.decimal_places_var = tk.StringVar(value="2")decimal_entry = ttk.Entry(input_frame, textvariable=self.decimal_places_var,validate="key",validatecommand=vcmd,width=10)decimal_entry.pack(side=tk.LEFT)# 按钮框架button_frame = ttk.Frame(main_frame)button_frame.pack(fill=tk.X, pady=20)# 确定按钮ok_btn = ttk.Button(button_frame, text="确定", command=lambda: self.apply_round_decimal(round_window))ok_btn.pack(side=tk.LEFT, padx=(0, 10))# 取消按钮cancel_btn = ttk.Button(button_frame, text="取消", command=round_window.destroy)cancel_btn.pack(side=tk.LEFT)# 绑定回车键round_window.bind('<Return>', lambda e: 
self.apply_round_decimal(round_window))decimal_entry.focus_set()decimal_entry.select_range(0, tk.END)def apply_round_decimal(self, window):"""应用小数位规整"""try:# 获取输入的小数位数decimal_places = int(self.decimal_places_var.get())# 验证输入范围if not (0 <= decimal_places <= 6):self.error_label.config(text="请输入0-6之间的数字")return# 获取选择的列selected_column = self.selected_column_round.get()if not selected_column:self.error_label.config(text="请选择要规整的列")return# 执行规整操作original_dtype = self.df[selected_column].dtype# 对选定的列进行小数位规整self.df[selected_column] = self.df[selected_column].round(decimal_places)# 保持原始数据类型(如果原来是整数,规整后可能变成浮点数,这里可以保持原类型)if original_dtype in [np.int64, np.int32, np.int16, np.int8]:# 如果原始是整数类型,且小数位数为0,可以转换回整数if decimal_places == 0:self.df[selected_column] = self.df[selected_column].astype(original_dtype)# 更新数据预览self.update_data_preview()# 显示成功消息#messagebox.showinfo("成功", f"已成功规整列 '{selected_column}' 的小数位数为 {decimal_places} 位")# 关闭窗口window.destroy()except ValueError:self.error_label.config(text="请输入有效的数字")except Exception as e:messagebox.showerror("错误", f"规整小数位时出错: {str(e)}")def rename_column(self):"""修改列名"""if self.df is None:messagebox.showwarning("警告", "请先导入数据")return# 创建修改列名窗口self.create_rename_column_window()def create_rename_column_window(self):"""创建修改列名窗口"""# 创建新窗口rename_window = tk.Toplevel(self.root)rename_window.title("修改列名")rename_window.geometry("400x280")rename_window.resizable(False, False)# 设置窗口居中和模态rename_window.transient(self.root)rename_window.grab_set()self.center_window(rename_window)# 主框架main_frame = ttk.Frame(rename_window, padding="20")main_frame.pack(fill=tk.BOTH, expand=True)# 当前列选择current_col_frame = ttk.Frame(main_frame)current_col_frame.pack(fill=tk.X, pady=10)ttk.Label(current_col_frame, text="选择要修改的列:").pack(anchor=tk.W)self.selected_rename_column = tk.StringVar()current_col_combo = ttk.Combobox(current_col_frame, textvariable=self.selected_rename_column, state="readonly", width=30)current_col_combo.pack(fill=tk.X, pady=5)# 设置列选项if self.df is not 
None:columns = list(self.df.columns)current_col_combo['values'] = columnsif columns:current_col_combo.set(columns[0])# 新列名输入new_name_frame = ttk.Frame(main_frame)new_name_frame.pack(fill=tk.X, pady=10)ttk.Label(new_name_frame, text="输入新列名:").pack(anchor=tk.W)self.new_column_name = tk.StringVar()new_name_entry = ttk.Entry(new_name_frame, textvariable=self.new_column_name, width=30)new_name_entry.pack(fill=tk.X, pady=5)# 错误提示标签self.rename_error_label = ttk.Label(main_frame, text="", foreground="red")self.rename_error_label.pack(pady=5)# 按钮框架button_frame = ttk.Frame(main_frame)button_frame.pack(fill=tk.X, pady=10)# 确定按钮ok_btn = ttk.Button(button_frame, text="确定", command=lambda: self.apply_column_rename(rename_window))ok_btn.pack(side=tk.LEFT, padx=(0, 10))# 取消按钮cancel_btn = ttk.Button(button_frame, text="取消", command=rename_window.destroy)cancel_btn.pack(side=tk.LEFT)# 绑定回车键rename_window.bind('<Return>', lambda e: self.apply_column_rename(rename_window))new_name_entry.focus_set()def apply_column_rename(self, window):"""应用列名修改"""try:# 获取选择的列和新列名selected_column = self.selected_rename_column.get()new_name = self.new_column_name.get().strip()# 验证输入if not selected_column:self.rename_error_label.config(text="请选择要修改的列")returnif not new_name:self.rename_error_label.config(text="请输入新列名")return# 检查新列名是否已存在(除了当前列本身)if new_name in self.df.columns and new_name != selected_column:self.rename_error_label.config(text=f"列名 '{new_name}' 已存在")return# 执行列名修改self.df = self.df.rename(columns={selected_column: new_name})# 如果存在原始数据,也更新原始数据的列名if self.original_df is not None and selected_column in self.original_df.columns:self.original_df = self.original_df.rename(columns={selected_column: new_name})# 更新数据预览和列选择框self.update_data_preview()self.update_column_combos()# 显示成功消息messagebox.showinfo("成功", f"已成功将列 '{selected_column}' 重命名为 '{new_name}'")# 关闭窗口window.destroy()except Exception as e:messagebox.showerror("错误", f"修改列名时出错: {str(e)}")def reset_data(self):if self.original_df is not 
None:self.df = self.original_df.copy()self.update_data_preview()self.update_column_combos()messagebox.showinfo("成功", "数据已重置")else:messagebox.showwarning("警告", "没有原始数据可重置")# 可视化功能def update_column_combos(self):if self.df is not None:columns = list(self.df.columns)self.x_axis_combo['values'] = columnsself.y_axis_combo['values'] = columnsself.group_combo['values'] = ["无"] + columns# 设置默认值if columns:self.x_axis_combo.set(columns[0])if len(columns) > 1:self.y_axis_combo.set(columns[1])else:self.y_axis_combo.set(columns[0])def generate_chart(self):if self.df is None:messagebox.showwarning("警告", "请先导入数据")returnchart_type = self.chart_type.get()x_col = self.x_axis_var.get()y_col = self.y_axis_var.get()group_col = self.group_var.get() if self.group_var.get() != "无" else Noneif not x_col or not y_col:messagebox.showwarning("警告", "请选择X轴和Y轴数据")returnif chart_type == "雷达图":# 雷达图需要数值数据if not pd.api.types.is_numeric_dtype(self.df[y_col]):messagebox.showwarning("警告", "雷达图需要数值数据,请选择数值列作为Y轴")return# 如果数据点太多,提示用户#if self.df[x_col].nunique() > 12:#    messagebox.showwarning("提示", f"雷达图建议不超过12个类别以获得最佳显示效果")     try:# 清除当前图表self.ax.clear()# 如果不是雷达图,确保使用直角坐标系if chart_type != "雷达图":# 如果当前是极坐标系,重新创建直角坐标系if hasattr(self.ax, 'name') and self.ax.name == 'polar':self.fig.delaxes(self.ax)  # 删除极坐标轴self.ax = self.fig.add_subplot(111)  # 创建新的直角坐标轴# 准备数据 - 根据选项限制类别数量plot_df = self.df.copy()# 对于分类数据,如果类别过多且用户选择限制,则只保留前10个类别if (self.limit_categories.get() and chart_type in ["柱状图", "折线图", "饼图", "箱线图"] andself.df[x_col].dtype == 'object'):# 获取前10个最常见的类别top_categories = self.df[x_col].value_counts().head(10).indexplot_df = self.df[self.df[x_col].isin(top_categories)]# 如果过滤后数据为空,使用原始数据if plot_df.empty:plot_df = self.df.copy()messagebox.showwarning("警告", "无法限制类别数量,使用全部数据")else:# 显示提示信息unique_count = self.df[x_col].nunique()if unique_count > 10:messagebox.showinfo("提示", f"X轴数据有 {unique_count} 个类别,已限制显示前10个最常见的类别。\n"f"如需显示全部数据,请取消勾选'限制X轴类别数量'选项。")# 根据选择的图表类型生成图表if chart_type == "柱状图":if 
group_col:sns.barplot(data=plot_df, x=x_col, y=y_col, hue=group_col, ax=self.ax)else:sns.barplot(data=plot_df, x=x_col, y=y_col, ax=self.ax)self.ax.tick_params(axis='x', rotation=45)elif chart_type == "折线图":if group_col:# 对于折线图,需要确保X轴数据是排序的plot_df = plot_df.sort_values(by=x_col)sns.lineplot(data=plot_df, x=x_col, y=y_col, hue=group_col, ax=self.ax)else:plot_df = plot_df.sort_values(by=x_col)sns.lineplot(data=plot_df, x=x_col, y=y_col, ax=self.ax)elif chart_type == "散点图":if group_col:sns.scatterplot(data=plot_df, x=x_col, y=y_col, hue=group_col, ax=self.ax)else:sns.scatterplot(data=plot_df, x=x_col, y=y_col, ax=self.ax)elif chart_type == "饼图":# 饼图需要特殊处理,通常只使用一列数据if group_col:pie_data = plot_df.groupby(group_col)[y_col].sum()else:pie_data = plot_df[y_col].value_counts()# 改进的饼图处理:取前9个,其余合并为"其他"if len(pie_data) > 10 and self.limit_categories.get():top_9 = pie_data.head(9)other_sum = pie_data.iloc[9:].sum()# 创建新的饼图数据,包含前9个和"其他"pie_data = pd.concat([top_9, pd.Series([other_sum], index=['其他'])])# 显示提示信息messagebox.showinfo("提示", f"饼图数据有 {len(plot_df[y_col].value_counts())} 个类别,已显示前9个最常见的类别,其余合并为'其他'。")self.ax.pie(pie_data, labels=pie_data.index, autopct='%1.1f%%')self.ax.set_ylabel('')  # 清除Y轴标签elif chart_type == "箱线图":if group_col:sns.boxplot(data=plot_df, x=group_col, y=y_col, ax=self.ax)else:sns.boxplot(data=plot_df, y=y_col, ax=self.ax)elif chart_type == "热力图":# 热力图通常需要数值数据的相关性矩阵numeric_df = plot_df.select_dtypes(include=[np.number])if len(numeric_df.columns) < 2:messagebox.showwarning("警告", "热力图需要至少两个数值列")returncorr_matrix = numeric_df.corr()sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=self.ax)elif chart_type == "直方图":if group_col:for category in plot_df[group_col].unique():self.ax.hist(plot_df[plot_df[group_col] == category][y_col], alpha=0.5, label=str(category))self.ax.legend()else:self.ax.hist(plot_df[y_col])self.ax.set_xlabel(y_col)self.ax.set_ylabel('频率')elif chart_type == "雷达图":# 弹出窗口让用户选择雷达图要素self.show_radar_chart_options()return  # 
雷达图有单独的生成流程,这里直接返回# 设置标题 - 确保使用中文字体self.ax.set_title(f"{chart_type}: {y_col} vs {x_col}", fontproperties=self.get_chinese_font())# 设置坐标轴标签 - 确保使用中文字体self.ax.set_xlabel(x_col, fontproperties=self.get_chinese_font())self.ax.set_ylabel(y_col, fontproperties=self.get_chinese_font())# 更新画布self.canvas.draw()except Exception as e:messagebox.showerror("错误", f"生成图表时出错: {str(e)}")def show_radar_chart_options(self):"""显示雷达图选项窗口"""if self.df is None:messagebox.showwarning("警告", "请先导入数据")return# 创建雷达图选项窗口radar_window = tk.Toplevel(self.root)radar_window.title("雷达图选项")radar_window.geometry("500x450")radar_window.resizable(False, False)radar_window.transient(self.root)radar_window.grab_set()# 设置窗口居中self.center_window(radar_window)# 主框架main_frame = ttk.Frame(radar_window, padding="15")main_frame.pack(fill=tk.BOTH, expand=True)# 标题title_label = ttk.Label(main_frame, text="雷达图设置", style="Title.TLabel")title_label.pack(pady=(0, 15))# 分类列选择category_frame = ttk.Frame(main_frame)category_frame.pack(fill=tk.X, pady=10)ttk.Label(category_frame, text="分类列 (X轴):").pack(anchor=tk.W)self.radar_category_var = tk.StringVar()category_combo = ttk.Combobox(category_frame, textvariable=self.radar_category_var, state="readonly", width=40)category_combo.pack(fill=tk.X, pady=5)# 数值列选择value_frame = ttk.Frame(main_frame)value_frame.pack(fill=tk.X, pady=10)ttk.Label(value_frame, text="数值列 (Y轴):").pack(anchor=tk.W)self.radar_value_var = tk.StringVar()value_combo = ttk.Combobox(value_frame, textvariable=self.radar_value_var, state="readonly", width=40)value_combo.pack(fill=tk.X, pady=5)# 分组列选择group_frame = ttk.Frame(main_frame)group_frame.pack(fill=tk.X, pady=10)ttk.Label(group_frame, text="分组列 (可选):").pack(anchor=tk.W)self.radar_group_var = tk.StringVar(value="无")group_combo = ttk.Combobox(group_frame, textvariable=self.radar_group_var, state="readonly", width=40)group_combo.pack(fill=tk.X, pady=5)# 分类数量限制选项limit_frame = ttk.Frame(main_frame)limit_frame.pack(fill=tk.X, pady=10)self.radar_limit_categories 
= tk.BooleanVar(value=True)limit_cb = ttk.Checkbutton(limit_frame, text="限制分类数量 (最多10个)", variable=self.radar_limit_categories)limit_cb.pack(anchor=tk.W)# 说明文本info_label = ttk.Label(main_frame, text="注意: 雷达图使用分类列作为角度轴(不建议超过10个),数值列作为半径轴。\n""如果分组,每个分组将显示为一个多边形。",justify=tk.LEFT)info_label.pack(fill=tk.X, pady=10)# 按钮框架button_frame = ttk.Frame(main_frame)button_frame.pack(fill=tk.X, pady=20)# 生成按钮generate_btn = ttk.Button(button_frame, text="生成雷达图", command=lambda: self.generate_radar_from_options(radar_window))generate_btn.pack(side=tk.LEFT, padx=(0, 10))# 取消按钮cancel_btn = ttk.Button(button_frame, text="取消", command=radar_window.destroy)cancel_btn.pack(side=tk.LEFT)# 设置选项columns = list(self.df.columns)category_combo['values'] = columnsvalue_combo['values'] = columnsgroup_combo['values'] = ["无"] + columns# 设置默认值if columns:category_combo.set(columns[0])if len(columns) > 1:value_combo.set(columns[1])else:value_combo.set(columns[0])# 绑定回车键radar_window.bind('<Return>', lambda e: self.generate_radar_from_options(radar_window))def generate_radar_from_options(self, window):"""从选项窗口生成雷达图"""category_col = self.radar_category_var.get()value_col = self.radar_value_var.get()group_col = self.radar_group_var.get() if self.radar_group_var.get() != "无" else Noneif not category_col or not value_col:messagebox.showwarning("警告", "请选择分类列和数值列")return# 验证数值列是否为数值类型if not pd.api.types.is_numeric_dtype(self.df[value_col]):messagebox.showwarning("警告", "数值列必须为数值类型")return# 准备数据plot_df = self.df.copy()# 限制分类数量if self.radar_limit_categories.get():unique_categories = plot_df[category_col].unique()if len(unique_categories) > 10:# 获取前10个最常见的分类top_categories = plot_df[category_col].value_counts().head(10).indexplot_df = plot_df[plot_df[category_col].isin(top_categories)]# 显示提示信息messagebox.showinfo("提示", f"已限制显示前10个最常见的类别。\n"f"如需显示全部数据,请取消勾选'限制分类数量'选项。")# 关闭选项窗口window.destroy()# 生成雷达图self.generate_radar_chart(plot_df, category_col, value_col, group_col)def generate_radar_chart(self, data, 
category_col, value_col, group_col=None):"""生成雷达图"""try:# 清除当前图表self.fig.clear()self.ax = self.fig.add_subplot(111, polar=True)# 准备数据if group_col:# 分组雷达图groups = data[group_col].unique()# 获取所有类别categories = data[category_col].unique()num_vars = len(categories)# 计算每个角度的位置angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()angles += angles[:1]  # 闭合图形# 为每个组绘制雷达图colors = plt.cm.tab10(np.linspace(0, 1, len(groups)))for i, group in enumerate(groups):group_data = data[data[group_col] == group]values = []# 获取每个类别的值for category in categories:category_values = group_data[group_data[category_col] == category][value_col]if len(category_values) > 0:values.append(category_values.mean())else:values.append(0)# 闭合图形values += values[:1]# 绘制雷达图self.ax.plot(angles, values, 'o-', linewidth=2, label=str(group), color=colors[i])self.ax.fill(angles, values, alpha=0.25, color=colors[i])# 添加类别标签self.ax.set_xticks(angles[:-1])self.ax.set_xticklabels(categories, fontproperties=self.get_chinese_font())# 添加图例self.ax.legend(loc='upper right', bbox_to_anchor=(1.7, 1.0))    #此处可修改雷达图图例与主图的位置关系,1.7是与主图间距,0.7是在主图纵向的位置else:# 单个雷达图# 获取类别和对应的平均值category_stats = data.groupby(category_col)[value_col].mean().reset_index()categories = category_stats[category_col].tolist()values = category_stats[value_col].tolist()num_vars = len(categories)# 计算角度angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()angles += angles[:1]  # 闭合图形values += values[:1]  # 闭合图形# 绘制雷达图self.ax.plot(angles, values, 'o-', linewidth=2, color='b')self.ax.fill(angles, values, alpha=0.25, color='b')# 添加类别标签self.ax.set_xticks(angles[:-1])self.ax.set_xticklabels(categories, fontproperties=self.get_chinese_font())# 设置标题#title = f"雷达图: {value_col} vs {category_col}"#if group_col:#    title += f" (按 {group_col} 分组)"#self.ax.set_title(title, fontproperties=self.get_chinese_font(), pad=20)# 添加网格self.ax.grid(True)# 更新画布self.canvas.draw()except Exception as e:messagebox.showerror("错误", f"生成雷达图时出错: {str(e)}")# 
恢复为直角坐标系self.fig.clear()self.ax = self.fig.add_subplot(111)self.ax.text(0.5, 0.5, "生成雷达图时出错", horizontalalignment='center', verticalalignment='center',transform=self.ax.transAxes, fontsize=14)self.ax.set_xticks([])self.ax.set_yticks([])self.canvas.draw()def get_chinese_font(self):"""获取中文字体设置"""try:# 尝试获取系统中可用的中文字体available_fonts = [f.name for f in fm.fontManager.ttflist]chinese_fonts = ['SimHei', 'Microsoft YaHei', 'KaiTi', 'SimSun']for font in chinese_fonts:if font in available_fonts:return fm.FontProperties(fname=fm.findfont(fm.FontProperties(family=font)))except:pass# 如果找不到中文字体,返回默认字体return fm.FontProperties()def save_chart(self):if self.fig is None:messagebox.showwarning("警告", "没有图表可保存")returnfile_path = filedialog.asksaveasfilename(defaultextension=".png",filetypes=[("PNG files", "*.png"), ("JPEG files", "*.jpg"), ("PDF files", "*.pdf")])if file_path:try:self.fig.savefig(file_path, dpi=300, bbox_inches='tight')messagebox.showinfo("成功", f"图表已保存到: {file_path}")except Exception as e:messagebox.showerror("错误", f"保存图表时出错: {str(e)}")# 辅助功能def update_data_preview(self):# 清除现有数据for item in self.data_tree.get_children():self.data_tree.delete(item)# 设置列if self.df is not None:self.data_tree["columns"] = list(self.df.columns)self.data_tree["show"] = "headings"# 设置列标题for col in self.df.columns:self.data_tree.heading(col, text=col, command=lambda c=col: self.sort_treeview(c))self.data_tree.column(col, width=100)# 添加数据行 (根据选项决定显示多少行)display_data = self.df if self.show_all_data.get() else self.df.head(100)for idx, row in display_data.iterrows():# 将行数据转换为列表,处理NaN值为空字符串row_values = []for value in row:if pd.isna(value):row_values.append("")  # 使用空字符串表示缺失值else:row_values.append(value)# 插入行self.data_tree.insert("", "end", values=row_values)# 更新数据预览标题parent = self.data_tree.masterif hasattr(parent, 'configure'):parent.configure(text="数据预览")def sort_treeview(self, column):"""对Treeview按列进行排序"""if self.df is None:return# 获取当前列的排序状态if column not in 
self.sort_states:self.sort_states[column] = "desc"  # 默认第一次点击降序# 切换排序状态if self.sort_states[column] == "desc":self.sort_states[column] = "asc"sorted_df = self.df.sort_values(by=column, ascending=True)else:self.sort_states[column] = "desc"sorted_df = self.df.sort_values(by=column, ascending=False)# 更新显示的数据self.df = sorted_dfself.update_data_preview()# 更新列标题以显示排序状态for col in self.data_tree["columns"]:current_text = colif col == column:if self.sort_states[column] == "desc":current_text = col + " ▼"else:current_text = col + " ▲"self.data_tree.heading(col, text=current_text, command=lambda c=col: self.sort_treeview(c))# 主程序
if __name__ == "__main__":
    # Create the Tk root window, attach the application to it and hand
    # control to the Tk event loop until the window is closed.
    root = tk.Tk()
    app = DataAnalysisApp(root)
    root.mainloop()

http://www.dtcms.com/a/581787.html

相关文章:

  • 网页制作元素有哪些前端角度实现网站首页加载慢优化
  • C++中的智能指针std::shared_ptr是线程安全的吗?以及它的详细实现原理
  • 网站服务器安装教程视频教程电子商务网站规划
  • 【vsftpd报错】227 Entering Passive Mode,553 Could not create file.
  • 有多少网站可以推广业务那个公司做app
  • 正规的大连网站建设a963中华室内设计官网
  • 中承信安信创软件检测:CMA资质+国家标准双重保障的测试报告
  • #智能CI/CD流水线与AIOps 论坛@AiDD深圳站
  • 医疗AI模型与控制器自动化CI/CD流水线
  • NumPy -数组运算与操作
  • 中美最近军事新闻邯郸网站优化公司
  • windows本机vscode通过ssh免密登录远程linux服务器 git push/pull 免密
  • go语言网站开发教程门户网站是如何做引流的
  • SG-ECAT_S-TCP(EtherCAT 转 ModbusTCP 网关)
  • 分享一些在C++中使用异常处理的最佳实践
  • 物流网站怎么开网络最好的运营商
  • 学习随笔-async和await
  • 祁阳做网站河南工程建设验收公示网
  • PCIe协议分析仪-VIAVI设置抓取ASPM协商过程
  • ThreadLocal 相关知识点
  • OSG新版GLSL语法全解析
  • 智守边界:入侵报警系统的主动防御时代
  • 为什么网站建设起来搜素不到电子商务网站建设考题
  • 济南网站建设是什么合肥seo网络营销推广
  • 【Hot100|4-LeetCode 283. 移动零 】
  • 操作系统拿着文件名查找磁盘文件的全过程
  • 【Hot100 | 6 LeetCode 15. 三数之和】
  • 哪些网站用wordpress建设银行网站总是崩溃
  • c#实现redis的调用与基础类
  • 【深度学习新浪潮】什么是投机解码?大模型推理优化的核心技术解析(含代码实操)