#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Excel工资核算文件分析工具 分析工资核算相关的Excel文件,提取字段信息和引用关系 """ import pandas as pd import os import re from pathlib import Path def analyze_excel_file(file_path, sheet_name=None): """分析单个Excel文件""" try: # 获取所有工作表名称 excel_file = pd.ExcelFile(file_path) sheets = excel_file.sheet_names print(f"\n=== 分析文件: {os.path.basename(file_path)} ===") print(f"工作表数量: {len(sheets)}") print(f"工作表名称: {sheets}") analysis_result = { 'file_name': os.path.basename(file_path), 'sheets': {}, 'all_columns': set(), 'formulas': [], 'references': [] } # 分析每个工作表 for sheet in sheets: if sheet_name and sheet != sheet_name: continue try: df = pd.read_excel(file_path, sheet_name=sheet, header=0) print(f"\n--- 工作表: {sheet} ---") print(f"行数: {len(df)}") print(f"列数: {len(df.columns)}") print(f"列名: {list(df.columns)}") # 收集列名 analysis_result['all_columns'].update(df.columns) analysis_result['sheets'][sheet] = { 'rows': len(df), 'columns': len(df.columns), 'column_names': list(df.columns), 'sample_data': df.head(3).to_dict('records') if len(df) > 0 else [] } # 查找公式和引用 for col in df.columns: for idx, value in df[col].items(): if pd.notna(value) and isinstance(value, str): # 查找Excel公式模式 formula_patterns = [ r'=([A-Z]+[0-9]+)', # 单元格引用 r'=([A-Z]+[0-9]+:[A-Z]+[0-9]+)', # 范围引用 r'=([A-Z]+[0-9]+\[[^\]]+\])', # 数组引用 r'=([A-Z]+[0-9]+\$[0-9]+)', # 绝对引用 r'=([A-Z]+[0-9]+:[A-Z]+[0-9]+\$[0-9]+)', # 绝对范围引用 ] for pattern in formula_patterns: matches = re.findall(pattern, str(value)) if matches: analysis_result['formulas'].append({ 'sheet': sheet, 'cell': f"{col}{idx+1}", 'formula': str(value), 'references': matches }) analysis_result['references'].extend(matches) except Exception as e: print(f"分析工作表 {sheet} 时出错: {e}") analysis_result['sheets'][sheet] = {'error': str(e)} return analysis_result except Exception as e: print(f"分析文件 {file_path} 时出错: {e}") return None def main(): """主函数""" base_path = "/Users/mr.wang/代码库/绿纤/lvqianmeiye_ERP/参考资料/工资核算 -7月" # 要分析的文件列表 files_to_analyze = [ "工资(全字段).xlsx", "①B-a-26考勤汇总表.xlsx", "②B-a-17每日早报.xlsx", "③B-a-③呈现-消耗明细表.xlsx", "④B-a-25社保统计表.xlsx", "⑤B-a-12奖励统计表.xlsx", "B-b-③健康师底薪.xlsx", "B-b-④健康师提成-金三角顾问.xlsx", "B-b-⑤其他岗位工资.xlsx", "B-b-⑤当月数据及门店毛利.xlsx" ] all_results = {} print("开始分析工资核算Excel文件...") print("=" * 60) for filename in files_to_analyze: file_path = os.path.join(base_path, filename) if os.path.exists(file_path): result = analyze_excel_file(file_path) if result: all_results[filename] = result else: print(f"文件不存在: {filename}") # 生成分析报告 generate_report(all_results) def generate_report(all_results): """生成分析报告""" report_path = "/Users/mr.wang/代码库/绿纤/lvqianmeiye_ERP/工资核算Excel分析报告.md" with open(report_path, 'w', encoding='utf-8') as f: f.write("# 工资核算Excel文件分析报告\n\n") f.write("## 概述\n\n") f.write("本报告分析了工资核算相关的Excel文件,提取了字段信息、公式引用关系等关键数据。\n\n") # 文件概览 f.write("## 文件概览\n\n") f.write("| 文件名 | 工作表数量 | 总列数 | 状态 |\n") f.write("|--------|------------|--------|------|\n") for filename, result in all_results.items(): if result: sheet_count = len(result['sheets']) total_columns = len(result['all_columns']) f.write(f"| {filename} | {sheet_count} | {total_columns} | ✅ |\n") else: f.write(f"| {filename} | - | - | ❌ |\n") # 详细分析 f.write("\n## 详细分析\n\n") for filename, result in all_results.items(): if not result: continue f.write(f"### {filename}\n\n") # 工作表信息 f.write("#### 工作表信息\n\n") for sheet_name, sheet_info in result['sheets'].items(): if 'error' in sheet_info: f.write(f"- **{sheet_name}**: 分析出错 - {sheet_info['error']}\n") else: f.write(f"- **{sheet_name}**: {sheet_info['rows']}行 x {sheet_info['columns']}列\n") f.write(f" - 列名: {', '.join([str(col) for col in sheet_info['column_names']])}\n") # 字段统计 f.write(f"\n#### 字段统计\n\n") f.write(f"总字段数: {len(result['all_columns'])}\n\n") f.write("所有字段列表:\n") for col in sorted(result['all_columns'], key=str): f.write(f"- {col}\n") # 公式引用 if result['formulas']: f.write(f"\n#### 公式引用\n\n") f.write(f"发现 {len(result['formulas'])} 个公式引用:\n\n") for formula in result['formulas'][:10]: # 只显示前10个 f.write(f"- **{formula['cell']}** ({formula['sheet']}): {formula['formula']}\n") f.write(f" - 引用: {', '.join(formula['references'])}\n") if len(result['formulas']) > 10: f.write(f"\n... 还有 {len(result['formulas']) - 10} 个公式\n") f.write("\n---\n\n") # 字段汇总 f.write("## 字段汇总\n\n") all_columns = set() for result in all_results.values(): if result: all_columns.update(result['all_columns']) f.write(f"所有文件共发现 {len(all_columns)} 个唯一字段:\n\n") for col in sorted(all_columns, key=str): f.write(f"- {col}\n") # 引用关系汇总 f.write("\n## 引用关系汇总\n\n") all_references = set() for result in all_results.values(): if result: all_references.update(result['references']) f.write(f"发现 {len(all_references)} 个唯一引用:\n\n") for ref in sorted(all_references, key=str): f.write(f"- {ref}\n") print(f"\n分析报告已生成: {report_path}") if __name__ == "__main__": main()