analyze_formulas.py
11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Excel公式分析工具
分析工资核算Excel文件中的公式和引用关系
"""
import pandas as pd
import openpyxl
import os
import re
from pathlib import Path
def extract_formulas_from_excel(file_path):
"""从Excel文件中提取所有公式"""
try:
# 使用openpyxl读取Excel文件以获取公式
workbook = openpyxl.load_workbook(file_path, data_only=False)
formulas = []
references = []
for sheet_name in workbook.sheetnames:
worksheet = workbook[sheet_name]
for row in worksheet.iter_rows():
for cell in row:
if cell.data_type == 'f': # 公式类型
formula = str(cell.value)
cell_ref = f"{sheet_name}!{cell.coordinate}"
# 提取公式中的引用
refs = extract_references_from_formula(formula)
formulas.append({
'file': os.path.basename(file_path),
'sheet': sheet_name,
'cell': cell.coordinate,
'formula': formula,
'references': refs
})
references.extend(refs)
return formulas, references
except Exception as e:
print(f"分析文件 {file_path} 时出错: {e}")
return [], []
def extract_references_from_formula(formula):
"""从公式中提取引用关系"""
references = []
# 匹配各种引用模式
patterns = [
# XLOOKUP引用模式
r"XLOOKUP\([^,]+,\s*'([^']+)'!([^,]+),\s*'([^']+)'!([^,]+)",
# VLOOKUP引用模式
r"VLOOKUP\([^,]+,\s*'([^']+)'!([^,]+)",
# 直接工作表引用
r"'([^']+)'!([A-Z]+\d+)",
# 工作表范围引用
r"'([^']+)'!([A-Z]+\d+:[A-Z]+\d+)",
# 简单单元格引用
r"([A-Z]+\d+)",
# 范围引用
r"([A-Z]+\d+:[A-Z]+\d+)",
]
for pattern in patterns:
matches = re.findall(pattern, formula)
for match in matches:
if isinstance(match, tuple):
if len(match) == 2:
references.append({
'type': 'worksheet_reference',
'file': match[0] if match[0] else None,
'range': match[1]
})
elif len(match) == 4:
references.append({
'type': 'xlookup_reference',
'file': match[0],
'lookup_range': match[1],
'file2': match[2],
'return_range': match[3]
})
else:
references.append({
'type': 'cell_reference',
'range': match
})
return references
def analyze_formula_dependencies(formulas):
"""分析公式依赖关系"""
dependencies = {}
for formula in formulas:
cell_key = f"{formula['file']}!{formula['sheet']}!{formula['cell']}"
dependencies[cell_key] = {
'formula': formula['formula'],
'dependencies': formula['references']
}
return dependencies
def generate_formula_report(formulas, references, dependencies):
"""生成公式分析报告"""
report_path = "/Users/mr.wang/代码库/绿纤/lvqianmeiye_ERP/Excel公式分析报告.md"
with open(report_path, 'w', encoding='utf-8') as f:
f.write("# Excel公式分析报告\n\n")
f.write("## 概述\n\n")
f.write("本报告分析了工资核算Excel文件中的公式和引用关系,帮助理解数据计算逻辑。\n\n")
# 统计信息
f.write("## 统计信息\n\n")
f.write(f"- 总公式数量: {len(formulas)}\n")
f.write(f"- 总引用数量: {len(references)}\n")
f.write(f"- 涉及文件数量: {len(set(f['file'] for f in formulas))}\n\n")
# 按文件分组分析
files = {}
for formula in formulas:
file_name = formula['file']
if file_name not in files:
files[file_name] = []
files[file_name].append(formula)
f.write("## 文件公式分析\n\n")
for file_name, file_formulas in files.items():
f.write(f"### {file_name}\n\n")
f.write(f"公式数量: {len(file_formulas)}\n\n")
# 按工作表分组
sheets = {}
for formula in file_formulas:
sheet_name = formula['sheet']
if sheet_name not in sheets:
sheets[sheet_name] = []
sheets[sheet_name].append(formula)
for sheet_name, sheet_formulas in sheets.items():
f.write(f"#### 工作表: {sheet_name}\n\n")
f.write(f"公式数量: {len(sheet_formulas)}\n\n")
# 显示前10个公式
for i, formula in enumerate(sheet_formulas[:10]):
f.write(f"**{formula['cell']}**:\n")
f.write(f"```\n{formula['formula']}\n```\n")
if formula['references']:
f.write("引用关系:\n")
for ref in formula['references']:
if ref['type'] == 'xlookup_reference':
f.write(f"- XLOOKUP: {ref['file']}!{ref['lookup_range']} -> {ref['file2']}!{ref['return_range']}\n")
elif ref['type'] == 'worksheet_reference':
f.write(f"- 工作表引用: {ref['file']}!{ref['range']}\n")
elif ref['type'] == 'cell_reference':
f.write(f"- 单元格引用: {ref['range']}\n")
f.write("\n")
else:
f.write("无外部引用\n\n")
if len(sheet_formulas) > 10:
f.write(f"... 还有 {len(sheet_formulas) - 10} 个公式\n\n")
f.write("---\n\n")
# 引用关系汇总
f.write("## 引用关系汇总\n\n")
# 统计引用类型
ref_types = {}
for ref in references:
ref_type = ref['type']
if ref_type not in ref_types:
ref_types[ref_type] = 0
ref_types[ref_type] += 1
f.write("### 引用类型统计\n\n")
for ref_type, count in ref_types.items():
f.write(f"- {ref_type}: {count} 个\n")
f.write("\n")
# 外部文件引用
external_refs = {}
for ref in references:
if ref['type'] == 'xlookup_reference' and ref['file']:
file_name = ref['file']
if file_name not in external_refs:
external_refs[file_name] = []
external_refs[file_name].append(ref)
if external_refs:
f.write("### 外部文件引用\n\n")
for file_name, refs in external_refs.items():
f.write(f"**{file_name}**: {len(refs)} 个引用\n")
for ref in refs[:5]: # 只显示前5个
f.write(f"- {ref['file']}!{ref['lookup_range']} -> {ref['file2']}!{ref['return_range']}\n")
if len(refs) > 5:
f.write(f"... 还有 {len(refs) - 5} 个引用\n")
f.write("\n")
# 公式依赖关系图
f.write("## 公式依赖关系\n\n")
f.write("### 关键公式分析\n\n")
# 找出包含XLOOKUP的公式
xlookup_formulas = [f for f in formulas if 'XLOOKUP' in f['formula']]
if xlookup_formulas:
f.write("#### XLOOKUP公式分析\n\n")
for formula in xlookup_formulas[:10]:
f.write(f"**{formula['file']}!{formula['sheet']}!{formula['cell']}**:\n")
f.write(f"```\n{formula['formula']}\n```\n")
# 解析XLOOKUP参数
xlookup_match = re.search(r"XLOOKUP\(([^,]+),\s*'([^']+)'!([^,]+),\s*'([^']+)'!([^,]+)", formula['formula'])
if xlookup_match:
lookup_value = xlookup_match.group(1).strip()
lookup_file = xlookup_match.group(2)
lookup_range = xlookup_match.group(3)
return_file = xlookup_match.group(4)
return_range = xlookup_match.group(5)
f.write("参数解析:\n")
f.write(f"- 查找值: {lookup_value}\n")
f.write(f"- 查找范围: {lookup_file}!{lookup_range}\n")
f.write(f"- 返回范围: {return_file}!{return_range}\n")
f.write("\n")
# 数据流分析
f.write("### 数据流向分析\n\n")
f.write("基于公式分析,数据流向如下:\n\n")
# 分析数据源文件
data_sources = set()
for ref in references:
if ref['type'] == 'xlookup_reference' and ref['file']:
data_sources.add(ref['file'])
f.write("**数据源文件**:\n")
for source in sorted(data_sources):
f.write(f"- {source}\n")
f.write("\n")
f.write("**数据流向**:\n")
f.write("1. 基础数据从各数据源文件获取\n")
f.write("2. 通过XLOOKUP/VLOOKUP函数进行数据匹配\n")
f.write("3. 在工资核算表中进行汇总计算\n")
f.write("4. 生成最终的工资计算结果\n\n")
return report_path
def main():
"""主函数"""
base_path = "/Users/mr.wang/代码库/绿纤/lvqianmeiye_ERP/参考资料/工资核算 -7月"
# 要分析的文件列表
files_to_analyze = [
"工资(全字段).xlsx",
"①B-a-26考勤汇总表.xlsx",
"②B-a-17每日早报.xlsx",
"③B-a-③呈现-消耗明细表.xlsx",
"④B-a-25社保统计表.xlsx",
"⑤B-a-12奖励统计表.xlsx",
"B-b-③健康师底薪.xlsx",
"B-b-④健康师提成-金三角顾问.xlsx",
"B-b-⑤其他岗位工资.xlsx",
"B-b-⑤当月数据及门店毛利.xlsx"
]
all_formulas = []
all_references = []
print("开始分析Excel公式...")
print("=" * 60)
for filename in files_to_analyze:
file_path = os.path.join(base_path, filename)
if os.path.exists(file_path):
print(f"正在分析: {filename}")
formulas, references = extract_formulas_from_excel(file_path)
all_formulas.extend(formulas)
all_references.extend(references)
print(f" 发现 {len(formulas)} 个公式")
else:
print(f"文件不存在: {filename}")
# 分析依赖关系
dependencies = analyze_formula_dependencies(all_formulas)
# 生成报告
report_path = generate_formula_report(all_formulas, all_references, dependencies)
print(f"\n公式分析完成!")
print(f"总公式数量: {len(all_formulas)}")
print(f"总引用数量: {len(all_references)}")
print(f"分析报告: {report_path}")
if __name__ == "__main__":
main()