import re
import sys
import os
import ast
from tokenize import generate_tokens, COMMENT, STRING, NL, INDENT, DEDENT
import iodef extract_entities(filename):"""提取类和函数到单独文件"""with open(filename, 'r', encoding='utf-8') as f:content = f.read()# 计算总行数total_lines = content.count('\n') + 1long_function_threshold = max(80, total_lines // 12)# 初始化结果entities = []current_pos = 0# 尝试使用AST解析获取更准确的位置信息try:tree = ast.parse(content)ast_entities = []for node in tree.body:if isinstance(node, ast.ClassDef):start_line = node.linenoend_line = node.end_linenoast_entities.append(('class', node.name, start_line, end_line))elif isinstance(node, ast.FunctionDef):start_line = node.linenoend_line = node.end_linenoast_entities.append(('function', node.name, start_line, end_line))# 使用AST信息辅助提取lines = content.split('\n')for e_type, name, start_line, end_line in ast_entities:start_index = sum(len(line) + 1 for line in lines[:start_line-1])end_index = sum(len(line) + 1 for line in lines[:end_line]) - 1# 对于函数,检查是否为长函数if e_type == 'function':func_lines = end_line - start_line + 1if func_lines > long_function_threshold:func_content = content[start_index:end_index]entities.append(('function', name, start_index, end_index, func_content))# 类总是提取elif e_type == 'class':class_content = content[start_index:end_index]entities.append(('class', name, start_index, end_index, class_content))except SyntaxError:# 如果AST解析失败,使用基于缩进的方法return extract_entities_with_indent(content, total_lines, long_function_threshold)return entities, content, total_linesdef extract_entities_with_indent(content, total_lines, long_function_threshold):"""当AST解析失败时使用基于缩进的方法"""entities = []current_pos = 0# 类定义正则(支持装饰器和继承)class_pattern = re.compile(r'^([ \t]*)@?.*?\b(class|struct)\s+(\w+)\s*[\(:]?[^{:]*[:{]?', re.MULTILINE)# 函数定义正则(支持装饰器和类型注解)func_pattern = re.compile(r'^([ \t]*)@?.*?\b(def)\s+(\w+)\s*\([^{:]*\)\s*[^{:]*[:{]?', re.MULTILINE)# 提取类for match in class_pattern.finditer(content):indent = match.group(1)class_name = match.group(3)start_index = match.start()# 找到类体的结束位置end_index = find_block_end(content, start_index, indent)if end_index == -1:continueclass_content = content[start_index:end_index]entities.append(('class', class_name, start_index, end_index, class_content))current_pos = end_index# 提取函数(只提取顶级函数,忽略类内方法)for match in func_pattern.finditer(content):func_name = match.group(3)start_index = match.start()indent = match.group(1)# 跳过类内的方法if any(start_index > c_start and start_index < c_end for (t, _, c_start, c_end, _) in entities if t == 'class'):continue# 找到函数体的结束位置end_index = find_block_end(content, start_index, indent)if end_index == -1:continuefunc_content = content[start_index:end_index]func_lines = func_content.count('\n') + 1# 检查是否为长函数if func_lines > long_function_threshold:entities.append(('function', func_name, start_index, end_index, func_content))current_pos = end_index# 按起始位置排序entities.sort(key=lambda x: x[2])return entities, content, total_linesdef find_block_end(content, start_index, base_indent):"""找到代码块的结束位置"""base_indent_level = len(base_indent) if base_indent else 0current_index = start_indexstack = []in_string = Falsestring_char = Nonewhile current_index < len(content):char = content[current_index]# 处理字符串字面量if not in_string and char in ('"', "'"):in_string = Truestring_char = charelif in_string and char == string_char:# 检查是否是转义的引号if content[current_index-1] == '\\':# 检查转义字符本身是否被转义backslash_count = 0i = current_index - 1while i >= 0 and content[i] == '\\':backslash_count += 1i -= 1if backslash_count % 2 == 0: # 偶数个反斜杠,引号未被转义in_string = Falsestring_char = Noneelse:in_string = Falsestring_char = Noneif in_string:current_index += 1continue# 处理括号if char in '([{':stack.append(char)elif char in ')]}':if not stack:return -1 # 不匹配的括号last_open = stack.pop()if (last_open == '(' and char != ')') or \(last_open == '[' and char != ']') or \(last_open == '{' and char != '}'):return -1 # 括号不匹配# 检查代码块结束if char == '\n':next_line_start = current_index + 1if next_line_start >= len(content):return next_line_start # 文件结束# 检查下一行的缩进级别next_line_end = content.find('\n', next_line_start)if next_line_end == -1:next_line_end = len(content)next_line = content[next_line_start:next_line_end]indent_level = len(next_line) - len(next_line.lstrip())# 如果缩进小于基础缩进且没有未闭合的括号,则块结束if indent_level <= base_indent_level and not stack:# 确保不是空行或注释stripped_line = next_line.strip()if stripped_line and not stripped_line.startswith('#'):return next_line_startcurrent_index += 1return len(content) # 到达文件末尾def write_entities(entities, content, filename):"""将实体写入文件并生成剩余内容"""# 创建输出目录base_name = os.path.splitext(os.path.basename(filename))[0]output_dir = f"{base_name}_split"os.makedirs(output_dir, exist_ok=True)# 提取覆盖范围covered_ranges = []for entity in entities:e_type, e_name, start, end, e_content = entity# 确保每个实体都写入单独的文件output_path = os.path.join(output_dir, f"{e_name}.py")with open(output_path, 'w', encoding='utf-8') as f:f.write(e_content)covered_ranges.append((start, end))# 生成剩余内容covered_ranges.sort(key=lambda x: x[0])remaining_parts = []last_pos = 0for start, end in covered_ranges:remaining_parts.append(content[last_pos:start])last_pos = endremaining_parts.append(content[last_pos:])remaining_content = ''.join(remaining_parts)# 写入剩余文件left_path = os.path.join(output_dir, "left.py")with open(left_path, 'w', encoding='utf-8') as f:f.write(remaining_content)return output_dirdef main():if len(sys.argv) != 2:print("Usage: python split_code.py <source_file.py>")sys.exit(1)filename = sys.argv[1]if not os.path.isfile(filename):print(f"Error: File not found - {filename}")sys.exit(1)try:entities, content, total_lines = extract_entities(filename)output_dir = write_entities(entities, content, filename)class_count = sum(1 for e in entities if e[0] == 'class')func_count = sum(1 for e in entities if e[0] == 'function')print(f"Processed {filename}:")print(f"- Total lines: {total_lines}")print(f"- Long function threshold: {max(80, total_lines//12)} lines")print(f"- Extracted: {class_count} classes, {func_count} long functions")print(f"- Output directory: {output_dir}")print(f"- Remaining code in: {os.path.join(output_dir, 'left.py')}")except Exception as e:print(f"Error processing file: {str(e)}")import tracebacktraceback.print_exc()sys.exit(1)if __name__ == "__main__":main()
需求:
写一个python脚本,阅读python源代码,i, 把源文件中各个class 切分到文件中,文件名是类名,ii,长函数(代码行超过本文件长度1/12, 或者超过80行,两个条件满足一个即可)切分到文件中,文件名是函数名。 iii,剩余部分放在left.py文件中,谢谢