《中国古代史教程》 朱绍侯、龚留柱 主编 (TXT、双层PDF版)

下面的代码用于处理已按章分割好的 txt 文件,把其中每页的脚注批量转换为章末尾注。把全书按章分割同样可以用程序完成,因为比较简单,这里就不分享了。

import re

def process_footnotes(text):
    """Convert per-page footnotes in *text* into a single endnote list.

    The input is split into pages wherever a blank line is followed by a
    non-whitespace character. Within each page, every line that starts
    with a circled number (U+2460..U+2473) is treated as a footnote line
    and removed from the page body.

    Markers left in the page bodies are renumbered ``[1]``, ``[2]``, ...
    across the whole text in reading order; markers in the collected
    footnote lines are renumbered the same way with an independent
    counter. The two sequences therefore only line up when every page has
    as many markers in its body as in its footnotes, so a warning is
    printed for each page where the counts differ (processing continues).

    Args:
        text: The chapter text, pages separated by blank lines.

    Returns:
        The page bodies joined by ``'\\n\\n\\n'``, followed by ``'\\n\\n'``
        and the renumbered footnote lines joined by ``'\\n'``. The
        ``'\\n\\n'`` separator is appended even when no footnotes exist.
    """
    # Compile once; the same marker class is needed in several places.
    marker = re.compile(r'[\u2460-\u2473]')
    footnote_line = re.compile(r'^[\u2460-\u2473].*$', re.MULTILINE)

    def renumber(chunks):
        """Replace each marker in *chunks*, in order, with [1], [2], ..."""
        next_number = 1

        def to_bracket(_match):
            nonlocal next_number
            label = f'[{next_number}]'
            next_number += 1
            return label

        # One left-to-right pass per chunk; the bracketed labels contain no
        # circled numbers, so nothing is ever replaced twice.
        return [marker.sub(to_bracket, chunk) for chunk in chunks]

    page_bodies = []
    footnotes = []
    pages = re.split(r'\n\n(?=\S)', text)
    for page_number, page in enumerate(pages, start=1):
        # Separate footnote lines from the running text of this page.
        page_footnotes = footnote_line.findall(page)
        body = footnote_line.sub('', page).strip()

        # Sanity check: each marker in the body should have a footnote.
        body_marker_count = len(marker.findall(body))
        footnote_marker_count = sum(
            len(marker.findall(line)) for line in page_footnotes
        )
        if body_marker_count != footnote_marker_count:
            print(f"警告:第 {page_number} 页的注解符号数量不匹配")
            print(f"页面内容:\n{page}\n")
            print(f"正文中的符号数量:{body_marker_count}")
            print(f"脚注中的符号数量:{footnote_marker_count}")
            print("=" * 50)

        page_bodies.append(body)
        footnotes.extend(page_footnotes)

    # Body and footnotes are numbered independently, each starting at 1.
    full_text = '\n\n\n'.join(renumber(page_bodies))
    full_text += '\n\n' + '\n'.join(renumber(footnotes))

    return full_text

# Load the chapter text that still carries its per-page footnotes.
source_path = r'c:\Users\xxx\Desktop\002\第一章.txt'
with open(source_path, mode='r', encoding='utf-8') as src:
    text = src.read()

# Turn the footnotes into endnotes.
processed_text = process_footnotes(text)

# Write the converted chapter to the output file.
target_path = r'c:\Users\xxx\Desktop\002\第一章-output.txt'
with open(target_path, mode='w', encoding='utf-8') as dst:
    dst.write(processed_text)

print("处理完成,结果已保存 output.txt。")