以下代码用于把按章分割后的 txt 文件中的脚注批量转换为尾注。把全书分割成章同样可以写程序完成,但因为比较简单,这里就不分享了。
import re
def process_footnotes(text):
    """Convert per-page footnotes in *text* into numbered endnotes.

    The text is split into pages at every blank line that is followed by a
    non-whitespace character.  Within each page, every line that begins with
    a circled number (U+2460 to U+2473) is treated as a footnote line and is
    moved out of the page body.  The circled numbers left in the bodies, and
    those inside the collected footnote lines, are then each replaced by
    ``[1]``, ``[2]``, ... numbered consecutively across the whole text.  The
    two sequences are numbered independently, so a body marker and its
    footnote only receive the same number when their order and counts agree.

    A warning is printed for every page whose body and footnote lines contain
    a different number of circled numbers; processing continues regardless.

    Args:
        text: The text to convert, with each footnote on a line of its own
            that starts with a circled number.

    Returns:
        The page bodies joined by two blank lines.  When at least one
        footnote line was found, a blank line and then the footnote lines
        (one per line) follow; otherwise nothing is appended.
    """
    # Any single circled number, U+2460 through U+2473.
    note_mark = re.compile(r'[\u2460-\u2473]')
    # A whole line that starts with a circled number, i.e. a footnote line.
    footnote_line = re.compile(r'^[\u2460-\u2473].*$', re.MULTILINE)

    def make_renumberer():
        """Return a ``re.sub`` callback producing ``[1]``, ``[2]``, ... in turn."""
        issued = 0

        def replace(_match):
            nonlocal issued
            issued += 1
            return f'[{issued}]'

        return replace

    # Pages are separated by a blank line followed by non-whitespace text.
    pages = re.split(r'\n\n(?=\S)', text)

    bodies = []
    footnotes = []
    for page_no, page in enumerate(pages, start=1):
        # Separate the footnote lines from the rest of the page.
        page_notes = footnote_line.findall(page)
        body = footnote_line.sub('', page).strip()

        # Sanity check: the body should reference exactly as many notes as
        # the footnote lines define, otherwise the numbering will drift.
        main_symbols = note_mark.findall(body)
        foot_symbols = [
            mark for note in page_notes for mark in note_mark.findall(note)
        ]
        if len(main_symbols) != len(foot_symbols):
            print(f"警告:第 {page_no} 页的注解符号数量不匹配")
            print(f"页面内容:\n{page}\n")
            print(f"正文中的符号数量:{len(main_symbols)}")
            print(f"脚注中的符号数量:{len(foot_symbols)}")
            print("=" * 50)

        bodies.append(body)
        footnotes.extend(page_notes)

    # Renumber the markers in the bodies, counting across all pages.
    renumber_body = make_renumberer()
    numbered_bodies = [note_mark.sub(renumber_body, body) for body in bodies]

    # Renumber the footnotes with a fresh counter; a single footnote line may
    # carry several markers, each of which consumes the next number.
    renumber_notes = make_renumberer()
    numbered_notes = [note_mark.sub(renumber_notes, note) for note in footnotes]

    # Bodies first, then the endnote list -- but only when there are notes,
    # so note-free (or empty) input does not gain trailing blank lines.
    full_text = '\n\n\n'.join(numbered_bodies)
    if numbered_notes:
        full_text += '\n\n' + '\n'.join(numbered_notes)
    return full_text
# Locations of the chapter to convert and of the converted result.
input_path = r'c:\Users\xxx\Desktop\002\第一章.txt'
output_path = r'c:\Users\xxx\Desktop\002\第一章-output.txt'

# Load the whole chapter as UTF-8 text.
with open(input_path, mode='r', encoding='utf-8') as source_file:
    text = source_file.read()

# Turn the per-page footnotes into endnotes.
processed_text = process_footnotes(text)

# Store the converted chapter as UTF-8 text.
with open(output_path, mode='w', encoding='utf-8') as target_file:
    target_file.write(processed_text)

print("处理完成,结果已保存 output.txt。")