import re

# Precompiled once for speed. Matches the text that follows a "[" up to the
# last tone digit (1-5) found before the closing "]".
PY_PATTERN = re.compile(r"(?<=\[)([^]]+[1-5])")

# Vowels that always carry the tone mark when present; the first one found
# while scanning the syllable left to right wins.
_PRIMARY_VOWELS = {
    'A': ['Ā', 'Á', 'Ǎ', 'À', 'A'],
    'O': ['Ō', 'Ó', 'Ǒ', 'Ò', 'O'],
    'E': ['Ē', 'É', 'Ě', 'È', 'E'],
    'a': ['ā', 'á', 'ǎ', 'à', 'a'],
    'o': ['ō', 'ó', 'ǒ', 'ò', 'o'],
    'e': ['ē', 'é', 'ě', 'è', 'e'],
}

# Vowels that carry the mark only when no primary vowel exists; the last one
# in the syllable wins (scan is right to left).
_SECONDARY_VOWELS = {
    'i': ['ī', 'í', 'ǐ', 'ì', 'i'],
    'u': ['ū', 'ú', 'ǔ', 'ù', 'u'],
    'ü': ['ǖ', 'ǘ', 'ǚ', 'ǜ', 'ü'],
}

# Vowel-less syllables that have a fixed rendering.
_SPECIAL_SYLLABLES = {'r5': 'r', 'm2': 'ḿ', 'm4': 'm̀'}

_TONE_DIGITS = frozenset('12345')

# Prefix inserted before every entry body; currently empty.
css = ''


def _mark_syllable(token):
    """Convert one numbered-tone token (e.g. ``guo2``) to its accented form.

    Tokens that do not end in a tone digit are returned unchanged. Tokens
    that end in a digit but contain no markable vowel are also returned
    unchanged (digit and any ``u:`` included).
    """
    if token in _SPECIAL_SYLLABLES:
        return _SPECIAL_SYLLABLES[token]
    if token[-1] not in _TONE_DIGITS:
        return token

    syllable = token.replace('u:', 'ü')
    tone = int(syllable[-1]) - 1  # index into the five-element tone lists
    body = syllable[:-1]

    for ch in body:
        if ch in _PRIMARY_VOWELS:
            return body.replace(ch, _PRIMARY_VOWELS[ch][tone])
    for ch in reversed(body):
        if ch in _SECONDARY_VOWELS:
            return body.replace(ch, _SECONDARY_VOWELS[ch][tone])
    return token


def pinyinify(pystr):
    """Convert whitespace-separated numbered pinyin to tone-marked pinyin.

    Args:
        pystr: Text such as ``"Zhong1 guo2"``. It is split on whitespace and
            each token is converted independently.

    Returns:
        The converted tokens joined by single spaces.
    """
    return ' '.join(_mark_syllable(token) for token in pystr.split())


def parse_entry(line):
    """Split one dictionary line into headwords, pinyin and definitions.

    The expected shape is ``HEADWORDS [pinyin] /gloss/gloss/``.

    Each separator is split on its first occurrence only, so a gloss that
    itself contains `` [`` or `` /`` is kept whole instead of being cut off.

    Args:
        line: A stripped, non-comment line of the input file.

    Returns:
        A tuple ``(headwords, pinyin_part, definitions)`` where ``headwords``
        is the de-duplicated, order-preserving list of headword forms.

    Raises:
        ValueError: If the line has no `` [`` or no `` /`` separator.
    """
    head, bracket, rest = line.partition(" [")
    if not bracket:
        raise ValueError(f"malformed entry (no ' [' before pinyin): {line!r}")
    _, slash, gloss_text = rest.partition(" /")
    if not slash:
        raise ValueError(f"malformed entry (no ' /' before glosses): {line!r}")

    headwords = list(dict.fromkeys(head.split()))
    pinyin_part = rest.split("] ", 1)[0]
    definitions = gloss_text.strip("/").split("/")
    return headwords, pinyin_part, definitions


def build_entry_body(headwords, pinyin_part, definitions):
    """Build the text written after a headword for one dictionary entry.

    The pieces are concatenated directly, with no markup between them:
    headword display, tone-marked pinyin, then each gloss prefixed by
    ``❍ ``. Bracketed numbered pinyin inside glosses is tone-marked too.
    """
    if len(headwords) == 2:
        hw_html = ''.join(headwords)
    else:
        hw_html = headwords[0]
    header = hw_html + pinyinify(pinyin_part)

    dc_raw = '❍ ' + '❍ '.join(definitions)
    dc = PY_PATTERN.sub(lambda m: pinyinify(m.group(1)), dc_raw)
    return "\n" + css + header + dc + "\n\n"


def main(src_path="cedict_ts.u8", dst_path="dict.txt"):
    """Convert the dictionary source file into the output text file.

    Every headword form of an entry gets its own copy of the entry body.
    For entries with two distinct forms, a ``@@@LINK=`` record from the
    second form to the first is appended at the end of the output
    (presumably a redirect for the downstream dictionary builder; confirm).

    Args:
        src_path: Input file, read as UTF-8 with an optional BOM.
        dst_path: Output file, overwritten as UTF-8.

    Returns:
        The number of records written (entries plus link records).
    """
    simplified_indexes = set()
    count = 0

    print("开始处理，请稍候...")

    # The output file stays open for the whole run.
    with open(dst_path, encoding="utf-8", mode="w") as f_out:
        with open(src_path, encoding="utf-8-sig", mode="r") as f_in:
            for raw_line in f_in:
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue

                headwords, pinyin_part, definitions = parse_entry(line)
                if len(headwords) == 2:
                    # Stored as (second form, first form); the source's own
                    # comment labels these (simplified, traditional).
                    simplified_indexes.add((headwords[1], headwords[0]))

                # The body is built once and shared by every headword form.
                full_entry = build_entry_body(
                    headwords, pinyin_part, definitions
                )
                for headword in headwords:
                    f_out.write(headword + full_entry)
                    count += 1

        # Append the link records.
        print("正在生成简体索引...")
        for hw_simplified, hw_traditional in sorted(simplified_indexes):
            f_out.write(
                hw_simplified + "\n@@@LINK=" + hw_traditional + "\n\n"
            )
            count += 1

    print(f"完成！共处理词条 {count} 条。")
    return count


if __name__ == "__main__":
    main()