LPD mdx 制作参考

发音

# Pronunciation lookup: collect every <sk_pron> element reachable through the
# child chain html > body > entry > foreignpron in the parsed file.
# html>body>entry>derivative>poll>sk_pron
# NOTE(review): the path noted on the line above differs from the selector
# actually used below (foreignpron vs. derivative>poll) — confirm which one
# is intended, or whether both locations need to be queried.
from bs4 import BeautifulSoup
# Read ../output as UTF-8 text and parse it with the "html.parser" backend.
with open('../output', 'r', encoding='utf-8') as f:
    soup = BeautifulSoup(f, 'html.parser')
# ">" is the CSS child combinator, so only direct parent/child chains match.
tags = soup.select('html>body>entry>foreignpron>sk_pron')

更多词头


# Extra headwords: gather the inner markup of every <deriv> element found in
# the released entries file.
with open('../../mdx_release/entries.txt', encoding='utf-8') as f:
    # multi_valued_attributes=None is passed through to the tree builder;
    # per the Beautiful Soup docs it stops attributes such as class from
    # being split into lists.
    soup = BeautifulSoup(f, 'html.parser', multi_valued_attributes=None)
# decode_contents() renders what is inside the tag, without the <deriv>
# wrapper itself.
hwds = [str(deriv.decode_contents()) for deriv in soup.find_all("deriv")]

词头元字符统计


def is_valid_string(s):
    """Return True when every character of *s* is in the allowed alphabet.

    The allowed alphabet is the ASCII letters (both cases), the space, and
    the punctuation characters ``| ’ / - , . ~`` (note that the apostrophe is
    the typographic ``’``, not the ASCII ``'``).  An empty string has no
    offending character and therefore yields True.
    """
    allowed = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ |’/-,.~"
    for char in s:
        # Stop at the first character outside the alphabet.
        if char not in allowed:
            return False
    return True

comp 元素替换表

# Build the <comp> replacement table: for every <comp> inside every <entry>,
# take its text, drop the marker characters, and add the resulting items to
# hwds_comp_json (which is defined outside this snippet).
for entry in soup.find_all('entry'):
    for comp in entry.find_all('comp'):
        cleaned = comp.get_text()
        # Order matters: the single-character markers go first, so a '₍₎'
        # pair that only becomes adjacent after their removal is still
        # caught.  '₍₎' is removed as a two-character unit only; a lone '₍'
        # or '₎' stays in the text.
        for marker in ('ˈ', 'ˌ', '◂', '₍₎'):
            cleaned = cleaned.replace(marker, '')
        # Split on ", " so each comma-separated item becomes its own element.
        hwds = cleaned.split(', ')
        hwds_comp_json.extend(hwds)

mdx mp3 格式

# mp3 link format used in the mdx: the element's href is set to the literal
# prefix "sound://" followed by mp3_path_mdx.
# NOTE(review): sk_pron and mp3_path_mdx are defined outside this snippet;
# the "+" concatenation assumes mp3_path_mdx is a str — confirm at the caller.
sk_pron["href"] = "sound://" + mp3_path_mdx

mdx entry css 格式

# Layout of one entry record, assembled by plain string concatenation:
#   line 1: hwd
#   line 2: <div class="j24lpd2008"> wrapping a stylesheet link to lpd.css
#           followed by v0['entry'], then the closing </div>
#   line 3: the literal '</>'
# NOTE(review): '</>' is presumably the record terminator expected by the mdx
# builder — confirm.  The value of this expression is not assigned here, and
# hwd / v0 come from outside this snippet.
hwd + '\n' + '<div class="j24lpd2008">' + '<link rel="stylesheet" href="lpd.css" />' + v0['entry'] + '</div>' + '\n' + '</>'