import re
from bs4 import BeautifulSoup

'''
┳
'''


# Reformat dc.htm so that each marked chunk of the page ends up on its own
# physical line, then write the result to ndc.htm.
#
# The character '┳' is used as a temporary "future line break" sentinel.
# Because the input is decoded as iso-8859-1 (which only produces code points
# up to U+00FF), the sentinel can never already be present in the input.

with open('dc.htm', encoding='iso-8859-1', mode='r') as frd:
    dc = frd.read()

# Start of an entry: a '<p><b>' (unless the bold text begins with "The ") or a
# '<div class="fig', but only when it is preceded by a blank line and a
# two-space indent.  Prefix it with the sentinel and an opening <entry> tag.
dc = re.sub(r'(?<=\n\n  )(<p><b>(?!The )|<div class="fig)', r'┳<entry>\1', dc)

# These constructs should each begin a new output line as well, but they are
# not wrapped in <entry>; only a sentinel is placed in front of them.
dc = re.sub(r'(<p><br |<div style="clear: both">|<pre>)', r'┳\1', dc)

# Close each entry.  '[^┳]*' is greedy and also crosses newlines, so the match
# runs from '<entry>' up to the last blank line ('\n\n') that occurs before the
# next sentinel (or end of text).  An '<entry>' with no blank line before the
# next sentinel is left unclosed.
dc = re.sub(r'(<entry>[^┳]*)(?=\n\n)', r'\1</entry>┳', dc)

# Drop every original newline, then turn the sentinels into the only line
# breaks.  The order matters: replacing the sentinels first would cause the
# new line breaks to be stripped along with the old ones.
dc = dc.replace('\n', '').replace('┳', '\n')

# Joining lines leaves the source indentation behind as runs of spaces;
# collapse each run of two or more spaces into one.
dc = re.sub(r' {2,}', r' ', dc)

# Write with the same encoding the input was read with.  Without an explicit
# encoding, open() falls back to the locale's preferred encoding, which would
# make the output bytes platform-dependent (and could raise UnicodeEncodeError
# on locales that cannot represent every latin-1 code point).  All sentinels
# have been replaced by now, so every character fits in iso-8859-1.
with open('ndc.htm', 'w', encoding='iso-8859-1') as fwn:
    fwn.write(dc)

'''
soup = BeautifulSoup(dc,
'''
