# 标签 剑桥双解清洗 下的文章 (articles under the tag "剑桥双解清洗")

import re
import time
# Clean the raw "剑桥双解清洗.txt" dump into a tab-separated file.
#
# For every input line one output row is written:
#     <repr of the list of bold-font captures> TAB <tag-stripped text> NEWLINE
# The cleaned text is also echoed to stdout.
#
# NOTE(review): both columns are built from ``str(list)``, so the output keeps
# the list repr decoration (``['...']``, quotes, escaped characters). That is
# the format the original script produced and it is preserved here on purpose;
# confirm with downstream consumers before changing it.

# Patterns are compiled once instead of being re-looked-up for every line.
_word_re = re.compile(r'<font style="font-weight:bold;">(.*?)</font>')
# ``\\n`` in this raw string matches a literal backslash followed by "n",
# not a newline character.
_en_re = re.compile(r'<font style="margin-right:1px;">(.*?)\\n')
_sep_re = re.compile('<font style="color:navy;margin-left:12pt;" >')
_tag_re = re.compile('<.*?>')

# The output name embeds the current timestamp, so each run writes a new file.
# Both files are managed by ``with`` so the output handle is flushed and
# closed even if processing raises part-way through (the original never
# closed ``file1``). The output is opened first, as in the original.
with open(r'..\data\剑桥双解清洗'+str(time.time())+'.csv', 'a',
          encoding='utf-8') as file1, \
        open(r'..\data\剑桥双解清洗.txt', encoding='utf-8') as file:
    for lin in file:
        word = _word_re.findall(lin)
        en = _en_re.findall(lin)
        # Replace each navy-coloured <font ...> opening tag with "|" ...
        str_first1 = _sep_re.sub("|", str(en))
        # ... then drop every remaining tag.
        str_first2 = _tag_re.sub("", str_first1)
        print(str_first2)
        file1.write(str(word) + '\t' + str_first2 + '\n')