分类 about english 下的文章

children of mixed race.
the principal knows everyone by name
The government does not recognize the need for more funding.
invest in the stock market
house prices will remain static for a long period
A million pounds seemed a suitably round number.
in a dark dinner suit.
The powder is mixed with cold water to form a paste.

import re
import time
# Clean ..\data\朗文双解.csv line by line and append the result to a CSV whose
# name embeds the current time.time() value, so each run gets its own output.
# Both files are managed by one `with`, so the output handle is flushed and
# closed even if processing raises (a bare open() would leave it unclosed).
with open(r'..\data\朗文双解清洗'+str(time.time())+'.csv', 'a',encoding='utf-8') as file1, \
        open(r'..\data\朗文双解.csv',encoding='utf-8') as file:
    # Iterate the file lazily instead of loading every line into memory first.
    for lin in file:
        # Greedy match: everything up to and including the last tab on the line.
        word = re.findall(r'^.*\t', lin)
        # Every fragment between a <font class=L_SYL> tag and the next </span>.
        en = re.findall(r'<font class=L_SYL>(.*?)</span>', lin)
        # NOTE: the substitutions operate on the list's repr, so the brackets
        # and quotes produced by str(list) are part of the written output.
        # Selected tags become '|' separators; any remaining tag is removed.
        str_first1 = re.sub('<font color=black>', "|", str(en))
        str_first2 = re.sub('<span class=L_POS>', "|", str_first1)
        str_first3 = re.sub('</font>', "|", str_first2)
        str_first4 = re.sub('<.*?>', "", str_first3)
        gg = str(word)+'\t'+str_first4 +'\n'
        file1.write(gg)
        print(word,str_first4)

import re
import time
# Clean ..\data\剑桥双解清洗.txt line by line and append the result to a CSV
# whose name embeds the current time.time() value.
# Both files are managed by one `with`, so the output handle is flushed and
# closed even if processing raises (a bare open() would leave it unclosed).
with open(r'..\data\剑桥双解清洗'+str(time.time())+'.csv', 'a',encoding='utf-8') as file1, \
        open(r'..\data\剑桥双解清洗.txt',encoding='utf-8') as file:
    # Iterate the file lazily instead of loading every line into memory first.
    for lin in file:
        # Text inside each bold <font> element.
        word = re.findall(r'<font style="font-weight:bold;">(.*?)</font>', lin)
        # Text from each margin-right <font> tag up to a literal backslash
        # followed by 'n' (the pattern matches those two characters, not a
        # real newline).
        en = re.findall(r'<font style="margin-right:1px;">(.*?)\\n', lin)
        # NOTE: the substitutions operate on the list's repr, so the brackets
        # and quotes produced by str(list) are part of the written output.
        # The navy-coloured <font> tag becomes a '|' separator; every other
        # tag is removed.
        str_first1 = re.sub('<font style="color:navy;margin-left:12pt;" >', "|", str(en))
        str_first2 = re.sub('<.*?>', "", str_first1)
        print(str_first2)
        gg = str(word)+'\t'+str_first2 +'\n'
        file1.write(gg)

import re
import time
# Extract two kinds of fragments from each line of ..\data\朗文双解.txt and
# append them to a CSV whose name carries a YYYYmmddHHMMSS local timestamp.
# strftime() without an explicit time tuple formats the current local time.
newtime = time.strftime("%Y%m%d%H%M%S")
# Both files are managed by one `with`, so the output handle is flushed and
# closed even if processing raises (a bare open() would leave it unclosed).
with open(r'..\data\朗文双解清洗'+newtime+'.csv', 'a',encoding='utf-8') as file1, \
        open(r'..\data\朗文双解.txt',encoding='utf-8') as file:
    # Iterate the file lazily instead of loading every line into memory first.
    for lin in file:
        # From a digit 1-9 followed by '.' through the end of the line.
        # The character class previously read [123459789], which left out 6
        # (and repeated 9), so fragments numbered "6." were silently skipped.
        word = re.findall(r'[1-9]\..*', lin)
        # From a '★' marker through the end of the line.
        en = re.findall(r'★.*', lin)
        print(en,word)
        # Both values are lists; their repr is written, one per output line,
        # including '[]' when nothing matched.
        gg = str(en)+'\n'+str(word) +'\n'
        file1.write(gg)