import re
from bs4 import BeautifulSoup
import time
# Export the text of selected <span> elements from a tab-separated dump.
#
# Each input line is expected to look like "<headword>\t<HTML>" (assumption
# based on the regex below -- confirm against the real data).  For every span
# whose class attribute equals one of TARGET_CLASSES, one record
# "<word>\t<span text>\n" is appended to the output file.
INPUT_PATH = r'C:\朗文双解ffffff.txt'
OUTPUT_PATH = r'C:\朗文双解_g_1.csv'

# Class strings of the spans to export, processed in this order for each line.
TARGET_CLASSES = (
    "english LDOCE_switch_lang switch_children",
    "SIGNPOST LDOCE_switch_lang LDOCE5pp_sensefold_other",
)

# Both files are managed by the `with` statement so the output handle is
# always flushed and closed, even if parsing raises part-way through.
# The output is opened first (append mode), matching the original order.
with open(OUTPUT_PATH, 'a', encoding='utf-8') as file1, \
        open(INPUT_PATH, encoding='utf-8') as file:
    for lin in file:
        # Greedy match from the start of the line up to and including the
        # LAST tab.  Lines without any tab (e.g. blank lines) carry no
        # headword and are skipped instead of crashing with IndexError.
        head = re.match(r'.*\t', lin)
        if head is None:
            continue
        # NOTE(review): `word` keeps its trailing tab, so every record holds
        # two consecutive tabs between the word and the text.  Preserved
        # as-is so new rows stay consistent with rows already appended.
        word = head.group(0)

        # Insert a "|" right after each opening cn_txt span so that its text
        # is visibly delimited in the flattened get_text() output.
        marked = lin.replace('<span class="cn_txt"> ',
                             '<span class="cn_txt"> |')
        soup = BeautifulSoup(marked, 'html.parser')

        for css_class in TARGET_CLASSES:
            for span in soup.find_all(name="span", attrs={"class": css_class}):
                text = span.get_text()
                # Progress output; always reports the span being written.
                print(word, text)
                gg = word + '\t' + text + '\n'
                print(gg)
                file1.write(gg)

# NOTE: extraction of "DEF LDOCE_switch_lang switch_siblings" spans existed
# here only as commented-out code and is intentionally not performed.