This commit is contained in:
2022-03-23 09:48:17 +08:00
parent c2cad265cb
commit dafee9e18d
4 changed files with 41 additions and 1 deletions

View File

@@ -0,0 +1,47 @@
"""
This code is supported by the website: https://www.guanjihuan.com
"""
from bs4 import BeautifulSoup
import re
import requests
import urllib.request
import os
import ssl
# NOTE(review): this replaces urllib's default HTTPS context with an unverified
# one, so the urllib requests below do not validate TLS certificates. Kept so
# the script still runs on machines without a usable CA bundle -- TODO confirm
# whether it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context

# Page that lists the words; every dictionary link on it is one word to fetch.
WORD_LIST_URL = "https://www.guanjihuan.com/archives/4418"
# The word is whatever follows this prefix in a dictionary link.
DICTIONARY_PREFIX = 'https://www.ldoceonline.com/dictionary/'
# Seconds to wait on a silent connection before giving up.
REQUEST_TIMEOUT = 30
# Bytes written per chunk while streaming an mp3 to disk.
CHUNK_SIZE = 8192
header = {'User-Agent':'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}  # header information

# One entry per accent:
# (output directory, exact class attribute of the speaker <span>, mp3 URL prefix)
ACCENTS = (
    ('words_mp3_breProns/',
     'speaker brefile fas fa-volume-up hideOnAmp',
     'https://www.ldoceonline.com/media/english/breProns/'),
    ('words_mp3_ameProns/',
     'speaker amefile fas fa-volume-up hideOnAmp',
     'https://www.ldoceonline.com/media/english/ameProns/'),
)


def _download_mp3(mp3_url, target_path):
    """Stream ``mp3_url`` into ``target_path``; return True if it was saved.

    A non-200 response is reported and skipped, so an error page is never
    stored under an ``.mp3`` name. The data is written to ``<target>.part``
    and renamed only when complete, so an interrupted download is not
    mistaken for a finished file by the existence check on the next run.
    """
    with requests.get(mp3_url, headers=header, stream=True,
                      timeout=REQUEST_TIMEOUT) as r:
        if r.status_code != 200:
            print('skipped, HTTP status ' + str(r.status_code) + ': ' + mp3_url)
            return False
        partial_path = target_path + '.part'
        with open(partial_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)
    os.replace(partial_path, target_path)
    return True


def _save_pronunciation(page_soup, span_class, media_prefix, target_path):
    """Download the first speaker <span> of ``span_class`` whose mp3 URL
    starts with ``media_prefix``; do nothing if the page has none."""
    for span_tag in page_soup.find_all('span', {"class": span_class}):
        # .get() so a speaker span without the attribute is skipped, not fatal.
        mp3_url = span_tag.get('data-src-mp3', '')
        if mp3_url.startswith(media_prefix):
            print(mp3_url)
            _download_mp3(mp3_url, target_path)
            break  # only the first matching recording is wanted


# open(..., 'wb') does not create missing directories.
for directory, _, _ in ACCENTS:
    os.makedirs(directory, exist_ok=True)

html = urllib.request.urlopen(WORD_LIST_URL).read().decode('utf-8')
soup = BeautifulSoup(html, features='lxml')
all_a_tag = soup.find_all('a', href=True)
for a_tag in all_a_tag:
    href = a_tag['href']
    # Only links that *start* with the dictionary prefix carry a word after it.
    if not href.startswith(DICTIONARY_PREFIX):
        continue
    word = href[len(DICTIONARY_PREFIX):]
    print(word)
    # Fetch only the accents that are not on disk yet.
    missing = [accent for accent in ACCENTS
               if not os.path.exists(accent[0] + word + '.mp3')]
    if not missing:
        continue
    request = urllib.request.Request(href, headers=header)
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as page:
        reponse = page.read()
    soup2 = BeautifulSoup(reponse, features='lxml')
    for directory, span_class, media_prefix in missing:
        _save_pronunciation(soup2, span_class, media_prefix,
                            directory + word + '.mp3')
    print()

View File

@@ -0,0 +1,8 @@
# Launcher for the word list published at https://www.guanjihuan.com/archives/4418.
# All behaviour lives in the third-party `guan` package; nothing is implemented here.
# Installation:
# (1) pip install pygame
# (2) pip install --upgrade guan
import guan
# Call with no arguments, i.e. with whatever defaults the package defines.
guan.play_academic_words()
# Example of the same call with keyword arguments spelled out (their meaning and
# defaults are defined by guan and are not visible in this file):
# guan.play_academic_words(bre_or_ame='ame', random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)

View File

@@ -0,0 +1,33 @@
"""
This code is supported by the website: https://www.guanjihuan.com
"""
from urllib import response
from bs4 import BeautifulSoup
import re
import requests
import urllib.request
import os
import ssl
from urllib.request import urlopen
# NOTE(review): this replaces urllib's default HTTPS context with an unverified
# one, so the urllib requests below do not validate TLS certificates. Kept so
# the script still runs on machines without a usable CA bundle -- TODO confirm
# whether it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context

# Page that lists the words; every dictionary link on it is one word to fetch.
WORD_LIST_URL = "https://www.guanjihuan.com/archives/10897"
# The word is whatever follows this prefix in a dictionary link.
DICTIONARY_PREFIX = 'https://www.merriam-webster.com/dictionary/'
OUTPUT_DIR = 'prons/'
# Seconds to wait on a silent connection before giving up.
REQUEST_TIMEOUT = 30
# Bytes written per chunk while streaming an mp3 to disk.
CHUNK_SIZE = 8192
header = {'User-Agent':'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}  # header information
# A single quoted mp3 URL followed by `",`. `[^"]*` keeps the match inside one
# quoted string; a greedy `.*` with re.S would run on to the last `.mp3",` of
# the whole page when more than one is present.
MP3_URL_PATTERN = re.compile(
    r'(https://media\.merriam-webster\.com/audio/prons/en/us/mp3/[^"]*\.mp3)",')


def _download_mp3(mp3_url, target_path):
    """Stream ``mp3_url`` into ``target_path``; return True if it was saved.

    A non-200 response is reported and skipped, so an error page is never
    stored under an ``.mp3`` name. The data is written to ``<target>.part``
    and renamed only when complete, so an interrupted download is not
    mistaken for a finished file by the existence check on the next run.
    """
    with requests.get(mp3_url, headers=header, stream=True,
                      timeout=REQUEST_TIMEOUT) as r:
        if r.status_code != 200:
            print('skipped, HTTP status ' + str(r.status_code) + ': ' + mp3_url)
            return False
        partial_path = target_path + '.part'
        with open(partial_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)
    os.replace(partial_path, target_path)
    return True


# open(..., 'wb') does not create missing directories.
os.makedirs(OUTPUT_DIR, exist_ok=True)

html = urllib.request.urlopen(WORD_LIST_URL).read().decode('utf-8')
soup = BeautifulSoup(html, features='lxml')
all_a_tag = soup.find_all('a', href=True)
for a_tag in all_a_tag:
    href = a_tag['href']
    # Only links that *start* with the dictionary prefix carry a word after it.
    if not href.startswith(DICTIONARY_PREFIX):
        continue
    word = href[len(DICTIONARY_PREFIX):]
    print(word)
    target_path = OUTPUT_DIR + word + '.mp3'
    if os.path.exists(target_path):
        continue
    # Send the browser User-Agent on the page request as well as the download.
    request = urllib.request.Request(href, headers=header)
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as page:
        # Separate name so the word-list markup in `html` is not overwritten.
        page_html = page.read().decode('utf-8')
    match = MP3_URL_PATTERN.search(page_html)
    if match is None:
        # An entry without audio is skipped instead of aborting the whole run.
        print('no pronunciation audio found for: ' + word)
        print()
        continue
    mp3_file = match.group(1)
    print(mp3_file)
    print()
    _download_mp3(mp3_file, target_path)

View File

@@ -0,0 +1,8 @@
# Launcher for the word list published at https://www.guanjihuan.com/archives/10897.
# All behaviour lives in the third-party `guan` package; nothing is implemented here.
# Installation:
# (1) pip install pygame
# (2) pip install --upgrade guan
import guan
# Call with no arguments, i.e. with whatever defaults the package defines.
guan.play_element_words()
# Example of the same call with keyword arguments spelled out (their meaning and
# defaults are defined by guan and are not visible in this file):
# guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)