From aa963a7e5c66c36fd685e0fd9a297828791e07bb Mon Sep 17 00:00:00 2001
From: guanjihuan
Date: Wed, 8 Dec 2021 06:46:14 +0800
Subject: [PATCH] update

---
 .../download_word_mp3.py                      | 74 +++++++++++++++++++
 .../2021.12.08_academic_words/play_mp3.py     | 23 ++++++
 2 files changed, 97 insertions(+)
 create mode 100644 language_learning/2021.12.08_academic_words/download_word_mp3.py
 create mode 100644 language_learning/2021.12.08_academic_words/play_mp3.py

diff --git a/language_learning/2021.12.08_academic_words/download_word_mp3.py b/language_learning/2021.12.08_academic_words/download_word_mp3.py
new file mode 100644
index 0000000..714fffc
--- /dev/null
+++ b/language_learning/2021.12.08_academic_words/download_word_mp3.py
@@ -0,0 +1,74 @@
+from bs4 import BeautifulSoup
+import re
+import requests
+import urllib.request
+import os
+import ssl
+
+# NOTE(review): the next line turns off TLS certificate verification for the
+# urllib requests below. Kept from the original script; remove it if the
+# sites involved present valid certificates.
+ssl._create_default_https_context = ssl._create_unverified_context
+
+WORD_LIST_URL = "https://www.guanjihuan.com/archives/4418"
+DICTIONARY_PREFIX = 'https://www.ldoceonline.com/dictionary/'
+MEDIA_PREFIX = 'https://www.ldoceonline.com/media/english/'
+HEADER = {'User-Agent':'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}  # request headers
+# accent name -> token that differs between the two span classes searched for
+ACCENTS = {'breProns': 'brefile', 'ameProns': 'amefile'}
+
+
+def mp3_path(accent, word):
+    """Return the local path of the recording of `word` for `accent`."""
+    return 'words_mp3_' + accent + '/' + word + '.mp3'
+
+
+def download_pronunciation(soup, accent, word):
+    """Save the first `accent` recording linked from the parsed page `soup`."""
+    span_class = 'speaker ' + ACCENTS[accent] + ' fas fa-volume-up hideOnAmp'
+    for span in soup.find_all('span', {"class": span_class}):
+        mp3_url = span.get('data-src-mp3', '')
+        if MEDIA_PREFIX + accent + '/' not in mp3_url:
+            continue
+        print(mp3_url)
+        try:
+            r = requests.get(mp3_url, headers=HEADER, stream=True, timeout=30)
+            r.raise_for_status()  # do not save an HTTP error page as .mp3
+        except requests.RequestException as error:
+            print('download failed:', error)
+            continue
+        with open(mp3_path(accent, word), 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+        break  # only the first matching recording is kept
+
+
+def main():
+    """Fetch the missing recordings for every dictionary link on the word list."""
+    for accent in ACCENTS:
+        # open(..., 'wb') does not create a missing directory
+        os.makedirs('words_mp3_' + accent, exist_ok=True)
+    html = urllib.request.urlopen(WORD_LIST_URL).read().decode('utf-8')
+    soup = BeautifulSoup(html, features='lxml')
+    for a_tag in soup.find_all('a', href=True):
+        href = a_tag['href']
+        if not href.startswith(DICTIONARY_PREFIX):
+            continue
+        word = href[len(DICTIONARY_PREFIX):]
+        print(word)
+        # Check each accent's own directory; testing the breProns path twice
+        # would hide a missing American recording.
+        missing = [accent for accent in ACCENTS
+                   if not os.path.exists(mp3_path(accent, word))]
+        if not missing:
+            continue
+        request = urllib.request.Request(href, headers=HEADER)
+        response = urllib.request.urlopen(request).read()
+        soup2 = BeautifulSoup(response, features='lxml')
+        for accent in missing:
+            download_pronunciation(soup2, accent, word)
+        print()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/language_learning/2021.12.08_academic_words/play_mp3.py b/language_learning/2021.12.08_academic_words/play_mp3.py
new file mode 100644
index 0000000..5e70ef5
--- /dev/null
+++ b/language_learning/2021.12.08_academic_words/play_mp3.py
@@ -0,0 +1,23 @@
+import pygame
+import time
+import os
+import random
+
+# directory = 'C:/Users/user/Desktop/words_mp3_breProns/'  # British pronunciation
+directory = 'C:/Users/user/Desktop/words_mp3_ameProns/'  # American pronunciation
+interval = 3  # seconds to wait after starting a recording (adjust as needed)
+
+pygame.mixer.init()
+for root, dirs, files in os.walk(directory):
+    # Only .mp3 files are playable entries; this also keeps `word` a clean stem.
+    mp3_files = [name for name in files if name.lower().endswith('.mp3')]
+    random.shuffle(mp3_files)  # play in random order
+    for name in mp3_files:
+        word = os.path.splitext(name)[0]
+        print(word)
+        print('https://www.ldoceonline.com/dictionary/'+word, '\n')
+        # Join with root rather than directory: os.walk also visits subfolders.
+        pygame.mixer.music.load(os.path.join(root, name))
+        pygame.mixer.music.play()
+        time.sleep(interval)
+        pygame.mixer.music.stop()