update

2022-03-23 09:48:17 +08:00
parent c2cad265cb
commit dafee9e18d
4 changed files with 41 additions and 1 deletions
--- a/language_learning/2020.11.25_academic_words/download_academic_word_mp3.py
+++ b/language_learning/2020.11.25_academic_words/download_academic_word_mp3.py
@@ -0,0 +1,71 @@
+"""
+This code is supported by the website: https://www.guanjihuan.com
+
+Download the British and American pronunciation MP3 of every word that the
+word-list page links to on ldoceonline.com. Files that already exist are
+skipped, so the script can be re-run to fetch only what is missing.
+"""
+
+from bs4 import BeautifulSoup
+import re
+import requests
+import urllib.request
+import os
+import ssl
+
+# NOTE(security): this turns off HTTPS certificate verification for every
+# urllib request made below. Kept for compatibility with the original script.
+ssl._create_default_https_context = ssl._create_unverified_context
+
+word_list_url = "https://www.guanjihuan.com/archives/4418"
+dictionary_prefix = 'https://www.ldoceonline.com/dictionary/'
+header = {'User-Agent':'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}   # request headers
+# One entry per accent: (class attribute of the speaker <span>, required prefix of the MP3 URL, output directory).
+accents = [
+    ('speaker brefile fas fa-volume-up hideOnAmp', 'https://www.ldoceonline.com/media/english/breProns/', 'words_mp3_breProns'),
+    ('speaker amefile fas fa-volume-up hideOnAmp', 'https://www.ldoceonline.com/media/english/ameProns/', 'words_mp3_ameProns'),
+]
+
+
+def download_mp3(url, path):
+    """Stream `url` into the file `path`; return True on success, False on an HTTP error."""
+    with requests.get(url, headers=header, stream=True) as r:
+        if not r.ok:
+            # Do not save an error page as an .mp3: the exists-check would then never retry it.
+            print('download failed (HTTP '+str(r.status_code)+'): '+url)
+            return False
+        with open(path, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+    return True
+
+
+for _, _, directory in accents:
+    os.makedirs(directory, exist_ok=True)   # open() below fails if the directory is missing
+
+with urllib.request.urlopen(word_list_url) as page:
+    html = page.read().decode('utf-8')
+soup = BeautifulSoup(html, features='lxml')
+for a_tag in soup.find_all('a', href=True):
+    href = a_tag['href']
+    if not href.startswith(dictionary_prefix):
+        continue
+    word = href[len(dictionary_prefix):]
+    print(word)
+    # Fetch only the accents whose file is not on disk yet.
+    missing = [accent for accent in accents if not os.path.exists(accent[2]+'/'+word+'.mp3')]
+    if not missing:
+        continue
+    request = urllib.request.Request(href, headers=header)
+    with urllib.request.urlopen(request) as page:
+        response = page.read()
+    soup2 = BeautifulSoup(response, features='lxml')
+    for span_class, mp3_prefix, directory in missing:
+        # Only the first speaker <span> of each accent is considered.
+        span = soup2.find('span', {"class": span_class})
+        if span is None:
+            continue
+        href2 = span.get('data-src-mp3', '')
+        if href2.startswith(mp3_prefix):
+            print(href2)
+            download_mp3(href2, directory+'/'+word+'.mp3')
+    print()
--- a/language_learning/2020.11.25_academic_words/play_academic_words_with_guan.py
+++ b/language_learning/2020.11.25_academic_words/play_academic_words_with_guan.py
@@ -0,0 +1,8 @@
+# Word list (webpage): https://www.guanjihuan.com/archives/4418
+# Installation:
+# (1) pip install pygame
+# (2) pip install --upgrade guan
+
+import guan
+guan.play_academic_words()
+# With explicit options: guan.play_academic_words(bre_or_ame='ame', random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)
--- a/language_learning/2021.03.18_element_words/download_element_word_mp3.py
+++ b/language_learning/2021.03.18_element_words/download_element_word_mp3.py
@@ -0,0 +1,62 @@
+"""
+This code is supported by the website: https://www.guanjihuan.com
+
+Download the US pronunciation MP3 of every word that the word-list page links
+to on merriam-webster.com. Files that already exist are skipped, so the script
+can be re-run to fetch only what is missing.
+"""
+
+from urllib import response
+from bs4 import BeautifulSoup
+import re
+import requests
+import urllib.request
+import os
+import ssl
+from urllib.request import urlopen
+
+# NOTE(security): this turns off HTTPS certificate verification for every
+# urllib request made below. Kept for compatibility with the original script.
+ssl._create_default_https_context = ssl._create_unverified_context
+
+word_list_url = "https://www.guanjihuan.com/archives/10897"
+dictionary_prefix = 'https://www.merriam-webster.com/dictionary/'
+output_directory = 'prons'
+header = {'User-Agent':'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}   # request headers
+# Matches the first audio URL that is directly followed by `",` in the page.
+# The match stops at the first `.mp3",` and cannot cross a double quote, so it
+# never runs on to a later URL; the lookahead keeps the `",` out of the result.
+mp3_pattern = re.compile(r'https://media\.merriam-webster\.com/audio/prons/en/us/mp3/[^"]*?\.mp3(?=",)')
+
+os.makedirs(output_directory, exist_ok=True)   # open() below fails if the directory is missing
+
+with urlopen(word_list_url) as page:
+    html = page.read().decode('utf-8')
+soup = BeautifulSoup(html, features='lxml')
+for a_tag in soup.find_all('a', href=True):
+    href = a_tag['href']
+    if not href.startswith(dictionary_prefix):
+        continue
+    word = href[len(dictionary_prefix):]
+    print(word)
+    mp3_path = output_directory+'/'+word+'.mp3'
+    if os.path.exists(mp3_path):
+        continue
+    # Send the same browser-like headers for the page as for the audio file.
+    with urlopen(urllib.request.Request(href, headers=header)) as page:
+        word_html = page.read().decode('utf-8')
+    match = mp3_pattern.search(word_html)
+    if match is None:
+        print('no pronunciation found')
+        print()
+        continue
+    mp3_file = match.group(0)
+    print(mp3_file)
+    print()
+    with requests.get(mp3_file, headers=header, stream=True) as r:
+        if not r.ok:
+            # Do not save an error page as an .mp3: the exists-check would then never retry it.
+            print('download failed (HTTP '+str(r.status_code)+'): '+mp3_file)
+            continue
+        with open(mp3_path, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
--- a/language_learning/2021.03.18_element_words/play_element_words_with_guan.py
+++ b/language_learning/2021.03.18_element_words/play_element_words_with_guan.py
@@ -0,0 +1,8 @@
+# Word list (webpage): https://www.guanjihuan.com/archives/10897
+# Installation:
+# (1) pip install pygame
+# (2) pip install --upgrade guan
+
+import guan
+guan.play_element_words()
+# With explicit options: guan.play_element_words(random_on=0, show_translation=1, show_link=1, translation_time=2, rest_time=1)