update

2021-08-09 12:45:46 +08:00
parent b101731c06
commit b115f2fb1d
4 changed files with 151 additions and 196 deletions
--- a/language_learning/2020.10.17_0_web_scraping_with_BeautifulSoup/BeautifulSoup.py
+++ b/language_learning/2020.10.17_0_web_scraping_with_BeautifulSoup/BeautifulSoup.py
--- a/language_learning/2020.10.31_0_download_from_sci_hub_with_python/download_from_sci_hub_with_python.py
+++ b/language_learning/2020.10.31_0_download_from_sci_hub_with_python/download_from_sci_hub_with_python.py
@@ -1,45 +0,0 @@
 """
 This code is supported by the website: https://www.guanjihuan.com
 The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6846
 """
 from bs4 import BeautifulSoup
 from urllib.request import urlopen
 from urllib.parse import urljoin
 import re  # regular expressions
 import requests
 import os
 # Interactive script: collect up to 10 DOIs/links/titles from stdin, submit
 # each one to Sci-Hub, and save the PDF referenced by the result page's
 # <iframe> into the current working directory.
 # os.chdir('D:')  # set the directory where the files are saved
 # Input
 address_array = []
 for _ in range(10):  # at most 10 papers per run
    address = input('\n输入DOI/链接/标题：')
    address_array.append(address)
    stop_adding = None
    while stop_adding is None:
        continue_or_not = input('\n继续添加（1）/不继续添加（0）：')
        try:
            # Only an answer that parses to 0 stops the input phase; any
            # other integer keeps adding.
            stop_adding = int(continue_or_not) == 0
        except ValueError:
            # Blank or non-numeric answer: ask again instead of crashing.
            continue
    if stop_adding:
        break
 # Download
 # Matched against the REVERSED URL: the first 'fdp' there is the last 'pdf'
 # in the URL, and the lazy '.*?/' stops at the '/' that precedes the file name.
 reversed_name_pattern = re.compile('fdp.*?/')
 for address in address_array:
    r = requests.post('https://sci-hub.st/', data={'request': address})
    print('\n响应结果是：', r)
    print('访问的地址是：', r.url)
    soup = BeautifulSoup(r.text, features='lxml')
    iframe = soup.iframe
    pdf_URL = iframe.get('src') if iframe is not None else None
    if not pdf_URL:
        # The result page has no <iframe src=...>, so there is nothing to
        # download for this entry. Skip it rather than aborting the batch.
        print('未找到PDF的地址，跳过：', address)
        continue
    if pdf_URL.startswith('//'):
        # Protocol-relative link: force https.
        pdf_URL = 'https:' + pdf_URL
    else:
        # Absolute URLs pass through; relative ones are resolved against the
        # page that was actually served.
        pdf_URL = urljoin(r.url, pdf_URL)
    print('PDF的地址是：', pdf_URL)
    name_match = reversed_name_pattern.search(pdf_URL[::-1])
    if name_match is None:
        # No '/<name>...pdf' segment in the URL, so no file name can be derived.
        print('无法从PDF的地址得到文件名，跳过：', address)
        continue
    # Un-reverse the match and drop its leading '/'.
    name = name_match.group()[::-1][1::]
    print('PDF文件名是：', name)
    print('保存的位置在：', os.getcwd())
    print('\n正在下载')
    # The context manager releases the streamed connection when done.
    with requests.get(pdf_URL, stream=True) as pdf_response:
        if not pdf_response.ok:
            # Do not save an HTTP error page under a .pdf name.
            print('下载失败，状态码：', pdf_response.status_code)
            continue
        with open(name, 'wb') as f:
            for chunk in pdf_response.iter_content(chunk_size=8192):
                f.write(chunk)
    print('下载完成！')
 print('\n全部下载完成！')
--- a/language_learning/2020.10.31_1_download_references_in_a_pdf_file_with_python/download_references_in_a_pdf_file_with_python.py
+++ b/language_learning/2020.10.31_1_download_references_in_a_pdf_file_with_python/download_references_in_a_pdf_file_with_python.py
--- a/language_learning/2020.10.31_1_download_references_in_a_pdf_file_with_python/get_links_from_a_pdf_file.py
+++ b/language_learning/2020.10.31_1_download_references_in_a_pdf_file_with_python/get_links_from_a_pdf_file.py