0.1.122
This commit is contained in:
		| @@ -1,7 +1,7 @@ | |||||||
| [metadata] | [metadata] | ||||||
| # replace with your username: | # replace with your username: | ||||||
| name = guan | name = guan | ||||||
| version = 0.1.121 | version = 0.1.122 | ||||||
| author = guanjihuan | author = guanjihuan | ||||||
| author_email = guanjihuan@163.com | author_email = guanjihuan@163.com | ||||||
| description = An open source python package | description = An open source python package | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| Metadata-Version: 2.1 | Metadata-Version: 2.1 | ||||||
| Name: guan | Name: guan | ||||||
| Version: 0.1.121 | Version: 0.1.122 | ||||||
| Summary: An open source python package | Summary: An open source python package | ||||||
| Home-page: https://py.guanjihuan.com | Home-page: https://py.guanjihuan.com | ||||||
| Author: guanjihuan | Author: guanjihuan | ||||||
|   | |||||||
| @@ -694,6 +694,60 @@ def get_html_from_tags(link, tags=['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', | |||||||
|             content = content + '\n\n' + text |             content = content + '\n\n' + text | ||||||
|     return content |     return content | ||||||
|  |  | ||||||
# Get all links from an HTML page
def get_links_from_html(html_link, links_with_text=0):
    """Download the page at ``html_link`` and return the targets of its ``<a>`` tags.

    Parameters:
        html_link: URL of the page to download.
        links_with_text: when 0 (the default) a list of href strings is
            returned; for any other value a list of ``(href, text)`` tuples
            is returned, where ``text`` is the text content of the tag.

    Returns:
        A list, in document order, with one entry per ``<a>`` tag that has a
        non-empty ``href`` attribute. Tags without ``href`` (or with an empty
        one) are skipped.

    The response body is decoded as UTF-8 and parsed with the ``lxml`` parser.
    """
    from bs4 import BeautifulSoup
    import urllib.request
    import ssl
    # Certificate verification is still skipped for this download, but the
    # unverified context is passed to this single request instead of being
    # installed as the process-wide default, so unrelated HTTPS calls made by
    # the caller keep their normal verification.
    unverified_context = ssl._create_unverified_context()
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(html_link, context=unverified_context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")
    found = []
    for tag in soup.find_all('a'):
        href = tag.get('href')
        if not href:
            continue
        found.append(href if links_with_text == 0 else (href, tag.text))
    return found
|  |  | ||||||
# Check whether a link is valid
def check_link(url, timeout=3, allow_redirects=True):
    """Return True if a HEAD request to ``url`` answers with status code 200.

    Parameters:
        url: the address to probe.
        timeout: passed through to ``requests.head`` as its ``timeout``.
        allow_redirects: passed through to ``requests.head``.

    Returns:
        True when the response status code is exactly 200. False for any
        other status code, or when the request raises
        ``requests.exceptions.RequestException``.
    """
    import requests
    try:
        status = requests.head(url, timeout=timeout, allow_redirects=allow_redirects).status_code
    except requests.exceptions.RequestException:
        # Any request-level failure counts as an invalid link.
        return False
    return status == 200
|  |  | ||||||
# Check the validity of every link in an array of links
def check_link_array(link_array, timeout=3, allow_redirects=True, try_again=0, print_show=1):
    """Return the links from ``link_array`` that fail ``guan.check_link``.

    Parameters:
        link_array: iterable of links to check. An entry equal to ``'#'`` is
            never checked and never reported as failed.
        timeout: forwarded to ``guan.check_link``.
        allow_redirects: forwarded to ``guan.check_link``.
        try_again: when truthy, the links that failed the first pass are
            checked a second time and only those failing again are returned.
        print_show: when truthy, each failing link is printed as it is found
            (and a 'Try again' banner is printed before the second pass).

    Returns:
        A list of failed links, in the order they were encountered.
    """
    import guan

    def collect_failures(candidates):
        # One checking pass: gather (and optionally print) the links that fail.
        failures = []
        for candidate in candidates:
            if candidate == '#':
                continue
            if guan.check_link(candidate, timeout=timeout, allow_redirects=allow_redirects):
                continue
            failures.append(candidate)
            if print_show:
                print(candidate)
        return failures

    first_pass_failures = collect_failures(link_array)
    if not try_again:
        return first_pass_failures
    if print_show:
        print('\nTry again:\n')
    return collect_failures(first_pass_failures)
|  |  | ||||||
| # 生成二维码 | # 生成二维码 | ||||||
| def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'): | def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.png'): | ||||||
|     import qrcode |     import qrcode | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user