# Code added to PyPI/src/guan/others.py by commit
# ab25e2175b178aa5ed17b7d57cb93e95af6d20cd (guanjihuan, 2024-10-30, "0.1.122").
# The same commit bumps the package version from 0.1.121 to 0.1.122 in
# PyPI/setup.cfg and PyPI/src/guan.egg-info/PKG-INFO.


# Get all links from an HTML page
def get_links_from_html(html_link, links_with_text=0):
    """Download a page and return the targets of its ``<a>`` tags.

    Args:
        html_link: URL of the page to download.
        links_with_text: if 0 (the default) return a list of href strings;
            otherwise return a list of ``(href, text)`` tuples.

    Returns:
        A list with one entry per ``<a>`` tag that has a non-empty ``href``.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        UnicodeDecodeError: if the page body is not valid UTF-8.
    """
    from bs4 import BeautifulSoup
    import urllib.request
    import ssl

    # Skip certificate verification for this request only, instead of
    # replacing the process-wide default HTTPS context.
    unverified_context = ssl._create_unverified_context()
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(html_link, context=unverified_context) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, features="lxml")

    link_array = []
    for tag in soup.find_all('a'):
        href = tag.get('href')
        if not href:
            # A missing or empty href carries no link.
            continue
        link_array.append(href if links_with_text == 0 else (href, tag.text))
    return link_array


# Check whether a link is valid
def check_link(url, timeout=3, allow_redirects=True):
    """Return True if a HEAD request to ``url`` answers with status 200.

    Args:
        url: the address to probe.
        timeout: value passed to ``requests.head`` as its timeout.
        allow_redirects: passed to ``requests.head``; controls whether
            redirects are followed before the status code is inspected.

    Returns:
        True when the response status code is 200; False for any other
        status code or when ``requests`` raises a ``RequestException``.
    """
    import requests
    try:
        response = requests.head(url, timeout=timeout, allow_redirects=allow_redirects)
    except requests.exceptions.RequestException:
        return False
    return response.status_code == 200


def _collect_failed_links(link_array, timeout, allow_redirects, print_show):
    # One pass over link_array: return the links for which check_link is
    # False, printing each one as it is found when print_show is truthy.
    failed = []
    for link in link_array:
        # Fragment-only links ('#', '#section') point into the current page
        # and cannot be requested on their own, so they are not checked.
        if isinstance(link, str) and link.startswith('#'):
            continue
        if check_link(link, timeout=timeout, allow_redirects=allow_redirects):
            continue
        failed.append(link)
        if print_show:
            print(link)
    return failed


# Check the validity of every link in an array
def check_link_array(link_array, timeout=3, allow_redirects=True, try_again=0, print_show=1):
    """Return the links in ``link_array`` that fail ``check_link``.

    Args:
        link_array: iterable of links to check.
        timeout: forwarded to ``check_link``.
        allow_redirects: forwarded to ``check_link``.
        try_again: if truthy, the links that failed the first pass are
            checked a second time and only those failing again are returned.
        print_show: if truthy, print each failing link as it is found (and a
            'Try again:' header before the second pass).

    Returns:
        A list of the failing links, in input order.
    """
    failed_link_array = _collect_failed_links(link_array, timeout, allow_redirects, print_show)
    if try_again:
        if print_show:
            print('\nTry again:\n')
        # Second pass only over the links that failed the first one.
        failed_link_array = _collect_failed_links(failed_link_array, timeout, allow_redirects, print_show)
    return failed_link_array