diff --git a/API_Reference/API_Reference.py b/API_Reference/API_Reference.py index 1c8b87b..6a46d3d 100644 --- a/API_Reference/API_Reference.py +++ b/API_Reference/API_Reference.py @@ -1,4 +1,4 @@ -# API Reference shows all functions in Guan package. The current version is guan-0.0.186, updated on December 07, 2023. +# API Reference shows all functions in Guan package. The current version is guan-0.0.187, updated on December 07, 2023. import guan @@ -824,6 +824,9 @@ guan.statistics_with_day_and_time(content='', filename='a', file_format='.txt') # 统计Python文件中import的数量并排序 import_statement_counter = guan.count_number_of_import_statements(filename, file_format='.py', num=1000) +# 从网页的标签中获取内容 +content = guan.get_html_from_tags(link, tags=['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'a']) + # 将RGB转成HEX hex = guan.rgb_to_hex(rgb, pound=1) diff --git a/Source_Code/PyPI/setup.cfg b/Source_Code/PyPI/setup.cfg index 17f4a0a..e8161ad 100644 --- a/Source_Code/PyPI/setup.cfg +++ b/Source_Code/PyPI/setup.cfg @@ -1,7 +1,7 @@ [metadata] # replace with your username: name = guan -version = 0.0.186 +version = 0.0.187 author = guanjihuan author_email = guanjihuan@163.com description = An open source python package diff --git a/Source_Code/PyPI/src/guan.egg-info/PKG-INFO b/Source_Code/PyPI/src/guan.egg-info/PKG-INFO index 3d15882..37ba2cd 100644 --- a/Source_Code/PyPI/src/guan.egg-info/PKG-INFO +++ b/Source_Code/PyPI/src/guan.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: guan -Version: 0.0.186 +Version: 0.0.187 Summary: An open source python package Home-page: https://py.guanjihuan.com Author: guanjihuan diff --git a/Source_Code/PyPI/src/guan/__init__.py b/Source_Code/PyPI/src/guan/__init__.py index 8b6be8e..c671261 100644 --- a/Source_Code/PyPI/src/guan/__init__.py +++ b/Source_Code/PyPI/src/guan/__init__.py @@ -1,6 +1,6 @@ # Guan is an open-source python package developed and maintained by https://www.guanjihuan.com/about (Ji-Huan Guan, 关济寰). 
# Get the text content of the selected tags from a web page
def get_html_from_tags(link, tags=['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'a'], timeout=30):
    """Fetch a web page and return the text found inside the selected tags.

    Args:
        link: URL of the page to download.
        tags: Tag name(s) handed to ``BeautifulSoup.find_all``. The default
            list is only read, never mutated, so sharing it between calls
            is safe.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Pass ``None`` to wait indefinitely.

    Returns:
        A single string holding the text of every matching tag in document
        order, with newlines inside each tag removed and consecutive tags
        separated by a blank line. An empty string is returned when nothing
        matches.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (connection failure, timeout, invalid URL, ...). HTTP error
            status codes are not raised; the body of the error page is
            parsed like any other page.
    """
    from bs4 import BeautifulSoup
    import requests
    # Deliberately no process-wide override of ssl._create_default_https_context:
    # requests performs its own certificate verification and does not consult
    # that hook, so overriding it would only weaken TLS checks for unrelated
    # urllib/http.client users in the same process.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36'
    }
    # An explicit timeout keeps an unresponsive server from blocking forever.
    response = requests.get(link, headers=headers, timeout=timeout)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers (lxml, html5lib) happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    texts = [tag.get_text().replace('\n', '') for tag in soup.find_all(tags)]
    # Skip empty texts at the beginning so the result never starts with blank
    # separators; empty texts after the first non-empty one are kept.
    first_non_empty = next((index for index, text in enumerate(texts) if text), len(texts))
    return '\n\n'.join(texts[first_non_empty:])