This commit is contained in:
guanjihuan 2023-10-07 06:18:49 +08:00
parent cf006f477f
commit 9f917548fa
4 changed files with 7 additions and 11 deletions

View File

@ -1,4 +1,4 @@
# This API reference lists all functions in the Guan package. The current version is guan-0.0.187, updated on December 07, 2023.
# This API reference lists all functions in the Guan package. The current version is guan-0.0.188, updated on December 07, 2023.
import guan

View File

@ -1,7 +1,7 @@
[metadata]
# replace with your username:
name = guan
version = 0.0.187
version = 0.0.188
author = guanjihuan
author_email = guanjihuan@163.com
description = An open source python package

View File

@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: guan
Version: 0.0.187
Version: 0.0.188
Summary: An open source python package
Home-page: https://py.guanjihuan.com
Author: guanjihuan

View File

@ -1,6 +1,6 @@
# Guan is an open-source Python package developed and maintained by https://www.guanjihuan.com/about (Ji-Huan Guan, 关济寰). The primary location of this package is the website https://py.guanjihuan.com. GitHub link: https://github.com/guanjihuan/py.guanjihuan.com.
# The current version is guan-0.0.187, updated on December 07, 2023.
# The current version is guan-0.0.188, updated on December 07, 2023.
# Installation: pip install --upgrade guan
@ -3603,15 +3603,11 @@ def count_number_of_import_statements(filename, file_format='.py', num=1000):
# Get the content from the tags of a web page
def get_html_from_tags(link, tags=['title', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'a']):
from bs4 import BeautifulSoup
import requests
import urllib.request
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36'
}
response = requests.get(link, headers=headers)
html = response.text
soup = BeautifulSoup(html)
html = urllib.request.urlopen(link).read().decode('utf-8')
soup = BeautifulSoup(html, features="lxml")
all_tags = soup.find_all(tags)
content = ''
for tag in all_tags: