This commit is contained in:
guanjihuan 2024-12-24 21:58:44 +08:00
parent 2b5991b7f2
commit e132cd8f75
3 changed files with 24 additions and 21 deletions

View File

@ -1,7 +1,7 @@
[metadata] [metadata]
# replace with your username: # replace with your username:
name = guan name = guan
version = 0.1.136 version = 0.1.137
author = guanjihuan author = guanjihuan
author_email = guanjihuan@163.com author_email = guanjihuan@163.com
description = An open source python package description = An open source python package

View File

@ -1,6 +1,6 @@
Metadata-Version: 2.1 Metadata-Version: 2.1
Name: guan Name: guan
Version: 0.1.136 Version: 0.1.137
Summary: An open source python package Summary: An open source python package
Home-page: https://py.guanjihuan.com Home-page: https://py.guanjihuan.com
Author: guanjihuan Author: guanjihuan

View File

@ -1255,25 +1255,28 @@ def convert_wordpress_xml_to_markdown(xml_file='./a.xml', convert_content=1, rep
title = item.find('title').text title = item.find('title').text
content = item.find('.//content:encoded', namespaces={'content': 'http://purl.org/rss/1.0/modules/content/'}).text content = item.find('.//content:encoded', namespaces={'content': 'http://purl.org/rss/1.0/modules/content/'}).text
if convert_content == 1: if convert_content == 1:
content = re.sub(r'<!--.*?-->', '', content) try:
content = content.replace('<p>', '') content = re.sub(r'<!--.*?-->', '', content)
content = content.replace('</p>', '') content = content.replace('<p>', '')
content = content.replace('<ol>', '') content = content.replace('</p>', '')
content = content.replace('</ol>', '') content = content.replace('<ol>', '')
content = content.replace('<ul>', '') content = content.replace('</ol>', '')
content = content.replace('</ul>', '') content = content.replace('<ul>', '')
content = content.replace('<strong>', '') content = content.replace('</ul>', '')
content = content.replace('</strong>', '') content = content.replace('<strong>', '')
content = content.replace('</li>', '') content = content.replace('</strong>', '')
content = content.replace('<li>', '+ ') content = content.replace('</li>', '')
content = content.replace('</h3>', '') content = content.replace('<li>', '+ ')
content = re.sub(r'<h2.*?>', '## ', content) content = content.replace('</h3>', '')
content = re.sub(r'<h3.*?>', '### ', content) content = re.sub(r'<h2.*?>', '## ', content)
content = re.sub(r'<h4.*?>', '#### ', content) content = re.sub(r'<h3.*?>', '### ', content)
for replace_item in replace_more: content = re.sub(r'<h4.*?>', '#### ', content)
content = content.replace(replace_item, '') for replace_item in replace_more:
for _ in range(100): content = content.replace(replace_item, '')
content = content.replace('\n\n\n', '\n\n') for _ in range(100):
content = content.replace('\n\n\n', '\n\n')
except:
print(f'提示:字符串替换出现问题!出现问题的内容为:{content}')
else: else:
pass pass
markdown_content = f"# {title}\n{content}" markdown_content = f"# {title}\n{content}"