Compare commits

10 Commits

2d98dd5b07 ... 73adde082b
| Author | SHA1 | Date |
|---|---|---|
|  | 73adde082b |  |
|  | 0027293f2b |  |
|  | ebf86b30eb |  |
|  | 5be00bb720 |  |
|  | 2b5bfab114 |  |
|  | 81565244f1 |  |
|  | fea2d76548 |  |
|  | 047c2353b6 |  |
|  | d2b76e2cd7 |  |
|  | 67d4c9e5a4 |  |

@@ -3,11 +3,11 @@
 % Efficient Chern number method
 clear;clc;
-n=1000 % integration density
+n=100 % integration density
 delta=2*pi/n;
 C=0;
-for kx=-pi:(2*pi/n):pi
-    for ky=-pi:(2*pi/n):pi
+for kx=-pi:(2*pi/n):pi-(2*pi/n)
+    for ky=-pi:(2*pi/n):pi-(2*pi/n)
         VV=get_vector(HH(kx,ky));
         Vkx=get_vector(HH(kx+delta,ky)); % wave function slightly shifted in kx
         Vky=get_vector(HH(kx,ky+delta)); % wave function slightly shifted in ky
 
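
The hunk above only shows the opening lines of the MATLAB script, so the actual accumulation of Berry curvature into `C` is not visible here. As a hedged illustration of the same idea (not the repository's `HH`/`get_vector` code), the following minimal Python sketch computes the discretized Chern number of the lower band of the Qi-Wu-Zhang model with the gauge-invariant plaquette (link-variable) formula; the Hamiltonian and the plaquette formula are assumptions added here for demonstration only.

```python
# Minimal sketch (assumption, not the repo's code): discretized Chern number of the
# lower band of the Qi-Wu-Zhang model via U(1) link variables on each k-plaquette.
import numpy as np

def hamiltonian(kx, ky, m=-1.0):
    sx = np.array([[0, 1], [1, 0]], dtype=complex)
    sy = np.array([[0, -1j], [1j, 0]], dtype=complex)
    sz = np.array([[1, 0], [0, -1]], dtype=complex)
    return np.sin(kx)*sx + np.sin(ky)*sy + (m + np.cos(kx) + np.cos(ky))*sz

def lower_band_vector(h):
    eigenvalues, eigenvectors = np.linalg.eigh(h)
    return eigenvectors[:, 0]  # eigenvector of the lower band

n = 100              # integration density
delta = 2*np.pi/n
C = 0.0
for kx in np.arange(-np.pi, np.pi, delta):
    for ky in np.arange(-np.pi, np.pi, delta):
        u00 = lower_band_vector(hamiltonian(kx, ky))
        u10 = lower_band_vector(hamiltonian(kx + delta, ky))
        u11 = lower_band_vector(hamiltonian(kx + delta, ky + delta))
        u01 = lower_band_vector(hamiltonian(kx, ky + delta))
        # Berry flux through one plaquette from the four wave-function overlaps
        flux = np.angle(np.vdot(u00, u10) * np.vdot(u10, u11)
                        * np.vdot(u11, u01) * np.vdot(u01, u00))
        C += flux
C /= 2*np.pi
print(C)  # converges to an integer; |C| = 1 for 0 < |m| < 2
```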

@@ -0,0 +1,63 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/10890
"""

import numpy as np

a = [[ 0 , 0  ,  1.5 ,  0.32635182-0.98480775j],
 [0  ,   0  , -0.32635182-0.98480775j, 1.5  ],
 [ 1.5 ,    -0.32635182+0.98480775j ,0, 0 ],
 [ 0.32635182+0.98480775j , 1.5 ,  0, 0 ]]

def Schmidt_orthogonalization(eigenvector):
    num = eigenvector.shape[1]
    for i in range(num):
        for i0 in range(i):
            eigenvector[:, i] = eigenvector[:, i] - eigenvector[:, i0]*np.dot(eigenvector[:, i].transpose().conj(), eigenvector[:, i0])/(np.dot(eigenvector[:, i0].transpose().conj(),eigenvector[:, i0]))
        eigenvector[:, i] = eigenvector[:, i]/np.linalg.norm(eigenvector[:, i])
    return eigenvector

def verify_orthogonality(vectors):
    identity = np.eye(vectors.shape[1])
    product = np.dot(vectors.T.conj(), vectors)
    return np.allclose(product, identity)

# Orthogonalize the eigenvectors from np.linalg.eigh()

E, v = np.linalg.eigh(a)
print(verify_orthogonality(v))

v1 = Schmidt_orthogonalization(v)
print(verify_orthogonality(v1))

from scipy.linalg import orth
v2 = orth(v)
print(verify_orthogonality(v2))

v3, S, Vt = np.linalg.svd(v)
print(verify_orthogonality(v3))

v4, R = np.linalg.qr(v)
print(verify_orthogonality(v4))

print()

# Orthogonalize the eigenvectors from np.linalg.eig()

E, v = np.linalg.eig(a)
print(verify_orthogonality(v))

v1 = Schmidt_orthogonalization(v)
print(verify_orthogonality(v1))

from scipy.linalg import orth
v2 = orth(v)
print(verify_orthogonality(v2))

v3, S, Vt = np.linalg.svd(v)
print(verify_orthogonality(v3))

v4, R = np.linalg.qr(v)
print(verify_orthogonality(v4))
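
One hedged remark on why the Gram-Schmidt step matters in the script above: for a Hermitian matrix, eigenvectors belonging to distinct eigenvalues are automatically orthogonal, so `np.linalg.eig` can only return a non-orthogonal set when eigenvalues are degenerate (it does not orthogonalize within a degenerate subspace, whereas `np.linalg.eigh` always returns an orthonormal set). A small standalone check of the spectrum of the same matrix, added here for illustration and not part of the original file:

```python
# Supplementary check (not in the original script): inspect the eigenvalue spectrum of
# the Hermitian matrix `a`; non-orthogonal eig output can only arise from degeneracies.
import numpy as np

a = np.array([[0, 0, 1.5, 0.32635182-0.98480775j],
              [0, 0, -0.32635182-0.98480775j, 1.5],
              [1.5, -0.32635182+0.98480775j, 0, 0],
              [0.32635182+0.98480775j, 1.5, 0, 0]])

eigenvalues = np.linalg.eigvalsh(a)   # ascending order
print(eigenvalues)
print("smallest gap between neighboring eigenvalues:", np.min(np.diff(eigenvalues)))
```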

2021.05.07_Quantum_Espresso/silicon/command.txt (new file, 7 lines)
@@ -0,0 +1,7 @@
pw.x < pw.scf.silicon_bands.in > pw.scf.silicon_bands.out

pw.x < pw.bands.silicon.in > pw.bands.silicon.out

bands.x < pp.bands.silicon.in > pp.bands.silicon.out

python plot_bands.py

2021.05.07_Quantum_Espresso/silicon/plot_bands.py (new file, 30 lines)
@@ -0,0 +1,30 @@
import matplotlib.pyplot as plt
import numpy as np

plt.rcParams["figure.dpi"]=150
plt.rcParams["figure.facecolor"]="white"
plt.rcParams["figure.figsize"]=(8, 6)

# load data
data = np.loadtxt('./si_bands.dat.gnu')

k = np.unique(data[:, 0])
bands = np.reshape(data[:, 1], (-1, len(k)))

for band in range(len(bands)):
    plt.plot(k, bands[band, :], linewidth=1, alpha=0.5, color='k')
plt.xlim(min(k), max(k))

# Fermi energy
plt.axhline(6.6416, linestyle=(0, (5, 5)), linewidth=0.75, color='k', alpha=0.5)
# High symmetry k-points (check bands_pp.out)
plt.axvline(0.8660, linewidth=0.75, color='k', alpha=0.5)
plt.axvline(1.8660, linewidth=0.75, color='k', alpha=0.5)
plt.axvline(2.2196, linewidth=0.75, color='k', alpha=0.5)
# text labels
plt.xticks(ticks= [0, 0.8660, 1.8660, 2.2196, 3.2802], \
           labels=['L', '$\Gamma$', 'X', 'U', '$\Gamma$'])
plt.ylabel("Energy (eV)")
plt.text(2.3, 5.6, 'Fermi energy')
plt.savefig('si_bands.jpg')
plt.show()
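
For context on the reshape in `plot_bands.py`: the `si_bands.dat.gnu` file written by `bands.x` stores two columns (k-path coordinate, energy) with the bands listed one after another, so the k column repeats once per band; `np.unique` recovers the k grid and `reshape(-1, len(k))` folds the energy column into a (number of bands, number of k-points) array. A tiny self-contained sketch of that reshaping with made-up numbers (an illustration only, not the actual silicon output):

```python
# Toy illustration (made-up data, not the silicon output): folding the two-column
# band file into a (bands, k-points) array, as done in plot_bands.py.
import numpy as np

k_grid = np.linspace(0.0, 1.0, 5)                 # 5 k-points
fake_bands = np.array([[0, 1, 2, 3, 4],           # band 1 energies
                       [10, 11, 12, 13, 14]])     # band 2 energies
two_columns = np.column_stack([np.tile(k_grid, 2), fake_bands.ravel()])

k = np.unique(two_columns[:, 0])
bands = np.reshape(two_columns[:, 1], (-1, len(k)))
print(bands.shape)                     # (2, 5)
print(np.allclose(bands, fake_bands))  # True
```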

2021.05.07_Quantum_Espresso/silicon/pp.bands.silicon.in (new file, 13 lines)
@@ -0,0 +1,13 @@
&BANDS
  prefix = 'silicon'
  outdir = './tmp/'
  filband = 'si_bands.dat'
/

K_POINTS {crystal_b}
5
  0.0000 0.5000 0.0000 20  !L
  0.0000 0.0000 0.0000 30  !G
  -0.500 0.0000 -0.500 10  !X
  -0.375 0.2500 -0.375 30  !U
  0.0000 0.0000 0.0000 20  !G

2021.05.07_Quantum_Espresso/silicon/pw.bands.silicon.in (new file, 38 lines)
@@ -0,0 +1,38 @@
&control
  calculation = 'bands',
  restart_mode = 'from_scratch',
  prefix = 'silicon',
  outdir = './tmp/'
  pseudo_dir = '/opt/qe-7.3.1/pseudo'
  verbosity = 'high'
/

&system
  ibrav =  2,
  celldm(1) = 10.2076,
  nat =  2,
  ntyp = 1,
  ecutwfc = 50,
  ecutrho = 400,
  nbnd = 8
 /

&electrons
  conv_thr = 1e-8,
  mixing_beta = 0.6
 /

ATOMIC_SPECIES
  Si 28.086  Si.pz-vbc.UPF

ATOMIC_POSITIONS (alat)
  Si 0.00 0.00 0.00
  Si 0.25 0.25 0.25

K_POINTS {crystal_b}
5
  0.0000 0.5000 0.0000 20  !L
  0.0000 0.0000 0.0000 30  !G
  -0.500 0.0000 -0.500 10  !X
  -0.375 0.2500 -0.375 30  !U
  0.0000 0.0000 0.0000 20  !G

2021.05.07_Quantum_Espresso/silicon/pw.scf.silicon_bands.in (new file, 36 lines)
@@ -0,0 +1,36 @@
&CONTROL
  calculation = 'scf',
  restart_mode = 'from_scratch',
  prefix = 'silicon',
  outdir = './tmp/'
  pseudo_dir = '/opt/qe-7.3.1/pseudo'
  verbosity = 'high'
/

&SYSTEM
  ibrav =  2,
  celldm(1) = 10.2076,
  nat =  2,
  ntyp = 1,
  ecutwfc = 50,
  ecutrho = 400,
  nbnd = 8,
!  occupations = 'smearing',
!  smearing = 'gaussian',
!  degauss = 0.005
/

&ELECTRONS
  conv_thr = 1e-8,
  mixing_beta = 0.6
/

ATOMIC_SPECIES
  Si 28.086 Si.pz-vbc.UPF

ATOMIC_POSITIONS (alat)
  Si 0.0 0.0 0.0
  Si 0.25 0.25 0.25

K_POINTS (automatic)
  8 8 8 0 0 0

@@ -1,88 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/13623
"""

from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
from collections import Counter
import datetime
import random
import time


# time.sleep(random.uniform(0,1800))  # simple crawler disguise: start after a random delay of 0 to 30 minutes; comment this line out when debugging.
year = datetime.datetime.now().year
month = datetime.datetime.now().month
day = datetime.datetime.now().day


# Get the links
try:
    with open('prb_link_list.txt', 'r', encoding='UTF-8') as f:  # if the file exists
        link_list = f.read().split('\n')   # links already visited in previous runs (list)
except:
    with open('prb_link_list.txt', 'w', encoding='UTF-8') as f:  # if the file does not exist
        link_list = []
f = open('prb_link_list.txt', 'a', encoding='UTF-8')  # open the file for appending
f.write('\nLink list obtained on '+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+':\n')
match_href = []  # links that match the criteria in this run
for loop in range(3):
    if loop == 0:
        start_link = "https://journals.aps.org/prb/recent?page=1"  # page 1
    elif loop == 1:
        start_link = "https://journals.aps.org/prb/recent?page=2"  # page 2
    elif loop == 2:
        start_link = "https://journals.aps.org/prb/recent?page=3"  # page 3 (three pages basically cover all of the day's updates)
    html = urlopen(start_link).read().decode('utf-8')  # open the web page
    soup = BeautifulSoup(html, features='lxml') # parse into soup
    all_a_tag = soup.find_all('a', href=True)  # get all hyperlink tags
    for a_tag in all_a_tag:
        href = a_tag['href']  # hyperlink string
        if re.search('/abstract/', href): # article link
            if re.search('https://journals.aps.org', href)==None:  # if the link is not complete, complete it
                href = 'https://journals.aps.org'+ href
            if href not in match_href and href not in link_list and re.search('\?', href)==None:  # skip duplicate links
                match_href.append(href)
                f.write(href+'\n')
f.close()



# Get the abstracts
try:
    f = open('prb_all.txt', 'a', encoding='UTF-8')  # full record
except:
    f = open('prb_all.txt', 'w', encoding='UTF-8')  # if the file does not exist
try:
    f_month = open('prb_'+str(year)+'.'+str(month).rjust(2,'0')+'.txt', 'a', encoding='UTF-8')  # record for this month
except:
    f_month = open('prb_'+str(year)+'.'+str(month).rjust(2,'0')+'.txt', 'w', encoding='UTF-8')  # if the file does not exist
f.write('\n\n['+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+'][total number='+str(len(match_href))+']\n\n\n')
f_month.write('\n\n['+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+'][total number='+str(len(match_href))+']\n\n\n')
print('total number=', len(match_href))  # shown for debugging
i00 = 0
for href in match_href:
    i00 += 1
    print('reading number', i00, '...')  # shown for debugging
    # time.sleep(random.uniform(10,110))  # simple crawler disguise: rest about one minute per link; with 60 links this adds about 60 minutes to the run time; comment this line out when debugging.
    try:
        html = urlopen(href).read().decode('utf-8')   # open the article link
        soup = BeautifulSoup(html, features='lxml') # parse into soup
        title = soup.title   # article title
        f.write(str(title.get_text())+'\n\n')
        f_month.write(str(title.get_text())+'\n\n')
        f.write(str(href)+'\n\n')   # article link
        f_month.write(str(href)+'\n\n')
        abstract = re.findall('"yes"><p>.*</p><div', html, re.S)[0][9:-8]  # article abstract
        word_list = abstract.split(' ')  # split into words
        for word in word_list:
            if re.search('<', word)==None and re.search('>', word)==None:  # some content matches this filter, so information may be lost.
                f.write(word+' ')
                f_month.write(word+' ')
        f.write('\n\n\n')
        f_month.write('\n\n\n')
    except:
        pass
f.close()

@@ -1,88 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/13623
"""

from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
from collections import Counter
import datetime
import random
import time


# time.sleep(random.uniform(0,1800))  # simple crawler disguise: start after a random delay of 0 to 30 minutes; comment this line out when debugging.
year = datetime.datetime.now().year
month = datetime.datetime.now().month
day = datetime.datetime.now().day


# Get the links
try:
    with open('prl_link_list.txt', 'r', encoding='UTF-8') as f:  # if the file exists
        link_list = f.read().split('\n')   # links already visited in previous runs (list)
except:
    with open('prl_link_list.txt', 'w', encoding='UTF-8') as f:  # if the file does not exist
        link_list = []
f = open('prl_link_list.txt', 'a', encoding='UTF-8')  # open the file for appending
f.write('\nLink list obtained on '+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+':\n')
match_href = []  # links that match the criteria in this run
for loop in range(3):
    if loop == 0:
        start_link = "https://journals.aps.org/prl/recent?page=1"  # page 1
    elif loop == 1:
        start_link = "https://journals.aps.org/prl/recent?page=2"  # page 2
    elif loop == 2:
        start_link = "https://journals.aps.org/prl/recent?page=3"  # page 3 (three pages basically cover all of the day's updates)
    html = urlopen(start_link).read().decode('utf-8')  # open the web page
    soup = BeautifulSoup(html, features='lxml') # parse into soup
    all_a_tag = soup.find_all('a', href=True)  # get all hyperlink tags
    for a_tag in all_a_tag:
        href = a_tag['href']  # hyperlink string
        if re.search('/abstract/', href): # article link
            if re.search('https://journals.aps.org', href)==None:  # if the link is not complete, complete it
                href = 'https://journals.aps.org'+ href
            if href not in match_href and href not in link_list and re.search('\?', href)==None:  # skip duplicate links
                match_href.append(href)
                f.write(href+'\n')
f.close()



# Get the abstracts
try:
    f = open('prl_all.txt', 'a', encoding='UTF-8')  # full record
except:
    f = open('prl_all.txt', 'w', encoding='UTF-8')  # if the file does not exist
try:
    f_month = open('prl_'+str(year)+'.'+str(month).rjust(2,'0')+'.txt', 'a', encoding='UTF-8')  # record for this month
except:
    f_month = open('prl_'+str(year)+'.'+str(month).rjust(2,'0')+'.txt', 'w', encoding='UTF-8')  # if the file does not exist
f.write('\n\n['+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+'][total number='+str(len(match_href))+']\n\n\n')
f_month.write('\n\n['+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+'][total number='+str(len(match_href))+']\n\n\n')
print('total number=', len(match_href))  # shown for debugging
i00 = 0
for href in match_href:
    i00 += 1
    print('reading number', i00, '...')  # shown for debugging
    # time.sleep(random.uniform(10,110))  # simple crawler disguise: rest about one minute per link; with 60 links this adds about 60 minutes to the run time; comment this line out when debugging.
    try:
        html = urlopen(href).read().decode('utf-8')   # open the article link
        soup = BeautifulSoup(html, features='lxml') # parse into soup
        title = soup.title   # article title
        f.write(str(title.get_text())+'\n\n')
        f_month.write(str(title.get_text())+'\n\n')
        f.write(str(href)+'\n\n')   # article link
        f_month.write(str(href)+'\n\n')
        abstract = re.findall('"yes"><p>.*</p><div', html, re.S)[0][9:-8]  # article abstract
        word_list = abstract.split(' ')  # split into words
        for word in word_list:
            if re.search('<', word)==None and re.search('>', word)==None:  # some content matches this filter, so information may be lost.
                f.write(word+' ')
                f_month.write(word+' ')
        f.write('\n\n\n')
        f_month.write('\n\n\n')
    except:
        pass
f.close()

@@ -1,41 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/13623
"""

import re
from collections import Counter


def main():
    file_name = 'prb_all.txt'
    with open(file_name, 'r', encoding='UTF-8') as f:  # open the file
        paper_list = f.read().split('\n\n\n')  # split into papers by three newlines
    word_list = []
    ignore = ignore_words()  # filter out common words
    for paper in paper_list:
        word_list_in_one_paper = []
        if len(paper)>20:  # filter out date lines by string length
            content_list = paper.split('\n\n')  # split the content by two newlines
            for content in content_list:
                if re.search('https://', content)==None: # filter out article links
                    words = content.split(' ')  # split words by spaces
                    for word in words:
                        if word not in word_list_in_one_paper:  # count each word only once per paper
                            if word not in ignore and len(word)>1:  # filter words
                                word_list.append(word)
                                word_list_in_one_paper.append(word)
    num = 300
    most_common_words = Counter(word_list).most_common(num)  # count the num most common words
    print('\n出现频率最高的前', num, '个词汇:')
    for word in most_common_words:
        print(word)


def ignore_words(): # add or remove entries as you like
    ignore = ['Phys.', 'the', 'to', 'of', 'in', 'under', 'and', 'by', 'The', 'at', 'with', 'up', 'be', 'above', 'below', 'are', 'is', 'for', 'that', 'as', 'we', '<a', 'abstract', 'abstract"','<span', 'which', 'We', 'such', 'has', 'two', 'these', 'it', 'all', 'results', 'result', 'each', 'have', 'between', 'on', 'an', 'can', 'also', 'from', 'Our', 'our', 'using', 'where', 'These', 'out', 'both', 'due', 'less', 'along', 'but', 'In', 'show', 'into', 'study', 'find', 'provide', 'change','not', 'open', 'this', 'show', 'into', 'study', 'find', 'provide', 'change', 'present', 'Using', 'large', 'This', 'However', 'appear', 'studied', 'obtain', 'been', 'Both', 'they', 'effects', 'effect', 'compute', 'more', 'does', 'shown', 'Based', 'reveal', 'highly', 'number', 'However,', 'was', 'near', 'full', 'based', 'several', 'suggest', 'agreement', 'predicted', 'values', 'work', 'emphasize', 'without', 'or', 'work,', 'studies', 'future', 'identify', 'present.', 'predict', 'presence', 'their', 'were', 'From', 'its', 'By', 'how', 'ground', 'observed', 'recent', 'For', 'other', 'Here', 'test', 'further', 'Its', 'similar', 'however,', 'range', 'within', 'value', 'possible', 'may', 'than', 'low', 'us', 'obtained', 'around', 'consider', 'about', 'very', 'will', 'when', 'played', 'consist', 'consists', 'Here,', 'observe', 'gives', 'It', 'over', 'cannot', 'As', 'whose', 'new', 'some', 'only', 'from', 'yields', 'shows', 'data', 'direct', 'related', 'different', 'evidence', 'role', 'function', 'origin', 'specific', 'set', 'confirm', 'give', 'Moreover', 'develop', 'including', 'could', 'used', 'means', 'allows', 'make', 'e.g.,', 'provides', 'system', 'systems', 'field', 'fields', 'model', 'model,', 'state', 'states', 'states.', 'state.', 'band', 'bands', 'method', 'methods', 'nature', 'rate', 'zero', 'single', 'theory', 'first', 'one', 'complex', 'approach', 'schemes', 'terms', 'even', 'case', 'analysis', 'weight', 'volume', 'evolution', 'well', 'external', 'measured', 'introducing', 'dependence', 'properties', 'demonstrate', 'remains', 'through', 'measurements', 'samples', 'findings', 'respect', 'investigate', 'behavior', 'importance', 'considered', 'experimental', 'increase', 'propose', 'follows', 'increase', 'emerged', 'interesting', 'behaviors', 'influenced', 'paramount', 'indicate', 'Rev.', 'concepts', 'induced', 'zone', 'regions', 'exact', 'contribution', 'behavior', 'formation', 'measurements.', 'utilizing', 'constant', 'regime', 'features', 'strength', 'compare', 'determined', 'combination', 'compare', 'determined', 'At', 'inside', 'ambient', 'then', 'important', 'report', 'Moreover,', 'Despite', 'found', 'because', 'process', 'and,', 'significantly', 'realized', 'much', 'natural', 'since', 'grows', 'any', 'compared', 'while', 'forms.', 'appears', 'indicating', 'coefficient', 'suggested', 'time', 'exhibits', 'calculations.', 'developed', 'array', 'discuss', 'field', 'becomes', 'allowing', 'indicates', 'via', 'introduce', 'considering', 'times.', 'constructed', 'explain', 'form', 'owing', 'parameters.', 'parameter', 'operation', 'probe', 'experiments', 'interest', 'strategies', 'seen', 'emerge', 'generic', 'geometry', 'numbers', 'observation', 'avenue', 'theretically', 'three', 'excellent', 'amount', 'notable', 'example', 'being', 'promising', 'latter', 'little', 'imposed', 'put', 'resource', 'together', 'produce', 'successfully','there', 'enhanced', 'this', 'great', 'dirven', 'increasing','should', 'otherwise', 'Further', 'field,', 'known', 'changes', 'still', 'beyond', 'various', 'center', 'previously', 
'way', 'peculiar', 'detailed', 'understanding', 'good', 'years', 'where', 'Me', 'origins', 'years.', 'attributed', 'known,', 'them', 'reported', 'no', 'systems', 'agree', 'examined', 'rise', 'calculate', 'those', 'particular', 'relation', 'defined', 'either', 'again', 'current', 'exhibit', 'calculated', 'here', 'made', 'Further', 'consisting', 'constitutes', 'originated', 'if', 'exceed', 'access']
    return ignore


if __name__ == '__main__':
    main()

@@ -1,37 +0,0 @@
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import datetime


year = datetime.datetime.now().year
month = datetime.datetime.now().month
day = datetime.datetime.now().day


f = open('nature_physics.html', 'w', encoding='UTF-8')
f.write('<meta charset="utf-8"><style type="text/css">a{text-decoration: none;color: #0a5794;}a:hover {text-decoration: underline;color: red; }</style>')
f.write('<p>'+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+' 已更新</p>')

match_href = []
start_link = "https://www.nature.com/nphys/research-articles"
html = urlopen(start_link).read().decode('utf-8')  # open the web page
soup = BeautifulSoup(html, features='lxml') # parse into soup
all_article = soup.find_all('article', {"class":"u-full-height c-card c-card--flush"})
for article in all_article:
    all_a_tag = article.find_all('a', href=True)  # get all hyperlink tags
    for a_tag in all_a_tag:
        href = a_tag['href']  # hyperlink string
        if re.search('/articles/', href): # article link
            if re.search('https://www.nature.com', href)==None:  # if the link is not complete, complete it
                href = 'https://www.nature.com'+ href
            if href not in match_href and re.search('\?', href)==None:  # skip duplicate links
                match_href.append(href)
                f.write('<li><a target=\"_blank\" href=\"')
                f.write(href)   # article link
                f.write('\">')
                f.write(a_tag.get_text())
                f.write('</a>  ')
    time = article.find('time', {"class": "c-meta__item c-meta__item--block-at-lg"}).get_text()
    f.write(time+'</li>')
f.close()

@@ -1,36 +0,0 @@
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import datetime


year = datetime.datetime.now().year
month = datetime.datetime.now().month
day = datetime.datetime.now().day

f = open('physics_magazine.html', 'w', encoding='UTF-8')
f.write('<meta charset="utf-8"><style type="text/css">a{text-decoration: none;color: #0a5794;}a:hover {text-decoration: underline;color: red; }</style>')
f.write('<p>'+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+' 已更新</p>')

match_href = []
start_link = "https://physics.aps.org/"
html = urlopen(start_link).read().decode('utf-8')  # open the web page
soup = BeautifulSoup(html, features='lxml') # parse into soup
all_articles = soup.find_all('div', {"class":"feed-item-details"})
for article in all_articles:
    all_a_tag = article.find_all('a', href=True)  # get all hyperlink tags
    for a_tag in all_a_tag:
        href = a_tag['href']  # hyperlink string
        if re.search('/articles/', href): # article link
            if re.search('https://physics.aps.org', href)==None:  # if the link is not complete, complete it
                href = 'https://physics.aps.org'+ href
            if href not in match_href:
                match_href.append(href)
                f.write('<li><a target=\"_blank\" href=\"')
                f.write(href)   # article link
                f.write('\">')
                f.write(a_tag.get_text())
                f.write('</a>  ')
    time = article.find('time', {"class": "feed-item-date"}).get_text()
    f.write(time+'</li>')
f.close()

@@ -1,42 +0,0 @@
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import datetime


year = datetime.datetime.now().year
month = datetime.datetime.now().month
day = datetime.datetime.now().day


f = open('prb.html', 'w', encoding='UTF-8')
f.write('<meta charset="utf-8"><style type="text/css">a{text-decoration: none;color: #0a5794;}a:hover {text-decoration: underline;color: red; }</style>')
f.write('<p>'+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+' 已更新</p>')

match_href = []
for loop in range(1):
    if loop == 0:
        start_link = "https://journals.aps.org/prb/recent"  # page 1
    # elif loop == 1:
    #     start_link = "https://journals.aps.org/prb/recent?page=2"  # page 2
    html = urlopen(start_link).read().decode('utf-8')  # open the web page
    soup = BeautifulSoup(html, features='lxml') # parse into soup
    all_article = soup.find_all('div', {"class":"article panel article-result"})
    for article in all_article:
        all_a_tag = article.find_all('a', href=True)  # get all hyperlink tags
        for a_tag in all_a_tag:
            href = a_tag['href']  # hyperlink string
            if re.search('/abstract/', href): # article link
                if re.search('https://journals.aps.org', href)==None:  # if the link is not complete, complete it
                    href = 'https://journals.aps.org'+ href
                if href not in match_href and re.search('\?', href)==None:  # skip duplicate links
                    match_href.append(href)
                    f.write('<li><a target=\"_blank\" href=\"')
                    f.write(href)   # article link
                    f.write('\">')
                    f.write(a_tag.get_text())
                    f.write('</a>  ')
        info = article.find('h6', {"class": "pub-info"}).get_text()
        f.write(re.findall('– Published .*', info, re.S)[0][12:]+'</li>')
f.close()

@@ -1,42 +0,0 @@
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import datetime


year = datetime.datetime.now().year
month = datetime.datetime.now().month
day = datetime.datetime.now().day


f = open('prl.html', 'w', encoding='UTF-8')
f.write('<meta charset="utf-8"><style type="text/css">a{text-decoration: none;color: #0a5794;}a:hover {text-decoration: underline;color: red; }</style>')
f.write('<p>'+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+' 已更新</p>')

match_href = []
for loop in range(1):
    if loop == 0:
        start_link = "https://journals.aps.org/prl/recent"  # page 1
    # elif loop == 1:
    #     start_link = "https://journals.aps.org/prl/recent?page=2"  # page 2
    html = urlopen(start_link).read().decode('utf-8')  # open the web page
    soup = BeautifulSoup(html, features='lxml') # parse into soup
    all_article = soup.find_all('div', {"class":"article panel article-result"})
    for article in all_article:
        all_a_tag = article.find_all('a', href=True)  # get all hyperlink tags
        for a_tag in all_a_tag:
            href = a_tag['href']  # hyperlink string
            if re.search('/abstract/', href): # article link
                if re.search('https://journals.aps.org', href)==None:  # if the link is not complete, complete it
                    href = 'https://journals.aps.org'+ href
                if href not in match_href and re.search('\?', href)==None:  # skip duplicate links
                    match_href.append(href)
                    f.write('<li><a target=\"_blank\" href=\"')
                    f.write(href)   # article link
                    f.write('\">')
                    f.write(a_tag.get_text())
                    f.write('</a>  ')
        info = article.find('h6', {"class": "pub-info"}).get_text()
        f.write(re.findall('– Published.*', info, re.S)[0][12:]+'</li>')
f.close()

@@ -1,66 +0,0 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/17937
"""

from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import datetime

year = datetime.datetime.now().year
month = datetime.datetime.now().month
day = datetime.datetime.now().day

# Get the links

# Without a simulated Zhihu login, only the latest two articles can be crawled
authors = ["https://www.zhihu.com/people/guanjihuan/posts"]  # Guan

match_href = []
for i0 in range(len(authors)):
    start_link = authors[i0]
    html = urlopen(start_link).read().decode('utf-8')  # open the web page
    soup = BeautifulSoup(html, features='lxml') # parse into soup
    all_a_tag = soup.find_all('a', href=True)  # get all hyperlink tags
    for a_tag in all_a_tag:
        href = a_tag['href']  # hyperlink string
        if re.search('//zhuanlan.zhihu.com/p/', href) and not re.search('edit', href): # article link
            if re.search('https:', href)==None:  # if the link is not complete, complete it
                href = 'https:'+ href
            if href not in match_href:
                match_href.append(href)


# Sort the links
numbers = []
match_href_new = []
for href in match_href:
    numbers.append(int(href[29:]))
numbers.sort(reverse = True)
for n in numbers:
    match_href_new.append('https://zhuanlan.zhihu.com/p/'+str(n))


# Get the content and write it to a file
f = open('zhihu.html', 'w', encoding='UTF-8')
f.write('<meta charset="utf-8"><style type="text/css">a{text-decoration: none;color: #004e4e;}a:hover {text-decoration: underline;color: red; }</style>')

f.write('<p>'+str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+' 已更新</p>')
for href in match_href_new:
    try:
        html = urlopen(href).read().decode('utf-8')   # open the article link
        soup = BeautifulSoup(html, features='lxml') # parse into soup
        title = soup.title   # article title
        f.write('<li><a target=\"_blank\" href=\"')
        f.write(str(href))   # article link
        f.write('\">')
        f.write(str(title.get_text()[:-5]))
        f.write('</a>  ')
        author = soup.find("span", {"class": "UserLink AuthorInfo-name"})
        f.write(str(author.get_text()+'  '))
        post_time = soup.find("div", {"class" : "ContentItem-time"})
        f.write(str(post_time.get_text()[4:-6])+'</li>')
    except:
        pass
f.close()

@@ -1,7 +1,20 @@
import pickle

data = [1, 2, 3]

# Save to a file
with open('a.txt', 'wb') as f:
    pickle.dump(data, f)
with open('a.txt', 'rb') as f:
    data_load = pickle.load(f)
print(data_load)
    data_load_from_file = pickle.load(f)
print(data_load_from_file)
print()

# Convert the object to a byte stream
serialized_data = pickle.dumps(data) # convert to a byte stream
print(type(serialized_data))
print(serialized_data)
print()
loaded_data = pickle.loads(serialized_data) # convert back to the original type
print(type(loaded_data))
print(loaded_data)

2024.11.21_kelly_formula/kelly_formula.py (new file, 49 lines)
@@ -0,0 +1,49 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/43508
"""

import numpy as np
import matplotlib.pyplot as plt

investment_ratio_array = np.arange(0.1, 1.1, 0.1)
investment_times = 1000
test_times = 100

# A few examples: https://www.guanjihuan.com/archives/43412

# Parameters of example (2)
p = 0.6 # win probability
b = 1 # gain per unit bet
a = 1 # loss per unit bet

# # Parameters of example (3)
# p = 0.5
# b = 1
# a = 0.5

win_array = [] # winning position sizes
for i0 in range(test_times):
    # print(i0)
    capital_array = []
    for f in investment_ratio_array:
        capital = 1
        for _ in range(investment_times):
            investment = capital*f
            if investment>0:
                random_value = np.random.uniform(0, 1)
                if random_value<p:
                    capital = capital+investment*b
                else:
                    capital = capital-investment*a
        capital_array.append(capital)
    max_capital_index = capital_array.index(max(capital_array))
    win_array.append(investment_ratio_array[max_capital_index])

def kelly_formula(p, b, a):
    f=(p/a)-((1-p)/b)
    return f

print(kelly_formula(p=p, b=b, a=a))
plt.hist(win_array, bins=100, color='skyblue')
plt.show()
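
As a quick sanity check of the closed-form Kelly fraction used above, f* = p/a − (1−p)/b: for example (2) (p = 0.6, b = 1, a = 1) it gives 0.6 − 0.4 = 0.2, and for the commented-out example (3) (p = 0.5, b = 1, a = 0.5) it gives 1.0 − 0.5 = 0.5, so the histogram of winning position sizes is expected to cluster near those values. A standalone evaluation of the same formula:

```python
# Standalone evaluation of the Kelly fraction f* = p/a - (1-p)/b for both examples.
def kelly_formula(p, b, a):
    return (p/a) - ((1-p)/b)

print(kelly_formula(p=0.6, b=1, a=1))    # ≈ 0.2 (example 2)
print(kelly_formula(p=0.5, b=1, a=0.5))  # 0.5   (example 3)
```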

2024.12.02_MNIST/download_MNIST_and_show_image.py (new file, 33 lines)
@@ -0,0 +1,33 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/43720
"""

from torchvision import datasets, transforms

transform = transforms.Compose([transforms.ToTensor()]) # define preprocessing (convert to Tensor)
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform) # load the MNIST training set
print(type(train_dataset))
size_of_train_dataset = len(train_dataset)
print(size_of_train_dataset)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform) # load the MNIST test set
print(type(test_dataset))
size_of_test_dataset = len(test_dataset)
print(size_of_test_dataset)

import random
rand_number = random.randint(0, size_of_train_dataset-1)
image, label = train_dataset[rand_number] # get one image and its label
print(type(image))
print(image.shape)
image = image.squeeze(0)  # remove the single-channel dimension (1, 28, 28) -> (28, 28)
print(type(image))
print(image.shape)

import matplotlib.pyplot as plt
# import os
# os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # works around possible conflicts between multiple OpenMP libraries; try this if an OMP error occurs.
plt.imshow(image, cmap='gray') # show the image
plt.title(f"Label: {label}")  # label (ground-truth value)
plt.axis('off')  # hide the axes
plt.show()

2024.12.02_MNIST/train_and_predict_MNIST.PY (new file, 100 lines)
@@ -0,0 +1,100 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/43720
"""

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms

transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))])  # data transform (convert images to Tensor and normalize with mean and std 0.5)
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform) # download the training set
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform) # download the test set

# Training function
def train(model, train_loader, criterion, optimizer, num_epochs=5):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in train_loader:
            # print(images.shape)
            optimizer.zero_grad()  # clear previous gradients
            outputs = model(images) # forward pass
            loss = criterion(outputs, labels)
            loss.backward() # backpropagation and optimization
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1) # compute accuracy
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        avg_loss = running_loss / len(train_loader)
        accuracy = 100 * correct / total
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

# Test function
def test(model, test_loader):
    model.eval()  # set to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # disable gradient computation
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

# Train and test
def train_and_test(model, train_loader, test_loader):
    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    train(model, train_loader, criterion, optimizer, num_epochs=10)
    test(model, test_loader)

# Flatten the data and rebuild the DataLoader (input preprocessing for the fully connected networks)
def flatten_data(data_loader):
    images_array = []
    labels_array = []
    for images, labels in data_loader:
        images = torch.flatten(images, start_dim=1) # flatten all dimensions except the batch dimension
        images_array.append(images)
        labels_array.append(labels)
    images_array = torch.cat(images_array, dim=0)
    labels_array = torch.cat(labels_array, dim=0)
    dataset_new = TensorDataset(images_array, labels_array)
    loader_new = DataLoader(dataset_new, batch_size=64, shuffle=True)
    return loader_new

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Flatten the data
train_loader_new = flatten_data(train_loader)
test_loader_new = flatten_data(test_loader)

# Install the package: pip install --upgrade guan
import guan

hidden_size = 64

print('---全连接神经网络模型(包含一个隐藏层)---')
model = guan.fully_connected_neural_network_with_one_hidden_layer(input_size=28*28, hidden_size=hidden_size, output_size=10, activation='relu')
train_and_test(model, train_loader_new, test_loader_new)

print('---全连接神经网络模型(包含两个隐藏层)---')
model = guan.fully_connected_neural_network_with_two_hidden_layers(input_size=28*28, hidden_size_1=hidden_size, hidden_size_2=hidden_size, output_size=10, activation_1='relu', activation_2='relu')
train_and_test(model, train_loader_new, test_loader_new)

print('---全连接神经网络模型(包含三个隐藏层)---')
model = guan.fully_connected_neural_network_with_three_hidden_layers(input_size=28*28, hidden_size_1=hidden_size, hidden_size_2=hidden_size, hidden_size_3=hidden_size, output_size=10, activation_1='relu', activation_2='relu', activation_3='relu')
train_and_test(model, train_loader_new, test_loader_new)

print('---卷积神经网络模型(包含两个卷积层和两个全连接层)---')
model = guan.convolutional_neural_network_with_two_convolutional_layers_and_two_fully_connected_layers(in_channels=1, out_channels_1=32, out_channels_2=64, kernel_size_1=3, kernel_size_2=3, stride_1=1, stride_2=1, padding_1=1, padding_2=1, pooling=1, pooling_kernel_size=2, pooling_stride=2, input_size=7*7*64, hidden_size_1=hidden_size, hidden_size_2=hidden_size, output_size=10)
train_and_test(model, train_loader, test_loader)
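
The four models above come from the external `guan` package, so their internals are not part of this diff. Purely as a hedged sketch (an assumption about what such a helper roughly returns, not the package's actual implementation), a plain-PyTorch module with one hidden layer that would plug into the same `train_and_test(model, train_loader_new, test_loader_new)` call could look like this:

```python
# Minimal sketch (assumption, not guan's implementation): a fully connected
# network with one hidden layer for the flattened 28*28 MNIST input.
import torch.nn as nn

class OneHiddenLayerNet(nn.Module):
    def __init__(self, input_size=28*28, hidden_size=64, output_size=10):
        super().__init__()
        self.hidden = nn.Linear(input_size, hidden_size)
        self.activation = nn.ReLU()
        self.output = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        return self.output(self.activation(self.hidden(x)))

# model = OneHiddenLayerNet()
# train_and_test(model, train_loader_new, test_loader_new)  # same flattened loaders as above
```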

2024.12.11_ollama/ollama_with_python.py (new file, 16 lines)
@@ -0,0 +1,16 @@
# Direct output
import ollama
response = ollama.chat(model="llama3.2:latest", messages=[{"role": "user","content": "你好"}], stream=False)
print(response['message']['content'])

# Streaming output
import ollama
response = ollama.chat(model="llama3.2:latest", messages=[{"role": "user", "content": "你好"}], stream=True)
for part in response:
    print(part['message']['content'], end='', flush=True)

# Streaming output with the model kept resident in the background (needs a manual `ollama stop` to shut down)
import ollama
response = ollama.chat(model="llama3.2:latest", messages=[{"role": "user", "content": "你好"}], stream=True, keep_alive=-1)
for part in response:
    print(part['message']['content'], end='', flush=True)