category
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
\documentclass{article} %文档类声明
|
||||
%导言区(文档类声明和正文间的是导言区)
|
||||
\begin{document} %正文开始
|
||||
Hello, world! %正文
|
||||
\end{document} %正文结束
|
||||
\documentclass{article} %文档类声明
|
||||
%导言区(文档类声明和正文间的是导言区)
|
||||
\begin{document} %正文开始
|
||||
Hello, world! %正文
|
||||
\end{document} %正文结束
|
34
language_learning/2019.12.04_latex_example/simple_example.tex → language_learning/latex/2019.12.04_latex_example/simple_example.tex
Executable file → Normal file
34
language_learning/2019.12.04_latex_example/simple_example.tex → language_learning/latex/2019.12.04_latex_example/simple_example.tex
Executable file → Normal file
@@ -1,18 +1,18 @@
|
||||
\documentclass{article} %文档类声明
|
||||
\usepackage{ctex} %一个支持中文宏包,如果不用中文无法显示
|
||||
|
||||
|
||||
\begin{document} %正文
|
||||
\title{这是一个标题} %标题
|
||||
\author{作者名字} %作者
|
||||
\date{} %\maketitle默认会加上当前时间,用\date{},空着内容可以取消时间的显示
|
||||
\maketitle %加了这个,标题、作者等信息才会显示
|
||||
\section{节}
|
||||
\subsection{小节}
|
||||
\subsubsection{子小节}
|
||||
Hello, world! %下面空一行代表换行
|
||||
\textbf{用\textbackslash textbf\{\}可以加粗文本} %\textbf{}可以加粗文本
|
||||
\section{节}
|
||||
\part{}
|
||||
\part{}
|
||||
\documentclass{article} %文档类声明
|
||||
\usepackage{ctex} %一个支持中文宏包,如果不用中文无法显示
|
||||
|
||||
|
||||
\begin{document} %正文
|
||||
\title{这是一个标题} %标题
|
||||
\author{作者名字} %作者
|
||||
\date{} %\maketitle默认会加上当前时间,用\date{},空着内容可以取消时间的显示
|
||||
\maketitle %加了这个,标题、作者等信息才会显示
|
||||
\section{节}
|
||||
\subsection{小节}
|
||||
\subsubsection{子小节}
|
||||
Hello, world! %下面空一行代表换行
|
||||
\textbf{用\textbackslash textbf\{\}可以加粗文本} %\textbf{}可以加粗文本
|
||||
\section{节}
|
||||
\part{}
|
||||
\part{}
|
||||
\end{document}
|
0
language_learning/2019.12.05_beamer_as_slides/1.jpg → language_learning/latex/2019.12.05_beamer_as_slides/1.jpg
Executable file → Normal file
0
language_learning/2019.12.05_beamer_as_slides/1.jpg → language_learning/latex/2019.12.05_beamer_as_slides/1.jpg
Executable file → Normal file
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |
98
language_learning/2019.12.05_beamer_as_slides/beamer.tex → language_learning/latex/2019.12.05_beamer_as_slides/beamer.tex
Executable file → Normal file
98
language_learning/2019.12.05_beamer_as_slides/beamer.tex → language_learning/latex/2019.12.05_beamer_as_slides/beamer.tex
Executable file → Normal file
@@ -1,50 +1,50 @@
|
||||
\documentclass{beamer}
|
||||
\usepackage{ctex} %一个支持中文宏包,如果不用中文无法显示
|
||||
\usepackage{graphicx} %这个包提供\includegraphics命令来插入图片
|
||||
|
||||
\usetheme{Boadilla} %主题
|
||||
\usecolortheme{default} %主题的颜色
|
||||
|
||||
\title{这是PPT标题} %标题
|
||||
\author{作者名字\inst{1},作者名字\inst{2}} %作者
|
||||
\institute{\inst{1}第一个单位\and\inst{2}第二个单位} %这里的\and有换行的效果
|
||||
\date{\today} %时间(默认也会显示)
|
||||
\logo{\includegraphics[height=1.0cm]{1.jpg}} %右下角的小logo
|
||||
|
||||
\begin{document} %正文开始
|
||||
\begin{frame} %相当于ppt里的一页
|
||||
\titlepage %标题页
|
||||
\end{frame}
|
||||
\begin{frame}
|
||||
\frametitle{目录} %当前页的标题
|
||||
\tableofcontents %制作目录,需要\section{}配合
|
||||
\end{frame}
|
||||
|
||||
\section{第一节} %用来做目录
|
||||
\begin{frame}
|
||||
\frametitle{当前页的标题1}
|
||||
这是第一节第一页的内容。This is a text in the first frame. This is a text in the first frame. This is a text in the first frame.
|
||||
\end{frame}
|
||||
|
||||
\section{第二节}
|
||||
\begin{frame}
|
||||
\frametitle{当前页的标题2}
|
||||
这是第二节第一页的内容。这里使用了\textbackslash pause。\pause This is a text in the second frame. This is a text in the second frame. This is a text in the second frame. %\pause是暂停,前后会分成两页。
|
||||
\end{frame}
|
||||
\begin{frame}
|
||||
\frametitle{当前页的标题3:Two-column slide}
|
||||
\begin{columns} %分成列
|
||||
\column{0.5\textwidth} %占用一半
|
||||
This is a text in first column.
|
||||
$$E=mc^2$$
|
||||
\begin{itemize} %制作列表
|
||||
\item First item
|
||||
\item Second item
|
||||
\end{itemize}
|
||||
\column{0.5\textwidth} %占用一半
|
||||
This text will be in the second column
|
||||
and on a second thought this is a nice looking
|
||||
layout in some cases.
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
\documentclass{beamer}
|
||||
\usepackage{ctex} %一个支持中文宏包,如果不用中文无法显示
|
||||
\usepackage{graphicx} %这个包提供\includegraphics命令来插入图片
|
||||
|
||||
\usetheme{Boadilla} %主题
|
||||
\usecolortheme{default} %主题的颜色
|
||||
|
||||
\title{这是PPT标题} %标题
|
||||
\author{作者名字\inst{1},作者名字\inst{2}} %作者
|
||||
\institute{\inst{1}第一个单位\and\inst{2}第二个单位} %这里的\and有换行的效果
|
||||
\date{\today} %时间(默认也会显示)
|
||||
\logo{\includegraphics[height=1.0cm]{1.jpg}} %右下角的小logo
|
||||
|
||||
\begin{document} %正文开始
|
||||
\begin{frame} %相当于ppt里的一页
|
||||
\titlepage %标题页
|
||||
\end{frame}
|
||||
\begin{frame}
|
||||
\frametitle{目录} %当前页的标题
|
||||
\tableofcontents %制作目录,需要\section{}配合
|
||||
\end{frame}
|
||||
|
||||
\section{第一节} %用来做目录
|
||||
\begin{frame}
|
||||
\frametitle{当前页的标题1}
|
||||
这是第一节第一页的内容。This is a text in the first frame. This is a text in the first frame. This is a text in the first frame.
|
||||
\end{frame}
|
||||
|
||||
\section{第二节}
|
||||
\begin{frame}
|
||||
\frametitle{当前页的标题2}
|
||||
这是第二节第一页的内容。这里使用了\textbackslash pause。\pause This is a text in the second frame. This is a text in the second frame. This is a text in the second frame. %\pause是暂停,前后会分成两页。
|
||||
\end{frame}
|
||||
\begin{frame}
  \frametitle{当前页的标题3:Two-column slide}
  \begin{columns} % split the frame into side-by-side columns
    \column{0.5\textwidth} % first column: half of the text width
    This is a text in first column.
    % \[...\] is the LaTeX display-math form; the plain-TeX $$...$$ it replaces
    % gives inconsistent vertical spacing and ignores class options such as fleqn.
    \[E=mc^2\]
    \begin{itemize} % bulleted list
      \item First item
      \item Second item
    \end{itemize}
    \column{0.5\textwidth} % second column: the other half
    This text will be in the second column
    and on a second thought this is a nice looking
    layout in some cases.
  \end{columns}
\end{frame}
|
||||
\end{document}
|
80
language_learning/2019.10.29_matlab_example/matlab_example.m → language_learning/others/2019.10.29_matlab_example/matlab_example.m
Executable file → Normal file
80
language_learning/2019.10.29_matlab_example/matlab_example.m → language_learning/others/2019.10.29_matlab_example/matlab_example.m
Executable file → Normal file
@@ -1,41 +1,41 @@
|
||||
% This code is supported by the website: https://www.guanjihuan.com
|
||||
% The newest version of this code is on the web page: https://www.guanjihuan.com/archives/766
|
||||
|
||||
|
||||
%在matlab里加上百分号“%”是注释。
|
||||
%快捷键:选中按ctrl+R为注释,选中按ctrl+T为取消注释,
|
||||
clc; %clc有窗口清空的效果,一般都用上
|
||||
clear all; %clear all可以清空所有变量,一般都用上
|
||||
clf; %clf为清空输出的图片内容,在画图的时候最好添加上
|
||||
|
||||
aa=1 %没加分号“;”,默认打印输出
|
||||
bb=2; %加了分号“;”,即不打印输出
|
||||
cc1=zeros(2,3) %零矩阵用zeros()
|
||||
cc2=eye(3,3) %单位矩阵
|
||||
|
||||
%矩阵乘积
|
||||
matrix1=[3,3;3,3] %里面分号代表矩阵换一行。下标是从1开始记。
|
||||
matrix2=[2,0;0,2]
|
||||
matrix_product_1=matrix1*matrix2 % *是正常的矩阵乘积
|
||||
matrix_product_2=matrix1.*matrix2 % .*是矩阵每个元素对应相乘
|
||||
|
||||
%循环
|
||||
for i0=1:0.5:2 %循环内容为for到end。a:b:c代表最小为a,最大为c,步长为b
|
||||
for_result=i0+1i %i在matlab中代表虚数,所以起变量名最好不要用i。要输出内容,后面不加分号即可
|
||||
end
|
||||
|
||||
%判断
|
||||
if aa~=1 %在matlab中,~=代表不等于,==代表等于
|
||||
dd=100
|
||||
else
|
||||
dd=300
|
||||
end
|
||||
|
||||
matrix=[2,3;5,7]
|
||||
%求本征矢和本征值
|
||||
[V,D]=eig(matrix) %在matlab中,V的列向量是本征矢,注意是列。D的对角上是对应本征值。
|
||||
%求逆
|
||||
inv1=inv(matrix) %求逆
|
||||
inv2=matrix^-1 %求逆也可以这样写
|
||||
%画图
|
||||
% This code is supported by the website: https://www.guanjihuan.com
|
||||
% The newest version of this code is on the web page: https://www.guanjihuan.com/archives/766
|
||||
|
||||
|
||||
%在matlab里加上百分号“%”是注释。
|
||||
%快捷键:选中按ctrl+R为注释,选中按ctrl+T为取消注释,
|
||||
clc; %clc有窗口清空的效果,一般都用上
|
||||
clear all; %clear all可以清空所有变量,一般都用上
|
||||
clf; %clf为清空输出的图片内容,在画图的时候最好添加上
|
||||
|
||||
aa=1 %没加分号“;”,默认打印输出
|
||||
bb=2; %加了分号“;”,即不打印输出
|
||||
cc1=zeros(2,3) %零矩阵用zeros()
|
||||
cc2=eye(3,3) %单位矩阵
|
||||
|
||||
%矩阵乘积
|
||||
matrix1=[3,3;3,3] %里面分号代表矩阵换一行。下标是从1开始记。
|
||||
matrix2=[2,0;0,2]
|
||||
matrix_product_1=matrix1*matrix2 % *是正常的矩阵乘积
|
||||
matrix_product_2=matrix1.*matrix2 % .*是矩阵每个元素对应相乘
|
||||
|
||||
%循环
|
||||
for i0=1:0.5:2 %循环内容为for到end。a:b:c代表最小为a,最大为c,步长为b
|
||||
for_result=i0+1i %i在matlab中代表虚数,所以起变量名最好不要用i。要输出内容,后面不加分号即可
|
||||
end
|
||||
|
||||
%判断
|
||||
if aa~=1 %在matlab中,~=代表不等于,==代表等于
|
||||
dd=100
|
||||
else
|
||||
dd=300
|
||||
end
|
||||
|
||||
matrix=[2,3;5,7]
|
||||
%求本征矢和本征值
|
||||
[V,D]=eig(matrix) %在matlab中,V的列向量是本征矢,注意是列。D的对角上是对应本征值。
|
||||
%求逆
|
||||
inv1=inv(matrix) %求逆
|
||||
inv2=matrix^-1 %求逆也可以这样写
|
||||
%画图
|
||||
plot([0:20],[10:-1:-10],'-o') %更多画图技巧可参考官方文档或网上资料
|
@@ -1,47 +1,47 @@
|
||||
# 一级标题
|
||||
## 二级标题
|
||||
### 三级标题
|
||||
#### 四级标题
|
||||
|
||||
有序列表:数字加一个点
|
||||
|
||||
1. 列表内容
|
||||
2. 列表内容
|
||||
3. 列表内容
|
||||
|
||||
无序列表:用 + - * 任何一种都可以。为了不和其他记号重复,个人倾向于用 + 。
|
||||
|
||||
+ 列表内容
|
||||
+ 嵌套前面加几个空格。为了保险起见,个人倾向于用四个空格或一个Tab。
|
||||
+ 列表内容
|
||||
+ 列表嵌套
|
||||
+ 列表嵌套
|
||||
+ 列表嵌套
|
||||
|
||||
*倾斜:前后一个星号*
|
||||
|
||||
**加粗:前后两个星号**
|
||||
|
||||
***斜体加粗:前后三个星号***
|
||||
|
||||
代码:用三个反引号。如下所示。
|
||||
|
||||
```
|
||||
print('hello world')
|
||||
```
|
||||
|
||||
分割线:三个或者三个以上的 - 或 * 。为了不和其他记号重复,个人倾向于用 --- 。
|
||||
|
||||
---
|
||||
|
||||
在Markdown中空一行可采用以下符号。该符号为HTML中的符号,在Markdown中也是支持的。
|
||||
|
||||
<br />
|
||||
|
||||
以下是表格的书写形式。其中,第二行用一个横杆也是可以。为了保险起见,个人倾向于用三个横杆。
|
||||
|
||||
| 右对齐 | 居中对齐 | 左对齐 |
|
||||
| ---: | :---: | :--- |
|
||||
| 单元格 123 | 单元格 456 | 单元格 759 |
|
||||
| 单元格 | 单元格 | 单元格 |
|
||||
# 一级标题
|
||||
## 二级标题
|
||||
### 三级标题
|
||||
#### 四级标题
|
||||
|
||||
有序列表:数字加一个点
|
||||
|
||||
1. 列表内容
|
||||
2. 列表内容
|
||||
3. 列表内容
|
||||
|
||||
无序列表:用 + - * 任何一种都可以。为了不和其他记号重复,个人倾向于用 + 。
|
||||
|
||||
+ 列表内容
|
||||
+ 嵌套前面加几个空格。为了保险起见,个人倾向于用四个空格或一个Tab。
|
||||
+ 列表内容
|
||||
+ 列表嵌套
|
||||
+ 列表嵌套
|
||||
+ 列表嵌套
|
||||
|
||||
*倾斜:前后一个星号*
|
||||
|
||||
**加粗:前后两个星号**
|
||||
|
||||
***斜体加粗:前后三个星号***
|
||||
|
||||
代码:用三个反引号。如下所示。
|
||||
|
||||
```
|
||||
print('hello world')
|
||||
```
|
||||
|
||||
分割线:三个或者三个以上的 - 或 * 。为了不和其他记号重复,个人倾向于用 --- 。
|
||||
|
||||
---
|
||||
|
||||
在Markdown中空一行可采用以下符号。该符号为HTML中的符号,在Markdown中也是支持的。
|
||||
|
||||
<br />
|
||||
|
||||
以下是表格的书写形式。其中,第二行用一个横杆也是可以。为了保险起见,个人倾向于用三个横杆。
|
||||
|
||||
| 右对齐 | 居中对齐 | 左对齐 |
|
||||
| ---: | :---: | :--- |
|
||||
| 单元格 123 | 单元格 456 | 单元格 759 |
|
||||
| 单元格 | 单元格 | 单元格 |
|
||||
| 单元格 | 单元格 | 单元格 |
|
234
language_learning/2019.10.10_python_example/python_example.py → language_learning/python/2019.10.10_python_example/python_example.py
Executable file → Normal file
234
language_learning/2019.10.10_python_example/python_example.py → language_learning/python/2019.10.10_python_example/python_example.py
Executable file → Normal file
@@ -1,118 +1,118 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/417
|
||||
"""
|
||||
|
||||
|
||||
# 第一部分:Python基本操作(循环,判断,函数,文件写入)
|
||||
print('\n第一部分:Python基本操作(循环,判断,函数,文件写入)\n') # \n代表换行
|
||||
|
||||
for i in range(5): # 循环(这里只举例for循环,还有其他循环)
|
||||
print('我是循环产生的数:', i) # Python中没有end,因此每个语句的缩进很重要
|
||||
if i == 2: # 判断
|
||||
print('判断:我是第三个数 2')
|
||||
else:
|
||||
pass # pass代表不执行任何语句,用于占位,可以之后再补充,不然空着会报错
|
||||
print() # 输出空一行
|
||||
|
||||
def fun0(arg): # 定义函数
|
||||
print('我是函数中的内容,参数值为:', arg)
|
||||
return arg*2 # 返回值
|
||||
|
||||
print('函数返回值:', fun0(5)) # 调用函数
|
||||
print()
|
||||
|
||||
def main(): # “主函数”,其实也是一个普通的函数,也可以起其他名字
|
||||
print('我是主函数中的内容。')
|
||||
print()
|
||||
|
||||
if __name__ == '__main__': # 如果直接运行本文件,那么执行以下内容。如果是import本文件,那么不执行。
|
||||
main()
|
||||
|
||||
# 关于类class,这里不举例了。科学计算中主要还是面向过程,面向对象用的比较少。
|
||||
|
||||
# 文件写入
|
||||
# 第一种方式
|
||||
with open('test1.txt', 'w') as f1: # 其中'w'为重新写入,改为'a'是补充内容
|
||||
f1.write(str(100)+'\n这是第一种方式写入文件') # str()为转换成字符串
|
||||
# 第二种方式
|
||||
f2 = open('test2.txt', 'w') # 打开文件
|
||||
f2.write(str(200)+'\n这是第二种方式写入文件') # 写入文件
|
||||
f2.close() # 关闭文件
|
||||
print('已写入文件!')
|
||||
print()
|
||||
|
||||
|
||||
|
||||
|
||||
# 第二部分:Numpy库中常用的语句
|
||||
print('\n\n\n第二部分:Numpy库中常用的语句\n')
|
||||
import numpy as np
|
||||
|
||||
print('零矩阵:\n', np.zeros((2, 3))) # 注意np.zeros()里需要填元组,因此显示的是两个括号
|
||||
print('单位矩阵:\n', np.identity(3)) # 3行3列的单位矩阵,或者可以用np.eye()
|
||||
print('把一维数组按对角矩阵排列:\n', np.diag([1, 3, 5]))
|
||||
print()
|
||||
|
||||
print('指定步长的等差数列:\n', np.arange(1, 5, .5)) # 区间是左闭右开[1, 5),步长为0.5
|
||||
print('指定个数的等差数列:\n', np.linspace(-2, 2, 5)) # 区间是左闭右闭[-2, 2], 数量是5
|
||||
print()
|
||||
|
||||
print('随机数:\n', np.random.uniform(-2, 2)) # 随机浮点数
|
||||
print('随机整数:\n', np.random.randint(-10, 10)) # 区间是左闭右开[-10, 10)
|
||||
print()
|
||||
|
||||
# 随机数除了使用numpy库,也使用random生成
|
||||
import random
|
||||
print('使用random库的随机数:\n', random.uniform(-2,2)) # 随机浮点数
|
||||
print('使用random库的随机整数:\n', random.randint(-10, 10)) # 区间是左闭右闭[-10, 10]
|
||||
print()
|
||||
|
||||
print('数组从小到大排列:\n', np.sort([1, 7, 0, 3]))
|
||||
print('数组从小到大排列对应的索引:\n', np.argsort([1, 7, 0, 3])) # 注意Python中下标是从0开始的
|
||||
print()
|
||||
|
||||
matrix0 = np.array([[1, 2+9j, 3], [2, 5, 7]]) # numpy数组
|
||||
print('矩阵0:\n', matrix0)
|
||||
print('矩阵的维度:\n', matrix0.shape) # 查看矩阵的维度
|
||||
print('矩阵的行数:\n', matrix0.shape[0]) # 查看矩阵的行数
|
||||
print('矩阵的列数:\n', matrix0.shape[1]) # 查看矩阵的列数
|
||||
print('矩阵转置:\n', matrix0.transpose()) # 矩阵转置
|
||||
print('矩阵转置共轭:\n', matrix0.transpose().conj()) # 矩阵转置共轭
|
||||
print()
|
||||
|
||||
matrix1 = np.array([[3, 5], [2, 7]])
|
||||
eigenvalue, eigenvector = np.linalg.eig(matrix1) # 求本征值,本征向量
|
||||
print('矩阵1:\n', matrix1)
|
||||
print('本征值:\n', eigenvalue)
|
||||
print('本征向量:\n', eigenvector) # 列向量为本征向量
|
||||
print('逆矩阵:\n', np.linalg.inv(matrix1)) # 求逆
|
||||
print('计算行列式:\n', np.linalg.det(matrix1)) # 行列式
|
||||
print()
|
||||
|
||||
matrix2 = np.array([[1, 2], [3, 4]])
|
||||
print('矩阵2:\n', matrix2)
|
||||
print('矩阵1和矩阵2相乘:\n', np.matmul(matrix1, matrix2)) # 矩阵乘积,或者可以用np.dot()
|
||||
print()
|
||||
|
||||
a = np.array([1, 2])
|
||||
print('numpy数组a=', a)
|
||||
b = np.array([3, 4])
|
||||
print('numpy数组b=', b)
|
||||
c = np.append(a, b, axis=0) # 增加元素
|
||||
print('numpy数组增加元素:\n', c)
|
||||
d = np.append([a], [b], axis=0) # 增加行(列数要相同),或者用np.row_stack(([a], [b]))
|
||||
print('numpy数组增加行:\n', d)
|
||||
e = np.append([a], [b], axis=1) # 增加列(行数要相同),或者用np.column_stack(([a], [b]))
|
||||
print('numpy数组增加列:\n', e)
|
||||
print('重新观察:a=', a)
|
||||
print('重新观察:b=', b)
|
||||
print()
|
||||
|
||||
|
||||
# 如果不是numpy数组,原python数组可以直接用以下方法增加元素
|
||||
c = [100, 200]
|
||||
print('python数组c=', c)
|
||||
c.append(300)
|
||||
print('增加元素后,c=', c)
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/417
|
||||
"""
|
||||
|
||||
|
||||
# 第一部分:Python基本操作(循环,判断,函数,文件写入)
|
||||
print('\n第一部分:Python基本操作(循环,判断,函数,文件写入)\n') # \n代表换行
|
||||
|
||||
for i in range(5): # 循环(这里只举例for循环,还有其他循环)
|
||||
print('我是循环产生的数:', i) # Python中没有end,因此每个语句的缩进很重要
|
||||
if i == 2: # 判断
|
||||
print('判断:我是第三个数 2')
|
||||
else:
|
||||
pass # pass代表不执行任何语句,用于占位,可以之后再补充,不然空着会报错
|
||||
print() # 输出空一行
|
||||
|
||||
def fun0(arg): # 定义函数
|
||||
print('我是函数中的内容,参数值为:', arg)
|
||||
return arg*2 # 返回值
|
||||
|
||||
print('函数返回值:', fun0(5)) # 调用函数
|
||||
print()
|
||||
|
||||
def main(): # “主函数”,其实也是一个普通的函数,也可以起其他名字
|
||||
print('我是主函数中的内容。')
|
||||
print()
|
||||
|
||||
if __name__ == '__main__': # 如果直接运行本文件,那么执行以下内容。如果是import本文件,那么不执行。
|
||||
main()
|
||||
|
||||
# 关于类class,这里不举例了。科学计算中主要还是面向过程,面向对象用的比较少。
|
||||
|
||||
# 文件写入
|
||||
# 第一种方式
|
||||
with open('test1.txt', 'w') as f1: # 其中'w'为重新写入,改为'a'是补充内容
|
||||
f1.write(str(100)+'\n这是第一种方式写入文件') # str()为转换成字符串
|
||||
# 第二种方式
|
||||
f2 = open('test2.txt', 'w') # 打开文件
|
||||
f2.write(str(200)+'\n这是第二种方式写入文件') # 写入文件
|
||||
f2.close() # 关闭文件
|
||||
print('已写入文件!')
|
||||
print()
|
||||
|
||||
|
||||
|
||||
|
||||
# 第二部分:Numpy库中常用的语句
|
||||
print('\n\n\n第二部分:Numpy库中常用的语句\n')
|
||||
import numpy as np
|
||||
|
||||
print('零矩阵:\n', np.zeros((2, 3))) # 注意np.zeros()里需要填元组,因此显示的是两个括号
|
||||
print('单位矩阵:\n', np.identity(3)) # 3行3列的单位矩阵,或者可以用np.eye()
|
||||
print('把一维数组按对角矩阵排列:\n', np.diag([1, 3, 5]))
|
||||
print()
|
||||
|
||||
print('指定步长的等差数列:\n', np.arange(1, 5, .5)) # 区间是左闭右开[1, 5),步长为0.5
|
||||
print('指定个数的等差数列:\n', np.linspace(-2, 2, 5)) # 区间是左闭右闭[-2, 2], 数量是5
|
||||
print()
|
||||
|
||||
print('随机数:\n', np.random.uniform(-2, 2)) # 随机浮点数
|
||||
print('随机整数:\n', np.random.randint(-10, 10)) # 区间是左闭右开[-10, 10)
|
||||
print()
|
||||
|
||||
# 随机数除了使用numpy库,也使用random生成
|
||||
import random
|
||||
print('使用random库的随机数:\n', random.uniform(-2,2)) # 随机浮点数
|
||||
print('使用random库的随机整数:\n', random.randint(-10, 10)) # 区间是左闭右闭[-10, 10]
|
||||
print()
|
||||
|
||||
print('数组从小到大排列:\n', np.sort([1, 7, 0, 3]))
|
||||
print('数组从小到大排列对应的索引:\n', np.argsort([1, 7, 0, 3])) # 注意Python中下标是从0开始的
|
||||
print()
|
||||
|
||||
matrix0 = np.array([[1, 2+9j, 3], [2, 5, 7]]) # numpy数组
|
||||
print('矩阵0:\n', matrix0)
|
||||
print('矩阵的维度:\n', matrix0.shape) # 查看矩阵的维度
|
||||
print('矩阵的行数:\n', matrix0.shape[0]) # 查看矩阵的行数
|
||||
print('矩阵的列数:\n', matrix0.shape[1]) # 查看矩阵的列数
|
||||
print('矩阵转置:\n', matrix0.transpose()) # 矩阵转置
|
||||
print('矩阵转置共轭:\n', matrix0.transpose().conj()) # 矩阵转置共轭
|
||||
print()
|
||||
|
||||
matrix1 = np.array([[3, 5], [2, 7]])
|
||||
eigenvalue, eigenvector = np.linalg.eig(matrix1) # 求本征值,本征向量
|
||||
print('矩阵1:\n', matrix1)
|
||||
print('本征值:\n', eigenvalue)
|
||||
print('本征向量:\n', eigenvector) # 列向量为本征向量
|
||||
print('逆矩阵:\n', np.linalg.inv(matrix1)) # 求逆
|
||||
print('计算行列式:\n', np.linalg.det(matrix1)) # 行列式
|
||||
print()
|
||||
|
||||
matrix2 = np.array([[1, 2], [3, 4]])
|
||||
print('矩阵2:\n', matrix2)
|
||||
print('矩阵1和矩阵2相乘:\n', np.matmul(matrix1, matrix2)) # 矩阵乘积,或者可以用np.dot()
|
||||
print()
|
||||
|
||||
a = np.array([1, 2])
|
||||
print('numpy数组a=', a)
|
||||
b = np.array([3, 4])
|
||||
print('numpy数组b=', b)
|
||||
c = np.append(a, b, axis=0) # 增加元素
|
||||
print('numpy数组增加元素:\n', c)
|
||||
d = np.append([a], [b], axis=0) # 增加行(列数要相同),或者用np.row_stack(([a], [b]))
|
||||
print('numpy数组增加行:\n', d)
|
||||
e = np.append([a], [b], axis=1) # 增加列(行数要相同),或者用np.column_stack(([a], [b]))
|
||||
print('numpy数组增加列:\n', e)
|
||||
print('重新观察:a=', a)
|
||||
print('重新观察:b=', b)
|
||||
print()
|
||||
|
||||
|
||||
# 如果不是numpy数组,原python数组可以直接用以下方法增加元素
|
||||
c = [100, 200]
|
||||
print('python数组c=', c)
|
||||
c.append(300)
|
||||
print('增加元素后,c=', c)
|
||||
print()
|
@@ -1,42 +1,42 @@
|
||||
from multiprocessing import Process
|
||||
import os
|
||||
import time
|
||||
|
||||
def run_proc(name): # 要执行的代码
|
||||
start_time = time.perf_counter()
|
||||
time.sleep(2)
|
||||
end_time = time.perf_counter()
|
||||
print ('Process id running on %s = %s' % (name, os.getpid()), '; running time = %s' % (end_time-start_time))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
# 串行
|
||||
print('串行程序')
|
||||
print('Process id = %s.' % os.getpid())
|
||||
start_time = time.perf_counter()
|
||||
run_proc('job1')
|
||||
run_proc('job2')
|
||||
run_proc('job3')
|
||||
run_proc('job4')
|
||||
end_time = time.perf_counter()
|
||||
print('CPU执行时间(s)=', (end_time-start_time), '\n')
|
||||
|
||||
# 并行
|
||||
print('并行程序')
|
||||
print('Process id = %s.' % os.getpid())
|
||||
start_time = time.perf_counter()
|
||||
p1 = Process(target=run_proc, args=('job1',))
|
||||
p2 = Process(target=run_proc, args=('job2',))
|
||||
p3 = Process(target=run_proc, args=('job3',))
|
||||
p4 = Process(target=run_proc, args=('job4',))
|
||||
p1.start()
|
||||
p2.start()
|
||||
p3.start()
|
||||
p4.start()
|
||||
p1.join() # join()方法可以等待子进程结束后再继续往下运行
|
||||
p2.join()
|
||||
p3.join()
|
||||
p4.join()
|
||||
end_time = time.perf_counter()
|
||||
from multiprocessing import Process
|
||||
import os
|
||||
import time
|
||||
|
||||
def run_proc(name):
    """Sleep for two seconds, then print the process id and the elapsed time.

    Args:
        name: Job label that is echoed in the printed line.
    """
    t_begin = time.perf_counter()
    time.sleep(2)  # stand-in for the real workload
    elapsed = time.perf_counter() - t_begin
    pid_part = 'Process id running on %s = %s' % (name, os.getpid())
    time_part = '; running time = %s' % elapsed
    print(pid_part, time_part)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    job_names = ('job1', 'job2', 'job3', 'job4')

    # Serial run: the jobs are executed one after another in this process.
    print('串行程序')
    print('Process id = %s.' % os.getpid())
    start_time = time.perf_counter()
    for job_name in job_names:
        run_proc(job_name)
    end_time = time.perf_counter()
    print('CPU执行时间(s)=', (end_time-start_time), '\n')

    # Parallel run: one child process per job; all are started before any is joined.
    print('并行程序')
    print('Process id = %s.' % os.getpid())
    start_time = time.perf_counter()
    workers = [Process(target=run_proc, args=(job_name,)) for job_name in job_names]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()  # wait for the child to finish before continuing
    end_time = time.perf_counter()
    print('运行时间(s)=', (end_time-start_time))
|
@@ -1,21 +1,21 @@
|
||||
import os
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
print('程序1开始的时间:', time.ctime())
|
||||
start1 = time.time()
|
||||
os.chdir('D:') # 代码位置
|
||||
os.system('python a.py') # 运行a.py
|
||||
end1 = time.time()
|
||||
print('程序1运行时间(min)=', (end1-start1)/60,'\n')
|
||||
|
||||
print('程序2开始的时间:', time.ctime())
|
||||
start2 = time.time()
|
||||
os.chdir('E:') # 代码位置
|
||||
os.system('python b.py') # 运行b.py
|
||||
end2 = time.time()
|
||||
print('程序2运行时间(min)=', (end2-start2)/60, '\n')
|
||||
|
||||
end = time.time()
|
||||
import os
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
print('程序1开始的时间:', time.ctime())
|
||||
start1 = time.time()
|
||||
os.chdir('D:') # 代码位置
|
||||
os.system('python a.py') # 运行a.py
|
||||
end1 = time.time()
|
||||
print('程序1运行时间(min)=', (end1-start1)/60,'\n')
|
||||
|
||||
print('程序2开始的时间:', time.ctime())
|
||||
start2 = time.time()
|
||||
os.chdir('E:') # 代码位置
|
||||
os.system('python b.py') # 运行b.py
|
||||
end2 = time.time()
|
||||
print('程序2运行时间(min)=', (end2-start2)/60, '\n')
|
||||
|
||||
end = time.time()
|
||||
print('总运行时间(min)=', (end-start)/60)
|
@@ -1,137 +1,137 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/9129
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
logging.Logger.propagate = False
|
||||
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
|
||||
|
||||
|
||||
def main():
|
||||
# 参数
|
||||
key_word_array = ['photonic', 'Berry phase']
|
||||
original_path = 'D:\\文献'
|
||||
|
||||
# 查找所有的PDF文件路径
|
||||
pdf_file_all = find_files_pdf(original_path)
|
||||
print('\n该文件夹下总共有', len(pdf_file_all), '个PDF文件。\n')
|
||||
|
||||
f = open('error.txt','w',encoding='utf-8')
|
||||
f.close()
|
||||
for key_word in key_word_array:
|
||||
f = open(str(key_word)+'.txt','w',encoding='utf-8')
|
||||
f.write('该文件夹下总共有'+str(len(pdf_file_all))+'个PDF文件。\n')
|
||||
f.close()
|
||||
|
||||
# 查找包含关键词的PDF文件
|
||||
i0 = 1
|
||||
begin = time.time()
|
||||
for pdf_file in pdf_file_all:
|
||||
print('查找第', i0, '个文件,', end='')
|
||||
begin0 = time.time()
|
||||
try:
|
||||
content = get_text_from_pdf(pdf_file)
|
||||
for key_word in key_word_array:
|
||||
if re.search(re.compile(key_word),content):
|
||||
print('发现文件!关键词', key_word, '对应的文件位置在:\n\n', pdf_file, '\n')
|
||||
with open(str(key_word)+'.txt','a',encoding='utf-8') as f:
|
||||
f.write('\n查找第'+str(i0)+'个文件时发现文件!位置在:\n'+pdf_file+'\n')
|
||||
except:
|
||||
print('出现异常!位置在:\n\n', pdf_file, '\n')
|
||||
with open('error.txt','a',encoding='utf-8') as f:
|
||||
f.write('\n解析第'+str(i0)+'个文件时出现异常!位置在:\n'+pdf_file+'\n')
|
||||
end0 = time.time()
|
||||
print('用时', end0-begin0, '秒')
|
||||
i0 += 1
|
||||
print('\n全部搜索结束!')
|
||||
end = time.time()
|
||||
print('\n总共用时:', (end-begin)/60, '分')
|
||||
|
||||
|
||||
def find_files_pdf(path): # 查找所有PDF文件
|
||||
file_all = find_files(path)
|
||||
pdf_file_all = []
|
||||
for file0 in file_all:
|
||||
if re.search(re.compile('^fdp.'),file0[::-1]): # 如果文件是以.pdf结尾
|
||||
pdf_file_all.append(file0)
|
||||
return pdf_file_all
|
||||
|
||||
|
||||
def find_files(path): # 查找所有文件
|
||||
file_all = []
|
||||
path_next_loop = [path]
|
||||
for i in range(10000): # i为文件在文件夹中的深度
|
||||
file_all_in_one_loop, path_next_loop = find_files_loop_module(path_next_loop)
|
||||
for file_in_one_loop in file_all_in_one_loop:
|
||||
file_all.append(file_in_one_loop)
|
||||
if path_next_loop == []:
|
||||
break
|
||||
return file_all
|
||||
|
||||
|
||||
def find_files_loop_module(path_all): # 查找文件的一个循环模块
|
||||
file_all_in_one_loop = []
|
||||
path_next_loop = []
|
||||
for path in path_all:
|
||||
filenames = os.listdir(path)
|
||||
for filename in filenames:
|
||||
filename = os.path.join(path,filename)
|
||||
if os.path.isfile(filename): # 如果是文件
|
||||
file_all_in_one_loop.append(filename)
|
||||
else: # 如果是文件夹
|
||||
path_next_loop.append(filename)
|
||||
return file_all_in_one_loop, path_next_loop
|
||||
|
||||
|
||||
def get_text_from_pdf(file_path): # 从PDF中获取文本
|
||||
from pdfminer.pdfparser import PDFParser, PDFDocument
|
||||
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
||||
from pdfminer.converter import PDFPageAggregator
|
||||
from pdfminer.layout import LAParams, LTTextBox
|
||||
from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
|
||||
|
||||
# 用文件对象来创建一个pdf文档分析器
|
||||
praser = PDFParser(open(file_path, 'rb'))
|
||||
# 创建一个PDF文档
|
||||
doc = PDFDocument()
|
||||
# 连接分析器 与文档对象
|
||||
praser.set_document(doc)
|
||||
doc.set_parser(praser)
|
||||
|
||||
# 提供初始化密码
|
||||
# 如果没有密码 就创建一个空的字符串
|
||||
doc.initialize()
|
||||
|
||||
# 检测文档是否提供txt转换,不提供就忽略
|
||||
if not doc.is_extractable:
|
||||
raise PDFTextExtractionNotAllowed
|
||||
else:
|
||||
# 创建PDf 资源管理器 来管理共享资源
|
||||
rsrcmgr = PDFResourceManager()
|
||||
# 创建一个PDF设备对象
|
||||
laparams = LAParams()
|
||||
device = PDFPageAggregator(rsrcmgr, laparams=laparams)
|
||||
# 创建一个PDF解释器对象
|
||||
interpreter = PDFPageInterpreter(rsrcmgr, device)
|
||||
|
||||
# 循环遍历列表,每次处理一个page的内容
|
||||
content = ''
|
||||
for page in doc.get_pages():
|
||||
interpreter.process_page(page)
|
||||
# 接受该页面的LTPage对象
|
||||
layout = device.get_result()
|
||||
# 这里layout是一个LTPage对象,里面存放着这个 page 解析出的各种对象
|
||||
# 包括 LTTextBox, LTFigure, LTImage, LTTextBoxHorizontal 等
|
||||
for x in layout:
|
||||
if isinstance(x, LTTextBox):
|
||||
# print(x.get_text().strip())
|
||||
content = content + x.get_text().strip()
|
||||
return content
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/9129
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
logging.Logger.propagate = False
|
||||
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
|
||||
|
||||
|
||||
def main():
    """Search every PDF under a folder for a list of keywords and log the hits.

    For each keyword a ``<keyword>.txt`` file is (re)created in the current
    working directory and receives the path of every PDF whose extracted text
    matches the keyword. PDFs that fail to parse are listed in ``error.txt``.
    Progress and timing are printed to standard output.
    """
    # Parameters.
    # NOTE: each keyword is handed to re.search as a pattern, so regex
    # metacharacters in a keyword are interpreted rather than matched literally.
    key_word_array = ['photonic', 'Berry phase']
    original_path = 'D:\\文献'

    # Collect the paths of all PDF files.
    pdf_file_all = find_files_pdf(original_path)
    print('\n该文件夹下总共有', len(pdf_file_all), '个PDF文件。\n')

    # Truncate the error log and start each keyword report with the file count.
    with open('error.txt','w',encoding='utf-8'):
        pass
    for key_word in key_word_array:
        with open(str(key_word)+'.txt','w',encoding='utf-8') as f:
            f.write('该文件夹下总共有'+str(len(pdf_file_all))+'个PDF文件。\n')

    # Scan the PDFs one by one.
    begin = time.time()
    for i0, pdf_file in enumerate(pdf_file_all, start=1):
        print('查找第', i0, '个文件,', end='')
        begin0 = time.time()
        try:
            content = get_text_from_pdf(pdf_file)
            for key_word in key_word_array:
                if re.search(key_word, content):
                    print('发现文件!关键词', key_word, '对应的文件位置在:\n\n', pdf_file, '\n')
                    with open(str(key_word)+'.txt','a',encoding='utf-8') as f:
                        f.write('\n查找第'+str(i0)+'个文件时发现文件!位置在:\n'+pdf_file+'\n')
        except Exception:
            # A failure on one file is logged and the scan continues.
            # `Exception` (not a bare `except`) lets Ctrl-C and SystemExit stop
            # the run instead of being recorded as a parse failure.
            print('出现异常!位置在:\n\n', pdf_file, '\n')
            with open('error.txt','a',encoding='utf-8') as f:
                f.write('\n解析第'+str(i0)+'个文件时出现异常!位置在:\n'+pdf_file+'\n')
        end0 = time.time()
        print('用时', end0-begin0, '秒')
    print('\n全部搜索结束!')
    end = time.time()
    print('\n总共用时:', (end-begin)/60, '分')
|
||||
|
||||
|
||||
def find_files_pdf(path):
    """Return the paths under ``path`` whose name ends with ``.pdf``.

    Args:
        path: Root directory handed to ``find_files``.

    Returns:
        list: The entries of ``find_files(path)`` that end with the literal,
        case-sensitive suffix ``.pdf``, in the order ``find_files`` gave them.
    """
    # The earlier check ran the regex '^fdp.' on the reversed path. Its
    # unescaped '.' matched any character, so names such as 'notes_pdf' or
    # 'xpdf' were selected as well. A literal suffix test avoids that.
    return [file0 for file0 in find_files(path) if file0.endswith('.pdf')]
|
||||
|
||||
|
||||
def find_files(path):
    """Collect the paths of all files below ``path``, one directory level per pass.

    Args:
        path: Root directory to start from.

    Returns:
        list: File paths gathered level by level, as reported by
        ``find_files_loop_module``.
    """
    collected = []
    pending_dirs = [path]
    depth = 0  # number of directory levels processed so far
    while depth < 10000:  # same upper bound on passes as before
        files_here, pending_dirs = find_files_loop_module(pending_dirs)
        collected.extend(files_here)
        if not pending_dirs:
            break  # no directories left to descend into
        depth += 1
    return collected
|
||||
|
||||
|
||||
def find_files_loop_module(path_all):
    """List one level of each given directory, separating files from sub-directories.

    Args:
        path_all: Iterable of directory paths to list.

    Returns:
        tuple: ``(files, directories)`` -- two lists of paths, each entry joined
        with the directory it was found in. Entries that are neither a regular
        file nor a directory (for example a dangling symbolic link) are skipped.
    """
    file_all_in_one_loop = []
    path_next_loop = []
    for path in path_all:
        for filename in os.listdir(path):
            filename = os.path.join(path, filename)
            if os.path.isfile(filename):  # regular file
                file_all_in_one_loop.append(filename)
            elif os.path.isdir(filename):  # directory: list it on the next pass
                path_next_loop.append(filename)
            # Anything else used to be queued as a directory, which made the
            # next os.listdir() call raise; it is now left out.
    return file_all_in_one_loop, path_next_loop
|
||||
|
||||
|
||||
def get_text_from_pdf(file_path):
    """Extract the text of every text box in a PDF and return it as one string.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        str: The stripped text of each ``LTTextBox`` found in each page layout,
        concatenated with no separator in the order the pages and layout
        objects are iterated.

    Raises:
        PDFTextExtractionNotAllowed: If the document reports that it is not
            extractable.
    """
    # NOTE(review): PDFDocument is imported from pdfminer.pdfparser, which looks
    # like the pdfminer3k module layout -- confirm against the installed package.
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed

    # All parsing stays inside the `with`, so the stream is still open while
    # pages are read and is closed afterwards (it used to be opened inline and
    # never closed).
    with open(file_path, 'rb') as pdf_stream:
        # Build the parser on the file object and link it with a document.
        parser = PDFParser(pdf_stream)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)

        # Initialise the document; no password is supplied.
        doc.initialize()

        # Stop if the document does not allow text extraction.
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed

        # Resource manager, layout-aggregating device and page interpreter.
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # Process one page at a time and keep only the text boxes.
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # Layout of the page just processed; it holds the parsed objects
            # (text boxes, figures, images, ...).
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
        return ''.join(pieces)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,63 +1,63 @@
|
||||
import os
|
||||
os.chdir('D:/') # PDF文件存放的位置
|
||||
import logging
|
||||
logging.Logger.propagate = False
|
||||
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
|
||||
|
||||
|
||||
def main():
|
||||
content = get_text_from_pdf('a')
|
||||
with open('a.txt', 'w', encoding='utf-8') as f:
|
||||
f.write(content)
|
||||
|
||||
|
||||
def get_text_from_pdf(filename):
|
||||
from pdfminer.pdfparser import PDFParser, PDFDocument
|
||||
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
||||
from pdfminer.converter import PDFPageAggregator
|
||||
from pdfminer.layout import LAParams, LTTextBox
|
||||
from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
|
||||
|
||||
path = filename+".pdf"
|
||||
|
||||
# 用文件对象来创建一个pdf文档分析器
|
||||
praser = PDFParser(open(path, 'rb'))
|
||||
# 创建一个PDF文档
|
||||
doc = PDFDocument()
|
||||
# 连接分析器 与文档对象
|
||||
praser.set_document(doc)
|
||||
doc.set_parser(praser)
|
||||
|
||||
# 提供初始化密码
|
||||
# 如果没有密码 就创建一个空的字符串
|
||||
doc.initialize()
|
||||
|
||||
# 检测文档是否提供txt转换,不提供就忽略
|
||||
if not doc.is_extractable:
|
||||
raise PDFTextExtractionNotAllowed
|
||||
else:
|
||||
# 创建PDf 资源管理器 来管理共享资源
|
||||
rsrcmgr = PDFResourceManager()
|
||||
# 创建一个PDF设备对象
|
||||
laparams = LAParams()
|
||||
device = PDFPageAggregator(rsrcmgr, laparams=laparams)
|
||||
# 创建一个PDF解释器对象
|
||||
interpreter = PDFPageInterpreter(rsrcmgr, device)
|
||||
|
||||
# 循环遍历列表,每次处理一个page的内容
|
||||
content = ''
|
||||
for page in doc.get_pages():
|
||||
interpreter.process_page(page)
|
||||
# 接受该页面的LTPage对象
|
||||
layout = device.get_result()
|
||||
# 这里layout是一个LTPage对象,里面存放着这个 page 解析出的各种对象
|
||||
# 包括 LTTextBox, LTFigure, LTImage, LTTextBoxHorizontal 等
|
||||
for x in layout:
|
||||
if isinstance(x, LTTextBox):
|
||||
# print(x.get_text().strip())
|
||||
content = content + x.get_text().strip()
|
||||
return content
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import os
|
||||
os.chdir('D:/') # PDF文件存放的位置
|
||||
import logging
|
||||
logging.Logger.propagate = False
|
||||
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
|
||||
|
||||
|
||||
def main():
|
||||
content = get_text_from_pdf('a')
|
||||
with open('a.txt', 'w', encoding='utf-8') as f:
|
||||
f.write(content)
|
||||
|
||||
|
||||
def get_text_from_pdf(filename):
    """Extract the text of every text box in ``<filename>.pdf`` as one string.

    Args:
        filename: Path of the PDF file without its ``.pdf`` extension; the
            extension is appended before the file is opened.

    Returns:
        str: The stripped text of each ``LTTextBox`` found in each page layout,
        concatenated with no separator in the order the pages and layout
        objects are iterated.

    Raises:
        PDFTextExtractionNotAllowed: If the document reports that it is not
            extractable.
    """
    # NOTE(review): PDFDocument is imported from pdfminer.pdfparser, which looks
    # like the pdfminer3k module layout -- confirm against the installed package.
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed

    path = filename+".pdf"

    # All parsing stays inside the `with`, so the stream is still open while
    # pages are read and is closed afterwards (it used to be opened inline and
    # never closed).
    with open(path, 'rb') as pdf_stream:
        # Build the parser on the file object and link it with a document.
        parser = PDFParser(pdf_stream)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)

        # Initialise the document; no password is supplied.
        doc.initialize()

        # Stop if the document does not allow text extraction.
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed

        # Resource manager, layout-aggregating device and page interpreter.
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # Process one page at a time and keep only the text boxes.
        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # Layout of the page just processed; it holds the parsed objects
            # (text boxes, figures, images, ...).
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
        return ''.join(pieces)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1,6 +1,6 @@
|
||||
f = open('combine.txt', 'w')
|
||||
for job_index in range(7):
|
||||
with open('a'+str(job_index)+'.txt', 'r') as f0:
|
||||
text = f0.read()
|
||||
f.write(text)
|
||||
# Concatenate a0.txt ... a6.txt, in index order, into combine.txt.
# The output file is managed by `with`, so it is closed even when one of the
# inputs is missing or unreadable (it used to stay open in that case).
with open('combine.txt', 'w') as f:
    for job_index in range(7):
        with open('a'+str(job_index)+'.txt', 'r') as f0:
            text = f0.read()
        f.write(text)
|
Reference in New Issue
Block a user