from PyPDF2 import PdfWriter, PdfReader
#-*- coding: utf-8 -*-
from PyPDF2 import PdfReader
from PyPDF2 import PdfFileReader, PdfWriter
from PyPDF2 import PdfWriter
import calendar
import time
import os
def pdf_page():
    """Work out which pages of the current PDF do NOT carry the marker text.

    Reads the module-level ``reader`` and ``number_of_pages``, extracts the
    text of every page and collects the indices of pages whose text contains
    ``'凭证来源:核心'``.  The indices of all remaining pages are stored in
    the module-level ``list_dy``, which ``pdf_write`` iterates over.

    Returns:
        None.  The result is published through the global ``list_dy``.
    """
    global list_dy

    # Created once, before the loop: re-creating the list for every page
    # would throw away every match except the one from the final page.
    list_alone = []
    for i in range(number_of_pages):
        text = reader.pages[i].extract_text()
        print(text)
        if '凭证来源:核心' in text:
            list_alone.append(i)
            print(list_alone)
        else:
            print('*****')

    # Set membership keeps the complement computation linear in page count.
    matched = set(list_alone)
    list_dy = [item for item in range(number_of_pages) if item not in matched]
    print(list_dy)
def pdf_write():
    """Write the pages selected in ``list_dy`` to a timestamped PDF file.

    Adds ``reader.pages[i]`` to the module-level ``writer`` for every index
    ``i`` in the module-level ``list_dy``, then saves the result as
    ``pypdf-output<ts>.pdf`` in the current working directory, where
    ``<ts>`` is the current UTC time in whole seconds since the epoch.

    Returns:
        None.
    """
    for i in list_dy:
        writer.add_page(reader.pages[i])

    # NOTE: the stamp has one-second resolution, so two calls within the
    # same second produce the same file name and the later call overwrites
    # the earlier output.
    ts = calendar.timegm(time.gmtime())
    with open("pypdf-output" + str(ts) + ".pdf", "wb") as fp:
        writer.write(fp)
# Root directory holding the input PDFs; adjust it to the actual location.
# Every input path below is derived from this single value.
path_root = "C:/Users/ZYBX/Desktop/PDF"
datanames = os.listdir(path_root)
print(datanames)

for pdf_name in datanames:
    print(pdf_name)
    pdf_path = os.path.join(path_root, pdf_name)

    # os.listdir also returns sub-directories and unrelated files, which
    # PdfReader cannot open; skip them instead of aborting the whole run.
    if not (os.path.isfile(pdf_path) and pdf_name.lower().endswith(".pdf")):
        print("skipping non-PDF entry:", pdf_name)
        continue

    # reader / writer / number_of_pages stay module-level names because
    # pdf_page() and pdf_write() read them as globals.
    reader = PdfReader(pdf_path)
    writer = PdfWriter()
    number_of_pages = len(reader.pages)
    print(number_of_pages)

    # Collect the indices of the pages whose text contains the keyword.
    # (An earlier revision matched on '凭证来源:核心' instead.)
    list_alone = []
    for page_index, page in enumerate(reader.pages):
        text = page.extract_text()
        print(text)
        if '重庆' in text:
            list_alone.append(page_index)
        else:
            print('*****')

    for page_index in list_alone:
        writer.add_page(reader.pages[page_index])

    # The output goes to the current working directory and is named after
    # the input file (the original name, including its extension, is
    # embedded as-is, so "a.pdf" becomes "pypdf-outputa.pdf.pdf").
    with open("pypdf-output" + pdf_name + ".pdf", "wb") as fp:
        writer.write(fp)
python处理PDF日常操作
需积分: 0 144 浏览量
2023-03-31
17:36:14
上传
评论
收藏 2KB ZIP 举报
weixin_41715446
- 粉丝: 0
- 资源: 1
最新资源
- python代码案例详解-旅行商问题的多种求解算法.zip
- springcloud.rar
- 一键清理maven项目依赖下载失败生成的.lastUpdated文件, 以及解决无法连接远程仓库导致本地依赖找不到的批处理脚本
- C#+WPF开发IOC框架实战架构师课程
- 《JAVA WEB项目开发案例精粹》中的代码案例源文件(超全).rar
- 背景music,用作业网轮地址
- comment_6e56cd5d6e140046ef1cd248910ea5ca.png
- 基于卷积神经网络的自动驾驶系统的设计与实现
- MySQL的执行原理-2.md
- 基于 FCN 网络对无人机俯拍下的生活场景语义分割(12类别)【包含数据集、完整代码、训练好的结果、权重文件等等】
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈