# -*- Author: 闫浩 -*-
"""File conversion through online services.

Date: 2022-04-26

1. PDF -> DOC
2. DOC/DOCX -> PDF
"""
import requests
import string
import time
import math
import random
import os
from requests_toolbelt import MultipartEncoder
class PdfToWord(object):
    """Convert a PDF file to a Word document through the pdf2doc.com service.

    The exchange has four HTTP steps: upload the PDF, trigger the conversion,
    ask for the name of the converted file, and download that file.
    """

    def __init__(self, pdf_path, word_path=None):
        """Store the paths and create the identifiers used in the service URLs.

        :param pdf_path: path of the PDF file to upload.
        :param word_path: path the converted document is written to. When it
            is ``None`` the document is saved next to ``pdf_path``, using the
            extension of the file name reported by the service.
        """
        self.url = "https://pdf2doc.com"
        self.uid = None
        self.id_ = None
        self.pdf_path = pdf_path
        self.word_path = word_path
        self.prepare()

    def prepare(self):
        """Generate the random ``uid`` and set the file ``id_``."""
        self.uid = self.get_uid()
        # Per the original author's note the id may be any string, so a fixed
        # value is used here instead of calling get_id().
        self.id_ = "o_1f0b39rqd9c013o8os1rh313s91"

    @staticmethod
    def __base32(x):
        """Return the base-32 text of the integer ``x``.

        Digits are the first 32 characters of ``string.printable``
        (``0-9`` then ``a-v``).  Zero yields ``"0"``, matching JavaScript's
        ``Number.prototype.toString(32)`` that this helper stands in for;
        negative input yields an empty string.
        """
        if x == 0:
            return "0"
        digits = []
        while x > 0:
            x, remainder = divmod(x, 32)
            digits.append(string.printable[remainder])
        # Digits were collected least-significant first.
        return "".join(reversed(digits))

    def get_id(self):
        """Build a file id: ``"o_"`` + timestamp + five random parts + ``"1"``.

        Every part is base-32 encoded; the timestamp is the current time in
        milliseconds (the Python equivalent of
        ``new Date().getTime().toString(32)``).

        :return: the generated id string.
        """
        parts = [self.__base32(int(time.time() * 1000))]
        for _ in range(5):
            parts.append(self.__base32(int(math.floor(random.random() * 65535))))
        return "o_" + "".join(parts) + self.__base32(1)

    @staticmethod
    def get_uid():
        """Return a random 16-character string of digits and lowercase letters.

        NOTE(review): the alphabet contains no ``j``; kept exactly as in the
        original -- confirm whether that is intentional.
        """
        chars = "0123456789abcdefghiklmnopqrstuvwxyz"
        return "".join(
            chars[int(math.floor(random.random() * len(chars)))]
            for _ in range(16)
        )

    def process(self):
        """Run the whole conversion for ``self.pdf_path``.

        Uploads the PDF and then calls :meth:`convert`, which in turn checks
        the conversion status and downloads the result.
        """
        # Upload the PDF file.
        self.upload(self.pdf_path)
        # Convert it to Word (status check and download happen inside).
        self.convert()

    def upload(self, file_path):
        """Upload the PDF at ``file_path`` as a multipart form.

        The file is always sent under the fixed name ``a.pdf``.

        :param file_path: path of the PDF file to send.
        """
        file_name = "a.pdf"
        new_url = "{}/upload/{}".format(self.url, self.uid)
        # The context manager closes the file even when the request raises.
        with open(file_path, "rb") as f:
            param = {
                "id": self.id_,
                "name": file_name,
                "file": (file_name, f, "application/pdf")
            }
            form_data = MultipartEncoder(fields=param)
            header = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36",
                "Content-Type": form_data.content_type
            }
            requests.post(new_url, data=form_data, headers=header)
            # Sample response text recorded by the original author:
            # {"data":{"file":"a.pdf","file_size_human":"146K"},"id":"o_1f0b39rqd9c013o8os1rh313s91","jsonrpc":"2.0","result":null}

    def convert(self):
        """Trigger conversion of the uploaded PDF and download the result.

        Prints ``转换失败`` and returns without downloading when the service
        does not report success or does not provide a converted file name.
        """
        url = "{}/convert/{}/{}".format(self.url, self.uid, self.id_)
        response = requests.get(url)
        result = response.json()
        if result.get("status") != "success":
            print("转换失败")
            return
        # Conversion accepted: query the status to get the Word file name.
        word_file_name = self.status()
        if not word_file_name:
            # Without a file name there is nothing to download.
            print("转换失败")
            return
        # Download the Word file.
        self.download_(word_file_name)
        # One-second pause kept from the original flow; its purpose is not
        # evident from the code (presumably to go easy on the service).
        time.sleep(1)

    def status(self):
        """Query the conversion status.

        :return: the ``convert_result`` field of the JSON response (the
            converted file name), or ``None`` when the field is absent.
        """
        url = "{}/status/{}/{}".format(self.url, self.uid, self.id_)
        response = requests.get(url)
        result = response.json()
        return result.get("convert_result")

    def _output_path(self, word_file):
        """Return the local path the downloaded document is written to.

        :param word_file: file name reported by the service.
        :return: ``self.word_path`` when it was given; otherwise the PDF path
            with its extension replaced by that of ``word_file`` (``.doc``
            when ``word_file`` has no extension).
        """
        if self.word_path is not None:
            return self.word_path
        extension = os.path.splitext(word_file)[1] or ".doc"
        return os.path.splitext(self.pdf_path)[0] + extension

    def download_(self, word_file):
        """Download the converted document and write it to disk.

        :param word_file: file name reported by the service.
        :raises requests.HTTPError: when the service answers with an error
            status, so an error page is never saved as the document.
        """
        url = "{}/download/{}/{}/{}".format(self.url, self.uid, self.id_, word_file)
        response = requests.get(url)
        response.raise_for_status()
        with open(self._output_path(word_file), "wb") as f:
            f.write(response.content)
class WordToPdf(object):
    """Convert a doc/docx file to PDF through the doctopdf.com service.

    The exchange has four HTTP steps: upload the document, trigger the
    conversion, ask for the name of the converted file, and download it.
    """

    def __init__(self, word_path, pdf_path):
        """Store the paths and create the identifiers used in the service URLs.

        :param word_path: path of the doc/docx file to upload.
        :param pdf_path: path the converted PDF is written to.
        """
        self.url = "https://doctopdf.com"
        self.word_path = word_path
        self.pdf_path = pdf_path
        self.uid = None
        self.id_ = None
        self.prepare()

    def prepare(self):
        """Generate the random ``uid`` and set the file ``id_``."""
        self.uid = self.get_uid()
        # Per the original author's note the id may be any string, so a fixed
        # value is used.
        self.id_ = "o_1f0b39rqd9c013o8os1rh313s91"

    @staticmethod
    def get_uid():
        """Return a random 16-character string of digits and lowercase letters.

        NOTE(review): the alphabet contains no ``j``; kept exactly as in the
        original -- confirm whether that is intentional.
        """
        chars = "0123456789abcdefghiklmnopqrstuvwxyz"
        return "".join(
            chars[int(math.floor(random.random() * len(chars)))]
            for _ in range(16)
        )

    def process(self):
        """Run the whole conversion for ``self.word_path``.

        Uploads the document and then calls :meth:`convert`, which in turn
        checks the conversion status and downloads the result.
        """
        # Upload the Word file.
        self.upload(self.word_path)
        # Convert it to PDF (status check and download happen inside).
        self.convert()

    @staticmethod
    def _upload_name(file_path):
        """Return the name the file is uploaded under: ``a`` + its extension.

        ``os.path.splitext`` is used so that a dot in a directory name is not
        mistaken for the start of the extension.
        """
        return "a" + os.path.splitext(file_path)[1]

    def upload(self, file_path):
        """Upload the Word document at ``file_path`` as a multipart form.

        :param file_path: path of the doc/docx file to send.
        """
        file_name = self._upload_name(file_path)
        new_url = "{}/upload/{}".format(self.url, self.uid)
        # The context manager closes the file even when the request raises.
        with open(file_path, "rb") as f:
            param = {
                "id": self.id_,
                "name": file_name,
                "file": (file_name, f, "application/doc")
            }
            form_data = MultipartEncoder(fields=param)
            header = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36",
                "Content-Type": form_data.content_type
            }
            requests.post(new_url, data=form_data, headers=header)
            # Sample response text recorded by the original author:
            # {"data":{"file":"a.pdf","file_size_human":"146K"},"id":"o_1f0b39rqd9c013o8os1rh313s91","jsonrpc":"2.0","result":null}

    def convert(self):
        """Trigger conversion of the uploaded document and download the PDF.

        Prints ``转换失败`` and returns without downloading when the service
        does not report success or does not provide a converted file name.
        """
        url = "{}/convert/{}/{}".format(self.url, self.uid, self.id_)
        response = requests.get(url)
        result = response.json()
        if result.get("status") != "success":
            print("转换失败")
            return
        # Conversion accepted: query the status to get the PDF file name.
        pdf_file_name = self.status()
        if not pdf_file_name:
            # Without a file name there is nothing to download.
            print("转换失败")
            return
        # Download the PDF file.
        self.download_(pdf_file_name)
        # One-second pause kept from the original flow; its purpose is not
        # evident from the code (presumably to go easy on the service).
        time.sleep(1)

    def status(self):
        """Query the conversion status.

        :return: the ``convert_result`` field of the JSON response (the
            converted file name), or ``None`` when the field is absent.
        """
        url = "{}/status/{}/{}".format(self.url, self.uid, self.id_)
        response = requests.get(url)
        result = response.json()
        return result.get("convert_result")

    def download_(self, d_file):
        """Download the converted PDF and write it to ``self.pdf_path``.

        :param d_file: file name reported by the service.
        :raises requests.HTTPError: when the service answers with an error
            status, so an error page is never saved as the PDF.
        """
        url = "{}/download/{}/{}/{}".format(self.url, self.uid, self.id_, d_file)
        response = requests.get(url)
        response.raise_for_status()
        with open(self.pdf_path, "wb") as f:
            f.write(response.content)
# def main():
# print("1111")
# if len(sys.argv) < 2:
# print("Usage : python3 {} [user_name]".format(sys.argv[0]))
# return
# pdf_list = sys.argv[1:]
# print(pdf_list)
# new_pdf_list = []
# for pdf in pdf_list:
# if not os.path.exists(pdf):
# continue
# file_name, ext = None, None
# try:
# file_name, ext = pdf.split(".")
# except ValueError as e:
# print("异常:{}".format(e))
# except Exception as e:
# print("未知错误:{}".format(e))
# print(file_name, ext)
# if ext not in ("pdf",):
# continue
# new_pdf_list.append(pdf)
# p = PdfToWord()
# p.process(new_pdf_list)
if __name__ == '__main__':
pdf_path = r"C:\Users\小胖\Desktop\abc.pdf"
# pdf_path = "D:\\python\\test\\test3\\
浩哥——大数据+Python
- 粉丝: 0
- 资源: 2
最新资源
- java全大撒大撒大苏打
- pca20241222
- LabVIEW实现LoRa通信【LabVIEW物联网实战】
- CS-TY4-4WCN-转-公版-XP1-8B4WF-wifi8188
- 计算机网络期末复习资料(课后题答案+往年考试题+复习提纲+知识点总结)
- 从零学习自动驾驶Lattice规划算法(下) 轨迹采样 轨迹评估 碰撞检测 包含matlab代码实现和cpp代码实现,方便对照学习 cpp代码用vs2019编译 依赖qt5.15做可视化 更新:
- 风光储、风光储并网直流微电网simulink仿真模型 系统由光伏发电系统、风力发电系统、混合储能系统(可单独储能系统)、逆变器VSR+大电网构成 光伏系统采用扰动观察法实现mppt控
- (180014016)pycairo-1.18.2-cp35-cp35m-win32.whl.rar
- (180014046)pycairo-1.21.0-cp311-cp311-win32.whl.rar
- DS-7808-HS-HF / DS-7808-HW-E1
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈