🐍PDF2DOCX🐍
🐍单文件版本🐍
import os
import pathlib
from pdf2docx import Converter
msgStart = r"——————————Python脚本开始——————————"
msgEnd = r"——————————Python脚本结束——————————"
def ChoosePath_Absolute(pathPDF):
print("输入PDF文件路径:", pathPDF)
# Python脚本目录
nameFile = pathlib.Path(pathPDF).stem
pathDOC = nameFile + ".docx"
print("输出DOC文件到Python脚本目录:", pathDOC)
PDF2DOC(pathPDF, pathDOC)
def ChoosePath_SameDirectory(pathPDF):
print("输入PDF文件路径:", pathPDF)
# PDF文件同目录
(fileDir, fileNameWithExtension) = os.path.split(pathPDF)
(fileName, fileExtension) = os.path.splitext(fileNameWithExtension)
pathDOC = fileDir + "\\" + fileName + ".docx"
print("输出DOC文件到PDD文件目录:", pathDOC)
PDF2DOC(pathPDF, pathDOC)
def PDF2DOC(pathPDF, pathDOC):
converter = Converter(pathPDF)
converter.convert(pathDOC, start=0, end=None)
converter.close()
pathPDF = r"C:\Users\Administrator\Desktop\穷奇.pdf"
if __name__ == '__main__':
print(msgStart)
# 如果需要输入
# pathInput = input("请输入PDF文件路径地址:")
# print(pathInput)
#
# if pathInput == '':
# print("输入为空,使用脚本内置测试路径:", pathPDF)
# else:
# pathPDF = pathInput
# 路径二选一,选一行不用的注释掉
# ChoosePath_Absolute(pathPDF)
ChoosePath_SameDirectory(pathPDF)
print(msgEnd)
🐍文件夹版本🐍
import os
from pathlib import Path
from pdf2docx import Converter
msgStart = r"——————————Python脚本开始——————————"
msgEnd = r"——————————Python脚本结束——————————"
strNewLine = "\r\n"
def EnuPDFirGetPDFFilePath(pathDir):
print("需要遍历的路径:", pathDir, strNewLine)
listPathPDF = []
# 遍历.PDF文件
for filePDF in Path(pathDir).rglob('*.PDF'):
# print("PDF:", filePDF)
listPathPDF.append(filePDF)
# 选择一种路径保存DOCX
ChoosePath_SameDirectory(filePDF)
# ChoosePath_Absolute(filePDF)
print("“.PDF”文件列表:")
print(listPathPDF)
# return listPathPDF
def ChoosePath_Absolute(pathPDF):
print("输入PDF文件路径:", pathPDF)
# Python脚本目录
nameFile = Path(pathPDF).stem
pathDOC = nameFile + ".docx"
print("输出DOC文件到Python脚本目录:", pathDOC)
# PDF2DOC(pathPDF, pathDOC)
def ChoosePath_SameDirectory(pathPDF):
print("输入PDF文件路径:", pathPDF)
# PDF文件同目录
(fileDir, fileNameWithExtension) = os.path.split(pathPDF)
(fileName, fileExtension) = os.path.splitext(fileNameWithExtension)
pathDOC = fileDir + "\\" + fileName + ".docx"
print("输出DOC文件到PDD文件目录:", pathDOC, strNewLine)
# PDF2DOC(pathPDF, pathDOC)
def PDF2DOC(pathPDF, pathDOC):
converter = Converter(pathPDF)
converter.convert(pathDOC, start=0, end=None)
converter.close()
pathPDF = r"C:\Users\Administrator\Desktop\穷奇.pdf"
pathDir = r"C:\Users\Administrator\Desktop"
if __name__ == '__main__':
print(msgStart)
# 如果需要输入
pathInput = input("请输入PDF文件路径地址:")
print(pathInput)
if pathInput == '':
print("输入为空,使用默认测试路径:" + strNewLine + pathPDF + strNewLine + pathDir + strNewLine)
pass
else:
pathPDF = pathInput
pathDir = pathInput
# 遍历文件夹
EnuPDFirGetPDFFilePath(pathDir)
# 路径二选一,选一行不用的注释掉
# ChoosePath_Absolute(pathPDF)
# ChoosePath_SameDirectory(pathPDF)
print(msgEnd)
3.10Python版本修改
报错
ImportError:cannot import name 'Iterable' from 'collections'
参考
版本>3.7时
pdf2docx\test\Line.py第18行collections
添加.abc
改为collections.abc