🐍PDF2DOCX🐍

🐍单文件版本🐍

  1. import os
  2. import pathlib
  3. from pdf2docx import Converter
  # Banner lines printed at the very beginning and end of a run.
  # (No backslashes are involved, so plain string literals are sufficient.)
  msgStart = "——————————Python脚本开始——————————"
  msgEnd = "——————————Python脚本结束——————————"
  def ChoosePath_Absolute(pathPDF):
      """Convert *pathPDF* to a ``.docx`` that is named after the PDF only.

      The output name is the PDF's stem plus ``.docx`` with no directory
      part, i.e. a relative path. Where that file ends up is decided by
      ``PDF2DOC`` (presumably the current working directory -- confirm).

      Args:
          pathPDF: Path of the PDF to convert.
      """
      print("输入PDF文件路径:", pathPDF)
      # Keep only the file name without its extension.
      docx_name = f"{pathlib.Path(pathPDF).stem}.docx"
      print("输出DOC文件到Python脚本目录:", docx_name)
      PDF2DOC(pathPDF, docx_name)
  def ChoosePath_SameDirectory(pathPDF):
      """Convert *pathPDF* to a ``.docx`` written next to the source PDF.

      Args:
          pathPDF: Path of the PDF to convert (``str`` or path-like).
      """
      print("输入PDF文件路径:", pathPDF)
      # Swap only the extension so the directory part is kept verbatim.
      # Re-joining directory and name with a literal "\\" turned a bare
      # file name into "\name.docx" and is not a valid separator on
      # non-Windows systems.
      pathDOC = os.path.splitext(pathPDF)[0] + ".docx"
      print("输出DOC文件到PDF文件目录:", pathDOC)
      PDF2DOC(pathPDF, pathDOC)
  def PDF2DOC(pathPDF, pathDOC):
      """Convert the PDF at *pathPDF* into a Word document at *pathDOC*.

      Args:
          pathPDF: Path of the source PDF.
          pathDOC: Path of the ``.docx`` file to write.

      Raises:
          Whatever ``pdf2docx.Converter`` raises while opening or converting;
          the converter is closed before the exception propagates.
      """
      converter = Converter(pathPDF)
      try:
          # start=0 / end=None: from the first page with no explicit end page.
          converter.convert(pathDOC, start=0, end=None)
      finally:
          # Always release the opened PDF, even when the conversion fails.
          converter.close()
  # Built-in sample input used when no other path is supplied.
  pathPDF = r"C:\Users\Administrator\Desktop\穷奇.pdf"

  if __name__ == "__main__":
      print(msgStart)

      # Optional interactive input -- uncomment to prompt for the PDF path:
      # pathInput = input("请输入PDF文件路径地址:")
      # print(pathInput)
      #
      # if pathInput == '':
      #     print("输入为空,使用脚本内置测试路径:", pathPDF)
      # else:
      #     pathPDF = pathInput

      # Pick exactly one output location and comment out the other call.
      # ChoosePath_Absolute(pathPDF)
      ChoosePath_SameDirectory(pathPDF)

      print(msgEnd)

🐍文件夹版本🐍

  1. import os
  2. from pathlib import Path
  3. from pdf2docx import Converter
  # Banner lines printed at the very beginning and end of a run.
  # (No backslashes are involved, so plain string literals are sufficient.)
  msgStart = "——————————Python脚本开始——————————"
  msgEnd = "——————————Python脚本结束——————————"
  # Line break appended to some messages to leave a visual gap in the output.
  strNewLine = "\r\n"
  def EnuPDFirGetPDFFilePath(pathDir):
      """Walk *pathDir* recursively and hand every PDF file to the path chooser.

      Args:
          pathDir: Directory to search; all sub-directories are included.

      Returns:
          A list of ``pathlib.Path`` objects, one per PDF file found, in the
          order they were visited. Callers that ignore the result are
          unaffected.
      """
      print("需要遍历的路径:", pathDir, strNewLine)
      listPathPDF = []
      # Match the extension case-insensitively: a literal '*.PDF' pattern
      # skips 'report.pdf' on case-sensitive file systems. Directories whose
      # name happens to end in '.pdf' are ignored.
      for candidate in Path(pathDir).rglob('*'):
          if not candidate.name.lower().endswith(".pdf"):
              continue
          if not candidate.is_file():
              continue
          listPathPDF.append(candidate)
          # Choose one output location for the DOCX.
          ChoosePath_SameDirectory(candidate)
          # ChoosePath_Absolute(candidate)
      print("“.PDF”文件列表:")
      print(listPathPDF)
      return listPathPDF
  def ChoosePath_Absolute(pathPDF):
      """Report the ``.docx`` name derived from *pathPDF* (stem + ``.docx``).

      The derived name has no directory part. The conversion call itself is
      commented out here, so this function only prints the two paths.

      Args:
          pathPDF: Path of the PDF whose output name should be derived.
      """
      print("输入PDF文件路径:", pathPDF)
      # Keep only the file name without its extension.
      docx_name = f"{Path(pathPDF).stem}.docx"
      print("输出DOC文件到Python脚本目录:", docx_name)
      # PDF2DOC(pathPDF, docx_name)
  def ChoosePath_SameDirectory(pathPDF):
      """Report the ``.docx`` path that sits next to the PDF at *pathPDF*.

      Args:
          pathPDF: Path of the source PDF (``str`` or path-like, e.g. the
              ``Path`` objects produced by the directory walk).
      """
      print("输入PDF文件路径:", pathPDF)
      # Swap only the extension so the directory part is kept verbatim.
      # Re-joining directory and name with a literal "\\" turned a bare
      # file name into "\name.docx" and is not a valid separator on
      # non-Windows systems.
      pathDOC = os.path.splitext(pathPDF)[0] + ".docx"
      print("输出DOC文件到PDF文件目录:", pathDOC, strNewLine)
      # NOTE(review): the conversion is disabled, so this is a dry run that
      # only prints paths -- re-enable the call below to actually convert.
      # PDF2DOC(pathPDF, pathDOC)
  def PDF2DOC(pathPDF, pathDOC):
      """Convert the PDF at *pathPDF* into a Word document at *pathDOC*.

      Args:
          pathPDF: Path of the source PDF.
          pathDOC: Path of the ``.docx`` file to write.

      Raises:
          Whatever ``pdf2docx.Converter`` raises while opening or converting;
          the converter is closed before the exception propagates.
      """
      converter = Converter(pathPDF)
      try:
          # start=0 / end=None: from the first page with no explicit end page.
          converter.convert(pathDOC, start=0, end=None)
      finally:
          # Always release the opened PDF, even when the conversion fails.
          converter.close()
  # Built-in defaults used when the user enters nothing at the prompt.
  pathPDF = r"C:\Users\Administrator\Desktop\穷奇.pdf"
  pathDir = r"C:\Users\Administrator\Desktop"

  if __name__ == '__main__':
      print(msgStart)

      # Trim surrounding whitespace and quote characters: a pasted path often
      # carries them, and they would otherwise defeat the empty check below
      # and yield a path that does not exist.
      pathInput = input("请输入PDF文件路径地址:").strip().strip('"\'').strip()
      print(pathInput)

      if not pathInput:
          print("输入为空,使用默认测试路径:" + strNewLine + pathPDF + strNewLine + pathDir + strNewLine)
      else:
          pathPDF = pathInput
          pathDir = pathInput

      # Walk the directory tree and process every PDF found.
      EnuPDFirGetPDFFilePath(pathDir)

      # Single-file alternatives: pick one and comment out the other.
      # ChoosePath_Absolute(pathPDF)
      # ChoosePath_SameDirectory(pathPDF)

      print(msgEnd)

Python 3.10 版本下的修改

报错

  1. ImportError: cannot import name 'Iterable' from 'collections'

参考

版本>3.7时

pdf2docx\test\Line.py第18行
将该行中的 collections 改为 collections.abc(即在 collections 后面加上 .abc)
image.png