PDF - 图1
© getcodify.com

由於語法渲染問題而影響閱讀體驗, 請移步博客閱讀~
本文GitPage地址

PYPDF2

Read & Write

  from PyPDF2 import PdfFileReader, PdfFileWriter

  readFile = 'read.pdf'
  writeFile = 'write.pdf'

  # Copy every page of readFile into writeFile.
  # The input stays open until the output has been written, because the
  # writer pulls page data from the reader's stream at write time.
  with open(readFile, 'rb') as inFile:
      pdfReader = PdfFileReader(inFile)
      # A writer has to be created and filled before it can be written.
      pdfWriter = PdfFileWriter()
      for page in range(pdfReader.getNumPages()):
          pdfWriter.addPage(pdfReader.getPage(page))
      with open(writeFile, 'wb') as outFile:
          pdfWriter.write(outFile)

Pick the First Two Pages

  from PyPDF2 import PdfFileReader, PdfFileWriter
  import PyPDF2

  readFile = 'SA.pdf'
  writeFile = 'write.pdf'

  # Copy the first two pages of readFile into writeFile.
  pdfWriter = PyPDF2.PdfFileWriter()
  with open(readFile, 'rb') as inFile:
      pdfReader = PdfFileReader(inFile)
      # Never request more pages than the document actually has.
      for page in range(min(2, pdfReader.getNumPages())):
          pageObj = pdfReader.getPage(page)
          pdfWriter.addPage(pageObj)
      # Write once, after all pages were added, and while the input is
      # still open.
      with open(writeFile, 'wb') as newFile:
          pdfWriter.write(newFile)

2.1 Double the Pages

  from PyPDF2 import PdfFileReader, PdfFileWriter
  import PyPDF2

  readFile = 'SA.pdf'
  writeFile = 'write.pdf'

  # Copy the first two pages of readFile into writeFile, each page twice
  # in a row (1, 1, 2, 2).
  pdfWriter = PyPDF2.PdfFileWriter()
  with open(readFile, 'rb') as inFile:
      pdfReader = PdfFileReader(inFile)
      # Never request more pages than the document actually has.
      for page in range(min(2, pdfReader.getNumPages())):
          pageObj = pdfReader.getPage(page)
          # Adding the same page object twice duplicates it in the output.
          pdfWriter.addPage(pageObj)
          pdfWriter.addPage(pageObj)
      # Write once, after all pages were added, and while the input is
      # still open.
      with open(writeFile, 'wb') as newFile:
          pdfWriter.write(newFile)

PDF - 图2

3. Watermark

  # Use ReportLab's real centimetre unit. With `cm = 1` every length below
  # would be in points, giving a 30pt x 30pt page instead of 30cm x 30cm.
  from reportlab.lib.units import cm
  from reportlab.pdfgen import canvas


  def create_watermark(content):
      """Write a one-page text watermark to 'mark.pdf'.

      The text is drawn in grey, four times, on a coordinate system that is
      moved to (10cm, 10cm) and rotated by 45 degrees.

      Args:
          content: The watermark text passed to drawString().
      """
      # ReportLab's default page size is A4 (21cm x 29.7cm); use 30cm x 30cm.
      c = canvas.Canvas('mark.pdf', pagesize=(30*cm, 30*cm))
      # Move the origin; the coordinate system starts at the bottom-left (0, 0).
      c.translate(10*cm, 10*cm)
      #c.setFont('song',22)  # set the font to Song, size 22
      c.setFillColorRGB(0.5, 0.5, 0.5)  # grey
      c.rotate(45)  # rotate by 45 degrees; the coordinate system is rotated
      c.drawString(-7*cm, 0*cm, content)
      c.drawString(7*cm, 0*cm, content)
      c.drawString(0*cm, 7*cm, content)
      c.drawString(0*cm, -7*cm, content)
      c.save()  # close and save the PDF file

PDF - 图3

  from reportlab.pdfgen import canvas

  cm = 1  # unit factor of 1: the lengths below are used as plain numbers


  def create_watermark(W, H):
      """Write a one-page PDF 'mark.pdf' containing a white filled rectangle.

      The rectangle is W wide and H/2 - 10 high, and is drawn from an origin
      that has been moved to (10*cm, 10*cm).

      Args:
          W: Page width, also used as the rectangle width.
          H: Page height; the rectangle height is H/2 - 10.
      """
      # ReportLab's default page size is A4 (21cm x 29.7cm); use W x H here.
      c = canvas.Canvas('mark.pdf', pagesize=(W, H))
      # Move the origin; the coordinate system starts at the bottom-left (0, 0).
      c.translate(10*cm, 10*cm)
      #c.setFont('song',22)  # set the font to Song, size 22
      #c.setFillColorRGB(0.5,0.5,0.5)  # grey
      #c.rotate(45)  # rotate by 45 degrees; the coordinate system is rotated
      #c.drawString(-7*cm, 0*cm, content)
      #c.drawString(7*cm, 0*cm, content)
      #c.drawString(0*cm, 7*cm, content)
      #c.drawString(0*cm, -7*cm, content)
      # Set the stroke colour
      #c.setStrokeColorRGB(0, 1, 0)
      # Set the fill colour. ReportLab colour components range from 0 to 1,
      # so white is (1, 1, 1), not (255, 255, 255).
      c.setFillColorRGB(1, 1, 1)
      # Draw a filled rectangle
      c.rect(0, 0, W, H/2 - 10, fill=1)
      c.save()  # close and save the PDF file


  create_watermark(580, 820)
  # NOTE: add_watermark() is defined in the listing that follows, and
  # pdf_file_in, pdf_file_mark and pdf_file_out are not defined in this
  # listing; set them to your own paths before running this call.
  add_watermark(pdf_file_in, pdf_file_mark, pdf_file_out)
  1. ##encoding=utf-8
  2. ##author: walker
  3. ##date: 2014-03-18
  4. ##function:给pdf添加水印
  5. from PyPDF2 import PdfFileWriter, PdfFileReader
  6. from reportlab.pdfgen import canvas
  7. ##所有路径为绝对路径
  def add_watermark(pdf_file_in, pdf_file_mark, pdf_file_out):
      """Merge the first page of a watermark PDF onto every page of a PDF.

      Args:
          pdf_file_in: Path of the PDF to watermark.
          pdf_file_mark: Path of the watermark PDF; only its first page is used.
          pdf_file_out: Not used by this function; kept so existing calls
              keep working. The caller writes the returned writer to disk.

      Returns:
          A PdfFileWriter holding the watermarked pages, or False when the
          input is encrypted and cannot be decrypted with an empty password.
      """
      pdf_output = PdfFileWriter()
      # The reader is given the path itself, so no separate file handle is
      # opened here that would then be left unclosed.
      pdf_input = PdfFileReader(pdf_file_in)
      # The PDF file is encrypted
      if pdf_input.getIsEncrypted():
          print('该PDF文件被加密了.')
          # Try to decrypt with an empty password. decrypt() reports a wrong
          # password by returning 0, so the return value is checked too.
          try:
              decrypted = pdf_input.decrypt('')
          except Exception:
              decrypted = 0
          if not decrypted:
              print('尝试用空密码解密失败.')
              return False
          print('用空密码解密成功.')
      # Number of pages in the input PDF
      pageNum = pdf_input.getNumPages()
      # Read the watermark PDF; the same first page is stamped on every page.
      watermark_page = PdfFileReader(pdf_file_mark).getPage(0)
      # Watermark each page
      for i in range(pageNum):
          page = pdf_input.getPage(i)
          page.mergePage(watermark_page)
          page.compressContentStreams()  # compress the page content
          pdf_output.addPage(page)
      return pdf_output
  # NOTE: pdf_file_mark, pdf_file_mark2 and pdf_file_out are not defined in
  # this listing; set them to your own paths first.
  PDF1 = add_watermark("GRE阅读白皮书.pdf", pdf_file_mark, pdf_file_out)
  PDF2 = add_watermark("GRE阅读白皮书.pdf", pdf_file_mark2, pdf_file_out)
  # add_watermark() returns False when the input could not be decrypted.
  if PDF1 is False or PDF2 is False:
      raise SystemExit('add_watermark() failed; nothing was written.')

  # Interleave the two versions: page i from PDF1, then page i from PDF2.
  pdf_output = PdfFileWriter()
  for i in range(PDF2.getNumPages()):
      pdf_output.addPage(PDF1.getPage(i))
      pdf_output.addPage(PDF2.getPage(i))
  # The context manager closes the output even if writing fails.
  with open(pdf_file_out, 'wb') as newFile:
      pdf_output.write(newFile)

Add Page Numbers

  1. ###!/usr/bin/env python3
  2. ## -*- coding:utf-8 -*-
  3. ## 本示例使用两个第三方库来实现为PDF文件添加文字水印
  4. ## 这两个库是pyPdf和reportlab
  5. ## 使用的Python版本是Python 3.7
  6. ## origing from https://www.cnblogs.com/kayb/p/10846341.html
  7. ## 作者:小磊
  8. ##链接:https://www.zhihu.com/question/19628465/answer/353504051
  9. ##来源:知乎
  10. ##著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
  11. ##!/usr/bin/env python3
  12. ## -*- coding: utf-8 -*-
  13. helpDoc = '''
  14. Add Page Number to PDF file with Python
  15. Python 给 PDF 添加 页码
  16. usage:
  17. python addPageNumberToPDF.py [PDF path]
  18. require:
  19. pip install reportlab pypdf2
  20. Support both Python2/3, But more recommend Python3
  21. tips:
  22. * output file will save at pdfWithNumbers/[PDF path]_page.pdf
  23. * only support A4 size PDF
  24. * tested on Python2/Python3@ubuntu
  25. * more large size of PDF require more RAM
  26. * if segmentation fault, plaese try use Python 3
  27. * if generate PDF document is damaged, plaese try use Python 3
  28. Author:
  29. Lei Yang (ylxx@live.com)
  30. GitHub:
  31. https://gist.github.com/DIYer22/b9ede6b5b96109788a47973649645c1f
  32. '''
  33. print(helpDoc)
  34. import reportlab
  35. from reportlab.lib.units import mm
  36. from reportlab.pdfgen import canvas
  37. from PyPDF2 import PdfFileWriter, PdfFileReader
  38. path = 'test.pdf'
  def createPagePdf(num, tmp):
      """Create a PDF at `tmp` with `num` pages, each showing its page number.

      Page i (counting from 1) carries the text str(i), drawn at
      x = (210 // 2) mm and y = 4 mm.

      Args:
          num: Number of pages to generate.
          tmp: Path of the PDF file to create.

      Returns:
          None. The code that used to follow the bare `return` could never
          run and has been removed.
      """
      c = canvas.Canvas(tmp)
      for i in range(1, num + 1):
          c.drawString((210//2)*mm, (4)*mm, str(i))
          c.showPage()  # finish this page and start the next one
      c.save()
  if __name__ == "__main__":
      import os
      import sys

      if len(sys.argv) == 1:
          # No argument given: fall back to the default `path`, which must exist.
          if not os.path.isfile(path):
              sys.exit(1)
      else:
          path = sys.argv[1]
      base = os.path.basename(path)

      tmp = "__tmp.pdf"

      # Pages per output file. 0 means no splitting: it is replaced by -n
      # below, so `p % batch` is never 0 for 0 < p < n and everything ends
      # up in one file with the suffix '_page_0'.
      batch = 0
      output = PdfFileWriter()
      # The input stays open until the last output file has been written.
      with open(path, 'rb') as fin:
          pdf = PdfFileReader(fin, strict=False)
          n = pdf.getNumPages()
          if batch == 0:
              batch = -n
          # One numbered overlay page per input page.
          createPagePdf(n, tmp)
          if not os.path.isdir('pdfWithNumbers/'):
              os.mkdir('pdfWithNumbers/')
          with open(tmp, 'rb') as ftmp:
              numberPdf = PdfFileReader(ftmp)
              for p in range(n):
                  if not p % batch and p:
                      # A batch is full: write it out and start a new writer.
                      newpath = path.replace(base, 'pdfWithNumbers/'+ base[:-4] + '_page_%d'%(p//batch) + path[-4:])
                      with open(newpath, 'wb') as fout:
                          output.write(fout)
                      output = PdfFileWriter()
                  print('page: %d of %d'%(p, n))
                  page = pdf.getPage(p)
                  numberLayer = numberPdf.getPage(p)
                  page.mergePage(numberLayer)
                  output.addPage(page)
              # Write whatever is left in the last (or only) batch.
              if output.getNumPages():
                  newpath = path.replace(base, 'pdfWithNumbers/' + base[:-4] + '_page_%d'%(p//batch + 1) + path[-4:])
                  with open(newpath, 'wb') as fout:
                      output.write(fout)
          # The overlay file is closed at this point and can be deleted.
          os.remove(tmp)

Access the size of pages

Reference: SUN_SU3 2020

  def pdf_size(path, page=0):
      """Return (height, width) of one page, read from its /MediaBox.

      When the page's /Rotate entry is 90 or 270 the two values are swapped.
      The values are entries 3 and 2 of /MediaBox (the upper-right corner).

      Args:
          path: Path of the PDF file.
          page: Zero-based index of the page to measure.

      Returns:
          A (height, width) tuple.
      """
      # Read everything that is needed while the file is open, then let the
      # context manager close it.
      with open(path, 'rb') as pdf_file:
          page_1 = PdfFileReader(pdf_file).getPage(page)
          rotation = page_1.get('/Rotate', 0)
          box_x, box_y = page_1['/MediaBox'][2], page_1['/MediaBox'][3]
      if rotation in [90, 270]:
          return box_x, box_y
      return box_y, box_x


  # NOTE: `path` must already be set to the PDF you want to measure.
  height, width = pdf_size(path)
  print('height: %s, width: %s'%(height, width))
  1. height: 767.06, width: 575.29

This is the page size of a PDF file produced by a Sony DPT-1.

Crop the pages of PDF

To do this, you need to know the page size of your PDF and its width/height ratio.

  File = "Improving_Reading_Skills.pdf"
  height, width = pdf_size(File, 20)  # pdf_size() comes from the listing above
  # Width that matches a height of 690 at this page's own aspect ratio
  # (computed from the measured size instead of hard-coded numbers).
  C_width = round(690 / (float(height) / float(width)), 2)
  with open(File, "rb") as in_f:
      input1 = PdfFileReader(in_f)
      output = PdfFileWriter()
      # Number of pages
      numPages = input1.getNumPages()
      print ("document has %s pages." % numPages)
      # Start: crop the first 10 pages, or all pages of a shorter document
      for i in range(min(10, numPages)):
          page = input1.getPage(i)
          print( page.mediaBox.getUpperRight_x(), page.mediaBox.getUpperRight_y())
          # (x, y): x runs from left to right, y from bottom to top
          #page.trimBox.lowerLeft = (400, 700)
          # The lower-left corner must hold the smaller coordinates; the
          # original listing had the two corners swapped.
          page.cropBox.lowerLeft = (100, 200)
          page.cropBox.upperRight = (500, 600)
          output.addPage(page)
      # End
      # Write while the input file is still open.
      with open("out.pdf", "wb") as out_f:
          output.write(out_f)

PDF - 图4

When the left and right pages are different

  # NOTE: `File` is the input path set in the previous listing.
  with open(File, "rb") as in_f:
      input1 = PdfFileReader(in_f)
      output = PdfFileWriter()
      # Number of pages
      numPages = input1.getNumPages()
      print ("document has %s pages." % numPages)
      # Start
      for i in range(numPages):
          page = input1.getPage(i)
          print( page.mediaBox.getUpperRight_x(), page.mediaBox.getUpperRight_y())
          # (x, y): x runs from left to right, y from bottom to top
          #page.trimBox.lowerLeft = (400, 700)
          # Pages with an even index and pages with an odd index get a
          # different horizontal window. The lower-left corner holds the
          # smaller coordinates; the original listing had the corners swapped.
          if i % 2 == 0:
              lowerLeft, upperRight = (60, 40), (540, 680)
          else:
              lowerLeft, upperRight = (100, 40), (580, 680)
          page.cropBox.lowerLeft = lowerLeft
          page.trimBox.lowerLeft = lowerLeft
          page.cropBox.upperRight = upperRight
          page.trimBox.upperRight = upperRight
          output.addPage(page)
      # End
      # Write while the input file is still open.
      with open("out.pdf", "wb") as out_f:
          output.write(out_f)

pdfplumber

Read

  import pdfplumber

  path = 'MMR.pdf'
  pdf = pdfplumber.open(path)
  # ... work with `pdf` here ...
  # Close the document when finished so the file handle is released.
  pdf.close()

Reference: SUN_SU3 2020

  import pdfplumber

  path = 'MMR.pdf'


  def run(path):
      """Return the (height, width) of the first page of the PDF at `path`."""
      with pdfplumber.open(path) as document:
          first_page = document.pages[0]
          dimensions = (first_page.height, first_page.width)
      return dimensions


  height, width = run(path)
  print('height: %s, width: %s'%(height, width))
  1. height: 841.920, width: 595.200

Enjoy~

本文由Python腳本GitHub/語雀自動更新

由於語法渲染問題而影響閱讀體驗, 請移步博客閱讀~
本文GitPage地址

GitHub: Karobben
Blog:Karobben
BiliBili:史上最不正經的生物狗