标签归档:pdfrw

Python 修改PDF文档尺寸以及去除水印图片

# -*- coding: UTF-8 -*-

import sys
import os

from pdfrw import PageMerge, PdfReader, PdfWriter, IndirectPdfDict
import fitz


# resize
def adjust(page, width=421, height=595):
    """Crop ``page`` to a centred ``width`` x ``height`` window.

    Args:
        page: A page object as yielded by ``PdfReader(...).pages``.
        width: Width of the window to keep. Defaults to 421, the value
            this script was written with (presumably A5 portrait in PDF
            points -- confirm before relying on it).
        height: Height of the window to keep. Defaults to 595.

    Returns:
        The new page produced by ``PageMerge.render()``.
    """
    # Bounding box of the page once wrapped as a form XObject.
    left, bottom, right, top = PageMerge().add(page).xobj_box
    # viewrect is (x, y, w, h). Each offset is half of the surplus along its
    # axis, which centres the window. The extent is measured from the box's
    # own origin rather than assuming that origin is (0, 0).
    viewrect = (
        (right - left - width) / 2,
        (top - bottom - height) / 2,
        width,
        height,
    )
    return PageMerge().add(page, viewrect=viewrect).render()

# Resize pass: run every page of the input PDF through adjust() and write
# the result to an intermediate "mid.<name>" file in the working directory.
(fin,) = sys.argv[1:]  # unpacking fails unless exactly one argument is given
fout = 'mid.' + os.path.basename(fin)
reader = PdfReader(fin)
writer = PdfWriter(fout)
writer.addpages(adjust(source_page) for source_page in reader.pages)
# Carry the source document's Info dictionary over (empty when it has none).
writer.trailer.Info = IndirectPdfDict(reader.Info or {})
writer.write()

# Strip background images from the intermediate file.
# NOTE(review): getPageImageList / _deleteObject look like legacy PyMuPDF
# names -- confirm they still exist in the installed version.
# Cross-reference number of the image object to delete. The value is
# hard-coded, presumably for one particular document -- verify before reuse.
TARGET_XREF = 51
doc = fitz.open(fout)
for page_index in range(len(doc)):
    for image_entry in doc.getPageImageList(page_index):
        # The first field of each entry is compared against the target xref.
        if image_entry[0] == TARGET_XREF:
            doc._deleteObject(image_entry[0])
        print(image_entry)
doc.save('new.' + os.path.basename(fin))