采用chrome headless方案
为什么要采用Chrome headless
因为wkhtmltopdf内置的是Qt的WebKit,已经很久没有更新,对很多CSS3和HTML5特性的支持都不好。
Chrome官方提供的页面转换为PDF的接口
https://chromedevtools.github.io/devtools-protocol/tot/Page#method-printToPDF
命令行方式
chrome --headless --print-to-pdf=path/to/file.pdf https://example.com
参考:HTML to PDF conversion using Chrome pdfium?
NodeJS扩展
html-pdf-chrome HTML to PDF converter via Chrome/Chromium.
PHP扩展
chrome-html-to-pdf Converts HTML to PDF using Google Chrome
Chrome命令行参数列表
List of Chromium Command Line Switches
采用Qt的WebEngine(PyQt5)
由于当前的Chrome转换存在BUG:转换大文件时内存消耗特别大,生成的文件也比较大。测试了10多种方法后,最后决定采用PyQt5来实现。
#!/usr/bin/env python3
import sys
import argparse
from PyQt5.QtCore import QUrl, QMarginsF
from PyQt5.QtGui import QPageLayout, QPageSize
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5.QtWidgets import QApplication
class PrinterView(QWebEngineView):
    """Web view that loads one URL and then previews it or prints it to PDF.

    Once the page has finished loading, the view is either shown on screen
    (preview mode) or rendered to ``filename`` as an A5 portrait PDF with
    zero margins. Completion of the PDF job is reported to the module-level
    ``on_pdf_finished`` handler.
    """

    def __init__(self, url, filename, do_preview, parent=None):
        """Start loading ``url`` and arrange for the follow-up action.

        Args:
            url: Address of the page to load; passed to ``QUrl``.
            filename: Path of the PDF file to write.
            do_preview: When true, show the page instead of printing it.
            parent: Optional parent widget.
        """
        super(PrinterView, self).__init__(parent)
        self.do_preview = do_preview
        self.filename = filename

        # Connect both handlers exactly once, before navigation starts.
        # Connecting the PDF handler from inside load_finished would attach
        # another copy every time loadFinished is emitted.
        self.loadFinished.connect(self.load_finished)
        self.page().pdfPrintingFinished.connect(on_pdf_finished)

        self.setUrl(QUrl(url))
        self.setZoomFactor(1)

    def load_finished(self):
        """Show the loaded page (preview mode) or print it to ``self.filename``."""
        if self.do_preview:
            self.show()
            return

        page_layout = QPageLayout(
            QPageSize(QPageSize.A5),
            QPageLayout.Portrait,
            QMarginsF(0, 0, 0, 0),
        )
        self.page().printToPdf(self.filename, page_layout)
def on_pdf_finished(result, success=True):
    """Quit the application once PDF generation has ended.

    The handler is connected to ``pdfPrintingFinished``, which delivers the
    output file path followed by a success flag. A handler that accepts only
    one argument receives just the path, which is a non-empty string even
    when printing failed, so the flag must be accepted explicitly for the
    failure branch to be reachable.

    Args:
        result: First value delivered by the caller (the output file path
            when invoked by ``pdfPrintingFinished``). Printed on success.
        success: Whether the PDF was written. Defaults to ``True`` so that
            callers passing a single value keep their previous behaviour.
    """
    if result and success:
        print(result)
        QApplication.exit()
    else:
        # Non-zero exit status signals the failure to the calling shell.
        QApplication.exit(1)
if __name__ == '__main__':
    # The Qt application object is created before the command line is parsed.
    app = QApplication(sys.argv)

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--url", "-i",
        required=True,
        help="Input URL (http://example.com, file:///home/user/example.html, ...)",
    )
    cli.add_argument("--output", "-o", required=True, help="Write pdf to this file")
    cli.add_argument("--preview", "-p", action="store_true", help="Open preview")
    options = cli.parse_args()

    # The view stays bound to a name for as long as the event loop runs.
    view = PrinterView(options.url, options.output, options.preview)
    sys.exit(app.exec_())
采用Qt的打印接口(QPrinter)打印
import sys
import argparse
from PyQt5.QtCore import QUrl, QMarginsF
from PyQt5.QtGui import QPageLayout, QPageSize
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEnginePage, QWebEngineProfile
from PyQt5.QtWidgets import QApplication
from PyQt5.QtPrintSupport import QPrinter, QPrintDialog
class PrinterView(QWebEngineView):
    """Web view that loads one URL and prints it through a ``QPrinter``.

    The printer is configured for A5 portrait PDF output with zero margins,
    written to ``filename``. ``load_finished2`` is the handler connected to
    ``loadFinished``; ``load_finished`` is an alternative ``printToPdf``-based
    handler that is kept available but is not connected here.
    """

    # setHttpCacheMaximumSize takes a C++ ``int``; a 5 GiB request does not
    # fit, so the largest representable value is used instead.
    _MAX_HTTP_CACHE_BYTES = 2 ** 31 - 1

    def __init__(self, url, filename, do_preview, parent=None):
        """Configure the printer and HTTP cache, then start loading ``url``.

        Args:
            url: Address of the page to load; passed to ``QUrl``.
            filename: Path of the PDF file to write.
            do_preview: Stored on the instance; consulted by
                ``load_finished`` only (``load_finished2`` always shows
                the view).
            parent: Optional parent widget.
        """
        # Initialise the Qt base class before touching any instance state.
        super(PrinterView, self).__init__(parent)
        self.do_preview = do_preview
        self.filename = filename

        self.printer = QPrinter()
        self.printer.setPageSize(QPrinter.A5)
        self.printer.setOrientation(QPrinter.Portrait)
        self.printer.setOutputFormat(QPrinter.PdfFormat)
        self.printer.setOutputFileName(filename)
        self.printer.setPageMargins(0, 0, 0, 0, QPrinter.Millimeter)

        profile = self.page().profile()
        profile.setHttpCacheMaximumSize(self._MAX_HTTP_CACHE_BYTES)
        profile.setHttpCacheType(QWebEngineProfile.MemoryHttpCache)

        self.loadFinished.connect(self.load_finished2)
        self.setUrl(QUrl(url))
        self.setZoomFactor(1)

    def load_finished(self):
        """Show the page (preview mode) or render it with ``printToPdf``."""
        if self.do_preview:
            self.show()
            return

        page_layout = QPageLayout(
            QPageSize(QPageSize.A5),
            QPageLayout.Portrait,
            QMarginsF(0, 0, 0, 0),
        )
        # Attach the completion handler before the job is started.
        self.page().pdfPrintingFinished.connect(on_pdf_finished)
        self.page().printToPdf(self.filename, page_layout)

    def load_finished2(self):
        """Show the view and print the page through ``self.printer``."""
        self.show()
        self.page().print(self.printer, on_pdf_finished)
def on_pdf_finished(result, success=True):
    """Quit the application once printing has ended.

    This handler is used in two ways in this script: as the completion
    callback of ``page().print(...)``, which passes a single value, and as a
    slot for ``pdfPrintingFinished``, which delivers the output file path
    followed by a success flag. Accepting the flag explicitly makes the
    failure branch reachable in the second case, where the path alone is a
    non-empty string.

    Args:
        result: First value delivered by the caller. Printed on success.
        success: Whether the output was written. Defaults to ``True`` so
            single-argument callers keep their previous behaviour.
    """
    if result and success:
        print(result)
        QApplication.exit()
    else:
        # Non-zero exit status signals the failure to the calling shell.
        QApplication.exit(1)
if __name__ == '__main__':
    # The Qt application object is created before the command line is parsed.
    app = QApplication(sys.argv)

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--url", "-i",
        required=True,
        help="Input URL (http://example.com, file:///home/user/example.html, ...)",
    )
    arg_parser.add_argument(
        "--output", "-o",
        required=True,
        help="Write pdf to this file",
    )
    arg_parser.add_argument(
        "--preview", "-p",
        action="store_true",
        help="Open preview",
    )
    parsed = arg_parser.parse_args()

    # The view stays bound to a name for as long as the event loop runs.
    printer_view = PrinterView(parsed.url, parsed.output, parsed.preview)
    sys.exit(app.exec_())
使用Firefox(Gecko)内核生成PDF
slimer-html-pdf – convert any HTML document to PDF format using slimerjs (Gecko)
大文件合并(Python)
def on_pdf_finished(self, result):
    """Record one finished export, then start the next one or merge all parts.

    Each call increments ``self.printed``. While fewer than ``self.total``
    parts have been exported, ``self.print_book()`` is called to continue.
    After the last part, the files ``<filename>.<index>.pdf`` for every index
    in ``range(self.total)`` are merged into ``self.filename`` and the
    application exits.

    Args:
        result: Value reported for the finished export. A truthy value is
            concatenated into the progress message, so it is expected to be
            a string; a falsy value is reported as a failed export.
    """
    if result:
        print(result + ', total ' + str(self.total))
    else:
        print("导出失败")
    self.printed += 1
    print('导出第', self.printed, '本')

    if self.printed < self.total:
        self.print_book()
        return

    print('开始合并')
    merger = PdfFileMerger()
    try:
        for index in range(self.total):
            part_path = self.filename + '.' + str(index) + '.pdf'
            merger.append(part_path)
            print('合并第', index, '本')
        merger.write(self.filename)
    finally:
        # Close the merger even when a part is missing or unreadable.
        merger.close()

    print('合并完成,开始清除临时文件')
    # NOTE(review): removal of the per-part files is currently disabled,
    # although the surrounding messages still announce it. Confirm whether
    # the cleanup below should be re-enabled.
    # for index in range(self.total):
    #     os.remove(self.filename + '.' + str(index) + '.pdf')
    print('清除临时文件完成')
    QApplication.exit()