Converting emojis to Unicode and vice versa in python 3
# -*- coding: UTF-8 -*-
# The sample characters are spelled with escape sequences so they survive any
# re-encoding of this file (a raw emoji literal had degraded to "?").
text = u"\U0001f188\ue513\ue220\ue21c"

# Characters -> escaped ASCII representation.
escaped = text.encode('unicode-escape').decode('ASCII')
print(escaped)  # output: \U0001f188\ue513\ue220\ue21c

# ... and vice versa: escaped ASCII representation -> characters.
restored = escaped.encode('ASCII').decode('unicode-escape')
print(restored == text)  # output: True
Converting emojis to Unicode and vice versa in python 3
# -*- coding: UTF-8 -*-
# Escape sequences keep the sample characters intact even if this file is
# re-encoded (the raw literal had been replaced by "?").
text = u"\U0001f188\ue513\ue220\ue21c"
print(text.encode('unicode-escape').decode('ASCII'))  # output: \U0001f188\ue513\ue220\ue21c
# Install the samba and client packages; run only the line matching your distribution.
sudo yum install -y samba samba-client ## centos
sudo apt install smbclient samba ## ubuntu
# Share definition; presumably appended to Samba's smb.conf (confirm the path for your distro).
# The bracketed section name is the share name.
[sambashare]
comment = Samba on Ubuntu
# Directory exposed by this share; replace "username" with the real account name.
path = /home/username/sambashare
read only = no
browsable = yes
# Enable and start the Samba service; the unit is named "smb" on CentOS and "smbd" on Ubuntu.
sudo systemctl enable smb ## centos
sudo systemctl start smb ## centos
sudo systemctl enable smbd ## ubuntu
sudo systemctl start smbd ## ubuntu
# Set a Samba password for "username" (-a presumably adds the user to Samba's
# password database; confirm with `man smbpasswd`).
sudo smbpasswd -a username
打开资源管理器,在地址栏输入"\\ip"(将 ip 替换为 Samba 服务器的 IP 地址),然后输入用户名和密码即可
写 Scrapy 爬虫时,遇到了 js 进行跳转的页面,大家有没有好的解决方法?
答案是:
Splash is a javascript rendering service with an HTTP API. It’s a lightweight browser with an HTTP API, implemented in Python 3 using Twisted and QT5.
It’s fast, lightweight and state-less which makes it easy to distribute.
Documentation: available at https://splash.readthedocs.io/
This library provides Scrapy and JavaScript integration using Splash. The license is BSD 3-clause.
定位的最新定义是:如何让你在潜在客户的心智中与众不同。
一个词定位:奔驰宝马沃尔沃
倾听是沟通的基础。倾听过程还要对信息进行解析,并作出积极回应
相关代码
// HTTP-hijack detector: 3 seconds after this script runs, collect the `src`
// of every <script> and <iframe> whose host is not whitelisted and report
// the list to /http_hijack.png through an image beacon.
setTimeout(function () {
    // The try/catch lives inside the callback: wrapped around setTimeout()
    // itself it could never catch errors thrown when the callback runs.
    try {
        // Hosts considered legitimate: the listed domains and their
        // subdomains. Dots are escaped and the match is anchored at a label
        // boundary so "evilbaidu.com" or "baiduXcom" do not pass.
        var trustedHost = /(^|\.)(baidu\.com|qq\.com|mydomain\.com)$/;
        var data = { from: location.href, urls: [] };

        // Returns true when `url` is about:blank or points at a trusted host.
        var isUrlLooksGood = function (url) {
            if (url === 'about:blank') { return true; }
            var hostname;
            try {
                // Resolve against the page URL so relative src values
                // ("/js/app.js") are accepted instead of throwing.
                hostname = new URL(url, location.href).hostname;
            } catch (e) {
                return false; // unparsable URL: treat as suspicious
            }
            return trustedHost.test(hostname);
        };

        // Appends the src of every element matching `selector` that fails the check.
        var collectSuspicious = function (selector) {
            $(selector).each(function () {
                var src = $(this).attr('src');
                if (src && !isUrlLooksGood(src)) {
                    data.urls.push(src);
                }
            });
        };

        collectSuspicious('script');
        collectSuspicious('iframe');

        if (data.urls.length > 0) {
            // Random `t` defeats caching of the beacon request.
            (new Image).src = '/http_hijack.png?t=' + Math.random() + '&d=' + encodeURIComponent(JSON.stringify(data));
        }
    } catch (e) {
        // Best effort: detection must never break the host page.
    }
}, 3000);
因为 wkhtmltopdf 内置的是 Qt 的 WebKit,已经很久没有更新,对很多 CSS3 以及 HTML5 特性的支持都不够友好。
https://chromedevtools.github.io/devtools-protocol/tot/Page#method-printToPDF
chrome --headless --print-to-pdf=path/to/file.pdf https://example.com
参考:HTML to PDF conversion using Chrome pdfium?
html-pdf-chrome HTML to PDF converter via Chrome/Chromium.
chrome-html-to-pdf Converts HTML to PDF using Google Chrome
List of Chromium Command Line Switches
由于当前的chrome转换存在BUG,转换大文件时内存消耗特别大,生成的文件也比较大,测试了10多种方法后,最后决定采用PyQt5来做
#!/usr/bin/env python3
import sys
import argparse
from PyQt5.QtCore import QUrl, QMarginsF
from PyQt5.QtGui import QPageLayout, QPageSize
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5.QtWidgets import QApplication
class PrinterView(QWebEngineView):
    """Web view that loads a URL and then either previews it or prints it to PDF.

    Args:
        url: Address of the page to load.
        filename: Path the PDF is written to.
        do_preview: When true, show the page in a window instead of printing.
        parent: Optional parent widget passed to ``QWebEngineView``.
    """

    def __init__(self, url, filename, do_preview, parent=None):
        super().__init__(parent)
        self.do_preview = do_preview
        self.filename = filename
        # Connect once, up front: connecting inside load_finished() added a
        # new connection on every loadFinished emission (duplicate slot
        # calls) and happened only after printing had been requested.
        self.page().pdfPrintingFinished.connect(on_pdf_finished)
        self.loadFinished.connect(self.load_finished)
        self.setUrl(QUrl(url))
        self.setZoomFactor(1)

    def load_finished(self):
        """Show the page in preview mode, otherwise print it as a borderless A5 portrait PDF."""
        if self.do_preview:
            self.show()
            return
        page_layout = QPageLayout(QPageSize(QPageSize.A5), QPageLayout.Portrait,
                                  QMarginsF(0, 0, 0, 0))
        self.page().printToPdf(self.filename, page_layout)
def on_pdf_finished(result, success=True):
    """Quit the application when PDF printing has finished.

    ``QWebEnginePage.pdfPrintingFinished`` emits ``(filePath, success)``.
    A one-parameter slot only receives the file path, which is non-empty even
    when printing failed, so the failure branch was never reached. Accepting
    the second argument lets the exit status reflect the real outcome.

    Args:
        result: File path reported by the signal (or any truthy/falsy outcome
            when called with a single argument).
        success: Whether printing succeeded; defaults to ``True`` so existing
            single-argument callers keep their behaviour.
    """
    if result and success:
        print(result)
        QApplication.exit()
    else:
        QApplication.exit(1)
if __name__ == '__main__':
    # The application object is created first, before any widget.
    app = QApplication(sys.argv)

    # Command line: input URL, output PDF path, optional preview switch.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--url", "-i",
        required=True,
        help="Input URL (http://example.com, file:///home/user/example.html, ...)",
    )
    cli.add_argument("--output", "-o", required=True, help="Write pdf to this file")
    cli.add_argument("--preview", "-p", action="store_true", help="Open preview")
    options = cli.parse_args()

    # The view starts loading immediately; the event loop then runs until
    # QApplication.exit() is called.
    view = PrinterView(options.url, options.output, options.preview)
    sys.exit(app.exec_())
import sys
import argparse
from PyQt5.QtCore import QUrl, QMarginsF
from PyQt5.QtGui import QPageLayout, QPageSize
from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEnginePage, QWebEngineProfile
from PyQt5.QtWidgets import QApplication
from PyQt5.QtPrintSupport import QPrinter, QPrintDialog
class PrinterView(QWebEngineView):
    """Web view that loads a URL and prints it to PDF through a ``QPrinter``.

    Args:
        url: Address of the page to load.
        filename: Path the PDF is written to.
        do_preview: Stored on the instance; only the unused ``load_finished``
            consults it.
        parent: Optional parent widget passed to ``QWebEngineView``.
    """

    # setHttpCacheMaximumSize() takes a C++ int, so the requested 5 GiB
    # (5 * 1024**3) does not fit; use the largest representable size instead.
    _HTTP_CACHE_BYTES = min(5 * 1024 * 1024 * 1024, 2 ** 31 - 1)

    def __init__(self, url, filename, do_preview, parent=None):
        # Initialise the Qt base class before touching the instance.
        super().__init__(parent)
        self.do_preview = do_preview
        self.filename = filename

        # Printer configured for a borderless A5 portrait PDF written to `filename`.
        self.printer = QPrinter()
        self.printer.setPageSize(QPrinter.A5)
        self.printer.setOrientation(QPrinter.Portrait)
        self.printer.setOutputFormat(QPrinter.PdfFormat)
        self.printer.setOutputFileName(filename)
        self.printer.setPageMargins(0, 0, 0, 0, QPrinter.Millimeter)

        profile = self.page().profile()
        profile.setHttpCacheMaximumSize(self._HTTP_CACHE_BYTES)
        profile.setHttpCacheType(QWebEngineProfile.MemoryHttpCache)

        self.setUrl(QUrl(url))
        self.setZoomFactor(1)
        self.loadFinished.connect(self.load_finished2)

    def load_finished(self):
        """Alternative handler using ``printToPdf``; not connected by ``__init__``."""
        if self.do_preview:
            self.show()
            return
        page_layout = QPageLayout(QPageSize(QPageSize.A5), QPageLayout.Portrait,
                                  QMarginsF(0, 0, 0, 0))
        # Connect before requesting the print so the completion signal has a
        # listener from the start.
        self.page().pdfPrintingFinished.connect(on_pdf_finished)
        self.page().printToPdf(self.filename, page_layout)

    def load_finished2(self):
        """Show the view and print the page with the configured printer."""
        self.show()
        self.page().print(self.printer, on_pdf_finished)
def on_pdf_finished(result):
    """Exit the application: status 0 (after echoing `result`) if it is truthy, else status 1."""
    if not result:
        QApplication.exit(1)
        return
    print(result)
    QApplication.exit()
if __name__ == '__main__':
    # The application object is created first, before any widget.
    app = QApplication(sys.argv)

    # Command line: input URL, output PDF path, optional preview switch.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--url", "-i",
        required=True,
        help="Input URL (http://example.com, file:///home/user/example.html, ...)",
    )
    cli.add_argument("--output", "-o", required=True, help="Write pdf to this file")
    cli.add_argument("--preview", "-p", action="store_true", help="Open preview")
    options = cli.parse_args()

    # The view starts loading immediately; the event loop then runs until
    # QApplication.exit() is called.
    view = PrinterView(options.url, options.output, options.preview)
    sys.exit(app.exec_())
slimer-html-pdf – convert any HTML document to PDF format using slimerjs (Gecko)
def on_pdf_finished(self, result):
    """Handle one finished export: start the next book, or merge all parts and quit.

    Each call counts as one exported book, whether or not `result` is truthy.
    While fewer than `self.total` books are done, the next export is started
    via `self.print_book()`. Afterwards the part files
    `<filename>.<index>.pdf` (index 0 .. total-1) are merged into
    `self.filename` and the application exits.
    """
    if result:
        print(result + ', total ' + str(self.total))
    else:
        print("导出失败")

    self.printed += 1
    print('导出第', self.printed, '本')

    # More books pending: kick off the next export and wait for its callback.
    if self.printed < self.total:
        self.print_book()
        return

    print('开始合并')
    merger = PdfFileMerger()
    for part_no in range(self.total):
        part_path = self.filename + '.' + str(part_no) + '.pdf'
        merger.append(part_path)
        print('合并第', part_no, '本')
    merger.write(self.filename)
    merger.close()

    print('合并完成,开始清除临时文件')
    # NOTE: removal of the part files is currently disabled; the messages
    # around it are still printed.
    # for index in range(0, self.total):
    # filepath = self.filename + '.' + str(index) + '.pdf'
    # os.remove(filepath)
    print('清除临时文件完成')
    QApplication.exit()
# NOTE(review): -k presumably kills every process using the given path - confirm with `man fuser` before running.
fuser -k /var/www/html/centos/
一款html和markdown标签互转的工具,直接输入Html,网页会自动帮你转换。
Javascript Implementation for converting HTML to Markdown text.
Javascript implementation for converting HTML to Markdown text. Browser and Node.js support.
Convert HTML into Markdown with JavaScript.
用途: 爬虫爬文章保存到本地为 Markdown 格式
将内容复制到左侧输入框内,点击生成MD,在中部编辑器处进行二次修改,并在右侧的预览框中查看效果。 确认无误后点击上方的复制代码按钮即可将代码复制到剪贴板中!
An HTML-to-markdown conversion helper for PHP
Markdown language support for IntelliJ platform