采用了ip2region的PHP扩展模式
php.ini配置
[ip2region]
extension=ip2region.so
ip2region.db_file=/usr/local/php/ip2region/ip2region.db
相关代码
$ip = $request->get['ip'];
return Ip2region::btreeSearchString($ip);
采用了ip2region的PHP扩展模式
php.ini配置
[ip2region]
extension=ip2region.so
ip2region.db_file=/usr/local/php/ip2region/ip2region.db
相关代码
$ip = $request->get['ip'];
return Ip2region::btreeSearchString($ip);
// fixUrl resolves href against base and returns the resulting absolute URL
// as a string. It returns "" when either href or base cannot be parsed.
func fixUrl(href, base string) string {
	ref, refErr := url.Parse(href)
	baseURL, baseErr := url.Parse(base)
	if refErr != nil || baseErr != nil {
		return ""
	}
	return baseURL.ResolveReference(ref).String()
}
根据当前文章标题,找到相近的10篇文章
测试案例
家里想买一台婴儿理发器,自己给小孩子理发,如何选购婴儿理发器
当前采用any模式进行匹配,匹配结果
家里想买一台彩色激光多功能一体机,什么牌子、什么型号的好?
我家里想买一台家用净水器,一个重庆的朋友给我推荐德国曼稣勒净水器,广东有德国曼稣勒净水器卖吗?
滚筒洗衣机有哪些品牌,家里想买一台滚筒洗衣机好不好,有什么好处
家里想买一台跑步机,有什么需要注意的地方吗
家里想买一台修鞋机,谁告诉我买什么牌子的好呢?
家里想买一台智能电视,长虹Q2F好吗?听说是首款移动互联电视呢。
我是搞婚庆的,想买一台高清摄像机自己用,请大家帮忙参考一下!价格在1W—2W,2W多点也不要紧。
家里想买一台跑步机,不知道什么牌子跑步机比较好
壁挂式新风系统有什么特点?最近家里想买一台新风机,由于家里已
家里想买一台婴儿理发器,自己给小孩子理发,如何选购婴儿理发器
运宝婴儿理发器怎样啊?好用吗?我想一个运宝理发器给小孩子用。
婴儿理发器哪个牌子好呢?一般大家都是怎么给小孩子理发的呢?怎么让小孩子不乱动呢?
家里想买一台婴儿理发器,自己给小孩子理发,如何选购婴儿理发器
婴儿理发器哪个好宝妈们有给宝贝买理发器吗?哪款更方便好用且静
什么牌子的充电式婴儿理发器好用又便宜 什么牌子的充电式婴儿理
婴儿理发器,家里有的来。 我想知道婴儿理发器,哪个牌子的那款
婴儿理发器想买一个好些的婴儿理发器,要静音,充电,陶瓷头,可
婴儿理发器什么牌子的好 宝宝现在5个多月了,想自己给宝贝理发
给婴儿用的电动理发器哪个牌子好?老公要自己给儿子理发,那就买
蓄电池能接“百特静音电动婴儿理发器”吗?
采用结巴分词获取词性
家里/s 想买/v 一台/m 婴儿/n 理发器/n ,/w 自己/r 给/p 小孩子/n 理发/v ,/w 如何/r 选购/v 婴儿/n 理发器/n
获取名词
婴儿 理发器 小孩子 婴儿 理发器
然后进行搜索
<?php
use Fukuball\Jieba\Jieba;
use Fukuball\Jieba\Finalseg;
use Fukuball\Jieba\Posseg;
use Fukuball\Jieba\JiebaAnalyse;
use NilPortugues\Sphinx\SphinxClient;

// Initialise every jieba component before the first segmentation call.
Jieba::init();
Finalseg::init();
Posseg::init();
JiebaAnalyse::init();

// Sphinx client: extended query syntax, ranked by the doc_word_count
// expression, sorted by weight and then id (both descending), first 10 hits.
$sphinxSearch_new = new SphinxClient();
$sphinxSearch_new->setServer($host, 9312);
$sphinxSearch_new->setMatchMode(SPH_MATCH_EXTENDED2);
$sphinxSearch_new->setRankingMode(SPH_RANK_EXPR, 'doc_word_count');
$sphinxSearch_new->setSortMode(SPH_SORT_EXTENDED, '@weight DESC, id desc');
$sphinxSearch_new->setLimits(0, 10);

// Part-of-speech tag the text held in $v.
$seg_list = Posseg::cut($v);

// "word/tag " rendering of every token (not used by the query below).
$words = array_map(function ($token) {
    return "{$token['word']}/{$token['tag']} ";
}, $seg_list);

// Keep only the words whose tag is in the accepted list.
$valid_words = array_map(
    function ($token) {
        return $token['word'];
    },
    array_filter($seg_list, function ($token) {
        return in_array($token['tag'], ['n', 'ng', 'nrt', 'ns', 'nt', 'nz', 'j', 'l', 'vn']);
    })
);

// Build the query: the kept words, double-quoted, followed by "/2",
// queued with 'wd_question' as the second addQuery() argument.
$valid_words_str = implode(" ", $valid_words);
$sphinxSearch_new->addQuery('"' . $valid_words_str . '"/2', 'wd_question');
$result = $sphinxSearch_new->runQueries();
默认的PHP版本会比较慢,所以采用PHP扩展(扩展启动慢,幸好执行速度还行:10000次0.5秒)
<?php
// Micro-benchmark: run the jieba() extension call 10000 times on a fixed
// sentence and print the elapsed wall-clock time in seconds.
$time = microtime(true);
$i = 0;
while ($i < 10000) {
    $result = jieba('小明硕士毕业于中国科学院计算所,后在日本京都大学深造', 2);
    $i++;
}
$timenew = microtime(true);
echo "共耗时:", $timenew - $time, PHP_EOL;
https://github.com/fxsjy/jieba/issues/411
# Part-of-speech tag table. Each top-level key is a first-level tag; its value
# maps that tag and its finer-grained sub-tags to a Chinese description.
POS = {
"n": { # 1. Nouns (1 first-level class, 7 second-level, 5 third-level)
"n": "名词",
"nr": "人名",
"nr1": "汉语姓氏",
"nr2": "汉语名字",
"nrj": "日语人名",
"nrf": "音译人名",
"ns": "地名",
"nsf": "音译地名",
"nt": "机构团体名",
"nz": "其它专名",
"nl": "名词性惯用语",
"ng": "名词性语素"
},
"t": { # 2. Time words (1 first-level class, 1 second-level)
"t": "时间词",
"tg": "时间词性语素"
},
"s": { # 3. Place words (1 first-level class)
"s": "处所词"
},
"f": { # 4. Direction/locality words (1 first-level class)
"f": "方位词"
},
"v": { # 5. Verbs (1 first-level class, 9 second-level)
"v": "动词",
"vd": "副动词",
"vn": "名动词",
"vshi": "动词“是”",
"vyou": "动词“有”",
"vf": "趋向动词",
"vx": "形式动词",
"vi": "不及物动词(内动词)",
"vl": "动词性惯用语",
"vg": "动词性语素"
},
"a": { # 6. Adjectives (1 first-level class, 4 second-level)
"a": "形容词",
"ad": "副形词",
"an": "名形词",
"ag": "形容词性语素",
"al": "形容词性惯用语"
},
"b": { # 7. Distinguishing words (1 first-level class, 2 second-level)
"b": "区别词",
"bl": "区别词性惯用语"
},
"z": { # 8. Status words (1 first-level class)
"z": "状态词"
},
"r": { # 9. Pronouns (1 first-level class, 4 second-level, 6 third-level)
"r": "代词",
"rr": "人称代词",
"rz": "指示代词",
"rzt": "时间指示代词",
"rzs": "处所指示代词",
"rzv": "谓词性指示代词",
"ry": "疑问代词",
"ryt": "时间疑问代词",
"rys": "处所疑问代词",
"ryv": "谓词性疑问代词",
"rg": "代词性语素"
},
"m": { # 10. Numerals (1 first-level class, 1 second-level)
"m": "数词",
"mq": "数量词"
},
"q": { # 11. Measure words (1 first-level class, 2 second-level)
"q": "量词",
"qv": "动量词",
"qt": "时量词"
},
"d": { # 12. Adverbs (1 first-level class)
"d": "副词"
},
"p": { # 13. Prepositions (1 first-level class, 2 second-level)
"p": "介词",
"pba": "介词“把”",
"pbei": "介词“被”"
},
"c": { # 14. Conjunctions (1 first-level class, 1 second-level)
"c": "连词",
"cc": "并列连词"
},
"u": { # 15. Particles (1 first-level class, 15 second-level)
"u": "助词",
"uzhe": "着",
"ule": "了 喽",
"uguo": "过",
"ude1": "的 底",
"ude2": "地",
"ude3": "得",
"usuo": "所",
"udeng": "等 等等 云云",
"uyy": "一样 一般 似的 般",
"udh": "的话",
"uls": "来讲 来说 而言 说来",
"uzhi": "之",
"ulian": "连 " # (as in “连小学生都会”)
},
"e": { # 16. Interjections (1 first-level class)
"e": "叹词"
},
"y": { # 17. Modal particles (1 first-level class)
"y": "语气词(delete yg)"
},
"o": { # 18. Onomatopoeia (1 first-level class)
"o": "拟声词"
},
"h": { # 19. Prefixes (1 first-level class)
"h": "前缀"
},
"k": { # 20. Suffixes (1 first-level class)
"k": "后缀"
},
"x": { # 21. Strings (1 first-level class, 2 second-level)
"x": "字符串",
"xx": "非语素字",
"xu": "网址URL"
},
"w": { # 22. Punctuation (1 first-level class, 16 second-level)
"w": "标点符号",
"wkz": "左括号", # ( 〔 [ { 《 【 〖 〈 half-width: ( [ { <
"wky": "右括号", # ) 〕 ] } 》 】 〗 〉 half-width: ) ] } >
"wyz": "全角左引号", # “ ‘ 『
"wyy": "全角右引号", # ” ’ 』
"wj": "全角句号", # 。
"ww": "问号", # full-width: ? half-width: ?
"wt": "叹号", # full-width: ! half-width: !
"wd": "逗号", # full-width: , half-width: ,
"wf": "分号", # full-width: ; half-width: ;
"wn": "顿号", # full-width: 、
"wm": "冒号", # full-width: : half-width: :
"ws": "省略号", # full-width: …… …
"wp": "破折号", # full-width: —— -- ——- half-width: --- ----
"wb": "百分号千分号", # full-width: % ‰ half-width: %
"wh": "单位符号" # full-width: ¥ $ £ ° ℃ half-width: $
}
}
// Flatten the segmenter output into an ordered list of
// ['t' => tag, 'w' => word] so each token's neighbours can be read by index.
// NOTE(review): assumes jieba($t, 2) returns a word => tag map — confirm
// against the extension.
$words = [];
$seg_list = jieba($t, 2);
foreach ($seg_list as $k => $v) {
    // PHP stores numeric-looking array keys ("0", "2019") as integers. Cast
    // back to string so every word is compared and stored as text; before
    // PHP 8 a loose comparison treats integer 0 as equal to any non-numeric
    // string, which made the word "0" look like punctuation.
    $words[] = ['t' => $v, 'w' => (string) $k];
}

$valid_words = [];
// NOTE(review): several entries below are byte-identical duplicates;
// presumably one of each pair was meant to be the full-width form — confirm.
$stop_words = [',', ',', '.', '。', '!', '!', '?', '?', ' ', ' '];
// Tags that are always kept.
$name_words = ['n', 'ng', 'nrt', 'nr', 'ns', 'nt', 'nz', 'j', 'vn'];

foreach ($words as $k => $v) {
    $next = isset($words[$k + 1]) ? $words[$k + 1] : null;
    $after_next = isset($words[$k + 2]) ? $words[$k + 2] : null;

    // Verb: keep it only when it is the last token or is directly followed
    // by a punctuation token.
    if ($v['t'] === 'v') {
        if ($next === null
            || ($next['t'] === 'x' && in_array($next['w'], $stop_words, true))
        ) {
            $valid_words[] = $v['w'];
        }
        continue;
    }

    // Unknown token (tag 'x') that is not punctuation.
    if ($v['t'] === 'x' && !in_array($v['w'], $stop_words, true)) {
        // Directly followed by a token tagged n, nr, v or uj.
        $before_content = $next !== null
            && in_array($next['t'], ['n', 'nr', 'v', 'uj'], true);
        // Followed by a preposition/conjunction and then an always-kept tag.
        $before_linked_name = $next !== null
            && $after_next !== null
            && ($next['t'] === 'p' || $next['t'] === 'c')
            && in_array($after_next['t'], $name_words, true);
        if ($before_content || $before_linked_name) {
            $valid_words[] = $v['w'];
        }
        continue;
    }

    // Nouns, abbreviations and the other always-kept tags.
    if (in_array($v['t'], $name_words, true)) {
        $valid_words[] = $v['w'];
    }
}

// Space-separated keyword string used for the search query.
$valid_words_str = implode(" ", $valid_words);
Transform a relative path into an absolute URL using PHP
/**
 * Resolve a URL reference against a base URL and return an absolute URL.
 *
 * Handles absolute references, "#fragment" and "?query" references,
 * network-path references ("//host/path"), root-relative and
 * directory-relative paths. "//", "/./" and "/segment/../" sequences in the
 * resulting path are collapsed; the reference's own query and fragment are
 * kept untouched.
 *
 * @param string $rel  URL reference to resolve.
 * @param string $base Absolute URL the reference is relative to.
 *
 * @return string The absolute URL. $rel is returned unchanged when it already
 *                has a scheme, or when $base cannot be parsed or has no scheme
 *                to resolve against.
 */
function rel2abs($rel, $base)
{
    $rel = (string) $rel;
    $base = (string) $base;

    /* an empty reference designates the base document itself (minus its fragment) */
    if ($rel === '') {
        return (string) substr($base, 0, strcspn($base, '#'));
    }
    /* return if already absolute URL */
    if (parse_url($rel, PHP_URL_SCHEME) != '') {
        return $rel;
    }
    /* anchors replace the base fragment; queries replace the base query and fragment */
    if ($rel[0] === '#') {
        return substr($base, 0, strcspn($base, '#')) . $rel;
    }
    if ($rel[0] === '?') {
        return substr($base, 0, strcspn($base, '?#')) . $rel;
    }

    /* parse the base into an array rather than extract()ing it into local scope,
       so missing components never become undefined variables */
    $parts = parse_url($base);
    if ($parts === false || !isset($parts['scheme'])) {
        return $rel;
    }

    /* network-path reference: only the scheme is inherited from the base */
    if (strncmp($rel, '//', 2) === 0) {
        return $parts['scheme'] . ':' . $rel;
    }

    /* split off the reference's query/fragment so only its path is normalised */
    $cut = strcspn($rel, '?#');
    $relPath = (string) substr($rel, 0, $cut);
    $suffix = (string) substr($rel, $cut);

    /* remove non-directory element from the base path */
    $path = isset($parts['path']) ? $parts['path'] : '';
    $path = preg_replace('#/[^/]*$#', '', $path);
    /* destroy path if relative url points to root */
    if ($relPath[0] === '/') {
        $path = '';
    }
    $path .= '/' . $relPath;

    /* replace '//', '/./' and '/foo/../' with '/', including a trailing '/.' or '/foo/..';
       repeat until nothing changes because one pass can expose new matches */
    $re = ['#/\.?/#', '#/\.$#', '#/(?!\.\.)[^/]+/\.\.(?:/|$)#'];
    do {
        $path = preg_replace($re, '/', $path, -1, $n);
    } while ($n > 0);

    /* rebuild the authority: [user[:pass]@]host[:port] */
    $abs = '';
    if (isset($parts['user'])) {
        $abs .= $parts['user'];
        if (isset($parts['pass'])) {
            $abs .= ':' . $parts['pass'];
        }
        $abs .= '@';
    }
    $abs .= isset($parts['host']) ? $parts['host'] : '';
    if (isset($parts['port'])) {
        $abs .= ':' . $parts['port'];
    }

    /* the base query is deliberately NOT appended: it belongs to the base document */
    return $parts['scheme'] . '://' . $abs . $path . $suffix;
}
实际使用中发现parse耗时很多,而且经常搜索不到词。弃坑。
$ git clone https://github.com/c4ys/sphinx-jieba
$ cd sphinx-jieba
$ git submodule update --init --recursive
$ sudo apt install gcc cmake automake g++
$ sudo apt install libmysqld-dev
$ ./configure --prefix=/usr/local/sphinx-jieba
$ cp cppjieba/include/cppjieba src/ -r
$ cp cppjieba/deps/limonp src/ -r
$ sudo make install
sql如下
CREATE TABLE documents ( id INTEGER PRIMARY KEY NOT NULL AUTO_INCREMENT, title VARCHAR(255) NOT NULL );
REPLACE INTO documents ( title ) VALUES
('广州狗场直销泰迪边牧阿拉斯加等各名犬 微信视频同步'),
('出售阿拉金毛拉多泰迪萨摩哈士奇等30多个品种 保健康可送货'),
('广州哪里买纯种哈士奇 雪橇犬哈士奇多少钱'),
('广州边境牧羊犬狗场 广州哪里有卖边牧犬小狗 边境牧羊犬小狗'),
('广州跳跳犬舍 纯种憨厚老实巴哥幼犬 小型短毛犬 纯种健康'),
('广州地区金毛多少钱一只巡回犬赛级品质 签协议 健康血统有保'),
('广州狗场直销阿拉斯加金毛泰迪哈士奇萨摩耶秋田德牧等各种名犬');
# Data source: the `documents` table of the local MySQL database `test`.
source src1
{
    type = mysql
    sql_query_pre = SET NAMES utf8
    sql_host = localhost
    sql_user = test
    sql_pass =
    sql_db = test
    sql_port = 3306 # optional, default is 3306
    sql_query = SELECT id, title FROM documents
    sql_field_string = title
}

# Index built from src1.
index test1
{
    source = src1
    path = /usr/local/sphinx-jieba/var/data/test1
    charset_type = utf-8
    # Points at the install prefix used in these notes (/usr/local/sphinx-jieba),
    # where the dictionaries are copied to etc/xdictjieba.dict.utf8,
    # etc/xdictuser.dict.utf8 and so on.
    # NOTE(review): assumes the value is a filename prefix, as those copies suggest — confirm.
    chinese_dictionary = /usr/local/sphinx-jieba/etc/xdict
}

indexer
{
    mem_limit = 128M
}

# Search daemon: native API on 9312, MySQL protocol on 9306.
searchd
{
    listen = 9312
    listen = 9306:mysql41
    log = /usr/local/sphinx-jieba/var/log/searchd.log
    query_log = /usr/local/sphinx-jieba/var/log/query.log
    read_timeout = 5
    max_children = 30
    pid_file = /usr/local/sphinx-jieba/var/log/searchd.pid
    seamless_rotate = 1
    preopen_indexes = 1
    unlink_old = 1
    workers = threads # for RT to work
    binlog_path = /usr/local/sphinx-jieba/var/data
}
sudo cp cppjieba/dict/* /usr/local/sphinx-jieba/etc/ -r
cd /usr/local/sphinx-jieba/
sudo cp etc/jieba.dict.utf8 etc/xdictjieba.dict.utf8
sudo cp etc/user.dict.utf8 etc/xdictuser.dict.utf8
sudo cp etc/hmm_model.utf8 etc/xdicthmm_model.utf8
sudo cp etc/idf.utf8 etc/xdictidf.utf8
sudo cp etc/stop_words.utf8 etc/xdictstop_words.utf8
sudo bin/indexer --all
sudo bin/searchd
mysql -h 127.0.0.1 -P 9306
连接mysql
select * from test1 where match('宠物狗') limit 1000;
https://www.percona.com/doc/percona-server/LATEST/tokudb/tokudb_installation.html
参考:https://mirrors.gzqdn.org/help/percona/
sudo apt install libjemalloc-dev
在/etc/mysql/percona-server.conf.d/mysqld_safe.cnf文件中添加
[mysqld_safe]
malloc-lib= /usr/include/jemalloc
查看Transparent huge pages状态
cat /sys/kernel/mm/transparent_hugepage/enabled
关闭Transparent huge pages需要以root身份运行
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
apt-get install percona-server-tokudb-5.7
sudo ps_tokudb_admin --enable -uroot -pPassw0rd
mysql> SHOW ENGINES;
mysql> SELECT @@tokudb_version;
包含了ubuntu,fedora,percona,nodesource,npm,pypi,mongodb等等流行的软件包。
存储过程加上事务能够提高插入效率:
-- Insert NUM rows into `user` inside a single transaction.
-- rand_str() and rand_int() are not defined in this snippet and must already exist.
-- The DELIMITER switch keeps the mysql client from cutting the body at its first inner semicolon.
DELIMITER //
CREATE DEFINER=`root`@`%` PROCEDURE `autoinsert`(IN NUM INT)
BEGIN
    -- number of rows inserted so far
    DECLARE INIT_NUM INT DEFAULT 0;
    -- one transaction around the whole loop, committed once at the end
    START TRANSACTION;
    WHILE (INIT_NUM < NUM) DO
        INSERT INTO `user` (`name`, `city_id`) VALUES (rand_str(10), rand_int(2));
        SET INIT_NUM = INIT_NUM + 1;
    END WHILE;
    COMMIT;
END //
DELIMITER ;
-- Table filled by the BatchInsertTest procedure: an auto-increment id,
-- an indexed forum_id and a creation timestamp.
CREATE TABLE `test` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `forum_id` bigint(20) unsigned NOT NULL,
  `created` datetime NOT NULL,
  PRIMARY KEY (`id`),
  KEY `test_forum_id_IDX` (`forum_id`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=750136 DEFAULT CHARSET=utf8mb4;
-- Recreate the batch-insert helper, run it for 10000 rows and count the result.
DROP PROCEDURE IF EXISTS test.BatchInsertTest;

DELIMITER //
CREATE PROCEDURE BatchInsertTest(IN loop_time INT)
BEGIN
    -- number of rows written so far
    DECLARE rows_done INT DEFAULT 0;

    -- all inserts share one transaction, committed once after the loop
    START TRANSACTION;
    WHILE rows_done < loop_time DO
        -- forum_id is a random integer in 1..1000; created is a random
        -- moment within the last 30 days
        INSERT INTO `test` (`forum_id`, `created`)
        SELECT FLOOR(1 + (RAND() * 1000)),
               FROM_UNIXTIME(UNIX_TIMESTAMP() - (RAND() * 30 * 86400));
        SET rows_done = rows_done + 1;
    END WHILE;
    COMMIT;
END;
//
DELIMITER ;

CALL BatchInsertTest(10000);
SELECT COUNT(*) FROM test;