Elasticsearch是一款高效的开源搜索引擎。下面我们添加几条员工数据进行演示,详情可以查看在线文档。这里给出用PHP操作Elasticsearch的一些例子;运行这些例子之前需要先安装Elasticsearch搜索引擎,安装方法可以查看官网,地址:https://www.elastic.co/downloads/elasticsearch
Elasticsearch依赖java,地址:http://www.java.com/
中文文档地址:http://es.xiaoleilu.com/
公共方法,用于操作Elasticsearch
/**
 * Send an HTTP request with cURL and return the JSON-decoded response.
 *
 * Despite the name, the HTTP verb is whatever $type says (GET, POST, PUT, DELETE, ...).
 *
 * @param string       $url    Full request URL.
 * @param array|string $option Request body as a PHP array; it is JSON-encoded before being sent.
 *                             An empty value means the request carries no body.
 * @param array|string $header Extra HTTP header lines, e.g. array('Accept: application/json').
 *                             A single string is treated as one header line. Empty means none.
 * @param string       $type   HTTP method to use.
 * @return mixed Decoded JSON (objects become associative arrays), or null when the
 *               transfer fails or the response is not valid JSON.
 */
function PostCurl($url, $option = '', $header = '', $type = 'POST')
{
    // CURLOPT_HTTPHEADER needs an array; accept a lone header string as well.
    $headers = empty($header) ? array() : (array) $header;

    $curl = curl_init(); // start a cURL session
    curl_setopt($curl, CURLOPT_URL, $url); // target address

    // NOTE(review): certificate and host verification are switched off, as in the
    // original code. That is tolerable for a local demo node but unsafe against a
    // remote HTTPS endpoint - confirm before reusing this helper elsewhere.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);

    // Pretend to be a regular browser.
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)');

    if (!empty($option)) {
        // PUT bodies are encoded as-is so PHP lists stay JSON arrays; every other
        // verb forces objects so that an empty PHP array is sent as {} instead of [].
        if ($type === 'PUT') {
            $body = json_encode($option);
        } else {
            $body = json_encode($option, JSON_FORCE_OBJECT);
        }
        curl_setopt($curl, CURLOPT_POSTFIELDS, $body); // request payload

        // Without an explicit header libcurl labels the payload as
        // application/x-www-form-urlencoded, which newer Elasticsearch versions
        // reject. Only add the JSON content type when the caller did not set one.
        $hasContentType = false;
        foreach ($headers as $line) {
            if (stripos(ltrim((string) $line), 'content-type:') === 0) {
                $hasContentType = true;
                break;
            }
        }
        if (!$hasContentType) {
            $headers[] = 'Content-Type: application/json';
        }
    }

    curl_setopt($curl, CURLOPT_TIMEOUT, 30); // give up after 30 seconds instead of hanging
    curl_setopt($curl, CURLOPT_HTTPHEADER, $headers); // HTTP headers
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); // return the response instead of printing it
    curl_setopt($curl, CURLOPT_CUSTOMREQUEST, $type);

    $raw = curl_exec($curl); // perform the request
    curl_close($curl); // end the cURL session

    // curl_exec() yields false on a transport error; report that as null rather
    // than feeding a boolean to json_decode().
    if ($raw === false) {
        return null;
    }

    return json_decode($raw, true);
}
定义url地址
// Base URL of the Elasticsearch HTTP endpoint. The trailing slash is required
// because the examples append request paths to it directly.
$url = 'http://localhost:9200/';
1、添加员工数据
关系数据库 ⇒ 数据库 ⇒ 表 ⇒ 行 ⇒ 列(Columns)
Elasticsearch ⇒ 索引 ⇒ 类型 ⇒ 文档 ⇒ 字段(Fields)
索引 megacorp
类型 employee
文档 1 ~ 3
字段
// Employee documents, keyed by the document id each one is stored under.
$param = array(
    1 => array(
        'first_name' => 'John',
        'last_name' => 'Smith',
        'age' => 25,
        'about' => 'I love to go rock climbing',
        'interests' => array('sports', 'music')
    ),
    2 => array(
        'first_name' => 'Jane',
        'last_name' => 'Smith',
        'age' => 32,
        'about' => 'I like to collect rock albums',
        'interests' => array('music')
    ),
    3 => array(
        'first_name' => 'Douglas',
        'last_name' => 'Fir',
        'age' => 35,
        'about' => 'I like to build cabinets',
        'interests' => array('forestry')
    ),
);

// Index every employee with PUT megacorp/employee/<id> and count the writes that succeeded.
$success = 0;
foreach ($param as $k => $v) {
    $result = PostCurl($url.'megacorp/employee/'.$k, $v, '', 'PUT');
    // A write succeeded when at least one shard copy accepted it. When replicas
    // are allocated "successful" is greater than 1, so do not compare with == 1.
    if (isset($result['_shards']['successful']) && $result['_shards']['successful'] >= 1) {
        $success++;
    }
}
echo $success.'<br />';
输出 3
2、获取id为1的员工数据
// Fetch the employee document stored under id 1 and dump the decoded response.
$employeeUrl = $url.'megacorp/employee/1';
$employee = PostCurl($employeeUrl, '', '', 'GET');
print_r($employee);
返回数据 Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_version] => 28 [found] => 1 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) )
3、搜索全部员工
// Call _search without a request body and dump the decoded response.
$searchUrl = $url.'megacorp/employee/_search';
$allEmployees = PostCurl($searchUrl, '', '', 'GET');
print_r($allEmployees);
返回数据 Array ( [took] => 4 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 3 [max_score] => 1 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 2 [_score] => 1 [_source] => Array ( [first_name] => Jane [last_name] => Smith [age] => 32 [about] => I like to collect rock albums [interests] => Array ( [0] => music ) ) ) [1] => Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_score] => 1 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) ) [2] => Array ( [_index] => megacorp [_type] => employee [_id] => 3 [_score] => 1 [_source] => Array ( [first_name] => Douglas [last_name] => Fir [age] => 35 [about] => I like to build cabinets [interests] => Array ( [0] => forestry ) ) ) ) ) )
4、搜索姓 last_name等于Smith的员工, 轻量的搜索方法
// Query-string ("lite") search: the condition travels in the q= URL parameter, no body is sent.
$liteSearchUrl = $url.'megacorp/employee/_search'.'?q=last_name:Smith';
$smiths = PostCurl($liteSearchUrl, '', '', 'GET');
print_r($smiths);
返回数据 Array ( [took] => 6 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 2 [max_score] => 0.30685282 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 2 [_score] => 0.30685282 [_source] => Array ( [first_name] => Jane [last_name] => Smith [age] => 32 [about] => I like to collect rock albums [interests] => Array ( [0] => music ) ) ) [1] => Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_score] => 0.30685282 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) ) ) ) )
5、使用Query DSL搜索。搜索姓 last_name 等于 Smith 的员工(条件与上面的轻量搜索相同,这里改用请求体传递查询)
// Query DSL: a match query on last_name, sent as the request body.
$matchLastName = array('match' => array('last_name' => 'Smith'));
$searchBody = array('query' => $matchLastName);

$response = PostCurl($url.'megacorp/employee/_search', $searchBody, '', 'GET');
print_r($response);
返回数据 Array ( [took] => 1 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 2 [max_score] => 0.30685282 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 2 [_score] => 0.30685282 [_source] => Array ( [first_name] => Jane [last_name] => Smith [age] => 32 [about] => I like to collect rock albums [interests] => Array ( [0] => music ) ) ) [1] => Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_score] => 0.30685282 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) ) ) ) )
6、更加复杂的搜索。搜索姓 last_name 等于 Smith 的员工,并增加年龄大于30岁的限定条件
// Employees whose last_name matches "Smith" AND whose age is greater than 30.
// The "filtered" query used originally was deprecated in Elasticsearch 2.0 and
// removed in 5.0; a bool query with a scoring "must" clause and a non-scoring
// "filter" clause is its documented replacement.
$param = array(
    'query' => array(
        'bool' => array(
            'must' => array(
                'match' => array('last_name' => 'Smith'),
            ),
            'filter' => array(
                'range' => array(
                    'age' => array('gt' => 30)
                ),
            ),
        ),
    ),
);
$result = PostCurl($url.'megacorp/employee/_search', $param, '', 'GET');
print_r($result);
返回数据 Array ( [took] => 4 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 1 [max_score] => 0.30685282 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 2 [_score] => 0.30685282 [_source] => Array ( [first_name] => Jane [last_name] => Smith [age] => 32 [about] => I like to collect rock albums [interests] => Array ( [0] => music ) ) ) ) ) )
7、更加复杂的全文搜索。一项在传统数据库很难实现的功能、我们将会搜索所有喜欢 rock climbing 的员工
// Full-text search: a match query on the about field for the text "rock climbing".
$matchAbout = array('match' => array('about' => 'rock climbing'));
$searchBody = array('query' => $matchAbout);

$response = PostCurl($url.'megacorp/employee/_search', $searchBody, '', 'GET');
print_r($response);
返回数据 Array ( [took] => 7 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 2 [max_score] => 0.16273327 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_score] => 0.16273327 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) ) [1] => Array ( [_index] => megacorp [_type] => employee [_id] => 2 [_score] => 0.016878016 [_source] => Array ( [first_name] => Jane [last_name] => Smith [age] => 32 [about] => I like to collect rock albums [interests] => Array ( [0] => music ) ) ) ) ) )
8、短语搜索。我们只需要查询 about 字段中包含完整短语 rock climbing 的员工
// Phrase search: a match_phrase query on the about field for "rock climbing".
$phraseAbout = array('match_phrase' => array('about' => 'rock climbing'));
$searchBody = array('query' => $phraseAbout);

$response = PostCurl($url.'megacorp/employee/_search', $searchBody, '', 'GET');
print_r($response);
返回数据 Array ( [took] => 2 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 1 [max_score] => 0.23013961 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_score] => 0.23013961 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) ) ) ) )
9、高亮我们的搜索。只需在查询中添加一个 highlight 参数
// Same phrase query as before, plus a highlight section for the about field.
// The empty array is the (empty) option set for that field.
$phraseAbout = array('match_phrase' => array('about' => 'rock climbing'));
$highlightAbout = array('fields' => array('about' => array()));
$searchBody = array(
    'query' => $phraseAbout,
    'highlight' => $highlightAbout,
);

$response = PostCurl($url.'megacorp/employee/_search', $searchBody, '', 'GET');
print_r($response);
返回数据 Array ( [took] => 2 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 1 [max_score] => 0.23013961 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_score] => 0.23013961 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) [highlight] => Array ( [about] => Array ( [0] => I love to go rock climbing ) ) ) ) ) )
10、统计。例如,找一下员工中最受欢迎的兴趣是什么
// Terms aggregation named all_interests over the interests field.
$interestTerms = array('terms' => array('field' => 'interests'));
$searchBody = array(
    'aggs' => array('all_interests' => $interestTerms),
);

$response = PostCurl($url.'megacorp/employee/_search', $searchBody, '', 'GET');
print_r($response);
返回数据 Array ( [took] => 2 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 3 [max_score] => 1 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 2 [_score] => 1 [_source] => Array ( [first_name] => Jane [last_name] => Smith [age] => 32 [about] => I like to collect rock albums [interests] => Array ( [0] => music ) ) ) [1] => Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_score] => 1 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) ) [2] => Array ( [_index] => megacorp [_type] => employee [_id] => 3 [_score] => 1 [_source] => Array ( [first_name] => Douglas [last_name] => Fir [age] => 35 [about] => I like to build cabinets [interests] => Array ( [0] => forestry ) ) ) ) ) [aggregations] => Array ( [all_interests] => Array ( [doc_count_error_upper_bound] => 0 [sum_other_doc_count] => 0 [buckets] => Array ( [0] => Array ( [key] => music [doc_count] => 2 ) [1] => Array ( [key] => forestry [doc_count] => 1 ) [2] => Array ( [key] => sports [doc_count] => 1 ) ) ) ) )
11、如果只想要查询姓 smith 的员工的兴趣汇总情况
// Combine a match query on last_name with the all_interests terms aggregation
// in a single search request.
$matchLastName = array('match' => array('last_name' => 'smith'));
$interestTerms = array('terms' => array('field' => 'interests'));
$searchBody = array(
    'query' => $matchLastName,
    'aggs' => array('all_interests' => $interestTerms),
);

$response = PostCurl($url.'megacorp/employee/_search', $searchBody, '', 'GET');
print_r($response);
返回数据 Array ( [took] => 2 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 2 [max_score] => 0.30685282 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 2 [_score] => 0.30685282 [_source] => Array ( [first_name] => Jane [last_name] => Smith [age] => 32 [about] => I like to collect rock albums [interests] => Array ( [0] => music ) ) ) [1] => Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_score] => 0.30685282 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) ) ) ) [aggregations] => Array ( [all_interests] => Array ( [doc_count_error_upper_bound] => 0 [sum_other_doc_count] => 0 [buckets] => Array ( [0] => Array ( [key] => music [doc_count] => 2 ) [1] => Array ( [key] => sports [doc_count] => 1 ) ) ) ) )
12、汇总还允许多个层面的统计。比如我们还可以统计每一个兴趣下的平均年龄
// Nested aggregation: the all_interests terms aggregation carries an avg_age
// sub-aggregation that averages the age field inside each bucket.
$averageAge = array('avg' => array('field' => 'age'));
$interestsWithAge = array(
    'terms' => array('field' => 'interests'),
    'aggs' => array('avg_age' => $averageAge),
);
$searchBody = array(
    'aggs' => array('all_interests' => $interestsWithAge),
);

$response = PostCurl($url.'megacorp/employee/_search', $searchBody, '', 'GET');
print_r($response);
返回数据 Array ( [took] => 16 [timed_out] => [_shards] => Array ( [total] => 5 [successful] => 5 [failed] => 0 ) [hits] => Array ( [total] => 3 [max_score] => 1 [hits] => Array ( [0] => Array ( [_index] => megacorp [_type] => employee [_id] => 2 [_score] => 1 [_source] => Array ( [first_name] => Jane [last_name] => Smith [age] => 32 [about] => I like to collect rock albums [interests] => Array ( [0] => music ) ) ) [1] => Array ( [_index] => megacorp [_type] => employee [_id] => 1 [_score] => 1 [_source] => Array ( [first_name] => John [last_name] => Smith [age] => 25 [about] => I love to go rock climbing [interests] => Array ( [0] => sports [1] => music ) ) ) [2] => Array ( [_index] => megacorp [_type] => employee [_id] => 3 [_score] => 1 [_source] => Array ( [first_name] => Douglas [last_name] => Fir [age] => 35 [about] => I like to build cabinets [interests] => Array ( [0] => forestry ) ) ) ) ) [aggregations] => Array ( [all_interests] => Array ( [doc_count_error_upper_bound] => 0 [sum_other_doc_count] => 0 [buckets] => Array ( [0] => Array ( [key] => music [doc_count] => 2 [avg_age] => Array ( [value] => 28.5 ) ) [1] => Array ( [key] => forestry [doc_count] => 1 [avg_age] => Array ( [value] => 35 ) ) [2] => Array ( [key] => sports [doc_count] => 1 [avg_age] => Array ( [value] => 25 ) ) ) ) ) )
删除指定日期的索引数据(下面的通配符会匹配所有以 logstash-2017.02.25 开头的索引)
# Send an HTTP DELETE for every index whose name matches the wildcard logstash-2017.02.25*.
curl -XDELETE 'http://localhost:9200/logstash-2017.02.25*'
上面这些已经可以基本掌握Elasticsearch的操作,后面更深入的、分布式集群,等...... 敬请关注~
发表评论: