Elasticsearch 是一个高效的开源搜索引擎。下面用 PHP 操作 Elasticsearch,并添加几条员工数据进行演示,更多细节可以查看在线文档。运行这些例子之前需要先安装 Elasticsearch,安装方法见官网:https://www.elastic.co/downloads/elasticsearch
Elasticsearch 依赖 Java 运行环境,下载地址:http://www.java.com/
中文文档地址:http://es.xiaoleilu.com/
公共方法,用于操作Elasticsearch
/**
 * Send an HTTP request with an optional JSON body and decode the JSON reply.
 *
 * Despite the name, the HTTP method is whatever $type says; the examples in
 * this file call it with 'GET' and 'PUT'.
 *
 * @param string       $url    Full request URL.
 * @param array|string $option Request body; it is JSON-encoded before sending.
 *                             An empty value means no body is sent.
 * @param array|string $header Extra HTTP header lines ("Name: value").
 *                             An empty value means no extra headers.
 * @param string       $type   HTTP method, passed to CURLOPT_CUSTOMREQUEST.
 * @return mixed Decoded JSON response (objects become associative arrays), or
 *               null when the transfer fails or the reply is not valid JSON.
 */
function PostCurl($url, $option = '', $header = '', $type = 'POST')
{
    // CURLOPT_HTTPHEADER needs an array; accept '' (the default) or a single
    // header line given as a string.
    $header = empty($header) ? array() : (array) $header;

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    // NOTE(review): certificate and host-name verification are switched off,
    // so an https:// endpoint is not authenticated. Kept as-is for
    // compatibility with existing callers; enable both for untrusted networks.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)');

    if (! empty($option)) {
        if ($type == 'PUT') {
            // Plain encoding: PHP list arrays stay JSON arrays.
            $options = json_encode($option);
        } else {
            // JSON_FORCE_OBJECT encodes every PHP array as a JSON object,
            // including empty arrays and list arrays.
            $options = json_encode($option, JSON_FORCE_OBJECT);
        }
        curl_setopt($curl, CURLOPT_POSTFIELDS, $options);

        // Without an explicit header cURL labels the body as
        // application/x-www-form-urlencoded. Declare it as JSON unless the
        // caller already supplied a Content-Type.
        $hasContentType = false;
        foreach ($header as $line) {
            if (stripos($line, 'Content-Type:') === 0) {
                $hasContentType = true;
                break;
            }
        }
        if (! $hasContentType) {
            $header[] = 'Content-Type: application/json';
        }
    }

    curl_setopt($curl, CURLOPT_TIMEOUT, 30); // give up after 30 seconds
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($curl, CURLOPT_CUSTOMREQUEST, $type);

    $result = curl_exec($curl);
    curl_close($curl);

    // curl_exec() returns false when the transfer fails; report that as null
    // explicitly instead of passing false to json_decode().
    if ($result === false) {
        return null;
    }

    return json_decode($result, true);
}
定义url地址
// Base URL of the Elasticsearch REST endpoint. The trailing slash matters:
// the examples append paths such as 'megacorp/employee/1' directly to it.
$url = sprintf('http://%s:%d/', 'localhost', 9200);
1、添加员工数据
关系数据库 ⇒ 数据库 ⇒ 表 ⇒ 行 ⇒ 列(Columns)
Elasticsearch ⇒ 索引 ⇒ 类型 ⇒ 文档 ⇒ 字段(Fields)
索引 megacorp
类型 employee
文档 1 ~ 3
字段
// Employee documents to index; each key is the document id used in the URL.
$param = array(
    1 => array(
        'first_name' => 'John',
        'last_name' => 'Smith',
        'age' => 25,
        'about' => 'I love to go rock climbing',
        'interests' => array('sports', 'music')
    ),
    2 => array(
        'first_name' => 'Jane',
        'last_name' => 'Smith',
        'age' => 32,
        'about' => 'I like to collect rock albums',
        'interests' => array('music')
    ),
    3 => array(
        'first_name' => 'Douglas',
        'last_name' => 'Fir',
        'age' => 35,
        'about' => 'I like to build cabinets',
        'interests' => array('forestry')
    ),
);
// PUT every employee to megacorp/employee/{id} and count the writes that
// at least one shard copy acknowledged.
$success = 0;
foreach ($param as $k => $v) {
    $result = PostCurl($url.'megacorp/employee/'.$k, $v, '', 'PUT');
    // "successful" is the number of shard copies that applied the write, so
    // it exceeds 1 as soon as a replica acknowledges it. Comparing with == 1
    // would then skip a write that actually succeeded.
    if (isset($result['_shards']['successful']) && $result['_shards']['successful'] >= 1) {
        $success++;
    }
}
echo $success.'<br />';
输出 3
2、获取id为1的员工数据
// Fetch the employee document with id 1 and dump the decoded response.
$endpoint = $url . 'megacorp/employee/1';
$result = PostCurl($endpoint, '', '', 'GET');
print_r($result);
返回数据
Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_version] => 28
[found] => 1
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
)
3、搜索全部员工
// Call the _search endpoint of megacorp/employee without sending a query body.
$endpoint = sprintf('%s%s', $url, 'megacorp/employee/_search');
$result = PostCurl($endpoint, '', '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 4
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 3
[max_score] => 1
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 2
[_score] => 1
[_source] => Array
(
[first_name] => Jane
[last_name] => Smith
[age] => 32
[about] => I like to collect rock albums
[interests] => Array
(
[0] => music
)
)
)
[1] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_score] => 1
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
)
[2] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 3
[_score] => 1
[_source] => Array
(
[first_name] => Douglas
[last_name] => Fir
[age] => 35
[about] => I like to build cabinets
[interests] => Array
(
[0] => forestry
)
)
)
)
)
)
4、搜索姓 last_name等于Smith的员工, 轻量的搜索方法
// Lightweight search: the condition travels in the q= query-string parameter
// and no request body is sent.
$searchPath = 'megacorp/employee/_search?q=last_name:Smith';
$result = PostCurl($url . $searchPath, '', '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 6
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 2
[max_score] => 0.30685282
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 2
[_score] => 0.30685282
[_source] => Array
(
[first_name] => Jane
[last_name] => Smith
[age] => 32
[about] => I like to collect rock albums
[interests] => Array
(
[0] => music
)
)
)
[1] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_score] => 0.30685282
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
)
)
)
)
5、使用 Query DSL 搜索。同样搜索 last_name 等于 Smith 的员工,但查询条件以 JSON 请求体传递,而不是像上面的轻量搜索那样放在查询字符串里
// Query DSL form of the last_name search: a "match" clause sent as the
// request body.
$matchLastName = array('last_name' => 'Smith');
$param = array(
    'query' => array('match' => $matchLastName),
);
$endpoint = $url . 'megacorp/employee/_search';
$result = PostCurl($endpoint, $param, '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 1
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 2
[max_score] => 0.30685282
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 2
[_score] => 0.30685282
[_source] => Array
(
[first_name] => Jane
[last_name] => Smith
[age] => 32
[about] => I like to collect rock albums
[interests] => Array
(
[0] => music
)
)
)
[1] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_score] => 0.30685282
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
)
)
)
)
6、更加复杂的搜索:搜索 last_name 等于 Smith 的员工,并加上年龄大于 30 岁的限定条件
// Employees whose last_name matches "Smith" AND whose age is greater than 30.
// The "filtered" query was deprecated in Elasticsearch 2.0 and removed in
// 5.0; a "bool" query with a "must" clause (scored) and a "filter" clause
// (not scored) expresses the same search.
// Each clause is a single associative array, never a list: PostCurl encodes
// non-PUT bodies with JSON_FORCE_OBJECT, which would turn a list into an
// object with numeric keys.
$param = array(
    'query' => array(
        'bool' => array(
            'must' => array(
                'match' => array('last_name' => 'Smith'),
            ),
            'filter' => array(
                'range' => array(
                    'age' => array('gt' => 30)
                ),
            ),
        ),
    ),
);
$result = PostCurl($url.'megacorp/employee/_search', $param, '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 4
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 1
[max_score] => 0.30685282
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 2
[_score] => 0.30685282
[_source] => Array
(
[first_name] => Jane
[last_name] => Smith
[age] => 32
[about] => I like to collect rock albums
[interests] => Array
(
[0] => music
)
)
)
)
)
)
7、更加复杂的全文搜索。这是一项在传统数据库中很难实现的功能:我们将搜索所有喜欢 rock climbing 的员工
// Full-text "match" query against the about field.
$matchAbout = array('about' => 'rock climbing');
$param = array(
    'query' => array('match' => $matchAbout),
);
$endpoint = $url . 'megacorp/employee/_search';
$result = PostCurl($endpoint, $param, '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 7
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 2
[max_score] => 0.16273327
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_score] => 0.16273327
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
)
[1] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 2
[_score] => 0.016878016
[_source] => Array
(
[first_name] => Jane
[last_name] => Smith
[age] => 32
[about] => I like to collect rock albums
[interests] => Array
(
[0] => music
)
)
)
)
)
)
8、短语搜索。只查询 about 字段中包含完整短语 rock climbing 的员工
// Phrase query: uses "match_phrase" instead of "match" on the about field.
$phrase = array('about' => 'rock climbing');
$param = array(
    'query' => array('match_phrase' => $phrase),
);
$endpoint = $url . 'megacorp/employee/_search';
$result = PostCurl($endpoint, $param, '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 2
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 1
[max_score] => 0.23013961
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_score] => 0.23013961
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
)
)
)
)
9、高亮搜索结果。查询与上一个例子相同,只是多添加了一个 highlight 参数
// Same phrase query as before, plus a "highlight" section for the about field.
$phraseQuery = array(
    'match_phrase' => array('about' => 'rock climbing'),
);
// The empty array for "about" is sent as {} because PostCurl encodes GET
// bodies with JSON_FORCE_OBJECT.
$highlight = array(
    'fields' => array('about' => array()),
);
$param = array(
    'query' => $phraseQuery,
    'highlight' => $highlight,
);
$endpoint = $url . 'megacorp/employee/_search';
$result = PostCurl($endpoint, $param, '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 2
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 1
[max_score] => 0.23013961
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_score] => 0.23013961
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
[highlight] => Array
(
[about] => Array
(
[0] => I love to go <em>rock</em> <em>climbing</em>
)
)
)
)
)
)
10、统计。例如,找一下员工中最受欢迎的兴趣是什么
// "terms" aggregation named all_interests over the interests field.
$interestTerms = array(
    'terms' => array('field' => 'interests'),
);
$param = array(
    'aggs' => array('all_interests' => $interestTerms),
);
$endpoint = $url . 'megacorp/employee/_search';
$result = PostCurl($endpoint, $param, '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 2
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 3
[max_score] => 1
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 2
[_score] => 1
[_source] => Array
(
[first_name] => Jane
[last_name] => Smith
[age] => 32
[about] => I like to collect rock albums
[interests] => Array
(
[0] => music
)
)
)
[1] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_score] => 1
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
)
[2] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 3
[_score] => 1
[_source] => Array
(
[first_name] => Douglas
[last_name] => Fir
[age] => 35
[about] => I like to build cabinets
[interests] => Array
(
[0] => forestry
)
)
)
)
)
[aggregations] => Array
(
[all_interests] => Array
(
[doc_count_error_upper_bound] => 0
[sum_other_doc_count] => 0
[buckets] => Array
(
[0] => Array
(
[key] => music
[doc_count] => 2
)
[1] => Array
(
[key] => forestry
[doc_count] => 1
)
[2] => Array
(
[key] => sports
[doc_count] => 1
)
)
)
)
)
11、如果只想要查询姓 smith 的员工的兴趣汇总情况
// Combine a last_name "match" query with the all_interests "terms"
// aggregation in a single request body.
$smithQuery = array(
    'match' => array('last_name' => 'smith'),
);
$interestTerms = array(
    'terms' => array('field' => 'interests'),
);
$param = array(
    'query' => $smithQuery,
    'aggs' => array('all_interests' => $interestTerms),
);
$endpoint = $url . 'megacorp/employee/_search';
$result = PostCurl($endpoint, $param, '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 2
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 2
[max_score] => 0.30685282
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 2
[_score] => 0.30685282
[_source] => Array
(
[first_name] => Jane
[last_name] => Smith
[age] => 32
[about] => I like to collect rock albums
[interests] => Array
(
[0] => music
)
)
)
[1] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_score] => 0.30685282
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
)
)
)
[aggregations] => Array
(
[all_interests] => Array
(
[doc_count_error_upper_bound] => 0
[sum_other_doc_count] => 0
[buckets] => Array
(
[0] => Array
(
[key] => music
[doc_count] => 2
)
[1] => Array
(
[key] => sports
[doc_count] => 1
)
)
)
)
)
12、汇总还允许多个层面的统计。比如我们还可以统计每一个兴趣下的平均年龄
// Nested aggregation: an "avg" of the age field inside each bucket of the
// all_interests "terms" aggregation.
$averageAge = array(
    'avg' => array('field' => 'age'),
);
$interestBuckets = array(
    'terms' => array('field' => 'interests'),
    'aggs' => array('avg_age' => $averageAge),
);
$param = array(
    'aggs' => array('all_interests' => $interestBuckets),
);
$endpoint = $url . 'megacorp/employee/_search';
$result = PostCurl($endpoint, $param, '', 'GET');
print_r($result);
返回数据
Array
(
[took] => 16
[timed_out] =>
[_shards] => Array
(
[total] => 5
[successful] => 5
[failed] => 0
)
[hits] => Array
(
[total] => 3
[max_score] => 1
[hits] => Array
(
[0] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 2
[_score] => 1
[_source] => Array
(
[first_name] => Jane
[last_name] => Smith
[age] => 32
[about] => I like to collect rock albums
[interests] => Array
(
[0] => music
)
)
)
[1] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 1
[_score] => 1
[_source] => Array
(
[first_name] => John
[last_name] => Smith
[age] => 25
[about] => I love to go rock climbing
[interests] => Array
(
[0] => sports
[1] => music
)
)
)
[2] => Array
(
[_index] => megacorp
[_type] => employee
[_id] => 3
[_score] => 1
[_source] => Array
(
[first_name] => Douglas
[last_name] => Fir
[age] => 35
[about] => I like to build cabinets
[interests] => Array
(
[0] => forestry
)
)
)
)
)
[aggregations] => Array
(
[all_interests] => Array
(
[doc_count_error_upper_bound] => 0
[sum_other_doc_count] => 0
[buckets] => Array
(
[0] => Array
(
[key] => music
[doc_count] => 2
[avg_age] => Array
(
[value] => 28.5
)
)
[1] => Array
(
[key] => forestry
[doc_count] => 1
[avg_age] => Array
(
[value] => 35
)
)
[2] => Array
(
[key] => sports
[doc_count] => 1
[avg_age] => Array
(
[value] => 25
)
)
)
)
)
)
删除指定日期的索引数据(通配符 * 会匹配所有名称以 logstash-2017.02.25 开头的索引)
# Send an HTTP DELETE for every index whose name starts with "logstash-2017.02.25".
curl -X DELETE "http://localhost:9200/logstash-2017.02.25*"
通过上面这些例子,已经可以基本掌握 Elasticsearch 的常用操作。更深入的内容(例如分布式集群等)会在后续文章中介绍,敬请关注~