龚哥哥 - 山里男儿 爱生活、做自己!
Elasticsearch入门 搜索引擎
发表于 2016-8-11 | PHP

Elasticsearch 是一个高效的开源搜索引擎框架。下面我们添加几条员工数据进行演示(更多细节可以查看在线文档),并给出一些用 PHP 操作 Elasticsearch 的例子。运行这些例子之前需要先安装 Elasticsearch 搜索引擎,安装方法可以查看官网,地址:https://www.elastic.co/downloads/elasticsearch

Elasticsearch依赖java,地址:http://www.java.com/

中文文档地址:http://es.xiaoleilu.com/

公共方法,用于操作Elasticsearch

/**
 * Send an HTTP request with an optional JSON body and decode the JSON response.
 *
 * Despite the name, the HTTP verb is whatever $type says (GET, PUT, POST, DELETE, ...).
 *
 * @param  string       $url    Full request URL.
 * @param  array|string $option Request payload, JSON-encoded before sending; an empty value sends no body.
 * @param  array|string $header Extra HTTP header lines ("Name: value"); an empty value means none.
 * @param  string       $type   HTTP method, handed to CURLOPT_CUSTOMREQUEST.
 * @return array|null           Decoded response as associative arrays, or null when the
 *                              transfer fails or the response body is not valid JSON.
 */
function PostCurl($url, $option = '', $header = '', $type = 'POST')
{
    // Normalise the headers to a list; a single header passed as a string becomes a one-element list.
    $header = empty($header) ? array() : (array) $header;

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    // NOTE(review): TLS peer and host verification are switched off, so https:// endpoints
    // are not authenticated. Kept as-is for backward compatibility; confirm before production use.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)');

    if (! empty($option)) {
        // PUT bodies are encoded normally so list values stay JSON arrays. Every other verb
        // uses JSON_FORCE_OBJECT so that an empty PHP array is sent as {} instead of [].
        // Side effect: list values in non-PUT bodies are also sent as objects.
        if ($type == 'PUT') {
            $options = json_encode($option);
        } else {
            $options = json_encode($option, JSON_FORCE_OBJECT);
        }
        curl_setopt($curl, CURLOPT_POSTFIELDS, $options);

        // Without an explicit header cURL labels the body as form-urlencoded, which
        // Elasticsearch 6.0+ rejects. Declare JSON unless the caller already set a type.
        $hasContentType = false;
        foreach ($header as $line) {
            if (is_string($line) && stripos(ltrim($line), 'content-type:') === 0) {
                $hasContentType = true;
                break;
            }
        }
        if (! $hasContentType) {
            $header[] = 'Content-Type: application/json';
        }
    }

    curl_setopt($curl, CURLOPT_TIMEOUT, 30); // give up after 30 seconds instead of hanging
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($curl, CURLOPT_CUSTOMREQUEST, $type);

    $result = curl_exec($curl);
    curl_close($curl);

    // curl_exec() yields false on a transport error; report that as null explicitly
    // rather than feeding a boolean to json_decode().
    if ($result === false) {
        return null;
    }

    return json_decode($result, true);
}

定义url地址

// Base URL of the Elasticsearch HTTP endpoint; the request paths in the examples are appended to it.
$url = 'http://localhost:9200/';

1、添加员工数据

关系数据库     ⇒ 数据库 ⇒ 表     ⇒ 行     ⇒ 列(Columns)

Elasticsearch   ⇒ 索引   ⇒ 类型   ⇒ 文档   ⇒ 字段(Fields)

索引 megacorp

类型 employee

文档 1 ~ 3

字段

// Employee documents to index; each array key is used as the document id.
$param = array(
    1   =>   array(
            'first_name'    =>   'John',
            'last_name'     =>   'Smith',
            'age'           =>   25,
            'about'         =>   'I love to go rock climbing',
            'interests'     =>   array('sports', 'music')
        ),
    2   =>   array(
            'first_name'    =>   'Jane',
            'last_name'     =>   'Smith',
            'age'           =>   32,
            'about'         =>   'I like to collect rock albums',
            'interests'     =>   array('music')
        ),
    3   =>   array(
            'first_name'    =>   'Douglas',
            'last_name'     =>   'Fir',
            'age'           =>   35,
            'about'         =>   'I like to build cabinets',
            'interests'     =>   array('forestry')
        ),
    );

// PUT every employee into index "megacorp", type "employee", and count the acknowledged writes.
$success = 0;
foreach($param as $k=>$v)
{
    $result = PostCurl($url.'megacorp/employee/'.$k, $v, '', 'PUT');
    // A write has succeeded once at least one shard copy acknowledged it. When replica
    // shards are allocated the count is greater than 1, so test ">= 1" rather than "== 1".
    if(isset($result['_shards']['successful']) && $result['_shards']['successful'] >= 1) $success++;
}
echo $success.'<br />';

输出 3

2、获取id为1的员工数据

// Fetch the employee document with id 1 and dump the decoded response.
$endpoint = $url . 'megacorp/employee/1';
$result = PostCurl($endpoint, '', '', 'GET');
print_r($result);

返回数据
Array
(
    [_index] => megacorp
    [_type] => employee
    [_id] => 1
    [_version] => 28
    [found] => 1
    [_source] => Array
        (
            [first_name] => John
            [last_name] => Smith
            [age] => 25
            [about] => I love to go rock climbing
            [interests] => Array
                (
                    [0] => sports
                    [1] => music
                )

        )

)

3、搜索全部员工

// Search the employee type without a request body, i.e. without any query criteria.
$endpoint = $url . 'megacorp/employee/_search';
$result = PostCurl($endpoint, '', '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 4
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 3
            [max_score] => 1
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 2
                            [_score] => 1
                            [_source] => Array
                                (
                                    [first_name] => Jane
                                    [last_name] => Smith
                                    [age] => 32
                                    [about] => I like to collect rock albums
                                    [interests] => Array
                                        (
                                            [0] => music
                                        )

                                )

                        )

                    [1] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 1
                            [_score] => 1
                            [_source] => Array
                                (
                                    [first_name] => John
                                    [last_name] => Smith
                                    [age] => 25
                                    [about] => I love to go rock climbing
                                    [interests] => Array
                                        (
                                            [0] => sports
                                            [1] => music
                                        )

                                )

                        )

                    [2] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 3
                            [_score] => 1
                            [_source] => Array
                                (
                                    [first_name] => Douglas
                                    [last_name] => Fir
                                    [age] => 35
                                    [about] => I like to build cabinets
                                    [interests] => Array
                                        (
                                            [0] => forestry
                                        )

                                )

                        )

                )

        )

)

4、搜索姓 last_name等于Smith的员工, 轻量的搜索方法

// Query-string search: the criterion travels in the URL's "q" parameter, no request body.
$endpoint = $url . 'megacorp/employee/_search?q=last_name:Smith';
$result = PostCurl($endpoint, '', '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 6
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 2
            [max_score] => 0.30685282
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 2
                            [_score] => 0.30685282
                            [_source] => Array
                                (
                                    [first_name] => Jane
                                    [last_name] => Smith
                                    [age] => 32
                                    [about] => I like to collect rock albums
                                    [interests] => Array
                                        (
                                            [0] => music
                                        )

                                )

                        )

                    [1] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 1
                            [_score] => 0.30685282
                            [_source] => Array
                                (
                                    [first_name] => John
                                    [last_name] => Smith
                                    [age] => 25
                                    [about] => I love to go rock climbing
                                    [interests] => Array
                                        (
                                            [0] => sports
                                            [1] => music
                                        )

                                )

                        )

                )

        )

)

5、使用Query DSL搜索。搜索姓 last_name等于Smith的员工,查询条件以JSON请求体的形式发送

// Query DSL request body: a "match" query on last_name.
$param = array();
$param['query']['match']['last_name'] = 'Smith';

$result = PostCurl($url . 'megacorp/employee/_search', $param, '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 1
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 2
            [max_score] => 0.30685282
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 2
                            [_score] => 0.30685282
                            [_source] => Array
                                (
                                    [first_name] => Jane
                                    [last_name] => Smith
                                    [age] => 32
                                    [about] => I like to collect rock albums
                                    [interests] => Array
                                        (
                                            [0] => music
                                        )

                                )

                        )

                    [1] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 1
                            [_score] => 0.30685282
                            [_source] => Array
                                (
                                    [first_name] => John
                                    [last_name] => Smith
                                    [age] => 25
                                    [about] => I love to go rock climbing
                                    [interests] => Array
                                        (
                                            [0] => sports
                                            [1] => music
                                        )

                                )

                        )

                )

        )

)

6、更加复杂的搜索。搜索姓 last_name等于Smith的员工,并增加年龄大于30岁的限定条件

// Employees whose last_name matches "Smith" AND whose age is greater than 30.
// The "filtered" query was deprecated in Elasticsearch 2.0 and removed in 5.0; its
// replacement is a "bool" query with the scoring part under "must" and the non-scoring
// part under "filter" (the "filter" clause requires Elasticsearch 2.0 or later).
$param = array(
    'query' =>   array(
        'bool'  =>   array(
            // Scored full-text condition.
            'must'      =>   array(
                'match' =>   array('last_name'   =>   'Smith'),
            ),

            // Yes/no restriction; does not contribute to the relevance score.
            'filter'    =>   array(
                'range' =>   array(
                    'age'   =>   array('gt'  =>   30)
                ),
            ),
        ),
    ),
);
$result = PostCurl($url.'megacorp/employee/_search', $param, '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 4
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 1
            [max_score] => 0.30685282
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 2
                            [_score] => 0.30685282
                            [_source] => Array
                                (
                                    [first_name] => Jane
                                    [last_name] => Smith
                                    [age] => 32
                                    [about] => I like to collect rock albums
                                    [interests] => Array
                                        (
                                            [0] => music
                                        )

                                )

                        )

                )

        )

)

7、更加复杂的全文搜索。一项在传统数据库很难实现的功能、我们将会搜索所有喜欢 rock climbing 的员工

// Full-text "match" query on the about field for the text "rock climbing".
$param = array();
$param['query']['match']['about'] = 'rock climbing';

$result = PostCurl($url . 'megacorp/employee/_search', $param, '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 7
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 2
            [max_score] => 0.16273327
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 1
                            [_score] => 0.16273327
                            [_source] => Array
                                (
                                    [first_name] => John
                                    [last_name] => Smith
                                    [age] => 25
                                    [about] => I love to go rock climbing
                                    [interests] => Array
                                        (
                                            [0] => sports
                                            [1] => music
                                        )

                                )

                        )

                    [1] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 2
                            [_score] => 0.016878016
                            [_source] => Array
                                (
                                    [first_name] => Jane
                                    [last_name] => Smith
                                    [age] => 32
                                    [about] => I like to collect rock albums
                                    [interests] => Array
                                        (
                                            [0] => music
                                        )

                                )

                        )

                )

        )

)

8、短语搜索。我们只需要查询 about 字段中包含完整短语 rock climbing 的员工

// "match_phrase" query on the about field for the phrase "rock climbing".
$param = array();
$param['query']['match_phrase']['about'] = 'rock climbing';

$result = PostCurl($url . 'megacorp/employee/_search', $param, '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 2
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 1
            [max_score] => 0.23013961
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 1
                            [_score] => 0.23013961
                            [_source] => Array
                                (
                                    [first_name] => John
                                    [last_name] => Smith
                                    [age] => 25
                                    [about] => I love to go rock climbing
                                    [interests] => Array
                                        (
                                            [0] => sports
                                            [1] => music
                                        )

                                )

                        )

                )

        )

)

9、高亮我们的搜索结果。只需要添加一个 highlight 参数

// Same phrase query as before, plus a "highlight" section for the about field.
$param = array();
$param['query']['match_phrase']['about'] = 'rock climbing';
// Empty array on purpose: no per-field highlight settings are given.
$param['highlight']['fields']['about'] = array();

$result = PostCurl($url . 'megacorp/employee/_search', $param, '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 2
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 1
            [max_score] => 0.23013961
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 1
                            [_score] => 0.23013961
                            [_source] => Array
                                (
                                    [first_name] => John
                                    [last_name] => Smith
                                    [age] => 25
                                    [about] => I love to go rock climbing
                                    [interests] => Array
                                        (
                                            [0] => sports
                                            [1] => music
                                        )

                                )

                            [highlight] => Array
                                (
                                    [about] => Array
                                        (
                                            [0] => I love to go <em>rock</em> <em>climbing</em>
                                        )

                                )

                        )

                )

        )

)

10、统计。例如,找一下员工中最受欢迎的兴趣是什么

// "terms" aggregation named all_interests over the interests field.
$param = array();
$param['aggs']['all_interests']['terms']['field'] = 'interests';

$result = PostCurl($url . 'megacorp/employee/_search', $param, '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 2
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 3
            [max_score] => 1
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 2
                            [_score] => 1
                            [_source] => Array
                                (
                                    [first_name] => Jane
                                    [last_name] => Smith
                                    [age] => 32
                                    [about] => I like to collect rock albums
                                    [interests] => Array
                                        (
                                            [0] => music
                                        )

                                )

                        )

                    [1] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 1
                            [_score] => 1
                            [_source] => Array
                                (
                                    [first_name] => John
                                    [last_name] => Smith
                                    [age] => 25
                                    [about] => I love to go rock climbing
                                    [interests] => Array
                                        (
                                            [0] => sports
                                            [1] => music
                                        )

                                )

                        )

                    [2] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 3
                            [_score] => 1
                            [_source] => Array
                                (
                                    [first_name] => Douglas
                                    [last_name] => Fir
                                    [age] => 35
                                    [about] => I like to build cabinets
                                    [interests] => Array
                                        (
                                            [0] => forestry
                                        )

                                )

                        )

                )

        )

    [aggregations] => Array
        (
            [all_interests] => Array
                (
                    [doc_count_error_upper_bound] => 0
                    [sum_other_doc_count] => 0
                    [buckets] => Array
                        (
                            [0] => Array
                                (
                                    [key] => music
                                    [doc_count] => 2
                                )

                            [1] => Array
                                (
                                    [key] => forestry
                                    [doc_count] => 1
                                )

                            [2] => Array
                                (
                                    [key] => sports
                                    [doc_count] => 1
                                )

                        )

                )

        )

)

11、如果只想要查询姓 smith 的员工的兴趣汇总情况

// Interests aggregation restricted to documents whose last_name matches "smith".
$param = array();
$param['query']['match']['last_name'] = 'smith';
$param['aggs']['all_interests']['terms']['field'] = 'interests';

$result = PostCurl($url . 'megacorp/employee/_search', $param, '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 2
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 2
            [max_score] => 0.30685282
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 2
                            [_score] => 0.30685282
                            [_source] => Array
                                (
                                    [first_name] => Jane
                                    [last_name] => Smith
                                    [age] => 32
                                    [about] => I like to collect rock albums
                                    [interests] => Array
                                        (
                                            [0] => music
                                        )

                                )

                        )

                    [1] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 1
                            [_score] => 0.30685282
                            [_source] => Array
                                (
                                    [first_name] => John
                                    [last_name] => Smith
                                    [age] => 25
                                    [about] => I love to go rock climbing
                                    [interests] => Array
                                        (
                                            [0] => sports
                                            [1] => music
                                        )

                                )

                        )

                )

        )

    [aggregations] => Array
        (
            [all_interests] => Array
                (
                    [doc_count_error_upper_bound] => 0
                    [sum_other_doc_count] => 0
                    [buckets] => Array
                        (
                            [0] => Array
                                (
                                    [key] => music
                                    [doc_count] => 2
                                )

                            [1] => Array
                                (
                                    [key] => sports
                                    [doc_count] => 1
                                )

                        )

                )

        )

)

12、汇总还允许多个层面的统计。比如我们还可以统计每一个兴趣下的平均年龄

// Nested aggregation: a "terms" aggregation on interests with an "avg" of age inside it.
$interestBuckets = array();
$interestBuckets['terms']['field'] = 'interests';
$interestBuckets['aggs']['avg_age']['avg']['field'] = 'age';

$param = array(
    'aggs' => array('all_interests' => $interestBuckets),
);

$result = PostCurl($url . 'megacorp/employee/_search', $param, '', 'GET');
print_r($result);

返回数据
Array
(
    [took] => 16
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 3
            [max_score] => 1
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 2
                            [_score] => 1
                            [_source] => Array
                                (
                                    [first_name] => Jane
                                    [last_name] => Smith
                                    [age] => 32
                                    [about] => I like to collect rock albums
                                    [interests] => Array
                                        (
                                            [0] => music
                                        )

                                )

                        )

                    [1] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 1
                            [_score] => 1
                            [_source] => Array
                                (
                                    [first_name] => John
                                    [last_name] => Smith
                                    [age] => 25
                                    [about] => I love to go rock climbing
                                    [interests] => Array
                                        (
                                            [0] => sports
                                            [1] => music
                                        )

                                )

                        )

                    [2] => Array
                        (
                            [_index] => megacorp
                            [_type] => employee
                            [_id] => 3
                            [_score] => 1
                            [_source] => Array
                                (
                                    [first_name] => Douglas
                                    [last_name] => Fir
                                    [age] => 35
                                    [about] => I like to build cabinets
                                    [interests] => Array
                                        (
                                            [0] => forestry
                                        )

                                )

                        )

                )

        )

    [aggregations] => Array
        (
            [all_interests] => Array
                (
                    [doc_count_error_upper_bound] => 0
                    [sum_other_doc_count] => 0
                    [buckets] => Array
                        (
                            [0] => Array
                                (
                                    [key] => music
                                    [doc_count] => 2
                                    [avg_age] => Array
                                        (
                                            [value] => 28.5
                                        )

                                )

                            [1] => Array
                                (
                                    [key] => forestry
                                    [doc_count] => 1
                                    [avg_age] => Array
                                        (
                                            [value] => 35
                                        )

                                )

                            [2] => Array
                                (
                                    [key] => sports
                                    [doc_count] => 1
                                    [avg_age] => Array
                                        (
                                            [value] => 25
                                        )

                                )

                        )

                )

        )

)

删除指定日期的索引数据(下面的命令会删除所有名称以 logstash-2017.02.25 开头的索引)

curl -XDELETE 'http://localhost:9200/logstash-2017.02.25*'

掌握了上面这些内容,就已经可以进行Elasticsearch的基本操作了。后面还会介绍更深入的内容、分布式集群等,敬请关注~

发表评论:

TOP