‹#›
make sense of your (BIG) data!
David Pilato
Developer | Evangelist
@dadoonet
‹#›
3
4
5
6
6
7
7
8
8
9
9
Big data?
10
11
12 Source: http://www.csc.com/insights/flxwd/78931-big_data_just_beginning_to_explode
12 Source: http://www.csc.com/insights/flxwd/78931-big_data_just_beginning_to_explode
35.000.000.000.000.000 mb
13 Source: http://www.domo.com/learn/data-never-sleeps-2
Some data
CREATE TABLE user
(
name VARCHAR(100),
comments VARCHAR(1000)
);
INSERT INTO user VALUES ('David Pilato', 'Developer at elastic');
INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at
french customs service');
INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker');
INSERT INTO user VALUES ('David David', 'Who is that guy?');
15
Search on term
SELECT * FROM user WHERE name="David";
Empty set (0,00 sec)
INSERT INTO user VALUES ('David Pilato', 'Developer at elastic');
INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at
french customs service');
INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker');
INSERT INTO user VALUES ('David David', 'Who is that guy?');
16
Search like
SELECT * FROM user WHERE name LIKE "%David%";
+--------------+----------------------+
| name | comments |
+--------------+----------------------+
| David Pilato | Developer at elastic |
| David Gageot | Engineer at Docker |
| David David | Who is that guy? |
+--------------+----------------------+
INSERT INTO user VALUES ('David Pilato', 'Developer at elastic');
INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at
french customs service');
INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker');
INSERT INTO user VALUES ('David David', 'Who is that guy?');
17
Search like
SELECT * FROM user WHERE name LIKE "%David%Pilato%";
+--------------+----------------------+
| name | comments |
+--------------+----------------------+
| David Pilato | Developer at elastic |
+--------------+----------------------+
INSERT INTO user VALUES ('David Pilato', 'Developer at elastic');
INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at
french customs service');
INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker');
INSERT INTO user VALUES ('David David', 'Who is that guy?');
18
Search like with inverted terms
SELECT * FROM user WHERE name LIKE "%Pilato%David%";
Empty set (0,00 sec)
INSERT INTO user VALUES ('David Pilato', 'Developer at elastic');
INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at
french customs service');
INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker');
INSERT INTO user VALUES ('David David', 'Who is that guy?');
19
Search in two fields
SELECT * FROM user WHERE name LIKE "%David%" OR
comments LIKE "%David%";
+--------------+---------------------------------------------+
| name | comments |
+--------------+---------------------------------------------+
| David Pilato | Developer at elastic |
| Malloum Laya | Worked with David at french customs service |
| David Gageot | Engineer at Docker |
| David David | Who is that guy? |
+--------------+---------------------------------------------+
INSERT INTO user VALUES ('David Pilato', 'Developer at elastic');
INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at
french customs service');
INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker');
INSERT INTO user VALUES ('David David', 'Who is that guy?');
20
21
22
23
search engine?
24
search engine?
24
25
25
Lucene
25
REST/JSON Lucene
25
REST/JSON
scalable
Lucene
25
plug & play
REST/JSON
scalable
Lucene
25
plug & play
REST/JSON
scalable
Apache 2 license
Lucene
start…
26
$ wget https://artifacts.elastic.co/downloads/elasticsearch/
elasticsearch-5.0.0-beta1.tar.gz
$ tar -xf elasticsearch-5.0.0-beta1.tar.gz
$ ./elasticsearch-5.0.0-beta1/bin/elasticsearch
[INFO ][node ][Ghost Maker] version[5.0.0-beta1], pid[72965], …
[INFO ][transport][Ghost Maker] publish_address {[/127.0.0.1:9300]}
[INFO ][http ][Ghost Maker] publish_address {[/127.0.0.1:9200]}
[INFO ][node ][Ghost Maker] started
… and play!
27
$ curl -XPUT localhost:9200/sessions/session/1 -d '{
"title" : "Elasticsearch",
"subtitle" : "Make sense of your (BIG) data !",
"date" : "2016-10-07T16:30:00",
"tags" : [ "realtime", "bigdata" ],
"speakers" : [{
"first_name" : "David",
"last_name" : "Pilato"
}]
}'
search!
28
$ curl http://localhost:9200/sessions/session/_search -d'
{
"query": {
"multi_match": {
"query": "elasticsearch bigdata david",
"fields": [ "title^3", "tags^2", "speakers.first_name" ]
}
},
"post_filter": {
"range": {
"date": {
"from": "2016-10-01",
"to": "2016-11-01"
}
}
}
}'
compute?
29
compute!
$ curl http://localhost:9200/sessions/session/_search -d'
{
"query": { ... },
"aggs": {
"by_date": {
"date_histogram": {
"field": "date",
"interval": "day",
"format" : "dd/MM/yyyy"
}
}
}
}'
30
compute!
$ curl http://localhost:9200/sessions/session/_search -d'
{
"query": { ... },
"aggs": {
"by_date": {
"date_histogram": {
"field": "date",
"interval": "day",
"format" : "dd/MM/yyyy"
}
}
}
}'
30
"by_date": [
{ "key_as_string": "03/10/2016", "doc_count": 1 },
{ "key_as_string": "07/10/2016", "doc_count": 2 },
{ "key_as_string": "10/10/2016", "doc_count": 3 }
]
compute!
$ curl http://localhost:9200/sessions/session/_search -d'
{
"query": { ... },
"aggs": {
"by_date": {
"date_histogram": {
"field": "date",
"interval": "day",
"format" : "dd/MM/yyyy"
}
}
}
}'
30
"by_date": [
{ "key_as_string": "03/10/2016", "doc_count": 1 },
{ "key_as_string": "07/10/2016", "doc_count": 2 },
{ "key_as_string": "10/10/2016", "doc_count": 3 }
]
Let’s make sense of …
• logs
• twitter
• github
• marketing data
• ...
• your data
• your big data
32
Let’s make sense of …
• logs
• twitter
• github
• marketing data
• ...
• your data
• your big data
33
{
"name":"Pilato David",
"dateOfBirth":"1971-12-26",
"gender":"male",
"children":3,
"marketing":{
"fashion":334,
"music":3363,
"hifi":2351
},
"address":{
"country":"France",
"city":"Paris",
"location": [2.332395, 48.861871]
}
}
Let's inject 1 000 000
marketing documents
34
Demo
‹#›
Demo
35
36
infomercial
37
The only Elasticsearch as a Service offering
powered by the creators of the Elastic Stack
• Always runs on the latest software
• One-click to scale/upgrade with no downtime
• Free Kibana and backups every 30 minutes
• Dedicated, SLA-based support
• Easily add X-Pack features: security (Shield),
alerting (Watcher), and monitoring (Marvel)
• Pricing starts at $45 a month
infomercial
39
‹#›
https://www.elastic.co/subscriptions
Thank you!
David Pilato
Developer | Evangelist
@dadoonet

MAKE SENSE OF YOUR BIG DATA

  • 1.
    ‹#› make sense of your (BIG) data! David Pilato Developer | Evangelist @dadoonet
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
  • 16.
  • 17.
  • 18.
  • 20.
    Some data CREATE TABLE user ( name VARCHAR(100), comments VARCHAR(1000) ); INSERT INTO user VALUES ('David Pilato', 'Developer at elastic'); INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at french customs service'); INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker'); INSERT INTO user VALUES ('David David', 'Who is that guy?'); 15
  • 21.
    Search on term SELECT * FROM user WHERE name="David"; Empty set (0,00 sec) INSERT INTO user VALUES ('David Pilato', 'Developer at elastic'); INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at french customs service'); INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker'); INSERT INTO user VALUES ('David David', 'Who is that guy?'); 16
  • 22.
    Search like SELECT * FROM user WHERE name LIKE "%David%"; +--------------+----------------------+ | name | comments | +--------------+----------------------+ | David Pilato | Developer at elastic | | David Gageot | Engineer at Docker | | David David | Who is that guy? | +--------------+----------------------+ INSERT INTO user VALUES ('David Pilato', 'Developer at elastic'); INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at french customs service'); INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker'); INSERT INTO user VALUES ('David David', 'Who is that guy?'); 17
  • 23.
    Search like SELECT * FROM user WHERE name LIKE "%David%Pilato%"; +--------------+----------------------+ | name | comments | +--------------+----------------------+ | David Pilato | Developer at elastic | +--------------+----------------------+ INSERT INTO user VALUES ('David Pilato', 'Developer at elastic'); INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at french customs service'); INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker'); INSERT INTO user VALUES ('David David', 'Who is that guy?'); 18
  • 24.
    Search like with inverted terms SELECT * FROM user WHERE name LIKE "%Pilato%David%"; Empty set (0,00 sec) INSERT INTO user VALUES ('David Pilato', 'Developer at elastic'); INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at french customs service'); INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker'); INSERT INTO user VALUES ('David David', 'Who is that guy?'); 19
  • 25.
    Search in two fields SELECT * FROM user WHERE name LIKE "%David%" OR comments LIKE "%David%"; +--------------+---------------------------------------------+ | name | comments | +--------------+---------------------------------------------+ | David Pilato | Developer at elastic | | Malloum Laya | Worked with David at french customs service | | David Gageot | Engineer at Docker | | David David | Who is that guy? | +--------------+---------------------------------------------+ INSERT INTO user VALUES ('David Pilato', 'Developer at elastic'); INSERT INTO user VALUES ('Malloum Laya', 'Worked with David at french customs service'); INSERT INTO user VALUES ('David Gageot', 'Engineer at Docker'); INSERT INTO user VALUES ('David David', 'Who is that guy?'); 20
  • 26.
  • 27.
  • 28.
  • 29.
  • 30.
  • 31.
  • 32.
  • 33.
  • 34.
  • 35.
  • 36.
  • 37.
    start… 26 $ wget https://artifacts.elastic.co/downloads/elasticsearch/ elasticsearch-5.0.0-beta1.tar.gz $ tar -xf elasticsearch-5.0.0-beta1.tar.gz $ ./elasticsearch-5.0.0-beta1/bin/elasticsearch [INFO ][node ][Ghost Maker] version[5.0.0-beta1], pid[72965], … [INFO ][transport][Ghost Maker] publish_address {[/127.0.0.1:9300]} [INFO ][http ][Ghost Maker] publish_address {[/127.0.0.1:9200]} [INFO ][node ][Ghost Maker] started
  • 38.
    … and play! 27 $ curl -XPUT localhost:9200/sessions/session/1 -d '{ "title" : "Elasticsearch", "subtitle" : "Make sense of your (BIG) data !", "date" : "2016-10-07T16:30:00", "tags" : [ "realtime", "bigdata" ], "speakers" : [{ "first_name" : "David", "last_name" : "Pilato" }] }'
  • 39.
    search! 28 $ curl http://localhost:9200/sessions/session/_search -d' { "query": { "multi_match": { "query": "elasticsearch bigdata david", "fields": [ "title^3", "tags^2", "speakers.first_name" ] } }, "post_filter": { "range": { "date": { "from": "2016-10-01", "to": "2016-11-01" } } } }'
  • 40.
  • 41.
    compute! $ curl http://localhost:9200/sessions/session/_search -d' { "query": { ... }, "aggs": { "by_date": { "date_histogram": { "field": "date", "interval": "day", "format" : "dd/MM/yyyy" } } } }' 30
  • 42.
    compute! $ curl http://localhost:9200/sessions/session/_search -d' { "query": { ... }, "aggs": { "by_date": { "date_histogram": { "field": "date", "interval": "day", "format" : "dd/MM/yyyy" } } } }' 30 "by_date": [ { "key_as_string": "03/10/2016", "doc_count": 1 }, { "key_as_string": "07/10/2016", "doc_count": 2 }, { "key_as_string": "10/10/2016", "doc_count": 3 } ]
  • 43.
    compute! $ curl http://localhost:9200/sessions/session/_search -d' { "query": { ... }, "aggs": { "by_date": { "date_histogram": { "field": "date", "interval": "day", "format" : "dd/MM/yyyy" } } } }' 30 "by_date": [ { "key_as_string": "03/10/2016", "doc_count": 1 }, { "key_as_string": "07/10/2016", "doc_count": 2 }, { "key_as_string": "10/10/2016", "doc_count": 3 } ]
  • 45.
    Let’s make sense of … • logs • twitter • github • marketing data • ... • your data • your big data 32
  • 46.
    Let’s make sense of … • logs • twitter • github • marketing data • ... • your data • your big data 33 { "name":"Pilato David", "dateOfBirth":"1971-12-26", "gender":"male", "children":3, "marketing":{ "fashion":334, "music":3363, "hifi":2351 }, "address":{ "country":"France", "city":"Paris", "location": [2.332395, 48.861871] } }
  • 47.
    Let's inject 1 000 000 marketing documents 34 Demo
  • 48.
  • 49.
  • 50.
    37 The only Elasticsearch as a Service offering powered by the creators of the Elastic Stack • Always runs on the latest software • One-click to scale/upgrade with no downtime • Free Kibana and backups every 30 minutes • Dedicated, SLA-based support • Easily add X-Pack features: security (Shield), alerting (Watcher), and monitoring (Marvel) • Pricing starts at $45 a month infomercial
  • 52.
  • 53.