SlideShare a Scribd company logo
1	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Parallel	
  SQL	
  and	
  Analytics	
  with	
  Solr	
  
Yonik	
  Seeley	
  
Cloudera	
  
2	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
My	
  Background	
  
• Creator	
  of	
  Solr	
  
• Cloudera	
  Engineer	
  	
  
• LucidWorks	
  Co-Founder	
  
• Lucene/Solr	
  committer,	
  PMC	
  member	
  
• Apache	
  Software	
  Foundation	
  member	
  
• M.S.	
  in	
  Computer	
  Science,	
  Stanford	
  
3	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
What	
  is	
  Apache	
  Solr	
  
•  Search	
  server	
  
• like	
  a	
  database,	
  but	
  different	
  indexing	
  technology	
  (Apache	
  Lucene)	
  
• optimized	
  for	
  interactive	
  results	
  
•  Columns	
  (aka	
  docValues)	
  for	
  fast	
  scans	
  
•  Highlighting	
  
•  Faceting	
  (category	
  counts)	
  
•  Spatial	
  search	
  
•  Powers	
  search	
  for	
  the	
  leading	
  Hadoop	
  Big	
  Data	
  vendors	
  	
  	
  
4	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Partial	
  Solr	
  Architecture	
  
Lucene	
  
Streaming	
  Expressions	
  
Parallel	
  SQL	
  
Distributed	
  Search	
  
Facets	
  &	
  
Statistics	
  
Solr	
  Request	
  
Framework	
  
JSON	
  Facet	
  
API	
  
Green	
  blocks	
  
are	
  newer	
  
additions	
  
5	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Different	
  ways	
  to	
  calculate	
  things	
  in	
  Solr	
  
•  Faceted	
  search	
  v1	
  /	
  stats	
  module	
  
	
  facet=true&facet.field=color&facet.limit=5	
  
•  JSON	
  Facet	
  API	
  (faceted	
  search	
  v2)	
  
	
  	
  	
  {colors:{type:terms,	
  field:color,	
  limit:5}}	
  
•  Streaming	
  expressions	
  
	
  	
  	
  rollup(search(techproducts,q="*:*",fl="id,color",	
  
sort="color	
  asc"),	
  over="color",	
  count(*))	
  	
  
•  Parallel	
  SQL	
  
	
  select	
  count(*)	
  from	
  techproducts	
  where	
  _text_='(*:*)'	
  group	
  
by	
  color	
  
6	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
JSON	
  Facet	
  API	
  
7	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Faceted	
  Search	
  
•  Breaks	
  search	
  results	
  into	
  
buckets	
  
•  Generally	
  provides	
  bucket	
  
counts	
  
•  Allows	
  user	
  to	
  filter	
  /	
  "drill	
  
into"	
  results	
  
8	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Faceting	
  
Search	
  
Statistics	
  
Facet	
  Module	
  Goals	
  
Search	
  
Joins	
  
Grouping	
  
Field	
  
Collapsing	
  
New	
  Facet	
  Module	
  
JSON	
  Facet	
  API	
  
•  Integration	
  
•  Performance	
  
•  Ease	
  of	
  use	
  
Highlighting	
  
Nested	
  
Documents	
  
Geosearch	
  
9	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Simple	
  JSON	
  Facet	
  request	
  and	
  response	
  
curl	
  http://localhost:8983/solr/query	
  -d	
  '	
  
q=widgets&	
  
json.facet=	
  
{	
  
	
  	
  x	
  :	
  "avg(price)"	
  ,	
  	
  
	
  	
  y	
  :	
  "unique(brand)"	
  
}	
  
'	
  
	
  
[…]	
  
"facets"	
  :	
  {	
  
	
  	
  "count"	
  :	
  314,	
  
	
  	
  "x"	
  :	
  102.5,	
  
	
  	
  "y"	
  :	
  28	
  
}	
  
root	
  domain	
  defined	
  by	
  docs	
  
matching	
  the	
  query	
   count	
  of	
  docs	
  in	
  the	
  bucket	
  
10	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Terms	
  facet	
  example	
  
	
  json.facet={	
  
	
  	
  	
  shoes	
  :	
  {	
  
	
  	
  	
  	
  	
  type	
  :	
  terms,	
  	
  
	
  	
  	
  	
  	
  field	
  :	
  shoe_style,	
  
	
  	
  	
  	
  	
  sort	
  :	
  {x	
  :	
  desc},	
  
	
  	
  	
  	
  	
  facet	
  :	
  {	
  
	
  	
  	
  	
  	
  	
  	
  x	
  :	
  "avg(price)",	
  
	
  	
  	
  	
  	
  	
  	
  y	
  :	
  "unique(brand)"	
  
	
  	
  	
  	
  	
  }	
  
	
  	
  	
  }	
  
	
  }	
  
"facets":	
  {	
  
	
  	
  "count"	
  :	
  472,	
  
	
  	
  "shoes":	
  {	
  
	
  	
  	
  	
  "buckets"	
  :	
  [	
  
	
  	
  	
  	
  	
  	
  {	
  
	
  	
  	
  	
  	
  	
  	
  	
  "val"	
  :	
  "Hiking",	
  
	
  	
  	
  	
  	
  	
  	
  	
  "count"	
  :	
  34,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "x"	
  :	
  135.25,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "y"	
  :	
  17,	
  
	
  	
  	
  	
  	
  	
  },	
  
	
  	
  	
  	
  	
  	
  {	
  
	
  	
  	
  	
  	
  	
  	
  	
  "val"	
  :	
  "Running",	
  
	
  	
  	
  	
  	
  	
  	
  	
  "count"	
  :	
  45,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "x"	
  :	
  110.75,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "y"	
  :	
  24,	
  
	
  	
  	
  	
  	
  	
  },	
  
	
  
Calculated	
  per-­‐bucket	
  
Sort	
  by	
  any	
  stat!	
  
11	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Sub-­‐facet	
  example	
  
	
  json.facet={	
  
	
  	
  	
  shoes:{	
  
	
  	
  	
  	
  	
  type	
  :	
  terms,	
  
	
  	
  	
  	
  	
  field	
  :	
  shoe_style,	
  
	
  	
  	
  	
  	
  sort	
  :	
  {x	
  :	
  desc},	
  
	
  	
  	
  	
  	
  facet	
  :	
  {	
  
	
  	
  	
  	
  	
  	
  	
  x	
  :	
  "avg(price)",	
  
	
  	
  	
  	
  	
  	
  	
  y	
  :	
  "unique(brand)",	
  
	
  	
  	
  	
  	
  	
  	
  colors	
  :	
  {	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  type	
  :	
  terms,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  field	
  :	
  color	
  
	
  	
  	
  	
  	
  	
  	
  }	
  	
  
	
  	
  	
  	
  	
  }	
  
	
  	
  	
  }	
  
	
  }	
  
"facets":	
  {	
  
	
  	
  "count"	
  :	
  472,	
  
	
  	
  "shoes":	
  {	
  
	
  	
  	
  	
  "buckets"	
  :	
  [	
  
	
  	
  	
  	
  	
  	
  {	
  
	
  	
  	
  	
  	
  	
  	
  	
  "val"	
  :	
  "Hiking",	
  
	
  	
  	
  	
  	
  	
  	
  	
  "count"	
  :	
  34,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "x"	
  :	
  135.25,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "y"	
  :	
  17,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "colors"	
  :	
  {	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "buckets"	
  :	
  [	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  {	
  "val"	
  :	
  "brown",	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "count"	
  :	
  12	
  },	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  {	
  "val"	
  :	
  "black",	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "count"	
  :	
  10	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  },	
  […]	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  ]	
  
	
  	
  	
  	
  	
  	
  	
  	
  }	
  //	
  end	
  of	
  colors	
  sub-­‐facet	
  
	
  	
  	
  	
  	
  	
  },	
  //	
  end	
  of	
  Hiking	
  bucket	
  
	
  	
  	
  	
  	
  	
  {	
  
	
  	
  	
  	
  	
  	
  	
  	
  "val"	
  :	
  "Running",	
  
	
  	
  	
  	
  	
  	
  	
  	
  "count"	
  :	
  45,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "x"	
  :	
  110.75,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "y"	
  :	
  24,	
  
	
  	
  	
  	
  	
  	
  	
  	
  "colors"	
  :	
  {	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  "buckets"	
  :	
  […]	
  
12	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Facet	
  Types	
  
•  Terms	
  Facet	
  
• Creates	
  new	
  domains	
  (facet	
  buckets)	
  based	
  on	
  values	
  in	
  a	
  field	
  
•  Range	
  Facet	
  
• Creates	
  multiple	
  buckets	
  based	
  on	
  date	
  ranges	
  or	
  numeric	
  ranges	
  
•  Query	
  Facet	
  
• Creates	
  a	
  single	
  bucket	
  of	
  documents	
  that	
  match	
  any	
  given	
  query	
  
•  Unlimited	
  nesting:	
  Any	
  facet	
  types	
  may	
  have	
  any	
  number	
  of	
  sub-­‐facets	
  
•  Multi-select	
  faceting	
  (filter	
  exclusion)	
  
•  Nested	
  documents	
  (block	
  join)	
  
13	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Streaming	
  Expressions	
  
14	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Solr	
  Streaming	
  Expressions	
  
• Generic	
  platform	
  for	
  distributed	
  computation	
  
• The	
  basis	
  for	
  implementing	
  distributed	
  parallel	
  SQL	
  
• relational	
  operations	
  on	
  streams	
  
• Works	
  across	
  entire	
  result	
  sets	
  (or	
  subsets)	
  
• normal	
  search	
  operations	
  are	
  designed	
  for	
  fast	
  top-­‐N	
  operations	
  
• Map-­‐reduce	
  like	
  "shuffle"	
  partitions	
  result	
  sets	
  for	
  greater	
  scalability	
  
• Worker	
  nodes	
  can	
  be	
  allocated	
  from	
  a	
  collection	
  for	
  parallelism	
  
• Incorporates	
  streams	
  from	
  non-­‐Solr	
  systems	
  
15	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
search()	
  expression	
  
$	
  curl	
  http://localhost:8983/solr/techproducts/stream	
  -d	
  
'expr=search(techproducts,	
  q="*:*",	
  fl="id,price,score",	
  sort="id	
  asc")'	
  
	
  
{"result-­‐set":{"docs":[	
  
{"score":1.0,"id":"0579B002","price":179.99},	
  
{"score":1.0,"id":"100-­‐435805","price":649.99},	
  
{"score":1.0,"id":"3007WFP","price":2199.0},	
  
{"score":1.0,"id":"VDBDB1A16"},	
  
{"score":1.0,"id":"VS1GB400C3","price":74.99},	
  
{"EOF":true,"RESPONSE_TIME":6}]}}	
  
	
  
resulting	
  tuple	
  stream	
  
16	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Search	
  Tuple	
  Stream	
  
Shard	
  1	
  
Replica	
  2	
  
Shard	
  1	
  
Replica	
  1	
  
Shard	
  1	
  
Replica	
  2	
  
Shard	
  2	
  
Replica	
  1	
  
Shard	
  1	
  
Replica	
  2	
  
Shard	
  3	
  
Replica	
  1	
  
Worker	
  
Tuple	
  Stream	
  
Tuple	
  Stream	
  
/stream	
  worker	
  
executing	
  the	
  "search"	
  
expression	
  
•  search()	
  is	
  a	
  stream	
  source	
  
•  Fully	
  SolrCloud	
  aware	
  (knows	
  cluster	
  layout)	
  
•  Fully	
  streaming	
  (no	
  big	
  buffers)	
  
17	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
search	
  expression	
  args	
  
search( 	
   	
   	
   	
   	
   	
   	
  //	
  parses	
  to	
  CloudSolrStream	
  java	
  class	
  
	
  techproducts, 	
   	
   	
   	
  //	
  name	
  of	
  the	
  collection	
  to	
  search	
  
	
  zkHost="localhost:9983", 	
  //	
  (opt)	
  zookeeper	
  address	
  of	
  collection	
  to	
  search	
  
	
  qt="/select",	
   	
   	
   	
   	
  //	
  (opt)	
  the	
  request	
  handler	
  to	
  use	
  (/export	
  is	
  also	
  available)	
  
	
  rows=1000000,	
  	
  	
   	
   	
   	
  //	
  (opt)	
  number	
  of	
  rows	
  to	
  retrieve	
  	
  
	
  q=*:*,	
   	
   	
   	
   	
   	
   	
  //	
  query	
  to	
  match	
  returned	
  documents	
  
	
  fl="id,price,score", 	
   	
   	
  //	
  which	
  fields	
  to	
  return	
  
	
  sort="id	
  asc,	
  price	
  desc", 	
  //	
  how	
  to	
  sort	
  the	
  results	
  
	
  	
  	
  	
  	
  	
  aliases="id=myid,price=myprice"	
  	
  //	
  (opt)	
  renames	
  output	
  fields	
  
)	
  
18	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
rollup()	
  expression	
  
•  Groups	
  tuples	
  by	
  common	
  field	
  values	
  
•  Emits	
  rollup	
  value	
  along	
  with	
  metrics	
  
•  Closest	
  equivalent	
  to	
  faceting	
  
rollup(	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  
	
  	
  	
  	
  	
  	
  search(collection1,	
  qt="/export",	
  	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  q="*:*",	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  fl="id,manu,price",	
  	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  sort="manu	
  asc"),	
  
	
  	
  	
  	
  	
  	
  	
  over="manu",	
  
	
  	
  	
  	
  	
  	
  	
  count(*),	
  
	
  	
  	
  	
  	
  	
  	
  max(price)	
  
)	
  
metrics	
  
{"result-­‐set":{"docs":[	
  
{"manu":"apple","count(*)":1.0},	
  
{"manu":"asus","count(*)":1.0},	
  
{"manu":"ati","count(*)":1.0},	
  
{"manu":"belkin","count(*)":2.0},	
  
{"manu":"canon","count(*)":2.0},	
  
{"manu":"corsair","count(*)":3.0},	
  
[...]	
  
19	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Parallel	
  Tuple	
  Stream	
  
Shard	
  1	
  
Replica	
  2	
  
Shard	
  1	
  
Replica	
  1	
  
Shard	
  1	
  
Replica	
  2	
  
Shard	
  2	
  
Replica	
  1	
  
Shard	
  1	
  
Replica	
  2	
  
Shard	
  3	
  
Replica	
  1	
  
Worker	
  
Partition	
  1	
  
Worker	
  
Partition	
  2	
  
Worker	
  
Tuple	
  Stream	
  
20	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Streaming	
  Expressions	
  –	
  parallel	
  
•  Wraps	
  a	
  stream	
  and	
  sends	
  to	
  N	
  worker	
  
nodes	
  
•  The	
  first	
  parameter	
  is	
  the	
  collection	
  to	
  
use	
  for	
  the	
  intermediate	
  worker	
  nodes	
  
•  partitionKeys	
  must	
  be	
  provided	
  to	
  
underlying	
  workers	
  
• usually	
  makes	
  sense	
  to	
  partition	
  by	
  
what	
  you	
  are	
  grouping	
  on	
  
•  inner	
  and	
  outer	
  sorts	
  should	
  match	
  
parallel(collection1,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  rollup(	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  search(techproducts,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  q="*:*",	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  fl="id,manu,price",	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  sort="manu	
  asc",	
  	
  	
  	
  	
  	
  	
  	
  	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  partitionKeys="manu"),	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  over="manu	
  asc"),	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  workers=2,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  zkHost="localhost:9983",	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  sort="manu	
  asc")	
  
21	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Distributed	
  Joins!	
  
innerJoin(	
  
	
  	
  	
  	
  search(people,	
  q=*:*,	
  fl="personId,name",	
  sort="personId	
  asc"),	
  
	
  	
  	
  	
  search(pets,	
  q=type:cat,	
  fl="personId,petName",	
  sort="personId	
  asc"),	
  
	
  	
  	
  	
  on="personId"	
  
)	
  
	
  
Also:	
  leftOuterJoin,	
  hashJoin,	
  outerHashJoin,	
  
22	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
More	
  stream	
  decorators	
  
•  complement	
  –	
  emits	
  tuples	
  from	
  A	
  which	
  do	
  not	
  exist	
  in	
  B	
  
•  intersect	
  –	
  emits	
  tuples	
  from	
  A	
  which	
  do	
  exist	
  in	
  B	
  
•  merge	
  
•  reduce	
  
•  sort	
  
•  top	
  –	
  reorders	
  the	
  stream	
  and	
  returns	
  the	
  top	
  N	
  tuples	
  
•  unique	
  –	
  emits	
  only	
  the	
  first	
  tuple	
  for	
  each	
  value	
  
•  select	
  –	
  select,	
  rename,	
  or	
  give	
  default	
  values	
  to	
  fields	
  in	
  a	
  tuple	
  
	
  
https://cwiki.apache.org/confluence/display/solr/Streaming+Expressions	
  
	
  
23	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
jdbc()	
  expression	
  stream	
  
join	
  with	
  other	
  data	
  sources!	
  
innerJoin(	
  
	
  	
  select(	
  	
  	
  	
  search(collection1,	
  [...]),	
  
	
  	
  	
  	
  	
  	
  	
  personId_i	
  as	
  personId,	
  	
  	
  	
  rating_f	
  as	
  rating	
  	
  ),	
  
	
  	
  select(	
  	
  	
  	
  jdbc(connection="jdbc:hsqldb:mem:.",	
  sql="select	
  PEOPLE.ID	
  as	
  
PERSONID,	
  PEOPLE.NAME,	
  COUNTRIES.COUNTRY_NAME	
  from	
  PEOPLE	
  inner	
  join	
  
COUNTRIES	
  on	
  PEOPLE.COUNTRY_CODE	
  =	
  COUNTRIES.CODE	
  order	
  by	
  PEOPLE.ID",	
  
sort="ID	
  asc",	
  get_column_name=true),	
  
	
  	
  	
  	
  	
  	
  	
  ID	
  as	
  personId,	
  	
  	
  	
  NAME	
  as	
  personName,	
  	
  	
  	
  COUNTRY_NAME	
  as	
  country	
  	
  ),	
  	
  	
  	
  	
  
	
  	
  	
  on="personId"	
  
)	
  
24	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Parallel	
  SQL	
  
25	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
/sql	
  Handler	
  
Why	
  SQL?	
  
• External	
  integrations	
  
• Higher	
  level	
  language	
  –	
  says	
  what	
  we	
  want,	
  not	
  how	
  to	
  get	
  it	
  
• SQL	
  has	
  made	
  a	
  comeback	
  along	
  with	
  big	
  data,	
  more	
  ubiquitous	
  than	
  ever	
  
•  /sql	
  REST	
  endpoint	
  by	
  default	
  on	
  all	
  solr	
  nodes	
  
•  Translates	
  SQL	
  -­‐>	
  parallel	
  streaming	
  expressions	
  
•  SQL	
  tables	
  map	
  to	
  SolrCloud	
  collections	
  
•  Currently	
  uses	
  Presto	
  SQL	
  parser	
  
• Switch	
  to	
  Apache	
  Calcite	
  parser	
  in	
  the	
  works	
  
26	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
27	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Simplest	
  SQL	
  Example	
  
$	
  curl	
  http://localhost:8983/solr/techproducts/sql	
  -d	
  "stmt=select	
  id	
  from	
  techproducts"	
  
	
  
{"result-­‐set":{"docs":[	
  
{"id":"EN7800GTX/2DHTV/256M"},	
  
{"id":"100-­‐435805"},	
  
{"id":"UTF8TEST"},	
  
{"id":"SOLR1000"},	
  
{"id":"9885A004"},	
  
[...]	
  
tables	
  map	
  to	
  
collections	
  
28	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
SQL	
  handler	
  HTTP	
  parameters	
  
curl	
  http://localhost:8983/solr/techproducts/sql	
  -d	
  '	
  
&stmt=<sql_statement>	
  
&numWorkers=4	
  	
  //	
  currently	
  used	
  by	
  GROUP	
  BY	
  and	
  DISTINCT	
  (via	
  parallel	
  stream)	
  
&workerCollection=collection1	
  	
  //	
  where	
  to	
  create	
  intermediate	
  workers	
  
&workerZkhost=localhost:9983	
  	
  //	
  cluster	
  (zookeeper	
  ensemble)	
  address	
  
&aggregationMode=map_reduce	
  |	
  facet	
  
29	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
The	
  WHERE	
  clause	
  
•  WHERE	
  clauses	
  are	
  all	
  pushed	
  down	
  to	
  the	
  search	
  layer	
  
	
  
select	
  id	
  
	
  	
  where	
  popularity=10	
  	
  	
  //	
  simple	
  match	
  on	
  numeric	
  field	
  "popularity"	
  
	
  	
  where	
  popularity='[5	
  TO	
  10]'	
  	
  	
  //	
  solr	
  range	
  query	
  (note	
  the	
  quotes)	
  
	
  	
  where	
  name='hard	
  drive'	
  	
  	
  //	
  phrase	
  query	
  on	
  the	
  "name"	
  field	
  
	
  	
  where	
  name='((memory	
  retail)	
  AND	
  popularity:[5	
  TO	
  10])'	
  	
  //	
  arbitrary	
  solr	
  query	
  
	
  	
  where	
  name='(memory	
  retail)'	
  AND	
  popularity='[5	
  TO	
  10]'	
  //	
  boolean	
  logic	
  
	
  
	
  
30	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Ordering	
  and	
  Limiting	
  
select	
  id,score	
  from	
  techproducts	
  	
  
	
  	
  	
  	
  where	
  text='(memory	
  hard	
  drive)'	
  	
  
	
  	
  	
  	
  ORDER	
  BY	
  popularity	
  desc	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  //	
  default	
  order	
  is	
  score	
  desc	
  for	
  limited	
  queries	
  
	
  	
  	
  	
  LIMIT	
  100	
  
	
  
•  Limited	
  queries	
  use	
  /select	
  handler	
  
•  Unlimited	
  queries	
  use	
  /export	
  handler	
  
• fields	
  selected	
  need	
  to	
  be	
  docValues	
  
• fields	
  in	
  "order	
  by"	
  need	
  to	
  be	
  docValues	
  
• no	
  "score"	
  field	
  allowed	
  
31	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
More	
  SQL	
  examples	
  
select	
  distinct	
  fieldA	
  as	
  fa,	
  fieldB	
  as	
  fb	
  from	
  tableA	
  order	
  by	
  fa	
  desc,	
  fb	
  desc	
  
	
  
//	
  simple	
  stats	
  	
  
select	
  count(fieldA)	
  as	
  count,	
  sum(fieldB)	
  as	
  sum	
  from	
  tableA	
  where	
  fieldC	
  =	
  'Hello'	
  
	
  
select	
  fieldA,	
  fieldB,	
  count(*),	
  sum(fieldC),	
  avg(fieldY)	
  from	
  tableA	
  	
  
	
  	
  	
  	
  where	
  fieldC	
  =	
  'term1	
  term2'	
  	
  
	
  	
  	
  	
  group	
  by	
  fieldA,	
  fieldB	
  	
  
	
  	
  	
  	
  having	
  ((sum(fieldC)	
  >	
  1000)	
  AND	
  (avg(fieldY)	
  <=	
  10))	
  
	
  	
  	
  	
  order	
  by	
  sum(fieldC)	
  asc	
  	
  
	
  
32	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Solr	
  JDBC	
  Driver	
  
33	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Solr	
  JDBC	
  driver	
  works	
  with	
  Apache	
  Zeppelin	
  
34	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Graph	
  Traversal	
  
35	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Graph	
  Filter	
  
•  Follows	
  ad	
  hoc	
  edges	
  
•  Not	
  distributed!	
  
• still	
  useable	
  on	
  
partitioned	
  data	
  
•  Can	
  filter	
  on	
  each	
  hop	
  
•  Can	
  specify	
  max	
  depth	
  
•  Cycle	
  detection	
  
	
  
fq={!graph	
  from=parents	
  to=id}
id:"Philip	
  J.	
  Fry"	
  
id	
  :	
  "Philip	
  J.	
  Fry"	
  
parents:["Yancy	
  Fry,	
  Sr.","Mrs.	
  Fry"]	
  
id	
  :	
  "Yancy	
  Fry"	
  
parents:["Yancy	
  Fry,	
  Sr.","Mrs.	
  Fry"]	
  
id	
  :	
  "Yancy	
  Fry,	
  Sr."	
  
parents:["Mildred",	
  "Philip	
  J.	
  Fry"]	
  
id	
  :	
  "Mrs.	
  Fry"	
  
parents:["Mr.	
  Gleisner",	
  
	
  "Mrs.	
  Gleisner"]	
  
id	
  :	
  "Mildred"	
  
id	
  :	
  "Hubert	
  J.	
  	
  
Farnsworth"	
  
id	
  :	
  "Philip	
  J.	
  Fry"	
  
parents:["Yancy	
  Fry,	
  Sr.","Mrs.	
  Fry"]	
  
Cycle!	
  
36	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Graph	
  streaming	
  expressions	
  
•  Breadth-­‐first	
  graph	
  traversals	
  
•  Fully	
  integrated	
  with	
  streaming,	
  fully	
  distributed	
  
•  Traverse	
  across	
  collections	
  as	
  well	
  as	
  shards	
  
•  Compute	
  aggregations	
  
	
  
curl	
  http://localhost:8983/solr/emails/stream	
  -d	
  '	
  
	
  	
  expr=gatherNodes(emails,	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  walk="johndoe@apache.org-­‐>from",	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  gather="to")	
  
'	
  
37	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Graph	
  streaming	
  expressions	
  example	
  
•  Index	
  some	
  books	
  in	
  one	
  collection	
  
curl	
  http://localhost:8983/solr/books/update	
  -­‐H	
  'Content-­‐type:text/csv'	
  -­‐d	
  '	
  
id,cat,pubyear_i,title,author,series_s,sequence_i	
  
book1,fantasy,2000,A	
  Storm	
  of	
  Swords,George	
  R.R.	
  Martin,A	
  Song	
  of	
  Ice	
  and	
  Fire,3	
  
book2,fantasy,2005,A	
  Feast	
  for	
  Crows,George	
  R.R.	
  Martin,A	
  Song	
  of	
  Ice	
  and	
  Fire,4	
  
book3,fantasy,2011,A	
  Dance	
  with	
  Dragons,George	
  R.R.	
  Martin,A	
  Song	
  of	
  Ice	
  and	
  Fire,5	
  
book4,sci-­‐fi,1987,Consider	
  Phlebas,Iain	
  M.	
  Banks,The	
  Culture,1	
  
book5,sci-­‐fi,1988,The	
  Player	
  of	
  Games,Iain	
  M.	
  Banks,The	
  Culture,2	
  
book6,sci-­‐fi,1990,Use	
  of	
  Weapons,Iain	
  M.	
  Banks,The	
  Culture,3	
  
book7,fantasy,1984,Shadows	
  Linger,Glen	
  Cook,The	
  Black	
  Company,2	
  
book8,fantasy,1984,The	
  White	
  Rose,Glen	
  Cook,The	
  Black	
  Company,3	
  
book9,fantasy,1989,Shadow	
  Games,Glen	
  Cook,The	
  Black	
  Company,4	
  
book10,sci-­‐fi,2001,Gridlinked,Neal	
  Asher,Ian	
  Cormac,1	
  
book11,sci-­‐fi,2003,The	
  Line	
  of	
  Polity,Neal	
  Asher,Ian	
  Cormac,2	
  
book12,sci-­‐fi,2005,Brass	
  Man,Neal	
  Asher,Ian	
  Cormac,3	
  
'	
  
38	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Graph	
  streaming	
  expressions	
  example	
  
•  Index	
  some	
  book	
  reviews	
  into	
  another	
  collection	
  
curl	
  http://localhost:8983/solr/reviews/update -H	
  'Content-­‐type:text/csv'	
  -­‐d	
  '	
  
id,book_s,user_s,rating_i,review_t	
  
book1_r1,book1,Yonik,5,awesome	
  book!	
  
book1_r2,book1,Aarav,2,too	
  bloody	
  
book1_r3,book1,Haruka,5,awesome	
  world	
  building	
  
book2_r1,book2,Yonik,5,another	
  great	
  one	
  
book2_r2,book2,Maria,5,wow!	
  
book4_r1,book4,Yonik,2,i	
  am	
  lying...	
  actually	
  liked	
  it	
  
book4_r2,book4,Aarav,5,Loved	
  it	
  
book7_r1,book7,Yonik,4,read	
  back	
  in	
  college	
  but	
  it	
  was	
  good	
  
book10_r1,book10,Maria,5,I	
  want	
  a	
  gridlink!	
  
book11_r1,book11,Maria,1,Blech	
  
book11_r2,book11,Aarav,4,is	
  this	
  the	
  first	
  book?	
  
book12_r1,book12,Yonik,5,Mr.	
  Crane	
  is	
  scary...	
  
'	
  
1.	
  Find	
  books	
  I	
  like	
  
2.	
  Find	
  who	
  else	
  rated	
  
those	
  books	
  highly	
  
3.	
  Find	
  other	
  books	
  
they	
  rated	
  highly	
  
4.	
  Profit!	
  
39	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
1.	
  Search	
  expression	
  to	
  find	
  my	
  high	
  ratings	
  
URL="http://localhost:8983/solr/reviews/stream"	
  
	
  
#	
  Use	
  search	
  expression	
  to	
  find	
  reviews	
  that	
  I	
  have	
  the	
  book	
  a	
  "5"	
  
curl	
  $URL	
  -­‐d	
  'expr=search(reviews,	
  q="user_s:Yonik	
  AND	
  rating_i:5",	
  
fl="id,book_s,user_s,rating_i",	
  sort="user_s	
  asc")'	
  
	
  
	
  
{"result-­‐set":{"docs":[	
  
{"rating_i":5,"id":"book2_r1","user_s":"Yonik","book_s":"book2"},	
  
{"rating_i":5,"id":"book1_r1","user_s":"Yonik","book_s":"book1"},	
  
{"rating_i":5,"id":"book12_r1","user_s":"Yonik","book_s":"book12"},	
  
{"EOF":true,"RESPONSE_TIME":4}]}}	
  
40	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
2.	
  gatherNodes	
  expression	
  to	
  find	
  users	
  
curl	
  $URL	
  -­‐d	
  'expr=gatherNodes(reviews,	
  
	
  	
  	
  search(reviews,	
  q="user_s:Yonik	
  AND	
  rating_i:5",	
  
	
  	
  	
  	
  	
  	
  	
  	
  	
  	
  fl="book_s,user_s,rating_i",sort="user_s	
  asc"),	
  
	
  	
  	
  walk="book_s-­‐>book_s",	
  
	
  	
  	
  gather="user_s",	
  
	
  	
  	
  fq="rating_i:[4	
  TO	
  *]	
  -­‐user_s:Yonik",	
  
	
  	
  	
  trackTraversal=true	
  )'	
  
	
  
	
  
{"result-­‐set":{"docs":[	
  
{"node":"Haruka","collection":"reviews","field":"user_s","ancestors":["book1"],"level":1},	
  
{"node":"Maria","collection":"reviews","field":"user_s","ancestors":["book2"],"level":1},	
  
{"EOF":true,"RESPONSE_TIME":22}]}}	
  
"gather"	
  values	
  
41	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
3.	
  gatherNodes	
  to	
  find	
  high	
  ratings	
  by	
  those	
  users	
  	
  
curl	
  $URL	
  -­‐d	
  'expr=gatherNodes(reviews,	
  
	
  	
  	
  	
  gatherNodes(reviews,	
  search(reviews,q="user_s:Yonik	
  AND	
  rating_i:
5",fl="id,book_s,user_s,rating_i",sort="user_s	
  asc"),	
  walk="book_s-­‐>book_s",	
  
gather="user_s",fq="rating_i:[4	
  TO	
  *]	
  -­‐user_s:Yonik"),	
  
	
  	
  	
  	
  walk="node-­‐>user_s",	
  gather="book_s",	
  fq="rating_i:[4	
  TO	
  *]",	
  
	
  	
  	
  	
  avg(rating_i),	
  
	
  	
  	
  	
  trackTraversal=true)'	
  
	
  
	
  
	
  
{"result-­‐set":{"docs":[	
  
{"node":"book10","avg(rating_i)":5.0,"field":"book_s","level":
2,"collection":"reviews","ancestors":["Maria"]},	
  
{"EOF":true,"RESPONSE_TIME":65}]}}	
  
42	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Retrieving	
  complete	
  traversal	
  
curl	
  $URL	
  -­‐d	
  'expr=gatherNodes(reviews,	
  [...],	
  scatter="branches,leaves")'	
  
	
  
	
  
	
  
{"result-­‐set":{"docs":[	
  
{"node":"book12","collection":"reviews","field":"book_s","level":0},	
  
{"node":"book1","collection":"reviews","field":"book_s","level":0},	
  
{"node":"book2","collection":"reviews","field":"book_s","level":0},	
  
{"node":"Haruka","collection":"reviews","field":"user_s","level":1},	
  
{"node":"Maria","collection":"reviews","field":"user_s","level":1},	
  
{"node":"book10","avg(rating_i)":5.0,"field":"book_s","level":2,	
  
"collection":"reviews","ancestors":["Maria"]},	
  
{"EOF":true,"RESPONSE_TIME":111}]}}	
  
43	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Solr	
  admin	
  stream	
  view	
  
44	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
More	
  graph	
  expressions	
  
•  shortestPath	
  
• Finds	
  the	
  shortest	
  path	
  between	
  "from"	
  and	
  "to"	
  
	
  
•  scoreNodes	
  :	
  tf-idf	
  inspired	
  scoring	
  
• wraps	
  a	
  gatherNodes	
  expression	
  that	
  finds	
  the	
  co-­‐occurrence	
  count	
  
• tf	
  factor	
  –	
  the	
  co-­‐occurrence	
  count	
  
• idf	
  factor	
  –	
  boosts	
  nodes	
  that	
  are	
  rarer	
  overall	
  
45	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Network	
  analysis	
  and	
  visualization	
  
curl	
  http://localhost:8983/solr/reviews/graph	
  -­‐d	
  'expr=gatherNodes(reviews,	
  [...],	
  
scatter="branches,leaves")'	
  
	
  
	
  
	
  
<?xml	
  version="1.0"	
  encoding="UTF-­‐8"?>	
  
<graphml	
  xmlns="http://graphml.graphdrawing.org/xmlns"	
  	
  
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"	
  	
  
xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns	
  http://graphml.graphdrawing.org/xmlns/1.0/
graphml.xsd">	
  
<graph	
  id="G"	
  edgedefault="directed">	
  
<node	
  id="book12">	
  
	
  	
  <data	
  key="field">book_s</data>	
  
	
  	
  <data	
  key="level">0</data>	
  
</node>	
  
<node	
  id="book1">	
  
	
  	
  <data	
  key="field">book_s</data>	
  
[...]	
  
46	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
47	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Streaming	
  Expressions	
  vs	
  JSON	
  Facets	
  
48	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
JSON	
  Facet	
  API	
  
•  More	
  focused	
  on	
  web-­‐scale	
  interactive	
  
responses	
  
•  Tighter	
  integration	
  
• Utilizes	
  existing	
  distributed	
  search	
  
framework	
  /	
  just	
  another	
  search	
  
component	
  
• single	
  request-­‐response	
  top-­‐N,	
  
grouping,	
  highlighting,	
  faceting,	
  etc.	
  
• block	
  join	
  /	
  nested	
  document	
  
support	
  
•  More	
  expressive?	
  
Streaming	
  Expressions	
  
•  More	
  general	
  purpose,	
  larger	
  scope	
  
• wrap	
  streams	
  within	
  streams	
  to	
  do	
  
pretty	
  much	
  anything	
  
• not	
  tied	
  to	
  documents	
  (analytics	
  across	
  
joins	
  w/	
  external	
  DBs)	
  
• update	
  streams,	
  machine	
  learning	
  
streams,	
  etc.	
  
•  Exact	
  results	
  (e.g.	
  cardinality)	
  
•  distributed	
  joins,	
  graph	
  
•  Increasingly	
  will	
  use	
  JSON	
  Facet	
  API	
  to	
  
push	
  work	
  to	
  leaves	
  
49	
  ©	
  Cloudera,	
  Inc.	
  All	
  rights	
  reserved.	
  
Thank	
  you	
  
yonik@cloudera.com	
  

More Related Content

What's hot

Faster Data Analytics with Apache Spark using Apache Solr
Faster Data Analytics with Apache Spark using Apache SolrFaster Data Analytics with Apache Spark using Apache Solr
Faster Data Analytics with Apache Spark using Apache Solr
Chitturi Kiran
 
The Evolution of Streaming Expressions - Joel Bernstein, Alfresco & Dennis Go...
The Evolution of Streaming Expressions - Joel Bernstein, Alfresco & Dennis Go...The Evolution of Streaming Expressions - Joel Bernstein, Alfresco & Dennis Go...
The Evolution of Streaming Expressions - Joel Bernstein, Alfresco & Dennis Go...
Lucidworks
 
Solr Indexing and Analysis Tricks
Solr Indexing and Analysis TricksSolr Indexing and Analysis Tricks
Solr Indexing and Analysis Tricks
Erik Hatcher
 
Solr Troubleshooting - TreeMap approach
Solr Troubleshooting - TreeMap approachSolr Troubleshooting - TreeMap approach
Solr Troubleshooting - TreeMap approach
Alexandre Rafalovitch
 
Rebuilding Solr 6 examples - layer by layer (LuceneSolrRevolution 2016)
Rebuilding Solr 6 examples - layer by layer (LuceneSolrRevolution 2016)Rebuilding Solr 6 examples - layer by layer (LuceneSolrRevolution 2016)
Rebuilding Solr 6 examples - layer by layer (LuceneSolrRevolution 2016)
Alexandre Rafalovitch
 
it's just search
it's just searchit's just search
it's just search
Erik Hatcher
 
Solr Black Belt Pre-conference
Solr Black Belt Pre-conferenceSolr Black Belt Pre-conference
Solr Black Belt Pre-conferenceErik Hatcher
 
Advanced Apache Spark Meetup Spark SQL + DataFrames + Catalyst Optimizer + Da...
Advanced Apache Spark Meetup Spark SQL + DataFrames + Catalyst Optimizer + Da...Advanced Apache Spark Meetup Spark SQL + DataFrames + Catalyst Optimizer + Da...
Advanced Apache Spark Meetup Spark SQL + DataFrames + Catalyst Optimizer + Da...
Chris Fregly
 
Couchbase 5.5: N1QL and Indexing features
Couchbase 5.5: N1QL and Indexing featuresCouchbase 5.5: N1QL and Indexing features
Couchbase 5.5: N1QL and Indexing features
Keshav Murthy
 
OrientDB vs Neo4j - Comparison of query/speed/functionality
OrientDB vs Neo4j - Comparison of query/speed/functionalityOrientDB vs Neo4j - Comparison of query/speed/functionality
OrientDB vs Neo4j - Comparison of query/speed/functionality
Curtis Mosters
 
Solr Masterclass Bangkok, June 2014
Solr Masterclass Bangkok, June 2014Solr Masterclass Bangkok, June 2014
Solr Masterclass Bangkok, June 2014
Alexandre Rafalovitch
 
Introduction to Apache Solr
Introduction to Apache SolrIntroduction to Apache Solr
Introduction to Apache Solr
Christos Manios
 
Spark SQL with Scala Code Examples
Spark SQL with Scala Code ExamplesSpark SQL with Scala Code Examples
Spark SQL with Scala Code Examples
Todd McGrath
 
Apache Solr Workshop
Apache Solr WorkshopApache Solr Workshop
Apache Solr Workshop
Saumitra Srivastav
 
Bring your code to explore the Azure Data Lake: Execute your .NET/Python/R co...
Bring your code to explore the Azure Data Lake: Execute your .NET/Python/R co...Bring your code to explore the Azure Data Lake: Execute your .NET/Python/R co...
Bring your code to explore the Azure Data Lake: Execute your .NET/Python/R co...
Michael Rys
 
Spark meetup v2.0.5
Spark meetup v2.0.5Spark meetup v2.0.5
Spark meetup v2.0.5
Yan Zhou
 
Solr Recipes
Solr RecipesSolr Recipes
Solr Recipes
Erik Hatcher
 
Beyond full-text searches with Lucene and Solr
Beyond full-text searches with Lucene and SolrBeyond full-text searches with Lucene and Solr
Beyond full-text searches with Lucene and Solr
Bertrand Delacretaz
 
Couchbase N1QL: Index Advisor
Couchbase N1QL: Index AdvisorCouchbase N1QL: Index Advisor
Couchbase N1QL: Index Advisor
Keshav Murthy
 

What's hot (20)

Faster Data Analytics with Apache Spark using Apache Solr
Faster Data Analytics with Apache Spark using Apache SolrFaster Data Analytics with Apache Spark using Apache Solr
Faster Data Analytics with Apache Spark using Apache Solr
 
The Evolution of Streaming Expressions - Joel Bernstein, Alfresco & Dennis Go...
The Evolution of Streaming Expressions - Joel Bernstein, Alfresco & Dennis Go...The Evolution of Streaming Expressions - Joel Bernstein, Alfresco & Dennis Go...
The Evolution of Streaming Expressions - Joel Bernstein, Alfresco & Dennis Go...
 
Solr Indexing and Analysis Tricks
Solr Indexing and Analysis TricksSolr Indexing and Analysis Tricks
Solr Indexing and Analysis Tricks
 
Solr Troubleshooting - TreeMap approach
Solr Troubleshooting - TreeMap approachSolr Troubleshooting - TreeMap approach
Solr Troubleshooting - TreeMap approach
 
Solr Flair
Solr FlairSolr Flair
Solr Flair
 
Rebuilding Solr 6 examples - layer by layer (LuceneSolrRevolution 2016)
Rebuilding Solr 6 examples - layer by layer (LuceneSolrRevolution 2016)Rebuilding Solr 6 examples - layer by layer (LuceneSolrRevolution 2016)
Rebuilding Solr 6 examples - layer by layer (LuceneSolrRevolution 2016)
 
it's just search
it's just searchit's just search
it's just search
 
Solr Black Belt Pre-conference
Solr Black Belt Pre-conferenceSolr Black Belt Pre-conference
Solr Black Belt Pre-conference
 
Advanced Apache Spark Meetup Spark SQL + DataFrames + Catalyst Optimizer + Da...
Advanced Apache Spark Meetup Spark SQL + DataFrames + Catalyst Optimizer + Da...Advanced Apache Spark Meetup Spark SQL + DataFrames + Catalyst Optimizer + Da...
Advanced Apache Spark Meetup Spark SQL + DataFrames + Catalyst Optimizer + Da...
 
Couchbase 5.5: N1QL and Indexing features
Couchbase 5.5: N1QL and Indexing featuresCouchbase 5.5: N1QL and Indexing features
Couchbase 5.5: N1QL and Indexing features
 
OrientDB vs Neo4j - Comparison of query/speed/functionality
OrientDB vs Neo4j - Comparison of query/speed/functionalityOrientDB vs Neo4j - Comparison of query/speed/functionality
OrientDB vs Neo4j - Comparison of query/speed/functionality
 
Solr Masterclass Bangkok, June 2014
Solr Masterclass Bangkok, June 2014Solr Masterclass Bangkok, June 2014
Solr Masterclass Bangkok, June 2014
 
Introduction to Apache Solr
Introduction to Apache SolrIntroduction to Apache Solr
Introduction to Apache Solr
 
Spark SQL with Scala Code Examples
Spark SQL with Scala Code ExamplesSpark SQL with Scala Code Examples
Spark SQL with Scala Code Examples
 
Apache Solr Workshop
Apache Solr WorkshopApache Solr Workshop
Apache Solr Workshop
 
Bring your code to explore the Azure Data Lake: Execute your .NET/Python/R co...
Bring your code to explore the Azure Data Lake: Execute your .NET/Python/R co...Bring your code to explore the Azure Data Lake: Execute your .NET/Python/R co...
Bring your code to explore the Azure Data Lake: Execute your .NET/Python/R co...
 
Spark meetup v2.0.5
Spark meetup v2.0.5Spark meetup v2.0.5
Spark meetup v2.0.5
 
Solr Recipes
Solr RecipesSolr Recipes
Solr Recipes
 
Beyond full-text searches with Lucene and Solr
Beyond full-text searches with Lucene and SolrBeyond full-text searches with Lucene and Solr
Beyond full-text searches with Lucene and Solr
 
Couchbase N1QL: Index Advisor
Couchbase N1QL: Index AdvisorCouchbase N1QL: Index Advisor
Couchbase N1QL: Index Advisor
 

Viewers also liked

Parallel SQL and Streaming Expressions in Apache Solr 6
Parallel SQL and Streaming Expressions in Apache Solr 6Parallel SQL and Streaming Expressions in Apache Solr 6
Parallel SQL and Streaming Expressions in Apache Solr 6
Shalin Shekhar Mangar
 
Near Real Time Indexing: Presented by Umesh Prasad & Thejus V M, Flipkart
Near Real Time Indexing: Presented by Umesh Prasad & Thejus V M, FlipkartNear Real Time Indexing: Presented by Umesh Prasad & Thejus V M, Flipkart
Near Real Time Indexing: Presented by Umesh Prasad & Thejus V M, Flipkart
Lucidworks
 
Webinar: What's New in Solr 6
Webinar: What's New in Solr 6Webinar: What's New in Solr 6
Webinar: What's New in Solr 6
Lucidworks
 
Searching The Enterprise Data Lake With Solr - Watch Us Do It!: Presented by...
Searching The Enterprise Data Lake With Solr  - Watch Us Do It!: Presented by...Searching The Enterprise Data Lake With Solr  - Watch Us Do It!: Presented by...
Searching The Enterprise Data Lake With Solr - Watch Us Do It!: Presented by...
Lucidworks
 
Build a Great Application in Minutes!: Presented by Stefan Olafsson, Twigkit
Build a Great Application in Minutes!: Presented by Stefan Olafsson, TwigkitBuild a Great Application in Minutes!: Presented by Stefan Olafsson, Twigkit
Build a Great Application in Minutes!: Presented by Stefan Olafsson, Twigkit
Lucidworks
 
Streaming Aggregation in Solr - New Horizons for Search: Presented by Erick E...
Streaming Aggregation in Solr - New Horizons for Search: Presented by Erick E...Streaming Aggregation in Solr - New Horizons for Search: Presented by Erick E...
Streaming Aggregation in Solr - New Horizons for Search: Presented by Erick E...
Lucidworks
 
Large Scale ETL for Hadoop and Cloudera Search using Morphlines
Large Scale ETL for Hadoop and Cloudera Search using MorphlinesLarge Scale ETL for Hadoop and Cloudera Search using Morphlines
Large Scale ETL for Hadoop and Cloudera Search using Morphlines
whoschek
 
Cloudera Search Webinar: Big Data Search, Bigger Insights
Cloudera Search Webinar: Big Data Search, Bigger InsightsCloudera Search Webinar: Big Data Search, Bigger Insights
Cloudera Search Webinar: Big Data Search, Bigger Insights
Cloudera, Inc.
 
Leveraging the Power of Solr with Spark: Presented by Johannes Weigend, QAware
Leveraging the Power of Solr with Spark: Presented by Johannes Weigend, QAwareLeveraging the Power of Solr with Spark: Presented by Johannes Weigend, QAware
Leveraging the Power of Solr with Spark: Presented by Johannes Weigend, QAware
Lucidworks
 
NYC Lucene/Solr Meetup: Spark / Solr
NYC Lucene/Solr Meetup: Spark / SolrNYC Lucene/Solr Meetup: Spark / Solr
NYC Lucene/Solr Meetup: Spark / Solr
thelabdude
 
Query Understanding at LinkedIn [Talk at Facebook]
Query Understanding at LinkedIn [Talk at Facebook]Query Understanding at LinkedIn [Talk at Facebook]
Query Understanding at LinkedIn [Talk at Facebook]
Abhimanyu Lad
 
Search@airbnb
Search@airbnbSearch@airbnb
Search@airbnb
Mousom Gupta
 
Reflected Intelligence - Lucene/Solr as a self-learning data system: Presente...
Reflected Intelligence - Lucene/Solr as a self-learning data system: Presente...Reflected Intelligence - Lucene/Solr as a self-learning data system: Presente...
Reflected Intelligence - Lucene/Solr as a self-learning data system: Presente...
Lucidworks
 
Webinar: Ecommerce, Rules, and Relevance
Webinar: Ecommerce, Rules, and RelevanceWebinar: Ecommerce, Rules, and Relevance
Webinar: Ecommerce, Rules, and Relevance
Lucidworks
 
Building and Running Solr-as-a-Service: Presented by Shai Erera, IBM
Building and Running Solr-as-a-Service: Presented by Shai Erera, IBMBuilding and Running Solr-as-a-Service: Presented by Shai Erera, IBM
Building and Running Solr-as-a-Service: Presented by Shai Erera, IBM
Lucidworks
 
Downtown SF Lucene/Solr Meetup: Developing Scalable Search for User Generated...
Downtown SF Lucene/Solr Meetup: Developing Scalable Search for User Generated...Downtown SF Lucene/Solr Meetup: Developing Scalable Search for User Generated...
Downtown SF Lucene/Solr Meetup: Developing Scalable Search for User Generated...
Lucidworks
 
Using Morphlines for On-the-Fly ETL
Using Morphlines for On-the-Fly ETLUsing Morphlines for On-the-Fly ETL
Using Morphlines for On-the-Fly ETL
Cloudera, Inc.
 
Visualize Solr Data with Banana: Presented by Andrew Thanalertvisuti, Lucidworks
Visualize Solr Data with Banana: Presented by Andrew Thanalertvisuti, LucidworksVisualize Solr Data with Banana: Presented by Andrew Thanalertvisuti, Lucidworks
Visualize Solr Data with Banana: Presented by Andrew Thanalertvisuti, Lucidworks
Lucidworks
 
SolrCloud on Hadoop
SolrCloud on HadoopSolrCloud on Hadoop
SolrCloud on Hadoop
Alex Moundalexis
 
Airbnb Search Architecture: Presented by Maxim Charkov, Airbnb
Airbnb Search Architecture: Presented by Maxim Charkov, AirbnbAirbnb Search Architecture: Presented by Maxim Charkov, Airbnb
Airbnb Search Architecture: Presented by Maxim Charkov, Airbnb
Lucidworks
 

Viewers also liked (20)

Parallel SQL and Streaming Expressions in Apache Solr 6
Parallel SQL and Streaming Expressions in Apache Solr 6Parallel SQL and Streaming Expressions in Apache Solr 6
Parallel SQL and Streaming Expressions in Apache Solr 6
 
Near Real Time Indexing: Presented by Umesh Prasad & Thejus V M, Flipkart
Near Real Time Indexing: Presented by Umesh Prasad & Thejus V M, FlipkartNear Real Time Indexing: Presented by Umesh Prasad & Thejus V M, Flipkart
Near Real Time Indexing: Presented by Umesh Prasad & Thejus V M, Flipkart
 
Webinar: What's New in Solr 6
Webinar: What's New in Solr 6Webinar: What's New in Solr 6
Webinar: What's New in Solr 6
 
Searching The Enterprise Data Lake With Solr - Watch Us Do It!: Presented by...
Searching The Enterprise Data Lake With Solr  - Watch Us Do It!: Presented by...Searching The Enterprise Data Lake With Solr  - Watch Us Do It!: Presented by...
Searching The Enterprise Data Lake With Solr - Watch Us Do It!: Presented by...
 
Build a Great Application in Minutes!: Presented by Stefan Olafsson, Twigkit
Build a Great Application in Minutes!: Presented by Stefan Olafsson, TwigkitBuild a Great Application in Minutes!: Presented by Stefan Olafsson, Twigkit
Build a Great Application in Minutes!: Presented by Stefan Olafsson, Twigkit
 
Streaming Aggregation in Solr - New Horizons for Search: Presented by Erick E...
Streaming Aggregation in Solr - New Horizons for Search: Presented by Erick E...Streaming Aggregation in Solr - New Horizons for Search: Presented by Erick E...
Streaming Aggregation in Solr - New Horizons for Search: Presented by Erick E...
 
Large Scale ETL for Hadoop and Cloudera Search using Morphlines
Large Scale ETL for Hadoop and Cloudera Search using MorphlinesLarge Scale ETL for Hadoop and Cloudera Search using Morphlines
Large Scale ETL for Hadoop and Cloudera Search using Morphlines
 
Cloudera Search Webinar: Big Data Search, Bigger Insights
Cloudera Search Webinar: Big Data Search, Bigger InsightsCloudera Search Webinar: Big Data Search, Bigger Insights
Cloudera Search Webinar: Big Data Search, Bigger Insights
 
Leveraging the Power of Solr with Spark: Presented by Johannes Weigend, QAware
Leveraging the Power of Solr with Spark: Presented by Johannes Weigend, QAwareLeveraging the Power of Solr with Spark: Presented by Johannes Weigend, QAware
Leveraging the Power of Solr with Spark: Presented by Johannes Weigend, QAware
 
NYC Lucene/Solr Meetup: Spark / Solr
NYC Lucene/Solr Meetup: Spark / SolrNYC Lucene/Solr Meetup: Spark / Solr
NYC Lucene/Solr Meetup: Spark / Solr
 
Query Understanding at LinkedIn [Talk at Facebook]
Query Understanding at LinkedIn [Talk at Facebook]Query Understanding at LinkedIn [Talk at Facebook]
Query Understanding at LinkedIn [Talk at Facebook]
 
Search@airbnb
Search@airbnbSearch@airbnb
Search@airbnb
 
Reflected Intelligence - Lucene/Solr as a self-learning data system: Presente...
Reflected Intelligence - Lucene/Solr as a self-learning data system: Presente...Reflected Intelligence - Lucene/Solr as a self-learning data system: Presente...
Reflected Intelligence - Lucene/Solr as a self-learning data system: Presente...
 
Webinar: Ecommerce, Rules, and Relevance
Webinar: Ecommerce, Rules, and RelevanceWebinar: Ecommerce, Rules, and Relevance
Webinar: Ecommerce, Rules, and Relevance
 
Building and Running Solr-as-a-Service: Presented by Shai Erera, IBM
Building and Running Solr-as-a-Service: Presented by Shai Erera, IBMBuilding and Running Solr-as-a-Service: Presented by Shai Erera, IBM
Building and Running Solr-as-a-Service: Presented by Shai Erera, IBM
 
Downtown SF Lucene/Solr Meetup: Developing Scalable Search for User Generated...
Downtown SF Lucene/Solr Meetup: Developing Scalable Search for User Generated...Downtown SF Lucene/Solr Meetup: Developing Scalable Search for User Generated...
Downtown SF Lucene/Solr Meetup: Developing Scalable Search for User Generated...
 
Using Morphlines for On-the-Fly ETL
Using Morphlines for On-the-Fly ETLUsing Morphlines for On-the-Fly ETL
Using Morphlines for On-the-Fly ETL
 
Visualize Solr Data with Banana: Presented by Andrew Thanalertvisuti, Lucidworks
Visualize Solr Data with Banana: Presented by Andrew Thanalertvisuti, LucidworksVisualize Solr Data with Banana: Presented by Andrew Thanalertvisuti, Lucidworks
Visualize Solr Data with Banana: Presented by Andrew Thanalertvisuti, Lucidworks
 
SolrCloud on Hadoop
SolrCloud on HadoopSolrCloud on Hadoop
SolrCloud on Hadoop
 
Airbnb Search Architecture: Presented by Maxim Charkov, Airbnb
Airbnb Search Architecture: Presented by Maxim Charkov, AirbnbAirbnb Search Architecture: Presented by Maxim Charkov, Airbnb
Airbnb Search Architecture: Presented by Maxim Charkov, Airbnb
 

Similar to Parallel SQL and Analytics with Solr: Presented by Yonik Seeley, Cloudera

ELK Stack - Turn boring logfiles into sexy dashboard
ELK Stack - Turn boring logfiles into sexy dashboardELK Stack - Turn boring logfiles into sexy dashboard
ELK Stack - Turn boring logfiles into sexy dashboard
Georg Sorst
 
Closing the Loop in Extended Reality with Kafka Streams and Machine Learning ...
Closing the Loop in Extended Reality with Kafka Streams and Machine Learning ...Closing the Loop in Extended Reality with Kafka Streams and Machine Learning ...
Closing the Loop in Extended Reality with Kafka Streams and Machine Learning ...
confluent
 
Designing with malli
Designing with malliDesigning with malli
Designing with malli
Metosin Oy
 
Streaming Solr - Activate 2018 talk
Streaming Solr - Activate 2018 talkStreaming Solr - Activate 2018 talk
Streaming Solr - Activate 2018 talk
Amrit Sarkar
 
Building Analytics Applications with Streaming Expressions in Apache Solr - A...
Building Analytics Applications with Streaming Expressions in Apache Solr - A...Building Analytics Applications with Streaming Expressions in Apache Solr - A...
Building Analytics Applications with Streaming Expressions in Apache Solr - A...
Lucidworks
 
Solr As A SparkSQL DataSource
Solr As A SparkSQL DataSourceSolr As A SparkSQL DataSource
Solr As A SparkSQL DataSource
Spark Summit
 
d3sparql.js demo at SWAT4LS 2014 in Berlin
d3sparql.js demo at SWAT4LS 2014 in Berlind3sparql.js demo at SWAT4LS 2014 in Berlin
d3sparql.js demo at SWAT4LS 2014 in Berlin
Toshiaki Katayama
 
MongoDB 3.0
MongoDB 3.0 MongoDB 3.0
MongoDB 3.0
Victoria Malaya
 
JSLT: JSON querying and transformation
JSLT: JSON querying and transformationJSLT: JSON querying and transformation
JSLT: JSON querying and transformation
Lars Marius Garshol
 
NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"
NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"
NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"
DataStax Academy
 
Intravert Server side processing for Cassandra
Intravert Server side processing for CassandraIntravert Server side processing for Cassandra
Intravert Server side processing for Cassandra
Edward Capriolo
 
Elasticsearch in 15 Minutes
Elasticsearch in 15 MinutesElasticsearch in 15 Minutes
Elasticsearch in 15 Minutes
Karel Minarik
 
Back to Basics Webinar 5: Introduction to the Aggregation Framework
Back to Basics Webinar 5: Introduction to the Aggregation FrameworkBack to Basics Webinar 5: Introduction to the Aggregation Framework
Back to Basics Webinar 5: Introduction to the Aggregation Framework
MongoDB
 
Building and Deploying Application to Apache Mesos
Building and Deploying Application to Apache MesosBuilding and Deploying Application to Apache Mesos
Building and Deploying Application to Apache Mesos
Joe Stein
 
Scala for Java Programmers
Scala for Java ProgrammersScala for Java Programmers
Scala for Java Programmers
Eric Pederson
 
Retail referencearchitecture productcatalog
Retail referencearchitecture productcatalogRetail referencearchitecture productcatalog
Retail referencearchitecture productcatalog
MongoDB
 
The openCypher Project - An Open Graph Query Language
The openCypher Project - An Open Graph Query LanguageThe openCypher Project - An Open Graph Query Language
The openCypher Project - An Open Graph Query Language
Neo4j
 
IE9에서 HTML5 개발하기
IE9에서 HTML5 개발하기IE9에서 HTML5 개발하기
IE9에서 HTML5 개발하기
Reagan Hwang
 
Full-Text Search Explained - Philipp Krenn - Codemotion Rome 2017
Full-Text Search Explained - Philipp Krenn - Codemotion Rome 2017Full-Text Search Explained - Philipp Krenn - Codemotion Rome 2017
Full-Text Search Explained - Philipp Krenn - Codemotion Rome 2017
Codemotion
 
Solr vs. Elasticsearch - Case by Case
Solr vs. Elasticsearch - Case by CaseSolr vs. Elasticsearch - Case by Case
Solr vs. Elasticsearch - Case by Case
Alexandre Rafalovitch
 

Similar to Parallel SQL and Analytics with Solr: Presented by Yonik Seeley, Cloudera (20)

ELK Stack - Turn boring logfiles into sexy dashboard
ELK Stack - Turn boring logfiles into sexy dashboardELK Stack - Turn boring logfiles into sexy dashboard
ELK Stack - Turn boring logfiles into sexy dashboard
 
Closing the Loop in Extended Reality with Kafka Streams and Machine Learning ...
Closing the Loop in Extended Reality with Kafka Streams and Machine Learning ...Closing the Loop in Extended Reality with Kafka Streams and Machine Learning ...
Closing the Loop in Extended Reality with Kafka Streams and Machine Learning ...
 
Designing with malli
Designing with malliDesigning with malli
Designing with malli
 
Streaming Solr - Activate 2018 talk
Streaming Solr - Activate 2018 talkStreaming Solr - Activate 2018 talk
Streaming Solr - Activate 2018 talk
 
Building Analytics Applications with Streaming Expressions in Apache Solr - A...
Building Analytics Applications with Streaming Expressions in Apache Solr - A...Building Analytics Applications with Streaming Expressions in Apache Solr - A...
Building Analytics Applications with Streaming Expressions in Apache Solr - A...
 
Solr As A SparkSQL DataSource
Solr As A SparkSQL DataSourceSolr As A SparkSQL DataSource
Solr As A SparkSQL DataSource
 
d3sparql.js demo at SWAT4LS 2014 in Berlin
d3sparql.js demo at SWAT4LS 2014 in Berlind3sparql.js demo at SWAT4LS 2014 in Berlin
d3sparql.js demo at SWAT4LS 2014 in Berlin
 
MongoDB 3.0
MongoDB 3.0 MongoDB 3.0
MongoDB 3.0
 
JSLT: JSON querying and transformation
JSLT: JSON querying and transformationJSLT: JSON querying and transformation
JSLT: JSON querying and transformation
 
NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"
NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"
NYC* 2013 - "Advanced Data Processing: Beyond Queries and Slices"
 
Intravert Server side processing for Cassandra
Intravert Server side processing for CassandraIntravert Server side processing for Cassandra
Intravert Server side processing for Cassandra
 
Elasticsearch in 15 Minutes
Elasticsearch in 15 MinutesElasticsearch in 15 Minutes
Elasticsearch in 15 Minutes
 
Back to Basics Webinar 5: Introduction to the Aggregation Framework
Back to Basics Webinar 5: Introduction to the Aggregation FrameworkBack to Basics Webinar 5: Introduction to the Aggregation Framework
Back to Basics Webinar 5: Introduction to the Aggregation Framework
 
Building and Deploying Application to Apache Mesos
Building and Deploying Application to Apache MesosBuilding and Deploying Application to Apache Mesos
Building and Deploying Application to Apache Mesos
 
Scala for Java Programmers
Scala for Java ProgrammersScala for Java Programmers
Scala for Java Programmers
 
Retail referencearchitecture productcatalog
Retail referencearchitecture productcatalogRetail referencearchitecture productcatalog
Retail referencearchitecture productcatalog
 
The openCypher Project - An Open Graph Query Language
The openCypher Project - An Open Graph Query LanguageThe openCypher Project - An Open Graph Query Language
The openCypher Project - An Open Graph Query Language
 
IE9에서 HTML5 개발하기
IE9에서 HTML5 개발하기IE9에서 HTML5 개발하기
IE9에서 HTML5 개발하기
 
Full-Text Search Explained - Philipp Krenn - Codemotion Rome 2017
Full-Text Search Explained - Philipp Krenn - Codemotion Rome 2017Full-Text Search Explained - Philipp Krenn - Codemotion Rome 2017
Full-Text Search Explained - Philipp Krenn - Codemotion Rome 2017
 
Solr vs. Elasticsearch - Case by Case
Solr vs. Elasticsearch - Case by CaseSolr vs. Elasticsearch - Case by Case
Solr vs. Elasticsearch - Case by Case
 

More from Lucidworks

Search is the Tip of the Spear for Your B2B eCommerce Strategy
Search is the Tip of the Spear for Your B2B eCommerce StrategySearch is the Tip of the Spear for Your B2B eCommerce Strategy
Search is the Tip of the Spear for Your B2B eCommerce Strategy
Lucidworks
 
Drive Agent Effectiveness in Salesforce
Drive Agent Effectiveness in SalesforceDrive Agent Effectiveness in Salesforce
Drive Agent Effectiveness in Salesforce
Lucidworks
 
How Crate & Barrel Connects Shoppers with Relevant Products
How Crate & Barrel Connects Shoppers with Relevant ProductsHow Crate & Barrel Connects Shoppers with Relevant Products
How Crate & Barrel Connects Shoppers with Relevant Products
Lucidworks
 
Lucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
Lucidworks & IMRG Webinar – Best-In-Class Retail Product DiscoveryLucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
Lucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
Lucidworks
 
Connected Experiences Are Personalized Experiences
Connected Experiences Are Personalized ExperiencesConnected Experiences Are Personalized Experiences
Connected Experiences Are Personalized Experiences
Lucidworks
 
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
Lucidworks
 
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
Lucidworks
 
Preparing for Peak in Ecommerce | eTail Asia 2020
Preparing for Peak in Ecommerce | eTail Asia 2020Preparing for Peak in Ecommerce | eTail Asia 2020
Preparing for Peak in Ecommerce | eTail Asia 2020
Lucidworks
 
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
Lucidworks
 
AI-Powered Linguistics and Search with Fusion and Rosette
AI-Powered Linguistics and Search with Fusion and RosetteAI-Powered Linguistics and Search with Fusion and Rosette
AI-Powered Linguistics and Search with Fusion and Rosette
Lucidworks
 
The Service Industry After COVID-19: The Soul of Service in a Virtual Moment
The Service Industry After COVID-19: The Soul of Service in a Virtual MomentThe Service Industry After COVID-19: The Soul of Service in a Virtual Moment
The Service Industry After COVID-19: The Soul of Service in a Virtual Moment
Lucidworks
 
Webinar: Smart answers for employee and customer support after covid 19 - Europe
Webinar: Smart answers for employee and customer support after covid 19 - EuropeWebinar: Smart answers for employee and customer support after covid 19 - Europe
Webinar: Smart answers for employee and customer support after covid 19 - Europe
Lucidworks
 
Smart Answers for Employee and Customer Support After COVID-19
Smart Answers for Employee and Customer Support After COVID-19Smart Answers for Employee and Customer Support After COVID-19
Smart Answers for Employee and Customer Support After COVID-19
Lucidworks
 
Applying AI & Search in Europe - featuring 451 Research
Applying AI & Search in Europe - featuring 451 ResearchApplying AI & Search in Europe - featuring 451 Research
Applying AI & Search in Europe - featuring 451 Research
Lucidworks
 
Webinar: Accelerate Data Science with Fusion 5.1
Webinar: Accelerate Data Science with Fusion 5.1Webinar: Accelerate Data Science with Fusion 5.1
Webinar: Accelerate Data Science with Fusion 5.1
Lucidworks
 
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce StrategyWebinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
Lucidworks
 
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
Lucidworks
 
Apply Knowledge Graphs and Search for Real-World Decision Intelligence
Apply Knowledge Graphs and Search for Real-World Decision IntelligenceApply Knowledge Graphs and Search for Real-World Decision Intelligence
Apply Knowledge Graphs and Search for Real-World Decision Intelligence
Lucidworks
 
Webinar: Building a Business Case for Enterprise Search
Webinar: Building a Business Case for Enterprise SearchWebinar: Building a Business Case for Enterprise Search
Webinar: Building a Business Case for Enterprise Search
Lucidworks
 
Why Insight Engines Matter in 2020 and Beyond
Why Insight Engines Matter in 2020 and BeyondWhy Insight Engines Matter in 2020 and Beyond
Why Insight Engines Matter in 2020 and Beyond
Lucidworks
 

More from Lucidworks (20)

Search is the Tip of the Spear for Your B2B eCommerce Strategy
Search is the Tip of the Spear for Your B2B eCommerce StrategySearch is the Tip of the Spear for Your B2B eCommerce Strategy
Search is the Tip of the Spear for Your B2B eCommerce Strategy
 
Drive Agent Effectiveness in Salesforce
Drive Agent Effectiveness in SalesforceDrive Agent Effectiveness in Salesforce
Drive Agent Effectiveness in Salesforce
 
How Crate & Barrel Connects Shoppers with Relevant Products
How Crate & Barrel Connects Shoppers with Relevant ProductsHow Crate & Barrel Connects Shoppers with Relevant Products
How Crate & Barrel Connects Shoppers with Relevant Products
 
Lucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
Lucidworks & IMRG Webinar – Best-In-Class Retail Product DiscoveryLucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
Lucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
 
Connected Experiences Are Personalized Experiences
Connected Experiences Are Personalized ExperiencesConnected Experiences Are Personalized Experiences
Connected Experiences Are Personalized Experiences
 
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
 
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
 
Preparing for Peak in Ecommerce | eTail Asia 2020
Preparing for Peak in Ecommerce | eTail Asia 2020Preparing for Peak in Ecommerce | eTail Asia 2020
Preparing for Peak in Ecommerce | eTail Asia 2020
 
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
 
AI-Powered Linguistics and Search with Fusion and Rosette
AI-Powered Linguistics and Search with Fusion and RosetteAI-Powered Linguistics and Search with Fusion and Rosette
AI-Powered Linguistics and Search with Fusion and Rosette
 
The Service Industry After COVID-19: The Soul of Service in a Virtual Moment
The Service Industry After COVID-19: The Soul of Service in a Virtual MomentThe Service Industry After COVID-19: The Soul of Service in a Virtual Moment
The Service Industry After COVID-19: The Soul of Service in a Virtual Moment
 
Webinar: Smart answers for employee and customer support after covid 19 - Europe
Webinar: Smart answers for employee and customer support after covid 19 - EuropeWebinar: Smart answers for employee and customer support after covid 19 - Europe
Webinar: Smart answers for employee and customer support after covid 19 - Europe
 
Smart Answers for Employee and Customer Support After COVID-19
Smart Answers for Employee and Customer Support After COVID-19Smart Answers for Employee and Customer Support After COVID-19
Smart Answers for Employee and Customer Support After COVID-19
 
Applying AI & Search in Europe - featuring 451 Research
Applying AI & Search in Europe - featuring 451 ResearchApplying AI & Search in Europe - featuring 451 Research
Applying AI & Search in Europe - featuring 451 Research
 
Webinar: Accelerate Data Science with Fusion 5.1
Webinar: Accelerate Data Science with Fusion 5.1Webinar: Accelerate Data Science with Fusion 5.1
Webinar: Accelerate Data Science with Fusion 5.1
 
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce StrategyWebinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
 
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
 
Apply Knowledge Graphs and Search for Real-World Decision Intelligence
Apply Knowledge Graphs and Search for Real-World Decision IntelligenceApply Knowledge Graphs and Search for Real-World Decision Intelligence
Apply Knowledge Graphs and Search for Real-World Decision Intelligence
 
Webinar: Building a Business Case for Enterprise Search
Webinar: Building a Business Case for Enterprise SearchWebinar: Building a Business Case for Enterprise Search
Webinar: Building a Business Case for Enterprise Search
 
Why Insight Engines Matter in 2020 and Beyond
Why Insight Engines Matter in 2020 and BeyondWhy Insight Engines Matter in 2020 and Beyond
Why Insight Engines Matter in 2020 and Beyond
 

Recently uploaded

JMeter webinar - integration with InfluxDB and Grafana
JMeter webinar - integration with InfluxDB and GrafanaJMeter webinar - integration with InfluxDB and Grafana
JMeter webinar - integration with InfluxDB and Grafana
RTTS
 
How world-class product teams are winning in the AI era by CEO and Founder, P...
How world-class product teams are winning in the AI era by CEO and Founder, P...How world-class product teams are winning in the AI era by CEO and Founder, P...
How world-class product teams are winning in the AI era by CEO and Founder, P...
Product School
 
The Future of Platform Engineering
The Future of Platform EngineeringThe Future of Platform Engineering
The Future of Platform Engineering
Jemma Hussein Allen
 
Assuring Contact Center Experiences for Your Customers With ThousandEyes
Assuring Contact Center Experiences for Your Customers With ThousandEyesAssuring Contact Center Experiences for Your Customers With ThousandEyes
Assuring Contact Center Experiences for Your Customers With ThousandEyes
ThousandEyes
 
AI for Every Business: Unlocking Your Product's Universal Potential by VP of ...
AI for Every Business: Unlocking Your Product's Universal Potential by VP of ...AI for Every Business: Unlocking Your Product's Universal Potential by VP of ...
AI for Every Business: Unlocking Your Product's Universal Potential by VP of ...
Product School
 
GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using Deplo...
GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using Deplo...GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using Deplo...
GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using Deplo...
James Anderson
 
Dev Dives: Train smarter, not harder – active learning and UiPath LLMs for do...
Dev Dives: Train smarter, not harder – active learning and UiPath LLMs for do...Dev Dives: Train smarter, not harder – active learning and UiPath LLMs for do...
Dev Dives: Train smarter, not harder – active learning and UiPath LLMs for do...
UiPathCommunity
 
DevOps and Testing slides at DASA Connect
DevOps and Testing slides at DASA ConnectDevOps and Testing slides at DASA Connect
DevOps and Testing slides at DASA Connect
Kari Kakkonen
 
From Daily Decisions to Bottom Line: Connecting Product Work to Revenue by VP...
From Daily Decisions to Bottom Line: Connecting Product Work to Revenue by VP...From Daily Decisions to Bottom Line: Connecting Product Work to Revenue by VP...
From Daily Decisions to Bottom Line: Connecting Product Work to Revenue by VP...
Product School
 
Key Trends Shaping the Future of Infrastructure.pdf
Key Trends Shaping the Future of Infrastructure.pdfKey Trends Shaping the Future of Infrastructure.pdf
Key Trends Shaping the Future of Infrastructure.pdf
Cheryl Hung
 
IOS-PENTESTING-BEGINNERS-PRACTICAL-GUIDE-.pptx
IOS-PENTESTING-BEGINNERS-PRACTICAL-GUIDE-.pptxIOS-PENTESTING-BEGINNERS-PRACTICAL-GUIDE-.pptx
IOS-PENTESTING-BEGINNERS-PRACTICAL-GUIDE-.pptx
Abida Shariff
 
De-mystifying Zero to One: Design Informed Techniques for Greenfield Innovati...
De-mystifying Zero to One: Design Informed Techniques for Greenfield Innovati...De-mystifying Zero to One: Design Informed Techniques for Greenfield Innovati...
De-mystifying Zero to One: Design Informed Techniques for Greenfield Innovati...
Product School
 
When stars align: studies in data quality, knowledge graphs, and machine lear...
When stars align: studies in data quality, knowledge graphs, and machine lear...When stars align: studies in data quality, knowledge graphs, and machine lear...
When stars align: studies in data quality, knowledge graphs, and machine lear...
Elena Simperl
 
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
DanBrown980551
 
Essentials of Automations: Optimizing FME Workflows with Parameters
Essentials of Automations: Optimizing FME Workflows with ParametersEssentials of Automations: Optimizing FME Workflows with Parameters
Essentials of Automations: Optimizing FME Workflows with Parameters
Safe Software
 
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdfFIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance
 
GraphRAG is All You need? LLM & Knowledge Graph
GraphRAG is All You need? LLM & Knowledge GraphGraphRAG is All You need? LLM & Knowledge Graph
GraphRAG is All You need? LLM & Knowledge Graph
Guy Korland
 
Leading Change strategies and insights for effective change management pdf 1.pdf
Leading Change strategies and insights for effective change management pdf 1.pdfLeading Change strategies and insights for effective change management pdf 1.pdf
Leading Change strategies and insights for effective change management pdf 1.pdf
OnBoard
 
Search and Society: Reimagining Information Access for Radical Futures
Search and Society: Reimagining Information Access for Radical FuturesSearch and Society: Reimagining Information Access for Radical Futures
Search and Society: Reimagining Information Access for Radical Futures
Bhaskar Mitra
 
Kubernetes & AI - Beauty and the Beast !?! @KCD Istanbul 2024
Kubernetes & AI - Beauty and the Beast !?! @KCD Istanbul 2024Kubernetes & AI - Beauty and the Beast !?! @KCD Istanbul 2024
Kubernetes & AI - Beauty and the Beast !?! @KCD Istanbul 2024
Tobias Schneck
 

Recently uploaded (20)

JMeter webinar - integration with InfluxDB and Grafana
JMeter webinar - integration with InfluxDB and GrafanaJMeter webinar - integration with InfluxDB and Grafana
JMeter webinar - integration with InfluxDB and Grafana
 
How world-class product teams are winning in the AI era by CEO and Founder, P...
How world-class product teams are winning in the AI era by CEO and Founder, P...How world-class product teams are winning in the AI era by CEO and Founder, P...
How world-class product teams are winning in the AI era by CEO and Founder, P...
 
The Future of Platform Engineering
The Future of Platform EngineeringThe Future of Platform Engineering
The Future of Platform Engineering
 
Assuring Contact Center Experiences for Your Customers With ThousandEyes
Assuring Contact Center Experiences for Your Customers With ThousandEyesAssuring Contact Center Experiences for Your Customers With ThousandEyes
Assuring Contact Center Experiences for Your Customers With ThousandEyes
 
AI for Every Business: Unlocking Your Product's Universal Potential by VP of ...
AI for Every Business: Unlocking Your Product's Universal Potential by VP of ...AI for Every Business: Unlocking Your Product's Universal Potential by VP of ...
AI for Every Business: Unlocking Your Product's Universal Potential by VP of ...
 
GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using Deplo...
GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using Deplo...GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using Deplo...
GDG Cloud Southlake #33: Boule & Rebala: Effective AppSec in SDLC using Deplo...
 
Dev Dives: Train smarter, not harder – active learning and UiPath LLMs for do...
Dev Dives: Train smarter, not harder – active learning and UiPath LLMs for do...Dev Dives: Train smarter, not harder – active learning and UiPath LLMs for do...
Dev Dives: Train smarter, not harder – active learning and UiPath LLMs for do...
 
DevOps and Testing slides at DASA Connect
DevOps and Testing slides at DASA ConnectDevOps and Testing slides at DASA Connect
DevOps and Testing slides at DASA Connect
 
From Daily Decisions to Bottom Line: Connecting Product Work to Revenue by VP...
From Daily Decisions to Bottom Line: Connecting Product Work to Revenue by VP...From Daily Decisions to Bottom Line: Connecting Product Work to Revenue by VP...
From Daily Decisions to Bottom Line: Connecting Product Work to Revenue by VP...
 
Key Trends Shaping the Future of Infrastructure.pdf
Key Trends Shaping the Future of Infrastructure.pdfKey Trends Shaping the Future of Infrastructure.pdf
Key Trends Shaping the Future of Infrastructure.pdf
 
IOS-PENTESTING-BEGINNERS-PRACTICAL-GUIDE-.pptx
IOS-PENTESTING-BEGINNERS-PRACTICAL-GUIDE-.pptxIOS-PENTESTING-BEGINNERS-PRACTICAL-GUIDE-.pptx
IOS-PENTESTING-BEGINNERS-PRACTICAL-GUIDE-.pptx
 
De-mystifying Zero to One: Design Informed Techniques for Greenfield Innovati...
De-mystifying Zero to One: Design Informed Techniques for Greenfield Innovati...De-mystifying Zero to One: Design Informed Techniques for Greenfield Innovati...
De-mystifying Zero to One: Design Informed Techniques for Greenfield Innovati...
 
When stars align: studies in data quality, knowledge graphs, and machine lear...
When stars align: studies in data quality, knowledge graphs, and machine lear...When stars align: studies in data quality, knowledge graphs, and machine lear...
When stars align: studies in data quality, knowledge graphs, and machine lear...
 
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
LF Energy Webinar: Electrical Grid Modelling and Simulation Through PowSyBl -...
 
Essentials of Automations: Optimizing FME Workflows with Parameters
Essentials of Automations: Optimizing FME Workflows with ParametersEssentials of Automations: Optimizing FME Workflows with Parameters
Essentials of Automations: Optimizing FME Workflows with Parameters
 
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdfFIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
FIDO Alliance Osaka Seminar: The WebAuthn API and Discoverable Credentials.pdf
 
GraphRAG is All You need? LLM & Knowledge Graph
GraphRAG is All You need? LLM & Knowledge GraphGraphRAG is All You need? LLM & Knowledge Graph
GraphRAG is All You need? LLM & Knowledge Graph
 
Leading Change strategies and insights for effective change management pdf 1.pdf
Leading Change strategies and insights for effective change management pdf 1.pdfLeading Change strategies and insights for effective change management pdf 1.pdf
Leading Change strategies and insights for effective change management pdf 1.pdf
 
Search and Society: Reimagining Information Access for Radical Futures
Search and Society: Reimagining Information Access for Radical FuturesSearch and Society: Reimagining Information Access for Radical Futures
Search and Society: Reimagining Information Access for Radical Futures
 
Kubernetes & AI - Beauty and the Beast !?! @KCD Istanbul 2024
Kubernetes & AI - Beauty and the Beast !?! @KCD Istanbul 2024Kubernetes & AI - Beauty and the Beast !?! @KCD Istanbul 2024
Kubernetes & AI - Beauty and the Beast !?! @KCD Istanbul 2024
 

Parallel SQL and Analytics with Solr: Presented by Yonik Seeley, Cloudera

  • 1. 1  ©  Cloudera,  Inc.  All  rights  reserved.   Parallel  SQL  and  Analytics  with  Solr   Yonik  Seeley   Cloudera  
  • 2. 2  ©  Cloudera,  Inc.  All  rights  reserved.   My  Background   • Creator  of  Solr   • Cloudera  Engineer     • LucidWorks  Co-Founder   • Lucene/Solr  committer,  PMC  member   • Apache  Software  Foundation  member   • M.S.  in  Computer  Science,  Stanford  
  • 3. 3  ©  Cloudera,  Inc.  All  rights  reserved.   What  is  Apache  Solr   •  Search  server   • like  a  database,  but  different  indexing  technology  (Apache  Lucene)   • optimized  for  interactive  results   •  Columns  (aka  docValues)  for  fast  scans   •  Highlighting   •  Faceting  (category  counts)   •  Spatial  search   •  Powers  search  for  the  leading  Hadoop  Big  Data  vendors      
  • 4. 4  ©  Cloudera,  Inc.  All  rights  reserved.   Partial  Solr  Architecture   Lucene   Streaming  Expressions   Parallel  SQL   Distributed  Search   Facets  &   Statistics   Solr  Request   Framework   JSON  Facet   API   Green  blocks   are  newer   additions  
  • 5. 5  ©  Cloudera,  Inc.  All  rights  reserved.   Different  ways  to  calculate  things  in  Solr   •  Faceted  search  v1  /  stats  module    facet=true&facet.field=color&facet.limit=5   •  JSON  Facet  API  (faceted  search  v2)        {colors:{type:terms,  field:color,  limit:5}}   •  Streaming  expressions        rollup(search(techproducts,q="*:*",fl="id,color",   sort="color  asc"),  over="color",  count(*))     •  Parallel  SQL    select  count(*)  from  techproducts  where  _text_='(*:*)'  group   by  color"  
  • 6. 6  ©  Cloudera,  Inc.  All  rights  reserved.   JSON  Facet  API  
  • 7. 7  ©  Cloudera,  Inc.  All  rights  reserved.   Faceted  Search   •  Breaks  search  results  into   buckets   •  Generally  provides  bucket   counts   •  Allows  user  to  filter  /  "drill   into"  results  
  • 8. 8  ©  Cloudera,  Inc.  All  rights  reserved.   Faceting   Search   Statistics   Facet  Module  Goals   Search   Joins   Grouping   Field   Collapsing   New  Facet  Module   JSON  Facet  API   •  Integration   •  Performance   •  Ease  of  use   Highlighting   Nested   Documents   Geosearch  
  • 9. 9  ©  Cloudera,  Inc.  All  rights  reserved.   Simple  JSON  Facet  request  and  response   curl  http://localhost:8983/solr/query  -­‐d  '   q=widgets&   json.facet=   {      x  :  "avg(price)"  ,        y  :  "unique(brand)"   }   '     […]   "facets"  :  {      "count"  :  314,      "x"  :  102.5,      "y"  :  28   }   root  domain  defined  by  docs   matching  the  query   count  of  docs  in  the  bucket  
  • 10. 10  ©  Cloudera,  Inc.  All  rights  reserved.   Terms  facet  example    json.facet={        shoes  :  {            type  :  terms,              field  :  shoe_style,            sort  :  {x  :  desc},            facet  :  {                x  :  "avg(price)",                y  :  "unique(brand)"            }        }    }   "facets":  {      "count"  :  472,      "shoes":  {          "buckets"  :  [              {                  "val"  :  "Hiking",                  "count"  :  34,                  "x"  :  135.25,                  "y"  :  17,              },              {                  "val"  :  "Running",                  "count"  :  45,                  "x"  :  110.75,                  "y"  :  24,              },     Calculated  per-­‐bucket   Sort  by  any  stat!  
  • 11. 11  ©  Cloudera,  Inc.  All  rights  reserved.   Sub-­‐facet  example    json.facet={        shoes:{            type  :  terms,            field  :  shoe_style,            sort  :  {x  :  desc},            facet  :  {                x  :  "avg(price)",                y  :  "unique(brand)",                colors  :  {                    type  :  terms,                    field  :  color                }              }        }    }   "facets":  {      "count"  :  472,      "shoes":  {          "buckets"  :  [              {                  "val"  :  "Hiking",                  "count"  :  34,                  "x"  :  135.25,                  "y"  :  17,                  "colors"  :  {                      "buckets"  :  [                          {  "val"  :  "brown",                              "count"  :  12  },                          {  "val"  :  "black",                              "count"  :  10                          },  […]                      ]                  }  //  end  of  colors  sub-­‐facet              },  //  end  of  Hiking  bucket              {                  "val"  :  "Running",                  "count"  :  45,                  "x"  :  110.75,                  "y"  :  24,                  "colors"  :  {                      "buckets"  :  […]  
  • 12. 12  ©  Cloudera,  Inc.  All  rights  reserved.   Facet  Types   •  Terms  Facet   • Creates  new  domains  (facet  buckets)  based  on  values  in  a  field   •  Range  Facet   • Creates  multiple  buckets  based  on  date  ranges  or  numeric  ranges   •  Query  Facet   • Creates  a  single  bucket  of  documents  that  match  any  given  query   •  Unlimited  nesting:  Any  facet  types  may  have  any  number  of  sub-facets   •  Multi-select  faceting  (filter  exclusion)   •  Nested  documents  (block  join)  
  • 13. 13  ©  Cloudera,  Inc.  All  rights  reserved.   Streaming  Expressions  
  • 14. 14  ©  Cloudera,  Inc.  All  rights  reserved.   Solr  Streaming  Expressions   • Generic  platform  for  distributed  computation   • The  basis  for  implementing  distributed  parallel  SQL   • relational  operations  on  streams   • Works  across  entire  result  sets  (or  subsets)   • normal  search  operations  are  designed  for  fast  top-N  operations   • Map-reduce  like  "shuffle"  partitions  result  sets  for  greater  scalability   • Worker  nodes  can  be  allocated  from  a  collection  for  parallelism   • Incorporates  streams  from  non-Solr  systems  
  • 15. 15  ©  Cloudera,  Inc.  All  rights  reserved.   search()  expression   $  curl  hFp://localhost:8983/solr/techproducts/stream  -­‐d   'expr=search(techproducts,  q="*:*",  fl="id,price,score",  sort="id  asc")'     {"result-­‐set":{"docs":[   {"score":1.0,"id":"0579B002","price":179.99},   {"score":1.0,"id":"100-­‐435805","price":649.99},   {"score":1.0,"id":"3007WFP","price":2199.0},   {"score":1.0,"id":"VDBDB1A16"},   {"score":1.0,"id":"VS1GB400C3","price":74.99},   {"EOF":true,"RESPONSE_TIME":6}]}}     resulLng  tuple  stream  
  • 16. 16  ©  Cloudera,  Inc.  All  rights  reserved.   Search  Tuple  Stream   Shard  1   Replica  2   Shard  1   Replica  1   Shard  1   Replica  2   Shard  2   Replica  1   Shard  1   Replica  2   Shard  3   Replica  1   Worker   Tuple  Stream   Tuple  Stream   /stream  worker   execuLng  the  "search"   expression   •  search()  is  a  stream  source   •  Fully  SolrCloud  aware  (knows  cluster  layout)   •  Fully  streaming  (no  big  buffers)  
  • 17. 17  ©  Cloudera,  Inc.  All  rights  reserved.   search  expression  args   search(              //  parses  to  CloudSolrStream  java  class    techproducts,        //  name  of  the  collecLon  to  search    zkHost="localhost:9983",  //  (opt)  zookeeper  address  of  collecLon  to  search    qt="/select",          //  (opt)  the  request  handler  to  use  (/export  is  also  available)    rows=1000000,            //  (opt)  number  of  rows  to  retrieve      q=*:*,              //  query  to  match  returned  documents    fl="id,price,score",      //  which  fields  to  return    sort="id  asc,  price  desc",  //  how  to  sort  the  results              aliases="id=myid,price=myprice"    //  (opt)  renames  output  fields   )  
  • 18. 18  ©  Cloudera,  Inc.  All  rights  reserved.   rollup()  expression   •  Groups  tuples  by  common  field  values   •  Emits  rollup  value  along  with  metrics   •  Closest  equivalent  to  face.ng   rollup(                                                                                          search(collecLon1,  qt="/export"                              q="*:*",                            fl="id,manu,price",                              sort="manu  asc"),                over="manu"),                count(*),                max(price)   )   metrics   {"result-­‐set":{"docs":[   {"manu":"apple","count(*)":1.0},   {"manu":"asus","count(*)":1.0},   {"manu":"aL","count(*)":1.0},   {"manu":"belkin","count(*)":2.0},   {"manu":"canon","count(*)":2.0},   {"manu":"corsair","count(*)":3.0},   [...]  
  • 19. 19  ©  Cloudera,  Inc.  All  rights  reserved.   Parallel  Tuple  Stream   Shard  1   Replica  2   Shard  1   Replica  1   Shard  1   Replica  2   Shard  2   Replica  1   Shard  1   Replica  2   Shard  3   Replica  1   Worker   ParLLon  1   Worker   ParLLon  2   Worker   Tuple  Stream  
  • 20. 20  ©  Cloudera,  Inc.  All  rights  reserved.   Streaming  Expressions  –  parallel   •  Wraps  a  stream  and  sends  to  N  worker   nodes   •  The  first  parameter  is  the  collec.on  to   use  for  the  intermediate  worker  nodes   •  par..onKeys  must  be  provided  to   underlying  workers   • usually  makes  sense  to  par..on  by   what  you  are  grouping  on   •  inner  and  outer  sorts  should  match   parallel(collecLon1,                    rollup(                                search(techproducts,                                            q="*:*",                                            fl="id,manu,price",                                            sort="manu  asc",                                                            parLLonKeys="manu"),                                over="manu  asc"),                    workers=2,                    zkHost="localhost:9983",                    sort="manu  asc")  
  • 21. 21  ©  Cloudera,  Inc.  All  rights  reserved.   Distributed  Joins!   innerJoin(          search(people,  q=*:*,  fl="personId,name",  sort="personId  asc"),          search(pets,  q=type:cat,  fl="personId,petName",  sort="personId  asc"),          on="personId"   )     Also:  leftOuterJoin,  hashJoin,  outerHashJoin,  
  • 22. 22  ©  Cloudera,  Inc.  All  rights  reserved.   More  stream  decorators   •  complement  –  emits  tuples  from  A  which  do  not  exist  in  B   •  intersect  –  emits  tuples  from  A  which  do  exist  in  B   •  merge   •  reduce   •  sort   •  top  –  reorders  the  stream  and  returns  the  top  N  tuples   •  unique  –  emits  only  the  first  tuple  for  each  value   •  select  –  select,  rename,  or  give  default  values  to  fields  in  a  tuple     https://cwiki.apache.org/confluence/display/solr/Streaming+Expressions    
  • 23. 23  ©  Cloudera,  Inc.  All  rights  reserved.   jdbc()  expression  stream   join  with  other  data  sources!   innerJoin(      select(        search(collecLon1,  [...]),                personId_i  as  personId,        raLng_f  as  raLng    ),      select(        jdbc(connecLon="jdbc:hsqldb:mem:.",  sql="select  PEOPLE.ID  as   PERSONID,  PEOPLE.NAME,  COUNTRIES.COUNTRY_NAME  from  PEOPLE  inner  join   COUNTRIES  on  PEOPLE.COUNTRY_CODE  =  COUNTRIES.CODE  order  by  PEOPLE.ID",   sort="ID  asc",  get_column_name=true),                ID  as  personId,        NAME  as  personName,        COUNTRY_NAME  as  country    ),                on="personId"   )  
  • 24. 24  ©  Cloudera,  Inc.  All  rights  reserved.   Parallel  SQL  
  • 25. 25  ©  Cloudera,  Inc.  All  rights  reserved.   /sql  Handler   Why  SQL?   • External  integrations   • Higher  level  language  –  says  what  we  want,  not  how  to  get  it   • SQL  has  made  a  comeback  along  with  big  data,  more  ubiquitous  than  ever   •  /sql  REST  endpoint  by  default  on  all  solr  nodes   •  Translates  SQL  ->  parallel  streaming  expressions   •  SQL  tables  map  to  SolrCloud  collections   •  Currently  uses  Presto  SQL  parser   • Switch  to  Apache  Calcite  parser  in  the  works  
  • 26. 26  ©  Cloudera,  Inc.  All  rights  reserved.  
  • 27. 27  ©  Cloudera,  Inc.  All  rights  reserved.   Simplest  SQL  Example   $  curl  hFp://localhost:8983/solr/techproducts/sql  -­‐d  "stmt=select  id  from  techproducts"     {"result-­‐set":{"docs":[   {"id":"EN7800GTX/2DHTV/256M"},   {"id":"100-­‐435805"},   {"id":"UTF8TEST"},   {"id":"SOLR1000"},   {"id":"9885A004"},   [...]   tables  map  to   collecLons  
  • 28. 28  ©  Cloudera,  Inc.  All  rights  reserved.   SQL  handler  HTTP  parameters   curl  hFp://localhost:8983/solr/techproducts/sql  -­‐d  '   &stmt=<sql_statement>   &numWorkers=4    //  currently  used  by  GROUP  BY  and  DISTINCT  (via  parallel  stream)   &workerCollecLon=collecLon1    //  where  to  create  intermediate  workers   &workerZkhost=localhost:9983    //  cluster  (zookeeper  ensemble)  address   &aggregaLonMode=map_reduce  |  facet  
  • 29. 29  ©  Cloudera,  Inc.  All  rights  reserved.   The  WHERE  clause   •  WHERE  clauses  are  all  pushed  down  to  the  search  layer     select  id      where  popularity=10      //  simple  match  on  numeric  field  "popularity"      where  popularity='[5  TO  10]'      //  solr  range  query  (note  the  quotes)      where  name='hard  drive'      //  phrase  query  on  the  "name"  field      where  name='((memory  retail)  AND  popularity:[5  TO  10])'    //  arbitrary  solr  query      where  name='(memory  retail)'  AND  popularity='[5  TO  10]'  //  boolean  logic      
  • 30. 30  ©  Cloudera,  Inc.  All  rights  reserved.   Ordering  and  LimiLng   select  id,score  from  techproducts            where  text='(memory  hard  drive)'            ORDER  BY  popularity  desc                                //  default  order  is  score  desc  for  limited  queries          LIMIT  100     •  Limited  queries  use  /select  handler   •  Unlimited  queries  use  /export  handler   • fields  selected  need  to  be  docValues   • fields  in  "order  by"  need  to  be  docValues   • no  "score"  field  allowed  
  • 31. 31  ©  Cloudera,  Inc.  All  rights  reserved.   More  SQL  examples   select  distinct  fieldA  as  fa,  fieldB  as  fb  from  tableA  order  by  fa  desc,  fb  desc     //  simple  stats     select  count(fieldA)  as  count,  sum(fieldB)  as  sum  from  tableA  where  fieldC  =  'Hello'     select  fieldA,  fieldB,  count(*),  sum(fieldC),  avg(fieldY)  from  tableA            where  fieldC  =  'term1  term2'            group  by  fieldA,  fieldB            having  ((sum(fieldC)  >  1000)  AND  (avg(fieldY)  <=  10))          order  by  sum(fieldC)  asc      
  • 32. 32  ©  Cloudera,  Inc.  All  rights  reserved.   Solr  JDBC  Driver  
  • 33. 33  ©  Cloudera,  Inc.  All  rights  reserved.   Solr  JDBC  driver  works  with  Apache  Zeppelin  
  • 34. 34  ©  Cloudera,  Inc.  All  rights  reserved.   Graph  Traversal  
  • 35. 35  ©  Cloudera,  Inc.  All  rights  reserved.   Graph  Filter   •  Follows  ad  hoc  edges   •  Not  distributed!   • still  useable  on   partitioned  data   •  Can  filter  on  each  hop   •  Can  specify  max  depth   •  Cycle  detection     fq={!graph  from=parents  to=id} id:"Philip  J.  Fry"   id  :  "Philip  J.  Fry"   parents:["Yancy  Fry,  Sr.","Mrs.  Fry"]   id  :  "Yancy  Fry"   parents:["Yancy  Fry,  Sr.","Mrs.  Fry"]   id  :  "Yancy  Fry,  Sr."   parents:["Mildred,  "Philip  J.  Fry"]   id  :  "Mrs.  Fry"   parents:["Mr.  Gleisner",    "Mrs.  Gleisner"]   id  :  "Mildred"   id  :  "Hubert  J.     Farnsworth"   id  :  "Philip  J.  Fry"   parents:["Yancy  Fry,  Sr.","Mrs.  Fry"]   Cycle!  
  • 36. 36  ©  Cloudera,  Inc.  All  rights  reserved.   Graph  streaming  expressions   •  Breadth-­‐first  graph  traversals   •  Fully  integrated  with  streaming,  fully  distributed   •  Traverse  across  collecLons  as  well  as  shards   •  Compute  aggregaLons     curl  http://localhost:8983/solr/emails/stream  –d  '      expr=gatherNodes(emails,                                        walk="johndoe@apache.org-­‐>from",                                        gather="to")   '  
  • 37. 37  ©  Cloudera,  Inc.  All  rights  reserved.   Graph  streaming  expressions  example   •  Index  some  books  in  one  collecLon   curl  http://localhost:8983/solr/books/update  -­‐H  'Content-­‐type:text/csv'  -­‐d  '   id,cat,pubyear_i,title,author,series_s,sequence_i   book1,fantasy,2000,A  Storm  of  Swords,George  R.R.  Martin,A  Song  of  Ice  and  Fire,3   book2,fantasy,2005,A  Feast  for  Crows,George  R.R.  Martin,A  Song  of  Ice  and  Fire,4   book3,fantasy,2011,A  Dance  with  Dragons,George  R.R.  Martin,A  Song  of  Ice  and  Fire,5   book4,sci-­‐fi,1987,Consider  Phlebas,Iain  M.  Banks,The  Culture,1   book5,sci-­‐fi,1988,The  Player  of  Games,Iain  M.  Banks,The  Culture,2   book6,sci-­‐fi,1990,Use  of  Weapons,Iain  M.  Banks,The  Culture,3   book7,fantasy,1984,Shadows  Linger,Glen  Cook,The  Black  Company,2   book8,fantasy,1984,The  White  Rose,Glen  Cook,The  Black  Company,3   book9,fantasy,1989,Shadow  Games,Glen  Cook,The  Black  Company,4   book10,sci-­‐fi,2001,Gridlinked,Neal  Asher,Ian  Cormac,1   book11,sci-­‐fi,2003,The  Line  of  Polity,Neal  Asher,Ian  Cormac,2   book12,sci-­‐fi,2005,Brass  Man,Neal  Asher,Ian  Cormac,3   '  
  • 38. 38  ©  Cloudera,  Inc.  All  rights  reserved.   Graph  streaming  expressions  example   •  Index  some  book  reviews  into  another  collecLon   curl  http://localhost:8983/solr/reviews/update-­‐H  'Content-­‐type:text/csv'  -­‐d  '   id,book_s,user_s,rating_i,review_t   book1_r1,book1,Yonik,5,awesome  book!   book1_r2,book1,Aarav,2,too  bloody   book1_r3,book1,Haruka,5,awesome  world  building   book2_r1,book2,Yonik,5,another  great  one   book2_r2,book2,Maria,5,wow!   book4_r1,book4,Yonik,2,i  am  lying...  actually  liked  it   book4_r2,book4,Aarav,5,Loved  it   book7_r1,book7,Yonik,4,read  back  in  college  but  it  was  good   book10_r1,book10,Maria,5,I  want  a  gridlink!   book11_r1,book11,Maria,1,Blech   book11_r2,book11,Aarav,4,is  this  the  first  book?   book12_r1,book12,Yonik,5,Mr.  Crane  is  scary...   '   1.  Find  books  I  like   2.  Find  who  else  rated   those  books  highly   3.  Find  other  books   they  rated  highly   4.  Profit!  
  • 39. 39  ©  Cloudera,  Inc.  All  rights  reserved.   1.  Search  expression  to  find  my  high  ratings   URL="http://localhost:8983/solr/reviews/stream"     #  Use  search  expression  to  find  reviews  where  I  gave  the  book  a  "5"   curl  $URL  -d  'expr=search(reviews,  q="user_s:Yonik  AND  rating_i:5",   fl="id,book_s,user_s,rating_i",  sort="user_s  asc")'       {"result-set":{"docs":[   {"rating_i":5,"id":"book2_r1","user_s":"Yonik","book_s":"book2"},   {"rating_i":5,"id":"book1_r1","user_s":"Yonik","book_s":"book1"},   {"rating_i":5,"id":"book12_r1","user_s":"Yonik","book_s":"book12"},   {"EOF":true,"RESPONSE_TIME":4}]}}  
  • 40. 40  ©  Cloudera,  Inc.  All  rights  reserved.   2.  gatherNodes  expression  to  find  users   curl  $URL  -­‐d  'expr=gatherNodes(reviews,        search(reviews,  q="user_s:Yonik  AND  rating_i:5",                      fl="book_s,user_s,rating_i",sort="user_s  asc"),        walk="book_s-­‐>book_s",        gather="user_s",        fq="rating_i:[4  TO  *]  -­‐user_s:Yonik",        trackTraversal=true  )'       {"result-­‐set":{"docs":[   {"node":"Haruka","collecLon":"reviews","field":"user_s","ancestors":["book1"],"level":1},   {"node":"Maria","collecLon":"reviews","field":"user_s","ancestors":["book2"],"level":1},   {"EOF":true,"RESPONSE_TIME":22}]}}   "gather"  values  
  • 41. 41  ©  Cloudera,  Inc.  All  rights  reserved.   3.  gatherNodes  to  find  high  raLngs  by  those  users     curl  $URL  -­‐d  'expr=gatherNodes(reviews,          gatherNodes(reviews,  search(reviews,q="user_s:Yonik  AND  rating_i: 5",fl="id,book_s,user_s,rating_i",sort="user_s  asc"),  walk="book_s-­‐>book_s",   gather="user_s",fq="rating_i:[4  TO  *]  -­‐user_s:Yonik"),          walk="node-­‐>user_s",  gather="book_s",  fq="rating_i:[4  TO  *]",          avg(rating_i),          trackTraversal=true)'         {"result-­‐set":{"docs":[   {"node":"book10","avg(raLng_i)":5.0,"field":"book_s","level": 2,"collecLon":"reviews","ancestors":["Maria"]},   {"EOF":true,"RESPONSE_TIME":65}]}}  
  • 42. 42  ©  Cloudera,  Inc.  All  rights  reserved.   Retrieving  complete  traversal   curl  $URL  -­‐d  'expr=gatherNodes(reviews,  [...],  scaFer="branches,leaves")'         {"result-­‐set":{"docs":[   {"node":"book12","collecLon":"reviews","field":"book_s","level":0},   {"node":"book1","collecLon":"reviews","field":"book_s","level":0},   {"node":"book2","collecLon":"reviews","field":"book_s","level":0},   {"node":"Haruka","collecLon":"reviews","field":"user_s","level":1},   {"node":"Maria","collecLon":"reviews","field":"user_s","level":1},   {"node":"book10","avg(raLng_i)":5.0,"field":"book_s","level":2,   "collecLon":"reviews","ancestors":["Maria"]},   {"EOF":true,"RESPONSE_TIME":111}]}}  
  • 43. 43  ©  Cloudera,  Inc.  All  rights  reserved.   Solr  admin  stream  view  
  • 44. 44  ©  Cloudera,  Inc.  All  rights  reserved.   More  graph  expressions   •  shortestPath   • Finds  the  shortest  path  between  "from"  and  "to"     •  scoreNodes  :  tf-idf  inspired  scoring   • wraps  a  gatherNodes  expression  that  finds  the  co-occurrence  count   • tf  factor  –  the  co-occurrence  count   • idf  factor  –  boosts  nodes  that  are  rarer  overall  
  • 45. 45  ©  Cloudera,  Inc.  All  rights  reserved.   Network  analysis  and  visualization   curl  http://localhost:8983/solr/reviews/graph  -d  'expr=gatherNodes(reviews,  [...],   scatter="branches,leaves")'         <?xml  version="1.0"  encoding="UTF-8"?>   <graphml  xmlns="http://graphml.graphdrawing.org/xmlns"     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"     xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns  http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">   <graph  id="G"  edgedefault="directed">   <node  id="book12">      <data  key="field">book_s</data>      <data  key="level">0</data>   </node>   <node  id="book1">      <data  key="field">book_s</data>   [...]  
  • 46. 46  ©  Cloudera,  Inc.  All  rights  reserved.  
  • 47. 47  ©  Cloudera,  Inc.  All  rights  reserved.   Streaming  Expressions  vs  JSON  Facets  
  • 48. 48  ©  Cloudera,  Inc.  All  rights  reserved.   JSON  Facet  API   •  More  focused  on  web-scale  interactive   responses   •  Tighter  integration   • Utilizes  existing  distributed  search   framework  /  just  another  search   component   • single  request-response  top-N,   grouping,  highlighting,  faceting,  etc.   • block  join  /  nested  document   support   •  More  expressive?   Streaming  Expressions   •  More  general  purpose,  larger  scope   • wrap  streams  within  streams  to  do   pretty  much  anything   • not  tied  to  documents  (analytics  across   joins  w/  external  DBs)   • update  streams,  machine  learning   streams,  etc.   •  Exact  results  (e.g.  cardinality)   •  distributed  joins,  graph   •  Increasingly  will  use  JSON  Facet  API  to   push  work  to  leaves  
  • 49. 49  ©  Cloudera,  Inc.  All  rights  reserved.   Thank  you   yonik@cloudera.com