SlideShare a Scribd company logo
1 of 22
Speeding up Lucene using
GPUs
Ishan Chattopadhyaya
Committer & PMC Member, Apache Lucene/Solr
Kishore Angani
Senior Search Architect, Unbxd
@ichattopadhyaya @kishore_angani
#Activate18 #ActivateSearch
Agenda
• Brief History of Computers
• GPGPU computing
• Inverted Index
• Searching on GPU
 Spatial Search
 BM25
 Advanced Scoring
 Faceting
• Work ahead
Brief History of Computers
What is GPGPU computing?
What is an Inverted Index?
Search on GPU
 Spatial Search
 Native scoring (BM25)
 Faceting
 Advanced Scoring
LUCENE-7745
Spatial Search
Spatial Search: Code
int *docs;
float *lats, *lngs;
long N;
device_vector<int> gpu_docs;
device_vector<float> gpu_lats, gpu_lngs;
JNIEXPORT jint JNICALL Java_GpuGeoDist_initIndex(JNIEnv *env, jobject obj, jintArray docIdsArray,
jfloatArray latsArray, jfloatArray lngsArray) {
N = env->GetArrayLength(docIdsArray);
docs = env->GetIntArrayElements(docIdsArray, NULL);
lats = env->GetFloatArrayElements(latsArray, NULL);
lngs = env->GetFloatArrayElements(lngsArray, NULL);
// Copy the vectors to the device
host_vector<int> cpu_docs(N);
for (int i=0; i<N; i++) cpu_docs[i] = docs[i];
gpu_docs = cpu_docs;
host_vector<float> cpu_lats(N);
for (int i=0; i<N; i++) cpu_lats[i] = lats[i];
gpu_lats = cpu_lats;
host_vector<float> cpu_lngs(N);
for (int i=0; i<N; i++) cpu_lngs[i] = lngs[i];
gpu_lngs = cpu_lngs;
}
Spatial Search: Code
JNIEXPORT jobject JNICALL Java_GpuGeoDist_findNearest(JNIEnv *env, jobject obj, jfloat lat, jfloat lng)
{
// Actual scoring/sorting on device
device_vector<float> gpu_distances(N);
transform(gpu_lats.begin(), gpu_lats.end(), gpu_lngs.begin(), gpu_distances.begin(), geodist(lat, lng));
device_vector<int> docIds = gpu_docs;
thrust::sort_by_key(gpu_distances.begin(), gpu_distances.end(), docIds.begin());
int *docs = (int*)malloc(N*4*2);
float *distances = &((float*)docs)[N];
thrust::copy(docIds.begin(), docIds.end(), docs);
thrust::copy(gpu_distances.begin(), gpu_distances.end(), distances);
jobject directBuffer = env->NewDirectByteBuffer((void*)docs, N*2*4);
return directBuffer;
}
struct geodist {
const float refX,refY;
geodist(float _x, float _y): refX(_x), refY(_y) {}
__host__ __device__ float operator()(float &x, float &y) const {
return sqrt((x - refX) * (x-refX) + (y-refY)*(y-refY));
}
};
Spatial Search: Benchmarks
Native Scoring (BM25): Formula
Native Scoring (BM25): Initialization
 Postings
 Norms
 Inverted Document Frequencies
Native Scoring (BM25): Query
processing
 Term dictionary lookup
 Scattering term rows
 Computing term scores
 Sorting by scores
Native Scoring (BM25): Code
Native Scoring (BM25): Benchmarks
Advanced Scoring
Faceting
Work ahead
Thank you!
Ishan Chattopadhyaya
Committer & PMC Member, Apache Lucene/Solr
Kishore Angani
Senior Search Architect, Unbxd
@ichattopadhyaya @kishore_angani
#Activate18 #ActivateSearch
Thank you!
Ishan Chattopadhyaya
Committer & PMC Member, Apache Lucene/Solr
Kishore Angani
Senior Search Architect, Unbxd
@ichattopadhyaya @kishore_angani
#Activate18 #ActivateSearch
Speeding Up Lucene with GPUs - Ishan Chattopadhyaya, Unbxd

More Related Content

Similar to Speeding Up Lucene with GPUs - Ishan Chattopadhyaya, Unbxd

20210301_PGconf_Online_GPU_PostGIS_GiST_Index
20210301_PGconf_Online_GPU_PostGIS_GiST_Index20210301_PGconf_Online_GPU_PostGIS_GiST_Index
20210301_PGconf_Online_GPU_PostGIS_GiST_IndexKohei KaiGai
 
Around the world with extensions | PostgreSQL Conference Europe 2018 | Craig ...
Around the world with extensions | PostgreSQL Conference Europe 2018 | Craig ...Around the world with extensions | PostgreSQL Conference Europe 2018 | Craig ...
Around the world with extensions | PostgreSQL Conference Europe 2018 | Craig ...Citus Data
 
20201128_OSC_Fukuoka_Online_GPUPostGIS
20201128_OSC_Fukuoka_Online_GPUPostGIS20201128_OSC_Fukuoka_Online_GPUPostGIS
20201128_OSC_Fukuoka_Online_GPUPostGISKohei KaiGai
 
20150318-SFPUG-Meetup-PGStrom
20150318-SFPUG-Meetup-PGStrom20150318-SFPUG-Meetup-PGStrom
20150318-SFPUG-Meetup-PGStromKohei KaiGai
 
Apache Spark and Tensorflow as a Service with Jim Dowling
Apache Spark and Tensorflow as a Service with Jim DowlingApache Spark and Tensorflow as a Service with Jim Dowling
Apache Spark and Tensorflow as a Service with Jim DowlingSpark Summit
 
Scaling out Tensorflow-as-a-Service on Spark and Commodity GPUs
Scaling out Tensorflow-as-a-Service on Spark and Commodity GPUsScaling out Tensorflow-as-a-Service on Spark and Commodity GPUs
Scaling out Tensorflow-as-a-Service on Spark and Commodity GPUsJim Dowling
 
Apache Spark and Tensorflow as a Service with Jim Dowling
Apache Spark and Tensorflow as a Service with Jim DowlingApache Spark and Tensorflow as a Service with Jim Dowling
Apache Spark and Tensorflow as a Service with Jim DowlingSpark Summit
 
Geospatial data platform at Uber
Geospatial data platform at UberGeospatial data platform at Uber
Geospatial data platform at UberDataWorks Summit
 
GPU enablement for data science on OpenShift | DevNation Tech Talk
GPU enablement for data science on OpenShift | DevNation Tech TalkGPU enablement for data science on OpenShift | DevNation Tech Talk
GPU enablement for data science on OpenShift | DevNation Tech TalkRed Hat Developers
 
Uber Geo spatial data platform at DataWorks Summit
Uber Geo spatial data platform at DataWorks SummitUber Geo spatial data platform at DataWorks Summit
Uber Geo spatial data platform at DataWorks SummitZhenxiao Luo
 
[Serverless OpenHack Tokyo] Azure Serverless (English)
[Serverless OpenHack Tokyo] Azure Serverless (English)[Serverless OpenHack Tokyo] Azure Serverless (English)
[Serverless OpenHack Tokyo] Azure Serverless (English)Naoki (Neo) SATO
 
GPU-Accelerating UDFs in PySpark with Numba and PyGDF
GPU-Accelerating UDFs in PySpark with Numba and PyGDFGPU-Accelerating UDFs in PySpark with Numba and PyGDF
GPU-Accelerating UDFs in PySpark with Numba and PyGDFKeith Kraus
 
Presto GeoSpatial @ Strata New York 2017
Presto GeoSpatial @ Strata New York 2017Presto GeoSpatial @ Strata New York 2017
Presto GeoSpatial @ Strata New York 2017Zhenxiao Luo
 
GPGPU Accelerates PostgreSQL ~Unlock the power of multi-thousand cores~
GPGPU Accelerates PostgreSQL ~Unlock the power of multi-thousand cores~GPGPU Accelerates PostgreSQL ~Unlock the power of multi-thousand cores~
GPGPU Accelerates PostgreSQL ~Unlock the power of multi-thousand cores~Kohei KaiGai
 
LTから入門するPython開発環境 #PyLadiesTokyo
LTから入門するPython開発環境 #PyLadiesTokyoLTから入門するPython開発環境 #PyLadiesTokyo
LTから入門するPython開発環境 #PyLadiesTokyoHidenori Matsuki
 
The GPS Architecture on Android
The GPS Architecture on AndroidThe GPS Architecture on Android
The GPS Architecture on AndroidPing-Chin Huang
 
Kernel Recipes 2018 - New GPIO interface for linux user space - Bartosz Golas...
Kernel Recipes 2018 - New GPIO interface for linux user space - Bartosz Golas...Kernel Recipes 2018 - New GPIO interface for linux user space - Bartosz Golas...
Kernel Recipes 2018 - New GPIO interface for linux user space - Bartosz Golas...Anne Nicolas
 

Similar to Speeding Up Lucene with GPUs - Ishan Chattopadhyaya, Unbxd (20)

20210301_PGconf_Online_GPU_PostGIS_GiST_Index
20210301_PGconf_Online_GPU_PostGIS_GiST_Index20210301_PGconf_Online_GPU_PostGIS_GiST_Index
20210301_PGconf_Online_GPU_PostGIS_GiST_Index
 
Around the world with extensions | PostgreSQL Conference Europe 2018 | Craig ...
Around the world with extensions | PostgreSQL Conference Europe 2018 | Craig ...Around the world with extensions | PostgreSQL Conference Europe 2018 | Craig ...
Around the world with extensions | PostgreSQL Conference Europe 2018 | Craig ...
 
Programar para GPUs
Programar para GPUsProgramar para GPUs
Programar para GPUs
 
20201128_OSC_Fukuoka_Online_GPUPostGIS
20201128_OSC_Fukuoka_Online_GPUPostGIS20201128_OSC_Fukuoka_Online_GPUPostGIS
20201128_OSC_Fukuoka_Online_GPUPostGIS
 
NVIDIA CUDA
NVIDIA CUDANVIDIA CUDA
NVIDIA CUDA
 
20150318-SFPUG-Meetup-PGStrom
20150318-SFPUG-Meetup-PGStrom20150318-SFPUG-Meetup-PGStrom
20150318-SFPUG-Meetup-PGStrom
 
Apache Spark and Tensorflow as a Service with Jim Dowling
Apache Spark and Tensorflow as a Service with Jim DowlingApache Spark and Tensorflow as a Service with Jim Dowling
Apache Spark and Tensorflow as a Service with Jim Dowling
 
Scaling out Tensorflow-as-a-Service on Spark and Commodity GPUs
Scaling out Tensorflow-as-a-Service on Spark and Commodity GPUsScaling out Tensorflow-as-a-Service on Spark and Commodity GPUs
Scaling out Tensorflow-as-a-Service on Spark and Commodity GPUs
 
Apache Spark and Tensorflow as a Service with Jim Dowling
Apache Spark and Tensorflow as a Service with Jim DowlingApache Spark and Tensorflow as a Service with Jim Dowling
Apache Spark and Tensorflow as a Service with Jim Dowling
 
Geospatial data platform at Uber
Geospatial data platform at UberGeospatial data platform at Uber
Geospatial data platform at Uber
 
GPU enablement for data science on OpenShift | DevNation Tech Talk
GPU enablement for data science on OpenShift | DevNation Tech TalkGPU enablement for data science on OpenShift | DevNation Tech Talk
GPU enablement for data science on OpenShift | DevNation Tech Talk
 
Uber Geo spatial data platform at DataWorks Summit
Uber Geo spatial data platform at DataWorks SummitUber Geo spatial data platform at DataWorks Summit
Uber Geo spatial data platform at DataWorks Summit
 
[Serverless OpenHack Tokyo] Azure Serverless (English)
[Serverless OpenHack Tokyo] Azure Serverless (English)[Serverless OpenHack Tokyo] Azure Serverless (English)
[Serverless OpenHack Tokyo] Azure Serverless (English)
 
GPU-Accelerating UDFs in PySpark with Numba and PyGDF
GPU-Accelerating UDFs in PySpark with Numba and PyGDFGPU-Accelerating UDFs in PySpark with Numba and PyGDF
GPU-Accelerating UDFs in PySpark with Numba and PyGDF
 
Presto GeoSpatial @ Strata New York 2017
Presto GeoSpatial @ Strata New York 2017Presto GeoSpatial @ Strata New York 2017
Presto GeoSpatial @ Strata New York 2017
 
Exploiting GPUs in Spark
Exploiting GPUs in SparkExploiting GPUs in Spark
Exploiting GPUs in Spark
 
GPGPU Accelerates PostgreSQL ~Unlock the power of multi-thousand cores~
GPGPU Accelerates PostgreSQL ~Unlock the power of multi-thousand cores~GPGPU Accelerates PostgreSQL ~Unlock the power of multi-thousand cores~
GPGPU Accelerates PostgreSQL ~Unlock the power of multi-thousand cores~
 
LTから入門するPython開発環境 #PyLadiesTokyo
LTから入門するPython開発環境 #PyLadiesTokyoLTから入門するPython開発環境 #PyLadiesTokyo
LTから入門するPython開発環境 #PyLadiesTokyo
 
The GPS Architecture on Android
The GPS Architecture on AndroidThe GPS Architecture on Android
The GPS Architecture on Android
 
Kernel Recipes 2018 - New GPIO interface for linux user space - Bartosz Golas...
Kernel Recipes 2018 - New GPIO interface for linux user space - Bartosz Golas...Kernel Recipes 2018 - New GPIO interface for linux user space - Bartosz Golas...
Kernel Recipes 2018 - New GPIO interface for linux user space - Bartosz Golas...
 

More from Lucidworks

Search is the Tip of the Spear for Your B2B eCommerce Strategy
Search is the Tip of the Spear for Your B2B eCommerce StrategySearch is the Tip of the Spear for Your B2B eCommerce Strategy
Search is the Tip of the Spear for Your B2B eCommerce StrategyLucidworks
 
Drive Agent Effectiveness in Salesforce
Drive Agent Effectiveness in SalesforceDrive Agent Effectiveness in Salesforce
Drive Agent Effectiveness in SalesforceLucidworks
 
How Crate & Barrel Connects Shoppers with Relevant Products
How Crate & Barrel Connects Shoppers with Relevant ProductsHow Crate & Barrel Connects Shoppers with Relevant Products
How Crate & Barrel Connects Shoppers with Relevant ProductsLucidworks
 
Lucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
Lucidworks & IMRG Webinar – Best-In-Class Retail Product DiscoveryLucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
Lucidworks & IMRG Webinar – Best-In-Class Retail Product DiscoveryLucidworks
 
Connected Experiences Are Personalized Experiences
Connected Experiences Are Personalized ExperiencesConnected Experiences Are Personalized Experiences
Connected Experiences Are Personalized ExperiencesLucidworks
 
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...Lucidworks
 
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...Lucidworks
 
Preparing for Peak in Ecommerce | eTail Asia 2020
Preparing for Peak in Ecommerce | eTail Asia 2020Preparing for Peak in Ecommerce | eTail Asia 2020
Preparing for Peak in Ecommerce | eTail Asia 2020Lucidworks
 
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...Lucidworks
 
AI-Powered Linguistics and Search with Fusion and Rosette
AI-Powered Linguistics and Search with Fusion and RosetteAI-Powered Linguistics and Search with Fusion and Rosette
AI-Powered Linguistics and Search with Fusion and RosetteLucidworks
 
The Service Industry After COVID-19: The Soul of Service in a Virtual Moment
The Service Industry After COVID-19: The Soul of Service in a Virtual MomentThe Service Industry After COVID-19: The Soul of Service in a Virtual Moment
The Service Industry After COVID-19: The Soul of Service in a Virtual MomentLucidworks
 
Webinar: Smart answers for employee and customer support after covid 19 - Europe
Webinar: Smart answers for employee and customer support after covid 19 - EuropeWebinar: Smart answers for employee and customer support after covid 19 - Europe
Webinar: Smart answers for employee and customer support after covid 19 - EuropeLucidworks
 
Smart Answers for Employee and Customer Support After COVID-19
Smart Answers for Employee and Customer Support After COVID-19Smart Answers for Employee and Customer Support After COVID-19
Smart Answers for Employee and Customer Support After COVID-19Lucidworks
 
Applying AI & Search in Europe - featuring 451 Research
Applying AI & Search in Europe - featuring 451 ResearchApplying AI & Search in Europe - featuring 451 Research
Applying AI & Search in Europe - featuring 451 ResearchLucidworks
 
Webinar: Accelerate Data Science with Fusion 5.1
Webinar: Accelerate Data Science with Fusion 5.1Webinar: Accelerate Data Science with Fusion 5.1
Webinar: Accelerate Data Science with Fusion 5.1Lucidworks
 
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce StrategyWebinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce StrategyLucidworks
 
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...Lucidworks
 
Apply Knowledge Graphs and Search for Real-World Decision Intelligence
Apply Knowledge Graphs and Search for Real-World Decision IntelligenceApply Knowledge Graphs and Search for Real-World Decision Intelligence
Apply Knowledge Graphs and Search for Real-World Decision IntelligenceLucidworks
 
Webinar: Building a Business Case for Enterprise Search
Webinar: Building a Business Case for Enterprise SearchWebinar: Building a Business Case for Enterprise Search
Webinar: Building a Business Case for Enterprise SearchLucidworks
 
Why Insight Engines Matter in 2020 and Beyond
Why Insight Engines Matter in 2020 and BeyondWhy Insight Engines Matter in 2020 and Beyond
Why Insight Engines Matter in 2020 and BeyondLucidworks
 

More from Lucidworks (20)

Search is the Tip of the Spear for Your B2B eCommerce Strategy
Search is the Tip of the Spear for Your B2B eCommerce StrategySearch is the Tip of the Spear for Your B2B eCommerce Strategy
Search is the Tip of the Spear for Your B2B eCommerce Strategy
 
Drive Agent Effectiveness in Salesforce
Drive Agent Effectiveness in SalesforceDrive Agent Effectiveness in Salesforce
Drive Agent Effectiveness in Salesforce
 
How Crate & Barrel Connects Shoppers with Relevant Products
How Crate & Barrel Connects Shoppers with Relevant ProductsHow Crate & Barrel Connects Shoppers with Relevant Products
How Crate & Barrel Connects Shoppers with Relevant Products
 
Lucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
Lucidworks & IMRG Webinar – Best-In-Class Retail Product DiscoveryLucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
Lucidworks & IMRG Webinar – Best-In-Class Retail Product Discovery
 
Connected Experiences Are Personalized Experiences
Connected Experiences Are Personalized ExperiencesConnected Experiences Are Personalized Experiences
Connected Experiences Are Personalized Experiences
 
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
Intelligent Insight Driven Policing with MC+A, Toronto Police Service and Luc...
 
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
[Webinar] Intelligent Policing. Leveraging Data to more effectively Serve Com...
 
Preparing for Peak in Ecommerce | eTail Asia 2020
Preparing for Peak in Ecommerce | eTail Asia 2020Preparing for Peak in Ecommerce | eTail Asia 2020
Preparing for Peak in Ecommerce | eTail Asia 2020
 
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
Accelerate The Path To Purchase With Product Discovery at Retail Innovation C...
 
AI-Powered Linguistics and Search with Fusion and Rosette
AI-Powered Linguistics and Search with Fusion and RosetteAI-Powered Linguistics and Search with Fusion and Rosette
AI-Powered Linguistics and Search with Fusion and Rosette
 
The Service Industry After COVID-19: The Soul of Service in a Virtual Moment
The Service Industry After COVID-19: The Soul of Service in a Virtual MomentThe Service Industry After COVID-19: The Soul of Service in a Virtual Moment
The Service Industry After COVID-19: The Soul of Service in a Virtual Moment
 
Webinar: Smart answers for employee and customer support after covid 19 - Europe
Webinar: Smart answers for employee and customer support after covid 19 - EuropeWebinar: Smart answers for employee and customer support after covid 19 - Europe
Webinar: Smart answers for employee and customer support after covid 19 - Europe
 
Smart Answers for Employee and Customer Support After COVID-19
Smart Answers for Employee and Customer Support After COVID-19Smart Answers for Employee and Customer Support After COVID-19
Smart Answers for Employee and Customer Support After COVID-19
 
Applying AI & Search in Europe - featuring 451 Research
Applying AI & Search in Europe - featuring 451 ResearchApplying AI & Search in Europe - featuring 451 Research
Applying AI & Search in Europe - featuring 451 Research
 
Webinar: Accelerate Data Science with Fusion 5.1
Webinar: Accelerate Data Science with Fusion 5.1Webinar: Accelerate Data Science with Fusion 5.1
Webinar: Accelerate Data Science with Fusion 5.1
 
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce StrategyWebinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
Webinar: 5 Must-Have Items You Need for Your 2020 Ecommerce Strategy
 
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
Where Search Meets Science and Style Meets Savings: Nordstrom Rack's Journey ...
 
Apply Knowledge Graphs and Search for Real-World Decision Intelligence
Apply Knowledge Graphs and Search for Real-World Decision IntelligenceApply Knowledge Graphs and Search for Real-World Decision Intelligence
Apply Knowledge Graphs and Search for Real-World Decision Intelligence
 
Webinar: Building a Business Case for Enterprise Search
Webinar: Building a Business Case for Enterprise SearchWebinar: Building a Business Case for Enterprise Search
Webinar: Building a Business Case for Enterprise Search
 
Why Insight Engines Matter in 2020 and Beyond
Why Insight Engines Matter in 2020 and BeyondWhy Insight Engines Matter in 2020 and Beyond
Why Insight Engines Matter in 2020 and Beyond
 

Recently uploaded

TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc
 
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptxEIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptxEarley Information Science
 
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...
Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...Neo4j
 
The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024Rafal Los
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slidespraypatel2
 
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Miguel Araújo
 
Breaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path MountBreaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path MountPuma Security, LLC
 
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking MenDelhi Call girls
 
Histor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slideHistor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slidevu2urc
 
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Drew Madelung
 
What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?Antenna Manufacturer Coco
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking MenDelhi Call girls
 
Raspberry Pi 5: Challenges and Solutions in Bringing up an OpenGL/Vulkan Driv...
Raspberry Pi 5: Challenges and Solutions in Bringing up an OpenGL/Vulkan Driv...Raspberry Pi 5: Challenges and Solutions in Bringing up an OpenGL/Vulkan Driv...
Raspberry Pi 5: Challenges and Solutions in Bringing up an OpenGL/Vulkan Driv...Igalia
 
Boost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivityBoost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivityPrincipled Technologies
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...apidays
 
Factors to Consider When Choosing Accounts Payable Services Providers.pptx
Factors to Consider When Choosing Accounts Payable Services Providers.pptxFactors to Consider When Choosing Accounts Payable Services Providers.pptx
Factors to Consider When Choosing Accounts Payable Services Providers.pptxKatpro Technologies
 
CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Servicegiselly40
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...Martijn de Jong
 
Finology Group – Insurtech Innovation Award 2024
Finology Group – Insurtech Innovation Award 2024Finology Group – Insurtech Innovation Award 2024
Finology Group – Insurtech Innovation Award 2024The Digital Insurer
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreternaman860154
 

Recently uploaded (20)

TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
 
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptxEIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
EIS-Webinar-Prompt-Knowledge-Eng-2024-04-08.pptx
 
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...
Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...Workshop - Best of Both Worlds_ Combine  KG and Vector search for  enhanced R...
Workshop - Best of Both Worlds_ Combine KG and Vector search for enhanced R...
 
The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slides
 
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
 
Breaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path MountBreaking the Kubernetes Kill Chain: Host Path Mount
Breaking the Kubernetes Kill Chain: Host Path Mount
 
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
 
Histor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slideHistor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slide
 
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
 
What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?What Are The Drone Anti-jamming Systems Technology?
What Are The Drone Anti-jamming Systems Technology?
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men
 
Raspberry Pi 5: Challenges and Solutions in Bringing up an OpenGL/Vulkan Driv...
Raspberry Pi 5: Challenges and Solutions in Bringing up an OpenGL/Vulkan Driv...Raspberry Pi 5: Challenges and Solutions in Bringing up an OpenGL/Vulkan Driv...
Raspberry Pi 5: Challenges and Solutions in Bringing up an OpenGL/Vulkan Driv...
 
Boost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivityBoost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivity
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
 
Factors to Consider When Choosing Accounts Payable Services Providers.pptx
Factors to Consider When Choosing Accounts Payable Services Providers.pptxFactors to Consider When Choosing Accounts Payable Services Providers.pptx
Factors to Consider When Choosing Accounts Payable Services Providers.pptx
 
CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Service
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...
 
Finology Group – Insurtech Innovation Award 2024
Finology Group – Insurtech Innovation Award 2024Finology Group – Insurtech Innovation Award 2024
Finology Group – Insurtech Innovation Award 2024
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreter
 

Speeding Up Lucene with GPUs - Ishan Chattopadhyaya, Unbxd

  • 1. Speeding up Lucene using GPUs Ishan Chattopadhyaya Committer & PMC Member, Apache Lucene/Solr Kishore Angani Senior Search Architect, Unbxd @ichattopadhyaya @kishore_angani #Activate18 #ActivateSearch
  • 2. Agenda • Brief History of Computers • GPGPU computing • Inverted Index • Searching on GPU  Spatial Search  BM25  Advanced Scoring  Faceting • Work ahead
  • 3. Brief History of Computers
  • 4. What is GPGPU computing?
  • 5. What is an Inverted Index?
  • 6. Search on GPU  Spatial Search  Native scoring (BM25)  Faceting  Advanced Scoring
  • 9. Spatial Search: Code int *docs; float *lats, *lngs; long N; device_vector<int> gpu_docs; device_vector<float> gpu_lats, gpu_lngs; JNIEXPORT jint JNICALL Java_GpuGeoDist_initIndex(JNIEnv *env, jobject obj, jintArray docIdsArray, jfloatArray latsArray, jfloatArray lngsArray) { N = env->GetArrayLength(docIdsArray); docs = env->GetIntArrayElements(docIdsArray, NULL); lats = env->GetFloatArrayElements(latsArray, NULL); lngs = env->GetFloatArrayElements(lngsArray, NULL); // Copy the vectors to the device host_vector<int> cpu_docs(N); for (int i=0; i<N; i++) cpu_docs[i] = docs[i]; gpu_docs = cpu_docs; host_vector<float> cpu_lats(N); for (int i=0; i<N; i++) cpu_lats[i] = lats[i]; gpu_lats = cpu_lats; host_vector<float> cpu_lngs(N); for (int i=0; i<N; i++) cpu_lngs[i] = lngs[i]; gpu_lngs = cpu_lngs; }
  • 10. Spatial Search: Code JNIEXPORT jobject JNICALL Java_GpuGeoDist_findNearest(JNIEnv *env, jobject obj, jfloat lat, jfloat lng) { // Actual scoring/sorting on device device_vector<float> gpu_distances(N); transform(gpu_lats.begin(), gpu_lats.end(), gpu_lngs.begin(), gpu_distances.begin(), geodist(lat, lng)); device_vector<int> docIds = gpu_docs; thrust::sort_by_key(gpu_distances.begin(), gpu_distances.end(), docIds.begin()); int *docs = (int*)malloc(N*4*2); float *distances = &((float*)docs)[N]; thrust::copy(docIds.begin(), docIds.end(), docs); thrust::copy(gpu_distances.begin(), gpu_distances.end(), distances); jobject directBuffer = env->NewDirectByteBuffer((void*)docs, N*2*4); return directBuffer; } struct geodist { const float refX,refY; geodist(float _x, float _y): refX(_x), refY(_y) {} __host__ __device__ float operator()(float &x, float &y) const { return sqrt((x - refX) * (x-refX) + (y-refY)*(y-refY)); } };
  • 13. Native Scoring (BM25): Initialization  Postings  Norms  Inverted Document Frequencies
  • 14. Native Scoring (BM25): Query processing  Term dictionary lookup  Scattering term rows  Computing term scores  Sorting by scores
  • 20. Thank you! Ishan Chattopadhyaya Committer & PMC Member, Apache Lucene/Solr Kishore Angani Senior Search Architect, Unbxd @ichattopadhyaya @kishore_angani #Activate18 #ActivateSearch
  • 21. Thank you! Ishan Chattopadhyaya Committer & PMC Member, Apache Lucene/Solr Kishore Angani Senior Search Architect, Unbxd @ichattopadhyaya @kishore_angani #Activate18 #ActivateSearch