SlideShare a Scribd company logo
Lazy Multigram Learning Environment for ACRS SISY 2008 Zoran Popović shoom013[at] gmail.com Institute for Multidisciplinary Research Belgrade University
Automated Content Recommendation Systems (ACRS) and Information Retrieval ,[object Object],[object Object],[object Object],[object Object],[object Object]
k-Nearest Neighbour Method (kNN) ,[object Object]
k-Nearest Neighbour Method (kNN) ,[object Object],[object Object],[object Object],[object Object],[object Object]
SVM classification ,[object Object]
SVM classification ,[object Object],[object Object],[object Object],[object Object],[object Object]
N-grams ,[object Object],[object Object],[object Object],[object Object]
N-grams ,[object Object],[object Object],[object Object],[object Object]
ngram.jar – generating N-grams java ngram.generator.Arff inDir outfile.arff [options] Options: -l <Lmin>  =  lower rank bound (default=1) -m <Lmax>  =  upper rank bound (default=10) -i <invf>  =  inverse frequency threshold (default=0.34) -N <N>  =  N-gram order (default=3) -D <depth> =  biggest number of N-grams (default=4294967295) -w <url> =  use database with jdbc url to write data -r <url> =  use database with jdbc url to read arff -u =  do not use normalized vectors for output EXAMPLE:   .  Arff.sh . .ut.arff -l 1 -m 500 -N 4 -i 0.5 -D 1048576     (subfolders as category names)
ngram.jar – JDBC storage ,[object Object],[object Object],[object Object],[object Object]
Results with N-grams ,[object Object],[object Object]
Some good indications about performance with multigrams ,[object Object],[object Object],[object Object]
Weka, Data Mining Tool – ARFF (Attribute-Relation File Format)
Example of multi-instance ARFF file with sparse data ,[object Object]
Weka's SVM MI SMO classifier
Weka – JDBC Horizontal form of data is needed - all attributes in each row ARFF supports data given by sparse vectors (zero values omitted – this also speeds up SVM)
<WEKA_HOME>/DatabaseUtils.props: ... jdbcDriver=...org.gjt.mm.mysql.Driver,oracle.jdbc.driver.OracleDriver ... CHAR=0 ... VARCHAR=0 VARCHAR2=0 ... NUMBER=7 .... Weka – JDBC
[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],Weka command line
SQL and vertical N-gram storage PROFILES:  NSHARED NGRAMS:  TSHARED
SQL and data transformation ,[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object],[object Object]
SQL and data transformation Data2.sql – cursor function for a query returning horizontal form of data:
-- fewcols: builds one "horizontal" row for bag p_bagid and returns it as a ref cursor.
-- The row comes from dynamic SQL of the form
--   select <bag_id> bag_id, <value> A<rank>, ..., '<category>' category from dual
-- Parameters:
--   p_lmin, p_lmax : bounds applied to rank0 (rank of an N-gram as computed in cursor C)
--   p_invf         : upper bound on (TSHARED rows for the N-gram) / (distinct bags)
--   p_bagid        : bag to export
--   p_norm         : when true, counts are divided by the value fetched from cursor CS
-- Returns: sys_refcursor opened on the generated single-row query.
-- NO_DATA_FOUND / TOO_MANY_ROWS may be raised by the category lookup when the bag has
-- no row, or more than one distinct category, in TSHARED.
create or replace function fewcols(
    p_lmin  number,
    p_lmax  number,
    p_invf  number,
    p_bagid number,
    p_norm  boolean default true
) return sys_refcursor
is
    str  varchar2(32000);
    cat  varchar2(64) := null;
    opt  sys_refcursor;
    j    number;          -- next column index that still has to be emitted
    norm number := 1;

    -- N-grams of one bag that pass the rank and inverse-frequency filters,
    -- ordered so that column indices are produced in ascending order.
    cursor C(lmin number, lmax number, invf number, bagid number) is
        select T.rank-1 rank, TS.count count, TS.bag_id, TS.category, TS.N
        from tshared TS,
             (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                     rownum rank, N, ngram
              from (select N, ngram, count from nshared order by N asc, count desc) T0) T
        where TS.ngram=T.ngram and TS.N=T.N
          and T.rank0 between lmin and lmax
          and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
              /(select count(distinct bag_id) from tshared) <= invf
          and TS.bag_id=bagid
        order by TS.bag_id asc, TS.N asc, T.rank asc;

    -- Normalisation divisor for the same row set as cursor C.
    -- NOTE(review): this is the Euclidean norm of the rank values, while the values
    -- being divided are counts; sqrt(sum(TS.count*TS.count)) looks like the intent.
    -- Left unchanged - confirm with the author before altering the output values.
    cursor CS(lmin number, lmax number, invf number, bagid number) is
        select sqrt(sum((T.rank-1)*(T.rank-1))) norm
        from tshared TS,
             (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                     rownum rank, N, ngram
              from (select N, ngram, count from nshared order by N asc, count desc) T0) T
        where TS.ngram=T.ngram and TS.N=T.N
          and T.rank0 between lmin and lmax
          and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
              /(select count(distinct bag_id) from tshared) <= invf
          and TS.bag_id=bagid;
begin
    str := to_char(p_bagid)||' bag_id';
    j := p_lmin;

    if p_norm then
        open CS(p_lmin,p_lmax,p_invf,p_bagid);
        fetch CS into norm;
        close CS;
        -- sum() over zero rows yields NULL; treat it like 0 so the divisor stays usable.
        if norm is null or norm=0 then
            norm := 1;
        end if;
    end if;

    for i in C(p_lmin,p_lmax,p_invf,p_bagid) loop
        if cat is null then
            cat := i.category;
        end if;
        -- Zero-fill the columns between the previous N-gram and this one.
        if j<i.rank then
            for gap in j .. i.rank-1 loop
                str := str || ', 0 A' || to_char(gap);
            end loop;
        end if;
        str := str || ', ' || to_char(i.count/norm,'9999.99999999') || ' A' || to_char(i.rank);
        j := i.rank+1;
    end loop;

    -- No N-gram survived the filters, or trailing columns are missing:
    -- look the category up directly and zero-fill up to p_lmax.
    if cat is null or j<=p_lmax then
        select distinct category into cat from tshared where bag_id=p_bagid;
        for gap in j .. p_lmax loop
            str := str || ', 0 A' || to_char(gap);
        end loop;
    end if;

    -- Double embedded single quotes so a category such as O'Reilly cannot break
    -- (or inject into) the generated statement.
    str := str || ', ''' || replace(cat, '''', '''''') || ''' category';

    open opt for 'select ' || str || ' from dual';
    return opt;
end;
/
SQL and data transformation Data3.sql – procedure generating table DATA3 in horizontal form:
-- data: rebuilds table DATA3 with one "horizontal" row per distinct bag_id in TSHARED.
-- The first bag creates the table (create table data3 as select ...); every further
-- bag is appended with insert into data3 select ... from dual.
-- Parameters:
--   p_lmin, p_lmax : bounds applied to rank0 (rank of an N-gram as computed in cursor C)
--   p_invf         : upper bound on (TSHARED rows for the N-gram) / (distinct bags)
--   p_norm         : when true, counts are divided by the value fetched from cursor CS
-- NO_DATA_FOUND / TOO_MANY_ROWS may be raised by the category lookup when a bag has
-- more than one distinct category in TSHARED.
-- NOTE(review): columns are emitted only for ranks a bag actually has outside
-- p_lmin..p_lmax, so bags may produce differing column lists and the insert can
-- fail with a column-count mismatch - confirm against real data.
create or replace procedure data(
    p_lmin number,
    p_lmax number,
    p_invf number,
    p_norm boolean default true
)
is
    str   varchar2(32000);
    cat   varchar2(64) := null;
    first boolean := true;
    j     number;          -- next column index that still has to be emitted
    norm  number := 1;

    -- N-grams of one bag that pass the rank and inverse-frequency filters,
    -- ordered so that column indices are produced in ascending order.
    cursor C(lmin number, lmax number, invf number, bagid number) is
        select T.rank-1 rank, TS.count count, TS.bag_id, TS.category, TS.N
        from tshared TS,
             (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                     rownum rank, N, ngram
              from (select N, ngram, count from nshared order by N asc, count desc) T0) T
        where TS.ngram=T.ngram and TS.N=T.N
          and T.rank0 between lmin and lmax
          and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
              /(select count(distinct bag_id) from tshared) <= invf
          and TS.bag_id=bagid
        order by TS.bag_id asc, TS.N asc, T.rank asc;

    -- Normalisation divisor for the same row set as cursor C.
    -- NOTE(review): this is the Euclidean norm of the rank values, while the values
    -- being divided are counts; sqrt(sum(TS.count*TS.count)) looks like the intent.
    -- Left unchanged - confirm with the author before altering the output values.
    cursor CS(lmin number, lmax number, invf number, bagid number) is
        select sqrt(sum((T.rank-1)*(T.rank-1))) norm
        from tshared TS,
             (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                     rownum rank, N, ngram
              from (select N, ngram, count from nshared order by N asc, count desc) T0) T
        where TS.ngram=T.ngram and TS.N=T.N
          and T.rank0 between lmin and lmax
          and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
              /(select count(distinct bag_id) from tshared) <= invf
          and TS.bag_id=bagid;
begin
    -- Cursor FOR loop: the implicit cursor is closed automatically, also on error
    -- (the original explicit cursor B was opened but never closed).
    for b in (select distinct bag_id from tshared) loop
        str := to_char(b.bag_id)||' bag_id';
        j := p_lmin;
        -- Reset per bag; otherwise a bag whose ranks reach p_lmax silently keeps
        -- the category of the previously processed bag.
        cat := null;

        if p_norm then
            open CS(p_lmin,p_lmax,p_invf,b.bag_id);
            fetch CS into norm;
            close CS;
            -- sum() over zero rows yields NULL; treat it like 0 so the divisor stays usable.
            if norm is null or norm=0 then
                norm := 1;
            end if;
        end if;

        for i in C(p_lmin,p_lmax,p_invf,b.bag_id) loop
            if cat is null then
                cat := i.category;
            end if;
            -- Zero-fill the columns between the previous N-gram and this one.
            if j<i.rank then
                for gap in j .. i.rank-1 loop
                    str := str || ', 0 A' || to_char(gap);
                end loop;
            end if;
            str := str || ', ' || to_char(i.count/norm,'9999.99999999') || ' A' || to_char(i.rank);
            j := i.rank+1;
        end loop;

        -- No N-gram survived the filters, or trailing columns are missing:
        -- look the category up directly and zero-fill up to p_lmax.
        if cat is null or j<=p_lmax then
            select distinct category into cat from tshared where bag_id=b.bag_id;
            for gap in j .. p_lmax loop
                str := str || ', 0 A' || to_char(gap);
            end loop;
        end if;

        -- Double embedded single quotes so a category such as O'Reilly cannot break
        -- (or inject into) the generated statement.
        str := str || ', ''' || replace(cat, '''', '''''') || ''' category';

        if first then
            first := false;
            begin
                execute immediate 'drop table data3';
            exception
                when others then
                    -- Only "table or view does not exist" is expected here.
                    if sqlcode != -942 then
                        raise;
                    end if;
            end;
            execute immediate 'create table data3 as select ' || str || ' from dual';
        else
            execute immediate 'insert into data3 select ' || str || ' from dual';
        end if;
    end loop;

    commit;
end;
/
Conclusions so far ... ,[object Object],[object Object],[object Object]
? Questions ...

More Related Content

Similar to SISY 2008

Lk module3
Lk module3Lk module3
Lk module3
Krishna Nanda
 
MLlib sparkmeetup_8_6_13_final_reduced
MLlib sparkmeetup_8_6_13_final_reducedMLlib sparkmeetup_8_6_13_final_reduced
MLlib sparkmeetup_8_6_13_final_reduced
Chao Chen
 
interenship.pptx
interenship.pptxinterenship.pptx
interenship.pptx
Naveen316549
 
Lab 2: Classification and Regression Prediction Models, training and testing ...
Lab 2: Classification and Regression Prediction Models, training and testing ...Lab 2: Classification and Regression Prediction Models, training and testing ...
Lab 2: Classification and Regression Prediction Models, training and testing ...
Yao Yao
 
Machine learning using spark
Machine learning using sparkMachine learning using spark
Machine learning using spark
Ran Silberman
 
Workshop NGS data analysis - 2
Workshop NGS data analysis - 2Workshop NGS data analysis - 2
Workshop NGS data analysis - 2
Maté Ongenaert
 
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scalaAutomate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
Chetan Khatri
 
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
Yao Yao
 
Machinelearning Spark Hadoop User Group Munich Meetup 2016
Machinelearning Spark Hadoop User Group Munich Meetup 2016Machinelearning Spark Hadoop User Group Munich Meetup 2016
Machinelearning Spark Hadoop User Group Munich Meetup 2016
Comsysto Reply GmbH
 
Scala in Places API
Scala in Places APIScala in Places API
Scala in Places API
Łukasz Bałamut
 
Spark ml streaming
Spark ml streamingSpark ml streaming
Spark ml streaming
Adam Doyle
 
No more struggles with Apache Spark workloads in production
No more struggles with Apache Spark workloads in productionNo more struggles with Apache Spark workloads in production
No more struggles with Apache Spark workloads in production
Chetan Khatri
 
PPT on Data Science Using Python
PPT on Data Science Using PythonPPT on Data Science Using Python
PPT on Data Science Using Python
NishantKumar1179
 
Python-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptxPython-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptx
ParveenShaik21
 
2014.06.24.what is ubix
2014.06.24.what is ubix2014.06.24.what is ubix
2014.06.24.what is ubix
Jim Cooley
 
Snmp class
Snmp classSnmp class
Snmp class
aduitsis
 
Fosdem2017 Scientific computing on Jruby
Fosdem2017  Scientific computing on JrubyFosdem2017  Scientific computing on Jruby
Fosdem2017 Scientific computing on Jruby
Prasun Anand
 
Device status anomaly detection
Device status anomaly detectionDevice status anomaly detection
Device status anomaly detection
David Tung
 
Spark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with SparkSpark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with Spark
samthemonad
 
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
GeeksLab Odessa
 

Similar to SISY 2008 (20)

Lk module3
Lk module3Lk module3
Lk module3
 
MLlib sparkmeetup_8_6_13_final_reduced
MLlib sparkmeetup_8_6_13_final_reducedMLlib sparkmeetup_8_6_13_final_reduced
MLlib sparkmeetup_8_6_13_final_reduced
 
interenship.pptx
interenship.pptxinterenship.pptx
interenship.pptx
 
Lab 2: Classification and Regression Prediction Models, training and testing ...
Lab 2: Classification and Regression Prediction Models, training and testing ...Lab 2: Classification and Regression Prediction Models, training and testing ...
Lab 2: Classification and Regression Prediction Models, training and testing ...
 
Machine learning using spark
Machine learning using sparkMachine learning using spark
Machine learning using spark
 
Workshop NGS data analysis - 2
Workshop NGS data analysis - 2Workshop NGS data analysis - 2
Workshop NGS data analysis - 2
 
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scalaAutomate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
Automate ml workflow_transmogrif_ai-_chetan_khatri_berlin-scala
 
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
Mini-lab 1: Stochastic Gradient Descent classifier, Optimizing Logistic Regre...
 
Machinelearning Spark Hadoop User Group Munich Meetup 2016
Machinelearning Spark Hadoop User Group Munich Meetup 2016Machinelearning Spark Hadoop User Group Munich Meetup 2016
Machinelearning Spark Hadoop User Group Munich Meetup 2016
 
Scala in Places API
Scala in Places APIScala in Places API
Scala in Places API
 
Spark ml streaming
Spark ml streamingSpark ml streaming
Spark ml streaming
 
No more struggles with Apache Spark workloads in production
No more struggles with Apache Spark workloads in productionNo more struggles with Apache Spark workloads in production
No more struggles with Apache Spark workloads in production
 
PPT on Data Science Using Python
PPT on Data Science Using PythonPPT on Data Science Using Python
PPT on Data Science Using Python
 
Python-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptxPython-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptx
 
2014.06.24.what is ubix
2014.06.24.what is ubix2014.06.24.what is ubix
2014.06.24.what is ubix
 
Snmp class
Snmp classSnmp class
Snmp class
 
Fosdem2017 Scientific computing on Jruby
Fosdem2017  Scientific computing on JrubyFosdem2017  Scientific computing on Jruby
Fosdem2017 Scientific computing on Jruby
 
Device status anomaly detection
Device status anomaly detectionDevice status anomaly detection
Device status anomaly detection
 
Spark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with SparkSpark 4th Meetup Londond - Building a Product with Spark
Spark 4th Meetup Londond - Building a Product with Spark
 
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
AI&BigData Lab.Руденко Петр. Automation and optimisation of machine learning ...
 

More from Zoran Popovic

Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jezikuEvaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
Zoran Popovic
 
Veštačka inteligencija 2
Veštačka inteligencija 2Veštačka inteligencija 2
Veštačka inteligencija 2
Zoran Popovic
 
Veštačka inteligencija 1
Veštačka inteligencija 1Veštačka inteligencija 1
Veštačka inteligencija 1
Zoran Popovic
 
Machine Learning
Machine LearningMachine Learning
Machine Learning
Zoran Popovic
 
Ekspertni sistemi
Ekspertni sistemiEkspertni sistemi
Ekspertni sistemi
Zoran Popovic
 
Soft Computing
Soft ComputingSoft Computing
Soft Computing
Zoran Popovic
 
Magistarska teza
Magistarska tezaMagistarska teza
Magistarska teza
Zoran Popovic
 
Magistarska teza - prezentacija
Magistarska teza - prezentacijaMagistarska teza - prezentacija
Magistarska teza - prezentacija
Zoran Popovic
 
Tag
TagTag
SAP, Linux, Virtualization and ... Itanium
SAP, Linux, Virtualization and ... ItaniumSAP, Linux, Virtualization and ... Itanium
SAP, Linux, Virtualization and ... Itanium
Zoran Popovic
 
SSO secure communication flow for web Oracle login
SSO secure communication flow for web Oracle loginSSO secure communication flow for web Oracle login
SSO secure communication flow for web Oracle login
Zoran Popovic
 
Migration to 9i
Migration to 9iMigration to 9i
Migration to 9i
Zoran Popovic
 
ETRAN 2008
ETRAN 2008ETRAN 2008
ETRAN 2008
Zoran Popovic
 

More from Zoran Popovic (13)

Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jezikuEvaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
Evaluacija programa za obeležavanje (etiketiranje) teksta na srpskom jeziku
 
Veštačka inteligencija 2
Veštačka inteligencija 2Veštačka inteligencija 2
Veštačka inteligencija 2
 
Veštačka inteligencija 1
Veštačka inteligencija 1Veštačka inteligencija 1
Veštačka inteligencija 1
 
Machine Learning
Machine LearningMachine Learning
Machine Learning
 
Ekspertni sistemi
Ekspertni sistemiEkspertni sistemi
Ekspertni sistemi
 
Soft Computing
Soft ComputingSoft Computing
Soft Computing
 
Magistarska teza
Magistarska tezaMagistarska teza
Magistarska teza
 
Magistarska teza - prezentacija
Magistarska teza - prezentacijaMagistarska teza - prezentacija
Magistarska teza - prezentacija
 
Tag
TagTag
Tag
 
SAP, Linux, Virtualization and ... Itanium
SAP, Linux, Virtualization and ... ItaniumSAP, Linux, Virtualization and ... Itanium
SAP, Linux, Virtualization and ... Itanium
 
SSO secure communication flow for web Oracle login
SSO secure communication flow for web Oracle loginSSO secure communication flow for web Oracle login
SSO secure communication flow for web Oracle login
 
Migration to 9i
Migration to 9iMigration to 9i
Migration to 9i
 
ETRAN 2008
ETRAN 2008ETRAN 2008
ETRAN 2008
 

Recently uploaded

How to Get CNIC Information System with Paksim Ga.pptx
How to Get CNIC Information System with Paksim Ga.pptxHow to Get CNIC Information System with Paksim Ga.pptx
How to Get CNIC Information System with Paksim Ga.pptx
danishmna97
 
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdfObservability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Paige Cruz
 
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdfUnlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Malak Abu Hammad
 
Infrastructure Challenges in Scaling RAG with Custom AI models
Infrastructure Challenges in Scaling RAG with Custom AI modelsInfrastructure Challenges in Scaling RAG with Custom AI models
Infrastructure Challenges in Scaling RAG with Custom AI models
Zilliz
 
“I’m still / I’m still / Chaining from the Block”
“I’m still / I’m still / Chaining from the Block”“I’m still / I’m still / Chaining from the Block”
“I’m still / I’m still / Chaining from the Block”
Claudio Di Ciccio
 
Communications Mining Series - Zero to Hero - Session 1
Communications Mining Series - Zero to Hero - Session 1Communications Mining Series - Zero to Hero - Session 1
Communications Mining Series - Zero to Hero - Session 1
DianaGray10
 
Serial Arm Control in Real Time Presentation
Serial Arm Control in Real Time PresentationSerial Arm Control in Real Time Presentation
Serial Arm Control in Real Time Presentation
tolgahangng
 
AI 101: An Introduction to the Basics and Impact of Artificial Intelligence
AI 101: An Introduction to the Basics and Impact of Artificial IntelligenceAI 101: An Introduction to the Basics and Impact of Artificial Intelligence
AI 101: An Introduction to the Basics and Impact of Artificial Intelligence
IndexBug
 
Introduction to CHERI technology - Cybersecurity
Introduction to CHERI technology - CybersecurityIntroduction to CHERI technology - Cybersecurity
Introduction to CHERI technology - Cybersecurity
mikeeftimakis1
 
Mariano G Tinti - Decoding SpaceX
Mariano G Tinti - Decoding SpaceXMariano G Tinti - Decoding SpaceX
Mariano G Tinti - Decoding SpaceX
Mariano Tinti
 
How to use Firebase Data Connect For Flutter
How to use Firebase Data Connect For FlutterHow to use Firebase Data Connect For Flutter
How to use Firebase Data Connect For Flutter
Daiki Mogmet Ito
 
Full-RAG: A modern architecture for hyper-personalization
Full-RAG: A modern architecture for hyper-personalizationFull-RAG: A modern architecture for hyper-personalization
Full-RAG: A modern architecture for hyper-personalization
Zilliz
 
TrustArc Webinar - 2024 Global Privacy Survey
TrustArc Webinar - 2024 Global Privacy SurveyTrustArc Webinar - 2024 Global Privacy Survey
TrustArc Webinar - 2024 Global Privacy Survey
TrustArc
 
HCL Notes und Domino Lizenzkostenreduzierung in der Welt von DLAU
HCL Notes und Domino Lizenzkostenreduzierung in der Welt von DLAUHCL Notes und Domino Lizenzkostenreduzierung in der Welt von DLAU
HCL Notes und Domino Lizenzkostenreduzierung in der Welt von DLAU
panagenda
 
Driving Business Innovation: Latest Generative AI Advancements & Success Story
Driving Business Innovation: Latest Generative AI Advancements & Success StoryDriving Business Innovation: Latest Generative AI Advancements & Success Story
Driving Business Innovation: Latest Generative AI Advancements & Success Story
Safe Software
 
Mind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AIMind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AI
Kumud Singh
 
GenAI Pilot Implementation in the organizations
GenAI Pilot Implementation in the organizationsGenAI Pilot Implementation in the organizations
GenAI Pilot Implementation in the organizations
kumardaparthi1024
 
Essentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FMEEssentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FME
Safe Software
 
RESUME BUILDER APPLICATION Project for students
RESUME BUILDER APPLICATION Project for studentsRESUME BUILDER APPLICATION Project for students
RESUME BUILDER APPLICATION Project for students
KAMESHS29
 
20240609 QFM020 Irresponsible AI Reading List May 2024
20240609 QFM020 Irresponsible AI Reading List May 202420240609 QFM020 Irresponsible AI Reading List May 2024
20240609 QFM020 Irresponsible AI Reading List May 2024
Matthew Sinclair
 

Recently uploaded (20)

How to Get CNIC Information System with Paksim Ga.pptx
How to Get CNIC Information System with Paksim Ga.pptxHow to Get CNIC Information System with Paksim Ga.pptx
How to Get CNIC Information System with Paksim Ga.pptx
 
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdfObservability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
Observability Concepts EVERY Developer Should Know -- DeveloperWeek Europe.pdf
 
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdfUnlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
Unlock the Future of Search with MongoDB Atlas_ Vector Search Unleashed.pdf
 
Infrastructure Challenges in Scaling RAG with Custom AI models
Infrastructure Challenges in Scaling RAG with Custom AI modelsInfrastructure Challenges in Scaling RAG with Custom AI models
Infrastructure Challenges in Scaling RAG with Custom AI models
 
“I’m still / I’m still / Chaining from the Block”
“I’m still / I’m still / Chaining from the Block”“I’m still / I’m still / Chaining from the Block”
“I’m still / I’m still / Chaining from the Block”
 
Communications Mining Series - Zero to Hero - Session 1
Communications Mining Series - Zero to Hero - Session 1Communications Mining Series - Zero to Hero - Session 1
Communications Mining Series - Zero to Hero - Session 1
 
Serial Arm Control in Real Time Presentation
Serial Arm Control in Real Time PresentationSerial Arm Control in Real Time Presentation
Serial Arm Control in Real Time Presentation
 
AI 101: An Introduction to the Basics and Impact of Artificial Intelligence
AI 101: An Introduction to the Basics and Impact of Artificial IntelligenceAI 101: An Introduction to the Basics and Impact of Artificial Intelligence
AI 101: An Introduction to the Basics and Impact of Artificial Intelligence
 
Introduction to CHERI technology - Cybersecurity
Introduction to CHERI technology - CybersecurityIntroduction to CHERI technology - Cybersecurity
Introduction to CHERI technology - Cybersecurity
 
Mariano G Tinti - Decoding SpaceX
Mariano G Tinti - Decoding SpaceXMariano G Tinti - Decoding SpaceX
Mariano G Tinti - Decoding SpaceX
 
How to use Firebase Data Connect For Flutter
How to use Firebase Data Connect For FlutterHow to use Firebase Data Connect For Flutter
How to use Firebase Data Connect For Flutter
 
Full-RAG: A modern architecture for hyper-personalization
Full-RAG: A modern architecture for hyper-personalizationFull-RAG: A modern architecture for hyper-personalization
Full-RAG: A modern architecture for hyper-personalization
 
TrustArc Webinar - 2024 Global Privacy Survey
TrustArc Webinar - 2024 Global Privacy SurveyTrustArc Webinar - 2024 Global Privacy Survey
TrustArc Webinar - 2024 Global Privacy Survey
 
HCL Notes und Domino Lizenzkostenreduzierung in der Welt von DLAU
HCL Notes und Domino Lizenzkostenreduzierung in der Welt von DLAUHCL Notes und Domino Lizenzkostenreduzierung in der Welt von DLAU
HCL Notes und Domino Lizenzkostenreduzierung in der Welt von DLAU
 
Driving Business Innovation: Latest Generative AI Advancements & Success Story
Driving Business Innovation: Latest Generative AI Advancements & Success StoryDriving Business Innovation: Latest Generative AI Advancements & Success Story
Driving Business Innovation: Latest Generative AI Advancements & Success Story
 
Mind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AIMind map of terminologies used in context of Generative AI
Mind map of terminologies used in context of Generative AI
 
GenAI Pilot Implementation in the organizations
GenAI Pilot Implementation in the organizationsGenAI Pilot Implementation in the organizations
GenAI Pilot Implementation in the organizations
 
Essentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FMEEssentials of Automations: The Art of Triggers and Actions in FME
Essentials of Automations: The Art of Triggers and Actions in FME
 
RESUME BUILDER APPLICATION Project for students
RESUME BUILDER APPLICATION Project for studentsRESUME BUILDER APPLICATION Project for students
RESUME BUILDER APPLICATION Project for students
 
20240609 QFM020 Irresponsible AI Reading List May 2024
20240609 QFM020 Irresponsible AI Reading List May 202420240609 QFM020 Irresponsible AI Reading List May 2024
20240609 QFM020 Irresponsible AI Reading List May 2024
 

SISY 2008

  • 1. Lazy Multigram Learning Environment for ACRS SISY 2008 Zoran Popović shoom013[at] gmail.com Institute for Multidisciplinary Research Belgrade University
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9. ngram.jar – generating N-grams java ngram.generator.Arff inDir outfile.arff [options] Options: -l <Lmin> = lower rank bound (default=1) -m <Lmax> = upper rank bound (default=10) -i <invf> = inverse frequency threshold (default=0.34) -N <N> = N-gram order (default=3) -D <depth> = biggest number of N-grams (default=4294967295) -w <url> = use database with jdbc url to write data -r <url> = use database with jdbc url to read arff -u = do not use normalized vectors for output EXAMPLE: . Arff.sh . .ut.arff -l 1 -m 500 -N 4 -i 0.5 -D 1048576 (subfolders as category names)
  • 10.
  • 11.
  • 12.
  • 13. Weka, Data Mining Tool – ARFF (Attribute-Relation File Format)
  • 14.
  • 15. Weka's SVM MI SMO classifier
  • 16. Weka – JDBC Horizontal form of data is needed - all attributes in each row ARFF supports data given by sparse vectors (zero values omitted – this also speeds up SVM)
  • 17. <WEKA_HOME>/DatabaseUtils.props: ... jdbcDriver=...org.gjt.mm.mysql.Driver,oracle.jdbc.driver.OracleDriver ... CHAR=0 ... VARCHAR=0 VARCHAR2=0 ... NUMBER=7 .... Weka – JDBC
  • 18.
  • 19. SQL and vertical N-gram storage PROFILES: NSHARED NGRAMS: TSHARED
  • 20.
  • 21. SQL and data transformation Data2.sql – cursor function for a query returning horizontal form of data:
    -- fewcols: builds one "horizontal" row for bag p_bagid and returns it as a ref cursor.
    -- The row comes from dynamic SQL of the form
    --   select <bag_id> bag_id, <value> A<rank>, ..., '<category>' category from dual
    -- Parameters:
    --   p_lmin, p_lmax : bounds applied to rank0 (rank of an N-gram as computed in cursor C)
    --   p_invf         : upper bound on (TSHARED rows for the N-gram) / (distinct bags)
    --   p_bagid        : bag to export
    --   p_norm         : when true, counts are divided by the value fetched from cursor CS
    -- Returns: sys_refcursor opened on the generated single-row query.
    -- NO_DATA_FOUND / TOO_MANY_ROWS may be raised by the category lookup when the bag has
    -- no row, or more than one distinct category, in TSHARED.
    create or replace function fewcols(
        p_lmin  number,
        p_lmax  number,
        p_invf  number,
        p_bagid number,
        p_norm  boolean default true
    ) return sys_refcursor
    is
        str  varchar2(32000);
        cat  varchar2(64) := null;
        opt  sys_refcursor;
        j    number;          -- next column index that still has to be emitted
        norm number := 1;

        -- N-grams of one bag that pass the rank and inverse-frequency filters,
        -- ordered so that column indices are produced in ascending order.
        cursor C(lmin number, lmax number, invf number, bagid number) is
            select T.rank-1 rank, TS.count count, TS.bag_id, TS.category, TS.N
            from tshared TS,
                 (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                         rownum rank, N, ngram
                  from (select N, ngram, count from nshared order by N asc, count desc) T0) T
            where TS.ngram=T.ngram and TS.N=T.N
              and T.rank0 between lmin and lmax
              and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
                  /(select count(distinct bag_id) from tshared) <= invf
              and TS.bag_id=bagid
            order by TS.bag_id asc, TS.N asc, T.rank asc;

        -- Normalisation divisor for the same row set as cursor C.
        -- NOTE(review): this is the Euclidean norm of the rank values, while the values
        -- being divided are counts; sqrt(sum(TS.count*TS.count)) looks like the intent.
        -- Left unchanged - confirm with the author before altering the output values.
        cursor CS(lmin number, lmax number, invf number, bagid number) is
            select sqrt(sum((T.rank-1)*(T.rank-1))) norm
            from tshared TS,
                 (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                         rownum rank, N, ngram
                  from (select N, ngram, count from nshared order by N asc, count desc) T0) T
            where TS.ngram=T.ngram and TS.N=T.N
              and T.rank0 between lmin and lmax
              and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
                  /(select count(distinct bag_id) from tshared) <= invf
              and TS.bag_id=bagid;
    begin
        str := to_char(p_bagid)||' bag_id';
        j := p_lmin;

        if p_norm then
            open CS(p_lmin,p_lmax,p_invf,p_bagid);
            fetch CS into norm;
            close CS;
            -- sum() over zero rows yields NULL; treat it like 0 so the divisor stays usable.
            if norm is null or norm=0 then
                norm := 1;
            end if;
        end if;

        for i in C(p_lmin,p_lmax,p_invf,p_bagid) loop
            if cat is null then
                cat := i.category;
            end if;
            -- Zero-fill the columns between the previous N-gram and this one.
            if j<i.rank then
                for gap in j .. i.rank-1 loop
                    str := str || ', 0 A' || to_char(gap);
                end loop;
            end if;
            str := str || ', ' || to_char(i.count/norm,'9999.99999999') || ' A' || to_char(i.rank);
            j := i.rank+1;
        end loop;

        -- No N-gram survived the filters, or trailing columns are missing:
        -- look the category up directly and zero-fill up to p_lmax.
        if cat is null or j<=p_lmax then
            select distinct category into cat from tshared where bag_id=p_bagid;
            for gap in j .. p_lmax loop
                str := str || ', 0 A' || to_char(gap);
            end loop;
        end if;

        -- Double embedded single quotes so a category such as O'Reilly cannot break
        -- (or inject into) the generated statement.
        str := str || ', ''' || replace(cat, '''', '''''') || ''' category';

        open opt for 'select ' || str || ' from dual';
        return opt;
    end;
    /
  • 22. SQL and data transformation Data3.sql – procedure generating table DATA3 in horizontal form:
    -- data: rebuilds table DATA3 with one "horizontal" row per distinct bag_id in TSHARED.
    -- The first bag creates the table (create table data3 as select ...); every further
    -- bag is appended with insert into data3 select ... from dual.
    -- Parameters:
    --   p_lmin, p_lmax : bounds applied to rank0 (rank of an N-gram as computed in cursor C)
    --   p_invf         : upper bound on (TSHARED rows for the N-gram) / (distinct bags)
    --   p_norm         : when true, counts are divided by the value fetched from cursor CS
    -- NO_DATA_FOUND / TOO_MANY_ROWS may be raised by the category lookup when a bag has
    -- more than one distinct category in TSHARED.
    -- NOTE(review): columns are emitted only for ranks a bag actually has outside
    -- p_lmin..p_lmax, so bags may produce differing column lists and the insert can
    -- fail with a column-count mismatch - confirm against real data.
    create or replace procedure data(
        p_lmin number,
        p_lmax number,
        p_invf number,
        p_norm boolean default true
    )
    is
        str   varchar2(32000);
        cat   varchar2(64) := null;
        first boolean := true;
        j     number;          -- next column index that still has to be emitted
        norm  number := 1;

        -- N-grams of one bag that pass the rank and inverse-frequency filters,
        -- ordered so that column indices are produced in ascending order.
        cursor C(lmin number, lmax number, invf number, bagid number) is
            select T.rank-1 rank, TS.count count, TS.bag_id, TS.category, TS.N
            from tshared TS,
                 (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                         rownum rank, N, ngram
                  from (select N, ngram, count from nshared order by N asc, count desc) T0) T
            where TS.ngram=T.ngram and TS.N=T.N
              and T.rank0 between lmin and lmax
              and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
                  /(select count(distinct bag_id) from tshared) <= invf
              and TS.bag_id=bagid
            order by TS.bag_id asc, TS.N asc, T.rank asc;

        -- Normalisation divisor for the same row set as cursor C.
        -- NOTE(review): this is the Euclidean norm of the rank values, while the values
        -- being divided are counts; sqrt(sum(TS.count*TS.count)) looks like the intent.
        -- Left unchanged - confirm with the author before altering the output values.
        cursor CS(lmin number, lmax number, invf number, bagid number) is
            select sqrt(sum((T.rank-1)*(T.rank-1))) norm
            from tshared TS,
                 (select rownum - (select count(bag_id) from tshared ts0 where ts0.N<T0.N) rank0,
                         rownum rank, N, ngram
                  from (select N, ngram, count from nshared order by N asc, count desc) T0) T
            where TS.ngram=T.ngram and TS.N=T.N
              and T.rank0 between lmin and lmax
              and (select count(bag_id) from tshared TS2 where TS2.ngram=TS.ngram and TS2.N=TS.N)
                  /(select count(distinct bag_id) from tshared) <= invf
              and TS.bag_id=bagid;
    begin
        -- Cursor FOR loop: the implicit cursor is closed automatically, also on error
        -- (the original explicit cursor B was opened but never closed).
        for b in (select distinct bag_id from tshared) loop
            str := to_char(b.bag_id)||' bag_id';
            j := p_lmin;
            -- Reset per bag; otherwise a bag whose ranks reach p_lmax silently keeps
            -- the category of the previously processed bag.
            cat := null;

            if p_norm then
                open CS(p_lmin,p_lmax,p_invf,b.bag_id);
                fetch CS into norm;
                close CS;
                -- sum() over zero rows yields NULL; treat it like 0 so the divisor stays usable.
                if norm is null or norm=0 then
                    norm := 1;
                end if;
            end if;

            for i in C(p_lmin,p_lmax,p_invf,b.bag_id) loop
                if cat is null then
                    cat := i.category;
                end if;
                -- Zero-fill the columns between the previous N-gram and this one.
                if j<i.rank then
                    for gap in j .. i.rank-1 loop
                        str := str || ', 0 A' || to_char(gap);
                    end loop;
                end if;
                str := str || ', ' || to_char(i.count/norm,'9999.99999999') || ' A' || to_char(i.rank);
                j := i.rank+1;
            end loop;

            -- No N-gram survived the filters, or trailing columns are missing:
            -- look the category up directly and zero-fill up to p_lmax.
            if cat is null or j<=p_lmax then
                select distinct category into cat from tshared where bag_id=b.bag_id;
                for gap in j .. p_lmax loop
                    str := str || ', 0 A' || to_char(gap);
                end loop;
            end if;

            -- Double embedded single quotes so a category such as O'Reilly cannot break
            -- (or inject into) the generated statement.
            str := str || ', ''' || replace(cat, '''', '''''') || ''' category';

            if first then
                first := false;
                begin
                    execute immediate 'drop table data3';
                exception
                    when others then
                        -- Only "table or view does not exist" is expected here.
                        if sqlcode != -942 then
                            raise;
                        end if;
                end;
                execute immediate 'create table data3 as select ' || str || ' from dual';
            else
                execute immediate 'insert into data3 select ' || str || ' from dual';
            end if;
        end loop;

        commit;
    end;
    /
  • 23.