SlideShare a Scribd company logo
1 of 25
Download to read offline
MACHINE LEARNING WITH
GO
Golang Bristol - April 2020
James Bowman
@jamesebowman
@jamesebowman
MAGIC !!
@jamesebowman
MAGIC !!
MATHS
& ALGORITHMS
Artificial Intelligence
Any technique which enables computers to mimic
human behaviour.
Machine Learning
Subset of AI techniques which use statistical methods
to enable machines to ‘learn’ how to carry out tasks
without being explicitly programmed how to do them.
Deep Learning
Subset of ML techniques using multi-layered neural
networks (algorithms inspired by the structure and
function of the human brain). Typically suited to self-
learning and feature extraction.
Artificial Intelligence
Machine Learning
f(x)
Deep Learning
@jamesebowman
@jamesebowman
@jamesebowman
SO WHY GO?!?
• Relatively expressive and productive
• Strong typing (more explicit)
• Performant and Scalable
@jamesebowman
Supervised
Learning
Unsupervised
Learning
Reinforcement
Learning
Classification
Regression
Clustering
Dimensionality
Reduction
• House Price Prediction
• Demand Forecasting
• Image Recognition
• Ad Click Prediction
• Medical Diagnosis
• Spam Filtering
• Customer Segmentation
• Data Mining
• Recommendations
• Visualisation
• Feature Extraction
• Compression
• Skill Acquisition
• Control Systems
• Game AI
• Real-time Decisions
Machine Learning
@jamesebowman
BASIC ML WORKFLOW
Train Model
Historical
Data
Live Data
Training
Data
Test Data Evaluate
Model
Deploy/Use
Model
Performance
Metrics
Predictions
@jamesebowman
THE DIABETES DATASET
• Pima are a group of Native Americans living in Arizona
• Highest rate of obesity and diabetes recorded
• Study conducted by National Institute of Diabetes and
Digestive and Kidney Diseases collected diagnosis data on
female patients with the aim of predicting diabetes.
# Pregnancies Glucose
Blood
Pressure
SkinThickness Insulin BMI
Diabetes
Pedigree
Function
Age Outcome (Class
Label)
6 148 72 35 0 33.6 0.627 50 1
1 85 66 29 0 26.6 0.351 31 0
https://www.kaggle.com/uciml/pima-indians-diabetes-database
@jamesebowman
INTUITIVELY
Patients with similar attributes tend to share the
same diagnosis
@jamesebowman
K-NEAREST
NEIGHBOURS
CLASSIFIER
Predicts class (Y) as the
average (mode) of the classes
for the K most similar
(nearest) observations from
the training data
K=3
0
1
1
Y = Mode of the K nearest observations
{0, 1, 1} = 1
0
1
0
0
0
0
0
1
Y
@jamesebowman
FEATURE VECTORS
• Observations (records) can be
represented as n-dimensional
numerical feature vectors
• Feature vectors can be thought
of as points in Euclidean space
P(x, y)
y
x
P(x, y, z)
y
x
z
[
p1
p2]
p1
p2
p3
p1
p2
p3
.
.
.
pn
n=2 (2D)
n=3 (3D)
=
=
@jamesebowman
NEAREST NEIGHBOURS
• ‘Nearest’ = shortest distance
• Where distance uses a formal distance
metric
• In n dimensional Euclidean space, distance
between points p and q is given by
Pythagoras formula:
$$d(p, q) = \sqrt{\sum_{i=1}^{n} (p_i - q_i)^2} = \sqrt{(p_1 - q_1)^2 + (p_2 - q_2)^2 + \dots + (p_n - q_n)^2}$$
p
q
d(p, q)
p1 - q1
p2-q2
LET'S GO IMPLEMENT IT
(pun intended)
@jamesebowman
LET'S BUILD A MODEL
// Predictor is the contract the evaluation harness expects from a model:
// it can be trained on labelled observations and can then assign class
// labels to new observations.
type Predictor interface {
// Fit 'trains' the model using the training data X and the class label
// Y[i] that belongs to each observation.
// NOTE(review): presumably row i of X pairs with Y[i] - confirm against callers.
Fit(X *mat.Dense, Y []string)
// Predict infers a class label for the test or live production data in X
// and returns the labels as a slice.
Predict(X *mat.Dense) []string
}
1. Fit ‘trains’ the model using training data
2. Predict infers the class for the test or live
production data
@jamesebowman
EVALUATE WITH A SIMPLE
HARNESS
1. Load the dataset from the CSV
file
2. Split the data into training and test
sets
3. Train the model with the training
data
4. Predict classes for the test data
5. Compare predictions with test
data labels to find model accuracy
// Evaluate runs the complete train/test cycle for model against the CSV
// dataset stored at dsPath and reports the resulting accuracy.
//
// The steps are: load the CSV records, split them into training and test
// sets (the first record is treated as a header and 70% of the remaining
// records are used for training), fit the model on the training set, predict
// the test set and compare those predictions with the test labels.
//
// The returned accuracy is whatever evaluate reports. A non-nil error is
// returned, with an accuracy of 0, only when the dataset cannot be loaded.
func Evaluate(dsPath string, model Predictor) (float64, error) {
	const (
		hasHeader     = true
		trainFraction = 0.7
	)

	rows, loadErr := loadFile(dsPath)
	if loadErr != nil {
		return 0, loadErr
	}

	xTrain, yTrain, xTest, yTest := split(hasHeader, rows, trainFraction)
	model.Fit(xTrain, yTrain)

	accuracy := evaluate(model.Predict(xTest), yTest)
	return accuracy, nil
}
@jamesebowman
1. LOAD THE DATASET FROM
THE CSV FILE
// loadFile reads the whole CSV file at path and returns its records, one
// []string per line, with the header line (if any) still included.
//
// It returns a nil slice and the underlying error when the file cannot be
// opened, and whatever csv.Reader.ReadAll returns otherwise.
func loadFile(path string) ([][]string, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Release the descriptor once parsing is done. The file is only read
	// from, so an error from Close carries no information worth reporting.
	defer file.Close()

	return csv.NewReader(file).ReadAll()
}
@jamesebowman
2. SPLIT THE DATA INTO
TRAINING AND TEST SETS
// split partitions records into a training set and a test set.
//
// When header is true the first record is treated as a column header and is
// dropped. Of the remaining records, int(len*trainProportion) are sampled
// without replacement for training, using a fixed seed so that the split is
// the same on every run; all other records form the test set. Within each
// set the records keep their original relative order.
//
// Every record is expected to hold the numeric features followed by the
// class label in its last field (see readRecord). The returned matrices have
// one row per record and one column per feature; the label slices hold the
// matching class label for each row.
//
// The matrices are returned as *mat.Dense, which still satisfies mat.Matrix,
// so that they can be handed directly to Predictor.Fit and Predictor.Predict.
//
// NOTE(review): records must contain at least one data record, and both
// resulting sets must be non-empty - presumably mat.NewDense rejects zero
// dimensions; confirm before calling with tiny datasets or a proportion of
// 0 or 1.
func split(header bool, records [][]string, trainProportion float64) (*mat.Dense, []string, *mat.Dense, []string) {
	if header {
		records = records[1:]
	}

	datasetLength := len(records)
	indx := make([]int, int(float64(datasetLength)*trainProportion))
	// Fixed seed: the same records end up in the training set on every run.
	r := rnd.New(rnd.NewSource(uint64(47)))
	sampleuv.WithoutReplacement(indx, datasetLength, r)
	// Sorted so the sampled indices can be matched in a single pass below.
	sort.Ints(indx)

	// The last field of a record is its label, not a feature.
	features := len(records[0]) - 1
	testLength := datasetLength - len(indx)

	trainData := mat.NewDense(len(indx), features, nil)
	trainLabels := make([]string, len(indx))
	testData := mat.NewDense(testLength, features, nil)
	testLabels := make([]string, testLength)

	// trainind and testind are the next free row in each output set; they
	// must advance after every record or each record overwrites row 0.
	var trainind, testind int
	for i, v := range records {
		if trainind < len(indx) && i == indx[trainind] {
			// training set
			readRecord(trainLabels, trainData, trainind, v)
			trainind++
		} else {
			// test set
			readRecord(testLabels, testData, testind, v)
			testind++
		}
	}

	return trainData, trainLabels, testData, testLabels
}
@jamesebowman
2. SPLITTHE DATA INTO
TRAINING ANDTEST SETS
// readRecord stores a single CSV record in row recordNum of the output set.
//
// The final field of record is taken as the class label and written to
// labels[recordNum]. Every preceding field is parsed as a float64 and
// written to the matching column of data; a field that does not parse
// cleanly is stored as 0.
func readRecord(labels []string, data *mat.Dense, recordNum int, record []string) {
	labelCol := len(record) - 1
	labels[recordNum] = record[labelCol]

	for col := 0; col < labelCol; col++ {
		value, parseErr := strconv.ParseFloat(record[col], 64)
		if parseErr != nil {
			// Invalid numbers are deliberately replaced with 0.
			value = 0
		}
		data.Set(recordNum, col, value)
	}
}
@jamesebowman
3.TRAINTHE MODEL WITH
THETRAINING DATA
// KNNClassifier is a k-nearest-neighbours classifier. Fit stores the
// training observations and Predict labels a new observation with the class
// that occurs most often among its K closest training observations.
type KNNClassifier struct {
// K is the number of nearest training observations that vote on the
// class of each predicted observation.
K int
// Distance measures how far apart two observations are. Predict calls it
// with a training row and the row being classified, and treats smaller
// results as closer.
Distance func(a, b mat.Vector) float64
// datapoints holds the training observations handed to Fit.
datapoints *mat.Dense
// classes holds the class labels handed to Fit.
// NOTE(review): presumably classes[i] labels row i of datapoints - confirm.
classes []string
}
// Fit records the training observations X and their class labels Y on the
// classifier for later use by Predict. Both are stored as given, without
// being copied.
func (k *KNNClassifier) Fit(X *mat.Dense, Y []string) {
	k.datapoints, k.classes = X, Y
}
@jamesebowman
4. PREDICT CLASSES FORTHE
TEST DATA
// Predict returns one class label per row of X.
//
// For every row it measures the distance to each training observation
// stored by Fit, ranks the training observations from nearest to farthest
// and lets the K nearest vote with their class label. The class with the
// most votes wins; when several classes tie, the one belonging to the
// nearest of the tied neighbours wins, so the result is deterministic.
//
// If K exceeds the number of training observations, all of them vote. If K
// is not positive, or Fit has not supplied any observations, every returned
// label is the empty string.
func (k *KNNClassifier) Predict(X *mat.Dense) []string {
	rows, _ := X.Dims()
	targets := make([]string, rows)

	// Never ask for more neighbours than there are training observations;
	// indexing past the end of inds would panic.
	trained := len(k.classes)
	neighbours := k.K
	if neighbours > trained {
		neighbours = trained
	}

	// Scratch buffers reused for every row: Argsort overwrites both.
	distances := make([]float64, trained)
	inds := make([]int, trained)

	for i := 0; i < rows; i++ {
		query := X.RowView(i)
		for j := range distances {
			distances[j] = k.Distance(k.datapoints.RowView(j), query)
		}
		// After this call inds lists the training rows from nearest to farthest.
		floats.Argsort(distances, inds)

		votes := make(map[string]int)
		for n := 0; n < neighbours; n++ {
			votes[k.classes[inds[n]]]++
		}

		// Walk the neighbours nearest first instead of ranging over the
		// map: map iteration order is random, which made ties
		// unpredictable. The strict comparison keeps the earlier (nearer)
		// class on a tie.
		winningCount := 0
		for n := 0; n < neighbours; n++ {
			class := k.classes[inds[n]]
			if count := votes[class]; count > winningCount {
				targets[i] = class
				winningCount = count
			}
		}
	}

	return targets
}
1. For each observation to predict for
(row in the matrix):
2. Calculate the distance to every
training observation
3. Sort the distances
4. Count the frequency of each class
corresponding to the top k closest
5. Determine the highest frequency class
@jamesebowman
4. PREDICT CLASSES FORTHE
TEST DATA
// EuclideanDistance returns the straight-line distance between a and b: the
// square root of the dot product of their element-wise difference with
// itself.
func EuclideanDistance(a, b mat.Vector) float64 {
	diff := new(mat.VecDense)
	diff.SubVec(a, b)

	sumOfSquares := mat.Dot(diff, diff)
	return math.Sqrt(sumOfSquares)
}
$$d(p, q) = \sqrt{(p_1 - q_1)^2 + (p_2 - q_2)^2 + \dots + (p_n - q_n)^2}$$
@jamesebowman
5. COMPARE PREDICTIONS WITHTEST
DATA LABELSTO FIND MODEL
ACCURACY
// evaluate returns the accuracy of predictions: the fraction of labels for
// which the prediction at the same index is identical.
//
// A label with no corresponding prediction (predictions shorter than labels)
// counts as incorrect rather than causing a panic, and an empty labels slice
// yields 0 rather than NaN.
func evaluate(predictions, labels []string) float64 {
	if len(labels) == 0 {
		return 0
	}

	correct := 0
	for i, want := range labels {
		if i < len(predictions) && predictions[i] == want {
			correct++
		}
	}
	return float64(correct) / float64(len(labels))
}
@jamesebowman
PERFORMANCE
0.69
QUESTIONS
?
@jamesebowman

More Related Content

What's hot

International Journal of Computational Engineering Research(IJCER)
International Journal of Computational Engineering Research(IJCER)International Journal of Computational Engineering Research(IJCER)
International Journal of Computational Engineering Research(IJCER)ijceronline
 
Lecture 5 Relationship between pixel-2
Lecture 5 Relationship between pixel-2Lecture 5 Relationship between pixel-2
Lecture 5 Relationship between pixel-2VARUN KUMAR
 
Generative adversarial networks
Generative adversarial networksGenerative adversarial networks
Generative adversarial networksKyuri Kim
 
Auto encoding-variational-bayes
Auto encoding-variational-bayesAuto encoding-variational-bayes
Auto encoding-variational-bayesmehdi Cherti
 
Introduction to RBM for written digits recognition
Introduction to RBM for written digits recognitionIntroduction to RBM for written digits recognition
Introduction to RBM for written digits recognitionSergey Kharagorgiev
 
Fuzzy c means clustering protocol for wireless sensor networks
Fuzzy c means clustering protocol for wireless sensor networksFuzzy c means clustering protocol for wireless sensor networks
Fuzzy c means clustering protocol for wireless sensor networksmourya chandra
 
Rabbit challenge 3 DNN Day2
Rabbit challenge 3 DNN Day2Rabbit challenge 3 DNN Day2
Rabbit challenge 3 DNN Day2TOMMYLINK1
 
Auto-encoding variational bayes
Auto-encoding variational bayesAuto-encoding variational bayes
Auto-encoding variational bayesKyuri Kim
 
2021 01-04-learning filter-basis
2021 01-04-learning filter-basis2021 01-04-learning filter-basis
2021 01-04-learning filter-basisJAEMINJEONG5
 
CP 2011 Poster
CP 2011 PosterCP 2011 Poster
CP 2011 PosterSAAM007
 
The Power of Ensembles in Machine Learning
The Power of Ensembles in Machine LearningThe Power of Ensembles in Machine Learning
The Power of Ensembles in Machine LearningAmit Kapoor
 
ECCV2010: feature learning for image classification, part 4
ECCV2010: feature learning for image classification, part 4ECCV2010: feature learning for image classification, part 4
ECCV2010: feature learning for image classification, part 4zukun
 
Gradient boosting in practice: a deep dive into xgboost
Gradient boosting in practice: a deep dive into xgboostGradient boosting in practice: a deep dive into xgboost
Gradient boosting in practice: a deep dive into xgboostJaroslaw Szymczak
 
Graph convolutional networks in apache spark
Graph convolutional networks in apache sparkGraph convolutional networks in apache spark
Graph convolutional networks in apache sparkEmiliano Martinez Sanchez
 
Kernels in convolution
Kernels in convolutionKernels in convolution
Kernels in convolutionRevanth Kumar
 
方策勾配型強化学習の基礎と応用
方策勾配型強化学習の基礎と応用方策勾配型強化学習の基礎と応用
方策勾配型強化学習の基礎と応用Ryo Iwaki
 

What's hot (20)

Sharbani bhattacharya sacta 2014
Sharbani bhattacharya sacta 2014Sharbani bhattacharya sacta 2014
Sharbani bhattacharya sacta 2014
 
International Journal of Computational Engineering Research(IJCER)
International Journal of Computational Engineering Research(IJCER)International Journal of Computational Engineering Research(IJCER)
International Journal of Computational Engineering Research(IJCER)
 
Lecture 5 Relationship between pixel-2
Lecture 5 Relationship between pixel-2Lecture 5 Relationship between pixel-2
Lecture 5 Relationship between pixel-2
 
Generative adversarial networks
Generative adversarial networksGenerative adversarial networks
Generative adversarial networks
 
3 handouts section3-7
3 handouts section3-73 handouts section3-7
3 handouts section3-7
 
Auto encoding-variational-bayes
Auto encoding-variational-bayesAuto encoding-variational-bayes
Auto encoding-variational-bayes
 
Introduction to RBM for written digits recognition
Introduction to RBM for written digits recognitionIntroduction to RBM for written digits recognition
Introduction to RBM for written digits recognition
 
Fuzzy c means clustering protocol for wireless sensor networks
Fuzzy c means clustering protocol for wireless sensor networksFuzzy c means clustering protocol for wireless sensor networks
Fuzzy c means clustering protocol for wireless sensor networks
 
Rabbit challenge 3 DNN Day2
Rabbit challenge 3 DNN Day2Rabbit challenge 3 DNN Day2
Rabbit challenge 3 DNN Day2
 
Auto-encoding variational bayes
Auto-encoding variational bayesAuto-encoding variational bayes
Auto-encoding variational bayes
 
2021 01-04-learning filter-basis
2021 01-04-learning filter-basis2021 01-04-learning filter-basis
2021 01-04-learning filter-basis
 
CP 2011 Poster
CP 2011 PosterCP 2011 Poster
CP 2011 Poster
 
The Power of Ensembles in Machine Learning
The Power of Ensembles in Machine LearningThe Power of Ensembles in Machine Learning
The Power of Ensembles in Machine Learning
 
ECCV2010: feature learning for image classification, part 4
ECCV2010: feature learning for image classification, part 4ECCV2010: feature learning for image classification, part 4
ECCV2010: feature learning for image classification, part 4
 
Gradient boosting in practice: a deep dive into xgboost
Gradient boosting in practice: a deep dive into xgboostGradient boosting in practice: a deep dive into xgboost
Gradient boosting in practice: a deep dive into xgboost
 
Clustering tutorial
Clustering tutorialClustering tutorial
Clustering tutorial
 
The Perceptron (D1L2 Deep Learning for Speech and Language)
The Perceptron (D1L2 Deep Learning for Speech and Language)The Perceptron (D1L2 Deep Learning for Speech and Language)
The Perceptron (D1L2 Deep Learning for Speech and Language)
 
Graph convolutional networks in apache spark
Graph convolutional networks in apache sparkGraph convolutional networks in apache spark
Graph convolutional networks in apache spark
 
Kernels in convolution
Kernels in convolutionKernels in convolution
Kernels in convolution
 
方策勾配型強化学習の基礎と応用
方策勾配型強化学習の基礎と応用方策勾配型強化学習の基礎と応用
方策勾配型強化学習の基礎と応用
 

Similar to ML with Go: KNN Classifier for Diabetes Prediction

Machine Learning With R
Machine Learning With RMachine Learning With R
Machine Learning With RDavid Chiu
 
Visualizing the Model Selection Process
Visualizing the Model Selection ProcessVisualizing the Model Selection Process
Visualizing the Model Selection ProcessBenjamin Bengfort
 
data frames.pptx
data frames.pptxdata frames.pptx
data frames.pptxRacksaviR
 
Machine Learning: Classification Concepts (Part 1)
Machine Learning: Classification Concepts (Part 1)Machine Learning: Classification Concepts (Part 1)
Machine Learning: Classification Concepts (Part 1)Daniel Chan
 
Introduction to Machine Learning
Introduction to Machine LearningIntroduction to Machine Learning
Introduction to Machine LearningBig_Data_Ukraine
 
Introduction to k-Nearest Neighbors and Amazon SageMaker
Introduction to k-Nearest Neighbors and Amazon SageMaker Introduction to k-Nearest Neighbors and Amazon SageMaker
Introduction to k-Nearest Neighbors and Amazon SageMaker Suman Debnath
 
Spock Framework - Slidecast
Spock Framework - SlidecastSpock Framework - Slidecast
Spock Framework - SlidecastDaniel Kolman
 
Feature Engineering - Getting most out of data for predictive models
Feature Engineering - Getting most out of data for predictive modelsFeature Engineering - Getting most out of data for predictive models
Feature Engineering - Getting most out of data for predictive modelsGabriel Moreira
 
Synthetic Image Data Generation using GAN &Triple GAN.pptx
Synthetic Image Data Generation using GAN &Triple GAN.pptxSynthetic Image Data Generation using GAN &Triple GAN.pptx
Synthetic Image Data Generation using GAN &Triple GAN.pptxRupeshKumar301638
 
Pointcuts and Analysis
Pointcuts and AnalysisPointcuts and Analysis
Pointcuts and AnalysisWiwat Ruengmee
 
Leveraging R in Big Data of Mobile Ads (R在行動廣告大數據的應用)
Leveraging R in Big Data of Mobile Ads (R在行動廣告大數據的應用)Leveraging R in Big Data of Mobile Ads (R在行動廣告大數據的應用)
Leveraging R in Big Data of Mobile Ads (R在行動廣告大數據的應用)Craig Chao
 
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...FarhanAhmade
 
Intelligent Ruby + Machine Learning
Intelligent Ruby + Machine LearningIntelligent Ruby + Machine Learning
Intelligent Ruby + Machine LearningIlya Grigorik
 
Learning Predictive Modeling with TSA and Kaggle
Learning Predictive Modeling with TSA and KaggleLearning Predictive Modeling with TSA and Kaggle
Learning Predictive Modeling with TSA and KaggleYvonne K. Matos
 
Implementation of K-Nearest Neighbor Algorithm
Implementation of K-Nearest Neighbor AlgorithmImplementation of K-Nearest Neighbor Algorithm
Implementation of K-Nearest Neighbor AlgorithmDipesh Shome
 
10. Getting Spatial
10. Getting Spatial10. Getting Spatial
10. Getting SpatialFAO
 
Python 03-parameters-graphics.pptx
Python 03-parameters-graphics.pptxPython 03-parameters-graphics.pptx
Python 03-parameters-graphics.pptxTseChris
 
IRJET- Unabridged Review of Supervised Machine Learning Regression and Classi...
IRJET- Unabridged Review of Supervised Machine Learning Regression and Classi...IRJET- Unabridged Review of Supervised Machine Learning Regression and Classi...
IRJET- Unabridged Review of Supervised Machine Learning Regression and Classi...IRJET Journal
 

Similar to ML with Go: KNN Classifier for Diabetes Prediction (20)

Machine Learning With R
Machine Learning With RMachine Learning With R
Machine Learning With R
 
Visualizing the Model Selection Process
Visualizing the Model Selection ProcessVisualizing the Model Selection Process
Visualizing the Model Selection Process
 
data frames.pptx
data frames.pptxdata frames.pptx
data frames.pptx
 
Machine Learning: Classification Concepts (Part 1)
Machine Learning: Classification Concepts (Part 1)Machine Learning: Classification Concepts (Part 1)
Machine Learning: Classification Concepts (Part 1)
 
Introduction to Machine Learning
Introduction to Machine LearningIntroduction to Machine Learning
Introduction to Machine Learning
 
Introduction to k-Nearest Neighbors and Amazon SageMaker
Introduction to k-Nearest Neighbors and Amazon SageMaker Introduction to k-Nearest Neighbors and Amazon SageMaker
Introduction to k-Nearest Neighbors and Amazon SageMaker
 
Spock Framework - Slidecast
Spock Framework - SlidecastSpock Framework - Slidecast
Spock Framework - Slidecast
 
Spock Framework
Spock FrameworkSpock Framework
Spock Framework
 
Feature Engineering - Getting most out of data for predictive models
Feature Engineering - Getting most out of data for predictive modelsFeature Engineering - Getting most out of data for predictive models
Feature Engineering - Getting most out of data for predictive models
 
Synthetic Image Data Generation using GAN &Triple GAN.pptx
Synthetic Image Data Generation using GAN &Triple GAN.pptxSynthetic Image Data Generation using GAN &Triple GAN.pptx
Synthetic Image Data Generation using GAN &Triple GAN.pptx
 
Pointcuts and Analysis
Pointcuts and AnalysisPointcuts and Analysis
Pointcuts and Analysis
 
Leveraging R in Big Data of Mobile Ads (R在行動廣告大數據的應用)
Leveraging R in Big Data of Mobile Ads (R在行動廣告大數據的應用)Leveraging R in Big Data of Mobile Ads (R在行動廣告大數據的應用)
Leveraging R in Big Data of Mobile Ads (R在行動廣告大數據的應用)
 
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
 
Intelligent Ruby + Machine Learning
Intelligent Ruby + Machine LearningIntelligent Ruby + Machine Learning
Intelligent Ruby + Machine Learning
 
Learning Predictive Modeling with TSA and Kaggle
Learning Predictive Modeling with TSA and KaggleLearning Predictive Modeling with TSA and Kaggle
Learning Predictive Modeling with TSA and Kaggle
 
Implementation of K-Nearest Neighbor Algorithm
Implementation of K-Nearest Neighbor AlgorithmImplementation of K-Nearest Neighbor Algorithm
Implementation of K-Nearest Neighbor Algorithm
 
10. Getting Spatial
10. Getting Spatial10. Getting Spatial
10. Getting Spatial
 
Python 03-parameters-graphics.pptx
Python 03-parameters-graphics.pptxPython 03-parameters-graphics.pptx
Python 03-parameters-graphics.pptx
 
cluster(python)
cluster(python)cluster(python)
cluster(python)
 
IRJET- Unabridged Review of Supervised Machine Learning Regression and Classi...
IRJET- Unabridged Review of Supervised Machine Learning Regression and Classi...IRJET- Unabridged Review of Supervised Machine Learning Regression and Classi...
IRJET- Unabridged Review of Supervised Machine Learning Regression and Classi...
 

Recently uploaded

The Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxThe Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxMalak Abu Hammad
 
Enhancing Worker Digital Experience: A Hands-on Workshop for Partners
Enhancing Worker Digital Experience: A Hands-on Workshop for PartnersEnhancing Worker Digital Experience: A Hands-on Workshop for Partners
Enhancing Worker Digital Experience: A Hands-on Workshop for PartnersThousandEyes
 
Maximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptxMaximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptxOnBoard
 
How to convert PDF to text with Nanonets
How to convert PDF to text with NanonetsHow to convert PDF to text with Nanonets
How to convert PDF to text with Nanonetsnaman860154
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreternaman860154
 
Install Stable Diffusion in windows machine
Install Stable Diffusion in windows machineInstall Stable Diffusion in windows machine
Install Stable Diffusion in windows machinePadma Pradeep
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsMark Billinghurst
 
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024BookNet Canada
 
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024BookNet Canada
 
Pigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions
 
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Scott Keck-Warren
 
APIForce Zurich 5 April Automation LPDG
APIForce Zurich 5 April  Automation LPDGAPIForce Zurich 5 April  Automation LPDG
APIForce Zurich 5 April Automation LPDGMarianaLemus7
 
Understanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitectureUnderstanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitecturePixlogix Infotech
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationRidwan Fadjar
 
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking MenDelhi Call girls
 
Connect Wave/ connectwave Pitch Deck Presentation
Connect Wave/ connectwave Pitch Deck PresentationConnect Wave/ connectwave Pitch Deck Presentation
Connect Wave/ connectwave Pitch Deck PresentationSlibray Presentation
 
AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsMemoori
 

Recently uploaded (20)

The Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxThe Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptx
 
The transition to renewables in India.pdf
The transition to renewables in India.pdfThe transition to renewables in India.pdf
The transition to renewables in India.pdf
 
Enhancing Worker Digital Experience: A Hands-on Workshop for Partners
Enhancing Worker Digital Experience: A Hands-on Workshop for PartnersEnhancing Worker Digital Experience: A Hands-on Workshop for Partners
Enhancing Worker Digital Experience: A Hands-on Workshop for Partners
 
Maximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptxMaximizing Board Effectiveness 2024 Webinar.pptx
Maximizing Board Effectiveness 2024 Webinar.pptx
 
How to convert PDF to text with Nanonets
How to convert PDF to text with NanonetsHow to convert PDF to text with Nanonets
How to convert PDF to text with Nanonets
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreter
 
Install Stable Diffusion in windows machine
Install Stable Diffusion in windows machineInstall Stable Diffusion in windows machine
Install Stable Diffusion in windows machine
 
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptxE-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
E-Vehicle_Hacking_by_Parul Sharma_null_owasp.pptx
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR Systems
 
Vulnerability_Management_GRC_by Sohang Sengupta.pptx
Vulnerability_Management_GRC_by Sohang Sengupta.pptxVulnerability_Management_GRC_by Sohang Sengupta.pptx
Vulnerability_Management_GRC_by Sohang Sengupta.pptx
 
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
Transcript: New from BookNet Canada for 2024: BNC BiblioShare - Tech Forum 2024
 
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
#StandardsGoals for 2024: What’s new for BISAC - Tech Forum 2024
 
Pigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food Manufacturing
 
Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024Advanced Test Driven-Development @ php[tek] 2024
Advanced Test Driven-Development @ php[tek] 2024
 
APIForce Zurich 5 April Automation LPDG
APIForce Zurich 5 April  Automation LPDGAPIForce Zurich 5 April  Automation LPDG
APIForce Zurich 5 April Automation LPDG
 
Understanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitectureUnderstanding the Laravel MVC Architecture
Understanding the Laravel MVC Architecture
 
My Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 PresentationMy Hashitalk Indonesia April 2024 Presentation
My Hashitalk Indonesia April 2024 Presentation
 
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
08448380779 Call Girls In Diplomatic Enclave Women Seeking Men
 
Connect Wave/ connectwave Pitch Deck Presentation
Connect Wave/ connectwave Pitch Deck PresentationConnect Wave/ connectwave Pitch Deck Presentation
Connect Wave/ connectwave Pitch Deck Presentation
 
AI as an Interface for Commercial Buildings
AI as an Interface for Commercial BuildingsAI as an Interface for Commercial Buildings
AI as an Interface for Commercial Buildings
 

ML with Go: KNN Classifier for Diabetes Prediction

  • 1. MACHINE LEARNING WITH GO Golang Bristol - April 2020 James Bowman @jamesebowman
  • 4. Artificial Intelligence Any technique which enables computers to mimic human behaviour. Machine Learning Subset of AI techniques which use statistical methods to enable machines to ‘learn’ how to carry out tasks without being explicitly programmed how to do them. Deep Learning Subset of ML techniques using multi-layered neural networks (algorithms inspired by the structure and function of the human brain). Typically suited to self-learning and feature extraction. Artificial Intelligence Machine Learning f(x) Deep Learning @jamesebowman
  • 6. @jamesebowman SO WHY GO?!? • Relatively expressive and productive • Strong typing (more explicit) • Performant and Scalable
  • 7. @jamesebowman Supervised Learning Unsupervised Learning Reinforcement Learning Classification Regression Clustering Dimensionality Reduction • House Price Prediction • Demand Forecasting • Image Recognition • Ad Click Prediction • Medical Diagnosis • Spam Filtering • Customer Segmentation • Data Mining • Recommendations • Visualisation • Feature Extraction • Compression • Skill Acquisition • Control Systems • Game AI • Real-time Decisions Machine Learning
  • 8. @jamesebowman BASIC ML WORKFLOW Train Model Historical Data Live Data Training Data Test Data Evaluate Model Deploy/Use Model Performance Metrics Predictions
  • 9. @jamesebowman THE DIABETES DATASET • Pima are a group of Native Americans living in Arizona • Highest rate of obesity and diabetes recorded • Study conducted by National Institute of Diabetes and Digestive and Kidney Diseases collected diagnosis data on female patients with the aim of predicting diabetes. # Pregnancies Glucose Blood Pressure SkinThickness Insulin BMI Diabetes Pedigree Function Age Outcome (Class Label) 6 148 72 35 0 33.6 0.627 50 1 1 85 66 29 0 26.6 0.351 31 0 https://www.kaggle.com/uciml/pima-indians-diabetes-database
  • 10. @jamesebowman INTUITIVELY Patients with similar attributes tend to share the same diagnosis
  • 11. @jamesebowman K-NEAREST NEIGHBOURS CLASSIFIER Predicts class (Y) as the average (mode) of the classes for the K most similar (nearest) observations from the training data K=3 0 1 1 Y = Mode of the K nearest observations {0, 1, 1} = 1 0 1 0 0 0 0 0 1 Y
  • 12. @jamesebowman FEATUREVECTORS • Observations (records) can be represented as n-dimensional numerical feature vectors • Feature vectors can be thought of as points in Euclidean space P(x, y) y x P(x, y, z) y x z [ p1 p2] p1 p2 p3 p1 p2 p3 . . . pn n=2 (2D) n=3 (3D) = =
  • 13. @jamesebowman NEAREST NEIGHBOURS • ‘Nearest’ = shortest distance • Where distance uses a formal distance metric • In n dimensional Euclidean space, distance between points p and q is given by Pythagoras formula: $d(p, q) = \sqrt{\sum_{i=1}^{n} (p_i - q_i)^2} = \sqrt{(p_1 - q_1)^2 + (p_2 - q_2)^2 + \dots + (p_n - q_n)^2}$ p q d(p, q) p1 - q1 p2-q2
  • 14. LETS GO IMPLEMENT IT (pun intended)
  • 15. @jamesebowman LETS BUILD A MODEL type Predictor interface { Fit(X *mat.Dense, Y []string) Predict(X *mat.Dense) []string } 1. Fit ‘trains’ the model using training data 2. Predict infers the class for the test or live production data
  • 16. @jamesebowman EVALUATE WITH A SIMPLE HARNESS 1. Load the dataset from the CSV file 2. Split the data into training and test sets 3. Train the model with the training data 4. Predict classes for the test data 5. Compare predictions with test data labels to find model accuracy func Evaluate(dsPath string, model Predictor) (float64, error) { records, err := loadFile(dsPath) if err != nil { return 0, err } trainData, trainLabels, testData, testLabels := split(true, records, 0.7) model.Fit(trainData, trainLabels) predictions := model.Predict(testData) return evaluate(predictions, testLabels), nil }
  • 17. @jamesebowman 1. LOADTHE DATASET FROM THE CSV FILE func loadFile(path string) ([][]string, error) { var records [][]string file, err := os.Open(path) if err != nil { return records, err } reader := csv.NewReader(file) return reader.ReadAll() }
  • 18. @jamesebowman 2. SPLITTHE DATA INTO TRAINING ANDTEST SETS func split(header bool, records [][]string, trainProportion float64) (mat.Matrix, []string, mat.Matrix, []string) { if header { records = records[1:] } datasetLength := len(records) indx := make([]int, int(float64(datasetLength)*trainProportion)) r := rnd.New(rnd.NewSource(uint64(47))) sampleuv.WithoutReplacement(indx, datasetLength, r) sort.Ints(indx) trainData := mat.NewDense(len(indx), len(records[0]), nil) trainLabels := make([]string, len(indx)) testData := mat.NewDense(len(records)-len(indx), len(records[0]), nil) testLabels := make([]string, len(records)-len(indx)) var trainind, testind int for i, v := range records { if trainind < len(indx) && i == indx[trainind] { // training set readRecord(trainLabels, trainData, trainind, v) } else { // test set readRecord(testLabels, testData, testind, v) } } return trainData, trainLabels, testData, testLabels }
  • 19. @jamesebowman 2. SPLITTHE DATA INTO TRAINING ANDTEST SETS func readRecord(labels []string, data *mat.Dense, recordNum int, record []string) { labels[recordNum] = record[len(record)-1] for i, v := range record[:len(record)-1] { s, err := strconv.ParseFloat(v, 64) if err != nil { // replace invalid numbers with 0 s = 0 } data.Set(recordNum, i, s) } }
  • 20. @jamesebowman 3.TRAINTHE MODEL WITH THETRAINING DATA type KNNClassifier struct { K int Distance func(a, b mat.Vector) float64 datapoints *mat.Dense classes []string } func (k *KNNClassifier) Fit(X *mat.Dense, Y []string) { k.datapoints = X k.classes = Y }
  • 21. @jamesebowman 4. PREDICT CLASSES FORTHE TEST DATA func (k *KNNClassifier) Predict(X *mat.Dense) []string { r, _ := X.Dims() targets := make([]string, r) distances := make([]float64, len(k.classes)) inds := make([]int, len(k.classes)) for i := 0; i < r; i++ { votes := make(map[string]float64) for j := 0; j < len(k.classes); j++ { distances[j] = k.Distance( k.datapoints.RowView(j), X.RowView(i), ) } floats.Argsort(distances, inds) for n := 0; n < k.K; n++ { votes[k.classes[inds[n]]]++ } var winningCount float64 for k, v := range votes { if v > winningCount { targets[i] = k winningCount = v } } } return targets } 1. For each observation to predict for (row in the matrix): 2. Calculate the distance to every training observation 3. Sort the distances 4. Count the frequency of each class corresponding to the top k closest 5. Determine the highest frequency class
  • 22. @jamesebowman 4. PREDICT CLASSES FORTHE TEST DATA func EuclideanDistance(a, b mat.Vector) float64 { var v mat.VecDense v.SubVec(a, b) return math.Sqrt(mat.Dot(&v, &v)) } = (p1 − q1)2 + (p2 − q2)2 + . . . + (pn − qn)2
  • 23. @jamesebowman 5. COMPARE PREDICTIONS WITHTEST DATA LABELSTO FIND MODEL ACCURACY func evaluate(predictions, labels []string) float64 { var correct float64 for i, v := range labels { if predictions[i] == v { correct++ } } return correct / float64(len(labels)) }