pg 1. Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis
based on a given set of training data samples. Read the training data from a .CSV file.
import csv

num_attributes = 6
a = []
print("\n the given training data set\n")
with open('sports.csv', 'r') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        a.append(row)
        print(row)

print("\n the initial value of hypothesis: ")
hypothesis = ['0'] * num_attributes
print(hypothesis)

# initialise the hypothesis with the attribute values of the first training example
for j in range(0, num_attributes):
    hypothesis[j] = a[0][j]

print("\n find s: finding a maximally specific hypothesis\n")
for i in range(0, len(a)):
    # the labels in the data file appear as 'Yes'/'no', so compare case-insensitively
    if a[i][num_attributes].lower() == 'yes':
        for j in range(0, num_attributes):
            if a[i][j] != hypothesis[j]:
                hypothesis[j] = '?'
            else:
                hypothesis[j] = a[i][j]
    print("for training instance no:{0} the hypothesis is".format(i), hypothesis)

print("output")
print(hypothesis)
output:
the given training data set
['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'Yes']
['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'Yes']
['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'no']
['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'Yes']
the initial value of hypothesis:
['0', '0', '0', '0', '0', '0']
find s: finding a maximally specific hypothesis
for training instance no:0 the hypothesis is ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']
for training instance no:1 the hypothesis is ['sunny', 'warm', '?', 'strong', 'warm', 'same']
for training instance no:2 the hypothesis is ['sunny', 'warm', '?', 'strong', 'warm', 'same']
for training instance no:3 the hypothesis is ['sunny', 'warm', '?', 'strong', '?', '?']
output
['sunny', 'warm', '?', 'strong', '?', '?']
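For reference, the program assumes a header-less sports.csv whose rows match the training data printed above; a minimal sketch of its contents (reconstructed from the printed rows) would be:
sunny,warm,normal,strong,warm,same,Yes
sunny,warm,high,strong,warm,same,Yes
sunny,warm,normal,strong,warm,same,no
sunny,warm,high,strong,cool,change,Yes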
pg 2:2. For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set of all
hypotheses consistent with the training examples.
import csv

a = []
print("\n The Given Training Data Set\n")
with open(r'C:\Users\ISE-68\Desktop\ws.csv', 'r') as csvFile:
    reader = csv.reader(csvFile)
    for row in reader:
        a.append(row)
        print(row)

num_attributes = len(a[0]) - 1
print("\n The initial value of hypothesis: ")
S = ['0'] * num_attributes
G = ['?'] * num_attributes
print("\n The most specific hypothesis S0:[0,0,0,0,0,0]\n")
print("\n The most general hypothesis G0:[?,?,?,?,?,?]\n")

# compare with the first training example
for j in range(0, num_attributes):
    S[j] = a[0][j]

# compare with the remaining training examples of the given data set
print("\n Candidate Elimination algorithm hypotheses version space computation\n")
temp = []
for i in range(0, len(a)):
    if a[i][num_attributes] == 'Yes':        # positive example: generalise S and prune G
        for j in range(0, num_attributes):
            if a[i][j] != S[j]:
                S[j] = '?'
        for j in range(0, num_attributes):
            # iterate in reverse so deleting entries does not skip or overrun indices
            for k in range(len(temp) - 1, -1, -1):
                if temp[k][j] != '?' and temp[k][j] != S[j]:
                    del temp[k]
        print("for Training Example No:{0} the hypothesis is S{0} ".format(i + 1), S)
        if len(temp) == 0:
            print("for Training Example No:{0} the hypothesis is G{0} ".format(i + 1), G)
        else:
            print("for Training Example No:{0} the hypothesis is G{0} ".format(i + 1), temp)
    if a[i][num_attributes] == 'No':         # negative example: specialise G
        for j in range(0, num_attributes):
            if S[j] != a[i][j] and S[j] != '?':
                G[j] = S[j]
                temp.append(G)
                G = ['?'] * num_attributes
        print("for Training Example No:{0} the hypothesis is S{0} ".format(i + 1), S)
        print("for Training Example No:{0} the hypothesis is G{0} ".format(i + 1), temp)
output:
The Given Training Data Set
['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes']
['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes']
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No']
['Sunny', 'Warm', 'High', 'Strong', 'Cold', 'Change', 'Yes']
The initial value of hypothesis:
The most specific hypothesis S0:[0,0,0,0,0,0]
The most general hypothesis G0:[?,?,?,?,?,?]
Candidate Elimination algorithm hypotheses version space computation
for Training Example No:1 the hypothesis is S1 ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm',
'Same']
for Training Example No:1 the hypothesis is G1 ['?', '?', '?', '?', '?', '?']
for Training Example No:2 the hypothesis is S2 ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
for Training Example No:2 the hypothesis is G2 ['?', '?', '?', '?', '?', '?']
for Training Example No:3 the hypothesis is S3 ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
for Training Example No:3 the hypothesis is G3 [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', 'Same']]
for Training Example No:4 the hypothesis is S4 ['Sunny', 'Warm', '?', 'Strong', '?', '?']
for Training Example No:4 the hypothesis is G4 [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?',
'?', '?']]
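As a quick illustration (not part of the original listing), a small hypothetical helper can check whether a hypothesis from the final version space covers a new instance; the names consistent and sample below are assumptions for this sketch, while S4 and G4 are copied from the output above:
def consistent(hypothesis, instance):
    # a hypothesis covers an instance if every attribute is '?' or matches exactly
    return all(h == '?' or h == v for h, v in zip(hypothesis, instance))

sample = ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change']   # hypothetical new instance
S4 = ['Sunny', 'Warm', '?', 'Strong', '?', '?']
G4 = [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
print(consistent(S4, sample))                  # True  -> covered by the specific boundary
print([consistent(g, sample) for g in G4])     # [True, True] -> covered by both general hypotheses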
pg 3:3. Write a program to demonstrate the working of the decision tree based ID3 algorithm.
Use an appropriate data set for building the decision tree and apply this knowledge to classify a
new sample.
import numpy as np
import math
import csv

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile)
        metadata = next(datareader)
        traindata = []
        for row in datareader:
            traindata.append(row)
    return (metadata, traindata)

def subtables(data, col, delete):
    dict = {}
    items = np.unique(data[:, col])   # unique values in the given column
    count = np.zeros((items.shape[0], 1), dtype=np.int32)   # one count per unique value
    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1
    # count holds the number of rows carrying each value of the column
    for x in range(items.shape[0]):
        dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                dict[items[x]][pos] = data[y]
                pos += 1
        if delete:
            dict[items[x]] = np.delete(dict[items[x]], col, 1)
    return items, dict

def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size)
    for count in counts:
        sums += -1 * count * math.log(count, 2)
    return sums

def gain_ratio(data, col):
    items, dict = subtables(data, col, delete=False)
    # items are the unique values and dict holds the rows corresponding to each value
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    for x in range(items.shape[0]):
        ratio = dict[items[x]].shape[0] / (total_size)
        entropies[x] = ratio * entropy(dict[items[x]][:, -1])
    total_entropy = entropy(data[:, -1])
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]
    return total_entropy

def create_node(data, metadata):
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])
        return node
    gains = np.zeros((data.shape[1] - 1, 1))
    # one gain value per remaining attribute
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, dict = subtables(data, split, delete=True)
    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], metadata)
        node.children.append((items[x], child))
    return node

def empty(size):
    s = ""
    for x in range(size):
        s += " "
    return s

def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)

metadata, traindata = read_data("tennis.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
output:
Outlook
Overcast
[b'Yes']
Rainy
Windy
b'False'
[b'Yes']
b'True'
[b'No']
Sunny
Humidity
b'High'
[b'No']
b'Normal'
[b'Yes']
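The listing builds and prints the tree but does not itself classify a new sample. A rough sketch of how that could be added using the Node structure above is shown below; the classify and _as_text helpers and the sample dictionary are assumptions (the 'Temperature' key is a guess at the tennis.csv header), and values are normalised because deeper subtables are stored as byte strings:
def _as_text(v):
    # child tables use dtype "|S32", so branch values may be bytes
    return v.decode() if isinstance(v, bytes) else str(v)

def classify(tree, sample):
    # walk down the tree until a leaf node (one that carries an answer) is reached
    if tree.answer != "":
        return tree.answer
    value = sample[str(tree.attribute)]
    for branch_value, child in tree.children:
        if _as_text(branch_value) == value:
            return classify(child, sample)
    return None   # attribute value not seen during training

# hypothetical new sample, keyed by the column headers of tennis.csv
sample = {'Outlook': 'Sunny', 'Temperature': 'Mild', 'Humidity': 'Normal', 'Windy': 'False'}
print(classify(node, sample))   # follows Outlook=Sunny -> Humidity=Normal -> [b'Yes']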
pg 4:4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and
test the same using appropriate data sets.
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
print("X=", X)
y = np.array(([92], [86], [89]), dtype=float)
print("y=", y)
y = y / 100          # scale the target values to the range 0-1
print("y=", y)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def derivatives_sigmoid(x):
    return x * (1 - x)

epoch = 10000
lr = 0.1
inputlayer_neurons = 2
hiddenlayer_neurons = 3
output_neurons = 1
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
print("wh\n", wh)
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
print("bh\n", bh)
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
print("wout\n", wout)
bout = np.random.uniform(size=(1, output_neurons))
print("bout\n", bout)

for i in range(epoch):
    # forward propagation
    hinp1 = np.dot(X, wh)
    print("\n\n hinp1\n", hinp1)
    hinp = hinp1 + bh
    print("hinp\n", hinp)
    hlayer_act = sigmoid(hinp)
    print("hlayer_act\n", hlayer_act)
    outinp1 = np.dot(hlayer_act, wout)
    print("outinp1\n", outinp1)
    outinp = outinp1 + bout
    print("outinp\n", outinp)
    output = sigmoid(outinp)
    print("output\n", output)
    # backpropagation of the error
    EO = y - output
    print("EO\n", EO)
    outgrad = derivatives_sigmoid(output)
    print("outgrad\n", outgrad)
    d_output = EO * outgrad
    print("d_output\n", d_output)
    EH = d_output.dot(wout.T)
    print("EH\n", EH)
    hiddengrad = derivatives_sigmoid(hlayer_act)
    print("hiddengrad\n", hiddengrad)
    d_hiddenlayer = EH * hiddengrad
    print("d_hiddenlayer\n", d_hiddenlayer)
    # weight and bias updates
    wout += hlayer_act.T.dot(d_output) * lr
    print("wout\n", wout)
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    print("bout\n", bout)
    wh += X.T.dot(d_hiddenlayer) * lr
    print("wh\n", wh)
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr
    print("bh\n", bh)

print("\n\n input: \n" + str(X))
print("actual output: \n" + str(y))
print("predicted output: \n", output)
output:
omitted: the print statements inside the 10000-epoch training loop make the full output too long to reproduce here.
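Because the listing prints every intermediate matrix on every epoch, a common variant (a sketch, not the original program; it reuses X, y, the weights and the helper functions defined above) drops the per-step prints and reports only an occasional error value:
for i in range(epoch):
    hlayer_act = sigmoid(np.dot(X, wh) + bh)            # forward pass
    output = sigmoid(np.dot(hlayer_act, wout) + bout)
    EO = y - output                                      # output error
    d_output = EO * derivatives_sigmoid(output)
    d_hiddenlayer = d_output.dot(wout.T) * derivatives_sigmoid(hlayer_act)
    wout += hlayer_act.T.dot(d_output) * lr              # weight and bias updates
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr
    if i % 1000 == 0:
        print("epoch", i, "mean squared error", np.mean(np.square(EO)))
print("predicted output:\n", output)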
pg 5:5. Write a program to implement the naïve Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test data sets.
import numpy as np
import math
import csv

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile)
        metadata = next(datareader)
        traindata = []
        for row in datareader:
            traindata.append(row)
    return (metadata, traindata)

def splitDataset(dataset, splitRatio):
    # splits the dataset into a training set and a test set based on the split ratio
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    testset = list(dataset)
    i = 0
    while len(trainSet) < trainSize:
        trainSet.append(testset.pop(i))
    return [trainSet, testset]

def classify(data, test):
    total_size = data.shape[0]
    print("training data size=", total_size)
    print("test data size=", test.shape[0])
    target = np.unique(data[:, -1])
    count = np.zeros((target.shape[0]), dtype=np.int32)
    prob = np.zeros((target.shape[0]), dtype=np.float32)
    print("target count probability")
    for y in range(target.shape[0]):
        for x in range(data.shape[0]):
            if data[x, data.shape[1] - 1] == target[y]:
                count[y] += 1
        prob[y] = count[y] / total_size   # computes the prior probability of each target value
        print(target[y], "\t", count[y], "\t", prob[y])
    prob0 = np.zeros((test.shape[1] - 1), dtype=np.float32)
    prob1 = np.zeros((test.shape[1] - 1), dtype=np.float32)
    accuracy = 0
    print("Instance prediction target")
    for t in range(test.shape[0]):
        for k in range(test.shape[1] - 1):   # for each attribute column
            count1 = count0 = 0
            for j in range(data.shape[0]):
                if test[t, k] == data[j, k] and data[j, data.shape[1] - 1] == target[0]:
                    count0 += 1
                elif test[t, k] == data[j, k] and data[j, data.shape[1] - 1] == target[1]:
                    count1 += 1
            prob0[k] = count0 / count[0]   # likelihood of the attribute value given 'No'
            prob1[k] = count1 / count[1]   # likelihood of the attribute value given 'Yes'
        probno = prob[0]
        probyes = prob[1]
        for i in range(test.shape[1] - 1):
            probno = probno * prob0[i]
            probyes = probyes * prob1[i]
        if probno > probyes:   # prediction
            predict = 'No'
        else:
            predict = 'Yes'
        print(t + 1, "\t", predict, "\t ", test[t, test.shape[1] - 1])
        if predict == test[t, test.shape[1] - 1]:   # computing accuracy
            accuracy += 1
    final_accuracy = (accuracy / test.shape[0]) * 100
    print("accuracy", final_accuracy, "%")
    return

metadata, traindata = read_data("tennis.csv")
splitRatio = 0.4
trainingset, testset = splitDataset(traindata, splitRatio)
training = np.array(trainingset)
testing = np.array(testset)
print("------------------Training Data-------------------")
print(trainingset)
print("-------------------Test Data-------------------")
print(testset)
classify(training, testing)
output:
------------------Training Data-------------------
[['Sunny', 'Hot', 'High', 'False', 'No'], ['Sunny', 'Hot', 'High', 'True', 'No'], ['Overcast', 'Hot', 'High',
'False', 'Yes'], ['Rainy', 'Mild', 'High', 'False', 'Yes'], ['Rainy', 'Cool', 'Normal', 'False', 'Yes']]
-------------------Test Data-------------------
[['Rainy', 'Cool', 'Normal', 'True', 'No'], ['Overcast', 'Cool', 'Normal', 'True', 'Yes'], ['Sunny', 'Mild',
'High', 'False', 'No'], ['Sunny', 'Cool', 'Normal', 'False', 'Yes'], ['Rainy', 'Mild', 'Normal', 'False',
'Yes'], ['Sunny', 'Mild', 'Normal', 'True', 'Yes'], ['Overcast', 'Mild', 'High', 'True', 'Yes'], ['Overcast',
'Hot', 'Normal', 'False', 'Yes'], ['Rainy', 'Mild', 'High', 'True', 'No']]
training data size= 5
test data size= 9
target count probability
No 2 0.4
Yes 3 0.6
Instance prediction target
1 Yes No
2 Yes Yes
3 Yes No
4 Yes Yes
5 Yes Yes
6 Yes Yes
7 Yes Yes
8 Yes Yes
9 Yes No
accuracy 66.66666666666666 %
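One limitation of the classifier above is that an attribute value never seen with a class in the small training split gives a zero likelihood and wipes out the whole product. A minimal sketch of Laplace (add-one) smoothing, written as a standalone helper rather than a change to the listing, would be:
def smoothed_likelihood(value_count, class_count, n_values):
    # Laplace (add-one) smoothing avoids zero probabilities for unseen attribute values
    return (value_count + 1) / (class_count + n_values)

# e.g. an attribute value seen 0 times among the 2 'No' rows, with 3 possible values:
print(smoothed_likelihood(0, 2, 3))   # 0.2 instead of 0.0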
pg 6:6. Assuming a set of documents that need to be classified, use the naïve Bayesian
Classifier model to perform this task. Built-in Java classes/API can be used to write the
program. Calculate the accuracy, precision, and recall for your data set.
import pandas as pd
msg=pd.read_csv('naivetext1.csv',names=['message','label'])
print('The dimensions of the dataset',msg.shape)
msg['labelnum']=msg.label.map({'pos':1,'neg':0})
X=msg.message
y=msg.labelnum
print(X)
print(y)
#splitting the dataset into train and test data
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,y)
print(xtest.shape)
print(xtrain.shape)
print(ytest.shape)
print(ytrain.shape)
#output of count vectoriser is a sparse matrix
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm=count_vect.transform(xtest)
print(count_vect.get_feature_names())   # use get_feature_names_out() on scikit-learn >= 1.0
df=pd.DataFrame(xtrain_dtm.toarray(),columns=count_vect.get_feature_names())
print(df)#tabular representation
print(xtrain_dtm) #sparse matrix representation
#Training Naive Bayes (NB) classifier on training data.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(xtrain_dtm,ytrain)
predicted = clf.predict(xtest_dtm)
#printing accuracy metrics
from sklearn import metrics
print('Accuracy metrics')
print('Accuracy of the classifier is', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))
print('Recall and Precision')
print(metrics.recall_score(ytest, predicted))
print(metrics.precision_score(ytest, predicted))
output:
Accuracy metrics
Accuracy of the classifier is 0.6
Confusion matrix
[[1 2]
[0 2]]
Recall and Precision
1.0
0.5
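The exact contents of naivetext1.csv are not shown; from the read_csv call and the label map, the file is assumed to be a header-less two-column CSV with a sentence and a 'pos'/'neg' label, for example (illustrative rows only):
I love this sandwich,pos
This is an amazing place,pos
I am tired of this stuff,neg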
pg 7:
7. Write a program to construct a Bayesian network considering medical data. Use this model to
demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set. You can
use Java/Python ML library classes/API.
# data analysis, splitting and wrangling
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# machine learning
from sklearn.naive_bayes import GaussianNB
# column names in accordance with feature information
col_names = ['age','sex','chest_pain','blood_pressure',
'serum_cholestoral','fasting_blood_sugar', 'electrocardiographic',
'max_heart_rate','induced_angina','ST_depression',
'slope','no_of_vessels','thal','diagnosis']
# read the file
df = pd.read_csv("heart_disease_dataset.csv", names=col_names, header=None, na_values="?")
print("Number of records: {}\nNumber of variables: {}".format(df.shape[0], df.shape[1]))
# display the first 5 lines
df.head()
df.info()
# extract numeric columns and find categorical ones
numeric_columns = ['serum_cholestoral', 'max_heart_rate', 'age', 'blood_pressure', 'ST_depression']
categorical_columns = [c for c in df.columns if c not in numeric_columns]
print(categorical_columns)
# count values of explained variable
df.diagnosis.value_counts()
# create a boolean vector and map it with corresponding values (True=1, False=0)
df.diagnosis = (df.diagnosis != 0).astype(int)
df.diagnosis.value_counts()
# view of descriptive statistics
df[numeric_columns].describe()
# count ill vs healthy people grouped by sex
df.groupby(['sex','diagnosis'])['diagnosis'].count()
# average number of diagnosed people grouped by number of blood vessels detected by fluoroscopy
df[['no_of_vessels','diagnosis']].groupby('no_of_vessels').mean()
# show columns having missing values
df.isnull().sum()
# fill missing values with mode
df['no_of_vessels'].fillna(df['no_of_vessels'].mode()[0], inplace=True)
df['thal'].fillna(df['thal'].mode()[0], inplace=True)
# extract the target variable
X, y = df.iloc[:, :-1], df.iloc[:, -1]
print(X.shape)
print(y.shape)
# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2606)
print ("train_set_x shape: " + str(X_train.shape))
print ("train_set_y shape: " + str(y_train.shape))
print ("test_set_x shape: " + str(X_test.shape))
print ("test_set_y shape: " + str(y_test.shape))
# scale feature matrices
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
model = GaussianNB()
# train model
model.fit(X_train,y_train)
# check accuracy and print out the results
fit_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
print(f"Train accuracy: {fit_accuracy:0.2%}")
print(f"Test accuracy: {test_accuracy:0.2%}")
output:
Number of records: 303
Number of variables: 14
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
age 303 non-null float64
sex 303 non-null float64
chest_pain 303 non-null float64
blood_pressure 303 non-null float64
serum_cholestoral 303 non-null float64
fasting_blood_sugar 303 non-null float64
electrocardiographic 303 non-null float64
max_heart_rate 303 non-null float64
induced_angina 303 non-null float64
ST_depression 303 non-null float64
slope 303 non-null float64
no_of_vessels 299 non-null float64
thal 301 non-null float64
diagnosis 303 non-null int64
dtypes: float64(13), int64(1)
memory usage: 33.2 KB
['sex', 'chest_pain', 'fasting_blood_sugar', 'electrocardiographic', 'induced_angina', 'slope',
'no_of_vessels', 'thal', 'diagnosis']
(303, 13)
(303,)
train_set_x shape: (212, 13)
train_set_y shape: (212,)
test_set_x shape: (91, 13)
test_set_y shape: (91,)
Train accuracy: 85.38%
Test accuracy: 86.81%
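Beyond the raw accuracy scores, a short follow-up sketch (not part of the original listing) can summarise the test-set performance of the fitted model with scikit-learn's standard metrics:
from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(X_test)                 # predictions of the fitted GaussianNB model
print(confusion_matrix(y_test, y_pred))        # rows: actual class, columns: predicted class
print(classification_report(y_test, y_pred))   # precision, recall and f1-score per class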
pg 8:
8. Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same dataset for
clustering using the k-Means algorithm. Compare the results of these two algorithms and comment
on the quality of clustering. You can add Java/Python ML library classes/API in the program.
#from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
data=pd.read_csv("clusterdata.csv")
df1=pd.DataFrame(data)
print(df1)
f1 = df1['Distance_Feature'].values
f2 = df1['Speeding_Feature'].values
X = np.array(list(zip(f1, f2)))   # feature matrix; newer scikit-learn versions do not accept np.matrix
plt.plot(1)
plt.subplot(511)
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('Dataset')
plt.ylabel('speeding_feature')
plt.xlabel('distance_feature')
plt.scatter(f1,f2)
colors = ['b', 'g', 'r']
markers = ['o', 'v', 's']
# create new plot and data for K- means algorithm
plt.plot(2)
ax=plt.subplot(513)
kmeans_model = KMeans(n_clusters=3).fit(X)
for i, l in enumerate(kmeans_model.labels_):
    fig1 = plt.plot(f1[i], f2[i], color=colors[l], marker=markers[l])
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('K- Means')
plt.ylabel('speeding_feature')
plt.xlabel('distance_feature')
# create new plot and data for gaussian mixture
plt.plot(3)
plt.subplot(515)
gmm=GaussianMixture(n_components=3).fit(X)
labels= gmm.predict(X)
for i, l in enumerate(labels):
    plt.plot(f1[i], f2[i], color=colors[l], marker=markers[l])
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('Gaussian Mixture')
plt.ylabel('speeding_feature')
plt.xlabel('distance_feature')
plt.show()
output:
Driver_ID Distance_Feature Speeding_Feature
0 3423311935 71.24 28
1 3423313212 52.53 25
2 3423313724 64.54 27
3 3423311373 55.69 22
4 3423310999 54.58 25
5 3423313857 41.91 10
6 3423312432 58.64 20
7 3423311434 52.02 8
8 3423311328 31.25 34
9 3423312488 44.31 19
10 3423311254 49.35 40
11 3423312943 58.07 45
12 3423312536 44.22 22
13 3423311542 55.73 19
14 3423312176 46.63 43
15 3423314176 52.97 32
16 3423314202 46.25 35
17 3423311346 51.55 27
18 3423310666 57.05 26
19 3423313527 58.45 30
20 3423312182 43.42 23
21 3423313590 55.68 37
22 3423312268 55.15 18
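To actually compare the two clusterings as the exercise asks, one option (a sketch appended to the program above) is the silhouette coefficient from sklearn.metrics, which ranges from -1 to 1 with higher values indicating better-separated clusters:
from sklearn.metrics import silhouette_score

print("K-Means silhouette score:", silhouette_score(X, kmeans_model.labels_))
print("GMM silhouette score:   ", silhouette_score(X, labels))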
pg 9:
9. Write a program to implement the k-Nearest Neighbour algorithm to classify the iris data set. Print
both correct and wrong predictions. Java/Python ML library classes can be used for this
problem.
Program 9
#KNN algorithm
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix
#from sklearn import metrics
import pandas as pd
import numpy as np
from sklearn import datasets
iris=datasets.load_iris()
iris_data=iris.data
iris_labels=iris.target
#print(iris_data)
#print(iris_labels)
X_train,x_test,y_train,y_test=train_test_split(iris_data,iris_labels,test_size=0.20)
classifier=KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train,y_train)
y_pred=classifier.predict(x_test)
print('confusion_matrix is as follows')
print(confusion_matrix(y_test,y_pred))
print('Accuracy Metrics')
print(classification_report(y_test,y_pred))
output:
confusion_matrix is as follows
[[11 0 0]
[ 0 9 0]
[ 0 1 9]]
Accuracy Metrics
precision recall f1-score support
0 1.00 1.00 1.00 11
1 0.90 1.00 0.95 9
2 1.00 0.90 0.95 10
avg / total 0.97 0.97 0.97 30
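The exercise also asks for the correct and wrong predictions to be printed explicitly; a small sketch that can be appended to the program above (reusing x_test, y_test, y_pred and iris) is:
for i in range(len(y_test)):
    result = "Correct" if y_pred[i] == y_test[i] else "Wrong"
    print(result, "- sample:", x_test[i],
          "predicted:", iris.target_names[y_pred[i]],
          "actual:", iris.target_names[y_test[i]])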
pg 10:
10. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data
points. Select an appropriate data set for your experiment and draw graphs.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np1

def kernel(point, xmat, k):
    m, n = np1.shape(xmat)
    weights = np1.mat(np1.eye((m)))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np1.exp(diff * diff.T / (-2.0 * k**2))
    return weights

def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np1.shape(xmat)
    ypred = np1.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

# load data points
data = pd.read_csv('tips1.csv')
bill = np1.array(data.total_bill)
tip = np1.array(data.tip)

# prepare the design matrix by adding a column of ones to the bill values
mbill = np1.mat(bill)   # mat treats the array as a matrix
mtip = np1.mat(tip)
m = np1.shape(mbill)[1]
print("******", m)
one = np1.mat(np1.ones(m))
X = np1.hstack((one.T, mbill.T))   # stack arrays in sequence horizontally (column wise)

# set the kernel bandwidth k here
ypred = localWeightRegression(X, mtip, 2)
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='blue')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=1)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()
output:
Text(0,0.5,'Tip')
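The fit depends strongly on the kernel bandwidth k passed to localWeightRegression (2 in the listing). A small hedged sketch for comparing a few bandwidths on the same data, reusing X, mtip, SortIndex, xsort, bill and tip from the program above, could look like this:
for k in [0.5, 2, 10]:
    ypred_k = localWeightRegression(X, mtip, k)
    plt.plot(xsort[:, 1], ypred_k[SortIndex], label='k = {}'.format(k), linewidth=1)
plt.scatter(bill, tip, color='blue')
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.legend()
plt.show()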

More Related Content

What's hot

Webi Report Function Overview
Webi Report Function OverviewWebi Report Function Overview
Webi Report Function OverviewSrinath Reddy
 
Gentlest Introduction to Tensorflow - Part 2
Gentlest Introduction to Tensorflow - Part 2Gentlest Introduction to Tensorflow - Part 2
Gentlest Introduction to Tensorflow - Part 2Khor SoonHin
 
Python for R developers and data scientists
Python for R developers and data scientistsPython for R developers and data scientists
Python for R developers and data scientistsLambda Tree
 
Gentlest Introduction to Tensorflow
Gentlest Introduction to TensorflowGentlest Introduction to Tensorflow
Gentlest Introduction to TensorflowKhor SoonHin
 
Python Seaborn Data Visualization
Python Seaborn Data Visualization Python Seaborn Data Visualization
Python Seaborn Data Visualization Sourabh Sahu
 
Gentlest Introduction to Tensorflow - Part 3
Gentlest Introduction to Tensorflow - Part 3Gentlest Introduction to Tensorflow - Part 3
Gentlest Introduction to Tensorflow - Part 3Khor SoonHin
 
関数潮流(Function Tendency)
関数潮流(Function Tendency)関数潮流(Function Tendency)
関数潮流(Function Tendency)riue
 
Python For Data Science Cheat Sheet
Python For Data Science Cheat SheetPython For Data Science Cheat Sheet
Python For Data Science Cheat SheetKarlijn Willems
 
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...Dr. Volkan OBAN
 
Probabilistic Programming in Scala
Probabilistic Programming in ScalaProbabilistic Programming in Scala
Probabilistic Programming in ScalaBeScala
 

What's hot (20)

Webi Report Function Overview
Webi Report Function OverviewWebi Report Function Overview
Webi Report Function Overview
 
Introduction to R
Introduction to RIntroduction to R
Introduction to R
 
Gentlest Introduction to Tensorflow - Part 2
Gentlest Introduction to Tensorflow - Part 2Gentlest Introduction to Tensorflow - Part 2
Gentlest Introduction to Tensorflow - Part 2
 
Unit 2 dsa LINEAR DATA STRUCTURE
Unit 2 dsa LINEAR DATA STRUCTUREUnit 2 dsa LINEAR DATA STRUCTURE
Unit 2 dsa LINEAR DATA STRUCTURE
 
Python for R developers and data scientists
Python for R developers and data scientistsPython for R developers and data scientists
Python for R developers and data scientists
 
Gentlest Introduction to Tensorflow
Gentlest Introduction to TensorflowGentlest Introduction to Tensorflow
Gentlest Introduction to Tensorflow
 
Functional programming in scala
Functional programming in scalaFunctional programming in scala
Functional programming in scala
 
Python Seaborn Data Visualization
Python Seaborn Data Visualization Python Seaborn Data Visualization
Python Seaborn Data Visualization
 
Gentlest Introduction to Tensorflow - Part 3
Gentlest Introduction to Tensorflow - Part 3Gentlest Introduction to Tensorflow - Part 3
Gentlest Introduction to Tensorflow - Part 3
 
L5 array
L5 arrayL5 array
L5 array
 
Array
ArrayArray
Array
 
Array notes
Array notesArray notes
Array notes
 
関数潮流(Function Tendency)
関数潮流(Function Tendency)関数潮流(Function Tendency)
関数潮流(Function Tendency)
 
cluster(python)
cluster(python)cluster(python)
cluster(python)
 
Python for R users
Python for R usersPython for R users
Python for R users
 
20170509 rand db_lesugent
20170509 rand db_lesugent20170509 rand db_lesugent
20170509 rand db_lesugent
 
Python For Data Science Cheat Sheet
Python For Data Science Cheat SheetPython For Data Science Cheat Sheet
Python For Data Science Cheat Sheet
 
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
 
NumPy Refresher
NumPy RefresherNumPy Refresher
NumPy Refresher
 
Probabilistic Programming in Scala
Probabilistic Programming in ScalaProbabilistic Programming in Scala
Probabilistic Programming in Scala
 

Similar to Ml all programs

python3 HashTableSolutionmain.pyfrom ChainingHashTable impor.pdf
python3 HashTableSolutionmain.pyfrom ChainingHashTable impor.pdfpython3 HashTableSolutionmain.pyfrom ChainingHashTable impor.pdf
python3 HashTableSolutionmain.pyfrom ChainingHashTable impor.pdfinfo706022
 
fds Practicle 1to 6 program.pdf
fds Practicle 1to 6 program.pdffds Practicle 1to 6 program.pdf
fds Practicle 1to 6 program.pdfGaneshPawar819187
 
ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions Dr. Volkan OBAN
 
design and analysis of algorithm Lab files
design and analysis of algorithm Lab filesdesign and analysis of algorithm Lab files
design and analysis of algorithm Lab filesNitesh Dubey
 
COMPUTER SCIENCE CLASS 12 PRACTICAL FILE
COMPUTER SCIENCE CLASS 12 PRACTICAL FILECOMPUTER SCIENCE CLASS 12 PRACTICAL FILE
COMPUTER SCIENCE CLASS 12 PRACTICAL FILEAnushka Rai
 
ML with python.pdf
ML with python.pdfML with python.pdf
ML with python.pdfn58648017
 
Python Ireland Nov 2010 Talk: Unit Testing
Python Ireland Nov 2010 Talk: Unit TestingPython Ireland Nov 2010 Talk: Unit Testing
Python Ireland Nov 2010 Talk: Unit TestingPython Ireland
 
Presentation on Pandas in _ detail .pptx
Presentation on Pandas in _ detail .pptxPresentation on Pandas in _ detail .pptx
Presentation on Pandas in _ detail .pptx16115yogendraSingh
 
Python programming workshop
Python programming workshopPython programming workshop
Python programming workshopBAINIDA
 
python practicals-solution-2019-20-class-xii.pdf
python practicals-solution-2019-20-class-xii.pdfpython practicals-solution-2019-20-class-xii.pdf
python practicals-solution-2019-20-class-xii.pdfrajatxyz
 
Baby Steps to Machine Learning at DevFest Lagos 2019
Baby Steps to Machine Learning at DevFest Lagos 2019Baby Steps to Machine Learning at DevFest Lagos 2019
Baby Steps to Machine Learning at DevFest Lagos 2019Robert John
 
wk5ppt1_Titanic
wk5ppt1_Titanicwk5ppt1_Titanic
wk5ppt1_TitanicAliciaWei1
 
Python-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptxPython-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptxParveenShaik21
 
solution-of-practicals-class-xii-comp.-sci.-083-2021-22 (1).pdf
solution-of-practicals-class-xii-comp.-sci.-083-2021-22 (1).pdfsolution-of-practicals-class-xii-comp.-sci.-083-2021-22 (1).pdf
solution-of-practicals-class-xii-comp.-sci.-083-2021-22 (1).pdfparthp5150s
 
support vector regression
support vector regressionsupport vector regression
support vector regressionAkhilesh Joshi
 
Machine Learning Algorithms
Machine Learning AlgorithmsMachine Learning Algorithms
Machine Learning AlgorithmsHichem Felouat
 
Unit 4_Working with Graphs _python (2).pptx
Unit 4_Working with Graphs _python (2).pptxUnit 4_Working with Graphs _python (2).pptx
Unit 4_Working with Graphs _python (2).pptxprakashvs7
 

Similar to Ml all programs (20)

python3 HashTableSolutionmain.pyfrom ChainingHashTable impor.pdf
python3 HashTableSolutionmain.pyfrom ChainingHashTable impor.pdfpython3 HashTableSolutionmain.pyfrom ChainingHashTable impor.pdf
python3 HashTableSolutionmain.pyfrom ChainingHashTable impor.pdf
 
fds Practicle 1to 6 program.pdf
fds Practicle 1to 6 program.pdffds Practicle 1to 6 program.pdf
fds Practicle 1to 6 program.pdf
 
ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions ggtimeseries-->ggplot2 extensions
ggtimeseries-->ggplot2 extensions
 
design and analysis of algorithm Lab files
design and analysis of algorithm Lab filesdesign and analysis of algorithm Lab files
design and analysis of algorithm Lab files
 
Practicle 1.docx
Practicle 1.docxPracticle 1.docx
Practicle 1.docx
 
Aastha Shah.docx
Aastha Shah.docxAastha Shah.docx
Aastha Shah.docx
 
COMPUTER SCIENCE CLASS 12 PRACTICAL FILE
COMPUTER SCIENCE CLASS 12 PRACTICAL FILECOMPUTER SCIENCE CLASS 12 PRACTICAL FILE
COMPUTER SCIENCE CLASS 12 PRACTICAL FILE
 
ML with python.pdf
ML with python.pdfML with python.pdf
ML with python.pdf
 
Python Ireland Nov 2010 Talk: Unit Testing
Python Ireland Nov 2010 Talk: Unit TestingPython Ireland Nov 2010 Talk: Unit Testing
Python Ireland Nov 2010 Talk: Unit Testing
 
Presentation on Pandas in _ detail .pptx
Presentation on Pandas in _ detail .pptxPresentation on Pandas in _ detail .pptx
Presentation on Pandas in _ detail .pptx
 
Python programming workshop
Python programming workshopPython programming workshop
Python programming workshop
 
python practicals-solution-2019-20-class-xii.pdf
python practicals-solution-2019-20-class-xii.pdfpython practicals-solution-2019-20-class-xii.pdf
python practicals-solution-2019-20-class-xii.pdf
 
Baby Steps to Machine Learning at DevFest Lagos 2019
Baby Steps to Machine Learning at DevFest Lagos 2019Baby Steps to Machine Learning at DevFest Lagos 2019
Baby Steps to Machine Learning at DevFest Lagos 2019
 
wk5ppt1_Titanic
wk5ppt1_Titanicwk5ppt1_Titanic
wk5ppt1_Titanic
 
Naïve Bayes.pptx
Naïve Bayes.pptxNaïve Bayes.pptx
Naïve Bayes.pptx
 
Python-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptxPython-for-Data-Analysis.pptx
Python-for-Data-Analysis.pptx
 
solution-of-practicals-class-xii-comp.-sci.-083-2021-22 (1).pdf
solution-of-practicals-class-xii-comp.-sci.-083-2021-22 (1).pdfsolution-of-practicals-class-xii-comp.-sci.-083-2021-22 (1).pdf
solution-of-practicals-class-xii-comp.-sci.-083-2021-22 (1).pdf
 
support vector regression
support vector regressionsupport vector regression
support vector regression
 
Machine Learning Algorithms
Machine Learning AlgorithmsMachine Learning Algorithms
Machine Learning Algorithms
 
Unit 4_Working with Graphs _python (2).pptx
Unit 4_Working with Graphs _python (2).pptxUnit 4_Working with Graphs _python (2).pptx
Unit 4_Working with Graphs _python (2).pptx
 

Recently uploaded

B.COM Unit – 4 ( CORPORATE SOCIAL RESPONSIBILITY ( CSR ).pptx
B.COM Unit – 4 ( CORPORATE SOCIAL RESPONSIBILITY ( CSR ).pptxB.COM Unit – 4 ( CORPORATE SOCIAL RESPONSIBILITY ( CSR ).pptx
B.COM Unit – 4 ( CORPORATE SOCIAL RESPONSIBILITY ( CSR ).pptxpriyanshujha201
 
Call Girls Jp Nagar Just Call 👗 7737669865 👗 Top Class Call Girl Service Bang...
Call Girls Jp Nagar Just Call 👗 7737669865 👗 Top Class Call Girl Service Bang...Call Girls Jp Nagar Just Call 👗 7737669865 👗 Top Class Call Girl Service Bang...
Call Girls Jp Nagar Just Call 👗 7737669865 👗 Top Class Call Girl Service Bang...amitlee9823
 
Call Girls in Gomti Nagar - 7388211116 - With room Service
Call Girls in Gomti Nagar - 7388211116  - With room ServiceCall Girls in Gomti Nagar - 7388211116  - With room Service
Call Girls in Gomti Nagar - 7388211116 - With room Servicediscovermytutordmt
 
The Path to Product Excellence: Avoiding Common Pitfalls and Enhancing Commun...
The Path to Product Excellence: Avoiding Common Pitfalls and Enhancing Commun...The Path to Product Excellence: Avoiding Common Pitfalls and Enhancing Commun...
The Path to Product Excellence: Avoiding Common Pitfalls and Enhancing Commun...Aggregage
 
It will be International Nurses' Day on 12 May
It will be International Nurses' Day on 12 MayIt will be International Nurses' Day on 12 May
It will be International Nurses' Day on 12 MayNZSG
 
Monthly Social Media Update April 2024 pptx.pptx
Monthly Social Media Update April 2024 pptx.pptxMonthly Social Media Update April 2024 pptx.pptx
Monthly Social Media Update April 2024 pptx.pptxAndy Lambert
 
Organizational Transformation Lead with Culture
Organizational Transformation Lead with CultureOrganizational Transformation Lead with Culture
Organizational Transformation Lead with CultureSeta Wicaksana
 
FULL ENJOY Call Girls In Majnu Ka Tilla, Delhi Contact Us 8377877756
FULL ENJOY Call Girls In Majnu Ka Tilla, Delhi Contact Us 8377877756FULL ENJOY Call Girls In Majnu Ka Tilla, Delhi Contact Us 8377877756
FULL ENJOY Call Girls In Majnu Ka Tilla, Delhi Contact Us 8377877756dollysharma2066
 
A DAY IN THE LIFE OF A SALESMAN / WOMAN
A DAY IN THE LIFE OF A  SALESMAN / WOMANA DAY IN THE LIFE OF A  SALESMAN / WOMAN
A DAY IN THE LIFE OF A SALESMAN / WOMANIlamathiKannappan
 
FULL ENJOY Call Girls In Mahipalpur Delhi Contact Us 8377877756
FULL ENJOY Call Girls In Mahipalpur Delhi Contact Us 8377877756FULL ENJOY Call Girls In Mahipalpur Delhi Contact Us 8377877756
FULL ENJOY Call Girls In Mahipalpur Delhi Contact Us 8377877756dollysharma2066
 
Dr. Admir Softic_ presentation_Green Club_ENG.pdf
Dr. Admir Softic_ presentation_Green Club_ENG.pdfDr. Admir Softic_ presentation_Green Club_ENG.pdf
Dr. Admir Softic_ presentation_Green Club_ENG.pdfAdmir Softic
 
How to Get Started in Social Media for Art League City
How to Get Started in Social Media for Art League CityHow to Get Started in Social Media for Art League City
How to Get Started in Social Media for Art League CityEric T. Tung
 
VIP Call Girls In Saharaganj ( Lucknow ) 🔝 8923113531 🔝 Cash Payment (COD) 👒
VIP Call Girls In Saharaganj ( Lucknow  ) 🔝 8923113531 🔝  Cash Payment (COD) 👒VIP Call Girls In Saharaganj ( Lucknow  ) 🔝 8923113531 🔝  Cash Payment (COD) 👒
VIP Call Girls In Saharaganj ( Lucknow ) 🔝 8923113531 🔝 Cash Payment (COD) 👒anilsa9823
 
Famous Olympic Siblings from the 21st Century
Famous Olympic Siblings from the 21st CenturyFamous Olympic Siblings from the 21st Century
Famous Olympic Siblings from the 21st Centuryrwgiffor
 
Pharma Works Profile of Karan Communications
Pharma Works Profile of Karan CommunicationsPharma Works Profile of Karan Communications
Pharma Works Profile of Karan Communicationskarancommunications
 
Boost the utilization of your HCL environment by reevaluating use cases and f...
Boost the utilization of your HCL environment by reevaluating use cases and f...Boost the utilization of your HCL environment by reevaluating use cases and f...
Boost the utilization of your HCL environment by reevaluating use cases and f...Roland Driesen
 
Regression analysis: Simple Linear Regression Multiple Linear Regression
Regression analysis:  Simple Linear Regression Multiple Linear RegressionRegression analysis:  Simple Linear Regression Multiple Linear Regression
Regression analysis: Simple Linear Regression Multiple Linear RegressionRavindra Nath Shukla
 
Call Girls Hebbal Just Call 👗 7737669865 👗 Top Class Call Girl Service Bangalore
Call Girls Hebbal Just Call 👗 7737669865 👗 Top Class Call Girl Service BangaloreCall Girls Hebbal Just Call 👗 7737669865 👗 Top Class Call Girl Service Bangalore
Call Girls Hebbal Just Call 👗 7737669865 👗 Top Class Call Girl Service Bangaloreamitlee9823
 

Recently uploaded (20)

B.COM Unit – 4 ( CORPORATE SOCIAL RESPONSIBILITY ( CSR ).pptx
B.COM Unit – 4 ( CORPORATE SOCIAL RESPONSIBILITY ( CSR ).pptxB.COM Unit – 4 ( CORPORATE SOCIAL RESPONSIBILITY ( CSR ).pptx
B.COM Unit – 4 ( CORPORATE SOCIAL RESPONSIBILITY ( CSR ).pptx
 
Call Girls Jp Nagar Just Call 👗 7737669865 👗 Top Class Call Girl Service Bang...
Call Girls Jp Nagar Just Call 👗 7737669865 👗 Top Class Call Girl Service Bang...Call Girls Jp Nagar Just Call 👗 7737669865 👗 Top Class Call Girl Service Bang...
Call Girls Jp Nagar Just Call 👗 7737669865 👗 Top Class Call Girl Service Bang...
 
Call Girls in Gomti Nagar - 7388211116 - With room Service
Call Girls in Gomti Nagar - 7388211116  - With room ServiceCall Girls in Gomti Nagar - 7388211116  - With room Service
Call Girls in Gomti Nagar - 7388211116 - With room Service
 
The Path to Product Excellence: Avoiding Common Pitfalls and Enhancing Commun...
The Path to Product Excellence: Avoiding Common Pitfalls and Enhancing Commun...The Path to Product Excellence: Avoiding Common Pitfalls and Enhancing Commun...
The Path to Product Excellence: Avoiding Common Pitfalls and Enhancing Commun...
 
It will be International Nurses' Day on 12 May
It will be International Nurses' Day on 12 MayIt will be International Nurses' Day on 12 May
It will be International Nurses' Day on 12 May
 
Monthly Social Media Update April 2024 pptx.pptx
Monthly Social Media Update April 2024 pptx.pptxMonthly Social Media Update April 2024 pptx.pptx
Monthly Social Media Update April 2024 pptx.pptx
 
Organizational Transformation Lead with Culture
Organizational Transformation Lead with CultureOrganizational Transformation Lead with Culture
Organizational Transformation Lead with Culture
 
FULL ENJOY Call Girls In Majnu Ka Tilla, Delhi Contact Us 8377877756
FULL ENJOY Call Girls In Majnu Ka Tilla, Delhi Contact Us 8377877756FULL ENJOY Call Girls In Majnu Ka Tilla, Delhi Contact Us 8377877756
FULL ENJOY Call Girls In Majnu Ka Tilla, Delhi Contact Us 8377877756
 
A DAY IN THE LIFE OF A SALESMAN / WOMAN
A DAY IN THE LIFE OF A  SALESMAN / WOMANA DAY IN THE LIFE OF A  SALESMAN / WOMAN
A DAY IN THE LIFE OF A SALESMAN / WOMAN
 
FULL ENJOY Call Girls In Mahipalpur Delhi Contact Us 8377877756
FULL ENJOY Call Girls In Mahipalpur Delhi Contact Us 8377877756FULL ENJOY Call Girls In Mahipalpur Delhi Contact Us 8377877756
FULL ENJOY Call Girls In Mahipalpur Delhi Contact Us 8377877756
 
unwanted pregnancy Kit [+918133066128] Abortion Pills IN Dubai UAE Abudhabi
unwanted pregnancy Kit [+918133066128] Abortion Pills IN Dubai UAE Abudhabiunwanted pregnancy Kit [+918133066128] Abortion Pills IN Dubai UAE Abudhabi
unwanted pregnancy Kit [+918133066128] Abortion Pills IN Dubai UAE Abudhabi
 
Dr. Admir Softic_ presentation_Green Club_ENG.pdf
Dr. Admir Softic_ presentation_Green Club_ENG.pdfDr. Admir Softic_ presentation_Green Club_ENG.pdf
Dr. Admir Softic_ presentation_Green Club_ENG.pdf
 
How to Get Started in Social Media for Art League City
How to Get Started in Social Media for Art League CityHow to Get Started in Social Media for Art League City
How to Get Started in Social Media for Art League City
 
VIP Call Girls In Saharaganj ( Lucknow ) 🔝 8923113531 🔝 Cash Payment (COD) 👒
VIP Call Girls In Saharaganj ( Lucknow  ) 🔝 8923113531 🔝  Cash Payment (COD) 👒VIP Call Girls In Saharaganj ( Lucknow  ) 🔝 8923113531 🔝  Cash Payment (COD) 👒
VIP Call Girls In Saharaganj ( Lucknow ) 🔝 8923113531 🔝 Cash Payment (COD) 👒
 
Famous Olympic Siblings from the 21st Century
Famous Olympic Siblings from the 21st CenturyFamous Olympic Siblings from the 21st Century
Famous Olympic Siblings from the 21st Century
 
Pharma Works Profile of Karan Communications
Pharma Works Profile of Karan CommunicationsPharma Works Profile of Karan Communications
Pharma Works Profile of Karan Communications
 
Boost the utilization of your HCL environment by reevaluating use cases and f...
Boost the utilization of your HCL environment by reevaluating use cases and f...Boost the utilization of your HCL environment by reevaluating use cases and f...
Boost the utilization of your HCL environment by reevaluating use cases and f...
 
Regression analysis: Simple Linear Regression Multiple Linear Regression
Regression analysis:  Simple Linear Regression Multiple Linear RegressionRegression analysis:  Simple Linear Regression Multiple Linear Regression
Regression analysis: Simple Linear Regression Multiple Linear Regression
 
Mifty kit IN Salmiya (+918133066128) Abortion pills IN Salmiyah Cytotec pills
Mifty kit IN Salmiya (+918133066128) Abortion pills IN Salmiyah Cytotec pillsMifty kit IN Salmiya (+918133066128) Abortion pills IN Salmiyah Cytotec pills
Mifty kit IN Salmiya (+918133066128) Abortion pills IN Salmiyah Cytotec pills
 
Call Girls Hebbal Just Call 👗 7737669865 👗 Top Class Call Girl Service Bangalore
Call Girls Hebbal Just Call 👗 7737669865 👗 Top Class Call Girl Service BangaloreCall Girls Hebbal Just Call 👗 7737669865 👗 Top Class Call Girl Service Bangalore
Call Girls Hebbal Just Call 👗 7737669865 👗 Top Class Call Girl Service Bangalore
 

Ml all programs

  • 1. pg 1. Implement and demonstratethe FIND-Salgorithm for finding the most specific hypothesis based on a given set of training data samples. Read the training data from a .CSV file. import csv num_attributes=6 a=[] print("n the given training data setn") with open('sports.csv','r') as csvfile: reader=csv.reader(csvfile) for row in reader: a.append(row) print(row) print("n the initial value of hypothesis: ") hypothesis=['0']*num_attributes print(hypothesis) for j in range(0,num_attributes): hypothesis[j]=a[0][j] print("n find s: finding a maximaly specific hypothesisn") for i in range(0,len(a)): if a[i][num_attributes]=='yes': for j in range(0,num_attributes): if a[i][j]!=hypothesis[j]: hypothesis[j]='?' else: hypothesis[j]=a[i][j] print("for training instance no:{0} the hypothesis is".format(i),hypothesis) print("output") print(hypothesis) output: the given training data set ['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'Yes'] ['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'Yes'] ['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'no'] ['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'Yes'] the initial value of hypothesis: ['0', '0', '0', '0', '0', '0'] find s: finding a maximaly specific hypothesis
  • 2. for training instance no:0 the hypothesis is ['sunny', 'warm', 'normal', 'strong', 'warm', 'same'] for training instance no:1 the hypothesis is ['sunny', 'warm', 'normal', 'strong', 'warm', 'same'] for training instance no:2 the hypothesis is ['sunny', 'warm', 'normal', 'strong', 'warm', 'same'] for training instance no:3 the hypothesis is ['sunny', 'warm', 'normal', 'strong', 'warm', 'same'] output ['sunny', 'warm', 'normal', 'strong', 'warm', 'same'] pg 2:2. For a given set of training data examples stored in a .CSV file, implement and demonstrate the Candidate-Elimination algorithmto output a description of the set of all hypotheses consistent with the training examples. import csv a=[] print("n The Given Training Data Setn") with open('C:UsersISE-68Desktopws.csv','r') as csvFile: reader=csv.reader(csvFile) for row in reader: a.append(row) print(row) num_attributes=len(a[0])-1 print("n The initial value of hypothesis: ") S=['0']*num_attributes G=['?']*num_attributes print("n The most specific hypothesis S0:[0,0,0,0,0,0]n") print("n The most general hypothesis G0:[?,?,?,?,?,?]n") #compare with first training example for j in range(0,num_attributes): S[j]=a[0][j] #compare with remaining training examples of given data set print("n Candidate Elimination algorithm hypotheses version space computationn") temp=[] for i in range(0,len(a)): if a[i][num_attributes]=='Yes': for j in range(0,num_attributes): if a[i][j]!=S[j]: S[j]='?' for j in range(0,num_attributes):
  • 3. for k in range(1,len(temp)): if temp[k][j]!='?' and temp[k][j]!=S[j]: del temp[k] print("for Training Example No:{0} the hypothesis is S{0} ".format(i+1),S) if(len(temp)==0): print("for Training Example No:{0} the hypothesis is G{0} ".format(i+1),G) else: print("for Training Example No:{0} the hypothesis is G{0} ".format(i+1),temp) if a[i][num_attributes]=='No': for j in range(0,num_attributes): if S[j]!=a[i][j] and S[j]!='?': G[j]=S[j] temp.append(G) G=['?']*num_attributes print("for Training Example No:{0} the hypothesis is S{0} ".format(i+1),S) print("for Training Example No:{0} the hypothesis is G{0} ".format(i+1),temp) output: The Given Training Data Set ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes'] ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes'] ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No'] ['Sunny', 'Warm', 'High', 'Strong', 'Cold', 'Change', 'Yes'] The initial value of hypothesis: The most specific hypothesis S0:[0,0,0,0,0,0] The most general hypothesis G0:[?,?,?,?,?,?] Candidate Elimination algorithm hypotheses version space computation for Training Example No:1 the hypothesis is S1 ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same'] for Training Example No:1 the hypothesis is G1 ['?', '?', '?', '?', '?', '?'] for Training Example No:2 the hypothesis is S2 ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same'] for Training Example No:2 the hypothesis is G2 ['?', '?', '?', '?', '?', '?'] for Training Example No:3 the hypothesis is S3 ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same'] for Training Example No:3 the hypothesis is G3 [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?',
  • 4. '?', '?'], ['?', '?', '?', '?', '?', 'Same']] for Training Example No:4 the hypothesis is S4 ['Sunny', 'Warm', '?', 'Strong', '?', '?'] for Training Example No:4 the hypothesis is G4 [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']] pg 3:3. Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge toclassify a new sample import numpy as np import math import csv class Node: def __init__(self, attribute): self.attribute = attribute self.children = [] self.answer = "" def __str__(self): return self.attribute def read_data(filename): with open(filename, 'r') as csvfile: datareader = csv.reader(csvfile) metadata = next(datareader) traindata=[] for row in datareader: traindata.append(row) return (metadata, traindata) def subtables(data, col, delete): dict = {} items = np.unique(data[:, col]) # get unique values in particular column count = np.zeros((items.shape[0], 1), dtype=np.int32) #number of row = number of values for x in range(items.shape[0]): for y in range(data.shape[0]):
  • 5. if data[y, col] == items[x]: count[x] += 1 #count has the data of number of times each value is present in for x in range(items.shape[0]): dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32") pos = 0 for y in range(data.shape[0]): if data[y, col] == items[x]: dict[items[x]][pos] = data[y] pos += 1 if delete: dict[items[x]] = np.delete(dict[items[x]], col, 1) return items, dict def entropy(S): items = np.unique(S) if items.size == 1: return 0 counts = np.zeros((items.shape[0], 1)) sums = 0 for x in range(items.shape[0]): counts[x] = sum(S == items[x]) / (S.size) for count in counts: sums += -1 * count * math.log(count, 2) return sums def gain_ratio(data, col): items, dict = subtables(data, col, delete=False) #item is the unique value and dict is the data corresponding to it total_size = data.shape[0] entropies = np.zeros((items.shape[0], 1)) for x in range(items.shape[0]): ratio = dict[items[x]].shape[0]/(total_size)
  • 6. entropies[x] = ratio * entropy(dict[items[x]][:, -1]) total_entropy = entropy(data[:, -1]) for x in range(entropies.shape[0]): total_entropy -= entropies[x] return total_entropy def create_node(data, metadata): if (np.unique(data[:, -1])).shape[0] == 1: node = Node("") node.answer = np.unique(data[:, -1]) return node gains = np.zeros((data.shape[1] - 1, 1)) #size of gains= number of attribute to calculate gain for col in range(data.shape[1] - 1): gains[col] = gain_ratio(data, col) split = np.argmax(gains) node = Node(metadata[split]) metadata = np.delete(metadata, split, 0) items, dict = subtables(data, split, delete=True) for x in range(items.shape[0]): child = create_node(dict[items[x]], metadata) node.children.append((items[x], child)) return node def empty(size): s = "" for x in range(size): s += " " return s
  • 7. def print_tree(node, level): if node.answer != "": print(empty(level), node.answer) return print(empty(level), node.attribute) for value, n in node.children: print(empty(level + 1), value) print_tree(n, level + 2) metadata, traindata = read_data("tennis.csv") data = np.array(traindata) node = create_node(data, metadata) print_tree(node, 0) output: Outlook Overcast [b'Yes'] Rainy Windy b'False' [b'Yes'] b'True' [b'No'] Sunny Humidity b'High' [b'No'] b'Normal' [b'Yes'] pg 4:4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets. import numpy as np X = np.array(([2,9],[1,5],[3,6]), dtype=float) print("X=",X) y = np.array(([92], [86], [89]), dtype=float)
  • 8. print("y=",y) y=y/100 print("y=",y) def sigmoid (x): return 1/(1 + np.exp(-x)) def derivatives_sigmoid(x): return x * (1 - x) epoch=10000 lr=0.1 inputlayer_neurons = 2 hiddenlayer_neurons = 3 output_neurons = 1 wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons)) print("whn",wh) bh=np.random.uniform(size=(1,hiddenlayer_neurons)) print("bhn",bh) wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons)) print("woutn",wout) bout=np.random.uniform(size=(1,output_neurons)) print("boutn",bout) for i in range(epoch): hinp1=np.dot(X,wh) print("nn hinp1n",hinp1) hinp=hinp1 + bh print("hinpn",hinp) hlayer_act = sigmoid(hinp) print("hlayer_actn",hlayer_act) outinp1=np.dot(hlayer_act,wout) print("outinp1n",outinp1) outinp=outinp1+bout print("outinpn",outinp) output=sigmoid(outinp) print("outputn",output) EO = y-output print("EOn",EO) outgrad = derivatives_sigmoid(output) print("outgradn",outgrad) d_output = EO* outgrad print("d_outputn",d_output) EH= d_output.dot(wout.T)
  • 9. print("EHn",EH) hiddengrad=derivatives_sigmoid(hlayer_act) print("hiddengradn",hiddengrad) d_hiddenlayer=EH * hiddengrad print("d_hiddenlayern",d_hiddenlayer) wout += hlayer_act.T.dot(d_output) *lr print("woutn",wout) bout += np.sum(d_output, axis=0,keepdims=True) *lr print("boutn",bout) wh += X.T.dot(d_hiddenlayer) *lr print("whn",wh) bh +=np.sum(d_hiddenlayer,axis=0,keepdims=True) *lr print("bhn",bh) print("nn input: n" + str(X)) print("actual output: n"+ str(y)) print("predicted output: n", output) output: too big pg 5:5. Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets. import numpy as np import math import csv def read_data(filename): with open(filename, 'r') as csvfile: datareader = csv.reader(csvfile) metadata = next(datareader) traindata=[] for row in datareader: traindata.append(row) return (metadata, traindata) def splitDataset(dataset, splitRatio): #splits dataset to training set and test set based on split ratio trainSize = int(len(dataset) * splitRatio)
  • 10. trainSet = [] testset = list(dataset) i=0 while len(trainSet) < trainSize: trainSet.append(testset.pop(i)) return [trainSet, testset] def classify(data,test): total_size = data.shape[0] print("training data size=",total_size) print("test data sixe=",test.shape[0]) target=np.unique(data[:,-1]) count = np.zeros((target.shape[0]), dtype=np.int32) prob = np.zeros((target.shape[0]), dtype=np.float32) print("target count probability") for y in range(target.shape[0]): for x in range(data.shape[0]): if data[x,data.shape[1]-1] == target[y]: count[y] += 1 prob[y]=count[y]/total_size # comptes the probability of target print(target[y],"t",count[y],"t",prob[y]) prob0 = np.zeros((test.shape[1]-1), dtype=np.float32) prob1 = np.zeros((test.shape[1]-1), dtype=np.float32) accuracy=0 print("Instance prediction taget") for t in range(test.shape[0]): for k in range(test.shape[1]-1): # for each attribute in column count1=count0=0 for j in range(data.shape[0]): if test[t,k]== data[j,k] and data[j,data.shape[1]-1]== target[0]: count0+=1 elif test[t,k]== data[j,k] and data[j,data.shape[1]-1]== target[1]: count1+=1 prob0[k]= count0/count[0] #Find no probability of each attribute prob1[k]= count1/count[1] #Find yes probability of each attribute probno=prob[0] probyes=prob[1] for i in range(test.shape[1]-1): probno=probno*prob0[i]
  • 11. probyes=probyes*prob1[i] if probno>probyes: # prediction predict='No' else: predict='Yes' print(t+1,"t",predict,"t ",test[t,test.shape[1]-1]) if predict== test[t,test.shape[1]-1]: # computing accuracy accuracy+=1 final_accuracy=(accuracy/test.shape[0])*100 print("accuracy",final_accuracy,"%") return metadata, traindata = read_data("tennis.csv") splitRatio = 0.4 trainingset, testset = splitDataset(traindata, splitRatio) training=np.array(trainingset) testing=np.array(testset) print("------------------Training Data-------------------") print(trainingset) print("-------------------Test Data-------------------") print(testset) classify(training,testing) output: ------------------Training Data------------------- [['Sunny', 'Hot', 'High', 'False', 'No'], ['Sunny', 'Hot', 'High', 'True', 'No'], ['Overcast', 'Hot', 'High', 'False', 'Yes'], ['Rainy', 'Mild', 'High', 'False', 'Yes'], ['Rainy', 'Cool', 'Normal', 'False', 'Yes']] -------------------Test Data------------------- [['Rainy', 'Cool', 'Normal', 'True', 'No'], ['Overcast', 'Cool', 'Normal', 'True', 'Yes'], ['Sunny', 'Mild', 'High', 'False', 'No'], ['Sunny', 'Cool', 'Normal', 'False', 'Yes'], ['Rainy', 'Mild', 'Normal', 'False', 'Yes'], ['Sunny', 'Mild', 'Normal', 'True', 'Yes'], ['Overcast', 'Mild', 'High', 'True', 'Yes'], ['Overcast', 'Hot', 'Normal', 'False', 'Yes'], ['Rainy', 'Mild', 'High', 'True', 'No']] training data size= 5 test data sixe= 9 target count probability
  • 12. No 2 0.4 Yes 3 0.6 Instance prediction taget 1 Yes No 2 Yes Yes 3 Yes No 4 Yes Yes 5 Yes Yes 6 Yes Yes 7 Yes Yes 8 Yes Yes 9 Yes No accuracy 66.66666666666666 % pg 6:6. Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to perform this task. Built-in Java classes/API can be used to write the program. Calculate the accuracy, precision, and recall for your data set. import pandas as pd msg=pd.read_csv('naivetext1.csv',names=['message','label']) print('The dimensions of the dataset',msg.shape) msg['labelnum']=msg.label.map({'pos':1,'neg':0}) X=msg.message y=msg.labelnum print(X) print(y) #splitting the dataset into train and test data from sklearn.model_selection import train_test_split xtrain,xtest,ytrain,ytest=train_test_split(X,y) print(xtest.shape) print(xtrain.shape) print(ytest.shape) print(ytrain.shape) #output of count vectoriser is a sparse matrix from sklearn.feature_extraction.text import CountVectorizer count_vect = CountVectorizer() xtrain_dtm = count_vect.fit_transform(xtrain) xtest_dtm=count_vect.transform(xtest)
pg 6: 6. Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier model to
perform this task. Built-in Java classes/API can be used to write the program. Calculate the accuracy,
precision, and recall for your data set.
import pandas as pd
msg=pd.read_csv('naivetext1.csv',names=['message','label'])
print('The dimensions of the dataset',msg.shape)
msg['labelnum']=msg.label.map({'pos':1,'neg':0})
X=msg.message
y=msg.labelnum
print(X)
print(y)

#splitting the dataset into train and test data
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,y)
print(xtest.shape)
print(xtrain.shape)
print(ytest.shape)
print(ytrain.shape)

#output of the count vectoriser is a sparse matrix
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm=count_vect.transform(xtest)
print(count_vect.get_feature_names())
df=pd.DataFrame(xtrain_dtm.toarray(),columns=count_vect.get_feature_names())
print(df)            # tabular representation
print(xtrain_dtm)    # sparse matrix representation

#training the Naive Bayes (NB) classifier on the training data
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(xtrain_dtm,ytrain)
predicted = clf.predict(xtest_dtm)

#printing accuracy metrics
from sklearn import metrics
print('Accuracy metrics')
print('Accuracy of the classifier is',metrics.accuracy_score(ytest,predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest,predicted))
print('Recall and Precision')
print(metrics.recall_score(ytest,predicted))
print(metrics.precision_score(ytest,predicted))
output:
Accuracy metrics
Accuracy of the classifier is 0.6
Confusion matrix
[[1 2]
 [0 2]]
Recall and Precision
1.0
0.5
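Because the vectoriser must be fitted on the training messages and then reused unchanged on the test messages, the two steps can also be chained in a scikit-learn Pipeline. The short sketch below is an optional variant, not part of the original listing; it reuses the xtrain, xtest, ytrain, ytest split produced above.

# optional variant: chain the vectoriser and the classifier in a single Pipeline (reuses the split above)
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

text_clf = Pipeline([('vect', CountVectorizer()),
                     ('clf', MultinomialNB())])
text_clf.fit(xtrain, ytrain)
print('Pipeline accuracy:', text_clf.score(xtest, ytest))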
pg 7: 7. Write a program to construct a Bayesian network considering medical data. Use this model to
demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set. You can use
Java/Python ML library classes/API.
# data analysis, splitting and wrangling
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# machine learning
from sklearn.naive_bayes import GaussianNB

# column names in accordance with the feature information
col_names = ['age','sex','chest_pain','blood_pressure','serum_cholestoral','fasting_blood_sugar',
             'electrocardiographic','max_heart_rate','induced_angina','ST_depression',
             'slope','no_of_vessels','thal','diagnosis']

# read the file
df = pd.read_csv("heart_disease_dataset.csv", names=col_names, header=None, na_values="?")
print("Number of records: {}\nNumber of variables: {}".format(df.shape[0], df.shape[1]))

# display the first 5 rows
df.head()
df.info()

# extract numeric columns and find the categorical ones
numeric_columns = ['serum_cholestoral', 'max_heart_rate', 'age', 'blood_pressure', 'ST_depression']
categorical_columns = [c for c in df.columns if c not in numeric_columns]
print(categorical_columns)

# count values of the explained variable
df.diagnosis.value_counts()

# create a boolean vector and map it to the corresponding values (True=1, False=0)
df.diagnosis = (df.diagnosis != 0).astype(int)
df.diagnosis.value_counts()

# view of descriptive statistics
df[numeric_columns].describe()
# count ill vs healthy people grouped by sex
df.groupby(['sex','diagnosis'])['diagnosis'].count()

# average number of diagnosed people grouped by number of blood vessels detected by fluoroscopy
df[['no_of_vessels','diagnosis']].groupby('no_of_vessels').mean()

# show columns having missing values
df.isnull().sum()

# fill missing values with the mode
df['no_of_vessels'].fillna(df['no_of_vessels'].mode()[0], inplace=True)
df['thal'].fillna(df['thal'].mode()[0], inplace=True)

# separate the features from the target variable
X, y = df.iloc[:, :-1], df.iloc[:, -1]
print(X.shape)
print(y.shape)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2606)
print("train_set_x shape: " + str(X_train.shape))
print("train_set_y shape: " + str(y_train.shape))
print("test_set_x shape: " + str(X_test.shape))
print("test_set_y shape: " + str(y_test.shape))

# scale the feature matrices
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = GaussianNB()

# train the model
model.fit(X_train,y_train)

# check accuracy and print out the results
fit_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
  • 16. print(f"Train accuracy: {fit_accuracy:0.2%}") print(f"Test accuracy: {test_accuracy:0.2%}") output: Number of records: 303 Number of variables: 14 <class 'pandas.core.frame.DataFrame'> RangeIndex: 303 entries, 0 to 302 Data columns (total 14 columns): age 303 non-null float64 sex 303 non-null float64 chest_pain 303 non-null float64 blood_pressure 303 non-null float64 serum_cholestoral 303 non-null float64 fasting_blood_sugar 303 non-null float64 electrocardiographic 303 non-null float64 max_heart_rate 303 non-null float64 induced_angina 303 non-null float64 ST_depression 303 non-null float64 slope 303 non-null float64 no_of_vessels 299 non-null float64 thal 301 non-null float64 diagnosis 303 non-null int64 dtypes: float64(13), int64(1) memory usage: 33.2 KB ['sex', 'chest_pain', 'fasting_blood_sugar', 'electrocardiographic', 'induced_angina', 'slope', 'no_of_vessels', 'thal', 'diagnosis'] (303, 13) (303,) train_set_x shape: (212, 13) train_set_y shape: (212,) test_set_x shape: (91, 13) test_set_y shape: (91,) Train accuracy: 85.38% Test accuracy: 86.81%
pg 8: 8. Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same dataset for
clustering using the k-Means algorithm. Compare the results of these two algorithms and comment on the
quality of clustering. You can add Java/Python ML library classes/API in the program.
#from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans

data=pd.read_csv("clusterdata.csv")
df1=pd.DataFrame(data)
print(df1)
f1 = df1['Distance_Feature'].values
f2 = df1['Speeding_Feature'].values
X=np.matrix(list(zip(f1,f2)))

plt.plot(1)
plt.subplot(511)
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('Dataset')
plt.ylabel('speeding_feature')
plt.xlabel('distance_feature')
plt.scatter(f1,f2)

colors = ['b', 'g', 'r']
markers = ['o', 'v', 's']

# create a new plot for the k-Means clustering
plt.plot(2)
ax=plt.subplot(513)
kmeans_model = KMeans(n_clusters=3).fit(X)
for i, l in enumerate(kmeans_model.labels_):
    fig1=plt.plot(f1[i], f2[i], color=colors[l], marker=markers[l])
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('K-Means')
plt.ylabel('speeding_feature')
plt.xlabel('distance_feature')

# create a new plot for the Gaussian mixture (EM) clustering
plt.plot(3)
plt.subplot(515)
gmm=GaussianMixture(n_components=3).fit(X)
labels= gmm.predict(X)
for i, l in enumerate(labels):
    plt.plot(f1[i], f2[i], color=colors[l], marker=markers[l])
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('Gaussian Mixture')
plt.ylabel('speeding_feature')
plt.xlabel('distance_feature')
plt.show()
output:
      Driver_ID  Distance_Feature  Speeding_Feature
0    3423311935             71.24                28
1    3423313212             52.53                25
2    3423313724             64.54                27
3    3423311373             55.69                22
4    3423310999             54.58                25
5    3423313857             41.91                10
6    3423312432             58.64                20
7    3423311434             52.02                 8
8    3423311328             31.25                34
9    3423312488             44.31                19
10   3423311254             49.35                40
11   3423312943             58.07                45
12   3423312536             44.22                22
13   3423311542             55.73                19
14   3423312176             46.63                43
15   3423314176             52.97                32
16   3423314202             46.25                35
17   3423311346             51.55                27
18   3423310666             57.05                26
19   3423313527             58.45                30
20   3423312182             43.42                23
21   3423313590             55.68                37
22   3423312268             55.15                18
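The listing compares the two clusterings only through the plots. Since the problem statement also asks for a comment on the quality of clustering, one simple way to quantify it is to compute a silhouette score for each label assignment. This is an optional addition, not part of the original program; it reuses X, kmeans_model and labels defined above.

# optional numeric comparison of the two clusterings (reuses X, kmeans_model and labels from above)
from sklearn.metrics import silhouette_score

Xa = np.asarray(X)
print("silhouette score, k-Means:          ", silhouette_score(Xa, kmeans_model.labels_))
print("silhouette score, Gaussian Mixture: ", silhouette_score(Xa, labels))

A score closer to 1 indicates tighter, better-separated clusters, so the higher of the two values points to the assignment with the better geometric separation on this data set.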
pg 9: 9. Write a program to implement the k-Nearest Neighbour algorithm to classify the iris data set. Print
both correct and wrong predictions. Java/Python ML library classes can be used for this problem.
#KNN algorithm
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix
#from sklearn import metrics
import pandas as pd
import numpy as np
from sklearn import datasets

iris=datasets.load_iris()
iris_data=iris.data
iris_labels=iris.target
#print(iris_data)
#print(iris_labels)

X_train,x_test,y_train,y_test=train_test_split(iris_data,iris_labels,test_size=0.20)
classifier=KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train,y_train)
y_pred=classifier.predict(x_test)
print('confusion_matrix is as follows')
print(confusion_matrix(y_test,y_pred))
print('Accuracy Metrics')
print(classification_report(y_test,y_pred))
output:
confusion_matrix is as follows
[[11  0  0]
 [ 0  9  0]
 [ 0  1  9]]
Accuracy Metrics
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        11
          1       0.90      1.00      0.95         9
          2       1.00      0.90      0.95        10

avg / total       0.97      0.97      0.97        30
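The problem statement asks for the correct and wrong predictions to be printed explicitly, which the listing above only summarises through the confusion matrix. A small loop such as the following would add that output; it is an assumed extension that reuses x_test, y_test, y_pred and the iris object from the program.

# print each test sample with its predicted and actual class, flagged as Correct or Wrong
for i in range(len(y_test)):
    result = "Correct" if y_pred[i] == y_test[i] else "Wrong"
    print(x_test[i], "predicted:", iris.target_names[y_pred[i]],
          "actual:", iris.target_names[y_test[i]], "->", result)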
pg 10: 10. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points.
Select an appropriate data set for your experiment and draw graphs.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np1

def kernel(point,xmat,k):
    m,n = np1.shape(xmat)
    weights = np1.mat(np1.eye((m)))
    for j in range(m):
        diff = point - X[j]
        weights[j,j] = np1.exp(diff*diff.T/(-2.0*k**2))
    return weights

def localWeight(point,xmat,ymat,k):
    wei = kernel(point,xmat,k)
    W = (X.T*(wei*X)).I*(X.T*(wei*ymat.T))
    return W

def localWeightRegression(xmat,ymat,k):
    m,n = np1.shape(xmat)
    ypred = np1.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i]*localWeight(xmat[i],xmat,ymat,k)
    return ypred

# load data points
data = pd.read_csv('tips1.csv')
bill = np1.array(data.total_bill)
tip = np1.array(data.tip)

# prepare the design matrix by adding a column of ones to the bill values
mbill = np1.mat(bill)              # mat treats the array as a matrix
mtip = np1.mat(tip)
m = np1.shape(mbill)[1]
print("******",m)
one = np1.mat(np1.ones(m))
X = np1.hstack((one.T,mbill.T))    # stack arrays in sequence horizontally (column wise)

# set the bandwidth k here
ypred = localWeightRegression(X,mtip,2)
SortIndex = X[:,1].argsort(0)
xsort = X[SortIndex][:,0]

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.scatter(bill,tip, color='blue')
ax.plot(xsort[:,1],ypred[SortIndex], color='red', linewidth=1)
plt.xlabel('Total bill')
plt.ylabel('Tip')
#plt.show()
output:
Text(0,0.5,'Tip')
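For each query point, localWeight solves the weighted least-squares normal equations W = (Xᵀ w X)⁻¹ (Xᵀ w y), where w is the diagonal matrix of kernel weights returned by kernel, so a separate local line is fitted around every point. The bandwidth k controls how local that fit is: a small k follows the data closely, while a large k smooths it out. The short sketch below is an assumed addition that reuses X, mtip, bill, tip, xsort and SortIndex from the program; it overlays fits for two bandwidths so the effect can be seen on one graph.

# overlay locally weighted fits for two bandwidths (reuses X, mtip, bill, tip, xsort, SortIndex from above)
plt.figure()
plt.scatter(bill, tip, color='blue', s=10)
for k, c in [(0.8, 'green'), (5, 'orange')]:
    yk = localWeightRegression(X, mtip, k)
    plt.plot(xsort[:,1], yk[SortIndex], color=c, linewidth=1, label='k = {}'.format(k))
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.legend()
plt.show()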