SlideShare a Scribd company logo
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 1/14
In [1]:
In [2]:
In [3]:
train set shape: (56000, 52)
test set shape: (24000, 51)
import pandas as pd
import numpy as np
from pandas import DataFrame
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import catboost
from rgf.sklearn import RGFClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
# Load the competition train and test splits and report their dimensions.
train_csv = "Datasets/2020 Qualifying competition/Train.csv"
test_csv = "Datasets/2020 Qualifying competition/Test.csv"
train = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)
print("train set shape:", train.shape)
print("test set shape:", test.shape)
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 2/14
In [4]:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56000 entries, 0 to 55999
Data columns (total 52 columns):
Applicant_ID 56000 non-null object
form_field1 53471 non-null float64
form_field2 52156 non-null float64
form_field3 55645 non-null float64
form_field4 55645 non-null float64
form_field5 55645 non-null float64
form_field6 42640 non-null float64
form_field7 50837 non-null float64
form_field8 42640 non-null float64
form_field9 47992 non-null float64
form_field10 55645 non-null float64
form_field11 24579 non-null float64
form_field12 46105 non-null float64
form_field13 50111 non-null float64
form_field14 56000 non-null int64
form_field15 33525 non-null float64
form_field16 42964 non-null float64
form_field17 44849 non-null float64
form_field18 45598 non-null float64
form_field19 55996 non-null float64
form_field20 55645 non-null float64
form_field21 40146 non-null float64
form_field22 35600 non-null float64
form_field23 27877 non-null float64
form_field24 42703 non-null float64
form_field25 50550 non-null float64
form_field26 48562 non-null float64
form_field27 46701 non-null float64
form_field28 55645 non-null float64
form_field29 55645 non-null float64
form_field30 30491 non-null float64
form_field31 16592 non-null float64
form_field32 50550 non-null float64
form_field33 54744 non-null float64
form_field34 55645 non-null float64
form_field35 32852 non-null float64
form_field36 54005 non-null float64
form_field37 50550 non-null float64
form_field38 55645 non-null float64
form_field39 51789 non-null float64
form_field40 12271 non-null float64
form_field41 17771 non-null float64
form_field42 54677 non-null float64
form_field43 55432 non-null float64
form_field44 50617 non-null float64
form_field45 24683 non-null float64
form_field46 40096 non-null float64
form_field47 56000 non-null object
form_field48 35111 non-null float64
form_field49 55645 non-null float64
# Summary of the training frame: per-column non-null count and dtype, plus memory usage
train.info()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 3/14
form_field50 44944 non-null float64
default_status 56000 non-null object
dtypes: float64(48), int64(1), object(3)
memory usage: 22.2+ MB
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 4/14
In [5]:
Out[5]: Applicant_ID object
form_field1 float64
form_field2 float64
form_field3 float64
form_field4 float64
form_field5 float64
form_field6 float64
form_field7 float64
form_field8 float64
form_field9 float64
form_field10 float64
form_field11 float64
form_field12 float64
form_field13 float64
form_field14 int64
form_field15 float64
form_field16 float64
form_field17 float64
form_field18 float64
form_field19 float64
form_field20 float64
form_field21 float64
form_field22 float64
form_field23 float64
form_field24 float64
form_field25 float64
form_field26 float64
form_field27 float64
form_field28 float64
form_field29 float64
form_field30 float64
form_field31 float64
form_field32 float64
form_field33 float64
form_field34 float64
form_field35 float64
form_field36 float64
form_field37 float64
form_field38 float64
form_field39 float64
form_field40 float64
form_field41 float64
form_field42 float64
form_field43 float64
form_field44 float64
form_field45 float64
form_field46 float64
form_field47 object
form_field48 float64
form_field49 float64
form_field50 float64
dtype: object
# Show the dtype of every column in the test frame
test.dtypes
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 5/14
In [6]:
Out[6]:
Applicant_ID form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_f
0 Apcnt_1000000 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689
1 Apcnt_1000004 3456.0 0.67400 0.2342 0.0 0.000 0.0 898
2 Apcnt_1000008 3276.0 0.53845 3.1510 0.0 6.282 NaN 956
3 Apcnt_1000012 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044
4 Apcnt_1000016 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214
5 rows × 52 columns
# Preview the first five rows of the raw training frame
train.head()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 6/14
In [7]:
Out[7]: Applicant_ID object
form_field1 float64
form_field2 float64
form_field3 float64
form_field4 float64
form_field5 float64
form_field6 float64
form_field7 float64
form_field8 float64
form_field9 float64
form_field10 float64
form_field11 float64
form_field12 float64
form_field13 float64
form_field14 int64
form_field15 float64
form_field16 float64
form_field17 float64
form_field18 float64
form_field19 float64
form_field20 float64
form_field21 float64
form_field22 float64
form_field23 float64
form_field24 float64
form_field25 float64
form_field26 float64
form_field27 float64
form_field28 float64
form_field29 float64
form_field30 float64
form_field31 float64
form_field32 float64
form_field33 float64
form_field34 float64
form_field35 float64
form_field36 float64
form_field37 float64
form_field38 float64
form_field39 float64
form_field40 float64
form_field41 float64
form_field42 float64
form_field43 float64
form_field44 float64
form_field45 float64
form_field46 float64
form_field47 object
form_field48 float64
form_field49 float64
form_field50 float64
default_status object
dtype: object
# Show the dtype of every column in the training frame
train.dtypes
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 7/14
In [8]:
Out[8]: Applicant_ID 0
form_field1 2529
form_field2 3844
form_field3 355
form_field4 355
form_field5 355
form_field6 13360
form_field7 5163
form_field8 13360
form_field9 8008
form_field10 355
form_field11 31421
form_field12 9895
form_field13 5889
form_field14 0
form_field15 22475
form_field16 13036
form_field17 11151
form_field18 10402
form_field19 4
form_field20 355
form_field21 15854
form_field22 20400
form_field23 28123
form_field24 13297
form_field25 5450
form_field26 7438
form_field27 9299
form_field28 355
form_field29 355
form_field30 25509
form_field31 39408
form_field32 5450
form_field33 1256
form_field34 355
form_field35 23148
form_field36 1995
form_field37 5450
form_field38 355
form_field39 4211
form_field40 43729
form_field41 38229
form_field42 1323
form_field43 568
form_field44 5383
form_field45 31317
form_field46 15904
form_field47 0
form_field48 20889
form_field49 355
form_field50 11056
default_status 0
dtype: int64
# Count the missing values (NaN) in each column of the training frame
train.isnull().sum()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 8/14
In [9]:
In [10]:
Out[10]: Applicant_ID 0
form_field1 0
form_field2 0
form_field3 0
form_field4 0
form_field5 0
form_field6 0
form_field7 0
form_field8 0
form_field9 0
form_field10 0
form_field11 0
form_field12 0
form_field13 0
form_field14 0
form_field15 0
form_field16 0
form_field17 0
form_field18 0
form_field19 0
# Replace every missing value (NaN) in both frames with one sentinel value.
# NOTE(review): the train.head() output displayed after this step shows -9999.0
# in a filled cell, which does not match the -999 used here — confirm which
# value is intended and re-run the notebook from a fresh kernel.
missing_sentinel = -999
train = train.fillna(missing_sentinel)
test = test.fillna(missing_sentinel)
# Per-column NaN count of the test frame after the fill
test.isnull().sum()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 9/14
In [11]:
Out[11]: Applicant_ID 0
form_field1 0
form_field2 0
form_field3 0
form_field4 0
form_field5 0
form_field6 0
form_field7 0
form_field8 0
form_field9 0
form_field10 0
form_field11 0
form_field12 0
form_field13 0
form_field14 0
form_field15 0
form_field16 0
form_field17 0
form_field18 0
form_field19 0
form_field20 0
form_field21 0
form_field22 0
form_field23 0
form_field24 0
form_field25 0
form_field26 0
form_field27 0
form_field28 0
form_field29 0
form_field30 0
form_field31 0
form_field32 0
form_field33 0
form_field34 0
form_field35 0
form_field36 0
form_field37 0
form_field38 0
form_field39 0
form_field40 0
form_field41 0
form_field42 0
form_field43 0
form_field44 0
form_field45 0
form_field46 0
form_field47 0
form_field48 0
form_field49 0
form_field50 0
default_status 0
dtype: int64
# Per-column NaN count of the training frame after the fill
train.isnull().sum()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 10/14
In [12]:
In [13]:
In [14]:
In [15]:
Out[12]:
Applicant_ID form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_f
0 Apcnt_1000000 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689
1 Apcnt_1000004 3456.0 0.67400 0.2342 0.0 0.000 0.0 898
2 Apcnt_1000008 3276.0 0.53845 3.1510 0.0 6.282 -9999.0 956
3 Apcnt_1000012 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044
4 Apcnt_1000016 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214
5 rows × 52 columns
Out[15]: 0 0
1 0
2 1
3 0
4 0
..
55995 0
55996 1
55997 0
55998 0
55999 0
Name: default_status, Length: 56000, dtype: int32
# Preview the first five rows of the training frame after the NaN fill
train.head()
# Encoding Categorical features
#
# form_field47 is encoded with ONE LabelEncoder that is fitted on the values of
# both frames, so train and test are guaranteed to share the same
# category -> integer mapping. Previously the test column was encoded with its
# own fit_transform call, which can assign different integers to the same
# category whenever the two frames do not contain exactly the same categories.
field47_encoder = LabelEncoder()
field47_encoder.fit(
    np.concatenate([train['form_field47'].values, test['form_field47'].values])
)
train['form_field47'] = field47_encoder.transform(train['form_field47'].values)
test['form_field47'] = field47_encoder.transform(test['form_field47'].values)

# The target column only exists in train and gets its own, separate encoder.
le = LabelEncoder()
train['default_status'] = le.fit_transform(train['default_status'].values)
# Keep the label-encoded target as its own Series before it is dropped from train
y= train["default_status"]
# Display the target Series
y
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 11/14
In [16]:
In [17]:
In [18]:
In [19]:
In [20]:
In [21]:
In [23]:
In [24]:
Out[16]:
form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_field7 form_field
0 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689720.0 252072
1 3456.0 0.67400 0.2342 0.0 0.000 0.0 898979.0 497531
2 3276.0 0.53845 3.1510 0.0 6.282 -9999.0 956940.0 -9999
3 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044703.0 385499
4 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214728.0 214728
5 rows × 50 columns
# Remove the applicant identifier from both frames and the target from train,
# leaving only the model input columns.
train = train.drop(columns=["Applicant_ID", "default_status"])
test = test.drop(columns=["Applicant_ID"])
train.head()
# Min-max scale the features. The scaler is fitted on the training frame only
# and the same fitted scaler is then applied to the test frame.
scaler = MinMaxScaler()
scaledtrain = DataFrame(scaler.fit_transform(train))
scaledtest = DataFrame(scaler.transform(test))
# split input(x) and output(y) data
#from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.3,
#stratify=y,random_state=42)
# Hyper-parameter dict, presumably intended for a CatBoost model (the first line is cut off in this copy).
# NOTE(review): `params` is not passed to any estimator built in the visible cells — confirm whether it is still needed.
params= {'n_estimators': 2500,'learning_rate':0.01,'objective': 'CrossEntropy','e
'random_seed': 3500,'verbose': False}
# 15-fold stratified splitter; shuffling is seeded so the folds are the same on every run.
fold= StratifiedKFold(n_splits= 15, shuffle= True,random_state= 999)
# NOTE(review): mid-notebook import — consider moving it to the import cell at the top.
from catboost import CatBoostClassifier
# The three base learners that are combined by the VotingClassifier below.
model1= CatBoostClassifier(random_state= 499, verbose= False)
model2= RGFClassifier(max_leaf= 1500)
model3= LGBMClassifier(n_estimators= 900, random_state= 499)
# Accumulators filled by the cross-validation loop: per-fold validation scores and per-fold test predictions.
localscore, testscore= [], []
# Ensemble over the base learners (the rest of this line is cut off in this copy).
model= VotingClassifier(estimators= [('model1', model1), ('model2', model2), ('mo
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 12/14
In [25]:
#########################fold0
[Voting] ................... (1 of 3) Processing model1, total= 36.0s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.1s
#########################fold1
[Voting] ................... (1 of 3) Processing model1, total= 37.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.8min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold2
[Voting] ................... (1 of 3) Processing model1, total= 36.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold3
[Voting] ................... (1 of 3) Processing model1, total= 38.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold4
[Voting] ................... (1 of 3) Processing model1, total= 37.6s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 12.2s
#########################fold5
[Voting] ................... (1 of 3) Processing model1, total= 35.5s
[Voting] ................... (2 of 3) Processing model2, total= 1.8min
[Voting] ................... (3 of 3) Processing model3, total= 10.1s
#########################fold6
[Voting] ................... (1 of 3) Processing model1, total= 37.6s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold7
[Voting] ................... (1 of 3) Processing model1, total= 35.9s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.3s
#########################fold8
[Voting] ................... (1 of 3) Processing model1, total= 37.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold9
[Voting] ................... (1 of 3) Processing model1, total= 37.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.1s
#########################fold10
# Cross-validated evaluation of the voting ensemble.
# For every stratified fold: fit on the training part, score ROC AUC on the
# held-out part, and predict the full test set so the per-fold test
# predictions can be averaged afterwards.
#
# The accumulators are reset here so that re-running this cell starts from
# empty lists instead of appending to the results of a previous run (which
# would skew both the mean score and the averaged test prediction).
localscore, testscore = [], []
for i, (trainval, valvalue) in enumerate(fold.split(scaledtrain, y)):
    print('#' * 25 + 'fold' + str(i))
    train_x, val_x = scaledtrain.iloc[trainval], scaledtrain.iloc[valvalue]
    train_y, val_y = y.iloc[trainval], y.iloc[valvalue]
    model.fit(train_x, train_y)
    # Last column of predict_proba, i.e. the probability of the last class label
    pred = model.predict_proba(val_x)[:, -1]
    result = roc_auc_score(val_y, pred)
    localscore.append(result)
    # Test-set probabilities from the model fitted on this fold
    testpred = model.predict_proba(scaledtest)[:, -1]
    testscore.append(testpred)
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 13/14
In [26]:
In [28]:
[Voting] ................... (1 of 3) Processing model1, total= 35.9s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.3s
#########################fold11
[Voting] ................... (1 of 3) Processing model1, total= 37.7s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 12.6s
#########################fold12
[Voting] ................... (1 of 3) Processing model1, total= 35.8s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.4s
#########################fold13
[Voting] ................... (1 of 3) Processing model1, total= 37.1s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.0s
#########################fold14
[Voting] ................... (1 of 3) Processing model1, total= 36.2s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.6s
0.84036499050012
Out[28]:
0 1 2 3 4 5 6 7 8
0 0.837618 0.835608 0.837453 0.856249 0.829797 0.84765 0.836926 0.84438 0.823843 0.85009
# Mean validation ROC AUC across all folds
mean_auc = np.mean(localscore)
print(mean_auc)
# One-row table holding the individual fold scores
output = pd.DataFrame(localscore).transpose()
output.head()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 14/14
In [29]:
In [33]:
In [34]:
In [ ]:
Out[29]:
Applicant_ID default_status
0 Apcnt_1000032 1
1 Apcnt_1000048 1
2 Apcnt_1000052 1
3 Apcnt_1000076 1
4 Apcnt_1000080 1
... ... ...
23995 Apcnt_999940 1
23996 Apcnt_999956 1
23997 Apcnt_999976 1
23998 Apcnt_999984 1
23999 Apcnt_999992 1
24000 rows × 2 columns
# Load the sample submission file as the template for the final predictions (the line is cut off in this copy).
# NOTE(review): the directory is spelled "Competition" here but "competition" where Train.csv/Test.csv are read —
# on a case-sensitive file system these are different paths; confirm and unify.
submission= pd.read_csv('Datasets/2020 Qualifying Competition/SampleSubmission.cs
# Display the template frame
submission
# Average the per-fold test predictions element-wise, store them in the
# submission template and write the result to disk without the index column.
test_pred = np.asarray(testscore).mean(axis=0)
submission['default_status'] = test_pred
submission.to_csv('grace.csv', index=False)

More Related Content

What's hot

FP305 data structure PAPER FINAL SEM 3
FP305 data structure PAPER FINAL SEM 3FP305 data structure PAPER FINAL SEM 3
FP305 data structure PAPER FINAL SEM 3
Syahriha Ruslan
 
Computer science-2010-cbse-question-paper
Computer science-2010-cbse-question-paperComputer science-2010-cbse-question-paper
Computer science-2010-cbse-question-paper
Deepak Singh
 
Question Paper Code 065 informatic Practice New CBSE - 2021
Question Paper Code 065 informatic Practice New CBSE - 2021 Question Paper Code 065 informatic Practice New CBSE - 2021
Question Paper Code 065 informatic Practice New CBSE - 2021
FarhanAhmade
 
Sp 1418794917
Sp 1418794917Sp 1418794917
Sp 1418794917
lakshmi r
 
Sample Paper 2 Class XI (Computer Science)
Sample Paper 2 Class XI (Computer Science)Sample Paper 2 Class XI (Computer Science)
Sample Paper 2 Class XI (Computer Science)
Poonam Chopra
 
FINAL PAPER FP304 DATABASE SYSTEM
FINAL PAPER FP304 DATABASE SYSTEMFINAL PAPER FP304 DATABASE SYSTEM
FINAL PAPER FP304 DATABASE SYSTEM
Amira Dolce Farhana
 
C Programming Training In Ambala ! BATRA COMPUTER CENTRE
C Programming Training In Ambala ! BATRA COMPUTER CENTREC Programming Training In Ambala ! BATRA COMPUTER CENTRE
C Programming Training In Ambala ! BATRA COMPUTER CENTRE
jatin batra
 
Cbse question-paper-computer-science-2009
Cbse question-paper-computer-science-2009Cbse question-paper-computer-science-2009
Cbse question-paper-computer-science-2009
Deepak Singh
 
Sample Paper Class XI (Informatics Practices)
Sample Paper Class XI (Informatics Practices)Sample Paper Class XI (Informatics Practices)
Sample Paper Class XI (Informatics Practices)
Poonam Chopra
 
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
FarhanAhmade
 
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEMFINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
Amira Dolce Farhana
 
Computer Science Sample Paper 2015
Computer Science Sample Paper 2015Computer Science Sample Paper 2015
Computer Science Sample Paper 2015
Poonam Chopra
 
Fp304 DATABASE SYSTEM JUNE 2012
Fp304   DATABASE SYSTEM JUNE 2012Fp304   DATABASE SYSTEM JUNE 2012
Fp304 DATABASE SYSTEM JUNE 2012
Syahriha Ruslan
 
FP304 DATABASE SYSTEM FINAL PAPER
FP304    DATABASE SYSTEM FINAL PAPERFP304    DATABASE SYSTEM FINAL PAPER
FP304 DATABASE SYSTEM FINAL PAPER
Syahriha Ruslan
 
Computer science ms
Computer science msComputer science ms
Computer science ms
B Bhuvanesh
 
7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...
7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...
7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...
BGS Institute of Technology, Adichunchanagiri University (ACU)
 
7th Semester Information Science (2013-June) Question Papers
7th Semester Information Science (2013-June) Question Papers 7th Semester Information Science (2013-June) Question Papers
7th Semester Information Science (2013-June) Question Papers
BGS Institute of Technology, Adichunchanagiri University (ACU)
 
Oops qb cse
Oops qb cseOops qb cse
7th semester Computer Science and Information Science Engg (2013 December) Qu...
7th semester Computer Science and Information Science Engg (2013 December) Qu...7th semester Computer Science and Information Science Engg (2013 December) Qu...
7th semester Computer Science and Information Science Engg (2013 December) Qu...
BGS Institute of Technology, Adichunchanagiri University (ACU)
 
5th Semester (June; July-2015) Computer Science and Information Science Engin...
5th Semester (June; July-2015) Computer Science and Information Science Engin...5th Semester (June; July-2015) Computer Science and Information Science Engin...
5th Semester (June; July-2015) Computer Science and Information Science Engin...
BGS Institute of Technology, Adichunchanagiri University (ACU)
 

What's hot (20)

FP305 data structure PAPER FINAL SEM 3
FP305 data structure PAPER FINAL SEM 3FP305 data structure PAPER FINAL SEM 3
FP305 data structure PAPER FINAL SEM 3
 
Computer science-2010-cbse-question-paper
Computer science-2010-cbse-question-paperComputer science-2010-cbse-question-paper
Computer science-2010-cbse-question-paper
 
Question Paper Code 065 informatic Practice New CBSE - 2021
Question Paper Code 065 informatic Practice New CBSE - 2021 Question Paper Code 065 informatic Practice New CBSE - 2021
Question Paper Code 065 informatic Practice New CBSE - 2021
 
Sp 1418794917
Sp 1418794917Sp 1418794917
Sp 1418794917
 
Sample Paper 2 Class XI (Computer Science)
Sample Paper 2 Class XI (Computer Science)Sample Paper 2 Class XI (Computer Science)
Sample Paper 2 Class XI (Computer Science)
 
FINAL PAPER FP304 DATABASE SYSTEM
FINAL PAPER FP304 DATABASE SYSTEMFINAL PAPER FP304 DATABASE SYSTEM
FINAL PAPER FP304 DATABASE SYSTEM
 
C Programming Training In Ambala ! BATRA COMPUTER CENTRE
C Programming Training In Ambala ! BATRA COMPUTER CENTREC Programming Training In Ambala ! BATRA COMPUTER CENTRE
C Programming Training In Ambala ! BATRA COMPUTER CENTRE
 
Cbse question-paper-computer-science-2009
Cbse question-paper-computer-science-2009Cbse question-paper-computer-science-2009
Cbse question-paper-computer-science-2009
 
Sample Paper Class XI (Informatics Practices)
Sample Paper Class XI (Informatics Practices)Sample Paper Class XI (Informatics Practices)
Sample Paper Class XI (Informatics Practices)
 
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
 
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEMFINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
 
Computer Science Sample Paper 2015
Computer Science Sample Paper 2015Computer Science Sample Paper 2015
Computer Science Sample Paper 2015
 
Fp304 DATABASE SYSTEM JUNE 2012
Fp304   DATABASE SYSTEM JUNE 2012Fp304   DATABASE SYSTEM JUNE 2012
Fp304 DATABASE SYSTEM JUNE 2012
 
FP304 DATABASE SYSTEM FINAL PAPER
FP304    DATABASE SYSTEM FINAL PAPERFP304    DATABASE SYSTEM FINAL PAPER
FP304 DATABASE SYSTEM FINAL PAPER
 
Computer science ms
Computer science msComputer science ms
Computer science ms
 
7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...
7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...
7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...
 
7th Semester Information Science (2013-June) Question Papers
7th Semester Information Science (2013-June) Question Papers 7th Semester Information Science (2013-June) Question Papers
7th Semester Information Science (2013-June) Question Papers
 
Oops qb cse
Oops qb cseOops qb cse
Oops qb cse
 
7th semester Computer Science and Information Science Engg (2013 December) Qu...
7th semester Computer Science and Information Science Engg (2013 December) Qu...7th semester Computer Science and Information Science Engg (2013 December) Qu...
7th semester Computer Science and Information Science Engg (2013 December) Qu...
 
5th Semester (June; July-2015) Computer Science and Information Science Engin...
5th Semester (June; July-2015) Computer Science and Information Science Engin...5th Semester (June; July-2015) Computer Science and Information Science Engin...
5th Semester (June; July-2015) Computer Science and Information Science Engin...
 

Similar to Loan-defaulters-predictions(Python codes)

03 abap3-090715081232-phpapp01
03 abap3-090715081232-phpapp0103 abap3-090715081232-phpapp01
03 abap3-090715081232-phpapp01
wingsrai
 
Open SQL & Internal Table
Open SQL & Internal TableOpen SQL & Internal Table
Open SQL & Internal Table
sapdocs. info
 
03 abap3-090715081232-phpapp01-100511101016-phpapp02
03 abap3-090715081232-phpapp01-100511101016-phpapp0203 abap3-090715081232-phpapp01-100511101016-phpapp02
03 abap3-090715081232-phpapp01-100511101016-phpapp02
tabish
 
Debug Information And Where They Come From
Debug Information And Where They Come FromDebug Information And Where They Come From
Debug Information And Where They Come From
Min-Yih Hsu
 
alexnet.pdf
alexnet.pdfalexnet.pdf
alexnet.pdf
BhautikDaxini1
 
R getting spatial
R getting spatialR getting spatial
R getting spatial
FAO
 
Cics cheat sheet
Cics cheat sheetCics cheat sheet
Cics cheat sheet
Rafi Shaik
 
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
Kamiya Toshihiro
 
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeks
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeksBeginning direct3d gameprogramming05_thebasics_20160421_jintaeks
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeks
JinTaek Seo
 
10. R getting spatial
10.  R getting spatial10.  R getting spatial
10. R getting spatial
ExternalEvents
 
JVM code reading -- C2
JVM code reading -- C2JVM code reading -- C2
JVM code reading -- C2
ytoshima
 
Cs practical file
Cs practical fileCs practical file
Cs practical file
Shailendra Garg
 
Pandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with codePandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with code
Tim Hong
 
Spring 2014 CSCI 111 Final exam of 1 61. (2 points) Fl.docx
Spring 2014 CSCI 111 Final exam   of 1 61. (2 points) Fl.docxSpring 2014 CSCI 111 Final exam   of 1 61. (2 points) Fl.docx
Spring 2014 CSCI 111 Final exam of 1 61. (2 points) Fl.docx
rafbolet0
 
Boosting Developer Productivity with Clang
Boosting Developer Productivity with ClangBoosting Developer Productivity with Clang
Boosting Developer Productivity with Clang
Samsung Open Source Group
 
KPMG - TASK 1.pdf
KPMG - TASK 1.pdfKPMG - TASK 1.pdf
KPMG - TASK 1.pdf
Darshana6228
 
CBSE 12 ip 2018 sample paper
CBSE 12 ip 2018 sample paperCBSE 12 ip 2018 sample paper
CBSE 12 ip 2018 sample paper
Knowledge Center Computer
 
Declaring friend function with inline code
Declaring friend function with inline codeDeclaring friend function with inline code
Declaring friend function with inline code
Rajeev Sharan
 
Cassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
Cassandra Hadoop Integration at HUG France by Piotr KołaczkowskiCassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
Cassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
Modern Data Stack France
 
Part II: LLVM Intermediate Representation
Part II: LLVM Intermediate RepresentationPart II: LLVM Intermediate Representation
Part II: LLVM Intermediate Representation
Wei-Ren Chen
 

Similar to Loan-defaulters-predictions(Python codes) (20)

03 abap3-090715081232-phpapp01
03 abap3-090715081232-phpapp0103 abap3-090715081232-phpapp01
03 abap3-090715081232-phpapp01
 
Open SQL & Internal Table
Open SQL & Internal TableOpen SQL & Internal Table
Open SQL & Internal Table
 
03 abap3-090715081232-phpapp01-100511101016-phpapp02
03 abap3-090715081232-phpapp01-100511101016-phpapp0203 abap3-090715081232-phpapp01-100511101016-phpapp02
03 abap3-090715081232-phpapp01-100511101016-phpapp02
 
Debug Information And Where They Come From
Debug Information And Where They Come FromDebug Information And Where They Come From
Debug Information And Where They Come From
 
alexnet.pdf
alexnet.pdfalexnet.pdf
alexnet.pdf
 
R getting spatial
R getting spatialR getting spatial
R getting spatial
 
Cics cheat sheet
Cics cheat sheetCics cheat sheet
Cics cheat sheet
 
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
 
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeks
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeksBeginning direct3d gameprogramming05_thebasics_20160421_jintaeks
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeks
 
10. R getting spatial
10.  R getting spatial10.  R getting spatial
10. R getting spatial
 
JVM code reading -- C2
JVM code reading -- C2JVM code reading -- C2
JVM code reading -- C2
 
Cs practical file
Cs practical fileCs practical file
Cs practical file
 
Pandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with codePandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with code
 
Spring 2014 CSCI 111 Final exam of 1 61. (2 points) Fl.docx
Spring 2014 CSCI 111 Final exam   of 1 61. (2 points) Fl.docxSpring 2014 CSCI 111 Final exam   of 1 61. (2 points) Fl.docx
Spring 2014 CSCI 111 Final exam of 1 61. (2 points) Fl.docx
 
Boosting Developer Productivity with Clang
Boosting Developer Productivity with ClangBoosting Developer Productivity with Clang
Boosting Developer Productivity with Clang
 
KPMG - TASK 1.pdf
KPMG - TASK 1.pdfKPMG - TASK 1.pdf
KPMG - TASK 1.pdf
 
CBSE 12 ip 2018 sample paper
CBSE 12 ip 2018 sample paperCBSE 12 ip 2018 sample paper
CBSE 12 ip 2018 sample paper
 
Declaring friend function with inline code
Declaring friend function with inline codeDeclaring friend function with inline code
Declaring friend function with inline code
 
Cassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
Cassandra Hadoop Integration at HUG France by Piotr KołaczkowskiCassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
Cassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
 
Part II: LLVM Intermediate Representation
Part II: LLVM Intermediate RepresentationPart II: LLVM Intermediate Representation
Part II: LLVM Intermediate Representation
 

Recently uploaded

A Study of Variable-Role-based Feature Enrichment in Neural Models of Code
A Study of Variable-Role-based Feature Enrichment in Neural Models of CodeA Study of Variable-Role-based Feature Enrichment in Neural Models of Code
A Study of Variable-Role-based Feature Enrichment in Neural Models of Code
Aftab Hussain
 
E-commerce Application Development Company.pdf
E-commerce Application Development Company.pdfE-commerce Application Development Company.pdf
E-commerce Application Development Company.pdf
Hornet Dynamics
 
OpenMetadata Community Meeting - 5th June 2024
OpenMetadata Community Meeting - 5th June 2024OpenMetadata Community Meeting - 5th June 2024
OpenMetadata Community Meeting - 5th June 2024
OpenMetadata
 
Transform Your Communication with Cloud-Based IVR Solutions
Transform Your Communication with Cloud-Based IVR SolutionsTransform Your Communication with Cloud-Based IVR Solutions
Transform Your Communication with Cloud-Based IVR Solutions
TheSMSPoint
 
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CDKuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
rodomar2
 
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
mz5nrf0n
 
GraphSummit Paris - The art of the possible with Graph Technology
GraphSummit Paris - The art of the possible with Graph TechnologyGraphSummit Paris - The art of the possible with Graph Technology
GraphSummit Paris - The art of the possible with Graph Technology
Neo4j
 
Fundamentals of Programming and Language Processors
Fundamentals of Programming and Language ProcessorsFundamentals of Programming and Language Processors
Fundamentals of Programming and Language Processors
Rakesh Kumar R
 
Revolutionizing Visual Effects Mastering AI Face Swaps.pdf
Revolutionizing Visual Effects Mastering AI Face Swaps.pdfRevolutionizing Visual Effects Mastering AI Face Swaps.pdf
Revolutionizing Visual Effects Mastering AI Face Swaps.pdf
Undress Baby
 
Oracle Database 19c New Features for DBAs and Developers.pptx
Oracle Database 19c New Features for DBAs and Developers.pptxOracle Database 19c New Features for DBAs and Developers.pptx
Oracle Database 19c New Features for DBAs and Developers.pptx
Remote DBA Services
 
Energy consumption of Database Management - Florina Jonuzi
Energy consumption of Database Management - Florina JonuziEnergy consumption of Database Management - Florina Jonuzi
Energy consumption of Database Management - Florina Jonuzi
Green Software Development
 
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian CompaniesE-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
Quickdice ERP
 
2024 eCommerceDays Toulouse - Sylius 2.0.pdf
2024 eCommerceDays Toulouse - Sylius 2.0.pdf2024 eCommerceDays Toulouse - Sylius 2.0.pdf
2024 eCommerceDays Toulouse - Sylius 2.0.pdf
Łukasz Chruściel
 
LORRAINE ANDREI_LEQUIGAN_HOW TO USE WHATSAPP.pptx
LORRAINE ANDREI_LEQUIGAN_HOW TO USE WHATSAPP.pptxLORRAINE ANDREI_LEQUIGAN_HOW TO USE WHATSAPP.pptx
LORRAINE ANDREI_LEQUIGAN_HOW TO USE WHATSAPP.pptx
lorraineandreiamcidl
 
Graspan: A Big Data System for Big Code Analysis
Graspan: A Big Data System for Big Code AnalysisGraspan: A Big Data System for Big Code Analysis
Graspan: A Big Data System for Big Code Analysis
Aftab Hussain
 
ALGIT - Assembly Line for Green IT - Numbers, Data, Facts
ALGIT - Assembly Line for Green IT - Numbers, Data, FactsALGIT - Assembly Line for Green IT - Numbers, Data, Facts
ALGIT - Assembly Line for Green IT - Numbers, Data, Facts
Green Software Development
 
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI AppAI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
Google
 
Need for Speed: Removing speed bumps from your Symfony projects ⚡️
Need for Speed: Removing speed bumps from your Symfony projects ⚡️Need for Speed: Removing speed bumps from your Symfony projects ⚡️
Need for Speed: Removing speed bumps from your Symfony projects ⚡️
Łukasz Chruściel
 
Empowering Growth with Best Software Development Company in Noida - Deuglo
Empowering Growth with Best Software  Development Company in Noida - DeugloEmpowering Growth with Best Software  Development Company in Noida - Deuglo
Empowering Growth with Best Software Development Company in Noida - Deuglo
Deuglo Infosystem Pvt Ltd
 
Microservice Teams - How the cloud changes the way we work
Microservice Teams - How the cloud changes the way we workMicroservice Teams - How the cloud changes the way we work
Microservice Teams - How the cloud changes the way we work
Sven Peters
 

Recently uploaded (20)

A Study of Variable-Role-based Feature Enrichment in Neural Models of Code
A Study of Variable-Role-based Feature Enrichment in Neural Models of CodeA Study of Variable-Role-based Feature Enrichment in Neural Models of Code
A Study of Variable-Role-based Feature Enrichment in Neural Models of Code
 
E-commerce Application Development Company.pdf
E-commerce Application Development Company.pdfE-commerce Application Development Company.pdf
E-commerce Application Development Company.pdf
 
OpenMetadata Community Meeting - 5th June 2024
OpenMetadata Community Meeting - 5th June 2024OpenMetadata Community Meeting - 5th June 2024
OpenMetadata Community Meeting - 5th June 2024
 
Transform Your Communication with Cloud-Based IVR Solutions
Transform Your Communication with Cloud-Based IVR SolutionsTransform Your Communication with Cloud-Based IVR Solutions
Transform Your Communication with Cloud-Based IVR Solutions
 
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CDKuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
 
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
 
GraphSummit Paris - The art of the possible with Graph Technology
GraphSummit Paris - The art of the possible with Graph TechnologyGraphSummit Paris - The art of the possible with Graph Technology
GraphSummit Paris - The art of the possible with Graph Technology
 
Fundamentals of Programming and Language Processors
Fundamentals of Programming and Language ProcessorsFundamentals of Programming and Language Processors
Fundamentals of Programming and Language Processors
 
Revolutionizing Visual Effects Mastering AI Face Swaps.pdf
Revolutionizing Visual Effects Mastering AI Face Swaps.pdfRevolutionizing Visual Effects Mastering AI Face Swaps.pdf
Revolutionizing Visual Effects Mastering AI Face Swaps.pdf
 
Oracle Database 19c New Features for DBAs and Developers.pptx
Oracle Database 19c New Features for DBAs and Developers.pptxOracle Database 19c New Features for DBAs and Developers.pptx
Oracle Database 19c New Features for DBAs and Developers.pptx
 
Energy consumption of Database Management - Florina Jonuzi
Energy consumption of Database Management - Florina JonuziEnergy consumption of Database Management - Florina Jonuzi
Energy consumption of Database Management - Florina Jonuzi
 
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian CompaniesE-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
 
2024 eCommerceDays Toulouse - Sylius 2.0.pdf
2024 eCommerceDays Toulouse - Sylius 2.0.pdf2024 eCommerceDays Toulouse - Sylius 2.0.pdf
2024 eCommerceDays Toulouse - Sylius 2.0.pdf
 
LORRAINE ANDREI_LEQUIGAN_HOW TO USE WHATSAPP.pptx
LORRAINE ANDREI_LEQUIGAN_HOW TO USE WHATSAPP.pptxLORRAINE ANDREI_LEQUIGAN_HOW TO USE WHATSAPP.pptx
LORRAINE ANDREI_LEQUIGAN_HOW TO USE WHATSAPP.pptx
 
Graspan: A Big Data System for Big Code Analysis
Graspan: A Big Data System for Big Code AnalysisGraspan: A Big Data System for Big Code Analysis
Graspan: A Big Data System for Big Code Analysis
 
ALGIT - Assembly Line for Green IT - Numbers, Data, Facts
ALGIT - Assembly Line for Green IT - Numbers, Data, FactsALGIT - Assembly Line for Green IT - Numbers, Data, Facts
ALGIT - Assembly Line for Green IT - Numbers, Data, Facts
 
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI AppAI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
AI Fusion Buddy Review: Brand New, Groundbreaking Gemini-Powered AI App
 
Need for Speed: Removing speed bumps from your Symfony projects ⚡️
Need for Speed: Removing speed bumps from your Symfony projects ⚡️Need for Speed: Removing speed bumps from your Symfony projects ⚡️
Need for Speed: Removing speed bumps from your Symfony projects ⚡️
 
Empowering Growth with Best Software Development Company in Noida - Deuglo
Empowering Growth with Best Software  Development Company in Noida - DeugloEmpowering Growth with Best Software  Development Company in Noida - Deuglo
Empowering Growth with Best Software Development Company in Noida - Deuglo
 
Microservice Teams - How the cloud changes the way we work
Microservice Teams - How the cloud changes the way we workMicroservice Teams - How the cloud changes the way we work
Microservice Teams - How the cloud changes the way we work
 

Loan-defaulters-predictions(Python codes)

  • 1. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 1/14 In [1]: In [2]: In [3]: train set shape: (56000, 52) test set shape: (24000, 51) import pandas as pd import numpy as np from pandas import DataFrame from sklearn.metrics import roc_auc_score from sklearn.preprocessing import LabelEncoder, MinMaxScaler import catboost from rgf.sklearn import RGFClassifier from lightgbm import LGBMClassifier from sklearn.ensemble import VotingClassifier, RandomForestClassifier from sklearn.model_selection import cross_val_score, StratifiedKFold train=pd.read_csv("Datasets/2020 Qualifying competition/Train.csv") test=pd.read_csv("Datasets/2020 Qualifying competition/Test.csv") print("train set shape:",train.shape) print("test set shape:",test.shape)
  • 2. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 2/14 In [4]: <class 'pandas.core.frame.DataFrame'> RangeIndex: 56000 entries, 0 to 55999 Data columns (total 52 columns): Applicant_ID 56000 non-null object form_field1 53471 non-null float64 form_field2 52156 non-null float64 form_field3 55645 non-null float64 form_field4 55645 non-null float64 form_field5 55645 non-null float64 form_field6 42640 non-null float64 form_field7 50837 non-null float64 form_field8 42640 non-null float64 form_field9 47992 non-null float64 form_field10 55645 non-null float64 form_field11 24579 non-null float64 form_field12 46105 non-null float64 form_field13 50111 non-null float64 form_field14 56000 non-null int64 form_field15 33525 non-null float64 form_field16 42964 non-null float64 form_field17 44849 non-null float64 form_field18 45598 non-null float64 form_field19 55996 non-null float64 form_field20 55645 non-null float64 form_field21 40146 non-null float64 form_field22 35600 non-null float64 form_field23 27877 non-null float64 form_field24 42703 non-null float64 form_field25 50550 non-null float64 form_field26 48562 non-null float64 form_field27 46701 non-null float64 form_field28 55645 non-null float64 form_field29 55645 non-null float64 form_field30 30491 non-null float64 form_field31 16592 non-null float64 form_field32 50550 non-null float64 form_field33 54744 non-null float64 form_field34 55645 non-null float64 form_field35 32852 non-null float64 form_field36 54005 non-null float64 form_field37 50550 non-null float64 form_field38 55645 non-null float64 form_field39 51789 non-null float64 form_field40 12271 non-null float64 form_field41 17771 non-null float64 form_field42 54677 non-null float64 form_field43 55432 non-null float64 form_field44 50617 non-null float64 form_field45 24683 non-null float64 form_field46 40096 non-null float64 form_field47 56000 non-null object form_field48 35111 non-null float64 form_field49 55645 non-null float64 # 
Information on each of the columns in the dataset train.info()
  • 3. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 3/14 form_field50 44944 non-null float64 default_status 56000 non-null object dtypes: float64(48), int64(1), object(3) memory usage: 22.2+ MB
  • 4. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 4/14 In [5]: Out[5]: Applicant_ID object form_field1 float64 form_field2 float64 form_field3 float64 form_field4 float64 form_field5 float64 form_field6 float64 form_field7 float64 form_field8 float64 form_field9 float64 form_field10 float64 form_field11 float64 form_field12 float64 form_field13 float64 form_field14 int64 form_field15 float64 form_field16 float64 form_field17 float64 form_field18 float64 form_field19 float64 form_field20 float64 form_field21 float64 form_field22 float64 form_field23 float64 form_field24 float64 form_field25 float64 form_field26 float64 form_field27 float64 form_field28 float64 form_field29 float64 form_field30 float64 form_field31 float64 form_field32 float64 form_field33 float64 form_field34 float64 form_field35 float64 form_field36 float64 form_field37 float64 form_field38 float64 form_field39 float64 form_field40 float64 form_field41 float64 form_field42 float64 form_field43 float64 form_field44 float64 form_field45 float64 form_field46 float64 form_field47 object form_field48 float64 form_field49 float64 form_field50 float64 dtype: object test.dtypes
  • 5. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 5/14 In [6]: Out[6]: Applicant_ID form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_f 0 Apcnt_1000000 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689 1 Apcnt_1000004 3456.0 0.67400 0.2342 0.0 0.000 0.0 898 2 Apcnt_1000008 3276.0 0.53845 3.1510 0.0 6.282 NaN 956 3 Apcnt_1000012 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044 4 Apcnt_1000016 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214 5 rows × 52 columns train.head()
  • 6. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 6/14 In [7]: Out[7]: Applicant_ID object form_field1 float64 form_field2 float64 form_field3 float64 form_field4 float64 form_field5 float64 form_field6 float64 form_field7 float64 form_field8 float64 form_field9 float64 form_field10 float64 form_field11 float64 form_field12 float64 form_field13 float64 form_field14 int64 form_field15 float64 form_field16 float64 form_field17 float64 form_field18 float64 form_field19 float64 form_field20 float64 form_field21 float64 form_field22 float64 form_field23 float64 form_field24 float64 form_field25 float64 form_field26 float64 form_field27 float64 form_field28 float64 form_field29 float64 form_field30 float64 form_field31 float64 form_field32 float64 form_field33 float64 form_field34 float64 form_field35 float64 form_field36 float64 form_field37 float64 form_field38 float64 form_field39 float64 form_field40 float64 form_field41 float64 form_field42 float64 form_field43 float64 form_field44 float64 form_field45 float64 form_field46 float64 form_field47 object form_field48 float64 form_field49 float64 form_field50 float64 default_status object dtype: object train.dtypes
  • 7. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 7/14 In [8]: Out[8]: Applicant_ID 0 form_field1 2529 form_field2 3844 form_field3 355 form_field4 355 form_field5 355 form_field6 13360 form_field7 5163 form_field8 13360 form_field9 8008 form_field10 355 form_field11 31421 form_field12 9895 form_field13 5889 form_field14 0 form_field15 22475 form_field16 13036 form_field17 11151 form_field18 10402 form_field19 4 form_field20 355 form_field21 15854 form_field22 20400 form_field23 28123 form_field24 13297 form_field25 5450 form_field26 7438 form_field27 9299 form_field28 355 form_field29 355 form_field30 25509 form_field31 39408 form_field32 5450 form_field33 1256 form_field34 355 form_field35 23148 form_field36 1995 form_field37 5450 form_field38 355 form_field39 4211 form_field40 43729 form_field41 38229 form_field42 1323 form_field43 568 form_field44 5383 form_field45 31317 form_field46 15904 form_field47 0 form_field48 20889 form_field49 355 form_field50 11056 default_status 0 dtype: int64 # Exploring the number of missing values(NaN) in each column of the dataset train.isnull().sum()
  • 8. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 8/14 In [9]: In [10]: Out[10]: Applicant_ID 0 form_field1 0 form_field2 0 form_field3 0 form_field4 0 form_field5 0 form_field6 0 form_field7 0 form_field8 0 form_field9 0 form_field10 0 form_field11 0 form_field12 0 form_field13 0 form_field14 0 form_field15 0 form_field16 0 form_field17 0 form_field18 0 form_field19 0 #fill empty cell(NaN data)with -999 train=train.fillna(-999) test= test.fillna(-999) test.isnull().sum()
  • 9. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 9/14 In [11]: Out[11]: Applicant_ID 0 form_field1 0 form_field2 0 form_field3 0 form_field4 0 form_field5 0 form_field6 0 form_field7 0 form_field8 0 form_field9 0 form_field10 0 form_field11 0 form_field12 0 form_field13 0 form_field14 0 form_field15 0 form_field16 0 form_field17 0 form_field18 0 form_field19 0 form_field20 0 form_field21 0 form_field22 0 form_field23 0 form_field24 0 form_field25 0 form_field26 0 form_field27 0 form_field28 0 form_field29 0 form_field30 0 form_field31 0 form_field32 0 form_field33 0 form_field34 0 form_field35 0 form_field36 0 form_field37 0 form_field38 0 form_field39 0 form_field40 0 form_field41 0 form_field42 0 form_field43 0 form_field44 0 form_field45 0 form_field46 0 form_field47 0 form_field48 0 form_field49 0 form_field50 0 default_status 0 dtype: int64 train.isnull().sum()
  • 10. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 10/14 In [12]: In [13]: In [14]: In [15]: Out[12]: Applicant_ID form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_f 0 Apcnt_1000000 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689 1 Apcnt_1000004 3456.0 0.67400 0.2342 0.0 0.000 0.0 898 2 Apcnt_1000008 3276.0 0.53845 3.1510 0.0 6.282 -9999.0 956 3 Apcnt_1000012 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044 4 Apcnt_1000016 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214 5 rows × 52 columns Out[15]: 0 0 1 0 2 1 3 0 4 0 .. 55995 0 55996 1 55997 0 55998 0 55999 0 Name: default_status, Length: 56000, dtype: int32 train.head() # Encoding Categorical features encoder= ('form_field47', 'default_status') for x in encoder: le= LabelEncoder() train[x] =le.fit_transform(train[x].values) test['form_field47']=le.fit_transform(test['form_field47'].values) y= train["default_status"] y
  • 11. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 11/14 In [16]: In [17]: In [18]: In [19]: In [20]: In [21]: In [23]: In [24]: Out[16]: form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_field7 form_field 0 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689720.0 252072 1 3456.0 0.67400 0.2342 0.0 0.000 0.0 898979.0 497531 2 3276.0 0.53845 3.1510 0.0 6.282 -9999.0 956940.0 -9999 3 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044703.0 385499 4 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214728.0 214728 5 rows × 50 columns # drop appropriate column train= train.drop(["Applicant_ID","default_status"], axis=1) test= test.drop(["Applicant_ID"], axis=1) train.head() robust= MinMaxScaler() scaledtrain= robust.fit_transform(train) scaledtrain= DataFrame(scaledtrain) scaledtest= robust.transform(test) scaledtest= DataFrame(scaledtest) # split input(x) and output(y) data #from sklearn.model_selection import train_test_split #X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.3, #stratify=y,random_state=42) params= {'n_estimators': 2500,'learning_rate':0.01,'objective': 'CrossEntropy','e 'random_seed': 3500,'verbose': False} fold= StratifiedKFold(n_splits= 15, shuffle= True,random_state= 999) from catboost import CatBoostClassifier model1= CatBoostClassifier(random_state= 499, verbose= False) model2= RGFClassifier(max_leaf= 1500) model3= LGBMClassifier(n_estimators= 900, random_state= 499) localscore, testscore= [], [] model= VotingClassifier(estimators= [('model1', model1), ('model2', model2), ('mo
  • 12. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 12/14 In [25]: #########################fold0 [Voting] ................... (1 of 3) Processing model1, total= 36.0s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.1s #########################fold1 [Voting] ................... (1 of 3) Processing model1, total= 37.4s [Voting] ................... (2 of 3) Processing model2, total= 1.8min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold2 [Voting] ................... (1 of 3) Processing model1, total= 36.4s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold3 [Voting] ................... (1 of 3) Processing model1, total= 38.4s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold4 [Voting] ................... (1 of 3) Processing model1, total= 37.6s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 12.2s #########################fold5 [Voting] ................... (1 of 3) Processing model1, total= 35.5s [Voting] ................... (2 of 3) Processing model2, total= 1.8min [Voting] ................... (3 of 3) Processing model3, total= 10.1s #########################fold6 [Voting] ................... (1 of 3) Processing model1, total= 37.6s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold7 [Voting] ................... (1 of 3) Processing model1, total= 35.9s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... 
(3 of 3) Processing model3, total= 10.3s #########################fold8 [Voting] ................... (1 of 3) Processing model1, total= 37.4s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold9 [Voting] ................... (1 of 3) Processing model1, total= 37.4s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.1s #########################fold10 for i, (trainval, valvalue) in enumerate(fold.split(scaledtrain, y)): print( '#' * 25+ 'fold'+ str(i)) train_x, val_x= scaledtrain.iloc[trainval], scaledtrain.iloc[valvalue] train_y, val_y= y.iloc[trainval], y.iloc[valvalue] model.fit(train_x, train_y) pred= model.predict_proba(val_x)[:,-1] result= roc_auc_score(val_y, pred) localscore.append(result) testpred= model.predict_proba(scaledtest)[:,-1] testscore.append(testpred)
  • 13. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 13/14 In [26]: In [28]: [Voting] ................... (1 of 3) Processing model1, total= 35.9s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.3s #########################fold11 [Voting] ................... (1 of 3) Processing model1, total= 37.7s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 12.6s #########################fold12 [Voting] ................... (1 of 3) Processing model1, total= 35.8s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.4s #########################fold13 [Voting] ................... (1 of 3) Processing model1, total= 37.1s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.0s #########################fold14 [Voting] ................... (1 of 3) Processing model1, total= 36.2s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.6s 0.84036499050012 Out[28]: 0 1 2 3 4 5 6 7 8 0 0.837618 0.835608 0.837453 0.856249 0.829797 0.84765 0.836926 0.84438 0.823843 0.85009 print(np.mean(localscore)) output= pd.DataFrame(localscore).T output.head()
  • 14. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 14/14 In [29]: In [33]: In [34]: In [ ]: Out[29]: Applicant_ID default_status 0 Apcnt_1000032 1 1 Apcnt_1000048 1 2 Apcnt_1000052 1 3 Apcnt_1000076 1 4 Apcnt_1000080 1 ... ... ... 23995 Apcnt_999940 1 23996 Apcnt_999956 1 23997 Apcnt_999976 1 23998 Apcnt_999984 1 23999 Apcnt_999992 1 24000 rows × 2 columns submission= pd.read_csv('Datasets/2020 Qualifying Competition/SampleSubmission.cs submission test_pred= np.mean(testscore, axis=0) submission['default_status']= test_pred submission.to_csv('grace.csv', index= False)