/* List the descriptor (variables, types, lengths, labels) of the practice table. */
proc contents data=atifraza.practice;
run;

/* Descriptive statistics for every numeric variable (no VAR statement is
   given), printed with at most three decimal places. */
proc means data=atifraza.practice
           nmiss n mean std skew kurt min max css
           maxdec=3;
run;
/* Attach a descriptive label to INVOICE.
   PROC DATASETS edits the descriptor in place, so the table is not read and
   rewritten just to change a label. The label text previously read
   'Pricce' (typo). */
proc datasets library=atifraza nolist;
    modify practice;
    label invoice = 'Price';
quit;

/* Re-list the descriptor to confirm the new label. */
proc contents data=atifraza.practice;
run;
/* Full univariate summary (moments, quantiles, extreme observations) of CYLINDERS. */
proc univariate data=atifraza.practice;
    var cylinders;
run;
/* Largest and smallest INVOICE and CYLINDERS values, together with the MODEL
   of the observation on which each extreme occurs. NOPRINT suppresses the
   listing, so the results exist only in ATIFRAZA.ABC. */
proc means data=atifraza.practice noprint;
    var invoice cylinders;
    output out=atifraza.abc
        max = MaxInv MaxCyl
        maxid(invoice(model) cylinders(model)) = MaxInvoice MaxCylinders
        min = MinInv MinCyl
        minid(invoice(model) cylinders(model)) = MinInvoice MinCylinders
    ;
run;

/* Show the summary just produced; without this step the NOPRINT run above
   leaves nothing to look at. */
proc print data=atifraza.abc;
run;

/* Listing of the full practice table (kept from the original script). */
proc print data=atifraza.practice;
run;
/* CREATING CONTINGENCY TABLE */
/* TYPE by ORIGIN cross-tabulation with chi-square tests and a dot plot of the
   cell frequencies. The heading above used to be bare text, which SAS parsed
   as part of the PROC statement and rejected. The two identical TABLE
   requests are combined so the table is produced once. PLOTS= needs ODS
   Graphics to be enabled in the session. */
title 'TYPE AND ORIGIN TABLE';
proc freq data=atifraza.practice;
    tables type*origin / chisq plots=freqplot(type=dotplot);
run;
title;  /* clear the title so that later steps do not inherit it */
/* Separate one-way frequency tables for MAKE and for ORIGIN. */
proc freq data=atifraza.practice;
    tables make origin;
run;
/* PROC FREQ AND PLOT */
/* One-way table of MAKE with a dot plot. The WEIGHT statement makes each
   observation contribute its CYLINDERS value instead of a count of 1, so the
   reported "frequencies" are cylinder totals per make.
   The heading above used to be bare text and broke the PROC statement. */
proc freq data=atifraza.practice;
    tables make / plots=freqplot(type=dotplot);
    weight cylinders;
run;
/* Listing of the practice table. */
proc print data=atifraza.practice;
run;

/* TYPE by ORIGIN cross-tabulation in which every observation is weighted by
   INVOICE, so cells hold invoice totals rather than counts.
   NOTE(review): CHISQ treats the weights as frequencies; with monetary
   weights the test statistics are not meaningful as a test of association.
   The two identical TABLE requests are combined, and the title is cleared
   afterwards so it does not leak into later steps. */
title 'TYPE AND ORIGIN TABLE';
proc freq data=atifraza.practice;
    tables Type*Origin / chisq plots=freqplot(type=dotplot);
    weight invoice;
run;
title;
/* TRIMMED MEANS AND WINSORIZED */
/* Robust location and scale estimates for CYLINDERS. An integer value for
   WINSORIZED= / TRIMMED= is a number of observations per tail (a value below
   0.5 would be a proportion). The heading above used to be bare text
   (spelled "TIMED") and broke the PROC statement. */
proc univariate data=atifraza.practice
                winsorized=10
                trimmed=10
                robustscale;
    var cylinders;
run;
/* STANDARDIZE DATA */
/* Replace missing CYLINDERS with the mean. REPONLY restricts the procedure to
   replacement only, so nothing is standardized. BY ... NOTSORTED processes
   each consecutive run of equal TYPE values as a group.
   NOTE(review): if rows of one TYPE are not contiguous, each run gets its own
   mean; confirm the table is grouped by TYPE.
   The heading above used to be bare text and broke the PROC statement. */
proc stdize data=atifraza.practice
            out=atifraza.practiceok
            reponly missing=mean;
    var cylinders;
    by type notsorted;
run;
/* Pearson correlations among the numeric variables of the imputed table,
   with a scatter-plot matrix that has histograms on the diagonal. */
proc corr data=atifraza.practiceok
          pearson
          plots=matrix(histogram);
run;
/* MERGING TWO TABLES */
/* Roster of the acting class: name, year and (optional) major.
   The in-stream records are '|'-delimited so the fields no longer depend on
   exact column positions. The original column input (Name 1-25, Year 26-34,
   Major 36-50) did not line up with the records and read the wrong text into
   every variable. Variable lengths match the original column widths. */
data class;
    infile datalines dlm='|' dsd truncover;
    length Name $ 25 Year $ 9 Major $ 15;
    input Name Year Major;
    datalines;
Abbott, Jennifer|first|
Carter, Tom|third|Theater
Mendoza, Elissa|fourth|Mathematics
Tucker, Rachel|first|
Uhl, Roland|second|
Wacenske, Maurice|third|Theater
;
run;
proc print data=class;
    title 'Acting Class Roster';
run;
/* Grades per student: name, year and grade.
   The in-stream records are '|'-delimited so the fields no longer depend on
   exact column positions. The original column input (Name 1-25, Year 26-34,
   GRADE 36-50) did not line up with the records. Variable lengths match the
   original column widths. */
data GRADE;
    infile datalines dlm='|' dsd truncover;
    length Name $ 25 Year $ 9 GRADE $ 15;
    input Name Year GRADE;
    datalines;
Abbott, Jenni|first|D
Carter, Tom|third|T
Mendoza,|fourth|M
Tucker, Ra|first|A
Uhl, Roland|second|B
Wacenske, Maurice|third|C
;
run;
proc print data=GRADE;
    title 'Acting Class Roster';
run;
/* Match-merge the roster with the grades.
   The original merged BY MAJOR, but GRADE has no MAJOR variable and was never
   sorted, so the step stopped with an error. NAME is the identifying variable
   both tables carry, so both are sorted and merged on it. YEAR is dropped
   from both inputs, as before.
   NOTE(review): confirm NAME is the intended key. */
proc sort data=class;
    by name;
run;
proc sort data=grade;
    by name;
run;
data merged;
    merge class(drop=year in=from_class)
          grade(drop=year in=from_grade);
    by name;
    /* 1/0 flags showing which table(s) supplied each row; a name spelled
       differently in the two tables does not match and yields two rows. */
    InClass = from_class;
    InGrade = from_grade;
run;
proc print data=merged;
run;
/* Total the CYLINDERS of Chevrolet rows only: the WHERE statement filters
   while the sort writes a temporary copy, PROC PRINT lists the values with a
   SUM line per MAKE, and the temporary copy is removed afterwards. */
proc sort data=atifraza.practice
          out=work.sorttemp;
    by make;
    where make = 'Chevrolet';
run;

proc print data=work.sorttemp label;
    by make;
    var cylinders;
    sum cylinders;
run;

proc delete data=work.sorttemp;
run;
/* Derive a new variable from existing ones while dropping and renaming
   columns in the same SET statement: TYPE, MAKE and ORIGIN are not read,
   HORSEPOWER comes in as HP, and AVERAGE is the midpoint of INVOICE and MSRP
   (missing when either input is missing). */
data dummy;
    set atifraza.practice(drop=type make origin
                          rename=(horsepower=HP));
    average = (invoice + msrp) / 2;
run;
/* Create STATUS from the number of cylinders:
     missing -> UNKNOWN, <=6 -> SMALL, 7-8 -> MEDIUM, >8 -> LARGE.
   STATUS is declared with an explicit length. Previously its length was taken
   from the first assignment ("LARGE", 5 characters), which truncated "MEDIUM"
   and "UNKNOWN". The tests are now one IF/ELSE chain with the missing check
   first, instead of overlapping IFs that relied on later statements
   overwriting earlier ones (a missing value compares below every number). */
data testofrankvariable;
    set atifraza.practice;
    length status $ 7;
    if missing(cylinders) then status = "UNKNOWN";
    else if cylinders <= 6 then status = "SMALL";
    else if cylinders <= 8 then status = "MEDIUM";
    else status = "LARGE";
run;
/* Mean, standard deviation and total of the two score variables for each
   FLAVOR; the statistics are also written to WORK.CAKE_DESC. */
proc means data=atifraza.cake;
    class flavor;
    var PresentScore TasteScore;
    output out=Cake_Desc
        mean = Avg_PS  Avg_TS
        std  = SD_PS   SD_TS
        sum  = TOTALPS TOTALTS;
run;
/* SCANNING A VARIABLE AND THEN PUTTING THE NAME INVERTED */
/* Student marks. The records are comma-delimited and read with list input
   (':' modifier), so NAME may contain a blank and no field depends on column
   alignment. The original formatted input (NAME $10. followed directly by
   DATE MMDDYY10.) only worked when every name was padded to exactly ten
   columns, which the records are not. Variable lengths are unchanged
   (S_NO $8, NAME $10, GRADE $8). */
data atifraza.marks;
    infile datalines dlm=',' truncover;
    input S_NO $ NAME :$10. DATE :mmddyy10. MARKS GRADE $;
    format DATE mmddyy10.;
    datalines;
01,ALI RAZA,09122000,80,A
02,AHMED KHAN,02242011,75,B
03,BINA ALI,08302006,60,C
04,MARIA ZIA,07212004,89,A
;
run;
/* Split NAME into first (F) and last (L) word and build "Last,First".
   CATX strips the blanks around each part. The original L||","||F kept the
   trailing blanks of the fixed-length parts, producing values such as
   "RAZA      ,ALI". Lengths are declared explicitly and equal the ones the
   original step produced (10, 10 and 21). */
data atifraza.marks2;
    set atifraza.marks;
    length F L $ 10 NAMES $ 21;
    F = scan(NAME, 1);
    L = scan(NAME, 2);
    NAMES = catx(',', L, F);
run;
/* PANEL DATA */
/* Reshape the CPI table from one row per country to one row per country and
   transposed variable, as the basis for lags and differences. */

/* BY-group processing below needs the rows ordered by COUNTRY. */
proc sort data=atifraza.cpii;
    by country;
run;

proc print data=atifraza.cpii;
run;

/* One output row per country and transposed variable; the variable name
   lands in _NAME_ and its value in COL1. */
proc transpose data=atifraza.cpii
               out=atifraza.cpiitrans;
    by country;
run;

/* Give the generated columns meaningful names. */
data atifraza.cpitrans;
    set atifraza.cpiitrans;
    rename col1   = CPI
           _name_ = YEARS;
run;
/* Time series for a single country: keep only the PAK rows, then add the
   previous value (LAG_PK) and the first difference (DCPI_PK) of CPI.
   Rows of other countries are discarded before LAG/DIF execute, so the
   function queues only ever see PAK values. */
data atifraza.espak;
    set atifraza.cpitrans;
    if country ne 'PAK' then delete;
    /* creating lags and differences in time-series data */
    lag_pk  = lag(cpi);
    dcpi_pk = dif(cpi);
run;
/* Panel-aware lag: with COUNTRY as the cross-section ID and YEARS as the time
   ID, the LAG statement writes a first-order lag of CPI to ATIFRAZA.LAGPANEL. */
proc panel data=atifraza.cpitrans;
    id country years;
    lag cpi(1) / out=atifraza.lagpanel;
run;
/* SAAD FILE */
/* to generate data */
/* Blank-delimited list input. The original column ranges (name 12-15,
   sales 17-22, expense 24-29) did not match where the values sit in the
   records, so SALES and EXPENSE were read from the wrong columns.
   NAME keeps its original length of 4. */
data practice.sales;
    input date :mmddyy10. name :$4. sales expense;
    datalines;
02/01/2010 ABC 10000 9000
03/01/2010 DEF 20000 19000
04/01/2010 JIK 15000 12000
;
run;
/* Add a derived variable: LOSS is SALES minus EXPENSE. */
data practice.sales;
    set practice.sales;
    loss = sales - expense;
run;

/* Formats apply to this listing only: the date as mm/dd/yyyy and the three
   amounts with the DOLLAR format. */
proc print data=practice.sales;
    format date               mmddyy10.
           sales expense loss dollar.;
run;

/* Inspect the descriptor of the table. */
proc contents data=practice.sales;
run;
/* Store permanent labels on the variables. */
data practice.sales;
    set practice.sales;
    label name    = 'Person name'
          sales   = 'Sales ($)'
          expense = 'Expense ($)'
          loss    = 'Loss ($)';
run;

/* Confirm the labels in the descriptor. */
proc contents data=practice.sales;
run;

/* Default descriptive statistics for all numeric variables. */
proc means data=practice.sales;
run;
/* Selected descriptive statistics written to WORK.MEANSALES.
   Statistic keywords on the PROC statement only shape the printed report,
   which NOPRINT suppresses, and a bare OUTPUT statement stores just the
   default statistics. The requested skewness, kurtosis and NMISS therefore
   never reached the output. They are now requested on the OUTPUT statement;
   AUTONAME names each column <variable>_<statistic>. The MISSING option was
   removed because it only affects CLASS variables and there are none. */
proc means data=practice.sales noprint;
    var sales expense loss;
    output out=meansales
        n= nmiss= mean= std= skew= kurt= max= min= / autoname;
run;
proc print data=meansales;
run;
/* One-way frequency table of SALES. */
proc freq data=practice.sales;
    tables sales;
run;

/* Two-way cross-tabulation of SALES by EXPENSE. */
proc freq data=practice.sales;
    tables sales*expense;
run;
/* Derive new variables from existing ones: sum, product, ratio and natural
   logs. The ratio is computed only for a usable divisor and each log only
   for a positive argument; otherwise the result is left missing. These are
   the same values the unguarded expressions produced, but without the
   "invalid argument" / "division by zero" errors in the log. */
data practice.tsales;
    set practice.sales;
    MC   = sales + expense;
    Mult = sales * expense;
    if expense not in (0, .) then div = sales / expense;
    if sales   > 0 then logs = log(sales);
    if expense > 0 then loge = log(expense);
    if loss    > 0 then logl = log(loss);
run;
/* Simple regression: log sales on log expense. */
proc reg data=practice.tsales;
    model logs = loge;
run;

/* Multiple regression: log sales on log expense and log loss. */
proc reg data=practice.tsales;
    model logs = loge logl;
run;

/* Analysis of variance of log sales with LOGE as the classification factor.
   NOTE(review): LOGE is a computed log value used as a CLASS variable --
   confirm a categorical factor was not intended here. */
proc anova data=practice.tsales;
    class loge;
    model logs = loge;
run;
/* Copy TSALES without LOGL; the variable is excluded as the table is read. */
data practice.lsales;
    set practice.tsales(drop=logl);
run;
/* Mean, standard deviation and extremes of SALES and EXPENSE, plus the NAME
   of the observation holding each maximum and minimum. Results go to
   PRACTICE.MEANSALES and are listed afterwards. */
proc means data=practice.tsales noprint;
    var sales expense;
    output out=practice.meansales
        mean = avs  avex
        std  = sds  sdex
        max  = maxs maxex
        maxid(sales(name) expense(name)) = maxidsl maxidex
        min  = mins minex
        minid(sales(name) expense(name)) = minidsl minidex;
run;

proc print data=practice.meansales;
run;
/* Standard univariate summary of SALES. */
proc univariate data=practice.tsales;
    var sales;
run;

/* Robust estimates: 10% winsorized mean, 10% and 1% trimmed means, and
   robust measures of scale. */
proc univariate data=practice.tsales
                winsorized=0.1
                trimmed=0.1 0.01
                robustscale;
    var sales;
run;

proc print data=practice.tsales;
run;
/* Default (Pearson) correlations among all numeric variables. */
proc corr data=practice.tsales;
run;

/* Kendall, Spearman and Pearson correlations; FISHER requests output based
   on Fisher's z transformation. */
proc corr data=practice.tsales
          kendall spearman pearson fisher;
run;
/* Cronbach's coefficient alpha. NOPRINT was removed: with no output data set
   requested it suppressed everything, so the step produced nothing. */
proc corr data=practice.tsales alpha;
run;
/* Corrected sums of squares and cross-products, and the covariance matrix. */
proc corr data=practice.tsales
          csscp cov;
run;

/* Scatter-plot matrix with histograms on the diagonal. */
proc corr data=practice.tsales
          plots=matrix(histogram);
run;

/* Descriptive statistics of the heart data, one group per SEX value. */
proc means data=practice.heart;
    class sex;
run;
/* Frequency analysis using a BY statement.
   BY-group processing needs the rows ordered by SEX, but this script sorts
   PRACTICE.HEART only in a later step. A sorted temporary copy is used here
   so the step does not depend on the stored row order and leaves the
   permanent table untouched. MISSING treats missing values as a valid
   category in the table. */
proc sort data=practice.heart out=work.heart_by_sex;
    by sex;
run;
proc freq data=work.heart_by_sex;
    by sex;
    tables AgeAtDeath*DeathCause / missing;
run;
/* Count missing and non-missing values of every numeric variable. */
proc means data=practice.heart
           nmiss n;
run;

/* Sort the table in place by SEX, then STATUS. */
proc sort data=practice.heart;
    by sex status;
run;

/* Frequency options: MISSPRINT shows missing values in the table without
   including them in the percentages. */
proc freq data=practice.heart;
    tables AgeAtDeath / missprint;
run;

/* Cross-tabulation with levels in data order and chi-square tests; each
   observation is weighted by SMOKING. */
proc freq data=practice.heart order=data;
    weight smoking;
    tables BP_Status*Smoking_Status / chisq;
run;
/* Analysis of the cars1 data: descriptor first, then counts of non-missing
   and missing values per numeric variable. */
proc contents data=quiz.cars1;
run;

proc means data=quiz.cars1
           n nmiss;
run;
/* Replace missing values with 0 */
/* First way: DATA step with an array. The deprecated implicit-array
   "DO OVER" loop is replaced by an explicit array and an indexed loop. More
   variables can be added to the array list. The loop index is dropped so the
   output has the same columns as before. */
data quiz.cars2;
    set quiz.cars1;
    array fill_vars {*} Invoice;
    do _i = 1 to dim(fill_vars);
        if fill_vars{_i} = . then fill_vars{_i} = 0;
    end;
    drop _i;
run;
proc means data=quiz.cars2 n nmiss;
run;
/* Second way: PROC STDIZE. REPONLY replaces missing values without
   standardizing; MISSING=0 supplies the replacement value. */
proc stdize data=quiz.cars1
            out=quiz.cars5
            reponly missing=0;
    var cylinders invoice;
run;

proc means data=quiz.cars5
           n nmiss;
run;

/* Without a VAR statement the replacement covers all numeric variables in
   the table. */
proc stdize data=quiz.cars1
            out=quiz.cars4
            reponly missing=0;
run;

proc means data=quiz.cars4
           n nmiss;
run;
/* Replace missing CYLINDERS with the variable's mean, then verify the counts
   and the descriptor of the result. */
proc stdize data=quiz.cars2
            out=Quiz.cars3
            reponly missing=mean;
    var Cylinders;
run;

proc means data=quiz.cars3
           n nmiss;
run;

proc contents data=quiz.cars3;
run;
/* Identifying outliers: box plot, histogram, MAXID/MINID, subgroup analysis,
   and univariate analysis of the variable in question. */

/* Vertical box plot of INVOICE. */
proc sgplot data=quiz.cars3;
    vbox invoice;
run;
/* Histogram of INVOICE with a fitted density curve and an inside legend.
   DISCRETELEGEND is a Graph Template Language statement and is not accepted
   by PROC SGPLOT; KEYLEGEND is the SGPLOT equivalent. The density plot is
   given NAME="DENSITY" so that the legend has a plot to refer to. */
proc sgplot data=quiz.cars3;
    histogram Invoice;
    density Invoice / name="DENSITY";
    yaxis grid;
    keylegend "DENSITY" / location=inside position=topright across=1;
run;
/* Univariate summary of INVOICE, including its extreme observations. */
proc univariate data=quiz.cars3;
    var invoice;
run;

/* Largest and smallest INVOICE, with the MODEL of the row holding each;
   results are stored in QUIZ.MINICAR only (NOPRINT). */
proc means data=quiz.cars3 noprint;
    var invoice;
    output out=quiz.minicar
        max = maxinv
        maxid(invoice(model)) = maxidinv
        min = minin
        minid(invoice(model)) = minidinv;
run;

/* Subgroup view: one box plot of INVOICE per TYPE. */
proc sgpanel data=quiz.cars3;
    panelby type;
    vbox Invoice;
run;
/* Panel data: */
/* One row per country with one value column per year (2001-2003).
   The heading above used to be bare text fused onto the DATA statement,
   which made the whole step fail. */
data d26oct.cpi;
    input country $ y2001 y2002 y2003;
    datalines;
PAK 100 111 119
IND 99 107 116
SRL 88 95 101
BND 77 88 96
BHT 96 84 95
AFG 97 103 107
;
run;
/* Order by COUNTRY so the transpose can run once per country. */
proc sort data=d26oct.cpi;
    by country;
run;

/* Wide to long: one row per country and year variable; the variable name
   goes to _NAME_ and its value to COL1. */
proc transpose data=d26oct.cpi
               out=d26oct.transdat;
    by country;
run;

proc print data=d26oct.transdat;
run;

/* Rename the generated columns to CPI and YEAR. */
data d26oct.paneldat;
    set d26oct.transdat;
    rename col1   = cpi
           _name_ = year;
run;
/* Single-country series: rows other than PAK are discarded before DIF/LAG
   run, so the first difference and the lag are computed over PAK only. */
data d26oct.TSPAK;
    set d26oct.paneldat;
    if country ne "PAK" then delete;
    dcpi_pk = dif(cpi);
    lcpi_pk = lag(cpi);
run;

/* Panel-aware lag on the transposed table (still using the generated names
   _NAME_ and COL1); the lagged data are written to D26OCT.PANLLAGDAT. */
proc panel data=d26oct.transdat;
    id country _name_;
    lag col1(1) / out=d26oct.panllagdat;
run;
/* Import the WDI workbook into PRACTICE.WDI, replacing any existing copy. */
proc import datafile="/home/msaadbaloch0/Practice/WDI.xlsx"
            dbms=xlsx
            out=practice.wdi
            replace;
run;

/* List the imported rows. */
proc print data=practice.wdi;
run;
/* Taking lags without carrying the last value of one country into the next
   (the better method). First reshape the imported table per country. */
proc sort data=practice.wdi;
    by country;
run;

proc transpose data=practice.wdi
               out=practice.transwdi;
    by country;
run;

proc contents data=practice.transwdi;
run;

/* Name the transposed columns and discard the label column. */
data practice.panelwdi;
    set practice.transwdi;
    drop _LABEL_;
    rename col1   = GDP
           col2   = INF
           col3   = FDI
           _name_ = year;
run;

/* Attach the DOLLAR10. display format to the three series.
   NOTE(review): INF also gets a currency format -- confirm that is wanted. */
data practice.panelwdifor;
    set practice.panelwdi;
    format FDI GDP INF dollar10.;
run;

proc contents data=practice.panelwdifor;
run;
/* Create lags within each country. LAG is called on every row so its queue
   stays in step with the data; on the first row of a country the result,
   which would be the previous country's last value, is reset to missing. */
data practice.panelogdata;
    set practice.panelwdifor;
    by country;
    Lag_GDP = lag(GDP);
    Lag_FDI = lag(FDI);
    Lag_INF = lag(INF);
    if first.country then call missing(Lag_GDP, Lag_FDI, Lag_INF);
run;
/* Create first differences within each country. DIF is called on every row;
   on the first row of a country the result, which would span two countries,
   is reset to missing. */
data practice.paneldifdata;
    set practice.panelwdifor;
    by country;
    dif_GDP = dif(GDP);
    dif_INF = dif(INF);
    dif_FDI = dif(FDI);
    if first.country then call missing(dif_GDP, dif_INF, dif_FDI);
run;

How-to: SAS codes and tricks

  • 1.
    PROC CONTENTS DATA=ATIFRAZA.PRACTICE; RUN; PROCMEANS DATA=ATIFRAZA.PRACTICE NMISS N MEAN STD SKEW KURT MIN MAX CSS MAXDEC=3; RUN; DATA ATIFRAZA.PRACTICE; SET ATIFRAZA.PRACTICE; LABEL INVOICE = 'Pricce'; RUN; PROC CONTENTS DATA=ATIFRAZA.PRACTICE; RUN; PROC UNIVARIATE DATA=ATIFRAZA.PRACTICE; VAR CYLINDERS; RUN; PROC MEANS DATA=ATIFRAZA.PRACTICE NOPRINT; VAR INVOICE CYLINDERS; OUTPUT OUT=ATIFRAZA.abc MAX = MaxInv MaxCyl MAXID(INVOICE(MODEL) CYLINDERS(MODEL)) = MaxInvoice MaxCylinders MIN = MinInv MinCyl MINID(INVOICE(MODEL) CYLINDERS(MODEL)) = MinInvoice MinCylinders ; RUN; PROC PRINT DATA=ATIFRAZA.PRACTICE; RUN; CREATING CONTINGENCY TABLE PROC FREQ DATA=ATIFRAZA.PRACTICE; TABLE TYPE*ORIGIN/CHISQ; TABLE TYPE*ORIGIN/ PLOT=FREQPLOT(TYPE=DOT); TITLE 'TYPE AND ORIGIN TABLE'; RUN; PROC FREQ DATA=ATIFRAZA.PRACTICE; TABLE MAKE ORIGIN; RUN; PROC FREQ AND PLOT PROC FREQ DATA=ATIFRAZA.PRACTICE; TABLE MAKE/ PLOT=FREQPLOT(TYPE=DOT); WEIGHT CYLINDERS; RUN; proc print data=atifraza.practice; run; PROC FREQ DATA=ATIFRAZA.PRACTICE; TABLE Type*Origin/CHISQ; TABLE Type*Origin/ PLOT=FREQPLOT(TYPE=DOT); WEIGHT INVOICE; TITLE 'TYPE AND ORIGIN TABLE'; RUN; TIMED MEANS AND WINSORIZED
  • 2.
    PROC UNIVARIATE DATA=ATIFRAZA.PRACTICE WINSORIZED=10 TRIMMED=10 ROBUSTSCALE; VARCYLINDERS; RUN; STANDARDIZE DATA PROC STDIZE DATA=atifraza.practice OUT=ATIFRAZA.PRACTICEOK REPONLY MISSING=MEAN; VAR CYLINDERS; BY NOTSORTED TYPE; RUN; PROC CORR DATA=atifraza.practiceOK pearson PLOTS=matriX(HISTOGRAM); RUN; /* MERGING TWO TABLES */ data class; input Name $ 1-25 Year $ 26-34 Major $ 36-50; datalines; Abbott, Jennifer first Carter, Tom third Theater Mendoza, Elissa fourth Mathematics Tucker, Rachel first Uhl, Roland second Wacenske, Maurice third Theater ; run; proc print data=class; title 'Acting Class Roster'; run; data GRADE; input Name $ 1-25 Year $ 26-34 GRADE $ 36-50; datalines; Abbott, Jenni first D Carter, Tom third T Mendoza, fourth M Tucker, Ra first A Uhl, Roland second B Wacenske, Maurice third C ; run; proc print data=GRADE; title 'Acting Class Roster'; run; proc sort data=class; by major; run; DATA MERGED;
  • 3.
    MERGE CLASS(drop=year) GRADE(drop=year RENAME=(NAME=ndName)); by major; RUN; PROC PRINT DATA=MERGED; RUN; /* adding engines of only chevrolet in make using where statement */ proc sort data=ATIFRAZA.PRACTICE out=WORK.SORTTEMP; where MAKE='Chevrolet'; ; by Make; run; proc print data=WORK.SORTTEMP label; var Cylinders; by Make; sum Cylinders; run; proc delete data=work.SORTTEMP; run; /* ADDING A NEW VARIABLE WITH THE HELP OR PREVIOUS VARIABLES ALSO DROPPING AND RENAMING IN SAME COMMAND */ DATA DUMMY; SET ATIFRAZA.PRACTICE (rename=(Horsepower=HP) DROP=TYPE MAKE ORIGIN); AVERAGE=(INVOICE+MSRP)/2; RUN; /* CREATING A NEW VARIABLE AND SETTING ITS VALUE ACCORDING TO SCORE IN ANOTHER VARIABLE */ DATA TESTOFRANKVARIABLE; SET ATIFRAZA.PRACTICE; if CYLINDERS>8 then STATUS="LARGE"; if CYLINDERS<=8 then STATUS="MEDIUM"; if CYLINDERS<=6 then STATUS="SMALL"; if CYLINDERS=. then STATUS="UNKNOWN"; RUN; /* TOTAL (MEAN STD) BY CLASS I.E. TOTAL OF ITEMS BY CLASSES */ proc means data=atifraza.cake; var PresentScore TasteScore; class flavor; output out=Cake_Desc mean=Avg_PS Avg_TS Std=SD_PS SD_TS SUM=TOTALPS TOTALTS; run; /* SCANNING A VARIABLE AND THEN PUTTING NAME INVESTED */ DATA ATIFRAZA.MARKS; INPUT S_NO $ NAME $10. DATE MMDDYY10. MARKS GRADE $; FORMAT DATE MMDDYY10.; DATALINES; 01 ALI RAZA 09122000 80 A 02 AHMED KHAN 02242011 75 B
  • 4.
    03 BINA ALI08302006 60 C 04 MARIA ZIA 07212004 89 A ; RUN; DATA ATIFRAZA.MARKS2; SET ATIFRAZA.MARKS; F=SCAN(NAME,+1); L=SCAN(NAME,+2); NAMES=L||(",")||F; RUN; /*PANEL DATA*/ /* create lags and differentials in panel data */ PROC SORT DATA=ATIFRAZA.CPII; BY COUNTRY; RUN; PROC PRINT DATA=ATIFRAZA.CPII; RUN; PROC TRANSPOSE DATA=ATIFRAZA.CPII OUT=ATIFRAZA.CPIITRANS; BY COUNTRY; RUN; DATA ATIFRAZA.CPITRANS (RENAME=(COL1=CPI _NAME_=YEARS)); SET ATIFRAZA.CPIITRANS; RUN; DATA ATIFRAZA.ESPAK; SET ATIFRAZA.CPITRANS; IF COUNTRY='PAK'; /* CREATING DIFFERENCIAL AND LAGS IN A TIME SERIES DATA */ LAG_PK =LAG(CPI); DCPI_PK =DIF(CPI); RUN; PROC PANEL DATA=ATIFRAZA.CPITRANS; ID COUNTRY YEARS; LAG CPI(1)/OUT=ATIFRAZA.LAGPANEL; RUN;
  • 5.
    /* SAAD FILE*/ /*to generate data*/ data practice.sales; input date mmddyy10. name $ 12-15 sales 17-22 expense 24-29; datalines; 02/01/2010 ABC 10000 9000 03/01/2010 DEF 20000 19000 04/01/2010 JIK 15000 12000 ; run; /*to add new variable*/ data practice.sales; set practice.sales; loss=sales-expense; run; /*to define format*/ proc print data=practice.sales; format date mmddyy10. sales dollar. expense dollar. loss dollar.; run; /*to identify the contents*/ proc contents data=practice.sales; run; /*to define labels*/ data practice.sales; set practice.sales; label name='Person name'; label sales= 'Sales ($)'; label expense= 'Expense ($)'; label loss='Loss ($)'; run; proc contents data=practice.sales; run; /*for descriptive analysis*/ proc means data=practice.sales; run; /*for selective descriptive analysis*/ proc means data=practice.sales noprint n missing mean STD skew kurt nmiss max min; var sales expense loss; output out=meansales; run; proc print data=meansales; run; /*for frequency analysis*/ proc freq data=practice.sales; table sales; run; /*cross tabulation*/ proc freq data=practice.sales;
  • 6.
    table sales*expense; run; /*to generatenew variables using existing ones*/ data practice.tsales; set practice.sales; MC=sales+expense; Mult=sales*expense; div=sales/expense; logs= log(sales); loge= log(expense); logl= log(loss); run; /*regression analysis for one ind.variable*/ proc reg data=practice.tsales; model logs=loge; run; /*regression analysis for several ind.variables*/ proc reg data=practice.tsales; model logs=loge logl; run; /*anova analysis*/ proc anova data=practice.tsales; class loge; model logs=loge; run; /*to drop a variable*/ data practice.lsales; set practice.tsales; drop logl; run; /*to identify obs with max and min value*/ proc means data=practice.tsales noprint; var sales expense; output out=practice.meansales mean= avs avex std= sds sdex max= maxs maxex maxid(sales(name) expense(name))=maxidsl maxidex min= mins minex minid(sales(name) expense(name))=minidsl minidex ; run; proc print data=practice.meansales; run; /*univariate analysis*/ proc univariate data=practice.tsales; var sales; run; proc univariate data=practice.tsales winsorized=.1 trimmed= .1 .01
  • 7.
    robustscale; var sales; run; proc printdata=practice.tsales; run; /*correlation analysis*/ proc corr data=practice.tsales; run; proc corr data=practice.tsales kendall spearman pearson fisher; run; proc corr data=practice.tsales noprint alpha; run; proc corr data=practice.tsales csscp cov; run; proc corr data=practice.tsales plots=matrix(histogram); run; /*mean analysis usin class*/ proc means data=practice.heart; class sex; run; /*frequency analysis using by statement*/ proc freq data=practice.heart; by sex; table AgeAtDeath*DeathCause/missing; run; /*to find missing values*/ proc means data=practice.heart nmiss n; run; /*sorting data using by*/ proc sort data=practice.heart; by sex status; run; /*freq analysis different functions*/ proc freq data=practice.heart; table AgeAtDeath/missprint; run; proc freq data=practice.heart order=data; table BP_Status*Smoking_Status/chisq; weight smoking; run; /* analysis of cars1 data*/ proc contents data=quiz.cars1; run;
  • 8.
    proc means data=quiz.cars1n nmiss; run; /*replace mising values with 0*/ /*oneway*/ data quiz.cars2; set quiz.cars1; array change Invoice; do over change; if change=. then change=0; end; run ; proc means data=quiz.cars2 n nmiss ; run; /*2ndway*/ proc stdize data=quiz.cars1 out=quiz.cars5 reponly missing=0; var cylinders invoice; run; proc means data=quiz.cars5 n nmiss; run; /*if variable is not mentioned it will replace all missing values present in data*/ proc stdize data=quiz.cars1 out=quiz.cars4 reponly missing=0; run; proc means data=quiz.cars4 n nmiss ; run; /*replace mising values with mean value*/ proc stdize data=quiz.cars2 out=Quiz.cars3 missing=mean reponly; var Cylinders; run; proc means data=quiz.cars3 n nmiss; run; proc contents data=quiz.cars3; run; /* to identify outliers Box Plot, Histogram, Maxid, subgroup analysis and Univatiate anlaysis and scatter plot of that specific variable */ proc sgplot data= quiz.cars3; vbox invoice; run; proc sgplot data=QUIZ.CARS3; histogram Invoice /; density Invoice; yaxis grid; discretelegend "DENSITY" / location=inside position=topright across=1;
  • 9.
    run; proc univariate data=quiz.cars3; varinvoice; run; proc means data=quiz.cars3 noprint; var invoice; output out=quiz.minicar max= maxinv maxid(invoice(model))=maxidinv min= minin minid(invoice(model))=minidinv; run; proc sgpanel data=quiz.cars3; panelby type; vbox Invoice; run; Panel data: data d26oct.cpi; input country $ y2001 y2002 y2003; datalines; PAK 100 111 119 IND 99 107 116 SRL 88 95 101 BND 77 88 96 BHT 96 84 95 AFG 97 103 107 ; run; proc sort data=d26oct.cpi; by country; run; proc transpose data=d26oct.cpi out=d26oct.transdat; by country; run; proc print data=d26oct.transdat; run; data d26oct.paneldat (rename=(col1=cpi _name_=year)); set d26oct.transdat; run; data d26oct.TSPAK; set d26oct.paneldat; if country="PAK"; dcpi_pk= dif(cpi);
  • 10.
    lcpi_pk= lag(cpi); run; proc paneldata=d26oct.transdat; id country _name_; lag col1 (1)/ out=d26oct.panllagdat; run; /** Import an XLSX file. **/ PROC IMPORT DATAFILE="/home/msaadbaloch0/Practice/WDI.xlsx" OUT=practice.wdi DBMS=XLSX REPLACE; RUN; /** Print the results. **/ PROC PRINT DATA=practice.wdi; RUN; /*Taking lags without moving the lastone in the next country----better method*/ PROC SORT DATA=practice.wdi; BY country; run; proc transpose data=practice.wdi out=practice.transwdi; by country; run; proc contents data=practice.transwdi; run; data practice.panelwdi (rename=(col1=GDP col2=INF col3=FDI _name_=year)); set practice.transwdi; drop _LABEL_; run; data practice.panelwdifor; set practice.panelwdi; format FDI dollar10. GDP dollar10. INF dollar10.; run; proc contents data=practice.panelwdifor; run; /*to create lag*/ data practice.panelogdata; set practice.panelwdifor; by country;
  • 11.
    Lag_GDP=lag(GDP); Lag_FDI=lag(FDI); Lag_INF=lag(INF); if first.country then do; Lag_GDP=.; Lag_FDI=.; Lag_INF=.; end; run; /*to create difference*/ data practice.paneldifdata; set practice.panelwdifor; by country; dif_GDP=dif(GDP); dif_INF=dif(INF); dif_FDI=dif(FDI); if first.country then do; dif_GDP=.; dif_INF=.; dif_FDI=.; end; run;