/* Initial exploration of ATIFRAZA.PRACTICE.
   Fixes: removed a stray page number ("1.") that preceded PROC CONTENTS and
   broke the statement; corrected the label typo 'Pricce' -> 'Price'. */

/* List the variables and attributes of the data set. */
proc contents data=ATIFRAZA.PRACTICE;
run;

/* Descriptive statistics, reported with three decimals.
   No VAR statement is given, so PROC MEANS analyses every numeric variable. */
proc means data=ATIFRAZA.PRACTICE
           nmiss n mean std skew kurt min max css
           maxdec=3;
run;

/* Attach a descriptive label to INVOICE (the data set is rewritten in place). */
data ATIFRAZA.PRACTICE;
    set ATIFRAZA.PRACTICE;
    label INVOICE = 'Price';
run;

/* Confirm that the new label is stored. */
proc contents data=ATIFRAZA.PRACTICE;
run;

/* Distribution details for CYLINDERS. */
proc univariate data=ATIFRAZA.PRACTICE;
    var CYLINDERS;
run;
/* Write the extreme values of INVOICE and CYLINDERS to ATIFRAZA.abc, together
   with the MODEL of the observation that holds each extreme.
   NOPRINT suppresses the printed report; only the output data set is produced. */
proc means data=ATIFRAZA.PRACTICE noprint;
    var INVOICE CYLINDERS;
    output out=ATIFRAZA.abc
           max                                   = MaxInv     MaxCyl
           maxid(INVOICE(MODEL) CYLINDERS(MODEL)) = MaxInvoice MaxCylinders
           min                                   = MinInv     MinCyl
           minid(INVOICE(MODEL) CYLINDERS(MODEL)) = MinInvoice MinCylinders;
run;

/* Listing of the full practice data set. */
proc print data=ATIFRAZA.PRACTICE;
run;
/* CREATING CONTINGENCY TABLE */
/* Fixes in this section: the two section headings were bare text (syntax
   errors) and are now comments; the plot request uses the documented
   PLOTS= spelling of the TABLES option. */

/* TYPE x ORIGIN cross-tabulation: first with chi-square tests, then with a
   dot-type frequency plot. The TITLE stays in effect until it is replaced. */
proc freq data=ATIFRAZA.PRACTICE;
    table TYPE*ORIGIN / chisq;
    table TYPE*ORIGIN / plots=freqplot(type=dot);
    title 'TYPE AND ORIGIN TABLE';
run;

/* One-way frequency tables for MAKE and ORIGIN. */
proc freq data=ATIFRAZA.PRACTICE;
    table MAKE ORIGIN;
run;

/* PROC FREQ AND PLOT */
/* Frequencies of MAKE where each observation is weighted by CYLINDERS. */
proc freq data=ATIFRAZA.PRACTICE;
    table MAKE / plots=freqplot(type=dot);
    weight CYLINDERS;
run;

proc print data=atifraza.practice;
run;

/* Same TYPE x ORIGIN tables as above, but weighted by INVOICE. */
proc freq data=ATIFRAZA.PRACTICE;
    table Type*Origin / chisq;
    table Type*Origin / plots=freqplot(type=dot);
    weight INVOICE;
    title 'TYPE AND ORIGIN TABLE';
run;
/* TRIMMED MEANS AND WINSORIZED */
/* Fixes in this section: headings were bare text (now comments, with
   "TIMED" corrected to "TRIMMED"); a stray page number ("2.") in front of
   PROC UNIVARIATE was removed. */

/* Robust location/scale estimates for CYLINDERS.
   An integer value for WINSORIZED=/TRIMMED= is the number of observations
   taken from each tail (a value below 0.5 would be a proportion). */
proc univariate data=ATIFRAZA.PRACTICE
                winsorized=10
                trimmed=10
                robustscale;
    var CYLINDERS;
run;

/* STANDARDIZE DATA */
/* REPONLY leaves the data unstandardized and only replaces missing CYLINDERS
   values, here with the mean computed within each BY group of TYPE.
   NOTSORTED lets the BY groups be processed in the order they occur, so no
   prior sort is required. */
proc stdize data=atifraza.practice
            out=ATIFRAZA.PRACTICEOK
            reponly missing=mean;
    var CYLINDERS;
    by TYPE notsorted;
run;

/* Pearson correlations of all numeric variables, with a scatter-plot matrix
   that shows histograms on the diagonal. */
proc corr data=atifraza.practiceOK pearson
          plots=matrix(histogram);
run;
/* MERGING TWO TABLES */
/* Both DATA steps use column input, so every record must be laid out as
      columns  1-25  Name
      columns 26-34  Year
      columns 36-50  Major / GRADE
   The records had lost their padding, which pushed Year (and Major/GRADE)
   into the Name field. They are re-aligned here; the values are unchanged.
   Do not indent or re-space the data lines. */
data class;
    input Name $ 1-25 Year $ 26-34 Major $ 36-50;
    datalines;
Abbott, Jennifer         first
Carter, Tom              third     Theater
Mendoza, Elissa          fourth    Mathematics
Tucker, Rachel           first
Uhl, Roland              second
Wacenske, Maurice        third     Theater
;
run;

proc print data=class;
    title 'Acting Class Roster';
run;

/* Second table: same students (names partly abbreviated) with a GRADE value. */
data GRADE;
    input Name $ 1-25 Year $ 26-34 GRADE $ 36-50;
    datalines;
Abbott, Jenni            first     D
Carter, Tom              third     T
Mendoza,                 fourth    M
Tucker, Ra               first     A
Uhl, Roland              second    B
Wacenske, Maurice        third     C
;
run;

proc print data=GRADE;
    title 'Acting Class Roster';
run;
/* Sort the roster by MAJOR ahead of the BY-merge. */
proc sort data=class;
    by major;
run;

/* Merge CLASS and GRADE, dropping YEAR from both and keeping GRADE's name
   column under the new name ndName.
   Fix: a stray page number ("3.") in front of MERGE was removed.
   NOTE(review): WORK.GRADE is created with Name, Year and GRADE only -- it
   has no MAJOR variable and is never sorted, so "by major" is expected to
   fail with a "BY variable ... is not on input data set" error. The intended
   merge key cannot be determined from this file (Name is abbreviated in
   GRADE), so the BY statement is left as written -- TODO confirm the key. */
data MERGED;
    merge CLASS (drop=year)
          GRADE (drop=year rename=(NAME=ndName));
    by major;
run;

proc print data=MERGED;
run;
/* Total the cylinders of Chevrolet cars only, selecting them with a WHERE
   statement while sorting into a temporary table. */
proc sort data=ATIFRAZA.PRACTICE
          out=WORK.SORTTEMP;
    where MAKE='Chevrolet';
    by Make;
run;

/* Print CYLINDERS per MAKE with a column total; LABEL shows variable labels
   as column headings. */
proc print data=WORK.SORTTEMP label;
    by Make;
    var Cylinders;
    sum Cylinders;
run;

/* Remove the temporary table. */
proc delete data=work.SORTTEMP;
run;
/* Derive a new variable from existing ones while dropping and renaming
   input variables in the same step: HORSEPOWER is read as HP; TYPE, MAKE and
   ORIGIN are not read at all. AVERAGE is the midpoint of INVOICE and MSRP
   (missing if either input is missing). */
data DUMMY;
    set ATIFRAZA.PRACTICE (drop=TYPE MAKE ORIGIN
                           rename=(Horsepower=HP));
    AVERAGE = (INVOICE + MSRP) / 2;
run;
/* Create STATUS from the value of CYLINDERS:
       missing      -> UNKNOWN
       <= 6         -> SMALL
       7 or 8       -> MEDIUM
       > 8          -> LARGE
   Fix: STATUS previously took its length (5) from the first assignment,
   "LARGE", so "MEDIUM" and "UNKNOWN" were stored truncated. The length is now
   declared explicitly, and the tests form one IF/ELSE chain so the result no
   longer depends on later statements overwriting earlier ones. */
data TESTOFRANKVARIABLE;
    set ATIFRAZA.PRACTICE;
    length STATUS $ 7;
    if missing(CYLINDERS)   then STATUS = "UNKNOWN";
    else if CYLINDERS <= 6  then STATUS = "SMALL";
    else if CYLINDERS <= 8  then STATUS = "MEDIUM";
    else                         STATUS = "LARGE";
run;
/* Mean, standard deviation and total of the two score variables for each
   FLAVOR class; the statistics are also saved to WORK.Cake_Desc. */
proc means data=atifraza.cake;
    class flavor;
    var PresentScore TasteScore;
    output out=Cake_Desc
           mean = Avg_PS  Avg_TS
           Std  = SD_PS   SD_TS
           SUM  = TOTALPS TOTALTS;
run;
/* SCANNING A VARIABLE AND THEN WRITING THE NAME INVERTED */
/* Source table for the name-scanning example.
   Record layout: S_NO, blank, NAME in columns 4-13 (padded to 10 characters),
   then DATE (mmddyyyy), MARKS and GRADE separated by blanks.
   Fixes: a stray page number ("4. ") had been fused into the third record;
   the records were not padded, so NAME $10. swallowed the start of the date.
   DATE is now read with the colon modifier, so it is taken as the next
   blank-delimited field instead of a fixed 10-column window.
   Do not indent or re-space the data lines. */
data ATIFRAZA.MARKS;
    input S_NO $ @4 NAME $10. DATE :MMDDYY10. MARKS GRADE $;
    format DATE MMDDYY10.;
    datalines;
01 ALI RAZA   09122000 80 A
02 AHMED KHAN 02242011 75 B
03 BINA ALI   08302006 60 C
04 MARIA ZIA  07212004 89 A
;
run;
/* Split NAME into its first (F) and second (L) word and build NAMES as
   "second,first".
   Fix: L is a fixed-length character variable, so L||","||F kept L's
   trailing blanks and produced values such as "RAZA      ,ALI". TRIM removes
   that padding before the comma; the stored length of NAMES is unchanged. */
data ATIFRAZA.MARKS2;
    set ATIFRAZA.MARKS;
    F = scan(NAME, 1);
    L = scan(NAME, 2);
    NAMES = trim(L) || "," || F;
run;
/* PANEL DATA */
/* Reshape ATIFRAZA.CPII into one row per COUNTRY and transposed variable,
   as the basis for lags and differences. */

/* PROC TRANSPOSE with a BY statement needs the data ordered by COUNTRY. */
proc sort data=ATIFRAZA.CPII;
    by COUNTRY;
run;

proc print data=ATIFRAZA.CPII;
run;

/* Transpose within each country; the former variable names end up in _NAME_
   and the values in COL1. */
proc transpose data=ATIFRAZA.CPII
               out=ATIFRAZA.CPIITRANS;
    by COUNTRY;
run;

/* Copy the transposed table under friendlier column names. */
data ATIFRAZA.CPITRANS (rename=(COL1=CPI _NAME_=YEARS));
    set ATIFRAZA.CPIITRANS;
run;
/* Time series for a single country: keep only the PAK rows, then create the
   first lag and first difference of CPI. The subsetting IF comes before
   LAG/DIF, so those functions only ever see PAK observations. */
data ATIFRAZA.ESPAK;
    set ATIFRAZA.CPITRANS;
    if COUNTRY='PAK';
    LAG_PK  = lag(CPI);
    DCPI_PK = dif(CPI);
run;

/* Lag of CPI for every country, written to ATIFRAZA.LAGPANEL.
   NOTE(review): YEARS originates from _NAME_ of PROC TRANSPOSE, which is a
   character variable -- confirm PROC PANEL accepts it as the time ID. */
proc panel data=ATIFRAZA.CPITRANS;
    id COUNTRY YEARS;
    lag CPI(1) / out=ATIFRAZA.LAGPANEL;
run;
/* SAAD FILE */
/* to generate data */
/* Record layout required by the INPUT statement:
      columns  1-10  date (mm/dd/yyyy)
      columns 12-15  name
      columns 17-22  sales
      columns 24-29  expense
   Fixes: a stray page number ("5.") in front of the section comment was
   removed; the records had lost their padding, so sales and expense were
   read from the wrong columns. Do not indent or re-space the data lines. */
data practice.sales;
    input date mmddyy10. name $ 12-15 sales 17-22 expense 24-29;
    datalines;
02/01/2010 ABC  10000  9000
03/01/2010 DEF  20000  19000
04/01/2010 JIK  15000  12000
;
run;
/* to add new variable */
/* LOSS is computed as sales minus expense (data set rewritten in place). */
data practice.sales;
    set practice.sales;
    loss = sales - expense;
run;

/* to define format */
/* Fix: DOLLAR. without a width defaults to 6 characters, which is too narrow
   for values such as $10,000 (7 characters), so the commas were dropped.
   An explicit width of 10 leaves room for the full notation. */
proc print data=practice.sales;
    format date mmddyy10.
           sales expense loss dollar10.;
run;

/* to identify the contents */
proc contents data=practice.sales;
run;

/* to define labels */
data practice.sales;
    set practice.sales;
    label name    = 'Person name'
          sales   = 'Sales ($)'
          expense = 'Expense ($)'
          loss    = 'Loss ($)';
run;

/* Confirm that the labels are stored. */
proc contents data=practice.sales;
run;
/* for descriptive analysis */
/* Default statistics for every numeric variable. */
proc means data=practice.sales;
run;

/* for selective descriptive analysis */
/* NOPRINT suppresses the report; the OUTPUT statement names no statistics,
   so WORK.MEANSALES receives the default set for the three variables. */
proc means data=practice.sales
           noprint n missing mean STD skew kurt nmiss max min;
    var sales expense loss;
    output out=meansales;
run;

proc print data=meansales;
run;

/* for frequency analysis */
proc freq data=practice.sales;
    table sales;
run;
/* cross tabulation */
/* Two-way table of SALES by EXPENSE.
   Fix: a stray page number ("6.") in front of TABLE was removed. */
proc freq data=practice.sales;
    table sales*expense;
run;
/* to generate new variables using existing ones */
/* Sum, product, ratio and natural logs of the sales figures. */
data practice.tsales;
    set practice.sales;
    MC   = sales + expense;
    Mult = sales * expense;
    div  = sales / expense;
    logs = log(sales);
    loge = log(expense);
    logl = log(loss);
run;

/* regression analysis for one independent variable */
proc reg data=practice.tsales;
    model logs = loge;
run;

/* regression analysis for several independent variables */
proc reg data=practice.tsales;
    model logs = loge logl;
run;

/* anova analysis: LOGE is used as the classification variable */
proc anova data=practice.tsales;
    class loge;
    model logs = loge;
run;

/* to drop a variable */
data practice.lsales;
    set practice.tsales;
    drop logl;
run;
/* to identify obs with max and min value */
/* Saves mean, standard deviation, maximum and minimum of SALES and EXPENSE
   to PRACTICE.MEANSALES, plus the NAME of the observation holding each
   maximum and minimum. */
proc means data=practice.tsales noprint;
    var sales expense;
    output out=practice.meansales
           mean                             = avs     avex
           std                              = sds     sdex
           max                              = maxs    maxex
           maxid(sales(name) expense(name)) = maxidsl maxidex
           min                              = mins    minex
           minid(sales(name) expense(name)) = minidsl minidex;
run;

proc print data=practice.meansales;
run;
/* univariate analysis */
proc univariate data=practice.tsales;
    var sales;
run;

/* Robust estimates for SALES: 10% winsorized mean, 10% and 1% trimmed means,
   and robust measures of scale.
   Fix: a stray page number ("7.") had been fused into the middle of the
   PROC UNIVARIATE statement, in front of ROBUSTSCALE; it was removed. */
proc univariate data=practice.tsales
                winsorized=.1
                trimmed=.1 .01
                robustscale;
    var sales;
run;

proc print data=practice.tsales;
run;
/* correlation analysis */
/* Default output for all numeric variables. */
proc corr data=practice.tsales;
run;

/* Kendall, Spearman and Pearson coefficients, plus Fisher z statistics. */
proc corr data=practice.tsales
          kendall spearman pearson fisher;
run;

/* ALPHA requested with the printed output suppressed. */
proc corr data=practice.tsales
          noprint alpha;
run;

/* Corrected sums of squares/cross-products and the covariance matrix. */
proc corr data=practice.tsales
          csscp cov;
run;

/* Scatter-plot matrix with histograms on the diagonal. */
proc corr data=practice.tsales
          plots=matrix(histogram);
run;
/* mean analysis using class */
/* CLASS does not require sorted input. */
proc means data=practice.heart;
    class sex;
run;

/* sorting data using by */
/* Fix: this sort used to come after the PROC FREQ step that has "by sex".
   BY-group processing needs the data ordered by the BY variable, so the sort
   now runs first; SEX is the leading sort key, which satisfies "by sex". */
proc sort data=practice.heart;
    by sex status;
run;

/* frequency analysis using by statement */
/* MISSING counts missing levels as ordinary table categories. */
proc freq data=practice.heart;
    by sex;
    table AgeAtDeath*DeathCause / missing;
run;

/* to find missing values */
proc means data=practice.heart nmiss n;
run;
/* freq analysis, different options */
/* MISSPRINT shows missing levels in the table without including them in the
   computed statistics. */
proc freq data=practice.heart;
    table AgeAtDeath / missprint;
run;

/* Chi-square test of BP_Status by Smoking_Status, with levels in data order
   and each observation weighted by SMOKING. */
proc freq data=practice.heart order=data;
    weight smoking;
    table BP_Status*Smoking_Status / chisq;
run;
/* analysis of cars1 data */
proc contents data=quiz.cars1;
run;

/* Count non-missing and missing values per numeric variable.
   Fix: a stray page number ("8.") in front of this PROC MEANS was removed. */
proc means data=quiz.cars1 n nmiss;
run;

/* replace missing values with 0 */
/* one way: DATA step with an array.
   The legacy implicit-array "do over" loop is replaced by an explicitly
   subscripted array; more variables can be added to the array list and the
   loop stays the same. The loop index is dropped from the output. */
data quiz.cars2;
    set quiz.cars1;
    array change{*} Invoice;
    do _i = 1 to dim(change);
        if change{_i} = . then change{_i} = 0;
    end;
    drop _i;
run;

/* Verify: INVOICE should now report zero missing values. */
proc means data=quiz.cars2 n nmiss;
run;
/* second way: PROC STDIZE */
/* REPONLY replaces missing values without standardizing; here CYLINDERS and
   INVOICE receive 0. */
proc stdize data=quiz.cars1
            out=quiz.cars5
            reponly missing=0;
    var cylinders invoice;
run;

proc means data=quiz.cars5 n nmiss;
run;

/* If no variable is mentioned, the missing values of all variables handled
   by the procedure are replaced. */
proc stdize data=quiz.cars1
            out=quiz.cars4
            reponly missing=0;
run;

proc means data=quiz.cars4 n nmiss;
run;

/* replace missing values with mean value */
proc stdize data=quiz.cars2
            out=Quiz.cars3
            reponly missing=mean;
    var Cylinders;
run;

proc means data=quiz.cars3 n nmiss;
run;

proc contents data=quiz.cars3;
run;
/* To identify outliers: box plot, histogram, MAXID, subgroup analysis,
   univariate analysis and scatter plot of that specific variable. */

/* Box plot of INVOICE. */
proc sgplot data=quiz.cars3;
    vbox invoice;
run;

/* Histogram of INVOICE with a fitted density curve and an inside legend.
   Fixes: DISCRETELEGEND is a Graph Template Language statement and is not
   available in PROC SGPLOT -- the SGPLOT equivalent is KEYLEGEND, which refers
   to a plot through its NAME= option, so the DENSITY plot is now named
   "DENSITY". A stray page number ("9.") in front of RUN was removed. */
proc sgplot data=QUIZ.CARS3;
    histogram Invoice;
    density Invoice / name="DENSITY";
    yaxis grid;
    keylegend "DENSITY" / location=inside position=topright across=1;
run;

/* Extreme observations, quantiles and moments of INVOICE. */
proc univariate data=quiz.cars3;
    var invoice;
run;
/* Save the largest and smallest INVOICE to QUIZ.MINICAR, together with the
   MODEL that holds each of them. */
proc means data=quiz.cars3 noprint;
    var invoice;
    output out=quiz.minicar
           max                   = maxinv
           maxid(invoice(model)) = maxidinv
           min                   = minin
           minid(invoice(model)) = minidinv;
run;

/* Subgroup view: one INVOICE box plot per TYPE. */
proc sgpanel data=quiz.cars3;
    panelby type;
    vbox Invoice;
run;
/* Panel data */
/* Fix: the section heading was bare text ("Panel data:"), which SAS would
   try to parse as a statement; it is now a comment. */

/* Wide table: one row per country, one column per year (list input). */
data d26oct.cpi;
    input country $ y2001 y2002 y2003;
    datalines;
PAK 100 111 119
IND 99 107 116
SRL 88 95 101
BND 77 88 96
BHT 96 84 95
AFG 97 103 107
;
run;

/* PROC TRANSPOSE with a BY statement needs the data ordered by COUNTRY. */
proc sort data=d26oct.cpi;
    by country;
run;

/* Long table: the year columns become rows; the former column names are
   stored in _NAME_ and the values in COL1. */
proc transpose data=d26oct.cpi out=d26oct.transdat;
    by country;
run;

proc print data=d26oct.transdat;
run;

/* Copy under friendlier column names. */
data d26oct.paneldat (rename=(col1=cpi _name_=year));
    set d26oct.transdat;
run;
/* Time series for PAK only: first difference and first lag of CPI.
   The subsetting IF precedes DIF/LAG, so those functions only ever see PAK
   observations.
   Fix: a stray page number ("10.") in front of the LCPI_PK assignment was
   removed. */
data d26oct.TSPAK;
    set d26oct.paneldat;
    if country="PAK";
    dcpi_pk = dif(cpi);
    lcpi_pk = lag(cpi);
run;

/* Lag of COL1 for every country, written to D26OCT.PANLLAGDAT.
   NOTE(review): _NAME_ comes from PROC TRANSPOSE and is a character
   variable -- confirm PROC PANEL accepts it as the time ID. */
proc panel data=d26oct.transdat;
    id country _name_;
    lag col1(1) / out=d26oct.panllagdat;
run;
/* Import an XLSX file; REPLACE overwrites PRACTICE.WDI if it exists. */
proc import datafile="/home/msaadbaloch0/Practice/WDI.xlsx"
            out=practice.wdi
            dbms=xlsx
            replace;
run;

/* Print the results. */
proc print data=practice.wdi;
run;

/* Taking lags without carrying the last value of one country over into the
   next country -- the better method. Preparation: sort, transpose by
   country, rename. */
proc sort data=practice.wdi;
    by country;
run;

proc transpose data=practice.wdi
               out=practice.transwdi;
    by country;
run;

proc contents data=practice.transwdi;
run;

/* Name the transposed columns and discard _LABEL_.
   NOTE(review): assumes COL1/COL2/COL3 hold GDP, INF and FDI in that order;
   this depends on the layout of the workbook -- confirm. */
data practice.panelwdi (rename=(col1=GDP col2=INF col3=FDI
                                _name_=year));
    set practice.transwdi;
    drop _LABEL_;
run;

/* Apply display formats to the three measures. */
data practice.panelwdifor;
    set practice.panelwdi;
    format FDI dollar10. GDP dollar10. INF dollar10.;
run;

proc contents data=practice.panelwdifor;
run;
/* to create lag */
/* First lag of GDP, FDI and INF within each country.
   LAG is called on every observation (never inside a condition) so its queue
   stays in step with the data; on the first row of each country the lag,
   which would otherwise hold the previous country's last value, is reset to
   missing.
   Fix: a stray page number ("11.") in front of the LAG_GDP assignment was
   removed. */
data practice.panelogdata;
    set practice.panelwdifor;
    by country;
    Lag_GDP = lag(GDP);
    Lag_FDI = lag(FDI);
    Lag_INF = lag(INF);
    if first.country then do;
        Lag_GDP = .;
        Lag_FDI = .;
        Lag_INF = .;
    end;
run;
/* to create difference */
/* First difference of GDP, INF and FDI within each country. DIF runs on
   every observation; the value on the first row of a country would span two
   countries, so it is set to missing there. */
data practice.paneldifdata;
    set practice.panelwdifor;
    by country;

    dif_GDP = dif(GDP);
    dif_INF = dif(INF);
    dif_FDI = dif(FDI);

    if first.country then
        do;
            dif_GDP = .;
            dif_INF = .;
            dif_FDI = .;
        end;
run;