/*****************************************
* Source From - http://www.wrds.us/index.php/repository/view/1
****** Trim or winsorize macro
* byvar = none for no byvar;
* type = delete/winsor (delete will trim, winsor will winsorize);
* dsetin = dataset to winsorize/trim;
* dsetout = dataset to output with winsorized/trimmed values;
* byvar = subsetting variables to winsorize/trim on;
* vars = variables to winsorize/trim;
* pctl = lower and upper percentile cut-offs, e.g. 1 99;
****************************************/
/* Demo input: 100 random rows plus one extreme row for the macro to clip. */
data test (drop = i);
    do i = 1 to 100;
        /* x: rounded absolute value of a normal draw; y: rounded uniform draw */
        x = round(abs(rannor(0)));
        y = round(ranuni(0));
        output;
    end;

    /* Deliberate outlier appended after the random rows */
    x = 900;
    y = 1;
    output;
    /*x=800;y=0;output;*/
run;
/* Trace macro execution in the log; set before the macro is invoked. */
options mlogic mprint;

/*---------------------------------------------------------------------------
  %winsor - winsorize or trim variables at two percentile cut-offs.

  Parameters
    dsetin  = input dataset (required)
    dsetout = output dataset; defaults to dsetin when left blank
    byvar   = BY variable(s) within which percentiles are computed;
              "none" (any case) or blank means the whole dataset is one group
    vars    = space-separated list of numeric variables to process (required)
    type    = winsor (any case) replaces values outside the cut-offs with the
              cut-off; any other value (documented: delete) drops the
              observation instead
    pctl    = lower and upper percentile points, e.g. 1 99 (required)

  Notes
    - Missing values are never modified and never cause a deletion.
    - Work datasets XTEMP and XTEMP_PCTL are created (and overwritten if they
      already exist); they are left in place after the macro ends.
    - The output is sorted by the BY variable(s).
    - For each variable V the percentile columns are named VL and VH; they
      are dropped from the output.
---------------------------------------------------------------------------*/
%macro winsor(dsetin=, dsetout=, byvar=none, vars=, type=winsor, pctl=);
    /* Keep helper macro variables out of the caller's symbol table */
    %local varL varH nvars idx token made_byvar;

    /* Without these three the steps below can only fail with obscure errors */
    %if %length(&dsetin) = 0 or %length(&vars) = 0 or %length(&pctl) = 0 %then %do;
        %put ERROR: winsor requires dsetin=, vars= and pctl=.;
        %return;
    %end;

    %if %length(&dsetout) = 0 %then %let dsetout = &dsetin;

    /* Build the lists of lower/upper percentile column names: xL yL ... / xH yH ... */
    %let nvars = %sysfunc(countw(&vars, %str( )));
    %do idx = 1 %to &nvars;
        %let token = %scan(&vars, &idx, %str( ));
        %let varL = &varL &token.L;
        %let varH = &varH &token.H;
    %end;

    /* No BY variable requested: use a constant so the BY-merge below still works */
    %let made_byvar = 0;
    %if %length(&byvar) = 0 or %upcase(&byvar) = NONE %then %do;
        %let byvar = xbyvar;
        %let made_byvar = 1;
    %end;

    /* Work on a copy so the input dataset is not re-sorted in place */
    data xtemp;
        set &dsetin;
        %if &made_byvar %then %do;
        xbyvar = 1;
        %end;
    run;

    proc sort data = xtemp;
        by &byvar;
    run;

    /* One row per BY group holding the two percentile points of every variable */
    proc univariate data = xtemp noprint;
        by &byvar;
        var &vars;
        output out = xtemp_pctl PCTLPTS = &pctl PCTLPRE = &vars PCTLNAME = L H;
    run;

    data &dsetout;
        merge xtemp xtemp_pctl;
        by &byvar;
        array trimvars{&nvars} &vars;
        array trimvarl{&nvars} &varL;
        array trimvarh{&nvars} &varH;
        do xi = 1 to dim(trimvars);
            if not missing(trimvars{xi}) then do;
            /* Case-insensitive so that type=Winsor does not silently trim */
            %if %upcase(&type) = WINSOR %then %do;
                if (trimvars{xi} < trimvarl{xi}) then trimvars{xi} = trimvarl{xi};
                if (trimvars{xi} > trimvarh{xi}) then trimvars{xi} = trimvarh{xi};
            %end;
            %else %do;
                if (trimvars{xi} < trimvarl{xi}) then delete;
                if (trimvars{xi} > trimvarh{xi}) then delete;
            %end;
            end;
        end;
        /* xbyvar is dropped only when this macro created it */
        drop &varL &varH xi
            %if &made_byvar %then %do;
            xbyvar
            %end;
        ;
    run;
%mend winsor;

/* Example call; placed after the definition so the macro resolves on first run. */
%winsor(dsetin=Test,dsetout=TestOut,byvar=none,vars=x,type=winsor,pctl=1 99);
/*****************************************
* Source From - http://www.wrds.us/index.php/repository/view/1
****** Trim or winsorize macro
* byvar = none for no byvar;
* type = delete/winsor (delete will trim, winsor will winsorize);
* dsetin = dataset to winsorize/trim;
* dsetout = dataset to output with winsorized/trimmed values;
* byvar = subsetting variables to winsorize/trim on;
* vars = variables to winsorize/trim;
* pctl = lower and upper percentile cut-offs, e.g. 1 99;
****************************************/
/* Demo input: 100 random rows plus one extreme row for the macro to clip. */
data test (drop = i);
    do i = 1 to 100;
        /* x: rounded absolute value of a normal draw; y: rounded uniform draw */
        x = round(abs(rannor(0)));
        y = round(ranuni(0));
        output;
    end;

    /* Deliberate outlier appended after the random rows */
    x = 900;
    y = 1;
    output;
    /*x=800;y=0;output;*/
run;
/* Trace macro execution in the log; set before the macro is invoked. */
options mlogic mprint;

/*---------------------------------------------------------------------------
  %winsor - winsorize or trim variables at two percentile cut-offs.

  Parameters
    dsetin  = input dataset (required)
    dsetout = output dataset; defaults to dsetin when left blank
    byvar   = BY variable(s) within which percentiles are computed;
              "none" (any case) or blank means the whole dataset is one group
    vars    = space-separated list of numeric variables to process (required)
    type    = winsor (any case) replaces values outside the cut-offs with the
              cut-off; any other value (documented: delete) drops the
              observation instead
    pctl    = lower and upper percentile points, e.g. 1 99 (required)

  Notes
    - Missing values are never modified and never cause a deletion.
    - Work datasets XTEMP and XTEMP_PCTL are created (and overwritten if they
      already exist); they are left in place after the macro ends.
    - The output is sorted by the BY variable(s).
    - For each variable V the percentile columns are named VL and VH; they
      are dropped from the output.
---------------------------------------------------------------------------*/
%macro winsor(dsetin=, dsetout=, byvar=none, vars=, type=winsor, pctl=);
    /* Keep helper macro variables out of the caller's symbol table */
    %local varL varH nvars idx token made_byvar;

    /* Without these three the steps below can only fail with obscure errors */
    %if %length(&dsetin) = 0 or %length(&vars) = 0 or %length(&pctl) = 0 %then %do;
        %put ERROR: winsor requires dsetin=, vars= and pctl=.;
        %return;
    %end;

    %if %length(&dsetout) = 0 %then %let dsetout = &dsetin;

    /* Build the lists of lower/upper percentile column names: xL yL ... / xH yH ... */
    %let nvars = %sysfunc(countw(&vars, %str( )));
    %do idx = 1 %to &nvars;
        %let token = %scan(&vars, &idx, %str( ));
        %let varL = &varL &token.L;
        %let varH = &varH &token.H;
    %end;

    /* No BY variable requested: use a constant so the BY-merge below still works */
    %let made_byvar = 0;
    %if %length(&byvar) = 0 or %upcase(&byvar) = NONE %then %do;
        %let byvar = xbyvar;
        %let made_byvar = 1;
    %end;

    /* Work on a copy so the input dataset is not re-sorted in place */
    data xtemp;
        set &dsetin;
        %if &made_byvar %then %do;
        xbyvar = 1;
        %end;
    run;

    proc sort data = xtemp;
        by &byvar;
    run;

    /* One row per BY group holding the two percentile points of every variable */
    proc univariate data = xtemp noprint;
        by &byvar;
        var &vars;
        output out = xtemp_pctl PCTLPTS = &pctl PCTLPRE = &vars PCTLNAME = L H;
    run;

    data &dsetout;
        merge xtemp xtemp_pctl;
        by &byvar;
        array trimvars{&nvars} &vars;
        array trimvarl{&nvars} &varL;
        array trimvarh{&nvars} &varH;
        do xi = 1 to dim(trimvars);
            if not missing(trimvars{xi}) then do;
            /* Case-insensitive so that type=Winsor does not silently trim */
            %if %upcase(&type) = WINSOR %then %do;
                if (trimvars{xi} < trimvarl{xi}) then trimvars{xi} = trimvarl{xi};
                if (trimvars{xi} > trimvarh{xi}) then trimvars{xi} = trimvarh{xi};
            %end;
            %else %do;
                if (trimvars{xi} < trimvarl{xi}) then delete;
                if (trimvars{xi} > trimvarh{xi}) then delete;
            %end;
            end;
        end;
        /* xbyvar is dropped only when this macro created it */
        drop &varL &varH xi
            %if &made_byvar %then %do;
            xbyvar
            %end;
        ;
    run;
%mend winsor;

/* Example call; placed after the definition so the macro resolves on first run. */
%winsor(dsetin=Test,dsetout=TestOut,byvar=none,vars=x,type=winsor,pctl=1 99);
/* No comments: Post a Comment (leftover text from the web page this code was copied from; not part of the program) */