Friday, August 17, 2012

Outlier Treatment - 2 std

/**Outlier Treatment **/





/* Build the demo table WINSOR_TEST (variables x, y, z):                */
/*   - 98 rows of normal random draws, each rounded to an integer       */
/*   - 1 closing row carrying two extreme values and a missing value    */
data winsor_test (drop = row_no);

    do row_no = 1 to 98;
        x = round(rannor(124));
        y = round(rannor(24));
        z = round(rannor(14));
        output;
    end;

    /* Closing row: obvious outliers in x and z, missing y. */
    x = 989;
    call missing(y);
    z = 1234;
    output;

run;



/* Write macro execution tracing (MLOGIC) and the SAS code generated by macros (MPRINT) to the log. */
options mlogic mprint;



/*----------------------------------------------------------------------------
 * get_stat(datain, nstd=2)
 *
 * Two-pass outlier capping for every numeric variable of a data set.
 *
 *   Pass 1: compute mean and standard deviation of each numeric variable in
 *           DATAIN and write DATAIN_temp, in which any value further than
 *           NSTD standard deviations from the mean is set to missing.
 *   Pass 2: recompute mean and standard deviation on DATAIN_temp and write
 *           DATAIN_final, a copy of DATAIN with one extra variable New_<var>
 *           per numeric variable. New_<var> equals <var> capped to
 *           [mean - NSTD*std, mean + NSTD*std] using the pass 2 statistics.
 *           Missing values stay missing.
 *
 * Parameters:
 *   datain  name of the input data set (positional, required)
 *   nstd    number of standard deviations used as the cut-off (default 2)
 *
 * Data sets created in the default library:
 *   cal_means, cal2, new_means, tran_new   intermediate statistics
 *   <datain>_temp, <datain>_final          results
 *----------------------------------------------------------------------------*/
%macro get_stat(datain, nstd=2);

    /* Keep loop index and counter out of the calling scope. Num starts   */
    /* at 0 so the loops below are skipped if no statistics are produced. */
    %local i Num;
    %let Num = 0;

    /* ---- Pass 1: statistics on the raw data ---- */
    proc means data = &datain. noprint;
        output out = cal_means(where=(_STAT_ in ('MEAN','STD')));
    run;

    /* One row per variable: _NAME_ = variable, COL1 = mean, COL2 = std. */
    proc transpose data = cal_means(drop = _type_ _freq_ _stat_) out = cal2;
    run;

    /* Publish Mean<k>, Std<k>, Vname<k> and the variable count Num as    */
    /* macro variables local to this macro. CATS builds the name without  */
    /* blanks; SYMPUTX strips blanks and keeps more numeric precision     */
    /* than SYMPUT.                                                       */
    data _null_;
        set cal2;
        call symputx(cats("Mean",  _n_), col1,   'L');
        call symputx(cats("Std",   _n_), col2,   'L');
        call symputx(cats("Vname", _n_), _name_, 'L');
        call symputx("Num", _n_, 'L');
    run;

    /* Blank out every value outside mean +/- nstd * std. */
    data &datain._temp;
        set &datain.;
        %do i = 1 %to &Num.;
            if (&&Vname&i.. > &&Mean&i.. + (&nstd. * &&Std&i..)) or
               (&&Vname&i.. < &&Mean&i.. - (&nstd. * &&Std&i..)) then &&Vname&i.. = .;
        %end;
    run;

    /* ---- Pass 2: statistics on the trimmed data ---- */
    %let Num = 0;

    proc means data = &datain._temp noprint;
        output out = new_means(where=(_STAT_ in ('MEAN','STD')));
    run;

    proc transpose data = new_means(drop = _type_ _freq_ _stat_) out = tran_new;
    run;

    /* Overwrite the macro variables with the trimmed statistics. */
    data _null_;
        set tran_new;
        call symputx(cats("Mean",  _n_), col1,   'L');
        call symputx(cats("Std",   _n_), col2,   'L');
        call symputx(cats("Vname", _n_), _name_, 'L');
        call symputx("Num", _n_, 'L');
    run;

    /* Cap the original values at the trimmed limits; the source          */
    /* variables are left untouched and the result goes to New_<var>.     */
    data &datain._final;
        set &datain.;
        %do i = 1 %to &Num.;
            New_&&Vname&i.. = &&Vname&i..;
            if not missing(&&Vname&i..) then
            do;
                if &&Vname&i.. > &&Mean&i.. + (&nstd. * &&Std&i..) then
                    New_&&Vname&i.. = &&Mean&i.. + (&nstd. * &&Std&i..);
                if &&Vname&i.. < &&Mean&i.. - (&nstd. * &&Std&i..) then
                    New_&&Vname&i.. = &&Mean&i.. - (&nstd. * &&Std&i..);
            end;
        %end;
    run;

%mend get_stat;



/* Run the outlier treatment on the demo table; writes winsor_test_temp and winsor_test_final. */
%get_stat(winsor_test);


No comments:

Post a Comment