Thursday, August 30, 2012

Compute biserial, point biserial, and rank biserial correlations

Source -


http://support.sas.com/kb/24/991.html



%macro biserial(version, data= ,contin= ,binary= ,out=);
  /*------------------------------------------------------------------
    BISERIAL - point biserial, biserial and rank biserial correlations
    between a dichotomous variable and a continuous variable.

    Parameters
      version  (positional, optional) any non-blank value prints the
               macro version to the log.
      data=    input SAS data set.
      contin=  name of the continuous variable; its ranks are computed
               for the rank biserial correlation.
      binary=  name of the dichotomous variable (expected to be
               numeric, coded 0/1).
      out=     output data set; it is reused as scratch space and ends
               up holding one observation with the variables
               BISERIAL, PNTBISRL and RNKBISRL.

    Side effects
      - the scratch data set _TEMP_ is created and left behind;
      - NOTES are suppressed while the intermediate steps run and the
        caller's own NOTES/NONOTES setting is restored before the
        final step.
  ------------------------------------------------------------------*/
  %local _notes_opt;

  %if &version ne %then %put BISERIAL macro Version 2.2;

  /* remember the current NOTES setting so it can be restored, rather
     than unconditionally switching NOTES on at the end */
  %let _notes_opt = %sysfunc(getoption(notes));
  options nonotes;

  /* exclude observations with a missing value in either variable;
     missing() also rejects special missing values (.A-.Z), which a
     plain "> ." comparison lets through */
  data &out;
    set &data;
    where not missing(&contin) and not missing(&binary);
  run;

  /* compute the ranks of the continuous variable */
  proc rank data=&out out=&out;
    var &contin;
    ranks r_contin;
  run;

  /* p = mean of the binary variable, stdy = std of the continuous
     variable, n = number of observations */
  proc means data=&out noprint;
    var &binary &contin;
    output out=_temp_(keep=p stdy n) mean=p std=stdx stdy n=n;
  run;

  /* sort so that the binary=1 group comes first */
  proc sort data=&out;
    by descending &binary;
  run;

  /* per-group mean of the continuous variable and of its rank */
  proc means data=&out noprint;
    by notsorted &binary;
    var &contin r_contin;
    output out=&out mean=my r_contin;
  run;

  /* restructure: one row per statistic, col1 = first group (binary=1
     after the descending sort), col2 = second group (binary=0) */
  proc transpose data=&out out=&out(rename=(col1=my1 col2=my0));
    var r_contin my;
  run;

  /* combine everything needed for the biserial computations: the first
     row (mean ranks) is carried forward in r1/r0, the second row (means
     of the continuous variable) is output together with p, stdy and n */
  data &out;
    set &out(drop= _name_ _label_);
    retain r1 r0;
    if _n_=1 then do;
      r1=my1;
      r0=my0;
    end;
    else do;
      set _temp_;
      output;
    end;
  run;

  /* compute the point biserial (Pearson) correlation */
  proc corr data=&data noprint outp=_temp_;
    var &binary &contin;
  run;

  /* extract the point biserial correlation from the matrix: it is the
     &contin column of the CORR row that belongs to &binary.
     The row is selected by _NAME_ because "<>" in a DATA step IF
     statement is the MAX operator, not "not equal", so a test such as
     "&binary<>1" is always true and cannot single out the row. */
  data _temp_(keep=pntbisrl);
    set _temp_(rename=(&contin=pntbisrl));
    if _TYPE_='CORR' and upcase(_NAME_)=upcase("&binary") then output;
  run;

  options &_notes_opt;

  /* compute biserial and rank biserial.
     _temp_ holds exactly one row, so no row has to be discarded here
     and a point biserial correlation of exactly 1 is kept. */
  data &out;
    merge _temp_ &out;
    h=probit(1-p);
    /* standard normal density at h; arcos(-1) is pi */
    u=exp(-h*h/2)/sqrt(2*arcos(-1));
    biserial=p*(1-p)*(my1-my0)/stdy/u;
    rnkbisrl=2*(r1-r0)/n;

    keep biserial pntbisrl rnkbisrl;
    label biserial='Biserial Corr'
          pntbisrl='Point Biserial Corr'
          rnkbisrl='Rank Biserial Corr';
  run;

%mend biserial;







/* Sample data: x1 is a y/n flag, LENGTH is the continuous measurement.
   EVENT recodes the flag as a numeric 0/1 indicator (1 when x1 is 'y'). */
data k;
  input x1 :$1. length;
  if x1 = 'y' then event = 1;
  else event = 0;
  datalines;
y 14.8
n 13.8
y 12.4
y 10.1
y 7.1
y 6.1
n 5.8
y 4.6
n 4.3
n 3.5
n 3.3
y 3.2
y 3.0
n 2.8
n 2.8
n 2.5
y 2.4
y 2.3
y 2.1
n 1.7
n 1.7
n 1.5
n 1.3
n 1.3
n 1.2
n 1.2
n 1.1
y 0.8
n 0.7
n 0.6
n 0.5
n 0.2
n 0.2
y 0.1
;



/* Define the BISERIAL macro.
   The macro has to be compiled before it is called. When it is defined
   earlier in this same program nothing needs to be included; when it is
   kept in a separate file, include that file with its real path, e.g.
       %inc "<path-to>/biserial.sas";
   An include with an empty path is not issued here because it has no
   file to read. */

%biserial(data=k, contin=LENGTH, binary=EVENT, out=out1);

/*********************
  data=    SAS data set to be analyzed.
  binary=  Name of dichotomous variable which must be numeric with values 0 and 1.
  contin=  Name of continuous variable. Ranks of this variable will be computed
           to produce the rank biserial corr.
  out=     Output data set name.

  (These notes are a block comment: a "*" statement comment only ends at
  the next semicolon and would otherwise absorb the PROC PRINT statement
  that follows.)
*********************/

proc print data=out1 label noobs;
  title 'Point Biserial, Biserial and Rank Biserial Correlations';
run;







No comments:

Post a Comment