/************************************************
PSmatching.sas   adapted from

Paper 185-2007  SAS Global Forum 2007
Local and Global Optimal Propensity Score Matching
Marcelo Coca-Perraillon
Health Care Policy Department, Harvard Medical School, Boston, MA
-------------------------------
Treatment and Control observations must be in separate datasets such that
  Control data includes:   idC = subject_id, pscoreC = propensity score
  Treatment data includes: idT, pscoreT
  id must be numeric

Parameters
  datatreatment    = treatment dataset (required)
  datacontrol      = control dataset (required)
  method           = NN (nearest neighbor), caliper, or radius (required,
                     case-insensitive)
  numberofcontrols = number of controls sought per treated unit; each treated
                     row is copied this many times before matching
                     (defaults to 1 when left blank)
  caliper          = maximum allowed distance |pscoreT - pscoreC| for a match;
                     required for method = caliper and method = radius
  replacement      = yes/no, whether controls can be matched to more than one
                     case; only NO (case-insensitive) removes a control after
                     it has been matched, any other value allows reuse
  out              = output data set name (required)

Behavior notes
  - Both inputs are put in random order with fixed seeds (12345 for the
    treatment data, 45678 for the control data), so results are reproducible.
  - Rows with a missing pscoreT or pscoreC are excluded from matching.
  - Invalid parameters write an ERROR line to the log and the macro returns
    without creating any dataset.
  - Temporary datasets _Treatment0, _Treatment, _Control0 and _Control are
    created and deleted; the dataset Matched is created and left in place.
  - NOTE: with replacement = yes and numberofcontrols > 1, every copy of a
    treated unit selects the same nearest control, so the same pair is
    written numberofcontrols times.

example call:

  %PSMatching(datatreatment= T, datacontrol= C, method= NN,
              numberofcontrols= 1, caliper=, replacement= no, out= matches);

Output format:

  Obs  IdSelectedControl  PScoreControl  MatchedToTreatID  PScoreTreat
    1        18628           0.39192          16143          0.39192
    2        18505           0.23029          16158          0.23002
    3        15589           0.29260          16112          0.29260

All other variables discarded.

Reformat for merge on subject_id with original data:

  data pairs;
    set matches;
    subject_id = IdSelectedControl; pscore = PScoreControl; pair = _N_;
    output;
    subject_id = MatchedToTreatID;  pscore = PScoreTreat;   pair = _N_;
    output;
    keep subject_id pscore pair;
  run;
************************************************/

%macro PSMatching(datatreatment=, datacontrol=, method=, numberofcontrols=,
                  caliper=, replacement=, out=);

  %local _psm_method _psm_repl;
  %let _psm_method = %upcase(&method);
  %let _psm_repl   = %upcase(&replacement);

  /* ---- Parameter validation ------------------------------------------- */
  /* Fail fast: an unrecognised method would otherwise generate a          */
  /* do-while loop that never advances the hash iterator (infinite loop),  */
  /* and blank caliper / numberofcontrols would generate invalid SAS code. */
  %if %length(&datatreatment) = 0 or %length(&datacontrol) = 0
      or %length(&out) = 0 %then %do;
    %put ERROR: PSMatching: datatreatment=, datacontrol= and out= are required.;
    %return;
  %end;

  %if %length(&method) = 0 %then %do;
    %put ERROR: PSMatching: method= is required (NN, CALIPER or RADIUS).;
    %return;
  %end;

  %if &_psm_method ne NN and &_psm_method ne CALIPER
      and &_psm_method ne RADIUS %then %do;
    %put ERROR: PSMatching: method= must be NN, CALIPER or RADIUS (got &method).;
    %return;
  %end;

  %if (&_psm_method = CALIPER or &_psm_method = RADIUS)
      and %length(&caliper) = 0 %then %do;
    %put ERROR: PSMatching: caliper= is required when method= is &_psm_method..;
    %return;
  %end;

  /* A blank numberofcontrols means one control per treated unit. */
  %if %length(&numberofcontrols) = 0 %then %let numberofcontrols = 1;

  /* ---- Create copies of the treated units if N > 1 --------------------- */
  /* The random number is drawn before the missing-score filter so that the */
  /* random order of the remaining rows is the same as without the filter.  */
  data _Treatment0(drop= i);
    set &datatreatment;
    do i = 1 to &numberofcontrols;
      RandomNumber = ranuni(12345);
      /* A missing score cannot be matched meaningfully: skip the row. */
      if not missing(pscoreT) then output;
    end;
  run;

  /* ---- Randomly sort both datasets ------------------------------------- */
  proc sort data= _Treatment0 out= _Treatment(drop= RandomNumber);
    by RandomNumber;
  run;

  data _Control0;
    set &datacontrol;
    RandomNumber = ranuni(45678);
    /* Missing compares lower than any number in SAS, so a control with a  */
    /* missing score would otherwise win every nearest-neighbor comparison. */
    if not missing(pscoreC);
  run;

  proc sort data= _Control0 out= _Control(drop= RandomNumber);
    by RandomNumber;
  run;

  /* ---- Matching --------------------------------------------------------- */
  data Matched(keep= IdSelectedControl PScoreControl MatchedToTreatID PScoreTreat);
    length pscoreC 8;
    length idC 8;

    /* Load Control dataset into the hash object (once) */
    if _N_ = 1 then do;
      declare hash h(dataset: "_Control", ordered: 'no');
      declare hiter iter('h');
      h.defineKey('idC');
      h.defineData('pscoreC', 'idC');
      h.defineDone();
      call missing(idC, pscoreC);
    end;

    /* Open the treatment: one DATA step iteration per treated row */
    set _Treatment;

    %if &_psm_method = RADIUS %then %do;
      /* Radius: output every control whose score lies within the caliper */
      rc = iter.first();
      do while (rc = 0);
        if (pscoreT - (&caliper)) <= pscoreC <= (pscoreT + (&caliper)) then do;
          IdSelectedControl = idC;
          PScoreControl     = pscoreC;
          MatchedToTreatID  = idT;
          PScoreTreat       = pscoreT;
          output;
        end;
        rc = iter.next();
      end;
    %end;
    %else %do;
      /* NN / Caliper: scan all remaining controls and keep the closest one. */
      /* A missing BestDistance means no candidate has been accepted yet.    */
      BestDistance = .;
      rc = iter.first();
      do while (rc = 0);
        %if &_psm_method = CALIPER %then %do;
        /* Caliper: only controls inside the caliper are candidates */
        if (pscoreT - (&caliper)) <= pscoreC <= (pscoreT + (&caliper)) then do;
        %end;
          ScoreDistance = abs(pscoreT - pscoreC);
          if missing(BestDistance) or ScoreDistance < BestDistance then do;
            BestDistance      = ScoreDistance;
            IdSelectedControl = idC;
            PScoreControl     = pscoreC;
            MatchedToTreatID  = idT;
            PScoreTreat       = pscoreT;
          end;
        %if &_psm_method = CALIPER %then %do;
        end;
        %end;
        rc = iter.next();
      end;

      /* Output the best control and, without replacement, remove it.       */
      /* This runs only after the iterator has moved past the last item,    */
      /* the same point at which the removal has always been issued.        */
      if not missing(BestDistance) then do;
        output;
        %if &_psm_repl = NO %then %do;
        rc1 = h.remove(key: IdSelectedControl);
        %end;
      end;
    %end;
  run;

  /* ---- Delete temporary tables. Comment out for debugging --------------- */
  /* Only the datasets created above are removed; other datasets whose      */
  /* names start with an underscore are left untouched.                     */
  proc datasets memtype= data nolist;
    delete _Treatment0 _Treatment _Control0 _Control;
  quit;

  data &out;
    set Matched;
  run;

%mend PSMatching;