/****************************************************************************** condes macro: computes a line by line summary of each variable on a SAS daatset. Jan 2012: Items added: Columns for type-length of variable (N or C) and for SAS format Changes: Allows date formats to be mmddyy, ddmmyy, or dateN. Statistics for dates presented in same format, as mmddyy8., ddmmyy8,, or date7. PDF file or (optionally) html file generated as well as text file. Program re-written slightly - uses proc report to generate condes file. Parameters: dataset - name of dataset, including libname if other than work. pdf - a pdf version of the condes is generated (default Y) html - an html version of the condes is generated (default N) Output: output - a text file with name of dataset and .condes extention - a pdf file with name of dataset and .pdf extension - a html file with name of dataset and .html extension Example: %condes(nhs) %condes(libnhs.nhs) %condes(libnhs.nhs,html=y) %condes(libnhs.nhs,html=y,pdf=n) ******************************************************************************/ %macro condes(dataset,pdf=y,html=n) ; option nocenter nodate nonumber; %let pdf = %upcase(&pdf); %let html = %upcase(&html); %if %index(&dataset,.) = 0 %then %let dataset_name = %scan(&dataset,1); %else %let dataset_name = %scan(&dataset,2); proc contents data=&dataset noprint out=contents (keep=name type length varnum label format formatL); proc print; proc sort; by name; * Find number of character and numeric variables; data _null_ ; retain c n 0; set contents end=done; if type = 1 then do; n + 1; end; else if type = 2 then do; c + 1; call symputx('c' || left(put(c,5.)),name) ; end; if done eq 1 then do; call symputx('n',left(put(n,5.))) ; call symputx('c',left(put(c,5.))) ; end; run ; %if &n > 0 %then %do; proc means data=&dataset noprint ; output out=count n= ; output out=mean mean= ; output out=std std= ; output out=min min= ; output out=max max= ; run; data stats; set count mean std min max; drop _freq_ _type_; run; proc transpose data=stats out=stats name=name; run; data stats; length name $32; set stats (rename = (col1=n)); run; proc sort; by name; %end; * Get statistics for character variables; %if &c > 0 %then %do ; data charstats; set &dataset (keep = _char_) end=done; retain _min1-_min&c 'FFFFFFFFFFFFFFFF'X ; retain _max1-_max&c '0000000000000000'X ; retain _num1-_num&c 0 ; length name $32.; %do i = 1 %to &c ; if &&c&i ne ' ' then do ; _num&i = _num&i + 1 ; if &&c&i gt _max&i then _max&i = &&c&i ; if &&c&i lt _min&i then _min&i = &&c&i ; end ; %end ; * Output one row per character variable with accompanying stats; if done eq 1 then do ; %do i = 1 %to &c ; name = "&&c&i" ; if _num&i eq 0 then max = ' ' ; else max = _max&i ; if _num&i eq 0 then min = ' ' ; else min = _min&i ; n = _num&i ; output ; %end ; end ; keep name n max min ; run; %end; %local charstats; %if &c > 0 %then %let charstats = charstats; %local stats; %if &n > 0 %then %let stats = stats; * Put stats for numeric and char variables together and add meta data; * All statistics will be character data eventually; data stats; retain maxvw 8 maxdw 12 ; merge &stats &charstats contents end=done; by name; * Apply date format to date variables - values will be character; if type ne 2 then do; if format = 'MMDDYY' then do; mean = put(round(col2),mmddyy8.); min = put(col4,mmddyy8.); max = put(col5,mmddyy8.); end; else if format = 'DDMMYY' then do; mean = put(round(col2),ddmmyy8.); min = put(col4,ddmmyy8.); max = put(col5,ddmmyy8.); end; else if format = 'DATE' then do; mean = put(round(col2),date7.); min = put(col4,date7.); max = put(col5,date7.); end; else do; * Apply best format to rest of numeric variables - values will be character; mean = put(col2,best8.); min = put(col4,best8.); max = put(col5,best8.); end; * Apply best format to SD all numeric variables; std = put(col3,best8.); end; * Compute skipvar for inserting blank lines every 5th row in report; skipvar = int((varnum-1)/5); length format_full $10.; if formatL = 0 and format ne ' ' then format_full = cats(format,'.'); else if formatL > 0 then format_full = cats(format,put(formatL,2.),'.'); length type_var $5. ; if type = 1 then type_var = cats('N',put(length,3.)); if type = 2 then type_var = cats('C',put(length,3.)); if formatL > 0 then format_full = cats(format,put(formatL,2.),'.'); * Compute maximum length of variables names and labels so report is condenced; maxvw = max(maxvw,length(name)); maxdw = max(maxdw,length(label)); if done = 1 then do; maxvw = max(maxvw,12); maxdw = max(maxdw,20); call symputx('maxvw',put(maxvw+1,2.)); call symputx('maxdw',put(maxdw,3.)); end; keep name length type_var label format_full skipvar formatL varnum n mean std min max ; run; * Sort so variables are in original order; proc sort; by varnum; * Rename some variables so proc report does not get confused; proc datasets; modify stats; rename n = xn; rename mean = xmean; rename std = xstd; rename min = xmin; rename max = xmax; run; * define template style for pdf and html file; option orientation=landscape; proc template; define style styles.condes; parent = styles.journal; class fonts / 'docFont' = ("Courier",8pt); class output/ padding = 5; end; run; * Figure out linesize for text report - cant be more than 256; * Truncate label if needed to keep under 256; %let linesize = %eval(81 + &maxvw + &maxdw) ; %if &linesize > 256 %then %do; %let linesize = 256; %let maxdw = %eval(256-81-&maxvw); %end; option linesize = &linesize pagesize=64; ods listing close; ods listing file = "&dataset_name..condes" ; %if &pdf = Y %then %do; ods pdf file = "&dataset_name..pdf" style=condes; %end; %if &html = Y %then %do; ods html file = "&dataset_name..html"; %end; proc report data=stats nowindows headline style(header) = [just=left] formchar(2)='-'; column skipvar varnum name type_var format_full label xn xmean xstd xmin xmax; define skipvar / noprint order 'Seq'; define varnum / 'Seq'; define name / 'Name'; define type_var/ 'T'; define format_full / 'Format'; %if &maxdw = 20 %then %do; define label / 'Variable Label' style(column) = [cellwidth=1.5in] ; %end; %else %do; define label / 'Variable Label' ; %end; define xn / 'N'; define xmean / 'Mean'; define xstd / 'Std Dev'; define xmin / 'Minimun'; define xmax / 'Maximum'; compute after skipvar; line ' '; endcomp; format name $&maxvw.. label $&maxdw..; format varnum 4.; title "Condescriptive of: &dataset_name"; run; %if &pdf = Y %then %do; ods pdf close; %end; %if &html = Y %then %do; ods html close; %end; ods listing close; ods listing; proc datasets; delete stats; run; %if &c > 0 %then %do; proc datasets; delete charstats; run; %end; %mend;