/*

/ Program      : popfmt.sas
/ Version      : 3.0
/ Author       : Roland Rashleigh-Berry
/ Date         : 30-Apr-2008
/ Purpose      : Clinical reporting macro to create a treatment format that is the
/                same as an existing format but with the (N=xxx) at the end.
/ SubMacros    : %varfmt %vartype
/ Notes        : This macro is an important driving macro for the major reporting
/                macros %unicatrep and %npcttab and should be pre-called by any
/                reporting macro where the columns in the report are transposed
/                treatment arm values. You can use the global macro variables set
/                up in this macro to know in advance the number of treatment arm
/                columns and hence write flexible reporting macros where the number
/                of treatment columns are not fixed.
/
/                You must specify an input dataset with a treatment variable in 
/                it that has a format assigned to it. Also you need to specify
/                variable(s) that uniquely identifies a patient. Three formats are
/                created as described in _popfmt_, _poptfmt_ and _popnfmt_ below.
/
/                The dataset _popfmt is retained at macro end and has the treatment
/                variable plus the variable "_total" sorted in treatment group order
/                and this can be used for calculating percentages in a later data
/                step.
/
/                global macro variables set
/                --------------------------
/                _popfmt_ gets assigned to the name of the format created by
/                         this macro with the drug name and (N=xxx) at the end.
/                         It will be $popfmt. or popfmt. depending
/                         on whether the treatment variable is character or
/                         numeric. Use this format to give you the treatment
/                         group but with the (N=xxx) at the end. Also use this
/                         format if you transpose the data to create a label for
/                         the variable and use "idlabel label" in your proc
/                         transpose so that your new variable will have the 
/                         correct label to display in your proc report.
/                _rawfmt_ gets assigned to the input format
/                _poptfmt_ gets assigned to a format that includes the "Total"
/                         treatment group. There is no (N=xxx) at the end.
/                         It will be $poptfmt. or poptfmt.
/                _popnfmt_ gets assigned to the format that contains just the
/                         value of the population total. This will be the pure
/                         value of "N" as text. The format will be called 
/                         $popnfmt. or popnfmt. depending on whether the treatment
/                         variable is character or numeric.
/                _trtvar_ gets assigned to the treatment variable
/                _trtvarlist_ gets assigned to a list of variables starting with
/                         the string assigned to the prefix= parameter and ending
/                         with the value of the treatment variables. Assuming you
/                         will transpose your data using the same prefix, you can
/                         use this list in your proc report columns statement.
/                _trttotstr_ gets assigned to either what is defined to totalc=
/                         or totaln= depending on whether the treatment group
/                         variable is character or numeric. You can use this
/                         value in your code when you create a "total treatment
/                         group" and you want to assign it a value.
/             _uniqueid_  gets assigned to the variable(s) needed to uniquely
/                         define a subject.
/             _trttotvar_ gets assigned to whatever is assigned to the prefix=
/                         parameter followed by what is assigned to _trttotstr_
/                         and might be TRT99 or TRTY. You can use this in your
/                         proc report in the columns statement and the define
/                         statement.
/          _trttotcwidth_ gets set to the treatment total format column width
/            _trtpvalstr_ gets assigned to either what is defined to pvaluec=
/                         or pvaluen= depending on whether the treatment group
/                         variable is numeric or character.
/            _trtpvalvar_ gets assigned to whatever is assigned to the prefix=
/                         parameter followed by what is assigned to _trtpvalstr_
/                         and might be TRT9999 or TRTZ. You can use this in your
/                         proc report in the columns statement and the define
/                         statement.
/                _trtnum_ gets assigned to the number of unique treatment values
/                         found (not including that for the overall treatment
/                         value). You can use this in a macro loop to define
/                         your treatment values in proc report such as:
/                  %do i=1 %to &_trtnum_;
/                    define %scan(&_trtvarlist_,&i,%str( )) / display f=&_popfmt_;
/                  %end;
/               _trtpref_ gets assigned to whatever is assigned to prefix= and
/                         can be used as the prefix in a proc transpose. Then 
/                         your variables will match the list in _trtvarlist_.
/            _trtvartype_ Gets set to C or N (Character or Numeric) depending on
/                         the treatment variable type. You can use this to 
/                         complete a put statement like =put&_trtvartype_(etc...
/            _trtcwidths_ Gets set to the treatment column widths as given by the
/                         format &_popfmt_.
/            _trtfwidths_ Gets set to the treatment column widths as given by the
/                         format &_rawfmt_.
/          _trttotcwidth_ Gets set to the "Total" column width as given by the
/                         format &_popfmt_.
/          _trttotfwidth_ Gets set to the "Total" column width as given by the
/                         format &_poptfmt_.
/          _trttotals_    Treatment population totals corresponding to each
/                         treatment value shown in _trtinlist_.
/
/ Usage        : %popfmt(stat.acct(where=(xxx=1 and &_pop_.cd=1)),trtgroup)
/ 
/===============================================================================
/ PARAMETERS:
/-------name------- -------------------------description------------------------
/ dsin              (pos) Input dataset
/ trtvar            (pos) Treatment arm variable which must be a coded numeric
/                   or short coded character variable (typically one or two
/                   bytes with no spaces)
/ dstrt             (pos) Input dataset containing complete set of trtvar values.
/                   If you are sure your input dataset contains all the trtvar
/                   values you need then you need not specify this. If you do
/                   specify this and there is no data in your input dataset
/                   corresponding to a treatment arm then this will have (N=0).
/ trtfmt            (Optional) treatment variable format. If omitted it uses
/                   the currently assigned format for the treatment variable.
/ uniqueid=patno    Variable(s) that uniquely identify a patient. If more
/                   than one variable then separate with spaces.
/ prefix=TRT        Prefix for a list of variables that will end in the unique
/                   values of the treatment variable that will get written to
/                   the global macro variable _trtvarlist_ . If you transpose
/                   your data with this prefix then the variable list can be
/                   used in your proc report.
/ totaln=99         Number value to represent "total" for all treatment arms.
/ totalc="Y"        Character value (in quotes) to represent "total" for all
/                   treatment arms.
/ pvaluen=9999      Number value to represent the pvalue treatment arm.
/ pvaluec="Z"       Character value to represent the pvalue treatment arm.
/ pvallbl="p-value" Label text for the pvalue treatment arm.
/ totstr=Total      Text to represent the "total" for all treatments. If you
/                   set this to null then the format created will not contain
/                   an entry for the total treatment groups.
/ split=@           Split character to put before the (N=xxx) ending. If you
/                   want a space instead then set this to %str( ).
/ suffix            (in quotes) suffix to put after the (N=xxx) ending as part
/                   of the created format. Default is nothing.
/ brackets=yes      By default, show () brackets around the N=xxx label.
/ N=N               Allows you to change how "N" is displayed in (N=xxx) (no
/                   quotes - spaces allowed).
/ freesuff          Free text suffix. The numeric value for N will be
/                   substituted where the "#" is and a "#" must only be used
/                   in one place in the string and for this purpose. Add your
/                   own split characters. This text must be UNQUOTED. It 
/                   overrides the suffix=, brackets= and N= parameters. 
/                   Use %str( ) to give you a leading space.
/===============================================================================
/ AMENDMENT HISTORY:
/ init --date-- mod-id ----------------------description------------------------
/ rrb  14Oct04         Changed to uniqueid=patno instead of patient
/ rrb  08Aug05         _trtvartype_ global macro variable added and _trtpref_
/                      is the preferred name for _tranpref_ although the old
/                      one is kept for compatibility.
/ rrb  17Jan06         Added column widths plus more explanation written to log
/ rrb  03Feb06         _trtvar_ and _uniqueid_ global macro variables added
/ rrb  04May06         _trttotals_ added
/ rrb  13Feb07         "macro called" message added
/ rrb  20Mar07         All _nX_ variable creation dropped (v2.0)
/ rrb  21Mar07         Header tidy
/ rrb  02May07         Brackets=yes parameter added so that brackets around 
/                      (N=xxx) can be suppressed if required.
/ rrb  30Jul07         Header tidy
/ rrb  22Apr08         N=N parameter added 
/ rrb  23Apr08         Freesuff= parameter added
/ rrb  30Apr08         "total" changed to "_total" for output dataset
/===============================================================================
/ This is public domain software. No guarantee as to suitability or accuracy is
/ given or implied. User uses this code entirely at their own risk.
/=============================================================================*/

/* Open-code statement (it sits outside the macro definition): writes the   */
/* macro name and version to the log whenever this source file is submitted. */
%put MACRO CALLED: popfmt v3.0;

%macro popfmt(dsin,
            trtvar,
             dstrt,
            trtfmt=,
          uniqueid=patno,
            prefix=TRT,
            totaln=99,
            totalc="Y",
           pvaluen=9999,
           pvaluec="Z",
           pvallbl="p-value",
            totstr=Total,
             split=@,
            suffix=,
          brackets=yes,
                 N=N,
          freesuff=
              );

/*------------------------------------------------------------------------------
  Counts unique subjects (uniqueid=) per value of the treatment variable in
  dsin and builds three formats from those counts:
    popfmt  / $popfmt   treatment label followed by the population count text
    poptfmt / $poptfmt  treatment label only, plus the totstr= entry
    popnfmt / $popnfmt  the population count as text
  A p-value entry (pvaluen= / pvaluec= labelled with pvallbl=) is added to
  the formats. The work dataset _popfmt is left behind holding the treatment
  variable and _total, and the global macro variables named in the global
  statement below are assigned.

  Sub-macros called: varfmt (only when trtfmt= is blank) and vartype.

  When totstr= is blank the total row is left out of the formats and of the
  final _popfmt dataset, but the global lists and the p-value entry are still
  produced and _trtnum_ still counts every treatment arm.
------------------------------------------------------------------------------*/

%local varfmt error;
%global _popfmt_  _poptfmt_ _popnfmt_ _trtvarlist_  _trttotstr_  _trttotvar_  
        _trtnum_  _trtpref_ _tranpref_ _trtinlist_ _trtpvalstr_ _trtpvalvar_
        _trtvartype_ _trtcwidths_ _trttotcwidth_ _trtfwidths_ _trttotfwidth_
        _rawfmt_ _uniqueid_ _trtvar_ _trttotals_;
        
%let error=0;


%*- brackets= is reduced to its first letter in upper case (Y or N) -;
%if not %length(&brackets) %then %let brackets=yes;
%let brackets=%upcase(%substr(&brackets,1,1));

%let _trttotals_=;
%let _trtpref_=&prefix;
%let _tranpref_=&prefix;    %*- old named version kept for compatibility -;

%if not %length(&dsin) %then %do;
  %let error=1;
  %put ERROR: (popfmt) No input dataset specified to first positional parameter;
%end;

%if not %length(&trtvar) %then %do;
  %let error=1;
  %put ERROR: (popfmt) No treatment variable specified to second positional parameter;
%end;

%if &error %then %goto error;

%let _trtvar_=&trtvar;
%let _uniqueid_=&uniqueid;

%*- Clear values that a previous call may have left behind. They are only -;
%*- assigned further down when there is data to assign them from. -;
%let _trtnum_=0;
%let _trtvarlist_=;
%let _trtinlist_=;
%let _trtcwidths_=;
%let _trtfwidths_=;
%let _trttotcwidth_=;
%let _trttotfwidth_=;


%*- If treatment arm dataset specified -;
%if %length(&dstrt) %then %do;
  data _poptrt(keep=&trtvar);
    set &dstrt;
  run;
  proc sort nodupkey data=_poptrt;
    by &trtvar;
  run;
  %*- _total of zero is the default for arms that have no subjects in dsin -;
  data _poptrt;
    retain _total 0;
    set _poptrt;
  run;
%end;


*- get rid of duplicates from input dataset -;
proc sort nodupkey data=&dsin
                    out=_popfmt(keep=&uniqueid &trtvar);
  by &uniqueid &trtvar;
run;


%*- identify treatment variable format -;
%if %length(&trtfmt) %then %let varfmt=&trtfmt;
%else %let varfmt=%varfmt(_popfmt,&trtvar);

%let _rawfmt_=&varfmt;

%let _trtvartype_=%vartype(_popfmt,&trtvar);

%*- Format names and total and p-value identifiers depend on variable type. -;
%*- Both single and double quotes are stripped from totalc= and pvaluec= -;
%*- so that the transposed variable names are valid. -;
%if "&_trtvartype_" EQ "C" %then %do;
  %let _popfmt_=$popfmt;
  %let _popnfmt_=$popnfmt;
  %let _poptfmt_=$poptfmt;
  %let _trttotstr_=&totalc;
  %let _trttotvar_=&prefix.%sysfunc(compress(&totalc,%str(%'%")));
  %let _trtpvalstr_=&pvaluec;
  %let _trtpvalvar_=&prefix.%sysfunc(compress(&pvaluec,%str(%'%")));
%end;
%else %do;
  %let _popfmt_=popfmt;
  %let _popnfmt_=popnfmt;
  %let _poptfmt_=poptfmt;
  %let _trttotstr_=&totaln;
  %let _trttotvar_=&prefix.&totaln;
  %let _trtpvalstr_=&pvaluen;
  %let _trtpvalvar_=&prefix.&pvaluen;
%end;


*- add observations for the "totals" category -;
data _popfmt;
  set _popfmt;
  output;
  &trtvar=&_trttotstr_;
  output;
  format &trtvar;
run;


*- make sure we have no duplicates in the "totals" category -;
proc sort nodupkey data=_popfmt;
  by &trtvar &uniqueid;
run;


*- get counts by treatment group -;
proc summary nway data=_popfmt;
  class &trtvar;
  output out=_popfmt(drop=_type_ rename=(_freq_=_total));
run;


%*- If treatment dataset specified then do a merge -;
%if %length(&dstrt) %then %do;
  %*- _popfmt is named second so that its counts replace the zero default -;
  data _popfmt;
    merge _poptrt _popfmt;
    by &trtvar;
  run;
  proc datasets nolist;
    delete _poptrt;
  run;
  quit;
%end;


*- set up labels for main format -;
data _popfmt;
  length label tlabel varlist inlist cwidths fwidths trttotals $ 200
         totalstr $ 6;
  retain fmtname "&_popfmt_"  varlist inlist cwidths fwidths trttotals " ";
  set _popfmt end=last;
  %*- _popkeep flags whether the current row is written out -;
  _popkeep=1;
  start=&trtvar;
  totalstr=left(put(_total,6.));
  if trttotals=" " then trttotals=totalstr;
  else trttotals=trim(trttotals)||" "||totalstr;
  if &trtvar eq &_trttotstr_ then do;
    %if %length(&totstr) %then %do;
      tlabel="&totstr";
      %if %length(&freesuff) %then %do;
        %*- the total code is not expected to be in the input format so -;
        %*- the label is built from totstr= as in the other branches -;
        label="&totstr"||"%qscan(&freesuff,1,#)"||trim(totalstr)||"%qscan(&freesuff,2,#)";
      %end;
      %else %do;
        %if "&brackets" EQ "N" %then %do;
          label="&totstr"||"&split"||"&N="||trim(totalstr);
        %end;
        %else %do;
          label="&totstr"||"&split"||"(&N="||trim(totalstr)||")";
        %end;
        %if %length(&suffix) %then %do;
          label=trim(label)||&suffix;
        %end;
      %end;
      *- get column width from label -;
      cwidth=0;
      *- Do not use scan to test for a space as this -;
      *- might be a deliberate part of the label. -;
      do i=1 to 20;
        len=length(scan(label,i,"&split"));
        if len>cwidth then cwidth=len;
      end;
      call symput('_trttotcwidth_',compress(put(cwidth,2.)));

      *- get original width from tlabel -;
      fwidth=0;
      *- Do not use scan to test for a space as this -;
      *- might be a deliberate part of the label. -;
      do i=1 to 20;
        len=length(scan(tlabel,i,"&split"));
        if len>fwidth then fwidth=len;
      end;
      call symput('_trttotfwidth_',compress(put(fwidth,2.)));

    %end;
    %else %do;
      %*- No total entry wanted. The row is only flagged here and not -;
      %*- deleted because the total row is often the last observation -;
      %*- and the end of data processing below must still be reached. -;
      _popkeep=0;
    %end;
  end;
  else do;
    tlabel=put(&trtvar,&varfmt);
    %if %length(&freesuff) %then %do;
        label=trim(put(&trtvar,&varfmt))||"%qscan(&freesuff,1,#)"||trim(totalstr)||"%qscan(&freesuff,2,#)";
    %end;
    %else %do;
      %if "&brackets" EQ "N" %then %do;
        label=trim(put(&trtvar,&varfmt))||"&split"||"&N="||trim(totalstr);
      %end;
      %else %do;
        label=trim(put(&trtvar,&varfmt))||"&split"||"(&N="||trim(totalstr)||")";
      %end;
      %if %length(&suffix) %then %do;
        label=trim(label)||&suffix;
      %end;
    %end;
    *- get column width from label -;
    cwidth=0;
    *- Do not use scan to test for a space as this -;
    *- might be a deliberate part of the label. -;
    do i=1 to 20;
      len=length(scan(label,i,"&split"));
      if len>cwidth then cwidth=len;
    end;
    if cwidths=" " then cwidths=compress(put(cwidth,2.));
    else cwidths=trim(cwidths)||' '||compress(put(cwidth,2.));

    *- get original width from tlabel -;
    fwidth=0;
    *- Do not use scan to test for a space as this -;
    *- might be a deliberate part of the label. -;
    do i=1 to 20;
      len=length(scan(tlabel,i,"&split"));
      if len>fwidth then fwidth=len;
    end;   
    if fwidths=" " then fwidths=compress(put(fwidth,2.));
    else fwidths=trim(fwidths)||' '||compress(put(fwidth,2.));
    
    %*- character values are listed in double quotes, numeric ones bare -;
    if vtype(&trtvar)="C" then do;
      if inlist=' ' then inlist='"'||left(trim(&trtvar))||'"';
      else inlist=trim(inlist)||' "'||left(trim(&trtvar))||'"';
    end;
    else do;
      if inlist=' ' then inlist=left(&trtvar);
      else inlist=trim(inlist)||" "||left(&trtvar);
    end;
    if varlist=' ' then varlist="&prefix"||left(&trtvar);
    else varlist=trim(varlist)||" &prefix"||left(&trtvar);
  end;
  if _popkeep then output;
  if last then do;
    call symput('_trtvarlist_',trim(varlist));
    call symput('_trtinlist_',trim(inlist));
    call symput('_trtcwidths_',trim(cwidths));
    call symput('_trtfwidths_',trim(fwidths));
    call symput('_trttotals_',trim(trttotals));
    %*- extra row for the p-value entry with a missing _total -;
    &trtvar=&_trtpvalstr_;
    start=&_trtpvalstr_;
    label=&pvallbl;
    tlabel=&pvallbl;
    _total=.;
    totalstr=" ";
    output;
  end;
  drop inlist varlist i cwidth len _popkeep;
run;


*- create the main format with (N=xxx) at the end -;
proc format cntlin=_popfmt;
run;


*- set of labels for the original format with "Total" trt group added -;
data _popfmt;
  retain fmtname "&_poptfmt_";
  set _popfmt(keep=&trtvar start tlabel _total totalstr
            rename=(tlabel=label));
run;


*- create the format for the original format with "Total" trt group added -;
proc format cntlin=_popfmt;
run;


*- set of labels for the pure N format -;
data _popfmt;
  retain fmtname "&_popnfmt_";
  set _popfmt(keep=&trtvar start _total totalstr
            rename=(totalstr=label));
run;  


*- create the "N" format -;
proc format cntlin=_popfmt;
run;


*- leave only two variables in for percentage calculations -;
%*- the p-value row has a missing _total and is filtered out here -;
data _popfmt;
  set _popfmt(keep=&trtvar _total where=(_total ne .));
run;
  

%*- Number of treatment values (not including the "totals" category) -;
%*- Rows are counted explicitly because the total row is absent when -;
%*- totstr= is blank. -;
data _null_;
  set _popfmt end=_last;
  if &trtvar ne &_trttotstr_ then _trtcount+1;
  if _last then call symput('_trtnum_',compress(put(_trtcount,6.)));
run;



%*- put the dot at the end of the format names -;
%let _popfmt_=&_popfmt_..;
%let _popnfmt_=&_popnfmt_..;
%let _poptfmt_=&_poptfmt_..;


%put;
%put MSG: (popfmt) The following global macro variables have been set up;
%put MSG: (popfmt) and can be resolved in your code.;
%put _popfmt_=&_popfmt_   (output format with (N=xxx) population totals added);
%put _rawfmt_=&_rawfmt_   (input format);
%put _popnfmt_=&_popnfmt_  (format for giving pure population totals);
%put _poptfmt_=&_poptfmt_  (copy of input format but containing total treatment arm);
%put _trtvar_=&_trtvar_    (name of treatment variable);
%put _trtvartype_=&_trtvartype_  (treatment variable type N/C);
%put _trttotstr_=&_trttotstr_ (treatment total string identifier);
%put _uniqueid_=&_uniqueid_   (variable(s) used to uniquely identify subjects);
%put _trttotvar_=&_trttotvar_  (transposed treatment total variable);
%put _trtpvalstr_=&_trtpvalstr_  (p-value string identifier);
%put _trtpvalvar_=&_trtpvalvar_  (p-value variable);
%put _trtpref_=&_trtpref_  (treatment variable prefix used in transpose);
%put _trtvarlist_=&_trtvarlist_ (transposed treatment variables);
%put _trtinlist_=&_trtinlist_  (treatment arm values);
%put _trtnum_=&_trtnum_  (number of treatment arms);
%put _trtcwidths_=&_trtcwidths_    (column widths according to format &_popfmt_);
%put _trtfwidths_=&_trtfwidths_    (column widths according to format &_poptfmt_);
%put _trttotcwidth_=&_trttotcwidth_   ("Total" column width according to format &_popfmt_);
%put _trttotfwidth_=&_trttotfwidth_   ("Total" column width according to format &_poptfmt_);
%put _trttotals_=&_trttotals_;


%put;
%put MSG: (popfmt) Dataset "_popfmt" has been created containing population totals;
%put MSG: (popfmt) with one observation per treatment group and one observation for;
%put MSG: (popfmt) the total of all treatment groups. Use this to merge with and;
%put MSG: (popfmt) calculate percentages. Variables are as follows:;
%put &trtvar: Treatment group (dataset is sorted in this order);
%put _total: Total population for the treatment group;
%put;


%goto skip;
%error:
%put ERROR: (popfmt) Leaving macro due to error(s) listed;
%skip:
%mend;