/*

/ Program   : freqlvls.sas
/ Version   : 1.1
/ Author    : Roland Rashleigh-Berry
/ Date      : 31-Oct-2011
/ Purpose   : To give frequency counts by category and sub-category levels
/ SubMacros : %words %vartype %varfmt %comblvls %splitvar
/ Notes     : The number of controls offered to tailor the output dataset are
/             limited because it is expected that the output dataset will be
/             extensively post-processed.
/
/             New text variables named LVL1, LVL2 etc. are created for the
/             output dataset that correspond to the heirarchical list of
/             category variables supplied to varlist= so that they are converted
/             to character where required and are wide enough to accept the "ANY
/             varname" summation label. The labels of the LVL1, LVL2 etc.
/             variables for the group summation will be "ANY " followed by the
/             original variable name. It is up to you to change these or drop
/             these as needed when post processing.
/
/             For the default calcord=yes, ordering variables named LVL1ORD,
/             LVL2ORD etc. are created. The zero values correspond to the "ANY"
/             group summations and the rank values above zero are based on the
/             descending frequency count with the LVL1, LVL2 etc. value as a
/             secondary key. You may want to use a different ordering system in
/             which case you will have to overwrite some of these values or not
/             calculate them by setting calcord=no.
/
/             The output dataset will be named _freq unless you specify a value
/             to dsout= (which will allow dataset modifiers).
/
/             You can specify a treatment variable. If you also specify a value
/             for the total treatment arm then duplicate observations will be 
/             generated for this total treatment arm value and it will be
/             included in the summation.
/
/             You would typically use the nodupvars= to specify variables that
/             uniquely identify a patient and in this case you will get unique
/             patient counts rather than event counts. If you want both types
/             of counts then make two calls to this macro and merge the output
/             datasets together. For the event counts you might want to set
/             calcord=no to save time if you are using the LVL1ORD, LVL2ORD
/             etc. variables from the patient counts dataset.
/
/             This macro was written to be the core macro for an AE table macro
/             to remove some of the complexity. The output dataset would
/             typically be merged with a denominator dataset to calculate
/             percentages and then the counts, percentages and perhaps event
/             counts shown in a text field that gets transposed by treatment arm
/             with the LVL1ORD, LVL2ORD etc. values from the total treatment arm
/             used as the display order for the transposed values. The %trnslvls
/             macros can be used to do the transpose and the %comblvls macro
/             can be used to combine the values in a single variable.
/
/             For debugging purposes and to help understand what this macro is
/             doing then you can set print=yes to print the output dataset so
/             that the summation levels can be clearly seen. If you also set
/             comblvls=yes the LVLn values will be combined in the variable
/             COMBLVLS and this will be shown in the print instead of the
/             separate LVLn variables.
/
/ Usage     : %freqlvls(sashelp.cars,make type model,trtvar=origin,
/                       trttot="ALL");
/             data cars;
/               length pat $ 3;
/               set sashelp.cars;
/               pat=model;
/             run;
/             %freqlvls(cars,make type model,trtvar=origin,trttot="ALL",
/                       nodupvars=pat);
/===============================================================================
/ PARAMETERS:
/-------name------- -------------------------description------------------------
/ dsin              (pos) input dataset
/ varlist           (pos) list of category variables
/ dsout=_freq       Name of output dataset (defaults to _freq)
/ varlen=256        "Level" variable length (defaults to 256)
/ trtvar            Treatment variable (optional)
/ trttot            Value that represents tot total treatment arm (optional)
/ nodupvars         Variables for dropping duplicates (optional)
/ print=no          By default, do not print the output datset
/ comblvls=no       By default, do not combine the LVLn variables into an
/                   indented version in the variable named COMBLVLS
/ colw=50           column width to use to display a flowing COMBLVLS
/ split=@           Split character to use for COMBLVLS
/ usecolon=yes      By default, if ": " occurs in the string start within 30% of
/                   the defined width then use this to align following line
/                   segments.
/ indent=3          Number of spaces to indent each level
/ hindent=0         Hanging indent for overflowing lines in COMBLVLS
/ calcord=yes       By default, calculate the LVL1ORD, LVL2ORD etc. variables
/                   ranked by descending frequency count.
/ sortbyord=no      By default, do not sort the output dataset by the generated
/                   LVLnORD variables (if you set print=yes or comblvls=yes then
/                   the dataset will be sorted in any case). The default sort
/                   order is the treatment variable (if used) followed by the
/                   LVLn variables.
/ mvarmax           You can specify the name of a macro variable to receive the
/                   maximum frequency value.
/===============================================================================
/ AMENDMENT HISTORY:
/ init --date-- mod-id ----------------------description------------------------
/ rrb  27Oct11         New (v1.0)
/ rrb  31Oct11         Drop _freq_ on a merge to avoid info lines in log
/                      and added mvarmax= parameter processing (v1.1)
/===============================================================================
/ Copyright (C) 2011, Roland Rashleigh-Berry. Use of this software is by license
/ only commencing 01 Jan 2012 although permission is granted to use these macros
/ for educational or demonstration purposes and by drug regulatory agencies.
/
/ Users should ensure this software is suitable for the purpose to which it is 
/ put and to provide adequate checks on the accuracy of any values produced as
/ no guarantee can be given that the results displayed by this software are as
/ expected and no liability is accepted for any damage caused through use of any
/ incorrect output produced. Only use this software if you agree to these terms.
/=============================================================================*/

%put MACRO CALLED: freqlvls v1.1;

%macro freqlvls(dsin,
             varlist,
               dsout=_freq,
              varlen=256,
              trtvar=,
              trttot=,
           nodupvars=,
               debug=no,
               print=no,
            comblvls=no,
               colw=50,
               split=@,
            usecolon=yes,
              indent=3,
             hindent=0,
             calcord=yes,
           sortbyord=no,
             mvarmax=
                );

  %local i err errflag var oldvar keepvars totvars varfmt vartype workds;
  %let errflag=0;
  %let err=ERR%str(OR);

  %if not %length(&dsin) %then %do;
    %let errflag=1;
    %put &err: (freqlvls) No input dataset specified as the first positional parameter;
  %end;

  %if not %length(&varlist) %then %do;
    %let errflag=1;
    %put &err: (freqlvls) No variable list specified as the second positional parameter;
  %end;

  %if &errflag %then %goto exit;

  %let totvars=%words(&varlist);

  %if not %length(&varlen) %then %let varlen=256;

  %if not %length(&debug) %then %let debug=no;
  %let debug=%upcase(%substr(&debug,1,1));

  %if not %length(&print) %then %let print=no;
  %let print=%upcase(%substr(&print,1,1));

  %if not %length(&comblvls) %then %let comblvls=no;
  %let comblvls=%upcase(%substr(&comblvls,1,1));

  %if not %length(&calcord) %then %let calcord=yes;
  %let calcord=%upcase(%substr(&calcord,1,1));

  %if not %length(&sortbyord) %then %let sortbyord=no;
  %let sortbyord=%upcase(%substr(&sortbyord,1,1));

  %if not %length(&dsout) %then %let dsout=_freq;

  *- create a new input dataset with wider columns named LVL1 etc. -;
  data _dsin;
    length
    %do i=1 %to &totvars;
      lvl&i
    %end;
    $ &varlen ;
    set &dsin;
    %do i=1 %to &totvars;
      %let var=%scan(&varlist,&i,%str( ));
      %let vartype=%vartype(&dsin,&var);
      %let varfmt=%varfmt(&dsin,&var);
      %if &vartype EQ N %then %do;
        %if %length(&varfmt) %then %do;
          lvl&i=left(put(&var,&varfmt.));
        %end;
        %else %do;
          lvl&i=left(put(&var,best16.));
        %end;
      %end;
      %else %do;
        %if %length(&varfmt) %then %do;
          lvl&i=left(put(&var,&varfmt.));
        %end;
        %else %do;
          lvl&i=left(&var);
        %end;
      %end;
    %end;
    %if %length(&trtvar) and %length(&trttot) %then %do;
      output;
      &trtvar=&trttot;
      output;
    %end;
    keep &trtvar &nodupvars
    %do i=1 %to &totvars;
      lvl&i
    %end;
    ;
    label
    %do i=1 %to &totvars;
      lvl&i="%upcase(%scan(&varlist,&i,%str( )))"
    %end;
    ;
  run;

  %let workds=_dsin;
  %if %length(&nodupvars) %then %let workds=_dsin2;
 
  %if %length(&nodupvars) %then %do;
    proc sort nodupkey data=_dsin out=_dsin2;
      by &trtvar &nodupvars;
    run;
  %end;

  proc summary nway missing data=&workds;
    %if %length(&trtvar) %then %do;
      class &trtvar;
    %end;
    output out=lvl0sum(drop=_type_);
  run;

  %if %length(&mvarmax) %then %do;
    proc sql noprint;
      select max(_freq_) into: &mvarmax from lvl0sum;
    quit;
  %end;


  %let keepvars=;
  %do i=1 %to &totvars;

    %let var=lvl&i;
    %let oldvar=%scan(&varlist,&i,%str( ));
    %if %length(&nodupvars) %then %do;
      proc sort nodupkey data=_dsin out=_dsin2;
        by &trtvar &keepvars &var &nodupvars;
      run;
    %end;

    proc summary nway missing data=&workds;
      class &trtvar &keepvars &var;
      output out=lvl&i.sum(drop=_type_);
    run;

    %if &calcord NE N %then %do;
      data lvl&i.sum;
        set lvl&i.sum;
        ord=1/_freq_;
      run;

      proc sort data=lvl&i.sum;
        by &trtvar &keepvars ord &var;
      run;

      data lvl&i.sum;
        retain lvl&i.ord 0;
        set lvl&i.sum;
        by &trtvar &keepvars ord &var;
        %if %length(&trtvar &keepvars) %then %do;
          if first.%scan(&trtvar &keepvars,-1,%str( )) then lvl&i.ord=0;
        %end;
        if first.&var then lvl&i.ord=lvl&i.ord+1;
        drop ord;
      run;
    %end;

    data lvl&i.sum;
      set lvl&i.sum 
          lvl%eval(&i-1)sum(keep=&trtvar &keepvars _freq_ in=_x);
      if _x then do;
        %if &calcord NE N %then %do;
          lvl&i.ord=0;
        %end;
        &var="ANY %upcase(&oldvar)";
      end;
    run;

    proc sort data=lvl&i.sum;
      by &trtvar &keepvars &var;
    run;

    %if &i GT 1 %then %do;
      *- merge in lvlxord values -;
      data lvl&i.sum;
        merge lvl%eval(&i-1)sum(drop=_freq_) lvl&i.sum;
        by &trtvar &keepvars;
      run;
    %end;

    %let keepvars=&keepvars &var;
  %end;


  data &dsout;
    set lvl&totvars.sum;
  run;


  %if &debug NE Y %then %do;
    proc datasets nolist;
      delete lvl0sum _dsin _dsin2
      %do i=1 %to &totvars;
        lvl&i.sum
      %end;
      ;
    run;
    quit;
  %end;


       /*****************************************
           Demonstration and printing section
        *****************************************/

  %if &comblvls EQ Y %then %do;
    %comblvls(&dsout,&totvars,&colw,varlen=&varlen,split=&split,usecolon=&usecolon,
              indent=&indent,hindent=&hindent,byvars=&trtvar);
  %end;
  %else %do;
    %if &print EQ Y or &sortbyord EQ Y %then %do;
      proc sort data=&dsout;
        by &trtvar
        %do i=1 %to &totvars;
          lvl&i.ord
        %end;
        ;
      run;
    %end;
  %end;


  %if &print EQ Y %then %do;
    proc report nowd headline headskip missing split="&split" data=&dsout;
      columns &trtvar
      %if &comblvls NE Y %then %do;
        %do i=1 %to &totvars;
          lvl&i.ord lvl&i
        %end;
      %end;
      %else %do;
        %do i=1 %to &totvars;
          lvl&i.ord
        %end;
        comblvls
      %end;
      _freq_;
      %if %length(&trtvar) %then %do;
        define &trtvar / order;
      %end;
      %do i=1 %to &totvars;
        define lvl&i.ord / order noprint;
        %if &comblvls NE Y %then %do;
          define lvl&i / order width=22 flow;
        %end;
      %end;
      %if &comblvls EQ Y %then %do;
        define comblvls / display width=&colw flow;
      %end;
      define _freq_ / display;
      %if &comblvls EQ Y %then %do;
        break after lvl2ord / skip;
      %end;
      %else %do;
        break after lvl%eval(&totvars-1)ord / skip;
      %end;
    run;
  %end;

  %goto skip;
  %exit: %put &err: (freqlvls) Leaving macro due to problem(s) listed;
  %skip:
 
%mend freqlvls;