/*

/ Program      : unipvals.sas
/ Version      : 1.33
/ Author       : Roland Rashleigh-Berry
/                Joachim Klinger
/ Date         : 23-Aug-2009
/ Purpose      : Clinical reporting macro to calculate p-values for the
/                %unistats macro.
/ SubMacros    : %words %dequote %nobs
/ Notes        : Calculates Chi-square or Fisher's exact for categorical data,
/                the ANOVA for parametric numeric data and Kruskal-Wallis
/                or Wilcoxon rank sum test for non-parametric numeric data.
/
/                For categorical data, unless you override the test selection
/                using usetest=, Chi-square statistics, the Fisher Exact test
/                or no test are used (the latter for 2x1 or 1x2 tables where
/                calculation of a p-values is not appropriate). If, for 2x2
/                tables, the expected cell count for any cell is less than 5 or
/                for larger tables a single expected cell count is less than one
/                or more than 20% of the expected cell counts are less than 5,
/                then the Fisher exact test is used (this rule might be called
/                "Cochran's Recommendation" but please check in "Siegel, S.
/                (1956) Non-Parametric Statistics for the Behavioral Sciences"
/                for confirmation) otherwise the Chi-Square test is used.
/
/                For numeric data then the ANOVA is used by default but if
/                usetest=N then the non-parametric Kruskal-Wallis test is used
/                (but that will effectively be the Wilcoxon rank sum test if
/                there are only two treatment arms).
/
/                You should make sure that data for the total of all treatment
/                arms is not present in the input data or the p-values will be
/                incorrect. It is up to you to exclude observations you do not 
/                want to include in the analysis.
/
/                The variables kept in the output dataset are the by variables,
/                _pvalue and _test which will be set to "CHISQ" or "FISHER" for
/                categorical analysis, "ANOVA" for continuous numeric analysis
/                or "NPAR1WAY" which applies to the Kruskal-Wallis test and the
/                Wilcoxon rank sum test (for only two treatment arms). "ANOVA"
/                will be replaced by "WELCH" if the Welch test is used.
/
/ Usage        : %unipvals(dsin=means,dsout=out,trtvar=tmt,respvar=val,type=N)
/ 
/===============================================================================
/ PARAMETERS:
/-------name------- -------------------------description------------------------
/ dsin              Input dataset
/ usetest           For categorical data, set to C (Chi-square) or F (Fisher's
/                   exact) to override decision of macro to select Chi-square or
/                   Fisher's exact test based on expected cell counts. For
/                   numeric data, set to N (non-parametric) to override 
/                   the ANOVA parametric test. Setting this for categorical
/                   variables will overrride use of CMH if adjcntrvar= is set.
/ exact=no          By default, do not use an exact test for variables that
/                   require a non-parametric test (setting it to yes can
/                   consume a very large amount of processing time).
/ dsout             Output dataset
/ trtvar            Treatment variable (make sure "total treatment" is not
/                   present in the input data).
/ respvar           Response variable
/ byvars            By variable(s) if used
/ byvars2           Extra by variables kept for convenience
/ type              Should be C for categorical or N for continuous-numeric
/ adjcntrvar        Variable representing the centre to be used in adjustment
/                   for centre effects in PROC GLM call. Only one variable
/                   allowed. Terms will be generated in the model statement
/                   for modelform=short as:
/                     model response=trtcd centre /ss1 ss2 ss3 ss4;
/                   or for modelform=long as:
/                     model response=trtcd centre trtcd*centre /ss1 ss2 ss3 ss4;
/                   You can use this parameter for a variable other than a
/                   centre but note that whatever variable you choose, if it is
/                   not a categorical or dichotomous variable suitable for use
/                   in the CLASS statement of a PROC GLM call then you will need
/                   to use the glmclass= parameter to supply the correct call.
/ cntrwarn=yes      By default, warn if a centre effect and/or centre-by-
/                   treatment effect is significant. For the short model as
/                   described for the adjcntrvar= parameter, use the centre term.
/                   For the long model, use the centre and centre-by-treatment
/                   effect term.
/ cntrcond=LE 0.05  Condition for significance to apply to the centre effect
/ intercond=LE 0.1  Condition for significance to apply to the treatment*centre
/                   interaction.
/ errortype=3       Default is to select the error type 3 (Hypothesis type)
/                   treatment arm p-value from the ModelANOVA ODS dataset that
/                   is output from the GLM procedure.
/ modelform=short   Default is to generate the short form of the model statement
/                   as described in the adjcntrvar= parameter description.
/ dsmodelanova      Dataset to append the ModelANOVA datasets to generated by
/                   PROC GLM.
/ hovwarn=yes       Issue a warning message where the homogeneity of variances
/                   shows a significant difference. This will only be done for
/                   one-way ANOVA so if adjcntrvar= is set then the hov*=
/                   parameters will be ignored. A NOTE statement will be
/                   generated where appropriate if a warning is not issued.
/ hovcond=LE 0.05   Condition for meeting HoV significance
/ hovtest=Levene    HoV test to use. You can choose between OBRIEN, BF, Levene,
/                   Levene(type=square) and Levene(type=abs). Levene and
/                   Levene(type=square) are the same. The Bartlett test is not
/                   supported.
/ welch=no          By default, do not calculate ANOVA p-values using the Welch
/                   test for one-way ANOVA where the HoV condition for
/                   significance is met. If Welch is used then the hovtest
/                   defined to hovtest= will be employed in the MEANS statement.
/                   ---------------------------------------------------------
/                   For the following glm*= parameters it is possible to enclose
/                   your code in single quotes and then you can use &respvar 
/                   (the response variable) and &trtvar (the treatment variable)
/                   in your code without causing syntax errors.
/ glmclass          GLM CLASS statement used to override the generated form.
/                   The start word CLASS and ending semicolon will be generated.
/ glmmodel          GLM MODEL statement used to override the generated form.
/                   The start word MODEL and ending semicolon will be generated.
/ glmmeans          GLM MEANS statement used to override the generated form.
/                   The start word MEANS and ending semicolon will be generated.
/ glmlsmeans        GLM LSMEANS statement used to override the generated form.
/                   The start word LSMEANS and ending semicolon will be
/                   generated. Example: glmlsmeans=treat/cl pdiff adjust=t
/ odstblname        ODS tablename statment to store a dataset; per default,
/                   _odstablname will be used as output dataset name
/                   Example: odstblname=LSMeanCL LSMeanDiffCL
/ glmweight         GLM WEIGHT statement that can be added as an extra. The
/                   start word WEIGHT and ending semicolon will be generated.
/===============================================================================
/ AMENDMENT HISTORY:
/ init --date-- mod-id ----------------------description------------------------
/ rrb  10Jul06         "missing" option added to tables statements in proc freq
/                      calls so it can act on missing values passed to it by
/                      the %unistats macro.
/ rrb  14Jul06         Header tidy
/ rrb  23Sep06         Dummy datasets are created to avoid "dataset not found"
/                      messages when a stats procedure can not calculate a 
/                      p-value.
/ rrb  13Feb07         "macro called" message added
/ rrb  21Mar07         Cancel format on _pvalue variable throughout (v1.2)
/ rrb  28Mar07         Header tidy regarding "Cochran's Recommendation"
/ rrb  30Jul07         Header tidy
/ rrb  06Jan08         New parameters adjcntrvar= and statsopdest= added for
/                      adjustment for centre effects.
/ rrb  06Jan08         Added errortype= , modelform= and dsmodelanova= for
/                      adjustment for centre effects.
/ rrb  07Jan08         hovwarn=, hovcond=, hovtest= amd welch= added to test and
/                      warn for homogeneity of variances significant difference
/                      for one-way ANOVA and to calculate using Welch test.
/ rrb  08Jan08         Added cntrwarn=, cntrcond= and cntrerrtype= for centre
/                      effect warning.
/ rrb  08Jan08         cntrerrtype= removed. cntrcond= value changed and 
/                      intercond=0.1 added.
/ rrb  13Jan08         cntrwarn= processing changed.
/ rrb  13Jan08         CMH test added so that adjcntrvar= can be used with
/                      categorical variables.
/ rrb  14Jan08         glmclass=, glmmodel=, glmmeans= and glmweight= added so
/                      the user can override the statements generated by the
/                      macro for the main glm call.
/ rrb  19Jan08         %dequote macro used for glm* parameters to allow the user
/                      to put code in single quotes and thereby reference macro
/                      variables &respvar and &trtvar.
/ rrb  19Jan08         Keep all variables in input dataset
/ rrb  26Jan08         Fixed bug with wrong p-value displayed in log
/ rrb  26Jan08         Choosing Fisher or Chi-square now overrides adjcntrvar=
/ rrb  06Apr08         Fixed bug where "Source" variable in _ANOVA dataset was
/                      not in the expected lower case form.
/ rrb  07Apr08         Response variable name given for center effect warning.
/ rrb  13Apr08         glmopdest= replaced by statsopdest= plus made effective
/                      for CMH output.
/ rrb  12May08         byvars2= parameter added
/ rrb  12May08         ods listing close syntax fixed
/ rrb  10Feb09         Bug with use of _unibyvars causing both CHISQ and FISHER
/                      tests for the same BY values fixed.
/ rrb  11Feb09         Proc freq no longer called for 2x1 tables to calculate
/                      CHISQ and FISHER as this is meaningless.
/ rrb  12Feb09         Further limitations on calculating p-values for 2x1
/                      tables added for v1.22
/ rrb  19Feb09         A redesign on calculating p-values for 2x1 tables added
/                      for v1.23
/  JK  03Mar09         Model statement in proc glm only called if glmmeans
/                      statement is not empty. glmlsmeans= and odstblname= 
/                      parameters added. Welch test omitted if GLMMODEL
/                      statement given.
/ rrb  05Mar09         Changes made to avoid putting out "uninitialized"
/                      messages (v1.25).
/ rrb  12Jun09         Incorrect variables in _dummypval fixed for v1.26
/ rrb  17Jun09         Call of stats procs avoided for categorical variables 
/                      where _respcnt not > 1 in all cases for v1.27
/  JK  06Jul09         Incorrect use of missing values corrected for v1.28 for
/                      p-values of Fisher- and Chi-square test. Re-route stat
/                      output for Fisher- and Chi-square test to statsopdest.
/                      Shorten print-out for CMH stat output
/  JK  10Jul09         Bug with CHI/Fisher test selection in case when missing values
/                      are present fixed (v1.29)
/  JK  13Jul09         Bug with CHI/Fisher test selection fixed (v1.30)
/ rrb  15Jul09         Calculation of _trtcnt added. Call of stats procs
/                      avoided for categorical variables where _trtcnt not > 1
/                      in all cases (v1.31)
/  JK  21Aug09         Bug while using missing character response variables 
/                      with CHI/Fisher test fixed (v1.32)
/ rrb  23Aug09         statsopdest= parameter and processing removed from this
/                      macro. This will now be handled by %unistats (v1.33)
/===============================================================================
/ This is public domain software. No guarantee as to suitability or accuracy is
/ given or implied. User uses this code entirely at their own risk.
/=============================================================================*/

%put MACRO CALLED: unipvals v1.33;

%macro unipvals(dsin=,
             usetest=,
               exact=no,
               dsout=,
              trtvar=,
             respvar=,
              byvars=,
             byvars2=,
                type=,
          adjcntrvar=,
            cntrwarn=yes,
            cntrcond=LE 0.05,
           intercond=LE 0.1,
           errortype=3,
           modelform=short,
        dsmodelanova=,
             hovwarn=yes,
             hovcond=LE 0.05,
             hovtest=Levene,
               welch=no,
            glmclass=,
            glmmodel=,
            glmmeans=,
          glmlsmeans=,
          odstblname=,
           glmweight=,
           misspct=
              );

%local error dofisher testsel trtcount name1str hovpvalue hovsig;
%let error=0;


          /*-------------------------------------------------*
                   Check we have enough parameters set
           *-------------------------------------------------*/

%if not %length(&dsin) %then %do;
  %let error=1;
  %put ERROR: (unipvals) No input dataset specified to dsin=;
%end;

%if not %length(&dsout) %then %do;
  %let error=1;
  %put ERROR: (unipvals) No output dataset specified to dsout=;
%end;

%if not %length(&trtvar) %then %do;
  %let error=1;
  %put ERROR: (unipvals) No treatment variable specified to trtvar=;
%end;

%if not %length(&respvar) %then %do;
  %let error=1;
  %put ERROR: (unipvals) No response variable specified to respvar=;
%end;

%if not %length(&type) %then %do;
  %let error=1;
  %put ERROR: (unipvals) No type (C)ategorical/(N)umeric specified to type=;
%end;

%if %length(&adjcntrvar) %then %do;
  %if %words(&adjcntrvar) GT 1 %then %do;
    %let error=1;
    %put ERROR: (unipvals) Only one variable allowed. You specified adjcntrvar=&adjcntrvar;
  %end;
%end;

%if &error %then %goto error;

%let type=%substr(%upcase(&type),1,1);
%if "&type" NE "C" and "&type" NE "N" %then %do;
  %let error=1;
  %put ERROR: (unipvals) Type= must be (C)ategorical or (N)umeric;
%end;

%if %length(&usetest) %then %do;
  %let usetest=%upcase(%substr(&usetest,1,1));
  %if "&usetest" NE "C"
  and "&usetest" NE "F"
  and "&usetest" NE "N" %then %do;
    %let error=1;
    %put ERROR: (unipvals) usetest= must be (C)hi-square, (F)isher or
(N)on-parametric;
  %end;
%end;

%if not %length(&exact) %then %let exact=no;
%let exact=%upcase(%substr(&exact,1,1));

%if not %length(&errortype) %then %let errortype=3;

%if not %length(&modelform) %then %let modelform=short;
%let modelform=%upcase(%substr(&modelform,1,1));

%if not %length(&hovwarn) %then %let hovwarn=yes;
%let hovwarn=%upcase(%substr(&hovwarn,1,1));

%if not %length(&hovcond) %then %let hovcond=LE 0.05;

%if not %length(&hovtest) %then %let hovtest=Levene;

%if not %length(&welch) %then %let welch=no;
%let welch=%upcase(%substr(&welch,1,1));

%if not %length(&cntrwarn) %then %let cntrwarn=yes;
%let cntrwarn=%upcase(%substr(&cntrwarn,1,1));

%if not %length(&cntrcond) %then %let cntrcond=LE 0.05;

%if not %length(&intercond) %then %let intercond=LE 0.1;

%if &error %then %goto error;


    /*--------------------------------*
          Define NOTEST macro
     *--------------------------------*/

%macro notest(dsin=,dsout=);

  *- create dummy p-value dataset -;
  data _dummypval; 
    retain _pvalue . _test "NOTEST  ";  
  run;

  proc summary nway missing data=&dsin; 
    class &byvars &byvars2; 
    output out=_unibynone(drop=_type_ _freq_); 
  run; 

  *- merge with by values -;
  data &dsout; 
    set _unibynone; 
    do ptr=1 to nobsdummy; 
      set _dummypval point=ptr nobs=nobsdummy; 
      output; 
    end; 
  run;

  proc datasets nolist;
    delete _dummypval _unibynone;
  quit;

%mend notest;


    /*--------------------------------*
          Define CMH test macro
     *--------------------------------*/

%macro cmh(dsin=,dsout=);

  ods output close;

  *- create dummy p-value dataset -;
  data _dummypval; 
    retain _pvalue . _test "CMH     ";  
  run;

  proc summary nway missing data=&dsin; 
    class &byvars &byvars2; 
    output out=_unibycmh(drop=_type_ _freq_); 
  run; 

  *- merge with by values -;
  data _dummypval; 
    set _unibycmh; 
    do ptr=1 to nobsdummy; 
      set _dummypval point=ptr nobs=nobsdummy; 
      output; 
    end; 
  run;

  %if %nobs(&dsin(where=(_respcnt>1 and _trtcnt>1))) %then %do;

    ods output CMH=&dsout;

    proc freq data=&dsin(where=(_respcnt>1 and _trtcnt>1));
      by &byvars &byvars2 _test;
      tables &adjcntrvar*&trtvar*&respvar / cmh noprint;
      format &trtvar;
    run;

    data &dsout;
      set &dsout(keep=&byvars &byvars2 _test Statistic Prob
                where=(Statistic=2));
      drop Statistic;
      format Prob;
      rename Prob=_pvalue;
    run;

    data &dsout;
      merge _dummypval &dsout;
      by &byvars &byvars2;
    run;

  %end;
  %else %do;

    data &dsout;
      set _dummypval;
    run;

  %end;

  proc datasets nolist;
    delete _dummypval _unibycmh;
  quit;

%mend cmh;



    /*--------------------------------*
        Define Chi-square test macro
     *--------------------------------*/

%macro chisq(dsin=,dsout=);
  ods output close;

  *- create dummy p-value dataset -;
  data _dummypval; 
    retain _pvalue . _test "CHISQ   ";  
  run;

  proc summary nway missing data=&dsin; 
    class &byvars &byvars2; 
    output out=_unibychi(drop=_type_ _freq_); 
  run; 

  *- merge with by values -;
  data _dummypval; 
    set _unibychi; 
    do ptr=1 to nobsdummy; 
      set _dummypval point=ptr nobs=nobsdummy; 
      output; 
    end; 
  run;

  %if %nobs(&dsin(where=(_respcnt>1 and _trtcnt>1))) %then %do;

    ods output ChiSq=&dsout;

    %if "&misspct" EQ "Y" %then %do;
        proc freq data=&dsin(where=(_respcnt>1 and _trtcnt>1));
          by &byvars &byvars2 _test;
          tables &trtvar*&respvar / missing chisq;
          format &trtvar;
        run;
    %end;
    %else %do;
        proc freq data=&dsin(where=(_respcnt>1 and _trtcnt>1));
          by &byvars &byvars2 _test;
          tables &trtvar*&respvar / chisq;
          format &trtvar;
        run;
    %end;

    data &dsout;
      set &dsout(keep=&byvars &byvars2 _test Statistic Prob
                where=(Statistic="Chi-Square"));
      drop Statistic;
      format Prob;
      rename Prob=_pvalue;
    run;

    data &dsout;
      merge _dummypval &dsout;
      by &byvars &byvars2;
    run;

  %end;
  %else %do;

    data &dsout;
      set _dummypval;
    run;

  %end;

  proc datasets nolist;
    delete _dummypval _unibychi;
  quit;

%mend chisq;



    /*--------------------------------*
       Define Fisher exact test macro
     *--------------------------------*/

%macro fisher(dsin=,dsout=);
  ods output close;

  *- create dummy p-value dataset -;
  data _dummypval; 
    length _test $ 8 ; 
    retain _pvalue .  _test "FISHER";   
  run;

  proc summary nway missing data=&dsin; 
    class &byvars &byvars2; 
    output out=_unibyfish(drop=_type_ _freq_); 
  run; 

  *- merge with by values -;
  data _dummypval; 
    set _unibyfish; 
    do ptr=1 to nobsdummy; 
      set _dummypval point=ptr nobs=nobsdummy; 
      output; 
    end; 
  run;

  %if %nobs(&dsin(where=(_respcnt>1 and _trtcnt>1))) %then %do;

    ods output FishersExact=&dsout;

    %if "&misspct" EQ "Y" %then %do;
    proc freq data=&dsin(where=(_respcnt>1 and _trtcnt>1));
      by &byvars &byvars2 _test;
      tables &trtvar*&respvar / missing fisher;
      format &trtvar;
    run;
    %end;
    %else %do;
    proc freq data=&dsin(where=(_respcnt>1 and _trtcnt>1));
      by &byvars &byvars2 _test;
      tables &trtvar*&respvar / fisher;
      format &trtvar;
    run;
    %end;

    data &dsout;
      set &dsout(keep=&byvars &byvars2 _test Name1 nValue1
                where=(Name1="XP2_FISH"));
      drop Name1;
      format nValue1;
      rename nValue1=_pvalue;
    run;

    data &dsout;
      merge _dummypval &dsout;
      by &byvars &byvars2;
    run;

  %end;
  %else %do;

    data &dsout;
      set _dummypval;
    run;

  %end;

  proc datasets nolist;
    delete _dummypval _unibyfish;
  quit;

%mend fisher;







          /*-------------------------------------------------*
                               Process data
           *-------------------------------------------------*/           

*- in case input dataset has modifiers -;
data _pvaldsin;
  set &dsin;
run;


*- sort by by variables -;
%if %length(&byvars.&byvars2) %then %do;
  proc sort data=_pvaldsin;
    by &byvars &byvars2;
  run;
%end;


*- count how many treatment arms -;
proc sql noprint;
  select count(distinct(&trtvar)) into :trtcount 
    separated by " " from _pvaldsin;
quit;


*- Create a dataset with just the by values in -;
*- so that it can be merged in with the p-value -;
*- dummy datasets. -;
%if %length(&byvars.&byvars2) %then %do; 
  proc summary nway missing data=_pvaldsin; 
    class &byvars &byvars2; 
    output out=_unibyvars(drop=_type_ _freq_); 
  run; 
%end; 


%if "&type" EQ "N" 
 and "&usetest" EQ "N"
 and &trtcount GT 2
 and "&exact" EQ "Y" %then %do;
  %let exact=N;
  %put WARNING: (unipvals) Exact test requested but is not available for
&trtcount treatment;
  %put WARNING: (unipvals) arms (only 2 allowed) so exact test will not be
used.;
%end;






          /*-------------------------------------------------*
                             Categorical data
           *-------------------------------------------------*/ 

%if "&type" EQ "C" %then %do;

    
    /*--------------------------------*
          Count number of responses
     *--------------------------------*/

  *- Count the number of different responses because if less than 2 -;
  *- for a by group then there is no point in calculating a p-value. -;
  %if "&misspct" EQ "Y" %then %do;
    proc summary nway missing data=_pvaldsin;
      class &byvars &byvars2 &respvar;
      output out=_unirespcnt;
    run;
  %end;
  %else %do;
    proc summary nway missing data=_pvaldsin;
      where not missing(&respvar);
      class &byvars &byvars2 &respvar;
      output out=_unirespcnt;
    run;
  %end;

  proc summary nway missing data=_unirespcnt;
    class &byvars &byvars2;
    output out=_unirespcnt(drop=_type_ rename=(_freq_=_respcnt));
  run;


    /*--------------------------------*
          Count number of trt arms
     *--------------------------------*/

  *- Count the number of different treatment arms because if less than 2 -;
  *- for a by group then there is no point in calculating a p-value. -;
  %if "&misspct" EQ "Y" %then %do;
    proc summary nway missing data=_pvaldsin;
      class &byvars &byvars2 &trtvar;
     output out=_unitrtcnt;
    run;
  %end;
  %else %do;
    proc summary nway missing data=_pvaldsin;
      where not missing(&respvar);
      class &byvars &byvars2 &trtvar;
      output out=_unitrtcnt;
    run;
  %end;

  proc summary nway missing data=_unitrtcnt;
    class &byvars &byvars2;
    output out=_unitrtcnt(drop=_type_ rename=(_freq_=_trtcnt));
  run;


    /*--------------------------------*
        Merge counts in with original
     *--------------------------------*/

  data _pvaldsin;
    merge _unitrtcnt _unirespcnt _pvaldsin;
    by &byvars &byvars2;
  run;

  proc datasets nolist;
    delete _unirespcnt _unitrtcnt;
  run;
  quit;

  %if not %length(&usetest) and not %length(&adjcntrvar) %then %do;


      /*--------------------------------*
            Get expected cell counts
       *--------------------------------*/

    %if "&misspct" EQ "Y" %then %do;
      proc freq data=_pvaldsin noprint;
        by &byvars &byvars2 _respcnt _trtcnt;
        tables &trtvar*&respvar / missing sparse outexpect out=_expected(drop=percent);
        format &trtvar;
      run;
    %end;
    %else %do;
      proc freq data=_pvaldsin noprint;
      where not missing(&respvar);
        by &byvars &byvars2 _respcnt _trtcnt;
        tables &trtvar*&respvar / sparse outexpect out=_expected(drop=percent);
        format &trtvar;
      run;
    %end;


      /*--------------------------------*
            Determine test to be used
       *--------------------------------*/

    data _whichtest(keep=&byvars &byvars2 _test);
      length _test $ 8;
      retain _below1 _below5 _cells 0;
      set _expected(where=(_respcnt>1 and _trtcnt>1));
      by &byvars &byvars2;
      if first.%scan(&byvars &byvars2,-1,%str( )) then do;
        _below1=0;
        _below5=0;
        _cells=0;
      end;
      _cells=_cells+1;
      if expected < 1 then _below1=_below1+1;
      if expected < 5 then _below5=_below5+1;
      if last.%scan(&byvars &byvars2,-1,%str( )) then do;
        _test="CHISQ";
        if _cells<5 and _below5>0 then _test="FISHER";
        else if _cells>4 and (_below1>0 or _below5>(_cells/5)) then _test="FISHER";
        output;
      end;
    run;

    data _notest(keep=&byvars &byvars2 _test);
      retain _test "NOTEST  ";
      set _expected(where=(_respcnt<2 or _trtcnt<2));
    run;

    data _whichtest;
      set _notest _whichtest;
      by &byvars &byvars2;
    run;


      /*--------------------------------*
          Find out what tests selected
       *--------------------------------*/
  
    proc sql noprint;
      select distinct(_test) into :testsel
      separated by " " from _whichtest;
    quit;
  

      /*--------------------------------*
          Split data according to test
       *--------------------------------*/

    %if "&testsel" EQ "FISHER" %then %do;
      %let dodsets=_dofisher;
      %let pvaldsets=_fisher;

      data _dofisher;
        merge _whichtest _pvaldsin;
        by &byvars &byvars2;
      run;   
      %fisher(dsin=_dofisher,dsout=_fisher)  
    %end;
    
    %else %if "&testsel" EQ "CHISQ" %then %do;
      %let dodsets=_dochisq;
      %let pvaldsets=_chisq;

      data _dochisq;
        merge _whichtest _pvaldsin;
        by &byvars &byvars2;
      run;   
      %chisq(dsin=_dochisq,dsout=_chisq)
    %end;
    
    %else %do;
      %let dodsets=;
      %let pvaldsets=;

      %if %index(&testsel,FISHER) %then %do;
        data _dofisher;
          merge _whichtest _pvaldsin;
          by &byvars &byvars2;
          if _test="FISHER";
        run;
        %let dodsets=_dofisher;
        %let pvaldsets=_fisher;
        %fisher(dsin=_dofisher,dsout=_fisher)
      %end;

      %if %index(&testsel,CHISQ) %then %do;
        data _dochisq;
          merge _whichtest _pvaldsin;
          by &byvars &byvars2;
          if _test="CHISQ";
        run;
        %let dodsets=&dodsets _dochisq;
        %let pvaldsets=&pvaldsets _chisq;
        %chisq(dsin=_dochisq,dsout=_chisq)
      %end;

      %if %index(&testsel,NOTEST) %then %do;
        data _donotest;
          merge _whichtest _pvaldsin;
          by &byvars &byvars2;
          if _test="NOTEST";
        run;
        %let dodsets=&dodsets _donotest;
        %let pvaldsets=&pvaldsets _notest;
        %notest(dsin=_donotest,dsout=_notest)
      %end;
    %end;



      /*--------------------------------*
         Bring p-value datasets together
       *--------------------------------*/

    data &dsout;
      set &pvaldsets;
      %if %length(&byvars.&byvars2) %then %do;
        by &byvars &byvars2;
      %end;
    run;
    
    proc datasets nolist;
      delete _expected _expectrespcnt _whichtest &pvaldsets &dodsets;
    run;
    quit;

    
  %end;  %*- of if not length(usetest) -;



  %else %if "&usetest" EQ "F" %then %do;     %*- Fisher exact -;

    data _pvaldsin;
      retain _test "FISHER  ";
      set _pvaldsin;
    run;
    
    %fisher(dsin=_pvaldsin,dsout=&dsout);
   
    proc datasets nolist;
      delete _pvaldsin;
    run;
    quit;

  %end;   %*- of if usetest eq F -;


  

  %else %if "&usetest" EQ "C" %then %do;     %*- Chi-square -;

    data _pvaldsin;
      retain _test "CHISQ   ";
      set _pvaldsin;
    run;
    
    %chisq(dsin=_pvaldsin,dsout=&dsout);
 
    proc datasets nolist;
      delete _pvaldsin;
    run;
    quit;
       
  %end;   %*- of if usetest eq C -;


  %else %if %length(&adjcntrvar) %then %do;     %*- CMH test -;

    data _pvaldsin;
      retain _test "CMH     ";
      set _pvaldsin;
    run;
    
    %cmh(dsin=_pvaldsin,dsout=&dsout);
    
    proc datasets nolist;
      delete _pvaldsin;
    run;
    quit;

  %end;   %*- end of CMH test -;


%end;  %*- of if type eq C -;




          /*-------------------------------------------------*
                     Numeric non-categorical data
           *-------------------------------------------------*/ 

%else %if "&type" EQ "N" %then %do;


  %if "&usetest" EQ "N" %then %do;  %*- non-parametric test -;
  
    ods output close; 

    *- create dummy p-value dataset -;
    data _WILC; 
      length Name1 $ 8 Label1 $ 15 cValue1 $ 6; 
      retain nValue1 . cValue1 Label1 " "; 
      Name1="P_KW"; 
      output; 
      Name1="XP2_WIL"; 
      output; 
      Name1="P2_WIL"; 
      output; 
    run;

    *- merge with by values if any -;
    data _WILC; 
      set _unibyvars; 
      do ptr=1 to nobswilc; 
        set _WILC point=ptr nobs=nobswilc; 
        output; 
      end; 
    run; 


    %if &trtcount GT 2 %then %do;
      ods output KruskalWallisTest=_WILC;
    %end;
    %else %do;
      ods output WilcoxonTest=_WILC;
    %end;

    proc npar1way data=_pvaldsin wilcoxon;
    %if %length(&byvars.&byvars2) %then %do;
      by &byvars &byvars2;
    %end;    
      class &trtvar;
      var &respvar;
    %if "&exact" EQ "Y" %then %do;
      exact;
    %end;
      format &trtvar ;
    run;


    data &dsout;
      retain _test "NPAR1WAY";
      set _WILC;
      if Name1=
        %if &trtcount GT 2 %then %do;
          "P_KW"
        %end;
        %else %do;
          %if "&exact" EQ "Y" %then %do;
            "XP2_WIL"
          %end;
          %else %do;
            "P2_WIL"
          %end;
        %end;
        ;
      format nValue1;
      rename nValue1=_pvalue;
      drop Name1 Label1 cValue1;
    run;

    proc datasets nolist;
      delete _pvaldsin _WILC;
    run;
    quit;


  %end;

  
  %else %do;   


    /*----------------------------------------*
                  Parameteric ANOVA
     *----------------------------------------*/

    /*-------------------*
       HoV test and warn  
     *-------------------*/

    ods output close;

    %let hovpvalue=99.0;
    %let hovsig=0;

    /* JK: inserted 'and not %length(&glmmodel)' to avoid Welch test if general model is applied */
    %if not %length(&adjcntrvar) and not %length(&glmmodel) %then %do;
    
      ods output HOVFTest=_hovftest;
  
      proc glm data=_pvaldsin;
      %if %length(&byvars.&byvars2) %then %do;
        by &byvars &byvars2;
      %end;
        class &trtvar;
        model &respvar=&trtvar;
        means &trtvar / hovtest=&hovtest;
        format &trtvar ;
      run;

      data _null_;
        set _hovftest(where=(upcase(Source)="%upcase(&trtvar)"));
        call symput('hovpvalue',put(ProbF,6.3));
      run;

      %if %sysevalf(&hovpvalue &hovcond) %then %let hovsig=1;

      %if "&hovwarn" EQ "Y" and &hovsig %then %do;
        %put WARNING: (unipvals) p-value=&hovpvalue &hovcond for hovtest=&hovtest;
      %end;
      %else %put NOTE: (unipvals) p-value=&hovpvalue for hovtest=&hovtest;

      proc datasets nolist;
        delete _hovftest;
      run;
      quit;

    %end;


    *- create dummy _anova p-value dataset -;
    data _anova; 
      retain HypothesisType 3 ProbF . Source "%lowcase(&trtvar)" ;  
    run;

    *- merge with by values if any -;
    data _anova; 
      set _unibyvars; 
      do ptr=1 to nobsdummy; 
        set _anova point=ptr nobs=nobsdummy; 
        output; 
      end; 
    run; 

    *- create dummy _welch p-value dataset -;
    data _welch;
      retain ProbF . Source "%lowcase(&trtvar)" ;
    run;

    *- merge with by values if any -;
    data _welch; 
      set _unibyvars; 
      do ptr=1 to nobsdummy; 
        set _welch point=ptr nobs=nobsdummy; 
        output; 
      end; 
    run;

    ods output ModelANOVA=_anova;

    %if not %length(&adjcntrvar)  and not %length(&glmmodel) and "&welch" EQ "Y" and &hovsig %then %do;
      ods output Welch=_welch;
    %end;

    /* JK statement added */
    %if %length(&odstblname) %then %do;
      %do i=1 %to %words(&odstblname);
        ods output %scan(&odstblname,&i,%str( ))=_%scan(&odstblname,&i,%str( ));
      %end;
    %end;


    /* JK (comment added): Start 2-way ANOVA */

    proc glm data=_pvaldsin;
      %if %length(&byvars.&byvars2) %then %do;
        by &byvars &byvars2;
      %end;
      %if %length(&glmclass) %then %do;
        CLASS %unquote(%dequote(&glmclass)) ;
      %end;
      %else %do;
        CLASS &trtvar &adjcntrvar;
      %end;
      %if %length(&adjcntrvar) %then %do;
        %if "&modelform" EQ "L" %then %do;
          %if %length(&glmmodel) %then %do;
            MODEL %unquote(%dequote(&glmmodel)) ;
          %end;
          %else %do;
            MODEL &respvar=&trtvar &adjcntrvar &trtvar*&adjcntrvar / ss1 ss2 ss3 ss4;
          %end;
        %end;
        %else %do;
          %if %length(&glmmodel) %then %do;
            MODEL %unquote(%dequote(&glmmodel)) ;
          %end;
          %else %do;
            MODEL &respvar=&trtvar &adjcntrvar / ss1 ss2 ss3 ss4;
          %end;
        %end;
        %if %length(&glmmeans) %then %do;
          MEANS %unquote(%dequote(&glmmeans)) ;
        %end;
/* JK (means statement commented out) */
/*        %else %do; */
/*          MEANS &trtvar ; */
/*        %end; */
        %if %length(&glmlsmeans) %then %do;
          LSMEANS %unquote(%dequote(&glmlsmeans)) ;
        %end;
      %end;
      %else %do;
        %if %length(&glmmodel) %then %do;
          MODEL %unquote(%dequote(&glmmodel)) ;
        %end;
        %else %do;
          MODEL &respvar=&trtvar ;
        %end;
        %if %length(&glmmeans) %then %do;
          MEANS %unquote(%dequote(&glmmeans)) ;
        %end;
        %else %do;
          %if "&welch" EQ "Y" and &hovsig %then %do;
%put NOTE: (unipvals) PROC GLM will use means statement "means &trtvar / hovtest=&hovtest welch;" %eqsuff(&byvars);
            MEANS &trtvar / hovtest=&hovtest welch;
          %end;
        %if %length(&glmlsmeans) %then %do;
          LSMEANS %unquote(%dequote(&glmlsmeans)) ;
        %end;
/* JK (means statement commented out) */
/*          %else %do; */
/*            MEANS &trtvar; */
/*          %end; */
        %end;
      %end;
      %if %length(&glmweight) %then %do;
        WEIGHT %unquote(%dequote(&glmweight)) ;
      %end;
      format &trtvar ;
    run;


    %if %length(&dsmodelanova) %then %do;
      proc append force base=&dsmodelanova data=_anova;
      run;
    %end;

    %if not %length(&adjcntrvar) and not %length(&glmmodel) and "&welch" EQ "Y" and &hovsig %then %do;
      data &dsout;
        retain _test "WELCH   ";
        set _welch(keep=&byvars &byvars2 ProbF Source
                  where=(upcase(Source)="%upcase(&trtvar)")
                 rename=(ProbF=_pvalue));
        drop Source;
        format _pvalue;
      run;
    %end;
    %else %do;
      data &dsout;
        retain _test "ANOVA   ";
        set _anova(keep=&byvars &byvars2 HypothesisType ProbF Source
                  where=(HypothesisType=&errortype and upcase(Source)="%upcase(&trtvar)")
                 rename=(ProbF=_pvalue));
        drop HypothesisType Source;
        format _pvalue;
      run;
      %if "&cntrwarn" EQ "Y" and %length(&adjcntrvar) %then %do;
        %if "&modelform" EQ "L" %then %do;
          data _null_;
            set _anova(where=(HypothesisType=&errortype));
            if upcase(Source) EQ "%upcase(&trtvar*&adjcntrvar)" and ProbF &intercond then
put "WARNING: (unipvals) ANOVA p-value for %upcase(&adjcntrvar*&trtvar) = " ProbF "&intercond for %upcase(&respvar) " %eqsuff(&byvars);
            else if upcase(Source) EQ "%upcase(&adjcntrvar)" and ProbF &cntrcond then
put "WARNING: (unipvals) ANOVA p-value for %upcase(&adjcntrvar) = " ProbF "&cntrcond for %upcase(&respvar) " %eqsuff(&byvars) ;
          run;
        %end;
        %else %do;
          data _null_;
            set _anova(where=(HypothesisType=&errortype 
                              and upcase(Source) EQ "%upcase(&adjcntrvar)"));
            if ProbF &cntrcond then
put "WARNING: (unipvals) ANOVA p-value for %upcase(&adjcntrvar) = " ProbF "&cntrcond for %upcase(&respvar) " %eqsuff(&byvars);
          run;
        %end;
      %end;
    %end;

    proc datasets nolist;
      delete _pvaldsin _anova _welch;
    run;
    quit;  
    
    
  %end;
    
%end;   %*- end of if type is N -;


%if %length(&byvars.&byvars2) %then %do;
  proc datasets nolist;
    delete _unibyvars;
  run;
  quit;
%end;

%goto skip;
%error:
%put ERROR: (unipvals) Leaving macro due to error(s) listed;
%skip:
%mend;