/*

/ Program   : complibs.sas
/ Version   : 3.0
/ Author    : Roland Rashleigh-Berry
/ Date      : 15-Jul-2008
/ Purpose   : To "proc compare" identically-named datasets in two libraries
/ SubMacros : %supasort %attrc %words %nvarsc %nvarsn %varlistc %missvars
/             %match %dsall %quotelst %varlist %removew %varlistn %nodup
/             %dequote
/ Notes     : It is for comparing different versions of the data to identify
/             what has been added or deleted or changed. Each dataset is
/             compared with each identically named dataset in each library.
/             Titles will be assigned internally during macro execution.
/
/             You can either set up the librefs before calling this macro and
/             pass the pure libref names to the parameters or if you put in
/             quotes then it will assign the librefs for you and deassign on
/             completion.
/
/             If the datasets are supposed to be exactly the same then set the
/             parameter direct=yes and it will do an obs by obs comparison. If
/             not then you are recommended to supply a list of sort variables to
/             sortvars= for your datasets that you use to define uniqueness of
/             the observations. %supasort will be called to sort the datasets
/             for variables found in this variable list. This is needed since we
/             can not assume the datasets are in any particular order and we
/             need an order for comparisons. If you leave this blank then it
/             will be assumed that matching datasets are already sorted in a
/             unique order and this sort order will be used in the comparison.
/             If no such sort variables exist then the variables defined to
/             dfsortvars= will be used but failing that an error message will be
/             put out for that dataset.
/ 
/             If you set chardiff=yes then the differences found in character
/             fields are shown in a "proc print" so you are not limited to only
/             20 characters as shown the the standard "proc compare" output.
/             Using this option will greatly increase the size of the output.
/             Only the "id" variables plus character variables where a change
/             was found are shown in this listing.
/
/ Usage     : %complibs(base,comp)
/ 
/===============================================================================
/ PARAMETERS:
/-------name------- -------------------------description------------------------
/ libold            (pos) Libref of old library or path name enclosed in quotes.
/ libnew            (pos) Libref of new library or path name enclosed in quotes.
/ sortvars          (optional) List of variables separated by spaces that you
/                   would use to sort the datasets to obtain uniqueness. If left
/                   blank then the current sort order is used.
/ dfsortvars        (options) Default sort variables to use if none defined to
/                   sortvars= and no current sort variables detected for a
/                   dataset.
/ direct=no         By default, do not do a one to one obs comparison. Overrides
/                   sort variable parameters if set to yes. Use this is your
/                   datasets should be exactly the same.
/ options           Options for "proc compare". "listall" is the default.
/ chardiff=no       (unquoted) By default, do not show detailed character field
/                   differences.
/===============================================================================
/ AMENDMENT HISTORY:
/ init --date-- mod-id ----------------------description------------------------
/ rrb  30Mar06         Use current sort variables for each dataset if nothing is
/                      supplied to sortvars= and added dfsortvars= and direct=
/                      parameters.
/ rrb  13Feb07         "macro called" message added
/ rrb  30Jul07         Header tidy
/ rrb  15Jul08         chardiff= parameter added for detailed analysis of
/                      character variable difference beyond the 20 character
/                      limit for v3.0 .
/===============================================================================
/ This is public domain software. No guarantee as to suitability or accuracy is
/ given or implied. User uses this code entirely at their own risk. 
/=============================================================================*/

%put MACRO CALLED: complibs v3.0;

%macro complibs(libold,
                libnew,
                sortvars=,
                dfsortvars=,
                direct=no,
                options=listbase,
                chardiff=no
                );

%local fnew fold refnew refold sortedby dslist ds i j match cvars;

%if not %length(&direct) %then %let direct=no;
%let direct=%upcase(%substr(&direct,1,1));

%if not %length(&chardiff) %then %let chardiff=no;
%let chardiff=%upcase(%substr(&chardiff,1,1));

%*- Set up libref if libnew is a file name (starts with a quote) -;
%if %index(%str(%'%"),%qsubstr(&libnew,1,1)) %then %do;
  %let fnew=Y;
  %let refnew=NEW;
  libname NEW &libnew access=readonly;
%end;
%else %let refnew=%upcase(&libnew);


%*- Set up libref if libold is a file name (starts with a quote) -;
%if %index(%str(%'%"),%qsubstr(&libold,1,1)) %then %do;
  %let fold=Y;
  %let refold=OLD;
  libname OLD &libold access=readonly;
%end;
%else %let refold=%upcase(&libold);


*- Get a list of datasets in the old library -;
proc sort data=sashelp.vtable(keep=memname libname memtype 
                     where=(libname="&refold" and memtype='DATA')) 
           out=_baseds(drop=libname memtype); 
  by memname; 
run; 


*- Get a list of datasets in the new library -;
proc sort data=sashelp.vtable(keep=memname libname memtype 
                     where=(libname="&refnew" and memtype='DATA')) 
           out=_compds(drop=libname memtype); 
  by memname; 
run; 


*- Select out those datasets that exist in both libraries -;
data _both; 
  merge _baseds(in=_base) _compds(in=_comp); 
  by memname; 
  if _base and _comp; 
run; 


*- Write list of matching datasets out to a macro variable -;
proc sql noprint; 
  select memname into :dslist separated by ' ' from _both; 
quit; 


%*- For each dataset in the list, do the following -;  
%do i=1 %to %words(&dslist); 
  %let ds=%scan(&dslist,&i,%str( ));

  *- Base data ready for sorting -;
  data _base; 
    set &refold..&ds; 
  run; 
    
  *- Compare data ready for sorting -;
  data _comp; 
    set &refnew..&ds; 
  run; 

  *- assign title for the output -;
  title "Comparison of &ds dataset between &refold and &refnew libraries"; 


  %*- direct obs by obs comparison -;
  %if "&direct" EQ "Y" %then %do;
    proc compare base=_base compare=_comp &options; 
    run;
  %end;
  
  %else %do;  %*- sorted comparison -;

    %*- sort base and compare data into matching variable order -;
    %if %length(&sortvars) %then %do;
      %supasort(_base _comp,&sortvars) 
      %let sortedby=%attrc(_base,sortedby); 
    %end;
    %else %do;
      %let sortedby=%attrc(&refold..&ds,sortedby);
      %if not %length(&sortedby) %then %let sortedby=%attrc(&refnew..&ds,sortedby);
      %if %length(&sortedby) %then %do;
        proc sort data=_base;
          by &sortedby;
        run;
        proc sort data=_comp;
          by &sortedby;
        run;
      %end;
      %else %if %length(&dfsortvars) %then %do;
        %supasort(_base _comp,&dfsortvars) 
        %let sortedby=%attrc(_base,sortedby);     
      %end;
    %end;


    %if %length(&sortedby) %then %do;

      *- Do the comparison between the datasets -;
      proc compare base=_base compare=_comp &options; 
        id &sortedby; 
      run; 

%if "&chardiff" EQ "Y" %then %do;

      *- show detailed character variable differences -;
      proc compare base=_base compare=_comp out=_result outnoequal outbase outcomp outdif noprint; 
        id &sortedby; 
      run; 

      data _dif;
        set _result(where=(_type_="DIF"));
        drop _type_;
      run;

      %put >>>>>>>>>> ds=&ds;
      %put >>>>>>>>>> sortedby=&sortedby;

      %let cvars=%varlistc(_dif);
      %put >>>>>>>>>> cvars=&cvars;

      %if %words(&cvars) EQ 0 %then %goto skip;

      %let cvars=%removew(&cvars,&sortedby);
      %put >>>>>>>>>> cvars with sortedby removed = &cvars;


      %if %words(&cvars) EQ 0 %then %goto skip;

      data _dif;
        set _dif(keep=&sortedby &cvars);
        %do j=1 %to %words(&cvars);
          if not index(%scan(&cvars,&j,%str( )),"X")
            then %scan(&cvars,&j,%str( ))=" ";
        %end;
      run;

      %missvars(_dif);
      run;
      %put >>>>>>>>>>> all-missing variables = &_miss_;

      %if %words(&_miss_) EQ 0 %then %let match=;
      %else %let match=%match(&cvars,&_miss_);

      %if "&match" EQ "&cvars" %then %goto skip;

      data _result2;
        merge _dif(in=_dif keep=&sortedby) _result(keep=&sortedby _type_ &cvars);
        by &sortedby;
        if _dif;
        %if %length(&match) %then %do;
          drop &match;
        %end;
      run;

      proc print data=_result2 label noobs;
        id &sortedby; 
      run; 

      %skip:
%end;

    %end;
    %else %do;
      %put;
      %put ERROR: (complibs) No sort variable list determined for dataset &ds;
      %put;
    %end;
  
  %end;

%end; 


*- Tidy up temporary datasets now we are finished -;
proc datasets nolist; 
  delete _baseds _compds _both _base _comp; 
run; 
quit; 


%*- Clear librefs if these were assigned internally -;
%if "&fnew" EQ "Y" %then %do;
  libname &refnew clear;
%end;
%if "&fold" EQ "Y" %then %do;
  libname &refold clear;
%end;


%mend;