/*
/ Program : fixvars.sas
/ Version : 1.0
/ Author : Roland Rashleigh-Berry
/ Date : 29-Mar-2007
/ Purpose : To "fix" variables in a library so they are consistent
/ SubMacros : none
/ Notes : WARNING - USE OF THIS MACRO IN "WRITE" MODE COULD DAMAGE YOUR
/ DATASETS. YOU SHOULD BE VERY CAREFUL IN USING THIS MACRO.
/
/ This works in "read" mode and "write" mode. In "read" mode it
/ writes variable information to a flat file, highlighting any
/ inconsistencies. You can edit this file to remove these
/ inconsistencies. In "write" mode it will read this edited file
/ and apply the variable values to all the variables in the library.
/
/ YOU SHOULD BACK UP DATASETS BEFORE RUNNING THIS IN "WRITE" MODE
/ and carefully check that everything is correct before you delete
/ the backups.
/
/ Note that if you set a character variable length to a common
/ smaller length then you could be losing characters off the end of
/ the variable in some datasets.
/
/ You can not change a variable's name or type using this method.
/
/ Do not change the positioning of any fields in the flat file.
/
/ If a numeric variable has its format changed then an "F" might
/ appear in front of it. This is just a feature of "proc datasets"
/ and makes no difference and can be ignored.
/
/ Use the %clash macro if you need to know the source datasets of
/ some of the variables.
/
/ Variables will only be candidates for updating if the variable
/ name matches exactly (it is case sensitive) as does the variable
/ type.
/
/ Usage : fixvars(mylib,w);
/
/===============================================================================
/ PARAMETERS:
/-------name------- -------------------------description------------------------
/ libname (pos) Library name.
/ mode (pos) Whether in "read" or "write" mode. W or R will do.
/ flatfile="fixvars.txt" Name of the flat file.
/===============================================================================
/ AMENDMENT HISTORY:
/ init --date-- mod-id ----------------------description------------------------
/ rrb 29Mar07 Put out "macro called" message plus header tidy
/===============================================================================
/ This is public domain software. No guarantee as to suitability or accuracy is
/ given or implied. User uses this code entirely at their own risk.
/=============================================================================*/
%put MACRO CALLED: fixvars v1.0;
%macro fixvars(libname,mode,flatfile="fixvars.txt");
%local error user;
%let error=0;
%let user=%upcase(%sysfunc(getoption(user)));
%if not %length(&user) %then %let user=WORK;
%if not %length(&libname) %then %let libname=&user;
%else %let libname=%upcase(%sysfunc(compress(&libname,%str(%'%"))));
%if not %length(&flatfile) %then %do;
%let error=1;
%put ERROR: (fixvars) No flatfile name specified;
%end;
%else %let flatfile="%sysfunc(compress(&flatfile,%str(%'%")))";
%if not %length(&mode) %then %let mode=read;
%let mode=%sysfunc(compress(&mode,%str(%'%")));
%let mode=%upcase(%substr(&mode,1,1));
%if %sysfunc(verify(&mode,RW)) %then %do;
%let error=1;
%put ERROR: (fixvars) Second positional "mode" parameter must be either R(ead) or W(rite);
%end;
%if &error %then %goto error;
%if "&mode" EQ "R" %then %do;
/*----------------------------------------*
READ MODE
*----------------------------------------*/
*- get existing information about dataset variables -;
proc sql noprint;
create table _fixvars as
select name, type, length, format, label from dictionary.columns
where libname="&libname" and memtype='DATA';
quit;
*- get rid of duplicates -;
proc sort nodupkey data=_fixvars;
by name type length format label;
run;
*- write to flat file alerting where there is more than one entry per variable -;
data _null_;
file &flatfile;
set _fixvars;
by name;
if not (first.name and last.name) then put @1 '+' @;
put @3 name @36 type @41 length @45 format @62 label;
run;
*- tidy up -;
proc datasets nolist;
delete _fixvars;
run;
quit;
%end;
%else %if "&mode" EQ "W" %then %do;
/*----------------------------------------*
WRITE MODE
*----------------------------------------*/
*- get existing information about dataset variables -;
proc sql noprint;
create table _fixvarsold as
select name, type, memname, length as oldlength, format as oldformat, label as oldlabel
from dictionary.columns
where libname="&libname" and memtype='DATA'
order by name, type;
quit;
*- read in the flat file containing corrected variable information -;
data _fixvars;
length name $ 32 type $ 4 length 8 format $ 16 label $ 256 newlabel $ 266;
infile &flatfile;
input;
if _infile_ NE ' ' then do;
name=substr(_infile_,3,32);
type=substr(_infile_,36,4);
if length(_infile_) GT 42 then length=input(substr(_infile_,41,3),3.);
else length=input(substr(_infile_,41),3.);
format=' ';
if length(_infile_) GT 44 then do;
if length(_infile_) GT 61 then format=substr(_infile_,45,16);
else format=substr(_infile_,45);
end;
label=' ';
if length(_infile_) GT 61 then label=substr(_infile_,62);
*- replace single double quotes in label with double double quotes since -;
*- we will be enclosing the label in double quotes when in proc datasets -;
newlabel=tranwrd(label,'"','""');
if name EQ ' ' then do;
call symput('error','1');
put 'ERROR: (fixvars) Variable name missing in flat file';
end;
if type not in ('char' 'num') then do;
call symput('error','1');
put 'ERROR: (fixvars) Type of "' type +(-1) '" not recognised in flat file for variable "' name +(-1) '"';
end;
if length EQ . then do;
call symput('error','1');
put 'ERROR: (fixvars) Variable length not recognised in flat file for variable "' name +(-1) '"';
end;
output;
end;
run;
*- sort just in case the order was changed -;
proc sort data=_fixvars;
by name type length format label;
run;
*- ensure there are no duplicates in this corrected list -;
data _null_;
set _fixvars;
by name type;
if not (first.type and last.type) then do;
if first.type then do;
put 'ERROR: (fixvars) You have a duplicate entry for variable "'
name +(-1) '" type "' type +(-1) '"';
call symput('error','1');
end;
end;
run;
*- if a duplicate was found then exit after tidying up -;
%if &error %then %do;
proc datasets nolist;
delete _fixvarsold;
run;
quit;
%goto error;
%end;
*- merge corected and old variable information together -;
data _fixvars;
merge _fixvars(in=_fix) _fixvarsold(in=_old);
by name type;
if _fix and _old;
run;
/*----------------------------------------*
Fix labels
*----------------------------------------*/
*- sort into dataset name order for labels that need changing -;
proc sort data=_fixvars(where=(label NE oldlabel)) out=_fixlabel;
by memname name;
run;
*- generate "proc datasets" code to fix the labels -;
data _null_;
set _fixlabel;
by memname;
if _n_=1 then call execute("proc datasets nolist lib=&libname;");
if first.memname then call execute('modify '||trim(memname)||'; label ');
call execute(trim(name)||'="'||trim(newlabel)||'" ');
if last.memname then call execute(';run;');
run;
/*----------------------------------------*
Fix formats
*----------------------------------------*/
*- Sort into dataset name order for formats that need changing. -;
*- Missing formats must be last so that they are nullified. -;
proc sort data=_fixvars(where=(format NE oldformat)) out=_fixformat;
by memname descending format;
run;
*- generate "proc datasets" code to fix the formats -;
data _null_;
set _fixformat;
by memname;
if _n_=1 then call execute("proc datasets nolist lib=&libname;");
if first.memname then call execute('modify '||trim(memname)||'; format ');
call execute(trim(name)||' '||trim(format)||' ');
if last.memname then call execute(';run;');
run;
/*----------------------------------------*
Fix lengths
*----------------------------------------*/
*- sort into dataset name order for lengths that need changing -;
proc sort data=_fixvars(where=(length NE oldlength)) out=_fixlength;
by memname name;
run;
*- generate the data step code to fix the lengths -;
data _null_;
length dollar $ 1;
set _fixlength;
by memname;
if type='char' then dollar='$';
else dollar=' ';
if first.memname then call execute("data &libname.."||trim(memname)||';length ');
call execute(trim(name)||' '||dollar||' '||put(length,3.)||' ');
if last.memname then call execute(";set &libname.."||trim(memname)||';run;');
run;
*- tidy up -;
proc datasets nolist;
delete _fixvars _fixvarsold _fixlabel _fixformat _fixlength;
run;
quit;
%end;
%goto skip;
%error:
%put ERROR: (fixvars) Leaving fixvars macro due to error(s) listed;
%skip:
%mend;