SAS 数据集自动检验

OPTION  NOCENTER LS=MAX PS=MAX OBS=MAX MACROGEN;

LIBNAME A '.';

OPTIONS FMTSEARCH=(A);

    
/*附录宏*/
%MACRO M_APPENDIX_FORMAT(N1);
    
    %IF %INDEX(&LVARNAME,&N1.) %THEN %DO;
        %DO K=1 %TO &LA_LEN.;                                
            %LET AN=%SCAN(&LVARNAME.,&K.);
            %LET AC=%SCAN(&LA.,&K.);
            %IF &AN.=&N1. %THEN %DO;
                FORMAT &N1. $&AC._FMT.;
                
            %END;
        %END;
    %END;
    %ELSE %DO;
        FORMAT &N1. $3.;
    %END;
            
%MEND;


%LET NL=%SYSFUNC(COUNTW(&NLIST.));/*变量个数*/
%LET LVARNAME=IDENTITEY_TYPE BUSINESS_ORG_TYPE BUSINESS_TYPE PAYBACK_PERSON_TYPE CLASS5_STATE CURRENCY PAYMENT_STAUTS;/*附录变量*/
%LET LA=A1 A18 A20 A44 A58 A32 A33;/*变量对应附录*/
%LET LA_LEN=%SYSFUNC(COUNTW(&LVARNAME.));

%M_APPENDIX_FORMAT(IDENTITEY_TYPE);

LIBNAME XLS EXCEL  '36.xls';

%LET OUTPUTDATA=PCR_BASEINFO;

%MACRO CHECK;

	/*1、检查表结构-保留字段:变量序号、变量名、变量标签、变量类型、变量长度*/
	PROC CONTENTS DATA=A.&OUTPUTDATA OUT=temp_t1(KEEP=VARNUM NAME LABEL TYPE LENGTH) NOPRINT VARNUM; 
	RUN;
	
	PROC SORT DATA=temp_t1;
		BY VARNUM;
	RUN;
	
	DATA XLS.CONTENTS;
		SET temp_t1;
	RUN;
	
	/*2、打印前100条记录查看*/
	DATA XLS.TOP100;
		SET A.&OUTPUTDATA(OBS=100);
	RUN;
	
	/*3、唯一性检查和关联关系检查*/

	PROC SORT DATA=A.&OUTPUTDATA(KEEP=REPORT_NO IDENTITEY_TYPE BUSINESS_ORG_TYPE BUSINESS_ORG BUSINESS_TYPE) OUT=NODUP DUPOUT=XLS.REPORT_NO_DUP NODUPKEY;
		BY REPORT_NO IDENTITEY_TYPE BUSINESS_ORG_TYPE BUSINESS_ORG BUSINESS_TYPE;
	RUN;
	
	DATA XLS.DATE_XC;
		SET A.&OUTPUTDATA;
		MON=INTCK("MONTH",OPEN_DATE,PUTOUT_DATE);
		IF MON<0 AND MON^=.;
	RUN;
			
	/*4、查看数据集频数分布*/
	
	PROC SQL NOPRINT;
		SELECT NAME INTO:NLIST SEPARATED BY ' ' FROM temp_t1;/*变量列表*/
		SELECT TYPE INTO:TLIST SEPARATED BY ' ' FROM temp_t1;/*变量类型*/
	QUIT;
	
	%LET NL=%SYSFUNC(COUNTW(&NLIST.));/*变量个数*/
	%LET LVARNAME=IDENTITEY_TYPE BUSINESS_ORG_TYPE BUSINESS_TYPE PAYBACK_PERSON_TYPE CLASS5_STATE CURRENCY PAYMENT_STAUTS;/*附录变量*/
	%LET LA=A1 A18 A20 A44 A58 A32 A33;/*变量对应附录*/
	%LET LA_LEN=%SYSFUNC(COUNTW(&LVARNAME.));

	
	/*计算字符型变量长度*/
	DATA temp_t2;
		SET A.&OUTPUTDATA(DROP=);
		
		%DO I=1 %TO &NL.;
			%LET N1=%SCAN(&NLIST.,&I.);
			%LET T1=%SCAN(&TLIST.,&I.);
			%IF &T1=2 %THEN %DO;
				&N1._len=LENGTH(&N1.);
			%END;
		%END;
		
	RUN;
	
	
	%DO I=1 %TO &NL.;
		%LET N1=%SCAN(&NLIST.,&I.);
		%LET T1=%SCAN(&TLIST.,&I.);
		%IF &T1=1 %THEN %DO;/*数值型-检查统计量*/
			
			PROC SUMMARY DATA=A.&OUTPUTDATA N NMISS MAX MIN ;
				VAR &N1;
				OUTPUT OUT=XLS.&N1.;
			RUN;
			
 			%END;
			
		%ELSE %DO;/*字符型-检查变量值前两位和变量值长度*/
			
			PROC FREQ DATA=temp_t2;
				TABLES &N1./MISSING OUT=XLS.&N1.;
				%M_APPENDIX_FORMAT(&N1.);
			RUN;
			
			PROC FREQ DATA=temp_t2 NOPRINT;
				TABLES &N1._len/MISSING OUT=XLS.&N1._len;
			RUN;
			
		%END;
	%END;
	
	
%MEND;

%CHECK;

  

原文地址:https://www.cnblogs.com/wdkshy/p/12455635.html