1.2.1 SAS之禅

简于理解,精于效率。

1.2.2 SAS教程

  • 第1课 SAS基础入门
  • 第2课 SAS拼表
  • 第3课 SQL应用
  • 第4课 PROC步
  • 第5课 SAS进阶
  • 第6课 SAS数据清洗
  • 第7课 SAS宏
    教程为2019.03课程原稿,相对入门,可用以温故知新。

以下摘要之:

1.2.2.1 SAS基础

  1. OPTIONS COMPRESS = YES;
  2. /* libname ana "";*/
  3. DATA A;
  4. SET SASHELP.CARS;
  5. RUN;
  6. DATA A;
  7. SET SASHELP.CARS;
  8. MSRP1 = MSRP + 1;
  9. RUN;
  10. DATA A;
  11. KEEP MSRP1;
  12. SET SASHELP.CARS;
  13. MSRP1 = MSRP + 1;
  14. RUN;
  15. DATA B;
  16. FORMAT MSRP2 $20.;
  17. SET A;
  18. /* IF MSRP1 > 30000 THEN MSRP2 = "DAYU3W"; */
  19. /* ELSE MSRP2 = "XIAOYU1000"; */
  20. IF MSRP1 > 30000 THEN MSRP2 = "DAYU3W";
  21. ELSE IF MSRP1 > 1000 THEN MSRP2 = "DAYU3W<1000";
  22. ELSE IF MSRP1 > 200 THEN MSRP2 = "DAYU3W<200";
  23. ELSE MSRP2 = "XIAOYU1000";
  24. RUN;
  25. DATA A;
  26. KEEP MSRP1;
  27. SET SASHELP.CARS;
  28. MSRP1 = MSRP + 1;
  29. IF MSRP1 > 50000;
  30. RUN;
  31. DATA A;
  32. KEEP MSRP1;
  33. SET SASHELP.CARS;
  34. MSRP1 = MSRP + 1;
  35. WHERE MSRP1 > 50000;
  36. RUN;
  37. DATA C;
  38. KEEP MSRP1 MSRP2;
  39. SET SASHELP.CARS;
  40. MSRP1 = MSRP + 1;
  41. IF MSRP1 > 50000;
  42. IF MSRP1 > 60000 THEN MSRP2 = ">";
  43. ELSE MSRP2 = "<";
  44. RUN;
  45. /* IF SUBSTR(A,1,1) = "1" */
  46. /* IF SUBSTR(A,1,1) = "1" OR SUBSTR(A,1,1) = "2" */
  47. /* IF SUBSTR(A,1,1) = "1" AND MSRP1 > 60000 */
  48. /* IF SUBSTR(A,1,1) IN ("1", "2") */
  49. /* IF SUBSTR(A,1,1) NOT IN ("1", "2") */
  50. /* "1" */
  51. /* "2" */
  52. /* "3" */
  53. DATA D;
  54. SET C C;
  55. RUN;
  56. PROC SORT DATA = D;
  57. BY MSRP2 MSRP1;
  58. RUN;
  59. PROC FREQ DATA = D;
  60. TABLE MSRP2 * MSRP1;
  61. RUN;

1.2.2.2 SAS拼表

  1. OPTIONS COMPRESS = YES;
  2. DATA TEST_X1;
  3. INPUT NAME $ PRODUCT $ TYPE $;
  4. CARDS;
  5. A CAR 40
  6. B CAR 42
  7. C BUS 44
  8. D MOTO 9
  9. E BUS 10
  10. ;
  11. RUN;
  12. DATA TEST_X2;
  13. INPUT NAME $ PRODUCT $ TYPE $;
  14. CARDS;
  15. A CAR 40
  16. C CAR 42
  17. B BUS 44
  18. H MOTO 9
  19. J BUS 10
  20. ;
  21. RUN;
  22. DATA TEST_Y;
  23. INPUT NAME $ NPRODUCT $ NTYPE $;
  24. CARDS;
  25. A APPLE 38
  26. B BANANA 42
  27. C CAT 44
  28. D DOG 9
  29. E EGG 10
  30. ;
  31. RUN;
  32. DATA TEST_Z;
  33. INPUT NAME $ NPRODUCT $ NTYPE $;
  34. CARDS;
  35. A APPLE 38
  36. A BANANA 42
  37. B CAT 44
  38. B DOG 9
  39. B EGG 10
  40. ;
  41. RUN;
  42. DATA A1_1;
  43. SET TEST_X1 TEST_X2;
  44. RUN;
  45. DATA A1_2;
  46. SET TEST_X1 TEST_Y;
  47. RUN;
  48. DATA A1_3;
  49. SET TEST_X1 TEST_Y(RENAME=(NPRODUCT=PRODUCT NTYPE=TYPE));
  50. RUN;
  51. /* DATA t3; */
  52. /* MERGE t1(IN=A) t2(IN=B); */
  53. /* BY n1; */
  54. /* IF A; */
  55. /* IF B; */
  56. /* RUN; */
  57. PROC SORT DATA = TEST_X2;BY NAME;RUN;
  58. PROC SORT DATA = TEST_Y;BY NAME;RUN;
  59. PROC SORT DATA = TEST_Z;BY NAME;RUN;
  60. DATA A2;
  61. MERGE TEST_X2(IN=A) TEST_Y(IN=B);
  62. BY NAME;
  63. IF A;
  64. RUN;
  65. DATA A2_1;
  66. MERGE TEST_X2(IN=A) TEST_Y(IN=B);
  67. BY NAME;
  68. IF NOT A;
  69. RUN;
  70. DATA A3;
  71. MERGE TEST_X2(IN=A) TEST_Y(IN=B);
  72. BY NAME;
  73. IF B;
  74. RUN;
  75. DATA A4;
  76. MERGE TEST_X2(IN=A) TEST_Y(IN=B);
  77. BY NAME;
  78. IF A AND B;
  79. RUN;
  80. DATA A5;
  81. MERGE TEST_X2(IN=A) TEST_Y(IN=B);
  82. BY NAME;
  83. RUN;
  84. DATA A51;
  85. MERGE TEST_X2(IN=A) TEST_Y(IN=B);
  86. BY NAME;
  87. IF A OR B;
  88. RUN;
  89. DATA A6;
  90. MERGE TEST_X2(IN=A) TEST_Y(IN=B);
  91. BY NAME;
  92. IF A;
  93. RUN;
  94. DATA A7;
  95. MERGE TEST_X2(IN=A) TEST_Z(IN=B);
  96. BY NAME;
  97. IF A;
  98. RUN;

1.2.2.3 SQL应用

  1. OPTIONS COMPRESS = YES;
  2. /* CREATE */
  3. PROC SQL;
  4. CREATE TABLE TCUSTR(
  5. CUSTR_NBR VARCHAR(18),
  6. SEX INT
  7. );
  8. QUIT;
  9. /* INSERT */
  10. PROC SQL;
  11. INSERT INTO TCUSTR(CUSTR_NBR, SEX)
  12. VALUES("440101200109090011", 1)
  13. VALUES("360101199901010012", 0);
  14. QUIT;
  15. /* DELETE */
  16. PROC SQL;
  17. DELETE FROM TCUSTR WHERE CUSTR_NBR = "440101200109090011";
  18. QUIT;
  19. /* UPDATE */
  20. PROC SQL;
  21. UPDATE TCUSTR SET SEX = 1 WHERE CUSTR_NBR = "360101199901010012";
  22. QUIT;
  23. /* SELECT */
  24. PROC SQL;
  25. SELECT *
  26. FROM SASHELP.CARS;
  27. QUIT;
  28. PROC SQL;
  29. SELECT *
  30. FROM SASHELP.CARS
  31. WHERE MAKE = "Acura";
  32. QUIT;
  33. PROC SQL;
  34. SELECT MAKE, MSRP
  35. FROM SASHELP.CARS
  36. WHERE MAKE = "Acura";
  37. QUIT;
  38. /* SQL FUNC */
  39. PROC SQL;
  40. SELECT
  41. COUNT(MSRP),
  42. SUM(MSRP),
  43. MAX(MSRP),
  44. MIN(MSRP),
  45. AVG(MSRP)
  46. FROM SASHELP.CARS
  47. WHERE MAKE = "Acura";
  48. QUIT;
  49. PROC SQL;
  50. SELECT
  51. COUNT(MSRP) AS CNT_MSRP,
  52. SUM(MSRP) AS SUM_MSRP,
  53. MAX(MSRP) AS MAX_MSRP,
  54. MIN(MSRP) AS MIN_MSRP,
  55. AVG(MSRP) AS AVG_MSRP
  56. FROM SASHELP.CARS
  57. WHERE MAKE = "Acura";
  58. QUIT;
  59. /* GROUP BY */
  60. PROC SQL;
  61. SELECT
  62. MAKE,
  63. COUNT(MSRP) AS CNT_MSRP,
  64. SUM(MSRP) AS SUM_MSRP,
  65. MAX(MSRP) AS MAX_MSRP,
  66. MIN(MSRP) AS MIN_MSRP,
  67. AVG(MSRP) AS AVG_MSRP
  68. FROM SASHELP.CARS
  69. GROUP BY MAKE;
  70. QUIT;
  71. /* ORDER BY */
  72. PROC SQL;
  73. SELECT
  74. MAKE,
  75. COUNT(MSRP) AS CNT_MSRP,
  76. SUM(MSRP) AS SUM_MSRP,
  77. MAX(MSRP) AS MAX_MSRP,
  78. MIN(MSRP) AS MIN_MSRP,
  79. AVG(MSRP) AS AVG_MSRP
  80. FROM SASHELP.CARS
  81. GROUP BY MAKE
  82. ORDER BY CNT_MSRP;
  83. QUIT;
  84. /* WHERE */
  85. PROC SQL;
  86. SELECT
  87. MAKE,
  88. COUNT(MSRP) AS CNT_MSRP,
  89. SUM(MSRP) AS SUM_MSRP,
  90. MAX(MSRP) AS MAX_MSRP,
  91. MIN(MSRP) AS MIN_MSRP,
  92. AVG(MSRP) AS AVG_MSRP
  93. FROM SASHELP.CARS
  94. WHERE MAKE ^= "Acura"
  95. GROUP BY MAKE
  96. ORDER BY CNT_MSRP;
  97. QUIT;
  98. /* HAVING */
  99. PROC SQL;
  100. SELECT
  101. MAKE,
  102. COUNT(MSRP) AS CNT_MSRP,
  103. SUM(MSRP) AS SUM_MSRP,
  104. MAX(MSRP) AS MAX_MSRP,
  105. MIN(MSRP) AS MIN_MSRP,
  106. AVG(MSRP) AS AVG_MSRP
  107. FROM SASHELP.CARS
  108. WHERE MAKE ^= "Acura"
  109. GROUP BY MAKE
  110. HAVING CNT_MSRP > 5
  111. ORDER BY CNT_MSRP;
  112. QUIT;
  113. /* CREATE TABLE */
  114. PROC SQL;
  115. CREATE TABLE CARS_GROUP AS
  116. SELECT
  117. MAKE,
  118. COUNT(MSRP) AS CNT_MSRP,
  119. SUM(MSRP) AS SUM_MSRP,
  120. MAX(MSRP) AS MAX_MSRP,
  121. MIN(MSRP) AS MIN_MSRP,
  122. AVG(MSRP) AS AVG_MSRP
  123. FROM SASHELP.CARS
  124. WHERE MAKE ^= "Acura"
  125. GROUP BY MAKE
  126. HAVING CNT_MSRP > 5
  127. ORDER BY CNT_MSRP;
  128. QUIT;
  129. /* CASE WHEN */
  130. PROC SQL;
  131. CREATE TABLE CARS_GROUP AS
  132. SELECT
  133. MAKE,
  134. SUM(CASE WHEN ENGINESIZE > 3 THEN MSRP ELSE 0 END) AS SUM_MSRP
  135. FROM SASHELP.CARS
  136. GROUP BY MAKE;
  137. QUIT;

1.2.2.4 PROC步

  1. OPTIONS COMPRESS = YES;
  2. DATA CARS;
  3. SET SASHELP.CARS;
  4. RUN;
  5. /* PROC SORT */
  6. PROC SORT DATA = SASHELP.CARS;
  7. BY MSRP;
  8. RUN;
  9. PROC SORT DATA = SASHELP.CARS OUT = CARS_MSRP;
  10. BY DESCENDING MSRP;
  11. RUN;
  12. PROC SORT DATA = SASHELP.CARS OUT = CARS_MAKE;
  13. BY MAKE;
  14. RUN;
  15. DATA CARS_DUP;
  16. KEEP TYPE ORIGIN DRIVETRAIN;
  17. SET SASHELP.CARS;
  18. RUN;
  19. PROC SORT DATA = CARS_DUP OUT = CARS_NDK NODUPKEY;
  20. BY TYPE ORIGIN;
  21. RUN;
  22. PROC SORT DATA = CARS_DUP OUT = CARS_NQK NOUNIQUEKEY;
  23. BY TYPE ORIGIN;
  24. RUN;
  25. PROC SORT DATA = CARS_DUP OUT = CARS_ND NODUP;
  26. BY TYPE ORIGIN;
  27. RUN;
  28. /* PROC FREQ */
  29. PROC FREQ DATA = CARS NOPRINT;
  30. TABLE DRIVETRAIN/OUT=CARS_FREQ;
  31. RUN;
  32. PROC FREQ DATA = CARS;
  33. TABLE DRIVETRAIN * ORIGIN/NOCOL NOROW NOPERCENT MISSING;
  34. RUN;
  35. PROC FREQ DATA = CARS;
  36. TABLE ORIGIN * DRIVETRAIN/NOCOL NOROW NOPERCENT MISSING;
  37. RUN;
  38. PROC FREQ DATA = CARS;
  39. TABLE DRIVETRAIN * ORIGIN;
  40. WHERE TYPE = "SUV";
  41. RUN;
  42. PROC FREQ DATA = CARS;
  43. TABLE TYPE * DRIVETRAIN * ORIGIN;
  44. RUN;
  45. PROC FREQ DATA = CARS;
  46. TABLE (TYPE DRIVETRAIN) * ORIGIN;
  47. /* TABLE TYPE * ORIGIN;*/
  48. /* TABLE DRIVETRAIN * ORIGIN;*/
  49. RUN;
  50. /* PROC SORT DATA = ;*/
  51. /* BY ;*/
  52. /* RUN;*/
  53. /* PROC FREQ DATA = ;*/
  54. /* TABLE ;*/
  55. /* RUN;*/
  56. /* PROC UNIVARIATE DATA = ;*/
  57. /* VAR ;*/
  58. /* RUN;*/
  59. /* PROC UNIVARIATE */
  60. PROC UNIVARIATE DATA = CARS;
  61. VAR MSRP;
  62. RUN;
  63. PROC UNIVARIATE DATA = CARS;
  64. VAR MAKE;
  65. RUN;
  66. PROC UNIVARIATE DATA = CARS;
  67. VAR MSRP;
  68. CLASS MAKE;
  69. RUN;
  70. /* PROC TRANSPOSE */
  71. DATA CARS_MAMS;
  72. KEEP MAKE TYPE MSRP;
  73. SET SASHELP.CARS;
  74. RUN;
  75. PROC SORT DATA = CARS_MAMS OUT = CARS_MAMSD NODUPKEY;
  76. BY MAKE TYPE;
  77. RUN;
  78. PROC TRANSPOSE DATA = CARS_MAMSD;
  79. VAR MSRP;
  80. BY MAKE;
  81. RUN;
  82. PROC TRANSPOSE DATA = CARS_MAMSD OUT=CARS_M PREFIX=ID_;
  83. VAR MSRP;
  84. BY MAKE;
  85. ID TYPE;
  86. RUN;
  87. /* PROC SURVEYSELECT */
  88. PROC SURVEYSELECT
  89. DATA=CARS_MAMS METHOD=SRS N=3
  90. OUT=CARS_SRS_N3;
  91. RUN;
  92. PROC SURVEYSELECT
  93. DATA=CARS_MAMS METHOD=SRS SAMPRATE=0.1
  94. OUT=CARS_SRS_P1;
  95. RUN;
  96. PROC SURVEYSELECT
  97. DATA=CARS_MAMS METHOD=SRS SAMPRATE=0.1
  98. OUT=CARS_SRS_N1;
  99. STRATA MAKE;
  100. RUN;

1.2.2.5 SAS进阶

  1. OPTIONS COMPRESS = YES;
  2. DATA CARS;
  3. KEEP MAKE MSRP;
  4. SET SASHELP.CARS;
  5. RUN;
  6. PROC SORT DATA = CARS OUT = CARS_MSRP;
  7. BY DESCENDING MSRP;
  8. RUN;
  9. PROC FREQ DATA = CARS_MSRP NOPRINT;
  10. TABLE MAKE/OUT = CARS_FMAKE;
  11. RUN;
  12. /* SAS FORMAT */
  13. DATA DEMOY;
  14. INPUT NAME $11. BIRTH HEIGHT;
  15. INFORMAT BIRTH YYMMDD10. HEIGHT 5.1;
  16. CARDS;
  17. LIXIAO 1959/10/21 170.5
  18. WANGMING 1992/02/21 177.8
  19. ;
  20. RUN;
  21. /*1960/01/01*/
  22. /*1991/10/21 11616 */
  23. /*1991/10/23 11618 */
  24. DATA DEMON;
  25. INPUT NAME $11. BIRTH $11. HEIGHT;
  26. CARDS;
  27. LIXIAO 1991/10/21 170.5
  28. WANGMING 1992/02/21 177.8
  29. ;
  30. RUN;
  31. DATA DDATE;
  32. SDATE = "01JAN2018"D;
  33. F1DATE = SDATE;
  34. F2DATE = SDATE;
  35. F3DATE = SDATE;
  36. RUN;
  37. DATA DDATE;
  38. SDATE = "01JAN2018"D;
  39. FORMAT F1DATE YYMMDD10. F2DATE YYMMDD8. F3DATE YYMMDD6.;
  40. F1DATE = SDATE;
  41. F2DATE = SDATE;
  42. F3DATE = SDATE;
  43. RUN;
  44. /* SAS DATE */
  45. DATA SASDATE;
  46. SDATE = "21DEC2018"D;
  47. STIME = "09:39:00"T;
  48. SDATETIME = "21DEC2018 09:39:00"DT;
  49. RUN;
  50. DATA SASDATE_TR;
  51. SDATE = "21DEC2018"D;
  52. STIME = "09:39:00"T;
  53. SDATETIME = "21DEC2018 09:39:00"DT;
  54. FORMAT F1DATE YYMMDD10. F2DATE YYMMDD8. FTIME TIME10. FDATETIME DATETIME20.;
  55. F1DATE = SDATE;
  56. F2DATE = SDATE;
  57. FTIME = STIME;
  58. FDATETIME = SDATETIME;
  59. RUN;
  60. /* INTNX */
  61. DATA DINTNX;
  62. SDATE = "01JAN2018"D;
  63. FORMAT FDATE SDATED SDATEM SDATEY YYMMDD10.;
  64. FDATE = SDATE;
  65. SDATED = INTNX("DAY", SDATE, 1);
  66. SDATEM = INTNX("MONTH", SDATE, 1);
  67. SDATEY = INTNX("YEAR", SDATE, 1);
  68. RUN;
  69. DATA DINTNXN;
  70. SDATE = "02JAN2018"D;
  71. FORMAT FDATE SDATEMB SDATEMM SDATEME SDATEMS YYMMDD10.;
  72. FDATE = SDATE;
  73. SDATEMB = INTNX("MONTH", SDATE, 1, "B");
  74. SDATEMM = INTNX("MONTH", SDATE, 1, "M");
  75. SDATEME = INTNX("MONTH", SDATE, 1, "E");
  76. SDATEMS = INTNX("MONTH", SDATE, 1, "S");
  77. RUN;
  78. DATA DINTNXN;
  79. SDATE = "02JAN2018"D;
  80. FORMAT FDATE SDATEMB SDATEMM SDATEME SDATEMS YYMMDD10.;
  81. FDATE = SDATE;
  82. SDATEMB = INTNX("YEAR", SDATE, 1, "B");
  83. SDATEMM = INTNX("YEAR", SDATE, 1, "M");
  84. SDATEME = INTNX("YEAR", SDATE, 1, "E");
  85. SDATEMS = INTNX("YEAR", SDATE, 1, "S");
  86. RUN;
  87. /* INTCK */
  88. DATA DINTCK;
  89. SDATE_M11 = "01DEC2018"D;
  90. SDATE_M12 = "05NOV2018"D;
  91. GAP_DAYS = INTCK("DAY", SDATE_M12, SDATE_M11);
  92. GAP_MONS = INTCK("MONTH", SDATE_M12, SDATE_M11);
  93. RUN;
  94. /* PUT INPUT */
  95. DATA DPUT;
  96. DATE18 = 20181221;
  97. DATEP18 = PUT(DATE18, $8.);
  98. RUN;
  99. DATA DPUT;
  100. DATE18 = 20181221;
  101. DATEP18 = PUT(DATE18, $8.);
  102. DATEI18 = INPUT(DATEP18, YYMMDD8.);
  103. /* DATEI18 = INPUT(PUT(DATE18, $8.), YYMMDD8.);*/
  104. DATEPN18 = PUT(DATEI18, YYMMDDN8.);
  105. DATEIN18 = INPUT(DATEPN18, 8.);
  106. RUN;

1.2.2.6 SAS宏

  1. OPTIONS COMPRESS = YES;
  2. DATA CARS;
  3. KEEP MAKE MODEL MSRP;
  4. SET SASHELP.CARS;
  5. RUN;
  6. DATA DVAR01;
  7. SET CARS;
  8. WHERE MAKE = "BMW";
  9. RUN;
  10. DATA DVAR02;
  11. SET CARS;
  12. WHERE MAKE = "BMW";
  13. RUN;
  14. DATA DVAR03;
  15. SET CARS;
  16. WHERE MAKE = "BMW";
  17. RUN;
  18. %LET VAR1 = BMW;
  19. DATA DVAR01;
  20. SET CARS;
  21. WHERE MAKE = "&VAR1.";
  22. RUN;
  23. DATA DVAR02;
  24. SET CARS;
  25. WHERE MAKE = "&VAR1.";
  26. RUN;
  27. DATA DVAR03;
  28. SET CARS;
  29. WHERE MAKE = "&VAR1.";
  30. RUN;
  31. /* ... */
  32. %PUT &VAR1.;
  33. /* Acura */
  34. /* BMW */
  35. %LET VAR1 = BMW;
  36. DATA DVAR01;
  37. SET CARS;
  38. WHERE MAKE = "&VAR1.";
  39. RUN;
  40. /* 1 */
  41. %LET VAR1 = Acura;
  42. /* %PUT &VAR1.;*/
  43. DATA DVAR1;
  44. SET CARS;
  45. WHERE MAKE = "&VAR1.";
  46. RUN;
  47. /* 2 */
  48. DATA _NULL_;
  49. CALL SYMPUT("VAR2", "Acura");
  50. RUN;
  51. /* %PUT &VAR2.; */
  52. DATA DVAR2;
  53. SET CARS;
  54. WHERE MAKE = "&VAR2.";
  55. RUN;
  56. /* 3 */
  57. /* PROC SQL NOPRINT;*/
  58. /* SELECT MAKE INTO: VAR3 */
  59. /* FROM CARS;*/
  60. /* QUIT;*/
  61. /* PROC SQL NOPRINT;*/
  62. /* SELECT MAX(MAKE) INTO :VAR3 */
  63. /* FROM CARS;*/
  64. /* QUIT;*/
  65. PROC SQL NOPRINT;
  66. SELECT MAX(MAKE),MIN(MSRP) INTO :VAR3, :VAR4
  67. FROM CARS;
  68. QUIT;
  69. %PUT &VAR3. &VAR4.;
  70. DATA DVAR3;
  71. SET CARS;
  72. WHERE MAKE = "&VAR3.";
  73. RUN;
  74. %MACRO T1;
  75. DATA A;
  76. RUN;
  77. %MEND T1;
  78. %T1;
  79. %MACRO T2(V=,);
  80. %PUT &V.;
  81. DATA DVAR_&V.;
  82. SET CARS;
  83. WHERE MAKE = "&V.";
  84. /* 100H */
  85. RUN;
  86. %MEND T2;
  87. %T2(V=BMW);
  88. /* %PUT &V.;*/
  89. %T2(V=Acura);
  90. /* %PUT &V.;*/
  91. %LET M = BMW;
  92. %MACRO T3(V=,);
  93. %PUT &V.;
  94. %IF &V.= &M. %THEN %DO;
  95. DATA DVAR_&V.;
  96. SET CARS;
  97. WHERE MAKE = "&V.";
  98. /* 100H */
  99. RUN;
  100. /* ...*/
  101. %END;
  102. %ELSE %DO;
  103. DATA DVAR_&V.;
  104. SET CARS;
  105. WHERE MAKE = "&V." AND MSRP > 50000;
  106. /* 100H */
  107. RUN;
  108. /* ...*/
  109. %END;
  110. %MEND T3;
  111. %T3(V=BMW);
  112. %T3(V=Acura);
  113. /* DATA A1;*/
  114. /* SET A;*/
  115. /* IF MSRP > 100 THEN A1 = 1;*/
  116. /**/
  117. /* IF MSRP > 100 THEN DO;*/
  118. /* A1 = 1;*/
  119. /* A2 = 1;*/
  120. /* END;*/
  121. /* RUN;*/
  122. DATA A;
  123. DO I = 1 TO 10;
  124. T = I + 2;
  125. OUTPUT;
  126. END;
  127. RUN;
  128. %MACRO T4(V=,);
  129. %PUT &V.;
  130. %DO I = 0 %TO &V.;
  131. DATA A_&I.;
  132. /* SET ACCT;*/
  133. /* WHERE MONTH = &I.; */
  134. RUN;
  135. %END;
  136. %MEND T4;
  137. %T4(V=10);
  138. %T4(V=2);
  139. %MACRO T5(V=,);
  140. %PUT &V.;
  141. DATA MSRP;
  142. SET CARS;
  143. %DO I = 0 %TO &V.;
  144. IF MSRP > &I.*10000 THEN A&I. = 1;ELSE A&I. = 0;
  145. %END;
  146. RUN;
  147. %MEND T5;
  148. %T5(V=10);
  149. /*EVAL*/
  150. %LET S = 1;
  151. %LET I = 10;
  152. %PUT &S. &I.;
  153. %LET S = &S. + &I.;
  154. %PUT &S. &I.;
  155. %LET S = 1;
  156. %LET I = 10;
  157. %LET S = %EVAL(&S. + &I.);
  158. %PUT &S. &I.;

1.2.3 SAS书籍

《Learning SAS by Example: A Programmer’s Guide》 《SAS编程技术教程(朱世武)》 《SAS编程与数据挖掘商业案例》 《SAS语言抛砖引玉》 《The Little SAS Book》 SAS Help and Documentation

1.2.4 SAS Help使⽤

image.png
如上SAS书籍,专门强调了SAS Help and Documentation的重要性。
使用方法也有很多:

1.2.4.1 通篇阅读

相信这个跟全文背诵一样恐怖,但跟着官方文档走,可更全面感受SAS产品设计。
如Contents就提供这样的阅读目录,可一览SAS Products.

image.png

1.2.4.2 关键字搜索

相信是最常用的方法,搜索框输入关键字,快速搜索。
如输入substr
image.png

当然,也有缺点,尤其个别奇奇怪怪的关键字,如输入index
image.png
搜索结果令人无所适从。

1.2.4.3 归纳体系

关键字搜索非常有用,但当你忘记关键字拼写、拼错关键字甚至想要某某功能关键字时,归纳体系更加便捷。
(1)Category
以计算函数为例,搜索Function Categories

Functions can be categorized by the types of values that they operate on. Each DS2 function belongs to one of the following categories:

  • Array
    operates on a named aggregate collection of homogenous data
  • Bitwise Logical Operations
    operates on one or more bit patterns or binary numbers at the level of their individual bits
  • Character
    operates on character data and SQL expressions
  • Date and Time
    operates on date and time values
  • Descriptive Statistics
    operates on values that measure central tendency, variation among values, and the shape of distribution values
  • Distance
    returns the geodetic distance
  • Financial
    calculates financial values such as interest, periodic payments, depreciation, and prices for European options on stocks.
  • Hyperbolic
    performs hyperbolic calculations such as sine, cosine, and tangent
  • Mathematical
    operates on values to perform general mathematical calculations
  • Numeric
    operates on numeric values
  • Probability
    returns probability calculations.
  • Quantile
    returns a quantile from specific distributions
  • Random Number
    returns random variates from specific distributions
  • Special
    operates on null values and SAS missing values, suspends execution of a program, specifies numeric informats at run time, and executes a FedSQL statement.
  • Trigonometric
    operates on values to perform trigonometric calculations
  • Truncation
    operates on values to limit the number of digits
  • Variable Information
    operates on variables and returns names, types, lengths, informats, labels, and other variable information

Array为例,共计如下4个函数,
截屏2020-12-09 下午2.14.35.png
点进DIM函数,也能发现所在Category。
image.png
(2)See Also
一般地,查看某函数时,文末都有一段See Also,带出类似函数。
以DIM函数为例,就把该Category下的其他类似函数链接出来。
image.png

1.2.4.4 总结

整体来说,SAS Help and Documentation的编写还是有一定水平的,始终是最主要的SAS学习文档。需要使用者在应用过程中,不断学习总结,相信SAS水平会有更加长足的进步。此外,除了SAS Help外,剩余资源主要聚焦在人大经济论坛部分早期文档。这里可以看到,SAS整体开发社区还是比较小,后续笔者将再做整理,欢迎持续关注。
如有更新,可能会在 https://github.com/IvanaXu/DataSAS

1.2.5 SAS IDE

1.2.5.1 SAS-BASE

Win + R,输入sas即可调出SAS BASE,页面如下:
image.png

  • 逻辑库-浏览创建SAS库管理、浏览SAS库文件(移动、复制、更名和删除)、建立非SAS文件的快捷方式;
  • 输出结果-以树形结构展示提交SAS程序输出的各项结果,查看、存储、打印或删除各项结果的内容;
  • 编辑器-对SAS程序语法检查、程序段的收缩和展开、可记录宏、自定快捷键;
  • 日志页面-基本窗口,缺省地打开,依次记录SAS进程中各程序运行的信息,可用命令清空;

这里指出,SAS BASE相比SAS EG,语法要求更严谨、没有代码提示、需手动查看日志、不支持中文变量,对代码要求更高,在非远程连接模式下,一般我们建议使用SAS BASE。

1.2.5.2 SAS-EG

SAS Enterprise Guide基本使用教程,特指远程连接模式:
(1)桌面双击打开或开始菜单中打开SAS Enterprise Guide 7.1(64-bit);
(2)用户登录
(2.1)若提示情况如下:
image.png
输入用户名、密码,点击“确认”即可。
(2.2)若提示情况如下:
image.png
点击“是”,进入连接属性,如下。
image.png
(2.3)若无配置文件列表,则点击“添加”。
image.png
(2.4)在设置中填写名称(可自定义),远程连接服务器的地址、端口、登录的用户名、密码等。填写完毕后点击“保存”,后续步骤与已有配置文件的步骤(2.5)一致。
(2.5)若已有配置文件,则选择给定的特定文件,点击“设为活动”,将弹出用户登录界面。
image.png
输入给定的用户名和密码,点击“确定”。
image.png
点击关闭后即可。
(3)程序编写
(3.1)点击左上角文件,新建,程序。
image.png
(3.2)在右侧增强型编辑器区域编写代码,如下。
image.png
(3.3)代码编写完毕后,点击“运行”,或通过键盘快捷键F3或F8,运行程序。
(3.4)若需执行部分代码,选中要执行部分的代码,点击“运行”,或通过键盘快捷键F3或F8,运行程序。

1.2.5.3 SAS-EM

此部分教程选自《SAS EM使用介绍——刘洋洋》
远程连接模式下,
(1)基本操作
image.png
(2)新建项目
登录后界面如下图,如果已经建立过项目则选择“打开项目”,新建项目则选择“新建项目”,注意:新建项目请放置在个人目录下。
image.png
(3)新建逻辑库
类似sas base中的“libname”功能,加载了逻辑库,才能够提取数据。
加载逻辑库的两种方式如下图。其中逻辑库路径即分析数据的目录。
image.png

image.png
(4)创建流程图、创建数据源
创建方式如下图,新创建的流程图是空白的。
创建数据源后才可以对数据进行分析。
image.png

image.png
(5)代码编写窗口
SAS EM中有类似base的代码编写功能,在这里可以做一些数据查看和处理。
image.png
(6)建模功能

详见 模型策略部分。

1.2.5.4 SAS-University

https://www.sas.com/en_us/software/university-edition/download-software.html

SAS大学生版本,通过VirtualBox的轻量级(2G)SAS版本,支持Windows、MacOS、Linux,安装后启动,可在浏览器通过SAS Studio/JupyterLab编写、运行代码,非常适用于SAS学习、示例代码开发。
安装步骤:
截屏2020-12-14 下午3.05.01.png
开启页面:
输入浏览器上输入http://0.0.0.0:10080/
截屏2020-12-12 下午6.55.11.png
JupyterLab:
与python-Jupyter一致,易保存易共享,本文后续SAS代码大部分经此工具校验调试。
截屏2020-12-12 下午6.58.23.png
未来可期!

特别的,由于依托Linux环境,SAS路径一般在 /opt/sasinside/SASHome/SASFoundation/9.4/sas,且SAS版本为9.4_M6: 截屏2021-07-25 上午10.22.35.png 9.4_M6的特殊性在于调整SAS中文编码字符数由2至3,因SUBSTR(“堂”,1,1) = SUBSTR(“商”,2,1) 这种偶然性而影响SAS中文判断的bug将大幅减少! 请多关注SAS版本修订。

1.2.6 SAS代码规范

  • SAS命名规范(试行)

(1)命名由数字、英文字母、下划线组成,且由英文字母开头,不区分大小写,不使用中文命名;
(2)命名总长度不超过32个字符长度,若过长可调整部分缩写,如CRED_LIMIT转换CREDL;
(3)命名不使用常见代码关键词,如SQL、RUN、DATA等;
(4)计算逻辑,如A/B表示为A_PCT_B;
(5)特定前缀命名,如判断是否IS、统计函数类:SUM、MAX、MIN、AVG等、列表LIST、时间类:DT(具体时间)、DAY(天数)等、金额类AMT、数量类CNT、占比类PCT;
(6)特定后缀命名,如时间类(默认为账单月):前一个月L01(LAST)、后一个月N01(NEXT),如产品类,分期产品ZDFQ、LHFQ等;
(7)特定意义命名,如银联命名习惯(CUSTR-客户)、分期MP、账龄MOB等;
(8)尽量使用英文命名,减少使用中文拼音全拼,避免使用中文拼音缩写;

PS:部分SAS版本也是支持中文命名的,很方便也很好理解,但会无限制使命名过长;

1.2.7 SAS⽚段

以下摘录部分常用SAS代码片段。


~逻辑库链接(仅作示例)

  1. %MACRO CONNECT_LIBNAME();
  2. LIBNAME ODS greenplm dsn="Greenplum Wire Protocol" user="readuser" pass=readuser schema=sdm;
  3. LIBNAME MD greenplm dsn="Greenplum Wire Protocol" user="readuser" pass=readuser schema=adm;
  4. LIBNAME RPT greenplm dsn="Greenplum Wire Protocol" user="readuser" pass=readuser schema=rpt;
  5. LIBNAME BSD "/data2/userhome/DEV-HOME/SAS-DEV-OUTBOUND-01/Data"
  6. %MEND CONNECT_LIBNAME;
  7. %CONNECT_LIBNAME();

~格式刷

  1. %MACRO FMT(TB=, KEY=, DESC=, FMT_NAME=);
  2. PROC SORT DATA = &TB. NODUPKEY OUT = INDATA;
  3. BY &KEY. &DESC.;
  4. QUIT;
  5. PROC CONTENTS DATA = INDATA OUT = CONT NOPRINT;RUN;
  6. PROC SQL NOPRINT;
  7. SELECT TYPE INTO: KEY_TYPE FROM CONT
  8. WHERE UPCASE(NAME) = UPCASE("&KEY.");
  9. QUIT;
  10. %PUT &KEY_TYPE.;
  11. DATA &FMT_NAME.;
  12. LENGTH LABEL $200.;
  13. SET INDATA END = LAST;
  14. START = COMPRESS(&KEY.);
  15. LABEL = COMPRESS(&DESC.);
  16. RETAIN FMTNAME "&FMT_NAME.";
  17. %IF &KEY_TYPE. = 2 %THEN TYPE = "C";
  18. %ELSE TYPE = "N";
  19. ;
  20. OUTPUT;
  21. IF LAST THEN DO;
  22. START = "**OTHER**";
  23. LABEL = "UNKNOWN";
  24. HLO = "O";
  25. OUTPUT;
  26. END;
  27. RUN;
  28. PROC FORMAT CNTLIN = &FMT_NAME.;RUN;
  29. %MEND FMT;
  30. %FMT(TB=SASHELP.CARS, KEY=MODEL, DESC=MAKE, FMT_NAME=FMT_MODEL);

~程序休眠

  1. /* 程序将休眠1h */
  2. %MACRO CALL_SLEEP(HOURS=1);
  3. DATA _NULL_;
  4. CALL SLEEP(&HOURS., 3600);
  5. RUN;
  6. %MEND;
  7. %CALL_SLEEP();

~条件构建宏

根据筛选条件构建如MSRP IN(90520)的宏变量,用于where语句。

  1. %MACRO COND_CRE(SRC_LIB=, SRC_TB=, TARGET_VAR=, INCOND=, OUTCOND=, MAXSIZE=1000);
  2. %GLOBAL &OUTCOND.;
  3. PROC SORT
  4. DATA = &SRC_LIB..&SRC_TB.(WHERE = (&INCOND.))
  5. OUT = TEMP(KEEP = &TARGET_VAR.) NODUPKEY;
  6. BY &TARGET_VAR.;
  7. RUN;
  8. PROC TRANSPOSE DATA = TEMP OUT = T_TEMP PREFIX = A;
  9. VAR &TARGET_VAR.;
  10. RUN;
  11. DATA T2_TEMP;
  12. SET T_TEMP;
  13. ARRAY ARR1(*) A:;
  14. LENGTH COND $&MAXSIZE..;
  15. COND = "";
  16. DO I = 1 TO DIM(ARR1);
  17. COND = COMPRESS(COND)||COMPRESS(ARR1[I])||',';
  18. END;
  19. COND = COMPRESS(COND);
  20. COND = SUBSTR(COND,1,LENGTH(COND)-1);
  21. COND = "&TARGET_VAR. IN("||COMPRESS(COND)||")";
  22. KEEP COND;
  23. RUN;
  24. PROC SQL NOPRINT;
  25. SELECT MAX(COND) INTO: &OUTCOND. FROM T2_TEMP;
  26. QUIT;
  27. %MEND COND_CRE;
  28. %COND_CRE(
  29. SRC_LIB=SASHELP, SRC_TB=CARS,
  30. TARGET_VAR=MSRP, INCOND=MSRP>90000,
  31. OUTCOND=_T, MAXSIZE=1000
  32. );
  33. PROC PRINT DATA = SASHELP.CARS;
  34. WHERE &_T.;
  35. RUN;
  36. %PUT &_T.;
  37. /* MSRP IN(90520,94820,121770,126670,128420,192465) */

~FTP下载/上传文件

  1. /* RF */
  2. %MACRO RF(FILENAME=,OUTFILE=);
  3. FILENAME &OUTFILE. FTP "&FILENAME." CD="/test/"
  4. USER="ip1" PASS="xuyifan" HOST="127.0.0.1" PROMPT;
  5. %MEND RF;
  6. %RF(FILENAME=run_all.csv,OUTFILE=RM);
  7. /* RF, 数据读取 */
  8. DATA TA;
  9. %LET _EFIERR = 0;
  10. INFILE RM
  11. DELIMITER = "," MISSOVER DSD LRECL=32767 FIRSTOBS=1 ENCODING="UTF-8" TERMSTR=LF;
  12. FORMAT A B $20.;
  13. INPUT
  14. A $
  15. B $
  16. ;
  17. RUN;
  18. /* WF */
  19. %MACRO WF(FILENAME=,OUTFILE=);
  20. FILENAME &OUTFILE. FTP "&FILENAME." CD="/test/result/"
  21. USER="ip1" PASS="xuyifan" HOST="127.0.0.1" PROMPT;
  22. %MEND WF;
  23. %WF(FILENAME=TA.csv,OUTFILE=WM);
  24. /* WF, 数据写入 */
  25. DATA _NULL_;
  26. SET TA;
  27. FILE WM LRECL=32767 DLM="@" ENCODING="UTF-8";
  28. PUT
  29. A
  30. B
  31. ;
  32. RUN;

~月度循环

  1. %MACRO RCYC(ST=, ED=,);
  2. DATA _NULL_;
  3. CALL SYMPUT("DEV", INTCK("MONTH", INPUT("&ST.01", YYMMDD8.), INPUT("&ED.01", YYMMDD8.)));
  4. RUN;
  5. %PUT &DEV.;
  6. %DO I=0 %TO &DEV.;
  7. DATA _NULL_;
  8. CALL SYMPUT("LMON", PUT(INTNX("MONTH", INPUT("&ST.01", YYMMDD8.), &I.-1),YYMMN6.));
  9. CALL SYMPUT("NMON", PUT(INTNX("MONTH", INPUT("&ST.01", YYMMDD8.), &I.+0),YYMMN6.));
  10. CALL SYMPUT("TMON", PUT(INTNX("MONTH", INPUT("&ST.01", YYMMDD8.), &I.+1),YYMMN6.));
  11. RUN;
  12. %PUT &I. &LMON. &NMON. &TMON.;
  13. %END;
  14. %MEND;
  15. %RCYC(ST=201810, ED=201812);
  16. /*
  17. 2
  18. 0 201809 201810 201811
  19. 1 201810 201811 201812
  20. 2 201811 201812 201901
  21. */

~DO OVER

  1. DATA A;
  2. INPUT X Y $ Z $ M;
  3. CARDS;
  4. . . . 1
  5. 1 A B 4
  6. ;
  7. RUN;
  8. DATA B;
  9. SET A;
  10. ARRAY CHAR _CHARACTER_;
  11. ARRAY NUM _NUMERIC_;
  12. DO OVER CHAR;
  13. IF CHAR EQ " " THEN CHAR = "/";
  14. END;
  15. DO OVER NUM;
  16. IF NUM EQ . THEN NUM = 0;
  17. END;
  18. RUN;
  19. PROC PRINT DATA = B;
  20. RUN;
  21. /*
  22. Obs X Y Z M
  23. 1 0 / / 1
  24. 2 1 A B 4
  25. */

~格式化加强理解

  1. /* 简单 */
  2. PROC FORMAT;
  3. VALUE RR
  4. LOW-50 = "(LOW,50]"
  5. 50<-60 = "(50,60]"
  6. 60<-HIGH = "(60,HIGH)"
  7. ;
  8. RUN;
  9. PROC FORMAT;
  10. VALUE $RRR
  11. "男" = "1"
  12. "女" = "2"
  13. ;
  14. RUN;
  15. PROC FREQ DATA = SASHELP.CLASS;
  16. TABLE HEIGHT SEX;
  17. FORMAT HEIGHT RR. SEX $RRR.;
  18. RUN;
  19. /* 复杂 */
  20. %MACRO FMT(TABLE=, VAR=);
  21. PROC SUMMARY DATA = &TABLE. NWAY MISSING;
  22. CLASS RK_&VAR.;
  23. VAR &VAR.;
  24. OUTPUT OUT = SUMM(DROP = _TYPE_ _PREQ_) MIN = VAR_MIN MAX = VAR_MAX;
  25. RUN;
  26. PROC SORT DATA = SUMM;BY RK_&VAR.;RUN;
  27. DATA SUMM;
  28. SET SUMM;
  29. BY RK_&VAR.;
  30. RK_&VAR. = _N_;
  31. RUN;
  32. PROC SORT DATA = SUMM;BY DESCENDING RK_&VAR.;RUN;
  33. DATA FMT;
  34. SET SUMM END = LAST;
  35. RETAIN EEXCL "Y" SEXCL "N";
  36. START = PUT(VAR_MIN, 8.2);
  37. END = PUT(LAG(VAR_MIN), 8.2);
  38. IF _N_ EQ 1 THEN END = "HIGH";
  39. IF LAST THEN START = "LOW";
  40. FMTNAME = "&VAR.";
  41. LABEL = CATS(PUT(RK_&VAR., Z2.), "(", START, "-", END, ")");
  42. RUN;
  43. PROC FORMAT CNTLIN = FMT;RUN;
  44. %MEND;
  45. PROC RANK DATA = SASHELP.CLASS GROUPS = 4 OUT = RK;
  46. VAR HEIGHT;
  47. RANKS RK_HEIGHT;
  48. RUN;
  49. %FMT(TABLE=RK, VAR=HEIGHT);
  50. PROC FREQ DATA = SASHELP.CLASS;
  51. TABLE HEIGHT;
  52. FORMAT HEIGHT HEIGHT.;
  53. RUN;

截屏2020-12-13 下午1.55.23.png

~PROC SUMMARY

  1. PROC SUMMARY DATA=SASHELP.CLASS NWAY MISSING;
  2. CLASS AGE;
  3. VAR HEIGHT;
  4. OUTPUT OUT = Q N= MEAN= SUM=/AUTONAME;
  5. RUN;
  6. PROC PRINT DATA = Q;
  7. RUN;
  8. /* PROC SUMMARY DATA = RES2018M3 NWAY MISSING;
  9. CLASS KDT IA TA MA
  10. ;
  11. VAR COM03 COM06 COM12 RCRED_LMT;
  12. OUTPUT OUT = SUN2018M3(DROP = _TYPE_ RENAME = (_FREQ_ = NUM)) SUM=;
  13. RUN; */

截屏2020-12-13 下午2.03.21.png

~哈希表HASH连接

  1. DATA TMP;
  2. SET SASHELP.CLASS(OBS=2);
  3. LIFT = _N_;
  4. KEEP AGE LIFT;
  5. RUN;
  6. DATA H_LIFT;
  7. SET SASHELP.CLASS;
  8. * 进行初始化;
  9. IF _N_ EQ 0 THEN SET TMP;
  10. IF _N_ EQ 1 THEN
  11. DO;
  12. DECLARE HASH H(DATASET: "TMP");
  13. H.DEFINEKEY("AGE");
  14. H.DEFINEDATA("LIFT");
  15. H.DEFINEDONE();
  16. END;
  17. * IF H.FIND() EQ 0 THEN OUTPUT;
  18. IF H.FIND() EQ 0 THEN H_LIFT=LIFT;
  19. ELSE CALL MISSING(H_A);
  20. * ELSE H_LIFT=0;
  21. DROP LIFT;
  22. RUN;

~排列组合

  1. %LET N = 3;
  2. PROC SQL NOPRINT;
  3. SELECT QUOTE(STRIP(NAME)) INTO :NAMES SEPARATED BY " "
  4. FROM SASHELP.CLASS(OBS = &N.);
  5. QUIT;
  6. %PUT &NAMES.;
  7. %MACRO ALL_COMB;
  8. PROC DATASETS LIB = WORK NOLIST NODETAILS;
  9. DELETE APP_ALL_COMB;
  10. RUN;
  11. %DO I = 1 %TO &N.;
  12. DATA ALL_COMB;
  13. LENGTH VARS $488.;
  14. ARRAY ARR(&N.) $32 (&NAMES.);
  15. NCOMB = COMB(DIM(ARR), &I.);
  16. DO I = 1 TO NCOMB;
  17. CALL ALLCOMB(I, &I., OF ARR(*));
  18. VARS = CATX(" ",OF ARR1-ARR&I.);
  19. KEEP VARS;
  20. OUTPUT;
  21. END;
  22. RUN;
  23. PROC APPEND
  24. BASE = APP_ALL_COMB
  25. DATA = ALL_COMB;
  26. RUN;
  27. %END;
  28. %MEND;
  29. %ALL_COMB;
  30. PROC PRINT DATA = APP_ALL_COMB;
  31. RUN;

截屏2020-12-13 下午2.32.56.png

~Lift提升度

  1. PROC SQL NOPRINT;
  2. SELECT MEAN(HEIGHT) INTO :MEAN FROM SASHELP.CLASS;
  3. QUIT;
  4. %PUT &MEAN.;
  5. /* 62.33684 */
  6. PROC SQL;
  7. CREATE TABLE LIFT AS
  8. SELECT
  9. AGE,
  10. MEAN(HEIGHT) AS MEAN,
  11. MEAN(HEIGHT)/&MEAN. AS LIFT
  12. FROM SASHELP.CLASS
  13. GROUP BY 1
  14. ORDER BY 2 DESC;
  15. QUIT;

~PROC SQL

  1. PROC SQL;
  2. CREATE TABLE TMP1 AS
  3. SELECT
  4. AGE,
  5. COUNT(*)/19 AS PCT
  6. FROM SASHELP.CLASS
  7. GROUP BY 1
  8. ORDER BY 2 DESC;
  9. QUIT;
  10. PROC SQL;
  11. CREATE TABLE TMP2 AS
  12. SELECT
  13. AGE,
  14. COUNT(*)/19 AS PCT
  15. FROM SASHELP.CLASS
  16. GROUP BY AGE
  17. ORDER BY PCT DESC;
  18. QUIT;
  19. PROC SQL;
  20. CREATE TABLE TMP3 AS
  21. SELECT
  22. AGE,
  23. COUNT(*)/19 AS PCT FORMAT PERCENT10.2
  24. FROM SASHELP.CLASS
  25. GROUP BY AGE
  26. ORDER BY PCT DESC;
  27. QUIT;

~SAS绘图

  1. PROC RANK DATA = SASHELP.CLASS GROUPS = 4 OUT = RK;
  2. VAR HEIGHT;
  3. RANKS RK_HEIGHT;
  4. RUN;
  5. * 修改系统默认MAXOBS
  6. ODS GRAPHICS ON/MAXOBS=10000000;
  7. PROC SGPLOT DATA = RK;
  8. VBAR
  9. RK_HEIGHT/RESPONSE = RK_HEIGHT
  10. STAT = FREQ DATALABEL;
  11. VLINE
  12. RK_HEIGHT/RESPONSE = HEIGHT
  13. STAT = MEAN Y2AXIS DATALABEL DATALABELPOS = BOTTOM;
  14. RUN;
  15. ODS GRAPHICS OFF;
  16. * VBAR 柱状图;
  17. * VLINE 折线图;
  18. * STAT 统计量;
  19. * DATALABEL 标签;
  20. * DATALABELPOS 标签位置;
  21. * Y2AXIS 次坐标;

截屏2020-12-13 下午2.47.06.png

~自制数据集

  1. DATA VARL;
  2. FORMAT VARS $40. VART $40. VARC $100.;
  3. INFILE DATALINES DELIMITER = "|";
  4. INPUT VARS $ VART $ VARC $;
  5. DATALINES;
  6. ACTIVE_MOB|BASE|POINTS=21/41
  7. AGENCY_VISE_SUM|MEAN_6M|POINTS=O,INC=1
  8. ;
  9. PROC PRINT DATA = VARL;
  10. RUN;

截屏2020-12-13 下午2.51.49.png

~宏变量工厂

  1. DATA CARS;
  2. SET SASHELP.CARS(OBS=3);
  3. RUN;
  4. %MACRO M();
  5. DATA _NULL_;
  6. SET CARS NOBS = N;
  7. CALL SYMPUT(COMPRESS("VARS_"||_N_), COMPRESS(MSRP));
  8. CALL SYMPUT("NAL", N);
  9. RUN;
  10. %DO I = 1 %TO &NAL.;
  11. %PUT &&VARS_&I..;
  12. %END;
  13. %MEND M;
  14. %M();
  15. /*
  16. 36945
  17. 23820
  18. 26990
  19. */

~判断表是否存在

  1. %MACRO M();
  2. %IF %SYSFUNC(EXIST(SASHELP.CARS))
  3. %THEN %PUT SASHELP.CARS EXIST;
  4. %ELSE %PUT SASHELP.CARS NOT EXIST;
  5. %MEND M;
  6. %M();
  7. /* SASHELP.CARS EXIST */

~KS计算

  1. DATA TA;
  2. SET SASHELP.CARS;
  3. IF CYLINDERS IN (4,6) THEN K = 0;
  4. ELSE K = 1;
  5. KEEP MSRP K;
  6. RUN;
  7. PROC SORT DATA = TA;
  8. BY MSRP;
  9. RUN;
  10. PROC RANK DATA = TA OUT = TB GROUPS = 5;
  11. VAR MSRP;
  12. RANKS GMSRP;
  13. RUN;
  14. PROC FREQ DATA = TB NOPRINT;
  15. TABLE GMSRP * K/OUT = TC;
  16. RUN;
  17. PROC NPAR1WAY DATA = TC KS NOPRINT;
  18. CLASS K;
  19. VAR GMSRP;
  20. FREQ COUNT;
  21. OUTPUT OUT = TD;
  22. RUN;
  23. PROC PRINT DATA = TD(KEEP = _D_);
  24. RUN;
  25. /* 0.59527 */

~批量SQL(宏)

  1. %MACRO PSQL(PARA1=, PARA2=,);
  2. SUM(CASE WHEN UPCASE(TYPE) = "&PARA1." THEN 1 ELSE 0 END) AS CNT_&PARA1.,
  3. SUM(CASE WHEN UPCASE(TYPE) = "&PARA1." THEN &PARA2. ELSE 0 END) AS SUM_&PARA1._&PARA2.,
  4. MAX(CASE WHEN UPCASE(TYPE) = "&PARA1." THEN &PARA2. ELSE 0 END) AS MAX_&PARA1._&PARA2.,
  5. %MEND PSQL;
  6. PROC SQL;
  7. CREATE TABLE TEMP(DROP = _T) AS
  8. SELECT
  9. MAKE,
  10. %PSQL(PARA1=SUV, PARA2=MSRP)
  11. %PSQL(PARA1=SUV, PARA2=INVOICE)
  12. %PSQL(PARA1=SPORTS, PARA2=MSRP)
  13. %PSQL(PARA1=SPORTS, PARA2=INVOICE)
  14. 1 AS _T
  15. FROM SASHELP.CARS(OBS=100)
  16. GROUP BY MAKE;
  17. QUIT;
  18. PROC PRINT DATA = TEMP;
  19. RUN;

1.2.8 SAS编译

经典问题(简化自《SAS培训_PDV精华》):

交易流水表EVENT月切表,数据字段:账户号XACCOUNT、交易时间INP_DAY、交易金额BILL_AMT,已根据XACCOUNT、INP_DAY排序。 限一个SAS DATA步,求每个账户首次达到消费金额1288的时间以及当月累计消费金额。 数据集示例:

  1. DATA EVENT;
  2. FORMAT XACCOUNT $8. INP_DAY BILL_AMT;
  3. INFILE DATALINES DELIMITER = "|";
  4. INPUT XACCOUNT $ INP_DAY BILL_AMT;
  5. DATALINES;
  6. 00000001|20200101|1200
  7. 00000001|20200102|120
  8. 00000001|20200108|302
  9. 00000002|20200101|323
  10. 00000002|20200102|423
  11. 00000002|20200110|135
  12. 00000003|20200101|3232
  13. 00000003|20200101|544
  14. 00000003|20200107|423
  15. 00000003|20200109|135
  16. ;
  17. /* 已排序 */
  18. PROC SORT DATA = EVENT;
  19. BY XACCOUNT INP_DAY;
  20. RUN;

求解过程:

  1. DATA RESULT1(KEEP = XACCOUNT RAMT1 RDAT1) RESULT2;
  2. SET EVENT(IN = E);
  3. BY XACCOUNT;
  4. PUT "S0:" _ALL_;
  5. IF FIRST.XACCOUNT THEN DO;
  6. RAMT1 = 0;
  7. RAMT2 = 0;
  8. RAMT3 = 0;
  9. RDAT1 = 99999999;
  10. RDAT2 = 99999999;
  11. END;
  12. RAMT1 + BILL_AMT;
  13. RETAIN RAMT2;
  14. RAMT2 = SUM(RAMT2, BILL_AMT);
  15. RAMT3 = SUM(RAMT3, BILL_AMT);
  16. RETAIN RDAT1;
  17. IF RAMT1 >= 1288 THEN RDAT1 = MIN(INP_DAY, RDAT1);
  18. IF RAMT1 >= 1288 THEN RDAT2 = MIN(INP_DAY, RDAT2);
  19. PUT "S1:" _ALL_;
  20. IF LAST.XACCOUNT
  21. THEN OUTPUT RESULT1;
  22. OUTPUT RESULT2;
  23. RUN;

输出结果:

  1. S0:E=1 XACCOUNT=00000001 INP_DAY=20200101 BILL_AMT=1200 FIRST.XACCOUNT=1 LAST.XACCOUNT=0 RAMT1=0 RAMT2=. RAMT3=. RDAT1=. RDAT2=.
  2. _ERROR_=0 _N_=1
  3. S1:E=1 XACCOUNT=00000001 INP_DAY=20200101 BILL_AMT=1200 FIRST.XACCOUNT=1 LAST.XACCOUNT=0 RAMT1=1200 RAMT2=1200 RAMT3=1200
  4. RDAT1=99999999 RDAT2=99999999 _ERROR_=0 _N_=1
  5. S0:E=1 XACCOUNT=00000001 INP_DAY=20200102 BILL_AMT=120 FIRST.XACCOUNT=0 LAST.XACCOUNT=0 RAMT1=1200 RAMT2=1200 RAMT3=. RDAT1=99999999
  6. RDAT2=. _ERROR_=0 _N_=2
  7. S1:E=1 XACCOUNT=00000001 INP_DAY=20200102 BILL_AMT=120 FIRST.XACCOUNT=0 LAST.XACCOUNT=0 RAMT1=1320 RAMT2=1320 RAMT3=120
  8. RDAT1=20200102 RDAT2=20200102 _ERROR_=0 _N_=2
  9. S0:E=1 XACCOUNT=00000001 INP_DAY=20200108 BILL_AMT=302 FIRST.XACCOUNT=0 LAST.XACCOUNT=1 RAMT1=1320 RAMT2=1320 RAMT3=. RDAT1=20200102
  10. RDAT2=. _ERROR_=0 _N_=3
  11. S1:E=1 XACCOUNT=00000001 INP_DAY=20200108 BILL_AMT=302 FIRST.XACCOUNT=0 LAST.XACCOUNT=1 RAMT1=1622 RAMT2=1622 RAMT3=302
  12. RDAT1=20200102 RDAT2=20200108 _ERROR_=0 _N_=3

截屏2020-12-15 下午2.13.38.png

显然,我们在RAMT1、RDAT1上把要求逻辑实现了一遍,且PUT出来了DATA执行前后_ALL_的变化。
这些输出意味着什么呢?在回答此问题之前,需要先了解SAS数据步的编译。

数据步的编译和执行过程: 编译过程: 在这个阶段,系统扫描每个语句检查它是否有语法错误。大部分语法错误导致系统无法对数据步作进一步的处理,在编译阶段将建立要创建的数据集的描述部分。

执行阶段: 若数据步编译成功,就要开始执行阶段。在这个阶段对源数据文件的每一条记录都执行一次数据步,除非在程序中指明其他处理方式。在这个阶段建立数据集的数据部分。

具体来说,
(1)数据步的编译阶段:

  • 即对程序进行词语和语法编译梦,检查它是否有语法错误;

如,漏掉或错拼的关键词、无效的变量名、遗漏或错误的符号、无效的选择项;

  • 将程序转换为机器码,供执行阶段使用;
  • 建立工作部件输入缓冲器(Input Buffer);
  • 建立工作部件PDV(程序数据列,Program Data Vector);

用于建立SAS系统数据集,一次只处理一个观测,PDV一般格式为[_N_|_ERROR_]
其中_N_记录DATA步执行的次数,_ERROR_指示出错信息,0为无错误,1为有错误;

  • 建立数据集中每个变量的三个必须的属性:Name、Type、Length;
  • 建立新建数据集的描述部分;

即数据集名、观测数和变量个数、变量名及其属性;

(2)数据步的执行阶段:
即创建数据集的数据部分,执行顺序:

  • PDV中外部变量初始化为缺省值;
  • 输入每条记录至输入缓冲器,按INPUT语句读至PDV(如有);
  • 按数据步的其他语句处理后,存入PDV;
  • 在数据步结束时缺省地将PDV内容作为一条新观测写入新的数据集;
  • 回到数据步的开始,使PDV中外部变量初始化为缺省值;
  • 对源文件中每条记录都按上述步骤执行一次;
  • 当对源文件最后一条记录执行结束后,数据步执行完成;

上述步骤中,在数据步迭代开始时外部变量初始化为缺省值,但以下几种情况不受限制:

  • RETAIN语句;
  • SUM语句(A + 1);
  • 数组_TEMPORARY_
  • FILE、INFILE语句;
  • 自动生成变量,如_ERROR_、_N_等;

PS:上述变量自然不包含宏变量,宏变量声明后记录在SASHELP.VMACRO中。
如果还是觉得PDV太过抽象,可以这么理解:

个人认为可以把PDV想象成一排用于存放变量值的盒子。 每个盒子代表一个变量,提交一个DATA步后,SAS会对这个DATA步进行编译,然后执行。 首先,PDV是在DATA步的编译阶段生成的。(编译会进行语法检查并创建一排整齐摆放的”盒子”;)然后,在DATA步的执行阶段,根据不同语句对PDV中变量的值进行清空或更改。(将盒子清空或换上新的物品;) 最后,在RUN语句或者OUTPUT语句将PDV中变量的当前值输出到目标数据集中。KEEP、DROP语句或KEEP=、DROP=数据集选项会影响输出到目标数据集中变量的个数。(如果没有KEEP、DROP,将新建变量和数据集变量对应的盒子搬出到目标数据集;如果只有KEEP,则只搬KEEP指定的盒子;如果只有DROP,则不搬DROP指定的盒子;如果KEEP/DROP同时存在,则只搬KEEP-DROP后剩下的盒子) ——选自网络文章《SAS中关于PDV的总结》

所以,上例在我们通过PUT _ALL_查看PDV中所有变量后,发现:RAMT1、RAMT2实现了累加,而未RETAIN的RAMT3、RDAT2在S0时总是先被初始化,而S1每次仅计算当前所在行数据结果。至此,问题解决,有兴趣的同学可以进一步拓展SAS PROC步、宏编译。

在此感谢上述代码贡献者、搬运工们。