对数据集的处理
data a;
set date.Custbasefulloctober date.Custbasefullseptember;
run;
data b;
set date.Dcp_december date.Dcp_august date.Dcp_june;
by customer_id;
run;
proc append base=date.result data=date.sss ;
run;
data c;
merge date.Dcp_december date.Dcp_august;
by customer_id;
run;
data d;
set c;
set a;
run;
/计算你现在度过了多少时间 周 月 季度。。。/
data a;
Format x date9. Y date9.;
X=‘14feb1991’d;Y=‘21jan2013’d;
year=Intck(‘year’,x,y);
qtr=Intck(‘qtr’,x,y);month=Intck(‘month’,x,y);week=Intck(‘week’,x,y);day=Intck(‘day’,x,y);
put year= qtr= month= week= day=;
run;
proc sql;
select from date.Dcp_july
outer union corr
select from date.dcp_june
quit;
proc sql;
insert into date.dcp_june
set customer_id =555555,
dcp_volume_june=1,
dcp_value_june=13400;
select from date.dcp_june
;
quit;
proc sql;
insert into date.dcp_june(customer_id,dcp_volume_june,dcp_value_june)
values (555555,1,13400);
select from date.dcp_june
;
quit;
proc logistic data=date.monthhh;
model _TEMA023=_TEMA023 month ;
score ;
output out=a;
run;
data test;
input zd var1 var2;
cards;
0 1 1.5
0 2 2
0 3 2.5
0 4 3
1 10 20
1 11 25
1 12 30
;
data test2;
input zd var1 var2;
cards;
1 5 8
1 20 30
;
proc logistic data=test outmodel=out1 plots=effect;
model zd=var1 var2;
score data=test2 out=predict;
run;
proc logistic data=test outmodel=out1 plots=effect;
model zd(event=”1”)=var1 var2;
score data=test2 out=predict;
run;
proc logistic data=test outmodel=out1 ;
model zd=var1 var2/selection=forward;
score data=test2 out=predict;
run;
proc sgplot data=date.monthhh;
reg x=_TEMA023 y=_TEMA024;
scatter x=_TEMA023 y=_TEMA024;
xaxis label=”hh”;
yaxis label=”gg”;
title ““;
run;
proc timeseries data=saslib.A600605 out=a;
id date interval=day
accumulate=average
setmissing=missing /next 还可以用 0,missing,average,first…/
start=“01jan1995”d end=“01jan1996”d;
var oppr clpr;
run;
proc arima data=saslib.month;
identify var=sums nlag=24;
run;
宏函数
%macro count(opts,start=1999,stop=2000);
proc freq data=sashelp.orsales;
where year between &start and &stop;
table productline/&opts; / 表名/
tittle” order beween &start and &stop “;
run;
%mend count;
%**_count(2000,nucum,2001) / 顺序不对 /
%count(nocum,start=2000,stop=2001)
%count(,start=2000,stop=2002)
%macro null;
%global i; /定义全局变量/
%local i; /定义局部的变量/
%let i=1;
%put inside macro i=&i;
%mend null;
%null**;
%put outside macro i=&i;
/强语句 宏加SQL/
%macro sum(i);
proc sql ;
create table a&i as
select sum(profit) into: total
from sashelp.orsales
where quarter=’1999Q1’;
quit;
%put the total profit of 1991Q1 is: &total;
%mend;
%sum(2);
%macro sum(i);
proc sql ;
create table a&i as
select sum(profit),sale into: total, :amount
from sashelp.orsales
where quarter=’1999Q1’;
quit;
%put the total profit of 1991Q1 is: &total;
%mend;
%sum(2);
data Null;
set sashelp.class end=nomore;
call symput(‘name’||left(_N),(trim(name)));
if nomore then
call symput(‘count’,_N);
run;
%macro putloop;
%local i;
%do i=1 %to &count;
%put name&i is &&name&i;
%end;
%mend;
%putloop;
proc sql;
create table datanew as
select from dataold (rename=(var1=xx var2=yy var3=zz var4=mm)); /对多个列进行改名*/
quit;
/查询 对一个月的oppr,clpr求和/
proc sql outobs=1;
create table saslib.result as
select sum(oppr),sum(clpr),date
from saslib.b
where ‘01/jan/1995’d<=date<=‘31/jan/1995’d
;
quit;
/查询后对列进行改名/
proc sql;
create table saslib.T1 as
select
from saslib.result(rename=(_TEMG001=oprr _TEMG002=clpr));
*quit;
结合简化sql语句
proc sql outobs=1;
create table saslib.result(rename=(_TEMG001=oprr _TEMG002=clpr)) as
select sum(oppr),sum(clpr),date
from saslib.b
where ‘01/jan/1995’d<=date<=‘31/jan/1995’d
;
quit;
必备代码
data saslib.month;
set saslib.a;
m=‘31dec1994’d;
newdate=INTCK(‘month’,m,date); / 将日期时间改成 1,1,…2,2,…12,12… /
drop m date;
run;
%MACRO sum(i);
proc sql outobs=1;
create table saslib.hhh as
select sum(clpr),sum(oppr),new_date
from saslib.month
where new_date=&i;
quit;
%mend sum;
%**_sum(2**);
proc means data=date.Dcp_july n mean std stderr clm;
where dcp_volume_july>1;
run;
proc means data=date.Dcp_june n mean std stderr clm;
run;
proc reg data=date.Dcp_june;
model dcp_volume_june=dcp_value_june;
var dcp_volume_june;
run;
proc sql;
create table b as
select from a (rename=(age=xx)); /对多个列进行改名/
*quit;
data c;
set date.a2 end=eof;
array LS{} L: ;
do i=1 to dim(LS);
LS[i]=2;
end;
*run;
proc means data=date.monthhh mean std std var cv range qrange ;
var _TEMA023;
run;
proc univariate data=date.monthhh;
var _TEMA023 _TEMA024;
histogram _TEMA023 _TEMA024 / normal;
probplot _TEMA023 _TEMA024;
inset skewness kurtosis/position=ne;
run;
proc means data=date.monthhh mean nway ;
var _TEMA023 _TEMA024 ;
class _TEMA023 _TEMA024;
types _TEMA023 _TEMA024 _TEMA023_TEMA024;
run;
proc ttest data=date.monthhh;
var _TEMA024;
run;
proc npar1way data=date.monthhh wilcoxon median;
var _TEMA024;
run;
proc univariate data=date.monthhh ;
var _TEMA024;
histogram /lognormal weibull gamma;
run;
proc distance data=date.monthhh;
var ordinal(month);
id month;
run;
data a;
input _TEMA024 @@;
cards;
6 2 6 1 6 2
;
run;
proc reg data=date.monthhh noprint outest=date.b;
PreRev: model month=_TEMA024;
run;
quit;
proc score data=a score=date.b;
var _TEMA024;
*run;
2015年第一天第一问
proc sql;
alter table date.month
add L1 num;
update date.month set L1=TEMA023 where(month=9);
alter table date.month
add L2 num;
update date.month set L2=_TEMA023 where(7<=month<=9);
quit;
/ 查询 对12个月的…. 求平均 /
proc sql ;
create table date.month as
select avg(dcp_volume_january),avg(dcp_value_january) from date.dcp_january
union
select avg(dcp_volume_february),avg(dcp_value_february) from date.dcp_february
union
select avg(dcp_volume_march),avg(dcp_value_march) from date.dcp_march
union
select avg(dcp_volume_april),avg(dcp_value_april) from date.dcp_april
union
select avg(dcp_volume_may),avg(dcp_value_may) from date.dcp_may
union
select avg(dcp_volume_june),avg(dcp_value_june) from date.dcp_june
union
select avg(dcp_volume_july),avg(dcp_value_july) from date.dcp_july
union
select avg(dcp_volume_august),avg(dcp_value_august) from date.dcp_august
union
select avg(dcp_volume_september),avg(dcp_value_september) from date.dcp_september
union
select avg(dcp_volume_october),avg(dcp_value_october) from date.dcp_october
union
select avg(dcp_volume_november),avg(dcp_value_november) from date.dcp_november
union
select avg(dcp_volume_december),avg(dcp_value_december) from date.dcp_december
quit;
/设置列month/
proc sql;
alter table date.month
add month num;
quit;
/对列month赋值/
data date.month;
set date.month(keep=_TEMA023 _TEMA024 month);
input month;
cards;
1
2
3
4
5
6
7
8
9
10
11
12
;
run;
/对目标 计算/
proc sql;
create table date.sss as
select _TEMA023 from date.month where month=9
union
select avg(_TEMA023) from date.monthh where 7<=month<=9
union
select avg(_TEMA023) from date.monthhh where 4<=month<=9
;
quit;
/对表转置 生成3列/
proc transpose data=date.sss out=date.zhuanzhi;
run;
/合并/
data date.c;
merge date.zhuanzhi(drop=_NAME) date.Cust_base_full_october;
run;
proc sql;
create table date.d as
select avg(txn_fx_amt) from date.Fx_txn_october
;
quit;
data date.a2;
merge date.A20151 date.Fx_txn_november;
run;
proc data=date.a22;
model txn_fx_amt=age Cust_RELATIONSHIP Cust_RELATIONSHIP_L3 Cust_RELATIONSHIP_L6 /SELECETION=forward clm;
run;
quit;
proc summary data=date.monthhh;
var _TEMA024;
output out=a;
run;
proc logistic data=date.Cust_base_full_w_dcp_october;
model txn_sav_FCY_cnt=age salary txn_sav_FCY_amt;
score data=date.Cust_base_full_w_dcp_october out=c;
run;
2014年决赛第二题答案
data exampleQ1;
infile datalines dlm=’,’;
input Account_ID Revenue Segment $ N_var1 C_var1 $ TRN_DT $10. TRN_LOC $ TRN_CDE $ TRN_AMT @;
datalines;
10001,5,G1,., ,2014-10-23,CHINA,APP_001,100
10002,6,G1,., ,2014-10-23,CHINA,APP_002,110
10003,.,G1,., ,2014-10-23,CHINA,APP_003,50
10004,10,G2,., ,2014-10-23,CHINA,APP_004,90
10005,15,G2,., ,2014-10-23,CHINA,CAN_005,12
10006,13,G2,., ,2014-10-23,CHINA,MAN_006,10000
10007,.,G2,., ,2014-10-23,CHINA,APP_007,10
10008,20,G3,., ,2014-10-23,CHINA,APP_008,100
10009,25,G3,., ,2014-10-23,CHINA,APP_009,200
10010,.,G3,., ,2014-10-23, ,REJ_010,100000
10010,.,G3,., ,2014-10-23, ,REJ_010,100000
10010,.,G4,., ,2014-10-23, ,REJ_010,100000
10010,.,G5,., ,2014-10-23, ,REJ_010,100000
10003,7,G1,., ,2014-10-24,CHINA,MAN_003,50000
10004,9,G2,., ,2014-10-24,CHINA,APP_004,90
10006,6,G2,., ,2014-10-24,CHINA,MAN_006,1000
10007,.,G2,., ,2014-10-24,CHINA,APP_007,10
10008,8,G3,., ,2014-10-24,CHINA,APP_008,100
10009,9,G3,., ,2014-10-24,CHINA,APP_009,200
10010,.,G3,., ,2014-10-24,CHINA,APP_010,100
10010,10,G4,., ,2014-10-24,CHINA,APP_011,101
10010,20,G5,., ,2014-10-24,CHINA,APP_012,102
;
run;
proc print noobs;run;
proc sql;
create table example_Q2 as
select Account_ID,
CASE WHEN Revenue ^=. then Revenue else avg(Revenue) end as Revenue,
Segment,
N_var1,
C_var1,
TRN_DT,
TRN_LOC,
TRN_CDE,
TRN_AMT
from example_Q1
group by Segment;
quit;
proc print noobs;run;
proc transpose data=example_Q2 out=trans_xmpl;
var _ALL;
run;
data b;
set transxmpl end=eof;
array cols {} COL: ;
do i = 1 to dim(cols);
cols[i]=ifn((strip(cols[i])=” “ or strip(cols[i])=”.”),0,1);
end;
if sum(of COL:)=*0 then
call symput(“dropvars”, catx(“ “,symget(“dropvars”),_NAME));
run;
data exampleQ3;
set example_Q2 (drop=&dropvars);
run;
proc print noobs;run;
proc sort data=example_Q3;
by Account_ID TRN_DT TRN_LOC TRN_CDE TRN_AMT;
run;
data example_Q41;
set example_Q3;
by Account_ID TRN_DT TRN_LOC TRN_CDE TRN_AMT;
if ^(first.TRN_AMT = last.TRN_AMT) then output;
run;
proc print noobs;run;
proc sort data=example_Q3;
by _all;
run;
proc contents noprint data=exampleQ3 out=cont_xmpl;
proc sql noprint;
select count() into : nvar from cont_xmpl;
quit;
%put &nvar.;
proc sql noprint;
select name into : var1 - : var%*_trim(&nvar.) from cont_xmpl;
quit;
%put &var1. &var5.;
%macro dedup(indata,outdata,nvar);
data &outdata.;
set &indata.;
retain
%do i = 1 %to &nvar.;
var&i.
%end;
;
if _n eq 1 then output;
else do;
if (&var1. eq _var1)
%do j = 2 %to &nvar.;
and (&&var&j. eq _var&j.)
%end;
then delete;
else output;
end;
%do k = 1 %to &nvar.;
_var&k. = &&var&k.;
%end;
drop _var:;
run;
%mend;
%dedup(example_Q3,example_Q42,&nvar.);
proc print noobs;run**;
/**Question 5:
Import the sample web log to a SAS dataset which contains 6 variables shown above. */
%LET nfld = 35 ;
DATA a20142.sampleweb_log;
INFILE “C:\Users\JOHN\Desktop\Sample_Web_Log.txt” DELIMITER = ‘|’ MISSOVER DSD LRECL=32767 END=eof ;
INFORMAT fld1-fld&nfld $256. ;
ARRAY fld(&nfld) fld1-fld&nfld ;
INPUT fld1-fld&nfld ;
FORMAT time_stamp DATETIME22.3 ;
LENGTH psdo_cust_id $20.
page_name $64.
session_id $23.
device_type $1.
platform $1. ;
IF SUBSTR(REVERSE(_INFILE),1,1) = ‘|’ THEN DO ;
evnt_dt = INPUT(SUBSTR(fld{1},2,10), YYMMDD10.) ;
evnt_tm = INPUT(SUBSTR(fld{1},13,8)||’.’||SUBSTR(fld{1},22,3), TIME12.3) ;
time_stamp = INPUT(PUT(evnt_dt, DATE9.)||’:’||PUT(evnt_tm,TIME12.3), DATETIME22.3);
DO i = 2 TO &nfld ;
IF fld{i} > ‘’ THEN DO ;
SELECT ;
WHEN ( LOWCASE(SUBSTR(fld{i},1,MIN(12,LENGTH(fld{i})))) = ‘psdo_cust_id’ )
DO ;
customer_id = LEFT(PUT(INPUT(SUBSTR(fld{i},14),9.),9.)) ;
END ;
WHEN ( LOWCASE(SUBSTR(fld{i},1,MIN(10,LENGTH(fld{i})))) = ‘session_id’ )
DO ;
session_id = TRIM(SUBSTR(fld{i},12)) ;
END ;
WHEN ( LOWCASE(SUBSTR(fld{i},1,MIN(9,LENGTH(fld{i})))) = ‘page_name’ )
DO ;
page_name = TRIM(SUBSTR(fld{i},11)) ;
END ;
WHEN ( LOWCASE(SUBSTR(fld{i},1,MIN(8,LENGTH(fld{i})))) = ‘platform’ )
DO ;
platform = TRIM(SUBSTR(fld{i},10)) ;
END ;
WHEN ( LOWCASE(SUBSTR(fld{i},1,MIN(11,LENGTH(fld{i})))) = ‘device_type’ )
DO ;
device_type = TRIM(SUBSTR(fld{i},13)) ;
END ;
OTHERWISE ;
END ;
END ;
END ;
END;
KEEP
time_stamp
psdo_cust_id
page_name
session_id
device_type
platform
;
RUN ;
/ 问题6/
proc freq data=a20142.sample_web_log order=freq;
table page_name/list missing;
run;
/ 问题7/
proc sql;
select max(time_stamp) - min(time_stamp) as During_time
from a20142.sample_web_log
group by psdo_cust_id;
quit;
/经常使用的宏变量定义方法有三种:
1. %let xxx=yyy;
2. Call Symput(‘xxx’,’yyy’);
3. select xxx into: yyy.
三种定义方式最大的区别是在MACRO函数内定义所生成的宏变量的类型不同:
Call Symput在宏函数中定义的宏变量可以在函数外调用;而%let和 Select into则不能,因为这2种方法在MACRO函数内生成的是局部宏变量,若要想在MACRO函数外调用,需事先用%global申明变量类型。
/
%macro test();
data NULL;
call symput(‘Today’,put(today(),date9.));
run;
Method 2
%let today=%sysfunc(today(),date9.);
Method 3
data todaydate;
date=today();
Proc sql noprint;
select put(date,date9.) INTO: today
from todaydate
;
quit;
%put &today;
%mend;
%test;
%put &today;
data A2014.a;
input AccountID Revenue Segment$3. N_var1 C_var1 TransactionDate$10. TransactionLocation$6. TransactionCode$8. TransactionAmount;
cards;
10001 5 G1 . . 2014-10-23 CHINA APP_001 100
10002 6 G1 . . 2014-10-23 CHINA APP_002 110
10003 . G1 . . 2014-10-23 CHINA APP_003 50
10004 10 G2 . . 2014-10-23 CHINA APP_004 90
10005 15 G2 . . 2014-10-23 CHINA CAN_005 12
10006 13 G2 . . 2014-10-23 CHINA MAN_006 10000
10007 . G2 . . 2014-10-23 CHINA APP_007 10
10008 20 G3 . . 2014-10-23 CHINA APP_008 100
10009 25 G3 . . 2014-10-23 CHINA APP_009 200
10010 . G3 . . 2014-10-23 REJ_010 100000
10010 . G3 . . 2014-10-23 REJ_010 100000
10010 . G4 . . 2014-10-23 REJ_010 100000
10010 . G5 . . 2014-10-23 REJ_010 100000
10003 7 G1 . . 2014-10-24 CHINA MAN_003 50000
10004 9 G2 . . 2014-10-24 CHINA APP_004 90
10006 6 G2 . . 2014-10-24 CHINA MAN_006 1000
10007 . G2 . . 2014-10-24 CHINA APP_007 10
10008 8 G3 . . 2014-10-24 CHINA APP_008 100
10009 9 G3 . . 2014-10-24 CHINA APP_009 200
10010 . G3 . . 2014-10-24 CHINA APP_010 100
10010 10 G4 . . 2014-10-24 CHINA APP_011 101
10010 20 G5 . . 2014-10-24 CHINA APP_012 102
;
run;
/ 不行哎 用了近20分钟建立这个表格 还是不够熟练 而且我用的是初等方法 还可以尝试用下input 的格式那种输入 dd.. /
/ 看了好一会才把第二问题目看懂, 用segment的平均值 去换 revenue的缺失值 /
/keep retain 或者是replace !!!语句/
/还是数组了。。。/
data a2014.b;
set a2014.a(keep= AccountID revenue);
array n numeric;
Do over n;
if n=. then n=1;
end;
run;
data a2014.c;
set a2014.a(drop= AccountID revenue);
run;
data a2014.d;
merge a2014.b a2014.c;
run;
/ 去掉一个数也没有的变量 /
/ 但是并没有按照题意做 要求是做选择。。 /
data a2014.e;
set a2014.d(drop=N_var1 C_var1);
run;
/ 题目中要求用 data 步做 找出重复变量 /
proc sql;
create table a2014.f as
select distinct TransactionDate,TransactionLocation,TransactionCode,TransactionAmount from a2014.e;
quit;
/然后接下来删除重复变量/
翻译
建模的案例研究
我们是某某银行顾问。
“外汇”代表外汇产品使用的客户当他们需要外币。例如,当他们出国旅行,去做投资。
在当前的实践中,他们从他们的客户基础和随机选择20000名客户交叉销售外汇产品。
当前的反应率还不到5%。
作为顾问,我们必须构建一个逻辑回归模型对某某银行,以便在20000年针对相同数量的客户,反应率更高。
我们有两个主表,一个用于9月,另一个用于10月的月。
这些表是CUST_BASE_FULL_September CUST_BASE_FULL_October的名字。
我们将把一个作为抽样基础和另一个时间验证基地。
Q1。变量创建
有一种产品叫DCP -双货币某某银行也卖的产品。
我们有12个月的DCP在银行产品的销售信息。
数据集是名叫DCP_January DCP_February等等。每个数据集都有以下3个变量。
变量类型描述
customer_id char假的客户id
在本月dcp_value num总DCP平衡
dcp_volume num DCP账户在本月举行
Q1a)我们希望您能创建一些历史变量(3个月、6个月等)使用这些数据集的两个主表。
请创建L1,L3,L6种变量使用以下名称。
例子:掌握表9月,L3变量将创建使用以下几个月:7月、8月、9月。
使用以下名称:调用表DCP_September DCP_October。
(5分)
变量类型描述
在当前月份dcp_value_l1 num双货币价值在过去的1个月
dcp_value_l3 num平均双货币价值在过去的3个月
dcp_value_l6 num平均双货币价值在过去的6个月
dcp_volume_l1 num双货币数量在过去的1个月
dcp_volume_l3 num平均双货币数量在过去的3个月
dcp_volume_l6 num平均双货币数量在过去的6个月
Q1b)现在我们需要将上面创建的变量与我们的主表和给表名称如下:
(CUST_BASE_FULL_w_DCP_September)
(CUST_BASE_FULL_w_DCP_October)
(5分)
q2大师表创建
Q2a)有2个月的外汇(FX)10月和11月的交易记录,请使用下面的外汇交易表生成的信息
首先,骨料(上卷)外汇交易表客户级别,并给下面的数据名称
(CL_FX_TXN_October)
(CL_FX_TXN_November)
其次,填下表从上面卷起的信息。
m # Cust贸易外汇外汇交易总数交易总额(美元)平均每个事务交易金额(美元)
10月
11月
(5分)
Q2b)- - -请与客户生成一个模型基础表[CUST_BASE_FULL_w_DCP_October]和外汇表11月。请给下面的名字模型基础。
(Model_Base)
-请与客户生成一个模型基础表[CUST_BASE_FULL_w_DCP_September]和外汇表10。请给下面的名字模型基础。
(OUTTIME_Base)
(5分)
第三反应者
现在我们需要构建一个逻辑回归模型来预测什么样的客户将在即将到来的月贸易外汇。
一个应答器来月外汇金额> 0。
让我们检查客户的数量,反应基地,我们已经建立了两个模型。填写以下表来理解。
“客户基础
在m“#总Cust本月#响应在下月%响应
9月(Model_Base)
10月(OUT_TIME验证)
(5分)
Task1:
TRN_201501_WK ,TRN_201502_WK 、TRN_201503_WK *表包含信用卡交易记录,请参考标签变量描述。基于这些表,使用SAS执行以下任务:
1。每周事务表合并到3月事务表TRN_201501 TRN_201502 TRN_201503。(5分)
2。不时,事务将被拒绝,因为信用卡限制或其他原因,和一些客户会刷几次卡一次又一次。基于TRN_201501 TRN_201502 TRN_201503,使用SAS删除重复的拒绝交易,只保留最早的一个。名称与TRN_201501_S1输出表、TRN_201502_S1 TRN_201503_S1。(5分)
小贴士:
——重复拒绝交易有相同的客户ID,帐户ID,ID卡,商家帐户号码,事务本币金额、POS机ID,交易状态,交易日期但不同的事务时间。
3所示。基于TRN_201501_S1 TRN_201502_S1,本月TRN_201503_S1,添加后缀为所有变量名开始“环境”。(例如表TRN_201501_S1,重命名变量TRN_LCY_AMT TRN_LCY_AMT_1501)。使用SAS宏来完成这个任务。名称与TRN_201501_S2输出表、TRN_201502_S2 TRN_201503_S2。(5分)
4所示。与积极TRN_LCY_AMT批准的交易被认为是消费。基于TRN_201501_S2 TRN_201502_S2 TRN_201503_S2,总结3表CUST_ID MRCH_CAT_GRP_DESC水平。名称与SUM_TRN_201501输出表、SUM_TRN_201502 SUM_TRN_201503。(5分)
一个表SUM_TRN_201501的快照:
5。使用merge语句加入SUM_TRN_201501、SUM_TRN_201502 SUM_TRN_201503 CUST_ID和MRCH_CAT_GRP_DESC如下变量TTL_TRN_LCY_AMT TRN_LCY_AMT_1501的总和,TRN_LCY_AMT_1502 TRN_LCY_AMT_1503。名字与TRN_ALL输出表。(5分)
一个表TRN_ALL的快照:
6。基于TRN_ALL,创建一个表显示客户每一个商人群体支出水平2方法分别如下:
1。Proc转置。名字与TRN_ALL_TRS1输出表。(10分)
2。数据步骤(置换与数据步骤)。名字与TRN_ALL_TRS2输出表。(10分)
一个表TRN_ALL_TRS1的快照:
7所示。基于TRN_ALL_TRS1,商人集团客户支出最高?(5分)
Task2:
下面的附件是文件汇丰银行的电话银行系统日志,里面有几个文件,包括两种类型的系统日志:
(1)调用的细节,有两个接口文件,存储不同类型的拨号器信息;
(2)元数据表,后缀“映射”,关于系统代码映射与实际的业务含义。
操作项:
1。从[Call_Details_Interface_1提取关键信息。txt)和名称与Call_Details_Interface_1输出表,只保留下面的列:(5分)
答:column1调用启动时间;
b . column2调用序列号码;
c . column3叫惟一的ID;
d . column5代理员工ID;
大肠column9 ID调用操作原因;
f . column24调用处理代码(作业);
小贴士:
——完全43列在文件;
-调用启动时间+叫序列号+叫惟一的ID可以识别的独特要求每一个员工。
原始的快照文件:
2。从[Call_Details_Interface_2提取关键信息。txt)和名称与Call_Details_Interface_2输出表,只保留下面的列:(5分)
答:column1调用启动时间;
b . column2调用序列号码;
c . column3叫惟一的ID;
d . column8活动ID;
大肠column10客户ID;
小贴士:
——完全31列在文件中;
-调用启动时间+叫序列号+叫惟一的ID可以识别的独特要求每一个员工。
原始的快照文件:
3所示。提取(Phone_Banking_Disposition_Code_Mapping映射表。txt)和名称与PB_Disposition_Code输出表。(5分)
答:column1调用处理代码
b . column3叫性格描述;
原始的快照文件:
4所示。提取(成功接触代码列表。txt]和[交易代码列表。txt),他们输出表Success_Cntc_Code和Deal_Code(5分)
如果调用成功接触代码列表中的性格描述和交易代码列表,然后定义调用成功联系客户;
如果调用性格描述交易代码列表中,然后调用定义为成功的产品销售给客户;
5。使用Call_Details_Interface_1作为基表,加入3表从步骤1、2、3。的连接键Call_Details_Interface_2 CALL_START_TIME,CALL_SEQ_NUM CALL_ID。的连接键PB_Disposition_Code CALL_DISPOSITION_CODE。
基地4 a和4 b,总结成功调用,总数和总数量的调用是成功销售的产品。(10分)
6。列出的全球员工取得了最大的电话。(5分)
7所示。列出最成功销售产品的全球员工。(5分)
8。列出了全球最高效的员工。(销售/总电话)(10分)
注意事项
带空U盘
翻译
有空写注释
帮助文档的充分了解
比赛时 注意建立的逻辑库名 还有什么对表格的名称要求 最好自己建立得要让人容易理解
建立表格时 若变量太多太长 自己适当简化
写word时候 有结果有说明的去做~