事先我已经跑好数据,产出了一张包含以下三个字段的表:
R:Recency,最近一次消费距今天数
F:Frequency,最近一周订单数
M:Monetary,最近一周用户消费额
下面直接读取这张表。
import pandas as pd

# Load the precomputed RFM table; later cells read its r, f and m columns.
df = pd.read_excel('/Users/zhaoningbo/dataset/rfm.xlsx')

应该做一个异常值检验,这次先跳过。
# `import plotly.plotly as py` only bound the name `py`, which this cell never
# used, and that module moved to the chart-studio package in plotly 4. The
# calls below need `plotly.offline`, so import that instead.
import plotly.offline
import pandas as pd

# One 3D marker per row of df, positioned by its r / f / m values.
scatter = dict(
    mode="markers",
    name="y",
    type="scatter3d",
    x=df['r'],
    y=df['f'],
    z=df['m'],
    marker=dict(size=2, color="rgb(23, 190, 207)"),
)

# Shared layout: titled scene with the zero line hidden on every axis.
layout = dict(
    title='3d point clustering',
    scene=dict(
        xaxis=dict(zeroline=False),
        yaxis=dict(zeroline=False),
        zaxis=dict(zeroline=False),
    ),
)

fig = dict(data=[scatter], layout=layout)

# Initialize plotting mode inside the Jupyter notebook, then render inline.
plotly.offline.init_notebook_mode()
plotly.offline.iplot(fig, filename='3d point clustering')

# numpy was used as `np` without a visible import; import it here so the cell
# runs on its own.
import numpy as np
# KMeans from scikit-learn's clustering module.
from sklearn.cluster import KMeans

# Convert the data format: stack the r, f, m columns and transpose so that
# each row of `tmp` is one record with three features.
tmp = np.array([df.r, df.f, df.m]).T

# Split into 3 clusters and fit on the data; `y` holds one label per row.
# NOTE(review): no random_state is set, so which cluster gets label 0/1/2 may
# differ between runs -- confirm whether reproducible labels are needed.
kms = KMeans(n_clusters=3)
y = kms.fit_predict(tmp)

# Show the cluster labels; they are drawn as a scatter plot further below.
y
array([0, 2, 1, …, 2, 1, 0], dtype=int32)
# Attach each row's cluster label, then split the frame into one subset per
# cluster. Bracket access is used for the "type" column so it cannot be
# confused with a DataFrame attribute.
df["type"] = y
d0 = df[df["type"] == 0]
d1 = df[df["type"] == 1]
d2 = df[df["type"] == 2]

# Number of rows assigned to each cluster.
df["type"].value_counts()
0    587
2    264
1    227
Name: type, dtype: int64
# `import plotly.plotly as py` only bound the name `py`, which this cell never
# used, and that module moved to the chart-studio package in plotly 4. The
# calls below need `plotly.offline`, so import that instead.
import plotly.offline
import pandas as pd

# One trace per cluster subset (d0, d1, d2), each with its own marker colour.
scatter0 = dict(
    mode="markers",
    name="d0",
    type="scatter3d",
    x=d0['r'],
    y=d0['f'],
    z=d0['m'],
    marker=dict(size=2, color="rgb(23, 190, 207)"),
)
scatter1 = dict(
    mode="markers",
    name="d1",
    type="scatter3d",
    x=d1['r'],
    y=d1['f'],
    z=d1['m'],
    marker=dict(size=2, color="rgb(49,54,149)"),
)
scatter2 = dict(
    mode="markers",
    name="d2",
    type="scatter3d",
    x=d2['r'],
    y=d2['f'],
    z=d2['m'],
    marker=dict(size=2, color="rgb(253,174,97)"),
)

# `layout` is reused from the earlier raw-scatter cell.
fig = dict(data=[scatter0, scatter1, scatter2], layout=layout)

# Initialize plotting mode inside the Jupyter notebook, then render inline.
plotly.offline.init_notebook_mode()
plotly.offline.iplot(fig, filename='3d point clustering')

因为一开始就选定了 RFM 用户价值模型,特征只有 R、F、M 三个维度,可以直接在三维空间中聚类和展示,所以没有做降维。
