事先我已经跑好数据,产出了一个包含
    R:Recency,最近一次消费距今天数
    F:Frequency,最近一周订单数
    M:Monetary,最近一周用户消费额
    三个数据的表。

    import pandas as pd

    # Load the pre-computed RFM table; the later snippets read its
    # r / f / m columns through the `df` name.
    rfm_path = '/Users/zhaoningbo/dataset/rfm.xlsx'
    df = pd.read_excel(rfm_path)

    image.png

    聚类之前本应先做一次异常值检验(K-Means 对异常值比较敏感),这次先跳过。

    import pandas as pd
    # Import the offline submodule explicitly so the `plotly.offline.*` calls
    # below resolve. `import plotly.plotly as py` only bound the name `py`
    # (which was never used) and that module no longer ships with plotly 4+.
    import plotly.offline

    # A single 3D marker trace over the whole table: x = r, y = f, z = m.
    scatter = dict(
        mode="markers",
        name="y",
        type="scatter3d",
        x=df['r'], y=df['f'], z=df['m'],
        marker=dict(size=2, color="rgb(23, 190, 207)"),
    )
    # Shared layout: titled 3D scene with the zero lines hidden on every axis.
    layout = dict(
        title='3d point clustering',
        scene=dict(
            xaxis=dict(zeroline=False),
            yaxis=dict(zeroline=False),
            zaxis=dict(zeroline=False),
        ),
    )
    fig = dict(data=[scatter], layout=layout)
    # Initialise plotly's plotting mode for the Jupyter notebook, then render
    # the figure inline.
    plotly.offline.init_notebook_mode()
    plotly.offline.iplot(fig, filename='3d point clustering')

    image.png

    # `np` is used below, so import it here; no visible snippet does.
    import numpy as np
    # KMeans from scikit-learn's clustering module.
    from sklearn.cluster import KMeans

    # Convert the three columns into an (n_rows, 3) array: one row per
    # record, columns in the order r, f, m.
    tmp = np.array([df.r, df.f, df.m]).T
    # Split into 3 clusters and fit on the data.
    # NOTE: no random_state is set, so the label numbering may differ
    # between runs.
    kms = KMeans(n_clusters=3)
    y = kms.fit_predict(tmp)
    # Echo the predicted cluster label of every row (notebook cell output).
    y

    array([0, 2, 1, …, 2, 1, 0], dtype=int32)

    # Attach the predicted cluster label to every row.
    df["type"] = y
    # One sub-frame per cluster label (0, 1, 2).
    d0, d1, d2 = (df[df["type"] == label] for label in range(3))
    # Show how many rows landed in each cluster.
    df["type"].value_counts()

    0 587
    2 264
    1 227
    Name: type, dtype: int64

    import pandas as pd
    # Import the offline submodule explicitly so the `plotly.offline.*` calls
    # below resolve. `import plotly.plotly as py` only bound the name `py`
    # (which was never used) and that module no longer ships with plotly 4+.
    import plotly.offline

    def _cluster_trace(frame, name, color):
        """Return a 3D marker trace (x = r, y = f, z = m) for one cluster."""
        return dict(
            mode="markers",
            name=name,
            type="scatter3d",
            x=frame['r'], y=frame['f'], z=frame['m'],
            marker=dict(size=2, color=color),
        )

    # One trace per cluster, each in its own colour.
    scatter0 = _cluster_trace(d0, "d0", "rgb(23, 190, 207)")
    scatter1 = _cluster_trace(d1, "d1", "rgb(49,54,149)")
    scatter2 = _cluster_trace(d2, "d2", "rgb(253,174,97)")
    # Reuses the `layout` dict defined earlier for the unclustered plot.
    fig = dict(data=[scatter0, scatter1, scatter2], layout=layout)
    # Initialise plotly's plotting mode for the Jupyter notebook, then render
    # the figure inline.
    plotly.offline.init_notebook_mode()
    plotly.offline.iplot(fig, filename='3d point clustering')

    image.png

    因为一开始就选择了用户价值模型(RFM),特征只有 R、F、M 三个维度,所以没有做降维。