题解 | #筛选某店铺最有价值用户中消费最多前5名#
筛选某店铺最有价值用户中消费最多前5名
https://www.nowcoder.com/practice/58655010a7c34e9fb2b7b491c3f79ca4
import pandas as pd
import numpy as np
# RFM scoring script: read sales.csv, score every user by quartile on
# recency / frequency / monetary, build a three-digit RFM class, then report
# the top spenders among the best-scored users.

# Quantile boundaries shared by the three RFM dimensions.
QUARTILE_CUTS = (0.25, 0.5, 0.75)
# Columns shown in both printed reports.
REPORT_COLUMNS = ["user_id", "recency", "frequency", "monetary", "RFMClass"]
# Class of a user who scores 4 on recency, frequency and monetary alike.
BEST_RFM_CLASS = "444"


def quartile_scores(values, labels):
    """Score each entry of *values* by the quartile it falls into.

    Args:
        values: numeric pandas Series to score.
        labels: four scores, in order, for entries that are
            ``<= q25``, ``<= q50``, ``<= q75`` and ``> q75``.

    Returns:
        A Series of scores sharing the index of *values*.
    """
    # Compute the three cut points once instead of once per row.
    cuts = values.quantile(list(QUARTILE_CUTS)).to_numpy()
    # side="left" counts the cut points strictly below each value, which is
    # exactly the "first threshold the value is <= to" rule, and it stays
    # correct when several cut points are equal (heavily tied data).
    bucket = np.searchsorted(cuts, values.to_numpy(), side="left")
    return pd.Series(np.asarray(labels)[bucket], index=values.index)


def add_rfm_class(df):
    """Add R/F/M quartile scores and the combined ``RFMClass`` column to *df*.

    The frame is modified in place and also returned for convenience.
    """
    # NOTE(review): assumes a smaller recency means a more recent purchase, so
    # the lowest recency quartile earns 4 while the highest frequency and
    # monetary quartiles earn 4 -- confirm against the task statement.
    df["R_Quartile"] = quartile_scores(df["recency"], (4, 3, 2, 1))
    df["F_Quartile"] = quartile_scores(df["frequency"], (1, 2, 3, 4))
    df["M_Quartile"] = quartile_scores(df["monetary"], (1, 2, 3, 4))
    # Element-wise string concatenation; the scores must be cast to str first,
    # otherwise "+" would add the integers instead of joining the digits.
    df["RFMClass"] = (
        df["R_Quartile"].astype(str)
        + df["F_Quartile"].astype(str)
        + df["M_Quartile"].astype(str)
    )
    return df


def top_spenders(df, rfm_class=BEST_RFM_CLASS, n=5):
    """Return the *n* rows of class *rfm_class* with the highest ``monetary``."""
    selected = df[df["RFMClass"] == rfm_class]
    # mergesort is stable, so users with equal spend keep their file order.
    ranked = selected.sort_values(by="monetary", ascending=False, kind="mergesort")
    return ranked.head(n)


def main():
    """Read ``sales.csv``, score the users and print the two reports."""
    # 1. Load the data.
    df = pd.read_csv("sales.csv", encoding="utf8")
    # 2-3. Score each dimension and build the combined class.
    add_rfm_class(df)
    # 4. Print the first rows, a blank separator, then the top spenders.
    # reset_index copes with files shorter than five rows.
    print(df[REPORT_COLUMNS].head().reset_index(drop=True))
    print()
    print(top_spenders(df)[REPORT_COLUMNS])


if __name__ == "__main__":
    main()

