题解 | #统计活跃间隔对用户分级结果#
统计活跃间隔对用户分级结果
https://www.nowcoder.com/practice/6765b4a4f260455bae513a60b6eed0af
【问题分析】
要统计的用户类型涉及两个日期:1、最近活跃日期last_dt
;2、注册日期first_dt
。其中对每个用户而言,最小的登入日期就是注册日期,而最近活跃日期就是最大登出日期。然后将以上日期与‘今天’日期做差就可以得到最近活跃与注册距离‘今天’的差,于是问题可以转换为:
忠实用户(近7天活跃过且非新晋用户)=last_dt_diff<7 and first_dt_diff>=7
新晋用户(近7天新增)=first_dt_diff<7
沉睡用户(近7天未活跃但更早前活跃过)=last_dt_diff>=7
流失用户(近30天未活跃但更早前活跃过)=last_dt_diff>=30
【问题求解】 第一步:获得每个用户的注册时间、最近活跃时间、‘今天’的日期以及用户总量
select
distinct
uid
,min(date(in_time)) over (partition by uid) as first_dt
,max(date(out_time)) over (partition by uid) as last_dt
,max(date(out_time)) over() as cur_dt
,t1.user_cnt
from tb_user_log
left join
(select
count(distinct uid) as user_cnt
from tb_user_log) t1 on 1; -- 注意:拼接条件是on 1
----
+-----+------------+------------+------------+----------+
| uid | first_dt | last_dt | cur_dt | user_cnt |
+-----+------------+------------+------------+----------+
| 101 | 2021-10-01 | 2021-10-01 | 2021-11-04 | 7 |
| 102 | 2021-10-30 | 2021-10-30 | 2021-11-04 | 7 |
| 103 | 2021-10-21 | 2021-10-21 | 2021-11-04 | 7 |
| 104 | 2021-09-03 | 2021-11-02 | 2021-11-04 | 7 |
| 105 | 2021-11-03 | 2021-11-03 | 2021-11-04 | 7 |
| 108 | 2021-09-01 | 2021-11-03 | 2021-11-04 | 7 |
| 109 | 2021-08-31 | 2021-11-04 | 2021-11-04 | 7 |
+-----+------------+------------+------------+----------+
7 rows in set (0.00 sec)
第二步:获得注册与最近活跃日期距离‘今天’的日期差。
select
uid
,user_cnt
,timestampdiff(day,first_dt,cur_dt) as first_dt_diff
,timestampdiff(day,last_dt,cur_dt) as last_dt_diff
from
(
select
distinct
uid
,min(date(in_time)) over (partition by uid) as first_dt
,max(date(out_time)) over (partition by uid) as last_dt
,max(date(out_time)) over() as cur_dt
,t1.user_cnt
from tb_user_log
left join
(select
count(distinct uid) as user_cnt
from tb_user_log) t1 on 1
) t2
----
+-----+----------+---------------+--------------+
| uid | user_cnt | first_dt_diff | last_dt_diff |
+-----+----------+---------------+--------------+
| 101 | 7 | 34 | 34 |
| 102 | 7 | 5 | 5 |
| 103 | 7 | 14 | 14 |
| 104 | 7 | 62 | 2 |
| 105 | 7 | 1 | 1 |
| 108 | 7 | 64 | 1 |
| 109 | 7 | 65 | 0 |
+-----+----------+---------------+--------------+
7 rows in set (0.00 sec)
第三步:统计出结果。最终代码如下:
select
user_grade
,round(count(1)/max(user_cnt),2) as ratio
from
(select
uid
,user_cnt
,case
when last_dt_diff>=30 then '流失用户'
when last_dt_diff>=7 then '沉睡用户'
when first_dt_diff<7 then '新晋用户'
when last_dt_diff<7 and first_dt_diff>=7 then '忠实用户'
end as user_grade
from
(select
uid
,user_cnt
,timestampdiff(day,first_dt,cur_dt) as first_dt_diff
,timestampdiff(day,last_dt,cur_dt) as last_dt_diff
from
(
select
distinct
uid
,min(date(in_time)) over (partition by uid) as first_dt
,max(date(out_time)) over (partition by uid) as last_dt
,max(date(out_time)) over() as cur_dt
,t1.user_cnt
from tb_user_log
left join
(select
count(distinct uid) as user_cnt
from tb_user_log) t1 on 1
) t2
)t3
) t4
group by user_grade
order by user_grade, ratio desc;
#SQL练习记录#