题解 | #2021年11月每天新用户的次日留存率#
2021年11月每天新用户的次日留存率
https://www.nowcoder.com/practice/1fc0e75f07434ef5ba4f1fb2aa83a450
# select
# DATE(a.in_time) dt,
# COUNT(DISTINCT a.uid) 用户数,
# SUM(DATEDIFF(a.in_time,b.in_time)=-1) 次日用户数,
# round(SUM(DATEDIFF(a.in_time,b.in_time)=-1)/COUNT(DISTINCT a.uid),2) 次日用户留存率
# FROM tb_user_log a
# left join tb_user_log b
# on a.uid=b.uid
# where YEAR(a.in_time)=2021 and MONTH(a.in_time)=11
# GROUP BY DATE(a.in_time)
# order by 次日用户留存率 desc
select t1.dt dt,
round(COUNT(t2.uid)/COUNT(t1.uid),2) uv_left_rate
from
(select uid
,min(date(in_time)) dt
from tb_user_log
group by uid)t1
LEFT JOIN
(select uid , date(in_time) dt
from tb_user_log
union
select uid , date(out_time)
from tb_user_log)t2
on t1.uid=t2.uid
and DATEDIFF(t1.dt,t2.dt)=-1
where YEAR(t1.dt)=2021 and MONTH(t1.dt)=11
GROUP BY t1.dt
order by t1.dt,uv_left_rate
这道题目主要难点:
①原始表有两个时间一个是in_time,另一个是out_time 也就是所谓的用户登陆时间和用户退出时间,
②退出时间可能跨天
按照以往我们算次日留存率的做法表自己和表自己左连接 加一个筛选条件时间差一天计数就可以实现,但是本题有两个时间,所以在梳理清楚业务逻辑;
用户次日留存率=次日的用户活跃的总数/新用户总数
一、观察原始表
二、构建用户首次登陆表
select uid,
min(date(in_time)) dt
from tb_user_log
group by uid
三、因为该题目明确说如果退出时间跨天,该用户还会被在第二天再记录一遍活跃用户,所有这里我们用union 取in_time和out_time取并集
select uid , date(in_time) dt from tb_user_log union select uid , date(out_time) from tb_user_log
select t1.dt dt,
round(COUNT(t2.uid)/COUNT(t1.uid),2) uv_left_rate
from
(select uid
,min(date(in_time)) dt
from tb_user_log
group by uid)t1
LEFT JOIN
(select uid , date(in_time) dt
from tb_user_log
union
select uid , date(out_time)
from tb_user_log)t2
on t1.uid=t2.uid
and DATEDIFF(t1.dt,t2.dt)=-1
where YEAR(t1.dt)=2021 and MONTH(t1.dt)=11
GROUP BY t1.dt
order by uv_left_rate desc
查看7道真题和解析