题解 | #2021年11月每天新用户的次日留存率#
2021年11月每天新用户的次日留存率
https://www.nowcoder.com/practice/1fc0e75f07434ef5ba4f1fb2aa83a450
### 解题思路: 通过if(in_time=min(in_time)over(partition by uid order by date_format(in_time,'%Y-%m-%d'))) 窗口函数计算得到每个新增用户的日期(1),依次通过If函数得到新增用户的离开日期(2),通过lead()窗口函数得到每个新增用户的下次登录系统的进入时间(3)和离开时间(4) ; 通过时间dt分组,依次比较时间(2)(3)(4)同时间(1)的关系,过滤时间差在1天之内的,时间(1)不为空且时间戳为’2021-11‘ ; 同时having count(A.indt)>0
select A.dt,
round(sum(if(datediff(nextindt,indt)=1 or datediff(nextoutdt,indt)=1 or datediff(outdt,indt)=1,1,0))/count(A.indt),2) uv_left_rate
from (
select date_format(in_time,'%Y-%m-%d') dt,
uid ,
if(in_time=min(in_time)over(partition by uid),date_format(in_time,'%Y-%m-%d'),null) indt, #每个新增用户的进入日期
if(in_time=min(in_time)over(partition by uid),date_format(out_time,'%Y-%m-%d'),null) outdt, #每个新增用户的离开日期
lead(date_format(in_time,'%Y-%m-%d'),1) over(partition by uid order by date_format(in_time,'%Y-%m-%d')) nextindt,#下次活跃的时间
lead(date_format(out_time,'%Y-%m-%d'),1) over(partition by uid order by date_format(out_time,'%Y-%m-%d')) nextoutdt #下次活跃的时间
from tb_user_log
) A
where A.indt is not null and substring_index(A.indt,'-',2) = '2021-11'
group by A.dt
having count(A.indt) != 0
order by A.dt
/*
select t1.dt,round(count(t2.uid)/count(t1.uid),2) uv_rate
from (select uid
,min(date(in_time)) dt
from tb_user_log
group by uid) as t1 -- 每天新用户表
left join (select uid , date(in_time) dt
from tb_user_log
union
select uid , date(out_time) dt
from tb_user_log) as t2 -- 用户活跃表
on t1.uid=t2.uid
and t1.dt=date_sub(t2.dt,INTERVAL 1 day)
where date_format(t1.dt,'%Y-%m') = '2021-11'
group by t1.dt
order by t1.dt
*/
基恩士成长空间 421人发布