用户行为日志表tb_user_log
- 用户等级标准简化为:忠实用户(近7天活跃过且非新晋用户)、新晋用户(近7天新增)、沉睡用户(近7天未活跃但更早前活跃过)、流失用户(近30天未活跃但更早前活跃过)。
- 假设今天就是数据中所有日期的最大值。
- 近7天表示包含当天T的近7天,即闭区间[T-6, T]。
用户行为日志表tb_user_log
DROP TABLE IF EXISTS tb_user_log; CREATE TABLE tb_user_log ( id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID', uid INT NOT NULL COMMENT '用户ID', artical_id INT NOT NULL COMMENT '视频ID', in_time datetime COMMENT '进入时间', out_time datetime COMMENT '离开时间', sign_in TINYINT DEFAULT 0 COMMENT '是否签到' ) CHARACTER SET utf8 COLLATE utf8_bin; INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES (109, 9001, '2021-08-31 10:00:00', '2021-08-31 10:00:09', 0), (109, 9002, '2021-11-04 11:00:55', '2021-11-04 11:00:59', 0), (108, 9001, '2021-09-01 10:00:01', '2021-09-01 10:01:50', 0), (108, 9001, '2021-11-03 10:00:01', '2021-11-03 10:01:50', 0), (104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0), (104, 9003, '2021-09-03 11:00:45', '2021-09-03 11:00:55', 0), (105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0), (102, 9001, '2021-10-30 10:00:00', '2021-10-30 10:00:09', 0), (103, 9001, '2021-10-21 10:00:00', '2021-10-21 10:00:09', 0), (101, 0, '2021-10-01 10:00:00', '2021-10-01 10:00:42', 1);
忠实用户|0.43 新晋用户|0.29 沉睡用户|0.14 流失用户|0.14
SELECT user_grade, round(count(uid)/(select count(distinct uid) from tb_user_log),2) ratio FROM (SELECT uid, (CASE WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(max(in_time)))<=6 AND DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(min(in_time)))>6 THEN '忠实用户' WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(min(in_time)))<=6 THEN '新晋用户' WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(max(in_time))) BETWEEN 7 AND 29 THEN '沉睡用户' WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(max(in_time)))>29 THEN '流失用户' END) AS user_grade FROM tb_user_log GROUP BY uid) a GROUP BY user_grade ORDER BY ratio DESC;
select uid, date(min(in_time)) as first_in, date(max(out_time)) as last_in from tb_user_log group by uid然后根据此表中的首次登陆日期first_in和最近一次活跃日期last_in来给用户分级
select ( case when datediff('2021-11-04',first_in)<7 then '新晋用户' when datediff('2021-11-04',first_in)>=7 and datediff('2021-11-04',last_in)<7 then '忠实用户' when datediff('2021-11-04',last_in)>=30 then '流失用户' else '沉睡用户' end ) as user_grade from ( select uid, date(min(in_time)) as first_in, date(max(out_time)) as last_in from tb_user_log group by uid ) t1并将以上表作为临时表
select user_grade, round(count(user_grade)/ (select count(*) from temp) ,2)as ratio from temp group by user_grade order by ratio desc整体代码如下
with temp as ( select ( case when datediff('2021-11-04',first_in)<7 then '新晋用户' when datediff('2021-11-04',first_in)>=7 and datediff('2021-11-04',last_in)<7 then '忠实用户' when datediff('2021-11-04',last_in)>=30 then '流失用户' else '沉睡用户' end ) as user_grade from ( select uid, date(min(in_time)) as first_in, date(max(out_time)) as last_in from tb_user_log group by uid ) t1 ) select user_grade, round(count(user_grade)/ (select count(*) from temp) ,2)as ratio from temp group by user_grade order by ratio desc有问题欢迎批评指正
select tag,round(count(*)/ (select count(distinct uid) from tb_user_log),2) as ratio from (select (case when datediff((select max(in_time) from tb_user_log),max(in_time))<=6 and datediff((select max(in_time) from tb_user_log),min(in_time))>=7 then "忠实用户" when datediff((select max(in_time) from tb_user_log),min(in_time))<=6 then "新晋用户" when datediff((select max(in_time) from tb_user_log),max(in_time))>=30 then "流失用户" else "沉睡用户" end) as tag from tb_user_log group by uid) as t group by tag order by ratio desc之前结果怎么都不对,原来就是差一个对uid的去重!那里是distinct uid!
select user_grade,round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) ratio from( select uid ,case when t_day<7 and t_count=1 then '新晋用户' when t_day<7 and t_count<>1 then '忠实用户' when t_day between 7 and 29 then '沉睡用户' else '流失用户' end user_grade from( select * ,datediff(max(out_time)over(),max(out_time)over(partition by uid)) t_day ,count(1)over(partition by uid) t_count from tb_user_log ) t ) t2 group by user_grade order by ratio desc;
【场景】:近几天活跃
【分类】:条件函数、日期函数、datediff
难点:
1.这个题有一个坑题目中没有说清楚,沉睡用户(近7天未活跃但更早前活跃过),根据定义它是包含流失用户(近30天未活跃但更早前活跃过),也就是说只要是流失用户就是沉睡用户!实际上答案并不包括。所以沉睡用户应该这样定义:近7天未活跃但更早前活跃过且非流失用户。
2.case when 判断使每个类型的用户不会有重合。而且先判断哪个后判断哪个尤其重要,顺序不能换!
新学到:
1.用户最近一次活跃时间:max(date);一直没有想到,其实在之前是知道求用户第一次活跃时间是用:min(date)。如果使用这个会省很多力气。
因为最后一次活跃的时间可以用来确定 近几天未活跃但更早前活跃过
2.select里面可以套用select,尤其是从另外一个表用group by查询,因为group by又不好放在总的框架里面。
(1)统计用户第一次活跃的时间;用户最后一次活跃的时间
如果in_time-进入时间和out_time-离开时间跨天了,在两天里都记为该用户活跃过
(2)近7天和近30天,涉及两个不同的活跃条件
一种方法是求出第7天是什么时候:日期减去天数得到日期;另一种是日期减去日期得到天数
(3)对用户分类且求比例
case when 判断就使每个类型的用户不会有重合,所以‘新晋用户’在’忠实用户‘的前面,’流失用户‘在’沉睡用户‘的前面。
方法一:
with子句 + 使用 union 合并几种分类情况
#依次去求每个类型的uid有哪些 with main as( #用户第一次活跃的时间 select uid, min(date(in_time)) as min_date from tb_user_log group by uid ), attr as( #近7天和近30天,两个不同条件的活跃记录表 #先求出今日是哪一天,往前七天是那一天,往前30天是那一天 select max(date(in_time)) as now_date, date_sub(max(date(in_time)),interval 6 day) as 7_date, date_sub(max(date(in_time)),interval 29 day) as 30_date from tb_user_log ) ,temp as( #新晋用户(近7天新增) select distinct uid as new_u from main,attr where min_date >= 7_date )#输出:105、102 ,temp1 as( #忠实用户(近7天活跃过且非新晋用户) #when date(in_time)>= 7_date and not in 新晋用户 select distinct uid as zs_u from tb_user_log,attr where date(in_time)>= 7_date and uid not in( select uid from main,attr where min_date >= 7_date ) )#输出:109、108、104 ,temp2 as( #流失用户(近30天未活跃但更早前活跃过) #when date(in_time) < 30_date and not in 近30天活跃过的用户 select distinct uid as ls_u from tb_user_log,attr where date(in_time)< 30_date and uid not in( select uid from tb_user_log,attr where date(in_time) >= 30_date ) )#输出:101 ,temp3 as( #沉睡用户(近7天未活跃但更早前活跃过) #when date(in_time) < 7_date and not in 近7天活跃过的用户 select distinct uid as cs_u from tb_user_log,attr where date(in_time) < 7_date and uid not in( select uid from tb_user_log,attr where date(in_time) >= 7_date ) )#输出:103、101 (select '新晋用户' as user_grade, count(*) as ratio from temp group by user_grade) union (select '忠实用户' as user_grade, count(*) as ratio from temp1 group by user_grade) union (select '流失用户' as user_grade, count(*) as ratio from temp2 group by user_grade) union (select '沉睡用户' as user_grade, count(*) as ratio from temp3 group by user_grade)
方法二:
with 子句 + case when
with main as( #用户第一次活跃的时间,用户最后活跃的时间 select uid, min(date(in_time)) as min_date, max(date(in_time)) as max_date from tb_user_log group by uid ), attr as( #近7天和近30天,涉及两个不同的活跃条件 #先求出今日是哪一天,往前七天是哪一天,往前30天是哪一天 select max(date(in_time)) as now_date, date_sub(max(date(in_time)),interval 6 day) as 7_date, date_sub(max(date(in_time)),interval 29 day) as 30_date from tb_user_log ) #case when 判断就使每个类型的用户不会有重合。 select (case when min_date >= 7_date then '新晋用户' when max_date >= 7_date then '忠实用户' when max_date <= 30_date then '流失用户' else '沉睡用户' end) as user_grade, round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio from main,attr group by user_grade order by ratio desc
方法三:
case when + 日期函数:date_sub()
#case when 判断就使每个类型的用户不会有重合。 select (case when min_date >= 7_date then '新晋用户' when max_date >= 7_date then '忠实用户' when max_date <= 30_date then '流失用户' else '沉睡用户' end) as user_grade, round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio from( #用户第一次活跃的时间,用户最后活跃的时间 select uid, min(date(in_time)) as min_date, max(date(in_time)) as max_date from tb_user_log group by uid ) main, ( #近7天和近30天,涉及两个不同的活跃条件 #先求出今日是哪一天,往前七天是哪一天,往前30天是哪一天 select max(date(in_time)) as now_date, date_sub(max(date(in_time)),interval 6 day) as 7_date, date_sub(max(date(in_time)),interval 29 day) as 30_date from tb_user_log ) attr group by user_grade order by ratio desc
方法四:
case when + 日期函数: timestampdiff(day,expr1,expr2)
#case when 判断就使每个类型的用户不会有重合。 select (case when timestampdiff(day,min_date,'2021-11-04') <= 6 then '新晋用户' when timestampdiff(day,max_date,'2021-11-04') <= 6 then '忠实用户' when timestampdiff(day,max_date,'2021-11-04') >= 30 then '流失用户' else '沉睡用户' end) as user_grade, round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio from( #用户第一次活跃的时间,用户最后活跃的时间 select uid, min(date(in_time)) as min_date, max(date(in_time)) as max_date from tb_user_log group by uid ) main group by user_grade order by ratio desc
方法五:
日期函数:datediff(expr1,expr2) <=6
#case when 判断就使每个类型的用户不会有重合。 select (case when datediff('2021-11-04',min_date) <= 6 then '新晋用户' when datediff('2021-11-04',max_date) <= 6 then '忠实用户' when datediff('2021-11-04',max_date) >= 30 then '流失用户' else '沉睡用户' end) as user_grade, round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio from( #用户第一次活跃的时间,用户最后活跃的时间 select uid, min(date(in_time)) as min_date, max(date(in_time)) as max_date from tb_user_log group by uid ) main group by user_grade order by ratio desc
/*问题:统计活跃间隔对用户分级后 各活跃等级用户占比 结果保留两位小数 且按占比降序排序。 注: 用户等级标准简化为: 忠实用户(近7天活跃过且非新晋用户) 最晚活跃的日期在7天内并且最早活跃日期在7天外 新晋用户(近7天新增) 最早活跃的日期在7天内 沉睡用户(近7天未活跃但更早前活跃过) 最晚活跃日期在7天外并且在30天内 流失用户(近30天未活跃但更早前活跃过)。最晚活跃的日期在30天外 假设今天就是数据中所有日期的最大值。 近7天表示包含当天T的近7天,即闭区间[T-6, T]。 如何求每个用户的最晚活跃日期和最早活跃日期*/ WITH user_day AS (SELECT uid , MAX(DATE(in_time)) AS later_day , MIN(DATE(in_time)) AS early_day FROM tb_user_log GROUP BY uid) SELECT CASE WHEN ud.later_day>=DATE_SUB((SELECT MAX(later_day) FROM user_day), INTERVAL 6 DAY) AND ud.early_day<DATE_SUB((SELECT MAX(later_day) FROM user_day), INTERVAL 6 DAY) THEN '忠实用户' WHEN ud.early_day>=DATE_SUB((SELECT MAX(later_day) FROM user_day), INTERVAL 6 DAY) THEN '新晋用户' WHEN ud.later_day<DATE_SUB((SELECT MAX(later_day) FROM user_day), INTERVAL 29 DAY) THEN '流失用户' ELSE '沉睡用户' END AS user_grade , ROUND(COUNT(ud.uid)/(SELECT COUNT(uid) FROM user_day),2) AS ratio FROM user_day AS ud GROUP BY user_grade ORDER BY ratio DESC
with t as ( select uid, date(min(in_time)) first_day, date(max(out_time)) last_day, max(date(max(out_time))) over() today from tb_user_log group by uid ) select case when datediff(today,first_day) <= 6 then '新晋用户' when datediff(today,last_day) <= 6 then '忠实用户' when datediff(today,last_day) > 29 then '流失用户' else '沉睡用户' end user_grade, round(count(uid) / sum(count(uid)) over(),2) ratio from t group by user_grade order by ratio desc;
select distinct user_grade, round(count(user_grade) over (partition by user_grade)/ count(user_grade) over(),2) rate from (select case when timestampdiff(day,max(out_time),'2021-11-04 23:59:59') <=6 and timestampdiff(day,min(in_time),'2021-11-04 23:59:59') >6 then "忠实用户" when timestampdiff(day,min(in_time),'2021-11-04 23:59:59') <= 6 then "新晋用户" when timestampdiff(day,max(out_time),'2021-11-04 23:59:59')>6 and timestampdiff(day,max(out_time),'2021-11-04 23:59:59')<=29 then "沉睡用户" when timestampdiff(day,max(out_time),'2021-11-04 23:59:59')>29 then "流失用户" end user_grade from tb_user_log group by uid) a order by rate desc
SELECT user_grade,ROUND(count(*)/ (select count(distinct uid) from tb_user_log),2) as ratio FROM ( SELECT uid, (CASE WHEN DATEDIFF('2021-11-04',MAX(DATE(out_time)))<=7 AND DATEDIFF('2021-11-04',MIN(DATE(in_time)))>7 THEN "忠实用户" WHEN DATEDIFF('2021-11-04',MAX(DATE(in_time)))<=7 THEN "新晋用户" WHEN DATEDIFF('2021-11-04',MAX(DATE(out_time)))>30 THEN "流失用户" WHEN 7<DATEDIFF('2021-11-04',MAX(DATE(out_time)))<=30 THEN "沉睡用户" END) AS user_grade FROM tb_user_log GROUP BY uid )a GROUP BY user_grade ORDER BY ratio DESC为什么这个代码自测的时候都是对的,提交之后就只有2/3用例是对的o(TヘTo)
# 问题:统计活跃间隔对用户分级后,各活跃等级用户占比,结果保留两位小数,且按占比降序排序。 with t1 as( select *,date(date_format(max(in_time) over(),"%Y-%m-%d")) as cur_time from tb_user_log ) , `近7天` as( select distinct uid from t1 where date(date_format(in_time,"%Y-%m-%d")) between cur_time-INTERVAL 6 DAY and cur_time ) , `近7天之前` as( select distinct uid from t1 where date(date_format(in_time,"%Y-%m-%d")) < cur_time-INTERVAL 6 DAY ) , `近30天` as( select distinct uid from t1 where date(date_format(in_time,"%Y-%m-%d")) between cur_time-INTERVAL 29 DAY and cur_time ) , `近30天之前` as( select distinct uid from t1 where date(date_format(in_time,"%Y-%m-%d")) < cur_time-INTERVAL 29 DAY ) , `新晋用户` as( select * from `近7天` where uid not in(select uid from `近7天之前`) ) , `忠实用户` as( select * from `近7天` where uid not in(select uid from `新晋用户`) ) , `流失用户` as( select * from `近30天之前` where uid not in(select uid from `近30天`) ) , `沉睡用户` as( select * from `近7天之前` where uid not in(select uid from `近7天`) and uid not in (select uid from `流失用户`) ) select user_grade,round(cnt/sum(cnt) over() ,2) as ratio from ( select "忠实用户" as user_grade,count(*) as cnt from `忠实用户` union all select "新晋用户" as user_grade,count(*) as cnt from `新晋用户` union all select "沉睡用户" as user_grade,count(*) as cnt from `沉睡用户` union all select "流失用户" as user_grade,count(*) as cnt from `流失用户` ) t1 order by ratio desc
with a as( select uid, (select max(date(out_time)) from tb_user_log) now, min(date(in_time)) intime, max(date(out_time)) newtime FROM tb_user_log GROUP BY uid ), b as ( select uid, (case when datediff(now,intime) > 6 and datediff(now,newtime) <= 6 then '忠实用户' when datediff(now,intime) <= 6 then '新晋用户' when datediff(now,intime) > 29 and datediff(now,newtime) > 29 then '流失用户' else '沉睡用户' end )user_grade from a ) select user_grade, round(count(uid) / (select count(distinct uid) from tb_user_log), 2) ratio from b group by user_grade order by ratio desc;
# max(date(out_time)) 是最后活跃时间 # min(date(in_time)) 是首次活跃时间 当作注册时间 select user_grade ,round(count(t.uid) /(select count(distinct uid) from tb_user_log ) ,2) from (select uid ,case when max(date(out_time)) between DATE_ADD('2021-11-04',INTERVAL -7 day ) and '2021-11-04' and min(date(in_time)) < DATE_ADD('2021-11-04',INTERVAL -7 day ) then "忠实用户" when max(date(out_time)) between DATE_ADD('2021-11-04',INTERVAL -6 day ) and '2021-11-04' and min(date(in_time)) >= DATE_ADD('2021-11-04',INTERVAL -7 day ) then '新晋用户' when max(date(out_time)) between DATE_ADD('2021-11-04',INTERVAL -29 day ) and DATE_ADD('2021-11-04',INTERVAL -7 day ) then '沉睡用户' when max(date(out_time)) <= DATE_ADD('2021-11-04',INTERVAL -30 day ) then '流失用户' end user_grade from tb_user_log group by uid) t group by t.user_grade order by t.user_grade ; #第一次是这么写的.... (select '忠实用户' , round(count(distinct a.uid)/(select count(distinct uid) from tb_user_log) ,2) u1 from tb_user_log a, (select uid,min(in_time) dt from tb_user_log group by uid) b where a.uid = b.uid and a.artical_id<>0 and DATE_FORMAT(a.in_time,'%Y-%m-%d')!= DATE_FORMAT(b.dt,'%Y-%m-%d') and a.in_time <='2021-11-04 23:59:59' and DATE_ADD(a.in_time,INTERVAL 7 day )>='2021-11-04 23:59:59') union (select '新晋用户' ,round( count(a.uid)/(select count(distinct uid) from tb_user_log) ,2) u2 from tb_user_log a, (select uid,min(in_time) dt from tb_user_log group by uid) b where a.uid = b.uid and a.artical_id<>0 and DATE_FORMAT(a.in_time,'%Y-%m-%d')= DATE_FORMAT(b.dt,'%Y-%m-%d') and a.in_time <='2021-11-04 23:59:59' and DATE_ADD(a.in_time,INTERVAL 7 day )>='2021-11-04 23:59:59') union (select '沉睡用户', round(count(a.uid)/(select count(distinct uid) from tb_user_log),2) u3 from tb_user_log a where DATE_ADD(a.in_time,INTERVAL 7 day )<'2021-11-04 23:59:59' and DATE_ADD(a.in_time,INTERVAL 30 day )>='2021-11-04 23:59:59' and a.uid not in (select uid from tb_user_log where in_time <='2021-11-04 23:59:59' and DATE_ADD(in_time,INTERVAL 7 day )>='2021-11-04 23:59:59' )) union (select '流失用户', round( count(a.uid)/(select count(distinct uid) from tb_user_log),2 ) u4 from tb_user_log a where DATE_ADD(a.in_time,INTERVAL 30 day )<'2021-11-04 23:59:59' and a.uid not in (select uid from tb_user_log where in_time <='2021-11-04 23:59:59' and DATE_ADD(in_time,INTERVAL 30 day )>='2021-11-04 23:59:59' ) );
select case when datediff(@n,f)<7 then '新晋用户' when datediff(@n,l)<7 then '忠实用户' when datediff(@n,l)>29 then '流失用户' else '沉睡用户' end ,round(count(u)/sum(count(u))over(),2) from(select uid u,min(in_time) f,max(out_time) l from tb_user_log group by 1) a ,(select @n:=max(out_time) from tb_user_log) b group by 1 order by 2 desc;优雅就完事~
select( case when date_format(in_time,'%Y-%m-%d') between date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and (select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log) and not exists (select in_time from tb_user_log t2 where in_time<date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and t2.uid=t.uid) then '新晋用户' when exists (select in_time from tb_user_log t2 where in_time<date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and t2.uid=t.uid) and (date_format(in_time,'%Y-%m-%d') between date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and (select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log)) then '忠实用户' when date_format(in_time,'%Y-%m-%d')<date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 29 day) and not exists (select in_time from tb_user_log t2 where in_time>=date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 29 day) and t2.uid=t.uid) then '流失用户' when date_format(in_time,'%Y-%m-%d')<date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and not exists (select in_time from tb_user_log t2 where in_time>=date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and t2.uid=t.uid) then '沉睡用户' end) as user_grade, round(count(uid)/(select count(distinct uid) from tb_user_log),2) ratio from tb_user_log t group by user_grade order by ratio desc有没有大佬知道为啥我的结果是这样啊
None | 0.43 |
忠实用户 | 0.43 |
新晋用户 | 0.29 |
沉睡用户 | 0.14 |
流失用户 | 0.14 |
with user_list as ( select uid, case when user_first_active_time_diff between 0 and 6 then '新晋用户' when recent_active_diff between 0 and 6 then '忠实用户' when recent_active_diff between 7 and 29 then '沉睡用户' else '流失用户' end as user_grade from ( select uid, timestampdiff(day,time_time,recent_time) recent_active_diff, timestampdiff(day,user_first_active_time,recent_time) user_first_active_time_diff from ( select uid, max(time_time) as time_time, recent_time, user_first_active_time from ( select uid, substr(out_time,1,10) as time_time, first_value(substr(out_time,1,10)) over(order by substr(out_time,1,10) desc) as recent_time, first_value(substr(out_time,1,10)) over(partition by uid order by substr(out_time,1,10) asc) as user_first_active_time from tb_user_log ) tmp1 group by uid, recent_time, user_first_active_time ) tmp ) tmp2 ) select user_grade, round(count(distinct uid)/(select count(distinct uid) from user_list),2) as ratio from user_list group by user_grade order by round(count(distinct uid)/(select count(distinct uid) from user_list),2) desc
with t1 as (select uid,min(date(in_time))as dt,max(date(in_time)) as dt_max from tb_user_log group by uid ) select case when datediff('2021-11-04',dt) <=6 then '新晋用户' when datediff('2021-11-04',dt_max) <=6 then '忠实用户' when datediff('2021-11-04',dt_max) >=30 then '流失用户' else '沉睡用户' end as user_grade, round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio from t1 group by user_grade order by ratio desc
首先算出每个用户的新增日期和最近活跃日期
然后判断每个用户属于哪个类别
case when的判断是从上而下的,所以顺序可以设计一下;
对于时间差是<7还是<=7 要判断好。
with tb1 as (select a.uid,d1,d2,(case when timestampdiff(day,d1,date((select date(max(out_time))from tb_user_log)))>6 and d2 is not null then '忠实用户' when timestampdiff(day,d1,date((select date(max(out_time))from tb_user_log)))<7 and d2 is not null then '新晋用户' when timestampdiff(day,d1,date((select date(max(out_time))from tb_user_log))) between 7 and 29 and d2 is null then '沉睡用户' when timestampdiff(day,d1,date((select date(max(out_time))from tb_user_log)))>29 then '流失用户' end ) user_grade from ( select uid, date(min(in_time)) d1 from tb_user_log group by uid) a #首次登入时间表 left join (select uid, date(min(in_time)) d2 from tb_user_log where timestampdiff(day,date(in_time),date((select date(max(out_time))from tb_user_log)))<7 group by uid) b #近7天内用户首次登入时间表 on a.uid=b.uid) select user_grade,round(count(user_grade)/(select count(user_grade) from tb1),2) ratio from tb1 group by user_grade order by ratio desc;
WITH t1 as ( SELECT uid, max(date(out_time)) as max_out_time, min(date(out_time)) as min_out_time, (SELECT max(date(out_time)) FROM tb_user_log) as max_time FROM tb_user_log GROUP BY uid ) SELECT case when datediff(max_time, min_out_time) > 7 AND datediff(max_time, max_out_time) < 7 then '忠实用户' when datediff(max_time, min_out_time) < 7 AND datediff(max_time, max_out_time) < 7 then '新晋用户' when datediff(max_time, min_out_time) > 7 AND datediff(max_time, max_out_time) > 29 then '流失用户' else '沉睡用户' end as user_grade, round(count(distinct uid) / (SELECT count(distinct uid) FROM tb_user_log), 2) as ratio FROM t1 GROUP BY user_grade ORDER BY ratio desc
with a as ( select uid, date(min(in_time)) frist_dt, date(max(out_time)) last_dt, (select max(out_time) from tb_user_log) today from tb_user_log group by 1 ) select user_grade , round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) ratio from ( select uid, case when datediff(today,frist_dt)<=6 then '新晋用户' when datediff(today,frist_dt)>6 and datediff(today,last_dt)<=6 then '忠实用户' when datediff(today,frist_dt)>6 and datediff(today,last_dt)>=29 then '流失用户' when datediff(today,frist_dt)>6 and datediff(today,last_dt)>6 then '沉睡用户' else '其他' end user_grade from a ) b group by user_grade order by ratio desc