用户行为日志表tb_user_log
- 用户等级标准简化为:忠实用户(近7天活跃过且非新晋用户)、新晋用户(近7天新增)、沉睡用户(近7天未活跃但更早前活跃过)、流失用户(近30天未活跃但更早前活跃过)。
- 假设今天就是数据中所有日期的最大值。
- 近7天表示包含当天T的近7天,即闭区间[T-6, T]。
用户行为日志表tb_user_log
DROP TABLE IF EXISTS tb_user_log;
CREATE TABLE tb_user_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
artical_id INT NOT NULL COMMENT '视频ID',
in_time datetime COMMENT '进入时间',
out_time datetime COMMENT '离开时间',
sign_in TINYINT DEFAULT 0 COMMENT '是否签到'
) CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_log(uid, artical_id, in_time, out_time, sign_in) VALUES
(109, 9001, '2021-08-31 10:00:00', '2021-08-31 10:00:09', 0),
(109, 9002, '2021-11-04 11:00:55', '2021-11-04 11:00:59', 0),
(108, 9001, '2021-09-01 10:00:01', '2021-09-01 10:01:50', 0),
(108, 9001, '2021-11-03 10:00:01', '2021-11-03 10:01:50', 0),
(104, 9001, '2021-11-02 10:00:28', '2021-11-02 10:00:50', 0),
(104, 9003, '2021-09-03 11:00:45', '2021-09-03 11:00:55', 0),
(105, 9003, '2021-11-03 11:00:53', '2021-11-03 11:00:59', 0),
(102, 9001, '2021-10-30 10:00:00', '2021-10-30 10:00:09', 0),
(103, 9001, '2021-10-21 10:00:00', '2021-10-21 10:00:09', 0),
(101, 0, '2021-10-01 10:00:00', '2021-10-01 10:00:42', 1);忠实用户|0.43 新晋用户|0.29 沉睡用户|0.14 流失用户|0.14
SELECT user_grade, round(count(uid)/(select count(distinct uid) from tb_user_log),2) ratio FROM (SELECT uid, (CASE WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(max(in_time)))<=6 AND DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(min(in_time)))>6 THEN '忠实用户' WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(min(in_time)))<=6 THEN '新晋用户' WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(max(in_time))) BETWEEN 7 AND 29 THEN '沉睡用户' WHEN DATEDIFF(DATE((SELECT MAX(in_time) FROM tb_user_log)),date(max(in_time)))>29 THEN '流失用户' END) AS user_grade FROM tb_user_log GROUP BY uid) a GROUP BY user_grade ORDER BY ratio DESC;
select uid, date(min(in_time)) as first_in, date(max(out_time)) as last_in from tb_user_log group by uid然后根据此表中的首次登陆日期first_in和最近一次活跃日期last_in来给用户分级
select
(
case when datediff('2021-11-04',first_in)<7 then '新晋用户'
when datediff('2021-11-04',first_in)>=7 and
datediff('2021-11-04',last_in)<7 then '忠实用户'
when datediff('2021-11-04',last_in)>=30 then '流失用户'
else '沉睡用户' end
) as user_grade
from
(
select
uid,
date(min(in_time)) as first_in,
date(max(out_time)) as last_in
from tb_user_log
group by uid
) t1 并将以上表作为临时表 select user_grade, round(count(user_grade)/ (select count(*) from temp) ,2)as ratio from temp group by user_grade order by ratio desc整体代码如下
with temp as
(
select
(
case when datediff('2021-11-04',first_in)<7 then '新晋用户'
when datediff('2021-11-04',first_in)>=7 and
datediff('2021-11-04',last_in)<7 then '忠实用户'
when datediff('2021-11-04',last_in)>=30 then '流失用户'
else '沉睡用户' end
) as user_grade
from
(
select
uid,
date(min(in_time)) as first_in,
date(max(out_time)) as last_in
from tb_user_log
group by uid
) t1
)
select
user_grade,
round(count(user_grade)/
(select count(*) from temp)
,2)as ratio
from temp
group by user_grade
order by ratio desc 有问题欢迎批评指正select tag,round(count(*)/ (select count(distinct uid) from tb_user_log),2) as ratio from (select (case when datediff((select max(in_time) from tb_user_log),max(in_time))<=6 and datediff((select max(in_time) from tb_user_log),min(in_time))>=7 then "忠实用户" when datediff((select max(in_time) from tb_user_log),min(in_time))<=6 then "新晋用户" when datediff((select max(in_time) from tb_user_log),max(in_time))>=30 then "流失用户" else "沉睡用户" end) as tag from tb_user_log group by uid) as t group by tag order by ratio desc之前结果怎么都不对,原来就是差一个对uid的去重!那里是distinct uid!
select user_grade,round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) ratio from( select uid ,case when t_day<7 and t_count=1 then '新晋用户' when t_day<7 and t_count<>1 then '忠实用户' when t_day between 7 and 29 then '沉睡用户' else '流失用户' end user_grade from( select * ,datediff(max(out_time)over(),max(out_time)over(partition by uid)) t_day ,count(1)over(partition by uid) t_count from tb_user_log ) t ) t2 group by user_grade order by ratio desc;
【场景】:近几天活跃
【分类】:条件函数、日期函数、datediff
难点:
1.这个题有一个坑题目中没有说清楚,沉睡用户(近7天未活跃但更早前活跃过),根据定义它是包含流失用户(近30天未活跃但更早前活跃过),也就是说只要是流失用户就是沉睡用户!实际上答案并不包括。所以沉睡用户应该这样定义:近7天未活跃但更早前活跃过且非流失用户。
2.case when 判断使每个类型的用户不会有重合。而且先判断哪个后判断哪个尤其重要,顺序不能换!
新学到:
1.用户最近一次活跃时间:max(date);一直没有想到,其实在之前是知道求用户第一次活跃时间是用:min(date)。如果使用这个会省很多力气。
因为最后一次活跃的时间可以用来确定 近几天未活跃但更早前活跃过
2.select里面可以套用select,尤其是从另外一个表用group by查询,因为group by又不好放在总的框架里面。
(1)统计用户第一次活跃的时间;用户最后一次活跃的时间
如果in_time-进入时间和out_time-离开时间跨天了,在两天里都记为该用户活跃过
(2)近7天和近30天,涉及两个不同的活跃条件
一种方法是求出第7天是什么时候:日期减去天数得到日期;另一种是日期减去日期得到天数
(3)对用户分类且求比例
case when 判断就使每个类型的用户不会有重合,所以‘新晋用户’在’忠实用户‘的前面,’流失用户‘在’沉睡用户‘的前面。
方法一:
with子句 + 使用 union 合并几种分类情况
#依次去求每个类型的uid有哪些
with
main as(
#用户第一次活跃的时间
select
uid,
min(date(in_time)) as min_date
from tb_user_log
group by uid
),
attr as(
#近7天和近30天,两个不同条件的活跃记录表
#先求出今日是哪一天,往前七天是那一天,往前30天是那一天
select
max(date(in_time)) as now_date,
date_sub(max(date(in_time)),interval 6 day) as 7_date,
date_sub(max(date(in_time)),interval 29 day) as 30_date
from tb_user_log
)
,temp as(
#新晋用户(近7天新增)
select distinct
uid as new_u
from main,attr
where min_date >= 7_date
)#输出:105、102
,temp1 as(
#忠实用户(近7天活跃过且非新晋用户)
#when date(in_time)>= 7_date and not in 新晋用户
select distinct
uid as zs_u
from tb_user_log,attr
where date(in_time)>= 7_date
and uid not in(
select
uid
from main,attr
where min_date >= 7_date
)
)#输出:109、108、104
,temp2 as(
#流失用户(近30天未活跃但更早前活跃过)
#when date(in_time) < 30_date and not in 近30天活跃过的用户
select distinct
uid as ls_u
from tb_user_log,attr
where date(in_time)< 30_date
and uid not in(
select
uid
from tb_user_log,attr
where date(in_time) >= 30_date
)
)#输出:101
,temp3 as(
#沉睡用户(近7天未活跃但更早前活跃过)
#when date(in_time) < 7_date and not in 近7天活跃过的用户
select distinct
uid as cs_u
from tb_user_log,attr
where date(in_time) < 7_date
and uid not in(
select
uid
from tb_user_log,attr
where date(in_time) >= 7_date
)
)#输出:103、101
(select
'新晋用户' as user_grade,
count(*) as ratio
from temp
group by user_grade)
union
(select
'忠实用户' as user_grade,
count(*) as ratio
from temp1
group by user_grade)
union
(select
'流失用户' as user_grade,
count(*) as ratio
from temp2
group by user_grade)
union
(select
'沉睡用户' as user_grade,
count(*) as ratio
from temp3
group by user_grade) 方法二:
with 子句 + case when
with
main as(
#用户第一次活跃的时间,用户最后活跃的时间
select
uid,
min(date(in_time)) as min_date,
max(date(in_time)) as max_date
from tb_user_log
group by uid
),
attr as(
#近7天和近30天,涉及两个不同的活跃条件
#先求出今日是哪一天,往前七天是哪一天,往前30天是哪一天
select
max(date(in_time)) as now_date,
date_sub(max(date(in_time)),interval 6 day) as 7_date,
date_sub(max(date(in_time)),interval 29 day) as 30_date
from tb_user_log
)
#case when 判断就使每个类型的用户不会有重合。
select
(case
when min_date >= 7_date
then '新晋用户'
when max_date >= 7_date
then '忠实用户'
when max_date <= 30_date
then '流失用户'
else '沉睡用户'
end) as user_grade,
round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio
from main,attr
group by user_grade
order by ratio desc 方法三:
case when + 日期函数:date_sub()
#case when 判断就使每个类型的用户不会有重合。
select
(case
when min_date >= 7_date
then '新晋用户'
when max_date >= 7_date
then '忠实用户'
when max_date <= 30_date
then '流失用户'
else '沉睡用户'
end) as user_grade,
round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio
from(
#用户第一次活跃的时间,用户最后活跃的时间
select
uid,
min(date(in_time)) as min_date,
max(date(in_time)) as max_date
from tb_user_log
group by uid
) main,
(
#近7天和近30天,涉及两个不同的活跃条件
#先求出今日是哪一天,往前七天是哪一天,往前30天是哪一天
select
max(date(in_time)) as now_date,
date_sub(max(date(in_time)),interval 6 day) as 7_date,
date_sub(max(date(in_time)),interval 29 day) as 30_date
from tb_user_log
) attr
group by user_grade
order by ratio desc 方法四:
case when + 日期函数: timestampdiff(day,expr1,expr2)
#case when 判断就使每个类型的用户不会有重合。
select
(case
when timestampdiff(day,min_date,'2021-11-04') <= 6
then '新晋用户'
when timestampdiff(day,max_date,'2021-11-04') <= 6
then '忠实用户'
when timestampdiff(day,max_date,'2021-11-04') >= 30
then '流失用户'
else '沉睡用户'
end) as user_grade,
round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio
from(
#用户第一次活跃的时间,用户最后活跃的时间
select
uid,
min(date(in_time)) as min_date,
max(date(in_time)) as max_date
from tb_user_log
group by uid
) main
group by user_grade
order by ratio desc 方法五:
日期函数:datediff(expr1,expr2) <=6
#case when 判断就使每个类型的用户不会有重合。
select
(case
when datediff('2021-11-04',min_date) <= 6
then '新晋用户'
when datediff('2021-11-04',max_date) <= 6
then '忠实用户'
when datediff('2021-11-04',max_date) >= 30
then '流失用户'
else '沉睡用户'
end) as user_grade,
round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio
from(
#用户第一次活跃的时间,用户最后活跃的时间
select
uid,
min(date(in_time)) as min_date,
max(date(in_time)) as max_date
from tb_user_log
group by uid
) main
group by user_grade
order by ratio desc
/*问题:统计活跃间隔对用户分级后 各活跃等级用户占比 结果保留两位小数 且按占比降序排序。 注: 用户等级标准简化为: 忠实用户(近7天活跃过且非新晋用户) 最晚活跃的日期在7天内并且最早活跃日期在7天外 新晋用户(近7天新增) 最早活跃的日期在7天内 沉睡用户(近7天未活跃但更早前活跃过) 最晚活跃日期在7天外并且在30天内 流失用户(近30天未活跃但更早前活跃过)。最晚活跃的日期在30天外 假设今天就是数据中所有日期的最大值。 近7天表示包含当天T的近7天,即闭区间[T-6, T]。 如何求每个用户的最晚活跃日期和最早活跃日期*/ WITH user_day AS (SELECT uid , MAX(DATE(in_time)) AS later_day , MIN(DATE(in_time)) AS early_day FROM tb_user_log GROUP BY uid) SELECT CASE WHEN ud.later_day>=DATE_SUB((SELECT MAX(later_day) FROM user_day), INTERVAL 6 DAY) AND ud.early_day<DATE_SUB((SELECT MAX(later_day) FROM user_day), INTERVAL 6 DAY) THEN '忠实用户' WHEN ud.early_day>=DATE_SUB((SELECT MAX(later_day) FROM user_day), INTERVAL 6 DAY) THEN '新晋用户' WHEN ud.later_day<DATE_SUB((SELECT MAX(later_day) FROM user_day), INTERVAL 29 DAY) THEN '流失用户' ELSE '沉睡用户' END AS user_grade , ROUND(COUNT(ud.uid)/(SELECT COUNT(uid) FROM user_day),2) AS ratio FROM user_day AS ud GROUP BY user_grade ORDER BY ratio DESC
with t as ( select uid, date(min(in_time)) first_day, date(max(out_time)) last_day, max(date(max(out_time))) over() today from tb_user_log group by uid ) select case when datediff(today,first_day) <= 6 then '新晋用户' when datediff(today,last_day) <= 6 then '忠实用户' when datediff(today,last_day) > 29 then '流失用户' else '沉睡用户' end user_grade, round(count(uid) / sum(count(uid)) over(),2) ratio from t group by user_grade order by ratio desc;
select distinct user_grade, round(count(user_grade) over (partition by user_grade)/ count(user_grade) over(),2) rate from (select case when timestampdiff(day,max(out_time),'2021-11-04 23:59:59') <=6 and timestampdiff(day,min(in_time),'2021-11-04 23:59:59') >6 then "忠实用户" when timestampdiff(day,min(in_time),'2021-11-04 23:59:59') <= 6 then "新晋用户" when timestampdiff(day,max(out_time),'2021-11-04 23:59:59')>6 and timestampdiff(day,max(out_time),'2021-11-04 23:59:59')<=29 then "沉睡用户" when timestampdiff(day,max(out_time),'2021-11-04 23:59:59')>29 then "流失用户" end user_grade from tb_user_log group by uid) a order by rate desc
SELECT user_grade,ROUND(count(*)/ (select count(distinct uid) from tb_user_log),2) as ratio
FROM
(
SELECT uid,
(CASE
WHEN DATEDIFF('2021-11-04',MAX(DATE(out_time)))<=7 AND DATEDIFF('2021-11-04',MIN(DATE(in_time)))>7 THEN "忠实用户"
WHEN DATEDIFF('2021-11-04',MAX(DATE(in_time)))<=7 THEN "新晋用户"
WHEN DATEDIFF('2021-11-04',MAX(DATE(out_time)))>30 THEN "流失用户"
WHEN 7<DATEDIFF('2021-11-04',MAX(DATE(out_time)))<=30 THEN "沉睡用户"
END) AS user_grade
FROM tb_user_log
GROUP BY uid
)a
GROUP BY user_grade
ORDER BY ratio DESC 为什么这个代码自测的时候都是对的,提交之后就只有2/3用例是对的o(TヘTo)select case when datediff(@n,f)<7 then '新晋用户' when datediff(@n,l)<7 then '忠实用户' when datediff(@n,l)>29 then '流失用户' else '沉睡用户' end ,round(count(u)/sum(count(u))over(),2) from(select uid u,min(in_time) f,max(out_time) l from tb_user_log group by 1) a ,(select @n:=max(out_time) from tb_user_log) b group by 1 order by 2 desc;优雅就完事~
# 问题:统计活跃间隔对用户分级后,各活跃等级用户占比,结果保留两位小数,且按占比降序排序。 with t1 as( select *,date(date_format(max(in_time) over(),"%Y-%m-%d")) as cur_time from tb_user_log ) , `近7天` as( select distinct uid from t1 where date(date_format(in_time,"%Y-%m-%d")) between cur_time-INTERVAL 6 DAY and cur_time ) , `近7天之前` as( select distinct uid from t1 where date(date_format(in_time,"%Y-%m-%d")) < cur_time-INTERVAL 6 DAY ) , `近30天` as( select distinct uid from t1 where date(date_format(in_time,"%Y-%m-%d")) between cur_time-INTERVAL 29 DAY and cur_time ) , `近30天之前` as( select distinct uid from t1 where date(date_format(in_time,"%Y-%m-%d")) < cur_time-INTERVAL 29 DAY ) , `新晋用户` as( select * from `近7天` where uid not in(select uid from `近7天之前`) ) , `忠实用户` as( select * from `近7天` where uid not in(select uid from `新晋用户`) ) , `流失用户` as( select * from `近30天之前` where uid not in(select uid from `近30天`) ) , `沉睡用户` as( select * from `近7天之前` where uid not in(select uid from `近7天`) and uid not in (select uid from `流失用户`) ) select user_grade,round(cnt/sum(cnt) over() ,2) as ratio from ( select "忠实用户" as user_grade,count(*) as cnt from `忠实用户` union all select "新晋用户" as user_grade,count(*) as cnt from `新晋用户` union all select "沉睡用户" as user_grade,count(*) as cnt from `沉睡用户` union all select "流失用户" as user_grade,count(*) as cnt from `流失用户` ) t1 order by ratio desc
with a as( select uid, (select max(date(out_time)) from tb_user_log) now, min(date(in_time)) intime, max(date(out_time)) newtime FROM tb_user_log GROUP BY uid ), b as ( select uid, (case when datediff(now,intime) > 6 and datediff(now,newtime) <= 6 then '忠实用户' when datediff(now,intime) <= 6 then '新晋用户' when datediff(now,intime) > 29 and datediff(now,newtime) > 29 then '流失用户' else '沉睡用户' end )user_grade from a ) select user_grade, round(count(uid) / (select count(distinct uid) from tb_user_log), 2) ratio from b group by user_grade order by ratio desc;
# max(date(out_time)) 是最后活跃时间
# min(date(in_time)) 是首次活跃时间 当作注册时间
select user_grade ,round(count(t.uid) /(select count(distinct uid) from tb_user_log ) ,2) from
(select uid ,case when max(date(out_time)) between DATE_ADD('2021-11-04',INTERVAL -7 day ) and '2021-11-04' and min(date(in_time)) < DATE_ADD('2021-11-04',INTERVAL -7 day ) then "忠实用户"
when max(date(out_time)) between DATE_ADD('2021-11-04',INTERVAL -6 day ) and '2021-11-04' and min(date(in_time)) >= DATE_ADD('2021-11-04',INTERVAL -7 day ) then '新晋用户'
when max(date(out_time)) between DATE_ADD('2021-11-04',INTERVAL -29 day ) and DATE_ADD('2021-11-04',INTERVAL -7 day ) then '沉睡用户'
when max(date(out_time)) <= DATE_ADD('2021-11-04',INTERVAL -30 day ) then '流失用户'
end user_grade
from tb_user_log group by uid) t
group by t.user_grade order by t.user_grade ;
#第一次是这么写的....
(select '忠实用户' , round(count(distinct a.uid)/(select count(distinct uid) from tb_user_log) ,2) u1 from tb_user_log a,
(select uid,min(in_time) dt from tb_user_log group by uid) b where a.uid = b.uid and a.artical_id<>0
and DATE_FORMAT(a.in_time,'%Y-%m-%d')!= DATE_FORMAT(b.dt,'%Y-%m-%d')
and a.in_time <='2021-11-04 23:59:59' and DATE_ADD(a.in_time,INTERVAL 7 day )>='2021-11-04 23:59:59')
union
(select '新晋用户' ,round( count(a.uid)/(select count(distinct uid) from tb_user_log) ,2) u2 from tb_user_log a,
(select uid,min(in_time) dt from tb_user_log group by uid) b where a.uid = b.uid and a.artical_id<>0
and DATE_FORMAT(a.in_time,'%Y-%m-%d')= DATE_FORMAT(b.dt,'%Y-%m-%d')
and a.in_time <='2021-11-04 23:59:59' and DATE_ADD(a.in_time,INTERVAL 7 day )>='2021-11-04 23:59:59')
union
(select '沉睡用户', round(count(a.uid)/(select count(distinct uid) from tb_user_log),2) u3 from tb_user_log a where DATE_ADD(a.in_time,INTERVAL 7 day )<'2021-11-04 23:59:59' and
DATE_ADD(a.in_time,INTERVAL 30 day )>='2021-11-04 23:59:59' and
a.uid not in
(select uid from tb_user_log where in_time <='2021-11-04 23:59:59'
and DATE_ADD(in_time,INTERVAL 7 day )>='2021-11-04 23:59:59' ))
union
(select '流失用户', round( count(a.uid)/(select count(distinct uid) from tb_user_log),2 ) u4 from tb_user_log a where DATE_ADD(a.in_time,INTERVAL 30 day )<'2021-11-04 23:59:59' and
a.uid not in
(select uid from tb_user_log where in_time <='2021-11-04 23:59:59'
and DATE_ADD(in_time,INTERVAL 30 day )>='2021-11-04 23:59:59' ) ); select( case when date_format(in_time,'%Y-%m-%d') between date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and (select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log) and not exists (select in_time from tb_user_log t2 where in_time<date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and t2.uid=t.uid) then '新晋用户' when exists (select in_time from tb_user_log t2 where in_time<date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and t2.uid=t.uid) and (date_format(in_time,'%Y-%m-%d') between date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and (select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log)) then '忠实用户' when date_format(in_time,'%Y-%m-%d')<date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 29 day) and not exists (select in_time from tb_user_log t2 where in_time>=date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 29 day) and t2.uid=t.uid) then '流失用户' when date_format(in_time,'%Y-%m-%d')<date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and not exists (select in_time from tb_user_log t2 where in_time>=date_sub((select max(date_format(in_time,'%Y-%m-%d')) from tb_user_log),interval 6 day) and t2.uid=t.uid) then '沉睡用户' end) as user_grade, round(count(uid)/(select count(distinct uid) from tb_user_log),2) ratio from tb_user_log t group by user_grade order by ratio desc有没有大佬知道为啥我的结果是这样啊
| None | 0.43 |
| 忠实用户 | 0.43 |
| 新晋用户 | 0.29 |
| 沉睡用户 | 0.14 |
| 流失用户 | 0.14 |
with user_list as ( select uid, case when user_first_active_time_diff between 0 and 6 then '新晋用户' when recent_active_diff between 0 and 6 then '忠实用户' when recent_active_diff between 7 and 29 then '沉睡用户' else '流失用户' end as user_grade from ( select uid, timestampdiff(day,time_time,recent_time) recent_active_diff, timestampdiff(day,user_first_active_time,recent_time) user_first_active_time_diff from ( select uid, max(time_time) as time_time, recent_time, user_first_active_time from ( select uid, substr(out_time,1,10) as time_time, first_value(substr(out_time,1,10)) over(order by substr(out_time,1,10) desc) as recent_time, first_value(substr(out_time,1,10)) over(partition by uid order by substr(out_time,1,10) asc) as user_first_active_time from tb_user_log ) tmp1 group by uid, recent_time, user_first_active_time ) tmp ) tmp2 ) select user_grade, round(count(distinct uid)/(select count(distinct uid) from user_list),2) as ratio from user_list group by user_grade order by round(count(distinct uid)/(select count(distinct uid) from user_list),2) desc
with t1 as
(select uid,min(date(in_time))as dt,max(date(in_time)) as dt_max
from tb_user_log
group by uid )
select case when datediff('2021-11-04',dt) <=6 then '新晋用户'
when datediff('2021-11-04',dt_max) <=6 then '忠实用户'
when datediff('2021-11-04',dt_max) >=30 then '流失用户'
else '沉睡用户' end as user_grade,
round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) as ratio
from t1
group by user_grade
order by ratio desc 首先算出每个用户的新增日期和最近活跃日期
然后判断每个用户属于哪个类别
case when的判断是从上而下的,所以顺序可以设计一下;
对于时间差是<7还是<=7 要判断好。
with tb1 as (select a.uid,d1,d2,(case when timestampdiff(day,d1,date((select date(max(out_time))from tb_user_log)))>6 and d2 is not null then '忠实用户' when timestampdiff(day,d1,date((select date(max(out_time))from tb_user_log)))<7 and d2 is not null then '新晋用户' when timestampdiff(day,d1,date((select date(max(out_time))from tb_user_log))) between 7 and 29 and d2 is null then '沉睡用户' when timestampdiff(day,d1,date((select date(max(out_time))from tb_user_log)))>29 then '流失用户' end ) user_grade from ( select uid, date(min(in_time)) d1 from tb_user_log group by uid) a #首次登入时间表 left join (select uid, date(min(in_time)) d2 from tb_user_log where timestampdiff(day,date(in_time),date((select date(max(out_time))from tb_user_log)))<7 group by uid) b #近7天内用户首次登入时间表 on a.uid=b.uid) select user_grade,round(count(user_grade)/(select count(user_grade) from tb1),2) ratio from tb1 group by user_grade order by ratio desc;
WITH t1 as ( SELECT uid, max(date(out_time)) as max_out_time, min(date(out_time)) as min_out_time, (SELECT max(date(out_time)) FROM tb_user_log) as max_time FROM tb_user_log GROUP BY uid ) SELECT case when datediff(max_time, min_out_time) > 7 AND datediff(max_time, max_out_time) < 7 then '忠实用户' when datediff(max_time, min_out_time) < 7 AND datediff(max_time, max_out_time) < 7 then '新晋用户' when datediff(max_time, min_out_time) > 7 AND datediff(max_time, max_out_time) > 29 then '流失用户' else '沉睡用户' end as user_grade, round(count(distinct uid) / (SELECT count(distinct uid) FROM tb_user_log), 2) as ratio FROM t1 GROUP BY user_grade ORDER BY ratio desc