你正在搭建一个用户活跃度的画像,其中一个与活跃度相关的特征是“最长连续登录天数”, 请用SQL实现“2023年1月1日-2023年1月31日用户最长的连续登录天数”
登陆表 tb_dau:
fdate | user_id |
2023-01-01 | 10000 |
2023-01-02 | 10000 |
2023-01-04 | 10000 |
输出:
user_id | max_consec_days |
10000 | 2 |
fdate | user_id |
2023-01-01 | 10000 |
2023-01-02 | 10000 |
2023-01-04 | 10000 |
user_id | max_consec_days |
10000 | 2 |
drop table if exists tb_dau; create table `tb_dau` ( `fdate` date, `user_id` int ); insert into tb_dau(fdate, user_id) values ('2023-01-01', 10000), ('2023-01-02', 10000), ('2023-01-04', 10000);
user_id|max_consec_days 10000|2
id为10000的用户在1月1日及1月2日连续登录2日,1月4日登录1日,故最长连续登录天数为2日
MySQL中日期加减的函数日期增加 DATE_ADD,例:date_add('2023-01-01', interval 1 day) 输出 '2023-01-02'
日期减少 DATE_SUB,例:date_sub('2023-01-01', interval 1 day) 输出 '2022-12-31'日期差 DATEDIFF,例:datediff('2023-02-01', '2023-01-01') 输出31
select user_id, max(consec_days) max_consec_days from (select user_id, count(startday) consec_days from (select user_id, date_sub(fdate, interval (dense_rank() over (partition by user_id order by fdate)) day) startday from tb_dau where year(fdate) = 2023 and month(fdate) = 1 ) t1 group by user_id, startday ) t2 group by user_id
select user_id ,max(num) max_consec_days from (select user_id ,date_sub(fdate, interval r day) datesub ,count(1) num from (select user_id ,fdate ,row_number()over(partition by user_id order by fdate) r from tb_dau where fdate between '2023-01-01' and '2023-1-31') a group by 1,2 ) b group by 1
#抄的评论区大佬的,看了好几遍才看懂 with t1 as( select user_id, count(continday) as consec_days from ( select user_id, # data_sub函数 # 用登陆日期-连续递增的数字(排序的位次), # 如果日期连续,就会产生相同的结果 date_sub(fdate, interval ( dense_rank() over (partition by user_id order by fdate) ) day ) continday from tb_dau where year(fdate) = 2023 and month(fdate) = 1 ) t2 # 对计算结果进行分组,相同的在一组,表示连续登录 group by user_id, continday ) select user_id, max(consec_days) as max_consec_days from t1 group by user_id
with t1 as ( select distinct user_id, fdate from tb_dau where fdate >= '2023-01-01' and fdate <= '2023-01-31' ), t2 as ( select user_id, fdate, rank() over ( partition by user_id order by fdate ) as rn from t1 ), t3 as ( select user_id, fdate, date_sub (fdate, interval rn day) flag_date from t2 ), t4 as ( select user_id, count(*) total from t3 group by user_id, flag_date ) select user_id, max(total) as max_consec_days from t4 group by user_id
select user_id, max(co) max_consec_days from ( select user_id, count(rn) co from ( select fdate, user_id, row_number()over(partition by user_id order by fdate) rn, date_sub(fdate,interval row_number()over(partition by user_id order by fdate) day) ds from tb_dau ) a group by user_id,ds ) b group by user_id;
with tmp as ( select IFNULL(DATEDIFF(next,fdate)=1,0) as diff, row_number() over(partition by user_id order by fdate) as eday, user_id from( select fdate, user_id, lead(fdate,1,NULL) over(partition by user_id order by fdate) as next from tb_dau) t1 ) select user_id, MAX(eday-last) as max_consec_days from( select lag(eday,1,0) over(partition by user_id order by eday rows 1 preceding) as last, user_id, eday from tmp where diff=0 )t1 group by user_id
# Write your MySQL query statement below WITH t0 as ( select distinct fdate as login_date, user_id as id from tb_dau where fdate between '2023-01-01' and '2023-01-31' ) , t1 AS ( SELECT #------------只需要修改里面的逻辑就行。注意语句是“顺序执行”的------------------------------- login_date, id, (case when login_date = @prev_date + INTERVAL 1 DAY AND id = @prev_author_id then @consec_days := @consec_days + 1 when login_date = @prev_date AND id = @prev_author_id then @consec_days := @consec_days else @consec_days := 1 end) AS consec_days, @prev_date := login_date, @prev_author_id := id #-------------只需要修改里面的逻辑就行。注意语句是“顺序执行”的------------------------------ FROM (SELECT @prev_date := NULL, @prev_author_id := NULL, @consec_days := 1) vars, (SELECT login_date, id FROM t0 ORDER BY id , login_date) ordered_dates ) SELECT id as user_id, max((CAST(consec_days AS UNSIGNED))) as max_consec_days from t1 group by id
WITH t1 as ( select user_id,fdate,DATE_SUB(fdate,interval ROW_NUMBER() over(PARTITION by user_id ORDER BY fdate) day) as date_sub from tb_dau ), t2 as ( SELECT user_id,count(*) day_sum from t1 GROUP BY user_id,date_sub ), t3 as( select user_id,day_sum,ROW_NUMBER() over(PARTITION by user_id ORDER BY day_sum desc) day_rank from t2 ) SELECT user_id,day_sum as max_consec_days from t3 where day_rank =1欢迎大家指正批评!
select user_id,consecutive_days as max_consec_days from (select user_id,consecutive_days,dense_rank()over(partition by user_id order by consecutive_days desc) as rk from (select user_id,min(fdate),max(fdate),count(*) as consecutive_days from (select user_id,fdate,date_sub(fdate,interval rn day) as grp,rn from (SELECT user_id,fdate,ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY fdate) AS rn FROM tb_dau) as t1 ) as t2 group by user_id,grp) as t3 ) as t4 where rk =1 group by user_id,consecutive_days
select user_id,max(max_consec_days) max_consec_days from ( select start_day,user_id,count(1) max_consec_days from ( select user_id ,fdate ,date_sub(fdate,interval row_number()over(partition by user_id order by fdate) day) start_day from tb_dau where fdate between '2023-01-01' and '2023-01-31' )a group by start_day,user_id )t2 group by user_id
with a as (select distinct fdate,user_id,row_number() over(partition by user_id order by fdate) ranking from tb_dau), b as (select *,date_sub(fdate, interval ranking day) df from a), c as (select user_id, count(df) consec_days from b group by user_id,df) select user_id,max(consec_days) max_consec_days from c group by user_id
select user_id,max(day_count) as max_consec_days from(select user_id,count(1) as "day_count" from(select user_id,date_sub(fdate,interval row_num day)as "day" from(select user_id,fdate, row_number()over(partition by user_id order by fdate asc) as "row_num" from (select distinct fdate,user_id from tb_dau)as t where year(fdate)=2023 and month(fdate)=1)as t1)as t2 group by user_id,day)t3 group by user_id