你正在搭建一个用户活跃度的画像,其中一个与活跃度相关的特征是“最长连续登录天数”, 请用SQL实现“2023年1月1日-2023年1月31日用户最长的连续登录天数”
登陆表 tb_dau:
fdate | user_id |
2023-01-01 | 10000 |
2023-01-02 | 10000 |
2023-01-04 | 10000 |
输出:
user_id | max_consec_days |
10000 | 2 |
fdate | user_id |
2023-01-01 | 10000 |
2023-01-02 | 10000 |
2023-01-04 | 10000 |
user_id | max_consec_days |
10000 | 2 |
drop table if exists tb_dau; create table `tb_dau` ( `fdate` date, `user_id` int ); insert into tb_dau(fdate, user_id) values ('2023-01-01', 10000), ('2023-01-02', 10000), ('2023-01-04', 10000);
user_id|max_consec_days 10000|2
id为10000的用户在1月1日及1月2日连续登录2日,1月4日登录1日,故最长连续登录天数为2日
MySQL中日期加减的函数日期增加 DATE_ADD,例:date_add('2023-01-01', interval 1 day) 输出 '2023-01-02'
日期减少 DATE_SUB,例:date_sub('2023-01-01', interval 1 day) 输出 '2022-12-31'日期差 DATEDIFF,例:datediff('2023-02-01', '2023-01-01') 输出31
with tmp as ( select IFNULL(DATEDIFF(next,fdate)=1,0) as diff, row_number() over(partition by user_id order by fdate) as eday, user_id from( select fdate, user_id, lead(fdate,1,NULL) over(partition by user_id order by fdate) as next from tb_dau) t1 ) select user_id, MAX(eday-last) as max_consec_days from( select lag(eday,1,0) over(partition by user_id order by eday rows 1 preceding) as last, user_id, eday from tmp where diff=0 )t1 group by user_id
# Write your MySQL query statement below WITH t0 as ( select distinct fdate as login_date, user_id as id from tb_dau where fdate between '2023-01-01' and '2023-01-31' ) , t1 AS ( SELECT #------------只需要修改里面的逻辑就行。注意语句是“顺序执行”的------------------------------- login_date, id, (case when login_date = @prev_date + INTERVAL 1 DAY AND id = @prev_author_id then @consec_days := @consec_days + 1 when login_date = @prev_date AND id = @prev_author_id then @consec_days := @consec_days else @consec_days := 1 end) AS consec_days, @prev_date := login_date, @prev_author_id := id #-------------只需要修改里面的逻辑就行。注意语句是“顺序执行”的------------------------------ FROM (SELECT @prev_date := NULL, @prev_author_id := NULL, @consec_days := 1) vars, (SELECT login_date, id FROM t0 ORDER BY id , login_date) ordered_dates ) SELECT id as user_id, max((CAST(consec_days AS UNSIGNED))) as max_consec_days from t1 group by id
with v as ( select * ,row_number() over(partition by user_id order by fdate) as ranking from tb_dau where fdate between '2023-01-01' and '2023-01-31' ), vv as ( select subdate(fdate, ranking) as date ,count(*) as max_consec_days ,user_id from v group by date,user_id ) select user_id ,max(max_consec_days) as max_consec_days from vv group by user_id order by user_id
select user_id,max(date_cnt) as max_consec_days from( select user_id,base_date,count(1) as date_cnt from( select user_id,(fdate-rk+1) as base_date from( select user_id,fdate,row_number() over(partition by user_id order by fdate) as rk from( select user_id,fdate from tb_dau where year(fdate)='2023' and month(fdate)='01' group by user_id,fdate ) m ) n ) a group by user_id,base_date ) b group by user_id