with t1 as( select author_id,author_level,answer_date, date_sub(answer_date,interval dense_rank()over(partition by at.author_id order by answer_date) day) as same from answer_tb at inner join author_tb using(author_id) ) select author_id, author_level, count(distinct(answer_date)) as days_cnt from t1 group by 1,2,same having days_cnt>=3 order by 1
select *, dense_rank() over(partition by author_id order by answer_date) as rn from answer_tb
with temp1 as ( select *, dense_rank() over(partition by author_id order by answer_date) as rn from answer_tb a ) select author_id, answer_date, date_sub(answer_date,interval rn day) as dt from temp1这样就得到了一个日期和日期减编号(dt)的表,dt相等表示同一天登陆
select author_id, count(distinct answer_date) as days_cnt from temp2 group by author_id,dt having days_cnt >=3先按照作者,和差值日期(dt)分组,然后对answer_date去重计数
select distinct author_id, author_level, days_cnt from ( select author_id, count(distinct answer_date) as days_cnt from temp2 group by author_id,dt having days_cnt >=3 ) t1 left join author_tb t2 using(author_id) order by author_id
with temp1 as ( select *, dense_rank() over(partition by author_id order by answer_date) as rn from answer_tb ), temp2 as ( select author_id, answer_date, date_sub(answer_date,interval rn day) as dt from temp1 ) select distinct author_id, author_level, days_cnt from ( select author_id, count(distinct answer_date) as days_cnt from temp2 group by author_id,dt having days_cnt >=3 ) t1 left join author_tb t2 using(author_id) order by author_id
SELECT t2.author_id, author_level, MAX(lx_cnt) AS days_cnt FROM (SELECT author_id, DATE_SUB(answer_date, INTERVAL rk DAY) AS fz_dt, COUNT(answer_date) AS lx_cnt FROM (SELECT DISTINCT author_id, answer_date, DENSE_RANK()OVER(PARTITION BY author_id ORDER BY answer_date) AS rk FROM answer_tb) AS t1 GROUP BY author_id, fz_dt) AS t2 JOIN author_tb AS t3 ON t2.author_id = t3.author_id GROUP BY t2.author_id, author_level HAVING days_cnt >= 3 ORDER BY t2.author_id;
with t1 as ( select distinct author_id, answer_date, dense_rank() over(partition by author_id order by answer_date) rn from answer_tb ), t2 as ( select author_id, count(answer_date) days_cnt from t1 group by author_id,date_sub(answer_date,interval rn day) ) select a.author_id, author_level, max(days_cnt) max_days from t2 join author_tb a on t2.author_id = a.author_id group by a.author_id,author_level having max_days >= 3 order by a.author_id;
select b.author_id,author_level,count(b.author_id)+1 days_cnt from (select author_id, answer_date, lead(answer_date,1) over (partition by author_id order by answer_date) d2 from (select distinct answer_date,author_id from answer_tb) a ) b left join author_tb c on b.author_id=c.author_id where timestampdiff(day,answer_date,d2)=1 group by author_id order by author_id
select c.author_id ,author_level ,max(days_cnt) days_cnt from( select b.author_id ,author_level ,count(tim) days_cnt from ( -- (当一组日期按照升序排序时,如果某几个日期是连续的,它们减去本身的DENSE RANK,会发现都是相减的结果都是同一天) select a.author_id ,author_level ,answer_date ,cast(day(answer_date) AS SIGNED) - dense_rank() over(partition by author_id order by answer_date) tim from( select t1.author_id ,author_level ,answer_date from author_tb t1 join answer_tb t2 using(author_id) -- using和on的作用一样,相当于简写 group by 1,2,3 -- 虽然写的是123,但是本质上是对第三列进行去重 / 也可以替换为 去掉这行 在answer_date前加入distinct order by 1,3 -- 按照作者、日期进行升序排序 ) a ) b group by 1,2,tim -- 核心在这里 having days_cnt >= 3 ) c group by 1,2;
WITH t AS ( SELECT author_id, answer_date, rank() over(PARTITION BY author_id ORDER BY answer_date) AS rk, date_sub(answer_date, INTERVAL rank() over(PARTITION BY author_id ORDER BY answer_date) day) AS begin_date FROM answer_tb at2 GROUP BY author_id, answer_date ORDER BY author_id, answer_date ) SELECT t2.author_id, author_level, max(cnt) AS days_cnt FROM ( SELECT author_id, begin_date, count(1) AS cnt FROM t GROUP BY author_id, begin_date HAVING count(1) >= 3) t2 LEFT JOIN author_tb a ON t2.author_id = a.author_id GROUP BY t2.author_id, author_level ORDER BY t2.author_id;
select i,author_level,count(l) o from(select i,d,date_sub(d,interval p day) l from(select author_id i,answer_date d ,row_number()over(partition by author_id order by answer_date) p from answer_tb)a)b join author_tb t on b.i=t.author_id group by 1,2,l having o>=3;
select t4.author_id, t4.author_level, t5.cnt as days_cnt from author_tb as t4, ( select distinct t1.author_id as author_id, count(distinct t1.answer_date) as cnt from answer_tb as t1 join answer_tb as t2 join answer_tb as t3 on t1.author_id = t2.author_id and t2.author_id = t3.author_id and ( # 1 2 3 ( timestampdiff (day, t1.answer_date, t2.answer_date) = -1 and timestampdiff (day, t2.answer_date, t3.answer_date) = -1 ) #2 1 3 &nbs***bsp;( timestampdiff (day, t1.answer_date, t2.answer_date) = 1 and timestampdiff (day, t2.answer_date, t3.answer_date) = -2 ) # 3 1 2 &nbs***bsp;( timestampdiff (day, t1.answer_date, t2.answer_date) = 2 and timestampdiff (day, t2.answer_date, t3.answer_date) = -1 ) ) group by author_id ) as t5 where t4.author_id = t5.author_id;
# Keep in mind that MySQL user variables are deprecated since MySQL 8.0.13, so using window functions might be a better approach for future-proofing your query. WITH answer_dates AS ( SELECT answer_date, author_id, IF(answer_date = @prev_date + INTERVAL 1 DAY AND author_id = @prev_author_id, @consec_days := @consec_days + 1, @consec_days := 1) AS consec_days, @prev_date := answer_date, @prev_author_id := author_id FROM (SELECT @prev_date := NULL, @prev_author_id := NULL, @consec_days := 1) vars, (SELECT answer_date, author_id FROM answer_tb ORDER BY author_id, answer_date) ordered_dates ) select t1.author_id,t2.author_level as author_level, CAST(t1.max_consec_days AS UNSIGNED) AS days_cnt from ( SELECT author_id, MAX(consec_days) AS max_consec_days FROM answer_dates GROUP BY author_id HAVING max_consec_days >= 3 )t1 join author_tb t2 on t2.author_id=t1.author_id
【场景】:连续记录
【分类】:分组条件查询、窗口函数
难点:
1.如何表示连接?连续的表示:日期减去排序的值相等
2.记得先对日期、用户去重
(1)对日期、创作者进行去重
(2)统计日期、创作者、日期减去排序的值(连续签到)
连续的表示:日期减去排序的值相等;
(3)统计最大连续回答问题的天数大于等于3天的用户及其等级(若有多条符合条件的数据,按author_id升序排序)
[条件]:最大连续回答问题的天数大于等于3天
[使用]:group by 分组条件 [创作者;等级] having count(连续天数) >= 3 order by [创作者]
最终结果
select 查询结果 [创作者;等级;连续回答天数] from 从哪张表中查询数据[多表] group by 分组条件 [创作者;等级] having 判断条件 [连续回答问题的天数大于等于3] order by 对查询结果排序 [创作者升序];
方法一:
with
main as(
#对日期、创作者进行去重
select distinct
answer_date,
author_id
from answer_tb
)
,main1 as(
#统计日期、创作者、日期减去排序的值(连续签到)
select
answer_date,
author_id,
answer_date - row_number() over(partition by author_id order by answer_date) as data
from main
)
#统计最大连续回答问题的天数大于等于3天的用户及其等级(若有多条符合条件的数据,按author_id升序排序)
select
author_id,
author_level,
count(data) as days_cnt
from author_tb
join main1 using(author_id)
group by author_id,author_level having count(data) >= 3
order by author_id 方法二:
多表连接
#统计最大连续回答问题的天数大于等于3天的用户及其等级(若有多条符合条件的数据,按author_id升序排序)
select
author_id,
author_level,
count(data) as days_cnt
from author_tb
join(
#统计日期、创作者、日期减去排序的值(连续签到)
select
answer_date,
author_id,
answer_date - dense_rank() over(partition by author_id order by answer_date) as data
from(
#对日期、创作者进行去重
select distinct
answer_date,
author_id
from answer_tb
) main
) main1 using(author_id)
group by author_id,author_level having count(data) >= 3
order by author_id
select b.author_id,a1.author_level,max(b.days_cnt) from (select author_id,count(answer_date-rk) as days_cnt from (select distinct author_id,answer_date, dense_rank() over (partition by author_id order by answer_date) as rk from answer_tb) a group by author_id,answer_date-rk having days_cnt>=3) b join author_tb a1 on b.author_id=a1.author_id group by b.author_id,a1.author_level order by author_id
SELECT author_id, author_level, MAX(continous_days) AS days_cnt
FROM
(SELECT author_id,
author_level,
COUNT(author_id) AS continous_days
FROM
(SELECT DISTINCT a1.author_id, a2.author_level, answer_date,
DENSE_RANK() OVER (PARTITION BY a1.author_id ORDER BY answer_date ASC) AS ranking
FROM answer_tb AS a1 LEFT OUTER JOIN author_tb AS a2
ON a1.author_id = a2.author_id) AS t1
GROUP BY author_id, author_level, DATE_SUB(answer_date, INTERVAL ranking DAY)
HAVING continous_days >= 3) AS t2
GROUP BY author_id, author_level
ORDER BY author_id ASC;
select t2.author_id, t2.author_level, count(1) as days_cnt from ( select n.author_id, a.author_level , date_sub(n.answer_date, interval rank() over (partition by author_id order by answer_date) day) as group_date from answer_tb n left join author_tb a on a.author_id = n.author_id group by n.answer_date, n.author_id, a.author_level ) t2 group by t2.author_id, t2.author_level,t2.group_date having days_cnt >= 3 order by t2.author_level desc