题解 | #连续两次作答试卷的最大时间窗#
连续两次作答试卷的最大时间窗
https://www.nowcoder.com/practice/9dcc0eebb8394e79ada1d4d4e979d73c
1. 分享思路:首先是lead() over() 窗口函数的使用得到每一次日期的下一行日期值,其次利用uid分组得到max(start_time),min(start_time) ,max(datediff(next_time,start_time)) 得到连续两次作答最大的时间窗口,统计年度作答次数总数的时候,需要统计的是count(start_time) 而不是count(distinct start_time) ; /* # 3.表间自关联 select A1.uid , timestampdiff(day,A1.submit_time,B1.submit_time) days_diff1, round(A1.avg_exam * timestampdiff(day,A1.submit_time,B1.submit_time),2) avg_exam_cnt from ( # 2. 这些人各自完成试卷 的提交时间及对应的rank值 。 select ER.uid, ER.submit_time, row_number() over(partition by ER.uid order by ER.submit_time) rankA from ( # 1. 统计在2021年至少有两天作答过试卷的人 select uid from exam_record where year(submit_time) = 2021 group by uid having count(distinct date_format(submit_time,'%Y%m%d')) >= 2 ) A join exam_record ER on A.uid = ER.uid and year(ER.submit_time) = 2021 order by ER.uid,ER.submit_time ) A1 join ( select ER.uid, ER.submit_time, row_number() over(partition by ER.uid order by ER.submit_time) rankB from ( # 1. 统计在2021年至少有两天作答过试卷的人 select uid from exam_record where year(submit_time) = 2021 group by uid having count(distinct date_format(submit_time,'%Y%m%d')) >= 2 ) A join exam_record ER on A.uid = ER.uid and year(ER.submit_time) = 2021 order by ER.uid,ER.submit_time ) B1 on A1.uid=B1.uid and A1.avg_exam=B1.avg_exam and B1.rankB-1 = A1.rankA order by A1.uid,days_diff1 desc limit 1 ; **/ # 重新按照 lead( ,1) over(partition by order by ) 窗口函数书写 select uid , days_window , round((total*days_window)/days_diff,2) avg_exam_cnt from ( select uid, count(start_time) total , #全年作答的总次数 datediff(max(start_time),min(start_time))+1 days_diff , #头尾时间最大时间窗 max(datediff(next_time,start_time))+1 days_window #该年连续两次作答试卷的最大时间窗 from ( select ER.uid, ER.start_time, lead(ER.start_time,1) over(partition by ER.uid order by ER.start_time) next_time # 当下时间的下一次时间 from (select uid from exam_record where year(start_time) = 2021 group by uid having count(distinct date(submit_time)) >= 2 ) A join exam_record ER on A.uid = ER.uid and year(start_time) = 2021 order by ER.uid,ER.start_time ) A1 group by A1.uid ) A2 order by days_window desc,avg_exam_cnt desc ; /* WITH t2 AS ( SELECT uid, COUNT(start_time) total, -- 用户2021年作答的次数 DATEDIFF(MAX(start_time),MIN(start_time))+1 diff_time, -- 头尾作答时间窗 MAX(DATEDIFF(next_time,start_time))+1 days_window -- 最大间隔天数 FROM ( SELECT uid,start_time, LEAD(start_time,1)OVER(PARTITION BY uid ORDER BY start_time) AS next_time -- 第二次作答时间 FROM exam_record WHERE YEAR(start_time)=2021 -- 2021年的数据 ) t1 GROUP BY uid ) SELECT uid,days_window,ROUND(total* days_window/diff_time,2) avg_exam_cnt FROM t2 WHERE diff_time>1 ORDER BY days_window DESC,avg_exam_cnt DESC ; */