题解 | #连续两次作答试卷的最大时间窗#

连续两次作答试卷的最大时间窗

https://www.nowcoder.com/practice/9dcc0eebb8394e79ada1d4d4e979d73c

select
    tc2.uid,
    tc2.days_window,
    round((ta.avg_exam * tc2.days_window), 2) as avg_exam_cnt
from
    (
        select
            uid,
            max(days_diff) as days_window
        from
            (
                select
                    tc.uid,
                    tc.start_time,
                    tc.start_time_lag,
                    (datediff (tc.start_time, tc.start_time_lag) + 1) as days_diff,
                    tc.ranking
                from
                    (
                        SELECT
                            uid,
                            DATE_FORMAT (start_time, '%Y-%m-%d') AS start_time,
                            LAG (DATE_FORMAT (start_time, '%Y-%m-%d'), 1) OVER (
                                partition by
                                    uid
                                ORDER BY
                                    uid,
                                    start_time
                            ) AS start_time_lag,
                            ROW_NUMBER() OVER (
                                partition by
                                    uid
                                ORDER BY
                                    uid,
                                    start_time
                            ) AS ranking
                        FROM
                            exam_record
                        where
                            year (start_time) = 2021
                    ) tc
                where
                    tc.ranking != 1
                    and uid in (
                        select
                            uid
                        from
                            exam_record
                        where
                            year (start_time) = 2021
                        group by
                            uid
                        having
                            count(distinct date_format (start_time, '%Y-%m-%d')) > 1
                    )
                    and year (tc.start_time) = 2021
            ) tc1
        group by
            uid
    ) tc2
    left join (
        select
            uid,
            count(exam_id) / (
                datediff (
                    max(DATE_FORMAT (start_time, '%Y-%m-%d')),
                    min(DATE_FORMAT (start_time, '%Y-%m-%d'))
                ) + 1
            ) as avg_exam
        from
            exam_record
        where
            year (start_time) = 2021
        group by
            uid
    ) ta on tc2.uid = ta.uid
order by
    days_window desc,
    avg_exam_cnt desc;

题目解析:

#01先找出满足条件的uid

select uid
from exam_record 
where year(start_time) =2021
group by uid
having count(distinct date_format(start_time,'%Y-%m-%d'))>1;

用到了year()等日期函数

获取指定时间部分合集

from_unixtime(ts,fmt) 根据指定的fmt格式,格式化unix时间戳ts

last_day(date) 给定日期所在月份的最后一天

hour(time) 返回time的小时值(0~23)

minute(time) 返回time的分钟值(0~59)

quarter(date) 返回date在一年中的季度(1~4),如select quarter(current_date);

month(date) 返回date的月份值(1~12)

week(date) 返回日期date为一年中第几周(0~53)

year(date) 返回日期date的年份(1000~9999)

monthname(date) 返回date的月份名,如:select monthname(current_date);

extract()函数

select extract(year_month from current_date);
select extract(day_second from current_date);
select extract(hour_minute from current_date);

#02之后我们需要计算试卷比例

select uid,
count(exam_id)/
(datediff(max(DATE_FORMAT(start_time, '%Y-%m-%d')),min(DATE_FORMAT(start_time, '%Y-%m-%d')))+1)  as avg_exam_cnt
from exam_record
group by uid

用到了聚合函数count(),max(),min(),日期差函数datediff(大日期-小日期)

聚合函数

avg(col)返回指定列的平均值

min(col)返回指定列的最小值

max(col)返回指定列的最大值

sum(col)返回指定列的所有值之和

count(col)返回指定列中非null值的个数

group_concat()函数:聚合字符串的

#group_concat(列名 separator '指定的分隔符')函数
select department,group_concat(emp_name order by salary separator ';') from emp group 

时间日期常见的操作

#时间加减函数
date_add(date,interval int keyword)函数
date_sub(date,interval int keyword) 函数
例子:
SELECT DATE_ADD('2022-01-01', INTERVAL 2 YEAR);
select date_sub(current_date,interval 6 month);




#时间格式化函数
date_format(date,fmt) 函数
例子:
在 MySQL 中,DATE_FORMAT() 函数用于将日期转换为指定格式的字符串。你可以使用不同的格式代码来定义想要显示的日期格式。以下是一些常见的格式代码:

%Y:四位数的年份(例如:2022);
%y:两位数的年份(例如:22);
%m:两位数的月份(01-12);
%c:没有前导零的月份(1-12);
%d:两位数的日期(01-31);
%H:24小时制的小时数(00-23);
%h:12小时制的小时数(01-12);
%i:两位数的分钟数(00-59);
%s:两位数的秒数(00-59);
%p:AM 或 PM;
%W:星期的英文全名(例如:Sunday);
%a:星期的英文缩写(例如:Sun);
%M:月份的英文全名(例如:January);
%b:月份的英文缩写(例如:Jan)。
以下是 DATE_FORMAT() 函数的一些示例:

SELECT DATE_FORMAT('2022-01-31', '%Y-%m-%d');
-- 输出:2022-01-31

SELECT DATE_FORMAT('2022-01-31', '%d/%m/%Y');
-- 输出:31/01/2022

SELECT DATE_FORMAT('2022-01-31 15:30:45', '%H:%i:%s');
-- 输出:15:30:45

SELECT DATE_FORMAT('2022-01-31 15:30:45', '%h:%i:%s %p');
-- 输出:03:30:45 PM




#时间差函数
date_diff函数
例子:
select datediff(current_date(),'2008-08-08')
select time_diff(timestamp(),'2008-08-08 10:18:56')

TIMESTAMPDIFF(unit, datetime_expr1, datetime_expr2)函数
用于计算两个时间段之差
例子:
unit:要计算的时间单位,可以是YEAR、QUARTER、MONTH、WEEK、DAY、HOUR、MINUTE或SECOND。
datetime_expr1:第一个日期或时间表达式。
datetime_expr2:第二个日期或时间表达式。
以下是一些使用TIMESTAMPDIFF函数的例子:
计算两个日期之间的天数差异:
SELECT TIMESTAMPDIFF(DAY, '2022-01-01', '2022-01-10');
结果为:9,表示两个日期之间相差9天。




#其他格式转为日期格式函数
例子:
str_to_date(string,format)函数将字符串的函数转换为指定日期格式
select str_to_date('August 10 2017','%M%d%Y')

UNIX_TIMESTAMP() 函数: 该函数将一个日期时间表示的字符串转换为 UNIX 时间戳。
示例:
SELECT UNIX_TIMESTAMP('2024-02-27 12:34:56') AS unix_timestamp;

#在找我们的最大间隔数

			select 
				uid,
				max(days_diff) as days_window	
			from(
						select 
							tc.uid,tc.start_time,tc.start_time_lag,(datediff(tc.start_time,tc.start_time_lag)+1) as days_diff,tc.ranking
						from 
						(
							SELECT
									uid,
									DATE_FORMAT(start_time, '%Y-%m-%d') AS start_time,
									LAG(DATE_FORMAT(start_time, '%Y-%m-%d'), 1) OVER (partition by uid ORDER BY uid, start_time) AS start_time_lag,
									ROW_NUMBER() OVER (partition by uid ORDER BY uid, start_time) AS ranking
							FROM
									exam_record
							where year(start_time) =2021
									) tc
						where tc.ranking !=1
				  and uid in 
					(select uid
from exam_record 
where year(start_time) =2021
group by uid
having count(distinct date_format(start_time,'%Y-%m-%d'))>1)
and year(tc.start_time) =2021
				  ) tc1
				group by uid

这块主要用到了窗口函数lag()和row_number()函数

LAG(DATE_FORMAT(start_time, '%Y-%m-%d'), 1) OVER (partition by uid ORDER BY uid, start_time) AS start_time_lag,

ROW_NUMBER() OVER (partition by uid ORDER BY uid, start_time) AS ranking

#序号函数
window_function ( expr ) OVER ( 
  PARTITION BY ... 
  ORDER BY ... 
  frame_clause 
)
PARTITION BY类似于group by,不加就是正常的全局排序
ORDER BY 就是对分组进行排序
frame_clause窗口大小
例子
#序号函数,row_number() 按照顺序排序 | 唯一标识
rank() 同名排名相同,但是遇见同名会出现跳一位之后在进行排序,1 1  3 | 
dense_rank() 同名排名相同,且连续 1 1 2
select tid,	uid,ranking
from
(
    select b.uid,a.tag tid,
    row_number() over (partition by a.tag order by max(b.score) desc,min(b.score) desc,uid desc) as ranking
    from examination_info a
    left join exam_record b
    on a.exam_id=b.exam_id
    group by a.tag,b.uid
) tc
where ranking<=3;




#分布函数
#cume_dist()函数,小于等于当前行的比例,用于计算所占比例的。
select  
 dname,
 ename
 salary,
 cume_dist() over(order by salary) as rn1,-- 没有partition语句 所有的数据位于一组
 cume_dist() over(partition by dname order by salary) as rn2 
from employee;




#前后函数
lead()函数的用法(提前一期), lag(滞后一期)函数的用法

-- lead的用法是提前一期的意思,lag表示滞后一期的意思
select 
 dname,
 ename,
 hiredate,
 salary,
  lag(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as last_1_time,
 lead(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as after_1_time,
 lead(hiredate,2) over(partition by dname order by hiredate) as after_2_time 
from employee;




#头尾函数
返回截至当前行,开头的第一个first_value(expr)的值;返回最后一个值last_value(expr)的值

select 
 dname,
 ename,
 hiredate,
 salary,
  lag(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as last_1_time,
 lead(hiredate,1,'2000-01-01') over(partition by dname order by hiredate) as after_1_time,
 lead(hiredate,2) over(partition by dname order by hiredate) as after_2_time,
 first_value(salary) over(partition by dname order by hiredate) as first_1_salary,
 last_value(salary) over(partition by dname order by hiredate) as last_1_salary
from employee;




#分组聚合函数
#分组按照时间或者序号累加效果
select dname,ename,salary,sum(salary) over (partition by dname order by hiredate desc) as cn from employee;
或者
SELECT dname, ename, salary, SUM(salary) OVER (
  PARTITION BY dname
  ORDER BY hiredate
  ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cn
FROM employee;

#3 PRECEDING表示当前行的前第几行加和
#1 following 表示当前行的后第几行加和
# UNBOUNDED 表示边缘,UNBOUNDED PRECEDING第一行,UNBOUNDED  following 最后一行,实现任意区间段的统计

#不分组直接汇总
select dname,ename,salary,sum(salary) over (partition by dname ) as cn from employee;




#其他聚合函数
nth_value(expr,n)函数:指定分组列或者表达式的第几个数据

ntile(2)函数:为指定分组列进行平均分组

select 
 dname,
 ename,
 hiredate,
 salary,
  nth_value(salary,2) over(partition by dname order by hiredate) as last_1_time,
  ntile(2) over(partition by dname order by hiredate) as nt
from employee;

#最后把他们通过left join 进行连接即可。

全部评论

相关推荐

点赞 收藏 评论
分享
牛客网
牛客企业服务