SELECT COUNT(distinct q2.device_id,q2.date)/count(DISTINCT q1.device_id,q1.date) as avg_ret from question_practice_detail as q1 left outer join question_practice_detail as q2 on q1.device_id=q2.device_id and DATEDIFF(q2.date,q1.date)=1思路是:
select avg(if(b.device_id is not null,1,0)) as avg_ret from (select distinct device_id,date from question_practice_detail )a left join ( select distinct device_id,date_sub(date,interval 1 day) as date from question_practice_detail )b on a.device_id = b.device_id and a.date = b.dateDATE_SUB() 函数从日期减去指定的时间间隔。
date_sub(date,interval 1 day)
select count(distinct b.device_id,b.date)/count(distinct a.device_id,a.date) as avg_ret from question_practice_detail a left JOIN question_practice_detail b on a.date = b.date-1 and a.device_id = b.device_id先用左连接,可以得到第一天回答但是第二天没有回答的记录,然后通过count,并且通过和device_id和日期进行去重
select count(id) / (select count(distinct device_id,date) from question_practice_detail) as avg_ret from (SELECT a.device_id as id ,a.date as date1 ,lead(a.date,1) over(partition by device_id order by a.date) as date2 from (select distinct device_id,date from question_practice_detail) a ) b where DATEDIFF(date2,date1) =1
思路:表的自联结
SELECT COUNT(b.device_id)/ COUNT(a.device_id) FROM( SELECT DISTINCT device_id,date FROM question_practice_detail ) as a LEFT JOIN ( SELECT DISTINCT device_id,DATE_SUB(date,interval 1 day) as date FROM question_practice_detail ) as b ON a.date=b.date AND a.device_id=b.device_id
SELECT COUNT(q2.device_id) / COUNT(q1.device_id) AS avg_ret FROM (SELECT DISTINCT device_id, date FROM question_practice_detail)as q1 LEFT JOIN (SELECT DISTINCT device_id, date FROM question_practice_detail) AS q2 ON q1.device_id = q2.device_id AND q2.date = DATE_ADD(q1.date, interval 1 day)
select count(date2)/count(date) as avg_ret from
(select distinct device_id,date from question_practice_detail) a left join
(select distinct device_id,date_sub(date,interval 1 day) date2 from question_practice_detail) b on a.device_id=b.device_id and a.date=b.date2;#只有在满足两个限定条件的情况下,date2才能匹配出数值,不能匹配的情况下,即第二天没有刷题,date2 is null
这道题确实有些难,吐槽一下真的要实现这个功能我宁愿写两个SQL查出来再进行计算,性能差也比花半天时间来写强
select count(distinct q3.date,q3.device_id) from (select q1.device_id,q1.date from question_practice_detail q1 left join question_practice_detail q2 on q1.device_id = q2.device_id where date_add(q1.date,interval 1 day)=q2.date) q3;
select count(DISTINCT date,device_id) from question_practice_detail;
select q5.times/count(DISTINCT q4.date,q4.device_id) avg_ret from question_practice_detail q4, (select count(distinct q3.date,q3.device_id) times from (select q1.device_id,q1.date from question_practice_detail q1 left join question_practice_detail q2 on q1.device_id = q2.device_id where date_add(q1.date,interval 1 day)=q2.date) q3) q5;
select round(count(distinct t1.device_id,t1.date)/count(distinct t.device_id,t.date),4) as avg_ret from question_practice_detail t left join question_practice_detail t1 on t.device_id = t1.device_id and datediff(t1.date,t.date)=1
这个题看题目是次日留存的,其实本质是考察表的自联结的。
但易错的点不在这里。
作为老手,第一时间想到的就是自联结。
然后 思路就是使用第二天来的人/除以第一天来的人即可
然后写了count(distint t1.device_id)/count(distint t.device_id)
这么写是因为没有第二天的数据,不会左联到T表,就为null,count(列)
是统计非null的行数。但是这个结果不对。
然后把数据导入到本地调试一下,然后明白了代表一个用户一天的情况,需要把天也加进来,device_id+date
才能标识唯一。
然后就求出正确的答案了。
count本身不能求多列,但是加入了distinct 就把多列变为一个组了,就可以了。
题目:现在运营想要查看用户在某天刷题后第二天还会再来刷题的平均概率。请你取出相应数据
分析:
在开始的时候,我是以人为计量进行计算,所以先进行了关联,最终去重是device_id,计算错误;又想到了①,对(device_id,date)。
初始sql如下:
select (t1.num / t2.num) as avg_ret from ( -- 计算第二天再来的记录数量 select count(distinct a.device_id,a.date) as num from question_practice_detail a left join question_practice_detail b on a.device_id = b.device_id and a.date = date_add(b.date, interval -1 day) where b.device_id is not null ) t1, ( -- 计算总记录数量 select count(distinct device_id, date) as num from question_practice_detail ) t2;
后期可以根据sql 的执行计划继续再次优化
SELECT COUNT(distinct q2.device_id,q2.date)/count(DISTINCT q1.device_id,q1.date) as avg_ret from question_practice_detail as q1 left outer join question_practice_detail as q2 on q1.device_id=q2.device_id and DATEDIFF(q2.date,q1.date)=1
SELECT COUNT(distinct qq.device_id)/COUNT(distinct q.device_id) AS avg_ret FROM question_practice_detail q LEFT JOIN question_practice_detail qq ON qq.device_id=q.device_id AND q.date=qq.date-1