首页 > 试题广场 >

计算用户的平均次日留存率

[编程题]计算用户的平均次日留存率
  • 热度指数:346684 时间限制:C/C++ 1秒,其他语言2秒 空间限制:C/C++ 256M,其他语言512M
  • 算法知识视频讲解
题目:现在运营想要查看用户在某天刷题后第二天还会再来刷题的留存率。请你取出相应数据。

示例:question_practice_detail
id device_id question_id result date
1 2138 111 wrong 2021-05-03
2 3214 112 wrong 2021-05-09
3 3214 113 wrong 2021-06-15
4 6543 111 right 2021-08-13
5 2315 115 right 2021-08-13
6 2315 116 right 2021-08-14
7 2315 117 wrong 2021-08-15
8 3214 112 wrong 2021-05-09
9 3214 113 wrong 2021-08-15
10 6543 111 right 2021-08-13
11 2315 115 right 2021-08-13
12 2315 116 right 2021-08-14
13 2315 117 wrong 2021-08-15
14 3214 112 wrong 2021-08-16
15 3214 113 wrong 2021-08-18
16 6543 111 right 2021-08-13

根据示例,你的查询应返回以下结果:
avg_ret
0.3000
示例1

输入

drop table if  exists `question_practice_detail`;
CREATE TABLE `question_practice_detail` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`question_id`int NOT NULL,
`result` varchar(32) NOT NULL,
`date` date NOT NULL
);

INSERT INTO question_practice_detail VALUES(1,2138,111,'wrong','2021-05-03');
INSERT INTO question_practice_detail VALUES(2,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(3,3214,113,'wrong','2021-06-15');
INSERT INTO question_practice_detail VALUES(4,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(5,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(6,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(7,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(8,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(9,3214,113,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(10,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(11,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(12,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(13,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(14,3214,112,'wrong','2021-08-16');
INSERT INTO question_practice_detail VALUES(15,3214,113,'wrong','2021-08-18');
INSERT INTO question_practice_detail VALUES(16,6543,111,'right','2021-08-13');

输出

avg_ret
0.3000
WITH a AS 
(   SELECT device_id , date
    FROM question_practice_detail
    GROUP BY device_id,date
    )
SELECT
(SELECT COUNT(*) FROM a a1
JOIN a a2
ON a1.device_id = a2.device_id AND a1.date + 1 = a2.date)
/
(SELECT COUNT(*) FROM a ) AS avg_ret;
思路为:
1.通过对device_id与date进行分组得到除去掉同一用户同一天其他的答题记录,因为有的用户在同一天答过多个题目
2.通过1得出来的表进行自连接,条件为当设备id相同时date加1天仍能有date与之匹配,查询出第二天再次刷题的条数,进行聚合统计总数得出分子
3.通过1进行聚合统计出一共有多少次有效刷题次数
4.在通过select查询分子除以分母得到结果
发表于 2025-07-18 23:10:48 回复(0)
select count(if(datediff(c.date2,c.date1)=1,1,null ))/count(c.date1) as avg_ret
from(
SELECT
        a.device_id as id
        ,a.date as date1 
        ,lead(a.date) over(partition by device_id order by a.date) as date2
from 
    (select distinct device_id,date from question_practice_detail) a
)c

简洁明了
发表于 2025-07-18 19:25:37 回复(0)
WITH 
-- 获取每个用户每天的刷题记录(去重)
user_daily_practice AS (
    SELECT DISTINCT 
        device_id,
        date
    FROM question_practice_detail
),

-- 计算每个用户连续两天刷题的情况 (去重)
retention_data AS (
    SELECT 
        distinct a.device_id,
        a.date AS first_day,
        b.date AS next_day
    FROM user_daily_practice a
    LEFT JOIN user_daily_practice b 
        ON a.device_id = b.device_id 
        AND b.date = DATE_ADD(a.date, INTERVAL 1 DAY)
)

-- 计算次日留存率
SELECT 
    ROUND(
        COUNT(next_day) / COUNT(first_day),
        4
    ) AS avg_ret
FROM retention_data;

发表于 2025-07-17 11:59:25 回复(0)
自外连接
SELECT 
        count(date),
        count(next_date), 
        count(next_date)/count(date) AS avg_ret
FROM (SELECT DISTINCT 
                    qpd.device_id,
                    qpd.date,
                    nextqpd.date AS next_date
            FROM question_practice_detail qpd
            LEFT JOIN question_practice_detail nextqpd 
            ON qpd.device_id = nextqpd.device_id AND 
                 DATE_ADD(qpd.date, INTERVAL 1 DAY) = nextqpd.date)  AS id_date_nextdate
发表于 2025-07-16 23:07:43 回复(1)
select
round(count(d2.date)/count(d1.date),4) as avg_ret
from (
select
distinct device_id,
date
from question_practice_detail
) as d1
left join (
select
distinct device_id,
date
from question_practice_detail
) as d2
on d1.device_id = d2.device_id
and d2.date = date_add(d1.date, interval 1 day)

发表于 2025-06-28 16:47:06 回复(0)
select
    count(date2) / count(date1) avg_ret
from (
    select
       distinct q1.device_id, q1.date date1, q2.date date2
    from
        (select distinct q.device_id, q.date from question_practice_detail q) q1
    left join question_practice_detail q2 on q1.device_id = q2.device_id and date_add(q1.date, interval 1 day ) = q2.date
) t

发表于 2025-06-28 14:03:27 回复(0)
我觉得关键是将device_id,date 唯一性,再加上窗口函数的运用
select sum(date_count)/count(device_id) as avg_ret
from(select 
device_id,
date,
if(lag(date,1)over(partition by device_id order by date) = date-1,1,0) as date_count
from question_practice_detail
group by device_id,date) f


发表于 2025-06-21 21:51:06 回复(0)
感觉主要是要确定‘留存率’的计算方法:连续两天有答题的用户数/只有一天答题的用户数

select count(case when daka ='1' then '1' end)/(count(device_id)-count(case when daka=0 then '1' end))  avg_ret

from (

select device_id,

date,

lag(date,1)over(partition by device_id order by date asc) b,

date-lag(date,1)over(partition by device_id order by date asc) daka

from question_practice_detail

) a

不知道这段代码是否正确(结果正确),有无大佬指导下
发表于 2025-06-13 16:10:39 回复(0)
with a as
(select distinct device_id,date from question_practice_detail order by date),
b as(select device_id,date,lead(date,1) over(partition by device_id order by date) date2 from a)
select round(count(case when date+1=date2 then 1 else null end)/count(device_id),4) avg_ret from b;
发表于 2025-06-11 23:11:40 回复(2)
with
    user_answer_detail as ( -- 对原表进行去重处理, 只保留每个用户每天的活跃记录
        select
            device_id,
            date
        from
            question_practice_detail
        group by
            device_id,
            date
    ),
    active_at_day1 as ( -- 创建标识列,检查每个用户在注册后第1天是否活跃
        select
            r.device_id,
            r.date,
            case
                when exists (
                    select
                        1
                    from
                        user_answer_detail a
                    where
                        a.device_id = r.device_id -- 相关子查询,条件1为确保是同一个用户
                        and a.date = DATE_ADD(r.date, interval 1 day) -- 条件2为第二天也有记录
                ) then 1 -- 满足条件记为1
                else 0 -- 否则记为0,方便后续计算
            end as is_active_at_day1
        from
            user_answer_detail r
    )
select
    round(sum(is_active_at_day1)/count(1),4) avg_ret -- 进行计算:标识列总和/记录总数(上表已去重完毕)
from
    active_at_day1;

一小时成果,个人认为是不使用join的最易读易懂的一版

发表于 2025-06-11 19:24:04 回复(0)
#自连接
SELECT count(t2.date)/count(t1.date) AS avg_ret
FROM(
    SELECT distinct
    device_id,date
FROM
    question_practice_detail
)t1
LEFT JOIN
(
    SELECT distinct
    device_id,date
FROM
    question_practice_detail
)t2
ON t1.device_id=t2.device_id AND DATEDIFF(t1.date,t2.date)=1  
个人认为最简洁的代码 使用DATEDIFF优化了一下
发表于 2025-06-04 15:32:08 回复(0)
select
    count(t1.device_id) / (select count(distinct device_id, date) from question_practice_detail) as avg_ret
from
    (select distinct device_id, date_add(date, interval1day) as next_date
    from question_practice_detail
    ) t1
inner join
    (select distinct device_id, date from question_practice_detail) t2 on t1.device_id = t2.device_id and t1.next_date = t2.date
;
一种利用内连接筛选的用户平均次留计算,可简单推广至任意天留存计算
发表于 2025-05-26 14:56:11 回复(0)
select count(if(datediff(b.date,a.date)=1,b.date,null))/count(if(datediff(b.date,a.date)=0,a.date,null)) avg_ret
from (select device_id,date
from question_practice_detail
group by device_id,date) a,
(select device_id,date
from question_practice_detail
group by device_id,date) b
where a.device_id=b.device_id and a.date<=b.date;
模仿了戴戴师兄的代码
发表于 2025-05-09 22:04:37 回复(0)
with qpd as (select * from question_practice_detail),
a as (select distinct t1.device_id, t1.date
from question_practice_detail as t1
join question_practice_detail as t2
on t1.device_id = t2.device_id and datediff(t2.date, t1.date) = 1)

select count(distinct a.device_id, a.date) /count(distinct qpd.device_id, qpd.date) as avg_ret
from a, qpd
发表于 2025-04-28 20:08:55 回复(0)
我有不理解的地方是,假如一个用户连续两天来刷题,计算结果是50%,但是我理解是100%,请哪位帮我理一下概念和思路

with tmp as( select device_id ,date from question_practice_detail group by 1, 2 ) select count(b.date) / count(a.date) from tmp a left join tmp b on a.device_id = b.device_id and a.date = date_sub(b.date, interval 1 day) 


发表于 2025-04-27 16:19:38 回复(0)
select sum(case when date_add(date, interval 1 day) = tom then 1 else 0 end)/count(*) avg_ret
from (
select device_id, date,
lead(date, 1) over(partition by device_id order by date) tom
from question_practice_detail
group by device_id, date
) ad
首先创建子表:通过对device_id, date聚合,找到每人刷题日期;然后使用窗口函数使日期列向下偏移1,获得此人第二次刷题的日期tom
然后判断子表中date与tom是否相差1天,如果是记为1,最后汇总除以总天数
发表于 2025-04-26 18:39:57 回复(0)
SELECT ROUND(SUM(IF(date_diff = 1, 1, 0)) / COUNT(*), 4) AS avg_ret
FROM(
    # 计算时间差
    SELECT device_id, date, DATEDIFF(date, LAG(date, 1, NULL) OVER(PARTITION BY device_id ORDER BY      date)) AS date_diff
    FROM (
        # 对每天的答题记录进行合并
        SELECT device_id, date
        FROM question_practice_detail
        GROUP BY device_id, date
    )t
)t
;
发表于 2025-04-09 14:05:34 回复(0)
SELECT
    (
        SELECT
            COUNT(DISTINCT device_id, q1.date)
        FROM
            question_practice_detail q1
            LEFT JOIN question_practice_detail q2 USING (device_id)
        WHERE
            DATEDIFF (q2.date, q1.date) = 1
    ) / (
        SELECT
            COUNT(DISTINCT device_id, date)
        FROM
            question_practice_detail
    ) AS avg_retd

无敌了这题目,题干就那么一点,什么说明也没有,果然题干越少的题目越难。
这题目关键问题是难理解,所以需要翻译一下题目的要求,翻译出来应该就好多了。
1. 分母:即用户在某一天刷题的次数,与刷题的题目(question_id)无关。换句话说,我们只需要统计去重后的device_id和date即可得到所有用户的刷题记录。计数结果即为分母。
2. 分子:在分母结果的基础上,寻找对于同一用户,后一天也有刷题记录的记录。例如,2315在8.14的后一天(即)8.15有刷题记录,则2315在8.14的刷题记录应该保留下来。计数结果即为分子。
发表于 2025-04-08 00:21:18 回复(0)