相互关注SQL写法(5种不同类型写法)
背景
群友字节面试让写出3种方法,但只写出2种最后面试没过,相互关注是常见一道SQL题,但这次我们加一下挑战难度,通过不同sql方法去解(不考虑时效),且之前用过的方法后续不会再用,比较考察SQL的基本功。
题目
现在有一张表为fans(粉丝表) 里面有两个字段from_user,to_user ,如果两者一致代表from_user关注了to_user
数据
with fans as (
SELECT
'001' as from_user,'002' as to_user
union all
SELECT
'002' as from_user,'001' as to_user
union all
SELECT
'003' as from_user,'001' as to_user
union all
SELECT
'005' as from_user,'001' as to_user
)
解法1:关联(scan2次)
select t1.from_user from fans t1 join fans t2 on t1.from_user=t2.to_user and t1.to_user=t2.from_user ;
from_user |
001 |
002 |
解法2:关联(scan2次)
select u1 from ( select from_user u1,to_user u2 from fans union all select to_user u1,from_user u2 from fans ) a group by u1,u2 having count(1)=2 ;
U1 |
001 |
002 |
解法3:开窗(scan1次),最优,要的就是这个答案
select from_user
from
(
select
from_user,
if(count(fans_user) over (partition by fans_user) > 1, 1,0) as is_fans
from
(
select if(hash(from_user)>hash(to_user),concat(from_user,'-',to_user),concat(to_user,'-',from_user)) fans_user
,from_user
from fans
)
)
where is_fans=1
;
from_user |
001 |
002 |
解法4:炸裂(posexplode),用index去匹配,问题在于数据量级膨胀
select count(*)
,fans2
,fans1
from (
SELECT
t1.fans1 ,
t2.fans2
FROM (
SELECT
CONCAT(from_user,'-',to_user) AS fan1,
CONCAT(to_user,'-',from_user) AS fan2,
from_user
FROM fans
) as temp_table
LATERAL VIEW posexplode(split(temp_table.fan1,'-')) t1 AS fans_index1,fans1
LATERAL VIEW posexplode(split(temp_table.fan2,'-')) t2 AS fans_index2,fans2
WHERE t1.fans_index1 = t2.fans_index2
)
group by fans2
,fans1
having count(*)=2
cnt | fans2 | fans1 |
2 | 002 | 001 |
2 | 001 | 002 |
解法5:集合排序(sort_array),通过拼接collect_list形成list,再去切割排序
SELECT count(sort_array(split(user_fans,'-'))) as cnt
,sort_array(split(user_fans,'-')) as fans_list
from (
SELECT concat_ws(',',COLLECT_LIST(concat(from_user,'-',to_user))) as user_fans
,from_user
FROM fans
group by from_user
) GROUP BY sort_array(split(user_fans,'-'))
having count(sort_array(split(user_fans,'-')))=2
cnt | fans_list |
2 | [001,002] |
