题解 | 内容社区用户活跃度、转化与广告归因分析
内容社区用户活跃度、转化与广告归因分析
https://www.nowcoder.com/practice/e491704f99ed4affb1d42127bf16a4a9
WITH
-- 1. 基础聚合:计算各个时间段的转化数,以及2023年各区域的注册数
temp AS (
SELECT
c.campaign_id,
c.campaign_name,
SUM(CASE WHEN e.event_type = 'signup' AND DATE_FORMAT(e.event_time, '%Y-%m') = '2024-02' THEN 1 ELSE 0 END) AS conv_2024_02,
SUM(CASE WHEN e.event_type = 'signup' AND DATE_FORMAT(e.event_time, '%Y-%m') = '2023-02' THEN 1 ELSE 0 END) AS conv_2023_02,
SUM(CASE WHEN e.event_type = 'signup' AND DATE_FORMAT(e.event_time, '%Y-%m') = '2024-01' THEN 1 ELSE 0 END) AS conv_2024_01,
SUM(CASE WHEN e.event_type = 'signup' AND YEAR(e.event_time) = 2023 THEN 1 ELSE 0 END) AS total_2023_signup,
SUM(CASE WHEN e.event_type = 'signup' AND YEAR(e.event_time) = 2023 AND u.region = '华北' THEN 1 ELSE 0 END) AS north_2023,
SUM(CASE WHEN e.event_type = 'signup' AND YEAR(e.event_time) = 2023 AND u.region = '华东' THEN 1 ELSE 0 END) AS east_2023,
SUM(CASE WHEN e.event_type = 'signup' AND YEAR(e.event_time) = 2023 AND u.region = '华南' THEN 1 ELSE 0 END) AS south_2023
FROM Campaigns c
LEFT JOIN Events e ON c.campaign_id = e.campaign_id
LEFT JOIN Users u ON e.user_id = u.user_id
GROUP BY c.campaign_id, c.campaign_name
),
-- 2.1 筛选出2024-02所有的注册事件
temp1 as (
select
e.user_id,
e.event_id,
e.campaign_id,
e.event_time as sign_time,
e.channel as sign_channel
from Events e
where e.event_type = 'signup' and DATE_FORMAT(event_time, '%Y-%m') = '2024-02'
),
-- 2.2 在已经筛选好的结果里面找到所有小于注册时间的点击时间
temp2 as (
select
tp1.event_id,
tp1.campaign_id,
tp1.sign_time,
tp1.sign_channel,
e1.event_type AS click_type,
e1.event_time AS click_time,
e1.channel AS click_channel,
ROW_NUMBER() OVER(PARTITION BY tp1.event_id ORDER BY e1.event_time DESC) AS rn
from temp1 tp1
left join Events e1
on e1.user_id = tp1.user_id
and e1.campaign_id = tp1.campaign_id
and e1.event_type = 'click'
and e1.event_time <= tp1.sign_time
),
-- 2.3 统计出最近的点击(处理归因)
temp3 as (
select
tp2.campaign_id,
tp2.sign_time,
tp2.click_time,
coalesce(tp2.click_channel, tp2.sign_channel) as final_channel
from temp2 tp2
where tp2.rn = 1
),
-- 2.4 计算从点击到注册的平均时长
temp4 as (
select
tp3.campaign_id,
round(avg(timestampdiff(minute, tp3.click_time, tp3.sign_time)), 2) as avg_click_to_signup_min_2024_02
from temp3 tp3
where click_time is not null
group by tp3.campaign_id
),
-- 2.5 统计最多的渠道
temp5 as (
select
campaign_id,
final_channel,
row_number() over(
partition by campaign_id
order by count(*) desc,
case final_channel
WHEN 'app' THEN 1
WHEN 'mini' THEN 2
WHEN 'web' THEN 3
ELSE 4
end asc
) as rnk
from temp3
where final_channel is not null
GROUP BY campaign_id, final_channel
),
-- 2.6 得出最终结果
temp6 as (
select
campaign_id,
final_channel as top_channel_2024_02
from temp5
where rnk = 1
)
-- 5. 最终结果合并
SELECT
tp.campaign_id,
tp.campaign_name,
tp.conv_2023_02,
tp.conv_2024_02,
tp.conv_2024_01,
(tp.conv_2024_02 - tp.conv_2023_02) AS yoy_delta,
(tp.conv_2024_02 - tp.conv_2024_01) AS mom_delta,
IF(tp.total_2023_signup = 0, 0.00, ROUND(tp.north_2023 / tp.total_2023_signup * 100, 2)) AS north_pct_2023,
IF(tp.total_2023_signup = 0, 0.00, ROUND(tp.south_2023 / tp.total_2023_signup * 100, 2)) AS south_pct_2023,
IF(tp.total_2023_signup = 0, 0.00, ROUND(tp.east_2023 / tp.total_2023_signup * 100, 2)) AS east_pct_2023,
tp4.avg_click_to_signup_min_2024_02,
tp6.top_channel_2024_02
FROM temp tp
LEFT JOIN temp4 tp4 ON tp.campaign_id = tp4.campaign_id
LEFT JOIN temp6 tp6 ON tp.campaign_id = tp6.campaign_id
ORDER BY tp.campaign_id ASC, tp.campaign_name ASC;

查看1道真题和解析