This document discusses using Amazon Redshift for analytics workloads related to advertising technology. It provides examples of SQL queries for tasks like sessionization, attribution, overlap analysis, and ad-hoc queries. It also discusses best practices for data modeling, ETL processes, and optimizing workload performance and costs in Redshift.
26. -- Number of ads seen per user.
-- Step 1: collapse the impressions inside the requested date window to one
-- row per user, carrying that user's impression count, cost and revenue.
WITH frequency_intermediate AS (
    SELECT
        user_id,
        COUNT(*)     AS impression_count,  -- one input row = one impression
        SUM(cost)    AS cost,
        SUM(revenue) AS revenue
    FROM impressions
    WHERE record_date BETWEEN <...>  -- placeholder: supply the date range
    GROUP BY user_id
)
-- Number of people who saw N ads.
-- Step 2: bucket the per-user rows by impression count; each output row is
-- one frequency value with the number of users at that frequency and the
-- cost / revenue attributed to them.
SELECT
    impression_count,
    COUNT(*)     AS user_count,
    SUM(cost)    AS total_cost,
    SUM(revenue) AS total_revenue
FROM frequency_intermediate
GROUP BY impression_count;
30. -- Frequency distribution broken down by campaign and site.
-- NOTE(review): frequency_intermediate is read here as an existing relation
-- exposing campaign_id, site_id, record_date and impression_count; it is not
-- defined inside this statement -- confirm it is persisted upstream.
-- Step 1: total impressions, cost and revenue per (user, campaign, site)
-- inside the requested date window.
WITH user_frequency AS (
    SELECT
        user_id,
        campaign_id,
        site_id,
        SUM(impression_count) AS frequency,
        SUM(cost)             AS cost,
        SUM(revenue)          AS revenue
    FROM frequency_intermediate
    WHERE record_date BETWEEN <...>  -- placeholder: supply the date range
    GROUP BY user_id, campaign_id, site_id
)
-- Step 2: for every campaign / site / frequency combination, count the users
-- at that frequency and total their cost and revenue.
SELECT
    campaign_id,
    site_id,
    frequency,
    COUNT(*)     AS user_count,
    SUM(cost)    AS total_cost,
    SUM(revenue) AS total_revenue
FROM user_frequency
GROUP BY campaign_id, site_id, frequency;
31.
32.
33.
34.
35.
36. -- Basic sessionization query: assemble user activity
-- that ended in a conversion into a timeline.
-- Every impression is paired with each later conversion by the same user,
-- so a user with several conversions yields one row per
-- (impression, conversion) pair. Rows are returned in impression order.
SELECT <...>  -- placeholder: supply the column list
FROM impressions AS i
INNER JOIN conversions AS c
    ON i.user_id = c.user_id
    -- Only impressions recorded strictly before the conversion qualify.
    AND i.record_date < c.record_date
ORDER BY i.record_date;
48. -- Site overlap: for every pair of sites, count the user rows that appear
-- on both sites.
-- NOTE(review): the count equals the number of distinct shared users only if
-- overlap_intermediate holds one row per (ak_user_id, site_id) -- confirm the
-- grain of that table.
WITH co_occurrences AS (
    -- Self-join on the user key to produce one row per pair of sites that the
    -- same user was seen on.
    SELECT
        oi.site_id  AS site1,
        oi2.site_id AS site2
    FROM overlap_intermediate AS oi
    INNER JOIN overlap_intermediate AS oi2
        ON oi.ak_user_id = oi2.ak_user_id
        -- Strict inequality emits each unordered pair once and drops
        -- self-pairs (site1 is always the larger site_id).
        AND oi.site_id > oi2.site_id
)
SELECT
    site1,
    site2,
    COUNT(*) AS co_occurrence_count
FROM co_occurrences
GROUP BY site1, site2;
50. -- Site overlap within a campaign: for every campaign and pair of sites,
-- count the users who were seen on both sites for that campaign.
WITH
-- One row per (user, site, campaign) inside the requested date window; the
-- GROUP BY only deduplicates, so each user counts once per pair below.
site_overlap_intermediate AS (
    SELECT
        user_id,
        site_id,
        campaign_id
    FROM overlap_intermediate
    WHERE record_date BETWEEN <...>  -- placeholder: supply the date range
    GROUP BY user_id, site_id, campaign_id
),
-- Self-join to produce one row per user, campaign and pair of sites.
site_co_occurrences AS (
    SELECT
        oi.campaign_id AS c_id,
        oi.site_id     AS site1,
        oi2.site_id    AS site2
    FROM site_overlap_intermediate AS oi
    INNER JOIN site_overlap_intermediate AS oi2
        -- Join on user_id: it is the only user key the CTE above projects.
        ON oi.user_id = oi2.user_id
        AND oi.campaign_id = oi2.campaign_id
        -- Strict inequality emits each unordered pair once and drops
        -- self-pairs (site1 is always the larger site_id).
        AND oi.site_id > oi2.site_id
)
SELECT
    c_id,
    site1,
    site2,
    COUNT(*) AS user_count
FROM site_co_occurrences
GROUP BY c_id, site1, site2;