-- Denominator - 1/0 flag for activity on a day
WITH denominator AS (
SELECT
source_data.unit_id,
source_data.date,
exposure_data.group_id,
MAX(1) as denominator
FROM source_data
JOIN exposure_data
ON
-- Only include users who saw the experiment
source_data.unit_id = exposure_data.unit_id
-- Only include data from after the user saw the experiment
-- In this case exposure_data is already deduped to the "first exposure"
AND source_data.timestamp >= exposure_data.timestamp
WHERE <start_filter>
GROUP BY ALL;
),
-- Numerator Candidates - 1/0 flag for success activity on a day
-- Note by default this is equivalent to the denominator CTE
numerator_candidates AS (
SELECT
source_data.unit_id,
source_data.date,
exposure_data.group_id,
MAX(1) as denominator
FROM source_data
JOIN exposure_data
ON
-- Only include users who saw the experiment
source_data.unit_id = exposure_data.unit_id
-- Only include data from after the user saw the experiment
-- In this case exposure_data is already deduped to the "first exposure"
AND source_data.timestamp >= exposure_data.timestamp
WHERE <success_filter>
GROUP BY ALL
),
-- Numerators, deduplicated - 1/0 flag for success per denominator
-- Now we have a 1-0 numerator flag per denominator-day
joined_data AS (
SELECT
denominator.unit_id,
denominator.date,
den.group_id,
1 as denominator
MAX(CASE WHEN numerator_candidates.unit_id IS NOT NULL THEN 1 ELSE 0 END) as numerator
FROM denominator
LEFT JOIN numerator_candidates
ON denominator.unit_id = numerator_candidates.unit_id
AND numerator_candidates.date BETWEEN
denominator.date + INTERVAL '<END - (LENGTH - 1)>' DAY
AND denominator.date + INTERVAL '<LENGTH>' DAY
GROUP BY ALL
)
-- Group Level
SELECT
group_id,
SUM(denominator) as unit_days_started,
SUM(numerator) as unit_days_completed,
SUM(numerator)/SUM(denominator) as mean
FROM joined_data
GROUP BY ALL