WITH breaches_per_year AS (
SELECT
year,
COUNT(*) as number_of_breaches
FROM
@kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
GROUP BY
year
),
total_records_per_year AS (
SELECT
year,
SUM(
CASE
WHEN records_lost LIKE '%M' THEN CAST(REPLACE(REPLACE(records_lost, ',', ''), 'M', '') AS INTEGER) * 1000000
WHEN records_lost SIMILAR TO '^[0-9,]+$' THEN CAST(REPLACE(records_lost, ',', '') AS INTEGER)
ELSE 0
END
) AS total_records_lost
FROM
@kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
GROUP BY
year
)
SELECT
b.year,
b.number_of_breaches,
r.total_records_lost
FROM
breaches_per_year b
INNER JOIN total_records_per_year r ON b.year = r.year
ORDER BY
b.year ASC
LIMIT 19;