WITH breaches_per_sector AS (
SELECT
sector,
COUNT(*) as number_of_breaches,
SUM(
CASE
WHEN records_lost LIKE '%M' THEN CAST(REPLACE(REPLACE(records_lost, ',', ''), 'M', '') AS INTEGER) * 1000000
WHEN records_lost SIMILAR TO '^[0-9,]+$' THEN CAST(REPLACE(records_lost, ',', '') AS INTEGER)
ELSE 0
END
) AS total_records_lost
FROM
@kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
GROUP BY
sector
),
top_methods AS (
SELECT
sector,
method,
COUNT(*) as method_count,
ROW_NUMBER() OVER (PARTITION BY sector ORDER BY COUNT(*) DESC) as rank
FROM (
SELECT
sector,
TRIM(REGEXP_replace(method, '\s+', ' ')) AS method
FROM
@kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
) sub
GROUP BY
sector, method
)
SELECT
b.sector,
b.number_of_breaches,
b.total_records_lost,
tm.method as top_method
FROM
breaches_per_sector b
LEFT JOIN
top_methods tm ON b.sector = tm.sector AND tm.rank = 1
ORDER BY
b.number_of_breaches DESC
LIMIT 7;