SELECT
TRIM(REGEXP_replace(method, '\s+', ' ')) AS method,
COUNT(*) as number_of_breaches,
SUM(
CASE
WHEN records_lost LIKE '%M' THEN CAST(REPLACE(REPLACE(records_lost, ',', ''), 'M', '') AS INTEGER) * 1000000
WHEN records_lost SIMILAR TO '^[0-9,]+$' THEN CAST(REPLACE(records_lost, ',', '') AS INTEGER)
ELSE 0
END
) AS total_records_lost,
AVG(
CASE
WHEN data_sensitivity IN ('1', '2', '3', '4', '5') THEN CAST(data_sensitivity AS INTEGER)
ELSE NULL
END
)*10 AS average_sensitivity
FROM
@kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
GROUP BY
TRIM(REGEXP_replace(method, '\s+', ' '))
ORDER BY
total_records_lost DESC
LIMIT 5;