Baselight
WITH breaches_per_year AS (
    SELECT 
        year,
        COUNT(*) as number_of_breaches
    FROM 
        @kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
    GROUP BY 
        year
),
total_records_per_year AS (
    SELECT 
        year,
        SUM(
            CASE 
                WHEN records_lost LIKE '%M' THEN CAST(REPLACE(REPLACE(records_lost, ',', ''), 'M', '') AS INTEGER) * 1000000
                WHEN records_lost SIMILAR TO '^[0-9,]+$' THEN CAST(REPLACE(records_lost, ',', '') AS INTEGER)
                ELSE 0
            END
        ) AS total_records_lost
    FROM 
        @kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
    GROUP BY 
        year
)

SELECT 
    b.year,
    b.number_of_breaches,
    r.total_records_lost
FROM 
    breaches_per_year b
INNER JOIN total_records_per_year r ON b.year = r.year
ORDER BY 
    b.year ASC
LIMIT 19;
yearnumber_of_breachestotal_records_lost
2004192000000
2005344100000
2006550825000
200710150597405
20081976255500
20099245895400
2010139783285
201137201236162
201223221926177
2013351272018579
201419338674396
201525482857030
2016331803040035
2017251234191309
2018412719547878
2019452005221869
202028521127873
2021342329371275
202212540812038

Share link

Anyone who has the link will be able to view this.