Baselight
WITH breaches_per_sector AS (
    SELECT 
        sector,
        COUNT(*) as number_of_breaches,
        SUM(
            CASE 
                WHEN records_lost LIKE '%M' THEN CAST(REPLACE(REPLACE(records_lost, ',', ''), 'M', '') AS INTEGER) * 1000000
                WHEN records_lost SIMILAR TO '^[0-9,]+$' THEN CAST(REPLACE(records_lost, ',', '') AS INTEGER)
                ELSE 0
            END
        ) AS total_records_lost
    FROM 
        @kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
    GROUP BY 
        sector
),
top_methods AS (
    SELECT 
        sector,
        method,
        COUNT(*) as method_count,
        ROW_NUMBER() OVER (PARTITION BY sector ORDER BY COUNT(*) DESC) as rank
    FROM (
        SELECT 
            sector,
            TRIM(REGEXP_replace(method, '\s+', ' ')) AS method
        FROM 
            @kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
    ) sub
    GROUP BY 
        sector, method
)

SELECT 
    b.sector,
    b.number_of_breaches,
    b.total_records_lost,
    tm.method as top_method
FROM 
    breaches_per_sector b
LEFT JOIN 
    top_methods tm ON b.sector = tm.sector AND tm.rank = 1
ORDER BY 
    b.number_of_breaches DESC
LIMIT 7;
sectornumber_of_breachestotal_records_losttop_method
web1086145471873hacked
government44896299573hacked
health 43174669929lost device
finance391162050100hacked
retail37900973933hacked
telecoms22873584000hacked
app21553259000hacked

Share link

Anyone who has the link will be able to view this.