Loading...Loading chart...
1WITH breaches_per_sector AS (
2    SELECT 
3        sector,
4        COUNT(*) as number_of_breaches,
5        SUM(
6            CASE 
7                WHEN records_lost LIKE '%M' THEN CAST(REPLACE(REPLACE(records_lost, ',', ''), 'M', '') AS INTEGER) * 1000000
8                WHEN records_lost SIMILAR TO '^[0-9,]+$' THEN CAST(REPLACE(records_lost, ',', '') AS INTEGER)
9                ELSE 0
10            END
11        ) AS total_records_lost
12    FROM 
13        @kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
14    GROUP BY 
15        sector
16),
17top_methods AS (
18    SELECT 
19        sector,
20        method,
21        COUNT(*) as method_count,
22        ROW_NUMBER() OVER (PARTITION BY sector ORDER BY COUNT(*) DESC) as rank
23    FROM (
24        SELECT 
25            sector,
26            TRIM(REGEXP_replace(method, '\s+', ' ')) AS method
27        FROM 
28            @kaggle.joebeachcapital_worlds_biggest_data_breaches_and_hacks.balloon_race_data_breaches_latest_breaches
29    ) sub
30    GROUP BY 
31        sector, method
32)
33
34SELECT 
35    b.sector,
36    b.number_of_breaches,
37    b.total_records_lost,
38    tm.method as top_method
39FROM 
40    breaches_per_sector b
41LEFT JOIN 
42    top_methods tm ON b.sector = tm.sector AND tm.rank = 1
43ORDER BY 
44    b.number_of_breaches DESC
45LIMIT 7;