Microsoft Malware Sample
Vectorized malware byte-files.
@kaggle.dheemanthbhat_microsoft_malware_sample
Vectorized malware byte-files.
@kaggle.dheemanthbhat_microsoft_malware_sample
This dataset contains vectorized byte-files taken from the original dataset of the Microsoft Malware Classification Challenge (BIG 2015) competition. The original dataset is described in http://arxiv.org/abs/1802.10135.
The original Train and Test datasets are ~18GB each. This random sample, extracted and vectorized, is just ~15MB in size.
CountVectorizer. Note: The original dataset contains only 42 byte-files for malware class 5 (Simda).
-- Schema for the vectorized malware byte-file sample.
--
-- Three tables are defined:
--   test_vec        : one row of vectorized features per test byte-file
--   trainlabels_bal : class label per training sample id
--   train_vec       : one row of vectorized features per training byte-file
--
-- The "n_XX" columns are presumably per-token occurrence counts produced by
-- CountVectorizer, where the trailing comment on each column appears to be the
-- original two-hex-digit byte token. TODO confirm against the source CSV header.
--
-- NOTE(review): the column list stops at "n_61" in this listing. The underlying
-- vectors may contain further byte columns that were truncated; verify before
-- relying on this schema for a full load.
--
-- Each separating comma is placed BEFORE the trailing "--" comment. A comma
-- written after "--" is part of the comment and leaves the column list
-- unparseable.

-- Vectorized features for the test sample.
CREATE TABLE test_vec (
    "id" VARCHAR,       -- sample identifier
    "bytfsize" BIGINT,  -- presumably the byte-file size; units not stated, confirm
    "n_00" BIGINT, -- 00
    "n_01" BIGINT, -- 01
    "n_02" BIGINT, -- 02
    "n_03" BIGINT, -- 03
    "n_04" BIGINT, -- 04
    "n_05" BIGINT, -- 05
    "n_06" BIGINT, -- 06
    "n_07" BIGINT, -- 07
    "n_08" BIGINT, -- 08
    "n_09" BIGINT, -- 09
    "n_0a" BIGINT, -- 0a
    "n_0b" BIGINT, -- 0b
    "n_0c" BIGINT, -- 0c
    "n_0d" BIGINT, -- 0d
    "n_0e" BIGINT, -- 0e
    "n_0f" BIGINT, -- 0f
    "n_10" BIGINT, -- 10
    "n_11" BIGINT, -- 11
    "n_12" BIGINT, -- 12
    "n_13" BIGINT, -- 13
    "n_14" BIGINT, -- 14
    "n_15" BIGINT, -- 15
    "n_16" BIGINT, -- 16
    "n_17" BIGINT, -- 17
    "n_18" BIGINT, -- 18
    "n_19" BIGINT, -- 19
    "n_1a" BIGINT, -- 1a
    "n_1b" BIGINT, -- 1b
    "n_1c" BIGINT, -- 1c
    "n_1d" BIGINT, -- 1d
    "n_1e" BIGINT, -- 1e
    "n_1f" BIGINT, -- 1f
    "n_20" BIGINT, -- 20
    "n_21" BIGINT, -- 21
    "n_22" BIGINT, -- 22
    "n_23" BIGINT, -- 23
    "n_24" BIGINT, -- 24
    "n_25" BIGINT, -- 25
    "n_26" BIGINT, -- 26
    "n_27" BIGINT, -- 27
    "n_28" BIGINT, -- 28
    "n_29" BIGINT, -- 29
    "n_2a" BIGINT, -- 2a
    "n_2b" BIGINT, -- 2b
    "n_2c" BIGINT, -- 2c
    "n_2d" BIGINT, -- 2d
    "n_2e" BIGINT, -- 2e
    "n_2f" BIGINT, -- 2f
    "n_30" BIGINT, -- 30
    "n_31" BIGINT, -- 31
    "n_32" BIGINT, -- 32
    "n_33" BIGINT, -- 33
    "n_34" BIGINT, -- 34
    "n_35" BIGINT, -- 35
    "n_36" BIGINT, -- 36
    "n_37" BIGINT, -- 37
    "n_38" BIGINT, -- 38
    "n_39" BIGINT, -- 39
    "n_3a" BIGINT, -- 3a
    "n_3b" BIGINT, -- 3b
    "n_3c" BIGINT, -- 3c
    "n_3d" BIGINT, -- 3d
    "n_3e" BIGINT, -- 3e
    "n_3f" BIGINT, -- 3f
    "n_40" BIGINT, -- 40
    "n_41" BIGINT, -- 41
    "n_42" BIGINT, -- 42
    "n_43" BIGINT, -- 43
    "n_44" BIGINT, -- 44
    "n_45" BIGINT, -- 45
    "n_46" BIGINT, -- 46
    "n_47" BIGINT, -- 47
    "n_48" BIGINT, -- 48
    "n_49" BIGINT, -- 49
    "n_4a" BIGINT, -- 4a
    "n_4b" BIGINT, -- 4b
    "n_4c" BIGINT, -- 4c
    "n_4d" BIGINT, -- 4d
    "n_4e" BIGINT, -- 4e
    "n_4f" BIGINT, -- 4f
    "n_50" BIGINT, -- 50
    "n_51" BIGINT, -- 51
    "n_52" BIGINT, -- 52
    "n_53" BIGINT, -- 53
    "n_54" BIGINT, -- 54
    "n_55" BIGINT, -- 55
    "n_56" BIGINT, -- 56
    "n_57" BIGINT, -- 57
    "n_58" BIGINT, -- 58
    "n_59" BIGINT, -- 59
    "n_5a" BIGINT, -- 5a
    "n_5b" BIGINT, -- 5b
    "n_5c" BIGINT, -- 5c
    "n_5d" BIGINT, -- 5d
    "n_5e" BIGINT, -- 5e
    "n_5f" BIGINT, -- 5f
    "n_60" BIGINT, -- 60
    "n_61" BIGINT  -- 61
);

-- Class labels for the training sample.
-- NOTE(review): the "_bal" suffix presumably means a class-balanced sample; confirm.
CREATE TABLE trainlabels_bal (
    "id" VARCHAR,   -- sample identifier; presumably matches train_vec."id", confirm
    "class" BIGINT  -- malware class number (the dataset notes mention class 5 = Simda)
);

-- Vectorized features for the training sample; same column layout as test_vec.
CREATE TABLE train_vec (
    "id" VARCHAR,       -- sample identifier
    "bytfsize" BIGINT,  -- presumably the byte-file size; units not stated, confirm
    "n_00" BIGINT, -- 00
    "n_01" BIGINT, -- 01
    "n_02" BIGINT, -- 02
    "n_03" BIGINT, -- 03
    "n_04" BIGINT, -- 04
    "n_05" BIGINT, -- 05
    "n_06" BIGINT, -- 06
    "n_07" BIGINT, -- 07
    "n_08" BIGINT, -- 08
    "n_09" BIGINT, -- 09
    "n_0a" BIGINT, -- 0a
    "n_0b" BIGINT, -- 0b
    "n_0c" BIGINT, -- 0c
    "n_0d" BIGINT, -- 0d
    "n_0e" BIGINT, -- 0e
    "n_0f" BIGINT, -- 0f
    "n_10" BIGINT, -- 10
    "n_11" BIGINT, -- 11
    "n_12" BIGINT, -- 12
    "n_13" BIGINT, -- 13
    "n_14" BIGINT, -- 14
    "n_15" BIGINT, -- 15
    "n_16" BIGINT, -- 16
    "n_17" BIGINT, -- 17
    "n_18" BIGINT, -- 18
    "n_19" BIGINT, -- 19
    "n_1a" BIGINT, -- 1a
    "n_1b" BIGINT, -- 1b
    "n_1c" BIGINT, -- 1c
    "n_1d" BIGINT, -- 1d
    "n_1e" BIGINT, -- 1e
    "n_1f" BIGINT, -- 1f
    "n_20" BIGINT, -- 20
    "n_21" BIGINT, -- 21
    "n_22" BIGINT, -- 22
    "n_23" BIGINT, -- 23
    "n_24" BIGINT, -- 24
    "n_25" BIGINT, -- 25
    "n_26" BIGINT, -- 26
    "n_27" BIGINT, -- 27
    "n_28" BIGINT, -- 28
    "n_29" BIGINT, -- 29
    "n_2a" BIGINT, -- 2a
    "n_2b" BIGINT, -- 2b
    "n_2c" BIGINT, -- 2c
    "n_2d" BIGINT, -- 2d
    "n_2e" BIGINT, -- 2e
    "n_2f" BIGINT, -- 2f
    "n_30" BIGINT, -- 30
    "n_31" BIGINT, -- 31
    "n_32" BIGINT, -- 32
    "n_33" BIGINT, -- 33
    "n_34" BIGINT, -- 34
    "n_35" BIGINT, -- 35
    "n_36" BIGINT, -- 36
    "n_37" BIGINT, -- 37
    "n_38" BIGINT, -- 38
    "n_39" BIGINT, -- 39
    "n_3a" BIGINT, -- 3a
    "n_3b" BIGINT, -- 3b
    "n_3c" BIGINT, -- 3c
    "n_3d" BIGINT, -- 3d
    "n_3e" BIGINT, -- 3e
    "n_3f" BIGINT, -- 3f
    "n_40" BIGINT, -- 40
    "n_41" BIGINT, -- 41
    "n_42" BIGINT, -- 42
    "n_43" BIGINT, -- 43
    "n_44" BIGINT, -- 44
    "n_45" BIGINT, -- 45
    "n_46" BIGINT, -- 46
    "n_47" BIGINT, -- 47
    "n_48" BIGINT, -- 48
    "n_49" BIGINT, -- 49
    "n_4a" BIGINT, -- 4a
    "n_4b" BIGINT, -- 4b
    "n_4c" BIGINT, -- 4c
    "n_4d" BIGINT, -- 4d
    "n_4e" BIGINT, -- 4e
    "n_4f" BIGINT, -- 4f
    "n_50" BIGINT, -- 50
    "n_51" BIGINT, -- 51
    "n_52" BIGINT, -- 52
    "n_53" BIGINT, -- 53
    "n_54" BIGINT, -- 54
    "n_55" BIGINT, -- 55
    "n_56" BIGINT, -- 56
    "n_57" BIGINT, -- 57
    "n_58" BIGINT, -- 58
    "n_59" BIGINT, -- 59
    "n_5a" BIGINT, -- 5a
    "n_5b" BIGINT, -- 5b
    "n_5c" BIGINT, -- 5c
    "n_5d" BIGINT, -- 5d
    "n_5e" BIGINT, -- 5e
    "n_5f" BIGINT, -- 5f
    "n_60" BIGINT, -- 60
    "n_61" BIGINT  -- 61
);
-- Anyone who has the link will be able to view this.