Baselight

Gonohhrea Augmented Dataset

Predicting Antibiotic Resistance Genes

@kaggle.salmaneunus_gonohhrea_augmented_dataset

Loading...
Loading...

About this Dataset

Gonohhrea Augmented Dataset

This dataset has been preprocessed for ML training. First, the class imbalance problem was addressed by applying CTGAN. Then feature selection techniques and PCA were applied for dimensionality reduction. The features are unitigs, which are short strands of DNA, and the dataset records whether each specific strand is present or absent in a given sample. The presence or absence of thousands of unitigs indicates whether the sample is resistant or susceptible to the antibiotic.

Tables

X Train Augmented Cip

@kaggle.salmaneunus_gonohhrea_augmented_dataset.x_train_augmented_cip
  • 12.61 MB
  • 2324 rows
  • 8874 columns
Loading...

-- Training feature table x_train_augmented_cip.
-- The dataset page lists this table at 2324 rows and 8874 columns, but only
-- 100 column definitions appear in this statement; the listing is presumably
-- truncated for display -- TODO confirm against the full schema.
-- Per the dataset description, features are unitigs (short strands of DNA)
-- recorded as present or absent in each sample; the column names below are
-- nucleotide strings (a/c/g/t). The value encoding is not shown here.
-- NOTE(review): names ending in an underscore plus 8 hex characters look like
-- shortened forms of longer names with a disambiguating hash -- confirm before
-- matching them against raw unitig sequences.
CREATE TABLE x_train_augmented_cip (
  "unnamed_0" VARCHAR,  -- presumably the source file's unnamed index column; verify
  "acgtttatgccgttatcgatccgatagccggt" BIGINT,
  "catctgcaccctgtcggcactcgccgcctgaaccaccccgtccggacaagg" BIGINT,
  "cacaaaactcgtttcctgctccgaaatgggcga_gtaagggctttcttaaattt_c62900b4" BIGINT,
  "aaatcttccgctttaatccatttgccgtccga" BIGINT,
  "aaacctgccgcagccggcgcatctttgggcggcggtctggac_aacgtggtgta_c280dd17" BIGINT,
  "cccaacacactcgacgtatggatgagccacggc" BIGINT,
  "ttttcaacaaaaaactgcttgtccaacggcaaa" BIGINT,
  "tttcaaccggatagctgaaaaaacaaaaaagttcgata" BIGINT,
  "catcacttcccgctgccgtttttttgtttcagacggcat" BIGINT,
  "aaactgcttgtccaacggcaaaccggcgggcagg_cacatagaaaagcggaaca_023610f0" BIGINT,
  "caggcaccgattaacggcaacgatccctatgctaatg" BIGINT,
  "aagaaaccgttttacccgataagtttctgtgccgaca" BIGINT,
  "agccttgccgtaaatgccttcaagacatgcgg" BIGINT,
  "cgaccaaaacgtgcagggctacattaaaaacctgccgctgcggcata_cggcaa_871cc413" BIGINT,
  "acgattcggatggtttcgagataaaaccgtc" BIGINT,
  "aatgtcaaactggcagaccaacgccacccgaagacaggcgtaccgtttg" BIGINT,
  "aaaaatcatagccgtcatcgggcaactcgtc" BIGINT,
  "aaagacccgtctttgaccaaagaagaactcaccgctttctgccgtac_aaccaa_b98a13d5" BIGINT,
  "caccggcatctgcccccactgcatcaacccgcct" BIGINT,
  "accaaacaactgaccggttgaaaactgccgaaatgccgtctgaaacgtttcaga_a9eacf56" BIGINT,
  "aattggtcggacgttatcattcgggttgttatgtatc" BIGINT,
  "caccctacccctgtaaacatcacgatttgccgg" BIGINT,
  "gtgccggaacagcccgccctgccaaacaaatgccgtctgaaaccggaaaa_gaa_5291ef67" BIGINT,
  "aaaaaaatcctaaagggcgagctgattaaaa" BIGINT,
  "actacatcggtttcctcgccgatggagcggaca" BIGINT,
  "ccatcacggaaaaacccgtaccgttgtagtccgacatttc" BIGINT,
  "cgaaagcaggctttcggcatagacggacgacagccgttcgtcaacccaat_ctg_63ccf7c1" BIGINT,
  "cctttcaaaccgtgcatcagcccgaggctgtggcggac_acatacaaaacgccg_ec082207" BIGINT,
  "gatatattgggaaacgaacggaatcagaatctgcc" BIGINT,
  "cgtctgaagtgatttcgcgccccagccagtc_gtaatcgatgaagaaggcttcg_5f670041" BIGINT,
  "aaccggtgagaaactcgagcgcaccatccgcgtgaaccatcctttgaccttg" BIGINT,
  "acgggggagggagcagattgcggcagatttgg" BIGINT,
  "cagcaaaccgttccgcttcaacgcgcgtacttgggaat" BIGINT,
  "cggcgcagggttgagctacgcgggtaaagccgcagg" BIGINT,
  "atattgtagtgttcgccataacagacctgaaaatcatc" BIGINT,
  "aaaataaaatcacaattatgcgaataaagtttgtataaatttctcctaac" BIGINT,
  "ggttgaaaacacaaaacaggatcaggacgacaacgcggacga" BIGINT,
  "gaaacaggtggacggcacggcggtcaattaccgg_gatttggacaatgacgcgc_4b38204a" BIGINT,
  "acaagacgacaggcggcggaacacgtcgtcg_cgggcagccgaaggtatggccg_e2b627ff" BIGINT,
  "gctccaatcgcgataccggtaaagatgtacgcc" BIGINT,
  "gacacctgcgccgtggtcacctacgcaatgcggcgtgttcactaaaatatcgacttctttg" BIGINT,
  "cccatgtgctgcggttgtccagtttggacggcag" BIGINT,
  "ttcgacttcgcgctcacggggcagaccatgcgccacgaccaccg_tgctgtccg_9d08c234" BIGINT,
  "aattacccgttccgggcaaaatttccgttaccccgaccaaatctctggc_agga_ed92c3e3" BIGINT,
  "ttgcttggtgatttgttcgataacctgctcatcgccgtgggtaacgatggtcatccgtgac" BIGINT,
  "gcctttgcgtccgttgtttttcgtcagggcgg" BIGINT,
  "aaacagttgtacgctttcacccacgggcggcaggttgtagaaggt" BIGINT,
  "taccgccccggacggatccgaagcggcgggcggt" BIGINT,
  "aactttagcaaaactttctgccgaaaaaggctgg_caggacgataaacaaaatg_987c3ef6" BIGINT,
  "caaacatcaccacaatggagttcaagggcgat" BIGINT,
  "aaccgccgcaggattgcctcaaccctatctgtcccgcatcgaaaa" BIGINT,
  "tgaaattaaacgaaaaacaccgctatgccgtgat" BIGINT,
  "acaaaacggcgcagtgccgcgcaaagccaaaatattcgccatcagtatgatgac_36a587b0" BIGINT,
  "gggataccgtatcagtatgggttgggggaatcagg" BIGINT,
  "caaccgccctgccgtcgttgaagcggataaagccgg" BIGINT,
  "tatgacgcaagaacgtttacccgaatttttcgacc_ggatttcgtgtgccagcc_660e0ec5" BIGINT,
  "ttatgcgacgcgccgaaagtatgcgtgacggactg_aggcacgaggctgccgat_e1c0fdc8" BIGINT,
  "caaagccgcaacctcgccatatgcccgttcaaacgtaccgatataa" BIGINT,
  "ggtttgttcgggaacgcgcgaccacctttacgccacagcggagaagaagtcata_3d468fe4" BIGINT,
  "ctgaaagaaggctgtcccaacaaatccgccggatcggtcttg" BIGINT,
  "gaaaaaaaagaaaagaaaaaggaaaaggcggaagcca" BIGINT,
  "tggaaatattcaaatttcctccaataatattaacga" BIGINT,
  "cggatgtcggcaatatcggtgcatttttgccgacgtggccgctgtttgcc_aaa_9620f718" BIGINT,
  "gaacaataacattgtcagcttggcgaagcaggc" BIGINT,
  "taaaaagggcactcatggagctgaccgtccact" BIGINT,
  "gcaagtactgccgcaccgacgaaaaacgggcgcatagggtgggtaa" BIGINT,
  "gccacgctttgcagcacgttttcaaatttcagaagctcggcggcaagc" BIGINT,
  "acgccgcacaaaaaagcggcaataggcaggcaatcagaatgcaccaa" BIGINT,
  "ttctcgccctcggaagtcagcttcagataaacgcgccgt" BIGINT,
  "ttttggattggggaaaaatgcatgaggtggaaattaccgaaaccgatcctttaacgggggaaa" BIGINT,
  "gcggtggaaaccttgtgcgaaggcagccccgaa_agcttggcgccggcgatgct_b95a35f4" BIGINT,
  "gcggaatccactcgtggaactcggtaaattgatagccgccgctcaaacggtcgaaaccg" BIGINT,
  "gcagcaccatttcttttgcagccgccgccgcgtggaacag_ttttattttcttt_770e5101" BIGINT,
  "aacttcagattctacttttgctgcggtttcctgaacttggg_caaatcctgtgc_99d2be32" BIGINT,
  "aatttccgttaccccgaccaaatctctggcgaccgac" BIGINT,
  "gaaaaagaacagacaggctgccaaagccccgaaggaaaatcaaaa" BIGINT,
  "gatgaggacaggttgaaacagtttctcgaacggtaaaaaa" BIGINT,
  "caatatccgatttatttccaacatcacggagagccgtatgaaa" BIGINT,
  "ggcagattccccatttgggttcggaacgcgcgcacgc" BIGINT,
  "caaactgcacgcgccgcccgtccgacctcggcatatccg_acgtgcctgccccg_fe23213c" BIGINT,
  "gatagccttaatttcaaaaggacgattaatggatacacaaa" BIGINT,
  "ctggcgaatgggtactggtacaaaatcagggtcgc" BIGINT,
  "cacaggaacagaccgcctcgggcgcatcctac" BIGINT,
  "agatcgggcgaatgtgctgccaaacccgccgcc" BIGINT,
  "gaacccttcagcgtgatggacttcatccaagccaatccgcgccat_tgcagttc_c61b0941" BIGINT,
  "tacgtgaagaacaaaaagccgcgcagtaggcccgtttgaaaaatgccgtctg" BIGINT,
  "gttcaaggtgttaatcctaacactgaagaagccgacagccgtcaagccag" BIGINT,
  "aatgcgttgtcttgggctacctctggcggcgctggttttaaaggttctcgtaaa_f6ecaa2e" BIGINT,
  "aaatatactaggtatggtactgttttgttagcaattcttcaa" BIGINT,
  "tgtgccacgatattctggggcagaccgcgctggtggatttggca" BIGINT,
  "acatacgggccgcgcccatggtcttcacccg" BIGINT,
  "tatggtcgaagaacacctgccgctcatcacgcagcag" BIGINT,
  "accaaccggtatcgatttaaccgatttccttaatatt_cacactcaaggacaac_6ca228b5" BIGINT,
  "cgaggctgataaattcataatgagccttacgcgcctcgcggtaaagctgctgca_e67b057e" BIGINT,
  "ggcagcatcagcaggcgcagcaccaaagcccgccccgg" BIGINT,
  "tccgccgtgtccaaacccaaagccagcccggcattgttgatgag" BIGINT,
  "taaccctgttttattttcccattctcgttgcg_acaaaaaaccgctttgtgtaa_1e6267b3" BIGINT,
  "gttaccgcgcccaaaatggtggaggggtcgcaaccgagtacgacggcgacgggatacggcg" BIGINT,
  "gatttgcgcggcagatgcaaaaccataggcgcgctgaccg" BIGINT
);

X Train Augmented Azm

@kaggle.salmaneunus_gonohhrea_augmented_dataset.x_train_augmented_azm
  • 698.56 KB
  • 4242 rows
  • 516 columns
Loading...

-- Training feature table x_train_augmented_azm.
-- The dataset page lists this table at 4242 rows and 516 columns, but only
-- 100 column definitions appear in this statement; the listing is presumably
-- truncated for display -- TODO confirm against the full schema.
-- Per the dataset description, features are unitigs (short strands of DNA)
-- recorded as present or absent in each sample; the column names below are
-- nucleotide strings (a/c/g/t). The value encoding is not shown here.
-- NOTE(review): names ending in an underscore plus 8 hex characters look like
-- shortened forms of longer names with a disambiguating hash -- confirm before
-- matching them against raw unitig sequences.
CREATE TABLE x_train_augmented_azm (
  "unnamed_0" VARCHAR,  -- presumably the source file's unnamed index column; verify
  "cttaacatatttgcctttgatttttgaagaagctgccacgccggcag" BIGINT,
  "taccgtaaccggcaatgcggatattacggtc" BIGINT,
  "cagacggcattttttttgcgtttttcgggagg" BIGINT,
  "aacgggttttcagacggcattcgatatcgggacg" BIGINT,
  "ccaaaaattacccgcgttgacgtagctaaaga" BIGINT,
  "cggaccggtattccgtcgaaatcaccgccgtcaaccgcccc" BIGINT,
  "tgaaattgtccatctcgtatgccgtcttctgcttg" BIGINT,
  "tacggtattgtccgcattattaaactcaaaacc_agaagacggcatacgagatg_42cccb6f" BIGINT,
  "ggcattttttttgcgtttttcgggagggggcggc" BIGINT,
  "tatataaggggttgccgttccgcagttgggcggcagcatac" BIGINT,
  "tggtaatgccgggtgagaacgtaaccattactgtagaactgattgcg" BIGINT,
  "acgctttgaacatatttgcctttgatttcgg" BIGINT,
  "ttatgaacaaaccattggtgaatcaggctgctatggt" BIGINT,
  "acggcgacggcagcggcgacggcgacggcaacggca" BIGINT,
  "cgcatgggcaagcaggtcgagatattcgccg" BIGINT,
  "cctggcaaacgcttccccgtcgccctcgaaca" BIGINT,
  "gtctgatttcacaagtcttgtgaagtcttacctgccttaccgtccaacatccgccgcagcc" BIGINT,
  "agcttggatatgtccaatcctacagtgttacg" BIGINT,
  "ccactatgctgacacccgaacaagtcaaggcc" BIGINT,
  "tgccgtcttctgcttgaaaaaaaaaaagcac" BIGINT,
  "ataaagatgagataacggctttgattaatagtcttaaataaaggggc" BIGINT,
  "cagtttgaagtagcgcgcggcggcaacggcgtaaatcagtgcctg" BIGINT,
  "gaagatgcaatctacccgctgctagacggaaagaccc" BIGINT,
  "aacggcacattccacgccgtttcttctctcg" BIGINT,
  "gtcgtaatcaccgccctgccgaccgccatggcctcctggg" BIGINT,
  "aaggctttcatcattcgcttctcggtcgctgcgcatacccg" BIGINT,
  "caaacaaactggggatattatatgcatatcct" BIGINT,
  "taccggtttaaatttaattcactatacatcccgt" BIGINT,
  "accgtaatatccgcattgccggttacggtat" BIGINT,
  "aaaatggaaagatatgcatataatatccccg" BIGINT,
  "ctgcccttggctttggcgcgttatttgccgcacaagcggctcaaagc" BIGINT,
  "gtgaagggtcggcaggtttgaccgtaatatccgc" BIGINT,
  "acacaaccgccttccggccatgccggcggggaaacaaggcgcaaacacgcggggcgggacg" BIGINT,
  "gaaaccctcctgaccgacaaccgcatctggaaacag" BIGINT,
  "acgccgacgacgccgtcaaagacgacgccga" BIGINT,
  "gacgcagcagctacggtttatcctgacggatatccgc" BIGINT,
  "aaatgtcagcttggatatgtccaatcctacagt" BIGINT,
  "actatgctgacacccgaacaagtcaaggccc" BIGINT,
  "aatatccgcattgccggttacggtattgtcc" BIGINT,
  "aaatacgcaaagcacctatcgcgaaagcgacagcatccg" BIGINT,
  "atgcatataatatccccagtttgtttgttga" BIGINT,
  "aatgccctctccccggcccgccattgccgcgcaggcg" BIGINT,
  "tcttctgtcgtatagacgaggttttgattac" BIGINT,
  "tggaaagaaagaataataaaactcctggcgtcgaccagcttaacataaaatacc" BIGINT,
  "tcccgaaaaacgcaaaaaaaatgccgtctgaagacctttcggac" BIGINT,
  "aaaccggtacggcgttgcctcgccttttcgtactat" BIGINT,
  "aaaacctcgtctatacgacagaagaccaccgtattt" BIGINT,
  "aagccgagagaagaaacggcgtggaatgtgcc" BIGINT,
  "gcccgtcccgatatcgaatgccgtctgaaaacc" BIGINT,
  "cccgcgataaatgctatttcagtcaatcgtac" BIGINT,
  "gattatccggagcgaccgtaacgaaccaaacaaaaacc" BIGINT,
  "ccgcctgcgcggcaatggcgggccggggaga" BIGINT,
  "atggtaatgccgggtgagaacgtaaccatta_aagcagttgttttaacaggaca_069e0b31" BIGINT,
  "ccaaaagacggaaaatcaggaaaacggcagcttc" BIGINT,
  "tgatgaaagccgggccatggcattctccgaaatccaaaaa" BIGINT,
  "cggataatcagccctttcgggcgcgaatgctgtgaag" BIGINT,
  "gcaacttggagaatgccctctccccggcccgcc" BIGINT,
  "ggagaatgccctctccccggcccgccattgccgc" BIGINT,
  "caatgcggatattacggtcaaacctgccgaccct" BIGINT,
  "ccttttgatgaagtcgggaaatgcccttatc" BIGINT,
  "gcgccaaaatttccgatgtgctgcccgccgccg" BIGINT,
  "cactataccggccggatgaaattgtccatctcgtatgccgtct" BIGINT,
  "ttacgcgcgccgacaacgccgacaacgacgacgt" BIGINT,
  "cgcgccaaaatttccgatgtgctgcccgccg" BIGINT,
  "tatgccgtcttctgcttgaaaaaaaaaaagca" BIGINT,
  "cggatgtatcagaaaggagaataaattgcaacctttagtcagcgt" BIGINT,
  "taacactgtaggattggacatatccaagctgac" BIGINT,
  "atatccgcattgccggttacggtattgtccg" BIGINT,
  "aacggcgtggaatgtgccgttttcctgatgt" BIGINT,
  "aagcaaaacccccgccaaacgccaatctgcccgggggtttcgagatacaacatgagccaat" BIGINT,
  "cgaaaaggcgaggcaacgccgtaccggtttaaat" BIGINT,
  "ccccccccccatcaaatgaatggcagattga" BIGINT,
  "gtcgagatattcgccgcgcaacggcgagaatacgccgaaag" BIGINT,
  "ggtgaacttgcagtttttcaacaaacaaaccggggatattat" BIGINT,
  "gcgcgacaacgccaaagacgccgacgacgccgtcaaagac" BIGINT,
  "acccgccaacccgaccgccgtgattcccgcg" BIGINT,
  "acccccatcgatccgtccagcctgaagcagcagtcggcagg" BIGINT,
  "agggcggcaaaggcgggctttcgtcccccacg" BIGINT,
  "cggcgtcgtctttgacggcgtcgtcggcgtc" BIGINT,
  "tttggggcatgtgtaaatccgtgcttgtgggtaattttaacgctcgacgataatcttcccg" BIGINT,
  "atatccccagtttgtttgttgaaaaactgca" BIGINT,
  "atattccgtgaacataaaagagttgaaaaaaaagaataataaaacttctggcggcg" BIGINT,
  "acaaactggggatattatatgcatatccttc_agcgaatgatgaaagccttaac_6ae905c7" BIGINT,
  "tatatattgcgtaacactgtaggattggacatatc" BIGINT,
  "tccgtcgaaattaccgccgtcaaccgccccttcctgctcgcc_acactataccg_787d6ca3" BIGINT,
  "gcgcgtcaagccaaagccggtcgcattatcaccattgataccaatcctgcaaaa_40657557" BIGINT,
  "gcccagtcgagaacagcaaagcaacgcgggcatgcgtagcgaccgagaagcgaatg" BIGINT,
  "tacggcgaaggcactaaaatggaaagatatgcatataat" BIGINT,
  "aagaaacggcgtggaatgtgccgttttcctga" BIGINT,
  "attaatagtcttaaataaaggggctgtaccggat" BIGINT,
  "taattaaaaacaacatggtaatcaataatgacgatgaacccaccacccaatactacagcc" BIGINT,
  "catggcattctccgaaatccaaaaacggaaa_gcatcgtaccatcccgacagga_e6b61f49" BIGINT,
  "gtctgaaaacccgttgacggcgcatgggcaagcagg" BIGINT,
  "caataccgtaaccggcaatgcggatattacg" BIGINT,
  "caactgcggaacggcaaccccttatatattgcgtaa" BIGINT,
  "atataatatccccagtttgtttgttgaaaaa_gcactaaaatggaaagatatgc_13c7ce18" BIGINT,
  "cgccgacgacgccgtcaaagacgacgccgac" BIGINT,
  "atctcgtatgccgtcttctgcttgaaaaaaa" BIGINT,
  "caacaaaataccatccttttcgaactgaccggaaccggttgcggcgtcgcaaaa_89083650" BIGINT
);

Y Test Augmented Azm

@kaggle.salmaneunus_gonohhrea_augmented_dataset.y_test_augmented_azm
  • 24.51 KB
  • 1820 rows
  • 3 columns
Loading...

-- Test-split label table y_test_augmented_azm: one row per sample with three
-- columns (the dataset page lists 1820 rows).
-- NOTE(review): "azm_sr" presumably encodes the susceptible/resistant outcome
-- mentioned in the dataset description -- confirm the value encoding.
CREATE TABLE y_test_augmented_azm
(
    "unnamed_0"  BIGINT     -- presumably the source file's unnamed index column; verify
  , "sample_id"  VARCHAR    -- sample identifier
  , "azm_sr"     DOUBLE     -- label column for this table
);

Y Test Augmented Cip

@kaggle.salmaneunus_gonohhrea_augmented_dataset.y_test_augmented_cip
  • 14.97 KB
  • 996 rows
  • 3 columns
Loading...

-- Test-split label table y_test_augmented_cip: one row per sample with three
-- columns (the dataset page lists 996 rows).
-- NOTE(review): "cip_sr" presumably encodes the susceptible/resistant outcome
-- mentioned in the dataset description -- confirm the value encoding.
CREATE TABLE y_test_augmented_cip
(
    "unnamed_0"  BIGINT     -- presumably the source file's unnamed index column; verify
  , "sample_id"  VARCHAR    -- sample identifier
  , "cip_sr"     DOUBLE     -- label column for this table
);

Share link

Anyone who has the link will be able to view this.