Multi-label Classification Of Enzyme Substrates
Multi-label classification
@kaggle.gopalns_ec_mixed_class
Multi-label classification
@kaggle.gopalns_ec_mixed_class
Enzymes are known to act on molecules that are structurally similar to their substrates. This behaviour is called promiscuity. Scientists working in drug discovery use this behaviour to target/design drugs that either block or promote biological actions. However, correctly predicting the EC class(es) of substrates associated with enzymes has been a challenge in biology. Since there is no shortage of data, ML techniques can be employed to solve the aforementioned problem.
There are 3 files, named mixed_desc.csv, mixed_ecfp.csv, and mixed_fcfp.csv, containing chemical, structural, and connectivity information.
-- Schema for the three CSV files of this dataset: mixed_desc, mixed_ecfp and
-- mixed_fcfp. Every table starts with a "cids" VARCHAR column followed by 99
-- numeric columns.
-- NOTE(review): "cids" looks like the shared row identifier across the three
-- tables, but no PRIMARY KEY / UNIQUE constraint is declared -- confirm
-- uniqueness against the CSVs before joining on it.
-- NOTE(review): each table lists exactly 100 columns; the listing may be
-- truncated relative to the underlying CSV files -- confirm.

-- mixed_desc: one "cids" identifier plus 99 numeric descriptor columns.
-- NOTE(review): the column names resemble RDKit molecular descriptor names
-- (e.g. "qed", "molwt", "tpsa") -- TODO confirm the generating tool.
-- NOTE(review): "smr_vsa8", "slogp_vsa9", "vsa_estate1" and
-- "vsa_estate3".."vsa_estate7" are typed BIGINT while their sibling columns
-- are DOUBLE; this may be an artifact of type inference on the data rather
-- than a true integer domain -- confirm before relying on integer semantics.
CREATE TABLE mixed_desc (
  "cids" VARCHAR,
  "maxestateindex" DOUBLE,
  "minestateindex" DOUBLE,
  "maxabsestateindex" DOUBLE,
  "minabsestateindex" DOUBLE,
  "qed" DOUBLE,
  "molwt" DOUBLE,
  "heavyatommolwt" DOUBLE,
  "exactmolwt" DOUBLE,
  "numvalenceelectrons" BIGINT,
  "numradicalelectrons" BIGINT,
  "fpdensitymorgan1" DOUBLE,
  "fpdensitymorgan2" DOUBLE,
  "fpdensitymorgan3" DOUBLE,
  "balabanj" DOUBLE,
  "bertzct" DOUBLE,
  "chi0" DOUBLE,
  "chi0n" DOUBLE,
  "chi0v" DOUBLE,
  "chi1" DOUBLE,
  "chi1n" DOUBLE,
  "chi1v" DOUBLE,
  "chi2n" DOUBLE,
  "chi2v" DOUBLE,
  "chi3n" DOUBLE,
  "chi3v" DOUBLE,
  "chi4n" DOUBLE,
  "chi4v" DOUBLE,
  "hallkieralpha" DOUBLE,
  "ipc" DOUBLE,
  "kappa1" DOUBLE,
  "kappa2" DOUBLE,
  "kappa3" DOUBLE,
  "labuteasa" DOUBLE,
  "peoe_vsa1" DOUBLE,
  "peoe_vsa10" DOUBLE,
  "peoe_vsa11" DOUBLE,
  "peoe_vsa12" DOUBLE,
  "peoe_vsa13" DOUBLE,
  "peoe_vsa14" DOUBLE,
  "peoe_vsa2" DOUBLE,
  "peoe_vsa3" DOUBLE,
  "peoe_vsa4" DOUBLE,
  "peoe_vsa5" DOUBLE,
  "peoe_vsa6" DOUBLE,
  "peoe_vsa7" DOUBLE,
  "peoe_vsa8" DOUBLE,
  "peoe_vsa9" DOUBLE,
  "smr_vsa1" DOUBLE,
  "smr_vsa10" DOUBLE,
  "smr_vsa2" DOUBLE,
  "smr_vsa3" DOUBLE,
  "smr_vsa4" DOUBLE,
  "smr_vsa5" DOUBLE,
  "smr_vsa6" DOUBLE,
  "smr_vsa7" DOUBLE,
  "smr_vsa8" BIGINT,
  "smr_vsa9" DOUBLE,
  "slogp_vsa1" DOUBLE,
  "slogp_vsa10" DOUBLE,
  "slogp_vsa11" DOUBLE,
  "slogp_vsa12" DOUBLE,
  "slogp_vsa2" DOUBLE,
  "slogp_vsa3" DOUBLE,
  "slogp_vsa4" DOUBLE,
  "slogp_vsa5" DOUBLE,
  "slogp_vsa6" DOUBLE,
  "slogp_vsa7" DOUBLE,
  "slogp_vsa8" DOUBLE,
  "slogp_vsa9" BIGINT,
  "tpsa" DOUBLE,
  "estate_vsa1" DOUBLE,
  "estate_vsa10" DOUBLE,
  "estate_vsa11" DOUBLE,
  "estate_vsa2" DOUBLE,
  "estate_vsa3" DOUBLE,
  "estate_vsa4" DOUBLE,
  "estate_vsa5" DOUBLE,
  "estate_vsa6" DOUBLE,
  "estate_vsa7" DOUBLE,
  "estate_vsa8" DOUBLE,
  "estate_vsa9" DOUBLE,
  "vsa_estate1" BIGINT,
  "vsa_estate10" DOUBLE,
  "vsa_estate2" DOUBLE,
  "vsa_estate3" BIGINT,
  "vsa_estate4" BIGINT,
  "vsa_estate5" BIGINT,
  "vsa_estate6" BIGINT,
  "vsa_estate7" BIGINT,
  "vsa_estate8" DOUBLE,
  "vsa_estate9" DOUBLE,
  "fractioncsp3" DOUBLE,
  "heavyatomcount" BIGINT,
  "nhohcount" BIGINT,
  "nocount" BIGINT,
  "numaliphaticcarbocycles" BIGINT,
  "numaliphaticheterocycles" BIGINT,
  "numaliphaticrings" BIGINT,
  "numaromaticcarbocycles" BIGINT
);

-- mixed_ecfp: one "cids" identifier plus 99 BIGINT columns "m1".."m99".
-- NOTE(review): presumably ECFP fingerprint positions (going by the table
-- name); whether the values are 0/1 flags or counts is not visible here --
-- TODO confirm.
CREATE TABLE mixed_ecfp (
  "cids" VARCHAR,
  "m1" BIGINT,
  "m2" BIGINT,
  "m3" BIGINT,
  "m4" BIGINT,
  "m5" BIGINT,
  "m6" BIGINT,
  "m7" BIGINT,
  "m8" BIGINT,
  "m9" BIGINT,
  "m10" BIGINT,
  "m11" BIGINT,
  "m12" BIGINT,
  "m13" BIGINT,
  "m14" BIGINT,
  "m15" BIGINT,
  "m16" BIGINT,
  "m17" BIGINT,
  "m18" BIGINT,
  "m19" BIGINT,
  "m20" BIGINT,
  "m21" BIGINT,
  "m22" BIGINT,
  "m23" BIGINT,
  "m24" BIGINT,
  "m25" BIGINT,
  "m26" BIGINT,
  "m27" BIGINT,
  "m28" BIGINT,
  "m29" BIGINT,
  "m30" BIGINT,
  "m31" BIGINT,
  "m32" BIGINT,
  "m33" BIGINT,
  "m34" BIGINT,
  "m35" BIGINT,
  "m36" BIGINT,
  "m37" BIGINT,
  "m38" BIGINT,
  "m39" BIGINT,
  "m40" BIGINT,
  "m41" BIGINT,
  "m42" BIGINT,
  "m43" BIGINT,
  "m44" BIGINT,
  "m45" BIGINT,
  "m46" BIGINT,
  "m47" BIGINT,
  "m48" BIGINT,
  "m49" BIGINT,
  "m50" BIGINT,
  "m51" BIGINT,
  "m52" BIGINT,
  "m53" BIGINT,
  "m54" BIGINT,
  "m55" BIGINT,
  "m56" BIGINT,
  "m57" BIGINT,
  "m58" BIGINT,
  "m59" BIGINT,
  "m60" BIGINT,
  "m61" BIGINT,
  "m62" BIGINT,
  "m63" BIGINT,
  "m64" BIGINT,
  "m65" BIGINT,
  "m66" BIGINT,
  "m67" BIGINT,
  "m68" BIGINT,
  "m69" BIGINT,
  "m70" BIGINT,
  "m71" BIGINT,
  "m72" BIGINT,
  "m73" BIGINT,
  "m74" BIGINT,
  "m75" BIGINT,
  "m76" BIGINT,
  "m77" BIGINT,
  "m78" BIGINT,
  "m79" BIGINT,
  "m80" BIGINT,
  "m81" BIGINT,
  "m82" BIGINT,
  "m83" BIGINT,
  "m84" BIGINT,
  "m85" BIGINT,
  "m86" BIGINT,
  "m87" BIGINT,
  "m88" BIGINT,
  "m89" BIGINT,
  "m90" BIGINT,
  "m91" BIGINT,
  "m92" BIGINT,
  "m93" BIGINT,
  "m94" BIGINT,
  "m95" BIGINT,
  "m96" BIGINT,
  "m97" BIGINT,
  "m98" BIGINT,
  "m99" BIGINT
);

-- mixed_fcfp: one "cids" identifier plus 99 BIGINT columns "f1".."f99".
-- NOTE(review): presumably FCFP fingerprint positions (going by the table
-- name); whether the values are 0/1 flags or counts is not visible here --
-- TODO confirm.
CREATE TABLE mixed_fcfp (
  "cids" VARCHAR,
  "f1" BIGINT,
  "f2" BIGINT,
  "f3" BIGINT,
  "f4" BIGINT,
  "f5" BIGINT,
  "f6" BIGINT,
  "f7" BIGINT,
  "f8" BIGINT,
  "f9" BIGINT,
  "f10" BIGINT,
  "f11" BIGINT,
  "f12" BIGINT,
  "f13" BIGINT,
  "f14" BIGINT,
  "f15" BIGINT,
  "f16" BIGINT,
  "f17" BIGINT,
  "f18" BIGINT,
  "f19" BIGINT,
  "f20" BIGINT,
  "f21" BIGINT,
  "f22" BIGINT,
  "f23" BIGINT,
  "f24" BIGINT,
  "f25" BIGINT,
  "f26" BIGINT,
  "f27" BIGINT,
  "f28" BIGINT,
  "f29" BIGINT,
  "f30" BIGINT,
  "f31" BIGINT,
  "f32" BIGINT,
  "f33" BIGINT,
  "f34" BIGINT,
  "f35" BIGINT,
  "f36" BIGINT,
  "f37" BIGINT,
  "f38" BIGINT,
  "f39" BIGINT,
  "f40" BIGINT,
  "f41" BIGINT,
  "f42" BIGINT,
  "f43" BIGINT,
  "f44" BIGINT,
  "f45" BIGINT,
  "f46" BIGINT,
  "f47" BIGINT,
  "f48" BIGINT,
  "f49" BIGINT,
  "f50" BIGINT,
  "f51" BIGINT,
  "f52" BIGINT,
  "f53" BIGINT,
  "f54" BIGINT,
  "f55" BIGINT,
  "f56" BIGINT,
  "f57" BIGINT,
  "f58" BIGINT,
  "f59" BIGINT,
  "f60" BIGINT,
  "f61" BIGINT,
  "f62" BIGINT,
  "f63" BIGINT,
  "f64" BIGINT,
  "f65" BIGINT,
  "f66" BIGINT,
  "f67" BIGINT,
  "f68" BIGINT,
  "f69" BIGINT,
  "f70" BIGINT,
  "f71" BIGINT,
  "f72" BIGINT,
  "f73" BIGINT,
  "f74" BIGINT,
  "f75" BIGINT,
  "f76" BIGINT,
  "f77" BIGINT,
  "f78" BIGINT,
  "f79" BIGINT,
  "f80" BIGINT,
  "f81" BIGINT,
  "f82" BIGINT,
  "f83" BIGINT,
  "f84" BIGINT,
  "f85" BIGINT,
  "f86" BIGINT,
  "f87" BIGINT,
  "f88" BIGINT,
  "f89" BIGINT,
  "f90" BIGINT,
  "f91" BIGINT,
  "f92" BIGINT,
  "f93" BIGINT,
  "f94" BIGINT,
  "f95" BIGINT,
  "f96" BIGINT,
  "f97" BIGINT,
  "f98" BIGINT,
  "f99" BIGINT
);
-- Anyone who has the link will be able to view this.