Baselight

Bioinformatics Simulated

This synthetic dataset was created for exploring and developing machine learning models.

@kaggle.willianoliveiragibin_bioinformatics_simulated

Proteinas Train New
@kaggle.willianoliveiragibin_bioinformatics_simulated.proteinas_train_new

  • 3.55 MB
  • 16000 rows
  • 10 columns
id_prote_na

ID Proteína

sequ_ncia

Sequência

massa_molecular

Massa Molecular

ponto_isoel_trico

Ponto Isoelétrico

hidrofobicidade

Hidrofobicidade

carga_total

Carga Total

propor_o_polar

Proporção Polar

propor_o_apolar

Proporção Apolar

comprimento_sequ_ncia

Comprimento Sequência

classe

Classe

TRAIN_P00001GNMRFVLHDEETHWGTLRTTLNCVPSDIYTISGEDSLFWGMAHPFCYPGSKRTMHPMWQSSDDCANCGTYWMGWDSPIQETEYRWMMDWITMESDALAGRNCCVMNMDWVRSCSSWTHIKSYLTGMNYMAMTFQFVDGDFNDQSLVCMDIWNFPSIATATKPYQFFTLPHVVWK20.362.946.799.999.90048.661.226.272.58314.942.528.735.632.100-32.413.793.103.448.27040.804.597.701.149.400174Estrutural
TRAIN_P00002LFKMQCSFYLLYLAKEAASYQVSMNMLCYEWYNYVYQVTVILRLSREWCPVCTYCYWFVERRVWTTDSTEACCFNC9.328.790.899.999.9906.298.635.673.522.94021.710.526.315.789.40021.052.631.578.947.3005.131.578.947.368.42076Estrutural
TRAIN_P00003PAHLWPYWRFYVWIVFYGYHNPNYHFGMKEVKERPDCKNCTVAVLFIAIDWISTYIPLKPVAWCLYSIRYCVGQPAFEWESGNCKDRVLFMIWFDDSKNVTFKQPAQDMHCRAVQVMPAGLSCHRATFMCPVPPRKCWMEWNSTDVVF176.163.852845.897.731.781.00619.256.756.756.756.700814.189.189.189.189.10046.621.621.621.621.600148Estrutural
TRAIN_P00004GEAFSRPHCFACAATKKGFPWARMCCTTSMAMDGVQSKMHKSKHRFVKWFDCYGSDCTYKWHSDIHGSWSRCCQEAWTMFKHKVTRMLLSWVMCNICFNWCSKEDHLIMFAYFLDMTRPEQDHTTKFCHLAYRCENKAYFTGCFDRCLAMDTIVFDQQQIKTDSQMRMPMAAFYGRHPNSSACFKMNGAHMWASFYTPCHGKFNPMHHCFLAPSAYMWILAQGDFFDMEVEIWDQSWWSGFNRFWYSLETMGDPIVNNCMWPQSRRACLYDPGPPFYYSQNFDVHMRRARSEWMFK3.524.429.680.000.0008.448.340.034.484.86016.047.297.297.297.200211.891.891.891.891.89040.878.378.378.378.300296Estrutural
TRAIN_P00005HYVFQGLMLHCGGYMITACGFGVIFPEQMTREGLIMHTARAHHFLIHHFPMTVKAKAFGRFTCQRTVVVGEKGYVILKNSRQHTVNDCDDQIQERKLRNFGIYLPYYSIQNQQNETMGHWKQPETVSTYPQATSFSLWTVEPANWFMQGYVCKRDILGEPSLGFFGNLRKLCAKGLEPFQDRWGQECQRCSYTWSNEDEHLHASKWRQDGYEERDQKGHPASFEAYQKFTHTWDFCWDYSLYTWWPYMERDYHKQGQMKMHEALVHCSPHFLFTGIGKTYEGQCVMCIIMRF3.455.799.310.000.000769.630.641.937.2561.404.109.589.041.0901820.205.479.452.054.7003.801.369.863.013.690292Receptora
TRAIN_P00006FHSESKFQEIFLHCHDRPMRKHVMHIHKHDHFINMRHDFDEYDSDPEFFSFTYLYKTELGYVMGRQQKIEPYYTFIPKWASM10.274.542.699.999.900651.610.164.642.33412.195.121.951.219.500714.634.146.341.463.4003.902.439.024.390.24082Enzima
TRAIN_P00007CHGWDKNRHKTGEEIPIRPMREMNRMQAWFWIVENSECMVDNWFNLIMKIYLFLPSFAYTTQHLNNDKYHMWCKVFTICGLAYY10.356.947.699.999.90078.717.348.098.754.80017.857.142.857.142.800517.857.142.857.142.8004.523.809.523.809.52084Estrutural
TRAIN_P00008PRAFVGTGNVFVYHSVNTQPRFSKLNNLIERGSINIRYFRVLPIVGDTCFNGCSQGCLRETQTTCQTEVESYKSIHAPSKNTGSNKLKPQMGMWVFHNLLPMCQPWKPQHTLMAQYNLKVLLGWTNLYESVLMGVISIKKRFTAEMHREMQCHGSKAFEGN183.720.8159.486.861.228.942.87015.527.950.310.559.000152.670.807.453.416.14036.645.962.732.919.200161Enzima
TRAIN_P00009TMFFADCQRMESRHCKDSPPWIAEPVSFVPHWKIDSANMMIFPFVVTSDKQLRTNIMSWAHQCVYKLHVQKWKEQEFMFADIGDMYNIWNPDQS112.748.6166.003.186.607.360.83015.425.531.914.893.60022.127.659.574.468.080425.531.914.893.61794Receptora
TRAIN_P00010LLRHIPEPLDHKYHKLYWNDSASFGVGPPCRARVMQYHLKPDNFMVVQYTCYAGKPKAFVETSVVKVFVIISDATSCECCTHCLCVVDFIALLMVRNTGRWNICVLIMEFWVWNRMSNK13.846.215.799.999.9008.667.274.665.832.51019.327.731.092.436.900915.966.386.554.621.80046.218.487.394.957.900119Estrutural

-- Schema of the proteinas_train_new table (10 columns).
-- Column names appear to be ASCII-sanitized forms of the Portuguese display
-- labels, with accented characters showing up as "_"; each column below
-- carries its display label and an English gloss.
-- NOTE(review): most numeric-looking measures are declared VARCHAR, not a
-- numeric type; sample values seem to contain "." group separators -- confirm
-- the intended number format before casting or doing arithmetic on them.
CREATE TABLE proteinas_train_new (
  "id_prote_na" VARCHAR,  -- "ID Proteína": protein identifier (samples look like TRAIN_P00001)
  "sequ_ncia" VARCHAR,  -- "Sequência": sequence, a string of uppercase letters in the samples
  "massa_molecular" VARCHAR,  -- "Massa Molecular": molecular mass, stored as text
  "ponto_isoel_trico" VARCHAR,  -- "Ponto Isoelétrico": isoelectric point, stored as text
  "hidrofobicidade" VARCHAR,  -- "Hidrofobicidade": hydrophobicity, stored as text
  "carga_total" BIGINT,  -- "Carga Total": total charge, stored as an integer
  "propor_o_polar" VARCHAR,  -- "Proporção Polar": polar proportion, stored as text
  "propor_o_apolar" VARCHAR,  -- "Proporção Apolar": apolar proportion, stored as text
  "comprimento_sequ_ncia" BIGINT,  -- "Comprimento Sequência": sequence length, stored as an integer
  "classe" VARCHAR  -- "Classe": class label (samples include Estrutural, Receptora, Enzima)
);

Share link

Anyone who has the link will be able to view this.