Baselight

The History Of Baseball

A complete history of major league baseball stats from 1871 to 2015

@kaggle.seanlahman_the_history_of_baseball

Loading...
Loading...

About this Dataset

The History Of Baseball

Baffled why your team traded for that 34-year-old pitcher? Convinced you can create a new and improved version of WAR? Wondering what made the 1907 Cubs great and if can they do it again?

The History of Baseball is a reformatted version of the famous Lahman’s Baseball Database. It contains Major League Baseball’s complete batting and pitching statistics from 1871 to 2015, plus fielding statistics, standings, team stats, park stats, player demographics, managerial records, awards, post-season data, and more.

Scripts, Kaggle’s free, in-browser analytics tool, makes it easy to share detailed sabermetrics, predict the next hall of fame inductee, illustrate how speed scores runs, or publish a definitive analysis on why the Los Angeles Dodgers will never win another World Series.

We have more ideas for analysis than games in a season, but here are a few we’d really love to see:

  • Is there a most error-prone position?
  • When do players at different positions peak?
  • Are the best performers selected for all-star game?
  • How many walks does it take for a starting pitcher to get pulled?
  • Do players with a high ground into double play (GIDP) have a lower batting average?
  • Which players are the most likely to choke during the post-season?
  • Why should or shouldn’t the National League adopt the designated hitter rule?

See the full SQLite schema.

Tables

All Star

@kaggle.seanlahman_the_history_of_baseball.all_star
  • 33 KB
  • 5069 rows
  • 8 columns
Loading...

CREATE TABLE all_star (
  "player_id" VARCHAR,
  "year" BIGINT,
  "game_num" BIGINT,
  "game_id" VARCHAR,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "gp" DOUBLE,
  "starting_pos" DOUBLE
);

Appearances

@kaggle.seanlahman_the_history_of_baseball.appearances
  • 1.2 MB
  • 100951 rows
  • 21 columns
Loading...

CREATE TABLE appearances (
  "year" BIGINT,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "player_id" VARCHAR,
  "g_all" DOUBLE,
  "gs" DOUBLE,
  "g_batting" BIGINT,
  "g_defense" DOUBLE,
  "g_p" BIGINT,
  "g_c" BIGINT,
  "g_1b" BIGINT,
  "g_2b" BIGINT,
  "g_3b" BIGINT,
  "g_ss" BIGINT,
  "g_lf" BIGINT,
  "g_cf" BIGINT,
  "g_rf" BIGINT,
  "g_of" BIGINT,
  "g_dh" DOUBLE,
  "g_ph" DOUBLE,
  "g_pr" DOUBLE
);

Batting

@kaggle.seanlahman_the_history_of_baseball.batting
  • 1.68 MB
  • 101332 rows
  • 22 columns
Loading...

CREATE TABLE batting (
  "player_id" VARCHAR,
  "year" BIGINT,
  "stint" BIGINT,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "g" BIGINT,
  "ab" DOUBLE,
  "r" DOUBLE,
  "h" DOUBLE,
  "double" DOUBLE,
  "triple" DOUBLE,
  "hr" DOUBLE,
  "rbi" DOUBLE,
  "sb" DOUBLE,
  "cs" DOUBLE,
  "bb" DOUBLE,
  "so" DOUBLE,
  "ibb" DOUBLE,
  "hbp" DOUBLE,
  "sh" DOUBLE,
  "sf" DOUBLE,
  "g_idp" DOUBLE
);

Batting Postseason

@kaggle.seanlahman_the_history_of_baseball.batting_postseason
  • 131.98 KB
  • 11690 rows
  • 22 columns
Loading...

CREATE TABLE batting_postseason (
  "year" BIGINT,
  "round" VARCHAR,
  "player_id" VARCHAR,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "g" BIGINT,
  "ab" BIGINT,
  "r" BIGINT,
  "h" BIGINT,
  "double" BIGINT,
  "triple" BIGINT,
  "hr" BIGINT,
  "rbi" BIGINT,
  "sb" BIGINT,
  "cs" DOUBLE,
  "bb" BIGINT,
  "so" BIGINT,
  "ibb" DOUBLE,
  "hbp" DOUBLE,
  "sh" DOUBLE,
  "sf" DOUBLE,
  "g_idp" DOUBLE
);

College

@kaggle.seanlahman_the_history_of_baseball.college
  • 41.05 KB
  • 1207 rows
  • 5 columns
Loading...

CREATE TABLE college (
  "college_id" VARCHAR,
  "name_full" VARCHAR,
  "city" VARCHAR,
  "state" VARCHAR,
  "country" VARCHAR
);

Fielding

@kaggle.seanlahman_the_history_of_baseball.fielding
  • 1.9 MB
  • 170526 rows
  • 18 columns
Loading...

CREATE TABLE fielding (
  "player_id" VARCHAR,
  "year" BIGINT,
  "stint" BIGINT,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "pos" VARCHAR,
  "g" BIGINT,
  "gs" DOUBLE,
  "inn_outs" DOUBLE,
  "po" DOUBLE,
  "a" DOUBLE,
  "e" DOUBLE,
  "dp" DOUBLE,
  "pb" DOUBLE,
  "wp" DOUBLE,
  "sb" DOUBLE,
  "cs" DOUBLE,
  "zr" DOUBLE
);

Fielding Outfield

@kaggle.seanlahman_the_history_of_baseball.fielding_outfield
  • 81.3 KB
  • 12028 rows
  • 6 columns
Loading...

CREATE TABLE fielding_outfield (
  "player_id" VARCHAR,
  "year" BIGINT,
  "stint" BIGINT,
  "glf" DOUBLE,
  "gcf" DOUBLE,
  "grf" DOUBLE
);

Fielding Postseason

@kaggle.seanlahman_the_history_of_baseball.fielding_postseason
  • 125.22 KB
  • 12311 rows
  • 17 columns
Loading...

CREATE TABLE fielding_postseason (
  "player_id" VARCHAR,
  "year" BIGINT,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "round" VARCHAR,
  "pos" VARCHAR,
  "g" BIGINT,
  "gs" DOUBLE,
  "inn_outs" DOUBLE,
  "po" BIGINT,
  "a" BIGINT,
  "e" BIGINT,
  "dp" BIGINT,
  "tp" BIGINT,
  "pb" DOUBLE,
  "sb" DOUBLE,
  "cs" DOUBLE
);

Hall Of Fame

@kaggle.seanlahman_the_history_of_baseball.hall_of_fame
  • 29.54 KB
  • 4120 rows
  • 9 columns
Loading...

CREATE TABLE hall_of_fame (
  "player_id" VARCHAR,
  "yearid" BIGINT,
  "votedby" VARCHAR,
  "ballots" DOUBLE,
  "needed" DOUBLE,
  "votes" DOUBLE,
  "inducted" VARCHAR,
  "category" VARCHAR,
  "needed_note" VARCHAR
);

Home Game

@kaggle.seanlahman_the_history_of_baseball.home_game
  • 57.74 KB
  • 2944 rows
  • 9 columns
Loading...

CREATE TABLE home_game (
  "year" BIGINT,
  "league_id" VARCHAR,
  "team_id" VARCHAR,
  "park_id" VARCHAR,
  "span_first" TIMESTAMP,
  "span_last" TIMESTAMP,
  "games" BIGINT,
  "openings" BIGINT,
  "attendance" BIGINT
);

Manager

@kaggle.seanlahman_the_history_of_baseball.manager
  • 33.48 KB
  • 3405 rows
  • 10 columns
Loading...

CREATE TABLE manager (
  "player_id" VARCHAR,
  "year" BIGINT,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "inseason" BIGINT,
  "g" BIGINT,
  "w" BIGINT,
  "l" BIGINT,
  "rank" DOUBLE,
  "plyr_mgr" VARCHAR
);

Manager Award

@kaggle.seanlahman_the_history_of_baseball.manager_award
  • 5.79 KB
  • 177 rows
  • 6 columns
Loading...

CREATE TABLE manager_award (
  "player_id" VARCHAR,
  "award_id" VARCHAR,
  "year" BIGINT,
  "league_id" VARCHAR,
  "tie" VARCHAR,
  "notes" VARCHAR
);

Manager Award Vote

@kaggle.seanlahman_the_history_of_baseball.manager_award_vote
  • 8.2 KB
  • 414 rows
  • 7 columns
Loading...

CREATE TABLE manager_award_vote (
  "award_id" VARCHAR,
  "year" BIGINT,
  "league_id" VARCHAR,
  "player_id" VARCHAR,
  "points_won" BIGINT,
  "points_max" BIGINT,
  "votes_first" BIGINT
);

Manager Half

@kaggle.seanlahman_the_history_of_baseball.manager_half
  • 8.47 KB
  • 93 rows
  • 10 columns
Loading...

CREATE TABLE manager_half (
  "player_id" VARCHAR,
  "year" BIGINT,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "inseason" BIGINT,
  "half" BIGINT,
  "g" BIGINT,
  "w" BIGINT,
  "l" BIGINT,
  "rank" BIGINT
);

Park

@kaggle.seanlahman_the_history_of_baseball.park
  • 11.24 KB
  • 250 rows
  • 6 columns
Loading...

CREATE TABLE park (
  "park_id" VARCHAR,
  "park_name" VARCHAR,
  "park_alias" VARCHAR,
  "city" VARCHAR,
  "state" VARCHAR,
  "country" VARCHAR
);

Pitching

@kaggle.seanlahman_the_history_of_baseball.pitching
  • 1.07 MB
  • 44139 rows
  • 30 columns
Loading...

CREATE TABLE pitching (
  "player_id" VARCHAR,
  "year" BIGINT,
  "stint" BIGINT,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "w" BIGINT,
  "l" BIGINT,
  "g" BIGINT,
  "gs" BIGINT,
  "cg" BIGINT,
  "sho" BIGINT,
  "sv" BIGINT,
  "ipouts" DOUBLE,
  "h" BIGINT,
  "er" BIGINT,
  "hr" BIGINT,
  "bb" BIGINT,
  "so" BIGINT,
  "baopp" DOUBLE,
  "era" DOUBLE,
  "ibb" DOUBLE,
  "wp" DOUBLE,
  "hbp" DOUBLE,
  "bk" BIGINT,
  "bfp" DOUBLE,
  "gf" DOUBLE,
  "r" BIGINT,
  "sh" DOUBLE,
  "sf" DOUBLE,
  "g_idp" DOUBLE
);

Pitching Postseason

@kaggle.seanlahman_the_history_of_baseball.pitching_postseason
  • 102.75 KB
  • 5109 rows
  • 30 columns
Loading...

CREATE TABLE pitching_postseason (
  "player_id" VARCHAR,
  "year" BIGINT,
  "round" VARCHAR,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "w" BIGINT,
  "l" BIGINT,
  "g" BIGINT,
  "gs" BIGINT,
  "cg" BIGINT,
  "sho" BIGINT,
  "sv" BIGINT,
  "ipouts" BIGINT,
  "h" BIGINT,
  "er" BIGINT,
  "hr" BIGINT,
  "bb" BIGINT,
  "so" BIGINT,
  "baopp" VARCHAR,
  "era" DOUBLE,
  "ibb" DOUBLE,
  "wp" DOUBLE,
  "hbp" DOUBLE,
  "bk" DOUBLE,
  "bfp" DOUBLE,
  "gf" BIGINT,
  "r" BIGINT,
  "sh" DOUBLE,
  "sf" DOUBLE,
  "g_idp" DOUBLE
);

Player

@kaggle.seanlahman_the_history_of_baseball.player
  • 1.16 MB
  • 18846 rows
  • 24 columns
Loading...

CREATE TABLE player (
  "player_id" VARCHAR,
  "birth_year" DOUBLE,
  "birth_month" DOUBLE,
  "birth_day" DOUBLE,
  "birth_country" VARCHAR,
  "birth_state" VARCHAR,
  "birth_city" VARCHAR,
  "death_year" DOUBLE,
  "death_month" DOUBLE,
  "death_day" DOUBLE,
  "death_country" VARCHAR,
  "death_state" VARCHAR,
  "death_city" VARCHAR,
  "name_first" VARCHAR,
  "name_last" VARCHAR,
  "name_given" VARCHAR,
  "weight" DOUBLE,
  "height" DOUBLE,
  "bats" VARCHAR,
  "throws" VARCHAR,
  "debut" TIMESTAMP,
  "final_game" TIMESTAMP,
  "retro_id" VARCHAR,
  "bbref_id" VARCHAR
);

Player Award

@kaggle.seanlahman_the_history_of_baseball.player_award
  • 27.23 KB
  • 6078 rows
  • 6 columns
Loading...

CREATE TABLE player_award (
  "player_id" VARCHAR,
  "award_id" VARCHAR,
  "year" BIGINT,
  "league_id" VARCHAR,
  "tie" VARCHAR,
  "notes" VARCHAR
);

Player Award Vote

@kaggle.seanlahman_the_history_of_baseball.player_award_vote
  • 47 KB
  • 6795 rows
  • 7 columns
Loading...

CREATE TABLE player_award_vote (
  "award_id" VARCHAR,
  "year" BIGINT,
  "league_id" VARCHAR,
  "player_id" VARCHAR,
  "points_won" DOUBLE,
  "points_max" BIGINT,
  "votes_first" DOUBLE
);

Player College

@kaggle.seanlahman_the_history_of_baseball.player_college
  • 115.93 KB
  • 17350 rows
  • 3 columns
Loading...

CREATE TABLE player_college (
  "player_id" VARCHAR,
  "college_id" VARCHAR,
  "year" BIGINT
);

Postseason

@kaggle.seanlahman_the_history_of_baseball.postseason
  • 8.48 KB
  • 307 rows
  • 9 columns
Loading...

CREATE TABLE postseason (
  "year" BIGINT,
  "round" VARCHAR,
  "team_id_winner" VARCHAR,
  "league_id_winner" VARCHAR,
  "team_id_loser" VARCHAR,
  "league_id_loser" VARCHAR,
  "wins" BIGINT,
  "losses" BIGINT,
  "ties" BIGINT
);

Salary

@kaggle.seanlahman_the_history_of_baseball.salary
  • 136.2 KB
  • 25575 rows
  • 5 columns
Loading...

CREATE TABLE salary (
  "year" BIGINT,
  "team_id" VARCHAR,
  "league_id" VARCHAR,
  "player_id" VARCHAR,
  "salary" BIGINT
);

Team

@kaggle.seanlahman_the_history_of_baseball.team
  • 193.08 KB
  • 2805 rows
  • 48 columns
Loading...

CREATE TABLE team (
  "year" BIGINT,
  "league_id" VARCHAR,
  "team_id" VARCHAR,
  "franchise_id" VARCHAR,
  "div_id" VARCHAR,
  "rank" BIGINT,
  "g" BIGINT,
  "ghome" DOUBLE,
  "w" BIGINT,
  "l" BIGINT,
  "div_win" VARCHAR,
  "wc_win" VARCHAR,
  "lg_win" VARCHAR,
  "ws_win" VARCHAR,
  "r" BIGINT,
  "ab" BIGINT,
  "h" BIGINT,
  "double" BIGINT,
  "triple" BIGINT,
  "hr" BIGINT,
  "bb" BIGINT,
  "so" DOUBLE,
  "sb" DOUBLE,
  "cs" DOUBLE,
  "hbp" DOUBLE,
  "sf" DOUBLE,
  "ra" BIGINT,
  "er" BIGINT,
  "era" DOUBLE,
  "cg" BIGINT,
  "sho" BIGINT,
  "sv" BIGINT,
  "ipouts" BIGINT,
  "ha" BIGINT,
  "hra" BIGINT,
  "bba" BIGINT,
  "soa" BIGINT,
  "e" BIGINT,
  "dp" DOUBLE,
  "fp" DOUBLE,
  "name" VARCHAR,
  "park" VARCHAR,
  "attendance" DOUBLE,
  "bpf" BIGINT,
  "ppf" BIGINT,
  "team_id_br" VARCHAR,
  "team_id_lahman45" VARCHAR,
  "team_id_retro" VARCHAR
);

Team Franchise

@kaggle.seanlahman_the_history_of_baseball.team_franchise
  • 5.55 KB
  • 120 rows
  • 4 columns
Loading...

CREATE TABLE team_franchise (
  "franchise_id" VARCHAR,
  "franchise_name" VARCHAR,
  "active" VARCHAR,
  "na_assoc" VARCHAR
);

Team Half

@kaggle.seanlahman_the_history_of_baseball.team_half
  • 7.11 KB
  • 52 rows
  • 10 columns
Loading...

CREATE TABLE team_half (
  "year" BIGINT,
  "league_id" VARCHAR,
  "team_id" VARCHAR,
  "half" BIGINT,
  "div_id" VARCHAR,
  "div_win" VARCHAR,
  "rank" BIGINT,
  "g" BIGINT,
  "w" BIGINT,
  "l" BIGINT
);

Share link

Anyone who has the link will be able to view this.