IMDb Genre-wise Movies Dataset And Sparse Matrices
Contains 75k+ movies, collected from IMDb website: 22 genres + 1 master dataset.
@kaggle.soumyasacharya_imdb_movies_dataset
Contains 75k+ movies, collected from IMDb website: 22 genres + 1 master dataset.
@kaggle.soumyasacharya_imdb_movies_dataset
IMDb stores information related to more than 6 million titles (of which almost 500,000 are feature films) and has been owned by Amazon since 1998.
The movies' Master dataset includes about 75k movies with attributes such as movie description, average rating, number of votes, genre, etc.
The movies are also divided according to genres, which makes a total of 22 genres. The master dataset, containing all the movies, is the file all_df.csv.
Each dataset has its own sparse matrix, containing the TF-IDF scores obtained by applying TfidfVectorizer(analyzer='word', ngram_range=(1,3), stop_words='english').
Data has been scraped from the publicly available website https://www.imdb.com.
-- Per-genre movie table; genre presumably Action (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE action_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Adventure (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE adventure_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Master movie table: per the dataset description, all_df.csv contains all the movies,
-- while the sibling *_df tables hold the per-genre splits.
CREATE TABLE all_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    -- NOTE(review): "year" and "date_published" are VARCHAR here but BIGINT/TIMESTAMP in
    -- most sibling tables — presumably some source values do not parse; confirm before casting.
    "year" VARCHAR,
    "date_published" VARCHAR,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Animation (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE animation_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Biography (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE biography_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    -- NOTE(review): "year" and "date_published" are VARCHAR here but BIGINT/TIMESTAMP in
    -- most sibling tables — presumably some source values do not parse; confirm before casting.
    "year" VARCHAR,
    "date_published" VARCHAR,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Comedy (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE comedy_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    -- NOTE(review): "year" and "date_published" are VARCHAR here but BIGINT/TIMESTAMP in
    -- most sibling tables — presumably some source values do not parse; confirm before casting.
    "year" VARCHAR,
    "date_published" VARCHAR,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Crime (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE crime_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    -- NOTE(review): "year" and "date_published" are VARCHAR here but BIGINT/TIMESTAMP in
    -- most sibling tables — presumably some source values do not parse; confirm before casting.
    "year" VARCHAR,
    "date_published" VARCHAR,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Documentary (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE documentary_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    -- NOTE(review): "metascore" is VARCHAR here but DOUBLE in the sibling tables —
    -- presumably the column did not parse as numeric for this file; confirm before casting.
    "metascore" VARCHAR,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Drama (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE drama_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Family (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE family_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Fantasy (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE fantasy_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Film-Noir (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE film_noir_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably History (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE history_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Horror (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE horror_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Musical (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE musical_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Music (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE music_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Mystery (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE mystery_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Romance (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE romance_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Sci-Fi (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE sci_fi_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Sport (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE sport_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Thriller (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE thriller_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably War (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE war_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
-- Per-genre movie table; genre presumably Western (inferred from the table name — confirm).
-- Uses the same column names as the master table all_df.
CREATE TABLE western_df (
    "imdb_title_id" VARCHAR,
    "title" VARCHAR,
    "original_title" VARCHAR,
    "year" BIGINT,
    "date_published" TIMESTAMP,
    "genre" VARCHAR,
    "duration" BIGINT,
    "country" VARCHAR,
    "language" VARCHAR,
    "director" VARCHAR,
    "writer" VARCHAR,
    "production_company" VARCHAR,
    "actors" VARCHAR,
    "description" VARCHAR,
    "avg_vote" DOUBLE,
    "votes" BIGINT,
    "budget" VARCHAR,
    "usa_gross_income" VARCHAR,
    "worlwide_gross_income" VARCHAR, -- spelling ("worlwide") kept so existing queries still resolve
    "metascore" DOUBLE,
    "reviews_from_users" DOUBLE,
    "reviews_from_critics" DOUBLE,
    -- The comma must precede the comment; inside the comment it is ignored by the parser.
    "description_words" BIGINT, -- Description #words
    "movie_title" VARCHAR
);
Anyone who has the link will be able to view this.