Zomato Bangalore Data (Clean)
The original Zomato dataset, but cleaned. Yes, no redundancies.
@kaggle.saranimje_zomato_data_clean
The original Zomato dataset, but cleaned. Yes, no redundancies.
@kaggle.saranimje_zomato_data_clean
The original Zomato Bengaluru dataset is, let's face it, dirty. It contains too many redundancies and a lot of information that does not make sense. There are multiple URLs for the same restaurant, and it's about time we got a cleaner, crisper dataset.
This dataset contains the original data (in the diners.db file) as well as the cleaned data, which is provided both as a SQL database and in CSV format. It also includes other useful files (user-preference files, Bengaluru POI .geojson files, and location density and rent analysis files) that can aid geospatial analysis.
Upvote if you find it useful
-- bengaluru_house_data: housing attributes with a price column.
-- "size" and "total_sqft" are stored as text, not as numbers.
-- NOTE(review): units of "price" and "total_sqft" are not stated in this
-- schema; confirm against the source data before doing arithmetic on them.
CREATE TABLE bengaluru_house_data (
    "area_type"    VARCHAR,
    "availability" VARCHAR,
    "location"     VARCHAR,
    "size"         VARCHAR,
    "society"      VARCHAR,
    "total_sqft"   VARCHAR,
    "bath"         DOUBLE,
    "balcony"      DOUBLE,
    "price"        DOUBLE
);
-- cuisine_location_rate: a floating-point "rating" alongside a category
-- and a city.
-- NOTE(review): presumably an aggregated rating per (category, city) pair;
-- no key or uniqueness constraint is declared, so confirm before relying on it.
CREATE TABLE cuisine_location_rate (
    "category" VARCHAR,
    "city"     VARCHAR,
    "rating"   DOUBLE
);
-- cuisines: eight positional text columns, "cuisine1" through "cuisine8".
-- NOTE(review): looks like a multi-valued cuisine list split into fixed
-- slots, with unused slots presumably NULL; confirm against the data.
CREATE TABLE cuisines (
    "cuisine1" VARCHAR,
    "cuisine2" VARCHAR,
    "cuisine3" VARCHAR,
    "cuisine4" VARCHAR,
    "cuisine5" VARCHAR,
    "cuisine6" VARCHAR,
    "cuisine7" VARCHAR,
    "cuisine8" VARCHAR
);
-- location: per-location figures ("density", "rent", "rate") together with
-- a latitude/longitude pair.
-- "index" is a plain BIGINT column; no primary key is declared.
-- NOTE(review): units of "density" and "rent" are not stated here; confirm.
CREATE TABLE location (
    "index"     BIGINT,
    "location"  VARCHAR,
    "density"   BIGINT,
    "rent"      BIGINT,
    "rate"      DOUBLE,
    "latitude"  DOUBLE,
    "longitude" DOUBLE
);
-- new_rent: a location name with an integer "number" and a floating-point
-- "avg_cost".
-- NOTE(review): "number" is presumably a count of records behind the
-- average; confirm against the data.
CREATE TABLE new_rent (
    "location" VARCHAR,
    "number"   BIGINT,
    "avg_cost" DOUBLE
);
-- onlinedeliverydata: respondent attributes (age, gender, income, location)
-- followed by a long run of text-valued answer columns.
-- NOTE(review): presumably survey responses about online food delivery;
-- the answer scales are not described in this schema, so confirm them.
--
-- Trailing comments on some columns record the original column header.
-- Each separating comma must come BEFORE the "--": a comma placed after it
-- is part of the comment, which leaves two column definitions with no
-- separator and makes the statement a syntax error.
CREATE TABLE onlinedeliverydata (
    "age" BIGINT,
    "gender" VARCHAR,
    "marital_status" VARCHAR,
    "occupation" VARCHAR,
    "monthly_income" VARCHAR,
    "educational_qualifications" VARCHAR,
    "family_size" BIGINT,
    "latitude" DOUBLE,
    "longitude" DOUBLE,
    "pin_code" BIGINT,
    "medium_p1" VARCHAR, -- Medium (P1)
    "medium_p2" VARCHAR, -- Medium (P2)
    "meal_p1" VARCHAR, -- Meal(P1)
    "meal_p2" VARCHAR, -- Meal(P2)
    "perference_p1" VARCHAR, -- Perference(P1) (spelling as in the original header)
    "perference_p2" VARCHAR, -- Perference(P2) (spelling as in the original header)
    "ease_and_convenient" VARCHAR,
    "time_saving" VARCHAR,
    "more_restaurant_choices" VARCHAR,
    "easy_payment_option" VARCHAR,
    "more_offers_and_discount" VARCHAR,
    "good_food_quality" VARCHAR,
    "good_tracking_system" VARCHAR,
    "self_cooking" VARCHAR,
    "health_concern" VARCHAR,
    "late_delivery" VARCHAR,
    "poor_hygiene" VARCHAR,
    "bad_past_experience" VARCHAR,
    "unavailability" VARCHAR,
    "unaffordable" VARCHAR,
    "long_delivery_time" VARCHAR,
    "delay_of_delivery_person_getting_assigned" VARCHAR,
    "delay_of_delivery_person_picking_up_food" VARCHAR,
    "wrong_order_delivered" VARCHAR,
    "missing_item" VARCHAR,
    "order_placed_by_mistake" VARCHAR,
    "influence_of_time" VARCHAR,
    "order_time" VARCHAR,
    "maximum_wait_time" VARCHAR,
    "residence_in_busy_location" VARCHAR,
    "google_maps_accuracy" VARCHAR,
    "good_road_condition" VARCHAR,
    "low_quantity_low_time" VARCHAR,
    "delivery_person_ability" VARCHAR,
    "influence_of_rating" VARCHAR,
    "less_delivery_time" VARCHAR,
    "high_quality_of_package" VARCHAR,
    "number_of_calls" VARCHAR,
    "politeness" VARCHAR,
    "freshness" VARCHAR,
    "temperature" VARCHAR,
    "good_taste" VARCHAR,
    "good_quantity" VARCHAR,
    "output" VARCHAR,
    "reviews" VARCHAR
);
-- poi: a place name with a latitude/longitude pair ("poi_lat", "poi_long").
-- NOTE(review): presumably the Bengaluru points of interest mentioned in
-- the dataset description; the coordinate reference system is not stated.
CREATE TABLE poi (
    "place"    VARCHAR,
    "poi_lat"  DOUBLE,
    "poi_long" DOUBLE
);
-- rate_cost: a cost value and listing type with an average rating and a
-- vote total.
-- NOTE(review): "avg_rate" and "sum_votes" look like aggregates grouped by
-- ("cost", "listed_type"); confirm against the query that produced them.
CREATE TABLE rate_cost (
    "rowid"       BIGINT,
    "cost"        BIGINT,
    "avg_rate"    DOUBLE,
    "listed_type" VARCHAR,
    "sum_votes"   BIGINT -- SUM(votes)
);
-- zomato_clean: restaurant records with name, address, location, listing
-- type, votes, rating, cuisines, restaurant type and cost.
-- "online_order" and "book_table" are text; "bool_online_order" and
-- "bool_book_table" are integer columns.
-- NOTE(review): the bool_* columns are presumably 0/1 encodings of the
-- matching text columns; confirm against the data.
CREATE TABLE zomato_clean (
    "index"             BIGINT,
    "name"              VARCHAR,
    "address"           VARCHAR,
    "location"          VARCHAR,
    "listed_type"       VARCHAR,
    "votes"             BIGINT,
    "num_rate"          DOUBLE,
    "online_order"      VARCHAR,
    "book_table"        VARCHAR,
    "cuisines"          VARCHAR,
    "rest_type"         VARCHAR,
    "cost"              DOUBLE,
    "bool_online_order" BIGINT,
    "bool_book_table"   BIGINT
);
-- zomato_test: the same restaurant columns as zomato_clean, plus three
-- floating-point columns: "prediction", "predictions", "predictions_tuned".
-- NOTE(review): presumably a held-out split carrying model outputs; which
-- model produced each prediction column is not stated here, so confirm.
CREATE TABLE zomato_test (
    "index"             BIGINT,
    "name"              VARCHAR,
    "address"           VARCHAR,
    "location"          VARCHAR,
    "listed_type"       VARCHAR,
    "votes"             BIGINT,
    "num_rate"          DOUBLE,
    "online_order"      VARCHAR,
    "book_table"        VARCHAR,
    "cuisines"          VARCHAR,
    "rest_type"         VARCHAR,
    "cost"              DOUBLE,
    "bool_online_order" BIGINT,
    "bool_book_table"   BIGINT,
    "prediction"        DOUBLE,
    "predictions"       DOUBLE,
    "predictions_tuned" DOUBLE
);
-- zomato_train: the same column set and types as zomato_clean.
-- NOTE(review): presumably the training split paired with zomato_test;
-- how the rows were split is not stated here, so confirm.
CREATE TABLE zomato_train (
    "index"             BIGINT,
    "name"              VARCHAR,
    "address"           VARCHAR,
    "location"          VARCHAR,
    "listed_type"       VARCHAR,
    "votes"             BIGINT,
    "num_rate"          DOUBLE,
    "online_order"      VARCHAR,
    "book_table"        VARCHAR,
    "cuisines"          VARCHAR,
    "rest_type"         VARCHAR,
    "cost"              DOUBLE,
    "bool_online_order" BIGINT,
    "bool_book_table"   BIGINT
);
Anyone who has the link will be able to view this.