Arabic(Indian) Digits MADBase
Arabic numbers dataset
@kaggle.hossamahmedsalah_arabicindian_digits_madbase
Arabic numbers dataset
@kaggle.hossamahmedsalah_arabicindian_digits_madbase
This dataset consists of flattened images, where each image is represented as a single row.
# Load the MADBase Arabic handwritten-digit images into one flat DataFrame:
# each row is a flattened grayscale image, with the digit in a 'label' column.

# Root directory of the dataset on disk.
root_dir = "MAHD"

# The dataset is split across 12 zero-padded "PartNN" folders.
folder_names = ['Part{:02d}'.format(i) for i in range(1, 13)]

# Top-level split folders: training images and test images.
train_test_folders = ['MAHDBase_TrainingSet', 'test']

# Accumulators for the flattened pixel rows and their digit labels.
data = []
labels = []

for tt in train_test_folders:
    for folder_name in folder_names:
        # The test split stops before Part03 (only Part01/Part02 exist there).
        if tt == train_test_folders[1] and folder_name == 'Part03':
            break
        subfolder_path = os.path.join(root_dir, tt, folder_name)
        print(subfolder_path)
        print(os.listdir(subfolder_path))
        for filename in os.listdir(subfolder_path):
            # Keep only .bmp images.
            # BUGFIX: the original used `not in '.bmp'`, a substring test that
            # wrongly accepted files with no extension ('' is a substring of
            # every string); an exact comparison is the intended check.
            if os.path.splitext(filename)[1].lower() != '.bmp':
                continue
            # Load the image, convert to grayscale, flatten to a 1-D pixel row.
            img_path = os.path.join(subfolder_path, filename)
            img = Image.open(img_path)
            img_grey = img.convert('L')
            img_data = np.array(img_grey).flatten()
            # The third underscore-separated field of the filename carries the
            # digit, e.g. '..._digit7.bmp' -> 7.
            label = int(filename.split('_')[2].replace('digit', '').split('.')[0])
            data.append(img_data)
            labels.append(label)

# Assemble the pixel matrix and attach the labels as the final column.
df = pd.DataFrame(data)
df['label'] = labels
This dataset was created by
https://datacenter.aucegypt.edu/shazeem
and combines 2 source datasets.
-- Schema for the MADBase digit table: one row per flattened image.
-- "unnamed_0" is the exported DataFrame row index; n_0 .. n_98 are the
-- auto-named numeric columns from the export.
-- BUGFIX: the original placed the column-separating comma inside each
-- trailing `--` comment (e.g. `"n_0" BIGINT -- 0,`), leaving the column
-- definitions unseparated and the DDL unparsable; the commas now precede
-- the comments. The stray sharing-boilerplate text after `);` was removed.
CREATE TABLE mahd (
    "unnamed_0" BIGINT, -- Unnamed: 0
    "n_0" BIGINT, -- 0
    "n_1" BIGINT, -- 1
    "n_2" BIGINT, -- 2
    "n_3" BIGINT, -- 3
    "n_4" BIGINT, -- 4
    "n_5" BIGINT, -- 5
    "n_6" BIGINT, -- 6
    "n_7" BIGINT, -- 7
    "n_8" BIGINT, -- 8
    "n_9" BIGINT, -- 9
    "n_10" BIGINT, -- 10
    "n_11" BIGINT, -- 11
    "n_12" BIGINT, -- 12
    "n_13" BIGINT, -- 13
    "n_14" BIGINT, -- 14
    "n_15" BIGINT, -- 15
    "n_16" BIGINT, -- 16
    "n_17" BIGINT, -- 17
    "n_18" BIGINT, -- 18
    "n_19" BIGINT, -- 19
    "n_20" BIGINT, -- 20
    "n_21" BIGINT, -- 21
    "n_22" BIGINT, -- 22
    "n_23" BIGINT, -- 23
    "n_24" BIGINT, -- 24
    "n_25" BIGINT, -- 25
    "n_26" BIGINT, -- 26
    "n_27" BIGINT, -- 27
    "n_28" BIGINT, -- 28
    "n_29" BIGINT, -- 29
    "n_30" BIGINT, -- 30
    "n_31" BIGINT, -- 31
    "n_32" BIGINT, -- 32
    "n_33" BIGINT, -- 33
    "n_34" BIGINT, -- 34
    "n_35" BIGINT, -- 35
    "n_36" BIGINT, -- 36
    "n_37" BIGINT, -- 37
    "n_38" BIGINT, -- 38
    "n_39" BIGINT, -- 39
    "n_40" BIGINT, -- 40
    "n_41" BIGINT, -- 41
    "n_42" BIGINT, -- 42
    "n_43" BIGINT, -- 43
    "n_44" BIGINT, -- 44
    "n_45" BIGINT, -- 45
    "n_46" BIGINT, -- 46
    "n_47" BIGINT, -- 47
    "n_48" BIGINT, -- 48
    "n_49" BIGINT, -- 49
    "n_50" BIGINT, -- 50
    "n_51" BIGINT, -- 51
    "n_52" BIGINT, -- 52
    "n_53" BIGINT, -- 53
    "n_54" BIGINT, -- 54
    "n_55" BIGINT, -- 55
    "n_56" BIGINT, -- 56
    "n_57" BIGINT, -- 57
    "n_58" BIGINT, -- 58
    "n_59" BIGINT, -- 59
    "n_60" BIGINT, -- 60
    "n_61" BIGINT, -- 61
    "n_62" BIGINT, -- 62
    "n_63" BIGINT, -- 63
    "n_64" BIGINT, -- 64
    "n_65" BIGINT, -- 65
    "n_66" BIGINT, -- 66
    "n_67" BIGINT, -- 67
    "n_68" BIGINT, -- 68
    "n_69" BIGINT, -- 69
    "n_70" BIGINT, -- 70
    "n_71" BIGINT, -- 71
    "n_72" BIGINT, -- 72
    "n_73" BIGINT, -- 73
    "n_74" BIGINT, -- 74
    "n_75" BIGINT, -- 75
    "n_76" BIGINT, -- 76
    "n_77" BIGINT, -- 77
    "n_78" BIGINT, -- 78
    "n_79" BIGINT, -- 79
    "n_80" BIGINT, -- 80
    "n_81" BIGINT, -- 81
    "n_82" BIGINT, -- 82
    "n_83" BIGINT, -- 83
    "n_84" BIGINT, -- 84
    "n_85" BIGINT, -- 85
    "n_86" BIGINT, -- 86
    "n_87" BIGINT, -- 87
    "n_88" BIGINT, -- 88
    "n_89" BIGINT, -- 89
    "n_90" BIGINT, -- 90
    "n_91" BIGINT, -- 91
    "n_92" BIGINT, -- 92
    "n_93" BIGINT, -- 93
    "n_94" BIGINT, -- 94
    "n_95" BIGINT, -- 95
    "n_96" BIGINT, -- 96
    "n_97" BIGINT, -- 97
    "n_98" BIGINT -- 98
);