0. Introduction
1. The EMNIST Dataset
2. Image Pre-processing
2.1 Importing the images
2.2 Converting the images to grayscale
2.3 Binarizing the images
2.4 Segmenting the images
2.5 Rescaling and centering the segmented images
2.6 Bringing everything together
3. Prediction
3.1 Training our model
3.2 Measuring the accuracy
3.3 Predicting words
4. Conclusion and possible improvements
5. References and further reading
This notebook aims to show how to create a simple Handwriting Recognition (HWR) application, able to recognize both letters and numbers. For that, we will fundamentally use two libraries:

- `scikit-image`: for image processing.
- `scikit-learn`: to train our model.

The steps we will follow are outlined in the table of contents above: loading the EMNIST dataset, pre-processing the images, training a classifier, and predicting entire words.
For this example, we will use the EMNIST Dataset [1] which is "a set of handwritten character digits derived from the NIST Special Database 19 and converted to a 28x28 pixel image format and dataset structure that directly matches the MNIST dataset". You can find more information about it here.
However, if you wish to use a different dataset, the process should be very similar to what we will follow here.
The EMNIST Dataset offers six different splits. In our case, we will use the "EMNIST Balanced" split, which contains 131,600 characters (letters and numbers) and 47 balanced classes. We can get it from Kaggle.
Once downloaded, let's load it with `pandas`.
import pandas as pd
from pathlib import Path
# Read training and test datasets (the CSV files have no header row)
train_data = pd.read_csv(Path("datasets/emnist-balanced-train.csv"), header=None).values
test_data = pd.read_csv(Path("datasets/emnist-balanced-test.csv"), header=None).values
The format of the data is:
Class | Image data |
---|---|
4 | [ 0 0 254 214 ... 214 154 45 0 0 ] |
21 | [ 188 0 0 179 ... 245 70 244 0 0 ] |
8 | [ 0 45 177 89 ... 80 154 90 0 45 ] |
11 | [ 0 252 196 200 ... 61 251 0 0 0 ] |
... | ... |
Let's separate the input from the target values:
# training set
x_train = train_data[...,1:] # all columns except the first
y_train = train_data[...,0] # first column
# test set
x_test = test_data[...,1:]
y_test = test_data[...,0]
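As a quick sanity check, we can print the array shapes. Assuming the CSVs were read without dropping any rows, the "Balanced" split should contain 112,800 training and 18,800 test samples (131,600 in total), each with 784 (28 x 28) pixel values:

```python
# shapes of the arrays: (samples, pixels) for x, (samples,) for y
print(x_train.shape, y_train.shape)  # expected: (112800, 784) (112800,)
print(x_test.shape, y_test.shape)    # expected: (18800, 784) (18800,)
```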
We can now plot one of the images to check that everything is working fine. We'll use `matplotlib` for that.
%matplotlib inline
import matplotlib.pyplot as plt
The image data, as we have mentioned previously, is stored in only one dimension. In addition, the images are mirrored horizontally and rotated 90º. Because of this, we cannot plot them directly. First, we need to take the following steps:
1. Make the image two-dimensional using the `shape` attribute. The image size is 28 x 28 pixels, so we will assign these values to it.
2. Flip it horizontally. We will use numpy's `fliplr()` for that.
3. Rotate it 90º with `rot90()`, also from numpy.

import numpy as np
def imshow_EMNIST(img):
'''Plots EMNIST dataset images.
Args:
img:
EMNIST image.
'''
img.shape = (28, 28) # make the image two-dimensional (28x28 pixels)
img = np.fliplr(img) # flip it horizontally
img = np.rot90(img) # rotate it 90º
    plt.imshow(255-img, cmap='gray') # invert the image, so black pixels are
                                     # white and vice versa; also plot it in
                                     # grayscale (cmap='gray')
img = x_test[1100] # random image
imshow_EMNIST(img)
As we said before, the labels will take values between 0 and 46. The correspondence between the labels and the characters is as follows:
Label | Character |
---|---|
0 | '0' |
... | ... |
9 | '9' |
10 | 'A' |
... | ... |
35 | 'Z' |
36 | 'a' |
... | ... |
46 | 't' |
We might think that the conversion of the label into ASCII code would be as simple as carrying out an addition. For example, the label for 'A' is 10, and its ASCII code is 65. So, adding 55 to the label would be enough to get the character.
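For instance, that naive conversion would be a one-liner:

```python
# naive label-to-character conversion (only valid for the uppercase range)
label = 10
print(chr(label + 55))  # prints 'A' (ASCII code 65)
```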
However, that's not quite right, since there are characters that share the same label, as the class merging described in the EMNIST paper shows.
This is due to the similarity of certain lowercase and uppercase letters, for example 'o' and 'O', 'x' and 'X', or 'w' and 'W'. Telling whether such a character is lowercase or uppercase in isolation is very difficult, if not impossible.
Given these small irregularities, the simplest way to establish the label-to-character correspondence is to define a dictionary.
# label-character correspondence
data2ascii = {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9',
10: 'A', 11: 'B', 12: 'C', 13: 'D', 14: 'E', 15: 'F', 16: 'G', 17: 'H', 18: 'I',
19: 'J', 20: 'K', 21: 'L', 22: 'M', 23: 'N', 24: 'O', 25: 'P', 26: 'Q', 27: 'R',
28: 'S', 29: 'T', 30: 'U', 31: 'V', 32: 'W', 33: 'X', 34: 'Y', 35: 'Z', 36: 'a',
37: 'b', 38: 'd', 39: 'e', 40: 'f', 41: 'g', 42: 'h', 43: 'n', 44: 'q', 45: 'r',
46: 't'}
Using the previous example...
imshow_EMNIST(img)
print("Character:", data2ascii[y_test[1100]]) # y_test stores the labels
Character: A
It would also be interesting to implement a function that would allow us to find all the occurrences of a certain character in the dataset.
def find_character(character, label_set):
'''Finds a specific character in a set.
Args:
character:
Character to look for.
label_set:
Set containing EMNIST labels.
Returns:
        Indices where the character has been found.
    '''
    pos = []
    for i, label in enumerate(label_set):
        if data2ascii[label] == character:
            pos.append(i)
    return pos
For example, let's look for all the occurrences of 'C':
indexes = find_character('C', y_test)
print("Number of 'C' characters:", len(indexes))
print(indexes[:20])
imshow_EMNIST(x_test[indexes[2]]) # third 'C' character in the dataset
Number of 'C' characters: 400
[8, 29, 34, 37, 83, 95, 141, 147, 150, 227, 236, 337, 493, 584, 629, 633, 762, 768, 807, 809]
The goal of our application is to recognize not only individual characters, but entire words.
To do this, it is necessary to carry out a pre-processing of the image. This pre-processing will basically consist of:

1. Converting the image to grayscale.
2. Binarizing the image.
3. Segmenting the image into individual letters.
4. Rescaling and centering the segmented letters.

Let's go step by step.
To import the images, we will use three libraries:

- `os`: which lets us interact with the operating system.
- `scikit-image`: which is a powerful image processing library.
- `pathlib`: which gives us object-oriented filesystem paths.

import os
from skimage.io import imread
from pathlib import Path
dataset_dir = Path("./img-prediction") # directory where the images are
img_names = os.listdir(dataset_dir) # list all the files in that directory
imgs = []
for name in sorted(img_names):
img = imread(dataset_dir / name) # load image
imgs.append(img)
plt.imshow(imgs[0]) # plot one of the loaded images
Scikit-image provides the `rgb2gray()` function for converting an image to grayscale:
from skimage.color import rgb2gray
img = imgs[0]
img_gray = rgb2gray(img)
plt.imshow(img_gray, cmap='gray')
The next thing we have to do is binarize the image, that is, make its pixels take only two values. In our case, those two values will be `True` and `False`.
An important concept when binarizing an image is the threshold. Essentially, this value sets the limit between what will be `True` and what will be `False`.
Finding a threshold that is versatile enough to adapt to different images can be a complicated task. The scikit-image library provides a number of functions to find an appropriate threshold value. We will use `threshold_otsu()`, based on Otsu's method.
All pixels whose values are below the threshold will become `True`; the rest will be `False`. That is, the dark pixels (for example, those that correspond to a letter) will be `True`; the light pixels (the white background) will be `False`.
from skimage.filters import threshold_otsu
def binarize(img):
'''Binarizes an image.
Args:
img:
Image to be binarized.
Returns:
Binarized image.
'''
img = rgb2gray(img)
    return img < threshold_otsu(img) # image binarization (the threshold is computed on img itself)
img_binary = binarize(img)
plt.imshow(img_binary==False, cmap='gray') # compare with False to have black letters over white bg
To separate the letters that make up the word, we will project the image horizontally and vertically.
We can collect the information from the projections in one-dimensional arrays. The values will be boolean, since we just need to know whether there is information in a given column/row of pixels (`True`) or not (`False`).
def get_horiz_projection(img):
'''Gets the horizontal projections from an image.
Args:
img:
Binarized image to get the projections from.
Returns:
A list containing the horizontal projections.
'''
img_width = img.shape[1]
h_proj = np.empty(img_width, dtype=bool)
for i in range(img_width):
h_proj[i] = any(img[..., i]) # True if there's information in a column
return h_proj
def get_vert_projection(img):
'''Gets the vertical projections from an image.
Args:
img:
Binarized image to get the projections from.
Returns:
A list containing the vertical projections.
'''
img_height = img.shape[0]
v_proj = np.empty(img_height, dtype=bool)
for i in range(img_height):
v_proj[i] = any(img[i])
return v_proj
get_horiz_projection(img_binary)[103:111] # check it works
array([False, False, False, False, True, True, True, True])
Once we know where there's information, and where there's not, thanks to the functions defined above, we want to know in which pixel intervals the information is contained. We can store the start and end points of the information in a list, as illustrated below:
def get_slices_from_proj(img, f_projection, elem_sub_list = 0):
'''Returns a list of projection intervals in which there is
information.
Args:
img:
Binarized image to get the slices from.
f_projection:
Function that calculates the projections.
elem_sub_list:
Used to divide the list in sublists of `elem_sub_list`
elements.
Returns:
The list with the pixel intervals.
'''
slices = []
proj = f_projection(img)
previous = proj[0]
for i in range(1, proj.size):
if previous != proj[i]: # we keep track of the pixel where there's change
slices.append(i)
previous = proj[i] # update previous
return slices if not elem_sub_list else sub_list(slices, elem_sub_list)
def sub_list(main_list, elem_sub_list):
'''
Create sublists of `elem_sub_list` elements inside `main_list`.
Args:
main_list:
            List that will be divided into sublists.
        elem_sub_list:
            Number of elements in each sublist.
    Returns:
The list with the sublists created.
'''
return [main_list[i:i+elem_sub_list] for i in range(0, len(main_list), elem_sub_list)]
get_slices_from_proj(img_binary, get_horiz_projection, 2)
[[59, 95], [107, 141], [147, 182], [185, 218]]
With this information, we are now able to segment the image:
imgs_crop = []
h_slices = get_slices_from_proj(img_binary, get_horiz_projection, 2)
for i in range(len(h_slices)):
imgs_crop.append(img_binary[:, h_slices[i][0] : h_slices[i][1]]) # crop left and right
for i in range(len(imgs_crop)):
v_slices = get_slices_from_proj(imgs_crop[i], get_vert_projection) # take the vertical projection of each
imgs_crop[i] = imgs_crop[i][v_slices[0]:v_slices[-1]] # letter and crop top and bottom
plt.imshow(img)
# Plot the separated letters
f, axarr = plt.subplots(1, 4) # create the figure and the axes
axarr[0].imshow(imgs_crop[0]==False, cmap='gray')
axarr[1].imshow(imgs_crop[1]==False, cmap='gray')
axarr[2].imshow(imgs_crop[2]==False, cmap='gray')
axarr[3].imshow(imgs_crop[3]==False, cmap='gray')
The resizing of the images can be done with the help of the `resize()` function from `scikit-image`. In order to avoid pixelated edges when resizing, we will first apply a Gaussian filter. Again, `scikit-image` provides a suitable function for this: `gaussian()`.
from skimage.filters import gaussian
from skimage.transform import resize
img_blur = gaussian(imgs_crop[3], 3) # Gaussian blur for smooth edges when rescaling
img_resized = resize(img_blur, (28, 28), mode='reflect', anti_aliasing=True) # rescale to 28 x 28
plt.imshow(255-img_resized, cmap='gray')
To prevent the image from being distorted and the edges of the letter from sticking to the edge of the image, we will add a white frame around it, so that the letter is nicely centered in the image.
We will define a function that will add these borders and resize the image.
def add_borders_resize(img, pxls_width = 28, pxls_height = 28, sigma=0.3):
'''Adds white borders to the image and scales it to `pxls_width`
by `pxls_height` pixels.
Args:
img:
Image that will have the borders added to, and will be
rescaled.
pxls_width:
Width of the resized image in pixels.
pxls_height:
Height of the resized image in pixels.
sigma:
Deviation for the Gaussian filter.
Returns:
The resized image with the added frames.
'''
if img.shape[0] > img.shape[1]: # if the image is higher than it is wide
        # border_height and border_width are added above/below and left/right, respectively
border_height = int((img.shape[1]/img.shape[0])*(pxls_width/1.1)) # arbitrary (the higher the image
# is, the less border we should add)
# border_width is calculated so that when it is added to the image, it becomes square
border_width = int(((img.shape[0]+border_height*2) - img.shape[1])/2)
else: # if the image is wider than it is higher, the process is similar
border_width = int((img.shape[0]/img.shape[1])*(pxls_width/1.1)) # arbitrary
border_height = int(((img.shape[1]+border_width*2) - img.shape[0])/2)
v_border = [] # vertical border
for i in range(border_height):
v_border.append([False for i in range(0, img.shape[1])]) # create vertical border
    img = np.concatenate((np.asarray(v_border), img, np.asarray(v_border))) # add it above and below the img
h_border = [] # horizontal border
for i in range(0, img.shape[0]):
h_border.append([False for i in range(0, border_width)]) # create horizontal border
    img = np.column_stack((np.asarray(h_border), img, np.asarray(h_border))) # add it to the left and right of the img
img = gaussian(img, sigma*10) # Gaussian blur
# rescale and normalize to 0-255
return (resize(img,(pxls_width, pxls_height), mode='reflect', anti_aliasing=True)*255).astype(int)
When returning, the image is normalized so that all its values are between 0 and 255 and are of type `int`, just like the samples of our dataset. This is necessary since the `gaussian()` function returns the image as `float` values between 0.0 and 1.0.
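We can quickly verify this behaviour with a small check, using the blurred image from before (the exact integer type may vary by platform):

```python
print(img_blur.dtype)                      # float64, values in [0.0, 1.0]
print(add_borders_resize(img_blur).dtype)  # a NumPy integer type, values in [0, 255]
```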
img_resized = add_borders_resize(img_blur)
plt.imshow(255-img_resized, cmap='gray')
Let's create a function that segments the letters of the word, resizes them, and adds the white border to each of them:
def slice_image(img):
'''Slices the letters in the image.
Args:
img:
Image to be sliced.
Returns:
A list with the slices.
'''
imgs_crop = []
h_slices = get_slices_from_proj(img, get_horiz_projection, 2)
for i in range(len(h_slices)):
imgs_crop.append(img[:, h_slices[i][0] : h_slices[i][1]]) # crop left and right
for i in range(len(imgs_crop)):
v_slices = get_slices_from_proj(imgs_crop[i], get_vert_projection) # we take the vertical projection of
imgs_crop[i] = imgs_crop[i][v_slices[0]:v_slices[-1]] # each letter and crop top and bottom
imgs_crop[i] = add_borders_resize(imgs_crop[i]) # rescale and add frame
return imgs_crop
Finally, we will define a function that gathers all the steps involved in the pre-processing of the image:
def pre_process_img(img):
'''Pre-processes and slices the image.
Args:
img:
Image to be pre-processed.
Returns:
The image pre-processed and sliced.
'''
img_binary = binarize(img) # binarize the image
imgs_sliced = slice_image(img_binary) # slice the image
return imgs_sliced
def imshow_segmented(img, imgs_crop):
'''Plots the original image, as well as its slices once
pre-processed.
Args:
img:
Image to be plotted.
imgs_crop:
Image slices.
'''
plt.imshow(img)
# plot the letters separately
f, axarr = plt.subplots(1, len(imgs_crop))
for i in range(len(imgs_crop)):
axarr[i].imshow(255-imgs_crop[i], cmap='gray')
imgs_sliced = pre_process_img(img)
imshow_segmented(img, imgs_sliced)
To predict the letters, we will use a neural network as a classifier. The `scikit-learn` library provides us with multiple alternatives when it comes to classifiers. We will use `MLPClassifier`, a classifier that implements a multilayer perceptron.
This classifier supports numerous hyper-parameters that we can adjust to achieve better predictions. In our case, we will only define the number of hidden layers and neurons, but you can try more advanced configurations to improve its accuracy (see the sketch below).
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(hidden_layer_sizes=(1024, 512, 256))
# clf.fit(x_train, y_train) # uncomment to train from scratch; we load a pre-trained model below
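As an illustrative sketch (not a tuned configuration), these are some of the hyper-parameters that `MLPClassifier` exposes and that could be worth experimenting with:

```python
# a possible starting point for experimentation, not a recommended setup
clf = MLPClassifier(hidden_layer_sizes=(1024, 512, 256),
                    activation='relu',        # rectified linear unit (the default)
                    alpha=1e-4,               # L2 regularization strength
                    learning_rate_init=1e-3,  # initial learning rate for the optimizer
                    early_stopping=True,      # hold out validation data and stop early
                    max_iter=50)              # cap on training epochs
```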
Once our model is trained, we can save it so we don't lose it when we close the notebook. To do this, we are going to use the `pickle` library, which also allows us to load a pre-trained classifier.
import pickle
'''
# save the trained classifier
with open('pretrained-models/mlp_classifier.pkl', 'wb') as fid:
pickle.dump(clf, fid)
'''
# load the saved classifier
with open(Path('pretrained-models/mlp_classifier.pkl'), 'rb') as fid:
clf = pickle.load(fid)
Once we have trained our model, we will measure its accuracy. Let's define a function for that:
def clf_accuracy(x_test, y_test):
'''Calculates a classifier accuracy.
Args:
x_test:
The input test features.
y_test:
The input test labels.
Returns:
The accuracy of the model.
'''
    p = clf.predict(x_test) # note: uses the global classifier clf
count = 0
for p_i, y_i in zip(p, y_test):
count += int(p_i == y_i)
return (count / x_test.shape[0]) * 100
print("Accuracy:", clf_accuracy(x_test, y_test))
Accuracy: 82.54162455449759
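As a side note, `scikit-learn` already implements this: the classifier's `score()` method and `sklearn.metrics.accuracy_score` should return the same value, as a fraction rather than a percentage:

```python
from sklearn.metrics import accuracy_score
print("Accuracy:", clf.score(x_test, y_test) * 100)                   # mean accuracy
print("Accuracy:", accuracy_score(y_test, clf.predict(x_test)) * 100) # same result
```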
Finally, we can try to predict our own words. Let's define a function that joins everything we have seen so far:
def get_prediction(img, clf, case='first'):
'''Predicts the word in the image.
Args:
img:
The photo with the word to identify.
clf:
The classifier used for the prediction.
case:
Optional. Admissible values are:
- 'first': capitalize first letter.
- 'upper': capitalize whole word.
- If any other value is passed, including the empty
string, the word will be returned in lower case.
Returns:
The predicted word.
'''
preds = []
for image in pre_process_img(img):
image = np.rot90(image, k=-1) # rotate and mirror the image,
image = np.fliplr(image) # so it matches the dataset images
image = image.reshape(image.size).tolist()
pred = clf.predict([image])[0]
preds.append(data2ascii[pred])
word = "".join(preds)
if case == 'first':
return word.capitalize()
elif case == 'upper':
return word.upper()
return word.lower()
img = imgs[0]
word = get_prediction(img, clf, case='lower')
print("Prediction:", word)
imshow_segmented(img, pre_process_img(img))
Prediction: yeah
img = imgs[1]
plt.imshow(img)
word = get_prediction(img, clf, case='lower')
print("Prediction:", word)
imshow_segmented(img, pre_process_img(img))
Prediction: great
img = imgs[2]
plt.imshow(img)
word = get_prediction(img, clf)
print("Prediction:", word)
imshow_segmented(img, pre_process_img(img))
Prediction: 4chan
img = imgs[3]
plt.imshow(img)
word = get_prediction(img, clf, case='lower')
print("Prediction:", word)
imshow_segmented(img, pre_process_img(img))
Prediction: understanding
img = imgs[4]
plt.imshow(img)
word = get_prediction(img, clf, case='lower')
print("Prediction:", word)
imshow_segmented(img, pre_process_img(img))
Prediction: h4ck3rm4n
img = imgs[5]
plt.imshow(img)
word = get_prediction(img, clf)
print("Prediction:", word)
imshow_segmented(img, pre_process_img(img))
Prediction: Incomprehensibi1ities
Note: You might have thought that the previous examples are cherry-picked... Well... you're somewhat right ;) But hey, we trained a very basic classifier, so it's not that bad, is it? That's why I challenge you to train better models; you might get much better results! You can learn more about the Multi-layer Perceptron (MLP) and `scikit-learn`'s implementation of it here.
In this notebook we have walked through a simple implementation of Handwriting Recognition (HWR), covering the pre-processing of the images and the training of a simple classifier, and, last but not least, we have tested the whole thing with real-world examples.
One important thing to note is that our implementation, being so simple, has some serious limitations. For example, because of the way we separate the letters (through projections), our algorithm is unable to separate letters that are joined together, something that happens frequently in handwriting.
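As a rough sketch of one possible alternative (not part of the pipeline above): `scikit-image`'s connected-component labeling can separate letters whose projections overlap horizontally, although it still merges letters whose strokes physically touch:

```python
from skimage.measure import label, regionprops

def slice_by_components(img_binary):
    '''Segments letters as connected components instead of projections.
    Note: letters whose strokes physically touch still end up merged.'''
    labeled = label(img_binary)                     # integer label per connected blob
    boxes = [r.bbox for r in regionprops(labeled)]  # (min_row, min_col, max_row, max_col)
    boxes.sort(key=lambda b: b[1])                  # order the letters left to right
    return [img_binary[r0:r1, c0:c1] for (r0, c0, r1, c1) in boxes]
```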
As for possible improvements, there would be quite a few, although the most immediate would be:

- Tuning the classifier's hyper-parameters, or trying more powerful models altogether.
- Using a more robust segmentation method, one able to deal with letters that touch or overlap (see the sketch above).
[1] Cohen, G., Afshar, S., Tapson, J., & van Schaik, A. (2017). EMNIST: an extension of MNIST to handwritten letters. Retrieved from http://arxiv.org/abs/1702.05373.