Denoising Autoencoder

I chose “dropped-out auto-encoder” as my final project topic in last semester's deep learning course: the idea was simply to drop out units in a regular sparse auto-encoder, and then in a stacked sparse auto-encoder, in both the visible and hidden layers. It does not work well on auto-encoders, except in the fine-tuning stage of a stacked sparse auto-encoder. I think the right thing to do instead is to use a denoising auto-encoder.

The denoising auto-encoder was proposed by Pascal Vincent et al. The basic idea is to force the hidden layer to discover more robust features, and to prevent it from simply learning the identity, by training the auto-encoder to reconstruct the input from a corrupted version of it.
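In symbols, for each training example x we draw a corrupted version x̃ from a corruption process q(x̃ | x) (here, by zeroing entries at random) and train the encoder f and decoder g to minimize the reconstruction error L(x, g(f(x̃))), measured against the clean x rather than against x̃.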


The corrupted version of the input can be obtained simply by randomly disabling (setting to zero) some of the input values. For example, if our input is an image, we randomly set some of its pixels to zero (pepper noise), and then, just as with a regular auto-encoder, train the encoder and decoder to reconstruct the original image (without the pepper noise).
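Concretely, the corruption is just an element-wise multiplication with a Bernoulli mask. Below is a minimal sketch of that step, assuming OpenCV; the function name corruptInput and the parameter keepProb are illustrative placeholders, and the full listing later in this post does the same thing with getBernoulliMatrix().

#include "opencv2/core/core.hpp"
#include <random>

// Keep each entry of X with probability keepProb, zero it otherwise.
cv::Mat corruptInput(const cv::Mat &X, double keepProb){
    static std::default_random_engine eng(std::random_device{}());
    std::bernoulli_distribution keep(keepProb);
    cv::Mat mask = cv::Mat::zeros(X.rows, X.cols, CV_64FC1);
    for(int i = 0; i < X.rows; i++){
        for(int j = 0; j < X.cols; j++){
            if(keep(eng)) mask.at<double>(i, j) = 1.0;
        }
    }
    return X.mul(mask);   // element-wise product: corrupted copy of X
}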

For a stacked auto-encoder, we apply the denoising trick at each layer-wise training step (the representation produced by the previous layer is corrupted and used as the input of the next auto-encoder), and we do not corrupt anything during the fine-tuning stage.
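A rough sketch of that greedy layer-wise loop is shown below. It reuses SA, trainSparseAutoencoder and non_Linearity from the full listing further down; numLayers, hiddenSizes, lambda, sparsityParam and beta are hypothetical placeholders.

// Hypothetical greedy layer-wise pre-training (illustrative sketch only).
// Each layer is trained as a denoising auto-encoder on the activations
// produced by the previously trained layer; the corruption happens inside
// trainSparseAutoencoder, and no corruption is used during fine-tuning.
vector<SA> layers(numLayers);
Mat input = trainX.clone();            // clean data for the first layer
for(int l = 0; l < numLayers; l++){
    trainSparseAutoencoder(layers[l], input, hiddenSizes[l], lambda, sparsityParam, beta);
    // propagate the CLEAN input through the trained encoder to obtain
    // the training data for the next layer
    Mat z = layers[l].W1 * input + repeat(layers[l].b1, 1, input.cols);
    input = non_Linearity(z);
}
// ... then fine-tune the whole stack on uncorrupted data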

The denoising auto-encoder shares the advantages of dropout networks, and fits sparse auto-encoders more naturally. Because input values are disabled at random, every single unit can be dropped by chance, so the network we get is not only more robust, it can also be seen as an approximation of averaging over many networks with massive weight sharing.

CODE

// denoising auto-encoder
// single layer dae
// About corrupting part, check getBernoulliMatrix()
// 
// Author: Eric Yuan
// Blog: http://eric-yuan.me
// You are FREE to use the following code for ANY purpose.
//
// To run this code, you should have OpenCV in your computer.
// Have fun with it 🙂

#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <math.h>
#include <ctime>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <random>

using namespace cv;
using namespace std;

#define IS_TEST_SA 0

#define ATD at<double>
#define elif else if

#define RELU 0
#define SIGMOID 1
#define TANH 2

int NL_Method = SIGMOID;
// probability that each input unit is KEPT by the Bernoulli mask in
// getBernoulliMatrix(); corruption level = 1 - probDestruction
// (1.0 means no corruption, 0.75 means 25% destruction, etc.)
double probDestruction = 1.0;
int batch;

typedef struct SparseAutoencoder{
    Mat W1;
    Mat W2;
    Mat b1;
    Mat b2;
    Mat W1grad;
    Mat W2grad;
    Mat b1grad;
    Mat b2grad;
    double cost;
}SA;

typedef struct SparseAutoencoderActivation{
    Mat aInput;
    Mat aHidden;
    Mat aOutput;
    Mat zHidden;
    Mat zOutput;
}SAA;


Mat 
concatenateMat(vector<Mat> &vec){

    int height = vec[0].rows;
    int width = vec[0].cols;
    Mat res = Mat::zeros(height * width, vec.size(), CV_64FC1);
    for(int i=0; i<vec.size(); i++){
        Mat img(height, width, CV_64FC1);

        vec[i].convertTo(img, CV_64FC1);
        // reshape(int cn, int rows=0), cn is num of channels.
        Mat ptmat = img.reshape(0, height * width);
        Rect roi = cv::Rect(i, 0, ptmat.cols, ptmat.rows);
        Mat subView = res(roi);
        ptmat.copyTo(subView);
    }
    divide(res, 255.0, res);
    return res;
}

int 
ReverseInt (int i){
    unsigned char ch1, ch2, ch3, ch4;
    ch1 = i & 255;
    ch2 = (i >> 8) & 255;
    ch3 = (i >> 16) & 255;
    ch4 = (i >> 24) & 255;
    return((int) ch1 << 24) + ((int)ch2 << 16) + ((int)ch3 << 8) + ch4;
}

void 
read_Mnist(string filename, vector<Mat> &vec){
    ifstream file(filename, ios::binary);
    if (file.is_open()){
        int magic_number = 0;
        int number_of_images = 0;
        int n_rows = 0;
        int n_cols = 0;
        file.read((char*) &magic_number, sizeof(magic_number));
        magic_number = ReverseInt(magic_number);
        file.read((char*) &number_of_images,sizeof(number_of_images));
        number_of_images = ReverseInt(number_of_images);
        file.read((char*) &n_rows, sizeof(n_rows));
        n_rows = ReverseInt(n_rows);
        file.read((char*) &n_cols, sizeof(n_cols));
        n_cols = ReverseInt(n_cols);
        for(int i = 0; i < number_of_images; ++i){
            Mat tpmat = Mat::zeros(n_rows, n_cols, CV_8UC1);
            for(int r = 0; r < n_rows; ++r){
                for(int c = 0; c < n_cols; ++c){
                    unsigned char temp = 0;
                    file.read((char*) &temp, sizeof(temp));
                    tpmat.at<uchar>(r, c) = (int) temp;
                }
            }
            vec.push_back(tpmat);
        }
    }
}

void 
read_Mnist_Label(string filename, Mat &mat)
{
    ifstream file(filename, ios::binary);
    if (file.is_open()){
        int magic_number = 0;
        int number_of_images = 0;
        int n_rows = 0;
        int n_cols = 0;
        file.read((char*) &magic_number, sizeof(magic_number));
        magic_number = ReverseInt(magic_number);
        file.read((char*) &number_of_images,sizeof(number_of_images));
        number_of_images = ReverseInt(number_of_images);
        for(int i = 0; i < number_of_images; ++i){
            unsigned char temp = 0;
            file.read((char*) &temp, sizeof(temp));
            mat.ATD(0, i) = (double)temp;
        }
    }
}

Mat 
getBernoulliMatrix(int height, int width, double prob){
    Mat res = Mat::zeros(height, width, CV_64FC1);
    // use one seeded engine; re-creating an unseeded engine on every call
    // would yield the identical mask for every mini-batch
    static std::default_random_engine e(std::random_device{}());
    std::bernoulli_distribution b(prob);
    // each entry is 1 with probability prob, 0 otherwise
    for(int i = 0; i < height; i++){
        for(int j = 0; j < width; j++){
            if(b(e)) res.ATD(i, j) = 1.0;
        }
    }
    return res;
}

Mat 
sigmoid(Mat &M){
    Mat temp;
    exp(-M, temp);
    return 1.0 / (temp + 1.0);
}

Mat 
dsigmoid(Mat &a){
    Mat res = 1.0 - a;
    res = res.mul(a);
    return res;
}

Mat 
Tanh(Mat &M){
    // clone so we do not overwrite the caller's matrix
    // (Mat res(M) only copies the header and shares the underlying data)
    Mat res = M.clone();
    for(int i=0; i<res.rows; i++){
        for(int j=0; j<res.cols; j++){
            res.ATD(i, j) = tanh(M.ATD(i, j));
        }
    }
    return res;
}

Mat
dTanh(Mat &M){
    Mat res = Mat::ones(M.rows, M.cols, CV_64FC1);
    Mat temp;
    pow(M, 2.0, temp);
    res -= temp;
    return res;
}

Mat
ReLU(Mat& M){
    // clone to avoid modifying the input in place
    Mat res = M.clone();
    for(int i=0; i<M.rows; i++){
        for(int j=0; j<M.cols; j++){
            if(M.ATD(i, j) < 0.0) res.ATD(i, j) = 0.0;
        }
    }
    return res;
}

Mat
dReLU(Mat& M){
    Mat res = Mat::zeros(M.rows, M.cols, CV_64FC1);
    for(int i=0; i<M.rows; i++){
        for(int j=0; j<M.cols; j++){
            if(M.ATD(i, j) > 0.0) res.ATD(i, j) = 1.0;
        }
    }
    return res;
}

Mat 
non_Linearity(Mat &M){
    if(NL_Method == RELU){
        return ReLU(M);
    }elif(NL_Method == TANH){
        return Tanh(M);
    }else{
        return sigmoid(M);
    }
}

Mat 
d_non_Linearity(Mat &M){
    if(NL_Method == RELU){
        return dReLU(M);
    }elif(NL_Method == TANH){
        return dTanh(M);
    }else{
        return dsigmoid(M);
    }
}

void
weightRandomInit(SA &sa, int inputsize, int hiddensize, int nsamples, double epsilon){

    double *pData;
    sa.W1 = Mat::ones(hiddensize, inputsize, CV_64FC1);
    for(int i=0; i<hiddensize; i++){
        pData = sa.W1.ptr<double>(i);
        for(int j=0; j<inputsize; j++){
            pData[j] = randu<double>();
        }
    }
    sa.W1 = sa.W1 * (2 * epsilon) - epsilon;

    sa.W2 = Mat::ones(inputsize, hiddensize, CV_64FC1);
    for(int i=0; i<inputsize; i++){
        pData = sa.W2.ptr<double>(i);
        for(int j=0; j<hiddensize; j++){
            pData[j] = randu<double>();
        }
    }
    sa.W2 = sa.W2 * (2 * epsilon) - epsilon;

    sa.b1 = Mat::ones(hiddensize, 1, CV_64FC1);
    for(int j=0; j<hiddensize; j++){
        sa.b1.ATD(j, 0) = randu<double>();
    }
    sa.b1 = sa.b1 * (2 * epsilon) - epsilon;

    sa.b2 = Mat::ones(inputsize, 1, CV_64FC1);
    for(int j=0; j<inputsize; j++){
        sa.b2.ATD(j, 0) = randu<double>();
    }
    sa.b2 = sa.b2 * (2 * epsilon) - epsilon;

    sa.W1grad = Mat::zeros(hiddensize, inputsize, CV_64FC1);
    sa.W2grad = Mat::zeros(inputsize, hiddensize, CV_64FC1);
    sa.b1grad = Mat::zeros(hiddensize, 1, CV_64FC1);
    sa.b2grad = Mat::zeros(inputsize, 1, CV_64FC1);
    sa.cost = 0.0;
}

void
getSparseAutoencoderActivation(SA &sa, Mat &data, SAA &acti){
    data.copyTo(acti.aInput);
    acti.zHidden = sa.W1 * acti.aInput + repeat(sa.b1, 1, data.cols);
    acti.aHidden = non_Linearity(acti.zHidden);
    acti.zOutput = sa.W2 * acti.aHidden + repeat(sa.b2, 1, data.cols);
    acti.aOutput = non_Linearity(acti.zOutput);
}

void
sparseAutoencoderCost(SA &sa, Mat &corruptedData, Mat &data, double lambda, double sparsityParam, double beta){

    int nfeatures = data.rows;
    int nsamples = data.cols;
    SAA acti;
    getSparseAutoencoderActivation(sa, corruptedData, acti);

    Mat errtp = acti.aOutput - data;
    pow(errtp, 2.0, errtp);
    errtp /= 2.0;
    double Jcost = sum(errtp)[0] / nsamples;
    // now calculate pj which is the average activation of hidden units
    Mat pj;
    reduce(acti.aHidden, pj, 1, CV_REDUCE_SUM);
    pj /= nsamples;
    // the second part is weight decay part
    double Jweight = 0.0;
    Mat temp;
    pow(sa.W1, 2.0, temp);
    Jweight += sum(temp)[0];
    pow(sa.W2, 2.0, temp);
    Jweight += sum(temp)[0];
    Jweight *= lambda / 2.0;
    // the third part of overall cost function is the sparsity part
    temp = sparsityParam / pj;
    log(temp, temp);
    errtp = temp * sparsityParam;
    temp = (1 - sparsityParam) / (1 - pj);
    log(temp, temp);
    errtp += temp * (1 - sparsityParam);
    double Jsparse = sum(errtp)[0] * beta;
    sa.cost = Jcost + Jweight + Jsparse;

    // following are for calculating the grad of weights.
    Mat delta3 = -(data - acti.aOutput);
    delta3 = delta3.mul(d_non_Linearity(acti.aOutput));
    Mat temp2 = -sparsityParam / pj + (1 - sparsityParam) / (1 - pj);
    temp2 *= beta;
    Mat delta2 = sa.W2.t() * delta3 + repeat(temp2, 1, nsamples);
    delta2 = delta2.mul(d_non_Linearity(acti.aHidden));
    Mat nablaW1 = delta2 * acti.aInput.t();
    Mat nablaW2 = delta3 * acti.aHidden.t();
    Mat nablab1, nablab2; 
    delta3.copyTo(nablab2);
    delta2.copyTo(nablab1);
    sa.W1grad = nablaW1 / nsamples + lambda * sa.W1;
    sa.W2grad = nablaW2 / nsamples + lambda * sa.W2;
    reduce(nablab1, sa.b1grad, 1, CV_REDUCE_SUM);
    reduce(nablab2, sa.b2grad, 1, CV_REDUCE_SUM);
    sa.b1grad /= nsamples;
    sa.b2grad /= nsamples;
}

void
gradientChecking(SA &sa, Mat &data, double lambda, double sparsityParam, double beta){

    //Gradient Checking (remember to disable this part after you're sure the 
    //cost function and dJ function are correct)
    sparseAutoencoderCost(sa, data, data, lambda, sparsityParam, beta);
    Mat w1g(sa.W1grad);
    cout<<"test sparse autoencoder !!!!"<<endl;
    double epsilon = 1e-4;
    for(int i=0; i<sa.W1.rows; i++){
        for(int j=0; j<sa.W1.cols; j++){
            double memo = sa.W1.ATD(i, j);
            sa.W1.ATD(i, j) = memo + epsilon;
            sparseAutoencoderCost(sa, data, data, lambda, sparsityParam, beta);
            double value1 = sa.cost;
            sa.W1.ATD(i, j) = memo - epsilon;
            sparseAutoencoderCost(sa, data, data, lambda, sparsityParam, beta);
            double value2 = sa.cost;
            double tp = (value1 - value2) / (2 * epsilon);
            cout<<i<<", "<<j<<", "<<tp<<", "<<w1g.ATD(i, j)<<", "<<w1g.ATD(i, j) / tp<<endl;
            sa.W1.ATD(i, j) = memo;
        }
    }
}

void
trainSparseAutoencoder(SA &sa, Mat &data, int hiddenSize, double lambda, double sparsityParam, double beta){

    int nfeatures = data.rows;
    int nsamples = data.cols;
    weightRandomInit(sa, nfeatures, hiddenSize, nsamples, 0.12);
    if (IS_TEST_SA){
        gradientChecking(sa, data, lambda, sparsityParam, beta);
    }else{
       cout<<"Sparse Autoencoder Learning................"<<endl;
        // define the velocity vectors.
        Mat v_W1 = Mat::zeros(sa.W1.rows, sa.W1.cols, CV_64FC1);
        Mat v_W2 = Mat::zeros(sa.W2.rows, sa.W2.cols, CV_64FC1);
        Mat v_b1 = Mat::zeros(sa.b1.rows, sa.b1.cols, CV_64FC1);
        Mat v_b2 = Mat::zeros(sa.b2.rows, sa.b2.cols, CV_64FC1);

        int epochs = 3000;
        double lrate = 0.1;
        int T = 200;
        double epsilon0 = 80.0;
        double epsilont;
        double f = 0.999;
        double pi = 0.5;
        double pf = 0.99;
        double pt;
        for(int t = 0; t < epochs; t++){

            // momentum schedule: ramp pt linearly from pi (0.5) to pf (0.99)
            // over the first T epochs, then hold it at pf
            if(t > T) pt = pf;
            else pt = (double)t / T * pf + (1 - (double)t / T) * pi;
            // exponentially decaying learning-rate scale
            epsilont = epsilon0 * pow(f, t);

            int randomNum = ((long)rand() + (long)rand()) % (data.cols - batch);
            Rect roi = Rect(randomNum, 0, batch, data.rows);
            Mat batchX = data(roi);

            // corrupt the batch: keep each pixel with probability
            // probDestruction, set it to zero otherwise (masking noise)
            Mat corrupted = getBernoulliMatrix(batchX.rows, batchX.cols, probDestruction);
            corrupted = batchX.mul(corrupted);

            sparseAutoencoderCost(sa, corrupted, batchX, lambda, sparsityParam, beta);
                
            v_W1 = pt * v_W1 - (1 - pt) * epsilont * (lrate * sa.W1grad);
            v_W2 = pt * v_W2 - (1 - pt) * epsilont * (lrate * sa.W2grad);
            v_b1 = pt * v_b1 - (1 - pt) * epsilont * (lrate * sa.b1grad);
            v_b2 = pt * v_b2 - (1 - pt) * epsilont * (lrate * sa.b2grad);

            sa.W1 += v_W1;
            sa.W2 += v_W2;
            sa.b1 += v_b1;
            sa.b2 += v_b2;

            cout<<"epoch: "<<t<<", Cost function value = "<<sa.cost<<endl;
        }
    }
}

void
readData(Mat &x, Mat &y, string xpath, string ypath, int number_of_images){

    //read MNIST image into OpenCV Mat vector
    int image_size = 28 * 28;
    vector<Mat> vec;
    //vec.resize(number_of_images, cv::Mat(28, 28, CV_8UC1));
    read_Mnist(xpath, vec);
    //read MNIST label into double vector
    y = Mat::zeros(1, number_of_images, CV_64FC1);
    read_Mnist_Label(ypath, y);
    x = concatenateMat(vec);
}

void
saveWeight2txt(Mat &data, SA &sa){

    FILE *pOut = fopen("weights.txt", "w");
    for(int i=0; i<sa.W1.rows; i++){
        for(int j=0; j<sa.W1.cols; j++){
            fprintf(pOut, "%lf", sa.W1.ATD(i, j));
            if(j == sa.W1.cols - 1) fprintf(pOut, "\n");
            else fprintf(pOut, " ");
        }
    }
    fclose(pOut);

}

int 
main(int argc, char** argv)
{

    long start, end;
    start = clock();

    Mat trainX, trainY;
    readData(trainX, trainY, "mnist/train-images-idx3-ubyte", "mnist/train-labels-idx1-ubyte", 60000);

    cout<<"Read trainX successfully, including "<<trainX.rows<<" features and "<<trainX.cols<<" samples."<<endl;
    batch = trainX.cols / 100;
    // Finished reading data
    
    // pre-processing data. 
    Scalar mean, stddev;
    meanStdDev(trainX, mean, stddev);
    Mat normX = trainX - mean[0];
    normX.copyTo(trainX);

    SA sa;
    trainSparseAutoencoder(sa, trainX, 200, 5e-4, 0.1, 3);
    saveWeight2txt(trainX, sa);

    end = clock();
    cout<<"Totally used time: "<<((double)(end - start)) / CLOCKS_PER_SEC<<" second"<<endl;
    
    //waitKey(0);
    return 0;
}

RESULT

Filters obtained after training the single-layer auto-encoder:

Encoder with no destroyed inputs:

[filter visualization]

Encoder with 25% destruction:

[filter visualization]

Encoder with 50% destruction:

[filter visualization]

As we can see, without noise some of the filters remain uninteresting or only mildly interesting; as we increase the noise level, we get better filters. (In the code above, a 25% or 50% destruction level corresponds to setting probDestruction, the keep probability of each pixel, to 0.75 or 0.5.)

Enjoy it 🙂
