Softmax Regression (with OpenCV)

This is the same algorithm as in the previous SOFTMAX REGRESSION post. Since I'm going to try to build deeper neural networks for images, I rewrote the softmax regression code using OpenCV's Mat instead of Armadillo, as a review of OpenCV programming.
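For reference, the cost being minimized is the usual softmax objective with weight decay, which is what update_CostFunction_and_Gradient in the code below computes:

    J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \sum_{j=1}^{k} 1\{y^{(i)} = j\} \log \frac{e^{\theta_j^T x^{(i)}}}{\sum_{l=1}^{k} e^{\theta_l^T x^{(i)}}} + \frac{\lambda}{2} \sum_{j,n} \theta_{jn}^2

with gradient

    \nabla_{\theta_j} J = -\frac{1}{m} \sum_{i=1}^{m} x^{(i)} \left( 1\{y^{(i)} = j\} - P(y^{(i)} = j \mid x^{(i)}; \theta) \right) + \lambda \theta_j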

I've been using MATLAB, Octave, and Armadillo a lot these days, so it's kind of hard to adapt to the OpenCV style. For example:

// in Armadillo, doing this is super easy.
temp = repmat(sum(M, 0), nclasses, 1);

// but in OpenCV...
temp = Mat::zeros(1, M.cols, CV_64FC1);
reduce(M, temp, 0, CV_REDUCE_SUM);
temp2 = repeat(temp, nclasses, 1);

or,

// in Armadillo, 
cost += accu(arma::pow(theta, 2)) * lambda / 2;

// but in OpenCV...
Mat theta2;
pow(theta, 2.0, theta2);
cost += sum(theta2)[0] * lambda / 2;
// The reason why there's a "[0]" is, the sum() 
// in OpenCV actually returns a Scalar.

sigh… it is like… you know…

If you know how to get rid of these kinds of “troubles”, teach me!
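One workaround is to hide the reduce-and-repeat dance behind a small helper. A minimal sketch (repmatSum is just a made-up name, not an OpenCV function):

// column-wise sum of M, tiled back to ntimes rows --
// roughly Armadillo's repmat(sum(M, 0), ntimes, 1)
static Mat repmatSum(const Mat &M, int ntimes){
    Mat colSums;
    reduce(M, colSums, 0, CV_REDUCE_SUM); // 1 x M.cols row vector
    return repeat(colSums, ntimes, 1);    // stack it ntimes vertically
}

// usage: temp2 = repmatSum(M, nclasses);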

CODE

// Softmax regression

#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <math.h>
#include <stdio.h>   // FILE, fopen, fscanf
#include <time.h>    // clock, CLOCKS_PER_SEC
#include <vector>
#include <iostream>
using namespace cv;
using namespace std;

#define elif else if
#define AT at<double>
#define MAX_ITER 100000

double cost = 0.0;
Mat grad;
double lrate = 0.1;
double lambda = 0.0;
int nclasses = 2;

Mat vec2mat(vector<vector<double> >&vec){
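    // each inner vector is one sample; sample j becomes column j,
    // so the result is (nfeatures x nsamples)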
    int cols = vec.size();
    int rows = vec[0].size();
    Mat result(rows, cols, CV_64FC1);
    double *pData; 
    for(int i = 0; i<rows; i++){
        pData = result.ptr<double>(i);
        for(int j=0; j<cols; j++){
            pData[j] = vec[j][i];        
        }
    }
    return result;
}

Mat vec2colvec(vector<double>& vec){
    int length = vec.size();
    Mat A(length, 1, CV_64FC1);
    for(int i=0; i<length; i++){
        A.AT(i, 0) = vec[i];
    }
    return A;
}

Mat vec2rowvec(vector<double>& vec){
    Mat A = vec2colvec(vec);
    return A.t();
}

void update_CostFunction_and_Gradient(Mat x, Mat y, Mat weightsMatrix, double lambda){

    int nsamples = x.cols;
    int nfeatures = x.rows;
    //calculate cost function
    Mat theta(weightsMatrix);
    Mat M = theta * x;
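    // Softmax is invariant to shifting each column by a constant, so
    // subtracting the per-column sum below leaves the probabilities
    // unchanged (subtracting the per-column max is the more common choice
    // for numerical stability, but any per-column constant works).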
    Mat temp, temp2;
    temp = Mat::ones(1, M.cols, CV_64FC1);
    reduce(M, temp, 0, CV_REDUCE_SUM);
    temp2 = repeat(temp, nclasses, 1);
    M -= temp2;
    exp(M, M);
    temp = Mat::ones(1, M.cols, CV_64FC1);
    reduce(M, temp, 0, CV_REDUCE_SUM);
    temp2 = repeat(temp, nclasses, 1);
    divide(M, temp2, M); 
    Mat groundTruth = Mat::zeros(nclasses, nsamples, CV_64FC1);
    for(int i=0; i<nsamples; i++){
        groundTruth.AT((int)y.AT(0, i), i) = 1.0; // labels are stored as doubles, so cast to int for indexing
    }
    Mat logM;
    log(M, logM);
    temp = groundTruth.mul(logM);
    cost = - sum(temp)[0] / nsamples;
    Mat theta2;
    pow(theta, 2.0, theta2);
    cost += sum(theta2)[0] * lambda / 2;
    //calculate gradient
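    // grad = -(groundTruth - M) * x^T / nsamples + lambda * theta,
    // where M now holds the per-class probabilities P(y = j | x)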
    temp = groundTruth - M;   
    temp = temp * x.t();
    grad = - temp / nsamples;
    grad += lambda * theta;

}

Mat calculateY(Mat x, Mat weightsMatrix){

    int nsamples = x.cols;
    int nfeatures = x.rows;
    //forward pass: same softmax computation as in the cost function
    Mat theta(weightsMatrix);
    Mat M = theta * x;
    Mat temp, temp2;
    temp = Mat::ones(1, M.cols, CV_64FC1);
    reduce(M, temp, 0, CV_REDUCE_SUM);
    temp2 = repeat(temp, nclasses, 1);
    M -= temp2;
    exp(M, M);
    temp = Mat::ones(1, M.cols, CV_64FC1);
    reduce(M, temp, 0, CV_REDUCE_SUM);
    temp2 = repeat(temp, nclasses, 1);
    divide(M, temp2, M); 
    log(M, M); // log is monotonic, so the argmax below picks the same class

    Mat result = Mat::ones(1, M.cols, CV_64FC1);
    for(int i=0; i<M.cols; i++){
        double maxele = M.AT(0, i);
        int which = 0;
        for(int j=1; j<M.rows; j++){
            if(M.AT(j, i) > maxele){
                maxele = M.AT(j, i);
                which = j;
            }
        }
        result.AT(0, i) = which;
    }
    return result;
}

void softmax(vector<vector<double> >&vecX, vector<double> &vecY, vector<vector<double> >& testX, vector<double>& testY){

    int nsamples = vecX.size();
    int nfeatures = vecX[0].size();
    //change vecX and vecY into matrix or vector.
    Mat y = vec2rowvec(vecY);
    Mat x = vec2mat(vecX);

    double init_epsilon = 0.12;
    Mat weightsMatrix = Mat::ones(nclasses, nfeatures, CV_64FC1);
    double *pData; 
    for(int i = 0; i<nclasses; i++){
        pData = weightsMatrix.ptr<double>(i);
        for(int j=0; j<nfeatures; j++){
            pData[j] = randu<double>();        
        }
    }
    weightsMatrix = weightsMatrix * (2 * init_epsilon) - init_epsilon;
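    // the weights are now uniformly distributed in [-init_epsilon, init_epsilon)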

    grad = Mat::zeros(nclasses, nfeatures, CV_64FC1);

/*
    //Gradient Checking (remember to disable this part after you're sure the 
    //cost function and dJ function are correct)
    update_CostFunction_and_Gradient(x, y, weightsMatrix, lambda);
    Mat dJ(grad);
//    grad.copyTo(dJ);
    cout<<"test!!!!"<<endl;
    double epsilon = 1e-4;
    for(int i=0; i<weightsMatrix.rows; i++){
        for(int j=0; j<weightsMatrix.cols; j++){
            double memo = weightsMatrix.AT(i, j);
            weightsMatrix.AT(i, j) = memo + epsilon;
            update_CostFunction_and_Gradient(x, y, weightsMatrix, lambda);
            double value1 = cost;
            weightsMatrix.AT(i, j) = memo - epsilon;
            update_CostFunction_and_Gradient(x, y, weightsMatrix, lambda);
            double value2 = cost;
            double tp = (value1 - value2) / (2 * epsilon);
            cout<<i<<", "<<j<<", "<<tp<<", "<<dJ.AT(i, j)<<", "<<dJ.AT(i, j) / tp<<endl;
            weightsMatrix.AT(i, j) = memo;
        }
    }
*/

    int converge = 0;
    double lastcost = 0.0;
    while(converge < MAX_ITER){
        update_CostFunction_and_Gradient(x, y, weightsMatrix, lambda);
        weightsMatrix -= lrate * grad;

        cout<<"learning step: "<<converge<<", Cost function value = "<<cost<<endl;
        if(fabs(cost - lastcost) <= 5e-6 && converge > 0) break;
        lastcost = cost;
        ++ converge;
    }
    cout<<"############result#############"<<endl;

    Mat yT = vec2rowvec(testY);
    Mat xT = vec2mat(testX);
    Mat result = calculateY(xT, weightsMatrix);
    Mat err(yT);
    err -= result;
    int correct = err.cols;
    for(int i=0; i<err.cols; i++){
        if(err.AT(0, i) != 0) --correct;
    }
    cout<<"correct: "<<correct<<", total: "<<err.cols<<", accuracy: "<<double(correct) / (double)(err.cols)<<endl;
}

int main(int argc, char** argv)
{
    clock_t start, end;
    //read training X from .txt file
    FILE *streamX, *streamY;
    streamX = fopen("trainX.txt", "r");
    int numofX = 30;
    vector<vector<double> > vecX;
    double tpdouble;
    int counter = 0;
    while(1){
        if(fscanf(streamX, "%lf", &tpdouble)==EOF) break;
        if(counter / numofX >= vecX.size()){
            vector<double> tpvec;
            vecX.push_back(tpvec);
        } 
        vecX[counter / numofX].push_back(tpdouble);
        ++ counter;
    }
    fclose(streamX);

    cout<<vecX.size()<<", "<<vecX[0].size()<<endl;

    //read training Y from .txt file
    streamY = fopen("trainY.txt", "r");
    vector<double> vecY;
    while(1){
        if(fscanf(streamY, "%lf", &tpdouble)==EOF) break;
        vecY.push_back(tpdouble);
    }
    fclose(streamY);

    //sanity checks: every sample must have the same number of features,
    //and X and Y must contain the same number of samples
    for(int i = 1; i<vecX.size(); i++){
        if(vecX[i].size() != vecX[i - 1].size()) return 0;
    }
    if(vecX.size() != vecY.size()) return 0;

    streamX = fopen("testX.txt", "r");
    vector<vector<double> > vecTX;
    counter = 0;
    while(1){
        if(fscanf(streamX, "%lf", &tpdouble)==EOF) break;
        if(counter / numofX >= vecTX.size()){
            vector<double> tpvec;
            vecTX.push_back(tpvec);
        } 
        vecTX[counter / numofX].push_back(tpdouble);
        ++ counter;
    }
    fclose(streamX);

    streamY = fopen("testY.txt", "r");
    vector<double> vecTY;
    while(1){
        if(fscanf(streamY, "%lf", &tpdouble)==EOF) break;
        vecTY.push_back(tpdouble);
    }
    fclose(streamY);

    start = clock();
    softmax(vecX, vecY, vecTX, vecTY);
    end = clock();
    cout<<"Training used time: "<<((double)(end - start)) / CLOCKS_PER_SEC<<" second"<<endl;
    return 0;
}
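If you want to compile it, note that the code uses the old OpenCV 2.x header layout and the CV_REDUCE_* constants, so a 2.x-era install is assumed. Something along these lines should work, assuming pkg-config can find your OpenCV install and the file is saved as softmax.cpp:

g++ softmax.cpp -o softmax `pkg-config --cflags --libs opencv`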

The dataset for testing can also be found IN THE PREVIOUS POST.

Have fun with it 🙂


3 Comments

  1. yndk
    Posted April 28, 2014 at 2:00 am | Permalink

    Hi, thanks for sharing the C++ code. Even though I have not tested it yet, I really appreciate it.

    Another way of using the OpenCV matrix is like this:
    ———-
    cv::Mat_<double> dmat(100,200); // 100×200 double matrix

    for (int r=0; r<dmat.rows; r++)
        for (int c=0; c<dmat.cols; c++)
            dmat(r,c) = value; // matrix element assignment

    Almost all the functions defined in cv::Mat are available, as written in the OpenCV documentation.

  2. yndk
    Posted April 28, 2014 at 3:59 am | Permalink

    cv::Mat_<double> dmat(100,200); // 100×200 double matrix

  3. ngapweitham
    Posted August 25, 2015 at 12:55 pm | Permalink

    You can implement the core algorithm with Armadillo or Eigen and then convert the results to cv::Mat; this way you should be able to share the joy of both worlds. Beware that cv::Mat may do byte padding, so when you convert an Armadillo matrix to cv::Mat you need to take care of the memory layout.

    PS: I prefer Eigen over Armadillo because it is much easier to build on Windows.
