[問題] MEX加速 with Eigen library
我想要利用matlab MEX file來加速
我遇到了一個問題是
MEX的速度應該會比matlab快上幾倍
而且Eigen應該會比matlab快上10~30倍
但是我的matlab跑一次是0.1707 sec
我的mex跑一次是 0.667856 sec
為什麼我的mex可以那麼慢,有大大可以指導我一下嗎?
附上我完整的code
#include "mex.h"
#include "Eigen/Dense"
#include "Eigen/Core"
#include <iostream>
using namespace Eigen;
using namespace std;
typedef Map<MatrixXd> MexMat;
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
ptrdiff_t n_de = mxGetM(prhs[0]), n_basis = mxGetN(prhs[0]);
ptrdiff_t n_nu = mxGetM(prhs[1]);
ptrdiff_t n_w = mxGetN(prhs[2]) , n_p = mxGetN(prhs[3]);
ptrdiff_t n_min = min(n_de, n_nu);
MexMat diff_SQ_de ( mxGetPr(prhs[0]), n_de, n_basis);
MexMat diff_SQ_nu ( mxGetPr(prhs[1]), n_nu, n_basis);
MexMat width_candidates ( mxGetPr(prhs[2]), 1, n_w);
MexMat panelty_candidates ( mxGetPr(prhs[3]), 1, n_p);
MatrixXd ker_de_tr(n_de, n_basis), ker_nu_tr(n_nu, n_basis);
MatrixXd H_hat(n_basis, n_basis), h_hat(n_basis, n_basis);
MatrixXd ker_de_tr2(n_min, n_basis), ker_nu_tr2(n_min, n_basis);
MatrixXd B(n_basis, n_basis), Beta(n_basis, 1), B_de(n_basis, n_min);
MatrixXd tmp(1, n_min), B0(n_basis, n_min), B1(n_basis, n_min);
MatrixXd A(n_basis, n_min), r_de(n_min,1), r_nu(n_min,1);
MatrixXd CV_score (n_w, n_p), width(1,1), panelty(1,1);
for (int width_run = 0; width_run < n_w; width_run++){
width(0,0) = width_candidates(0, width_run);
ker_de_tr = (diff_SQ_de.array() / (-2) / pow(width(0,0),2)).array().exp();
ker_nu_tr = (diff_SQ_nu.array() / (-2) / pow(width(0,0),2)).array().exp();
H_hat = ker_de_tr.transpose() * ker_de_tr / n_de;
h_hat = ((ker_nu_tr / n_nu).colwise().sum()).transpose();
ker_de_tr2 = (ker_de_tr.block(0,0,n_min,n_basis)).transpose();
ker_nu_tr2 = (ker_nu_tr.block(0,0,n_min,n_basis)).transpose();
for (int panelty_run = 0; panelty_run < n_p; panelty_run++){
panelty(0,0) = panelty_candidates(0, panelty_run);
H_hat.diagonal() = (H_hat.diagonal()).array() + panelty(0,0)*(n_de-1)/n_de;
B = H_hat.inverse();
Beta = B * h_hat;
B_de = B * ker_de_tr2;
tmp = ((ker_de_tr2.cwiseProduct(B_de)).colwise().sum()).array() * (-1) + n_de;
B0 = (Beta.replicate(1, n_min) + (((Beta.transpose() * ker_de_tr2)
.cwiseQuotient(tmp)).replicate(n_basis, 1).cwiseProduct(B_de))) * n_nu;
B1 = B*ker_nu_tr2 + B_de.cwiseProduct(((((ker_nu_tr2.cwiseProduct(
B_de)).colwise().sum()).cwiseQuotient(tmp)).replicate(n_basis,1)));
A = ((B0-B1) * (n_de-1)/(n_de*(n_nu-1))).cwiseMax(MatrixXd::Zero(n_basis
,n_min));
r_de = (ker_de_tr2.cwiseProduct(A)).colwise().sum();
r_nu = (ker_nu_tr2.cwiseProduct(A)).colwise().sum();
CV_score(width_run, panelty_run) = (r_de.array().pow(2)/n_de).sum()/2 -
(r_nu / n_nu).sum();
}
}
int loc_w, loc_p;
MatrixXd min_cv_score;
CV_score.minCoeff(&loc_p, &loc_w);
MatrixXd O1(1,1), O2(1,1);
O1(0,0) = width_candidates(0,loc_w);
O2(0,0) = panelty_candidates(0,loc_p);
plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
MexMat Output1 ( mxGetPr(plhs[0]), 1, 1 );
MexMat Output2 ( mxGetPr(plhs[1]), 1, 1 );
Output1 = O1;
Output2 = O2;
}
我在猜想是不是每次取代這麼多東西造成記憶體不斷再複製,而拖慢
希望有人可以幫我找到我哪裏寫得不好導致matlab比較快。
最後補上我的系統資訊
file (include test.m): http://myweb.ncku.edu.tw/~r26014014/uLSIF.rar
Platform OS: windows 7 SP1 64bit
Matlab version: 2013b
compiler: VS 2012
compile command:
mex -v -largeArrayDims -IC:\Eigen mex_file.cpp COMPFLAGS="/Ox $COMPFLAGS"
另外,我有google到這篇:http://tinyurl.com/mzhajo6
最後,如果解決問題需要我全部的程式碼,可以寄站內信。
--
※ 發信站: 批踢踢實業坊(ptt.cc)
◆ From: 140.116.152.221
→
12/22 04:45, , 1F
12/22 04:45, 1F
我google之後,加上/Ox之後,還是要0.6678 sec
推
12/22 04:47, , 2F
12/22 04:47, 2F
→
12/22 04:51, , 3F
12/22 04:51, 3F
我的case更加複雜,K1還會用在其他地方....
此外這裡有L2 panelty term
推
12/23 08:07, , 4F
12/23 08:07, 4F
→
12/23 08:13, , 5F
12/23 08:13, 5F
大大可以給我一點實際例子嗎? 因為我run出來不是LS...
我自己去run了一個test
A is 3 by 3 matrix. b is 3 by 1 matrix
A.colPivHouseholderQr.solve(b) is equal to A.lu().solve(b)
not the inv(A'A)*b
test code:
#include <iostream>
#include <Eigen/Dense>
#include <Eigen/Core>
using namespace Eigen;
using namespace std;
typedef Map<MatrixXd> MexMat;
int main()
{
double mat[9] = {-1.0,-2.0,-3.0,4.0, 5.5, 6.5, -7.7, -8.0, 11.0};
double mat2[3] = {1.0,1.0,3.0};
MexMat EiMat (mat, 3, 3);
MexMat EiMat2 (mat2, 3, 1);
cout << EiMat.colPivHouseholderQr().solve(EiMat2) << endl;
cout << EiMat.lu().solve(EiMat2) << endl;
}
Result:
1.11111
0.765432
0.123457
1.11111
0.765432
0.123457
※ 編輯: celestialgod 來自: 36.238.92.7 (12/24 01:59)
推
12/24 11:47, , 6F
12/24 11:47, 6F
討論串 (同標題文章)
完整討論串 (本文為第 1 之 2 篇):
MATLAB 近期熱門文章
PTT數位生活區 即時熱門文章