[問題] MEX加速 with Eigen library

看板MATLAB作者 (攸藍)時間11年前 (2013/12/22 00:08), 編輯推噓3(303)
留言6則, 1人參與, 最新討論串1/2 (看更多)
我想要利用matlab MEX file來加速 我遇到了一個問題是 MEX的速度應該會比matlab快上幾倍 而且Eigen應該會比matlab快上10~30倍 但是我的matlab跑一次是0.1707 sec 我的mex跑一次是 0.667856 sec 為什麼我的mex可以那麼慢,有大大可以指導我一下嗎? 附上我完整的code #include "mex.h" #include "Eigen/Dense" #include "Eigen/Core" #include <iostream> using namespace Eigen; using namespace std; typedef Map<MatrixXd> MexMat; void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { ptrdiff_t n_de = mxGetM(prhs[0]), n_basis = mxGetN(prhs[0]); ptrdiff_t n_nu = mxGetM(prhs[1]); ptrdiff_t n_w = mxGetN(prhs[2]) , n_p = mxGetN(prhs[3]); ptrdiff_t n_min = min(n_de, n_nu); MexMat diff_SQ_de ( mxGetPr(prhs[0]), n_de, n_basis); MexMat diff_SQ_nu ( mxGetPr(prhs[1]), n_nu, n_basis); MexMat width_candidates ( mxGetPr(prhs[2]), 1, n_w); MexMat panelty_candidates ( mxGetPr(prhs[3]), 1, n_p); MatrixXd ker_de_tr(n_de, n_basis), ker_nu_tr(n_nu, n_basis); MatrixXd H_hat(n_basis, n_basis), h_hat(n_basis, n_basis); MatrixXd ker_de_tr2(n_min, n_basis), ker_nu_tr2(n_min, n_basis); MatrixXd B(n_basis, n_basis), Beta(n_basis, 1), B_de(n_basis, n_min); MatrixXd tmp(1, n_min), B0(n_basis, n_min), B1(n_basis, n_min); MatrixXd A(n_basis, n_min), r_de(n_min,1), r_nu(n_min,1); MatrixXd CV_score (n_w, n_p), width(1,1), panelty(1,1); for (int width_run = 0; width_run < n_w; width_run++){ width(0,0) = width_candidates(0, width_run); ker_de_tr = (diff_SQ_de.array() / (-2) / pow(width(0,0),2)).array().exp(); ker_nu_tr = (diff_SQ_nu.array() / (-2) / pow(width(0,0),2)).array().exp(); H_hat = ker_de_tr.transpose() * ker_de_tr / n_de; h_hat = ((ker_nu_tr / n_nu).colwise().sum()).transpose(); ker_de_tr2 = (ker_de_tr.block(0,0,n_min,n_basis)).transpose(); ker_nu_tr2 = (ker_nu_tr.block(0,0,n_min,n_basis)).transpose(); for (int panelty_run = 0; panelty_run < n_p; panelty_run++){ panelty(0,0) = panelty_candidates(0, panelty_run); H_hat.diagonal() = (H_hat.diagonal()).array() + panelty(0,0)*(n_de-1)/n_de; B = H_hat.inverse(); Beta = B * h_hat; B_de = B * ker_de_tr2; tmp = ((ker_de_tr2.cwiseProduct(B_de)).colwise().sum()).array() * (-1) + n_de; B0 = (Beta.replicate(1, n_min) + (((Beta.transpose() * ker_de_tr2) .cwiseQuotient(tmp)).replicate(n_basis, 1).cwiseProduct(B_de))) * n_nu; B1 = B*ker_nu_tr2 + B_de.cwiseProduct(((((ker_nu_tr2.cwiseProduct( B_de)).colwise().sum()).cwiseQuotient(tmp)).replicate(n_basis,1))); A = ((B0-B1) * (n_de-1)/(n_de*(n_nu-1))).cwiseMax(MatrixXd::Zero(n_basis ,n_min)); r_de = (ker_de_tr2.cwiseProduct(A)).colwise().sum(); r_nu = (ker_nu_tr2.cwiseProduct(A)).colwise().sum(); CV_score(width_run, panelty_run) = (r_de.array().pow(2)/n_de).sum()/2 - (r_nu / n_nu).sum(); } } int loc_w, loc_p; MatrixXd min_cv_score; CV_score.minCoeff(&loc_p, &loc_w); MatrixXd O1(1,1), O2(1,1); O1(0,0) = width_candidates(0,loc_w); O2(0,0) = panelty_candidates(0,loc_p); plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL); plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL); MexMat Output1 ( mxGetPr(plhs[0]), 1, 1 ); MexMat Output2 ( mxGetPr(plhs[1]), 1, 1 ); Output1 = O1; Output2 = O2; } 我在猜想是不是每次取代這麼多東西造成記憶體不斷再複製,而拖慢 希望有人可以幫我找到我哪裏寫得不好導致matlab比較快。 最後補上我的系統資訊 file (include test.m): http://myweb.ncku.edu.tw/~r26014014/uLSIF.rar Platform OS: windows 7 SP1 64bit Matlab version: 2013b compiler: VS 2012 compile command: mex -v -largeArrayDims -IC:\Eigen mex_file.cpp COMPFLAGS="/Ox $COMPFLAGS" 另外,我有google到這篇:http://tinyurl.com/mzhajo6 最後,如果解決問題需要我全部的程式碼,可以寄站內信。 -- ※ 發信站: 批踢踢實業坊(ptt.cc) ◆ From: 140.116.152.221

12/22 04:45, , 1F
optimization flag 傳給 compiler 了嗎?
12/22 04:45, 1F
我google之後,加上/Ox之後,還是要0.6678 sec

12/22 04:47, , 2F
看起來像在做 least squares, K1 不用做出來吧
12/22 04:47, 2F

12/22 04:51, , 3F
12/22 04:51, 3F
我的case更加複雜,K1還會用在其他地方.... 此外這裡有L2 panelty term

12/23 08:07, , 4F
L2 penalty term 也不用乘出來
12/23 08:07, 4F

12/23 08:13, , 5F
大概是 K2 下補 0, K1 下加上diagonal sqrt penalty
12/23 08:13, 5F
大大可以給我一點實際例子嗎? 因為我run出來不是LS... 我自己去run了一個test A is 3 by 3 matrix. b is 3 by 1 matrix A.colPivHouseholderQr.solve(b) is equal to A.lu().solve(b) not the inv(A'A)*b test code: #include <iostream> #include <Eigen/Dense> #include <Eigen/Core> using namespace Eigen; using namespace std; typedef Map<MatrixXd> MexMat; int main() { double mat[9] = {-1.0,-2.0,-3.0,4.0, 5.5, 6.5, -7.7, -8.0, 11.0}; double mat2[3] = {1.0,1.0,3.0}; MexMat EiMat (mat, 3, 3); MexMat EiMat2 (mat2, 3, 1); cout << EiMat.colPivHouseholderQr().solve(EiMat2) << endl; cout << EiMat.lu().solve(EiMat2) << endl; } Result: 1.11111 0.765432 0.123457 1.11111 0.765432 0.123457 ※ 編輯: celestialgod 來自: 36.238.92.7 (12/24 01:59)

12/24 11:47, , 6F
不是 inv(A'A)A'b ?
12/24 11:47, 6F
文章代碼(AID): #1IjRpkxF (MATLAB)
文章代碼(AID): #1IjRpkxF (MATLAB)