[問題] cuda C, error : unsupported operation

看板C_and_CPP (C/C++)作者 (iHakka)時間13年前 (2013/03/23 21:35), 編輯推噓0(000)
留言0則, 0人參與, 最新討論串1/2 (看更多)
開發平台(Platform): (Ex: VC++, GCC, Linux, ...) VC++ 2010 cuda5.0 額外使用到的函數庫(Library Used): (Ex: OpenGL, ...) #include "cuda_runtime.h" #include "device_launch_parameters.h" #include <sm_20_atomic_functions.h> 問題(Question): 我要把Jacobi迭代法轉到cuda上運行,但是算出來的結果跟預期的不一樣 另外因為在cuda上的atomicAdd操作不知道為什麼無法執行所以無法計算誤差 但是我就給他用for迭代很多次看看結果怎樣,然後就錯了... 餵入的資料(Input): rho是二維陣列 rho[N/2*N+ N/2]=10/(h*h); 預期的正確結果(Expected Output): 錯誤結果(Wrong Output): http://goo.gl/Eeru3 左圖是正確的結果右圖是錯誤的結果 程式碼(Code):(請善用置底文網頁, 記得排版) #include <stdio.h> #include <iostream> #include <fstream> #include "cuda_runtime.h" #include "device_launch_parameters.h" #include <sm_20_atomic_functions.h> using namespace std; #define L 64 #define N (L+2) #define h (1 / float(L + 1)) #define accuracy (1e-6) __global__ void kernel(float* V,float* VNew,float* rho) { int step = 0; int n = 0; double error =0; int i = blockIdx.x; int j = blockIdx.y; for(int t=0;t<5000;t=t+1) { if(i > 0 && i < N-1 && j > 0 && j < N-1 ) VNew[i+j*N]=0.25*(V[i+j*N+1]+V[i+j*N-1]+ V[i+j*N+N]+V[i+j*N-N]+h*h*rho[i+j*N]); //error=err(V,VNew); //if (error < accuracy) //break; if(i > 0 && i < N-1 && j > 0 && j < N-1 ) V[i+j*N]=VNew[i+j*N]; } } int main() { float *V,*rho; float *dev_V,*dev_VNew,*dev_rho; cudaHostAlloc((void**)&V,N*N*sizeof(float),cudaHostAllocDefault); cudaHostAlloc((void**)&rho,N*N*sizeof(float),cudaHostAllocDefault); cudaMalloc((void**)&dev_V,N*N*sizeof(float)); cudaMalloc((void**)&dev_VNew,N*N*sizeof(float)); cudaMalloc((void**)&dev_rho,N*N*sizeof(float)); //初始化 for(int i=0;i<N;i=i+1) { for(int j=0;j<N;j=j+1) { V[i+j*N]=0; rho[i+j*N]=0; } } rho[N/2*N+ N/2]=10/(h*h); cudaMemcpy(dev_rho,rho,N*N*sizeof(float),cudaMemcpyHostToDevice); cudaMemcpy(dev_V,V,N*N*sizeof(float),cudaMemcpyHostToDevice); dim3 grid(N,N); kernel<<<grid,1>>>(dev_V,dev_VNew,dev_rho); cudaMemcpy(V,dev_V,N*N*sizeof(float),cudaMemcpyDeviceToHost); // write potential to file cout << " Potential in file poisson.data" << endl; ofstream dataFile("AGPUpoisson.data"); for (int i = 0; i < L 
+ 2; i++) { for (int j = 0; j < L + 2; j++) { dataFile << V[i +j*N] << "\t"; } dataFile << '\n'; } dataFile.close(); cudaFree(dev_V); cudaFree(dev_VNew); cudaFree(rho); cudaFreeHost(V); cudaFreeHost(rho); } 補充說明(Supplement): 我明明宣告sm_20_atomic的標頭但是他編譯的時候卻抓sm_11說不支援不讓我使用 c:\program files\nvidia gpu computing toolkit\cuda\v5.0\ include\sm_11_atomic_functions.h(98) error : unsupported operation 1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\ CUDA 5.0.targets(592,9): error MSB3721: 命令 ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.0\bin\nvcc.exe" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --use-local-env --cl-version 2010 -ccbin "C:\Program Files (x86)\ Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\ NVIDIA GPU Computing Toolkit\CUDA\v5.0\include" -I"C:\Program Files\ NVIDIA GPU Computing Toolkit\CUDA\v5.0\include" --keep-dir "Release" -maxrregcount=0 --machine 32 --compile -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o "Release\kernel.cu.obj" "C:\Users\iHakka\Desktop\cuda\cuda\kernel.cu"" 已結束,代碼為 1。 1> -- ※ 發信站: 批踢踢實業坊(ptt.cc) ◆ From: 114.34.230.1
文章代碼(AID): #1HJQ-IOG (C_and_CPP)
文章代碼(AID): #1HJQ-IOG (C_and_CPP)