[問題] cuda C, error : unsupported operation
開發平台(Platform): (Ex: VC++, GCC, Linux, ...)
VC+ 2010 cuda5.0
額外使用到的函數庫(Library Used): (Ex: OpenGL, ...)
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <sm_20_atomic_functions.h>
問題(Question):
我要把jocobi迭代法轉到cuda上運行,但是算出來的結果跟預期的不一樣
另外因為在cuda上的atomicAdd操作不知道為什麼無法執行所以無法計算誤差
但是我就給他用for跌代很多次看看結果怎樣,然後就錯了...
餵入的資料(Input):
rho是二維陣列
rho[N/2*N+ N/2]=10/(h*h);
預期的正確結果(Expected Output):
錯誤結果(Wrong Output):
http://goo.gl/Eeru3
左圖是正確的結果右圖是錯誤的結果
程式碼(Code):(請善用置底文網頁, 記得排版)
#include <stdio.h>
#include <iostream>
#include <fstream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <sm_20_atomic_functions.h>
using namespace std;
#define L 64
#define N (L+2)
#define h (1 / float(L + 1))
#define accuracy (1e-6)
__global__ void kernel(float* V,float* VNew,float* rho) {
int step = 0;
int n = 0;
double error =0;
int i = blockIdx.x;
int j = blockIdx.y;
for(int t=0;t<5000;t=t+1)
{
if(i > 0 && i < N-1 && j > 0 && j < N-1 )
VNew[i+j*N]=0.25*(V[i+j*N+1]+V[i+j*N-1]+
V[i+j*N+N]+V[i+j*N-N]+h*h*rho[i+j*N]);
//error=err(V,VNew);
//if (error < accuracy)
//break;
if(i > 0 && i < N-1 && j > 0 && j < N-1 )
V[i+j*N]=VNew[i+j*N];
}
}
int main()
{
float *V,*rho;
float *dev_V,*dev_VNew,*dev_rho;
cudaHostAlloc((void**)&V,N*N*sizeof(float),cudaHostAllocDefault);
cudaHostAlloc((void**)&rho,N*N*sizeof(float),cudaHostAllocDefault);
cudaMalloc((void**)&dev_V,N*N*sizeof(float));
cudaMalloc((void**)&dev_VNew,N*N*sizeof(float));
cudaMalloc((void**)&dev_rho,N*N*sizeof(float));
//初始化
for(int i=0;i<N;i=i+1)
{
for(int j=0;j<N;j=j+1)
{
V[i+j*N]=0;
rho[i+j*N]=0;
}
}
rho[N/2*N+ N/2]=10/(h*h);
cudaMemcpy(dev_rho,rho,N*N*sizeof(float),cudaMemcpyHostToDevice);
cudaMemcpy(dev_V,V,N*N*sizeof(float),cudaMemcpyHostToDevice);
dim3 grid(N,N);
kernel<<<grid,1>>>(dev_V,dev_VNew,dev_rho);
cudaMemcpy(V,dev_V,N*N*sizeof(float),cudaMemcpyDeviceToHost);
// write potential to file
cout << " Potential in file poisson.data" << endl;
ofstream dataFile("AGPUpoisson.data");
for (int i = 0; i < L + 2; i++) {
for (int j = 0; j < L + 2; j++) {
dataFile << V[i +j*N] << "\t";
}
dataFile << '\n';
}
dataFile.close();
cudaFree(dev_V);
cudaFree(dev_VNew);
cudaFree(rho);
cudaFreeHost(V);
cudaFreeHost(rho);
}
補充說明(Supplement):
我明明宣告sm_20_atomic的標頭但是他編譯的時候卻抓sm_11說不支援不讓我使用
c:\program files\nvidia gpu computing toolkit\cuda\v5.0\
include\sm_11_atomic_functions.h(98) error : unsupported operation
1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\
CUDA 5.0.targets(592,9): error MSB3721:
命令 ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.0\bin\nvcc.exe"
-gencode=arch=compute_20,code=\"sm_20,compute_20\" --use-local-env
--cl-version 2010 -ccbin "C:\Program Files (x86)\
Microsoft Visual Studio 10.0\VC\bin" -I"C:\Program Files\
NVIDIA GPU Computing Toolkit\CUDA\v5.0\include" -I"C:\Program Files\
NVIDIA GPU Computing Toolkit\CUDA\v5.0\include" --keep-dir "Release"
-maxrregcount=0 --machine 32 --compile
-D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o "Release\kernel.cu.obj" "C:\Users\iHakka\Desktop\cuda\cuda\kernel.cu"" 已結束,代碼為 1。
1>
--
※ 發信站: 批踢踢實業坊(ptt.cc)
◆ From: 114.34.230.1
討論串 (同標題文章)
以下文章回應了本文:
完整討論串 (本文為第 1 之 2 篇):
C_and_CPP 近期熱門文章
PTT數位生活區 即時熱門文章