[問題] opencv的cuda +cuda 核函數的問題

看板C_and_CPP (C/C++)作者 (su27)時間1年前 (2023/08/17 11:52), 編輯推噓0(000)
留言0則, 0人參與, 最新討論串1/1
開發平台(Platform): (Ex: Win10, Linux, ...) Win10 編譯器(Ex: GCC, clang, VC++...)+目標環境(跟開發平台不同的話需列出) VC++ 額外使用到的函數庫(Library Used): (Ex: OpenGL, ...) opencv Cuda 問題(Question): 我想將opencv的cuda數據處理完後 在自己寫核函數去處理 結果發現10*10的數據 只有一行有 其他都是0 想問一下為什麼會這樣 我哪裡寫錯了 謝謝 餵入的資料(Input): [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 預期的正確結果(Expected Output): [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 錯誤結果(Wrong Output): 0 ,1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,9 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , 程式碼(Code):(請善用置底文網頁, 記得排版,禁止使用圖檔)

/**
 * Debug kernel: prints a rows x cols single-channel 8-bit image that lives in
 * device memory, one image row per output line.
 *
 * Launch layout: any grid/block shape is accepted; only the thread whose
 * global (x, y) index is (0, 0) prints, every other thread returns at once.
 * No shared memory is used.
 *
 * @param src_image  Device pointer to the first byte of the image.
 * @param length     Number of pixels (rows * cols). Kept for interface
 *                   compatibility; the kernel does not read it.
 * @param step       Distance in BYTES between the starts of two consecutive
 *                   rows. For a cv::cuda::GpuMat this must be GpuMat::step:
 *                   2-D GpuMat storage is pitched, so rows are generally NOT
 *                   `cols` bytes apart. Indexing such a buffer as
 *                   `i * cols + j` reads row padding instead of pixels, which
 *                   is why only the first row came out right.
 *                   The default (10) reproduces the old tightly-packed 10x10
 *                   behaviour for existing two-argument launches.
 * @param rows       Number of image rows to print (default 10).
 * @param cols       Number of image columns to print (default 10).
 */
__global__ void test1_Kernel(char* src_image, int length,
                             size_t step = 10, int rows = 10, int cols = 10)
{
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    const int x = blockIdx.x * blockDim.x + threadIdx.x;

    // Device printf is serialized and slow; let exactly one thread do it.
    if (x != 0 || y != 0) {
        return;
    }

    for (int i = 0; i < rows; i++) {
        // Advance by the row pitch in bytes, not by the logical width.
        const char* row = src_image + static_cast<size_t>(i) * step;
        printf("\n");
        for (int j = 0; j < cols; j++) {
            printf("%d ,", (int)row[j]);
        }
    }
}

/**
 * Builds a 10x10 CV_8UC1 test image in which every row is 0..9, uploads it
 * to the GPU and dumps the device copy with test1_Kernel.
 *
 * CUDA launch/execution errors are reported on std::cerr instead of being
 * silently ignored.
 */
void test1_withCuda()
{
    const int rows = 10;
    const int cols = 10;

    cv::Mat src_image = Mat::zeros(Size(cols, rows), CV_8UC1);
    for (int i = 0; i < rows; i++) {
        // Row pointers stay correct even if the Mat were not continuous.
        unsigned char* row = src_image.ptr<unsigned char>(i);
        for (int j = 0; j < cols; j++) {
            row[j] = static_cast<unsigned char>(j);
        }
    }
    //capture >> src_image;

    cv::cuda::GpuMat d_src_img;
    cout << src_image << endl;
    d_src_img.upload(src_image);

    int size_temp = d_src_img.rows * d_src_img.cols;
    cout << size_temp << endl;

    // Only one thread prints, so a single-thread launch is sufficient.
    // Pass the GpuMat row pitch so the kernel can address every row.
    test1_Kernel<<<1, 1>>>(reinterpret_cast<char*>(d_src_img.data), size_temp,
                           d_src_img.step, d_src_img.rows, d_src_img.cols);

    // A launch does not return an error itself: query the launch status, then
    // synchronize to flush device printf and surface execution errors.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        std::cerr << "CUDA error in test1_withCuda: "
                  << cudaGetErrorString(err) << std::endl;
    }

    waitKey(0);
}

放 補充說明(Supplement): -- ※ 發信站: 批踢踢實業坊(ptt.cc), 來自: 114.35.132.43 (臺灣) ※ 文章網址: https://www.ptt.cc/bbs/C_and_CPP/M.1692244376.A.798.html
文章代碼(AID): #1atPcOUO (C_and_CPP)
文章代碼(AID): #1atPcOUO (C_and_CPP)