[ITK] [ITK-users] Writing from an external buffer to VectorImage

Mon Jul 3 07:22:49 EDT 2017

Hello,

I want to provide a solution to the question I had. In the case that someone else need an code example.
Just to give an idea how the implementation can look like.
The main reads an image and passes this image to the function that calls the kernel.
After the kernel is done the result is copied to the new vector image [1].

int main(int argc, char* argv[])
{

       createImage();
       auto nrrdIO = NRRDIOType::New();
       auto reader = ReaderType::New();
       reader->SetFileName("newImage.nrrd");
       reader->SetImageIO(nrrdIO);

       try
       {
              reader->Update();
       }
       catch (itk::ExceptionObject e)
       {
              std::cerr << "Exception caught!\n";
              std::cerr << e << std::endl;
       }

       auto image = reader->GetOutput();

       auto size = image->GetLargestPossibleRegion().GetSize();
       int sz[3] = {size[0],size[1],size[2]};
       auto totalVoxel = sz[0] * sz[1] * sz[2];

       auto cuda = CudaTest();
       auto out = new float4[totalVoxel];
       cuda.runKernel(out, image->GetBufferPointer(), sz);

       auto vecImage = VectorImageType::New();
       VectorImageType::IndexType index;
       index[0] = 0;
       index[1] = 0;
       index[2] = 0;
       VectorImageType::RegionType region(index,size);
       vecImage->SetRegions(region);
       vecImage->SetVectorLength(3);
       vecImage->Allocate();

       // copy image buffer to vecImage, component by component
       auto vecBuffer = vecImage->GetBufferPointer();
       auto j = 0;
       for (auto i = 0; i < totalVoxel; ++i)
       {
              vecBuffer[j] = out[i].x; j++;
              vecBuffer[j] = out[i].y; j++;
              vecBuffer[j] = out[i].z; j++;
       }

       auto writer = WriterType::New();
       writer->SetInput(vecImage);
       writer->SetFileName("out.nrrd");
       writer->SetImageIO(nrrdIO);
       try
       {
              writer->Update();
       }
       catch (itk::ExceptionObject e)
       {
              std::cerr << "Exception caught!\n";
              std::cerr << e << std::endl;
       }

       delete[] out;

}

The function that starts the kernel looks like this:

void CudaTest::runKernel(float4* out, float* data, int* size)
{
       auto totalVoxel = size[0] * size[1] * size[2];
       // copy image to texture memory
       auto channelDesc = cudaCreateChannelDesc<float>();
       cudaArray *cuArray;
       auto extent = make_cudaExtent(size[0], size[1], size[2]);
       gpuErrChk(cudaMalloc3DArray(&cuArray, &channelDesc, extent));
       cudaMemcpy3DParms copyparms = { 0 };
       copyparms.extent = extent;
       copyparms.dstArray = cuArray;
       copyparms.kind = cudaMemcpyHostToDevice;
       copyparms.srcPtr = make_cudaPitchedPtr((void*)data, size[0] * sizeof(float), size[0], size[1]);
       gpuErrChk(cudaMemcpy3D(&copyparms));

       // set texture configuration and bind array to texture
       tex_volume.addressMode[0] = cudaAddressModeClamp;
       tex_volume.addressMode[1] = cudaAddressModeClamp;
       tex_volume.addressMode[2] = cudaAddressModeClamp;
       tex_volume.filterMode = cudaFilterModeLinear;
       tex_volume.normalized = false;
       gpuErrChk(cudaBindTextureToArray(tex_volume, cuArray, channelDesc));

       // allocate memory for gradient image data on the device
       float4 *dev_gradientImage;
       gpuErrChk(cudaMalloc(&dev_gradientImage, totalVoxel * sizeof(float4)));

       // copy image dimension to device
       int* dev_dimension;
       gpuErrChk(cudaMalloc(&dev_dimension, 3 * sizeof(int)));
       gpuErrChk(cudaMemcpy(dev_dimension, size, 3 * sizeof(int), cudaMemcpyHostToDevice));

       // start computation for the gradient image
       dim3 blockDim(16, 8, 8); //<- threads in block, change according to used GPU, max 1024 threads in a single block
       dim3 grid((size[0] / blockDim.x) + 1, (size[1] / blockDim.y) + 1, (size[2] / blockDim.z) + 1);

       computeGradientImage << <grid, blockDim >> >(dev_gradientImage, dev_dimension);
       gpuErrChk(cudaGetLastError());
       gpuErrChk(cudaDeviceSynchronize());

       // unbind texture
       gpuErrChk(cudaUnbindTexture(tex_volume));
       gpuErrChk(cudaFreeArray(cuArray));

       gpuErrChk(cudaMemcpy(out, dev_gradientImage, totalVoxel * sizeof(float4), cudaMemcpyDeviceToHost));
       printf("Kernel is done.\n");
}

The kernel used in this example.

__global__ void computeGradientImage(float4* gradientImage, int* dimension)
{
       // every thread computes the float4 voxel with theta,phi,magnitude from gradient image
       int idx = blockIdx.x * blockDim.x + threadIdx.x;
       int idy = blockIdx.y * blockDim.y + threadIdx.y;
       int idz = blockIdx.z * blockDim.z + threadIdx.z;

       if (idx < dimension[0] && idy < dimension[1] && idz < dimension[2])
       {
              // define sobel filter for each direction
              float x_000 = tex3D(tex_volume, idx - 1, idy - 1, idz - 1);
              float x_001 = tex3D(tex_volume, idx - 1, idy - 1, idz    );
              float x_002 = tex3D(tex_volume, idx - 1, idy - 1, idz + 1);
              float x_010 = tex3D(tex_volume, idx - 1, idy,     idz - 1);
              float x_011 = tex3D(tex_volume, idx - 1, idy,     idz    );
              float x_012 = tex3D(tex_volume, idx - 1, idy,     idz + 1);
              float x_020 = tex3D(tex_volume, idx - 1, idy + 1, idz - 1);
              float x_021 = tex3D(tex_volume, idx - 1, idy + 1, idz    );
              float x_022 = tex3D(tex_volume, idx - 1, idy + 1, idz + 1);

              float x_100 = tex3D(tex_volume, idx,     idy - 1, idz - 1);
              float x_101 = tex3D(tex_volume, idx,     idy - 1, idz    );
              float x_102 = tex3D(tex_volume, idx,     idy - 1, idz + 1);
              float x_110 = tex3D(tex_volume, idx,     idy,     idz - 1);
//            float x_111 = tex3D(tex_volume, idx,     idy,     idz    );
              float x_112 = tex3D(tex_volume, idx,     idy,     idz + 1);
              float x_120 = tex3D(tex_volume, idx,     idy + 1, idz - 1);
              float x_121 = tex3D(tex_volume, idx,     idy + 1, idz    );
              float x_122 = tex3D(tex_volume, idx,     idy + 1, idz + 1);

              float x_200 = tex3D(tex_volume, idx + 1, idy - 1, idz - 1);
              float x_201 = tex3D(tex_volume, idx + 1, idy - 1, idz    );
              float x_202 = tex3D(tex_volume, idx + 1, idy - 1, idz + 1);
              float x_210 = tex3D(tex_volume, idx + 1, idy,     idz - 1);
              float x_211 = tex3D(tex_volume, idx + 1, idy,     idz    );
              float x_212 = tex3D(tex_volume, idx + 1, idy,     idz + 1);
              float x_220 = tex3D(tex_volume, idx + 1, idy + 1, idz - 1);
              float x_221 = tex3D(tex_volume, idx + 1, idy + 1, idz    );
              float x_222 = tex3D(tex_volume, idx + 1, idy + 1, idz + 1);

              // derivative in x direction
              auto centerX = - 1.f*x_000 - 3.f*x_010 - 1.f*x_020
                             - 3.f*x_001 - 6.f*x_011 - 3.f*x_021
                             - 1.f*x_002 - 3.f*x_012 - 1.f*x_022
                             + 1.f*x_200 + 3.f*x_210 + 1.f*x_220
                             + 3.f*x_201 + 6.f*x_211 + 3.f*x_221
                             + 1.f*x_202 + 3.f*x_212 + 1.f*x_222;

              // derivative in y direction
              auto centerY = - 1.f*x_000 - 3.f*x_100 - 1.f*x_200
                             - 3.f*x_001 - 6.f*x_101 - 3.f*x_201
                             - 1.f*x_002 - 3.f*x_102 - 1.f*x_202
                             + 1.f*x_020 + 3.f*x_120 + 1.f*x_220
                             + 3.f*x_021 + 6.f*x_121 + 3.f*x_221
                             + 1.f*x_022 + 3.f*x_122 + 1.f*x_222;

              // derivative in z direction
              auto centerZ = - 1.f*x_000 - 3.f*x_100 - 1.f*x_200
                             - 3.f*x_010 - 6.f*x_110 - 3.f*x_210
                             - 1.f*x_020 - 3.f*x_120 - 1.f*x_220
                             + 1.f*x_002 + 3.f*x_102 + 1.f*x_202
                             + 3.f*x_012 + 6.f*x_112 + 3.f*x_212
                             + 1.f*x_022 + 3.f*x_122 + 1.f*x_222;

              // magnitude of each voxels gradient
              auto magn = sqrtf(centerX*centerX + centerY*centerY + centerZ*centerZ);

              gradientImage[idx + dimension[0] * (idy + idz * dimension[1])] = make_float4(centerX, centerY, centerZ, magn);
       }
}

with kind regards,
Gordian

[1]: https://itk.org/Doxygen/html/classitk_1_1VectorImage.html

Von: Dženan Zukić [mailto:dzenanz at gmail.com]
Gesendet: Mittwoch, 28. Juni 2017 17:17
An: Kabelitz, Gordian
Cc: insight-users at itk.org
Betreff: Re: [ITK-users] Writing from an external buffer to VectorImage

Hi Gordian,

this approach looks like it should work. What is wrong with it?

Regards,
Dženan Zukić, PhD, Senior R&D Engineer, Kitware (Carrboro, N.C.)

On Wed, Jun 28, 2017 at 9:51 AM, Kabelitz, Gordian <Gordian.Kabelitz at medma.uni-heidelberg.de<mailto:Gordian.Kabelitz at medma.uni-heidelberg.de>> wrote:
Hello,

i computed a gradient with my own function and as a result a pointer to an image buffer is provided. I know the size, origin and spacing of the gradient component image.
I want to copy the gradient image into an itk::VectorImage with the components for the x,y,z gradients.

The way I copied the image to the GPU is that I retrieved the buffer pointer from my input image and use the pointer to copy the image data to the GPU.
I used the way proposed in [1]. The computeGradientImage method is listed at the end of this mail.

[...]
// get float pointer to image data
ImageType::Pointer image = reader->GetOutput();
Image->Update();

float* data = image.GetBufferPointer();
// copy image data to GPU texture memory (this works)
gpu_dev->setVoxels(dimension, voxelSize, data);
[...]
computeGradientImage<<<n,m>>> (dev_gradientImage, dimension);

// copy resulting gradientImage to host variable
float4* host_gradientImage;
cudaMemcpy(host_gradient, dev_gradientImage, numberOfVoxels*sizeof(float4));

--> Pseudo Code <--
// Now I want to reverse the copy process. I have a float4 image and want to copy this into a itk::VectorImage with VariableVectorLength of 3 (skipping the magnitude value).
[...] -> size, spacing, origin, region definition
Itk::VectorImageType vecImage = VectorImageType::New();
vecImage->setRegion(region);
vecImage ->SetVectorLength(3);
vecImage->Allocate();

// copy image buffer to vecImage, component by component
auto vecBuffer = vecImage->getBufferPointer();
auto j = 0;
for (i=0; i<totalVoxel; ++i)
{
        vecbuffer[j] = host_gradient[i].x; j++;
        vecbuffer[j] = host_gradient[i].y; j++;
        vecbuffer[j] = host_gradient[i].z; j++;
}

// save vecImage as nrrd image
[...]

I haven't found a way to achieve my idea.
Are there any suggestions or examples?
As far I can see I cannot use the itk::ImportImageFilter.

Thank you for any suggestions.
With kind regards,
Gordian

[1]: https://itk.org/CourseWare/Training/GettingStarted-V.pdf

void computeGradientImage(float4* gradientImage, int* dimension)
{
        // every thread computes the float4 voxel with theta,phi,magnitude from gradient image
        int idx = blockIdx.x * blockDim.x + threadIdx.x;
        int idy = blockIdx.y * blockDim.y + threadIdx.y;
        int idz = blockIdx.z * blockDim.z + threadIdx.z;

        if (idx < dimension[0] && idy < dimension[1] && idz < dimension[2])
        {
                // define sobel filter for each direction
                [...]

                // run sobel on image in texture memory for each direction and put result into a float4 image
                gradientImage[idx + dimension[0] * (idy + idz * dimension[1])] = make_float4(sobelX, sobelY, sobelZ, magn);
        }
}

_____________________________________
Powered by www.kitware.com<http://www.kitware.com>

Visit other Kitware open-source projects at
http://www.kitware.com/opensource/opensource.html

Kitware offers ITK Training Courses, for more information visit:
http://www.kitware.com/products/protraining.php

Please keep messages on-topic and check the ITK FAQ at:
http://www.itk.org/Wiki/ITK_FAQ

Follow this link to subscribe/unsubscribe:
http://public.kitware.com/mailman/listinfo/insight-users

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://public.kitware.com/pipermail/community/attachments/20170703/12148ad3/attachment-0001.html>
-------------- next part --------------
_____________________________________
Powered by www.kitware.com

Visit other Kitware open-source projects at
http://www.kitware.com/opensource/opensource.html

Kitware offers ITK Training Courses, for more information visit:
http://www.kitware.com/products/protraining.php

Please keep messages on-topic and check the ITK FAQ at:
http://www.itk.org/Wiki/ITK_FAQ

Follow this link to subscribe/unsubscribe:
http://public.kitware.com/mailman/listinfo/insight-users