Point Cloud Library (PCL)
1.15.1
Toggle main menu visibility
Loading...
Searching...
No Matches
people
src
cuda_async_copy.h
1
/*
2
* Software License Agreement (BSD License)
3
*
4
* Point Cloud Library (PCL) - www.pointclouds.org
5
* Copyright (c) 2011, Willow Garage, Inc.
6
*
7
* All rights reserved.
8
*
9
* Redistribution and use in source and binary forms, with or without
10
* modification, are permitted provided that the following conditions
11
* are met:
12
*
13
* * Redistributions of source code must retain the above copyright
14
* notice, this list of conditions and the following disclaimer.
15
* * Redistributions in binary form must reproduce the above
16
* copyright notice, this list of conditions and the following
17
* disclaimer in the documentation and/or other materials provided
18
* with the distribution.
19
* * Neither the name of Willow Garage, Inc. nor the names of its
20
* contributors may be used to endorse or promote products derived
21
* from this software without specific prior written permission.
22
*
23
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34
* POSSIBILITY OF SUCH DAMAGE.
35
*
36
* @authors: Anatoly Baksheev
37
*/
38
39
#pragma once
40
41
#include <pcl/gpu/containers/device_array.h>
42
#include <pcl/gpu/utils/safe_call.hpp>
43
44
namespace
pcl
45
{
46
namespace
gpu
47
{
48
template
<
class
T>
49
class
AsyncCopy
50
{
51
public
:
52
AsyncCopy
(T* ptr, std::size_t size) : ptr_(ptr)
53
{
54
cudaSafeCall( cudaHostRegister(ptr_, size, 0) );
55
cudaSafeCall( cudaStreamCreate(&stream_) );
56
}
57
58
AsyncCopy
(std::vector<T>& data) : ptr_(&data[0])
59
{
60
cudaSafeCall( cudaHostRegister(ptr_, data.size(), 0) );
61
cudaSafeCall( cudaStreamCreate(&stream_) );
62
}
63
64
~AsyncCopy
()
65
{
66
cudaSafeCall( cudaHostUnregister(ptr_) );
67
cudaSafeCall( cudaStreamDestroy(stream_) );
68
}
69
70
void
download
(
const
DeviceArray<T>
& arr)
71
{
72
cudaSafeCall( cudaMemcpyAsync(ptr_, arr.
ptr
(), arr.
sizeBytes
(), cudaMemcpyDeviceToHost, stream_) );
73
}
74
75
void
download
(
const
DeviceArray2D<T>
& arr)
76
{
77
cudaSafeCall( cudaMemcpy2DAsync(ptr_, arr.
cols
(), arr.
ptr
(), arr.
step
(), arr.
colsBytes
(), arr.
rows
(), cudaMemcpyDeviceToHost, stream_) );
78
}
79
80
void
upload
(
const
DeviceArray<T>
& arr)
const
81
{
82
cudaSafeCall( cudaMemcpyAsync(arr.
ptr
(), ptr_, arr.
size
(), cudaMemcpyHostToDevice, stream_) );
83
}
84
85
void
upload
(
const
DeviceArray2D<T>
& arr)
const
86
{
87
cudaSafeCall( cudaMemcpy2DAsync(arr.
ptr
(), arr.
step
(), ptr_, arr.
cols
(), arr.
colsBytes
(), arr.
rows
(), cudaMemcpyHostToDevice, stream_) );
88
}
89
90
void
waitForCompeltion
()
91
{
92
cudaSafeCall( cudaStreamSynchronize(stream_) );
93
}
94
private
:
95
cudaStream_t stream_;
96
T* ptr_ ;
97
};
98
}
99
100
namespace
device
101
{
102
using
pcl::gpu::AsyncCopy
;
103
}
104
}
pcl::device::AsyncCopy::AsyncCopy
AsyncCopy(T *ptr, std::size_t size)
Definition
cuda_async_copy.h:52
pcl::gpu::AsyncCopy
Definition
cuda_async_copy.h:50
pcl::gpu::AsyncCopy::AsyncCopy
AsyncCopy(T *ptr, std::size_t size)
Definition
cuda_async_copy.h:52
pcl::gpu::AsyncCopy::upload
void upload(const DeviceArray2D< T > &arr) const
Definition
cuda_async_copy.h:85
pcl::gpu::AsyncCopy::~AsyncCopy
~AsyncCopy()
Definition
cuda_async_copy.h:64
pcl::gpu::AsyncCopy::download
void download(const DeviceArray2D< T > &arr)
Definition
cuda_async_copy.h:75
pcl::gpu::AsyncCopy::download
void download(const DeviceArray< T > &arr)
Definition
cuda_async_copy.h:70
pcl::gpu::AsyncCopy::waitForCompeltion
void waitForCompeltion()
Definition
cuda_async_copy.h:90
pcl::gpu::AsyncCopy::AsyncCopy
AsyncCopy(std::vector< T > &data)
Definition
cuda_async_copy.h:58
pcl::gpu::AsyncCopy::upload
void upload(const DeviceArray< T > &arr) const
Definition
cuda_async_copy.h:80
pcl::gpu::DeviceArray2D
DeviceArray2D class
Definition
device_array.h:188
pcl::gpu::DeviceArray2D::rows
int rows() const
Returns number of rows.
Definition
device_array.hpp:317
pcl::gpu::DeviceArray2D::cols
int cols() const
Returns number of elements in each row.
Definition
device_array.hpp:310
pcl::gpu::DeviceArray2D::ptr
T * ptr(int y=0)
Returns pointer to given row in internal buffer.
Definition
device_array.hpp:284
pcl::gpu::DeviceArray
DeviceArray class
Definition
device_array.h:54
pcl::gpu::DeviceArray::size
std::size_t size() const
Returns size in elements.
Definition
device_array.hpp:149
pcl::gpu::DeviceArray::ptr
T * ptr()
Returns pointer for internal buffer in GPU memory.
Definition
device_array.hpp:156
pcl::gpu::DeviceMemory2D::step
std::size_t step() const
Returns stride between two consecutive rows in bytes for internal buffer.
pcl::gpu::DeviceMemory2D::colsBytes
int colsBytes() const
Returns number of bytes in each row.
pcl::gpu::DeviceMemory::sizeBytes
std::size_t sizeBytes() const
pcl::gpu
Definition
device_array.h:45
pcl
Definition
convolution.h:46