Point Cloud Library (PCL)
1.15.1
Toggle main menu visibility
Loading...
Searching...
No Matches
utils
include
pcl
gpu
utils
device
warp.hpp
1
/*
2
* Software License Agreement (BSD License)
3
*
4
* Copyright (c) 2011, Willow Garage, Inc.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
*
11
* * Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* * Redistributions in binary form must reproduce the above
14
* copyright notice, this list of conditions and the following
15
* disclaimer in the documentation and/or other materials provided
16
* with the distribution.
17
* * Neither the name of Willow Garage, Inc. nor the names of its
18
* contributors may be used to endorse or promote products derived
19
* from this software without specific prior written permission.
20
*
21
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32
* POSSIBILITY OF SUCH DAMAGE.
33
*
34
* Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35
*/
36
37
#ifndef PCL_DEVICE_UTILS_WARP_HPP_
38
#define PCL_DEVICE_UTILS_WARP_HPP_
39
40
namespace
pcl
41
{
42
namespace
device
43
{
44
struct
Warp
45
{
46
enum
47
{
48
LOG_WARP_SIZE
= 5,
49
WARP_SIZE
= 1 <<
LOG_WARP_SIZE
,
50
STRIDE
=
WARP_SIZE
51
};
52
53
/** \brief Returns the warp lane ID of the calling thread. */
54
static
__device__ __forceinline__
unsigned
int
laneId
()
55
{
56
unsigned
int
ret;
57
asm
(
"mov.u32 %0, %laneid;"
:
"=r"
(ret) );
58
return
ret;
59
}
60
61
static
__device__ __forceinline__
int
laneMaskLe
()
62
{
63
unsigned
int
ret;
64
asm
(
"mov.u32 %0, %lanemask_le;"
:
"=r"
(ret) );
65
return
ret;
66
}
67
68
static
__device__ __forceinline__
int
laneMaskLt
()
69
{
70
unsigned
int
ret;
71
asm
(
"mov.u32 %0, %lanemask_lt;"
:
"=r"
(ret) );
72
return
ret;
73
}
74
static
__device__ __forceinline__
unsigned
int
id
()
75
{
76
int
tid = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
77
return
tid >>
LOG_WARP_SIZE
;
78
}
79
80
static
__device__ __forceinline__
int
binaryInclScan
(
int
ballot_mask)
81
{
82
return
__popc(
Warp::laneMaskLe
() & ballot_mask);
83
}
84
85
static
__device__ __forceinline__
int
binaryExclScan
(
int
ballot_mask)
86
{
87
return
__popc(
Warp::laneMaskLt
() & ballot_mask);
88
}
89
90
template
<
typename
It,
typename
T>
91
static
__device__ __forceinline__
void
fill
(It beg, It end,
const
T& value)
92
{
93
for
(It t = beg +
laneId
(); t < end; t +=
STRIDE
)
94
*t = value;
95
}
96
97
template
<
typename
InIt,
typename
OutIt>
98
static
__device__ __forceinline__ OutIt
copy
(InIt beg, InIt end, OutIt out)
99
{
100
unsigned
int
lane =
laneId
();
101
InIt t = beg + lane;
102
OutIt o = out + lane;
103
104
for
(; t < end; t +=
STRIDE
, o +=
STRIDE
)
105
*o = *t;
106
return
o;
107
}
108
109
template
<
typename
InIt,
typename
OutIt,
class
UnOp>
110
static
__device__ __forceinline__ OutIt
transform
(InIt beg, InIt end, OutIt out, UnOp op)
111
{
112
unsigned
int
lane =
laneId
();
113
InIt t = beg + lane;
114
OutIt o = out + lane;
115
116
for
(InIt t = beg +
laneId
(); t < end; t +=
STRIDE
, o +=
STRIDE
)
117
*o = op(*t);
118
return
o;
119
}
120
121
template
<
typename
InIt1,
typename
InIt2,
typename
OutIt,
class
BinOp>
122
static
__device__ __forceinline__ OutIt
transform
(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
123
{
124
unsigned
int
lane =
laneId
();
125
InIt1 t1 = beg1 + lane;
126
InIt2 t2 = beg2 + lane;
127
OutIt o = out + lane;
128
129
for
(; t1 < end1; t1 +=
STRIDE
, t2 +=
STRIDE
, o +=
STRIDE
)
130
*o = op(*t1, *t2);
131
return
o;
132
}
133
134
template
<
typename
OutIt,
typename
T>
135
static
__device__ __forceinline__
void
yota
(OutIt beg, OutIt end, T value)
136
{
137
unsigned
int
lane =
laneId
();
138
value += lane;
139
140
for
(OutIt t = beg + lane; t < end; t +=
STRIDE
, value +=
STRIDE
)
141
*t = value;
142
}
143
144
template
<
typename
T,
class
BinOp>
145
static
__device__ __forceinline__
void
reduce
(
volatile
T* buffer, BinOp op)
146
{
147
unsigned
int
lane =
laneId
();
148
T val = buffer[lane];
149
150
if
(lane < 16)
151
{
152
buffer[lane] = val = op(val, buffer[lane + 16]);
153
buffer[lane] = val = op(val, buffer[lane + 8]);
154
buffer[lane] = val = op(val, buffer[lane + 4]);
155
buffer[lane] = val = op(val, buffer[lane + 2]);
156
buffer[lane] = val = op(val, buffer[lane + 1]);
157
}
158
}
159
160
template
<
typename
T,
class
BinOp>
161
static
__device__ __forceinline__ T
reduce
(
volatile
T* buffer, T init, BinOp op)
162
{
163
unsigned
int
lane =
laneId
();
164
T val = buffer[lane] = init;
165
166
if
(lane < 16)
167
{
168
buffer[lane] = val = op(val, buffer[lane + 16]);
169
buffer[lane] = val = op(val, buffer[lane + 8]);
170
buffer[lane] = val = op(val, buffer[lane + 4]);
171
buffer[lane] = val = op(val, buffer[lane + 2]);
172
buffer[lane] = val = op(val, buffer[lane + 1]);
173
}
174
return
buffer[0];
175
}
176
};
177
}
178
}
179
180
#endif
/* PCL_DEVICE_UTILS_WARP_HPP_ */
pcl::device
Definition
device_array.h:315
pcl
Definition
convolution.h:46
pcl::device::Warp
Definition
utils.hpp:524
pcl::device::Warp::laneMaskLe
static __device__ __forceinline__ int laneMaskLe()
Definition
warp.hpp:61
pcl::device::Warp::binaryInclScan
static __device__ __forceinline__ int binaryInclScan(int ballot_mask)
Definition
warp.hpp:80
pcl::device::Warp::reduce
static __device__ __forceinline__ void reduce(volatile T *buffer, BinOp op)
Definition
warp.hpp:145
pcl::device::Warp::STRIDE
@ STRIDE
Definition
utils.hpp:529
pcl::device::Warp::LOG_WARP_SIZE
@ LOG_WARP_SIZE
Definition
utils.hpp:527
pcl::device::Warp::WARP_SIZE
@ WARP_SIZE
Definition
utils.hpp:528
pcl::device::Warp::laneId
static __device__ __forceinline__ unsigned int laneId()
Returns the warp lane ID of the calling thread.
Definition
warp.hpp:54
pcl::device::Warp::transform
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
Definition
warp.hpp:122
pcl::device::Warp::binaryExclScan
static __device__ __forceinline__ int binaryExclScan(int ballot_mask)
Definition
warp.hpp:85
pcl::device::Warp::laneMaskLt
static __device__ __forceinline__ int laneMaskLt()
Definition
warp.hpp:68
pcl::device::Warp::copy
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
Definition
warp.hpp:98
pcl::device::Warp::fill
static __device__ __forceinline__ void fill(It beg, It end, const T &value)
Definition
warp.hpp:91
pcl::device::Warp::reduce
static __device__ __forceinline__ T reduce(volatile T *buffer, T init, BinOp op)
Definition
warp.hpp:161
pcl::device::Warp::id
static __device__ __forceinline__ unsigned int id()
Definition
warp.hpp:74
pcl::device::Warp::yota
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
Definition
warp.hpp:135
pcl::device::Warp::transform
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
Definition
warp.hpp:110