Point Cloud Library (PCL)
1.15.1
Toggle main menu visibility
Loading...
Searching...
No Matches
octree
src
utils
scan_block.hpp
1
/*
2
* Software License Agreement (BSD License)
3
*
4
* Copyright (c) 2011, Willow Garage, Inc.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
*
11
* * Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* * Redistributions in binary form must reproduce the above
14
* copyright notice, this list of conditions and the following
15
* disclaimer in the documentation and/or other materials provided
16
* with the distribution.
17
* * Neither the name of Willow Garage, Inc. nor the names of its
18
* contributors may be used to endorse or promote products derived
19
* from this software without specific prior written permission.
20
*
21
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32
* POSSIBILITY OF SUCH DAMAGE.
33
*
34
* Author: Anatoly Baksheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35
*/
36
37
#ifndef PCL_GPU_OCTREE_SCAN_BLOCK_HPP
38
#define PCL_GPU_OCTREE_SCAN_BLOCK_HPP
39
40
41
namespace
pcl
42
{
43
namespace
device
44
{
45
enum
ScanKind
{
exclusive
,
inclusive
} ;
46
47
template
<ScanKind Kind ,
class
T>
48
__device__ __forceinline__ T
scan_warp
(
volatile
T *ptr ,
const
unsigned
int
idx = threadIdx.x )
49
{
50
const
unsigned
int
lane = idx & 31;
// index of thread in warp (0..31)
51
52
if
( lane >= 1) ptr [idx ] = ptr [idx - 1] + ptr [idx];
53
if
( lane >= 2) ptr [idx ] = ptr [idx - 2] + ptr [idx];
54
if
( lane >= 4) ptr [idx ] = ptr [idx - 4] + ptr [idx];
55
if
( lane >= 8) ptr [idx ] = ptr [idx - 8] + ptr [idx];
56
if
( lane >= 16) ptr [idx ] = ptr [idx - 16] + ptr [idx];
57
58
if
( Kind ==
inclusive
)
59
return
ptr [idx ];
60
else
61
return
(lane > 0) ? ptr [idx - 1] : 0;
62
}
63
64
template
<ScanKind Kind ,
class
T>
65
__device__ __forceinline__ T
scan_block
(
volatile
T *ptr ,
const
unsigned
int
idx = threadIdx.x )
66
{
67
const
unsigned
int
lane = idx & 31;
68
const
unsigned
int
warpid = idx >> 5;
69
70
// Step 1: Intra - warp scan in each warp
71
T val =
scan_warp <Kind>
( ptr , idx );
72
73
__syncthreads ();
74
75
// Step 2: Collect per - warp partial results
76
77
/* if( warpid == 0 )
78
if( lane == 31 )
79
ptr [ warpid ] = ptr [idx ];
80
81
__syncthreads ();
82
83
if( warpid > 0 ) */
84
if
( lane == 31 )
85
ptr [ warpid ] = ptr [idx ];
86
87
__syncthreads ();
88
89
// Step 3: Use 1st warp to scan per - warp results
90
if
( warpid == 0 )
91
scan_warp<inclusive>
( ptr , idx );
92
93
__syncthreads ();
94
95
// Step 4: Accumulate results from Steps 1 and 3
96
if
( warpid > 0)
97
val = ptr [warpid -1] + val;
98
99
__syncthreads ();
100
101
// Step 5: Write and return the final result
102
ptr[idx] = val;
103
104
__syncthreads ();
105
106
return
val ;
107
}
108
}
109
}
110
111
#endif
/* PCL_GPU_OCTREE_SCAN_BLOCK_HPP */
pcl::device
Definition
device_array.h:315
pcl::device::scan_warp
__device__ __forceinline__ T scan_warp(volatile T *ptr, const unsigned int idx=threadIdx.x)
Definition
device.hpp:87
pcl::device::ScanKind
ScanKind
Definition
device.hpp:83
pcl::device::exclusive
@ exclusive
Definition
device.hpp:83
pcl::device::inclusive
@ inclusive
Definition
device.hpp:83
pcl::device::scan_block
__device__ __forceinline__ T scan_block(volatile T *ptr, const unsigned int idx=threadIdx.x)
Definition
scan_block.hpp:65
pcl
Definition
convolution.h:46