Radix sort OpenCL methods. (See Aqua::CalcServer::RadixSort for details)
More...
|
__kernel void | init (__global unsigned int *perms, unsigned int n) |
|
__kernel void | histogram (const __global unsigned int *keys, __global unsigned int *histograms, const unsigned int pass, __local unsigned int *loc_histo, const unsigned int n) |
|
__kernel void | scan (__global unsigned int *histograms, __local unsigned int *temp, __global unsigned int *globsum) |
|
__kernel void | paste (__global unsigned int *histograms, __global unsigned int *globsum) |
|
__kernel void | sort (const __global unsigned int *in_keys, __global unsigned int *out_keys, __global unsigned int *histograms, const unsigned int pass, __global unsigned int *in_permut, __global unsigned int *out_permut, __local unsigned int *loc_histo, const unsigned int n) |
|
__kernel void | inversePermutation (__global unsigned int *perms, __global unsigned int *inv_perms, unsigned int n) |
|
Radix sort OpenCL methods. (See Aqua::CalcServer::RadixSort for details)
- Note
- The header CalcServer/RadixSort.hcl.in is automatically appended.
◆ histogram()
__kernel void histogram |
( |
const __global unsigned int * |
keys, |
|
|
__global unsigned int * |
histograms, |
|
|
const unsigned int |
pass, |
|
|
__local unsigned int * |
loc_histo, |
|
|
const unsigned int |
n |
|
) |
| |
Perform the local histograms. The histograms are the number of occurrences of each radix. Since we are working in parallel, the number of ocurrences of each radix will be splited in blocks of dimension _ITEMS * _GROUPS: | it(0)gr(0)ra(0) | it(1)gr(0)ra(0) | ... | it(items)gr(0)ra(0) | | it(0)gr(1)ra(0) | it(1)gr(1)ra(0) | ... | it(items)gr(1)ra(0) | | ... | ... | ... | ... | | it(0)gr(groups)ra(0) | it(1)gr(groups)ra(0) | ... | it(items)gr(groups)ra(0) | | it(0)gr(0)ra(1) | it(1)gr(0)ra(1) | ... | it(items)gr(0)ra(1) | | ... | ... | ... | ... | | it(0)gr(groups)ra(1) | it(1)gr(groups)ra(1) | ... | it(items)gr(groups)ra(1) | | ... | ... | ... | ... | | it(0)gr(groups)ra(radix) | it(1)gr(groups)ra(radix) | ... | it(items)gr(groups)ra(radix) | where it is the thread, gr is the group, and ra is the radix.
- Parameters
-
keys | Input unsorted keys allocated into the device. |
histograms | Computed histograms. |
pass | Pass of the radix decomposition. |
loc_histo | Histograms local memory to speed up the process. |
n | Number of keys. |
◆ init()
__kernel void init |
( |
__global unsigned int * |
perms, |
|
|
unsigned int |
n |
|
) |
| |
Initializes the permutations assuming that the keys are sorted.
- Parameters
-
perms | Initial permutations (null) |
n | Number of keys. |
◆ inversePermutation()
__kernel void inversePermutation |
( |
__global unsigned int * |
perms, |
|
|
__global unsigned int * |
inv_perms, |
|
|
unsigned int |
n |
|
) |
| |
Compute the inversed permutations, which allows to know the original position of a key from the sorted one.
- Parameters
-
perms | Direct permutations (from the unsorted position to the sorted one) |
inv_perms | Inverse permutations (from the sorted position to the unsorted one) |
n | Number of keys. |
◆ paste()
__kernel void paste |
( |
__global unsigned int * |
histograms, |
|
|
__global unsigned int * |
globsum |
|
) |
| |
Paste the _HISTOSPLIT accumulated global sums into the splited accumulated histogram, to get a global accumulated histogram.
- Parameters
-
histograms | Accumulated histogram. At the start of the method the first value of each split of the histogram is zero. When the method ends the first value of each split will be the accumulated number of keys in all the previous splits. |
globsum | Accumulated global sums (_HISTOSPLIT components). |
◆ scan()
__kernel void scan |
( |
__global unsigned int * |
histograms, |
|
|
__local unsigned int * |
temp, |
|
|
__global unsigned int * |
globsum |
|
) |
| |
perform a parallel prefix sum (a scan) on the local histograms (see Blelloch 1990), retrieving the accumulated histogram. Each workitem cares about two memories. See also http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html
- Parameters
-
histograms | Histograms, or number of ocurrences of each radix, divided by blocks, as shown in histogram() method. |
temp | Local memory used to speed up the process. |
globsum | Total number of keys at each group (output). |
- Note
- The histogram histograms will be transformed in the accumulated histogram.
◆ sort()
__kernel void sort |
( |
const __global unsigned int * |
in_keys, |
|
|
__global unsigned int * |
out_keys, |
|
|
__global unsigned int * |
histograms, |
|
|
const unsigned int |
pass, |
|
|
__global unsigned int * |
in_permut, |
|
|
__global unsigned int * |
out_permut, |
|
|
__local unsigned int * |
loc_histo, |
|
|
const unsigned int |
n |
|
) |
| |
Perform permutations using the accumulated histogram.
- Parameters
-
in_keys | Input unsorted keys. |
out_keys | Output sorted keys. |
histograms | Accumulated histograms. |
pass | Pass of the radix decomposition. |
in_permut | Input permutations. |
out_permut | Output permutations. |
loc_histo | Histogram local memory to speed up the process. |
n | Number of keys. |
- Warning
- Radix sort needs several pass, so the output sorted keys of this pass must be the input unsorted keys of the next pass. Don't forgive to swap the OpenCL identifiers (for keys and permutations).