AQUAgpusph 4.1.2
#include "resources/Scripts/types/types.h"
Functions

__kernel void mask_planes(__global unsigned int *mpi_mask, const __global int *imove, const __global vec *r_in, unsigned int mpi_rank, vec mpi_planes_orig, vec mpi_planes_dist, unsigned int N)
    Compute the particles sending mask.

__kernel void copy(__global vec *mpi_r, __global vec *mpi_u, __global vec *mpi_dudt, __global float *mpi_rho, __global float *mpi_drhodt, __global float *mpi_m, const __global vec *r_in, const __global vec *u_in, const __global vec *dudt_in, const __global float *rho_in, const __global float *drhodt_in, const __global float *m, unsigned int N)
    Copy the data.

__kernel void restore(__global vec *r_in, __global vec *u_in, __global vec *dudt_in, __global float *rho_in, __global float *drhodt_in, __global float *m, __global int *imove, const __global unsigned int *mpi_mask, const __global vec *mpi_r, const __global vec *mpi_u, const __global vec *mpi_dudt, const __global float *mpi_rho, const __global float *mpi_drhodt, const __global float *mpi_m, unsigned int mpi_rank, unsigned int nbuffer, unsigned int N)
    Add the particles received from other processes.

__kernel void sort(const __global unsigned int *mpi_mask_in, __global unsigned int *mpi_mask, const __global uint *id_sorted, unsigned int N)
    Sort the mask.

__kernel void drop_planes(__global int *imove, __global vec *r, unsigned int mpi_rank, vec mpi_planes_orig, vec mpi_planes_dist, vec domain_max, unsigned int N)
    Drop particles belonging to different processes.
MPI syncing point.
__kernel void copy(__global vec *mpi_r,
                   __global vec *mpi_u,
                   __global vec *mpi_dudt,
                   __global float *mpi_rho,
                   __global float *mpi_drhodt,
                   __global float *mpi_m,
                   const __global vec *r_in,
                   const __global vec *u_in,
                   const __global vec *dudt_in,
                   const __global float *rho_in,
                   const __global float *drhodt_in,
                   const __global float *m,
                   unsigned int N)
Copy the data.
To make the operation as asynchronous as possible, all the available data is copied, regardless of whether it is actually useful or not. That is slightly less efficient computationally, but it allows the data transfer to start as soon as possible, while the particles to be neglected are still being worked out.
The fields copied here are intended to be the same ones subsequently exchanged with the mpi-sync tool.
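A minimal sketch of what such a copy amounts to is shown below. It assumes the documented argument list and the vec type from resources/Scripts/types/types.h; the kernel name copy_sketch and the body are illustrative, not the actual AQUAgpusph implementation.

    /* Illustrative element-wise copy of every field into its MPI staging
     * counterpart. No filtering is applied here: discarding particles is
     * deferred so the exchange can start as early as possible. */
    __kernel void copy_sketch(__global vec *mpi_r,
                              __global vec *mpi_u,
                              __global vec *mpi_dudt,
                              __global float *mpi_rho,
                              __global float *mpi_drhodt,
                              __global float *mpi_m,
                              const __global vec *r_in,
                              const __global vec *u_in,
                              const __global vec *dudt_in,
                              const __global float *rho_in,
                              const __global float *drhodt_in,
                              const __global float *m,
                              unsigned int N)
    {
        const unsigned int i = get_global_id(0);
        if (i >= N)
            return;
        mpi_r[i] = r_in[i];
        mpi_u[i] = u_in[i];
        mpi_dudt[i] = dudt_in[i];
        mpi_rho[i] = rho_in[i];
        mpi_drhodt[i] = drhodt_in[i];
        mpi_m[i] = m[i];
    }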
mpi_r | Position \( \mathbf{r} \) MPI copy |
mpi_u | Velocity \( \mathbf{u} \) MPI copy |
mpi_dudt | Velocity rate of change \( \frac{d \mathbf{u}}{d t} \) MPI copy |
mpi_rho | Density \( \rho \) MPI copy |
mpi_drhodt | Density rate of change \( \frac{d \rho}{d t} \) MPI copy |
mpi_m | Mass \( m \) MPI copy |
r_in | Position \( \mathbf{r} \) |
u_in | Velocity \( \mathbf{u} \) |
dudt_in | Velocity rate of change \( \frac{d \mathbf{u}}{d t} \) |
rho_in | Density \( \rho \) |
drhodt_in | Density rate of change \( \frac{d \rho}{d t} \) |
m | Mass \( m \) |
N | Number of particles |
__kernel void drop_planes(__global int *imove,
                          __global vec *r,
                          unsigned int mpi_rank,
                          vec mpi_planes_orig,
                          vec mpi_planes_dist,
                          vec domain_max,
                          unsigned int N)
Drop particles belonging to different processes.
Transfer the particles belonging to other processes, i.e. those outside the current process bounding planes, to "the buffer". It should always be borne in mind that the buffer particles are not made available again until the next time step.
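The sketch below illustrates one way this can be done, under the same equally-spaced interface planes assumption stated for mask_planes(). The plane-index arithmetic, the imove value used to tag buffer particles (-255) and the trick of parking dropped particles at domain_max are assumptions for illustration, not necessarily the actual implementation.

    /* Illustrative sketch: particles owned by another process are sent to
     * "the buffer" by parking them at the domain corner and tagging them. */
    __kernel void drop_planes_sketch(__global int *imove,
                                     __global vec *r,
                                     unsigned int mpi_rank,
                                     vec mpi_planes_orig,
                                     vec mpi_planes_dist,
                                     vec domain_max,
                                     unsigned int N)
    {
        const unsigned int i = get_global_id(0);
        if (i >= N)
            return;
        /* Only regular (moving) particles are considered (assumption). */
        if (imove[i] <= 0)
            return;
        /* Owning process, counting equally spaced interface planes crossed
         * along the splitting direction. */
        const float d2 = dot(mpi_planes_dist, mpi_planes_dist);
        const float s = dot(r[i] - mpi_planes_orig, mpi_planes_dist) / d2;
        const unsigned int proc = (unsigned int)max(0, (int)floor(s) + 1);
        if (proc == mpi_rank)
            return;
        /* Send the particle to the buffer (conventions assumed). */
        r[i] = domain_max;
        imove[i] = -255;
    }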
imove | Moving flags |
r | Position \( \mathbf{r} \) |
mpi_rank | MPI process index |
mpi_planes_orig | Center of the first interface (between procs) plane |
mpi_planes_dist | Distance between interface planes |
domain_max | Top-left-frontal corner of the computational domain |
N | Number of particles |
__kernel void mask_planes(__global unsigned int *mpi_mask,
                          const __global int *imove,
                          const __global vec *r_in,
                          unsigned int mpi_rank,
                          vec mpi_planes_orig,
                          vec mpi_planes_dist,
                          unsigned int N)
Compute the particles sending mask.
The sending mask is just an array storing, for each particle, the process it shall be sent to, in case it has to be sent at all.
This method assumes equally spaced interface planes
No special treatment is required at the bounding processes, since setting the mask to an invalid process just means the particle is ignored. However, such a situation would indicate problems. Thus, it is strongly recommended to use MPI sync together with the computational domain module, in such a way that mpi_planes_orig matches the minimum computational domain point.
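As an illustration, a sketch of such a mask computation under the equally-spaced-planes assumption could look as follows. The way the process index is derived from mpi_planes_orig and mpi_planes_dist, as well as the imove filtering, are assumptions rather than the actual kernel body.

    /* Illustrative sketch: store, for each particle, the index of the
     * process owning the slab of space it currently lies in. */
    __kernel void mask_planes_sketch(__global unsigned int *mpi_mask,
                                     const __global int *imove,
                                     const __global vec *r_in,
                                     unsigned int mpi_rank,
                                     vec mpi_planes_orig,
                                     vec mpi_planes_dist,
                                     unsigned int N)
    {
        const unsigned int i = get_global_id(0);
        if (i >= N)
            return;
        /* By default the particle stays on the current process. */
        mpi_mask[i] = mpi_rank;
        if (imove[i] <= 0)
            return;
        /* Signed number of interface planes crossed from the first one. */
        const float d2 = dot(mpi_planes_dist, mpi_planes_dist);
        const float s = dot(r_in[i] - mpi_planes_orig, mpi_planes_dist) / d2;
        /* Particles behind the first plane belong to process 0, the ones
         * between the first and second planes to process 1, and so on. An
         * out-of-range index is simply ignored later on. */
        mpi_mask[i] = (unsigned int)max(0, (int)floor(s) + 1);
    }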
mpi_mask | Output processes mask |
imove | Moving flags |
r_in | Position \( \mathbf{r} \) |
mpi_rank | MPI process index |
mpi_planes_orig | Center of the first interface (between procs) plane |
mpi_planes_dist | Distance between interface planes |
N | Number of particles |
__kernel void restore(__global vec *r_in,
                      __global vec *u_in,
                      __global vec *dudt_in,
                      __global float *rho_in,
                      __global float *drhodt_in,
                      __global float *m,
                      __global int *imove,
                      const __global unsigned int *mpi_mask,
                      const __global vec *mpi_r,
                      const __global vec *mpi_u,
                      const __global vec *mpi_dudt,
                      const __global float *mpi_rho,
                      const __global float *mpi_drhodt,
                      const __global float *mpi_m,
                      unsigned int mpi_rank,
                      unsigned int nbuffer,
                      unsigned int N)
Add the particles received from other processes.
The restored particles always get the imove=0 flag.
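A heavily simplified sketch is given below. It assumes that the received particles are delivered compacted at the head of the mpi_* arrays, that the i-th received particle can be restored directly into buffer slot N - nbuffer + i, and that a received slot is recognised by mpi_mask[i] == mpi_rank; all of that, plus the kernel name, is illustrative rather than the actual implementation. The only point taken from the documentation itself is that restored particles get imove = 0.

    /* Illustrative sketch: append the received particles into the buffer
     * slots at the tail of the particle arrays. */
    __kernel void restore_sketch(__global vec *r_in,
                                 __global vec *u_in,
                                 __global vec *dudt_in,
                                 __global float *rho_in,
                                 __global float *drhodt_in,
                                 __global float *m,
                                 __global int *imove,
                                 const __global unsigned int *mpi_mask,
                                 const __global vec *mpi_r,
                                 const __global vec *mpi_u,
                                 const __global vec *mpi_dudt,
                                 const __global float *mpi_rho,
                                 const __global float *mpi_drhodt,
                                 const __global float *mpi_m,
                                 unsigned int mpi_rank,
                                 unsigned int nbuffer,
                                 unsigned int N)
    {
        const unsigned int i = get_global_id(0);
        /* One work-item per available buffer slot (assumption). */
        if (i >= nbuffer)
            return;
        /* Only slots actually received for this process are restored. */
        if (mpi_mask[i] != mpi_rank)
            return;
        const unsigned int j = N - nbuffer + i;
        r_in[j] = mpi_r[i];
        u_in[j] = mpi_u[i];
        dudt_in[j] = mpi_dudt[i];
        rho_in[j] = mpi_rho[i];
        drhodt_in[j] = mpi_drhodt[i];
        m[j] = mpi_m[i];
        /* Restored particles always get the imove = 0 flag. */
        imove[j] = 0;
    }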
imove | Moving flags |
r_in | Position \( \mathbf{r} \) |
u_in | Velocity \( \mathbf{u} \) |
dudt_in | Velocity rate of change \( \frac{d \mathbf{u}}{d t} \) |
rho_in | Density \( \rho \) |
drhodt_in | Density rate of change \( \frac{d \rho}{d t} \) |
m | Mass \( m \) |
mpi_mask | Incoming processes mask |
mpi_r | Position \( \mathbf{r} \) MPI copy |
mpi_u | Velocity \( \mathbf{u} \) MPI copy |
mpi_dudt | Velocity rate of change \( \frac{d \mathbf{u}}{d t} \) MPI copy |
mpi_rho | Density \( \rho \) MPI copy |
mpi_drhodt | Density rate of change \( \frac{d \rho}{d t} \) MPI copy |
mpi_m | Mass \( m \) MPI copy |
mpi_rank | MPI process index |
nbuffer | Number of buffer particles |
N | Number of particles |
__kernel void sort(const __global unsigned int *mpi_mask_in,
                   __global unsigned int *mpi_mask,
                   const __global uint *id_sorted,
                   unsigned int N)
Sort the mask.
The processes mask can be used for debugging purposes. To that end, it must be sorted first.
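A minimal sketch of such a scatter is shown below, assuming id_sorted[i] gives the position in the sorted space of the particle stored at unsorted position i, as the parameter description below states. The kernel name and body are illustrative.

    /* Illustrative sketch: scatter each unsorted mask value into its
     * position in the sorted space. */
    __kernel void sort_sketch(const __global unsigned int *mpi_mask_in,
                              __global unsigned int *mpi_mask,
                              const __global uint *id_sorted,
                              unsigned int N)
    {
        const unsigned int i = get_global_id(0);
        if (i >= N)
            return;
        mpi_mask[id_sorted[i]] = mpi_mask_in[i];
    }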
mpi_mask_in | Unsorted processes mask |
mpi_mask | Sorted processes mask |
id_sorted | Permutations list from the unsorted space to the sorted one. |
N | Number of particles |