library('clrng')
runifGpu
get_system_info()
## $`1. Operating System`
## [1] "Linux"
##
## $`2. CPU`
## [1] "Intel(R) Xeon(R) W-2145 CPU @ 3.70GHz"
##
## $`3. GPU`
## [1] "\020n\r\xaa9\177" "\020n\r\xaa9\177" "gfx906:sramecc+:xnack-"
##
## $`4. OpenCL Version`
## [1] "OpenCL 3.0 LINUX"
## [2] "OpenCL 1.2 Intel(R) FPGA SDK for OpenCL(TM), Version 20.3"
## [3] "OpenCL 2.1 AMD-APP (3614.0)"
if (detectGPUs()) {
gpuR::listContexts()[,'platform']
setContext(grep("gpu", listContexts()$device_type)[1])
currentDevice()
setBaseCreator(rep(12235,6))
getOption('clrng.Nglobal')
getOption('clrng.type')
myStreamsGpu = clrng::createStreamsGpu()
dim(myStreamsGpu)
as.vector(clrng::runifGpu(10, myStreamsGpu, verbose=2))
b<-clrng::runifGpu(c(100,100), myStreamsGpu)
bvector<-as.vector(as.matrix(b))
hist(bvector, breaks=40)
quantile(bvector)
} else {
message("No GPU detected. Skipping GPU-dependent code.")
}
##
## #pragma OPENCL EXTENSION cl_khr_fp64 : enable
## #define PI_2 M_PI_2
## #define TWOPI 6.283185307179586231996
## #define mrg31k3p_NORM_cl 4.656612873077392578125e-10
## //TWOPI * mrg31k3p_NORM_cl
## #define TWOPI_mrg31k3p_NORM_cl 2.925836158534319248049e-09
##
##
## #define Nrow 10
## #define Ncol 1
## #define NpadStreams 128
## #define NpadCol 128
## #define mrg31k3p_M1 2147483647
## #define mrg31k3p_M2 2147462579
## #define mrg31k3p_MASK12 511
## #define mrg31k3p_MASK13 16777215
## #define mrg31k3p_MASK2 65535
## #define mrg31k3p_MULT2 21069
##
## void streamsToPrivate(__global int* streams, uint* g1, uint* g2, const int start){
## int Drow, Dcol, DrowStart; for(Drow = 0, DrowStart = start, Dcol = DrowStart + 3;
## Drow < 3; Drow++, DrowStart++, Dcol++){
## g1[Drow] = streams[DrowStart];
## g2[Drow] = streams[Dcol];
## }
## }
##
## void streamsFromPrivate(__global int* streams, uint* g1, uint* g2, const int start){
## int Drow, Dcol, DrowStart; for(Drow = 0,DrowStart = start, Dcol = DrowStart + 3;
## Drow < 3; Drow++, DrowStart++, Dcol++){
## streams[DrowStart] = g1[Drow];
## streams[Dcol] = g2[Drow];
## }
## }
##
## uint clrngMrg31k3pNextState(uint *g1, uint *g2) {
## uint y1, y2;
## y1 = ((g1[1] & mrg31k3p_MASK12) << 22) + (g1[1] >> 9)
## + ((g1[2] & mrg31k3p_MASK13) << 7) + (g1[2] >> 24);
## if (y1 >= mrg31k3p_M1)
## y1 -= mrg31k3p_M1;
## y1 += g1[2];
## if (y1 >= mrg31k3p_M1)
## y1 -= mrg31k3p_M1;
## g1[2] = g1[1];
## g1[1] = g1[0];
## g1[0] = y1;
## y1 = ((g2[0] & mrg31k3p_MASK2) << 15) + (mrg31k3p_MULT2 * (g2[0] >> 16));
## if (y1 >= mrg31k3p_M2)
## y1 -= mrg31k3p_M2;
## y2 = ((g2[2] & mrg31k3p_MASK2) << 15) + (mrg31k3p_MULT2 * (g2[2] >> 16));
## if (y2 >= mrg31k3p_M2)
## y2 -= mrg31k3p_M2;
## y2 += g2[2];
## if (y2 >= mrg31k3p_M2)
## y2 -= mrg31k3p_M2;
## y2 += y1;
## if (y2 >= mrg31k3p_M2)
## y2 -= mrg31k3p_M2;
## g2[2] = g2[1];
## g2[1] = g2[0];
## g2[0] = y2;
## if (g1[0] <= g2[0]){
## return (g1[0] - g2[0] + mrg31k3p_M1);
## } else {
## return(g1[0] - g2[0]);
## }
## }
##
##
##
## __kernel void mrg31k3pMatrix(
## __global int* streams,
## __global double* out){
##
## const int index = get_global_id(0)*get_global_size(1) + get_global_id(1);
## int Drow, Dcol, DrowStart, Dentry, DrowBlock, DcolBlock, DrowInBounds;
## const int DrowStartInc = get_global_size(0) * NpadCol;
## uint g1[3], g2[3];
## const int startvalue=index * NpadStreams;
## double temp;
## const double fact = mrg31k3p_NORM_cl;
## streamsToPrivate(streams,g1,g2,startvalue);
## for(DrowBlock = 0, Drow=get_global_id(0), DrowStart = Drow * NpadCol;
## DrowBlock < Nrow;
## Drow += get_global_size(0), DrowBlock +=get_global_size(0), DrowStart += DrowStartInc) {
## DrowInBounds = Drow < Nrow;
## for(DcolBlock = 0, Dcol=get_global_id(1), Dentry = DrowStart + Dcol;
## DcolBlock < Ncol;
## DcolBlock += get_global_size(1), Dentry += get_global_size(1) ) {
## temp = fact * clrngMrg31k3pNextState(g1, g2);
## if(DrowInBounds) out[Dentry] = temp;
## }//Dcol
## }//Drow
## streamsFromPrivate(streams,g1,g2,startvalue);
## }//kernel

## 0% 25% 50% 75% 100%
## 4.066154e-06 2.527364e-01 5.037022e-01 7.535758e-01 9.999677e-01
rnormGpu
if (detectGPUs()) {
setContext(grep("gpu", listContexts()$device_type)[1])
options(clrng.Nglobal = c(128,64))
myStreamsGpu = clrng::createStreamsGpu()
dim(myStreamsGpu)
as.vector(clrng::rnormGpu(10, myStreamsGpu, verbose=2))
as.matrix(clrng::rnormGpu(c(4, 4), myStreamsGpu))
streamsGpu2 <- createStreamsGpu(n =512*128)
a<-clrng::rnormGpu(c(1024,512), streams=streamsGpu2, Nglobal=c(512,128))
avector<-as.vector(as.matrix(a))
hist(avector,breaks=40)
clrng::qqnormGpu(avector)
stats::qqnorm(avector)
} else {
message("No GPU detected. Skipping GPU-dependent code.")
}
##
## #pragma OPENCL EXTENSION cl_khr_fp64 : enable
## #define PI_2 M_PI_2
## #define TWOPI 6.283185307179586231996
## #define mrg31k3p_NORM_cl 4.656612873077392578125e-10
## //TWOPI * mrg31k3p_NORM_cl
## #define TWOPI_mrg31k3p_NORM_cl 2.925836158534319248049e-09
##
##
## #define Nrow 1
## #define Ncol 10
## #define NpadStreams 128
## #define NpadCol 128
## #define mrg31k3p_M1 2147483647
## #define mrg31k3p_M2 2147462579
## #define mrg31k3p_MASK12 511
## #define mrg31k3p_MASK13 16777215
## #define mrg31k3p_MASK2 65535
## #define mrg31k3p_MULT2 21069
##
## void streamsToPrivate(__global int* streams, uint* g1, uint* g2, const int start){
## int Drow, Dcol, DrowStart; for(Drow = 0, DrowStart = start, Dcol = DrowStart + 3;
## Drow < 3; Drow++, DrowStart++, Dcol++){
## g1[Drow] = streams[DrowStart];
## g2[Drow] = streams[Dcol];
## }
## }
##
## void streamsFromPrivate(__global int* streams, uint* g1, uint* g2, const int start){
## int Drow, Dcol, DrowStart; for(Drow = 0,DrowStart = start, Dcol = DrowStart + 3;
## Drow < 3; Drow++, DrowStart++, Dcol++){
## streams[DrowStart] = g1[Drow];
## streams[Dcol] = g2[Drow];
## }
## }
##
## uint clrngMrg31k3pNextState(uint *g1, uint *g2) {
## uint y1, y2;
## y1 = ((g1[1] & mrg31k3p_MASK12) << 22) + (g1[1] >> 9)
## + ((g1[2] & mrg31k3p_MASK13) << 7) + (g1[2] >> 24);
## if (y1 >= mrg31k3p_M1)
## y1 -= mrg31k3p_M1;
## y1 += g1[2];
## if (y1 >= mrg31k3p_M1)
## y1 -= mrg31k3p_M1;
## g1[2] = g1[1];
## g1[1] = g1[0];
## g1[0] = y1;
## y1 = ((g2[0] & mrg31k3p_MASK2) << 15) + (mrg31k3p_MULT2 * (g2[0] >> 16));
## if (y1 >= mrg31k3p_M2)
## y1 -= mrg31k3p_M2;
## y2 = ((g2[2] & mrg31k3p_MASK2) << 15) + (mrg31k3p_MULT2 * (g2[2] >> 16));
## if (y2 >= mrg31k3p_M2)
## y2 -= mrg31k3p_M2;
## y2 += g2[2];
## if (y2 >= mrg31k3p_M2)
## y2 -= mrg31k3p_M2;
## y2 += y1;
## if (y2 >= mrg31k3p_M2)
## y2 -= mrg31k3p_M2;
## g2[2] = g2[1];
## g2[1] = g2[0];
## g2[0] = y2;
## if (g1[0] <= g2[0]){
## return (g1[0] - g2[0] + mrg31k3p_M1);
## } else {
## return(g1[0] - g2[0]);
## }
## }
##
##
##
## __kernel void mrg31k3pMatrix(
## __global int* streams,
## __global double* out){
##
## const int index = get_global_id(0)*get_global_size(1) + get_global_id(1);
## int Drow, Dcol, DrowStart, Dentry, DrowBlock, DcolBlock, DrowInBounds;
## const int DrowStartInc = get_global_size(0) * NpadCol;
## uint g1[3], g2[3];
## const int startvalue=index * NpadStreams;
## double temp;
## const double fact[2] = { mrg31k3p_NORM_cl, TWOPI * mrg31k3p_NORM_cl };
## const double addForSine[2] = { 0.0, - PI_2 };
## local double part[2];
## streamsToPrivate(streams,g1,g2,startvalue);
## for(DrowBlock = 0, Drow=get_global_id(0), DrowStart = Drow * NpadCol;
## DrowBlock < Nrow;
## Drow += get_global_size(0), DrowBlock +=get_global_size(0), DrowStart += DrowStartInc) {
## DrowInBounds = Drow < Nrow;
## for(DcolBlock = 0, Dcol=get_global_id(1), Dentry = DrowStart + Dcol;
## DcolBlock < Ncol;
## DcolBlock += get_global_size(1), Dentry += get_global_size(1) ) {
## part[get_local_id(1)] = fact[get_local_id(1)] * clrngMrg31k3pNextState(g1, g2);
## barrier(CLK_LOCAL_MEM_FENCE);
## temp = sqrt( -2.0*log(part[0]) ) * cos(part[1] + addForSine[get_local_id(1)] );// is cos for local0, sine for local1
## if(DrowInBounds) out[Dentry] = temp;
## barrier(CLK_LOCAL_MEM_FENCE);
## }//Dcol
## }//Drow
## streamsFromPrivate(streams,g1,g2,startvalue);
## }//kernel



rexpGpu
if (detectGPUs()) {
b<-clrng::rexpGpu(c(200,100), rate=1, streams = myStreamsGpu, Nglobal=c(64,16))
bGpu<-as.vector(as.matrix(b))
hist(bGpu, freq=TRUE, breaks=40)
bCpu <- stats::rexp(20000, rate=1)
hist(bCpu, freq=TRUE, breaks = 40)
quantile(bGpu)
quantile(bCpu)
} else {
message("No GPU detected. Skipping GPU-dependent code.")
}


## 0% 25% 50% 75% 100%
## 9.343121e-05 2.847599e-01 6.849064e-01 1.386177e+00 9.095221e+00