39 #include <unordered_map> 
   44 #define __CL_ENABLE_EXCEPTIONS 
   46 #if defined(__APPLE__) || defined(__MACOSX) 
   47 #include <OpenCL/cl.hpp> 
   66     void readSource (
const std::vector<std::string> &kernel_filenames, 
 
   67                      std::vector<std::string> &sourceCodes);
 
   70     void split (
const std::string &str, 
char delim, 
 
   71                 std::vector<std::string> &names);
 
   75     std::pair<const char *, size_t> 
 
   88         CLEnv (
const std::vector<std::string> &kernel_filenames = std::vector<std::string> (), 
 
   89                const char *build_options = 
nullptr);
 
   90         CLEnv (
const std::string &kernel_filename, 
 
   91                const char *build_options = 
nullptr);
 
   94         cl::Context& 
getContext (
unsigned int pIdx = 0);
 
   97         cl::CommandQueue& 
getQueue (
unsigned int ctxIdx = 0, 
unsigned int qIdx = 0);
 
   99         cl::Program& 
getProgram (
unsigned int pgIdx = 0);
 
  101         cl::Kernel& 
getKernel (
const char *kernelName, 
unsigned int pgIdx = 0);
 
  103         cl::Context& 
addContext (
unsigned int pIdx, 
const bool gl_shared = 
false);
 
  105         cl::CommandQueue& 
addQueue (
unsigned int ctxIdx, 
unsigned int dIdx, cl_command_queue_properties props = 0);
 
  107         cl::CommandQueue& 
addQueueGL (
unsigned int ctxIdx, cl_command_queue_properties props = 0);
 
  110                                 const std::vector<std::string> &kernel_filenames, 
 
  111                                 const char *kernel_name = 
nullptr, 
 
  112                                 const char *build_options = 
nullptr);
 
  114                                 const std::string &kernel_filename, 
 
  115                                 const char *kernel_name = 
nullptr, 
 
  116                                 const char *build_options = 
nullptr);
 
  125         std::vector< std::vector<cl::Device> > 
devices;
 
  128         std::vector<cl::Context> contexts;  
 
  131         std::vector< std::vector<cl::CommandQueue> > queues;
 
  132         std::vector<cl::Program> programs;  
 
  135         std::vector< std::vector<cl::Kernel> > kernels;
 
  159         std::vector< std::unordered_map<std::string, unsigned int> > kernelIdx;
 
  173     template<
unsigned int nQueues = 1>
 
  187         CLEnvInfo (
unsigned int _pIdx = 0, 
unsigned int _dIdx = 0, 
unsigned int _ctxIdx = 0, 
 
  188                    const std::vector<unsigned int> _qIdx = { 0 }, 
unsigned int _pgIdx = 0) : 
 
  193                 if (_qIdx.size () != nQueues)
 
  194                     throw "The provided vector of command queue indices has the wrong size";
 
  198             catch (
const char *error)
 
  200                 std::cerr << 
"Error[CLEnvInfo]: " << error << std::endl;
 
  218             catch (
const std::out_of_range &error)
 
  220                 std::cerr << 
"Out of Range error: " << error.what () 
 
  221                           << 
" (" << __FILE__ << 
":" << __LINE__ << 
")" << std::endl;
 
  230         std::vector<unsigned int> 
qIdx;  
 
  243     template <u
int nSize, 
typename rep = 
double>
 
  251         ProfilingInfo (std::string pLabel = std::string (), std::string pUnit = std::string (
"ms")) 
 
  252             : label (pLabel), tExec (nSize), tWidth (4 + log10 (nSize)), tUnit (pUnit)
 
  259             assert (idx >= 0 && idx < nSize);
 
  270             return std::accumulate (tExec.begin (), tExec.end (), initVal);
 
  279             return total() / (rep) tExec.size ();
 
  288             return *std::min_element (tExec.begin (), tExec.end ());
 
  297             return *std::max_element (tExec.begin (), tExec.end ());
 
  316         void print (
const char *title = 
nullptr, 
bool bLine = 
true)
 
  318             std::ios::fmtflags f (std::cout.flags ());
 
  319             std::cout << std::fixed << std::setprecision (3);
 
  322                 std::cout << std::endl << title << std::endl << std::endl;
 
  324                 std::cout << std::endl;
 
  326             std::cout << 
" " << label << std::endl;
 
  327             std::cout << 
" " << std::string (label.size (), 
'-') << std::endl;
 
  328             std::cout << 
"   Mean   : " << std::setw (tWidth) << 
mean ()  << 
" " << tUnit << std::endl;
 
  329             std::cout << 
"   Min    : " << std::setw (tWidth) << 
min ()   << 
" " << tUnit << std::endl;
 
  330             std::cout << 
"   Max    : " << std::setw (tWidth) << 
max ()   << 
" " << tUnit << std::endl;
 
  331             std::cout << 
"   Total  : " << std::setw (tWidth) << 
total () << 
" " << tUnit << std::endl;
 
  332             if (bLine) std::cout << std::endl;
 
  349                 std::cout << std::endl << title << std::endl;
 
  351             refProf.
print (
nullptr, 
false);
 
  352             print (
nullptr, 
false);
 
  354             std::cout << std::endl << 
" Benchmark" << std::endl << 
" ---------" << std::endl;
 
  356             std::cout << 
"   Speedup: " << std::setw (tWidth) << 
speedup (refProf) << std::endl << std::endl;
 
  361         std::vector<rep> tExec;  
 
  374     template <
typename rep = 
int64_t, 
typename period = std::milli>
 
  398             tReference = std::chrono::high_resolution_clock::now ();
 
  407             tDuration += std::chrono::duration_cast< std::chrono::duration<rep, period> > 
 
  408                 (std::chrono::high_resolution_clock::now () - tReference);
 
  420             return tDuration.count ();
 
  426             tDuration = std::chrono::duration<rep, period>::zero ();
 
  431         std::chrono::time_point<std::chrono::high_resolution_clock> tReference;
 
  433         std::chrono::duration<rep, period> tDuration;
 
  442     template <
typename period = std::milli>
 
  451             size_t tRes = device.getInfo<CL_DEVICE_PROFILING_TIMER_RESOLUTION> ();  
 
  453             tUnit = (double) tPeriod.den / (
double) tPeriod.num / 1000000000.0 * tRes;
 
  480             cl_ulong start = pEvent.getProfilingInfo<CL_PROFILING_COMMAND_START> ();
 
  481             cl_ulong end = pEvent.getProfilingInfo<CL_PROFILING_COMMAND_END> ();
 
  483             return (end - start) * tUnit;
 
  493 #endif  // CLUTILS_HPP 
void split(const std::string &str, char delim, std::vector< std::string > &names)
Splits a string on the requested delimiter. 
Definition: CLUtils.cpp:232
 
double duration()
Returns the time measured by the timer. 
Definition: CLUtils.hpp:478
 
cl::Context & getContext(unsigned int pIdx=0)
Gets back one of the existing contexts. 
Definition: CLUtils.cpp:347
 
void wait()
This is an interface for cl::Event::wait. 
Definition: CLUtils.hpp:468
 
const char * getOpenCLErrorCodeString(int errorCode)
Returns the name of an error code. 
Definition: CLUtils.cpp:54
 
rep & operator[](const int idx)
Definition: CLUtils.hpp:257
 
std::pair< const char *, size_t > make_kernel_pair(const std::string &kernel_filename)
Creates a pair of a char array (source code) and its size. 
Definition: CLUtils.cpp:249
 
unsigned int pgIdx
Definition: CLUtils.hpp:231
 
rep max()
Returns the max time of the #nSize execution times. 
Definition: CLUtils.hpp:295
 
cl::Kernel & addProgram(unsigned int ctxIdx, const std::vector< std::string > &kernel_filenames, const char *kernel_name=nullptr, const char *build_options=nullptr)
Creates a program for the specified context. 
Definition: CLUtils.cpp:586
 
Facilitates the conveyance of CLEnv arguments. 
Definition: CLUtils.hpp:174
 
cl::Program & getProgram(unsigned int pgIdx=0)
Gets back one of the existing programs. 
Definition: CLUtils.cpp:387
 
void print(ProfilingInfo &refProf, const char *title=nullptr)
Displays summarizing results on two tests. 
Definition: CLUtils.hpp:346
 
void readSource(const std::vector< std::string > &kernel_filenames, std::vector< std::string > &sourceCodes)
Reads in the contents from the requested files. 
Definition: CLUtils.cpp:203
 
CLEnvInfo< 1 > getCLEnvInfo(unsigned int idx)
Creates a new CLEnvInfo object with the specified command queue. 
Definition: CLUtils.hpp:212
 
It brings together functionality common to all OpenCL projects. 
Definition: CLUtils.hpp:57
 
A class for profiling CL devices. 
Definition: CLUtils.hpp:443
 
void print(const char *title=nullptr, bool bLine=true)
Displays summarizing results on the test. 
Definition: CLUtils.hpp:316
 
virtual void initGLMemObjects()
Initializes the OpenGL memory buffers. 
Definition: CLUtils.hpp:149
 
Sets up an OpenCL environment. 
Definition: CLUtils.hpp:85
 
rep duration()
Returns the time measured by the timer. 
Definition: CLUtils.hpp:418
 
rep min()
Returns the min time of the #nSize execution times. 
Definition: CLUtils.hpp:286
 
bool checkCLGLInterop(cl::Device &device)
Checks the availability of the "GL Sharing" capability. 
Definition: CLUtils.cpp:185
 
A class that collects and manipulates timing information about a test. 
Definition: CLUtils.hpp:244
 
A class for measuring execution times. 
Definition: CLUtils.hpp:375
 
GPUTimer(cl::Device &device)
Definition: CLUtils.hpp:448
 
unsigned int ctxIdx
Definition: CLUtils.hpp:229
 
std::vector< std::vector< cl::Device > > devices
List of devices per platform. 
Definition: CLUtils.hpp:125
 
void start(bool tReset=true)
Starts the timer. 
Definition: CLUtils.hpp:393
 
CLEnv(const std::vector< std::string > &kernel_filenames=std::vector< std::string >(), const char *build_options=nullptr)
Definition: CLUtils.cpp:266
 
void reset()
Resets the timer. 
Definition: CLUtils.hpp:424
 
CPUTimer(int initVal=0)
Constructs a timer. 
Definition: CLUtils.hpp:383
 
unsigned int dIdx
Definition: CLUtils.hpp:228
 
cl::CommandQueue & addQueueGL(unsigned int ctxIdx, cl_command_queue_properties props=0)
Creates a queue for the GL-shared device in the specified context. 
Definition: CLUtils.cpp:544
 
ProfilingInfo(std::string pLabel=std::string(), std::string pUnit=std::string("ms"))
Definition: CLUtils.hpp:251
 
std::vector< unsigned int > qIdx
Definition: CLUtils.hpp:230
 
rep stop()
Stops the timer. 
Definition: CLUtils.hpp:405
 
cl::CommandQueue & getQueue(unsigned int ctxIdx=0, unsigned int qIdx=0)
Gets back one of the existing command queues in the specified context. 
Definition: CLUtils.cpp:368
 
cl::Event & event()
Returns a new unpopulated event. 
Definition: CLUtils.hpp:461
 
rep mean()
Returns the mean time of the #nSize execution times. 
Definition: CLUtils.hpp:277
 
rep speedup(ProfilingInfo &refProf)
Returns the relative performance speedup wrt refProf. 
Definition: CLUtils.hpp:305
 
std::vector< cl::Platform > platforms
Definition: CLUtils.hpp:122
 
cl::Kernel & getKernel(const char *kernelName, unsigned int pgIdx=0)
Gets back one of the existing kernels in some program. 
Definition: CLUtils.cpp:407
 
cl::Context & addContext(unsigned int pIdx, const bool gl_shared=false)
Creates a context for all devices in the requested platform. 
Definition: CLUtils.cpp:433
 
CLEnvInfo(unsigned int _pIdx=0, unsigned int _dIdx=0, unsigned int _ctxIdx=0, const std::vector< unsigned int > _qIdx={0}, unsigned int _pgIdx=0)
Initializes a CLEnvInfo object. 
Definition: CLUtils.hpp:187
 
rep total(rep initVal=0.0)
Returns the sum of the #nSize execution times. 
Definition: CLUtils.hpp:268
 
unsigned int pIdx
Definition: CLUtils.hpp:227
 
cl::CommandQueue & addQueue(unsigned int ctxIdx, unsigned int dIdx, cl_command_queue_properties props=0)
Creates a queue for the specified device in the specified context. 
Definition: CLUtils.cpp:515