CLUtils  0.2.1
 Hosted by GitHub
CLUtils.hpp
Go to the documentation of this file.
1 
32 #ifndef CLUTILS_HPP
33 #define CLUTILS_HPP
34 
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
43 
44 #define __CL_ENABLE_EXCEPTIONS
45 
46 #if defined(__APPLE__) || defined(__MACOSX)
47 #include <OpenCL/cl.hpp>
48 #else
49 #include <CL/cl.hpp>
50 #endif
51 
57 namespace clutils
58 {
/// Returns the name of an error code.
60  const char* getOpenCLErrorCodeString (int errorCode);
61 
/// Checks the availability of the "GL Sharing" capability.
63  bool checkCLGLInterop (cl::Device &device);
64 
/// Reads in the contents from the requested files.
/// The result is delivered through the `sourceCodes` output parameter.
66  void readSource (const std::vector<std::string> &kernel_filenames,
67  std::vector<std::string> &sourceCodes);
68 
/// Splits a string on the requested delimiter.
/// The result is delivered through the `names` output parameter.
70  void split (const std::string &str, char delim,
71  std::vector<std::string> &names);
72 
73 
/// Creates a pair of a char array (source code) and its size.
// NOTE(review): the returned `const char *` is non-owning as far as the type
// shows; who owns the buffer is decided in CLUtils.cpp — confirm before use.
75  std::pair<const char *, size_t>
76  make_kernel_pair (const std::string &kernel_filename);
77 
78 
85  class CLEnv
86  {
87  public:
88  CLEnv (const std::vector<std::string> &kernel_filenames = std::vector<std::string> (),
89  const char *build_options = nullptr);
90  CLEnv (const std::string &kernel_filename,
91  const char *build_options = nullptr);
92  virtual ~CLEnv () {};
94  cl::Context& getContext (unsigned int pIdx = 0);
97  cl::CommandQueue& getQueue (unsigned int ctxIdx = 0, unsigned int qIdx = 0);
99  cl::Program& getProgram (unsigned int pgIdx = 0);
101  cl::Kernel& getKernel (const char *kernelName, unsigned int pgIdx = 0);
103  cl::Context& addContext (unsigned int pIdx, const bool gl_shared = false);
105  cl::CommandQueue& addQueue (unsigned int ctxIdx, unsigned int dIdx, cl_command_queue_properties props = 0);
107  cl::CommandQueue& addQueueGL (unsigned int ctxIdx, cl_command_queue_properties props = 0);
109  cl::Kernel& addProgram (unsigned int ctxIdx,
110  const std::vector<std::string> &kernel_filenames,
111  const char *kernel_name = nullptr,
112  const char *build_options = nullptr);
113  cl::Kernel& addProgram (unsigned int ctxIdx,
114  const std::string &kernel_filename,
115  const char *kernel_name = nullptr,
116  const char *build_options = nullptr);
117 
118  // Objects associated with an OpenCL environment.
119  // For each of a number of objects, there is a vector that
120  // can hold all instances of that object.
121 
122  std::vector<cl::Platform> platforms;
125  std::vector< std::vector<cl::Device> > devices;
126 
127  private:
128  std::vector<cl::Context> contexts;
131  std::vector< std::vector<cl::CommandQueue> > queues;
132  std::vector<cl::Program> programs;
135  std::vector< std::vector<cl::Kernel> > kernels;
136 
137  protected:
149  virtual void initGLMemObjects () {};
150 
151  private:
159  std::vector< std::unordered_map<std::string, unsigned int> > kernelIdx;
160  };
161 
162 
/// @brief Facilitates the conveyance of CLEnv arguments.
///
/// Bundles the indices that select objects inside a CLEnv (platform, device,
/// context, command queues, program) so they can be passed around as one value.
///
/// @tparam nQueues Number of command queue indices the object must hold.
template<unsigned int nQueues = 1>
class CLEnvInfo
{
public:
    /// @brief Initializes a CLEnvInfo object.
    ///
    /// @param _pIdx   Platform index.
    /// @param _dIdx   Device index.
    /// @param _ctxIdx Context index.
    /// @param _qIdx   Command queue indices; must contain exactly @p nQueues
    ///                elements.
    /// @param _pgIdx  Program index.
    ///
    /// If @p _qIdx has the wrong size, an error is written to std::cerr and
    /// the process terminates with EXIT_FAILURE.
    CLEnvInfo (unsigned int _pIdx = 0, unsigned int _dIdx = 0, unsigned int _ctxIdx = 0,
               std::vector<unsigned int> _qIdx = { 0 }, unsigned int _pgIdx = 0)
        : pIdx (_pIdx), dIdx (_dIdx), ctxIdx (_ctxIdx),
          qIdx (std::move (_qIdx)), pgIdx (_pgIdx)
    {
        // Checked directly instead of throwing and catching locally.
        if (qIdx.size () != nQueues)
        {
            std::cerr << "Error[CLEnvInfo]: "
                      << "The provided vector of command queue indices has the wrong size"
                      << std::endl;
            std::exit (EXIT_FAILURE);
        }
    }

    /// @brief Creates a new CLEnvInfo object with the specified command queue.
    ///
    /// @param idx Position, within this object's qIdx, of the queue index to keep.
    /// @return A single-queue CLEnvInfo sharing all other indices with this one.
    ///
    /// If @p idx is out of range, an error is written to std::cerr and the
    /// process terminates with EXIT_FAILURE.
    CLEnvInfo<1> getCLEnvInfo (unsigned int idx) const
    {
        try
        {
            return CLEnvInfo<1> (pIdx, dIdx, ctxIdx, { qIdx.at (idx) }, pgIdx);
        }
        catch (const std::out_of_range &error)
        {
            std::cerr << "Out of Range error: " << error.what ()
                      << " (" << __FILE__ << ":" << __LINE__ << ")" << std::endl;
            std::exit (EXIT_FAILURE);
        }
    }

    unsigned int pIdx;               ///< Platform index.
    unsigned int dIdx;               ///< Device index.
    unsigned int ctxIdx;             ///< Context index.
    std::vector<unsigned int> qIdx;  ///< Command queue indices (nQueues of them).
    unsigned int pgIdx;              ///< Program index.
};
233 
234 
/// @brief A class that collects and manipulates timing information about a test.
///
/// Stores @p nSize execution times and reports their total, mean, min and max.
///
/// @tparam nSize Number of execution times held.
/// @tparam rep   Arithmetic type of each stored time.
template <unsigned int nSize, typename rep = double>
class ProfilingInfo
{
public:
    /// @param pLabel Label printed as the heading of the report.
    /// @param pUnit  Unit suffix printed after each value (default "ms").
    ProfilingInfo (std::string pLabel = std::string (), std::string pUnit = std::string ("ms"))
        : label (std::move (pLabel)), tExec (nSize), tWidth (fieldWidth ()), tUnit (std::move (pUnit))
    {
    }

    /// Accesses the idx-th execution time; idx must be in [0, nSize).
    rep& operator[] (const int idx)
    {
        assert (idx >= 0 && static_cast<unsigned int> (idx) < nSize);
        return tExec[static_cast<unsigned int> (idx)];
    }

    /// Read-only access to the idx-th execution time; idx must be in [0, nSize).
    const rep& operator[] (const int idx) const
    {
        assert (idx >= 0 && static_cast<unsigned int> (idx) < nSize);
        return tExec[static_cast<unsigned int> (idx)];
    }

    /// @brief Returns the sum of the nSize execution times.
    /// @param initVal Value the summation starts from.
    rep total (rep initVal = 0.0) const
    {
        return std::accumulate (tExec.begin (), tExec.end (), initVal);
    }

    /// @brief Returns the mean time of the nSize execution times.
    /// @return The mean, or a value-initialized rep when nSize is 0.
    rep mean () const
    {
        if (tExec.empty ())
            return rep ();  // avoids dividing by zero

        return total () / static_cast<rep> (tExec.size ());
    }

    /// @brief Returns the min time of the nSize execution times.
    /// @return The minimum, or a value-initialized rep when nSize is 0.
    rep min () const
    {
        if (tExec.empty ())
            return rep ();  // min_element would return end(), which must not be dereferenced

        return *std::min_element (tExec.begin (), tExec.end ());
    }

    /// @brief Returns the max time of the nSize execution times.
    /// @return The maximum, or a value-initialized rep when nSize is 0.
    rep max () const
    {
        if (tExec.empty ())
            return rep ();  // max_element would return end(), which must not be dereferenced

        return *std::max_element (tExec.begin (), tExec.end ());
    }

    /// @brief Returns the relative performance speedup wrt refProf.
    /// @return refProf.mean () / mean (). No guard is applied when this
    ///         object's mean is zero.
    rep speedup (const ProfilingInfo &refProf) const
    {
        return refProf.mean () / mean ();
    }

    /// @brief Displays summarizing results on the test.
    ///
    /// Values are written to std::cout in fixed notation with 3 decimals.
    /// The stream's format flags and precision are restored before returning.
    ///
    /// @param title Optional heading printed before the report.
    /// @param bLine Whether to finish the report with an empty line.
    void print (const char *title = nullptr, bool bLine = true) const
    {
        const std::ios::fmtflags savedFlags (std::cout.flags ());
        const std::streamsize savedPrecision = std::cout.precision ();
        std::cout << std::fixed << std::setprecision (3);

        if (title)
            std::cout << std::endl << title << std::endl << std::endl;
        else
            std::cout << std::endl;

        std::cout << " " << label << std::endl;
        std::cout << " " << std::string (label.size (), '-') << std::endl;
        std::cout << " Mean : " << std::setw (tWidth) << mean () << " " << tUnit << std::endl;
        std::cout << " Min : " << std::setw (tWidth) << min () << " " << tUnit << std::endl;
        std::cout << " Max : " << std::setw (tWidth) << max () << " " << tUnit << std::endl;
        std::cout << " Total : " << std::setw (tWidth) << total () << " " << tUnit << std::endl;
        if (bLine) std::cout << std::endl;

        // flags() does not cover precision, so it is restored separately.
        std::cout.flags (savedFlags);
        std::cout.precision (savedPrecision);
    }

    /// @brief Displays summarizing results on two tests.
    ///
    /// Prints the report of @p refProf, then this object's report, then the
    /// speedup of this object relative to @p refProf.
    ///
    /// @param refProf Reference measurements.
    /// @param title   Optional heading printed before both reports.
    void print (const ProfilingInfo &refProf, const char *title = nullptr) const
    {
        if (title)
            std::cout << std::endl << title << std::endl;

        refProf.print (nullptr, false);
        print (nullptr, false);

        std::cout << std::endl << " Benchmark" << std::endl << " ---------" << std::endl;

        // Precision 3 under the caller's float format, restored afterwards.
        const std::streamsize savedPrecision = std::cout.precision ();
        std::cout << std::setprecision (3);
        std::cout << " Speedup: " << std::setw (tWidth) << speedup (refProf) << std::endl << std::endl;
        std::cout.precision (savedPrecision);
    }

private:
    /// Field width for printed values: 4 + floor(log10(nSize)), computed with
    /// integers so that nSize == 0 is well defined (it yields 4).
    static int fieldWidth ()
    {
        int width = 4;
        for (unsigned int n = nSize; n >= 10; n /= 10)
            ++width;
        return width;
    }

    std::string label;       // Heading of the report.
    std::vector<rep> tExec;  // The nSize execution times.
    int tWidth;              // Field width passed to std::setw.
    std::string tUnit;       // Unit suffix printed after each value.
};
365 
366 
/// @brief A class for measuring execution times.
///
/// Accumulates elapsed wall time over one or more start()/stop() pairs.
/// Time is read from std::chrono::steady_clock, which is monotonic, so a
/// system clock adjustment cannot produce a negative or distorted interval.
///
/// @tparam rep    Arithmetic type the measured time is reported in.
/// @tparam period std::ratio giving the unit of the reported time, in seconds
///                (default std::milli, i.e. milliseconds).
template <typename rep = int64_t, typename period = std::milli>
class CPUTimer
{
public:
    /// @brief Constructs a timer.
    /// @param initVal Initial accumulated duration, in units of @p period.
    CPUTimer (int initVal = 0) : tDuration (initVal)
    {
    }

    /// @brief Starts the timer.
    /// @param tReset When true, the accumulated duration is cleared first.
    ///               When false, the next stop() adds to it.
    void start (bool tReset = true)
    {
        if (tReset)
            reset ();

        tReference = clock::now ();
    }

    /// @brief Stops the timer.
    ///
    /// Adds the time elapsed since the last start() to the accumulated
    /// duration. Calling stop() without a prior start() measures from the
    /// clock's epoch.
    ///
    /// @return The accumulated duration, in units of @p period.
    rep stop ()
    {
        tDuration += std::chrono::duration_cast<interval> (clock::now () - tReference);

        return duration ();
    }

    /// @brief Returns the time measured by the timer.
    /// @return The accumulated duration, in units of @p period.
    rep duration () const
    {
        return tDuration.count ();
    }

    /// @brief Resets the timer (sets the accumulated duration to zero).
    void reset ()
    {
        tDuration = interval::zero ();
    }

private:
    using clock = std::chrono::steady_clock;
    using interval = std::chrono::duration<rep, period>;

    std::chrono::time_point<clock> tReference;  // Time of the last start().
    interval tDuration;                         // Accumulated measured time.
};
435 
436 
442  template <typename period = std::milli>
443  class GPUTimer
444  {
445  public:
448  GPUTimer (cl::Device &device)
449  {
450  period tPeriod;
451  size_t tRes = device.getInfo<CL_DEVICE_PROFILING_TIMER_RESOLUTION> (); // x nanoseconds
452  // Converts nanoseconds to seconds and then to the requested scale
453  tUnit = (double) tPeriod.den / (double) tPeriod.num / 1000000000.0 * tRes;
454  }
455 
461  cl::Event& event ()
462  {
463  return pEvent;
464  }
465 
468  void wait ()
469  {
470  pEvent.wait ();
471  }
472 
478  double duration ()
479  {
480  cl_ulong start = pEvent.getProfilingInfo<CL_PROFILING_COMMAND_START> ();
481  cl_ulong end = pEvent.getProfilingInfo<CL_PROFILING_COMMAND_END> ();
482 
483  return (end - start) * tUnit;
484  }
485 
486  private:
487  cl::Event pEvent;
488  double tUnit;
489  };
490 
491 }
492 
493 #endif // CLUTILS_HPP
void split(const std::string &str, char delim, std::vector< std::string > &names)
Splits a string on the requested delimiter.
Definition: CLUtils.cpp:232
double duration()
Returns the time measured by the timer.
Definition: CLUtils.hpp:478
cl::Context & getContext(unsigned int pIdx=0)
Gets back one of the existing contexts.
Definition: CLUtils.cpp:347
void wait()
This is an interface for cl::Event::wait.
Definition: CLUtils.hpp:468
const char * getOpenCLErrorCodeString(int errorCode)
Returns the name of an error code.
Definition: CLUtils.cpp:54
rep & operator[](const int idx)
Definition: CLUtils.hpp:257
std::pair< const char *, size_t > make_kernel_pair(const std::string &kernel_filename)
Creates a pair of a char array (source code) and its size.
Definition: CLUtils.cpp:249
unsigned int pgIdx
Definition: CLUtils.hpp:231
rep max()
Returns the max time of the #nSize execution times.
Definition: CLUtils.hpp:295
cl::Kernel & addProgram(unsigned int ctxIdx, const std::vector< std::string > &kernel_filenames, const char *kernel_name=nullptr, const char *build_options=nullptr)
Creates a program for the specified context.
Definition: CLUtils.cpp:586
Facilitates the conveyance of CLEnv arguments.
Definition: CLUtils.hpp:174
cl::Program & getProgram(unsigned int pgIdx=0)
Gets back one of the existing programs.
Definition: CLUtils.cpp:387
void print(ProfilingInfo &refProf, const char *title=nullptr)
Displays summarizing results on two tests.
Definition: CLUtils.hpp:346
void readSource(const std::vector< std::string > &kernel_filenames, std::vector< std::string > &sourceCodes)
Reads in the contents from the requested files.
Definition: CLUtils.cpp:203
CLEnvInfo< 1 > getCLEnvInfo(unsigned int idx)
Creates a new CLEnvInfo object with the specified command queue.
Definition: CLUtils.hpp:212
It brings together functionality common to all OpenCL projects.
Definition: CLUtils.hpp:57
A class for profiling CL devices.
Definition: CLUtils.hpp:443
void print(const char *title=nullptr, bool bLine=true)
Displays summarizing results on the test.
Definition: CLUtils.hpp:316
virtual void initGLMemObjects()
Initializes the OpenGL memory buffers.
Definition: CLUtils.hpp:149
Sets up an OpenCL environment.
Definition: CLUtils.hpp:85
rep duration()
Returns the time measured by the timer.
Definition: CLUtils.hpp:418
rep min()
Returns the min time of the #nSize execution times.
Definition: CLUtils.hpp:286
bool checkCLGLInterop(cl::Device &device)
Checks the availability of the "GL Sharing" capability.
Definition: CLUtils.cpp:185
A class that collects and manipulates timing information about a test.
Definition: CLUtils.hpp:244
A class for measuring execution times.
Definition: CLUtils.hpp:375
GPUTimer(cl::Device &device)
Definition: CLUtils.hpp:448
unsigned int ctxIdx
Definition: CLUtils.hpp:229
std::vector< std::vector< cl::Device > > devices
List of devices per platform.
Definition: CLUtils.hpp:125
void start(bool tReset=true)
Starts the timer.
Definition: CLUtils.hpp:393
CLEnv(const std::vector< std::string > &kernel_filenames=std::vector< std::string >(), const char *build_options=nullptr)
Definition: CLUtils.cpp:266
void reset()
Resets the timer.
Definition: CLUtils.hpp:424
CPUTimer(int initVal=0)
Constructs a timer.
Definition: CLUtils.hpp:383
unsigned int dIdx
Definition: CLUtils.hpp:228
cl::CommandQueue & addQueueGL(unsigned int ctxIdx, cl_command_queue_properties props=0)
Creates a queue for the GL-shared device in the specified context.
Definition: CLUtils.cpp:544
ProfilingInfo(std::string pLabel=std::string(), std::string pUnit=std::string("ms"))
Definition: CLUtils.hpp:251
std::vector< unsigned int > qIdx
Definition: CLUtils.hpp:230
rep stop()
Stops the timer.
Definition: CLUtils.hpp:405
cl::CommandQueue & getQueue(unsigned int ctxIdx=0, unsigned int qIdx=0)
Gets back one of the existing command queues in the specified context.
Definition: CLUtils.cpp:368
cl::Event & event()
Returns a new unpopulated event.
Definition: CLUtils.hpp:461
rep mean()
Returns the mean time of the #nSize execution times.
Definition: CLUtils.hpp:277
rep speedup(ProfilingInfo &refProf)
Returns the relative performance speedup wrt refProf.
Definition: CLUtils.hpp:305
std::vector< cl::Platform > platforms
Definition: CLUtils.hpp:122
cl::Kernel & getKernel(const char *kernelName, unsigned int pgIdx=0)
Gets back one of the existing kernels in some program.
Definition: CLUtils.cpp:407
cl::Context & addContext(unsigned int pIdx, const bool gl_shared=false)
Creates a context for all devices in the requested platform.
Definition: CLUtils.cpp:433
CLEnvInfo(unsigned int _pIdx=0, unsigned int _dIdx=0, unsigned int _ctxIdx=0, const std::vector< unsigned int > _qIdx={0}, unsigned int _pgIdx=0)
Initializes a CLEnvInfo object.
Definition: CLUtils.hpp:187
rep total(rep initVal=0.0)
Returns the sum of the #nSize execution times.
Definition: CLUtils.hpp:268
unsigned int pIdx
Definition: CLUtils.hpp:227
cl::CommandQueue & addQueue(unsigned int ctxIdx, unsigned int dIdx, cl_command_queue_properties props=0)
Creates a queue for the specified device in the specified context.
Definition: CLUtils.cpp:515