39 #include <unordered_map>
44 #define __CL_ENABLE_EXCEPTIONS
46 #if defined(__APPLE__) || defined(__MACOSX)
47 #include <OpenCL/cl.hpp>
66 void readSource (
const std::vector<std::string> &kernel_filenames,
67 std::vector<std::string> &sourceCodes);
70 void split (
const std::string &str,
char delim,
71 std::vector<std::string> &names);
75 std::pair<const char *, size_t>
88 CLEnv (
const std::vector<std::string> &kernel_filenames = std::vector<std::string> (),
89 const char *build_options =
nullptr);
90 CLEnv (
const std::string &kernel_filename,
91 const char *build_options =
nullptr);
94 cl::Context&
getContext (
unsigned int pIdx = 0);
97 cl::CommandQueue&
getQueue (
unsigned int ctxIdx = 0,
unsigned int qIdx = 0);
99 cl::Program&
getProgram (
unsigned int pgIdx = 0);
101 cl::Kernel&
getKernel (
const char *kernelName,
unsigned int pgIdx = 0);
103 cl::Context&
addContext (
unsigned int pIdx,
const bool gl_shared =
false);
105 cl::CommandQueue&
addQueue (
unsigned int ctxIdx,
unsigned int dIdx, cl_command_queue_properties props = 0);
107 cl::CommandQueue&
addQueueGL (
unsigned int ctxIdx, cl_command_queue_properties props = 0);
110 const std::vector<std::string> &kernel_filenames,
111 const char *kernel_name =
nullptr,
112 const char *build_options =
nullptr);
114 const std::string &kernel_filename,
115 const char *kernel_name =
nullptr,
116 const char *build_options =
nullptr);
125 std::vector< std::vector<cl::Device> >
devices;
128 std::vector<cl::Context> contexts;
131 std::vector< std::vector<cl::CommandQueue> > queues;
132 std::vector<cl::Program> programs;
135 std::vector< std::vector<cl::Kernel> > kernels;
159 std::vector< std::unordered_map<std::string, unsigned int> > kernelIdx;
173 template<
unsigned int nQueues = 1>
187 CLEnvInfo (
unsigned int _pIdx = 0,
unsigned int _dIdx = 0,
unsigned int _ctxIdx = 0,
188 const std::vector<unsigned int> _qIdx = { 0 },
unsigned int _pgIdx = 0) :
193 if (_qIdx.size () != nQueues)
194 throw "The provided vector of command queue indices has the wrong size";
198 catch (
const char *error)
200 std::cerr <<
"Error[CLEnvInfo]: " << error << std::endl;
218 catch (
const std::out_of_range &error)
220 std::cerr <<
"Out of Range error: " << error.what ()
221 <<
" (" << __FILE__ <<
":" << __LINE__ <<
")" << std::endl;
230 std::vector<unsigned int>
qIdx;
243 template <u
int nSize,
typename rep =
double>
251 ProfilingInfo (std::string pLabel = std::string (), std::string pUnit = std::string (
"ms"))
252 : label (pLabel), tExec (nSize), tWidth (4 + log10 (nSize)), tUnit (pUnit)
259 assert (idx >= 0 && idx < nSize);
270 return std::accumulate (tExec.begin (), tExec.end (), initVal);
279 return total() / (rep) tExec.size ();
288 return *std::min_element (tExec.begin (), tExec.end ());
297 return *std::max_element (tExec.begin (), tExec.end ());
316 void print (
const char *title =
nullptr,
bool bLine =
true)
318 std::ios::fmtflags f (std::cout.flags ());
319 std::cout << std::fixed << std::setprecision (3);
322 std::cout << std::endl << title << std::endl << std::endl;
324 std::cout << std::endl;
326 std::cout <<
" " << label << std::endl;
327 std::cout <<
" " << std::string (label.size (),
'-') << std::endl;
328 std::cout <<
" Mean : " << std::setw (tWidth) <<
mean () <<
" " << tUnit << std::endl;
329 std::cout <<
" Min : " << std::setw (tWidth) <<
min () <<
" " << tUnit << std::endl;
330 std::cout <<
" Max : " << std::setw (tWidth) <<
max () <<
" " << tUnit << std::endl;
331 std::cout <<
" Total : " << std::setw (tWidth) <<
total () <<
" " << tUnit << std::endl;
332 if (bLine) std::cout << std::endl;
349 std::cout << std::endl << title << std::endl;
351 refProf.
print (
nullptr,
false);
352 print (
nullptr,
false);
354 std::cout << std::endl <<
" Benchmark" << std::endl <<
" ---------" << std::endl;
356 std::cout <<
" Speedup: " << std::setw (tWidth) <<
speedup (refProf) << std::endl << std::endl;
361 std::vector<rep> tExec;
374 template <
typename rep =
int64_t,
typename period = std::milli>
398 tReference = std::chrono::high_resolution_clock::now ();
407 tDuration += std::chrono::duration_cast< std::chrono::duration<rep, period> >
408 (std::chrono::high_resolution_clock::now () - tReference);
420 return tDuration.count ();
426 tDuration = std::chrono::duration<rep, period>::zero ();
431 std::chrono::time_point<std::chrono::high_resolution_clock> tReference;
433 std::chrono::duration<rep, period> tDuration;
442 template <
typename period = std::milli>
451 size_t tRes = device.getInfo<CL_DEVICE_PROFILING_TIMER_RESOLUTION> ();
453 tUnit = (double) tPeriod.den / (
double) tPeriod.num / 1000000000.0 * tRes;
480 cl_ulong start = pEvent.getProfilingInfo<CL_PROFILING_COMMAND_START> ();
481 cl_ulong end = pEvent.getProfilingInfo<CL_PROFILING_COMMAND_END> ();
483 return (end - start) * tUnit;
493 #endif // CLUTILS_HPP
void split(const std::string &str, char delim, std::vector< std::string > &names)
Splits a string on the requested delimiter.
Definition: CLUtils.cpp:232
double duration()
Returns the time measured by the timer.
Definition: CLUtils.hpp:478
cl::Context & getContext(unsigned int pIdx=0)
Gets back one of the existing contexts.
Definition: CLUtils.cpp:347
void wait()
This is an interface for cl::Event::wait.
Definition: CLUtils.hpp:468
const char * getOpenCLErrorCodeString(int errorCode)
Returns the name of an error code.
Definition: CLUtils.cpp:54
rep & operator[](const int idx)
Definition: CLUtils.hpp:257
std::pair< const char *, size_t > make_kernel_pair(const std::string &kernel_filename)
Creates a pair of a char array (source code) and its size.
Definition: CLUtils.cpp:249
unsigned int pgIdx
Definition: CLUtils.hpp:231
rep max()
Returns the max time of the #nSize execution times.
Definition: CLUtils.hpp:295
cl::Kernel & addProgram(unsigned int ctxIdx, const std::vector< std::string > &kernel_filenames, const char *kernel_name=nullptr, const char *build_options=nullptr)
Creates a program for the specified context.
Definition: CLUtils.cpp:586
Facilitates the conveyance of CLEnv arguments.
Definition: CLUtils.hpp:174
cl::Program & getProgram(unsigned int pgIdx=0)
Gets back one of the existing programs.
Definition: CLUtils.cpp:387
void print(ProfilingInfo &refProf, const char *title=nullptr)
Displays summarizing results on two tests.
Definition: CLUtils.hpp:346
void readSource(const std::vector< std::string > &kernel_filenames, std::vector< std::string > &sourceCodes)
Reads in the contents from the requested files.
Definition: CLUtils.cpp:203
CLEnvInfo< 1 > getCLEnvInfo(unsigned int idx)
Creates a new CLEnvInfo object with the specified command queue.
Definition: CLUtils.hpp:212
It brings together functionality common to all OpenCL projects.
Definition: CLUtils.hpp:57
A class for profiling CL devices.
Definition: CLUtils.hpp:443
void print(const char *title=nullptr, bool bLine=true)
Displays summarizing results on the test.
Definition: CLUtils.hpp:316
virtual void initGLMemObjects()
Initializes the OpenGL memory buffers.
Definition: CLUtils.hpp:149
Sets up an OpenCL environment.
Definition: CLUtils.hpp:85
rep duration()
Returns the time measured by the timer.
Definition: CLUtils.hpp:418
rep min()
Returns the min time of the #nSize execution times.
Definition: CLUtils.hpp:286
bool checkCLGLInterop(cl::Device &device)
Checks the availability of the "GL Sharing" capability.
Definition: CLUtils.cpp:185
A class that collects and manipulates timing information about a test.
Definition: CLUtils.hpp:244
A class for measuring execution times.
Definition: CLUtils.hpp:375
GPUTimer(cl::Device &device)
Definition: CLUtils.hpp:448
unsigned int ctxIdx
Definition: CLUtils.hpp:229
std::vector< std::vector< cl::Device > > devices
List of devices per platform.
Definition: CLUtils.hpp:125
void start(bool tReset=true)
Starts the timer.
Definition: CLUtils.hpp:393
CLEnv(const std::vector< std::string > &kernel_filenames=std::vector< std::string >(), const char *build_options=nullptr)
Definition: CLUtils.cpp:266
void reset()
Resets the timer.
Definition: CLUtils.hpp:424
CPUTimer(int initVal=0)
Constructs a timer.
Definition: CLUtils.hpp:383
unsigned int dIdx
Definition: CLUtils.hpp:228
cl::CommandQueue & addQueueGL(unsigned int ctxIdx, cl_command_queue_properties props=0)
Creates a queue for the GL-shared device in the specified context.
Definition: CLUtils.cpp:544
ProfilingInfo(std::string pLabel=std::string(), std::string pUnit=std::string("ms"))
Definition: CLUtils.hpp:251
std::vector< unsigned int > qIdx
Definition: CLUtils.hpp:230
rep stop()
Stops the timer.
Definition: CLUtils.hpp:405
cl::CommandQueue & getQueue(unsigned int ctxIdx=0, unsigned int qIdx=0)
Gets back one of the existing command queues in the specified context.
Definition: CLUtils.cpp:368
cl::Event & event()
Returns a new unpopulated event.
Definition: CLUtils.hpp:461
rep mean()
Returns the mean time of the #nSize execution times.
Definition: CLUtils.hpp:277
rep speedup(ProfilingInfo &refProf)
Returns the relative performance speedup with respect to refProf.
Definition: CLUtils.hpp:305
std::vector< cl::Platform > platforms
Definition: CLUtils.hpp:122
cl::Kernel & getKernel(const char *kernelName, unsigned int pgIdx=0)
Gets back one of the existing kernels in some program.
Definition: CLUtils.cpp:407
cl::Context & addContext(unsigned int pIdx, const bool gl_shared=false)
Creates a context for all devices in the requested platform.
Definition: CLUtils.cpp:433
CLEnvInfo(unsigned int _pIdx=0, unsigned int _dIdx=0, unsigned int _ctxIdx=0, const std::vector< unsigned int > _qIdx={0}, unsigned int _pgIdx=0)
Initializes a CLEnvInfo object.
Definition: CLUtils.hpp:187
rep total(rep initVal=0.0)
Returns the sum of the #nSize execution times.
Definition: CLUtils.hpp:268
unsigned int pIdx
Definition: CLUtils.hpp:227
cl::CommandQueue & addQueue(unsigned int ctxIdx, unsigned int dIdx, cl_command_queue_properties props=0)
Creates a queue for the specified device in the specified context.
Definition: CLUtils.cpp:515