| \n", " | \n", " | nid | \n", "time | \n", "time (gpu) | \n", "name | \n", "
|---|---|---|---|---|---|
| node | \n", "profile | \n", "\n", " | \n", " | \n", " | \n", " |
| {'name': 'RAJAPerf', 'type': 'function'} | \n", "457195964 | \n", "23.0 | \n", "0.000615 | \n", "NaN | \n", "RAJAPerf | \n", "
| 528105777 | \n", "23.0 | \n", "0.000596 | \n", "NaN | \n", "RAJAPerf | \n", "|
| {'name': 'Algorithm', 'type': 'function'} | \n", "457195964 | \n", "164.0 | \n", "0.000024 | \n", "NaN | \n", "Algorithm | \n", "
| 528105777 | \n", "164.0 | \n", "0.000024 | \n", "NaN | \n", "Algorithm | \n", "|
| {'name': 'Algorithm_MEMCPY', 'type': 'function'} | \n", "457195964 | \n", "168.0 | \n", "0.000017 | \n", "NaN | \n", "Algorithm_MEMCPY | \n", "
| 528105777 | \n", "168.0 | \n", "0.000017 | \n", "NaN | \n", "Algorithm_MEMCPY | \n", "|
| {'name': 'cudaDeviceSynchronize', 'type': 'function'} | \n", "457195964 | \n", "170.0 | \n", "0.000061 | \n", "NaN | \n", "cudaDeviceSynchronize | \n", "
| 528105777 | \n", "170.0 | \n", "0.000039 | \n", "NaN | \n", "cudaDeviceSynchronize | \n", "|
| {'name': 'cudaLaunchKernel', 'type': 'function'} | \n", "457195964 | \n", "169.0 | \n", "0.000031 | \n", "NaN | \n", "cudaLaunchKernel | \n", "
| 528105777 | \n", "169.0 | \n", "0.000032 | \n", "NaN | \n", "cudaLaunchKernel | \n", "|
| {'name': 'void rajaperf::algorithm::memcpy<128ul>(double*, double*, long)', 'type': 'kernel'} | \n", "457195964 | \n", "225.0 | \n", "NaN | \n", "0.000051 | \n", "void rajaperf::algorithm::memcpy<128ul>(double... | \n", "
| 528105777 | \n", "225.0 | \n", "NaN | \n", "0.000031 | \n", "void rajaperf::algorithm::memcpy<128ul>(double... | \n", "|
| {'name': 'Algorithm_MEMSET', 'type': 'function'} | \n", "457195964 | \n", "165.0 | \n", "0.000015 | \n", "NaN | \n", "Algorithm_MEMSET | \n", "
| 528105777 | \n", "165.0 | \n", "0.000014 | \n", "NaN | \n", "Algorithm_MEMSET | \n", "|
| {'name': 'cudaDeviceSynchronize', 'type': 'function'} | \n", "457195964 | \n", "167.0 | \n", "0.000043 | \n", "NaN | \n", "cudaDeviceSynchronize | \n", "
| 528105777 | \n", "167.0 | \n", "0.000030 | \n", "NaN | \n", "cudaDeviceSynchronize | \n", "|
| {'name': 'cudaLaunchKernel', 'type': 'function'} | \n", "457195964 | \n", "166.0 | \n", "0.000030 | \n", "NaN | \n", "cudaLaunchKernel | \n", "
| 528105777 | \n", "166.0 | \n", "0.000029 | \n", "NaN | \n", "cudaLaunchKernel | \n", "|
| {'name': 'void rajaperf::algorithm::memset<128ul>(double*, double, long)', 'type': 'kernel'} | \n", "457195964 | \n", "224.0 | \n", "NaN | \n", "0.000033 | \n", "void rajaperf::algorithm::memset<128ul>(double... | \n", "
| 528105777 | \n", "224.0 | \n", "NaN | \n", "0.000020 | \n", "void rajaperf::algorithm::memset<128ul>(double... | \n", "
| \n", " | \n", " | nid | \n", "time | \n", "time (gpu) | \n", "name | \n", "gpu__time_duration.sum | \n", "sm__throughput.avg.pct_of_peak_sustained_elapsed | \n", "smsp__maximum_warps_avg_per_active_cycle | \n", "
|---|---|---|---|---|---|---|---|---|
| node | \n", "profile | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| {'name': 'RAJAPerf', 'type': 'function'} | \n", "457195964 | \n", "23.0 | \n", "0.000615 | \n", "NaN | \n", "RAJAPerf | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 528105777 | \n", "23.0 | \n", "0.000596 | \n", "NaN | \n", "RAJAPerf | \n", "NaN | \n", "NaN | \n", "NaN | \n", "|
| {'name': 'Algorithm', 'type': 'function'} | \n", "457195964 | \n", "164.0 | \n", "0.000024 | \n", "NaN | \n", "Algorithm | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 528105777 | \n", "164.0 | \n", "0.000024 | \n", "NaN | \n", "Algorithm | \n", "NaN | \n", "NaN | \n", "NaN | \n", "|
| {'name': 'Algorithm_MEMCPY', 'type': 'function'} | \n", "457195964 | \n", "168.0 | \n", "0.000017 | \n", "NaN | \n", "Algorithm_MEMCPY | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 528105777 | \n", "168.0 | \n", "0.000017 | \n", "NaN | \n", "Algorithm_MEMCPY | \n", "NaN | \n", "NaN | \n", "NaN | \n", "|
| {'name': 'cudaDeviceSynchronize', 'type': 'function'} | \n", "457195964 | \n", "170.0 | \n", "0.000061 | \n", "NaN | \n", "cudaDeviceSynchronize | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 528105777 | \n", "170.0 | \n", "0.000039 | \n", "NaN | \n", "cudaDeviceSynchronize | \n", "NaN | \n", "NaN | \n", "NaN | \n", "|
| {'name': 'cudaLaunchKernel', 'type': 'function'} | \n", "457195964 | \n", "169.0 | \n", "0.000031 | \n", "NaN | \n", "cudaLaunchKernel | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 528105777 | \n", "169.0 | \n", "0.000032 | \n", "NaN | \n", "cudaLaunchKernel | \n", "NaN | \n", "NaN | \n", "NaN | \n", "|
| {'name': 'void rajaperf::algorithm::memcpy<128ul>(double*, double*, long)', 'type': 'kernel'} | \n", "457195964 | \n", "225.0 | \n", "NaN | \n", "0.000051 | \n", "void rajaperf::algorithm::memcpy<128ul>(double... | \n", "43232.0 | \n", "6.521123 | \n", "16.0 | \n", "
| 528105777 | \n", "225.0 | \n", "NaN | \n", "0.000031 | \n", "void rajaperf::algorithm::memcpy<128ul>(double... | \n", "22880.0 | \n", "6.294607 | \n", "16.0 | \n", "|
| {'name': 'Algorithm_MEMSET', 'type': 'function'} | \n", "457195964 | \n", "165.0 | \n", "0.000015 | \n", "NaN | \n", "Algorithm_MEMSET | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 528105777 | \n", "165.0 | \n", "0.000014 | \n", "NaN | \n", "Algorithm_MEMSET | \n", "NaN | \n", "NaN | \n", "NaN | \n", "|
| {'name': 'cudaDeviceSynchronize', 'type': 'function'} | \n", "457195964 | \n", "167.0 | \n", "0.000043 | \n", "NaN | \n", "cudaDeviceSynchronize | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 528105777 | \n", "167.0 | \n", "0.000030 | \n", "NaN | \n", "cudaDeviceSynchronize | \n", "NaN | \n", "NaN | \n", "NaN | \n", "|
| {'name': 'cudaLaunchKernel', 'type': 'function'} | \n", "457195964 | \n", "166.0 | \n", "0.000030 | \n", "NaN | \n", "cudaLaunchKernel | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 528105777 | \n", "166.0 | \n", "0.000029 | \n", "NaN | \n", "cudaLaunchKernel | \n", "NaN | \n", "NaN | \n", "NaN | \n", "|
| {'name': 'void rajaperf::algorithm::memset<128ul>(double*, double, long)', 'type': 'kernel'} | \n", "457195964 | \n", "224.0 | \n", "NaN | \n", "0.000033 | \n", "void rajaperf::algorithm::memset<128ul>(double... | \n", "31648.0 | \n", "7.531866 | \n", "16.0 | \n", "
| 528105777 | \n", "224.0 | \n", "NaN | \n", "0.000020 | \n", "void rajaperf::algorithm::memset<128ul>(double... | \n", "18016.0 | \n", "6.692635 | \n", "16.0 | \n", "
| \n", " | name | \n", "time (gpu) | \n", "c2clink__enabled_mask | \n", "c2clink__present | \n", "device__attribute_architecture | \n", "device__attribute_async_engine_count | \n", "device__attribute_can_flush_remote_writes | \n", "device__attribute_can_map_host_memory | \n", "device__attribute_can_tex2d_gather | \n", "device__attribute_can_use_64_bit_stream_mem_ops_v1 | \n", "... | \n", "sm__sass_thread_inst_executed.sum | \n", "smsp__inst_executed.sum | \n", "smsp__inst_executed_op_global_ld.sum | \n", "smsp__inst_executed_op_global_st.sum | \n", "smsp__inst_executed_op_local_ld.sum | \n", "smsp__inst_executed_op_local_st.sum | \n", "smsp__inst_executed_op_shared_ld.sum | \n", "smsp__inst_executed_op_shared_st.sum | \n", "smsp__inst_executed_pipe_tensor.sum | \n", "smsp__maximum_warps_avg_per_active_cycle | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Apps_DEL_DOT_VEC_2D | \n", "0.000527 | \n", "0.0 | \n", "0.0 | \n", "320.0 | \n", "4.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "1.165767e+09 | \n", "3.643023e+07 | \n", "4455496.0 | \n", "262088.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "10.0 | \n", "
| 1 | \n", "Apps_EDGE3D | \n", "0.468591 | \n", "0.0 | \n", "0.0 | \n", "320.0 | \n", "4.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "1.898933e+11 | \n", "5.934178e+09 | \n", "6587736.0 | \n", "274489.0 | \n", "1.089172e+09 | \n", "715043845.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "2.0 | \n", "
| 2 | \n", "Apps_ENERGY | \n", "0.002247 | \n", "0.0 | \n", "0.0 | \n", "1920.0 | \n", "24.0 | \n", "6.0 | \n", "6.0 | \n", "6.0 | \n", "6.0 | \n", "... | \n", "1.811939e+09 | \n", "5.662310e+07 | \n", "5767168.0 | \n", "1572864.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "96.0 | \n", "
| 3 | \n", "Apps_FIR | \n", "0.000178 | \n", "0.0 | \n", "0.0 | \n", "320.0 | \n", "4.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "1.115685e+09 | \n", "3.486515e+07 | \n", "4194304.0 | \n", "262144.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "16.0 | \n", "
| 4 | \n", "Apps_LTIMES | \n", "0.002227 | \n", "0.0 | \n", "0.0 | \n", "320.0 | \n", "4.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "3.340763e+09 | \n", "1.216348e+08 | \n", "18874368.0 | \n", "8388608.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "16.0 | \n", "
| 5 | \n", "Apps_LTIMES_NOVIEW | \n", "0.002234 | \n", "0.0 | \n", "0.0 | \n", "320.0 | \n", "4.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "2.900361e+09 | \n", "1.042022e+08 | \n", "18874368.0 | \n", "8388608.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "16.0 | \n", "
| 6 | \n", "Apps_MATVEC_3D_STENCIL | \n", "0.002154 | \n", "0.0 | \n", "0.0 | \n", "320.0 | \n", "4.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "1.731645e+09 | \n", "5.411398e+07 | \n", "14378100.0 | \n", "261420.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "4.0 | \n", "
| 7 | \n", "Apps_NODAL_ACCUMULATION_3D | \n", "0.000556 | \n", "0.0 | \n", "0.0 | \n", "320.0 | \n", "4.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "3.597146e+08 | \n", "1.124110e+07 | \n", "522840.0 | \n", "0.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "16.0 | \n", "
| 8 | \n", "Apps_PRESSURE | \n", "0.000507 | \n", "0.0 | \n", "0.0 | \n", "640.0 | \n", "8.0 | \n", "2.0 | \n", "2.0 | \n", "2.0 | \n", "2.0 | \n", "... | \n", "5.117051e+08 | \n", "1.599078e+07 | \n", "1048576.0 | \n", "786432.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "32.0 | \n", "
| 9 | \n", "Apps_VOL3D | \n", "0.000380 | \n", "0.0 | \n", "0.0 | \n", "320.0 | \n", "4.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "1.317547e+09 | \n", "4.117341e+07 | \n", "6587736.0 | \n", "274489.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "8.0 | \n", "
| 10 | \n", "Apps_ZONAL_ACCUMULATION_3D | \n", "0.000268 | \n", "0.0 | \n", "0.0 | \n", "320.0 | \n", "4.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "4.182726e+08 | \n", "1.307104e+07 | \n", "2352780.0 | \n", "261420.0 | \n", "0.000000e+00 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "16.0 | \n", "
11 rows × 239 columns
\n", "