{ "cells": [ { "cell_type": "markdown", "id": "848cf56c", "metadata": { "papermill": { "duration": 0.001865, "end_time": "2024-09-06T18:35:27.756868", "exception": false, "start_time": "2024-09-06T18:35:27.755003", "status": "completed" }, "tags": [] }, "source": [ "# Using Groupby-Aggregate to Compose Multi-Run Datasets: Thicket Tutorial\n", "\n", "Thicket is a python-based toolkit for Exploratory Data Analysis (EDA) of parallel performance data that enables performance optimization and understanding of applications’ performance on supercomputers. It bridges the performance tool gap between being able to consider only a single instance of a simulation run (e.g., single platform, single measurement tool, or single scale) and finding actionable insights in multi-dimensional, multi-scale, multi-architecture, and multi-tool performance datasets.\n", "\n", "## 1. Import Necessary Packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "4797d428", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:27.760176Z", "iopub.status.busy": "2024-09-06T18:35:27.760037Z", "iopub.status.idle": "2024-09-06T18:35:28.315501Z", "shell.execute_reply": "2024-09-06T18:35:28.315179Z" }, "papermill": { "duration": 0.558006, "end_time": "2024-09-06T18:35:28.316372", "exception": false, "start_time": "2024-09-06T18:35:27.758366", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "application/javascript": [ "var Roundtrip_Obj = {};\n", "var refresh_cycle = false;\n", "var clicked_cell = null;\n", "var cached_cells = Jupyter.notebook.get_cell_elements();\n", "\n", "/**\n", " * @name unindentPyCode\n", " * @description Removes leading indentations from a python code string.\n", " * \n", " * @param {string} code Python code in string form\n", " * @returns Passed code string but with no leading indentations\n", " */\n", "function unindentPyCode(code){\n", " let uicode = code.split('\\n');\n", " let indent = 0;\n", "\n", " uicode.forEach((l,i, arr)=>{\n", 
" if(i == 0){\n", " indent = l.search(/\\S/);\n", " }\n", " arr[i] = l.slice(indent);\n", " })\n", " uicode = uicode.join('\\n');\n", " return uicode;\n", "}\n", "\n", "/**\n", " * @name buildPythonAssignment\n", " * @description Builds up a python code string which assigns javascript data back into jypyter notebook namespace\n", " * \n", " * @param {string} val This is data assigned back to the python code\n", " * @param {string} py_var This is the variable into which val is assigned\n", " * @param {string} converter This is a definition of a python function which translates data back to the desired format\n", " * @returns The python code to be run in the jupyter shell\n", " */\n", "function buildPythonAssignment(val, py_var, converter){\n", " // console.log(val, py_var, converter);\n", " var holder = `'${val}'`;\n", " var code = `${unindentPyCode(converter.code)}`\n", " code += `\\ntmp = ${holder}`;\n", " code += `\\n${py_var} = ${converter.name}(tmp)`\n", "\n", " return code\n", "}\n", "\n", "/**\n", " * @name manageNewCell\n", " * \n", " * @description Increments all two way bound cell ids by the number of new cells which proceed them. \n", " * Ex. Adding one cell at position 2 will increment a bound cell at position 3 from 3->4. 
\n", " * \n", " * @param {array} newCells A list of our current cells in the notebook to be compared against cached cells\n", " * @param {} obj The current roundtrip object containing all data bindings\n", " */\n", "function manageNewCell(newCells, obj){\n", " let newIds = [];\n", "\n", " Object.keys(newCells).forEach(function(i){\n", " if(!Object.values(cached_cells).includes(newCells[i]) && !isNaN(i)){\n", " newIds.push(i);\n", " }\n", " });\n", "\n", " //increment all bindings past each new id\n", " for(let js_var in obj){\n", " for(let id of newIds){\n", " for(let key in obj[js_var][\"two_way\"]){\n", " obj[js_var][\"two_way\"][key].forEach((two_way_id, i) => {\n", " if(two_way_id > id){\n", " obj[js_var][\"two_way\"][key][i] += 1;\n", " }\n", " });\n", " }\n", " } \n", " }\n", "\n", " cached_cells = newCells;\n", "}\n", "\n", "function manageDeletedCell(newCells, obj){\n", " let deletedId = null;\n", " \n", " for(i of Object.keys(cachedCells)){\n", " if (cached_cells[i] !== newCells[i]){\n", " deletedId = i;\n", " break;\n", " }\n", " }\n", "\n", "}\n", "\n", "\n", "function bindClickDetectToCells(){\n", " let cells = Jupyter.notebook.get_cell_elements();\n", "\n", " for(let i in Object.keys(cells)){\n", " let cell = cells[i];\n", "\n", " if(cell !== undefined){\n", " cell.addEventListener('mousedown', () => {\n", " clicked_cell = i;\n", " }, true)\n", " }\n", " }\n", "}\n", "\n", "bindClickDetectToCells();\n", "\n", "/**\n", " * @name RT_Handler\n", " * @description A wrapper for our roundtrip object. It is called as a proxy for the\n", " * roundtrip object defined above. This enables us to define custom call backs for\n", " * gets and sets on the roundtrip object. The custom set handles necessary data conversion,\n", " * the registering of two-way bound variables and automatic updating of watched cells. 
The get\n", " * allows users to interact with the underlying object without worrying about the proxy.\n", " */\n", "var RT_Handler = {\n", " set(obj, prop, value){\n", " //Do cell housekeeping\n", "\n", "\n", " //Initial pass of value into roundtrip object\n", " // from python code; there may be multiple different\n", " // visualizations of the same type we need to catch\n", " if (typeof value === 'object' && value.hasOwnProperty('origin') && value.origin == 'INIT'){\n", " \n", " /**\n", " * In this code block we need to check if there is already a \n", " * an array of id's which are two way bound already defined and \n", " * add to it or remove from it\n", " */\n", " let ida = Jupyter.notebook.get_selected_index()-1;\n", " value.id = ida;\n", " let new_val = value;\n", "\n", " // Block updating bindings while jupyter is running\n", " if(refresh_cycle){\n", " new_val = obj[prop];\n", " new_val.data = value.data;\n", " return Reflect.set(obj, prop, new_val);\n", " }\n", "\n", " /**\n", " * The broad case where we are updating bindings \n", " * on existing data\n", " */\n", " if(obj[prop] != undefined){\n", " new_val = obj[prop];\n", " new_val.data = value.data;\n", " new_val.converter = value.converter;\n", "\n", " // If there is no two way array, create one\n", " // Else push on our new id\n", " if(value.two_way === true){\n", " if(!Object.keys(new_val.two_way).includes(value['python_var'])){\n", " new_val.two_way[value['python_var']] = [];\n", " }\n", "\n", " let pybinding = new_val.two_way[value['python_var']];\n", "\n", " if(!pybinding.includes(value.id)){\n", " pybinding.push(value.id);\n", " }\n", "\n", " }\n", "\n", " //Deregister a cell id from being two-way bound now\n", " else if(value.two_way === false && Object.keys(new_val.two_way).includes(value['python_var'])){\n", " let pybinding = new_val.two_way[value['python_var']];\n", " const index = pybinding.indexOf(value.id);\n", " \n", " if (index > -1) {\n", " pybinding.splice(index, 1);\n", " }\n", " }\n", 
" }\n", "\n", " //Initalize a new two-way object if\n", " // one did not exist\n", " else{\n", " if(new_val.two_way == true){\n", " new_val.two_way = {};\n", " new_val.two_way[value['python_var']] = [value.id];\n", " }\n", " else{\n", " new_val.two_way = {};\n", " }\n", " delete new_val.id;\n", " delete new_val.from_py;\n", " delete new_val.python_var;\n", " }\n", "\n", " return Reflect.set(obj, prop, new_val);\n", " }\n", " //Assignment from javascript code\n", " else {\n", " // TODO: make the py/js data identification object a\n", " // formal class\n", " if(obj[prop] === undefined){\n", " obj[prop] = {\n", " two_way: {},\n", " origin: \"JS\",\n", " data: null,\n", " python_var: \"\",\n", " converter: null,\n", " type: typeof(value)\n", " }\n", " }\n", "\n", " var execable_cells = [];\n", " let origin = 'STANDARD';\n", " let python_var = '';\n", "\n", " if (typeof value === 'object' && \n", " value.hasOwnProperty('origin') && \n", " value.origin == 'PYASSIGN'){\n", "\n", " origin = value.origin;\n", " python_var = value.python_var;\n", " value = value.data;\n", " }\n", "\n", " //TODO: Replace with imported, webpacked D3\n", " require(['https://d3js.org/d3.v4.min.js'], function(d3) {\n", "\n", " // When 2 way bound this calls automatically when something changes\n", " if (obj[prop] !== undefined && Object.keys(obj[prop][\"two_way\"]).length > 0){\n", "\n", " let current_cell = Number(clicked_cell);\n", " let py_var = '';\n", "\n", " //ust set the data without updating if our current cell is not two way bound\n", " if(origin == 'STANDARD'){\n", " let found = false;\n", " for(let key in obj[prop][\"two_way\"]){\n", " if (obj[prop][\"two_way\"][key].includes(current_cell)){\n", " found = true;\n", " py_var = key;\n", " }\n", " }\n", "\n", " if(!found){\n", " return Reflect.set(obj[prop], \"data\", value);\n", " }\n", " }\n", "\n", "\n", " if(origin == 'PYASSIGN'){\n", " py_var = python_var;\n", " }\n", "\n", "\n", " /**\n", " * We now have a list of registered cells 
we can execute.\n", " * So we look through our javascript variables to see if they\n", " * are bound to the same py variable as our current assignment\n", " * TODO: Make this list update when cells are moved up or down\n", " */\n", "\n", " for(let js_var in obj){\n", " let boundpyvars = Object.keys(obj[js_var][\"two_way\"]);\n", "\n", " if(boundpyvars.includes(py_var)){\n", " let clls = obj[js_var][\"two_way\"][py_var].filter(x => x != current_cell );\n", " execable_cells = execable_cells.concat(clls);\n", " }\n", " }\n", "\n", " if(origin == 'STANDARD'){\n", " // TODO:THROW AN ERROR IF CONVERTER == NONE\n", " const code = buildPythonAssignment(value, py_var, obj[prop][\"converter\"]);\n", " \n", " //TODO: Turn this into a function that manages error reporting and printing\n", " Jupyter.notebook.kernel.execute(code, { \n", " shell:{\n", " reply: function(r){\n", " //consider putting this in a reserved jupyter variable\n", " if(r.content.status == 'error'){\n", " console.error(`${r.content.ename} in JS->Python coversion:\\n ${r.content.evalue}`)\n", " }\n", " }\n", " }\n", " });\n", " }\n", "\n", " refresh_cycle = true;\n", " Jupyter.notebook.execute_cells(execable_cells);\n", "\n", " /**\n", " * Test every half second to see if some of the\n", " * jupyter cells are still running. 
Avoids a race condition\n", " * where incorrect ids were stored in our roundtrip object.\n", " */\n", " const test_running = function(){\n", " let runtest = d3.selectAll(\".running\");\n", " if(runtest.empty()){\n", " refresh_cycle = false;\n", " return;\n", " }\n", " else{\n", " setTimeout(test_running, 500);\n", " }\n", " }\n", "\n", " test_running();\n", " }\n", "\n", " });\n", " } \n", "\n", " return Reflect.set(obj[prop], \"data\", value);\n", " },\n", " get(obj, prop, reciever){\n", " let ret = obj[prop].data\n", " return ret; \n", " }\n", "}\n", "\n", "window.Roundtrip = new Proxy(Roundtrip_Obj, RT_Handler);\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from glob import glob\n", "import numpy as np\n", "from IPython.display import display\n", "from IPython.display import HTML\n", "\n", "import thicket as th\n", "\n", "display(HTML(\"\"))" ] }, { "cell_type": "code", "execution_count": 2, "id": "a8f1da19", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:28.320317Z", "iopub.status.busy": "2024-09-06T18:35:28.320184Z", "iopub.status.idle": "2024-09-06T18:35:28.322448Z", "shell.execute_reply": "2024-09-06T18:35:28.322122Z" }, "papermill": { "duration": 0.004854, "end_time": "2024-09-06T18:35:28.323143", "exception": false, "start_time": "2024-09-06T18:35:28.318289", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Disable the Pandas 3 Future Warnings for now\n", "import warnings\n", "warnings.filterwarnings(\"ignore\", category=FutureWarning) " ] }, { "cell_type": "markdown", "id": "1892ad2f", "metadata": { "papermill": { "duration": 0.001281, "end_time": "2024-09-06T18:35:28.325823", "exception": false, "start_time": "2024-09-06T18:35:28.324542", "status": "completed" }, "tags": [] }, "source": [ "## 2. 
Define Dataset Paths and Names\n", "\n", "In this example, we load two repeat runs generated on Lassen. We can use `glob` to find all of the Caliper files in a given directory." ] }, { "cell_type": "code", "execution_count": 3, "id": "e81b174b", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:28.328722Z", "iopub.status.busy": "2024-09-06T18:35:28.328633Z", "iopub.status.idle": "2024-09-06T18:35:28.500412Z", "shell.execute_reply": "2024-09-06T18:35:28.500132Z" }, "papermill": { "duration": 0.174113, "end_time": "2024-09-06T18:35:28.501169", "exception": false, "start_time": "2024-09-06T18:35:28.327056", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "data = glob(\"../data/lassen/clang10.0.1_nvcc10.2.89_1048576/**/*.cali\", recursive=True)\n", "tk = th.Thicket.from_caliperreader(data, disable_tqdm=True)" ] }, { "cell_type": "markdown", "id": "f60fcd07", "metadata": { "papermill": { "duration": 0.001579, "end_time": "2024-09-06T18:35:28.504485", "exception": false, "start_time": "2024-09-06T18:35:28.502906", "status": "completed" }, "tags": [] }, "source": [ "## 3. Groupby\n", "\n", "Group by the unique combinations of `variant` and `tuning` from the metadata table. In general, these will be the parameters you varied in your runs.\n", "\n", "After performing the groupby, we can see that each thicket contains multiple profiles. In order to perform certain composition operations in Thicket, we need to aggregate the performance data (`Thicket.dataframe`)." 
] }, { "cell_type": "code", "execution_count": 4, "id": "88a7c896", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:28.507738Z", "iopub.status.busy": "2024-09-06T18:35:28.507635Z", "iopub.status.idle": "2024-09-06T18:35:28.522427Z", "shell.execute_reply": "2024-09-06T18:35:28.522104Z" }, "papermill": { "duration": 0.017101, "end_time": "2024-09-06T18:35:28.523010", "exception": false, "start_time": "2024-09-06T18:35:28.505909", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4 thickets created...\n", "{('Base_CUDA', 'block_1024'): , ('Base_CUDA', 'block_128'): , ('Base_CUDA', 'block_256'): , ('Base_CUDA', 'block_512'): }\n" ] } ], "source": [ "gb = tk.groupby([\"variant\", \"tuning\"])" ] }, { "cell_type": "code", "execution_count": 5, "id": "3b901618", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:28.526707Z", "iopub.status.busy": "2024-09-06T18:35:28.526620Z", "iopub.status.idle": "2024-09-06T18:35:28.529501Z", "shell.execute_reply": "2024-09-06T18:35:28.529177Z" }, "papermill": { "duration": 0.005482, "end_time": "2024-09-06T18:35:28.530127", "exception": false, "start_time": "2024-09-06T18:35:28.524645", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "key ('Base_CUDA', 'block_1024') contains 2 profiles\n", "key ('Base_CUDA', 'block_128') contains 2 profiles\n", "key ('Base_CUDA', 'block_256') contains 2 profiles\n", "key ('Base_CUDA', 'block_512') contains 2 profiles\n" ] } ], "source": [ "for key, ttk in gb.items():\n", " print(f\"key {key} contains {len(ttk.profile)} profiles\")" ] }, { "cell_type": "markdown", "id": "7c14cf7f", "metadata": { "papermill": { "duration": 0.001333, "end_time": "2024-09-06T18:35:28.532960", "exception": false, "start_time": "2024-09-06T18:35:28.531627", "status": "completed" }, "tags": [] }, "source": [ "## 4. 
Aggregation" ] }, { "cell_type": "markdown", "id": "e01a75da", "metadata": { "papermill": { "duration": 0.001435, "end_time": "2024-09-06T18:35:28.535891", "exception": false, "start_time": "2024-09-06T18:35:28.534456", "status": "completed" }, "tags": [] }, "source": [ "Using the `aggregate_thicket` function, we can aggregate each Thicket in the groupby object individually." ] }, { "cell_type": "code", "execution_count": 6, "id": "951c9d30", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:28.539342Z", "iopub.status.busy": "2024-09-06T18:35:28.539246Z", "iopub.status.idle": "2024-09-06T18:35:28.604538Z", "shell.execute_reply": "2024-09-06T18:35:28.604254Z" }, "papermill": { "duration": 0.067709, "end_time": "2024-09-06T18:35:28.605127", "exception": false, "start_time": "2024-09-06T18:35:28.537418", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nid_meanMin time/rank_meanMax time/rank_meanAvg time/rank_meanTotal time_meanBlockSize_meanBytes/Rep_meanFlops/Rep_meanIterations/Rep_meanKernels/Rep_meanProblemSize_meanReps_meanspot.channelname
nodevarianttuning
{'name': 'RAJAPerf', 'type': 'function'}Base_CUDAblock_1281.01.7796281.7796281.7796281.779628128.03.359049e+096.797544e+09125952040.0160.01135363.02500.0regionprofileRAJAPerf
{'name': 'Algorithm', 'type': 'function'}Base_CUDAblock_12810.00.0068090.0068090.0068090.006809128.01.677722e+071.048576e+061048576.01.01048576.0100.0regionprofileAlgorithm
{'name': 'Algorithm_MEMCPY', 'type': 'function'}Base_CUDAblock_12813.00.0024390.0024390.0024390.002439128.01.677722e+070.000000e+001048576.01.01048576.0100.0regionprofileAlgorithm_MEMCPY
{'name': 'Algorithm_MEMSET', 'type': 'function'}Base_CUDAblock_12812.00.0017050.0017050.0017050.001705128.08.388616e+060.000000e+001048576.01.01048576.0100.0regionprofileAlgorithm_MEMSET
{'name': 'Algorithm_REDUCE_SUM', 'type': 'function'}Base_CUDAblock_12811.00.0026420.0026420.0026420.002642128.08.388616e+061.048576e+061048576.01.01048576.050.0regionprofileAlgorithm_REDUCE_SUM
...................................................
{'name': 'Stream_ADD', 'type': 'function'}Base_CUDAblock_12854.00.0335930.0335930.0335930.033593128.02.516582e+071.048576e+061048576.01.01048576.01000.0regionprofileStream_ADD
{'name': 'Stream_COPY', 'type': 'function'}Base_CUDAblock_12855.00.0425840.0425840.0425840.042584128.01.677722e+070.000000e+001048576.01.01048576.01800.0regionprofileStream_COPY
{'name': 'Stream_DOT', 'type': 'function'}Base_CUDAblock_12856.00.1081530.1081530.1081530.108153128.01.677723e+072.097152e+061048576.01.01048576.02000.0regionprofileStream_DOT
{'name': 'Stream_MUL', 'type': 'function'}Base_CUDAblock_12857.00.0426110.0426110.0426110.042611128.01.677722e+071.048576e+061048576.01.01048576.01800.0regionprofileStream_MUL
{'name': 'Stream_TRIAD', 'type': 'function'}Base_CUDAblock_12858.00.0336480.0336480.0336480.033648128.02.516582e+072.097152e+061048576.01.01048576.01000.0regionprofileStream_TRIAD
\n", "

67 rows × 14 columns

\n", "
" ], "text/plain": [ " nid_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 1.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 10.0 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 13.0 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 12.0 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 11.0 \n", "... ... \n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 54.0 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 55.0 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 56.0 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 57.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 58.0 \n", "\n", " Min time/rank_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 1.779628 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 0.006809 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 0.002439 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 0.001705 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 0.002642 \n", "... ... 
\n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 0.033593 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 0.042584 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 0.108153 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 0.042611 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 0.033648 \n", "\n", " Max time/rank_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 1.779628 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 0.006809 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 0.002439 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 0.001705 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 0.002642 \n", "... ... \n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 0.033593 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 0.042584 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 0.108153 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 0.042611 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 0.033648 \n", "\n", " Avg time/rank_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 1.779628 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 0.006809 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 0.002439 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 0.001705 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 0.002642 \n", "... ... 
\n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 0.033593 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 0.042584 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 0.108153 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 0.042611 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 0.033648 \n", "\n", " Total time_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 1.779628 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 0.006809 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 0.002439 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 0.001705 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 0.002642 \n", "... ... \n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 0.033593 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 0.042584 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 0.108153 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 0.042611 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 0.033648 \n", "\n", " BlockSize_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 128.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 128.0 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 128.0 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 128.0 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 128.0 \n", "... ... 
\n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 128.0 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 128.0 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 128.0 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 128.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 128.0 \n", "\n", " Bytes/Rep_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 3.359049e+09 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 1.677722e+07 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 1.677722e+07 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 8.388616e+06 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 8.388616e+06 \n", "... ... \n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 2.516582e+07 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 1.677722e+07 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 1.677723e+07 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 1.677722e+07 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 2.516582e+07 \n", "\n", " Flops/Rep_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 6.797544e+09 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 1.048576e+06 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 0.000000e+00 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 0.000000e+00 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 1.048576e+06 \n", "... ... 
\n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 1.048576e+06 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 0.000000e+00 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 2.097152e+06 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 1.048576e+06 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 2.097152e+06 \n", "\n", " Iterations/Rep_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 125952040.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 1048576.0 \n", "... ... \n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "\n", " Kernels/Rep_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 160.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 1.0 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 1.0 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 1.0 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 1.0 \n", "... ... 
\n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 1.0 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 1.0 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 1.0 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 1.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 1.0 \n", "\n", " ProblemSize_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 1135363.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 1048576.0 \n", "... ... \n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 1048576.0 \n", "\n", " Reps_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 2500.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 100.0 \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 100.0 \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 100.0 \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 50.0 \n", "... ... 
\n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 1000.0 \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 1800.0 \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 2000.0 \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 1800.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 1000.0 \n", "\n", " spot.channel \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 regionprofile \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 regionprofile \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 regionprofile \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 regionprofile \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 regionprofile \n", "... ... \n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 regionprofile \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 regionprofile \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 regionprofile \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 regionprofile \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 regionprofile \n", "\n", " name \n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_128 RAJAPerf \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_128 Algorithm \n", "{'name': 'Algorithm_MEMCPY', 'type': 'function'} Base_CUDA block_128 Algorithm_MEMCPY \n", "{'name': 'Algorithm_MEMSET', 'type': 'function'} Base_CUDA block_128 Algorithm_MEMSET \n", "{'name': 'Algorithm_REDUCE_SUM', 'type': 'funct... Base_CUDA block_128 Algorithm_REDUCE_SUM \n", "... ... 
\n", "{'name': 'Stream_ADD', 'type': 'function'} Base_CUDA block_128 Stream_ADD \n", "{'name': 'Stream_COPY', 'type': 'function'} Base_CUDA block_128 Stream_COPY \n", "{'name': 'Stream_DOT', 'type': 'function'} Base_CUDA block_128 Stream_DOT \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_128 Stream_MUL \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_128 Stream_TRIAD \n", "\n", "[67 rows x 14 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "gb_agg = {}\n", "for key, ttk in gb.items():\n", " gb_agg[key] = gb.aggregate_thicket(ttk, np.mean)\n", "\n", "display(gb_agg[('Base_CUDA', 'block_128')].dataframe)" ] }, { "cell_type": "markdown", "id": "fa608352", "metadata": { "papermill": { "duration": 0.001685, "end_time": "2024-09-06T18:35:28.608890", "exception": false, "start_time": "2024-09-06T18:35:28.607205", "status": "completed" }, "tags": [] }, "source": [ "We can call `agg` to aggregate and create a composed dataframe in one step" ] }, { "cell_type": "code", "execution_count": 7, "id": "e326d9b4", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:28.612602Z", "iopub.status.busy": "2024-09-06T18:35:28.612507Z", "iopub.status.idle": "2024-09-06T18:35:28.738387Z", "shell.execute_reply": "2024-09-06T18:35:28.738076Z" }, "papermill": { "duration": 0.128432, "end_time": "2024-09-06T18:35:28.738999", "exception": false, "start_time": "2024-09-06T18:35:28.610567", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nid_meanMin time/rank_meanMax time/rank_meanAvg time/rank_meanTotal time_meanBlockSize_meanBytes/Rep_meanFlops/Rep_meanIterations/Rep_meanKernels/Rep_meanProblemSize_meanReps_meanspot.channelname
nodevarianttuning
{'name': 'RAJAPerf', 'type': 'function'}Base_CUDAblock_10241.02.1229342.1229342.1229342.1229341024.03.359049e+096.797544e+09125952040.0160.01135363.02500.0regionprofileRAJAPerf
block_1281.01.7796281.7796281.7796281.779628128.03.359049e+096.797544e+09125952040.0160.01135363.02500.0regionprofileRAJAPerf
block_2561.01.7721651.7721651.7721651.772165256.03.359049e+096.797544e+09125952040.0160.01135363.02500.0regionprofileRAJAPerf
block_5121.01.8383141.8383141.8383141.838314512.03.359049e+096.797544e+09125952040.0160.01135363.02500.0regionprofileRAJAPerf
{'name': 'Algorithm', 'type': 'function'}Base_CUDAblock_102411.00.0063710.0063710.0063710.0063711024.01.677722e+071.048576e+061048576.01.01048576.0100.0regionprofileAlgorithm
...................................................
{'name': 'Stream_MUL', 'type': 'function'}Base_CUDAblock_51257.00.0427750.0427750.0427750.042775512.01.677722e+071.048576e+061048576.01.01048576.01800.0regionprofileStream_MUL
{'name': 'Stream_TRIAD', 'type': 'function'}Base_CUDAblock_102460.00.0337490.0337490.0337490.0337491024.02.516582e+072.097152e+061048576.01.01048576.01000.0regionprofileStream_TRIAD
block_12858.00.0336480.0336480.0336480.033648128.02.516582e+072.097152e+061048576.01.01048576.01000.0regionprofileStream_TRIAD
block_25664.00.0336490.0336490.0336490.033649256.02.516582e+072.097152e+061048576.01.01048576.01000.0regionprofileStream_TRIAD
block_51258.00.0337130.0337130.0337130.033713512.02.516582e+072.097152e+061048576.01.01048576.01000.0regionprofileStream_TRIAD
\n", "

268 rows × 14 columns

\n", "
" ], "text/plain": [ " nid_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 1.0 \n", " block_128 1.0 \n", " block_256 1.0 \n", " block_512 1.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 11.0 \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 57.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 60.0 \n", " block_128 58.0 \n", " block_256 64.0 \n", " block_512 58.0 \n", "\n", " Min time/rank_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 2.122934 \n", " block_128 1.779628 \n", " block_256 1.772165 \n", " block_512 1.838314 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 0.006371 \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 0.042775 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 0.033749 \n", " block_128 0.033648 \n", " block_256 0.033649 \n", " block_512 0.033713 \n", "\n", " Max time/rank_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 2.122934 \n", " block_128 1.779628 \n", " block_256 1.772165 \n", " block_512 1.838314 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 0.006371 \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 0.042775 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 0.033749 \n", " block_128 0.033648 \n", " block_256 0.033649 \n", " block_512 0.033713 \n", "\n", " Avg time/rank_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 2.122934 \n", " block_128 1.779628 \n", " block_256 1.772165 \n", " block_512 1.838314 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 0.006371 \n", "... ... 
\n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 0.042775 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 0.033749 \n", " block_128 0.033648 \n", " block_256 0.033649 \n", " block_512 0.033713 \n", "\n", " Total time_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 2.122934 \n", " block_128 1.779628 \n", " block_256 1.772165 \n", " block_512 1.838314 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 0.006371 \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 0.042775 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 0.033749 \n", " block_128 0.033648 \n", " block_256 0.033649 \n", " block_512 0.033713 \n", "\n", " BlockSize_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 1024.0 \n", " block_128 128.0 \n", " block_256 256.0 \n", " block_512 512.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 1024.0 \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 512.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 1024.0 \n", " block_128 128.0 \n", " block_256 256.0 \n", " block_512 512.0 \n", "\n", " Bytes/Rep_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 3.359049e+09 \n", " block_128 3.359049e+09 \n", " block_256 3.359049e+09 \n", " block_512 3.359049e+09 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 1.677722e+07 \n", "... ... 
\n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 1.677722e+07 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 2.516582e+07 \n", " block_128 2.516582e+07 \n", " block_256 2.516582e+07 \n", " block_512 2.516582e+07 \n", "\n", " Flops/Rep_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 6.797544e+09 \n", " block_128 6.797544e+09 \n", " block_256 6.797544e+09 \n", " block_512 6.797544e+09 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 1.048576e+06 \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 1.048576e+06 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 2.097152e+06 \n", " block_128 2.097152e+06 \n", " block_256 2.097152e+06 \n", " block_512 2.097152e+06 \n", "\n", " Iterations/Rep_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 125952040.0 \n", " block_128 125952040.0 \n", " block_256 125952040.0 \n", " block_512 125952040.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 1048576.0 \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 1048576.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 1048576.0 \n", " block_128 1048576.0 \n", " block_256 1048576.0 \n", " block_512 1048576.0 \n", "\n", " Kernels/Rep_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 160.0 \n", " block_128 160.0 \n", " block_256 160.0 \n", " block_512 160.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 1.0 \n", "... ... 
\n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 1.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 1.0 \n", " block_128 1.0 \n", " block_256 1.0 \n", " block_512 1.0 \n", "\n", " ProblemSize_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 1135363.0 \n", " block_128 1135363.0 \n", " block_256 1135363.0 \n", " block_512 1135363.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 1048576.0 \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 1048576.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 1048576.0 \n", " block_128 1048576.0 \n", " block_256 1048576.0 \n", " block_512 1048576.0 \n", "\n", " Reps_mean \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 2500.0 \n", " block_128 2500.0 \n", " block_256 2500.0 \n", " block_512 2500.0 \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 100.0 \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 1800.0 \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 1000.0 \n", " block_128 1000.0 \n", " block_256 1000.0 \n", " block_512 1000.0 \n", "\n", " spot.channel \\\n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 regionprofile \n", " block_128 regionprofile \n", " block_256 regionprofile \n", " block_512 regionprofile \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 regionprofile \n", "... ... 
\n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 regionprofile \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 regionprofile \n", " block_128 regionprofile \n", " block_256 regionprofile \n", " block_512 regionprofile \n", "\n", " name \n", "node variant tuning \n", "{'name': 'RAJAPerf', 'type': 'function'} Base_CUDA block_1024 RAJAPerf \n", " block_128 RAJAPerf \n", " block_256 RAJAPerf \n", " block_512 RAJAPerf \n", "{'name': 'Algorithm', 'type': 'function'} Base_CUDA block_1024 Algorithm \n", "... ... \n", "{'name': 'Stream_MUL', 'type': 'function'} Base_CUDA block_512 Stream_MUL \n", "{'name': 'Stream_TRIAD', 'type': 'function'} Base_CUDA block_1024 Stream_TRIAD \n", " block_128 Stream_TRIAD \n", " block_256 Stream_TRIAD \n", " block_512 Stream_TRIAD \n", "\n", "[268 rows x 14 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tk_agg = gb.agg(np.mean, disable_tqdm=True)\n", "\n", "display(tk_agg.dataframe)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" }, "papermill": { "default_parameters": {}, "duration": 2.082969, "end_time": "2024-09-06T18:35:29.064093", "environment_variables": {}, "exception": null, "input_path": "06_groupby_aggregate_of_multirun_data.ipynb", "output_path": "06_groupby_aggregate_of_multirun_data.ipynb", "parameters": {}, "start_time": "2024-09-06T18:35:26.981124", "version": "2.6.0" }, "vscode": { "interpreter": { "hash": "e9b2a95c73c2c3cbd2385f2b17bb401a2882e839041a509387bd5d08c5b62925" } } }, "nbformat": 4, "nbformat_minor": 5 }