{ "cells": [ { "cell_type": "markdown", "id": "387fdda3", "metadata": { "papermill": { "duration": 0.002228, "end_time": "2024-09-06T18:35:25.447619", "exception": false, "start_time": "2024-09-06T18:35:25.445391", "status": "completed" }, "tags": [] }, "source": [ "# Query Language: Thicket Tutorial\n", "\n", "Thicket is a Python-based toolkit for Exploratory Data Analysis (EDA) of parallel performance data that enables performance optimization and understanding of applications’ performance on supercomputers. It bridges the performance tool gap between being able to consider only a single instance of a simulation run (e.g., single platform, single measurement tool, or single scale) and finding actionable insights in multi-dimensional, multi-scale, multi-architecture, and multi-tool performance datasets.\n", "\n", "## 1. Import Necessary Packages\n", "\n", "To explore the structure and various capabilities of thicket components, we begin by importing the necessary packages. These include Python extensions, Hatchet (which provides the `QueryMatcher` used to build the queries below), and Thicket itself." 
] }, { "cell_type": "code", "execution_count": 1, "id": "0689fbba", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:25.451264Z", "iopub.status.busy": "2024-09-06T18:35:25.451149Z", "iopub.status.idle": "2024-09-06T18:35:25.975502Z", "shell.execute_reply": "2024-09-06T18:35:25.975192Z" }, "papermill": { "duration": 0.526842, "end_time": "2024-09-06T18:35:25.976162", "exception": false, "start_time": "2024-09-06T18:35:25.449320", "status": "completed" }, "scrolled": true, "tags": [] }, "outputs": [ { "data": { "application/javascript": [ "var Roundtrip_Obj = {};\n", "var refresh_cycle = false;\n", "var clicked_cell = null;\n", "var cached_cells = Jupyter.notebook.get_cell_elements();\n", "\n", "/**\n", " * @name unindentPyCode\n", " * @description Removes leading indentations from a python code string.\n", " * \n", " * @param {string} code Python code in string form\n", " * @returns Passed code string but with no leading indentations\n", " */\n", "function unindentPyCode(code){\n", " let uicode = code.split('\\n');\n", " let indent = 0;\n", "\n", " uicode.forEach((l,i, arr)=>{\n", " if(i == 0){\n", " indent = l.search(/\\S/);\n", " }\n", " arr[i] = l.slice(indent);\n", " })\n", " uicode = uicode.join('\\n');\n", " return uicode;\n", "}\n", "\n", "/**\n", " * @name buildPythonAssignment\n", " * @description Builds up a python code string which assigns javascript data back into jypyter notebook namespace\n", " * \n", " * @param {string} val This is data assigned back to the python code\n", " * @param {string} py_var This is the variable into which val is assigned\n", " * @param {string} converter This is a definition of a python function which translates data back to the desired format\n", " * @returns The python code to be run in the jupyter shell\n", " */\n", "function buildPythonAssignment(val, py_var, converter){\n", " // console.log(val, py_var, converter);\n", " var holder = `'${val}'`;\n", " var code = 
`${unindentPyCode(converter.code)}`\n", " code += `\\ntmp = ${holder}`;\n", " code += `\\n${py_var} = ${converter.name}(tmp)`\n", "\n", " return code\n", "}\n", "\n", "/**\n", " * @name manageNewCell\n", " * \n", " * @description Increments all two way bound cell ids by the number of new cells which proceed them. \n", " * Ex. Adding one cell at position 2 will increment a bound cell at position 3 from 3->4. \n", " * \n", " * @param {array} newCells A list of our current cells in the notebook to be compared against cached cells\n", " * @param {} obj The current roundtrip object containing all data bindings\n", " */\n", "function manageNewCell(newCells, obj){\n", " let newIds = [];\n", "\n", " Object.keys(newCells).forEach(function(i){\n", " if(!Object.values(cached_cells).includes(newCells[i]) && !isNaN(i)){\n", " newIds.push(i);\n", " }\n", " });\n", "\n", " //increment all bindings past each new id\n", " for(let js_var in obj){\n", " for(let id of newIds){\n", " for(let key in obj[js_var][\"two_way\"]){\n", " obj[js_var][\"two_way\"][key].forEach((two_way_id, i) => {\n", " if(two_way_id > id){\n", " obj[js_var][\"two_way\"][key][i] += 1;\n", " }\n", " });\n", " }\n", " } \n", " }\n", "\n", " cached_cells = newCells;\n", "}\n", "\n", "function manageDeletedCell(newCells, obj){\n", " let deletedId = null;\n", " \n", " for(i of Object.keys(cachedCells)){\n", " if (cached_cells[i] !== newCells[i]){\n", " deletedId = i;\n", " break;\n", " }\n", " }\n", "\n", "}\n", "\n", "\n", "function bindClickDetectToCells(){\n", " let cells = Jupyter.notebook.get_cell_elements();\n", "\n", " for(let i in Object.keys(cells)){\n", " let cell = cells[i];\n", "\n", " if(cell !== undefined){\n", " cell.addEventListener('mousedown', () => {\n", " clicked_cell = i;\n", " }, true)\n", " }\n", " }\n", "}\n", "\n", "bindClickDetectToCells();\n", "\n", "/**\n", " * @name RT_Handler\n", " * @description A wrapper for our roundtrip object. 
It is called as a proxy for the\n", " * roundtrip object defined above. This enables us to define custom call backs for\n", " * gets and sets on the roundtrip object. The custom set handles necessary data conversion,\n", " * the registering of two-way bound variables and automatic updating of watched cells. The get\n", " * allows users to interact with the underlying object without worrying about the proxy.\n", " */\n", "var RT_Handler = {\n", " set(obj, prop, value){\n", " //Do cell housekeeping\n", "\n", "\n", " //Initial pass of value into roundtrip object\n", " // from python code; there may be multiple different\n", " // visualizations of the same type we need to catch\n", " if (typeof value === 'object' && value.hasOwnProperty('origin') && value.origin == 'INIT'){\n", " \n", " /**\n", " * In this code block we need to check if there is already a \n", " * an array of id's which are two way bound already defined and \n", " * add to it or remove from it\n", " */\n", " let ida = Jupyter.notebook.get_selected_index()-1;\n", " value.id = ida;\n", " let new_val = value;\n", "\n", " // Block updating bindings while jupyter is running\n", " if(refresh_cycle){\n", " new_val = obj[prop];\n", " new_val.data = value.data;\n", " return Reflect.set(obj, prop, new_val);\n", " }\n", "\n", " /**\n", " * The broad case where we are updating bindings \n", " * on existing data\n", " */\n", " if(obj[prop] != undefined){\n", " new_val = obj[prop];\n", " new_val.data = value.data;\n", " new_val.converter = value.converter;\n", "\n", " // If there is no two way array, create one\n", " // Else push on our new id\n", " if(value.two_way === true){\n", " if(!Object.keys(new_val.two_way).includes(value['python_var'])){\n", " new_val.two_way[value['python_var']] = [];\n", " }\n", "\n", " let pybinding = new_val.two_way[value['python_var']];\n", "\n", " if(!pybinding.includes(value.id)){\n", " pybinding.push(value.id);\n", " }\n", "\n", " }\n", "\n", " //Deregister a cell id from being 
two-way bound now\n", " else if(value.two_way === false && Object.keys(new_val.two_way).includes(value['python_var'])){\n", " let pybinding = new_val.two_way[value['python_var']];\n", " const index = pybinding.indexOf(value.id);\n", " \n", " if (index > -1) {\n", " pybinding.splice(index, 1);\n", " }\n", " }\n", " }\n", "\n", " //Initalize a new two-way object if\n", " // one did not exist\n", " else{\n", " if(new_val.two_way == true){\n", " new_val.two_way = {};\n", " new_val.two_way[value['python_var']] = [value.id];\n", " }\n", " else{\n", " new_val.two_way = {};\n", " }\n", " delete new_val.id;\n", " delete new_val.from_py;\n", " delete new_val.python_var;\n", " }\n", "\n", " return Reflect.set(obj, prop, new_val);\n", " }\n", " //Assignment from javascript code\n", " else {\n", " // TODO: make the py/js data identification object a\n", " // formal class\n", " if(obj[prop] === undefined){\n", " obj[prop] = {\n", " two_way: {},\n", " origin: \"JS\",\n", " data: null,\n", " python_var: \"\",\n", " converter: null,\n", " type: typeof(value)\n", " }\n", " }\n", "\n", " var execable_cells = [];\n", " let origin = 'STANDARD';\n", " let python_var = '';\n", "\n", " if (typeof value === 'object' && \n", " value.hasOwnProperty('origin') && \n", " value.origin == 'PYASSIGN'){\n", "\n", " origin = value.origin;\n", " python_var = value.python_var;\n", " value = value.data;\n", " }\n", "\n", " //TODO: Replace with imported, webpacked D3\n", " require(['https://d3js.org/d3.v4.min.js'], function(d3) {\n", "\n", " // When 2 way bound this calls automatically when something changes\n", " if (obj[prop] !== undefined && Object.keys(obj[prop][\"two_way\"]).length > 0){\n", "\n", " let current_cell = Number(clicked_cell);\n", " let py_var = '';\n", "\n", " //ust set the data without updating if our current cell is not two way bound\n", " if(origin == 'STANDARD'){\n", " let found = false;\n", " for(let key in obj[prop][\"two_way\"]){\n", " if 
(obj[prop][\"two_way\"][key].includes(current_cell)){\n", " found = true;\n", " py_var = key;\n", " }\n", " }\n", "\n", " if(!found){\n", " return Reflect.set(obj[prop], \"data\", value);\n", " }\n", " }\n", "\n", "\n", " if(origin == 'PYASSIGN'){\n", " py_var = python_var;\n", " }\n", "\n", "\n", " /**\n", " * We now have a list of registered cells we can execute.\n", " * So we look through our javascript variables to see if they\n", " * are bound to the same py variable as our current assignment\n", " * TODO: Make this list update when cells are moved up or down\n", " */\n", "\n", " for(let js_var in obj){\n", " let boundpyvars = Object.keys(obj[js_var][\"two_way\"]);\n", "\n", " if(boundpyvars.includes(py_var)){\n", " let clls = obj[js_var][\"two_way\"][py_var].filter(x => x != current_cell );\n", " execable_cells = execable_cells.concat(clls);\n", " }\n", " }\n", "\n", " if(origin == 'STANDARD'){\n", " // TODO:THROW AN ERROR IF CONVERTER == NONE\n", " const code = buildPythonAssignment(value, py_var, obj[prop][\"converter\"]);\n", " \n", " //TODO: Turn this into a function that manages error reporting and printing\n", " Jupyter.notebook.kernel.execute(code, { \n", " shell:{\n", " reply: function(r){\n", " //consider putting this in a reserved jupyter variable\n", " if(r.content.status == 'error'){\n", " console.error(`${r.content.ename} in JS->Python coversion:\\n ${r.content.evalue}`)\n", " }\n", " }\n", " }\n", " });\n", " }\n", "\n", " refresh_cycle = true;\n", " Jupyter.notebook.execute_cells(execable_cells);\n", "\n", " /**\n", " * Test every half second to see if some of the\n", " * jupyter cells are still running. 
Avoids a race condition\n", " * where incorrect ids were stored in our roundtrip object.\n", " */\n", " const test_running = function(){\n", " let runtest = d3.selectAll(\".running\");\n", " if(runtest.empty()){\n", " refresh_cycle = false;\n", " return;\n", " }\n", " else{\n", " setTimeout(test_running, 500);\n", " }\n", " }\n", "\n", " test_running();\n", " }\n", "\n", " });\n", " } \n", "\n", " return Reflect.set(obj[prop], \"data\", value);\n", " },\n", " get(obj, prop, reciever){\n", " let ret = obj[prop].data\n", " return ret; \n", " }\n", "}\n", "\n", "window.Roundtrip = new Proxy(Roundtrip_Obj, RT_Handler);\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import re\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from IPython.display import display\n", "from IPython.display import HTML\n", "import hatchet as ht\n", "\n", "import thicket as tt\n", "\n", "display(HTML(\"\"))" ] }, { "cell_type": "code", "execution_count": 2, "id": "6586fbfb", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:25.980433Z", "iopub.status.busy": "2024-09-06T18:35:25.980316Z", "iopub.status.idle": "2024-09-06T18:35:25.982266Z", "shell.execute_reply": "2024-09-06T18:35:25.982002Z" }, "papermill": { "duration": 0.004494, "end_time": "2024-09-06T18:35:25.982736", "exception": false, "start_time": "2024-09-06T18:35:25.978242", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Disable the Pandas 3 and Numpy Warnings for now\n", "import warnings\n", "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n", "warnings.filterwarnings(\"ignore\", category=FutureWarning) " ] }, { "cell_type": "markdown", "id": "841835d6", "metadata": { "papermill": { "duration": 0.001533, "end_time": "2024-09-06T18:35:25.985867", "exception": false, "start_time": 
"2024-09-06T18:35:25.984334", "status": "completed" }, "tags": [] }, "source": [ "## 2. Read in Performance Profiles\n", "\n", "For this notebook, we select profiles generated on the Lawrence Livermore National Laboratory (LLNL) machine Lassen. We create a single thicket object from four profiles (one per problem size) that all use the same block size of 128." ] }, { "cell_type": "code", "execution_count": 3, "id": "7643e225", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:25.989788Z", "iopub.status.busy": "2024-09-06T18:35:25.989670Z", "iopub.status.idle": "2024-09-06T18:35:26.080480Z", "shell.execute_reply": "2024-09-06T18:35:26.080133Z" }, "papermill": { "duration": 0.093758, "end_time": "2024-09-06T18:35:26.081184", "exception": false, "start_time": "2024-09-06T18:35:25.987426", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "problem_sizes = [\n", " \"1048576\", \n", " \"2097152\", \n", " \"4194304\", \n", " \"8388608\"\n", "]\n", "lassen1 = [f\"../data/lassen/clang10.0.1_nvcc10.2.89_{x}/1/Base_CUDA-block_128.cali\" for x in problem_sizes]\n", "lassen2 = [f\"../data/lassen/clang10.0.1_nvcc10.2.89_1048576/1/Base_CUDA-block_256.cali\"]\n", "\n", "# generate thicket(s)\n", "th_lassen = tt.Thicket.from_caliperreader(lassen1, disable_tqdm=True)" ] }, { "cell_type": "markdown", "id": "d603a02a", "metadata": { "papermill": { "duration": 0.001677, "end_time": "2024-09-06T18:35:26.084790", "exception": false, "start_time": "2024-09-06T18:35:26.083113", "status": "completed" }, "tags": [] }, "source": [ "## 3. Thicket Query Language \n", "\n", "**Use the Query Language**\n", "\n", "Thicket's query language lets users select (or `query`) specific nodes based on the call tree component in thicket. The performance data and statistics tables are updated as well, so that they contain only the nodes that remain in the call tree." 
] }, { "cell_type": "code", "execution_count": 4, "id": "a1212ef4", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:26.088442Z", "iopub.status.busy": "2024-09-06T18:35:26.088349Z", "iopub.status.idle": "2024-09-06T18:35:26.095300Z", "shell.execute_reply": "2024-09-06T18:35:26.095019Z" }, "papermill": { "duration": 0.009388, "end_time": "2024-09-06T18:35:26.095800", "exception": false, "start_time": "2024-09-06T18:35:26.086412", "status": "completed" }, "scrolled": false, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Initial call tree:\n", " _____ _ _ _ _ \n", " |_ _| |__ (_) ___| | _____| |_ \n", " | | | '_ \\| |/ __| |/ / _ \\ __|\n", " | | | | | | | (__| < __/ |_ \n", " |_| |_| |_|_|\\___|_|\\_\\___|\\__| v2024.1.0\n", "\n", "\u001b[38;5;196m1.781\u001b[0m RAJAPerf\u001b[0m\n", "├─ \u001b[38;5;22m0.007\u001b[0m Algorithm\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.002\u001b[0m Algorithm_MEMCPY\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.002\u001b[0m Algorithm_MEMSET\u001b[0m\n", "│ └─ \u001b[38;5;22m0.003\u001b[0m Algorithm_REDUCE_SUM\u001b[0m\n", "├─ \u001b[38;5;34m0.185\u001b[0m Apps\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.007\u001b[0m Apps_DEL_DOT_VEC_2D\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.039\u001b[0m Apps_ENERGY\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.004\u001b[0m Apps_FIR\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.035\u001b[0m Apps_HALOEXCHANGE\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.005\u001b[0m Apps_HALOEXCHANGE_FUSED\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.014\u001b[0m Apps_LTIMES\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.014\u001b[0m Apps_LTIMES_NOVIEW\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.008\u001b[0m Apps_NODAL_ACCUMULATION_3D\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.048\u001b[0m Apps_PRESSURE\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.006\u001b[0m Apps_VOL3D\u001b[0m\n", "│ └─ \u001b[38;5;22m0.004\u001b[0m Apps_ZONAL_ACCUMULATION_3D\u001b[0m\n", "├─ \u001b[38;5;34m0.358\u001b[0m Basic\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.009\u001b[0m 
Basic_COPY8\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.017\u001b[0m Basic_DAXPY\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.017\u001b[0m Basic_DAXPY_ATOMIC\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.012\u001b[0m Basic_IF_QUAD\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.028\u001b[0m Basic_INIT3\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.042\u001b[0m Basic_INIT_VIEW1D\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.042\u001b[0m Basic_INIT_VIEW1D_OFFSET\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.020\u001b[0m Basic_MULADDSUB\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.021\u001b[0m Basic_NESTED_INIT\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.127\u001b[0m Basic_PI_ATOMIC\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.003\u001b[0m Basic_PI_REDUCE\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.002\u001b[0m Basic_REDUCE3_INT\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.016\u001b[0m Basic_REDUCE_STRUCT\u001b[0m\n", "│ └─ \u001b[38;5;22m0.003\u001b[0m Basic_TRAP_INT\u001b[0m\n", "├─ \u001b[38;5;34m0.386\u001b[0m Lcals\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.062\u001b[0m Lcals_DIFF_PREDICT\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.023\u001b[0m Lcals_EOS\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.048\u001b[0m Lcals_FIRST_DIFF\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.006\u001b[0m Lcals_FIRST_MIN\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.048\u001b[0m Lcals_FIRST_SUM\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.049\u001b[0m Lcals_GEN_LIN_RECUR\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.034\u001b[0m Lcals_HYDRO_1D\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.023\u001b[0m Lcals_HYDRO_2D\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.047\u001b[0m Lcals_INT_PREDICT\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.003\u001b[0m Lcals_PLANCKIAN\u001b[0m\n", "│ └─ \u001b[38;5;22m0.045\u001b[0m Lcals_TRIDIAG_ELIM\u001b[0m\n", "├─ \u001b[38;5;46m0.583\u001b[0m Polybench\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.006\u001b[0m Polybench_2MM\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.009\u001b[0m Polybench_3MM\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.037\u001b[0m Polybench_ADI\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.026\u001b[0m Polybench_ATAX\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.037\u001b[0m 
Polybench_FDTD_2D\u001b[0m\n", "│ ├─ \u001b[38;5;34m0.206\u001b[0m Polybench_FLOYD_WARSHALL\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.006\u001b[0m Polybench_GEMM\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.007\u001b[0m Polybench_GEMVER\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.026\u001b[0m Polybench_GESUMMV\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.022\u001b[0m Polybench_HEAT_3D\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.077\u001b[0m Polybench_JACOBI_1D\u001b[0m\n", "│ ├─ \u001b[38;5;22m0.102\u001b[0m Polybench_JACOBI_2D\u001b[0m\n", "│ └─ \u001b[38;5;22m0.022\u001b[0m Polybench_MVT\u001b[0m\n", "└─ \u001b[38;5;34m0.261\u001b[0m Stream\u001b[0m\n", " ├─ \u001b[38;5;22m0.034\u001b[0m Stream_ADD\u001b[0m\n", " ├─ \u001b[38;5;22m0.043\u001b[0m Stream_COPY\u001b[0m\n", " ├─ \u001b[38;5;22m0.108\u001b[0m Stream_DOT\u001b[0m\n", " ├─ \u001b[38;5;22m0.043\u001b[0m Stream_MUL\u001b[0m\n", " └─ \u001b[38;5;22m0.034\u001b[0m Stream_TRIAD\u001b[0m\n", "\n", "\u001b[4mLegend\u001b[0m (Metric: Total time Min: 0.00 Max: 1.78 indices: {'profile': 1814734126})\n", "\u001b[38;5;196m█ \u001b[0m1.60 - 1.78\n", "\u001b[38;5;208m█ \u001b[0m1.25 - 1.60\n", "\u001b[38;5;220m█ \u001b[0m0.89 - 1.25\n", "\u001b[38;5;46m█ \u001b[0m0.54 - 0.89\n", "\u001b[38;5;34m█ \u001b[0m0.18 - 0.54\n", "\u001b[38;5;22m█ \u001b[0m0.00 - 0.18\n", "\n", "name\u001b[0m User code \u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right graph\n", "\n" ] } ], "source": [ "print(\"Initial call tree:\")\n", "print(th_lassen.tree(\"Total time\"))" ] }, { "cell_type": "markdown", "id": "71c35593", "metadata": { "papermill": { "duration": 0.001668, "end_time": "2024-09-06T18:35:26.099203", "exception": false, "start_time": "2024-09-06T18:35:26.097535", "status": "completed" }, "tags": [] }, "source": [ "### Example Query 1: Find a Subgraph with a Specific Root\n", "\n", "This example shows how to find a subtree starting with a specific root. 
More specifically, the query in this example finds a subtree rooted at the node with the name \"Stream\" followed by all nodes down to the leaf nodes.\n", "\n", "NOTE: A DeprecationWarning is generated when using “old-style” queries (i.e., queries with QueryMatcher) if you have Hatchet>=2023.1.0 installed." ] }, { "cell_type": "code", "execution_count": 5, "id": "6a2f4c62", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:26.102842Z", "iopub.status.busy": "2024-09-06T18:35:26.102750Z", "iopub.status.idle": "2024-09-06T18:35:26.163492Z", "shell.execute_reply": "2024-09-06T18:35:26.163200Z" }, "papermill": { "duration": 0.063274, "end_time": "2024-09-06T18:35:26.164054", "exception": false, "start_time": "2024-09-06T18:35:26.100780", "status": "completed" }, "scrolled": false, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _____ _ _ _ _ \n", " |_ _| |__ (_) ___| | _____| |_ \n", " | | | '_ \\| |/ __| |/ / _ \\ __|\n", " | | | | | | | (__| < __/ |_ \n", " |_| |_| |_|_|\\___|_|\\_\\___|\\__| v2024.1.0\n", "\n", "\u001b[38;5;196m0.261\u001b[0m Stream\u001b[0m\n", "├─ \u001b[38;5;22m0.034\u001b[0m Stream_ADD\u001b[0m\n", "├─ \u001b[38;5;22m0.043\u001b[0m Stream_COPY\u001b[0m\n", "├─ \u001b[38;5;46m0.108\u001b[0m Stream_DOT\u001b[0m\n", "├─ \u001b[38;5;22m0.043\u001b[0m Stream_MUL\u001b[0m\n", "└─ \u001b[38;5;22m0.034\u001b[0m Stream_TRIAD\u001b[0m\n", "\n", "\u001b[4mLegend\u001b[0m (Metric: Total time Min: 0.03 Max: 0.26 indices: {'profile': 1814734126})\n", "\u001b[38;5;196m█ \u001b[0m0.24 - 0.26\n", "\u001b[38;5;208m█ \u001b[0m0.19 - 0.24\n", "\u001b[38;5;220m█ \u001b[0m0.15 - 0.19\n", "\u001b[38;5;46m█ \u001b[0m0.10 - 0.15\n", "\u001b[38;5;34m█ \u001b[0m0.06 - 0.10\n", "\u001b[38;5;22m█ \u001b[0m0.03 - 0.06\n", "\n", "name\u001b[0m User code \u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right graph\n", "\n" ] } ], "source": [ "query_ex1 = (\n", " ht.QueryMatcher()\n", " 
.match (\n", " \".\", \n", " lambda row: row[\"name\"].apply(\n", " lambda x: re.match(\n", " \"Stream\", x\n", " )\n", " is not None\n", " ).all()\n", " )\n", " .rel(\"*\")\n", ")\n", "\n", "# applying the first query on the lassen thicket\n", "th_ex1 = th_lassen.query(query_ex1)\n", "print(th_ex1.tree(\"Total time\"))" ] }, { "cell_type": "markdown", "id": "39c03f96", "metadata": { "papermill": { "duration": 0.001769, "end_time": "2024-09-06T18:35:26.167774", "exception": false, "start_time": "2024-09-06T18:35:26.166005", "status": "completed" }, "tags": [] }, "source": [ "### Example Query 2: Find All Paths Ending with a Specific Node\n", "\n", "This example shows how to find all paths of a GraphFrame ending with a specific node. More specifically, the queries in this example can be used to find paths ending with a node named \"Stream\"." ] }, { "cell_type": "code", "execution_count": 6, "id": "037f5ad7", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:26.171508Z", "iopub.status.busy": "2024-09-06T18:35:26.171416Z", "iopub.status.idle": "2024-09-06T18:35:26.238378Z", "shell.execute_reply": "2024-09-06T18:35:26.238100Z" }, "papermill": { "duration": 0.069413, "end_time": "2024-09-06T18:35:26.238887", "exception": false, "start_time": "2024-09-06T18:35:26.169474", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _____ _ _ _ _ \n", " |_ _| |__ (_) ___| | _____| |_ \n", " | | | '_ \\| |/ __| |/ / _ \\ __|\n", " | | | | | | | (__| < __/ |_ \n", " |_| |_| |_|_|\\___|_|\\_\\___|\\__| v2024.1.0\n", "\n", "\u001b[38;5;196m1.781\u001b[0m RAJAPerf\u001b[0m\n", "└─ \u001b[38;5;34m0.261\u001b[0m Stream\u001b[0m\n", " ├─ \u001b[38;5;22m0.034\u001b[0m Stream_ADD\u001b[0m\n", " ├─ \u001b[38;5;22m0.043\u001b[0m Stream_COPY\u001b[0m\n", " ├─ \u001b[38;5;22m0.108\u001b[0m Stream_DOT\u001b[0m\n", " ├─ \u001b[38;5;22m0.043\u001b[0m Stream_MUL\u001b[0m\n", " └─ \u001b[38;5;22m0.034\u001b[0m 
Stream_TRIAD\u001b[0m\n", "\n", "\u001b[4mLegend\u001b[0m (Metric: Total time Min: 0.03 Max: 1.78 indices: {'profile': 1814734126})\n", "\u001b[38;5;196m█ \u001b[0m1.61 - 1.78\n", "\u001b[38;5;208m█ \u001b[0m1.26 - 1.61\n", "\u001b[38;5;220m█ \u001b[0m0.91 - 1.26\n", "\u001b[38;5;46m█ \u001b[0m0.56 - 0.91\n", "\u001b[38;5;34m█ \u001b[0m0.21 - 0.56\n", "\u001b[38;5;22m█ \u001b[0m0.03 - 0.21\n", "\n", "name\u001b[0m User code \u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right graph\n", "\n" ] } ], "source": [ "query_ex2 = (\n", " ht.QueryMatcher()\n", " .match(\"*\")\n", " .rel(\n", " \".\",\n", " lambda row: row[\"name\"].apply(\n", " lambda x: re.match(\n", " \"Stream\", x\n", " )\n", " is not None\n", " ).all()\n", " )\n", ")\n", "\n", "# applying the second query on the lassen thicket\n", "th_ex2 = th_lassen.query(query_ex2)\n", "print(th_ex2.tree(\"Total time\"))" ] }, { "cell_type": "markdown", "id": "3c70ab25", "metadata": { "papermill": { "duration": 0.001797, "end_time": "2024-09-06T18:35:26.242668", "exception": false, "start_time": "2024-09-06T18:35:26.240871", "status": "completed" }, "tags": [] }, "source": [ "### Example Query 3: Find All Paths with Specific Starting and Ending Nodes\n", "\n", "This example shows how to find all call paths starting with and ending with specific nodes. More specifically, the query in this example finds paths starting with a node named \"Stream\" and ending with a node named \"Stream_MUL\"." 
] }, { "cell_type": "code", "execution_count": 7, "id": "28ae2678", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:26.246637Z", "iopub.status.busy": "2024-09-06T18:35:26.246542Z", "iopub.status.idle": "2024-09-06T18:35:26.311648Z", "shell.execute_reply": "2024-09-06T18:35:26.311266Z" }, "papermill": { "duration": 0.067838, "end_time": "2024-09-06T18:35:26.312234", "exception": false, "start_time": "2024-09-06T18:35:26.244396", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _____ _ _ _ _ \n", " |_ _| |__ (_) ___| | _____| |_ \n", " | | | '_ \\| |/ __| |/ / _ \\ __|\n", " | | | | | | | (__| < __/ |_ \n", " |_| |_| |_|_|\\___|_|\\_\\___|\\__| v2024.1.0\n", "\n", "\u001b[38;5;196m0.261\u001b[0m Stream\u001b[0m\n", "└─ \u001b[38;5;22m0.043\u001b[0m Stream_MUL\u001b[0m\n", "\n", "\u001b[4mLegend\u001b[0m (Metric: Total time Min: 0.04 Max: 0.26 indices: {'profile': 1814734126})\n", "\u001b[38;5;196m█ \u001b[0m0.24 - 0.26\n", "\u001b[38;5;208m█ \u001b[0m0.20 - 0.24\n", "\u001b[38;5;220m█ \u001b[0m0.15 - 0.20\n", "\u001b[38;5;46m█ \u001b[0m0.11 - 0.15\n", "\u001b[38;5;34m█ \u001b[0m0.06 - 0.11\n", "\u001b[38;5;22m█ \u001b[0m0.04 - 0.06\n", "\n", "name\u001b[0m User code \u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right graph\n", "\n" ] } ], "source": [ "query_ex3 = (\n", " ht.QueryMatcher()\n", " .match(\n", " \".\",\n", " lambda row: row[\"name\"].apply(\n", " lambda x: re.match(\n", " \"Stream\", x\n", " )\n", " is not None\n", " ).all()\n", " )\n", " .rel(\"*\")\n", " .rel(\n", " \".\",\n", " lambda row: row[\"name\"].apply(\n", " lambda x: re.match(\n", " \"Stream_MUL\", x\n", " )\n", " is not None\n", " ).all()\n", " )\n", ")\n", "\n", "# applying the third query on the lassen thicket\n", "th_ex3 = th_lassen.query(query_ex3)\n", "print(th_ex3.tree(\"Total time\"))" ] }, { "cell_type": "markdown", "id": "aa358ba0", "metadata": { "papermill": { 
"duration": 0.002126, "end_time": "2024-09-06T18:35:26.316311", "exception": false, "start_time": "2024-09-06T18:35:26.314185", "status": "completed" }, "tags": [] }, "source": [ "### Example Query 4: Find All Nodes for a Particular Software Library\n", "\n", "This example shows how to find all call paths representing a specific software library. It is simply a variant of finding a subtree with a given root, as shown in Example Query 1. The query below can be adapted, for instance, to find the nodes for a subset of the MPI library. Here, we look for subtrees rooted at Polybench_2MM, Basic_DAXPY, and Apps_ENERGY." ] }, { "cell_type": "code", "execution_count": 8, "id": "99a509de", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:26.320226Z", "iopub.status.busy": "2024-09-06T18:35:26.320111Z", "iopub.status.idle": "2024-09-06T18:35:26.393521Z", "shell.execute_reply": "2024-09-06T18:35:26.393199Z" }, "papermill": { "duration": 0.076067, "end_time": "2024-09-06T18:35:26.394107", "exception": false, "start_time": "2024-09-06T18:35:26.318040", "status": "completed" }, "scrolled": true, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _____ _ _ _ _ \n", " |_ _| |__ (_) ___| | _____| |_ \n", " | | | '_ \\| |/ __| |/ / _ \\ __|\n", " | | | | | | | (__| < __/ |_ \n", " |_| |_| |_|_|\\___|_|\\_\\___|\\__| v2024.1.0\n", "\n", "\u001b[38;5;196m0.039\u001b[0m Apps_ENERGY\u001b[0m\n", "\u001b[38;5;46m0.017\u001b[0m Basic_DAXPY\u001b[0m\n", "\u001b[38;5;22m0.006\u001b[0m Polybench_2MM\u001b[0m\n", "\n", "\u001b[4mLegend\u001b[0m (Metric: Total time Min: 0.01 Max: 0.04 indices: {'profile': 1814734126})\n", "\u001b[38;5;196m█ \u001b[0m0.04 - 0.04\n", "\u001b[38;5;208m█ \u001b[0m0.03 - 0.04\n", "\u001b[38;5;220m█ \u001b[0m0.02 - 0.03\n", "\u001b[38;5;46m█ \u001b[0m0.02 - 0.02\n", "\u001b[38;5;34m█ \u001b[0m0.01 - 0.02\n", "\u001b[38;5;22m█ \u001b[0m0.01 - 0.01\n", "\n", "name\u001b[0m User code 
\u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right graph\n", "\n" ] } ], "source": [ "api_entrypoints = [\n", " \"Polybench_2MM\",\n", " \"Basic_DAXPY\",\n", " \"Apps_ENERGY\",\n", "]\n", "\n", "query_ex4 = (\n", " ht.QueryMatcher()\n", " .match(\n", " \".\",\n", " lambda row: row[\"name\"].apply(\n", " lambda x: x in api_entrypoints\n", " ).all()\n", " )\n", " .rel(\"*\")\n", ")\n", "\n", "# applying the fourth query on the lassen thicket\n", "th_ex4 = th_lassen.query(query_ex4)\n", "print(th_ex4.tree(\"Total time\"))" ] }, { "cell_type": "markdown", "id": "596fd8bd", "metadata": { "papermill": { "duration": 0.001847, "end_time": "2024-09-06T18:35:26.398009", "exception": false, "start_time": "2024-09-06T18:35:26.396162", "status": "completed" }, "tags": [] }, "source": [ "### Example Query 5: Find All Paths through a Specific Node\n", "\n", "This example shows how to find all call paths that pass through a specific node. More specifically, the query below finds all paths that pass through a node named \"Stream\"." 
] }, { "cell_type": "code", "execution_count": 9, "id": "92f36b94", "metadata": { "execution": { "iopub.execute_input": "2024-09-06T18:35:26.401918Z", "iopub.status.busy": "2024-09-06T18:35:26.401826Z", "iopub.status.idle": "2024-09-06T18:35:26.477512Z", "shell.execute_reply": "2024-09-06T18:35:26.477231Z" }, "papermill": { "duration": 0.078406, "end_time": "2024-09-06T18:35:26.478209", "exception": false, "start_time": "2024-09-06T18:35:26.399803", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _____ _ _ _ _ \n", " |_ _| |__ (_) ___| | _____| |_ \n", " | | | '_ \\| |/ __| |/ / _ \\ __|\n", " | | | | | | | (__| < __/ |_ \n", " |_| |_| |_|_|\\___|_|\\_\\___|\\__| v2024.1.0\n", "\n", "\u001b[38;5;196m1.781\u001b[0m RAJAPerf\u001b[0m\n", "└─ \u001b[38;5;34m0.261\u001b[0m Stream\u001b[0m\n", " ├─ \u001b[38;5;22m0.034\u001b[0m Stream_ADD\u001b[0m\n", " ├─ \u001b[38;5;22m0.043\u001b[0m Stream_COPY\u001b[0m\n", " ├─ \u001b[38;5;22m0.108\u001b[0m Stream_DOT\u001b[0m\n", " ├─ \u001b[38;5;22m0.043\u001b[0m Stream_MUL\u001b[0m\n", " └─ \u001b[38;5;22m0.034\u001b[0m Stream_TRIAD\u001b[0m\n", "\n", "\u001b[4mLegend\u001b[0m (Metric: Total time Min: 0.03 Max: 1.78 indices: {'profile': 1814734126})\n", "\u001b[38;5;196m█ \u001b[0m1.61 - 1.78\n", "\u001b[38;5;208m█ \u001b[0m1.26 - 1.61\n", "\u001b[38;5;220m█ \u001b[0m0.91 - 1.26\n", "\u001b[38;5;46m█ \u001b[0m0.56 - 0.91\n", "\u001b[38;5;34m█ \u001b[0m0.21 - 0.56\n", "\u001b[38;5;22m█ \u001b[0m0.03 - 0.21\n", "\n", "name\u001b[0m User code \u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right graph\n", "\n" ] } ], "source": [ "query_ex5 = (\n", " ht.QueryMatcher()\n", " .match(\"*\")\n", " .rel(\n", " \".\",\n", " lambda row: row[\"name\"].apply(\n", " lambda x: re.match(\n", " \"Stream\", x\n", " )\n", " is not None\n", " ).all()\n", " )\n", " .rel(\"*\")\n", ")\n", "\n", "# applying the fifth query on the lassen thicket\n", "th_ex5 = 
th_lassen.query(query_ex5)\n", "print(th_ex5.tree(\"Total time\"))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" }, "papermill": { "default_parameters": {}, "duration": 2.113542, "end_time": "2024-09-06T18:35:26.803553", "environment_variables": {}, "exception": null, "input_path": "05_thicket_query_language.ipynb", "output_path": "05_thicket_query_language.ipynb", "parameters": {}, "start_time": "2024-09-06T18:35:24.690011", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }