
All new code

koerschens 3 years ago
commit
ea63fd6003
4 changed files with 183 additions and 0 deletions
  1. .gitignore (+1 -0)
  2. gpu_status.sh (+2 -0)
  3. python/gpu_status.py (+66 -0)
  4. python/renderer.py (+114 -0)

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+miniconda3/

+ 2 - 0
gpu_status.sh

@@ -0,0 +1,2 @@
+#!/bin/bash
+

+ 66 - 0
python/gpu_status.py

@@ -0,0 +1,66 @@
+import argparse
+from typing import Protocol
+
+import requests
+from renderer import Renderer
+
+parser = argparse.ArgumentParser(description="Prints the status of server nodes and their GPUs.")
+
+parser.add_argument(
+    "-s", 
+    "--server", 
+    type=str, 
+    default="deimos", 
+    help="The name of the server to query."
+)
+parser.add_argument(
+    "-p",
+    "--port",
+    type=str,
+    default="8091",
+    help="The port of the server to query.",
+)
+parser.add_argument(
+    "-t", 
+    "--protocol", 
+    type=str, 
+    default="http", 
+    choices=("http", "https"), 
+    help="The protocol to use for the query.",
+)
+parser.add_argument(
+    "-c", 
+    "--compact", 
+    action="store_true", 
+    help="Flag; if set, a compact overview will be displayed."
+)
+parser.add_argument(
+    "-f",
+    "--filter",
+    type=str, 
+    nargs="*",
+    default=None,
+    help="The node names of nodes to filter for."
+)
+
+args = parser.parse_args()
+
+response = requests.get(f"{args.protocol}://{args.server}:{args.port}/api/clients/")
+
+if not args.compact:
+    renderer = Renderer(
+        columns=1,
+        progress_bar_width=50,
+        use_space_lines=True,
+        node_names=args.filter
+    )
+else:
+    renderer = Renderer(
+        columns=2,
+        progress_bar_width=40,
+        use_space_lines=False,
+        node_names=args.filter,
+        display_power=False,
+    )
+
+print(renderer.render_info_dict(response.json()))
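
For context, the renderer consumes the JSON returned by the /api/clients/ endpoint. A minimal sketch of that payload's shape, inferred from the keys renderer.py reads; all names and numbers here are illustrative placeholders, not values from the commit:

# Hypothetical example of the /api/clients/ response,
# reconstructed from the keys that renderer.py accesses.
example_response = [
    {
        "name": "node01",              # placeholder node name
        "total_memory_mb": 128000,
        "latest_info": {
            "used_memory_mb": 42000,
            "cpu_utilization": 12.5,
            "temperature": 48,
        },
        "gpus": [
            {
                "type": "RTX 3090",    # placeholder GPU type
                "index": 0,
                "total_memory_mb": 24576,
                "running_processes": [],
                "latest_info": {
                    "used_memory_mb": 1024,
                    "utilization": 7,
                    "temperature": 41,
                    "power_draw": 65.0,
                },
            },
        ],
    },
]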

+ 114 - 0
python/renderer.py

@@ -0,0 +1,114 @@
+from math import ceil
+
+
+class Renderer:
+    def __init__(self, progress_bar_width=50, columns=1, use_space_lines=True, node_names=None, display_power=True) -> None:
+        self.progress_bar_width = progress_bar_width
+        self.columns = columns
+        self.use_space_lines = use_space_lines
+        self.node_names = node_names
+        self.display_power = display_power
+
+    def render_info_dict(self, info_dict):
+        line_blocks = []
+
+        for node_dict in info_dict:
+            if self.node_names is None or node_dict["name"] in self.node_names:
+                lines = self.render_node(node_dict)
+
+                line_blocks.append(lines)
+
+        first_line = "|" + "=" * (len(lines[-1]) - 2) + "|"
+        
+        final_lines = []
+
+        n_rows = ceil(len(line_blocks) / self.columns)
+
+        # Format rows and columns
+        for row in range(n_rows):
+            lines = []
+
+            for col in range(self.columns):
+                if col * n_rows + row < len(line_blocks):
+                    if len(lines) == 0:
+                        lines.extend(line_blocks[col * n_rows + row])
+                    else:
+                        for i, line in enumerate(line_blocks[col * n_rows + row]):
+                            lines[i] += line
+            
+            final_lines.extend(lines)            
+
+        final_lines.insert(0, first_line * self.columns)
+
+        #lines.append("=" * len(lines[-1]))
+
+        return "\n".join(final_lines)
+
+    def render_node(self, node_dict):
+        name = node_dict["name"]
+        mem_used = node_dict["latest_info"]["used_memory_mb"]
+        mem_total = node_dict["total_memory_mb"]
+        utilization = node_dict["latest_info"]["cpu_utilization"]
+        temp = node_dict["latest_info"]["temperature"]
+
+        head_line = "|- Node: " + name + " "
+        info_line = f"| CPU: {utilization:>4.1f}%   Memory: {mem_used:>6}/{mem_total:<6} MB   Temp: {temp:>3}°C"
+
+        lines = []
+
+        for i, gpu_dict in enumerate(node_dict["gpus"]):
+            lines.extend(self.get_rendered_gpu_lines(gpu_dict))
+
+            if i != len(node_dict["gpus"]) - 1:
+                if self.use_space_lines:
+                    lines.append("|" + "-" * (len(lines[-1]) - 2) + "|")
+                else:
+                    lines.append("|" + " " * (len(lines[-1]) - 2) + "|")
+            else:
+                lines.append("|" + "=" * (len(lines[-1]) - 2) + "|")
+
+        head_line = head_line + "-" * (len(lines[-1]) - len(head_line) - 1) + "|"
+        info_line = info_line + " " * (len(lines[-1]) - len(info_line) - 1) + "|"
+        pad_line = "|" + "-" * (len(lines[-1]) - 2) + "|"
+        pad_line_empty = "|" + " " * (len(lines[-1]) - 2) + "|"
+
+        lines.insert(0, pad_line)
+        if self.use_space_lines:
+            lines.insert(0, pad_line_empty)
+        lines.insert(0, info_line)
+        if self.use_space_lines:
+            lines.insert(0, pad_line_empty)
+        lines.insert(0, head_line)
+
+        return lines
+
+
+    def get_rendered_gpu_lines(self, gpu_dict):
+        gpu_type = gpu_dict["type"]
+        index = gpu_dict["index"]
+        mem_used = gpu_dict["latest_info"]["used_memory_mb"]
+        mem_total = gpu_dict["total_memory_mb"]
+        utilization = gpu_dict["latest_info"]["utilization"]
+        temp = gpu_dict["latest_info"]["temperature"]
+        n_processes = len(gpu_dict["running_processes"])
+        power = gpu_dict["latest_info"]["power_draw"]
+
+        mem_used_percent = int(self.progress_bar_width * mem_used / mem_total)
+        rest_mem = self.progress_bar_width - mem_used_percent
+
+        line_util = "| [" + \
+            "=" * mem_used_percent + \
+            " " * rest_mem + "]" + \
+            f"{mem_used:>6}/{mem_total:<6} MB,  Util: {int(utilization):>3}% |"
+
+        line_meta = f"| GPU #{index} ({gpu_type}):   #Proc.: {n_processes}   Temp: {temp:>3}°C   " + (f"Power: {int(power):>3} W" if self.display_power else "")
+
+        line_meta = line_meta + " " * (len(line_util) - len(line_meta) - 1) + "|"
+        empty_line = "|" + " " * (len(line_meta) - 2) + "|"
+
+        if self.use_space_lines:
+            return [empty_line, line_meta, line_util, empty_line]
+        else:
+            return [line_meta, line_util]
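
To exercise the Renderer without the HTTP call, a minimal sketch run from the python/ directory, assuming a payload shaped like the example above (all values are illustrative):

# Hypothetical local test: build a minimal payload and render it with
# both the default layout and the compact layout used by gpu_status.py.
from renderer import Renderer

info = [
    {
        "name": "node01",
        "total_memory_mb": 128000,
        "latest_info": {"used_memory_mb": 42000, "cpu_utilization": 12.5, "temperature": 48},
        "gpus": [
            {
                "type": "RTX 3090",
                "index": 0,
                "total_memory_mb": 24576,
                "running_processes": [],
                "latest_info": {"used_memory_mb": 1024, "utilization": 7, "temperature": 41, "power_draw": 65.0},
            },
        ],
    },
]

# Full view (one column, spacer lines, power draw shown).
print(Renderer(columns=1, progress_bar_width=50, use_space_lines=True).render_info_dict(info))

# Compact view, mirroring the --compact flag in gpu_status.py.
print(Renderer(columns=2, progress_bar_width=40, use_space_lines=False, display_power=False).render_info_dict(info))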
+
+