Fully working version, but lacks unit tests

2026-01-12 07:20:58 -03:00 · 2024-02-15 11:51:18 -03:00
parent e9c43f70c7
commit 8750ff4dea
4 changed files with 165 additions and 26 deletions
--- a/helper_functions.py
+++ b/helper_functions.py
@@ -0,0 +1,97 @@
+import pynvml
+import datetime
+import time
+
+# Print msg prefixed with the local timestamp, formatted as LOG[YYYY-MM-DD HH:MM:SS].
+# The outer quotes are double: reusing ' inside a '...' f-string needs Python 3.12+.
+def log_helper(msg):
+    print(f"LOG[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}]: {msg}")
+
+def print_help():  # Prints the usage text; for now that is only a placeholder string.
+    print('HELP TEXT')
+
+def list_gpus():
+    """Print the index, name and UUID of every device NVML reports."""
+    for idx in range(pynvml.nvmlDeviceGetCount()):
+        dev = pynvml.nvmlDeviceGetHandleByIndex(idx)
+        dev_name, dev_uuid = pynvml.nvmlDeviceGetName(dev), pynvml.nvmlDeviceGetUUID(dev)
+        print(f'Device {idx} name : {dev_name} - UUID: {dev_uuid}')
+
+def print_GPU_info(gpu_handle):  # Logs the driver version, then the device's name, UUID, fan speed, temperature and fan-controller count.
+    log_helper(f"Driver Version: {pynvml.nvmlSystemGetDriverVersion()}")
+    log_helper(f'Device name : {pynvml.nvmlDeviceGetName(gpu_handle)}')
+    log_helper(f'Device UUID : {pynvml.nvmlDeviceGetUUID(gpu_handle)}')
+    log_helper(f'Device fan speed : {pynvml.nvmlDeviceGetFanSpeed(gpu_handle)}%')
+    log_helper(f'Temperature {pynvml.nvmlDeviceGetTemperature(gpu_handle, 0)}°C')  # sensor 0 is NVML_TEMPERATURE_GPU
+    log_helper(f"Fan controller count {pynvml.nvmlDeviceGetNumFans(gpu_handle)}")  # controllers, not physical fans
+
+def fan_control(configuration):  # Runs the 'fan-control' action: find the GPU, log its info, start the control loop.
+    gpu_handle = get_GPU_handle(configuration.target_gpu)  # raises when no device has that name
+    print_GPU_info(gpu_handle)
+    control_and_monitor(gpu_handle, configuration)  # loops until the process is killed
+
+class GpuNotFound(Exception):
+    """Raised when no NVML device has the requested name."""
+
+# Search for a GPU by its exact NVML device name and return a handle
+def get_GPU_handle(gpu_name):
+    for i in range(pynvml.nvmlDeviceGetCount()):
+        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+        if pynvml.nvmlDeviceGetName(handle) == gpu_name:
+            return handle
+
+    print(f'It was not possible to locate the target device : {gpu_name}')
+    raise GpuNotFound('It was not possible to locate the device')
+
+def set_gpu_fan_speed(gpu_handle, speed_percentage, dry_run):
+    """Set every fan controller of gpu_handle to speed_percentage.
+
+    DANGEROUS: this writes to the hardware unless dry_run is truthy.
+    """
+    # This is not really the number of fans, but the number of controllers
+    fan_count = pynvml.nvmlDeviceGetNumFans(gpu_handle)
+    if dry_run:
+        return
+    for fan_idx in range(fan_count):
+        pynvml.nvmlDeviceSetFanSpeed_v2(gpu_handle, fan_idx, speed_percentage)
+
+
+# Control GPU functions and monitor for changes (e.g. temperature)
+def control_and_monitor(gpu_handle, configuration):
+    """Poll the GPU forever and keep its fan speed on the configured curve.
+
+    Every configuration.time_interval seconds the temperature is read; the first
+    entry of configuration.temp_speed_pair whose temperature has been reached gives
+    the target speed, otherwise configuration.default_speed is used.
+    Never returns: one must kill the process to stop it.
+    """
+    previous_speed = 0
+    while True:
+        current_temp = pynvml.nvmlDeviceGetTemperature(gpu_handle, 0)  # 0 = NVML_TEMPERATURE_GPU
+        current_speed = pynvml.nvmlDeviceGetFanSpeed(gpu_handle)
+        log_helper(f'Current temp: {current_temp}')
+        log_helper(f'Current speed: {current_speed}')
+
+        # Remember that the list starts with the highest temp value and keeps lowering it
+        for pair in configuration.temp_speed_pair:
+            if current_temp >= pair.temperature:
+                target_speed = pair.speed
+                message = f'Setting GPU fan speed: {target_speed}%'
+                break
+        else:
+            # We didn't find a match, use the default speed
+            target_speed = configuration.default_speed
+            message = f'Found no temperature match, using default fan speed: {target_speed}'
+
+        # Only send commands to the GPU if necessary (this now covers the default speed too)
+        if previous_speed != target_speed or current_speed != target_speed:
+            set_gpu_fan_speed(gpu_handle, target_speed, configuration.dry_run)
+            previous_speed = target_speed
+            log_helper(message)
+        else:
+            log_helper('Same as previous speed, nothing to do!')
+
+        time.sleep(configuration.time_interval)
+
+            
+
--- a/nvml_gpu_control.py
+++ b/nvml_gpu_control.py
@@ -1,32 +1,36 @@
 from pynvml import *
+import sys
+import helper_functions as main_funcs
+import parse_args

-nvmlInit()
+def main():
+    """Parse the command line, start NVML, run the requested action and always shut NVML down."""
+    config = parse_args.parse_cmd_args(sys.argv)

-print(f"Driver Version: {nvmlSystemGetDriverVersion()}")
+    # Starting nvml outside the try: nvmlShutdown() must only run after a successful init
+    nvmlInit()
+    try:

+        # Compare the major component only: float() rejects versions such as 535.104.05
+        driver_version = nvmlSystemGetDriverVersion()
+        if int(driver_version.split(b'.' if isinstance(driver_version, bytes) else '.')[0]) < 520:
+            print('WARNING: You are running an unsupported driver, you may have problems')

-deviceCount = nvmlDeviceGetCount()
+        match config.action:

+            # Help doesn't require nvml (TODO change code paths)
+            case 'help':
+                main_funcs.print_help()

-for i in range(deviceCount):
-    handle = nvmlDeviceGetHandleByIndex(i)
-    print(f"Device {i} : {nvmlDeviceGetName(handle)}")
+            case 'list':
+                main_funcs.list_gpus()

-    print(f"Device fan speed : {nvmlDeviceGetFanSpeed(handle)}%")
-    print(f"Temperature {nvmlDeviceGetTemperature(handle, 0)}°C")
+            case 'fan-control':
+                main_funcs.fan_control(config)
+    # One should call shutdown with or without errors, this is why I am using finally
+    finally:
+        print('Calling nvml shutdown and teminating the program')
+        nvmlShutdown()

-    # This is not really the number of fan, but the number of controllers
-    fan_count = nvmlDeviceGetNumFans(handle)
-    print(f"Fan count {fan_count}")
-
-    for fan_idx in range(fan_count):
-        fan_speed = nvmlDeviceGetFanSpeed_v2(handle, fan_idx)
-        print(f"Fan {fan_idx} : {fan_speed}%")
-
-        # Setting the fan speed DANGEROUS!
-        target_fan_speed = 100
-        nvmlDeviceSetFanSpeed_v2(handle, fan_idx, target_fan_speed)
-        print(f"Target fan speed set: {target_fan_speed}%")
-
-
-nvmlShutdown()
+if __name__ == '__main__':
+    main()
--- a/parse_args.py
+++ b/parse_args.py
@@ -27,6 +27,7 @@ class Configuration:
        self.curve_type = "fixed" # Currently for internal usage only (I want to later add calculation for lines and curves fuctions)
        self.default_speed = 50 # Percentage
        self.time_interval = 1.0 # In seconds
+        self.dry_run = False

 class TempSpeedPair:

@@ -58,6 +59,7 @@ def validate_config(config):
        print("You did not select a target GPU")
        raise InvalidConfig("No GPU was selected")

+    # A user will always have a default speed set, so I don't think this check is necessary
    #if len(config.temp_speed_pair) == 0:
    #    print("You did not create fan points (see --speed-pairs)")
    #    raise InvalidConfig("Has no fan curve")
@@ -134,10 +136,13 @@ def parse_cmd_args(args):
            configuration.default_speed = int(args[i+1])
            i += 1 # Skip the next iteration

-        elif (arg == '--time-interval' or arg == '-t'):
+        elif (arg == '--time-interval' or arg == '-ti'):
            configuration.time_interval = float(args[i+1])
            i += 1 # Skip the next iteration

+        elif (arg == '--dry-run' or arg == '-dr'):
+            configuration.dry_run = True
+
        else:
            print(f'Invalid option: {arg}')
            raise InvalidOption('The option given was invalid')
--- a/tests.py
+++ b/tests.py
@@ -1,6 +1,8 @@
 import unittest
+from unittest.mock import Mock
 import sys
 import parse_args
+import helper_functions as main_funcs

 # Test command: python.exe .\tests.py -b

@@ -62,7 +64,7 @@ class TestMethods(unittest.TestCase):
        self.assertEqual( config.action, 'fan-control')
        self.assertEqual( config.time_interval, 5.0)

-        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--time-interval', '0.5', '-t', 'RTX 3080'])
+        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-ti', '0.5', '-t', 'RTX 3080'])
        self.assertEqual( config.action, 'fan-control')
        self.assertEqual( config.time_interval, 0.5)

@@ -82,6 +84,9 @@ class TestMethods(unittest.TestCase):
        ]
        self.assertEqual(expected_output, config.temp_speed_pair)

+        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-sp', '0:0,10:30,20:50,35:75,40:100', '-t', 'RTX 3080'])
+        self.assertEqual(expected_output, config.temp_speed_pair)
+
    def test_parse_args_temp_speed_pair_sort(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '40:100,20:50,10:30,35:75', '-t', 'RTX 3080'])

@@ -120,6 +125,17 @@ class TestMethods(unittest.TestCase):
        with self.assertRaises(parse_args.InvalidFanSpeed):
            parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '10:-100'])

+    def test_parse_args_dry_run(self):
+        """Both spellings of the dry-run flag enable it; it stays off when omitted."""
+        base = ['.python_script', 'fan-control', '-t', 'RTX 3080']
+
+        for flag in ('--dry-run', '-dr'):
+            config = parse_args.parse_cmd_args(base[:2] + [flag] + base[2:])
+            self.assertEqual(config.dry_run, True)
+
+        # Default value should always be False
+        self.assertEqual(parse_args.parse_cmd_args(base).dry_run, False)
+
    def test_parse_args_invalid_option(self):

        with self.assertRaises(parse_args.InvalidOption):
@@ -143,7 +159,24 @@ class TestMethods(unittest.TestCase):
    def test_parse_args_sane_checks(self):

        with self.assertRaises(parse_args.InvalidConfig):
-            parse_args.parse_cmd_args(['.python_script', 'fan-control']) 
+            parse_args.parse_cmd_args(['.python_script', 'fan-control'])
+
+
+    # GPU Functions - I will need to improve the tests later
+
+    def test_gpu_something(self):
+        """list_gpus must query the handle, name and UUID of the one mocked device."""
+        from unittest.mock import patch
+
+        fake_nvml = Mock()
+        fake_nvml.nvmlDeviceGetCount.return_value = 1
+        fake_nvml.nvmlDeviceGetHandleByIndex.return_value = 0
+        fake_nvml.nvmlDeviceGetName.return_value = 'RTX 3080'
+        fake_nvml.nvmlDeviceGetUUID.return_value = 'GPU-test-uuid'
+        # Swap pynvml only inside helper_functions and only for this call, so nothing leaks
+        with patch.object(main_funcs, 'pynvml', fake_nvml):
+            main_funcs.list_gpus()
+        fake_nvml.nvmlDeviceGetUUID.assert_called_once_with(0)