mirror of
https://github.com/brockar/NVML-GPU-Control.git
synced 2026-01-12 07:20:58 -03:00
Fully working version, but lacks unit tests
This commit is contained in:
97
helper_functions.py
Normal file
97
helper_functions.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import pynvml
|
||||
import datetime
|
||||
import time
|
||||
|
||||
# Timestamp: datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
||||
def log_helper(msg):
    """Print ``msg`` to stdout prefixed with ``LOG[<timestamp>]: ``.

    The timestamp is the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    Args:
        msg: Value to log; it is interpolated with the default f-string
            formatting.
    """
    # The timestamp is built outside the f-string on purpose: reusing the
    # same quote character inside a replacement field is only valid from
    # Python 3.12 onwards and is a SyntaxError on earlier interpreters.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f'LOG[{timestamp}]: {msg}')
|
||||
|
||||
def print_help():
    """Write the program's help text to standard output."""
    help_text = 'HELP TEXT'
    print(help_text)
|
||||
|
||||
def list_gpus():
    """Print one line per device with its index, name and UUID.

    The number of devices comes from ``pynvml.nvmlDeviceGetCount()``; each
    device is then looked up by index and queried for its name and UUID.
    """
    for index in range(pynvml.nvmlDeviceGetCount()):
        device = pynvml.nvmlDeviceGetHandleByIndex(index)
        device_name = pynvml.nvmlDeviceGetName(device)
        device_uuid = pynvml.nvmlDeviceGetUUID(device)
        print(f'Device {index} name : {device_name} - UUID: {device_uuid}')
|
||||
|
||||
def print_GPU_info(gpu_handle):
    """Log a short status report for the device behind ``gpu_handle``.

    Each value is queried from pynvml and logged through ``log_helper``
    immediately, in this order: driver version, device name, device UUID,
    fan speed, temperature and fan controller count.

    Args:
        gpu_handle: Device handle passed straight to the pynvml queries.
    """
    driver_version = pynvml.nvmlSystemGetDriverVersion()
    log_helper(f"Driver Version: {driver_version}")

    device_name = pynvml.nvmlDeviceGetName(gpu_handle)
    log_helper(f'Device name : {device_name}')

    device_uuid = pynvml.nvmlDeviceGetUUID(gpu_handle)
    log_helper(f'Device UUID : {device_uuid}')

    fan_speed = pynvml.nvmlDeviceGetFanSpeed(gpu_handle)
    log_helper(f'Device fan speed : {fan_speed}%')

    # Sensor type 0 -- presumably NVML_TEMPERATURE_GPU; confirm against pynvml.
    temperature = pynvml.nvmlDeviceGetTemperature(gpu_handle, 0)
    log_helper(f'Temperature {temperature}°C')

    controller_count = pynvml.nvmlDeviceGetNumFans(gpu_handle)
    log_helper(f"Fan controller count {controller_count}")
|
||||
|
||||
def fan_control(configuration):
    """Entry point of the fan-control action.

    Resolves the handle of ``configuration.target_gpu``, logs the device
    information once and then hands over to ``control_and_monitor``.

    Args:
        configuration: Configuration object; ``target_gpu`` is read here and
            the whole object is forwarded to ``control_and_monitor``.
    """
    target_handle = get_GPU_handle(configuration.target_gpu)

    # One-off report before the monitoring loop takes over.
    print_GPU_info(target_handle)

    control_and_monitor(target_handle, configuration)
|
||||
|
||||
class GpuNotFound(Exception):
    """Raised when no device name matches the requested GPU name."""


# Search for a GPU and return a handle
def get_GPU_handle(gpu_name):
    """Return the handle of the first device whose name equals ``gpu_name``.

    Devices are visited by index, from 0 up to the count reported by
    ``pynvml.nvmlDeviceGetCount()``, and compared by exact name equality.

    Args:
        gpu_name: Device name to look for.

    Returns:
        The handle returned by ``pynvml.nvmlDeviceGetHandleByIndex`` for the
        first matching device.

    Raises:
        GpuNotFound: If no device has the requested name.
    """
    for index in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(index)

        if pynvml.nvmlDeviceGetName(handle) == gpu_name:
            return handle

    print(f'It was not possible to locate the target device : {gpu_name}')
    raise GpuNotFound('It was not possible to locate the device')
|
||||
|
||||
def set_gpu_fan_speed(gpu_handle, speed_percentage, dry_run):
    """Apply ``speed_percentage`` to every fan controller of ``gpu_handle``.

    Args:
        gpu_handle: Device handle passed to the pynvml calls.
        speed_percentage: Value forwarded to
            ``pynvml.nvmlDeviceSetFanSpeed_v2`` for each controller.
        dry_run: When truthy, no fan speed is written to the device.
    """
    # This is not really the number of fans, but the number of controllers
    fan_count = pynvml.nvmlDeviceGetNumFans(gpu_handle)

    # Any truthy dry_run value must keep the hardware untouched.
    if dry_run:
        return

    for fan_idx in range(fan_count):
        # Setting the fan speed is DANGEROUS! Use dry run for testing before actual changes
        pynvml.nvmlDeviceSetFanSpeed_v2(gpu_handle, fan_idx, speed_percentage)
|
||||
|
||||
|
||||
# Control GPU functions and monitor for changes (e.g. temperature)
|
||||
def control_and_monitor(gpu_handle, configuration):
    """Poll the GPU temperature forever and adjust the fan speed to match.

    On every iteration the current temperature and fan speed are read and
    logged. The target speed is the ``speed`` of the first entry in
    ``configuration.temp_speed_pair`` whose ``temperature`` is less than or
    equal to the current temperature, or ``configuration.default_speed`` when
    no entry matches. A command is sent to the GPU only when the target
    differs from the last speed set by this loop or from the speed the device
    currently reports. The loop then sleeps ``configuration.time_interval``
    seconds.

    Args:
        gpu_handle: Device handle passed to the pynvml queries.
        configuration: Object providing ``temp_speed_pair``,
            ``default_speed``, ``dry_run`` and ``time_interval``.

    This function never returns; the process must be killed to stop it.
    """
    previous_speed = 0

    # Infinite loop, one must kill the process to stop it
    while True:
        current_temp = pynvml.nvmlDeviceGetTemperature(gpu_handle, 0)
        current_speed = pynvml.nvmlDeviceGetFanSpeed(gpu_handle)

        log_helper(f'Current temp: {current_temp}')
        log_helper(f'Current speed: {current_speed}')

        # The list is expected to start at the highest temperature and go
        # down, so the first threshold reached is the one to use.
        matched_pair = next(
            (pair for pair in configuration.temp_speed_pair
             if current_temp >= pair.temperature),
            None,
        )

        if matched_pair is not None:
            target_speed = matched_pair.speed
        else:
            # We didn't find a match, use the default speed
            target_speed = configuration.default_speed
            log_helper(f'Found no temperature match, using default fan speed: {configuration.default_speed}')

        # Only send commands to the GPU if necessary. This applies to the
        # default speed as well, so it is not re-sent on every iteration.
        if previous_speed != target_speed or current_speed != target_speed:
            set_gpu_fan_speed(gpu_handle, target_speed, configuration.dry_run)
            previous_speed = target_speed
            log_helper(f'Setting GPU fan speed: {target_speed}%')
        else:
            log_helper('Same as previous speed, nothing to do!')

        time.sleep(configuration.time_interval)
|
||||
|
||||
|
||||
|
||||
@@ -1,32 +1,36 @@
|
||||
from pynvml import *
|
||||
import sys
|
||||
import helper_functions as main_funcs
|
||||
import parse_args
|
||||
|
||||
nvmlInit()
|
||||
def main():
|
||||
|
||||
# Getting a configuration obj
|
||||
config = parse_args.parse_cmd_args(sys.argv)
|
||||
|
||||
print(f"Driver Version: {nvmlSystemGetDriverVersion()}")
|
||||
try:
|
||||
# Starting nvml
|
||||
nvmlInit()
|
||||
|
||||
if float(nvmlSystemGetDriverVersion()) < 520:
|
||||
print('WARNING: You are running an unsupported driver, you may have problems')
|
||||
|
||||
deviceCount = nvmlDeviceGetCount()
|
||||
match config.action:
|
||||
|
||||
# Help doesn't require nvml (TODO change code paths)
|
||||
case 'help':
|
||||
main_funcs.print_help()
|
||||
|
||||
for i in range(deviceCount):
|
||||
handle = nvmlDeviceGetHandleByIndex(i)
|
||||
print(f"Device {i} : {nvmlDeviceGetName(handle)}")
|
||||
case 'list':
|
||||
main_funcs.list_gpus()
|
||||
|
||||
print(f"Device fan speed : {nvmlDeviceGetFanSpeed(handle)}%")
|
||||
print(f"Temperature {nvmlDeviceGetTemperature(handle, 0)}°C")
|
||||
case 'fan-control':
|
||||
main_funcs.fan_control(config)
|
||||
|
||||
# One should call shutdown with or without errors, this is why I am using finally
|
||||
finally:
|
||||
print('Calling nvml shutdown and teminating the program')
|
||||
nvmlShutdown()
|
||||
|
||||
# This is not really the number of fan, but the number of controllers
|
||||
fan_count = nvmlDeviceGetNumFans(handle)
|
||||
print(f"Fan count {fan_count}")
|
||||
|
||||
for fan_idx in range(fan_count):
|
||||
fan_speed = nvmlDeviceGetFanSpeed_v2(handle, fan_idx)
|
||||
print(f"Fan {fan_idx} : {fan_speed}%")
|
||||
|
||||
# Setting the fan speed DANGEROUS!
|
||||
target_fan_speed = 100
|
||||
nvmlDeviceSetFanSpeed_v2(handle, fan_idx, target_fan_speed)
|
||||
print(f"Target fan speed set: {target_fan_speed}%")
|
||||
|
||||
|
||||
nvmlShutdown()
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -27,6 +27,7 @@ class Configuration:
|
||||
self.curve_type = "fixed" # Currently for internal usage only (I want to later add calculation for lines and curves fuctions)
|
||||
self.default_speed = 50 # Percentage
|
||||
self.time_interval = 1.0 # In seconds
|
||||
self.dry_run = False
|
||||
|
||||
class TempSpeedPair:
|
||||
|
||||
@@ -58,6 +59,7 @@ def validate_config(config):
|
||||
print("You did not select a target GPU")
|
||||
raise InvalidConfig("No GPU was selected")
|
||||
|
||||
# A user will always have a default speed set, so I don't think this check is necessary
|
||||
#if len(config.temp_speed_pair) == 0:
|
||||
# print("You did not create fan points (see --speed-pairs)")
|
||||
# raise InvalidConfig("Has no fan curve")
|
||||
@@ -134,10 +136,13 @@ def parse_cmd_args(args):
|
||||
configuration.default_speed = int(args[i+1])
|
||||
i += 1 # Skip the next iteration
|
||||
|
||||
elif (arg == '--time-interval' or arg == '-t'):
|
||||
elif (arg == '--time-interval' or arg == '-ti'):
|
||||
configuration.time_interval = float(args[i+1])
|
||||
i += 1 # Skip the next iteration
|
||||
|
||||
elif (arg == '--dry-run' or arg == '-dr'):
|
||||
configuration.dry_run = True
|
||||
|
||||
else:
|
||||
print(f'Invalid option: {arg}')
|
||||
raise InvalidOption('The option given was invalid')
|
||||
|
||||
37
tests.py
37
tests.py
@@ -1,6 +1,8 @@
|
||||
import unittest
|
||||
from unittest.mock import Mock
|
||||
import sys
|
||||
import parse_args
|
||||
import helper_functions as main_funcs
|
||||
|
||||
# Test command: python.exe .\tests.py -b
|
||||
|
||||
@@ -62,7 +64,7 @@ class TestMethods(unittest.TestCase):
|
||||
self.assertEqual( config.action, 'fan-control')
|
||||
self.assertEqual( config.time_interval, 5.0)
|
||||
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--time-interval', '0.5', '-t', 'RTX 3080'])
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-ti', '0.5', '-t', 'RTX 3080'])
|
||||
self.assertEqual( config.action, 'fan-control')
|
||||
self.assertEqual( config.time_interval, 0.5)
|
||||
|
||||
@@ -82,6 +84,9 @@ class TestMethods(unittest.TestCase):
|
||||
]
|
||||
self.assertEqual(expected_output, config.temp_speed_pair)
|
||||
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-sp', '0:0,10:30,20:50,35:75,40:100', '-t', 'RTX 3080'])
|
||||
self.assertEqual(expected_output, config.temp_speed_pair)
|
||||
|
||||
def test_parse_args_temp_speed_pair_sort(self):
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '40:100,20:50,10:30,35:75', '-t', 'RTX 3080'])
|
||||
|
||||
@@ -120,6 +125,17 @@ class TestMethods(unittest.TestCase):
|
||||
with self.assertRaises(parse_args.InvalidFanSpeed):
|
||||
parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '10:-100'])
|
||||
|
||||
def test_parse_args_dry_run(self):
    """Both spellings of the dry-run flag enable it; it is off by default."""
    leading_args = ['.python_script', 'fan-control']
    gpu_args = ['-t', 'RTX 3080']

    # Long and short form must behave the same
    for flag in ('--dry-run', '-dr'):
        config = parse_args.parse_cmd_args(leading_args + [flag] + gpu_args)
        self.assertEqual(config.dry_run, True)

    # Default value should always be False
    config = parse_args.parse_cmd_args(leading_args + gpu_args)
    self.assertEqual(config.dry_run, False)
|
||||
|
||||
def test_parse_args_invalid_option(self):
|
||||
|
||||
with self.assertRaises(parse_args.InvalidOption):
|
||||
@@ -143,7 +159,24 @@ class TestMethods(unittest.TestCase):
|
||||
def test_parse_args_sane_checks(self):
|
||||
|
||||
with self.assertRaises(parse_args.InvalidConfig):
|
||||
parse_args.parse_cmd_args(['.python_script', 'fan-control'])
|
||||
parse_args.parse_cmd_args(['.python_script', 'fan-control'])
|
||||
|
||||
|
||||
# GPU Functions - I will need to improve the tests later
|
||||
|
||||
def test_gpu_something(self):
    """list_gpus queries handle, name and UUID once for the single mocked GPU.

    Every pynvml function that ``main_funcs.list_gpus`` calls is patched,
    including ``nvmlDeviceGetUUID``, so the test never reaches the real
    NVML library. ``patch.object`` restores the original attributes when
    the ``with`` block exits, so the mocks do not leak into other tests.
    """
    import pynvml
    from unittest.mock import patch

    fake_handle = 0

    with patch.object(pynvml, 'nvmlDeviceGetCount', return_value=1) as get_count, \
            patch.object(pynvml, 'nvmlDeviceGetHandleByIndex', return_value=fake_handle) as get_handle, \
            patch.object(pynvml, 'nvmlDeviceGetName', return_value='RTX 3080') as get_name, \
            patch.object(pynvml, 'nvmlDeviceGetUUID', return_value='GPU-00000000') as get_uuid:
        # Main function
        main_funcs.list_gpus()

    # The mocks keep their call records after the patches are undone.
    get_count.assert_called_once_with()
    get_handle.assert_called_once_with(0)
    get_name.assert_called_once_with(fake_handle)
    get_uuid.assert_called_once_with(fake_handle)
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user