mirror of
https://github.com/brockar/NVML-GPU-Control.git
synced 2026-01-11 15:01:01 -03:00
Added systemd service file with instructions for Linux (Ubuntu), plus some typo fixes
This commit is contained in:
82
.github/ISSUE_TEMPLATE/bug_report.md
vendored
82
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -1,41 +1,41 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: "[BUG]"
|
||||
labels: bug
|
||||
assignees: HackTestes
|
||||
|
||||
---
|
||||
|
||||
**System configuration**
|
||||
* [ ] Uses proprietary NVIDIA driver
|
||||
* Driver version:
|
||||
* GPU:
|
||||
* Operating system and version (or Linux distro):
|
||||
* Display server (only for Linux)
|
||||
* [ ] Wayland
|
||||
* [ ] X11/Xorg
|
||||
* Python version (python --version):
|
||||
* Command used: what you typed to execute the program, e.g. `python ./nvml_control list`
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior:
|
||||
1. Go to '...'
|
||||
2. Click on '....'
|
||||
3. Scroll down to '....'
|
||||
4. See error
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Screenshots**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
**Command output**
|
||||
Put the program logs here by running it outside of a service
|
||||
|
||||
**Additional context**
|
||||
Add any other context about the problem here.
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: "[BUG]"
|
||||
labels: bug
|
||||
assignees: HackTestes
|
||||
|
||||
---
|
||||
|
||||
**System configuration**
|
||||
* [ ] Uses proprietary NVIDIA driver
|
||||
* Driver version:
|
||||
* GPU:
|
||||
* Operating system and version (or Linux distro):
|
||||
* Display server (only for Linux)
|
||||
* [ ] Wayland
|
||||
* [ ] X11/Xorg
|
||||
* Python version (python --version):
|
||||
* Command used: what you typed to execute the program, e.g. `python ./nvml_control list`
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior:
|
||||
1. Go to '...'
|
||||
2. Click on '....'
|
||||
3. Scroll down to '....'
|
||||
4. See error
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Screenshots**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
**Command output**
|
||||
Put the program logs here by running it outside of a service
|
||||
|
||||
**Additional context**
|
||||
Add any other context about the problem here.
|
||||
|
||||
8
.gitignore
vendored
8
.gitignore
vendored
@@ -1,5 +1,5 @@
|
||||
/__pycache__
|
||||
|
||||
**/__pycache__
|
||||
|
||||
/__pycache__
|
||||
|
||||
**/__pycache__
|
||||
|
||||
/src/__pycache__
|
||||
678
LICENSE
678
LICENSE
@@ -1,339 +1,339 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Lesser General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License.
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Lesser General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License.
|
||||
|
||||
837
README.md
837
README.md
@@ -1,412 +1,425 @@
|
||||
# NVML GPU Control
|
||||
|
||||
This is a small program that uses the NVIDIA Management Library (NVML) to monitor GPU temperature and set fan speed. NVML is being used, because it is OS and display server agnostic (that means it doesn't depend on X11 or Windows). Another important reason is that the official NVIDIA tool (NVIDIA smi) does not currently support fan control.
|
||||
|
||||
## Disclaimer
|
||||
|
||||
* This project is NOT endorsed or sponsored by NVIDIA
|
||||
* This project is independent
|
||||
|
||||
## Supported hardware
|
||||
|
||||
- Any NVIDIA CUDA suported card with a driver higher or equal to version 520
|
||||
|
||||
## Dependencies
|
||||
|
||||
To use it, you must have installed:
|
||||
|
||||
- NVIDIA's proprietary drivers (>= v520)
|
||||
- Python 3
|
||||
- [nvidia-ml-py](https://pypi.org/project/nvidia-ml-py/) (current version used: 12.535.133)
|
||||
|
||||
You will also need **admin/root** privileges to be able to **set the fan speed**.
|
||||
|
||||
## Why I am creating this project?
|
||||
|
||||
Because of multiple reasons:
|
||||
|
||||
1. NVIDIA smi doesn't change fan speed
|
||||
2. Can't use nvidia-settings under Wayland to control the fans
|
||||
3. GeForce Experience needs internet to work and it's pretty bad
|
||||
|
||||
Now that NVIDIA added the functions to work on any CUDA supported card on drivers equal or higher than v520 (see Change Log [here](https://docs.nvidia.com/deploy/nvml-api/change-log.html#change-log)), it is possible to control GeForce cards' fans through NVML! This means that I can get perfect Wayland support as well, since NVML doesn't depend on a display server.
|
||||
|
||||

|
||||

|
||||
|
||||
## Installation
|
||||
|
||||
Note: you may need to adapt the path of some of the commands
|
||||
|
||||
1. Clone the repository
|
||||
```
|
||||
git clone https://github.com/HackTestes/NVML-GPU-Control NVML_GPU_Control
|
||||
```
|
||||
|
||||
**The next part requires admin/root permissions**
|
||||
|
||||
2. Create a new folder for the scripts
|
||||
```
|
||||
# Windows
|
||||
mkdir 'C:\Program Files\User_NVIDIA_GPU_Control\'
|
||||
|
||||
# Linux
|
||||
mkdir '/usr/bin/User_NVIDIA_GPU_Control/'
|
||||
```
|
||||
|
||||
3. Copy the scripts files from the repository to the new directory
|
||||
```
|
||||
# Windows
|
||||
cp 'C:\Path_to_the_repository\NVML_GPU_Control\src\*' 'C:\Program Files\User_NVIDIA_GPU_Control\'
|
||||
|
||||
# Linux
|
||||
cp '/Path_to_the_repository/NVML_GPU_Control/src/*' '/usr/bin/User_NVIDIA_GPU_Control\'
|
||||
```
|
||||
|
||||
**Additional notes**: you may also need to install the library as admin or install it as a normal user and then lock the files(change the permissions and take ownership as root/admin).
|
||||
|
||||
### Uninstall
|
||||
|
||||
You only need to remove the directory (BE EXTRA CAREFUL WITH THE *RM* COMMAND). You can also use the GUI to simply delete the directory if you find that easier and safer.
|
||||
|
||||
Useful docs (read before running the commands):
|
||||
* [Remove-Item](https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.management/remove-item?view=powershell-7.4)
|
||||
* [rm man page](https://man7.org/linux/man-pages/man1/rm.1.html)
|
||||
|
||||
```
|
||||
# Windows - you can run first with the -WhatIf parameter to test
|
||||
Remove-Item -Confirm -Force -Recurse -Path 'C:\Program Files\User_NVIDIA_GPU_Control\'
|
||||
|
||||
# Linux
|
||||
rm --interactive --preserve-root -R '/usr/bin/User_NVIDIA_GPU_Control'
|
||||
```
|
||||
|
||||
## How to use
|
||||
|
||||
- Make sure to run with the working directory being the `.\src`
|
||||
|
||||
```
|
||||
cd ./src
|
||||
```
|
||||
|
||||
- You must first list all cards that are connected, so you can get the name or UUID
|
||||
|
||||
```
|
||||
python.exe ./nvml_gpu_control.py list
|
||||
```
|
||||
|
||||
- Then you can select a target by name
|
||||
```
|
||||
python.exe ./nvml_gpu_control.py fan-control -n 'NVIDIA GeForce RTX 4080'
|
||||
```
|
||||
|
||||
- And the fan speed for each temperature level
|
||||
```
|
||||
sudo python.exe ./nvml_gpu_control.py fan-control -n 'NVIDIA GeForce RTX 4080' -sp '10:35,20:50,30:50,35:100'
|
||||
```
|
||||
|
||||
- You could also use the `--dry-run` for testing!
|
||||
```
|
||||
python.exe ./nvml_gpu_control.py fan-control -n 'NVIDIA GeForce RTX 4080' -sp '10:35,20:50,30:50,35:100' --dry-run
|
||||
```
|
||||
|
||||
- You can also revert to the original state
|
||||
```
|
||||
python.exe ./nvml_gpu_control.py fan-policy --auto -n 'NVIDIA GeForce RTX 4080'
|
||||
```
|
||||
|
||||
Note that it does not currently support fan curves (or linear progression), so it works on levels. For each level, the temperature is verified against the configuration (higher or equal) and the fan speed is then set accordingly. Also, each temperature associated with a speed is ordered automatically (think of it as a staircase graph).
|
||||
|
||||
```
|
||||
Temp : speed(%)
|
||||
|
||||
1. 40 : 100 (>=40°C - 100%)
|
||||
|
||||
2. 30 : 50 (>=30°C - 50%)
|
||||
|
||||
3. 20 : 30 (>=20°C - 30%)
|
||||
|
||||
4. Default speed (DS)
|
||||
|
||||
___________________________
|
||||
|
||||
41°C - 100%
|
||||
|
||||
21°C - 30%
|
||||
|
||||
19°C - Default speed
|
||||
|
||||
```
|
||||
|
||||
#### Usage docs
|
||||
|
||||
```
|
||||
python.exe .\nvml_gpu_control.py <ACTION> <OPTIONS>
|
||||
|
||||
ACTIONS
|
||||
help
|
||||
Display help text
|
||||
|
||||
list
|
||||
List all available GPUs connected to the system by printing its name and UUID
|
||||
|
||||
fan-control
|
||||
Monitor and controls the fan speed of the selected card (you must select a target card)
|
||||
|
||||
fan-info
|
||||
Shows information about fan speed
|
||||
|
||||
fan-policy <--auto|--manual>
|
||||
Changes the fan control policy to automatic (vBIOS controlled) or manual. Note that when the fan speed is changed, the NVML library automatically changes this setting to manual. This setting is useful to change the GPU back to its original state
|
||||
|
||||
fan-policy-info
|
||||
Shows information about the current fan policy
|
||||
|
||||
power-limit-info
|
||||
Shows information about the power limit of the selected GPU
|
||||
|
||||
power-control
|
||||
Controls the power limit of the selected GPU. It runs in a loop by default, but can run once using the --single-use option
|
||||
|
||||
thresholds-info
|
||||
Shows information about temperature thresholds in degrees Celsius of the selected GPU.
|
||||
|
||||
temp-control
|
||||
Controls the temperature thresholds configuration of the selected GPU. It runs in a loop by default, but can run once using the --single-use option
|
||||
|
||||
control-all
|
||||
Allows the use of all controls in a single command/loop
|
||||
|
||||
|
||||
OPTIONS
|
||||
|
||||
--name OR -n <GPU_NAME>
|
||||
Select a target GPU by its name. Note: UUID has preference over name
|
||||
|
||||
--uuid OR -id <GPU_UUID>
|
||||
Select a target GPU by its Universally Unique IDentifier (UUID). Note: UUID has preference over name
|
||||
|
||||
--time-interval OR -ti <TIME_SECONDS>
|
||||
Time period to wait before probing the GPU again. Works for all actions that run in a loop
|
||||
|
||||
--dry-run OR -dr
|
||||
Run the program, but don't change/set anything. Useful for testing the behavior of the program
|
||||
|
||||
--speed-pair OR -sp <TEMP_CELSIUS:SPEED_PERCENTAGE,TEMP_CELSIUS:SPEED_PERCENTAGE...>
|
||||
A comma separated list of pairs of temperature in celsius and the fan speed in % (temp:speed) defining basic settings for a fan curve
|
||||
|
||||
--default-speed OR -ds <FAN_SPEED_PERCENTAGE>
|
||||
Set a default speed for when there is no match for the fan curve settings
|
||||
|
||||
--manual
|
||||
Sets the fan policy to manual
|
||||
|
||||
--auto
|
||||
Sets the fan policy to automatic (vBIOS controlled)
|
||||
|
||||
--power-limit OR -pl <POWER_LIMIT_WATTS>
|
||||
Sets the power limit of the GPU in watts
|
||||
|
||||
--acoustic-temp-limit OR -tl <TEMPERATURE_CELSIUS>
|
||||
Sets the acoustic threshold in Celsius (note that this is the same temperature limit used by GeForce Experience)
|
||||
|
||||
--single-use OR -su
|
||||
Makes some actions work only once instead of in a loop. This option is valid for: temp-control and power-control
|
||||
|
||||
```
|
||||
|
||||
##### Running tests
|
||||
|
||||
```
|
||||
python.exe ./src/tests.py -b
|
||||
```
|
||||
|
||||
|
||||
### Setting up services or tasks (under development)
|
||||
|
||||
This section will present some simple commands to setup services or tasks that start as admin and run the configured program with the configured settings. You should secure the files under an admin only folder, so only authorized programs can modify the scripts (and DON'T use SUID in Linux).
|
||||
|
||||
#### Windows
|
||||
|
||||
Please, check Microsoft's documentation:
|
||||
|
||||
- [Task scheduler](https://learn.microsoft.com/en-us/windows/win32/taskschd/task-scheduler-start-page)
|
||||
- [Task scheduler command line](https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/schtasks)
|
||||
|
||||
Since this program does not implement the service API, it will be using scheduled tasks to run at startup. There will be presented a GUI and a command line guide to how to do the setup:
|
||||
|
||||
##### GUI
|
||||
|
||||
1. Make sure to have the script files at a path only accessible to admin users. This guide will be using `C:\Program Files\User_NVIDIA_GPU_Control\`
|
||||
|
||||
2. Open Task Scheduler as an admin (you might need to select an admin user)
|
||||
|
||||
3. Click on `create task` (do not confuse it for the create **simple** task)
|
||||
|
||||

|
||||
|
||||
4. General tab -> Write the service name. This guide will use: `User NVIDIA GPU Control Task`
|
||||
|
||||
5. General tab -> Write a description. This guide will use: `This task runs a daemon at startup responsible for controling NVIDIA GPUs' fans and power`
|
||||
|
||||
6. General tab -> Mark the box containing `Run whether the user is logged or not`
|
||||
|
||||
7. General tab -> Mark the box containing `Do not store password`
|
||||
|
||||
8. General tab -> Mark the box containing `Run with highest privileges`
|
||||
|
||||

|
||||
|
||||
9. Triggers tab -> Create a new trigger and change the `Begin the task` to `At Startup` (make sure to leave the Enabled box marked)
|
||||
|
||||

|
||||

|
||||
|
||||
10. Actions tab -> Create a new action and select the `action` `Start a program`
|
||||
|
||||
11. Actions tab -> In the `Program/script` put the path of the python executable. This guide will use `"C:\Program Files\Python312\python.exe"` (Note that some python versions may have a different directory name and make sure only admin users can change the executable and the folder) - the double quotes are necessary
|
||||
|
||||
12. Actions tab -> In the `Add arguments (optional)`, add the script path and the desired settings. This guide will use the following args: `"C:\Program Files\User_NVIDIA_GPU_Control\nvml_gpu_control.py" "fan-control" "-n" "NVIDIA GeForce RTX 4080" "-sp" "10:0,20:50,35:100"`
|
||||
|
||||
or
|
||||
|
||||
```
|
||||
"C:\Program Files\User_NVIDIA_GPU_Control\nvml_gpu_control.py" "control-all" "-n" "NVIDIA GeForce RTX 4080" "-pl" "305" "-tl" "65" "-sp" "10:0,20:50,35:100"
|
||||
```
|
||||
|
||||
13. Actions tab -> In the `Start in (optional)`, add the script path directory. This guide will use the following args: `C:\Program Files\User_NVIDIA_GPU_Control`
|
||||
|
||||

|
||||
|
||||
14. Conditions tab -> Leave all boxes UNmarked
|
||||
|
||||

|
||||
|
||||
15. Settings tab -> Mark the box in `Allow task to be run on demand`
|
||||
|
||||
16. Settings tab -> UNmark the box in `Stop task if it runs longer than`
|
||||
|
||||
17. Settings tab -> Mark the box in `If the running task does not end when requested, force it to stop`
|
||||
|
||||
18. Settings tab -> In the `If the task is already running, then the following rule applies`, select the `Do not start a new instance`
|
||||
|
||||

|
||||
|
||||
|
||||
##### Command line (Not recommended and untested)
|
||||
|
||||
Some users might find it easier to simply run a command, however, it is important to warn about two things:
|
||||
|
||||
1. The command line utility has less features than the GUI version;
|
||||
2. If you are unsure of what the command does, please check MS's documentation before running it (especially because you must run it with admin permissions)
|
||||
|
||||
1. Open a terminal with admin permissions
|
||||
|
||||
2. Write the following command: `schtasks /create /tn 'User NVIDIA GPU Control Task' /tr 'C:\Program Files\Python312\python.exe C:\Program Files\User_NVIDIA_GPU_Control\nvml_gpu_control.py fan-control -n "NVIDIA GeForce RTX 4080" -sp "10:0,20:47,30:50,35:100"' /sc ONSTART /np /rl HIGHEST`
|
||||
|
||||
Another formatting
|
||||
|
||||
```
|
||||
schtasks /create
|
||||
/tn 'User NVIDIA GPU Control Task'
|
||||
/tr 'C:\Program Files\Python312\python.exe C:\Program Files\User_NVIDIA_GPU_Control\nvml_gpu_control.py fan-control -n "NVIDIA GeForce RTX 4080" -sp "10:0,20:47,30:50,35:100"'
|
||||
/sc ONSTART
|
||||
/np
|
||||
/rl HIGHEST
|
||||
```
|
||||
|
||||
One of the limitations involves not being able to change the start working directory, so some paths in the scripts might break. Overall, I do not recommend this approach on Windows, users should opt for the GUI method.
|
||||
|
||||
#### Linux (systemd / cronjob) - **Not ready**
|
||||
|
||||
##### Systemd timer
|
||||
|
||||
1. Create a service file
|
||||
```
|
||||
[Unit]
|
||||
Description=Unofficial NVIDIA Fan Control service
|
||||
ConditionUser=0
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/usr/bin/User_NVIDIA_GPU_Control/
|
||||
ExecStart=/usr/bin/python3 /usr/bin/User_NVIDIA_GPU_Control/nvml_gpu_control.py -n "RTX 3080" -sp 10:20,20:35:30:50,35:100
|
||||
Restart=always
|
||||
KillSignal=SIGQUIT
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
## Security considerations
|
||||
|
||||
### Windows
|
||||
|
||||
1. Having an admin prompt under the same desktop
|
||||
|
||||
An open prompt under the same desktop can receive key commands from non-privileged programs, allowing any program to escalate to admin. To mitigate this, it is necessary to restrict all other programs with a UI limit JobObject, create the window under a new desktop or not create any windows on the desktop (this is how it is done under the guide).
|
||||
|
||||
2. Programs that start automatically as admin must be secured against writes
|
||||
|
||||
The scripts and the executables can only be written by admin users, otherwise, another program may overwrite them and gain admin rights on the machine. Please, verify the permissions set on the python executable and on the scripts (this also applies to the library nvidia-ml-py).
|
||||
|
||||
|
||||
### Linux
|
||||
|
||||
1. Having an admin prompt under the same desktop (X11)
|
||||
|
||||
This is a similar risk to the Windows counterpart, especially on X11/Xorg. So, if you use X11, you must create a new session under a new TTY to create an admin window; but if you use Wayland, it already isolates windows by default.
|
||||
|
||||
2. Programs that start automatically as admin must be secured against writes
|
||||
|
||||
Same as Windows. All of the executables and scripts must be accessible only to the root user (UID 0).
|
||||
|
||||
## Roadmap (features to be added)
|
||||
|
||||
### Must have
|
||||
|
||||
- [x] Fan control
|
||||
|
||||
- [x] Select GPU by name
|
||||
|
||||
- [x] Display fan speed per controller
|
||||
|
||||
- [x] Control fan policy
|
||||
|
||||
- [x] Select GPU by UUID (allows users to control more than 1 GPU individually that shares the same model - e.g. 2 RTXs 4080)
|
||||
|
||||
- [ ] Run at startup with necessary permissions (Windows and Linux) - Windows already works
|
||||
|
||||
- [x] Power limit control
|
||||
|
||||
- [x] Temperature threshold control
|
||||
|
||||
- [x] Enable all controls
|
||||
|
||||
- [x] Help action must not require NVML initialization
|
||||
|
||||
### Can consider (nice to have)
|
||||
|
||||
- [ ] Logging to file option (with message size limit) -> user can spawn another instance with the same arguments and pass the `--dry-run` option as it should mirror the output of the privileged one
|
||||
|
||||
- [ ] Temperature curves (linear, quadratic, logarithmic...) -> might be unnecessary as users can generate all speed points elsewhere and just pass it as arguments
|
||||
|
||||
## Support
|
||||
|
||||
I will be supporting this program as long as I have NVIDIA GPUs (especially because I am also dogfooding it). Don't expect new features as it has everything I currently need, but you can suggest new features that you think are useful (note that the focus is energy and temperature control to increase **stability**). You can expect however bug fixes from me so my project remains compatible with the latest versions of NVML.
|
||||
|
||||
If I lose the need for this software (aka change my hardware), I will make sure to update this notice.
|
||||
|
||||
## Contribute
|
||||
|
||||
Just a few guidelines and style decisions:
|
||||
|
||||
- variable_name
|
||||
|
||||
- function_name
|
||||
|
||||
- ObjectOrClassName
|
||||
|
||||
- Other dependencies are DISALLOWED, I want to limit the dependencies as a security measure (just remember the xz incident). You are free to try to convince me, but your contribution will most likely be rejected
|
||||
|
||||
- Code should be testable, so please include unit tests to your code. If you think that certain parts are just too hard to make tests, include a justification
|
||||
# NVML GPU Control
|
||||
|
||||
This is a small program that uses the NVIDIA Management Library (NVML) to monitor GPU temperature and set fan speed. NVML is being used, because it is OS and display server agnostic (that means it doesn't depend on X11 or Windows). Another important reason is that the official NVIDIA tool (NVIDIA smi) does not currently support fan control.
|
||||
|
||||
## Disclaimer
|
||||
|
||||
* This project is NOT endorsed or sponsored by NVIDIA
|
||||
* This project is independent
|
||||
|
||||
## Supported hardware
|
||||
|
||||
- Any NVIDIA CUDA supported card with a driver higher or equal to version 520
|
||||
|
||||
## Dependencies
|
||||
|
||||
To use it, you must have installed:
|
||||
|
||||
- NVIDIA's proprietary drivers (>= v520)
|
||||
- Python 3
|
||||
- [nvidia-ml-py](https://pypi.org/project/nvidia-ml-py/) (current version used: 12.535.133)
|
||||
|
||||
You will also need **admin/root** privileges to be able to **set the fan speed**.
|
||||
|
||||
## Why I am creating this project?
|
||||
|
||||
Because of multiple reasons:
|
||||
|
||||
1. NVIDIA smi doesn't change fan speed
|
||||
2. Can't use nvidia-settings under Wayland to control the fans
|
||||
3. GeForce Experience needs internet to work and it's pretty bad
|
||||
|
||||
Now that NVIDIA added the functions to work on any CUDA supported card on drivers equal or higher than v520 (see Change Log [here](https://docs.nvidia.com/deploy/nvml-api/change-log.html#change-log)), it is possible to control GeForce cards' fans through NVML! This means that I can get perfect Wayland support as well, since NVML doesn't depend on a display server.
|
||||
|
||||

|
||||

|
||||
|
||||
## Installation
|
||||
|
||||
Note: you may need to adapt the path of some of the commands
|
||||
|
||||
1. Clone the repository
|
||||
```
|
||||
git clone https://github.com/HackTestes/NVML-GPU-Control NVML_GPU_Control
|
||||
```
|
||||
|
||||
**The next part requires admin/root permissions**
|
||||
|
||||
2. Create a new folder for the scripts
|
||||
```
|
||||
# Windows
|
||||
mkdir 'C:\Program Files\User_NVIDIA_GPU_Control\'
|
||||
|
||||
# Linux
|
||||
sudo mkdir '/usr/bin/User_NVIDIA_GPU_Control/'
|
||||
```
|
||||
|
||||
3. Copy the scripts files from the repository to the new directory
|
||||
```
|
||||
# Windows
|
||||
cp 'C:\Path_to_the_repository\NVML_GPU_Control\src\*' 'C:\Program Files\User_NVIDIA_GPU_Control\'
|
||||
|
||||
# Linux
|
||||
sudo cp '/Path_to_the_repository/NVML_GPU_Control/src/*' '/usr/bin/User_NVIDIA_GPU_Control/'
|
||||
```
|
||||
|
||||
**Additional notes**: you may also need to install the library as admin or install it as a normal user and then lock the files(change the permissions and take ownership as root/admin).
|
||||
|
||||
### Uninstall
|
||||
|
||||
You only need to remove the directory (BE EXTRA CAREFUL WITH THE *RM* COMMAND). You can also use the GUI to simply delete the directory if you find that easier and safer.
|
||||
|
||||
Useful docs (read before running the commands):
|
||||
* [Remove-Item](https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.management/remove-item?view=powershell-7.4)
|
||||
* [rm man page](https://man7.org/linux/man-pages/man1/rm.1.html)
|
||||
|
||||
```
|
||||
# Windows - you can run first with the -WhatIf parameter to test
|
||||
Remove-Item -Confirm -Force -Recurse -Path 'C:\Program Files\User_NVIDIA_GPU_Control\'
|
||||
|
||||
# Linux
|
||||
rm --interactive --preserve-root -R '/usr/bin/User_NVIDIA_GPU_Control'
|
||||
```
|
||||
|
||||
## How to use
|
||||
|
||||
- Make sure to run with the working directory being the `.\src`
|
||||
|
||||
```
|
||||
cd ./src
|
||||
```
|
||||
|
||||
- You must first list all cards that are connected, so you can get the name or UUID
|
||||
|
||||
```
|
||||
python.exe ./nvml_gpu_control.py list
|
||||
```
|
||||
|
||||
- Then you can select a target by name
|
||||
```
|
||||
python.exe ./nvml_gpu_control.py fan-control -n 'NVIDIA GeForce RTX 4080'
|
||||
```
|
||||
|
||||
- And the fan speed for each temperature level
|
||||
```
|
||||
sudo python.exe ./nvml_gpu_control.py fan-control -n 'NVIDIA GeForce RTX 4080' -sp '10:35,20:50,30:50,35:100'
|
||||
```
|
||||
|
||||
- You could also use the `--dry-run` for testing!
|
||||
```
|
||||
python.exe ./nvml_gpu_control.py fan-control -n 'NVIDIA GeForce RTX 4080' -sp '10:35,20:50,30:50,35:100' --dry-run
|
||||
```
|
||||
|
||||
- You can also revert to the original state
|
||||
```
|
||||
python.exe ./nvml_gpu_control.py fan-policy --auto -n 'NVIDIA GeForce RTX 4080'
|
||||
```
|
||||
|
||||
Note that it does not currently support fan curves (or linear progression), so it works on levels. For each level, the temperature is verified against the configuration (higher or equal) and the fan speed is then set accordingly. Also, each temperature associated with a speed is ordered automatically (think of it as a staircase graph).
|
||||
|
||||
```
|
||||
Temp : speed(%)
|
||||
|
||||
1. 40 : 100 (>=40°C - 100%)
|
||||
|
||||
2. 30 : 50 (>=30°C - 50%)
|
||||
|
||||
3. 20 : 30 (>=20°C - 30%)
|
||||
|
||||
4. Default speed (DS)
|
||||
|
||||
___________________________
|
||||
|
||||
41°C - 100%
|
||||
|
||||
21°C - 30%
|
||||
|
||||
19°C - Default speed
|
||||
|
||||
```
|
||||
|
||||
#### Usage docs
|
||||
|
||||
```
|
||||
python.exe .\nvml_gpu_control.py <ACTION> <OPTIONS>
|
||||
|
||||
ACTIONS
|
||||
help
|
||||
Display help text
|
||||
|
||||
list
|
||||
List all available GPUs connected to the system by printing its name and UUID
|
||||
|
||||
fan-control
|
||||
Monitor and controls the fan speed of the selected card (you must select a target card)
|
||||
|
||||
fan-info
|
||||
Shows information about fan speed
|
||||
|
||||
fan-policy <--auto|--manual>
|
||||
Changes the fan control policy to automatic (vBIOS controlled) or manual. Note that when the fan speed is changed, the NVML library automatically changes this setting to manual. This setting is useful to change the GPU back to its original state
|
||||
|
||||
fan-policy-info
|
||||
Shows information about the current fan policy
|
||||
|
||||
power-limit-info
|
||||
Shows information about the power limit of the selected GPU
|
||||
|
||||
power-control
|
||||
Controls the power limit of the selected GPU. It runs in a loop by default, but can run once using the --single-use option
|
||||
|
||||
thresholds-info
|
||||
Shows information about temperature thresholds in degrees Celsius of the selected GPU.
|
||||
|
||||
temp-control
|
||||
Controls the temperature thresholds configuration of the selected GPU. It runs in a loop by default, but can run once using the --single-use option
|
||||
|
||||
control-all
|
||||
Allows the use of all controls in a single command/loop
|
||||
|
||||
|
||||
OPTIONS
|
||||
|
||||
--name OR -n <GPU_NAME>
|
||||
Select a target GPU by its name. Note: UUID has preference over name
|
||||
|
||||
--uuid OR -id <GPU_UUID>
|
||||
Select a target GPU by its Universally Unique IDentifier (UUID). Note: UUID has preference over name
|
||||
|
||||
--time-interval OR -ti <TIME_SECONDS>
|
||||
Time period to wait before probing the GPU again. Works for all actions that run in a loop
|
||||
|
||||
--dry-run OR -dr
|
||||
Run the program, but don't change/set anything. Useful for testing the behavior of the program
|
||||
|
||||
--speed-pair OR -sp <TEMP_CELSIUS:SPEED_PERCENTAGE,TEMP_CELSIUS:SPEED_PERCENTAGE...>
|
||||
A comma separated list of pairs of temperature in celsius and the fan speed in % (temp:speed) defining basic settings for a fan curve
|
||||
|
||||
--default-speed OR -ds <FAN_SPEED_PERCENTAGE>
|
||||
Set a default speed for when there is no match for the fan curve settings
|
||||
|
||||
--manual
|
||||
Sets the fan policy to manual
|
||||
|
||||
--auto
|
||||
Sets the fan policy to automatic (vBIOS controlled)
|
||||
|
||||
--power-limit OR -pl <POWER_LIMIT_WATTS>
|
||||
Sets the power limit of the GPU in watts
|
||||
|
||||
--acoustic-temp-limit OR -tl <TEMPERATURE_CELSIUS>
|
||||
Sets the acoustic threshold in Celsius (note that this is the same temperature limit used by GeForce Experience)
|
||||
|
||||
--single-use OR -su
|
||||
Makes some actions work only once instead of in a loop. This option is valid for: temp-control and power-control
|
||||
|
||||
```
|
||||
|
||||
##### Running tests
|
||||
|
||||
```
|
||||
python.exe ./src/tests.py -b
|
||||
```
|
||||
|
||||
|
||||
### Setting up services or tasks (under development)
|
||||
|
||||
This section will present some simple commands to setup services or tasks that start as admin and run the configured program with the configured settings. You should secure the files under an admin only folder, so only authorized programs can modify the scripts (and DON'T use SUID in Linux).
|
||||
|
||||
#### Windows
|
||||
|
||||
Please, check Microsoft's documentation:
|
||||
|
||||
- [Task scheduler](https://learn.microsoft.com/en-us/windows/win32/taskschd/task-scheduler-start-page)
|
||||
- [Task scheduler command line](https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/schtasks)
|
||||
|
||||
Since this program does not implement the service API, it uses scheduled tasks to run at startup. Both a GUI guide and a command line guide for the setup are presented below:
|
||||
|
||||
##### GUI
|
||||
|
||||
1. Make sure to have the script files at a path only accessible to admin users. This guide will be using `C:\Program Files\User_NVIDIA_GPU_Control\`
|
||||
|
||||
2. Open Task Scheduler as an admin (you might need to select an admin user)
|
||||
|
||||
3. Click on `create task` (do not confuse it for the create **simple** task)
|
||||
|
||||

|
||||
|
||||
4. General tab -> Write the service name. This guide will use: `User NVIDIA GPU Control Task`
|
||||
|
||||
5. General tab -> Write a description. This guide will use: `This task runs a daemon at startup responsible for controlling NVIDIA GPUs' fans and power`
|
||||
|
||||
6. General tab -> Mark the box containing `Run whether the user is logged or not`
|
||||
|
||||
7. General tab -> Mark the box containing `Do not store password`
|
||||
|
||||
8. General tab -> Mark the box containing `Run with highest privileges`
|
||||
|
||||

|
||||
|
||||
9. Triggers tab -> Create a new trigger and change the `Begin the task` to `At Startup` (make sure to leave the Enabled box marked)
|
||||
|
||||

|
||||

|
||||
|
||||
10. Actions tab -> Create a new action and select the `action` `Start a program`
|
||||
|
||||
11. Actions tab -> In the `Program/script` put the path of the python executable. This guide will use `"C:\Program Files\Python312\python.exe"` (Note that some python versions may have a different directory name and make sure only admin users can change the executable and the folder) - the double quotes are necessary
|
||||
|
||||
12. Actions tab -> In the `Add arguments (optional)`, add the script path and the desired settings. This guide will use the following args: `"C:\Program Files\User_NVIDIA_GPU_Control\nvml_gpu_control.py" "fan-control" "-n" "NVIDIA GeForce RTX 4080" "-sp" "10:0,20:50,35:100"`
|
||||
|
||||
or
|
||||
|
||||
```
|
||||
"C:\Program Files\User_NVIDIA_GPU_Control\nvml_gpu_control.py" "control-all" "-n" "NVIDIA GeForce RTX 4080" "-pl" "305" "-tl" "65" "-sp" "10:0,20:50,35:100"
|
||||
```
|
||||
|
||||
13. Actions tab -> In the `Start in (optional)`, add the script path directory. This guide will use the following args: `C:\Program Files\User_NVIDIA_GPU_Control`
|
||||
|
||||

|
||||
|
||||
14. Conditions tab -> Leave all boxes UNmarked
|
||||
|
||||

|
||||
|
||||
15. Settings tab -> Mark the box in `Allow task to be run on demand`
|
||||
|
||||
16. Settings tab -> UNmark the box in `Stop task if it runs longer than`
|
||||
|
||||
17. Settings tab -> Mark the box in `If the running task does not end when requested, force it to stop`
|
||||
|
||||
18. Settings tab -> In the `If the task is already running, then the following rule applies`, select the `Do not start a new instance`
|
||||
|
||||

|
||||
|
||||
|
||||
##### Command line (Not recommended and untested)
|
||||
|
||||
Some users might find it easier to simply run a command; however, it is important to warn about two things:
|
||||
|
||||
1. The command line utility has fewer features than the GUI version;
|
||||
2. If you are unsure of what the command does, please check MS's documentation before running it (especially because you must run it with admin permissions)
|
||||
|
||||
1. Open a terminal with admin permissions
|
||||
|
||||
2. Write the following command: `schtasks /create /tn 'User NVIDIA GPU Control Task' /tr 'C:\Program Files\Python312\python.exe C:\Program Files\User_NVIDIA_GPU_Control\nvml_gpu_control.py fan-control -n "NVIDIA GeForce RTX 4080" -sp "10:0,20:47,30:50,35:100"' /sc ONSTART /np /rl HIGHEST`
|
||||
|
||||
Another formatting
|
||||
|
||||
```
|
||||
schtasks /create
|
||||
/tn 'User NVIDIA GPU Control Task'
|
||||
/tr 'C:\Program Files\Python312\python.exe C:\Program Files\User_NVIDIA_GPU_Control\nvml_gpu_control.py fan-control -n "NVIDIA GeForce RTX 4080" -sp "10:0,20:47,30:50,35:100"'
|
||||
/sc ONSTART
|
||||
/np
|
||||
/rl HIGHEST
|
||||
```
|
||||
|
||||
One of the limitations is not being able to change the starting working directory, so some paths in the scripts might break. Overall, I do not recommend this approach on Windows; users should opt for the GUI method.
|
||||
|
||||
#### Linux (systemd)
|
||||
|
||||
This section will show how to install a global (system wide) systemd service in Ubuntu and enable it, so that every time the computer starts the controls resume their work.
|
||||
|
||||
##### Systemd service
|
||||
|
||||
1. Take a look at the systemd service at `linux_config/unofficial-gpu-nvml-control.service`. Change the GPU name and the settings to the desired configuration (Note: you can use the UUID as well).
|
||||
|
||||
1. Copy the unit file into `/etc/systemd/system/` (needs root)
|
||||
```
|
||||
sudo cp ./linux_config/unofficial-gpu-nvml-control.service /etc/systemd/system/
|
||||
```
|
||||
|
||||
1. Enable the service (needs root)
|
||||
```
|
||||
sudo systemctl enable unofficial-gpu-nvml-control.service
|
||||
```
|
||||
|
||||
1. Start the service (needs root)
|
||||
```
|
||||
sudo systemctl start unofficial-gpu-nvml-control.service
|
||||
```
|
||||
|
||||
1. Troubleshoot if needed (get the stdout from the service)
|
||||
```
|
||||
sudo journalctl -u unofficial-gpu-nvml-control.service
|
||||
```
|
||||
|
||||
Reload service
|
||||
```
|
||||
sudo systemctl daemon-reload
|
||||
```
|
||||
|
||||
|
||||
## Security considerations
|
||||
|
||||
### Windows
|
||||
|
||||
1. Having an admin prompt under the same desktop
|
||||
|
||||
An opened prompt under the same desktop can receive key commands from non-privileged programs, allowing any program to escalate to admin. To mitigate this it is necessary to restrict all other programs with a UI limit JobObject, create the window under a new desktop or not create any windows on the desktop (this is how it is done under the guide).
|
||||
|
||||
2. Programs that start automatically as admin must be secured against writes
|
||||
|
||||
The scripts and the executables can only be written by admin users, otherwise, another program may overwrite them and gain admin rights on the machine. Please, verify the permissions set on the python executable and on the scripts (this also applies to the library nvidia-ml-py).
|
||||
|
||||
|
||||
### Linux
|
||||
|
||||
1. Having an admin prompt under the same desktop (X11)
|
||||
|
||||
This is a similar risk to the Windows counterpart, especially on X11/Xorg. So, if you use X11, you must create a new session under a new TTY to create an admin window; but if you use Wayland, it already isolates windows by default.
|
||||
|
||||
2. Programs that start automatically as admin must be secured against writes
|
||||
|
||||
Same as Windows. All of the executables and scripts must be accessible only to the root user (UID 0). I recommend installing the pynvml library with the distro's package manager.
|
||||
|
||||
## Roadmap (features to be added)
|
||||
|
||||
### Must have
|
||||
|
||||
- [x] Fan control
|
||||
|
||||
- [x] Select GPU by name
|
||||
|
||||
- [x] Display fan speed per controller
|
||||
|
||||
- [x] Control fan policy
|
||||
|
||||
- [x] Select GPU by UUID (allows users to control more than 1 GPU individually that shares the same model - e.g. 2 RTXs 4080)
|
||||
|
||||
- [ ] Run at startup with necessary permissions (Windows and Linux) - Windows already works
|
||||
|
||||
- [x] Power limit control
|
||||
|
||||
- [x] Temperature threshold control
|
||||
|
||||
- [x] Enable all controls
|
||||
|
||||
- [x] Help action must not require NVML initialization
|
||||
|
||||
### Can consider (nice to have)
|
||||
|
||||
- [ ] Logging to file option (with message size limit) -> user can spawn another instance with the same arguments and pass the `--dry-run` option as it should mirror the output of the privileged one
|
||||
|
||||
- [ ] Temperature curves (linear, quadratic, logarithmic...) -> might be unnecessary as users can generate all speed points elsewhere and just pass it as arguments
|
||||
|
||||
## Support
|
||||
|
||||
I will be supporting this program as long as I have NVIDIA GPUs (especially because I am also dogfooding it). Don't expect new features, as it currently has everything I need, but you can suggest new features that you think are useful (note that the focus is energy and temperature control to increase **stability**). You can, however, expect bug fixes from me so my project remains compatible with the latest versions of NVML.
|
||||
|
||||
If I lose the need for this software (aka change my hardware), I will make sure to update this notice.
|
||||
|
||||
## Contribute
|
||||
|
||||
Just a few guidelines and style decisions:
|
||||
|
||||
- variable_name
|
||||
|
||||
- function_name
|
||||
|
||||
- ObjectOrClassName
|
||||
|
||||
- Other dependencies are DISALLOWED, I want to limit the dependencies as a security measure (just remember the xz incident). You are free to try to convince me, but your contribution will most likely be rejected
|
||||
|
||||
- Code should be testable, so please include unit tests with your code. If you think that certain parts are just too hard to test, include a justification
|
||||
|
||||
14
linux_config/unofficial-gpu-nvml-control.service
Normal file
14
linux_config/unofficial-gpu-nvml-control.service
Normal file
@@ -0,0 +1,14 @@
|
||||
[Unit]
|
||||
Description=Unofficial NVIDIA Control service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=root
|
||||
Group=root
|
||||
WorkingDirectory=/usr/bin/User_NVIDIA_GPU_Control/
|
||||
ExecStart=/usr/bin/python3 /usr/bin/User_NVIDIA_GPU_Control/nvml_gpu_control.py control-all -n "GPU_NAME" -pl 290 -tl 65 -sp "0:50,36:55,40:75,45:100"
|
||||
Restart=always
|
||||
KillSignal=SIGQUIT
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,16 +1,16 @@
|
||||
import pynvml
from ctypes import byref, c_uint

# Scratch script: print the fan control policy of one hard-coded GPU.
pynvml.nvmlInit()

try:
    gpu = pynvml.nvmlDeviceGetHandleByUUID('GPU-80285976-b824-419f-d246-35946b3bb2a6')

    # The policy is written through a pointer, so a ctypes integer is used as the out-parameter
    policy = c_uint(0)

    # Value before the query (always 0)
    print(policy)

    # Only fan controller 0 is queried
    pynvml.nvmlDeviceGetFanControlPolicy_v2(gpu, 0, byref(policy))

    # Value filled in by NVML
    print(policy)

finally:
    # Release NVML even when one of the calls above raises
    pynvml.nvmlShutdown()
|
||||
|
||||
import pynvml
from ctypes import byref, c_uint

# Scratch script: print the fan control policy of one hard-coded GPU.
pynvml.nvmlInit()

try:
    gpu = pynvml.nvmlDeviceGetHandleByUUID('GPU-80285976-b824-419f-d246-35946b3bb2a6')

    # The policy is written through a pointer, so a ctypes integer is used as the out-parameter
    policy = c_uint(0)

    # Value before the query (always 0)
    print(policy)

    # Only fan controller 0 is queried
    pynvml.nvmlDeviceGetFanControlPolicy_v2(gpu, 0, byref(policy))

    # Value filled in by NVML
    print(policy)

finally:
    # Release NVML even when one of the calls above raises
    pynvml.nvmlShutdown()
|
||||
@@ -1,489 +1,489 @@
|
||||
import pynvml
|
||||
import datetime
|
||||
import time
|
||||
import ctypes
|
||||
|
||||
class UnsupportedDriverVersion(Exception):
    """Raised when the installed driver version is too old for this program."""
|
||||
|
||||
class GpuNotFound(Exception):
    """Raised when the requested GPU cannot be located."""
|
||||
|
||||
class TemperatureThresholds:
    """Plain container for a set of GPU temperature thresholds.

    Every constructor argument is stored unchanged on the attribute that
    carries the same name without the ``_t`` suffix.
    """

    def __init__(self, shutdown_t, slowdown_t, max_memory_t, gpu_max_t, min_acoustic_t, current_acoustic_t, max_acoustic_t):
        self.shutdown, self.slowdown = shutdown_t, slowdown_t
        self.max_memory, self.gpu_max = max_memory_t, gpu_max_t

        # Acoustic thresholds: allowed minimum, current setting and allowed maximum
        self.min_acoustic = min_acoustic_t
        self.current_acoustic = current_acoustic_t
        self.max_acoustic = max_acoustic_t
|
||||
|
||||
class PowerLimitConstraintsWatts:
    """Pair of minimum and maximum power limits, exposed as ``min`` and ``max``."""

    def __init__(self, min_pl, max_pl):
        self.min, self.max = min_pl, max_pl
|
||||
|
||||
class FanSpeedConstraintsPercentage:
    """Pair of minimum and maximum fan speeds, exposed as ``min`` and ``max``."""

    def __init__(self, min_s, max_s):
        self.min, self.max = min_s, max_s
|
||||
|
||||
def check_driver_version(driver_version_str):
    """Validate that the driver's major version is at least 520.

    The major version is the text before the first dot of
    ``driver_version_str``, parsed as an integer.

    Raises:
        UnsupportedDriverVersion: if the major version is lower than 520.
        ValueError: if the leading component is not an integer.
    """
    major_text, _, _ = driver_version_str.partition('.')

    if int(major_text) >= 520:
        return

    raise UnsupportedDriverVersion('Driver version is lower than 520')
|
||||
|
||||
def log_helper(msg):
    """Print ``msg`` to stdout prefixed with ``LOG[<current local date and time>]: ``."""
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f'LOG[{timestamp}]: {msg}')
|
||||
|
||||
def print_help():
    """Print the command line usage text (actions and options) to stdout."""
    # The backslash is escaped so the usage line shows the Windows style path .\nvml_gpu_control.py
    help_text = '''
python.exe .\\nvml_gpu_control.py <ACTION> <OPTIONS>

ACTIONS
help
Display help text

list
List all available GPUs connected to the system by printing its name and UUID

fan-control
Monitor and controls the fan speed of the selected card (you must select a target card)

fan-info
Shows information about fan speed

fan-policy <--auto|--manual>
Changes the fan control policy to automatic (vBIOS controlled) or manual. Note that when the fan speed is changed, the NVML library automatically changes this setting to manual. This setting is useful to change the GPU back to its original state

fan-policy-info
Shows information about the current fan policy

power-limit-info
Shows information about the power limit of the selected GPU

power-control
Controls the power limit of the selected GPU. It runs in a loop by default, but can run once using the --single-use option

thresholds-info
Shows information about temperature thresholds in degrees Celsius of the selected GPU.

temp-control
Controls the temperature thresholds configuration of the selected GPU. It runs in a loop by default, but can run once using the --single-use option

control-all
Allows the use of all controls in a single command/loop


OPTIONS

--name OR -n <GPU_NAME>
Select a target GPU by its name. Note: UUID has preference over name

--uuid OR -id <GPU_UUID>
Select a target GPU by its Universally Unique IDentifier (UUID). Note: UUID has preference over name

--time-interval OR -ti <TIME_SECONDS>
Time period to wait before probing the GPU again. Works for all actions that run in a loop

--dry-run OR -dr
Run the program, but don't change/set anything. Useful for testing the behavior of the program

--speed-pair OR -sp <TEMP_CELSIUS:SPEED_PERCENTAGE,TEMP_CELSIUS:SPEED_PERCENTAGE...>
A comma separated list of pairs of temperature in celsius and the fan speed in % (temp:speed) defining basic settings for a fan curve

--default-speed OR -ds <FAN_SPEED_PERCENTAGE>
Set a default speed for when there is no match for the fan curve settings

--manual
Sets the fan policy to manual

--auto
Sets the fan policy to automatic (vBIOS controlled)

--power-limit OR -pl <POWER_LIMIT_WATTS>
Sets the power limit of the GPU in watts

--acoustic-temp-limit OR -tl <TEMPERATURE_CELSIUS>
Sets the acoustic threshold in Celsius (note that this is the same temperature limit used by GeForce Experience)

--single-use OR -su
Makes some actions work only once instead of in a loop. This option is valid for: temp-control and power-control

'''
    print(help_text)
|
||||
|
||||
def list_gpus():
    """Print one line per GPU reported by NVML, showing its index, name and UUID."""
    for device_idx in range(pynvml.nvmlDeviceGetCount()):
        device = pynvml.nvmlDeviceGetHandleByIndex(device_idx)
        print(f'Device {device_idx} name : {pynvml.nvmlDeviceGetName(device)} - UUID: {pynvml.nvmlDeviceGetUUID(device)}')
|
||||
|
||||
def print_GPU_info(gpu_handle):
    """Log the driver version plus name, UUID, fan speed, temperature and fan controller count of a GPU.

    Args:
        gpu_handle: NVML device handle of the GPU to describe.
    """
    # Each entry is built lazily so every NVML query still runs right before its own log line
    report_lines = (
        lambda: f"Driver Version: {pynvml.nvmlSystemGetDriverVersion()}",
        lambda: f'Device name : {pynvml.nvmlDeviceGetName(gpu_handle)}',
        lambda: f'Device UUID : {pynvml.nvmlDeviceGetUUID(gpu_handle)}',
        lambda: f'Device fan speed : {pynvml.nvmlDeviceGetFanSpeed(gpu_handle)}%',
        # Sensor type is passed as the literal 0
        lambda: f'Temperature {pynvml.nvmlDeviceGetTemperature(gpu_handle, 0)}°C',
        lambda: f"Fan controller count {pynvml.nvmlDeviceGetNumFans(gpu_handle)}",
    )

    for build_line in report_lines:
        log_helper(build_line())
|
||||
|
||||
|
||||
# Search for a GPU and return a handle
|
||||
|
||||
def get_GPU_handle(gpu_name, gpu_uuid):
    """Return an NVML device handle, looked up by UUID when one is given, otherwise by name.

    Args:
        gpu_name: GPU name used for the lookup when ``gpu_uuid`` is the empty string.
        gpu_uuid: GPU UUID; any value other than ``''`` takes preference over the name.
    """
    # No UUID supplied: fall back to the name based search
    if gpu_uuid == '':
        return get_GPU_handle_by_name(gpu_name)

    return pynvml.nvmlDeviceGetHandleByUUID(gpu_uuid)
|
||||
|
||||
|
||||
def get_GPU_handle_by_name(gpu_name):
    """Return the handle of the first GPU whose NVML name equals ``gpu_name``.

    Only the first match is returned, so this cannot tell apart several GPUs
    that share the same name/model; use the UUID for that case.

    Raises:
        GpuNotFound: if no device has the requested name (a message is printed first).
    """
    # Handles are fetched one at a time, so the search stops at the first match
    candidates = (pynvml.nvmlDeviceGetHandleByIndex(idx) for idx in range(pynvml.nvmlDeviceGetCount()))

    for candidate in candidates:
        if pynvml.nvmlDeviceGetName(candidate) == gpu_name:
            return candidate

    print(f'It was not possible to locate the target device : {gpu_name}')
    raise GpuNotFound('It was not possible to locate the device')
|
||||
|
||||
def set_gpu_fan_speed(gpu_handle, speed_percentage, dry_run):
    """Apply ``speed_percentage`` to every fan controller of the GPU.

    Args:
        gpu_handle: NVML device handle.
        speed_percentage: fan speed passed to NVML for each controller.
        dry_run: when ``True`` nothing is changed on the GPU.
    """
    # NVML reports the number of fan controllers here, not the number of physical fans
    controller_count = pynvml.nvmlDeviceGetNumFans(gpu_handle)

    for controller_idx in range(controller_count):
        # The controller's current speed is queried before any change; the value itself is not used
        pynvml.nvmlDeviceGetFanSpeed_v2(gpu_handle, controller_idx)

        # Setting the fan speed is DANGEROUS! Use dry run for testing before actual changes
        if dry_run == True:
            continue

        pynvml.nvmlDeviceSetFanSpeed_v2(gpu_handle, controller_idx, speed_percentage)
|
||||
|
||||
def get_gpu_fan_speed_per_controller(gpu_handle):
    """Return a list with the current speed of every fan controller, in controller index order."""
    # NVML reports the number of fan controllers here, not the number of physical fans
    controller_count = pynvml.nvmlDeviceGetNumFans(gpu_handle)

    return [
        pynvml.nvmlDeviceGetFanSpeed_v2(gpu_handle, controller_idx)
        for controller_idx in range(controller_count)
    ]
|
||||
|
||||
# This function will be here for only future references as many drivers ignore such values
|
||||
def get_gpu_fan_speed_constraints(gpu_handle):
    """Return the minimum and maximum fan speed reported by NVML as a FanSpeedConstraintsPercentage."""
    # NVML fills the two values through pointers, so ctypes integers act as out-parameters
    lowest_speed = ctypes.c_uint(0)
    highest_speed = ctypes.c_uint(0)

    # Note some drivers do not respect the minimum and may turn off the fan motor at a different speed
    # Some drivers turn off the fan motor at speeds as high as 47%
    pynvml.nvmlDeviceGetMinMaxFanSpeed(
        gpu_handle,
        ctypes.byref(lowest_speed),
        ctypes.byref(highest_speed),
    )

    return FanSpeedConstraintsPercentage(lowest_speed.value, highest_speed.value)
|
||||
|
||||
def print_fan_info(configuration):
    """Print temperature, fan speed, per-controller speeds and fan speed constraints of the configured GPU.

    Args:
        configuration: object providing ``gpu_name`` and ``gpu_uuid`` used to select the GPU.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)

    # These three readings are collected before anything is printed
    temp_now = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
    speed_now = pynvml.nvmlDeviceGetFanSpeed(handle)
    speed_limits = get_gpu_fan_speed_constraints(handle)

    print(f'Current temp: {temp_now}°C')
    print(f'Current speed: {speed_now}%')

    # One line per fan controller
    controller_speeds = get_gpu_fan_speed_per_controller(handle)
    for controller_idx, controller_speed in enumerate(controller_speeds):
        print(f'Fan controller speed {controller_idx}: {controller_speed}%')

    print(f'Fan constraints: Min {speed_limits.min}% - Max {speed_limits.max}%')
|
||||
|
||||
def fan_control(configuration):
    """Log the GPU information once, then adjust the fan speed forever.

    Every pass runs the fan control subroutine and then sleeps for
    ``configuration.time_interval``. The loop never ends on its own; the
    process must be killed to stop it.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    print_GPU_info(handle)

    while True:
        fan_control_subroutine(handle, configuration)
        time.sleep(configuration.time_interval)
|
||||
|
||||
|
||||
# Control GPU functions and monitor for changes (e.g. temperature)
|
||||
def fan_control_subroutine(gpu_handle, configuration):
    """Run one fan control pass: log the current state and apply the speed for the first matching level.

    Args:
        gpu_handle: NVML device handle.
        configuration: object providing ``temp_speed_pair`` (items with ``temperature``
            and ``speed``), ``default_speed`` and ``dry_run``.
    """
    temp_now = pynvml.nvmlDeviceGetTemperature(gpu_handle, pynvml.NVML_TEMPERATURE_GPU)
    speed_now = pynvml.nvmlDeviceGetFanSpeed(gpu_handle)

    log_helper(f'Current temp: {temp_now}°C')
    log_helper(f'Current speed: {speed_now}%')

    # One log line per fan controller
    for controller_idx, controller_speed in enumerate(get_gpu_fan_speed_per_controller(gpu_handle)):
        log_helper(f'Fan controller speed {controller_idx}: {controller_speed}%')

    # First level whose temperature has been reached
    # NOTE(review): relies on temp_speed_pair being ordered from highest to lowest temperature - confirm against the parser
    matched_level = next(
        (level for level in configuration.temp_speed_pair if temp_now >= level.temperature),
        None,
    )

    # No level matched: fall back to the default speed (sent on every pass)
    if matched_level is None:
        set_gpu_fan_speed(gpu_handle, configuration.default_speed, configuration.dry_run)
        log_helper(f'Found no temperature match, using default fan speed: {configuration.default_speed}')
        return

    # Only send commands to the GPU when the current setting differs from the target
    if speed_now == matched_level.speed:
        log_helper('Same as previous speed, nothing to do!')
        return

    set_gpu_fan_speed(gpu_handle, matched_level.speed, configuration.dry_run)
    log_helper(f'Setting GPU fan speed: {matched_level.speed}%')
|
||||
|
||||
def fan_policy_info_msg(fan_policy: int):
    """Return a human readable sentence describing the given NVML fan control policy value."""
    if fan_policy == pynvml.NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW:
        return 'Current fan control policy is automatic'

    if fan_policy == pynvml.NVML_FAN_POLICY_MANUAL:
        return 'Current fan control policy is manual'

    # Any other value is reported as unknown
    return 'Unknown fan control policy'
|
||||
|
||||
def set_fan_policy(gpu_handle, policy, dry_run):
    """Set the fan control policy on every fan controller of the GPU.

    When the automatic policy is requested, each controller is additionally
    reset to its default fan speed.

    Args:
        gpu_handle: NVML device handle.
        policy: NVML fan control policy constant to apply.
        dry_run: when ``True`` no NVML setter is called at all.
    """
    # NVML reports the number of fan controllers here, not the number of physical fans
    fan_count = pynvml.nvmlDeviceGetNumFans(gpu_handle)

    for fan_idx in range(fan_count):

        # Setting the fan control policy can be DANGEROUS! Use dry run for testing before actual changes
        # Both setters below sit behind this guard so a dry run never touches the GPU
        if dry_run == True:
            continue

        pynvml.nvmlDeviceSetFanControlPolicy(gpu_handle, fan_idx, policy)

        # Also set the default fan speed for extra safety (automatic only)
        if policy == pynvml.NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW:
            pynvml.nvmlDeviceSetDefaultFanSpeed_v2(gpu_handle, fan_idx)
|
||||
|
||||
def get_fan_policy(gpu_handle):
    """Return the fan control policy value that NVML reports for fan controller 0."""
    # NVML writes the result through a pointer, so a ctypes integer acts as the out-parameter
    policy_out = ctypes.c_uint(0)
    pynvml.nvmlDeviceGetFanControlPolicy_v2(gpu_handle, 0, ctypes.byref(policy_out))

    return policy_out.value
|
||||
|
||||
def print_fan_policy_info(configuration):
    """Print a sentence describing the current fan control policy of the configured GPU."""
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    current_policy = get_fan_policy(handle)
    print(fan_policy_info_msg(current_policy))
|
||||
|
||||
def fan_policy(configuration):
    """Switch the configured GPU to the requested fan control policy and report before/after state.

    Args:
        configuration: object providing ``fan_policy`` ('automatic' or 'manual'),
            ``gpu_name``, ``gpu_uuid`` and ``dry_run``.
    """
    target_fan_policy = configuration.fan_policy
    gpu_handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)

    # Show the current policy before setting anything
    print(fan_policy_info_msg(get_fan_policy(gpu_handle)))

    if target_fan_policy == 'automatic':
        set_fan_policy(gpu_handle, pynvml.NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW, configuration.dry_run)

    elif target_fan_policy == 'manual':
        set_fan_policy(gpu_handle, pynvml.NVML_FAN_POLICY_MANUAL, configuration.dry_run)

    # Printed unconditionally, including for dry runs and for any other target value
    print('New fan control policy set successfully!')

    # Read the policy back so the user can verify the result
    print(fan_policy_info_msg(get_fan_policy(gpu_handle)) + "\n")
|
||||
|
||||
# Power control
|
||||
|
||||
# nvmlDeviceGetPowerManagementMode was deprecated: https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g10365092adc37d7a17d261db8fe63fb6
|
||||
# nvmlDeviceSetPowerManagementLimit_v2 also seems to be deprecated, docs are non-existent: https://docs.nvidia.com/deploy/nvml-api/nvml_8h.html#nvml_8h_1d10040f340986af6cda91e71629edb2b
|
||||
|
||||
def set_power_limit(gpu_handle, power_limit_watts, dry_run):
    """Set the GPU power management limit.

    Args:
        gpu_handle: NVML device handle.
        power_limit_watts: requested limit in watts; multiplied by 1000 before being handed to NVML.
        dry_run: when ``True`` nothing is changed on the GPU.
    """
    # Setting the power limit can be DANGEROUS! Use dry run for testing before actual changes
    if dry_run == True:
        return

    scaled_limit = int(power_limit_watts * 1000)
    pynvml.nvmlDeviceSetPowerManagementLimit(gpu_handle, scaled_limit)
|
||||
|
||||
def get_current_power_limit_watts(gpu_handle):
    """Return the power management limit reported by NVML, divided by 1000 and truncated to an int.

    This is the limit defined by the user; it might differ from the enforced one.
    """
    raw_limit = pynvml.nvmlDeviceGetPowerManagementLimit(gpu_handle)
    return int(raw_limit / 1000)
|
||||
|
||||
# This one takes the constraints into account
|
||||
def get_enforced_power_limit_watts(gpu_handle):
    """Return the enforced power limit reported by NVML, divided by 1000 and truncated to an int."""
    raw_limit = pynvml.nvmlDeviceGetEnforcedPowerLimit(gpu_handle)
    return int(raw_limit / 1000)
|
||||
|
||||
def get_power_limit_constraints_watts(gpu_handle):
    """Return the allowed power limit range as a PowerLimitConstraintsWatts.

    NVML returns a pair (minimum first, maximum second); each value is divided
    by 1000 and truncated to an int.
    """
    raw_constraints = pynvml.nvmlDeviceGetPowerManagementLimitConstraints(gpu_handle)

    # Local names avoid shadowing the built-in min() and max()
    lowest_watts = int(raw_constraints[0] / 1000)
    highest_watts = int(raw_constraints[1] / 1000)

    return PowerLimitConstraintsWatts(lowest_watts, highest_watts)
|
||||
|
||||
def print_power_limit_info(configuration):
    """Print the power limit constraints, the current limit and the enforced limit of the configured GPU."""
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)

    # All readings are collected before anything is printed
    limits = get_power_limit_constraints_watts(handle)
    user_limit = get_current_power_limit_watts(handle)
    enforced_limit = get_enforced_power_limit_watts(handle)

    print(f'Power limit constraints\nMin: {limits.min}W - Max: {limits.max}W\n')
    print(f'Current power limit: {user_limit}W\n')
    print(f'Current enforced power limit: {enforced_limit}W\n')
|
||||
|
||||
def power_control_subroutine(gpu_handle, target_power_limit, dry_run):
    """Run one power control pass: log the current limits and apply the target when it is not in effect.

    Args:
        gpu_handle: NVML device handle.
        target_power_limit: desired power limit in watts.
        dry_run: forwarded to the setter; when ``True`` nothing is changed on the GPU.
    """
    limits = get_power_limit_constraints_watts(gpu_handle)
    user_limit = get_current_power_limit_watts(gpu_handle)
    enforced_limit = get_enforced_power_limit_watts(gpu_handle)

    log_helper(f'Current power limit: {user_limit}W')
    log_helper(f'Current enforced power limit: {enforced_limit}W')

    # An out of range target only produces a warning; the set attempt below still happens
    below_minimum = target_power_limit < limits.min
    above_maximum = target_power_limit > limits.max
    if below_minimum or above_maximum:
        log_helper(f'WARNING: trying to set power limit outside of the min({limits.min}W) and max({limits.max}W) range')

    # Nothing is sent to the GPU when both the configured and the enforced limit already match
    already_applied = target_power_limit == user_limit and target_power_limit == enforced_limit
    if already_applied:
        log_helper('Nothing to do, current and enforced power limit is the same as the target')
        return

    set_power_limit(gpu_handle, target_power_limit, dry_run)
    log_helper(f'Setting the power limit: {target_power_limit}W')
|
||||
|
||||
|
||||
def power_control(configuration):
    """Log the GPU information once, then keep the configured power limit applied.

    The power control subroutine runs repeatedly with a pause of
    ``configuration.time_interval`` between passes. When
    ``configuration.single_use`` is ``True`` it runs exactly once and returns.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    print_GPU_info(handle)

    while True:
        power_control_subroutine(handle, configuration.power_limit, configuration.dry_run)

        # Single use mode: stop after the first pass, without sleeping
        if configuration.single_use == True:
            return

        time.sleep(configuration.time_interval)
|
||||
|
||||
# Temperature control
|
||||
|
||||
# nvmlDeviceGetTemperatureThreshold is deprecated for some thresholds, use nvmlDeviceGetFieldValues instead
|
||||
# https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g271ba78911494f33fc079b204a929405
|
||||
def get_temperarure_thresholds(gpu_handle):
    """Return the temperature thresholds of the GPU as a TemperatureThresholds.

    Only the three acoustic thresholds are read from NVML. The shutdown,
    slowdown, memory max and GPU max thresholds are currently not queried and
    are always reported as 0.
    """
    def read_threshold(threshold_type):
        # Single place for the NVML threshold query
        return pynvml.nvmlDeviceGetTemperatureThreshold(gpu_handle, threshold_type)

    # Disabled T.Limit queries (descriptions from nvidia-settings); the intended source is
    # pynvml.nvmlDeviceGetFieldValues(gpu_handle, [<field id>])[0].value.siVal with:
    #   NVML_FI_DEV_TEMPERATURE_SHUTDOWN_TLIMIT - GPU may shut down for HW protection
    #   NVML_FI_DEV_TEMPERATURE_SLOWDOWN_TLIMIT - GPU may begin HW slowdown
    #   NVML_FI_DEV_TEMPERATURE_MEM_MAX_TLIMIT  - GPU may begin SW slowdown due to memory temperature
    #   NVML_FI_DEV_TEMPERATURE_GPU_MAX_TLIMIT  - GPU may be throttled below base clock
    shutdown_limit = 0
    slowdown_limit = 0
    memory_max_limit = 0
    gpu_max_limit = 0

    # The acoustic setting is the same one used by GeForce Experience
    # Current temperature that is set as acoustic threshold
    acoustic_current = read_threshold(pynvml.NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_CURR)

    # Minimum GPU temperature that can be set as acoustic threshold
    acoustic_min = read_threshold(pynvml.NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MIN)

    # Maximum GPU temperature that can be set as acoustic threshold
    acoustic_max = read_threshold(pynvml.NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MAX)

    return TemperatureThresholds(
        shutdown_limit,
        slowdown_limit,
        memory_max_limit,
        gpu_max_limit,
        acoustic_min,
        acoustic_current,
        acoustic_max,
    )
|
||||
|
||||
# Valid values for threshold_type
|
||||
# NVML_TEMPERATURE_THRESHOLD_SHUTDOWN
|
||||
# NVML_TEMPERATURE_THRESHOLD_SLOWDOWN
|
||||
# NVML_TEMPERATURE_THRESHOLD_MEM_MAX
|
||||
# NVML_TEMPERATURE_THRESHOLD_GPU_MAX
|
||||
# NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MIN
|
||||
# NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_CURR <- only this one will be supported
|
||||
# NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MAX
|
||||
def set_temperature_thresholds(gpu_handle, threshold_type, temperature_C, dry_run):
    """Write one temperature threshold to the GPU unless this is a dry run.

    ``threshold_type`` is one of the NVML_TEMPERATURE_THRESHOLD_* constants and
    ``temperature_C`` the value handed to NVML. Nothing is sent to the device
    when ``dry_run`` is True.
    """
    if dry_run == True:
        return

    pynvml.nvmlDeviceSetTemperatureThreshold(gpu_handle, threshold_type, temperature_C)
|
||||
|
||||
def print_thresholds_info(configuration):
    """Print the current, minimum and maximum acoustic temperature thresholds.

    The shutdown / slowdown / max-memory / gpu-max values are not printed
    because they are not queried at the moment.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    thresholds = get_temperarure_thresholds(handle)

    print(f'Temperature threshold - current acoustic: {thresholds.current_acoustic}°C')
    print(f'Temperature threshold - minimum acoustic: {thresholds.min_acoustic}°C')
    print(f'Temperature threshold - maximum acoustic: {thresholds.max_acoustic}°C')
|
||||
|
||||
def temp_control_subroutine(gpu_handle, target_acoustic_temp_limit, dry_run):
    """Run one pass of acoustic-threshold control.

    Logs the current acoustic threshold, warns when the target lies outside
    the range reported by the GPU, and writes the target only when it differs
    from the current value.
    """
    thresholds = get_temperarure_thresholds(gpu_handle)

    log_helper(f'Current acoustic threshold: {thresholds.current_acoustic}°C')

    # Only a warning: the set call below is still attempted
    if target_acoustic_temp_limit < thresholds.min_acoustic or target_acoustic_temp_limit > thresholds.max_acoustic:
        log_helper(f'WARNING: trying to set acoustic threshold outside of the min({thresholds.min_acoustic}°C) and max({thresholds.max_acoustic}°C) range')

    if target_acoustic_temp_limit == thresholds.current_acoustic:
        log_helper('Nothing to do, current temperature threshold is the same as the target')
        return

    set_temperature_thresholds(gpu_handle, pynvml.NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_CURR, target_acoustic_temp_limit, dry_run)
    log_helper(f'Setting acoustic temperature threshold: {target_acoustic_temp_limit}°C')
|
||||
|
||||
|
||||
def temp_control(configuration):
    """Keep the acoustic temperature threshold at ``configuration.acoustic_temp_limit``.

    Runs in a loop, sleeping ``configuration.time_interval`` seconds between
    passes, unless ``configuration.single_use`` asks for a single pass.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    print_GPU_info(handle)

    keep_running = True
    while keep_running:
        temp_control_subroutine(handle, configuration.acoustic_temp_limit, configuration.dry_run)

        if configuration.single_use == True:
            # One-shot mode: leave without sleeping
            keep_running = False
        else:
            time.sleep(configuration.time_interval)
|
||||
|
||||
|
||||
def control_all(configuration):
    """Run power, temperature-threshold and fan control in a single loop.

    Power and acoustic-threshold control only run when the user changed the
    corresponding setting from its default of 0. Fan control always runs.
    The loop never ends on its own; the process must be killed to stop it.
    """
    gpu_handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    print_GPU_info(gpu_handle)

    while True:

        # If this setting is different than the default, the user has enabled it
        if configuration.power_limit != 0:
            power_control_subroutine(gpu_handle, configuration.power_limit, configuration.dry_run)

        # If this setting is different than the default, the user has enabled it
        if configuration.acoustic_temp_limit != 0:
            temp_control_subroutine(gpu_handle, configuration.acoustic_temp_limit, configuration.dry_run)

        fan_control_subroutine(gpu_handle, configuration)

        # Wait before probing the GPU again, like the other control loops do;
        # without this the loop would poll the driver continuously
        time.sleep(configuration.time_interval)
|
||||
|
||||
import pynvml
|
||||
import datetime
|
||||
import time
|
||||
import ctypes
|
||||
|
||||
class UnsupportedDriverVersion(Exception):
    """Raised by check_driver_version when the driver major version is below 520."""
|
||||
|
||||
class GpuNotFound(Exception):
    """Raised when no GPU matches the name requested by the user."""
|
||||
|
||||
class TemperatureThresholds:
    """Plain container for the temperature thresholds of one GPU."""

    def __init__(self, shutdown_t, slowdown_t, max_memory_t, gpu_max_t, min_acoustic_t, current_acoustic_t, max_acoustic_t):
        # Hardware protection related thresholds
        self.shutdown, self.slowdown = shutdown_t, slowdown_t
        self.max_memory, self.gpu_max = max_memory_t, gpu_max_t

        # Acoustic thresholds: allowed range and the value currently set
        self.min_acoustic = min_acoustic_t
        self.current_acoustic = current_acoustic_t
        self.max_acoustic = max_acoustic_t
|
||||
|
||||
class PowerLimitConstraintsWatts:
    """Minimum and maximum power limit accepted by a GPU, in watts."""

    def __init__(self, min_pl, max_pl):
        self.min, self.max = min_pl, max_pl
|
||||
|
||||
class FanSpeedConstraintsPercentage:
    """Minimum and maximum fan speed reported for a GPU, in percent."""

    def __init__(self, min_s, max_s):
        self.min, self.max = min_s, max_s
|
||||
|
||||
def check_driver_version(driver_version_str):
    """Raise UnsupportedDriverVersion when the driver major version is below 520.

    ``driver_version_str`` is a dotted version string; only the part before
    the first dot is inspected.
    """
    major_text, _, _ = driver_version_str.partition('.')

    if int(major_text) < 520:
        raise UnsupportedDriverVersion('Driver version is lower than 520')
|
||||
|
||||
def log_helper(msg):
    """Print ``msg`` prefixed with ``LOG[<local date and time>]: ``."""
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f'LOG[{timestamp}]: {msg}')
|
||||
|
||||
def print_help():
    """Print the command line usage text (actions and options) to stdout."""
    # NOTE(review): the entry script dispatches on 'get-power-limit-info' and
    # 'get-thresholds-info', while this text lists 'power-limit-info' and
    # 'thresholds-info' - confirm which spelling the argument parser accepts.
    help_text = '''
python.exe .\\nvml_gpu_control.py <ACTION> <OPTIONS>

ACTIONS
    help
        Display help text

    list
        List all available GPUs connected to the system by printing its name and UUID

    fan-control
        Monitors and controls the fan speed of the selected card (you must select a target card)

    fan-info
        Shows information about fan speed

    fan-policy <--auto|--manual>
        Changes the fan control policy to automatic (vBIOS controlled) or manual. Note that when the fan speed is changed, the NVML library automatically changes this setting to manual. This setting is useful to change the GPU back to its original state

    fan-policy-info
        Shows information about the current fan policy

    power-limit-info
        Shows information about the power limit of the selected GPU

    power-control
        Controls the power limit of the selected GPU. It runs in a loop by default, but can run once using the --single-use option

    thresholds-info
        Shows information about temperature thresholds in degrees Celsius of the selected GPU.

    temp-control
        Controls the temperature thresholds configuration of the selected GPU. It runs in a loop by default, but can run once using the --single-use option

    control-all
        Allows the use of all controls in a single command/loop


OPTIONS

    --name OR -n <GPU_NAME>
        Select a target GPU by its name. Note: UUID has preference over name

    --uuid OR -id <GPU_UUID>
        Select a target GPU by its Universally Unique IDentifier (UUID). Note: UUID has preference over name

    --time-interval OR -ti <TIME_SECONDS>
        Time period to wait before probing the GPU again. Works for all actions that run in a loop

    --dry-run OR -dr
        Run the program, but don't change/set anything. Useful for testing the behavior of the program

    --speed-pair OR -sp <TEMP_CELSIUS:SPEED_PERCENTAGE,TEMP_CELSIUS:SPEED_PERCENTAGE...>
        A comma separated list of pairs of temperature in celsius and the fan speed in % (temp:speed) defining basic settings for a fan curve

    --default-speed OR -ds <FAN_SPEED_PERCENTAGE>
        Set a default speed for when there is no match for the fan curve settings

    --manual
        Sets the fan policy to manual

    --auto
        Sets the fan policy to automatic (vBIOS controlled)

    --power-limit OR -pl <POWER_LIMIT_WATTS>
        Sets the power limit of the GPU in watts

    --acoustic-temp-limit OR -tl <TEMPERATURE_CELSIUS>
        Sets the acoustic threshold in Celsius (note that this is the same temperature limit used by GeForce Experience)

    --single-use OR -su
        Makes some actions work only once instead of in a loop. This option is valid for: temp-control and power-control

'''
    print(help_text)
|
||||
|
||||
def list_gpus():
    """Print the index, name and UUID of every GPU that NVML enumerates."""
    for index in range(pynvml.nvmlDeviceGetCount()):
        device = pynvml.nvmlDeviceGetHandleByIndex(index)
        device_name = pynvml.nvmlDeviceGetName(device)
        device_uuid = pynvml.nvmlDeviceGetUUID(device)
        print(f'Device {index} name : {device_name} - UUID: {device_uuid}')
|
||||
|
||||
def print_GPU_info(gpu_handle):
    """Log driver version, name, UUID, fan speed, temperature and fan controller count of the GPU."""
    log_helper(f"Driver Version: {pynvml.nvmlSystemGetDriverVersion()}")
    log_helper(f'Device name : {pynvml.nvmlDeviceGetName(gpu_handle)}')
    log_helper(f'Device UUID : {pynvml.nvmlDeviceGetUUID(gpu_handle)}')
    log_helper(f'Device fan speed : {pynvml.nvmlDeviceGetFanSpeed(gpu_handle)}%')

    # Use the named sensor constant (the GPU die sensor) instead of a bare 0,
    # matching how the other functions in this file read the temperature
    log_helper(f'Temperature {pynvml.nvmlDeviceGetTemperature(gpu_handle, pynvml.NVML_TEMPERATURE_GPU)}°C')
    log_helper(f"Fan controller count {pynvml.nvmlDeviceGetNumFans(gpu_handle)}")
|
||||
|
||||
|
||||
# Search for a GPU and return a handle
|
||||
|
||||
def get_GPU_handle(gpu_name, gpu_uuid):
    """Return an NVML handle for the target GPU.

    A non-empty ``gpu_uuid`` takes precedence; otherwise the GPU is looked up
    by ``gpu_name``.
    """
    if gpu_uuid == '':
        return get_GPU_handle_by_name(gpu_name)

    return pynvml.nvmlDeviceGetHandleByUUID(gpu_uuid)
|
||||
|
||||
|
||||
# This will NOT work reliably if the user has two or more GPUs with the same name/model (the first match is returned), use the UUID in that case
|
||||
def get_GPU_handle_by_name(gpu_name):
    """Return the handle of the first enumerated GPU whose name equals ``gpu_name``.

    Raises:
        GpuNotFound: when no enumerated device has that exact name.
    """
    for index in range(pynvml.nvmlDeviceGetCount()):
        candidate = pynvml.nvmlDeviceGetHandleByIndex(index)

        if pynvml.nvmlDeviceGetName(candidate) == gpu_name:
            return candidate

    # Every device was inspected without a match
    print(f'It was not possible to locate the target device : {gpu_name}')
    raise GpuNotFound('It was not possible to locate the device')
|
||||
|
||||
def set_gpu_fan_speed(gpu_handle, speed_percentage, dry_run):
    """Set every fan controller of the GPU to ``speed_percentage``.

    Each controller's speed is still queried on a dry run, but nothing is
    written to the device.
    """
    # This is not really the number of fans, but the number of controllers
    controller_count = pynvml.nvmlDeviceGetNumFans(gpu_handle)

    for controller_idx in range(controller_count):
        # The queried value itself is not used afterwards
        pynvml.nvmlDeviceGetFanSpeed_v2(gpu_handle, controller_idx)

        if dry_run == True:
            continue

        # Setting the fan speed is DANGEROUS! Use dry run for testing before actual changes
        pynvml.nvmlDeviceSetFanSpeed_v2(gpu_handle, controller_idx, speed_percentage)
|
||||
|
||||
def get_gpu_fan_speed_per_controller(gpu_handle):
    """Return a list with the fan speed of each fan controller, in controller order."""
    # This is not really the number of fans, but the number of controllers
    controller_count = pynvml.nvmlDeviceGetNumFans(gpu_handle)

    return [
        pynvml.nvmlDeviceGetFanSpeed_v2(gpu_handle, controller_idx)
        for controller_idx in range(controller_count)
    ]
|
||||
|
||||
# This function will be here for only future references as many drivers ignore such values
|
||||
def get_gpu_fan_speed_constraints(gpu_handle):
    """Return the min/max fan speed reported by NVML as a FanSpeedConstraintsPercentage."""
    # The NVML binding fills these through pointers
    lowest, highest = ctypes.c_uint(0), ctypes.c_uint(0)

    # Note: some drivers do not respect the minimum and may turn off the fan
    # motor at a different speed (as high as 47% on some drivers)
    pynvml.nvmlDeviceGetMinMaxFanSpeed(gpu_handle, ctypes.byref(lowest), ctypes.byref(highest))

    return FanSpeedConstraintsPercentage(lowest.value, highest.value)
|
||||
|
||||
def print_fan_info(configuration):
    """Print temperature, overall fan speed, per-controller speeds and fan speed constraints."""
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)

    temperature = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
    speed = pynvml.nvmlDeviceGetFanSpeed(handle)
    limits = get_gpu_fan_speed_constraints(handle)

    print(f'Current temp: {temperature}°C')
    print(f'Current speed: {speed}%')

    # One line per fan controller
    for controller_idx, controller_speed in enumerate(get_gpu_fan_speed_per_controller(handle)):
        print(f'Fan controller speed {controller_idx}: {controller_speed}%')

    print(f'Fan constraints: Min {limits.min}% - Max {limits.max}%')
|
||||
|
||||
def fan_control(configuration):
    """Apply the configured fan curve forever, once every ``configuration.time_interval`` seconds.

    The loop has no exit condition; the process must be killed to stop it.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    print_GPU_info(handle)

    while True:
        fan_control_subroutine(handle, configuration)
        time.sleep(configuration.time_interval)
|
||||
|
||||
|
||||
# Control GPU functions and monitor for changes (e.g. temperature)
|
||||
def fan_control_subroutine(gpu_handle, configuration):
    """Run one pass of fan control.

    Logs the current temperature and fan speeds, then picks the first entry of
    ``configuration.temp_speed_pair`` whose temperature is at or below the
    current one. The fan speed is only written when it differs from the
    current speed. Without a matching entry, ``configuration.default_speed``
    is applied.
    """
    current_temp = pynvml.nvmlDeviceGetTemperature(gpu_handle, pynvml.NVML_TEMPERATURE_GPU)
    current_speed = pynvml.nvmlDeviceGetFanSpeed(gpu_handle)

    log_helper(f'Current temp: {current_temp}°C')
    log_helper(f'Current speed: {current_speed}%')

    # Report the fan speed of every controller
    for controller_idx, controller_speed in enumerate(get_gpu_fan_speed_per_controller(gpu_handle)):
        log_helper(f'Fan controller speed {controller_idx}: {controller_speed}%')

    # The list is expected to start at the highest temperature and go down,
    # so the first entry reached is the one to apply
    matched = next(
        (entry for entry in configuration.temp_speed_pair if current_temp >= entry.temperature),
        None,
    )

    if matched is None:
        # No match, fall back to the default speed
        set_gpu_fan_speed(gpu_handle, configuration.default_speed, configuration.dry_run)
        log_helper(f'Found no temperature match, using default fan speed: {configuration.default_speed}')
        return

    # Only send commands to the GPU if the current setting differs from the target
    if current_speed == matched.speed:
        log_helper('Same as previous speed, nothing to do!')
    else:
        set_gpu_fan_speed(gpu_handle, matched.speed, configuration.dry_run)
        log_helper(f'Setting GPU fan speed: {matched.speed}%')
|
||||
|
||||
def fan_policy_info_msg(fan_policy: int):
    """Return a human readable sentence describing the NVML fan control policy value."""
    if fan_policy == pynvml.NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW:
        return 'Current fan control policy is automatic'

    if fan_policy == pynvml.NVML_FAN_POLICY_MANUAL:
        return 'Current fan control policy is manual'

    return 'Unknown fan control policy'
|
||||
|
||||
def set_fan_policy(gpu_handle, policy, dry_run):
    """Apply the fan control ``policy`` to every fan controller of the GPU.

    When the automatic policy is selected, the default fan speed is restored
    as well. Nothing is written to the device on a dry run.
    """
    # This is not really the number of fans, but the number of controllers
    controller_count = pynvml.nvmlDeviceGetNumFans(gpu_handle)

    for controller_idx in range(controller_count):

        if dry_run == True:
            continue

        # Setting the fan control policy can be DANGEROUS! Use dry run for testing before actual changes
        pynvml.nvmlDeviceSetFanControlPolicy(gpu_handle, controller_idx, policy)

        # Also set the default fan speed for extra safety (automatic only)
        if policy == pynvml.NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW:
            pynvml.nvmlDeviceSetDefaultFanSpeed_v2(gpu_handle, controller_idx)
|
||||
|
||||
def get_fan_policy(gpu_handle):
    """Return the fan control policy value of fan controller 0."""
    # The binding writes the result through a pointer, hence the ctypes value
    policy_out = ctypes.c_uint(0)
    pynvml.nvmlDeviceGetFanControlPolicy_v2(gpu_handle, 0, ctypes.byref(policy_out))

    return policy_out.value
|
||||
|
||||
def print_fan_policy_info(configuration):
    """Print whether the target GPU currently uses the automatic or manual fan policy."""
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    current_policy = get_fan_policy(handle)
    print(fan_policy_info_msg(current_policy))
|
||||
|
||||
def fan_policy(configuration):
    """Switch the fan control policy of the target GPU.

    Prints the policy before the change, applies ``configuration.fan_policy``
    ('automatic' or 'manual') and prints the policy again afterwards.
    """
    requested = configuration.fan_policy
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)

    # Report the policy in effect before touching anything
    print(fan_policy_info_msg(get_fan_policy(handle)))

    if requested == 'automatic':
        set_fan_policy(handle, pynvml.NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW, configuration.dry_run)
    elif requested == 'manual':
        set_fan_policy(handle, pynvml.NVML_FAN_POLICY_MANUAL, configuration.dry_run)

    print('New fan control policy set sucessfully!')

    # Report the policy now in effect
    print(fan_policy_info_msg(get_fan_policy(handle)) + "\n")
|
||||
|
||||
# Power control
|
||||
|
||||
# nvmlDeviceGetPowerManagementMode was deprecated: https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g10365092adc37d7a17d261db8fe63fb6
|
||||
# nvmlDeviceSetPowerManagementLimit_v2 also seems to be deprecated, docs are non-existent: https://docs.nvidia.com/deploy/nvml-api/nvml_8h.html#nvml_8h_1d10040f340986af6cda91e71629edb2b
|
||||
|
||||
def set_power_limit(gpu_handle, power_limit_watts, dry_run):
    """Set the GPU power limit unless this is a dry run.

    ``power_limit_watts`` is multiplied by 1000 and truncated to an int before
    being handed to NVML.
    """
    if dry_run == True:
        return

    # Setting the power limit can be DANGEROUS! Use dry run for testing before actual changes
    limit_for_nvml = int(power_limit_watts * 1000)
    pynvml.nvmlDeviceSetPowerManagementLimit(gpu_handle, limit_for_nvml)
|
||||
|
||||
# Current power limit defined by the user, but it might differ from the enforced one
|
||||
def get_current_power_limit_watts(gpu_handle):
    """Return the configured power management limit, divided by 1000 and truncated to an int."""
    raw_limit = pynvml.nvmlDeviceGetPowerManagementLimit(gpu_handle)
    return int(raw_limit / 1000)
|
||||
|
||||
# This one takes the constraints into account
|
||||
def get_enforced_power_limit_watts(gpu_handle):
    """Return the enforced power limit, divided by 1000 and truncated to an int."""
    raw_limit = pynvml.nvmlDeviceGetEnforcedPowerLimit(gpu_handle)
    return int(raw_limit / 1000)
|
||||
|
||||
def get_power_limit_constraints_watts(gpu_handle):
    """Return the allowed power limit range as a PowerLimitConstraintsWatts.

    NVML returns the pair (minimum, maximum); each value is divided by 1000
    and truncated to an int.
    """
    raw_constraints = pynvml.nvmlDeviceGetPowerManagementLimitConstraints(gpu_handle)

    lowest_watts = int(raw_constraints[0] / 1000)
    highest_watts = int(raw_constraints[1] / 1000)

    return PowerLimitConstraintsWatts(lowest_watts, highest_watts)
|
||||
|
||||
def print_power_limit_info(configuration):
    """Print the power limit range, the configured limit and the enforced limit of the target GPU."""
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)

    limits = get_power_limit_constraints_watts(handle)
    configured_watts = get_current_power_limit_watts(handle)
    enforced_watts = get_enforced_power_limit_watts(handle)

    print(f'Power limit constraints\nMin: {limits.min}W - Max: {limits.max}W\n')
    print(f'Current power limit: {configured_watts}W\n')
    print(f'Current enforced power limit: {enforced_watts}W\n')
|
||||
|
||||
def power_control_subroutine(gpu_handle, target_power_limit, dry_run):
    """Run one pass of power limit control.

    Logs the configured and enforced limits, warns when the target lies
    outside the range reported by the GPU, and sets the target unless both the
    configured and the enforced limit already equal it.
    """
    limits = get_power_limit_constraints_watts(gpu_handle)
    configured_watts = get_current_power_limit_watts(gpu_handle)
    enforced_watts = get_enforced_power_limit_watts(gpu_handle)

    log_helper(f'Current power limit: {configured_watts}W')
    log_helper(f'Current enforced power limit: {enforced_watts}W')

    # Only a warning: the set call below is still attempted
    if target_power_limit < limits.min or target_power_limit > limits.max:
        log_helper(f'WARNING: trying to set power limit outside of the min({limits.min}W) and max({limits.max}W) range')

    if target_power_limit == configured_watts and target_power_limit == enforced_watts:
        log_helper('Nothing to do, current and enforced power limit is the same as the target')
        return

    set_power_limit(gpu_handle, target_power_limit, dry_run)
    log_helper(f'Setting the power limit: {target_power_limit}W')
|
||||
|
||||
|
||||
def power_control(configuration):
    """Keep the GPU power limit at ``configuration.power_limit``.

    Resolves the target GPU, logs its basic information and then applies the
    power limit repeatedly, sleeping ``configuration.time_interval`` seconds
    between passes. With ``configuration.single_use`` set, only one pass runs.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    print_GPU_info(handle)

    keep_running = True
    while keep_running:
        power_control_subroutine(handle, configuration.power_limit, configuration.dry_run)

        if configuration.single_use == True:
            # One-shot mode: leave without sleeping
            keep_running = False
        else:
            time.sleep(configuration.time_interval)
|
||||
|
||||
# Temperature control
|
||||
|
||||
# nvmlDeviceGetTemperatureThreshold is deprecated for some thresholds, use nvmlDeviceGetFieldValues instead
|
||||
# https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g271ba78911494f33fc079b204a929405
|
||||
def get_temperarure_thresholds(gpu_handle):
    """Collect the temperature thresholds of the GPU into a TemperatureThresholds.

    Only the three acoustic thresholds are actually queried from NVML. The
    shutdown, slowdown, memory-max and GPU-max entries are placeholders fixed
    at 0 because their queries are currently disabled.
    """
    # Disabled queries (kept for reference), all through
    # pynvml.nvmlDeviceGetFieldValues(gpu_handle, [<field>])[0].value.siVal:
    #   shutdown   -> NVML_FI_DEV_TEMPERATURE_SHUTDOWN_TLIMIT (GPU may shut down for HW protection)
    #   slowdown   -> NVML_FI_DEV_TEMPERATURE_SLOWDOWN_TLIMIT (GPU may begin HW slowdown)
    #   max memory -> NVML_FI_DEV_TEMPERATURE_MEM_MAX_TLIMIT  (SW slowdown due to memory temperature)
    #   gpu max    -> NVML_FI_DEV_TEMPERATURE_GPU_MAX_TLIMIT  (GPU may be throttled below base clock)
    placeholder = 0

    # The acoustic setting is the same one used by GeForce Experience.
    # These thresholds still use the old query function.
    read_threshold = pynvml.nvmlDeviceGetTemperatureThreshold

    # Current temperature that is set as acoustic threshold
    acoustic_now = read_threshold(gpu_handle, pynvml.NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_CURR)

    # Minimum GPU temperature that can be set as acoustic threshold
    acoustic_low = read_threshold(gpu_handle, pynvml.NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MIN)

    # Maximum GPU temperature that can be set as acoustic threshold
    acoustic_high = read_threshold(gpu_handle, pynvml.NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MAX)

    return TemperatureThresholds(
        placeholder,    # shutdown
        placeholder,    # slowdown
        placeholder,    # max memory
        placeholder,    # gpu max
        acoustic_low,
        acoustic_now,
        acoustic_high,
    )
|
||||
|
||||
# Valid values for threshold_type
|
||||
# NVML_TEMPERATURE_THRESHOLD_SHUTDOWN
|
||||
# NVML_TEMPERATURE_THRESHOLD_SLOWDOWN
|
||||
# NVML_TEMPERATURE_THRESHOLD_MEM_MAX
|
||||
# NVML_TEMPERATURE_THRESHOLD_GPU_MAX
|
||||
# NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MIN
|
||||
# NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_CURR <- only this one will be supported
|
||||
# NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_MAX
|
||||
def set_temperature_thresholds(gpu_handle, threshold_type, temperature_C, dry_run):
    """Write one temperature threshold to the GPU unless this is a dry run.

    ``threshold_type`` is one of the NVML_TEMPERATURE_THRESHOLD_* constants and
    ``temperature_C`` the value handed to NVML. Nothing is sent to the device
    when ``dry_run`` is True.
    """
    if dry_run == True:
        return

    pynvml.nvmlDeviceSetTemperatureThreshold(gpu_handle, threshold_type, temperature_C)
|
||||
|
||||
def print_thresholds_info(configuration):
    """Print the current, minimum and maximum acoustic temperature thresholds.

    The shutdown / slowdown / max-memory / gpu-max values are not printed
    because they are not queried at the moment.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    thresholds = get_temperarure_thresholds(handle)

    print(f'Temperature threshold - current acoustic: {thresholds.current_acoustic}°C')
    print(f'Temperature threshold - minimum acoustic: {thresholds.min_acoustic}°C')
    print(f'Temperature threshold - maximum acoustic: {thresholds.max_acoustic}°C')
|
||||
|
||||
def temp_control_subroutine(gpu_handle, target_acoustic_temp_limit, dry_run):
    """Run one pass of acoustic-threshold control.

    Logs the current acoustic threshold, warns when the target lies outside
    the range reported by the GPU, and writes the target only when it differs
    from the current value.
    """
    thresholds = get_temperarure_thresholds(gpu_handle)

    log_helper(f'Current acoustic threshold: {thresholds.current_acoustic}°C')

    # Only a warning: the set call below is still attempted
    if target_acoustic_temp_limit < thresholds.min_acoustic or target_acoustic_temp_limit > thresholds.max_acoustic:
        log_helper(f'WARNING: trying to set acoustic threshold outside of the min({thresholds.min_acoustic}°C) and max({thresholds.max_acoustic}°C) range')

    if target_acoustic_temp_limit == thresholds.current_acoustic:
        log_helper('Nothing to do, current temperature threshold is the same as the target')
        return

    set_temperature_thresholds(gpu_handle, pynvml.NVML_TEMPERATURE_THRESHOLD_ACOUSTIC_CURR, target_acoustic_temp_limit, dry_run)
    log_helper(f'Setting acoustic temperature threshold: {target_acoustic_temp_limit}°C')
|
||||
|
||||
|
||||
def temp_control(configuration):
    """Keep the acoustic temperature threshold at ``configuration.acoustic_temp_limit``.

    Runs in a loop, sleeping ``configuration.time_interval`` seconds between
    passes, unless ``configuration.single_use`` asks for a single pass.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    print_GPU_info(handle)

    keep_running = True
    while keep_running:
        temp_control_subroutine(handle, configuration.acoustic_temp_limit, configuration.dry_run)

        if configuration.single_use == True:
            # One-shot mode: leave without sleeping
            keep_running = False
        else:
            time.sleep(configuration.time_interval)
|
||||
|
||||
|
||||
def control_all(configuration):
    """Run power, temperature-threshold and fan control in a single loop.

    Power and acoustic-threshold control only run when the user changed the
    corresponding setting from its default of 0. Fan control always runs.
    The loop sleeps ``configuration.time_interval`` seconds between passes
    and never ends on its own.
    """
    handle = get_GPU_handle(configuration.gpu_name, configuration.gpu_uuid)
    print_GPU_info(handle)

    while True:
        # A value other than the default 0 means the user enabled power control
        power_control_enabled = configuration.power_limit != 0
        if power_control_enabled:
            power_control_subroutine(handle, configuration.power_limit, configuration.dry_run)

        # A value other than the default 0 means the user enabled threshold control
        temp_control_enabled = configuration.acoustic_temp_limit != 0
        if temp_control_enabled:
            temp_control_subroutine(handle, configuration.acoustic_temp_limit, configuration.dry_run)

        fan_control_subroutine(handle, configuration)

        time.sleep(configuration.time_interval)
|
||||
@@ -1,70 +1,70 @@
|
||||
from pynvml import *
|
||||
import sys
|
||||
import helper_functions as main_funcs
|
||||
import parse_args
|
||||
|
||||
def main():
|
||||
|
||||
# Getting a configuration obj
|
||||
config = parse_args.parse_cmd_args(sys.argv)
|
||||
|
||||
if config.action == 'help':
|
||||
main_funcs.print_help()
|
||||
return
|
||||
|
||||
try:
|
||||
# Starting nvml
|
||||
nvmlInit()
|
||||
|
||||
# Verify driver version
|
||||
try:
|
||||
main_funcs.check_driver_version(nvmlSystemGetDriverVersion())
|
||||
|
||||
except main_funcs.UnsupportedDriverVersion:
|
||||
print('WARNING: You are running an unsupported driver, you may have problems')
|
||||
|
||||
match config.action:
|
||||
|
||||
# Information query
|
||||
case 'list':
|
||||
main_funcs.list_gpus()
|
||||
|
||||
case 'get-power-limit-info':
|
||||
main_funcs.print_power_limit_info(config)
|
||||
|
||||
case 'get-thresholds-info':
|
||||
main_funcs.print_thresholds_info(config)
|
||||
|
||||
# Fan control
|
||||
case 'fan-control':
|
||||
main_funcs.fan_control(config)
|
||||
|
||||
# Fan control
|
||||
case 'fan-info':
|
||||
main_funcs.print_fan_info(config)
|
||||
|
||||
case 'fan-policy':
|
||||
main_funcs.fan_policy(config)
|
||||
|
||||
case 'fan-policy-info':
|
||||
main_funcs.print_fan_policy_info(config)
|
||||
|
||||
# Power control
|
||||
case 'power-control':
|
||||
main_funcs.power_control(config)
|
||||
|
||||
# Temperature threshold control
|
||||
case 'temp-control':
|
||||
main_funcs.temp_control(config)
|
||||
|
||||
# Enable everything
|
||||
case 'control-all':
|
||||
main_funcs.control_all(config)
|
||||
|
||||
# One should call shutdown with or without erros, this is why I am using finally
|
||||
finally:
|
||||
print('Calling nvml shutdown and terminating the program')
|
||||
nvmlShutdown()
|
||||
|
||||
if __name__ == '__main__':
|
||||
from pynvml import *
|
||||
import sys
|
||||
import helper_functions as main_funcs
|
||||
import parse_args
|
||||
|
||||
def main():
|
||||
|
||||
# Getting a configuration obj
|
||||
config = parse_args.parse_cmd_args(sys.argv)
|
||||
|
||||
if config.action == 'help':
|
||||
main_funcs.print_help()
|
||||
return
|
||||
|
||||
try:
|
||||
# Starting nvml
|
||||
nvmlInit()
|
||||
|
||||
# Verify driver version
|
||||
try:
|
||||
main_funcs.check_driver_version(nvmlSystemGetDriverVersion())
|
||||
|
||||
except main_funcs.UnsupportedDriverVersion:
|
||||
print('WARNING: You are running an unsupported driver, you may have problems')
|
||||
|
||||
match config.action:
|
||||
|
||||
# Information query
|
||||
case 'list':
|
||||
main_funcs.list_gpus()
|
||||
|
||||
case 'get-power-limit-info':
|
||||
main_funcs.print_power_limit_info(config)
|
||||
|
||||
case 'get-thresholds-info':
|
||||
main_funcs.print_thresholds_info(config)
|
||||
|
||||
# Fan control
|
||||
case 'fan-control':
|
||||
main_funcs.fan_control(config)
|
||||
|
||||
# Fan control
|
||||
case 'fan-info':
|
||||
main_funcs.print_fan_info(config)
|
||||
|
||||
case 'fan-policy':
|
||||
main_funcs.fan_policy(config)
|
||||
|
||||
case 'fan-policy-info':
|
||||
main_funcs.print_fan_policy_info(config)
|
||||
|
||||
# Power control
|
||||
case 'power-control':
|
||||
main_funcs.power_control(config)
|
||||
|
||||
# Temperature threshold control
|
||||
case 'temp-control':
|
||||
main_funcs.temp_control(config)
|
||||
|
||||
# Enable everything
|
||||
case 'control-all':
|
||||
main_funcs.control_all(config)
|
||||
|
||||
# One should call shutdown with or without erros, this is why I am using finally
|
||||
finally:
|
||||
print('Calling nvml shutdown and terminating the program')
|
||||
nvmlShutdown()
|
||||
|
||||
# Run the command line interface only when this file is executed as a script
if __name__ == "__main__":
    main()
|
||||
@@ -1,228 +1,228 @@
|
||||
import helper_functions
|
||||
|
||||
class InvalidAction(Exception):
    """Error type for an invalid command line action."""
|
||||
|
||||
class InvalidOption(Exception):
    """Error type for an invalid command line option."""
|
||||
|
||||
class InvalidNumberSpeedPairParams(Exception):
    """Error type for a speed pair with the wrong number of parameters."""
|
||||
|
||||
class InvalidFanSpeed(Exception):
|
||||
pass
|
||||
|
||||
class InsufficientArgs(Exception):
|
||||
pass
|
||||
|
||||
class InvalidConfig(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class Configuration:
    """Plain container for everything parsed from the command line.

    Only one target GPU is supported per configuration; use one process for
    each GPU so that errors stay isolated from each other.
    """

    def __init__(self):

        # What to do
        self.action = ""

        # Which GPU to act on
        self.target_gpu = ""
        self.gpu_name = ""
        self.gpu_uuid = ""

        # Fan curve settings
        self.temp_speed_pair = []   # Filled with TempSpeedPair objects
        self.curve_type = "fixed"   # Internal usage only for now (line and curve functions may be added later)
        self.default_speed = 50     # Percentage
        self.fan_policy = ''

        # Values with no usable default: the user must set them
        self.acoustic_temp_limit = 0
        self.power_limit = 0

        # Execution settings
        self.time_interval = 1.0    # In seconds
        self.dry_run = False
        self.single_use = False
|
||||
|
||||
class TempSpeedPair:
    """One point of the fan curve: a temperature and the fan speed for it."""

    def __init__(self, temperature, speed):

        self.temperature = temperature
        self.speed = speed # Percentage!

    def __repr__(self):
        # Makes failed test assertions readable
        return f'TempSpeedPair({self.temperature!r}, {self.speed!r})'

    # The sorting is based on the temperature
    def __lt__(self, other):
        if not isinstance(other, TempSpeedPair):
            return NotImplemented

        # Must be a strict comparison: with "<=" two pairs with the same
        # temperature are both "less than" each other, which makes their
        # relative order after sorting unpredictable
        return self.temperature < other.temperature

    # Important for testing
    def __eq__(self, other):
        if not isinstance(other, TempSpeedPair):
            return NotImplemented

        return self.temperature == other.temperature and self.speed == other.speed
|
||||
|
||||
# Some sanity checks (in case the user makes a bad config by accident)
|
||||
def validate_config(config):
    """Reject a configuration that is missing a mandatory setting.

    A short explanation is printed and InvalidConfig is raised when:
      * neither gpu_name nor gpu_uuid is set;
      * the action is 'fan-policy' and no fan policy was chosen;
      * the action is 'power-control' and power_limit is still 0;
      * the action is 'temp-control' and acoustic_temp_limit is still 0.

    Returns None when every check passes.
    """

    # A target GPU is mandatory: by name, by UUID or both
    name_missing = config.gpu_name == ''
    if name_missing and config.gpu_uuid == '':
        print("You did not select a target GPU")
        raise InvalidConfig("No GPU was selected")

    chosen_action = config.action

    # fan-policy needs a mode
    if chosen_action == 'fan-policy' and config.fan_policy == '':
        print("You did not select a fan policy: autmatic or manual")
        raise InvalidConfig("No fan policy was selected")

    # power-control needs a power limit
    if chosen_action == 'power-control' and config.power_limit == 0:
        print("You did not select a power limit")
        raise InvalidConfig("No power limit was selected")

    # temp-control needs a temperature limit
    if chosen_action == 'temp-control' and config.acoustic_temp_limit == 0:
        print("You did not select a temperature limit")
        raise InvalidConfig("No temperature limit was selected")
|
||||
|
||||
|
||||
def _validate_fan_speed(speed):
    """Raise InvalidFanSpeed unless speed is a percentage between 0 and 100."""

    if (speed > 100):
        print(f'The fan speed only goes up to 100%. You choose {speed}')
        raise InvalidFanSpeed(f'The fan speed only goes up to 100%. You choose {speed}')

    if (speed < 0):
        print(f'The fan speed cannot be lower than 0%. You choose {speed}')
        raise InvalidFanSpeed(f'The fan speed cannot be lower than 0%. You choose {speed}')


def parse_cmd_args(args):
    """Build a Configuration from a command line.

    args is the whole argument list in sys.argv style: args[0] is the program
    itself, args[1] is the action and everything after it is an option.

    Returns the filled Configuration. For the 'help' and 'list' actions the
    options are ignored and the configuration is returned without validation.

    Raises:
        InsufficientArgs: no action was supplied.
        InvalidAction: the action is not a known one.
        InvalidOption: an option is not a known one.
        InvalidNumberSpeedPairParams: a speed pair is not 'temperature:speed'.
        InvalidFanSpeed: a fan speed (pair or default) is outside 0-100.
        InvalidConfig: a mandatory setting is missing (see validate_config).
        IndexError: an option that takes a value is the last argument.
        ValueError: a numeric value could not be converted.
    """

    configuration = Configuration()

    # "< 2" also covers an empty list, which would otherwise fail on args[1]
    if len(args) < 2:
        print('You must pass more arguments')
        raise InsufficientArgs("No action was supplied")

    # You can always ignore the first argument, since it is the program itself
    # Get the second arg, which is the action
    action = args[1]

    # The action names are decoupled from the cmd interface, allowing for flexibility
    # (command line name -> internal name)
    action_names = {
        'help': 'help',
        'list': 'list',
        'fan-control': 'fan-control',
        'fan-info': 'fan-info',
        'fan-policy': 'fan-policy',
        'fan-policy-info': 'fan-policy-info',
        'power-limit-info': 'get-power-limit-info',
        'thresholds-info': 'get-thresholds-info',
        'power-control': 'power-control',
        'temp-control': 'temp-control',
        'control-all': 'control-all',
    }

    if action not in action_names:
        helper_functions.print_help()
        print(f'Invalid action: {action}\n\n')
        raise InvalidAction("The action passed as argument is incorrect")

    configuration.action = action_names[action]

    # It should stop here, ignore all other args
    if configuration.action in ('help', 'list'):
        return configuration

    # You can safely ignore the actions here
    i = 2
    while i < len(args):

        arg = args[i]

        if arg in ('--name', '-n'):
            configuration.gpu_name = args[i+1]
            i += 1 # Skip the next iteration

        elif arg in ('--uuid', '-id'):
            configuration.gpu_uuid = args[i+1]
            i += 1 # Skip the next iteration

        elif arg in ('--speed-pair', '-sp'):

            # Think of as points in a graph (speed % x temp °C)
            speed_points = args[i+1].split(',')

            for speed_pair_str in speed_points:

                speed_pair = speed_pair_str.split(':')

                if (len(speed_pair) != 2):
                    print('Invalid number of speed pair parameters')
                    raise InvalidNumberSpeedPairParams("You can only set temperature and target speed at a time")

                temp = int(speed_pair[0])
                speed = int(speed_pair[1])

                _validate_fan_speed(speed)

                configuration.temp_speed_pair.append( TempSpeedPair(temp, speed) )

            i += 1 # Skip the next iteration

        elif arg in ('--default-speed', '-ds'):
            default_speed = int(args[i+1])

            # Same limits as the speed pairs
            _validate_fan_speed(default_speed)

            configuration.default_speed = default_speed
            i += 1 # Skip the next iteration

        elif arg in ('--time-interval', '-ti'):
            configuration.time_interval = float(args[i+1])
            i += 1 # Skip the next iteration

        elif arg in ('--dry-run', '-dr'):
            configuration.dry_run = True

        # For the fan-policy action
        elif arg == '--auto':
            configuration.fan_policy = 'automatic'

        elif arg == '--manual':
            configuration.fan_policy = 'manual'

        elif arg in ('--single-use', '-su'):
            configuration.single_use = True

        elif arg in ('--acoustic-temp-limit', '-tl'):
            configuration.acoustic_temp_limit = int(args[i+1])
            i += 1 # Skip the next iteration

        elif arg in ('--power-limit', '-pl'):
            configuration.power_limit = int(args[i+1])
            i += 1 # Skip the next iteration

        else:
            helper_functions.print_help()
            print(f'Invalid option: {arg}\n\n')
            raise InvalidOption('The option given was invalid')

        # Change iteration
        i += 1

    # Organizing the array before sending the configuration (highest temperature first)
    configuration.temp_speed_pair.sort(reverse=True)

    validate_config(configuration)

    return configuration
|
||||
|
||||
import helper_functions
|
||||
|
||||
class InvalidAction(Exception):
    """The action given on the command line is not a known one."""


class InvalidOption(Exception):
    """An option given on the command line is not a known one."""


class InvalidNumberSpeedPairParams(Exception):
    """A speed pair does not have exactly two ':' separated fields."""


class InvalidFanSpeed(Exception):
    """A fan speed percentage is above 100 or below 0."""


class InsufficientArgs(Exception):
    """No action was supplied on the command line."""


class InvalidConfig(Exception):
    """The parsed configuration is missing a mandatory setting."""
|
||||
|
||||
|
||||
class Configuration:
    """Plain container for everything parsed from the command line.

    Only one target GPU is supported per configuration; use one process for
    each GPU so that errors stay isolated from each other.
    """

    def __init__(self):

        # What to do
        self.action = ""

        # Which GPU to act on
        self.target_gpu = ""
        self.gpu_name = ""
        self.gpu_uuid = ""

        # Fan curve settings
        self.temp_speed_pair = []   # Filled with TempSpeedPair objects
        self.curve_type = "fixed"   # Internal usage only for now (line and curve functions may be added later)
        self.default_speed = 50     # Percentage
        self.fan_policy = ''

        # Values with no usable default: the user must set them
        self.acoustic_temp_limit = 0
        self.power_limit = 0

        # Execution settings
        self.time_interval = 1.0    # In seconds
        self.dry_run = False
        self.single_use = False
|
||||
|
||||
class TempSpeedPair:
    """One point of the fan curve: a temperature and the fan speed for it."""

    def __init__(self, temperature, speed):

        self.temperature = temperature
        self.speed = speed # Percentage!

    def __repr__(self):
        # Makes failed test assertions readable
        return f'TempSpeedPair({self.temperature!r}, {self.speed!r})'

    # The sorting is based on the temperature
    def __lt__(self, other):
        if not isinstance(other, TempSpeedPair):
            return NotImplemented

        # Must be a strict comparison: with "<=" two pairs with the same
        # temperature are both "less than" each other, which makes their
        # relative order after sorting unpredictable
        return self.temperature < other.temperature

    # Important for testing
    def __eq__(self, other):
        if not isinstance(other, TempSpeedPair):
            return NotImplemented

        return self.temperature == other.temperature and self.speed == other.speed
|
||||
|
||||
# Some sanity checks (in case the user makes a bad config by accident)
|
||||
def validate_config(config):
    """Reject a configuration that is missing a mandatory setting.

    A short explanation is printed and InvalidConfig is raised when:
      * neither gpu_name nor gpu_uuid is set;
      * the action is 'fan-policy' and no fan policy was chosen;
      * the action is 'power-control' and power_limit is still 0;
      * the action is 'temp-control' and acoustic_temp_limit is still 0.

    Returns None when every check passes.
    """

    # A target GPU is mandatory: by name, by UUID or both
    name_missing = config.gpu_name == ''
    if name_missing and config.gpu_uuid == '':
        print("You did not select a target GPU")
        raise InvalidConfig("No GPU was selected")

    chosen_action = config.action

    # fan-policy needs a mode
    if chosen_action == 'fan-policy' and config.fan_policy == '':
        print("You did not select a fan policy: autmatic or manual")
        raise InvalidConfig("No fan policy was selected")

    # power-control needs a power limit
    if chosen_action == 'power-control' and config.power_limit == 0:
        print("You did not select a power limit")
        raise InvalidConfig("No power limit was selected")

    # temp-control needs a temperature limit
    if chosen_action == 'temp-control' and config.acoustic_temp_limit == 0:
        print("You did not select a temperature limit")
        raise InvalidConfig("No temperature limit was selected")
|
||||
|
||||
|
||||
def _validate_fan_speed(speed):
    """Raise InvalidFanSpeed unless speed is a percentage between 0 and 100."""

    if (speed > 100):
        print(f'The fan speed only goes up to 100%. You choose {speed}')
        raise InvalidFanSpeed(f'The fan speed only goes up to 100%. You choose {speed}')

    if (speed < 0):
        print(f'The fan speed cannot be lower than 0%. You choose {speed}')
        raise InvalidFanSpeed(f'The fan speed cannot be lower than 0%. You choose {speed}')


def parse_cmd_args(args):
    """Build a Configuration from a command line.

    args is the whole argument list in sys.argv style: args[0] is the program
    itself, args[1] is the action and everything after it is an option.

    Returns the filled Configuration. For the 'help' and 'list' actions the
    options are ignored and the configuration is returned without validation.

    Raises:
        InsufficientArgs: no action was supplied.
        InvalidAction: the action is not a known one.
        InvalidOption: an option is not a known one.
        InvalidNumberSpeedPairParams: a speed pair is not 'temperature:speed'.
        InvalidFanSpeed: a fan speed (pair or default) is outside 0-100.
        InvalidConfig: a mandatory setting is missing (see validate_config).
        IndexError: an option that takes a value is the last argument.
        ValueError: a numeric value could not be converted.
    """

    configuration = Configuration()

    # "< 2" also covers an empty list, which would otherwise fail on args[1]
    if len(args) < 2:
        print('You must pass more arguments')
        raise InsufficientArgs("No action was supplied")

    # You can always ignore the first argument, since it is the program itself
    # Get the second arg, which is the action
    action = args[1]

    # The action names are decoupled from the cmd interface, allowing for flexibility
    # (command line name -> internal name)
    action_names = {
        'help': 'help',
        'list': 'list',
        'fan-control': 'fan-control',
        'fan-info': 'fan-info',
        'fan-policy': 'fan-policy',
        'fan-policy-info': 'fan-policy-info',
        'power-limit-info': 'get-power-limit-info',
        'thresholds-info': 'get-thresholds-info',
        'power-control': 'power-control',
        'temp-control': 'temp-control',
        'control-all': 'control-all',
    }

    if action not in action_names:
        helper_functions.print_help()
        print(f'Invalid action: {action}\n\n')
        raise InvalidAction("The action passed as argument is incorrect")

    configuration.action = action_names[action]

    # It should stop here, ignore all other args
    if configuration.action in ('help', 'list'):
        return configuration

    # You can safely ignore the actions here
    i = 2
    while i < len(args):

        arg = args[i]

        if arg in ('--name', '-n'):
            configuration.gpu_name = args[i+1]
            i += 1 # Skip the next iteration

        elif arg in ('--uuid', '-id'):
            configuration.gpu_uuid = args[i+1]
            i += 1 # Skip the next iteration

        elif arg in ('--speed-pair', '-sp'):

            # Think of as points in a graph (speed % x temp °C)
            speed_points = args[i+1].split(',')

            for speed_pair_str in speed_points:

                speed_pair = speed_pair_str.split(':')

                if (len(speed_pair) != 2):
                    print('Invalid number of speed pair parameters')
                    raise InvalidNumberSpeedPairParams("You can only set temperature and target speed at a time")

                temp = int(speed_pair[0])
                speed = int(speed_pair[1])

                _validate_fan_speed(speed)

                configuration.temp_speed_pair.append( TempSpeedPair(temp, speed) )

            i += 1 # Skip the next iteration

        elif arg in ('--default-speed', '-ds'):
            default_speed = int(args[i+1])

            # Same limits as the speed pairs
            _validate_fan_speed(default_speed)

            configuration.default_speed = default_speed
            i += 1 # Skip the next iteration

        elif arg in ('--time-interval', '-ti'):
            configuration.time_interval = float(args[i+1])
            i += 1 # Skip the next iteration

        elif arg in ('--dry-run', '-dr'):
            configuration.dry_run = True

        # For the fan-policy action
        elif arg == '--auto':
            configuration.fan_policy = 'automatic'

        elif arg == '--manual':
            configuration.fan_policy = 'manual'

        elif arg in ('--single-use', '-su'):
            configuration.single_use = True

        elif arg in ('--acoustic-temp-limit', '-tl'):
            configuration.acoustic_temp_limit = int(args[i+1])
            i += 1 # Skip the next iteration

        elif arg in ('--power-limit', '-pl'):
            configuration.power_limit = int(args[i+1])
            i += 1 # Skip the next iteration

        else:
            helper_functions.print_help()
            print(f'Invalid option: {arg}\n\n')
            raise InvalidOption('The option given was invalid')

        # Change iteration
        i += 1

    # Organizing the array before sending the configuration (highest temperature first)
    configuration.temp_speed_pair.sort(reverse=True)

    validate_config(configuration)

    return configuration
|
||||
638
src/tests.py
638
src/tests.py
@@ -1,320 +1,320 @@
|
||||
import unittest
|
||||
from unittest.mock import Mock
|
||||
import sys
|
||||
import ctypes
|
||||
import parse_args
|
||||
import helper_functions as main_funcs
|
||||
|
||||
# Test command: python.exe .\tests.py -b
|
||||
|
||||
class TestMethods(unittest.TestCase):
    """Unit tests for the command line parser and a few helper functions."""

    # ---- Actions ----

    def test_parse_args_inssuficient_args(self):
        with self.assertRaises(parse_args.InsufficientArgs):
            parse_args.parse_cmd_args(['.python_script'])

    def test_parse_args_help(self):
        config = parse_args.parse_cmd_args(['.python_script', 'help'])
        self.assertEqual( config.action, 'help')

    def test_parse_args_list(self):
        config = parse_args.parse_cmd_args(['.python_script', 'list'])
        self.assertEqual( config.action, 'list')

    def test_parse_args_fan_control(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--name', 'RTX 4080'])
        self.assertEqual( config.action, 'fan-control')

    def test_parse_args_fan_policy(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-policy', '--name', 'RTX 4080', '--auto'])
        self.assertEqual( config.action, 'fan-policy')

    def test_parse_args_fan_policy_info(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-policy-info', '--name', 'RTX 4080'])
        self.assertEqual( config.action, 'fan-policy-info')

    def test_parse_args_fan_info(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-info', '--name', 'RTX 4080'])
        self.assertEqual( config.action, 'fan-info')

    def test_parse_args_get_power_limit_info(self):
        config = parse_args.parse_cmd_args(['.python_script', 'power-limit-info', '--name', 'RTX 4080'])
        self.assertEqual( config.action, 'get-power-limit-info')

    def test_parse_args_get_temp_thresholds_limit_info(self):
        config = parse_args.parse_cmd_args(['.python_script', 'thresholds-info', '--name', 'RTX 4080'])
        self.assertEqual( config.action, 'get-thresholds-info')

    def test_parse_args_action_power_control(self):
        config = parse_args.parse_cmd_args(['.python_script', 'power-control', '--name', 'RTX 4080', '-pl', '150'])
        self.assertEqual( config.action, 'power-control')

    def test_parse_args_action_temp_control(self):
        config = parse_args.parse_cmd_args(['.python_script', 'temp-control', '--name', 'RTX 4080', '-tl', '150'])
        self.assertEqual( config.action, 'temp-control')

    def test_parse_args_action_control_all(self):
        config = parse_args.parse_cmd_args(['.python_script', 'control-all', '--name', 'RTX 4080'])
        self.assertEqual( config.action, 'control-all')

    def test_parse_args_invalid_action(self):
        with self.assertRaises(parse_args.InvalidAction):
            parse_args.parse_cmd_args(['.python_script', 'invalid-action'])

    def test_parse_args_action_ignore_rest(self):
        # '--name' is stored in gpu_name, so that is the attribute that shows
        # whether the options after 'help' and 'list' were really ignored
        config = parse_args.parse_cmd_args(['.python_script', 'help', '--name', 'RTX 4080'])
        self.assertEqual( config.action, 'help')
        self.assertEqual( config.gpu_name, '')

        config = parse_args.parse_cmd_args(['.python_script', 'list', '--name', 'RTX 4080'])
        self.assertEqual( config.action, 'list')
        self.assertEqual( config.gpu_name, '')

    # ---- Options ----

    def test_parse_args_option_gpu_name(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--name', 'RTX 4080'])
        self.assertEqual( config.action, 'fan-control')
        self.assertEqual( config.gpu_name, 'RTX 4080')

        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-n', 'RTX 3080'])
        self.assertEqual( config.action, 'fan-control')
        self.assertEqual( config.gpu_name, 'RTX 3080')

    def test_parse_args_option_gpu_uuid(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--uuid', 'GPU-00000000-0000-0000-0000-000000000000'])
        self.assertEqual( config.action, 'fan-control')
        self.assertEqual( config.gpu_uuid, 'GPU-00000000-0000-0000-0000-000000000000')

    def test_parse_args_option_power_limit(self):
        config = parse_args.parse_cmd_args(['.python_script', 'power-control', '--name', 'RTX 4080', '--power-limit', '100'])
        self.assertEqual( config.power_limit, 100)

        config = parse_args.parse_cmd_args(['.python_script', 'power-control', '--name', 'RTX 4080', '-pl', '100'])
        self.assertEqual( config.power_limit, 100)

    def test_parse_args_option_acoustic_temp_limit(self):
        config = parse_args.parse_cmd_args(['.python_script', 'temp-control', '--name', 'RTX 4080', '--acoustic-temp-limit', '50'])
        self.assertEqual( config.acoustic_temp_limit, 50)

        config = parse_args.parse_cmd_args(['.python_script', 'temp-control', '--name', 'RTX 4080', '-tl', '50'])
        self.assertEqual( config.acoustic_temp_limit, 50)

    def test_parse_args_option_single_use(self):
        config = parse_args.parse_cmd_args(['.python_script', 'power-control', '--name', 'RTX 4080', '-pl', '50', '--single-use'])
        self.assertEqual( config.single_use, True)

        config = parse_args.parse_cmd_args(['.python_script', 'power-control', '--name', 'RTX 4080', '-pl', '50', '-su'])
        self.assertEqual( config.single_use, True)

        # Default should be False
        config = parse_args.parse_cmd_args(['.python_script', 'power-control', '--name', 'RTX 4080', '-pl', '50'])
        self.assertEqual( config.single_use, False)

    def test_parse_args_option_default_speed(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--default-speed', '36', '-n', 'RTX 3080'])
        self.assertEqual( config.action, 'fan-control')
        self.assertEqual( config.default_speed, 36)

        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-ds', '27', '-n', 'RTX 3080'])
        self.assertEqual( config.action, 'fan-control')
        self.assertEqual( config.default_speed, 27)

        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-n', 'RTX 3080'])
        self.assertEqual( config.action, 'fan-control')
        self.assertTrue( config.default_speed >= 30) # Fan speed must never default for a value lower than 30%, except for when user explicitly wants to

    def test_parse_args_option_time_interval(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--time-interval', '5', '-n', 'RTX 3080'])
        self.assertEqual( config.action, 'fan-control')
        self.assertEqual( config.time_interval, 5.0)

        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-ti', '0.5', '-n', 'RTX 3080'])
        self.assertEqual( config.action, 'fan-control')
        self.assertEqual( config.time_interval, 0.5)

        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-n', 'RTX 3080'])
        self.assertEqual( config.action, 'fan-control')
        self.assertTrue( config.time_interval <= 1) # Default should never be higher than 1s, unless the user states so

    # ---- Speed pairs ----

    def test_parse_args_temp_speed_pair(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '0:0,10:30,20:50,35:75,40:100', '-n', 'RTX 3080'])

        # The parser hands the pairs back sorted from the highest temperature down
        expected_output = [
            parse_args.TempSpeedPair(40, 100),
            parse_args.TempSpeedPair(35, 75),
            parse_args.TempSpeedPair(20, 50),
            parse_args.TempSpeedPair(10, 30),
            parse_args.TempSpeedPair(0, 0),
        ]
        self.assertEqual(expected_output, config.temp_speed_pair)

        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-sp', '0:0,10:30,20:50,35:75,40:100', '-n', 'RTX 3080'])
        self.assertEqual(expected_output, config.temp_speed_pair)

    def test_parse_args_temp_speed_pair_sort(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '40:100,20:50,10:30,35:75', '-n', 'RTX 3080'])

        expected_output = [
            parse_args.TempSpeedPair(40, 100),
            parse_args.TempSpeedPair(35, 75),
            parse_args.TempSpeedPair(20, 50),
            parse_args.TempSpeedPair(10, 30),
        ]
        self.assertEqual(expected_output, config.temp_speed_pair)

    def test_parse_args_temp_speed_pair_empty_list(self):

        # The option is the last argument, so its value is missing
        with self.assertRaises(IndexError):
            parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair'])

    def test_parse_args_temp_speed_pair_invalid_list(self):

        with self.assertRaises(parse_args.InvalidNumberSpeedPairParams):
            parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', ''])

        with self.assertRaises(parse_args.InvalidNumberSpeedPairParams):
            parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '10-20'])

        with self.assertRaises(ValueError):
            parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '10-20:10-20'])

        with self.assertRaises(ValueError):
            parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '10-20:10-20,10-20:10-20'])

    def test_parse_args_temp_speed_pair_invalid_fan_speed(self):

        with self.assertRaises(parse_args.InvalidFanSpeed):
            parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '10:120'])

        with self.assertRaises(parse_args.InvalidFanSpeed):
            parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', '10:-100'])

    # ---- Flags ----

    def test_parse_args_dry_run(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--dry-run', '-n', 'RTX 3080'])
        self.assertEqual(config.dry_run, True)

        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-dr', '-n', 'RTX 3080'])
        self.assertEqual(config.dry_run, True)

        # Default value should always be False
        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-n', 'RTX 3080'])
        self.assertEqual(config.dry_run, False)

    def test_parse_args_fan_policy_auto(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-policy', '--name', 'RTX 4080', '--auto'])
        self.assertEqual( config.fan_policy, 'automatic')

    def test_parse_args_fan_policy_manual(self):
        config = parse_args.parse_cmd_args(['.python_script', 'fan-policy', '--name', 'RTX 4080', '--manual'])
        self.assertEqual( config.fan_policy, 'manual')

    def test_parse_args_invalid_option(self):

        with self.assertRaises(parse_args.InvalidOption):
            parse_args.parse_cmd_args(['.python_script', 'fan-control', '--invalid-option', '10:120', '-n', 'RTX 3080'])

    def test_parse_args_real_cmd(self):

        config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--name', 'RTX 4080', '--speed-pair', '0:0,20:35,30:50,40:100', '--time-interval', '0.5'])

        self.assertEqual(config.action, 'fan-control')
        self.assertEqual(config.gpu_name, 'RTX 4080')
        self.assertEqual(config.time_interval, 0.5)
        expected_output = [
            parse_args.TempSpeedPair(40, 100),
            parse_args.TempSpeedPair(30, 50),
            parse_args.TempSpeedPair(20, 35),
            parse_args.TempSpeedPair(0, 0),
        ]
        self.assertEqual(config.temp_speed_pair, expected_output)

    # ---- Sanity checks on the final configuration ----

    def test_parse_args_sane_checks_no_fan_policy(self):

        # No fan policy
        with self.assertRaises(parse_args.InvalidConfig):
            parse_args.parse_cmd_args(['.python_script', 'fan-policy', '-n', 'RTX 3080'])

    def test_parse_args_sane_checks_no_gpu(self):

        # No target gpu, checked for several actions
        with self.assertRaises(parse_args.InvalidConfig):
            parse_args.parse_cmd_args(['.python_script', 'fan-control'])

        with self.assertRaises(parse_args.InvalidConfig):
            parse_args.parse_cmd_args(['.python_script', 'power-control'])

        with self.assertRaises(parse_args.InvalidConfig):
            parse_args.parse_cmd_args(['.python_script', 'temp-control'])

        with self.assertRaises(parse_args.InvalidConfig):
            parse_args.parse_cmd_args(['.python_script', 'power-limit-info'])

        with self.assertRaises(parse_args.InvalidConfig):
            parse_args.parse_cmd_args(['.python_script', 'thresholds-info'])

    def test_parse_args_sane_checks_no_power_limit(self):

        # No power limit
        with self.assertRaises(parse_args.InvalidConfig):
            parse_args.parse_cmd_args(['.python_script', 'power-control', '--name', 'RTX 4080'])

    def test_parse_args_sane_checks_no_temp_acoustic_limit(self):

        # No acoustic temperature limit
        with self.assertRaises(parse_args.InvalidConfig):
            parse_args.parse_cmd_args(['.python_script', 'temp-control', '--name', 'RTX 4080'])

    # ---- Helper functions ----

    def test_check_driver_version(self):

        # If the driver starts to return letters, a refactoring will be needed anyways
        # So I just want to verify that letters raise errors
        with self.assertRaises(ValueError):
            main_funcs.check_driver_version('AAA')

        # Only cares for the major version
        with self.assertRaises(main_funcs.UnsupportedDriverVersion):
            main_funcs.check_driver_version('515')

        with self.assertRaises(main_funcs.UnsupportedDriverVersion):
            main_funcs.check_driver_version('515.20.20')

        with self.assertRaises(main_funcs.UnsupportedDriverVersion):
            main_funcs.check_driver_version('515.20.20.20')

        with self.assertRaises(main_funcs.UnsupportedDriverVersion):
            main_funcs.check_driver_version('515.20.20.20aaaaa')

    def test_fan_policy_info_msg(self):
        msg = main_funcs.fan_policy_info_msg(ctypes.c_uint(0).value)
        self.assertEqual('Current fan control policy is automatic', msg)

        msg = main_funcs.fan_policy_info_msg(ctypes.c_uint(1).value)
        self.assertEqual('Current fan control policy is manual', msg)

        msg = main_funcs.fan_policy_info_msg(ctypes.c_uint(100).value)
        self.assertEqual('Unknown fan control policy', msg)

    # # GPU Functions - I will need to improve the tests later
    #
    # def test_gpu_something(self):
    #     # Mocking
    #     import pynvml
    #
    #     pynvml.nvmlDeviceGetCount = Mock(return_value=1)
    #     pynvml.nvmlDeviceGetHandleByIndex = Mock(return_value=0)
    #     pynvml.nvmlDeviceGetName = Mock(return_value='RTX 3080')
    #
    #     # Main function
    #     main_funcs.list_gpus()
    #
    #     # Fail
    #     self.assertTrue(True)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import unittest
|
||||
from unittest.mock import Mock
|
||||
import sys
|
||||
import ctypes
|
||||
import parse_args
|
||||
import helper_functions as main_funcs
|
||||
|
||||
# Test command: python.exe .\tests.py -b
|
||||
|
||||
class TestMethods(unittest.TestCase):
|
||||
|
||||
def test_parse_args_inssuficient_args(self):
|
||||
with self.assertRaises(parse_args.InsufficientArgs):
|
||||
parse_args.parse_cmd_args(['.python_script'])
|
||||
|
||||
def test_parse_args_help(self):
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'help'])
|
||||
self.assertEqual( config.action, 'help')
|
||||
|
||||
def test_parse_args_list(self):
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'list'])
|
||||
self.assertEqual( config.action, 'list')
|
||||
|
||||
def test_parse_args_fan_control(self):
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--name', 'RTX 4080'])
|
||||
self.assertEqual( config.action, 'fan-control')
|
||||
|
||||
def test_parse_args_fan_policy(self):
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'fan-policy', '--name', 'RTX 4080', '--auto'])
|
||||
self.assertEqual( config.action, 'fan-policy')
|
||||
|
||||
def test_parse_args_fan_policy_info(self):
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'fan-policy-info', '--name', 'RTX 4080'])
|
||||
self.assertEqual( config.action, 'fan-policy-info')
|
||||
|
||||
def test_parse_args_fan_info(self):
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'fan-info', '--name', 'RTX 4080'])
|
||||
self.assertEqual( config.action, 'fan-info')
|
||||
|
||||
def test_parse_args_get_power_limit_info(self):
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'power-limit-info', '--name', 'RTX 4080'])
|
||||
self.assertEqual( config.action, 'get-power-limit-info')
|
||||
|
||||
def test_parse_args_get_temp_thresholds_limit_info(self):
|
||||
config = parse_args.parse_cmd_args(['.python_script', 'thresholds-info', '--name', 'RTX 4080'])
|
||||
self.assertEqual( config.action, 'get-thresholds-info')
|
||||
|
||||
def test_parse_args_action_power_control(self):
    """'power-control' with a GPU name and '-pl' is reported as the 'power-control' action."""
    argv = ['.python_script', 'power-control', '--name', 'RTX 4080', '-pl', '150']
    parsed = parse_args.parse_cmd_args(argv)
    self.assertEqual(parsed.action, 'power-control')
|
||||
|
||||
def test_parse_args_action_temp_control(self):
    """'temp-control' with a GPU name and '-tl' is reported as the 'temp-control' action."""
    argv = ['.python_script', 'temp-control', '--name', 'RTX 4080', '-tl', '150']
    parsed = parse_args.parse_cmd_args(argv)
    self.assertEqual(parsed.action, 'temp-control')
|
||||
|
||||
def test_parse_args_action_control_all(self):
    """'control-all' with a GPU name is reported as the 'control-all' action."""
    argv = ['.python_script', 'control-all', '--name', 'RTX 4080']
    parsed = parse_args.parse_cmd_args(argv)
    self.assertEqual(parsed.action, 'control-all')
|
||||
|
||||
def test_parse_args_invalid_action(self):
    """An unrecognised action word must raise InvalidAction."""
    argv = ['.python_script', 'invalid-action']
    self.assertRaises(parse_args.InvalidAction, parse_args.parse_cmd_args, argv)
|
||||
|
||||
def test_parse_args_action_ignore_rest(self):
    """For 'help' and 'list', arguments after the action are expected to have no effect.

    Both commands are given a '--name' option; the test expects the action
    to be reported unchanged and target_gpu to remain the empty string.
    """
    for action in ('help', 'list'):
        parsed = parse_args.parse_cmd_args(['.python_script', action, '--name', 'RTX 4080'])
        self.assertEqual(parsed.action, action)
        self.assertEqual(parsed.target_gpu, '')
|
||||
|
||||
def test_parse_args_option_gpu_name(self):
    """Both the long ('--name') and short ('-n') spellings store the GPU name."""
    cases = (
        ('--name', 'RTX 4080'),
        ('-n', 'RTX 3080'),
    )
    for flag, gpu in cases:
        parsed = parse_args.parse_cmd_args(['.python_script', 'fan-control', flag, gpu])
        self.assertEqual(parsed.action, 'fan-control')
        self.assertEqual(parsed.gpu_name, gpu)
|
||||
|
||||
def test_parse_args_option_gpu_uuid(self):
    """'--uuid' stores the given GPU UUID verbatim in gpu_uuid."""
    uuid = 'GPU-00000000-0000-0000-0000-000000000000'
    parsed = parse_args.parse_cmd_args(['.python_script', 'fan-control', '--uuid', uuid])
    self.assertEqual(parsed.action, 'fan-control')
    self.assertEqual(parsed.gpu_uuid, uuid)
|
||||
|
||||
def test_parse_args_option_power_limit(self):
    """'--power-limit' and its short form '-pl' both yield the integer limit."""
    for flag in ('--power-limit', '-pl'):
        argv = ['.python_script', 'power-control', '--name', 'RTX 4080', flag, '100']
        parsed = parse_args.parse_cmd_args(argv)
        self.assertEqual(parsed.power_limit, 100)
|
||||
|
||||
def test_parse_args_option_acoustic_temp_limit(self):
    """'--acoustic-temp-limit' and its short form '-tl' both yield the integer limit."""
    for flag in ('--acoustic-temp-limit', '-tl'):
        argv = ['.python_script', 'temp-control', '--name', 'RTX 4080', flag, '50']
        parsed = parse_args.parse_cmd_args(argv)
        self.assertEqual(parsed.acoustic_temp_limit, 50)
|
||||
|
||||
def test_parse_args_option_single_use(self):
    """'--single-use' / '-su' switch single_use on; without either it is off."""
    base_argv = ['.python_script', 'power-control', '--name', 'RTX 4080', '-pl', '50']

    for flag in ('--single-use', '-su'):
        parsed = parse_args.parse_cmd_args(base_argv + [flag])
        self.assertEqual(parsed.single_use, True)

    # Default should be False
    parsed = parse_args.parse_cmd_args(base_argv)
    self.assertEqual(parsed.single_use, False)
|
||||
|
||||
def test_parse_args_option_default_speed(self):
    """Check both spellings of the default-speed option and the floor of its default.

    '--default-speed' and '-ds' must store the given integer.  When the
    option is omitted, the default must be at least 30: the fan speed must
    never default to a value lower than 30% unless the user explicitly asks
    for it.
    """
    explicit_cases = (
        ('--default-speed', '36', 36),
        ('-ds', '27', 27),
    )
    for flag, raw_value, expected in explicit_cases:
        argv = ['.python_script', 'fan-control', flag, raw_value, '-n', 'RTX 3080']
        config = parse_args.parse_cmd_args(argv)
        self.assertEqual(config.action, 'fan-control')
        self.assertEqual(config.default_speed, expected)

    config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-n', 'RTX 3080'])
    self.assertEqual(config.action, 'fan-control')
    # assertGreaterEqual reports the offending value on failure, whereas
    # assertTrue(a >= b) only says "False is not true".
    self.assertGreaterEqual(config.default_speed, 30)
|
||||
|
||||
def test_parse_args_option_time_interval(self):
    """Check both spellings of the time-interval option and the ceiling of its default.

    '--time-interval' and '-ti' must store the given number (fractional
    values included).  When the option is omitted, the default must never
    be higher than 1 second unless the user states so.
    """
    explicit_cases = (
        ('--time-interval', '5', 5.0),
        ('-ti', '0.5', 0.5),
    )
    for flag, raw_value, expected in explicit_cases:
        argv = ['.python_script', 'fan-control', flag, raw_value, '-n', 'RTX 3080']
        config = parse_args.parse_cmd_args(argv)
        self.assertEqual(config.action, 'fan-control')
        self.assertEqual(config.time_interval, expected)

    config = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-n', 'RTX 3080'])
    self.assertEqual(config.action, 'fan-control')
    # assertLessEqual reports the offending value on failure, whereas
    # assertTrue(a <= b) only says "False is not true".
    self.assertLessEqual(config.time_interval, 1)
|
||||
|
||||
def test_parse_args_temp_speed_pair(self):
    """'--speed-pair' / '-sp' turn 'a:b,...' text into a list of TempSpeedPair objects.

    The expected list holds the pairs in the reverse of the order in which
    they are typed (highest first value first).
    """
    raw_pairs = '0:0,10:30,20:50,35:75,40:100'
    expected_output = [
        parse_args.TempSpeedPair(first, second)
        for first, second in ((40, 100), (35, 75), (20, 50), (10, 30), (0, 0))
    ]

    for flag in ('--speed-pair', '-sp'):
        parsed = parse_args.parse_cmd_args(['.python_script', 'fan-control', flag, raw_pairs, '-n', 'RTX 3080'])
        self.assertEqual(expected_output, parsed.temp_speed_pair)
|
||||
|
||||
def test_parse_args_temp_speed_pair_sort(self):
    """Pairs typed in arbitrary order are expected back sorted, highest first value first."""
    argv = ['.python_script', 'fan-control', '--speed-pair', '40:100,20:50,10:30,35:75', '-n', 'RTX 3080']
    expected_output = [
        parse_args.TempSpeedPair(first, second)
        for first, second in ((40, 100), (35, 75), (20, 50), (10, 30))
    ]

    parsed = parse_args.parse_cmd_args(argv)
    self.assertEqual(expected_output, parsed.temp_speed_pair)
|
||||
|
||||
def test_parse_args_temp_speed_pair_empty_list(self):
    """'--speed-pair' as the last argument, with no value after it, raises IndexError."""
    argv = ['.python_script', 'fan-control', '--speed-pair']
    self.assertRaises(IndexError, parse_args.parse_cmd_args, argv)
|
||||
|
||||
def test_parse_args_temp_speed_pair_invalid_list(self):
    """Malformed '--speed-pair' values must be rejected with the matching exception.

    A value that does not split into a pair ('' or '10-20') must raise
    InvalidNumberSpeedPairParams; a value that has the 'a:b' shape but whose
    members are not numbers must raise ValueError.
    """
    cases = (
        (parse_args.InvalidNumberSpeedPairParams, ''),
        (parse_args.InvalidNumberSpeedPairParams, '10-20'),
        (ValueError, '10-20:10-20'),
        (ValueError, '10-20:10-20,10-20:10-20'),
    )
    for expected_error, raw_pairs in cases:
        # subTest keeps the remaining inputs running after a failure and
        # names the offending input, which "X not raised" alone does not.
        with self.subTest(speed_pair=raw_pairs):
            with self.assertRaises(expected_error):
                parse_args.parse_cmd_args(['.python_script', 'fan-control', '--speed-pair', raw_pairs])
|
||||
|
||||
def test_parse_args_temp_speed_pair_invalid_fan_speed(self):
    """Speeds of 120 and -100 in a speed pair must each raise InvalidFanSpeed."""
    too_high = ['.python_script', 'fan-control', '--speed-pair', '10:120']
    self.assertRaises(parse_args.InvalidFanSpeed, parse_args.parse_cmd_args, too_high)

    negative = ['.python_script', 'fan-control', '--speed-pair', '10:-100']
    self.assertRaises(parse_args.InvalidFanSpeed, parse_args.parse_cmd_args, negative)
|
||||
|
||||
def test_parse_args_dry_run(self):
    """'--dry-run' / '-dr' switch dry_run on; without either it is off."""
    for flag in ('--dry-run', '-dr'):
        parsed = parse_args.parse_cmd_args(['.python_script', 'fan-control', flag, '-n', 'RTX 3080'])
        self.assertEqual(parsed.dry_run, True)

    # Default value should always be False
    parsed = parse_args.parse_cmd_args(['.python_script', 'fan-control', '-n', 'RTX 3080'])
    self.assertEqual(parsed.dry_run, False)
|
||||
|
||||
def test_parse_args_fan_policy_auto(self):
    """'--auto' sets fan_policy to 'automatic'."""
    argv = ['.python_script', 'fan-policy', '--name', 'RTX 4080', '--auto']
    parsed = parse_args.parse_cmd_args(argv)
    self.assertEqual(parsed.fan_policy, 'automatic')
|
||||
|
||||
def test_parse_args_fan_policy_manual(self):
    """'--manual' sets fan_policy to 'manual'."""
    argv = ['.python_script', 'fan-policy', '--name', 'RTX 4080', '--manual']
    parsed = parse_args.parse_cmd_args(argv)
    self.assertEqual(parsed.fan_policy, 'manual')
|
||||
|
||||
def test_parse_args_invalid_option(self):
    """An unknown option flag must raise InvalidOption."""
    argv = ['.python_script', 'fan-control', '--invalid-option', '10:120', '-n', 'RTX 3080']
    self.assertRaises(parse_args.InvalidOption, parse_args.parse_cmd_args, argv)
|
||||
|
||||
def test_parse_args_real_cmd(self):
    """A realistic full command line: action, GPU name, interval and speed pairs together."""
    argv = [
        '.python_script', 'fan-control',
        '--name', 'RTX 4080',
        '--speed-pair', '0:0,20:35,30:50,40:100',
        '--time-interval', '0.5',
    ]
    expected_output = [
        parse_args.TempSpeedPair(first, second)
        for first, second in ((40, 100), (30, 50), (20, 35), (0, 0))
    ]

    parsed = parse_args.parse_cmd_args(argv)

    self.assertEqual(parsed.action, 'fan-control')
    self.assertEqual(parsed.gpu_name, 'RTX 4080')
    self.assertEqual(parsed.time_interval, 0.5)
    self.assertEqual(parsed.temp_speed_pair, expected_output)
|
||||
|
||||
def test_parse_args_sane_checks_no_fan_policy(self):
    """'fan-policy' given a GPU but neither policy flag must raise InvalidConfig."""
    # No fan policy
    argv = ['.python_script', 'fan-policy', '-n', 'RTX 3080']
    self.assertRaises(parse_args.InvalidConfig, parse_args.parse_cmd_args, argv)
|
||||
|
||||
def test_parse_args_sane_checks_no_gpu(self):
    """Every listed action must raise InvalidConfig when no target GPU is given.

    Each action is exercised in its own subTest so that a regression in one
    of them is reported by name and does not hide the result of the others.
    """
    actions_needing_gpu = (
        'fan-control',
        'power-control',
        'temp-control',
        'power-limit-info',
        'thresholds-info',
    )
    for action in actions_needing_gpu:
        with self.subTest(action=action):
            # No target gpu
            with self.assertRaises(parse_args.InvalidConfig):
                parse_args.parse_cmd_args(['.python_script', action])
|
||||
|
||||
def test_parse_args_sane_checks_no_power_limit(self):
    """'power-control' given a GPU but no power limit option must raise InvalidConfig."""
    # No power limit
    argv = ['.python_script', 'power-control', '--name', 'RTX 4080']
    self.assertRaises(parse_args.InvalidConfig, parse_args.parse_cmd_args, argv)
|
||||
|
||||
def test_parse_args_sane_checks_no_temp_acoustic_limit(self):
    """'temp-control' given a GPU but no temperature limit option must raise InvalidConfig."""
    # No acoustic temperature limit
    argv = ['.python_script', 'temp-control', '--name', 'RTX 4080']
    self.assertRaises(parse_args.InvalidConfig, parse_args.parse_cmd_args, argv)
|
||||
|
||||
|
||||
def test_check_driver_version(self):
    """check_driver_version must reject non-numeric and unsupported version strings.

    A purely alphabetic version must raise ValueError.  Version strings
    whose major component is 515 must raise UnsupportedDriverVersion no
    matter what follows the major number.
    """
    # If the driver starts to return letters, a refactoring will be needed anyways
    # So I just want to verify that letters raise errors
    with self.assertRaises(ValueError):
        main_funcs.check_driver_version('AAA')

    # Only cares for the major version
    unsupported_versions = (
        '515',
        '515.20.20',
        '515.20.20.20',
        '515.20.20.20aaaaa',
    )
    for version in unsupported_versions:
        # subTest reports each version string separately instead of
        # stopping at the first one that fails.
        with self.subTest(version=version):
            with self.assertRaises(main_funcs.UnsupportedDriverVersion):
                main_funcs.check_driver_version(version)
|
||||
|
||||
def test_fan_policy_info_msg(self):
    """fan_policy_info_msg maps policy codes 0 and 1 to text, and other codes to 'unknown'."""
    expectations = (
        (0, 'Current fan control policy is automatic'),
        (1, 'Current fan control policy is manual'),
        (100, 'Unknown fan control policy'),
    )
    for policy_code, expected_msg in expectations:
        # The code is passed through ctypes.c_uint(...).value, as in the original test.
        actual_msg = main_funcs.fan_policy_info_msg(ctypes.c_uint(policy_code).value)
        self.assertEqual(expected_msg, actual_msg)
|
||||
|
||||
# # GPU Functions - I will need to improve the tests later
|
||||
#
|
||||
# def test_gpu_something(self):
|
||||
# # Mocking
|
||||
# import pynvml
|
||||
#
|
||||
# pynvml.nvmlDeviceGetCount = Mock(return_value=1)
|
||||
# pynvml.nvmlDeviceGetHandleByIndex = Mock(return_value=0)
|
||||
# pynvml.nvmlDeviceGetName = Mock(return_value='RTX 3080')
|
||||
#
|
||||
# # Main function
|
||||
# main_funcs.list_gpus()
|
||||
#
|
||||
# # Fail
|
||||
# self.assertTrue(True)
|
||||
|
||||
|
||||
|
||||
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user