1 Commits

Author SHA1 Message Date
Oscar Krause
f87e75c471 fixes 2024-01-18 13:39:51 +01:00
20 changed files with 535 additions and 933 deletions

View File

@@ -2,7 +2,7 @@ Package: fastapi-dls
Version: 0.0
Architecture: all
Maintainer: Oscar Krause oscar.krause@collinwebdesigns.de
Depends: python3, python3-fastapi, python3-uvicorn, python3-dotenv, python3-dateutil, python3-josepy, python3-sqlalchemy, python3-cryptography, python3-markdown, uvicorn, openssl
Depends: python3, python3-fastapi, python3-uvicorn, python3-dotenv, python3-dateutil, python3-jose, python3-sqlalchemy, python3-pycryptodome, python3-markdown, uvicorn, openssl
Recommends: curl
Installed-Size: 10240
Homepage: https://git.collinwebdesigns.de/oscar.krause/fastapi-dls

View File

@@ -1,8 +1,8 @@
# https://packages.debian.org/hu/
fastapi==0.92.0
uvicorn[standard]==0.17.6
python-jose[cryptography]==3.3.0
cryptography==38.0.4
python-jose[pycryptodome]==3.3.0
pycryptodome==3.11.0
python-dateutil==2.8.2
sqlalchemy==1.4.46
markdown==3.4.1

View File

@@ -0,0 +1,10 @@
# https://packages.ubuntu.com
fastapi==0.91.0
uvicorn[standard]==0.15.0
python-jose[pycryptodome]==3.3.0
pycryptodome==3.11.0
python-dateutil==2.8.2
sqlalchemy==1.4.46
markdown==3.4.3
python-dotenv==0.21.0
jinja2==3.1.2

View File

@@ -0,0 +1,10 @@
# https://packages.ubuntu.com
fastapi==0.101.0
uvicorn[standard]==0.23.2
python-jose[pycryptodome]==3.3.0
pycryptodome==3.11.0
python-dateutil==2.8.2
sqlalchemy==1.4.47
markdown==3.4.4
python-dotenv==1.0.0
jinja2==3.1.2

View File

@@ -1,10 +0,0 @@
# https://packages.ubuntu.com
fastapi==0.101.0
uvicorn[standard]==0.27.1
python-jose[cryptography]==3.3.0
cryptography==41.0.7
python-dateutil==2.8.2
sqlalchemy==1.4.50
markdown==3.5.2
python-dotenv==1.0.1
jinja2==3.1.2

View File

@@ -1,10 +0,0 @@
# https://packages.ubuntu.com
fastapi==0.110.3
uvicorn[standard]==0.30.3
python-jose[cryptography]==3.3.0
cryptography==42.0.5
python-dateutil==2.9.0
sqlalchemy==2.0.32
markdown==3.6
python-dotenv==1.0.1
jinja2==3.1.3

View File

@@ -8,11 +8,11 @@ pkgdesc='NVIDIA DLS server implementation with FastAPI'
arch=('any')
url='https://git.collinwebdesigns.de/oscar.krause/fastapi-dls'
license=('MIT')
depends=('python' 'python-jose' 'python-starlette' 'python-httpx' 'python-fastapi' 'python-dotenv' 'python-dateutil' 'python-sqlalchemy' 'python-cryptography' 'uvicorn' 'python-markdown' 'openssl')
depends=('python' 'python-jose' 'python-starlette' 'python-httpx' 'python-fastapi' 'python-dotenv' 'python-dateutil' 'python-sqlalchemy' 'python-pycryptodome' 'uvicorn' 'python-markdown' 'openssl')
provider=("$pkgname")
install="$pkgname.install"
backup=('etc/default/fastapi-dls')
source=("git+file://${CI_PROJECT_DIR}"
source=('git+file:///tmp/builds/dUqn6z-s/1/oscar.krause/fastapi-dls' # https://gitea.publichub.eu/oscar.krause/fastapi-dls.git
"$pkgname.default"
"$pkgname.service"
"$pkgname.tmpfiles")
@@ -39,7 +39,7 @@ check() {
package() {
install -d "$pkgdir/usr/share/doc/$pkgname"
install -d "$pkgdir/var/lib/$pkgname/cert"
#cp -r "$srcdir/$pkgname/doc"/* "$pkgdir/usr/share/doc/$pkgname/"
cp -r "$srcdir/$pkgname/doc"/* "$pkgdir/usr/share/doc/$pkgname/"
install -Dm644 "$srcdir/$pkgname/README.md" "$pkgdir/usr/share/doc/$pkgname/README.md"
install -Dm644 "$srcdir/$pkgname/version.env" "$pkgdir/usr/share/doc/$pkgname/version.env"

2
.gitignore vendored
View File

@@ -1,6 +1,6 @@
.DS_Store
venv/
.idea/
*.sqlite
app/*.sqlite*
app/cert/*.*
.pytest_cache

View File

@@ -16,12 +16,11 @@ build:docker:
interruptible: true
stage: build
rules:
# deployment is in "deploy:docker:"
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
- if: $CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH
changes:
- app/**/*
- Dockerfile
- requirements.txt
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
tags: [ docker ]
before_script:
- docker buildx inspect
@@ -44,13 +43,16 @@ build:apt:
- if: $CI_COMMIT_TAG
variables:
VERSION: $CI_COMMIT_REF_NAME
- if: ($CI_PIPELINE_SOURCE == 'merge_request_event') || ($CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH)
- if: $CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH
changes:
- app/**/*
- .DEBIAN/**/*
- .gitlab-ci.yml
variables:
VERSION: "0.0.1"
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
variables:
VERSION: "0.0.1"
before_script:
- echo -e "VERSION=$VERSION\nCOMMIT=$CI_COMMIT_SHA" > version.env
# install build dependencies
@@ -91,13 +93,16 @@ build:pacman:
- if: $CI_COMMIT_TAG
variables:
VERSION: $CI_COMMIT_REF_NAME
- if: ($CI_PIPELINE_SOURCE == 'merge_request_event') || ($CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH)
- if: $CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH
changes:
- app/**/*
- .PKGBUILD/**/*
- .gitlab-ci.yml
variables:
VERSION: "0.0.1"
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
variables:
VERSION: "0.0.1"
before_script:
#- echo -e "VERSION=$VERSION\nCOMMIT=$CI_COMMIT_SHA" > version.env
# install build dependencies
@@ -120,12 +125,13 @@ build:pacman:
paths:
- "*.pkg.tar.zst"
test:python:
image: $IMAGE
test:
image: python:3.11-slim-bookworm
stage: test
interruptible: true
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_TAG
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- if: $CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH
changes:
@@ -135,21 +141,14 @@ test:python:
DATABASE: sqlite:///../app/db.sqlite
parallel:
matrix:
- IMAGE:
# https://devguide.python.org/versions/#supported-versions
# - python:3.14-rc-alpine # EOL 2030-10 => uvicorn does not support 3.14 yet
- python:3.13-alpine # EOL 2029-10
- python:3.12-alpine # EOL 2028-10
- python:3.11-alpine # EOL 2027-10
# - python:3.10-alpine # EOL 2026-10 => ImportError: cannot import name 'UTC' from 'datetime'
# - python:3.9-alpine # EOL 2025-10 => ImportError: cannot import name 'UTC' from 'datetime'
- REQUIREMENTS:
- requirements.txt
- .DEBIAN/requirements-bookworm-12.txt
- .DEBIAN/requirements-ubuntu-23.10.txt
before_script:
- apk --no-cache add openssl
- python3 -m venv venv
- source venv/bin/activate
- pip install --upgrade pip
- pip install -r requirements.txt
- pip install pytest pytest-cov pytest-custom_exit_code httpx
- apt-get update && apt-get install -y python3-dev gcc
- pip install -r $REQUIREMENTS
- pip install pytest httpx
- mkdir -p app/cert
- openssl genrsa -out app/cert/instance.private.pem 2048
- openssl rsa -in app/cert/instance.private.pem -outform PEM -pubout -out app/cert/instance.public.pem
@@ -158,26 +157,17 @@ test:python:
- python -m pytest main.py --junitxml=report.xml
artifacts:
reports:
dotenv: version.env
junit: ['**/report.xml']
test:apt:
image: $IMAGE
.test:linux:
stage: test
rules:
- if: $CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
- if: $CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH
changes:
- app/**/*
- .DEBIAN/**/*
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
parallel:
matrix:
- IMAGE:
- debian:trixie-slim # EOL: t.b.a.
- debian:bookworm-slim # EOL: June 06, 2026
- debian:bookworm-slim # EOL: June 06, 2026
- ubuntu:24.04 # EOL: April 2036
- ubuntu:24.10
needs:
- job: build:apt
artifacts: true
@@ -209,15 +199,22 @@ test:apt:
- apt-get purge -qq -y fastapi-dls
- apt-get autoremove -qq -y && apt-get clean -qq
test:pacman:archlinux:
test:debian:
extends: .test:linux
image: debian:bookworm-slim
test:ubuntu:
extends: .test:linux
image: ubuntu:23.10
test:archlinux:
image: archlinux:base
rules:
- if: $CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
- if: $CI_COMMIT_BRANCH && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH
changes:
- app/**/*
- .PKGBUILD/**/*
- .gitlab-ci.yml
- if: $CI_PIPELINE_SOURCE == 'merge_request_event'
needs:
- job: build:pacman
artifacts: true
@@ -251,7 +248,7 @@ semgrep-sast:
test_coverage:
# extends: test
image: python:3.12-slim-bookworm
image: python:3.11-slim-bookworm
allow_failure: true
stage: test
rules:
@@ -261,19 +258,19 @@ test_coverage:
before_script:
- apt-get update && apt-get install -y python3-dev gcc
- pip install -r requirements.txt
- pip install pytest pytest-cov pytest-custom_exit_code httpx
- pip install pytest httpx
- mkdir -p app/cert
- openssl genrsa -out app/cert/instance.private.pem 2048
- openssl rsa -in app/cert/instance.private.pem -outform PEM -pubout -out app/cert/instance.public.pem
- cd test
script:
- coverage run -m pytest main.py --junitxml=report.xml --suppress-no-test-exit-code
- pip install pytest pytest-cov
- coverage run -m pytest main.py
- coverage report
- coverage xml
coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/'
artifacts:
reports:
junit: [ '**/report.xml' ]
coverage_report:
coverage_format: cobertura
path: '**/coverage.xml'
@@ -292,20 +289,19 @@ gemnasium-python-dependency_scanning:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
deploy:docker:
image: docker:dind
stage: deploy
tags: [ docker ]
.deploy:
rules:
- if: $CI_COMMIT_TAG
deploy:docker:
extends: .deploy
stage: deploy
before_script:
- echo "Building docker image for commit $CI_COMMIT_SHA with version $CI_COMMIT_REF_NAME"
- docker buildx inspect
- docker buildx create --use
script:
- echo "========== GitLab-Registry =========="
- docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
- IMAGE=$CI_REGISTRY/$CI_PROJECT_PATH
- IMAGE=$CI_REGISTRY/$CI_PROJECT_PATH/$CI_COMMIT_REF_NAME
- docker buildx build --progress=plain --platform $DOCKER_BUILDX_PLATFORM --build-arg VERSION=$CI_COMMIT_REF_NAME --build-arg COMMIT=$CI_COMMIT_SHA --tag $IMAGE:$CI_COMMIT_REF_NAME --push .
- docker buildx build --progress=plain --platform $DOCKER_BUILDX_PLATFORM --build-arg VERSION=$CI_COMMIT_REF_NAME --build-arg COMMIT=$CI_COMMIT_SHA --tag $IMAGE:latest --push .
- echo "========== Docker-Hub =========="
@@ -316,10 +312,9 @@ deploy:docker:
deploy:apt:
# doc: https://git.collinwebdesigns.de/help/user/packages/debian_repository/index.md#install-a-package
extends: .deploy
image: debian:bookworm-slim
stage: deploy
rules:
- if: $CI_COMMIT_TAG
needs:
- job: build:apt
artifacts: true
@@ -356,10 +351,9 @@ deploy:apt:
- 'curl --header "JOB-TOKEN: $CI_JOB_TOKEN" --upload-file ${EXPORT_NAME} "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/generic/${PACKAGE_NAME}/${PACKAGE_VERSION}/${EXPORT_NAME}"'
deploy:pacman:
extends: .deploy
image: archlinux:base-devel
stage: deploy
rules:
- if: $CI_COMMIT_TAG
needs:
- job: build:pacman
artifacts: true
@@ -380,7 +374,7 @@ deploy:pacman:
release:
image: registry.gitlab.com/gitlab-org/release-cli:latest
stage: .post
needs: [ build:docker, build:apt, build:pacman ]
needs: [ test ]
rules:
- if: $CI_COMMIT_TAG
script:

View File

@@ -1,4 +1,4 @@
FROM python:3.12-alpine
FROM python:3.11-alpine
ARG VERSION
ARG COMMIT=""
@@ -10,7 +10,7 @@ RUN apk update \
&& apk add --no-cache --virtual build-deps gcc g++ python3-dev musl-dev pkgconfig \
&& apk add --no-cache curl postgresql postgresql-dev mariadb-dev sqlite-dev \
&& pip install --no-cache-dir --upgrade uvicorn \
&& pip install --no-cache-dir psycopg2==2.9.10 mysqlclient==2.2.7 pysqlite3==0.5.4 \
&& pip install --no-cache-dir psycopg2==2.9.6 mysqlclient==2.2.0 pysqlite3==0.5.1 \
&& pip install --no-cache-dir -r /tmp/requirements.txt \
&& apk del build-deps

17
FAQ.md Normal file
View File

@@ -0,0 +1,17 @@
# FAQ
## `Failed to acquire license from <ip> (Info: <license> - Error: The allowed time to process response has expired)`
- Are your timezone settings correct on fastapi-dls **and your guest**?
- Did you download the client-token more than an hour ago?
Please download a new client-token. The guest has to register within an hour after the client-token was created.
## `jose.exceptions.JWTError: Signature verification failed.`
- Did you recreate `instance.public.pem` / `instance.private.pem`?
Then you have to download a **new** client-token on each of your guests.

203
README.md
View File

@@ -2,37 +2,22 @@
Minimal Delegated License Service (DLS).
> [!note] Compatibility
> Compatibility tested with official NLS 2.0.1, 2.1.0, 3.1.0, 3.3.1, 3.4.0. For Driver compatibility
> see [compatibility matrix](#vgpu-software-compatibility-matrix).
> [!warning] 18.x Drivers are not yet supported!
> Drivers are only supported until **17.x releases**.
Compatibility tested with official NLS 2.0.1, 2.1.0, 3.1.0. For Driver compatibility see [here](#setup-client).
This service can be used without internet connection.
Only the clients need a connection to this service on configured port.
**Official Links**
* https://git.collinwebdesigns.de/oscar.krause/fastapi-dls (Private Git)
* https://gitea.publichub.eu/oscar.krause/fastapi-dls (Public Git)
* https://hub.docker.com/r/collinwebdesigns/fastapi-dls (Docker-Hub `collinwebdesigns/fastapi-dls:latest`)
- https://git.collinwebdesigns.de/oscar.krause/fastapi-dls (Private Git)
- https://gitea.publichub.eu/oscar.krause/fastapi-dls (Public Git)
- https://hub.docker.com/r/collinwebdesigns/fastapi-dls (Docker-Hub `collinwebdesigns/fastapi-dls:latest`)
*All other repositories are forks! (which is not bad - just for information and bug reports)*
[Releases & Release Notes](https://git.collinwebdesigns.de/oscar.krause/fastapi-dls/-/releases)
**Further Reading**
* [NVIDIA vGPU Guide](https://gitlab.com/polloloco/vgpu-proxmox) - This document serves as a guide to install NVIDIA vGPU host drivers on the latest Proxmox VE version
* [vgpu_unlock](https://github.com/DualCoder/vgpu_unlock) - Unlock vGPU functionality for consumer-grade Nvidia GPUs.
* [vGPU_Unlock Wiki](https://docs.google.com/document/d/1pzrWJ9h-zANCtyqRgS7Vzla0Y8Ea2-5z2HEi4X75d2Q) - Guide for `vgpu_unlock`
* [Proxmox 8 vGPU in VMs and LXC Containers](https://medium.com/@dionisievldulrincz/proxmox-8-vgpu-in-vms-and-lxc-containers-4146400207a3) - Install *Merged Drivers* for using in Proxmox VMs and LXCs
* [Proxmox All-In-One Installer Script](https://wvthoog.nl/proxmox-vgpu-v3/) - Also known as `proxmox-installer.sh`
---
[TOC]
[[_TOC_]]
# Setup (Service)
@@ -48,9 +33,6 @@ Tested with Ubuntu 22.10 (EOL!) (from Proxmox templates), actually its consuming
- Make sure your timezone is set correctly on your fastapi-dls server and your client
This guide does not show how to install vGPU host drivers! Look at the official documentation packed with the driver
releases.
## Docker
Docker-Images are available here for Intel (x86), AMD (amd64) and ARM (arm64):
@@ -86,7 +68,7 @@ docker run -e DLS_URL=`hostname -i` -e DLS_PORT=443 -p 443:443 -v $WORKING_DIR:/
See [`examples`](examples) directory for more advanced examples (with reverse proxy usage).
> Adjust `REQUIRED` variables as needed
> Adjust *REQUIRED* variables as needed
```yaml
version: '3.9'
@@ -120,7 +102,7 @@ volumes:
dls-db:
```
## Debian / Ubuntu / macOS (manual method using `git clone` and python virtual environment)
## Debian/Ubuntu/macOS (manual method using `git clone` and python virtual environment)
Tested on `Debian 11 (bullseye)`, `Debian 12 (bookworm)` and `macOS Ventura (13.6)`, Ubuntu may also work.
**Please note that setup on macOS differs from Debian based systems.**
@@ -327,20 +309,18 @@ EOF
Now you have to run `systemctl daemon-reload`. After that you can start service
with `systemctl start fastapi-dls.service` and enable autostart with `systemctl enable fastapi-dls.service`.
## Debian / Ubuntu (using `dpkg` / `apt`)
## Debian/Ubuntu (using `dpkg`)
Packages are available here:
- [GitLab-Registry](https://git.collinwebdesigns.de/oscar.krause/fastapi-dls/-/packages)
Successfully tested with (**LTS Version**):
Successfully tested with:
- **Debian 12 (Bookworm)** (EOL: June 06, 2026)
- *Ubuntu 22.10 (Kinetic Kudu)* (EOL: July 20, 2023)
- *Ubuntu 23.04 (Lunar Lobster)* (EOL: January 2024)
- *Ubuntu 23.10 (Mantic Minotaur)* (EOL: July 2024)
- **Ubuntu 24.04 (Noble Numbat)** (EOL: Apr 2029)
- *Ubuntu 24.10 (Oracular Oriole)* (EOL: Jul 2025)
- Debian 12 (Bookworm)
- Ubuntu 22.10 (Kinetic Kudu) (EOL: July 20, 2023)
- Ubuntu 23.04 (Lunar Lobster) (EOL: January 2024)
- Ubuntu 23.10 (Mantic Minotaur) (EOL: July 2024)
Not working with:
@@ -398,13 +378,6 @@ Now you have to edit `/etc/default/fastapi-dls` as needed.
Continue [here](#unraid-guest) for docker guest setup.
## NixOS
Thanks to [@mrzenc](https://github.com/mrzenc) for [fastapi-dls-nixos](https://github.com/mrzenc/fastapi-dls-nixos).
> [!note] Native NixOS-Package
> There is a [pull request](https://github.com/NixOS/nixpkgs/pull/358647) which adds fastapi-dls into nixpkgs.
## Let's Encrypt Certificate (optional)
If you're using installation via docker, you can use `traefik`. Please refer to their documentation.
@@ -423,36 +396,49 @@ After first success you have to replace `--issue` with `--renew`.
# Configuration
| Variable | Default | Usage |
|--------------------------|----------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|
| `DEBUG` | `false` | Toggles `fastapi` debug mode |
| `DLS_URL` | `localhost` | Used in client-token to tell guest driver where dls instance is reachable |
| `DLS_PORT` | `443` | Used in client-token to tell guest driver where dls instance is reachable |
| `TOKEN_EXPIRE_DAYS` | `1` | Client auth-token validity (used for authenticate client against api, **not `.tok` file!**) |
| `LEASE_EXPIRE_DAYS` | `90` | Lease time in days |
| `LEASE_RENEWAL_PERIOD` | `0.15` | The percentage of the lease period that must elapse before a licensed client can renew a license \*1 |
| `DATABASE` | `sqlite:///db.sqlite` | See [official SQLAlchemy docs](https://docs.sqlalchemy.org/en/14/core/engines.html) |
| `CORS_ORIGINS` | `https://{DLS_URL}` | Sets `Access-Control-Allow-Origin` header (comma separated string) \*2 |
| `SITE_KEY_XID` | `00000000-0000-0000-0000-000000000000` | Site identification uuid |
| `INSTANCE_REF` | `10000000-0000-0000-0000-000000000001` | Instance identification uuid |
| `ALLOTMENT_REF` | `20000000-0000-0000-0000-000000000001` | Allotment identification uuid |
| `INSTANCE_KEY_RSA` | `<app-dir>/cert/instance.private.pem` | Site-wide private RSA key for signing JWTs \*3 |
| `INSTANCE_KEY_PUB` | `<app-dir>/cert/instance.public.pem` | Site-wide public key \*3 |
| Variable | Default | Usage |
|------------------------|----------------------------------------|------------------------------------------------------------------------------------------------------|
| `DEBUG` | `false` | Toggles `fastapi` debug mode |
| `DLS_URL` | `localhost` | Used in client-token to tell guest driver where dls instance is reachable |
| `DLS_PORT` | `443` | Used in client-token to tell guest driver where dls instance is reachable |
| `TOKEN_EXPIRE_DAYS` | `1` | Client auth-token validity (used for authenticate client against api, **not `.tok` file!**) |
| `LEASE_EXPIRE_DAYS` | `90` | Lease time in days |
| `LEASE_RENEWAL_PERIOD` | `0.15` | The percentage of the lease period that must elapse before a licensed client can renew a license \*1 |
| `DATABASE` | `sqlite:///db.sqlite` | See [official SQLAlchemy docs](https://docs.sqlalchemy.org/en/14/core/engines.html) |
| `CORS_ORIGINS` | `https://{DLS_URL}` | Sets `Access-Control-Allow-Origin` header (comma separated string) \*2 |
| `SITE_KEY_XID` | `00000000-0000-0000-0000-000000000000` | Site identification uuid |
| `INSTANCE_REF` | `10000000-0000-0000-0000-000000000001` | Instance identification uuid |
| `ALLOTMENT_REF` | `20000000-0000-0000-0000-000000000001` | Allotment identification uuid |
| `INSTANCE_KEY_RSA` | `<app-dir>/cert/instance.private.pem` | Site-wide private RSA key for signing JWTs \*3 |
| `INSTANCE_KEY_PUB` | `<app-dir>/cert/instance.public.pem` | Site-wide public key \*3 |
\*1 For example, if the lease period is one day and the renewal period is 20%, the client attempts to renew its license
every 4.8 hours. If network connectivity is lost, the loss of connectivity is detected during license renewal and the
client has 19.2 hours in which to re-establish connectivity before its license expires.
\*2 Always use `https`, since guest-drivers only support secure connections!
\*3 Always use `https`, since guest-drivers only support secure connections!
\*3 If you recreate your instance keys you need to **recreate client-token for each guest**!
\*4 If you recreate instance keys you need to **recreate client-token for each guest**!
# Setup (Client)
**The token file has to be copied! It's not enough to C&P file contents, because there can be special characters.**
This guide does not show how to install vGPU guest drivers! Look at the official documentation packed with the driver
releases.
Successfully tested with these package versions:
| vGPU Software | Linux vGPU Manager | Linux Driver | Windows Driver | Release Date |
|---------------|--------------------|--------------|----------------|---------------|
| `16.2` | `535.129.03` | `535.129.03` | `537.70` | October 2023 |
| `16.1` | `535.104.06` | `535.104.05` | `537.13` | August 2023 |
| `16.0` | `535.54.06` | `535.54.03` | `536.22` | July 2023 |
| `15.3` | `525.125.03` | `525.125.06` | `529.11` | June 2023 |
| `15.2` | `525.105.14` | `525.105.17` | `528.89` | March 2023 |
| `15.1` | `525.85.07` | `525.85.05` | `528.24` | January 2023 |
| `15.0` | `525.60.12` | `525.60.13` | `527.41` | December 2022 |
| `14.4` | `510.108.03` | `510.108.03` | `514.08` | December 2022 |
| `14.3` | `510.108.03` | `510.108.03` | `513.91` | November 2022 |
- https://docs.nvidia.com/grid/index.html
## Linux
@@ -528,32 +514,33 @@ Done. For more information check [troubleshoot section](#troubleshoot).
8. Set schedule to `At First Array Start Only`
9. Click on Apply
# API Endpoints
# Endpoints
<details>
<summary>show</summary>
**`GET /`**
### `GET /`
Redirect to `/-/readme`.
**`GET /-/health`**
### `GET /-/health`
Status endpoint, used for *healthcheck*.
**`GET /-/config`**
### `GET /-/config`
Shows current runtime environment variables and their values.
**`GET /-/readme`**
### `GET /-/readme`
HTML rendered README.md.
**`GET /-/manage`**
### `GET /-/manage`
Shows a very basic UI to delete origins or leases.
**`GET /-/origins?leases=false`**
### `GET /-/origins?leases=false`
List registered origins.
@@ -561,11 +548,11 @@ List registered origins.
|-----------------|---------|--------------------------------------|
| `leases` | `false` | Include referenced leases per origin |
**`DELETE /-/origins`**
### `DELETE /-/origins`
Deletes all origins and their leases.
**`GET /-/leases?origin=false`**
### `GET /-/leases?origin=false`
List current leases.
@@ -573,20 +560,20 @@ List current leases.
|-----------------|---------|-------------------------------------|
| `origin` | `false` | Include referenced origin per lease |
**`DELETE /-/lease/{lease_ref}`**
### `DELETE /-/lease/{lease_ref}`
Deletes a lease.
**`GET /-/client-token`**
### `GET /-/client-token`
Generate client token, (see [installation](#installation)).
**Others**
### Others
There are many other internal api endpoints for handling authentication and lease process.
</details>
# Troubleshoot / Debug
# Troubleshoot
**Please make sure that fastapi-dls and your guests are on the same timezone!**
@@ -606,26 +593,11 @@ Logs are available in `C:\Users\Public\Documents\Nvidia\LoggingLog.NVDisplay.Con
# Known Issues
## Generic
### `Failed to acquire license from <ip> (Info: <license> - Error: The allowed time to process response has expired)`
- Are your timezone settings correct on fastapi-dls **and your guest**?
- Did you download the client-token more than an hour ago?
Please download a new client-token. The guest has to register within an hour after the client-token was created.
### `jose.exceptions.JWTError: Signature verification failed.`
- Did you recreate `instance.public.pem` / `instance.private.pem`?
Then you have to download a **new** client-token on each of your guests.
## Linux
### Invalid HTTP request
### `uvicorn.error:Invalid HTTP request received.`
This error message: `uvicorn.error:Invalid HTTP request received.` can be ignored.
This message can be ignored.
- Ref. https://github.com/encode/uvicorn/issues/441
@@ -751,57 +723,12 @@ The error message can safely be ignored (since we have no license limitation :P)
</details>
# vGPU Software Compatibility Matrix
**18.x Drivers are not supported on FastAPI-DLS Versions < 1.6.0**
<details>
<summary>Show Table</summary>
Successfully tested with these package versions.
| vGPU Software | Driver Branch | Linux vGPU Manager | Linux Driver | Windows Driver | Release Date | EOL Date |
|:-------------:|:-------------:|--------------------|--------------|----------------|--------------:|--------------:|
| `17.5` | R550 | `550.144.02` | `550.144.03` | `553.62` | January 2025 | June 2025 |
| `17.4` | R550 | `550.127.06` | `550.127.05` | `553.24` | October 2024 | |
| `17.3` | R550 | `550.90.05` | `550.90.07` | `552.74` | July 2024 | |
| `17.2` | R550 | `550.90.05` | `550.90.07` | `552.55` | June 2024 | |
| `17.1` | R550 | `550.54.16` | `550.54.15` | `551.78` | March 2024 | |
| `17.0` | R550 | `550.54.10` | `550.54.14` | `551.61` | February 2024 | |
| `16.9` | R535 | `535.230.02` | `535.216.01` | `539.19` | October 2024 | July 2026 |
| `16.8` | R535 | `535.216.01` | `535.216.01` | `538.95` | October 2024 | |
| `16.7` | R535 | `535.183.04` | `535.183.06` | `538.78` | July 2024 | |
| `16.6` | R535 | `535.183.04` | `535.183.01` | `538.67` | June 2024 | |
| `16.5` | R535 | `535.161.05` | `535.161.08` | `538.46` | February 2024 | |
| `16.4` | R535 | `535.161.05` | `535.161.07` | `538.33` | February 2024 | |
| `16.3` | R535 | `535.154.02` | `535.154.05` | `538.15` | January 2024 | |
| `16.2` | R535 | `535.129.03` | `535.129.03` | `537.70` | October 2023 | |
| `16.1` | R535 | `535.104.06` | `535.104.05` | `537.13` | August 2023 | |
| `16.0` | R535 | `535.54.06` | `535.54.03` | `536.22` | July 2023 | |
| `15.4` | R525 | `525.147.01` | `525.147.05` | `529.19` | June 2023 | December 2023 |
| `14.4` | R510 | `510.108.03` | `510.108.03` | `514.08` | December 2022 | February 2023 |
</details>
- https://docs.nvidia.com/grid/index.html
- https://docs.nvidia.com/grid/gpus-supported-by-vgpu.html
*To get the latest drivers, visit Nvidia or search in Discord-Channel `GPU Unlocking` (Server-ID: `829786927829745685`)
on channel `licensing`
# Credits
Thanks to the vGPU community and all who use this project and report bugs.
Special thanks to:
Special thanks to
- `samicrusader` who created build file for **ArchLinux**
- `cyrus` who wrote the section for **openSUSE**
- `midi` who wrote the section for **unRAID**
- `polloloco` who wrote the *[NVIDIA vGPU Guide](https://gitlab.com/polloloco/vgpu-proxmox)*
- `DualCoder` who creates the `vgpu_unlock` functionality [vgpu_unlock](https://github.com/DualCoder/vgpu_unlock)
- `Krutav Shah` who wrote the [vGPU_Unlock Wiki](https://docs.google.com/document/d/1pzrWJ9h-zANCtyqRgS7Vzla0Y8Ea2-5z2HEi4X75d2Q/)
- `Wim van 't Hoog` for the [Proxmox All-In-One Installer Script](https://wvthoog.nl/proxmox-vgpu-v3/)
- `mrzenc` who wrote [fastapi-dls-nixos](https://github.com/mrzenc/fastapi-dls-nixos)
And thanks to all people who contributed to all these libraries!
- @samicrusader who created build file for ArchLinux
- @cyrus who wrote the section for openSUSE
- @midi who wrote the section for unRAID

View File

@@ -1,89 +1,55 @@
import logging
import sys
from base64 import b64encode as b64enc
from calendar import timegm
from contextlib import asynccontextmanager
from datetime import datetime, UTC
from hashlib import sha256
from json import loads as json_loads
from os import getenv as env
from os.path import join, dirname
from uuid import uuid4
from os.path import join, dirname
from os import getenv as env
from dateutil.relativedelta import relativedelta
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.requests import Request
from jose import jws, jwt, JWTError
from json import loads as json_loads
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from calendar import timegm
from jose import jws, jwk, jwt, JWTError
from jose.constants import ALGORITHMS
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from starlette.middleware.cors import CORSMiddleware
from starlette.responses import StreamingResponse, JSONResponse as JSONr, HTMLResponse as HTMLr, Response, RedirectResponse
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from orm import Origin, Lease, init as db_init, migrate, Instance, Site
from util import load_key, load_file
from orm import Origin, Lease, init as db_init, migrate
# Load variables
load_dotenv('../version.env')
# Get current timezone
TZ = datetime.now().astimezone().tzinfo
# Load basic variables
VERSION, COMMIT, DEBUG = env('VERSION', 'unknown'), env('COMMIT', 'unknown'), bool(env('DEBUG', False))
# Database connection
config = dict(openapi_url=None, docs_url=None, redoc_url=None) # dict(openapi_url='/-/openapi.json', docs_url='/-/docs', redoc_url='/-/redoc')
app = FastAPI(title='FastAPI-DLS', description='Minimal Delegated License Service (DLS).', version=VERSION, **config)
db = create_engine(str(env('DATABASE', 'sqlite:///db.sqlite')))
db_init(db), migrate(db)
# Load DLS variables (all prefixed with "INSTANCE_*" is used as "SERVICE_INSTANCE_*" or "SI_*" in official dls service)
# everything prefixed with "INSTANCE_*" is used as "SERVICE_INSTANCE_*" or "SI_*" in official dls service
DLS_URL = str(env('DLS_URL', 'localhost'))
DLS_PORT = int(env('DLS_PORT', '443'))
SITE_KEY_XID = str(env('SITE_KEY_XID', '00000000-0000-0000-0000-000000000000'))
INSTANCE_REF = str(env('INSTANCE_REF', '10000000-0000-0000-0000-000000000001'))
ALLOTMENT_REF = str(env('ALLOTMENT_REF', '20000000-0000-0000-0000-000000000001'))
INSTANCE_KEY_RSA = load_key(str(env('INSTANCE_KEY_RSA', join(dirname(__file__), 'cert/instance.private.pem'))))
INSTANCE_KEY_PUB = load_key(str(env('INSTANCE_KEY_PUB', join(dirname(__file__), 'cert/instance.public.pem'))))
TOKEN_EXPIRE_DELTA = relativedelta(days=int(env('TOKEN_EXPIRE_DAYS', 1)), hours=int(env('TOKEN_EXPIRE_HOURS', 0)))
LEASE_EXPIRE_DELTA = relativedelta(days=int(env('LEASE_EXPIRE_DAYS', 90)), hours=int(env('LEASE_EXPIRE_HOURS', 0)))
LEASE_RENEWAL_PERIOD = float(env('LEASE_RENEWAL_PERIOD', 0.15))
LEASE_RENEWAL_DELTA = timedelta(days=int(env('LEASE_EXPIRE_DAYS', 90)), hours=int(env('LEASE_EXPIRE_HOURS', 0)))
CLIENT_TOKEN_EXPIRE_DELTA = relativedelta(years=12)
CORS_ORIGINS = str(env('CORS_ORIGINS', '')).split(',') if (env('CORS_ORIGINS')) else [f'https://{DLS_URL}']
ALLOTMENT_REF = str(env('ALLOTMENT_REF', '20000000-0000-0000-0000-000000000001')) # todo
# Logging
LOG_LEVEL = logging.DEBUG if DEBUG else logging.INFO
logging.basicConfig(format='[{levelname:^7}] [{module:^15}] {message}', style='{')
logger = logging.getLogger(__name__)
logger.setLevel(LOG_LEVEL)
logging.getLogger('util').setLevel(LOG_LEVEL)
logging.getLogger('DriverMatrix').setLevel(LOG_LEVEL)
# FastAPI
# Lifespan handler handed to FastAPI(lifespan=...): everything before `yield` runs once on
# startup, everything after it runs once on shutdown.
@asynccontextmanager
async def lifespan(_: FastAPI):
# on startup
# Read the renewal/expiry settings from the default instance.
# NOTE(review): presumably loaded from the database behind `db` - confirm in orm.Instance.
default_instance = Instance.get_default_instance(db)
lease_renewal_period = default_instance.lease_renewal_period
lease_renewal_delta = default_instance.get_lease_renewal_delta()
client_token_expire_delta = default_instance.get_client_token_expire_delta()
# Log the detected timezone together with the effective renewal interval, lease validity and
# client-token validity, so a misconfiguration is visible right at startup.
logger.info(f'''
Using timezone: {str(TZ)}. Make sure this is correct and match your clients!
Your clients will renew their license every {str(Lease.calculate_renewal(lease_renewal_period, lease_renewal_delta))}.
If the renewal fails, the license is valid for {str(lease_renewal_delta)}.
Your client-token file (.tok) is valid for {str(client_token_expire_delta)}.
''')
logger.info(f'Debug is {"enabled" if DEBUG else "disabled"}.')
# Emit warnings for questionable instance settings before the app starts serving.
validate_settings()
# Hand control to the running application; execution resumes below on shutdown.
yield
# on shutdown
logger.info(f'Shutting down ...')
config = dict(openapi_url=None, docs_url=None, redoc_url=None) # dict(openapi_url='/-/openapi.json', docs_url='/-/docs', redoc_url='/-/redoc')
app = FastAPI(title='FastAPI-DLS', description='Minimal Delegated License Service (DLS).', version=VERSION, lifespan=lifespan, **config)
jwt_encode_key = jwk.construct(INSTANCE_KEY_RSA.export_key().decode('utf-8'), algorithm=ALGORITHMS.RS256)
jwt_decode_key = jwk.construct(INSTANCE_KEY_PUB.export_key().decode('utf-8'), algorithm=ALGORITHMS.RS256)
app.debug = DEBUG
app.add_middleware(
@@ -94,28 +60,17 @@ app.add_middleware(
allow_headers=['*'],
)
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG if DEBUG else logging.INFO)
# Helper
def __get_token(request: Request, jwt_decode_key: "jose.jwt") -> dict:
def __get_token(request: Request) -> dict:
authorization_header = request.headers.get('authorization')
token = authorization_header.split(' ')[1]
return jwt.decode(token=token, key=jwt_decode_key, algorithms=ALGORITHMS.RS256, options={'verify_aud': False})
def validate_settings():
    """Warn about every instance whose ``lease_expire_delta`` is out of range.

    The accepted range is 86_400 to 7_776_000 seconds inclusive (1 day to
    90 days). Out-of-range values are only reported; nothing is modified
    and no exception is raised for them.
    """
    lease_expire_delta_min, lease_expire_delta_max = 86_400, 7_776_000

    session = sessionmaker(bind=db)()
    try:
        for instance in session.query(Instance).all():
            lease_expire_delta = instance.lease_expire_delta
            # Compare against the named bounds so the check and the message cannot drift apart.
            if not lease_expire_delta_min <= lease_expire_delta <= lease_expire_delta_max:
                logger.warning(f'> [ instance ]: {instance.instance_ref}: "lease_expire_delta" should be between {lease_expire_delta_min} and {lease_expire_delta_max}')
    finally:
        # Release the session even if the query fails.
        session.close()
# Endpoints
@app.get('/', summary='Index')
async def index():
    """Redirect the root URL to the readme page."""
    readme_url = '/-/readme'
    return RedirectResponse(readme_url)
@@ -133,20 +88,18 @@ async def _health():
@app.get('/-/config', summary='* Config', description='returns environment variables.')
async def _config():
default_site, default_instance = Site.get_default_site(db), Instance.get_default_instance(db)
return JSONr({
'VERSION': str(VERSION),
'COMMIT': str(COMMIT),
'DEBUG': str(DEBUG),
'DLS_URL': str(DLS_URL),
'DLS_PORT': str(DLS_PORT),
'SITE_KEY_XID': str(default_site.site_key),
'INSTANCE_REF': str(default_instance.instance_ref),
'SITE_KEY_XID': str(SITE_KEY_XID),
'INSTANCE_REF': str(INSTANCE_REF),
'ALLOTMENT_REF': [str(ALLOTMENT_REF)],
'TOKEN_EXPIRE_DELTA': str(default_instance.get_token_expire_delta()),
'LEASE_EXPIRE_DELTA': str(default_instance.get_lease_expire_delta()),
'LEASE_RENEWAL_PERIOD': str(default_instance.lease_renewal_period),
'TOKEN_EXPIRE_DELTA': str(TOKEN_EXPIRE_DELTA),
'LEASE_EXPIRE_DELTA': str(LEASE_EXPIRE_DELTA),
'LEASE_RENEWAL_PERIOD': str(LEASE_RENEWAL_PERIOD),
'CORS_ORIGINS': str(CORS_ORIGINS),
'TZ': str(TZ),
})
@@ -155,8 +108,7 @@ async def _config():
@app.get('/-/readme', summary='* Readme')
async def _readme():
from markdown import markdown
from util import load_file
content = load_file(join(dirname(__file__), '../README.md')).decode('utf-8')
content = load_file('../README.md').decode('utf-8')
return HTMLr(markdown(text=content, extensions=['tables', 'fenced_code', 'md_in_html', 'nl2br', 'toc']))
@@ -205,7 +157,8 @@ async def _origins(request: Request, leases: bool = False):
for origin in session.query(Origin).all():
x = origin.serialize()
if leases:
x['leases'] = list(map(lambda _: _.serialize(), Lease.find_by_origin_ref(db, origin.origin_ref)))
serialize = dict(renewal_period=LEASE_RENEWAL_PERIOD, renewal_delta=LEASE_RENEWAL_DELTA)
x['leases'] = list(map(lambda _: _.serialize(**serialize), Lease.find_by_origin_ref(db, origin.origin_ref)))
response.append(x)
session.close()
return JSONr(response)
@@ -222,7 +175,8 @@ async def _leases(request: Request, origin: bool = False):
session = sessionmaker(bind=db)()
response = []
for lease in session.query(Lease).all():
x = lease.serialize()
serialize = dict(renewal_period=LEASE_RENEWAL_PERIOD, renewal_delta=LEASE_RENEWAL_DELTA)
x = lease.serialize(**serialize)
if origin:
lease_origin = session.query(Origin).filter(Origin.origin_ref == lease.origin_ref).first()
if lease_origin is not None:
@@ -248,14 +202,8 @@ async def _lease_delete(request: Request, lease_ref: str):
# venv/lib/python3.9/site-packages/nls_core_service_instance/service_instance_token_manager.py
@app.get('/-/client-token', summary='* Client-Token', description='creates a new messenger token for this service instance')
async def _client_token():
cur_time = datetime.now(UTC)
default_instance = Instance.get_default_instance(db)
public_key = default_instance.get_public_key()
# todo: implemented request parameter to support different instances
jwt_encode_key = default_instance.get_jwt_encode_key()
exp_time = cur_time + default_instance.get_client_token_expire_delta()
cur_time = datetime.utcnow()
exp_time = cur_time + CLIENT_TOKEN_EXPIRE_DELTA
payload = {
"jti": str(uuid4()),
@@ -268,7 +216,7 @@ async def _client_token():
"scope_ref_list": [ALLOTMENT_REF],
"fulfillment_class_ref_list": [],
"service_instance_configuration": {
"nls_service_instance_ref": default_instance.instance_ref,
"nls_service_instance_ref": INSTANCE_REF,
"svc_port_set_list": [
{
"idx": 0,
@@ -280,10 +228,10 @@ async def _client_token():
},
"service_instance_public_key_configuration": {
"service_instance_public_key_me": {
"mod": hex(public_key.raw().public_numbers().n)[2:],
"exp": int(public_key.raw().public_numbers().e),
"mod": hex(INSTANCE_KEY_PUB.public_key().n)[2:],
"exp": int(INSTANCE_KEY_PUB.public_key().e),
},
"service_instance_public_key_pem": public_key.pem().decode('utf-8'),
"service_instance_public_key_pem": INSTANCE_KEY_PUB.export_key().decode('utf-8'),
"key_retention_mode": "LATEST_ONLY"
},
}
@@ -300,10 +248,10 @@ async def _client_token():
# venv/lib/python3.9/site-packages/nls_services_auth/test/test_origins_controller.py
@app.post('/auth/v1/origin', description='find or create an origin')
async def auth_v1_origin(request: Request):
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.now(UTC)
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.utcnow()
origin_ref = j.get('candidate_origin_ref')
logger.info(f'> [ origin ]: {origin_ref}: {j}')
logging.info(f'> [ origin ]: {origin_ref}: {j}')
data = Origin(
origin_ref=origin_ref,
@@ -330,10 +278,10 @@ async def auth_v1_origin(request: Request):
# venv/lib/python3.9/site-packages/nls_services_auth/test/test_origins_controller.py
@app.post('/auth/v1/origin/update', description='update an origin evidence')
async def auth_v1_origin_update(request: Request):
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.now(UTC)
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.utcnow()
origin_ref = j.get('origin_ref')
logger.info(f'> [ update ]: {origin_ref}: {j}')
logging.info(f'> [ update ]: {origin_ref}: {j}')
data = Origin(
origin_ref=origin_ref,
@@ -357,24 +305,21 @@ async def auth_v1_origin_update(request: Request):
# venv/lib/python3.9/site-packages/nls_core_auth/auth.py - CodeResponse
@app.post('/auth/v1/code', description='get an authorization code')
async def auth_v1_code(request: Request):
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.now(UTC)
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.utcnow()
origin_ref = j.get('origin_ref')
logger.info(f'> [ code ]: {origin_ref}: {j}')
logging.info(f'> [ code ]: {origin_ref}: {j}')
delta = relativedelta(minutes=15)
expires = cur_time + delta
default_site = Site.get_default_site(db)
jwt_encode_key = Instance.get_default_instance(db).get_jwt_encode_key()
payload = {
'iat': timegm(cur_time.timetuple()),
'exp': timegm(expires.timetuple()),
'challenge': j.get('code_challenge'),
'origin_ref': j.get('origin_ref'),
'key_ref': default_site.site_key,
'kid': default_site.site_key,
'key_ref': SITE_KEY_XID,
'kid': SITE_KEY_XID
}
auth_code = jws.sign(payload, key=jwt_encode_key, headers={'kid': payload.get('kid')}, algorithm=ALGORITHMS.RS256)
@@ -392,25 +337,22 @@ async def auth_v1_code(request: Request):
# venv/lib/python3.9/site-packages/nls_core_auth/auth.py - TokenResponse
@app.post('/auth/v1/token', description='exchange auth code and verifier for token')
async def auth_v1_token(request: Request):
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.now(UTC)
default_site, default_instance = Site.get_default_site(db), Instance.get_default_instance(db)
jwt_encode_key, jwt_decode_key = default_instance.get_jwt_encode_key(), default_instance.get_jwt_decode_key()
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.utcnow()
try:
payload = jwt.decode(token=j.get('auth_code'), key=jwt_decode_key, algorithms=ALGORITHMS.RS256)
payload = jwt.decode(token=j.get('auth_code'), key=jwt_decode_key)
except JWTError as e:
return JSONr(status_code=400, content={'status': 400, 'title': 'invalid token', 'detail': str(e)})
origin_ref = payload.get('origin_ref')
logger.info(f'> [ auth ]: {origin_ref}: {j}')
logging.info(f'> [ auth ]: {origin_ref}: {j}')
# validate the code challenge
challenge = b64enc(sha256(j.get('code_verifier').encode('utf-8')).digest()).rstrip(b'=').decode('utf-8')
if payload.get('challenge') != challenge:
return JSONr(status_code=401, content={'status': 401, 'detail': 'expected challenge did not match verifier'})
access_expires_on = cur_time + default_instance.get_token_expire_delta()
access_expires_on = cur_time + TOKEN_EXPIRE_DELTA
new_payload = {
'iat': timegm(cur_time.timetuple()),
@@ -419,8 +361,8 @@ async def auth_v1_token(request: Request):
'aud': 'https://cls.nvidia.org',
'exp': timegm(access_expires_on.timetuple()),
'origin_ref': origin_ref,
'key_ref': default_site.site_key,
'kid': default_site.site_key,
'key_ref': SITE_KEY_XID,
'kid': SITE_KEY_XID,
}
auth_token = jwt.encode(new_payload, key=jwt_encode_key, headers={'kid': payload.get('kid')}, algorithm=ALGORITHMS.RS256)
@@ -437,19 +379,16 @@ async def auth_v1_token(request: Request):
# venv/lib/python3.9/site-packages/nls_services_lease/test/test_lease_multi_controller.py
@app.post('/leasing/v1/lessor', description='request multiple leases (borrow) for current origin')
async def leasing_v1_lessor(request: Request):
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.now(UTC)
default_instance = Instance.get_default_instance(db)
jwt_decode_key = default_instance.get_jwt_decode_key()
j, token, cur_time = json_loads((await request.body()).decode('utf-8')), __get_token(request), datetime.utcnow()
try:
token = __get_token(request, jwt_decode_key)
token = __get_token(request)
except JWTError:
return JSONr(status_code=401, content={'status': 401, 'detail': 'token is not valid'})
origin_ref = token.get('origin_ref')
scope_ref_list = j.get('scope_ref_list')
logger.info(f'> [ create ]: {origin_ref}: create leases for scope_ref_list {scope_ref_list}')
logging.info(f'> [ create ]: {origin_ref}: create leases for scope_ref_list {scope_ref_list}')
lease_result_list = []
for scope_ref in scope_ref_list:
@@ -457,7 +396,7 @@ async def leasing_v1_lessor(request: Request):
# return JSONr(status_code=500, detail=f'no service instances found for scopes: ["{scope_ref}"]')
lease_ref = str(uuid4())
expires = cur_time + default_instance.get_lease_expire_delta()
expires = cur_time + LEASE_EXPIRE_DELTA
lease_result_list.append({
"ordinal": 0,
# https://docs.nvidia.com/license-system/latest/nvidia-license-system-user-guide/index.html
@@ -465,13 +404,13 @@ async def leasing_v1_lessor(request: Request):
"ref": lease_ref,
"created": cur_time.isoformat(),
"expires": expires.isoformat(),
"recommended_lease_renewal": default_instance.lease_renewal_period,
"recommended_lease_renewal": LEASE_RENEWAL_PERIOD,
"offline_lease": "true",
"license_type": "CONCURRENT_COUNTED_SINGLE"
}
})
data = Lease(instance_ref=default_instance.instance_ref, origin_ref=origin_ref, lease_ref=lease_ref, lease_created=cur_time, lease_expires=expires)
data = Lease(origin_ref=origin_ref, lease_ref=lease_ref, lease_created=cur_time, lease_expires=expires)
Lease.create_or_update(db, data)
response = {
@@ -488,19 +427,12 @@ async def leasing_v1_lessor(request: Request):
# venv/lib/python3.9/site-packages/nls_dal_service_instance_dls/schema/service_instance/V1_0_21__product_mapping.sql
@app.get('/leasing/v1/lessor/leases', description='get active leases for current origin')
async def leasing_v1_lessor_lease(request: Request):
cur_time = datetime.now(UTC)
jwt_decode_key = Instance.get_default_instance(db).get_jwt_decode_key()
try:
token = __get_token(request, jwt_decode_key)
except JWTError:
return JSONr(status_code=401, content={'status': 401, 'detail': 'token is not valid'})
token, cur_time = __get_token(request), datetime.utcnow()
origin_ref = token.get('origin_ref')
active_lease_list = list(map(lambda x: x.lease_ref, Lease.find_by_origin_ref(db, origin_ref)))
logger.info(f'> [ leases ]: {origin_ref}: found {len(active_lease_list)} active leases')
logging.info(f'> [ leases ]: {origin_ref}: found {len(active_lease_list)} active leases')
response = {
"active_lease_list": active_lease_list,
@@ -515,28 +447,20 @@ async def leasing_v1_lessor_lease(request: Request):
# venv/lib/python3.9/site-packages/nls_core_lease/lease_single.py
@app.put('/leasing/v1/lease/{lease_ref}', description='renew a lease')
async def leasing_v1_lease_renew(request: Request, lease_ref: str):
cur_time = datetime.now(UTC)
default_instance = Instance.get_default_instance(db)
jwt_decode_key = default_instance.get_jwt_decode_key()
try:
token = __get_token(request, jwt_decode_key)
except JWTError:
return JSONr(status_code=401, content={'status': 401, 'detail': 'token is not valid'})
token, cur_time = __get_token(request), datetime.utcnow()
origin_ref = token.get('origin_ref')
logger.info(f'> [ renew ]: {origin_ref}: renew {lease_ref}')
logging.info(f'> [ renew ]: {origin_ref}: renew {lease_ref}')
entity = Lease.find_by_origin_ref_and_lease_ref(db, origin_ref, lease_ref)
if entity is None:
return JSONr(status_code=404, content={'status': 404, 'detail': 'requested lease not available'})
expires = cur_time + default_instance.get_lease_expire_delta()
expires = cur_time + LEASE_EXPIRE_DELTA
response = {
"lease_ref": lease_ref,
"expires": expires.isoformat(),
"recommended_lease_renewal": default_instance.lease_renewal_period,
"recommended_lease_renewal": LEASE_RENEWAL_PERIOD,
"offline_lease": True,
"prompts": None,
"sync_timestamp": cur_time.isoformat(),
@@ -550,17 +474,10 @@ async def leasing_v1_lease_renew(request: Request, lease_ref: str):
# venv/lib/python3.9/site-packages/nls_services_lease/test/test_lease_single_controller.py
@app.delete('/leasing/v1/lease/{lease_ref}', description='release (return) a lease')
async def leasing_v1_lease_delete(request: Request, lease_ref: str):
cur_time = datetime.now(UTC)
jwt_decode_key = Instance.get_default_instance(db).get_jwt_decode_key()
try:
token = __get_token(request, jwt_decode_key)
except JWTError:
return JSONr(status_code=401, content={'status': 401, 'detail': 'token is not valid'})
token, cur_time = __get_token(request), datetime.utcnow()
origin_ref = token.get('origin_ref')
logger.info(f'> [ return ]: {origin_ref}: return {lease_ref}')
logging.info(f'> [ return ]: {origin_ref}: return {lease_ref}')
entity = Lease.find_by_lease_ref(db, lease_ref)
if entity.origin_ref != origin_ref:
@@ -583,20 +500,13 @@ async def leasing_v1_lease_delete(request: Request, lease_ref: str):
# venv/lib/python3.9/site-packages/nls_services_lease/test/test_lease_multi_controller.py
@app.delete('/leasing/v1/lessor/leases', description='release all leases')
async def leasing_v1_lessor_lease_remove(request: Request):
cur_time = datetime.now(UTC)
jwt_decode_key = Instance.get_default_instance(db).get_jwt_decode_key()
try:
token = __get_token(request, jwt_decode_key)
except JWTError:
return JSONr(status_code=401, content={'status': 401, 'detail': 'token is not valid'})
token, cur_time = __get_token(request), datetime.utcnow()
origin_ref = token.get('origin_ref')
released_lease_list = list(map(lambda x: x.lease_ref, Lease.find_by_origin_ref(db, origin_ref)))
deletions = Lease.cleanup(db, origin_ref)
logger.info(f'> [ remove ]: {origin_ref}: removed {deletions} leases')
logging.info(f'> [ remove ]: {origin_ref}: removed {deletions} leases')
response = {
"released_lease_list": released_lease_list,
@@ -610,9 +520,7 @@ async def leasing_v1_lessor_lease_remove(request: Request):
@app.post('/leasing/v1/lessor/shutdown', description='shutdown all leases')
async def leasing_v1_lessor_shutdown(request: Request):
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.now(UTC)
jwt_decode_key = Instance.get_default_instance(db).get_jwt_decode_key()
j, cur_time = json_loads((await request.body()).decode('utf-8')), datetime.utcnow()
token = j.get('token')
token = jwt.decode(token=token, key=jwt_decode_key, algorithms=ALGORITHMS.RS256, options={'verify_aud': False})
@@ -620,7 +528,7 @@ async def leasing_v1_lessor_shutdown(request: Request):
released_lease_list = list(map(lambda x: x.lease_ref, Lease.find_by_origin_ref(db, origin_ref)))
deletions = Lease.cleanup(db, origin_ref)
logger.info(f'> [ shutdown ]: {origin_ref}: removed {deletions} leases')
logging.info(f'> [ shutdown ]: {origin_ref}: removed {deletions} leases')
response = {
"released_lease_list": released_lease_list,
@@ -632,6 +540,18 @@ async def leasing_v1_lessor_shutdown(request: Request):
return JSONr(response)
@app.on_event('startup')
async def app_on_startup():
    """Log the timezone, lease renewal interval, lease validity and client-token validity at startup."""
    renewal_interval = Lease.calculate_renewal(LEASE_RENEWAL_PERIOD, LEASE_RENEWAL_DELTA)

    # The banner text is kept flush-left so the logged message is unchanged.
    banner = f'''
Using timezone: {str(TZ)}. Make sure this is correct and match your clients!
Your clients renew their license every {str(renewal_interval)}.
If the renewal fails, the license is {str(LEASE_RENEWAL_DELTA)} valid.
Your client-token file (.tok) is valid for {str(CLIENT_TOKEN_EXPIRE_DELTA)}.
'''
    logger.info(banner)
if __name__ == '__main__':
import uvicorn
@@ -643,7 +563,7 @@ if __name__ == '__main__':
#
###
logger.info(f'> Starting dev-server ...')
logging.info(f'> Starting dev-server ...')
ssl_keyfile = join(dirname(__file__), 'cert/webserver.key')
ssl_certfile = join(dirname(__file__), 'cert/webserver.crt')

View File

@@ -1,143 +1,18 @@
import logging
from datetime import datetime, timedelta, timezone, UTC
from os import getenv as env
from os.path import join, dirname, isfile
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from jose import jwk
from jose.constants import ALGORITHMS
from sqlalchemy import Column, VARCHAR, CHAR, ForeignKey, DATETIME, update, and_, inspect, text, BLOB, INT, FLOAT
from sqlalchemy import Column, VARCHAR, CHAR, ForeignKey, DATETIME, update, and_, inspect, text
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker, declarative_base, Session, relationship
from sqlalchemy.schema import CreateTable
from util import DriverMatrix, PrivateKey, PublicKey, DriverMatrix
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
from sqlalchemy.orm import sessionmaker, declarative_base
Base = declarative_base()
class Site(Base):
    """ORM model of a site, identified by its site key (SITE_KEY_XID)."""
    __tablename__ = "site"

    # Key and name used for the site that is looked up as the "default" one.
    INITIAL_SITE_KEY_XID = '10000000-0000-0000-0000-000000000000'
    INITIAL_SITE_NAME = 'default-site'

    site_key = Column(CHAR(length=36), primary_key=True, unique=True, index=True)  # uuid4, SITE_KEY_XID
    name = Column(VARCHAR(length=256), nullable=False)

    def __str__(self):
        key = self.site_key
        return f'SITE_KEY_XID: {key}'

    @staticmethod
    def create_statement(engine: Engine):
        """Return the compiled CREATE TABLE statement for this table on ``engine``."""
        ddl = CreateTable(Site.__table__)
        return ddl.compile(engine)

    @staticmethod
    def get_default_site(engine: Engine) -> "Site":
        """Return the site whose key equals ``INITIAL_SITE_KEY_XID``, or ``None`` if there is none."""
        session = sessionmaker(bind=engine)()
        query = session.query(Site).filter(Site.site_key == Site.INITIAL_SITE_KEY_XID)
        default_site = query.first()
        session.close()
        return default_site
class Instance(Base):
    """ORM model of a service instance: its RSA key pair and its token/lease timing settings.

    The ``*_delta`` columns hold a duration in seconds; the ``get_*_delta``
    helpers convert them into delta objects for date arithmetic.
    """
    __tablename__ = "instance"

    DEFAULT_INSTANCE_REF = '10000000-0000-0000-0000-000000000001'
    DEFAULT_TOKEN_EXPIRE_DELTA = 86_400  # 1 day
    DEFAULT_LEASE_EXPIRE_DELTA = 7_776_000  # 90 days
    DEFAULT_LEASE_RENEWAL_PERIOD = 0.15
    DEFAULT_CLIENT_TOKEN_EXPIRE_DELTA = 378_432_000  # 12 years
    # 1 day = 86400 (min. in production setup, max 90 days), 1 hour = 3600

    instance_ref = Column(CHAR(length=36), primary_key=True, unique=True, index=True)  # uuid4, INSTANCE_REF
    site_key = Column(CHAR(length=36), ForeignKey(Site.site_key, ondelete='CASCADE'), nullable=False, index=True)  # uuid4
    private_key = Column(BLOB(length=2048), nullable=False)
    public_key = Column(BLOB(length=512), nullable=False)
    token_expire_delta = Column(INT(), nullable=False, default=DEFAULT_TOKEN_EXPIRE_DELTA, comment='in seconds')
    lease_expire_delta = Column(INT(), nullable=False, default=DEFAULT_LEASE_EXPIRE_DELTA, comment='in seconds')
    lease_renewal_period = Column(FLOAT(precision=2), nullable=False, default=DEFAULT_LEASE_RENEWAL_PERIOD)
    client_token_expire_delta = Column(INT(), nullable=False, default=DEFAULT_CLIENT_TOKEN_EXPIRE_DELTA, comment='in seconds')

    __origin = relationship(Site, foreign_keys=[site_key])

    def __str__(self):
        return f'INSTANCE_REF: {self.instance_ref} (SITE_KEY_XID: {self.site_key})'

    @staticmethod
    def create_statement(engine: Engine):
        """Return the compiled CREATE TABLE statement for this table on ``engine``."""
        return CreateTable(Instance.__table__).compile(engine)

    @staticmethod
    def create_or_update(engine: Engine, instance: "Instance"):
        """Insert ``instance``, or overwrite the stored row that has the same ``instance_ref``."""
        session = sessionmaker(bind=engine)()
        entity = session.query(Instance).filter(Instance.instance_ref == instance.instance_ref).first()
        if entity is None:
            session.add(instance)
        else:
            # Every column except the primary key is taken over from the given instance.
            x = dict(
                site_key=instance.site_key,
                private_key=instance.private_key,
                public_key=instance.public_key,
                token_expire_delta=instance.token_expire_delta,
                lease_expire_delta=instance.lease_expire_delta,
                lease_renewal_period=instance.lease_renewal_period,
                client_token_expire_delta=instance.client_token_expire_delta,
            )
            session.execute(update(Instance).where(Instance.instance_ref == instance.instance_ref).values(**x))
        session.commit()
        session.flush()
        session.close()

    # todo: validate on startup that "lease_expire_delta" is between 1 day and 90 days

    @staticmethod
    def get_default_instance(engine: Engine) -> "Instance":
        """Return the first instance that belongs to the default site, or ``None`` if there is none."""
        session = sessionmaker(bind=engine)()
        site = Site.get_default_site(engine)
        entity = session.query(Instance).filter(Instance.site_key == site.site_key).first()
        session.close()
        return entity

    def get_token_expire_delta(self) -> "dateutil.relativedelta.relativedelta":
        """Return ``token_expire_delta`` (seconds) as a ``relativedelta``."""
        return relativedelta(seconds=self.token_expire_delta)

    def get_lease_expire_delta(self) -> "dateutil.relativedelta.relativedelta":
        """Return ``lease_expire_delta`` (seconds) as a ``relativedelta``."""
        return relativedelta(seconds=self.lease_expire_delta)

    def get_lease_renewal_delta(self) -> "datetime.timedelta":
        """Return the lease renewal delta as a ``timedelta``.

        There is no separate renewal column; the value is derived from
        ``lease_expire_delta``.
        """
        return timedelta(seconds=self.lease_expire_delta)

    def get_client_token_expire_delta(self) -> "dateutil.relativedelta.relativedelta":
        """Return ``client_token_expire_delta`` (seconds) as a ``relativedelta``."""
        return relativedelta(seconds=self.client_token_expire_delta)

    def __get_private_key(self) -> "PrivateKey":
        return PrivateKey(self.private_key)

    def get_public_key(self) -> "PublicKey":
        """Return the stored public key wrapped in a ``PublicKey``."""
        return PublicKey(self.public_key)

    def get_jwt_encode_key(self) -> "jose.jwk.Key":
        """Return the RS256 signing key built from the stored private key."""
        return jwk.construct(self.__get_private_key().pem().decode('utf-8'), algorithm=ALGORITHMS.RS256)

    def get_jwt_decode_key(self) -> "jose.jwk.Key":
        """Return the RS256 verification key built from the stored public key."""
        return jwk.construct(self.get_public_key().pem().decode('utf-8'), algorithm=ALGORITHMS.RS256)

    def get_private_key_str(self, encoding: str = 'utf-8') -> str:
        """Return the stored private key bytes decoded with ``encoding``."""
        return self.private_key.decode(encoding)

    def get_public_key_str(self, encoding: str = 'utf-8') -> str:
        """Return the stored public key bytes decoded with ``encoding``."""
        # Fixed: this previously decoded ``self.private_key`` and so exposed the private key.
        return self.public_key.decode(encoding)
class Origin(Base):
__tablename__ = "origin"
origin_ref = Column(CHAR(length=36), primary_key=True, unique=True, index=True) # uuid4
# service_instance_xid = Column(CHAR(length=36), nullable=False, index=True) # uuid4 # not necessary, we only support one service_instance_xid ('INSTANCE_REF')
hostname = Column(VARCHAR(length=256), nullable=True)
guest_driver_version = Column(VARCHAR(length=10), nullable=True)
@@ -148,8 +23,6 @@ class Origin(Base):
return f'Origin(origin_ref={self.origin_ref}, hostname={self.hostname})'
def serialize(self) -> dict:
_ = DriverMatrix().find(self.guest_driver_version)
return {
'origin_ref': self.origin_ref,
# 'service_instance_xid': self.service_instance_xid,
@@ -157,11 +30,11 @@ class Origin(Base):
'guest_driver_version': self.guest_driver_version,
'os_platform': self.os_platform,
'os_version': self.os_version,
'$driver': _ if _ is not None else None,
}
@staticmethod
def create_statement(engine: Engine):
from sqlalchemy.schema import CreateTable
return CreateTable(Origin.__table__).compile(engine)
@staticmethod
@@ -188,17 +61,7 @@ class Origin(Base):
if origin_refs is None:
deletions = session.query(Origin).delete()
else:
deletions = session.query(Origin).filter(Origin.origin_ref.in_(origin_refs)).delete()
session.commit()
session.close()
return deletions
@staticmethod
def delete_expired(engine: Engine) -> int:
session = sessionmaker(bind=engine)()
origins = session.query(Origin).join(Lease, Origin.origin_ref == Lease.origin_ref, isouter=True).filter(Lease.lease_ref.is_(None)).all()
origin_refs = [origin.origin_ref for origin in origins]
deletions = session.query(Origin).filter(Origin.origin_ref.in_(origin_refs)).delete()
deletions = session.query(Origin).filter(Origin.origin_ref in origin_refs).delete()
session.commit()
session.close()
return deletions
@@ -207,24 +70,18 @@ class Origin(Base):
class Lease(Base):
__tablename__ = "lease"
instance_ref = Column(CHAR(length=36), ForeignKey(Instance.instance_ref, ondelete='CASCADE'), nullable=False, index=True) # uuid4
lease_ref = Column(CHAR(length=36), primary_key=True, nullable=False, index=True) # uuid4
origin_ref = Column(CHAR(length=36), ForeignKey(Origin.origin_ref, ondelete='CASCADE'), nullable=False, index=True) # uuid4
# scope_ref = Column(CHAR(length=36), nullable=False, index=True) # uuid4 # not necessary, we only support one scope_ref ('ALLOTMENT_REF')
lease_created = Column(DATETIME(), nullable=False)
lease_expires = Column(DATETIME(), nullable=False)
lease_updated = Column(DATETIME(), nullable=False)
__instance = relationship(Instance, foreign_keys=[instance_ref])
__origin = relationship(Origin, foreign_keys=[origin_ref])
def __repr__(self):
return f'Lease(origin_ref={self.origin_ref}, lease_ref={self.lease_ref}, expires={self.lease_expires})'
def serialize(self) -> dict:
renewal_period = self.__instance.lease_renewal_period
renewal_delta = self.__instance.get_lease_renewal_delta
def serialize(self, renewal_period: float, renewal_delta: timedelta) -> dict:
lease_renewal = int(Lease.calculate_renewal(renewal_period, renewal_delta).total_seconds())
lease_renewal = self.lease_updated + relativedelta(seconds=lease_renewal)
@@ -232,14 +89,15 @@ class Lease(Base):
'lease_ref': self.lease_ref,
'origin_ref': self.origin_ref,
# 'scope_ref': self.scope_ref,
'lease_created': self.lease_created.replace(tzinfo=timezone.utc).isoformat(),
'lease_expires': self.lease_expires.replace(tzinfo=timezone.utc).isoformat(),
'lease_updated': self.lease_updated.replace(tzinfo=timezone.utc).isoformat(),
'lease_renewal': lease_renewal.replace(tzinfo=timezone.utc).isoformat(),
'lease_created': self.lease_created.isoformat(),
'lease_expires': self.lease_expires.isoformat(),
'lease_updated': self.lease_updated.isoformat(),
'lease_renewal': lease_renewal.isoformat(),
}
@staticmethod
def create_statement(engine: Engine):
from sqlalchemy.schema import CreateTable
return CreateTable(Lease.__table__).compile(engine)
@staticmethod
@@ -305,7 +163,7 @@ class Lease(Base):
@staticmethod
def delete_expired(engine: Engine) -> int:
session = sessionmaker(bind=engine)()
deletions = session.query(Lease).filter(Lease.lease_expires <= datetime.now(UTC)).delete()
deletions = session.query(Lease).filter(Lease.lease_expires <= datetime.utcnow()).delete()
session.commit()
session.close()
return deletions
@@ -333,104 +191,38 @@ class Lease(Base):
return renew
def init_default_site(session: Session):
    """Create the default site and its default instance with a freshly generated key pair.

    The site is committed first, then the instance that references it is
    added and committed.
    """
    generated_private_key = PrivateKey.generate()
    generated_public_key = generated_private_key.public_key()

    default_site = Site(site_key=Site.INITIAL_SITE_KEY_XID, name=Site.INITIAL_SITE_NAME)
    session.add(default_site)
    session.commit()

    instance_fields = dict(
        instance_ref=Instance.DEFAULT_INSTANCE_REF,
        site_key=default_site.site_key,
        private_key=generated_private_key.pem(),
        public_key=generated_public_key.pem(),
    )
    default_instance = Instance(**instance_fields)
    session.add(default_instance)
    session.commit()
def init(engine: Engine):
tables = [Site, Instance, Origin, Lease]
tables = [Origin, Lease]
db = inspect(engine)
session = sessionmaker(bind=engine)()
for table in tables:
exists = db.dialect.has_table(engine.connect(), table.__tablename__)
logger.info(f'> Table "{table.__tablename__:<16}" exists: {exists}')
if not exists:
if not db.dialect.has_table(engine.connect(), table.__tablename__):
session.execute(text(str(table.create_statement(engine))))
session.commit()
# create default site
cnt = session.query(Site).count()
if cnt == 0:
init_default_site(session)
session.flush()
session.close()
def migrate(engine: Engine):
db = inspect(engine)
# todo: add update guide to use 1.LATEST to 2.0
def upgrade_1_x_to_2_0():
site = Site.get_default_site(engine)
logger.info(site)
instance = Instance.get_default_instance(engine)
logger.info(instance)
def upgrade_1_0_to_1_1():
x = db.dialect.get_columns(engine.connect(), Lease.__tablename__)
x = next(_ for _ in x if _['name'] == 'origin_ref')
if x['primary_key'] > 0:
print('Found old database schema with "origin_ref" as primary-key in "lease" table. Dropping table!')
print(' Your leases are recreated on next renewal!')
print(' If an error message appears on the client, you can ignore it.')
Lease.__table__.drop(bind=engine)
init(engine)
# SITE_KEY_XID
if site_key := env('SITE_KEY_XID', None) is not None:
site.site_key = str(site_key)
# def upgrade_1_2_to_1_3():
# x = db.dialect.get_columns(engine.connect(), Lease.__tablename__)
# x = next((_ for _ in x if _['name'] == 'scope_ref'), None)
# if x is None:
# Lease.scope_ref.compile()
# column_name = Lease.scope_ref.name
# column_type = Lease.scope_ref.type.compile(engine.dialect)
# engine.execute(f'ALTER TABLE "{Lease.__tablename__}" ADD COLUMN "{column_name}" {column_type}')
# INSTANCE_REF
if instance_ref := env('INSTANCE_REF', None) is not None:
instance.instance_ref = str(instance_ref)
# ALLOTMENT_REF
if allotment_ref := env('ALLOTMENT_REF', None) is not None:
pass # todo
# INSTANCE_KEY_RSA, INSTANCE_KEY_PUB
default_instance_private_key_path = str(join(dirname(__file__), 'cert/instance.private.pem'))
instance_private_key = env('INSTANCE_KEY_RSA', None)
if instance_private_key is not None:
instance.private_key = PrivateKey(instance_private_key.encode('utf-8'))
elif isfile(default_instance_private_key_path):
instance.private_key = PrivateKey.from_file(default_instance_private_key_path)
default_instance_public_key_path = str(join(dirname(__file__), 'cert/instance.public.pem'))
instance_public_key = env('INSTANCE_KEY_PUB', None)
if instance_public_key is not None:
instance.public_key = PublicKey(instance_public_key.encode('utf-8'))
elif isfile(default_instance_public_key_path):
instance.public_key = PublicKey.from_file(default_instance_public_key_path)
# TOKEN_EXPIRE_DELTA
token_expire_delta = env('TOKEN_EXPIRE_DAYS', None)
if token_expire_delta not in (None, 0):
instance.token_expire_delta = token_expire_delta * 86_400
token_expire_delta = env('TOKEN_EXPIRE_HOURS', None)
if token_expire_delta not in (None, 0):
instance.token_expire_delta = token_expire_delta * 3_600
# LEASE_EXPIRE_DELTA, LEASE_RENEWAL_DELTA
lease_expire_delta = env('LEASE_EXPIRE_DAYS', None)
if lease_expire_delta not in (None, 0):
instance.lease_expire_delta = lease_expire_delta * 86_400
lease_expire_delta = env('LEASE_EXPIRE_HOURS', None)
if lease_expire_delta not in (None, 0):
instance.lease_expire_delta = lease_expire_delta * 3_600
# LEASE_RENEWAL_PERIOD
lease_renewal_period = env('LEASE_RENEWAL_PERIOD', None)
if lease_renewal_period is not None:
instance.lease_renewal_period = lease_renewal_period
# todo: update site, instance
upgrade_1_x_to_2_0()
upgrade_1_0_to_1_1()
# upgrade_1_2_to_1_3()

View File

@@ -1,132 +1,28 @@
import logging
from json import load as json_load
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPrivateKey, RSAPublicKey, generate_private_key
from cryptography.hazmat.primitives.serialization import load_pem_private_key, load_pem_public_key
logging.basicConfig()
def load_file(filename: str) -> bytes:
    """Return the complete contents of ``filename`` as bytes.

    The file is opened in binary mode, so no decoding takes place. Errors
    raised by ``open`` (for example a missing file) propagate to the caller.
    """
    log = logging.getLogger(__name__)
    # Fixed: the closing quote after the file name was missing in the message.
    log.debug(f'Loading contents of file "{filename}"')
    with open(filename, 'rb') as file:
        return file.read()
class PrivateKey:
def load_key(filename) -> "RsaKey":
try:
# Crypto | Cryptodome on Debian
from Crypto.PublicKey import RSA
from Crypto.PublicKey.RSA import RsaKey
except ModuleNotFoundError:
from Cryptodome.PublicKey import RSA
from Cryptodome.PublicKey.RSA import RsaKey
def __init__(self, data: bytes):
self.__key = load_pem_private_key(data, password=None)
@staticmethod
def from_file(filename: str) -> "PrivateKey":
log = logging.getLogger(__name__)
log.debug(f'Importing RSA-Private-Key from "{filename}"')
with open(filename, 'rb') as f:
data = f.read()
return PrivateKey(data=data.strip())
def raw(self) -> RSAPrivateKey:
return self.__key
def pem(self) -> bytes:
return self.__key.private_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PrivateFormat.TraditionalOpenSSL,
encryption_algorithm=serialization.NoEncryption()
)
def public_key(self) -> "PublicKey":
data = self.__key.public_key().public_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PublicFormat.SubjectPublicKeyInfo
)
return PublicKey(data=data)
@staticmethod
def generate(public_exponent: int = 65537, key_size: int = 2048) -> "PrivateKey":
log = logging.getLogger(__name__)
log.debug(f'Generating RSA-Key')
key = generate_private_key(public_exponent=public_exponent, key_size=key_size)
data = key.private_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PrivateFormat.TraditionalOpenSSL,
encryption_algorithm=serialization.NoEncryption()
)
return PrivateKey(data=data)
return RSA.import_key(extern_key=load_file(filename), passphrase=None)
class PublicKey:
def generate_key() -> "RsaKey":
try:
# Crypto | Cryptodome on Debian
from Crypto.PublicKey import RSA
from Crypto.PublicKey.RSA import RsaKey
except ModuleNotFoundError:
from Cryptodome.PublicKey import RSA
from Cryptodome.PublicKey.RSA import RsaKey
def __init__(self, data: bytes):
self.__key = load_pem_public_key(data)
@staticmethod
def from_file(filename: str) -> "PublicKey":
log = logging.getLogger(__name__)
log.debug(f'Importing RSA-Public-Key from "{filename}"')
with open(filename, 'rb') as f:
data = f.read()
return PublicKey(data=data.strip())
def raw(self) -> RSAPublicKey:
return self.__key
def pem(self) -> bytes:
return self.__key.public_bytes(
encoding=serialization.Encoding.PEM,
format=serialization.PublicFormat.SubjectPublicKeyInfo
)
class DriverMatrix:
__DRIVER_MATRIX_FILENAME = 'static/driver_matrix.json'
__DRIVER_MATRIX: None | dict = None # https://docs.nvidia.com/grid/ => "Driver Versions"
def __init__(self):
self.log = logging.getLogger(self.__class__.__name__)
if DriverMatrix.__DRIVER_MATRIX is None:
self.__load()
def __load(self):
try:
file = open(DriverMatrix.__DRIVER_MATRIX_FILENAME)
DriverMatrix.__DRIVER_MATRIX = json_load(file)
file.close()
self.log.debug(f'Successfully loaded "{DriverMatrix.__DRIVER_MATRIX_FILENAME}".')
except Exception as e:
DriverMatrix.__DRIVER_MATRIX = {} # init empty dict to not try open file everytime, just when restarting app
# self.log.warning(f'Failed to load "{DriverMatrix.__DRIVER_MATRIX_FILENAME}": {e}')
@staticmethod
def find(version: str) -> dict | None:
if DriverMatrix.__DRIVER_MATRIX is None:
return None
for idx, (key, branch) in enumerate(DriverMatrix.__DRIVER_MATRIX.items()):
for release in branch.get('$releases'):
linux_driver = release.get('Linux Driver')
windows_driver = release.get('Windows Driver')
if version == linux_driver or version == windows_driver:
tmp = branch.copy()
tmp.pop('$releases')
is_latest = release.get('vGPU Software') == branch.get('Latest Release in Branch')
return {
'software_branch': branch.get('vGPU Software Branch'),
'branch_version': release.get('vGPU Software'),
'driver_branch': branch.get('Driver Branch'),
'branch_status': branch.get('vGPU Branch Status'),
'release_date': release.get('Release Date'),
'eol': branch.get('EOL Date') if is_latest else None,
'is_latest': is_latest,
}
return None
return RSA.generate(bits=2048)

26
doc/Database.md Normal file
View File

@@ -0,0 +1,26 @@
# Database structure
## `request_routing.service_instance`
| xid | org_name |
|----------------------------------------|--------------------------|
| `10000000-0000-0000-0000-000000000000` | `lic-000000000000000000` |
- `xid` is used as `SERVICE_INSTANCE_XID`
## `request_routing.license_allotment_service_instance`
| xid | service_instance_xid | license_allotment_xid |
|----------------------------------------|----------------------------------------|----------------------------------------|
| `90000000-0000-0000-0000-000000000001` | `10000000-0000-0000-0000-000000000000` | `80000000-0000-0000-0000-000000000001` |
- `xid` is only a primary-key and never used as foreign-key or reference
- `license_allotment_xid` must be used to look up the matching `xid` values in `request_routing.license_allotment_reference`
## `request_routing.license_allotment_reference`
| xid | license_allotment_xid |
|----------------------------------------|----------------------------------------|
| `20000000-0000-0000-0000-000000000001` | `80000000-0000-0000-0000-000000000001` |
- `xid` is used as `scope_ref_list` on token request

View File

@@ -0,0 +1,177 @@
# Reverse Engineering Notes
# Useful commands
## Check licensing status
- `nvidia-smi -q | grep "License"`
**Output**
```
vGPU Software Licensed Product
License Status : Licensed (Expiry: 2023-1-14 12:59:52 GMT)
```
## Track licensing progress
- NVIDIA Grid Log: `journalctl -u nvidia-gridd -f`
```
systemd[1]: Started NVIDIA Grid Daemon.
nvidia-gridd[2986]: Configuration parameter ( ServerAddress ) not set
nvidia-gridd[2986]: vGPU Software package (0)
nvidia-gridd[2986]: Ignore service provider and node-locked licensing
nvidia-gridd[2986]: NLS initialized
nvidia-gridd[2986]: Acquiring license. (Info: license.nvidia.space; NVIDIA RTX Virtual Workstation)
nvidia-gridd[2986]: License acquired successfully. (Info: license.nvidia.space, NVIDIA RTX Virtual Workstation; Expiry: 2023-1-29 22:3:0 GMT)
```
# DLS-Container File-System (Docker)
## Configuration data
Most variables and configs are stored in `/var/lib/docker/volumes/configurations/_data`.
Files can be copied out of the container with `docker cp <container-id>:/venv/... /opt/localfile/...`, modified locally, and copied back the same way.
(You may need to fix permissions afterwards with `docker exec -u 0 <container-id> chown nonroot:nonroot /venv/...`)
## Dive / Docker image inspector
- `dive dls:appliance`
The source code is stored in `/venv/lib/python3.9/site-packages/nls_*`.
Image-Reference:
```
Tags: (unavailable)
Id: d1c7976a5d2b3681ff6c5a30f8187e4015187a83f3f285ba4a37a45458bd6b98
Digest: sha256:311223c5af7a298ec1104f5dc8c3019bfb0e1f77256dc3d995244ffb295a971f
Command:
#(nop) ADD file:c1900d3e3a29c29a743a8da86c437006ec5d2aa873fb24e48033b6bf492bb37b in /
```
## Private Key (Site-Key)
- `/etc/dls/config/decryptor/decryptor`
```shell
docker exec -it <container-id> /etc/dls/config/decryptor/decryptor > /tmp/private-key.pem
```
```
-----BEGIN RSA PRIVATE KEY-----
...
-----END RSA PRIVATE KEY-----
```
## Site Key Uri - `/etc/dls/config/site_key_uri.bin`
```
base64-content...
```
## DB Password - `/etc/dls/config/dls_db_password.bin`
```
base64-content...
```
**Decrypt database password**
```
cd /var/lib/docker/volumes/configurations/_data
cat dls_db_password.bin | base64 -d > dls_db_password.bin.raw
openssl rsautl -decrypt -inkey /tmp/private-key.pem -in dls_db_password.bin.raw
```
# Database
- Manipulating the license records in the database is sufficient. No line of code has to be changed to bypass the
licensing validations.
# Logging / Stack Trace
- https://docs.nvidia.com/license-system/latest/nvidia-license-system-user-guide/index.html#troubleshooting-dls-instance
**Failed licensing log**
```
{
"activity": 100,
"context": {
"SERVICE_INSTANCE_ID": "b43d6e46-d6d0-4943-8b8d-c66a5f6e0d38",
"SERVICE_INSTANCE_NAME": "DEFAULT_2022-12-14_12:48:30",
"description": "borrow failed: NotFoundError(no pool features found for: NVIDIA RTX Virtual Workstation)",
"event_type": null,
"function_name": "_evt",
"lineno": 54,
"module_name": "nls_dal_lease_dls.event",
"operation_id": "e72a8ca7-34cc-4e11-b80c-273592085a24",
"origin_ref": "3f7f5a50-a26b-425b-8d5e-157f63e72b1c",
"service_name": "nls_services_lease"
},
"detail": {
"oc": {
"license_allotment_xid": "10c4317f-7c4c-11ed-a524-0e4252a7e5f1",
"origin_ref": "3f7f5a50-a26b-425b-8d5e-157f63e72b1c",
"service_instance_xid": "b43d6e46-d6d0-4943-8b8d-c66a5f6e0d38"
},
"operation_id": "e72a8ca7-34cc-4e11-b80c-273592085a24"
},
"id": "0cc9e092-3b92-4652-8d9e-7622ef85dc79",
"metadata": {},
"ts": "2022-12-15T10:25:36.827661Z"
}
{
"activity": 400,
"context": {
"SERVICE_INSTANCE_ID": "b43d6e46-d6d0-4943-8b8d-c66a5f6e0d38",
"SERVICE_INSTANCE_NAME": "DEFAULT_2022-12-14_12:48:30",
"description": "lease_multi_create failed: no pool features found for: NVIDIA RTX Virtual Workstation",
"event_by": "system",
"function_name": "lease_multi_create",
"level": "warning",
"lineno": 157,
"module_name": "nls_services_lease.controllers.lease_multi_controller",
"operation_id": "e72a8ca7-34cc-4e11-b80c-273592085a24",
"service_name": "nls_services_lease"
},
"detail": {
"_msg": "lease_multi_create failed: no pool features found for: NVIDIA RTX Virtual Workstation",
"exec_info": ["NotFoundError", "NotFoundError(no pool features found for: NVIDIA RTX Virtual Workstation)", " File \"/venv/lib/python3.9/site-packages/nls_services_lease/controllers/lease_multi_controller.py\", line 127, in lease_multi_create\n data = _leaseMulti.lease_multi_create(event_args)\n File \"/venv/lib/python3.9/site-packages/nls_core_lease/lease_multi.py\", line 208, in lease_multi_create\n raise e\n File \"/venv/lib/python3.9/site-packages/nls_core_lease/lease_multi.py\", line 184, in lease_multi_create\n self._try_proposals(oc, mlr, results, detail)\n File \"/venv/lib/python3.9/site-packages/nls_core_lease/lease_multi.py\", line 219, in _try_proposals\n lease = self._leases.create(creator)\n File \"/venv/lib/python3.9/site-packages/nls_dal_lease_dls/leases.py\", line 230, in create\n features = self._get_features(creator)\n File \"/venv/lib/python3.9/site-packages/nls_dal_lease_dls/leases.py\", line 148, in _get_features\n self._explain_not_available(cur, creator)\n File \"/venv/lib/python3.9/site-packages/nls_dal_lease_dls/leases.py\", line 299, in _explain_not_available\n raise NotFoundError(f'no pool features found for: {lcc.product_name}')\n"],
"operation_id": "e72a8ca7-34cc-4e11-b80c-273592085a24"
},
"id": "282801b9-d612-40a5-9145-b56d8e420dac",
"metadata": {},
"ts": "2022-12-15T10:25:36.831673Z"
}
```
**Stack Trace**
```
"NotFoundError", "NotFoundError(no pool features found for: NVIDIA RTX Virtual Workstation)", " File \"/venv/lib/python3.9/site-packages/nls_services_lease/controllers/lease_multi_controller.py\", line 127, in lease_multi_create
data = _leaseMulti.lease_multi_create(event_args)
File \"/venv/lib/python3.9/site-packages/nls_core_lease/lease_multi.py\", line 208, in lease_multi_create
raise e
File \"/venv/lib/python3.9/site-packages/nls_core_lease/lease_multi.py\", line 184, in lease_multi_create
self._try_proposals(oc, mlr, results, detail)
File \"/venv/lib/python3.9/site-packages/nls_core_lease/lease_multi.py\", line 219, in _try_proposals
lease = self._leases.create(creator)
File \"/venv/lib/python3.9/site-packages/nls_dal_lease_dls/leases.py\", line 230, in create
features = self._get_features(creator)
File \"/venv/lib/python3.9/site-packages/nls_dal_lease_dls/leases.py\", line 148, in _get_features
self._explain_not_available(cur, creator)
File \"/venv/lib/python3.9/site-packages/nls_dal_lease_dls/leases.py\", line 299, in _explain_not_available
raise NotFoundError(f'no pool features found for: {lcc.product_name}')
"
```
# Nginx
- NGINX uses `/opt/certs/cert.pem` and `/opt/certs/key.pem`

View File

@@ -1,8 +1,8 @@
fastapi==0.115.12
uvicorn[standard]==0.34.1
python-jose[cryptography]==3.4.0
cryptography==44.0.2
python-dateutil==2.9.0
sqlalchemy==2.0.40
markdown==3.8
python-dotenv==1.1.0
fastapi==0.109.0
uvicorn[standard]==0.25.0
python-jose==3.3.0
pycryptodome==3.20.0
python-dateutil==2.8.2
sqlalchemy==2.0.25
markdown==3.5.2
python-dotenv==1.0.0

View File

@@ -1,123 +0,0 @@
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
URL = 'https://docs.nvidia.com/vgpu/index.html'
BRANCH_STATUS_KEY = 'vGPU Branch Status'
VGPU_KEY, GRID_KEY, DRIVER_BRANCH_KEY = 'vGPU Software', 'vGPU Software', 'Driver Branch'
LINUX_VGPU_MANAGER_KEY, LINUX_DRIVER_KEY = 'Linux vGPU Manager', 'Linux Driver'
WINDOWS_VGPU_MANAGER_KEY, WINDOWS_DRIVER_KEY = 'Windows vGPU Manager', 'Windows Driver'
ALT_VGPU_MANAGER_KEY = 'vGPU Manager'
RELEASE_DATE_KEY, LATEST_KEY, EOL_KEY = 'Release Date', 'Latest Release in Branch', 'EOL Date'
JSON_RELEASES_KEY = '$releases'
def __driver_versions(html: 'BeautifulSoup'):
    """Collect the driver-version tables of the parsed page into ``MATRIX``.

    For every accordion item inside the ``driver-versions`` wrapper, one entry
    is added to the module-level ``MATRIX``: the key is the lower-cased branch
    title (without the " Releases" suffix) and the value holds the list of
    table rows under ``JSON_RELEASES_KEY``.

    :param html: parsed document to search.
    """

    def __strip(_: str) -> str:
        # removes content after linebreak (e.g. "Hello\n World" to "Hello")
        return _.strip().split('\n')[0]

    # find wrapper for "DriverVersions" and find tables
    data = html.find('div', {'id': 'driver-versions'})
    items = data.find_all('bsp-accordion', {'class': 'Accordion-items-item'})
    for item in items:
        software_branch = item.find('div', {'class': 'Accordion-items-item-title'}).text.strip()
        software_branch = software_branch.replace(' Releases', '')
        matrix_key = software_branch.lower()

        # the status text is the sibling node following the "Branch status" link
        branch_status = item.find('a', href=True, string='Branch status')
        branch_status = branch_status.next_sibling.replace(':', '').strip()

        # driver version info from table-heads and table-rows
        table = item.find('table')
        headers = [header.text.strip() for header in table.find_all('th')]
        releases = []
        for row in table.find_all('tr'):
            tds = row.find_all('td')
            if not tds:  # skip rows without data cells (e.g. the header row)
                continue
            # create dict with table-heads as key and cell content as value
            x = {headers[i]: __strip(cell.text) for i, cell in enumerate(tds)}
            x.setdefault(BRANCH_STATUS_KEY, branch_status)
            releases.append(x)

        # add to matrix
        MATRIX.update({matrix_key: {JSON_RELEASES_KEY: releases}})
def __debug():
    """Print the content of ``MATRIX`` as an aligned plain-text table."""
    # print table head
    s = f'{VGPU_KEY:^13} | {LINUX_VGPU_MANAGER_KEY:^21} | {LINUX_DRIVER_KEY:^21} | {WINDOWS_VGPU_MANAGER_KEY:^21} | {WINDOWS_DRIVER_KEY:^21} | {RELEASE_DATE_KEY:>21} | {BRANCH_STATUS_KEY:^21}'
    print(s)

    # iterate over dict & format some variables to not overload table
    for branch in MATRIX.values():
        for release in branch.get(JSON_RELEASES_KEY):
            version = release.get(VGPU_KEY, release.get(GRID_KEY, ''))
            linux_manager = release.get(LINUX_VGPU_MANAGER_KEY, release.get(ALT_VGPU_MANAGER_KEY, ''))
            # default to '' because formatting None with a width spec raises TypeError
            linux_driver = release.get(LINUX_DRIVER_KEY, '')
            windows_manager = release.get(WINDOWS_VGPU_MANAGER_KEY, release.get(ALT_VGPU_MANAGER_KEY, ''))
            windows_driver = release.get(WINDOWS_DRIVER_KEY, '')
            release_date = release.get(RELEASE_DATE_KEY, '')
            is_latest = release.get(VGPU_KEY) == branch.get(LATEST_KEY)
            branch_status = __parse_branch_status(release.get(BRANCH_STATUS_KEY, ''))

            # mark the latest release of a branch with an asterisk
            version = f'{version} *' if is_latest else version
            s = f'{version:<13} | {linux_manager:<21} | {linux_driver:<21} | {windows_manager:<21} | {windows_driver:<21} | {release_date:>21} | {branch_status:^21}'
            print(s)
def __parse_branch_status(string: str) -> str:
    """Shorten a branch-status text so it fits into a narrow table column.

    :param string: branch-status text to abbreviate.
    :return: the text with the known long phrases replaced by short forms.
    """
    # applied top to bottom, one phrase after the other
    abbreviations = (
        ('Production Branch', 'Prod. -'),
        ('Long-Term Support Branch', 'LTS -'),
        ('supported until', ''),
        ('EOL since', 'EOL - '),
        ('EOL from', 'EOL -'),
    )
    for phrase, short in abbreviations:
        string = string.replace(phrase, short)
    return string
def __dump(filename: str):
    """Write ``MATRIX`` as JSON to ``filename``, overwriting an existing file.

    :param filename: path of the file to write.
    """
    import json

    # the context manager closes the file even if serialization fails
    with open(filename, 'w') as file:
        json.dump(MATRIX, file)
if __name__ == '__main__':
    # filled by __driver_versions(), read by __debug() and __dump()
    MATRIX = {}

    # third-party modules are imported here so a missing package produces a hint instead of a traceback
    try:
        import httpx
        from bs4 import BeautifulSoup
    except ImportError as e:
        logger.error(f'Failed to import module: {e}')
        logger.info('Run "pip install beautifulsoup4 httpx"')
        # SystemExit works without the interactive exit() helper injected by the site module
        raise SystemExit(1)

    r = httpx.get(URL)
    if r.status_code != 200:
        logger.error(f'Error loading "{URL}" with status code {r.status_code}.')
        raise SystemExit(2)

    # parse html
    soup = BeautifulSoup(r.text, features='html.parser')

    # build matrix
    __driver_versions(soup)

    # debug output
    __debug()

    # dump data to file
    __dump('../app/static/driver_matrix.json')

View File

@@ -1,39 +1,35 @@
import sys
from base64 import b64encode as b64enc
from calendar import timegm
from datetime import datetime, UTC
from hashlib import sha256
from os import getenv as env
from calendar import timegm
from datetime import datetime
from os.path import dirname, join
from uuid import uuid4, UUID
from dateutil.relativedelta import relativedelta
from jose import jwt
from jose import jwt, jwk
from jose.constants import ALGORITHMS
from sqlalchemy import create_engine
from starlette.testclient import TestClient
import sys
# add relative path to use packages as they were in the app/ dir
sys.path.append('../')
sys.path.append('../app')
from app import main
from orm import init as db_init, migrate, Site, Instance
from app.util import load_key
client = TestClient(main.app)
ORIGIN_REF, ALLOTMENT_REF, SECRET = str(uuid4()), '20000000-0000-0000-0000-000000000001', 'HelloWorld'
# fastapi setup
client = TestClient(main.app)
# INSTANCE_KEY_RSA = generate_key()
# INSTANCE_KEY_PUB = INSTANCE_KEY_RSA.public_key()
# database setup
db = create_engine(str(env('DATABASE', 'sqlite:///db.sqlite')))
db_init(db), migrate(db)
INSTANCE_KEY_RSA = load_key(str(join(dirname(__file__), '../app/cert/instance.private.pem')))
INSTANCE_KEY_PUB = load_key(str(join(dirname(__file__), '../app/cert/instance.public.pem')))
# test vars
DEFAULT_SITE, DEFAULT_INSTANCE = Site.get_default_site(db), Instance.get_default_instance(db)
SITE_KEY = DEFAULT_SITE.site_key
jwt_encode_key, jwt_decode_key = DEFAULT_INSTANCE.get_jwt_encode_key(), DEFAULT_INSTANCE.get_jwt_decode_key()
jwt_encode_key = jwk.construct(INSTANCE_KEY_RSA.export_key().decode('utf-8'), algorithm=ALGORITHMS.RS256)
jwt_decode_key = jwk.construct(INSTANCE_KEY_PUB.export_key().decode('utf-8'), algorithm=ALGORITHMS.RS256)
def __bearer_token(origin_ref: str) -> str:
@@ -42,12 +38,6 @@ def __bearer_token(origin_ref: str) -> str:
return token
def test_initial_default_site_and_instance():
default_site, default_instance = Site.get_default_site(db), Instance.get_default_instance(db)
assert default_site.site_key == Site.INITIAL_SITE_KEY_XID
assert default_instance.instance_ref == Instance.DEFAULT_INSTANCE_REF
def test_index():
response = client.get('/')
assert response.status_code == 200
@@ -116,7 +106,6 @@ def test_auth_v1_origin():
assert response.json().get('origin_ref') == ORIGIN_REF
def auth_v1_origin_update():
payload = {
"registration_pending": False,
@@ -152,7 +141,7 @@ def test_auth_v1_code():
def test_auth_v1_token():
cur_time = datetime.now(UTC)
cur_time = datetime.utcnow()
access_expires_on = cur_time + relativedelta(hours=1)
payload = {
@@ -164,7 +153,8 @@ def test_auth_v1_token():
"kid": "00000000-0000-0000-0000-000000000000"
}
payload = {
"auth_code": jwt.encode(payload, key=jwt_encode_key, headers={'kid': payload.get('kid')}, algorithm=ALGORITHMS.RS256),
"auth_code": jwt.encode(payload, key=jwt_encode_key, headers={'kid': payload.get('kid')},
algorithm=ALGORITHMS.RS256),
"code_verifier": SECRET,
}
@@ -197,6 +187,8 @@ def test_leasing_v1_lessor():
assert len(lease_result_list[0]['lease']['ref']) == 36
assert str(UUID(lease_result_list[0]['lease']['ref'])) == lease_result_list[0]['lease']['ref']
return lease_result_list[0]['lease']['ref']
def test_leasing_v1_lessor_lease():
response = client.get('/leasing/v1/lessor/leases', headers={'authorization': __bearer_token(ORIGIN_REF)})
@@ -239,23 +231,7 @@ def test_leasing_v1_lease_delete():
def test_leasing_v1_lessor_lease_remove():
# see "test_leasing_v1_lessor()"
payload = {
'fulfillment_context': {
'fulfillment_class_ref_list': []
},
'lease_proposal_list': [{
'license_type_qualifiers': {'count': 1},
'product': {'name': 'NVIDIA RTX Virtual Workstation'}
}],
'proposal_evaluation_mode': 'ALL_OF',
'scope_ref_list': [ALLOTMENT_REF]
}
response = client.post('/leasing/v1/lessor', json=payload, headers={'authorization': __bearer_token(ORIGIN_REF)})
lease_result_list = response.json().get('lease_result_list')
lease_ref = lease_result_list[0]['lease']['ref']
#
lease_ref = test_leasing_v1_lessor()
response = client.delete('/leasing/v1/lessor/leases', headers={'authorization': __bearer_token(ORIGIN_REF)})
assert response.status_code == 200