# Copyright (c) 2023 Georgios Alexopoulos
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# DaemonSet that runs two containers on every selected GPU node:
#   * nvshare-lib           - bind-mounts /libnvshare.so from its image onto
#                             the host path /var/run/nvshare/libnvshare.so.
#   * nvshare-device-plugin - the device plugin itself, which talks to the
#                             kubelet through /var/lib/kubelet/device-plugins.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvshare-device-plugin
  namespace: nvshare-system
spec:
  selector:
    matchLabels:
      name: nvshare-device-plugin
  template:
    metadata:
      labels:
        name: nvshare-device-plugin
    spec:
      runtimeClassName: nvidia  # Explicitly request the runtime
      priorityClassName: system-node-critical
      nodeSelector:
        # NOTE(review): this key contains two '/' separators. Kubernetes label
        # keys allow at most one (optional DNS-subdomain prefix, '/', name), so
        # the API server may reject this manifest - confirm the label that GPU
        # nodes actually carry before changing it.
        gpu.bytetrade.io/driver/cuda-supported: 'true'
      initContainers:
        # Clears a directory found at the library path on the host so that the
        # nvshare-lib container can create a regular file there. Always exits
        # successfully (`|| true`), including when nothing had to be removed.
        # Presumably such a directory is left behind when a hostPath mount was
        # set up before the file existed - TODO confirm.
        - name: init-dir
          image: busybox:1.28
          volumeMounts:
            - name: host-var-run-nvshare
              mountPath: /var/run/nvshare
          command:
            - sh
            - -c
            - "[ -d /var/run/nvshare/libnvshare.so ] && rm -rf /var/run/nvshare/libnvshare.so || true"
      containers:
        # Does no work of its own (`sleep infinity`); it exists only for its
        # lifecycle hooks, which publish and withdraw libnvshare.so on the host.
        - name: nvshare-lib
          image: beclab/nvshare:libnvshare-v0.0.1
          command:
            - sleep
            - infinity
          lifecycle:
            postStart:
              exec:
                # The shell evaluates the `||` / `&&` chain left to right:
                #   1. a regular file already exists -> go straight to mount;
                #   2. a directory exists -> remove it; the trailing `false`
                #      forces the chain to continue to `touch`;
                #   3. `touch` creates an empty file to act as mount target;
                #   4. bind-mount the image's /libnvshare.so over that file.
                command:
                  - "/bin/sh"
                  - "-c"
                  - "test -f /host-var-run-nvshare/libnvshare.so || ( test -d /host-var-run-nvshare/libnvshare.so && rm -rf /host-var-run-nvshare/libnvshare.so && false ) || touch /host-var-run-nvshare/libnvshare.so && mount -v --bind /libnvshare.so /host-var-run-nvshare/libnvshare.so"
            preStop:
              exec:
                # Undo postStart: unmount the library, then delete the
                # placeholder file (only if the unmount succeeded).
                command:
                  - "/bin/sh"
                  - "-c"
                  - "umount -v /host-var-run-nvshare/libnvshare.so && rm -rf /host-var-run-nvshare/libnvshare.so"
          securityContext:
            # Necessary for mounts to work.
            privileged: true
          volumeMounts:
            - mountPath: /host-var-run-nvshare
              name: host-var-run-nvshare
              # A bidirectional mount ensures that mount points also show up on the
              # host. We need this because nvshare-device-plugin modifies the specs
              # of the Pods that request nvshare virtual GPUs and adds a hostPath
              # mount for /var/run/nvshare/libnvshare.so
              mountPropagation: Bidirectional
        - name: nvshare-device-plugin
          image: bytetrade/nvshare:nvshare-device-plugin
          imagePullPolicy: IfNotPresent
          env:
            # Quoted so the value reaches the container as a string.
            # Presumably the number of virtual GPUs advertised per physical
            # GPU - verify against the plugin's documentation.
            - name: NVSHARE_VIRTUAL_DEVICES
              value: "10"
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin-socket
              mountPath: /var/lib/kubelet/device-plugins
          resources:
            limits:
              # The plugin container itself is allocated one NVIDIA GPU.
              nvidia.com/gpu: 1
      volumes:
        # Host directory shared with init-dir and nvshare-lib; created on the
        # host if it does not exist yet.
        - name: host-var-run-nvshare
          hostPath:
            path: /var/run/nvshare
            type: DirectoryOrCreate
        # Kubelet device-plugin directory on the host.
        - name: device-plugin-socket
          hostPath:
            path: /var/lib/kubelet/device-plugins
      tolerations:
        # In some cases, GPU nodes have an nvidia.com/gpu taint to run only
        # GPU workloads. Tolerate that taint.
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule