# Files
# Olares/frameworks/GPU/config/gpu/templates/device-plugin.yaml
# 2024-04-29 20:12:53 +08:00
#
# 97 lines
# 2.7 KiB
# YAML

# ConfigMap holding the liveness-probe script for the device-plugin container.
# The DaemonSet in this file mounts the `probe.sh` key at /tmp/probe.sh and
# runs it as an exec liveness probe.
apiVersion: v1
kind: ConfigMap
metadata:
  name: probe.orionx-k8s-device-plugin
  # Quoted so that a digits-only namespace name is still rendered as a string.
  namespace: {{ .Release.Namespace | quote }}
data:
  # Exits 1 (probe failure) when `pidof` finds no k8s-device-plugin process;
  # otherwise falls through with the exit status of the `if` statement (0).
  probe.sh: |
    #!/bin/bash
    # Author: yuanzhilei@virtaitech.com
    #
    health=$(pidof k8s-device-plugin)
    if [[ -z $health ]]; then
    echo "k8s-device-plugin not exists."
    exit 1
    fi
---
# DaemonSet running the orionx-k8s-device-plugin container on the host
# network, with the kubelet device-plugins directory mounted from the host.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: orionx-k8s-device-plugin
  # Quoted so that a digits-only namespace name is still rendered as a string.
  namespace: {{ .Release.Namespace | quote }}
  labels:
    app: orionx-k8s-device-plugin
spec:
  selector:
    matchLabels:
      app: orionx-k8s-device-plugin
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: orionx-k8s-device-plugin
        orionx: orionx-k8s-device-plugin
    spec:
      priorityClassName: system-cluster-critical
      tolerations:
        # NOTE(review): only the legacy "master" taint is tolerated here;
        # clusters that taint nodes with node-role.kubernetes.io/control-plane
        # would need an additional entry - confirm against target clusters.
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
      hostNetwork: true
      # Pairs with hostNetwork so cluster DNS is still consulted first.
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - image: eball/orionx-k8s-device-plugin:3.1.5-20230707
          imagePullPolicy: Always
          name: device-plugin
          # Runs the script mounted from the probe ConfigMap (see volumeMounts).
          livenessProbe:
            exec:
              command:
                - /tmp/probe.sh
            periodSeconds: 10
            initialDelaySeconds: 60
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          # command: ["bash", "-c", "sleep 1d"]
          env:
            - name: ORION_CONTROLLER
              # Env values must be strings; `quote` keeps an empty, numeric or
              # boolean-looking chart value from being rendered as a non-string.
              value: {{ .Values.gpu.server | quote }}
            - name: ORION_VGPU_RESNAME
              value: "virtaitech.com/gpu"
            - name: NVIDIA_GPU_RESNAME
              value: "nvidia.com/gpu"
            - name: ORION_GMEM_UNIT
              value: "1024"
            - name: ORION_MNT_PATH
              value: '{"/var/tmp/orion/comm/":"/var/tmp/orion/comm:ro"}'
            - name: ORION_ENV
              value: '{"ORION_DRIVER_ONLY":"true","ORION_DEVICE_ENABLE":"1","ORION_EXPORT_CMD":"env | grep ORION; orion-smi -j","ORION_NV_SMI_FORCE_DISPLAY":"1"}'
            - name: REPORT_ONLY_LOCAL_VGPU
              value: "false"
            - name: DP_DISABLE_HEALTHCHECKS
              value: "all"
            # Populated from the pod's status.hostIP via the downward API.
            - name: ORION_HOST_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            # subPath mounts just the probe.sh key as a single file.
            - name: probe
              mountPath: /tmp/probe.sh
              subPath: probe.sh
            - name: localtime
              mountPath: /etc/localtime
      volumes:
        - name: localtime
          hostPath:
            path: /etc/localtime
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: probe
          configMap:
            name: probe.orionx-k8s-device-plugin
            # Must stay an unquoted integer: the field is numeric, and the
            # leading zero is read as octal (rwxr-xr-x) by the YAML 1.1 loader
            # that Kubernetes tooling uses, which makes the script executable.
            defaultMode: 0755