Compare commits

...

2 Commits

Author SHA1 Message Date
liuyu
d8e4c29bfe app-service: underlay namespace labels modified 2025-04-10 19:14:12 +08:00
liuyu
cf27b3d715 olares: fix hami gpu monitoring configuration bug 2025-04-10 16:47:33 +08:00
7 changed files with 12 additions and 6 deletions

View File

@@ -815,7 +815,7 @@ data:
}
upstream HamiServer {
-server hami-webui.kube-system:8000;
+server hami-webui.kube-system:3000;
}
server {

View File

@@ -13,6 +13,12 @@ data:
# If line starts with a '#' it is considered a comment
# DCGM FIELD, Prometheus metric type, help message
DCGM_FI_DRIVER_VERSION, label, Driver Version.
DCGM_FI_DEV_BRAND, label, Device Brand.
DCGM_FI_DEV_SERIAL, label, Device Serial Number.
# Clocks
DCGM_FI_DEV_SM_CLOCK, gauge, SM clock frequency (in MHz).
DCGM_FI_DEV_MEM_CLOCK, gauge, Memory clock frequency (in MHz).

View File

@@ -17,7 +17,7 @@ apiVersion: {{ .Values.dcgmExporter.serviceMonitor.apiVersion }}
kind: ServiceMonitor
metadata:
name: {{ include "dcgm-exporter.fullname" . }}
-namespace: {{ include "dcgm-exporter.namespace" . }}
+namespace: kubesphere-monitoring-system
labels:
{{- include "dcgm-exporter.labels" . | nindent 4 }}
app.kubernetes.io/component: "dcgm-exporter"

View File

@@ -3,7 +3,7 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ include "hami-webui.fullname" . }}-hami-svc-monitor
-namespace: {{ include "hami-webui.namespace" . }}
+namespace: kubesphere-monitoring-system
labels:
{{- include "hami-webui.labels" . | nindent 4 }}
app.kubernetes.io/component: "hami-webui"

View File

@@ -17,7 +17,7 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ include "hami-webui.fullname" . }}-svc-monitor
-namespace: {{ include "hami-webui.namespace" . }}
+namespace: kubesphere-monitoring-system
labels:
{{- include "hami-webui.labels" . | nindent 4 }}
app.kubernetes.io/component: "hami-webui"

View File

@@ -405,7 +405,7 @@ webui:
replicaCount: 1
vendorNodeSelectors:
-NVIDIA: gpu=on
+NVIDIA: gpu.bytetrade.io/cuda-supported=true
Ascend: ascend=on
DCU: dcu=on
MLU: mlu=on

View File

@@ -149,7 +149,7 @@ spec:
priorityClassName: "system-cluster-critical"
containers:
- name: app-service
-image: beclab/app-service:0.3.21
+image: beclab/app-service:0.3.23
imagePullPolicy: IfNotPresent
securityContext:
runAsUser: 0