Compare commits

...

3 Commits

Author SHA1 Message Date
eball
9a6674e51a ci: skip processing vendor files in Olares.yaml 2025-12-22 19:18:47 +08:00
eball
bab074cd37 daemon: bump jws sdk version (#2287) 2025-12-22 17:58:55 +08:00
dkeven
afb7d49455 fix(cli): unify node GPU info update logic (#2288) 2025-12-22 17:58:35 +08:00
6 changed files with 99 additions and 75 deletions

View File

@@ -36,6 +36,10 @@ fi
find $BASE_DIR/../ -type f -name Olares.yaml | while read f; do
if [[ "$f" == *"/vendor/"* ]]; then
echo "skip vendor file $f"
continue
fi
echo "Processing $f"
declare -a bins
IFS=

View File

@@ -187,7 +187,7 @@ func (m *InstallPluginModule) Init() {
Prepare: &prepare.PrepareCollection{
new(common.OnlyFirstMaster),
},
Action: new(UpdateNodeLabels),
Action: new(UpdateNodeGPUInfo),
Parallel: false,
Retry: 1,
}
@@ -223,23 +223,6 @@ func (m *InstallPluginModule) Init() {
}
}
type GetCudaVersionModule struct {
common.KubeModule
}
func (g *GetCudaVersionModule) Init() {
g.Name = "GetCudaVersion"
getCudaVersion := &task.LocalTask{
Name: "GetCudaVersion",
Action: new(GetCudaVersion),
}
g.Tasks = []task.Interface{
getCudaVersion,
}
}
type NodeLabelingModule struct {
common.KubeModule
}
@@ -253,7 +236,7 @@ func (l *NodeLabelingModule) Init() {
new(CudaInstalled),
new(CurrentNodeInK8s),
},
Action: new(UpdateNodeLabels),
Action: new(UpdateNodeGPUInfo),
Retry: 1,
}

View File

@@ -10,7 +10,10 @@ import (
"strings"
"time"
v1alpha1 "bytetrade.io/web3os/app-service/api/sys.bytetrade.io/v1alpha1"
apputils "bytetrade.io/web3os/app-service/pkg/utils"
ctrl "sigs.k8s.io/controller-runtime"
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
"github.com/beclab/Olares/cli/pkg/clientset"
"github.com/beclab/Olares/cli/pkg/common"
@@ -26,7 +29,11 @@ import (
"github.com/pelletier/go-toml"
"github.com/pkg/errors"
apixclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/util/retry"
)
@@ -323,59 +330,11 @@ func (t *CheckGpuStatus) Execute(runtime connector.Runtime) error {
return fmt.Errorf("GPU Container State is Pending")
}
type GetCudaVersion struct {
type UpdateNodeGPUInfo struct {
common.KubeAction
}
func (g *GetCudaVersion) Execute(runtime connector.Runtime) error {
var nvidiaSmiFile string
var systemInfo = runtime.GetSystemInfo()
switch {
case systemInfo.IsWsl():
nvidiaSmiFile = "/usr/lib/wsl/lib/nvidia-smi"
default:
nvidiaSmiFile = "/usr/bin/nvidia-smi"
}
if !util.IsExist(nvidiaSmiFile) {
logger.Info("nvidia-smi not exists")
return nil
}
var cudaVersion string
res, err := runtime.GetRunner().Cmd(fmt.Sprintf("%s --version", nvidiaSmiFile), false, true)
if err != nil {
logger.Errorf("get cuda version error %v", err)
return nil
}
lines := strings.Split(res, "\n")
if len(lines) == 0 {
return nil
}
for _, line := range lines {
if strings.Contains(line, "CUDA Version") {
parts := strings.Split(line, ":")
if len(parts) != 2 {
break
}
cudaVersion = strings.TrimSpace(parts[1])
}
}
if cudaVersion != "" {
common.SetSystemEnv("OLARES_SYSTEM_CUDA_VERSION", cudaVersion)
}
return nil
}
type UpdateNodeLabels struct {
common.KubeAction
}
func (u *UpdateNodeLabels) Execute(runtime connector.Runtime) error {
func (u *UpdateNodeGPUInfo) Execute(runtime connector.Runtime) error {
client, err := clientset.NewKubeClient()
if err != nil {
return errors.Wrap(errors.WithStack(err), "kubeclient create error")
@@ -482,6 +441,85 @@ func UpdateNodeGpuLabel(ctx context.Context, client kubernetes.Interface, driver
}
}
if cuda != nil && *cuda != "" {
if err := updateCudaVersionSystemEnv(ctx, *cuda); err != nil {
logger.Errorf("failed to update SystemEnv for CUDA version: %v", err)
return err
}
}
return nil
}
func updateCudaVersionSystemEnv(ctx context.Context, cudaVersion string) error {
envName := "OLARES_SYSTEM_CUDA_VERSION"
common.SetSystemEnv(envName, cudaVersion)
config, err := ctrl.GetConfig()
if err != nil {
return fmt.Errorf("failed to get rest config: %w", err)
}
apix, err := apixclientset.NewForConfig(config)
if err != nil {
return fmt.Errorf("failed to create crd client: %w", err)
}
_, err = apix.ApiextensionsV1().CustomResourceDefinitions().Get(ctx, "systemenvs.sys.bytetrade.io", metav1.GetOptions{})
if err != nil {
if apierrors.IsNotFound(err) {
logger.Debugf("SystemEnv CRD not found, skipping CUDA version update")
return nil
}
return fmt.Errorf("failed to get SystemEnv CRD: %w", err)
}
scheme := kruntime.NewScheme()
if err := v1alpha1.AddToScheme(scheme); err != nil {
return fmt.Errorf("failed to add systemenv scheme: %w", err)
}
c, err := ctrlclient.New(config, ctrlclient.Options{Scheme: scheme})
if err != nil {
return fmt.Errorf("failed to create client: %w", err)
}
resourceName, err := apputils.EnvNameToResourceName(envName)
if err != nil {
return fmt.Errorf("invalid system env name: %s", envName)
}
var existingSystemEnv v1alpha1.SystemEnv
err = c.Get(ctx, types.NamespacedName{Name: resourceName}, &existingSystemEnv)
if err == nil {
if existingSystemEnv.Default != cudaVersion {
existingSystemEnv.Default = cudaVersion
if err := c.Update(ctx, &existingSystemEnv); err != nil {
return fmt.Errorf("failed to update SystemEnv %s: %w", resourceName, err)
}
logger.Infof("Updated SystemEnv %s default to %s", resourceName, cudaVersion)
}
return nil
}
if !apierrors.IsNotFound(err) {
return fmt.Errorf("failed to get SystemEnv %s: %w", resourceName, err)
}
systemEnv := &v1alpha1.SystemEnv{
ObjectMeta: metav1.ObjectMeta{
Name: resourceName,
},
EnvVarSpec: v1alpha1.EnvVarSpec{
EnvName: envName,
Default: cudaVersion,
},
}
if err := c.Create(ctx, systemEnv); err != nil && !apierrors.IsAlreadyExists(err) {
return fmt.Errorf("failed to create SystemEnv %s: %w", resourceName, err)
}
logger.Infof("Created SystemEnv: %s with default %s", envName, cudaVersion)
return nil
}

View File

@@ -58,7 +58,6 @@ func (l *linuxInstallPhaseBuilder) installGpuPlugin() phase {
return []module.Module{
&gpu.RestartK3sServiceModule{Skip: !(l.runtime.Arg.Kubetype == common.K3s)},
&gpu.InstallPluginModule{Skip: skipGpuPlugin},
&gpu.GetCudaVersionModule{},
}
}

View File

@@ -22,7 +22,7 @@ require (
bytetrade.io/web3os/bfl v0.0.0-00010101000000-000000000000
github.com/Masterminds/semver/v3 v3.4.0
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2
github.com/beclab/Olares/cli v0.0.0-20251016092744-6241cceceb89
github.com/beclab/Olares/cli v0.0.0-20251219153848-63d422037cf9
github.com/containerd/containerd v1.7.28
github.com/distribution/distribution/v3 v3.0.0
github.com/dustin/go-humanize v1.0.1
@@ -39,6 +39,7 @@ require (
github.com/libp2p/go-netroute v0.2.2
github.com/mackerelio/go-osstat v0.2.5
github.com/mdlayher/raw v0.1.0
github.com/miekg/dns v1.1.55
github.com/muka/network_manager v0.0.0-20200903202308-ae5ede816e07
github.com/nxadm/tail v1.4.11
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
@@ -56,6 +57,7 @@ require (
golang.org/x/exp v0.0.0-20250819193227-8b4c13bb791b
golang.org/x/sys v0.35.0
k8s.io/api v0.34.1
k8s.io/apiextensions-apiserver v0.34.0
k8s.io/apimachinery v0.34.1
k8s.io/client-go v12.0.0+incompatible
k8s.io/cri-api v0.34.1
@@ -129,7 +131,6 @@ require (
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/mdlayher/packet v0.0.0-20220221164757-67998ac0ff93 // indirect
github.com/mdlayher/socket v0.2.1 // indirect
github.com/miekg/dns v1.1.55 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/moby/locker v1.0.1 // indirect
github.com/moby/spdystream v0.5.0 // indirect
@@ -200,7 +201,6 @@ require (
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
howett.net/plist v1.0.0 // indirect
k8s.io/apiextensions-apiserver v0.34.0 // indirect
k8s.io/apiserver v0.34.0 // indirect
k8s.io/component-base v0.34.1 // indirect
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect

View File

@@ -24,8 +24,8 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPd
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/beclab/Olares/cli v0.0.0-20251016092744-6241cceceb89 h1:5s9hXV8K3faToQtE9DbiM7O6jt5kIiEsLAaKn6F0UfA=
github.com/beclab/Olares/cli v0.0.0-20251016092744-6241cceceb89/go.mod h1:iEvZxM6PnFxFRppneTzV3hgr2tIxDnsI3dhp4pi7pFg=
github.com/beclab/Olares/cli v0.0.0-20251219153848-63d422037cf9 h1:YNHfPra2FqsKJ5mAxSWNVIK6VyWygRyZiNwfPqiFxlg=
github.com/beclab/Olares/cli v0.0.0-20251219153848-63d422037cf9/go.mod h1:cYPcuju2yRSp9BQjIN/CC495dDOOvVoL42r/gvFlutk=
github.com/beclab/app-service v0.4.37 h1:gt60wQxgPWMc3oN94TNSdiQAvzqTyCv/OUP93jNSQTY=
github.com/beclab/app-service v0.4.37/go.mod h1:0vEg3rv/DbR7dYznvTlXNXyYNn+TXNMaxz03GQYRWUQ=
github.com/beclab/bfl v0.3.36 h1:PgeSPGc+XoONiwFsKq9xX8rqcL4kVM1G/ut0lYYj/js=