Compare commits

...

3 Commits

Author SHA1 Message Date
hysyeah
a57d3aead8 fix: check resource before resume operation (#2272) 2025-12-18 19:05:01 +08:00
hys
32be988b06 fix: check resource before resume operation 2025-12-18 18:57:37 +08:00
hys
b71626264e fix: update appservice image tag 2025-12-18 14:39:31 +08:00
9 changed files with 118 additions and 34 deletions

View File

@@ -170,7 +170,7 @@ spec:
priorityClassName: "system-cluster-critical" priorityClassName: "system-cluster-critical"
containers: containers:
- name: app-service - name: app-service
image: beclab/app-service:0.4.63 image: beclab/app-service:0.4.65
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
securityContext: securityContext:
runAsUser: 0 runAsUser: 0

View File

@@ -93,6 +93,7 @@ type RequirementResp struct {
Response Response
Resource string `json:"resource"` Resource string `json:"resource"`
Message string `json:"message"` Message string `json:"message"`
Reason string `json:"reason"`
} }
// AppSource describe the source of an application, recommend,model,agent // AppSource describe the source of an application, recommend,model,agent

View File

@@ -343,23 +343,25 @@ func (h *installHandlerHelper) validate(isAdmin bool, installedApps []*v1alpha1.
} }
//resourceType, err := CheckAppRequirement(h.h.kubeConfig, h.token, h.appConfig) //resourceType, err := CheckAppRequirement(h.h.kubeConfig, h.token, h.appConfig)
resourceType, err := apputils.CheckAppRequirement(h.token, h.appConfig) resourceType, resourceConditionType, err := apputils.CheckAppRequirement(h.token, h.appConfig, v1alpha1.InstallOp)
if err != nil { if err != nil {
klog.Errorf("Failed to check app requirement err=%v", err) klog.Errorf("Failed to check app requirement err=%v", err)
h.resp.WriteHeaderAndEntity(http.StatusBadRequest, api.RequirementResp{ h.resp.WriteHeaderAndEntity(http.StatusBadRequest, api.RequirementResp{
Response: api.Response{Code: 400}, Response: api.Response{Code: 400},
Resource: resourceType, Resource: resourceType.String(),
Message: err.Error(), Message: err.Error(),
Reason: resourceConditionType.String(),
}) })
return return
} }
resourceType, err = apputils.CheckUserResRequirement(h.req.Request.Context(), h.appConfig, h.owner) resourceType, resourceConditionType, err = apputils.CheckUserResRequirement(h.req.Request.Context(), h.appConfig, v1alpha1.InstallOp)
if err != nil { if err != nil {
h.resp.WriteHeaderAndEntity(http.StatusBadRequest, api.RequirementResp{ h.resp.WriteHeaderAndEntity(http.StatusBadRequest, api.RequirementResp{
Response: api.Response{Code: 400}, Response: api.Response{Code: 400},
Resource: resourceType, Resource: resourceType.String(),
Message: err.Error(), Message: err.Error(),
Reason: resourceConditionType.String(),
}) })
return return
} }

View File

@@ -1,13 +1,17 @@
package apiserver package apiserver
import ( import (
"encoding/json"
"errors" "errors"
"fmt" "fmt"
"k8s.io/klog/v2"
"net/http"
"strconv" "strconv"
"time" "time"
"bytetrade.io/web3os/app-service/api/app.bytetrade.io/v1alpha1" "bytetrade.io/web3os/app-service/api/app.bytetrade.io/v1alpha1"
"bytetrade.io/web3os/app-service/pkg/apiserver/api" "bytetrade.io/web3os/app-service/pkg/apiserver/api"
"bytetrade.io/web3os/app-service/pkg/appcfg"
"bytetrade.io/web3os/app-service/pkg/appstate" "bytetrade.io/web3os/app-service/pkg/appstate"
"bytetrade.io/web3os/app-service/pkg/constants" "bytetrade.io/web3os/app-service/pkg/constants"
"bytetrade.io/web3os/app-service/pkg/kubesphere" "bytetrade.io/web3os/app-service/pkg/kubesphere"
@@ -99,6 +103,12 @@ func (h *Handler) suspend(req *restful.Request, resp *restful.Response) {
func (h *Handler) resume(req *restful.Request, resp *restful.Response) { func (h *Handler) resume(req *restful.Request, resp *restful.Response) {
app := req.PathParameter(ParamAppName) app := req.PathParameter(ParamAppName)
owner := req.Attribute(constants.UserContextAttribute).(string) owner := req.Attribute(constants.UserContextAttribute).(string)
token, err := h.GetUserServiceAccountToken(req.Request.Context(), owner)
if err != nil {
klog.Error("Failed to get user service account token: ", err)
api.HandleError(resp, req, err)
return
}
name, err := apputils.FmtAppMgrName(app, owner, "") name, err := apputils.FmtAppMgrName(app, owner, "")
if err != nil { if err != nil {
@@ -116,6 +126,36 @@ func (h *Handler) resume(req *restful.Request, resp *restful.Response) {
api.HandleBadRequest(resp, req, fmt.Errorf("%s operation is not allowed for %s state", v1alpha1.ResumeOp, am.Status.State)) api.HandleBadRequest(resp, req, fmt.Errorf("%s operation is not allowed for %s state", v1alpha1.ResumeOp, am.Status.State))
return return
} }
var appCfg *appcfg.ApplicationConfig
err = json.Unmarshal([]byte(am.Spec.Config), &appCfg)
if err != nil {
klog.Errorf("unmarshal to appConfig failed %v", err)
api.HandleError(resp, req, err)
return
}
resourceType, resourceConditionType, err := apputils.CheckAppRequirement(token, appCfg, v1alpha1.ResumeOp)
if err != nil {
klog.Errorf("Failed to check app requirement err=%v", err)
resp.WriteHeaderAndEntity(http.StatusBadRequest, api.RequirementResp{
Response: api.Response{Code: 400},
Resource: resourceType.String(),
Message: err.Error(),
Reason: resourceConditionType.String(),
})
return
}
resourceType, resourceConditionType, err = apputils.CheckUserResRequirement(req.Request.Context(), appCfg, v1alpha1.ResumeOp)
if err != nil {
resp.WriteHeaderAndEntity(http.StatusBadRequest, api.RequirementResp{
Response: api.Response{Code: 400},
Resource: resourceType.String(),
Message: err.Error(),
Reason: resourceConditionType.String(),
})
return
}
am.Spec.OpType = v1alpha1.ResumeOp am.Spec.OpType = v1alpha1.ResumeOp
// if current user is admin, also resume server side // if current user is admin, also resume server side

View File

@@ -1382,12 +1382,12 @@ func (h *Handler) installOpValidate(ctx context.Context, appConfig *appcfg.Appli
if err != nil { if err != nil {
return err return err
} }
_, err = apputils.CheckAppRequirement("", appConfig) _, _, err = apputils.CheckAppRequirement("", appConfig, v1alpha1.InstallOp)
if err != nil { if err != nil {
return err return err
} }
_, err = apputils.CheckUserResRequirement(ctx, appConfig, appConfig.OwnerName) _, _, err = apputils.CheckUserResRequirement(ctx, appConfig, v1alpha1.InstallOp)
if err != nil { if err != nil {
return err return err
} }

View File

@@ -130,9 +130,6 @@ const (
var ( var (
empty = sets.Empty{} empty = sets.Empty{}
// States represents the state for whole application lifecycle.
States = sets.String{"pending": empty, "downloading": empty, "installing": empty, "initializing": empty, "running": empty,
"uninstalling": empty, "upgrading": empty, "suspend": empty, "resuming": empty}
// Sources represents the source of the application. // Sources represents the source of the application.
Sources = sets.String{"market": empty, "custom": empty, "devbox": empty, "system": empty, "unknown": empty} Sources = sets.String{"market": empty, "custom": empty, "devbox": empty, "system": empty, "unknown": empty}
// ResourceTypes represents the type of application system supported. // ResourceTypes represents the type of application system supported.
@@ -152,6 +149,47 @@ var (
OLARES_APP_NAME = "olares-app" OLARES_APP_NAME = "olares-app"
) )
// ResourceConditionType identifies the specific reason a resource check
// rejected an operation (for example system-wide versus per-user pressure).
type ResourceConditionType string

// Condition identifiers. Each constant's string value equals its identifier
// so the value exposed via String is stable and predictable for consumers.
const (
	DiskPressure             ResourceConditionType = "DiskPressure"
	SystemCPUPressure        ResourceConditionType = "SystemCPUPressure"
	SystemMemoryPressure     ResourceConditionType = "SystemMemoryPressure"
	SystemGPUNotAvailable    ResourceConditionType = "SystemGPUNotAvailable"
	SystemGPUPressure        ResourceConditionType = "SystemGPUPressure"
	K8sRequestCPUPressure    ResourceConditionType = "K8sRequestCPUPressure"
	K8sRequestMemoryPressure ResourceConditionType = "K8sRequestMemoryPressure"
	UserCPUPressure          ResourceConditionType = "UserCPUPressure"
	UserMemoryPressure       ResourceConditionType = "UserMemoryPressure"
)

// Message templates paired with the conditions above. Every template contains
// exactly one %s verb, which is filled with the operation being attempted.
const (
	DiskPressureMessage             string = "Insufficient disk space. Unable to %s the application. Please stop other running applications to free up storage."
	SystemCPUPressureMessage        string = "Insufficient system CPU. Unable to %s the application. Please stop other running applications to free up resources."
	SystemMemoryPressureMessage     string = "Insufficient system memory. Unable to %s the application. Please stop other running applications to free up memory."
	SystemGPUNotAvailableMessage    string = "No available GPU found. Unable to %s the application."
	SystemGPUPressureMessage        string = "Available GPU is insufficient to %s this application. The requested GPU memory cannot exceed the maximum GPU memory of the node."
	K8sRequestCPUPressureMessage    string = "Available CPU is insufficient to %s this application. Please stop other applications to free up resources."
	K8sRequestMemoryPressureMessage string = "Available memory is insufficient to %s this application. Please stop other applications to free up resources."
	UserCPUPressureMessage          string = "Insufficient user CPU. Unable to %s the application. Please stop other running applications to free up resources."
	UserMemoryPressureMessage       string = "Insufficient user memory. Unable to %s the application. Please stop other running applications to free up memory."
)

// String returns the condition identifier as a plain string.
func (rct ResourceConditionType) String() string {
	return string(rct)
}
// ResourceType names the kind of resource a requirement check refers to.
type ResourceType string

// Resource kinds that a requirement check can report.
const (
	Disk   = ResourceType("disk")
	CPU    = ResourceType("cpu")
	Memory = ResourceType("memory")
	GPU    = ResourceType("gpu")
)

// String returns the resource kind as a plain string.
func (rt ResourceType) String() string {
	name := string(rt)
	return name
}
func init() { func init() {
flag.StringVar(&APIServerListenAddress, "listen", ":6755", flag.StringVar(&APIServerListenAddress, "listen", ":6755",
"app-service listening address") "app-service listening address")

View File

@@ -199,7 +199,7 @@ func (imc *ImageManagerClient) PollDownloadProgress(ctx context.Context, am *app
} }
err = imc.updateProgress(ctx, am, &lastProgress, ret*100, am.Spec.OpType == appv1alpha1.UpgradeOp) err = imc.updateProgress(ctx, am, &lastProgress, ret*100, am.Spec.OpType == appv1alpha1.UpgradeOp)
if err == nil && im.Status.State == "completed" { if err == nil {
return nil return nil
} }

View File

@@ -242,6 +242,11 @@ func updateProgress(statuses []StatusInfo, ongoing *jobs, seen map[string]int64,
} }
for _, status := range statuses { for _, status := range statuses {
klog.Infof("status: %s,ref: %v, offset: %v, Total: %v", status.Status, status.Ref, status.Offset, status.Total) klog.Infof("status: %s,ref: %v, offset: %v, Total: %v", status.Status, status.Ref, status.Offset, status.Total)
if !isLayerType(status.Ref) {
statusesLen--
continue
}
if status.Status == "exists" { if status.Status == "exists" {
key := strings.Split(status.Ref, "-")[1] key := strings.Split(status.Ref, "-")[1]
offset += seen[key] offset += seen[key]
@@ -249,10 +254,6 @@ func updateProgress(statuses []StatusInfo, ongoing *jobs, seen map[string]int64,
continue continue
} }
if !isLayerType(status.Ref) {
statusesLen--
continue
}
if status.Status == "done" { if status.Status == "done" {
offset += status.Total offset += status.Total
doneLayer++ doneLayer++
@@ -261,7 +262,7 @@ func updateProgress(statuses []StatusInfo, ongoing *jobs, seen map[string]int64,
offset += status.Offset offset += status.Offset
} }
if doneLayer == statusesLen && doneLayer != 0 { if doneLayer == statusesLen && statusesLen > 0 {
offset = imageSize offset = imageSize
} }
if imageSize != 0 { if imageSize != 0 {

View File

@@ -207,39 +207,41 @@ func CheckUserRole(appConfig *appcfg.ApplicationConfig, owner string) error {
} }
// CheckAppRequirement check if the cluster has enough resources for application install/upgrade. // CheckAppRequirement check if the cluster has enough resources for application install/upgrade.
func CheckAppRequirement(token string, appConfig *appcfg.ApplicationConfig) (string, error) { func CheckAppRequirement(token string, appConfig *appcfg.ApplicationConfig, op v1alpha1.OpType) (constants.ResourceType, constants.ResourceConditionType, error) {
metrics, _, err := GetClusterResource(token) metrics, _, err := GetClusterResource(token)
if err != nil { if err != nil {
return "", err return "", "", err
} }
klog.Infof("start to %s app %s", op, appConfig.AppName)
klog.Infof("Current resource=%s", utils.PrettyJSON(metrics)) klog.Infof("Current resource=%s", utils.PrettyJSON(metrics))
klog.Infof("App required resource=%s", utils.PrettyJSON(appConfig.Requirement)) klog.Infof("App required resource=%s", utils.PrettyJSON(appConfig.Requirement))
if appConfig.Requirement.Disk != nil && if appConfig.Requirement.Disk != nil &&
appConfig.Requirement.Disk.CmpInt64(int64(metrics.Disk.Total*0.9-metrics.Disk.Usage)) > 0 || appConfig.Requirement.Disk.CmpInt64(int64(metrics.Disk.Total*0.9-metrics.Disk.Usage)) > 0 ||
int64(metrics.Disk.Total*0.9-metrics.Disk.Usage) < 5*1024*1024*1024 { int64(metrics.Disk.Total*0.9-metrics.Disk.Usage) < 5*1024*1024*1024 {
return "disk", errors.New("The app's DISK requirement cannot be satisfied") return constants.Disk, constants.DiskPressure, fmt.Errorf(constants.DiskPressureMessage, op)
} }
if appConfig.Requirement.Memory != nil && if appConfig.Requirement.Memory != nil &&
appConfig.Requirement.Memory.CmpInt64(int64(metrics.Memory.Total*0.9-metrics.Memory.Usage)) > 0 { appConfig.Requirement.Memory.CmpInt64(int64(metrics.Memory.Total*0.9-metrics.Memory.Usage)) > 0 {
return "memory", errors.New("The app's MEMORY requirement cannot be satisfied") return constants.Memory, constants.SystemMemoryPressure, fmt.Errorf(constants.SystemMemoryPressureMessage, op)
} }
if appConfig.Requirement.CPU != nil { if appConfig.Requirement.CPU != nil {
availableCPU, _ := resource.ParseQuantity(strconv.FormatFloat(metrics.CPU.Total*0.9-metrics.CPU.Usage, 'f', -1, 64)) availableCPU, _ := resource.ParseQuantity(strconv.FormatFloat(metrics.CPU.Total*0.9-metrics.CPU.Usage, 'f', -1, 64))
if appConfig.Requirement.CPU.Cmp(availableCPU) > 0 { if appConfig.Requirement.CPU.Cmp(availableCPU) > 0 {
return "cpu", errors.New("The app's CPU requirement cannot be satisfied") return constants.CPU, constants.SystemCPUPressure, fmt.Errorf(constants.SystemCPUPressureMessage, op)
} }
} }
if appConfig.Requirement.GPU != nil { if appConfig.Requirement.GPU != nil {
if !appConfig.Requirement.GPU.IsZero() && metrics.GPU.Total <= 0 { if !appConfig.Requirement.GPU.IsZero() && metrics.GPU.Total <= 0 {
return "gpu", errors.New("The app's GPU requirement cannot be satisfied") return constants.GPU, constants.SystemGPUNotAvailable, fmt.Errorf(constants.SystemGPUNotAvailableMessage, op)
} }
nodes, err := utils.GetNodeInfo(context.TODO()) nodes, err := utils.GetNodeInfo(context.TODO())
if err != nil { if err != nil {
klog.Errorf("failed to get node info %v", err) klog.Errorf("failed to get node info %v", err)
return "", err return "", "", err
} }
klog.Infof("nodes info: %#v", nodes) klog.Infof("nodes info: %#v", nodes)
var maxNodeGPUMem int64 var maxNodeGPUMem int64
@@ -254,13 +256,13 @@ func CheckAppRequirement(token string, appConfig *appcfg.ApplicationConfig) (str
} }
if appConfig.Requirement.GPU.CmpInt64(maxNodeGPUMem) > 0 { if appConfig.Requirement.GPU.CmpInt64(maxNodeGPUMem) > 0 {
return "gpu", errors.New("The app's GPU requirement cannot found satisfied node") return constants.GPU, constants.SystemGPUPressure, fmt.Errorf(constants.SystemGPUPressureMessage, op)
} }
} }
allocatedResources, err := getRequestResources() allocatedResources, err := getRequestResources()
if err != nil { if err != nil {
return "", err return "", "", err
} }
if len(allocatedResources) == 1 { if len(allocatedResources) == 1 {
sufficientCPU, sufficientMemory := false, false sufficientCPU, sufficientMemory := false, false
@@ -283,14 +285,14 @@ func CheckAppRequirement(token string, appConfig *appcfg.ApplicationConfig) (str
} }
} }
if !sufficientCPU { if !sufficientCPU {
return "cpu", errors.New("The app's CPU requirement specified in the kubernetes requests cannot be satisfied") return constants.CPU, constants.K8sRequestCPUPressure, fmt.Errorf(constants.K8sRequestCPUPressureMessage, op)
} }
if !sufficientMemory { if !sufficientMemory {
return "memory", errors.New("The app's MEMORY requirement specified in the kubernetes requests cannot be satisfied") return constants.Memory, constants.K8sRequestMemoryPressure, fmt.Errorf(constants.K8sRequestMemoryPressureMessage, op)
} }
} }
return "", nil return "", "", nil
} }
func getRequestResources() (map[string]resources, error) { func getRequestResources() (map[string]resources, error) {
@@ -450,22 +452,22 @@ func getValue(m *kubesphere.Metric) float64 {
} }
// CheckUserResRequirement check if the user has enough resources for application install/upgrade. // CheckUserResRequirement check if the user has enough resources for application install/upgrade.
func CheckUserResRequirement(ctx context.Context, appConfig *appcfg.ApplicationConfig, username string) (string, error) { func CheckUserResRequirement(ctx context.Context, appConfig *appcfg.ApplicationConfig, op v1alpha1.OpType) (constants.ResourceType, constants.ResourceConditionType, error) {
metrics, err := prometheus.GetCurUserResource(ctx, username) metrics, err := prometheus.GetCurUserResource(ctx, appConfig.OwnerName)
if err != nil { if err != nil {
return "", err return "", "", err
} }
switch { switch {
case appConfig.Requirement.Memory != nil && metrics.Memory.Total != 0 && case appConfig.Requirement.Memory != nil && metrics.Memory.Total != 0 &&
appConfig.Requirement.Memory.CmpInt64(int64(metrics.Memory.Total*0.9-metrics.Memory.Usage)) > 0: appConfig.Requirement.Memory.CmpInt64(int64(metrics.Memory.Total*0.9-metrics.Memory.Usage)) > 0:
return "memory", errors.New("The user's app MEMORY requirement cannot be satisfied") return constants.Memory, constants.UserMemoryPressure, fmt.Errorf(constants.UserMemoryPressureMessage, op)
case appConfig.Requirement.CPU != nil && metrics.CPU.Total != 0: case appConfig.Requirement.CPU != nil && metrics.CPU.Total != 0:
availableCPU, _ := resource.ParseQuantity(strconv.FormatFloat(metrics.CPU.Total*0.9-metrics.CPU.Usage, 'f', -1, 64)) availableCPU, _ := resource.ParseQuantity(strconv.FormatFloat(metrics.CPU.Total*0.9-metrics.CPU.Usage, 'f', -1, 64))
if appConfig.Requirement.CPU.Cmp(availableCPU) > 0 { if appConfig.Requirement.CPU.Cmp(availableCPU) > 0 {
return "cpu", errors.New("The user's app CPU requirement cannot be satisfied") return constants.CPU, constants.UserCPUPressure, fmt.Errorf(constants.UserCPUPressureMessage, op)
} }
} }
return "", nil return "", "", nil
} }
func CheckMiddlewareRequirement(ctx context.Context, ctrlClient client.Client, middleware *tapr.Middleware) (bool, error) { func CheckMiddlewareRequirement(ctx context.Context, ctrlClient client.Client, middleware *tapr.Middleware) (bool, error) {