feat(search): add ocis search optimize CLI command (#12136)

Add a CLI-only optimize command that compacts the Bleve search index by
merging segments. The command opens the index directly via a new engine
factory (NewEngineFromConfig), without requiring the running gRPC service.

Key changes per review feedback from jvillafanez:
- CLI-only: no gRPC endpoint, no proto changes — admin triggers it
  directly when disruption is acceptable
- Engine factory: extracted engine creation from the gRPC handler into
  NewEngineFromConfig, reusable by both the service and CLI commands
- Optimize() merged into Engine interface: no separate Optimizer
  interface, non-supporting engines can return an error

Signed-off-by: Paul Faure <paul@faure.ca>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Martin <github@diemattels.at>
This commit is contained in:
Paul Faure
2026-04-17 05:16:03 -04:00
committed by GitHub
parent 196664c0f0
commit a17760898c
8 changed files with 136 additions and 36 deletions

View File

@@ -0,0 +1,12 @@
Enhancement: Add `ocis search optimize` CLI command
Added a new `ocis search optimize` command that compacts the search index
by merging Bleve segments, without re-indexing content. The command opens
the index directly (without requiring the search service to be running),
making it safe to run during maintenance windows without blocking search
queries.
This is useful after bulk reindexing operations that create many small
index segments, which can degrade search performance over time.
https://github.com/owncloud/ocis/pull/12136

View File

@@ -0,0 +1,40 @@
package command
import (
"context"
"fmt"
"github.com/urfave/cli/v2"
"github.com/owncloud/ocis/v2/ocis-pkg/config/configlog"
"github.com/owncloud/ocis/v2/services/search/pkg/config"
"github.com/owncloud/ocis/v2/services/search/pkg/config/parser"
"github.com/owncloud/ocis/v2/services/search/pkg/engine"
)
// Optimize is the entrypoint for the optimize command.
//
// The returned command parses the search service configuration in its Before
// hook, opens the configured engine through engine.NewEngineFromConfig and
// calls the engine's Optimize method. Progress and failures are printed to
// stdout, and any failure is also returned to the CLI framework.
func Optimize(cfg *config.Config) *cli.Command {
	return &cli.Command{
		Name:     "optimize",
		Usage:    "compact the search index by merging segments, without re-indexing content",
		Category: "index management",
		Before: func(_ *cli.Context) error {
			return configlog.ReturnFatal(parser.ParseConfig(cfg))
		},
		// The result is named so that the deferred close below can report
		// its own failure to the caller.
		Action: func(_ *cli.Context) (err error) {
			eng, closer, err := engine.NewEngineFromConfig(cfg)
			if err != nil {
				return err
			}
			defer func() {
				// A failed close must not be dropped silently, but it must
				// not mask an earlier optimize error either.
				if closeErr := closer.Close(); closeErr != nil && err == nil {
					fmt.Println("failed to close index: " + closeErr.Error())
					err = closeErr
				}
			}()

			fmt.Println("optimizing search index...")
			if optErr := eng.Optimize(context.Background()); optErr != nil {
				fmt.Println("failed to optimize index: " + optErr.Error())
				return optErr
			}
			fmt.Println("index optimization complete")
			return nil
		},
	}
}

View File

@@ -17,6 +17,7 @@ func GetCommands(cfg *config.Config) cli.Commands {
// interaction with this service
Index(cfg),
Optimize(cfg),
// infos about this service
Health(cfg),

View File

@@ -3,6 +3,8 @@ package engine
import (
"context"
"errors"
"fmt"
"io"
"regexp"
"github.com/blevesearch/bleve/v2/search"
@@ -10,7 +12,10 @@ import (
searchMessage "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0"
searchService "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0"
bleveEngine "github.com/owncloud/ocis/v2/services/search/pkg/engine/bleve"
"github.com/owncloud/ocis/v2/services/search/pkg/config"
"github.com/owncloud/ocis/v2/services/search/pkg/content"
"github.com/owncloud/ocis/v2/services/search/pkg/query/bleve"
)
// ErrResourceNotFound is returned when a resource is not present in the index.
@@ -29,12 +34,33 @@ type Engine interface {
Restore(id string) error
Purge(id string) error
DocCount() (uint64, error)
Optimize(ctx context.Context) error
}
// Optimizer is an optional interface that Engine implementations may support
// to trigger index compaction. Callers should type-assert before use.
type Optimizer interface {
Optimize(ctx context.Context) error
// NewEngineFromConfig builds the search Engine selected by cfg.Engine.Type.
//
// Only the "bleve" engine type is handled; every other value yields an
// "unknown search engine" error. For bleve, a persistent index getter is
// created for cfg.Engine.Bleve.Datapath and is replaced by the scale variant
// when cfg.Engine.Bleve.Scale is set.
//
// The engine is returned a second time as an io.Closer: callers must Close it
// to release the underlying index resources. This factory is used by CLI
// commands that need direct engine access without starting the full gRPC
// service.
func NewEngineFromConfig(cfg *config.Config) (Engine, io.Closer, error) {
	// Reject anything that is not bleve up front.
	if cfg.Engine.Type != "bleve" {
		return nil, nil, fmt.Errorf("unknown search engine: %s", cfg.Engine.Type)
	}

	mapping, err := BuildBleveMapping()
	if err != nil {
		return nil, nil, err
	}

	dataPath := cfg.Engine.Bleve.Datapath
	var getter bleveEngine.IndexGetter = bleveEngine.NewIndexGetterPersistent(dataPath, mapping)
	if cfg.Engine.Bleve.Scale {
		getter = bleveEngine.NewIndexGetterPersistentScale(dataPath, mapping)
	}

	created := NewBleveEngine(getter, bleve.DefaultCreator)
	return created, created, nil
}
// Resource is the entity that is stored in the index.

View File

@@ -125,6 +125,52 @@ func (_c *Engine_DocCount_Call) RunAndReturn(run func() (uint64, error)) *Engine
return _c
}
// Optimize provides a mock function with given fields: ctx
//
// It panics when no return value was configured for the call. A configured
// return value of type func(context.Context) error is invoked with ctx and its
// result returned; otherwise the first configured return value is returned as
// an error.
func (_m *Engine) Optimize(ctx context.Context) error {
	results := _m.Called(ctx)
	if len(results) == 0 {
		panic("no return value specified for Optimize")
	}

	if fn, isFunc := results.Get(0).(func(context.Context) error); isFunc {
		return fn(ctx)
	}
	return results.Error(0)
}
// Engine_Optimize_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Optimize'.
// It embeds *mock.Call, so the methods of the wrapped call stay reachable
// through the embedded field.
type Engine_Optimize_Call struct {
*mock.Call
}
// Optimize registers an expectation for the mocked "Optimize" method via
// mock.On and wraps the resulting call in a typed Engine_Optimize_Call.
//   - ctx context.Context
func (_e *Engine_Expecter) Optimize(ctx interface{}) *Engine_Optimize_Call {
	expected := _e.mock.On("Optimize", ctx)
	return &Engine_Optimize_Call{Call: expected}
}
// Run registers a callback on the underlying mock call. The callback receives
// the first call argument, asserted to context.Context. The receiver is
// returned so calls can be chained.
func (_c *Engine_Optimize_Call) Run(run func(ctx context.Context)) *Engine_Optimize_Call {
	handler := func(args mock.Arguments) {
		ctx := args[0].(context.Context)
		run(ctx)
	}
	_c.Call.Run(handler)
	return _c
}
// Return records _a0 as the return value on the underlying mock call and
// returns the receiver so further calls can be chained.
func (_c *Engine_Optimize_Call) Return(_a0 error) *Engine_Optimize_Call {
_c.Call.Return(_a0)
return _c
}
// RunAndReturn records the run function itself as the return value of the
// underlying mock call. The mocked Optimize method detects a return value of
// type func(context.Context) error and invokes it with the call's context, so
// run both observes the call and decides its result.
func (_c *Engine_Optimize_Call) RunAndReturn(run func(context.Context) error) *Engine_Optimize_Call {
_c.Call.Return(run)
return _c
}
// Lookup provides a mock function with given fields: id
func (_m *Engine) Lookup(id string) (*engine.Resource, error) {
ret := _m.Called(id)

View File

@@ -491,13 +491,6 @@ func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId) error {
logDocCount(s.engine, s.logger)
if opt, ok := s.engine.(engine.Optimizer); ok {
s.logger.Info().Msg("optimizing search index after space walk")
if err := opt.Optimize(ownerCtx); err != nil {
s.logger.Warn().Err(err).Msg("index optimization failed")
}
}
return nil
}

View File

@@ -147,6 +147,7 @@ var _ = Describe("Searchprovider", func() {
Status: status.NewOK(ctx),
}, nil)
indexClient.On("DocCount").Return(uint64(1), nil)
indexClient.On("Optimize", mock.Anything).Return(nil)
})
Describe("New", func() {

View File

@@ -30,8 +30,6 @@ import (
"github.com/owncloud/ocis/v2/services/search/pkg/config"
"github.com/owncloud/ocis/v2/services/search/pkg/content"
"github.com/owncloud/ocis/v2/services/search/pkg/engine"
bleveEngine "github.com/owncloud/ocis/v2/services/search/pkg/engine/bleve"
"github.com/owncloud/ocis/v2/services/search/pkg/query/bleve"
"github.com/owncloud/ocis/v2/services/search/pkg/search"
)
@@ -43,29 +41,12 @@ func NewHandler(opts ...Option) (searchsvc.SearchProviderHandler, func(), error)
cfg := options.Config
// initialize search engine
var eng engine.Engine
switch cfg.Engine.Type {
case "bleve":
bleveMapping, err := engine.BuildBleveMapping()
if err != nil {
return nil, teardown, err
}
var indexGetter bleveEngine.IndexGetter
indexGetter = bleveEngine.NewIndexGetterPersistent(cfg.Engine.Bleve.Datapath, bleveMapping)
if cfg.Engine.Bleve.Scale {
indexGetter = bleveEngine.NewIndexGetterPersistentScale(cfg.Engine.Bleve.Datapath, bleveMapping)
}
bleveEngine := engine.NewBleveEngine(indexGetter, bleve.DefaultCreator)
teardown = func() {
_ = bleveEngine.Close()
}
eng = bleveEngine
default:
return nil, teardown, fmt.Errorf("unknown search engine: %s", cfg.Engine.Type)
eng, engCloser, err := engine.NewEngineFromConfig(cfg)
if err != nil {
return nil, teardown, err
}
teardown = func() {
_ = engCloser.Close()
}
// initialize gateway