mirror of
https://github.com/owncloud/ocis
synced 2026-04-25 17:25:21 +02:00
Merge pull request #12104 from paul43210/fix/search-bleve-optimize
enhancement(search): optimize bleve index after bulk reindexing
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
Enhancement: Optimize search index after bulk reindexing
|
||||
|
||||
After an `IndexSpace` walk completes, the search engine now triggers a
segment merge (compaction) on the bleve index. Over time, writes create
multiple index segments that degrade query performance. The new
`Optimize()` method calls bleve's `ForceMerge` to consolidate all
segments into one, improving subsequent search and lookup speed. This is
especially beneficial after bulk reindexing large spaces.
|
||||
|
||||
https://github.com/owncloud/ocis/pull/12104
|
||||
https://github.com/owncloud/ocis/issues/12093
|
||||
@@ -2,6 +2,7 @@ package engine
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"path"
|
||||
"reflect"
|
||||
@@ -15,6 +16,7 @@ import (
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/porter"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/blevesearch/bleve/v2/index/scorch/mergeplan"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
|
||||
@@ -402,6 +404,33 @@ func (b *Bleve) Purge(id string) error {
|
||||
return bleveIndex.Delete(id)
|
||||
}
|
||||
|
||||
// Optimize triggers a force merge of the bleve index segments into a single
|
||||
// segment, improving query performance. This is an expensive I/O operation
|
||||
// and should be called during low-usage periods (e.g., after bulk indexing).
|
||||
func (b *Bleve) Optimize(ctx context.Context) error {
|
||||
bleveIndex, closeFn, err := b.indexGetter.GetIndex()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer closeFn()
|
||||
|
||||
internal, err := bleveIndex.Advanced()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to access internal index: %w", err)
|
||||
}
|
||||
|
||||
type forceMerger interface {
|
||||
ForceMerge(ctx context.Context, mo *mergeplan.MergePlanOptions) error
|
||||
}
|
||||
|
||||
fm, ok := internal.(forceMerger)
|
||||
if !ok {
|
||||
return fmt.Errorf("index implementation does not support force merge")
|
||||
}
|
||||
|
||||
return fm.ForceMerge(ctx, nil)
|
||||
}
|
||||
|
||||
// DocCount returns the number of resources in the index.
|
||||
func (b *Bleve) DocCount() (uint64, error) {
|
||||
bleveIndex, closeFn, err := b.indexGetter.GetIndex(bleveEngine.ReadOnly(true))
|
||||
|
||||
@@ -30,6 +30,12 @@ type Engine interface {
|
||||
DocCount() (uint64, error)
|
||||
}
|
||||
|
||||
// Optimizer describes the optional index compaction capability of an Engine.
// It is deliberately kept separate from the Engine interface: not every
// implementation has to provide it, so callers are expected to type-assert
// an Engine to Optimizer and only call Optimize when the assertion succeeds.
type Optimizer interface {
	// Optimize triggers compaction of the index. The context is handed to
	// the implementation; a non-nil error means the compaction failed.
	Optimize(ctx context.Context) error
}
|
||||
|
||||
// Resource is the entity that is stored in the index.
|
||||
type Resource struct {
|
||||
content.Document
|
||||
|
||||
@@ -486,6 +486,13 @@ func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId) error {
|
||||
|
||||
logDocCount(s.engine, s.logger)
|
||||
|
||||
if opt, ok := s.engine.(engine.Optimizer); ok {
|
||||
s.logger.Info().Msg("optimizing search index after space walk")
|
||||
if err := opt.Optimize(ownerCtx); err != nil {
|
||||
s.logger.Warn().Err(err).Msg("index optimization failed")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user