mirror of
https://github.com/owncloud/ocis
synced 2026-04-25 17:25:21 +02:00
Merge pull request #12103 from paul43210/fix/search-indexspace-docid-lookup
perf(search): use O(1) DocID lookup instead of full search in IndexSpace
This commit is contained in:
11
changelog/unreleased/fix-search-indexspace-docid-lookup.md
Normal file
11
changelog/unreleased/fix-search-indexspace-docid-lookup.md
Normal file
@@ -0,0 +1,11 @@
|
||||
Bugfix: Use O(1) document lookup instead of full search during reindexing
|
||||
|
||||
The `IndexSpace` bulk reindexer was using a full KQL search query per file
to check whether re-extraction was needed. On large indexes each query
took 600–950ms, making a 61,000-file space take ~13.5 hours just to
walk. The per-file `Search()` call is replaced with an O(1) `Lookup()` using
Bleve's `DocIDQuery`; mtime and extraction status are then compared in memory.
This reduces per-file check time from ~800ms to <1ms.
|
||||
|
||||
https://github.com/owncloud/ocis/pull/12096
|
||||
https://github.com/owncloud/ocis/issues/12093
|
||||
@@ -317,6 +317,18 @@ func (b *Bleve) Update(id string, mutateFn func(*Resource)) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Lookup retrieves a resource by its document ID using an O(1) DocIDQuery.
|
||||
// Returns ErrResourceNotFound if the resource is not in the index.
|
||||
func (b *Bleve) Lookup(id string) (*Resource, error) {
|
||||
bleveIndex, closeFn, err := b.indexGetter.GetIndex(bleveEngine.ReadOnly(true))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer closeFn()
|
||||
|
||||
return b.getResource(bleveIndex, id)
|
||||
}
|
||||
|
||||
// Move updates the resource location and all of its necessary fields.
|
||||
func (b *Bleve) Move(id string, parentid string, target string) error {
|
||||
bleveIndex, closeFn, err := b.indexGetter.GetIndex()
|
||||
|
||||
@@ -23,6 +23,7 @@ type Engine interface {
|
||||
Search(ctx context.Context, req *searchService.SearchIndexRequest) (*searchService.SearchIndexResponse, error)
|
||||
Upsert(id string, r Resource) error
|
||||
Update(id string, mutateFn func(*Resource)) error
|
||||
Lookup(id string) (*Resource, error)
|
||||
Move(id string, parentid string, target string) error
|
||||
Delete(id string) error
|
||||
Restore(id string) error
|
||||
|
||||
@@ -125,6 +125,67 @@ func (_c *Engine_DocCount_Call) RunAndReturn(run func() (uint64, error)) *Engine
|
||||
return _c
|
||||
}
|
||||
|
||||
// Lookup provides a mock function with given fields: id
|
||||
func (_m *Engine) Lookup(id string) (*engine.Resource, error) {
|
||||
ret := _m.Called(id)
|
||||
|
||||
if len(ret) == 0 {
|
||||
panic("no return value specified for Lookup")
|
||||
}
|
||||
|
||||
var r0 *engine.Resource
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(0).(func(string) (*engine.Resource, error)); ok {
|
||||
return rf(id)
|
||||
}
|
||||
if rf, ok := ret.Get(0).(func(string) *engine.Resource); ok {
|
||||
r0 = rf(id)
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
r0 = ret.Get(0).(*engine.Resource)
|
||||
}
|
||||
}
|
||||
|
||||
if rf, ok := ret.Get(1).(func(string) error); ok {
|
||||
r1 = rf(id)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
// Engine_Lookup_Call wraps a *mock.Call for the 'Lookup' method so that its
// Run, Return and RunAndReturn methods can be offered with explicit types
// instead of the untyped versions of the embedded call.
|
||||
type Engine_Lookup_Call struct {
|
||||
*mock.Call
|
||||
}
|
||||
|
||||
// Lookup is a helper method to define mock.On call
|
||||
// - id string
|
||||
func (_e *Engine_Expecter) Lookup(id interface{}) *Engine_Lookup_Call {
|
||||
return &Engine_Lookup_Call{Call: _e.mock.On("Lookup", id)}
|
||||
}
|
||||
|
||||
// Run sets a handler to be called when the Lookup mock is matched.
|
||||
func (_c *Engine_Lookup_Call) Run(run func(id string)) *Engine_Lookup_Call {
|
||||
_c.Call.Run(func(args mock.Arguments) {
|
||||
run(args[0].(string))
|
||||
})
|
||||
return _c
|
||||
}
|
||||
|
||||
// Return specifies the return values for the Lookup mock.
|
||||
func (_c *Engine_Lookup_Call) Return(_a0 *engine.Resource, _a1 error) *Engine_Lookup_Call {
|
||||
_c.Call.Return(_a0, _a1)
|
||||
return _c
|
||||
}
|
||||
|
||||
// RunAndReturn sets a handler that is called and whose return values are used as the mock's return values.
|
||||
func (_c *Engine_Lookup_Call) RunAndReturn(run func(string) (*engine.Resource, error)) *Engine_Lookup_Call {
|
||||
_c.Call.Return(run)
|
||||
return _c
|
||||
}
|
||||
|
||||
// Move provides a mock function with given fields: id, parentid, target
|
||||
func (_m *Engine) Move(id string, parentid string, target string) error {
|
||||
ret := _m.Called(id, parentid, target)
|
||||
|
||||
@@ -462,17 +462,19 @@ func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId) error {
|
||||
}
|
||||
s.logger.Debug().Str("path", ref.Path).Msg("Walking tree")
|
||||
|
||||
searchRes, err := s.engine.Search(ownerCtx, &searchsvc.SearchIndexRequest{
|
||||
Query: "id:" + storagespace.FormatResourceID(info.Id) + ` mtime>=` + utils.TSToTime(info.Mtime).Format(time.RFC3339Nano) + ` Extracted:true`,
|
||||
})
|
||||
|
||||
if err == nil && len(searchRes.Matches) >= 1 {
|
||||
if info.Type == provider.ResourceType_RESOURCE_TYPE_CONTAINER {
|
||||
s.logger.Debug().Str("path", ref.Path).Msg("subtree hasn't changed. Skipping.")
|
||||
return filepath.SkipDir
|
||||
resourceID := storagespace.FormatResourceID(info.Id)
|
||||
r, err := s.engine.Lookup(resourceID)
|
||||
if err == nil && r.Extracted {
|
||||
fileMtime := utils.TSToTime(info.Mtime)
|
||||
docMtime, parseErr := time.Parse(time.RFC3339Nano, r.Mtime)
|
||||
if parseErr == nil && !docMtime.Before(fileMtime) {
|
||||
if info.Type == provider.ResourceType_RESOURCE_TYPE_CONTAINER {
|
||||
s.logger.Debug().Str("path", ref.Path).Msg("subtree hasn't changed. Skipping.")
|
||||
return filepath.SkipDir
|
||||
}
|
||||
s.logger.Debug().Str("path", ref.Path).Msg("element hasn't changed. Skipping.")
|
||||
return nil
|
||||
}
|
||||
s.logger.Debug().Str("path", ref.Path).Msg("element hasn't changed. Skipping.")
|
||||
return nil
|
||||
}
|
||||
|
||||
s.UpsertItem(ref)
|
||||
|
||||
@@ -163,7 +163,7 @@ var _ = Describe("Searchprovider", func() {
|
||||
indexClient.On("Upsert", mock.Anything, mock.MatchedBy(func(r engine.Resource) bool {
|
||||
return r.ID == "storageid$spaceid!opaqueid" && r.Path == "./foo.pdf"
|
||||
})).Return(nil)
|
||||
indexClient.On("Search", mock.Anything, mock.Anything).Return(&searchsvc.SearchIndexResponse{}, nil)
|
||||
indexClient.On("Lookup", mock.Anything).Return(nil, engine.ErrResourceNotFound)
|
||||
gatewayClient.On("Stat", mock.Anything, mock.MatchedBy(func(sreq *sprovider.StatRequest) bool {
|
||||
return sreq.Ref.ResourceId.StorageId == "storageid" &&
|
||||
sreq.Ref.ResourceId.OpaqueId == "spaceid" &&
|
||||
|
||||
Reference in New Issue
Block a user