all repos — searchix @ 7c0e3729dd314a571fd635408fb89f24199e00b3

Search engine for NixOS, nix-darwin, home-manager and NUR users

feat: revert back to using index as storage

Alan Pearce
commit

7c0e3729dd314a571fd635408fb89f24199e00b3

parent

e0bbccf0b9c5e43bfa2ef02a5bb33c27b8bf5d00

1 file changed, 44 insertions(+), 8 deletions(-)

changed files
M internal/index/indexer.go
@@ -1,19 +1,21 @@
package index import ( + "bytes" "context" + "encoding/gob" "math" "alin.ovh/searchix/internal/config" "alin.ovh/searchix/internal/file" "alin.ovh/searchix/internal/index/nixattr" "alin.ovh/searchix/internal/nix" - "alin.ovh/searchix/internal/storage" "alin.ovh/x/log" "github.com/Southclaws/fault" "github.com/Southclaws/fault/fmsg" "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/analysis" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" "github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
@@ -23,7 +25,9 @@ "github.com/blevesearch/bleve/v2/analysis/token/ngram"
"github.com/blevesearch/bleve/v2/analysis/token/porter" "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" + "github.com/blevesearch/bleve/v2/document" "github.com/blevesearch/bleve/v2/mapping" + index "github.com/blevesearch/bleve_index_api" "go.uber.org/zap" )
@@ -31,7 +35,6 @@ type Options struct {
Force bool LowMemory bool BatchSize int - Store *storage.Store Logger *log.Logger Root *file.Root Config *config.Config
@@ -41,7 +44,6 @@ type WriteIndex struct {
batchSize int index bleve.Index log *log.Logger - store *storage.Store exists bool Meta *Meta }
@@ -49,6 +51,8 @@
type BatchError struct { error } + +var idAnalyzer analysis.Analyzer func createIndexMapping() (mapping.IndexMapping, error) { indexMapping := bleve.NewIndexMapping()
@@ -284,7 +288,6 @@
return &ReadIndex{ config: options.Config, log: options.Logger, - store: options.Store, exists: exists, index: idx, meta: meta,
@@ -294,7 +297,6 @@ exists: exists,
index: idx, batchSize: options.BatchSize, log: options.Logger, - store: options.Store, Meta: meta, }, nil
@@ -312,8 +314,30 @@ func (i *WriteIndex) Import(
ctx context.Context, objects <-chan nix.Importable, ) <-chan error { - return i.WithBatch(ctx, objects, func(batch *bleve.Batch, obj nix.Importable) error { - if err := batch.Index(nix.GetKey(obj), obj); err != nil { + indexMapping := i.index.Mapping() + + return i.WithBatchObjects(ctx, objects, func(batch *bleve.Batch, obj nix.Importable) error { + doc := document.NewDocument(nix.GetKey(obj)) + if err := indexMapping.MapDocument(doc, obj); err != nil { + return fault.Wrap(err, fmsg.Withf("could not map document for object: %s", obj.GetName())) + } + + var data bytes.Buffer + enc := gob.NewEncoder(&data) + if err := enc.Encode(&obj); err != nil { + return fault.Wrap(err, fmsg.With("could not store object in search index")) + } + field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), index.StoreField) + doc.AddField(field) + idField := document.NewTextFieldCustom( + "_id", nil, []byte(doc.ID()), + index.IndexField|index.StoreField|index.IncludeTermVectors, + idAnalyzer, + ) + doc.AddField(idField) + + // log.Debug("adding object to index", "name", opt.Name) + if err := batch.IndexAdvanced(doc); err != nil { return fault.Wrap(err, fmsg.Withf("could not index object %s", obj.GetName())) }
@@ -325,7 +349,19 @@ func (i *WriteIndex) GetBatchSize() int {
return i.batchSize } -func (i *WriteIndex) WithBatch( +func (i *WriteIndex) WithBatch(fn func(batch *bleve.Batch)) error { + batch := i.index.NewBatch() + fn(batch) + + err := i.Flush(batch) + if err != nil { + return fault.Wrap(err, fmsg.With("could not flush batch")) + } + + return nil +} + +func (i *WriteIndex) WithBatchObjects( ctx context.Context, objects <-chan nix.Importable, processor func(batch *bleve.Batch, obj nix.Importable) error,