all repos — searchix @ 7c0e3729dd314a571fd635408fb89f24199e00b3

Search engine for NixOS, nix-darwin, home-manager and NUR users

feat: revert back to using index as storage

Alan Pearce
commit

7c0e3729dd314a571fd635408fb89f24199e00b3

parent

e0bbccf0b9c5e43bfa2ef02a5bb33c27b8bf5d00

1 file changed, 168 insertions(+), 170 deletions(-)

changed files
M internal/importer/main.go
@@ -12,7 +12,6 @@
"alin.ovh/x/log" "github.com/Southclaws/fault" "github.com/Southclaws/fault/fmsg" - "github.com/asdine/storm/v3/q" "github.com/blevesearch/bleve/v2" "alin.ovh/searchix/internal/config"
@@ -20,19 +19,17 @@ "alin.ovh/searchix/internal/fetcher"
"alin.ovh/searchix/internal/file" "alin.ovh/searchix/internal/index" "alin.ovh/searchix/internal/manpages" - "alin.ovh/searchix/internal/nix" "alin.ovh/searchix/internal/programs" - "alin.ovh/searchix/internal/storage" ) type Options struct { LowMemory bool Offline bool Logger *log.Logger + ReadIndex *index.ReadIndex WriteIndex *index.WriteIndex Manpages *manpages.URLMap Root *file.Root - Storage *storage.Store } type Importer struct {
@@ -55,8 +52,7 @@
func (imp *Importer) Fetch( ctx context.Context, forceUpdate bool, - fetchOnly bool, - onlyUpdateSources *[]string, + onlyUpdateSources []string, ) error { if len(imp.config.Importer.Sources) == 0 { imp.options.Logger.Info("No sources enabled")
@@ -71,14 +67,14 @@ imp.config.Importer.Timeout.Duration,
) defer cancelImport() - forceUpdate = forceUpdate || (onlyUpdateSources != nil && len(*onlyUpdateSources) > 0) + forceUpdate = forceUpdate || (len(onlyUpdateSources) > 0) meta := imp.options.WriteIndex.Meta - importSource := imp.createSourceImporter(importCtx, meta, forceUpdate, fetchOnly) + importSource := imp.createSourceFetcher(importCtx, meta, forceUpdate) for name, source := range imp.config.Importer.Sources { - if onlyUpdateSources != nil && len(*onlyUpdateSources) > 0 { - if !slices.Contains(*onlyUpdateSources, name) { + if len(onlyUpdateSources) > 0 { + if !slices.Contains(onlyUpdateSources, name) { continue } }
@@ -91,15 +87,32 @@
return nil } -func (imp *Importer) Index(ctx context.Context) error { +func (imp *Importer) Index(ctx context.Context, onlyUpdateSources []string) error { + if len(imp.config.Importer.Sources) == 0 { + imp.options.Logger.Info("No sources enabled") + + return nil + } + + imp.options.Logger.Debug("starting importer", "timeout", imp.config.Importer.Timeout.Duration) + importCtx, cancelImport := context.WithTimeout( + ctx, + imp.config.Importer.Timeout.Duration, + ) + defer cancelImport() + + meta := imp.options.WriteIndex.Meta + + importSource := imp.createSourceImporter(importCtx, meta) for name, source := range imp.config.Importer.Sources { - hadErrors, err := imp.indexSource(ctx, source) + if len(onlyUpdateSources) > 0 { + if !slices.Contains(onlyUpdateSources, name) { + continue + } + } + err := importSource(source) if err != nil { - return fault.Wrap(err, fmsg.Withf("Failed to import source %s", name)) - } - - if hadErrors { - imp.options.Logger.Warn("Imported source encountered errors", "source", source.Name) + imp.options.Logger.Error("import failed", "source", name, "error", err) } }
@@ -130,8 +143,7 @@ imp.options.Logger.Info("adding new sources", "sources", newSources)
err := imp.Fetch( ctx, false, - false, - &newSources, + newSources, ) if err != nil { return fault.Wrap(err, fmsg.With("Failed to update index with new sources"))
@@ -162,76 +174,45 @@
return nil } -func (imp *Importer) PruneSource(ctx context.Context, source *config.Source) error { - store := imp.options.Storage +func (imp *Importer) PruneSource( + _ context.Context, + source *config.Source, +) error { + read := imp.options.ReadIndex write := imp.options.WriteIndex - tx, err := store.WithBatch(true).From(source.Key).Begin(true) - if err != nil { - return fault.Wrap(err, fmsg.With("Failed to begin transaction")) + if read == nil { + imp.options.Logger.DPanic("read index is not available") } - defer tx.Rollback() cutoff := write.Meta.LastImport.StartedAt imp.options.Logger.Debug("searching for old entities", "cutoff", cutoff.Format(time.RFC3339)) - query := tx.Select(q.Lt("ImportedAt", cutoff)) - - var obj nix.Importable - switch source.Importer { - case config.Options: - obj = new(nix.Option) - case config.Packages: - obj = new(nix.Package) - } - count, err := query.Count(obj) + maxCount, err := read.Count(source) if err != nil { return fault.Wrap(err, fmsg.With("failed to retrieve entities for pruning")) } - if count == 0 { - return nil - } - - maxCount, err := tx.Count(obj) + res, err := read.ImportedBefore(cutoff, source) if err != nil { return fault.Wrap(err, fmsg.With("failed to retrieve entities for pruning")) } - if float64(count) > (0.9 * float64(maxCount)) { - return fault.Newf("too many entities to prune: %d/%d (threshold: 90%%)", count, maxCount) + if res.Total == 0 { + return nil } - objs := make(chan nix.Importable, 1) - errs := write.WithBatch(ctx, objs, func(batch *bleve.Batch, obj nix.Importable) error { - batch.Delete(obj.GetName()) + if float64(res.Total) > (0.9 * float64(maxCount)) { + return fault.Newf("too many entities to prune: %d/%d (threshold: 90%%)", res.Total, maxCount) + } - return nil - }) - - go func() { - for err := range errs { - imp.options.Logger.Error("failed to prune old entities", "error", err) + err = write.WithBatch(func(batch *bleve.Batch) { + for _, dm := range res.Hits { + batch.Delete(dm.ID) } - }() - - 
err = query.Each(obj, func(record any) error { - objs <- record.(nix.Importable) - - return nil }) if err != nil { - return fault.Wrap(err, fmsg.With("failed to prune old entities from index")) - } - - err = query.Delete(obj) - if err != nil { - return fault.Wrap(err, fmsg.With("failed to prune old entities from storage")) - } - - err = tx.Commit() - if err != nil { - return fault.Wrap(err, fmsg.With("Failed to commit transaction")) + return fault.Wrap(err, fmsg.With("failed to prune entities")) } imp.options.Logger.Info(
@@ -241,21 +222,18 @@ source.Importer.String(),
"source", source.Key, "count", - count, + res.Total, ) return nil } -func (imp *Importer) createSourceImporter( +func (imp *Importer) createSourceFetcher( parent context.Context, meta *index.Meta, forceUpdate bool, - fetchOnly bool, ) func(*config.Source) error { return func(source *config.Source) error { - var files *fetcher.FetchedFiles - logger := imp.options.Logger.With("name", source.Key) pdb, err := programs.New(source, &programs.Options{ Logger: logger,
@@ -276,124 +254,144 @@
ctx, cancel := context.WithTimeout(parent, source.Timeout.Duration) defer cancel() - if imp.options.Offline { - logger.Debug("skipping fetch; in offline mode") + logger.Debug("starting fetcher") - files, err = fetcher.Open(source, fopts) - if err != nil { - return fault.Wrap(err, fmsg.With("error opening fetched files")) + fetcher, err := fetcher.New(source, fopts) + if err != nil { + return fault.Wrap(err, fmsg.With("error creating fetcher")) + } + + _, err = fetcher.FetchIfNeeded(ctx, sourceMeta) + if err != nil { + var exerr *exec.ExitError + if errors.As(err, &exerr) { + lines := strings.SplitSeq(strings.TrimSpace(string(exerr.Stderr)), "\n") + for line := range lines { + logger.Error( + "importer fetch failed", + "fetcher", + source.Fetcher.String(), + "stderr", + line, + "status", + exerr.ExitCode(), + ) + } } - } else { - logger.Debug("starting fetcher") + + return fault.Wrap(err, fmsg.With("importer fetch failed")) + } + logger.Info( + "importer fetch succeeded", + "previous", + previousUpdate.Format(time.DateTime), + "current", + sourceMeta.UpdatedAt.Format(time.DateTime), + "is_updated", + sourceMeta.UpdatedAt.After(previousUpdate), + "update_force", + forceUpdate, + ) - fetcher, err := fetcher.New(source, fopts) + if source.Programs.Enable { + err = pdb.Instantiate(ctx) if err != nil { - return fault.Wrap(err, fmsg.With("error creating fetcher")) + logger.Warn("programs database instantiation failed", "error", err) } + } - files, err = fetcher.FetchIfNeeded(ctx, sourceMeta) + if source.Manpages.Enable { + err = imp.options.Manpages.Update(ctx, source) if err != nil { - var exerr *exec.ExitError - if errors.As(err, &exerr) { - lines := strings.SplitSeq(strings.TrimSpace(string(exerr.Stderr)), "\n") - for line := range lines { - logger.Error( - "importer fetch failed", - "fetcher", - source.Fetcher.String(), - "stderr", - line, - "status", - exerr.ExitCode(), - ) - } - } + logger.Warn("manpages database update failed", "error", err) + } + } + + return nil 
+ } +} - return fault.Wrap(err, fmsg.With("importer fetch failed")) - } - logger.Info( - "importer fetch succeeded", - "previous", - previousUpdate.Format(time.DateTime), - "current", - sourceMeta.UpdatedAt.Format(time.DateTime), - "is_updated", - sourceMeta.UpdatedAt.After(previousUpdate), - "update_force", - forceUpdate, - "fetch_only", - fetchOnly, - ) +func (imp *Importer) createSourceImporter( + parent context.Context, + meta *index.Meta, +) func(*config.Source) error { + return func(source *config.Source) error { + logger := imp.options.Logger.With("name", source.Key) + pdb, err := programs.New(source, &programs.Options{ + Logger: logger, + Root: imp.options.Root, + }) + if err != nil { + return fault.Wrap(err, fmsg.With("error creating program database")) + } - if source.Programs.Enable { - err = pdb.Instantiate(ctx) - if err != nil { - logger.Warn("programs database instantiation failed", "error", err) - } - } + sourceMeta := meta.GetSourceMeta(source.Key) - if source.Manpages.Enable { - err = imp.options.Manpages.Update(ctx, source) - if err != nil { - logger.Warn("manpages database update failed", "error", err) - } - } + fopts := &fetcher.Options{ + Logger: logger, + Root: imp.options.Root, } - if !fetchOnly && - (!sourceMeta.UpdatedAt.After(sourceMeta.StoredAt) || sourceMeta.StoredAt.IsZero() || forceUpdate) { + ctx, cancel := context.WithTimeout(parent, source.Timeout.Duration) + defer cancel() - if files.Revision != nil { - err = setRepoRevision(files.Revision, source) - if err != nil { - logger.Warn("could not set source repo revision", "error", err) - } + files, err := fetcher.Open(source, fopts) + if err != nil { + return fault.Wrap(err, fmsg.With("error opening fetched files")) + } + + if source.Programs.Enable { + err = pdb.Instantiate(ctx) + if err != nil { + logger.Warn("programs database instantiation failed", "error", err) } + } - var processor Processor - logger.Debug( - "creating processor", - "importer_type", - source.Importer, - 
"revision", - source.Repo.Revision, - ) - switch source.Importer { - case config.Options: - processor, err = NewOptionProcessor( - files.Options, - source, - logger.Named("processor"), - ) - case config.Packages: - processor, err = NewPackageProcessor( - files.Packages, - source, - logger.Named("processor"), - pdb, - ) - } + if files.Revision != nil { + err = setRepoRevision(files.Revision, source) if err != nil { - return fault.Wrap(err, fmsg.Withf("failed to create processor")) + logger.Warn("could not set source repo revision", "error", err) } + } - hadWarnings, err := pipe( - ctx, - logger.Named("importer"), - processor.Process, - imp.options.Storage.MakeSourceImporter(source), + var processor Processor + logger.Debug( + "creating processor", + "importer_type", + source.Importer, + "revision", + source.Repo.Revision, + ) + switch source.Importer { + case config.Options: + processor, err = NewOptionProcessor( + files.Options, + source, + logger.Named("processor"), ) - if err != nil { - return fault.Wrap(err, fmsg.Withf("failed to process source")) - } + case config.Packages: + processor, err = NewPackageProcessor( + files.Packages, + source, + logger.Named("processor"), + pdb, + ) + } + if err != nil { + return fault.Wrap(err, fmsg.Withf("failed to create processor")) + } - sourceMeta.StoredAt = time.Now() + hadWarnings, err := imp.process(ctx, processor) + if err != nil { + return fault.Wrap(err, fmsg.Withf("failed to process source")) + } + + sourceMeta.StoredAt = time.Now() - if hadWarnings { - logger.Warn("importer succeeded, but with warnings/errors") - } else { - logger.Info("importer succeeded") - } + if hadWarnings { + logger.Warn("importer succeeded, but with warnings/errors") + } else { + logger.Info("importer succeeded") } sourceMeta.Rev = source.Repo.Revision