feat: store fetched files in data directory
10 files changed, 231 insertions(+), 95 deletions(-)
M cmd/searchix-web/main.go → cmd/searchix-web/main.go
@@ -138,6 +138,7 @@ WriteIndex: write, LowMemory: cfg.Importer.LowMemory, Logger: logger.Named("importer"), Manpages: mdb, + Root: root, }) if err != nil { logger.Fatal("Failed to create importer", "error", err)
M internal/config/structs.go → internal/config/structs.go
@@ -5,6 +5,7 @@ // keep config structs here so that lll ignores the long lines (go doesn't support multi-line struct tags) import ( "fmt" + "path/filepath" "strings" "github.com/creasty/defaults"@@ -83,3 +84,8 @@ if defaults.CanUpdate(source.Name) { source.Name = strings.ToTitle(source.Key[0:1]) + source.Key[1:] } } + +func (source *Source) JoinPath(name string) string { + //nolint:forbidigo // is not absolute + return filepath.Join("sources", source.Key, name) +}
M internal/fetcher/channel.go → internal/fetcher/channel.go
@@ -3,16 +3,14 @@ import ( "context" "fmt" - "os" "os/exec" - "path" + "path/filepath" "strconv" "strings" "time" "alin.ovh/searchix/internal/config" "alin.ovh/searchix/internal/index" - "alin.ovh/x/log" "github.com/Southclaws/fault" "github.com/Southclaws/fault/fmsg" )@@ -20,18 +18,18 @@ type ChannelFetcher struct { Source *config.Source SourceFile string - Logger *log.Logger + *Options } func NewChannelFetcher( source *config.Source, - logger *log.Logger, + options *Options, ) (*ChannelFetcher, error) { switch source.Importer { case config.Options: return &ChannelFetcher{ - Source: source, - Logger: logger, + Source: source, + Options: options, }, nil default: return nil, fault.Newf("unsupported importer type %s", source.Importer)@@ -42,6 +40,7 @@ func (i *ChannelFetcher) FetchIfNeeded( ctx context.Context, sourceMeta *index.SourceMeta, ) (*FetchedFiles, error) { + target := i.Source.JoinPath("options.json") args := []string{ "--no-build-output", "--timeout",@@ -64,20 +63,35 @@ return nil, fault.Wrap(err, fmsg.With("failed to run nix-build (--dry-run)")) } //nolint:forbidigo // nix-build only gives the top-level path - outPath := path.Join(strings.TrimSpace(string(out)), i.Source.OutputPath, "options.json") + outPath := filepath.Join(strings.TrimSpace(string(out)), i.Source.OutputPath, "options.json") + i.Logger.Debug( "checking output path", "outputPath", outPath, + "updated", + outPath != sourceMeta.Path, ) - if outPath != sourceMeta.Path { sourceMeta.Path = outPath sourceMeta.Updated = time.Now().Truncate(time.Second) + + if exists, err := i.Root.Exists(target); err != nil { + return nil, fault.Wrap(err, fmsg.With("failed to check if target path exists")) + } else if exists { + err := i.Root.Remove(target) + if err != nil { + return nil, fault.Wrap(err, fmsg.Withf("failed to remove previous file %s", target)) + } + } + + err := i.Root.CopyTo(outPath, target) + if err != nil { + return nil, fault.Wrap(err, fmsg.With("failed to copy options.json")) + } } - //nolint:forbidigo // nix builds the file in the nix store - file, err := os.Open(outPath) + file, err := i.Root.Open(target) if err != nil { return nil, fault.Wrap(err, fmsg.With("failed to open options.json")) }
M internal/fetcher/download.go → internal/fetcher/download.go
@@ -9,25 +9,23 @@ "alin.ovh/searchix/internal/fetcher/http" "alin.ovh/searchix/internal/index" "github.com/Southclaws/fault" "github.com/Southclaws/fault/fmsg" - - "alin.ovh/x/log" ) type DownloadFetcher struct { Source *config.Source SourceFile string - Logger *log.Logger + *Options } func NewDownloadFetcher( source *config.Source, - logger *log.Logger, + options *Options, ) (*DownloadFetcher, error) { switch source.Importer { case config.Options, config.Packages: return &DownloadFetcher{ - Source: source, - Logger: logger, + Source: source, + Options: options, }, nil default: return nil, fault.Newf("unsupported importer type %s", source.Importer)@@ -45,7 +43,6 @@ ctx context.Context, sourceMeta *index.SourceMeta, ) (*FetchedFiles, error) { f := &FetchedFiles{} - sourceUpdated := sourceMeta.Updated filesToFetch := make([]string, 2) filesToFetch[0] = files["revision"]@@ -56,34 +53,41 @@ case config.Options: filesToFetch[1] = files["options"] } - for _, filename := range filesToFetch { - fetchURL, baseErr := url.JoinPath(i.Source.URL, filename) + fetcher := http.NewFetcher(&http.Options{ + Logger: i.Logger.Named("http"), + Root: i.Root, + }) + + for _, basename := range filesToFetch { + target := i.Source.JoinPath(basename) + fetchURL, baseErr := url.JoinPath(i.Source.URL, basename) if baseErr != nil { return nil, fault.Wrap( baseErr, fmsg.Withf( "could not build URL with elements %s and %s", i.Source.URL, - filename, + basename, ), ) } - i.Logger.Debug("preparing to fetch URL", "url", fetchURL) + i.Logger.Debug("preparing to fetch URL", "url", fetchURL, "target", target) - body, mtime, err := http.FetchFileIfNeeded(ctx, i.Logger, sourceUpdated, fetchURL) + body, err := fetcher.FetchFileIfNeeded(ctx, target, fetchURL) if err != nil { i.Logger.Warn("failed to fetch file", "url", fetchURL, "error", err) - return nil, fault.Wrap(err, fmsg.Withf("could not fetch file %s", filename)) + return nil, fault.Wrap(err, fmsg.Withf("could not fetch file %s", basename)) } - // don't bother to issue requests for the later files - if mtime.Before(sourceUpdated) { - break + + stat, err := i.Root.Stat(target) + if err != nil { + return nil, fault.Wrap(err, fmsg.Withf("could not stat file %s", target)) } - sourceMeta.Updated = mtime + sourceMeta.Updated = stat.ModTime() - switch filename { + switch basename { case files["revision"]: f.Revision = body case files["options"]:@@ -91,7 +95,7 @@ f.Options = body case files["packages"]: f.Packages = body default: - return f, fault.Newf("unknown filename %s", filename) + return f, fault.Newf("unknown filename %s", basename) } }
M internal/fetcher/http/http.go → internal/fetcher/http/http.go
@@ -5,10 +5,12 @@ "context" "fmt" "io" "net/http" + "os" "strings" "time" "alin.ovh/searchix/internal/config" + "alin.ovh/searchix/internal/file" "alin.ovh/x/log" "github.com/Southclaws/fault"@@ -32,22 +34,49 @@ func (r *brotliReadCloser) Close() error { return fault.Wrap(r.src.Close(), fmsg.With("failed to call close on underlying reader")) } -func FetchFileIfNeeded( +type Options struct { + Logger *log.Logger + Root *file.Root +} + +type Fetcher struct { + logger *log.Logger + root *file.Root +} + +func NewFetcher(options *Options) *Fetcher { + return &Fetcher{ + logger: options.Logger, + root: options.Root, + } +} + +func (h *Fetcher) FetchFileIfNeeded( ctx context.Context, - log *log.Logger, - mtime time.Time, + filename string, url string, -) (io.ReadCloser, time.Time, error) { - var newMtime time.Time +) (io.ReadCloser, error) { + stat, err := h.root.StatIfExists(filename) + if err != nil { + return nil, fault.Wrap( + err, + fmsg.Withf("failed to stat file %s", filename), + ) + } var ifModifiedSince string - if !mtime.IsZero() { - ifModifiedSince = strings.Replace(mtime.UTC().Format(time.RFC1123), "UTC", "GMT", 1) + if stat != nil && stat.Size() > 0 && !stat.ModTime().IsZero() { + ifModifiedSince = strings.Replace( + stat.ModTime().UTC().Format(time.RFC1123), + "UTC", + "GMT", + 1, + ) } - req, baseErr := http.NewRequestWithContext(ctx, "GET", url, http.NoBody) - if baseErr != nil { - return nil, newMtime, fault.Wrap( - baseErr, + req, err := http.NewRequestWithContext(ctx, "GET", url, http.NoBody) + if err != nil { + return nil, fault.Wrap( + err, fmsg.Withf("could not create HTTP request for %s", url), ) }@@ -57,26 +86,24 @@ if ifModifiedSince != "" { req.Header.Set("If-Modified-Since", ifModifiedSince) } - res, baseErr := http.DefaultClient.Do(req) - if baseErr != nil { - return nil, newMtime, fault.Wrap( - baseErr, + res, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fault.Wrap( + err, fmsg.Withf("could not make HTTP request to %s", url), ) } var body io.ReadCloser - var err error + var newMtime time.Time + encoding := res.Header.Get("Content-Encoding") switch res.StatusCode { case http.StatusNotModified: - newMtime = mtime - - return nil, newMtime, nil case http.StatusOK: var baseErr error newMtime, baseErr = time.Parse(time.RFC1123, res.Header.Get("Last-Modified")) if baseErr != nil { - log.Warn( + h.logger.Warn( "could not parse Last-Modified header from response", "value", res.Header.Get("Last-Modified"),@@ -84,18 +111,51 @@ ) newMtime = time.Now() } - switch ce := res.Header.Get("Content-Encoding"); ce { + switch encoding { case "br": - log.Debug("using brotli encoding") body = newBrotliReader(res.Body) case "", "identity", "gzip": body = res.Body default: - err = fault.Newf("cannot handle a body with content-encoding %s", ce) + return nil, fault.Newf("cannot handle a body with content-encoding %s", encoding) + } + + writer, err := h.root.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o644) + if err != nil { + return nil, fault.Wrap(err, fmsg.Withf("failed to open file %s", filename)) + } + + _, err = io.Copy(writer, body) + if err != nil { + return nil, fault.Wrap( + err, + fmsg.Withf("failed to copy response body to file %s", filename), + ) + } + + err = writer.Sync() + if err != nil { + return nil, fault.Wrap(err, fmsg.Withf("failed to sync file %s", filename)) + } + + err = writer.Close() + if err != nil { + return nil, fault.Wrap(err, fmsg.Withf("failed to close file %s", filename)) + } + + err = h.root.Chtimes(filename, time.Time{}, newMtime) + if err != nil { + return nil, fault.Wrap(err, fmsg.Withf("failed to update file times %s", filename)) } + default: - err = fault.Newf("got response code %d, don't know what to do", res.StatusCode) + return nil, fault.Newf("got response code %d, don't know what to do", res.StatusCode) + } + + reader, err := h.root.Open(filename) + if err != nil { + return nil, fault.Wrap(err, fmsg.Withf("failed to open file %s", filename)) } - return NewReadCloser(body), newMtime, err + return reader, nil }
M internal/fetcher/main.go → internal/fetcher/main.go
@@ -5,13 +5,16 @@ "context" "io" "alin.ovh/searchix/internal/config" + "alin.ovh/searchix/internal/file" "alin.ovh/searchix/internal/index" "alin.ovh/x/log" "github.com/Southclaws/fault" + "github.com/Southclaws/fault/fmsg" ) type Options struct { Logger *log.Logger + Root *file.Root } type FetchedFiles struct {@@ -28,13 +31,26 @@ func New( source *config.Source, opts *Options, ) (fetcher Fetcher, err error) { + target := source.JoinPath("") + exists, err := opts.Root.Exists(target) + if err != nil { + return nil, fault.Wrap(err, fmsg.With("failed to check if directory exists")) + } + + if !exists { + err = opts.Root.MkdirAll(target) + if err != nil { + return nil, fault.Wrap(err, fmsg.With("failed to create directory")) + } + } + switch source.Fetcher { case config.ChannelNixpkgs: - fetcher, err = NewNixpkgsChannelFetcher(source, opts.Logger) + fetcher, err = NewNixpkgsChannelFetcher(source, opts) case config.Channel: - fetcher, err = NewChannelFetcher(source, opts.Logger) + fetcher, err = NewChannelFetcher(source, opts) case config.Download: - fetcher, err = NewDownloadFetcher(source, opts.Logger) + fetcher, err = NewDownloadFetcher(source, opts) default: err = fault.Newf("unsupported fetcher type %s", source.Fetcher.String()) }
M internal/fetcher/nixpkgs-channel.go → internal/fetcher/nixpkgs-channel.go
@@ -3,19 +3,19 @@ import ( "context" "net/url" + "strings" "alin.ovh/searchix/internal/config" "alin.ovh/searchix/internal/fetcher/http" "alin.ovh/searchix/internal/index" - "alin.ovh/x/log" "github.com/Southclaws/fault" "github.com/Southclaws/fault/fmsg" ) type NixpkgsChannelFetcher struct { Source *config.Source - Logger *log.Logger + *Options } func makeChannelURL(channel string, subPath string) (string, error) {@@ -26,13 +26,13 @@ } func NewNixpkgsChannelFetcher( source *config.Source, - logger *log.Logger, + options *Options, ) (*NixpkgsChannelFetcher, error) { switch source.Importer { case config.Options, config.Packages: return &NixpkgsChannelFetcher{ - Source: source, - Logger: logger, + Source: source, + Options: options, }, nil default: return nil, fault.Newf("unsupported importer type %s", source.Importer)@@ -60,23 +60,30 @@ case config.Options: filesToFetch[1] = optionsFilename } + fetcher := http.NewFetcher(&http.Options{ + Logger: i.Logger.Named("http"), + Root: i.Root, + }) + var fetchURL string for _, filename := range filesToFetch { + target := i.Source.JoinPath(strings.TrimSuffix(filename, ".br")) fetchURL, err = makeChannelURL(i.Source.Channel, filename) if err != nil { return } i.Logger.Debug("attempting to fetch file", "url", fetchURL) - body, mtime, err := http.FetchFileIfNeeded(ctx, i.Logger, sourceMeta.Updated, fetchURL) + body, err := fetcher.FetchFileIfNeeded(ctx, target, fetchURL) if err != nil { return f, fault.Wrap(err, fmsg.Withf("failed to fetch file with url %s", fetchURL)) } - // don't bother to issue requests for the later files - if mtime.Before(sourceMeta.Updated) { - break + + stat, err := i.Root.Stat(target) + if err != nil { + return nil, fault.Wrap(err, fmsg.Withf("could not stat file %s", target)) } - sourceMeta.Updated = mtime + sourceMeta.Updated = stat.ModTime() switch filename { case revisionFilename:
M internal/file/root.go → internal/file/root.go
@@ -2,9 +2,11 @@ //nolint:forbidigo // wrappers for os.File functions go here package file import ( + "io" "io/fs" "os" "path/filepath" + "time" "github.com/Southclaws/fault" "github.com/Southclaws/fault/fmsg"@@ -54,6 +56,49 @@ func (r *Root) Exists(file string) (bool, error) { stat, err := r.StatIfExists(file) return stat != nil, err +} + +func (r *Root) CopyTo(source, target string) error { + src, err := os.Open(source) + if err != nil { + return fault.Wrap(err, fmsg.Withf("failed to open source file %s", source)) + } + defer src.Close() + + dst, err := r.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o640) + if err != nil { + return fault.Wrap(err, fmsg.Withf("failed to create target file %s", target)) + } + defer dst.Close() + + if _, err := io.Copy(dst, src); err != nil { + return fault.Wrap(err, fmsg.Withf("failed to copy file from %s to %s", source, target)) + } + + err = dst.Sync() + if err != nil { + return fault.Wrap(err, fmsg.Withf("failed to sync target file %s", target)) + } + + return nil +} + +func (r *Root) Chtimes(name string, atime time.Time, mtime time.Time) error { + err := os.Chtimes(r.JoinPath(name), atime, mtime) + if err != nil { + return fault.Wrap(err, fmsg.Withf("failed to change times of %s", name)) + } + + return nil +} + +func (r *Root) MkdirAll(name string) error { + err := os.MkdirAll(r.JoinPath(name), 0o750) + if err != nil { + return fault.Wrap(err, fmsg.Withf("failed to create directory %s", name)) + } + + return nil } func (r *Root) ReadFile(name string) ([]byte, error) {
M internal/importer/main.go → internal/importer/main.go
@@ -13,6 +13,7 @@ "time" "alin.ovh/searchix/internal/config" "alin.ovh/searchix/internal/fetcher" + "alin.ovh/searchix/internal/file" "alin.ovh/searchix/internal/index" "alin.ovh/searchix/internal/manpages" "alin.ovh/searchix/internal/programs"@@ -28,6 +29,7 @@ LowMemory bool Logger *log.Logger WriteIndex *index.WriteIndex Manpages *manpages.URLMap + Root *file.Root } var Job struct {@@ -67,6 +69,7 @@ logger.Debug("starting fetcher") fetcher, err := fetcher.New(source, &fetcher.Options{ Logger: logger, + Root: imp.options.Root, }) if err != nil { return fault.Wrap(err, fmsg.With("error creating fetcher"))
M internal/manpages/manpages.go → internal/manpages/manpages.go
@@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "io" "time" "alin.ovh/searchix/internal/config"@@ -49,32 +48,29 @@ if source.Manpages.Path == "" { return fault.New("manpages repo source path not configured") } + fetcher := http.NewFetcher(&http.Options{ + Logger: m.logger.Named("http"), + Root: m.root, + }) + url, err := makeManpageURL(source) if err != nil { return fault.Wrap(err, fmsg.With("failed to join manpages URL")) } m.logger.Debug("fetching manpages URL map", "url", url) - r, mtime, err := http.FetchFileIfNeeded(ctx, m.logger.Named("http"), m.mtime, url) + r, err := fetcher.FetchFileIfNeeded(ctx, basename, url) if err != nil { return fault.Wrap(err, fmsg.With("failed to fetch manpages")) } defer r.Close() - if err := m.save(r); err != nil { - return fault.Wrap(err, fmsg.With("failed to save manpages")) - } - - m.mtime = mtime - return nil } // Open loads the manpage URLs from the JSON file func (m *URLMap) Open() error { - m.logger.Debug("opening manpages file", "path", basename) - - stat, err := m.root.Stat(basename) + stat, err := m.root.StatIfExists(basename) if err != nil { return fault.Wrap(err, fmsg.Withf("failed to stat manpages file: %s", basename)) }@@ -92,22 +88,6 @@ return fault.Wrap(err, fmsg.With("failed to parse manpages JSON")) } m.logger.Debug("loaded manpages data", "urls", len(m.urlMap)) - - return nil -} - -func (m *URLMap) save(r io.Reader) error { - m.logger.Debug("saving manpages file", "path", basename) - - f, err := m.root.Create(basename) - if err != nil { - return fault.Wrap(err, fmsg.With("failed to create manpages file")) - } - defer f.Close() - - if _, err := io.Copy(f, r); err != nil { - return fault.Wrap(err, fmsg.With("failed to write manpages file")) - } return nil }