feat: store data separate from search index
21 files changed, 379 insertions(+), 131 deletions(-)
changed files
- cmd/searchix-web/main.go
- go.mod
- go.sum
- gomod2nix.toml
- internal/components/combined.go
- internal/components/data.go
- internal/components/detail.go
- internal/components/options.go
- internal/components/packages.go
- internal/importer/importer.go
- internal/importer/main.go
- internal/index/indexer.go
- internal/index/search.go
- internal/index/search_test.go
- internal/nix/option.go
- internal/nix/package.go
- internal/server/mux.go
- internal/server/server.go
- internal/storage/log.go
- internal/storage/store.go
- nix/dev-shell.nix
M cmd/searchix-web/main.go → cmd/searchix-web/main.go
@@ -20,6 +20,7 @@ "alin.ovh/searchix/internal/importer" "alin.ovh/searchix/internal/index" "alin.ovh/searchix/internal/manpages" "alin.ovh/searchix/internal/server" + "alin.ovh/searchix/internal/storage" "alin.ovh/searchix/web" "alin.ovh/x/log" )@@ -108,13 +109,24 @@ logger.Fatal("Failed to open data root", "error", err) } defer root.Close() + store, err := storage.New(&storage.Options{ + Root: root, + Logger: logger.Named("store"), + }) + if err != nil { + logger.Fatal("Failed to create store", "error", err) + } + defer store.Close() + read, write, exists, err := index.OpenOrCreate( &index.Options{ + Config: cfg, Force: *replace, LowMemory: cfg.Importer.LowMemory, BatchSize: cfg.Importer.BatchSize, Logger: logger.Named("index"), Root: root, + Store: store, }, ) if err != nil {@@ -129,12 +141,14 @@ s, err := web.New(cfg, logger, &server.Options{ ReadIndex: read, ManpagesURLMap: mdb, + Store: store, }) if err != nil { logger.Fatal("Failed to initialise searchix-web", "error", err) } imp, err := importer.New(cfg, &importer.Options{ + Storage: store, WriteIndex: write, LowMemory: cfg.Importer.LowMemory, Logger: logger.Named("importer"),
M go.mod → go.mod
@@ -8,9 +8,9 @@ alin.ovh/x v1.0.0 badc0de.net/pkg/flagutil v1.0.1 github.com/Southclaws/fault v0.8.2 github.com/andybalholm/brotli v1.1.1 + github.com/asdine/storm/v3 v3.2.1 github.com/bcicen/jstream v1.0.1 github.com/blevesearch/bleve/v2 v2.5.2 - github.com/blevesearch/bleve_index_api v1.2.8 github.com/creasty/defaults v1.8.0 github.com/crewjam/csp v0.0.2 github.com/dustin/go-humanize v1.0.1@@ -22,6 +22,7 @@ github.com/pelletier/go-toml/v2 v2.2.4 github.com/stefanfritsch/goldmark-fences v1.0.0 github.com/stoewer/go-strcase v1.3.0 github.com/yuin/goldmark v1.7.12 + go.etcd.io/bbolt v1.4.2 go.uber.org/zap v1.27.0 golang.org/x/net v0.41.0 modernc.org/sqlite v1.38.0@@ -31,6 +32,7 @@ require ( github.com/Code-Hex/dd v1.1.0 // indirect github.com/RoaringBitmap/roaring/v2 v2.5.0 // indirect github.com/bits-and-blooms/bitset v1.22.0 // indirect + github.com/blevesearch/bleve_index_api v1.2.8 // indirect github.com/blevesearch/geo v0.2.3 // indirect github.com/blevesearch/go-faiss v1.0.25 // indirect github.com/blevesearch/go-porterstemmer v1.0.3 // indirect@@ -60,10 +62,9 @@ github.com/pkg/errors v0.9.1 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/sykesm/zap-logfmt v0.0.4 // indirect github.com/thessem/zap-prettyconsole v0.5.2 // indirect - go.etcd.io/bbolt v1.4.1 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 // indirect - golang.org/x/sys v0.33.0 // indirect + golang.org/x/sys v0.34.0 // indirect golang.org/x/text v0.26.0 // indirect google.golang.org/protobuf v1.36.6 // indirect modernc.org/libc v1.65.10 // indirect
M go.sum → go.sum
@@ -7,12 +7,18 @@ badc0de.net/pkg/flagutil v1.0.1/go.mod h1:HwwkfbImu+u288bnLaYDGqBxkJzvqi5YzKofmgkMLvk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/Code-Hex/dd v1.1.0 h1:VEtTThnS9l7WhpKUIpdcWaf0B8Vp0LeeSEsxA1DZseI= github.com/Code-Hex/dd v1.1.0/go.mod h1:VaMyo/YjTJ3d4qm/bgtrUkT2w+aYwJ07Y7eCWyrJr1w= +github.com/DataDog/zstd v1.4.1 h1:3oxKN3wbHibqx897utPC2LTQU4J+IHWWJO+glkAkpFM= +github.com/DataDog/zstd v1.4.1/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= github.com/RoaringBitmap/roaring/v2 v2.5.0 h1:TJ45qCM7D7fIEBwKd9zhoR0/S1egfnSSIzLU1e1eYLY= github.com/RoaringBitmap/roaring/v2 v2.5.0/go.mod h1:FiJcsfkGje/nZBZgCu0ZxCPOKD/hVXDS2dXi7/eUFE0= +github.com/Sereal/Sereal v0.0.0-20190618215532-0b8ac451a863 h1:BRrxwOZBolJN4gIwvZMJY1tzqBvQgpaZiQRuIDD40jM= +github.com/Sereal/Sereal v0.0.0-20190618215532-0b8ac451a863/go.mod h1:D0JMgToj/WdxCgd30Kc1UcA9E+WdZoJqeVOuYW7iTBM= github.com/Southclaws/fault v0.8.2 h1:hbQANoRWYVWnQjpwJlNlfaolM+oIihgoFowaY3EBLCs= github.com/Southclaws/fault v0.8.2/go.mod h1:VUVkAWutC59SL16s6FTqf3I6I2z77RmnaW5XRz4bLOE= github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/asdine/storm/v3 v3.2.1 h1:I5AqhkPK6nBZ/qJXySdI7ot5BlXSZ7qvDY1zAn5ZJac= +github.com/asdine/storm/v3 v3.2.1/go.mod h1:LEpXwGt4pIqrE/XcTvCnZHT5MgZCV6Ub9q7yQzOFWr0= github.com/bcicen/jstream v1.0.1 h1:BXY7Cu4rdmc0rhyTVyT3UkxAiX3bnLpKLas9btbH5ck= github.com/bcicen/jstream v1.0.1/go.mod h1:9ielPxqFry7Y4Tg3j4BfjPocfJ3TbsRtXOAYXYmRuAQ= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=@@ -71,8 +77,11 @@ github.com/getsentry/sentry-go v0.33.0 h1:YWyDii0KGVov3xOaamOnF0mjOrqSjBqwv48UEzn7QFg= github.com/getsentry/sentry-go v0.33.0/go.mod h1:C55omcY9ChRQIUcVcGcs+Zdy4ZpQGvNJ7JYHIoSWOtE= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs= github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=@@ -125,6 +134,7 @@ github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=@@ -138,14 +148,17 @@ github.com/sykesm/zap-logfmt v0.0.4/go.mod h1:AuBd9xQjAe3URrWT1BBDk2v2onAZHkZkWRMiYZXiZWA= github.com/tailscale/depaware v0.0.0-20210622194025-720c4b409502/go.mod h1:p9lPsd+cx33L3H9nNoecRRxPssFKUwwI50I3pZ0yT+8= github.com/thessem/zap-prettyconsole v0.5.2 h1:knusxXGhmkD5Ho+WiI4IzD16Dz9PEcOIKdK+uX4oTPA= github.com/thessem/zap-prettyconsole v0.5.2/go.mod h1:3qfsE7y+bLOq7EQ+fMZHD3HYEp24ULFf5nhLSx6rjrE= +github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI= +github.com/vmihailenco/msgpack v4.0.4+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk= github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.7.12 h1:YwGP/rrea2/CnCtUHgjuolG/PnMxdQtPMO5PvaE2/nY= github.com/yuin/goldmark v1.7.12/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg= -go.etcd.io/bbolt v1.4.1 h1:5mOV+HWjIPLEAlUGMsveaUvK2+byZMFOzojoi7bh7uI= -go.etcd.io/bbolt v1.4.1/go.mod h1:c8zu2BnXWTu2XM4XcICtbGSl9cFwsXtcf9zLt2OncM8= +go.etcd.io/bbolt v1.3.4/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= +go.etcd.io/bbolt v1.4.2 h1:IrUHp260R8c+zYx/Tm8QZr04CX+qWS5PGfPdevhdm1I= +go.etcd.io/bbolt v1.4.2/go.mod h1:Is8rSHO/b4f3XigBC0lL0+4FwAQv3HXEEIgFMuKHceM= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.8.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=@@ -177,7 +190,9 @@ golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191105084925-a882066a44e0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw=@@ -189,6 +204,7 @@ golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=@@ -197,10 +213,11 @@ golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220111092808-5a964db01320/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA= +golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA=@@ -217,10 +234,13 @@ golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
M gomod2nix.toml → gomod2nix.toml
@@ -22,6 +22,9 @@ hash = "sha256-ifMHIRqeMNV2+jhi5jxA42iK1DATNPYI4pN69inRGT4=" [mod."github.com/andybalholm/brotli"] version = "v1.1.1" hash = "sha256-kCt+irK1gvz2lGQUeEolYa5+FbLsfWlJMCd5hm+RPgQ=" + [mod."github.com/asdine/storm/v3"] + version = "v3.2.1" + hash = "sha256-BLpBFWFjLd5Xumx72cgSA2Zget+4UYMTW+z4bkkdIR0=" [mod."github.com/bcicen/jstream"] version = "v1.0.1" hash = "sha256-mm+/BuIEYYj6XOHCCJLxVMKd1XcBXCiRCWA+aTvr1sE="@@ -155,8 +158,8 @@ [mod."github.com/yuin/goldmark"] version = "v1.7.12" hash = "sha256-thLYBS4woL2X5qRdo7vP+xCvjlGRDU0jXtDCUt6vvWM=" [mod."go.etcd.io/bbolt"] - version = "v1.4.1" - hash = "sha256-FqKrJJxOOfAnvziKKFzI6VUGDp+ga6IgZmuihHp2I2A=" + version = "v1.4.2" + hash = "sha256-/ujVPvBaSZp3ogFsGkf5yl6CjaghnN/MKuYNf9EHUfI=" [mod."go.uber.org/multierr"] version = "v1.11.0" hash = "sha256-Lb6rHHfR62Ozg2j2JZy3MKOMKdsfzd1IYTR57r3Mhp0="@@ -170,8 +173,8 @@ [mod."golang.org/x/net"] version = "v0.41.0" hash = "sha256-6/pi8rNmGvBFzkJQXkXkMfL1Bjydhg3BgAMYDyQ/Uvg=" [mod."golang.org/x/sys"] - version = "v0.33.0" - hash = "sha256-wlOzIOUgAiGAtdzhW/KPl/yUVSH/lvFZfs5XOuJ9LOQ=" + version = "v0.34.0" + hash = "sha256-5rZ7p8IaGli5X1sJbfIKOcOEwY4c0yQhinJPh2EtK50=" [mod."golang.org/x/text"] version = "v0.26.0" hash = "sha256-N+27nBCyGvje0yCTlUzZoVZ0LRxx4AJ+eBlrFQVRlFQ="
M internal/components/combined.go → internal/components/combined.go
@@ -10,15 +10,11 @@ . "alin.ovh/gomponents/html" ) func CombinedData(data nix.Importable) g.Node { - switch data.(type) { - case nix.Option: - if o := convertMatch[nix.Option](data); o != nil { - return firstSentence(o.Description) - } - case nix.Package: - if p := convertMatch[nix.Package](data); p != nil { - return g.Text(firstSentence(p.Description)) - } + switch data := data.(type) { + case *nix.Option: + return firstSentence(data.Description) + case *nix.Package: + return g.Text(firstSentence(data.Description)) } return g.Text("No description Available")
M internal/components/data.go → internal/components/data.go
@@ -4,7 +4,6 @@ import ( "alin.ovh/searchix/frontend" "alin.ovh/searchix/internal/config" search "alin.ovh/searchix/internal/index" - "alin.ovh/searchix/internal/nix" ) type TemplateData struct {@@ -25,12 +24,3 @@ Prev string Next string All string } - -func convertMatch[I nix.Importable](m nix.Importable) *I { - i, ok := m.(I) - if !ok { - return nil - } - - return &i -}
M internal/components/detail.go → internal/components/detail.go
@@ -8,10 +8,10 @@ ) func Detail(thing nix.Importable) g.Node { switch t := thing.(type) { - case nix.Option: - return OptionDetail(t) - case nix.Package: - return PackageDetail(t) + case *nix.Option: + return OptionDetail(*t) + case *nix.Package: + return PackageDetail(*t) default: return nil }
M internal/components/options.go → internal/components/options.go
@@ -22,7 +22,7 @@ ), ), TBody( g.MapIter(result.Hits, func(hit index.DocumentMatch) g.Node { - if m := convertMatch[nix.Option](hit.Data); m != nil { + if m, ok := hit.Data.(*nix.Option); ok { return optionRow(hit, *m) }
M internal/components/packages.go → internal/components/packages.go
@@ -23,7 +23,7 @@ ), ), TBody( g.MapIter(result.Hits, func(hit index.DocumentMatch) g.Node { - if m := convertMatch[nix.Package](hit.Data); m != nil { + if m, ok := hit.Data.(*nix.Package); ok { return packageRow(hit, *m) }
M internal/importer/importer.go → internal/importer/importer.go
@@ -4,6 +4,7 @@ import ( "context" "sync" + "alin.ovh/searchix/internal/config" "alin.ovh/searchix/internal/index" "alin.ovh/searchix/internal/nix" )@@ -15,14 +16,20 @@ func (imp *Importer) process( ctx context.Context, processor Processor, + source *config.Source, ) (bool, error) { wg := sync.WaitGroup{} wg.Add(1) objects, pErrs := processor.Process(ctx) + d1, d2 := duplicate(objects) + wg.Add(1) - iErrs := imp.options.WriteIndex.Import(ctx, objects) + iErrs := imp.options.WriteIndex.Import(ctx, d1) + + wg.Add(1) + wErrs := imp.options.Storage.Import(ctx, source, d2) var hadObjectErrors bool var criticalError error@@ -49,11 +56,21 @@ case err, running := <-pErrs: if !running { wg.Done() pErrs = nil + imp.options.Logger.Debug("processing completed") continue } hadObjectErrors = true imp.options.Logger.Warn("error processing object", "error", err) + case err, running := <-wErrs: + if !running { + wg.Done() + wErrs = nil + + continue + } + hadObjectErrors = true + imp.options.Logger.Warn("error writing to storage", "error", err) } } }()@@ -62,3 +79,23 @@ wg.Wait() return hadObjectErrors, criticalError } + +func duplicate[T any](v <-chan T) (<-chan T, <-chan T) { + if v == nil { + return nil, nil + } + + dup1 := make(chan T, 1) + dup2 := make(chan T, 1) + + go func() { + for v := range v { + dup1 <- v + dup2 <- v + } + close(dup1) + close(dup2) + }() + + return dup1, dup2 +}
M internal/importer/main.go → internal/importer/main.go
@@ -15,6 +15,7 @@ "alin.ovh/searchix/internal/file" "alin.ovh/searchix/internal/index" "alin.ovh/searchix/internal/manpages" "alin.ovh/searchix/internal/programs" + "alin.ovh/searchix/internal/storage" "alin.ovh/x/log" "github.com/Southclaws/fault"@@ -28,6 +29,7 @@ Logger *log.Logger WriteIndex *index.WriteIndex Manpages *manpages.URLMap Root *file.Root + Storage *storage.Store } var Job struct {@@ -43,10 +45,11 @@ func MarkIndexingStarted() { Job.StartedAt = time.Now() } -func MarkIndexingFinished() { +func MarkIndexingFinished(meta *index.Meta) { Job.LastRun.StartedAt = Job.StartedAt Job.LastRun.FinishedAt = time.Now() Job.StartedAt = time.Time{} + meta.LastImport = Job.LastRun } func SetNextRun(nextRun time.Time) {@@ -183,7 +186,7 @@ if err != nil { return fault.Wrap(err, fmsg.Withf("failed to create processor")) } - hadWarnings, err := imp.process(ctx, processor) + hadWarnings, err := imp.process(ctx, processor, source) if err != nil { return fault.Wrap(err, fmsg.Withf("failed to process source")) }@@ -246,7 +249,7 @@ imp.options.Logger.Error("import failed", "source", name, "error", err) } } - MarkIndexingFinished() + MarkIndexingFinished(meta) err := imp.options.WriteIndex.SaveMeta() if err != nil {
M internal/index/indexer.go → internal/index/indexer.go
@@ -1,22 +1,20 @@ package index import ( - "bytes" "context" - "encoding/gob" "math" "alin.ovh/searchix/internal/config" "alin.ovh/searchix/internal/file" "alin.ovh/searchix/internal/index/nixattr" "alin.ovh/searchix/internal/nix" + "alin.ovh/searchix/internal/storage" "alin.ovh/x/log" "go.uber.org/zap" "github.com/Southclaws/fault" "github.com/Southclaws/fault/fmsg" "github.com/blevesearch/bleve/v2" - "github.com/blevesearch/bleve/v2/analysis" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" "github.com/blevesearch/bleve/v2/analysis/analyzer/simple"@@ -26,25 +24,24 @@ "github.com/blevesearch/bleve/v2/analysis/token/ngram" "github.com/blevesearch/bleve/v2/analysis/token/porter" "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" - "github.com/blevesearch/bleve/v2/document" "github.com/blevesearch/bleve/v2/mapping" - index "github.com/blevesearch/bleve_index_api" ) -var idAnalyzer analysis.Analyzer - type Options struct { Force bool LowMemory bool BatchSize int + Store *storage.Store Logger *log.Logger Root *file.Root + Config *config.Config } type WriteIndex struct { batchSize int index bleve.Index log *log.Logger + store *storage.Store Meta *Meta }@@ -111,17 +108,19 @@ return nil, fault.Wrap(err, fmsg.With("could not add custom analyser")) } identityFieldMapping := bleve.NewKeywordFieldMapping() + identityFieldMapping.Store = false attributeFieldMapping := bleve.NewKeywordFieldMapping() attributeFieldMapping.Analyzer = "dotted_keyword" + attributeFieldMapping.Store = true keywordFieldMapping := bleve.NewKeywordFieldMapping() keywordFieldMapping.Analyzer = simple.Name + keywordFieldMapping.Store = false nameNGramMapping := bleve.NewTextFieldMapping() nameNGramMapping.Analyzer = "c_name" nameNGramMapping.IncludeTermVectors = true - nameNGramMapping.Store = false nixDocMapping := bleve.NewDocumentStaticMapping() nixDocMapping.AddFieldMappingsAt("Text", textFieldMapping)@@ -167,8 +166,6 @@ packageMapping.AddFieldMappingsAt("Programs", identityFieldMapping) indexMapping.AddDocumentMapping("option", optionMapping) indexMapping.AddDocumentMapping("package", packageMapping) - - idAnalyzer = indexMapping.AnalyzerNamed("c_name") return indexMapping, nil }@@ -277,14 +274,17 @@ options.BatchSize = 1_000 } return &ReadIndex{ - index: idx, - log: options.Logger, - meta: meta, + config: options.Config, + log: options.Logger, + store: options.Store, + index: idx, + meta: meta, }, &WriteIndex{ index: idx, batchSize: options.BatchSize, log: options.Logger, + store: options.Store, Meta: meta, }, exists,@@ -306,7 +306,6 @@ go func() { defer close(errs) k := 0 batch := i.index.NewBatch() - indexMapping := i.index.Mapping() outer: for obj := range objects {@@ -318,31 +317,7 @@ break outer default: } - doc := document.NewDocument(nix.GetKey(obj)) - if err := indexMapping.MapDocument(doc, obj); err != nil { - errs <- fault.Wrap(err, fmsg.Withf("could not map document for object: %s", obj.GetName())) - - continue - } - - var data bytes.Buffer - enc := gob.NewEncoder(&data) - if err := enc.Encode(&obj); err != nil { - errs <- fault.Wrap(err, fmsg.With("could not store object in search index")) - - continue - } - field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), index.StoreField) - doc.AddField(field) - idField := document.NewTextFieldCustom( - "_id", nil, []byte(doc.ID()), - index.IndexField|index.StoreField|index.IncludeTermVectors, - idAnalyzer, - ) - doc.AddField(idField) - - // log.Debug("adding object to index", "name", opt.Name) - if err := batch.IndexAdvanced(doc); err != nil { + if err := batch.Index(nix.GetKey(obj), obj); err != nil { errs <- fault.Wrap(err, fmsg.Withf("could not index object %s", obj.GetName())) continue
M internal/index/search.go → internal/index/search.go
@@ -1,15 +1,14 @@ package index import ( - "bytes" "context" - "encoding/gob" "iter" "strings" "time" "alin.ovh/searchix/internal/config" "alin.ovh/searchix/internal/nix" + "alin.ovh/searchix/internal/storage" "alin.ovh/x/log" "github.com/Southclaws/fault"@@ -32,9 +31,11 @@ Hits iter.Seq[DocumentMatch] } type ReadIndex struct { - index bleve.Index - log *log.Logger - meta *Meta + index bleve.Index + config *config.Config + store *storage.Store + log *log.Logger + meta *Meta } func (index *ReadIndex) LastUpdated() time.Time {@@ -77,9 +78,10 @@ } func (index *ReadIndex) search( ctx context.Context, + source *config.Source, request *bleve.SearchRequest, ) (*Result, error) { - request.Fields = []string{"_data", "Source"} + request.Fields = []string{"Source"} bleveResult, err := index.index.SearchInContext(ctx, request) select {@@ -91,21 +93,34 @@ return nil, fault.Wrap(err, fmsg.With("failed to execute search query")) } hits := func(yield func(DocumentMatch) bool) { - var buf bytes.Buffer for _, match := range bleveResult.Hits { hit := DocumentMatch{ DocumentMatch: match, Data: nil, } - _, err := buf.WriteString(match.Fields["_data"].(string)) - if err != nil { - index.log.Warn("error fetching result data", "error", err) + + parts := strings.SplitN(match.ID, "/", 3) + sourceName := parts[1] + id := parts[2] + + src := source + if src == nil { + var ok bool + src, ok = index.config.Importer.Sources[sourceName] + if !ok { + continue + } } - err = gob.NewDecoder(&buf).Decode(&hit.Data) + + doc, err := index.store.GetDocument(src, id) if err != nil { - index.log.Warn("error decoding gob data", "error", err, "data", buf.String()) + index.log.Warn("error getting document", "error", err) + + continue } - buf.Reset() + + hit.Data = doc + if !yield(hit) { return }@@ -221,35 +236,7 @@ if from != 0 { search.From = from } - return index.search(ctx, search) -} - -func (index *ReadIndex) GetDocument( - ctx context.Context, - source *config.Source, - id string, -) (*nix.Importable, error) { - key := nix.MakeKey(source, id) - query := bleve.NewDocIDQuery([]string{key}) - search := bleve.NewSearchRequest(query) - search.Size = 1 - - result, err := index.search(ctx, search) - if err != nil { - return nil, err - } - - if result.Total == 0 { - return nil, nil - } - - for hit := range result.Hits { - if hit.ID == key { - return &hit.Data, err - } - } - - return nil, err + return index.search(ctx, source, search) } func (index *ReadIndex) Close() error {
M internal/index/search_test.go → internal/index/search_test.go
@@ -12,6 +12,7 @@ "alin.ovh/searchix/internal/config" "alin.ovh/searchix/internal/file" "alin.ovh/searchix/internal/index" "alin.ovh/searchix/internal/nix" + "alin.ovh/searchix/internal/storage" "alin.ovh/x/log" )@@ -27,12 +28,23 @@ t.Fatal(err) } defer root.Close() + store, err := storage.New(&storage.Options{ + Root: root, + Logger: log.Named("storage"), + }) + if err != nil { + t.Fatal(err) + } + defer store.Close() + read, _, exists, err := index.OpenOrCreate(&index.Options{ Logger: log.Named("index"), Root: root, BatchSize: cfg.Importer.BatchSize, Force: false, LowMemory: false, + Store: store, + Config: &cfg, }) if err != nil { t.Fatal(err)@@ -73,7 +85,11 @@ "gitSVN": 0, } var i int for hit := range result.Hits { - data := hit.Data.(nix.Package) + data, ok := hit.Data.(*nix.Package) + if !ok { + t.Fatalf("Expected hit.Data to be *nix.Package, got %T", hit.Data) + } + if _, found := important[data.Attribute]; found { important[data.Attribute] = i }@@ -99,11 +115,23 @@ t.Fatal(err) } defer root.Close() + store, err := storage.New(&storage.Options{ + Root: root, + Logger: log, + }) + if err != nil { + t.Fatal(err) + } + defer store.Close() + read, _, exists, err := index.OpenOrCreate(&index.Options{ Force: false, LowMemory: false, Logger: log.Named("index"), Root: root, + BatchSize: 0, + Store: store, + Config: &cfg, }) if err != nil { t.Fatal(err)@@ -147,7 +175,11 @@ unwanted := "javacc" unwantedIndex := math.MaxInt var i int for hit := range result.Hits { - data := hit.Data.(nix.Package) + data, ok := hit.Data.(*nix.Package) + if !ok { + t.Fatalf("Expected hit.Data to be *nix.Package, got %T", hit.Data) + } + if _, found := important[data.Attribute]; found { matches = append(matches, data.Attribute) } else if data.Attribute == unwanted {
M internal/nix/option.go → internal/nix/option.go
@@ -22,7 +22,7 @@ URL string } type Option struct { - Name string + Name string `storm:"id"` Source string Declarations []Link Default *Docs `json:",omitempty"`
M internal/nix/package.go → internal/nix/package.go
@@ -2,7 +2,7 @@ package nix type Package struct { Name string - Attribute string + Attribute string `storm:"id"` Source string Broken bool Definition string
M internal/server/mux.go → internal/server/mux.go
@@ -70,6 +70,7 @@ if options.ReadIndex == nil { return nil, fault.New("read index is nil") } index := options.ReadIndex + store := options.Store sortSources(cfg.Importer.Sources) assets, err := frontend.New() if err != nil {@@ -230,10 +231,7 @@ return } importerSingular := importerType.Singular() - ctx, cancel := context.WithTimeout(r.Context(), searchTimeout) - doc, err := index.GetDocument(ctx, source, r.PathValue("id")) - cancel() - + doc, err := store.GetDocument(source, r.PathValue("id")) if err != nil { errorHandler( w,@@ -260,9 +258,9 @@ } var baseErr error if r.Header.Get("Fetch") == "true" { w.Header().Add("Content-Type", "text/html; charset=utf-8") - baseErr = components.Detail(*doc).Render(w) + baseErr = components.Detail(doc).Render(w) } else { - baseErr = components.DetailPage(tdata, *doc).Render(w) + baseErr = components.DetailPage(tdata, doc).Render(w) } if baseErr != nil { log.Error("template error", "template", importerSingular, "error", baseErr)
M internal/server/server.go → internal/server/server.go
@@ -10,6 +10,7 @@ "alin.ovh/searchix/internal/config" "alin.ovh/searchix/internal/index" "alin.ovh/searchix/internal/manpages" + "alin.ovh/searchix/internal/storage" "alin.ovh/x/log" "github.com/Southclaws/fault"@@ -29,6 +30,7 @@ } type Options struct { ReadIndex *index.ReadIndex + Store *storage.Store ManpagesURLMap *manpages.URLMap }
A internal/storage/log.go
@@ -0,0 +1,15 @@ +package storage + +import "go.uber.org/zap" + +type Logger struct { + *zap.SugaredLogger +} + +func (l Logger) Warning(v ...any) { + l.Warn(v...) +} + +func (l Logger) Warningf(format string, v ...any) { + l.Warnf(format, v...) +}
A internal/storage/store.go
@@ -0,0 +1,174 @@ +package storage + +import ( + "context" + "errors" + "time" + + "alin.ovh/searchix/internal/config" + "alin.ovh/searchix/internal/file" + "alin.ovh/searchix/internal/nix" + "alin.ovh/x/log" + "github.com/Southclaws/fault" + "github.com/Southclaws/fault/fmsg" + "github.com/asdine/storm/v3" + "github.com/asdine/storm/v3/codec/gob" + "go.uber.org/zap" + + "go.etcd.io/bbolt" +) + +var BatchSize = 10000 + +type Options struct { + Root *file.Root + Logger *log.Logger +} + +type Store struct { + db *storm.DB + log *log.Logger +} + +func New(opts *Options) (*Store, error) { + //nolint:forbidigo // external package + path := opts.Root.JoinPath("searchix.bolt") + bb, err := storm.Open(path, + storm.Codec(gob.Codec), + storm.BoltOptions(0o600, &bbolt.Options{ + Timeout: 1 * time.Second, + Logger: Logger{ + opts.Logger.Named("bolt").GetLogger().Sugar().WithOptions( + zap.IncreaseLevel(zap.InfoLevel), + ), + }, + }), + ) + if err != nil { + return nil, fault.Wrap(err, fmsg.With("failed to open database")) + } + + return &Store{ + db: bb, + log: opts.Logger, + }, nil +} + +func (s *Store) Close() error { + err := s.db.Close() + if err != nil { + return fault.Wrap(err, fmsg.With("failed to close database")) + } + + return nil +} + +func (s *Store) Import( + ctx context.Context, + source *config.Source, + objects <-chan nix.Importable, +) <-chan error { + errs := make(chan error) + node := s.db.From(source.Key).WithBatch(true) + + i := 0 + + var save func(storm.Node, nix.Importable) error + switch source.Importer { + case config.Packages: + save = saveGen[nix.Package] + case config.Options: + save = saveGen[nix.Option] + default: + errs <- fault.New("invalid importer") + + return errs + } + + go func() { + defer close(errs) + tx, err := node.Begin(true) + if err != nil { + errs <- fault.Wrap(err, fmsg.With("failed to begin transaction")) + + return + } + defer func() { + if err := tx.Rollback(); err != nil { + if !errors.Is(err, storm.ErrNotInTransaction) { + errs <- fault.Wrap(err, fmsg.With("failed to rollback transaction")) + } + } + }() + + outer: + for obj := range objects { + i++ + select { + case <-ctx.Done(): + s.log.Warn("import aborted") + + break outer + default: + } + + err := save(tx, obj) + if err != nil { + errs <- fault.Wrap(err, fmsg.With("failed to save object")) + } + + if i%BatchSize == 0 { + s.log.Info("imported", "count", i) + } + } + + if err := tx.Commit(); err != nil { + errs <- fault.Wrap(err, fmsg.With("failed to commit transaction")) + } + }() + + return errs +} + +func saveGen[T nix.Importable](node storm.Node, obj nix.Importable) error { + doc, ok := obj.(T) + if !ok { + return fault.Newf("invalid type: %T", obj) + } + + if err := node.Save(&doc); err != nil { + return fault.Wrap(err, fmsg.With("failed to save document")) + } + + return nil +} + +func (s *Store) GetDocument( + source *config.Source, + id string, +) (nix.Importable, error) { + var doc nix.Importable + var err error + + node := s.db.From(source.Key) + + switch source.Importer { + case config.Packages: + doc = &nix.Package{} + err = node.One("Attribute", id, doc) + case config.Options: + doc = &nix.Option{} + err = node.One("Name", id, doc) + default: + return nil, fault.New("invalid importer type") + } + + if err != nil { + return nil, fault.Wrap( + err, + fmsg.Withf("failed to get document source: %s id: %s", source.Key, id), + ) + } + + return doc, nil +}
M nix/dev-shell.nix → nix/dev-shell.nix
@@ -17,6 +17,7 @@ sd modd brotli + boltbrowser bleve wgo gomod2nix