all repos — searchix @ 87f2c60692a7ab6a46b969ddd29e1c97e0cd6722

Search engine for NixOS, nix-darwin, home-manager and NUR users

fix: keep names_with_underscores as single tokens Fixes: https://codeberg.org/alanpearce/searchix/issues/2

Alan Pearce
commit

87f2c60692a7ab6a46b969ddd29e1c97e0cd6722

parent

660a81fb1c8f02150f18a143e031844a696e8311

1 file changed, 112 insertions(+), 0 deletions(-)

changed files
A internal/index/nixattr/parser.go
@@ -0,0 +1,112 @@
+// Copyright (c) 2016 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nixattr + +import ( + "github.com/blevesearch/bleve/v2/analysis" +) + +func (p *Parser) buildTokenFromTerm(buffer []rune) *analysis.Token { + term := analysis.BuildTermFromRunes(buffer) + token := &analysis.Token{ + Term: term, + Position: p.position, + Start: p.index, + End: p.index + len(term), + } + p.position++ + p.index += len(term) + + return token +} + +// Parser accepts a symbol and passes it to the current state (representing a class). +// The state can accept it (and accumulate it). Otherwise, the parser creates a new state that +// starts with the pushed symbol. +// +// Parser accumulates a new resulting token every time it switches state. +// Use FlushTokens() to get the results after the last symbol was pushed. +type Parser struct { + bufferLen int + buffer []rune + current State + tokens []*analysis.Token + position int + index int +} + +func NewParser(length, position, index int) *Parser { + return &Parser{ + bufferLen: length, + buffer: make([]rune, 0, length), + tokens: make([]*analysis.Token, 0, length), + position: position, + index: index, + } +} + +func (p *Parser) Push(sym rune, peek *rune) { + switch { + case p.current == nil: + // the start of parsing + p.current = p.NewState(sym) + p.buffer = append(p.buffer, sym) + + case p.current.Member(sym, peek): + // same state, just accumulate + p.buffer = append(p.buffer, sym) + + default: + // the old state is no more, thus convert the buffer + p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer)) + + // let the new state begin + p.current = p.NewState(sym) + p.buffer = make([]rune, 0, p.bufferLen) + p.buffer = append(p.buffer, sym) + } +} + +// Note. States have to have different starting symbols. +func (p *Parser) NewState(sym rune) State { + var found State + + found = &LowerCaseState{} + if found.StartSym(sym) { + return found + } + + found = &UpperCaseState{} + if found.StartSym(sym) { + return found + } + + found = &NumberCaseState{} + if found.StartSym(sym) { + return found + } + + return &NonAlphaNumericCaseState{} +} + +func (p *Parser) FlushTokens() []*analysis.Token { + p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer)) + + return p.tokens +} + +func (p *Parser) NextPosition() int { + return p.position +}