mirror of https://github.com/go-gitea/gitea
102 lines
2.3 KiB
Go
102 lines
2.3 KiB
Go
// Copyright 2024 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package path
|
|
|
|
import (
|
|
"slices"
|
|
"strings"
|
|
|
|
"github.com/blevesearch/bleve/v2/analysis"
|
|
"github.com/blevesearch/bleve/v2/registry"
|
|
)
|
|
|
|
// Name is the identifier under which the path token filter is registered.
const Name = "gitea/path"
|
|
|
|
// TokenFilter is a token filter that expands the components of a path into
// hierarchical path terms. It carries no state.
type TokenFilter struct{}
|
|
|
|
func NewTokenFilter() *TokenFilter {
|
|
return &TokenFilter{}
|
|
}
|
|
|
|
func TokenFilterConstructor(config map[string]any, cache *registry.Cache) (analysis.TokenFilter, error) {
|
|
return NewTokenFilter(), nil
|
|
}
|
|
|
|
func (s *TokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
|
if len(input) == 1 {
|
|
// if there is only one token, we dont need to generate the reversed chain
|
|
return generatePathTokens(input, false)
|
|
}
|
|
|
|
normal := generatePathTokens(input, false)
|
|
reversed := generatePathTokens(input, true)
|
|
|
|
return append(normal, reversed...)
|
|
}
|
|
|
|
// Generates path tokens from the input tokens.
|
|
// This mimics the behavior of the path hierarchy tokenizer in ES. It takes the input tokens and combine them, generating a term for each component
|
|
// in tree (e.g., foo/bar/baz.md will generate foo, foo/bar, and foo/bar/baz.md).
|
|
//
|
|
// If the reverse flag is set, the order of the tokens is reversed (the same input will generate baz.md, baz.md/bar, baz.md/bar/foo). This is useful
|
|
// to efficiently search for filenames without supplying the fullpath.
|
|
func generatePathTokens(input analysis.TokenStream, reversed bool) analysis.TokenStream {
|
|
terms := make([]string, 0, len(input))
|
|
longestTerm := 0
|
|
|
|
if reversed {
|
|
slices.Reverse(input)
|
|
}
|
|
|
|
for i := 0; i < len(input); i++ {
|
|
var sb strings.Builder
|
|
sb.WriteString(string(input[0].Term))
|
|
|
|
for j := 1; j < i; j++ {
|
|
sb.WriteString("/")
|
|
sb.WriteString(string(input[j].Term))
|
|
}
|
|
|
|
term := sb.String()
|
|
|
|
if longestTerm < len(term) {
|
|
longestTerm = len(term)
|
|
}
|
|
|
|
terms = append(terms, term)
|
|
}
|
|
|
|
output := make(analysis.TokenStream, 0, len(terms))
|
|
|
|
for _, term := range terms {
|
|
var start, end int
|
|
|
|
if reversed {
|
|
start = 0
|
|
end = len(term)
|
|
} else {
|
|
start = longestTerm - len(term)
|
|
end = longestTerm
|
|
}
|
|
|
|
token := analysis.Token{
|
|
Position: 1,
|
|
Start: start,
|
|
End: end,
|
|
Type: analysis.AlphaNumeric,
|
|
Term: []byte(term),
|
|
}
|
|
|
|
output = append(output, &token)
|
|
}
|
|
|
|
return output
|
|
}
|
|
|
|
func init() {
|
|
registry.RegisterTokenFilter(Name, TokenFilterConstructor)
|
|
}
|