feat(search): enhanced meilisearch search experience (#864)

* feat(search): enhanced `meilisearch` search experience
- upgrade `meilisearch` dependency
- support subdirectory search
- optimize searchDocument fields for subdirectory search
- specify full index uid instead of index prefix

* fix(search): more fixes to `meilisearch`
- make use of context where context was not used
- remove the task-waiting code from the deletion process, as tasks are queued and executed in order (provided they were successfully submitted to the queue), which can improve `AutoUpdate` performance
This commit is contained in:
hcrgm
2025-07-31 11:24:22 +08:00
committed by GitHub
parent 54ae7e6d9b
commit 1682e873d6
8 changed files with 276 additions and 375 deletions

View File

@ -20,9 +20,9 @@ type Database struct {
}
type Meilisearch struct {
Host string `json:"host" env:"HOST"`
APIKey string `json:"api_key" env:"API_KEY"`
IndexPrefix string `json:"index_prefix" env:"INDEX_PREFIX"`
Host string `json:"host" env:"HOST"`
APIKey string `json:"api_key" env:"API_KEY"`
Index string `json:"index" env:"INDEX"`
}
type Scheme struct {
@ -155,7 +155,8 @@ func DefaultConfig(dataDir string) *Config {
DBFile: dbPath,
},
Meilisearch: Meilisearch{
Host: "http://localhost:7700",
Host: "http://localhost:7700",
Index: "openlist",
},
BleveDir: indexDir,
Log: LogConfig{

View File

@ -3,6 +3,7 @@ package meilisearch
import (
"errors"
"fmt"
"time"
"github.com/OpenListTeam/OpenList/v4/internal/conf"
"github.com/OpenListTeam/OpenList/v4/internal/model"
@ -18,13 +19,18 @@ var config = searcher.Config{
func init() {
searcher.RegisterSearcher(config, func() (searcher.Searcher, error) {
indexUid := conf.Conf.Meilisearch.Index
if len(indexUid) == 0 {
return nil, errors.New("index is blank")
}
m := Meilisearch{
Client: meilisearch.NewClient(meilisearch.ClientConfig{
Host: conf.Conf.Meilisearch.Host,
APIKey: conf.Conf.Meilisearch.APIKey,
}),
IndexUid: conf.Conf.Meilisearch.IndexPrefix + "openlist",
FilterableAttributes: []string{"parent", "is_dir", "name"},
Client: meilisearch.New(
conf.Conf.Meilisearch.Host,
meilisearch.WithAPIKey(conf.Conf.Meilisearch.APIKey),
),
IndexUid: indexUid,
FilterableAttributes: []string{"parent", "is_dir", "name",
"parent_hash", "parent_path_hashes"},
SearchableAttributes: []string{"name"},
}
@ -40,7 +46,7 @@ func init() {
if err != nil {
return nil, err
}
forTask, err := m.Client.WaitForTask(task.TaskUID)
forTask, err := m.Client.WaitForTask(task.TaskUID, time.Second)
if err != nil {
return nil, err
}

View File

@ -10,17 +10,26 @@ import (
"github.com/OpenListTeam/OpenList/v4/internal/model"
"github.com/OpenListTeam/OpenList/v4/internal/search/searcher"
"github.com/OpenListTeam/OpenList/v4/pkg/utils"
"github.com/google/uuid"
"github.com/meilisearch/meilisearch-go"
)
// searchDocument is the schema stored in the meilisearch index.
// It augments model.SearchNode with hash fields used for exact,
// case-sensitive path filtering.
type searchDocument struct {
	// Document id: hash of the file path.
	// Can be used for filtering a file exactly (case-sensitively).
	ID string `json:"id"`
	// Hash of the parent path; can be used for filtering direct children.
	ParentHash string `json:"parent_hash"`
	// One-by-one hashes of the parent path hierarchy.
	// e.g. a file whose parent is '/home/a/b' gets the
	// parent paths '/home/a/b', '/home/a', '/home', '/'.
	// Can be used for filtering all descendants exactly.
	// Storing path hashes instead of plaintext paths benefits
	// disk usage and enables case-sensitive filtering.
	ParentPathHashes []string `json:"parent_path_hashes"`
	model.SearchNode
}
type Meilisearch struct {
Client *meilisearch.Client
Client meilisearch.ServiceManager
IndexUid string
FilterableAttributes []string
SearchableAttributes []string
@ -36,10 +45,20 @@ func (m *Meilisearch) Search(ctx context.Context, req model.SearchReq) ([]model.
Page: int64(req.Page),
HitsPerPage: int64(req.PerPage),
}
var filters []string
if req.Scope != 0 {
mReq.Filter = fmt.Sprintf("is_dir = %v", req.Scope == 1)
filters = append(filters, fmt.Sprintf("is_dir = %v", req.Scope == 1))
}
search, err := m.Client.Index(m.IndexUid).Search(req.Keywords, mReq)
if req.Parent != "" && req.Parent != "/" {
// use parent_path_hashes to filter descendants
parentHash := hashPath(req.Parent)
filters = append(filters, fmt.Sprintf("parent_path_hashes = '%s'", parentHash))
}
if len(filters) > 0 {
mReq.Filter = strings.Join(filters, " AND ")
}
search, err := m.Client.Index(m.IndexUid).SearchWithContext(ctx, req.Keywords, mReq)
if err != nil {
return nil, 0, err
}
@ -64,18 +83,29 @@ func (m *Meilisearch) Index(ctx context.Context, node model.SearchNode) error {
func (m *Meilisearch) BatchIndex(ctx context.Context, nodes []model.SearchNode) error {
documents, _ := utils.SliceConvert(nodes, func(src model.SearchNode) (*searchDocument, error) {
parentHash := hashPath(src.Parent)
nodePath := path.Join(src.Parent, src.Name)
nodePathHash := hashPath(nodePath)
parentPaths := utils.GetPathHierarchy(src.Parent)
parentPathHashes, _ := utils.SliceConvert(parentPaths, func(parentPath string) (string, error) {
return hashPath(parentPath), nil
})
return &searchDocument{
ID: uuid.NewString(),
SearchNode: src,
ID: nodePathHash,
ParentHash: parentHash,
ParentPathHashes: parentPathHashes,
SearchNode: src,
}, nil
})
_, err := m.Client.Index(m.IndexUid).AddDocuments(documents)
// max up to 10,000 documents per batch to reduce error rate while uploading over the Internet
_, err := m.Client.Index(m.IndexUid).AddDocumentsInBatchesWithContext(ctx, documents, 10000)
if err != nil {
return err
}
// documents were uploaded and enqueued for indexing, just return early
//// Wait for the task to complete and check
//forTask, err := m.Client.WaitForTask(task.TaskUID, meilisearch.WaitParams{
// Context: ctx,
@ -92,23 +122,20 @@ func (m *Meilisearch) BatchIndex(ctx context.Context, nodes []model.SearchNode)
func (m *Meilisearch) getDocumentsByParent(ctx context.Context, parent string) ([]*searchDocument, error) {
var result meilisearch.DocumentsResult
err := m.Client.Index(m.IndexUid).GetDocuments(&meilisearch.DocumentsQuery{
Filter: fmt.Sprintf("parent = '%s'", strings.ReplaceAll(parent, "'", "\\'")),
Limit: int64(model.MaxInt),
}, &result)
query := &meilisearch.DocumentsQuery{
Limit: int64(model.MaxInt),
}
if parent != "" && parent != "/" {
// use parent_hash to filter direct children
parentHash := hashPath(parent)
query.Filter = fmt.Sprintf("parent_hash = '%s'", parentHash)
}
err := m.Client.Index(m.IndexUid).GetDocumentsWithContext(ctx, query, &result)
if err != nil {
return nil, err
}
return utils.SliceConvert(result.Results, func(src map[string]any) (*searchDocument, error) {
return &searchDocument{
ID: src["id"].(string),
SearchNode: model.SearchNode{
Parent: src["parent"].(string),
Name: src["name"].(string),
IsDir: src["is_dir"].(bool),
Size: int64(src["size"].(float64)),
},
}, nil
return buildSearchDocumentFromResults(src), nil
})
}
@ -120,91 +147,59 @@ func (m *Meilisearch) Get(ctx context.Context, parent string) ([]model.SearchNod
return utils.SliceConvert(result, func(src *searchDocument) (model.SearchNode, error) {
return src.SearchNode, nil
})
}
func (m *Meilisearch) getParentsByPrefix(ctx context.Context, parent string) ([]string, error) {
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
parents := []string{parent}
get, err := m.getDocumentsByParent(ctx, parent)
if err != nil {
return nil, err
func (m *Meilisearch) getDocumentInPath(ctx context.Context, parent string, name string) (*searchDocument, error) {
var result searchDocument
// join them and calculate the hash to exactly identify the node
nodePath := path.Join(parent, name)
nodePathHash := hashPath(nodePath)
err := m.Client.Index(m.IndexUid).GetDocumentWithContext(ctx, nodePathHash, nil, &result)
if err != nil {
// return nil for documents that do not exist
if err.(*meilisearch.Error).StatusCode == 404 {
return nil, nil
}
for _, node := range get {
if node.IsDir {
arr, err := m.getParentsByPrefix(ctx, path.Join(node.Parent, node.Name))
if err != nil {
return nil, err
}
parents = append(parents, arr...)
}
}
return parents, nil
return nil, err
}
return &result, nil
}
func (m *Meilisearch) DelDirChild(ctx context.Context, prefix string) error {
dfs, err := m.getParentsByPrefix(ctx, utils.FixAndCleanPath(prefix))
if err != nil {
return err
}
utils.SliceReplace(dfs, func(src string) string {
return "'" + strings.ReplaceAll(src, "'", "\\'") + "'"
})
s := fmt.Sprintf("parent IN [%s]", strings.Join(dfs, ","))
task, err := m.Client.Index(m.IndexUid).DeleteDocumentsByFilter(s)
if err != nil {
return err
}
taskStatus, err := m.getTaskStatus(ctx, task.TaskUID)
if err != nil {
return err
}
if taskStatus != meilisearch.TaskStatusSucceeded {
return fmt.Errorf("DelDir failed, task status is %s", taskStatus)
}
return nil
// delDirChild enqueues deletion of every descendant document of prefix.
// The deletion task is queued on the server; this method does not wait
// for it to complete.
func (m *Meilisearch) delDirChild(ctx context.Context, prefix string) error {
	// Filtering on parent_path_hashes matches all descendants exactly,
	// so there is no need to walk the directory tree collecting IDs —
	// deletion is a single filtered request, which is both faster and
	// easier to maintain.
	prefixHash := hashPath(prefix)
	filter := fmt.Sprintf("parent_path_hashes = '%s'", prefixHash)
	index := m.Client.Index(m.IndexUid)
	// task was enqueued (if succeed), no need to wait
	_, err := index.DeleteDocumentsByFilterWithContext(ctx, filter)
	return err
}
func (m *Meilisearch) Del(ctx context.Context, prefix string) error {
prefix = utils.FixAndCleanPath(prefix)
dir, name := path.Split(prefix)
get, err := m.getDocumentsByParent(ctx, dir[:len(dir)-1])
if dir != "/" {
dir = dir[:len(dir)-1]
}
document, err := m.getDocumentInPath(ctx, dir, name)
if err != nil {
return err
}
var document *searchDocument
for _, v := range get {
if v.Name == name {
document = v
break
}
}
if document == nil {
// Defensive programming. Document may be the folder, try deleting Child
return m.DelDirChild(ctx, prefix)
return m.delDirChild(ctx, prefix)
}
if document.IsDir {
err = m.DelDirChild(ctx, prefix)
err = m.delDirChild(ctx, prefix)
if err != nil {
return err
}
}
task, err := m.Client.Index(m.IndexUid).DeleteDocument(document.ID)
if err != nil {
return err
}
taskStatus, err := m.getTaskStatus(ctx, task.TaskUID)
if err != nil {
return err
}
if taskStatus != meilisearch.TaskStatusSucceeded {
return fmt.Errorf("DelDir failed, task status is %s", taskStatus)
}
return nil
_, err = m.Client.Index(m.IndexUid).DeleteDocumentWithContext(ctx, document.ID)
// task was enqueued (if succeed), no need to wait
return err
}
func (m *Meilisearch) Release(ctx context.Context) error {
@ -212,15 +207,13 @@ func (m *Meilisearch) Release(ctx context.Context) error {
}
func (m *Meilisearch) Clear(ctx context.Context) error {
_, err := m.Client.Index(m.IndexUid).DeleteAllDocuments()
_, err := m.Client.Index(m.IndexUid).DeleteAllDocumentsWithContext(ctx)
// task was enqueued (if succeed), no need to wait
return err
}
func (m *Meilisearch) getTaskStatus(ctx context.Context, taskUID int64) (meilisearch.TaskStatus, error) {
forTask, err := m.Client.WaitForTask(taskUID, meilisearch.WaitParams{
Context: ctx,
Interval: time.Second,
})
forTask, err := m.Client.WaitForTaskWithContext(ctx, taskUID, time.Second)
if err != nil {
return meilisearch.TaskStatusUnknown, err
}

View File

@ -0,0 +1,31 @@
package meilisearch
import (
"github.com/OpenListTeam/OpenList/v4/internal/model"
"github.com/OpenListTeam/OpenList/v4/pkg/utils"
)
// hashPath returns the SHA-1 hash of path.
// Path-related exact matching should compare hashes rather than raw
// strings, because string filtering in meilisearch is case-insensitive.
func hashPath(path string) string {
	raw := []byte(path)
	return utils.HashData(utils.SHA1, raw)
}
// buildSearchDocumentFromResults converts a raw meilisearch document
// (decoded from JSON into a map[string]any) into a searchDocument.
// Missing or mistyped fields are left at their zero values; all type
// assertions are non-panicking.
func buildSearchDocumentFromResults(results map[string]any) *searchDocument {
	document := &searchDocument{}
	// Assign directly to document.SearchNode: populating a local
	// model.SearchNode after copying it into the struct would lose
	// the assignments (structs are copied by value).
	// use assertion test to avoid panic
	document.SearchNode.Parent, _ = results["parent"].(string)
	document.SearchNode.Name, _ = results["name"].(string)
	document.SearchNode.IsDir, _ = results["is_dir"].(bool)
	// encoding/json decodes JSON numbers into float64, so asserting
	// int64 directly would always fail and leave Size at 0.
	if size, ok := results["size"].(float64); ok {
		document.SearchNode.Size = int64(size)
	} else if size, ok := results["size"].(int64); ok {
		document.SearchNode.Size = size
	}
	document.ID, _ = results["id"].(string)
	document.ParentHash, _ = results["parent_hash"].(string)
	// encoding/json decodes JSON arrays into []any, never []string,
	// so convert element-wise (skipping any non-string entries).
	switch hashes := results["parent_path_hashes"].(type) {
	case []string:
		document.ParentPathHashes = hashes
	case []any:
		converted := make([]string, 0, len(hashes))
		for _, v := range hashes {
			if s, ok := v.(string); ok {
				converted = append(converted, s)
			}
		}
		document.ParentPathHashes = converted
	}
	return document
}