mirror of
https://github.com/OpenListTeam/OpenList.git
synced 2025-09-19 04:06:18 +08:00
feat(search): enhanced meilisearch
search experience (#864)
* feat(search): enhanced `meilisearch` search experience
  - upgrade the `meilisearch` dependency
  - support subdirectory search
  - optimize searchDocument fields for subdirectory search
  - specify the full index uid instead of an index prefix
* fix(search): more fixes to `meilisearch`
  - make use of context where it was previously not used
  - remove the task-waiting code from the deletion process: tasks are queued and executed in order (provided they were submitted to the queue successfully), which can improve `AutoUpdate` performance
This commit is contained in:
@ -20,9 +20,9 @@ type Database struct {
|
||||
}
|
||||
|
||||
type Meilisearch struct {
|
||||
Host string `json:"host" env:"HOST"`
|
||||
APIKey string `json:"api_key" env:"API_KEY"`
|
||||
IndexPrefix string `json:"index_prefix" env:"INDEX_PREFIX"`
|
||||
Host string `json:"host" env:"HOST"`
|
||||
APIKey string `json:"api_key" env:"API_KEY"`
|
||||
Index string `json:"index" env:"INDEX"`
|
||||
}
|
||||
|
||||
type Scheme struct {
|
||||
@ -155,7 +155,8 @@ func DefaultConfig(dataDir string) *Config {
|
||||
DBFile: dbPath,
|
||||
},
|
||||
Meilisearch: Meilisearch{
|
||||
Host: "http://localhost:7700",
|
||||
Host: "http://localhost:7700",
|
||||
Index: "openlist",
|
||||
},
|
||||
BleveDir: indexDir,
|
||||
Log: LogConfig{
|
||||
|
@ -3,6 +3,7 @@ package meilisearch
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/OpenListTeam/OpenList/v4/internal/conf"
|
||||
"github.com/OpenListTeam/OpenList/v4/internal/model"
|
||||
@ -18,13 +19,18 @@ var config = searcher.Config{
|
||||
|
||||
func init() {
|
||||
searcher.RegisterSearcher(config, func() (searcher.Searcher, error) {
|
||||
indexUid := conf.Conf.Meilisearch.Index
|
||||
if len(indexUid) == 0 {
|
||||
return nil, errors.New("index is blank")
|
||||
}
|
||||
m := Meilisearch{
|
||||
Client: meilisearch.NewClient(meilisearch.ClientConfig{
|
||||
Host: conf.Conf.Meilisearch.Host,
|
||||
APIKey: conf.Conf.Meilisearch.APIKey,
|
||||
}),
|
||||
IndexUid: conf.Conf.Meilisearch.IndexPrefix + "openlist",
|
||||
FilterableAttributes: []string{"parent", "is_dir", "name"},
|
||||
Client: meilisearch.New(
|
||||
conf.Conf.Meilisearch.Host,
|
||||
meilisearch.WithAPIKey(conf.Conf.Meilisearch.APIKey),
|
||||
),
|
||||
IndexUid: indexUid,
|
||||
FilterableAttributes: []string{"parent", "is_dir", "name",
|
||||
"parent_hash", "parent_path_hashes"},
|
||||
SearchableAttributes: []string{"name"},
|
||||
}
|
||||
|
||||
@ -40,7 +46,7 @@ func init() {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
forTask, err := m.Client.WaitForTask(task.TaskUID)
|
||||
forTask, err := m.Client.WaitForTask(task.TaskUID, time.Second)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -10,17 +10,26 @@ import (
|
||||
"github.com/OpenListTeam/OpenList/v4/internal/model"
|
||||
"github.com/OpenListTeam/OpenList/v4/internal/search/searcher"
|
||||
"github.com/OpenListTeam/OpenList/v4/pkg/utils"
|
||||
"github.com/google/uuid"
|
||||
"github.com/meilisearch/meilisearch-go"
|
||||
)
|
||||
|
||||
type searchDocument struct {
|
||||
// Document id, hash of the file path,
|
||||
// can be used for filtering a file exactly(case-sensitively).
|
||||
ID string `json:"id"`
|
||||
// Hash of parent, can be used for filtering direct children.
|
||||
ParentHash string `json:"parent_hash"`
|
||||
// One-by-one hash of parent paths (path hierarchy).
|
||||
// eg: A file's parent is '/home/a/b',
|
||||
// its parent paths are '/home/a/b', '/home/a', '/home', '/'.
|
||||
// Can be used for filtering all descendants exactly.
|
||||
// Storing path hashes instead of plaintext paths benefits disk usage and case-sensitive filter.
|
||||
ParentPathHashes []string `json:"parent_path_hashes"`
|
||||
model.SearchNode
|
||||
}
|
||||
|
||||
type Meilisearch struct {
|
||||
Client *meilisearch.Client
|
||||
Client meilisearch.ServiceManager
|
||||
IndexUid string
|
||||
FilterableAttributes []string
|
||||
SearchableAttributes []string
|
||||
@ -36,10 +45,20 @@ func (m *Meilisearch) Search(ctx context.Context, req model.SearchReq) ([]model.
|
||||
Page: int64(req.Page),
|
||||
HitsPerPage: int64(req.PerPage),
|
||||
}
|
||||
var filters []string
|
||||
if req.Scope != 0 {
|
||||
mReq.Filter = fmt.Sprintf("is_dir = %v", req.Scope == 1)
|
||||
filters = append(filters, fmt.Sprintf("is_dir = %v", req.Scope == 1))
|
||||
}
|
||||
search, err := m.Client.Index(m.IndexUid).Search(req.Keywords, mReq)
|
||||
if req.Parent != "" && req.Parent != "/" {
|
||||
// use parent_path_hashes to filter descendants
|
||||
parentHash := hashPath(req.Parent)
|
||||
filters = append(filters, fmt.Sprintf("parent_path_hashes = '%s'", parentHash))
|
||||
}
|
||||
if len(filters) > 0 {
|
||||
mReq.Filter = strings.Join(filters, " AND ")
|
||||
}
|
||||
|
||||
search, err := m.Client.Index(m.IndexUid).SearchWithContext(ctx, req.Keywords, mReq)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
@ -64,18 +83,29 @@ func (m *Meilisearch) Index(ctx context.Context, node model.SearchNode) error {
|
||||
|
||||
func (m *Meilisearch) BatchIndex(ctx context.Context, nodes []model.SearchNode) error {
|
||||
documents, _ := utils.SliceConvert(nodes, func(src model.SearchNode) (*searchDocument, error) {
|
||||
parentHash := hashPath(src.Parent)
|
||||
nodePath := path.Join(src.Parent, src.Name)
|
||||
nodePathHash := hashPath(nodePath)
|
||||
parentPaths := utils.GetPathHierarchy(src.Parent)
|
||||
parentPathHashes, _ := utils.SliceConvert(parentPaths, func(parentPath string) (string, error) {
|
||||
return hashPath(parentPath), nil
|
||||
})
|
||||
|
||||
return &searchDocument{
|
||||
ID: uuid.NewString(),
|
||||
SearchNode: src,
|
||||
ID: nodePathHash,
|
||||
ParentHash: parentHash,
|
||||
ParentPathHashes: parentPathHashes,
|
||||
SearchNode: src,
|
||||
}, nil
|
||||
})
|
||||
|
||||
_, err := m.Client.Index(m.IndexUid).AddDocuments(documents)
|
||||
// max up to 10,000 documents per batch to reduce error rate while uploading over the Internet
|
||||
_, err := m.Client.Index(m.IndexUid).AddDocumentsInBatchesWithContext(ctx, documents, 10000)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// documents were uploaded and enqueued for indexing, just return early
|
||||
//// Wait for the task to complete and check
|
||||
//forTask, err := m.Client.WaitForTask(task.TaskUID, meilisearch.WaitParams{
|
||||
// Context: ctx,
|
||||
@ -92,23 +122,20 @@ func (m *Meilisearch) BatchIndex(ctx context.Context, nodes []model.SearchNode)
|
||||
|
||||
func (m *Meilisearch) getDocumentsByParent(ctx context.Context, parent string) ([]*searchDocument, error) {
|
||||
var result meilisearch.DocumentsResult
|
||||
err := m.Client.Index(m.IndexUid).GetDocuments(&meilisearch.DocumentsQuery{
|
||||
Filter: fmt.Sprintf("parent = '%s'", strings.ReplaceAll(parent, "'", "\\'")),
|
||||
Limit: int64(model.MaxInt),
|
||||
}, &result)
|
||||
query := &meilisearch.DocumentsQuery{
|
||||
Limit: int64(model.MaxInt),
|
||||
}
|
||||
if parent != "" && parent != "/" {
|
||||
// use parent_hash to filter direct children
|
||||
parentHash := hashPath(parent)
|
||||
query.Filter = fmt.Sprintf("parent_hash = '%s'", parentHash)
|
||||
}
|
||||
err := m.Client.Index(m.IndexUid).GetDocumentsWithContext(ctx, query, &result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return utils.SliceConvert(result.Results, func(src map[string]any) (*searchDocument, error) {
|
||||
return &searchDocument{
|
||||
ID: src["id"].(string),
|
||||
SearchNode: model.SearchNode{
|
||||
Parent: src["parent"].(string),
|
||||
Name: src["name"].(string),
|
||||
IsDir: src["is_dir"].(bool),
|
||||
Size: int64(src["size"].(float64)),
|
||||
},
|
||||
}, nil
|
||||
return buildSearchDocumentFromResults(src), nil
|
||||
})
|
||||
}
|
||||
|
||||
@ -120,91 +147,59 @@ func (m *Meilisearch) Get(ctx context.Context, parent string) ([]model.SearchNod
|
||||
return utils.SliceConvert(result, func(src *searchDocument) (model.SearchNode, error) {
|
||||
return src.SearchNode, nil
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func (m *Meilisearch) getParentsByPrefix(ctx context.Context, parent string) ([]string, error) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
parents := []string{parent}
|
||||
get, err := m.getDocumentsByParent(ctx, parent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
func (m *Meilisearch) getDocumentInPath(ctx context.Context, parent string, name string) (*searchDocument, error) {
|
||||
var result searchDocument
|
||||
// join them and calculate the hash to exactly identify the node
|
||||
nodePath := path.Join(parent, name)
|
||||
nodePathHash := hashPath(nodePath)
|
||||
err := m.Client.Index(m.IndexUid).GetDocumentWithContext(ctx, nodePathHash, nil, &result)
|
||||
if err != nil {
|
||||
// return nil for documents that do not exist
|
||||
if err.(*meilisearch.Error).StatusCode == 404 {
|
||||
return nil, nil
|
||||
}
|
||||
for _, node := range get {
|
||||
if node.IsDir {
|
||||
arr, err := m.getParentsByPrefix(ctx, path.Join(node.Parent, node.Name))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
parents = append(parents, arr...)
|
||||
}
|
||||
}
|
||||
return parents, nil
|
||||
return nil, err
|
||||
}
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
func (m *Meilisearch) DelDirChild(ctx context.Context, prefix string) error {
|
||||
dfs, err := m.getParentsByPrefix(ctx, utils.FixAndCleanPath(prefix))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
utils.SliceReplace(dfs, func(src string) string {
|
||||
return "'" + strings.ReplaceAll(src, "'", "\\'") + "'"
|
||||
})
|
||||
s := fmt.Sprintf("parent IN [%s]", strings.Join(dfs, ","))
|
||||
task, err := m.Client.Index(m.IndexUid).DeleteDocumentsByFilter(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
taskStatus, err := m.getTaskStatus(ctx, task.TaskUID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if taskStatus != meilisearch.TaskStatusSucceeded {
|
||||
return fmt.Errorf("DelDir failed, task status is %s", taskStatus)
|
||||
}
|
||||
return nil
|
||||
func (m *Meilisearch) delDirChild(ctx context.Context, prefix string) error {
|
||||
prefix = hashPath(prefix)
|
||||
// use parent_path_hashes to filter descendants,
|
||||
// so no longer need to walk through the directories to get their IDs,
|
||||
// speeding up the deletion process with easy maintained codebase
|
||||
filter := fmt.Sprintf("parent_path_hashes = '%s'", prefix)
|
||||
_, err := m.Client.Index(m.IndexUid).DeleteDocumentsByFilterWithContext(ctx, filter)
|
||||
// task was enqueued (if succeed), no need to wait
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *Meilisearch) Del(ctx context.Context, prefix string) error {
|
||||
prefix = utils.FixAndCleanPath(prefix)
|
||||
dir, name := path.Split(prefix)
|
||||
get, err := m.getDocumentsByParent(ctx, dir[:len(dir)-1])
|
||||
if dir != "/" {
|
||||
dir = dir[:len(dir)-1]
|
||||
}
|
||||
|
||||
document, err := m.getDocumentInPath(ctx, dir, name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var document *searchDocument
|
||||
for _, v := range get {
|
||||
if v.Name == name {
|
||||
document = v
|
||||
break
|
||||
}
|
||||
}
|
||||
if document == nil {
|
||||
// Defensive programming. Document may be the folder, try deleting Child
|
||||
return m.DelDirChild(ctx, prefix)
|
||||
return m.delDirChild(ctx, prefix)
|
||||
}
|
||||
if document.IsDir {
|
||||
err = m.DelDirChild(ctx, prefix)
|
||||
err = m.delDirChild(ctx, prefix)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
task, err := m.Client.Index(m.IndexUid).DeleteDocument(document.ID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
taskStatus, err := m.getTaskStatus(ctx, task.TaskUID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if taskStatus != meilisearch.TaskStatusSucceeded {
|
||||
return fmt.Errorf("DelDir failed, task status is %s", taskStatus)
|
||||
}
|
||||
return nil
|
||||
_, err = m.Client.Index(m.IndexUid).DeleteDocumentWithContext(ctx, document.ID)
|
||||
// task was enqueued (on success), no need to wait
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *Meilisearch) Release(ctx context.Context) error {
|
||||
@ -212,15 +207,13 @@ func (m *Meilisearch) Release(ctx context.Context) error {
|
||||
}
|
||||
|
||||
func (m *Meilisearch) Clear(ctx context.Context) error {
|
||||
_, err := m.Client.Index(m.IndexUid).DeleteAllDocuments()
|
||||
_, err := m.Client.Index(m.IndexUid).DeleteAllDocumentsWithContext(ctx)
|
||||
// task was enqueued (on success), no need to wait
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *Meilisearch) getTaskStatus(ctx context.Context, taskUID int64) (meilisearch.TaskStatus, error) {
|
||||
forTask, err := m.Client.WaitForTask(taskUID, meilisearch.WaitParams{
|
||||
Context: ctx,
|
||||
Interval: time.Second,
|
||||
})
|
||||
forTask, err := m.Client.WaitForTaskWithContext(ctx, taskUID, time.Second)
|
||||
if err != nil {
|
||||
return meilisearch.TaskStatusUnknown, err
|
||||
}
|
||||
|
31
internal/search/meilisearch/utils.go
Normal file
31
internal/search/meilisearch/utils.go
Normal file
@ -0,0 +1,31 @@
|
||||
package meilisearch
|
||||
|
||||
import (
|
||||
"github.com/OpenListTeam/OpenList/v4/internal/model"
|
||||
"github.com/OpenListTeam/OpenList/v4/pkg/utils"
|
||||
)
|
||||
|
||||
// hashPath hashes a path with SHA-1.
|
||||
// Path-relative exact matching should use hash,
|
||||
// because filtering strings on meilisearch is case-insensitive.
|
||||
func hashPath(path string) string {
|
||||
return utils.HashData(utils.SHA1, []byte(path))
|
||||
}
|
||||
|
||||
func buildSearchDocumentFromResults(results map[string]any) *searchDocument {
|
||||
searchNode := model.SearchNode{}
|
||||
document := &searchDocument{
|
||||
SearchNode: searchNode,
|
||||
}
|
||||
|
||||
// use assertion test to avoid panic
|
||||
searchNode.Parent, _ = results["parent"].(string)
|
||||
searchNode.Name, _ = results["name"].(string)
|
||||
searchNode.IsDir, _ = results["is_dir"].(bool)
|
||||
searchNode.Size, _ = results["size"].(int64)
|
||||
|
||||
document.ID, _ = results["id"].(string)
|
||||
document.ParentHash, _ = results["parent_hash"].(string)
|
||||
document.ParentPathHashes, _ = results["parent_path_hashes"].([]string)
|
||||
return document
|
||||
}
|
Reference in New Issue
Block a user