feat: misc improvements about upload/copy/hash (#5045)

general: add createTime/updateTime support in webdav and some drivers
general: add hash support in some drivers
general: cross-storage rapid-upload support
general: enhance upload to avoid local temp file if possible
general: replace readseekcloser with File interface to speed upstream operations
feat(aliyun_open): same as above
feat(crypt): add hack for 139cloud

Close #4934 
Close #4819 

baidu_netdisk needs to improve the upload code to support rapid-upload
This commit is contained in:
Sean
2023-08-27 21:14:23 +08:00
committed by GitHub
parent 9b765ef696
commit a3748af772
77 changed files with 1731 additions and 615 deletions

View File

@ -113,7 +113,7 @@ func CreateNestedFile(path string) (*os.File, error) {
}
// CreateTempFile creates a temp file from an io.Reader, and seeks it to 0
func CreateTempFile(r io.ReadCloser, size int64) (*os.File, error) {
func CreateTempFile(r io.Reader, size int64) (*os.File, error) {
if f, ok := r.(*os.File); ok {
return f, nil
}
@ -171,3 +171,10 @@ func GetMimeType(name string) string {
}
return "application/octet-stream"
}
// Binary size units in bytes; each unit is 1024 times the previous one.
const (
	KB = 1 << 10
	MB = 1 << 20
	GB = 1 << 30
	TB = 1 << 40
)

View File

@ -4,46 +4,178 @@ import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"encoding/base64"
"encoding/hex"
"errors"
"github.com/alist-org/alist/v3/internal/errs"
"hash"
"io"
"strings"
)
// GetSHA1Encode returns the lowercase hex-encoded SHA-1 digest of data.
func GetSHA1Encode(data []byte) string {
	digest := sha1.Sum(data)
	return hex.EncodeToString(digest[:])
}
// GetSHA256Encode returns the lowercase hex-encoded SHA-256 digest of data.
func GetSHA256Encode(data []byte) string {
	digest := sha256.Sum256(data)
	return hex.EncodeToString(digest[:])
}
// GetMD5Encode returns the lowercase hex-encoded MD5 digest of data.
func GetMD5Encode(data []byte) string {
	digest := md5.Sum(data)
	return hex.EncodeToString(digest[:])
}
func GetMD5EncodeStr(data string) string {
return GetMD5Encode([]byte(data))
return HashData(MD5, []byte(data))
}
// Inspired by "github.com/rclone/rclone/fs/hash".

// ErrUnsupported should be returned by a filesystem when it is asked to
// deliver a hash type it does not support.
var ErrUnsupported = errors.New("hash type not supported")

// HashType indicates a standard hashing algorithm.
type HashType struct {
	// Width is the size registered for the digest.
	// NOTE(review): the built-in values (32/40/64) match the length of the
	// hex-encoded digest rather than the raw byte size — confirm with callers.
	Width int
	// Name is the key used in the name lookup table, e.g. "md5".
	Name string
	// Alias is the key used in the alias lookup table, e.g. "SHA-1".
	Alias string
	// NewFunc constructs a fresh hasher for this algorithm.
	NewFunc func() hash.Hash
}

var (
	// name2hash indexes registered hash types by HashType.Name.
	name2hash = map[string]*HashType{}
	// alias2hash indexes registered hash types by HashType.Alias.
	alias2hash = map[string]*HashType{}
	// Supported lists every registered hash type in registration order.
	Supported []*HashType
)

// RegisterHash adds a new hash to the lookup tables and to Supported, and
// returns its HashType.
//
// Registering an existing name or alias replaces the lookup entry but still
// appends another element to Supported. No locking is performed here.
func RegisterHash(name, alias string, width int, newFunc func() hash.Hash) *HashType {
	newType := &HashType{
		Name:    name,
		Alias:   alias,
		Width:   width,
		NewFunc: newFunc,
	}

	name2hash[name] = newType
	alias2hash[alias] = newType
	Supported = append(Supported, newType)
	return newType
}

var (
	// MD5 indicates MD5 support.
	MD5 = RegisterHash("md5", "MD5", 32, md5.New)

	// SHA1 indicates SHA-1 support.
	SHA1 = RegisterHash("sha1", "SHA-1", 40, sha1.New)

	// SHA256 indicates SHA-256 support.
	SHA256 = RegisterHash("sha256", "SHA-256", 64, sha256.New)
)
// HashData returns the lowercase hex-encoded digest of data computed with a
// fresh hasher obtained from hashType.
func HashData(hashType *HashType, data []byte) string {
	hasher := hashType.NewFunc()
	// hash.Hash documents that Write never returns an error.
	hasher.Write(data)
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest)
}
// HashReader drains reader into a fresh hasher obtained from hashType and
// returns the lowercase hex-encoded digest. A read failure is wrapped with
// the "HashReader error" message and an empty string is returned.
func HashReader(hashType *HashType, reader io.Reader) (string, error) {
	hasher := hashType.NewFunc()
	if _, err := io.Copy(hasher, reader); err != nil {
		return "", errs.NewErr(err, "HashReader error")
	}
	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest), nil
}
// HashFile returns the hex-encoded digest of file computed with hashType and
// then rewinds file to its start so it can be read again.
//
// Hashing begins at the file's current offset; the caller is responsible for
// positioning it beforehand. If hashing fails, an empty string and the error
// are returned. If only the final rewind fails, the digest is still returned
// together with the seek error.
func HashFile(hashType *HashType, file io.ReadSeeker) (string, error) {
	str, err := HashReader(hashType, file)
	if err != nil {
		return "", err
	}
	if _, err = file.Seek(0, io.SeekStart); err != nil {
		return str, err
	}
	return str, nil
}
// fromTypes builds one fresh hasher per requested type, keyed by that type.
func fromTypes(types []*HashType) map[*HashType]hash.Hash {
	result := make(map[*HashType]hash.Hash, len(types))
	for _, hashType := range types {
		result[hashType] = hashType.NewFunc()
	}
	return result
}
// toMultiWriter combines a set of hashers into a single writer, so that one
// write updates all of them.
func toMultiWriter(h map[*HashType]hash.Hash) io.Writer {
	// io.MultiWriter takes a slice, so flatten the map values first.
	writers := make([]io.Writer, len(h))
	next := 0
	for _, hasher := range h {
		writers[next] = hasher
		next++
	}
	return io.MultiWriter(writers...)
}
// A MultiHasher computes several hashes at once over all incoming writes.
type MultiHasher struct {
	w    io.Writer               // fan-out writer built from the hashers in h
	size int64                   // running total of bytes reported written
	h    map[*HashType]hash.Hash // one hasher per requested type
}

// NewMultiHasher returns a hash writer that feeds every write to a hasher
// for each of the requested types.
func NewMultiHasher(types []*HashType) *MultiHasher {
	hashers := fromTypes(types)
	return &MultiHasher{
		w: toMultiWriter(hashers),
		h: hashers,
	}
}

// Write forwards p to the underlying fan-out writer and adds the number of
// bytes it reports to the running size.
func (m *MultiHasher) Write(p []byte) (n int, err error) {
	written, writeErr := m.w.Write(p)
	m.size += int64(written)
	return written, writeErr
}

// GetHashInfo snapshots the current hex-encoded digest of every hasher.
func (m *MultiHasher) GetHashInfo() *HashInfo {
	digests := make(map[*HashType]string, len(m.h))
	for hashType, hasher := range m.h {
		digests[hashType] = hex.EncodeToString(hasher.Sum(nil))
	}
	return &HashInfo{h: digests}
}

// Sum returns the raw digest of the specified hash from the multihasher, or
// ErrUnsupported when that type was not requested at construction.
func (m *MultiHasher) Sum(hashType *HashType) ([]byte, error) {
	if hasher, found := m.h[hashType]; found {
		return hasher.Sum(nil), nil
	}
	return nil, ErrUnsupported
}

// Size returns the number of bytes written so far.
func (m *MultiHasher) Size() int64 {
	return m.size
}
// A HashInfo holds the digest string for one or more hash types.
type HashInfo struct {
	h map[*HashType]string // digest string keyed by hash type
}

// NewHashInfo builds a HashInfo holding a single digest str for type ht.
func NewHashInfo(ht *HashType, str string) HashInfo {
	return HashInfo{h: map[*HashType]string{ht: str}}
}

// String renders every non-empty digest as "name:digest", one per line.
// The line order follows map iteration and is therefore not stable.
func (hi HashInfo) String() string {
	var lines []string
	for ht, digest := range hi.h {
		if len(digest) == 0 {
			continue
		}
		lines = append(lines, ht.Name+":"+digest)
	}
	return strings.Join(lines, "\n")
}

// GetHash returns the digest stored for ht, or the empty string when none
// is stored.
func (hi HashInfo) GetHash(ht *HashType) string {
	digest := hi.h[ht]
	return digest
}

64
pkg/utils/hash_test.go Normal file
View File

@ -0,0 +1,64 @@
package utils
import (
"bytes"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"io"
"testing"
)
// hashTest pairs an input byte sequence with the hex digest string expected
// for each hash type.
type hashTest struct {
input []byte
output map[*HashType]string
}
// hashTestSet is the fixture table iterated by TestMultiHasher.
var hashTestSet = []hashTest{
// Fourteen sequential byte values, 1 through 14
{
input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
output: map[*HashType]string{
MD5: "bf13fc19e5151ac57d4252e0e0f87abe",
SHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
SHA256: "c839e57675862af5c21bd0a15413c3ec579e0d5522dab600bc6c3489b05b8f54",
},
},
// Empty data set
{
input: []byte{},
output: map[*HashType]string{
MD5: "d41d8cd98f00b204e9800998ecf8427e",
SHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
SHA256: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
},
},
}
// TestMultiHasher streams every fixture in hashTestSet through a MultiHasher
// configured for MD5, SHA1 and SHA256, and checks the byte count and each
// resulting digest against the fixture.
func TestMultiHasher(t *testing.T) {
	for _, test := range hashTestSet {
		mh := NewMultiHasher([]*HashType{MD5, SHA1, SHA256})
		n, err := io.Copy(mh, bytes.NewBuffer(test.input))
		require.NoError(t, err)
		assert.Len(t, test.input, int(n))
		hashInfo := mh.GetHashInfo()

		// Every computed digest must be expected by the fixture and match it.
		for k, actual := range hashInfo.h {
			expect, ok := test.output[k]
			require.True(t, ok, "test output for hash not found")
			assert.Equal(t, expect, actual)
		}
		// Test that all are present
		for k, expect := range test.output {
			actual, ok := hashInfo.h[k]
			require.True(t, ok, "computed hash for expected type not found")
			assert.Equal(t, expect, actual)
		}
		// The accessor must agree with the underlying map.
		for k, expect := range test.output {
			actual := hashInfo.GetHash(k)
			require.True(t, len(actual) > 0, "computed hash for expected type not found")
			assert.Equal(t, expect, actual)
		}
		unknown := hashInfo.GetHash(nil)
		require.True(t, len(unknown) == 0, "unknown type should return empty string")
		// Use the test's own log so the output is attached to this test and
		// does not depend on the package-level logger.
		t.Log(hashInfo.String())
	}
}

View File

@ -3,7 +3,9 @@ package utils
import (
"bytes"
"context"
"errors"
"fmt"
"golang.org/x/exp/constraints"
"io"
"time"
@ -17,7 +19,7 @@ type readerFunc func(p []byte) (n int, err error)
// Read implements io.Reader by calling the function value itself.
func (rf readerFunc) Read(p []byte) (n int, err error) {
	return rf(p)
}
// CopyWithCtx slightly modified function signature:
// - context has been added in order to propagate cancellation
// - I do not return the number of bytes written, as it is not useful in my use case
func CopyWithCtx(ctx context.Context, out io.Writer, in io.Reader, size int64, progress func(percentage int)) error {
// Copy will call the Reader and Writer interface multiple time, in order
@ -132,16 +134,6 @@ func (mr *MultiReadable) Close() error {
return nil
}
// nopCloser wraps an io.ReadSeeker and adds a Close method that does nothing.
type nopCloser struct {
	io.ReadSeeker
}

// Close does nothing and always returns nil.
func (nopCloser) Close() error {
	return nil
}

// ReadSeekerNopCloser returns an io.ReadSeekCloser that reads and seeks
// through r and whose Close is a no-op.
func ReadSeekerNopCloser(r io.ReadSeeker) io.ReadSeekCloser {
	return nopCloser{ReadSeeker: r}
}
func Retry(attempts int, sleep time.Duration, f func() error) (err error) {
for i := 0; i < attempts; i++ {
fmt.Println("This is attempt number", i)
@ -158,23 +150,56 @@ func Retry(attempts int, sleep time.Duration, f func() error) (err error) {
return fmt.Errorf("after %d attempts, last error: %s", attempts, err)
}
// ClosersIF is the behaviour of a closer collection: it can be closed as a
// whole, extended with single closers or whole collections, and handed out
// as a Closers value.
type ClosersIF interface {
	io.Closer
	Add(closer io.Closer)
	AddClosers(closers Closers)
	GetClosers() Closers
}

// Closers is a collection of io.Closer values that are closed together.
type Closers struct {
	closers []io.Closer
}

// GetClosers returns a copy of the collection value. The copy shares the
// underlying slice storage with the receiver.
func (c *Closers) GetClosers() Closers {
	return *c
}

var _ ClosersIF = (*Closers)(nil)

// Close closes every non-nil closer once, in insertion order, and returns
// all failures joined into one error (nil when every Close succeeded).
// A failing closer does not stop the remaining ones from being closed.
func (c *Closers) Close() error {
	var closeErrs []error
	for _, closer := range c.closers {
		if closer != nil {
			closeErrs = append(closeErrs, closer.Close())
		}
	}
	// errors.Join drops nil entries and returns nil if nothing is left.
	return errors.Join(closeErrs...)
}

// Add appends closer to the collection; a nil closer is ignored.
func (c *Closers) Add(closer io.Closer) {
	if closer != nil {
		c.closers = append(c.closers, closer)
	}
}

// AddClosers appends every closer held by closers to the collection.
func (c *Closers) AddClosers(closers Closers) {
	c.closers = append(c.closers, closers.closers...)
}

// EmptyClosers returns a collection holding no closers.
func EmptyClosers() Closers {
	return Closers{[]io.Closer{}}
}

// NewClosers returns a collection holding the given closers. The arguments
// are stored as passed; nil entries are skipped later by Close.
func NewClosers(c ...io.Closer) Closers {
	return Closers{c}
}
func Min[T constraints.Ordered](a, b T) T {
if a < b {
return a
}
return b
}
func NewClosers() *Closers {
return &Closers{[]*io.Closer{}}
func Max[T constraints.Ordered](a, b T) T {
if a < b {
return b
}
return a
}

View File

@ -1,6 +1,7 @@
package utils
import (
"encoding/base64"
"strings"
"github.com/alist-org/alist/v3/internal/conf"
@ -12,3 +13,20 @@ func MappingName(name string) string {
}
return name
}
// DEC lists the substitutions SafeAtob applies before decoding: each key is
// replaced by the standard base64 character it stands for.
var DEC = map[string]string{
	"-": "+",
	"_": "/",
	".": "=",
}

// SafeAtob rewrites data using the DEC substitutions and decodes the result
// as standard base64. It returns the decoded bytes as a string, or an empty
// string and the decode error when the input is not valid base64.
func SafeAtob(data string) (string, error) {
	for safe, std := range DEC {
		data = strings.ReplaceAll(data, safe, std)
	}
	decoded, err := base64.StdEncoding.DecodeString(data)
	if err != nil {
		return "", err
	}
	return string(decoded), nil
}