Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alist-org
GitHub Repository: alist-org/alist
Path: blob/main/internal/archive/zip/utils.go
1562 views
1
package zip
2
3
import (
4
"bytes"
5
"io"
6
"io/fs"
7
stdpath "path"
8
"strings"
9
10
"github.com/alist-org/alist/v3/internal/archive/tool"
11
"github.com/alist-org/alist/v3/internal/errs"
12
"github.com/alist-org/alist/v3/internal/stream"
13
"github.com/saintfish/chardet"
14
"github.com/yeka/zip"
15
"golang.org/x/text/encoding"
16
"golang.org/x/text/encoding/charmap"
17
"golang.org/x/text/encoding/japanese"
18
"golang.org/x/text/encoding/korean"
19
"golang.org/x/text/encoding/simplifiedchinese"
20
"golang.org/x/text/encoding/traditionalchinese"
21
"golang.org/x/text/encoding/unicode"
22
"golang.org/x/text/encoding/unicode/utf32"
23
"golang.org/x/text/transform"
24
)
25
26
type WrapReader struct {
27
Reader *zip.Reader
28
}
29
30
func (r *WrapReader) Files() []tool.SubFile {
31
ret := make([]tool.SubFile, 0, len(r.Reader.File))
32
for _, f := range r.Reader.File {
33
ret = append(ret, &WrapFile{f: f})
34
}
35
return ret
36
}
37
38
type WrapFileInfo struct {
39
fs.FileInfo
40
}
41
42
func (f *WrapFileInfo) Name() string {
43
return decodeName(f.FileInfo.Name())
44
}
45
46
type WrapFile struct {
47
f *zip.File
48
}
49
50
func (f *WrapFile) Name() string {
51
return decodeName(f.f.Name)
52
}
53
54
func (f *WrapFile) FileInfo() fs.FileInfo {
55
return &WrapFileInfo{FileInfo: f.f.FileInfo()}
56
}
57
58
func (f *WrapFile) Open() (io.ReadCloser, error) {
59
return f.f.Open()
60
}
61
62
func (f *WrapFile) IsEncrypted() bool {
63
return f.f.IsEncrypted()
64
}
65
66
func (f *WrapFile) SetPassword(password string) {
67
f.f.SetPassword(password)
68
}
69
70
// getReader builds a *zip.Reader over the given streams, which are combined
// into a single reader via stream.NewMultiReaderAt.
//
// When more than one stream is supplied and the second one has the ".z01"
// extension, the first stream is moved to the end of the list before the
// streams are combined.
func getReader(ss []*stream.SeekableStream) (*zip.Reader, error) {
	if len(ss) > 1 {
		if ext := stdpath.Ext(ss[1].GetName()); ext == ".z01" {
			// FIXME: Incorrect parsing method for standard multipart zip format
			head, tail := ss[0], ss[1:]
			ss = append(tail, head)
		}
	}
	multi, err := stream.NewMultiReaderAt(ss)
	if err != nil {
		return nil, err
	}
	return zip.NewReader(multi, multi.Size())
}
81
82
func filterPassword(err error) error {
83
if err != nil && strings.Contains(err.Error(), "password") {
84
return errs.WrongArchivePassword
85
}
86
return err
87
}
88
89
func decodeName(name string) string {
90
b := []byte(name)
91
detector := chardet.NewTextDetector()
92
results, err := detector.DetectAll(b)
93
if err != nil {
94
return name
95
}
96
var ce, re, enc encoding.Encoding
97
for _, r := range results {
98
if r.Confidence > 30 {
99
ce = getCommonEncoding(r.Charset)
100
if ce != nil {
101
break
102
}
103
}
104
if re == nil {
105
re = getEncoding(r.Charset)
106
}
107
}
108
if ce != nil {
109
enc = ce
110
} else if re != nil {
111
enc = re
112
} else {
113
return name
114
}
115
i := bytes.NewReader(b)
116
decoder := transform.NewReader(i, enc.NewDecoder())
117
content, _ := io.ReadAll(decoder)
118
return string(content)
119
}
120
121
func getCommonEncoding(name string) (enc encoding.Encoding) {
122
switch name {
123
case "UTF-8":
124
enc = unicode.UTF8
125
case "UTF-16LE":
126
enc = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
127
case "Shift_JIS":
128
enc = japanese.ShiftJIS
129
case "GB-18030":
130
enc = simplifiedchinese.GB18030
131
case "EUC-KR":
132
enc = korean.EUCKR
133
case "Big5":
134
enc = traditionalchinese.Big5
135
default:
136
enc = nil
137
}
138
return
139
}
140
141
func getEncoding(name string) (enc encoding.Encoding) {
142
switch name {
143
case "UTF-8":
144
enc = unicode.UTF8
145
case "UTF-16BE":
146
enc = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
147
case "UTF-16LE":
148
enc = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
149
case "UTF-32BE":
150
enc = utf32.UTF32(utf32.BigEndian, utf32.IgnoreBOM)
151
case "UTF-32LE":
152
enc = utf32.UTF32(utf32.LittleEndian, utf32.IgnoreBOM)
153
case "ISO-8859-1":
154
enc = charmap.ISO8859_1
155
case "ISO-8859-2":
156
enc = charmap.ISO8859_2
157
case "ISO-8859-3":
158
enc = charmap.ISO8859_3
159
case "ISO-8859-4":
160
enc = charmap.ISO8859_4
161
case "ISO-8859-5":
162
enc = charmap.ISO8859_5
163
case "ISO-8859-6":
164
enc = charmap.ISO8859_6
165
case "ISO-8859-7":
166
enc = charmap.ISO8859_7
167
case "ISO-8859-8":
168
enc = charmap.ISO8859_8
169
case "ISO-8859-8-I":
170
enc = charmap.ISO8859_8I
171
case "ISO-8859-9":
172
enc = charmap.ISO8859_9
173
case "windows-1251":
174
enc = charmap.Windows1251
175
case "windows-1256":
176
enc = charmap.Windows1256
177
case "KOI8-R":
178
enc = charmap.KOI8R
179
case "Shift_JIS":
180
enc = japanese.ShiftJIS
181
case "GB-18030":
182
enc = simplifiedchinese.GB18030
183
case "EUC-JP":
184
enc = japanese.EUCJP
185
case "EUC-KR":
186
enc = korean.EUCKR
187
case "Big5":
188
enc = traditionalchinese.Big5
189
case "ISO-2022-JP":
190
enc = japanese.ISO2022JP
191
default:
192
enc = nil
193
}
194
return
195
}
196
197