package main

import (
	"bufio"
	"bytes"
	"errors"
	"flag"
	"fmt"
	"io"
	"net/http"
	"os"
	"runtime"
	"runtime/debug"
	"strconv"
	"strings"
	"sync"
	"time"
	"unicode"

	"github.com/klauspost/compress/s2"
	"github.com/klauspost/compress/s2/cmd/internal/filepathx"
	"github.com/klauspost/compress/s2/cmd/internal/readahead"
)

var (
	safe    = flag.Bool("safe", false, "Do not overwrite output files")
	verify  = flag.Bool("verify", false, "Verify files, but do not write output")
	stdout  = flag.Bool("c", false, "Write all output to stdout. Multiple input files will be concatenated")
	remove  = flag.Bool("rm", false, "Delete source file(s) after successful decompression")
	quiet   = flag.Bool("q", false, "Don't write any output to terminal, except errors")
	bench   = flag.Int("bench", 0, "Run benchmark n times. No output will be written")
	tail    = flag.String("tail", "", "Return last part of compressed file. Examples: 92, 64K, 256K, 1M, 4M. Requires Index")
	offset  = flag.String("offset", "", "Start at offset. Examples: 92, 64K, 256K, 1M, 4M. Requires Index")
	help    = flag.Bool("help", false, "Display help")
	out     = flag.String("o", "", "Write output to another file. Single input file only")
	block   = flag.Bool("block", false, "Decompress as a single block. Will load content into memory.")
	cpu     = flag.Int("cpu", runtime.NumCPU(), "Decompress streams using this number of threads")

	version = "(dev)"
	date    = "(unknown)"
)

const (
	s2Ext     = ".s2"
	snappyExt = ".sz"
)
func main() {
	flag.Parse()
	r := s2.NewReader(nil)

	args := flag.Args()
	if len(args) == 0 || *help {
		_, _ = fmt.Fprintf(os.Stderr, "s2 decompress v%v, built at %v.\n\n", version, date)
		_, _ = fmt.Fprintf(os.Stderr, "Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.\n"+
			"Copyright (c) 2019+ Klaus Post. All rights reserved.\n\n")
		_, _ = fmt.Fprintln(os.Stderr, `Usage: s2d [options] file1 file2

Decompresses all files supplied as input. Input files must end with '`+s2Ext+`' or '`+snappyExt+`'.
Output file names have the extension removed. By default output files will be overwritten.
Use - as the only file name to read from stdin and write to stdout.

Wildcards are accepted: testdir/*.txt will decompress all files in testdir ending with .txt
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt

File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
Extensions on downloaded files are ignored. Only http response code 200 is accepted.

Options:`)
		flag.PrintDefaults()
		os.Exit(0)
	}
	tailBytes, err := toSize(*tail)
	exitErr(err)
	offset, err := toSize(*offset)
	exitErr(err)
	if tailBytes > 0 && offset > 0 {
		exitErr(errors.New("--offset and --tail cannot be used together"))
	}
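	// A single "-" argument streams from stdin: verify only, copy to stdout,
	// or write to the file given with -o.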
	if len(args) == 1 && args[0] == "-" {
		r.Reset(os.Stdin)
		if *verify {
			_, err := io.Copy(io.Discard, r)
			exitErr(err)
			return
		}
		if *out == "" {
			_, err := io.Copy(os.Stdout, r)
			exitErr(err)
			return
		}
		dstFilename := *out
		if *safe {
			_, err := os.Stat(dstFilename)
			if !os.IsNotExist(err) {
				exitErr(errors.New("destination file exists"))
			}
		}
		dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.ModePerm)
		exitErr(err)
		defer dstFile.Close()
		bw := bufio.NewWriterSize(dstFile, 4<<20)
		defer bw.Flush()
		_, err = io.Copy(bw, r)
		exitErr(err)
		return
	}
	var files []string

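	// Expand the arguments: HTTP(S) URLs pass through unchanged, anything else
	// is treated as a glob pattern that must match at least one file.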
	for _, pattern := range args {
		if isHTTP(pattern) {
			files = append(files, pattern)
			continue
		}

		found, err := filepathx.Glob(pattern)
		exitErr(err)
		if len(found) == 0 {
			exitErr(fmt.Errorf("unable to find file %v", pattern))
		}
		files = append(files, found...)
	}

	*quiet = *quiet || *stdout

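	// Benchmark mode: read each input fully into memory, then decompress it
	// -bench times and report throughput instead of writing any output.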
	if *bench > 0 {
		debug.SetGCPercent(10)
		for _, filename := range files {
			block := *block
			dstFilename := cleanFileName(filename)
			if strings.HasSuffix(filename, ".block") {
				dstFilename = strings.TrimSuffix(dstFilename, ".block")
				block = true
			}
			switch {
			case strings.HasSuffix(dstFilename, s2Ext):
			case strings.HasSuffix(dstFilename, snappyExt):
			case strings.HasSuffix(dstFilename, ".snappy"):
			default:
				if !isHTTP(filename) {
					fmt.Println("Skipping", filename)
					continue
				}
			}

			func() {
				if !*quiet {
					fmt.Print("Reading ", filename, "...")
				}

				file, size, _ := openFile(filename)
				b := make([]byte, size)
				_, err := io.ReadFull(file, b)
				exitErr(err)
				file.Close()

				for i := 0; i < *bench; i++ {
					if !*quiet {
						fmt.Print("\nDecompressing...")
					}
					start := time.Now()
					var output int64
					if block {
						dec, err := s2.Decode(nil, b)
						exitErr(err)
						output = int64(len(dec))
					} else {
						r.Reset(bytes.NewBuffer(b))
						if *cpu > 1 {
							output, err = r.DecodeConcurrent(io.Discard, *cpu)
						} else {
							output, err = io.Copy(io.Discard, r)
						}
						exitErr(err)
					}
					if !*quiet {
						elapsed := time.Since(start)
						ms := elapsed.Round(time.Millisecond)
						mbPerSec := (float64(output) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
						pct := float64(output) * 100 / float64(len(b))
						fmt.Printf(" %d -> %d [%.02f%%]; %v, %.01fMB/s", len(b), output, pct, ms, mbPerSec)
					}
				}
				if !*quiet {
					fmt.Println("")
				}
			}()
		}
		os.Exit(0)
	}

193 if *out != "" && len(files) > 1 {
194 exitErr(errors.New("-out parameter can only be used with one input"))
195 }
196
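	// Decompress each input file. The destination name is the input name with
	// its compression extension removed, unless -o, -verify or -c overrides it.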
	for _, filename := range files {
		dstFilename := cleanFileName(filename)
		block := *block
		if strings.HasSuffix(dstFilename, ".block") {
			dstFilename = strings.TrimSuffix(dstFilename, ".block")
			block = true
		}
		switch {
		case *out != "":
			dstFilename = *out
		case strings.HasSuffix(dstFilename, s2Ext):
			dstFilename = strings.TrimSuffix(dstFilename, s2Ext)
		case strings.HasSuffix(dstFilename, snappyExt):
			dstFilename = strings.TrimSuffix(dstFilename, snappyExt)
		case strings.HasSuffix(dstFilename, ".snappy"):
			dstFilename = strings.TrimSuffix(dstFilename, ".snappy")
		default:
			if !isHTTP(filename) {
				fmt.Println("Skipping", filename)
				continue
			}
		}
		if *verify {
			dstFilename = "(verify)"
		}

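		// Each file is handled in a closure so the deferred cleanup (source and
		// destination handles, readahead buffers, writer flush) runs per file.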
		func() {
			var closeOnce sync.Once
			if !*quiet {
				fmt.Print("Decompressing ", filename, " -> ", dstFilename)
			}

			file, _, mode := openFile(filename)
			defer closeOnce.Do(func() { file.Close() })
			var rc interface {
				io.Reader
				BytesRead() int64
			}
			if tailBytes > 0 || offset > 0 {
				rs, ok := file.(io.ReadSeeker)
				if !ok && tailBytes > 0 {
					exitErr(errors.New("cannot tail with non-seekable input"))
				}
				if ok {
					rc = &rCountSeeker{in: rs}
				} else {
					rc = &rCounter{in: file}
				}
			} else {
				rc = &rCounter{in: file}
			}
			var src io.Reader
			if !block && tailBytes == 0 && offset == 0 {
				ra, err := readahead.NewReaderSize(rc, 2, 4<<20)
				exitErr(err)
				defer ra.Close()
				src = ra
			} else {
				src = rc
			}
			if *safe {
				_, err := os.Stat(dstFilename)
				if !os.IsNotExist(err) {
					exitErr(errors.New("destination file exists"))
				}
			}
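			// Pick the output sink: discard for -verify, stdout for -c, otherwise
			// the destination file behind a 4 MB buffered writer in stream mode.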
			var out io.Writer
			switch {
			case *verify:
				out = io.Discard
			case *stdout:
				out = os.Stdout
			default:
				dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode)
				exitErr(err)
				defer dstFile.Close()
				out = dstFile
				if !block {
					bw := bufio.NewWriterSize(dstFile, 4<<20)
					defer bw.Flush()
					out = bw
				}
			}
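			// Block mode decodes the whole payload in memory; stream mode resets the
			// shared reader and, when -tail or -offset is set, seeks via the index.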
			var decoded io.Reader
			start := time.Now()
			if block {
				all, err := io.ReadAll(src)
				exitErr(err)
				b, err := s2.Decode(nil, all)
				exitErr(err)
				decoded = bytes.NewReader(b)
			} else {
				r.Reset(src)
				if tailBytes > 0 || offset > 0 {
					rs, err := r.ReadSeeker(tailBytes > 0, nil)
					exitErr(err)
					if tailBytes > 0 {
						_, err = rs.Seek(-tailBytes, io.SeekEnd)
					} else {
						_, err = rs.Seek(offset, io.SeekStart)
					}
					exitErr(err)
				}
				decoded = r
			}
			var err error
			var output int64
			if dec, ok := decoded.(*s2.Reader); ok && tailBytes == 0 && offset == 0 {
				output, err = dec.DecodeConcurrent(out, *cpu)
			} else {
				output, err = io.Copy(out, decoded)
			}
			exitErr(err)
			if !*quiet {
				elapsed := time.Since(start)
				mbPerSec := (float64(output) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
				pct := float64(output) * 100 / float64(rc.BytesRead())
				fmt.Printf(" %d -> %d [%.02f%%]; %.01fMB/s\n", rc.BytesRead(), output, pct, mbPerSec)
			}
			if *remove && !*verify {
				closeOnce.Do(func() {
					file.Close()
					if !*quiet {
						fmt.Println("Removing", filename)
					}
					err := os.Remove(filename)
					exitErr(err)
				})
			}
		}()
	}
}

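// openFile opens a local file or downloads an HTTP(S) URL, returning the body,
// its size (-1 when unknown) and the file mode to use for the output file.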
func openFile(name string) (rc io.ReadCloser, size int64, mode os.FileMode) {
	if isHTTP(name) {
		resp, err := http.Get(name)
		exitErr(err)
		if resp.StatusCode != http.StatusOK {
			exitErr(fmt.Errorf("unexpected response status code %v, want 200 OK", resp.Status))
		}
		return resp.Body, resp.ContentLength, os.ModePerm
	}
	file, err := os.Open(name)
	exitErr(err)
	st, err := file.Stat()
	exitErr(err)
	return file, st.Size(), st.Mode()
}

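// cleanFileName turns an HTTP(S) URL into a usable local file name by dropping
// the scheme and replacing characters that are unsafe in file names.
// Local file names are returned unchanged.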
func cleanFileName(s string) string {
	if isHTTP(s) {
		s = strings.TrimPrefix(s, "http://")
		s = strings.TrimPrefix(s, "https://")
		s = strings.Map(func(r rune) rune {
			switch r {
			case '\\', '/', '*', '?', ':', '|', '<', '>', '~':
				return '_'
			}
			if r < 20 {
				return '_'
			}
			return r
		}, s)
	}
	return s
}

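// isHTTP reports whether name should be treated as a URL to download.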
func isHTTP(name string) bool {
	return strings.HasPrefix(name, "http://") || strings.HasPrefix(name, "https://")
}

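// exitErr prints err to stderr and exits with code 2 if err is non-nil.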
func exitErr(err error) {
	if err != nil {
		fmt.Fprintln(os.Stderr, "\nERROR:", err.Error())
		os.Exit(2)
	}
}

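// rCounter wraps an io.Reader and counts the bytes read,
// so the compressed input size can be reported.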
type rCounter struct {
	n  int64
	in io.Reader
}

func (w *rCounter) Read(p []byte) (n int, err error) {
	n, err = w.in.Read(p)
	w.n += int64(n)
	return n, err
}

func (w *rCounter) BytesRead() int64 {
	return w.n
}

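// rCountSeeker is like rCounter, but also forwards Seek so it can be used
// with the -tail and -offset options on seekable inputs.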
type rCountSeeker struct {
	n  int64
	in io.ReadSeeker
}

func (w *rCountSeeker) Read(p []byte) (n int, err error) {
	n, err = w.in.Read(p)
	w.n += int64(n)
	return n, err
}

func (w *rCountSeeker) Seek(offset int64, whence int) (int64, error) {
	return w.in.Seek(offset, whence)
}

func (w *rCountSeeker) BytesRead() int64 {
	return w.n
}

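// toSize parses a size string such as "92", "64K", "256K", "1M" or "4M" into a
// byte count. Suffixes are interpreted as binary multiples (K=2^10 ... T=2^40).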
func toSize(size string) (int64, error) {
	if len(size) == 0 {
		return 0, nil
	}
	size = strings.ToUpper(strings.TrimSpace(size))
	firstLetter := strings.IndexFunc(size, unicode.IsLetter)
	if firstLetter == -1 {
		firstLetter = len(size)
	}

	bytesString, multiple := size[:firstLetter], size[firstLetter:]
	sz, err := strconv.ParseInt(bytesString, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("unable to parse size: %v", err)
	}

	if sz < 0 {
		return 0, errors.New("negative size given")
	}
	switch multiple {
	case "T", "TB", "TIB":
		return sz * 1 << 40, nil
	case "G", "GB", "GIB":
		return sz * 1 << 30, nil
	case "M", "MB", "MIB":
		return sz * 1 << 20, nil
	case "K", "KB", "KIB":
		return sz * 1 << 10, nil
	case "B", "":
		return sz, nil
	default:
		return 0, fmt.Errorf("unknown size suffix: %v", multiple)
	}
}