...

Source file src/github.com/klauspost/compress/s2/cmd/s2d/main.go

Documentation: github.com/klauspost/compress/s2/cmd/s2d

     1  package main
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"flag"
     8  	"fmt"
     9  	"io"
    10  	"net/http"
    11  	"os"
    12  	"runtime"
    13  	"runtime/debug"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  	"time"
    18  	"unicode"
    19  
    20  	"github.com/klauspost/compress/s2"
    21  	"github.com/klauspost/compress/s2/cmd/internal/filepathx"
    22  	"github.com/klauspost/compress/s2/cmd/internal/readahead"
    23  )
    24  
// Command-line flags and build information.
//
// Each flag is registered with the default flag set when the package is
// initialised; the values are only meaningful after flag.Parse has run.
var (
	safe   = flag.Bool("safe", false, "Do not overwrite output files")
	verify = flag.Bool("verify", false, "Verify files, but do not write output")
	stdout = flag.Bool("c", false, "Write all output to stdout. Multiple input files will be concatenated")
	remove = flag.Bool("rm", false, "Delete source file(s) after successful decompression")
	quiet  = flag.Bool("q", false, "Don't write any output to terminal, except errors")
	bench  = flag.Int("bench", 0, "Run benchmark n times. No output will be written")
	tail   = flag.String("tail", "", "Return last of compressed file. Examples: 92, 64K, 256K, 1M, 4M. Requires Index")
	offset = flag.String("offset", "", "Start at offset. Examples: 92, 64K, 256K, 1M, 4M. Requires Index")
	help   = flag.Bool("help", false, "Display help")
	out    = flag.String("o", "", "Write output to another file. Single input file only")
	block  = flag.Bool("block", false, "Decompress as a single block. Will load content into memory.")
	cpu    = flag.Int("cpu", runtime.NumCPU(), "Decompress streams using this amount of threads")

	// version and date are printed in the usage banner. The defaults are
	// placeholders; presumably they are overridden at build time — TODO confirm.
	version = "(dev)"
	date    = "(unknown)"
)
    42  
// File name extensions recognised on input files. They are stripped from the
// input name to form the default output name.
const (
	s2Ext     = ".s2"
	snappyExt = ".sz" // https://github.com/google/snappy/blob/main/framing_format.txt#L34
)
    47  
    48  func main() {
    49  	flag.Parse()
    50  	r := s2.NewReader(nil)
    51  
    52  	// No args, use stdin/stdout
    53  	args := flag.Args()
    54  	if len(args) == 0 || *help {
    55  		_, _ = fmt.Fprintf(os.Stderr, "s2 decompress v%v, built at %v.\n\n", version, date)
    56  		_, _ = fmt.Fprintf(os.Stderr, "Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.\n"+
    57  			"Copyright (c) 2019+ Klaus Post. All rights reserved.\n\n")
    58  		_, _ = fmt.Fprintln(os.Stderr, `Usage: s2d [options] file1 file2
    59  
    60  Decompresses all files supplied as input. Input files must end with '`+s2Ext+`' or '`+snappyExt+`'.
    61  Output file names have the extension removed. By default output files will be overwritten.
    62  Use - as the only file name to read from stdin and write to stdout.
    63  
    64  Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
    65  Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
    66  
    67  File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
    68  Extensions on downloaded files are ignored. Only http response code 200 is accepted.
    69  
    70  Options:`)
    71  		flag.PrintDefaults()
    72  		os.Exit(0)
    73  	}
    74  	tailBytes, err := toSize(*tail)
    75  	exitErr(err)
    76  	offset, err := toSize(*offset)
    77  	exitErr(err)
    78  	if tailBytes > 0 && offset > 0 {
    79  		exitErr(errors.New("--offset and --tail cannot be used together"))
    80  	}
    81  	if len(args) == 1 && args[0] == "-" {
    82  		r.Reset(os.Stdin)
    83  		if *verify {
    84  			_, err := io.Copy(io.Discard, r)
    85  			exitErr(err)
    86  			return
    87  		}
    88  		if *out == "" {
    89  			_, err := io.Copy(os.Stdout, r)
    90  			exitErr(err)
    91  			return
    92  		}
    93  		dstFilename := *out
    94  		if *safe {
    95  			_, err := os.Stat(dstFilename)
    96  			if !os.IsNotExist(err) {
    97  				exitErr(errors.New("destination files exists"))
    98  			}
    99  		}
   100  		dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.ModePerm)
   101  		exitErr(err)
   102  		defer dstFile.Close()
   103  		bw := bufio.NewWriterSize(dstFile, 4<<20)
   104  		defer bw.Flush()
   105  		_, err = io.Copy(bw, r)
   106  		exitErr(err)
   107  		return
   108  	}
   109  	var files []string
   110  
   111  	for _, pattern := range args {
   112  		if isHTTP(pattern) {
   113  			files = append(files, pattern)
   114  			continue
   115  		}
   116  
   117  		found, err := filepathx.Glob(pattern)
   118  		exitErr(err)
   119  		if len(found) == 0 {
   120  			exitErr(fmt.Errorf("unable to find file %v", pattern))
   121  		}
   122  		files = append(files, found...)
   123  	}
   124  
   125  	*quiet = *quiet || *stdout
   126  
   127  	if *bench > 0 {
   128  		debug.SetGCPercent(10)
   129  		for _, filename := range files {
   130  			block := *block
   131  			dstFilename := cleanFileName(filename)
   132  			if strings.HasSuffix(filename, ".block") {
   133  				dstFilename = strings.TrimSuffix(dstFilename, ".block")
   134  				block = true
   135  			}
   136  			switch {
   137  			case strings.HasSuffix(dstFilename, s2Ext):
   138  			case strings.HasSuffix(dstFilename, snappyExt):
   139  			case strings.HasSuffix(dstFilename, ".snappy"):
   140  			default:
   141  				if !isHTTP(filename) {
   142  					fmt.Println("Skipping", filename)
   143  					continue
   144  				}
   145  			}
   146  
   147  			func() {
   148  				if !*quiet {
   149  					fmt.Print("Reading ", filename, "...")
   150  				}
   151  				// Input file.
   152  				file, size, _ := openFile(filename)
   153  				b := make([]byte, size)
   154  				_, err := io.ReadFull(file, b)
   155  				exitErr(err)
   156  				file.Close()
   157  
   158  				for i := 0; i < *bench; i++ {
   159  					if !*quiet {
   160  						fmt.Print("\nDecompressing...")
   161  					}
   162  					start := time.Now()
   163  					var output int64
   164  					if block {
   165  						dec, err := s2.Decode(nil, b)
   166  						exitErr(err)
   167  						output = int64(len(dec))
   168  					} else {
   169  						r.Reset(bytes.NewBuffer(b))
   170  						if *cpu > 1 {
   171  							output, err = r.DecodeConcurrent(io.Discard, *cpu)
   172  						} else {
   173  							output, err = io.Copy(io.Discard, r)
   174  						}
   175  						exitErr(err)
   176  					}
   177  					if !*quiet {
   178  						elapsed := time.Since(start)
   179  						ms := elapsed.Round(time.Millisecond)
   180  						mbPerSec := (float64(output) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
   181  						pct := float64(output) * 100 / float64(len(b))
   182  						fmt.Printf(" %d -> %d [%.02f%%]; %v, %.01fMB/s", len(b), output, pct, ms, mbPerSec)
   183  					}
   184  				}
   185  				if !*quiet {
   186  					fmt.Println("")
   187  				}
   188  			}()
   189  		}
   190  		os.Exit(0)
   191  	}
   192  
   193  	if *out != "" && len(files) > 1 {
   194  		exitErr(errors.New("-out parameter can only be used with one input"))
   195  	}
   196  
   197  	for _, filename := range files {
   198  		dstFilename := cleanFileName(filename)
   199  		block := *block
   200  		if strings.HasSuffix(dstFilename, ".block") {
   201  			dstFilename = strings.TrimSuffix(dstFilename, ".block")
   202  			block = true
   203  		}
   204  		switch {
   205  		case *out != "":
   206  			dstFilename = *out
   207  		case strings.HasSuffix(dstFilename, s2Ext):
   208  			dstFilename = strings.TrimSuffix(dstFilename, s2Ext)
   209  		case strings.HasSuffix(dstFilename, snappyExt):
   210  			dstFilename = strings.TrimSuffix(dstFilename, snappyExt)
   211  		case strings.HasSuffix(dstFilename, ".snappy"):
   212  			dstFilename = strings.TrimSuffix(dstFilename, ".snappy")
   213  		default:
   214  			if !isHTTP(filename) {
   215  				fmt.Println("Skipping", filename)
   216  				continue
   217  			}
   218  		}
   219  		if *verify {
   220  			dstFilename = "(verify)"
   221  		}
   222  
   223  		func() {
   224  			var closeOnce sync.Once
   225  			if !*quiet {
   226  				fmt.Print("Decompressing ", filename, " -> ", dstFilename)
   227  			}
   228  			// Input file.
   229  			file, _, mode := openFile(filename)
   230  			defer closeOnce.Do(func() { file.Close() })
   231  			var rc interface {
   232  				io.Reader
   233  				BytesRead() int64
   234  			}
   235  			if tailBytes > 0 || offset > 0 {
   236  				rs, ok := file.(io.ReadSeeker)
   237  				if !ok && tailBytes > 0 {
   238  					exitErr(errors.New("cannot tail with non-seekable input"))
   239  				}
   240  				if ok {
   241  					rc = &rCountSeeker{in: rs}
   242  				} else {
   243  					rc = &rCounter{in: file}
   244  				}
   245  			} else {
   246  				rc = &rCounter{in: file}
   247  			}
   248  			var src io.Reader
   249  			if !block && tailBytes == 0 && offset == 0 {
   250  				ra, err := readahead.NewReaderSize(rc, 2, 4<<20)
   251  				exitErr(err)
   252  				defer ra.Close()
   253  				src = ra
   254  			} else {
   255  				src = rc
   256  			}
   257  			if *safe {
   258  				_, err := os.Stat(dstFilename)
   259  				if !os.IsNotExist(err) {
   260  					exitErr(errors.New("destination files exists"))
   261  				}
   262  			}
   263  			var out io.Writer
   264  			switch {
   265  			case *verify:
   266  				out = io.Discard
   267  			case *stdout:
   268  				out = os.Stdout
   269  			default:
   270  				dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode)
   271  				exitErr(err)
   272  				defer dstFile.Close()
   273  				out = dstFile
   274  				if !block {
   275  					bw := bufio.NewWriterSize(dstFile, 4<<20)
   276  					defer bw.Flush()
   277  					out = bw
   278  				}
   279  			}
   280  			var decoded io.Reader
   281  			start := time.Now()
   282  			if block {
   283  				all, err := io.ReadAll(src)
   284  				exitErr(err)
   285  				b, err := s2.Decode(nil, all)
   286  				exitErr(err)
   287  				decoded = bytes.NewReader(b)
   288  			} else {
   289  				r.Reset(src)
   290  				if tailBytes > 0 || offset > 0 {
   291  					rs, err := r.ReadSeeker(tailBytes > 0, nil)
   292  					exitErr(err)
   293  					if tailBytes > 0 {
   294  						_, err = rs.Seek(-tailBytes, io.SeekEnd)
   295  					} else {
   296  						_, err = rs.Seek(offset, io.SeekStart)
   297  					}
   298  					exitErr(err)
   299  				}
   300  				decoded = r
   301  			}
   302  			var err error
   303  			var output int64
   304  			if dec, ok := decoded.(*s2.Reader); ok && tailBytes == 0 && offset == 0 {
   305  				output, err = dec.DecodeConcurrent(out, *cpu)
   306  			} else {
   307  				output, err = io.Copy(out, decoded)
   308  			}
   309  			exitErr(err)
   310  			if !*quiet {
   311  				elapsed := time.Since(start)
   312  				mbPerSec := (float64(output) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
   313  				pct := float64(output) * 100 / float64(rc.BytesRead())
   314  				fmt.Printf(" %d -> %d [%.02f%%]; %.01fMB/s\n", rc.BytesRead(), output, pct, mbPerSec)
   315  			}
   316  			if *remove && !*verify {
   317  				closeOnce.Do(func() {
   318  					file.Close()
   319  					if !*quiet {
   320  						fmt.Println("Removing", filename)
   321  					}
   322  					err := os.Remove(filename)
   323  					exitErr(err)
   324  				})
   325  			}
   326  		}()
   327  	}
   328  }
   329  
   330  func openFile(name string) (rc io.ReadCloser, size int64, mode os.FileMode) {
   331  	if isHTTP(name) {
   332  		resp, err := http.Get(name)
   333  		exitErr(err)
   334  		if resp.StatusCode != http.StatusOK {
   335  			exitErr(fmt.Errorf("unexpected response status code %v, want 200 OK", resp.Status))
   336  		}
   337  		return resp.Body, resp.ContentLength, os.ModePerm
   338  	}
   339  	file, err := os.Open(name)
   340  	exitErr(err)
   341  	st, err := file.Stat()
   342  	exitErr(err)
   343  	return file, st.Size(), st.Mode()
   344  }
   345  
   346  func cleanFileName(s string) string {
   347  	if isHTTP(s) {
   348  		s = strings.TrimPrefix(s, "http://")
   349  		s = strings.TrimPrefix(s, "https://")
   350  		s = strings.Map(func(r rune) rune {
   351  			switch r {
   352  			case '\\', '/', '*', '?', ':', '|', '<', '>', '~':
   353  				return '_'
   354  			}
   355  			if r < 20 {
   356  				return '_'
   357  			}
   358  			return r
   359  		}, s)
   360  	}
   361  	return s
   362  }
   363  
   364  func isHTTP(name string) bool {
   365  	return strings.HasPrefix(name, "http://") || strings.HasPrefix(name, "https://")
   366  }
   367  
   368  func exitErr(err error) {
   369  	if err != nil {
   370  		fmt.Fprintln(os.Stderr, "\nERROR:", err.Error())
   371  		os.Exit(2)
   372  	}
   373  }
   374  
   375  type rCounter struct {
   376  	n  int64
   377  	in io.Reader
   378  }
   379  
   380  func (w *rCounter) Read(p []byte) (n int, err error) {
   381  	n, err = w.in.Read(p)
   382  	w.n += int64(n)
   383  	return n, err
   384  }
   385  
   386  func (w *rCounter) BytesRead() int64 {
   387  	return w.n
   388  }
   389  
   390  type rCountSeeker struct {
   391  	n  int64
   392  	in io.ReadSeeker
   393  }
   394  
   395  func (w *rCountSeeker) Read(p []byte) (n int, err error) {
   396  	n, err = w.in.Read(p)
   397  	w.n += int64(n)
   398  	return n, err
   399  }
   400  
   401  func (w *rCountSeeker) Seek(offset int64, whence int) (int64, error) {
   402  	return w.in.Seek(offset, whence)
   403  }
   404  
   405  func (w *rCountSeeker) BytesRead() int64 {
   406  	return w.n
   407  }
   408  
   409  // toSize converts a size indication to bytes.
   410  func toSize(size string) (int64, error) {
   411  	if len(size) == 0 {
   412  		return 0, nil
   413  	}
   414  	size = strings.ToUpper(strings.TrimSpace(size))
   415  	firstLetter := strings.IndexFunc(size, unicode.IsLetter)
   416  	if firstLetter == -1 {
   417  		firstLetter = len(size)
   418  	}
   419  
   420  	bytesString, multiple := size[:firstLetter], size[firstLetter:]
   421  	sz, err := strconv.ParseInt(bytesString, 10, 64)
   422  	if err != nil {
   423  		return 0, fmt.Errorf("unable to parse size: %v", err)
   424  	}
   425  
   426  	if sz < 0 {
   427  		return 0, errors.New("negative size given")
   428  	}
   429  	switch multiple {
   430  	case "T", "TB", "TIB":
   431  		return sz * 1 << 40, nil
   432  	case "G", "GB", "GIB":
   433  		return sz * 1 << 30, nil
   434  	case "M", "MB", "MIB":
   435  		return sz * 1 << 20, nil
   436  	case "K", "KB", "KIB":
   437  		return sz * 1 << 10, nil
   438  	case "B", "":
   439  		return sz, nil
   440  	default:
   441  		return 0, fmt.Errorf("unknown size suffix: %v", multiple)
   442  	}
   443  }
   444  

View as plain text