...

Source file src/github.com/vbatts/tar-split/tar/asm/disassemble.go

Documentation: github.com/vbatts/tar-split/tar/asm

     1  package asm
     2  
     3  import (
     4  	"io"
     5  
     6  	"github.com/vbatts/tar-split/archive/tar"
     7  	"github.com/vbatts/tar-split/tar/storage"
     8  )
     9  
    10  // NewInputTarStream wraps the Reader stream of a tar archive and provides a
    11  // Reader stream of the same.
    12  //
    13  // In the middle it will pack the segments and file metadata to storage.Packer
    14  // `p`.
    15  //
    16  // The the storage.FilePutter is where payload of files in the stream are
    17  // stashed. If this stashing is not needed, you can provide a nil
    18  // storage.FilePutter. Since the checksumming is still needed, then a default
    19  // of NewDiscardFilePutter will be used internally
    20  func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
    21  	// What to do here... folks will want their own access to the Reader that is
    22  	// their tar archive stream, but we'll need that same stream to use our
    23  	// forked 'archive/tar'.
    24  	// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
    25  	// from, and we'll MITM the stream to store metadata.
    26  	// We'll need a storage.FilePutter too ...
    27  
    28  	// Another concern, whether to do any storage.FilePutter operations, such that we
    29  	// don't extract any amount of the archive. But then again, we're not making
    30  	// files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter.
    31  	// Perhaps we have a DiscardFilePutter that is a bit bucket.
    32  
    33  	// we'll return the pipe reader, since TeeReader does not buffer and will
    34  	// only read what the outputRdr Read's. Since Tar archives have padding on
    35  	// the end, we want to be the one reading the padding, even if the user's
    36  	// `archive/tar` doesn't care.
    37  	pR, pW := io.Pipe()
    38  	outputRdr := io.TeeReader(r, pW)
    39  
    40  	// we need a putter that will generate the crc64 sums of file payloads
    41  	if fp == nil {
    42  		fp = storage.NewDiscardFilePutter()
    43  	}
    44  
    45  	go func() {
    46  		tr := tar.NewReader(outputRdr)
    47  		tr.RawAccounting = true
    48  		for {
    49  			hdr, err := tr.Next()
    50  			if err != nil {
    51  				if err != io.EOF {
    52  					pW.CloseWithError(err)
    53  					return
    54  				}
    55  				// even when an EOF is reached, there is often 1024 null bytes on
    56  				// the end of an archive. Collect them too.
    57  				if b := tr.RawBytes(); len(b) > 0 {
    58  					_, err := p.AddEntry(storage.Entry{
    59  						Type:    storage.SegmentType,
    60  						Payload: b,
    61  					})
    62  					if err != nil {
    63  						pW.CloseWithError(err)
    64  						return
    65  					}
    66  				}
    67  				break // not return. We need the end of the reader.
    68  			}
    69  			if hdr == nil {
    70  				break // not return. We need the end of the reader.
    71  			}
    72  
    73  			if b := tr.RawBytes(); len(b) > 0 {
    74  				_, err := p.AddEntry(storage.Entry{
    75  					Type:    storage.SegmentType,
    76  					Payload: b,
    77  				})
    78  				if err != nil {
    79  					pW.CloseWithError(err)
    80  					return
    81  				}
    82  			}
    83  
    84  			var csum []byte
    85  			if hdr.Size > 0 {
    86  				var err error
    87  				_, csum, err = fp.Put(hdr.Name, tr)
    88  				if err != nil {
    89  					pW.CloseWithError(err)
    90  					return
    91  				}
    92  			}
    93  
    94  			entry := storage.Entry{
    95  				Type:    storage.FileType,
    96  				Size:    hdr.Size,
    97  				Payload: csum,
    98  			}
    99  			// For proper marshalling of non-utf8 characters
   100  			entry.SetName(hdr.Name)
   101  
   102  			// File entries added, regardless of size
   103  			_, err = p.AddEntry(entry)
   104  			if err != nil {
   105  				pW.CloseWithError(err)
   106  				return
   107  			}
   108  
   109  			if b := tr.RawBytes(); len(b) > 0 {
   110  				_, err = p.AddEntry(storage.Entry{
   111  					Type:    storage.SegmentType,
   112  					Payload: b,
   113  				})
   114  				if err != nil {
   115  					pW.CloseWithError(err)
   116  					return
   117  				}
   118  			}
   119  		}
   120  
   121  		// It is allowable, and not uncommon that there is further padding on
   122  		// the end of an archive, apart from the expected 1024 null bytes. We
   123  		// do this in chunks rather than in one go to avoid cases where a
   124  		// maliciously crafted tar file tries to trick us into reading many GBs
   125  		// into memory.
   126  		const paddingChunkSize = 1024 * 1024
   127  		var paddingChunk [paddingChunkSize]byte
   128  		for {
   129  			var isEOF bool
   130  			n, err := outputRdr.Read(paddingChunk[:])
   131  			if err != nil {
   132  				if err != io.EOF {
   133  					pW.CloseWithError(err)
   134  					return
   135  				}
   136  				isEOF = true
   137  			}
   138  			if n != 0 {
   139  				_, err = p.AddEntry(storage.Entry{
   140  					Type:    storage.SegmentType,
   141  					Payload: paddingChunk[:n],
   142  				})
   143  				if err != nil {
   144  					pW.CloseWithError(err)
   145  					return
   146  				}
   147  			}
   148  			if isEOF {
   149  				break
   150  			}
   151  		}
   152  		pW.Close()
   153  	}()
   154  
   155  	return pR, nil
   156  }
   157  

View as plain text