...

Source file src/github.com/Microsoft/go-winio/backuptar/tar.go

Documentation: github.com/Microsoft/go-winio/backuptar

     1  //go:build windows
     2  // +build windows
     3  
     4  package backuptar
     5  
     6  import (
     7  	"archive/tar"
     8  	"encoding/base64"
     9  	"fmt"
    10  	"io"
    11  	"path/filepath"
    12  	"strconv"
    13  	"strings"
    14  	"syscall"
    15  	"time"
    16  
    17  	"github.com/Microsoft/go-winio"
    18  	"golang.org/x/sys/windows"
    19  )
    20  
// Unix-style file mode bits (octal) that can be OR'ed into tar.Header.Mode.
// Within this file only cISDIR, cISREG and cISLNK are referenced; the
// remaining values are retained deliberately, as the lint directive below notes.
//
//nolint:deadcode,varcheck // keep unused constants for potential future use
const (
	cISUID  = 0004000 // Set uid
	cISGID  = 0002000 // Set gid
	cISVTX  = 0001000 // Save text (sticky bit)
	cISDIR  = 0040000 // Directory
	cISFIFO = 0010000 // FIFO
	cISREG  = 0100000 // Regular file
	cISLNK  = 0120000 // Symbolic link
	cISBLK  = 0060000 // Block special file
	cISCHR  = 0020000 // Character special file
	cISSOCK = 0140000 // Socket
)
    34  
// PAX record keys used to carry Win32 metadata in tar headers.
const (
	// hdrFileAttributes holds the Win32 file attributes as a decimal string.
	hdrFileAttributes = "MSWINDOWS.fileattr"
	// hdrSecurityDescriptor holds the security descriptor as an SDDL string.
	// It is only read (for backward compatibility); this file never writes it.
	hdrSecurityDescriptor = "MSWINDOWS.sd"
	// hdrRawSecurityDescriptor holds the binary security descriptor, base64-encoded.
	hdrRawSecurityDescriptor = "MSWINDOWS.rawsd"
	// hdrMountPoint marks a symlink-typed entry as a mount point. Readers only
	// test for the key's presence; the writer stores the value "1".
	hdrMountPoint = "MSWINDOWS.mountpoint"
	// hdrEaPrefix is prepended to an extended attribute's name to form its
	// record key; the record value is the base64-encoded attribute value.
	hdrEaPrefix = "MSWINDOWS.xattr."

	// hdrCreationTime holds the file creation time, formatted with formatPAXTime.
	hdrCreationTime = "LIBARCHIVE.creationtime"
)
    44  
    45  // zeroReader is an io.Reader that always returns 0s.
    46  type zeroReader struct{}
    47  
    48  func (zeroReader) Read(b []byte) (int, error) {
    49  	for i := range b {
    50  		b[i] = 0
    51  	}
    52  	return len(b), nil
    53  }
    54  
    55  func copySparse(t *tar.Writer, br *winio.BackupStreamReader) error {
    56  	curOffset := int64(0)
    57  	for {
    58  		bhdr, err := br.Next()
    59  		if err == io.EOF { //nolint:errorlint
    60  			err = io.ErrUnexpectedEOF
    61  		}
    62  		if err != nil {
    63  			return err
    64  		}
    65  		if bhdr.Id != winio.BackupSparseBlock {
    66  			return fmt.Errorf("unexpected stream %d", bhdr.Id)
    67  		}
    68  
    69  		// We can't seek backwards, since we have already written that data to the tar.Writer.
    70  		if bhdr.Offset < curOffset {
    71  			return fmt.Errorf("cannot seek back from %d to %d", curOffset, bhdr.Offset)
    72  		}
    73  		// archive/tar does not support writing sparse files
    74  		// so just write zeroes to catch up to the current offset.
    75  		if _, err = io.CopyN(t, zeroReader{}, bhdr.Offset-curOffset); err != nil {
    76  			return fmt.Errorf("seek to offset %d: %w", bhdr.Offset, err)
    77  		}
    78  		if bhdr.Size == 0 {
    79  			// A sparse block with size = 0 is used to mark the end of the sparse blocks.
    80  			break
    81  		}
    82  		n, err := io.Copy(t, br)
    83  		if err != nil {
    84  			return err
    85  		}
    86  		if n != bhdr.Size {
    87  			return fmt.Errorf("copied %d bytes instead of %d at offset %d", n, bhdr.Size, bhdr.Offset)
    88  		}
    89  		curOffset = bhdr.Offset + n
    90  	}
    91  	return nil
    92  }
    93  
    94  // BasicInfoHeader creates a tar header from basic file information.
    95  func BasicInfoHeader(name string, size int64, fileInfo *winio.FileBasicInfo) *tar.Header {
    96  	hdr := &tar.Header{
    97  		Format:     tar.FormatPAX,
    98  		Name:       filepath.ToSlash(name),
    99  		Size:       size,
   100  		Typeflag:   tar.TypeReg,
   101  		ModTime:    time.Unix(0, fileInfo.LastWriteTime.Nanoseconds()),
   102  		ChangeTime: time.Unix(0, fileInfo.ChangeTime.Nanoseconds()),
   103  		AccessTime: time.Unix(0, fileInfo.LastAccessTime.Nanoseconds()),
   104  		PAXRecords: make(map[string]string),
   105  	}
   106  	hdr.PAXRecords[hdrFileAttributes] = fmt.Sprintf("%d", fileInfo.FileAttributes)
   107  	hdr.PAXRecords[hdrCreationTime] = formatPAXTime(time.Unix(0, fileInfo.CreationTime.Nanoseconds()))
   108  
   109  	if (fileInfo.FileAttributes & syscall.FILE_ATTRIBUTE_DIRECTORY) != 0 {
   110  		hdr.Mode |= cISDIR
   111  		hdr.Size = 0
   112  		hdr.Typeflag = tar.TypeDir
   113  	}
   114  	return hdr
   115  }
   116  
   117  // SecurityDescriptorFromTarHeader reads the SDDL associated with the header of the current file
   118  // from the tar header and returns the security descriptor into a byte slice.
   119  func SecurityDescriptorFromTarHeader(hdr *tar.Header) ([]byte, error) {
   120  	if sdraw, ok := hdr.PAXRecords[hdrRawSecurityDescriptor]; ok {
   121  		sd, err := base64.StdEncoding.DecodeString(sdraw)
   122  		if err != nil {
   123  			// Not returning sd as-is in the error-case, as base64.DecodeString
   124  			// may return partially decoded data (not nil or empty slice) in case
   125  			// of a failure: https://github.com/golang/go/blob/go1.17.7/src/encoding/base64/base64.go#L382-L387
   126  			return nil, err
   127  		}
   128  		return sd, nil
   129  	}
   130  	// Maintaining old SDDL-based behavior for backward compatibility. All new
   131  	// tar headers written by this library will have raw binary for the security
   132  	// descriptor.
   133  	if sddl, ok := hdr.PAXRecords[hdrSecurityDescriptor]; ok {
   134  		return winio.SddlToSecurityDescriptor(sddl)
   135  	}
   136  	return nil, nil
   137  }
   138  
   139  // ExtendedAttributesFromTarHeader reads the EAs associated with the header of the
   140  // current file from the tar header and returns it as a byte slice.
   141  func ExtendedAttributesFromTarHeader(hdr *tar.Header) ([]byte, error) {
   142  	var eas []winio.ExtendedAttribute //nolint:prealloc // len(eas) <= len(hdr.PAXRecords); prealloc is wasteful
   143  	for k, v := range hdr.PAXRecords {
   144  		if !strings.HasPrefix(k, hdrEaPrefix) {
   145  			continue
   146  		}
   147  		data, err := base64.StdEncoding.DecodeString(v)
   148  		if err != nil {
   149  			return nil, err
   150  		}
   151  		eas = append(eas, winio.ExtendedAttribute{
   152  			Name:  k[len(hdrEaPrefix):],
   153  			Value: data,
   154  		})
   155  	}
   156  	var eaData []byte
   157  	var err error
   158  	if len(eas) != 0 {
   159  		eaData, err = winio.EncodeExtendedAttributes(eas)
   160  		if err != nil {
   161  			return nil, err
   162  		}
   163  	}
   164  	return eaData, nil
   165  }
   166  
   167  // EncodeReparsePointFromTarHeader reads the ReparsePoint structure from the tar header
   168  // and encodes it into a byte slice. The file for which this function is called must be a
   169  // symlink.
   170  func EncodeReparsePointFromTarHeader(hdr *tar.Header) []byte {
   171  	_, isMountPoint := hdr.PAXRecords[hdrMountPoint]
   172  	rp := winio.ReparsePoint{
   173  		Target:       filepath.FromSlash(hdr.Linkname),
   174  		IsMountPoint: isMountPoint,
   175  	}
   176  	return winio.EncodeReparsePoint(&rp)
   177  }
   178  
   179  // WriteTarFileFromBackupStream writes a file to a tar writer using data from a Win32 backup stream.
   180  //
   181  // This encodes Win32 metadata as tar pax vendor extensions starting with MSWINDOWS.
   182  //
   183  // The additional Win32 metadata is:
   184  //
   185  //   - MSWINDOWS.fileattr: The Win32 file attributes, as a decimal value
   186  //   - MSWINDOWS.rawsd: The Win32 security descriptor, in raw binary format
   187  //   - MSWINDOWS.mountpoint: If present, this is a mount point and not a symlink, even though the type is '2' (symlink)
   188  func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size int64, fileInfo *winio.FileBasicInfo) error {
   189  	name = filepath.ToSlash(name)
   190  	hdr := BasicInfoHeader(name, size, fileInfo)
   191  
   192  	// If r can be seeked, then this function is two-pass: pass 1 collects the
   193  	// tar header data, and pass 2 copies the data stream. If r cannot be
   194  	// seeked, then some header data (in particular EAs) will be silently lost.
   195  	var (
   196  		restartPos int64
   197  		err        error
   198  	)
   199  	sr, readTwice := r.(io.Seeker)
   200  	if readTwice {
   201  		if restartPos, err = sr.Seek(0, io.SeekCurrent); err != nil {
   202  			readTwice = false
   203  		}
   204  	}
   205  
   206  	br := winio.NewBackupStreamReader(r)
   207  	var dataHdr *winio.BackupHeader
   208  	for dataHdr == nil {
   209  		bhdr, err := br.Next()
   210  		if err == io.EOF { //nolint:errorlint
   211  			break
   212  		}
   213  		if err != nil {
   214  			return err
   215  		}
   216  		switch bhdr.Id {
   217  		case winio.BackupData:
   218  			hdr.Mode |= cISREG
   219  			if !readTwice {
   220  				dataHdr = bhdr
   221  			}
   222  		case winio.BackupSecurity:
   223  			sd, err := io.ReadAll(br)
   224  			if err != nil {
   225  				return err
   226  			}
   227  			hdr.PAXRecords[hdrRawSecurityDescriptor] = base64.StdEncoding.EncodeToString(sd)
   228  
   229  		case winio.BackupReparseData:
   230  			hdr.Mode |= cISLNK
   231  			hdr.Typeflag = tar.TypeSymlink
   232  			reparseBuffer, _ := io.ReadAll(br)
   233  			rp, err := winio.DecodeReparsePoint(reparseBuffer)
   234  			if err != nil {
   235  				return err
   236  			}
   237  			if rp.IsMountPoint {
   238  				hdr.PAXRecords[hdrMountPoint] = "1"
   239  			}
   240  			hdr.Linkname = rp.Target
   241  
   242  		case winio.BackupEaData:
   243  			eab, err := io.ReadAll(br)
   244  			if err != nil {
   245  				return err
   246  			}
   247  			eas, err := winio.DecodeExtendedAttributes(eab)
   248  			if err != nil {
   249  				return err
   250  			}
   251  			for _, ea := range eas {
   252  				// Use base64 encoding for the binary value. Note that there
   253  				// is no way to encode the EA's flags, since their use doesn't
   254  				// make any sense for persisted EAs.
   255  				hdr.PAXRecords[hdrEaPrefix+ea.Name] = base64.StdEncoding.EncodeToString(ea.Value)
   256  			}
   257  
   258  		case winio.BackupAlternateData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData:
   259  			// ignore these streams
   260  		default:
   261  			return fmt.Errorf("%s: unknown stream ID %d", name, bhdr.Id)
   262  		}
   263  	}
   264  
   265  	err = t.WriteHeader(hdr)
   266  	if err != nil {
   267  		return err
   268  	}
   269  
   270  	if readTwice {
   271  		// Get back to the data stream.
   272  		if _, err = sr.Seek(restartPos, io.SeekStart); err != nil {
   273  			return err
   274  		}
   275  		for dataHdr == nil {
   276  			bhdr, err := br.Next()
   277  			if err == io.EOF { //nolint:errorlint
   278  				break
   279  			}
   280  			if err != nil {
   281  				return err
   282  			}
   283  			if bhdr.Id == winio.BackupData {
   284  				dataHdr = bhdr
   285  			}
   286  		}
   287  	}
   288  
   289  	// The logic for copying file contents is fairly complicated due to the need for handling sparse files,
   290  	// and the weird ways they are represented by BackupRead. A normal file will always either have a data stream
   291  	// with size and content, or no data stream at all (if empty). However, for a sparse file, the content can also
   292  	// be represented using a series of sparse block streams following the data stream. Additionally, the way sparse
   293  	// files are handled by BackupRead has changed in the OS recently. The specifics of the representation are described
   294  	// in the list at the bottom of this block comment.
   295  	//
   296  	// Sparse files can be represented in four different ways, based on the specifics of the file.
   297  	// - Size = 0:
   298  	//     Previously: BackupRead yields no data stream and no sparse block streams.
   299  	//     Recently: BackupRead yields a data stream with size = 0. There are no following sparse block streams.
   300  	// - Size > 0, no allocated ranges:
   301  	//     BackupRead yields a data stream with size = 0. Following is a single sparse block stream with
   302  	//     size = 0 and offset = <file size>.
   303  	// - Size > 0, one allocated range:
   304  	//     BackupRead yields a data stream with size = <file size> containing the file contents. There are no
   305  	//     sparse block streams. This is the case if you take a normal file with contents and simply set the
   306  	//     sparse flag on it.
   307  	// - Size > 0, multiple allocated ranges:
   308  	//     BackupRead yields a data stream with size = 0. Following are sparse block streams for each allocated
   309  	//     range of the file containing the range contents. Finally there is a sparse block stream with
   310  	//     size = 0 and offset = <file size>.
   311  
   312  	if dataHdr != nil { //nolint:nestif // todo: reduce nesting complexity
   313  		// A data stream was found. Copy the data.
   314  		// We assume that we will either have a data stream size > 0 XOR have sparse block streams.
   315  		if dataHdr.Size > 0 || (dataHdr.Attributes&winio.StreamSparseAttributes) == 0 {
   316  			if size != dataHdr.Size {
   317  				return fmt.Errorf("%s: mismatch between file size %d and header size %d", name, size, dataHdr.Size)
   318  			}
   319  			if _, err = io.Copy(t, br); err != nil {
   320  				return fmt.Errorf("%s: copying contents from data stream: %w", name, err)
   321  			}
   322  		} else if size > 0 {
   323  			// As of a recent OS change, BackupRead now returns a data stream for empty sparse files.
   324  			// These files have no sparse block streams, so skip the copySparse call if file size = 0.
   325  			if err = copySparse(t, br); err != nil {
   326  				return fmt.Errorf("%s: copying contents from sparse block stream: %w", name, err)
   327  			}
   328  		}
   329  	}
   330  
   331  	// Look for streams after the data stream. The only ones we handle are alternate data streams.
   332  	// Other streams may have metadata that could be serialized, but the tar header has already
   333  	// been written. In practice, this means that we don't get EA or TXF metadata.
   334  	for {
   335  		bhdr, err := br.Next()
   336  		if err == io.EOF { //nolint:errorlint
   337  			break
   338  		}
   339  		if err != nil {
   340  			return err
   341  		}
   342  		switch bhdr.Id {
   343  		case winio.BackupAlternateData:
   344  			if (bhdr.Attributes & winio.StreamSparseAttributes) != 0 {
   345  				// Unsupported for now, since the size of the alternate stream is not present
   346  				// in the backup stream until after the data has been read.
   347  				return fmt.Errorf("%s: tar of sparse alternate data streams is unsupported", name)
   348  			}
   349  			altName := strings.TrimSuffix(bhdr.Name, ":$DATA")
   350  			hdr = &tar.Header{
   351  				Format:     hdr.Format,
   352  				Name:       name + altName,
   353  				Mode:       hdr.Mode,
   354  				Typeflag:   tar.TypeReg,
   355  				Size:       bhdr.Size,
   356  				ModTime:    hdr.ModTime,
   357  				AccessTime: hdr.AccessTime,
   358  				ChangeTime: hdr.ChangeTime,
   359  			}
   360  			err = t.WriteHeader(hdr)
   361  			if err != nil {
   362  				return err
   363  			}
   364  			_, err = io.Copy(t, br)
   365  			if err != nil {
   366  				return err
   367  			}
   368  		case winio.BackupEaData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData:
   369  			// ignore these streams
   370  		default:
   371  			return fmt.Errorf("%s: unknown stream ID %d after data", name, bhdr.Id)
   372  		}
   373  	}
   374  	return nil
   375  }
   376  
   377  // FileInfoFromHeader retrieves basic Win32 file information from a tar header, using the additional metadata written by
   378  // WriteTarFileFromBackupStream.
   379  func FileInfoFromHeader(hdr *tar.Header) (name string, size int64, fileInfo *winio.FileBasicInfo, err error) {
   380  	name = hdr.Name
   381  	if hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA {
   382  		size = hdr.Size
   383  	}
   384  	fileInfo = &winio.FileBasicInfo{
   385  		LastAccessTime: windows.NsecToFiletime(hdr.AccessTime.UnixNano()),
   386  		LastWriteTime:  windows.NsecToFiletime(hdr.ModTime.UnixNano()),
   387  		ChangeTime:     windows.NsecToFiletime(hdr.ChangeTime.UnixNano()),
   388  		// Default to ModTime, we'll pull hdrCreationTime below if present
   389  		CreationTime: windows.NsecToFiletime(hdr.ModTime.UnixNano()),
   390  	}
   391  	if attrStr, ok := hdr.PAXRecords[hdrFileAttributes]; ok {
   392  		attr, err := strconv.ParseUint(attrStr, 10, 32)
   393  		if err != nil {
   394  			return "", 0, nil, err
   395  		}
   396  		fileInfo.FileAttributes = uint32(attr)
   397  	} else {
   398  		if hdr.Typeflag == tar.TypeDir {
   399  			fileInfo.FileAttributes |= syscall.FILE_ATTRIBUTE_DIRECTORY
   400  		}
   401  	}
   402  	if creationTimeStr, ok := hdr.PAXRecords[hdrCreationTime]; ok {
   403  		creationTime, err := parsePAXTime(creationTimeStr)
   404  		if err != nil {
   405  			return "", 0, nil, err
   406  		}
   407  		fileInfo.CreationTime = windows.NsecToFiletime(creationTime.UnixNano())
   408  	}
   409  	return name, size, fileInfo, err
   410  }
   411  
   412  // WriteBackupStreamFromTarFile writes a Win32 backup stream from the current tar file. Since this function may process multiple
   413  // tar file entries in order to collect all the alternate data streams for the file, it returns the next
   414  // tar file that was not processed, or io.EOF is there are no more.
   415  func WriteBackupStreamFromTarFile(w io.Writer, t *tar.Reader, hdr *tar.Header) (*tar.Header, error) {
   416  	bw := winio.NewBackupStreamWriter(w)
   417  
   418  	sd, err := SecurityDescriptorFromTarHeader(hdr)
   419  	if err != nil {
   420  		return nil, err
   421  	}
   422  	if len(sd) != 0 {
   423  		bhdr := winio.BackupHeader{
   424  			Id:   winio.BackupSecurity,
   425  			Size: int64(len(sd)),
   426  		}
   427  		err := bw.WriteHeader(&bhdr)
   428  		if err != nil {
   429  			return nil, err
   430  		}
   431  		_, err = bw.Write(sd)
   432  		if err != nil {
   433  			return nil, err
   434  		}
   435  	}
   436  
   437  	eadata, err := ExtendedAttributesFromTarHeader(hdr)
   438  	if err != nil {
   439  		return nil, err
   440  	}
   441  	if len(eadata) != 0 {
   442  		bhdr := winio.BackupHeader{
   443  			Id:   winio.BackupEaData,
   444  			Size: int64(len(eadata)),
   445  		}
   446  		err = bw.WriteHeader(&bhdr)
   447  		if err != nil {
   448  			return nil, err
   449  		}
   450  		_, err = bw.Write(eadata)
   451  		if err != nil {
   452  			return nil, err
   453  		}
   454  	}
   455  
   456  	if hdr.Typeflag == tar.TypeSymlink {
   457  		reparse := EncodeReparsePointFromTarHeader(hdr)
   458  		bhdr := winio.BackupHeader{
   459  			Id:   winio.BackupReparseData,
   460  			Size: int64(len(reparse)),
   461  		}
   462  		err := bw.WriteHeader(&bhdr)
   463  		if err != nil {
   464  			return nil, err
   465  		}
   466  		_, err = bw.Write(reparse)
   467  		if err != nil {
   468  			return nil, err
   469  		}
   470  	}
   471  
   472  	if hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA {
   473  		bhdr := winio.BackupHeader{
   474  			Id:   winio.BackupData,
   475  			Size: hdr.Size,
   476  		}
   477  		err := bw.WriteHeader(&bhdr)
   478  		if err != nil {
   479  			return nil, err
   480  		}
   481  		_, err = io.Copy(bw, t)
   482  		if err != nil {
   483  			return nil, err
   484  		}
   485  	}
   486  	// Copy all the alternate data streams and return the next non-ADS header.
   487  	for {
   488  		ahdr, err := t.Next()
   489  		if err != nil {
   490  			return nil, err
   491  		}
   492  		if ahdr.Typeflag != tar.TypeReg || !strings.HasPrefix(ahdr.Name, hdr.Name+":") {
   493  			return ahdr, nil
   494  		}
   495  		bhdr := winio.BackupHeader{
   496  			Id:   winio.BackupAlternateData,
   497  			Size: ahdr.Size,
   498  			Name: ahdr.Name[len(hdr.Name):] + ":$DATA",
   499  		}
   500  		err = bw.WriteHeader(&bhdr)
   501  		if err != nil {
   502  			return nil, err
   503  		}
   504  		_, err = io.Copy(bw, t)
   505  		if err != nil {
   506  			return nil, err
   507  		}
   508  	}
   509  }
   510  

View as plain text