...

Source file src/github.com/Microsoft/go-winio/wim/wim.go

Documentation: github.com/Microsoft/go-winio/wim

     1  //go:build windows || linux
     2  // +build windows linux
     3  
     4  // Package wim implements a WIM file parser.
     5  //
     6  // WIM files are used to distribute Windows file system and container images.
     7  // They are documented at https://msdn.microsoft.com/en-us/library/windows/desktop/dd861280.aspx.
     8  package wim
     9  
    10  import (
    11  	"bytes"
    12  	"crypto/sha1" //nolint:gosec // not used for secure application
    13  	"encoding/binary"
    14  	"encoding/xml"
    15  	"errors"
    16  	"fmt"
    17  	"io"
    18  	"strconv"
    19  	"sync"
    20  	"time"
    21  	"unicode/utf16"
    22  )
    23  
// File attribute constants from Windows.
//
// Each constant is a single bit; a file's Attributes field (see FileHeader)
// is tested against them with a bitwise AND.
//
//nolint:revive // var-naming: ALL_CAPS
const (
	FILE_ATTRIBUTE_READONLY            = 0x00000001
	FILE_ATTRIBUTE_HIDDEN              = 0x00000002
	FILE_ATTRIBUTE_SYSTEM              = 0x00000004
	FILE_ATTRIBUTE_DIRECTORY           = 0x00000010
	FILE_ATTRIBUTE_ARCHIVE             = 0x00000020
	FILE_ATTRIBUTE_DEVICE              = 0x00000040
	FILE_ATTRIBUTE_NORMAL              = 0x00000080
	FILE_ATTRIBUTE_TEMPORARY           = 0x00000100
	FILE_ATTRIBUTE_SPARSE_FILE         = 0x00000200
	FILE_ATTRIBUTE_REPARSE_POINT       = 0x00000400
	FILE_ATTRIBUTE_COMPRESSED          = 0x00000800
	FILE_ATTRIBUTE_OFFLINE             = 0x00001000
	FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 0x00002000
	FILE_ATTRIBUTE_ENCRYPTED           = 0x00004000
	FILE_ATTRIBUTE_INTEGRITY_STREAM    = 0x00008000
	FILE_ATTRIBUTE_VIRTUAL             = 0x00010000
	FILE_ATTRIBUTE_NO_SCRUB_DATA       = 0x00020000
	FILE_ATTRIBUTE_EA                  = 0x00040000
)
    47  
// Windows processor architectures.
//
// NOTE(review): presumably these are the values carried by WindowsInfo.Arch;
// nothing in this file compares against them, so confirm with callers.
//
//nolint:revive // var-naming: ALL_CAPS
const (
	PROCESSOR_ARCHITECTURE_INTEL         = 0
	PROCESSOR_ARCHITECTURE_MIPS          = 1
	PROCESSOR_ARCHITECTURE_ALPHA         = 2
	PROCESSOR_ARCHITECTURE_PPC           = 3
	PROCESSOR_ARCHITECTURE_SHX           = 4
	PROCESSOR_ARCHITECTURE_ARM           = 5
	PROCESSOR_ARCHITECTURE_IA64          = 6
	PROCESSOR_ARCHITECTURE_ALPHA64       = 7
	PROCESSOR_ARCHITECTURE_MSIL          = 8
	PROCESSOR_ARCHITECTURE_AMD64         = 9
	PROCESSOR_ARCHITECTURE_IA32_ON_WIN64 = 10
	PROCESSOR_ARCHITECTURE_NEUTRAL       = 11
	PROCESSOR_ARCHITECTURE_ARM64         = 12
)
    66  
// wimImageTag is the 8-byte tag ("MSWIM" followed by three zero bytes) that
// NewReader requires in wimHeader.ImageTag before treating input as a WIM.
var wimImageTag = [...]byte{'M', 'S', 'W', 'I', 'M', 0, 0, 0}
    68  
    69  // todo: replace this with pkg/guid.GUID (and add tests to make sure nothing breaks)
    70  
    71  type guid struct {
    72  	Data1 uint32
    73  	Data2 uint16
    74  	Data3 uint16
    75  	Data4 [8]byte
    76  }
    77  
    78  func (g guid) String() string {
    79  	return fmt.Sprintf("%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
    80  		g.Data1,
    81  		g.Data2,
    82  		g.Data3,
    83  		g.Data4[0],
    84  		g.Data4[1],
    85  		g.Data4[2],
    86  		g.Data4[3],
    87  		g.Data4[4],
    88  		g.Data4[5],
    89  		g.Data4[6],
    90  		g.Data4[7])
    91  }
    92  
    93  type resourceDescriptor struct {
    94  	FlagsAndCompressedSize uint64
    95  	Offset                 int64
    96  	OriginalSize           int64
    97  }
    98  
    99  type resFlag byte
   100  
   101  //nolint:deadcode,varcheck // need unused variables for iota to work
   102  const (
   103  	resFlagFree resFlag = 1 << iota
   104  	resFlagMetadata
   105  	resFlagCompressed
   106  	resFlagSpanned
   107  )
   108  
   109  const validate = false
   110  
   111  const supportedResFlags = resFlagMetadata | resFlagCompressed
   112  
   113  func (r *resourceDescriptor) Flags() resFlag {
   114  	return resFlag(r.FlagsAndCompressedSize >> 56)
   115  }
   116  
   117  func (r *resourceDescriptor) CompressedSize() int64 {
   118  	return int64(r.FlagsAndCompressedSize & 0xffffffffffffff)
   119  }
   120  
   121  func (r *resourceDescriptor) String() string {
   122  	s := fmt.Sprintf("%d bytes at %d", r.CompressedSize(), r.Offset)
   123  	if r.Flags()&4 != 0 {
   124  		s += fmt.Sprintf(" (uncompresses to %d)", r.OriginalSize)
   125  	}
   126  	return s
   127  }
   128  
// SHA1Hash contains the SHA1 hash of a file or stream.
type SHA1Hash [20]byte

// streamDescriptor is one record of the offset table, as decoded by
// readOffsetTable. Hash is the key under which non-metadata resources are
// stored in Reader.fileData.
type streamDescriptor struct {
	resourceDescriptor
	PartNumber uint16
	RefCount   uint32
	Hash       SHA1Hash
}
   138  
// hdrFlag is the type of the Flags field in wimHeader.
type hdrFlag uint32

// General header flags occupy bits 0-7.
//
//nolint:deadcode,varcheck // need unused variables for iota to work
const (
	hdrFlagReserved hdrFlag = 1 << iota
	hdrFlagCompressed
	hdrFlagReadOnly
	hdrFlagSpanned
	hdrFlagResourceOnly
	hdrFlagMetadataOnly
	hdrFlagWriteInProgress
	hdrFlagRpFix
)

// Compression-related header flags start at bit 16.
//
//nolint:deadcode,varcheck // need unused variables for iota to work
const (
	hdrFlagCompressReserved hdrFlag = 1 << (iota + 16)
	hdrFlagCompressXpress
	hdrFlagCompressLzx
)

// supportedHdrFlags is the set of header flags NewReader accepts; a header
// with any other bit set is rejected.
const supportedHdrFlags = hdrFlagRpFix | hdrFlagReadOnly | hdrFlagCompressed | hdrFlagCompressLzx
   161  
// wimHeader is the fixed-layout header that NewReader decodes from offset 0
// of the file using little-endian encoding/binary, so field order and widths
// must not change.
//
// NewReader checks ImageTag against wimImageTag, Flags against
// supportedHdrFlags, requires CompressionSize to be 0x8000 and TotalParts to
// be 1. OffsetTable and XMLData locate the offset table and the XML info
// blob; ImageCount must match the number of metadata resources found in the
// offset table.
type wimHeader struct {
	ImageTag        [8]byte
	Size            uint32
	Version         uint32
	Flags           hdrFlag
	CompressionSize uint32
	WIMGuid         guid
	PartNumber      uint16
	TotalParts      uint16
	ImageCount      uint32
	OffsetTable     resourceDescriptor
	XMLData         resourceDescriptor
	BootMetadata    resourceDescriptor
	BootIndex       uint32
	Padding         uint32
	Integrity       resourceDescriptor
	Unused          [60]byte
}
   180  
// securityblockDisk is the header of the security descriptor table read by
// readSecurityDescriptors. NumEntries is the number of 8-byte size entries
// that follow it; TotalLength, rounded up to a multiple of 8, is treated as
// the size of the whole table.
type securityblockDisk struct {
	TotalLength uint32
	NumEntries  uint32
}

// securityblockDiskSize is the encoded size of securityblockDisk in bytes.
const securityblockDiskSize = 8
   187  
// direntry is the fixed-size part of a directory entry, decoded by
// readNextEntry immediately after the entry's 8-byte length prefix. Field
// order and widths matter because it is decoded with encoding/binary.
//
// ReparseHardLink is interpreted as a reparse tag plus reserved word when the
// entry has FILE_ATTRIBUTE_REPARSE_POINT set, and as a link ID otherwise.
// FileNameLength and ShortNameLength are byte counts of the UTF-16 names that
// follow the fixed part.
type direntry struct {
	Attributes       uint32
	SecurityID       uint32
	SubdirOffset     int64
	Unused1, Unused2 int64
	CreationTime     Filetime
	LastAccessTime   Filetime
	LastWriteTime    Filetime
	Hash             SHA1Hash
	Padding          uint32
	ReparseHardLink  int64
	StreamCount      uint16
	ShortNameLength  uint16
	FileNameLength   uint16
}

var direntrySize = int64(binary.Size(direntry{}) + 8) // includes an 8-byte length prefix
   205  
// streamentry is the fixed-size part of a stream entry, decoded by
// readNextStream immediately after the entry's 8-byte length prefix.
// NameLength is the byte count of the UTF-16 stream name that follows.
type streamentry struct {
	Unused     int64
	Hash       SHA1Hash
	NameLength int16
}

var streamentrySize = int64(binary.Size(streamentry{}) + 8) // includes an 8-byte length prefix
   213  
   214  // Filetime represents a Windows time.
   215  type Filetime struct {
   216  	LowDateTime  uint32
   217  	HighDateTime uint32
   218  }
   219  
   220  // Time returns the time as time.Time.
   221  func (ft *Filetime) Time() time.Time {
   222  	// 100-nanosecond intervals since January 1, 1601
   223  	nsec := int64(ft.HighDateTime)<<32 + int64(ft.LowDateTime)
   224  	// change starting time to the Epoch (00:00:00 UTC, January 1, 1970)
   225  	nsec -= 116444736000000000
   226  	// convert into nanoseconds
   227  	nsec *= 100
   228  	return time.Unix(0, nsec)
   229  }
   230  
   231  // UnmarshalXML unmarshalls the time from a WIM XML blob.
   232  func (ft *Filetime) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
   233  	type Time struct {
   234  		Low  string `xml:"LOWPART"`
   235  		High string `xml:"HIGHPART"`
   236  	}
   237  	var t Time
   238  	err := d.DecodeElement(&t, &start)
   239  	if err != nil {
   240  		return err
   241  	}
   242  
   243  	low, err := strconv.ParseUint(t.Low, 0, 32)
   244  	if err != nil {
   245  		return err
   246  	}
   247  	high, err := strconv.ParseUint(t.High, 0, 32)
   248  	if err != nil {
   249  		return err
   250  	}
   251  
   252  	ft.LowDateTime = uint32(low)
   253  	ft.HighDateTime = uint32(high)
   254  	return nil
   255  }
   256  
// info is the top-level shape of the WIM XML blob as unmarshalled by
// NewReader: a list of IMAGE elements.
type info struct {
	Image []ImageInfo `xml:"IMAGE"`
}
   260  
// ImageInfo contains information about the image.
//
// It is unmarshalled from an IMAGE element of the WIM XML. NewReader attaches
// it to the image whose 1-based position in the offset table equals Index.
type ImageInfo struct {
	Name         string       `xml:"NAME"`
	Index        int          `xml:"INDEX,attr"`
	CreationTime Filetime     `xml:"CREATIONTIME"`
	ModTime      Filetime     `xml:"LASTMODIFICATIONTIME"`
	Windows      *WindowsInfo `xml:"WINDOWS"`
}
   269  
// WindowsInfo contains information about the Windows installation in the image.
//
// It is unmarshalled from the WINDOWS element of an image's XML; Languages
// and DefaultLanguage both come from the nested LANGUAGES element.
type WindowsInfo struct {
	Arch             byte     `xml:"ARCH"`
	ProductName      string   `xml:"PRODUCTNAME"`
	EditionID        string   `xml:"EDITIONID"`
	InstallationType string   `xml:"INSTALLATIONTYPE"`
	ProductType      string   `xml:"PRODUCTTYPE"`
	Languages        []string `xml:"LANGUAGES>LANGUAGE"`
	DefaultLanguage  string   `xml:"LANGUAGES>DEFAULT"`
	Version          Version  `xml:"VERSION"`
	SystemRoot       string   `xml:"SYSTEMROOT"`
}
   282  
// Version represents a Windows build version.
//
// It is unmarshalled from the VERSION element of WindowsInfo.
type Version struct {
	Major   int `xml:"MAJOR"`
	Minor   int `xml:"MINOR"`
	Build   int `xml:"BUILD"`
	SPBuild int `xml:"SPBUILD"`
	SPLevel int `xml:"SPLEVEL"`
}
   291  
   292  // ParseError is returned when the WIM cannot be parsed.
   293  type ParseError struct {
   294  	Oper string
   295  	Path string
   296  	Err  error
   297  }
   298  
   299  func (e *ParseError) Error() string {
   300  	if e.Path == "" {
   301  		return "WIM parse error at " + e.Oper + ": " + e.Err.Error()
   302  	}
   303  	return fmt.Sprintf("WIM parse error: %s %s: %s", e.Oper, e.Path, e.Err.Error())
   304  }
   305  
   306  func (e *ParseError) Unwrap() error { return e.Err }
   307  
// Reader provides functions to read a WIM file.
//
// hdr is the decoded file header, r is the underlying file, and fileData maps
// a content hash to the descriptor of the non-metadata resource that holds
// it (built from the offset table by NewReader).
type Reader struct {
	hdr      wimHeader
	r        io.ReaderAt
	fileData map[SHA1Hash]resourceDescriptor

	XMLInfo string   // The XML information about the WIM.
	Image   []*Image // The WIM's images.
}
   317  
// Image represents an image within a WIM file.
//
// offset describes the image's metadata resource. sds holds the security
// descriptors, loaded on the first call to Open. r is the currently open
// reader over the metadata resource and curOffset is the position readdir
// tracks for it (reset sets it to -1 and closes r). rootOffset is where the
// root directory entry starts, set by Open to the end of the security table.
// m is held by readdir for the duration of each call.
type Image struct {
	wim        *Reader
	offset     resourceDescriptor
	sds        [][]byte
	rootOffset int64
	r          io.ReadCloser
	curOffset  int64
	m          sync.Mutex

	ImageInfo
}
   330  
// StreamHeader contains alternate data stream metadata.
//
// Size is the OriginalSize of the resource matching Hash, or zero when Hash
// is all zeroes (see readNextStream).
type StreamHeader struct {
	Name string
	Hash SHA1Hash
	Size int64
}
   337  
// Stream represents an alternate data stream or reparse point data stream.
//
// wim is the Reader the stream belongs to and offset is the descriptor of the
// resource holding its contents; Open reads through both.
type Stream struct {
	StreamHeader
	wim    *Reader
	offset resourceDescriptor
}
   344  
// FileHeader contains file metadata.
//
// Attributes is a combination of the FILE_ATTRIBUTE_* bits. LinkID is set
// only for entries that are not reparse points; ReparseTag and
// ReparseReserved are set only for reparse points (see readNextEntry).
// SecurityDescriptor is nil when the entry carries no security ID.
type FileHeader struct {
	Name               string
	ShortName          string
	Attributes         uint32
	SecurityDescriptor []byte
	CreationTime       Filetime
	LastAccessTime     Filetime
	LastWriteTime      Filetime
	Hash               SHA1Hash
	Size               int64
	LinkID             int64
	ReparseTag         uint32
	ReparseReserved    uint32
}
   360  
// File represents a file or directory in a WIM image.
//
// Streams holds the entry's named streams. offset is the descriptor of the
// resource with the file's contents, img is the owning image, and
// subdirOffset is the position within the image's metadata resource where
// this directory's children start (used by Readdir).
type File struct {
	FileHeader
	Streams      []*Stream
	offset       resourceDescriptor
	img          *Image
	subdirOffset int64
}
   369  
   370  // NewReader returns a Reader that can be used to read WIM file data.
   371  func NewReader(f io.ReaderAt) (*Reader, error) {
   372  	r := &Reader{r: f}
   373  	section := io.NewSectionReader(f, 0, 0xffff)
   374  	err := binary.Read(section, binary.LittleEndian, &r.hdr)
   375  	if err != nil {
   376  		return nil, err
   377  	}
   378  
   379  	if r.hdr.ImageTag != wimImageTag {
   380  		return nil, &ParseError{Oper: "image tag", Err: errors.New("not a WIM file")}
   381  	}
   382  
   383  	if r.hdr.Flags&^supportedHdrFlags != 0 {
   384  		return nil, fmt.Errorf("unsupported WIM flags %x", r.hdr.Flags&^supportedHdrFlags)
   385  	}
   386  
   387  	if r.hdr.CompressionSize != 0x8000 {
   388  		return nil, fmt.Errorf("unsupported compression size %d", r.hdr.CompressionSize)
   389  	}
   390  
   391  	if r.hdr.TotalParts != 1 {
   392  		return nil, errors.New("multi-part WIM not supported")
   393  	}
   394  
   395  	fileData, images, err := r.readOffsetTable(&r.hdr.OffsetTable)
   396  	if err != nil {
   397  		return nil, err
   398  	}
   399  
   400  	xmlinfo, err := r.readXML()
   401  	if err != nil {
   402  		return nil, err
   403  	}
   404  
   405  	var inf info
   406  	err = xml.Unmarshal([]byte(xmlinfo), &inf)
   407  	if err != nil {
   408  		return nil, &ParseError{Oper: "XML info", Err: err}
   409  	}
   410  
   411  	for i, img := range images {
   412  		for _, imgInfo := range inf.Image {
   413  			if imgInfo.Index == i+1 {
   414  				img.ImageInfo = imgInfo
   415  				break
   416  			}
   417  		}
   418  	}
   419  
   420  	r.fileData = fileData
   421  	r.Image = images
   422  	r.XMLInfo = xmlinfo
   423  	return r, nil
   424  }
   425  
   426  // Close releases resources associated with the Reader.
   427  func (r *Reader) Close() error {
   428  	for _, img := range r.Image {
   429  		img.reset()
   430  	}
   431  	return nil
   432  }
   433  
// resourceReader returns a reader over the whole resource described by hdr,
// starting at its first byte.
func (r *Reader) resourceReader(hdr *resourceDescriptor) (io.ReadCloser, error) {
	return r.resourceReaderWithOffset(hdr, 0)
}
   437  
   438  func (r *Reader) resourceReaderWithOffset(hdr *resourceDescriptor, offset int64) (io.ReadCloser, error) {
   439  	var sr io.ReadCloser
   440  	section := io.NewSectionReader(r.r, hdr.Offset, hdr.CompressedSize())
   441  	if hdr.Flags()&resFlagCompressed == 0 {
   442  		_, _ = section.Seek(offset, 0)
   443  		sr = io.NopCloser(section)
   444  	} else {
   445  		cr, err := newCompressedReader(section, hdr.OriginalSize, offset)
   446  		if err != nil {
   447  			return nil, err
   448  		}
   449  		sr = cr
   450  	}
   451  
   452  	return sr, nil
   453  }
   454  
   455  func (r *Reader) readResource(hdr *resourceDescriptor) ([]byte, error) {
   456  	rsrc, err := r.resourceReader(hdr)
   457  	if err != nil {
   458  		return nil, err
   459  	}
   460  	defer rsrc.Close()
   461  	return io.ReadAll(rsrc)
   462  }
   463  
   464  func (r *Reader) readXML() (string, error) {
   465  	if r.hdr.XMLData.CompressedSize() == 0 {
   466  		return "", nil
   467  	}
   468  	rsrc, err := r.resourceReader(&r.hdr.XMLData)
   469  	if err != nil {
   470  		return "", err
   471  	}
   472  	defer rsrc.Close()
   473  
   474  	xmlData := make([]uint16, r.hdr.XMLData.OriginalSize/2)
   475  	err = binary.Read(rsrc, binary.LittleEndian, xmlData)
   476  	if err != nil {
   477  		return "", &ParseError{Oper: "XML data", Err: err}
   478  	}
   479  
   480  	// The BOM will always indicate little-endian UTF-16.
   481  	if xmlData[0] != 0xfeff {
   482  		return "", &ParseError{Oper: "XML data", Err: errors.New("invalid BOM")}
   483  	}
   484  	return string(utf16.Decode(xmlData[1:])), nil
   485  }
   486  
// readOffsetTable reads the offset table resource described by res and splits
// its records into two groups: metadata resources become Images (in table
// order), and every other resource is indexed by its hash in the returned
// map.
//
// It fails with a ParseError if the table cannot be read or decoded, if a
// record carries a flag outside supportedResFlags, or if the number of
// metadata resources differs from the header's ImageCount.
func (r *Reader) readOffsetTable(res *resourceDescriptor) (map[SHA1Hash]resourceDescriptor, []*Image, error) {
	fileData := make(map[SHA1Hash]resourceDescriptor)
	var images []*Image

	offsetTable, err := r.readResource(res)
	if err != nil {
		return nil, nil, &ParseError{Oper: "offset table", Err: err}
	}

	br := bytes.NewReader(offsetTable)
	for i := 0; ; i++ {
		// Note: this res shadows the parameter; it is the current table record.
		var res streamDescriptor
		err := binary.Read(br, binary.LittleEndian, &res)
		// A clean EOF at a record boundary ends the table.
		if err == io.EOF { //nolint:errorlint
			break
		}
		if err != nil {
			return nil, nil, &ParseError{Oper: "offset table", Err: err}
		}
		if res.Flags()&^supportedResFlags != 0 {
			return nil, nil, &ParseError{Oper: "offset table", Err: errors.New("unsupported resource flag")}
		}

		// Validation for ad-hoc testing
		// (compiled out while the validate constant is false; panics on any
		// read failure or hash mismatch when enabled).
		if validate {
			sec, err := r.resourceReader(&res.resourceDescriptor)
			if err != nil {
				panic(fmt.Sprint(i, err))
			}
			hash := sha1.New() //nolint:gosec // not used for secure application
			_, err = io.Copy(hash, sec)
			sec.Close()
			if err != nil {
				panic(fmt.Sprint(i, err))
			}
			var cmphash SHA1Hash
			copy(cmphash[:], hash.Sum(nil))
			if cmphash != res.Hash {
				panic(fmt.Sprint(i, "hash mismatch"))
			}
		}

		if res.Flags()&resFlagMetadata != 0 {
			image := &Image{
				wim:    r,
				offset: res.resourceDescriptor,
			}
			images = append(images, image)
		} else {
			// A later record with the same hash replaces an earlier one.
			fileData[res.Hash] = res.resourceDescriptor
		}
	}

	if len(images) != int(r.hdr.ImageCount) {
		return nil, nil, &ParseError{Oper: "offset table", Err: errors.New("mismatched image count")}
	}

	return fileData, images, nil
}
   546  
   547  func (*Reader) readSecurityDescriptors(rsrc io.Reader) (sds [][]byte, n int64, err error) {
   548  	var secBlock securityblockDisk
   549  	err = binary.Read(rsrc, binary.LittleEndian, &secBlock)
   550  	if err != nil {
   551  		return sds, 0, &ParseError{Oper: "security table", Err: err}
   552  	}
   553  
   554  	n += securityblockDiskSize
   555  
   556  	secSizes := make([]int64, secBlock.NumEntries)
   557  	err = binary.Read(rsrc, binary.LittleEndian, &secSizes)
   558  	if err != nil {
   559  		return sds, n, &ParseError{Oper: "security table sizes", Err: err}
   560  	}
   561  
   562  	n += int64(secBlock.NumEntries * 8)
   563  
   564  	sds = make([][]byte, secBlock.NumEntries)
   565  	for i, size := range secSizes {
   566  		sd := make([]byte, size&0xffffffff)
   567  		_, err = io.ReadFull(rsrc, sd)
   568  		if err != nil {
   569  			return sds, n, &ParseError{Oper: "security descriptor", Err: err}
   570  		}
   571  		n += int64(len(sd))
   572  		sds[i] = sd
   573  	}
   574  
   575  	secsize := int64((secBlock.TotalLength + 7) &^ 7)
   576  	if n > secsize {
   577  		return sds, n, &ParseError{Oper: "security descriptor", Err: errors.New("security descriptor table too small")}
   578  	}
   579  
   580  	_, err = io.CopyN(io.Discard, rsrc, secsize-n)
   581  	if err != nil {
   582  		return sds, n, err
   583  	}
   584  
   585  	n = secsize
   586  	return sds, n, nil
   587  }
   588  
   589  // Open parses the image and returns the root directory.
   590  func (img *Image) Open() (*File, error) {
   591  	if img.sds == nil {
   592  		rsrc, err := img.wim.resourceReaderWithOffset(&img.offset, img.rootOffset)
   593  		if err != nil {
   594  			return nil, err
   595  		}
   596  		sds, n, err := img.wim.readSecurityDescriptors(rsrc)
   597  		if err != nil {
   598  			rsrc.Close()
   599  			return nil, err
   600  		}
   601  		img.sds = sds
   602  		img.r = rsrc
   603  		img.rootOffset = n
   604  		img.curOffset = n
   605  	}
   606  
   607  	f, err := img.readdir(img.rootOffset)
   608  	if err != nil {
   609  		return nil, err
   610  	}
   611  	if len(f) != 1 {
   612  		return nil, &ParseError{Oper: "root directory", Err: errors.New("expected exactly 1 root directory entry")}
   613  	}
   614  	return f[0], err
   615  }
   616  
   617  func (img *Image) reset() {
   618  	if img.r != nil {
   619  		img.r.Close()
   620  		img.r = nil
   621  	}
   622  	img.curOffset = -1
   623  }
   624  
   625  func (img *Image) readdir(offset int64) ([]*File, error) {
   626  	img.m.Lock()
   627  	defer img.m.Unlock()
   628  
   629  	if offset < img.curOffset || offset > img.curOffset+chunkSize {
   630  		// Reset to seek backward or to seek forward very far.
   631  		img.reset()
   632  	}
   633  	if img.r == nil {
   634  		rsrc, err := img.wim.resourceReaderWithOffset(&img.offset, offset)
   635  		if err != nil {
   636  			return nil, err
   637  		}
   638  		img.r = rsrc
   639  		img.curOffset = offset
   640  	}
   641  	if offset > img.curOffset {
   642  		_, err := io.CopyN(io.Discard, img.r, offset-img.curOffset)
   643  		if err != nil {
   644  			img.reset()
   645  			if err == io.EOF { //nolint:errorlint
   646  				err = io.ErrUnexpectedEOF
   647  			}
   648  			return nil, err
   649  		}
   650  	}
   651  
   652  	var entries []*File
   653  	for {
   654  		e, n, err := img.readNextEntry(img.r)
   655  		img.curOffset += n
   656  		if err == io.EOF { //nolint:errorlint
   657  			break
   658  		}
   659  		if err != nil {
   660  			img.reset()
   661  			return nil, err
   662  		}
   663  		entries = append(entries, e)
   664  	}
   665  	return entries, nil
   666  }
   667  
   668  func (img *Image) readNextEntry(r io.Reader) (*File, int64, error) {
   669  	var length int64
   670  	err := binary.Read(r, binary.LittleEndian, &length)
   671  	if err != nil {
   672  		return nil, 0, &ParseError{Oper: "directory length check", Err: err}
   673  	}
   674  
   675  	if length == 0 {
   676  		return nil, 8, io.EOF
   677  	}
   678  
   679  	left := length
   680  	if left < direntrySize {
   681  		return nil, 0, &ParseError{Oper: "directory entry", Err: errors.New("size too short")}
   682  	}
   683  
   684  	var dentry direntry
   685  	err = binary.Read(r, binary.LittleEndian, &dentry)
   686  	if err != nil {
   687  		return nil, 0, &ParseError{Oper: "directory entry", Err: err}
   688  	}
   689  
   690  	left -= direntrySize
   691  
   692  	namesLen := int64(dentry.FileNameLength + 2 + dentry.ShortNameLength)
   693  	if left < namesLen {
   694  		return nil, 0, &ParseError{Oper: "directory entry", Err: errors.New("size too short for names")}
   695  	}
   696  
   697  	names := make([]uint16, namesLen/2)
   698  	err = binary.Read(r, binary.LittleEndian, names)
   699  	if err != nil {
   700  		return nil, 0, &ParseError{Oper: "file name", Err: err}
   701  	}
   702  
   703  	left -= namesLen
   704  
   705  	var name, shortName string
   706  	if dentry.FileNameLength > 0 {
   707  		name = string(utf16.Decode(names[:dentry.FileNameLength/2]))
   708  	}
   709  
   710  	if dentry.ShortNameLength > 0 {
   711  		shortName = string(utf16.Decode(names[dentry.FileNameLength/2+1:]))
   712  	}
   713  
   714  	var offset resourceDescriptor
   715  	zerohash := SHA1Hash{}
   716  	if dentry.Hash != zerohash {
   717  		var ok bool
   718  		offset, ok = img.wim.fileData[dentry.Hash]
   719  		if !ok {
   720  			return nil, 0, &ParseError{
   721  				Oper: "directory entry",
   722  				Path: name,
   723  				Err:  fmt.Errorf("could not find file data matching hash %#v", dentry),
   724  			}
   725  		}
   726  	}
   727  
   728  	f := &File{
   729  		FileHeader: FileHeader{
   730  			Attributes:     dentry.Attributes,
   731  			CreationTime:   dentry.CreationTime,
   732  			LastAccessTime: dentry.LastAccessTime,
   733  			LastWriteTime:  dentry.LastWriteTime,
   734  			Hash:           dentry.Hash,
   735  			Size:           offset.OriginalSize,
   736  			Name:           name,
   737  			ShortName:      shortName,
   738  		},
   739  
   740  		offset:       offset,
   741  		img:          img,
   742  		subdirOffset: dentry.SubdirOffset,
   743  	}
   744  
   745  	isDir := false
   746  
   747  	if dentry.Attributes&FILE_ATTRIBUTE_REPARSE_POINT == 0 {
   748  		f.LinkID = dentry.ReparseHardLink
   749  		if dentry.Attributes&FILE_ATTRIBUTE_DIRECTORY != 0 {
   750  			isDir = true
   751  		}
   752  	} else {
   753  		f.ReparseTag = uint32(dentry.ReparseHardLink)
   754  		f.ReparseReserved = uint32(dentry.ReparseHardLink >> 32)
   755  	}
   756  
   757  	if isDir && f.subdirOffset == 0 {
   758  		return nil, 0, &ParseError{Oper: "directory entry", Path: name, Err: errors.New("no subdirectory data for directory")}
   759  	} else if !isDir && f.subdirOffset != 0 {
   760  		return nil, 0, &ParseError{Oper: "directory entry", Path: name, Err: errors.New("unexpected subdirectory data for non-directory")}
   761  	}
   762  
   763  	if dentry.SecurityID != 0xffffffff {
   764  		f.SecurityDescriptor = img.sds[dentry.SecurityID]
   765  	}
   766  
   767  	_, err = io.CopyN(io.Discard, r, left)
   768  	if err != nil {
   769  		if err == io.EOF { //nolint:errorlint
   770  			err = io.ErrUnexpectedEOF
   771  		}
   772  		return nil, 0, err
   773  	}
   774  
   775  	if dentry.StreamCount > 0 {
   776  		var streams []*Stream
   777  		for i := uint16(0); i < dentry.StreamCount; i++ {
   778  			s, n, err := img.readNextStream(r)
   779  			length += n
   780  			if err != nil {
   781  				return nil, 0, err
   782  			}
   783  			// The first unnamed stream should be treated as the file stream.
   784  			if i == 0 && s.Name == "" {
   785  				f.Hash = s.Hash
   786  				f.Size = s.Size
   787  				f.offset = s.offset
   788  			} else if s.Name != "" {
   789  				streams = append(streams, s)
   790  			}
   791  		}
   792  		f.Streams = streams
   793  	}
   794  
   795  	if dentry.Attributes&FILE_ATTRIBUTE_REPARSE_POINT != 0 && f.Size == 0 {
   796  		return nil, 0, &ParseError{
   797  			Oper: "directory entry",
   798  			Path: name,
   799  			Err:  errors.New("reparse point is missing reparse stream"),
   800  		}
   801  	}
   802  
   803  	return f, length, nil
   804  }
   805  
   806  func (img *Image) readNextStream(r io.Reader) (*Stream, int64, error) {
   807  	var length int64
   808  	err := binary.Read(r, binary.LittleEndian, &length)
   809  	if err != nil {
   810  		if err == io.EOF { //nolint:errorlint
   811  			err = io.ErrUnexpectedEOF
   812  		}
   813  		return nil, 0, &ParseError{Oper: "stream length check", Err: err}
   814  	}
   815  
   816  	left := length
   817  	if left < streamentrySize {
   818  		return nil, 0, &ParseError{Oper: "stream entry", Err: errors.New("size too short")}
   819  	}
   820  
   821  	var sentry streamentry
   822  	err = binary.Read(r, binary.LittleEndian, &sentry)
   823  	if err != nil {
   824  		return nil, 0, &ParseError{Oper: "stream entry", Err: err}
   825  	}
   826  
   827  	left -= streamentrySize
   828  
   829  	if left < int64(sentry.NameLength) {
   830  		return nil, 0, &ParseError{Oper: "stream entry", Err: errors.New("size too short for name")}
   831  	}
   832  
   833  	names := make([]uint16, sentry.NameLength/2)
   834  	err = binary.Read(r, binary.LittleEndian, names)
   835  	if err != nil {
   836  		return nil, 0, &ParseError{Oper: "file name", Err: err}
   837  	}
   838  
   839  	left -= int64(sentry.NameLength)
   840  	name := string(utf16.Decode(names))
   841  
   842  	var offset resourceDescriptor
   843  	if sentry.Hash != (SHA1Hash{}) {
   844  		var ok bool
   845  		offset, ok = img.wim.fileData[sentry.Hash]
   846  		if !ok {
   847  			return nil, 0, &ParseError{
   848  				Oper: "stream entry",
   849  				Path: name,
   850  				Err:  fmt.Errorf("could not find file data matching hash %v", sentry.Hash),
   851  			}
   852  		}
   853  	}
   854  
   855  	s := &Stream{
   856  		StreamHeader: StreamHeader{
   857  			Hash: sentry.Hash,
   858  			Size: offset.OriginalSize,
   859  			Name: name,
   860  		},
   861  		wim:    img.wim,
   862  		offset: offset,
   863  	}
   864  
   865  	_, err = io.CopyN(io.Discard, r, left)
   866  	if err != nil {
   867  		if err == io.EOF { //nolint:errorlint
   868  			err = io.ErrUnexpectedEOF
   869  		}
   870  		return nil, 0, err
   871  	}
   872  
   873  	return s, length, nil
   874  }
   875  
// Open returns an io.ReadCloser that can be used to read the stream's contents.
// The reader covers the resource recorded for the stream when its entry was
// parsed.
func (s *Stream) Open() (io.ReadCloser, error) {
	return s.wim.resourceReader(&s.offset)
}
   880  
// Open returns an io.ReadCloser that can be used to read the file's contents.
// The reader covers the resource recorded for the file when its entry was
// parsed.
func (f *File) Open() (io.ReadCloser, error) {
	return f.img.wim.resourceReader(&f.offset)
}
   885  
   886  // Readdir reads the directory entries.
   887  func (f *File) Readdir() ([]*File, error) {
   888  	if !f.IsDir() {
   889  		return nil, errors.New("not a directory")
   890  	}
   891  	return f.img.readdir(f.subdirOffset)
   892  }
   893  
   894  // IsDir returns whether the given file is a directory. It returns false when it
   895  // is a directory reparse point.
   896  func (f *FileHeader) IsDir() bool {
   897  	return f.Attributes&(FILE_ATTRIBUTE_DIRECTORY|FILE_ATTRIBUTE_REPARSE_POINT) == FILE_ATTRIBUTE_DIRECTORY
   898  }
   899  

View as plain text