...

Source file src/github.com/Microsoft/hcsshim/ext4/internal/compactext4/compact.go

Documentation: github.com/Microsoft/hcsshim/ext4/internal/compactext4

     1  package compactext4
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"encoding/binary"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"path"
    11  	"sort"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/Microsoft/hcsshim/ext4/internal/format"
    16  	"github.com/Microsoft/hcsshim/internal/memory"
    17  )
    18  
// Writer writes a compact ext4 file system.
type Writer struct {
	// f is the underlying storage; seekBlock seeks it and Stat reads xattr
	// blocks back from it.
	f                    io.ReadWriteSeeker
	// bw is the buffered writer that write and zero send bytes to. seekBlock
	// flushes it before seeking f (presumably it wraps f; confirm in the
	// constructor).
	bw                   *bufio.Writer
	// inodes holds every inode created so far; inode number n is stored at
	// index n-1 (see getInode).
	inodes               []*inode
	// curName and curInode identify the inode currently receiving data, as
	// set by startInode. curInode is nil when no inode is in progress.
	curName              string
	curInode             *inode
	// pos is the current byte offset in the image; write and zero advance it
	// and seekBlock resets it.
	pos                  int64
	// dataWritten is the number of bytes written to curInode so far and
	// dataMax is the number of bytes it is expected to receive.
	dataWritten, dataMax int64
	// err is a sticky error: once set, write and zero refuse to do any work.
	err                  error
	// initialized records whether init has run; finishInode calls init when
	// it is false.
	initialized          bool
	// supportInlineData lets makeInode store small regular files inside the
	// inode instead of in data blocks.
	supportInlineData    bool
	// maxDiskSize is the byte limit that write and zero enforce by failing
	// with exceededMaxSizeError.
	maxDiskSize          int64
	// gdBlocks is not used in this part of the file; presumably the number of
	// blocks reserved for group descriptors (TODO confirm).
	gdBlocks             uint32
}
    34  
// Mode flags for Linux files. Every value is an alias of the constant with
// the same name in the format package.
const (
	// Permission bits for "other", group and user (owner).
	S_IXOTH  = format.S_IXOTH
	S_IWOTH  = format.S_IWOTH
	S_IROTH  = format.S_IROTH
	S_IXGRP  = format.S_IXGRP
	S_IWGRP  = format.S_IWGRP
	S_IRGRP  = format.S_IRGRP
	S_IXUSR  = format.S_IXUSR
	S_IWUSR  = format.S_IWUSR
	S_IRUSR  = format.S_IRUSR
	// Sticky, set-group-ID and set-user-ID bits.
	S_ISVTX  = format.S_ISVTX
	S_ISGID  = format.S_ISGID
	S_ISUID  = format.S_ISUID
	// File type values; compare them against Mode & TypeMask.
	S_IFIFO  = format.S_IFIFO
	S_IFCHR  = format.S_IFCHR
	S_IFDIR  = format.S_IFDIR
	S_IFBLK  = format.S_IFBLK
	S_IFREG  = format.S_IFREG
	S_IFLNK  = format.S_IFLNK
	S_IFSOCK = format.S_IFSOCK

	// TypeMask selects the file type bits of a mode value.
	TypeMask = format.TypeMask
)
    59  
// inode is the in-memory description of one file system object, kept until
// the image is finalized.
type inode struct {
	// Size is the logical size in bytes (file length, symlink target length,
	// or directory data length once the directory has been written).
	Size                        int64
	// Timestamps in the packed form produced by timeToFsTime.
	Atime, Ctime, Mtime, Crtime uint64
	// Number is the 1-based inode number; the inode lives at
	// Writer.inodes[Number-1].
	Number                      format.InodeNumber
	// Mode holds the file type and permission bits.
	Mode                        uint16
	Uid, Gid                    uint32
	// LinkCount is maintained by Create and Link as directory entries are
	// added and replaced.
	LinkCount                   uint32
	// XattrBlock is the block number of the external xattr block, or 0 if
	// the inode has none.
	XattrBlock                  uint32
	// BlockCount counts the blocks charged to this inode (xattr block, data
	// blocks and extent tree blocks).
	BlockCount                  uint32
	Devmajor, Devminor          uint32
	Flags                       format.InodeFlag
	// Data holds inline file data, a short symlink target, or the
	// serialized root of the extent tree, depending on Flags and file type.
	Data                        []byte
	// XattrInline is the in-inode xattr region, starting with the 4-byte
	// xattr magic; nil when there are no in-inode attributes.
	XattrInline                 []byte
	// Children maps entry names to child inodes; it is only allocated for
	// directories.
	Children                    directory
}
    75  
    76  func (node *inode) FileType() uint16 {
    77  	return node.Mode & format.TypeMask
    78  }
    79  
    80  func (node *inode) IsDir() bool {
    81  	return node.FileType() == S_IFDIR
    82  }
    83  
// A File represents a file to be added to an ext4 file system.
type File struct {
	// Linkname is the target of a symbolic link; it is only used when Mode
	// has type S_IFLNK.
	Linkname                    string
	// Size is the number of bytes that will be written for a regular file.
	Size                        int64
	// Mode holds the type and permission bits. A mode without any type bits
	// is treated as a regular file.
	Mode                        uint16
	Uid, Gid                    uint32
	// Timestamps; a zero time.Time is stored as 0.
	Atime, Ctime, Mtime, Crtime time.Time
	Devmajor, Devminor          uint32
	// Xattrs maps full extended attribute names (for example "user.foo") to
	// their values.
	Xattrs                      map[string][]byte
}
    94  
// File system geometry and on-disk layout constants.
const (
	// inodeFirst is the first inode number after the reserved range; it is
	// assigned to lost+found.
	inodeFirst        = 11
	inodeLostAndFound = inodeFirst

	// BlockSize is the file system block size in bytes.
	BlockSize               = 4096
	// One block bitmap block holds BlockSize*8 bits.
	blocksPerGroup          = BlockSize * 8
	inodeSize               = 256
	maxInodesPerGroup       = BlockSize * 8 // Limited by the inode bitmap
	// Number of inodes that fit in one block of the inode table.
	inodesPerGroupIncrement = BlockSize / inodeSize

	defaultMaxDiskSize = 16 * memory.GiB                // 16GB
	maxMaxDiskSize     = 16 * 1024 * 1024 * 1024 * 1024 // 16TB

	groupDescriptorSize      = 32 // Use the small group descriptor
	groupsPerDescriptorBlock = BlockSize / groupDescriptorSize

	maxFileSize             = 128 * memory.GiB // 128GB file size maximum for now
	smallSymlinkSize        = 59               // max symlink size that goes directly in the inode
	maxBlocksPerExtent      = 0x8000           // maximum number of blocks in an extent
	// inodeDataSize is the size of the in-inode block/extent area.
	inodeDataSize           = 60
	inodeUsedSize           = 152 // fields through CrtimeExtra
	// inodeExtraSize is the space left in the inode after the fixed fields;
	// it holds the in-inode extended attributes.
	inodeExtraSize          = inodeSize - inodeUsedSize
	xattrInodeOverhead      = 4 + 4                       // magic number + empty next entry value
	xattrBlockOverhead      = 32 + 4                      // header + empty next entry value
	inlineDataXattrOverhead = xattrInodeOverhead + 16 + 4 // entry + "data"
	// inlineDataSize is the largest file that can be stored entirely inside
	// its inode (data area plus the "system.data" xattr value).
	inlineDataSize          = inodeDataSize + inodeExtraSize - inlineDataXattrOverhead
)
   122  
   123  type exceededMaxSizeError struct {
   124  	Size int64
   125  }
   126  
   127  func (err exceededMaxSizeError) Error() string {
   128  	return fmt.Sprintf("disk exceeded maximum size of %d bytes", err.Size)
   129  }
   130  
   131  var directoryEntrySize = binary.Size(format.DirectoryEntry{})
   132  var extraIsize = uint16(inodeUsedSize - 128)
   133  
   134  type directory map[string]*inode
   135  
   136  func splitFirst(p string) (string, string) {
   137  	n := strings.IndexByte(p, '/')
   138  	if n >= 0 {
   139  		return p[:n], p[n+1:]
   140  	}
   141  	return p, ""
   142  }
   143  
   144  func (w *Writer) findPath(root *inode, p string) *inode {
   145  	inode := root
   146  	for inode != nil && len(p) != 0 {
   147  		name, rest := splitFirst(p)
   148  		p = rest
   149  		inode = inode.Children[name]
   150  	}
   151  	return inode
   152  }
   153  
   154  func timeToFsTime(t time.Time) uint64 {
   155  	if t.IsZero() {
   156  		return 0
   157  	}
   158  	s := t.Unix()
   159  	if s < -0x80000000 {
   160  		return 0x80000000
   161  	}
   162  	if s > 0x37fffffff {
   163  		return 0x37fffffff
   164  	}
   165  	return uint64(s) | uint64(t.Nanosecond())<<34
   166  }
   167  
   168  func fsTimeToTime(t uint64) time.Time {
   169  	if t == 0 {
   170  		return time.Time{}
   171  	}
   172  	s := int64(t & 0x3ffffffff)
   173  	if s > 0x7fffffff && s < 0x100000000 {
   174  		s = int64(int32(uint32(s)))
   175  	}
   176  	return time.Unix(s, int64(t>>34))
   177  }
   178  
   179  func (w *Writer) getInode(i format.InodeNumber) *inode {
   180  	if i == 0 || int(i) > len(w.inodes) {
   181  		return nil
   182  	}
   183  	return w.inodes[i-1]
   184  }
   185  
   186  var xattrPrefixes = []struct {
   187  	Index  uint8
   188  	Prefix string
   189  }{
   190  	{2, "system.posix_acl_access"},
   191  	{3, "system.posix_acl_default"},
   192  	{8, "system.richacl"},
   193  	{7, "system."},
   194  	{1, "user."},
   195  	{4, "trusted."},
   196  	{6, "security."},
   197  }
   198  
   199  func compressXattrName(name string) (uint8, string) {
   200  	for _, p := range xattrPrefixes {
   201  		if strings.HasPrefix(name, p.Prefix) {
   202  			return p.Index, name[len(p.Prefix):]
   203  		}
   204  	}
   205  	return 0, name
   206  }
   207  
   208  func decompressXattrName(index uint8, name string) string {
   209  	for _, p := range xattrPrefixes {
   210  		if index == p.Index {
   211  			return p.Prefix + name
   212  		}
   213  	}
   214  	return name
   215  }
   216  
   217  func hashXattrEntry(name string, value []byte) uint32 {
   218  	var hash uint32
   219  	for i := 0; i < len(name); i++ {
   220  		hash = (hash << 5) ^ (hash >> 27) ^ uint32(name[i])
   221  	}
   222  
   223  	for i := 0; i+3 < len(value); i += 4 {
   224  		hash = (hash << 16) ^ (hash >> 16) ^ binary.LittleEndian.Uint32(value[i:i+4])
   225  	}
   226  
   227  	if len(value)%4 != 0 {
   228  		var last [4]byte
   229  		copy(last[:], value[len(value)&^3:])
   230  		hash = (hash << 16) ^ (hash >> 16) ^ binary.LittleEndian.Uint32(last[:])
   231  	}
   232  	return hash
   233  }
   234  
   235  type xattr struct {
   236  	Name  string
   237  	Index uint8
   238  	Value []byte
   239  }
   240  
   241  func (x *xattr) EntryLen() int {
   242  	return (len(x.Name)+3)&^3 + 16
   243  }
   244  
   245  func (x *xattr) ValueLen() int {
   246  	return (len(x.Value) + 3) &^ 3
   247  }
   248  
   249  type xattrState struct {
   250  	inode, block         []xattr
   251  	inodeLeft, blockLeft int
   252  }
   253  
   254  func (s *xattrState) init() {
   255  	s.inodeLeft = inodeExtraSize - xattrInodeOverhead
   256  	s.blockLeft = BlockSize - xattrBlockOverhead
   257  }
   258  
   259  func (s *xattrState) addXattr(name string, value []byte) bool {
   260  	index, name := compressXattrName(name)
   261  	x := xattr{
   262  		Index: index,
   263  		Name:  name,
   264  		Value: value,
   265  	}
   266  	length := x.EntryLen() + x.ValueLen()
   267  	if s.inodeLeft >= length {
   268  		s.inode = append(s.inode, x)
   269  		s.inodeLeft -= length
   270  	} else if s.blockLeft >= length {
   271  		s.block = append(s.block, x)
   272  		s.blockLeft -= length
   273  	} else {
   274  		return false
   275  	}
   276  	return true
   277  }
   278  
   279  func putXattrs(xattrs []xattr, b []byte, offsetDelta uint16) {
   280  	offset := uint16(len(b)) + offsetDelta
   281  	eb := b
   282  	db := b
   283  	for _, xattr := range xattrs {
   284  		vl := xattr.ValueLen()
   285  		offset -= uint16(vl)
   286  		eb[0] = uint8(len(xattr.Name))
   287  		eb[1] = xattr.Index
   288  		binary.LittleEndian.PutUint16(eb[2:], offset)
   289  		binary.LittleEndian.PutUint32(eb[8:], uint32(len(xattr.Value)))
   290  		binary.LittleEndian.PutUint32(eb[12:], hashXattrEntry(xattr.Name, xattr.Value))
   291  		copy(eb[16:], xattr.Name)
   292  		eb = eb[xattr.EntryLen():]
   293  		copy(db[len(db)-vl:], xattr.Value)
   294  		db = db[:len(db)-vl]
   295  	}
   296  }
   297  
   298  func getXattrs(b []byte, xattrs map[string][]byte, offsetDelta uint16) {
   299  	eb := b
   300  	for len(eb) != 0 {
   301  		nameLen := eb[0]
   302  		if nameLen == 0 {
   303  			break
   304  		}
   305  		index := eb[1]
   306  		offset := binary.LittleEndian.Uint16(eb[2:]) - offsetDelta
   307  		valueLen := binary.LittleEndian.Uint32(eb[8:])
   308  		attr := xattr{
   309  			Index: index,
   310  			Name:  string(eb[16 : 16+nameLen]),
   311  			Value: b[offset : uint32(offset)+valueLen],
   312  		}
   313  		xattrs[decompressXattrName(index, attr.Name)] = attr.Value
   314  		eb = eb[attr.EntryLen():]
   315  	}
   316  }
   317  
   318  func (w *Writer) writeXattrs(inode *inode, state *xattrState) error {
   319  	// Write the inline attributes.
   320  	if len(state.inode) != 0 {
   321  		inode.XattrInline = make([]byte, inodeExtraSize)
   322  		binary.LittleEndian.PutUint32(inode.XattrInline[0:], format.XAttrHeaderMagic) // Magic
   323  		putXattrs(state.inode, inode.XattrInline[4:], 0)
   324  	}
   325  
   326  	// Write the block attributes. If there was previously an xattr block, then
   327  	// rewrite it even if it is now empty.
   328  	if len(state.block) != 0 || inode.XattrBlock != 0 {
   329  		sort.Slice(state.block, func(i, j int) bool {
   330  			return state.block[i].Index < state.block[j].Index ||
   331  				len(state.block[i].Name) < len(state.block[j].Name) ||
   332  				state.block[i].Name < state.block[j].Name
   333  		})
   334  
   335  		var b [BlockSize]byte
   336  		binary.LittleEndian.PutUint32(b[0:], format.XAttrHeaderMagic) // Magic
   337  		binary.LittleEndian.PutUint32(b[4:], 1)                       // ReferenceCount
   338  		binary.LittleEndian.PutUint32(b[8:], 1)                       // Blocks
   339  		putXattrs(state.block, b[32:], 32)
   340  
   341  		orig := w.block()
   342  		if inode.XattrBlock == 0 {
   343  			inode.XattrBlock = orig
   344  			inode.BlockCount++
   345  		} else {
   346  			// Reuse the original block.
   347  			w.seekBlock(inode.XattrBlock)
   348  			defer w.seekBlock(orig)
   349  		}
   350  
   351  		if _, err := w.write(b[:]); err != nil {
   352  			return err
   353  		}
   354  	}
   355  
   356  	return nil
   357  }
   358  
   359  func (w *Writer) write(b []byte) (int, error) {
   360  	if w.err != nil {
   361  		return 0, w.err
   362  	}
   363  	if w.pos+int64(len(b)) > w.maxDiskSize {
   364  		w.err = exceededMaxSizeError{w.maxDiskSize}
   365  		return 0, w.err
   366  	}
   367  	n, err := w.bw.Write(b)
   368  	w.pos += int64(n)
   369  	w.err = err
   370  	return n, err
   371  }
   372  
   373  func (w *Writer) zero(n int64) (int64, error) {
   374  	if w.err != nil {
   375  		return 0, w.err
   376  	}
   377  	if w.pos+int64(n) > w.maxDiskSize {
   378  		w.err = exceededMaxSizeError{w.maxDiskSize}
   379  		return 0, w.err
   380  	}
   381  	n, err := io.CopyN(w.bw, zero, n)
   382  	w.pos += n
   383  	w.err = err
   384  	return n, err
   385  }
   386  
   387  func (w *Writer) makeInode(f *File, node *inode) (*inode, error) {
   388  	mode := f.Mode
   389  	if mode&format.TypeMask == 0 {
   390  		mode |= format.S_IFREG
   391  	}
   392  	typ := mode & format.TypeMask
   393  	ino := format.InodeNumber(len(w.inodes) + 1)
   394  	if node == nil {
   395  		node = &inode{
   396  			Number: ino,
   397  		}
   398  		if typ == S_IFDIR {
   399  			node.Children = make(directory)
   400  			node.LinkCount = 1 // A directory is linked to itself.
   401  		}
   402  	} else if node.Flags&format.InodeFlagExtents != 0 {
   403  		// Since we cannot deallocate or reuse blocks, don't allow updates that
   404  		// would invalidate data that has already been written.
   405  		return nil, errors.New("cannot overwrite file with non-inline data")
   406  	}
   407  	node.Mode = mode
   408  	node.Uid = f.Uid
   409  	node.Gid = f.Gid
   410  	node.Flags = format.InodeFlagHugeFile
   411  	node.Atime = timeToFsTime(f.Atime)
   412  	node.Ctime = timeToFsTime(f.Ctime)
   413  	node.Mtime = timeToFsTime(f.Mtime)
   414  	node.Crtime = timeToFsTime(f.Crtime)
   415  	node.Devmajor = f.Devmajor
   416  	node.Devminor = f.Devminor
   417  	node.Data = nil
   418  	if f.Xattrs == nil {
   419  		f.Xattrs = make(map[string][]byte)
   420  	}
   421  
   422  	// copy over existing xattrs first, we need to merge existing xattrs and the passed xattrs.
   423  	existingXattrs := make(map[string][]byte)
   424  	if len(node.XattrInline) > 0 {
   425  		getXattrs(node.XattrInline[4:], existingXattrs, 0)
   426  	}
   427  	node.XattrInline = nil
   428  
   429  	var xstate xattrState
   430  	xstate.init()
   431  
   432  	var size int64
   433  	switch typ {
   434  	case format.S_IFREG:
   435  		size = f.Size
   436  		if f.Size > maxFileSize {
   437  			return nil, fmt.Errorf("file too big: %d > %d", f.Size, int64(maxFileSize))
   438  		}
   439  		if f.Size <= inlineDataSize && w.supportInlineData {
   440  			node.Data = make([]byte, f.Size)
   441  			extra := 0
   442  			if f.Size > inodeDataSize {
   443  				extra = int(f.Size - inodeDataSize)
   444  			}
   445  			// Add a dummy entry for now.
   446  			if !xstate.addXattr("system.data", node.Data[:extra]) {
   447  				panic("not enough room for inline data")
   448  			}
   449  			node.Flags |= format.InodeFlagInlineData
   450  		}
   451  	case format.S_IFLNK:
   452  		node.Mode |= 0777 // Symlinks should appear as ugw rwx
   453  		size = int64(len(f.Linkname))
   454  		if size <= smallSymlinkSize {
   455  			// Special case: small symlinks go directly in Block without setting
   456  			// an inline data flag.
   457  			node.Data = make([]byte, len(f.Linkname))
   458  			copy(node.Data, f.Linkname)
   459  		}
   460  	case format.S_IFDIR, format.S_IFIFO, format.S_IFSOCK, format.S_IFCHR, format.S_IFBLK:
   461  	default:
   462  		return nil, fmt.Errorf("invalid mode %o", mode)
   463  	}
   464  
   465  	// merge xattrs but prefer currently passed over existing
   466  	for name, data := range existingXattrs {
   467  		if _, ok := f.Xattrs[name]; !ok {
   468  			f.Xattrs[name] = data
   469  		}
   470  	}
   471  
   472  	// Accumulate the extended attributes.
   473  	if len(f.Xattrs) != 0 {
   474  		// Sort the xattrs to avoid non-determinism in map iteration.
   475  		var xattrs []string
   476  		for name := range f.Xattrs {
   477  			xattrs = append(xattrs, name)
   478  		}
   479  		sort.Strings(xattrs)
   480  		for _, name := range xattrs {
   481  			if !xstate.addXattr(name, f.Xattrs[name]) {
   482  				return nil, fmt.Errorf("could not fit xattr %s", name)
   483  			}
   484  		}
   485  	}
   486  
   487  	if err := w.writeXattrs(node, &xstate); err != nil {
   488  		return nil, err
   489  	}
   490  
   491  	node.Size = size
   492  	if typ == format.S_IFLNK && size > smallSymlinkSize {
   493  		// Write the link name as data.
   494  		w.startInode("", node, size)
   495  		if _, err := w.Write([]byte(f.Linkname)); err != nil {
   496  			return nil, err
   497  		}
   498  		if err := w.finishInode(); err != nil {
   499  			return nil, err
   500  		}
   501  	}
   502  
   503  	if int(node.Number-1) >= len(w.inodes) {
   504  		w.inodes = append(w.inodes, node)
   505  	}
   506  	return node, nil
   507  }
   508  
   509  func (w *Writer) root() *inode {
   510  	return w.getInode(format.InodeRoot)
   511  }
   512  
   513  func (w *Writer) lookup(name string, mustExist bool) (*inode, *inode, string, error) {
   514  	root := w.root()
   515  	cleanname := path.Clean("/" + name)[1:]
   516  	if len(cleanname) == 0 {
   517  		return root, root, "", nil
   518  	}
   519  	dirname, childname := path.Split(cleanname)
   520  	if len(childname) == 0 || len(childname) > 0xff {
   521  		return nil, nil, "", fmt.Errorf("%s: invalid name", name)
   522  	}
   523  	dir := w.findPath(root, dirname)
   524  	if dir == nil || !dir.IsDir() {
   525  		return nil, nil, "", fmt.Errorf("%s: path not found", name)
   526  	}
   527  	child := dir.Children[childname]
   528  	if child == nil && mustExist {
   529  		return nil, nil, "", fmt.Errorf("%s: file not found", name)
   530  	}
   531  	return dir, child, childname, nil
   532  }
   533  
   534  // MakeParents ensures that all the parent directories in the path specified by `name` exists. If
   535  // they don't exist it creates them (like `mkdir -p`). These non existing parent directories are created
   536  // with the same permissions as that of it's parent directory. It is expected that the a
   537  // call to make these parent directories will be made at a later point with the correct
   538  // permissions, at that time the permissions of these directories will be updated.
   539  func (w *Writer) MakeParents(name string) error {
   540  	if err := w.finishInode(); err != nil {
   541  		return err
   542  	}
   543  
   544  	// go through the directories in the path one by one and create the
   545  	// parent directories if they don't exist.
   546  	cleanname := path.Clean("/" + name)[1:]
   547  	parentDirs, _ := path.Split(cleanname)
   548  	currentPath := ""
   549  	root := w.root()
   550  	dirname := ""
   551  	for parentDirs != "" {
   552  		dirname, parentDirs = splitFirst(parentDirs)
   553  		currentPath += "/" + dirname
   554  		if _, ok := root.Children[dirname]; !ok {
   555  			f := &File{
   556  				Mode:     root.Mode,
   557  				Atime:    time.Now(),
   558  				Mtime:    time.Now(),
   559  				Ctime:    time.Now(),
   560  				Crtime:   time.Now(),
   561  				Size:     0,
   562  				Uid:      root.Uid,
   563  				Gid:      root.Gid,
   564  				Devmajor: root.Devmajor,
   565  				Devminor: root.Devminor,
   566  				Xattrs:   make(map[string][]byte),
   567  			}
   568  			if err := w.Create(currentPath, f); err != nil {
   569  				return fmt.Errorf("failed while creating parent directories: %w", err)
   570  			}
   571  		}
   572  		root = root.Children[dirname]
   573  	}
   574  	return nil
   575  }
   576  
   577  // Create adds a file to the file system.
   578  func (w *Writer) Create(name string, f *File) error {
   579  	if err := w.finishInode(); err != nil {
   580  		return err
   581  	}
   582  	dir, existing, childname, err := w.lookup(name, false)
   583  	if err != nil {
   584  		return err
   585  	}
   586  	var reuse *inode
   587  	if existing != nil {
   588  		if existing.IsDir() {
   589  			if f.Mode&TypeMask != S_IFDIR {
   590  				return fmt.Errorf("%s: cannot replace a directory with a file", name)
   591  			}
   592  			reuse = existing
   593  		} else if f.Mode&TypeMask == S_IFDIR {
   594  			return fmt.Errorf("%s: cannot replace a file with a directory", name)
   595  		} else if existing.LinkCount < 2 {
   596  			reuse = existing
   597  		}
   598  	} else {
   599  		if f.Mode&TypeMask == S_IFDIR && dir.LinkCount >= format.MaxLinks {
   600  			return fmt.Errorf("%s: exceeded parent directory maximum link count", name)
   601  		}
   602  	}
   603  	child, err := w.makeInode(f, reuse)
   604  	if err != nil {
   605  		return fmt.Errorf("%s: %s", name, err)
   606  	}
   607  	if existing != child {
   608  		if existing != nil {
   609  			existing.LinkCount--
   610  		}
   611  		dir.Children[childname] = child
   612  		child.LinkCount++
   613  		if child.IsDir() {
   614  			dir.LinkCount++
   615  		}
   616  	}
   617  	if child.Mode&format.TypeMask == format.S_IFREG {
   618  		w.startInode(name, child, f.Size)
   619  	}
   620  	return nil
   621  }
   622  
   623  // Link adds a hard link to the file system.
   624  // We support creating hardlinks to symlinks themselves instead of what
   625  // the symlinks link to, as this is what containerd does upstream.
   626  func (w *Writer) Link(oldname, newname string) error {
   627  	if err := w.finishInode(); err != nil {
   628  		return err
   629  	}
   630  	newdir, existing, newchildname, err := w.lookup(newname, false)
   631  	if err != nil {
   632  		return err
   633  	}
   634  	if existing != nil && (existing.IsDir() || existing.LinkCount < 2) {
   635  		return fmt.Errorf("%s: cannot orphan existing file or directory", newname)
   636  	}
   637  
   638  	_, oldfile, _, err := w.lookup(oldname, true)
   639  	if err != nil {
   640  		return err
   641  	}
   642  	switch oldfile.Mode & format.TypeMask {
   643  	case format.S_IFDIR:
   644  		return fmt.Errorf("%s: link target cannot be a directory: %s", newname, oldname)
   645  	}
   646  
   647  	if existing != oldfile && oldfile.LinkCount >= format.MaxLinks {
   648  		return fmt.Errorf("%s: link target would exceed maximum link count: %s", newname, oldname)
   649  	}
   650  
   651  	if existing != nil {
   652  		existing.LinkCount--
   653  	}
   654  	oldfile.LinkCount++
   655  	newdir.Children[newchildname] = oldfile
   656  	return nil
   657  }
   658  
   659  // Stat returns information about a file that has been written.
   660  func (w *Writer) Stat(name string) (*File, error) {
   661  	if err := w.finishInode(); err != nil {
   662  		return nil, err
   663  	}
   664  	_, node, _, err := w.lookup(name, true)
   665  	if err != nil {
   666  		return nil, err
   667  	}
   668  	f := &File{
   669  		Size:     node.Size,
   670  		Mode:     node.Mode,
   671  		Uid:      node.Uid,
   672  		Gid:      node.Gid,
   673  		Atime:    fsTimeToTime(node.Atime),
   674  		Ctime:    fsTimeToTime(node.Ctime),
   675  		Mtime:    fsTimeToTime(node.Mtime),
   676  		Crtime:   fsTimeToTime(node.Crtime),
   677  		Devmajor: node.Devmajor,
   678  		Devminor: node.Devminor,
   679  	}
   680  	f.Xattrs = make(map[string][]byte)
   681  	if node.XattrBlock != 0 || len(node.XattrInline) != 0 {
   682  		if node.XattrBlock != 0 {
   683  			orig := w.block()
   684  			w.seekBlock(node.XattrBlock)
   685  			if w.err != nil {
   686  				return nil, w.err
   687  			}
   688  			var b [BlockSize]byte
   689  			_, err := w.f.Read(b[:])
   690  			w.seekBlock(orig)
   691  			if err != nil {
   692  				return nil, err
   693  			}
   694  			getXattrs(b[32:], f.Xattrs, 32)
   695  		}
   696  		if len(node.XattrInline) != 0 {
   697  			getXattrs(node.XattrInline[4:], f.Xattrs, 0)
   698  			delete(f.Xattrs, "system.data")
   699  		}
   700  	}
   701  	if node.FileType() == S_IFLNK {
   702  		if node.Size > smallSymlinkSize {
   703  			return nil, fmt.Errorf("%s: cannot retrieve link information", name)
   704  		}
   705  		f.Linkname = string(node.Data)
   706  	}
   707  	return f, nil
   708  }
   709  
   710  func (w *Writer) Write(b []byte) (int, error) {
   711  	if len(b) == 0 {
   712  		return 0, nil
   713  	}
   714  	if w.dataWritten+int64(len(b)) > w.dataMax {
   715  		return 0, fmt.Errorf("%s: wrote too much: %d > %d", w.curName, w.dataWritten+int64(len(b)), w.dataMax)
   716  	}
   717  
   718  	if w.curInode.Flags&format.InodeFlagInlineData != 0 {
   719  		copy(w.curInode.Data[w.dataWritten:], b)
   720  		w.dataWritten += int64(len(b))
   721  		return len(b), nil
   722  	}
   723  
   724  	n, err := w.write(b)
   725  	w.dataWritten += int64(n)
   726  	return n, err
   727  }
   728  
   729  func (w *Writer) startInode(name string, inode *inode, size int64) {
   730  	if w.curInode != nil {
   731  		panic("inode already in progress")
   732  	}
   733  	w.curName = name
   734  	w.curInode = inode
   735  	w.dataWritten = 0
   736  	w.dataMax = size
   737  }
   738  
   739  func (w *Writer) block() uint32 {
   740  	return uint32(w.pos / BlockSize)
   741  }
   742  
   743  func (w *Writer) seekBlock(block uint32) {
   744  	w.pos = int64(block) * BlockSize
   745  	if w.err != nil {
   746  		return
   747  	}
   748  	w.err = w.bw.Flush()
   749  	if w.err != nil {
   750  		return
   751  	}
   752  	_, w.err = w.f.Seek(w.pos, io.SeekStart)
   753  }
   754  
   755  func (w *Writer) nextBlock() {
   756  	if w.pos%BlockSize != 0 {
   757  		// Simplify callers; w.err is updated on failure.
   758  		_, _ = w.zero(BlockSize - w.pos%BlockSize)
   759  	}
   760  }
   761  
   762  func fillExtents(hdr *format.ExtentHeader, extents []format.ExtentLeafNode, startBlock, offset, inodeSize uint32) {
   763  	*hdr = format.ExtentHeader{
   764  		Magic:   format.ExtentHeaderMagic,
   765  		Entries: uint16(len(extents)),
   766  		Max:     uint16(cap(extents)),
   767  		Depth:   0,
   768  	}
   769  	for i := range extents {
   770  		block := offset + uint32(i)*maxBlocksPerExtent
   771  		length := inodeSize - block
   772  		if length > maxBlocksPerExtent {
   773  			length = maxBlocksPerExtent
   774  		}
   775  		start := startBlock + block
   776  		extents[i] = format.ExtentLeafNode{
   777  			Block:    block,
   778  			Length:   uint16(length),
   779  			StartLow: start,
   780  		}
   781  	}
   782  }
   783  
   784  func (w *Writer) writeExtents(inode *inode) error {
   785  	start := w.pos - w.dataWritten
   786  	if start%BlockSize != 0 {
   787  		panic("unaligned")
   788  	}
   789  	w.nextBlock()
   790  
   791  	startBlock := uint32(start / BlockSize)
   792  	blocks := w.block() - startBlock
   793  	usedBlocks := blocks
   794  
   795  	const extentNodeSize = 12
   796  	const extentsPerBlock = BlockSize/extentNodeSize - 1
   797  
   798  	extents := (blocks + maxBlocksPerExtent - 1) / maxBlocksPerExtent
   799  	var b bytes.Buffer
   800  	if extents == 0 {
   801  		// Nothing to do.
   802  	} else if extents <= 4 {
   803  		var root struct {
   804  			hdr     format.ExtentHeader
   805  			extents [4]format.ExtentLeafNode
   806  		}
   807  		fillExtents(&root.hdr, root.extents[:extents], startBlock, 0, blocks)
   808  		_ = binary.Write(&b, binary.LittleEndian, root)
   809  	} else if extents <= 4*extentsPerBlock {
   810  		const extentsPerBlock = BlockSize/extentNodeSize - 1
   811  		extentBlocks := extents/extentsPerBlock + 1
   812  		usedBlocks += extentBlocks
   813  		var b2 bytes.Buffer
   814  
   815  		var root struct {
   816  			hdr   format.ExtentHeader
   817  			nodes [4]format.ExtentIndexNode
   818  		}
   819  		root.hdr = format.ExtentHeader{
   820  			Magic:   format.ExtentHeaderMagic,
   821  			Entries: uint16(extentBlocks),
   822  			Max:     4,
   823  			Depth:   1,
   824  		}
   825  		for i := uint32(0); i < extentBlocks; i++ {
   826  			root.nodes[i] = format.ExtentIndexNode{
   827  				Block:   i * extentsPerBlock * maxBlocksPerExtent,
   828  				LeafLow: w.block(),
   829  			}
   830  			extentsInBlock := extents - i*extentBlocks
   831  			if extentsInBlock > extentsPerBlock {
   832  				extentsInBlock = extentsPerBlock
   833  			}
   834  
   835  			var node struct {
   836  				hdr     format.ExtentHeader
   837  				extents [extentsPerBlock]format.ExtentLeafNode
   838  				_       [BlockSize - (extentsPerBlock+1)*extentNodeSize]byte
   839  			}
   840  
   841  			offset := i * extentsPerBlock * maxBlocksPerExtent
   842  			fillExtents(&node.hdr, node.extents[:extentsInBlock], startBlock+offset, offset, blocks)
   843  			_ = binary.Write(&b2, binary.LittleEndian, node)
   844  			if _, err := w.write(b2.Next(BlockSize)); err != nil {
   845  				return err
   846  			}
   847  		}
   848  		_ = binary.Write(&b, binary.LittleEndian, root)
   849  	} else {
   850  		panic("file too big")
   851  	}
   852  
   853  	inode.Data = b.Bytes()
   854  	inode.Flags |= format.InodeFlagExtents
   855  	inode.BlockCount += usedBlocks
   856  	return w.err
   857  }
   858  
   859  func (w *Writer) finishInode() error {
   860  	if !w.initialized {
   861  		if err := w.init(); err != nil {
   862  			return err
   863  		}
   864  	}
   865  	if w.curInode == nil {
   866  		return nil
   867  	}
   868  	if w.dataWritten != w.dataMax {
   869  		return fmt.Errorf("did not write the right amount: %d != %d", w.dataWritten, w.dataMax)
   870  	}
   871  
   872  	if w.dataMax != 0 && w.curInode.Flags&format.InodeFlagInlineData == 0 {
   873  		if err := w.writeExtents(w.curInode); err != nil {
   874  			return err
   875  		}
   876  	}
   877  
   878  	w.dataWritten = 0
   879  	w.dataMax = 0
   880  	w.curInode = nil
   881  	return w.err
   882  }
   883  
   884  func modeToFileType(mode uint16) format.FileType {
   885  	switch mode & format.TypeMask {
   886  	default:
   887  		return format.FileTypeUnknown
   888  	case format.S_IFREG:
   889  		return format.FileTypeRegular
   890  	case format.S_IFDIR:
   891  		return format.FileTypeDirectory
   892  	case format.S_IFCHR:
   893  		return format.FileTypeCharacter
   894  	case format.S_IFBLK:
   895  		return format.FileTypeBlock
   896  	case format.S_IFIFO:
   897  		return format.FileTypeFIFO
   898  	case format.S_IFSOCK:
   899  		return format.FileTypeSocket
   900  	case format.S_IFLNK:
   901  		return format.FileTypeSymbolicLink
   902  	}
   903  }
   904  
   905  type constReader byte
   906  
   907  var zero = constReader(0)
   908  
   909  func (r constReader) Read(b []byte) (int, error) {
   910  	for i := range b {
   911  		b[i] = byte(r)
   912  	}
   913  	return len(b), nil
   914  }
   915  
   916  func (w *Writer) writeDirectory(dir, parent *inode) error {
   917  	if err := w.finishInode(); err != nil {
   918  		return err
   919  	}
   920  
   921  	// The size of the directory is not known yet.
   922  	w.startInode("", dir, 0x7fffffffffffffff)
   923  	left := BlockSize
   924  	finishBlock := func() error {
   925  		if left > 0 {
   926  			e := format.DirectoryEntry{
   927  				RecordLength: uint16(left),
   928  			}
   929  			err := binary.Write(w, binary.LittleEndian, e)
   930  			if err != nil {
   931  				return err
   932  			}
   933  			left -= directoryEntrySize
   934  			if left < 4 {
   935  				panic("not enough space for trailing entry")
   936  			}
   937  			_, err = io.CopyN(w, zero, int64(left))
   938  			if err != nil {
   939  				return err
   940  			}
   941  		}
   942  		left = BlockSize
   943  		return nil
   944  	}
   945  
   946  	writeEntry := func(ino format.InodeNumber, name string) error {
   947  		rlb := directoryEntrySize + len(name)
   948  		rl := (rlb + 3) & ^3
   949  		if left < rl+12 {
   950  			if err := finishBlock(); err != nil {
   951  				return err
   952  			}
   953  		}
   954  		e := format.DirectoryEntry{
   955  			Inode:        ino,
   956  			RecordLength: uint16(rl),
   957  			NameLength:   uint8(len(name)),
   958  			FileType:     modeToFileType(w.getInode(ino).Mode),
   959  		}
   960  		err := binary.Write(w, binary.LittleEndian, e)
   961  		if err != nil {
   962  			return err
   963  		}
   964  		_, err = w.Write([]byte(name))
   965  		if err != nil {
   966  			return err
   967  		}
   968  		var zero [4]byte
   969  		_, err = w.Write(zero[:rl-rlb])
   970  		if err != nil {
   971  			return err
   972  		}
   973  		left -= rl
   974  		return nil
   975  	}
   976  	if err := writeEntry(dir.Number, "."); err != nil {
   977  		return err
   978  	}
   979  	if err := writeEntry(parent.Number, ".."); err != nil {
   980  		return err
   981  	}
   982  
   983  	// Follow e2fsck's convention and sort the children by inode number.
   984  	var children []string
   985  	for name := range dir.Children {
   986  		children = append(children, name)
   987  	}
   988  	sort.Slice(children, func(i, j int) bool {
   989  		left_num := dir.Children[children[i]].Number
   990  		right_num := dir.Children[children[j]].Number
   991  
   992  		if left_num == right_num {
   993  			return children[i] < children[j]
   994  		}
   995  		return left_num < right_num
   996  	})
   997  
   998  	for _, name := range children {
   999  		child := dir.Children[name]
  1000  		if err := writeEntry(child.Number, name); err != nil {
  1001  			return err
  1002  		}
  1003  	}
  1004  	if err := finishBlock(); err != nil {
  1005  		return err
  1006  	}
  1007  	w.curInode.Size = w.dataWritten
  1008  	w.dataMax = w.dataWritten
  1009  	return nil
  1010  }
  1011  
  1012  func (w *Writer) writeDirectoryRecursive(dir, parent *inode) error {
  1013  	if err := w.writeDirectory(dir, parent); err != nil {
  1014  		return err
  1015  	}
  1016  
  1017  	// Follow e2fsck's convention and sort the children by inode number.
  1018  	var children []string
  1019  	for name := range dir.Children {
  1020  		children = append(children, name)
  1021  	}
  1022  	sort.Slice(children, func(i, j int) bool {
  1023  		left_num := dir.Children[children[i]].Number
  1024  		right_num := dir.Children[children[j]].Number
  1025  
  1026  		if left_num == right_num {
  1027  			return children[i] < children[j]
  1028  		}
  1029  		return left_num < right_num
  1030  	})
  1031  
  1032  	for _, name := range children {
  1033  		child := dir.Children[name]
  1034  		if child.IsDir() {
  1035  			if err := w.writeDirectoryRecursive(child, dir); err != nil {
  1036  				return err
  1037  			}
  1038  		}
  1039  	}
  1040  	return nil
  1041  }
  1042  
  1043  func (w *Writer) writeInodeTable(tableSize uint32) error {
  1044  	var b bytes.Buffer
  1045  	for _, inode := range w.inodes {
  1046  		if inode != nil {
  1047  			binode := format.Inode{
  1048  				Mode:          inode.Mode,
  1049  				Uid:           uint16(inode.Uid & 0xffff),
  1050  				Gid:           uint16(inode.Gid & 0xffff),
  1051  				SizeLow:       uint32(inode.Size & 0xffffffff),
  1052  				SizeHigh:      uint32(inode.Size >> 32),
  1053  				LinksCount:    uint16(inode.LinkCount),
  1054  				BlocksLow:     inode.BlockCount,
  1055  				Flags:         inode.Flags,
  1056  				XattrBlockLow: inode.XattrBlock,
  1057  				UidHigh:       uint16(inode.Uid >> 16),
  1058  				GidHigh:       uint16(inode.Gid >> 16),
  1059  				ExtraIsize:    uint16(inodeUsedSize - 128),
  1060  				Atime:         uint32(inode.Atime),
  1061  				AtimeExtra:    uint32(inode.Atime >> 32),
  1062  				Ctime:         uint32(inode.Ctime),
  1063  				CtimeExtra:    uint32(inode.Ctime >> 32),
  1064  				Mtime:         uint32(inode.Mtime),
  1065  				MtimeExtra:    uint32(inode.Mtime >> 32),
  1066  				Crtime:        uint32(inode.Crtime),
  1067  				CrtimeExtra:   uint32(inode.Crtime >> 32),
  1068  			}
  1069  			switch inode.Mode & format.TypeMask {
  1070  			case format.S_IFDIR, format.S_IFREG, format.S_IFLNK:
  1071  				n := copy(binode.Block[:], inode.Data)
  1072  				if n < len(inode.Data) {
  1073  					// Rewrite the first xattr with the data.
  1074  					xattr := [1]xattr{{
  1075  						Name:  "data",
  1076  						Index: 7, // "system."
  1077  						Value: inode.Data[n:],
  1078  					}}
  1079  					putXattrs(xattr[:], inode.XattrInline[4:], 0)
  1080  				}
  1081  			case format.S_IFBLK, format.S_IFCHR:
  1082  				dev := inode.Devminor&0xff | inode.Devmajor<<8 | (inode.Devminor&0xffffff00)<<12
  1083  				binary.LittleEndian.PutUint32(binode.Block[4:], dev)
  1084  			}
  1085  
  1086  			_ = binary.Write(&b, binary.LittleEndian, binode)
  1087  			b.Truncate(inodeUsedSize)
  1088  			n, _ := b.Write(inode.XattrInline)
  1089  			_, _ = io.CopyN(&b, zero, int64(inodeExtraSize-n))
  1090  		} else {
  1091  			_, _ = io.CopyN(&b, zero, inodeSize)
  1092  		}
  1093  		if _, err := w.write(b.Next(inodeSize)); err != nil {
  1094  			return err
  1095  		}
  1096  	}
  1097  	rest := tableSize - uint32(len(w.inodes)*inodeSize)
  1098  	if _, err := w.zero(int64(rest)); err != nil {
  1099  		return err
  1100  	}
  1101  	return nil
  1102  }
  1103  
  1104  // NewWriter returns a Writer that writes an ext4 file system to the provided
  1105  // WriteSeeker.
  1106  func NewWriter(f io.ReadWriteSeeker, opts ...Option) *Writer {
  1107  	w := &Writer{
  1108  		f:           f,
  1109  		bw:          bufio.NewWriterSize(f, 65536*8),
  1110  		maxDiskSize: defaultMaxDiskSize,
  1111  	}
  1112  	for _, opt := range opts {
  1113  		opt(w)
  1114  	}
  1115  	return w
  1116  }
  1117  
// An Option provides extra options to NewWriter. It is a function that
// adjusts the Writer under construction; NewWriter calls each supplied Option
// in order.
type Option func(*Writer)
  1120  
// InlineData instructs the Writer to write small files into the inode
// structures directly. This creates smaller images but currently is not
// compatible with DAX.
//
// It is an Option: pass it to NewWriter as-is (without calling it). When set,
// Close also advertises the inline-data incompat feature in the superblock.
func InlineData(w *Writer) {
	w.supportInlineData = true
}
  1127  
  1128  // MaximumDiskSize instructs the writer to reserve enough metadata space for the
  1129  // specified disk size. If not provided, then 16GB is the default.
  1130  func MaximumDiskSize(size int64) Option {
  1131  	return func(w *Writer) {
  1132  		if size < 0 || size > maxMaxDiskSize {
  1133  			w.maxDiskSize = maxMaxDiskSize
  1134  		} else if size == 0 {
  1135  			w.maxDiskSize = defaultMaxDiskSize
  1136  		} else {
  1137  			w.maxDiskSize = (size + BlockSize - 1) &^ (BlockSize - 1)
  1138  		}
  1139  	}
  1140  }
  1141  
  1142  func (w *Writer) init() error {
  1143  	// Skip the defective block inode.
  1144  	w.inodes = make([]*inode, 1, 32)
  1145  	// Create the root directory.
  1146  	root, _ := w.makeInode(&File{
  1147  		Mode: format.S_IFDIR | 0755,
  1148  	}, nil)
  1149  	root.LinkCount++ // The root is linked to itself.
  1150  	// Skip until the first non-reserved inode.
  1151  	w.inodes = append(w.inodes, make([]*inode, inodeFirst-len(w.inodes)-1)...)
  1152  	maxBlocks := (w.maxDiskSize-1)/BlockSize + 1
  1153  	maxGroups := (maxBlocks-1)/blocksPerGroup + 1
  1154  	w.gdBlocks = uint32((maxGroups-1)/groupsPerDescriptorBlock + 1)
  1155  
  1156  	// Skip past the superblock and block descriptor table.
  1157  	w.seekBlock(1 + w.gdBlocks)
  1158  	w.initialized = true
  1159  
  1160  	// The lost+found directory is required to exist for e2fsck to pass.
  1161  	if err := w.Create("lost+found", &File{Mode: format.S_IFDIR | 0700}); err != nil {
  1162  		return err
  1163  	}
  1164  	return w.err
  1165  }
  1166  
  1167  func groupCount(blocks uint32, inodes uint32, inodesPerGroup uint32) uint32 {
  1168  	inodeBlocksPerGroup := inodesPerGroup * inodeSize / BlockSize
  1169  	dataBlocksPerGroup := blocksPerGroup - inodeBlocksPerGroup - 2 // save room for the bitmaps
  1170  
  1171  	// Increase the block count to ensure there are enough groups for all the
  1172  	// inodes.
  1173  	minBlocks := (inodes-1)/inodesPerGroup*dataBlocksPerGroup + 1
  1174  	if blocks < minBlocks {
  1175  		blocks = minBlocks
  1176  	}
  1177  
  1178  	return (blocks + dataBlocksPerGroup - 1) / dataBlocksPerGroup
  1179  }
  1180  
  1181  func bestGroupCount(blocks uint32, inodes uint32) (groups uint32, inodesPerGroup uint32) {
  1182  	groups = 0xffffffff
  1183  	for ipg := uint32(inodesPerGroupIncrement); ipg <= maxInodesPerGroup; ipg += inodesPerGroupIncrement {
  1184  		g := groupCount(blocks, inodes, ipg)
  1185  		if g < groups {
  1186  			groups = g
  1187  			inodesPerGroup = ipg
  1188  		}
  1189  	}
  1190  	return
  1191  }
  1192  
// Close completes the file system image. In order, it:
//
//   - finishes the inode currently being written,
//   - writes every directory, starting at the root,
//   - writes the inode table,
//   - writes a block bitmap and an inode bitmap for each group,
//   - zero-fills up to the final disk size,
//   - writes the group descriptors starting at block 1,
//   - writes the superblock into block 0, and
//   - seeks to the end of the disk.
//
// It returns an exceededMaxSizeError when the image needs more group
// descriptor blocks than were reserved for the configured maximum disk size,
// or the first write/seek error encountered.
func (w *Writer) Close() error {
	if err := w.finishInode(); err != nil {
		return err
	}
	// The root directory is passed as its own parent.
	root := w.root()
	if err := w.writeDirectoryRecursive(root, root); err != nil {
		return err
	}
	// Finish the last inode (probably a directory).
	if err := w.finishInode(); err != nil {
		return err
	}

	// Write the inode table
	inodeTableOffset := w.block()
	groups, inodesPerGroup := bestGroupCount(inodeTableOffset, uint32(len(w.inodes)))
	err := w.writeInodeTable(groups * inodesPerGroup * inodeSize)
	if err != nil {
		return err
	}

	// Write the bitmaps.
	bitmapOffset := w.block()
	// Two bitmap blocks per group: one for blocks, one for inodes.
	bitmapSize := groups * 2
	// Everything up to the end of the bitmaps is in use.
	validDataSize := bitmapOffset + bitmapSize
	diskSize := validDataSize
	// The disk must extend at least one block into the last group.
	minSize := (groups-1)*blocksPerGroup + 1
	if diskSize < minSize {
		diskSize = minSize
	}

	// Fail if the descriptors no longer fit in the space reserved up front.
	usedGdBlocks := (groups-1)/groupsPerDescriptorBlock + 1
	if usedGdBlocks > w.gdBlocks {
		return exceededMaxSizeError{w.maxDiskSize}
	}

	// Sized for all reserved descriptor blocks; entries beyond groups stay
	// zero-valued.
	gds := make([]format.GroupDescriptor, w.gdBlocks*groupsPerDescriptorBlock)
	inodeTableSizePerGroup := inodesPerGroup * inodeSize / BlockSize
	var totalUsedBlocks, totalUsedInodes uint32
	for g := uint32(0); g < groups; g++ {
		// First half of b is the block bitmap, second half the inode bitmap.
		var b [BlockSize * 2]byte
		var dirCount, usedInodeCount, usedBlockCount uint16

		// Block bitmap
		if (g+1)*blocksPerGroup <= validDataSize {
			// This group is fully allocated.
			for j := range b[:BlockSize] {
				b[j] = 0xff
			}
			usedBlockCount = blocksPerGroup
		} else if g*blocksPerGroup < validDataSize {
			// Partially used group: mark only the blocks below validDataSize.
			for j := uint32(0); j < validDataSize-g*blocksPerGroup; j++ {
				b[j/8] |= 1 << (j % 8)
				usedBlockCount++
			}
		}
		if g == 0 {
			// Unused group descriptor blocks should be cleared.
			for j := 1 + usedGdBlocks; j < 1+w.gdBlocks; j++ {
				b[j/8] &^= 1 << (j % 8)
				usedBlockCount--
			}
		}
		if g == groups-1 && diskSize%blocksPerGroup != 0 {
			// Blocks that aren't present in the disk should be marked as
			// allocated.
			for j := diskSize % blocksPerGroup; j < blocksPerGroup; j++ {
				b[j/8] |= 1 << (j % 8)
				usedBlockCount++
			}
		}
		// Inode bitmap
		for j := uint32(0); j < inodesPerGroup; j++ {
			// Inode numbers are 1-based.
			ino := format.InodeNumber(1 + g*inodesPerGroup + j)
			inode := w.getInode(ino)
			// Inodes below inodeFirst count as used even without an entry.
			if ino < inodeFirst || inode != nil {
				b[BlockSize+j/8] |= 1 << (j % 8)
				usedInodeCount++
			}
			if inode != nil && inode.Mode&format.TypeMask == format.S_IFDIR {
				dirCount++
			}
		}
		_, err := w.write(b[:])
		if err != nil {
			return err
		}
		// The bitmaps of all groups are stored back to back, and each group
		// owns a contiguous slice of the single inode table.
		gds[g] = format.GroupDescriptor{
			BlockBitmapLow:     bitmapOffset + 2*g,
			InodeBitmapLow:     bitmapOffset + 2*g + 1,
			InodeTableLow:      inodeTableOffset + g*inodeTableSizePerGroup,
			UsedDirsCountLow:   dirCount,
			FreeInodesCountLow: uint16(inodesPerGroup) - usedInodeCount,
			FreeBlocksCountLow: blocksPerGroup - usedBlockCount,
		}

		totalUsedBlocks += uint32(usedBlockCount)
		totalUsedInodes += uint32(usedInodeCount)
	}

	// Zero up to the disk size.
	_, err = w.zero(int64(diskSize-bitmapOffset-bitmapSize) * BlockSize)
	if err != nil {
		return err
	}

	// Write the block descriptors
	w.seekBlock(1)
	if w.err != nil {
		return w.err
	}
	err = binary.Write(w.bw, binary.LittleEndian, gds)
	if err != nil {
		return err
	}

	// Write the super block
	var blk [BlockSize]byte
	// The buffer starts out holding the first 1024 bytes of blk, so the
	// encoded superblock is appended at offset 1024 of the block.
	// NOTE(review): this relies on the encoded superblock fitting in the rest
	// of blk so the append happens in place; confirm against the
	// format.SuperBlock size.
	b := bytes.NewBuffer(blk[:1024])
	sb := &format.SuperBlock{
		InodesCount:        inodesPerGroup * groups,
		BlocksCountLow:     diskSize,
		FreeBlocksCountLow: blocksPerGroup*groups - totalUsedBlocks,
		FreeInodesCount:    inodesPerGroup*groups - totalUsedInodes,
		FirstDataBlock:     0,
		LogBlockSize:       2, // 2^(10 + 2)
		LogClusterSize:     2,
		BlocksPerGroup:     blocksPerGroup,
		ClustersPerGroup:   blocksPerGroup,
		InodesPerGroup:     inodesPerGroup,
		Magic:              format.SuperBlockMagic,
		State:              1, // cleanly unmounted
		Errors:             1, // continue on error?
		CreatorOS:          0, // Linux
		RevisionLevel:      1, // dynamic inode sizes
		FirstInode:         inodeFirst,
		LpfInode:           inodeLostAndFound,
		InodeSize:          inodeSize,
		FeatureCompat:      format.CompatSparseSuper2 | format.CompatExtAttr,
		FeatureIncompat:    format.IncompatFiletype | format.IncompatExtents | format.IncompatFlexBg,
		FeatureRoCompat:    format.RoCompatLargeFile | format.RoCompatHugeFile | format.RoCompatExtraIsize | format.RoCompatReadonly,
		MinExtraIsize:      extraIsize,
		WantExtraIsize:     extraIsize,
		LogGroupsPerFlex:   31,
	}
	if w.supportInlineData {
		sb.FeatureIncompat |= format.IncompatInlineData
	}
	// The error is ignored: the destination is an in-memory buffer.
	_ = binary.Write(b, binary.LittleEndian, sb)
	w.seekBlock(0)
	if _, err := w.write(blk[:]); err != nil {
		return err
	}
	// Leave the position at the end of the disk and surface any seek error.
	w.seekBlock(diskSize)
	return w.err
}
  1349  

View as plain text