1 package compactext4
2
3 import (
4 "bufio"
5 "bytes"
6 "encoding/binary"
7 "errors"
8 "fmt"
9 "io"
10 "path"
11 "sort"
12 "strings"
13 "time"
14
15 "github.com/Microsoft/hcsshim/ext4/internal/format"
16 "github.com/Microsoft/hcsshim/internal/memory"
17 )
18
19
20 type Writer struct {
21 f io.ReadWriteSeeker
22 bw *bufio.Writer
23 inodes []*inode
24 curName string
25 curInode *inode
26 pos int64
27 dataWritten, dataMax int64
28 err error
29 initialized bool
30 supportInlineData bool
31 maxDiskSize int64
32 gdBlocks uint32
33 }
34
35
36 const (
37 S_IXOTH = format.S_IXOTH
38 S_IWOTH = format.S_IWOTH
39 S_IROTH = format.S_IROTH
40 S_IXGRP = format.S_IXGRP
41 S_IWGRP = format.S_IWGRP
42 S_IRGRP = format.S_IRGRP
43 S_IXUSR = format.S_IXUSR
44 S_IWUSR = format.S_IWUSR
45 S_IRUSR = format.S_IRUSR
46 S_ISVTX = format.S_ISVTX
47 S_ISGID = format.S_ISGID
48 S_ISUID = format.S_ISUID
49 S_IFIFO = format.S_IFIFO
50 S_IFCHR = format.S_IFCHR
51 S_IFDIR = format.S_IFDIR
52 S_IFBLK = format.S_IFBLK
53 S_IFREG = format.S_IFREG
54 S_IFLNK = format.S_IFLNK
55 S_IFSOCK = format.S_IFSOCK
56
57 TypeMask = format.TypeMask
58 )
59
60 type inode struct {
61 Size int64
62 Atime, Ctime, Mtime, Crtime uint64
63 Number format.InodeNumber
64 Mode uint16
65 Uid, Gid uint32
66 LinkCount uint32
67 XattrBlock uint32
68 BlockCount uint32
69 Devmajor, Devminor uint32
70 Flags format.InodeFlag
71 Data []byte
72 XattrInline []byte
73 Children directory
74 }
75
76 func (node *inode) FileType() uint16 {
77 return node.Mode & format.TypeMask
78 }
79
80 func (node *inode) IsDir() bool {
81 return node.FileType() == S_IFDIR
82 }
83
84
// File describes a file system object to be created with Writer.Create, or
// one returned by Writer.Stat.
type File struct {
	// Linkname is the target of a symbolic link.
	Linkname string
	// Size is the number of content bytes that will be written for a
	// regular file.
	Size int64
	// Mode carries the type and permission bits. A mode without type bits is
	// treated as a regular file.
	Mode uint16
	Uid  uint32
	Gid  uint32

	Atime  time.Time
	Ctime  time.Time
	Mtime  time.Time
	Crtime time.Time

	// Devmajor and Devminor identify the device for block and character
	// device nodes.
	Devmajor uint32
	Devminor uint32

	// Xattrs maps full extended attribute names to their values. Note that
	// Create may allocate this map and add entries to it.
	Xattrs map[string][]byte
}
94
95 const (
96 inodeFirst = 11
97 inodeLostAndFound = inodeFirst
98
99 BlockSize = 4096
100 blocksPerGroup = BlockSize * 8
101 inodeSize = 256
102 maxInodesPerGroup = BlockSize * 8
103 inodesPerGroupIncrement = BlockSize / inodeSize
104
105 defaultMaxDiskSize = 16 * memory.GiB
106 maxMaxDiskSize = 16 * 1024 * 1024 * 1024 * 1024
107
108 groupDescriptorSize = 32
109 groupsPerDescriptorBlock = BlockSize / groupDescriptorSize
110
111 maxFileSize = 128 * memory.GiB
112 smallSymlinkSize = 59
113 maxBlocksPerExtent = 0x8000
114 inodeDataSize = 60
115 inodeUsedSize = 152
116 inodeExtraSize = inodeSize - inodeUsedSize
117 xattrInodeOverhead = 4 + 4
118 xattrBlockOverhead = 32 + 4
119 inlineDataXattrOverhead = xattrInodeOverhead + 16 + 4
120 inlineDataSize = inodeDataSize + inodeExtraSize - inlineDataXattrOverhead
121 )
122
// exceededMaxSizeError is reported when writing more data would make the
// image larger than the configured maximum disk size.
type exceededMaxSizeError struct {
	// Size is the maximum size, in bytes, that was exceeded.
	Size int64
}

// Error implements the error interface.
func (e exceededMaxSizeError) Error() string {
	limit := e.Size
	return fmt.Sprintf("disk exceeded maximum size of %d bytes", limit)
}
130
131 var directoryEntrySize = binary.Size(format.DirectoryEntry{})
132 var extraIsize = uint16(inodeUsedSize - 128)
133
134 type directory map[string]*inode
135
// splitFirst splits p at its first '/' and returns the text before and after
// the separator. If p contains no '/', it returns p and the empty string.
func splitFirst(p string) (string, string) {
	sep := strings.IndexByte(p, '/')
	if sep < 0 {
		return p, ""
	}
	head, tail := p[:sep], p[sep+1:]
	return head, tail
}
143
144 func (w *Writer) findPath(root *inode, p string) *inode {
145 inode := root
146 for inode != nil && len(p) != 0 {
147 name, rest := splitFirst(p)
148 p = rest
149 inode = inode.Children[name]
150 }
151 return inode
152 }
153
// timeToFsTime packs t into the 64-bit form used for inode timestamps:
//
//	bits  0-31  low 32 bits of the seconds since the Unix epoch (signed)
//	bits 32-33  epoch bits: how many multiples of 2^32 must be added to the
//	            sign-extended low seconds to obtain the real value
//	bits 34-63  nanoseconds
//
// The low and high 32-bit halves are what writeInodeTable stores in the
// inode's time and "extra" time fields. The zero time encodes as 0, and
// times outside the representable range are clamped to its limits.
func timeToFsTime(t time.Time) uint64 {
	if t.IsZero() {
		return 0
	}
	s := t.Unix()
	if s < -0x80000000 {
		return 0x80000000
	}
	if s > 0x37fffffff {
		return 0x37fffffff
	}
	// Keep only the low 32 bits of the seconds; converting a negative value
	// directly to uint64 would sign-extend into the epoch and nanosecond bits.
	low := uint64(uint32(s))
	// The decoder sign-extends the low word, so the epoch is the distance
	// between the real value and that sign-extended word, in units of 2^32.
	epoch := uint64((s-int64(int32(s)))>>32) & 3
	return low | epoch<<32 | uint64(t.Nanosecond())<<34
}

// fsTimeToTime is the inverse of timeToFsTime. A value of 0 decodes to the
// zero time.
func fsTimeToTime(t uint64) time.Time {
	if t == 0 {
		return time.Time{}
	}
	// Sign-extend the low 32 bits, then add the epoch bits times 2^32.
	s := int64(int32(uint32(t))) + int64((t>>32)&3)<<32
	return time.Unix(s, int64(t>>34))
}
178
179 func (w *Writer) getInode(i format.InodeNumber) *inode {
180 if i == 0 || int(i) > len(w.inodes) {
181 return nil
182 }
183 return w.inodes[i-1]
184 }
185
// xattrPrefixes lists the attribute name prefixes that are stored as a
// numeric index instead of text. Longer prefixes come before the shorter
// prefixes they start with so that the first match is the most specific one.
var xattrPrefixes = []struct {
	Index  uint8
	Prefix string
}{
	{Index: 2, Prefix: "system.posix_acl_access"},
	{Index: 3, Prefix: "system.posix_acl_default"},
	{Index: 8, Prefix: "system.richacl"},
	{Index: 7, Prefix: "system."},
	{Index: 1, Prefix: "user."},
	{Index: 4, Prefix: "trusted."},
	{Index: 6, Prefix: "security."},
}

// compressXattrName splits name into the index of the first matching known
// prefix and the remainder of the name. Names without a known prefix are
// returned unchanged with index 0.
func compressXattrName(name string) (uint8, string) {
	for i := range xattrPrefixes {
		entry := &xattrPrefixes[i]
		if !strings.HasPrefix(name, entry.Prefix) {
			continue
		}
		return entry.Index, name[len(entry.Prefix):]
	}
	return 0, name
}

// decompressXattrName rebuilds the full attribute name from a prefix index
// and the stored name. Unknown indexes leave the name unchanged.
func decompressXattrName(index uint8, name string) string {
	for i := range xattrPrefixes {
		if xattrPrefixes[i].Index == index {
			return xattrPrefixes[i].Prefix + name
		}
	}
	return name
}
216
// hashXattrEntry computes the hash stored in an xattr entry. The name bytes
// are mixed in first with a 5-bit rotate, then the value is mixed in as
// little-endian 32-bit words with a 16-bit rotate; a trailing partial word is
// padded with zero bytes.
func hashXattrEntry(name string, value []byte) uint32 {
	var h uint32
	for _, c := range []byte(name) {
		h = (h << 5) ^ (h >> 27) ^ uint32(c)
	}

	rest := value
	for len(rest) >= 4 {
		h = (h << 16) ^ (h >> 16) ^ binary.LittleEndian.Uint32(rest)
		rest = rest[4:]
	}

	if len(rest) > 0 {
		var tail [4]byte
		copy(tail[:], rest)
		h = (h << 16) ^ (h >> 16) ^ binary.LittleEndian.Uint32(tail[:])
	}
	return h
}
234
// xattr is a single extended attribute whose name has already been split
// into a prefix index and the remaining text.
type xattr struct {
	Name  string
	Index uint8
	Value []byte
}

// EntryLen returns the size of the attribute's entry record: a 16-byte header
// followed by the name padded to a multiple of four bytes.
func (x *xattr) EntryLen() int {
	paddedName := (len(x.Name) + 3) / 4 * 4
	return 16 + paddedName
}

// ValueLen returns the size of the value padded to a multiple of four bytes.
func (x *xattr) ValueLen() int {
	return (len(x.Value) + 3) / 4 * 4
}
248
249 type xattrState struct {
250 inode, block []xattr
251 inodeLeft, blockLeft int
252 }
253
254 func (s *xattrState) init() {
255 s.inodeLeft = inodeExtraSize - xattrInodeOverhead
256 s.blockLeft = BlockSize - xattrBlockOverhead
257 }
258
259 func (s *xattrState) addXattr(name string, value []byte) bool {
260 index, name := compressXattrName(name)
261 x := xattr{
262 Index: index,
263 Name: name,
264 Value: value,
265 }
266 length := x.EntryLen() + x.ValueLen()
267 if s.inodeLeft >= length {
268 s.inode = append(s.inode, x)
269 s.inodeLeft -= length
270 } else if s.blockLeft >= length {
271 s.block = append(s.block, x)
272 s.blockLeft -= length
273 } else {
274 return false
275 }
276 return true
277 }
278
279 func putXattrs(xattrs []xattr, b []byte, offsetDelta uint16) {
280 offset := uint16(len(b)) + offsetDelta
281 eb := b
282 db := b
283 for _, xattr := range xattrs {
284 vl := xattr.ValueLen()
285 offset -= uint16(vl)
286 eb[0] = uint8(len(xattr.Name))
287 eb[1] = xattr.Index
288 binary.LittleEndian.PutUint16(eb[2:], offset)
289 binary.LittleEndian.PutUint32(eb[8:], uint32(len(xattr.Value)))
290 binary.LittleEndian.PutUint32(eb[12:], hashXattrEntry(xattr.Name, xattr.Value))
291 copy(eb[16:], xattr.Name)
292 eb = eb[xattr.EntryLen():]
293 copy(db[len(db)-vl:], xattr.Value)
294 db = db[:len(db)-vl]
295 }
296 }
297
298 func getXattrs(b []byte, xattrs map[string][]byte, offsetDelta uint16) {
299 eb := b
300 for len(eb) != 0 {
301 nameLen := eb[0]
302 if nameLen == 0 {
303 break
304 }
305 index := eb[1]
306 offset := binary.LittleEndian.Uint16(eb[2:]) - offsetDelta
307 valueLen := binary.LittleEndian.Uint32(eb[8:])
308 attr := xattr{
309 Index: index,
310 Name: string(eb[16 : 16+nameLen]),
311 Value: b[offset : uint32(offset)+valueLen],
312 }
313 xattrs[decompressXattrName(index, attr.Name)] = attr.Value
314 eb = eb[attr.EntryLen():]
315 }
316 }
317
318 func (w *Writer) writeXattrs(inode *inode, state *xattrState) error {
319
320 if len(state.inode) != 0 {
321 inode.XattrInline = make([]byte, inodeExtraSize)
322 binary.LittleEndian.PutUint32(inode.XattrInline[0:], format.XAttrHeaderMagic)
323 putXattrs(state.inode, inode.XattrInline[4:], 0)
324 }
325
326
327
328 if len(state.block) != 0 || inode.XattrBlock != 0 {
329 sort.Slice(state.block, func(i, j int) bool {
330 return state.block[i].Index < state.block[j].Index ||
331 len(state.block[i].Name) < len(state.block[j].Name) ||
332 state.block[i].Name < state.block[j].Name
333 })
334
335 var b [BlockSize]byte
336 binary.LittleEndian.PutUint32(b[0:], format.XAttrHeaderMagic)
337 binary.LittleEndian.PutUint32(b[4:], 1)
338 binary.LittleEndian.PutUint32(b[8:], 1)
339 putXattrs(state.block, b[32:], 32)
340
341 orig := w.block()
342 if inode.XattrBlock == 0 {
343 inode.XattrBlock = orig
344 inode.BlockCount++
345 } else {
346
347 w.seekBlock(inode.XattrBlock)
348 defer w.seekBlock(orig)
349 }
350
351 if _, err := w.write(b[:]); err != nil {
352 return err
353 }
354 }
355
356 return nil
357 }
358
359 func (w *Writer) write(b []byte) (int, error) {
360 if w.err != nil {
361 return 0, w.err
362 }
363 if w.pos+int64(len(b)) > w.maxDiskSize {
364 w.err = exceededMaxSizeError{w.maxDiskSize}
365 return 0, w.err
366 }
367 n, err := w.bw.Write(b)
368 w.pos += int64(n)
369 w.err = err
370 return n, err
371 }
372
373 func (w *Writer) zero(n int64) (int64, error) {
374 if w.err != nil {
375 return 0, w.err
376 }
377 if w.pos+int64(n) > w.maxDiskSize {
378 w.err = exceededMaxSizeError{w.maxDiskSize}
379 return 0, w.err
380 }
381 n, err := io.CopyN(w.bw, zero, n)
382 w.pos += n
383 w.err = err
384 return n, err
385 }
386
387 func (w *Writer) makeInode(f *File, node *inode) (*inode, error) {
388 mode := f.Mode
389 if mode&format.TypeMask == 0 {
390 mode |= format.S_IFREG
391 }
392 typ := mode & format.TypeMask
393 ino := format.InodeNumber(len(w.inodes) + 1)
394 if node == nil {
395 node = &inode{
396 Number: ino,
397 }
398 if typ == S_IFDIR {
399 node.Children = make(directory)
400 node.LinkCount = 1
401 }
402 } else if node.Flags&format.InodeFlagExtents != 0 {
403
404
405 return nil, errors.New("cannot overwrite file with non-inline data")
406 }
407 node.Mode = mode
408 node.Uid = f.Uid
409 node.Gid = f.Gid
410 node.Flags = format.InodeFlagHugeFile
411 node.Atime = timeToFsTime(f.Atime)
412 node.Ctime = timeToFsTime(f.Ctime)
413 node.Mtime = timeToFsTime(f.Mtime)
414 node.Crtime = timeToFsTime(f.Crtime)
415 node.Devmajor = f.Devmajor
416 node.Devminor = f.Devminor
417 node.Data = nil
418 if f.Xattrs == nil {
419 f.Xattrs = make(map[string][]byte)
420 }
421
422
423 existingXattrs := make(map[string][]byte)
424 if len(node.XattrInline) > 0 {
425 getXattrs(node.XattrInline[4:], existingXattrs, 0)
426 }
427 node.XattrInline = nil
428
429 var xstate xattrState
430 xstate.init()
431
432 var size int64
433 switch typ {
434 case format.S_IFREG:
435 size = f.Size
436 if f.Size > maxFileSize {
437 return nil, fmt.Errorf("file too big: %d > %d", f.Size, int64(maxFileSize))
438 }
439 if f.Size <= inlineDataSize && w.supportInlineData {
440 node.Data = make([]byte, f.Size)
441 extra := 0
442 if f.Size > inodeDataSize {
443 extra = int(f.Size - inodeDataSize)
444 }
445
446 if !xstate.addXattr("system.data", node.Data[:extra]) {
447 panic("not enough room for inline data")
448 }
449 node.Flags |= format.InodeFlagInlineData
450 }
451 case format.S_IFLNK:
452 node.Mode |= 0777
453 size = int64(len(f.Linkname))
454 if size <= smallSymlinkSize {
455
456
457 node.Data = make([]byte, len(f.Linkname))
458 copy(node.Data, f.Linkname)
459 }
460 case format.S_IFDIR, format.S_IFIFO, format.S_IFSOCK, format.S_IFCHR, format.S_IFBLK:
461 default:
462 return nil, fmt.Errorf("invalid mode %o", mode)
463 }
464
465
466 for name, data := range existingXattrs {
467 if _, ok := f.Xattrs[name]; !ok {
468 f.Xattrs[name] = data
469 }
470 }
471
472
473 if len(f.Xattrs) != 0 {
474
475 var xattrs []string
476 for name := range f.Xattrs {
477 xattrs = append(xattrs, name)
478 }
479 sort.Strings(xattrs)
480 for _, name := range xattrs {
481 if !xstate.addXattr(name, f.Xattrs[name]) {
482 return nil, fmt.Errorf("could not fit xattr %s", name)
483 }
484 }
485 }
486
487 if err := w.writeXattrs(node, &xstate); err != nil {
488 return nil, err
489 }
490
491 node.Size = size
492 if typ == format.S_IFLNK && size > smallSymlinkSize {
493
494 w.startInode("", node, size)
495 if _, err := w.Write([]byte(f.Linkname)); err != nil {
496 return nil, err
497 }
498 if err := w.finishInode(); err != nil {
499 return nil, err
500 }
501 }
502
503 if int(node.Number-1) >= len(w.inodes) {
504 w.inodes = append(w.inodes, node)
505 }
506 return node, nil
507 }
508
509 func (w *Writer) root() *inode {
510 return w.getInode(format.InodeRoot)
511 }
512
513 func (w *Writer) lookup(name string, mustExist bool) (*inode, *inode, string, error) {
514 root := w.root()
515 cleanname := path.Clean("/" + name)[1:]
516 if len(cleanname) == 0 {
517 return root, root, "", nil
518 }
519 dirname, childname := path.Split(cleanname)
520 if len(childname) == 0 || len(childname) > 0xff {
521 return nil, nil, "", fmt.Errorf("%s: invalid name", name)
522 }
523 dir := w.findPath(root, dirname)
524 if dir == nil || !dir.IsDir() {
525 return nil, nil, "", fmt.Errorf("%s: path not found", name)
526 }
527 child := dir.Children[childname]
528 if child == nil && mustExist {
529 return nil, nil, "", fmt.Errorf("%s: file not found", name)
530 }
531 return dir, child, childname, nil
532 }
533
534
535
536
537
538
539 func (w *Writer) MakeParents(name string) error {
540 if err := w.finishInode(); err != nil {
541 return err
542 }
543
544
545
546 cleanname := path.Clean("/" + name)[1:]
547 parentDirs, _ := path.Split(cleanname)
548 currentPath := ""
549 root := w.root()
550 dirname := ""
551 for parentDirs != "" {
552 dirname, parentDirs = splitFirst(parentDirs)
553 currentPath += "/" + dirname
554 if _, ok := root.Children[dirname]; !ok {
555 f := &File{
556 Mode: root.Mode,
557 Atime: time.Now(),
558 Mtime: time.Now(),
559 Ctime: time.Now(),
560 Crtime: time.Now(),
561 Size: 0,
562 Uid: root.Uid,
563 Gid: root.Gid,
564 Devmajor: root.Devmajor,
565 Devminor: root.Devminor,
566 Xattrs: make(map[string][]byte),
567 }
568 if err := w.Create(currentPath, f); err != nil {
569 return fmt.Errorf("failed while creating parent directories: %w", err)
570 }
571 }
572 root = root.Children[dirname]
573 }
574 return nil
575 }
576
577
578 func (w *Writer) Create(name string, f *File) error {
579 if err := w.finishInode(); err != nil {
580 return err
581 }
582 dir, existing, childname, err := w.lookup(name, false)
583 if err != nil {
584 return err
585 }
586 var reuse *inode
587 if existing != nil {
588 if existing.IsDir() {
589 if f.Mode&TypeMask != S_IFDIR {
590 return fmt.Errorf("%s: cannot replace a directory with a file", name)
591 }
592 reuse = existing
593 } else if f.Mode&TypeMask == S_IFDIR {
594 return fmt.Errorf("%s: cannot replace a file with a directory", name)
595 } else if existing.LinkCount < 2 {
596 reuse = existing
597 }
598 } else {
599 if f.Mode&TypeMask == S_IFDIR && dir.LinkCount >= format.MaxLinks {
600 return fmt.Errorf("%s: exceeded parent directory maximum link count", name)
601 }
602 }
603 child, err := w.makeInode(f, reuse)
604 if err != nil {
605 return fmt.Errorf("%s: %s", name, err)
606 }
607 if existing != child {
608 if existing != nil {
609 existing.LinkCount--
610 }
611 dir.Children[childname] = child
612 child.LinkCount++
613 if child.IsDir() {
614 dir.LinkCount++
615 }
616 }
617 if child.Mode&format.TypeMask == format.S_IFREG {
618 w.startInode(name, child, f.Size)
619 }
620 return nil
621 }
622
623
624
625
626 func (w *Writer) Link(oldname, newname string) error {
627 if err := w.finishInode(); err != nil {
628 return err
629 }
630 newdir, existing, newchildname, err := w.lookup(newname, false)
631 if err != nil {
632 return err
633 }
634 if existing != nil && (existing.IsDir() || existing.LinkCount < 2) {
635 return fmt.Errorf("%s: cannot orphan existing file or directory", newname)
636 }
637
638 _, oldfile, _, err := w.lookup(oldname, true)
639 if err != nil {
640 return err
641 }
642 switch oldfile.Mode & format.TypeMask {
643 case format.S_IFDIR:
644 return fmt.Errorf("%s: link target cannot be a directory: %s", newname, oldname)
645 }
646
647 if existing != oldfile && oldfile.LinkCount >= format.MaxLinks {
648 return fmt.Errorf("%s: link target would exceed maximum link count: %s", newname, oldname)
649 }
650
651 if existing != nil {
652 existing.LinkCount--
653 }
654 oldfile.LinkCount++
655 newdir.Children[newchildname] = oldfile
656 return nil
657 }
658
659
660 func (w *Writer) Stat(name string) (*File, error) {
661 if err := w.finishInode(); err != nil {
662 return nil, err
663 }
664 _, node, _, err := w.lookup(name, true)
665 if err != nil {
666 return nil, err
667 }
668 f := &File{
669 Size: node.Size,
670 Mode: node.Mode,
671 Uid: node.Uid,
672 Gid: node.Gid,
673 Atime: fsTimeToTime(node.Atime),
674 Ctime: fsTimeToTime(node.Ctime),
675 Mtime: fsTimeToTime(node.Mtime),
676 Crtime: fsTimeToTime(node.Crtime),
677 Devmajor: node.Devmajor,
678 Devminor: node.Devminor,
679 }
680 f.Xattrs = make(map[string][]byte)
681 if node.XattrBlock != 0 || len(node.XattrInline) != 0 {
682 if node.XattrBlock != 0 {
683 orig := w.block()
684 w.seekBlock(node.XattrBlock)
685 if w.err != nil {
686 return nil, w.err
687 }
688 var b [BlockSize]byte
689 _, err := w.f.Read(b[:])
690 w.seekBlock(orig)
691 if err != nil {
692 return nil, err
693 }
694 getXattrs(b[32:], f.Xattrs, 32)
695 }
696 if len(node.XattrInline) != 0 {
697 getXattrs(node.XattrInline[4:], f.Xattrs, 0)
698 delete(f.Xattrs, "system.data")
699 }
700 }
701 if node.FileType() == S_IFLNK {
702 if node.Size > smallSymlinkSize {
703 return nil, fmt.Errorf("%s: cannot retrieve link information", name)
704 }
705 f.Linkname = string(node.Data)
706 }
707 return f, nil
708 }
709
710 func (w *Writer) Write(b []byte) (int, error) {
711 if len(b) == 0 {
712 return 0, nil
713 }
714 if w.dataWritten+int64(len(b)) > w.dataMax {
715 return 0, fmt.Errorf("%s: wrote too much: %d > %d", w.curName, w.dataWritten+int64(len(b)), w.dataMax)
716 }
717
718 if w.curInode.Flags&format.InodeFlagInlineData != 0 {
719 copy(w.curInode.Data[w.dataWritten:], b)
720 w.dataWritten += int64(len(b))
721 return len(b), nil
722 }
723
724 n, err := w.write(b)
725 w.dataWritten += int64(n)
726 return n, err
727 }
728
729 func (w *Writer) startInode(name string, inode *inode, size int64) {
730 if w.curInode != nil {
731 panic("inode already in progress")
732 }
733 w.curName = name
734 w.curInode = inode
735 w.dataWritten = 0
736 w.dataMax = size
737 }
738
739 func (w *Writer) block() uint32 {
740 return uint32(w.pos / BlockSize)
741 }
742
743 func (w *Writer) seekBlock(block uint32) {
744 w.pos = int64(block) * BlockSize
745 if w.err != nil {
746 return
747 }
748 w.err = w.bw.Flush()
749 if w.err != nil {
750 return
751 }
752 _, w.err = w.f.Seek(w.pos, io.SeekStart)
753 }
754
755 func (w *Writer) nextBlock() {
756 if w.pos%BlockSize != 0 {
757
758 _, _ = w.zero(BlockSize - w.pos%BlockSize)
759 }
760 }
761
762 func fillExtents(hdr *format.ExtentHeader, extents []format.ExtentLeafNode, startBlock, offset, inodeSize uint32) {
763 *hdr = format.ExtentHeader{
764 Magic: format.ExtentHeaderMagic,
765 Entries: uint16(len(extents)),
766 Max: uint16(cap(extents)),
767 Depth: 0,
768 }
769 for i := range extents {
770 block := offset + uint32(i)*maxBlocksPerExtent
771 length := inodeSize - block
772 if length > maxBlocksPerExtent {
773 length = maxBlocksPerExtent
774 }
775 start := startBlock + block
776 extents[i] = format.ExtentLeafNode{
777 Block: block,
778 Length: uint16(length),
779 StartLow: start,
780 }
781 }
782 }
783
784 func (w *Writer) writeExtents(inode *inode) error {
785 start := w.pos - w.dataWritten
786 if start%BlockSize != 0 {
787 panic("unaligned")
788 }
789 w.nextBlock()
790
791 startBlock := uint32(start / BlockSize)
792 blocks := w.block() - startBlock
793 usedBlocks := blocks
794
795 const extentNodeSize = 12
796 const extentsPerBlock = BlockSize/extentNodeSize - 1
797
798 extents := (blocks + maxBlocksPerExtent - 1) / maxBlocksPerExtent
799 var b bytes.Buffer
800 if extents == 0 {
801
802 } else if extents <= 4 {
803 var root struct {
804 hdr format.ExtentHeader
805 extents [4]format.ExtentLeafNode
806 }
807 fillExtents(&root.hdr, root.extents[:extents], startBlock, 0, blocks)
808 _ = binary.Write(&b, binary.LittleEndian, root)
809 } else if extents <= 4*extentsPerBlock {
810 const extentsPerBlock = BlockSize/extentNodeSize - 1
811 extentBlocks := extents/extentsPerBlock + 1
812 usedBlocks += extentBlocks
813 var b2 bytes.Buffer
814
815 var root struct {
816 hdr format.ExtentHeader
817 nodes [4]format.ExtentIndexNode
818 }
819 root.hdr = format.ExtentHeader{
820 Magic: format.ExtentHeaderMagic,
821 Entries: uint16(extentBlocks),
822 Max: 4,
823 Depth: 1,
824 }
825 for i := uint32(0); i < extentBlocks; i++ {
826 root.nodes[i] = format.ExtentIndexNode{
827 Block: i * extentsPerBlock * maxBlocksPerExtent,
828 LeafLow: w.block(),
829 }
830 extentsInBlock := extents - i*extentBlocks
831 if extentsInBlock > extentsPerBlock {
832 extentsInBlock = extentsPerBlock
833 }
834
835 var node struct {
836 hdr format.ExtentHeader
837 extents [extentsPerBlock]format.ExtentLeafNode
838 _ [BlockSize - (extentsPerBlock+1)*extentNodeSize]byte
839 }
840
841 offset := i * extentsPerBlock * maxBlocksPerExtent
842 fillExtents(&node.hdr, node.extents[:extentsInBlock], startBlock+offset, offset, blocks)
843 _ = binary.Write(&b2, binary.LittleEndian, node)
844 if _, err := w.write(b2.Next(BlockSize)); err != nil {
845 return err
846 }
847 }
848 _ = binary.Write(&b, binary.LittleEndian, root)
849 } else {
850 panic("file too big")
851 }
852
853 inode.Data = b.Bytes()
854 inode.Flags |= format.InodeFlagExtents
855 inode.BlockCount += usedBlocks
856 return w.err
857 }
858
859 func (w *Writer) finishInode() error {
860 if !w.initialized {
861 if err := w.init(); err != nil {
862 return err
863 }
864 }
865 if w.curInode == nil {
866 return nil
867 }
868 if w.dataWritten != w.dataMax {
869 return fmt.Errorf("did not write the right amount: %d != %d", w.dataWritten, w.dataMax)
870 }
871
872 if w.dataMax != 0 && w.curInode.Flags&format.InodeFlagInlineData == 0 {
873 if err := w.writeExtents(w.curInode); err != nil {
874 return err
875 }
876 }
877
878 w.dataWritten = 0
879 w.dataMax = 0
880 w.curInode = nil
881 return w.err
882 }
883
884 func modeToFileType(mode uint16) format.FileType {
885 switch mode & format.TypeMask {
886 default:
887 return format.FileTypeUnknown
888 case format.S_IFREG:
889 return format.FileTypeRegular
890 case format.S_IFDIR:
891 return format.FileTypeDirectory
892 case format.S_IFCHR:
893 return format.FileTypeCharacter
894 case format.S_IFBLK:
895 return format.FileTypeBlock
896 case format.S_IFIFO:
897 return format.FileTypeFIFO
898 case format.S_IFSOCK:
899 return format.FileTypeSocket
900 case format.S_IFLNK:
901 return format.FileTypeSymbolicLink
902 }
903 }
904
// constReader is an io.Reader that yields an endless stream of one byte
// value.
type constReader byte

// zero is an endless source of zero bytes.
var zero = constReader(0)

// Read fills all of b with the reader's byte value. It never fails and
// always reports len(b) bytes read.
func (r constReader) Read(b []byte) (int, error) {
	fill := byte(r)
	for i := 0; i < len(b); i++ {
		b[i] = fill
	}
	return len(b), nil
}
915
916 func (w *Writer) writeDirectory(dir, parent *inode) error {
917 if err := w.finishInode(); err != nil {
918 return err
919 }
920
921
922 w.startInode("", dir, 0x7fffffffffffffff)
923 left := BlockSize
924 finishBlock := func() error {
925 if left > 0 {
926 e := format.DirectoryEntry{
927 RecordLength: uint16(left),
928 }
929 err := binary.Write(w, binary.LittleEndian, e)
930 if err != nil {
931 return err
932 }
933 left -= directoryEntrySize
934 if left < 4 {
935 panic("not enough space for trailing entry")
936 }
937 _, err = io.CopyN(w, zero, int64(left))
938 if err != nil {
939 return err
940 }
941 }
942 left = BlockSize
943 return nil
944 }
945
946 writeEntry := func(ino format.InodeNumber, name string) error {
947 rlb := directoryEntrySize + len(name)
948 rl := (rlb + 3) & ^3
949 if left < rl+12 {
950 if err := finishBlock(); err != nil {
951 return err
952 }
953 }
954 e := format.DirectoryEntry{
955 Inode: ino,
956 RecordLength: uint16(rl),
957 NameLength: uint8(len(name)),
958 FileType: modeToFileType(w.getInode(ino).Mode),
959 }
960 err := binary.Write(w, binary.LittleEndian, e)
961 if err != nil {
962 return err
963 }
964 _, err = w.Write([]byte(name))
965 if err != nil {
966 return err
967 }
968 var zero [4]byte
969 _, err = w.Write(zero[:rl-rlb])
970 if err != nil {
971 return err
972 }
973 left -= rl
974 return nil
975 }
976 if err := writeEntry(dir.Number, "."); err != nil {
977 return err
978 }
979 if err := writeEntry(parent.Number, ".."); err != nil {
980 return err
981 }
982
983
984 var children []string
985 for name := range dir.Children {
986 children = append(children, name)
987 }
988 sort.Slice(children, func(i, j int) bool {
989 left_num := dir.Children[children[i]].Number
990 right_num := dir.Children[children[j]].Number
991
992 if left_num == right_num {
993 return children[i] < children[j]
994 }
995 return left_num < right_num
996 })
997
998 for _, name := range children {
999 child := dir.Children[name]
1000 if err := writeEntry(child.Number, name); err != nil {
1001 return err
1002 }
1003 }
1004 if err := finishBlock(); err != nil {
1005 return err
1006 }
1007 w.curInode.Size = w.dataWritten
1008 w.dataMax = w.dataWritten
1009 return nil
1010 }
1011
1012 func (w *Writer) writeDirectoryRecursive(dir, parent *inode) error {
1013 if err := w.writeDirectory(dir, parent); err != nil {
1014 return err
1015 }
1016
1017
1018 var children []string
1019 for name := range dir.Children {
1020 children = append(children, name)
1021 }
1022 sort.Slice(children, func(i, j int) bool {
1023 left_num := dir.Children[children[i]].Number
1024 right_num := dir.Children[children[j]].Number
1025
1026 if left_num == right_num {
1027 return children[i] < children[j]
1028 }
1029 return left_num < right_num
1030 })
1031
1032 for _, name := range children {
1033 child := dir.Children[name]
1034 if child.IsDir() {
1035 if err := w.writeDirectoryRecursive(child, dir); err != nil {
1036 return err
1037 }
1038 }
1039 }
1040 return nil
1041 }
1042
1043 func (w *Writer) writeInodeTable(tableSize uint32) error {
1044 var b bytes.Buffer
1045 for _, inode := range w.inodes {
1046 if inode != nil {
1047 binode := format.Inode{
1048 Mode: inode.Mode,
1049 Uid: uint16(inode.Uid & 0xffff),
1050 Gid: uint16(inode.Gid & 0xffff),
1051 SizeLow: uint32(inode.Size & 0xffffffff),
1052 SizeHigh: uint32(inode.Size >> 32),
1053 LinksCount: uint16(inode.LinkCount),
1054 BlocksLow: inode.BlockCount,
1055 Flags: inode.Flags,
1056 XattrBlockLow: inode.XattrBlock,
1057 UidHigh: uint16(inode.Uid >> 16),
1058 GidHigh: uint16(inode.Gid >> 16),
1059 ExtraIsize: uint16(inodeUsedSize - 128),
1060 Atime: uint32(inode.Atime),
1061 AtimeExtra: uint32(inode.Atime >> 32),
1062 Ctime: uint32(inode.Ctime),
1063 CtimeExtra: uint32(inode.Ctime >> 32),
1064 Mtime: uint32(inode.Mtime),
1065 MtimeExtra: uint32(inode.Mtime >> 32),
1066 Crtime: uint32(inode.Crtime),
1067 CrtimeExtra: uint32(inode.Crtime >> 32),
1068 }
1069 switch inode.Mode & format.TypeMask {
1070 case format.S_IFDIR, format.S_IFREG, format.S_IFLNK:
1071 n := copy(binode.Block[:], inode.Data)
1072 if n < len(inode.Data) {
1073
1074 xattr := [1]xattr{{
1075 Name: "data",
1076 Index: 7,
1077 Value: inode.Data[n:],
1078 }}
1079 putXattrs(xattr[:], inode.XattrInline[4:], 0)
1080 }
1081 case format.S_IFBLK, format.S_IFCHR:
1082 dev := inode.Devminor&0xff | inode.Devmajor<<8 | (inode.Devminor&0xffffff00)<<12
1083 binary.LittleEndian.PutUint32(binode.Block[4:], dev)
1084 }
1085
1086 _ = binary.Write(&b, binary.LittleEndian, binode)
1087 b.Truncate(inodeUsedSize)
1088 n, _ := b.Write(inode.XattrInline)
1089 _, _ = io.CopyN(&b, zero, int64(inodeExtraSize-n))
1090 } else {
1091 _, _ = io.CopyN(&b, zero, inodeSize)
1092 }
1093 if _, err := w.write(b.Next(inodeSize)); err != nil {
1094 return err
1095 }
1096 }
1097 rest := tableSize - uint32(len(w.inodes)*inodeSize)
1098 if _, err := w.zero(int64(rest)); err != nil {
1099 return err
1100 }
1101 return nil
1102 }
1103
1104
1105
1106 func NewWriter(f io.ReadWriteSeeker, opts ...Option) *Writer {
1107 w := &Writer{
1108 f: f,
1109 bw: bufio.NewWriterSize(f, 65536*8),
1110 maxDiskSize: defaultMaxDiskSize,
1111 }
1112 for _, opt := range opts {
1113 opt(w)
1114 }
1115 return w
1116 }
1117
1118
1119 type Option func(*Writer)
1120
1121
1122
1123
1124 func InlineData(w *Writer) {
1125 w.supportInlineData = true
1126 }
1127
1128
1129
1130 func MaximumDiskSize(size int64) Option {
1131 return func(w *Writer) {
1132 if size < 0 || size > maxMaxDiskSize {
1133 w.maxDiskSize = maxMaxDiskSize
1134 } else if size == 0 {
1135 w.maxDiskSize = defaultMaxDiskSize
1136 } else {
1137 w.maxDiskSize = (size + BlockSize - 1) &^ (BlockSize - 1)
1138 }
1139 }
1140 }
1141
1142 func (w *Writer) init() error {
1143
1144 w.inodes = make([]*inode, 1, 32)
1145
1146 root, _ := w.makeInode(&File{
1147 Mode: format.S_IFDIR | 0755,
1148 }, nil)
1149 root.LinkCount++
1150
1151 w.inodes = append(w.inodes, make([]*inode, inodeFirst-len(w.inodes)-1)...)
1152 maxBlocks := (w.maxDiskSize-1)/BlockSize + 1
1153 maxGroups := (maxBlocks-1)/blocksPerGroup + 1
1154 w.gdBlocks = uint32((maxGroups-1)/groupsPerDescriptorBlock + 1)
1155
1156
1157 w.seekBlock(1 + w.gdBlocks)
1158 w.initialized = true
1159
1160
1161 if err := w.Create("lost+found", &File{Mode: format.S_IFDIR | 0700}); err != nil {
1162 return err
1163 }
1164 return w.err
1165 }
1166
1167 func groupCount(blocks uint32, inodes uint32, inodesPerGroup uint32) uint32 {
1168 inodeBlocksPerGroup := inodesPerGroup * inodeSize / BlockSize
1169 dataBlocksPerGroup := blocksPerGroup - inodeBlocksPerGroup - 2
1170
1171
1172
1173 minBlocks := (inodes-1)/inodesPerGroup*dataBlocksPerGroup + 1
1174 if blocks < minBlocks {
1175 blocks = minBlocks
1176 }
1177
1178 return (blocks + dataBlocksPerGroup - 1) / dataBlocksPerGroup
1179 }
1180
1181 func bestGroupCount(blocks uint32, inodes uint32) (groups uint32, inodesPerGroup uint32) {
1182 groups = 0xffffffff
1183 for ipg := uint32(inodesPerGroupIncrement); ipg <= maxInodesPerGroup; ipg += inodesPerGroupIncrement {
1184 g := groupCount(blocks, inodes, ipg)
1185 if g < groups {
1186 groups = g
1187 inodesPerGroup = ipg
1188 }
1189 }
1190 return
1191 }
1192
1193 func (w *Writer) Close() error {
1194 if err := w.finishInode(); err != nil {
1195 return err
1196 }
1197 root := w.root()
1198 if err := w.writeDirectoryRecursive(root, root); err != nil {
1199 return err
1200 }
1201
1202 if err := w.finishInode(); err != nil {
1203 return err
1204 }
1205
1206
1207 inodeTableOffset := w.block()
1208 groups, inodesPerGroup := bestGroupCount(inodeTableOffset, uint32(len(w.inodes)))
1209 err := w.writeInodeTable(groups * inodesPerGroup * inodeSize)
1210 if err != nil {
1211 return err
1212 }
1213
1214
1215 bitmapOffset := w.block()
1216 bitmapSize := groups * 2
1217 validDataSize := bitmapOffset + bitmapSize
1218 diskSize := validDataSize
1219 minSize := (groups-1)*blocksPerGroup + 1
1220 if diskSize < minSize {
1221 diskSize = minSize
1222 }
1223
1224 usedGdBlocks := (groups-1)/groupsPerDescriptorBlock + 1
1225 if usedGdBlocks > w.gdBlocks {
1226 return exceededMaxSizeError{w.maxDiskSize}
1227 }
1228
1229 gds := make([]format.GroupDescriptor, w.gdBlocks*groupsPerDescriptorBlock)
1230 inodeTableSizePerGroup := inodesPerGroup * inodeSize / BlockSize
1231 var totalUsedBlocks, totalUsedInodes uint32
1232 for g := uint32(0); g < groups; g++ {
1233 var b [BlockSize * 2]byte
1234 var dirCount, usedInodeCount, usedBlockCount uint16
1235
1236
1237 if (g+1)*blocksPerGroup <= validDataSize {
1238
1239 for j := range b[:BlockSize] {
1240 b[j] = 0xff
1241 }
1242 usedBlockCount = blocksPerGroup
1243 } else if g*blocksPerGroup < validDataSize {
1244 for j := uint32(0); j < validDataSize-g*blocksPerGroup; j++ {
1245 b[j/8] |= 1 << (j % 8)
1246 usedBlockCount++
1247 }
1248 }
1249 if g == 0 {
1250
1251 for j := 1 + usedGdBlocks; j < 1+w.gdBlocks; j++ {
1252 b[j/8] &^= 1 << (j % 8)
1253 usedBlockCount--
1254 }
1255 }
1256 if g == groups-1 && diskSize%blocksPerGroup != 0 {
1257
1258
1259 for j := diskSize % blocksPerGroup; j < blocksPerGroup; j++ {
1260 b[j/8] |= 1 << (j % 8)
1261 usedBlockCount++
1262 }
1263 }
1264
1265 for j := uint32(0); j < inodesPerGroup; j++ {
1266 ino := format.InodeNumber(1 + g*inodesPerGroup + j)
1267 inode := w.getInode(ino)
1268 if ino < inodeFirst || inode != nil {
1269 b[BlockSize+j/8] |= 1 << (j % 8)
1270 usedInodeCount++
1271 }
1272 if inode != nil && inode.Mode&format.TypeMask == format.S_IFDIR {
1273 dirCount++
1274 }
1275 }
1276 _, err := w.write(b[:])
1277 if err != nil {
1278 return err
1279 }
1280 gds[g] = format.GroupDescriptor{
1281 BlockBitmapLow: bitmapOffset + 2*g,
1282 InodeBitmapLow: bitmapOffset + 2*g + 1,
1283 InodeTableLow: inodeTableOffset + g*inodeTableSizePerGroup,
1284 UsedDirsCountLow: dirCount,
1285 FreeInodesCountLow: uint16(inodesPerGroup) - usedInodeCount,
1286 FreeBlocksCountLow: blocksPerGroup - usedBlockCount,
1287 }
1288
1289 totalUsedBlocks += uint32(usedBlockCount)
1290 totalUsedInodes += uint32(usedInodeCount)
1291 }
1292
1293
1294 _, err = w.zero(int64(diskSize-bitmapOffset-bitmapSize) * BlockSize)
1295 if err != nil {
1296 return err
1297 }
1298
1299
1300 w.seekBlock(1)
1301 if w.err != nil {
1302 return w.err
1303 }
1304 err = binary.Write(w.bw, binary.LittleEndian, gds)
1305 if err != nil {
1306 return err
1307 }
1308
1309
1310 var blk [BlockSize]byte
1311 b := bytes.NewBuffer(blk[:1024])
1312 sb := &format.SuperBlock{
1313 InodesCount: inodesPerGroup * groups,
1314 BlocksCountLow: diskSize,
1315 FreeBlocksCountLow: blocksPerGroup*groups - totalUsedBlocks,
1316 FreeInodesCount: inodesPerGroup*groups - totalUsedInodes,
1317 FirstDataBlock: 0,
1318 LogBlockSize: 2,
1319 LogClusterSize: 2,
1320 BlocksPerGroup: blocksPerGroup,
1321 ClustersPerGroup: blocksPerGroup,
1322 InodesPerGroup: inodesPerGroup,
1323 Magic: format.SuperBlockMagic,
1324 State: 1,
1325 Errors: 1,
1326 CreatorOS: 0,
1327 RevisionLevel: 1,
1328 FirstInode: inodeFirst,
1329 LpfInode: inodeLostAndFound,
1330 InodeSize: inodeSize,
1331 FeatureCompat: format.CompatSparseSuper2 | format.CompatExtAttr,
1332 FeatureIncompat: format.IncompatFiletype | format.IncompatExtents | format.IncompatFlexBg,
1333 FeatureRoCompat: format.RoCompatLargeFile | format.RoCompatHugeFile | format.RoCompatExtraIsize | format.RoCompatReadonly,
1334 MinExtraIsize: extraIsize,
1335 WantExtraIsize: extraIsize,
1336 LogGroupsPerFlex: 31,
1337 }
1338 if w.supportInlineData {
1339 sb.FeatureIncompat |= format.IncompatInlineData
1340 }
1341 _ = binary.Write(b, binary.LittleEndian, sb)
1342 w.seekBlock(0)
1343 if _, err := w.write(blk[:]); err != nil {
1344 return err
1345 }
1346 w.seekBlock(diskSize)
1347 return w.err
1348 }
1349
View as plain text