...

Source file src/go.etcd.io/bbolt/db.go

Documentation: go.etcd.io/bbolt

     1  package bbolt
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"hash/fnv"
     7  	"io"
     8  	"os"
     9  	"runtime"
    10  	"sort"
    11  	"sync"
    12  	"time"
    13  	"unsafe"
    14  )
    15  
    16  // The largest step that can be taken when remapping the mmap.
    17  const maxMmapStep = 1 << 30 // 1GB
    18  
    19  // The data file format version.
    20  const version = 2
    21  
    22  // Represents a marker value to indicate that a file is a Bolt DB.
    23  const magic uint32 = 0xED0CDAED
    24  
    25  const pgidNoFreelist pgid = 0xffffffffffffffff
    26  
    27  // IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
    28  // syncing changes to a file.  This is required as some operating systems,
    29  // such as OpenBSD, do not have a unified buffer cache (UBC) and writes
    30  // must be synchronized using the msync(2) syscall.
    31  const IgnoreNoSync = runtime.GOOS == "openbsd"
    32  
    33  // Default values if not set in a DB instance.
    34  const (
    35  	DefaultMaxBatchSize  int = 1000
    36  	DefaultMaxBatchDelay     = 10 * time.Millisecond
    37  	DefaultAllocSize         = 16 * 1024 * 1024
    38  )
    39  
    40  // default page size for db is set to the OS page size.
    41  var defaultPageSize = os.Getpagesize()
    42  
    43  // The time elapsed between consecutive file locking attempts.
    44  const flockRetryTimeout = 50 * time.Millisecond
    45  
    46  // FreelistType is the type of the freelist backend
    47  type FreelistType string
    48  
    49  const (
    50  	// FreelistArrayType indicates backend freelist type is array
    51  	FreelistArrayType = FreelistType("array")
    52  	// FreelistMapType indicates backend freelist type is hashmap
    53  	FreelistMapType = FreelistType("hashmap")
    54  )
    55  
    56  // DB represents a collection of buckets persisted to a file on disk.
    57  // All data access is performed through transactions which can be obtained through the DB.
    58  // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
    59  type DB struct {
    60  	// Put `stats` at the first field to ensure it's 64-bit aligned. Note that
    61  	// the first word in an allocated struct can be relied upon to be 64-bit
    62  	// aligned. Refer to https://pkg.go.dev/sync/atomic#pkg-note-BUG. Also
    63  	// refer to discussion in https://github.com/etcd-io/bbolt/issues/577.
    64  	stats Stats
    65  
    66  	// When enabled, the database will perform a Check() after every commit.
    67  	// A panic is issued if the database is in an inconsistent state. This
    68  	// flag has a large performance impact so it should only be used for
    69  	// debugging purposes.
    70  	StrictMode bool
    71  
    72  	// Setting the NoSync flag will cause the database to skip fsync()
    73  	// calls after each commit. This can be useful when bulk loading data
    74  	// into a database and you can restart the bulk load in the event of
    75  	// a system failure or database corruption. Do not set this flag for
    76  	// normal use.
    77  	//
    78  	// If the package global IgnoreNoSync constant is true, this value is
    79  	// ignored.  See the comment on that constant for more details.
    80  	//
    81  	// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
    82  	NoSync bool
    83  
    84  	// When true, skips syncing freelist to disk. This improves the database
    85  	// write performance under normal operation, but requires a full database
    86  	// re-sync during recovery.
    87  	NoFreelistSync bool
    88  
    89  	// FreelistType sets the backend freelist type. There are two options. Array which is simple but endures
    90  	// dramatic performance degradation if database is large and fragmentation in freelist is common.
    91  	// The alternative one is using hashmap, it is faster in almost all circumstances
    92  	// but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
    93  	// The default type is array
    94  	FreelistType FreelistType
    95  
    96  	// When true, skips the truncate call when growing the database.
    97  	// Setting this to true is only safe on non-ext3/ext4 systems.
    98  	// Skipping truncation avoids preallocation of hard drive space and
    99  	// bypasses a truncate() and fsync() syscall on remapping.
   100  	//
   101  	// https://github.com/boltdb/bolt/issues/284
   102  	NoGrowSync bool
   103  
   104  	// When `true`, bbolt will always load the free pages when opening the DB.
   105  	// When opening db in write mode, this flag will always automatically
   106  	// set to `true`.
   107  	PreLoadFreelist bool
   108  
   109  	// If you want to read the entire database fast, you can set MmapFlag to
   110  	// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
   111  	MmapFlags int
   112  
   113  	// MaxBatchSize is the maximum size of a batch. Default value is
   114  	// copied from DefaultMaxBatchSize in Open.
   115  	//
   116  	// If <=0, disables batching.
   117  	//
   118  	// Do not change concurrently with calls to Batch.
   119  	MaxBatchSize int
   120  
   121  	// MaxBatchDelay is the maximum delay before a batch starts.
   122  	// Default value is copied from DefaultMaxBatchDelay in Open.
   123  	//
   124  	// If <=0, effectively disables batching.
   125  	//
   126  	// Do not change concurrently with calls to Batch.
   127  	MaxBatchDelay time.Duration
   128  
   129  	// AllocSize is the amount of space allocated when the database
   130  	// needs to create new pages. This is done to amortize the cost
   131  	// of truncate() and fsync() when growing the data file.
   132  	AllocSize int
   133  
   134  	// Mlock locks database file in memory when set to true.
   135  	// It prevents major page faults, however used memory can't be reclaimed.
   136  	//
   137  	// Supported only on Unix via mlock/munlock syscalls.
   138  	Mlock bool
   139  
   140  	path     string
   141  	openFile func(string, int, os.FileMode) (*os.File, error)
   142  	file     *os.File
   143  	// `dataref` isn't used at all on Windows, and the golangci-lint
   144  	// always fails on Windows platform.
   145  	//nolint
   146  	dataref  []byte // mmap'ed readonly, write throws SEGV
   147  	data     *[maxMapSize]byte
   148  	datasz   int
   149  	filesz   int // current on disk file size
   150  	meta0    *meta
   151  	meta1    *meta
   152  	pageSize int
   153  	opened   bool
   154  	rwtx     *Tx
   155  	txs      []*Tx
   156  
   157  	freelist     *freelist
   158  	freelistLoad sync.Once
   159  
   160  	pagePool sync.Pool
   161  
   162  	batchMu sync.Mutex
   163  	batch   *batch
   164  
   165  	rwlock   sync.Mutex   // Allows only one writer at a time.
   166  	metalock sync.Mutex   // Protects meta page access.
   167  	mmaplock sync.RWMutex // Protects mmap access during remapping.
   168  	statlock sync.RWMutex // Protects stats access.
   169  
   170  	ops struct {
   171  		writeAt func(b []byte, off int64) (n int, err error)
   172  	}
   173  
   174  	// Read only mode.
   175  	// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
   176  	readOnly bool
   177  }
   178  
   179  // Path returns the path to currently open database file.
   180  func (db *DB) Path() string {
   181  	return db.path
   182  }
   183  
   184  // GoString returns the Go string representation of the database.
   185  func (db *DB) GoString() string {
   186  	return fmt.Sprintf("bolt.DB{path:%q}", db.path)
   187  }
   188  
   189  // String returns the string representation of the database.
   190  func (db *DB) String() string {
   191  	return fmt.Sprintf("DB<%q>", db.path)
   192  }
   193  
   194  // Open creates and opens a database at the given path.
   195  // If the file does not exist then it will be created automatically.
   196  // Passing in nil options will cause Bolt to open the database with the default options.
   197  func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
   198  	db := &DB{
   199  		opened: true,
   200  	}
   201  	// Set default options if no options are provided.
   202  	if options == nil {
   203  		options = DefaultOptions
   204  	}
   205  	db.NoSync = options.NoSync
   206  	db.NoGrowSync = options.NoGrowSync
   207  	db.MmapFlags = options.MmapFlags
   208  	db.NoFreelistSync = options.NoFreelistSync
   209  	db.PreLoadFreelist = options.PreLoadFreelist
   210  	db.FreelistType = options.FreelistType
   211  	db.Mlock = options.Mlock
   212  
   213  	// Set default values for later DB operations.
   214  	db.MaxBatchSize = DefaultMaxBatchSize
   215  	db.MaxBatchDelay = DefaultMaxBatchDelay
   216  	db.AllocSize = DefaultAllocSize
   217  
   218  	flag := os.O_RDWR
   219  	if options.ReadOnly {
   220  		flag = os.O_RDONLY
   221  		db.readOnly = true
   222  	} else {
   223  		// always load free pages in write mode
   224  		db.PreLoadFreelist = true
   225  	}
   226  
   227  	db.openFile = options.OpenFile
   228  	if db.openFile == nil {
   229  		db.openFile = os.OpenFile
   230  	}
   231  
   232  	// Open data file and separate sync handler for metadata writes.
   233  	var err error
   234  	if db.file, err = db.openFile(path, flag|os.O_CREATE, mode); err != nil {
   235  		_ = db.close()
   236  		return nil, err
   237  	}
   238  	db.path = db.file.Name()
   239  
   240  	// Lock file so that other processes using Bolt in read-write mode cannot
   241  	// use the database  at the same time. This would cause corruption since
   242  	// the two processes would write meta pages and free pages separately.
   243  	// The database file is locked exclusively (only one process can grab the lock)
   244  	// if !options.ReadOnly.
   245  	// The database file is locked using the shared lock (more than one process may
   246  	// hold a lock at the same time) otherwise (options.ReadOnly is set).
   247  	if err := flock(db, !db.readOnly, options.Timeout); err != nil {
   248  		_ = db.close()
   249  		return nil, err
   250  	}
   251  
   252  	// Default values for test hooks
   253  	db.ops.writeAt = db.file.WriteAt
   254  
   255  	if db.pageSize = options.PageSize; db.pageSize == 0 {
   256  		// Set the default page size to the OS page size.
   257  		db.pageSize = defaultPageSize
   258  	}
   259  
   260  	// Initialize the database if it doesn't exist.
   261  	if info, err := db.file.Stat(); err != nil {
   262  		_ = db.close()
   263  		return nil, err
   264  	} else if info.Size() == 0 {
   265  		// Initialize new files with meta pages.
   266  		if err := db.init(); err != nil {
   267  			// clean up file descriptor on initialization fail
   268  			_ = db.close()
   269  			return nil, err
   270  		}
   271  	} else {
   272  		// try to get the page size from the metadata pages
   273  		if pgSize, err := db.getPageSize(); err == nil {
   274  			db.pageSize = pgSize
   275  		} else {
   276  			_ = db.close()
   277  			return nil, ErrInvalid
   278  		}
   279  	}
   280  
   281  	// Initialize page pool.
   282  	db.pagePool = sync.Pool{
   283  		New: func() interface{} {
   284  			return make([]byte, db.pageSize)
   285  		},
   286  	}
   287  
   288  	// Memory map the data file.
   289  	if err := db.mmap(options.InitialMmapSize); err != nil {
   290  		_ = db.close()
   291  		return nil, err
   292  	}
   293  
   294  	if db.PreLoadFreelist {
   295  		db.loadFreelist()
   296  	}
   297  
   298  	if db.readOnly {
   299  		return db, nil
   300  	}
   301  
   302  	// Flush freelist when transitioning from no sync to sync so
   303  	// NoFreelistSync unaware boltdb can open the db later.
   304  	if !db.NoFreelistSync && !db.hasSyncedFreelist() {
   305  		tx, err := db.Begin(true)
   306  		if tx != nil {
   307  			err = tx.Commit()
   308  		}
   309  		if err != nil {
   310  			_ = db.close()
   311  			return nil, err
   312  		}
   313  	}
   314  
   315  	// Mark the database as opened and return.
   316  	return db, nil
   317  }
   318  
   319  // getPageSize reads the pageSize from the meta pages. It tries
   320  // to read the first meta page firstly. If the first page is invalid,
   321  // then it tries to read the second page using the default page size.
   322  func (db *DB) getPageSize() (int, error) {
   323  	var (
   324  		meta0CanRead, meta1CanRead bool
   325  	)
   326  
   327  	// Read the first meta page to determine the page size.
   328  	if pgSize, canRead, err := db.getPageSizeFromFirstMeta(); err != nil {
   329  		// We cannot read the page size from page 0, but can read page 0.
   330  		meta0CanRead = canRead
   331  	} else {
   332  		return pgSize, nil
   333  	}
   334  
   335  	// Read the second meta page to determine the page size.
   336  	if pgSize, canRead, err := db.getPageSizeFromSecondMeta(); err != nil {
   337  		// We cannot read the page size from page 1, but can read page 1.
   338  		meta1CanRead = canRead
   339  	} else {
   340  		return pgSize, nil
   341  	}
   342  
   343  	// If we can't read the page size from both pages, but can read
   344  	// either page, then we assume it's the same as the OS or the one
   345  	// given, since that's how the page size was chosen in the first place.
   346  	//
   347  	// If both pages are invalid, and (this OS uses a different page size
   348  	// from what the database was created with or the given page size is
   349  	// different from what the database was created with), then we are out
   350  	// of luck and cannot access the database.
   351  	if meta0CanRead || meta1CanRead {
   352  		return db.pageSize, nil
   353  	}
   354  
   355  	return 0, ErrInvalid
   356  }
   357  
   358  // getPageSizeFromFirstMeta reads the pageSize from the first meta page
   359  func (db *DB) getPageSizeFromFirstMeta() (int, bool, error) {
   360  	var buf [0x1000]byte
   361  	var metaCanRead bool
   362  	if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
   363  		metaCanRead = true
   364  		if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
   365  			return int(m.pageSize), metaCanRead, nil
   366  		}
   367  	}
   368  	return 0, metaCanRead, ErrInvalid
   369  }
   370  
   371  // getPageSizeFromSecondMeta reads the pageSize from the second meta page
   372  func (db *DB) getPageSizeFromSecondMeta() (int, bool, error) {
   373  	var (
   374  		fileSize    int64
   375  		metaCanRead bool
   376  	)
   377  
   378  	// get the db file size
   379  	if info, err := db.file.Stat(); err != nil {
   380  		return 0, metaCanRead, err
   381  	} else {
   382  		fileSize = info.Size()
   383  	}
   384  
   385  	// We need to read the second meta page, so we should skip the first page;
   386  	// but we don't know the exact page size yet, it's chicken & egg problem.
   387  	// The solution is to try all the possible page sizes, which starts from 1KB
   388  	// and until 16MB (1024<<14) or the end of the db file
   389  	//
   390  	// TODO: should we support larger page size?
   391  	for i := 0; i <= 14; i++ {
   392  		var buf [0x1000]byte
   393  		var pos int64 = 1024 << uint(i)
   394  		if pos >= fileSize-1024 {
   395  			break
   396  		}
   397  		bw, err := db.file.ReadAt(buf[:], pos)
   398  		if (err == nil && bw == len(buf)) || (err == io.EOF && int64(bw) == (fileSize-pos)) {
   399  			metaCanRead = true
   400  			if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
   401  				return int(m.pageSize), metaCanRead, nil
   402  			}
   403  		}
   404  	}
   405  
   406  	return 0, metaCanRead, ErrInvalid
   407  }
   408  
   409  // loadFreelist reads the freelist if it is synced, or reconstructs it
   410  // by scanning the DB if it is not synced. It assumes there are no
   411  // concurrent accesses being made to the freelist.
   412  func (db *DB) loadFreelist() {
   413  	db.freelistLoad.Do(func() {
   414  		db.freelist = newFreelist(db.FreelistType)
   415  		if !db.hasSyncedFreelist() {
   416  			// Reconstruct free list by scanning the DB.
   417  			db.freelist.readIDs(db.freepages())
   418  		} else {
   419  			// Read free list from freelist page.
   420  			db.freelist.read(db.page(db.meta().freelist))
   421  		}
   422  		db.stats.FreePageN = db.freelist.free_count()
   423  	})
   424  }
   425  
   426  func (db *DB) hasSyncedFreelist() bool {
   427  	return db.meta().freelist != pgidNoFreelist
   428  }
   429  
   430  // mmap opens the underlying memory-mapped file and initializes the meta references.
   431  // minsz is the minimum size that the new mmap can be.
   432  func (db *DB) mmap(minsz int) (err error) {
   433  	db.mmaplock.Lock()
   434  	defer db.mmaplock.Unlock()
   435  
   436  	info, err := db.file.Stat()
   437  	if err != nil {
   438  		return fmt.Errorf("mmap stat error: %s", err)
   439  	} else if int(info.Size()) < db.pageSize*2 {
   440  		return fmt.Errorf("file size too small")
   441  	}
   442  
   443  	// Ensure the size is at least the minimum size.
   444  	fileSize := int(info.Size())
   445  	var size = fileSize
   446  	if size < minsz {
   447  		size = minsz
   448  	}
   449  	size, err = db.mmapSize(size)
   450  	if err != nil {
   451  		return err
   452  	}
   453  
   454  	if db.Mlock {
   455  		// Unlock db memory
   456  		if err := db.munlock(fileSize); err != nil {
   457  			return err
   458  		}
   459  	}
   460  
   461  	// Dereference all mmap references before unmapping.
   462  	if db.rwtx != nil {
   463  		db.rwtx.root.dereference()
   464  	}
   465  
   466  	// Unmap existing data before continuing.
   467  	if err = db.munmap(); err != nil {
   468  		return err
   469  	}
   470  
   471  	// Memory-map the data file as a byte slice.
   472  	// gofail: var mapError string
   473  	// return errors.New(mapError)
   474  	if err = mmap(db, size); err != nil {
   475  		return err
   476  	}
   477  
   478  	// Perform unmmap on any error to reset all data fields:
   479  	// dataref, data, datasz, meta0 and meta1.
   480  	defer func() {
   481  		if err != nil {
   482  			if unmapErr := db.munmap(); unmapErr != nil {
   483  				err = fmt.Errorf("%w; rollback unmap also failed: %v", err, unmapErr)
   484  			}
   485  		}
   486  	}()
   487  
   488  	if db.Mlock {
   489  		// Don't allow swapping of data file
   490  		if err := db.mlock(fileSize); err != nil {
   491  			return err
   492  		}
   493  	}
   494  
   495  	// Save references to the meta pages.
   496  	db.meta0 = db.page(0).meta()
   497  	db.meta1 = db.page(1).meta()
   498  
   499  	// Validate the meta pages. We only return an error if both meta pages fail
   500  	// validation, since meta0 failing validation means that it wasn't saved
   501  	// properly -- but we can recover using meta1. And vice-versa.
   502  	err0 := db.meta0.validate()
   503  	err1 := db.meta1.validate()
   504  	if err0 != nil && err1 != nil {
   505  		return err0
   506  	}
   507  
   508  	return nil
   509  }
   510  
   511  func (db *DB) invalidate() {
   512  	db.dataref = nil
   513  	db.data = nil
   514  	db.datasz = 0
   515  
   516  	db.meta0 = nil
   517  	db.meta1 = nil
   518  }
   519  
   520  // munmap unmaps the data file from memory.
   521  func (db *DB) munmap() error {
   522  	defer db.invalidate()
   523  
   524  	// gofail: var unmapError string
   525  	// return errors.New(unmapError)
   526  	if err := munmap(db); err != nil {
   527  		return fmt.Errorf("unmap error: " + err.Error())
   528  	}
   529  
   530  	return nil
   531  }
   532  
   533  // mmapSize determines the appropriate size for the mmap given the current size
   534  // of the database. The minimum size is 32KB and doubles until it reaches 1GB.
   535  // Returns an error if the new mmap size is greater than the max allowed.
   536  func (db *DB) mmapSize(size int) (int, error) {
   537  	// Double the size from 32KB until 1GB.
   538  	for i := uint(15); i <= 30; i++ {
   539  		if size <= 1<<i {
   540  			return 1 << i, nil
   541  		}
   542  	}
   543  
   544  	// Verify the requested size is not above the maximum allowed.
   545  	if size > maxMapSize {
   546  		return 0, fmt.Errorf("mmap too large")
   547  	}
   548  
   549  	// If larger than 1GB then grow by 1GB at a time.
   550  	sz := int64(size)
   551  	if remainder := sz % int64(maxMmapStep); remainder > 0 {
   552  		sz += int64(maxMmapStep) - remainder
   553  	}
   554  
   555  	// Ensure that the mmap size is a multiple of the page size.
   556  	// This should always be true since we're incrementing in MBs.
   557  	pageSize := int64(db.pageSize)
   558  	if (sz % pageSize) != 0 {
   559  		sz = ((sz / pageSize) + 1) * pageSize
   560  	}
   561  
   562  	// If we've exceeded the max size then only grow up to the max size.
   563  	if sz > maxMapSize {
   564  		sz = maxMapSize
   565  	}
   566  
   567  	return int(sz), nil
   568  }
   569  
   570  func (db *DB) munlock(fileSize int) error {
   571  	// gofail: var munlockError string
   572  	// return errors.New(munlockError)
   573  	if err := munlock(db, fileSize); err != nil {
   574  		return fmt.Errorf("munlock error: " + err.Error())
   575  	}
   576  	return nil
   577  }
   578  
   579  func (db *DB) mlock(fileSize int) error {
   580  	// gofail: var mlockError string
   581  	// return errors.New(mlockError)
   582  	if err := mlock(db, fileSize); err != nil {
   583  		return fmt.Errorf("mlock error: " + err.Error())
   584  	}
   585  	return nil
   586  }
   587  
   588  func (db *DB) mrelock(fileSizeFrom, fileSizeTo int) error {
   589  	if err := db.munlock(fileSizeFrom); err != nil {
   590  		return err
   591  	}
   592  	if err := db.mlock(fileSizeTo); err != nil {
   593  		return err
   594  	}
   595  	return nil
   596  }
   597  
   598  // init creates a new database file and initializes its meta pages.
   599  func (db *DB) init() error {
   600  	// Create two meta pages on a buffer.
   601  	buf := make([]byte, db.pageSize*4)
   602  	for i := 0; i < 2; i++ {
   603  		p := db.pageInBuffer(buf, pgid(i))
   604  		p.id = pgid(i)
   605  		p.flags = metaPageFlag
   606  
   607  		// Initialize the meta page.
   608  		m := p.meta()
   609  		m.magic = magic
   610  		m.version = version
   611  		m.pageSize = uint32(db.pageSize)
   612  		m.freelist = 2
   613  		m.root = bucket{root: 3}
   614  		m.pgid = 4
   615  		m.txid = txid(i)
   616  		m.checksum = m.sum64()
   617  	}
   618  
   619  	// Write an empty freelist at page 3.
   620  	p := db.pageInBuffer(buf, pgid(2))
   621  	p.id = pgid(2)
   622  	p.flags = freelistPageFlag
   623  	p.count = 0
   624  
   625  	// Write an empty leaf page at page 4.
   626  	p = db.pageInBuffer(buf, pgid(3))
   627  	p.id = pgid(3)
   628  	p.flags = leafPageFlag
   629  	p.count = 0
   630  
   631  	// Write the buffer to our data file.
   632  	if _, err := db.ops.writeAt(buf, 0); err != nil {
   633  		return err
   634  	}
   635  	if err := fdatasync(db); err != nil {
   636  		return err
   637  	}
   638  	db.filesz = len(buf)
   639  
   640  	return nil
   641  }
   642  
   643  // Close releases all database resources.
   644  // It will block waiting for any open transactions to finish
   645  // before closing the database and returning.
   646  func (db *DB) Close() error {
   647  	db.rwlock.Lock()
   648  	defer db.rwlock.Unlock()
   649  
   650  	db.metalock.Lock()
   651  	defer db.metalock.Unlock()
   652  
   653  	db.mmaplock.Lock()
   654  	defer db.mmaplock.Unlock()
   655  
   656  	return db.close()
   657  }
   658  
   659  func (db *DB) close() error {
   660  	if !db.opened {
   661  		return nil
   662  	}
   663  
   664  	db.opened = false
   665  
   666  	db.freelist = nil
   667  
   668  	// Clear ops.
   669  	db.ops.writeAt = nil
   670  
   671  	var errs []error
   672  	// Close the mmap.
   673  	if err := db.munmap(); err != nil {
   674  		errs = append(errs, err)
   675  	}
   676  
   677  	// Close file handles.
   678  	if db.file != nil {
   679  		// No need to unlock read-only file.
   680  		if !db.readOnly {
   681  			// Unlock the file.
   682  			if err := funlock(db); err != nil {
   683  				errs = append(errs, fmt.Errorf("bolt.Close(): funlock error: %w", err))
   684  			}
   685  		}
   686  
   687  		// Close the file descriptor.
   688  		if err := db.file.Close(); err != nil {
   689  			errs = append(errs, fmt.Errorf("db file close: %w", err))
   690  		}
   691  		db.file = nil
   692  	}
   693  
   694  	db.path = ""
   695  
   696  	if len(errs) > 0 {
   697  		return errs[0]
   698  	}
   699  	return nil
   700  }
   701  
   702  // Begin starts a new transaction.
   703  // Multiple read-only transactions can be used concurrently but only one
   704  // write transaction can be used at a time. Starting multiple write transactions
   705  // will cause the calls to block and be serialized until the current write
   706  // transaction finishes.
   707  //
   708  // Transactions should not be dependent on one another. Opening a read
   709  // transaction and a write transaction in the same goroutine can cause the
   710  // writer to deadlock because the database periodically needs to re-mmap itself
   711  // as it grows and it cannot do that while a read transaction is open.
   712  //
   713  // If a long running read transaction (for example, a snapshot transaction) is
   714  // needed, you might want to set DB.InitialMmapSize to a large enough value
   715  // to avoid potential blocking of write transaction.
   716  //
   717  // IMPORTANT: You must close read-only transactions after you are finished or
   718  // else the database will not reclaim old pages.
   719  func (db *DB) Begin(writable bool) (*Tx, error) {
   720  	if writable {
   721  		return db.beginRWTx()
   722  	}
   723  	return db.beginTx()
   724  }
   725  
   726  func (db *DB) beginTx() (*Tx, error) {
   727  	// Lock the meta pages while we initialize the transaction. We obtain
   728  	// the meta lock before the mmap lock because that's the order that the
   729  	// write transaction will obtain them.
   730  	db.metalock.Lock()
   731  
   732  	// Obtain a read-only lock on the mmap. When the mmap is remapped it will
   733  	// obtain a write lock so all transactions must finish before it can be
   734  	// remapped.
   735  	db.mmaplock.RLock()
   736  
   737  	// Exit if the database is not open yet.
   738  	if !db.opened {
   739  		db.mmaplock.RUnlock()
   740  		db.metalock.Unlock()
   741  		return nil, ErrDatabaseNotOpen
   742  	}
   743  
   744  	// Exit if the database is not correctly mapped.
   745  	if db.data == nil {
   746  		db.mmaplock.RUnlock()
   747  		db.metalock.Unlock()
   748  		return nil, ErrInvalidMapping
   749  	}
   750  
   751  	// Create a transaction associated with the database.
   752  	t := &Tx{}
   753  	t.init(db)
   754  
   755  	// Keep track of transaction until it closes.
   756  	db.txs = append(db.txs, t)
   757  	n := len(db.txs)
   758  
   759  	// Unlock the meta pages.
   760  	db.metalock.Unlock()
   761  
   762  	// Update the transaction stats.
   763  	db.statlock.Lock()
   764  	db.stats.TxN++
   765  	db.stats.OpenTxN = n
   766  	db.statlock.Unlock()
   767  
   768  	return t, nil
   769  }
   770  
   771  func (db *DB) beginRWTx() (*Tx, error) {
   772  	// If the database was opened with Options.ReadOnly, return an error.
   773  	if db.readOnly {
   774  		return nil, ErrDatabaseReadOnly
   775  	}
   776  
   777  	// Obtain writer lock. This is released by the transaction when it closes.
   778  	// This enforces only one writer transaction at a time.
   779  	db.rwlock.Lock()
   780  
   781  	// Once we have the writer lock then we can lock the meta pages so that
   782  	// we can set up the transaction.
   783  	db.metalock.Lock()
   784  	defer db.metalock.Unlock()
   785  
   786  	// Exit if the database is not open yet.
   787  	if !db.opened {
   788  		db.rwlock.Unlock()
   789  		return nil, ErrDatabaseNotOpen
   790  	}
   791  
   792  	// Exit if the database is not correctly mapped.
   793  	if db.data == nil {
   794  		db.rwlock.Unlock()
   795  		return nil, ErrInvalidMapping
   796  	}
   797  
   798  	// Create a transaction associated with the database.
   799  	t := &Tx{writable: true}
   800  	t.init(db)
   801  	db.rwtx = t
   802  	db.freePages()
   803  	return t, nil
   804  }
   805  
   806  // freePages releases any pages associated with closed read-only transactions.
   807  func (db *DB) freePages() {
   808  	// Free all pending pages prior to earliest open transaction.
   809  	sort.Sort(txsById(db.txs))
   810  	minid := txid(0xFFFFFFFFFFFFFFFF)
   811  	if len(db.txs) > 0 {
   812  		minid = db.txs[0].meta.txid
   813  	}
   814  	if minid > 0 {
   815  		db.freelist.release(minid - 1)
   816  	}
   817  	// Release unused txid extents.
   818  	for _, t := range db.txs {
   819  		db.freelist.releaseRange(minid, t.meta.txid-1)
   820  		minid = t.meta.txid + 1
   821  	}
   822  	db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
   823  	// Any page both allocated and freed in an extent is safe to release.
   824  }
   825  
   826  type txsById []*Tx
   827  
   828  func (t txsById) Len() int           { return len(t) }
   829  func (t txsById) Swap(i, j int)      { t[i], t[j] = t[j], t[i] }
   830  func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
   831  
   832  // removeTx removes a transaction from the database.
   833  func (db *DB) removeTx(tx *Tx) {
   834  	// Release the read lock on the mmap.
   835  	db.mmaplock.RUnlock()
   836  
   837  	// Use the meta lock to restrict access to the DB object.
   838  	db.metalock.Lock()
   839  
   840  	// Remove the transaction.
   841  	for i, t := range db.txs {
   842  		if t == tx {
   843  			last := len(db.txs) - 1
   844  			db.txs[i] = db.txs[last]
   845  			db.txs[last] = nil
   846  			db.txs = db.txs[:last]
   847  			break
   848  		}
   849  	}
   850  	n := len(db.txs)
   851  
   852  	// Unlock the meta pages.
   853  	db.metalock.Unlock()
   854  
   855  	// Merge statistics.
   856  	db.statlock.Lock()
   857  	db.stats.OpenTxN = n
   858  	db.stats.TxStats.add(&tx.stats)
   859  	db.statlock.Unlock()
   860  }
   861  
   862  // Update executes a function within the context of a read-write managed transaction.
   863  // If no error is returned from the function then the transaction is committed.
   864  // If an error is returned then the entire transaction is rolled back.
   865  // Any error that is returned from the function or returned from the commit is
   866  // returned from the Update() method.
   867  //
   868  // Attempting to manually commit or rollback within the function will cause a panic.
   869  func (db *DB) Update(fn func(*Tx) error) error {
   870  	t, err := db.Begin(true)
   871  	if err != nil {
   872  		return err
   873  	}
   874  
   875  	// Make sure the transaction rolls back in the event of a panic.
   876  	defer func() {
   877  		if t.db != nil {
   878  			t.rollback()
   879  		}
   880  	}()
   881  
   882  	// Mark as a managed tx so that the inner function cannot manually commit.
   883  	t.managed = true
   884  
   885  	// If an error is returned from the function then rollback and return error.
   886  	err = fn(t)
   887  	t.managed = false
   888  	if err != nil {
   889  		_ = t.Rollback()
   890  		return err
   891  	}
   892  
   893  	return t.Commit()
   894  }
   895  
   896  // View executes a function within the context of a managed read-only transaction.
   897  // Any error that is returned from the function is returned from the View() method.
   898  //
   899  // Attempting to manually rollback within the function will cause a panic.
   900  func (db *DB) View(fn func(*Tx) error) error {
   901  	t, err := db.Begin(false)
   902  	if err != nil {
   903  		return err
   904  	}
   905  
   906  	// Make sure the transaction rolls back in the event of a panic.
   907  	defer func() {
   908  		if t.db != nil {
   909  			t.rollback()
   910  		}
   911  	}()
   912  
   913  	// Mark as a managed tx so that the inner function cannot manually rollback.
   914  	t.managed = true
   915  
   916  	// If an error is returned from the function then pass it through.
   917  	err = fn(t)
   918  	t.managed = false
   919  	if err != nil {
   920  		_ = t.Rollback()
   921  		return err
   922  	}
   923  
   924  	return t.Rollback()
   925  }
   926  
   927  // Batch calls fn as part of a batch. It behaves similar to Update,
   928  // except:
   929  //
   930  // 1. concurrent Batch calls can be combined into a single Bolt
   931  // transaction.
   932  //
   933  // 2. the function passed to Batch may be called multiple times,
   934  // regardless of whether it returns error or not.
   935  //
   936  // This means that Batch function side effects must be idempotent and
   937  // take permanent effect only after a successful return is seen in
   938  // caller.
   939  //
   940  // The maximum batch size and delay can be adjusted with DB.MaxBatchSize
   941  // and DB.MaxBatchDelay, respectively.
   942  //
   943  // Batch is only useful when there are multiple goroutines calling it.
   944  func (db *DB) Batch(fn func(*Tx) error) error {
   945  	errCh := make(chan error, 1)
   946  
   947  	db.batchMu.Lock()
   948  	if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
   949  		// There is no existing batch, or the existing batch is full; start a new one.
   950  		db.batch = &batch{
   951  			db: db,
   952  		}
   953  		db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
   954  	}
   955  	db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
   956  	if len(db.batch.calls) >= db.MaxBatchSize {
   957  		// wake up batch, it's ready to run
   958  		go db.batch.trigger()
   959  	}
   960  	db.batchMu.Unlock()
   961  
   962  	err := <-errCh
   963  	if err == trySolo {
   964  		err = db.Update(fn)
   965  	}
   966  	return err
   967  }
   968  
// call is one function submitted through DB.Batch together with the
// channel used to report its outcome back to the submitter.
type call struct {
	fn  func(*Tx) error // function to execute inside the batch transaction
	err chan<- error    // receives the result (or trySolo) for this call
}
   973  
// batch collects calls submitted through DB.Batch so that they can be
// executed together in a single Update transaction.
type batch struct {
	db    *DB         // database the batch runs against
	timer *time.Timer // fires trigger after DB.MaxBatchDelay
	start sync.Once   // guards run so the batch executes at most once
	calls []call      // functions waiting to be executed
}
   980  
// trigger runs the batch if it hasn't already been run.
// It may be invoked both by the batch timer and by DB.Batch once the batch
// is full; the sync.Once in b.start makes every call after the first a no-op.
func (b *batch) trigger() {
	b.start.Do(b.run)
}
   985  
   986  // run performs the transactions in the batch and communicates results
   987  // back to DB.Batch.
   988  func (b *batch) run() {
   989  	b.db.batchMu.Lock()
   990  	b.timer.Stop()
   991  	// Make sure no new work is added to this batch, but don't break
   992  	// other batches.
   993  	if b.db.batch == b {
   994  		b.db.batch = nil
   995  	}
   996  	b.db.batchMu.Unlock()
   997  
   998  retry:
   999  	for len(b.calls) > 0 {
  1000  		var failIdx = -1
  1001  		err := b.db.Update(func(tx *Tx) error {
  1002  			for i, c := range b.calls {
  1003  				if err := safelyCall(c.fn, tx); err != nil {
  1004  					failIdx = i
  1005  					return err
  1006  				}
  1007  			}
  1008  			return nil
  1009  		})
  1010  
  1011  		if failIdx >= 0 {
  1012  			// take the failing transaction out of the batch. it's
  1013  			// safe to shorten b.calls here because db.batch no longer
  1014  			// points to us, and we hold the mutex anyway.
  1015  			c := b.calls[failIdx]
  1016  			b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
  1017  			// tell the submitter re-run it solo, continue with the rest of the batch
  1018  			c.err <- trySolo
  1019  			continue retry
  1020  		}
  1021  
  1022  		// pass success, or bolt internal errors, to all callers
  1023  		for _, c := range b.calls {
  1024  			c.err <- err
  1025  		}
  1026  		break retry
  1027  	}
  1028  }
  1029  
// trySolo is a sentinel error sent by batch.run to the submitter of a call
// that failed inside a batch, telling DB.Batch to re-run that function in
// its own Update transaction. It should never be seen by callers of
// DB.Batch.
var trySolo = errors.New("batch function returned an error and should be re-run solo")
  1034  
  1035  type panicked struct {
  1036  	reason interface{}
  1037  }
  1038  
  1039  func (p panicked) Error() string {
  1040  	if err, ok := p.reason.(error); ok {
  1041  		return err.Error()
  1042  	}
  1043  	return fmt.Sprintf("panic: %v", p.reason)
  1044  }
  1045  
  1046  func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
  1047  	defer func() {
  1048  		if p := recover(); p != nil {
  1049  			err = panicked{p}
  1050  		}
  1051  	}()
  1052  	return fn(tx)
  1053  }
  1054  
  1055  // Sync executes fdatasync() against the database file handle.
  1056  //
  1057  // This is not necessary under normal operation, however, if you use NoSync
  1058  // then it allows you to force the database file to sync against the disk.
  1059  func (db *DB) Sync() error { return fdatasync(db) }
  1060  
  1061  // Stats retrieves ongoing performance stats for the database.
  1062  // This is only updated when a transaction closes.
  1063  func (db *DB) Stats() Stats {
  1064  	db.statlock.RLock()
  1065  	defer db.statlock.RUnlock()
  1066  	return db.stats
  1067  }
  1068  
  1069  // This is for internal access to the raw data bytes from the C cursor, use
  1070  // carefully, or not at all.
  1071  func (db *DB) Info() *Info {
  1072  	_assert(db.data != nil, "database file isn't correctly mapped")
  1073  	return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
  1074  }
  1075  
  1076  // page retrieves a page reference from the mmap based on the current page size.
  1077  func (db *DB) page(id pgid) *page {
  1078  	pos := id * pgid(db.pageSize)
  1079  	return (*page)(unsafe.Pointer(&db.data[pos]))
  1080  }
  1081  
  1082  // pageInBuffer retrieves a page reference from a given byte array based on the current page size.
  1083  func (db *DB) pageInBuffer(b []byte, id pgid) *page {
  1084  	return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
  1085  }
  1086  
  1087  // meta retrieves the current meta page reference.
  1088  func (db *DB) meta() *meta {
  1089  	// We have to return the meta with the highest txid which doesn't fail
  1090  	// validation. Otherwise, we can cause errors when in fact the database is
  1091  	// in a consistent state. metaA is the one with the higher txid.
  1092  	metaA := db.meta0
  1093  	metaB := db.meta1
  1094  	if db.meta1.txid > db.meta0.txid {
  1095  		metaA = db.meta1
  1096  		metaB = db.meta0
  1097  	}
  1098  
  1099  	// Use higher meta page if valid. Otherwise, fallback to previous, if valid.
  1100  	if err := metaA.validate(); err == nil {
  1101  		return metaA
  1102  	} else if err := metaB.validate(); err == nil {
  1103  		return metaB
  1104  	}
  1105  
  1106  	// This should never be reached, because both meta1 and meta0 were validated
  1107  	// on mmap() and we do fsync() on every write.
  1108  	panic("bolt.DB.meta(): invalid meta pages")
  1109  }
  1110  
  1111  // allocate returns a contiguous block of memory starting at a given page.
  1112  func (db *DB) allocate(txid txid, count int) (*page, error) {
  1113  	// Allocate a temporary buffer for the page.
  1114  	var buf []byte
  1115  	if count == 1 {
  1116  		buf = db.pagePool.Get().([]byte)
  1117  	} else {
  1118  		buf = make([]byte, count*db.pageSize)
  1119  	}
  1120  	p := (*page)(unsafe.Pointer(&buf[0]))
  1121  	p.overflow = uint32(count - 1)
  1122  
  1123  	// Use pages from the freelist if they are available.
  1124  	if p.id = db.freelist.allocate(txid, count); p.id != 0 {
  1125  		return p, nil
  1126  	}
  1127  
  1128  	// Resize mmap() if we're at the end.
  1129  	p.id = db.rwtx.meta.pgid
  1130  	var minsz = int((p.id+pgid(count))+1) * db.pageSize
  1131  	if minsz >= db.datasz {
  1132  		if err := db.mmap(minsz); err != nil {
  1133  			return nil, fmt.Errorf("mmap allocate error: %s", err)
  1134  		}
  1135  	}
  1136  
  1137  	// Move the page id high water mark.
  1138  	db.rwtx.meta.pgid += pgid(count)
  1139  
  1140  	return p, nil
  1141  }
  1142  
  1143  // grow grows the size of the database to the given sz.
  1144  func (db *DB) grow(sz int) error {
  1145  	// Ignore if the new size is less than available file size.
  1146  	if sz <= db.filesz {
  1147  		return nil
  1148  	}
  1149  
  1150  	// If the data is smaller than the alloc size then only allocate what's needed.
  1151  	// Once it goes over the allocation size then allocate in chunks.
  1152  	if db.datasz <= db.AllocSize {
  1153  		sz = db.datasz
  1154  	} else {
  1155  		sz += db.AllocSize
  1156  	}
  1157  
  1158  	// Truncate and fsync to ensure file size metadata is flushed.
  1159  	// https://github.com/boltdb/bolt/issues/284
  1160  	if !db.NoGrowSync && !db.readOnly {
  1161  		if runtime.GOOS != "windows" {
  1162  			if err := db.file.Truncate(int64(sz)); err != nil {
  1163  				return fmt.Errorf("file resize error: %s", err)
  1164  			}
  1165  		}
  1166  		if err := db.file.Sync(); err != nil {
  1167  			return fmt.Errorf("file sync error: %s", err)
  1168  		}
  1169  		if db.Mlock {
  1170  			// unlock old file and lock new one
  1171  			if err := db.mrelock(db.filesz, sz); err != nil {
  1172  				return fmt.Errorf("mlock/munlock error: %s", err)
  1173  			}
  1174  		}
  1175  	}
  1176  
  1177  	db.filesz = sz
  1178  	return nil
  1179  }
  1180  
// IsReadOnly reports whether the DB's readOnly flag is set.
func (db *DB) IsReadOnly() bool {
	return db.readOnly
}
  1184  
  1185  func (db *DB) freepages() []pgid {
  1186  	tx, err := db.beginTx()
  1187  	defer func() {
  1188  		err = tx.Rollback()
  1189  		if err != nil {
  1190  			panic("freepages: failed to rollback tx")
  1191  		}
  1192  	}()
  1193  	if err != nil {
  1194  		panic("freepages: failed to open read only tx")
  1195  	}
  1196  
  1197  	reachable := make(map[pgid]*page)
  1198  	nofreed := make(map[pgid]bool)
  1199  	ech := make(chan error)
  1200  	go func() {
  1201  		for e := range ech {
  1202  			panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e))
  1203  		}
  1204  	}()
  1205  	tx.checkBucket(&tx.root, reachable, nofreed, HexKVStringer(), ech)
  1206  	close(ech)
  1207  
  1208  	// TODO: If check bucket reported any corruptions (ech) we shouldn't proceed to freeing the pages.
  1209  
  1210  	var fids []pgid
  1211  	for i := pgid(2); i < db.meta().pgid; i++ {
  1212  		if _, ok := reachable[i]; !ok {
  1213  			fids = append(fids, i)
  1214  		}
  1215  	}
  1216  	return fids
  1217  }
  1218  
// Options represents the options that can be set when opening a database.
type Options struct {
	// Timeout is the amount of time to wait to obtain a file lock.
	// When set to zero it will wait indefinitely. This option is only
	// available on Darwin and Linux.
	Timeout time.Duration

	// NoGrowSync sets the DB.NoGrowSync flag before memory mapping the file.
	NoGrowSync bool

	// NoFreelistSync disables syncing the freelist to disk. This improves
	// the database write performance under normal operation, but requires
	// a full database re-sync during recovery.
	NoFreelistSync bool

	// PreLoadFreelist sets whether to load the free pages when opening
	// the db file. Note that when opening the db in write mode, bbolt will
	// always load the free pages.
	PreLoadFreelist bool

	// FreelistType sets the backend freelist type. There are two options.
	// Array is simple but suffers dramatic performance degradation if the
	// database is large and fragmentation in the freelist is common.
	// The alternative is hashmap, which is faster in almost all
	// circumstances but does not guarantee that it offers the smallest
	// page id available. In the normal case it is safe.
	// The default type is array.
	FreelistType FreelistType

	// ReadOnly opens the database in read-only mode. Uses
	// flock(..., LOCK_SH |LOCK_NB) to grab a shared lock (UNIX).
	ReadOnly bool

	// MmapFlags sets the DB.MmapFlags flag before memory mapping the file.
	MmapFlags int

	// InitialMmapSize is the initial mmap size of the database
	// in bytes. Read transactions won't block write transaction
	// if the InitialMmapSize is large enough to hold database mmap
	// size. (See DB.Begin for more information)
	//
	// If <=0, the initial map size is 0.
	// If InitialMmapSize is smaller than the previous database size,
	// it has no effect.
	InitialMmapSize int

	// PageSize overrides the default OS page size.
	PageSize int

	// NoSync sets the initial value of DB.NoSync. Normally this can just be
	// set directly on the DB itself when returned from Open(), but this option
	// is useful in APIs which expose Options but not the underlying DB.
	NoSync bool

	// OpenFile is used to open files. It defaults to os.OpenFile. This option
	// is useful for writing hermetic tests.
	OpenFile func(string, int, os.FileMode) (*os.File, error)

	// Mlock locks the database file in memory when set to true.
	// It prevents potential page faults, however
	// used memory can't be reclaimed. (UNIX only)
	Mlock bool
}
  1279  
// DefaultOptions represents the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
// Growth syncing stays enabled and the array freelist backend is selected.
var DefaultOptions = &Options{
	Timeout:      0,
	NoGrowSync:   false,
	FreelistType: FreelistArrayType,
}
  1287  
// Stats represents statistics about the database.
type Stats struct {
	// TxStats must stay the first field so that it is 64-bit aligned: the
	// first word in an allocated struct can be relied upon to be 64-bit
	// aligned. Refer to https://pkg.go.dev/sync/atomic#pkg-note-BUG.
	// Also refer to discussion in https://github.com/etcd-io/bbolt/issues/577.
	TxStats TxStats // global, ongoing stats.

	// Freelist stats
	FreePageN     int // total number of free pages on the freelist
	PendingPageN  int // total number of pending pages on the freelist
	FreeAlloc     int // total bytes allocated in free pages
	FreelistInuse int // total bytes used by the freelist

	// Transaction stats
	TxN     int // total number of started read transactions
	OpenTxN int // number of currently open read transactions
}
  1306  
  1307  // Sub calculates and returns the difference between two sets of database stats.
  1308  // This is useful when obtaining stats at two different points and time and
  1309  // you need the performance counters that occurred within that time span.
  1310  func (s *Stats) Sub(other *Stats) Stats {
  1311  	if other == nil {
  1312  		return *s
  1313  	}
  1314  	var diff Stats
  1315  	diff.FreePageN = s.FreePageN
  1316  	diff.PendingPageN = s.PendingPageN
  1317  	diff.FreeAlloc = s.FreeAlloc
  1318  	diff.FreelistInuse = s.FreelistInuse
  1319  	diff.TxN = s.TxN - other.TxN
  1320  	diff.TxStats = s.TxStats.Sub(&other.TxStats)
  1321  	return diff
  1322  }
  1323  
// Info describes the raw mapped data of a DB, as returned by DB.Info.
type Info struct {
	Data     uintptr // address of the first byte of the mapped data
	PageSize int     // page size of the database
}
  1328  
// meta holds the contents of a meta page.
//
// The field order matters: sum64 hashes the raw bytes of every field that
// precedes checksum, so reordering or inserting fields changes the checksum.
type meta struct {
	magic    uint32 // must equal the magic constant for validate to pass
	version  uint32 // must equal the version constant for validate to pass
	pageSize uint32
	flags    uint32
	root     bucket // root bucket; its root page must be below pgid
	freelist pgid   // freelist page id, or pgidNoFreelist
	pgid     pgid   // page id high water mark
	txid     txid   // transaction id; its parity selects meta page 0 or 1
	checksum uint64 // sum64 over all preceding fields
}
  1340  
  1341  // validate checks the marker bytes and version of the meta page to ensure it matches this binary.
  1342  func (m *meta) validate() error {
  1343  	if m.magic != magic {
  1344  		return ErrInvalid
  1345  	} else if m.version != version {
  1346  		return ErrVersionMismatch
  1347  	} else if m.checksum != m.sum64() {
  1348  		return ErrChecksum
  1349  	}
  1350  	return nil
  1351  }
  1352  
// copy overwrites *dest with the contents of m.
func (m *meta) copy(dest *meta) {
	*dest = *m
}
  1357  
  1358  // write writes the meta onto a page.
  1359  func (m *meta) write(p *page) {
  1360  	if m.root.root >= m.pgid {
  1361  		panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
  1362  	} else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
  1363  		// TODO: reject pgidNoFreeList if !NoFreelistSync
  1364  		panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
  1365  	}
  1366  
  1367  	// Page id is either going to be 0 or 1 which we can determine by the transaction ID.
  1368  	p.id = pgid(m.txid % 2)
  1369  	p.flags |= metaPageFlag
  1370  
  1371  	// Calculate the checksum.
  1372  	m.checksum = m.sum64()
  1373  
  1374  	m.copy(p.meta())
  1375  }
  1376  
  1377  // generates the checksum for the meta.
  1378  func (m *meta) sum64() uint64 {
  1379  	var h = fnv.New64a()
  1380  	_, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
  1381  	return h.Sum64()
  1382  }
  1383  
  1384  // _assert will panic with a given formatted message if the given condition is false.
  1385  func _assert(condition bool, msg string, v ...interface{}) {
  1386  	if !condition {
  1387  		panic(fmt.Sprintf("assertion failed: "+msg, v...))
  1388  	}
  1389  }
  1390  

View as plain text