...

Source file src/go.etcd.io/bbolt/freelist.go

Documentation: go.etcd.io/bbolt

     1  package bbolt
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  	"unsafe"
     7  )
     8  
     9  // txPending holds a list of pgids and corresponding allocation txns
    10  // that are pending to be freed.
    11  type txPending struct {
    12  	ids              []pgid
    13  	alloctx          []txid // txids allocating the ids
    14  	lastReleaseBegin txid   // beginning txid of last matching releaseRange
    15  }
    16  
    17  // pidSet holds the set of starting pgids which have the same span size
    18  type pidSet map[pgid]struct{}
    19  
    20  // freelist represents a list of all pages that are available for allocation.
    21  // It also tracks pages that have been freed but are still in use by open transactions.
    22  type freelist struct {
    23  	freelistType   FreelistType                // freelist type
    24  	ids            []pgid                      // all free and available free page ids.
    25  	allocs         map[pgid]txid               // mapping of txid that allocated a pgid.
    26  	pending        map[txid]*txPending         // mapping of soon-to-be free page ids by tx.
    27  	cache          map[pgid]struct{}           // fast lookup of all free and pending page ids.
    28  	freemaps       map[uint64]pidSet           // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
    29  	forwardMap     map[pgid]uint64             // key is start pgid, value is its span size
    30  	backwardMap    map[pgid]uint64             // key is end pgid, value is its span size
    31  	allocate       func(txid txid, n int) pgid // the freelist allocate func
    32  	free_count     func() int                  // the function which gives you free page number
    33  	mergeSpans     func(ids pgids)             // the mergeSpan func
    34  	getFreePageIDs func() []pgid               // get free pgids func
    35  	readIDs        func(pgids []pgid)          // readIDs func reads list of pages and init the freelist
    36  }
    37  
    38  // newFreelist returns an empty, initialized freelist.
    39  func newFreelist(freelistType FreelistType) *freelist {
    40  	f := &freelist{
    41  		freelistType: freelistType,
    42  		allocs:       make(map[pgid]txid),
    43  		pending:      make(map[txid]*txPending),
    44  		cache:        make(map[pgid]struct{}),
    45  		freemaps:     make(map[uint64]pidSet),
    46  		forwardMap:   make(map[pgid]uint64),
    47  		backwardMap:  make(map[pgid]uint64),
    48  	}
    49  
    50  	if freelistType == FreelistMapType {
    51  		f.allocate = f.hashmapAllocate
    52  		f.free_count = f.hashmapFreeCount
    53  		f.mergeSpans = f.hashmapMergeSpans
    54  		f.getFreePageIDs = f.hashmapGetFreePageIDs
    55  		f.readIDs = f.hashmapReadIDs
    56  	} else {
    57  		f.allocate = f.arrayAllocate
    58  		f.free_count = f.arrayFreeCount
    59  		f.mergeSpans = f.arrayMergeSpans
    60  		f.getFreePageIDs = f.arrayGetFreePageIDs
    61  		f.readIDs = f.arrayReadIDs
    62  	}
    63  
    64  	return f
    65  }
    66  
    67  // size returns the size of the page after serialization.
    68  func (f *freelist) size() int {
    69  	n := f.count()
    70  	if n >= 0xFFFF {
    71  		// The first element will be used to store the count. See freelist.write.
    72  		n++
    73  	}
    74  	return int(pageHeaderSize) + (int(unsafe.Sizeof(pgid(0))) * n)
    75  }
    76  
    77  // count returns count of pages on the freelist
    78  func (f *freelist) count() int {
    79  	return f.free_count() + f.pending_count()
    80  }
    81  
    82  // arrayFreeCount returns count of free pages(array version)
    83  func (f *freelist) arrayFreeCount() int {
    84  	return len(f.ids)
    85  }
    86  
    87  // pending_count returns count of pending pages
    88  func (f *freelist) pending_count() int {
    89  	var count int
    90  	for _, txp := range f.pending {
    91  		count += len(txp.ids)
    92  	}
    93  	return count
    94  }
    95  
    96  // copyall copies a list of all free ids and all pending ids in one sorted list.
    97  // f.count returns the minimum length required for dst.
    98  func (f *freelist) copyall(dst []pgid) {
    99  	m := make(pgids, 0, f.pending_count())
   100  	for _, txp := range f.pending {
   101  		m = append(m, txp.ids...)
   102  	}
   103  	sort.Sort(m)
   104  	mergepgids(dst, f.getFreePageIDs(), m)
   105  }
   106  
   107  // arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
   108  // If a contiguous block cannot be found then 0 is returned.
   109  func (f *freelist) arrayAllocate(txid txid, n int) pgid {
   110  	if len(f.ids) == 0 {
   111  		return 0
   112  	}
   113  
   114  	var initial, previd pgid
   115  	for i, id := range f.ids {
   116  		if id <= 1 {
   117  			panic(fmt.Sprintf("invalid page allocation: %d", id))
   118  		}
   119  
   120  		// Reset initial page if this is not contiguous.
   121  		if previd == 0 || id-previd != 1 {
   122  			initial = id
   123  		}
   124  
   125  		// If we found a contiguous block then remove it and return it.
   126  		if (id-initial)+1 == pgid(n) {
   127  			// If we're allocating off the beginning then take the fast path
   128  			// and just adjust the existing slice. This will use extra memory
   129  			// temporarily but the append() in free() will realloc the slice
   130  			// as is necessary.
   131  			if (i + 1) == n {
   132  				f.ids = f.ids[i+1:]
   133  			} else {
   134  				copy(f.ids[i-n+1:], f.ids[i+1:])
   135  				f.ids = f.ids[:len(f.ids)-n]
   136  			}
   137  
   138  			// Remove from the free cache.
   139  			for i := pgid(0); i < pgid(n); i++ {
   140  				delete(f.cache, initial+i)
   141  			}
   142  			f.allocs[initial] = txid
   143  			return initial
   144  		}
   145  
   146  		previd = id
   147  	}
   148  	return 0
   149  }
   150  
   151  // free releases a page and its overflow for a given transaction id.
   152  // If the page is already free then a panic will occur.
   153  func (f *freelist) free(txid txid, p *page) {
   154  	if p.id <= 1 {
   155  		panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
   156  	}
   157  
   158  	// Free page and all its overflow pages.
   159  	txp := f.pending[txid]
   160  	if txp == nil {
   161  		txp = &txPending{}
   162  		f.pending[txid] = txp
   163  	}
   164  	allocTxid, ok := f.allocs[p.id]
   165  	if ok {
   166  		delete(f.allocs, p.id)
   167  	} else if (p.flags & freelistPageFlag) != 0 {
   168  		// Freelist is always allocated by prior tx.
   169  		allocTxid = txid - 1
   170  	}
   171  
   172  	for id := p.id; id <= p.id+pgid(p.overflow); id++ {
   173  		// Verify that page is not already free.
   174  		if _, ok := f.cache[id]; ok {
   175  			panic(fmt.Sprintf("page %d already freed", id))
   176  		}
   177  		// Add to the freelist and cache.
   178  		txp.ids = append(txp.ids, id)
   179  		txp.alloctx = append(txp.alloctx, allocTxid)
   180  		f.cache[id] = struct{}{}
   181  	}
   182  }
   183  
   184  // release moves all page ids for a transaction id (or older) to the freelist.
   185  func (f *freelist) release(txid txid) {
   186  	m := make(pgids, 0)
   187  	for tid, txp := range f.pending {
   188  		if tid <= txid {
   189  			// Move transaction's pending pages to the available freelist.
   190  			// Don't remove from the cache since the page is still free.
   191  			m = append(m, txp.ids...)
   192  			delete(f.pending, tid)
   193  		}
   194  	}
   195  	f.mergeSpans(m)
   196  }
   197  
   198  // releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
   199  func (f *freelist) releaseRange(begin, end txid) {
   200  	if begin > end {
   201  		return
   202  	}
   203  	var m pgids
   204  	for tid, txp := range f.pending {
   205  		if tid < begin || tid > end {
   206  			continue
   207  		}
   208  		// Don't recompute freed pages if ranges haven't updated.
   209  		if txp.lastReleaseBegin == begin {
   210  			continue
   211  		}
   212  		for i := 0; i < len(txp.ids); i++ {
   213  			if atx := txp.alloctx[i]; atx < begin || atx > end {
   214  				continue
   215  			}
   216  			m = append(m, txp.ids[i])
   217  			txp.ids[i] = txp.ids[len(txp.ids)-1]
   218  			txp.ids = txp.ids[:len(txp.ids)-1]
   219  			txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
   220  			txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
   221  			i--
   222  		}
   223  		txp.lastReleaseBegin = begin
   224  		if len(txp.ids) == 0 {
   225  			delete(f.pending, tid)
   226  		}
   227  	}
   228  	f.mergeSpans(m)
   229  }
   230  
   231  // rollback removes the pages from a given pending tx.
   232  func (f *freelist) rollback(txid txid) {
   233  	// Remove page ids from cache.
   234  	txp := f.pending[txid]
   235  	if txp == nil {
   236  		return
   237  	}
   238  	var m pgids
   239  	for i, pgid := range txp.ids {
   240  		delete(f.cache, pgid)
   241  		tx := txp.alloctx[i]
   242  		if tx == 0 {
   243  			continue
   244  		}
   245  		if tx != txid {
   246  			// Pending free aborted; restore page back to alloc list.
   247  			f.allocs[pgid] = tx
   248  		} else {
   249  			// Freed page was allocated by this txn; OK to throw away.
   250  			m = append(m, pgid)
   251  		}
   252  	}
   253  	// Remove pages from pending list and mark as free if allocated by txid.
   254  	delete(f.pending, txid)
   255  	f.mergeSpans(m)
   256  }
   257  
   258  // freed returns whether a given page is in the free list.
   259  func (f *freelist) freed(pgId pgid) bool {
   260  	_, ok := f.cache[pgId]
   261  	return ok
   262  }
   263  
   264  // read initializes the freelist from a freelist page.
   265  func (f *freelist) read(p *page) {
   266  	if (p.flags & freelistPageFlag) == 0 {
   267  		panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
   268  	}
   269  	// If the page.count is at the max uint16 value (64k) then it's considered
   270  	// an overflow and the size of the freelist is stored as the first element.
   271  	var idx, count = 0, int(p.count)
   272  	if count == 0xFFFF {
   273  		idx = 1
   274  		c := *(*pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
   275  		count = int(c)
   276  		if count < 0 {
   277  			panic(fmt.Sprintf("leading element count %d overflows int", c))
   278  		}
   279  	}
   280  
   281  	// Copy the list of page ids from the freelist.
   282  	if count == 0 {
   283  		f.ids = nil
   284  	} else {
   285  		var ids []pgid
   286  		data := unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(ids[0]), idx)
   287  		unsafeSlice(unsafe.Pointer(&ids), data, count)
   288  
   289  		// copy the ids, so we don't modify on the freelist page directly
   290  		idsCopy := make([]pgid, count)
   291  		copy(idsCopy, ids)
   292  		// Make sure they're sorted.
   293  		sort.Sort(pgids(idsCopy))
   294  
   295  		f.readIDs(idsCopy)
   296  	}
   297  }
   298  
   299  // arrayReadIDs initializes the freelist from a given list of ids.
   300  func (f *freelist) arrayReadIDs(ids []pgid) {
   301  	f.ids = ids
   302  	f.reindex()
   303  }
   304  
   305  func (f *freelist) arrayGetFreePageIDs() []pgid {
   306  	return f.ids
   307  }
   308  
   309  // write writes the page ids onto a freelist page. All free and pending ids are
   310  // saved to disk since in the event of a program crash, all pending ids will
   311  // become free.
   312  func (f *freelist) write(p *page) error {
   313  	// Combine the old free pgids and pgids waiting on an open transaction.
   314  
   315  	// Update the header flag.
   316  	p.flags |= freelistPageFlag
   317  
   318  	// The page.count can only hold up to 64k elements so if we overflow that
   319  	// number then we handle it by putting the size in the first element.
   320  	l := f.count()
   321  	if l == 0 {
   322  		p.count = uint16(l)
   323  	} else if l < 0xFFFF {
   324  		p.count = uint16(l)
   325  		var ids []pgid
   326  		data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
   327  		unsafeSlice(unsafe.Pointer(&ids), data, l)
   328  		f.copyall(ids)
   329  	} else {
   330  		p.count = 0xFFFF
   331  		var ids []pgid
   332  		data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
   333  		unsafeSlice(unsafe.Pointer(&ids), data, l+1)
   334  		ids[0] = pgid(l)
   335  		f.copyall(ids[1:])
   336  	}
   337  
   338  	return nil
   339  }
   340  
   341  // reload reads the freelist from a page and filters out pending items.
   342  func (f *freelist) reload(p *page) {
   343  	f.read(p)
   344  
   345  	// Build a cache of only pending pages.
   346  	pcache := make(map[pgid]bool)
   347  	for _, txp := range f.pending {
   348  		for _, pendingID := range txp.ids {
   349  			pcache[pendingID] = true
   350  		}
   351  	}
   352  
   353  	// Check each page in the freelist and build a new available freelist
   354  	// with any pages not in the pending lists.
   355  	var a []pgid
   356  	for _, id := range f.getFreePageIDs() {
   357  		if !pcache[id] {
   358  			a = append(a, id)
   359  		}
   360  	}
   361  
   362  	f.readIDs(a)
   363  }
   364  
   365  // noSyncReload reads the freelist from pgids and filters out pending items.
   366  func (f *freelist) noSyncReload(pgids []pgid) {
   367  	// Build a cache of only pending pages.
   368  	pcache := make(map[pgid]bool)
   369  	for _, txp := range f.pending {
   370  		for _, pendingID := range txp.ids {
   371  			pcache[pendingID] = true
   372  		}
   373  	}
   374  
   375  	// Check each page in the freelist and build a new available freelist
   376  	// with any pages not in the pending lists.
   377  	var a []pgid
   378  	for _, id := range pgids {
   379  		if !pcache[id] {
   380  			a = append(a, id)
   381  		}
   382  	}
   383  
   384  	f.readIDs(a)
   385  }
   386  
   387  // reindex rebuilds the free cache based on available and pending free lists.
   388  func (f *freelist) reindex() {
   389  	ids := f.getFreePageIDs()
   390  	f.cache = make(map[pgid]struct{}, len(ids))
   391  	for _, id := range ids {
   392  		f.cache[id] = struct{}{}
   393  	}
   394  	for _, txp := range f.pending {
   395  		for _, pendingID := range txp.ids {
   396  			f.cache[pendingID] = struct{}{}
   397  		}
   398  	}
   399  }
   400  
   401  // arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array
   402  func (f *freelist) arrayMergeSpans(ids pgids) {
   403  	sort.Sort(ids)
   404  	f.ids = pgids(f.ids).merge(ids)
   405  }
   406  

View as plain text