1 package bbolt
2
3 import (
4 "bytes"
5 "fmt"
6 "unsafe"
7 )
8
9 const (
10
11 MaxKeySize = 32768
12
13
14 MaxValueSize = (1 << 31) - 2
15 )
16
17 const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
18
19 const (
20 minFillPercent = 0.1
21 maxFillPercent = 1.0
22 )
23
24
25
26 const DefaultFillPercent = 0.5
27
28
29 type Bucket struct {
30 *bucket
31 tx *Tx
32 buckets map[string]*Bucket
33 page *page
34 rootNode *node
35 nodes map[pgid]*node
36
37
38
39
40
41
42 FillPercent float64
43 }
44
45
46
47
48
49 type bucket struct {
50 root pgid
51 sequence uint64
52 }
53
54
55 func newBucket(tx *Tx) Bucket {
56 var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
57 if tx.writable {
58 b.buckets = make(map[string]*Bucket)
59 b.nodes = make(map[pgid]*node)
60 }
61 return b
62 }
63
64
65 func (b *Bucket) Tx() *Tx {
66 return b.tx
67 }
68
69
70 func (b *Bucket) Root() pgid {
71 return b.root
72 }
73
74
75 func (b *Bucket) Writable() bool {
76 return b.tx.writable
77 }
78
79
80
81
82 func (b *Bucket) Cursor() *Cursor {
83
84 b.tx.stats.IncCursorCount(1)
85
86
87 return &Cursor{
88 bucket: b,
89 stack: make([]elemRef, 0),
90 }
91 }
92
93
94
95
96 func (b *Bucket) Bucket(name []byte) *Bucket {
97 if b.buckets != nil {
98 if child := b.buckets[string(name)]; child != nil {
99 return child
100 }
101 }
102
103
104 c := b.Cursor()
105 k, v, flags := c.seek(name)
106
107
108 if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
109 return nil
110 }
111
112
113 var child = b.openBucket(v)
114 if b.buckets != nil {
115 b.buckets[string(name)] = child
116 }
117
118 return child
119 }
120
121
122
123 func (b *Bucket) openBucket(value []byte) *Bucket {
124 var child = newBucket(b.tx)
125
126
127 const unalignedMask = unsafe.Alignof(struct {
128 bucket
129 page
130 }{}) - 1
131 unaligned := uintptr(unsafe.Pointer(&value[0]))&unalignedMask != 0
132 if unaligned {
133 value = cloneBytes(value)
134 }
135
136
137
138 if b.tx.writable && !unaligned {
139 child.bucket = &bucket{}
140 *child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
141 } else {
142 child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
143 }
144
145
146 if child.root == 0 {
147 child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
148 }
149
150 return &child
151 }
152
153
154
155
156 func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
157 if b.tx.db == nil {
158 return nil, ErrTxClosed
159 } else if !b.tx.writable {
160 return nil, ErrTxNotWritable
161 } else if len(key) == 0 {
162 return nil, ErrBucketNameRequired
163 }
164
165
166
167
168 newKey := cloneBytes(key)
169
170
171 c := b.Cursor()
172 k, _, flags := c.seek(newKey)
173
174
175 if bytes.Equal(newKey, k) {
176 if (flags & bucketLeafFlag) != 0 {
177 return nil, ErrBucketExists
178 }
179 return nil, ErrIncompatibleValue
180 }
181
182
183 var bucket = Bucket{
184 bucket: &bucket{},
185 rootNode: &node{isLeaf: true},
186 FillPercent: DefaultFillPercent,
187 }
188 var value = bucket.write()
189
190 c.node().put(newKey, newKey, value, 0, bucketLeafFlag)
191
192
193
194
195 b.page = nil
196
197 return b.Bucket(newKey), nil
198 }
199
200
201
202
203 func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
204 child, err := b.CreateBucket(key)
205 if err == ErrBucketExists {
206 return b.Bucket(key), nil
207 } else if err != nil {
208 return nil, err
209 }
210 return child, nil
211 }
212
213
214
215 func (b *Bucket) DeleteBucket(key []byte) error {
216 if b.tx.db == nil {
217 return ErrTxClosed
218 } else if !b.Writable() {
219 return ErrTxNotWritable
220 }
221
222
223 c := b.Cursor()
224 k, _, flags := c.seek(key)
225
226
227 if !bytes.Equal(key, k) {
228 return ErrBucketNotFound
229 } else if (flags & bucketLeafFlag) == 0 {
230 return ErrIncompatibleValue
231 }
232
233
234 child := b.Bucket(key)
235 err := child.ForEachBucket(func(k []byte) error {
236 if err := child.DeleteBucket(k); err != nil {
237 return fmt.Errorf("delete bucket: %s", err)
238 }
239 return nil
240 })
241 if err != nil {
242 return err
243 }
244
245
246 delete(b.buckets, string(key))
247
248
249 child.nodes = nil
250 child.rootNode = nil
251 child.free()
252
253
254 c.node().del(key)
255
256 return nil
257 }
258
259
260
261
262 func (b *Bucket) Get(key []byte) []byte {
263 k, v, flags := b.Cursor().seek(key)
264
265
266 if (flags & bucketLeafFlag) != 0 {
267 return nil
268 }
269
270
271 if !bytes.Equal(key, k) {
272 return nil
273 }
274 return v
275 }
276
277
278
279
280
281 func (b *Bucket) Put(key []byte, value []byte) error {
282 if b.tx.db == nil {
283 return ErrTxClosed
284 } else if !b.Writable() {
285 return ErrTxNotWritable
286 } else if len(key) == 0 {
287 return ErrKeyRequired
288 } else if len(key) > MaxKeySize {
289 return ErrKeyTooLarge
290 } else if int64(len(value)) > MaxValueSize {
291 return ErrValueTooLarge
292 }
293
294
295
296
297 newKey := cloneBytes(key)
298
299
300 c := b.Cursor()
301 k, _, flags := c.seek(newKey)
302
303
304 if bytes.Equal(newKey, k) && (flags&bucketLeafFlag) != 0 {
305 return ErrIncompatibleValue
306 }
307
308
309
310 c.node().put(newKey, newKey, value, 0, 0)
311
312 return nil
313 }
314
315
316
317
318 func (b *Bucket) Delete(key []byte) error {
319 if b.tx.db == nil {
320 return ErrTxClosed
321 } else if !b.Writable() {
322 return ErrTxNotWritable
323 }
324
325
326 c := b.Cursor()
327 k, _, flags := c.seek(key)
328
329
330 if !bytes.Equal(key, k) {
331 return nil
332 }
333
334
335 if (flags & bucketLeafFlag) != 0 {
336 return ErrIncompatibleValue
337 }
338
339
340 c.node().del(key)
341
342 return nil
343 }
344
345
346 func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
347
348
349 func (b *Bucket) SetSequence(v uint64) error {
350 if b.tx.db == nil {
351 return ErrTxClosed
352 } else if !b.Writable() {
353 return ErrTxNotWritable
354 }
355
356
357
358 if b.rootNode == nil {
359 _ = b.node(b.root, nil)
360 }
361
362
363 b.bucket.sequence = v
364 return nil
365 }
366
367
368 func (b *Bucket) NextSequence() (uint64, error) {
369 if b.tx.db == nil {
370 return 0, ErrTxClosed
371 } else if !b.Writable() {
372 return 0, ErrTxNotWritable
373 }
374
375
376
377 if b.rootNode == nil {
378 _ = b.node(b.root, nil)
379 }
380
381
382 b.bucket.sequence++
383 return b.bucket.sequence, nil
384 }
385
386
387
388
389
390
391 func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
392 if b.tx.db == nil {
393 return ErrTxClosed
394 }
395 c := b.Cursor()
396 for k, v := c.First(); k != nil; k, v = c.Next() {
397 if err := fn(k, v); err != nil {
398 return err
399 }
400 }
401 return nil
402 }
403
404 func (b *Bucket) ForEachBucket(fn func(k []byte) error) error {
405 if b.tx.db == nil {
406 return ErrTxClosed
407 }
408 c := b.Cursor()
409 for k, _, flags := c.first(); k != nil; k, _, flags = c.next() {
410 if flags&bucketLeafFlag != 0 {
411 if err := fn(k); err != nil {
412 return err
413 }
414 }
415 }
416 return nil
417 }
418
419
420 func (b *Bucket) Stats() BucketStats {
421 var s, subStats BucketStats
422 pageSize := b.tx.db.pageSize
423 s.BucketN += 1
424 if b.root == 0 {
425 s.InlineBucketN += 1
426 }
427 b.forEachPage(func(p *page, depth int, pgstack []pgid) {
428 if (p.flags & leafPageFlag) != 0 {
429 s.KeyN += int(p.count)
430
431
432 used := pageHeaderSize
433
434 if p.count != 0 {
435
436 used += leafPageElementSize * uintptr(p.count-1)
437
438
439
440
441
442
443 lastElement := p.leafPageElement(p.count - 1)
444 used += uintptr(lastElement.pos + lastElement.ksize + lastElement.vsize)
445 }
446
447 if b.root == 0 {
448
449 s.InlineBucketInuse += int(used)
450 } else {
451
452 s.LeafPageN++
453 s.LeafInuse += int(used)
454 s.LeafOverflowN += int(p.overflow)
455
456
457
458
459 for i := uint16(0); i < p.count; i++ {
460 e := p.leafPageElement(i)
461 if (e.flags & bucketLeafFlag) != 0 {
462
463
464 subStats.Add(b.openBucket(e.value()).Stats())
465 }
466 }
467 }
468 } else if (p.flags & branchPageFlag) != 0 {
469 s.BranchPageN++
470 lastElement := p.branchPageElement(p.count - 1)
471
472
473
474 used := pageHeaderSize + (branchPageElementSize * uintptr(p.count-1))
475
476
477
478
479 used += uintptr(lastElement.pos + lastElement.ksize)
480 s.BranchInuse += int(used)
481 s.BranchOverflowN += int(p.overflow)
482 }
483
484
485 if depth+1 > s.Depth {
486 s.Depth = depth + 1
487 }
488 })
489
490
491 s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
492 s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
493
494
495 s.Depth += subStats.Depth
496
497 s.Add(subStats)
498 return s
499 }
500
501
502 func (b *Bucket) forEachPage(fn func(*page, int, []pgid)) {
503
504 if b.page != nil {
505 fn(b.page, 0, []pgid{b.root})
506 return
507 }
508
509
510 b.tx.forEachPage(b.root, fn)
511 }
512
513
514
515 func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
516
517 if b.page != nil {
518 fn(b.page, nil, 0)
519 return
520 }
521 b._forEachPageNode(b.root, 0, fn)
522 }
523
524 func (b *Bucket) _forEachPageNode(pgId pgid, depth int, fn func(*page, *node, int)) {
525 var p, n = b.pageNode(pgId)
526
527
528 fn(p, n, depth)
529
530
531 if p != nil {
532 if (p.flags & branchPageFlag) != 0 {
533 for i := 0; i < int(p.count); i++ {
534 elem := p.branchPageElement(uint16(i))
535 b._forEachPageNode(elem.pgid, depth+1, fn)
536 }
537 }
538 } else {
539 if !n.isLeaf {
540 for _, inode := range n.inodes {
541 b._forEachPageNode(inode.pgid, depth+1, fn)
542 }
543 }
544 }
545 }
546
547
548 func (b *Bucket) spill() error {
549
550 for name, child := range b.buckets {
551
552
553
554 var value []byte
555 if child.inlineable() {
556 child.free()
557 value = child.write()
558 } else {
559 if err := child.spill(); err != nil {
560 return err
561 }
562
563
564 value = make([]byte, unsafe.Sizeof(bucket{}))
565 var bucket = (*bucket)(unsafe.Pointer(&value[0]))
566 *bucket = *child.bucket
567 }
568
569
570 if child.rootNode == nil {
571 continue
572 }
573
574
575 var c = b.Cursor()
576 k, _, flags := c.seek([]byte(name))
577 if !bytes.Equal([]byte(name), k) {
578 panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
579 }
580 if flags&bucketLeafFlag == 0 {
581 panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
582 }
583 c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
584 }
585
586
587 if b.rootNode == nil {
588 return nil
589 }
590
591
592 if err := b.rootNode.spill(); err != nil {
593 return err
594 }
595 b.rootNode = b.rootNode.root()
596
597
598 if b.rootNode.pgid >= b.tx.meta.pgid {
599 panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
600 }
601 b.root = b.rootNode.pgid
602
603 return nil
604 }
605
606
607
608 func (b *Bucket) inlineable() bool {
609 var n = b.rootNode
610
611
612 if n == nil || !n.isLeaf {
613 return false
614 }
615
616
617
618 var size = pageHeaderSize
619 for _, inode := range n.inodes {
620 size += leafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value))
621
622 if inode.flags&bucketLeafFlag != 0 {
623 return false
624 } else if size > b.maxInlineBucketSize() {
625 return false
626 }
627 }
628
629 return true
630 }
631
632
633 func (b *Bucket) maxInlineBucketSize() uintptr {
634 return uintptr(b.tx.db.pageSize / 4)
635 }
636
637
638 func (b *Bucket) write() []byte {
639
640 var n = b.rootNode
641 var value = make([]byte, bucketHeaderSize+n.size())
642
643
644 var bucket = (*bucket)(unsafe.Pointer(&value[0]))
645 *bucket = *b.bucket
646
647
648 var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
649 n.write(p)
650
651 return value
652 }
653
654
655 func (b *Bucket) rebalance() {
656 for _, n := range b.nodes {
657 n.rebalance()
658 }
659 for _, child := range b.buckets {
660 child.rebalance()
661 }
662 }
663
664
665 func (b *Bucket) node(pgId pgid, parent *node) *node {
666 _assert(b.nodes != nil, "nodes map expected")
667
668
669 if n := b.nodes[pgId]; n != nil {
670 return n
671 }
672
673
674 n := &node{bucket: b, parent: parent}
675 if parent == nil {
676 b.rootNode = n
677 } else {
678 parent.children = append(parent.children, n)
679 }
680
681
682 var p = b.page
683 if p == nil {
684 p = b.tx.page(pgId)
685 }
686
687
688 n.read(p)
689 b.nodes[pgId] = n
690
691
692 b.tx.stats.IncNodeCount(1)
693
694 return n
695 }
696
697
698 func (b *Bucket) free() {
699 if b.root == 0 {
700 return
701 }
702
703 var tx = b.tx
704 b.forEachPageNode(func(p *page, n *node, _ int) {
705 if p != nil {
706 tx.db.freelist.free(tx.meta.txid, p)
707 } else {
708 n.free()
709 }
710 })
711 b.root = 0
712 }
713
714
715 func (b *Bucket) dereference() {
716 if b.rootNode != nil {
717 b.rootNode.root().dereference()
718 }
719
720 for _, child := range b.buckets {
721 child.dereference()
722 }
723 }
724
725
726
727 func (b *Bucket) pageNode(id pgid) (*page, *node) {
728
729
730 if b.root == 0 {
731 if id != 0 {
732 panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
733 }
734 if b.rootNode != nil {
735 return nil, b.rootNode
736 }
737 return b.page, nil
738 }
739
740
741 if b.nodes != nil {
742 if n := b.nodes[id]; n != nil {
743 return nil, n
744 }
745 }
746
747
748 return b.tx.page(id), nil
749 }
750
751
752 type BucketStats struct {
753
754 BranchPageN int
755 BranchOverflowN int
756 LeafPageN int
757 LeafOverflowN int
758
759
760 KeyN int
761 Depth int
762
763
764 BranchAlloc int
765 BranchInuse int
766 LeafAlloc int
767 LeafInuse int
768
769
770 BucketN int
771 InlineBucketN int
772 InlineBucketInuse int
773 }
774
775 func (s *BucketStats) Add(other BucketStats) {
776 s.BranchPageN += other.BranchPageN
777 s.BranchOverflowN += other.BranchOverflowN
778 s.LeafPageN += other.LeafPageN
779 s.LeafOverflowN += other.LeafOverflowN
780 s.KeyN += other.KeyN
781 if s.Depth < other.Depth {
782 s.Depth = other.Depth
783 }
784 s.BranchAlloc += other.BranchAlloc
785 s.BranchInuse += other.BranchInuse
786 s.LeafAlloc += other.LeafAlloc
787 s.LeafInuse += other.LeafInuse
788
789 s.BucketN += other.BucketN
790 s.InlineBucketN += other.InlineBucketN
791 s.InlineBucketInuse += other.InlineBucketInuse
792 }
793
794
795 func cloneBytes(v []byte) []byte {
796 var clone = make([]byte, len(v))
797 copy(clone, v)
798 return clone
799 }
800
View as plain text