1 // Copyright 2019 Google LLC. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package compact provides compact Merkle tree data structures. 16 package compact 17 18 import ( 19 "bytes" 20 "errors" 21 "fmt" 22 "math/bits" 23 ) 24 25 // HashFn computes an internal node's hash using the hashes of its child nodes. 26 type HashFn func(left, right []byte) []byte 27 28 // VisitFn visits the node with the specified ID and hash. 29 type VisitFn func(id NodeID, hash []byte) 30 31 // RangeFactory allows creating compact ranges with the specified hash 32 // function, which must not be nil, and must not be changed. 33 type RangeFactory struct { 34 Hash HashFn 35 } 36 37 // NewRange creates a Range for [begin, end) with the given set of hashes. The 38 // hashes correspond to the roots of the minimal set of perfect sub-trees 39 // covering the [begin, end) leaves range, ordered left to right. 40 func (f *RangeFactory) NewRange(begin, end uint64, hashes [][]byte) (*Range, error) { 41 if end < begin { 42 return nil, fmt.Errorf("invalid range: end=%d, want >= %d", end, begin) 43 } 44 if got, want := len(hashes), RangeSize(begin, end); got != want { 45 return nil, fmt.Errorf("invalid hashes: got %d values, want %d", got, want) 46 } 47 return &Range{f: f, begin: begin, end: end, hashes: hashes}, nil 48 } 49 50 // NewEmptyRange returns a new Range for an empty [begin, begin) range. The 51 // value of begin defines where the range will start growing from when entries 52 // are appended to it. 53 func (f *RangeFactory) NewEmptyRange(begin uint64) *Range { 54 return &Range{f: f, begin: begin, end: begin} 55 } 56 57 // Range represents a compact Merkle tree range for leaf indices [begin, end). 58 // 59 // It contains the minimal set of perfect subtrees whose leaves comprise this 60 // range. The structure is efficiently mergeable with other compact ranges that 61 // share one of the endpoints with it. 62 // 63 // For more details, see 64 // https://github.com/transparency-dev/merkle/blob/main/docs/compact_ranges.md. 65 type Range struct { 66 f *RangeFactory 67 begin uint64 68 end uint64 69 hashes [][]byte 70 } 71 72 // Begin returns the first index covered by the range (inclusive). 73 func (r *Range) Begin() uint64 { 74 return r.begin 75 } 76 77 // End returns the last index covered by the range (exclusive). 78 func (r *Range) End() uint64 { 79 return r.end 80 } 81 82 // Hashes returns sub-tree hashes corresponding to the minimal set of perfect 83 // sub-trees covering the [begin, end) range, ordered left to right. 84 func (r *Range) Hashes() [][]byte { 85 return r.hashes 86 } 87 88 // Append extends the compact range by appending the passed in hash to it. It 89 // reports all the added nodes through the visitor function (if non-nil). 90 func (r *Range) Append(hash []byte, visitor VisitFn) error { 91 if visitor != nil { 92 visitor(NewNodeID(0, r.end), hash) 93 } 94 return r.appendImpl(r.end+1, hash, nil, visitor) 95 } 96 97 // AppendRange extends the compact range by merging in the other compact range 98 // from the right. It uses the tree hasher to calculate hashes of newly created 99 // nodes, and reports them through the visitor function (if non-nil). 100 func (r *Range) AppendRange(other *Range, visitor VisitFn) error { 101 if other.f != r.f { 102 return errors.New("incompatible ranges") 103 } 104 if got, want := other.begin, r.end; got != want { 105 return fmt.Errorf("ranges are disjoint: other.begin=%d, want %d", got, want) 106 } 107 if len(other.hashes) == 0 { // The other range is empty, merging is trivial. 108 return nil 109 } 110 return r.appendImpl(other.end, other.hashes[0], other.hashes[1:], visitor) 111 } 112 113 // GetRootHash returns the root hash of the Merkle tree represented by this 114 // compact range. Requires the range to start at index 0. If the range is 115 // empty, returns nil. 116 // 117 // If visitor is not nil, it is called with all "ephemeral" nodes (i.e. the 118 // ones rooting imperfect subtrees) along the right border of the tree. 119 func (r *Range) GetRootHash(visitor VisitFn) ([]byte, error) { 120 if r.begin != 0 { 121 return nil, fmt.Errorf("begin=%d, want 0", r.begin) 122 } 123 ln := len(r.hashes) 124 if ln == 0 { 125 return nil, nil 126 } 127 hash := r.hashes[ln-1] 128 // All non-perfect subtree hashes along the right border of the tree 129 // correspond to the parents of all perfect subtree nodes except the lowest 130 // one (therefore the loop skips it). 131 for i, size := ln-2, r.end; i >= 0; i-- { 132 hash = r.f.Hash(r.hashes[i], hash) 133 if visitor != nil { 134 size &= size - 1 // Delete the previous node. 135 level := uint(bits.TrailingZeros64(size)) + 1 // Compute the parent level. 136 index := size >> level // And its horizontal index. 137 visitor(NewNodeID(level, index), hash) 138 } 139 } 140 return hash, nil 141 } 142 143 // Equal compares two Ranges for equality. 144 func (r *Range) Equal(other *Range) bool { 145 if r.f != other.f || r.begin != other.begin || r.end != other.end { 146 return false 147 } 148 if len(r.hashes) != len(other.hashes) { 149 return false 150 } 151 for i := range r.hashes { 152 if !bytes.Equal(r.hashes[i], other.hashes[i]) { 153 return false 154 } 155 } 156 return true 157 } 158 159 // appendImpl extends the compact range by merging the [r.end, end) compact 160 // range into it. The other compact range is decomposed into a seed hash and 161 // all the other hashes (possibly none). The method uses the tree hasher to 162 // calculate hashes of newly created nodes, and reports them through the 163 // visitor function (if non-nil). 164 func (r *Range) appendImpl(end uint64, seed []byte, hashes [][]byte, visitor VisitFn) error { 165 // Bits [low, high) of r.end encode the merge path, i.e. the sequence of node 166 // merges that transforms the two compact ranges into one. 167 low, high := getMergePath(r.begin, r.end, end) 168 if high < low { 169 high = low 170 } 171 index := r.end >> low 172 // Now bits [0, high-low) of index encode the merge path. 173 174 // The number of one bits in index is the number of nodes from the left range 175 // that will be merged, and zero bits correspond to the nodes in the right 176 // range. Below we make sure that both ranges have enough hashes, which can 177 // be false only in case the data is corrupted in some way. 178 ones := bits.OnesCount64(index & (1<<(high-low) - 1)) 179 if ln := len(r.hashes); ln < ones { 180 return fmt.Errorf("corrupted lhs range: got %d hashes, want >= %d", ln, ones) 181 } 182 if ln, zeros := len(hashes), int(high-low)-ones; ln < zeros { 183 return fmt.Errorf("corrupted rhs range: got %d hashes, want >= %d", ln+1, zeros+1) 184 } 185 186 // Some of the trailing nodes of the left compact range, and some of the 187 // leading nodes of the right range, are sequentially merged with the seed, 188 // according to the mask. All new nodes are reported through the visitor. 189 idx1, idx2 := len(r.hashes), 0 190 for h := low; h < high; h++ { 191 if index&1 == 0 { 192 seed = r.f.Hash(seed, hashes[idx2]) 193 idx2++ 194 } else { 195 idx1-- 196 seed = r.f.Hash(r.hashes[idx1], seed) 197 } 198 index >>= 1 199 if visitor != nil { 200 visitor(NewNodeID(h+1, index), seed) 201 } 202 } 203 204 // All nodes from both ranges that have not been merged are bundled together 205 // with the "merged" seed node. 206 r.hashes = append(append(r.hashes[:idx1], seed), hashes[idx2:]...) 207 r.end = end 208 return nil 209 } 210 211 // getMergePath returns the merging path between the compact range [begin, mid) 212 // and [mid, end). The path is represented as a range of bits within mid, with 213 // bit indices [low, high). A bit value of 1 on level i of mid means that the 214 // node on this level merges with the corresponding node in the left compact 215 // range, whereas 0 represents merging with the right compact range. If the 216 // path is empty then high <= low. 217 // 218 // The output is not specified if begin <= mid <= end doesn't hold, but the 219 // function never panics. 220 func getMergePath(begin, mid, end uint64) (uint, uint) { 221 low := bits.TrailingZeros64(mid) 222 high := 64 223 if begin != 0 { 224 high = bits.Len64(mid ^ (begin - 1)) 225 } 226 if high2 := bits.Len64((mid - 1) ^ end); high2 < high { 227 high = high2 228 } 229 return uint(low), uint(high - 1) 230 } 231 232 // Decompose splits the [begin, end) range into a minimal number of sub-ranges, 233 // each of which is of the form [m * 2^k, (m+1) * 2^k), i.e. of length 2^k, for 234 // some integers m, k >= 0. 235 // 236 // The sequence of sizes is returned encoded as bitmasks left and right, where: 237 // - a 1 bit in a bitmask denotes a sub-range of the corresponding size 2^k 238 // - left mask bits in LSB-to-MSB order encode the left part of the sequence 239 // - right mask bits in MSB-to-LSB order encode the right part 240 // 241 // The corresponding values of m are not returned (they can be calculated from 242 // begin and the sub-range sizes). 243 // 244 // For example, (begin, end) values of (0b110, 0b11101) would indicate a 245 // sequence of tree sizes: 2,8; 8,4,1. 246 // 247 // The output is not specified if begin > end, but the function never panics. 248 func Decompose(begin, end uint64) (uint64, uint64) { 249 // Special case, as the code below works only if begin != 0, or end < 2^63. 250 if begin == 0 { 251 return 0, end 252 } 253 xbegin := begin - 1 254 // Find where paths to leaves #begin-1 and #end diverge, and mask the upper 255 // bits away, as only the nodes strictly below this point are in the range. 256 d := bits.Len64(xbegin^end) - 1 257 mask := uint64(1)<<uint(d) - 1 258 // The left part of the compact range consists of all nodes strictly below 259 // and to the right from the path to leaf #begin-1, corresponding to zero 260 // bits in the masked part of begin-1. Likewise, the right part consists of 261 // nodes below and to the left from the path to leaf #end, corresponding to 262 // ones in the masked part of end. 263 return ^xbegin & mask, end & mask 264 } 265