...

Source file src/go.starlark.net/starlark/hashtable.go

Documentation: go.starlark.net/starlark

     1  // Copyright 2017 The Bazel Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package starlark
     6  
     7  import (
     8  	"fmt"
     9  	_ "unsafe" // for go:linkname hack
    10  )
    11  
    12  // hashtable is used to represent Starlark dict and set values.
    13  // It is a hash table whose key/value entries form a doubly-linked list
    14  // in the order the entries were inserted.
    15  //
    16  // Initialized instances of hashtable must not be copied.
    17  type hashtable struct {
    18  	table     []bucket  // len is zero or a power of two
    19  	bucket0   [1]bucket // inline allocation for small maps.
    20  	len       uint32
    21  	itercount uint32  // number of active iterators (ignored if frozen)
    22  	head      *entry  // insertion order doubly-linked list; may be nil
    23  	tailLink  **entry // address of nil link at end of list (perhaps &head)
    24  	frozen    bool
    25  
    26  	_ noCopy // triggers vet copylock check on this type.
    27  }
    28  
    29  // noCopy is zero-sized type that triggers vet's copylock check.
    30  // See https://github.com/golang/go/issues/8005#issuecomment-190753527.
    31  type noCopy struct{}
    32  
    33  func (*noCopy) Lock()   {}
    34  func (*noCopy) Unlock() {}
    35  
    36  const bucketSize = 8
    37  
    38  type bucket struct {
    39  	entries [bucketSize]entry
    40  	next    *bucket // linked list of buckets
    41  }
    42  
    43  type entry struct {
    44  	hash       uint32 // nonzero => in use
    45  	key, value Value
    46  	next       *entry  // insertion order doubly-linked list; may be nil
    47  	prevLink   **entry // address of link to this entry (perhaps &head)
    48  }
    49  
    50  func (ht *hashtable) init(size int) {
    51  	if size < 0 {
    52  		panic("size < 0")
    53  	}
    54  	nb := 1
    55  	for overloaded(size, nb) {
    56  		nb = nb << 1
    57  	}
    58  	if nb < 2 {
    59  		ht.table = ht.bucket0[:1]
    60  	} else {
    61  		ht.table = make([]bucket, nb)
    62  	}
    63  	ht.tailLink = &ht.head
    64  }
    65  
    66  func (ht *hashtable) freeze() {
    67  	if !ht.frozen {
    68  		ht.frozen = true
    69  		for e := ht.head; e != nil; e = e.next {
    70  			e.key.Freeze()
    71  			e.value.Freeze()
    72  		}
    73  	}
    74  }
    75  
    76  func (ht *hashtable) insert(k, v Value) error {
    77  	if err := ht.checkMutable("insert into"); err != nil {
    78  		return err
    79  	}
    80  	if ht.table == nil {
    81  		ht.init(1)
    82  	}
    83  	h, err := k.Hash()
    84  	if err != nil {
    85  		return err
    86  	}
    87  	if h == 0 {
    88  		h = 1 // zero is reserved
    89  	}
    90  
    91  retry:
    92  	var insert *entry
    93  
    94  	// Inspect each bucket in the bucket list.
    95  	p := &ht.table[h&(uint32(len(ht.table)-1))]
    96  	for {
    97  		for i := range p.entries {
    98  			e := &p.entries[i]
    99  			if e.hash != h {
   100  				if e.hash == 0 {
   101  					// Found empty entry; make a note.
   102  					insert = e
   103  				}
   104  				continue
   105  			}
   106  			if eq, err := Equal(k, e.key); err != nil {
   107  				return err // e.g. excessively recursive tuple
   108  			} else if !eq {
   109  				continue
   110  			}
   111  			// Key already present; update value.
   112  			e.value = v
   113  			return nil
   114  		}
   115  		if p.next == nil {
   116  			break
   117  		}
   118  		p = p.next
   119  	}
   120  
   121  	// Key not found.  p points to the last bucket.
   122  
   123  	// Does the number of elements exceed the buckets' load factor?
   124  	if overloaded(int(ht.len), len(ht.table)) {
   125  		ht.grow()
   126  		goto retry
   127  	}
   128  
   129  	if insert == nil {
   130  		// No space in existing buckets.  Add a new one to the bucket list.
   131  		b := new(bucket)
   132  		p.next = b
   133  		insert = &b.entries[0]
   134  	}
   135  
   136  	// Insert key/value pair.
   137  	insert.hash = h
   138  	insert.key = k
   139  	insert.value = v
   140  
   141  	// Append entry to doubly-linked list.
   142  	insert.prevLink = ht.tailLink
   143  	*ht.tailLink = insert
   144  	ht.tailLink = &insert.next
   145  
   146  	ht.len++
   147  
   148  	return nil
   149  }
   150  
   151  func overloaded(elems, buckets int) bool {
   152  	const loadFactor = 6.5 // just a guess
   153  	return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets)
   154  }
   155  
   156  func (ht *hashtable) grow() {
   157  	// Double the number of buckets and rehash.
   158  	//
   159  	// Even though this makes reentrant calls to ht.insert,
   160  	// calls Equals unnecessarily (since there can't be duplicate keys),
   161  	// and recomputes the hash unnecessarily, the gains from
   162  	// avoiding these steps were found to be too small to justify
   163  	// the extra logic: -2% on hashtable benchmark.
   164  	ht.table = make([]bucket, len(ht.table)<<1)
   165  	oldhead := ht.head
   166  	ht.head = nil
   167  	ht.tailLink = &ht.head
   168  	ht.len = 0
   169  	for e := oldhead; e != nil; e = e.next {
   170  		ht.insert(e.key, e.value)
   171  	}
   172  	ht.bucket0[0] = bucket{} // clear out unused initial bucket
   173  }
   174  
   175  func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) {
   176  	h, err := k.Hash()
   177  	if err != nil {
   178  		return nil, false, err // unhashable
   179  	}
   180  	if h == 0 {
   181  		h = 1 // zero is reserved
   182  	}
   183  	if ht.table == nil {
   184  		return None, false, nil // empty
   185  	}
   186  
   187  	// Inspect each bucket in the bucket list.
   188  	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
   189  		for i := range p.entries {
   190  			e := &p.entries[i]
   191  			if e.hash == h {
   192  				if eq, err := Equal(k, e.key); err != nil {
   193  					return nil, false, err // e.g. excessively recursive tuple
   194  				} else if eq {
   195  					return e.value, true, nil // found
   196  				}
   197  			}
   198  		}
   199  	}
   200  	return None, false, nil // not found
   201  }
   202  
   203  // Items returns all the items in the map (as key/value pairs) in insertion order.
   204  func (ht *hashtable) items() []Tuple {
   205  	items := make([]Tuple, 0, ht.len)
   206  	array := make([]Value, ht.len*2) // allocate a single backing array
   207  	for e := ht.head; e != nil; e = e.next {
   208  		pair := Tuple(array[:2:2])
   209  		array = array[2:]
   210  		pair[0] = e.key
   211  		pair[1] = e.value
   212  		items = append(items, pair)
   213  	}
   214  	return items
   215  }
   216  
   217  func (ht *hashtable) first() (Value, bool) {
   218  	if ht.head != nil {
   219  		return ht.head.key, true
   220  	}
   221  	return None, false
   222  }
   223  
   224  func (ht *hashtable) keys() []Value {
   225  	keys := make([]Value, 0, ht.len)
   226  	for e := ht.head; e != nil; e = e.next {
   227  		keys = append(keys, e.key)
   228  	}
   229  	return keys
   230  }
   231  
   232  func (ht *hashtable) delete(k Value) (v Value, found bool, err error) {
   233  	if err := ht.checkMutable("delete from"); err != nil {
   234  		return nil, false, err
   235  	}
   236  	if ht.table == nil {
   237  		return None, false, nil // empty
   238  	}
   239  	h, err := k.Hash()
   240  	if err != nil {
   241  		return nil, false, err // unhashable
   242  	}
   243  	if h == 0 {
   244  		h = 1 // zero is reserved
   245  	}
   246  
   247  	// Inspect each bucket in the bucket list.
   248  	for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next {
   249  		for i := range p.entries {
   250  			e := &p.entries[i]
   251  			if e.hash == h {
   252  				if eq, err := Equal(k, e.key); err != nil {
   253  					return nil, false, err
   254  				} else if eq {
   255  					// Remove e from doubly-linked list.
   256  					*e.prevLink = e.next
   257  					if e.next == nil {
   258  						ht.tailLink = e.prevLink // deletion of last entry
   259  					} else {
   260  						e.next.prevLink = e.prevLink
   261  					}
   262  
   263  					v := e.value
   264  					*e = entry{}
   265  					ht.len--
   266  					return v, true, nil // found
   267  				}
   268  			}
   269  		}
   270  	}
   271  
   272  	// TODO(adonovan): opt: remove completely empty bucket from bucket list.
   273  
   274  	return None, false, nil // not found
   275  }
   276  
   277  // checkMutable reports an error if the hash table should not be mutated.
   278  // verb+" dict" should describe the operation.
   279  func (ht *hashtable) checkMutable(verb string) error {
   280  	if ht.frozen {
   281  		return fmt.Errorf("cannot %s frozen hash table", verb)
   282  	}
   283  	if ht.itercount > 0 {
   284  		return fmt.Errorf("cannot %s hash table during iteration", verb)
   285  	}
   286  	return nil
   287  }
   288  
   289  func (ht *hashtable) clear() error {
   290  	if err := ht.checkMutable("clear"); err != nil {
   291  		return err
   292  	}
   293  	if ht.table != nil {
   294  		for i := range ht.table {
   295  			ht.table[i] = bucket{}
   296  		}
   297  	}
   298  	ht.head = nil
   299  	ht.tailLink = &ht.head
   300  	ht.len = 0
   301  	return nil
   302  }
   303  
   304  func (ht *hashtable) addAll(other *hashtable) error {
   305  	for e := other.head; e != nil; e = e.next {
   306  		if err := ht.insert(e.key, e.value); err != nil {
   307  			return err
   308  		}
   309  	}
   310  	return nil
   311  }
   312  
   313  // dump is provided as an aid to debugging.
   314  func (ht *hashtable) dump() {
   315  	fmt.Printf("hashtable %p len=%d head=%p tailLink=%p",
   316  		ht, ht.len, ht.head, ht.tailLink)
   317  	if ht.tailLink != nil {
   318  		fmt.Printf(" *tailLink=%p", *ht.tailLink)
   319  	}
   320  	fmt.Println()
   321  	for j := range ht.table {
   322  		fmt.Printf("bucket chain %d\n", j)
   323  		for p := &ht.table[j]; p != nil; p = p.next {
   324  			fmt.Printf("bucket %p\n", p)
   325  			for i := range p.entries {
   326  				e := &p.entries[i]
   327  				fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n",
   328  					i, e, e.hash, e.key, e.value)
   329  				fmt.Printf("\t\tnext=%p &next=%p prev=%p",
   330  					e.next, &e.next, e.prevLink)
   331  				if e.prevLink != nil {
   332  					fmt.Printf(" *prev=%p", *e.prevLink)
   333  				}
   334  				fmt.Println()
   335  			}
   336  		}
   337  	}
   338  }
   339  
   340  func (ht *hashtable) iterate() *keyIterator {
   341  	if !ht.frozen {
   342  		ht.itercount++
   343  	}
   344  	return &keyIterator{ht: ht, e: ht.head}
   345  }
   346  
   347  type keyIterator struct {
   348  	ht *hashtable
   349  	e  *entry
   350  }
   351  
   352  func (it *keyIterator) Next(k *Value) bool {
   353  	if it.e != nil {
   354  		*k = it.e.key
   355  		it.e = it.e.next
   356  		return true
   357  	}
   358  	return false
   359  }
   360  
   361  func (it *keyIterator) Done() {
   362  	if !it.ht.frozen {
   363  		it.ht.itercount--
   364  	}
   365  }
   366  
   367  // TODO(adonovan): use go1.19's maphash.String.
   368  
   369  // hashString computes the hash of s.
   370  func hashString(s string) uint32 {
   371  	if len(s) >= 12 {
   372  		// Call the Go runtime's optimized hash implementation,
   373  		// which uses the AESENC instruction on amd64 machines.
   374  		return uint32(goStringHash(s, 0))
   375  	}
   376  	return softHashString(s)
   377  }
   378  
   379  //go:linkname goStringHash runtime.stringHash
   380  func goStringHash(s string, seed uintptr) uintptr
   381  
   382  // softHashString computes the 32-bit FNV-1a hash of s in software.
   383  func softHashString(s string) uint32 {
   384  	var h uint32 = 2166136261
   385  	for i := 0; i < len(s); i++ {
   386  		h ^= uint32(s[i])
   387  		h *= 16777619
   388  	}
   389  	return h
   390  }
   391  

View as plain text