...

Source file src/github.com/klauspost/compress/flate/huffman_sortByFreq.go

Documentation: github.com/klauspost/compress/flate

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package flate
     6  
     7  // Sort sorts data.
     8  // It makes one call to data.Len to determine n, and O(n*log(n)) calls to
     9  // data.Less and data.Swap. The sort is not guaranteed to be stable.
    10  func sortByFreq(data []literalNode) {
    11  	n := len(data)
    12  	quickSortByFreq(data, 0, n, maxDepth(n))
    13  }
    14  
    15  func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
    16  	for b-a > 12 { // Use ShellSort for slices <= 12 elements
    17  		if maxDepth == 0 {
    18  			heapSort(data, a, b)
    19  			return
    20  		}
    21  		maxDepth--
    22  		mlo, mhi := doPivotByFreq(data, a, b)
    23  		// Avoiding recursion on the larger subproblem guarantees
    24  		// a stack depth of at most lg(b-a).
    25  		if mlo-a < b-mhi {
    26  			quickSortByFreq(data, a, mlo, maxDepth)
    27  			a = mhi // i.e., quickSortByFreq(data, mhi, b)
    28  		} else {
    29  			quickSortByFreq(data, mhi, b, maxDepth)
    30  			b = mlo // i.e., quickSortByFreq(data, a, mlo)
    31  		}
    32  	}
    33  	if b-a > 1 {
    34  		// Do ShellSort pass with gap 6
    35  		// It could be written in this simplified form cause b-a <= 12
    36  		for i := a + 6; i < b; i++ {
    37  			if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
    38  				data[i], data[i-6] = data[i-6], data[i]
    39  			}
    40  		}
    41  		insertionSortByFreq(data, a, b)
    42  	}
    43  }
    44  
    45  func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
    46  	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
    47  	if hi-lo > 40 {
    48  		// Tukey's ``Ninther,'' median of three medians of three.
    49  		s := (hi - lo) / 8
    50  		medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
    51  		medianOfThreeSortByFreq(data, m, m-s, m+s)
    52  		medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
    53  	}
    54  	medianOfThreeSortByFreq(data, lo, m, hi-1)
    55  
    56  	// Invariants are:
    57  	//	data[lo] = pivot (set up by ChoosePivot)
    58  	//	data[lo < i < a] < pivot
    59  	//	data[a <= i < b] <= pivot
    60  	//	data[b <= i < c] unexamined
    61  	//	data[c <= i < hi-1] > pivot
    62  	//	data[hi-1] >= pivot
    63  	pivot := lo
    64  	a, c := lo+1, hi-1
    65  
    66  	for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
    67  	}
    68  	b := a
    69  	for {
    70  		for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot
    71  		}
    72  		for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
    73  		}
    74  		if b >= c {
    75  			break
    76  		}
    77  		// data[b] > pivot; data[c-1] <= pivot
    78  		data[b], data[c-1] = data[c-1], data[b]
    79  		b++
    80  		c--
    81  	}
    82  	// If hi-c<3 then there are duplicates (by property of median of nine).
    83  	// Let's be a bit more conservative, and set border to 5.
    84  	protect := hi-c < 5
    85  	if !protect && hi-c < (hi-lo)/4 {
    86  		// Lets test some points for equality to pivot
    87  		dups := 0
    88  		if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
    89  			data[c], data[hi-1] = data[hi-1], data[c]
    90  			c++
    91  			dups++
    92  		}
    93  		if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
    94  			b--
    95  			dups++
    96  		}
    97  		// m-lo = (hi-lo)/2 > 6
    98  		// b-lo > (hi-lo)*3/4-1 > 8
    99  		// ==> m < b ==> data[m] <= pivot
   100  		if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
   101  			data[m], data[b-1] = data[b-1], data[m]
   102  			b--
   103  			dups++
   104  		}
   105  		// if at least 2 points are equal to pivot, assume skewed distribution
   106  		protect = dups > 1
   107  	}
   108  	if protect {
   109  		// Protect against a lot of duplicates
   110  		// Add invariant:
   111  		//	data[a <= i < b] unexamined
   112  		//	data[b <= i < c] = pivot
   113  		for {
   114  			for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
   115  			}
   116  			for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
   117  			}
   118  			if a >= b {
   119  				break
   120  			}
   121  			// data[a] == pivot; data[b-1] < pivot
   122  			data[a], data[b-1] = data[b-1], data[a]
   123  			a++
   124  			b--
   125  		}
   126  	}
   127  	// Swap pivot into middle
   128  	data[pivot], data[b-1] = data[b-1], data[pivot]
   129  	return b - 1, c
   130  }
   131  
   132  // Insertion sort
   133  func insertionSortByFreq(data []literalNode, a, b int) {
   134  	for i := a + 1; i < b; i++ {
   135  		for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
   136  			data[j], data[j-1] = data[j-1], data[j]
   137  		}
   138  	}
   139  }
   140  
   141  // The quicksort in this file (quickSortByFreq and its helpers) loosely follows
   142  // Bentley and McIlroy, "Engineering a Sort Function," SP&E, November 1993.
   143  
   144  // medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
   145  func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
   146  	// sort 3 elements
   147  	if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
   148  		data[m1], data[m0] = data[m0], data[m1]
   149  	}
   150  	// data[m0] <= data[m1]
   151  	if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
   152  		data[m2], data[m1] = data[m1], data[m2]
   153  		// data[m0] <= data[m2] && data[m1] < data[m2]
   154  		if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
   155  			data[m1], data[m0] = data[m0], data[m1]
   156  		}
   157  	}
   158  	// now data[m0] <= data[m1] <= data[m2]
   159  }
   160  

View as plain text