pointcompression.go

Documentation: github.com/golang/geo/s2

     1  // Copyright 2017 Google Inc. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package s2
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  
    21  	"github.com/golang/geo/r3"
    22  )
    23  
    24  // maxEncodedVertices is the maximum number of vertices, in a row, to be encoded or decoded.
    25  // On decode, this defends against malicious encodings that try and have us exceed RAM.
    26  const maxEncodedVertices = 50000000
    27  
    28  // xyzFaceSiTi represents the The XYZ and face,si,ti coordinates of a Point
    29  // and, if this point is equal to the center of a Cell, the level of this cell
    30  // (-1 otherwise). This is used for Loops and Polygons to store data in a more
    31  // compressed format.
    32  type xyzFaceSiTi struct {
    33  	xyz    Point
    34  	face   int
    35  	si, ti uint32
    36  	level  int
    37  }
    38  
    39  const derivativeEncodingOrder = 2
    40  
    41  func appendFace(faces []faceRun, face int) []faceRun {
    42  	if len(faces) == 0 || faces[len(faces)-1].face != face {
    43  		return append(faces, faceRun{face, 1})
    44  	}
    45  	faces[len(faces)-1].count++
    46  	return faces
    47  }
    48  
    49  // encodePointsCompressed uses an optimized compressed format to encode the given values.
    50  func encodePointsCompressed(e *encoder, vertices []xyzFaceSiTi, level int) {
    51  	var faces []faceRun
    52  	for _, v := range vertices {
    53  		faces = appendFace(faces, v.face)
    54  	}
    55  	encodeFaces(e, faces)
    56  
    57  	type piQi struct {
    58  		pi, qi uint32
    59  	}
    60  	verticesPiQi := make([]piQi, len(vertices))
    61  	for i, v := range vertices {
    62  		verticesPiQi[i] = piQi{siTitoPiQi(v.si, level), siTitoPiQi(v.ti, level)}
    63  	}
    64  	piCoder, qiCoder := newNthDerivativeCoder(derivativeEncodingOrder), newNthDerivativeCoder(derivativeEncodingOrder)
    65  	for i, v := range verticesPiQi {
    66  		f := encodePointCompressed
    67  		if i == 0 {
    68  			// The first point will be just the (pi, qi) coordinates
    69  			// of the Point. NthDerivativeCoder will not save anything
    70  			// in that case, so we encode in fixed format rather than varint
    71  			// to avoid the varint overhead.
    72  			f = encodeFirstPointFixedLength
    73  		}
    74  		f(e, v.pi, v.qi, level, piCoder, qiCoder)
    75  	}
    76  
    77  	var offCenter []int
    78  	for i, v := range vertices {
    79  		if v.level != level {
    80  			offCenter = append(offCenter, i)
    81  		}
    82  	}
    83  	e.writeUvarint(uint64(len(offCenter)))
    84  	for _, idx := range offCenter {
    85  		e.writeUvarint(uint64(idx))
    86  		e.writeFloat64(vertices[idx].xyz.X)
    87  		e.writeFloat64(vertices[idx].xyz.Y)
    88  		e.writeFloat64(vertices[idx].xyz.Z)
    89  	}
    90  }
    91  
    92  func encodeFirstPointFixedLength(e *encoder, pi, qi uint32, level int, piCoder, qiCoder *nthDerivativeCoder) {
    93  	// Do not ZigZagEncode the first point, since it cannot be negative.
    94  	codedPi, codedQi := piCoder.encode(int32(pi)), qiCoder.encode(int32(qi))
    95  	// Interleave to reduce overhead from two partial bytes to one.
    96  	interleaved := interleaveUint32(uint32(codedPi), uint32(codedQi))
    97  
    98  	// Write as little endian.
    99  	bytesRequired := (level + 7) / 8 * 2
   100  	for i := 0; i < bytesRequired; i++ {
   101  		e.writeUint8(uint8(interleaved))
   102  		interleaved >>= 8
   103  	}
   104  }
   105  
   106  // encodePointCompressed encodes points into e.
   107  // Given a sequence of Points assumed to be the center of level-k cells,
   108  // compresses it into a stream using the following method:
   109  // - decompose the points into (face, si, ti) tuples.
   110  // - run-length encode the faces, combining face number and count into a
   111  //
   112  //	varint32. See the faceRun struct.
   113  //
   114  // - right shift the (si, ti) to remove the part that's constant for all cells
   115  //
   116  //	of level-k. The result is called the (pi, qi) space.
   117  //
   118  // - 2nd derivative encode the pi and qi sequences (linear prediction)
   119  // - zig-zag encode all derivative values but the first, which cannot be
   120  //
   121  //	negative
   122  //
   123  // - interleave the zig-zag encoded values
   124  // - encode the first interleaved value in a fixed length encoding
   125  //
   126  //	(varint would make this value larger)
   127  //
   128  // - encode the remaining interleaved values as varint64s, as the
   129  //
   130  //	derivative encoding should make the values small.
   131  //
   132  // In addition, provides a lossless method to compress a sequence of points even
   133  // if some points are not the center of level-k cells. These points are stored
   134  // exactly, using 3 double precision values, after the above encoded string,
   135  // together with their index in the sequence (this leads to some redundancy - it
   136  // is expected that only a small fraction of the points are not cell centers).
   137  //
   138  // To encode leaf cells, this requires 8 bytes for the first vertex plus
   139  // an average of 3.8 bytes for each additional vertex, when computed on
   140  // Google's geographic repository.
   141  func encodePointCompressed(e *encoder, pi, qi uint32, level int, piCoder, qiCoder *nthDerivativeCoder) {
   142  	// ZigZagEncode, as varint requires the maximum number of bytes for
   143  	// negative numbers.
   144  	zzPi := zigzagEncode(piCoder.encode(int32(pi)))
   145  	zzQi := zigzagEncode(qiCoder.encode(int32(qi)))
   146  	// Interleave to reduce overhead from two partial bytes to one.
   147  	interleaved := interleaveUint32(zzPi, zzQi)
   148  	e.writeUvarint(interleaved)
   149  }
   150  
   151  type faceRun struct {
   152  	face, count int
   153  }
   154  
   155  func decodeFaceRun(d *decoder) faceRun {
   156  	faceAndCount := d.readUvarint()
   157  	ret := faceRun{
   158  		face:  int(faceAndCount % NumFaces),
   159  		count: int(faceAndCount / NumFaces),
   160  	}
   161  	if ret.count <= 0 && d.err == nil {
   162  		d.err = errors.New("non-positive count for face run")
   163  	}
   164  	return ret
   165  }
   166  
   167  func decodeFaces(numVertices int, d *decoder) []faceRun {
   168  	var frs []faceRun
   169  	for nparsed := 0; nparsed < numVertices; {
   170  		fr := decodeFaceRun(d)
   171  		if d.err != nil {
   172  			return nil
   173  		}
   174  		frs = append(frs, fr)
   175  		nparsed += fr.count
   176  	}
   177  	return frs
   178  }
   179  
   180  // encodeFaceRun encodes each faceRun as a varint64 with value NumFaces * count + face.
   181  func encodeFaceRun(e *encoder, fr faceRun) {
   182  	// It isn't necessary to encode the number of faces left for the last run,
   183  	// but since this would only help if there were more than 21 faces, it will
   184  	// be a small overall savings, much smaller than the bound encoding.
   185  	coded := NumFaces*uint64(fr.count) + uint64(fr.face)
   186  	e.writeUvarint(coded)
   187  }
   188  
   189  func encodeFaces(e *encoder, frs []faceRun) {
   190  	for _, fr := range frs {
   191  		encodeFaceRun(e, fr)
   192  	}
   193  }
   194  
   195  type facesIterator struct {
   196  	faces []faceRun
   197  	// How often have we yet shown the current face?
   198  	numCurrentFaceShown int
   199  	curFace             int
   200  }
   201  
   202  func (fi *facesIterator) next() (ok bool) {
   203  	if len(fi.faces) == 0 {
   204  		return false
   205  	}
   206  	fi.curFace = fi.faces[0].face
   207  	fi.numCurrentFaceShown++
   208  
   209  	// Advance fs if needed.
   210  	if fi.faces[0].count <= fi.numCurrentFaceShown {
   211  		fi.faces = fi.faces[1:]
   212  		fi.numCurrentFaceShown = 0
   213  	}
   214  
   215  	return true
   216  }
   217  
   218  func decodePointsCompressed(d *decoder, level int, target []Point) {
   219  	faces := decodeFaces(len(target), d)
   220  
   221  	piCoder := newNthDerivativeCoder(derivativeEncodingOrder)
   222  	qiCoder := newNthDerivativeCoder(derivativeEncodingOrder)
   223  
   224  	iter := facesIterator{faces: faces}
   225  	for i := range target {
   226  		decodeFn := decodePointCompressed
   227  		if i == 0 {
   228  			decodeFn = decodeFirstPointFixedLength
   229  		}
   230  		pi, qi := decodeFn(d, level, piCoder, qiCoder)
   231  		if ok := iter.next(); !ok && d.err == nil {
   232  			d.err = fmt.Errorf("ran out of faces at target %d", i)
   233  			return
   234  		}
   235  		target[i] = Point{facePiQitoXYZ(iter.curFace, pi, qi, level)}
   236  	}
   237  
   238  	numOffCenter := int(d.readUvarint())
   239  	if d.err != nil {
   240  		return
   241  	}
   242  	if numOffCenter > len(target) {
   243  		d.err = fmt.Errorf("numOffCenter = %d, should be at most len(target) = %d", numOffCenter, len(target))
   244  		return
   245  	}
   246  	for i := 0; i < numOffCenter; i++ {
   247  		idx := int(d.readUvarint())
   248  		if d.err != nil {
   249  			return
   250  		}
   251  		if idx >= len(target) {
   252  			d.err = fmt.Errorf("off center index = %d, should be < len(target) = %d", idx, len(target))
   253  			return
   254  		}
   255  		target[idx].X = d.readFloat64()
   256  		target[idx].Y = d.readFloat64()
   257  		target[idx].Z = d.readFloat64()
   258  	}
   259  }
   260  
   261  func decodeFirstPointFixedLength(d *decoder, level int, piCoder, qiCoder *nthDerivativeCoder) (pi, qi uint32) {
   262  	bytesToRead := (level + 7) / 8 * 2
   263  	var interleaved uint64
   264  	for i := 0; i < bytesToRead; i++ {
   265  		rr := d.readUint8()
   266  		interleaved |= (uint64(rr) << uint(i*8))
   267  	}
   268  
   269  	piCoded, qiCoded := deinterleaveUint32(interleaved)
   270  
   271  	return uint32(piCoder.decode(int32(piCoded))), uint32(qiCoder.decode(int32(qiCoded)))
   272  }
   273  
   274  func zigzagEncode(x int32) uint32 {
   275  	return (uint32(x) << 1) ^ uint32(x>>31)
   276  }
   277  
   278  func zigzagDecode(x uint32) int32 {
   279  	return int32((x >> 1) ^ uint32((int32(x&1)<<31)>>31))
   280  }
   281  
   282  func decodePointCompressed(d *decoder, level int, piCoder, qiCoder *nthDerivativeCoder) (pi, qi uint32) {
   283  	interleavedZigZagEncodedDerivPiQi := d.readUvarint()
   284  	piZigzag, qiZigzag := deinterleaveUint32(interleavedZigZagEncodedDerivPiQi)
   285  	return uint32(piCoder.decode(zigzagDecode(piZigzag))), uint32(qiCoder.decode(zigzagDecode(qiZigzag)))
   286  }
   287  
   288  // We introduce a new coordinate system (pi, qi), which is (si, ti)
   289  // with the bits that are constant for cells of that level shifted
   290  // off to the right.
   291  // si = round(s * 2^31)
   292  // pi = si >> (31 - level)
   293  //    = floor(s * 2^level)
   294  // If the point has been snapped to the level, the bits that are
   295  // shifted off will be a 1 in the msb, then 0s after that, so the
   296  // fractional part discarded by the cast is (close to) 0.5.
   297  
   298  // stToPiQi returns the value transformed to the PiQi coordinate space.
   299  func stToPiQi(s float64, level uint) uint32 {
   300  	return uint32(s * float64(int(1)<<level))
   301  }
   302  
   303  // siTiToPiQi returns the value transformed into the PiQi coordinate spade.
   304  // encodeFirstPointFixedLength encodes the return value using level bits,
   305  // so we clamp si to the range [0, 2**level - 1] before trying to encode
   306  // it. This is okay because if si == maxSiTi, then it is not a cell center
   307  // anyway and will be encoded separately as an off-center point.
   308  func siTitoPiQi(siTi uint32, level int) uint32 {
   309  	s := uint(siTi)
   310  	const max = maxSiTi - 1
   311  	if s > max {
   312  		s = max
   313  	}
   314  
   315  	return uint32(s >> (MaxLevel + 1 - uint(level)))
   316  }
   317  
   318  // piQiToST returns the value transformed to ST space.
   319  func piQiToST(pi uint32, level int) float64 {
   320  	// We want to recover the position at the center of the cell. If the point
   321  	// was snapped to the center of the cell, then math.Modf(s * 2^level) == 0.5.
   322  	// Inverting STtoPiQi gives:
   323  	// s = (pi + 0.5) / 2^level.
   324  	return (float64(pi) + 0.5) / float64(int(1)<<uint(level))
   325  }
   326  
   327  func facePiQitoXYZ(face int, pi, qi uint32, level int) r3.Vector {
   328  	return faceUVToXYZ(face, stToUV(piQiToST(pi, level)), stToUV(piQiToST(qi, level))).Normalize()
   329  }
   330
View as plain text