...

Source file src/github.com/rs/xid/id.go

Documentation: github.com/rs/xid

     1  // Package xid is a globally unique id generator suited for web scale
     2  //
     3  // Xid is using Mongo Object ID algorithm to generate globally unique ids:
     4  // https://docs.mongodb.org/manual/reference/object-id/
     5  //
     6  //   - 4-byte value representing the seconds since the Unix epoch,
     7  //   - 3-byte machine identifier,
     8  //   - 2-byte process id, and
     9  //   - 3-byte counter, starting with a random value.
    10  //
    11  // The binary representation of the id is compatible with Mongo 12 bytes Object IDs.
    12  // The string representation is using base32 hex (w/o padding) for better space efficiency
    13  // when stored in that form (20 bytes). The hex variant of base32 is used to retain the
    14  // sortable property of the id.
    15  //
    16  // Xid doesn't use base64 because case sensitivity and the 2 non alphanum chars may be an
    17  // issue when transported as a string between various systems. Base36 wasn't retained either
    18  // because 1/ it's not standard 2/ the resulting size is not predictable (not bit aligned)
    19  // and 3/ it would not remain sortable. To validate a base32 `xid`, expect a 20 chars long,
    20  // all lowercase sequence of `a` to `v` letters and `0` to `9` numbers (`[0-9a-v]{20}`).
    21  //
    22  // UUID is 16 bytes (128 bits), snowflake is 8 bytes (64 bits), xid stands in between
    23  // with 12 bytes with a more compact string representation ready for the web and no
    24  // required configuration or central generation server.
    25  //
    26  // Features:
    27  //
    28  //   - Size: 12 bytes (96 bits), smaller than UUID, larger than snowflake
    29  //   - Base32 hex encoded by default (16 bytes storage when transported as printable string)
    30  //   - Non configured, you don't need set a unique machine and/or data center id
    31  //   - K-ordered
    32  //   - Embedded time with 1 second precision
    33  //   - Unicity guaranteed for 16,777,216 (24 bits) unique ids per second and per host/process
    34  //
    35  // Best used with xlog's RequestIDHandler (https://godoc.org/github.com/rs/xlog#RequestIDHandler).
    36  //
    37  // References:
    38  //
    39  //   - http://www.slideshare.net/davegardnerisme/unique-id-generation-in-distributed-systems
    40  //   - https://en.wikipedia.org/wiki/Universally_unique_identifier
    41  //   - https://blog.twitter.com/2010/announcing-snowflake
    42  package xid
    43  
    44  import (
    45  	"bytes"
    46  	"crypto/md5"
    47  	"crypto/rand"
    48  	"database/sql/driver"
    49  	"encoding/binary"
    50  	"fmt"
    51  	"hash/crc32"
    52  	"io/ioutil"
    53  	"os"
    54  	"sort"
    55  	"sync/atomic"
    56  	"time"
    57  	"unsafe"
    58  )
    59  
    60  // Code inspired from mgo/bson ObjectId
    61  
    62  // ID represents a unique request id
    63  type ID [rawLen]byte
    64  
    65  const (
    66  	encodedLen = 20 // string encoded len
    67  	rawLen     = 12 // binary raw len
    68  
    69  	// encoding stores a custom version of the base32 encoding with lower case
    70  	// letters.
    71  	encoding = "0123456789abcdefghijklmnopqrstuv"
    72  )
    73  
    74  var (
    75  	// objectIDCounter is atomically incremented when generating a new ObjectId
    76  	// using NewObjectId() function. It's used as a counter part of an id.
    77  	// This id is initialized with a random value.
    78  	objectIDCounter = randInt()
    79  
    80  	// machineId stores machine id generated once and used in subsequent calls
    81  	// to NewObjectId function.
    82  	machineID = readMachineID()
    83  
    84  	// pid stores the current process id
    85  	pid = os.Getpid()
    86  
    87  	nilID ID
    88  
    89  	// dec is the decoding map for base32 encoding
    90  	dec [256]byte
    91  )
    92  
    93  func init() {
    94  	for i := 0; i < len(dec); i++ {
    95  		dec[i] = 0xFF
    96  	}
    97  	for i := 0; i < len(encoding); i++ {
    98  		dec[encoding[i]] = byte(i)
    99  	}
   100  
   101  	// If /proc/self/cpuset exists and is not /, we can assume that we are in a
   102  	// form of container and use the content of cpuset xor-ed with the PID in
   103  	// order get a reasonable machine global unique PID.
   104  	b, err := ioutil.ReadFile("/proc/self/cpuset")
   105  	if err == nil && len(b) > 1 {
   106  		pid ^= int(crc32.ChecksumIEEE(b))
   107  	}
   108  }
   109  
   110  // readMachineId generates machine id and puts it into the machineId global
   111  // variable. If this function fails to get the hostname, it will cause
   112  // a runtime error.
   113  func readMachineID() []byte {
   114  	id := make([]byte, 3)
   115  	hid, err := readPlatformMachineID()
   116  	if err != nil || len(hid) == 0 {
   117  		hid, err = os.Hostname()
   118  	}
   119  	if err == nil && len(hid) != 0 {
   120  		hw := md5.New()
   121  		hw.Write([]byte(hid))
   122  		copy(id, hw.Sum(nil))
   123  	} else {
   124  		// Fallback to rand number if machine id can't be gathered
   125  		if _, randErr := rand.Reader.Read(id); randErr != nil {
   126  			panic(fmt.Errorf("xid: cannot get hostname nor generate a random number: %v; %v", err, randErr))
   127  		}
   128  	}
   129  	return id
   130  }
   131  
   132  // randInt generates a random uint32
   133  func randInt() uint32 {
   134  	b := make([]byte, 3)
   135  	if _, err := rand.Reader.Read(b); err != nil {
   136  		panic(fmt.Errorf("xid: cannot generate random number: %v;", err))
   137  	}
   138  	return uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2])
   139  }
   140  
   141  // New generates a globally unique ID
   142  func New() ID {
   143  	return NewWithTime(time.Now())
   144  }
   145  
   146  // NewWithTime generates a globally unique ID with the passed in time
   147  func NewWithTime(t time.Time) ID {
   148  	var id ID
   149  	// Timestamp, 4 bytes, big endian
   150  	binary.BigEndian.PutUint32(id[:], uint32(t.Unix()))
   151  	// Machine, first 3 bytes of md5(hostname)
   152  	id[4] = machineID[0]
   153  	id[5] = machineID[1]
   154  	id[6] = machineID[2]
   155  	// Pid, 2 bytes, specs don't specify endianness, but we use big endian.
   156  	id[7] = byte(pid >> 8)
   157  	id[8] = byte(pid)
   158  	// Increment, 3 bytes, big endian
   159  	i := atomic.AddUint32(&objectIDCounter, 1)
   160  	id[9] = byte(i >> 16)
   161  	id[10] = byte(i >> 8)
   162  	id[11] = byte(i)
   163  	return id
   164  }
   165  
   166  // FromString reads an ID from its string representation
   167  func FromString(id string) (ID, error) {
   168  	i := &ID{}
   169  	err := i.UnmarshalText([]byte(id))
   170  	return *i, err
   171  }
   172  
   173  // String returns a base32 hex lowercased with no padding representation of the id (char set is 0-9, a-v).
   174  func (id ID) String() string {
   175  	text := make([]byte, encodedLen)
   176  	encode(text, id[:])
   177  	return *(*string)(unsafe.Pointer(&text))
   178  }
   179  
   180  // Encode encodes the id using base32 encoding, writing 20 bytes to dst and return it.
   181  func (id ID) Encode(dst []byte) []byte {
   182  	encode(dst, id[:])
   183  	return dst
   184  }
   185  
   186  // MarshalText implements encoding/text TextMarshaler interface
   187  func (id ID) MarshalText() ([]byte, error) {
   188  	text := make([]byte, encodedLen)
   189  	encode(text, id[:])
   190  	return text, nil
   191  }
   192  
   193  // MarshalJSON implements encoding/json Marshaler interface
   194  func (id ID) MarshalJSON() ([]byte, error) {
   195  	if id.IsNil() {
   196  		return []byte("null"), nil
   197  	}
   198  	text := make([]byte, encodedLen+2)
   199  	encode(text[1:encodedLen+1], id[:])
   200  	text[0], text[encodedLen+1] = '"', '"'
   201  	return text, nil
   202  }
   203  
   204  // encode by unrolling the stdlib base32 algorithm + removing all safe checks
   205  func encode(dst, id []byte) {
   206  	_ = dst[19]
   207  	_ = id[11]
   208  
   209  	dst[19] = encoding[(id[11]<<4)&0x1F]
   210  	dst[18] = encoding[(id[11]>>1)&0x1F]
   211  	dst[17] = encoding[(id[11]>>6)&0x1F|(id[10]<<2)&0x1F]
   212  	dst[16] = encoding[id[10]>>3]
   213  	dst[15] = encoding[id[9]&0x1F]
   214  	dst[14] = encoding[(id[9]>>5)|(id[8]<<3)&0x1F]
   215  	dst[13] = encoding[(id[8]>>2)&0x1F]
   216  	dst[12] = encoding[id[8]>>7|(id[7]<<1)&0x1F]
   217  	dst[11] = encoding[(id[7]>>4)&0x1F|(id[6]<<4)&0x1F]
   218  	dst[10] = encoding[(id[6]>>1)&0x1F]
   219  	dst[9] = encoding[(id[6]>>6)&0x1F|(id[5]<<2)&0x1F]
   220  	dst[8] = encoding[id[5]>>3]
   221  	dst[7] = encoding[id[4]&0x1F]
   222  	dst[6] = encoding[id[4]>>5|(id[3]<<3)&0x1F]
   223  	dst[5] = encoding[(id[3]>>2)&0x1F]
   224  	dst[4] = encoding[id[3]>>7|(id[2]<<1)&0x1F]
   225  	dst[3] = encoding[(id[2]>>4)&0x1F|(id[1]<<4)&0x1F]
   226  	dst[2] = encoding[(id[1]>>1)&0x1F]
   227  	dst[1] = encoding[(id[1]>>6)&0x1F|(id[0]<<2)&0x1F]
   228  	dst[0] = encoding[id[0]>>3]
   229  }
   230  
   231  // UnmarshalText implements encoding/text TextUnmarshaler interface
   232  func (id *ID) UnmarshalText(text []byte) error {
   233  	if len(text) != encodedLen {
   234  		return ErrInvalidID
   235  	}
   236  	for _, c := range text {
   237  		if dec[c] == 0xFF {
   238  			return ErrInvalidID
   239  		}
   240  	}
   241  	if !decode(id, text) {
   242  		return ErrInvalidID
   243  	}
   244  	return nil
   245  }
   246  
   247  // UnmarshalJSON implements encoding/json Unmarshaler interface
   248  func (id *ID) UnmarshalJSON(b []byte) error {
   249  	s := string(b)
   250  	if s == "null" {
   251  		*id = nilID
   252  		return nil
   253  	}
   254  	// Check the slice length to prevent panic on passing it to UnmarshalText()
   255  	if len(b) < 2 {
   256  		return ErrInvalidID
   257  	}
   258  	return id.UnmarshalText(b[1 : len(b)-1])
   259  }
   260  
   261  // decode by unrolling the stdlib base32 algorithm + customized safe check.
   262  func decode(id *ID, src []byte) bool {
   263  	_ = src[19]
   264  	_ = id[11]
   265  
   266  	id[11] = dec[src[17]]<<6 | dec[src[18]]<<1 | dec[src[19]]>>4
   267  	id[10] = dec[src[16]]<<3 | dec[src[17]]>>2
   268  	id[9] = dec[src[14]]<<5 | dec[src[15]]
   269  	id[8] = dec[src[12]]<<7 | dec[src[13]]<<2 | dec[src[14]]>>3
   270  	id[7] = dec[src[11]]<<4 | dec[src[12]]>>1
   271  	id[6] = dec[src[9]]<<6 | dec[src[10]]<<1 | dec[src[11]]>>4
   272  	id[5] = dec[src[8]]<<3 | dec[src[9]]>>2
   273  	id[4] = dec[src[6]]<<5 | dec[src[7]]
   274  	id[3] = dec[src[4]]<<7 | dec[src[5]]<<2 | dec[src[6]]>>3
   275  	id[2] = dec[src[3]]<<4 | dec[src[4]]>>1
   276  	id[1] = dec[src[1]]<<6 | dec[src[2]]<<1 | dec[src[3]]>>4
   277  	id[0] = dec[src[0]]<<3 | dec[src[1]]>>2
   278  
   279  	// Validate that there are no discarer bits (padding) in src that would
   280  	// cause the string-encoded id not to equal src.
   281  	var check [4]byte
   282  
   283  	check[3] = encoding[(id[11]<<4)&0x1F]
   284  	check[2] = encoding[(id[11]>>1)&0x1F]
   285  	check[1] = encoding[(id[11]>>6)&0x1F|(id[10]<<2)&0x1F]
   286  	check[0] = encoding[id[10]>>3]
   287  	return bytes.Equal([]byte(src[16:20]), check[:])
   288  }
   289  
   290  // Time returns the timestamp part of the id.
   291  // It's a runtime error to call this method with an invalid id.
   292  func (id ID) Time() time.Time {
   293  	// First 4 bytes of ObjectId is 32-bit big-endian seconds from epoch.
   294  	secs := int64(binary.BigEndian.Uint32(id[0:4]))
   295  	return time.Unix(secs, 0)
   296  }
   297  
   298  // Machine returns the 3-byte machine id part of the id.
   299  // It's a runtime error to call this method with an invalid id.
   300  func (id ID) Machine() []byte {
   301  	return id[4:7]
   302  }
   303  
   304  // Pid returns the process id part of the id.
   305  // It's a runtime error to call this method with an invalid id.
   306  func (id ID) Pid() uint16 {
   307  	return binary.BigEndian.Uint16(id[7:9])
   308  }
   309  
   310  // Counter returns the incrementing value part of the id.
   311  // It's a runtime error to call this method with an invalid id.
   312  func (id ID) Counter() int32 {
   313  	b := id[9:12]
   314  	// Counter is stored as big-endian 3-byte value
   315  	return int32(uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2]))
   316  }
   317  
   318  // Value implements the driver.Valuer interface.
   319  func (id ID) Value() (driver.Value, error) {
   320  	if id.IsNil() {
   321  		return nil, nil
   322  	}
   323  	b, err := id.MarshalText()
   324  	return string(b), err
   325  }
   326  
   327  // Scan implements the sql.Scanner interface.
   328  func (id *ID) Scan(value interface{}) (err error) {
   329  	switch val := value.(type) {
   330  	case string:
   331  		return id.UnmarshalText([]byte(val))
   332  	case []byte:
   333  		return id.UnmarshalText(val)
   334  	case nil:
   335  		*id = nilID
   336  		return nil
   337  	default:
   338  		return fmt.Errorf("xid: scanning unsupported type: %T", value)
   339  	}
   340  }
   341  
   342  // IsNil Returns true if this is a "nil" ID
   343  func (id ID) IsNil() bool {
   344  	return id == nilID
   345  }
   346  
   347  // NilID returns a zero value for `xid.ID`.
   348  func NilID() ID {
   349  	return nilID
   350  }
   351  
   352  // Bytes returns the byte array representation of `ID`
   353  func (id ID) Bytes() []byte {
   354  	return id[:]
   355  }
   356  
   357  // FromBytes convert the byte array representation of `ID` back to `ID`
   358  func FromBytes(b []byte) (ID, error) {
   359  	var id ID
   360  	if len(b) != rawLen {
   361  		return id, ErrInvalidID
   362  	}
   363  	copy(id[:], b)
   364  	return id, nil
   365  }
   366  
   367  // Compare returns an integer comparing two IDs. It behaves just like `bytes.Compare`.
   368  // The result will be 0 if two IDs are identical, -1 if current id is less than the other one,
   369  // and 1 if current id is greater than the other.
   370  func (id ID) Compare(other ID) int {
   371  	return bytes.Compare(id[:], other[:])
   372  }
   373  
   374  type sorter []ID
   375  
   376  func (s sorter) Len() int {
   377  	return len(s)
   378  }
   379  
   380  func (s sorter) Less(i, j int) bool {
   381  	return s[i].Compare(s[j]) < 0
   382  }
   383  
   384  func (s sorter) Swap(i, j int) {
   385  	s[i], s[j] = s[j], s[i]
   386  }
   387  
   388  // Sort sorts an array of IDs inplace.
   389  // It works by wrapping `[]ID` and use `sort.Sort`.
   390  func Sort(ids []ID) {
   391  	sort.Sort(sorter(ids))
   392  }
   393  

View as plain text