...

Source file src/github.com/rivo/uniseg/properties.go

Documentation: github.com/rivo/uniseg

     1  package uniseg
     2  
     3  // The Unicode properties as used in the various parsers. Only the ones needed
     4  // in the context of this package are included.
     5  const (
     6  	prXX      = 0    // Same as prAny.
     7  	prAny     = iota // prAny must be 0.
     8  	prPrepend        // Grapheme properties must come first, to reduce the number of bits stored in the state vector.
     9  	prCR
    10  	prLF
    11  	prControl
    12  	prExtend
    13  	prRegionalIndicator
    14  	prSpacingMark
    15  	prL
    16  	prV
    17  	prT
    18  	prLV
    19  	prLVT
    20  	prZWJ
    21  	prExtendedPictographic
    22  	prNewline
    23  	prWSegSpace
    24  	prDoubleQuote
    25  	prSingleQuote
    26  	prMidNumLet
    27  	prNumeric
    28  	prMidLetter
    29  	prMidNum
    30  	prExtendNumLet
    31  	prALetter
    32  	prFormat
    33  	prHebrewLetter
    34  	prKatakana
    35  	prSp
    36  	prSTerm
    37  	prClose
    38  	prSContinue
    39  	prATerm
    40  	prUpper
    41  	prLower
    42  	prSep
    43  	prOLetter
    44  	prCM
    45  	prBA
    46  	prBK
    47  	prSP
    48  	prEX
    49  	prQU
    50  	prAL
    51  	prPR
    52  	prPO
    53  	prOP
    54  	prCP
    55  	prIS
    56  	prHY
    57  	prSY
    58  	prNU
    59  	prCL
    60  	prNL
    61  	prGL
    62  	prAI
    63  	prBB
    64  	prHL
    65  	prSA
    66  	prJL
    67  	prJV
    68  	prJT
    69  	prNS
    70  	prZW
    71  	prB2
    72  	prIN
    73  	prWJ
    74  	prID
    75  	prEB
    76  	prCJ
    77  	prH2
    78  	prH3
    79  	prSG
    80  	prCB
    81  	prRI
    82  	prEM
    83  	prN
    84  	prNa
    85  	prA
    86  	prW
    87  	prH
    88  	prF
    89  	prEmojiPresentation
    90  )
    91  
    92  // Unicode General Categories, named by their two-letter abbreviations. Only
    93  // the ones needed in the context of this package are included.
    94  const (
    95  	gcNone = iota // gcNone must be 0, the value a failed table lookup yields.
    96  	gcCc // Control
    97  	gcZs // Space separator
    98  	gcPo // Other punctuation
    99  	gcSc // Currency symbol
   100  	gcPs // Open punctuation
   101  	gcPe // Close punctuation
   102  	gcSm // Math symbol
   103  	gcPd // Dash punctuation
   104  	gcNd // Decimal number
   105  	gcLu // Uppercase letter
   106  	gcSk // Modifier symbol
   107  	gcPc // Connector punctuation
   108  	gcLl // Lowercase letter
   109  	gcSo // Other symbol
   110  	gcLo // Other letter
   111  	gcPi // Initial punctuation
   112  	gcCf // Format
   113  	gcNo // Other number
   114  	gcPf // Final punctuation
   115  	gcLC // Cased letter
   116  	gcLm // Modifier letter
   117  	gcMn // Nonspacing mark
   118  	gcMe // Enclosing mark
   119  	gcMc // Spacing mark
   120  	gcNl // Letter number
   121  	gcZl // Line separator
   122  	gcZp // Paragraph separator
   123  	gcCn // Unassigned
   124  	gcCs // Surrogate
   125  	gcCo // Private use
   126  )
   127  
   128  // Special code points: the two variation selectors that choose a presentation style.
   129  const (
   130  	vs15 = 0xfe0e // U+FE0E Variation Selector-15 (text presentation)
   131  	vs16 = 0xfe0f // U+FE0F Variation Selector-16 (emoji presentation)
   132  )
   133  
   134  // propertySearch performs a binary search on a property slice and returns the
   135  // entry whose range (start = first array element, end = second array element)
   136  // includes r, or an array of 0's if no such entry was found.
   137  func propertySearch[E interface{ [3]int | [4]int }](dictionary []E, r rune) (result E) {
   138  	// Run a binary search.
   139  	from := 0
   140  	to := len(dictionary)
   141  	for to > from {
   142  		middle := (from + to) / 2
   143  		cpRange := dictionary[middle]
   144  		if int(r) < cpRange[0] {
   145  			to = middle
   146  			continue
   147  		}
   148  		if int(r) > cpRange[1] {
   149  			from = middle + 1
   150  			continue
   151  		}
   152  		return cpRange
   153  	}
   154  	return
   155  }
   156  
   157  // property returns the Unicode property value (see constants above) of the
   158  // given code point.
   159  func property(dictionary [][3]int, r rune) int {
   160  	return propertySearch(dictionary, r)[2]
   161  }
   162  
   163  // propertyLineBreak returns the Unicode property value and General Category
   164  // (see constants above) of the given code point, as listed in the line break
   165  // code points table, while fast tracking ASCII digits and letters.
   166  func propertyLineBreak(r rune) (property, generalCategory int) {
   167  	if r >= 'a' && r <= 'z' {
   168  		return prAL, gcLl
   169  	}
   170  	if r >= 'A' && r <= 'Z' {
   171  		return prAL, gcLu
   172  	}
   173  	if r >= '0' && r <= '9' {
   174  		return prNU, gcNd
   175  	}
   176  	entry := propertySearch(lineBreakCodePoints, r)
   177  	return entry[2], entry[3]
   178  }
   179  
   180  // propertyGraphemes returns the Unicode grapheme cluster property value of the
   181  // given code point while fast tracking ASCII characters.
   182  func propertyGraphemes(r rune) int {
   183  	if r >= 0x20 && r <= 0x7e {
   184  		return prAny
   185  	}
   186  	if r == 0x0a {
   187  		return prLF
   188  	}
   189  	if r == 0x0d {
   190  		return prCR
   191  	}
   192  	if r >= 0 && r <= 0x1f || r == 0x7f {
   193  		return prControl
   194  	}
   195  	return property(graphemeCodePoints, r)
   196  }
   197  
   198  // propertyEastAsianWidth returns the Unicode East Asian Width property value of
   199  // the given code point while fast tracking ASCII characters.
   200  func propertyEastAsianWidth(r rune) int {
   201  	if r >= 0x20 && r <= 0x7e {
   202  		return prNa
   203  	}
   204  	if r >= 0 && r <= 0x1f || r == 0x7f {
   205  		return prN
   206  	}
   207  	return property(eastAsianWidth, r)
   208  }
   209  

View as plain text