...

Source file src/github.com/yuin/goldmark/util/util_cjk.go

Documentation: github.com/yuin/goldmark/util

     1  package util
     2  
     3  import "unicode"
     4  
     5  var cjkRadicalsSupplement = &unicode.RangeTable{
     6  	R16: []unicode.Range16{
     7  		{0x2E80, 0x2EFF, 1},
     8  	},
     9  }
    10  
    11  var kangxiRadicals = &unicode.RangeTable{
    12  	R16: []unicode.Range16{
    13  		{0x2F00, 0x2FDF, 1},
    14  	},
    15  }
    16  
    17  var ideographicDescriptionCharacters = &unicode.RangeTable{
    18  	R16: []unicode.Range16{
    19  		{0x2FF0, 0x2FFF, 1},
    20  	},
    21  }
    22  
    23  var cjkSymbolsAndPunctuation = &unicode.RangeTable{
    24  	R16: []unicode.Range16{
    25  		{0x3000, 0x303F, 1},
    26  	},
    27  }
    28  
    29  var hiragana = &unicode.RangeTable{
    30  	R16: []unicode.Range16{
    31  		{0x3040, 0x309F, 1},
    32  	},
    33  }
    34  
    35  var katakana = &unicode.RangeTable{
    36  	R16: []unicode.Range16{
    37  		{0x30A0, 0x30FF, 1},
    38  	},
    39  }
    40  
    41  var kanbun = &unicode.RangeTable{
    42  	R16: []unicode.Range16{
    43  		{0x3130, 0x318F, 1},
    44  		{0x3190, 0x319F, 1},
    45  	},
    46  }
    47  
    48  var cjkStrokes = &unicode.RangeTable{
    49  	R16: []unicode.Range16{
    50  		{0x31C0, 0x31EF, 1},
    51  	},
    52  }
    53  
    54  var katakanaPhoneticExtensions = &unicode.RangeTable{
    55  	R16: []unicode.Range16{
    56  		{0x31F0, 0x31FF, 1},
    57  	},
    58  }
    59  
    60  var cjkCompatibility = &unicode.RangeTable{
    61  	R16: []unicode.Range16{
    62  		{0x3300, 0x33FF, 1},
    63  	},
    64  }
    65  
    66  var cjkUnifiedIdeographsExtensionA = &unicode.RangeTable{
    67  	R16: []unicode.Range16{
    68  		{0x3400, 0x4DBF, 1},
    69  	},
    70  }
    71  
    72  var cjkUnifiedIdeographs = &unicode.RangeTable{
    73  	R16: []unicode.Range16{
    74  		{0x4E00, 0x9FFF, 1},
    75  	},
    76  }
    77  
    78  var yiSyllables = &unicode.RangeTable{
    79  	R16: []unicode.Range16{
    80  		{0xA000, 0xA48F, 1},
    81  	},
    82  }
    83  
    84  var yiRadicals = &unicode.RangeTable{
    85  	R16: []unicode.Range16{
    86  		{0xA490, 0xA4CF, 1},
    87  	},
    88  }
    89  
    90  var cjkCompatibilityIdeographs = &unicode.RangeTable{
    91  	R16: []unicode.Range16{
    92  		{0xF900, 0xFAFF, 1},
    93  	},
    94  }
    95  
    96  var verticalForms = &unicode.RangeTable{
    97  	R16: []unicode.Range16{
    98  		{0xFE10, 0xFE1F, 1},
    99  	},
   100  }
   101  
   102  var cjkCompatibilityForms = &unicode.RangeTable{
   103  	R16: []unicode.Range16{
   104  		{0xFE30, 0xFE4F, 1},
   105  	},
   106  }
   107  
   108  var smallFormVariants = &unicode.RangeTable{
   109  	R16: []unicode.Range16{
   110  		{0xFE50, 0xFE6F, 1},
   111  	},
   112  }
   113  
   114  var halfwidthAndFullwidthForms = &unicode.RangeTable{
   115  	R16: []unicode.Range16{
   116  		{0xFF00, 0xFFEF, 1},
   117  	},
   118  }
   119  
   120  var kanaSupplement = &unicode.RangeTable{
   121  	R32: []unicode.Range32{
   122  		{0x1B000, 0x1B0FF, 1},
   123  	},
   124  }
   125  
   126  var kanaExtendedA = &unicode.RangeTable{
   127  	R32: []unicode.Range32{
   128  		{0x1B100, 0x1B12F, 1},
   129  	},
   130  }
   131  
   132  var smallKanaExtension = &unicode.RangeTable{
   133  	R32: []unicode.Range32{
   134  		{0x1B130, 0x1B16F, 1},
   135  	},
   136  }
   137  
   138  var cjkUnifiedIdeographsExtensionB = &unicode.RangeTable{
   139  	R32: []unicode.Range32{
   140  		{0x20000, 0x2A6DF, 1},
   141  	},
   142  }
   143  
   144  var cjkUnifiedIdeographsExtensionC = &unicode.RangeTable{
   145  	R32: []unicode.Range32{
   146  		{0x2A700, 0x2B73F, 1},
   147  	},
   148  }
   149  
   150  var cjkUnifiedIdeographsExtensionD = &unicode.RangeTable{
   151  	R32: []unicode.Range32{
   152  		{0x2B740, 0x2B81F, 1},
   153  	},
   154  }
   155  
   156  var cjkUnifiedIdeographsExtensionE = &unicode.RangeTable{
   157  	R32: []unicode.Range32{
   158  		{0x2B820, 0x2CEAF, 1},
   159  	},
   160  }
   161  
   162  var cjkUnifiedIdeographsExtensionF = &unicode.RangeTable{
   163  	R32: []unicode.Range32{
   164  		{0x2CEB0, 0x2EBEF, 1},
   165  	},
   166  }
   167  
   168  var cjkCompatibilityIdeographsSupplement = &unicode.RangeTable{
   169  	R32: []unicode.Range32{
   170  		{0x2F800, 0x2FA1F, 1},
   171  	},
   172  }
   173  
   174  var cjkUnifiedIdeographsExtensionG = &unicode.RangeTable{
   175  	R32: []unicode.Range32{
   176  		{0x30000, 0x3134F, 1},
   177  	},
   178  }
   179  
   180  // IsEastAsianWideRune returns trhe if the given rune is an east asian wide character, otherwise false.
   181  func IsEastAsianWideRune(r rune) bool {
   182  	return unicode.Is(unicode.Hiragana, r) ||
   183  		unicode.Is(unicode.Katakana, r) ||
   184  		unicode.Is(unicode.Han, r) ||
   185  		unicode.Is(unicode.Lm, r) ||
   186  		unicode.Is(unicode.Hangul, r) ||
   187  		unicode.Is(cjkSymbolsAndPunctuation, r)
   188  }
   189  
   190  // IsSpaceDiscardingUnicodeRune returns true if the given rune is space-discarding unicode character, otherwise false.
   191  // See https://www.w3.org/TR/2020/WD-css-text-3-20200429/#space-discard-set
   192  func IsSpaceDiscardingUnicodeRune(r rune) bool {
   193  	return unicode.Is(cjkRadicalsSupplement, r) ||
   194  		unicode.Is(kangxiRadicals, r) ||
   195  		unicode.Is(ideographicDescriptionCharacters, r) ||
   196  		unicode.Is(cjkSymbolsAndPunctuation, r) ||
   197  		unicode.Is(hiragana, r) ||
   198  		unicode.Is(katakana, r) ||
   199  		unicode.Is(kanbun, r) ||
   200  		unicode.Is(cjkStrokes, r) ||
   201  		unicode.Is(katakanaPhoneticExtensions, r) ||
   202  		unicode.Is(cjkCompatibility, r) ||
   203  		unicode.Is(cjkUnifiedIdeographsExtensionA, r) ||
   204  		unicode.Is(cjkUnifiedIdeographs, r) ||
   205  		unicode.Is(yiSyllables, r) ||
   206  		unicode.Is(yiRadicals, r) ||
   207  		unicode.Is(cjkCompatibilityIdeographs, r) ||
   208  		unicode.Is(verticalForms, r) ||
   209  		unicode.Is(cjkCompatibilityForms, r) ||
   210  		unicode.Is(smallFormVariants, r) ||
   211  		unicode.Is(halfwidthAndFullwidthForms, r) ||
   212  		unicode.Is(kanaSupplement, r) ||
   213  		unicode.Is(kanaExtendedA, r) ||
   214  		unicode.Is(smallKanaExtension, r) ||
   215  		unicode.Is(cjkUnifiedIdeographsExtensionB, r) ||
   216  		unicode.Is(cjkUnifiedIdeographsExtensionC, r) ||
   217  		unicode.Is(cjkUnifiedIdeographsExtensionD, r) ||
   218  		unicode.Is(cjkUnifiedIdeographsExtensionE, r) ||
   219  		unicode.Is(cjkUnifiedIdeographsExtensionF, r) ||
   220  		unicode.Is(cjkCompatibilityIdeographsSupplement, r) ||
   221  		unicode.Is(cjkUnifiedIdeographsExtensionG, r)
   222  }
   223  
   224  // EastAsianWidth returns the east asian width of the given rune.
   225  // See https://www.unicode.org/reports/tr11/tr11-36.html
   226  func EastAsianWidth(r rune) string {
   227  	switch {
   228  	case r == 0x3000,
   229  		(0xFF01 <= r && r <= 0xFF60),
   230  		(0xFFE0 <= r && r <= 0xFFE6):
   231  		return "F"
   232  
   233  	case r == 0x20A9,
   234  		(0xFF61 <= r && r <= 0xFFBE),
   235  		(0xFFC2 <= r && r <= 0xFFC7),
   236  		(0xFFCA <= r && r <= 0xFFCF),
   237  		(0xFFD2 <= r && r <= 0xFFD7),
   238  		(0xFFDA <= r && r <= 0xFFDC),
   239  		(0xFFE8 <= r && r <= 0xFFEE):
   240  		return "H"
   241  
   242  	case (0x1100 <= r && r <= 0x115F),
   243  		(0x11A3 <= r && r <= 0x11A7),
   244  		(0x11FA <= r && r <= 0x11FF),
   245  		(0x2329 <= r && r <= 0x232A),
   246  		(0x2E80 <= r && r <= 0x2E99),
   247  		(0x2E9B <= r && r <= 0x2EF3),
   248  		(0x2F00 <= r && r <= 0x2FD5),
   249  		(0x2FF0 <= r && r <= 0x2FFB),
   250  		(0x3001 <= r && r <= 0x303E),
   251  		(0x3041 <= r && r <= 0x3096),
   252  		(0x3099 <= r && r <= 0x30FF),
   253  		(0x3105 <= r && r <= 0x312D),
   254  		(0x3131 <= r && r <= 0x318E),
   255  		(0x3190 <= r && r <= 0x31BA),
   256  		(0x31C0 <= r && r <= 0x31E3),
   257  		(0x31F0 <= r && r <= 0x321E),
   258  		(0x3220 <= r && r <= 0x3247),
   259  		(0x3250 <= r && r <= 0x32FE),
   260  		(0x3300 <= r && r <= 0x4DBF),
   261  		(0x4E00 <= r && r <= 0xA48C),
   262  		(0xA490 <= r && r <= 0xA4C6),
   263  		(0xA960 <= r && r <= 0xA97C),
   264  		(0xAC00 <= r && r <= 0xD7A3),
   265  		(0xD7B0 <= r && r <= 0xD7C6),
   266  		(0xD7CB <= r && r <= 0xD7FB),
   267  		(0xF900 <= r && r <= 0xFAFF),
   268  		(0xFE10 <= r && r <= 0xFE19),
   269  		(0xFE30 <= r && r <= 0xFE52),
   270  		(0xFE54 <= r && r <= 0xFE66),
   271  		(0xFE68 <= r && r <= 0xFE6B),
   272  		(0x1B000 <= r && r <= 0x1B001),
   273  		(0x1F200 <= r && r <= 0x1F202),
   274  		(0x1F210 <= r && r <= 0x1F23A),
   275  		(0x1F240 <= r && r <= 0x1F248),
   276  		(0x1F250 <= r && r <= 0x1F251),
   277  		(0x20000 <= r && r <= 0x2F73F),
   278  		(0x2B740 <= r && r <= 0x2FFFD),
   279  		(0x30000 <= r && r <= 0x3FFFD):
   280  		return "W"
   281  
   282  	case (0x0020 <= r && r <= 0x007E),
   283  		(0x00A2 <= r && r <= 0x00A3),
   284  		(0x00A5 <= r && r <= 0x00A6),
   285  		r == 0x00AC,
   286  		r == 0x00AF,
   287  		(0x27E6 <= r && r <= 0x27ED),
   288  		(0x2985 <= r && r <= 0x2986):
   289  		return "Na"
   290  
   291  	case (0x00A1 == r),
   292  		(0x00A4 == r),
   293  		(0x00A7 <= r && r <= 0x00A8),
   294  		(0x00AA == r),
   295  		(0x00AD <= r && r <= 0x00AE),
   296  		(0x00B0 <= r && r <= 0x00B4),
   297  		(0x00B6 <= r && r <= 0x00BA),
   298  		(0x00BC <= r && r <= 0x00BF),
   299  		(0x00C6 == r),
   300  		(0x00D0 == r),
   301  		(0x00D7 <= r && r <= 0x00D8),
   302  		(0x00DE <= r && r <= 0x00E1),
   303  		(0x00E6 == r),
   304  		(0x00E8 <= r && r <= 0x00EA),
   305  		(0x00EC <= r && r <= 0x00ED),
   306  		(0x00F0 == r),
   307  		(0x00F2 <= r && r <= 0x00F3),
   308  		(0x00F7 <= r && r <= 0x00FA),
   309  		(0x00FC == r),
   310  		(0x00FE == r),
   311  		(0x0101 == r),
   312  		(0x0111 == r),
   313  		(0x0113 == r),
   314  		(0x011B == r),
   315  		(0x0126 <= r && r <= 0x0127),
   316  		(0x012B == r),
   317  		(0x0131 <= r && r <= 0x0133),
   318  		(0x0138 == r),
   319  		(0x013F <= r && r <= 0x0142),
   320  		(0x0144 == r),
   321  		(0x0148 <= r && r <= 0x014B),
   322  		(0x014D == r),
   323  		(0x0152 <= r && r <= 0x0153),
   324  		(0x0166 <= r && r <= 0x0167),
   325  		(0x016B == r),
   326  		(0x01CE == r),
   327  		(0x01D0 == r),
   328  		(0x01D2 == r),
   329  		(0x01D4 == r),
   330  		(0x01D6 == r),
   331  		(0x01D8 == r),
   332  		(0x01DA == r),
   333  		(0x01DC == r),
   334  		(0x0251 == r),
   335  		(0x0261 == r),
   336  		(0x02C4 == r),
   337  		(0x02C7 == r),
   338  		(0x02C9 <= r && r <= 0x02CB),
   339  		(0x02CD == r),
   340  		(0x02D0 == r),
   341  		(0x02D8 <= r && r <= 0x02DB),
   342  		(0x02DD == r),
   343  		(0x02DF == r),
   344  		(0x0300 <= r && r <= 0x036F),
   345  		(0x0391 <= r && r <= 0x03A1),
   346  		(0x03A3 <= r && r <= 0x03A9),
   347  		(0x03B1 <= r && r <= 0x03C1),
   348  		(0x03C3 <= r && r <= 0x03C9),
   349  		(0x0401 == r),
   350  		(0x0410 <= r && r <= 0x044F),
   351  		(0x0451 == r),
   352  		(0x2010 == r),
   353  		(0x2013 <= r && r <= 0x2016),
   354  		(0x2018 <= r && r <= 0x2019),
   355  		(0x201C <= r && r <= 0x201D),
   356  		(0x2020 <= r && r <= 0x2022),
   357  		(0x2024 <= r && r <= 0x2027),
   358  		(0x2030 == r),
   359  		(0x2032 <= r && r <= 0x2033),
   360  		(0x2035 == r),
   361  		(0x203B == r),
   362  		(0x203E == r),
   363  		(0x2074 == r),
   364  		(0x207F == r),
   365  		(0x2081 <= r && r <= 0x2084),
   366  		(0x20AC == r),
   367  		(0x2103 == r),
   368  		(0x2105 == r),
   369  		(0x2109 == r),
   370  		(0x2113 == r),
   371  		(0x2116 == r),
   372  		(0x2121 <= r && r <= 0x2122),
   373  		(0x2126 == r),
   374  		(0x212B == r),
   375  		(0x2153 <= r && r <= 0x2154),
   376  		(0x215B <= r && r <= 0x215E),
   377  		(0x2160 <= r && r <= 0x216B),
   378  		(0x2170 <= r && r <= 0x2179),
   379  		(0x2189 == r),
   380  		(0x2190 <= r && r <= 0x2199),
   381  		(0x21B8 <= r && r <= 0x21B9),
   382  		(0x21D2 == r),
   383  		(0x21D4 == r),
   384  		(0x21E7 == r),
   385  		(0x2200 == r),
   386  		(0x2202 <= r && r <= 0x2203),
   387  		(0x2207 <= r && r <= 0x2208),
   388  		(0x220B == r),
   389  		(0x220F == r),
   390  		(0x2211 == r),
   391  		(0x2215 == r),
   392  		(0x221A == r),
   393  		(0x221D <= r && r <= 0x2220),
   394  		(0x2223 == r),
   395  		(0x2225 == r),
   396  		(0x2227 <= r && r <= 0x222C),
   397  		(0x222E == r),
   398  		(0x2234 <= r && r <= 0x2237),
   399  		(0x223C <= r && r <= 0x223D),
   400  		(0x2248 == r),
   401  		(0x224C == r),
   402  		(0x2252 == r),
   403  		(0x2260 <= r && r <= 0x2261),
   404  		(0x2264 <= r && r <= 0x2267),
   405  		(0x226A <= r && r <= 0x226B),
   406  		(0x226E <= r && r <= 0x226F),
   407  		(0x2282 <= r && r <= 0x2283),
   408  		(0x2286 <= r && r <= 0x2287),
   409  		(0x2295 == r),
   410  		(0x2299 == r),
   411  		(0x22A5 == r),
   412  		(0x22BF == r),
   413  		(0x2312 == r),
   414  		(0x2460 <= r && r <= 0x24E9),
   415  		(0x24EB <= r && r <= 0x254B),
   416  		(0x2550 <= r && r <= 0x2573),
   417  		(0x2580 <= r && r <= 0x258F),
   418  		(0x2592 <= r && r <= 0x2595),
   419  		(0x25A0 <= r && r <= 0x25A1),
   420  		(0x25A3 <= r && r <= 0x25A9),
   421  		(0x25B2 <= r && r <= 0x25B3),
   422  		(0x25B6 <= r && r <= 0x25B7),
   423  		(0x25BC <= r && r <= 0x25BD),
   424  		(0x25C0 <= r && r <= 0x25C1),
   425  		(0x25C6 <= r && r <= 0x25C8),
   426  		(0x25CB == r),
   427  		(0x25CE <= r && r <= 0x25D1),
   428  		(0x25E2 <= r && r <= 0x25E5),
   429  		(0x25EF == r),
   430  		(0x2605 <= r && r <= 0x2606),
   431  		(0x2609 == r),
   432  		(0x260E <= r && r <= 0x260F),
   433  		(0x2614 <= r && r <= 0x2615),
   434  		(0x261C == r),
   435  		(0x261E == r),
   436  		(0x2640 == r),
   437  		(0x2642 == r),
   438  		(0x2660 <= r && r <= 0x2661),
   439  		(0x2663 <= r && r <= 0x2665),
   440  		(0x2667 <= r && r <= 0x266A),
   441  		(0x266C <= r && r <= 0x266D),
   442  		(0x266F == r),
   443  		(0x269E <= r && r <= 0x269F),
   444  		(0x26BE <= r && r <= 0x26BF),
   445  		(0x26C4 <= r && r <= 0x26CD),
   446  		(0x26CF <= r && r <= 0x26E1),
   447  		(0x26E3 == r),
   448  		(0x26E8 <= r && r <= 0x26FF),
   449  		(0x273D == r),
   450  		(0x2757 == r),
   451  		(0x2776 <= r && r <= 0x277F),
   452  		(0x2B55 <= r && r <= 0x2B59),
   453  		(0x3248 <= r && r <= 0x324F),
   454  		(0xE000 <= r && r <= 0xF8FF),
   455  		(0xFE00 <= r && r <= 0xFE0F),
   456  		(0xFFFD == r),
   457  		(0x1F100 <= r && r <= 0x1F10A),
   458  		(0x1F110 <= r && r <= 0x1F12D),
   459  		(0x1F130 <= r && r <= 0x1F169),
   460  		(0x1F170 <= r && r <= 0x1F19A),
   461  		(0xE0100 <= r && r <= 0xE01EF),
   462  		(0xF0000 <= r && r <= 0xFFFFD),
   463  		(0x100000 <= r && r <= 0x10FFFD):
   464  		return "A"
   465  
   466  	default:
   467  		return "N"
   468  	}
   469  }
   470  

View as plain text