...

Source file src/github.com/huandu/xstrings/convert.go

Documentation: github.com/huandu/xstrings

     1  // Copyright 2015 Huan Du. All rights reserved.
     2  // Licensed under the MIT license that can be found in the LICENSE file.
     3  
     4  package xstrings
     5  
     6  import (
     7  	"math/rand"
     8  	"unicode"
     9  	"unicode/utf8"
    10  )
    11  
    12  // ToCamelCase is to convert words separated by space, underscore and hyphen to camel case.
    13  //
    14  // Some samples.
    15  //
    16  //	"some_words"      => "SomeWords"
    17  //	"http_server"     => "HttpServer"
    18  //	"no_https"        => "NoHttps"
    19  //	"_complex__case_" => "_Complex_Case_"
    20  //	"some words"      => "SomeWords"
    21  func ToCamelCase(str string) string {
    22  	if len(str) == 0 {
    23  		return ""
    24  	}
    25  
    26  	buf := &stringBuilder{}
    27  	var r0, r1 rune
    28  	var size int
    29  
    30  	// leading connector will appear in output.
    31  	for len(str) > 0 {
    32  		r0, size = utf8.DecodeRuneInString(str)
    33  		str = str[size:]
    34  
    35  		if !isConnector(r0) {
    36  			r0 = unicode.ToUpper(r0)
    37  			break
    38  		}
    39  
    40  		buf.WriteRune(r0)
    41  	}
    42  
    43  	if len(str) == 0 {
    44  		// A special case for a string contains only 1 rune.
    45  		if size != 0 {
    46  			buf.WriteRune(r0)
    47  		}
    48  
    49  		return buf.String()
    50  	}
    51  
    52  	for len(str) > 0 {
    53  		r1 = r0
    54  		r0, size = utf8.DecodeRuneInString(str)
    55  		str = str[size:]
    56  
    57  		if isConnector(r0) && isConnector(r1) {
    58  			buf.WriteRune(r1)
    59  			continue
    60  		}
    61  
    62  		if isConnector(r1) {
    63  			r0 = unicode.ToUpper(r0)
    64  		} else {
    65  			buf.WriteRune(r1)
    66  		}
    67  	}
    68  
    69  	buf.WriteRune(r0)
    70  	return buf.String()
    71  }
    72  
    73  // ToSnakeCase can convert all upper case characters in a string to
    74  // snake case format.
    75  //
    76  // Some samples.
    77  //
    78  //	"FirstName"    => "first_name"
    79  //	"HTTPServer"   => "http_server"
    80  //	"NoHTTPS"      => "no_https"
    81  //	"GO_PATH"      => "go_path"
    82  //	"GO PATH"      => "go_path"  // space is converted to underscore.
    83  //	"GO-PATH"      => "go_path"  // hyphen is converted to underscore.
    84  //	"http2xx"      => "http_2xx" // insert an underscore before a number and after an alphabet.
    85  //	"HTTP20xOK"    => "http_20x_ok"
    86  //	"Duration2m3s" => "duration_2m3s"
    87  //	"Bld4Floor3rd" => "bld4_floor_3rd"
    88  func ToSnakeCase(str string) string {
    89  	return camelCaseToLowerCase(str, '_')
    90  }
    91  
    92  // ToKebabCase can convert all upper case characters in a string to
    93  // kebab case format.
    94  //
    95  // Some samples.
    96  //
    97  //	"FirstName"    => "first-name"
    98  //	"HTTPServer"   => "http-server"
    99  //	"NoHTTPS"      => "no-https"
   100  //	"GO_PATH"      => "go-path"
   101  //	"GO PATH"      => "go-path"  // space is converted to '-'.
   102  //	"GO-PATH"      => "go-path"  // hyphen is converted to '-'.
   103  //	"http2xx"      => "http-2xx" // insert an underscore before a number and after an alphabet.
   104  //	"HTTP20xOK"    => "http-20x-ok"
   105  //	"Duration2m3s" => "duration-2m3s"
   106  //	"Bld4Floor3rd" => "bld4-floor-3rd"
   107  func ToKebabCase(str string) string {
   108  	return camelCaseToLowerCase(str, '-')
   109  }
   110  
        // camelCaseToLowerCase is the shared implementation of ToSnakeCase and
        // ToKebabCase. It splits str into words with nextWord, writes the words
        // through toLower and joins them with connector.
        //
        // Every connector rune found in str is written as one connector, and no
        // connector is inserted next to a punctuation word. Whether a number is
        // attached to the word before it or to the word after it depends on what
        // follows the number, see the comments in the default case.
   111  func camelCaseToLowerCase(str string, connector rune) string {
   112  	if len(str) == 0 {
   113  		return ""
   114  	}
   115  
   116  	buf := &stringBuilder{}
   117  	wt, word, remaining := nextWord(str)
   118  
        	// Each iteration writes the current word and then decides, from the
        	// type of the current word and of the next one, whether a connector has
        	// to follow. The last word is written after the loop.
   119  	for len(remaining) > 0 {
        		// A connector word is not written here but in the connectorWord
        		// case below.
   120  		if wt != connectorWord {
   121  			toLower(buf, wt, word, connector)
   122  		}
   123  
        		// prev and last describe the word handled above. From here on wt
        		// and word describe the word that follows it.
   124  		prev := wt
   125  		last := word
   126  		wt, word, remaining = nextWord(remaining)
   127  
   128  		switch prev {
   129  		case numberWord:
        			// Alphabet and number words that directly follow a number
        			// are written without a connector in between.
   130  			for wt == alphabetWord || wt == numberWord {
   131  				toLower(buf, wt, word, connector)
   132  				wt, word, remaining = nextWord(remaining)
   133  			}
   134  
        			// Separate the number from the next word unless there is no
        			// next word (invalidWord) or it is punctuation or a connector.
   135  			if wt != invalidWord && wt != punctWord && wt != connectorWord {
   136  				buf.WriteRune(connector)
   137  			}
   138  
   139  		case connectorWord:
        			// Write the skipped connector word now. toLower replaces each
        			// of its runes with connector.
   140  			toLower(buf, prev, last, connector)
   141  
   142  		case punctWord:
   143  			// nothing.
   144  
   145  		default:
        			// The next word is not a number: join the two words with a
        			// connector unless the next word is a connector or punctuation.
   146  			if wt != numberWord {
   147  				if wt != connectorWord && wt != punctWord {
   148  					buf.WriteRune(connector)
   149  				}
   150  
   151  				break
   152  			}
   153  
        			// The number is the last word of str. Leaving the switch ends
        			// the loop, and the toLower call after the loop writes the
        			// number right behind the previous word.
   154  			if len(remaining) == 0 {
   155  				break
   156  			}
   157  
        			// Look one word past the number. This last shadows the outer
        			// one and holds the number word.
   158  			last := word
   159  			wt, word, remaining = nextWord(remaining)
   160  
   161  			// consider number as a part of previous word.
   162  			// e.g. "Bld4Floor" => "bld4_floor"
   163  			if wt != alphabetWord {
   164  				toLower(buf, numberWord, last, connector)
   165  
   166  				if wt != connectorWord && wt != punctWord {
   167  					buf.WriteRune(connector)
   168  				}
   169  
   170  				break
   171  			}
   172  
   173  			// if there are some lower case letters following a number,
   174  			// add connector before the number.
   175  			// e.g. "HTTP2xx" => "http_2xx"
   176  			buf.WriteRune(connector)
   177  			toLower(buf, numberWord, last, connector)
   178  
        			// Keep the following alphabet and number words attached to the
        			// number, e.g. the "m3s" of "Duration2m3s".
   179  			for wt == alphabetWord || wt == numberWord {
   180  				toLower(buf, wt, word, connector)
   181  				wt, word, remaining = nextWord(remaining)
   182  			}
   183  
   184  			if wt != invalidWord && wt != connectorWord && wt != punctWord {
   185  				buf.WriteRune(connector)
   186  			}
   187  		}
   188  	}
   189  
        	// Write the word that is still pending. If the loops above consumed
        	// everything, word is empty and nothing is written.
   190  	toLower(buf, wt, word, connector)
   191  	return buf.String()
   192  }
   193  
   194  func isConnector(r rune) bool {
   195  	return r == '-' || r == '_' || unicode.IsSpace(r)
   196  }
   197  
        // wordType is the class of a word returned by nextWord.
   198  type wordType int
   199  
   200  const (
        	// invalidWord is the zero value. nextWord returns it for an empty
        	// string and for a string in which nextValidRune finds no valid rune.
   201  	invalidWord wordType = iota
        	// numberWord is a run of runes for which unicode.IsNumber is true.
   202  	numberWord
        	// upperCaseWord is a word that starts with an upper case rune. It is
        	// the only word type, besides connectorWord, that toLower rewrites.
   203  	upperCaseWord
        	// alphabetWord is a run of isAlphabet runes that starts with a rune
        	// that is not upper case and ends before the next upper case rune.
   204  	alphabetWord
        	// connectorWord is a run of runes for which isConnector is true.
   205  	connectorWord
        	// punctWord is a run of runes for which unicode.IsPunct is true.
   206  	punctWord
        	// otherWord is a run of runes that belong to none of the classes above.
   207  	otherWord
   208  )
   209  
        // nextWord splits the first word off str and classifies it.
        //
        // It returns the type of the word, the word itself and the rest of str.
        // An empty str yields the zero values: invalidWord and two empty strings.
        //
        // The class of the first valid rune selects the word type. The classes
        // are tested in the order connector, punctuation, upper case, alphabet,
        // number, so '-' and '_' are connectors even though unicode.IsPunct is
        // true for them as well.
        //
        // The size reported by nextValidRune includes the invalid bytes it
        // skipped. Such bytes become part of the current word when the rune
        // found behind them continues the word or when no valid rune follows.
   210  func nextWord(str string) (wt wordType, word, remaining string) {
   211  	if len(str) == 0 {
   212  		return
   213  	}
   214  
        	// offset is the length of word in bytes. remaining is advanced in
        	// step with it, except where noted below.
   215  	var offset int
   216  	remaining = str
        	// utf8.RuneError is passed as the fallback, so r is utf8.RuneError
        	// only if str does not contain any valid rune.
   217  	r, size := nextValidRune(remaining, utf8.RuneError)
   218  	offset += size
   219  
        	// Without a valid rune the whole string is a single invalid word.
   220  	if r == utf8.RuneError {
   221  		wt = invalidWord
   222  		word = str[:offset]
   223  		remaining = str[offset:]
   224  		return
   225  	}
   226  
   227  	switch {
   228  	case isConnector(r):
        		// A run of connector runes.
   229  		wt = connectorWord
   230  		remaining = remaining[size:]
   231  
   232  		for len(remaining) > 0 {
   233  			r, size = nextValidRune(remaining, r)
   234  
   235  			if !isConnector(r) {
   236  				break
   237  			}
   238  
   239  			offset += size
   240  			remaining = remaining[size:]
   241  		}
   242  
   243  	case unicode.IsPunct(r):
        		// A run of punctuation runes.
   244  		wt = punctWord
   245  		remaining = remaining[size:]
   246  
   247  		for len(remaining) > 0 {
   248  			r, size = nextValidRune(remaining, r)
   249  
   250  			if !unicode.IsPunct(r) {
   251  				break
   252  			}
   253  
   254  			offset += size
   255  			remaining = remaining[size:]
   256  		}
   257  
   258  	case unicode.IsUpper(r):
        		// The word starts with an upper case rune. The rune behind it
        		// decides how the word continues.
   259  		wt = upperCaseWord
   260  		remaining = remaining[size:]
   261  
        		// A single upper case rune at the end of str.
   262  		if len(remaining) == 0 {
   263  			break
   264  		}
   265  
   266  		r, size = nextValidRune(remaining, r)
   267  
        		// If neither case below matches, the word is the first rune only.
   268  		switch {
   269  		case unicode.IsUpper(r):
        			// A run of upper case runes, e.g. "HTTP". prevSize is the
        			// size of the last rune added to the run so that it can be
        			// given back below.
   270  			prevSize := size
   271  			offset += size
   272  			remaining = remaining[size:]
   273  
   274  			for len(remaining) > 0 {
   275  				r, size = nextValidRune(remaining, r)
   276  
   277  				if !unicode.IsUpper(r) {
   278  					break
   279  				}
   280  
   281  				prevSize = size
   282  				offset += size
   283  				remaining = remaining[size:]
   284  			}
   285  
   286  			// it's a bit complex when dealing with a case like "HTTPStatus".
   287  			// it's expected to be split into "HTTP" and "Status".
   288  			// Therefore "S" should be in remaining instead of word.
   289  			if len(remaining) > 0 && isAlphabet(r) {
   290  				offset -= prevSize
   291  				remaining = str[offset:]
   292  			}
   293  
   294  		case isAlphabet(r):
        			// One upper case rune followed by alphabet runes that are not
        			// upper case, e.g. "Status". The word ends before the next
        			// upper case rune.
   295  			offset += size
   296  			remaining = remaining[size:]
   297  
   298  			for len(remaining) > 0 {
   299  				r, size = nextValidRune(remaining, r)
   300  
   301  				if !isAlphabet(r) || unicode.IsUpper(r) {
   302  					break
   303  				}
   304  
   305  				offset += size
   306  				remaining = remaining[size:]
   307  			}
   308  		}
   309  
   310  	case isAlphabet(r):
        		// The first rune is an alphabet rune but not upper case, because
        		// upper case is handled by the case above. The word ends before
        		// the next upper case rune or the next rune that is not alphabet.
   311  		wt = alphabetWord
   312  		remaining = remaining[size:]
   313  
   314  		for len(remaining) > 0 {
   315  			r, size = nextValidRune(remaining, r)
   316  
   317  			if !isAlphabet(r) || unicode.IsUpper(r) {
   318  				break
   319  			}
   320  
   321  			offset += size
   322  			remaining = remaining[size:]
   323  		}
   324  
   325  	case unicode.IsNumber(r):
        		// A run of number runes.
   326  		wt = numberWord
   327  		remaining = remaining[size:]
   328  
   329  		for len(remaining) > 0 {
   330  			r, size = nextValidRune(remaining, r)
   331  
   332  			if !unicode.IsNumber(r) {
   333  				break
   334  			}
   335  
   336  			offset += size
   337  			remaining = remaining[size:]
   338  		}
   339  
   340  	default:
        		// Any other rune. The word runs until a rune of one of the
        		// classes handled above is found.
   341  		wt = otherWord
   342  		remaining = remaining[size:]
   343  
   344  		for len(remaining) > 0 {
   345  			r, size = nextValidRune(remaining, r)
   346  
   347  			if size == 0 || isConnector(r) || isAlphabet(r) || unicode.IsNumber(r) || unicode.IsPunct(r) {
   348  				break
   349  			}
   350  
   351  			offset += size
   352  			remaining = remaining[size:]
   353  		}
   354  	}
   355  
   356  	word = str[:offset]
   357  	return
   358  }
   359  
   360  func nextValidRune(str string, prev rune) (r rune, size int) {
   361  	var sz int
   362  
   363  	for len(str) > 0 {
   364  		r, sz = utf8.DecodeRuneInString(str)
   365  		size += sz
   366  
   367  		if r != utf8.RuneError {
   368  			return
   369  		}
   370  
   371  		str = str[sz:]
   372  	}
   373  
   374  	r = prev
   375  	return
   376  }
   377  
   378  func toLower(buf *stringBuilder, wt wordType, str string, connector rune) {
   379  	buf.Grow(buf.Len() + len(str))
   380  
   381  	if wt != upperCaseWord && wt != connectorWord {
   382  		buf.WriteString(str)
   383  		return
   384  	}
   385  
   386  	for len(str) > 0 {
   387  		r, size := utf8.DecodeRuneInString(str)
   388  		str = str[size:]
   389  
   390  		if isConnector(r) {
   391  			buf.WriteRune(connector)
   392  		} else if unicode.IsUpper(r) {
   393  			buf.WriteRune(unicode.ToLower(r))
   394  		} else {
   395  			buf.WriteRune(r)
   396  		}
   397  	}
   398  }
   399  
   400  // SwapCase will swap characters case from upper to lower or lower to upper.
   401  func SwapCase(str string) string {
   402  	var r rune
   403  	var size int
   404  
   405  	buf := &stringBuilder{}
   406  
   407  	for len(str) > 0 {
   408  		r, size = utf8.DecodeRuneInString(str)
   409  
   410  		switch {
   411  		case unicode.IsUpper(r):
   412  			buf.WriteRune(unicode.ToLower(r))
   413  
   414  		case unicode.IsLower(r):
   415  			buf.WriteRune(unicode.ToUpper(r))
   416  
   417  		default:
   418  			buf.WriteRune(r)
   419  		}
   420  
   421  		str = str[size:]
   422  	}
   423  
   424  	return buf.String()
   425  }
   426  
   427  // FirstRuneToUpper converts first rune to upper case if necessary.
   428  func FirstRuneToUpper(str string) string {
   429  	if str == "" {
   430  		return str
   431  	}
   432  
   433  	r, size := utf8.DecodeRuneInString(str)
   434  
   435  	if !unicode.IsLower(r) {
   436  		return str
   437  	}
   438  
   439  	buf := &stringBuilder{}
   440  	buf.WriteRune(unicode.ToUpper(r))
   441  	buf.WriteString(str[size:])
   442  	return buf.String()
   443  }
   444  
   445  // FirstRuneToLower converts first rune to lower case if necessary.
   446  func FirstRuneToLower(str string) string {
   447  	if str == "" {
   448  		return str
   449  	}
   450  
   451  	r, size := utf8.DecodeRuneInString(str)
   452  
   453  	if !unicode.IsUpper(r) {
   454  		return str
   455  	}
   456  
   457  	buf := &stringBuilder{}
   458  	buf.WriteRune(unicode.ToLower(r))
   459  	buf.WriteString(str[size:])
   460  	return buf.String()
   461  }
   462  
   463  // Shuffle randomizes runes in a string and returns the result.
   464  // It uses default random source in `math/rand`.
   465  func Shuffle(str string) string {
   466  	if str == "" {
   467  		return str
   468  	}
   469  
   470  	runes := []rune(str)
   471  	index := 0
   472  
   473  	for i := len(runes) - 1; i > 0; i-- {
   474  		index = rand.Intn(i + 1)
   475  
   476  		if i != index {
   477  			runes[i], runes[index] = runes[index], runes[i]
   478  		}
   479  	}
   480  
   481  	return string(runes)
   482  }
   483  
   484  // ShuffleSource randomizes runes in a string with given random source.
   485  func ShuffleSource(str string, src rand.Source) string {
   486  	if str == "" {
   487  		return str
   488  	}
   489  
   490  	runes := []rune(str)
   491  	index := 0
   492  	r := rand.New(src)
   493  
   494  	for i := len(runes) - 1; i > 0; i-- {
   495  		index = r.Intn(i + 1)
   496  
   497  		if i != index {
   498  			runes[i], runes[index] = runes[index], runes[i]
   499  		}
   500  	}
   501  
   502  	return string(runes)
   503  }
   504  
   505  // Successor returns the successor to string.
   506  //
   507  // If there is one alphanumeric rune is found in string, increase the rune by 1.
   508  // If increment generates a "carry", the rune to the left of it is incremented.
   509  // This process repeats until there is no carry, adding an additional rune if necessary.
   510  //
   511  // If there is no alphanumeric rune, the rightmost rune will be increased by 1
   512  // regardless whether the result is a valid rune or not.
   513  //
   514  // Only following characters are alphanumeric.
   515  //   - a - z
   516  //   - A - Z
   517  //   - 0 - 9
   518  //
   519  // Samples (borrowed from ruby's String#succ document):
   520  //
   521  //	"abcd"      => "abce"
   522  //	"THX1138"   => "THX1139"
   523  //	"<<koala>>" => "<<koalb>>"
   524  //	"1999zzz"   => "2000aaa"
   525  //	"ZZZ9999"   => "AAAA0000"
   526  //	"***"       => "**+"
   527  func Successor(str string) string {
   528  	if str == "" {
   529  		return str
   530  	}
   531  
   532  	var r rune
   533  	var i int
   534  	carry := ' '
   535  	runes := []rune(str)
   536  	l := len(runes)
   537  	lastAlphanumeric := l
   538  
   539  	for i = l - 1; i >= 0; i-- {
   540  		r = runes[i]
   541  
   542  		if ('a' <= r && r <= 'y') ||
   543  			('A' <= r && r <= 'Y') ||
   544  			('0' <= r && r <= '8') {
   545  			runes[i]++
   546  			carry = ' '
   547  			lastAlphanumeric = i
   548  			break
   549  		}
   550  
   551  		switch r {
   552  		case 'z':
   553  			runes[i] = 'a'
   554  			carry = 'a'
   555  			lastAlphanumeric = i
   556  
   557  		case 'Z':
   558  			runes[i] = 'A'
   559  			carry = 'A'
   560  			lastAlphanumeric = i
   561  
   562  		case '9':
   563  			runes[i] = '0'
   564  			carry = '0'
   565  			lastAlphanumeric = i
   566  		}
   567  	}
   568  
   569  	// Needs to add one character for carry.
   570  	if i < 0 && carry != ' ' {
   571  		buf := &stringBuilder{}
   572  		buf.Grow(l + 4) // Reserve enough space for write.
   573  
   574  		if lastAlphanumeric != 0 {
   575  			buf.WriteString(str[:lastAlphanumeric])
   576  		}
   577  
   578  		buf.WriteRune(carry)
   579  
   580  		for _, r = range runes[lastAlphanumeric:] {
   581  			buf.WriteRune(r)
   582  		}
   583  
   584  		return buf.String()
   585  	}
   586  
   587  	// No alphanumeric character. Simply increase last rune's value.
   588  	if lastAlphanumeric == l {
   589  		runes[l-1]++
   590  	}
   591  
   592  	return string(runes)
   593  }
   594  

View as plain text