...

Source file src/github.com/huandu/xstrings/manipulate.go

Documentation: github.com/huandu/xstrings

     1  // Copyright 2015 Huan Du. All rights reserved.
     2  // Licensed under the MIT license that can be found in the LICENSE file.
     3  
     4  package xstrings
     5  
     6  import (
     7  	"strings"
     8  	"unicode/utf8"
     9  )
    10  
    11  // Reverse a utf8 encoded string.
    12  func Reverse(str string) string {
    13  	var size int
    14  
    15  	tail := len(str)
    16  	buf := make([]byte, tail)
    17  	s := buf
    18  
    19  	for len(str) > 0 {
    20  		_, size = utf8.DecodeRuneInString(str)
    21  		tail -= size
    22  		s = append(s[:tail], []byte(str[:size])...)
    23  		str = str[size:]
    24  	}
    25  
    26  	return string(buf)
    27  }
    28  
    29  // Slice a string by rune.
    30  //
    31  // Start must satisfy 0 <= start <= rune length.
    32  //
    33  // End can be positive, zero or negative.
    34  // If end >= 0, start and end must satisfy start <= end <= rune length.
    35  // If end < 0, it means slice to the end of string.
    36  //
    37  // Otherwise, Slice will panic as out of range.
    38  func Slice(str string, start, end int) string {
    39  	var size, startPos, endPos int
    40  
    41  	origin := str
    42  
    43  	if start < 0 || end > len(str) || (end >= 0 && start > end) {
    44  		panic("out of range")
    45  	}
    46  
    47  	if end >= 0 {
    48  		end -= start
    49  	}
    50  
    51  	for start > 0 && len(str) > 0 {
    52  		_, size = utf8.DecodeRuneInString(str)
    53  		start--
    54  		startPos += size
    55  		str = str[size:]
    56  	}
    57  
    58  	if end < 0 {
    59  		return origin[startPos:]
    60  	}
    61  
    62  	endPos = startPos
    63  
    64  	for end > 0 && len(str) > 0 {
    65  		_, size = utf8.DecodeRuneInString(str)
    66  		end--
    67  		endPos += size
    68  		str = str[size:]
    69  	}
    70  
    71  	if len(str) == 0 && (start > 0 || end > 0) {
    72  		panic("out of range")
    73  	}
    74  
    75  	return origin[startPos:endPos]
    76  }
    77  
    78  // Partition splits a string by sep into three parts.
    79  // The return value is a slice of strings with head, match and tail.
    80  //
    81  // If str contains sep, for example "hello" and "l", Partition returns
    82  //
    83  //	"he", "l", "lo"
    84  //
    85  // If str doesn't contain sep, for example "hello" and "x", Partition returns
    86  //
    87  //	"hello", "", ""
    88  func Partition(str, sep string) (head, match, tail string) {
    89  	index := strings.Index(str, sep)
    90  
    91  	if index == -1 {
    92  		head = str
    93  		return
    94  	}
    95  
    96  	head = str[:index]
    97  	match = str[index : index+len(sep)]
    98  	tail = str[index+len(sep):]
    99  	return
   100  }
   101  
   102  // LastPartition splits a string by last instance of sep into three parts.
   103  // The return value is a slice of strings with head, match and tail.
   104  //
   105  // If str contains sep, for example "hello" and "l", LastPartition returns
   106  //
   107  //	"hel", "l", "o"
   108  //
   109  // If str doesn't contain sep, for example "hello" and "x", LastPartition returns
   110  //
   111  //	"", "", "hello"
   112  func LastPartition(str, sep string) (head, match, tail string) {
   113  	index := strings.LastIndex(str, sep)
   114  
   115  	if index == -1 {
   116  		tail = str
   117  		return
   118  	}
   119  
   120  	head = str[:index]
   121  	match = str[index : index+len(sep)]
   122  	tail = str[index+len(sep):]
   123  	return
   124  }
   125  
   126  // Insert src into dst at given rune index.
   127  // Index is counted by runes instead of bytes.
   128  //
   129  // If index is out of range of dst, panic with out of range.
   130  func Insert(dst, src string, index int) string {
   131  	return Slice(dst, 0, index) + src + Slice(dst, index, -1)
   132  }
   133  
   134  // Scrub scrubs invalid utf8 bytes with repl string.
   135  // Adjacent invalid bytes are replaced only once.
   136  func Scrub(str, repl string) string {
   137  	var buf *stringBuilder
   138  	var r rune
   139  	var size, pos int
   140  	var hasError bool
   141  
   142  	origin := str
   143  
   144  	for len(str) > 0 {
   145  		r, size = utf8.DecodeRuneInString(str)
   146  
   147  		if r == utf8.RuneError {
   148  			if !hasError {
   149  				if buf == nil {
   150  					buf = &stringBuilder{}
   151  				}
   152  
   153  				buf.WriteString(origin[:pos])
   154  				hasError = true
   155  			}
   156  		} else if hasError {
   157  			hasError = false
   158  			buf.WriteString(repl)
   159  
   160  			origin = origin[pos:]
   161  			pos = 0
   162  		}
   163  
   164  		pos += size
   165  		str = str[size:]
   166  	}
   167  
   168  	if buf != nil {
   169  		buf.WriteString(origin)
   170  		return buf.String()
   171  	}
   172  
   173  	// No invalid byte.
   174  	return origin
   175  }
   176  
   177  // WordSplit splits a string into words. Returns a slice of words.
   178  // If there is no word in a string, return nil.
   179  //
   180  // Word is defined as a locale dependent string containing alphabetic characters,
   181  // which may also contain but not start with `'` and `-` characters.
   182  func WordSplit(str string) []string {
   183  	var word string
   184  	var words []string
   185  	var r rune
   186  	var size, pos int
   187  
   188  	inWord := false
   189  
   190  	for len(str) > 0 {
   191  		r, size = utf8.DecodeRuneInString(str)
   192  
   193  		switch {
   194  		case isAlphabet(r):
   195  			if !inWord {
   196  				inWord = true
   197  				word = str
   198  				pos = 0
   199  			}
   200  
   201  		case inWord && (r == '\'' || r == '-'):
   202  			// Still in word.
   203  
   204  		default:
   205  			if inWord {
   206  				inWord = false
   207  				words = append(words, word[:pos])
   208  			}
   209  		}
   210  
   211  		pos += size
   212  		str = str[size:]
   213  	}
   214  
   215  	if inWord {
   216  		words = append(words, word[:pos])
   217  	}
   218  
   219  	return words
   220  }
   221  

View as plain text