...

Source file src/github.com/gobuffalo/flect/plural_rules.go

Documentation: github.com/gobuffalo/flect

     1  package flect
     2  
     3  import "fmt"
     4  
     5  var pluralRules = []rule{}
     6  
     7  // AddPlural adds a rule that will replace the given suffix with the replacement suffix.
     8  // The name is confusing. This function will be deprecated in the next release.
     9  func AddPlural(suffix string, repl string) {
    10  	InsertPluralRule(suffix, repl)
    11  }
    12  
    13  // InsertPluralRule inserts a rule that will replace the given suffix with
    14  // the repl(acement) at the begining of the list of the pluralize rules.
    15  func InsertPluralRule(suffix, repl string) {
    16  	pluralMoot.Lock()
    17  	defer pluralMoot.Unlock()
    18  
    19  	pluralRules = append([]rule{{
    20  		suffix: suffix,
    21  		fn:     simpleRuleFunc(suffix, repl),
    22  	}}, pluralRules...)
    23  
    24  	pluralRules = append([]rule{{
    25  		suffix: repl,
    26  		fn:     noop,
    27  	}}, pluralRules...)
    28  }
    29  
    30  type word struct {
    31  	singular       string
    32  	plural         string
    33  	alternative    string
    34  	unidirectional bool // plural to singular is not possible (or bad)
    35  	uncountable    bool
    36  	exact          bool
    37  }
    38  
    39  // dictionary is the main table for singularize and pluralize.
    40  // All words in the dictionary will be added to singleToPlural, pluralToSingle
    41  // and singlePluralAssertions by init() functions.
    42  var dictionary = []word{
    43  	// identicals https://en.wikipedia.org/wiki/English_plurals#Nouns_with_identical_singular_and_plural
    44  	{singular: "aircraft", plural: "aircraft"},
    45  	{singular: "beef", plural: "beef", alternative: "beefs"},
    46  	{singular: "bison", plural: "bison"},
    47  	{singular: "blues", plural: "blues", unidirectional: true},
    48  	{singular: "chassis", plural: "chassis"},
    49  	{singular: "deer", plural: "deer"},
    50  	{singular: "fish", plural: "fish", alternative: "fishes"},
    51  	{singular: "moose", plural: "moose"},
    52  	{singular: "police", plural: "police"},
    53  	{singular: "salmon", plural: "salmon", alternative: "salmons"},
    54  	{singular: "series", plural: "series"},
    55  	{singular: "sheep", plural: "sheep"},
    56  	{singular: "shrimp", plural: "shrimp", alternative: "shrimps"},
    57  	{singular: "species", plural: "species"},
    58  	{singular: "swine", plural: "swine", alternative: "swines"},
    59  	{singular: "trout", plural: "trout", alternative: "trouts"},
    60  	{singular: "tuna", plural: "tuna", alternative: "tunas"},
    61  	{singular: "you", plural: "you"},
    62  	// -en https://en.wikipedia.org/wiki/English_plurals#Plurals_in_-(e)n
    63  	{singular: "child", plural: "children"},
    64  	{singular: "ox", plural: "oxen", exact: true},
    65  	// apophonic https://en.wikipedia.org/wiki/English_plurals#Apophonic_plurals
    66  	{singular: "foot", plural: "feet"},
    67  	{singular: "goose", plural: "geese"},
    68  	{singular: "man", plural: "men"},
    69  	{singular: "human", plural: "humans"}, // not humen
    70  	{singular: "louse", plural: "lice", exact: true},
    71  	{singular: "mouse", plural: "mice"},
    72  	{singular: "tooth", plural: "teeth"},
    73  	{singular: "woman", plural: "women"},
    74  	// misc https://en.wikipedia.org/wiki/English_plurals#Miscellaneous_irregular_plurals
    75  	{singular: "die", plural: "dice", exact: true},
    76  	{singular: "person", plural: "people"},
    77  
    78  	// Words from French that end in -u add an x; in addition to eau to eaux rule
    79  	{singular: "adieu", plural: "adieux", alternative: "adieus"},
    80  	{singular: "fabliau", plural: "fabliaux"},
    81  	{singular: "bureau", plural: "bureaus", alternative: "bureaux"}, // popular
    82  
    83  	// Words from Greek that end in -on change -on to -a; in addition to hedron rule
    84  	{singular: "criterion", plural: "criteria"},
    85  	{singular: "ganglion", plural: "ganglia", alternative: "ganglions"},
    86  	{singular: "lexicon", plural: "lexica", alternative: "lexicons"},
    87  	{singular: "mitochondrion", plural: "mitochondria", alternative: "mitochondrions"},
    88  	{singular: "noumenon", plural: "noumena"},
    89  	{singular: "phenomenon", plural: "phenomena"},
    90  	{singular: "taxon", plural: "taxa"},
    91  
    92  	// Words from Latin that end in -um change -um to -a; in addition to some rules
    93  	{singular: "media", plural: "media"}, // popular case: media -> media
    94  	{singular: "medium", plural: "media", alternative: "mediums", unidirectional: true},
    95  	{singular: "stadium", plural: "stadiums", alternative: "stadia"},
    96  	{singular: "aquarium", plural: "aquaria", alternative: "aquariums"},
    97  	{singular: "auditorium", plural: "auditoria", alternative: "auditoriums"},
    98  	{singular: "symposium", plural: "symposia", alternative: "symposiums"},
    99  	{singular: "curriculum", plural: "curriculums", alternative: "curricula"}, // ulum
   100  	{singular: "quota", plural: "quotas"},
   101  
   102  	// Words from Latin that end in -us change -us to -i or -era
   103  	{singular: "alumnus", plural: "alumni", alternative: "alumnuses"}, // -i
   104  	{singular: "bacillus", plural: "bacilli"},
   105  	{singular: "cactus", plural: "cacti", alternative: "cactuses"},
   106  	{singular: "coccus", plural: "cocci"},
   107  	{singular: "focus", plural: "foci", alternative: "focuses"},
   108  	{singular: "locus", plural: "loci", alternative: "locuses"},
   109  	{singular: "nucleus", plural: "nuclei", alternative: "nucleuses"},
   110  	{singular: "octopus", plural: "octupuses", alternative: "octopi"},
   111  	{singular: "radius", plural: "radii", alternative: "radiuses"},
   112  	{singular: "syllabus", plural: "syllabi"},
   113  	{singular: "corpus", plural: "corpora", alternative: "corpuses"}, // -ra
   114  	{singular: "genus", plural: "genera"},
   115  
   116  	// Words from Latin that end in -a change -a to -ae
   117  	{singular: "alumna", plural: "alumnae"},
   118  	{singular: "vertebra", plural: "vertebrae"},
   119  	{singular: "differentia", plural: "differentiae"}, // -tia
   120  	{singular: "minutia", plural: "minutiae"},
   121  	{singular: "vita", plural: "vitae"},   // -ita
   122  	{singular: "larva", plural: "larvae"}, // -va
   123  	{singular: "postcava", plural: "postcavae"},
   124  	{singular: "praecava", plural: "praecavae"},
   125  	{singular: "uva", plural: "uvae"},
   126  
   127  	// Words from Latin that end in -ex change -ex to -ices
   128  	{singular: "apex", plural: "apices", alternative: "apexes"},
   129  	{singular: "codex", plural: "codices", alternative: "codexes"},
   130  	{singular: "index", plural: "indices", alternative: "indexes"},
   131  	{singular: "latex", plural: "latices", alternative: "latexes"},
   132  	{singular: "vertex", plural: "vertices", alternative: "vertexes"},
   133  	{singular: "vortex", plural: "vortices", alternative: "vortexes"},
   134  
   135  	// Words from Latin that end in -ix change -ix to -ices (eg, matrix becomes matrices)
   136  	{singular: "appendix", plural: "appendices", alternative: "appendixes"},
   137  	{singular: "radix", plural: "radices", alternative: "radixes"},
   138  	{singular: "helix", plural: "helices", alternative: "helixes"},
   139  
   140  	// Words from Latin that end in -is change -is to -es
   141  	{singular: "axis", plural: "axes", exact: true},
   142  	{singular: "crisis", plural: "crises"},
   143  	{singular: "ellipsis", plural: "ellipses", unidirectional: true}, // ellipse
   144  	{singular: "genesis", plural: "geneses"},
   145  	{singular: "oasis", plural: "oases"},
   146  	{singular: "thesis", plural: "theses"},
   147  	{singular: "testis", plural: "testes"},
   148  	{singular: "base", plural: "bases"}, // popular case
   149  	{singular: "basis", plural: "bases", unidirectional: true},
   150  
   151  	{singular: "alias", plural: "aliases", exact: true}, // no alia, no aliasis
   152  	{singular: "vedalia", plural: "vedalias"},           // no vedalium, no vedaliases
   153  
   154  	// Words that end in -ch, -o, -s, -sh, -x, -z (can be conflict with the others)
   155  	{singular: "use", plural: "uses", exact: true}, // us vs use
   156  	{singular: "abuse", plural: "abuses"},
   157  	{singular: "cause", plural: "causes"},
   158  	{singular: "clause", plural: "clauses"},
   159  	{singular: "cruse", plural: "cruses"},
   160  	{singular: "excuse", plural: "excuses"},
   161  	{singular: "fuse", plural: "fuses"},
   162  	{singular: "house", plural: "houses"},
   163  	{singular: "misuse", plural: "misuses"},
   164  	{singular: "muse", plural: "muses"},
   165  	{singular: "pause", plural: "pauses"},
   166  	{singular: "ache", plural: "aches"},
   167  	{singular: "topaz", plural: "topazes"},
   168  	{singular: "buffalo", plural: "buffaloes", alternative: "buffalos"},
   169  	{singular: "potato", plural: "potatoes"},
   170  	{singular: "tomato", plural: "tomatoes"},
   171  
   172  	// uncountables
   173  	{singular: "equipment", uncountable: true},
   174  	{singular: "information", uncountable: true},
   175  	{singular: "jeans", uncountable: true},
   176  	{singular: "money", uncountable: true},
   177  	{singular: "news", uncountable: true},
   178  	{singular: "rice", uncountable: true},
   179  
   180  	// exceptions: -f to -ves, not -fe
   181  	{singular: "dwarf", plural: "dwarfs", alternative: "dwarves"},
   182  	{singular: "hoof", plural: "hoofs", alternative: "hooves"},
   183  	{singular: "thief", plural: "thieves"},
   184  	// exceptions: instead of -f(e) to -ves
   185  	{singular: "chive", plural: "chives"},
   186  	{singular: "hive", plural: "hives"},
   187  	{singular: "move", plural: "moves"},
   188  
   189  	// exceptions: instead of -y to -ies
   190  	{singular: "movie", plural: "movies"},
   191  	{singular: "cookie", plural: "cookies"},
   192  
   193  	// exceptions: instead of -um to -a
   194  	{singular: "pretorium", plural: "pretoriums"},
   195  	{singular: "agenda", plural: "agendas"}, // instead of plural of agendum
   196  	// exceptions: instead of -um to -a (chemical element names)
   197  
   198  	// Words from Latin that end in -a change -a to -ae
   199  	{singular: "formula", plural: "formulas", alternative: "formulae"}, // also -um/-a
   200  
   201  	// exceptions: instead of -o to -oes
   202  	{singular: "shoe", plural: "shoes"},
   203  	{singular: "toe", plural: "toes", exact: true},
   204  	{singular: "graffiti", plural: "graffiti"},
   205  
   206  	// abbreviations
   207  	{singular: "ID", plural: "IDs", exact: true},
   208  }
   209  
   210  // singleToPlural is the highest priority map for Pluralize().
   211  // singularToPluralSuffixList is used to build pluralRules for suffixes and
   212  // compound words.
   213  var singleToPlural = map[string]string{}
   214  
   215  // pluralToSingle is the highest priority map for Singularize().
   216  // singularToPluralSuffixList is used to build singularRules for suffixes and
   217  // compound words.
   218  var pluralToSingle = map[string]string{}
   219  
   220  // NOTE: This map should not be built as a reverse map of singleToPlural,
   221  // since there are words that have the same plural.
   222  
   223  // build singleToPlural and pluralToSingle with dictionary
   224  func init() {
   225  	for _, wd := range dictionary {
   226  		if singleToPlural[wd.singular] != "" {
   227  			panic(fmt.Errorf("map singleToPlural already has an entry for %s", wd.singular))
   228  		}
   229  
   230  		if wd.uncountable && wd.plural == "" {
   231  			wd.plural = wd.singular
   232  		}
   233  
   234  		if wd.plural == "" {
   235  			panic(fmt.Errorf("plural for %s is not provided", wd.singular))
   236  		}
   237  
   238  		singleToPlural[wd.singular] = wd.plural
   239  
   240  		if !wd.unidirectional {
   241  			if pluralToSingle[wd.plural] != "" {
   242  				panic(fmt.Errorf("map pluralToSingle already has an entry for %s", wd.plural))
   243  			}
   244  			pluralToSingle[wd.plural] = wd.singular
   245  
   246  			if wd.alternative != "" {
   247  				if pluralToSingle[wd.alternative] != "" {
   248  					panic(fmt.Errorf("map pluralToSingle already has an entry for %s", wd.alternative))
   249  				}
   250  				pluralToSingle[wd.alternative] = wd.singular
   251  			}
   252  		}
   253  	}
   254  }
   255  
   256  type singularToPluralSuffix struct {
   257  	singular string
   258  	plural   string
   259  }
   260  
   261  // singularToPluralSuffixList is a list of "bidirectional" suffix rules for
   262  // the irregular plurals follow such rules.
   263  //
   264  // NOTE: IMPORTANT! The order of items in this list is the rule priority, not
   265  // alphabet order. The first match will be used to inflect.
   266  var singularToPluralSuffixList = []singularToPluralSuffix{
   267  	// https://en.wiktionary.org/wiki/Appendix:English_irregular_nouns#Rules
   268  	// Words that end in -f or -fe change -f or -fe to -ves
   269  	{"tive", "tives"}, // exception
   270  	{"eaf", "eaves"},
   271  	{"oaf", "oaves"},
   272  	{"afe", "aves"},
   273  	{"arf", "arves"},
   274  	{"rfe", "rves"},
   275  	{"rf", "rves"},
   276  	{"lf", "lves"},
   277  	{"fe", "ves"}, // previously '[a-eg-km-z]fe' TODO: regex support
   278  
   279  	// Words that end in -y preceded by a consonant change -y to -ies
   280  	{"ay", "ays"},
   281  	{"ey", "eys"},
   282  	{"oy", "oys"},
   283  	{"quy", "quies"},
   284  	{"uy", "uys"},
   285  	{"y", "ies"}, // '[^aeiou]y'
   286  
   287  	// Words from French that end in -u add an x (eg, château becomes châteaux)
   288  	{"eau", "eaux"}, // it seems like 'eau' is the most popular form of this rule
   289  
   290  	// Words from Latin that end in -a change -a to -ae; before -on to -a and -um to -a
   291  	{"bula", "bulae"},
   292  	{"dula", "bulae"},
   293  	{"lula", "bulae"},
   294  	{"nula", "bulae"},
   295  	{"vula", "bulae"},
   296  
   297  	// Words from Greek that end in -on change -on to -a (eg, polyhedron becomes polyhedra)
   298  	// https://en.wiktionary.org/wiki/Category:English_irregular_plurals_ending_in_"-a"
   299  	{"hedron", "hedra"},
   300  
   301  	// Words from Latin that end in -um change -um to -a (eg, minimum becomes minima)
   302  	// https://en.wiktionary.org/wiki/Category:English_irregular_plurals_ending_in_"-a"
   303  	{"ium", "ia"}, // some exceptions especially chemical element names
   304  	{"seum", "seums"},
   305  	{"eum", "ea"},
   306  	{"oum", "oa"},
   307  	{"stracum", "straca"},
   308  	{"dum", "da"},
   309  	{"elum", "ela"},
   310  	{"ilum", "ila"},
   311  	{"olum", "ola"},
   312  	{"ulum", "ula"},
   313  	{"llum", "lla"},
   314  	{"ylum", "yla"},
   315  	{"imum", "ima"},
   316  	{"ernum", "erna"},
   317  	{"gnum", "gna"},
   318  	{"brum", "bra"},
   319  	{"crum", "cra"},
   320  	{"terum", "tera"},
   321  	{"serum", "sera"},
   322  	{"trum", "tra"},
   323  	{"antum", "anta"},
   324  	{"atum", "ata"},
   325  	{"entum", "enta"},
   326  	{"etum", "eta"},
   327  	{"itum", "ita"},
   328  	{"otum", "ota"},
   329  	{"utum", "uta"},
   330  	{"ctum", "cta"},
   331  	{"ovum", "ova"},
   332  
   333  	// Words from Latin that end in -us change -us to -i or -era
   334  	// not easy to make a simple rule. just add them all to the dictionary
   335  
   336  	// Words from Latin that end in -ex change -ex to -ices (eg, vortex becomes vortices)
   337  	// Words from Latin that end in -ix change -ix to -ices (eg, matrix becomes matrices)
   338  	//    for example, -dix, -dex, and -dice will have the same plural form so
   339  	//    making a simple rule is not possible for them
   340  	{"trix", "trices"}, // ignore a few words end in trice
   341  
   342  	// Words from Latin that end in -is change -is to -es (eg, thesis becomes theses)
   343  	// -sis and -se has the same plural -ses so making a rule is not easy too.
   344  	{"iasis", "iases"},
   345  	{"mesis", "meses"},
   346  	{"kinesis", "kineses"},
   347  	{"resis", "reses"},
   348  	{"gnosis", "gnoses"}, // e.g. diagnosis
   349  	{"opsis", "opses"},   // e.g. synopsis
   350  	{"ysis", "yses"},     // e.g. analysis
   351  
   352  	// Words that end in -ch, -o, -s, -sh, -x, -z
   353  	{"ouse", "ouses"},
   354  	{"lause", "lauses"},
   355  	{"us", "uses"}, // use/uses is in the dictionary
   356  
   357  	{"ch", "ches"},
   358  	{"io", "ios"},
   359  	{"sh", "shes"},
   360  	{"ss", "sses"},
   361  	{"ez", "ezzes"},
   362  	{"iz", "izzes"},
   363  	{"tz", "tzes"},
   364  	{"zz", "zzes"},
   365  	{"ano", "anos"},
   366  	{"lo", "los"},
   367  	{"to", "tos"},
   368  	{"oo", "oos"},
   369  	{"o", "oes"},
   370  	{"x", "xes"},
   371  
   372  	// for abbreviations
   373  	{"S", "Ses"},
   374  
   375  	// excluded rules: seems rare
   376  	// Words from Hebrew that add -im or -ot (eg, cherub becomes cherubim)
   377  	// - cherub (cherubs or cherubim), seraph (seraphs or seraphim)
   378  	// Words from Greek that end in -ma change -ma to -mata
   379  	// - The most of words end in -ma are in this category but it looks like
   380  	//   just adding -s is more popular.
   381  	// Words from Latin that end in -nx change -nx to -nges
   382  	// - The most of words end in -nx are in this category but it looks like
   383  	//   just adding -es is more popular. (sphinxes)
   384  
   385  	// excluded rules: don't care at least for now:
   386  	// Words that end in -ful that add an s after the -ful
   387  	// Words that end in -s or -ese denoting a national of a particular country
   388  	// Symbols or letters, which often add -'s
   389  }
   390  
   391  func init() {
   392  	for i := len(singularToPluralSuffixList) - 1; i >= 0; i-- {
   393  		InsertPluralRule(singularToPluralSuffixList[i].singular, singularToPluralSuffixList[i].plural)
   394  		InsertSingularRule(singularToPluralSuffixList[i].plural, singularToPluralSuffixList[i].singular)
   395  	}
   396  
   397  	// build pluralRule and singularRule with dictionary for compound words
   398  	for _, wd := range dictionary {
   399  		if wd.exact {
   400  			continue
   401  		}
   402  
   403  		if wd.uncountable && wd.plural == "" {
   404  			wd.plural = wd.singular
   405  		}
   406  
   407  		InsertPluralRule(wd.singular, wd.plural)
   408  
   409  		if !wd.unidirectional {
   410  			InsertSingularRule(wd.plural, wd.singular)
   411  
   412  			if wd.alternative != "" {
   413  				InsertSingularRule(wd.alternative, wd.singular)
   414  			}
   415  		}
   416  	}
   417  }
   418  

View as plain text