1 // Copyright 2016 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package fields provides a view of the fields of a struct that follows the Go 16 // rules, amended to consider tags and case insensitivity. 17 // 18 // # Usage 19 // 20 // First define a function that interprets tags: 21 // 22 // func parseTag(st reflect.StructTag) (name string, keep bool, other interface{}, err error) { ... } 23 // 24 // The function's return values describe whether to ignore the field 25 // completely or provide an alternate name, as well as other data from the 26 // parse that is stored to avoid re-parsing. 27 // 28 // Then define a function to validate the type: 29 // 30 // func validate(t reflect.Type) error { ... } 31 // 32 // Then, if necessary, define a function to specify leaf types - types 33 // which should be considered one field and not be recursed into: 34 // 35 // func isLeafType(t reflect.Type) bool { ... } 36 // 37 // eg: 38 // 39 // func isLeafType(t reflect.Type) bool { 40 // return t == reflect.TypeOf(time.Time{}) 41 // } 42 // 43 // Next, construct a Cache, passing your functions. As its name suggests, a 44 // Cache remembers validation and field information for a type, so subsequent 45 // calls with the same type are very fast. 46 // 47 // cache := fields.NewCache(parseTag, validate, isLeafType) 48 // 49 // To get the fields of a struct type as determined by the above rules, call 50 // the Fields method: 51 // 52 // fields, err := cache.Fields(reflect.TypeOf(MyStruct{})) 53 // 54 // The return value can be treated as a slice of Fields. 55 // 56 // Given a string, such as a key or column name obtained during unmarshalling, 57 // call Match on the list of fields to find a field whose name is the best 58 // match: 59 // 60 // field := fields.Match(name) 61 // 62 // Match looks for an exact match first, then falls back to a case-insensitive 63 // comparison. 64 package fields 65 66 import ( 67 "bytes" 68 "errors" 69 "reflect" 70 "sort" 71 "strings" 72 "sync" 73 ) 74 75 // A Field records information about a struct field. 76 type Field struct { 77 Name string // effective field name 78 NameFromTag bool // did Name come from a tag? 79 Type reflect.Type // field type 80 Index []int // index sequence, for reflect.Value.FieldByIndex 81 ParsedTag interface{} // third return value of the parseTag function 82 83 nameBytes []byte 84 equalFold func(s, t []byte) bool 85 } 86 87 // ParseTagFunc is a function that accepts a struct tag and returns four values: an alternative name for the field 88 // extracted from the tag, a boolean saying whether to keep the field or ignore it, additional data that is stored 89 // with the field information to avoid having to parse the tag again, and an error. 90 type ParseTagFunc func(reflect.StructTag) (name string, keep bool, other interface{}, err error) 91 92 // ValidateFunc is a function that accepts a reflect.Type and returns an error if the struct type is invalid in any 93 // way. 94 type ValidateFunc func(reflect.Type) error 95 96 // LeafTypesFunc is a function that accepts a reflect.Type and returns true if the struct type a leaf, or false if not. 97 // TODO(deklerk): is this description accurate? 98 type LeafTypesFunc func(reflect.Type) bool 99 100 // A Cache records information about the fields of struct types. 101 // 102 // A Cache is safe for use by multiple goroutines. 103 type Cache struct { 104 parseTag ParseTagFunc 105 validate ValidateFunc 106 leafTypes LeafTypesFunc 107 cache sync.Map // from reflect.Type to cacheValue 108 } 109 110 // NewCache constructs a Cache. 111 // 112 // Its first argument should be a function that accepts 113 // a struct tag and returns four values: an alternative name for the field 114 // extracted from the tag, a boolean saying whether to keep the field or ignore 115 // it, additional data that is stored with the field information to avoid 116 // having to parse the tag again, and an error. 117 // 118 // Its second argument should be a function that accepts a reflect.Type and 119 // returns an error if the struct type is invalid in any way. For example, it 120 // may check that all of the struct field tags are valid, or that all fields 121 // are of an appropriate type. 122 func NewCache(parseTag ParseTagFunc, validate ValidateFunc, leafTypes LeafTypesFunc) *Cache { 123 if parseTag == nil { 124 parseTag = func(reflect.StructTag) (string, bool, interface{}, error) { 125 return "", true, nil, nil 126 } 127 } 128 if validate == nil { 129 validate = func(reflect.Type) error { 130 return nil 131 } 132 } 133 if leafTypes == nil { 134 leafTypes = func(reflect.Type) bool { 135 return false 136 } 137 } 138 139 return &Cache{ 140 parseTag: parseTag, 141 validate: validate, 142 leafTypes: leafTypes, 143 } 144 } 145 146 // A fieldScan represents an item on the fieldByNameFunc scan work list. 147 type fieldScan struct { 148 typ reflect.Type 149 index []int 150 } 151 152 // Fields returns all the exported fields of t, which must be a struct type. It 153 // follows the standard Go rules for embedded fields, modified by the presence 154 // of tags. The result is sorted lexicographically by index. 155 // 156 // These rules apply in the absence of tags: 157 // Anonymous struct fields are treated as if their inner exported fields were 158 // fields in the outer struct (embedding). The result includes all fields that 159 // aren't shadowed by fields at higher level of embedding. If more than one 160 // field with the same name exists at the same level of embedding, it is 161 // excluded. An anonymous field that is not of struct type is treated as having 162 // its type as its name. 163 // 164 // Tags modify these rules as follows: 165 // A field's tag is used as its name. 166 // An anonymous struct field with a name given in its tag is treated as 167 // a field having that name, rather than an embedded struct (the struct's 168 // fields will not be returned). 169 // If more than one field with the same name exists at the same level of embedding, 170 // but exactly one of them is tagged, then the tagged field is reported and the others 171 // are ignored. 172 func (c *Cache) Fields(t reflect.Type) (List, error) { 173 if t.Kind() != reflect.Struct { 174 panic("fields: Fields of non-struct type") 175 } 176 return c.cachedTypeFields(t) 177 } 178 179 // A List is a list of Fields. 180 type List []Field 181 182 // Match returns the field in the list whose name best matches the supplied 183 // name, nor nil if no field does. If there is a field with the exact name, it 184 // is returned. Otherwise the first field (sorted by index) whose name matches 185 // case-insensitively is returned. 186 func (l List) Match(name string) *Field { 187 return l.MatchBytes([]byte(name)) 188 } 189 190 // MatchBytes is identical to Match, except that the argument is a byte slice. 191 func (l List) MatchBytes(name []byte) *Field { 192 var f *Field 193 for i := range l { 194 ff := &l[i] 195 if bytes.Equal(ff.nameBytes, name) { 196 return ff 197 } 198 if f == nil && ff.equalFold(ff.nameBytes, name) { 199 f = ff 200 } 201 } 202 return f 203 } 204 205 type cacheValue struct { 206 fields List 207 err error 208 } 209 210 // cachedTypeFields is like typeFields but uses a cache to avoid repeated work. 211 // This code has been copied and modified from 212 // https://go.googlesource.com/go/+/go1.7.3/src/encoding/json/encode.go. 213 func (c *Cache) cachedTypeFields(t reflect.Type) (List, error) { 214 var cv cacheValue 215 x, ok := c.cache.Load(t) 216 if ok { 217 cv = x.(cacheValue) 218 } else { 219 if err := c.validate(t); err != nil { 220 cv = cacheValue{nil, err} 221 } else { 222 f, err := c.typeFields(t) 223 cv = cacheValue{List(f), err} 224 } 225 c.cache.Store(t, cv) 226 } 227 return cv.fields, cv.err 228 } 229 230 func (c *Cache) typeFields(t reflect.Type) ([]Field, error) { 231 fields, err := c.listFields(t) 232 if err != nil { 233 return nil, err 234 } 235 sort.Sort(byName(fields)) 236 // Delete all fields that are hidden by the Go rules for embedded fields. 237 238 // The fields are sorted in primary order of name, secondary order of field 239 // index length. So the first field with a given name is the dominant one. 240 var out []Field 241 for advance, i := 0, 0; i < len(fields); i += advance { 242 // One iteration per name. 243 // Find the sequence of fields with the name of this first field. 244 fi := fields[i] 245 name := fi.Name 246 for advance = 1; i+advance < len(fields); advance++ { 247 fj := fields[i+advance] 248 if fj.Name != name { 249 break 250 } 251 } 252 // Find the dominant field, if any, out of all fields that have the same name. 253 dominant, ok := dominantField(fields[i : i+advance]) 254 if ok { 255 out = append(out, dominant) 256 } 257 } 258 sort.Sort(byIndex(out)) 259 return out, nil 260 } 261 262 func (c *Cache) listFields(t reflect.Type) ([]Field, error) { 263 // This uses the same condition that the Go language does: there must be a unique instance 264 // of the match at a given depth level. If there are multiple instances of a match at the 265 // same depth, they annihilate each other and inhibit any possible match at a lower level. 266 // The algorithm is breadth first search, one depth level at a time. 267 268 // The current and next slices are work queues: 269 // current lists the fields to visit on this depth level, 270 // and next lists the fields on the next lower level. 271 current := []fieldScan{} 272 next := []fieldScan{{typ: t}} 273 274 // nextCount records the number of times an embedded type has been 275 // encountered and considered for queueing in the 'next' slice. 276 // We only queue the first one, but we increment the count on each. 277 // If a struct type T can be reached more than once at a given depth level, 278 // then it annihilates itself and need not be considered at all when we 279 // process that next depth level. 280 var nextCount map[reflect.Type]int 281 282 // visited records the structs that have been considered already. 283 // Embedded pointer fields can create cycles in the graph of 284 // reachable embedded types; visited avoids following those cycles. 285 // It also avoids duplicated effort: if we didn't find the field in an 286 // embedded type T at level 2, we won't find it in one at level 4 either. 287 visited := map[reflect.Type]bool{} 288 289 var fields []Field // Fields found. 290 291 for len(next) > 0 { 292 current, next = next, current[:0] 293 count := nextCount 294 nextCount = nil 295 296 // Process all the fields at this depth, now listed in 'current'. 297 // The loop queues embedded fields found in 'next', for processing during the next 298 // iteration. The multiplicity of the 'current' field counts is recorded 299 // in 'count'; the multiplicity of the 'next' field counts is recorded in 'nextCount'. 300 for _, scan := range current { 301 t := scan.typ 302 if visited[t] { 303 // We've looked through this type before, at a higher level. 304 // That higher level would shadow the lower level we're now at, 305 // so this one can't be useful to us. Ignore it. 306 continue 307 } 308 visited[t] = true 309 for i := 0; i < t.NumField(); i++ { 310 f := t.Field(i) 311 312 exported := (f.PkgPath == "") 313 314 // If a named field is unexported, ignore it. An anonymous 315 // unexported field is processed, because it may contain 316 // exported fields, which are visible. 317 if !exported && !f.Anonymous { 318 continue 319 } 320 321 // Examine the tag. 322 tagName, keep, other, err := c.parseTag(f.Tag) 323 if err != nil { 324 return nil, err 325 } 326 if !keep { 327 continue 328 } 329 if c.leafTypes(f.Type) { 330 fields = append(fields, newField(f, tagName, other, scan.index, i)) 331 continue 332 } 333 334 var ntyp reflect.Type 335 if f.Anonymous { 336 // Anonymous field of type T or *T. 337 ntyp = f.Type 338 if ntyp.Kind() == reflect.Ptr { 339 ntyp = ntyp.Elem() 340 } 341 } 342 343 // Record fields with a tag name, non-anonymous fields, or 344 // anonymous non-struct fields. 345 if tagName != "" || ntyp == nil || ntyp.Kind() != reflect.Struct { 346 if !exported { 347 continue 348 } 349 fields = append(fields, newField(f, tagName, other, scan.index, i)) 350 if count[t] > 1 { 351 // If there were multiple instances, add a second, 352 // so that the annihilation code will see a duplicate. 353 fields = append(fields, fields[len(fields)-1]) 354 } 355 continue 356 } 357 358 // Queue embedded struct fields for processing with next level, 359 // but only if the embedded types haven't already been queued. 360 if nextCount[ntyp] > 0 { 361 nextCount[ntyp] = 2 // exact multiple doesn't matter 362 continue 363 } 364 if nextCount == nil { 365 nextCount = map[reflect.Type]int{} 366 } 367 nextCount[ntyp] = 1 368 if count[t] > 1 { 369 nextCount[ntyp] = 2 // exact multiple doesn't matter 370 } 371 var index []int 372 index = append(index, scan.index...) 373 index = append(index, i) 374 next = append(next, fieldScan{ntyp, index}) 375 } 376 } 377 } 378 return fields, nil 379 } 380 381 func newField(f reflect.StructField, tagName string, other interface{}, index []int, i int) Field { 382 name := tagName 383 if name == "" { 384 name = f.Name 385 } 386 sf := Field{ 387 Name: name, 388 NameFromTag: tagName != "", 389 Type: f.Type, 390 ParsedTag: other, 391 nameBytes: []byte(name), 392 } 393 sf.equalFold = foldFunc(sf.nameBytes) 394 sf.Index = append(sf.Index, index...) 395 sf.Index = append(sf.Index, i) 396 return sf 397 } 398 399 // byName sorts fields using the following criteria, in order: 400 // 1. name 401 // 2. embedding depth 402 // 3. tag presence (preferring a tagged field) 403 // 4. index sequence. 404 type byName []Field 405 406 func (x byName) Len() int { return len(x) } 407 408 func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 409 410 func (x byName) Less(i, j int) bool { 411 if x[i].Name != x[j].Name { 412 return x[i].Name < x[j].Name 413 } 414 if len(x[i].Index) != len(x[j].Index) { 415 return len(x[i].Index) < len(x[j].Index) 416 } 417 if x[i].NameFromTag != x[j].NameFromTag { 418 return x[i].NameFromTag 419 } 420 return byIndex(x).Less(i, j) 421 } 422 423 // byIndex sorts field by index sequence. 424 type byIndex []Field 425 426 func (x byIndex) Len() int { return len(x) } 427 428 func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 429 430 func (x byIndex) Less(i, j int) bool { 431 xi := x[i].Index 432 xj := x[j].Index 433 ln := len(xi) 434 if l := len(xj); l < ln { 435 ln = l 436 } 437 for k := 0; k < ln; k++ { 438 if xi[k] != xj[k] { 439 return xi[k] < xj[k] 440 } 441 } 442 return len(xi) < len(xj) 443 } 444 445 // dominantField looks through the fields, all of which are known to have the 446 // same name, to find the single field that dominates the others using Go's 447 // embedding rules, modified by the presence of tags. If there are multiple 448 // top-level fields, the boolean will be false: This condition is an error in 449 // Go and we skip all the fields. 450 func dominantField(fs []Field) (Field, bool) { 451 // The fields are sorted in increasing index-length order, then by presence of tag. 452 // That means that the first field is the dominant one. We need only check 453 // for error cases: two fields at top level, either both tagged or neither tagged. 454 if len(fs) > 1 && len(fs[0].Index) == len(fs[1].Index) && fs[0].NameFromTag == fs[1].NameFromTag { 455 return Field{}, false 456 } 457 return fs[0], true 458 } 459 460 // ParseStandardTag extracts the sub-tag named by key, then parses it using the 461 // de facto standard format introduced in encoding/json: 462 // 463 // "-" means "ignore this tag". It must occur by itself. (parseStandardTag returns an error 464 // in this case, whereas encoding/json accepts the "-" even if it is not alone.) 465 // "<name>" provides an alternative name for the field 466 // "<name>,opt1,opt2,..." specifies options after the name. 467 // 468 // The options are returned as a []string. 469 func ParseStandardTag(key string, t reflect.StructTag) (name string, keep bool, options []string, err error) { 470 s := t.Get(key) 471 parts := strings.Split(s, ",") 472 if parts[0] == "-" { 473 if len(parts) > 1 { 474 return "", false, nil, errors.New(`"-" field tag with options`) 475 } 476 return "", false, nil, nil 477 } 478 if len(parts) > 1 { 479 options = parts[1:] 480 } 481 return parts[0], true, options, nil 482 } 483