// Unmarshal arbitrary XML docs to map[string]interface{} or JSON and extract values (using wildcards, if necessary).
// Copyright 2012-2018 Charles Banning. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file

/*
Unmarshal dynamic / arbitrary XML docs and extract values (using wildcards, if necessary).
THIS IS ONLY PROVIDED TO FACILITATE MIGRATING TO "mxj" PACKAGE FROM "x2j" PACKAGE.

NOTICE: 03mar18, package mostly replicates github.com/clbanning/x2j using github.com/clbanning/mxj
(Note: there is no concept of Node or Tree; only direct decoding to map[string]interface{}.)

One useful function is:

  - Unmarshal(doc []byte, v interface{}) error
    where v is a pointer to a variable of type 'map[string]interface{}', 'string', or
    any other type supported by xml.Unmarshal().

To retrieve a value for specific tag use:

  - DocValue(doc, path string, attrs ...string) (interface{},error)
  - MapValue(m map[string]interface{}, path string, attr map[string]interface{}, recast ...bool) (interface{}, error)

The 'path' argument is a period-separated tag hierarchy - also known as dot-notation.
It is the program's responsibility to cast the returned value to the proper type; possible
types are the normal JSON unmarshaling types: string, float64, bool, []interface{}, map[string]interface{}.

To retrieve all values associated with a tag occurring anywhere in the XML document use:

  - ValuesForTag(doc, tag string) ([]interface{}, error)
  - ValuesForKey(m map[string]interface{}, key string) []interface{}

    Demos: http://play.golang.org/p/m8zP-cpk0O
    http://play.golang.org/p/cIteTS1iSg
    http://play.golang.org/p/vd8pMiI21b

Returned values should be one of map[string]interface{}, []interface{}, or string.

All the values associated with a tag-path that may include one or more wildcard characters -
'*' - can also be retrieved using:

  - ValuesFromTagPath(doc, path string, getAttrs ...bool) ([]interface{}, error)
  - ValuesFromKeyPath(map[string]interface{}, path string, getAttrs ...bool) []interface{}

    Demos: http://play.golang.org/p/kUQnZ8VuhS
    http://play.golang.org/p/l1aMHYtz7G

NOTE: care should be taken when using "*" at the end of a path - i.e., "books.book.*". See
the x2jpath_test.go case on how the wildcard returns all key values and collapses list values;
the same message structure can load a []interface{} or a map[string]interface{} (or an interface{})
value for a tag.

See the test cases in "x2jpath_test.go" and programs in "example" subdirectory for more.

XML PARSING CONVENTIONS

  - Attributes are parsed to map[string]interface{} values by prefixing a hyphen, '-',
    to the attribute label.
  - If the element is a simple element and has attributes, the element value
    is given the key '#text' for its map[string]interface{} representation. (See
    the 'atomFeedString.xml' test data, below.)

io.Reader HANDLING

ToMap(), ToJson(), and ToJsonIndent() provide parsing of messages from an io.Reader.
If you want to handle a message stream, look at XmlMsgsFromReader().

NON-UTF8 CHARACTER SETS

Use the X2jCharsetReader variable to assign io.Reader for alternative character sets.
*/
package x2j

import (
	"bytes"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"

	"github.com/clbanning/mxj"
)

// If X2jCharsetReader != nil, it will be used to decode the doc or stream if required
//	import charset "code.google.com/p/go-charset/charset"
//	...
90 // x2j.X2jCharsetReader = charset.NewReader 91 // s, err := x2j.DocToJson(doc) 92 var X2jCharsetReader func(charset string, input io.Reader)(io.Reader, error) 93 94 // DocToJson - return an XML doc as a JSON string. 95 // If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible. 96 func DocToJson(doc string, recast ...bool) (string, error) { 97 var r bool 98 if len(recast) == 1 { 99 r = recast[0] 100 } 101 m, merr := mxj.NewMapXml([]byte(doc), r) 102 if m == nil || merr != nil { 103 return "", merr 104 } 105 106 b, berr := m.Json() 107 if berr != nil { 108 return "", berr 109 } 110 111 // NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML. 112 return string(b), nil 113 } 114 115 // DocToJsonIndent - return an XML doc as a prettified JSON string. 116 // If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible. 117 // Note: recasting is only applied to element values, not attribute values. 118 func DocToJsonIndent(doc string, recast ...bool) (string, error) { 119 var r bool 120 if len(recast) == 1 { 121 r = recast[0] 122 } 123 m, merr := mxj.NewMapXml([]byte(doc), r) 124 if m == nil || merr != nil { 125 return "", merr 126 } 127 128 b, berr := m.JsonIndent("", " ") 129 if berr != nil { 130 return "", berr 131 } 132 133 // NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML. 134 return string(b), nil 135 } 136 137 // DocToMap - convert an XML doc into a map[string]interface{}. 138 // (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().) 139 // If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible. 140 // Note: recasting is only applied to element values, not attribute values. 
141 func DocToMap(doc string, recast ...bool) (map[string]interface{}, error) { 142 var r bool 143 if len(recast) == 1 { 144 r = recast[0] 145 } 146 return mxj.NewMapXml([]byte(doc), r) 147 } 148 149 // WriteMap - dumps the map[string]interface{} for examination. 150 // 'offset' is initial indentation count; typically: WriteMap(m). 151 // NOTE: with XML all element types are 'string'. 152 // But code written as generic for use with maps[string]interface{} values from json.Unmarshal(). 153 // Or it can handle a DocToMap(doc,true) result where values have been recast'd. 154 func WriteMap(m interface{}, offset ...int) string { 155 var indent int 156 if len(offset) == 1 { 157 indent = offset[0] 158 } 159 160 var s string 161 switch m.(type) { 162 case nil: 163 return "[nil] nil" 164 case string: 165 return "[string] " + m.(string) 166 case float64: 167 return "[float64] " + strconv.FormatFloat(m.(float64), 'e', 2, 64) 168 case bool: 169 return "[bool] " + strconv.FormatBool(m.(bool)) 170 case []interface{}: 171 s += "[[]interface{}]" 172 for i, v := range m.([]interface{}) { 173 s += "\n" 174 for i := 0; i < indent; i++ { 175 s += " " 176 } 177 s += "[item: " + strconv.FormatInt(int64(i), 10) + "]" 178 switch v.(type) { 179 case string, float64, bool: 180 s += "\n" 181 default: 182 // noop 183 } 184 for i := 0; i < indent; i++ { 185 s += " " 186 } 187 s += WriteMap(v, indent+1) 188 } 189 case map[string]interface{}: 190 for k, v := range m.(map[string]interface{}) { 191 s += "\n" 192 for i := 0; i < indent; i++ { 193 s += " " 194 } 195 // s += "[map[string]interface{}] "+k+" :"+WriteMap(v,indent+1) 196 s += k + " :" + WriteMap(v, indent+1) 197 } 198 default: 199 // shouldn't ever be here ... 200 s += fmt.Sprintf("unknown type for: %v", m) 201 } 202 return s 203 } 204 205 // ------------------------ value extraction from XML doc -------------------------- 206 207 // DocValue - return a value for a specific tag 208 // 'doc' is a valid XML message. 
209 // 'path' is a hierarchy of XML tags, e.g., "doc.name". 210 // 'attrs' is an OPTIONAL list of "name:value" pairs for attributes. 211 // Note: 'recast' is not enabled here. Use DocToMap(), NewAttributeMap(), and MapValue() calls for that. 212 func DocValue(doc, path string, attrs ...string) (interface{}, error) { 213 m, err := mxj.NewMapXml([]byte(doc), false) 214 if err != nil { 215 return nil, err 216 } 217 218 a, err := NewAttributeMap(attrs...) 219 if err != nil { 220 return nil, err 221 } 222 v, verr := MapValue(m, path, a) 223 if verr != nil { 224 return nil, verr 225 } 226 return v, nil 227 } 228 229 // MapValue - retrieves value based on walking the map, 'm'. 230 // 'm' is the map value of interest. 231 // 'path' is a period-separated hierarchy of keys in the map. 232 // 'attr' is a map of attribute "name:value" pairs from NewAttributeMap(). May be 'nil'. 233 // If the path can't be traversed, an error is returned. 234 // Note: the optional argument 'r' can be used to coerce attribute values, 'attr', if done so for 'm'. 235 func MapValue(m map[string]interface{}, path string, attr map[string]interface{}, r ...bool) (interface{}, error) { 236 // attribute values may have been recasted during map construction; default is 'false'. 
237 if len(r) == 1 && r[0] == true { 238 for k, v := range attr { 239 attr[k] = recast(v.(string), true) 240 } 241 } 242 243 // parse the path 244 keys := strings.Split(path, ".") 245 246 // initialize return value to 'm' so a path of "" will work correctly 247 var v interface{} = m 248 var ok bool 249 var okey string 250 var isMap bool = true 251 if keys[0] == "" && len(attr) == 0 { 252 return v, nil 253 } 254 for _, key := range keys { 255 if !isMap { 256 return nil, errors.New("no keys beyond: " + okey) 257 } 258 if v, ok = m[key]; !ok { 259 return nil, errors.New("no key in map: " + key) 260 } else { 261 switch v.(type) { 262 case map[string]interface{}: 263 m = v.(map[string]interface{}) 264 isMap = true 265 default: 266 isMap = false 267 } 268 } 269 // save 'key' for error reporting 270 okey = key 271 } 272 273 // match attributes; value is "#text" or nil 274 if attr == nil { 275 return v, nil 276 } 277 return hasAttributes(v, attr) 278 } 279 280 // recast - try to cast string values to bool or float64 281 func recast(s string, r bool) interface{} { 282 if r { 283 // handle numeric strings ahead of boolean 284 if f, err := strconv.ParseFloat(s, 64); err == nil { 285 return interface{}(f) 286 } 287 // ParseBool treats "1"==true & "0"==false 288 if b, err := strconv.ParseBool(s); err == nil { 289 return interface{}(b) 290 } 291 } 292 return interface{}(s) 293 } 294 295 // hasAttributes() - interface{} equality works for string, float64, bool 296 func hasAttributes(v interface{}, a map[string]interface{}) (interface{}, error) { 297 switch v.(type) { 298 case []interface{}: 299 // run through all entries looking one with matching attributes 300 for _, vv := range v.([]interface{}) { 301 if vvv, vvverr := hasAttributes(vv, a); vvverr == nil { 302 return vvv, nil 303 } 304 } 305 return nil, errors.New("no list member with matching attributes") 306 case map[string]interface{}: 307 // do all attribute name:value pairs match? 
308 nv := v.(map[string]interface{}) 309 for key, val := range a { 310 if vv, ok := nv[key]; !ok { 311 return nil, errors.New("no attribute with name: " + key[1:]) 312 } else if val != vv { 313 return nil, errors.New("no attribute key:value pair: " + fmt.Sprintf("%s:%v", key[1:], val)) 314 } 315 } 316 // they all match; so return value associated with "#text" key. 317 if vv, ok := nv["#text"]; ok { 318 return vv, nil 319 } else { 320 // this happens when another element is value of tag rather than just a string value 321 return nv, nil 322 } 323 } 324 return nil, errors.New("no match for attributes") 325 } 326 327 // NewAttributeMap() - generate map of attributes=value entries as map["-"+string]string. 328 // 'kv' arguments are "name:value" pairs that appear as attributes, name="value". 329 // If len(kv) == 0, the return is (nil, nil). 330 func NewAttributeMap(kv ...string) (map[string]interface{}, error) { 331 if len(kv) == 0 { 332 return nil, nil 333 } 334 m := make(map[string]interface{}, 0) 335 for _, v := range kv { 336 vv := strings.Split(v, ":") 337 if len(vv) != 2 { 338 return nil, errors.New("attribute not \"name:value\" pair: " + v) 339 } 340 // attributes are stored as keys prepended with hyphen 341 m["-"+vv[0]] = interface{}(vv[1]) 342 } 343 return m, nil 344 } 345 346 //------------------------- get values for key ---------------------------- 347 348 // ValuesForTag - return all values in doc associated with 'tag'. 349 // Returns nil if the 'tag' does not occur in the doc. 350 // If there is an error encounted while parsing doc, that is returned. 351 // If you want values 'recast' use DocToMap() and ValuesForKey(). 
352 func ValuesForTag(doc, tag string) ([]interface{}, error) { 353 m, err := mxj.NewMapXml([]byte(doc)) 354 if err != nil { 355 return nil, err 356 } 357 358 return ValuesForKey(m, tag), nil 359 } 360 361 // ValuesForKey - return all values in map associated with 'key' 362 // Returns nil if the 'key' does not occur in the map 363 func ValuesForKey(m map[string]interface{}, key string) []interface{} { 364 ret := make([]interface{}, 0) 365 366 hasKey(m, key, &ret) 367 if len(ret) > 0 { 368 return ret 369 } 370 return nil 371 } 372 373 // hasKey - if the map 'key' exists append it to array 374 // if it doesn't do nothing except scan array and map values 375 func hasKey(iv interface{}, key string, ret *[]interface{}) { 376 switch iv.(type) { 377 case map[string]interface{}: 378 vv := iv.(map[string]interface{}) 379 if v, ok := vv[key]; ok { 380 *ret = append(*ret, v) 381 } 382 for _, v := range iv.(map[string]interface{}) { 383 hasKey(v, key, ret) 384 } 385 case []interface{}: 386 for _, v := range iv.([]interface{}) { 387 hasKey(v, key, ret) 388 } 389 } 390 } 391 392 // ======== 2013.07.01 - x2j.Unmarshal, wraps xml.Unmarshal ============== 393 394 // Unmarshal - wraps xml.Unmarshal with handling of map[string]interface{} 395 // and string type variables. 396 // Usage: x2j.Unmarshal(doc,&m) where m of type map[string]interface{} 397 // x2j.Unmarshal(doc,&s) where s of type string (Overrides xml.Unmarshal().) 
398 // x2j.Unmarshal(doc,&struct) - passed to xml.Unmarshal() 399 // x2j.Unmarshal(doc,&slice) - passed to xml.Unmarshal() 400 func Unmarshal(doc []byte, v interface{}) error { 401 switch v.(type) { 402 case *map[string]interface{}: 403 m, err := mxj.NewMapXml(doc) 404 vv := *v.(*map[string]interface{}) 405 for k, v := range m { 406 vv[k] = v 407 } 408 return err 409 case *string: 410 s, err := ByteDocToJson(doc) 411 *(v.(*string)) = s 412 return err 413 default: 414 b := bytes.NewBuffer(doc) 415 p := xml.NewDecoder(b) 416 p.CharsetReader = X2jCharsetReader 417 return p.Decode(v) 418 // return xml.Unmarshal(doc, v) 419 } 420 return nil 421 } 422 423 // ByteDocToJson - return an XML doc as a JSON string. 424 // If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible. 425 func ByteDocToJson(doc []byte, recast ...bool) (string, error) { 426 var r bool 427 if len(recast) == 1 { 428 r = recast[0] 429 } 430 m, merr := mxj.NewMapXml(doc, r) 431 if m == nil || merr != nil { 432 return "", merr 433 } 434 435 b, berr := m.Json() 436 if berr != nil { 437 return "", berr 438 } 439 440 // NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML. 441 return string(b), nil 442 } 443 444 // ByteDocToMap - convert an XML doc into a map[string]interface{}. 445 // (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().) 446 // If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible. 447 // Note: recasting is only applied to element values, not attribute values. 448 func ByteDocToMap(doc []byte, recast ...bool) (map[string]interface{}, error) { 449 var r bool 450 if len(recast) == 1 { 451 r = recast[0] 452 } 453 return mxj.NewMapXml(doc, r) 454 } 455 456