     1  //	Unmarshal arbitrary XML docs to map[string]interface{} or JSON and extract values (using wildcards, if necessary).
     2  // Copyright 2012-2018 Charles Banning. All rights reserved.
     3  // Use of this source code is governed by a BSD-style
     4  // license that can be found in the LICENSE file
     6  /*
     7     Unmarshal dynamic / arbitrary XML docs and extract values (using wildcards, if necessary).
    10     NOTICE: 03mar18, package mostly replicates github.com/clbanning/x2j using github.com/clbanning/mxj
    11                      (Note: there is no concept of Node or Tree; only direct decoding to map[string]interface{}.)
    13     One useful function is:
    15         - Unmarshal(doc []byte, v interface{}) error  
    16           where v is a pointer to a variable of type 'map[string]interface{}', 'string', or
    17           any other type supported by xml.Unmarshal().
    19     To retrieve a value for specific tag use: 
    21         - DocValue(doc, path string, attrs ...string) (interface{},error) 
    22         - MapValue(m map[string]interface{}, path string, attr map[string]interface{}, recast ...bool) (interface{}, error)
    24     The 'path' argument is a period-separated tag hierarchy - also known as dot-notation.
   It is the program's responsibility to cast the returned value to the proper type; possible
   types are the normal JSON unmarshaling types: string, float64, bool, []interface{}, map[string]interface{}.
    28     To retrieve all values associated with a tag occurring anywhere in the XML document use:
    30         - ValuesForTag(doc, tag string) ([]interface{}, error)
    31         - ValuesForKey(m map[string]interface{}, key string) []interface{}
    33         Demos: http://play.golang.org/p/m8zP-cpk0O
    34                http://play.golang.org/p/cIteTS1iSg
    35                http://play.golang.org/p/vd8pMiI21b
   Returned values should be one of map[string]interface{}, []interface{}, or string.
   All the values associated with a tag-path that may include one or more wildcard characters -
   '*' - can also be retrieved using:
    42         - ValuesFromTagPath(doc, path string, getAttrs ...bool) ([]interface{}, error)
    43         - ValuesFromKeyPath(map[string]interface{}, path string, getAttrs ...bool) []interface{}
    45         Demos: http://play.golang.org/p/kUQnZ8VuhS
    46     	        http://play.golang.org/p/l1aMHYtz7G
    48     NOTE: care should be taken when using "*" at the end of a path - i.e., "books.book.*".  See
    49     the x2jpath_test.go case on how the wildcard returns all key values and collapses list values;
    50     the same message structure can load a []interface{} or a map[string]interface{} (or an interface{}) 
    51     value for a tag.
    53     See the test cases in "x2jpath_test.go" and programs in "example" subdirectory for more.
    57        - Attributes are parsed to map[string]interface{} values by prefixing a hyphen, '-',
    58          to the attribute label.
    59        - If the element is a simple element and has attributes, the element value
    60          is given the key '#text' for its map[string]interface{} representation.  (See
    61          the 'atomFeedString.xml' test data, below.)
    63      io.Reader HANDLING
    65      ToMap(), ToJson(), and ToJsonIndent() provide parsing of messages from an io.Reader.
    66      If you want to handle a message stream, look at XmlMsgsFromReader().
    70      Use the X2jCharsetReader variable to assign io.Reader for alternative character sets.
    72  */
    73  package x2j
import (
	"bytes"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"sort"
	"strconv"
	"strings"

	"github.com/clbanning/mxj"
)
    87  // If X2jCharsetReader != nil, it will be used to decode the doc or stream if required
    88  //   import charset "code.google.com/p/go-charset/charset"
    89  //   ...
    90  //   x2j.X2jCharsetReader = charset.NewReader
    91  //   s, err := x2j.DocToJson(doc)
    92  var X2jCharsetReader func(charset string, input io.Reader)(io.Reader, error)
    94  // DocToJson - return an XML doc as a JSON string.
    95  //	If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
    96  func DocToJson(doc string, recast ...bool) (string, error) {
    97  	var r bool
    98  	if len(recast) == 1 {
    99  		r = recast[0]
   100  	}
   101  	m, merr := mxj.NewMapXml([]byte(doc), r)
   102  	if m == nil || merr != nil {
   103  		return "", merr
   104  	}
   106  	b, berr := m.Json()
   107  	if berr != nil {
   108  		return "", berr
   109  	}
   111  	// NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML.
   112  	return string(b), nil
   113  }
   115  // DocToJsonIndent - return an XML doc as a prettified JSON string.
   116  //	If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
   117  //	Note: recasting is only applied to element values, not attribute values.
   118  func DocToJsonIndent(doc string, recast ...bool) (string, error) {
   119  	var r bool
   120  	if len(recast) == 1 {
   121  		r = recast[0]
   122  	}
   123  	m, merr := mxj.NewMapXml([]byte(doc), r)
   124  	if m == nil || merr != nil {
   125  		return "", merr
   126  	}
   128  	b, berr := m.JsonIndent("", "  ")
   129  	if berr != nil {
   130  		return "", berr
   131  	}
   133  	// NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML.
   134  	return string(b), nil
   135  }
   137  // DocToMap - convert an XML doc into a map[string]interface{}.
   138  // (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().)
   139  //	If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
   140  //	Note: recasting is only applied to element values, not attribute values.
   141  func DocToMap(doc string, recast ...bool) (map[string]interface{}, error) {
   142  	var r bool
   143  	if len(recast) == 1 {
   144  		r = recast[0]
   145  	}
   146  	return mxj.NewMapXml([]byte(doc), r)
   147  }
   149  // WriteMap - dumps the map[string]interface{} for examination.
   150  //	'offset' is initial indentation count; typically: WriteMap(m).
   151  //	NOTE: with XML all element types are 'string'.
   152  //	But code written as generic for use with maps[string]interface{} values from json.Unmarshal().
   153  //	Or it can handle a DocToMap(doc,true) result where values have been recast'd.
   154  func WriteMap(m interface{}, offset ...int) string {
   155  	var indent int
   156  	if len(offset) == 1 {
   157  		indent = offset[0]
   158  	}
   160  	var s string
   161  	switch m.(type) {
   162  	case nil:
   163  		return "[nil] nil"
   164  	case string:
   165  		return "[string] " + m.(string)
   166  	case float64:
   167  		return "[float64] " + strconv.FormatFloat(m.(float64), 'e', 2, 64)
   168  	case bool:
   169  		return "[bool] " + strconv.FormatBool(m.(bool))
   170  	case []interface{}:
   171  		s += "[[]interface{}]"
   172  		for i, v := range m.([]interface{}) {
   173  			s += "\n"
   174  			for i := 0; i < indent; i++ {
   175  				s += "  "
   176  			}
   177  			s += "[item: " + strconv.FormatInt(int64(i), 10) + "]"
   178  			switch v.(type) {
   179  			case string, float64, bool:
   180  				s += "\n"
   181  			default:
   182  				// noop
   183  			}
   184  			for i := 0; i < indent; i++ {
   185  				s += "  "
   186  			}
   187  			s += WriteMap(v, indent+1)
   188  		}
   189  	case map[string]interface{}:
   190  		for k, v := range m.(map[string]interface{}) {
   191  			s += "\n"
   192  			for i := 0; i < indent; i++ {
   193  				s += "  "
   194  			}
   195  			// s += "[map[string]interface{}] "+k+" :"+WriteMap(v,indent+1)
   196  			s += k + " :" + WriteMap(v, indent+1)
   197  		}
   198  	default:
   199  		// shouldn't ever be here ...
   200  		s += fmt.Sprintf("unknown type for: %v", m)
   201  	}
   202  	return s
   203  }
   205  // ------------------------  value extraction from XML doc --------------------------
   207  // DocValue - return a value for a specific tag
   208  //	'doc' is a valid XML message.
   209  //	'path' is a hierarchy of XML tags, e.g., "doc.name".
   210  //	'attrs' is an OPTIONAL list of "name:value" pairs for attributes.
   211  //	Note: 'recast' is not enabled here. Use DocToMap(), NewAttributeMap(), and MapValue() calls for that.
   212  func DocValue(doc, path string, attrs ...string) (interface{}, error) {
   213  	m, err := mxj.NewMapXml([]byte(doc), false)
   214  	if err != nil {
   215  		return nil, err
   216  	}
   218  	a, err := NewAttributeMap(attrs...)
   219  	if err != nil {
   220  		return nil, err
   221  	}
   222  	v, verr := MapValue(m, path, a)
   223  	if verr != nil {
   224  		return nil, verr
   225  	}
   226  	return v, nil
   227  }
   229  // MapValue - retrieves value based on walking the map, 'm'.
   230  //	'm' is the map value of interest.
   231  //	'path' is a period-separated hierarchy of keys in the map.
   232  //	'attr' is a map of attribute "name:value" pairs from NewAttributeMap().  May be 'nil'.
   233  //	If the path can't be traversed, an error is returned.
   234  //	Note: the optional argument 'r' can be used to coerce attribute values, 'attr', if done so for 'm'.
   235  func MapValue(m map[string]interface{}, path string, attr map[string]interface{}, r ...bool) (interface{}, error) {
   236  	// attribute values may have been recasted during map construction; default is 'false'.
   237  	if len(r) == 1 && r[0] == true {
   238  		for k, v := range attr {
   239  			attr[k] = recast(v.(string), true)
   240  		}
   241  	}
   243  	// parse the path
   244  	keys := strings.Split(path, ".")
   246  	// initialize return value to 'm' so a path of "" will work correctly
   247  	var v interface{} = m
   248  	var ok bool
   249  	var okey string
   250  	var isMap bool = true
   251  	if keys[0] == "" && len(attr) == 0 {
   252  		return v, nil
   253  	}
   254  	for _, key := range keys {
   255  		if !isMap {
   256  			return nil, errors.New("no keys beyond: " + okey)
   257  		}
   258  		if v, ok = m[key]; !ok {
   259  			return nil, errors.New("no key in map: " + key)
   260  		} else {
   261  			switch v.(type) {
   262  			case map[string]interface{}:
   263  				m = v.(map[string]interface{})
   264  				isMap = true
   265  			default:
   266  				isMap = false
   267  			}
   268  		}
   269  		// save 'key' for error reporting
   270  		okey = key
   271  	}
   273  	// match attributes; value is "#text" or nil
   274  	if attr == nil {
   275  		return v, nil
   276  	}
   277  	return hasAttributes(v, attr)
   278  }
   280  // recast - try to cast string values to bool or float64
   281  func recast(s string, r bool) interface{} {
   282  	if r {
   283  		// handle numeric strings ahead of boolean
   284  		if f, err := strconv.ParseFloat(s, 64); err == nil {
   285  			return interface{}(f)
   286  		}
   287  		// ParseBool treats "1"==true & "0"==false
   288  		if b, err := strconv.ParseBool(s); err == nil {
   289  			return interface{}(b)
   290  		}
   291  	}
   292  	return interface{}(s)
   293  }
   295  // hasAttributes() - interface{} equality works for string, float64, bool
   296  func hasAttributes(v interface{}, a map[string]interface{}) (interface{}, error) {
   297  	switch v.(type) {
   298  	case []interface{}:
   299  		// run through all entries looking one with matching attributes
   300  		for _, vv := range v.([]interface{}) {
   301  			if vvv, vvverr := hasAttributes(vv, a); vvverr == nil {
   302  				return vvv, nil
   303  			}
   304  		}
   305  		return nil, errors.New("no list member with matching attributes")
   306  	case map[string]interface{}:
   307  		// do all attribute name:value pairs match?
   308  		nv := v.(map[string]interface{})
   309  		for key, val := range a {
   310  			if vv, ok := nv[key]; !ok {
   311  				return nil, errors.New("no attribute with name: " + key[1:])
   312  			} else if val != vv {
   313  				return nil, errors.New("no attribute key:value pair: " + fmt.Sprintf("%s:%v", key[1:], val))
   314  			}
   315  		}
   316  		// they all match; so return value associated with "#text" key.
   317  		if vv, ok := nv["#text"]; ok {
   318  			return vv, nil
   319  		} else {
   320  			// this happens when another element is value of tag rather than just a string value
   321  			return nv, nil
   322  		}
   323  	}
   324  	return nil, errors.New("no match for attributes")
   325  }
   327  // NewAttributeMap() - generate map of attributes=value entries as map["-"+string]string.
   328  //	'kv' arguments are "name:value" pairs that appear as attributes, name="value".
   329  //	If len(kv) == 0, the return is (nil, nil).
   330  func NewAttributeMap(kv ...string) (map[string]interface{}, error) {
   331  	if len(kv) == 0 {
   332  		return nil, nil
   333  	}
   334  	m := make(map[string]interface{}, 0)
   335  	for _, v := range kv {
   336  		vv := strings.Split(v, ":")
   337  		if len(vv) != 2 {
   338  			return nil, errors.New("attribute not \"name:value\" pair: " + v)
   339  		}
   340  		// attributes are stored as keys prepended with hyphen
   341  		m["-"+vv[0]] = interface{}(vv[1])
   342  	}
   343  	return m, nil
   344  }
   346  //------------------------- get values for key ----------------------------
   348  // ValuesForTag - return all values in doc associated with 'tag'.
   349  //	Returns nil if the 'tag' does not occur in the doc.
   350  //	If there is an error encounted while parsing doc, that is returned.
   351  //	If you want values 'recast' use DocToMap() and ValuesForKey().
   352  func ValuesForTag(doc, tag string) ([]interface{}, error) {
   353  	m, err := mxj.NewMapXml([]byte(doc))
   354  	if err != nil {
   355  		return nil, err
   356  	}
   358  	return ValuesForKey(m, tag), nil
   359  }
   361  // ValuesForKey - return all values in map associated with 'key'
   362  //	Returns nil if the 'key' does not occur in the map
   363  func ValuesForKey(m map[string]interface{}, key string) []interface{} {
   364  	ret := make([]interface{}, 0)
   366  	hasKey(m, key, &ret)
   367  	if len(ret) > 0 {
   368  		return ret
   369  	}
   370  	return nil
   371  }
   373  // hasKey - if the map 'key' exists append it to array
   374  //          if it doesn't do nothing except scan array and map values
   375  func hasKey(iv interface{}, key string, ret *[]interface{}) {
   376  	switch iv.(type) {
   377  	case map[string]interface{}:
   378  		vv := iv.(map[string]interface{})
   379  		if v, ok := vv[key]; ok {
   380  			*ret = append(*ret, v)
   381  		}
   382  		for _, v := range iv.(map[string]interface{}) {
   383  			hasKey(v, key, ret)
   384  		}
   385  	case []interface{}:
   386  		for _, v := range iv.([]interface{}) {
   387  			hasKey(v, key, ret)
   388  		}
   389  	}
   390  }
   392  // ======== 2013.07.01 - x2j.Unmarshal, wraps xml.Unmarshal ==============
   394  // Unmarshal - wraps xml.Unmarshal with handling of map[string]interface{}
   395  // and string type variables.
   396  //	Usage: x2j.Unmarshal(doc,&m) where m of type map[string]interface{}
   397  //	       x2j.Unmarshal(doc,&s) where s of type string (Overrides xml.Unmarshal().)
   398  //	       x2j.Unmarshal(doc,&struct) - passed to xml.Unmarshal()
   399  //	       x2j.Unmarshal(doc,&slice) - passed to xml.Unmarshal()
   400  func Unmarshal(doc []byte, v interface{}) error {
   401  	switch v.(type) {
   402  	case *map[string]interface{}:
   403  		m, err := mxj.NewMapXml(doc)
   404  		vv := *v.(*map[string]interface{})
   405  		for k, v := range m {
   406  			vv[k] = v
   407  		}
   408  		return err
   409  	case *string:
   410  		s, err := ByteDocToJson(doc)
   411  		*(v.(*string)) = s
   412  		return err
   413  	default:
   414  		b := bytes.NewBuffer(doc)
   415  		p := xml.NewDecoder(b)
   416  		p.CharsetReader = X2jCharsetReader
   417  		return p.Decode(v)
   418  		// return xml.Unmarshal(doc, v)
   419  	}
   420  	return nil
   421  }
   423  // ByteDocToJson - return an XML doc as a JSON string.
   424  //	If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
   425  func ByteDocToJson(doc []byte, recast ...bool) (string, error) {
   426  	var r bool
   427  	if len(recast) == 1 {
   428  		r = recast[0]
   429  	}
   430  	m, merr := mxj.NewMapXml(doc, r)
   431  	if m == nil || merr != nil {
   432  		return "", merr
   433  	}
   435  	b, berr := m.Json()
   436  	if berr != nil {
   437  		return "", berr
   438  	}
   440  	// NOTE: don't have to worry about safe JSON marshaling with json.Marshal, since '<' and '>" are reservedin XML.
   441  	return string(b), nil
   442  }
   444  // ByteDocToMap - convert an XML doc into a map[string]interface{}.
   445  // (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().)
   446  //	If the optional argument 'recast' is 'true', then values will be converted to boolean or float64 if possible.
   447  //	Note: recasting is only applied to element values, not attribute values.
   448  func ByteDocToMap(doc []byte, recast ...bool) (map[string]interface{}, error) {
   449  	var r bool
   450  	if len(recast) == 1 {
   451  		r = recast[0]
   452  	}
   453  	return mxj.NewMapXml(doc, r)
   454  }

