...

Source file src/github.com/clbanning/mxj/v2/xmlseq.go

Documentation: github.com/clbanning/mxj/v2

     1  // Copyright 2012-2016, 2019 Charles Banning. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file
     4  
     5  // xmlseq.go - version of xml.go with sequence # injection on Decoding and sorting on Encoding.
     6  // Also, handles comments, directives and process instructions.
     7  
     8  package mxj
     9  
    10  import (
    11  	"bytes"
    12  	"encoding/xml"
    13  	"errors"
    14  	"fmt"
    15  	"io"
    16  	"regexp"
    17  	"sort"
    18  	"strings"
    19  )
    20  
    21  // MapSeq is like Map but contains sequencing indices to allow recovering the original order of
    22  // the XML elements when the map[string]interface{} is marshaled. Element attributes are
    23  // stored as a map["#attr"]map[<attr_key>]map[string]interface{}{"#text":"<value>", "#seq":<attr_index>}
    24  // value instead of denoting the keys with a prefix character.  Also, comments, directives and
    25  // process instructions are preserved.
    26  type MapSeq map[string]interface{}
    27  
    28  // NoRoot is returned by NewXmlSeq, etc., when a comment, directive or procinstr element is parsed
    29  // in the XML data stream and the element is not contained in an XML object with a root element.
    30  var NoRoot = errors.New("no root key")
    31  var NO_ROOT = NoRoot // maintain backwards compatibility
    32  
    33  // ------------------- NewMapXmlSeq & NewMapXmlSeqReader ... -------------------------
    34  
    35  // NewMapXmlSeq converts a XML doc into a MapSeq value with elements id'd with decoding sequence key represented
    36  // as map["#seq"]<int value>.
    37  // If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible.
    38  // NOTE: "#seq" key/value pairs are removed on encoding with msv.Xml() / msv.XmlIndent().
    39  //	• attributes are a map - map["#attr"]map["attr_key"]map[string]interface{}{"#text":<aval>, "#seq":<num>}
    40  //	• all simple elements are decoded as map["#text"]interface{} with a "#seq" k:v pair, as well.
    41  //	• lists always decode as map["list_tag"][]map[string]interface{} where the array elements are maps that
    42  //	  include a "#seq" k:v pair based on sequence they are decoded.  Thus, XML like:
    43  //	      <doc>
    44  //	         <ltag>value 1</ltag>
    45  //	         <newtag>value 2</newtag>
    46  //	         <ltag>value 3</ltag>
    47  //	      </doc>
    48  //	  is decoded as:
    49  //	    doc :
    50  //	      ltag :[[]interface{}]
    51  //	        [item: 0]
    52  //	          #seq :[int] 0
    53  //	          #text :[string] value 1
    54  //	        [item: 1]
    55  //	          #seq :[int] 2
    56  //	          #text :[string] value 3
    57  //	      newtag :
    58  //	        #seq :[int] 1
    59  //	        #text :[string] value 2
    60  //	  It will encode in proper sequence even though the MapSeq representation merges all "ltag" elements in an array.
    61  //	• comments - "<!--comment-->" -  are decoded as map["#comment"]map["#text"]"cmnt_text" with a "#seq" k:v pair.
    62  //	• directives - "<!text>" - are decoded as map["#directive"]map["#text"]"directive_text" with a "#seq" k:v pair.
    63  //	• process instructions  - "<?instr?>" - are decoded as map["#procinst"]interface{} where the #procinst value
    64  //	  is of map[string]interface{} type with the following keys: #target, #inst, and #seq.
    65  //	• comments, directives, and procinsts that are NOT part of a document with a root key will be returned as
    66  //	  map[string]interface{} and the error value 'NoRoot'.
    67  //	• note: "<![CDATA[" syntax is lost in xml.Decode parser - and is not handled here, either.
    68  //	   and: "\r\n" is converted to "\n"
    69  //
    70  //	NOTES:
    71  //	   1. The 'xmlVal' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
    72  //	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
    73  //	   2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
    74  //	      re-encode the message in its original structure.
    75  //	   3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
    76  //
    77  //	NAME SPACES:
    78  //	   1. Keys in the MapSeq value that are parsed from a <name space prefix>:<local name> tag preserve the
    79  //	      "<prefix>:" notation rather than stripping it as with NewMapXml().
    80  //	   2. Attribute keys for name space prefix declarations preserve "xmlns:<prefix>" notation.
    81  //
    82  //	ERRORS:
    83  //	   1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
    84  //	      "#directive" or "#procinst" key.
    85  //	   2. Unmarshaling an XML doc that is formatted using the whitespace character, " ", will error, since
    86  //	      Decoder.RawToken treats such occurrences as significant. See NewMapFormattedXmlSeq().
    87  func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
    88  	var r bool
    89  	if len(cast) == 1 {
    90  		r = cast[0]
    91  	}
    92  	return xmlSeqToMap(xmlVal, r)
    93  }
    94  
    95  // NewMapFormattedXmlSeq performs the same as NewMapXmlSeq but is useful for processing XML objects that
    96  // are formatted using the whitespace character, " ".  (The stdlib xml.Decoder, by default, treats all
    97  // whitespace as significant; Decoder.Token() and Decoder.RawToken() will return strings of one or more
    98  // whitespace characters and without alphanumeric or punctuation characters as xml.CharData values.)
    99  //
   100  // If you're processing such XML, then this will convert all occurrences of whitespace-only strings
   101  // into an empty string, "", prior to parsing the XML - irrespective of whether the occurrence is
   102  // formatting or is a actual element value.
   103  func NewMapFormattedXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
   104  	var c bool
   105  	if len(cast) == 1 {
   106  		c = cast[0]
   107  	}
   108  
   109  	// Per PR #104 - clean out formatting characters so they don't show up in Decoder.RawToken() stream.
   110  	// NOTE: Also replaces element values that are solely comprised of formatting/whitespace characters
   111  	// with empty string, "".
   112  	r := regexp.MustCompile(`>[\n\t\r ]*<`)
   113  	xmlVal = r.ReplaceAll(xmlVal, []byte("><"))
   114  	return xmlSeqToMap(xmlVal, c)
   115  }
   116  
   117  // NewMpaXmlSeqReader returns next XML doc from an io.Reader as a MapSeq value.
   118  //	NOTES:
   119  //	   1. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
   120  //	      extraneous xml.CharData will be ignored unless io.EOF is reached first.
   121  //	   2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
   122  //	      re-encode the message in its original structure.
   123  //	   3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
   124  //
   125  //	ERRORS:
   126  //	   1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
   127  //	      "#directive" or #procinst" key.
   128  func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) {
   129  	var r bool
   130  	if len(cast) == 1 {
   131  		r = cast[0]
   132  	}
   133  
   134  	// We need to put an *os.File reader in a ByteReader or the xml.NewDecoder
   135  	// will wrap it in a bufio.Reader and seek on the file beyond where the
   136  	// xml.Decoder parses!
   137  	if _, ok := xmlReader.(io.ByteReader); !ok {
   138  		xmlReader = myByteReader(xmlReader) // see code at EOF
   139  	}
   140  
   141  	// build the map
   142  	return xmlSeqReaderToMap(xmlReader, r)
   143  }
   144  
   145  // NewMapXmlSeqReaderRaw returns the  next XML doc from  an io.Reader as a MapSeq value.
   146  // Returns MapSeq value, slice with the raw XML, and any error.
   147  //	NOTES:
   148  //	   1. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte
   149  //	      using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact.
   150  //	      See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large
   151  //	      data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body
   152  //	      you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call.
   153  //	    2. The 'raw' return value may be larger than the XML text value.
   154  //	    3. The 'xmlReader' will be parsed looking for an xml.StartElement, xml.Comment, etc., so BOM and other
   155  //	       extraneous xml.CharData will be ignored unless io.EOF is reached first.
   156  //	    4. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
   157  //	       re-encode the message in its original structure.
   158  //	    5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
   159  //
   160  //	ERRORS:
   161  //	    1. If a NoRoot error, "no root key," is returned, check if the initial map key is "#comment",
   162  //	       "#directive" or "#procinst" key.
   163  func NewMapXmlSeqReaderRaw(xmlReader io.Reader, cast ...bool) (MapSeq, []byte, error) {
   164  	var r bool
   165  	if len(cast) == 1 {
   166  		r = cast[0]
   167  	}
   168  	// create TeeReader so we can retrieve raw XML
   169  	buf := make([]byte, 0)
   170  	wb := bytes.NewBuffer(buf)
   171  	trdr := myTeeReader(xmlReader, wb)
   172  
   173  	m, err := xmlSeqReaderToMap(trdr, r)
   174  
   175  	// retrieve the raw XML that was decoded
   176  	b := wb.Bytes()
   177  
   178  	// err may be NoRoot
   179  	return m, b, err
   180  }
   181  
   182  // xmlSeqReaderToMap() - parse a XML io.Reader to a map[string]interface{} value
   183  func xmlSeqReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) {
   184  	// parse the Reader
   185  	p := xml.NewDecoder(rdr)
   186  	if CustomDecoder != nil {
   187  		useCustomDecoder(p)
   188  	} else {
   189  		p.CharsetReader = XmlCharsetReader
   190  	}
   191  	return xmlSeqToMapParser("", nil, p, r)
   192  }
   193  
   194  // xmlSeqToMap - convert a XML doc into map[string]interface{} value
   195  func xmlSeqToMap(doc []byte, r bool) (map[string]interface{}, error) {
   196  	b := bytes.NewReader(doc)
   197  	p := xml.NewDecoder(b)
   198  	if CustomDecoder != nil {
   199  		useCustomDecoder(p)
   200  	} else {
   201  		p.CharsetReader = XmlCharsetReader
   202  	}
   203  	return xmlSeqToMapParser("", nil, p, r)
   204  }
   205  
   206  // ===================================== where the work happens =============================
   207  
   208  // xmlSeqToMapParser - load a 'clean' XML doc into a map[string]interface{} directly.
   209  // Add #seq tag value for each element decoded - to be used for Encoding later.
   210  func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
   211  	if snakeCaseKeys {
   212  		skey = strings.Replace(skey, "-", "_", -1)
   213  	}
   214  
   215  	// NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey' in 'n'.
   216  	var n, na map[string]interface{}
   217  	var seq int // for including seq num when decoding
   218  
   219  	// Allocate maps and load attributes, if any.
   220  	// NOTE: on entry from NewMapXml(), etc., skey=="", and we fall through
   221  	//       to get StartElement then recurse with skey==xml.StartElement.Name.Local
   222  	//       where we begin allocating map[string]interface{} values 'n' and 'na'.
   223  	if skey != "" {
   224  		// 'n' only needs one slot - save call to runtime•hashGrow()
   225  		// 'na' we don't know
   226  		n = make(map[string]interface{}, 1)
   227  		na = make(map[string]interface{})
   228  		if len(a) > 0 {
   229  			// xml.Attr is decoded into: map["#attr"]map[<attr_label>]interface{}
   230  			// where interface{} is map[string]interface{}{"#text":<attr_val>, "#seq":<attr_seq>}
   231  			aa := make(map[string]interface{}, len(a))
   232  			for i, v := range a {
   233  				if snakeCaseKeys {
   234  					v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1)
   235  				}
   236  				if xmlEscapeCharsDecoder { // per issue#84
   237  					v.Value = escapeChars(v.Value)
   238  				}
   239  				if len(v.Name.Space) > 0 {
   240  					aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{textK: cast(v.Value, r, ""), seqK: i}
   241  				} else {
   242  					aa[v.Name.Local] = map[string]interface{}{textK: cast(v.Value, r, ""), seqK: i}
   243  				}
   244  			}
   245  			na[attrK] = aa
   246  		}
   247  	}
   248  
   249  	// Return XMPP <stream:stream> message.
   250  	if handleXMPPStreamTag && skey == "stream:stream" {
   251  		n[skey] = na
   252  		return n, nil
   253  	}
   254  
   255  	for {
   256  		t, err := p.RawToken()
   257  		if err != nil {
   258  			if err != io.EOF {
   259  				return nil, errors.New("xml.Decoder.Token() - " + err.Error())
   260  			}
   261  			return nil, err
   262  		}
   263  		switch t.(type) {
   264  		case xml.StartElement:
   265  			tt := t.(xml.StartElement)
   266  
   267  			// First call to xmlSeqToMapParser() doesn't pass xml.StartElement - the map key.
   268  			// So when the loop is first entered, the first token is the root tag along
   269  			// with any attributes, which we process here.
   270  			//
   271  			// Subsequent calls to xmlSeqToMapParser() will pass in tag+attributes for
   272  			// processing before getting the next token which is the element value,
   273  			// which is done above.
   274  			if skey == "" {
   275  				if len(tt.Name.Space) > 0 {
   276  					return xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
   277  				} else {
   278  					return xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
   279  				}
   280  			}
   281  
   282  			// If not initializing the map, parse the element.
   283  			// len(nn) == 1, necessarily - it is just an 'n'.
   284  			var nn map[string]interface{}
   285  			if len(tt.Name.Space) > 0 {
   286  				nn, err = xmlSeqToMapParser(tt.Name.Space+`:`+tt.Name.Local, tt.Attr, p, r)
   287  			} else {
   288  				nn, err = xmlSeqToMapParser(tt.Name.Local, tt.Attr, p, r)
   289  			}
   290  			if err != nil {
   291  				return nil, err
   292  			}
   293  
   294  			// The nn map[string]interface{} value is a na[nn_key] value.
   295  			// We need to see if nn_key already exists - means we're parsing a list.
   296  			// This may require converting na[nn_key] value into []interface{} type.
   297  			// First, extract the key:val for the map - it's a singleton.
   298  			var key string
   299  			var val interface{}
   300  			for key, val = range nn {
   301  				break
   302  			}
   303  
   304  			// add "#seq" k:v pair -
   305  			// Sequence number included even in list elements - this should allow us
   306  			// to properly resequence even something goofy like:
   307  			//     <list>item 1</list>
   308  			//     <subelement>item 2</subelement>
   309  			//     <list>item 3</list>
   310  			// where all the "list" subelements are decoded into an array.
   311  			switch val.(type) {
   312  			case map[string]interface{}:
   313  				val.(map[string]interface{})[seqK] = seq
   314  				seq++
   315  			case interface{}: // a non-nil simple element: string, float64, bool
   316  				v := map[string]interface{}{textK: val, seqK: seq}
   317  				seq++
   318  				val = v
   319  			}
   320  
   321  			// 'na' holding sub-elements of n.
   322  			// See if 'key' already exists.
   323  			// If 'key' exists, then this is a list, if not just add key:val to na.
   324  			if v, ok := na[key]; ok {
   325  				var a []interface{}
   326  				switch v.(type) {
   327  				case []interface{}:
   328  					a = v.([]interface{})
   329  				default: // anything else - note: v.(type) != nil
   330  					a = []interface{}{v}
   331  				}
   332  				a = append(a, val)
   333  				na[key] = a
   334  			} else {
   335  				na[key] = val // save it as a singleton
   336  			}
   337  		case xml.EndElement:
   338  			if skey != "" {
   339  				tt := t.(xml.EndElement)
   340  				if snakeCaseKeys {
   341  					tt.Name.Local = strings.Replace(tt.Name.Local, "-", "_", -1)
   342  				}
   343  				var name string
   344  				if len(tt.Name.Space) > 0 {
   345  					name = tt.Name.Space + `:` + tt.Name.Local
   346  				} else {
   347  					name = tt.Name.Local
   348  				}
   349  				if skey != name {
   350  					return nil, fmt.Errorf("element %s not properly terminated, got %s at #%d",
   351  						skey, name, p.InputOffset())
   352  				}
   353  			}
   354  			// len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
   355  			if len(n) == 0 {
   356  				// If len(na)==0 we have an empty element == "";
   357  				// it has no xml.Attr nor xml.CharData.
   358  				// Empty element content will be  map["etag"]map["#text"]""
   359  				// after #seq injection - map["etag"]map["#seq"]seq - after return.
   360  				if len(na) > 0 {
   361  					n[skey] = na
   362  				} else {
   363  					n[skey] = "" // empty element
   364  				}
   365  			}
   366  			return n, nil
   367  		case xml.CharData:
   368  			// clean up possible noise
   369  			tt := strings.Trim(string(t.(xml.CharData)), trimRunes)
   370  			if xmlEscapeCharsDecoder { // issue#84
   371  				tt = escapeChars(tt)
   372  			}
   373  			if skey == "" {
   374  				// per Adrian (http://www.adrianlungu.com/) catch stray text
   375  				// in decoder stream -
   376  				// https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
   377  				// NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
   378  				// a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
   379  				continue
   380  			}
   381  			if len(tt) > 0 {
   382  				// every simple element is a #text and has #seq associated with it
   383  				na[textK] = cast(tt, r, "")
   384  				na[seqK] = seq
   385  				seq++
   386  			}
   387  		case xml.Comment:
   388  			if n == nil { // no root 'key'
   389  				n = map[string]interface{}{commentK: string(t.(xml.Comment))}
   390  				return n, NoRoot
   391  			}
   392  			cm := make(map[string]interface{}, 2)
   393  			cm[textK] = string(t.(xml.Comment))
   394  			cm[seqK] = seq
   395  			seq++
   396  			na[commentK] = cm
   397  		case xml.Directive:
   398  			if n == nil { // no root 'key'
   399  				n = map[string]interface{}{directiveK: string(t.(xml.Directive))}
   400  				return n, NoRoot
   401  			}
   402  			dm := make(map[string]interface{}, 2)
   403  			dm[textK] = string(t.(xml.Directive))
   404  			dm[seqK] = seq
   405  			seq++
   406  			na[directiveK] = dm
   407  		case xml.ProcInst:
   408  			if n == nil {
   409  				na = map[string]interface{}{targetK: t.(xml.ProcInst).Target, instK: string(t.(xml.ProcInst).Inst)}
   410  				n = map[string]interface{}{procinstK: na}
   411  				return n, NoRoot
   412  			}
   413  			pm := make(map[string]interface{}, 3)
   414  			pm[targetK] = t.(xml.ProcInst).Target
   415  			pm[instK] = string(t.(xml.ProcInst).Inst)
   416  			pm[seqK] = seq
   417  			seq++
   418  			na[procinstK] = pm
   419  		default:
   420  			// noop - shouldn't ever get here, now, since we handle all token types
   421  		}
   422  	}
   423  }
   424  
   425  // ------------------ END: NewMapXml & NewMapXmlReader -------------------------
   426  
   427  // --------------------- mv.XmlSeq & mv.XmlSeqWriter -------------------------
   428  
   429  // Xml encodes a MapSeq as XML with elements sorted on #seq.  The companion of NewMapXmlSeq().
   430  // The following rules apply.
   431  //    - The "#seq" key value is used to seqence the subelements or attributes only.
   432  //    - The "#attr" map key identifies the map of attribute map[string]interface{} values with "#text" key.
   433  //    - The "#comment" map key identifies a comment in the value "#text" map entry - <!--comment-->.
   434  //    - The "#directive" map key identifies a directive in the value "#text" map entry - <!directive>.
   435  //    - The "#procinst" map key identifies a process instruction in the value "#target" and "#inst"
   436  //      map entries - <?target inst?>.
   437  //    - Value type encoding:
   438  //          > string, bool, float64, int, int32, int64, float32: per "%v" formating
   439  //          > []bool, []uint8: by casting to string
   440  //          > structures, etc.: handed to xml.Marshal() - if there is an error, the element
   441  //            value is "UNKNOWN"
   442  //    - Elements with only attribute values or are null are terminated using "/>" unless XmlGoEmptyElemSystax() called.
   443  //    - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible.
   444  //      Thus, `{ "key":"value" }` encodes as "<key>value</key>".
   445  func (mv MapSeq) Xml(rootTag ...string) ([]byte, error) {
   446  	m := map[string]interface{}(mv)
   447  	var err error
   448  	s := new(string)
   449  	p := new(pretty) // just a stub
   450  
   451  	if len(m) == 1 && len(rootTag) == 0 {
   452  		for key, value := range m {
   453  			// if it's an array, see if all values are map[string]interface{}
   454  			// we force a new root tag if we'll end up with no key:value in the list
   455  			// so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc>
   456  			switch value.(type) {
   457  			case []interface{}:
   458  				for _, v := range value.([]interface{}) {
   459  					switch v.(type) {
   460  					case map[string]interface{}: // noop
   461  					default: // anything else
   462  						err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
   463  						goto done
   464  					}
   465  				}
   466  			}
   467  			err = mapToXmlSeqIndent(false, s, key, value, p)
   468  		}
   469  	} else if len(rootTag) == 1 {
   470  		err = mapToXmlSeqIndent(false, s, rootTag[0], m, p)
   471  	} else {
   472  		err = mapToXmlSeqIndent(false, s, DefaultRootTag, m, p)
   473  	}
   474  done:
   475  	if xmlCheckIsValid {
   476  		d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
   477  		for {
   478  			_, err = d.Token()
   479  			if err == io.EOF {
   480  				err = nil
   481  				break
   482  			} else if err != nil {
   483  				return nil, err
   484  			}
   485  		}
   486  	}
   487  	return []byte(*s), err
   488  }
   489  
   490  // The following implementation is provided only for symmetry with NewMapXmlReader[Raw]
   491  // The names will also provide a key for the number of return arguments.
   492  
   493  // XmlWriter Writes the MapSeq value as  XML on the Writer.
   494  // See MapSeq.Xml() for encoding rules.
   495  func (mv MapSeq) XmlWriter(xmlWriter io.Writer, rootTag ...string) error {
   496  	x, err := mv.Xml(rootTag...)
   497  	if err != nil {
   498  		return err
   499  	}
   500  
   501  	_, err = xmlWriter.Write(x)
   502  	return err
   503  }
   504  
   505  // XmlWriteRaw writes the MapSeq value as XML on the Writer. []byte is the raw XML that was written.
   506  // See Map.XmlSeq() for encoding rules.
   507  /*
   508  func (mv MapSeq) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) {
   509  	x, err := mv.Xml(rootTag...)
   510  	if err != nil {
   511  		return x, err
   512  	}
   513  
   514  	_, err = xmlWriter.Write(x)
   515  	return x, err
   516  }
   517  */
   518  
   519  // XmlIndentWriter writes the MapSeq value as pretty XML on the Writer.
   520  // See MapSeq.Xml() for encoding rules.
   521  func (mv MapSeq) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error {
   522  	x, err := mv.XmlIndent(prefix, indent, rootTag...)
   523  	if err != nil {
   524  		return err
   525  	}
   526  
   527  	_, err = xmlWriter.Write(x)
   528  	return err
   529  }
   530  
   531  // XmlIndentWriterRaw writes the Map as pretty XML on the Writer. []byte is the raw XML that was written.
   532  // See Map.XmlSeq() for encoding rules.
   533  /*
   534  func (mv MapSeq) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) {
   535  	x, err := mv.XmlSeqIndent(prefix, indent, rootTag...)
   536  	if err != nil {
   537  		return x, err
   538  	}
   539  
   540  	_, err = xmlWriter.Write(x)
   541  	return x, err
   542  }
   543  */
   544  
   545  // -------------------- END: mv.Xml & mv.XmlWriter -------------------------------
   546  
   547  // ---------------------- XmlSeqIndent ----------------------------
   548  
   549  // XmlIndent encodes a map[string]interface{} as a pretty XML string.
   550  // See MapSeq.XmlSeq() for encoding rules.
   551  func (mv MapSeq) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) {
   552  	m := map[string]interface{}(mv)
   553  
   554  	var err error
   555  	s := new(string)
   556  	p := new(pretty)
   557  	p.indent = indent
   558  	p.padding = prefix
   559  
   560  	if len(m) == 1 && len(rootTag) == 0 {
   561  		// this can extract the key for the single map element
   562  		// use it if it isn't a key for a list
   563  		for key, value := range m {
   564  			if _, ok := value.([]interface{}); ok {
   565  				err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
   566  			} else {
   567  				err = mapToXmlSeqIndent(true, s, key, value, p)
   568  			}
   569  		}
   570  	} else if len(rootTag) == 1 {
   571  		err = mapToXmlSeqIndent(true, s, rootTag[0], m, p)
   572  	} else {
   573  		err = mapToXmlSeqIndent(true, s, DefaultRootTag, m, p)
   574  	}
   575  	if xmlCheckIsValid {
   576  		if _, err = NewMapXml([]byte(*s)); err != nil {
   577  			return nil, err
   578  		}
   579  		d := xml.NewDecoder(bytes.NewReader([]byte(*s)))
   580  		for {
   581  			_, err = d.Token()
   582  			if err == io.EOF {
   583  				err = nil
   584  				break
   585  			} else if err != nil {
   586  				return nil, err
   587  			}
   588  		}
   589  	}
   590  	return []byte(*s), err
   591  }
   592  
   593  // where the work actually happens
   594  // returns an error if an attribute is not atomic
   595  func mapToXmlSeqIndent(doIndent bool, s *string, key string, value interface{}, pp *pretty) error {
   596  	var endTag bool
   597  	var isSimple bool
   598  	var noEndTag bool
   599  	var elen int
   600  	var ss string
   601  	p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start}
   602  
   603  	switch value.(type) {
   604  	case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
   605  		if doIndent {
   606  			*s += p.padding
   607  		}
   608  		if key != commentK && key != directiveK && key != procinstK {
   609  			*s += `<` + key
   610  		}
   611  	}
   612  	switch value.(type) {
   613  	case map[string]interface{}:
   614  		val := value.(map[string]interface{})
   615  
   616  		if key == commentK {
   617  			*s += `<!--` + val[textK].(string) + `-->`
   618  			noEndTag = true
   619  			break
   620  		}
   621  
   622  		if key == directiveK {
   623  			*s += `<!` + val[textK].(string) + `>`
   624  			noEndTag = true
   625  			break
   626  		}
   627  
   628  		if key == procinstK {
   629  			*s += `<?` + val[targetK].(string) + ` ` + val[instK].(string) + `?>`
   630  			noEndTag = true
   631  			break
   632  		}
   633  
   634  		haveAttrs := false
   635  		// process attributes first
   636  		if v, ok := val[attrK].(map[string]interface{}); ok {
   637  			// First, unroll the map[string]interface{} into a []keyval array.
   638  			// Then sequence it.
   639  			kv := make([]keyval, len(v))
   640  			n := 0
   641  			for ak, av := range v {
   642  				kv[n] = keyval{ak, av}
   643  				n++
   644  			}
   645  			sort.Sort(elemListSeq(kv))
   646  			// Now encode the attributes in original decoding sequence, using keyval array.
   647  			for _, a := range kv {
   648  				vv := a.v.(map[string]interface{})
   649  				switch vv[textK].(type) {
   650  				case string:
   651  					if xmlEscapeChars {
   652  						ss = escapeChars(vv[textK].(string))
   653  					} else {
   654  						ss = vv[textK].(string)
   655  					}
   656  					*s += ` ` + a.k + `="` + ss + `"`
   657  				case float64, bool, int, int32, int64, float32:
   658  					*s += ` ` + a.k + `="` + fmt.Sprintf("%v", vv[textK]) + `"`
   659  				case []byte:
   660  					if xmlEscapeChars {
   661  						ss = escapeChars(string(vv[textK].([]byte)))
   662  					} else {
   663  						ss = string(vv[textK].([]byte))
   664  					}
   665  					*s += ` ` + a.k + `="` + ss + `"`
   666  				default:
   667  					return fmt.Errorf("invalid attribute value for: %s", a.k)
   668  				}
   669  			}
   670  			haveAttrs = true
   671  		}
   672  
   673  		// simple element?
   674  		// every map value has, at least, "#seq" and, perhaps, "#text" and/or "#attr"
   675  		_, seqOK := val[seqK] // have key
   676  		if v, ok := val[textK]; ok && ((len(val) == 3 && haveAttrs) || (len(val) == 2 && !haveAttrs)) && seqOK {
   677  			if stmp, ok := v.(string); ok && stmp != "" {
   678  				if xmlEscapeChars {
   679  					stmp = escapeChars(stmp)
   680  				}
   681  				*s += ">" + stmp
   682  				endTag = true
   683  				elen = 1
   684  			}
   685  			isSimple = true
   686  			break
   687  		} else if !ok && ((len(val) == 2 && haveAttrs) || (len(val) == 1 && !haveAttrs)) && seqOK {
   688  			// here no #text but have #seq or #seq+#attr
   689  			endTag = false
   690  			break
   691  		}
   692  
   693  		// we now need to sequence everything except attributes
   694  		// 'kv' will hold everything that needs to be written
   695  		kv := make([]keyval, 0)
   696  		for k, v := range val {
   697  			if k == attrK { // already processed
   698  				continue
   699  			}
   700  			if k == seqK { // ignore - just for sorting
   701  				continue
   702  			}
   703  			switch v.(type) {
   704  			case []interface{}:
   705  				// unwind the array as separate entries
   706  				for _, vv := range v.([]interface{}) {
   707  					kv = append(kv, keyval{k, vv})
   708  				}
   709  			default:
   710  				kv = append(kv, keyval{k, v})
   711  			}
   712  		}
   713  
   714  		// close tag with possible attributes
   715  		*s += ">"
   716  		if doIndent {
   717  			*s += "\n"
   718  		}
   719  		// something more complex
   720  		p.mapDepth++
   721  		sort.Sort(elemListSeq(kv))
   722  		i := 0
   723  		for _, v := range kv {
   724  			switch v.v.(type) {
   725  			case []interface{}:
   726  			default:
   727  				if i == 0 && doIndent {
   728  					p.Indent()
   729  				}
   730  			}
   731  			i++
   732  			if err := mapToXmlSeqIndent(doIndent, s, v.k, v.v, p); err != nil {
   733  				return err
   734  			}
   735  			switch v.v.(type) {
   736  			case []interface{}: // handled in []interface{} case
   737  			default:
   738  				if doIndent {
   739  					p.Outdent()
   740  				}
   741  			}
   742  			i--
   743  		}
   744  		p.mapDepth--
   745  		endTag = true
   746  		elen = 1 // we do have some content other than attrs
   747  	case []interface{}:
   748  		for _, v := range value.([]interface{}) {
   749  			if doIndent {
   750  				p.Indent()
   751  			}
   752  			if err := mapToXmlSeqIndent(doIndent, s, key, v, p); err != nil {
   753  				return err
   754  			}
   755  			if doIndent {
   756  				p.Outdent()
   757  			}
   758  		}
   759  		return nil
   760  	case nil:
   761  		// terminate the tag
   762  		if doIndent {
   763  			*s += p.padding
   764  		}
   765  		*s += "<" + key
   766  		endTag, isSimple = true, true
   767  		break
   768  	default: // handle anything - even goofy stuff
   769  		elen = 0
   770  		switch value.(type) {
   771  		case string:
   772  			if xmlEscapeChars {
   773  				ss = escapeChars(value.(string))
   774  			} else {
   775  				ss = value.(string)
   776  			}
   777  			elen = len(ss)
   778  			if elen > 0 {
   779  				*s += ">" + ss
   780  			}
   781  		case float64, bool, int, int32, int64, float32:
   782  			v := fmt.Sprintf("%v", value)
   783  			elen = len(v)
   784  			if elen > 0 {
   785  				*s += ">" + v
   786  			}
   787  		case []byte: // NOTE: byte is just an alias for uint8
   788  			// similar to how xml.Marshal handles []byte structure members
   789  			if xmlEscapeChars {
   790  				ss = escapeChars(string(value.([]byte)))
   791  			} else {
   792  				ss = string(value.([]byte))
   793  			}
   794  			elen = len(ss)
   795  			if elen > 0 {
   796  				*s += ">" + ss
   797  			}
   798  		default:
   799  			var v []byte
   800  			var err error
   801  			if doIndent {
   802  				v, err = xml.MarshalIndent(value, p.padding, p.indent)
   803  			} else {
   804  				v, err = xml.Marshal(value)
   805  			}
   806  			if err != nil {
   807  				*s += ">UNKNOWN"
   808  			} else {
   809  				elen = len(v)
   810  				if elen > 0 {
   811  					*s += string(v)
   812  				}
   813  			}
   814  		}
   815  		isSimple = true
   816  		endTag = true
   817  	}
   818  	if endTag && !noEndTag {
   819  		if doIndent {
   820  			if !isSimple {
   821  				*s += p.padding
   822  			}
   823  		}
   824  		switch value.(type) {
   825  		case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
   826  			if elen > 0 || useGoXmlEmptyElemSyntax {
   827  				if elen == 0 {
   828  					*s += ">"
   829  				}
   830  				*s += `</` + key + ">"
   831  			} else {
   832  				*s += `/>`
   833  			}
   834  		}
   835  	} else if !noEndTag {
   836  		if useGoXmlEmptyElemSyntax {
   837  			*s += `</` + key + ">"
   838  			// *s += "></" + key + ">"
   839  		} else {
   840  			*s += "/>"
   841  		}
   842  	}
   843  	if doIndent {
   844  		if p.cnt > p.start {
   845  			*s += "\n"
   846  		}
   847  		p.Outdent()
   848  	}
   849  
   850  	return nil
   851  }
   852  
   853  // the element sort implementation
   854  
   855  type keyval struct {
   856  	k string
   857  	v interface{}
   858  }
   859  type elemListSeq []keyval
   860  
   861  func (e elemListSeq) Len() int {
   862  	return len(e)
   863  }
   864  
   865  func (e elemListSeq) Swap(i, j int) {
   866  	e[i], e[j] = e[j], e[i]
   867  }
   868  
   869  func (e elemListSeq) Less(i, j int) bool {
   870  	var iseq, jseq int
   871  	var fiseq, fjseq float64
   872  	var ok bool
   873  	if iseq, ok = e[i].v.(map[string]interface{})[seqK].(int); !ok {
   874  		if fiseq, ok = e[i].v.(map[string]interface{})[seqK].(float64); ok {
   875  			iseq = int(fiseq)
   876  		} else {
   877  			iseq = 9999999
   878  		}
   879  	}
   880  
   881  	if jseq, ok = e[j].v.(map[string]interface{})[seqK].(int); !ok {
   882  		if fjseq, ok = e[j].v.(map[string]interface{})[seqK].(float64); ok {
   883  			jseq = int(fjseq)
   884  		} else {
   885  			jseq = 9999999
   886  		}
   887  	}
   888  
   889  	return iseq <= jseq
   890  }
   891  
   892  // =============== https://groups.google.com/forum/#!topic/golang-nuts/lHPOHD-8qio
   893  
   894  // BeautifyXml (re)formats an XML doc similar to Map.XmlIndent().
   895  // It preserves comments, directives and process instructions,
   896  func BeautifyXml(b []byte, prefix, indent string) ([]byte, error) {
   897  	x, err := NewMapXmlSeq(b)
   898  	if err != nil {
   899  		return nil, err
   900  	}
   901  	return x.XmlIndent(prefix, indent)
   902  }
   903  

View as plain text