// Copyright Josh Komoroske. All rights reserved. // Use of this source code is governed by the MIT license, // a copy of which can be found in the LICENSE.txt file. package junit import ( "bytes" "encoding/xml" "errors" "html" "io" ) // reparentXML will wrap the given reader (which is assumed to be valid XML), // in a fake root nodeAlias. // // This action is useful in the event that the original XML document does not // have a single root nodeAlias, which is required by the XML specification. // Additionally, Go's XML parser will silently drop all nodes after the first // that is encountered, which can lead to data loss from a parser perspective. // This function also enables the ingestion of blank XML files, which would // normally cause a parsing error. func reparentXML(reader io.Reader) io.Reader { return io.MultiReader( bytes.NewReader([]byte("")), reader, bytes.NewReader([]byte("")), ) } // extractContent parses the raw contents from an XML node, and returns it in a // more consumable form. // // This function deals with two distinct classes of node data; Encoded entities // and CDATA tags. These Encoded entities are normal (html escaped) text that // you typically find between tags like so: // • "Hello, world!" → "Hello, world!" // • "I </3 XML" → "I " → "Hello, world!" // • "" → "I </3 XML" // • "" → "I . You probably too." → "I ") mode int output []byte ) for { if mode == 0 { offset := bytes.Index(data, cdataStart) if offset == -1 { // The string "" appears in the data. This is an error! return nil, errors.New("unmatched CDATA end tag") } output = append(output, html.UnescapeString(string(data))...) break } // The string "" does not appear in the data. This is an error! return nil, errors.New("unmatched CDATA start tag") } // The string "]]>" appears at some offset. Read up to that offset. Discard "]]>" prefix. output = append(output, data[:offset]...) data = data[offset:] data = data[3:] mode = 0 } } return output, nil } // parse unmarshalls the given XML data into a graph of nodes, and then returns // a slice of all top-level nodes. func parse(reader io.Reader) ([]xmlNode, error) { var ( dec = xml.NewDecoder(reparentXML(reader)) root xmlNode ) if err := dec.Decode(&root); err != nil { return nil, err } return root.Nodes, nil }