1 // Copyright 2012-2018 Charles Banning. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file 4 5 // x2j_bulk.go: Process files with multiple XML messages. 6 // Extends x2m_bulk.go to work with JSON strings rather than map[string]interface{}. 7 8 package x2j 9 10 import ( 11 "bytes" 12 "io" 13 "os" 14 "regexp" 15 16 "github.com/clbanning/mxj" 17 ) 18 19 // XmlMsgsFromFileAsJson() 20 // 'fname' is name of file 21 // 'phandler' is the JSON string processing handler. Return of 'false' stops further processing. 22 // 'ehandler' is the parsing error handler. Return of 'false' stops further processing and returns error. 23 // Note: phandler() and ehandler() calls are blocking, so reading and processing of messages is serialized. 24 // This means that you can stop reading the file on error or after processing a particular message. 25 // To have reading and handling run concurrently, pass arguments to a go routine in handler and return true. 26 func XmlMsgsFromFileAsJson(fname string, phandler func(string)(bool), ehandler func(error)(bool), recast ...bool) error { 27 var r bool 28 if len(recast) == 1 { 29 r = recast[0] 30 } 31 fi, fierr := os.Stat(fname) 32 if fierr != nil { 33 return fierr 34 } 35 fh, fherr := os.Open(fname) 36 if fherr != nil { 37 return fherr 38 } 39 defer fh.Close() 40 buf := make([]byte,fi.Size()) 41 _, rerr := fh.Read(buf) 42 if rerr != nil { 43 return rerr 44 } 45 doc := string(buf) 46 47 // xml.Decoder doesn't properly handle whitespace in some doc 48 // see songTextString.xml test case ... 49 reg,_ := regexp.Compile("[ \t\n\r]*<") 50 doc = reg.ReplaceAllString(doc,"<") 51 b := bytes.NewBufferString(doc) 52 53 for { 54 s, serr := XmlBufferToJson(b,r) 55 if serr != nil && serr != io.EOF { 56 if ok := ehandler(serr); !ok { 57 // caused reader termination 58 return serr 59 } 60 } 61 if s != "" { 62 if ok := phandler(s); !ok { 63 break 64 } 65 } 66 if serr == io.EOF { 67 break 68 } 69 } 70 return nil 71 } 72 73 // XmlBufferToJson - process XML message from a bytes.Buffer 74 // 'b' is the buffer 75 // Optional argument 'recast' coerces values to float64 or bool where possible. 76 func XmlBufferToJson(b *bytes.Buffer,recast ...bool) (string,error) { 77 var r bool 78 if len(recast) == 1 { 79 r = recast[0] 80 } 81 82 m, err := mxj.NewMapXmlReader(b, r) 83 // n,err := XmlBufferToTree(b) 84 if err != nil { 85 return "", err 86 } 87 88 // m := make(map[string]interface{}) 89 // m[n.key] = n.treeToMap(r) 90 91 j, jerr := m.Json() 92 return string(j), jerr 93 } 94 95 // ============================= io.Reader version for stream processing ====================== 96 97 // XmlMsgsFromReaderAsJson() - io.Reader version of XmlMsgsFromFileAsJson 98 // 'rdr' is an io.Reader for an XML message (stream) 99 // 'phandler' is the JSON string processing handler. Return of 'false' stops further processing. 100 // 'ehandler' is the parsing error handler. Return of 'false' stops further processing and returns error. 101 // Note: phandler() and ehandler() calls are blocking, so reading and processing of messages is serialized. 102 // This means that you can stop reading the file on error or after processing a particular message. 103 // To have reading and handling run concurrently, pass arguments to a go routine in handler and return true. 104 func XmlMsgsFromReaderAsJson(rdr io.Reader, phandler func(string)(bool), ehandler func(error)(bool), recast ...bool) error { 105 var r bool 106 if len(recast) == 1 { 107 r = recast[0] 108 } 109 110 for { 111 s, serr := ToJson(rdr,r) 112 if serr != nil && serr != io.EOF { 113 if ok := ehandler(serr); !ok { 114 // caused reader termination 115 return serr 116 } 117 } 118 if s != "" { 119 if ok := phandler(s); !ok { 120 break 121 } 122 } 123 if serr == io.EOF { 124 break 125 } 126 } 127 return nil 128 } 129 130