1 // Copyright 2012-2018 Charles Banning. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file 4 5 // x2m_bulk.go: Process files with multiple XML messages. 6 7 package x2j 8 9 import ( 10 "bytes" 11 "io" 12 "os" 13 "regexp" 14 15 "github.com/clbanning/mxj" 16 ) 17 18 // XmlMsgsFromFile() 19 // 'fname' is name of file 20 // 'phandler' is the map processing handler. Return of 'false' stops further processing. 21 // 'ehandler' is the parsing error handler. Return of 'false' stops further processing and returns error. 22 // Note: phandler() and ehandler() calls are blocking, so reading and processing of messages is serialized. 23 // This means that you can stop reading the file on error or after processing a particular message. 24 // To have reading and handling run concurrently, pass arguments to a go routine in handler and return true. 25 func XmlMsgsFromFile(fname string, phandler func(map[string]interface{})(bool), ehandler func(error)(bool), recast ...bool) error { 26 var r bool 27 if len(recast) == 1 { 28 r = recast[0] 29 } 30 fi, fierr := os.Stat(fname) 31 if fierr != nil { 32 return fierr 33 } 34 fh, fherr := os.Open(fname) 35 if fherr != nil { 36 return fherr 37 } 38 defer fh.Close() 39 buf := make([]byte,fi.Size()) 40 _, rerr := fh.Read(buf) 41 if rerr != nil { 42 return rerr 43 } 44 doc := string(buf) 45 46 // xml.Decoder doesn't properly handle whitespace in some doc 47 // see songTextString.xml test case ... 48 reg,_ := regexp.Compile("[ \t\n\r]*<") 49 doc = reg.ReplaceAllString(doc,"<") 50 b := bytes.NewBufferString(doc) 51 52 for { 53 m, merr := mxj.NewMapXmlReader(b,r) 54 if merr != nil && merr != io.EOF { 55 if ok := ehandler(merr); !ok { 56 // caused reader termination 57 return merr 58 } 59 } 60 if m != nil { 61 if ok := phandler(m); !ok { 62 break 63 } 64 } 65 if merr == io.EOF { 66 break 67 } 68 } 69 return nil 70 } 71 72 // XmlBufferToMap - process XML message from a bytes.Buffer 73 // 'b' is the buffer 74 // Optional argument 'recast' coerces map values to float64 or bool where possible. 75 func XmlBufferToMap(b *bytes.Buffer,recast ...bool) (map[string]interface{},error) { 76 var r bool 77 if len(recast) == 1 { 78 r = recast[0] 79 } 80 81 return mxj.NewMapXmlReader(b, r) 82 } 83 84 // ============================= io.Reader version for stream processing ====================== 85 86 // XmlMsgsFromReader() - io.Reader version of XmlMsgsFromFile 87 // 'rdr' is an io.Reader for an XML message (stream) 88 // 'phandler' is the map processing handler. Return of 'false' stops further processing. 89 // 'ehandler' is the parsing error handler. Return of 'false' stops further processing and returns error. 90 // Note: phandler() and ehandler() calls are blocking, so reading and processing of messages is serialized. 91 // This means that you can stop reading the file on error or after processing a particular message. 92 // To have reading and handling run concurrently, pass arguments to a go routine in handler and return true. 93 func XmlMsgsFromReader(rdr io.Reader, phandler func(map[string]interface{})(bool), ehandler func(error)(bool), recast ...bool) error { 94 var r bool 95 if len(recast) == 1 { 96 r = recast[0] 97 } 98 99 for { 100 m, merr := ToMap(rdr,r) 101 if merr != nil && merr != io.EOF { 102 if ok := ehandler(merr); !ok { 103 // caused reader termination 104 return merr 105 } 106 } 107 if m != nil { 108 if ok := phandler(m); !ok { 109 break 110 } 111 } 112 if merr == io.EOF { 113 break 114 } 115 } 116 return nil 117 } 118 119