...

Source file src/github.com/gabriel-vasile/mimetype/internal/magic/text_csv.go

Documentation: github.com/gabriel-vasile/mimetype/internal/magic

     1  package magic
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/csv"
     6  	"errors"
     7  	"io"
     8  )
     9  
    10  // Csv matches a comma-separated values file.
    11  func Csv(raw []byte, limit uint32) bool {
    12  	return sv(raw, ',', limit)
    13  }
    14  
    15  // Tsv matches a tab-separated values file.
    16  func Tsv(raw []byte, limit uint32) bool {
    17  	return sv(raw, '\t', limit)
    18  }
    19  
    20  func sv(in []byte, comma rune, limit uint32) bool {
    21  	r := csv.NewReader(dropLastLine(in, limit))
    22  	r.Comma = comma
    23  	r.ReuseRecord = true
    24  	r.LazyQuotes = true
    25  	r.Comment = '#'
    26  
    27  	lines := 0
    28  	for {
    29  		_, err := r.Read()
    30  		if errors.Is(err, io.EOF) {
    31  			break
    32  		}
    33  		if err != nil {
    34  			return false
    35  		}
    36  		lines++
    37  	}
    38  
    39  	return r.FieldsPerRecord > 1 && lines > 1
    40  }
    41  
    42  // dropLastLine drops the last incomplete line from b.
    43  //
    44  // mimetype limits itself to ReadLimit bytes when performing a detection.
    45  // This means, for file formats like CSV for NDJSON, the last line of the input
    46  // can be an incomplete line.
    47  func dropLastLine(b []byte, cutAt uint32) io.Reader {
    48  	if cutAt == 0 {
    49  		return bytes.NewReader(b)
    50  	}
    51  	if uint32(len(b)) >= cutAt {
    52  		for i := cutAt - 1; i > 0; i-- {
    53  			if b[i] == '\n' {
    54  				return bytes.NewReader(b[:i])
    55  			}
    56  		}
    57  
    58  		// No newline was found between the 0 index and cutAt.
    59  		return bytes.NewReader(b[:cutAt])
    60  	}
    61  
    62  	return bytes.NewReader(b)
    63  }
    64  

View as plain text