...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package metadata
18
19 import (
20 "regexp"
21 "strconv"
22 "strings"
23
24 "github.com/apache/arrow/go/v15/parquet"
25 "github.com/apache/arrow/go/v15/parquet/schema"
26 )
27
28 var (
29
30
31
32 versionRx = regexp.MustCompile(`^(\d+)\.(\d+)\.(\d+)([^-+]*)?(?:-([^+]*))?(?:\+(.*))?$`)
33
34
35
36 applicationRx = regexp.MustCompile(`^(.*?)\s*(?:(version\s*(?:([^(]*?)\s*(?:\(\s*build\s*([^)]*?)\s*\))?)?)?)$`)
37
38
39
40 Parquet816FixedVersion = NewAppVersionExplicit("parquet-mr", 1, 2, 9)
41 parquet251FixedVersion = NewAppVersionExplicit("parquet-mr", 1, 8, 0)
42 parquetCPPFixedStatsVersion = NewAppVersionExplicit("parquet-cpp", 1, 3, 0)
43 parquetMRFixedStatsVersion = NewAppVersionExplicit("parquet-mr", 1, 10, 0)
44
45
46 parquet1655FixedVersion = NewAppVersionExplicit("parquet-cpp-arrow", 4, 0, 0)
47 )
48
49
50
// AppVersion describes the application that produced a parquet file together
// with its parsed version number.
type AppVersion struct {
	// App is the application name and Build the build name. NewAppVersion
	// fills them from a lowercased created-by string and sets App to
	// "unknown" when that string cannot be parsed.
	App, Build string

	// Version holds the individual parts of the version number.
	Version struct {
		// Major, Minor and Patch are the numeric version parts; these are
		// the only parts LessThan and Equal look at.
		Major, Minor, Patch int

		// Unknown is any text that directly follows the patch number,
		// PreRelease the text after a "-" and BuildInfo the text after
		// a "+".
		Unknown, PreRelease, BuildInfo string
	}
}
63
64
65
66 func NewAppVersionExplicit(app string, major, minor, patch int) *AppVersion {
67 v := &AppVersion{App: app}
68 v.Version.Major = major
69 v.Version.Minor = minor
70 v.Version.Patch = patch
71 return v
72 }
73
74
75
76
77
78 func NewAppVersion(createdby string) *AppVersion {
79 v := &AppVersion{}
80
81 var ver []string
82
83 m := applicationRx.FindStringSubmatch(strings.ToLower(createdby))
84 if len(m) >= 4 {
85 v.App = m[1]
86 v.Build = m[4]
87 ver = versionRx.FindStringSubmatch(m[3])
88 } else {
89 v.App = "unknown"
90 }
91
92 if len(ver) >= 7 {
93 v.Version.Major, _ = strconv.Atoi(ver[1])
94 v.Version.Minor, _ = strconv.Atoi(ver[2])
95 v.Version.Patch, _ = strconv.Atoi(ver[3])
96 v.Version.Unknown = ver[4]
97 v.Version.PreRelease = ver[5]
98 v.Version.BuildInfo = ver[6]
99 }
100 return v
101 }
102
103
104
105
106
107
108
109
110
111 func (v AppVersion) LessThan(other *AppVersion) bool {
112 switch {
113 case v.App != other.App:
114 return false
115 case v.Version.Major < other.Version.Major:
116 return true
117 case v.Version.Major > other.Version.Major:
118 return false
119 case v.Version.Minor < other.Version.Minor:
120 return true
121 case v.Version.Minor > other.Version.Minor:
122 return false
123 }
124
125 return v.Version.Patch < other.Version.Patch
126 }
127
128
129
130
131 func (v AppVersion) Equal(other *AppVersion) bool {
132 return v.App == other.App &&
133 v.Version.Major == other.Version.Major &&
134 v.Version.Minor == other.Version.Minor &&
135 v.Version.Patch == other.Version.Patch
136 }
137
138
139
140
141
142
143
144
145 func (v AppVersion) HasCorrectStatistics(coltype parquet.Type, logicalType schema.LogicalType, stats EncodedStatistics, sort schema.SortOrder) bool {
146
147 if (v.App == "parquet-cpp" && v.LessThan(parquetCPPFixedStatsVersion)) ||
148 (v.App == "parquet-mr" && v.LessThan(parquetMRFixedStatsVersion)) {
149
150 var maxEqualsMin bool
151 if stats.HasMin && stats.HasMax {
152 maxEqualsMin = string(stats.Min) == string(stats.Max)
153 }
154 if sort != schema.SortSIGNED && !maxEqualsMin {
155 return false
156 }
157
158 if coltype != parquet.Types.FixedLenByteArray && coltype != parquet.Types.ByteArray {
159 return true
160 }
161 }
162
163
164
165 if v.App == "parquet-cpp" || (v.App == "parquet-cpp-arrow" && v.LessThan(parquet1655FixedVersion)) {
166 if _, ok := logicalType.(*schema.DecimalLogicalType); ok && coltype == parquet.Types.FixedLenByteArray {
167 return false
168 }
169 }
170
171
172
173 if v.App == "unknown" {
174 return true
175 }
176
177
178 if sort == schema.SortUNKNOWN {
179 return false
180 }
181
182
183 return !v.LessThan(parquet251FixedVersion)
184 }
185
View as plain text