#!/usr/bin/env python3

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16
17import argparse
18import datetime
19import difflib
20import glob
21import os
22import re
23import sys
24
# Command-line interface. Arguments are parsed at import time; the resulting
# `args` and `verbose_out` are module-level globals read by the functions in
# this file.
parser = argparse.ArgumentParser()
parser.add_argument(
    "filenames", help="list of files to check, all files if unspecified", nargs="*"
)

# Default root: two directory levels above the directory holding this script.
# os.path.join (rather than string concatenation) keeps this correct when
# __file__ has no directory component: "" + "/../../" would resolve to "/",
# whereas join("", "..", "..") resolves relative to the current directory.
rootdir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
parser.add_argument("--rootdir", default=rootdir, help="root directory to examine")

# Note: this default is derived from the default root computed above, not
# from a --rootdir value given on the command line.
default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)

parser.add_argument(
    "-v",
    "--verbose",
    help="give verbose output regarding why a file does not pass",
    action="store_true",
)

args = parser.parse_args()

# Diagnostics go to stderr in verbose mode and are discarded otherwise.
# os.devnull is the platform's null device ("/dev/null" on POSIX, "nul" on
# Windows), so the script no longer assumes a POSIX file system layout.
verbose_out = sys.stderr if args.verbose else open(os.devnull, "w")
47
48
def get_refs():
    """Load every reference header found in the boilerplate directory.

    Files named ``boilerplate.<key>.txt`` inside ``args.boilerplate_dir`` are
    read, and their contents are stored as a list of lines under ``<key>``.

    Returns:
        dict mapping each ``<key>`` to the list of lines of its reference file.
    """
    references = {}
    wanted = os.path.join(args.boilerplate_dir, "boilerplate.*.txt")

    for ref_path in glob.glob(wanted):
        # "boilerplate.<key>.txt" -> "<key>"
        key = os.path.basename(ref_path).split(".")[1]
        with open(ref_path, "r") as handle:
            contents = handle.read()
        references[key] = contents.splitlines()

    return references
59
60
def is_generated_file(data, regexs):
    """Search *data* for the marker of an automatically generated file.

    Args:
        data: full text of the file being checked.
        regexs: mapping of compiled patterns; only ``"generated"`` is used.

    Returns:
        The match object of the first hit, or None when there is none, so the
        result can be used directly as a truth value.
    """
    generated_marker = regexs["generated"]
    return generated_marker.search(data)
63
64
def file_passes(filename, refs, regexs):
    """Check whether *filename* starts with the expected boilerplate header.

    Args:
        filename: path of the file to check.
        refs: mapping from file extension (or bare file name, for files
            without an extension) to the reference header as a list of lines.
        regexs: mapping of compiled patterns; the keys "generated",
            "go_build_constraints", "shebang", "year" and "date" are used.

    Returns:
        True when the header matches the reference, False otherwise
        (including when the file cannot be opened). The reason for a failure
        is written to ``verbose_out``.

    Raises:
        KeyError: if ``refs`` has no entry for the file's extension (or for
            its base name when it has no extension).
    """
    try:
        with open(filename) as stream:
            data = stream.read()
    except OSError as exc:
        print(f"Unable to open {filename}: {exc}", file=verbose_out)
        return False

    # determine if the file is automatically generated
    generated = is_generated_file(data, regexs)

    basename = os.path.basename(filename)
    extension = file_extension(filename)
    # generated Go files are compared against their own reference header
    if generated and extension == "go":
        extension = "generatego"

    # files without an extension are looked up by their base name
    ref = refs[extension] if extension != "" else refs[basename]

    # remove extra content from the top of files (first occurrence only)
    if extension in ("go", "generatego"):
        data = regexs["go_build_constraints"].sub("", data, count=1)
    elif extension in ("sh", "py"):
        data = regexs["shebang"].sub("", data, count=1)

    data = data.splitlines()

    # if our test file is smaller than the reference it surely fails!
    if len(ref) > len(data):
        print(
            f"File {filename} smaller than reference ({len(data)} < {len(ref)})",
            file=verbose_out,
        )
        return False

    # trim our file to the same number of lines as the reference file
    data = data[: len(ref)]

    # A literal "YEAR" is the placeholder used by the reference; it must
    # never appear in the header of a real file.
    pattern = regexs["year"]
    for line in data:
        if pattern.search(line):
            if generated:
                print(
                    f"File {filename} has the YEAR field, but it should not be in generated file",
                    file=verbose_out,
                )
            else:
                # This message used to lack the f-prefix and printed the
                # literal text "{filename}" instead of the file's path.
                print(
                    f"File {filename} has the YEAR field, but missing the year of date",
                    file=verbose_out,
                )
            return False

    if not generated:
        # Replace the year(s) on the first header line that contains one with
        # "YEAR" so that the header can be compared with the reference.
        pattern = regexs["date"]
        for i, line in enumerate(data):
            data[i], found = pattern.subn("YEAR", line)
            if found != 0:
                break

    # if we don't match the reference at this point, fail
    if ref != data:
        print(f"Header in {filename} does not match reference, diff:", file=verbose_out)
        if args.verbose:
            print(file=verbose_out)
            for line in difflib.unified_diff(
                ref, data, "reference", filename, lineterm=""
            ):
                print(line, file=verbose_out)
            print(file=verbose_out)
        return False

    return True
142
143
def file_extension(filename):
    """Return the lower-cased extension of *filename* without its leading dot.

    The result is the empty string when the name has no extension.
    """
    _, suffix = os.path.splitext(filename)
    # splitext yields either "" or a suffix that starts with its only dot
    return suffix[1:].lower()
146
147
# Path fragments that are never checked. normalize_files() drops every path
# that contains one of these as a substring, and get_files() additionally
# avoids descending into directories whose name equals one of them.
skipped_names = [
    "third_party",
    "_output",
    ".git",
    "cluster/env.sh",
    "vendor",
    "testdata",
    "test/e2e/generated/bindata.go",
    "hack/boilerplate/test",
    "staging/src/k8s.io/kubectl/pkg/generated/bindata.go",
]
159
160
def normalize_files(files):
    """Filter out skipped paths and anchor relative ones at the root directory.

    Args:
        files: iterable of path names.

    Returns:
        A new list, in the original order, holding every path that contains
        none of the ``skipped_names`` fragments; paths that are not absolute
        are joined onto ``args.rootdir``.
    """
    kept = [
        candidate
        for candidate in files
        if not any(fragment in candidate for fragment in skipped_names)
    ]
    return [
        candidate if os.path.isabs(candidate) else os.path.join(args.rootdir, candidate)
        for candidate in kept
    ]
171
172
def get_files(extensions):
    """Collect the files whose header has to be checked.

    The candidates are the file names given on the command line or, when none
    were given, every file found below ``args.rootdir``. They are passed
    through normalize_files() and then filtered by extension.

    Args:
        extensions: container of known extensions and bare file names; a
            file is kept when its extension or its base name is in it.

    Returns:
        List of the normalized path names to check.
    """
    if args.filenames:
        files = args.filenames
    else:
        files = []
        for root, dirs, walkfiles in os.walk(args.rootdir):
            # Don't visit skipped dirs or dirs that start with "__". The
            # skipped ones would be pruned later by normalize_files() anyway,
            # but pruning here cuts down the filesystem walking and the size
            # of the file list.
            #
            # The list is rebuilt with one slice assignment: os.walk only
            # honours in-place changes, and calling dirs.remove() while
            # iterating over dirs (as was done before) skips the entry that
            # follows each removed one, so some "__*" dirs were still visited.
            dirs[:] = [
                dname
                for dname in dirs
                if dname not in skipped_names and not dname.startswith("__")
            ]
            files.extend(os.path.join(root, name) for name in walkfiles)

    return [
        pathname
        for pathname in normalize_files(files)
        if file_extension(pathname) in extensions
        or os.path.basename(pathname) in extensions
    ]
203
204
def get_dates():
    """Build a regex alternation of every year from 2014 to the current one.

    Returns:
        A string such as ``"(2014|2015|2016)"`` whose last alternative is the
        current year according to the local clock.
    """
    current_year = datetime.datetime.now().year
    alternatives = "|".join(map(str, range(2014, current_year + 1)))
    return f"({alternatives})"
208
209
def get_regexs():
    """Compile the patterns used while checking a file header.

    Returns:
        dict with the keys "year", "date", "go_build_constraints", "shebang"
        and "generated", each mapped to a compiled regular expression.
    """
    return {
        # "YEAR" exists in the boilerplate, but shouldn't in the real thing
        "year": re.compile("YEAR"),
        # get_dates() yields an alternation of the years from 2014 up to the
        # current one, e.g. "(2014|2015|2016|2017|2018)";
        # company holder names can be anything
        "date": re.compile(get_dates()),
        # strip the following build constraints/tags:
        # //go:build
        # // +build \n\n
        "go_build_constraints": re.compile(
            r"^(//(go:build| \+build).*\n)+\n", re.MULTILINE
        ),
        # strip #!.* from scripts
        "shebang": re.compile(r"^(#!.*\n)\n*", re.MULTILINE),
        # marker line of generated files
        "generated": re.compile(r"^[/*#]+ +.* DO NOT EDIT\.$", re.MULTILINE),
    }
229
230
def main():
    """Print the name of every checked file whose header does not pass.

    Returns:
        Always 0; failures are reported only through the printed names.
    """
    patterns = get_regexs()
    references = get_refs()

    for candidate in get_files(references):
        if file_passes(candidate, references, patterns):
            continue
        print(candidate)

    return 0
241
242
# Run the check only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
View as plain text