boilerplate.py

Documentation: k8s.io/kubernetes/hack/boilerplate

     1#!/usr/bin/env python3
     2
     3# Copyright 2015 The Kubernetes Authors.
     4#
     5# Licensed under the Apache License, Version 2.0 (the "License");
     6# you may not use this file except in compliance with the License.
     7# You may obtain a copy of the License at
     8#
     9#     http://www.apache.org/licenses/LICENSE-2.0
    10#
    11# Unless required by applicable law or agreed to in writing, software
    12# distributed under the License is distributed on an "AS IS" BASIS,
    13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14# See the License for the specific language governing permissions and
    15# limitations under the License.
    16
    17import argparse
    18import datetime
    19import difflib
    20import glob
    21import os
    22import re
    23import sys
    24
    25parser = argparse.ArgumentParser()
    26parser.add_argument(
    27    "filenames", help="list of files to check, all files if unspecified", nargs="*"
    28)
    29
    30rootdir = os.path.dirname(__file__) + "/../../"
    31rootdir = os.path.abspath(rootdir)
    32parser.add_argument("--rootdir", default=rootdir, help="root directory to examine")
    33
    34default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
    35parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)
    36
    37parser.add_argument(
    38    "-v",
    39    "--verbose",
    40    help="give verbose output regarding why a file does not pass",
    41    action="store_true",
    42)
    43
    44args = parser.parse_args()
    45
    46verbose_out = sys.stderr if args.verbose else open("/dev/null", "w")
    47
    48
    49def get_refs():
    50    refs = {}
    51
    52    for path in glob.glob(os.path.join(args.boilerplate_dir, "boilerplate.*.txt")):
    53        extension = os.path.basename(path).split(".")[1]
    54
    55        with open(path, "r") as ref_file:
    56            refs[extension] = ref_file.read().splitlines()
    57
    58    return refs
    59
    60
    61def is_generated_file(data, regexs):
    62    return regexs["generated"].search(data)
    63
    64
    65def file_passes(filename, refs, regexs):
    66    try:
    67        with open(filename) as stream:
    68            data = stream.read()
    69    except OSError as exc:
    70        print(f"Unable to open {filename}: {exc}", file=verbose_out)
    71        return False
    72
    73    # determine if the file is automatically generated
    74    generated = is_generated_file(data, regexs)
    75
    76    basename = os.path.basename(filename)
    77    extension = file_extension(filename)
    78    if generated:
    79        if extension == "go":
    80            extension = "generatego"
    81
    82    if extension != "":
    83        ref = refs[extension]
    84    else:
    85        ref = refs[basename]
    86
    87    # remove extra content from the top of files
    88    if extension in ("go", "generatego"):
    89        data, found = regexs["go_build_constraints"].subn("", data, 1)
    90    elif extension in ["sh", "py"]:
    91        data, found = regexs["shebang"].subn("", data, 1)
    92
    93    data = data.splitlines()
    94
    95    # if our test file is smaller than the reference it surely fails!
    96    if len(ref) > len(data):
    97        print(
    98            f"File {filename} smaller than reference ({len(data)} < {len(ref)})",
    99            file=verbose_out,
   100        )
   101        return False
   102
   103    # trim our file to the same number of lines as the reference file
   104    data = data[: len(ref)]
   105
   106    pattern = regexs["year"]
   107    for line in data:
   108        if pattern.search(line):
   109            if generated:
   110                print(
   111                    f"File {filename} has the YEAR field, but it should not be in generated file",
   112                    file=verbose_out,
   113                )
   114            else:
   115                print(
   116                    "File {filename} has the YEAR field, but missing the year of date",
   117                    file=verbose_out,
   118                )
   119            return False
   120
   121    if not generated:
   122        # Replace all occurrences of the regex "2014|2015|2016|2017|2018" with "YEAR"
   123        pattern = regexs["date"]
   124        for i, line in enumerate(data):
   125            data[i], found = pattern.subn("YEAR", line)
   126            if found != 0:
   127                break
   128
   129    # if we don't match the reference at this point, fail
   130    if ref != data:
   131        print(f"Header in {filename} does not match reference, diff:", file=verbose_out)
   132        if args.verbose:
   133            print(file=verbose_out)
   134            for line in difflib.unified_diff(
   135                ref, data, "reference", filename, lineterm=""
   136            ):
   137                print(line, file=verbose_out)
   138            print(file=verbose_out)
   139        return False
   140
   141    return True
   142
   143
   144def file_extension(filename):
   145    return os.path.splitext(filename)[1].split(".")[-1].lower()
   146
   147
   148skipped_names = [
   149    "third_party",
   150    "_output",
   151    ".git",
   152    "cluster/env.sh",
   153    "vendor",
   154    "testdata",
   155    "test/e2e/generated/bindata.go",
   156    "hack/boilerplate/test",
   157    "staging/src/k8s.io/kubectl/pkg/generated/bindata.go",
   158]
   159
   160
   161def normalize_files(files):
   162    newfiles = []
   163    for pathname in files:
   164        if any(x in pathname for x in skipped_names):
   165            continue
   166        newfiles.append(pathname)
   167    for i, pathname in enumerate(newfiles):
   168        if not os.path.isabs(pathname):
   169            newfiles[i] = os.path.join(args.rootdir, pathname)
   170    return newfiles
   171
   172
   173def get_files(extensions):
   174    files = []
   175    if len(args.filenames) > 0:
   176        files = args.filenames
   177    else:
   178        for root, dirs, walkfiles in os.walk(args.rootdir):
   179            # don't visit certain dirs. This is just a performance improvement
   180            # as we would prune these later in normalize_files(). But doing it
   181            # cuts down the amount of filesystem walking we do and cuts down
   182            # the size of the file list
   183            for dname in skipped_names:
   184                if dname in dirs:
   185                    dirs.remove(dname)
   186            for dname in dirs:
   187                # dirs that start with __ are ignored
   188                if dname.startswith("__"):
   189                    dirs.remove(dname)
   190
   191            for name in walkfiles:
   192                pathname = os.path.join(root, name)
   193                files.append(pathname)
   194
   195    files = normalize_files(files)
   196    outfiles = []
   197    for pathname in files:
   198        basename = os.path.basename(pathname)
   199        extension = file_extension(pathname)
   200        if extension in extensions or basename in extensions:
   201            outfiles.append(pathname)
   202    return outfiles
   203
   204
   205def get_dates():
   206    years = datetime.datetime.now().year
   207    return "(%s)" % "|".join(str(year) for year in range(2014, years + 1))
   208
   209
   210def get_regexs():
   211    regexs = {}
   212    # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
   213    regexs["year"] = re.compile("YEAR")
   214    # get_dates return 2014, 2015, 2016, 2017, or 2018 until the current year
   215    # as a regex like: "(2014|2015|2016|2017|2018)";
   216    # company holder names can be anything
   217    regexs["date"] = re.compile(get_dates())
   218    # strip the following build constraints/tags:
   219    # //go:build
   220    # // +build \n\n
   221    regexs["go_build_constraints"] = re.compile(
   222        r"^(//(go:build| \+build).*\n)+\n", re.MULTILINE
   223    )
   224    # strip #!.* from scripts
   225    regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
   226    # Search for generated files
   227    regexs["generated"] = re.compile(r"^[/*#]+ +.* DO NOT EDIT\.$", re.MULTILINE)
   228    return regexs
   229
   230
   231def main():
   232    regexs = get_regexs()
   233    refs = get_refs()
   234    filenames = get_files(refs)
   235
   236    for filename in filenames:
   237        if not file_passes(filename, refs, regexs):
   238            print(filename)
   239
   240    return 0
   241
   242
   243if __name__ == "__main__":
   244    sys.exit(main())
View as plain text