# Step template: rewrites relative links in every readme.md (any casing) found
# under TargetFolder into absolute github.com links pinned to BuildSHA, then
# prints the resulting diff.
#
# Parameters:
#   TargetFolder - folder that is searched recursively for readme.md files.
#   RootFolder   - repository root; link targets are expressed relative to it.
#   BuildSHA     - commit hash embedded in the generated links.
#   RepoId       - full repository id used in the generated links
#                  (the script's help text gives "Azure/azure-sdk-for-net" as an example).
parameters:
  TargetFolder: ''
  RootFolder: ''
  BuildSHA: ''
  RepoId: $(Build.Repository.Name)

steps:
  - task: PythonScript@0
    displayName: Replace Relative Readme Links with Absolute References
    inputs:
      scriptSource: inline
      script: |
        import argparse
        import sys
        import os
        import logging
        import glob
        import re
        import fnmatch
        from io import open
        try:
            from pathlib import Path
        except ImportError:
            # Fall back to the backport when the stdlib module is unavailable.
            from pathlib2 import Path

        # This script is intended to be run against a single folder. All readme.md files (regardless of casing) will have the relative links
        # updated with appropriate full reference links. This is a recursive update.

        logging.getLogger().setLevel(logging.INFO)

        RELATIVE_LINK_REPLACEMENT_SYNTAX = (
            "https://github.com/{repo_id}/tree/{build_sha}/{target_resource_path}"
        )

        # Inline markdown links: [text](target)
        LINK_DISCOVERY_REGEX = r"\[([^\]]*)\]\(([^)]+)\)"
        # Reference-style link definitions: [label]: target
        PREDEFINED_LINK_DISCOVERY_REGEX = r"(\[[^\]]+]\:)\s*([^\s]+)"

        IMAGE_FILE_EXTENSIONS = ['.jpeg', '.jpg', '.png', '.gif', '.tiff']
        RELATIVE_LINK_REPLACEMENT_SYNTAX_FOR_IMAGE = (
            "https://github.com/{repo_id}/raw/{build_sha}/{target_resource_path}"
        )


        def locate_readmes(directory):
            """Return the paths of all readme.md files (any casing) under directory."""
            readme_set = []
            for root, _dirs, files in os.walk(directory):
                for file_name in files:
                    if file_name.lower() == "readme.md":
                        readme_set.append(os.path.join(root, file_name))
            return readme_set


        def is_relative_link(link_value, readme_location):
            """Return True when link_value resolves to an existing path relative to the readme.

            A trailing '#anchor' is ignored for the existence check. Any failure
            while resolving the path is treated as "not a relative link".
            """
            # An empty target would resolve to the readme's own directory (which
            # always exists) and be rewritten by mistake.
            if not link_value:
                return False

            link_without_location = link_value
            anchor_index = link_without_location.find('#')
            # Only strip when '#' is not the first character; a pure '#anchor'
            # link is left intact so it fails the existence check below.
            if anchor_index > 0:
                link_without_location = link_without_location[:anchor_index]

            try:
                candidate = os.path.abspath(
                    os.path.join(os.path.dirname(readme_location), link_without_location)
                )
                return os.path.exists(candidate)
            except Exception:
                # Best effort: a link that cannot be resolved is not rewritten.
                return False


        def _build_absolute_link(link_path, readme_location, root_folder, build_sha, repo_id):
            """Build the absolute github.com URL for a link that is relative to the readme."""
            # Find the path of the target from the root of the repository.
            resource_absolute_path = os.path.abspath(
                os.path.join(os.path.dirname(readme_location), link_path)
            )
            placement_from_root = os.path.relpath(resource_absolute_path, root_folder)

            # Compare the extension case-insensitively so that e.g. '.PNG' is
            # still given the image ('raw') URL form.
            suffix = Path(placement_from_root).suffix.lower()
            if suffix in IMAGE_FILE_EXTENSIONS:
                template = RELATIVE_LINK_REPLACEMENT_SYNTAX_FOR_IMAGE
            else:
                template = RELATIVE_LINK_REPLACEMENT_SYNTAX

            # Windows path separators are not valid in the URL.
            return template.format(
                repo_id=repo_id,
                build_sha=build_sha,
                target_resource_path=placement_from_root,
            ).replace("\\", "/")


        def replace_relative_link(match, readme_location, root_folder, build_sha, repo_id):
            """re.sub callback for inline links; returns the original text when the link is not relative."""
            link_path = match.group(2).strip()
            if not is_relative_link(link_path, readme_location):
                return match.group(0)

            updated_link = _build_absolute_link(
                link_path, readme_location, root_folder, build_sha, repo_id
            )
            return "[{}]({})".format(match.group(1), updated_link)


        def replace_predefined_relative_links(match, readme_location, root_folder, build_sha, repo_id):
            """re.sub callback for '[label]: target' definitions; returns the original text when the link is not relative."""
            link_path = match.group(2).strip()
            if not is_relative_link(link_path, readme_location):
                return match.group(0)

            updated_link = _build_absolute_link(
                link_path, readme_location, root_folder, build_sha, repo_id
            )
            return "{} {}".format(match.group(1), updated_link)


        def transfer_content_to_absolute_references(
            root_folder, build_sha, repo_id, readme_location, content
        ):
            """Return content with inline and reference-style relative links made absolute."""

            def rewrite_inline(match):
                return replace_relative_link(
                    match, readme_location, root_folder, build_sha, repo_id
                )

            def rewrite_predefined(match):
                return replace_predefined_relative_links(
                    match, readme_location, root_folder, build_sha, repo_id
                )

            content = re.sub(LINK_DISCOVERY_REGEX, rewrite_inline, content)
            content = re.sub(PREDEFINED_LINK_DISCOVERY_REGEX, rewrite_predefined, content)
            return content


        if __name__ == "__main__":
            parser = argparse.ArgumentParser(
                description="Replaces relative links for any README.md under the target folder. Given any discovered relative link, will replace with the provided repoId and SHA. Case insensitive"
            )

            parser.add_argument(
                "-t",
                "--target",
                dest="target_folder",
                help="The target folder that contains a README ",
                default="${{ parameters.TargetFolder }}",
            )

            parser.add_argument(
                "-i",
                "--repoid",
                dest="repo_id",
                help='The target repository used as the base for the path replacement. Full Id, example: "Azure/azure-sdk-for-net"',
                default="${{ parameters.RepoId }}",
            )

            parser.add_argument(
                "-r",
                "--root",
                dest="root_folder",
                help="The root directory of the repository. This gives us the ability to rationalize links in situations where a relative link traverses UPWARDS from the readme.",
                default="${{ parameters.RootFolder }}",
            )

            parser.add_argument(
                "-s",
                "--sha",
                dest="build_sha",
                help="The commit hash associated with this change. Using this will mean that links will never be broken.",
                default="${{ parameters.BuildSHA }}",
            )

            args = parser.parse_args()

            logging.info("Root Folder: {}".format(args.root_folder))
            logging.info("Target Folder: {}".format(args.target_folder))
            logging.info("Repository Id: {}".format(args.repo_id))
            logging.info("Build SHA: {}".format(args.build_sha))

            readme_files = locate_readmes(args.target_folder)

            for readme_location in readme_files:
                try:
                    logging.info(
                        "Running Relative Link Replacement on {}.".format(readme_location)
                    )

                    with open(readme_location, "r", encoding="utf-8") as readme_stream:
                        readme_content = readme_stream.read()

                    new_content = transfer_content_to_absolute_references(
                        args.root_folder,
                        args.build_sha,
                        args.repo_id,
                        readme_location,
                        readme_content,
                    )

                    with open(readme_location, "w", encoding="utf-8") as readme_stream:
                        readme_stream.write(new_content)

                except Exception as e:
                    logging.error(e)
                    # sys.exit is used instead of the site-provided exit(), which
                    # is an interactive helper and not guaranteed to be defined.
                    sys.exit(1)

  - script: |
      git diff -U0
    displayName: Highlight Readme Updates