# Template parameters. Each value is substituted into the inline Python script of this
# template, where it becomes the default for the matching command-line option
# (--target, --root, --sha, --repoid).
parameters:
  # Folder that is walked recursively looking for README.md files (any casing).
  TargetFolder: ''
  # Repository root; rewritten link paths are computed relative to this folder.
  RootFolder: ''
  # Commit hash embedded in every generated URL.
  BuildSHA: ''
  # Full repository id used in the generated URLs, e.g. "Azure/azure-sdk-for-net".
  RepoId: $(Build.Repository.Name)
6
7steps:
8 - task: PythonScript@0
9 displayName: Replace Relative Readme Links with Absolute References
10 inputs:
11 scriptSource: inline
12 script: |
13 import argparse
14 import sys
15 import os
16 import logging
17 import glob
18 import re
19 import fnmatch
20 from io import open
21 try:
22 from pathlib import Path
23 except:
24 from pathlib2 import Path
25
# This script is intended to be run against a single folder. Every readme.md file (regardless of casing) found
# under that folder, recursively, has its relative links replaced with full reference links.
28
# Let INFO-level progress messages through the root logger.
logging.getLogger().setLevel(logging.INFO)

# URL templates for rewritten links. Non-image targets use the /tree/ form,
# image targets (by file extension, see below) use the /raw/ form.
RELATIVE_LINK_REPLACEMENT_SYNTAX = "https://github.com/{repo_id}/tree/{build_sha}/{target_resource_path}"
RELATIVE_LINK_REPLACEMENT_SYNTAX_FOR_IMAGE = "https://github.com/{repo_id}/raw/{build_sha}/{target_resource_path}"

# File extensions that select the image URL template.
IMAGE_FILE_EXTENSIONS = ['.jpeg', '.jpg', '.png', '.gif', '.tiff']

# Inline markdown links "[text](target)": group 1 is the text, group 2 the target.
LINK_DISCOVERY_REGEX = r"\[([^\]]*)\]\(([^)]+)\)"

# Reference-style definitions "[label]: target": group 1 is "[label]:", group 2 the target.
PREDEFINED_LINK_DISCOVERY_REGEX = r"(\[[^\]]+]\:)\s*([^\s]+)"
42
def locate_readmes(directory):
    """Collect the path of every README.md (matched case-insensitively) under ``directory``.

    The folder is walked recursively with os.walk. Each returned entry is the
    walked folder joined with the file name, in the order os.walk yields them.
    A folder with no matching files (or one that does not exist) gives ``[]``.
    """
    return [
        os.path.join(current_dir, entry)
        for current_dir, _subdirs, entries in os.walk(directory)
        for entry in entries
        if entry.lower() == "readme.md"
    ]
51
52
def is_relative_link(link_value, readme_location):
    """Tell whether ``link_value`` is a relative link that resolves to something on disk.

    Args:
        link_value: The raw link target taken from the markdown, optionally
            carrying a ``#fragment``.
        readme_location: Path of the README that contains the link; the link is
            resolved against this file's directory.

    Returns:
        True when the link (minus its fragment) names an existing file or folder
        relative to the README's directory, False otherwise. Empty links, pure
        ``#fragment`` links and absolute filesystem paths are never relative.
    """
    # Only the path portion is checked on disk; a "#fragment" is not part of the file name.
    path_part = link_value.partition("#")[0]

    # An empty path would resolve to the README's own directory and look like a
    # valid relative link, so reject it explicitly.
    if not path_part.strip():
        return False

    # os.path.join discards the README directory when handed an absolute path,
    # which would make unrelated files on the build machine look "relative".
    if os.path.isabs(path_part):
        return False

    try:
        candidate = os.path.abspath(
            os.path.join(os.path.dirname(readme_location), path_part)
        )
        return os.path.exists(candidate)
    except Exception:
        # Best effort: anything that cannot be turned into a path is not a relative link.
        return False
64
65
def _absolute_url_for(link_path, readme_location, root_folder, build_sha, repo_id):
    """Build the github.com URL for a relative link already known to resolve on disk."""
    # Locate the target from the root of the repository so links that traverse
    # upwards from the README still map to a repository path.
    resource_absolute_path = os.path.abspath(
        os.path.join(os.path.dirname(readme_location), link_path)
    )
    placement_from_root = os.path.relpath(resource_absolute_path, root_folder)

    # Decide image vs. non-image from the file name alone: drop a trailing
    # "#fragment" and ignore letter case, so "Logo.PNG" and "img.png#x" both
    # count as images.
    file_name = Path(placement_from_root).name
    fragment_at = file_name.find("#")
    if fragment_at > 0:
        file_name = file_name[:fragment_at]
    suffix = Path(file_name).suffix.lower()

    if suffix in IMAGE_FILE_EXTENSIONS:
        template = RELATIVE_LINK_REPLACEMENT_SYNTAX_FOR_IMAGE
    else:
        template = RELATIVE_LINK_REPLACEMENT_SYNTAX

    # relpath uses the OS separator; URLs always use forward slashes.
    return template.format(
        repo_id=repo_id,
        build_sha=build_sha,
        target_resource_path=placement_from_root,
    ).replace("\\", "/")


def replace_relative_link(match, readme_location, root_folder, build_sha, repo_id):
    """Rewrite one inline markdown link ``[text](target)`` found by a regex match.

    Args:
        match: Regex match whose group 1 is the link text and group 2 the target.
        readme_location: Path of the README being processed.
        root_folder: Repository root used to compute the path inside the repo.
        build_sha: Commit hash placed in the generated URL.
        repo_id: Repository id placed in the generated URL.

    Returns:
        ``[text](absolute-url)`` when the target is a relative link that exists
        on disk, otherwise the matched text unchanged.
    """
    link_path = match.group(2).strip()

    if not is_relative_link(link_path, readme_location):
        return match.group(0)

    updated_link = _absolute_url_for(
        link_path, readme_location, root_folder, build_sha, repo_id
    )
    return "[{}]({})".format(match.group(1), updated_link)


def replace_predefined_relative_links(match, readme_location, root_folder, build_sha, repo_id):
    """Rewrite one reference-style link definition ``[label]: target`` found by a regex match.

    Args:
        match: Regex match whose group 1 is ``[label]:`` and group 2 the target.
        readme_location: Path of the README being processed.
        root_folder: Repository root used to compute the path inside the repo.
        build_sha: Commit hash placed in the generated URL.
        repo_id: Repository id placed in the generated URL.

    Returns:
        ``[label]: absolute-url`` when the target is a relative link that exists
        on disk, otherwise the matched text unchanged.
    """
    link_path = match.group(2).strip()

    if not is_relative_link(link_path, readme_location):
        return match.group(0)

    updated_link = _absolute_url_for(
        link_path, readme_location, root_folder, build_sha, repo_id
    )
    return "{} {}".format(match.group(1), updated_link)
121
122
def transfer_content_to_absolute_references(
    root_folder, build_sha, repo_id, readme_location, content
):
    """Return ``content`` with its relative markdown links rewritten as absolute URLs.

    Two substitution passes are applied in order: inline links first, then
    reference-style link definitions. Each regex match is handed to the
    matching replace_* function together with the README and repository details.
    """

    def _rewrite_inline(found):
        return replace_relative_link(
            found, readme_location, root_folder, build_sha, repo_id
        )

    def _rewrite_reference(found):
        return replace_predefined_relative_links(
            found, readme_location, root_folder, build_sha, repo_id
        )

    after_inline_pass = re.sub(LINK_DISCOVERY_REGEX, _rewrite_inline, content)
    return re.sub(PREDEFINED_LINK_DISCOVERY_REGEX, _rewrite_reference, after_inline_pass)
143
144
if __name__ == "__main__":
    # Entry point: parse the options (whose defaults are filled in by the pipeline
    # template), find every README under the target folder and rewrite its
    # relative links in place.
    parser = argparse.ArgumentParser(
        description="Replaces relative links for any README.md under the target folder. Given any discovered relative link, will replace with the provided repoId and SHA. Case insensitive"
    )

    # The defaults below are raw strings on purpose: the template values are pasted
    # into this source text, and a Windows path such as D:\a\1\s would otherwise
    # have "\a" and "\1" interpreted as escape sequences.
    parser.add_argument(
        "-t",
        "--target",
        dest="target_folder",
        help="The target folder that contains a README ",
        default=r"${{ parameters.TargetFolder }}",
    )

    parser.add_argument(
        "-i",
        "--repoid",
        dest="repo_id",
        help='The target repository used as the base for the path replacement. Full Id, example: "Azure/azure-sdk-for-net"',
        default=r"${{ parameters.RepoId }}",
    )

    parser.add_argument(
        "-r",
        "--root",
        dest="root_folder",
        help="The root directory of the repository. This gives us the ability to rationalize links in situations where a relative link traverses UPWARDS from the readme.",
        default=r"${{ parameters.RootFolder }}",
    )

    parser.add_argument(
        "-s",
        "--sha",
        dest="build_sha",
        help="The commit hash associated with this change. Using this will mean that links will never be broken.",
        default=r"${{ parameters.BuildSHA }}",
    )

    args = parser.parse_args()

    logging.info("Root Folder: {}".format(args.root_folder))
    logging.info("Target Folder: {}".format(args.target_folder))
    logging.info("Repository Id: {}".format(args.repo_id))
    logging.info("Build SHA: {}".format(args.build_sha))

    readme_files = locate_readmes(args.target_folder)

    for readme_location in readme_files:
        try:
            logging.info(
                "Running Relative Link Replacement on {}.".format(readme_location)
            )

            # newline="" disables newline translation so the file's existing line
            # endings survive the round trip and the diff shows only link changes.
            with open(readme_location, "r", encoding="utf-8", newline="") as readme_stream:
                readme_content = readme_stream.read()

            new_content = transfer_content_to_absolute_references(
                args.root_folder,
                args.build_sha,
                args.repo_id,
                readme_location,
                readme_content,
            )

            # Leave files without any rewritten link untouched.
            if new_content != readme_content:
                with open(readme_location, "w", encoding="utf-8", newline="") as readme_stream:
                    readme_stream.write(new_content)

        except Exception as e:
            # Log with traceback, then fail the step with a non-zero exit code.
            logging.exception(e)
            sys.exit(1)
214
  # Print the uncommitted working-tree changes with zero lines of context (-U0),
  # so the build log shows which README lines were rewritten.
  - script: |
      git diff -U0
    displayName: Highlight Readme Updates
View as plain text