Coverage for src/debputy/plugin/debputy/package_processors.py: 54%

168 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-04-07 12:14 +0200

1import contextlib 

2import functools 

3import gzip 

4import os 

5import re 

6import subprocess 

7from contextlib import ExitStack 

8from typing import Optional, Iterator, IO, Any, List, Dict, Callable, Union 

9 

10from debputy.plugin.api import VirtualPath 

11from debputy.util import _error, xargs, escape_shell, _info, assume_not_none 

12 

13 

@contextlib.contextmanager
def _open_maybe_gzip(path: VirtualPath) -> Iterator[Union[IO[bytes], gzip.GzipFile]]:
    """Open *path* for binary reading, transparently decompressing ``.gz`` files.

    Yields a readable binary file object; the underlying handle is closed
    when the context exits.
    """
    if path.name.endswith(".gz"):
        opener = gzip.GzipFile(path.fs_path, "rb")
    else:
        opener = path.open(byte_io=True)
    with opener as reader:
        yield reader

22 

23 

# Matches the target of a roff ".so <target>" inclusion directive in a man page;
# group 1 captures the target path.
_SO_LINK_RE = re.compile(rb"[.]so\s+(.*)\s*")
# Matches the single-quoted value of a "dependency_libs=..." line in a libtool .la file.
_LA_DEP_LIB_RE = re.compile(rb"'.+'")

26 

27 

def _detect_so_link(path: VirtualPath) -> Optional[str]:
    """Return the target of a man page ``.so`` inclusion, or ``None``.

    Scans the (possibly gzip-compressed) file line by line for a roff
    ``.so <target>`` directive and returns the decoded target of the
    first match.
    """
    with _open_maybe_gzip(path) as reader:
        for raw_line in reader:
            match = _SO_LINK_RE.search(raw_line)
            if match is None:
                continue
            return match.group(1).decode("utf-8")
    return None

36 

37 

def _replace_with_symlink(path: VirtualPath, so_link_target: str) -> None:
    """Replace *path* with a symlink pointing at *so_link_target*.

    The target is rewritten so the resulting symlink resolves correctly:
    a target inside the same directory is reduced to a plain basename,
    while a target with a directory component gains a leading "../"
    (symlinks and so links have a different base directory when the
    link contains a "/").
    """
    parent_dir = path.parent_dir
    assert parent_dir is not None  # For the type checking
    target_dir, target_base = os.path.split(so_link_target)
    if target_dir == parent_dir.name:
        # Avoid man8/../man8/foo links
        new_target = target_base
    elif "/" in so_link_target:
        # Adjust with an extra "../" to align the result
        new_target = "../" + so_link_target
    else:
        new_target = so_link_target

    path.unlink()
    parent_dir.add_symlink(path.name, new_target)

52 

53 

54@functools.lru_cache(1) 

55def _has_man_recode() -> bool: 

56 # Ideally, we would just use shutil.which or something like that. 

57 # Unfortunately, in debhelper, we experienced problems with which 

58 # returning "yes" for a man tool that actually could not be run 

59 # on salsa CI. 

60 # 

61 # Therefore, we adopt the logic of dh_installman to run the tool 

62 # with --help to confirm it is not broken, because no one could 

63 # figure out what happened in the salsa CI and my life is still 

64 # too short to figure it out. 

65 try: 

66 subprocess.check_call( 

67 ["man-recode", "--help"], 

68 stdin=subprocess.DEVNULL, 

69 stdout=subprocess.DEVNULL, 

70 stderr=subprocess.DEVNULL, 

71 restore_signals=True, 

72 ) 

73 except subprocess.CalledProcessError: 

74 return False 

75 return True 

76 

77 

def process_manpages(fs_root: VirtualPath, _unused1: Any, _unused2: Any) -> None:
    """Normalize man pages beneath ``usr/share/man``.

    Small pages containing a roff ``.so`` directive are converted into
    symlinks to their target; every other page is re-encoded to UTF-8 via
    ``man-recode`` (when that tool is available and working).
    """
    man_dir = fs_root.lookup("./usr/share/man")
    if not man_dir:
        return

    needs_re_encode: List[VirtualPath] = []
    for page in man_dir.all_paths():
        if not page.is_file or not page.has_fs_path or page.size == 0:
            continue
        link_target = None
        # debhelper has a 1024 byte guard on the basis that ".so file tend to be small".
        # That guard worked well for debhelper, so lets keep it for now on that basis alone.
        if page.size <= 1024:
            link_target = _detect_so_link(page)
        if link_target:
            _replace_with_symlink(page, link_target)
        else:
            needs_re_encode.append(page)

    if not needs_re_encode or not _has_man_recode():
        return

    with ExitStack() as manager:
        fs_paths = [
            manager.enter_context(page.replace_fs_path_content())
            for page in needs_re_encode
        ]
        base_cmd = ["man-recode", "--to-code", "UTF-8", "--suffix", ".encoded"]
        for cmd in xargs(base_cmd, fs_paths):
            _info(f"Ensuring manpages have utf-8 encoding via: {escape_shell(*cmd)}")
            try:
                subprocess.check_call(
                    cmd,
                    stdin=subprocess.DEVNULL,
                    restore_signals=True,
                )
            except subprocess.CalledProcessError:
                _error(
                    "The man-recode process failed. Please review the output of `man-recode` to understand"
                    " what went wrong."
                )
        for fs_path in fs_paths:
            # man-recode drops a ".gz" extension when naming its ".encoded" output.
            stem = fs_path[:-3] if fs_path.endswith(".gz") else fs_path
            os.rename(f"{stem}.encoded", fs_path)

124 

125 

def _filter_compress_paths() -> Callable[[VirtualPath], Iterator[VirtualPath]]:
    """Return a walker that yields the paths eligible for gzip compression.

    The exclusion lists mirror dh_compress: file formats that are already
    compressed or must remain uncompressed, plus special-cased names and
    directories (``_sources`` and ``/usr/share/doc/*/examples``).
    """
    skip_dir_names = frozenset({
        "_sources",
    })
    skip_file_names = frozenset({
        ".htaccess",
        "index.sgml",
        "objects.inv",
        "search_index.json",
        "copyright",
    })
    skip_extensions = frozenset({
        ".htm",
        ".html",
        ".xhtml",
        ".gif",
        ".png",
        ".jpg",
        ".jpeg",
        ".gz",
        ".taz",
        ".tgz",
        ".z",
        ".bz2",
        ".epub",
        ".jar",
        ".zip",
        ".odg",
        ".odp",
        ".odt",
        ".css",
        ".xz",
        ".lz",
        ".lzma",
        ".haddock",
        ".hs",
        ".woff",
        ".woff2",
        ".svg",
        ".svgz",
        ".js",
        ".devhelp2",
        ".map",  # Technically, dh_compress has this one case-sensitive
    })
    skip_name_endings = ("-gz", "-z", "_z")

    def _filtered_walk(root: VirtualPath) -> Iterator[VirtualPath]:
        for current, children in root.walk():
            basename = current.name
            if basename in skip_dir_names:
                children.clear()
                continue
            if current.is_dir and basename == "examples":
                # Ignore anything beneath /usr/share/doc/*/examples
                parent = current.parent_dir
                grand_parent = parent.parent_dir if parent else None
                if grand_parent and grand_parent.absolute == "/usr/share/doc":
                    children.clear()
                    continue
            if (
                current.is_symlink
                or not current.is_file
                or basename in skip_file_names
                or not current.has_fs_path
            ):
                continue

            lowered = basename.lower()
            ext = os.path.splitext(lowered)[1]

            if ext in skip_extensions or lowered.endswith(skip_name_endings):
                continue
            yield current

    return _filtered_walk

201 

202 

def _find_compressable_paths(fs_root: VirtualPath) -> Iterator[VirtualPath]:
    """Yield every path under *fs_root* that should be gzip-compressed.

    Covers info pages, man pages and documentation (the latter only above
    a size threshold), plus X11 ``.pcf`` fonts.
    """
    accepts = _filter_compress_paths()

    search_roots = (
        ("./usr/share/info", 0),
        ("./usr/share/man", 0),
        ("./usr/share/doc", 4096),
    )
    for lookup_path, size_threshold in search_roots:
        base = fs_root.lookup(lookup_path)
        if base is None:
            continue
        candidates = accepts(base)
        if size_threshold:
            # The special-case for changelog and NEWS is from dh_compress. Generally these files
            # have always been compressed regardless of their size.
            candidates = (
                c
                for c in candidates
                if c.size > size_threshold or c.name.startswith(("changelog", "NEWS"))
            )
        yield from candidates
    font_dir = fs_root.lookup("./usr/share/fonts/X11")
    if font_dir:
        for font in font_dir.all_paths():
            if font.is_file and font.name.endswith(".pcf"):
                yield font

230 

231 

def apply_compression(fs_root: VirtualPath, _unused1: Any, _unused2: Any) -> None:
    """Gzip-compress eligible paths and retarget symlinks at the new names.

    Each compressible file is replaced by a ``.gz`` sibling produced by
    ``gzip -9n``; afterwards, symlinks whose targets were compressed are
    replaced by ``.gz``-suffixed links pointing at the compressed names.
    """
    # TODO: Support hardlinks
    # Collected mapping of original path -> compressed path; currently unused
    # below, presumably kept for the hardlink TODO above.
    compressed_files: Dict[str, str] = {}
    for path in _find_compressable_paths(fs_root):
        parent_dir = assume_not_none(path.parent_dir)
        # Write the compressed content to "<name>.gz" next to the original,
        # preserving the original's mtime on the new entry.
        with parent_dir.add_file(f"{path.name}.gz", mtime=path.mtime) as new_file, open(
            new_file.fs_path, "wb"
        ) as fd:
            try:
                # -9: best compression; -n: omit name/timestamp for reproducibility;
                # -c: write to stdout (captured into the new file).
                subprocess.check_call(["gzip", "-9nc", path.fs_path], stdout=fd)
            except subprocess.CalledProcessError:
                full_command = f"gzip -9nc {escape_shell(path.fs_path)} > {escape_shell(new_file.fs_path)}"
                _error(
                    f"The compression of {path.path} failed. Please review the error message from gzip to"
                    f" understand what went wrong. Full command was: {full_command}"
                )
        compressed_files[path.path] = new_file.path
        # Remove the uncompressed original from the virtual tree.
        del parent_dir[path.name]

    # Fix-point loop: retargeting one symlink may make a chained symlink
    # (a link pointing at another link) fixable on a later pass, so iterate
    # until a full pass makes no change.
    all_remaining_symlinks = {p.path: p for p in fs_root.all_paths() if p.is_symlink}
    changed = True
    while changed:
        changed = False
        # Snapshot, as entries are removed from the dict during iteration.
        remaining: List[VirtualPath] = list(all_remaining_symlinks.values())
        for symlink in remaining:
            target = symlink.readlink()
            dir_target, basename_target = os.path.split(target)
            new_basename_target = f"{basename_target}.gz"
            symlink_parent_dir = assume_not_none(symlink.parent_dir)
            dir_path = symlink_parent_dir
            if dir_target != "":
                # Relative targets are resolved from the symlink's own directory.
                dir_path = dir_path.lookup(dir_target)
            # Skip when: the target directory is unresolvable, the original
            # (uncompressed) target still exists, or no compressed variant
            # of the target is present.
            if (
                not dir_path
                or basename_target in dir_path
                or new_basename_target not in dir_path
            ):
                continue
            del all_remaining_symlinks[symlink.path]
            changed = True

            new_link_name = (
                f"{symlink.name}.gz"
                if not symlink.name.endswith(".gz")
                else symlink.name
            )
            # NOTE(review): when new_link_name equals symlink.name, this relies
            # on add_symlink replacing the entry and the subsequent unlink()
            # affecting only the stale object — confirm against VirtualPath.
            symlink_parent_dir.add_symlink(
                new_link_name, os.path.join(dir_target, new_basename_target)
            )
            symlink.unlink()

282 

283 

def _la_files(fs_root: VirtualPath) -> Iterator[VirtualPath]:
    """Yield libtool ``.la`` files located directly in ``/usr/lib``.

    Original code only iterators directly in /usr/lib. To be a faithful
    conversion, we do the same here.
    """
    lib_dir = fs_root.lookup("/usr/lib")
    if not lib_dir:
        return
    # Eagerly resolve the list as the replacement can trigger a runtime error otherwise
    entries = list(lib_dir.iterdir)
    for entry in entries:
        if entry.is_file and entry.name.endswith(".la"):
            yield entry

293 

294 

def clean_la_files(
    fs_root: VirtualPath,
    _unused1: Any,
    _unused2: Any,
) -> None:
    """Blank out the ``dependency_libs`` value of libtool ``.la`` files.

    Conceptually, the same feature that dh_gnome provides.  Based on the
    dh_gnome version written by Luca Falavigna in 2010, who in turn
    references a Makefile version of the feature.
    https://salsa.debian.org/gnome-team/gnome-pkg-tools/-/commit/2868e1e41ea45443b0fb340bf4c71c4de87d4a5b
    """
    for la_file in _la_files(fs_root):
        rewritten_lines = []
        needs_rewrite = False
        with la_file.open(byte_io=True) as reader:
            for raw_line in reader:
                if raw_line.startswith(b"dependency_libs"):
                    cleared = _LA_DEP_LIB_RE.sub(b"''", raw_line)
                    if cleared != raw_line:
                        needs_rewrite = True
                        raw_line = cleared
                rewritten_lines.append(raw_line)

        if not needs_rewrite:
            continue
        _info(f"Clearing the dependency_libs line in {la_file.path}")
        with la_file.replace_fs_path_content() as fs_path, open(fs_path, "wb") as out:
            out.writelines(rewritten_lines)