]> git.lizzy.rs Git - rust.git/blob - src/ci/stage-build.py
Auto merge of #107443 - cjgillot:generator-less-query, r=compiler-errors
[rust.git] / src / ci / stage-build.py
1 #!/usr/bin/env python3
2 # ignore-tidy-linelength
3
4 # Compatible with Python 3.6+
5
6 import contextlib
7 import getpass
8 import glob
9 import logging
10 import os
11 import pprint
12 import shutil
13 import subprocess
14 import sys
15 import time
16 import traceback
17 import urllib.request
18 from collections import OrderedDict
19 from io import StringIO
20 from pathlib import Path
21 from typing import Callable, Dict, Iterable, List, Optional, Union
22
# Value passed to `x.py` as both `--target` and `--host`, and used as the name of
# the `build/<PGO_HOST>` artifact directory. It is read eagerly, so the script
# fails with a `KeyError` on startup if the environment variable is not set.
PGO_HOST = os.environ["PGO_HOST"]

# Logger shared by every helper in this script.
LOGGER = logging.getLogger("stage-build")

# Benchmarks (handed to the rustc-perf collector through `--include`) that are
# compiled while gathering PGO profiles for LLVM.
LLVM_PGO_CRATES = [
    "syn-1.0.89",
    "cargo-0.60.0",
    "serde-1.0.136",
    "ripgrep-13.0.0",
    "regex-1.5.5",
    "clap-3.1.6",
    "hyper-0.14.18"
]

# Benchmarks that are compiled while gathering PGO profiles for rustc itself.
RUSTC_PGO_CRATES = [
    "externs",
    "ctfe-stress-5",
    "cargo-0.60.0",
    "token-stream-stress",
    "match-stress",
    "tuple-stress",
    "diesel-1.4.8",
    "bitmaps-3.1.0"
]

# BOLT profiling of LLVM reuses the LLVM PGO benchmark set. Note that this is an
# alias of the very same list object, not a copy.
LLVM_BOLT_CRATES = LLVM_PGO_CRATES
49
50
class Pipeline:
    """
    Describes where the multi-stage build reads its inputs and stores its outputs.

    Methods that raise `NotImplementedError` have to be provided by a subclass;
    the remaining paths are derived from them.
    """

    # Paths
    def checkout_path(self) -> Path:
        """
        Root of the checkout, i.e. the directory containing the sources.
        """
        raise NotImplementedError()

    def downloaded_llvm_dir(self) -> Path:
        """
        Location of the host LLVM.
        """
        raise NotImplementedError()

    def build_root(self) -> Path:
        """
        Main directory in which the build takes place.
        """
        raise NotImplementedError()

    def build_artifacts(self) -> Path:
        """
        The `build/<PGO_HOST>` directory below the build root.
        """
        return self.build_root().joinpath("build", PGO_HOST)

    def rustc_stage_0(self) -> Path:
        """
        Path of the stage0 `rustc` binary.
        """
        return self.build_artifacts().joinpath("stage0", "bin", "rustc")

    def cargo_stage_0(self) -> Path:
        """
        Path of the stage0 `cargo` binary.
        """
        return self.build_artifacts().joinpath("stage0", "bin", "cargo")

    def rustc_stage_2(self) -> Path:
        """
        Path of the stage2 `rustc` binary.
        """
        return self.build_artifacts().joinpath("stage2", "bin", "rustc")

    def opt_artifacts(self) -> Path:
        """
        Directory holding everything produced for the optimized build
        (profiles, the rustc-perf checkout, ...).
        """
        raise NotImplementedError()

    def llvm_profile_dir_root(self) -> Path:
        """
        Directory into which the raw LLVM PGO profiles are written.
        """
        return self.opt_artifacts().joinpath("llvm-pgo")

    def llvm_profile_merged_file(self) -> Path:
        """
        File holding the merged LLVM PGO profile.
        """
        return self.opt_artifacts().joinpath("llvm-pgo.profdata")

    def rustc_perf_dir(self) -> Path:
        """
        Directory of the rustc-perf checkout.
        """
        return self.opt_artifacts().joinpath("rustc-perf")

    def build_rustc_perf(self):
        """
        Make rustc-perf available in `rustc_perf_dir()` and build its collector.
        """
        raise NotImplementedError()

    def rustc_profile_dir_root(self) -> Path:
        """
        Directory into which the raw rustc PGO profiles are written.
        """
        return self.opt_artifacts().joinpath("rustc-pgo")

    def rustc_profile_merged_file(self) -> Path:
        """
        File holding the merged rustc PGO profile.
        """
        return self.opt_artifacts().joinpath("rustc-pgo.profdata")

    def rustc_profile_template_path(self) -> Path:
        """
        The profile data is written into a single filepath that is being repeatedly merged when each
        rustc invocation ends. Empirically, this can result in some profiling data being lost. That's
        why we override the profile path to include the PID. This will produce many more profiling
        files, but the resulting profile will produce a slightly faster rustc binary.
        """
        return self.rustc_profile_dir_root().joinpath("default_%m_%p.profraw")

    def supports_bolt(self) -> bool:
        """
        Whether the BOLT stage of the pipeline should be executed.
        """
        raise NotImplementedError()

    def llvm_bolt_profile_merged_file(self) -> Path:
        """
        File holding the merged LLVM BOLT profile.
        """
        return self.opt_artifacts().joinpath("bolt.profdata")
118
119
class LinuxPipeline(Pipeline):
    """
    Pipeline layout using fixed absolute paths (`/checkout`, `/rustroot`, `/tmp/...`).
    """

    def checkout_path(self) -> Path:
        return Path("/checkout")

    def downloaded_llvm_dir(self) -> Path:
        return Path("/rustroot")

    def build_root(self) -> Path:
        return self.checkout_path().joinpath("obj")

    def opt_artifacts(self) -> Path:
        return Path("/tmp/tmp-multistage/opt-artifacts")

    def build_rustc_perf(self):
        """
        Copy the pre-provisioned rustc-perf checkout and build its collector
        with the stage0 toolchain.
        """
        perf_dir = self.rustc_perf_dir()

        # /tmp/rustc-perf comes from the Dockerfile
        shutil.copytree("/tmp/rustc-perf", perf_dir)
        # Hand the copied tree over to the user running this script.
        cmd(["chown", "-R", f"{getpass.getuser()}:", perf_dir])

        with change_cwd(perf_dir):
            build_env = dict(
                RUSTC=str(self.rustc_stage_0()),
                RUSTC_BOOTSTRAP="1"
            )
            cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=build_env)

    def supports_bolt(self) -> bool:
        return True
146
147
class WindowsPipeline(Pipeline):
    """
    Pipeline layout rooted in the directory the script was started from.
    """

    def __init__(self):
        # Remember the starting directory; later `change_cwd` calls must not move the checkout.
        self.checkout_dir = Path(os.getcwd())

    def checkout_path(self) -> Path:
        return self.checkout_dir

    def downloaded_llvm_dir(self) -> Path:
        return self.checkout_path().joinpath("citools", "clang-rust")

    def build_root(self) -> Path:
        return self.checkout_path()

    def opt_artifacts(self) -> Path:
        return self.checkout_path().joinpath("opt-artifacts")

    def rustc_stage_0(self) -> Path:
        base = super().rustc_stage_0()
        return base.with_suffix(".exe")

    def cargo_stage_0(self) -> Path:
        base = super().cargo_stage_0()
        return base.with_suffix(".exe")

    def rustc_stage_2(self) -> Path:
        base = super().rustc_stage_2()
        return base.with_suffix(".exe")

    def build_rustc_perf(self):
        """
        Download a pinned rustc-perf archive (with retries), unpack it into
        `rustc_perf_dir()` and build its collector with the stage0 toolchain.
        """
        # rustc-perf version from 2022-07-22
        perf_commit = "3c253134664fdcba862c539d37f0de18557a9a4c"
        rustc_perf_zip_path = self.opt_artifacts() / "perf.zip"

        def fetch_and_unpack():
            archive_url = f"https://github.com/rust-lang/rustc-perf/archive/{perf_commit}.zip"
            download_file(archive_url, rustc_perf_zip_path)
            with change_cwd(self.opt_artifacts()):
                unpack_archive(rustc_perf_zip_path)
                unpacked_dir = Path(f"rustc-perf-{perf_commit}")
                move_path(unpacked_dir, self.rustc_perf_dir())
                delete_file(rustc_perf_zip_path)

        retry_action(fetch_and_unpack, "Download rustc-perf")

        with change_cwd(self.rustc_perf_dir()):
            build_env = dict(
                RUSTC=str(self.rustc_stage_0()),
                RUSTC_BOOTSTRAP="1"
            )
            cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=build_env)

    def rustc_profile_template_path(self) -> Path:
        """
        On Windows, we don't have enough space to use separate files for each rustc invocation.
        Therefore, we use a single file for the generated profiles.
        """
        return self.rustc_profile_dir_root().joinpath("default_%m.profraw")

    def supports_bolt(self) -> bool:
        return False
205
206
class Timer:
    """
    Measures how long the named stages of the build take and logs a summary table.
    """

    def __init__(self):
        # We want this dictionary to be ordered by insertion.
        # We use `OrderedDict` for compatibility with older Python versions.
        self.stages = OrderedDict()

    @contextlib.contextmanager
    def stage(self, name: str):
        """
        Context manager timing the enclosed block as stage `name`.

        The duration is recorded in `self.stages` and logged no matter whether the
        block finishes normally or raises; exceptions are propagated unchanged.
        Each stage name may only be used once.
        """
        assert name not in self.stages

        # A monotonic clock cannot jump backwards when the wall clock is adjusted,
        # so the measured durations are never negative.
        start = time.monotonic()
        outcome = "FAIL"
        try:
            LOGGER.info(f"Stage `{name}` starts")
            yield
            # Only reached when the timed block did not raise.
            outcome = "OK"
        finally:
            duration = time.monotonic() - start
            self.stages[name] = duration
            LOGGER.info(f"Stage `{name}` ended: {outcome} ({duration:.2f}s)")

    def print_stats(self):
        """
        Log a table with the duration of every recorded stage, its share of the
        total and the total duration.
        """
        total_duration = sum(self.stages.values())

        # 57 is the width of the whole table
        divider = "-" * 57

        rows = []
        for (name, duration) in self.stages.items():
            # All stages may have taken (what the clock measures as) zero time, e.g.
            # when the first stage fails immediately. Avoid a ZeroDivisionError, which
            # would otherwise hide the original failure when called from a `finally`.
            pct = (duration / total_duration) * 100 if total_duration > 0 else 0.0
            name_str = f"{name}:"
            rows.append(f"{name_str:<34} {duration:>12.2f}s ({pct:>5.2f}%)")

        total_duration_label = "Total duration:"
        total_row = f"{total_duration_label:<34} {total_duration:>12.2f}s"

        table = "\n".join([divider] + rows + [total_row, divider])
        LOGGER.info(f"Timer results\n{table}")
251
252
@contextlib.contextmanager
def change_cwd(dir: Path):
    """
    Run the enclosed block with `dir` as the working directory and switch back
    to the previous working directory afterwards, even if the block raises.
    """
    previous_dir = os.getcwd()
    LOGGER.debug(f"Changing working dir from `{previous_dir}` to `{dir}`")
    os.chdir(dir)
    try:
        yield
    finally:
        LOGGER.debug(f"Reverting working dir to `{previous_dir}`")
        os.chdir(previous_dir)
266
267
def move_path(src: Path, dst: Path):
    """
    Log and move the file or directory `src` to `dst`.
    """
    LOGGER.info(f"Moving `{src}` to `{dst}`")
    # `shutil.move` only accepts path-like objects for both arguments since
    # Python 3.9; this script supports 3.6+, so plain strings are passed.
    shutil.move(str(src), str(dst))
271
272
def delete_file(path: Path):
    """
    Log and remove the file at `path`.
    """
    LOGGER.info(f"Deleting file `{path}`")
    os.remove(path)
276
277
def delete_directory(path: Path):
    """
    Log and recursively remove the directory tree at `path`.
    """
    LOGGER.info(f"Deleting directory `{path}`")
    shutil.rmtree(path, ignore_errors=False)
281
282
def unpack_archive(archive: Path):
    """
    Log and unpack `archive` into the current working directory.
    """
    LOGGER.info(f"Unpacking archive `{archive}`")
    # `shutil.unpack_archive` only accepts path-like objects since Python 3.7;
    # this script supports 3.6+, so a plain string is passed.
    shutil.unpack_archive(str(archive))
286
287
def download_file(src: str, target: Path):
    """
    Log and download the URL `src` into the local file `target`.
    """
    LOGGER.info(f"Downloading `{src}` into `{target}`")
    destination = str(target)
    urllib.request.urlretrieve(src, destination)
291
292
def retry_action(action, name: str, max_fails: int = 5):
    """
    Call `action()` until it succeeds, at most `max_fails` times.

    Every failed attempt is logged together with its traceback. `name` is only
    used in log and error messages.

    Raises:
        Exception: if none of the `max_fails` attempts succeeded.
    """
    LOGGER.info(f"Attempting to perform action `{name}` with retry")
    for iteration in range(max_fails):
        LOGGER.info(f"Attempt {iteration + 1}/{max_fails}")
        try:
            action()
            return
        except Exception:
            # Deliberately not a bare `except:`: `KeyboardInterrupt` and `SystemExit`
            # must abort the script instead of triggering another attempt.
            LOGGER.error(f"Action `{name}` has failed\n{traceback.format_exc()}")

    raise Exception(f"Action `{name}` has failed after {max_fails} attempts")
304
305
def cmd(
        args: List[Union[str, Path]],
        env: Optional[Dict[str, str]] = None,
        output_path: Optional[Path] = None
):
    """
    Log and run an external command, raising if it exits with a non-zero status.

    `args` are converted to strings. `env` is applied on top of the current
    environment. When `output_path` is given, the standard output of the command
    is written to that file. Returns the result of `subprocess.run`.
    """
    argv = [str(arg) for arg in args]

    environment = os.environ.copy()

    # Assemble a shell-like rendering of the command for the log.
    display_parts = []
    if env is not None:
        environment.update(env)
        display_parts.append(" ".join(f"{k}={v}" for (k, v) in env.items()))
    display_parts.append(" ".join(argv))
    if output_path is not None:
        display_parts.append(f"> {output_path}")
    cmd_str = " ".join(display_parts)
    LOGGER.info(f"Executing `{cmd_str}`")

    if output_path is None:
        return subprocess.run(argv, env=environment, check=True)

    with open(output_path, "w") as stdout_file:
        return subprocess.run(
            argv,
            env=environment,
            check=True,
            stdout=stdout_file
        )
334
335
def run_compiler_benchmarks(
        pipeline: Pipeline,
        profiles: List[str],
        scenarios: List[str],
        crates: List[str],
        env: Optional[Dict[str, str]] = None
):
    """
    Exercise the stage2 rustc: compile libcore twice and then run the rustc-perf
    collector on `crates` with the given `profiles` and `scenarios`.

    `env` holds extra environment variables passed to every spawned command.
    """
    if env is None:
        env = {}

    # Compile libcore, both in opt-level=0 and opt-level=3
    with change_cwd(pipeline.build_root()):
        for extra_flags in ([], ["-Copt-level=3"]):
            libcore_command = [
                pipeline.rustc_stage_2(),
                "--edition", "2021",
                "--crate-type", "lib",
            ]
            libcore_command += extra_flags
            libcore_command += [
                str(pipeline.checkout_path() / "library/core/src/lib.rs"),
                "--out-dir", pipeline.opt_artifacts()
            ]
            cmd(libcore_command, env=dict(RUSTC_BOOTSTRAP="1", **env))

    # Run rustc-perf benchmarks
    # Benchmark using profile_local with eprintln, which essentially just means
    # don't actually benchmark -- just make sure we run rustc a bunch of times.
    with change_cwd(pipeline.rustc_perf_dir()):
        collector_command = [
            pipeline.cargo_stage_0(),
            "run",
            "-p", "collector", "--bin", "collector", "--",
            "profile_local", "eprintln",
            pipeline.rustc_stage_2(),
            "--id", "Test",
            "--cargo", pipeline.cargo_stage_0(),
            "--profiles", ",".join(profiles),
            "--scenarios", ",".join(scenarios),
            "--include", ",".join(crates)
        ]
        collector_env = dict(
            RUST_LOG="collector=debug",
            RUSTC=str(pipeline.rustc_stage_0()),
            RUSTC_BOOTSTRAP="1",
            **env
        )
        cmd(collector_command, env=collector_env)
385
386
# https://stackoverflow.com/a/31631711/1107768
def format_bytes(size: int) -> str:
    """
    Return the given bytes as a human friendly B, KiB, MiB, GiB or TiB string.

    Sizes below 1024 are printed as an exact number of bytes; everything else is
    printed with two decimals in the largest fitting unit (TiB at most).
    """
    KB = 1024

    if size < KB:
        return f"{size} B"

    # (unit, number of bytes in one unit), smallest unit first
    units = (
        ("KiB", KB),
        ("MiB", KB ** 2),  # 1,048,576
        ("GiB", KB ** 3),  # 1,073,741,824
        ("TiB", KB ** 4),  # 1,099,511,627,776
    )
    for (unit, factor) in units[:-1]:
        if size < factor * KB:
            return f"{size / factor:.2f} {unit}"

    # Anything of 1 TiB and above is reported in TiB, so the result always has a unit.
    (unit, factor) = units[-1]
    return f"{size / factor:.2f} {unit}"
405
406
# https://stackoverflow.com/a/63307131/1107768
def count_files(path: Path) -> int:
    """
    Return the number of regular files located anywhere below the directory `path`.
    """
    file_count = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            file_count += 1
    return file_count
410
411
def count_files_with_prefix(path: Path) -> int:
    """
    Return the number of regular files whose path begins with the prefix `path`.
    """
    # Escape the prefix so that glob metacharacters in it (`*`, `?`, `[`) are
    # matched literally; only the trailing `*` acts as a wildcard.
    pattern = f"{glob.escape(str(path))}*"
    return sum(1 for p in glob.glob(pattern) if Path(p).is_file())
414
415
# https://stackoverflow.com/a/55659577/1107768
def get_path_size(path: Path) -> int:
    """
    Return the size of `path` in bytes.

    For a directory this is the sum of the `st_size` of every entry found
    recursively below it; otherwise it is the `st_size` of `path` itself.
    """
    if not path.is_dir():
        return path.stat().st_size

    total_size = 0
    for entry in path.rglob("*"):
        total_size += entry.stat().st_size
    return total_size
421
422
def get_path_prefix_size(path: Path) -> int:
    """
    Get size of all files beginning with the prefix `path`.
    Alternative to shell `du -sh <path>*`.
    """
    # Escape the prefix so that glob metacharacters in it (`*`, `?`, `[`) are
    # matched literally; only the trailing `*` acts as a wildcard.
    pattern = f"{glob.escape(str(path))}*"
    return sum(Path(p).stat().st_size for p in glob.glob(pattern))
429
430
def get_files(directory: Path, filter: Optional[Callable[[Path], bool]] = None) -> Iterable[Path]:
    """
    Lazily yield the paths of the direct entries of `directory`.

    When `filter` is given, only the paths for which it returns a truthy value
    are yielded.
    """
    for entry_name in os.listdir(directory):
        candidate = directory / entry_name
        if filter is not None and not filter(candidate):
            continue
        yield candidate
436
437
def build_rustc(
        pipeline: Pipeline,
        args: List[str],
        env: Optional[Dict[str, str]] = None
):
    """
    Run `x.py build` for stage 2 of `library/std`, with `PGO_HOST` as both host
    and target, followed by the additional `args`. `env` is forwarded to `cmd`.
    """
    x_py = pipeline.checkout_path() / "x.py"
    base_command = [
        sys.executable,
        x_py,
        "build",
        "--target", PGO_HOST,
        "--host", PGO_HOST,
        "--stage", "2",
        "library/std"
    ]
    cmd(base_command + args, env=env)
453
454
def create_pipeline() -> Pipeline:
    """
    Return the pipeline matching `sys.platform`.

    Raises an `Exception` for platforms other than `linux`, `cygwin` and `win32`.
    """
    platform = sys.platform
    if platform == "linux":
        return LinuxPipeline()
    if platform in ("cygwin", "win32"):
        return WindowsPipeline()
    raise Exception(f"Optimized build is not supported for platform {platform}")
462
463
def gather_llvm_profiles(pipeline: Pipeline):
    """
    Run the LLVM PGO benchmarks, merge the resulting raw profiles into
    `pipeline.llvm_profile_merged_file()`, log statistics about them and delete
    the raw profiles.
    """
    LOGGER.info("Running benchmarks with PGO instrumented LLVM")
    run_compiler_benchmarks(
        pipeline,
        profiles=["Debug", "Opt"],
        scenarios=["Full"],
        crates=LLVM_PGO_CRATES
    )

    merged_profile = pipeline.llvm_profile_merged_file()
    LOGGER.info(f"Merging LLVM PGO profiles to {merged_profile}")
    profdata_tool = pipeline.downloaded_llvm_dir() / "bin" / "llvm-profdata"
    raw_profiles_dir = pipeline.llvm_profile_dir_root()
    cmd([profdata_tool, "merge", "-o", merged_profile, raw_profiles_dir])

    LOGGER.info("LLVM PGO statistics")
    merged_size = format_bytes(get_path_size(merged_profile))
    LOGGER.info(f"{merged_profile}: {merged_size}")
    raw_size = format_bytes(get_path_size(raw_profiles_dir))
    LOGGER.info(
        f"{raw_profiles_dir}: {raw_size}")
    LOGGER.info(f"Profile file count: {count_files(raw_profiles_dir)}")

    # We don't need the individual .profraw files now that they have been merged
    # into a final .profdata
    delete_directory(raw_profiles_dir)
491
492
def gather_rustc_profiles(pipeline: Pipeline):
    """
    Run the rustc PGO benchmarks, merge the resulting raw profiles into
    `pipeline.rustc_profile_merged_file()`, log statistics about them and delete
    the raw profiles.
    """
    LOGGER.info("Running benchmarks with PGO instrumented rustc")

    # Here we're profiling the `rustc` frontend, so we also include `Check`.
    # The benchmark set includes various stress tests that put the frontend under pressure.
    profile_env = dict(
        LLVM_PROFILE_FILE=str(pipeline.rustc_profile_template_path())
    )
    run_compiler_benchmarks(
        pipeline,
        profiles=["Check", "Debug", "Opt"],
        scenarios=["All"],
        crates=RUSTC_PGO_CRATES,
        env=profile_env
    )

    merged_profile = pipeline.rustc_profile_merged_file()
    LOGGER.info(f"Merging Rustc PGO profiles to {merged_profile}")
    profdata_tool = pipeline.build_artifacts() / "llvm" / "bin" / "llvm-profdata"
    raw_profiles_dir = pipeline.rustc_profile_dir_root()
    cmd([profdata_tool, "merge", "-o", merged_profile, raw_profiles_dir])

    LOGGER.info("Rustc PGO statistics")
    merged_size = format_bytes(get_path_size(merged_profile))
    LOGGER.info(f"{merged_profile}: {merged_size}")
    raw_size = format_bytes(get_path_size(raw_profiles_dir))
    LOGGER.info(
        f"{raw_profiles_dir}: {raw_size}")
    LOGGER.info(f"Profile file count: {count_files(raw_profiles_dir)}")

    # We don't need the individual .profraw files now that they have been merged
    # into a final .profdata
    delete_directory(raw_profiles_dir)
526
527
def gather_llvm_bolt_profiles(pipeline: Pipeline):
    """
    Run the LLVM BOLT benchmarks, merge every file starting with
    `/tmp/prof.fdata` into `pipeline.llvm_bolt_profile_merged_file()` using
    `merge-fdata` and log statistics about the profiles.
    """
    LOGGER.info("Running benchmarks with BOLT instrumented LLVM")
    run_compiler_benchmarks(
        pipeline,
        profiles=["Check", "Debug", "Opt"],
        scenarios=["Full"],
        crates=LLVM_BOLT_CRATES
    )

    merged_profile_path = pipeline.llvm_bolt_profile_merged_file()
    # Common prefix of the profile files that are merged below.
    profile_files_path = Path("/tmp/prof.fdata")
    LOGGER.info(f"Merging LLVM BOLT profiles to {merged_profile_path}")

    merge_command = ["merge-fdata"]
    merge_command.extend(sorted(glob.glob(f"{profile_files_path}*")))
    # `merge-fdata` writes the merged profile to its standard output.
    cmd(merge_command, output_path=merged_profile_path)

    LOGGER.info("LLVM BOLT statistics")
    merged_size = format_bytes(get_path_size(merged_profile_path))
    LOGGER.info(f"{merged_profile_path}: {merged_size}")
    raw_size = format_bytes(get_path_prefix_size(profile_files_path))
    LOGGER.info(
        f"{profile_files_path}: {raw_size}")
    LOGGER.info(f"Profile file count: {count_files_with_prefix(profile_files_path)}")
552
553
def clear_llvm_files(pipeline: Pipeline):
    """
    Rustbuild currently doesn't support rebuilding LLVM when PGO options
    change (or any other llvm-related options); so just clear out the relevant
    directories ourselves.
    """
    LOGGER.info("Clearing LLVM build files")
    for component in ("llvm", "lld"):
        delete_directory(pipeline.build_artifacts() / component)
563
564
def print_binary_sizes(pipeline: Pipeline):
    """
    Log the sizes of everything in the stage2 `bin` directory and of the `.so`
    files in the stage2 `lib` directory.
    """
    stage2_dir = pipeline.build_artifacts() / "stage2"

    binaries = sorted(get_files(stage2_dir / "bin"))
    libraries = sorted(get_files(stage2_dir / "lib", lambda p: p.suffix == ".so"))

    rows = []
    for path in binaries + libraries:
        path_str = f"{path.name}:"
        rows.append(f"{path_str:<30}{format_bytes(path.stat().st_size):>14}\n")
    table = "".join(rows)
    LOGGER.info(f"Rustc binary size\n{table}")
578
579
def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: List[str]):
    """
    Run the whole multi-stage optimized build.

    The stages gather LLVM PGO, rustc PGO and (when the pipeline supports it)
    LLVM BOLT profiles. The last stage runs the command given in
    `final_build_args`, extended with the flags that make it use the gathered
    profiles. Every stage is timed using `timer`.

    The passed `final_build_args` list itself is left unmodified.
    """
    # Work on a copy: the `+=` operations below would otherwise modify the
    # caller's list in place.
    final_build_args = list(final_build_args)

    # Clear and prepare tmp directory
    shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True)
    os.makedirs(pipeline.opt_artifacts(), exist_ok=True)

    pipeline.build_rustc_perf()

    # Stage 1: Build rustc + PGO instrumented LLVM
    with timer.stage("Build rustc (LLVM PGO)"):
        build_rustc(pipeline, args=[
            "--llvm-profile-generate"
        ], env=dict(
            LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p")
        ))

    with timer.stage("Gather profiles (LLVM PGO)"):
        gather_llvm_profiles(pipeline)

    clear_llvm_files(pipeline)
    final_build_args += [
        "--llvm-profile-use",
        pipeline.llvm_profile_merged_file()
    ]

    # Stage 2: Build PGO instrumented rustc + LLVM
    with timer.stage("Build rustc (rustc PGO)"):
        build_rustc(pipeline, args=[
            "--rust-profile-generate",
            pipeline.rustc_profile_dir_root()
        ])

    with timer.stage("Gather profiles (rustc PGO)"):
        gather_rustc_profiles(pipeline)

    clear_llvm_files(pipeline)
    final_build_args += [
        "--rust-profile-use",
        pipeline.rustc_profile_merged_file()
    ]

    # Stage 3: Build rustc + BOLT instrumented LLVM
    if pipeline.supports_bolt():
        with timer.stage("Build rustc (LLVM BOLT)"):
            build_rustc(pipeline, args=[
                "--llvm-profile-use",
                pipeline.llvm_profile_merged_file(),
                "--llvm-bolt-profile-generate",
            ])
        with timer.stage("Gather profiles (LLVM BOLT)"):
            gather_llvm_bolt_profiles(pipeline)

        clear_llvm_files(pipeline)
        final_build_args += [
            "--llvm-bolt-profile-use",
            pipeline.llvm_bolt_profile_merged_file()
        ]

    # Stage 4: Build PGO optimized rustc + PGO/BOLT optimized LLVM
    with timer.stage("Final build"):
        # The paths appended above are converted to strings by `cmd`.
        cmd(final_build_args)
640
641
if __name__ == "__main__":
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(name)s %(levelname)-4s: %(message)s",
    )

    LOGGER.info(f"Running multi-stage build using Python {sys.version}")
    # NOTE(review): this writes the complete environment, including any secrets it
    # may contain, into the log -- confirm that this is acceptable for the CI logs.
    LOGGER.info(f"Environment values\n{pprint.pformat(dict(os.environ), indent=2)}")

    # Command of the final build; the pipeline extends it with the profile flags.
    build_args = sys.argv[1:]

    timer = Timer()
    pipeline = create_pipeline()
    try:
        execute_build_pipeline(timer, pipeline, build_args)
    except BaseException:
        LOGGER.error("The multi-stage build has failed")
        # Bare `raise` re-raises the active exception with its traceback untouched.
        raise
    finally:
        # Report the timings of the finished stages even when the build has failed.
        timer.print_stats()

    print_binary_sizes(pipeline)