2 # ignore-tidy-linelength
4 # Compatible with Python 3.6+
18 from collections import OrderedDict
19 from io import StringIO
20 from pathlib import Path
21 from typing import Callable, Dict, Iterable, List, Optional, Union
23 PGO_HOST = os.environ["PGO_HOST"]
25 LOGGER = logging.getLogger("stage-build")
41 "token-stream-stress",
48 LLVM_BOLT_CRATES = LLVM_PGO_CRATES
def checkout_path(self) -> Path:
    """Return the root of the source checkout; concrete pipelines must override this."""
    raise NotImplementedError
def downloaded_llvm_dir(self) -> Path:
    """Return the directory containing the host LLVM; concrete pipelines must override this."""
    raise NotImplementedError
def build_root(self) -> Path:
    """Return the main directory in which the build takes place; concrete pipelines must override this."""
    raise NotImplementedError
def build_artifacts(self) -> Path:
    """Return `<build_root>/build/<PGO_HOST>`, the per-host build output directory."""
    root = self.build_root()
    return root.joinpath("build", PGO_HOST)
def rustc_stage_0(self) -> Path:
    """Return the path of the stage0 `rustc` binary inside the build artifacts."""
    stage0_bin = self.build_artifacts().joinpath("stage0", "bin")
    return stage0_bin / "rustc"
def cargo_stage_0(self) -> Path:
    """Return the path of the stage0 `cargo` binary inside the build artifacts."""
    stage0_bin = self.build_artifacts().joinpath("stage0", "bin")
    return stage0_bin / "cargo"
def rustc_stage_2(self) -> Path:
    """Return the path of the stage2 `rustc` binary inside the build artifacts."""
    stage2_bin = self.build_artifacts().joinpath("stage2", "bin")
    return stage2_bin / "rustc"
def opt_artifacts(self) -> Path:
    """
    Return the directory under which profiles and the rustc-perf checkout are stored.
    Concrete pipelines must override this.
    """
    raise NotImplementedError
def llvm_profile_dir_root(self) -> Path:
    """Return the `llvm-pgo` directory inside the optimization artifacts."""
    artifacts = self.opt_artifacts()
    return artifacts.joinpath("llvm-pgo")
def llvm_profile_merged_file(self) -> Path:
    """Return the path of the merged LLVM PGO profile (`llvm-pgo.profdata`)."""
    artifacts = self.opt_artifacts()
    return artifacts.joinpath("llvm-pgo.profdata")
def rustc_perf_dir(self) -> Path:
    """Return the `rustc-perf` directory inside the optimization artifacts."""
    artifacts = self.opt_artifacts()
    return artifacts.joinpath("rustc-perf")
def build_rustc_perf(self):
    """
    Prepare rustc-perf inside `rustc_perf_dir()`.
    The steps are platform-specific, so concrete pipelines must override this.
    """
    raise NotImplementedError
def rustc_profile_dir_root(self) -> Path:
    """Return the `rustc-pgo` directory inside the optimization artifacts."""
    artifacts = self.opt_artifacts()
    return artifacts.joinpath("rustc-pgo")
def rustc_profile_merged_file(self) -> Path:
    """Return the path of the merged rustc PGO profile (`rustc-pgo.profdata`)."""
    artifacts = self.opt_artifacts()
    return artifacts.joinpath("rustc-pgo.profdata")
def rustc_profile_template_path(self) -> Path:
    """
    Return the profile filename template used while profiling rustc.

    By default every rustc invocation merges its data into one shared file when it
    exits, and in practice some profiling data gets lost that way. Putting the PID
    into the filename avoids this: many more profile files are produced, but the
    merged profile yields a slightly faster rustc binary.
    """
    profile_dir = self.rustc_profile_dir_root()
    return profile_dir.joinpath("default_%m_%p.profraw")
def supports_bolt(self) -> bool:
    """
    Tell whether the BOLT stages of the build should run for this pipeline.
    Concrete pipelines must override this.
    """
    raise NotImplementedError
def llvm_bolt_profile_merged_file(self) -> Path:
    """Return the path of the merged LLVM BOLT profile (`bolt.profdata`)."""
    artifacts = self.opt_artifacts()
    return artifacts.joinpath("bolt.profdata")
120 class LinuxPipeline(Pipeline):
121 def checkout_path(self) -> Path:
122 return Path("/checkout")
124 def downloaded_llvm_dir(self) -> Path:
125 return Path("/rustroot")
127 def build_root(self) -> Path:
128 return self.checkout_path() / "obj"
130 def opt_artifacts(self) -> Path:
131 return Path("/tmp/tmp-multistage/opt-artifacts")
133 def build_rustc_perf(self):
134 # /tmp/rustc-perf comes from the Dockerfile
135 shutil.copytree("/tmp/rustc-perf", self.rustc_perf_dir())
136 cmd(["chown", "-R", f"{getpass.getuser()}:", self.rustc_perf_dir()])
138 with change_cwd(self.rustc_perf_dir()):
139 cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict(
140 RUSTC=str(self.rustc_stage_0()),
144 def supports_bolt(self) -> bool:
148 class WindowsPipeline(Pipeline):
150 self.checkout_dir = Path(os.getcwd())
152 def checkout_path(self) -> Path:
153 return self.checkout_dir
155 def downloaded_llvm_dir(self) -> Path:
156 return self.checkout_path() / "citools" / "clang-rust"
158 def build_root(self) -> Path:
159 return self.checkout_path()
161 def opt_artifacts(self) -> Path:
162 return self.checkout_path() / "opt-artifacts"
164 def rustc_stage_0(self) -> Path:
165 return super().rustc_stage_0().with_suffix(".exe")
167 def cargo_stage_0(self) -> Path:
168 return super().cargo_stage_0().with_suffix(".exe")
170 def rustc_stage_2(self) -> Path:
171 return super().rustc_stage_2().with_suffix(".exe")
173 def build_rustc_perf(self):
174 # rustc-perf version from 2022-07-22
175 perf_commit = "3c253134664fdcba862c539d37f0de18557a9a4c"
176 rustc_perf_zip_path = self.opt_artifacts() / "perf.zip"
178 def download_rustc_perf():
180 f"https://github.com/rust-lang/rustc-perf/archive/{perf_commit}.zip",
183 with change_cwd(self.opt_artifacts()):
184 unpack_archive(rustc_perf_zip_path)
185 move_path(Path(f"rustc-perf-{perf_commit}"), self.rustc_perf_dir())
186 delete_file(rustc_perf_zip_path)
188 retry_action(download_rustc_perf, "Download rustc-perf")
190 with change_cwd(self.rustc_perf_dir()):
191 cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict(
192 RUSTC=str(self.rustc_stage_0()),
196 def rustc_profile_template_path(self) -> Path:
198 On Windows, we don't have enough space to use separate files for each rustc invocation.
199 Therefore, we use a single file for the generated profiles.
201 return self.rustc_profile_dir_root() / "default_%m.profraw"
203 def supports_bolt(self) -> bool:
209 # We want this dictionary to be ordered by insertion.
210 # We use `OrderedDict` for compatibility with older Python versions.
211 self.stages = OrderedDict()
213 @contextlib.contextmanager
214 def stage(self, name: str):
215 assert name not in self.stages
220 LOGGER.info(f"Stage `{name}` starts")
222 except BaseException as exception:
227 duration = end - start
228 self.stages[name] = duration
230 LOGGER.info(f"Stage `{name}` ended: OK ({duration:.2f}s)")
232 LOGGER.info(f"Stage `{name}` ended: FAIL ({duration:.2f}s)")
234 def print_stats(self):
235 total_duration = sum(self.stages.values())
237 # 57 is the width of the whole table
240 with StringIO() as output:
241 print(divider, file=output)
242 for (name, duration) in self.stages.items():
243 pct = (duration / total_duration) * 100
244 name_str = f"{name}:"
245 print(f"{name_str:<34} {duration:>12.2f}s ({pct:>5.2f}%)", file=output)
247 total_duration_label = "Total duration:"
248 print(f"{total_duration_label:<34} {total_duration:>12.2f}s", file=output)
249 print(divider, file=output, end="")
250 LOGGER.info(f"Timer results\n{output.getvalue()}")
253 @contextlib.contextmanager
254 def change_cwd(dir: Path):
256 Temporarily change working directory to `dir`.
259 LOGGER.debug(f"Changing working dir from `{cwd}` to `{dir}`")
264 LOGGER.debug(f"Reverting working dir to `{cwd}`")
def move_path(src: Path, dst: Path):
    """
    Move the file or directory `src` to `dst`, logging the operation.

    The paths are converted to `str` before being handed to `shutil.move`,
    which only accepts path-like objects from Python 3.9 on (earlier versions
    fail when `dst` is an existing directory), while this script targets 3.6+.
    """
    LOGGER.info(f"Moving `{src}` to `{dst}`")
    shutil.move(str(src), str(dst))
273 def delete_file(path: Path):
274 LOGGER.info(f"Deleting file `{path}`")
278 def delete_directory(path: Path):
279 LOGGER.info(f"Deleting directory `{path}`")
def unpack_archive(archive: Path):
    """
    Unpack `archive` into the current working directory, logging the operation.

    The path is converted to `str` because `shutil.unpack_archive` only accepts
    path-like objects from Python 3.7 on, while this script targets 3.6+.
    """
    LOGGER.info(f"Unpacking archive `{archive}`")
    shutil.unpack_archive(str(archive))
def download_file(src: str, target: Path):
    """Fetch the URL `src` and store its contents at `target`, logging the operation."""
    LOGGER.info(f"Downloading `{src}` into `{target}`")
    destination = str(target)
    urllib.request.urlretrieve(src, destination)
293 def retry_action(action, name: str, max_fails: int = 5):
294 LOGGER.info(f"Attempting to perform action `{name}` with retry")
295 for iteration in range(max_fails):
296 LOGGER.info(f"Attempt {iteration + 1}/{max_fails}")
301 LOGGER.error(f"Action `{name}` has failed\n{traceback.format_exc()}")
303 raise Exception(f"Action `{name}` has failed after {max_fails} attempts")
307 args: List[Union[str, Path]],
308 env: Optional[Dict[str, str]] = None,
309 output_path: Optional[Path] = None
311 args = [str(arg) for arg in args]
313 environment = os.environ.copy()
317 environment.update(env)
318 cmd_str += " ".join(f"{k}={v}" for (k, v) in (env or {}).items())
320 cmd_str += " ".join(args)
321 if output_path is not None:
322 cmd_str += f" > {output_path}"
323 LOGGER.info(f"Executing `{cmd_str}`")
325 if output_path is not None:
326 with open(output_path, "w") as f:
327 return subprocess.run(
333 return subprocess.run(args, env=environment, check=True)
336 def run_compiler_benchmarks(
339 scenarios: List[str],
341 env: Optional[Dict[str, str]] = None
343 env = env if env is not None else {}
345 # Compile libcore, both in opt-level=0 and opt-level=3
346 with change_cwd(pipeline.build_root()):
348 pipeline.rustc_stage_2(),
350 "--crate-type", "lib",
351 str(pipeline.checkout_path() / "library/core/src/lib.rs"),
352 "--out-dir", pipeline.opt_artifacts()
353 ], env=dict(RUSTC_BOOTSTRAP="1", **env))
356 pipeline.rustc_stage_2(),
358 "--crate-type", "lib",
360 str(pipeline.checkout_path() / "library/core/src/lib.rs"),
361 "--out-dir", pipeline.opt_artifacts()
362 ], env=dict(RUSTC_BOOTSTRAP="1", **env))
364 # Run rustc-perf benchmarks
365 # Benchmark using profile_local with eprintln, which essentially just means
366 # don't actually benchmark -- just make sure we run rustc a bunch of times.
367 with change_cwd(pipeline.rustc_perf_dir()):
369 pipeline.cargo_stage_0(),
371 "-p", "collector", "--bin", "collector", "--",
372 "profile_local", "eprintln",
373 pipeline.rustc_stage_2(),
375 "--cargo", pipeline.cargo_stage_0(),
376 "--profiles", ",".join(profiles),
377 "--scenarios", ",".join(scenarios),
378 "--include", ",".join(crates)
380 RUST_LOG="collector=debug",
381 RUSTC=str(pipeline.rustc_stage_0()),
387 # https://stackoverflow.com/a/31631711/1107768
388 def format_bytes(size: int) -> str:
389 """Return the given bytes as a human friendly KiB, MiB or GiB string."""
391 MB = KB ** 2 # 1,048,576
392 GB = KB ** 3 # 1,073,741,824
393 TB = KB ** 4 # 1,099,511,627,776
397 elif KB <= size < MB:
398 return f"{size / KB:.2f} KiB"
399 elif MB <= size < GB:
400 return f"{size / MB:.2f} MiB"
401 elif GB <= size < TB:
402 return f"{size / GB:.2f} GiB"
407 # https://stackoverflow.com/a/63307131/1107768
def count_files(path: Path) -> int:
    """Return the number of regular files found anywhere below `path` (recursive)."""
    files = [entry for entry in path.rglob("*") if entry.is_file()]
    return len(files)
def count_files_with_prefix(path: Path) -> int:
    """
    Return the number of regular files whose full path starts with the prefix `path`.

    The prefix is escaped before the trailing `*` wildcard is appended, so that
    glob metacharacters (`[`, `]`, `?`, `*`) occurring in the prefix itself are
    matched literally instead of being interpreted as a pattern.
    """
    pattern = f"{glob.escape(str(path))}*"
    return sum(1 for match in glob.glob(pattern) if Path(match).is_file())
416 # https://stackoverflow.com/a/55659577/1107768
417 def get_path_size(path: Path) -> int:
419 return sum(p.stat().st_size for p in path.rglob("*"))
420 return path.stat().st_size
def get_path_prefix_size(path: Path) -> int:
    """
    Get size of all files beginning with the prefix `path`.
    Alternative to shell `du -sh <path>*`.

    The prefix is escaped before the trailing `*` wildcard is appended, so that
    glob metacharacters (`[`, `]`, `?`, `*`) occurring in the prefix itself are
    matched literally instead of being interpreted as a pattern.
    """
    pattern = f"{glob.escape(str(path))}*"
    return sum(Path(match).stat().st_size for match in glob.glob(pattern))
431 def get_files(directory: Path, filter: Optional[Callable[[Path], bool]] = None) -> Iterable[Path]:
432 for file in os.listdir(directory):
433 path = directory / file
434 if filter is None or filter(path):
441 env: Optional[Dict[str, str]] = None
445 pipeline.checkout_path() / "x.py",
447 "--target", PGO_HOST,
452 cmd(arguments, env=env)
def create_pipeline() -> Pipeline:
    """
    Pick the pipeline implementation matching the current platform.

    Raises an `Exception` when `sys.platform` is neither Linux nor Windows/Cygwin.
    """
    platform = sys.platform
    if platform == "linux":
        return LinuxPipeline()
    if platform in ("cygwin", "win32"):
        return WindowsPipeline()
    raise Exception(f"Optimized build is not supported for platform {sys.platform}")
464 def gather_llvm_profiles(pipeline: Pipeline):
465 LOGGER.info("Running benchmarks with PGO instrumented LLVM")
466 run_compiler_benchmarks(
468 profiles=["Debug", "Opt"],
470 crates=LLVM_PGO_CRATES
473 profile_path = pipeline.llvm_profile_merged_file()
474 LOGGER.info(f"Merging LLVM PGO profiles to {profile_path}")
476 pipeline.downloaded_llvm_dir() / "bin" / "llvm-profdata",
479 pipeline.llvm_profile_dir_root()
482 LOGGER.info("LLVM PGO statistics")
483 LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
485 f"{pipeline.llvm_profile_dir_root()}: {format_bytes(get_path_size(pipeline.llvm_profile_dir_root()))}")
486 LOGGER.info(f"Profile file count: {count_files(pipeline.llvm_profile_dir_root())}")
488 # We don't need the individual .profraw files now that they have been merged
489 # into a final .profdata
490 delete_directory(pipeline.llvm_profile_dir_root())
493 def gather_rustc_profiles(pipeline: Pipeline):
494 LOGGER.info("Running benchmarks with PGO instrumented rustc")
496 # Here we're profiling the `rustc` frontend, so we also include `Check`.
497 # The benchmark set includes various stress tests that put the frontend under pressure.
498 run_compiler_benchmarks(
500 profiles=["Check", "Debug", "Opt"],
502 crates=RUSTC_PGO_CRATES,
504 LLVM_PROFILE_FILE=str(pipeline.rustc_profile_template_path())
508 profile_path = pipeline.rustc_profile_merged_file()
509 LOGGER.info(f"Merging Rustc PGO profiles to {profile_path}")
511 pipeline.build_artifacts() / "llvm" / "bin" / "llvm-profdata",
514 pipeline.rustc_profile_dir_root()
517 LOGGER.info("Rustc PGO statistics")
518 LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
520 f"{pipeline.rustc_profile_dir_root()}: {format_bytes(get_path_size(pipeline.rustc_profile_dir_root()))}")
521 LOGGER.info(f"Profile file count: {count_files(pipeline.rustc_profile_dir_root())}")
523 # We don't need the individual .profraw files now that they have been merged
524 # into a final .profdata
525 delete_directory(pipeline.rustc_profile_dir_root())
528 def gather_llvm_bolt_profiles(pipeline: Pipeline):
529 LOGGER.info("Running benchmarks with BOLT instrumented LLVM")
530 run_compiler_benchmarks(
532 profiles=["Check", "Debug", "Opt"],
534 crates=LLVM_BOLT_CRATES
537 merged_profile_path = pipeline.llvm_bolt_profile_merged_file()
538 profile_files_path = Path("/tmp/prof.fdata")
539 LOGGER.info(f"Merging LLVM BOLT profiles to {merged_profile_path}")
541 profile_files = sorted(glob.glob(f"{profile_files_path}*"))
545 ], output_path=merged_profile_path)
547 LOGGER.info("LLVM BOLT statistics")
548 LOGGER.info(f"{merged_profile_path}: {format_bytes(get_path_size(merged_profile_path))}")
550 f"{profile_files_path}: {format_bytes(get_path_prefix_size(profile_files_path))}")
551 LOGGER.info(f"Profile file count: {count_files_with_prefix(profile_files_path)}")
def clear_llvm_files(pipeline: Pipeline):
    """
    Remove the `llvm` and `lld` directories from the build artifacts.

    Rustbuild currently cannot rebuild LLVM when PGO options (or any other
    llvm-related options) change, so the relevant directories are removed here.
    """
    LOGGER.info("Clearing LLVM build files")
    for component in ("llvm", "lld"):
        delete_directory(pipeline.build_artifacts() / component)
565 def print_binary_sizes(pipeline: Pipeline):
566 bin_dir = pipeline.build_artifacts() / "stage2" / "bin"
567 binaries = get_files(bin_dir)
569 lib_dir = pipeline.build_artifacts() / "stage2" / "lib"
570 libraries = get_files(lib_dir, lambda p: p.suffix == ".so")
572 paths = sorted(binaries) + sorted(libraries)
573 with StringIO() as output:
575 path_str = f"{path.name}:"
576 print(f"{path_str:<30}{format_bytes(path.stat().st_size):>14}", file=output)
577 LOGGER.info(f"Rustc binary size\n{output.getvalue()}")
580 def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: List[str]):
581 # Clear and prepare tmp directory
582 shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True)
583 os.makedirs(pipeline.opt_artifacts(), exist_ok=True)
585 pipeline.build_rustc_perf()
587 # Stage 1: Build rustc + PGO instrumented LLVM
588 with timer.stage("Build rustc (LLVM PGO)"):
589 build_rustc(pipeline, args=[
590 "--llvm-profile-generate"
592 LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p")
595 with timer.stage("Gather profiles (LLVM PGO)"):
596 gather_llvm_profiles(pipeline)
598 clear_llvm_files(pipeline)
599 final_build_args += [
600 "--llvm-profile-use",
601 pipeline.llvm_profile_merged_file()
604 # Stage 2: Build PGO instrumented rustc + LLVM
605 with timer.stage("Build rustc (rustc PGO)"):
606 build_rustc(pipeline, args=[
607 "--rust-profile-generate",
608 pipeline.rustc_profile_dir_root()
611 with timer.stage("Gather profiles (rustc PGO)"):
612 gather_rustc_profiles(pipeline)
614 clear_llvm_files(pipeline)
615 final_build_args += [
616 "--rust-profile-use",
617 pipeline.rustc_profile_merged_file()
620 # Stage 3: Build rustc + BOLT instrumented LLVM
621 if pipeline.supports_bolt():
622 with timer.stage("Build rustc (LLVM BOLT)"):
623 build_rustc(pipeline, args=[
624 "--llvm-profile-use",
625 pipeline.llvm_profile_merged_file(),
626 "--llvm-bolt-profile-generate",
628 with timer.stage("Gather profiles (LLVM BOLT)"):
629 gather_llvm_bolt_profiles(pipeline)
631 clear_llvm_files(pipeline)
632 final_build_args += [
633 "--llvm-bolt-profile-use",
634 pipeline.llvm_bolt_profile_merged_file()
637 # Stage 4: Build PGO optimized rustc + PGO/BOLT optimized LLVM
638 with timer.stage("Final build"):
639 cmd(final_build_args)
642 if __name__ == "__main__":
645 format="%(name)s %(levelname)-4s: %(message)s",
648 LOGGER.info(f"Running multi-stage build using Python {sys.version}")
649 LOGGER.info(f"Environment values\n{pprint.pformat(dict(os.environ), indent=2)}")
651 build_args = sys.argv[1:]
654 pipeline = create_pipeline()
656 execute_build_pipeline(timer, pipeline, build_args)
657 except BaseException as e:
658 LOGGER.error("The multi-stage build has failed")
663 print_binary_sizes(pipeline)