[BOLT][Instrumentation] Keep profile open in WatchProcess

When a binary is instrumented with the --instrumentation-sleep-time and
--instrumentation-wait-forks options and launched, the profile is
periodically written until all the forks die. The problem is that we
cannot wait for the whole process tree, and we have no way to tell when
it's safe to read the profile. However, if we keep the profile open
throughout the life of the process tree, we can use fuser to determine
when writing is finished.

Reviewed By: rafauler

Differential Revision: https://reviews.llvm.org/D154436
This commit is contained in:
Denis Revunov 2023-07-04 12:25:28 +03:00 committed by Denis Revunov
parent 66c623bfc6
commit a799298152
2 changed files with 25 additions and 9 deletions

View File

@ -458,6 +458,16 @@ uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) {
return ret;
}
/// Issue a raw system call number 77 (ftruncate in the x86-64 Linux syscall
/// table) through inline assembly instead of going through a C library.
///
/// \param fd      file descriptor, passed in rdi (the "D" constraint).
/// \param length  requested new file length in bytes, passed in rsi (the "S"
///                constraint).
/// \returns the value the kernel leaves in the a register, narrowed to int.
///          Under the raw Linux syscall convention this is 0 on success and a
///          negative errno value on failure; errno itself is not set here.
int __ftruncate(uint64_t fd, uint64_t length) {
int ret;
// The syscall number is loaded into rax inside the asm template itself, so
// rax appears only as an output ("=a"). The `syscall` instruction overwrites
// rcx and r11 and may change flags, hence those clobbers; the "memory"
// clobber keeps the compiler from reordering memory accesses across the call.
__asm__ __volatile__("movq $77, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd), "S"(length)
: "cc", "rcx", "r11", "memory");
return ret;
}
int __close(uint64_t fd) {
uint64_t ret;
__asm__ __volatile__("movq $3, %%rax\n"

View File

@ -1515,11 +1515,15 @@ extern "C" void __bolt_instr_clear_counters() {
/// on demand.
///
extern "C" void __attribute((force_align_arg_pointer))
__bolt_instr_data_dump() {
__bolt_instr_data_dump(int FD) {
// Already dumping
if (!GlobalWriteProfileMutex->acquire())
return;
int ret = __lseek(FD, 0, SEEK_SET);
assert(ret == 0, "Failed to lseek!");
ret = __ftruncate(FD, 0);
assert(ret == 0, "Failed to ftruncate!");
BumpPtrAllocator HashAlloc;
HashAlloc.setMaxSize(0x6400000);
ProfileWriterContext Ctx = readDescriptions();
@ -1527,8 +1531,6 @@ __bolt_instr_data_dump() {
DEBUG(printStats(Ctx));
int FD = openProfile();
BumpPtrAllocator Alloc;
Alloc.setMaxSize(0x6400000);
const uint8_t *FuncDesc = Ctx.FuncDescriptions;
@ -1544,7 +1546,6 @@ __bolt_instr_data_dump() {
Ctx.CallFlowTable->forEachElement(visitCallFlowEntry, FD, &Ctx);
__fsync(FD);
__close(FD);
__munmap(Ctx.MMapPtr, Ctx.MMapSize);
__close(Ctx.FileDesc);
HashAlloc.destroy();
@ -1557,6 +1558,7 @@ __bolt_instr_data_dump() {
void watchProcess() {
timespec ts, rem;
uint64_t Ellapsed = 0ull;
int FD = openProfile();
uint64_t ppid;
if (__bolt_instr_wait_forks) {
// Store parent pgid
@ -1568,7 +1570,7 @@ void watchProcess() {
ppid = __getppid();
if (ppid == 1) {
// Parent already dead
__bolt_instr_data_dump();
__bolt_instr_data_dump(FD);
goto out;
}
}
@ -1581,7 +1583,7 @@ void watchProcess() {
// so no need for us to keep dumping.
if (__kill(ppid, 0) < 0) {
if (__bolt_instr_no_counters_clear)
__bolt_instr_data_dump();
__bolt_instr_data_dump(FD);
break;
}
@ -1589,13 +1591,14 @@ void watchProcess() {
continue;
Ellapsed = 0;
__bolt_instr_data_dump();
__bolt_instr_data_dump(FD);
if (__bolt_instr_no_counters_clear == false)
__bolt_instr_clear_counters();
}
out:;
DEBUG(report("My parent process is dead, bye!\n"));
__close(FD);
__exit(0);
}
@ -1691,8 +1694,11 @@ extern "C" __attribute((naked)) void __bolt_instr_start()
/// This is hooking into ELF's DT_FINI
extern "C" void __bolt_instr_fini() {
__bolt_fini_trampoline();
if (__bolt_instr_sleep_time == 0)
__bolt_instr_data_dump();
if (__bolt_instr_sleep_time == 0) {
int FD = openProfile();
__bolt_instr_data_dump(FD);
__close(FD);
}
DEBUG(report("Finished.\n"));
}