aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosé Fonseca <[email protected]>2013-04-17 13:32:15 +0100
committerJosé Fonseca <[email protected]>2013-04-17 16:50:52 +0100
commitb8f6858fcb762b47ca2ad30efd286bd203042f17 (patch)
tree366c92461157a380530f4de96a5dca5fe3c09b61
parent35ef27d485367ffede9f478c9865515814e119b7 (diff)
gallivm: JIT symbol resolution with linux perf.
Details on docs/llvmpipe.html Reviewed-by: Brian Paul <[email protected]> Reviewed-by: Roland Scheidegger <[email protected]>
-rwxr-xr-xbin/perf-annotate-jit251
-rw-r--r--configure.ac7
-rw-r--r--docs/llvmpipe.html40
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.cpp117
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.h6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c11
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp23
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.h3
8 files changed, 372 insertions, 86 deletions
diff --git a/bin/perf-annotate-jit b/bin/perf-annotate-jit
new file mode 100755
index 00000000000..746434008fd
--- /dev/null
+++ b/bin/perf-annotate-jit
@@ -0,0 +1,251 @@
+#!/usr/bin/env python
+#
+# Copyright 2012 VMware Inc
+# Copyright 2008-2009 Jose Fonseca
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+
+"""Perf annotate for JIT code.
+
+Linux `perf annotate` does not work with JIT code. This script takes the data
+produced by the `perf script` command, plus the disassemblies output by gallivm
+into /tmp/perf-XXXXX.map.asm and produces output similar to `perf annotate`.
+
+See docs/llvmpipe.html for usage instructions.
+
+The `perf script` output parser was derived from the gprof2dot.py script.
+"""
+
+
+import sys
+import os.path
+import re
+import optparse
+import subprocess
+
+
+class Parser:
+ """Parser interface."""
+
+ def __init__(self):
+ pass
+
+ def parse(self):
+ raise NotImplementedError
+
+
+class LineParser(Parser):
+ """Base class for parsers that read line-based formats."""
+
+ def __init__(self, file):
+ Parser.__init__(self)
+ self._file = file
+ self.__line = None
+ self.__eof = False
+ self.line_no = 0
+
+ def readline(self):
+ line = self._file.readline()
+ if not line:
+ self.__line = ''
+ self.__eof = True
+ else:
+ self.line_no += 1
+ self.__line = line.rstrip('\r\n')
+
+ def lookahead(self):
+ assert self.__line is not None
+ return self.__line
+
+ def consume(self):
+ assert self.__line is not None
+ line = self.__line
+ self.readline()
+ return line
+
+ def eof(self):
+ assert self.__line is not None
+ return self.__eof
+
+
+mapFile = None
+
+def lookupMap(filename, matchSymbol):
+ global mapFile
+ mapFile = filename
+ stream = open(filename, 'rt')
+ for line in stream:
+ start, length, symbol = line.split()
+
+ start = int(start, 16)
+ length = int(length,16)
+
+ if symbol == matchSymbol:
+ return start
+
+ return None
+
+def lookupAsm(filename, desiredFunction):
+ stream = open(filename + '.asm', 'rt')
+ while stream.readline() != desiredFunction + ':\n':
+ pass
+
+ asm = []
+ line = stream.readline().strip()
+ while line:
+ addr, instr = line.split(':', 1)
+ addr = int(addr)
+ asm.append((addr, instr))
+ line = stream.readline().strip()
+
+ return asm
+
+
+
+samples = {}
+
+
+class PerfParser(LineParser):
+ """Parser for linux perf callgraph output.
+
+ It expects output generated with
+
+ perf record -g
+ perf script
+ """
+
+ def __init__(self, infile, symbol):
+ LineParser.__init__(self, infile)
+ self.symbol = symbol
+
+ def readline(self):
+ # Override LineParser.readline to ignore comment lines
+ while True:
+ LineParser.readline(self)
+ if self.eof() or not self.lookahead().startswith('#'):
+ break
+
+ def parse(self):
+ # read lookahead
+ self.readline()
+
+ while not self.eof():
+ self.parse_event()
+
+ asm = lookupAsm(mapFile, self.symbol)
+
+ addresses = samples.keys()
+ addresses.sort()
+ total_samples = 0
+
+ sys.stdout.write('%s:\n' % self.symbol)
+ for address, instr in asm:
+ try:
+ sample = samples.pop(address)
+ except KeyError:
+ sys.stdout.write(6*' ')
+ else:
+ sys.stdout.write('%6u' % (sample))
+ total_samples += sample
+ sys.stdout.write('%6u: %s\n' % (address, instr))
+ print 'total:', total_samples
+ assert len(samples) == 0
+
+ sys.exit(0)
+
+ def parse_event(self):
+ if self.eof():
+ return
+
+ line = self.consume()
+ assert line
+
+ callchain = self.parse_callchain()
+ if not callchain:
+ return
+
+ def parse_callchain(self):
+ callchain = []
+ while self.lookahead():
+ function = self.parse_call(len(callchain) == 0)
+ if function is None:
+ break
+ callchain.append(function)
+ if self.lookahead() == '':
+ self.consume()
+ return callchain
+
+ call_re = re.compile(r'^\s+(?P<address>[0-9a-fA-F]+)\s+(?P<symbol>.*)\s+\((?P<module>[^)]*)\)$')
+
+ def parse_call(self, first):
+ line = self.consume()
+ mo = self.call_re.match(line)
+ assert mo
+ if not mo:
+ return None
+
+ if not first:
+ return None
+
+ function_name = mo.group('symbol')
+ if not function_name:
+ function_name = mo.group('address')
+
+ module = mo.group('module')
+
+ function_id = function_name + ':' + module
+
+ address = mo.group('address')
+ address = int(address, 16)
+
+ if function_name != self.symbol:
+ return None
+
+ start_address = lookupMap(module, function_name)
+ address -= start_address
+
+ #print function_name, module, address
+
+ samples[address] = samples.get(address, 0) + 1
+
+ return True
+
+
+def main():
+ """Main program."""
+
+ optparser = optparse.OptionParser(
+ usage="\n\t%prog [options] symbol_name")
+ (options, args) = optparser.parse_args(sys.argv[1:])
+ if len(args) != 1:
+ optparser.error('wrong number of arguments')
+
+ symbol = args[0]
+
+ p = subprocess.Popen(['perf', 'script'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ parser = PerfParser(p.stdout, symbol)
+ parser.parse()
+
+
+if __name__ == '__main__':
+ main()
+
+
+# vim: set sw=4 et:
diff --git a/configure.ac b/configure.ac
index fb1f324d66f..ba922580e84 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1601,13 +1601,6 @@ if test "x$enable_gallium_llvm" = xyes; then
LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit"
fi
- if test "x$enable_debug" = xyes; then
- # Debug builds require OProfileJIT if LLVM was built with support for it
- if $LLVM_CONFIG --components | grep -q '\<oprofilejit\>'; then
- LLVM_COMPONENTS="${LLVM_COMPONENTS} oprofilejit"
- fi
- fi
-
if test "x$enable_opencl" = xyes; then
LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation"
# LLVM 3.3 >= 177971 requires IRReader
diff --git a/docs/llvmpipe.html b/docs/llvmpipe.html
index be0308321a7..80f8a017665 100644
--- a/docs/llvmpipe.html
+++ b/docs/llvmpipe.html
@@ -130,38 +130,38 @@ need to ask, don't even try it.
<h1>Profiling</h1>
-To profile llvmpipe you should pass the options
-
+<p>
+To profile llvmpipe you should build as
+</p>
<pre>
scons build=profile &lt;same-as-before&gt;
</pre>
+<p>
This will ensure that frame pointers are used both in C and JIT functions, and
that no tail call optimizations are done by gcc.
+</p>
-To better profile JIT code you'll need to build LLVM with oprofile integration.
-
-<pre>
- ./configure \
- --prefix=$install_dir \
- --enable-optimized \
- --disable-profiling \
- --enable-targets=host-only \
- --with-oprofile
-
- make -C "$build_dir"
- make -C "$build_dir" install
-
- find "$install_dir/lib" -iname '*.a' -print0 | xargs -0 strip --strip-debug
-</pre>
+<h2>Linux perf integration</h2>
-The you should define
+<p>
+On Linux, it is possible to have symbol resolution of JIT code with <a href="http://perf.wiki.kernel.org/">Linux perf</a>:
+</p>
<pre>
- export LLVM=/path/to/llvm-2.6-profile
+ perf record -g /my/application
+ perf report
</pre>
-and rebuild.
+<p>
+When run inside Linux perf, llvmpipe will create a /tmp/perf-XXXXX.map file with
+a symbol address table. It also dumps assembly code to /tmp/perf-XXXXX.map.asm,
+which can be used by the bin/perf-annotate-jit script to produce disassembly of
+the generated code annotated with the samples.
+</p>
+
+<p>You can obtain a call graph via
+<a href="http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#linux_perf">Gprof2Dot</a>.</p>
<h1>Unit testing</h1>
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index af50fcc1425..ac8e10bbd5c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -31,6 +31,7 @@
#include <llvm/Target/TargetMachine.h>
#include <llvm/Target/TargetInstrInfo.h>
#include <llvm/Support/raw_ostream.h>
+#include <llvm/Support/Format.h>
#include <llvm/Support/MemoryObject.h>
#if HAVE_LLVM >= 0x0300
@@ -60,6 +61,11 @@
#include "lp_bld_debug.h"
+#ifdef __linux__
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+
/**
@@ -174,8 +180,8 @@ public:
* - http://blog.llvm.org/2010/01/x86-disassembler.html
* - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
*/
-extern "C" void
-lp_disassemble(const void* func)
+static size_t
+disassemble(const void* func, llvm::raw_ostream & Out)
{
#if HAVE_LLVM >= 0x0207
using namespace llvm;
@@ -209,8 +215,8 @@ lp_disassemble(const void* func)
#endif
if (!AsmInfo) {
- debug_printf("error: no assembly info for target %s\n", Triple.c_str());
- return;
+ Out << "error: no assembly info for target " << Triple << "\n";
+ return 0;
}
#if HAVE_LLVM >= 0x0300
@@ -220,12 +226,10 @@ lp_disassemble(const void* func)
OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler());
#endif
if (!DisAsm) {
- debug_printf("error: no disassembler for target %s\n", Triple.c_str());
- return;
+ Out << "error: no disassembler for target " << Triple << "\n";
+ return 0;
}
- raw_debug_ostream Out;
-
#if HAVE_LLVM >= 0x0300
unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
#else
@@ -235,14 +239,14 @@ lp_disassemble(const void* func)
#if HAVE_LLVM >= 0x0301
OwningPtr<const MCRegisterInfo> MRI(T->createMCRegInfo(Triple));
if (!MRI) {
- debug_printf("error: no register info for target %s\n", Triple.c_str());
- return;
+ Out << "error: no register info for target " << Triple.c_str() << "\n";
+ return 0;
}
OwningPtr<const MCInstrInfo> MII(T->createMCInstrInfo());
if (!MII) {
- debug_printf("error: no instruction info for target %s\n", Triple.c_str());
- return;
+ Out << "error: no instruction info for target " << Triple.c_str() << "\n";
+ return 0;
}
#endif
@@ -260,8 +264,8 @@ lp_disassemble(const void* func)
T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, Out));
#endif
if (!Printer) {
- debug_printf("error: no instruction printer for target %s\n", Triple.c_str());
- return;
+ Out << "error: no instruction printer for target " << Triple.c_str() << "\n";
+ return 0;
}
#if HAVE_LLVM >= 0x0301
@@ -300,7 +304,7 @@ lp_disassemble(const void* func)
* so that between runs.
*/
- debug_printf("%6lu:\t", (unsigned long)pc);
+ Out << llvm::format("%6lu:\t", (unsigned long)pc);
if (!DisAsm->getInstruction(Inst, Size, memoryObject,
pc,
@@ -309,7 +313,7 @@ lp_disassemble(const void* func)
#else
nulls())) {
#endif
- debug_printf("invalid\n");
+ Out << "invalid";
pc += 1;
}
@@ -320,25 +324,23 @@ lp_disassemble(const void* func)
if (0) {
unsigned i;
for (i = 0; i < Size; ++i) {
- debug_printf("%02x ", ((const uint8_t*)bytes)[pc + i]);
+ Out << llvm::format("%02x ", ((const uint8_t*)bytes)[pc + i]);
}
for (; i < 16; ++i) {
- debug_printf(" ");
+ Out << " ";
}
}
/*
* Print the instruction.
*/
-
#if HAVE_LLVM >= 0x0300
- Printer->printInst(&Inst, Out, "");
+ Printer->printInst(&Inst, Out, "");
#elif HAVE_LLVM >= 0x208
- Printer->printInst(&Inst, Out);
+ Printer->printInst(&Inst, Out);
#else
- Printer->printInst(&Inst);
+ Printer->printInst(&Inst);
#endif
- Out.flush();
/*
* Advance.
@@ -386,7 +388,7 @@ lp_disassemble(const void* func)
* Output the address relative to the function start, given
* that MC will print the addresses relative the current pc.
*/
- debug_printf("\t\t; %lu", (unsigned long)jump);
+ Out << "\t\t; " << jump;
/*
* Ignore far jumps given it could be actually a tail return to
@@ -401,7 +403,7 @@ lp_disassemble(const void* func)
}
}
- debug_printf("\n");
+ Out << "\n";
/*
* Stop disassembling on return statements, if there is no record of a
@@ -420,12 +422,73 @@ lp_disassemble(const void* func)
*/
if (0) {
- debug_printf("disassemble %p %p\n", bytes, bytes + pc);
+ _debug_printf("disassemble %p %p\n", bytes, bytes + pc);
}
- debug_printf("\n");
+ Out << "\n";
+ Out.flush();
+
+ return pc;
#else /* HAVE_LLVM < 0x0207 */
(void)func;
+ return 0;
#endif /* HAVE_LLVM < 0x0207 */
}
+
+/*
+ * Disassemble the machine code at `code`, sending the listing to a
+ * raw_debug_ostream.
+ *
+ * `func` is not used by this wrapper, and the size returned by
+ * disassemble() is discarded.
+ */
+extern "C" void
+lp_disassemble(LLVMValueRef func, const void *code) {
+ raw_debug_ostream Out;
+ disassemble(code, Out);
+}
+
+
+/*
+ * Linux perf profiler integration.
+ *
+ * For every function passed in, appends a "<start> <size> <symbol>" line
+ * (start and size in hexadecimal) to /tmp/perf-<pid>.map and the function's
+ * disassembly, preceded by a "<symbol>:" line, to /tmp/perf-<pid>.map.asm.
+ * Both files are created on the first call, and only when the
+ * PERF_BUILDID_DIR environment variable is set; otherwise, and on builds
+ * other than Linux DEBUG/PROFILE ones, this function does nothing.
+ *
+ * See also:
+ * - http://penberg.blogspot.co.uk/2009/06/jato-has-profiler.html
+ * - https://github.com/penberg/jato/commit/73ad86847329d99d51b386f5aba692580d1f8fdc
+ * - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=80d496be89ed7dede5abee5c057634e80a31c82d
+ */
+extern "C" void
+lp_profile(LLVMValueRef func, const void *code)
+{
+#if defined(__linux__) && (defined(DEBUG) || defined(PROFILE))
+   static boolean first_time = TRUE;
+   static FILE *perf_map_file = NULL;
+   static int perf_asm_fd = -1;
+   if (first_time) {
+      /*
+       * We rely on the disassembler for determining a function's size, but
+       * the disassembly is a leaky and slow operation, so avoid running
+       * this except when running inside linux perf, which can be inferred
+       * by the PERF_BUILDID_DIR environment variable.
+       */
+      if (getenv("PERF_BUILDID_DIR")) {
+         pid_t pid = getpid();
+         char filename[256];
+         util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid);
+         perf_map_file = fopen(filename, "wt");
+         util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid);
+         mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+         /*
+          * Truncate like the "wt" map file above, so that a stale listing
+          * left behind by an earlier process with the same pid cannot
+          * survive past the end of the new one.
+          */
+         perf_asm_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, mode);
+         if (!perf_map_file || perf_asm_fd < 0) {
+            /*
+             * All or nothing: the size written to the map comes from the
+             * disassembly, and a stream built on an invalid descriptor would
+             * end up in an error state (which LLVM may treat as fatal when
+             * the stream is destroyed).
+             */
+            if (perf_map_file) {
+               fclose(perf_map_file);
+               perf_map_file = NULL;
+            }
+            if (perf_asm_fd >= 0) {
+               close(perf_asm_fd);
+               perf_asm_fd = -1;
+            }
+         }
+      }
+      first_time = FALSE;
+   }
+   if (perf_map_file) {
+      const char *symbol = LLVMGetValueName(func);
+      unsigned long addr = (uintptr_t)code;
+      /* The descriptor outlives this stream, hence shouldClose = false. */
+      llvm::raw_fd_ostream Out(perf_asm_fd, false);
+      Out << symbol << ":\n";
+      unsigned long size = disassemble(code, Out);
+      fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
+      /* Flush so that perf sees the symbol even if the process dies. */
+      fflush(perf_map_file);
+   }
+#else
+   (void)func;
+   (void)code;
+#endif
+}
+
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index da873f30b2d..ab83d98feed 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -83,7 +83,11 @@ lp_check_alignment(const void *ptr, unsigned alignment);
void
-lp_disassemble(const void* func);
+lp_disassemble(LLVMValueRef func, const void *code);
+
+
+void
+lp_profile(LLVMValueRef func, const void *code);
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 4fa5887e878..1153411dd52 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -273,10 +273,6 @@ init_gallivm_engine(struct gallivm_state *gallivm)
LLVMDisposeMessage(error);
goto fail;
}
-
-#if defined(DEBUG) || defined(PROFILE)
- lp_register_oprofile_jit_event_listener(gallivm->engine);
-#endif
}
LLVMAddModuleProvider(gallivm->engine, gallivm->provider);//new
@@ -635,6 +631,7 @@ gallivm_compile_module(struct gallivm_state *gallivm)
}
+
func_pointer
gallivm_jit_function(struct gallivm_state *gallivm,
LLVMValueRef func)
@@ -650,9 +647,13 @@ gallivm_jit_function(struct gallivm_state *gallivm,
jit_func = pointer_to_func(code);
if (gallivm_debug & GALLIVM_DEBUG_ASM) {
- lp_disassemble(code);
+ lp_disassemble(func, code);
}
+#if defined(PROFILE)
+ lp_profile(func, code);
+#endif
+
/* Free the function body to save memory */
lp_func_delete_body(func);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 46cdbad2683..c51279556b6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -54,7 +54,6 @@
#include <llvm-c/ExecutionEngine.h>
#include <llvm/Target/TargetOptions.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
-#include <llvm/ExecutionEngine/JITEventListener.h>
#if HAVE_LLVM >= 0x0301
#include <llvm/ADT/Triple.h>
#include <llvm/ExecutionEngine/JITMemoryManager.h>
@@ -75,28 +74,6 @@
#include "lp_bld_misc.h"
-/**
- * Register the engine with oprofile.
- *
- * This allows to see the LLVM IR function names in oprofile output.
- *
- * To actually work LLVM needs to be built with the --with-oprofile configure
- * option.
- *
- * Also a oprofile:oprofile user:group is necessary. Which is not created by
- * default on some distributions.
- */
-extern "C" void
-lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE)
-{
-#if HAVE_LLVM >= 0x0301
- llvm::unwrap(EE)->RegisterJITEventListener(llvm::JITEventListener::createOProfileJITEventListener());
-#else
- llvm::unwrap(EE)->RegisterJITEventListener(llvm::createOProfileJITEventListener());
-#endif
-}
-
-
extern "C" void
lp_set_target_options(void)
{
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
index 9ed7c348bb4..1f735fbcde6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
@@ -41,9 +41,6 @@ extern "C" {
extern void
-lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE);
-
-extern void
lp_set_target_options(void);