Diffstat (limited to 'js/src/perf/pm_linux.cpp')
-rw-r--r-- | js/src/perf/pm_linux.cpp | 310 |
1 file changed, 310 insertions, 0 deletions
diff --git a/js/src/perf/pm_linux.cpp b/js/src/perf/pm_linux.cpp
new file mode 100644
index 000000000..54ba0c363
--- /dev/null
+++ b/js/src/perf/pm_linux.cpp
@@ -0,0 +1,310 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This variant of nsIPerfMeasurement uses the perf_event interface
+ * added in Linux 2.6.31. We key compilation of this file off the
+ * existence of <linux/perf_event.h>.
+ */
+
+#include <errno.h>
+#include <linux/perf_event.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "perf/jsperf.h"
+
+using namespace js;
+
+// As of July 2010, this system call has not been added to the
+// C library, so we have to provide our own wrapper function.
+// If this code runs on a kernel that does not implement the
+// system call (2.6.30 or older) nothing unpredictable will
+// happen - it will just always fail and return -1.
+static int
+sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu,
+                    int group_fd, unsigned long flags)
+{
+    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+namespace {
+
+using JS::PerfMeasurement;
+typedef PerfMeasurement::EventMask EventMask;
+
+// Additional state required by this implementation.
+struct Impl
+{
+    // Each active counter corresponds to an open file descriptor.
+    int f_cpu_cycles;
+    int f_instructions;
+    int f_cache_references;
+    int f_cache_misses;
+    int f_branch_instructions;
+    int f_branch_misses;
+    int f_bus_cycles;
+    int f_page_faults;
+    int f_major_page_faults;
+    int f_context_switches;
+    int f_cpu_migrations;
+
+    // Counter group leader, for Start and Stop.
+    int group_leader;
+
+    // Whether counters are running.
+    bool running;
+
+    Impl();
+    ~Impl();
+
+    EventMask init(EventMask toMeasure);
+    void start();
+    void stop(PerfMeasurement* counters);
+};
+
+// Mapping from our event bitmask to codes passed into the kernel, and
+// to fields in the PerfMeasurement and PerfMeasurement::impl structures.
+static const struct
+{
+    EventMask bit;
+    uint32_t type;
+    uint32_t config;
+    uint64_t PerfMeasurement::* counter;
+    int Impl::* fd;
+} kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = {
+#define HW(mask, constant, fieldname)                                      \
+    { PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \
+      &PerfMeasurement::fieldname, &Impl::f_##fieldname }
+#define SW(mask, constant, fieldname)                                      \
+    { PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \
+      &PerfMeasurement::fieldname, &Impl::f_##fieldname }
+
+    HW(CPU_CYCLES, CPU_CYCLES, cpu_cycles),
+    HW(INSTRUCTIONS, INSTRUCTIONS, instructions),
+    HW(CACHE_REFERENCES, CACHE_REFERENCES, cache_references),
+    HW(CACHE_MISSES, CACHE_MISSES, cache_misses),
+    HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions),
+    HW(BRANCH_MISSES, BRANCH_MISSES, branch_misses),
+    HW(BUS_CYCLES, BUS_CYCLES, bus_cycles),
+    SW(PAGE_FAULTS, PAGE_FAULTS, page_faults),
+    SW(MAJOR_PAGE_FAULTS, PAGE_FAULTS_MAJ, major_page_faults),
+    SW(CONTEXT_SWITCHES, CONTEXT_SWITCHES, context_switches),
+    SW(CPU_MIGRATIONS, CPU_MIGRATIONS, cpu_migrations),
+
+#undef HW
+#undef SW
+};
+
+Impl::Impl()
+  : f_cpu_cycles(-1),
+    f_instructions(-1),
+    f_cache_references(-1),
+    f_cache_misses(-1),
+    f_branch_instructions(-1),
+    f_branch_misses(-1),
+    f_bus_cycles(-1),
+    f_page_faults(-1),
+    f_major_page_faults(-1),
+    f_context_switches(-1),
+    f_cpu_migrations(-1),
+    group_leader(-1),
+    running(false)
+{
+}
+
+Impl::~Impl()
+{
+    // Close all active counter descriptors. Take care to do the group
+    // leader last (this may not be necessary, but it's unclear what
+    // happens if you close the group leader out from under a group).
+    for (const auto& slot : kSlots) {
+        int fd = this->*(slot.fd);
+        if (fd != -1 && fd != group_leader)
+            close(fd);
+    }
+
+    if (group_leader != -1)
+        close(group_leader);
+}
+
+EventMask
+Impl::init(EventMask toMeasure)
+{
+    MOZ_ASSERT(group_leader == -1);
+    if (!toMeasure)
+        return EventMask(0);
+
+    EventMask measured = EventMask(0);
+    struct perf_event_attr attr;
+    for (const auto& slot : kSlots) {
+        if (!(toMeasure & slot.bit))
+            continue;
+
+        memset(&attr, 0, sizeof(attr));
+        attr.size = sizeof(attr);
+
+        // Set the type and config fields to indicate the counter we
+        // want to enable. We want read format 0, and we're not using
+        // sampling, so leave those fields unset.
+        attr.type = slot.type;
+        attr.config = slot.config;
+
+        // If this will be the group leader it should start off
+        // disabled. Otherwise it should start off enabled (but blocked
+        // on the group leader).
+        if (group_leader == -1)
+            attr.disabled = 1;
+
+        // The rest of the bit fields are really poorly documented.
+        // For instance, I have *no idea* whether we should be setting
+        // the inherit, inherit_stat, or task flags. I'm pretty sure
+        // we do want to set mmap and comm, and not any of the ones I
+        // haven't mentioned.
+        attr.mmap = 1;
+        attr.comm = 1;
+
+        int fd = sys_perf_event_open(&attr,
+                                     0 /* trace self */,
+                                     -1 /* on any cpu */,
+                                     group_leader,
+                                     0 /* no flags presently defined */);
+        if (fd == -1)
+            continue;
+
+        measured = EventMask(measured | slot.bit);
+        this->*(slot.fd) = fd;
+        if (group_leader == -1)
+            group_leader = fd;
+    }
+    return measured;
+}
+
+void
+Impl::start()
+{
+    if (running || group_leader == -1)
+        return;
+
+    running = true;
+    ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+void
+Impl::stop(PerfMeasurement* counters)
+{
+    // This scratch buffer is to ensure that we have read all the
+    // available data, even if that's more than we expect.
+    unsigned char buf[1024];
+
+    if (!running || group_leader == -1)
+        return;
+
+    ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
+    running = false;
+
+    // read out and reset all the counter values
+    for (const auto& slot : kSlots) {
+        int fd = this->*(slot.fd);
+        if (fd == -1)
+            continue;
+
+        if (read(fd, buf, sizeof(buf)) == sizeof(uint64_t)) {
+            uint64_t cur;
+            memcpy(&cur, buf, sizeof(uint64_t));
+            counters->*(slot.counter) += cur;
+        }
+
+        // Reset the counter regardless of whether the read did what
+        // we expected.
+        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+    }
+}
+
+} // namespace
+
+
+namespace JS {
+
+#define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1)
+
+PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure)
+  : impl(js_new<Impl>()),
+    eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure)
+                        : EventMask(0)),
+    cpu_cycles(initCtr(CPU_CYCLES)),
+    instructions(initCtr(INSTRUCTIONS)),
+    cache_references(initCtr(CACHE_REFERENCES)),
+    cache_misses(initCtr(CACHE_MISSES)),
+    branch_instructions(initCtr(BRANCH_INSTRUCTIONS)),
+    branch_misses(initCtr(BRANCH_MISSES)),
+    bus_cycles(initCtr(BUS_CYCLES)),
+    page_faults(initCtr(PAGE_FAULTS)),
+    major_page_faults(initCtr(MAJOR_PAGE_FAULTS)),
+    context_switches(initCtr(CONTEXT_SWITCHES)),
+    cpu_migrations(initCtr(CPU_MIGRATIONS))
+{
+}
+
+#undef initCtr
+
+PerfMeasurement::~PerfMeasurement()
+{
+    js_delete(static_cast<Impl*>(impl));
+}
+
+void
+PerfMeasurement::start()
+{
+    if (impl)
+        static_cast<Impl*>(impl)->start();
+}
+
+void
+PerfMeasurement::stop()
+{
+    if (impl)
+        static_cast<Impl*>(impl)->stop(this);
+}
+
+void
+PerfMeasurement::reset()
+{
+    for (const auto& slot : kSlots) {
+        if (eventsMeasured & slot.bit)
+            this->*(slot.counter) = 0;
+        else
+            this->*(slot.counter) = -1;
+    }
+}
+
+bool
+PerfMeasurement::canMeasureSomething()
+{
+    // Find out if the kernel implements the performance measurement
+    // API. If it doesn't, syscall(__NR_perf_event_open, ...) is
+    // guaranteed to return -1 and set errno to ENOSYS.
+    //
+    // We set up input parameters that should provoke an EINVAL error
+    // from a kernel that does implement perf_event_open, but we can't
+    // be sure it will (newer kernels might add more event types), so
+    // we have to take care to close any valid fd it might return.
+
+    struct perf_event_attr attr;
+    memset(&attr, 0, sizeof(attr));
+    attr.size = sizeof(attr);
+    attr.type = PERF_TYPE_MAX;
+
+    int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+    if (fd >= 0) {
+        close(fd);
+        return true;
+    } else {
+        return errno != ENOSYS;
+    }
+}
+
+} // namespace JS
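
For readers unfamiliar with the kernel interface this patch wraps, the sketch below is a minimal, self-contained use of perf_event_open(2) to count one hardware event. It is not part of the patch: it follows the documented kernel API, and the event choice, the exclude_* flags, and the error handling are this note's assumptions.

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int
    main()
    {
        struct perf_event_attr attr;
        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        attr.disabled = 1;        // start disabled; enabled via ioctl below
        attr.exclude_kernel = 1;  // assumption: count user space only
        attr.exclude_hv = 1;

        // Same raw syscall the patch wraps: glibc had no wrapper in 2010.
        int fd = syscall(__NR_perf_event_open, &attr, 0 /* self */,
                         -1 /* any cpu */, -1 /* no group */, 0);
        if (fd == -1) {
            perror("perf_event_open");
            return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);

        volatile uint64_t sum = 0;  // the work being measured
        for (int i = 0; i < 1000000; i++)
            sum += i;

        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        // With read format 0 (as in the patch), each counter reads back
        // as a single uint64_t.
        uint64_t count;
        if (read(fd, &count, sizeof(count)) == (ssize_t) sizeof(count))
            printf("instructions: %llu\n", (unsigned long long) count);

        close(fd);
        return 0;
    }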
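And a hypothetical caller of the class this file implements, inferred from the definitions above. The member names come from the patch itself, but the exact declarations in perf/jsperf.h (for example, that canMeasureSomething() is static and eventsMeasured is public) are assumptions here:

    #include <stdio.h>

    #include "perf/jsperf.h"

    void
    measureBlock()
    {
        using JS::PerfMeasurement;

        // Probe for perf_event_open support before constructing anything.
        if (!PerfMeasurement::canMeasureSomething())
            return;

        PerfMeasurement pm(PerfMeasurement::EventMask(
            PerfMeasurement::CPU_CYCLES | PerfMeasurement::INSTRUCTIONS));

        pm.start();
        // ... the code being measured goes here ...
        pm.stop();

        // Counters the kernel declined to enable stay at (uint64_t) -1,
        // per the initCtr() logic in the constructor above, so check
        // eventsMeasured before trusting a value.
        if (pm.eventsMeasured & PerfMeasurement::CPU_CYCLES)
            printf("cpu cycles: %llu\n", (unsigned long long) pm.cpu_cycles);
    }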