Add m-esr52 at 52.6.0

author: Matt A. Tobin <mattatobin@localhost.localdomain> 2018-02-02 04:16:08 -0500
committer: Matt A. Tobin <mattatobin@localhost.localdomain> 2018-02-02 04:16:08 -0500
commit: 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree: 10027f336435511475e392454359edea8e25895d /tools/power
parent: 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
download: UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
3 files changed, 1063 insertions, 0 deletions
diff --git a/tools/power/mach_commands.py b/tools/power/mach_commands.py
new file mode 100644
index 000000000..281e7a868
--- /dev/null
+++ b/tools/power/mach_commands.py
@@ -0,0 +1,142 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import print_function
+
+from distutils.version import StrictVersion
+
+from mach.decorators import (
+    Command,
+    CommandArgument,
+    CommandProvider,
+)
+from mozbuild.base import (
+    MachCommandBase,
+    MachCommandConditions as conditions,
+)
+
+
+def is_osx_10_10_or_greater(cls):
+    import platform
+    release = platform.mac_ver()[0]
+    return release and StrictVersion(release) >= StrictVersion('10.10')
+
+
+@CommandProvider
+class MachCommands(MachCommandBase):
+    '''
+    Get system power consumption and related measurements.
+    '''
+    def __init__(self, context):
+        MachCommandBase.__init__(self, context)
+
+    @Command('power', category='misc',
+        conditions=[is_osx_10_10_or_greater],
+        description='Get system power consumption and related measurements for '
+        'all running browsers. Available only on Mac OS X 10.10 and above. '
+        'Requires root access.')
+    @CommandArgument('-i', '--interval', type=int, default=30000,
+        help='The sample period, measured in milliseconds. Defaults to 30000.')
+    def power(self, interval):
+        import os
+        import re
+        import subprocess
+
+        rapl = os.path.join(self.topobjdir, 'dist', 'bin', 'rapl')
+
+        interval = str(interval)
+
+        # Run a trivial command with |sudo| to gain temporary root privileges
+        # before |rapl| and |powermetrics| are called. This ensures that |rapl|
+        # doesn't start measuring while |powermetrics| is waiting for the root
+        # password to be entered.
+        try:
+            subprocess.check_call(['sudo', 'true'])
+        except:
+            print('\nsudo failed; aborting')
+            return 1
+
+        # This runs rapl in the background because nothing in this script
+        # depends on the output. This is good because we want |rapl| and
+        # |powermetrics| to run at the same time.
+        subprocess.Popen([rapl, '-n', '1', '-i', interval])
+
+        lines = subprocess.check_output(['sudo', 'powermetrics',
+                                         '--samplers', 'tasks',
+                                         '--show-process-coalition',
+                                         '--show-process-gpu',
+                                         '-n', '1',
+                                         '-i', interval])
+
+        # When run with --show-process-coalition, |powermetrics| groups outputs
+        # into process coalitions, each of which has a leader.
+        #
+        # For example, when Firefox runs from the dock, its coalition looks
+        # like this:
+        #
+        #   org.mozilla.firefox
+        #     firefox
+        #     plugin-container
+        #
+        # When Safari runs from the dock:
+        #
+        #   com.apple.Safari
+        #     Safari
+        #     com.apple.WebKit.Networking
+        #     com.apple.WebKit.WebContent
+        #     com.apple.WebKit.WebContent
+        #
+        # When Chrome runs from the dock:
+        #
+        #   com.google.Chrome
+        #     Google Chrome
+        #     Google Chrome Helper
+        #     Google Chrome Helper
+        #
+        # In these cases, we want to print the whole coalition.
+        #
+        # Also, when you run any of them from the command line, things are the
+        # same except that the leader is com.apple.Terminal and there may be
+        # non-browser processes in the coalition, e.g.:
+        #
+        #  com.apple.Terminal
+        #    firefox
+        #    plugin-container
+        #    <and possibly other, non-browser processes>
+        #
+        # Also, the WindowServer and kernel coalitions and processes are often
+        # relevant.
+        #
+        # We want to print all these but omit uninteresting coalitions. We
+        # could do this by properly parsing powermetrics output, but it's
+        # simpler and more robust to just grep for a handful of identifying
+        # strings.
+
+        print()  # blank line between |rapl| output and |powermetrics| output
+
+        for line in lines.splitlines():
+            # Search for the following things.
+            #
+            # - '^Name' is for the columns headings line.
+            #
+            # - 'firefox' and 'plugin-container' are for Firefox
+            #
+            # - 'Safari\b' and 'WebKit' are for Safari. The '\b' excludes
+            #   SafariCloudHistoryPush, which is a process that always
+            #   runs, even when Safari isn't open.
+            #
+            # - 'Chrome' is for Chrome.
+            #
+            # - 'Terminal' is for the terminal. If no browser is running from
+            #   within the terminal, it will show up unnecessarily. This is a
+            #   minor disadvantage of this very simple parsing strategy.
+            #
+            # - 'WindowServer' is for the WindowServer.
+            #
+            # - 'kernel' is for the kernel.
+            #
+            if re.search(r'(^Name|firefox|plugin-container|Safari\b|WebKit|Chrome|Terminal|WindowServer|kernel)', line):
+                print(line)
+
+        return 0
diff --git a/tools/power/moz.build b/tools/power/moz.build
new file mode 100644
index 000000000..a7e694a87
--- /dev/null
+++ b/tools/power/moz.build
@@ -0,0 +1,21 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+do_rapl = False
+
+if CONFIG['OS_ARCH'] == 'Darwin' and CONFIG['CPU_ARCH'] == 'x86_64':
+    do_rapl = True
+
+if CONFIG['OS_ARCH'] == 'Linux' and CONFIG['OS_TARGET'] != 'Android' and \
+   CONFIG['CPU_ARCH'] in ('x86', 'x86_64'):
+    do_rapl = True
+
+if do_rapl:
+    SimplePrograms([
+        'rapl',
+    ])
+
+DISABLE_STL_WRAPPING = True
diff --git a/tools/power/rapl.cpp b/tools/power/rapl.cpp
new file mode 100644
index 000000000..fd7ff454b
--- /dev/null
+++ b/tools/power/rapl.cpp
@@ -0,0 +1,900 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This program provides processor power estimates. It does this by reading
+// model-specific registers (MSRs) that are part of Intel's Running Average
+// Power Limit (RAPL) interface. These MSRs provide good quality estimates of
+// the energy consumption of up to four system components:
+// - PKG: the entire processor package;
+// - PP0: the cores (a subset of the package);
+// - PP1: the GPU (a subset of the package);
+// - DRAM: main memory.
+//
+// For more details about RAPL, see section 14.9 of Volume 3 of the "Intel 64
+// and IA-32 Architectures Software Developer's Manual", Order Number 325384.
+//
+// This program exists because there are no existing tools on Mac that can
+// obtain all four RAPL estimates. (|powermetrics| can obtain the package
+// estimate, but not the others. Intel Power Gadget can obtain the package and
+// cores estimates.)
+//
+// On Linux |perf| can obtain all four estimates (as Joules, which are easily
+// converted to Watts), but this program is implemented for Linux because it's
+// not too hard to do, and that gives us multi-platform consistency.
+//
+// This program does not support Windows, unfortunately. It's not obvious how
+// to access the RAPL MSRs on Windows.
+//
+// This program deliberately uses only standard libraries and avoids
+// Mozilla-specific code, to make it easy to compile and test on different
+// machines.
+
+#include <assert.h>
+#include <getopt.h>
+#include <math.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+
+//---------------------------------------------------------------------------
+// Utilities
+//---------------------------------------------------------------------------
+
+// MOZ_FALLTHROUGH is an annotation to suppress compiler warnings about switch
+// cases that fall through without a break or return statement. MOZ_FALLTHROUGH
+// is only needed on cases that have code. This definition of MOZ_FALLTHROUGH
+// is identical to the one in mfbt/Attributes.h, which we don't use here because
+// this file avoids depending on Mozilla headers.
+#if defined(__clang__) && __cplusplus >= 201103L
+   /* clang's fallthrough annotations are only available starting in C++11. */
+#  define MOZ_FALLTHROUGH [[clang::fallthrough]]
+#elif defined(_MSC_VER)
+   /*
+    * MSVC's __fallthrough annotations are checked by /analyze (Code Analysis):
+    * https://msdn.microsoft.com/en-us/library/ms235402%28VS.80%29.aspx
+    */
+#  include <sal.h>
+#  define MOZ_FALLTHROUGH __fallthrough
+#else
+#  define MOZ_FALLTHROUGH /* FALLTHROUGH */
+#endif
+
+// The value of argv[0] passed to main(). Used in error messages.
+static const char* gArgv0;
+
+static void
+Abort(const char* aFormat, ...)
+{
+  va_list vargs;
+  va_start(vargs, aFormat);
+  fprintf(stderr, "%s: ", gArgv0);
+  vfprintf(stderr, aFormat, vargs);
+  fprintf(stderr, "\n");
+  va_end(vargs);
+
+  exit(1);
+}
+
+static void
+CmdLineAbort(const char* aMsg)
+{
+  if (aMsg) {
+    fprintf(stderr, "%s: %s\n", gArgv0, aMsg);
+  }
+  fprintf(stderr, "Use --help for more information.\n");
+  exit(1);
+}
+
+// A special value that represents an estimate from an unsupported RAPL domain.
+static const double kUnsupported_j = -1.0;
+
+// Print to stdout and flush it, so that the output appears immediately even if
+// being redirected through |tee| or anything like that.
+static void
+PrintAndFlush(const char* aFormat, ...)
+{
+  va_list args;
+  va_start(args, aFormat);
+  vprintf(aFormat, args);
+  va_end(args);
+
+  // Push the text out now instead of leaving it in the stdio buffer.
+  fflush(stdout);
+}
+
+//---------------------------------------------------------------------------
+// Mac-specific code
+//---------------------------------------------------------------------------
+
+#if defined(__APPLE__)
+
+// Because of the pkg_energy_statistics_t::pkes_version check below, the
+// earliest OS X version this code will work with is 10.9.0 (xnu-2422.1.72).
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+// OS X has four kinds of system calls:
+//
+//  1. Mach traps;
+//  2. UNIX system calls;
+//  3. machine-dependent calls;
+//  4. diagnostic calls.
+//
+// (See "Mac OS X and iOS Internals" by Jonathan Levin for more details.)
+//
+// The last category has a single call named diagCall() or diagCall64(). Its
+// mode is controlled by its first argument, and one of the modes allows access
+// to the Intel RAPL MSRs.
+//
+// The interface to diagCall64() is not exported, so we have to import some
+// definitions from the XNU kernel. All imported definitions are annotated with
+// the XNU source file they come from, and information about what XNU versions
+// they were introduced in and (if relevant) modified.
+
+// The diagCall64() mode.
+// From osfmk/i386/Diagnostics.h
+// - In 10.8.4 (xnu-2050.24.15) this value was introduced. (In 10.8.3 the value
+//   17 was used for dgGzallocTest.)
+#define dgPowerStat 17
+
+// From osfmk/i386/cpu_data.h
+// - In 10.8.5 these values were introduced, along with core_energy_stat_t.
+#define CPU_RTIME_BINS (12)
+#define CPU_ITIME_BINS (CPU_RTIME_BINS)
+
+// core_energy_stat_t and pkg_energy_statistics_t are both from
+// osfmk/i386/Diagnostics.c.
+// - In 10.8.4 (xnu-2050.24.15) both structs were introduced, but with many
+//   fewer fields.
+// - In 10.8.5 (xnu-2050.48.11) both structs were substantially expanded, with
+//   numerous new fields.
+// - In 10.9.0 (xnu-2422.1.72) pkg_energy_statistics_t::pkes_version was added.
+//   diagCall64(dgPowerStat) fills it with '1' in all versions since (up to
+//   10.10.2 at time of writing).
+// - In 10.10.2 (xnu-2782.10.72) core_energy_stat_t::gpmcs was conditionally
+//   added, if DIAG_ALL_PMCS is true. (DIAG_ALL_PMCS is not even defined in the
+//   source code, but it could be defined at compile-time via compiler flags.)
+//   pkg_energy_statistics_t::pkes_version did not change, though.
+
+// Per-core record that trails pkg_energy_statistics_t (as its |cest| array).
+// The layout is copied from XNU and must not be reordered or resized.
+typedef struct {
+        uint64_t caperf;
+        uint64_t cmperf;
+        uint64_t ccres[6];
+        uint64_t crtimes[CPU_RTIME_BINS];
+        uint64_t citimes[CPU_ITIME_BINS];
+        uint64_t crtime_total;
+        uint64_t citime_total;
+        uint64_t cpu_idle_exits;
+        uint64_t cpu_insns;
+        uint64_t cpu_ucc;
+        uint64_t cpu_urc;
+#if     DIAG_ALL_PMCS           // Added in 10.10.2 (xnu-2782.10.72).
+        uint64_t gpmcs[4];      // Added in 10.10.2 (xnu-2782.10.72).
+#endif /* DIAG_ALL_PMCS */      // Added in 10.10.2 (xnu-2782.10.72).
+} core_energy_stat_t;
+
+// The buffer filled in by diagCall64(dgPowerStat). The layout is copied from
+// XNU and must not be reordered or resized.
+typedef struct {
+        uint64_t pkes_version;  // Added in 10.9.0 (xnu-2422.1.72).
+        uint64_t pkg_cres[2][7];
+
+        // This is read from MSR 0x606, which Intel calls MSR_RAPL_POWER_UNIT
+        // and XNU calls MSR_IA32_PKG_POWER_SKU_UNIT.
+        uint64_t pkg_power_unit;
+
+        // These are the four fields for the four RAPL domains. For each field
+        // we list:
+        //
+        // - the corresponding MSR number;
+        // - Intel's name for that MSR;
+        // - XNU's name for that MSR;
+        // - which Intel processors the MSR is supported on.
+        //
+        // The last of these is determined from chapter 35 of Volume 3 of the
+        // "Intel 64 and IA-32 Architectures Software Developer's Manual",
+        // Order Number 325384. (Note that chapter 35 contradicts section 14.9
+        // to some degree.)
+
+        // 0x611 == MSR_PKG_ENERGY_STATUS == MSR_IA32_PKG_ENERGY_STATUS
+        // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
+        uint64_t pkg_energy;
+
+        // 0x639 == MSR_PP0_ENERGY_STATUS == MSR_IA32_PP0_ENERGY_STATUS
+        // Atom (various), Sandy Bridge, Next Gen Xeon Phi (model 0x57).
+        uint64_t pp0_energy;
+
+        // 0x641 == MSR_PP1_ENERGY_STATUS == MSR_PP1_ENERGY_STATUS
+        // Sandy Bridge, Haswell.
+        uint64_t pp1_energy;
+
+        // 0x619 == MSR_DRAM_ENERGY_STATUS == MSR_IA32_DDR_ENERGY_STATUS
+        // Xeon E5, Xeon E5 v2, Haswell/Haswell-E, Next Gen Xeon Phi (model
+        // 0x57)
+        uint64_t ddr_energy;
+
+        uint64_t llc_flushed_cycles;
+        uint64_t ring_ratio_instantaneous;
+        uint64_t IA_frequency_clipping_cause;
+        uint64_t GT_frequency_clipping_cause;
+        uint64_t pkg_idle_exits;
+        uint64_t pkg_rtimes[CPU_RTIME_BINS];
+        uint64_t pkg_itimes[CPU_ITIME_BINS];
+        uint64_t mbus_delay_time;
+        uint64_t mint_delay_time;
+        uint32_t ncpus;
+        // Flexible array member; presumably one entry per CPU (|ncpus|) --
+        // confirm against XNU. This program never reads these entries.
+        core_energy_stat_t cest[];
+} pkg_energy_statistics_t;
+
+// Issue the diagnostic system call with mode |aMode| and buffer |aBuf|.
+// Returns the value the call leaves in %rax, narrowed from uint64_t to int.
+// Only x86-64 is supported; other architectures fail to compile.
+static int
+diagCall64(uint64_t aMode, void* aBuf)
+{
+  // We cannot use syscall() here because it doesn't work with diagnostic
+  // system calls -- it raises SIGSYS if you try. So we have to use asm.
+
+#ifdef __x86_64__
+  // The 0x40000 prefix indicates it's a diagnostic system call. The 0x01
+  // suffix indicates the syscall number is 1, which also happens to be the
+  // only diagnostic system call. See osfmk/mach/i386/syscall_sw.h for more
+  // details.
+  static const uint64_t diagCallNum = 0x4000001;
+  uint64_t rv;
+
+  __asm__ __volatile__(
+    "syscall"
+
+    // Return value goes in "a" (%rax).
+    : /* outputs */ "=a"(rv)
+
+    // The syscall number goes in "0", a synonym (from outputs) for "a" (%rax).
+    // The syscall arguments go in "D" (%rdi) and "S" (%rsi).
+    : /* inputs */ "0"(diagCallNum), "D"(aMode), "S"(aBuf)
+
+    // The |syscall| instruction clobbers %rcx, %r11, and %rflags ("cc"). And
+    // this particular syscall also writes memory (aBuf).
+    : /* clobbers */ "rcx", "r11", "cc", "memory"
+  );
+  return rv;
+#else
+#error Sorry, only x86-64 is supported
+#endif
+}
+
+// Fill |aPkes| via diagCall64(dgPowerStat). Aborts the program if the call
+// fails or if the kernel reports a struct version other than the one this
+// file's pkg_energy_statistics_t definition matches.
+static void
+diagCall64_dgPowerStat(pkg_energy_statistics_t* aPkes)
+{
+  static const uint64_t supported_version = 1;
+
+  // Write an unsupported version number into pkes_version so that the check
+  // below cannot succeed by dumb luck.
+  aPkes->pkes_version = supported_version - 1;
+
+  // diagCall64() returns 1 on success, and 0 on failure (which can only happen
+  // if the mode is unrecognized, e.g. in 10.7.x or earlier versions).
+  if (diagCall64(dgPowerStat, aPkes) != 1) {
+    Abort("diagCall64() failed");
+  }
+
+  // Compare against the named constant so that the poison value written above
+  // and this check can never drift apart.
+  if (aPkes->pkes_version != supported_version) {
+    // The cast keeps the argument type matched to %llu on every platform.
+    Abort("unexpected pkes_version: %llu",
+          (unsigned long long) aPkes->pkes_version);
+  }
+}
+
+// Mac implementation of the RAPL reader. Each EnergyEstimates() call reports
+// the energy used by each domain since the previous call.
+class RAPL
+{
+  bool mIsGpuSupported;   // Is the GPU domain supported by the processor?
+  bool mIsRamSupported;   // Is the RAM domain supported by the processor?
+
+  // The DRAM domain on Haswell servers has a fixed energy unit (1/65536 J ==
+  // 15.3 microJoules) which is different to the power unit MSR. (See the
+  // "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, Volume 2 of
+  // 2, Registers" datasheet, September 2014, Reference Number: 330784-001.)
+  // This field records whether the quirk is present.
+  bool mHasRamUnitsQuirk;
+
+  // The abovementioned 15.3 microJoules value.
+  static const double kQuirkyRamJoulesPerTick;
+
+  // The previous sample's MSR values.
+  uint64_t mPrevPkgTicks;
+  uint64_t mPrevPp0Ticks;
+  uint64_t mPrevPp1Ticks;
+  uint64_t mPrevDdrTicks;
+
+  // The struct passed to diagCall64().
+  pkg_energy_statistics_t* mPkes;
+
+public:
+  // Works out which domains the CPU supports, allocates the diagCall64()
+  // buffer, and takes a priming measurement. Aborts the program on an unknown
+  // CPU model or on any sysctl/allocation failure.
+  RAPL()
+    : mIsGpuSupported(false)
+    , mIsRamSupported(false)
+    , mHasRamUnitsQuirk(false)
+    // Zero the previous-sample counters so that the priming EnergyEstimates()
+    // call below never reads indeterminate values.
+    , mPrevPkgTicks(0)
+    , mPrevPp0Ticks(0)
+    , mPrevPp1Ticks(0)
+    , mPrevDdrTicks(0)
+    , mPkes(NULL)
+  {
+    // Work out which RAPL MSRs this CPU model supports.
+    int cpuModel;
+    size_t size = sizeof(cpuModel);
+    if (sysctlbyname("machdep.cpu.model", &cpuModel, &size, NULL, 0) != 0) {
+      Abort("sysctlbyname(\"machdep.cpu.model\") failed");
+    }
+
+    // This is similar to arch/x86/kernel/cpu/perf_event_intel_rapl.c in
+    // linux-4.1.5/.
+    switch (cpuModel) {
+      case 60:  // 0x3c: Haswell
+      case 69:  // 0x45: Haswell-Celeron
+      case 70:  // 0x46: Haswell
+      case 61:  // 0x3d: Broadwell
+        // Supports package, cores, GPU, RAM.
+        mIsGpuSupported = true;
+        mIsRamSupported = true;
+        break;
+
+      case 42:  // 0x2a: Sandy Bridge
+      case 58:  // 0x3a: Ivy Bridge
+        // Supports package, cores, GPU.
+        mIsGpuSupported = true;
+        mIsRamSupported = false;
+        break;
+
+      case 63:  // 0x3f: Haswell-Server
+        mHasRamUnitsQuirk = true;
+        MOZ_FALLTHROUGH;
+      case 45:  // 0x2d: Sandy Bridge-EP
+      case 62:  // 0x3e: Ivy Bridge-E
+        // Supports package, cores, RAM.
+        mIsGpuSupported = false;
+        mIsRamSupported = true;
+        break;
+
+      default:
+        Abort("unknown CPU model: %d", cpuModel);
+        break;
+    }
+
+    // Get the maximum number of logical CPUs so that we know how big to make
+    // |mPkes|.
+    int logicalcpu_max;
+    size = sizeof(logicalcpu_max);
+    if (sysctlbyname("hw.logicalcpu_max",
+                     &logicalcpu_max, &size, NULL, 0) != 0) {
+      Abort("sysctlbyname(\"hw.logicalcpu_max\") failed");
+    }
+
+    // Over-allocate by 1024 bytes per CPU to allow for the uncertainty around
+    // core_energy_stat_t::gpmcs and for any other future extensions to that
+    // struct. (The fields we read all come before the core_energy_stat_t
+    // array, so it won't matter to us whether gpmcs is present or not.)
+    size_t pkesSize = sizeof(pkg_energy_statistics_t) +
+                      logicalcpu_max * sizeof(core_energy_stat_t) +
+                      logicalcpu_max * 1024;
+    mPkes = (pkg_energy_statistics_t*) malloc(pkesSize);
+    if (!mPkes) {
+      Abort("malloc() failed");
+    }
+
+    // Do an initial measurement so that the first sample's diffs are sensible.
+    double dummy1, dummy2, dummy3, dummy4;
+    EnergyEstimates(dummy1, dummy2, dummy3, dummy4);
+  }
+
+  ~RAPL()
+  {
+    free(mPkes);
+  }
+
+  // Convert a tick count into Joules using the given scale.
+  static double Joules(uint64_t aTicks, double aJoulesPerTick)
+  {
+    return double(aTicks) * aJoulesPerTick;
+  }
+
+  // Read the counters and report, in Joules, the energy used by each domain
+  // since the previous call. Unsupported domains yield kUnsupported_j.
+  void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J,
+                       double& aRam_J)
+  {
+    diagCall64_dgPowerStat(mPkes);
+
+    // Bits 12:8 are the ESU.
+    // Energy measurements come in multiples of 1/(2^ESU).
+    uint32_t energyStatusUnits = (mPkes->pkg_power_unit >> 8) & 0x1f;
+    // The ESU field is five bits wide, so it can be as large as 31. Shift a
+    // 64-bit one: shifting a plain int by 31 would overflow.
+    double joulesPerTick =
+      (double)1 / double(uint64_t(1) << energyStatusUnits);
+
+    aPkg_J   = Joules(mPkes->pkg_energy - mPrevPkgTicks, joulesPerTick);
+    aCores_J = Joules(mPkes->pp0_energy - mPrevPp0Ticks, joulesPerTick);
+    aGpu_J   = mIsGpuSupported
+             ? Joules(mPkes->pp1_energy - mPrevPp1Ticks, joulesPerTick)
+             : kUnsupported_j;
+    aRam_J   = mIsRamSupported
+             ? Joules(mPkes->ddr_energy - mPrevDdrTicks,
+                      mHasRamUnitsQuirk ? kQuirkyRamJoulesPerTick
+                                        : joulesPerTick)
+             : kUnsupported_j;
+
+    // Remember this sample's counters as the baseline for the next one.
+    mPrevPkgTicks = mPkes->pkg_energy;
+    mPrevPp0Ticks = mPkes->pp0_energy;
+    if (mIsGpuSupported) {
+      mPrevPp1Ticks = mPkes->pp1_energy;
+    }
+    if (mIsRamSupported) {
+      mPrevDdrTicks = mPkes->ddr_energy;
+    }
+  }
+};
+
+/* static */ const double RAPL::kQuirkyRamJoulesPerTick = (double)1 / 65536;
+
+//---------------------------------------------------------------------------
+// Linux-specific code
+//---------------------------------------------------------------------------
+
+#elif defined(__linux__)
+
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+
+// There is no glibc wrapper for this system call so we provide our own.
+static int
+perf_event_open(struct perf_event_attr* aAttr, pid_t aPid, int aCpu,
+                int aGroupFd, unsigned long aFlags)
+{
+  // Forward the arguments unchanged to the raw system call; the result is
+  // narrowed to int on return.
+  const long result =
+    syscall(__NR_perf_event_open, aAttr, aPid, aCpu, aGroupFd, aFlags);
+  return result;
+}
+
+// Returns false if the file cannot be opened.
+// Read a single value from the file
+// /sys/bus/event_source/devices/power/<aStr1><aStr2><aStr3> using the scanf
+// conversion |aScanfString|, storing it in |*aOut|.
+//
+// Returns false if the file cannot be opened, true on success. Aborts the
+// program if the file opens but the value cannot be parsed.
+template <typename T>
+static bool
+ReadValueFromPowerFile(const char* aStr1, const char* aStr2, const char* aStr3,
+                       const char* aScanfString, T* aOut)
+{
+  // The filenames going into this buffer are under our control and the longest
+  // one is "/sys/bus/event_source/devices/power/events/energy-cores.scale".
+  // So 256 chars is plenty.
+  char filename[256];
+
+  // Use the bounded formatter anyway so that an unexpectedly long component
+  // is reported instead of overrunning the buffer.
+  int len = snprintf(filename, sizeof(filename),
+                     "/sys/bus/event_source/devices/power/%s%s%s",
+                     aStr1, aStr2, aStr3);
+  if (len < 0 || size_t(len) >= sizeof(filename)) {
+    Abort("power file name is too long");
+  }
+
+  FILE* fp = fopen(filename, "r");
+  if (!fp) {
+    return false;
+  }
+  if (fscanf(fp, aScanfString, aOut) != 1) {
+    Abort("fscanf() failed");
+  }
+  fclose(fp);
+
+  return true;
+}
+
+// This class encapsulates the reading of a single RAPL domain.
+// This class encapsulates the reading of a single RAPL domain.
+class Domain
+{
+  bool mIsSupported;      // Is the domain supported by the processor?
+
+  // These three are only meaningful if |mIsSupported| is true.
+  double mJoulesPerTick;  // How many Joules each tick of the MSR represents.
+  int mFd;                // The fd through which the MSR is read.
+  uint64_t mPrevTicks;    // The previous sample's MSR value. Kept as an
+                          // integer so that tick deltas are computed exactly.
+
+public:
+  enum IsOptional { Optional, NonOptional };
+
+  // Open the perf event for the domain called |aName|. |aType| is the perf
+  // event type of the power PMU. If the domain's event file is missing, the
+  // program aborts unless |aOptional| is Optional, in which case the domain is
+  // simply marked unsupported.
+  Domain(const char* aName, uint32_t aType, IsOptional aOptional = NonOptional)
+    : mIsSupported(false)
+    , mJoulesPerTick(0.0)
+    , mFd(-1)
+    , mPrevTicks(0)
+  {
+    // "%llx" stores through an |unsigned long long*|, so scan into exactly
+    // that type rather than into a uint64_t.
+    unsigned long long config;
+    if (!ReadValueFromPowerFile("events/energy-", aName, "", "event=%llx",
+         &config)) {
+      // Failure is allowed for optional domains.
+      if (aOptional == NonOptional) {
+        Abort("failed to open file for non-optional domain '%s'\n"
+              "- Is your kernel version 3.14 or later, as required? "
+              "Run |uname -r| to see.", aName);
+      }
+      mIsSupported = false;
+      return;
+    }
+
+    mIsSupported = true;
+
+    // A supported domain must also have .scale and .unit files; without them
+    // the values below would be left uninitialized.
+    if (!ReadValueFromPowerFile("events/energy-", aName, ".scale", "%lf",
+                                &mJoulesPerTick)) {
+      Abort("failed to open .scale file for domain '%s'", aName);
+    }
+
+    // The unit should be "Joules", so 128 chars should be plenty.
+    char unit[128];
+    if (!ReadValueFromPowerFile("events/energy-", aName, ".unit", "%127s",
+                                unit)) {
+      Abort("failed to open .unit file for domain '%s'", aName);
+    }
+    if (strcmp(unit, "Joules") != 0) {
+      Abort("unexpected unit '%s' in .unit file", unit);
+    }
+
+    struct perf_event_attr attr;
+    memset(&attr, 0, sizeof(attr));
+    attr.type = aType;
+    attr.size = uint32_t(sizeof(attr));
+    attr.config = config;
+
+    // Measure all processes/threads. The specified CPU doesn't matter.
+    mFd = perf_event_open(&attr, /* aPid = */ -1, /* aCpu = */ 0,
+                          /* aGroupFd = */ -1, /* aFlags = */ 0);
+    if (mFd < 0) {
+      Abort("perf_event_open() failed\n"
+            "- Did you run as root (e.g. with |sudo|) or set\n"
+            "  /proc/sys/kernel/perf_event_paranoid to 0, as required?");
+    }
+
+    mPrevTicks = 0;
+  }
+
+  ~Domain()
+  {
+    if (mIsSupported) {
+      close(mFd);
+    }
+  }
+
+  // Return the energy, in Joules, used by this domain since the previous
+  // call, or kUnsupported_j if the domain is unsupported. Aborts the program
+  // if the counter cannot be read.
+  double EnergyEstimate()
+  {
+    if (!mIsSupported) {
+      return kUnsupported_j;
+    }
+
+    uint64_t thisTicks;
+    if (read(mFd, &thisTicks, sizeof(uint64_t)) != sizeof(uint64_t)) {
+      Abort("read() failed");
+    }
+
+    // Integer subtraction: exact for any counter value.
+    uint64_t ticks = thisTicks - mPrevTicks;
+    mPrevTicks = thisTicks;
+    double joules = ticks * mJoulesPerTick;
+    return joules;
+  }
+};
+
+// Linux implementation of the RAPL reader: one Domain object per RAPL domain,
+// all opened against the perf event type of the power PMU.
+class RAPL
+{
+  Domain* mPkg;
+  Domain* mCores;
+  Domain* mGpu;
+  Domain* mRam;
+
+public:
+  // Reads the power PMU's event type and opens the four domains. The package
+  // and cores domains are required; the GPU and RAM domains are optional.
+  RAPL()
+    : mPkg(NULL)
+    , mCores(NULL)
+    , mGpu(NULL)
+    , mRam(NULL)
+  {
+    // Without the type file there is no valid event type to pass on, so do
+    // not continue with an uninitialized value.
+    uint32_t type;
+    if (!ReadValueFromPowerFile("type", "", "", "%u", &type)) {
+      Abort("failed to open /sys/bus/event_source/devices/power/type\n"
+            "- Is your kernel version 3.14 or later, as required? "
+            "Run |uname -r| to see.");
+    }
+
+    // Plain |new| reports failure by throwing rather than by returning null,
+    // so no null checks are needed here.
+    mPkg   = new Domain("pkg",   type);
+    mCores = new Domain("cores", type);
+    mGpu   = new Domain("gpu",   type, Domain::Optional);
+    mRam   = new Domain("ram",   type, Domain::Optional);
+  }
+
+  ~RAPL()
+  {
+    delete mPkg;
+    delete mCores;
+    delete mGpu;
+    delete mRam;
+  }
+
+  // Report, in Joules, the energy used by each domain since the previous
+  // call. Unsupported domains yield kUnsupported_j.
+  void EnergyEstimates(double& aPkg_J, double& aCores_J, double& aGpu_J,
+                       double& aRam_J)
+  {
+    aPkg_J   = mPkg->EnergyEstimate();
+    aCores_J = mCores->EnergyEstimate();
+    aGpu_J   = mGpu->EnergyEstimate();
+    aRam_J   = mRam->EnergyEstimate();
+  }
+};
+
+#else
+
+//---------------------------------------------------------------------------
+// Unsupported platforms
+//---------------------------------------------------------------------------
+
+#error Sorry, this platform is not supported
+
+#endif // platform
+
+//---------------------------------------------------------------------------
+// The main loop
+//---------------------------------------------------------------------------
+
+// The sample interval, measured in seconds.
+static double gSampleInterval_sec;
+
+// The platform-specific RAPL-reading machinery.
+static RAPL* gRapl;
+
+// All the sampled "total" values, in Watts.
+static std::vector<double> gTotals_W;
+
+// Power = Energy / Time, where power is measured in Watts, Energy is measured
+// in Joules, and Time is measured in seconds.
+static double
+JoulesToWatts(double aJoules)
+{
+  // Power is the energy divided by the length of the sample interval.
+  const double watts = aJoules / gSampleInterval_sec;
+  return watts;
+}
+
+// "Normalize" here means convert kUnsupported_j to zero so it can be used in
+// additive expressions. All printed values are 5 or maybe 6 chars (though 6
+// chars would require a value > 100 W, which is unlikely).
+static void
+NormalizeAndPrintAsWatts(char* aBuf, double& aValue_J)
+{
+  if (aValue_J != kUnsupported_j) {
+    // Supported domain: format the value as Watts and leave it untouched.
+    sprintf(aBuf, "%5.2f", JoulesToWatts(aValue_J));
+    return;
+  }
+
+  // Unsupported domain: print a placeholder of the same width and zero the
+  // value so that the caller can use it in sums.
+  sprintf(aBuf, "%s", " n/a ");
+  aValue_J = 0;
+}
+
+// Signal handler that takes one sample from |gRapl|, prints it as a line of
+// Watt values, and appends the sample's total to |gTotals_W|. The three
+// parameters are unused.
+// NOTE(review): presumably installed for SIGALRM (per its name) -- the
+// installation code is outside this view; confirm.
+static void
+SigAlrmHandler(int aSigNum, siginfo_t* aInfo, void* aContext)
+{
+  // One-based counter used to label each printed sample.
+  static int sampleNumber = 1;
+
+  double pkg_J, cores_J, gpu_J, ram_J;
+  gRapl->EnergyEstimates(pkg_J, cores_J, gpu_J, ram_J);
+
+  // We should have pkg and cores estimates, but might not have gpu and ram
+  // estimates.
+  assert(pkg_J   != kUnsupported_j);
+  assert(cores_J != kUnsupported_j);
+
+  // This needs to be big enough to print watt values to two decimal places. 16
+  // should be plenty.
+  static const size_t kNumStrLen = 16;
+
+  // Format each value. Unsupported ones are reset to zero here, which the
+  // assertion and the sums below rely on.
+  static char pkgStr[kNumStrLen], coresStr[kNumStrLen], gpuStr[kNumStrLen],
+              ramStr[kNumStrLen];
+  NormalizeAndPrintAsWatts(pkgStr,   pkg_J);
+  NormalizeAndPrintAsWatts(coresStr, cores_J);
+  NormalizeAndPrintAsWatts(gpuStr,   gpu_J);
+  NormalizeAndPrintAsWatts(ramStr,   ram_J);
+
+  // Core and GPU power are a subset of the package power.
+  assert(pkg_J >= cores_J + gpu_J);
+
+  // Compute "other" (i.e. rest of the package) and "total" only after the
+  // other values have been normalized.
+
+  char otherStr[kNumStrLen];
+  double other_J = pkg_J - cores_J - gpu_J;
+  NormalizeAndPrintAsWatts(otherStr, other_J);
+
+  char totalStr[kNumStrLen];
+  double total_J = pkg_J + ram_J;
+  NormalizeAndPrintAsWatts(totalStr, total_J);
+
+  // Keep the total so that Finish() can summarize all samples.
+  gTotals_W.push_back(JoulesToWatts(total_J));
+
+  // Print and flush so that the output appears immediately even if being
+  // redirected through |tee| or anything like that.
+  PrintAndFlush("#%02d %s W = %s (%s + %s + %s) + %s W\n",
+                sampleNumber++, totalStr, pkgStr, coresStr, gpuStr, otherStr,
+                ramStr);
+}
+
+// Print a summary of the samples collected in |gTotals_W| and exit with
+// status 0. With fewer than two samples only the sample count is printed.
+static void
+Finish()
+{
+  const size_t n = gTotals_W.size();
+
+  // This time calculation assumes that the timers are perfectly accurate which
+  // is not true but the inaccuracy should be small in practice.
+  const double elapsed_sec = n * gSampleInterval_sec;
+
+  printf("\n");
+  printf("%d sample%s taken over a period of %.3f second%s\n",
+    int(n), n == 1 ? "" : "s",
+    elapsed_sec, elapsed_sec == 1.0 ? "" : "s");
+
+  // A distribution is only worth printing for two or more samples.
+  if (n < 2) {
+    exit(0);
+  }
+
+  // Compute the mean.
+  const double mean =
+    std::accumulate(gTotals_W.begin(), gTotals_W.end(), 0.0) / n;
+
+  // Compute the *population* standard deviation:
+  //
+  //   popStdDev = sqrt(Sigma(x - m)^2 / n)
+  //
+  // where |x| is the sum variable, |m| is the mean, and |n| is the
+  // population size.
+  //
+  // This is different from the *sample* standard deviation, which divides by
+  // |n - 1|, and would be appropriate if we were using a random sample of a
+  // larger population.
+  double squaredDeviationSum = 0;
+  for (double total_W : gTotals_W) {
+    const double delta = total_W - mean;
+    squaredDeviationSum += delta * delta;
+  }
+  const double popStdDev = sqrt(squaredDeviationSum / n);
+
+  // Sort so that percentiles can be determined. We use the "Nearest Rank"
+  // method of determining percentiles, which is simplest to compute and which
+  // chooses values from those that appear in the input set.
+  std::sort(gTotals_W.begin(), gTotals_W.end());
+
+  // Look up the nearest-rank value for a percentile given as a fraction.
+  auto nearestRank = [n](double aFraction) {
+    return gTotals_W[ceil(aFraction * n) - 1];
+  };
+
+  printf("\n");
+  printf("Distribution of 'total' values:\n");
+  printf("            mean = %5.2f W\n", mean);
+  printf("         std dev = %5.2f W\n", popStdDev);
+  printf("  0th percentile = %5.2f W (min)\n", gTotals_W[0]);
+  printf("  5th percentile = %5.2f W\n", nearestRank(0.05));
+  printf(" 25th percentile = %5.2f W\n", nearestRank(0.25));
+  printf(" 50th percentile = %5.2f W\n", nearestRank(0.50));
+  printf(" 75th percentile = %5.2f W\n", nearestRank(0.75));
+  printf(" 95th percentile = %5.2f W\n", nearestRank(0.95));
+  printf("100th percentile = %5.2f W (max)\n", gTotals_W[n - 1]);
+
+  exit(0);
+}
+
+// Handler installed for SIGINT: prints the summary statistics via Finish(),
+// which also exits the process, so this handler never returns. The signal
+// number, siginfo and context arguments are not used.
+static void
+SigIntHandler(int aSigNum, siginfo_t* aInfo, void *aContext)
+{
+  Finish();
+}
+
+// Write the command-line usage summary, followed by a platform-specific note
+// about who may run the program, to stdout.
+static void
+PrintUsage()
+{
+  // The text contains no conversion specifications, so it can be written
+  // verbatim. Compilation fails on platforms other than Mac and Linux.
+  static const char kUsage[] =
+    "usage: rapl [options]\n"
+    "\n"
+    "Options:\n"
+    "\n"
+    "  -h --help                 show this message\n"
+    "  -i --sample-interval <N>  sample every N ms [default=1000]\n"
+    "  -n --sample-count <N>     get N samples (0 means unlimited) [default=0]\n"
+    "\n"
+#if defined(__APPLE__)
+    "On Mac this program can be run by any user.\n"
+#elif defined(__linux__)
+    "On Linux this program can only be run by the super-user unless the contents\n"
+    "of /proc/sys/kernel/perf_event_paranoid is set to 0 or lower.\n"
+#else
+#error Sorry, this platform is not supported
+#endif
+    "\n";
+
+  fputs(kUsage, stdout);
+}
+
+// Program entry point. Parses the command line, creates the RAPL reader,
+// installs the SIGALRM and SIGINT handlers, starts a repeating interval timer
+// and then sleeps in pause() while the handlers do the work. When a non-zero
+// sample count was requested, Finish() is called after that many pause()
+// returns; otherwise the program runs until it is interrupted.
+int
+main(int argc, char** argv)
+{
+  // Process command line options.
+
+  gArgv0 = argv[0];
+
+  // Default values.
+  int sampleInterval_msec = 1000;
+  int sampleCount = 0;
+
+  struct option longOptions[] = {
+    { "help",            no_argument,       NULL, 'h' },
+    { "sample-interval", required_argument, NULL, 'i' },
+    { "sample-count",    required_argument, NULL, 'n' },
+    { NULL,              0,                 NULL, 0   }
+  };
+  const char* shortOptions = "hi:n:";
+
+  int c;
+  char* endPtr;
+  // strtol() returns a long. The value is range-checked while it is still a
+  // long and only narrowed to int afterwards; narrowing first would let a
+  // huge value wrap around into the valid range and be silently accepted.
+  // On overflow strtol() returns LONG_MIN/LONG_MAX, which the range checks
+  // below also reject.
+  long parsed;
+  while ((c = getopt_long(argc, argv, shortOptions, longOptions, NULL)) != -1) {
+    switch (c) {
+      case 'h':
+        PrintUsage();
+        exit(0);
+
+      case 'i':
+        parsed = strtol(optarg, &endPtr, /* base = */ 10);
+        // |endPtr == optarg| means no digits were consumed at all (e.g. an
+        // empty argument); a non-NUL |*endPtr| means trailing junk.
+        if (endPtr == optarg || *endPtr) {
+          CmdLineAbort("sample interval is not an integer");
+        }
+        if (parsed < 1 || parsed > 3600000) {
+          CmdLineAbort("sample interval must be in the range 1..3600000 ms");
+        }
+        sampleInterval_msec = int(parsed);
+        break;
+
+      case 'n':
+        parsed = strtol(optarg, &endPtr, /* base = */ 10);
+        // Reject an empty argument explicitly; otherwise it would be treated
+        // as 0, i.e. "unlimited samples".
+        if (endPtr == optarg || *endPtr) {
+          CmdLineAbort("sample count is not an integer");
+        }
+        if (parsed < 0 || parsed > 1000000) {
+          CmdLineAbort("sample count must be in the range 0..1000000");
+        }
+        sampleCount = int(parsed);
+        break;
+
+      default:
+        CmdLineAbort(NULL);
+    }
+  }
+
+  // The RAPL MSRs update every ~1 ms, but the measurement period isn't exactly
+  // 1 ms, which means the sample periods are not exact. "Power Measurement
+  // Techniques on Standard Compute Nodes: A Quantitative Comparison" by
+  // Hackenberg et al. suggests the following.
+  //
+  //   "RAPL provides energy (and not power) consumption data without
+  //   timestamps associated to each counter update. This makes sampling rates
+  //   above 20 Samples/s unfeasible if the systematic error should be below
+  //   5%... Constantly polling the RAPL registers will both occupy a processor
+  //   core and distort the measurement itself."
+  //
+  // So warn about this case.
+  if (sampleInterval_msec < 50) {
+    fprintf(stderr,
+            "\nWARNING: sample intervals < 50 ms are likely to produce "
+            "inaccurate estimates\n\n");
+  }
+  gSampleInterval_sec = double(sampleInterval_msec) / 1000;
+
+  // Initialize the platform-specific RAPL reading machinery.
+  gRapl = new RAPL();
+  if (!gRapl) {
+    Abort("new RAPL() failed");
+  }
+
+  // Install the signal handlers.
+
+  struct sigaction sa;
+  memset(&sa, 0, sizeof(sa));
+  sa.sa_flags = SA_RESTART | SA_SIGINFO;
+  // The extra parens around (0) suppress a -Wunreachable-code warning on OS X
+  // where sigemptyset() is a macro that can never fail and always returns 0.
+  if (sigemptyset(&sa.sa_mask) < (0)) {
+    Abort("sigemptyset() failed");
+  }
+  sa.sa_sigaction = SigAlrmHandler;
+  if (sigaction(SIGALRM, &sa, NULL) < 0) {
+    Abort("sigaction(SIGALRM) failed");
+  }
+  sa.sa_sigaction = SigIntHandler;
+  if (sigaction(SIGINT, &sa, NULL) < 0) {
+    Abort("sigaction(SIGINT) failed");
+  }
+
+  // Set up the timer. The first expiry and every subsequent one are both one
+  // sample interval apart.
+  struct itimerval timer;
+  timer.it_interval.tv_sec = sampleInterval_msec / 1000;
+  timer.it_interval.tv_usec = (sampleInterval_msec % 1000) * 1000;
+  timer.it_value = timer.it_interval;
+  if (setitimer(ITIMER_REAL, &timer, NULL) < 0) {
+    Abort("setitimer() failed");
+  }
+
+  // Print header.
+  PrintAndFlush("    total W = _pkg_ (cores + _gpu_ + other) + _ram_ W\n");
+
+  // Take samples. Each pause() call returns once a signal handler has run.
+  if (sampleCount == 0) {
+    while (true) {
+      pause();
+    }
+  } else {
+    for (int i = 0; i < sampleCount; i++) {
+      pause();
+    }
+  }
+
+  Finish();
+
+  return 0;
+}
author	Matt A. Tobin <mattatobin@localhost.localdomain>	2018-02-02 04:16:08 -0500
committer	Matt A. Tobin <mattatobin@localhost.localdomain>	2018-02-02 04:16:08 -0500
commit	5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree	10027f336435511475e392454359edea8e25895d /tools/power
parent	49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
download	UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip