summaryrefslogtreecommitdiffstats
path: root/toolkit/components/terminator/nsTerminator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/components/terminator/nsTerminator.cpp')
-rw-r--r--toolkit/components/terminator/nsTerminator.cpp554
1 files changed, 554 insertions, 0 deletions
diff --git a/toolkit/components/terminator/nsTerminator.cpp b/toolkit/components/terminator/nsTerminator.cpp
new file mode 100644
index 000000000..f9459cc5d
--- /dev/null
+++ b/toolkit/components/terminator/nsTerminator.cpp
@@ -0,0 +1,554 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * A watchdog designed to terminate shutdown if it lasts too long.
+ *
+ * This watchdog is designed as a worst-case problem container for the
+ * common case in which Firefox just won't shut down.
+ *
+ * We spawn a thread during quit-application. If any of the shutdown
+ * steps takes more than n milliseconds (63000 by default), kill the
+ * process as fast as possible, without any cleanup.
+ */
+
+#include "nsTerminator.h"
+
+#include "prthread.h"
+#include "prmon.h"
+#include "plstr.h"
+#include "prio.h"
+
+#include "nsString.h"
+#include "nsServiceManagerUtils.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsAppDirectoryServiceDefs.h"
+
+#include "nsIObserverService.h"
+#include "nsIPrefService.h"
+#if defined(MOZ_CRASHREPORTER)
+#include "nsExceptionHandler.h"
+#endif
+
+#if defined(XP_WIN)
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <stdlib.h>
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MemoryChecking.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/Services.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Unused.h"
+#include "mozilla/Telemetry.h"
+
+// Fallback for the "toolkit.asyncshutdown.crash_timeout" preference, in
+// milliseconds. StartWatchdog() uses this value when the preference is
+// absent or not strictly positive. (Per the original note, the preference
+// normally holds how long AsyncShutdown waits before deciding to crash.)
+#define FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS 60000
+
+// Padding, in milliseconds, added on top of the timeout above so that
+// this watchdog does not crash the process before AsyncShutdown does.
+#define ADDITIONAL_WAIT_BEFORE_CRASH_MS 3000
+
+namespace mozilla {
+
+namespace {
+
+// Spawn an NSPR thread suited to the shutdown helpers of this file: it is
+// a system thread (so it does not keep the process alive), it is
+// unjoinable, it is globally scheduled (never cooperatively), it runs at
+// low priority and it gets the default stack size.
+//
+// `start` is the thread entry point and `arg` is passed to it untouched.
+// The result of PR_CreateThread() is returned as-is; callers check it.
+PRThread* CreateSystemThread(void (*start)(void* arg),
+                             void* arg)
+{
+  PRThread* const handle =
+    PR_CreateThread(PR_SYSTEM_THREAD,     /* does not prevent process exit */
+                    start,
+                    arg,
+                    PR_PRIORITY_LOW,
+                    PR_GLOBAL_THREAD,     /* never cooperatively scheduled */
+                    PR_UNJOINABLE_THREAD,
+                    0                     /* default stack size */);
+
+  // The thread object is never deallocated, by design; tell LSan about it.
+  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(handle);
+  return handle;
+}
+
+
+////////////////////////////////////////////
+//
+// The watchdog
+//
+// This nspr thread is in charge of crashing the process if any stage of shutdown
+// lasts more than some predefined duration. As a side-effect, it measures the
+// duration of each stage of shutdown.
+//
+
+// The heartbeat of the operation.
+//
+// Main thread:
+//
+// * Whenever a shutdown step has been completed, the main thread
+// swaps gHeartbeat to 0 to mark that the shutdown process is still
+// progressing. The value swapped away indicates the number of ticks
+// it took for the shutdown step to advance.
+//
+// Watchdog thread:
+//
+// * Every tick, the watchdog thread increments gHeartbeat atomically.
+//
+// A note about precision:
+// Since gHeartbeat is generally reset to 0 between two ticks, this means
+// that gHeartbeat stays at 0 less than one tick. Consequently, values
+// extracted from gHeartbeat must be considered rounded up.
+Atomic<uint32_t> gHeartbeat(0);
+
+struct Options {
+ /**
+ * How many ticks the watchdog lets pass without the heartbeat being
+ * reset before it crashes the process (one tick per one-second sleep). */
+ uint32_t crashAfterTicks;
+};
+
+/**
+ * Body of the watchdog thread.
+ *
+ * `arg` points to a heap-allocated Options whose ownership is handed to
+ * this thread. After every one-second sleep the thread increments
+ * gHeartbeat; if the value observed just before the increment has reached
+ * the configured number of ticks, the process is crashed on purpose.
+ * This function never returns normally.
+ */
+void
+RunWatchdog(void* arg)
+{
+  PR_SetCurrentThreadName("Shutdown Hang Terminator");
+
+  // Take ownership of the options, keep the only field we need and free
+  // the allocation immediately: one less leak to worry about.
+  uint32_t tickBudget;
+  {
+    UniquePtr<Options> owned(static_cast<Options*>(arg));
+    tickBudget = owned->crashAfterTicks;
+  }
+
+  for (;;) {
+    // Sleep in slices of at most one second rather than for the whole
+    // duration. If the computer is suspended and then resumed, a single
+    // long sleep would make us time out right on wakeup, whereas with
+    // short slices we lose at most one second.
+#if defined(XP_WIN)
+    Sleep(1000 /* ms */);
+#else
+    usleep(1000000 /* usec */);
+#endif
+
+    const uint32_t ticksSoFar = gHeartbeat++;
+    if (ticksSoFar >= tickBudget) {
+      // Shutdown is apparently dead. Crash the process.
+      MOZ_CRASH("Shutdown too long, probably frozen, causing a crash.");
+    }
+  }
+}
+
+////////////////////////////////////////////
+//
+// Writer thread
+//
+// This nspr thread is in charge of writing to disk statistics produced by the
+// watchdog thread and collected by the main thread. Note that we use a nspr
+// thread rather than usual XPCOM I/O simply because we outlive XPCOM and its
+// threads.
+//
+
+// Deleter for UniquePtr<PRFileDesc, PR_CloseDelete>: releasing the pointer
+// closes the nspr file descriptor with PR_Close(). The class is stateless.
+class PR_CloseDelete
+{
+public:
+  constexpr PR_CloseDelete() = default;
+
+  PR_CloseDelete(const PR_CloseDelete&) = default;
+
+  void operator()(PRFileDesc* aFile) const { PR_Close(aFile); }
+};
+
+//
+// Communication between the main thread and the writer thread.
+//
+// Main thread:
+//
+// * Whenever a shutdown step has been completed, the main thread
+// obtains the number of ticks from the watchdog thread, builds
+// a string representing all the data gathered so far, places
+// this string in `gWriteData`, and wakes up the writer thread
+// using `gWriteReady`. If `gWriteData` already contained a non-null
+// pointer, this means that the writer thread is lagging behind the
+// main thread, and the main thread cleans up the memory.
+//
+// Writer thread:
+//
+// * When awake, the writer thread swaps `gWriteData` to nullptr. If
+// `gWriteData` contained data to write, the writer
+// thread writes the data to a file named "ShutdownDuration.json.tmp",
+// then moves that file to "ShutdownDuration.json" and cleans up the
+// data. If `gWriteData` contained a nullptr, the writer goes to sleep
+// until it is awakened using `gWriteReady`.
+//
+//
+// The data written by the writer thread will be read by another
+// module upon the next restart and fed to Telemetry.
+//
+Atomic<nsCString*> gWriteData(nullptr);
+PRMonitor* gWriteReady = nullptr;
+
+/**
+ * Entry point for the writer thread.
+ *
+ * `arg` is a heap-allocated, NUL-terminated path of the destination file.
+ * It is intentionally never freed.
+ *
+ * The thread loops forever: it takes whatever string the main thread last
+ * published in `gWriteData`, writes it to "<destination>.tmp" and renames
+ * that file on top of the destination. When there is nothing to write it
+ * sleeps on `gWriteReady`. On the first I/O error the thread gives up and
+ * returns, since the data is hardly critical.
+ */
+void RunWriter(void* arg)
+{
+  PR_SetCurrentThreadName("Shutdown Statistics Writer");
+
+  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(arg);
+  // Shutdown will generally complete before we have a chance to
+  // deallocate. This is not a leak.
+
+  // Setup destinationPath and tmpFilePath
+
+  nsCString destinationPath(static_cast<char*>(arg));
+  nsAutoCString tmpFilePath;
+  tmpFilePath.Append(destinationPath);
+  tmpFilePath.AppendLiteral(".tmp");
+
+  // Cleanup any file leftover from a previous run
+  Unused << PR_Delete(tmpFilePath.get());
+  Unused << PR_Delete(destinationPath.get());
+
+  while (true) {
+    //
+    // Check whether we have received data from the main thread.
+    //
+    // The check is performed while holding `gWriteReady`. The main thread
+    // publishes `gWriteData` first and only then notifies under the same
+    // monitor, so checking inside the monitor guarantees that we either
+    // see the data or are already waiting when the notification is sent.
+    // Checking before entering the monitor would leave a window in which
+    // the notification could be lost and the data left unwritten.
+    //
+    // Also note that gWriteData may have been modified several times
+    // since we last checked. That's ok, we are not losing any important
+    // data (since we keep adding data), and we are not leaking memory
+    // (since the main thread deallocates any data that hasn't been
+    // consumed by the writer thread).
+    //
+    UniquePtr<nsCString> data;
+    PR_EnterMonitor(gWriteReady);
+    data.reset(gWriteData.exchange(nullptr));
+    if (!data) {
+      // Data is not available yet.
+      // Wait until the main thread provides it.
+      PR_Wait(gWriteReady, PR_INTERVAL_NO_TIMEOUT);
+    }
+    PR_ExitMonitor(gWriteReady);
+    if (!data) {
+      // We have been woken up (or the wait failed): check again.
+      continue;
+    }
+
+    MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(data.get());
+    // Shutdown may complete before we have a chance to deallocate.
+    // This is not a leak.
+
+    //
+    // Write to a temporary file
+    //
+    // In case of any error, we simply give up. Since the data is
+    // hardly critical, we don't want to spend too much effort
+    // salvaging it.
+    //
+    UniquePtr<PRFileDesc, PR_CloseDelete>
+      tmpFileDesc(PR_Open(tmpFilePath.get(),
+                          PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE,
+                          00600));
+
+    // Shutdown may complete before we have a chance to close the file.
+    // This is not a leak.
+    MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(tmpFileDesc.get());
+
+    if (tmpFileDesc == nullptr) {
+      break;
+    }
+
+    // Treat a short write like an error: a truncated JSON file must not
+    // be renamed on top of the destination.
+    const int32_t length = static_cast<int32_t>(data->Length());
+    if (PR_Write(tmpFileDesc.get(), data->get(), length) != length) {
+      break;
+    }
+    tmpFileDesc.reset();
+
+    //
+    // Rename on top of destination file.
+    //
+    // This is not sufficient to guarantee that the destination file
+    // will be written correctly, but, again, we don't care enough
+    // about the data to make more efforts.
+    //
+    if (PR_Rename(tmpFilePath.get(), destinationPath.get()) != PR_SUCCESS) {
+      break;
+    }
+  }
+}
+
+/**
+ * One phase of shutdown.
+ *
+ * Shutdown is split into steps, each of which corresponds to an observer
+ * notification. A step lasts from the moment its notification is received
+ * until the next notification arrives, measured in ticks.
+ */
+struct ShutdownStep
+{
+  // Observer topic associated with this step.
+  char const* const mTopic;
+  // Duration of the step in ticks; -1 as long as nothing was recorded.
+  int mTicks;
+
+  constexpr explicit ShutdownStep(const char* const aTopic)
+    : mTopic(aTopic), mTicks(-1)
+  {
+  }
+};
+
+// The shutdown notifications we observe, one entry per step.
+static ShutdownStep sShutdownSteps[] = {
+  ShutdownStep{"quit-application"},
+  ShutdownStep{"profile-change-teardown"},
+  ShutdownStep{"profile-before-change"},
+  ShutdownStep{"xpcom-will-shutdown"},
+  ShutdownStep{"xpcom-shutdown"},
+};
+
+} // namespace
+
+NS_IMPL_ISUPPORTS(nsTerminator, nsIObserver)
+
+nsTerminator::nsTerminator()
+ : mInitialized(false) // Set to true by Start() once the threads have been launched.
+ , mCurrentStep(-1) // Index into sShutdownSteps; -1 while no known step is in progress.
+{
+}
+
+// Subscribe this object to every topic listed in sShutdownSteps.
+// Returns NS_ERROR_UNEXPECTED when the observer service is unavailable and
+// NS_OK otherwise; a failing AddObserver() only triggers a warning.
+nsresult
+nsTerminator::SelfInit()
+{
+  nsCOMPtr<nsIObserverService> observerService =
+    mozilla::services::GetObserverService();
+  if (!observerService) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  for (const ShutdownStep& step : sShutdownSteps) {
+    DebugOnly<nsresult> rv =
+      observerService->AddObserver(this, step.mTopic, false);
+    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "AddObserver failed");
+  }
+
+  return NS_OK;
+}
+
+// Launch the helper threads. Called once, when the first shutdown
+// notification is observed.
+void
+nsTerminator::Start()
+{
+  MOZ_ASSERT(!mInitialized);
+
+  StartWatchdog();
+
+#ifndef DEBUG
+  // The writer is only started on non-debug builds, so that its
+  // intentional leaks do not show up as leak warnings on shutdown (see
+  // bug 1242084). This will be enabled again by bug 1255484 when 1255478
+  // lands.
+  StartWriter();
+#endif // !defined(DEBUG)
+
+  mInitialized = true;
+}
+
+// Prepare, allocate and start the watchdog thread.
+// By design, it will never finish, nor be deallocated.
+//
+// The timeout is read from the "toolkit.asyncshutdown.crash_timeout"
+// preference (milliseconds), padded by ADDITIONAL_WAIT_BEFORE_CRASH_MS and
+// converted to a number of watchdog ticks.
+void
+nsTerminator::StartWatchdog()
+{
+  // Duration of one watchdog tick: RunWatchdog() sleeps one second between
+  // two increments of the heartbeat.
+  const int32_t kTickDurationMS = 1000;
+
+  int32_t crashAfterMS =
+    Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
+                        FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
+  // Ignore negative values
+  if (crashAfterMS <= 0) {
+    crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
+  }
+
+  // Add a little padding, to ensure that we do not crash before
+  // AsyncShutdown.
+  if (crashAfterMS > INT32_MAX - ADDITIONAL_WAIT_BEFORE_CRASH_MS) {
+    // Defend against overflow
+    crashAfterMS = INT32_MAX;
+  } else {
+    crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
+  }
+
+  // Convert milliseconds to ticks. The divisor must be expressed in
+  // milliseconds: PR_MillisecondsToInterval() returns platform-dependent
+  // PRIntervalTime units, and dividing by it would shrink the timeout on
+  // platforms whose interval clock runs faster than 1 kHz.
+  UniquePtr<Options> options(new Options());
+  options->crashAfterTicks = crashAfterMS / kTickDurationMS;
+
+  DebugOnly<PRThread*> watchdogThread = CreateSystemThread(RunWatchdog,
+                                                           options.release());
+  MOZ_ASSERT(watchdogThread);
+}
+
+// Prepare, allocate and start the writer thread. By design, it will never
+// finish, nor be deallocated. In case of error, we degrade
+// gracefully to not writing Telemetry data: `gWriteReady` is left (or put
+// back to) null, which makes UpdateTelemetry() a no-op.
+void
+nsTerminator::StartWriter()
+{
+  if (!Telemetry::CanRecordExtended()) {
+    return;
+  }
+  nsCOMPtr<nsIFile> profLD;
+  nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
+                                       getter_AddRefs(profLD));
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  rv = profLD->Append(NS_LITERAL_STRING("ShutdownDuration.json"));
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  nsAutoString path;
+  rv = profLD->GetPath(path);
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  PRMonitor* monitor = PR_NewMonitor();
+  if (!monitor) {
+    // Without a monitor the writer thread could not wait for data.
+    return;
+  }
+
+  // The monitor has to be published before the thread starts, as
+  // RunWriter() uses `gWriteReady` right away.
+  gWriteReady = monitor;
+  char* utf8Path = ToNewUTF8String(path);
+  PRThread* writerThread = CreateSystemThread(RunWriter, utf8Path);
+
+  if (!writerThread) {
+    // Nobody will ever consume the data: undo the setup so that the main
+    // thread stops producing it, and release what we allocated.
+    gWriteReady = nullptr;
+    PR_DestroyMonitor(monitor);
+    free(utf8Path); // Buffer returned by ToNewUTF8String().
+    return;
+  }
+
+  // We will never deallocate this object
+  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(monitor);
+}
+
+// nsIObserver entry point. "profile-after-change" registers the shutdown
+// observers; every other topic is handled as a shutdown notification:
+// the threads are started if needed, the bookkeeping is updated and this
+// object stops observing the topic.
+NS_IMETHODIMP
+nsTerminator::Observe(nsISupports *, const char *aTopic, const char16_t *)
+{
+  const bool isStartup = (strcmp(aTopic, "profile-after-change") == 0);
+  if (isStartup) {
+    return SelfInit();
+  }
+
+  // Anything else is shutdown-related.
+  //
+  // Shutdown notifications have been seen in the wild to be missing or to
+  // arrive out of order, so no specific order is assumed: whichever
+  // notification comes first starts the threads.
+  if (!mInitialized) {
+    Start();
+  }
+
+  UpdateHeartbeat(aTopic);
+#ifndef DEBUG
+  // Telemetry is only written on non-debug builds, so that intentional
+  // leaks do not show up as leak warnings on shutdown (see bug 1242084).
+  // This will be enabled again by bug 1255484 when 1255478 lands.
+  UpdateTelemetry();
+#endif // !defined(DEBUG)
+  UpdateCrashReport(aTopic);
+
+  // We are done with this topic: stop observing it.
+  nsCOMPtr<nsIObserverService> observerService =
+    mozilla::services::GetObserverService();
+  MOZ_RELEASE_ASSERT(observerService);
+  Unused << observerService->RemoveObserver(this, aTopic);
+
+  return NS_OK;
+}
+
+// Called on every shutdown notification. Resets the heartbeat, credits the
+// ticks elapsed since the previous notification to the step that was in
+// progress, then makes the step matching `aTopic` the current one.
+void
+nsTerminator::UpdateHeartbeat(const char* aTopic)
+{
+  // Reset the clock, find out how long the current phase has lasted.
+  const uint32_t ticks = gHeartbeat.exchange(0);
+  // Index 0 ("quit-application") is a valid step: only -1 means that no
+  // step is in progress. Testing `> 0` would drop the duration of the
+  // first step.
+  if (mCurrentStep >= 0) {
+    sShutdownSteps[mCurrentStep].mTicks = ticks;
+  }
+
+  // Find out where we now are in the current shutdown.
+  // Don't assume that shutdown takes place in the expected order.
+  int nextStep = -1;
+  for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
+    if (strcmp(sShutdownSteps[i].mTopic, aTopic) == 0) {
+      nextStep = static_cast<int>(i);
+      break;
+    }
+  }
+  MOZ_ASSERT(nextStep != -1);
+  mCurrentStep = nextStep;
+}
+
+// Serialize the durations recorded so far as a JSON object and hand it to
+// the writer thread. Does nothing when extended Telemetry recording is
+// off, when the writer was not set up, or when no duration is known yet.
+void
+nsTerminator::UpdateTelemetry()
+{
+  if (!Telemetry::CanRecordExtended() || !gWriteReady) {
+    return;
+  }
+
+  //
+  // Telemetry needs the effective duration of each step in order to tune
+  // the time-to-crash of both the Terminator and AsyncShutdown. It is too
+  // late at this stage to record into Telemetry directly, so the data is
+  // written to disk and read back upon the next startup.
+  //
+
+  // Build JSON: one "topic": ticks pair per step with a known duration.
+  UniquePtr<nsCString> json(new nsCString());
+  json->AppendLiteral("{");
+  bool wroteField = false;
+  for (const ShutdownStep& step : sShutdownSteps) {
+    if (step.mTicks < 0) {
+      // No duration recorded for this step: leave it out.
+      continue;
+    }
+    if (wroteField) {
+      json->Append(", ");
+    }
+    wroteField = true;
+    json->AppendLiteral("\"");
+    json->Append(step.mTopic);
+    json->AppendLiteral("\": ");
+    json->AppendInt(step.mTicks);
+  }
+  json->AppendLiteral("}");
+
+  if (!wroteField) {
+    // Nothing to write
+    return;
+  }
+
+  //
+  // Publish the data for the writer thread, and free whatever it has not
+  // picked up yet.
+  //
+  nsCString* stale = gWriteData.exchange(json.release());
+  delete stale;
+
+  // In case the writer thread was sleeping, wake it up.
+  PR_EnterMonitor(gWriteReady);
+  PR_Notify(gWriteReady);
+  PR_ExitMonitor(gWriteReady);
+}
+
+// Annotate the crash report with the shutdown topic currently being
+// processed. Compiled to a no-op when MOZ_CRASHREPORTER is not defined.
+void
+nsTerminator::UpdateCrashReport(const char* aTopic)
+{
+#if defined(MOZ_CRASHREPORTER)
+  // Should we crash, we want to know how far shutdown had progressed.
+  nsAutoCString progress(aTopic);
+  Unused << CrashReporter::AnnotateCrashReport(
+    NS_LITERAL_CSTRING("ShutdownProgress"), progress);
+#endif // defined(MOZ_CRASHREPORTER)
+}
+
+
+} // namespace mozilla