1 files changed, 698 insertions, 189 deletions
diff --git a/third_party/aom/av1/encoder/bgsprite.c b/third_party/aom/av1/encoder/bgsprite.c
index 64deade06..ae2cb1d40 100644
--- a/third_party/aom/av1/encoder/bgsprite.c
+++ b/third_party/aom/av1/encoder/bgsprite.c
@@ -34,13 +34,28 @@
  */
 #define BGSPRITE_BLENDING_MODE 1
 
+// Enable removal of outliers from mean blending mode.
+#if BGSPRITE_BLENDING_MODE == 1
+#define BGSPRITE_MEAN_REMOVE_OUTLIERS 0
+#endif  // BGSPRITE_BLENDING_MODE == 1
+
 /* Interpolation for panorama alignment sampling:
  * 0 = Nearest neighbor
  * 1 = Bilinear
  */
 #define BGSPRITE_INTERPOLATION 0
 
-#define TRANSFORM_MAT_DIM 3
+// Enable turning off bgsprite from firstpass metrics in define_gf_group.
+#define BGSPRITE_ENABLE_METRICS 1
+
+// Enable foreground/background segmentation and combine with temporal filter.
+#define BGSPRITE_ENABLE_SEGMENTATION 1
+
+// Enable alignment using global motion.
+#define BGSPRITE_ENABLE_GME 0
+
+// Block size for foreground mask.
+#define BGSPRITE_MASK_BLOCK_SIZE 4
 
 typedef struct {
 #if CONFIG_HIGHBITDEPTH
@@ -52,8 +67,29 @@ typedef struct {
   uint8_t u;
   uint8_t v;
 #endif  // CONFIG_HIGHBITDEPTH
+  uint8_t exists;
 } YuvPixel;
 
+typedef struct {
+  int curr_model;
+  double mean[2];
+  double var[2];
+  int age[2];
+  double u_mean[2];
+  double v_mean[2];
+
+#if CONFIG_HIGHBITDEPTH
+  uint16_t y;
+  uint16_t u;
+  uint16_t v;
+#else
+  uint8_t y;
+  uint8_t u;
+  uint8_t v;
+#endif  // CONFIG_HIGHBITDEPTH
+  double final_var;
+} YuvPixelGaussian;
+
 // Maps to convert from matrix form to param vector form.
 static const int params_to_matrix_map[] = { 2, 3, 0, 4, 5, 1, 6, 7 };
 static const int matrix_to_params_map[] = { 2, 5, 0, 1, 3, 4, 6, 7 };
@@ -75,6 +111,8 @@ static void matrix_to_params(const double *const matrix, double *target) {
   }
 }
 
+#define TRANSFORM_MAT_DIM 3
+
 // Do matrix multiplication on params.
 static void multiply_params(double *const m1, double *const m2,
                             double *target) {
@@ -124,20 +162,20 @@ static void find_frame_limit(int width, int height,
   *y_max = (int)ceil(uv_matrix[1]);
   *y_min = (int)floor(uv_matrix[1]);
 
-  xy_matrix[0] = width;
+  xy_matrix[0] = width - 1;
   xy_matrix[1] = 0;
   multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
                TRANSFORM_MAT_DIM, 1);
   UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max);
 
-  xy_matrix[0] = width;
-  xy_matrix[1] = height;
+  xy_matrix[0] = width - 1;
+  xy_matrix[1] = height - 1;
   multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
                TRANSFORM_MAT_DIM, 1);
   UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max);
 
   xy_matrix[0] = 0;
-  xy_matrix[1] = height;
+  xy_matrix[1] = height - 1;
   multiply_mat(transform_matrix, xy_matrix, uv_matrix, TRANSFORM_MAT_DIM,
                TRANSFORM_MAT_DIM, 1);
   UPDATELIMITS(uv_matrix[0], uv_matrix[1], x_min, x_max, y_min, y_max);
@@ -198,79 +236,13 @@ static void invert_params(const double *const params, double *target) {
   matrix_to_params(inverse, target);
 }
 
-#if BGSPRITE_BLENDING_MODE == 0
-// swaps two YuvPixels.
-static void swap_yuv(YuvPixel *a, YuvPixel *b) {
-  const YuvPixel temp = *b;
-  *b = *a;
-  *a = temp;
-}
-
-// Partitions array to find pivot index in qselect.
-static int partition(YuvPixel arr[], int left, int right, int pivot_idx) {
-  YuvPixel pivot = arr[pivot_idx];
-
-  // Move pivot to the end.
-  swap_yuv(&arr[pivot_idx], &arr[right]);
-
-  int p_idx = left;
-  for (int i = left; i < right; ++i) {
-    if (arr[i].y <= pivot.y) {
-      swap_yuv(&arr[i], &arr[p_idx]);
-      p_idx++;
-    }
-  }
-
-  swap_yuv(&arr[p_idx], &arr[right]);
-
-  return p_idx;
-}
-
-// Returns the kth element in array, partially sorted in place (quickselect).
-static YuvPixel qselect(YuvPixel arr[], int left, int right, int k) {
-  if (left >= right) {
-    return arr[left];
-  }
-  unsigned int seed = (int)time(NULL);
-  int pivot_idx = left + rand_r(&seed) % (right - left + 1);
-  pivot_idx = partition(arr, left, right, pivot_idx);
-
-  if (k == pivot_idx) {
-    return arr[k];
-  } else if (k < pivot_idx) {
-    return qselect(arr, left, pivot_idx - 1, k);
-  } else {
-    return qselect(arr, pivot_idx + 1, right, k);
-  }
-}
-#endif  // BGSPRITE_BLENDING_MODE == 0
-
-// Stitches images together to create ARF and stores it in 'panorama'.
-static void stitch_images(YV12_BUFFER_CONFIG **const frames,
-                          const int num_frames, const int center_idx,
-                          const double **const params, const int *const x_min,
-                          const int *const x_max, const int *const y_min,
-                          const int *const y_max, int pano_x_min,
-                          int pano_x_max, int pano_y_min, int pano_y_max,
-                          YV12_BUFFER_CONFIG *panorama) {
-  const int width = pano_x_max - pano_x_min + 1;
-  const int height = pano_y_max - pano_y_min + 1;
-
-  // Create temp_pano[y][x][num_frames] stack of pixel values
-  YuvPixel ***temp_pano = aom_malloc(height * sizeof(*temp_pano));
-  for (int i = 0; i < height; ++i) {
-    temp_pano[i] = aom_malloc(width * sizeof(**temp_pano));
-    for (int j = 0; j < width; ++j) {
-      temp_pano[i][j] = aom_malloc(num_frames * sizeof(***temp_pano));
-    }
-  }
-  // Create count[y][x] to count how many values in stack for median filtering
-  int **count = aom_malloc(height * sizeof(*count));
-  for (int i = 0; i < height; ++i) {
-    count[i] = aom_calloc(width, sizeof(**count));  // counts initialized to 0
-  }
-
-  // Re-sample images onto panorama (pre-median filtering).
+static void build_image_stack(YV12_BUFFER_CONFIG **const frames,
+                              const int num_frames, const double **const params,
+                              const int *const x_min, const int *const x_max,
+                              const int *const y_min, const int *const y_max,
+                              int pano_x_min, int pano_y_min,
+                              YuvPixel ***img_stack) {
+  // Re-sample images onto panorama (pre-filtering).
   const int x_offset = -pano_x_min;
   const int y_offset = -pano_y_min;
   const int frame_width = frames[0]->y_width;
@@ -376,24 +348,19 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
 
 #if CONFIG_HIGHBITDEPTH
           if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) {
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
-                (uint16_t)interpolated_yvalue;
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
-                (uint16_t)interpolated_uvalue;
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
-                (uint16_t)interpolated_vvalue;
+            img_stack[pano_y][pano_x][i].y = (uint16_t)interpolated_yvalue;
+            img_stack[pano_y][pano_x][i].u = (uint16_t)interpolated_uvalue;
+            img_stack[pano_y][pano_x][i].v = (uint16_t)interpolated_vvalue;
+            img_stack[pano_y][pano_x][i].exists = 1;
           } else {
 #endif  // CONFIG_HIGHBITDEPTH
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
-                (uint8_t)interpolated_yvalue;
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
-                (uint8_t)interpolated_uvalue;
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
-                (uint8_t)interpolated_vvalue;
+            img_stack[pano_y][pano_x][i].y = (uint8_t)interpolated_yvalue;
+            img_stack[pano_y][pano_x][i].u = (uint8_t)interpolated_uvalue;
+            img_stack[pano_y][pano_x][i].v = (uint8_t)interpolated_vvalue;
+            img_stack[pano_y][pano_x][i].exists = 1;
 #if CONFIG_HIGHBITDEPTH
           }
 #endif  // CONFIG_HIGHBITDEPTH
-          ++count[pano_y][pano_x];
         } else if (image_x >= 0 && image_x < frame_width && image_y >= 0 &&
                    image_y < frame_height) {
           // Place in panorama stack.
@@ -406,104 +373,405 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
               (image_x >> frames[i]->subsampling_x);
 #if CONFIG_HIGHBITDEPTH
           if (frames[i]->flags & YV12_FLAG_HIGHBITDEPTH) {
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
-                y_buffer16[ychannel_idx];
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
-                u_buffer16[uvchannel_idx];
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
-                v_buffer16[uvchannel_idx];
+            img_stack[pano_y][pano_x][i].y = y_buffer16[ychannel_idx];
+            img_stack[pano_y][pano_x][i].u = u_buffer16[uvchannel_idx];
+            img_stack[pano_y][pano_x][i].v = v_buffer16[uvchannel_idx];
+            img_stack[pano_y][pano_x][i].exists = 1;
           } else {
 #endif  // CONFIG_HIGHBITDEPTH
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].y =
-                frames[i]->y_buffer[ychannel_idx];
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].u =
-                frames[i]->u_buffer[uvchannel_idx];
-            temp_pano[pano_y][pano_x][count[pano_y][pano_x]].v =
-                frames[i]->v_buffer[uvchannel_idx];
+            img_stack[pano_y][pano_x][i].y = frames[i]->y_buffer[ychannel_idx];
+            img_stack[pano_y][pano_x][i].u = frames[i]->u_buffer[uvchannel_idx];
+            img_stack[pano_y][pano_x][i].v = frames[i]->v_buffer[uvchannel_idx];
+            img_stack[pano_y][pano_x][i].exists = 1;
 #if CONFIG_HIGHBITDEPTH
           }
 #endif  // CONFIG_HIGHBITDEPTH
-          ++count[pano_y][pano_x];
         }
       }
     }
   }
+}
 
-#if BGSPRITE_BLENDING_MODE == 1
-  // Apply mean filtering and store result in temp_pano[y][x][0].
+#if BGSPRITE_BLENDING_MODE == 0
+// swaps two YuvPixels.
+static void swap_yuv(YuvPixel *a, YuvPixel *b) {
+  const YuvPixel temp = *b;
+  *b = *a;
+  *a = temp;
+}
+
+// Partitions array to find pivot index in qselect.
+static int partition(YuvPixel arr[], int left, int right, int pivot_idx) {
+  YuvPixel pivot = arr[pivot_idx];
+
+  // Move pivot to the end.
+  swap_yuv(&arr[pivot_idx], &arr[right]);
+
+  int p_idx = left;
+  for (int i = left; i < right; ++i) {
+    if (arr[i].y <= pivot.y) {
+      swap_yuv(&arr[i], &arr[p_idx]);
+      p_idx++;
+    }
+  }
+
+  swap_yuv(&arr[p_idx], &arr[right]);
+
+  return p_idx;
+}
+
+// Returns the kth element in array, partially sorted in place (quickselect).
+static YuvPixel qselect(YuvPixel arr[], int left, int right, int k) {
+  if (left >= right) {
+    return arr[left];
+  }
+  unsigned int seed = (int)time(NULL);
+  int pivot_idx = left + rand_r(&seed) % (right - left + 1);
+  pivot_idx = partition(arr, left, right, pivot_idx);
+
+  if (k == pivot_idx) {
+    return arr[k];
+  } else if (k < pivot_idx) {
+    return qselect(arr, left, pivot_idx - 1, k);
+  } else {
+    return qselect(arr, pivot_idx + 1, right, k);
+  }
+}
+
+// Blends image stack together using a temporal median.
+static void blend_median(const int width, const int height,
+                         const int num_frames, const YuvPixel ***image_stack,
+                         YuvPixel **blended_img) {
+  // Allocate stack of pixels
+  YuvPixel *pixel_stack = aom_calloc(num_frames, sizeof(*pixel_stack));
+
+  // Apply median filtering using quickselect.
   for (int y = 0; y < height; ++y) {
     for (int x = 0; x < width; ++x) {
-      if (count[y][x] == 0) {
+      int count = 0;
+      for (int i = 0; i < num_frames; ++i) {
+        if (image_stack[y][x][i].exists) {
+          pixel_stack[count] = image_stack[y][x][i];
+          ++count;
+        }
+      }
+      if (count == 0) {
         // Just make the pixel black.
         // TODO(toddnguyen): Color the pixel with nearest neighbor
+        blended_img[y][x].exists = 0;
       } else {
-        // Find
-        uint32_t y_sum = 0;
-        uint32_t u_sum = 0;
-        uint32_t v_sum = 0;
-        for (int i = 0; i < count[y][x]; ++i) {
-          y_sum += temp_pano[y][x][i].y;
-          u_sum += temp_pano[y][x][i].u;
-          v_sum += temp_pano[y][x][i].v;
+        const int median_idx = (int)floor(count / 2);
+        YuvPixel median = qselect(pixel_stack, 0, count - 1, median_idx);
+
+        // Store the median as the blended pixel for UV subsampling later.
+        blended_img[y][x] = median;
+        blended_img[y][x].exists = 1;
+      }
+    }
+  }
+
+  aom_free(pixel_stack);
+}
+#endif  // BGSPRITE_BLENDING_MODE == 0
+
+#if BGSPRITE_BLENDING_MODE == 1
+// Blends image stack together using a temporal mean.
+static void blend_mean(const int width, const int height, const int num_frames,
+                       const YuvPixel ***image_stack, YuvPixel **blended_img,
+                       int highbitdepth) {
+  for (int y = 0; y < height; ++y) {
+    for (int x = 0; x < width; ++x) {
+      // Sum Y/U/V over every frame that has a sample at this pixel.
+      uint32_t y_sum = 0;
+      uint32_t u_sum = 0;
+      uint32_t v_sum = 0;
+      uint32_t count = 0;
+      for (int i = 0; i < num_frames; ++i) {
+        if (image_stack[y][x][i].exists) {
+          y_sum += image_stack[y][x][i].y;
+          u_sum += image_stack[y][x][i].u;
+          v_sum += image_stack[y][x][i].v;
+          ++count;
         }
+      }
 
-        const uint32_t unsigned_count = (uint32_t)count[y][x];
+#if BGSPRITE_MEAN_REMOVE_OUTLIERS
+      if (count > 1) {
+        double stdev = 0;
+        double y_mean = (double)y_sum / count;
+        for (int i = 0; i < num_frames; ++i) {
+          if (image_stack[y][x][i].exists) {
+            stdev += pow(y_mean - image_stack[y][x][i].y, 2);
+          }
+        }
+        stdev = sqrt(stdev / count);
+
+        uint32_t inlier_y_sum = 0;
+        uint32_t inlier_u_sum = 0;
+        uint32_t inlier_v_sum = 0;
+        uint32_t inlier_count = 0;
+        for (int i = 0; i < num_frames; ++i) {
+          if (image_stack[y][x][i].exists &&
+              fabs(image_stack[y][x][i].y - y_mean) <= 1.5 * stdev) {
+            inlier_y_sum += image_stack[y][x][i].y;
+            inlier_u_sum += image_stack[y][x][i].u;
+            inlier_v_sum += image_stack[y][x][i].v;
+            ++inlier_count;
+          }
+        }
+        count = inlier_count;
+        y_sum = inlier_y_sum;
+        u_sum = inlier_u_sum;
+        v_sum = inlier_v_sum;
+      }
+#endif  // BGSPRITE_MEAN_REMOVE_OUTLIERS
 
+      if (count != 0) {
+        blended_img[y][x].exists = 1;
 #if CONFIG_HIGHBITDEPTH
-        if (panorama->flags & YV12_FLAG_HIGHBITDEPTH) {
-          temp_pano[y][x][0].y = (uint16_t)OD_DIVU(y_sum, unsigned_count);
-          temp_pano[y][x][0].u = (uint16_t)OD_DIVU(u_sum, unsigned_count);
-          temp_pano[y][x][0].v = (uint16_t)OD_DIVU(v_sum, unsigned_count);
+        if (highbitdepth) {
+          blended_img[y][x].y = (uint16_t)OD_DIVU(y_sum, count);
+          blended_img[y][x].u = (uint16_t)OD_DIVU(u_sum, count);
+          blended_img[y][x].v = (uint16_t)OD_DIVU(v_sum, count);
         } else {
 #endif  // CONFIG_HIGHBITDEPTH
-          temp_pano[y][x][0].y = (uint8_t)OD_DIVU(y_sum, unsigned_count);
-          temp_pano[y][x][0].u = (uint8_t)OD_DIVU(u_sum, unsigned_count);
-          temp_pano[y][x][0].v = (uint8_t)OD_DIVU(v_sum, unsigned_count);
+          (void)highbitdepth;
+          blended_img[y][x].y = (uint8_t)OD_DIVU(y_sum, count);
+          blended_img[y][x].u = (uint8_t)OD_DIVU(u_sum, count);
+          blended_img[y][x].v = (uint8_t)OD_DIVU(v_sum, count);
 #if CONFIG_HIGHBITDEPTH
         }
 #endif  // CONFIG_HIGHBITDEPTH
+      } else {
+        blended_img[y][x].exists = 0;
       }
     }
   }
-#else
-  // Apply median filtering using quickselect.
-  for (int y = 0; y < height; ++y) {
-    for (int x = 0; x < width; ++x) {
-      if (count[y][x] == 0) {
-        // Just make the pixel black.
-        // TODO(toddnguyen): Color the pixel with nearest neighbor
+}
+#endif  // BGSPRITE_BLENDING_MODE == 1
+
+#if BGSPRITE_ENABLE_SEGMENTATION
+// Builds dual-mode single gaussian model from image stack.
+static void build_gaussian(const YuvPixel ***image_stack, const int num_frames,
+                           const int width, const int height,
+                           const int x_block_width, const int y_block_height,
+                           const int block_size, YuvPixelGaussian **gauss) {
+  const double initial_variance = 10.0;
+  const double s_theta = 2.0;
+
+  // Add images to dual-mode single gaussian model
+  for (int y_block = 0; y_block < y_block_height; ++y_block) {
+    for (int x_block = 0; x_block < x_block_width; ++x_block) {
+      // Process all blocks.
+      YuvPixelGaussian *model = &gauss[y_block][x_block];
+
+      // Process all frames.
+      for (int i = 0; i < num_frames; ++i) {
+        // Add block to the Gaussian model.
+        double max_variance[2] = { 0.0, 0.0 };
+        double temp_y_mean = 0.0;
+        double temp_u_mean = 0.0;
+        double temp_v_mean = 0.0;
+
+        // Find mean/variance of a block of pixels.
+        int temp_count = 0;
+        for (int sub_y = 0; sub_y < block_size; ++sub_y) {
+          for (int sub_x = 0; sub_x < block_size; ++sub_x) {
+            const int y = y_block * block_size + sub_y;
+            const int x = x_block * block_size + sub_x;
+            if (y < height && x < width && image_stack[y][x][i].exists) {
+              ++temp_count;
+              temp_y_mean += (double)image_stack[y][x][i].y;
+              temp_u_mean += (double)image_stack[y][x][i].u;
+              temp_v_mean += (double)image_stack[y][x][i].v;
+
+              const double variance_0 =
+                  pow((double)image_stack[y][x][i].y - model->mean[0], 2);
+              const double variance_1 =
+                  pow((double)image_stack[y][x][i].y - model->mean[1], 2);
+
+              if (variance_0 > max_variance[0]) {
+                max_variance[0] = variance_0;
+              }
+              if (variance_1 > max_variance[1]) {
+                max_variance[1] = variance_1;
+              }
+            }
+          }
+        }
+
+        // If pixels exist in the block, add to the model.
+        if (temp_count > 0) {
+          assert(temp_count <= block_size * block_size);
+          temp_y_mean /= temp_count;
+          temp_u_mean /= temp_count;
+          temp_v_mean /= temp_count;
+
+          // Switch the background model to the oldest model.
+          if (model->age[0] > model->age[1]) {
+            model->curr_model = 0;
+          } else if (model->age[1] > model->age[0]) {
+            model->curr_model = 1;
+          }
+
+          // If model is empty, initialize model.
+          if (model->age[model->curr_model] == 0) {
+            model->mean[model->curr_model] = temp_y_mean;
+            model->u_mean[model->curr_model] = temp_u_mean;
+            model->v_mean[model->curr_model] = temp_v_mean;
+            model->var[model->curr_model] = initial_variance;
+            model->age[model->curr_model] = 1;
+          } else {
+            // Constants for current model and foreground model (0 or 1).
+            const int opposite = 1 - model->curr_model;
+            const int current = model->curr_model;
+            const double j = i;
+
+            // Put block into the appropriate model.
+            if (pow(temp_y_mean - model->mean[current], 2) <
+                s_theta * model->var[current]) {
+              // Add block to the current background model
+              model->age[current] += 1;
+              const double prev_weight = 1 / j;
+              const double curr_weight = (j - 1) / j;
+              model->mean[current] = prev_weight * model->mean[current] +
+                                     curr_weight * temp_y_mean;
+              model->u_mean[current] = prev_weight * model->u_mean[current] +
+                                       curr_weight * temp_u_mean;
+              model->v_mean[current] = prev_weight * model->v_mean[current] +
+                                       curr_weight * temp_v_mean;
+              model->var[current] = prev_weight * model->var[current] +
+                                    curr_weight * max_variance[current];
+            } else {
+              // Block does not fit into current background candidate. Add to
+              // foreground candidate and reinitialize if necessary.
+              const double var_fg = pow(temp_y_mean - model->mean[opposite], 2);
+
+              if (var_fg <= s_theta * model->var[opposite]) {
+                model->age[opposite] += 1;
+                const double prev_weight = 1 / j;
+                const double curr_weight = (j - 1) / j;
+                model->mean[opposite] = prev_weight * model->mean[opposite] +
+                                        curr_weight * temp_y_mean;
+                model->u_mean[opposite] =
+                    prev_weight * model->u_mean[opposite] +
+                    curr_weight * temp_u_mean;
+                model->v_mean[opposite] =
+                    prev_weight * model->v_mean[opposite] +
+                    curr_weight * temp_v_mean;
+                model->var[opposite] = prev_weight * model->var[opposite] +
+                                       curr_weight * max_variance[opposite];
+              } else if (model->age[opposite] == 0 ||
+                         var_fg > s_theta * model->var[opposite]) {
+                model->mean[opposite] = temp_y_mean;
+                model->u_mean[opposite] = temp_u_mean;
+                model->v_mean[opposite] = temp_v_mean;
+                model->var[opposite] = initial_variance;
+                model->age[opposite] = 1;
+              } else {
+                // This case should never happen.
+                assert(0);
+              }
+            }
+          }
+        }
+      }
+
+      // Select the oldest candidate as the background model.
+      if (model->age[0] == 0 && model->age[1] == 0) {
+        model->y = 0;
+        model->u = 0;
+        model->v = 0;
+        model->final_var = 0;
+      } else if (model->age[0] > model->age[1]) {
+        model->y = (uint8_t)model->mean[0];
+        model->u = (uint8_t)model->u_mean[0];
+        model->v = (uint8_t)model->v_mean[0];
+        model->final_var = model->var[0];
       } else {
-        // Find
-        const int median_idx = (int)floor(count[y][x] / 2);
-        YuvPixel median =
-            qselect(temp_pano[y][x], 0, count[y][x] - 1, median_idx);
+        model->y = (uint8_t)model->mean[1];
+        model->u = (uint8_t)model->u_mean[1];
+        model->v = (uint8_t)model->v_mean[1];
+        model->final_var = model->var[1];
+      }
+    }
+  }
+}
 
-        // Make the median value the 0th index for UV subsampling later
-        temp_pano[y][x][0] = median;
-        assert(median.y == temp_pano[y][x][0].y &&
-               median.u == temp_pano[y][x][0].u &&
-               median.v == temp_pano[y][x][0].v);
+// Builds foreground mask based on reference image and gaussian model.
+// In mask[][], 1 is foreground and 0 is background.
+static void build_mask(const int x_min, const int y_min, const int x_offset,
+                       const int y_offset, const int x_block_width,
+                       const int y_block_height, const int block_size,
+                       const YuvPixelGaussian **gauss,
+                       YV12_BUFFER_CONFIG *const reference,
+                       YV12_BUFFER_CONFIG *const panorama, uint8_t **mask) {
+  const int crop_x_offset = x_min + x_offset;
+  const int crop_y_offset = y_min + y_offset;
+  const double d_theta = 4.0;
+
+  for (int y_block = 0; y_block < y_block_height; ++y_block) {
+    for (int x_block = 0; x_block < x_block_width; ++x_block) {
+      // Create mask to determine if ARF is background or foreground.
+      const YuvPixelGaussian *model = &gauss[y_block][x_block];
+      double temp_y_mean = 0.0;
+      int temp_count = 0;
+
+      for (int sub_y = 0; sub_y < block_size; ++sub_y) {
+        for (int sub_x = 0; sub_x < block_size; ++sub_x) {
+          // x and y are panorama coordinates.
+          const int y = y_block * block_size + sub_y;
+          const int x = x_block * block_size + sub_x;
+
+          const int arf_y = y - crop_y_offset;
+          const int arf_x = x - crop_x_offset;
+
+          if (arf_y >= 0 && arf_y < panorama->y_height && arf_x >= 0 &&
+              arf_x < panorama->y_width) {
+            ++temp_count;
+            const int ychannel_idx = arf_y * panorama->y_stride + arf_x;
+            temp_y_mean += (double)reference->y_buffer[ychannel_idx];
+          }
+        }
+      }
+      if (temp_count > 0) {
+        assert(temp_count <= block_size * block_size);
+        temp_y_mean /= temp_count;
+
+        if (pow(temp_y_mean - model->y, 2) > model->final_var * d_theta) {
+          // Mark block as foreground.
+          mask[y_block][x_block] = 1;
+        }
       }
     }
   }
-#endif  // BGSPRITE_BLENDING_MODE == 1
+}
+#endif  // BGSPRITE_ENABLE_SEGMENTATION
 
-  // NOTE(toddnguyen): Right now the ARF in the cpi struct is fixed size at
-  // the same size as the frames. For now, we crop the generated panorama.
-  // assert(panorama->y_width < width && panorama->y_height < height);
+// Resamples blended_img into panorama, including UV subsampling.
+static void resample_panorama(YuvPixel **blended_img, const int center_idx,
+                              const int *const x_min, const int *const y_min,
+                              int pano_x_min, int pano_x_max, int pano_y_min,
+                              int pano_y_max, YV12_BUFFER_CONFIG *panorama) {
+  const int width = pano_x_max - pano_x_min + 1;
+  const int height = pano_y_max - pano_y_min + 1;
+  const int x_offset = -pano_x_min;
+  const int y_offset = -pano_y_min;
   const int crop_x_offset = x_min[center_idx] + x_offset;
   const int crop_y_offset = y_min[center_idx] + y_offset;
-
 #if CONFIG_HIGHBITDEPTH
   if (panorama->flags & YV12_FLAG_HIGHBITDEPTH) {
     // Use median Y value.
     uint16_t *pano_y_buffer16 = CONVERT_TO_SHORTPTR(panorama->y_buffer);
+    uint16_t *pano_u_buffer16 = CONVERT_TO_SHORTPTR(panorama->u_buffer);
+    uint16_t *pano_v_buffer16 = CONVERT_TO_SHORTPTR(panorama->v_buffer);
+
     for (int y = 0; y < panorama->y_height; ++y) {
       for (int x = 0; x < panorama->y_width; ++x) {
         const int ychannel_idx = y * panorama->y_stride + x;
-        if (count[y + crop_y_offset][x + crop_x_offset] > 0) {
+        if (blended_img[y + crop_y_offset][x + crop_x_offset].exists) {
           pano_y_buffer16[ychannel_idx] =
-              temp_pano[y + crop_y_offset][x + crop_x_offset][0].y;
+              blended_img[y + crop_y_offset][x + crop_x_offset].y;
         } else {
           pano_y_buffer16[ychannel_idx] = 0;
         }
@@ -511,9 +779,6 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
     }
 
     // UV subsampling with median UV values
-    uint16_t *pano_u_buffer16 = CONVERT_TO_SHORTPTR(panorama->u_buffer);
-    uint16_t *pano_v_buffer16 = CONVERT_TO_SHORTPTR(panorama->v_buffer);
-
     for (int y = 0; y < panorama->uv_height; ++y) {
       for (int x = 0; x < panorama->uv_width; ++x) {
         uint32_t avg_count = 0;
@@ -526,9 +791,9 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
             int y_sample = crop_y_offset + (y << panorama->subsampling_y) + s_y;
             int x_sample = crop_x_offset + (x << panorama->subsampling_x) + s_x;
             if (y_sample > 0 && y_sample < height && x_sample > 0 &&
-                x_sample < width && count[y_sample][x_sample] > 0) {
-              u_sum += temp_pano[y_sample][x_sample][0].u;
-              v_sum += temp_pano[y_sample][x_sample][0].v;
+                x_sample < width && blended_img[y_sample][x_sample].exists) {
+              u_sum += blended_img[y_sample][x_sample].u;
+              v_sum += blended_img[y_sample][x_sample].v;
               avg_count++;
             }
           }
@@ -546,35 +811,36 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
     }
   } else {
 #endif  // CONFIG_HIGHBITDEPTH
-    // Use median Y value.
+    // Use blended Y value.
     for (int y = 0; y < panorama->y_height; ++y) {
       for (int x = 0; x < panorama->y_width; ++x) {
         const int ychannel_idx = y * panorama->y_stride + x;
-        if (count[y + crop_y_offset][x + crop_x_offset] > 0) {
+        // Use filtered background.
+        if (blended_img[y + crop_y_offset][x + crop_x_offset].exists) {
           panorama->y_buffer[ychannel_idx] =
-              temp_pano[y + crop_y_offset][x + crop_x_offset][0].y;
+              blended_img[y + crop_y_offset][x + crop_x_offset].y;
         } else {
           panorama->y_buffer[ychannel_idx] = 0;
         }
       }
     }
 
-    // UV subsampling with median UV values
+    // UV subsampling with blended UV values.
     for (int y = 0; y < panorama->uv_height; ++y) {
       for (int x = 0; x < panorama->uv_width; ++x) {
         uint16_t avg_count = 0;
         uint16_t u_sum = 0;
         uint16_t v_sum = 0;
 
-        // Look at surrounding pixels for subsampling
+        // Look at surrounding pixels for subsampling.
         for (int s_x = 0; s_x < panorama->subsampling_x + 1; ++s_x) {
           for (int s_y = 0; s_y < panorama->subsampling_y + 1; ++s_y) {
             int y_sample = crop_y_offset + (y << panorama->subsampling_y) + s_y;
             int x_sample = crop_x_offset + (x << panorama->subsampling_x) + s_x;
             if (y_sample > 0 && y_sample < height && x_sample > 0 &&
-                x_sample < width && count[y_sample][x_sample] > 0) {
-              u_sum += temp_pano[y_sample][x_sample][0].u;
-              v_sum += temp_pano[y_sample][x_sample][0].v;
+                x_sample < width && blended_img[y_sample][x_sample].exists) {
+              u_sum += blended_img[y_sample][x_sample].u;
+              v_sum += blended_img[y_sample][x_sample].v;
               avg_count++;
             }
           }
@@ -595,19 +861,266 @@ static void stitch_images(YV12_BUFFER_CONFIG **const frames,
 #if CONFIG_HIGHBITDEPTH
   }
 #endif  // CONFIG_HIGHBITDEPTH
+}
 
+#if BGSPRITE_ENABLE_SEGMENTATION
+// Builds the final ARF: temporal filter for foreground, bgsprite elsewhere.
+static void combine_arf(YV12_BUFFER_CONFIG *const temporal_arf,
+                        YV12_BUFFER_CONFIG *const bgsprite,
+                        uint8_t **const mask, const int block_size,
+                        const int x_offset, const int y_offset,
+                        YV12_BUFFER_CONFIG *target) {
+  const int height = temporal_arf->y_height;
+  const int width = temporal_arf->y_width;
+
+  YuvPixel **blended_img = aom_malloc(height * sizeof(*blended_img));
   for (int i = 0; i < height; ++i) {
+    blended_img[i] = aom_malloc(width * sizeof(**blended_img));
+  }
+
+  const int block_2_height = (height / BGSPRITE_MASK_BLOCK_SIZE) +
+                             (height % BGSPRITE_MASK_BLOCK_SIZE != 0 ? 1 : 0);
+  const int block_2_width = (width / BGSPRITE_MASK_BLOCK_SIZE) +
+                            (width % BGSPRITE_MASK_BLOCK_SIZE != 0 ? 1 : 0);
+
+  for (int block_y = 0; block_y < block_2_height; ++block_y) {
+    for (int block_x = 0; block_x < block_2_width; ++block_x) {
+      int count = 0;  // In-frame pixels of this block whose mask entry is set.
+      int total = 0;  // In-frame pixels of this block.
+      for (int sub_y = 0; sub_y < BGSPRITE_MASK_BLOCK_SIZE; ++sub_y) {
+        for (int sub_x = 0; sub_x < BGSPRITE_MASK_BLOCK_SIZE; ++sub_x) {
+          const int img_y = block_y * BGSPRITE_MASK_BLOCK_SIZE + sub_y;
+          const int img_x = block_x * BGSPRITE_MASK_BLOCK_SIZE + sub_x;
+          const int mask_y = (y_offset + img_y) / block_size;
+          const int mask_x = (x_offset + img_x) / block_size;
+
+          if (img_y < height && img_x < width) {
+            if (mask[mask_y][mask_x]) {
+              ++count;
+            }
+            ++total;
+          }
+        }
+      }
+
+      const double threshold = 0.30;  // Fraction of set mask entries.
+      const int amount = (int)(threshold * total);  // Truncated cutoff count.
+      for (int sub_y = 0; sub_y < BGSPRITE_MASK_BLOCK_SIZE; ++sub_y) {
+        for (int sub_x = 0; sub_x < BGSPRITE_MASK_BLOCK_SIZE; ++sub_x) {
+          const int y = block_y * BGSPRITE_MASK_BLOCK_SIZE + sub_y;
+          const int x = block_x * BGSPRITE_MASK_BLOCK_SIZE + sub_x;
+          if (y < height && x < width) {
+            blended_img[y][x].exists = 1;
+            const int ychannel_idx = y * temporal_arf->y_stride + x;
+            const int uvchannel_idx =
+                (y >> temporal_arf->subsampling_y) * temporal_arf->uv_stride +
+                (x >> temporal_arf->subsampling_x);
+
+            if (count > amount) {
+// Foreground (set mask entries exceed the cutoff); use temporal arf.
+#if CONFIG_HIGHBITDEPTH
+              if (temporal_arf->flags & YV12_FLAG_HIGHBITDEPTH) {
+                uint16_t *pano_y_buffer16 =
+                    CONVERT_TO_SHORTPTR(temporal_arf->y_buffer);
+                uint16_t *pano_u_buffer16 =
+                    CONVERT_TO_SHORTPTR(temporal_arf->u_buffer);
+                uint16_t *pano_v_buffer16 =
+                    CONVERT_TO_SHORTPTR(temporal_arf->v_buffer);
+                blended_img[y][x].y = pano_y_buffer16[ychannel_idx];
+                blended_img[y][x].u = pano_u_buffer16[uvchannel_idx];
+                blended_img[y][x].v = pano_v_buffer16[uvchannel_idx];
+              } else {
+#endif  // CONFIG_HIGHBITDEPTH
+                blended_img[y][x].y = temporal_arf->y_buffer[ychannel_idx];
+                blended_img[y][x].u = temporal_arf->u_buffer[uvchannel_idx];
+                blended_img[y][x].v = temporal_arf->v_buffer[uvchannel_idx];
+#if CONFIG_HIGHBITDEPTH
+              }
+#endif  // CONFIG_HIGHBITDEPTH
+            } else {
+// Background (cutoff not exceeded); use bgsprite arf.
+#if CONFIG_HIGHBITDEPTH
+              if (bgsprite->flags & YV12_FLAG_HIGHBITDEPTH) {
+                uint16_t *pano_y_buffer16 =
+                    CONVERT_TO_SHORTPTR(bgsprite->y_buffer);
+                uint16_t *pano_u_buffer16 =
+                    CONVERT_TO_SHORTPTR(bgsprite->u_buffer);
+                uint16_t *pano_v_buffer16 =
+                    CONVERT_TO_SHORTPTR(bgsprite->v_buffer);
+                blended_img[y][x].y = pano_y_buffer16[ychannel_idx];
+                blended_img[y][x].u = pano_u_buffer16[uvchannel_idx];
+                blended_img[y][x].v = pano_v_buffer16[uvchannel_idx];
+              } else {
+#endif  // CONFIG_HIGHBITDEPTH
+                blended_img[y][x].y = bgsprite->y_buffer[ychannel_idx];
+                blended_img[y][x].u = bgsprite->u_buffer[uvchannel_idx];
+                blended_img[y][x].v = bgsprite->v_buffer[uvchannel_idx];
+#if CONFIG_HIGHBITDEPTH
+              }
+#endif  // CONFIG_HIGHBITDEPTH
+            }
+          }
+        }
+      }
+    }
+  }
+
+  const int x_min = 0;
+  const int y_min = 0;
+  resample_panorama(blended_img, 0, &x_min, &y_min, 0, width - 1, 0, height - 1,
+                    target);
+
+  for (int i = 0; i < height; ++i) {
+    aom_free(blended_img[i]);
+  }
+  aom_free(blended_img);
+}
+#endif  // BGSPRITE_ENABLE_SEGMENTATION
+
+// Stitches images together to create ARF and stores it in 'panorama'.
+static void stitch_images(AV1_COMP *cpi, YV12_BUFFER_CONFIG **const frames,
+                          const int num_frames, const int distance,
+                          const int center_idx, const double **const params,
+                          const int *const x_min, const int *const x_max,
+                          const int *const y_min, const int *const y_max,
+                          int pano_x_min, int pano_x_max, int pano_y_min,
+                          int pano_y_max, YV12_BUFFER_CONFIG *panorama) {
+  const int width = pano_x_max - pano_x_min + 1;
+  const int height = pano_y_max - pano_y_min + 1;
+
+  // Create pano_stack[y][x][num_frames] stack of pixel values.
+  YuvPixel ***pano_stack = aom_malloc(height * sizeof(*pano_stack));
+  for (int i = 0; i < height; ++i) {
+    pano_stack[i] = aom_malloc(width * sizeof(**pano_stack));
     for (int j = 0; j < width; ++j) {
-      aom_free(temp_pano[i][j]);
+      pano_stack[i][j] = aom_calloc(num_frames, sizeof(***pano_stack));
     }
-    aom_free(temp_pano[i]);
-    aom_free(count[i]);
   }
-  aom_free(count);
-  aom_free(temp_pano);
+
+  build_image_stack(frames, num_frames, params, x_min, x_max, y_min, y_max,
+                    pano_x_min, pano_y_min, pano_stack);
+
+  // Create blended_img[y][x] of combined panorama pixel values.
+  YuvPixel **blended_img = aom_malloc(height * sizeof(*blended_img));
+  for (int i = 0; i < height; ++i) {
+    blended_img[i] = aom_malloc(width * sizeof(**blended_img));
+  }
+
+// Blending and saving result in blended_img.
+#if BGSPRITE_BLENDING_MODE == 1
+  blend_mean(width, height, num_frames, (const YuvPixel ***)pano_stack,
+             blended_img, panorama->flags & YV12_FLAG_HIGHBITDEPTH);
+#else   // BGSPRITE_BLENDING_MODE != 1
+  blend_median(width, height, num_frames, (const YuvPixel ***)pano_stack,
+               blended_img);
+#endif  // BGSPRITE_BLENDING_MODE == 1
+
+  // NOTE(toddnguyen): Right now the ARF in the cpi struct is fixed size at
+  // the same size as the frames. For now, we crop the generated panorama.
+  assert(panorama->y_width <= width && panorama->y_height <= height);
+
+  // Resamples the blended_img into a frame-sized local bgsprite buffer.
+  YV12_BUFFER_CONFIG bgsprite;
+  memset(&bgsprite, 0, sizeof(bgsprite));
+  aom_alloc_frame_buffer(&bgsprite, frames[0]->y_width, frames[0]->y_height,
+                         frames[0]->subsampling_x, frames[0]->subsampling_y,
+#if CONFIG_HIGHBITDEPTH
+                         frames[0]->flags & YV12_FLAG_HIGHBITDEPTH,
+#endif
+                         frames[0]->border, 0);
+  aom_yv12_copy_frame(frames[0], &bgsprite);
+  bgsprite.bit_depth = frames[0]->bit_depth;
+  resample_panorama(blended_img, center_idx, x_min, y_min, pano_x_min,
+                    pano_x_max, pano_y_min, pano_y_max, &bgsprite);
+
+#if BGSPRITE_ENABLE_SEGMENTATION
+  YV12_BUFFER_CONFIG temporal_bgsprite;
+  memset(&temporal_bgsprite, 0, sizeof(temporal_bgsprite));
+  aom_alloc_frame_buffer(&temporal_bgsprite, frames[0]->y_width,
+                         frames[0]->y_height, frames[0]->subsampling_x,
+                         frames[0]->subsampling_y,
+#if CONFIG_HIGHBITDEPTH
+                         frames[0]->flags & YV12_FLAG_HIGHBITDEPTH,
+#endif
+                         frames[0]->border, 0);
+  aom_yv12_copy_frame(frames[0], &temporal_bgsprite);
+  temporal_bgsprite.bit_depth = frames[0]->bit_depth;
+
+  av1_temporal_filter(cpi, &bgsprite, &temporal_bgsprite, distance);
+
+  // Block counts for gaussian model: panorama size / N_1, rounded up.
+  const int N_1 = 2;
+  const int y_block_height = (height / N_1) + (height % N_1 != 0 ? 1 : 0);
+  const int x_block_width = (width / N_1) + (width % N_1 != 0 ? 1 : 0);
+  YuvPixelGaussian **gauss = aom_malloc(y_block_height * sizeof(*gauss));
+  for (int i = 0; i < y_block_height; ++i) {
+    gauss[i] = aom_calloc(x_block_width, sizeof(**gauss));
+  }
+
+  // Build Gaussian model.
+  build_gaussian((const YuvPixel ***)pano_stack, num_frames, width, height,
+                 x_block_width, y_block_height, N_1, gauss);
+
+  // Select background model and build foreground mask.
+  uint8_t **mask = aom_malloc(y_block_height * sizeof(*mask));
+  for (int i = 0; i < y_block_height; ++i) {
+    mask[i] = aom_calloc(x_block_width, sizeof(**mask));
+  }
+
+  const int x_offset = -pano_x_min;
+  const int y_offset = -pano_y_min;
+  build_mask(x_min[center_idx], y_min[center_idx], x_offset, y_offset,
+             x_block_width, y_block_height, N_1,
+             (const YuvPixelGaussian **)gauss,
+             (YV12_BUFFER_CONFIG * const) frames[center_idx], panorama, mask);
+
+  YV12_BUFFER_CONFIG temporal_arf;
+  memset(&temporal_arf, 0, sizeof(temporal_arf));
+  aom_alloc_frame_buffer(&temporal_arf, frames[0]->y_width, frames[0]->y_height,
+                         frames[0]->subsampling_x, frames[0]->subsampling_y,
+#if CONFIG_HIGHBITDEPTH
+                         frames[0]->flags & YV12_FLAG_HIGHBITDEPTH,
+#endif
+                         frames[0]->border, 0);
+  aom_yv12_copy_frame(frames[0], &temporal_arf);
+  temporal_arf.bit_depth = frames[0]->bit_depth;
+  av1_temporal_filter(cpi, NULL, &temporal_arf, distance);
+
+  combine_arf(&temporal_arf, &temporal_bgsprite, mask, N_1, x_offset, y_offset,
+              panorama);
+
+  aom_free_frame_buffer(&temporal_arf);
+  aom_free_frame_buffer(&temporal_bgsprite);
+  for (int i = 0; i < y_block_height; ++i) {
+    aom_free(gauss[i]);
+    aom_free(mask[i]);
+  }
+  aom_free(gauss);
+  aom_free(mask);
+#else   // !BGSPRITE_ENABLE_SEGMENTATION
+  av1_temporal_filter(cpi, &bgsprite, panorama, distance);
+#endif  // BGSPRITE_ENABLE_SEGMENTATION
+
+  aom_free_frame_buffer(&bgsprite);
+  for (int i = 0; i < height; ++i) {
+    for (int j = 0; j < width; ++j) {
+      aom_free(pano_stack[i][j]);
+    }
+    aom_free(pano_stack[i]);
+    aom_free(blended_img[i]);
+  }
+  aom_free(pano_stack);
+  aom_free(blended_img);
 }
 
 int av1_background_sprite(AV1_COMP *cpi, int distance) {
+#if BGSPRITE_ENABLE_METRICS
+  // Do temporal filter if firstpass stats disable bgsprite.
+  if (!cpi->bgsprite_allowed) {
+    return 1;
+  }
+#endif  // BGSPRITE_ENABLE_METRICS
+
   YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
   static const double identity_params[MAX_PARAMDIM - 1] = {
     0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
@@ -626,7 +1139,6 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
 #if CONFIG_EXT_REFS
   const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
   if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW) {
-    cpi->alt_ref_buffer = av1_lookahead_peek(cpi->lookahead, distance)->img;
     cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 1;
     frames_fwd = 0;
     frames_bwd = 0;
@@ -646,17 +1158,6 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
     frames[frames_to_stitch - 1 - frame] = &buf->img;
   }
 
-  YV12_BUFFER_CONFIG temp_bg;
-  memset(&temp_bg, 0, sizeof(temp_bg));
-  aom_alloc_frame_buffer(&temp_bg, frames[0]->y_width, frames[0]->y_height,
-                         frames[0]->subsampling_x, frames[0]->subsampling_y,
-#if CONFIG_HIGHBITDEPTH
-                         frames[0]->flags & YV12_FLAG_HIGHBITDEPTH,
-#endif
-                         frames[0]->border, 0);
-  aom_yv12_copy_frame(frames[0], &temp_bg);
-  temp_bg.bit_depth = frames[0]->bit_depth;
-
   // Allocate empty arrays for parameters between frames.
   double **params = aom_malloc(frames_to_stitch * sizeof(*params));
   for (int i = 0; i < frames_to_stitch; ++i) {
@@ -664,9 +1165,10 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
     memcpy(params[i], identity_params, sizeof(identity_params));
   }
 
-  // Use global motion to find affine transformations between frames.
-  // params[i] will have the transform from frame[i] to frame[i-1].
-  // params[0] will have the identity matrix because it has no previous frame.
+// Use global motion to find affine transformations between frames.
+// params[i] will have the transform from frame[i] to frame[i-1].
+// params[0] will have the identity matrix (has no previous frame).
+#if BGSPRITE_ENABLE_GME
   TransformationType model = AFFINE;
   int inliers_by_motion[RANSAC_NUM_MOTIONS];
   for (int frame = 0; frame < frames_to_stitch - 1; ++frame) {
@@ -686,6 +1188,7 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
       return 1;
     }
   }
+#endif  // BGSPRITE_ENABLE_GME
 
   // Compound the transformation parameters.
   for (int i = 1; i < frames_to_stitch; ++i) {
@@ -702,7 +1205,7 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
   int *y_max = aom_malloc(frames_to_stitch * sizeof(*y_max));
   int *y_min = aom_malloc(frames_to_stitch * sizeof(*y_min));
 
-  find_limits(cpi->initial_width, cpi->initial_height,
+  find_limits(frames[0]->y_width, frames[0]->y_height,
               (const double **const)params, frames_to_stitch, x_min, x_max,
               y_min, y_max, &pano_x_min, &pano_x_max, &pano_y_min, &pano_y_max);
 
@@ -721,20 +1224,17 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
   }
 
   // Recompute frame limits for new adjusted center.
-  find_limits(cpi->initial_width, cpi->initial_height,
+  find_limits(frames[0]->y_width, frames[0]->y_height,
               (const double **const)params, frames_to_stitch, x_min, x_max,
               y_min, y_max, &pano_x_min, &pano_x_max, &pano_y_min, &pano_y_max);
 
-  // Stitch Images.
-  stitch_images(frames, frames_to_stitch, center_idx,
+  // Stitch Images and apply bgsprite filter.
+  stitch_images(cpi, frames, frames_to_stitch, distance, center_idx,
                 (const double **const)params, x_min, x_max, y_min, y_max,
-                pano_x_min, pano_x_max, pano_y_min, pano_y_max, &temp_bg);
-
-  // Apply temporal filter.
-  av1_temporal_filter(cpi, &temp_bg, distance);
+                pano_x_min, pano_x_max, pano_y_min, pano_y_max,
+                &cpi->alt_ref_buffer);
 
   // Free memory.
-  aom_free_frame_buffer(&temp_bg);
   for (int i = 0; i < frames_to_stitch; ++i) {
     aom_free(params[i]);
   }
@@ -746,3 +1246,12 @@ int av1_background_sprite(AV1_COMP *cpi, int distance) {
 
   return 0;
 }
+
+#undef _POSIX_C_SOURCE
+#undef BGSPRITE_BLENDING_MODE
+#undef BGSPRITE_INTERPOLATION
+#undef BGSPRITE_ENABLE_METRICS
+#undef BGSPRITE_ENABLE_SEGMENTATION
+#undef BGSPRITE_ENABLE_GME
+#undef BGSPRITE_MASK_BLOCK_SIZE
+#undef TRANSFORM_MAT_DIM