diff --git a/src/cairo-gl-surface.c b/src/cairo-gl-surface.c
index 2acc8b5..019249e 100644
--- a/src/cairo-gl-surface.c
+++ b/src/cairo-gl-surface.c
@@ -2012,13 +2012,14 @@ typedef struct _cairo_gl_surface_span_renderer {
 
     cairo_gl_composite_setup_t setup;
 
+    int xmin, xmax;
+
     cairo_operator_t op;
     cairo_antialias_t antialias;
 
     cairo_gl_surface_t *dst;
     cairo_region_t *clip;
 
-    cairo_composite_rectangles_t composite_rectangles;
     GLuint vbo;
     void *vbo_base;
     unsigned int vbo_size;
@@ -2049,11 +2050,11 @@ _cairo_gl_span_renderer_flush (cairo_gl_surface_span_renderer_t *renderer)
 	    cairo_region_get_rectangle (renderer->clip, i, &rect);
 
 	    glScissor (rect.x, rect.y, rect.width, rect.height);
-	    glDrawArrays (GL_LINES, 0, count);
+	    glDrawArrays (GL_QUADS, 0, count);
 	}
 	glDisable (GL_SCISSOR_TEST);
     } else {
-	glDrawArrays (GL_LINES, 0, count);
+	glDrawArrays (GL_QUADS, 0, count);
     }
 }
 
@@ -2134,72 +2135,87 @@ _cairo_gl_emit_span_vertex (cairo_gl_surface_span_renderer_t *renderer,
 
 static void
 _cairo_gl_emit_span (cairo_gl_surface_span_renderer_t *renderer,
-		     int x1, int x2, int y, uint8_t alpha)
+		     int x, int y1, int y2,
+		     uint8_t alpha)
 {
     float *vertices = _cairo_gl_span_renderer_get_vbo (renderer, 2);
 
-    _cairo_gl_emit_span_vertex (renderer, x1, y, alpha, vertices);
-    _cairo_gl_emit_span_vertex (renderer, x2, y, alpha,
+    _cairo_gl_emit_span_vertex (renderer, x, y1, alpha, vertices);
+    _cairo_gl_emit_span_vertex (renderer, x, y2, alpha,
 			       vertices + renderer->vertex_size / 4);
 }
 
-/* Emits the contents of the span renderer rows as GL_LINES with the span's
- * alpha.
- *
- * Unlike the image surface, which is compositing into a temporary, we emit
- * coverage even for alpha == 0, in case we're using an unbounded operator.
- * But it means we avoid having to do the fixup.
- */
+static void
+_cairo_gl_emit_rectangle (cairo_gl_surface_span_renderer_t *renderer,
+			  int x1, int y1,
+			  int x2, int y2,
+			  int coverage)
+{
+    _cairo_gl_emit_span (renderer, x1, y1, y2, coverage);
+    _cairo_gl_emit_span (renderer, x2, y2, y1, coverage);
+}
+
 static cairo_status_t
-_cairo_gl_surface_span_renderer_render_row (
-    void				*abstract_renderer,
-    int					 y,
-    const cairo_half_open_span_t	*spans,
-    unsigned				 num_spans)
+_cairo_gl_render_bounded_spans (void *abstract_renderer,
+				int y, int height,
+				const cairo_half_open_span_t *spans,
+				unsigned num_spans)
 {
     cairo_gl_surface_span_renderer_t *renderer = abstract_renderer;
-    int xmin = renderer->composite_rectangles.mask.x;
-    int xmax = xmin + renderer->composite_rectangles.width;
-    int prev_x = xmin;
-    int prev_alpha = 0;
-    unsigned i;
-    int x_translate;
-
-    /* Make sure we're within y-range. */
-    if (y < renderer->composite_rectangles.mask.y ||
-	y >= renderer->composite_rectangles.mask.y +
-	renderer->composite_rectangles.height)
+
+    if (num_spans == 0)
 	return CAIRO_STATUS_SUCCESS;
 
-    x_translate = renderer->composite_rectangles.dst.x -
-	renderer->composite_rectangles.mask.x;
-    y += renderer->composite_rectangles.dst.y -
-	renderer->composite_rectangles.mask.y;
+    do {
+	if (spans[0].coverage) {
+	    _cairo_gl_emit_rectangle (renderer,
+				      spans[0].x, y,
+				      spans[1].x, y + height,
+				      spans[0].coverage);
+	}
 
-    /* Find the first span within x-range. */
-    for (i=0; i < num_spans && spans[i].x < xmin; i++) {}
-    if (i>0)
-	prev_alpha = spans[i-1].coverage;
+	spans++;
+    } while (--num_spans > 1);
 
-    /* Set the intermediate spans. */
-    for (; i < num_spans; i++) {
-	int x = spans[i].x;
+    return CAIRO_STATUS_SUCCESS;
+}
 
-	if (x >= xmax)
-	    break;
+static cairo_status_t
+_cairo_gl_render_unbounded_spans (void *abstract_renderer,
+				  int y, int height,
+				  const cairo_half_open_span_t *spans,
+				  unsigned num_spans)
+{
+    cairo_gl_surface_span_renderer_t *renderer = abstract_renderer;
 
-	_cairo_gl_emit_span (renderer,
-			     prev_x + x_translate, x + x_translate, y,
-			     prev_alpha);
+    if (num_spans == 0) {
+	_cairo_gl_emit_rectangle (renderer,
+				  renderer->xmin, y,
+				  renderer->xmax, y + height,
+				  0);
+	return CAIRO_STATUS_SUCCESS;
+    }
 
-	prev_x = x;
-	prev_alpha = spans[i].coverage;
+    if (spans[0].x != renderer->xmin) {
+	_cairo_gl_emit_rectangle (renderer,
+				  renderer->xmin, y,
+				  spans[0].x, y + height,
+				  0);
     }
 
-    if (prev_x < xmax) {
-	_cairo_gl_emit_span (renderer,
-			     prev_x + x_translate, xmax + x_translate, y,
-			     prev_alpha);
+    do {
+	_cairo_gl_emit_rectangle (renderer,
+				  spans[0].x, y,
+				  spans[1].x, y + height,
+				  spans[0].coverage);
+	spans++;
+    } while (--num_spans > 1);
+
+    if (spans[0].x != renderer->xmax) {
+	_cairo_gl_emit_rectangle (renderer,
+				  spans[0].x, y,
+				  renderer->xmax, y + height,
+				  0);
     }
 
     return CAIRO_STATUS_SUCCESS;
@@ -2274,8 +2290,6 @@ _cairo_gl_surface_create_span_renderer (cairo_operator_t	 op,
     cairo_gl_surface_t *dst = abstract_dst;
     cairo_gl_surface_span_renderer_t *renderer;
     cairo_status_t status;
-    int width = rects->width;
-    int height = rects->height;
     cairo_surface_attributes_t *src_attributes;
     GLenum err;
 
diff --git a/src/cairo-image-surface.c b/src/cairo-image-surface.c
index 48d8013..d52979d 100644
--- a/src/cairo-image-surface.c
+++ b/src/cairo-image-surface.c
@@ -1390,11 +1390,13 @@ typedef struct _cairo_image_surface_span_renderer {
     const cairo_pattern_t *pattern;
     cairo_antialias_t antialias;
 
+    uint8_t *mask_data;
+    uint32_t mask_stride;
+
     cairo_image_surface_t *src;
     cairo_surface_attributes_t src_attributes;
     cairo_image_surface_t *mask;
     cairo_image_surface_t *dst;
-
     cairo_composite_rectangles_t composite_rectangles;
 } cairo_image_surface_span_renderer_t;
 
@@ -1403,66 +1405,46 @@ _cairo_image_surface_span_render_row (
     int                                  y,
     const cairo_half_open_span_t        *spans,
     unsigned                             num_spans,
-    cairo_image_surface_t               *mask,
-    const cairo_composite_rectangles_t  *rects)
+    uint8_t				*data,
+    uint32_t				 stride)
 {
-    int xmin = rects->mask.x;
-    int xmax = xmin + rects->width;
     uint8_t *row;
-    int prev_x = xmin;
-    int prev_alpha = 0;
     unsigned i;
 
-    /* Make sure we're within y-range. */
-    y -= rects->mask.y;
-    if (y < 0 || y >= rects->height)
+    if (num_spans == 0)
 	return;
 
-    row = (uint8_t*)(mask->data) + y*(size_t)mask->stride - xmin;
-
-    /* Find the first span within x-range. */
-    for (i=0; i < num_spans && spans[i].x < xmin; i++) {}
-    if (i>0)
-	prev_alpha = spans[i-1].coverage;
-
-    /* Set the intermediate spans. */
-    for (; i < num_spans; i++) {
-	int x = spans[i].x;
-
-	if (x >= xmax)
-	    break;
-
-	if (prev_alpha != 0) {
-	    /* We implement setting rendering the most common single
-	     * pixel wide span case to avoid the overhead of a memset
-	     * call.  Open coding setting longer spans didn't show a
-	     * noticeable improvement over memset. */
-	    if (x == prev_x + 1) {
-		row[prev_x] = prev_alpha;
-	    }
-	    else {
-		memset(row + prev_x, prev_alpha, x - prev_x);
-	    }
+    row = data + y * stride;
+    for (i = 0; i < num_spans - 1; i++) {
+	if (! spans[i].coverage)
+	    continue;
+
+	/* We implement setting the most common single pixel wide
+	 * span case to avoid the overhead of a memset call.
+	 * Open coding setting longer spans didn't show a
+	 * noticeable improvement over memset.
+	 */
+	if (spans[i+1].x == spans[i].x + 1) {
+	    row[spans[i].x] = spans[i].coverage;
+	} else {
+	    memset (row + spans[i].x,
+		    spans[i].coverage,
+		    spans[i+1].x - spans[i].x);
 	}
-
-	prev_x = x;
-	prev_alpha = spans[i].coverage;
-    }
-
-    if (prev_alpha != 0 && prev_x < xmax) {
-	memset(row + prev_x, prev_alpha, xmax - prev_x);
     }
 }
 
 static cairo_status_t
-_cairo_image_surface_span_renderer_render_row (
+_cairo_image_surface_span_renderer_render_rows (
     void				*abstract_renderer,
     int					 y,
+    int					 height,
     const cairo_half_open_span_t	*spans,
     unsigned				 num_spans)
 {
     cairo_image_surface_span_renderer_t *renderer = abstract_renderer;
-    _cairo_image_surface_span_render_row (y, spans, num_spans, renderer->mask, &renderer->composite_rectangles);
+    while (height--)
+	_cairo_image_surface_span_render_row (y++, spans, num_spans, renderer->mask_data, renderer->mask_stride);
     return CAIRO_STATUS_SUCCESS;
 }
 
@@ -1517,11 +1499,11 @@ _cairo_image_surface_span_renderer_finish (void *abstract_renderer)
 		&dst->base,
 		src_attributes,
 		src->width, src->height,
-		rects->width, rects->height,
+		width, height,
 		rects->src.x, rects->src.y,
 		0, 0,		/* mask.x, mask.y */
 		rects->dst.x, rects->dst.y,
-		rects->width, rects->height,
+		width, height,
 		dst->clip_region);
 	}
     }
@@ -1567,7 +1549,7 @@ _cairo_image_surface_create_span_renderer (cairo_operator_t	 op,
 
     renderer->base.destroy = _cairo_image_surface_span_renderer_destroy;
     renderer->base.finish = _cairo_image_surface_span_renderer_finish;
-    renderer->base.render_row = _cairo_image_surface_span_renderer_render_row;
+    renderer->base.render_rows = _cairo_image_surface_span_renderer_render_rows;
     renderer->op = op;
     renderer->pattern = pattern;
     renderer->antialias = antialias;
@@ -1604,6 +1586,9 @@ _cairo_image_surface_create_span_renderer (cairo_operator_t	 op,
 	_cairo_image_surface_span_renderer_destroy (renderer);
 	return _cairo_span_renderer_create_in_error (status);
     }
+
+    renderer->mask_data = renderer->mask->data - rects->mask.x - rects->mask.y * renderer->mask->stride;
+    renderer->mask_stride = renderer->mask->stride;
     return &renderer->base;
 }
 
diff --git a/src/cairo-spans-private.h b/src/cairo-spans-private.h
index e29a567..af3b38c 100644
--- a/src/cairo-spans-private.h
+++ b/src/cairo-spans-private.h
@@ -47,26 +47,24 @@ typedef struct _cairo_half_open_span {
  * surfaces if they want to composite spans instead of trapezoids. */
 typedef struct _cairo_span_renderer cairo_span_renderer_t;
 struct _cairo_span_renderer {
+    /* Private status variable. */
+    cairo_status_t status;
+
     /* Called to destroy the renderer. */
     cairo_destroy_func_t	destroy;
 
-    /* Render the spans on row y of the source by whatever compositing
-     * method is required.  The function should ignore spans outside
-     * the bounding box set by the init() function. */
-    cairo_status_t (*render_row)(
-	void				*abstract_renderer,
-	int				 y,
-	const cairo_half_open_span_t	*coverages,
-	unsigned			 num_coverages);
+    /* Render the spans on row y of the destination by whatever compositing
+     * method is required. */
+    cairo_warn cairo_status_t
+    (*render_rows) (void *abstract_renderer,
+		    int y, int height,
+		    const cairo_half_open_span_t	*coverages,
+		    unsigned num_coverages);
 
     /* Called after all rows have been rendered to perform whatever
      * final rendering step is required.  This function is called just
      * once before the renderer is destroyed. */
-    cairo_status_t (*finish)(
-	void		      *abstract_renderer);
-
-    /* Private status variable. */
-    cairo_status_t status;
+    cairo_status_t (*finish) (void *abstract_renderer);
 };
 
 /* Scan converter interface. */
diff --git a/src/cairo-spans.c b/src/cairo-spans.c
index af3b85f..69894c1 100644
--- a/src/cairo-spans.c
+++ b/src/cairo-spans.c
@@ -275,13 +275,15 @@ _cairo_scan_converter_create_in_error (cairo_status_t status)
 }
 
 static cairo_status_t
-_cairo_nil_span_renderer_render_row (
+_cairo_nil_span_renderer_render_rows (
     void				*abstract_renderer,
     int					 y,
+    int					 height,
     const cairo_half_open_span_t	*coverages,
     unsigned				 num_coverages)
 {
     (void) y;
+    (void) height;
     (void) coverages;
     (void) num_coverages;
     return _cairo_span_renderer_status (abstract_renderer);
@@ -310,7 +312,7 @@ _cairo_span_renderer_set_error (
 	ASSERT_NOT_REACHED;
     }
     if (renderer->status == CAIRO_STATUS_SUCCESS) {
-	renderer->render_row = _cairo_nil_span_renderer_render_row;
+	renderer->render_rows = _cairo_nil_span_renderer_render_rows;
 	renderer->finish = _cairo_nil_span_renderer_finish;
 	renderer->status = error;
     }
diff --git a/src/cairo-tor-scan-converter.c b/src/cairo-tor-scan-converter.c
index 29262c2..2b9fb1b 100644
--- a/src/cairo-tor-scan-converter.c
+++ b/src/cairo-tor-scan-converter.c
@@ -128,27 +128,29 @@ blit_with_span_renderer(
     cairo_span_renderer_t	*span_renderer,
     struct pool			*span_pool,
     int				 y,
+    int				 height,
     int				 xmin,
     int				 xmax);
 
 static glitter_status_t
-blit_empty_with_span_renderer (cairo_span_renderer_t *renderer, int y);
+blit_empty_with_span_renderer (cairo_span_renderer_t *renderer, int y, int height);
 
 #define GLITTER_BLIT_COVERAGES_ARGS \
 	cairo_span_renderer_t *span_renderer, \
 	struct pool *span_pool
 
-#define GLITTER_BLIT_COVERAGES(cells, y, xmin, xmax) do {		\
+#define GLITTER_BLIT_COVERAGES(cells, y, height,xmin, xmax) do {	\
     cairo_status_t status = blit_with_span_renderer (cells,		\
 						     span_renderer,	\
 						     span_pool,		\
-						     y, xmin, xmax);	\
+						     y, height,		\
+						     xmin, xmax);	\
     if (unlikely (status))						\
 	return status;							\
 } while (0)
 
-#define GLITTER_BLIT_COVERAGES_EMPTY(y, xmin, xmax) do {		\
-    cairo_status_t status = blit_empty_with_span_renderer (span_renderer, y); \
+#define GLITTER_BLIT_COVERAGES_EMPTY(y, height, xmin, xmax) do {		\
+    cairo_status_t status = blit_empty_with_span_renderer (span_renderer, y, height); \
     if (unlikely (status))						\
 	return status;							\
 } while (0)
@@ -309,8 +311,8 @@ typedef int grid_area_t;
 #define UNROLL3(x) x x x
 
 struct quorem {
-    int quo;
-    int rem;
+    int32_t quo;
+    int32_t rem;
 };
 
 /* Header for a chunk of memory in a memory pool. */
@@ -382,6 +384,7 @@ struct edge {
     /* Original sign of the edge: +1 for downwards, -1 for upwards
      * edges.  */
     int dir;
+    int vertical;
 };
 
 /* Number of subsample rows per y-bucket. Must be GRID_Y. */
@@ -389,18 +392,28 @@ struct edge {
 
 #define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin))/EDGE_Y_BUCKET_HEIGHT)
 
+struct bucket {
+    /* Unsorted list of edges starting within this bucket. */
+    struct edge *edges;
+
+    /* Set to non-zero if there are edges starting strictly within the
+     * bucket. */
+    unsigned     have_inside_edges;
+};
+
 /* A collection of sorted and vertically clipped edges of the polygon.
  * Edges are moved from the polygon to an active list while scan
  * converting. */
 struct polygon {
-    /* The vertical clip extents. */
+    /* The clip extents. */
+    grid_scaled_x_t xmin, xmax;
     grid_scaled_y_t ymin, ymax;
 
     /* Array of edges all starting in the same bucket.	An edge is put
      * into bucket EDGE_BUCKET_INDEX(edge->ytop, polygon->ymin) when
      * it is added to the polygon. */
-    struct edge **y_buckets;
-    struct edge *y_buckets_embedded[64];
+    struct bucket *y_buckets;
+    struct bucket  y_buckets_embedded[64];
 
     struct {
 	struct pool base[1];
@@ -702,7 +715,6 @@ static void
 cell_list_fini(struct cell_list *cells)
 {
     pool_fini (cells->cell_pool.base);
-    cell_list_init (cells);
 }
 
 /* Empty the cell list.  This is called at the start of every pixel
@@ -715,6 +727,26 @@ cell_list_reset (struct cell_list *cells)
     pool_reset (cells->cell_pool.base);
 }
 
+static struct cell *
+cell_list_alloc (struct cell_list *cells,
+		 struct cell **cursor,
+		 struct cell *tail,
+		 int x)
+{
+    struct cell *cell;
+
+    cell = pool_alloc (cells->cell_pool.base, sizeof (struct cell));
+    if (unlikely (NULL == cell))
+	return NULL;
+
+    *cursor = cell;
+    cell->next = tail;
+    cell->x = x;
+    cell->uncovered_area = 0;
+    cell->covered_height = 0;
+    return cell;
+}
+
 /* Find a cell at the given x-coordinate.  Returns %NULL if a new cell
  * needed to be allocated but couldn't be.  Cells must be found with
  * non-decreasing x-coordinate until the cell list is rewound using
@@ -737,22 +769,10 @@ cell_list_find (struct cell_list *cells, int x)
     }
     cells->cursor = cursor;
 
-    if (tail->x == x) {
+    if (tail->x == x)
 	return tail;
-    } else {
-	struct cell *cell;
-
-	cell = pool_alloc (cells->cell_pool.base, sizeof (struct cell));
-	if (unlikely (NULL == cell))
-	    return NULL;
 
-	*cursor = cell;
-	cell->next = tail;
-	cell->x = x;
-	cell->uncovered_area = 0;
-	cell->covered_height = 0;
-	return cell;
-    }
+    return cell_list_alloc (cells, cursor, tail, x);
 }
 
 /* Find two cells at x1 and x2.	 This is exactly equivalent
@@ -832,9 +852,8 @@ cell_list_find_pair(struct cell_list *cells, int x1, int x2)
 /* Add an unbounded subpixel span covering subpixels >= x to the
  * coverage cells. */
 static glitter_status_t
-cell_list_add_unbounded_subspan(
-    struct cell_list *cells,
-    grid_scaled_x_t x)
+cell_list_add_unbounded_subspan (struct cell_list *cells,
+				 grid_scaled_x_t x)
 {
     struct cell *cell;
     int ix, fx;
@@ -907,20 +926,24 @@ cell_list_render_edge(
     struct edge *edge,
     int sign)
 {
-    struct quorem x1 = edge->x;
-    struct quorem x2 = x1;
     grid_scaled_y_t y1, y2, dy;
     grid_scaled_x_t dx;
     int ix1, ix2;
     grid_scaled_x_t fx1, fx2;
 
-    x2.quo += edge->dxdy_full.quo;
-    x2.rem += edge->dxdy_full.rem;
-    if (x2.rem >= 0) {
-	++x2.quo;
-	x2.rem -= edge->dy;
+    struct quorem x1 = edge->x;
+    struct quorem x2 = x1;
+
+    if (! edge->vertical) {
+	x2.quo += edge->dxdy_full.quo;
+	x2.rem += edge->dxdy_full.rem;
+	if (x2.rem >= 0) {
+	    ++x2.quo;
+	    x2.rem -= edge->dy;
+	}
+
+	edge->x = x2;
     }
-    edge->x = x2;
 
     GRID_X_TO_INT_FRAC(x1.quo, ix1, fx1);
     GRID_X_TO_INT_FRAC(x2.quo, ix2, fx2);
@@ -1026,6 +1049,7 @@ static void
 polygon_init (struct polygon *polygon)
 {
     polygon->ymin = polygon->ymax = 0;
+    polygon->xmin = polygon->xmax = 0;
     polygon->y_buckets = polygon->y_buckets_embedded;
     pool_init (polygon->edge_pool.base,
 	       8192 - sizeof (struct _pool_chunk),
@@ -1045,10 +1069,11 @@ polygon_fini (struct polygon *polygon)
  * receive new edges and clip them to the vertical range
  * [ymin,ymax). */
 static glitter_status_t
-polygon_reset(
-    struct polygon *polygon,
-    grid_scaled_y_t ymin,
-    grid_scaled_y_t ymax)
+polygon_reset (struct polygon *polygon,
+	       grid_scaled_x_t xmin,
+	       grid_scaled_x_t xmax,
+	       grid_scaled_y_t ymin,
+	       grid_scaled_y_t ymax)
 {
     unsigned h = ymax - ymin;
     unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax + EDGE_Y_BUCKET_HEIGHT-1,
@@ -1065,14 +1090,16 @@ polygon_reset(
     polygon->y_buckets =  polygon->y_buckets_embedded;
     if (num_buckets > ARRAY_LENGTH (polygon->y_buckets_embedded)) {
 	polygon->y_buckets = _cairo_malloc_ab (num_buckets,
-					       sizeof (struct edge *));
+					       sizeof (struct bucket));
 	if (unlikely (NULL == polygon->y_buckets))
 	    goto bail_no_mem;
     }
-    memset (polygon->y_buckets, 0, num_buckets * sizeof (struct edge *));
+    memset (polygon->y_buckets, 0, num_buckets * sizeof (struct bucket));
 
     polygon->ymin = ymin;
     polygon->ymax = ymax;
+    polygon->xmin = xmin;
+    polygon->xmax = xmax;
     return GLITTER_STATUS_SUCCESS;
 
  bail_no_mem:
@@ -1086,10 +1113,13 @@ _polygon_insert_edge_into_its_y_bucket(
     struct polygon *polygon,
     struct edge *e)
 {
-    unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
-    struct edge **ptail = &polygon->y_buckets[ix];
+    unsigned j = e->ytop - polygon->ymin;
+    unsigned ix = j / EDGE_Y_BUCKET_HEIGHT;
+    unsigned offset = j % EDGE_Y_BUCKET_HEIGHT;
+    struct edge **ptail = &polygon->y_buckets[ix].edges;
     e->next = *ptail;
     *ptail = e;
+    polygon->y_buckets[ix].have_inside_edges |= offset;
 }
 
 inline static glitter_status_t
@@ -1115,30 +1145,53 @@ polygon_add_edge (struct polygon *polygon,
     dx = edge->line.p2.x - edge->line.p1.x;
     dy = edge->line.p2.y - edge->line.p1.y;
     e->dy = dy;
-    e->dxdy = floored_divrem (dx, dy);
-
-    if (ymin <= edge->top)
-	ytop = edge->top;
-    else
-	ytop = ymin;
-    if (ytop == edge->line.p1.y) {
-	e->x.quo = edge->line.p1.x;
-	e->x.rem = 0;
-    } else {
-	e->x = floored_muldivrem (ytop - edge->line.p1.y, dx, dy);
-	e->x.quo += edge->line.p1.x;
-    }
-
     e->dir = edge->dir;
+
+    ytop = edge->top >= ymin ? edge->top : ymin;
+    ybot = edge->bottom <= ymax ? edge->bottom : ymax;
     e->ytop = ytop;
-    ybot = edge->bottom < ymax ? edge->bottom : ymax;
     e->height_left = ybot - ytop;
 
-    if (e->height_left >= GRID_Y) {
-	e->dxdy_full = floored_muldivrem (GRID_Y, dx, dy);
-    } else {
+    if (dx == 0) {
+	e->vertical = TRUE;
+	e->x.quo = edge->line.p1.x;
+	e->x.rem = 0;
+	e->dxdy.quo = 0;
+	e->dxdy.rem = 0;
 	e->dxdy_full.quo = 0;
 	e->dxdy_full.rem = 0;
+
+	/* Drop edges to the right of the clip extents. */
+	if (e->x.quo >= polygon->xmax)
+	    return GLITTER_STATUS_SUCCESS;
+
+	/* Offset vertical edges at the left side of the clip extents
+	 * to just shy of the left side.  We depend on this when
+	 * checking for possible intersections within the clip
+	 * rectangle. */
+	if (e->x.quo <= polygon->xmin) {
+	    e->x.quo = polygon->xmin - 1;
+	}
+    } else {
+	e->vertical = FALSE;
+	e->dxdy = floored_divrem (dx, dy);
+	if (ytop == edge->line.p1.y) {
+	    e->x.quo = edge->line.p1.x;
+	    e->x.rem = 0;
+	} else {
+	    e->x = floored_muldivrem (ytop - edge->line.p1.y, dx, dy);
+	    e->x.quo += edge->line.p1.x;
+	}
+
+	if (e->x.quo >= polygon->xmax && e->dxdy.quo >= 0)
+	    return GLITTER_STATUS_SUCCESS;
+
+	if (e->height_left >= GRID_Y) {
+	    e->dxdy_full = floored_muldivrem (GRID_Y, dx, dy);
+	} else {
+	    e->dxdy_full.quo = 0;
+	    e->dxdy_full.rem = 0;
+	}
     }
 
     _polygon_insert_edge_into_its_y_bucket (polygon, e);
@@ -1161,31 +1214,30 @@ active_list_init(struct active_list *active)
     active_list_reset(active);
 }
 
-static void
-active_list_fini(
-    struct active_list *active)
-{
-    active_list_reset(active);
-}
-
 /* Merge the edges in an unsorted list of edges into a sorted
  * list. The sort order is edges ascending by edge->x.quo.  Returns
  * the new head of the sorted list. */
 static struct edge *
 merge_unsorted_edges(struct edge *sorted_head, struct edge *unsorted_head)
 {
-    struct edge *head = unsorted_head;
     struct edge **cursor = &sorted_head;
     int x;
 
-    while (NULL != head) {
+    if (sorted_head == NULL) {
+	sorted_head = unsorted_head;
+	unsorted_head = unsorted_head->next;
+	sorted_head->next = NULL;
+	if (unsorted_head == NULL)
+	    return sorted_head;
+    }
+
+    do {
+	struct edge *next = unsorted_head->next;
 	struct edge *prev = *cursor;
-	struct edge *next = head->next;
-	x = head->x.quo;
 
-	if (NULL == prev || x < prev->x.quo) {
+	x = unsorted_head->x.quo;
+	if (x < prev->x.quo)
 	    cursor = &sorted_head;
-	}
 
 	while (1) {
 	    UNROLL3({
@@ -1196,26 +1248,29 @@ merge_unsorted_edges(struct edge *sorted_head, struct edge *unsorted_head)
 	    });
 	}
 
-	head->next = *cursor;
-	*cursor = head;
+	unsorted_head->next = *cursor;
+	*cursor = unsorted_head;
+	unsorted_head = next;
+    } while (unsorted_head != NULL);
 
-	head = next;
-    }
     return sorted_head;
 }
 
 /* Test if the edges on the active list can be safely advanced by a
  * full row without intersections or any edges ending. */
 inline static int
-active_list_can_step_full_row(
-    struct active_list *active)
+active_list_can_step_full_row (struct active_list *active,
+			       grid_scaled_x_t     xmin)
 {
+    const struct edge *e;
+    grid_scaled_x_t prev_x = INT_MIN;
+
     /* Recomputes the minimum height of all edges on the active
      * list if we have been dropping edges. */
     if (active->min_height <= 0) {
-	struct edge *e = active->head;
 	int min_height = INT_MAX;
 
+	e = active->head;
 	while (NULL != e) {
 	    if (e->height_left < min_height)
 		min_height = e->height_left;
@@ -1225,27 +1280,38 @@ active_list_can_step_full_row(
 	active->min_height = min_height;
     }
 
-    /* Check for intersections only if no edges end during the next
-     * row. */
-    if (active->min_height >= GRID_Y) {
-	grid_scaled_x_t prev_x = INT_MIN;
-	struct edge *e = active->head;
-	while (NULL != e) {
-	    struct quorem x = e->x;
+    if (active->min_height < GRID_Y)
+	return 0;
 
+    /* Check for intersections as no edges end during the next row. */
+    e = active->head;
+    while (NULL != e) {
+	struct quorem x = e->x;
+
+	if (! e->vertical) {
 	    x.quo += e->dxdy_full.quo;
 	    x.rem += e->dxdy_full.rem;
 	    if (x.rem >= 0)
 		++x.quo;
+	}
 
-	    if (x.quo <= prev_x)
+	/* There's may be an intersection if the edge sort order might
+	 * change. */
+	if (x.quo <= prev_x) {
+	    /* Ignore intersections to the left of the clip extents.
+	     * This assumes that all vertical edges on or at the left
+	     * side of the clip rectangle have been shifted slightly
+	     * to the left in polygon_add_edge(). */
+	    if (prev_x >= xmin || x.quo >= xmin || e->x.quo >= xmin)
 		return 0;
+	}
+	else {
 	    prev_x = x.quo;
-	    e = e->next;
 	}
-	return 1;
+	e = e->next;
     }
-    return 0;
+
+    return 1;
 }
 
 /* Merges edges on the given subpixel row from the polygon to the
@@ -1261,7 +1327,7 @@ active_list_merge_edges_from_polygon(
     unsigned ix = EDGE_Y_BUCKET_INDEX(y, polygon->ymin);
     int min_height = active->min_height;
     struct edge *subrow_edges = NULL;
-    struct edge **ptail = &polygon->y_buckets[ix];
+    struct edge **ptail = &polygon->y_buckets[ix].edges;
 
     while (1) {
 	struct edge *tail = *ptail;
@@ -1277,8 +1343,10 @@ active_list_merge_edges_from_polygon(
 	    ptail = &tail->next;
 	}
     }
-    active->head = merge_unsorted_edges(active->head, subrow_edges);
-    active->min_height = min_height;
+    if (subrow_edges) {
+	active->head = merge_unsorted_edges(active->head, subrow_edges);
+	active->min_height = min_height;
+    }
 }
 
 /* Advance the edges on the active list by one subsample row by
@@ -1439,11 +1507,13 @@ apply_nonzero_fill_rule_and_step_edges (struct active_list *active,
 		}
 	    }
 
-	    right_edge->x.quo += right_edge->dxdy_full.quo;
-	    right_edge->x.rem += right_edge->dxdy_full.rem;
-	    if (right_edge->x.rem >= 0) {
-		++right_edge->x.quo;
-		right_edge->x.rem -= right_edge->dy;
+	    if (! right_edge->vertical) {
+		right_edge->x.quo += right_edge->dxdy_full.quo;
+		right_edge->x.rem += right_edge->dxdy_full.rem;
+		if (right_edge->x.rem >= 0) {
+		    ++right_edge->x.quo;
+		    right_edge->x.rem -= right_edge->dy;
+		}
 	    }
 	}
 
@@ -1472,6 +1542,7 @@ apply_evenodd_fill_rule_and_step_edges (struct active_list *active,
     left_edge = *cursor;
     while (NULL != left_edge) {
 	struct edge *right_edge;
+	int winding = left_edge->dir;
 
 	left_edge->height_left -= GRID_Y;
 	if (left_edge->height_left)
@@ -1490,17 +1561,22 @@ apply_evenodd_fill_rule_and_step_edges (struct active_list *active,
 	    else
 		*cursor = right_edge->next;
 
+	    winding += right_edge->dir;
+	    if ((winding & 1) == 0) {
 	    if (right_edge->next == NULL ||
 		right_edge->next->x.quo != right_edge->x.quo)
 	    {
 		break;
 	    }
+	    }
 
-	    right_edge->x.quo += right_edge->dxdy_full.quo;
-	    right_edge->x.rem += right_edge->dxdy_full.rem;
-	    if (right_edge->x.rem >= 0) {
-		++right_edge->x.quo;
-		right_edge->x.rem -= right_edge->dy;
+	    if (! right_edge->vertical) {
+		right_edge->x.quo += right_edge->dxdy_full.quo;
+		right_edge->x.rem += right_edge->dxdy_full.rem;
+		if (right_edge->x.rem >= 0) {
+		    ++right_edge->x.quo;
+		    right_edge->x.rem -= right_edge->dy;
+		}
 	    }
 	}
 
@@ -1537,8 +1613,14 @@ blit_span(
     }
 }
 
-#define GLITTER_BLIT_COVERAGES(coverages, y, xmin, xmax) \
-	blit_cells(coverages, raster_pixels + (y)*raster_stride, xmin, xmax)
+#define GLITTER_BLIT_COVERAGES(coverages, y, height, xmin, xmax) \
+    do { \
+	int __y = y; \
+	int __h = height; \
+	do { \
+	    blit_cells(coverages, raster_pixels + (__y)*raster_stride, xmin, xmax); \
+	} while (--__h); \
+    } while (0)
 
 static void
 blit_cells(
@@ -1597,7 +1679,6 @@ static void
 _glitter_scan_converter_fini(glitter_scan_converter_t *converter)
 {
     polygon_fini(converter->polygon);
-    active_list_fini(converter->active);
     cell_list_fini(converter->coverages);
     converter->xmin=0;
     converter->ymin=0;
@@ -1641,7 +1722,7 @@ glitter_scan_converter_reset(
 
     active_list_reset(converter->active);
     cell_list_reset(converter->coverages);
-    status = polygon_reset(converter->polygon, ymin, ymax);
+    status = polygon_reset(converter->polygon, xmin, xmax, ymin, ymax);
     if (status)
 	return status;
 
@@ -1711,19 +1792,48 @@ glitter_scan_converter_add_edge (glitter_scan_converter_t *converter,
 #endif
 
 #ifndef GLITTER_BLIT_COVERAGES_EMPTY
-# define GLITTER_BLIT_COVERAGES_EMPTY(y, xmin, xmax)
+# define GLITTER_BLIT_COVERAGES_EMPTY(y0, y1, xmin, xmax)
 #endif
 
+static cairo_bool_t
+active_list_is_vertical (struct active_list *active)
+{
+    struct edge *e;
+
+    for (e = active->head; e != NULL; e = e->next) {
+	if (! e->vertical)
+	    return FALSE;
+    }
+
+    return TRUE;
+}
+
+static void
+step_edges (struct active_list *active, int count)
+{
+    struct edge **cursor = &active->head;
+    struct edge *edge;
+
+    for (edge = *cursor; edge != NULL; edge = *cursor) {
+	edge->height_left -= GRID_Y * count;
+	if (edge->height_left)
+	    cursor = &edge->next;
+	else
+	    *cursor = edge->next;
+    }
+}
+
 I glitter_status_t
 glitter_scan_converter_render(
     glitter_scan_converter_t *converter,
     int nonzero_fill,
     GLITTER_BLIT_COVERAGES_ARGS)
 {
-    int i;
+    int i, j;
     int ymax_i = converter->ymax / GRID_Y;
     int ymin_i = converter->ymin / GRID_Y;
     int xmin_i, xmax_i;
+    grid_scaled_x_t xmin = converter->xmin;
     int h = ymax_i - ymin_i;
     struct polygon *polygon = converter->polygon;
     struct cell_list *coverages = converter->coverages;
@@ -1738,22 +1848,28 @@ glitter_scan_converter_render(
     GLITTER_BLIT_COVERAGES_BEGIN;
 
     /* Render each pixel row. */
-    for (i=0; i<h; i++) {
+    for (i = 0; i < h; i = j) {
 	int do_full_step = 0;
 	glitter_status_t status = 0;
 
+	j = i + 1;
+
 	/* Determine if we can ignore this row or use the full pixel
 	 * stepper. */
-	if (GRID_Y == EDGE_Y_BUCKET_HEIGHT && ! polygon->y_buckets[i]) {
+	if (polygon->y_buckets[i].edges == NULL) {
 	    if (! active->head) {
-		GLITTER_BLIT_COVERAGES_EMPTY (i+ymin_i, xmin_i, xmax_i);
+		for (; j < h && ! polygon->y_buckets[j].edges; j++)
+		    ;
+		GLITTER_BLIT_COVERAGES_EMPTY (i+ymin_i, j-i, xmin_i, xmax_i);
 		continue;
 	    }
-
-	    do_full_step = active_list_can_step_full_row (active);
+	    do_full_step = active_list_can_step_full_row (active, xmin);
+	}
+	else if (! polygon->y_buckets[i].have_inside_edges) {
+	    grid_scaled_y_t y = (i+ymin_i)*GRID_Y;
+	    active_list_merge_edges_from_polygon (active, y, polygon);
+	    do_full_step = active_list_can_step_full_row (active, xmin);
 	}
-
-	cell_list_reset (coverages);
 
 	if (do_full_step) {
 	    /* Step by a full pixel row's worth. */
@@ -1764,8 +1880,20 @@ glitter_scan_converter_render(
 		status = apply_evenodd_fill_rule_and_step_edges (active,
 								 coverages);
 	    }
+
+	    if (active_list_is_vertical (active)) {
+		while (j < h &&
+		       polygon->y_buckets[j].edges == NULL &&
+		       active->min_height >= 2*GRID_Y)
+		{
+		    active->min_height -= GRID_Y;
+		    j++;
+		}
+		if (j != i + 1)
+		    step_edges (active, j - (i + 1));
+	    }
 	} else {
-	    /* Subsample this row. */
+	    /* Supersample this row. */
 	    grid_scaled_y_t suby;
 	    for (suby = 0; suby < GRID_Y; suby++) {
 		grid_scaled_y_t y = (i+ymin_i)*GRID_Y + suby;
@@ -1787,13 +1915,13 @@ glitter_scan_converter_render(
 	if (unlikely (status))
 	    return status;
 
-	GLITTER_BLIT_COVERAGES(coverages, i+ymin_i, xmin_i, xmax_i);
+	GLITTER_BLIT_COVERAGES(coverages, i+ymin_i, j-i, xmin_i, xmax_i);
+	cell_list_reset (coverages);
 
-	if (! active->head) {
+	if (! active->head)
 	    active->min_height = INT_MAX;
-	} else {
+	else
 	    active->min_height -= GRID_Y;
-	}
     }
 
     /* Clean up the coverage blitter. */
@@ -1807,21 +1935,20 @@ glitter_scan_converter_render(
  * scan converter subclass. */
 
 static glitter_status_t
-blit_with_span_renderer(
-    struct cell_list *cells,
-    cairo_span_renderer_t *renderer,
-    struct pool *span_pool,
-    int y,
-    int xmin,
-    int xmax)
+blit_with_span_renderer (struct cell_list *cells,
+			 cairo_span_renderer_t *renderer,
+			 struct pool *span_pool,
+			 int y, int height,
+			 int xmin, int xmax)
 {
     struct cell *cell = cells->head;
     int prev_x = xmin;
     int cover = 0;
     cairo_half_open_span_t *spans;
     unsigned num_spans;
+
     if (cell == NULL)
-	return CAIRO_STATUS_SUCCESS;
+	return blit_empty_with_span_renderer (renderer, y, height);
 
     /* Skip cells to the left of the clip region. */
     while (cell != NULL && cell->x < xmin) {
@@ -1833,12 +1960,12 @@ blit_with_span_renderer(
     /* Count number of cells remaining. */
     {
 	struct cell *next = cell;
-	num_spans = 0;
-	while (next) {
+	num_spans = 1;
+	while (next != NULL) {
 	    next = next->next;
 	    ++num_spans;
 	}
-	num_spans = 2*num_spans + 1;
+	num_spans = 2*num_spans;
     }
 
     /* Allocate enough spans for the row. */
@@ -1853,6 +1980,7 @@ blit_with_span_renderer(
     for (; cell != NULL; cell = cell->next) {
 	int x = cell->x;
 	int area;
+
 	if (x >= xmax)
 	    break;
 
@@ -1872,20 +2000,26 @@ blit_with_span_renderer(
 	prev_x = x+1;
     }
 
-    if (prev_x < xmax) {
+    if (prev_x <= xmax) {
 	spans[num_spans].x = prev_x;
 	spans[num_spans].coverage = GRID_AREA_TO_ALPHA (cover);
 	++num_spans;
     }
 
+    if (prev_x < xmax && cover) {
+	spans[num_spans].x = xmax;
+	spans[num_spans].coverage = 0;
+	++num_spans;
+    }
+
     /* Dump them into the renderer. */
-    return renderer->render_row (renderer, y, spans, num_spans);
+    return renderer->render_rows (renderer, y, height, spans, num_spans);
 }
 
 static glitter_status_t
-blit_empty_with_span_renderer (cairo_span_renderer_t *renderer, int y)
+blit_empty_with_span_renderer (cairo_span_renderer_t *renderer, int y, int height)
 {
-    return renderer->render_row (renderer, y, NULL, 0);
+    return renderer->render_rows (renderer, y, height, NULL, 0);
 }
 
 struct _cairo_tor_scan_converter {
diff --git a/src/cairo-win32-surface.c b/src/cairo-win32-surface.c
index 82d1cf5..d4575a3 100644
--- a/src/cairo-win32-surface.c
+++ b/src/cairo-win32-surface.c
@@ -1954,6 +1954,9 @@ typedef struct _cairo_win32_surface_span_renderer {
     const cairo_pattern_t *pattern;
     cairo_antialias_t antialias;
 
+    uint8_t *mask_data;
+    uint32_t mask_stride;
+
     cairo_image_surface_t *mask;
     cairo_win32_surface_t *dst;
     cairo_region_t *clip_region;
@@ -1962,14 +1965,16 @@ typedef struct _cairo_win32_surface_span_renderer {
 } cairo_win32_surface_span_renderer_t;
 
 static cairo_status_t
-_cairo_win32_surface_span_renderer_render_row (
+_cairo_win32_surface_span_renderer_render_rows (
     void				*abstract_renderer,
     int					 y,
+    int					 height,
     const cairo_half_open_span_t	*spans,
     unsigned				 num_spans)
 {
     cairo_win32_surface_span_renderer_t *renderer = abstract_renderer;
-    _cairo_image_surface_span_render_row (y, spans, num_spans, renderer->mask, &renderer->composite_rectangles);
+    while (height--)
+	_cairo_image_surface_span_render_row (y++, spans, num_spans, renderer->mask_data, renderer->mask_stride);
     return CAIRO_STATUS_SUCCESS;
 }
 
@@ -2066,8 +2071,7 @@ _cairo_win32_surface_create_span_renderer (cairo_operator_t	 op,
 
     renderer->base.destroy = _cairo_win32_surface_span_renderer_destroy;
     renderer->base.finish = _cairo_win32_surface_span_renderer_finish;
-    renderer->base.render_row =
-	_cairo_win32_surface_span_renderer_render_row;
+    renderer->base.render_rows = _cairo_win32_surface_span_renderer_render_rows;
     renderer->op = op;
     renderer->pattern = pattern;
     renderer->antialias = antialias;
@@ -2088,6 +2092,9 @@ _cairo_win32_surface_create_span_renderer (cairo_operator_t	 op,
 	_cairo_win32_surface_span_renderer_destroy (renderer);
 	return _cairo_span_renderer_create_in_error (status);
     }
+
+    renderer->mask_data = renderer->mask->data - rects->mask.x - rects->mask.y * renderer->mask->stride;
+    renderer->mask_stride = renderer->mask->stride;
     return &renderer->base;
 }
 
diff --git a/src/cairo-xlib-display.c b/src/cairo-xlib-display.c
index a7a40b8..566d9fb 100644
--- a/src/cairo-xlib-display.c
+++ b/src/cairo-xlib-display.c
@@ -407,6 +407,10 @@ _cairo_xlib_display_get (Display *dpy,
 	display->buggy_pad_reflect = TRUE;
     }
 
+    /* gradients don't seem to work */
+    display->buggy_gradients = TRUE;
+
+
     /* XXX workaround; see https://bugzilla.mozilla.org/show_bug.cgi?id=413583 */
     /* If buggy_repeat_force == -1, then initialize.
      *    - set to -2, meaning "nothing was specified", and we trust the above detection.
diff --git a/src/cairoint.h b/src/cairoint.h
index 58850ab..1cdf6ff 100644
--- a/src/cairoint.h
+++ b/src/cairoint.h
@@ -2257,8 +2257,8 @@ cairo_private void
 _cairo_image_surface_span_render_row (int				 y,
 				      const cairo_half_open_span_t	 *spans,
 				      unsigned				 num_spans,
-				      cairo_image_surface_t              *mask,
-				      const cairo_composite_rectangles_t *rects);
+				      uint8_t				*data,
+				      uint32_t				 stride);
 
 cairo_private cairo_image_transparency_t
 _cairo_image_analyze_transparency (cairo_image_surface_t      *image);