/* Source file: cairo-drm-i915-shader.c (from the cairo source package) */

/* cairo - a vector graphics library with display and print output
 *
 * Copyright © 2009 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it either under the terms of the GNU Lesser General Public
 * License version 2.1 as published by the Free Software Foundation
 * (the "LGPL") or, at your option, under the terms of the Mozilla
 * Public License Version 1.1 (the "MPL"). If you do not alter this
 * notice, a recipient may use your version of this file under either
 * the MPL or the LGPL.
 *
 * You should have received a copy of the LGPL along with this library
 * in the file COPYING-LGPL-2.1; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA
 * You should have received a copy of the MPL along with this library
 * in the file COPYING-MPL-1.1
 *
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
 * OF ANY KIND, either express or implied. See the LGPL or the MPL for
 * the specific language governing rights and limitations.
 *
 * The Original Code is the cairo graphics library.
 *
 * Contributor(s):
 *    Chris Wilson <chris@chris-wilson.co.uk>
 */

#include "cairoint.h"

#include "cairo-error-private.h"
#include "cairo-drm-i915-private.h"
#include "cairo-surface-offset-private.h"
#include "cairo-surface-subsurface-private.h"
#include "cairo-surface-snapshot-private.h"

#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS
/* for DRI2/DRM interoperability */
#include "cairo-xcb-private.h"
#endif

#if 0
/*
 * Surface "finish" hook for packed-pixel surfaces.
 *
 * Acquires the owning device, destroys the surface's buffer object and,
 * if the surface is flagged as a currently bound texture, clears the
 * device's cached source/mask pointers and resets its sampler count.
 * Always returns CAIRO_STATUS_SUCCESS.
 */
static cairo_status_t
i915_packed_pixel_surface_finish (void *abstract_surface)
{
    i915_packed_pixel_surface_t *packed = abstract_surface;
    i915_device_t *dev;

    dev = i915_device_acquire (&packed->device->intel.base);
    intel_bo_destroy (&dev->intel, packed->bo);

    if (packed->is_current_texture) {
	/* Drop whichever cached bindings still refer to this surface. */
	if (packed->is_current_texture & CURRENT_SOURCE) {
	    dev->current_source = NULL;
	}
	if (packed->is_current_texture & CURRENT_MASK) {
	    dev->current_mask = NULL;
	}

	/* Sampler state must be re-established on next use. */
	dev->current_n_samplers = 0;
    }

    i915_device_release (dev);
    return CAIRO_STATUS_SUCCESS;
}

/*
 * Backend table for packed-pixel surfaces.  Only the first two members are
 * given (positionally): the I915_PACKED_PIXEL_SURFACE_TYPE identifier and
 * the finish hook.  All remaining members are implicitly zero-initialised.
 */
static const cairo_surface_backend_t i915_packed_pixel_surface_backend = {
    I915_PACKED_PIXEL_SURFACE_TYPE,
    i915_packed_pixel_surface_finish,
};

/*
 * Create a packed-pixel surface from planar pixel data and upload the
 * planes into a freshly created buffer object.
 *
 * device: device that owns the buffer object.
 * pixel:  packed pixel layout; only YUV_I420 is implemented, every other
 *         value hits ASSERT_NOT_REACHED.
 * data:   source planes, read consecutively (Y plane, then two
 *         half-width/half-height planes).
 * length: size of @data.  NOTE(review): this value is never consulted, so
 *         nothing here guards against @data being shorter than the planes
 *         that are copied -- confirm callers validate it.
 * width, height: surface size in pixels; anything larger than 2048 in
 *         either dimension is rejected with CAIRO_STATUS_INVALID_SIZE.
 *
 * Returns the new surface, or an error surface on invalid size or
 * allocation failure.
 */
static cairo_surface_t *
i915_packed_pixel_surface_create (i915_device_t *device,
                           i915_packed_pixel_t pixel,
                           const uint8_t *data,
                           uint32_t length,
                           uint32_t width, uint32_t height)
{
    i915_packed_pixel_surface_t *surface;
    cairo_content_t content;
    uint32_t tiling, size;
    uint32_t stride, half_stride;
    uint32_t i;

    if (width > 2048 || height > 2048)
      return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_SIZE));

    surface = malloc (sizeof (i915_packed_pixel_surface_t));
    if (unlikely (surface == NULL))
      return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY));

    tiling = I915_TILING_NONE; /* XXX */
    half_stride = stride = i915_tiling_stride (tiling, width/2);
    if (stride < width)
      stride *= 2 ;
    height = i915_tiling_height (tiling, height);

    switch (surface->pixel = pixel) {
    case YUV_I420:
      content = CAIRO_CONTENT_COLOR;

      /* Plane 0: full-resolution 8-bit plane at the start of the bo. */
      surface->offset[0] = 0;
      surface->width[0] = width;
      surface->height[0] = height;
      surface->stride[0] = stride;
      surface->map0[0] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling);
      surface->map0[0] |= ((height - 1) << MS3_HEIGHT_SHIFT) |
                      ((width - 1)  << MS3_WIDTH_SHIFT);
      surface->map1[0] = (stride / 4 - 1) << MS4_PITCH_SHIFT;

      /* Plane 1: half-resolution plane directly after plane 0. */
      surface->offset[1] = stride * height;
      surface->width[1] = width / 2;
      surface->height[1] = height / 2;
      surface->stride[1] = half_stride;
      surface->map0[1] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling);
      surface->map0[1] |= ((height/2 - 1) << MS3_HEIGHT_SHIFT) |
                      ((width/2 - 1)  << MS3_WIDTH_SHIFT);
      surface->map1[1] = (half_stride / 4 - 1) << MS4_PITCH_SHIFT;

      /* Plane 2: either placed beside plane 1 within the same rows
       * (when a row has room) or stacked after it. */
      if (width < half_stride) {
          surface->offset[2] = stride * height + half_stride / 2;
          size = stride * height + half_stride * height / 2;
      } else {
          surface->offset[2] = stride * height + half_stride * height / 2;
          size = stride * height + half_stride * height;
      }
      surface->width[2] = width / 2;
      surface->height[2] = height / 2;
      surface->stride[2] = half_stride;
      surface->map0[2] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling);
      surface->map0[2] |= ((height/2 - 1) << MS3_HEIGHT_SHIFT) |
                      ((width/2 - 1)  << MS3_WIDTH_SHIFT);
      surface->map1[2] = (half_stride / 4 - 1) << MS4_PITCH_SHIFT;
      break;

    case NONE:
    case YUV_YV12:
    case YUV_YUY2:
    case YUV_UYVY:
      ASSERT_NOT_REACHED;
      break;
    }

    _cairo_surface_init (&surface->base,
                       &i915_packed_pixel_surface_backend,
                   content);

    surface->bo = intel_bo_create (&device->intel, size, FALSE);
    if (unlikely (surface->bo == NULL)) {
      free (surface);
      return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY));
    }
    /* Only inspect the bo once we know the allocation succeeded. */
    assert (surface->bo->tiling == I915_TILING_NONE);

    if (tiling == I915_TILING_NONE) {
      /* Untiled: upload through intel_bo_write, row by row whenever the
       * destination stride differs from the packed source width. */
      intel_bo_t *bo = surface->bo;
      uint32_t dst;
      int uv;

      dst = surface->offset[0];
      if (width == stride) {
          size = stride * height;
          intel_bo_write (&device->intel, bo, dst, size, data);
          data += size;
      } else {
          for (i = 0; i < height; i++) {
            intel_bo_write (&device->intel, bo, dst, width, data);
            dst += stride;
            data += width;
          }
      }

      for (uv = 1; uv <= 2; uv++) {
          dst = surface->offset[uv];
          if (width / 2 == half_stride) {
            size = half_stride * height / 2;
            intel_bo_write (&device->intel, bo, dst, size, data);
            data += size;
          } else {
            size = width / 2;
            for (i = 0; i < height / 2; i++) {
                intel_bo_write (&device->intel, bo, dst, size, data);
                dst += half_stride;
                data += size;
            }
          }
      }
    } else {
      /* Tiled: copy through a CPU mapping of the bo. */
      uint8_t *dst, *base;

      base = intel_bo_map (&device->intel, surface->bo);

      dst = base + surface->offset[0];
      if (width == stride) {
          size = stride * height;
          memcpy (dst, data, size);
          data += size;
      } else {
          for (i = 0; i < height; i++) {
            memcpy (dst, data, width);
            dst += stride;
            data += width;
          }
      }

      dst = base + surface->offset[1];
      if (width / 2 == half_stride) {
          size = half_stride * height / 2;
          memcpy (dst, data, size);
          data += size;
      } else {
          size = width / 2;
          for (i = 0; i < height / 2; i++) {
            memcpy (dst, data, size);
            dst += half_stride;
            data += size;
          }
      }

      dst = base + surface->offset[2];
      if (width / 2 == half_stride) {
          size = half_stride * height / 2;
          memcpy (dst, data, size);
          data += size;
      } else {
          size = width / 2;
          for (i = 0; i < height / 2; i++) {
            memcpy (dst, data, size);
            dst += half_stride;
            data += size;
          }
      }
    }

    surface->device = device;
    surface->is_current_texture = 0;

    return &surface->base;
}

/*
 * Try to build a packed-pixel clone of @source from attached I420 data.
 *
 * Looks up the "video/x-raw-yuv/i420" MIME payload on @source and, if one
 * is attached, creates a YUV_I420 packed-pixel surface of @width x @height
 * on the device of @surface.
 *
 * Returns CAIRO_INT_STATUS_UNSUPPORTED when no such payload exists (or the
 * creation returns NULL), the clone's error status if it is in error, and
 * CAIRO_STATUS_SUCCESS with *@clone_out set otherwise.
 */
static cairo_int_status_t
i915_clone_yuv (i915_surface_t *surface,
             cairo_surface_t *source,
             int width, int height,
             cairo_surface_t **clone_out)
{
    const uint8_t *yuv = NULL;
    unsigned int yuv_length;
    cairo_surface_t *packed;
    i915_device_t *device;

    cairo_surface_get_mime_data (source, "video/x-raw-yuv/i420",
				 &yuv, &yuv_length);
    if (yuv == NULL) {
	return CAIRO_INT_STATUS_UNSUPPORTED;
    }

    device = (i915_device_t *) surface->base.device;
    packed = i915_packed_pixel_surface_create (device, YUV_I420,
					       yuv, yuv_length,
					       width, height);
    if (packed == NULL) {
	return CAIRO_INT_STATUS_UNSUPPORTED;
    }

    if (unlikely (packed->status)) {
	return packed->status;
    }

    *clone_out = packed;
    return CAIRO_STATUS_SUCCESS;
}
#endif

/* Max instruction count: 4 */
/*
 * Emit fragment-shader instructions that turn a linear gradient coordinate
 * into a colour by blending the two constants c0 and c1.
 *
 * device: not referenced by name here; presumably consumed implicitly by
 *         the i915_fs_* emitter macros -- TODO confirm.
 * mode:   how the coordinate is brought into range before blending.
 * in:     register holding the gradient coordinate (its X channel is used).
 * c0, c1: registers holding the two end colours.
 * out:    register receiving the blended colour.
 */
static void
i915_shader_linear_color (i915_device_t *device,
                    enum i915_shader_linear_mode mode,
                    int in, int c0, int c1, int out)
{
    int tmp = FS_U0;

    switch (mode) {
    case LINEAR_TEXTURE:
      ASSERT_NOT_REACHED;
      /* fall through: treated like LINEAR_NONE if the assertion is compiled out */
    case LINEAR_NONE:
      /* use the input coordinate unmodified */
      tmp = in;
      break;

    case LINEAR_REPEAT:
      /* keep only the fractional part of the coordinate */
      i915_fs_frc (tmp, i915_fs_operand (in, X, X, X, X));
      break;
#if 0
    case LINEAR_REFLECT:
      /* XXX needs an extra constant: C2 [0.5, 2.0, x, x] */
      i915_fs_mul (tmp, in, 0.5);
      i915_fs_frc (tmp, i915_fs_operand_reg (tmp));
      i915_fs_mul (tmp, tmp, 2.0);
      i915_fs_add (tmp, i915_fs_operand_one (),
                 i915_fs_operand_reg_negate (tmp));
      i915_fs_cmp (tmp,
                 i915_fs_operand_reg (tmp),
                 i915_fs_operand_reg (tmp),
                 i915_fs_operand_reg_negate (tmp));
      i915_fs_add (tmp, i915_fs_operand_one (),
                 i915_fs_operand_reg_negate (tmp));
      /* NOTE(review): no break here, so enabling this block would fall
       * through into LINEAR_PAD -- confirm that is intended. */
#endif
    case LINEAR_PAD:
      /* clamp the coordinate: tmp = min (1, max (0, in.x)) */
      i915_fs_max (tmp,
                 i915_fs_operand_zero (),
                 i915_fs_operand (in, X, X, X, X));
      i915_fs_min (tmp,
                 i915_fs_operand_one (),
                 i915_fs_operand_reg (tmp));
      break;
    }

    /* interpolate */
    /* Assuming mad computes a*b + c, the two instructions below give
     * out = c0 - t*c0, then out = t*c1 + out, i.e. c0*(1 - t) + c1*t.
     * NOTE(review): the destination-mask argument is a literal 0 here,
     * whereas i915_shader_radial_coord passes MASK_* values -- confirm
     * what a zero mask means for i915_fs_mad. */
    i915_fs_mad (out, 0,
             i915_fs_operand (tmp, NEG_X, NEG_X, NEG_X, NEG_X),
             i915_fs_operand_reg (c0),
             i915_fs_operand_reg (c0));
    i915_fs_mad (out, 0,
             i915_fs_operand (tmp, X, X, X, X),
             i915_fs_operand_reg (c1),
             i915_fs_operand_reg (out));
}

/*
 * Fill in the shader constants and mode for a radial gradient.
 *
 * When both circles share a centre the gradient is evaluated with the
 * single-root formula (RADIAL_ONE) and only the first four constants are
 * meaningful; otherwise the quadratic form (RADIAL_TWO) is used and all
 * eight constants are set.  The pattern matrix is copied in either case.
 */
static void
i915_shader_radial_init (struct i915_shader_radial *r,
                   const cairo_radial_pattern_t *radial)
{
    double r1, dr;

    r1 = _cairo_fixed_to_double (radial->r1);
    dr = _cairo_fixed_to_double (radial->r2 - radial->r1);

    if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
	/* XXX dr == 0, meaningless with anything other than PAD */
	double cx, cy;
	int i;

	cx = _cairo_fixed_to_double (radial->c1.x);
	cy = _cairo_fixed_to_double (radial->c1.y);

	r->constants[0] = cx / dr;
	r->constants[1] = cy / dr;
	r->constants[2] = 1. / dr;
	r->constants[3] = -r1 / dr;

	/* Second constant register is unused in this mode. */
	for (i = 4; i < 8; i++)
	    r->constants[i] = 0;

	r->base.mode = RADIAL_ONE;
    } else {
	double dx, dy;

	dx = _cairo_fixed_to_double (radial->c2.x - radial->c1.x);
	dy = _cairo_fixed_to_double (radial->c2.y - radial->c1.y);

	/* First register: -c1.x, -c1.y, r1, -4*A  (A = dx² + dy² - dr²) */
	r->constants[0] = -_cairo_fixed_to_double (radial->c1.x);
	r->constants[1] = -_cairo_fixed_to_double (radial->c1.y);
	r->constants[2] = r1;
	r->constants[3] = -4 * (dx*dx + dy*dy - dr*dr);

	/* Second register: -2*dx, -2*dy, -2*r1*dr, 1/(2*A) */
	r->constants[4] = -2 * dx;
	r->constants[5] = -2 * dy;
	r->constants[6] = -2 * r1 * dr;
	r->constants[7] = 1 / (2 * (dx*dx + dy*dy - dr*dr));

	r->base.mode = RADIAL_TWO;
    }

    r->base.matrix = radial->base.base.matrix;
}

/* Max instruction count: 10 */
/*
 * Emit fragment-shader instructions that compute the radial gradient
 * parameter t from a position.
 *
 * device: not referenced by name here; presumably consumed implicitly by
 *         the i915_fs_* emitter macros -- TODO confirm.
 * mode:   RADIAL_ONE or RADIAL_TWO, selecting which formula is emitted.
 * in:     register holding the position (X and Y channels are read).
 * g0, g1: registers holding the gradient constants; only g0 is read for
 *         RADIAL_ONE.  The per-channel meaning follows the comments in
 *         each case below.
 * out:    register receiving the computed coordinate.
 *
 * FS_U0 (and FS_U1 for RADIAL_TWO) are used as scratch registers.
 */
static void
i915_shader_radial_coord (i915_device_t *device,
                    enum i915_shader_radial_mode mode,
                    int in, int g0, int g1, int out)
{
    switch (mode) {
    case RADIAL_ONE:
      /*
         pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
         r² = pdx*pdx + pdy*pdy
         t = r²/sqrt(r²) - r1/dr;
         */
      i915_fs_mad (FS_U0, MASK_X | MASK_Y,
                 i915_fs_operand (in, X, Y, ZERO, ZERO),
                 i915_fs_operand (g0, Z, Z, ZERO, ZERO),
                 i915_fs_operand (g0, NEG_X, NEG_Y, ZERO, ZERO));
      i915_fs_dp2add (FS_U0, MASK_X,
                  i915_fs_operand (FS_U0, X, Y, ZERO, ZERO),
                  i915_fs_operand (FS_U0, X, Y, ZERO, ZERO),
                  i915_fs_operand_zero ());
      i915_fs_rsq (out, MASK_X, i915_fs_operand (FS_U0, X, X, X, X));
      i915_fs_mad (out, MASK_X,
                 i915_fs_operand (FS_U0, X, ZERO, ZERO, ZERO),
                 i915_fs_operand (out, X, ZERO, ZERO, ZERO),
                 i915_fs_operand (g0, W, ZERO, ZERO, ZERO));
      break;

    case RADIAL_TWO:
      /*
         pdx = x - c1x, pdy = y - c1y;
         A = dx² + dy² - dr²
         B = -2*(pdx*dx + pdy*dy + r1*dr);
         C = pdx² + pdy² - r1²;
         det = B*B - 4*A*C;
         t = (-B + sqrt (det)) / (2 * A)
         */

      /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
      i915_fs_add (FS_U0,
                 i915_fs_operand (in, X, Y, ZERO, ZERO),
                 i915_fs_operand (g0, X, Y, Z, ZERO));
      /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
      i915_fs_dp3 (FS_U0, MASK_W,
                 i915_fs_operand (FS_U0, X, Y, ONE, ZERO),
                 i915_fs_operand (g1, X, Y, Z, ZERO));
      /* u1.x = pdx² + pdy² - r1²; [C] */
      i915_fs_dp3 (FS_U1, MASK_X,
                 i915_fs_operand (FS_U0, X, Y, Z, ZERO),
                 i915_fs_operand (FS_U0, X, Y, NEG_Z, ZERO));
      /* u1.x = C, u1.y = B, u1.z=-4*A; */
      i915_fs_mov_masked (FS_U1, MASK_Y, i915_fs_operand (FS_U0, W, W, W, W));
      i915_fs_mov_masked (FS_U1, MASK_Z, i915_fs_operand (g0, W, W, W, W));
      /* u1.x = B² - 4*A*C */
      i915_fs_dp2add (FS_U1, MASK_X,
                  i915_fs_operand (FS_U1, X, Y, ZERO, ZERO),
                  i915_fs_operand (FS_U1, Z, Y, ZERO, ZERO),
                  i915_fs_operand_zero ());
      /* out.x = -B + sqrt (B² - 4*A*C),
       * out.y = -B - sqrt (B² - 4*A*C),
       */
      i915_fs_rsq (out, MASK_X, i915_fs_operand (FS_U1, X, X, X, X));
      i915_fs_mad (out, MASK_X | MASK_Y,
                 i915_fs_operand (out, X, X, ZERO, ZERO),
                 i915_fs_operand (FS_U1, X, NEG_X, ZERO, ZERO),
                 i915_fs_operand (FS_U0, NEG_W, NEG_W, ZERO, ZERO));
      /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A),
       * out.y = (-B - sqrt (B² - 4*A*C)) / (2 * A)
       */
      i915_fs_mul (out,
                 i915_fs_operand (out, X, Y, ZERO, ZERO),
                 i915_fs_operand (g1, W, W, ZERO, ZERO));
      /* if (A > 0)
       *   out = (-B + sqrt (B² - 4*A*C)) / (2 * A),
       * else
       *   out = (-B - sqrt (B² - 4*A*C)) / (2 * A)
       */
      i915_fs_cmp (out,
                 i915_fs_operand (g1, W, ZERO, ZERO, ZERO),
                 i915_fs_operand (out, X, ZERO, ZERO, ZERO),
                 i915_fs_operand (out, Y, ZERO, ZERO, ZERO));
      break;
    }
}

/* Max instruction count: 7 */
/*
 * Emit fragment-shader instructions that combine three sampled planes into
 * one colour written to @out.
 *
 * device:     not referenced by name here; presumably consumed implicitly
 *             by the i915_fs_* emitter macros -- TODO confirm.
 * y, u, v:    registers holding the three sampled planes.
 * c0, c1, c2: constant registers; c0 is added to the gathered (y, u, v)
 *             vector, c1 and c2 supply the dot-product coefficients.
 * out:        destination register; its X, Z and Y channels are written.
 *
 * FS_U0 is used as a scratch register.
 */
static inline void
i915_shader_yuv_color (i915_device_t *device,
                   int y, int u, int v,
                   int c0, int c1, int c2,
                   int out)
{
    /* gather the three planes into u0.x, u0.y, u0.z */
    i915_fs_mov_masked (FS_U0, MASK_X, i915_fs_operand_reg (y));
    i915_fs_mov_masked (FS_U0, MASK_Y, i915_fs_operand_reg (u));
    i915_fs_mov_masked (FS_U0, MASK_Z, i915_fs_operand_reg (v));

    /* u0 += c0 */
    i915_fs_add (FS_U0,
             i915_fs_operand_reg (FS_U0),
             i915_fs_operand_reg (c0));
    /* out.x = dot (u0.xyz, (c1.x, 0, c1.y)) */
    i915_fs_dp3 (out, MASK_X,
             i915_fs_operand_reg (FS_U0),
             i915_fs_operand (c1, X, ZERO, Y, ZERO));
    /* out.z = dot (u0.xyz, (c1.z, c1.w, 0)) */
    i915_fs_dp3 (out, MASK_Z,
             i915_fs_operand_reg (FS_U0),
             i915_fs_operand (c1, Z, W, ZERO, ZERO));
    /* out.y = dot (u0.xyz, c2.xyz) */
    i915_fs_dp3 (out, MASK_Y,
             i915_fs_operand_reg (FS_U0),
             i915_fs_operand_reg (c2));
}

/*
 * Build the per-channel part of the shader program key: the low four bits
 * of the fragment type combined with the channel mode shifted up by
 * FS_DETAILS_SHIFT.
 */
static inline uint32_t
i915_shader_channel_key (const union i915_shader_channel *channel)
{
    uint32_t fragment_bits;
    uint32_t mode_bits;

    fragment_bits = channel->type.fragment & 0x0f;
    mode_bits = channel->base.mode << FS_DETAILS_SHIFT;

    return fragment_bits | mode_bits;
}

/*
 * Number of texture coordinates a single channel consumes: one for the
 * linear, radial, texture, spans and YUV fragment types, none for every
 * other (including unknown) type.
 */
static uint32_t
i915_shader_channel_get_num_tex_coords (const union i915_shader_channel *channel)
{
    uint32_t count;

    switch (channel->type.fragment) {
    case FS_LINEAR:
    case FS_RADIAL:
    case FS_TEXTURE:
    case FS_SPANS:
    case FS_YUV:
	count = 1;
	break;

    case FS_ZERO:
    case FS_ONE:
    case FS_CONSTANT:
    case FS_PURE:
    case FS_DIFFUSE:
    default:
	count = 0;
	break;
    }

    return count;
}

/*
 * Total number of texture coordinates used by a shader: the sum over its
 * source, mask, clip and dst channels.
 */
static uint32_t
i915_shader_get_num_tex_coords (const i915_shader_t *shader)
{
    uint32_t total = i915_shader_channel_get_num_tex_coords (&shader->source);

    total += i915_shader_channel_get_num_tex_coords (&shader->mask);
    total += i915_shader_channel_get_num_tex_coords (&shader->clip);
    total += i915_shader_channel_get_num_tex_coords (&shader->dst);

    return total;
}

/*
 * Build a source operand for @reg in which each of the four destination
 * channels selects either the given @channel of @reg or the ZERO constant.
 * Bit n of @pure (bit 0 = X, 1 = Y, 2 = Z, 3 = W) picks @channel when set
 * and ZERO when clear.
 *
 * @reg and @pure are parenthesized so that compound expressions such as
 * `a | b` are evaluated as a unit; both may be evaluated more than once.
 */
#define i915_fs_operand_impure(reg, channel, pure) \
    ((reg) | \
     ((((pure) & (1 << 0)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 1)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 2)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 3)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT))

/*
 * Build a source operand on FS_R0 whose four channels are the constants
 * ONE or ZERO.  Bit n of @pure (bit 0 = X, 1 = Y, 2 = Z, 3 = W) selects
 * ONE when set and ZERO when clear.
 *
 * @pure is parenthesized so that compound expressions such as `a | b` are
 * evaluated as a unit; it may be evaluated more than once.
 */
#define i915_fs_operand_pure(pure) \
    (FS_R0 | \
     ((((pure) & (1 << 0)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 1)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 2)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 3)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT))

/*
 * Emit the fragment program for @shader on @device, unless the device is
 * already running an identical one.
 *
 * A 32-bit key is built from the source, mask and clip channel keys, the
 * operator, whether opacity is below 1 and whether the content has alpha.
 * If it equals device->current_program nothing is emitted.  Otherwise the
 * program is generated in this fixed order: source, mask, opacity, clip
 * texture load, destination combine, clip application, final move to the
 * output colour register FS_OC.
 *
 * Registers are handed out with running offsets (texture_offset,
 * constant_offset, sampler_offset), so the order in which the channels are
 * processed must match the order in which their textures, constants and
 * samplers are bound.  ~0U is used throughout as "no register assigned".
 */
static void
i915_set_shader_program (i915_device_t *device,
                   const i915_shader_t *shader)
{
    uint32_t num_tex_coords;
    uint32_t num_samplers;
    uint32_t n;
    uint32_t texture_offset = 0;
    uint32_t constant_offset = 0;
    uint32_t sampler_offset = 0;
    uint32_t source_reg;
    uint32_t source_pure;
    uint32_t mask_reg;
    uint32_t out_reg;
    uint32_t dest_reg;
    FS_LOCALS;

    /* Program cache key.  The top bit is produced from an unsigned value:
     * shifting a signed 1 into bit 31 would be undefined behaviour. */
    n = (i915_shader_channel_key (&shader->source) <<  0) |
      (i915_shader_channel_key (&shader->mask)   <<  8) |
      (i915_shader_channel_key (&shader->clip)   << 16) |
      (shader->op << 24) |
      ((shader->opacity < 1.) << 30) |
      ((uint32_t) ((shader->content & CAIRO_CONTENT_ALPHA) == CAIRO_CONTENT_ALPHA) << 31);
    if (n == device->current_program)
      return;
    device->current_program = n;

    FS_BEGIN ();

    /* A zero source needs no source/mask work: output either the clip
     * texture's alpha or plain zero. */
    if (shader->source.type.fragment == FS_ZERO) {
      if (shader->clip.type.fragment == FS_TEXTURE) {
          /* XXX need_combine */
          assert (shader->mask.type.fragment == (i915_fragment_shader_t) -1);
          i915_fs_dcl (FS_T0);
          i915_fs_texld (FS_U0, FS_S0, FS_T0);
          if ((shader->content & CAIRO_CONTENT_COLOR) == 0)
            i915_fs_mov (FS_OC, i915_fs_operand (FS_U0, W, W, W, W));
          else
            i915_fs_mov (FS_OC, i915_fs_operand (FS_U0, ZERO, ZERO, ZERO, W));
      } else {
          i915_fs_mov (FS_OC, i915_fs_operand_zero ());
      }

      FS_END ();
      return;
    }

    /* Declare every texture coordinate and sampler the channels use. */
    num_tex_coords = i915_shader_get_num_tex_coords (shader);
    for (n = 0; n < num_tex_coords; n++)
      i915_fs_dcl (FS_T0 + n);

    num_samplers =
      shader->source.base.n_samplers +
      shader->mask.base.n_samplers +
      shader->clip.base.n_samplers +
      shader->dst.base.n_samplers;
    for (n = 0; n < num_samplers; n++)
      i915_fs_dcl (FS_S0 + n);

    /* Write the source straight to the output register when nothing
     * further (mask, clip, combine, alpha-only conversion) follows. */
    source_reg = ~0;
    source_pure = 0;
    out_reg = FS_R0;
    if (! shader->need_combine &&
      shader->mask.type.fragment == (i915_fragment_shader_t) -1 &&
      shader->clip.type.fragment != FS_TEXTURE &&
      shader->content != CAIRO_CONTENT_ALPHA)
    {
      out_reg = FS_OC;
    }

    /* --- source channel --- */
    switch (shader->source.type.fragment) {
    default:
    case FS_ZERO:
    case FS_SPANS:
      ASSERT_NOT_REACHED;

    case FS_PURE:
      /* remember which channels are set; no register is needed */
      source_pure = shader->source.solid.pure;
      /* fall through */
    case FS_ONE:
      break;

    case FS_CONSTANT:
      source_reg = FS_C0;
      constant_offset += 1;
      break;

    case FS_DIFFUSE:
      i915_fs_dcl (FS_T8);
      source_reg = FS_T8;
      break;

    case FS_LINEAR:
      i915_shader_linear_color (device, shader->source.base.mode,
                          FS_T0, /* input */
                          FS_C0, FS_C1, /* colour ramp */
                          FS_U3); /* unpremultiplied output */
      /* XXX can we defer premultiplication? */
      i915_fs_mul (out_reg,
                 i915_fs_operand_reg (FS_U3),
                 i915_fs_operand (FS_U3, W, W, W, ONE));

      constant_offset += 2;
      texture_offset += 1;
      source_reg = out_reg;
      break;

    case FS_RADIAL:
      i915_shader_radial_coord (device, shader->source.base.mode,
                          FS_T0, /* input */
                          FS_C0, FS_C1, /* gradient constants */
                          FS_R0); /* coordinate */

      i915_fs_texld (out_reg, FS_S0, FS_R0);
      constant_offset += 2;
      texture_offset += 1;
      sampler_offset += 1;
      source_reg = out_reg;
      break;

    case FS_TEXTURE:
      i915_fs_texld (out_reg, FS_S0, FS_T0);
      texture_offset += 1;
      sampler_offset += 1;
      source_reg = out_reg;
      break;

    case FS_YUV:
      /* Load samplers to temporaries. */
      i915_fs_texld (FS_R0, FS_S0, FS_T0);
      i915_fs_texld (FS_R1, FS_S1, FS_T0);
      i915_fs_texld (FS_R2, FS_S2, FS_T0);

      i915_shader_yuv_color (device,
                         FS_R0, FS_R1, FS_R2, /* y, u, v */
                         FS_C0, FS_C1, FS_C2, /* coefficients */
                         out_reg);

      constant_offset += 3;
      texture_offset += 1;
      sampler_offset += 3;
      source_reg = out_reg;
      break;
    }

    /* --- mask channel --- */
    mask_reg = ~0;
    switch (shader->mask.type.fragment) {
    case FS_PURE:
    case FS_ZERO:
    case FS_YUV:
    case FS_DIFFUSE:
      ASSERT_NOT_REACHED;
    case FS_ONE:
    default:
      break;

    case FS_SPANS:
      /* span coverage arrives in a texture coordinate register */
      mask_reg = FS_T0 + texture_offset;
      texture_offset += 1;
      break;

    case FS_CONSTANT:
      mask_reg = FS_C0 + constant_offset;
      constant_offset += 1;
      break;

    case FS_LINEAR:
      i915_shader_linear_color (device, shader->mask.base.mode,
                          FS_T0 + texture_offset, /* input */
                          FS_C0 + constant_offset,
                          FS_C0 + constant_offset + 1, /* colour ramp */
                          FS_R1); /* unpremultiplied output */
      constant_offset += 2;
      texture_offset += 1;
      mask_reg = FS_R1;
      break;

    case FS_RADIAL:
      i915_shader_radial_coord (device, shader->mask.base.mode,
                          FS_T0 + texture_offset, /* input */
                          FS_C0 + constant_offset,
                          FS_C0 + constant_offset + 1, /* gradient constants */
                          FS_R1); /* coordinate */

      i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_R1);
      constant_offset += 2;
      texture_offset += 1;
      sampler_offset += 1;
      mask_reg = FS_R1;
      break;

    case FS_TEXTURE:
      i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_T0 + texture_offset);
      texture_offset += 1;
      sampler_offset += 1;
      mask_reg = FS_R1;
      break;
    }

    /* --- source IN mask --- */
    if (mask_reg != ~0U) {
      if (! shader->need_combine &&
          shader->clip.type.fragment != FS_TEXTURE &&
          (shader->content != CAIRO_CONTENT_ALPHA || source_reg == ~0U))
      {
          out_reg = FS_OC;
      }
      if (source_reg == ~0U) {
          /* No source register: the source is FS_ONE or FS_PURE, so the
           * result is the mask itself, optionally zeroed per channel. */
          if (source_pure) {
            if (shader->mask.type.fragment == FS_SPANS) {
                if (out_reg == FS_OC && shader->content == CAIRO_CONTENT_ALPHA) {
                  if (source_pure & (1 << 3))
                      i915_fs_mov (out_reg, i915_fs_operand (mask_reg, X, X, X, X));
                  else
                      i915_fs_mov (out_reg, i915_fs_operand_zero ());
                } else {
                  i915_fs_mov (out_reg,
                             i915_fs_operand_impure (mask_reg, X, source_pure));
                }
            } else {
                /* XXX ComponentAlpha
                   i915_fs_mov (out_reg,
                   i915_fs_operand_pure (mask_reg,
                   shader->source.solid.pure));
                   */
                if (out_reg == FS_OC && shader->content == CAIRO_CONTENT_ALPHA) {
                  if (source_pure & (1 << 3))
                      i915_fs_mov (out_reg, i915_fs_operand (mask_reg, W, W, W, W));
                  else
                      i915_fs_mov (out_reg, i915_fs_operand_zero ());
                } else {
                  i915_fs_mov (out_reg,
                             i915_fs_operand_impure (mask_reg, W, source_pure));
                }
            }
            source_reg = out_reg;
          } else if (shader->mask.type.fragment == FS_SPANS) {
            i915_fs_mov (out_reg,
                       i915_fs_operand (mask_reg, X, X, X, X));
            source_reg = out_reg;
          } else {
            source_reg = mask_reg;
          }
      } else {
          /* Span coverage lives in the X channel, other masks in W. */
          if (shader->mask.type.fragment == FS_SPANS) {
                if (out_reg == FS_OC && shader->content == CAIRO_CONTENT_ALPHA) {
                  i915_fs_mul (out_reg,
                             i915_fs_operand (source_reg, W, W, W, W),
                             i915_fs_operand (mask_reg, X, X, X, X));
                } else {
                  i915_fs_mul (out_reg,
                             i915_fs_operand_reg (source_reg),
                             i915_fs_operand (mask_reg, X, X, X, X));
                }
          } else {
            /* XXX ComponentAlpha
            i915_fs_mul (FS_R0,
                       i915_fs_operand_reg (source_reg),
                       i915_fs_operand_reg (mask_reg));
             */
            if (out_reg == FS_OC && shader->content == CAIRO_CONTENT_ALPHA) {
                i915_fs_mul (out_reg,
                         i915_fs_operand (source_reg, W, W, W, W),
                         i915_fs_operand (mask_reg, W, W, W, W));
            } else {
                i915_fs_mul (out_reg,
                         i915_fs_operand_reg (source_reg),
                         i915_fs_operand (mask_reg, W, W, W, W));
            }
          }

          source_reg = out_reg;
      }
    }

    /* --- opacity: scale by the next constant register ---
     * NOTE(review): source_reg can still be ~0U here (FS_ONE/FS_PURE
     * source without a mask); confirm callers never combine that with
     * opacity < 1. */
    if (shader->opacity < 1.) {
      i915_fs_mul (source_reg,
                 i915_fs_operand_reg (source_reg),
                 i915_fs_operand_reg (FS_C0 + constant_offset));
      constant_offset++;
    }

    /* need to preserve order of src, mask, clip, dst */
    mask_reg = ~0;
    if (shader->clip.type.fragment == FS_TEXTURE) {
      i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_T0 + texture_offset);
      texture_offset += 1;
      sampler_offset += 1;
      mask_reg = FS_R1;
    }

    /* --- combine with the destination (source OP dest) ---
     * dest_reg is only assigned, and only read, when need_combine is set.
     * NOTE(review): several multiplies below pass dest_reg directly rather
     * than through i915_fs_operand_reg(); confirm the intended swizzle. */
    if (shader->need_combine) {
      assert (shader->dst.type.fragment == FS_TEXTURE);

      i915_fs_texld (FS_R2, FS_S0 + sampler_offset, FS_T0 + texture_offset);
      texture_offset += 1;
      sampler_offset += 1;
      dest_reg = FS_R2;

      switch (shader->op) {
      case CAIRO_OPERATOR_CLEAR:
      case CAIRO_OPERATOR_SOURCE:
          ASSERT_NOT_REACHED;

      case CAIRO_OPERATOR_OVER:
          if (source_reg == ~0U) {
            /* XXX shader->source.type.fragment == FS_PURE */
            dest_reg = FS_OC;
          } else {
            /* source + (1 - source.alpha) * dest */
            i915_fs_add (FS_U0,
                       i915_fs_operand (source_reg, NEG_W, NEG_W, NEG_W, NEG_W),
                       i915_fs_operand_one ());
            i915_fs_mul (FS_U0,
                       i915_fs_operand_reg (FS_U0),
                       dest_reg);
            i915_fs_add (FS_R3,
                       i915_fs_operand_reg (source_reg),
                       i915_fs_operand_reg (FS_U0));
            source_reg = FS_R3;
          }
          break;

      case CAIRO_OPERATOR_IN:
          if (source_reg == ~0U) {
            /* XXX shader->source.type.fragment == FS_PURE */
            source_reg = dest_reg;
          } else {
            /* source * dest */
            i915_fs_mul (FS_R3,
                       i915_fs_operand_reg (source_reg),
                       dest_reg);
            source_reg = FS_R3;
          }
          break;

      case CAIRO_OPERATOR_OUT:
          if (source_reg == ~0U) {
            /* XXX shader->source.type.fragment == FS_PURE */
            i915_fs_mov (FS_R3, i915_fs_operand_zero ());
            source_reg = FS_R3;
          } else {
            /* (1 - source.alpha) * dest */
            i915_fs_add (FS_U0,
                       i915_fs_operand (source_reg, NEG_W, NEG_W, NEG_W, NEG_W),
                       i915_fs_operand_one ());
            i915_fs_mul (FS_R3,
                       i915_fs_operand_reg (FS_U0),
                       dest_reg);
            source_reg = FS_R3;
          }
          break;

      case CAIRO_OPERATOR_ATOP:

      case CAIRO_OPERATOR_DEST:
      case CAIRO_OPERATOR_DEST_OVER:
      case CAIRO_OPERATOR_DEST_IN:
      case CAIRO_OPERATOR_DEST_OUT:
      case CAIRO_OPERATOR_DEST_ATOP:

      case CAIRO_OPERATOR_XOR:
      case CAIRO_OPERATOR_ADD:
      case CAIRO_OPERATOR_SATURATE:

      case CAIRO_OPERATOR_MULTIPLY:
      case CAIRO_OPERATOR_SCREEN:
      case CAIRO_OPERATOR_OVERLAY:
      case CAIRO_OPERATOR_DARKEN:
      case CAIRO_OPERATOR_LIGHTEN:
      case CAIRO_OPERATOR_COLOR_DODGE:
      case CAIRO_OPERATOR_COLOR_BURN:
      case CAIRO_OPERATOR_HARD_LIGHT:
      case CAIRO_OPERATOR_SOFT_LIGHT:
      case CAIRO_OPERATOR_DIFFERENCE:
      case CAIRO_OPERATOR_EXCLUSION:
      case CAIRO_OPERATOR_HSL_HUE:
      case CAIRO_OPERATOR_HSL_SATURATION:
      case CAIRO_OPERATOR_HSL_COLOR:
      case CAIRO_OPERATOR_HSL_LUMINOSITY:
          ASSERT_NOT_REACHED;
          break;
      }
    }

    /* --- apply the clip texture (loaded into mask_reg above) --- */
    if (shader->clip.type.fragment == FS_TEXTURE) {
      assert (mask_reg != ~0U);

      if (! shader->need_combine) {
          /* (source IN clip) */
          if (source_reg == ~0U) {
            if (source_pure == 0) {
                source_reg = mask_reg;
            } else {
                out_reg = FS_OC;
                if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
                  if (source_pure & (1 << 3))
                      i915_fs_mov (out_reg, i915_fs_operand (mask_reg, W, W, W, W));
                  else
                      i915_fs_mov (out_reg, i915_fs_operand_zero ());
                } else {
                  i915_fs_mov (out_reg,
                             i915_fs_operand_impure (mask_reg, W, source_pure));
                }
                source_reg = out_reg;
            }
          } else if (mask_reg) {
            out_reg = FS_OC;
            if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
                i915_fs_mul (out_reg,
                         i915_fs_operand (source_reg, W, W, W, W),
                         i915_fs_operand (mask_reg, W, W, W, W));
            } else {
                i915_fs_mul (out_reg,
                         i915_fs_operand_reg (source_reg),
                         i915_fs_operand (mask_reg, W, W, W, W));
            }

            source_reg = out_reg;
          }
      } else {
          /* (source OP dest) LERP_clip dest */
          if (source_reg == ~0U) {
            if (source_pure == 0) {
                i915_fs_mov (FS_R3,
                         i915_fs_operand (mask_reg, W, W, W, W));
            } else {
                i915_fs_mov (FS_R3,
                         i915_fs_operand_impure (mask_reg, W, source_pure));
            }
          } else {
            i915_fs_mul (FS_R3,
                       i915_fs_operand_reg (source_reg),
                       i915_fs_operand (mask_reg, W, W, W, W));
          }

          /* mask_reg = 1 - clip.alpha */
          i915_fs_add (mask_reg,
                   i915_fs_operand_one (),
                   i915_fs_operand (mask_reg, NEG_W, NEG_W, NEG_W, NEG_W));

          if (dest_reg != FS_OC) {
            if (dest_reg == ~0U) {
                assert (shader->dst.type.fragment == FS_TEXTURE);

                i915_fs_texld (FS_R2, FS_S0 + sampler_offset, FS_T0 + texture_offset);
                texture_offset += 1;
                sampler_offset += 1;
                dest_reg = FS_R2;
            }

            i915_fs_mul (FS_U1,
                       i915_fs_operand_reg (dest_reg),
                       i915_fs_operand_reg (mask_reg));
            mask_reg = FS_U1;
          }

          source_reg = FS_OC;
          if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
            i915_fs_add (source_reg,
                       i915_fs_operand (FS_R3, W, W, W, W),
                       i915_fs_operand (mask_reg, W, W, W, W));
          } else {
            i915_fs_add (source_reg,
                       i915_fs_operand_reg (FS_R3),
                       i915_fs_operand_reg (mask_reg));
          }
      }
    }

    /* --- final move into the output colour register, if the result has
     * not already been written there --- */
    if (source_reg != FS_OC) {
      if (source_reg == ~0U) {
          if (source_pure) {
            if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
                if (source_pure & (1 << 3))
                  i915_fs_mov (FS_OC, i915_fs_operand_one ());
                else
                  i915_fs_mov (FS_OC, i915_fs_operand_zero ());
            } else
                i915_fs_mov (FS_OC, i915_fs_operand_pure (source_pure));
          } else {
            i915_fs_mov (FS_OC, i915_fs_operand_one ());
          }
      } else if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
          i915_fs_mov (FS_OC, i915_fs_operand (source_reg, W, W, W, W));
      } else {
          i915_fs_mov (FS_OC, i915_fs_operand_reg (source_reg));
      }
    }

    FS_END ();
}

/*
 * Derive the coefficients stored in @l (dx, dy, offset) from the two end
 * points of @linear.
 *
 * The direction p2 - p1 is divided by its squared length, and the offset
 * is the negated dot product of that scaled direction with p1.  When the
 * pattern matrix is not the identity, the coefficients are folded through
 * it with a matrix multiply.
 *
 * Returns FALSE (leaving @l untouched) when the squared distance between
 * the end points is no greater than 1e-5, TRUE otherwise.
 */
static cairo_bool_t
i915_shader_linear_init (struct i915_shader_linear *l,
                         const cairo_linear_pattern_t *linear)
{
    cairo_matrix_t m;
    double dx, dy, len2;
    double x0, y0, offset;

    dx = _cairo_fixed_to_double (linear->p2.x - linear->p1.x);
    dy = _cairo_fixed_to_double (linear->p2.y - linear->p1.y);

    /* Degenerate gradient: the end points (almost) coincide. */
    len2 = dx * dx + dy * dy;
    if (len2 <= 1e-5)
        return FALSE;

    dx /= len2;
    dy /= len2;

    x0 = _cairo_fixed_to_double (linear->p1.x);
    y0 = _cairo_fixed_to_double (linear->p1.y);
    offset = dx*x0 + dy*y0;

    /* Fast path: nothing to fold in when the pattern matrix is identity. */
    if (_cairo_matrix_is_identity (&linear->base.base.matrix)) {
        l->dx = dx;
        l->dy = dy;
        l->offset = -offset;
        return TRUE;
    }

    /* Express the coefficients as a matrix and combine it with the
     * pattern matrix; only the first row of the product is needed. */
    cairo_matrix_init (&m, dx, 0, dy, 0, -offset, 0);
    cairo_matrix_multiply (&m, &linear->base.base.matrix, &m);
    l->dx = m.xx;
    l->dy = m.xy;
    l->offset = m.x0;

    return TRUE;
}

/*
 * Report whether the linear texture coordinate evaluated by
 * i915_shader_linear_texcoord() stays within [0, 1] at all four corners
 * of @extents.
 *
 * Returns FALSE as soon as one corner evaluates below 0 or above 1.
 */
static cairo_bool_t
i915_shader_linear_contains_rectangle (struct i915_shader_linear *l,
                                       const cairo_rectangle_int_t *extents)
{
    int corner;

    /* Bit 0 selects the right edge, bit 1 the bottom edge, which visits
     * top-left, top-right, bottom-left and bottom-right in that order. */
    for (corner = 0; corner < 4; corner++) {
        double v;

        v = i915_shader_linear_texcoord (l,
                                         extents->x + ((corner & 1) ? extents->width : 0),
                                         extents->y + ((corner & 2) ? extents->height : 0));
        if (v < 0. || v > 1.)
            return FALSE;
    }

    return TRUE;
}

/* Helpers for classifying one colour component C (callers pass the
 * *_short fields of a colour).  MASK is non-zero when the component is
 * in use.
 *
 *   is_pure: the component is unused (MASK == 0), or C <= 0x00ff, or
 *            C >= 0xff00.
 *   is_one:  the component is in use and C >= 0xff00.
 *   is_zero: the component is in use and C <= 0x00ff.
 */
#define is_pure(C,mask) (((mask) == 0) || (C) <= 0x00ff || (C) >= 0xff00)
#define is_one(C,mask) (((mask) != 0) && (C) >= 0xff00)
#define is_zero(C,mask) (((mask) != 0) && (C) <= 0x00ff)

/*
 * Configure @src to supply the constant colour of @solid.
 *
 * The colour is copied into the channel and then classified so that the
 * cheapest fragment source is selected:
 *   - alpha_short <= 0x00ff                -> FS_ZERO
 *   - every component >= 0xff00            -> FS_ONE
 *   - every component passes is_pure()     -> FS_PURE, with the per
 *     component bit mask stored in base.mode (FS_ZERO / FS_ONE are
 *     still chosen if the mask happens to be 0 or 0x7)
 *   - anything else                        -> FS_DIFFUSE when @src is the
 *     shader's source channel, FS_CONSTANT otherwise.
 *
 * @extents is not used.  Always returns CAIRO_STATUS_SUCCESS.
 */
static cairo_status_t
i915_shader_acquire_solid (i915_shader_t *shader,
                           union i915_shader_channel *src,
                           const cairo_solid_pattern_t *solid,
                           const cairo_rectangle_int_t *extents)
{
    const cairo_content_t content = CAIRO_CONTENT_COLOR_ALPHA;
    const int color_mask = content & CAIRO_CONTENT_COLOR;
    const int alpha_mask = content & CAIRO_CONTENT_ALPHA;

    src->solid.color = solid->color;

    if (content == 0 || solid->color.alpha_short <= 0x00ff) {
        /* (Nearly) transparent: contributes nothing. */
        src->base.content = CAIRO_CONTENT_ALPHA;
        src->type.fragment = FS_ZERO;
    } else {
        const int rgb_is_one =
            color_mask == 0 ||
            (solid->color.red_short >= 0xff00 &&
             solid->color.green_short >= 0xff00 &&
             solid->color.blue_short >= 0xff00);
        const int alpha_is_one =
            alpha_mask == 0 ||
            solid->color.alpha_short >= 0xff00;

        if (rgb_is_one && alpha_is_one) {
            /* Opaque white. */
            src->base.content = CAIRO_CONTENT_ALPHA;
            src->type.fragment = FS_ONE;
        } else if (is_pure (solid->color.red_short, color_mask) &&
                   is_pure (solid->color.green_short, color_mask) &&
                   is_pure (solid->color.blue_short, color_mask) &&
                   is_pure (solid->color.alpha_short, alpha_mask)) {
            /* One bit per component: red, green, blue, then alpha. */
            src->solid.pure =
                (is_one (solid->color.red_short, color_mask) << 0) |
                (is_one (solid->color.green_short, color_mask) << 1) |
                (is_one (solid->color.blue_short, color_mask) << 2) |
                ((! is_zero (solid->color.alpha_short, alpha_mask)) << 3);

            switch (src->solid.pure) {
            case 0:
                src->base.content = CAIRO_CONTENT_ALPHA;
                src->type.fragment = FS_ZERO;
                break;
            case 0x7:
                src->base.content = CAIRO_CONTENT_ALPHA;
                src->type.fragment = FS_ONE;
                break;
            default:
                src->base.content = content;
                src->type.fragment = FS_PURE;
                src->base.mode = src->solid.pure;
                break;
            }
        } else {
            /* Arbitrary colour. */
            src->base.content = content;
            if (src == &shader->source)
                src->type.fragment = FS_DIFFUSE;
            else
                src->type.fragment = FS_CONSTANT;
        }
    }

    if (src->type.fragment == FS_ZERO)
        src->type.vertex = VS_ZERO;
    else
        src->type.vertex = VS_CONSTANT;
    src->type.pattern = PATTERN_CONSTANT;

    return CAIRO_STATUS_SUCCESS;
}

/*
 * Configure @src to supply the linear gradient @linear over @extents.
 *
 * A gradient with exactly two stops at offsets 0.0 and 1.0 (and usable
 * end points) is evaluated by the fragment program (FS_LINEAR), provided
 * that either the whole of @extents lies within the gradient
 * (LINEAR_NONE) or the extend mode is REPEAT or PAD.  Every other
 * gradient is rendered into a buffer via intel_gradient_render() and
 * sampled as a texture (FS_TEXTURE / LINEAR_TEXTURE).
 *
 * Returns the status of intel_gradient_render() on the texture path,
 * CAIRO_STATUS_SUCCESS otherwise.
 */
static cairo_status_t
i915_shader_acquire_linear (i915_shader_t *shader,
                            union i915_shader_channel *src,
                            const cairo_linear_pattern_t *linear,
                            const cairo_rectangle_int_t *extents)
{
    cairo_bool_t mode = LINEAR_TEXTURE;
    cairo_status_t status;
    intel_buffer_t buffer;

    /* i915_shader_linear_init() must run first: it fills in src->linear. */
    if (i915_shader_linear_init (&src->linear, linear) &&
        linear->base.n_stops == 2 &&
        linear->base.stops[0].offset == 0.0 &&
        linear->base.stops[1].offset == 1.0)
    {
        if (i915_shader_linear_contains_rectangle (&src->linear, extents)) {
            /* XXX can also lerp if contained within offset range */
            mode = LINEAR_NONE;
        } else {
            switch (linear->base.base.extend) {
            case CAIRO_EXTEND_REPEAT:
                mode = LINEAR_REPEAT;
                break;
            case CAIRO_EXTEND_PAD:
                mode = LINEAR_PAD;
                break;
            case CAIRO_EXTEND_NONE:
            case CAIRO_EXTEND_REFLECT:
                /* Left on the texture path. */
                break;
            default:
                ASSERT_NOT_REACHED;
                break;
            }
        }
    }

    src->type.vertex = VS_LINEAR;
    src->type.pattern = PATTERN_LINEAR;
    src->base.texfmt = TEXCOORDFMT_1D;
    src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
    src->base.mode = mode;

    if (mode != LINEAR_TEXTURE) {
        /* Two-stop gradient: hand both stop colours to the shader. */
        src->type.fragment = FS_LINEAR;
        src->linear.color0.red   = linear->base.stops[0].color.red;
        src->linear.color0.green = linear->base.stops[0].color.green;
        src->linear.color0.blue  = linear->base.stops[0].color.blue;
        src->linear.color0.alpha = linear->base.stops[0].color.alpha;

        src->linear.color1.red   = linear->base.stops[1].color.red;
        src->linear.color1.green = linear->base.stops[1].color.green;
        src->linear.color1.blue  = linear->base.stops[1].color.blue;
        src->linear.color1.alpha = linear->base.stops[1].color.alpha;

        return CAIRO_STATUS_SUCCESS;
    }

    /* General gradient: render the ramp and sample it as a texture. */
    status = intel_gradient_render ((intel_device_t *) shader->target->intel.drm.base.device,
                                    &linear->base, &buffer);
    if (unlikely (status))
        return status;

    src->type.fragment = FS_TEXTURE;
    src->base.bo = intel_bo_reference (buffer.bo);
    src->base.n_samplers = 1;
    src->base.offset[0] = buffer.offset;
    src->base.map[0] = buffer.map0;
    src->base.map[1] = buffer.map1;
    src->base.sampler[0] =
        (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
        i915_texture_filter (CAIRO_FILTER_BILINEAR);
    src->base.sampler[1] =
        SS3_NORMALIZED_COORDS |
        i915_texture_extend (linear->base.base.extend);

    return CAIRO_STATUS_SUCCESS;
}

/*
 * Configure @src to supply the radial gradient @radial.
 *
 * The colour ramp is rendered by intel_gradient_render() and sampled as
 * a texture by the FS_RADIAL fragment program; the geometry is set up by
 * i915_shader_radial_init().  @extents is not used.
 *
 * Returns the status of intel_gradient_render(), in which case @src is
 * left unmodified, or CAIRO_STATUS_SUCCESS.
 */
static cairo_status_t
i915_shader_acquire_radial (i915_shader_t *shader,
                            union i915_shader_channel *src,
                            const cairo_radial_pattern_t *radial,
                            const cairo_rectangle_int_t *extents)
{
    intel_device_t *device;
    intel_buffer_t ramp;
    cairo_status_t status;

    device = (intel_device_t *) shader->target->intel.drm.base.device;
    status = intel_gradient_render (device, &radial->base, &ramp);
    if (unlikely (status))
        return status;

    i915_shader_radial_init (&src->radial, radial);

    src->type.vertex = VS_TEXTURE;
    src->type.fragment = FS_RADIAL;
    src->type.pattern = PATTERN_RADIAL;
    src->base.texfmt = TEXCOORDFMT_2D;

    src->base.content = CAIRO_CONTENT_COLOR_ALPHA;

    /* The channel keeps its own reference on the ramp's buffer object. */
    src->base.bo = intel_bo_reference (ramp.bo);
    src->base.n_samplers = 1;
    src->base.offset[0] = ramp.offset;
    src->base.map[0] = ramp.map0;
    src->base.map[1] = ramp.map1;
    src->base.sampler[0] = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
                           i915_texture_filter (CAIRO_FILTER_BILINEAR);
    src->base.sampler[1] = SS3_NORMALIZED_COORDS |
                           i915_texture_extend (radial->base.base.extend);

    return CAIRO_STATUS_SUCCESS;
}

/*
 * Create an i915 surface with the dimensions of @image and upload the
 * whole of @image into it.
 *
 * A1 images are stored as A8.  On success the new surface is returned
 * through @clone_out and the caller owns the reference.  On failure
 * @clone_out is not written and nothing is leaked: a surface that was
 * created but could not be filled is destroyed before returning.
 *
 * Returns the error status of the surface creation or of the upload,
 * CAIRO_STATUS_SUCCESS otherwise.
 */
static cairo_status_t
i915_surface_clone (i915_device_t *device,
                    cairo_image_surface_t *image,
                    i915_surface_t **clone_out)
{
    i915_surface_t *clone;
    cairo_status_t status;

#if 0
    clone =
        i915_surface_create_from_cacheable_image_internal (device, image);
    if (unlikely (clone->intel.drm.base.status))
        return clone->intel.drm.base.status;
#else
    cairo_format_t format;

    format = image->format;
    if (format == CAIRO_FORMAT_A1)
        format = CAIRO_FORMAT_A8;

    clone = (i915_surface_t *)
        i915_surface_create_internal (&device->intel.base,
                                      format,
                                      image->width,
                                      image->height,
                                      I915_TILING_DEFAULT,
                                      FALSE);
    if (unlikely (clone->intel.drm.base.status))
        return clone->intel.drm.base.status;

    status = intel_bo_put_image (&device->intel,
                                 to_intel_bo (clone->intel.drm.bo),
                                 image,
                                 0, 0,
                                 image->width, image->height,
                                 0, 0);

    if (unlikely (status)) {
        /* The clone is never handed to the caller on this path, so drop
         * our reference instead of leaking the surface. */
        cairo_surface_destroy (&clone->intel.drm.base);
        return status;
    }
#endif

    *clone_out = clone;
    return CAIRO_STATUS_SUCCESS;
}

/*
 * Create an untiled i915 surface of size @extents->width by
 * @extents->height and upload into it the sub-rectangle of @image
 * described by @extents.
 *
 * A1 images are stored as A8.  On success the new surface is returned
 * through @clone_out and the caller owns the reference.  On failure
 * @clone_out is not written and nothing is leaked: a surface that was
 * created but could not be filled is destroyed before returning.
 *
 * Returns the error status of the surface creation or of the upload,
 * CAIRO_STATUS_SUCCESS otherwise.
 */
static cairo_status_t
i915_surface_clone_subimage (i915_device_t *device,
                             cairo_image_surface_t *image,
                             const cairo_rectangle_int_t *extents,
                             i915_surface_t **clone_out)
{
    i915_surface_t *clone;
    cairo_status_t status;
    cairo_format_t format;

    format = image->format;
    if (format == CAIRO_FORMAT_A1)
        format = CAIRO_FORMAT_A8;

    clone = (i915_surface_t *)
        i915_surface_create_internal (&device->intel.base,
                                      format,
                                      extents->width,
                                      extents->height,
                                      I915_TILING_NONE,
                                      FALSE);
    if (unlikely (clone->intel.drm.base.status))
        return clone->intel.drm.base.status;

    status = intel_bo_put_image (&device->intel,
                                 to_intel_bo (clone->intel.drm.bo),
                                 image,
                                 extents->x, extents->y,
                                 extents->width, extents->height,
                                 0, 0);

    if (unlikely (status)) {
        /* The clone is never handed to the caller on this path, so drop
         * our reference instead of leaking the surface. */
        cairo_surface_destroy (&clone->intel.drm.base);
        return status;
    }

    *clone_out = clone;
    return CAIRO_STATUS_SUCCESS;
}

/*
 * Render @pattern over @extents into a freshly created, untiled i915
 * surface by painting through a CPU mapping of the surface's buffer.
 *
 * The new surface has the size of @extents and the format matching the
 * content of the pattern's surface.  The buffer is mapped, wrapped in a
 * temporary image surface, and the pattern is painted into it with
 * CAIRO_OPERATOR_SOURCE at an offset of (@extents->x, @extents->y).
 *
 * On success the surface is returned through @clone_out.  On any
 * failure after creation the surface is destroyed and the error status
 * is returned; a failed mapping is reported as CAIRO_STATUS_NO_MEMORY.
 */
static cairo_status_t
i915_surface_render_pattern (i915_device_t *device,
                             const cairo_surface_pattern_t *pattern,
                             const cairo_rectangle_int_t *extents,
                             i915_surface_t **clone_out)
{
    i915_surface_t *clone;
    cairo_surface_t *image;
    cairo_status_t status;
    void *ptr;

    clone = (i915_surface_t *)
        i915_surface_create_internal (&device->intel.base,
                                      _cairo_format_from_content (pattern->surface->content),
                                      extents->width,
                                      extents->height,
                                      I915_TILING_NONE,
                                      FALSE);
    if (unlikely (clone->intel.drm.base.status))
        return clone->intel.drm.base.status;

    ptr = intel_bo_map (&device->intel,
                        to_intel_bo (clone->intel.drm.bo));
    if (unlikely (ptr == NULL)) {
        status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
        goto CLEANUP_CLONE;
    }

    /* Temporary image view onto the mapped buffer. */
    image = cairo_image_surface_create_for_data (ptr,
                                                 clone->intel.drm.format,
                                                 clone->intel.drm.width,
                                                 clone->intel.drm.height,
                                                 clone->intel.drm.stride);
    if (unlikely (image->status)) {
        status = image->status;
        goto CLEANUP_CLONE;
    }

    status = _cairo_surface_offset_paint (image,
                                          extents->x, extents->y,
                                          CAIRO_OPERATOR_SOURCE,
                                          &pattern->base,
                                          NULL);
    cairo_surface_destroy (image);
    if (unlikely (status))
        goto CLEANUP_CLONE;

    *clone_out = clone;
    return CAIRO_STATUS_SUCCESS;

CLEANUP_CLONE:
    cairo_surface_destroy (&clone->intel.drm.base);
    return status;
}

/*
 * Configure @src as a constant colour taken from a single pixel of
 * @surface.
 *
 * The source image of @surface is acquired and painted, with nearest
 * filtering and a translation of (@extents->x, @extents->y), into a 1x1
 * ARGB32 image.  The resulting premultiplied pixel is converted back to
 * an unpremultiplied colour and stored in @src->solid.color.
 *
 * Returns the error status of acquiring the source image or of the
 * paint, CAIRO_STATUS_SUCCESS otherwise.
 */
static cairo_status_t
i915_shader_acquire_solid_surface (i915_shader_t *shader,
                                   union i915_shader_channel *src,
                                   cairo_surface_t *surface,
                                   const cairo_rectangle_int_t *extents)
{
    cairo_surface_pattern_t pattern;
    cairo_surface_t *pixel;
    cairo_image_surface_t *image;
    void *image_extra;
    cairo_status_t status;
    uint32_t argb;
    uint8_t alpha;

    status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
    if (unlikely (status))
        return status;

    /* extract the pixel as argb32 */
    pixel = cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 1, 1);
    _cairo_pattern_init_for_surface (&pattern, &image->base);
    cairo_matrix_init_translate (&pattern.base.matrix, extents->x, extents->y);
    pattern.base.filter = CAIRO_FILTER_NEAREST;
    status = _cairo_surface_paint (pixel, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL);
    _cairo_pattern_fini (&pattern.base);

    _cairo_surface_release_source_image (surface, image, image_extra);

    if (unlikely (status)) {
        cairo_surface_destroy (pixel);
        return status;
    }

    argb = *(uint32_t *) ((cairo_image_surface_t *) pixel)->data;
    cairo_surface_destroy (pixel);

    alpha = argb >> 24;
    if (alpha == 0) {
        /* Fully transparent: avoid dividing by zero below. */
        _cairo_color_init_rgba (&src->solid.color, 0, 0, 0, 0);
    } else {
        /* Undo the premultiplication, rounding to the nearest 8-bit
         * value, then normalise to [0, 1]. */
        const uint32_t red   = (argb >> 16) & 0xff;
        const uint32_t green = (argb >>  8) & 0xff;
        const uint32_t blue  = (argb >>  0) & 0xff;

        _cairo_color_init_rgba (&src->solid.color,
                                ((red   * 255 + alpha / 2) / alpha) / 255.,
                                ((green * 255 + alpha / 2) / alpha) / 255.,
                                ((blue  * 255 + alpha / 2) / alpha) / 255.,
                                alpha / 255.);
    }

    src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
    src->type.fragment = FS_CONSTANT;
    src->type.vertex = VS_CONSTANT;
    src->type.pattern = PATTERN_CONSTANT;

    return CAIRO_STATUS_SUCCESS;
}

/*
 * Compute in @sample the integer rectangle of the pattern's surface that
 * is read when @pattern is drawn over @extents.
 *
 * @extents is mapped through the pattern matrix (a plain offset when the
 * matrix is a translation, a transformed bounding box otherwise), grown
 * by the padding reported by _cairo_pattern_analyze_filter(), rounded
 * outwards, and finally intersected with the surface extents when the
 * surface reports any.  The result of that intersection test is not
 * acted upon.
 *
 * Returns the filter chosen by _cairo_pattern_analyze_filter().
 */
static cairo_filter_t
sampled_area (const cairo_surface_pattern_t *pattern,
              const cairo_rectangle_int_t *extents,
              cairo_rectangle_int_t *sample)
{
    cairo_rectangle_int_t bounds;
    cairo_filter_t filter;
    double left, top, right, bottom;
    double pad;

    left   = extents->x;
    top    = extents->y;
    right  = extents->x + (int) extents->width;
    bottom = extents->y + (int) extents->height;

    if (! _cairo_matrix_is_translation (&pattern->base.matrix)) {
        _cairo_matrix_transform_bounding_box (&pattern->base.matrix,
                                              &left, &top, &right, &bottom,
                                              NULL);
    } else {
        left   += pattern->base.matrix.x0;
        right  += pattern->base.matrix.x0;
        top    += pattern->base.matrix.y0;
        bottom += pattern->base.matrix.y0;
    }

    filter = _cairo_pattern_analyze_filter (&pattern->base, &pad);

    /* Round outwards so that every touched texel is included. */
    sample->x = floor (left - pad);
    sample->y = floor (top - pad);
    sample->width  = ceil (right + pad) - sample->x;
    sample->height = ceil (bottom + pad) - sample->y;

    if (_cairo_surface_get_extents (pattern->surface, &bounds)) {
        cairo_bool_t is_empty;

        is_empty = _cairo_rectangle_intersect (sample, &bounds);
    }

    return filter;
}

/*
 * Configure @src to sample the surface behind @pattern as a texture when
 * drawing over the destination rectangle @extents.
 *
 * Three strategies are tried in turn:
 *
 *  1. The surface is (or, via a subsurface, a snapshot or the XCB/DRM
 *     bridge, wraps) a DRM surface on the same device as the shader's
 *     target: its buffer object is referenced directly.  A subsurface is
 *     addressed by offsetting into the parent buffer; a surface that is
 *     the target itself, or that needs tiling, is first copied with
 *     i915_surface_copy_subimage().
 *
 *  2. A 1x1 @extents is turned into a constant colour by
 *     i915_shader_acquire_solid_surface().
 *
 *  3. Otherwise an existing snapshot for the target's backend is reused,
 *     or a new i915 surface is produced: by rendering the pattern when
 *     the sampled area exceeds 2048 in either dimension, by cloning the
 *     whole image (and caching it as a snapshot) when the image is
 *     smaller than 2048x2048 and at least a quarter of each dimension is
 *     sampled, or by uploading only the sampled sub-rectangle.
 *
 * Finally the sampler state, texture coordinate format and the matrix
 * mapping destination to normalised texture coordinates are filled in.
 *
 * Returns CAIRO_STATUS_SUCCESS, or the first error reported by one of
 * the helpers.
 */
static cairo_status_t
i915_shader_acquire_surface (i915_shader_t *shader,
                             union i915_shader_channel *src,
                             const cairo_surface_pattern_t *pattern,
                             const cairo_rectangle_int_t *extents)
{
    int surface_width, surface_height;
    cairo_surface_t *surface, *drm;
    cairo_extend_t extend;
    cairo_filter_t filter;
    cairo_matrix_t m;
    int src_x = 0, src_y = 0;
    cairo_surface_t *free_me = NULL;
    cairo_status_t status;
    cairo_rectangle_int_t sample;

    /* The channel must not have been configured yet; the fragment type
     * doubles as the "still unresolved" marker further down. */
    assert (src->type.fragment == (i915_fragment_shader_t) -1);
    drm = surface = pattern->surface;

    extend = pattern->base.extend;
    src->base.matrix = pattern->base.matrix;
    filter = sampled_area (pattern, extents, &sample);

#if CAIRO_HAS_XCB_SURFACE && CAIRO_HAS_XCB_DRM_FUNCTIONS
    if (surface->type == CAIRO_SURFACE_TYPE_XCB) {
        cairo_surface_t *xcb = surface;

        if (xcb->backend->type == CAIRO_SURFACE_TYPE_SUBSURFACE) {
            xcb = ((cairo_surface_subsurface_t *) surface)->target;
        } else if (xcb->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) {
            xcb = ((cairo_surface_snapshot_t *) surface)->target;
        }

        /* XXX copy windows (IncludeInferiors) to a pixmap/drm surface
         * xcb = _cairo_xcb_surface_to_drm (xcb)
         */
        xcb = ((cairo_xcb_surface_t *) xcb)->drm;
        if (xcb != NULL)
            drm = xcb;
    }
#endif

    /* Look through subsurface and snapshot wrappers to the DRM surface. */
    if (surface->type == CAIRO_SURFACE_TYPE_DRM) {
        if (surface->backend->type == CAIRO_SURFACE_TYPE_SUBSURFACE) {
            drm = ((cairo_surface_subsurface_t *) surface)->target;
        } else if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) {
            drm = ((cairo_surface_snapshot_t *) surface)->target;
        }
    }

    if (drm->type == CAIRO_SURFACE_TYPE_DRM) {
        i915_surface_t *s = (i915_surface_t *) drm;

        if (surface->backend->type == CAIRO_SURFACE_TYPE_SUBSURFACE) {
            if (s->intel.drm.base.device == shader->target->intel.drm.base.device &&
                s != shader->target)
            {
                cairo_surface_subsurface_t *sub = (cairo_surface_subsurface_t *) surface;
                int x;

                status = i915_surface_fallback_flush (s);
                if (unlikely (status))
                    return status;

                /* XXX blt subimage and cache snapshot */

                if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) {
                    /* XXX pipelined flush of RENDER/TEXTURE cache */
                }

                src->type.fragment = FS_TEXTURE;
                src->surface.pixel = NONE;
                surface_width  = sub->extents.width;
                surface_height = sub->extents.height;

                src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
                src->base.n_samplers = 1;

                /* Byte offset of the subsurface's first column: every
                 * format other than A8 is treated as 4 bytes per pixel. */
                x = sub->extents.x;
                if (s->intel.drm.format != CAIRO_FORMAT_A8)
                    x *= 4;

                /* XXX tiling restrictions upon offset? */
                src->base.offset[0] = s->offset + sub->extents.y * s->intel.drm.stride + x;

                /* Reuse the parent's map state, but substitute the
                 * subsurface's width and height. */
                src->base.map[0] = s->map0;
                src->base.map[0] &= ~((2047 << MS3_HEIGHT_SHIFT) | (2047 << MS3_WIDTH_SHIFT));
                src->base.map[0] |=
                    ((sub->extents.height - 1) << MS3_HEIGHT_SHIFT) |
                    ((sub->extents.width - 1)  << MS3_WIDTH_SHIFT);
                src->base.map[1] = (s->intel.drm.stride / 4 - 1) << MS4_PITCH_SHIFT;
            }
        } else {
            /* XXX if s == shader->dst allow if FILTER_NEAREST, EXTEND_NONE? */
            if (s->intel.drm.base.device == shader->target->intel.drm.base.device) {
                status = i915_surface_fallback_flush (s);
                if (unlikely (status))
                    return status;

                if (s == shader->target || i915_surface_needs_tiling (s)) {
                    /* Work from a copy of the sampled area instead;
                     * the copy is released once the channel holds its
                     * own reference on the buffer object. */
                    status = i915_surface_copy_subimage (i915_device (shader->target),
                                                         s, &sample, TRUE, &s);
                    if (unlikely (status))
                        return status;

                    free_me = drm = &s->intel.drm.base;
                }

                src->type.fragment = FS_TEXTURE;
                src->surface.pixel = NONE;

                surface_width  = s->intel.drm.width;
                surface_height = s->intel.drm.height;

                src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
                src->base.n_samplers = 1;
                src->base.offset[0] = s->offset;
                src->base.map[0] = s->map0;
                src->base.map[1] = s->map1;
            }
        }
    }

    /* Nothing usable directly: go through a snapshot or an upload. */
    if (src->type.fragment == (i915_fragment_shader_t) -1) {
        i915_surface_t *s;

        if (extents->width == 1 && extents->height == 1) {
            return i915_shader_acquire_solid_surface (shader, src,
                                                      surface, extents);
        }

        s = (i915_surface_t *)
            _cairo_surface_has_snapshot (surface,
                                         shader->target->intel.drm.base.backend);
        if (s == NULL) {
#if 0
            /* XXX hackity hack hack */
            status = i915_clone_yuv (surface, src,
                                     image->width, image->height,
                                     clone_out);
#endif

            if (sample.width > 2048 || sample.height > 2048) {
                /* Sampled area too large for a single upload: render
                 * just the visible part of the pattern instead. */
                status = i915_surface_render_pattern (i915_device (shader->target),
                                                      pattern, extents,
                                                      &s);
                if (unlikely (status))
                    return status;

                extend = CAIRO_EXTEND_NONE;
                filter = CAIRO_FILTER_NEAREST;
                cairo_matrix_init_translate (&src->base.matrix,
                                             -extents->x, -extents->y);
            } else {
                cairo_image_surface_t *image;
                void *image_extra;

                status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
                if (unlikely (status))
                    return status;

                if (image->width  < 2048 &&
                    image->height < 2048 &&
                    sample.width  >= image->width / 4 &&
                    sample.height >= image->height /4)
                {
                    /* Upload the whole image and keep it around as a
                     * snapshot of the source surface. */
                    status = i915_surface_clone (i915_device (shader->target),
                                                 image, &s);

                    if (likely (status == CAIRO_STATUS_SUCCESS)) {
                        _cairo_surface_attach_snapshot (surface,
                                                        &s->intel.drm.base,
                                                        intel_surface_detach_snapshot);

                        status = intel_snapshot_cache_insert (&i915_device (shader->target)->intel,
                                                              &s->intel);
                        if (unlikely (status)) {
                            cairo_surface_finish (&s->intel.drm.base);
                            cairo_surface_destroy (&s->intel.drm.base);
                        }
                    }
                }
                else
                {
                    /* Upload only the sampled sub-rectangle. */
                    status = i915_surface_clone_subimage (i915_device (shader->target),
                                                          image, &sample, &s);
                    src_x = -extents->x;
                    src_y = -extents->y;
                }

                _cairo_surface_release_source_image (surface, image, image_extra);
                if (unlikely (status))
                    return status;
            }

            free_me = &s->intel.drm.base;
        }

        src->type.fragment = FS_TEXTURE;
        src->surface.pixel = NONE;

        src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
        src->base.n_samplers = 1;
        src->base.offset[0] = s->offset;
        src->base.map[0] = s->map0;
        src->base.map[1] = s->map1;

        drm = &s->intel.drm.base;

        surface_width  = s->intel.drm.width;
        surface_height = s->intel.drm.height;
    }

    /* XXX transform nx1 or 1xn surfaces to 1D */

    src->type.pattern = PATTERN_TEXTURE;

    /* The extend mode is irrelevant when the sampled area lies entirely
     * inside the texture. */
    if (extend != CAIRO_EXTEND_NONE &&
        sample.x >= 0 && sample.y >= 0 &&
        sample.x + sample.width  <= surface_width &&
        sample.y + sample.height <= surface_height)
    {
        extend = CAIRO_EXTEND_NONE;
    }
    if (extend == CAIRO_EXTEND_NONE) {
        src->type.vertex = VS_TEXTURE_16;
        src->base.texfmt = TEXCOORDFMT_2D_16;
    } else {
        src->type.vertex = VS_TEXTURE;
        src->base.texfmt = TEXCOORDFMT_2D;
    }
    src->base.content = drm->content;

    src->base.sampler[0] =
        (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
        i915_texture_filter (filter);
    src->base.sampler[1] =
        SS3_NORMALIZED_COORDS |
        i915_texture_extend (extend);

    /* tweak the src matrix to map from dst to texture coordinates */
    if (src_x | src_y)
        cairo_matrix_translate (&src->base.matrix, src_x, src_y);
    cairo_matrix_init_scale (&m, 1. / surface_width, 1. / surface_height);
    cairo_matrix_multiply (&src->base.matrix, &src->base.matrix, &m);

    /* The channel holds its own reference on the buffer object, so the
     * temporary surface can be released now. */
    if (free_me != NULL)
        cairo_surface_destroy (free_me);

    return CAIRO_STATUS_SUCCESS;
}

/*
 * Configure the shader channel @src from @pattern for drawing over
 * @extents, dispatching on the pattern type to the matching
 * i915_shader_acquire_*() helper.
 *
 * Returns the status of that helper.  An unknown pattern type trips
 * ASSERT_NOT_REACHED and yields CAIRO_STATUS_SUCCESS.
 */
cairo_status_t
i915_shader_acquire_pattern (i915_shader_t *shader,
                             union i915_shader_channel *src,
                             const cairo_pattern_t *pattern,
                             const cairo_rectangle_int_t *extents)
{
    cairo_status_t status;

    switch (pattern->type) {
    case CAIRO_PATTERN_TYPE_SOLID:
        status = i915_shader_acquire_solid (shader, src,
                                            (const cairo_solid_pattern_t *) pattern,
                                            extents);
        break;

    case CAIRO_PATTERN_TYPE_LINEAR:
        status = i915_shader_acquire_linear (shader, src,
                                             (const cairo_linear_pattern_t *) pattern,
                                             extents);
        break;

    case CAIRO_PATTERN_TYPE_RADIAL:
        status = i915_shader_acquire_radial (shader, src,
                                             (const cairo_radial_pattern_t *) pattern,
                                             extents);
        break;

    case CAIRO_PATTERN_TYPE_SURFACE:
        status = i915_shader_acquire_surface (shader, src,
                                              (const cairo_surface_pattern_t *) pattern,
                                              extents);
        break;

    default:
        ASSERT_NOT_REACHED;
        status = CAIRO_STATUS_SUCCESS;
        break;
    }

    return status;
}

/*
 * Return the source and destination blend factor bits for compositing
 * with operator @op onto @dst.
 *
 * Operators outside the table, and those marked BOUNDED (Clear and
 * Source), yield 0.  For operators whose source factor reads the
 * destination alpha, the factor is adjusted when @dst has no alpha
 * channel (alpha is treated as 1) and when @dst is an A8 surface (the
 * colour factor is used in place of the alpha factor).
 */
static uint32_t
i915_get_blend (cairo_operator_t op,
                i915_surface_t *dst)
{
#define SBLEND(X) ((BLENDFACT_##X) << S6_CBUF_SRC_BLEND_FACT_SHIFT)
#define DBLEND(X) ((BLENDFACT_##X) << S6_CBUF_DST_BLEND_FACT_SHIFT)
    static const struct blendinfo {
        cairo_bool_t dst_alpha;
        uint32_t src_blend;
        uint32_t dst_blend;
        enum {
            BOUNDED,
            SIMPLE,
            XRENDER,
        } kind;
    } i915_blend_op[] = {
        {0, SBLEND (ZERO),          DBLEND (ZERO), BOUNDED}, /* Clear */
        {0, SBLEND (ONE),           DBLEND (ZERO), BOUNDED}, /* Src */

        {0, SBLEND (ONE),           DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Over */
        {1, SBLEND (DST_ALPHA),     DBLEND (ZERO), XRENDER}, /* In */
        {1, SBLEND (INV_DST_ALPHA), DBLEND (ZERO), XRENDER}, /* Out */
        {1, SBLEND (DST_ALPHA),     DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Atop */

        {0, SBLEND (ZERO),          DBLEND (ONE), SIMPLE}, /* Dst */
        {1, SBLEND (INV_DST_ALPHA), DBLEND (ONE), SIMPLE}, /* OverReverse */
        {0, SBLEND (ZERO),          DBLEND (SRC_ALPHA), XRENDER}, /* InReverse */
        {0, SBLEND (ZERO),          DBLEND (INV_SRC_ALPHA), SIMPLE}, /* OutReverse */
        {1, SBLEND (INV_DST_ALPHA), DBLEND (SRC_ALPHA), XRENDER}, /* AtopReverse */

        {1, SBLEND (INV_DST_ALPHA), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Xor */
        {0, SBLEND (ONE),           DBLEND (ONE), SIMPLE}, /* Add */
        //{0, 0, SBLEND (SRC_ALPHA_SATURATE),         DBLEND (ONE), SIMPLE}, /* XXX Saturate */
    };
    const struct blendinfo *info;
    uint32_t src_factor;

    if (op >= ARRAY_LENGTH (i915_blend_op))
        return 0;

    info = &i915_blend_op[op];
    if (info->kind == BOUNDED)
        return 0;

    src_factor = info->src_blend;

    if (info->dst_alpha) {
        /* If there's no dst alpha channel, adjust the blend op so that
         * we'll treat it as always 1.
         */
        if ((dst->intel.drm.base.content & CAIRO_CONTENT_ALPHA) == 0) {
            if (src_factor == SBLEND (DST_ALPHA))
                src_factor = SBLEND (ONE);
            else if (src_factor == SBLEND (INV_DST_ALPHA))
                src_factor = SBLEND (ZERO);
        }

        /* i915 engine reads 8bit color buffer into green channel in
         * cases like color buffer blending etc., and also writes back
         * green channel.  So with dst_alpha blend we should use color
         * factor. See spec on "8-bit rendering" */
        if (dst->intel.drm.format == CAIRO_FORMAT_A8) {
            if (src_factor == SBLEND (DST_ALPHA))
                src_factor = SBLEND (DST_COLR);
            else if (src_factor == SBLEND (INV_DST_ALPHA))
                src_factor = SBLEND (INV_DST_COLR);
        }
    }

    return src_factor | info->dst_blend;
#undef SBLEND
#undef DBLEND
}

/*
 * Put @channel into its "nothing acquired" state: the vertex, fragment
 * and pattern types are all set to -1, no texture coordinates, no
 * buffer object, no samplers and mode 0.
 */
static void
i915_shader_channel_init (union i915_shader_channel *channel)
{
    /* -1 marks each type as not yet chosen; i915_shader_acquire_surface()
     * asserts on this value for the fragment type. */
    channel->type.vertex = (i915_vertex_shader_t) -1;
    channel->type.fragment = (i915_fragment_shader_t) -1;
    channel->type.pattern = (i915_shader_channel_t) -1;
    channel->base.texfmt = TEXCOORDFMT_NOT_PRESENT;
    channel->base.bo = NULL;
    channel->base.n_samplers = 0;
    channel->base.mode = 0;
}

/*
 * Release the buffer object held by @channel, if any.
 *
 * Only the texture, base, linear and radial pattern kinds are examined
 * for a buffer object; constant and unrecognised kinds are left alone.
 * The channel's fields are not cleared.
 */
static void
i915_shader_channel_fini (i915_device_t *device,
                          union i915_shader_channel *channel)
{
    switch (channel->type.pattern) {
    case PATTERN_TEXTURE:
    case PATTERN_BASE:
    case PATTERN_LINEAR:
    case PATTERN_RADIAL:
        break;

    case PATTERN_CONSTANT:
    default:
        /* Nothing to release for these kinds. */
        return;
    }

    if (channel->base.bo != NULL)
        intel_bo_destroy (&device->intel, channel->base.bo);
}

/*
 * Release whatever @channel currently holds (i915_shader_channel_fini)
 * and return it to its initial state (i915_shader_channel_init), so it
 * can be acquired again.
 */
static void
i915_shader_channel_reset (i915_device_t *device,
                     union i915_shader_channel *channel)
{
    i915_shader_channel_fini (device, channel);
    i915_shader_channel_init (channel);
}

/*
 * Prepare @shader for compositing onto @dst with operator @op at the
 * given @opacity.
 *
 * Records the target, its device and its content, looks up the blend
 * factors for @op, clears the committed and need_combine flags, and
 * puts all four channels (source, mask, clip, dst) into their initial
 * state.
 */
void
i915_shader_init (i915_shader_t *shader,
                  i915_surface_t *dst,
                  cairo_operator_t op,
                  double opacity)
{
    /* Where we draw. */
    shader->target = dst;
    shader->device = i915_device (dst);
    shader->content = dst->intel.drm.base.content;

    /* How we draw. */
    shader->op = op;
    shader->opacity = opacity;
    shader->blend = i915_get_blend (op, dst);

    /* Nothing has been set up or emitted yet. */
    shader->committed = FALSE;
    shader->need_combine = FALSE;

    i915_shader_channel_init (&shader->source);
    i915_shader_channel_init (&shader->mask);
    i915_shader_channel_init (&shader->clip);
    i915_shader_channel_init (&shader->dst);
}

static void
i915_set_shader_samplers (i915_device_t *device,
                        const i915_shader_t *shader)
{
    uint32_t n_samplers, n_maps, n;
    uint32_t samplers[2*4];
    uint32_t maps[4*4];
    uint32_t mask, s, m;

    n_maps =
      shader->source.base.n_samplers +
      shader->mask.base.n_samplers +
      shader->clip.base.n_samplers +
      shader->dst.base.n_samplers;
    assert (n_maps <= 4);

    if (n_maps == 0)
      return;

    n_samplers =
      !! shader->source.base.bo +
      !! shader->mask.base.bo +
      !! shader->clip.base.bo +
      !! shader->dst.base.bo;

    mask  = (1 << n_maps) - 1;

    /* We check for repeated setting of sample state mainly to catch
     * continuation of text strings across multiple show-glyphs.
     */
    s = m = 0;
    if (shader->source.base.bo != NULL) {
      samplers[s++] = shader->source.base.sampler[0];
      samplers[s++] = shader->source.base.sampler[1];
      maps[m++] = shader->source.base.bo->base.handle;
      for (n = 0; n < shader->source.base.n_samplers; n++) {
          maps[m++] = shader->source.base.offset[n];
          maps[m++] = shader->source.base.map[2*n+0];
          maps[m++] = shader->source.base.map[2*n+1];
      }
    }
    if (shader->mask.base.bo != NULL) {
      samplers[s++] = shader->mask.base.sampler[0];
      samplers[s++] = shader->mask.base.sampler[1];
      maps[m++] = shader->mask.base.bo->base.handle;
      for (n = 0; n < shader->mask.base.n_samplers; n++) {
          maps[m++] = shader->mask.base.offset[n];
          maps[m++] = shader->mask.base.map[2*n+0];
          maps[m++] = shader->mask.base.map[2*n+1];
      }
    }
    if (shader->clip.base.bo != NULL) {
      samplers[s++] = shader->clip.base.sampler[0];
      samplers[s++] = shader->clip.base.sampler[1];
      maps[m++] = shader->clip.base.bo->base.handle;
      for (n = 0; n < shader->clip.base.n_samplers; n++) {
          maps[m++] = shader->clip.base.offset[n];
          maps[m++] = shader->clip.base.map[2*n+0];
          maps[m++] = shader->clip.base.map[2*n+1];
      }
    }
    if (shader->dst.base.bo != NULL) {
      samplers[s++] = shader->dst.base.sampler[0];
      samplers[s++] = shader->dst.base.sampler[1];
      maps[m++] = shader->dst.base.bo->base.handle;
      for (n = 0; n < shader->dst.base.n_samplers; n++) {
          maps[m++] = shader->dst.base.offset[n];
          maps[m++] = shader->dst.base.map[2*n+0];
          maps[m++] = shader->dst.base.map[2*n+1];
      }
    }

    if (n_maps > device->current_n_maps ||
      memcmp (device->current_maps,
            maps,
            m * sizeof (uint32_t)))
    {
      memcpy (device->current_maps, maps, m * sizeof (uint32_t));
      device->current_n_maps = n_maps;

      if (device->current_source != NULL)
          *device->current_source = 0;
      if (device->current_mask != NULL)
          *device->current_mask = 0;
      if (device->current_clip != NULL)
          *device->current_clip = 0;

#if 0
      if (shader->source.type.pattern == PATTERN_TEXTURE) {
          switch ((int) shader->source.surface.surface->type) {
          case CAIRO_SURFACE_TYPE_DRM:
            {
                i915_surface_t *surface =
                  (i915_surface_t *) shader->source.surface.surface;
                device->current_source = &surface->is_current_texture;
                surface->is_current_texture |= CURRENT_SOURCE;
                break;
            }

          case I915_PACKED_PIXEL_SURFACE_TYPE:
            {
                i915_packed_pixel_surface_t *surface =
                  (i915_packed_pixel_surface_t *) shader->source.surface.surface;
                device->current_source = &surface->is_current_texture;
                surface->is_current_texture |= CURRENT_SOURCE;
                break;
            }

          default:
            device->current_source = NULL;
            break;
          }
      } else
          device->current_source = NULL;

      if (shader->mask.type.pattern == PATTERN_TEXTURE) {
          switch ((int) shader->mask.surface.surface->type) {
          case CAIRO_SURFACE_TYPE_DRM:
            {
                i915_surface_t *surface =
                  (i915_surface_t *) shader->mask.surface.surface;
                device->current_mask = &surface->is_current_texture;
                surface->is_current_texture |= CURRENT_MASK;
                break;
            }

          case I915_PACKED_PIXEL_SURFACE_TYPE:
            {
                i915_packed_pixel_surface_t *surface =
                  (i915_packed_pixel_surface_t *) shader->mask.surface.surface;
                device->current_mask = &surface->is_current_texture;
                surface->is_current_texture |= CURRENT_MASK;
                break;
            }

          default:
            device->current_mask = NULL;
            break;
          }
      } else
          device->current_mask = NULL;
#endif

      OUT_DWORD (_3DSTATE_MAP_STATE | (3 * n_maps));
      OUT_DWORD (mask);
      for (n = 0; n < shader->source.base.n_samplers; n++) {
          i915_batch_emit_reloc (device, shader->source.base.bo,
                           shader->source.base.offset[n],
                           I915_GEM_DOMAIN_SAMPLER, 0,
                           FALSE);
          OUT_DWORD (shader->source.base.map[2*n+0]);
          OUT_DWORD (shader->source.base.map[2*n+1]);
      }
      for (n = 0; n < shader->mask.base.n_samplers; n++) {
          i915_batch_emit_reloc (device, shader->mask.base.bo,
                           shader->mask.base.offset[n],
                           I915_GEM_DOMAIN_SAMPLER, 0,
                           FALSE);
          OUT_DWORD (shader->mask.base.map[2*n+0]);
          OUT_DWORD (shader->mask.base.map[2*n+1]);
      }
      for (n = 0; n < shader->clip.base.n_samplers; n++) {
          i915_batch_emit_reloc (device, shader->clip.base.bo,
                           shader->clip.base.offset[n],
                           I915_GEM_DOMAIN_SAMPLER, 0,
                           FALSE);
          OUT_DWORD (shader->clip.base.map[2*n+0]);
          OUT_DWORD (shader->clip.base.map[2*n+1]);
      }
      for (n = 0; n < shader->dst.base.n_samplers; n++) {
          i915_batch_emit_reloc (device, shader->dst.base.bo,
                           shader->dst.base.offset[n],
                           I915_GEM_DOMAIN_SAMPLER, 0,
                           FALSE);
          OUT_DWORD (shader->dst.base.map[2*n+0]);
          OUT_DWORD (shader->dst.base.map[2*n+1]);
      }
    }

    if (n_samplers > device->current_n_samplers ||
      memcmp (device->current_samplers,
            samplers,
            s * sizeof (uint32_t)))
    {
      device->current_n_samplers = s;
      memcpy (device->current_samplers, samplers, s * sizeof (uint32_t));

      OUT_DWORD (_3DSTATE_SAMPLER_STATE | (3 * n_maps));
      OUT_DWORD (mask);
      s = 0;
      for (n = 0; n < shader->source.base.n_samplers; n++) {
          OUT_DWORD (shader->source.base.sampler[0]);
          OUT_DWORD (shader->source.base.sampler[1] |
                   (s << SS3_TEXTUREMAP_INDEX_SHIFT));
          OUT_DWORD (0x0);
          s++;
      }
      for (n = 0; n < shader->mask.base.n_samplers; n++) {
          OUT_DWORD (shader->mask.base.sampler[0]);
          OUT_DWORD (shader->mask.base.sampler[1] |
                   (s << SS3_TEXTUREMAP_INDEX_SHIFT));
          OUT_DWORD (0x0);
          s++;
      }
      for (n = 0; n < shader->clip.base.n_samplers; n++) {
          OUT_DWORD (shader->clip.base.sampler[0]);
          OUT_DWORD (shader->clip.base.sampler[1] |
                   (s << SS3_TEXTUREMAP_INDEX_SHIFT));
          OUT_DWORD (0x0);
          s++;
      }
      for (n = 0; n < shader->dst.base.n_samplers; n++) {
          OUT_DWORD (shader->dst.base.sampler[0]);
          OUT_DWORD (shader->dst.base.sampler[1] |
                   (s << SS3_TEXTUREMAP_INDEX_SHIFT));
          OUT_DWORD (0x0);
          s++;
      }
    }
}

static uint32_t
i915_shader_get_texcoords (const i915_shader_t *shader)
{
    uint32_t texcoords;
    uint32_t tu;

    texcoords = S2_TEXCOORD_NONE;
    tu = 0;
    if (shader->source.base.texfmt != TEXCOORDFMT_NOT_PRESENT) {
      texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK);
      texcoords |= S2_TEXCOORD_FMT (tu, shader->source.base.texfmt);
      tu++;
    }
    if (shader->mask.base.texfmt != TEXCOORDFMT_NOT_PRESENT) {
      texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK);
      texcoords |= S2_TEXCOORD_FMT (tu, shader->mask.base.texfmt);
      tu++;
    }
    if (shader->clip.base.texfmt != TEXCOORDFMT_NOT_PRESENT) {
      texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK);
      texcoords |= S2_TEXCOORD_FMT (tu, shader->clip.base.texfmt);
      tu++;
    }
    if (shader->dst.base.texfmt != TEXCOORDFMT_NOT_PRESENT) {
      texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK);
      texcoords |= S2_TEXCOORD_FMT (tu, shader->dst.base.texfmt);
      tu++;
    }

    return texcoords;
}

/* Emit a LOAD_STATE_IMMEDIATE_1 packet updating S2 (texture-coordinate
 * formats) and/or S6 (blend state) when they differ from the values cached
 * on @device.  Nothing is emitted when both are already current.  The cache
 * is updated to match whatever is emitted.
 */
static void
i915_set_shader_mode (i915_device_t *device,
                      const i915_shader_t *shader)
{
    const uint32_t texcoords = i915_shader_get_texcoords (shader);
    const cairo_bool_t texcoords_dirty = device->current_texcoords != texcoords;
    const cairo_bool_t blend_dirty = device->current_blend != shader->blend;
    uint32_t load_mask = 0;
    uint32_t n_dwords = 0;

    if (texcoords_dirty) {
        load_mask |= I1_LOAD_S (2);
        n_dwords++;
    }
    if (blend_dirty) {
        load_mask |= I1_LOAD_S (6);
        n_dwords++;
    }

    if (n_dwords == 0)
        return;

    /* the length field of the packet is the payload size minus one */
    OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | load_mask | (n_dwords - 1));

    if (texcoords_dirty) {
        OUT_DWORD (texcoords);
        device->current_texcoords = texcoords;
    }

    if (blend_dirty) {
        uint32_t s6 = S6_COLOR_WRITE_ENABLE;

        /* a zero blend word means plain colour writes without blending */
        if (shader->blend) {
            s6 |= S6_CBUF_BLEND_ENABLE |
                  (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
                  shader->blend;
        }
        OUT_DWORD (s6);

        device->current_blend = shader->blend;
    }
}

/* Emit @n_constants pixel-shader constant dwords from @constants into the
 * batch and remember them on @device as the currently loaded set.
 *
 * The second header dword is a bitmask with one bit per group of four
 * dwords, i.e. (1 << (n_constants / 4)) - 1.
 */
static void
i915_set_constants (i915_device_t *device,
                    const uint32_t *constants,
                    uint32_t n_constants)
{
    const uint32_t *const end = constants + n_constants;
    const uint32_t *p;

    OUT_DWORD (_3DSTATE_PIXEL_SHADER_CONSTANTS | n_constants);
    OUT_DWORD ((1 << (n_constants >> 2)) - 1);

    for (p = constants; p != end; p++)
        OUT_DWORD (*p);

    /* cache a copy so that later shaders can skip a redundant upload */
    memcpy (device->current_constants, constants, n_constants * 4);
    device->current_n_constants = n_constants;
}

/* Serialise the fragment-shader constants required by @channel into
 * @constants and return how many dwords were written.
 *
 *   FS_CONSTANT: 4 dwords, the solid colour as r, g, b, a
 *   FS_LINEAR:   8 dwords, color0 then color1, each as r, g, b, a
 *   FS_RADIAL:   one dword per entry of channel->radial.constants
 *   all others:  nothing
 */
static uint32_t
pack_constants (const union i915_shader_channel *channel,
                uint32_t *constants)
{
    uint32_t *out = constants;
    uint32_t i;

    switch (channel->type.fragment) {
    case FS_CONSTANT:
        *out++ = pack_float (channel->solid.color.red);
        *out++ = pack_float (channel->solid.color.green);
        *out++ = pack_float (channel->solid.color.blue);
        *out++ = pack_float (channel->solid.color.alpha);
        break;

    case FS_LINEAR:
        *out++ = pack_float (channel->linear.color0.red);
        *out++ = pack_float (channel->linear.color0.green);
        *out++ = pack_float (channel->linear.color0.blue);
        *out++ = pack_float (channel->linear.color0.alpha);

        *out++ = pack_float (channel->linear.color1.red);
        *out++ = pack_float (channel->linear.color1.green);
        *out++ = pack_float (channel->linear.color1.blue);
        *out++ = pack_float (channel->linear.color1.alpha);
        break;

    case FS_RADIAL:
        for (i = 0; i < ARRAY_LENGTH (channel->radial.constants); i++)
            *out++ = pack_float (channel->radial.constants[i]);
        break;

    /* these fragment types take no constants */
    case FS_ZERO:
    case FS_ONE:
    case FS_PURE:
    case FS_DIFFUSE:
    case FS_TEXTURE:
    case FS_YUV:
    case FS_SPANS:
        break;
    }

    return (uint32_t) (out - constants);
}

/* Upload whatever per-shader constants @shader needs.
 *
 * An FS_DIFFUSE source is passed as the default diffuse colour (packed
 * 8-bit ARGB) rather than through the constant registers.  Otherwise the
 * source constants are packed first, followed by the mask constants and,
 * when the shader opacity is below 1, four copies of the opacity.  The
 * constant block is only re-emitted when it differs from the block cached
 * on @device.
 */
static void
i915_set_shader_constants (i915_device_t *device,
                           const i915_shader_t *shader)
{
    uint32_t packed[4*4*3+4];
    unsigned used = 0;

    if (shader->source.type.fragment != FS_DIFFUSE) {
        used += pack_constants (&shader->source, packed + used);
    } else {
        uint32_t diffuse;

        /* keep only the top 8 bits of each 16-bit colour component */
        diffuse =
            ((shader->source.solid.color.alpha_short >> 8) << 24) |
            ((shader->source.solid.color.red_short   >> 8) << 16) |
            ((shader->source.solid.color.green_short >> 8) << 8) |
            ((shader->source.solid.color.blue_short  >> 8) << 0);

        if (diffuse != device->current_diffuse) {
            OUT_DWORD (_3DSTATE_DFLT_DIFFUSE_CMD);
            OUT_DWORD (diffuse);
            device->current_diffuse = diffuse;
        }
    }

    used += pack_constants (&shader->mask, packed + used);

    if (shader->opacity < 1.) {
        const uint32_t opacity = pack_float (shader->opacity);
        unsigned i;

        /* replicate the opacity across all four components */
        for (i = 0; i < 4; i++)
            packed[used++] = opacity;
    }

    if (used == 0)
        return;

    if (device->current_n_constants == used &&
        memcmp (device->current_constants, packed, used*4) == 0)
    {
        return;
    }

    i915_set_constants (device, packed, used);
}

static cairo_bool_t
i915_shader_needs_update (const i915_shader_t *shader,
                    const i915_device_t *device)
{
    uint32_t count, n;
    uint32_t buf[64];

    if (device->current_target != shader->target)
      return TRUE;

    count =
      !! shader->source.base.bo +
      !! shader->mask.base.bo +
      !! shader->clip.base.bo +
      !! shader->dst.base.bo;
    if (count > device->current_n_samplers)
      return TRUE;

    count =
      shader->source.base.n_samplers +
      shader->mask.base.n_samplers +
      shader->clip.base.n_samplers +
      shader->dst.base.n_samplers;
    if (count > device->current_n_maps)
      return TRUE;

    if (count) {
      count = 0;
      if (shader->source.base.bo != NULL) {
          buf[count++] = shader->source.base.sampler[0];
          buf[count++] = shader->source.base.sampler[1];
      }
      if (shader->mask.base.bo != NULL) {
          buf[count++] = shader->mask.base.sampler[0];
          buf[count++] = shader->mask.base.sampler[1];
      }
      if (shader->clip.base.bo != NULL) {
          buf[count++] = shader->clip.base.sampler[0];
          buf[count++] = shader->clip.base.sampler[1];
      }
      if (shader->dst.base.bo != NULL) {
          buf[count++] = shader->dst.base.sampler[0];
          buf[count++] = shader->dst.base.sampler[1];
      }
      if (memcmp (device->current_samplers, buf, count * sizeof (uint32_t)))
          return TRUE;

      count = 0;
      if (shader->source.base.bo != NULL) {
          buf[count++] = shader->source.base.bo->base.handle;
          for (n = 0; n < shader->source.base.n_samplers; n++) {
            buf[count++] = shader->source.base.offset[n];
            buf[count++] = shader->source.base.map[2*n+0];
            buf[count++] = shader->source.base.map[2*n+1];
          }
      }
      if (shader->mask.base.bo != NULL) {
          buf[count++] = shader->mask.base.bo->base.handle;
          for (n = 0; n < shader->mask.base.n_samplers; n++) {
            buf[count++] = shader->mask.base.offset[n];
            buf[count++] = shader->mask.base.map[2*n+0];
            buf[count++] = shader->mask.base.map[2*n+1];
          }
      }
      if (shader->clip.base.bo != NULL) {
          buf[count++] = shader->clip.base.bo->base.handle;
          for (n = 0; n < shader->clip.base.n_samplers; n++) {
            buf[count++] = shader->clip.base.offset[n];
            buf[count++] = shader->clip.base.map[2*n+0];
            buf[count++] = shader->clip.base.map[2*n+1];
          }
      }
      if (shader->dst.base.bo != NULL) {
          buf[count++] = shader->dst.base.bo->base.handle;
          for (n = 0; n < shader->dst.base.n_samplers; n++) {
            buf[count++] = shader->dst.base.offset[n];
            buf[count++] = shader->dst.base.map[2*n+0];
            buf[count++] = shader->dst.base.map[2*n+1];
          }
      }
      if (memcmp (device->current_maps, buf, count * sizeof (uint32_t)))
          return TRUE;
    }

    if (i915_shader_get_texcoords (shader) != device->current_texcoords)
      return TRUE;
    if (device->current_blend != shader->blend)
      return TRUE;

    count = 0;
    if (shader->source.type.fragment == FS_DIFFUSE) {
      uint32_t diffuse;

      diffuse =
          ((shader->source.solid.color.alpha_short >> 8) << 24) |
          ((shader->source.solid.color.red_short   >> 8) << 16) |
          ((shader->source.solid.color.green_short >> 8) << 8) |
          ((shader->source.solid.color.blue_short  >> 8) << 0);

      if (diffuse != device->current_diffuse)
          return TRUE;
    } else {
      count += pack_constants (&shader->source, buf + count);
    }
    count += pack_constants (&shader->mask, buf + count);

    if (count &&
      (device->current_n_constants != count ||
       memcmp (device->current_constants, buf, count*4)))
    {
      return TRUE;
    }

    n = (i915_shader_channel_key (&shader->source) <<  0) |
      (i915_shader_channel_key (&shader->mask)   <<  8) |
      (i915_shader_channel_key (&shader->clip)   << 16) |
      (shader->op << 24) |
      ((shader->opacity < 1.) << 30) |
      (((shader->content & CAIRO_CONTENT_ALPHA) == CAIRO_CONTENT_ALPHA) << 31);
    return n != device->current_program;
}

/* Make @dst the render target of @device, emitting only the pieces of
 * destination state that differ from what the device has cached:
 * the colour buffer binding, the colour buffer format word and the
 * drawing rectangle derived from the surface's width and height.
 */
void
i915_set_dst (i915_device_t *device, i915_surface_t *dst)
{
    const uint32_t size =
        DRAW_YMAX (dst->intel.drm.height) | DRAW_XMAX (dst->intel.drm.width);

    if (device->current_target != dst) {
        intel_bo_t *bo = to_intel_bo (dst->intel.drm.bo);

        assert (bo != NULL);

        OUT_DWORD (_3DSTATE_BUF_INFO_CMD);
        OUT_DWORD (BUF_3D_ID_COLOR_BACK |
                   BUF_tiling (bo->tiling) |
                   BUF_3D_PITCH (dst->intel.drm.stride));
        OUT_RELOC (dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);

        device->current_target = dst;
    }

    if (device->current_colorbuf != dst->colorbuf) {
        OUT_DWORD (_3DSTATE_DST_BUF_VARS_CMD);
        OUT_DWORD (dst->colorbuf);

        device->current_colorbuf = dst->colorbuf;
    }

    if (device->current_size != size) {
        OUT_DWORD (_3DSTATE_DRAW_RECT_CMD);
        OUT_DWORD (0); /* dither */
        OUT_DWORD (0); /* top-left */
        OUT_DWORD (size);
        OUT_DWORD (0);  /* origin */

        device->current_size = size;
    }
}

/* Bind the shader's target surface as the device's render destination.
 * Thin wrapper around i915_set_dst().
 */
static void
i915_set_shader_target (i915_device_t *device,
                    const i915_shader_t *shader)
{
    i915_set_dst (device, shader->target);
}

int
i915_shader_num_texcoords (const i915_shader_t *shader)
{
    int cnt = 0;

    switch (shader->source.base.texfmt) {
    default:
      ASSERT_NOT_REACHED;
    case TEXCOORDFMT_NOT_PRESENT: break;
    case TEXCOORDFMT_2D: cnt += 2; break;
    case TEXCOORDFMT_3D: cnt += 3; break;
    case TEXCOORDFMT_4D: cnt += 4; break;
    case TEXCOORDFMT_1D: cnt += 1; break;
    case TEXCOORDFMT_2D_16: cnt += 1; break;
    }

    switch (shader->mask.base.texfmt) {
    default:
      ASSERT_NOT_REACHED;
    case TEXCOORDFMT_NOT_PRESENT: break;
    case TEXCOORDFMT_2D: cnt += 2; break;
    case TEXCOORDFMT_3D: cnt += 3; break;
    case TEXCOORDFMT_4D: cnt += 4; break;
    case TEXCOORDFMT_1D: cnt += 1; break;
    case TEXCOORDFMT_2D_16: cnt += 1; break;
    }

    switch (shader->clip.base.texfmt) {
    default:
      ASSERT_NOT_REACHED;
    case TEXCOORDFMT_NOT_PRESENT: break;
    case TEXCOORDFMT_2D: cnt += 2; break;
    case TEXCOORDFMT_3D: cnt += 3; break;
    case TEXCOORDFMT_4D: cnt += 4; break;
    case TEXCOORDFMT_1D: cnt += 1; break;
    case TEXCOORDFMT_2D_16: cnt += 1; break;
    }

    switch (shader->dst.base.texfmt) {
    default:
      ASSERT_NOT_REACHED;
    case TEXCOORDFMT_NOT_PRESENT: break;
    case TEXCOORDFMT_2D: cnt += 2; break;
    case TEXCOORDFMT_3D: cnt += 3; break;
    case TEXCOORDFMT_4D: cnt += 4; break;
    case TEXCOORDFMT_1D: cnt += 1; break;
    case TEXCOORDFMT_2D_16: cnt += 1; break;
    }

    return cnt;
}

/* Finalise the source, mask and clip channels of @shader via
 * i915_shader_channel_fini(), using the device that owns the shader's
 * target surface.
 *
 * NOTE(review): shader->dst is not finalised here; presumably that channel
 * only borrows the target's bo and holds nothing of its own -- confirm.
 */
void
i915_shader_fini (i915_shader_t *shader)
{
    i915_device_t *device = i915_device (shader->target);

    i915_shader_channel_fini (device, &shader->source);
    i915_shader_channel_fini (device, &shader->mask);
    i915_shader_channel_fini (device, &shader->clip);
}

/* Configure the clip channel of @shader to sample the surface that
 * represents @clip.
 *
 * The clip surface is obtained from _cairo_clip_get_surface() and is
 * asserted to be an error-free DRM surface.  It is sampled as an alpha-only
 * texture with nearest filtering, no extend and normalized packed 16-bit
 * 2D coordinates.  The channel matrix scales by the reciprocal of the
 * surface size and translates by the negated clip origin.
 */
void
i915_shader_set_clip (i915_shader_t *shader,
                      cairo_clip_t *clip)
{
    union i915_shader_channel *channel = &shader->clip;
    cairo_surface_t *clip_surface;
    i915_surface_t *s;
    int clip_x, clip_y;

    clip_surface = _cairo_clip_get_surface (clip,
                                            &shader->target->intel.drm.base,
                                            &clip_x, &clip_y);
    assert (clip_surface->status == CAIRO_STATUS_SUCCESS);
    assert (clip_surface->type == CAIRO_SURFACE_TYPE_DRM);
    s = (i915_surface_t *) clip_surface;

    /* channel is a union: the order of the stores below is kept unchanged */
    channel->type.vertex = VS_TEXTURE_16;
    channel->base.texfmt = TEXCOORDFMT_2D_16;
    channel->base.content = CAIRO_CONTENT_ALPHA;

    channel->type.fragment = FS_TEXTURE;
    channel->surface.pixel = NONE;

    /* a single texture map covering the clip surface */
    channel->base.bo = to_intel_bo (s->intel.drm.bo);
    channel->base.n_samplers = 1;
    channel->base.offset[0] = s->offset;
    channel->base.map[0] = s->map0;
    channel->base.map[1] = s->map1;

    channel->base.sampler[0] =
        (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
        i915_texture_filter (CAIRO_FILTER_NEAREST);
    channel->base.sampler[1] =
        SS3_NORMALIZED_COORDS |
        i915_texture_extend (CAIRO_EXTEND_NONE);

    cairo_matrix_init_scale (&channel->base.matrix,
                             1. / s->intel.drm.width,
                             1. / s->intel.drm.height);
    cairo_matrix_translate (&channel->base.matrix, -clip_x, -clip_y);
}

/* Check that the buffer objects @shader needs pass i915_check_aperture()
 * for the current batch, flushing the batch once if they do not.
 *
 * The set checked consists of the target (only when it is not already the
 * device's current target) and the bos of the source, mask and clip
 * channels.  Returns the status of the flush if one was needed and failed,
 * CAIRO_STATUS_SUCCESS otherwise.
 */
static cairo_status_t
i915_shader_check_aperture (i915_shader_t *shader,
                            i915_device_t *device)
{
    union i915_shader_channel *channels[3];
    intel_bo_t *bo_array[4];
    cairo_status_t status;
    uint32_t count = 0;
    unsigned int i;

    if (device->current_target != shader->target)
        bo_array[count++] = to_intel_bo (shader->target->intel.drm.bo);

    channels[0] = &shader->source;
    channels[1] = &shader->mask;
    channels[2] = &shader->clip;
    for (i = 0; i < ARRAY_LENGTH (channels); i++) {
        if (channels[i]->base.bo != NULL)
            bo_array[count++] = channels[i]->base.bo;
    }

    if (count == 0)
        return CAIRO_STATUS_SUCCESS;

    if (i915_check_aperture (device, bo_array, count))
        return CAIRO_STATUS_SUCCESS;

    status = i915_batch_flush (device);
    if (unlikely (status))
        return status;

    /* after the flush the check is expected to pass */
    assert (i915_check_aperture (device, bo_array, count));
    return CAIRO_STATUS_SUCCESS;
}

/* Simplify the mask channel of @shader and propagate trivial results.
 *
 *  - an unset (-1) or FS_CONSTANT mask is left untouched;
 *  - an FS_PURE mask collapses to FS_ONE or FS_ZERO depending on bit 3 of
 *    its pure value;
 *  - an FS_ONE mask, or a mask whose content has no alpha, is reset;
 *  - an FS_ZERO mask forces the source to FS_ZERO as well;
 *  - an FS_ZERO source makes both the mask and the clip redundant, so
 *    they are reset.
 */
static void
i915_shader_combine_mask (i915_shader_t *shader, i915_device_t *device)
{
    union i915_shader_channel *mask = &shader->mask;
    union i915_shader_channel *source = &shader->source;

    if (mask->type.fragment == (i915_fragment_shader_t) -1)
        return;
    if (mask->type.fragment == FS_CONSTANT)
        return;

    if (mask->type.fragment == FS_PURE) {
        mask->type.fragment =
            (mask->solid.pure & (1<<3)) ? FS_ONE : FS_ZERO;
    }

    if (mask->type.fragment == FS_ONE ||
        (mask->base.content & CAIRO_CONTENT_ALPHA) == 0)
    {
        i915_shader_channel_reset (device, mask);
    }

    if (mask->type.fragment == FS_ZERO) {
        /* the mask is zero, so the source is replaced by zero */
        i915_shader_channel_fini (device, source);

        source->type.fragment = FS_ZERO;
        source->type.vertex = VS_ZERO;
        source->base.texfmt = TEXCOORDFMT_NOT_PRESENT;
        source->base.mode = 0;
        source->base.n_samplers = 0;
    }

    if (source->type.fragment == FS_ZERO) {
        i915_shader_channel_reset (device, mask);
        i915_shader_channel_reset (device, &shader->clip);
    }
}

/* Set up the dst channel of @shader so that the fragment program can read
 * back the target surface, and flag the shader as needing a combine step.
 *
 * Nothing is done when need_combine is already set, or when the operator
 * is below CAIRO_OPERATOR_SATURATE and either no clip channel is set or
 * the operator is bounded by its mask.
 */
static void
i915_shader_setup_dst (i915_shader_t *shader)
{
    union i915_shader_channel *channel;
    i915_surface_t *s;

    /* Manual blending is needed if we have a clip surface and an unbounded
     * operator, or an extended blend mode.
     */
    if (shader->need_combine)
        return;

    if (shader->op < CAIRO_OPERATOR_SATURATE) {
        if (shader->clip.type.fragment == (i915_fragment_shader_t) -1)
            return;
        if (_cairo_operator_bounded_by_mask (shader->op))
            return;
    }

    shader->need_combine = TRUE;

    /* channel is a union: the order of the stores below is kept unchanged */
    channel = &shader->dst;
    channel->type.vertex = VS_TEXTURE_16;
    channel->base.texfmt = TEXCOORDFMT_2D_16;
    channel->base.content = shader->content;

    channel->type.fragment = FS_TEXTURE;
    channel->surface.pixel = NONE;

    /* sample the target surface itself through a single texture map */
    s = shader->target;
    channel->base.bo = to_intel_bo (s->intel.drm.bo);
    channel->base.n_samplers = 1;
    channel->base.offset[0] = s->offset;
    channel->base.map[0] = s->map0;
    channel->base.map[1] = s->map1;

    channel->base.sampler[0] =
        (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
        i915_texture_filter (CAIRO_FILTER_NEAREST);
    channel->base.sampler[1] =
        SS3_NORMALIZED_COORDS |
        i915_texture_extend (CAIRO_EXTEND_NONE);

    cairo_matrix_init_scale (&channel->base.matrix,
                             1. / s->intel.drm.width,
                             1. / s->intel.drm.height);
}

/* Reconcile the source fragment type of @shader with the one last used on
 * @device.
 *
 * If the previous source was FS_DIFFUSE and the new one is FS_ONE, FS_PURE,
 * FS_CONSTANT or FS_DIFFUSE, the new source is switched to FS_DIFFUSE with
 * its mode cleared.  The device then records the resulting fragment type
 * as the last one used.
 */
static void
i915_shader_combine_source (i915_shader_t *shader,
                            i915_device_t *device)
{
    const i915_fragment_shader_t fragment = shader->source.type.fragment;

    if (device->last_source_fragment == fragment)
        return;

    if (device->last_source_fragment == FS_DIFFUSE &&
        (fragment == FS_ONE ||
         fragment == FS_PURE ||
         fragment == FS_CONSTANT ||
         fragment == FS_DIFFUSE))
    {
        shader->source.type.fragment = FS_DIFFUSE;
        shader->source.base.mode = 0;
    }

    /* re-read: the fragment type may just have been rewritten above */
    device->last_source_fragment = shader->source.type.fragment;
}

static inline float *
i915_composite_vertex (float *v,
                   const i915_shader_t *shader,
                   double x, double y)
{
    double s, t;

    /* Each vertex is:
     *   2 vertex coordinates
     *   [0-2] source texture coordinates
     *   [0-2] mask texture coordinates
     */

    *v++ = x; *v++ = y;
    switch (shader->source.type.vertex) {
    case VS_ZERO:
    case VS_CONSTANT:
      break;
    case VS_LINEAR:
      *v++ = i915_shader_linear_texcoord (&shader->source.linear, x, y);
      break;
    case VS_TEXTURE:
      s = x, t = y;
      cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t);
      *v++ = s; *v++ = t;
      break;
    case VS_TEXTURE_16:
      s = x, t = y;
      cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t);
      *v++ = texcoord_2d_16 (s, t);
      break;
    }
    switch (shader->mask.type.vertex) {
    case VS_ZERO:
    case VS_CONSTANT:
      break;
    case VS_LINEAR:
      *v++ = i915_shader_linear_texcoord (&shader->mask.linear, x, y);
      break;
    case VS_TEXTURE:
      s = x, t = y;
      cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t);
      *v++ = s; *v++ = t;
      break;
    case VS_TEXTURE_16:
      s = x, t = y;
      cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t);
      *v++ = texcoord_2d_16 (s, t);
      break;
    }

    return v;
}

/* Emit the rectangle (@x, @y, @w, @h) as three composite vertices into the
 * space returned by i915_add_rectangle(): bottom-right, bottom-left and
 * top-left, in that order.
 */
static inline void
i915_shader_add_rectangle_general (const i915_shader_t *shader,
                                   int x, int y,
                                   int w, int h)
{
    const int right = x + w;
    const int bottom = y + h;
    float *v;

    v = i915_add_rectangle (shader->device);
    v = i915_composite_vertex (v, shader, right, bottom);
    v = i915_composite_vertex (v, shader, x, bottom);
    (void) i915_composite_vertex (v, shader, x, y);
    /* XXX overflow! */
}

/* Emit a RECTLIST primitive that draws the device->vertex_count vertices
 * accumulated since the last flush, starting at device->vertex_index, then
 * advance vertex_index past them and reset vertex_count to zero.
 *
 * Both floats_per_vertex and vertex_count must be non-zero on entry.
 */
void
i915_vbo_flush (i915_device_t *device)
{
    assert (device->floats_per_vertex);
    assert (device->vertex_count);

    /* No vertex buffer state has been emitted yet (device->vbo == 0):
     * emit a LOAD_STATE_IMMEDIATE_1 for S0 and S1.
     */
    if (device->vbo == 0) {
      OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
               I1_LOAD_S (0) |
               I1_LOAD_S (1) |
               1);
      /* Skip one batch dword and remember its index in device->vbo.
       * NOTE(review): presumably this is the S0 slot, filled in later with
       * the vertex buffer address -- confirm against the batch flush code.
       */
      device->vbo = device->batch.used++;
      /* Remember the index of the next batch dword; presumably this is
       * where the S1 dword emitted just below lands -- confirm against
       * the OUT_DWORD definition.
       */
      device->vbo_max_index = device->batch.used;
      OUT_DWORD ((device->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
               (device->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
    }

    OUT_DWORD (PRIM3D_RECTLIST |
             PRIM3D_INDIRECT_SEQUENTIAL |
             device->vertex_count);
    OUT_DWORD (device->vertex_index);

    device->vertex_index += device->vertex_count;
    device->vertex_count = 0;
}

/* Make @shader the active shader on @device, emitting whatever state is
 * required and reconfiguring the vertex layout if the number of floats per
 * vertex changes.
 *
 * The device mutex must be held by the caller (asserted).
 *
 * Returns CAIRO_STATUS_SUCCESS, the status of a failed batch flush or
 * aperture check, or the non-zero value delivered through
 * setjmp (shader->unwind).
 */
cairo_status_t
i915_shader_commit (i915_shader_t *shader,
                i915_device_t *device)
{
    unsigned floats_per_vertex;
    cairo_status_t status;

    assert (CAIRO_MUTEX_IS_LOCKED (device->intel.base.base.mutex));

    /* One-time preparation, performed on the first commit of this shader. */
    if (! shader->committed) {
      device->shader = shader;

      /* simplify the channels and add a dst channel if one is needed */
      i915_shader_combine_mask (shader, device);
      i915_shader_combine_source (shader, device);
      i915_shader_setup_dst (shader);

      shader->add_rectangle = i915_shader_add_rectangle_general;

      /* A non-zero value here means control came back via a longjmp to
       * shader->unwind; that value is returned as the status.
       * NOTE(review): presumably raised from the batch emission paths on
       * error -- confirm where shader->unwind is longjmp'd.
       */
      if ((status = setjmp (shader->unwind)))
          return status;

      shader->committed = TRUE;
    }

    if (i915_shader_needs_update (shader, device)) {
      /* flush first if less than 256 units of batch space remain */
      if (i915_batch_space (device) < 256) {
          status = i915_batch_flush (device);
          if (unlikely (status))
            return status;
      }

      /* draw any pending vertices with the old state before changing it */
      if (device->vertex_count)
          i915_vbo_flush (device);

      status = i915_shader_check_aperture (shader, device);
      if (unlikely (status))
          return status;

      /* Also entered via goto from below after a batch flush, so that the
       * full shader state is emitted again.
       */
  update_shader:
      i915_set_shader_target (device, shader);
      i915_set_shader_mode (device, shader);
      i915_set_shader_samplers (device, shader);
      i915_set_shader_constants (device, shader);
      i915_set_shader_program (device, shader);
    }

    /* 2 position floats plus the texture coordinates of all channels */
    floats_per_vertex = 2 + i915_shader_num_texcoords (shader);
    if (device->floats_per_vertex == floats_per_vertex)
      return CAIRO_STATUS_SUCCESS;

    /* The vertex layout changes.  If fewer than 8 units of batch space
     * remain, flush and go back to re-emit the shader state.
     */
    if (i915_batch_space (device) < 8) {
      status = i915_batch_flush (device);
      if (unlikely (status))
          return status;

      goto update_shader;
    }

    /* pending vertices still use the old layout: draw them first */
    if (device->vertex_count)
      i915_vbo_flush (device);

    if (device->vbo) {
      /* OR the current vertex index into the batch dword recorded at
       * vbo_max_index, then emit a new S1 dword for the new layout and
       * record its position in turn.
       */
      device->batch_base[device->vbo_max_index] |= device->vertex_index;
      OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S (1) | 0);
      device->vbo_max_index = device->batch.used;
      OUT_DWORD ((floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
               (floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
    }

    device->floats_per_vertex = floats_per_vertex;
    /* a rectangle is three vertices */
    device->rectangle_size = floats_per_vertex * 3 * sizeof (float);
    /* Round vbo_used up to a whole number of (4 * floats_per_vertex) units
     * to get the first vertex index in the new layout, and derive the
     * matching offset.  NOTE(review): presumably vbo_used and vbo_offset
     * are byte counts and 4 is sizeof (float) -- confirm.
     */
    device->vertex_index =
      (device->vbo_used + 4*floats_per_vertex - 1) / (4 * floats_per_vertex);
    device->vbo_offset = 4 * device->vertex_index * floats_per_vertex;

    return CAIRO_STATUS_SUCCESS;
}

Generated by  Doxygen 1.6.0   Back to index