diff options
Diffstat (limited to 'src/gallium/auxiliary')
67 files changed, 5878 insertions, 482 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index da68498aa1f..eaa0f2fe4e8 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -2,11 +2,7 @@ TOP = ../../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_DIR = llvm -endif - -SUBDIRS = pipebuffer $(LLVM_DIR) +SUBDIRS = $(GALLIUM_AUXILIARY_DIRS) default: subdirs diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile new file mode 100644 index 00000000000..3e49266163b --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -0,0 +1,13 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = cso_cache + +C_SOURCES = \ + cso_cache.c \ + cso_hash.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript new file mode 100644 index 00000000000..9751881613e --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/SConscript @@ -0,0 +1,10 @@ +Import('*') + +cso_cache = env.ConvenienceLibrary( + target = 'cso_cache', + source = [ + 'cso_cache.c', + 'cso_hash.c', + ]) + +auxiliaries.insert(0, cso_cache) diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 0338cb3b474..b40217c524b 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -158,11 +158,14 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) while (firstNode != e) { unsigned h = firstNode->key; struct cso_node *lastNode = firstNode; + struct cso_node *afterLastNode; + struct cso_node **beforeFirstNode; + while (lastNode->next != e && lastNode->next->key == h) lastNode = lastNode->next; - struct cso_node *afterLastNode = lastNode->next; - struct cso_node **beforeFirstNode = &hash->buckets[h % hash->numBuckets]; + afterLastNode = lastNode->next; + beforeFirstNode = &hash->buckets[h % hash->numBuckets]; while (*beforeFirstNode != e) beforeFirstNode = &(*beforeFirstNode)->next; lastNode->next = *beforeFirstNode; @@ -222,10 +225,12 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, { cso_data_might_grow(hash->data.d); - struct cso_node **nextNode = cso_hash_find_node(hash, key); - struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); - struct cso_hash_iter iter = {hash, node}; - return iter; + { + struct cso_node **nextNode = cso_hash_find_node(hash, key); + struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); + struct cso_hash_iter iter = {hash, node}; + return iter; + } } struct cso_hash * cso_hash_create(void) @@ -290,6 +295,10 @@ static struct cso_node *cso_hash_data_next(struct cso_node *node) struct cso_node *e; struct cso_hash_data *d; } a; + int start; + struct cso_node **bucket; + int n; + a.next = node->next; if (!a.next) { fprintf(stderr, "iterating beyond the last element\n"); @@ -298,9 +307,9 @@ static struct cso_node *cso_hash_data_next(struct cso_node *node) if (a.next->next) return a.next; - int start = (node->key % a.d->numBuckets) + 1; - struct cso_node **bucket = a.d->buckets + start; - int n = a.d->numBuckets - start; + start = (node->key % a.d->numBuckets) + 1; + bucket = a.d->buckets + start; + n = a.d->numBuckets - start; while (n--) { if (*bucket != a.e) return *bucket; @@ -316,19 +325,21 @@ static struct cso_node *cso_hash_data_prev(struct cso_node *node) struct cso_node *e; struct cso_hash_data *d; } a; + int start; + struct cso_node *sentinel; + struct cso_node **bucket; a.e = node; while (a.e->next) a.e = a.e->next; - int start; if (node == a.e) start = a.d->numBuckets - 1; else start = node->key % a.d->numBuckets; - struct cso_node *sentinel = node; - struct cso_node **bucket = a.d->buckets + start; + sentinel = node; + bucket = a.d->buckets + start; while (start >= 0) { if (*bucket != sentinel) { struct cso_node *prev = *bucket; diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index fe9b150f304..c9980f0b835 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -1,2 +1,37 @@ -default: - cd ../../../mesa ; make +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = draw + +C_SOURCES = \ + draw_aaline.c \ + draw_aapoint.c \ + draw_clip.c \ + draw_vs_exec.c \ + draw_vs_sse.c \ + draw_vs_llvm.c \ + draw_context.c\ + draw_cull.c \ + draw_debug.c \ + draw_flatshade.c \ + draw_offset.c \ + draw_prim.c \ + draw_pstipple.c \ + draw_stipple.c \ + draw_twoside.c \ + draw_unfilled.c \ + draw_validate.c \ + draw_vbuf.c \ + draw_vertex.c \ + draw_vertex_cache.c \ + draw_vertex_fetch.c \ + draw_vertex_shader.c \ + draw_vf.c \ + draw_vf_generic.c \ + draw_vf_sse.c \ + draw_wide_prims.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript new file mode 100644 index 00000000000..8e3a8caa74c --- /dev/null +++ b/src/gallium/auxiliary/draw/SConscript @@ -0,0 +1,31 @@ +Import('*') + +draw = env.ConvenienceLibrary( + target = 'draw', + source = [ + 'draw_clip.c', + 'draw_vs_exec.c', + 'draw_vs_sse.c', + 'draw_vs_llvm.c', + 'draw_context.c', + 'draw_cull.c', + 'draw_debug.c', + 'draw_flatshade.c', + 'draw_offset.c', + 'draw_prim.c', + 'draw_stipple.c', + 'draw_twoside.c', + 'draw_unfilled.c', + 'draw_validate.c', + 'draw_vbuf.c', + 'draw_vertex.c', + 'draw_vertex_cache.c', + 'draw_vertex_fetch.c', + 'draw_vertex_shader.c', + 'draw_vf.c', + 'draw_vf_generic.c', + 'draw_vf_sse.c', + 'draw_wide_prims.c', + ]) + +auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c new file mode 100644 index 00000000000..73a02a32e42 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -0,0 +1,832 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA line stage: AA lines are converted to texture mapped triangles. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + +/** + * Max texture level for the alpha texture used for antialiasing + */ +#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */ + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aaline_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *aaline_fs; + void *aapoint_fs; /* not yet */ + void *sprite_fs; /* not yet */ +}; + + +/** + * Subclass of draw_stage + */ +struct aaline_stage +{ + struct draw_stage stage; + + float half_line_width; + + /** For AA lines, this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + void *sampler_cso; + struct pipe_texture *texture; + uint sampler_unit; + + + /* + * Currently bound state + */ + struct aaline_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + + void (*driver_set_sampler_texture)(struct pipe_context *, + unsigned sampler, + struct pipe_texture *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxSampler; /**< max sampler index found */ + int maxInput, maxGeneric; /**< max input index found */ + int colorTemp, texTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxSampler) + aactx->maxSampler = decl->u.DeclarationRange.Last + 1; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else if (aactx->texTemp < 0) + aactx->texTemp = i; + else + break; + } + } + assert(aactx->colorTemp >= 0); + assert(aactx->texTemp >= 0); + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->maxInput + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->maxSampler + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->texTemp; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END && + aactx->colorOutput != -1) { + struct tgsi_full_instruction newInst; + + /* TEX */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->maxSampler + 1; + + ctx->emit_instruction(ctx, &newInst); + + /* MOV rgb */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL alpha */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; + ctx->emit_instruction(ctx, &newInst); + + /* END */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_END; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + + ctx->emit_instruction(ctx, inst); + } +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aaline_fs(struct aaline_stage *aaline) +{ + const struct pipe_shader_state *orig_fs = &aaline->fs->state; + struct draw_context *draw = aaline->stage.draw; + struct pipe_shader_state aaline_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aaline_fs = *orig_fs; /* copy to init */ + aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxSampler = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aaline_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aaline_fs.tokens, 0); +#endif + + aaline_fs.input_semantic_name[aaline_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; + aaline_fs.input_semantic_index[aaline_fs.num_inputs] = transform.maxGeneric + 1; + aaline_fs.num_inputs++; + + aaline->fs->aaline_fs + = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); + + /* advertise the extra post-transform vertex attributes which will have + * the texcoords. + */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; +} + + +/** + * Create the texture map we'll use for antialiasing the lines. + */ +static void +aaline_create_texture(struct aaline_stage *aaline) +{ + struct pipe_context *pipe = aaline->pipe; + struct pipe_texture texTemp; + uint level; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = MAX_TEXTURE_LEVEL; + texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + aaline->texture = pipe->texture_create(pipe, &texTemp); + + /* Fill in mipmap images. + * Basically each level is solid opaque, except for the outermost + * texels which are zero. Special case the 1x1 and 2x2 levels. + */ + for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { + struct pipe_surface *surface; + const uint size = aaline->texture->width[level]; + ubyte *data; + uint i, j; + + assert(aaline->texture->width[level] == aaline->texture->height[level]); + + surface = pipe->get_tex_surface(pipe, aaline->texture, 0, level, 0); + data = pipe_surface_map(surface); + + for (i = 0; i < size; i++) { + for (j = 0; j < size; j++) { + ubyte d; + if (size == 1) { + d = 255; + } + else if (size == 2) { + d = 200; /* tuneable */ + } + else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) { + d = 0; + } + else { + d = 255; + } + data[i * surface->pitch + j] = d; + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + } +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +aaline_create_sampler(struct aaline_stage *aaline) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = aaline->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = MAX_TEXTURE_LEVEL; + + aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aaline_fragment_shader(struct aaline_stage *aaline) +{ + if (!aaline->fs->aaline_fs) { + generate_aaline_fs(aaline); + } + aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); +} + + + +static INLINE struct aaline_stage * +aaline_stage( struct draw_stage *stage ) +{ + return (struct aaline_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad, using geometry which will + * fullfill GL's antialiased line requirements. + */ +static void +aaline_line(struct draw_stage *stage, struct prim_header *header) +{ + const struct aaline_stage *aaline = aaline_stage(stage); + const float half_width = aaline->half_line_width; + struct prim_header tri; + struct vertex_header *v[8]; + uint texPos = aaline->tex_slot; + float *pos, *tex; + float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; + float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; + double a = atan2(dy, dx); + float c_a = (float) cos(a), s_a = (float) sin(a); + uint i; + + /* XXX the ends of lines aren't quite perfect yet, but probably passable */ + dx = 0.5F * half_width; + dy = half_width; + + /* allocate/dup new verts */ + for (i = 0; i < 8; i++) { + v[i] = dup_vert(stage, header->v[i/4], i); + } + + /* + * Quad strip for line from v0 to v1 (*=endpoints): + * + * 1 3 5 7 + * +---+---------------------+---+ + * | | + * | *v0 v1* | + * | | + * +---+---------------------+---+ + * 0 2 4 6 + */ + + /* new verts */ + pos = v[0]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[1]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[2]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[3]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + pos = v[4]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[5]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[6]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[7]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, 0, 0, 0, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 0, 1, 0, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[4]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[5]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[6]->data[texPos]; + ASSIGN_4V(tex, 1, 0, 0, 1); + + tex = v[7]->data[texPos]; + ASSIGN_4V(tex, 1, 1, 0, 1); + + /* emit 6 tris for the quad strip */ + tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[4]; tri.v[1] = v[3]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[5]; tri.v[1] = v[3]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[6]; tri.v[1] = v[5]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[7]; tri.v[1] = v[5]; tri.v[2] = v[6]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aaline_first_line(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aaline_stage *aaline = aaline_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = aaline->pipe; + + assert(draw->rasterizer->line_smooth); + + if (draw->rasterizer->line_width <= 3.0) + aaline->half_line_width = 1.5f; + else + aaline->half_line_width = 0.5f * draw->rasterizer->line_width; + + aaline->tex_slot = draw->num_vs_outputs; + assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ + draw->extra_vp_outputs.slot = aaline->tex_slot; + + /* + * Bind our fragprog, sampler and texture + */ + bind_aaline_fragment_shader(aaline); + + aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso); + aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture); + + /* now really draw first line */ + stage->line = aaline_line; + stage->line(stage, header); +} + + +static void +aaline_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aaline_stage *aaline = aaline_stage(stage); + struct pipe_context *pipe = aaline->pipe; + + stage->line = aaline_first_line; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, + aaline->state.sampler[aaline->sampler_unit]); + aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, + aaline->state.texture[aaline->sampler_unit]); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aaline_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aaline_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aaline_stage * +draw_aaline_stage(struct draw_context *draw) +{ + struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); + + draw_alloc_temp_verts( &aaline->stage, 8 ); + + aaline->stage.draw = draw; + aaline->stage.next = NULL; + aaline->stage.point = passthrough_point; + aaline->stage.line = aaline_first_line; + aaline->stage.tri = passthrough_tri; + aaline->stage.flush = aaline_flush; + aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; + aaline->stage.destroy = aaline_destroy; + + return aaline; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a aaline_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct aaline_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_aa_pipe_context(struct pipe_context *pipe, struct aaline_stage *aa) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = aa; + NumContexts++; +} + +static struct aaline_stage * +aaline_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aaline_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); + } + + return aafs; +} + + +static void +aaline_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* save current */ + aaline->fs = aafs; + /* pass-through */ + aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs); +} + + +static void +aaline_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* pass-through */ + aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +aaline_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ + aaline->state.sampler[unit] = sampler; + /* pass-through */ + aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler); +} + + +static void +aaline_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, struct pipe_texture *texture) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ + aaline->state.texture[sampler] = texture; + /* pass-through */ + aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture); +} + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) +{ + struct aaline_stage *aaline; + + /* + * Create / install AA line drawing / prim stage + */ + aaline = draw_aaline_stage( draw ); + assert(aaline); + draw->pipeline.aaline = &aaline->stage; + + aaline->pipe = pipe; + + /* create special texture, sampler state */ + aaline_create_texture(aaline); + aaline_create_sampler(aaline); + + /* save original driver functions */ + aaline->driver_create_fs_state = pipe->create_fs_state; + aaline->driver_bind_fs_state = pipe->bind_fs_state; + aaline->driver_delete_fs_state = pipe->delete_fs_state; + + aaline->driver_bind_sampler_state = pipe->bind_sampler_state; + aaline->driver_set_sampler_texture = pipe->set_sampler_texture; + + /* override the driver's functions */ + pipe->create_fs_state = aaline_create_fs_state; + pipe->bind_fs_state = aaline_bind_fs_state; + pipe->delete_fs_state = aaline_delete_fs_state; + + pipe->bind_sampler_state = aaline_bind_sampler_state; + pipe->set_sampler_texture = aaline_set_sampler_texture; + + add_aa_pipe_context(pipe, aaline); +} diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c new file mode 100644 index 00000000000..43119cc70b4 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -0,0 +1,875 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA point stage: AA points are converted to quads and rendered with a + * special fragment shader. Another approach would be to use a texture + * map image of a point, but experiments indicate the quality isn't nearly + * as good as this approach. + * + * Note: this looks a lot like draw_aaline.c but there's actually little + * if any code that can be shared. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + +/* + * Enabling NORMALIZE might give _slightly_ better results. + * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or + * d=x*x+y*y. Since we're working with a unit circle, the later seems + * close enough and saves some costly instructions. + */ +#define NORMALIZE 0 + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aapoint_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; /**< the regular shader */ + void *aapoint_fs; /**< the aa point-augmented shader */ +}; + + +/** + * Subclass of draw_stage + */ +struct aapoint_stage +{ + struct draw_stage stage; + + int psize_slot; + float radius; + + /** this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + /* + * Currently bound state + */ + struct aapoint_fragment_shader *fs; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxInput, maxGeneric; /**< max input index found */ + int tmp0, colorTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for two free temp regs and available input reg for new texcoords. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + struct tgsi_full_instruction newInst; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + const int texInput = aactx->maxInput + 1; + int tmp0; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->tmp0 < 0) + aactx->tmp0 = i; + else if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else + break; + } + } + + assert(aactx->colorTemp != aactx->tmp0); + + tmp0 = aactx->tmp0; + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = texInput; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = tmp0; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + + + /* + * Emit code to compute fragment coverage, kill if outside point radius + * + * Temp reg0 usage: + * t0.x = distance of fragment from center point + * t0.y = boolean, is t0.x > 1 ? + * t0.z = temporary for computing 1/(1-k) value + * t0.w = final coverage value + */ + + /* MUL t0.xy, tex, tex; # compute x^2, y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + ctx->emit_instruction(ctx, &newInst); + + /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ADD; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + ctx->emit_instruction(ctx, &newInst); + +#if NORMALIZE /* OPTIONAL normalization of length */ + /* RSQ t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); +#endif + + /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + ctx->emit_instruction(ctx, &newInst); + + /* KILP -t0.yyyy; # if b, KILL */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + + /* SGT t0.y, t0.x, tex.z; # bool b = distance > k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* IF t0.y # if b then */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_IF; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ctx->emit_instruction(ctx, &newInst); + + { + /* compute coverage factor = (1-d)/(1-k) */ + + /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.z, t0.z; # t0.z = 1 / m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SUB t0.x, 1, t0.x; # d = 1 - d */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + ctx->emit_instruction(ctx, &newInst); + + /* MUL t0.w, t0.x, t0.z; # coverage = d * m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + } + + /* ELSE */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ELSE; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + + { + /* MOV t0.w, tex.w; # coverage = 1.0 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + ctx->emit_instruction(ctx, &newInst); + } + + /* ENDIF */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ENDIF; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + /* add alpha modulation code at tail of program */ + + /* MOV result.color.xyz, colorTemp; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL result.color.w, colorTemp, tmp0.w; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + } + + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aapoint_fs(struct aapoint_stage *aapoint) +{ + const struct pipe_shader_state *orig_fs = &aapoint->fs->state; + struct draw_context *draw = aapoint->stage.draw; + struct pipe_shader_state aapoint_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aapoint_fs = *orig_fs; /* copy to init */ + aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.tmp0 = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aapoint_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aapoint_fs.tokens, 0); +#endif + + aapoint_fs.input_semantic_name[aapoint_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; + aapoint_fs.input_semantic_index[aapoint_fs.num_inputs] = transform.maxGeneric + 1; + aapoint_fs.num_inputs++; + + aapoint->fs->aapoint_fs + = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); + + /* advertise the extra post-transform vertex attributes which will have + * the texcoords. + */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) +{ + if (!aapoint->fs->aapoint_fs) { + generate_aapoint_fs(aapoint); + } + aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); +} + + + +static INLINE struct aapoint_stage * +aapoint_stage( struct draw_stage *stage ) +{ + return (struct aapoint_stage *) stage; +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw an AA point by drawing a quad. + */ +static void +aapoint_point(struct draw_stage *stage, struct prim_header *header) +{ + const struct aapoint_stage *aapoint = aapoint_stage(stage); + struct prim_header tri; + struct vertex_header *v[4]; + uint texPos = aapoint->tex_slot; + float radius, *pos, *tex; + uint i; + float k; + + if (aapoint->psize_slot >= 0) { + radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0]; + } + else { + radius = aapoint->radius; + } + + /* + * Note: the texcoords (generic attrib, really) we use are special: + * The S and T components simply vary from -1 to +1. + * The R component is k, below. + * The Q component is 1.0 and will used as a handy constant in the + * fragment shader. + */ + + /* + * k is the threshold distance from the point's center at which + * we begin alpha attenuation (the coverage value). + * Operating within a unit circle, we'll compute the fragment's + * distance 'd' from the center point using the texcoords. + * IF d > 1.0 THEN + * KILL fragment + * ELSE IF d > k THEN + * compute coverage in [0,1] proportional to d in [k, 1]. + * ELSE + * coverage = 1.0; // full coverage + * ENDIF + */ + +#if !NORMALIZE + k = 1.0 / radius; + k = 1.0 - 2.0 * k + k * k; +#else + k = 1.0 - 1.0 / radius; +#endif + + /* allocate/dup new verts */ + for (i = 0; i < 4; i++) { + v[i] = dup_vert(stage, header->v[0], i); + } + + /* new verts */ + pos = v[0]->data[0]; + pos[0] -= radius; + pos[1] -= radius; + + pos = v[1]->data[0]; + pos[0] += radius; + pos[1] -= radius; + + pos = v[2]->data[0]; + pos[0] += radius; + pos[1] += radius; + + pos = v[3]->data[0]; + pos[0] -= radius; + pos[1] += radius; + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, -1, -1, k, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 1, -1, k, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, 1, 1, k, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, -1, 1, k, 1); + + /* emit 2 tris for the quad strip */ + tri.v[0] = v[0]; + tri.v[1] = v[1]; + tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[0]; + tri.v[1] = v[2]; + tri.v[2] = v[3]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aapoint_first_point(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aapoint_stage *aapoint = aapoint_stage(stage); + struct draw_context *draw = stage->draw; + + assert(draw->rasterizer->point_smooth); + + if (draw->rasterizer->point_size <= 2.0) + aapoint->radius = 1.0; + else + aapoint->radius = 0.5f * draw->rasterizer->point_size; + + aapoint->tex_slot = draw->num_vs_outputs; + assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ + draw->extra_vp_outputs.slot = aapoint->tex_slot; + + /* + * Bind our fragprog. + */ + bind_aapoint_fragment_shader(aapoint); + + /* find psize slot in post-transform vertex */ + aapoint->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->state->num_outputs; i++) { + if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + aapoint->psize_slot = i; + break; + } + } + } + + /* now really draw first line */ + stage->point = aapoint_point; + stage->point(stage, header); +} + + +static void +aapoint_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aapoint_stage *aapoint = aapoint_stage(stage); + struct pipe_context *pipe = aapoint->pipe; + + stage->point = aapoint_first_point; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aapoint_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aapoint_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aapoint_stage * +draw_aapoint_stage(struct draw_context *draw) +{ + struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); + + draw_alloc_temp_verts( &aapoint->stage, 4 ); + + aapoint->stage.draw = draw; + aapoint->stage.next = NULL; + aapoint->stage.point = aapoint_first_point; + aapoint->stage.line = passthrough_line; + aapoint->stage.tri = passthrough_tri; + aapoint->stage.flush = aapoint_flush; + aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; + aapoint->stage.destroy = aapoint_destroy; + + return aapoint; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a aapoint_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct aapoint_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_aa_pipe_context(struct pipe_context *pipe, struct aapoint_stage *aa) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = aa; + NumContexts++; +} + +static struct aapoint_stage * +aapoint_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aapoint_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); + } + + return aafs; +} + + +static void +aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* save current */ + aapoint->fs = aafs; + /* pass-through */ + aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs); +} + + +static void +aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* pass-through */ + aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs); + FREE(aafs); +} + + +/** + * Called by drivers that want to install this AA point prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA points. + */ +void +draw_install_aapoint_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct aapoint_stage *aapoint; + + /* + * Create / install AA point drawing / prim stage + */ + aapoint = draw_aapoint_stage( draw ); + assert(aapoint); + draw->pipeline.aapoint = &aapoint->stage; + + aapoint->pipe = pipe; + + /* save original driver functions */ + aapoint->driver_create_fs_state = pipe->create_fs_state; + aapoint->driver_bind_fs_state = pipe->bind_fs_state; + aapoint->driver_delete_fs_state = pipe->delete_fs_state; + + /* override the driver's functions */ + pipe->create_fs_state = aapoint_create_fs_state; + pipe->bind_fs_state = aapoint_bind_fs_state; + pipe->delete_fs_state = aapoint_delete_fs_state; + + add_aa_pipe_context(pipe, aapoint); +} diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 4be38303169..c28e78d33a3 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -72,10 +72,10 @@ struct draw_context *draw_create( void ) { uint i; const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; - char *tmp = align_malloc(Elements(draw->vcache.vertex) * size, 16); + char *tmp = align_malloc(Elements(draw->vs.queue) * size, 16); - for (i = 0; i < Elements(draw->vcache.vertex); i++) - draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * size); + for (i = 0; i < Elements(draw->vs.queue); i++) + draw->vs.queue[i].vertex = (struct vertex_header *)(tmp + i * size); } draw->shader_queue_flush = draw_vertex_shader_queue_flush; @@ -103,10 +103,14 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); draw->pipeline.cull->destroy( draw->pipeline.cull ); draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.aaline) + draw->pipeline.aaline->destroy( draw->pipeline.aaline ); + if (draw->pipeline.aapoint) + draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); - align_free( draw->vcache.vertex[0] ); /* Frees all the vertices. */ + align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ FREE( draw ); } @@ -240,6 +244,26 @@ draw_convert_wide_lines(struct draw_context *draw, boolean enable) /** + * The draw module may sometimes generate vertices with extra attributes + * (such as texcoords for AA lines). The driver can call this function + * to find those attributes. + */ +int +draw_find_vs_output(struct draw_context *draw, + uint semantic_name, uint semantic_index) +{ + /* XXX there may be more than one extra vertex attrib. + * For example, simulated gl_FragCoord and gl_PointCoord. + */ + if (draw->extra_vp_outputs.semantic_name == semantic_name && + draw->extra_vp_outputs.semantic_index == semantic_index) { + return draw->extra_vp_outputs.slot; + } + return 0; +} + + +/** * Allocate space for temporary post-transform vertices, such as for clipping. */ void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index ddeb184497a..c25301f71df 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -41,6 +41,7 @@ #include "pipe/p_state.h" +struct pipe_context; struct vertex_buffer; struct vertex_info; struct draw_context; @@ -93,6 +94,26 @@ void draw_convert_wide_points(struct draw_context *draw, boolean enable); void draw_convert_wide_lines(struct draw_context *draw, boolean enable); +boolean draw_use_sse(struct draw_context *draw); + +void +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); + +void +draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); + +void +draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); + + +int +draw_find_vs_output(struct draw_context *draw, + uint semantic_name, uint semantic_index); + + +/* + * Vertex shader functions + */ struct draw_vertex_shader * draw_create_vertex_shader(struct draw_context *draw, @@ -102,7 +123,11 @@ void draw_bind_vertex_shader(struct draw_context *draw, void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs); -boolean draw_use_sse(struct draw_context *draw); + + +/* + * Vertex data functions + */ void draw_set_vertex_buffer(struct draw_context *draw, unsigned attr, diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 51e2242719a..7d6cd43410c 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -114,6 +114,7 @@ static void draw_prim_queue_flush( struct draw_context *draw ) } draw->pq.queue_nr = 0; + draw->vs.post_nr = 0; draw_vertex_cache_unreference( draw ); } @@ -121,11 +122,15 @@ static void draw_prim_queue_flush( struct draw_context *draw ) void draw_do_flush( struct draw_context *draw, unsigned flags ) { + static boolean flushing = FALSE; + if (0) debug_printf("Flushing with %d verts, %d prims\n", draw->vs.queue_nr, draw->pq.queue_nr ); + if (!flushing) { + flushing = TRUE; if (flags >= DRAW_FLUSH_SHADER_QUEUE) { if (draw->vs.queue_nr) @@ -146,6 +151,9 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) } } } + + flushing = FALSE; + } } @@ -219,6 +227,7 @@ static void do_triangle( struct draw_context *draw, { struct prim_header *prim = get_queued_prim( draw, 3 ); +// _mesa_printf("tri %d %d %d\n", i0, i1, i2); prim->reset_line_stipple = 1; prim->edgeflags = ~0; prim->pad = 0; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index bc11259cb21..6abced139ba 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -44,10 +44,11 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_exec.h" +struct pipe_context; struct gallivm_prog; struct gallivm_cpu_engine; @@ -119,7 +120,7 @@ struct draw_stage }; -#define PRIM_QUEUE_LENGTH 16 +#define PRIM_QUEUE_LENGTH 32 #define VCACHE_SIZE 32 #define VCACHE_OVERFLOW 4 #define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */ @@ -179,6 +180,9 @@ struct draw_context struct draw_stage *offset; struct draw_stage *unfilled; struct draw_stage *stipple; + struct draw_stage *aapoint; + struct draw_stage *aaline; + struct draw_stage *pstipple; struct draw_stage *wide; struct draw_stage *rasterize; } pipeline; @@ -212,9 +216,17 @@ struct draw_context unsigned nr_planes; boolean convert_wide_points; /**< convert wide points to tris? */ - boolean convert_wide_lines; /**< convert side lines to tris? */ + boolean convert_wide_lines; /**< convert wide lines to tris? */ boolean use_sse; + /* If a prim stage introduces new vertex attributes, they'll be stored here + */ + struct { + uint semantic_name; + uint semantic_index; + int slot; + } extra_vp_outputs; + unsigned reduced_prim; /** TGSI program interpreter runtime state */ @@ -234,8 +246,12 @@ struct draw_context */ struct { unsigned referenced; /**< bitfield */ - unsigned idx[VCACHE_SIZE + VCACHE_OVERFLOW]; - struct vertex_header *vertex[VCACHE_SIZE + VCACHE_OVERFLOW]; + + struct { + unsigned in; /* client array element */ + unsigned out; /* index in vs queue/array */ + } idx[VCACHE_SIZE + VCACHE_OVERFLOW]; + unsigned overflow; /** To find space in the vertex cache: */ @@ -248,9 +264,10 @@ struct draw_context struct { struct { unsigned elt; /**< index into the user's vertex arrays */ - struct vertex_header *dest; /**< points into vcache.vertex[] array */ + struct vertex_header *vertex; } queue[VS_QUEUE_LENGTH]; unsigned queue_nr; + unsigned post_nr; } vs; /** diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c new file mode 100644 index 00000000000..4048abf8567 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -0,0 +1,717 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Polygon stipple stage: implement polygon stipple with texture map and + * fragment program. The fragment program samples the texture and does + * a fragment kill for the stipple-failing fragments. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct pstip_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *pstip_fs; +}; + + +/** + * Subclass of draw_stage + */ +struct pstip_stage +{ + struct draw_stage stage; + + void *sampler_cso; + struct pipe_texture *texture; + uint sampler_unit; + + /* + * Currently bound state + */ + struct pstip_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + const struct pipe_poly_stipple *stipple; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + + void (*driver_set_sampler_texture)(struct pipe_context *, + unsigned sampler, + struct pipe_texture *); + + void (*driver_set_polygon_stipple)(struct pipe_context *, + const struct pipe_poly_stipple *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct pstip_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int wincoordInput; + int maxInput; + int maxSampler; /**< max sampler index found */ + int texTemp; /**< temp registers */ + int numImmed; + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +pstip_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + if ((int) decl->u.DeclarationRange.Last > pctx->maxSampler) + pctx->maxSampler = (int) decl->u.DeclarationRange.Last; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + pctx->maxInput = MAX2(pctx->maxInput, decl->u.DeclarationRange.Last); + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + pctx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +static void +pstip_transform_immed(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *immed) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + pctx->numImmed++; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +pstip_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (pctx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction newInst; + uint i; + int wincoordInput; + const int sampler = pctx->maxSampler + 1; + + if (pctx->wincoordInput < 0) + wincoordInput = pctx->maxInput + 1; + else + wincoordInput = pctx->wincoordInput; + + /* find one free temp reg */ + for (i = 0; i < 32; i++) { + if ((pctx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (pctx->texTemp < 0) + pctx->texTemp = i; + else + break; + } + } + assert(pctx->texTemp >= 0); + + if (pctx->wincoordInput < 0) { + /* declare new position input reg */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = wincoordInput; + ctx->emit_declaration(ctx, &decl); + } + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = sampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = pctx->texTemp; + ctx->emit_declaration(ctx, &decl); + + /* emit immediate = {1/32, 1/32, 1, 1} + * The index/position of this immediate will be pctx->numImmed + */ + { + static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 }; + struct tgsi_full_immediate immed; + uint size = 4; + immed = tgsi_default_full_immediate(); + immed.Immediate.Size = 1 + size; /* one for the token itself */ + immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value; + ctx->emit_immediate(ctx, &immed); + } + + pctx->firstInstruction = FALSE; + + + /* + * Insert new MUL/TEX/KILP instructions at start of program + * Take gl_FragCoord, divide by 32 (stipple size), sample the + * texture and kill fragment if needed. + * + * We'd like to use non-normalized texcoords to index into a RECT + * texture, but we can only use GL_REPEAT wrap mode with normalized + * texcoords. Darn. + */ + + /* MUL texTemp, INPUT[wincoord], 1/32; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + ctx->emit_instruction(ctx, &newInst); + + /* TEX texTemp, texTemp, sampler; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = sampler; + ctx->emit_instruction(ctx, &newInst); + + /* KILP texTemp; # if texTemp < 0, KILL fragment */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + } + + /* emit this instruction */ + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for doing polygon stipple. + * This will be the user's shader prefixed with a TEX and KIL instruction. + */ +static void +generate_pstip_fs(struct pstip_stage *pstip) +{ + const struct pipe_shader_state *orig_fs = &pstip->fs->state; + /*struct draw_context *draw = pstip->stage.draw;*/ + struct pipe_shader_state pstip_fs; + struct pstip_transform_context transform; + +#define MAX 1000 + + pstip_fs = *orig_fs; /* copy to init */ + pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.wincoordInput = -1; + transform.maxInput = -1; + transform.maxSampler = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = pstip_transform_inst; + transform.base.transform_declaration = pstip_transform_decl; + transform.base.transform_immediate = pstip_transform_immed; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) pstip_fs.tokens, + MAX, &transform.base); + +#if 1 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(pstip_fs.tokens, 0); +#endif + + pstip->sampler_unit = transform.maxSampler + 1; + + if (transform.wincoordInput < 0) { + pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; + pstip_fs.input_semantic_index[pstip_fs.num_inputs] = transform.maxInput; + pstip_fs.num_inputs++; + } + + pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); +} + + +/** + * Load texture image with current stipple pattern. + */ +static void +pstip_update_texture(struct pstip_stage *pstip) +{ + static const uint bit31 = 1 << 31; + struct pipe_context *pipe = pstip->pipe; + struct pipe_surface *surface; + const uint *stipple = pstip->state.stipple->stipple; + uint i, j; + ubyte *data; + + surface = pipe->get_tex_surface(pipe, pstip->texture, 0, 0, 0); + data = pipe_surface_map(surface); + + /* + * Load alpha texture. + * Note: 0 means keep the fragment, 255 means kill it. + * We'll negate the texel value and use KILP which kills if value + * is negative. + */ + for (i = 0; i < 32; i++) { + for (j = 0; j < 32; j++) { + if (stipple[i] & (bit31 >> j)) { + /* fragment "on" */ + data[i * surface->pitch + j] = 0; + } + else { + /* fragment "off" */ + data[i * surface->pitch + j] = 255; + } + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pstip->texture); +} + + +/** + * Create the texture map we'll use for stippling. + */ +static void +pstip_create_texture(struct pstip_stage *pstip) +{ + struct pipe_context *pipe = pstip->pipe; + struct pipe_texture texTemp; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = 0; + texTemp.width[0] = 32; + texTemp.height[0] = 32; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + pstip->texture = pipe->texture_create(pipe, &texTemp); + + //pstip_update_texture(pstip); +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +pstip_create_sampler(struct pstip_stage *pstip) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = pstip->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = 0.0f; + + pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_pstip_fragment_shader(struct pstip_stage *pstip) +{ + if (!pstip->fs->pstip_fs) { + generate_pstip_fs(pstip); + } + pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); +} + + + +static INLINE struct pstip_stage * +pstip_stage( struct draw_stage *stage ) +{ + return (struct pstip_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + +static void +pstip_first_tri(struct draw_stage *stage, struct prim_header *header) +{ + struct pstip_stage *pstip = pstip_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = pstip->pipe; + + assert(draw->rasterizer->poly_stipple_enable); + + /* + * Bind our fragprog, sampler and texture + */ + bind_pstip_fragment_shader(pstip); + + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture); + + /* now really draw first line */ + stage->tri = passthrough_tri; + stage->tri(stage, header); +} + + +static void +pstip_flush(struct draw_stage *stage, unsigned flags) +{ + /*struct draw_context *draw = stage->draw;*/ + struct pstip_stage *pstip = pstip_stage(stage); + struct pipe_context *pipe = pstip->pipe; + + stage->tri = pstip_first_tri; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, + pstip->state.sampler[pstip->sampler_unit]); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, + pstip->state.texture[pstip->sampler_unit]); +} + + +static void +pstip_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +pstip_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct pstip_stage * +draw_pstip_stage(struct draw_context *draw) +{ + struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); + + draw_alloc_temp_verts( &pstip->stage, 8 ); + + pstip->stage.draw = draw; + pstip->stage.next = NULL; + pstip->stage.point = passthrough_point; + pstip->stage.line = passthrough_line; + pstip->stage.tri = pstip_first_tri; + pstip->stage.flush = pstip_flush; + pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; + pstip->stage.destroy = pstip_destroy; + + return pstip; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a pstip_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct pstip_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_pstip_pipe_context(struct pipe_context *pipe, struct pstip_stage *pstip) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = pstip; + NumContexts++; +} + +static struct pstip_stage * +pstip_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +pstip_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); + } + + return aafs; +} + + +static void +pstip_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* save current */ + pstip->fs = aafs; + /* pass-through */ + pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs); +} + + +static void +pstip_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* pass-through */ + pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +pstip_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.sampler[unit] = sampler; + /* pass-through */ + pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler); +} + + +static void +pstip_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, struct pipe_texture *texture) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.texture[sampler] = texture; + /* pass-through */ + pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture); +} + + +static void +pstip_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.stipple = stipple; + /* pass-through */ + pstip->driver_set_polygon_stipple(pstip->pipe, stipple); + + pstip_update_texture(pstip); +} + + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_pstipple_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct pstip_stage *pstip; + + /* + * Create / install AA line drawing / prim stage + */ + pstip = draw_pstip_stage( draw ); + assert(pstip); + draw->pipeline.pstipple = &pstip->stage; + + pstip->pipe = pipe; + + /* create special texture, sampler state */ + pstip_create_texture(pstip); + pstip_create_sampler(pstip); + + /* save original driver functions */ + pstip->driver_create_fs_state = pipe->create_fs_state; + pstip->driver_bind_fs_state = pipe->bind_fs_state; + pstip->driver_delete_fs_state = pipe->delete_fs_state; + + pstip->driver_bind_sampler_state = pipe->bind_sampler_state; + pstip->driver_set_sampler_texture = pipe->set_sampler_texture; + pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; + + /* override the driver's functions */ + pipe->create_fs_state = pstip_create_fs_state; + pipe->bind_fs_state = pstip_bind_fs_state; + pipe->delete_fs_state = pstip_delete_fs_state; + + pipe->bind_sampler_state = pstip_bind_sampler_state; + pipe->set_sampler_texture = pstip_set_sampler_texture; + pipe->set_polygon_stipple = pstip_set_polygon_stipple; + + add_pstip_pipe_context(pipe, pstip); +} diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 4375ebabbc4..efd6793f2bb 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -58,7 +58,18 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) * shorter pipelines for lines & points. */ - if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines) || + if (draw->rasterizer->line_smooth && draw->pipeline.aaline) { + draw->pipeline.aaline->next = next; + next = draw->pipeline.aaline; + } + + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) { + draw->pipeline.aapoint->next = next; + next = draw->pipeline.aapoint; + } + + if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines + && !draw->rasterizer->line_smooth) || (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) || draw->rasterizer->point_sprite) { draw->pipeline.wide->next = next; @@ -71,6 +82,12 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) precalc_flat = 1; /* only needed for lines really */ } + if (draw->rasterizer->poly_stipple_enable + && draw->pipeline.pstipple) { + draw->pipeline.pstipple->next = next; + next = draw->pipeline.pstipple; + } + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { draw->pipeline.unfilled->next = next; diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index 44427999ccf..53f8bbec445 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -41,7 +41,7 @@ void draw_vertex_cache_invalidate( struct draw_context *draw ) assert(draw->vs.queue_nr == 0); assert(draw->vcache.referenced == 0); - memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); +// memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); } @@ -62,43 +62,51 @@ static struct vertex_header *get_vertex( struct draw_context *draw, assert(slot < 32); /* so we don't exceed the bitfield size below */ - /* Cache miss? - */ - if (draw->vcache.idx[slot] != i) { - - /* If slot is in use, use the overflow area: + if (draw->vcache.referenced & (1<<slot)) + { + /* Cache hit? */ - if (draw->vcache.referenced & (1 << slot)) { - slot = VCACHE_SIZE + draw->vcache.overflow++; + if (draw->vcache.idx[slot].in == i) { +// _mesa_printf("HIT %d %d\n", slot, i); + assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); + return draw->vs.queue[draw->vcache.idx[slot].out].vertex; } + /* Otherwise a collision + */ + slot = VCACHE_SIZE + draw->vcache.overflow++; +// _mesa_printf("XXX %d --> %d\n", i, slot); + } + + /* Deal with the cache miss: + */ + { + unsigned out; + assert(slot < Elements(draw->vcache.idx)); - draw->vcache.idx[slot] = i; +// _mesa_printf("NEW %d %d\n", slot, i); + draw->vcache.idx[slot].in = i; + draw->vcache.idx[slot].out = out = draw->vs.queue_nr++; + draw->vcache.referenced |= (1 << slot); + /* Add to vertex shader queue: */ assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - draw->vs.queue[draw->vs.queue_nr].dest = draw->vcache.vertex[slot]; - draw->vs.queue[draw->vs.queue_nr].elt = i; - draw->vs.queue_nr++; + + draw->vs.queue[out].elt = i; + draw->vs.queue[out].vertex->clipmask = 0; + draw->vs.queue[out].vertex->edgeflag = 1; /*XXX use user's edge flag! */ + draw->vs.queue[out].vertex->pad = 0; + draw->vs.queue[out].vertex->vertex_id = UNDEFINED_VERTEX_ID; /* Need to set the vertex's edge flag here. If we're being called * by do_ef_triangle(), that function needs edge flag info! */ - draw->vcache.vertex[slot]->clipmask = 0; - draw->vcache.vertex[slot]->edgeflag = 1; /*XXX use user's edge flag! */ - draw->vcache.vertex[slot]->pad = 0; - draw->vcache.vertex[slot]->vertex_id = UNDEFINED_VERTEX_ID; - } - - /* primitive flushing may have cleared the bitfield but did not - * clear the idx[] array values. Set the bit now. This fixes a - * bug found when drawing long triangle fans. - */ - draw->vcache.referenced |= (1 << slot); - return draw->vcache.vertex[slot]; + return draw->vs.queue[draw->vcache.idx[slot].out].vertex; + } } @@ -130,8 +138,8 @@ void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) { unsigned i; - for (i = 0; i < Elements(draw->vcache.vertex); i++) - draw->vcache.vertex[i]->vertex_id = UNDEFINED_VERTEX_ID; + for (i = 0; i < draw->vs.post_nr; i++) + draw->vs.queue[i].vertex->vertex_id = UNDEFINED_VERTEX_ID; } diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index f68f6e32440..5d2f5c9c43a 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -55,7 +55,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) */ shader->prepare( shader, draw ); -// fprintf(stderr, " q(%d) ", draw->vs.queue_nr ); +// fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr ); /* run vertex shader on vertex cache entries, four per invokation */ for (i = 0; i < draw->vs.queue_nr; i += 4) { @@ -65,12 +65,12 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) for (j = 0; j < n; j++) { elts[j] = draw->vs.queue[i + j].elt; - dests[j] = draw->vs.queue[i + j].dest; + dests[j] = draw->vs.queue[i + j].vertex; } for ( ; j < 4; j++) { elts[j] = elts[0]; - dests[j] = dests[0]; + dests[j] = draw->vs.queue[i + j].vertex; } assert(n > 0); @@ -79,6 +79,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) shader->run(shader, draw, elts, n, dests); } + draw->vs.post_nr = draw->vs.queue_nr; draw->vs.queue_nr = 0; } diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index dc3a5ecd219..901ff20a7e7 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -30,6 +30,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" +#include "rtasm/rtasm_execmem.h" #include "draw_vf.h" @@ -37,11 +38,6 @@ #define DRAW_VF_DBG 0 -/* TODO: remove this */ -extern void -_mesa_exec_free( void *addr ); - - static boolean match_fastpath( struct draw_vertex_fetch *vf, const struct draw_vf_fastpath *fp) { @@ -414,12 +410,12 @@ void draw_vf_destroy( struct draw_vertex_fetch *vf ) FREE(fp->attr); /* KW: At the moment, fp->func is constrained to be allocated by - * _mesa_exec_alloc(), as the hardwired fastpaths in + * rtasm_exec_alloc(), as the hardwired fastpaths in * t_vertex_generic.c are handled specially. It would be nice * to unify them, but this probably won't change until this * module gets another overhaul. */ - //_mesa_exec_free((void *) fp->func); + //rtasm_exec_free((void *) fp->func); FREE(fp); } diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c index 1ad2ae756dd..aff4ffd985c 100644 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ b/src/gallium/auxiliary/draw/draw_vf_sse.c @@ -26,17 +26,16 @@ */ -#include "simple_list.h" - #include "pipe/p_compiler.h" +#include "util/u_simple_list.h" #include "draw_vf.h" #if defined(USE_SSE_ASM) -#include "x86/rtasm/x86sse.h" -#include "x86/common_x86_asm.h" +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_x86sse.h" #define X 0 @@ -576,7 +575,7 @@ void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) { struct x86_program p; - if (!cpu_has_xmm) { + if (!rtasm_cpu_has_sse()) { vf->codegen_emit = NULL; return; } @@ -586,7 +585,7 @@ void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) p.vf = vf; p.inputs_safe = 0; /* for now */ p.outputs_safe = 1; /* for now */ - p.have_sse2 = cpu_has_xmm2; + p.have_sse2 = rtasm_cpu_has_sse2(); p.identity = x86_make_reg(file_XMM, 6); p.chan0 = x86_make_reg(file_XMM, 7); diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 44022b6e077..0fd557d6674 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -40,7 +40,7 @@ #ifdef MESA_LLVM -#include "llvm/gallivm.h" +#include "gallivm/gallivm.h" struct draw_llvm_vertex_shader { struct draw_vertex_shader base; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 27bc66812c3..11ef0c503dc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -41,7 +41,7 @@ #include "draw_private.h" #include "draw_context.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_sse2.h" diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c index 655774b155f..1f8069bdcaa 100644 --- a/src/gallium/auxiliary/draw/draw_wide_prims.c +++ b/src/gallium/auxiliary/draw/draw_wide_prims.c @@ -162,70 +162,6 @@ static void wide_line( struct draw_stage *stage, /** - * Draw a wide line by drawing a quad, using geometry which will - * fullfill GL's antialiased line requirements. - */ -static void wide_line_aa(struct draw_stage *stage, - struct prim_header *header) -{ - const struct wide_stage *wide = wide_stage(stage); - const float half_width = wide->half_line_width; - struct prim_header tri; - struct vertex_header *v[4]; - float *pos; - float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; - float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; - const float len = (float) sqrt(dx * dx + dy * dy); - uint i; - - dx = dx * half_width / len; - dy = dy * half_width / len; - - /* allocate/dup new verts */ - for (i = 0; i < 4; i++) { - v[i] = dup_vert(stage, header->v[i/2], i); - } - - /* - * Quad for line from v0 to v1: - * - * 1 3 - * +-------------------------+ - * | | - * *v0 v1* - * | | - * +-------------------------+ - * 0 2 - */ - - pos = v[0]->data[0]; - pos[0] += dy; - pos[1] -= dx; - - pos = v[1]->data[0]; - pos[0] -= dy; - pos[1] += dx; - - pos = v[2]->data[0]; - pos[0] += dy; - pos[1] -= dx; - - pos = v[3]->data[0]; - pos[0] -= dy; - pos[1] += dx; - - tri.det = header->det; /* only the sign matters */ - - tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; - stage->next->tri( stage->next, &tri ); - -} - - -/** * Set the vertex texcoords for sprite mode. * Coords may be left untouched or set to a right-side-up or upside-down * orientation. @@ -379,10 +315,7 @@ static void wide_first_line( struct draw_stage *stage, wide->half_line_width = 0.5f * draw->rasterizer->line_width; if (draw->rasterizer->line_width != 1.0) { - if (draw->rasterizer->line_smooth) - wide->stage.line = wide_line_aa; - else - wide->stage.line = wide_line; + wide->stage.line = wide_line; } else { wide->stage.line = passthrough_line; diff --git a/src/gallium/auxiliary/llvm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index 39fac6ea4a5..39fac6ea4a5 100644 --- a/src/gallium/auxiliary/llvm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript new file mode 100644 index 00000000000..c0aa51b90a9 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/SConscript @@ -0,0 +1,16 @@ +Import('*') + +gallivm = env.ConvenienceLibrary( + target = 'gallivm', + source = [ + 'gallivm.cpp', + 'gallivm_cpu.cpp', + 'instructions.cpp', + 'loweringpass.cpp', + 'tgsitollvm.cpp', + 'storage.cpp', + 'storagesoa.cpp', + 'instructionssoa.cpp', + ]) + +auxiliaries.insert(0, gallivm) diff --git a/src/gallium/auxiliary/llvm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index d14bb3b99a8..d14bb3b99a8 100644 --- a/src/gallium/auxiliary/llvm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp diff --git a/src/gallium/auxiliary/llvm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h index 92da4bca7f4..92da4bca7f4 100644 --- a/src/gallium/auxiliary/llvm/gallivm.h +++ b/src/gallium/auxiliary/gallivm/gallivm.h diff --git a/src/gallium/auxiliary/llvm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index 1796f0a1772..1796f0a1772 100644 --- a/src/gallium/auxiliary/llvm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp diff --git a/src/gallium/auxiliary/llvm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 8f9830d0b1e..8f9830d0b1e 100644 --- a/src/gallium/auxiliary/llvm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp diff --git a/src/gallium/auxiliary/llvm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h index cfe7b1901b3..cfe7b1901b3 100644 --- a/src/gallium/auxiliary/llvm/gallivm_p.h +++ b/src/gallium/auxiliary/gallivm/gallivm_p.h diff --git a/src/gallium/auxiliary/llvm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 55d39fa5f12..55d39fa5f12 100644 --- a/src/gallium/auxiliary/llvm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp diff --git a/src/gallium/auxiliary/llvm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index 9ebc17dd8ec..9ebc17dd8ec 100644 --- a/src/gallium/auxiliary/llvm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h diff --git a/src/gallium/auxiliary/llvm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index a4d50466373..a4d50466373 100644 --- a/src/gallium/auxiliary/llvm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp diff --git a/src/gallium/auxiliary/llvm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 4169dcbb2eb..4169dcbb2eb 100644 --- a/src/gallium/auxiliary/llvm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h diff --git a/src/gallium/auxiliary/llvm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c index 4f98d754baa..4f98d754baa 100644 --- a/src/gallium/auxiliary/llvm/llvm_builtins.c +++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c diff --git a/src/gallium/auxiliary/llvm/loweringpass.cpp b/src/gallium/auxiliary/gallivm/loweringpass.cpp index 556dbec3661..556dbec3661 100644 --- a/src/gallium/auxiliary/llvm/loweringpass.cpp +++ b/src/gallium/auxiliary/gallivm/loweringpass.cpp diff --git a/src/gallium/auxiliary/llvm/loweringpass.h b/src/gallium/auxiliary/gallivm/loweringpass.h index f62dcf6ba73..f62dcf6ba73 100644 --- a/src/gallium/auxiliary/llvm/loweringpass.h +++ b/src/gallium/auxiliary/gallivm/loweringpass.h diff --git a/src/gallium/auxiliary/llvm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp index c4326de8c53..c4326de8c53 100644 --- a/src/gallium/auxiliary/llvm/storage.cpp +++ b/src/gallium/auxiliary/gallivm/storage.cpp diff --git a/src/gallium/auxiliary/llvm/storage.h b/src/gallium/auxiliary/gallivm/storage.h index 8574f7554e3..8574f7554e3 100644 --- a/src/gallium/auxiliary/llvm/storage.h +++ b/src/gallium/auxiliary/gallivm/storage.h diff --git a/src/gallium/auxiliary/llvm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index ed0674a96f0..ed0674a96f0 100644 --- a/src/gallium/auxiliary/llvm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp diff --git a/src/gallium/auxiliary/llvm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h index 6443351f270..6443351f270 100644 --- a/src/gallium/auxiliary/llvm/storagesoa.h +++ b/src/gallium/auxiliary/gallivm/storagesoa.h diff --git a/src/gallium/auxiliary/llvm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 2cb4acce32f..2cb4acce32f 100644 --- a/src/gallium/auxiliary/llvm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp diff --git a/src/gallium/auxiliary/llvm/tgsitollvm.h b/src/gallium/auxiliary/gallivm/tgsitollvm.h index 7ada04d6299..7ada04d6299 100644 --- a/src/gallium/auxiliary/llvm/tgsitollvm.h +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.h diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index 588629e8701..a9fa518c674 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = pipebuffer -DRIVER_SOURCES = \ +C_SOURCES = \ pb_buffer_fenced.c \ pb_buffer_malloc.c \ pb_bufmgr_fenced.c \ @@ -12,11 +11,6 @@ DRIVER_SOURCES = \ pb_bufmgr_pool.c \ pb_winsys.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript new file mode 100644 index 00000000000..3d41fd84a6c --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -0,0 +1,14 @@ +Import('*') + +pipebuffer = env.ConvenienceLibrary( + target = 'pipebuffer', + source = [ + 'pb_buffer_fenced.c', + 'pb_buffer_malloc.c', + 'pb_bufmgr_fenced.c', + 'pb_bufmgr_mm.c', + 'pb_bufmgr_pool.c', + 'pb_winsys.c', + ]) + +auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index f4fc3f6d714..e2ee72ed1fa 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -34,13 +34,12 @@ */ -#include "linked_list.h" - -#include "p_compiler.h" -#include "p_debug.h" -#include "p_winsys.h" -#include "p_thread.h" -#include "p_util.h" +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index c535d3276c9..bffca5b2449 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -34,8 +34,8 @@ */ -#include "p_debug.h" -#include "p_util.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 8b1b51c0e28..ff4fd123f36 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -1,7 +1,6 @@ /************************************************************************** * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright 1999 Wittawat Yamwong * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -34,12 +33,12 @@ */ -#include "linked_list.h" - -#include "p_defines.h" -#include "p_debug.h" -#include "p_thread.h" -#include "p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" +#include "util/u_mm.h" #include "pb_buffer.h" #include "pb_bufmgr.h" @@ -50,306 +49,6 @@ #define SUPER(__derived) (&(__derived)->base) -struct mem_block -{ - struct mem_block *next, *prev; - struct mem_block *next_free, *prev_free; - struct mem_block *heap; - int ofs, size; - unsigned int free:1; - unsigned int reserved:1; -}; - - -#ifdef DEBUG -/** - * For debugging purposes. - */ -static void -mmDumpMemInfo(const struct mem_block *heap) -{ - debug_printf("Memory heap %p:\n", (void *)heap); - if (heap == 0) { - debug_printf(" heap == 0\n"); - } else { - const struct mem_block *p; - - for(p = heap->next; p != heap; p = p->next) { - debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, - p->free ? 'F':'.', - p->reserved ? 'R':'.'); - } - - debug_printf("\nFree list:\n"); - - for(p = heap->next_free; p != heap; p = p->next_free) { - debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, - p->free ? 'F':'.', - p->reserved ? 'R':'.'); - } - - } - debug_printf("End of memory blocks\n"); -} -#endif - - -/** - * input: total size in bytes - * return: a heap pointer if OK, NULL if error - */ -static struct mem_block * -mmInit(int ofs, int size) -{ - struct mem_block *heap, *block; - - if (size <= 0) - return NULL; - - heap = CALLOC_STRUCT(mem_block); - if (!heap) - return NULL; - - block = CALLOC_STRUCT(mem_block); - if (!block) { - FREE(heap); - return NULL; - } - - heap->next = block; - heap->prev = block; - heap->next_free = block; - heap->prev_free = block; - - block->heap = heap; - block->next = heap; - block->prev = heap; - block->next_free = heap; - block->prev_free = heap; - - block->ofs = ofs; - block->size = size; - block->free = 1; - - return heap; -} - - -static struct mem_block * -SliceBlock(struct mem_block *p, - int startofs, int size, - int reserved, int alignment) -{ - struct mem_block *newblock; - - /* break left [p, newblock, p->next], then p = newblock */ - if (startofs > p->ofs) { - newblock = CALLOC_STRUCT(mem_block); - if (!newblock) - return NULL; - newblock->ofs = startofs; - newblock->size = p->size - (startofs - p->ofs); - newblock->free = 1; - newblock->heap = p->heap; - - newblock->next = p->next; - newblock->prev = p; - p->next->prev = newblock; - p->next = newblock; - - newblock->next_free = p->next_free; - newblock->prev_free = p; - p->next_free->prev_free = newblock; - p->next_free = newblock; - - p->size -= newblock->size; - p = newblock; - } - - /* break right, also [p, newblock, p->next] */ - if (size < p->size) { - newblock = CALLOC_STRUCT(mem_block); - if (!newblock) - return NULL; - newblock->ofs = startofs + size; - newblock->size = p->size - size; - newblock->free = 1; - newblock->heap = p->heap; - - newblock->next = p->next; - newblock->prev = p; - p->next->prev = newblock; - p->next = newblock; - - newblock->next_free = p->next_free; - newblock->prev_free = p; - p->next_free->prev_free = newblock; - p->next_free = newblock; - - p->size = size; - } - - /* p = middle block */ - p->free = 0; - - /* Remove p from the free list: - */ - p->next_free->prev_free = p->prev_free; - p->prev_free->next_free = p->next_free; - - p->next_free = 0; - p->prev_free = 0; - - p->reserved = reserved; - return p; -} - - -/** - * Allocate 'size' bytes with 2^align2 bytes alignment, - * restrict the search to free memory after 'startSearch' - * depth and back buffers should be in different 4mb banks - * to get better page hits if possible - * input: size = size of block - * align2 = 2^align2 bytes alignment - * startSearch = linear offset from start of heap to begin search - * return: pointer to the allocated block, 0 if error - */ -static struct mem_block * -mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) -{ - struct mem_block *p; - const int mask = (1 << align2)-1; - int startofs = 0; - int endofs; - - if (!heap || align2 < 0 || size <= 0) - return NULL; - - for (p = heap->next_free; p != heap; p = p->next_free) { - assert(p->free); - - startofs = (p->ofs + mask) & ~mask; - if ( startofs < startSearch ) { - startofs = startSearch; - } - endofs = startofs+size; - if (endofs <= (p->ofs+p->size)) - break; - } - - if (p == heap) - return NULL; - - assert(p->free); - p = SliceBlock(p,startofs,size,0,mask+1); - - return p; -} - - -#if 0 -/** - * Free block starts at offset - * input: pointer to a heap, start offset - * return: pointer to a block - */ -static struct mem_block * -mmFindBlock(struct mem_block *heap, int start) -{ - struct mem_block *p; - - for (p = heap->next; p != heap; p = p->next) { - if (p->ofs == start) - return p; - } - - return NULL; -} -#endif - - -static INLINE int -Join2Blocks(struct mem_block *p) -{ - /* XXX there should be some assertions here */ - - /* NOTE: heap->free == 0 */ - - if (p->free && p->next->free) { - struct mem_block *q = p->next; - - assert(p->ofs + p->size == q->ofs); - p->size += q->size; - - p->next = q->next; - q->next->prev = p; - - q->next_free->prev_free = q->prev_free; - q->prev_free->next_free = q->next_free; - - FREE(q); - return 1; - } - return 0; -} - - -/** - * Free block starts at offset - * input: pointer to a block - * return: 0 if OK, -1 if error - */ -static int -mmFreeMem(struct mem_block *b) -{ - if (!b) - return 0; - - if (b->free) { - debug_printf("block already free\n"); - return -1; - } - if (b->reserved) { - debug_printf("block is reserved\n"); - return -1; - } - - b->free = 1; - b->next_free = b->heap->next_free; - b->prev_free = b->heap; - b->next_free->prev_free = b; - b->prev_free->next_free = b; - - Join2Blocks(b); - if (b->prev != b->heap) - Join2Blocks(b->prev); - - return 0; -} - - -/** - * destroy MM - */ -static void -mmDestroy(struct mem_block *heap) -{ - struct mem_block *p; - - if (!heap) - return; - - for (p = heap->next; p != heap; ) { - struct mem_block *next = p->next; - FREE(p); - p = next; - } - - FREE(heap); -} - - struct mm_pb_manager { struct pb_manager base; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 04477a865a5..528e9528f6c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -35,13 +35,12 @@ */ -#include "linked_list.h" - -#include "p_compiler.h" -#include "p_debug.h" -#include "p_thread.h" -#include "p_defines.h" -#include "p_util.h" +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile new file mode 100644 index 00000000000..39b8a4dbd7a --- /dev/null +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -0,0 +1,15 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = rtasm + +C_SOURCES = \ + rtasm_cpu.c \ + rtasm_execmem.c \ + rtasm_x86sse.c \ + rtasm_ppc_spe.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript new file mode 100644 index 00000000000..8ea25922aa1 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -0,0 +1,12 @@ +Import('*') + +rtasm = env.ConvenienceLibrary( + target = 'rtasm', + source = [ + 'rtasm_cpu.c', + 'rtasm_execmem.c', + 'rtasm_x86sse.c', + 'rtasm_ppc_spe.c', + ]) + +auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c new file mode 100644 index 00000000000..eb3359750b4 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "rtasm_cpu.h" + + +int rtasm_cpu_has_sse(void) +{ + /* FIXME: actually detect this at run-time */ +#if defined(__i386__) || defined(__386__) + return 1; +#else + return 0; +#endif +} + +int rtasm_cpu_has_sse2(void) +{ + /* FIXME: actually detect this at run-time */ +#if defined(__i386__) || defined(__386__) + return 1; +#else + return 0; +#endif +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.h b/src/gallium/auxiliary/rtasm/rtasm_cpu.h new file mode 100644 index 00000000000..ebc71634fdf --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Runtime detection of CPU capabilities. + */ + +#ifndef _RTASM_CPU_H_ +#define _RTASM_CPU_H_ + + +int rtasm_cpu_has_sse(void); + +int rtasm_cpu_has_sse2(void); + + +#endif /* _RTASM_CPU_H_ */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c new file mode 100644 index 00000000000..300c1c2d9d7 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \file exemem.c + * Functions for allocating executable memory. + * + * \author Keith Whitwell + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" + +#include "rtasm_execmem.h" + + +#if defined(__linux__) + +/* + * Allocate a large block of memory which can hold code then dole it out + * in pieces by means of the generic memory manager code. +*/ + +#include <unistd.h> +#include <sys/mman.h> +#include "util/u_mm.h" + +#define EXEC_HEAP_SIZE (10*1024*1024) + +_glthread_DECLARE_STATIC_MUTEX(exec_mutex); + +static struct mem_block *exec_heap = NULL; +static unsigned char *exec_mem = NULL; + + +static void +init_heap(void) +{ + if (!exec_heap) + exec_heap = mmInit( 0, EXEC_HEAP_SIZE ); + + if (!exec_mem) + exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, + PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +} + + +void * +rtasm_exec_malloc(size_t size) +{ + struct mem_block *block = NULL; + void *addr = NULL; + + _glthread_LOCK_MUTEX(exec_mutex); + + init_heap(); + + if (exec_heap) { + size = (size + 31) & ~31; + block = mmAllocMem( exec_heap, size, 32, 0 ); + } + + if (block) + addr = exec_mem + block->ofs; + else + debug_printf("rtasm_exec_malloc failed\n"); + + _glthread_UNLOCK_MUTEX(exec_mutex); + + return addr; +} + + +void +rtasm_exec_free(void *addr) +{ + _glthread_LOCK_MUTEX(exec_mutex); + + if (exec_heap) { + struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); + + if (block) + mmFreeMem(block); + } + + _glthread_UNLOCK_MUTEX(exec_mutex); +} + + +#else + +/* + * Just use regular memory. + */ + +void * +rtasm_exec_malloc(size_t size) +{ + return MALLOC( size ); +} + + +void +rtasm_exec_free(void *addr) +{ + FREE(addr); +} + + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.h b/src/gallium/auxiliary/rtasm/rtasm_execmem.h new file mode 100644 index 00000000000..155c6d34e0a --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file exemem.c + * Functions for allocating executable memory. + * + * \author Keith Whitwell + */ + +#ifndef _RTASM_EXECMEM_H_ +#define _RTASM_EXECMEM_H_ + +#include "pipe/p_compiler.h" + + +extern void * +rtasm_exec_malloc( size_t size ); + + +extern void +rtasm_exec_free( void *addr ); + + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c new file mode 100644 index 00000000000..95a2d6fcbbe --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -0,0 +1,386 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * + * \author Ian Romanick <[email protected]> + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "rtasm_ppc_spe.h" + +#ifdef GALLIUM_CELL +/** + * SPE instruction types + * + * There are 6 primary instruction encodings used on the Cell's SPEs. Each of + * the following unions encodes one type. + * + * \bug + * If, at some point, we start generating SPE code from a little-endian host + * these unions will not work. + */ +/*@{*/ +/** + * Encode one output register with two input registers + */ +union spe_inst_RR { + uint32_t bits; + struct { + unsigned op:11; + unsigned rB:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with three input registers + */ +union spe_inst_RRR { + uint32_t bits; + struct { + unsigned op:4; + unsigned rT:7; + unsigned rB:7; + unsigned rA:7; + unsigned rC:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 7-bit signed immed + */ +union spe_inst_RI7 { + uint32_t bits; + struct { + unsigned op:11; + unsigned i7:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and an 8-bit signed immed + */ +union spe_inst_RI8 { + uint32_t bits; + struct { + unsigned op:10; + unsigned i8:8; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 10-bit signed immed + */ +union spe_inst_RI10 { + uint32_t bits; + struct { + unsigned op:8; + unsigned i10:10; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 16-bit signed immediate + */ +union spe_inst_RI16 { + uint32_t bits; + struct { + unsigned op:9; + unsigned i16:16; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 18-bit signed immediate + */ +union spe_inst_RI18 { + uint32_t bits; + struct { + unsigned op:7; + unsigned i18:18; + unsigned rT:7; + } inst; +}; +/*@}*/ + + +static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB) +{ + union spe_inst_RR inst; + inst.inst.op = op; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB, unsigned rC) +{ + union spe_inst_RRR inst; + inst.inst.op = op; + inst.inst.rT = rT; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rC = rC; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI7 inst; + inst.inst.op = op; + inst.inst.i7 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + +static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI8 inst; + inst.inst.op = op; + inst.inst.i8 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + +static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI10 inst; + inst.inst.op = op; + inst.inst.i10 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, + int imm) +{ + union spe_inst_RI16 inst; + inst.inst.op = op; + inst.inst.i16 = imm; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, + int imm) +{ + union spe_inst_RI18 inst; + inst.inst.op = op; + inst.inst.i18 = imm; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + + +#define EMIT_(_name, _op) \ +void _name (struct spe_function *p, unsigned rT) \ +{ \ + emit_RR(p, _op, rT, 0, 0); \ +} + +#define EMIT_R(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA) \ +{ \ + emit_RR(p, _op, rT, rA, 0); \ +} + +#define EMIT_RR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ +{ \ + emit_RR(p, _op, rT, rA, rB); \ +} + +#define EMIT_RRR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ +{ \ + emit_RRR(p, _op, rT, rA, rB, rC); \ +} + +#define EMIT_RI7(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI7(p, _op, rT, rA, imm); \ +} + +#define EMIT_RI8(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI8(p, _op, rT, rA, 155 - imm); \ +} + +#define EMIT_RI10(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI10(p, _op, rT, rA, imm); \ +} + +#define EMIT_RI16(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI16(p, _op, rT, imm); \ +} + +#define EMIT_RI18(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI18(p, _op, rT, imm); \ +} + +#define EMIT_I16(_name, _op) \ +void _name (struct spe_function *p, int imm) \ +{ \ + emit_RI16(p, _op, 0, imm); \ +} + +#include "rtasm_ppc_spe.h" + + +/* + */ +void spe_init_func(struct spe_function *p, unsigned code_size) +{ + p->store = align_malloc(code_size, 16); + p->csr = p->store; +} + + +void spe_release_func(struct spe_function *p) +{ + align_free(p->store); + p->store = NULL; + p->csr = NULL; +} + + +void spe_bi(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); +} + +void spe_iret(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); +} + +void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); +} + +void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); +} + +void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); +} + + +/* Hint-for-branch instructions + */ +#if 0 +hbr; +hbra; +hbrr; +#endif + + +/* Control instructions + */ +#if 0 +stop; +EMIT_RR (spe_stopd, 0x140); +EMIT_ (spe_lnop, 0x001); +EMIT_ (spe_nop, 0x201); +sync; +EMIT_ (spe_dsync, 0x003); +EMIT_R (spe_mfspr, 0x00c); +EMIT_R (spe_mtspr, 0x10c); +#endif + +#endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h new file mode 100644 index 00000000000..10ce44b3a08 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -0,0 +1,314 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * + * \author Ian Romanick <[email protected]> + */ + +#ifndef RTASM_PPC_SPE_H +#define RTASM_PPC_SPE_H + +struct spe_function { + /** + * + */ + uint32_t *store; + uint32_t *csr; + const char *fn; +}; + +extern void spe_init_func(struct spe_function *p, unsigned code_size); +extern void spe_release_func(struct spe_function *p); + +#endif /* RTASM_PPC_SPE_H */ + +#ifndef EMIT_ +#define EMIT_(name, _op) \ + extern void _name (struct spe_function *p, unsigned rT) +#define EMIT_R(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA) +#define EMIT_RR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB) +#define EMIT_RRR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB, unsigned rC) +#define EMIT_RI7(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI8(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI10(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI16(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, int imm) +#define EMIT_RI18(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, int imm) +#define EMIT_I16(_name, _op) \ + extern void _name (struct spe_function *p, int imm) +#define UNDEF_EMIT_MACROS +#endif /* EMIT_ */ + + +/* Memory load / store instructions + */ +EMIT_RI10(spe_lqd, 0x034); +EMIT_RR (spe_lqx, 0x1c4); +EMIT_RI16(spe_lqa, 0x061); +EMIT_RI16(spe_lqr, 0x067); +EMIT_RI10(spe_stqd, 0x024); +EMIT_RR (spe_stqx, 0x144); +EMIT_RI16(spe_stqa, 0x041); +EMIT_RI16(spe_stqr, 0x047); +EMIT_RI7 (spe_cbd, 0x1f4); +EMIT_RR (spe_cbx, 0x1d4); +EMIT_RI7 (spe_chd, 0x1f5); +EMIT_RI7 (spe_chx, 0x1d5); +EMIT_RI7 (spe_cwd, 0x1f6); +EMIT_RI7 (spe_cwx, 0x1d6); +EMIT_RI7 (spe_cdd, 0x1f7); +EMIT_RI7 (spe_cdx, 0x1d7); + + +/* Constant formation instructions + */ +EMIT_RI16(spe_ilh, 0x083); +EMIT_RI16(spe_ilhu, 0x082); +EMIT_RI16(spe_il, 0x081); +EMIT_RI18(spe_ila, 0x021); +EMIT_RI16(spe_iohl, 0x0c1); +EMIT_RI16(spe_fsmbi, 0x0c5); + + +/* Integer and logical instructions + */ +EMIT_RR (spe_ah, 0x0c8); +EMIT_RI10(spe_ahi, 0x01d); +EMIT_RR (spe_a, 0x0c0); +EMIT_RI10(spe_ai, 0x01c); +EMIT_RR (spe_sfh, 0x048); +EMIT_RI10(spe_sfhi, 0x00d); +EMIT_RR (spe_sf, 0x040); +EMIT_RI10(spe_sfi, 0x00c); +EMIT_RR (spe_addx, 0x340); +EMIT_RR (spe_cg, 0x0c2); +EMIT_RR (spe_cgx, 0x342); +EMIT_RR (spe_sfx, 0x341); +EMIT_RR (spe_bg, 0x042); +EMIT_RR (spe_bgx, 0x343); +EMIT_RR (spe_mpy, 0x3c4); +EMIT_RR (spe_mpyu, 0x3cc); +EMIT_RI10(spe_mpyi, 0x074); +EMIT_RI10(spe_mpyui, 0x075); +EMIT_RRR (spe_mpya, 0x00c); +EMIT_RR (spe_mpyh, 0x3c5); +EMIT_RR (spe_mpys, 0x3c7); +EMIT_RR (spe_mpyhh, 0x3c6); +EMIT_RR (spe_mpyhha, 0x346); +EMIT_RR (spe_mpyhhu, 0x3ce); +EMIT_RR (spe_mpyhhau, 0x34e); +EMIT_R (spe_clz, 0x2a5); +EMIT_R (spe_cntb, 0x2b4); +EMIT_R (spe_fsmb, 0x1b6); +EMIT_R (spe_fsmh, 0x1b5); +EMIT_R (spe_fsm, 0x1b4); +EMIT_R (spe_gbb, 0x1b2); +EMIT_R (spe_gbh, 0x1b1); +EMIT_R (spe_gb, 0x1b0); +EMIT_RR (spe_avgb, 0x0d3); +EMIT_RR (spe_absdb, 0x053); +EMIT_RR (spe_sumb, 0x253); +EMIT_R (spe_xsbh, 0x2b6); +EMIT_R (spe_xshw, 0x2ae); +EMIT_R (spe_xswd, 0x2a6); +EMIT_RR (spe_and, 0x0c1); +EMIT_RR (spe_andc, 0x2c1); +EMIT_RI10(spe_andbi, 0x016); +EMIT_RI10(spe_andhi, 0x015); +EMIT_RI10(spe_andi, 0x014); +EMIT_RR (spe_or, 0x041); +EMIT_RR (spe_orc, 0x2c9); +EMIT_RI10(spe_orbi, 0x006); +EMIT_RI10(spe_orhi, 0x005); +EMIT_RI10(spe_ori, 0x004); +EMIT_R (spe_orx, 0x1f0); +EMIT_RR (spe_xor, 0x241); +EMIT_RI10(spe_xorbi, 0x026); +EMIT_RI10(spe_xorhi, 0x025); +EMIT_RI10(spe_xori, 0x024); +EMIT_RR (spe_nand, 0x0c9); +EMIT_RR (spe_nor, 0x049); +EMIT_RR (spe_eqv, 0x249); +EMIT_RRR (spe_selb, 0x008); +EMIT_RRR (spe_shufb, 0x00b); + + +/* Shift and rotate instructions + */ +EMIT_RR (spe_shlh, 0x05f); +EMIT_RI7 (spe_shlhi, 0x07f); +EMIT_RR (spe_shl, 0x05b); +EMIT_RI7 (spe_shli, 0x07b); +EMIT_RR (spe_shlqbi, 0x1db); +EMIT_RI7 (spe_shlqbii, 0x1fb); +EMIT_RR (spe_shlqby, 0x1df); +EMIT_RI7 (spe_shlqbyi, 0x1ff); +EMIT_RR (spe_shlqbybi, 0x1cf); +EMIT_RR (spe_roth, 0x05c); +EMIT_RI7 (spe_rothi, 0x07c); +EMIT_RR (spe_rot, 0x058); +EMIT_RI7 (spe_roti, 0x078); +EMIT_RR (spe_rotqby, 0x1dc); +EMIT_RI7 (spe_rotqbyi, 0x1fc); +EMIT_RR (spe_rotqbybi, 0x1cc); +EMIT_RR (spe_rotqbi, 0x1d8); +EMIT_RI7 (spe_rotqbii, 0x1f8); +EMIT_RR (spe_rothm, 0x05d); +EMIT_RI7 (spe_rothmi, 0x07d); +EMIT_RR (spe_rotm, 0x059); +EMIT_RI7 (spe_rotmi, 0x079); +EMIT_RR (spe_rotqmby, 0x1dd); +EMIT_RI7 (spe_rotqmbyi, 0x1fd); +EMIT_RR (spe_rotqmbybi, 0x1cd); +EMIT_RR (spe_rotqmbi, 0x1c9); +EMIT_RI7 (spe_rotqmbii, 0x1f9); +EMIT_RR (spe_rotmah, 0x05e); +EMIT_RI7 (spe_rotmahi, 0x07e); +EMIT_RR (spe_rotma, 0x05a); +EMIT_RI7 (spe_rotmai, 0x07a); + + +/* Compare, branch, and halt instructions + */ +EMIT_RR (spe_heq, 0x3d8); +EMIT_RI10(spe_heqi, 0x07f); +EMIT_RR (spe_hgt, 0x258); +EMIT_RI10(spe_hgti, 0x04f); +EMIT_RR (spe_hlgt, 0x2d8); +EMIT_RI10(spe_hlgti, 0x05f); +EMIT_RR (spe_ceqb, 0x3d0); +EMIT_RI10(spe_ceqbi, 0x07e); +EMIT_RR (spe_ceqh, 0x3c8); +EMIT_RI10(spe_ceqhi, 0x07d); +EMIT_RR (spe_ceq, 0x3c0); +EMIT_RI10(spe_ceqi, 0x07c); +EMIT_RR (spe_cgtb, 0x250); +EMIT_RI10(spe_cgtbi, 0x04e); +EMIT_RR (spe_cgth, 0x248); +EMIT_RI10(spe_cgthi, 0x04d); +EMIT_RR (spe_cgt, 0x240); +EMIT_RI10(spe_cgti, 0x04c); +EMIT_RR (spe_clgtb, 0x2d0); +EMIT_RI10(spe_clgtbi, 0x05e); +EMIT_RR (spe_clgth, 0x2c8); +EMIT_RI10(spe_clgthi, 0x05d); +EMIT_RR (spe_clgt, 0x2c0); +EMIT_RI10(spe_clgti, 0x05c); +EMIT_I16 (spe_br, 0x064); +EMIT_I16 (spe_bra, 0x060); +EMIT_RI16(spe_brsl, 0x066); +EMIT_RI16(spe_brasl, 0x062); +EMIT_RI16(spe_brnz, 0x042); +EMIT_RI16(spe_brz, 0x040); +EMIT_RI16(spe_brhnz, 0x046); +EMIT_RI16(spe_brhz, 0x044); + +extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); + + +/* Floating-point instructions + */ +EMIT_RR (spe_fa, 0x2c4); +EMIT_RR (spe_dfa, 0x2cc); +EMIT_RR (spe_fs, 0x2c5); +EMIT_RR (spe_dfs, 0x2cd); +EMIT_RR (spe_fm, 0x2c6); +EMIT_RR (spe_dfm, 0x2ce); +EMIT_RRR (spe_fma, 0x00e); +EMIT_RR (spe_dfma, 0x35c); +EMIT_RRR (spe_fnms, 0x00d); +EMIT_RR (spe_dfnms, 0x35e); +EMIT_RRR (spe_fms, 0x00f); +EMIT_RR (spe_dfms, 0x35d); +EMIT_RR (spe_dfnma, 0x35f); +EMIT_R (spe_frest, 0x1b8); +EMIT_R (spe_frsqest, 0x1b9); +EMIT_RR (spe_fi, 0x3d4); +EMIT_RI8 (spe_csflt, 0x1da); +EMIT_RI8 (spe_cflts, 0x1d8); +EMIT_RI8 (spe_cuflt, 0x1db); +EMIT_RI8 (spe_cfltu, 0x1d9); +EMIT_R (spe_frds, 0x3b9); +EMIT_R (spe_fesd, 0x3b8); +EMIT_RR (spe_dfceq, 0x3c3); +EMIT_RR (spe_dfcmeq, 0x3cb); +EMIT_RR (spe_dfcgt, 0x2c3); +EMIT_RR (spe_dfcmgt, 0x2cb); +EMIT_RI7 (spe_dftsv, 0x3bf); +EMIT_RR (spe_fceq, 0x3c2); +EMIT_RR (spe_fcmeq, 0x3ca); +EMIT_RR (spe_fcgt, 0x2c2); +EMIT_RR (spe_fcmgt, 0x2ca); +EMIT_R (spe_fscrwr, 0x3ba); +EMIT_ (spe_fscrrd, 0x398); + + +/* Channel instructions + */ +EMIT_R (spe_rdch, 0x00d); +EMIT_R (spe_rdchcnt, 0x00f); +EMIT_R (spe_wrch, 0x10d); + + +#ifdef UNDEF_EMIT_MACROS +#undef EMIT_ +#undef EMIT_R +#undef EMIT_RR +#undef EMIT_RRR +#undef EMIT_RI7 +#undef EMIT_RI8 +#undef EMIT_RI10 +#undef EMIT_RI16 +#undef EMIT_RI18 +#undef EMIT_I16 +#undef UNDEF_EMIT_MACROS +#endif /* EMIT_ */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c new file mode 100644 index 00000000000..dcbf76f600e --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -0,0 +1,1219 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if defined(__i386__) || defined(__386__) || defined(i386) + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" + +#include "rtasm_execmem.h" +#include "rtasm_x86sse.h" + +#define DISASSEM 0 +#define X86_TWOB 0x0f + +static unsigned char *cptr( void (*label)() ) +{ + return (unsigned char *)(unsigned long)label; +} + + +static void do_realloc( struct x86_function *p ) +{ + if (p->size == 0) { + p->size = 1024; + p->store = rtasm_exec_malloc(p->size); + p->csr = p->store; + } + else { + unsigned used = p->csr - p->store; + unsigned char *tmp = p->store; + p->size *= 2; + p->store = rtasm_exec_malloc(p->size); + memcpy(p->store, tmp, used); + p->csr = p->store + used; + rtasm_exec_free(tmp); + } +} + +/* Emit bytes to the instruction stream: + */ +static unsigned char *reserve( struct x86_function *p, int bytes ) +{ + if (p->csr + bytes - p->store > p->size) + do_realloc(p); + + { + unsigned char *csr = p->csr; + p->csr += bytes; + return csr; + } +} + + + +static void emit_1b( struct x86_function *p, char b0 ) +{ + char *csr = (char *)reserve(p, 1); + *csr = b0; +} + +static void emit_1i( struct x86_function *p, int i0 ) +{ + int *icsr = (int *)reserve(p, sizeof(i0)); + *icsr = i0; +} + +static void emit_1ub( struct x86_function *p, unsigned char b0 ) +{ + unsigned char *csr = reserve(p, 1); + *csr++ = b0; +} + +static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) +{ + unsigned char *csr = reserve(p, 2); + *csr++ = b0; + *csr++ = b1; +} + +static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) +{ + unsigned char *csr = reserve(p, 3); + *csr++ = b0; + *csr++ = b1; + *csr++ = b2; +} + + +/* Build a modRM byte + possible displacement. No treatment of SIB + * indexing. BZZT - no way to encode an absolute address. + */ +static void emit_modrm( struct x86_function *p, + struct x86_reg reg, + struct x86_reg regmem ) +{ + unsigned char val = 0; + + assert(reg.mod == mod_REG); + + val |= regmem.mod << 6; /* mod field */ + val |= reg.idx << 3; /* reg field */ + val |= regmem.idx; /* r/m field */ + + emit_1ub(p, val); + + /* Oh-oh we've stumbled into the SIB thing. + */ + if (regmem.file == file_REG32 && + regmem.idx == reg_SP) { + emit_1ub(p, 0x24); /* simplistic! */ + } + + switch (regmem.mod) { + case mod_REG: + case mod_INDIRECT: + break; + case mod_DISP8: + emit_1b(p, regmem.disp); + break; + case mod_DISP32: + emit_1i(p, regmem.disp); + break; + default: + assert(0); + break; + } +} + + +static void emit_modrm_noreg( struct x86_function *p, + unsigned op, + struct x86_reg regmem ) +{ + struct x86_reg dummy = x86_make_reg(file_REG32, op); + emit_modrm(p, dummy, regmem); +} + +/* Many x86 instructions have two opcodes to cope with the situations + * where the destination is a register or memory reference + * respectively. This function selects the correct opcode based on + * the arguments presented. + */ +static void emit_op_modrm( struct x86_function *p, + unsigned char op_dst_is_reg, + unsigned char op_dst_is_mem, + struct x86_reg dst, + struct x86_reg src ) +{ + switch (dst.mod) { + case mod_REG: + emit_1ub(p, op_dst_is_reg); + emit_modrm(p, dst, src); + break; + case mod_INDIRECT: + case mod_DISP32: + case mod_DISP8: + assert(src.mod == mod_REG); + emit_1ub(p, op_dst_is_mem); + emit_modrm(p, src, dst); + break; + default: + assert(0); + break; + } +} + + + + + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ) +{ + struct x86_reg reg; + + reg.file = file; + reg.idx = idx; + reg.mod = mod_REG; + reg.disp = 0; + + return reg; +} + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ) +{ + assert(reg.file == file_REG32); + + if (reg.mod == mod_REG) + reg.disp = disp; + else + reg.disp += disp; + + if (reg.disp == 0) + reg.mod = mod_INDIRECT; + else if (reg.disp <= 127 && reg.disp >= -128) + reg.mod = mod_DISP8; + else + reg.mod = mod_DISP32; + + return reg; +} + +struct x86_reg x86_deref( struct x86_reg reg ) +{ + return x86_make_disp(reg, 0); +} + +struct x86_reg x86_get_base_reg( struct x86_reg reg ) +{ + return x86_make_reg( reg.file, reg.idx ); +} + +unsigned char *x86_get_label( struct x86_function *p ) +{ + return p->csr; +} + + + +/*********************************************************************** + * x86 instructions + */ + + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + unsigned char *label ) +{ + int offset = label - (x86_get_label(p) + 2); + + if (offset <= 127 && offset >= -128) { + emit_1ub(p, 0x70 + cc); + emit_1b(p, (char) offset); + } + else { + offset = label - (x86_get_label(p) + 6); + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, offset); + } +} + +/* Always use a 32bit offset for forward jumps: + */ +unsigned char *x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) +{ + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, 0); + return x86_get_label(p); +} + +unsigned char *x86_jmp_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe9); + emit_1i(p, 0); + return x86_get_label(p); +} + +unsigned char *x86_call_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe8); + emit_1i(p, 0); + return x86_get_label(p); +} + +/* Fixup offset from forward jump: + */ +void x86_fixup_fwd_jump( struct x86_function *p, + unsigned char *fixup ) +{ + *(int *)(fixup - 4) = x86_get_label(p) - fixup; +} + +void x86_jmp( struct x86_function *p, unsigned char *label) +{ + emit_1ub(p, 0xe9); + emit_1i(p, label - x86_get_label(p) - 4); +} + +#if 0 +/* This doesn't work once we start reallocating & copying the + * generated code on buffer fills, because the call is relative to the + * current pc. + */ +void x86_call( struct x86_function *p, void (*label)()) +{ + emit_1ub(p, 0xe8); + emit_1i(p, cptr(label) - x86_get_label(p) - 4); +} +#else +void x86_call( struct x86_function *p, struct x86_reg reg) +{ + emit_1ub(p, 0xff); + emit_modrm(p, reg, reg); +} +#endif + + +/* michal: + * Temporary. As I need immediate operands, and dont want to mess with the codegen, + * I load the immediate into general purpose register and use it. + */ +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + assert(dst.mod == mod_REG); + emit_1ub(p, 0xb8 + dst.idx); + emit_1i(p, imm); +} + +void x86_push( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x50 + reg.idx); + p->stack_offset += 4; +} + +void x86_pop( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x58 + reg.idx); + p->stack_offset -= 4; +} + +void x86_inc( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x40 + reg.idx); +} + +void x86_dec( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x48 + reg.idx); +} + +void x86_ret( struct x86_function *p ) +{ + emit_1ub(p, 0xc3); +} + +void x86_sahf( struct x86_function *p ) +{ + emit_1ub(p, 0x9e); +} + +void x86_mov( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x8b, 0x89, dst, src ); +} + +void x86_xor( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x33, 0x31, dst, src ); +} + +void x86_cmp( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x3b, 0x39, dst, src ); +} + +void x86_lea( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x8d); + emit_modrm( p, dst, src ); +} + +void x86_test( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x85); + emit_modrm( p, dst, src ); +} + +void x86_add( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm(p, 0x03, 0x01, dst, src ); +} + +void x86_mul( struct x86_function *p, + struct x86_reg src ) +{ + assert (src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); +} + +void x86_sub( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm(p, 0x2b, 0x29, dst, src ); +} + +void x86_or( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x0b, 0x09, dst, src ); +} + +void x86_and( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x23, 0x21, dst, src ); +} + + + +/*********************************************************************** + * SSE instructions + */ + + +void sse_movss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0xF3, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movaps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x28, 0x29, dst, src ); +} + +void sse_movups( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ +} + +void sse_movlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ +} + +void sse_maxps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_maxss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_divss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5E); + emit_modrm( p, dst, src ); +} + +void sse_minps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5D); + emit_modrm( p, dst, src ); +} + +void sse_subps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5C); + emit_modrm( p, dst, src ); +} + +void sse_mulps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_mulss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_addps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_addss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_andnps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x55); + emit_modrm( p, dst, src ); +} + +void sse_andps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x54); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); + +} + +void sse_movhlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x12); + emit_modrm( p, dst, src ); +} + +void sse_movlhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x16); + emit_modrm( p, dst, src ); +} + +void sse_orps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x56); + emit_modrm( p, dst, src ); +} + +void sse_xorps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x57); + emit_modrm( p, dst, src ); +} + +void sse_cvtps2pi( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_XMM || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x2d); + emit_modrm( p, dst, src ); +} + + +/* Shufps can also be used to implement a reduced swizzle when dest == + * arg0. + */ +void sse_shufps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_2ub(p, X86_TWOB, 0xC6); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse_cmpps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char cc) +{ + emit_2ub(p, X86_TWOB, 0xC2); + emit_modrm(p, dest, arg0); + emit_1ub(p, cc); +} + +void sse_pmovmskb( struct x86_function *p, + struct x86_reg dest, + struct x86_reg src) +{ + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dest, src); +} + +/*********************************************************************** + * SSE2 instructions + */ + +/** + * Perform a reduced swizzle: + */ +void sse2_pshufd( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x70); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse2_cvttps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); + emit_modrm( p, dst, src ); +} + +void sse2_cvtps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x5B); + emit_modrm( p, dst, src ); +} + +void sse2_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x6B); + emit_modrm( p, dst, src ); +} + +void sse2_packsswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x63); + emit_modrm( p, dst, src ); +} + +void sse2_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void sse2_rcpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_rcpss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0x66, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + + + + +/*********************************************************************** + * x87 instructions + */ +void x87_fist( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 2, dst); +} + +void x87_fistp( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 3, dst); +} + +void x87_fild( struct x86_function *p, struct x86_reg arg ) +{ + emit_1ub(p, 0xdf); + emit_modrm_noreg(p, 0, arg); +} + +void x87_fldz( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xee); +} + + +void x87_fldcw( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_REG32); + assert(arg.mod != mod_REG); + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 5, arg); +} + +void x87_fld1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe8); +} + +void x87_fldl2e( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xea); +} + +void x87_fldln2( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xed); +} + +void x87_fwait( struct x86_function *p ) +{ + emit_1ub(p, 0x9b); +} + +void x87_fnclex( struct x86_function *p ) +{ + emit_2ub(p, 0xdb, 0xe2); +} + +void x87_fclex( struct x86_function *p ) +{ + x87_fwait(p); + x87_fnclex(p); +} + + +static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, + unsigned char dst0ub0, + unsigned char dst0ub1, + unsigned char arg0ub0, + unsigned char arg0ub1, + unsigned char argmem_noreg) +{ + assert(dst.file == file_x87); + + if (arg.file == file_x87) { + if (dst.idx == 0) + emit_2ub(p, dst0ub0, dst0ub1+arg.idx); + else if (arg.idx == 0) + emit_2ub(p, arg0ub0, arg0ub1+arg.idx); + else + assert(0); + } + else if (dst.idx == 0) { + assert(arg.file == file_REG32); + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, argmem_noreg, arg); + } + else + assert(0); +} + +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc8, + 0xdc, 0xc8, + 4); +} + +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe0, + 0xdc, 0xe8, + 4); +} + +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe8, + 0xdc, 0xe0, + 5); +} + +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc0, + 0xdc, 0xc0, + 0); +} + +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf0, + 0xdc, 0xf8, + 6); +} + +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf8, + 0xdc, 0xf0, + 7); +} + +void x87_fmulp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc8+dst.idx); +} + +void x87_fsubp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe8+dst.idx); +} + +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe0+dst.idx); +} + +void x87_faddp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc0+dst.idx); +} + +void x87_fdivp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf8+dst.idx); +} + +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf0+dst.idx); +} + +void x87_fucom( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe0+arg.idx); +} + +void x87_fucomp( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe8+arg.idx); +} + +void x87_fucompp( struct x86_function *p ) +{ + emit_2ub(p, 0xda, 0xe9); +} + +void x87_fxch( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xd9, 0xc8+arg.idx); +} + +void x87_fabs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe1); +} + +void x87_fchs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe0); +} + +void x87_fcos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xff); +} + + +void x87_fprndint( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfc); +} + +void x87_fscale( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfd); +} + +void x87_fsin( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfe); +} + +void x87_fsincos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfb); +} + +void x87_fsqrt( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfa); +} + +void x87_fxtract( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf4); +} + +/* st0 = (2^st0)-1 + * + * Restrictions: -1.0 <= st0 <= 1.0 + */ +void x87_f2xm1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf0); +} + +/* st1 = st1 * log2(st0); + * pop_stack; + */ +void x87_fyl2x( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf1); +} + +/* st1 = st1 * log2(st0 + 1.0); + * pop_stack; + * + * A fast operation, with restrictions: -.29 < st0 < .29 + */ +void x87_fyl2xp1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf9); +} + + +void x87_fld( struct x86_function *p, struct x86_reg arg ) +{ + if (arg.file == file_x87) + emit_2ub(p, 0xd9, 0xc0 + arg.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 0, arg); + } +} + +void x87_fst( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fstp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 3, dst); + } +} + +void x87_fcom( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fcomp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 3, dst); + } +} + + +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_REG32); + + if (dst.idx == reg_AX && + dst.mod == mod_REG) + emit_2ub(p, 0xdf, 0xe0); + else { + emit_1ub(p, 0xdd); + emit_modrm_noreg(p, 7, dst); + } +} + + + + +/*********************************************************************** + * MMX instructions + */ + +void mmx_emms( struct x86_function *p ) +{ + assert(p->need_emms); + emit_2ub(p, 0x0f, 0x77); + p->need_emms = 0; +} + +void mmx_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x6b); + emit_modrm( p, dst, src ); +} + +void mmx_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void mmx_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + +void mmx_movq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6f, 0x7f, dst, src ); +} + + +/*********************************************************************** + * Helper functions + */ + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity: + */ +struct x86_reg x86_fn_arg( struct x86_function *p, + unsigned arg ) +{ + return x86_make_disp(x86_make_reg(file_REG32, reg_SP), + p->stack_offset + arg * 4); /* ??? */ +} + + +void x86_init_func( struct x86_function *p ) +{ + p->size = 0; + p->store = NULL; + p->csr = p->store; +} + +void x86_init_func_size( struct x86_function *p, unsigned code_size ) +{ + p->size = code_size; + p->store = rtasm_exec_malloc(code_size); + p->csr = p->store; +} + +void x86_release_func( struct x86_function *p ) +{ + rtasm_exec_free(p->store); + p->store = NULL; + p->csr = NULL; + p->size = 0; +} + + +void (*x86_get_func( struct x86_function *p ))(void) +{ + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + return (void (*)(void)) (unsigned long) p->store; +} + +#else + +void x86sse_dummy( void ) +{ +} + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h new file mode 100644 index 00000000000..606b41eb358 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -0,0 +1,278 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _RTASM_X86SSE_H_ +#define _RTASM_X86SSE_H_ + +#if defined(__i386__) || defined(__386__) || defined(i386) + +/* It is up to the caller to ensure that instructions issued are + * suitable for the host cpu. There are no checks made in this module + * for mmx/sse/sse2 support on the cpu. + */ +struct x86_reg { + unsigned file:3; + unsigned idx:3; + unsigned mod:2; /* mod_REG if this is just a register */ + int disp:24; /* only +/- 23bits of offset - should be enough... */ +}; + +struct x86_function { + unsigned size; + unsigned char *store; + unsigned char *csr; + unsigned stack_offset; + int need_emms; + const char *fn; +}; + +enum x86_reg_file { + file_REG32, + file_MMX, + file_XMM, + file_x87 +}; + +/* Values for mod field of modr/m byte + */ +enum x86_reg_mod { + mod_INDIRECT, + mod_DISP8, + mod_DISP32, + mod_REG +}; + +enum x86_reg_name { + reg_AX, + reg_CX, + reg_DX, + reg_BX, + reg_SP, + reg_BP, + reg_SI, + reg_DI +}; + + +enum x86_cc { + cc_O, /* overflow */ + cc_NO, /* not overflow */ + cc_NAE, /* not above or equal / carry */ + cc_AE, /* above or equal / not carry */ + cc_E, /* equal / zero */ + cc_NE /* not equal / not zero */ +}; + +enum sse_cc { + cc_Equal, + cc_LessThan, + cc_LessThanEqual, + cc_Unordered, + cc_NotEqual, + cc_NotLessThan, + cc_NotLessThanEqual, + cc_Ordered +}; + +#define cc_Z cc_E +#define cc_NZ cc_NE + +/* Begin/end/retreive function creation: + */ + + +void x86_init_func( struct x86_function *p ); +void x86_init_func_size( struct x86_function *p, unsigned code_size ); +void x86_release_func( struct x86_function *p ); +void (*x86_get_func( struct x86_function *p ))( void ); + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ); + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ); + +struct x86_reg x86_deref( struct x86_reg reg ); + +struct x86_reg x86_get_base_reg( struct x86_reg reg ); + + +/* Labels, jumps and fixup: + */ +unsigned char *x86_get_label( struct x86_function *p ); + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + unsigned char *label ); + +unsigned char *x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ); + +unsigned char *x86_jmp_forward( struct x86_function *p); + +unsigned char *x86_call_forward( struct x86_function *p); + +void x86_fixup_fwd_jump( struct x86_function *p, + unsigned char *fixup ); + +void x86_jmp( struct x86_function *p, unsigned char *label ); + +/* void x86_call( struct x86_function *p, void (*label)() ); */ +void x86_call( struct x86_function *p, struct x86_reg reg); + +/* michal: + * Temporary. As I need immediate operands, and dont want to mess with the codegen, + * I load the immediate into general purpose register and use it. + */ +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); + + +/* Macro for sse_shufps() and sse2_pshufd(): + */ +#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) +#define SHUF_NOOP RSW(0,1,2,3) +#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +void mmx_emms( struct x86_function *p ); +void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, + unsigned char cc ); +void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); + +void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_dec( struct x86_function *p, struct x86_reg reg ); +void x86_inc( struct x86_function *p, struct x86_reg reg ); +void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mul( struct x86_function *p, struct x86_reg src ); +void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_pop( struct x86_function *p, struct x86_reg reg ); +void x86_push( struct x86_function *p, struct x86_reg reg ); +void x86_ret( struct x86_function *p ); +void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_sahf( struct x86_function *p ); + +void x87_f2xm1( struct x86_function *p ); +void x87_fabs( struct x86_function *p ); +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_faddp( struct x86_function *p, struct x86_reg dst ); +void x87_fchs( struct x86_function *p ); +void x87_fclex( struct x86_function *p ); +void x87_fcom( struct x86_function *p, struct x86_reg dst ); +void x87_fcomp( struct x86_function *p, struct x86_reg dst ); +void x87_fcos( struct x86_function *p ); +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivp( struct x86_function *p, struct x86_reg dst ); +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); +void x87_fild( struct x86_function *p, struct x86_reg arg ); +void x87_fist( struct x86_function *p, struct x86_reg dst ); +void x87_fistp( struct x86_function *p, struct x86_reg dst ); +void x87_fld( struct x86_function *p, struct x86_reg arg ); +void x87_fld1( struct x86_function *p ); +void x87_fldcw( struct x86_function *p, struct x86_reg arg ); +void x87_fldl2e( struct x86_function *p ); +void x87_fldln2( struct x86_function *p ); +void x87_fldz( struct x86_function *p ); +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fmulp( struct x86_function *p, struct x86_reg dst ); +void x87_fnclex( struct x86_function *p ); +void x87_fprndint( struct x86_function *p ); +void x87_fscale( struct x86_function *p ); +void x87_fsin( struct x86_function *p ); +void x87_fsincos( struct x86_function *p ); +void x87_fsqrt( struct x86_function *p ); +void x87_fst( struct x86_function *p, struct x86_reg dst ); +void x87_fstp( struct x86_function *p, struct x86_reg dst ); +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubp( struct x86_function *p, struct x86_reg dst ); +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); +void x87_fxch( struct x86_function *p, struct x86_reg dst ); +void x87_fxtract( struct x86_function *p ); +void x87_fyl2x( struct x86_function *p ); +void x87_fyl2xp1( struct x86_function *p ); +void x87_fwait( struct x86_function *p ); +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); +void x87_fucompp( struct x86_function *p ); +void x87_fucomp( struct x86_function *p, struct x86_reg arg ); +void x87_fucom( struct x86_function *p, struct x86_reg arg ); + + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity. Note - doesn't track explict + * manipulation of ESP by other instructions. + */ +struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); + +#endif +#endif diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 12a8bd0409e..71f64b747c2 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -1,3 +1,18 @@ -default: - cd ../.. ; make +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = tgsi + +C_SOURCES = \ + exec/tgsi_exec.c \ + exec/tgsi_sse2.c \ + util/tgsi_build.c \ + util/tgsi_dump.c \ + util/tgsi_parse.c \ + util/tgsi_transform.c \ + util/tgsi_util.c + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript new file mode 100644 index 00000000000..8464bfe9444 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -0,0 +1,14 @@ +Import('*') + +tgsi = env.ConvenienceLibrary( + target = 'tgsi', + source = [ + 'exec/tgsi_exec.c', + 'exec/tgsi_sse2.c', + 'util/tgsi_build.c', + 'util/tgsi_dump.c', + 'util/tgsi_parse.c', + 'util/tgsi_util.c', + ]) + +auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile deleted file mode 100644 index eb8b14e0e89..00000000000 --- a/src/gallium/auxiliary/tgsi/exec/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -default: - cd ../../.. ; make - diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 62c6a69c63f..779b901f2b8 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -32,7 +32,7 @@ #include "tgsi_exec.h" #include "tgsi_sse2.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #if defined(__i386__) || defined(__386__) @@ -2356,11 +2356,17 @@ tgsi_emit_sse2_fs( ok = emit_instruction( func, &parse.FullToken.FullInstruction ); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d\n", + parse.FullToken.FullInstruction.Instruction.Opcode ); + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: /* XXX implement this */ ok = 0; + debug_printf("failed to emit immediate value\n"); break; default: diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile new file mode 100644 index 00000000000..906a46d6b4e --- /dev/null +++ b/src/gallium/auxiliary/util/Makefile @@ -0,0 +1,15 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = util + +C_SOURCES = \ + p_debug.c \ + p_tile.c \ + p_util.c \ + u_mm.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript new file mode 100644 index 00000000000..4717941434f --- /dev/null +++ b/src/gallium/auxiliary/util/SConscript @@ -0,0 +1,12 @@ +Import('*') + +util = env.ConvenienceLibrary( + target = 'util', + source = [ + 'p_debug.c', + 'p_tile.c', + 'p_util.c', + 'u_mm.c', + ]) + +auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/pipebuffer/linked_list.h b/src/gallium/auxiliary/util/u_double_list.h index e99817fb130..8cb10c98206 100644 --- a/src/gallium/auxiliary/pipebuffer/linked_list.h +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -34,8 +34,8 @@ * be protected using an external mutex. */ -#ifndef LINKED_LIST_H_ -#define LINKED_LIST_H_ +#ifndef _U_DOUBLE_LIST_H_ +#define _U_DOUBLE_LIST_H_ #include <stddef.h> @@ -88,4 +88,4 @@ struct list_head ((__type *)(((char *)(__item)) - offsetof(__type, __field))) -#endif /*LINKED_LIST_H_*/ +#endif /*_U_DOUBLE_LIST_H_*/ diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c new file mode 100644 index 00000000000..b49ae074e0e --- /dev/null +++ b/src/gallium/auxiliary/util/u_mm.c @@ -0,0 +1,283 @@ +/************************************************************************** + * + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" + +#include "util/u_mm.h" + + +void +mmDumpMemInfo(const struct mem_block *heap) +{ + debug_printf("Memory heap %p:\n", (void *)heap); + if (heap == 0) { + debug_printf(" heap == 0\n"); + } else { + const struct mem_block *p; + + for(p = heap->next; p != heap; p = p->next) { + debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + debug_printf("\nFree list:\n"); + + for(p = heap->next_free; p != heap; p = p->next_free) { + debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + } + debug_printf("End of memory blocks\n"); +} + +struct mem_block * +mmInit(int ofs, int size) +{ + struct mem_block *heap, *block; + + if (size <= 0) + return NULL; + + heap = CALLOC_STRUCT(mem_block); + if (!heap) + return NULL; + + block = CALLOC_STRUCT(mem_block); + if (!block) { + FREE(heap); + return NULL; + } + + heap->next = block; + heap->prev = block; + heap->next_free = block; + heap->prev_free = block; + + block->heap = heap; + block->next = heap; + block->prev = heap; + block->next_free = heap; + block->prev_free = heap; + + block->ofs = ofs; + block->size = size; + block->free = 1; + + return heap; +} + + +static struct mem_block * +SliceBlock(struct mem_block *p, + int startofs, int size, + int reserved, int alignment) +{ + struct mem_block *newblock; + + /* break left [p, newblock, p->next], then p = newblock */ + if (startofs > p->ofs) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs; + newblock->size = p->size - (startofs - p->ofs); + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size -= newblock->size; + p = newblock; + } + + /* break right, also [p, newblock, p->next] */ + if (size < p->size) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs + size; + newblock->size = p->size - size; + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size = size; + } + + /* p = middle block */ + p->free = 0; + + /* Remove p from the free list: + */ + p->next_free->prev_free = p->prev_free; + p->prev_free->next_free = p->next_free; + + p->next_free = 0; + p->prev_free = 0; + + p->reserved = reserved; + return p; +} + + +struct mem_block * +mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) +{ + struct mem_block *p; + const int mask = (1 << align2)-1; + int startofs = 0; + int endofs; + + if (!heap || align2 < 0 || size <= 0) + return NULL; + + for (p = heap->next_free; p != heap; p = p->next_free) { + assert(p->free); + + startofs = (p->ofs + mask) & ~mask; + if ( startofs < startSearch ) { + startofs = startSearch; + } + endofs = startofs+size; + if (endofs <= (p->ofs+p->size)) + break; + } + + if (p == heap) + return NULL; + + assert(p->free); + p = SliceBlock(p,startofs,size,0,mask+1); + + return p; +} + + +struct mem_block * +mmFindBlock(struct mem_block *heap, int start) +{ + struct mem_block *p; + + for (p = heap->next; p != heap; p = p->next) { + if (p->ofs == start) + return p; + } + + return NULL; +} + + +static INLINE int +Join2Blocks(struct mem_block *p) +{ + /* XXX there should be some assertions here */ + + /* NOTE: heap->free == 0 */ + + if (p->free && p->next->free) { + struct mem_block *q = p->next; + + assert(p->ofs + p->size == q->ofs); + p->size += q->size; + + p->next = q->next; + q->next->prev = p; + + q->next_free->prev_free = q->prev_free; + q->prev_free->next_free = q->next_free; + + FREE(q); + return 1; + } + return 0; +} + +int +mmFreeMem(struct mem_block *b) +{ + if (!b) + return 0; + + if (b->free) { + debug_printf("block already free\n"); + return -1; + } + if (b->reserved) { + debug_printf("block is reserved\n"); + return -1; + } + + b->free = 1; + b->next_free = b->heap->next_free; + b->prev_free = b->heap; + b->next_free->prev_free = b; + b->prev_free->next_free = b; + + Join2Blocks(b); + if (b->prev != b->heap) + Join2Blocks(b->prev); + + return 0; +} + + +void +mmDestroy(struct mem_block *heap) +{ + struct mem_block *p; + + if (!heap) + return; + + for (p = heap->next; p != heap; ) { + struct mem_block *next = p->next; + FREE(p); + p = next; + } + + FREE(heap); +} diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h new file mode 100644 index 00000000000..b226b101cbe --- /dev/null +++ b/src/gallium/auxiliary/util/u_mm.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Memory manager code. Primarily used by device drivers to manage texture + * heaps, etc. + */ + + +#ifndef _U_MM_H_ +#define _U_MM_H_ + + +struct mem_block { + struct mem_block *next, *prev; + struct mem_block *next_free, *prev_free; + struct mem_block *heap; + int ofs,size; + unsigned int free:1; + unsigned int reserved:1; +}; + + + +/** + * input: total size in bytes + * return: a heap pointer if OK, NULL if error + */ +extern struct mem_block *mmInit(int ofs, int size); + +/** + * Allocate 'size' bytes with 2^align2 bytes alignment, + * restrict the search to free memory after 'startSearch' + * depth and back buffers should be in different 4mb banks + * to get better page hits if possible + * input: size = size of block + * align2 = 2^align2 bytes alignment + * startSearch = linear offset from start of heap to begin search + * return: pointer to the allocated block, 0 if error + */ +extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2, + int startSearch); + +/** + * Free block starts at offset + * input: pointer to a block + * return: 0 if OK, -1 if error + */ +extern int mmFreeMem(struct mem_block *b); + +/** + * Free block starts at offset + * input: pointer to a heap, start offset + * return: pointer to a block + */ +extern struct mem_block *mmFindBlock(struct mem_block *heap, int start); + +/** + * destroy MM + */ +extern void mmDestroy(struct mem_block *mmInit); + +/** + * For debuging purpose. + */ +extern void mmDumpMemInfo(const struct mem_block *mmInit); + +#endif diff --git a/src/gallium/auxiliary/util/u_simple_list.h b/src/gallium/auxiliary/util/u_simple_list.h new file mode 100644 index 00000000000..f5f43b0faa2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_list.h @@ -0,0 +1,197 @@ +/** + * \file simple_list.h + * Simple macros for type-safe, intrusive lists. + * + * Intended to work with a list sentinal which is created as an empty + * list. Insert & delete are O(1). + * + * \author + * (C) 1997, Keith Whitwell + */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _U_SIMPLE_LIST_H_ +#define _U_SIMPLE_LIST_H_ + +/** + * Remove an element from list. + * + * \param elem element to remove. + */ +#define remove_from_list(elem) \ +do { \ + (elem)->next->prev = (elem)->prev; \ + (elem)->prev->next = (elem)->next; \ +} while (0) + +/** + * Insert an element to the list head. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_head(list, elem) \ +do { \ + (elem)->prev = list; \ + (elem)->next = (list)->next; \ + (list)->next->prev = elem; \ + (list)->next = elem; \ +} while(0) + +/** + * Insert an element to the list tail. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_tail(list, elem) \ +do { \ + (elem)->next = list; \ + (elem)->prev = (list)->prev; \ + (list)->prev->next = elem; \ + (list)->prev = elem; \ +} while(0) + +/** + * Move an element to the list head. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_head(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_head(list, elem); \ +} while (0) + +/** + * Move an element to the list tail. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_tail(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_tail(list, elem); \ +} while (0) + +/** + * Make a empty list empty. + * + * \param sentinal list (sentinal element). + */ +#define make_empty_list(sentinal) \ +do { \ + (sentinal)->next = sentinal; \ + (sentinal)->prev = sentinal; \ +} while (0) + +/** + * Get list first element. + * + * \param list list. + * + * \return pointer to first element. + */ +#define first_elem(list) ((list)->next) + +/** + * Get list last element. + * + * \param list list. + * + * \return pointer to last element. + */ +#define last_elem(list) ((list)->prev) + +/** + * Get next element. + * + * \param elem element. + * + * \return pointer to next element. + */ +#define next_elem(elem) ((elem)->next) + +/** + * Get previous element. + * + * \param elem element. + * + * \return pointer to previous element. + */ +#define prev_elem(elem) ((elem)->prev) + +/** + * Test whether element is at end of the list. + * + * \param list list. + * \param elem element. + * + * \return non-zero if element is at end of list, or zero otherwise. + */ +#define at_end(list, elem) ((elem) == (list)) + +/** + * Test if a list is empty. + * + * \param list list. + * + * \return non-zero if list empty, or zero otherwise. + */ +#define is_empty_list(list) ((list)->next == (list)) + +/** + * Walk through the elements of a list. + * + * \param ptr pointer to the current element. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach(ptr, list) \ + for( ptr=(list)->next ; ptr!=list ; ptr=(ptr)->next ) + +/** + * Walk through the elements of a list. + * + * Same as #foreach but lets you unlink the current value during a list + * traversal. Useful for freeing a list, element by element. + * + * \param ptr pointer to the current element. + * \param t temporary pointer. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach_s(ptr, t, list) \ + for(ptr=(list)->next,t=(ptr)->next; list != ptr; ptr=t, t=(t)->next) + +#endif /* _U_SIMPLE_LIST_H_ */ |