diff options
Diffstat (limited to 'src')
212 files changed, 6212 insertions, 3486 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 6b7d02a19bc..e7fe6b3b768 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -130,6 +130,10 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned output_format; switch (vinfo->attrib[i].emit) { + case EMIT_4UB: + output_format = PIPE_FORMAT_R8G8B8A8_UNORM; + emit_sz = 4 * sizeof(unsigned char); + break; case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); @@ -153,6 +157,8 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); break; + case EMIT_OMIT: + continue; default: assert(0); output_format = PIPE_FORMAT_NONE; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index dbbc33fffa7..f2368dde5c2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -114,6 +114,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, #endif for (slot = 0; slot < shader->info.num_inputs; slot++) { +#if 0 + assert(!util_is_inf_or_nan(input[slot][0])); + assert(!util_is_inf_or_nan(input[slot][1])); + assert(!util_is_inf_or_nan(input[slot][2])); + assert(!util_is_inf_or_nan(input[slot][3])); +#endif machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; diff --git a/src/gallium/auxiliary/trace/trace_drm.h b/src/gallium/auxiliary/trace/trace_drm.h new file mode 100644 index 00000000000..892bd9860c6 --- /dev/null +++ b/src/gallium/auxiliary/trace/trace_drm.h @@ -0,0 +1,165 @@ +/* + * Copyright 2009 Jakob Bornecrantz <[email protected]> + * Corbin Simpson <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef TRACE_DRM_H +#define TRACE_DRM_H + +#include "state_tracker/drm_api.h" + +#include "trace/tr_buffer.h" +#include "trace/tr_context.h" +#include "trace/tr_screen.h" +#include "trace/tr_texture.h" + +struct drm_api hooks; + +static struct pipe_screen * +trace_drm_create_screen(int fd, struct drm_create_screen_arg *arg) +{ + struct pipe_screen *screen; + + if (arg && arg->mode != DRM_CREATE_NORMAL) + return NULL; + + screen = hooks.create_screen(fd, arg); + + return trace_screen_create(screen); +}; + +static struct pipe_context * +trace_drm_create_context(struct pipe_screen *_screen) +{ + struct pipe_screen *screen; + struct pipe_context *pipe; + + if (trace_enabled()) + screen = trace_screen(_screen)->screen; + else + screen = _screen; + + pipe = hooks.create_context(screen); + + if (trace_enabled()) + pipe = trace_context_create(_screen, pipe); + + return pipe; +}; + +static boolean +trace_drm_buffer_from_texture(struct pipe_texture *_texture, + struct pipe_buffer **_buffer, + unsigned *stride) +{ + struct pipe_texture *texture; + struct pipe_buffer *buffer = NULL; + boolean result; + + if (trace_enabled()) + texture = trace_texture(_texture)->texture; + else + texture = _texture; + + result = hooks.buffer_from_texture(texture, &buffer, stride); + + if (result && _buffer) + buffer = trace_buffer_create(trace_screen(texture->screen), buffer); + + if (_buffer) + *_buffer = buffer; + else + pipe_buffer_reference(&buffer, NULL); + + return result; +} + +static struct pipe_buffer * +trace_drm_buffer_from_handle(struct pipe_screen *_screen, + const char *name, + unsigned handle) +{ + struct pipe_screen *screen; + struct pipe_buffer *result; + + if (trace_enabled()) + screen = trace_screen(_screen)->screen; + else + screen = _screen; + + result = hooks.buffer_from_handle(screen, name, handle); + + if (trace_enabled()) + result = trace_buffer_create(trace_screen(_screen), result); + + return result; +} + +static boolean +trace_drm_handle_from_buffer(struct pipe_screen *_screen, + struct pipe_buffer *_buffer, + unsigned *handle) +{ + struct pipe_screen *screen; + struct pipe_buffer *buffer; + + if (trace_enabled()) { + screen = trace_screen(_screen)->screen; + buffer = trace_buffer(_buffer)->buffer; + } else { + screen = _screen; + buffer = _buffer; + } + + return hooks.handle_from_buffer(screen, buffer, handle); +} + +static boolean +trace_drm_global_handle_from_buffer(struct pipe_screen *_screen, + struct pipe_buffer *_buffer, + unsigned *handle) +{ + struct pipe_screen *screen; + struct pipe_buffer *buffer; + + if (trace_enabled()) { + screen = trace_screen(_screen)->screen; + buffer = trace_buffer(_buffer)->buffer; + } else { + screen = _screen; + buffer = _buffer; + } + + return hooks.global_handle_from_buffer(screen, buffer, handle); +} + +struct drm_api drm_api_hooks = +{ + .create_screen = trace_drm_create_screen, + .create_context = trace_drm_create_context, + + .buffer_from_texture = trace_drm_buffer_from_texture, + .buffer_from_handle = trace_drm_buffer_from_handle, + .handle_from_buffer = trace_drm_handle_from_buffer, + .global_handle_from_buffer = trace_drm_global_handle_from_buffer, +}; + +#endif /* TRACE_DRM_H */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 7c225e2f27c..5ffb7073abf 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -32,6 +32,7 @@ #include "spu_main.h" #include "spu_render.h" +#include "spu_shuffle.h" #include "spu_tri.h" #include "spu_tile.h" #include "cell/common.h" @@ -267,15 +268,75 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) uint drawn = 0; - /* loop over tris */ - for (j = 0; j < render->num_indexes; j += 3) { - const float *v0, *v1, *v2; - - v0 = (const float *) (vertices + indexes[j+0] * vertex_size); - v1 = (const float *) (vertices + indexes[j+1] * vertex_size); - v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - - drawn += tri_draw(v0, v1, v2, tx, ty); + const qword vertex_sizes = (qword)spu_splats(vertex_size); + const qword verticess = (qword)spu_splats((uint)vertices); + + ASSERT_ALIGN16(&indexes[0]); + + const uint num_indexes = render->num_indexes; + + /* loop over tris + * &indexes[0] will be 16 byte aligned. This loop is heavily unrolled + * avoiding variable rotates when extracting vertex indices. + */ + for (j = 0; j < num_indexes; j += 24) { + /* Load three vectors, containing 24 ushort indices */ + const qword* lower_qword = (qword*)&indexes[j]; + const qword indices0 = lower_qword[0]; + const qword indices1 = lower_qword[1]; + const qword indices2 = lower_qword[2]; + + /* stores three indices for each tri n in slots 0, 1 and 2 of vsn */ + /* Straightforward rotates for these */ + qword vs0 = indices0; + qword vs1 = si_shlqbyi(indices0, 6); + qword vs3 = si_shlqbyi(indices1, 2); + qword vs4 = si_shlqbyi(indices1, 8); + qword vs6 = si_shlqbyi(indices2, 4); + qword vs7 = si_shlqbyi(indices2, 10); + + /* For tri 2 and 5, the three indices are split across two machine + * words - rotate and combine */ + const qword tmp2a = si_shlqbyi(indices0, 12); + const qword tmp2b = si_rotqmbyi(indices1, 12|16); + qword vs2 = si_selb(tmp2a, tmp2b, si_fsmh(si_from_uint(0x20))); + + const qword tmp5a = si_shlqbyi(indices1, 14); + const qword tmp5b = si_rotqmbyi(indices2, 14|16); + qword vs5 = si_selb(tmp5a, tmp5b, si_fsmh(si_from_uint(0x60))); + + /* unpack indices from halfword slots to word slots */ + vs0 = si_shufb(vs0, vs0, SHUFB8(0,A,0,B,0,C,0,0)); + vs1 = si_shufb(vs1, vs1, SHUFB8(0,A,0,B,0,C,0,0)); + vs2 = si_shufb(vs2, vs2, SHUFB8(0,A,0,B,0,C,0,0)); + vs3 = si_shufb(vs3, vs3, SHUFB8(0,A,0,B,0,C,0,0)); + vs4 = si_shufb(vs4, vs4, SHUFB8(0,A,0,B,0,C,0,0)); + vs5 = si_shufb(vs5, vs5, SHUFB8(0,A,0,B,0,C,0,0)); + vs6 = si_shufb(vs6, vs6, SHUFB8(0,A,0,B,0,C,0,0)); + vs7 = si_shufb(vs7, vs7, SHUFB8(0,A,0,B,0,C,0,0)); + + /* Calculate address of vertex in vertices[] */ + vs0 = si_mpya(vs0, vertex_sizes, verticess); + vs1 = si_mpya(vs1, vertex_sizes, verticess); + vs2 = si_mpya(vs2, vertex_sizes, verticess); + vs3 = si_mpya(vs3, vertex_sizes, verticess); + vs4 = si_mpya(vs4, vertex_sizes, verticess); + vs5 = si_mpya(vs5, vertex_sizes, verticess); + vs6 = si_mpya(vs6, vertex_sizes, verticess); + vs7 = si_mpya(vs7, vertex_sizes, verticess); + + /* Select the appropriate call based on the number of vertices + * remaining */ + switch(num_indexes - j) { + default: drawn += tri_draw(vs7, tx, ty); + case 21: drawn += tri_draw(vs6, tx, ty); + case 18: drawn += tri_draw(vs5, tx, ty); + case 15: drawn += tri_draw(vs4, tx, ty); + case 12: drawn += tri_draw(vs3, tx, ty); + case 9: drawn += tri_draw(vs2, tx, ty); + case 6: drawn += tri_draw(vs1, tx, ty); + case 3: drawn += tri_draw(vs0, tx, ty); + } } //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index d727268475e..58be001be4c 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -133,7 +133,15 @@ struct setup_stage { uint tx, ty; /**< position of current tile (x, y) */ - int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; + union { + struct { + int cliprect_minx; + int cliprect_miny; + int cliprect_maxx; + int cliprect_maxy; + }; + qword cliprect; + }; struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; @@ -432,6 +440,41 @@ print_vertex(const struct vertex_header *v) } #endif +/* Returns the minimum of each slot of two vec_float4s as qwords. + * i.e. return[n] = min(q0[n],q1[n]); + */ +static qword +minfq(qword q0, qword q1) +{ + const qword q0q1m = si_fcgt(q0, q1); + return si_selb(q0, q1, q0q1m); +} + +/* Returns the minimum of each slot of three vec_float4s as qwords. + * i.e. return[n] = min(q0[n],q1[n],q2[n]); + */ +static qword +min3fq(qword q0, qword q1, qword q2) +{ + return minfq(minfq(q0, q1), q2); +} + +/* Returns the maximum of each slot of two vec_float4s as qwords. + * i.e. return[n] = min(q0[n],q1[n],q2[n]); + */ +static qword +maxfq(qword q0, qword q1) { + const qword q0q1m = si_fcgt(q0, q1); + return si_selb(q1, q0, q0q1m); +} + +/* Returns the maximum of each slot of three vec_float4s as qwords. + * i.e. return[n] = min(q0[n],q1[n],q2[n]); + */ +static qword +max3fq(qword q0, qword q1, qword q2) { + return maxfq(maxfq(q0, q1), q2); +} /** * Sort vertices from top to bottom. @@ -440,9 +483,7 @@ print_vertex(const struct vertex_header *v) * \return FALSE if tri is totally outside tile, TRUE otherwise */ static boolean -setup_sort_vertices(const struct vertex_header *v0, - const struct vertex_header *v1, - const struct vertex_header *v2) +setup_sort_vertices(const qword vs) { float area, sign; @@ -455,57 +496,57 @@ setup_sort_vertices(const struct vertex_header *v0, } #endif - /* determine bottom to top order of vertices */ { + /* Load the float values for various processing... */ + const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]); + const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]); + const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]); + + /* Check if triangle is completely outside the tile bounds + * Find the min and max x and y positions of the three poits */ + const qword minf = min3fq(f0, f1, f2); + const qword maxf = max3fq(f0, f1, f2); + + /* Compare min and max against cliprect vals */ + const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b)); + const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0)); + + /* Use a little magic to work out of the tri is visible or not */ + if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE; + + /* determine bottom to top order of vertices */ /* A table of shuffle patterns for putting vertex_header pointers into correct order. Quite magical. */ - const vec_uchar16 sort_order_patterns[] = { - SHUFFLE4(A,B,C,C), - SHUFFLE4(C,A,B,C), - SHUFFLE4(A,C,B,C), - SHUFFLE4(B,C,A,C), - SHUFFLE4(B,A,C,C), - SHUFFLE4(C,B,A,C) }; - - /* The vertex_header pointers, packed for easy shuffling later */ - const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2}; + const qword sort_order_patterns[] = { + SHUFB4(A,B,C,C), + SHUFB4(C,A,B,C), + SHUFB4(A,C,B,C), + SHUFB4(B,C,A,C), + SHUFB4(B,A,C,C), + SHUFB4(C,B,A,C) }; /* Collate y values into two vectors for comparison. Using only one shuffle constant! ;) */ - const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C)); - const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C)); - const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C)); - const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C)); + const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C)); + const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C)); + const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C)); + const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C)); /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */ - const vec_uint4 compare = spu_cmpgt(y_012, y_120); + const qword compare = si_fcgt(y_012, y_120); /* Compress the result of the comparison into 4 bits */ - const vec_uint4 gather = spu_gather(compare); + const qword gather = si_gb(compare); /* Subtract one to attain the index into the LUT. Magical. */ - const unsigned int index = spu_extract(gather, 0) - 1; + const unsigned int index = si_to_uint(gather) - 1; /* Load the appropriate pattern and construct the desired vector. */ - setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]); + setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]); /* Using the result of the comparison, set sign. Very magical. */ - sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f); + sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f); } - /* Check if triangle is completely outside the tile bounds */ - if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy) - return FALSE; - if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny) - return FALSE; - if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx && - spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx && - spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx) - return FALSE; - if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx && - spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx && - spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) - return FALSE; - setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]); setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]); setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]); @@ -761,21 +802,19 @@ subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) * The tile data should have already been fetched. */ boolean -tri_draw(const float *v0, const float *v1, const float *v2, +tri_draw(const qword vs, uint tx, uint ty) { setup.tx = tx; setup.ty = ty; /* set clipping bounds to tile bounds */ - setup.cliprect_minx = tx * TILE_SIZE; - setup.cliprect_miny = ty * TILE_SIZE; - setup.cliprect_maxx = (tx + 1) * TILE_SIZE; - setup.cliprect_maxy = (ty + 1) * TILE_SIZE; - - if (!setup_sort_vertices((struct vertex_header *) v0, - (struct vertex_header *) v1, - (struct vertex_header *) v2)) { + const qword clipbase = (qword)((vec_uint4){tx, ty}); + const qword clipmin = si_mpyui(clipbase, TILE_SIZE); + const qword clipmax = si_ai(clipmin, TILE_SIZE); + setup.cliprect = si_shufb(clipmin, clipmax, SHUFB4(A,B,a,b)); + + if(!setup_sort_vertices(vs)) { return FALSE; /* totally clipped */ } diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h index aa694dd7c93..82e3b19ad7e 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -31,7 +31,7 @@ extern boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); +tri_draw(const qword vs, uint tx, uint ty); #endif /* SPU_TRI_H */ diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 4da833c25e8..93f752faec9 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -31,7 +31,8 @@ nv04_miptree_layout(struct nv04_miptree *nv04mt) for (l = 0; l <= pt->last_level; l++) { - nv04mt->level[l].image_offset = offset; + nv04mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); offset += nv04mt->level[l].pitch * pt->height[l]; } diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h index 0d51439e3ff..399f750dbe7 100644 --- a/src/gallium/drivers/nv04/nv04_state.h +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -37,7 +37,7 @@ struct nv04_miptree { struct { uint pitch; - uint image_offset; + uint *image_offset; } level[PIPE_MAX_TEXTURE_LEVELS]; }; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2d15868ae84..aada285f2c0 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -85,6 +85,9 @@ struct nv50_reg { int hw; int neg; + + int rhw; /* result hw for FP outputs, or interpolant index */ + int acc; /* instruction where this reg is last read (first insn == 1) */ }; struct nv50_pc { @@ -108,12 +111,23 @@ struct nv50_pc { struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; + + unsigned interp_mode[32]; + /* perspective interpolation registers */ + struct nv50_reg *iv_p; + struct nv50_reg *iv_c; + + /* current instruction and total number of insns */ + unsigned insn_cur; + unsigned insn_nr; + + boolean allow32; }; static void alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { - int i; + int i = 0; if (reg->type == P_RESULT) { if (pc->p->cfg.high_result < (reg->hw + 1)) @@ -131,7 +145,22 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) return; } - for (i = 0; i < NV50_SU_MAX_TEMP; i++) { + if (reg->rhw != -1) { + /* try to allocate temporary with index rhw first */ + if (!(pc->r_temp[reg->rhw])) { + pc->r_temp[reg->rhw] = reg; + reg->hw = reg->rhw; + if (pc->p->cfg.high_temp < (reg->rhw + 1)) + pc->p->cfg.high_temp = reg->rhw + 1; + return; + } + /* make sure we don't get things like $r0 needs to go + * in $r1 and $r1 in $r0 + */ + i = pc->result_nr * 4; + } + + for (; i < NV50_SU_MAX_TEMP; i++) { if (!(pc->r_temp[i])) { pc->r_temp[i] = reg; reg->hw = i; @@ -159,6 +188,7 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) r->type = P_TEMP; r->index = -1; r->hw = i; + r->rhw = -1; pc->r_temp[i] = r; return r; } @@ -168,6 +198,38 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) return NULL; } +/* Assign the hw of the discarded temporary register src + * to the tgsi register dst and free src. + */ +static void +assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + assert(src->index == -1 && src->hw != -1); + + if (dst->hw != -1) + pc->r_temp[dst->hw] = NULL; + pc->r_temp[src->hw] = dst; + dst->hw = src->hw; + + FREE(src); +} + +/* release the hardware resource held by r */ +static void +release_hw(struct nv50_pc *pc, struct nv50_reg *r) +{ + assert(r->type == P_TEMP); + if (r->hw == -1) + return; + + assert(pc->r_temp[r->hw] == r); + pc->r_temp[r->hw] = NULL; + + r->acc = 0; + if (r->index == -1) + FREE(r); +} + static void free_temp(struct nv50_pc *pc, struct nv50_reg *r) { @@ -250,7 +312,13 @@ alloc_immd(struct nv50_pc *pc, float f) struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); unsigned hw; - hw = ctor_immd(pc, f, 0, 0, 0) * 4; + for (hw = 0; hw < pc->immd_nr * 4; hw++) + if (pc->immd_buf[hw] == f) + break; + + if (hw == pc->immd_nr * 4) + hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4; + r->type = P_IMMD; r->hw = hw; r->index = -1; @@ -341,7 +409,8 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) static INLINE void set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { - unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ + float f = pc->immd_buf[imm->hw]; + unsigned val = fui(imm->neg ? -f : f); set_long(pc, e); /*XXX: can't be predicated - bits overlap.. catch cases where both @@ -354,20 +423,35 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) e->inst[1] |= (val >> 6) << 2; } + +#define INTERP_LINEAR 0 +#define INTERP_FLAT 1 +#define INTERP_PERSPECTIVE 2 +#define INTERP_CENTROID 4 + +/* interpolant index has been stored in dst->rhw */ static void -emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, - struct nv50_reg *src, struct nv50_reg *iv) +emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv, + unsigned mode) { + assert(dst->rhw != -1); struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0x80000000; set_dst(pc, dst, e); - alloc_reg(pc, src); - e->inst[0] |= (src->hw << 16); - if (iv) { - e->inst[0] |= (1 << 25); - alloc_reg(pc, iv); - e->inst[0] |= (iv->hw << 9); + e->inst[0] |= (dst->rhw << 16); + + if (mode & INTERP_FLAT) { + e->inst[0] |= (1 << 8); + } else { + if (mode & INTERP_PERSPECTIVE) { + e->inst[0] |= (1 << 25); + alloc_reg(pc, iv); + e->inst[0] |= (iv->hw << 9); + } + + if (mode & INTERP_CENTROID) + e->inst[0] |= (1 << 24); } emit(pc, e); @@ -378,22 +462,12 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, struct nv50_program_exec *e) { set_long(pc, e); -#if 1 - e->inst[1] |= (1 << 22); -#else - if (src->type == P_IMMD) { - e->inst[1] |= (NV50_CB_PMISC << 22); - } else { - if (pc->p->type == PIPE_SHADER_VERTEX) - e->inst[1] |= (NV50_CB_PVP << 22); - else - e->inst[1] |= (NV50_CB_PFP << 22); - } -#endif e->param.index = src->hw; e->param.shift = s; e->param.mask = m << (s % 32); + + e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); } static void @@ -405,12 +479,11 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_dst(pc, dst, e); - if (0 && dst->type != P_RESULT && src->type == P_IMMD) { + if (pc->allow32 && dst->type != P_RESULT && src->type == P_IMMD) { set_immd(pc, src, e); /*XXX: 32-bit, but steals part of "half" reg space - need to * catch and handle this case if/when we do half-regs */ - e->inst[0] |= 0x00008000; } else if (src->type == P_IMMD || src->type == P_CONST) { set_long(pc, e); @@ -426,18 +499,25 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) e->inst[0] |= (src->hw << 9); } - /* We really should support "half" instructions here at some point, - * but I don't feel confident enough about them yet. - */ - set_long(pc, e); if (is_long(e) && !is_immd(e)) { e->inst[1] |= 0x04000000; /* 32-bit */ - e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ - } + e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */ + if (!(e->inst[1] & 0x20000000)) + e->inst[1] |= 0x00030000; /* "subsubop" 0xf */ + } else + e->inst[0] |= 0x00008000; emit(pc, e); } +static INLINE void +emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) +{ + struct nv50_reg *imm = alloc_immd(pc, f); + emit_mov(pc, dst, imm); + FREE(imm); +} + static boolean check_swap_src_0_1(struct nv50_pc *pc, struct nv50_reg **s0, struct nv50_reg **s1) @@ -541,12 +621,26 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0xc0000000; - set_long(pc, e); + + if (!pc->allow32) + set_long(pc, e); check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, e); set_src_0(pc, src0, e); - set_src_1(pc, src1, e); + if (src1->type == P_IMMD && !is_long(e)) { + if (src0->neg) + e->inst[0] |= 0x00008000; + set_immd(pc, src1, e); + } else { + set_src_1(pc, src1, e); + if (src0->neg ^ src1->neg) { + if (is_long(e)) + e->inst[1] |= 0x08000000; + else + e->inst[0] |= 0x00008000; + } + } emit(pc, e); } @@ -560,11 +654,20 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst, e->inst[0] |= 0xb0000000; check_swap_src_0_1(pc, &src0, &src1); + + if (!pc->allow32 || src0->neg || src1->neg) { + set_long(pc, e); + e->inst[1] |= (src0->neg << 26) | (src1->neg << 27); + } + set_dst(pc, dst, e); set_src_0(pc, src0, e); - if (is_long(e)) + if (src1->type == P_CONST || src1->type == P_ATTR || is_long(e)) set_src_2(pc, src1, e); else + if (src1->type == P_IMMD) + set_immd(pc, src1, e); + else set_src_1(pc, src1, e); emit(pc, e); @@ -588,25 +691,13 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, emit(pc, e); } -static void +static INLINE void emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] |= 0xb0000000; - - set_long(pc, e); - if (check_swap_src_0_1(pc, &src0, &src1)) - e->inst[1] |= 0x04000000; - else - e->inst[1] |= 0x08000000; - - set_dst(pc, dst, e); - set_src_0(pc, src0, e); - set_src_2(pc, src1, e); - - emit(pc, e); + src1->neg ^= 1; + emit_add(pc, dst, src0, src1); + src1->neg ^= 1; } static void @@ -623,26 +714,21 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_src_1(pc, src1, e); set_src_2(pc, src2, e); + if (src0->neg ^ src1->neg) + e->inst[1] |= 0x04000000; + if (src2->neg) + e->inst[1] |= 0x08000000; + emit(pc, e); } -static void +static INLINE void emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] |= 0xe0000000; - set_long(pc, e); - e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ - - check_swap_src_0_1(pc, &src0, &src1); - set_dst(pc, dst, e); - set_src_0(pc, src0, e); - set_src_1(pc, src1, e); - set_src_2(pc, src2, e); - - emit(pc, e); + src2->neg ^= 1; + emit_mad(pc, dst, src0, src1, src2); + src2->neg ^= 1; } static void @@ -693,6 +779,48 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, e); } +#define CVTOP_RN 0x01 +#define CVTOP_FLOOR 0x03 +#define CVTOP_CEIL 0x05 +#define CVTOP_TRUNC 0x07 +#define CVTOP_SAT 0x08 +#define CVTOP_ABS 0x10 + +#define CVT_F32_F32 0xc4 +#define CVT_F32_S32 0x44 +#define CVT_F32_U32 0x64 +#define CVT_S32_F32 0x8c +#define CVT_S32_S32 0x0c +#define CVT_F32_F32_ROP 0xcc + +static void +emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, + int wp, unsigned cop, unsigned fmt) +{ + struct nv50_program_exec *e; + + e = exec(pc); + set_long(pc, e); + + e->inst[0] |= 0xa0000000; + e->inst[1] |= 0x00004000; + e->inst[1] |= (cop << 16); + e->inst[1] |= (fmt << 24); + set_src_0(pc, src, e); + + if (wp >= 0) + set_pred_wr(pc, 1, wp, e); + + if (dst) + set_dst(pc, dst, e); + else { + e->inst[0] |= 0x000001fc; + e->inst[1] |= 0x00000008; + } + + emit(pc, e); +} + static void emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) @@ -736,22 +864,10 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, free_temp(pc, dst); } -static void +static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xa0000000; /* cvt */ - set_long(pc, e); - e->inst[1] |= (6 << 29); /* cvt */ - e->inst[1] |= 0x08000000; /* integer mode */ - e->inst[1] |= 0x04000000; /* 32 bit */ - e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ - e->inst[1] |= (1 << 14); /* src .f32 */ - set_dst(pc, dst, e); - set_src_0(pc, src, e); - - emit(pc, e); + emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP); } static void @@ -768,21 +884,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, free_temp(pc, temp); } -static void +static INLINE void emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xa0000000; /* cvt */ - set_long(pc, e); - e->inst[1] |= (6 << 29); /* cvt */ - e->inst[1] |= 0x04000000; /* 32 bit */ - e->inst[1] |= (1 << 14); /* src .f32 */ - e->inst[1] |= ((1 << 6) << 14); /* .abs */ - set_dst(pc, dst, e); - set_src_0(pc, src, e); - - emit(pc, e); + emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); } static void @@ -794,18 +899,12 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); struct nv50_reg *tmp[4]; + boolean allow32 = pc->allow32; - if (mask & (1 << 0)) - emit_mov(pc, dst[0], one); - - if (mask & (1 << 3)) - emit_mov(pc, dst[3], one); + pc->allow32 = FALSE; if (mask & (3 << 1)) { - if (mask & (1 << 1)) - tmp[0] = dst[1]; - else - tmp[0] = temp_temp(pc); + tmp[0] = alloc_temp(pc, NULL); emit_minmax(pc, 4, tmp[0], src[0], zero); } @@ -823,6 +922,26 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit_mov(pc, dst[2], zero); set_pred(pc, 3, 0, pc->p->exec_tail); } + + if (mask & (1 << 1)) + assimilate_temp(pc, dst[1], tmp[0]); + else + if (mask & (1 << 2)) + free_temp(pc, tmp[0]); + + pc->allow32 = allow32; + + /* do this last, in case src[i,j] == dst[0,3] */ + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + + if (mask & (1 << 3)) + emit_mov(pc, dst[3], one); + + FREE(pos128); + FREE(neg128); + FREE(zero); + FREE(one); } static void @@ -853,6 +972,8 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) e->inst[1] = 0xc4014788; set_src_0(pc, src, e); set_pred_wr(pc, 1, r_pred, e); + if (src->neg) + e->inst[1] |= 0x20000000; emit(pc, e); /* This is probably KILP */ @@ -863,6 +984,180 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) emit(pc, e); } +static void +emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, + struct nv50_reg **src, unsigned unit, unsigned type, boolean proj) +{ + struct nv50_reg *temp, *t[4]; + struct nv50_program_exec *e; + + unsigned c, mode, dim; + + switch (type) { + case TGSI_TEXTURE_1D: + dim = 1; + break; + case TGSI_TEXTURE_UNKNOWN: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */ + case TGSI_TEXTURE_RECT: + dim = 2; + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: /* XXX */ + dim = 3; + break; + default: + assert(0); + break; + } + + alloc_temp4(pc, t, 0); + + if (proj) { + if (src[0]->type == P_TEMP && src[0]->rhw != -1) { + mode = pc->interp_mode[src[0]->index]; + + t[3]->rhw = src[3]->rhw; + emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID)); + emit_flop(pc, 0, t[3], t[3]); + + for (c = 0; c < dim; c++) { + t[c]->rhw = src[c]->rhw; + emit_interp(pc, t[c], t[3], + (mode | INTERP_PERSPECTIVE)); + } + } else { + emit_flop(pc, 0, t[3], src[3]); + for (c = 0; c < dim; c++) + emit_mul(pc, t[c], src[c], t[3]); + + /* XXX: for some reason the blob sometimes uses MAD: + * emit_mad(pc, t[c], src[0][c], t[3], t[3]) + * pc->p->exec_tail->inst[1] |= 0x080fc000; + */ + } + } else { + if (type == TGSI_TEXTURE_CUBE) { + temp = temp_temp(pc); + emit_minmax(pc, 4, temp, src[0], src[1]); + emit_minmax(pc, 4, temp, temp, src[2]); + emit_flop(pc, 0, temp, temp); + for (c = 0; c < 3; c++) + emit_mul(pc, t[c], src[c], temp); + } else { + for (c = 0; c < dim; c++) + emit_mov(pc, t[c], src[c]); + } + } + + e = exec(pc); + set_long(pc, e); + e->inst[0] |= 0xf0000000; + e->inst[1] |= 0x00000004; + set_dst(pc, t[0], e); + e->inst[0] |= (unit << 9); + + if (dim == 2) + e->inst[0] |= 0x00400000; + else + if (dim == 3) + e->inst[0] |= 0x00800000; + + e->inst[0] |= (mask & 0x3) << 25; + e->inst[1] |= (mask & 0xc) << 12; + + emit(pc, e); + +#if 1 + if (mask & 1) emit_mov(pc, dst[0], t[0]); + if (mask & 2) emit_mov(pc, dst[1], t[1]); + if (mask & 4) emit_mov(pc, dst[2], t[2]); + if (mask & 8) emit_mov(pc, dst[3], t[3]); + + free_temp4(pc, t); +#else + /* XXX: if p.e. MUL is used directly after TEX, it would still use + * the texture coordinates, not the fetched values: latency ? */ + + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + assimilate_temp(pc, dst[c], t[c]); + else + free_temp(pc, t[c]); + } +#endif +} + +static void +convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) +{ + unsigned q = 0, m = ~0; + + assert(!is_long(e)); + + switch (e->inst[0] >> 28) { + case 0x1: + /* MOV */ + q = 0x0403c000; + m = 0xffff7fff; + break; + case 0x8: + /* INTERP */ + m = ~0x02000000; + if (e->inst[0] & 0x02000000) + q = 0x00020000; + break; + case 0x9: + /* RCP */ + break; + case 0xB: + /* ADD */ + m = ~(127 << 16); + q = ((e->inst[0] & (~m)) >> 2); + break; + case 0xC: + /* MUL */ + m = ~0x00008000; + q = ((e->inst[0] & (~m)) << 12); + break; + case 0xE: + /* MAD (if src2 == dst) */ + q = ((e->inst[0] & 0x1fc) << 12); + break; + default: + assert(0); + break; + } + + set_long(pc, e); + pc->p->exec_size++; + + e->inst[0] &= m; + e->inst[1] |= q; +} + +static boolean +negate_supported(const struct tgsi_full_instruction *insn, int i) +{ + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_KIL: + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_MAD: + return TRUE; + case TGSI_OPCODE_POW: + return (i == 1) ? TRUE : FALSE; + default: + return FALSE; + } +} + static struct nv50_reg * tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { @@ -881,11 +1176,14 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) } static struct nv50_reg * -tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) +tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, + boolean neg) { struct nv50_reg *r = NULL; struct nv50_reg *temp; - unsigned c; + unsigned sgn, c; + + sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); c = tgsi_util_get_full_src_register_extswizzle(src, chan); switch (c) { @@ -915,16 +1213,17 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) break; case TGSI_EXTSWIZZLE_ZERO: r = alloc_immd(pc, 0.0); - break; + return r; case TGSI_EXTSWIZZLE_ONE: - r = alloc_immd(pc, 1.0); - break; + if (sgn == TGSI_UTIL_SIGN_TOGGLE || sgn == TGSI_UTIL_SIGN_SET) + return alloc_immd(pc, -1.0); + return alloc_immd(pc, 1.0); default: assert(0); break; } - switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { + switch (sgn) { case TGSI_UTIL_SIGN_KEEP: break; case TGSI_UTIL_SIGN_CLEAR: @@ -933,14 +1232,21 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) r = temp; break; case TGSI_UTIL_SIGN_TOGGLE: - temp = temp_temp(pc); - emit_neg(pc, temp, r); - r = temp; + if (neg) + r->neg = 1; + else { + temp = temp_temp(pc); + emit_neg(pc, temp, r); + r = temp; + } break; case TGSI_UTIL_SIGN_SET: temp = temp_temp(pc); emit_abs(pc, temp, r); - emit_neg(pc, temp, r); + if (neg) + temp->neg = 1; + else + emit_neg(pc, temp, temp); r = temp; break; default: @@ -951,12 +1257,40 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) return r; } +/* returns TRUE if instruction can overwrite sources before they're read */ +static boolean +direct2dest_op(const struct tgsi_full_instruction *insn) +{ + if (insn->Instruction.Saturate) + return FALSE; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_COS: + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_KIL: + case TGSI_OPCODE_LIT: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SCS: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + return FALSE; + default: + return TRUE; + } +} + static boolean nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { const struct tgsi_full_instruction *inst = &tok->FullInstruction; struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; unsigned mask, sat, unit; + boolean assimilate = FALSE; int i, c; mask = inst->FullDstRegisters[0].DstRegister.WriteMask; @@ -967,6 +1301,10 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); else dst[c] = NULL; + rdst[c] = NULL; + src[0][c] = NULL; + src[1][c] = NULL; + src[2][c] = NULL; } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -976,7 +1314,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) unit = fs->SrcRegister.Index; for (c = 0; c < 4; c++) - src[i][c] = tgsi_src(pc, c, fs); + src[i][c] = tgsi_src(pc, c, fs, + negate_supported(inst, i)); } if (sat) { @@ -984,6 +1323,25 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) rdst[c] = dst[c]; dst[c] = temp_temp(pc); } + } else + if (direct2dest_op(inst)) { + for (c = 0; c < 4; c++) { + if (!dst[c] || dst[c]->type != P_TEMP) + continue; + + for (i = c + 1; i < 4; i++) { + if (dst[c] == src[0][i] || + dst[c] == src[1][i] || + dst[c] == src[2][i]) + break; + } + if (i == 4) + continue; + + assimilate = TRUE; + rdst[c] = dst[c]; + dst[c] = alloc_temp(pc, NULL); + } } switch (inst->Instruction.Opcode) { @@ -1002,7 +1360,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_COS: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); emit_precossin(pc, temp, src[0][0]); emit_flop(pc, 5, temp, temp); for (c = 0; c < 4; c++) { @@ -1012,7 +1370,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_DP3: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); @@ -1021,10 +1379,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) continue; emit_mov(pc, dst[c], temp); } - free_temp(pc, temp); break; case TGSI_OPCODE_DP4: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); @@ -1034,10 +1391,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) continue; emit_mov(pc, dst[c], temp); } - free_temp(pc, temp); break; case TGSI_OPCODE_DPH: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); emit_mad(pc, temp, src[0][2], src[1][2], temp); @@ -1047,7 +1403,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) continue; emit_mov(pc, dst[c], temp); } - free_temp(pc, temp); break; case TGSI_OPCODE_DST: { @@ -1064,7 +1419,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_EX2: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); emit_preex2(pc, temp, src[0][0]); emit_flop(pc, 6, temp, temp); for (c = 0; c < 4; c++) { @@ -1072,7 +1427,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) continue; emit_mov(pc, dst[c], temp); } - free_temp(pc, temp); break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -1082,26 +1436,26 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_FRC: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; emit_flr(pc, temp, src[0][c]); emit_sub(pc, dst[c], src[0][c], temp); } - free_temp(pc, temp); break; case TGSI_OPCODE_KIL: emit_kil(pc, src[0][0]); emit_kil(pc, src[0][1]); emit_kil(pc, src[0][2]); emit_kil(pc, src[0][3]); + pc->p->cfg.fp.regs[2] |= 0x00100000; break; case TGSI_OPCODE_LIT: emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); emit_flop(pc, 3, temp, src[0][0]); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -1110,15 +1464,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_LRP: + temp = temp_temp(pc); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - /*XXX: we can do better than this */ - temp = alloc_temp(pc, NULL); - emit_neg(pc, temp, src[0][c]); - emit_mad(pc, temp, temp, src[2][c], src[2][c]); - emit_mad(pc, dst[c], src[0][c], src[1][c], temp); - free_temp(pc, temp); + emit_sub(pc, temp, src[1][c], src[2][c]); + emit_mad(pc, dst[c], temp, src[0][c], src[2][c]); } break; case TGSI_OPCODE_MAD: @@ -1157,36 +1508,39 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_POW: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); emit_pow(pc, temp, src[0][0], src[1][0]); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; emit_mov(pc, dst[c], temp); } - free_temp(pc, temp); break; case TGSI_OPCODE_RCP: - for (c = 0; c < 4; c++) { + for (c = 3; c >= 0; c--) { if (!(mask & (1 << c))) continue; emit_flop(pc, 0, dst[c], src[0][0]); } break; case TGSI_OPCODE_RSQ: - for (c = 0; c < 4; c++) { + for (c = 3; c >= 0; c--) { if (!(mask & (1 << c))) continue; emit_flop(pc, 2, dst[c], src[0][0]); } break; case TGSI_OPCODE_SCS: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) emit_flop(pc, 5, dst[0], temp); if (mask & (1 << 1)) emit_flop(pc, 4, dst[1], temp); + if (mask & (1 << 2)) + emit_mov_immdval(pc, dst[2], 0.0); + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0); break; case TGSI_OPCODE_SGE: for (c = 0; c < 4; c++) { @@ -1196,7 +1550,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_SIN: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); emit_precossin(pc, temp, src[0][0]); emit_flop(pc, 4, temp, temp); for (c = 0; c < 4; c++) { @@ -1220,33 +1574,15 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_TEX: + emit_tex(pc, dst, mask, src[0], unit, + inst->InstructionExtTexture.Texture, FALSE); + break; case TGSI_OPCODE_TXP: - { - struct nv50_reg *t[4]; - struct nv50_program_exec *e; - - alloc_temp4(pc, t, 0); - emit_mov(pc, t[0], src[0][0]); - emit_mov(pc, t[1], src[0][1]); - - e = exec(pc); - e->inst[0] = 0xf6400000; - e->inst[0] |= (unit << 9); - set_long(pc, e); - e->inst[1] |= 0x0000c004; - set_dst(pc, t[0], e); - emit(pc, e); - - if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); - if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); - if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); - if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); - - free_temp4(pc, t); - } + emit_tex(pc, dst, mask, src[0], unit, + inst->InstructionExtTexture.Texture, TRUE); break; case TGSI_OPCODE_XPD: - temp = alloc_temp(pc, NULL); + temp = temp_temp(pc); if (mask & (1 << 0)) { emit_mul(pc, temp, src[0][2], src[1][1]); emit_msb(pc, dst[0], src[0][1], src[1][2], temp); @@ -1259,7 +1595,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_mul(pc, temp, src[0][1], src[1][0]); emit_msb(pc, dst[2], src[0][0], src[1][1], temp); } - free_temp(pc, temp); + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0); break; case TGSI_OPCODE_END: break; @@ -1270,21 +1607,26 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (sat) { for (c = 0; c < 4; c++) { - struct nv50_program_exec *e; - if (!(mask & (1 << c))) continue; - e = exec(pc); + emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, + CVT_F32_F32); + } + } else if (assimilate) { + for (c = 0; c < 4; c++) + if (rdst[c]) + assimilate_temp(pc, rdst[c], dst[c]); + } - e->inst[0] = 0xa0000000; /* cvt */ - set_long(pc, e); - e->inst[1] |= (6 << 29); /* cvt */ - e->inst[1] |= 0x04000000; /* 32 bit */ - e->inst[1] |= (1 << 14); /* src .f32 */ - e->inst[1] |= ((1 << 5) << 14); /* .sat */ - set_dst(pc, rdst[c], e); - set_src_0(pc, dst[c], e); - emit(pc, e); + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + for (c = 0; c < 4; c++) { + if (!src[i][c]) + continue; + if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) + FREE(src[i][c]); + else + if (src[i][c]->acc == pc->insn_cur) + release_hw(pc, src[i][c]); } } @@ -1292,12 +1634,169 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) return TRUE; } +/* Adjust a bitmask that indicates what components of a source are used, + * we use this in tx_prep so we only load interpolants that are needed. + */ +static void +insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask) +{ + const struct tgsi_instruction_ext_texture *tex; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_DP3: + *mask = 0x7; + break; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + *mask = 0xF; + break; + case TGSI_OPCODE_LIT: + *mask = 0xB; + break; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + *mask = 0x1; + break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + assert(insn->Instruction.Extended); + tex = &insn->InstructionExtTexture; + + *mask = 0x7; + if (tex->Texture == TGSI_TEXTURE_1D) + *mask = 0x1; + else + if (tex->Texture == TGSI_TEXTURE_2D) + *mask = 0x3; + + if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) + *mask |= 0x8; + break; + default: + break; + } +} + +static void +prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok, + unsigned *r_usage[2]) +{ + const struct tgsi_full_instruction *insn; + const struct tgsi_full_src_register *src; + const struct tgsi_dst_register *dst; + + unsigned i, c, k, n, mask, *acc_p; + + insn = &tok->FullInstruction; + dst = &insn->FullDstRegisters[0].DstRegister; + mask = dst->WriteMask; + + if (!r_usage[0]) + r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned)); + if (!r_usage[1]) + r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned)); + + if (dst->File == TGSI_FILE_TEMPORARY) { + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + r_usage[0][dst->Index * 4 + c] = pc->insn_nr; + } + } + + for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { + src = &insn->FullSrcRegisters[i]; + + switch (src->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + acc_p = r_usage[0]; + break; + case TGSI_FILE_INPUT: + acc_p = r_usage[1]; + break; + default: + continue; + } + + insn_adjust_mask(insn, &mask); + + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + + k = tgsi_util_get_full_src_register_extswizzle(src, c); + switch (k) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + n = src->SrcRegister.Index * 4 + k; + acc_p[n] = pc->insn_nr; + break; + default: + break; + } + } + } +} + +static unsigned +load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid, + int *aid, int *p_oid) +{ + struct nv50_reg *iv; + int oid, c, n; + unsigned mask = 0; + + iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p; + + for (c = 0, n = i * 4; c < 4; c++, n++) { + oid = (*p_oid)++; + pc->attr[n].type = P_TEMP; + pc->attr[n].index = i; + + if (pc->attr[n].acc == acc[n]) + continue; + mask |= (1 << c); + + pc->attr[n].acc = acc[n]; + pc->attr[n].rhw = pc->attr[n].hw = -1; + alloc_reg(pc, &pc->attr[n]); + + pc->attr[n].rhw = (*aid)++; + emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]); + + pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4)); + (*mid)++; + pc->p->cfg.fp.regs[1] += 0x00010001; + } + + return mask; +} + static boolean nv50_program_tx_prep(struct nv50_pc *pc) { struct tgsi_parse_context p; boolean ret = FALSE; unsigned i, c; + unsigned fcol, bcol, fcrd, depr; + + /* count (centroid) perspective interpolations */ + unsigned centroid_loads = 0; + unsigned perspect_loads = 0; + + /* track register access for temps and attrs */ + unsigned *r_usage[2]; + r_usage[0] = NULL; + r_usage[1] = NULL; + + depr = fcol = bcol = fcrd = 0xffff; + + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + pc->p->cfg.fp.regs[0] = 0x01000404; + pc->p->cfg.fp.regs[1] = 0x00000400; + } tgsi_parse_init(&p, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { @@ -1319,9 +1818,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *d; - unsigned last; + unsigned last, first, mode; d = &p.FullToken.FullDeclaration; + first = d->DeclarationRange.First; last = d->DeclarationRange.Last; switch (d->Declaration.File) { @@ -1332,10 +1832,69 @@ nv50_program_tx_prep(struct nv50_pc *pc) case TGSI_FILE_OUTPUT: if (pc->result_nr < (last + 1)) pc->result_nr = last + 1; + + if (!d->Declaration.Semantic) + break; + + switch (d->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + depr = first; + pc->p->cfg.fp.regs[2] |= 0x00000100; + pc->p->cfg.fp.regs[3] |= 0x00000011; + break; + default: + break; + } + break; case TGSI_FILE_INPUT: + { if (pc->attr_nr < (last + 1)) pc->attr_nr = last + 1; + + if (pc->p->type != PIPE_SHADER_FRAGMENT) + break; + + switch (d->Declaration.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + mode = INTERP_FLAT; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + mode = INTERP_PERSPECTIVE; + break; + default: + mode = INTERP_LINEAR; + break; + } + + if (d->Declaration.Semantic) { + switch (d->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + fcrd = first; + break; + case TGSI_SEMANTIC_COLOR: + fcol = first; + mode = INTERP_PERSPECTIVE; + break; + case TGSI_SEMANTIC_BCOLOR: + bcol = first; + mode = INTERP_PERSPECTIVE; + break; + } + } + + if (d->Declaration.Centroid) { + mode |= INTERP_CENTROID; + if (mode & INTERP_PERSPECTIVE) + centroid_loads++; + } else + if (mode & INTERP_PERSPECTIVE) + perspect_loads++; + + assert(last < 32); + for (i = first; i <= last; i++) + pc->interp_mode[i] = mode; + } break; case TGSI_FILE_CONSTANT: if (pc->param_nr < (last + 1)) @@ -1351,6 +1910,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) } break; case TGSI_TOKEN_TYPE_INSTRUCTION: + pc->insn_nr++; + prep_inspect_insn(pc, tok, r_usage); break; default: break; @@ -1366,56 +1927,95 @@ nv50_program_tx_prep(struct nv50_pc *pc) for (c = 0; c < 4; c++) { pc->temp[i*4+c].type = P_TEMP; pc->temp[i*4+c].hw = -1; + pc->temp[i*4+c].rhw = -1; pc->temp[i*4+c].index = i; + pc->temp[i*4+c].acc = r_usage[0][i*4+c]; } } } if (pc->attr_nr) { - struct nv50_reg *iv = NULL; - int aid = 0; + int oid = 4, mid = 4, aid = 0; + /* oid = VP output id + * aid = FP attribute/interpolant id + * mid = VP output mapping field ID + */ pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); if (!pc->attr) goto out_err; if (pc->p->type == PIPE_SHADER_FRAGMENT) { - iv = alloc_temp(pc, NULL); - emit_interp(pc, iv, iv, NULL); - emit_flop(pc, 0, iv, iv); - aid++; - } + /* position should be loaded first */ + if (fcrd != 0xffff) { + unsigned mask; + mid = 0; + mask = load_fp_attrib(pc, fcrd, r_usage[1], + &mid, &aid, &oid); + oid = 0; + pc->p->cfg.fp.regs[1] |= (mask << 24); + pc->p->cfg.fp.map[0] = 0x04040404 * fcrd; + } + pc->p->cfg.fp.map[0] += 0x03020100; - for (i = 0; i < pc->attr_nr; i++) { - struct nv50_reg *a = &pc->attr[i*4]; + /* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */ - for (c = 0; c < 4; c++) { - if (pc->p->type == PIPE_SHADER_FRAGMENT) { - struct nv50_reg *at = - alloc_temp(pc, NULL); - pc->attr[i*4+c].type = at->type; - pc->attr[i*4+c].hw = at->hw; - pc->attr[i*4+c].index = at->index; + if (perspect_loads) { + pc->iv_p = alloc_temp(pc, NULL); + + if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) { + pc->p->cfg.fp.regs[1] |= 0x08000000; + pc->iv_p->rhw = aid++; + emit_interp(pc, pc->iv_p, NULL, + INTERP_LINEAR); + emit_flop(pc, 0, pc->iv_p, pc->iv_p); } else { - pc->p->cfg.vp.attr[aid/32] |= - (1 << (aid % 32)); - pc->attr[i*4+c].type = P_ATTR; - pc->attr[i*4+c].hw = aid++; - pc->attr[i*4+c].index = i; + pc->iv_p->rhw = aid - 1; + emit_flop(pc, 0, pc->iv_p, + &pc->attr[fcrd * 4 + 3]); } } - if (pc->p->type != PIPE_SHADER_FRAGMENT) - continue; + if (centroid_loads) { + pc->iv_c = alloc_temp(pc, NULL); + pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++; + emit_interp(pc, pc->iv_c, NULL, + INTERP_CENTROID); + emit_flop(pc, 0, pc->iv_c, pc->iv_c); + pc->p->cfg.fp.regs[1] |= 0x08000000; + } - emit_interp(pc, &a[0], &a[0], iv); - emit_interp(pc, &a[1], &a[1], iv); - emit_interp(pc, &a[2], &a[2], iv); - emit_interp(pc, &a[3], &a[3], iv); - } + for (c = 0; c < 4; c++) { + /* I don't know what these values do, but + * let's set them like the blob does: + */ + if (fcol != 0xffff && r_usage[1][fcol * 4 + c]) + pc->p->cfg.fp.regs[0] += 0x00010000; + if (bcol != 0xffff && r_usage[1][bcol * 4 + c]) + pc->p->cfg.fp.regs[0] += 0x00010000; + } + + for (i = 0; i < pc->attr_nr; i++) + load_fp_attrib(pc, i, r_usage[1], + &mid, &aid, &oid); - if (iv) - free_temp(pc, iv); + if (pc->iv_p) + free_temp(pc, pc->iv_p); + if (pc->iv_c) + free_temp(pc, pc->iv_c); + + pc->p->cfg.fp.high_map = (mid / 4); + pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0); + } else { + /* vertex program */ + for (i = 0; i < pc->attr_nr * 4; i++) { + pc->p->cfg.vp.attr[aid / 32] |= + (1 << (aid % 32)); + pc->attr[i].type = P_ATTR; + pc->attr[i].hw = aid++; + pc->attr[i].index = i / 4; + } + } } if (pc->result_nr) { @@ -1430,12 +2030,20 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type == PIPE_SHADER_FRAGMENT) { pc->result[i*4+c].type = P_TEMP; pc->result[i*4+c].hw = -1; + pc->result[i*4+c].rhw = (i == depr) ? + -1 : rid++; } else { pc->result[i*4+c].type = P_RESULT; pc->result[i*4+c].hw = rid++; } pc->result[i*4+c].index = i; } + + if (pc->p->type == PIPE_SHADER_FRAGMENT && + depr != 0xffff) { + pc->result[depr * 4 + 2].rhw = + (pc->result_nr - 1) * 4; + } } } @@ -1456,7 +2064,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) } if (pc->immd_nr) { - int rid = pc->param_nr * 4; + int rid = 0; pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); if (!pc->immd) @@ -1473,15 +2081,38 @@ nv50_program_tx_prep(struct nv50_pc *pc) ret = TRUE; out_err: + if (r_usage[0]) + FREE(r_usage[0]); + if (r_usage[1]) + FREE(r_usage[1]); + tgsi_parse_free(&p); return ret; } +static void +free_nv50_pc(struct nv50_pc *pc) +{ + if (pc->immd) + FREE(pc->immd); + if (pc->param) + FREE(pc->param); + if (pc->result) + FREE(pc->result); + if (pc->attr) + FREE(pc->attr); + if (pc->temp) + FREE(pc->temp); + + FREE(pc); +} + static boolean nv50_program_tx(struct nv50_program *p) { struct tgsi_parse_context parse; struct nv50_pc *pc; + unsigned k; boolean ret; pc = CALLOC_STRUCT(nv50_pc); @@ -1498,10 +2129,16 @@ nv50_program_tx(struct nv50_program *p) while (!tgsi_parse_end_of_tokens(&parse)) { const union tgsi_full_token *tok = &parse.FullToken; + /* don't allow half insn/immd on first and last instruction */ + pc->allow32 = TRUE; + if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr) + pc->allow32 = FALSE; + tgsi_parse_token(&parse); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: + ++pc->insn_cur; ret = nv50_program_tx_insn(pc, tok); if (ret == FALSE) goto out_err; @@ -1515,8 +2152,40 @@ nv50_program_tx(struct nv50_program *p) struct nv50_reg out; out.type = P_TEMP; - for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) - emit_mov(pc, &out, &pc->result[out.hw]); + for (k = 0; k < pc->result_nr * 4; k++) { + if (pc->result[k].rhw == -1) + continue; + if (pc->result[k].hw != pc->result[k].rhw) { + out.hw = pc->result[k].rhw; + emit_mov(pc, &out, &pc->result[k]); + } + if (pc->p->cfg.high_result < (pc->result[k].rhw + 1)) + pc->p->cfg.high_result = pc->result[k].rhw + 1; + } + } + + /* look for single half instructions and make them long */ + struct nv50_program_exec *e, *e_prev; + + for (k = 0, e = pc->p->exec_head, e_prev = NULL; e; e = e->next) { + if (!is_long(e)) + k++; + + if (!e->next || is_long(e->next)) { + if (k & 1) + convert_to_long(pc, e); + k = 0; + } + + if (e->next) + e_prev = e; + } + + if (!is_long(pc->p->exec_tail)) { + /* this may occur if moving FP results */ + assert(e_prev && !is_long(e_prev)); + convert_to_long(pc, e_prev); + convert_to_long(pc, pc->p->exec_tail); } assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); @@ -1530,6 +2199,7 @@ out_err: tgsi_parse_free(&parse); out_cleanup: + free_nv50_pc(pc); return ret; } @@ -1543,7 +2213,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) static void nv50_program_upload_data(struct nv50_context *nv50, float *map, - unsigned start, unsigned count) + unsigned start, unsigned count, unsigned cbuf) { struct nouveau_channel *chan = nv50->screen->nvws->channel; struct nouveau_grobj *tesla = nv50->screen->tesla; @@ -1552,7 +2222,7 @@ nv50_program_upload_data(struct nv50_context *nv50, float *map, unsigned nr = count > 2047 ? 2047 : count; BEGIN_RING(chan, tesla, 0x00000f00, 1); - OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8)); + OUT_RING (chan, (cbuf << 0) | (start << 8)); BEGIN_RING(chan, tesla, 0x40000f04, nr); OUT_RINGp (chan, map, nr); @@ -1567,35 +2237,50 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) { struct nouveau_winsys *nvws = nv50->screen->nvws; struct pipe_winsys *ws = nv50->pipe.winsys; - unsigned nr = p->param_nr + p->immd_nr; - if (!p->data && nr) { - struct nouveau_resource *heap = nv50->screen->vp_data_heap; + if (!p->data[0] && p->immd_nr) { + struct nouveau_resource *heap = nv50->screen->immd_heap[0]; - if (nvws->res_alloc(heap, nr, p, &p->data)) { - while (heap->next && heap->size < nr) { + if (nvws->res_alloc(heap, p->immd_nr, p, &p->data[0])) { + while (heap->next && heap->size < p->immd_nr) { struct nv50_program *evict = heap->next->priv; - nvws->res_free(&evict->data); + nvws->res_free(&evict->data[0]); } - if (nvws->res_alloc(heap, nr, p, &p->data)) + if (nvws->res_alloc(heap, p->immd_nr, p, &p->data[0])) + assert(0); + } + + /* immediates only need to be uploaded again when freed */ + nv50_program_upload_data(nv50, p->immd, p->data[0]->start, + p->immd_nr, NV50_CB_PMISC); + } + + if (!p->data[1] && p->param_nr) { + struct nouveau_resource *heap = + nv50->screen->parm_heap[p->type]; + + if (nvws->res_alloc(heap, p->param_nr, p, &p->data[1])) { + while (heap->next && heap->size < p->param_nr) { + struct nv50_program *evict = heap->next->priv; + nvws->res_free(&evict->data[1]); + } + + if (nvws->res_alloc(heap, p->param_nr, p, &p->data[1])) assert(0); } } if (p->param_nr) { + unsigned cbuf = NV50_CB_PVP; float *map = ws->buffer_map(ws, nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - nv50_program_upload_data(nv50, map, p->data->start, - p->param_nr); + if (p->type == PIPE_SHADER_FRAGMENT) + cbuf = NV50_CB_PFP; + nv50_program_upload_data(nv50, map, p->data[1]->start, + p->param_nr, cbuf); ws->buffer_unmap(ws, nv50->constbuf[p->type]); } - - if (p->immd_nr) { - nv50_program_upload_data(nv50, p->immd, - p->data->start + p->param_nr, - p->immd_nr); - } } static void @@ -1615,20 +2300,27 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) upload = TRUE; } - if (p->data && p->data->start != p->data_start) { + if ((p->data[0] && p->data[0]->start != p->data_start[0]) || + (p->data[1] && p->data[1]->start != p->data_start[1])) { for (e = p->exec_head; e; e = e->next) { - unsigned ei, ci; + unsigned ei, ci, bs; if (e->param.index < 0) continue; + bs = (e->inst[1] >> 22) & 0x07; + assert(bs < 2); ei = e->param.shift >> 5; - ci = e->param.index + p->data->start; + ci = e->param.index + p->data[bs]->start; e->inst[ei] &= ~e->param.mask; e->inst[ei] |= (ci << e->param.shift); } - p->data_start = p->data->start; + if (p->data[0]) + p->data_start[0] = p->data[0]->start; + if (p->data[1]) + p->data_start[1] = p->data[1]->start; + upload = TRUE; } @@ -1637,13 +2329,11 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) #ifdef NV50_PROGRAM_DUMP NOUVEAU_ERR("-------\n"); - up = ptr = MALLOC(p->exec_size * 4); for (e = p->exec_head; e; e = e->next) { NOUVEAU_ERR("0x%08x\n", e->inst[0]); if (is_long(e)) NOUVEAU_ERR("0x%08x\n", e->inst[1]); } - #endif up = ptr = MALLOC(p->exec_size * 4); @@ -1728,6 +2418,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *p = nv50->fragprog; struct nouveau_stateobj *so; + unsigned i; if (!p->translated) { nv50_program_validate(nv50, p); @@ -1745,17 +2436,22 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, 0x1904, 4); - so_data (so, 0x00040404); /* p: 0x01000404 */ + so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */ so_data (so, 0x00000004); so_data (so, 0x00000000); so_data (so, 0x00000000); - so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */ - so_data (so, 0x03020100); - so_data (so, 0x07060504); - so_data (so, 0x0b0a0908); + so_method(so, tesla, 0x16bc, p->cfg.fp.high_map); + for (i = 0; i < p->cfg.fp.high_map; i++) + so_data(so, p->cfg.fp.map[i]); so_method(so, tesla, 0x1988, 2); - so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */ + so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */ so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x1298, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, 0x19a8, 1); + so_data (so, p->cfg.fp.regs[2]); + so_method(so, tesla, 0x196c, 1); + so_data (so, p->cfg.fp.regs[3]); so_method(so, tesla, 0x1414, 1); so_data (so, 0); /* program start offset */ so_ref(so, &nv50->state.fragprog); @@ -1779,7 +2475,8 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) if (p->buffer) pipe_buffer_reference(&p->buffer, NULL); - nv50->screen->nvws->res_free(&p->data); + nv50->screen->nvws->res_free(&p->data[0]); + nv50->screen->nvws->res_free(&p->data[1]); p->translated = 0; } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 78deed6a384..c650ecfc817 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -24,8 +24,8 @@ struct nv50_program { struct nv50_program_exec *exec_head; struct nv50_program_exec *exec_tail; unsigned exec_size; - struct nouveau_resource *data; - unsigned data_start; + struct nouveau_resource *data[2]; + unsigned data_start[2]; struct pipe_buffer *buffer; @@ -39,6 +39,11 @@ struct nv50_program { struct { unsigned attr[2]; } vp; + struct { + unsigned regs[4]; + unsigned map[5]; + unsigned high_map; + } fp; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 29805645948..a7981a36157 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -290,20 +290,62 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); - /* Shared constant buffer */ - screen->constbuf = screen->pipe.buffer_create(&screen->pipe, 0, 0, 128 * 4 * 4); - if (nvws->res_init(&screen->vp_data_heap, 0, 128)) { - NOUVEAU_ERR("Error initialising constant buffer\n"); + /* constant buffers for immediates and VP/FP parameters */ + screen->constbuf_misc[0] = + screen->pipe.buffer_create(&screen->pipe, 0, 0, 128 * 4 * 4); + + screen->constbuf_parm[0] = + screen->pipe.buffer_create(&screen->pipe, 0, 0, 128 * 4 * 4); + + screen->constbuf_parm[1] = + screen->pipe.buffer_create(&screen->pipe, 0, 0, 128 * 4 * 4); + + if (nvws->res_init(&screen->immd_heap[0], 0, 128) || + nvws->res_init(&screen->parm_heap[0], 0, 128) || + nvws->res_init(&screen->parm_heap[1], 0, 128)) + { + NOUVEAU_ERR("Error initialising constant buffers.\n"); nv50_screen_destroy(&screen->pipe); return NULL; } + /* + // map constant buffers: + // B = buffer ID (maybe more than 1 byte) + // N = CB index used in shader instruction + // P = program type (0 = VP, 2 = GP, 3 = FP) + so_method(so, screen->tesla, 0x1694, 1); + so_data (so, 0x000BBNP1); + */ + + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PMISC << 16) | 0x00000800); + so_method(so, screen->tesla, 0x1694, 1); + so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); + so_method(so, screen->tesla, 0x1694, 1); + so_data (so, 0x00000031 | (NV50_CB_PMISC << 12)); + + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PVP << 16) | 0x00000800); + so_method(so, screen->tesla, 0x1694, 1); + so_data (so, 0x00000101 | (NV50_CB_PVP << 12)); + so_method(so, screen->tesla, 0x1280, 3); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_PMISC << 16) | 0x00001000); + so_data (so, (NV50_CB_PFP << 16) | 0x00000800); + so_method(so, screen->tesla, 0x1694, 1); + so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); /* Texture sampler/image unit setup - we abuse the constant buffer * upload mechanism for the moment to upload data to the tex config diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index db567aaac89..31b8ef29c98 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -15,8 +15,11 @@ struct nv50_screen { struct nouveau_grobj *m2mf; struct nouveau_notifier *sync; - struct pipe_buffer *constbuf; - struct nouveau_resource *vp_data_heap; + struct pipe_buffer *constbuf_misc[1]; + struct pipe_buffer *constbuf_parm[2]; + + struct nouveau_resource *immd_heap[1]; + struct nouveau_resource *parm_heap[2]; struct pipe_buffer *tic; struct pipe_buffer *tsc; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index ba852194cdd..9d41ef55b0f 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -137,8 +137,9 @@ nv50_sampler_state_create(struct pipe_context *pipe, const struct pipe_sampler_state *cso) { unsigned *tsc = CALLOC(8, sizeof(unsigned)); + float limit; - tsc[0] = (0x00024000 | + tsc[0] = (0x00026000 | (wrap_mode(cso->wrap_s) << 0) | (wrap_mode(cso->wrap_t) << 3) | (wrap_mode(cso->wrap_r) << 6)); @@ -202,6 +203,12 @@ nv50_sampler_state_create(struct pipe_context *pipe, tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7); } + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + tsc[1] |= ((int)(limit * 256.0) & 0x1fff) << 11; + + tsc[2] |= ((int)CLAMP(cso->max_lod, 0.0, 15.0) << 20) | + ((int)CLAMP(cso->min_lod, 0.0, 15.0) << 8); + return (void *)tsc; } diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 0cc5168144d..c0f0efe158b 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -57,7 +57,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) struct nouveau_bo *bo; int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); - + bo = screen->nvws->get_bo(nv50_miptree(ps->texture)->buffer); if (!bo) return 1; diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 223c8a3a456..775e9f30ef3 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -122,10 +122,10 @@ nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt) so_data (so, 0xd0005000); so_data (so, 0x00300000); so_data (so, mt->base.width[0]); - so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]); + so_data (so, (mt->base.last_level << 28) | + (mt->base.depth[0] << 16) | mt->base.height[0]); so_data (so, 0x03000000); - so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | - NOUVEAU_BO_RD, 0, 0); + so_data (so, mt->base.last_level << 4); return 0; } diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 747195b4f63..28e7edd144a 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -7,7 +7,7 @@ struct nv50_transfer { struct pipe_transfer base; struct pipe_buffer *buffer; - struct nv50_miptree_level *level; + unsigned level_offset; int level_pitch; int level_width; int level_height; @@ -17,8 +17,9 @@ struct nv50_transfer { static void nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct pipe_buffer *src, - int src_pitch, int sx, int sy, int sw, int sh, - struct pipe_buffer *dst, int dst_pitch, int dx, int dy, + unsigned src_offset, int src_pitch, int sx, int sy, + int sw, int sh, struct pipe_buffer *dst, + unsigned dst_offset, int dst_pitch, int dx, int dy, int dw, int dh, int cpp, int width, int height, unsigned src_reloc, unsigned dst_reloc) { @@ -28,7 +29,6 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct pipe_buffer *src, struct nouveau_grobj *m2mf = screen->m2mf; struct nouveau_bo *src_bo = nvws->get_bo(src); struct nouveau_bo *dst_bo = nvws->get_bo(dst); - unsigned src_offset = 0, dst_offset = 0; src_reloc |= NOUVEAU_BO_RD; dst_reloc |= NOUVEAU_BO_WR; @@ -40,7 +40,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct pipe_buffer *src, OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, 0x0314, 1); OUT_RING (chan, src_pitch); - src_offset = (sy * src_pitch) + (sx * cpp); + src_offset += (sy * src_pitch) + (sx * cpp); } else { BEGIN_RING(chan, m2mf, 0x0200, 6); OUT_RING (chan, 0); @@ -56,7 +56,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct pipe_buffer *src, OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, 0x0318, 1); OUT_RING (chan, dst_pitch); - dst_offset = (dy * dst_pitch) + (dx * cpp); + dst_offset += (dy * dst_pitch) + (dx * cpp); } else { BEGIN_RING(chan, m2mf, 0x021c, 6); OUT_RING (chan, 0); @@ -133,10 +133,10 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->base.stride = (w * pt->block.size); tx->base.usage = usage; - tx->level = lvl; tx->level_pitch = lvl->pitch; tx->level_width = mt->base.width[level]; tx->level_height = mt->base.height[level]; + tx->level_offset = lvl->image_offset[image]; tx->level_x = x; tx->level_y = y; tx->buffer = @@ -144,9 +144,10 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, w * tx->base.block.size * h); if (usage != PIPE_TRANSFER_WRITE) { - nv50_transfer_rect_m2mf(pscreen, mt->buffer, tx->level_pitch, - x, y, tx->level_width, tx->level_height, - tx->buffer, tx->base.stride, 0, 0, + nv50_transfer_rect_m2mf(pscreen, mt->buffer, tx->level_offset, + tx->level_pitch, x, y, tx->level_width, + tx->level_height, tx->buffer, 0, + tx->base.stride, 0, 0, tx->base.width, tx->base.height, tx->base.block.size, w, h, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART, @@ -164,14 +165,15 @@ nv50_transfer_del(struct pipe_transfer *ptx) if (ptx->usage != PIPE_TRANSFER_READ) { struct pipe_screen *pscreen = ptx->texture->screen; - nv50_transfer_rect_m2mf(pscreen, tx->buffer, tx->base.stride, + nv50_transfer_rect_m2mf(pscreen, tx->buffer, 0, tx->base.stride, 0, 0, tx->base.width, tx->base.height, - mt->buffer, tx->level_pitch, - tx->level_x, tx->level_y, - tx->level_width, tx->level_height, - tx->base.block.size, tx->base.width, - tx->base.height, NOUVEAU_BO_GART, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); + mt->buffer, tx->level_offset, + tx->level_pitch, tx->level_x, + tx->level_y, tx->level_width, + tx->level_height, tx->base.block.size, + tx->base.width, tx->base.height, + NOUVEAU_BO_GART, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART); } pipe_buffer_reference(&tx->buffer, NULL); diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 9d95ad918c2..00fae8d26f3 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -34,7 +34,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->is_r500 = FALSE; caps->num_vert_fpus = 4; - /* Note: These are not ordered by PCI ID. I leave that task to GCC, * which will perform the ordering while collating jump tables. Instead, * I've tried to group them according to capabilities and age. */ @@ -150,6 +149,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 6; break; + case 0x4B48: case 0x4B49: case 0x4B4A: case 0x4B4B: @@ -349,7 +349,4 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->pci_id); break; } - - /* XXX SW TCL is broken so no forcing it off right now - caps->has_tcl = FALSE; */ } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 21eebeae600..5b2e1f05688 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -34,8 +34,6 @@ struct r300_capabilities { int family; /* The number of vertex floating-point units */ int num_vert_fpus; - /* The number of fragment pipes */ - int num_frag_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; /* Whether or not this is an RV515 or newer; R500s have many differences diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 6bdf544a05c..21c0fe2b803 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -34,10 +34,6 @@ static boolean r300_draw_range_elements(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); int i; - if (r300->dirty_state) { - r300_emit_dirty_state(r300); - } - for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, r300->vertex_buffers[i].buffer, @@ -133,7 +129,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300) return NULL; - /* XXX this could be refactored now? */ r300->winsys = r300_winsys; r300->context.winsys = (struct pipe_winsys*)r300_winsys; @@ -150,8 +145,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; + /* Create a Draw. This is used for vert collation and SW TCL. */ r300->draw = draw_create(); + /* Enable our renderer. */ draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); + /* Tell Draw that we can always do non-UCP clipping. */ + draw_set_driver_clipping(r300->draw, TRUE); + /* Force Draw to never do viewport transform, since (again) we can do + * transform in hardware, always. */ + draw_set_viewport_state(r300->draw, &r300_viewport_identity); r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 6f62998b35d..a9dd041e083 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -63,6 +63,11 @@ struct r300_rs_state { /* Draw-specific rasterizer state */ struct pipe_rasterizer_state rs; + /* Whether or not to enable the VTE. This is referenced at the very + * last moment during emission of VTE state, to decide whether or not + * the VTE should be used for transformation. */ + boolean enable_vte; + uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ @@ -136,11 +141,11 @@ struct r300_viewport_state { struct r300_constant_buffer { /* Buffer of constants */ /* XXX first number should be raised */ - float constants[8][4]; + float constants[32][4]; /* Number of user-defined constants */ - int user_count; + unsigned user_count; /* Total number of constants */ - int count; + unsigned count; }; struct r3xx_fragment_shader { @@ -153,6 +158,10 @@ struct r3xx_fragment_shader { /* Pixel stack size */ int stack_size; + + /* Are there immediates in this shader? + * If not, we can heavily optimize recompilation. */ + boolean uses_imms; }; struct r300_fragment_shader { @@ -243,6 +252,10 @@ struct r300_vertex_shader { /* Has this shader been translated yet? */ boolean translated; + /* Are there immediates in this shader? + * If not, we can heavily optimize recompilation. */ + boolean uses_imms; + /* Number of used instructions */ int instruction_count; @@ -255,6 +268,11 @@ struct r300_vertex_shader { } instructions[128]; /*< XXX magic number */ }; +static struct pipe_viewport_state r300_viewport_identity = { + .scale = {1.0, 1.0, 1.0, 1.0}, + .translate = {0.0, 0.0, 0.0, 0.0}, +}; + struct r300_context { /* Parent class */ struct pipe_context context; @@ -264,6 +282,11 @@ struct r300_context { /* Draw module. Used mostly for SW TCL. */ struct draw_context* draw; + /* Vertex buffer for rendering. */ + struct pipe_buffer* vbo; + /* Offset into the VBO. */ + size_t vbo_offset; + /* Various CSO state objects. */ /* Blend state. */ struct r300_blend_state* blend_state; @@ -289,7 +312,7 @@ struct r300_context { /* Texture states. */ struct r300_texture* textures[8]; int texture_count; - /* Vertex buffers. */ + /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; /* Vertex information. */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 82a3942248e..2abf04d27eb 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -93,8 +93,9 @@ } while (0) #define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for buffer %p, offset %d\n", \ - bo, offset); \ + debug_printf("r300: writing relocation for buffer %p, offset %d, " \ + "domains (%d, %d, %d)\n", \ + bo, offset, rd, wd, flags); \ assert(bo); \ OUT_CS(offset); \ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index dd63136c9d6..ffc93eb5915 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -30,81 +30,6 @@ static void r300_dump_fs(struct r300_fragment_shader* fs) } } -static char* r500_fs_swiz[] = { - " R", - " G", - " B", - " A", - " 0", - ".5", - " 1", - " U", -}; - -static char* r500_fs_op_rgb[] = { - "MAD", - "DP3", - "DP4", - "D2A", - "MIN", - "MAX", - "---", - "CND", - "CMP", - "FRC", - "SOP", - "MDH", - "MDV", -}; - -static char* r500_fs_op_alpha[] = { - "MAD", - " DP", - "MIN", - "MAX", - "---", - "CND", - "CMP", - "FRC", - "EX2", - "LN2", - "RCP", - "RSQ", - "SIN", - "COS", - "MDH", - "MDV", -}; - -static char* r500_fs_mask[] = { - "NONE", - "R ", - " G ", - "RG ", - " B ", - "R B ", - " GB ", - "RGB ", - " A", - "R A", - " G A", - "RG A", - " BA", - "R BA", - " GBA", - "RGBA", -}; - -static char* r500_fs_tex[] = { - " NOP", - " LD", - "TEXKILL", - " PROJ", - "LODBIAS", - " LOD", - " DXDY", -}; - void r500_fs_dump(struct r500_fragment_shader* fs) { int i; @@ -225,12 +150,27 @@ void r500_fs_dump(struct r500_fragment_shader* fs) } } +static void r300_vs_op_dump(uint32_t op) +{ + if (op & 0x80) { + if (op & 0x1) { + debug_printf("PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + debug_printf(" PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + debug_printf("%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + debug_printf("%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + void r300_vs_dump(struct r300_vertex_shader* vs) { int i; for (i = 0; i < vs->instruction_count; i++) { - debug_printf("inst0: 0x%x\n", vs->instructions[i].inst0); + r300_vs_op_dump(vs->instructions[i].inst0); debug_printf("inst1: 0x%x\n", vs->instructions[i].inst1); debug_printf("inst2: 0x%x\n", vs->instructions[i].inst2); debug_printf("inst3: 0x%x\n", vs->instructions[i].inst3); diff --git a/src/gallium/drivers/r300/r300_debug.h b/src/gallium/drivers/r300/r300_debug.h index a1f873656dc..63065940997 100644 --- a/src/gallium/drivers/r300/r300_debug.h +++ b/src/gallium/drivers/r300/r300_debug.h @@ -27,6 +27,152 @@ #include "r300_state_shader.h" #include "r300_state_tcl.h" +static char* r500_fs_swiz[] = { + " R", + " G", + " B", + " A", + " 0", + ".5", + " 1", + " U", +}; + +static char* r500_fs_op_rgb[] = { + "MAD", + "DP3", + "DP4", + "D2A", + "MIN", + "MAX", + "---", + "CND", + "CMP", + "FRC", + "SOP", + "MDH", + "MDV", +}; + +static char* r500_fs_op_alpha[] = { + "MAD", + " DP", + "MIN", + "MAX", + "---", + "CND", + "CMP", + "FRC", + "EX2", + "LN2", + "RCP", + "RSQ", + "SIN", + "COS", + "MDH", + "MDV", +}; + +static char* r500_fs_mask[] = { + "NONE", + "R ", + " G ", + "RG ", + " B ", + "R B ", + " GB ", + "RGB ", + " A", + "R A", + " G A", + "RG A", + " BA", + "R BA", + " GBA", + "RGBA", +}; + +static char* r500_fs_tex[] = { + " NOP", + " LD", + "TEXKILL", + " PROJ", + "LODBIAS", + " LOD", + " DXDY", +}; + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + void r500_fs_dump(struct r500_fragment_shader* fs); void r300_vs_dump(struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 01bac5f759c..caeb73a8eda 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -163,6 +163,7 @@ void r300_emit_fb_state(struct r300_context* r300, BEGIN_CS((8 * fb->nr_cbufs) + (fb->zsbuf ? 8 : 0) + 4); for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; + assert(tex && tex->buffer && "cbuf is marked, but NULL!"); pixpitch = tex->stride / tex->tex.block.size; OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); @@ -177,7 +178,8 @@ void r300_emit_fb_state(struct r300_context* r300, if (fb->zsbuf) { tex = (struct r300_texture*)fb->zsbuf->texture; - pixpitch = (tex->stride / tex->tex.block.size); + assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); + pixpitch = tex->stride / tex->tex.block.size; OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -234,7 +236,7 @@ void r300_emit_rs_block_state(struct r300_context* r300, } for (i = 0; i < 8; i++) { OUT_CS(rs->ip[i]); - debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); + /* debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); */ } OUT_CS_REG_SEQ(R300_RS_COUNT, 2); @@ -248,11 +250,11 @@ void r300_emit_rs_block_state(struct r300_context* r300, } for (i = 0; i < 8; i++) { OUT_CS(rs->inst[i]); - debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); + /* debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); */ } - debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count, - rs->inst_count); + /* debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count, + * rs->inst_count); */ END_CS; } @@ -296,6 +298,30 @@ void r300_emit_texture(struct r300_context* r300, END_CS; } +void r300_emit_vertex_buffer(struct r300_context* r300) +{ + CS_LOCALS(r300); + + debug_printf("r300: Preparing vertex buffer %p for render, " + "vertex size %d\n", r300->vbo, + r300->vertex_info.vinfo.size); + /* Set the pointer to our vertex buffer. The emitted values are this: + * PACKET3 [3D_LOAD_VBPNTR] + * COUNT [1] + * FORMAT [size | stride << 8] + * OFFSET [offset into BO] + * VBPNTR [relocated BO] + */ + BEGIN_CS(7); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); + OUT_CS(1); + OUT_CS(r300->vertex_info.vinfo.size | + (r300->vertex_info.vinfo.size << 8)); + OUT_CS(r300->vbo_offset); + OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_CS; +} + void r300_emit_vertex_format_state(struct r300_context* r300) { int i; @@ -310,22 +336,22 @@ void r300_emit_vertex_format_state(struct r300_context* r300) OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); OUT_CS(r300->vertex_info.vinfo.hwfmt[2]); OUT_CS(r300->vertex_info.vinfo.hwfmt[3]); - for (i = 0; i < 4; i++) { - debug_printf("hwfmt%d: 0x%08x\n", i, - r300->vertex_info.vinfo.hwfmt[i]); - } + /* for (i = 0; i < 4; i++) { + * debug_printf("hwfmt%d: 0x%08x\n", i, + * r300->vertex_info.vinfo.hwfmt[i]); + * } */ OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); for (i = 0; i < 8; i++) { OUT_CS(r300->vertex_info.vap_prog_stream_cntl[i]); - debug_printf("prog_stream_cntl%d: 0x%08x\n", i, - r300->vertex_info.vap_prog_stream_cntl[i]); + /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i, + * r300->vertex_info.vap_prog_stream_cntl[i]); */ } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); for (i = 0; i < 8; i++) { OUT_CS(r300->vertex_info.vap_prog_stream_cntl_ext[i]); - debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i, - r300->vertex_info.vap_prog_stream_cntl_ext[i]); + /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i, + * r300->vertex_info.vap_prog_stream_cntl_ext[i]); */ } END_CS; } @@ -403,7 +429,11 @@ void r300_emit_viewport_state(struct r300_context* r300, OUT_CS_32F(viewport->zscale); OUT_CS_32F(viewport->zoffset); - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + if (r300->rs_state->enable_vte) { + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + } else { + OUT_CS_REG(R300_VAP_VTE_CNTL, 0); + } END_CS; } @@ -421,23 +451,66 @@ void r300_flush_textures(struct r300_context* r300) void r300_emit_dirty_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); - int i; - int dirty_tex = 0; + struct r300_texture* tex; + int i, dirty_tex = 0; + boolean invalid = FALSE; - if (!(r300->dirty_hw)) { + if (!(r300->dirty_state)) { return; } r300_update_derived_state(r300); /* XXX check size */ - struct r300_texture* fb_tex = - (struct r300_texture*)r300->framebuffer_state.cbufs[0]; - r300->winsys->add_buffer(r300->winsys, fb_tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM); +validate: + /* Color buffers... */ + for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) { + tex = (struct r300_texture*)r300->framebuffer_state.cbufs[i]->texture; + assert(tex && tex->buffer && "cbuf is marked, but NULL!"); + if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, + 0, RADEON_GEM_DOMAIN_VRAM)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + /* ...depth buffer... */ + if (r300->framebuffer_state.zsbuf) { + tex = (struct r300_texture*)r300->framebuffer_state.zsbuf->texture; + assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); + if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, + 0, RADEON_GEM_DOMAIN_VRAM)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + /* ...textures... */ + for (i = 0; i < r300->texture_count; i++) { + tex = r300->textures[i]; + assert(tex && tex->buffer && "texture is marked, but NULL!"); + if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + /* ...and vertex buffer. */ + if (r300->vbo) { + if (!r300->winsys->add_buffer(r300->winsys, r300->vbo, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } else { + debug_printf("No VBO while emitting dirty state!\n"); + } if (r300->winsys->validate(r300->winsys)) { - /* XXX */ r300->context.flush(&r300->context, 0, NULL); + if (invalid) { + /* Well, hell. */ + exit(1); + } + invalid = TRUE; + goto validate; } if (r300->dirty_state & R300_NEW_BLEND) { @@ -519,4 +592,9 @@ void r300_emit_dirty_state(struct r300_context* r300) r300_emit_vertex_format_state(r300); r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT; } + + /* Finally, emit the VBO. */ + r300_emit_vertex_buffer(r300); + + r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 31dbc7ab853..36e14f69f78 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -62,6 +62,8 @@ void r300_emit_scissor_state(struct r300_context* r300, void r300_emit_texture(struct r300_context* r300, struct r300_texture* tex, unsigned offset); +void r300_emit_vertex_buffer(struct r300_context* r300); + void r300_emit_vertex_format_state(struct r300_context* r300); void r300_emit_vertex_shader(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 660816e1da1..920584a59e8 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -3040,6 +3040,7 @@ enum { # define R500_INST_RGB_WMASK_R (1 << 11) # define R500_INST_RGB_WMASK_G (1 << 12) # define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_RGB_WMASK_RGB (7 << 11) # define R500_INST_ALPHA_WMASK (1 << 14) # define R500_INST_RGB_OMASK_R (1 << 15) # define R500_INST_RGB_OMASK_G (1 << 16) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index cbd84d7c569..29b66cee7ec 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -180,27 +180,10 @@ static void prepare_render(struct r300_render* render, unsigned count) CS_LOCALS(r300); - /* Make sure that all possible state is emitted. */ - r300_emit_dirty_state(r300); + r300->vbo = render->vbo; + r300->vbo_offset = render->vbo_offset; - debug_printf("r300: Preparing vertex buffer %p for render, " - "vertex size %d, vertex count %d\n", render->vbo, - r300->vertex_info.vinfo.size, count); - /* Set the pointer to our vertex buffer. The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [0] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info.vinfo.size | - (r300->vertex_info.vinfo.size << 8)); - OUT_CS(render->vbo_offset); - OUT_CS_RELOC(render->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; + r300_emit_dirty_state(r300); } static void r300_render_draw_arrays(struct vbuf_render* render, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index d2c5998c261..a6f1efe3563 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -87,23 +87,25 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) } else { return 0; } - return 0; case PIPE_CAP_GLSL: - /* IN THEORY */ - return 0; + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } case PIPE_CAP_S3TC: /* IN THEORY */ return 0; case PIPE_CAP_ANISOTROPIC_FILTER: - /* IN THEORY */ - return 0; + return 1; case PIPE_CAP_POINT_SPRITE: /* IN THEORY */ return 0; case PIPE_CAP_MAX_RENDER_TARGETS: return 4; case PIPE_CAP_OCCLUSION_QUERY: - return 1; + /* IN THEORY */ + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: /* IN THEORY */ return 0; @@ -152,17 +154,20 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) static float r300_get_paramf(struct pipe_screen* pscreen, int param) { + struct r300_screen* r300screen = r300_screen(pscreen); + switch (param) { case PIPE_CAP_MAX_LINE_WIDTH: case PIPE_CAP_MAX_LINE_WIDTH_AA: - /* XXX this is the biggest thing that will fit in that register. - * Perhaps the actual rendering limits are less? */ - return 10922.0f; case PIPE_CAP_MAX_POINT_WIDTH: case PIPE_CAP_MAX_POINT_WIDTH_AA: - /* XXX this is the biggest thing that will fit in that register. - * Perhaps the actual rendering limits are less? */ - return 10922.0f; + /* The maximum dimensions of the colorbuffer are our practical + * rendering limits. 2048 pixels should be enough for anybody. */ + if (r300screen->caps->is_r500) { + return 4096.0f; + } else { + return 2048.0f; + } case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: return 16.0f; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: @@ -230,9 +235,16 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen, case PIPE_TEXTURE_2D: return check_tex_2d_format(format, r300_screen(pscreen)->caps->is_r500); + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_CUBE: + debug_printf("r300: Implementation error: Unsupported format " + "target: %d\n", target); + break; default: - debug_printf("r300: Warning: Got unknown format target: %d\n", - format); + debug_printf("r300: Fatal: This is not a format target: %d\n", + target); + assert(0); break; } @@ -337,7 +349,6 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) return NULL; caps->pci_id = r300_winsys->pci_id; - caps->num_frag_pipes = r300_winsys->gb_pipes; r300_parse_chipset(caps); diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 184a23c9e68..0461ffd681a 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -62,8 +62,6 @@ static void* r300_create_blend_state(struct pipe_context* pipe, } /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ - /* XXX are logicops still allowed if blending's disabled? - * Does Gallium take care of it for us? */ if (state->logicop_enable) { blend->rop = R300_RB3D_ROPCNTL_ROP_ENABLE | (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; @@ -121,7 +119,7 @@ static void r300_set_clip_state(struct pipe_context* pipe, const struct pipe_clip_state* state) { struct r300_context* r300 = r300_context(pipe); - /* XXX Draw */ + /* XXX add HW TCL clipping setup */ draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); } @@ -153,10 +151,12 @@ static void /* If the number of constants have changed, invalidate the shader. */ if (r300->shader_constants[shader].user_count != i) { - if (shader == PIPE_SHADER_FRAGMENT && r300->fs) { + if (shader == PIPE_SHADER_FRAGMENT && r300->fs && + r300->fs->uses_imms) { r300->fs->translated = FALSE; r300_translate_fragment_shader(r300, r300->fs); - } else if (shader == PIPE_SHADER_VERTEX && r300->vs) { + } else if (shader == PIPE_SHADER_VERTEX && r300->vs && + r300->vs->uses_imms) { r300->vs->translated = FALSE; r300_translate_vertex_shader(r300, r300->vs); } @@ -257,6 +257,7 @@ static void r300_set_edgeflags(struct pipe_context* pipe, const unsigned* bitfield) { /* XXX you know it's bad when i915 has this blank too */ + /* XXX and even worse, I have no idea WTF the bitfield is */ } static void @@ -289,6 +290,7 @@ static void* r300_create_fs_state(struct pipe_context* pipe, /* Copy state directly into shader. */ fs->state = *shader; + fs->state.tokens = tgsi_dup_tokens(shader->tokens); tgsi_scan_shader(shader->tokens, &fs->info); @@ -317,13 +319,15 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) /* Delete fragment shader state. */ static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) { + struct r3xx_fragment_shader* fs = (struct r3xx_fragment_shader*)shader; + FREE(fs->state.tokens); FREE(shader); } static void r300_set_polygon_stipple(struct pipe_context* pipe, const struct pipe_poly_stipple* state) { - /* XXX */ + /* XXX no idea how to set this up, but not terribly important */ } /* Create a new rasterizer state based on the CSO rasterizer state. @@ -341,6 +345,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* Copy rasterizer state for Draw. */ rs->rs = *state; + rs->enable_vte = !state->bypass_vs_clip_and_viewport; + /* If bypassing TCL, or if no TCL engine is present, turn off the HW TCL. * Else, enable HW TCL and force Draw's TCL off. */ if (state->bypass_vs_clip_and_viewport || @@ -555,40 +561,35 @@ static void r300_set_viewport_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); - - if (r300_screen(r300->context.screen)->caps->has_tcl) { - /* Do the transform in HW. */ - r300->viewport_state->vte_control = R300_VTX_W0_FMT; + /* Do the transform in HW. */ + r300->viewport_state->vte_control = R300_VTX_W0_FMT; - if (state->scale[0] != 1.0f) { - r300->viewport_state->xscale = state->scale[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; - } - if (state->scale[1] != 1.0f) { - r300->viewport_state->yscale = state->scale[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; - } - if (state->scale[2] != 1.0f) { - r300->viewport_state->zscale = state->scale[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; - } - if (state->translate[0] != 0.0f) { - r300->viewport_state->xoffset = state->translate[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; - } - if (state->translate[1] != 0.0f) { - r300->viewport_state->yoffset = state->translate[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; - } - if (state->translate[2] != 0.0f) { - r300->viewport_state->zoffset = state->translate[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; - } - } else { - r300->viewport_state->vte_control = 0; - /* Have Draw do the actual transform. */ - draw_set_viewport_state(r300->draw, state); + if (state->scale[0] != 1.0f) { + assert(state->scale[0] != 0.0f); + r300->viewport_state->xscale = state->scale[0]; + r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; + } + if (state->scale[1] != 1.0f) { + assert(state->scale[1] != 0.0f); + r300->viewport_state->yscale = state->scale[1]; + r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; + } + if (state->scale[2] != 1.0f) { + assert(state->scale[2] != 0.0f); + r300->viewport_state->zscale = state->scale[2]; + r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; + } + if (state->translate[0] != 0.0f) { + r300->viewport_state->xoffset = state->translate[0]; + r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; + } + if (state->translate[1] != 0.0f) { + r300->viewport_state->yoffset = state->translate[1]; + r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; + } + if (state->translate[2] != 0.0f) { + r300->viewport_state->zoffset = state->translate[2]; + r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; } r300->dirty_state |= R300_NEW_VIEWPORT; @@ -628,6 +629,7 @@ static void* r300_create_vs_state(struct pipe_context* pipe, struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); /* Copy state directly into shader. */ vs->state = *shader; + vs->state.tokens = tgsi_dup_tokens(shader->tokens); tgsi_scan_shader(shader->tokens, &vs->info); @@ -673,6 +675,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; draw_delete_vertex_shader(r300->draw, vs->draw); + FREE(vs->state.tokens); FREE(shader); } else { draw_delete_vertex_shader(r300->draw, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c4c9784a00c..7ae339cf972 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -64,6 +64,7 @@ static void r300_vs_tab_routes(struct r300_context* r300, break; case TGSI_SEMANTIC_FOG: fog = TRUE; + /* Fall through */ case TGSI_SEMANTIC_GENERIC: texs++; break; @@ -103,6 +104,9 @@ static void r300_vs_tab_routes(struct r300_context* r300, } } + /* XXX magic */ + assert(texs <= 8); + /* Do the actual vertex_info setup. * * vertex_info has four uints of hardware-specific data in it. @@ -140,17 +144,21 @@ static void r300_vs_tab_routes(struct r300_context* r300, vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i); } - for (i = 0; i < texs; i++) { + /* Init i right here, increment it if fog is enabled. + * This gets around a double-increment problem. */ + i = 0; + + if (fog) { + i++; draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); vinfo->hwfmt[3] |= (4 << (3 * i)); } - if (fog) { - i++; + for (i; i < texs; i++) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); vinfo->hwfmt[3] |= (4 << (3 * i)); } @@ -162,26 +170,40 @@ static void r300_vs_tab_routes(struct r300_context* r300, static void r300_vertex_psc(struct r300_context* r300, struct r300_vertex_format* vformat) { + struct r300_screen* r300screen = r300_screen(r300->context.screen); struct vertex_info* vinfo = &vformat->vinfo; int* tab = vformat->vs_tab; uint32_t temp; - int i; + int i, attrib_count; - debug_printf("r300: attrib count: %d\n", vinfo->num_attribs); - for (i = 0; i < vinfo->num_attribs; i++) { - debug_printf("r300: attrib: offset %d, interp %d, size %d," - " tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - tab[i]); + /* Vertex shaders have no semantics on their inputs, + * so PSC should just route stuff based on their info, + * and not on attrib information. */ + if (r300screen->caps->has_tcl) { + attrib_count = r300->vs->info.num_inputs; + debug_printf("r300: routing %d attribs in psc for vs\n", + attrib_count); + } else { + attrib_count = vinfo->num_attribs; + debug_printf("r300: attrib count: %d\n", attrib_count); + for (i = 0; i < attrib_count; i++) { + debug_printf("r300: attrib: offset %d, interp %d, size %d," + " tab %d\n", vinfo->attrib[i].src_index, + vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, + tab[i]); + } } - for (i = 0; i < vinfo->num_attribs; i++) { + for (i = 0; i < attrib_count; i++) { /* Make sure we have a proper destination for our attribute */ assert(tab[i] != -1); /* Add the attribute to the PSC table. */ - temp = translate_vertex_data_type(vinfo->attrib[i].emit) | - (tab[i] << R300_DST_VEC_LOC_SHIFT); + temp = r300screen->caps->has_tcl ? + R300_DATA_TYPE_FLOAT_4 : + translate_vertex_data_type(vinfo->attrib[i].emit); + temp |= tab[i] << R300_DST_VEC_LOC_SHIFT; + if (i & 1) { vformat->vap_prog_stream_cntl[i >> 1] &= 0x0000ffff; vformat->vap_prog_stream_cntl[i >> 1] |= temp << 16; @@ -206,7 +228,6 @@ static void r300_vertex_psc(struct r300_context* r300, /* Update the vertex format. */ static void r300_update_vertex_format(struct r300_context* r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_vertex_format vformat; int i; diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 91b93fc367e..22c8e199ae3 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -353,25 +353,6 @@ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) /* Non-CSO state. (For now.) */ -static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) -{ - switch (pipe_count) { - case 1: - return R300_GB_TILE_PIPE_COUNT_RV300; - break; - case 2: - return R300_GB_TILE_PIPE_COUNT_R300; - break; - case 3: - return R300_GB_TILE_PIPE_COUNT_R420_3P; - break; - case 4: - return R300_GB_TILE_PIPE_COUNT_R420; - break; - } - return 0; -} - static INLINE uint32_t translate_vertex_data_type(int type) { switch (type) { case EMIT_1F: diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 8bd9b41bd74..9dde662802b 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -34,36 +34,19 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(30 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(26 + (caps->has_tcl ? 2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ OUT_CS_REG(R300_GB_ENABLE, 0x0); /* Subpixel multisampling for AA */ OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); - OUT_CS_REG(R300_GB_MSPOS1, 0x66666666); - /* GB tile config and pipe setup */ - OUT_CS_REG(R300_GB_TILE_CONFIG, R300_GB_TILE_DISABLE | - r300_translate_gb_pipes(caps->num_frag_pipes)); + OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); /* Source of fog depth */ OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); /* AA enable */ OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); - /*** Geometry Assembly (GA) ***/ - /* GA errata fixes. */ - if (caps->is_r500) { - OUT_CS_REG(R300_GA_ENHANCE, - R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL | - R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE | - R500_GA_ENHANCE_REG_READWRITE_ENABLE | - R500_GA_ENHANCE_REG_NOSTALL_ENABLE); - } else { - OUT_CS_REG(R300_GA_ENHANCE, - R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL | - R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE); - } - /*** Fog (FG) ***/ OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); @@ -86,7 +69,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(79 + (caps->has_tcl ? 7 : 0)); + BEGIN_CS(77 + (caps->has_tcl ? 7 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -113,11 +96,14 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_32F(0.0); OUT_CS_REG_SEQ(R300_GA_POINT_S1, 1); OUT_CS_32F(1.0); + /* XXX line tex stuffing */ + OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1); + OUT_CS_32F(0.0); + OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1); + OUT_CS_32F(1.0); OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 | (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT)); /* XXX this big chunk should be refactored into rs_state */ - OUT_CS_REG(R300_GA_LINE_S0, 0x00000000); - OUT_CS_REG(R300_GA_LINE_S1, 0x3F800000); OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000); @@ -144,8 +130,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1); OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405); OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); - /* Vertex size. */ - OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); /* XXX */ OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa); diff --git a/src/gallium/drivers/r300/r300_state_shader.c b/src/gallium/drivers/r300/r300_state_shader.c index 1b02239ee76..cc7f6a7c4b0 100644 --- a/src/gallium/drivers/r300/r300_state_shader.c +++ b/src/gallium/drivers/r300/r300_state_shader.c @@ -22,24 +22,6 @@ #include "r300_state_shader.h" -static void r300_copy_passthrough_shader(struct r300_fragment_shader* fs) -{ - struct r300_fragment_shader* pt = &r300_passthrough_fragment_shader; - fs->shader.stack_size = pt->shader.stack_size; - fs->alu_instruction_count = pt->alu_instruction_count; - fs->tex_instruction_count = pt->tex_instruction_count; - fs->indirections = pt->indirections; - fs->instructions[0] = pt->instructions[0]; -} - -static void r500_copy_passthrough_shader(struct r500_fragment_shader* fs) -{ - struct r500_fragment_shader* pt = &r500_passthrough_fragment_shader; - fs->shader.stack_size = pt->shader.stack_size; - fs->instruction_count = pt->instruction_count; - fs->instructions[0] = pt->instructions[0]; -} - static void r300_fs_declare(struct r300_fs_asm* assembler, struct tgsi_full_declaration* decl) { @@ -49,6 +31,7 @@ static void r300_fs_declare(struct r300_fs_asm* assembler, case TGSI_SEMANTIC_COLOR: assembler->color_count++; break; + case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_GENERIC: assembler->tex_count++; break; @@ -59,6 +42,12 @@ static void r300_fs_declare(struct r300_fs_asm* assembler, } break; case TGSI_FILE_OUTPUT: + /* Depth write. Mark the position of the output so we can + * identify it later. */ + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + assembler->depth_output = decl->DeclarationRange.First; + } + break; case TGSI_FILE_CONSTANT: break; case TGSI_FILE_TEMPORARY: @@ -120,6 +109,14 @@ static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler, return 0; } +static INLINE boolean r300_fs_is_depr(struct r300_fs_asm* assembler, + struct tgsi_dst_register* dst) +{ + return (assembler->writes_depth && + (dst->File == TGSI_FILE_OUTPUT) && + (dst->Index == assembler->depth_output)); +} + static INLINE unsigned r500_fix_swiz(unsigned s) { /* For historical reasons, the swizzle values x, y, z, w, and 0 are @@ -194,11 +191,17 @@ static INLINE uint32_t r300_alpha_op(unsigned op) static INLINE uint32_t r500_rgba_op(unsigned op) { switch (op) { + case TGSI_OPCODE_COS: case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SIN: return R500_ALU_RGBA_OP_SOP; + case TGSI_OPCODE_DDX: + return R500_ALU_RGBA_OP_MDH; + case TGSI_OPCODE_DDY: + return R500_ALU_RGBA_OP_MDV; case TGSI_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; case TGSI_OPCODE_DP3: @@ -224,6 +227,8 @@ static INLINE uint32_t r500_rgba_op(unsigned op) static INLINE uint32_t r500_alpha_op(unsigned op) { switch (op) { + case TGSI_OPCODE_COS: + return R500_ALPHA_OP_COS; case TGSI_OPCODE_EX2: return R500_ALPHA_OP_EX2; case TGSI_OPCODE_LG2: @@ -234,6 +239,12 @@ static INLINE uint32_t r500_alpha_op(unsigned op) return R500_ALPHA_OP_RSQ; case TGSI_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case TGSI_OPCODE_SIN: + return R500_ALPHA_OP_SIN; + case TGSI_OPCODE_DDX: + return R500_ALPHA_OP_MDH; + case TGSI_OPCODE_DDY: + return R500_ALPHA_OP_MDV; case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: case TGSI_OPCODE_DPH: @@ -295,38 +306,34 @@ static INLINE void r300_emit_maths(struct r300_fragment_shader* fs, } /* Setup an ALU operation. */ -static INLINE void r500_emit_alu(struct r500_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_dst_register* dst) +static INLINE void r500_emit_maths(struct r500_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_src_register* src, + struct tgsi_full_dst_register* dst, + unsigned op, + unsigned count) { int i = fs->instruction_count; if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - fs->instructions[i].inst0 = R500_INST_TYPE_OUT | - R500_ALU_OMASK(dst->DstRegister.WriteMask); + fs->instructions[i].inst0 = R500_INST_TYPE_OUT; + if (r300_fs_is_depr(assembler, dst)) { + fs->instructions[i].inst4 = R500_W_OMASK; + } else { + fs->instructions[i].inst0 |= + R500_ALU_OMASK(dst->DstRegister.WriteMask); + } } else { fs->instructions[i].inst0 = R500_INST_TYPE_ALU | - R500_ALU_WMASK(dst->DstRegister.WriteMask); + R500_ALU_WMASK(dst->DstRegister.WriteMask); } fs->instructions[i].inst0 |= R500_INST_TEX_SEM_WAIT; - fs->instructions[i].inst4 = + fs->instructions[i].inst4 |= R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); fs->instructions[i].inst5 = R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); -} - -static INLINE void r500_emit_maths(struct r500_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - unsigned op, - unsigned count) -{ - int i = fs->instruction_count; - - r500_emit_alu(fs, assembler, dst); switch (count) { case 3: @@ -348,8 +355,8 @@ static INLINE void r500_emit_maths(struct r500_fragment_shader* fs, R500_ALU_RGB_SEL_B_SRC1 | R500_SWIZ_RGB_B(r500_rgb_swiz(&src[1])); fs->instructions[i].inst4 |= - R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src[1])) | - R500_ALPHA_SEL_B_SRC1; + R500_ALPHA_SEL_B_SRC1 | + R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src[1])); case 1: case 0: default: @@ -361,8 +368,8 @@ static INLINE void r500_emit_maths(struct r500_fragment_shader* fs, R500_ALU_RGB_SEL_A_SRC0 | R500_SWIZ_RGB_A(r500_rgb_swiz(&src[0])); fs->instructions[i].inst4 |= - R500_SWIZ_ALPHA_A(r500_alpha_swiz(&src[0])) | - R500_ALPHA_SEL_A_SRC0; + R500_ALPHA_SEL_A_SRC0 | + R500_SWIZ_ALPHA_A(r500_alpha_swiz(&src[0])); break; } @@ -441,6 +448,9 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, * AMD/ATI names for opcodes, please, as it facilitates using the * documentation. */ switch (inst->Instruction.Opcode) { + /* XXX trig needs extra prep */ + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: /* The simple scalar ops. */ case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: @@ -452,6 +462,8 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, inst->FullSrcRegisters[0].SrcRegister.SwizzleW = inst->FullSrcRegisters[0].SrcRegister.SwizzleX; /* Fall through */ + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: case TGSI_OPCODE_FRC: r500_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1); @@ -527,6 +539,60 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; + /* The compound and hybrid insts. */ + case TGSI_OPCODE_LRP: + /* LRP DST A, B, C -> MAD TMP -A, C, C; MAD DST A, B, TMP */ + inst->FullSrcRegisters[3] = inst->FullSrcRegisters[1]; + inst->FullSrcRegisters[1] = inst->FullSrcRegisters[2]; + inst->FullSrcRegisters[0].SrcRegister.Negate = + !(inst->FullSrcRegisters[0].SrcRegister.Negate); + inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; + inst->FullDstRegisters[0].DstRegister.Index = + assembler->temp_count; + inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); + inst->FullSrcRegisters[2].SrcRegister.Index = + assembler->temp_count; + inst->FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst->FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst->FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst->FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + inst->FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + inst->FullSrcRegisters[1] = inst->FullSrcRegisters[3]; + inst->FullSrcRegisters[0].SrcRegister.Negate = + !(inst->FullSrcRegisters[0].SrcRegister.Negate); + inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); + break; + case TGSI_OPCODE_POW: + /* POW DST A, B -> LG2 TMP A; MUL TMP TMP, B; EX2 DST TMP */ + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX; + inst->FullSrcRegisters[0].SrcRegister.SwizzleW = + inst->FullSrcRegisters[0].SrcRegister.SwizzleX; + inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; + inst->FullDstRegisters[0].DstRegister.Index = + assembler->temp_count; + inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_LG2, 1); + inst->FullSrcRegisters[0].SrcRegister.Index = + assembler->temp_count; + inst->FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst->FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst->FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst->FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + inst->FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + inst->FullSrcRegisters[2] = r500_constant_zero; + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_MUL, 3); + inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; + r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_EX2, 1); + break; + /* The texture instruction set. */ case TGSI_OPCODE_KIL: case TGSI_OPCODE_TEX: @@ -555,7 +621,7 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, static void r300_fs_finalize(struct r3xx_fragment_shader* fs, struct r300_fs_asm* assembler) { - fs->stack_size = assembler->temp_count + assembler->temp_offset; + fs->stack_size = assembler->temp_count + assembler->temp_offset + 1; } static void r500_fs_finalize(struct r500_fragment_shader* fs, @@ -581,6 +647,8 @@ void r300_translate_fragment_shader(struct r300_context* r300, } /* Setup starting offset for immediates. */ assembler->imm_offset = consts->user_count; + /* Enable depth writes, if needed. */ + assembler->writes_depth = fs->info.writes_z; /* Make sure we start at the beginning of the shader. */ if (is_r500) { @@ -630,6 +698,7 @@ void r300_translate_fragment_shader(struct r300_context* r300, assembler->tex_count + assembler->color_count); consts->count = consts->user_count + assembler->imm_count; + fs->uses_imms = assembler->imm_count; debug_printf("r300: fs: %d total constants, " "%d from user and %d from immediates\n", consts->count, consts->user_count, assembler->imm_count); diff --git a/src/gallium/drivers/r300/r300_state_shader.h b/src/gallium/drivers/r300/r300_state_shader.h index 185fdd90f0c..b6087404cef 100644 --- a/src/gallium/drivers/r300/r300_state_shader.h +++ b/src/gallium/drivers/r300/r300_state_shader.h @@ -57,6 +57,7 @@ #define R500_TEX_WMASK(x) ((x) << 11) #define R500_ALU_WMASK(x) ((x) << 11) #define R500_ALU_OMASK(x) ((x) << 15) +#define R500_W_OMASK (1 << 31) /* TGSI constants. TGSI is like XML: If it can't solve your problems, you're * not using enough of it. */ @@ -99,20 +100,17 @@ struct r300_fs_asm { unsigned imm_offset; /* Number of immediate constants. */ unsigned imm_count; + /* Are depth writes enabled? */ + boolean writes_depth; + /* Depth write offset. This is the TGSI output that corresponds to + * depth writes. */ + unsigned depth_output; }; void r300_translate_fragment_shader(struct r300_context* r300, struct r3xx_fragment_shader* fs); static struct r300_fragment_shader r300_passthrough_fragment_shader = { - /* XXX This is the emission code. TODO: decode - OUT_CS_REG(R300_US_CONFIG, 0); - OUT_CS_REG(R300_US_CODE_OFFSET, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000); -*/ .alu_instruction_count = 1, .tex_instruction_count = 0, .indirections = 0, @@ -159,14 +157,6 @@ static struct r500_fragment_shader r500_passthrough_fragment_shader = { }; static struct r300_fragment_shader r300_texture_fragment_shader = { - /* XXX This is the emission code. TODO: decode - OUT_CS_REG(R300_US_CONFIG, 0); - OUT_CS_REG(R300_US_CODE_OFFSET, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000); -*/ .alu_instruction_count = 1, .tex_instruction_count = 0, .indirections = 0, @@ -191,7 +181,7 @@ static struct r500_fragment_shader r500_texture_fragment_shader = { .instruction_count = 2, .instructions[0].inst0 = R500_INST_TYPE_TEX | R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | + R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK | R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, .instructions[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED, diff --git a/src/gallium/drivers/r300/r300_state_tcl.c b/src/gallium/drivers/r300/r300_state_tcl.c index d84912de48f..32e61bc1d72 100644 --- a/src/gallium/drivers/r300/r300_state_tcl.c +++ b/src/gallium/drivers/r300/r300_state_tcl.c @@ -71,16 +71,13 @@ static INLINE unsigned r300_vs_src_type(struct r300_vs_asm* assembler, { switch (src->File) { case TGSI_FILE_NULL: - /* Probably a zero or one swizzle */ - return R300_PVS_SRC_REG_INPUT; - break; case TGSI_FILE_INPUT: + /* Probably a zero or one swizzle */ return R300_PVS_SRC_REG_INPUT; - break; case TGSI_FILE_TEMPORARY: return R300_PVS_SRC_REG_TEMPORARY; - break; case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: return R300_PVS_SRC_REG_CONSTANT; default: debug_printf("r300: vs: Unimplemented src type %d\n", src->File); @@ -89,16 +86,32 @@ static INLINE unsigned r300_vs_src_type(struct r300_vs_asm* assembler, return 0; } +static INLINE unsigned r300_vs_src(struct r300_vs_asm* assembler, + struct tgsi_src_register* src) +{ + switch (src->File) { + case TGSI_FILE_NULL: + case TGSI_FILE_INPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_CONSTANT: + return src->Index; + case TGSI_FILE_IMMEDIATE: + return src->Index + assembler->imm_offset; + default: + debug_printf("r300: vs: Unimplemented src type %d\n", src->File); + break; + } + return 0; +} + static INLINE unsigned r300_vs_dst_type(struct r300_vs_asm* assembler, struct tgsi_dst_register* dst) { switch (dst->File) { case TGSI_FILE_TEMPORARY: return R300_PVS_DST_REG_TEMPORARY; - break; case TGSI_FILE_OUTPUT: return R300_PVS_DST_REG_OUT; - break; default: debug_printf("r300: vs: Unimplemented dst type %d\n", dst->File); break; @@ -112,10 +125,8 @@ static INLINE unsigned r300_vs_dst(struct r300_vs_asm* assembler, switch (dst->File) { case TGSI_FILE_TEMPORARY: return dst->Index; - break; case TGSI_FILE_OUTPUT: return assembler->tab[dst->Index]; - break; default: debug_printf("r300: vs: Unimplemented dst %d\n", dst->File); break; @@ -135,6 +146,12 @@ static uint32_t r300_vs_op(unsigned op) case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: return R300_VE_ADD; + case TGSI_OPCODE_MAX: + return R300_VE_MAXIMUM; + case TGSI_OPCODE_SLT: + return R300_VE_SET_LESS_THAN; + case TGSI_OPCODE_RSQ: + return R300_PVS_DST_MATH_INST | R300_ME_RECIP_DX; case TGSI_OPCODE_MAD: return R300_PVS_DST_MACRO_INST | R300_PVS_MACRO_OP_2CLK_MADD; default: @@ -158,39 +175,62 @@ static uint32_t r300_vs_swiz(struct tgsi_full_src_register* reg) } } +/* XXX icky icky icky icky */ +static uint32_t r300_vs_scalar_swiz(struct tgsi_full_src_register* reg) +{ + if (reg->SrcRegister.Extended) { + return reg->SrcRegisterExtSwz.ExtSwizzleX | + (reg->SrcRegisterExtSwz.ExtSwizzleX << 3) | + (reg->SrcRegisterExtSwz.ExtSwizzleX << 6) | + (reg->SrcRegisterExtSwz.ExtSwizzleX << 9); + } else { + return reg->SrcRegister.SwizzleX | + (reg->SrcRegister.SwizzleX << 3) | + (reg->SrcRegister.SwizzleX << 6) | + (reg->SrcRegister.SwizzleX << 9); + } +} + +/* XXX scalar stupidity */ static void r300_vs_emit_inst(struct r300_vertex_shader* vs, struct r300_vs_asm* assembler, struct tgsi_full_src_register* src, struct tgsi_full_dst_register* dst, unsigned op, - unsigned count) + unsigned count, + boolean is_scalar) { int i = vs->instruction_count; vs->instructions[i].inst0 = R300_PVS_DST_OPCODE(r300_vs_op(op)) | R300_PVS_DST_REG_TYPE(r300_vs_dst_type(assembler, &dst->DstRegister)) | R300_PVS_DST_OFFSET(r300_vs_dst(assembler, &dst->DstRegister)) | - R300_PVS_DST_WE_XYZW; + R300_PVS_DST_WE(dst->DstRegister.WriteMask); switch (count) { case 3: vs->instructions[i].inst3 = R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, &src[2].SrcRegister)) | - R300_PVS_SRC_OFFSET(src[2].SrcRegister.Index) | + R300_PVS_SRC_OFFSET(r300_vs_src(assembler, + &src[2].SrcRegister)) | R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[2])); /* Fall through */ case 2: vs->instructions[i].inst2 = R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, &src[1].SrcRegister)) | - R300_PVS_SRC_OFFSET(src[1].SrcRegister.Index) | + R300_PVS_SRC_OFFSET(r300_vs_src(assembler, + &src[1].SrcRegister)) | R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[1])); /* Fall through */ case 1: vs->instructions[i].inst1 = R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, &src[0].SrcRegister)) | - R300_PVS_SRC_OFFSET(src[0].SrcRegister.Index) | - R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[0])); + R300_PVS_SRC_OFFSET(r300_vs_src(assembler, + &src[0].SrcRegister)) | + /* XXX the icky, it burns */ + R300_PVS_SRC_SWIZZLE(is_scalar ? r300_vs_scalar_swiz(&src[0]) + : r300_vs_swiz(&src[0])); break; } vs->instruction_count++; @@ -201,11 +241,18 @@ static void r300_vs_instruction(struct r300_vertex_shader* vs, struct tgsi_full_instruction* inst) { switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_RSQ: + r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, + 1, TRUE); + break; case TGSI_OPCODE_ADD: case TGSI_OPCODE_MUL: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_SLT: r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2); + 2, FALSE); break; case TGSI_OPCODE_DP3: /* Set alpha swizzle to zero for src0 and src1 */ @@ -235,19 +282,19 @@ static void r300_vs_instruction(struct r300_vertex_shader* vs, case TGSI_OPCODE_DP4: r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2); + 2, FALSE); break; case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: inst->FullSrcRegisters[1] = r300_constant_zero; r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2); + 2, FALSE); break; case TGSI_OPCODE_MAD: r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 3); + 3, FALSE); break; case TGSI_OPCODE_END: break; @@ -278,6 +325,8 @@ static void r300_vs_init(struct r300_vertex_shader* vs, break; } } + + vs->instruction_count = 0; } void r300_translate_vertex_shader(struct r300_context* r300, @@ -337,6 +386,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, assembler->tex_count + assembler->color_count); consts->count = consts->user_count + assembler->imm_count; + vs->uses_imms = assembler->imm_count; debug_printf("r300: vs: %d total constants, " "%d from user and %d from immediates\n", consts->count, consts->user_count, assembler->imm_count); diff --git a/src/gallium/drivers/r300/r300_state_tcl.h b/src/gallium/drivers/r300/r300_state_tcl.h index e2e1357d436..d5d425e9d6c 100644 --- a/src/gallium/drivers/r300/r300_state_tcl.h +++ b/src/gallium/drivers/r300/r300_state_tcl.h @@ -35,6 +35,10 @@ # define R300_VE_DOT_PRODUCT 1 # define R300_VE_MULTIPLY 2 # define R300_VE_ADD 3 +# define R300_VE_MAXIMUM 7 +# define R300_VE_SET_LESS_THAN 10 +#define R300_PVS_DST_MATH_INST (1 << 6) +# define R300_ME_RECIP_DX 6 #define R300_PVS_DST_MACRO_INST (1 << 7) # define R300_PVS_MACRO_OP_2CLK_MADD 0 #define R300_PVS_DST_REG_TYPE(x) ((x) << 8) diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 4dd5b8af99a..c9e2dff14ed 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -23,30 +23,36 @@ #include "r300_surface.h" -static void r300_surface_setup(struct pipe_context* pipe, - struct pipe_surface* dest, +static void r300_surface_setup(struct r300_context* r300, + struct r300_texture* dest, unsigned x, unsigned y, unsigned w, unsigned h) { - struct r300_context* r300 = r300_context(pipe); - struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; - struct r300_texture* tex = (struct r300_texture*)dest->texture; - unsigned pixpitch = tex->stride / tex->tex.block.size; + struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; + unsigned pixpitch = dest->stride / dest->tex.block.size; CS_LOCALS(r300); - /* Make sure our target BO is okay. */ - r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM); - if (r300->winsys->validate(r300->winsys)) { - r300->context.flush(&r300->context, 0, NULL); - } - r300_emit_blend_state(r300, &blend_clear_state); r300_emit_blend_color_state(r300, &blend_color_clear_state); r300_emit_dsa_state(r300, &dsa_clear_state); r300_emit_rs_state(r300, &rs_clear_state); - BEGIN_CS(15); + BEGIN_CS(24); + + /* Viewport setup */ + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F((float)w); + OUT_CS_32F((float)x); + OUT_CS_32F((float)h); + OUT_CS_32F((float)y); + OUT_CS_32F(1.0); + OUT_CS_32F(0.0); + + OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VPORT_X_SCALE_ENA | + R300_VPORT_X_OFFSET_ENA | + R300_VPORT_Y_SCALE_ENA | + R300_VPORT_Y_OFFSET_ENA | + R300_VTX_XY_FMT | R300_VTX_Z_FMT); /* Pixel scissors. */ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); @@ -71,9 +77,9 @@ static void r300_surface_setup(struct pipe_context* pipe, /* Setup colorbuffer. */ OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | - r300_translate_colorformat(tex->tex.format)); + r300_translate_colorformat(dest->tex.format)); OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf); END_CS; @@ -93,6 +99,7 @@ static void r300_surface_fill(struct pipe_context* pipe, struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* tex = (struct r300_texture*)dest->texture; unsigned pixpitch = tex->stride / tex->tex.block.size; + boolean invalid = FALSE; CS_LOCALS(r300); a = (float)((color >> 24) & 0xff) / 255.0f; @@ -105,12 +112,29 @@ static void r300_surface_fill(struct pipe_context* pipe, /* Fallback? */ if (FALSE) { +fallback: debug_printf("r300: Falling back on surface clear..."); util_surface_fill(pipe, dest, x, y, w, h, color); return; } - r300_surface_setup(r300, dest, x, y, w, h); + /* Make sure our target BO is okay. */ +validate: + if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, + 0, RADEON_GEM_DOMAIN_VRAM)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + if (r300->winsys->validate(r300->winsys)) { + r300->context.flush(&r300->context, 0, NULL); + if (invalid) { + goto fallback; + } + invalid = TRUE; + goto validate; + } + + r300_surface_setup(r300, tex, x, y, w, h); /* Vertex shader setup */ if (caps->has_tcl) { @@ -134,7 +158,7 @@ static void r300_surface_fill(struct pipe_context* pipe, r300_emit_rs_block_state(r300, &r300_rs_block_clear_state); } - BEGIN_CS(31); + BEGIN_CS(26); /* VAP stream control, mapping from input memory to PVS/RS memory */ if (caps->has_tcl) { @@ -161,27 +185,21 @@ static void r300_surface_fill(struct pipe_context* pipe, /* Disable textures */ OUT_CS_REG(R300_TX_ENABLE, 0x0); - /* Viewport setup */ - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(1.0); - OUT_CS_32F((float)x); - OUT_CS_32F(1.0); - OUT_CS_32F((float)y); - OUT_CS_32F(1.0); - OUT_CS_32F(0.0); - /* The size of the point we're about to draw, in sixths of pixels */ OUT_CS_REG(R300_GA_POINT_SIZE, - ((h * 6) & R300_POINTSIZE_Y_MASK) | + ((h * 6) & R300_POINTSIZE_Y_MASK) | ((w * 6) << R300_POINTSIZE_X_SHIFT)); + /* Vertex size. */ + OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); + /* Packet3 with our point vertex */ OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8); OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | (1 << R300_PRIM_NUM_VERTICES_SHIFT)); /* Position */ - OUT_CS_32F(w / 2.0); - OUT_CS_32F(h / 2.0); + OUT_CS_32F(0.5); + OUT_CS_32F(0.5); OUT_CS_32F(1.0); OUT_CS_32F(1.0); /* Color */ @@ -190,11 +208,7 @@ static void r300_surface_fill(struct pipe_context* pipe, OUT_CS_32F(b); OUT_CS_32F(a); - /* XXX figure out why this is 0xA and not 0x2 */ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); - /* XXX OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); */ END_CS; @@ -213,6 +227,7 @@ static void r300_surface_copy(struct pipe_context* pipe, struct r300_texture* srctex = (struct r300_texture*)src->texture; struct r300_texture* desttex = (struct r300_texture*)dest->texture; unsigned pixpitch = srctex->stride / srctex->tex.block.size; + boolean invalid = FALSE; CS_LOCALS(r300); debug_printf("r300: Copying surface %p at (%d,%d) to %p at (%d, %d)," @@ -222,18 +237,48 @@ static void r300_surface_copy(struct pipe_context* pipe, if ((srctex == desttex) && ((destx < srcx + w) || (srcx < destx + w)) && ((desty < srcy + h) || (srcy < desty + h))) { +fallback: debug_printf("r300: Falling back on surface_copy\n"); util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, w, h); } + /* Add our target BOs to the list. */ +validate: + if (!r300->winsys->add_buffer(r300->winsys, srctex->buffer, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + if (!r300->winsys->add_buffer(r300->winsys, desttex->buffer, + 0, RADEON_GEM_DOMAIN_VRAM)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + if (r300->winsys->validate(r300->winsys)) { + r300->context.flush(&r300->context, 0, NULL); + if (invalid) { + goto fallback; + } + invalid = TRUE; + goto validate; + } + + r300_surface_setup(r300, desttex, destx, desty, w, h); + + /* Setup the texture. */ r300_emit_sampler(r300, &r300_sampler_copy_state, 0); r300_emit_texture(r300, srctex, 0); - r300_flush_textures(r300); + + /* Flush and enable. */ + BEGIN_CS(4); + OUT_CS_REG(R300_TX_INVALTAGS, 0); + OUT_CS_REG(R300_TX_ENABLE, 0x1); + END_CS; /* Vertex shader setup */ if (caps->has_tcl) { - r300_emit_vertex_shader(r300, &r300_texture_vertex_shader); + r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader); } else { BEGIN_CS(4); OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS); @@ -253,6 +298,7 @@ static void r300_surface_copy(struct pipe_context* pipe, r300_emit_rs_block_state(r300, &r300_rs_block_copy_state); } + BEGIN_CS(30); /* VAP stream control, mapping from input memory to PVS/RS memory */ if (caps->has_tcl) { OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, @@ -275,33 +321,38 @@ static void r300_surface_copy(struct pipe_context* pipe, /* Two components of texture 0 */ OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x2); + /* Vertex size. */ + OUT_CS_REG(R300_VAP_VTX_SIZE, 0x4); + /* Packet3 with our texcoords */ - OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8); + OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 16); OUT_CS(R300_PRIM_TYPE_QUADS | R300_PRIM_WALK_RING | (4 << R300_PRIM_NUM_VERTICES_SHIFT)); /* (x , y ) */ - OUT_CS_32F((float)destx); - OUT_CS_32F((float)desty); - OUT_CS_32F((float)srcx); - OUT_CS_32F((float)srcy); + OUT_CS_32F((float)(destx / dest->width)); + OUT_CS_32F((float)(desty / dest->height)); + OUT_CS_32F((float)(srcx / dest->width)); + OUT_CS_32F((float)(srcy / dest->height)); /* (x , y + h) */ - OUT_CS_32F((float)destx); - OUT_CS_32F((float)(desty + h)); - OUT_CS_32F((float)srcx); - OUT_CS_32F((float)(srcy + h)); + OUT_CS_32F((float)(destx / dest->width)); + OUT_CS_32F((float)((desty + h) / dest->height)); + OUT_CS_32F((float)(srcx / dest->width)); + OUT_CS_32F((float)((srcy + h) / dest->height)); /* (x + w, y + h) */ - OUT_CS_32F((float)(destx + w)); - OUT_CS_32F((float)(desty + h)); - OUT_CS_32F((float)(srcx + w)); - OUT_CS_32F((float)(srcy + h)); + OUT_CS_32F((float)((destx + w) / dest->width)); + OUT_CS_32F((float)((desty + h) / dest->height)); + OUT_CS_32F((float)((srcx + w) / dest->width)); + OUT_CS_32F((float)((srcy + h) / dest->height)); /* (x + w, y ) */ - OUT_CS_32F((float)(destx + w)); - OUT_CS_32F((float)desty); - OUT_CS_32F((float)(srcx + w)); - OUT_CS_32F((float)srcy); + OUT_CS_32F((float)((destx + w) / dest->width)); + OUT_CS_32F((float)(desty / dest->height)); + OUT_CS_32F((float)((srcx + w) / dest->width)); + OUT_CS_32F((float)(srcy / dest->height)); OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); + END_CS; + r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h index 894def07aa8..9a4c39f58bd 100644 --- a/src/gallium/drivers/r300/r300_surface.h +++ b/src/gallium/drivers/r300/r300_surface.h @@ -101,7 +101,7 @@ static struct r300_rs_block r300_rs_block_copy_state = { R500_RS_SEL_Q(R300_RS_SEL_K1), .inst[0] = R300_RS_INST_COL_CN_WRITE, .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN, - .inst_count = R300_RS_TX_OFFSET(6), + .inst_count = R300_RS_TX_OFFSET(0), }; static struct r300_rs_block r500_rs_block_copy_state = { @@ -111,7 +111,7 @@ static struct r300_rs_block r500_rs_block_copy_state = { R500_RS_SEL_Q(R500_RS_IP_PTR_K1), .inst[0] = R500_RS_INST_TEX_CN_WRITE, .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN, - .inst_count = R300_RS_TX_OFFSET(6), + .inst_count = R300_RS_TX_OFFSET(0), }; static struct r300_sampler_state r300_sampler_copy_state = { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index fe91f4e1844..5ea9f56247b 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -86,8 +86,6 @@ static struct pipe_texture* r300_texture_create(struct pipe_screen* screen, const struct pipe_texture* template) { - /* XXX struct r300_screen* r300screen = r300_screen(screen); */ - struct r300_texture* tex = CALLOC_STRUCT(r300_texture); if (!tex) { diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index a833bb0399a..d2893c3b9d7 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -45,9 +45,6 @@ struct r300_winsys { /* PCI ID */ uint32_t pci_id; - /* GB pipe count */ - uint32_t gb_pipes; - /* GART size. */ uint32_t gart_size; @@ -55,10 +52,10 @@ struct r300_winsys { uint32_t vram_size; /* Add a pipe_buffer to the list of buffer objects to validate. */ - void (*add_buffer)(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd); + boolean (*add_buffer)(struct r300_winsys* winsys, + struct pipe_buffer* pbuffer, + uint32_t rd, + uint32_t wd); /* Revalidate all currently setup pipe_buffers. * Returns TRUE if a flush is required. */ diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 59d6df8f2dd..b89a7292e5b 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -92,6 +92,7 @@ struct softpipe_context { * queries. */ uint64_t occlusion_count; + unsigned active_query_count; /** Mapped vertex buffers */ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 892ef87ee9f..b5f69b74264 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -80,7 +80,7 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp_push_quad_first( sp, sp->quad[i].blend, i ); } - if (sp->depth_stencil->depth.occlusion_count) { + if (sp->active_query_count) { sp_push_quad_first( sp, sp->quad[i].occlusion, i ); } diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 93dab236d66..379cf4ad064 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "sp_context.h" #include "sp_query.h" +#include "sp_state.h" struct softpipe_query { uint64_t start; @@ -69,6 +70,8 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) struct softpipe_query *sq = softpipe_query(q); sq->start = softpipe->occlusion_count; + softpipe->active_query_count++; + softpipe->dirty |= SP_NEW_QUERY; } @@ -78,7 +81,9 @@ softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) struct softpipe_context *softpipe = softpipe_context( pipe ); struct softpipe_query *sq = softpipe_query(q); + softpipe->active_query_count--; sq->end = softpipe->occlusion_count; + softpipe->dirty |= SP_NEW_QUERY; } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index a32fd3a1ba0..ce6d8ebd12b 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -82,11 +82,11 @@ softpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; /* max 2Kx2K */ + return 13; /* max 4Kx4K */ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return 8; /* max 128x128x128 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; /* max 2Kx2K */ + return 13; /* max 4Kx4K */ default: return 0; } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index accc692b66f..c6844a26498 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -504,6 +504,8 @@ static void print_vertex(const struct setup_context *setup, #endif /** + * Sort the vertices from top to bottom order, setting up the triangle + * edge fields (ebot, emaj, etop). * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise */ static boolean setup_sort_vertices( struct setup_context *setup, diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 9e19745889e..7a533dad9f0 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -42,6 +42,7 @@ #include "sp_texture.h" #include "sp_tile_cache.h" #include "sp_screen.h" +#include "sp_winsys.h" /* Simple, maximally packed layout. diff --git a/src/gallium/drivers/trace/Makefile b/src/gallium/drivers/trace/Makefile index e087db169aa..ecb69fb9964 100644 --- a/src/gallium/drivers/trace/Makefile +++ b/src/gallium/drivers/trace/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ tr_buffer.c \ tr_context.c \ tr_dump.c \ + tr_dump_state.c \ tr_screen.c \ tr_state.c \ tr_texture.c diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript index 4215215d1a1..9b5af0d86fb 100644 --- a/src/gallium/drivers/trace/SConscript +++ b/src/gallium/drivers/trace/SConscript @@ -8,6 +8,7 @@ trace = env.ConvenienceLibrary( 'tr_buffer.c', 'tr_context.c', 'tr_dump.c', + 'tr_dump_state.c', 'tr_screen.c', 'tr_state.c', 'tr_texture.c', diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 47280459a75..2ad5ca49987 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -31,11 +31,11 @@ #include "pipe/p_screen.h" #include "tr_dump.h" +#include "tr_dump_state.h" #include "tr_state.h" #include "tr_buffer.h" #include "tr_screen.h" #include "tr_texture.h" -#include "tr_context.h" static INLINE struct pipe_buffer * @@ -121,6 +121,9 @@ trace_context_draw_arrays(struct pipe_context *_pipe, struct pipe_context *pipe = tr_ctx->pipe; boolean result; + if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) + return 0; + trace_dump_call_begin("pipe_context", "draw_arrays"); trace_dump_arg(ptr, pipe); @@ -150,6 +153,9 @@ trace_context_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *indexBuffer = tr_buf->buffer; boolean result; + if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) + return 0; + trace_screen_user_buffer_update(_pipe->screen, indexBuffer); trace_dump_call_begin("pipe_context", "draw_elements"); @@ -187,6 +193,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *indexBuffer = tr_buf->buffer; boolean result; + if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) + return 0; + trace_screen_user_buffer_update(_pipe->screen, indexBuffer); trace_dump_call_begin("pipe_context", "draw_range_elements"); @@ -573,23 +582,32 @@ trace_context_create_fs_state(struct pipe_context *_pipe, trace_dump_call_end(); + result = trace_shader_create(tr_ctx, state, result, TRACE_SHADER_FRAGMENT); + return result; } static INLINE void trace_context_bind_fs_state(struct pipe_context *_pipe, - void *state) + void *_state) { struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_shader *tr_shdr = trace_shader(_state); struct pipe_context *pipe = tr_ctx->pipe; + void *state = tr_shdr ? tr_shdr->state : NULL; trace_dump_call_begin("pipe_context", "bind_fs_state"); trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->bind_fs_state(pipe, state);; + tr_ctx->curr.fs = tr_shdr; + + if (tr_shdr && tr_shdr->replaced) + state = tr_shdr->replaced; + + pipe->bind_fs_state(pipe, state); trace_dump_call_end(); } @@ -597,19 +615,23 @@ trace_context_bind_fs_state(struct pipe_context *_pipe, static INLINE void trace_context_delete_fs_state(struct pipe_context *_pipe, - void *state) + void *_state) { struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_shader *tr_shdr = trace_shader(_state); struct pipe_context *pipe = tr_ctx->pipe; + void *state = tr_shdr->state; trace_dump_call_begin("pipe_context", "delete_fs_state"); trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); - pipe->delete_fs_state(pipe, state);; + pipe->delete_fs_state(pipe, state); trace_dump_call_end(); + + trace_shader_destroy(tr_ctx, tr_shdr); } @@ -626,28 +648,37 @@ trace_context_create_vs_state(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(shader_state, state); - result = pipe->create_vs_state(pipe, state);; + result = pipe->create_vs_state(pipe, state); trace_dump_ret(ptr, result); trace_dump_call_end(); + result = trace_shader_create(tr_ctx, state, result, TRACE_SHADER_VERTEX); + return result; } static INLINE void trace_context_bind_vs_state(struct pipe_context *_pipe, - void *state) + void *_state) { struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_shader *tr_shdr = trace_shader(_state); struct pipe_context *pipe = tr_ctx->pipe; + void *state = tr_shdr ? tr_shdr->state : NULL; trace_dump_call_begin("pipe_context", "bind_vs_state"); trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); + tr_ctx->curr.vs = tr_shdr; + + if (tr_shdr && tr_shdr->replaced) + state = tr_shdr->replaced; + pipe->bind_vs_state(pipe, state);; trace_dump_call_end(); @@ -656,10 +687,12 @@ trace_context_bind_vs_state(struct pipe_context *_pipe, static INLINE void trace_context_delete_vs_state(struct pipe_context *_pipe, - void *state) + void *_state) { struct trace_context *tr_ctx = trace_context(_pipe); + struct trace_shader *tr_shdr = trace_shader(_state); struct pipe_context *pipe = tr_ctx->pipe; + void *state = tr_shdr->state; trace_dump_call_begin("pipe_context", "delete_vs_state"); @@ -669,6 +702,8 @@ trace_context_delete_vs_state(struct pipe_context *_pipe, pipe->delete_vs_state(pipe, state);; trace_dump_call_end(); + + trace_shader_destroy(tr_ctx, tr_shdr); } @@ -1089,7 +1124,7 @@ trace_context_create(struct pipe_screen *_screen, if(!pipe) goto error1; - if(!trace_dump_trace_enabled()) + if(!trace_enabled()) goto error1; tr_scr = trace_screen(_screen); @@ -1099,6 +1134,9 @@ trace_context_create(struct pipe_screen *_screen, if(!tr_ctx) goto error1; + pipe_mutex_init(tr_ctx->list_mutex); + make_empty_list(&tr_ctx->shaders); + tr_ctx->base.winsys = _screen->winsys; tr_ctx->base.screen = _screen; tr_ctx->base.destroy = trace_context_destroy; diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 2512a0a2328..86827f97b26 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -33,6 +33,7 @@ #include "util/u_debug.h" #include "pipe/p_context.h" +#include "tr_screen.h" #ifdef __cplusplus extern "C" { @@ -45,7 +46,19 @@ struct trace_context struct pipe_context *pipe; + /* current state */ + struct { + struct trace_shader *fs; + struct trace_shader *vs; + } curr; + + /* for list on screen */ struct tr_list list; + + /* list of state objects */ + pipe_mutex list_mutex; + unsigned num_shaders; + struct tr_list shaders; }; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c new file mode 100644 index 00000000000..23a2473b574 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -0,0 +1,490 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" + +#include "tr_dump.h" +#include "tr_dump_state.h" + + +void trace_dump_format(enum pipe_format format) +{ + trace_dump_enum(pf_name(format) ); +} + + +void trace_dump_block(const struct pipe_format_block *block) +{ + trace_dump_struct_begin("pipe_format_block"); + trace_dump_member(uint, block, size); + trace_dump_member(uint, block, width); + trace_dump_member(uint, block, height); + trace_dump_struct_end(); +} + + +static void trace_dump_reference(const struct pipe_reference *reference) +{ + trace_dump_struct_begin("pipe_reference"); + trace_dump_member(int, &reference->count, count); + trace_dump_struct_end(); +} + + +void trace_dump_template(const struct pipe_texture *templat) +{ + if(!templat) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_texture"); + + trace_dump_member(int, templat, target); + trace_dump_member(format, templat, format); + + trace_dump_member_begin("width"); + trace_dump_array(uint, templat->width, 1); + trace_dump_member_end(); + + trace_dump_member_begin("height"); + trace_dump_array(uint, templat->height, 1); + trace_dump_member_end(); + + trace_dump_member_begin("depth"); + trace_dump_array(uint, templat->depth, 1); + trace_dump_member_end(); + + trace_dump_member_begin("block"); + trace_dump_block(&templat->block); + trace_dump_member_end(); + + trace_dump_member(uint, templat, last_level); + trace_dump_member(uint, templat, tex_usage); + + trace_dump_struct_end(); +} + + +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_rasterizer_state"); + + trace_dump_member(bool, state, flatshade); + trace_dump_member(bool, state, light_twoside); + trace_dump_member(uint, state, front_winding); + trace_dump_member(uint, state, cull_mode); + trace_dump_member(uint, state, fill_cw); + trace_dump_member(uint, state, fill_ccw); + trace_dump_member(bool, state, offset_cw); + trace_dump_member(bool, state, offset_ccw); + trace_dump_member(bool, state, scissor); + trace_dump_member(bool, state, poly_smooth); + trace_dump_member(bool, state, poly_stipple_enable); + trace_dump_member(bool, state, point_smooth); + trace_dump_member(bool, state, point_sprite); + trace_dump_member(bool, state, point_size_per_vertex); + trace_dump_member(bool, state, multisample); + trace_dump_member(bool, state, line_smooth); + trace_dump_member(bool, state, line_stipple_enable); + trace_dump_member(uint, state, line_stipple_factor); + trace_dump_member(uint, state, line_stipple_pattern); + trace_dump_member(bool, state, line_last_pixel); + trace_dump_member(bool, state, bypass_vs_clip_and_viewport); + trace_dump_member(bool, state, flatshade_first); + trace_dump_member(bool, state, gl_rasterization_rules); + + trace_dump_member(float, state, line_width); + trace_dump_member(float, state, point_size); + trace_dump_member(float, state, point_size_min); + trace_dump_member(float, state, point_size_max); + trace_dump_member(float, state, offset_units); + trace_dump_member(float, state, offset_scale); + + trace_dump_member_array(uint, state, sprite_coord_mode); + + trace_dump_struct_end(); +} + + +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_poly_stipple"); + + trace_dump_member_begin("stipple"); + trace_dump_array(uint, + state->stipple, + Elements(state->stipple)); + trace_dump_member_end(); + + trace_dump_struct_end(); +} + + +void trace_dump_viewport_state(const struct pipe_viewport_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_viewport_state"); + + trace_dump_member_array(float, state, scale); + trace_dump_member_array(float, state, translate); + + trace_dump_struct_end(); +} + + +void trace_dump_scissor_state(const struct pipe_scissor_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_scissor_state"); + + trace_dump_member(uint, state, minx); + trace_dump_member(uint, state, miny); + trace_dump_member(uint, state, maxx); + trace_dump_member(uint, state, maxy); + + trace_dump_struct_end(); +} + + +void trace_dump_clip_state(const struct pipe_clip_state *state) +{ + unsigned i; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_clip_state"); + + trace_dump_member_begin("ucp"); + trace_dump_array_begin(); + for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) { + trace_dump_elem_begin(); + trace_dump_array(float, state->ucp[i], 4); + trace_dump_elem_end(); + } + trace_dump_array_end(); + trace_dump_member_end(); + + trace_dump_member(uint, state, nr); + + trace_dump_struct_end(); +} + + +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_constant_buffer"); + + trace_dump_member(buffer_ptr, state, buffer); + + trace_dump_struct_end(); +} + + +void trace_dump_shader_state(const struct pipe_shader_state *state) +{ + static char str[8192]; + + if(!state) { + trace_dump_null(); + return; + } + + tgsi_dump_str(state->tokens, 0, str, sizeof(str)); + + trace_dump_struct_begin("pipe_shader_state"); + + trace_dump_member_begin("tokens"); + trace_dump_string(str); + trace_dump_member_end(); + + trace_dump_struct_end(); +} + + +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state) +{ + unsigned i; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_depth_stencil_alpha_state"); + + trace_dump_member_begin("depth"); + trace_dump_struct_begin("pipe_depth_state"); + trace_dump_member(bool, &state->depth, enabled); + trace_dump_member(bool, &state->depth, writemask); + trace_dump_member(uint, &state->depth, func); + trace_dump_struct_end(); + trace_dump_member_end(); + + trace_dump_member_begin("stencil"); + trace_dump_array_begin(); + for(i = 0; i < Elements(state->stencil); ++i) { + trace_dump_elem_begin(); + trace_dump_struct_begin("pipe_stencil_state"); + trace_dump_member(bool, &state->stencil[i], enabled); + trace_dump_member(uint, &state->stencil[i], func); + trace_dump_member(uint, &state->stencil[i], fail_op); + trace_dump_member(uint, &state->stencil[i], zpass_op); + trace_dump_member(uint, &state->stencil[i], zfail_op); + trace_dump_member(uint, &state->stencil[i], ref_value); + trace_dump_member(uint, &state->stencil[i], valuemask); + trace_dump_member(uint, &state->stencil[i], writemask); + trace_dump_struct_end(); + trace_dump_elem_end(); + } + trace_dump_array_end(); + trace_dump_member_end(); + + trace_dump_member_begin("alpha"); + trace_dump_struct_begin("pipe_alpha_state"); + trace_dump_member(bool, &state->alpha, enabled); + trace_dump_member(uint, &state->alpha, func); + trace_dump_member(float, &state->alpha, ref_value); + trace_dump_struct_end(); + trace_dump_member_end(); + + trace_dump_struct_end(); +} + + +void trace_dump_blend_state(const struct pipe_blend_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_blend_state"); + + trace_dump_member(bool, state, blend_enable); + + trace_dump_member(uint, state, rgb_func); + trace_dump_member(uint, state, rgb_src_factor); + trace_dump_member(uint, state, rgb_dst_factor); + + trace_dump_member(uint, state, alpha_func); + trace_dump_member(uint, state, alpha_src_factor); + trace_dump_member(uint, state, alpha_dst_factor); + + trace_dump_member(bool, state, logicop_enable); + trace_dump_member(uint, state, logicop_func); + + trace_dump_member(uint, state, colormask); + trace_dump_member(bool, state, dither); + + trace_dump_struct_end(); +} + + +void trace_dump_blend_color(const struct pipe_blend_color *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_blend_color"); + + trace_dump_member_array(float, state, color); + + trace_dump_struct_end(); +} + + +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) +{ + trace_dump_struct_begin("pipe_framebuffer_state"); + + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); + trace_dump_member(uint, state, nr_cbufs); + trace_dump_member_array(ptr, state, cbufs); + trace_dump_member(ptr, state, zsbuf); + + trace_dump_struct_end(); +} + + +void trace_dump_sampler_state(const struct pipe_sampler_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_sampler_state"); + + trace_dump_member(uint, state, wrap_s); + trace_dump_member(uint, state, wrap_t); + trace_dump_member(uint, state, wrap_r); + trace_dump_member(uint, state, min_img_filter); + trace_dump_member(uint, state, min_mip_filter); + trace_dump_member(uint, state, mag_img_filter); + trace_dump_member(bool, state, compare_mode); + trace_dump_member(uint, state, compare_func); + trace_dump_member(bool, state, normalized_coords); + trace_dump_member(uint, state, prefilter); + trace_dump_member(float, state, shadow_ambient); + trace_dump_member(float, state, lod_bias); + trace_dump_member(float, state, min_lod); + trace_dump_member(float, state, max_lod); + trace_dump_member_array(float, state, border_color); + trace_dump_member(float, state, max_anisotropy); + + trace_dump_struct_end(); +} + + +void trace_dump_surface(const struct pipe_surface *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_surface"); + + trace_dump_reference(&state->reference); + + trace_dump_member(format, state, format); + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); + + trace_dump_member(uint, state, layout); + trace_dump_member(uint, state, offset); + trace_dump_member(uint, state, usage); + + trace_dump_member(ptr, state, texture); + trace_dump_member(uint, state, face); + trace_dump_member(uint, state, level); + trace_dump_member(uint, state, zslice); + + trace_dump_struct_end(); +} + + +void trace_dump_transfer(const struct pipe_transfer *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_transfer"); + + trace_dump_member(format, state, format); + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); + + trace_dump_member_begin("block"); + trace_dump_block(&state->block); + trace_dump_member_end(); + + trace_dump_member(uint, state, nblocksx); + trace_dump_member(uint, state, nblocksy); + trace_dump_member(uint, state, stride); + trace_dump_member(uint, state, usage); + + trace_dump_member(ptr, state, texture); + trace_dump_member(uint, state, face); + trace_dump_member(uint, state, level); + trace_dump_member(uint, state, zslice); + + trace_dump_struct_end(); +} + + +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_vertex_buffer"); + + trace_dump_member(uint, state, stride); + trace_dump_member(uint, state, max_index); + trace_dump_member(uint, state, buffer_offset); + trace_dump_member(buffer_ptr, state, buffer); + + trace_dump_struct_end(); +} + + +void trace_dump_vertex_element(const struct pipe_vertex_element *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_vertex_element"); + + trace_dump_member(uint, state, src_offset); + + trace_dump_member(uint, state, vertex_buffer_index); + trace_dump_member(uint, state, nr_components); + + trace_dump_member(format, state, src_format); + + trace_dump_struct_end(); +} diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h new file mode 100644 index 00000000000..05b821adb64 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_DUMP_STATE_H_ +#define TR_DUMP_STATE_H_ + +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" + + +void trace_dump_format(enum pipe_format format); + +void trace_dump_block(const struct pipe_format_block *block); + +void trace_dump_template(const struct pipe_texture *templat); + + +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state); + +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state); + +void trace_dump_viewport_state(const struct pipe_viewport_state *state); + +void trace_dump_scissor_state(const struct pipe_scissor_state *state); + +void trace_dump_clip_state(const struct pipe_clip_state *state); + +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); + +void trace_dump_token(const struct tgsi_token *token); + +void trace_dump_shader_state(const struct pipe_shader_state *state); + +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state); + +void trace_dump_blend_state(const struct pipe_blend_state *state); + +void trace_dump_blend_color(const struct pipe_blend_color *state); + +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state); + +void trace_dump_sampler_state(const struct pipe_sampler_state *state); + +void trace_dump_surface(const struct pipe_surface *state); + +void trace_dump_transfer(const struct pipe_transfer *state); + +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); + +void trace_dump_vertex_element(const struct pipe_vertex_element *state); + + +#endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 12a85353428..bc14248eebb 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -30,13 +30,15 @@ #include "tr_buffer.h" #include "tr_dump.h" -#include "tr_state.h" +#include "tr_dump_state.h" #include "tr_texture.h" #include "tr_screen.h" #include "pipe/p_inlines.h" +static boolean trace = FALSE; + static const char * trace_screen_get_name(struct pipe_screen *_screen) { @@ -212,10 +214,12 @@ static struct pipe_texture * trace_screen_texture_blanket(struct pipe_screen *_screen, const struct pipe_texture *templat, const unsigned *ppitch, - struct pipe_buffer *buffer) + struct pipe_buffer *_buffer) { struct trace_screen *tr_scr = trace_screen(_screen); + struct trace_buffer *tr_buf = trace_buffer(_buffer); struct pipe_screen *screen = tr_scr->screen; + struct pipe_buffer *buffer = tr_buf->buffer; unsigned pitch = *ppitch; struct pipe_texture *result; @@ -818,16 +822,20 @@ trace_screen_destroy(struct pipe_screen *_screen) struct pipe_screen *screen = tr_scr->screen; trace_dump_call_begin("pipe_screen", "destroy"); - trace_dump_arg(ptr, screen); + trace_dump_call_end(); + trace_dump_trace_end(); screen->destroy(screen); - trace_dump_call_end(); + FREE(tr_scr); +} - trace_dump_trace_end(); - FREE(tr_scr); +boolean +trace_enabled(void) +{ + return trace; } @@ -842,10 +850,13 @@ trace_screen_create(struct pipe_screen *screen) trace_dump_init(); - if(!trace_dump_trace_begin()) - goto error1; + if(trace_dump_trace_begin()) { + trace_dumping_start(); + trace = TRUE; + } - trace_dumping_start(); + if (!trace) + goto error1; trace_dump_call_begin("", "pipe_screen_create"); diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h index 59f254166d4..7fae1829858 100644 --- a/src/gallium/drivers/trace/tr_screen.h +++ b/src/gallium/drivers/trace/tr_screen.h @@ -71,6 +71,13 @@ struct trace_screen }; +/* + * tr_screen.c + */ + +boolean +trace_enabled(void); + struct trace_screen * trace_screen(struct pipe_screen *screen); diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index a9570c1aebd..d8c11640bf3 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -1,491 +1,66 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. +/* + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. * - **************************************************************************/ - + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ -#include "pipe/p_compiler.h" -#include "util/u_memory.h" -#include "tgsi/tgsi_dump.h" - -#include "tr_dump.h" #include "tr_state.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" -void trace_dump_format(enum pipe_format format) -{ - trace_dump_enum(pf_name(format) ); -} - - -void trace_dump_block(const struct pipe_format_block *block) -{ - trace_dump_struct_begin("pipe_format_block"); - trace_dump_member(uint, block, size); - trace_dump_member(uint, block, width); - trace_dump_member(uint, block, height); - trace_dump_struct_end(); -} - - -static void trace_dump_reference(const struct pipe_reference *reference) -{ - trace_dump_struct_begin("pipe_reference"); - trace_dump_member(int, &reference->count, count); - trace_dump_struct_end(); -} - - -void trace_dump_template(const struct pipe_texture *templat) -{ - if(!templat) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_texture"); - - trace_dump_member(int, templat, target); - trace_dump_member(format, templat, format); - - trace_dump_member_begin("width"); - trace_dump_array(uint, templat->width, 1); - trace_dump_member_end(); - - trace_dump_member_begin("height"); - trace_dump_array(uint, templat->height, 1); - trace_dump_member_end(); - - trace_dump_member_begin("depth"); - trace_dump_array(uint, templat->depth, 1); - trace_dump_member_end(); - - trace_dump_member_begin("block"); - trace_dump_block(&templat->block); - trace_dump_member_end(); - - trace_dump_member(uint, templat, last_level); - trace_dump_member(uint, templat, tex_usage); - - trace_dump_struct_end(); -} - - -void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) -{ - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_rasterizer_state"); - - trace_dump_member(bool, state, flatshade); - trace_dump_member(bool, state, light_twoside); - trace_dump_member(uint, state, front_winding); - trace_dump_member(uint, state, cull_mode); - trace_dump_member(uint, state, fill_cw); - trace_dump_member(uint, state, fill_ccw); - trace_dump_member(bool, state, offset_cw); - trace_dump_member(bool, state, offset_ccw); - trace_dump_member(bool, state, scissor); - trace_dump_member(bool, state, poly_smooth); - trace_dump_member(bool, state, poly_stipple_enable); - trace_dump_member(bool, state, point_smooth); - trace_dump_member(bool, state, point_sprite); - trace_dump_member(bool, state, point_size_per_vertex); - trace_dump_member(bool, state, multisample); - trace_dump_member(bool, state, line_smooth); - trace_dump_member(bool, state, line_stipple_enable); - trace_dump_member(uint, state, line_stipple_factor); - trace_dump_member(uint, state, line_stipple_pattern); - trace_dump_member(bool, state, line_last_pixel); - trace_dump_member(bool, state, bypass_vs_clip_and_viewport); - trace_dump_member(bool, state, flatshade_first); - trace_dump_member(bool, state, gl_rasterization_rules); - - trace_dump_member(float, state, line_width); - trace_dump_member(float, state, point_size); - trace_dump_member(float, state, point_size_min); - trace_dump_member(float, state, point_size_max); - trace_dump_member(float, state, offset_units); - trace_dump_member(float, state, offset_scale); - - trace_dump_member_array(uint, state, sprite_coord_mode); - - trace_dump_struct_end(); -} - - -void trace_dump_poly_stipple(const struct pipe_poly_stipple *state) -{ - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_poly_stipple"); - - trace_dump_member_begin("stipple"); - trace_dump_array(uint, - state->stipple, - Elements(state->stipple)); - trace_dump_member_end(); - - trace_dump_struct_end(); -} - - -void trace_dump_viewport_state(const struct pipe_viewport_state *state) -{ - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_viewport_state"); - - trace_dump_member_array(float, state, scale); - trace_dump_member_array(float, state, translate); - - trace_dump_struct_end(); -} - - -void trace_dump_scissor_state(const struct pipe_scissor_state *state) -{ - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_scissor_state"); - - trace_dump_member(uint, state, minx); - trace_dump_member(uint, state, miny); - trace_dump_member(uint, state, maxx); - trace_dump_member(uint, state, maxy); - - trace_dump_struct_end(); -} - - -void trace_dump_clip_state(const struct pipe_clip_state *state) -{ - unsigned i; - - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_clip_state"); - - trace_dump_member_begin("ucp"); - trace_dump_array_begin(); - for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) { - trace_dump_elem_begin(); - trace_dump_array(float, state->ucp[i], 4); - trace_dump_elem_end(); - } - trace_dump_array_end(); - trace_dump_member_end(); - - trace_dump_member(uint, state, nr); - - trace_dump_struct_end(); -} - - -void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) -{ - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_constant_buffer"); - - trace_dump_member(buffer_ptr, state, buffer); - - trace_dump_struct_end(); -} - - -void trace_dump_shader_state(const struct pipe_shader_state *state) -{ - static char str[8192]; - - if(!state) { - trace_dump_null(); - return; - } - - tgsi_dump_str(state->tokens, 0, str, sizeof(str)); - - trace_dump_struct_begin("pipe_shader_state"); - - trace_dump_member_begin("tokens"); - trace_dump_string(str); - trace_dump_member_end(); - - trace_dump_struct_end(); -} - - -void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state) -{ - unsigned i; - - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_depth_stencil_alpha_state"); - - trace_dump_member_begin("depth"); - trace_dump_struct_begin("pipe_depth_state"); - trace_dump_member(bool, &state->depth, enabled); - trace_dump_member(bool, &state->depth, writemask); - trace_dump_member(uint, &state->depth, func); - trace_dump_member(bool, &state->depth, occlusion_count); - trace_dump_struct_end(); - trace_dump_member_end(); - - trace_dump_member_begin("stencil"); - trace_dump_array_begin(); - for(i = 0; i < Elements(state->stencil); ++i) { - trace_dump_elem_begin(); - trace_dump_struct_begin("pipe_stencil_state"); - trace_dump_member(bool, &state->stencil[i], enabled); - trace_dump_member(uint, &state->stencil[i], func); - trace_dump_member(uint, &state->stencil[i], fail_op); - trace_dump_member(uint, &state->stencil[i], zpass_op); - trace_dump_member(uint, &state->stencil[i], zfail_op); - trace_dump_member(uint, &state->stencil[i], ref_value); - trace_dump_member(uint, &state->stencil[i], valuemask); - trace_dump_member(uint, &state->stencil[i], writemask); - trace_dump_struct_end(); - trace_dump_elem_end(); - } - trace_dump_array_end(); - trace_dump_member_end(); - - trace_dump_member_begin("alpha"); - trace_dump_struct_begin("pipe_alpha_state"); - trace_dump_member(bool, &state->alpha, enabled); - trace_dump_member(uint, &state->alpha, func); - trace_dump_member(float, &state->alpha, ref_value); - trace_dump_struct_end(); - trace_dump_member_end(); - - trace_dump_struct_end(); -} - - -void trace_dump_blend_state(const struct pipe_blend_state *state) -{ - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_blend_state"); - - trace_dump_member(bool, state, blend_enable); - - trace_dump_member(uint, state, rgb_func); - trace_dump_member(uint, state, rgb_src_factor); - trace_dump_member(uint, state, rgb_dst_factor); - - trace_dump_member(uint, state, alpha_func); - trace_dump_member(uint, state, alpha_src_factor); - trace_dump_member(uint, state, alpha_dst_factor); - - trace_dump_member(bool, state, logicop_enable); - trace_dump_member(uint, state, logicop_func); - - trace_dump_member(uint, state, colormask); - trace_dump_member(bool, state, dither); - - trace_dump_struct_end(); -} - - -void trace_dump_blend_color(const struct pipe_blend_color *state) -{ - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_blend_color"); - - trace_dump_member_array(float, state, color); - - trace_dump_struct_end(); -} - - -void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) -{ - trace_dump_struct_begin("pipe_framebuffer_state"); - - trace_dump_member(uint, state, width); - trace_dump_member(uint, state, height); - trace_dump_member(uint, state, nr_cbufs); - trace_dump_member_array(ptr, state, cbufs); - trace_dump_member(ptr, state, zsbuf); - - trace_dump_struct_end(); -} - - -void trace_dump_sampler_state(const struct pipe_sampler_state *state) -{ - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_sampler_state"); - - trace_dump_member(uint, state, wrap_s); - trace_dump_member(uint, state, wrap_t); - trace_dump_member(uint, state, wrap_r); - trace_dump_member(uint, state, min_img_filter); - trace_dump_member(uint, state, min_mip_filter); - trace_dump_member(uint, state, mag_img_filter); - trace_dump_member(bool, state, compare_mode); - trace_dump_member(uint, state, compare_func); - trace_dump_member(bool, state, normalized_coords); - trace_dump_member(uint, state, prefilter); - trace_dump_member(float, state, shadow_ambient); - trace_dump_member(float, state, lod_bias); - trace_dump_member(float, state, min_lod); - trace_dump_member(float, state, max_lod); - trace_dump_member_array(float, state, border_color); - trace_dump_member(float, state, max_anisotropy); - - trace_dump_struct_end(); -} - - -void trace_dump_surface(const struct pipe_surface *state) -{ - if(!state) { - trace_dump_null(); - return; - } - - trace_dump_struct_begin("pipe_surface"); - - trace_dump_reference(&state->reference); - - trace_dump_member(format, state, format); - trace_dump_member(uint, state, width); - trace_dump_member(uint, state, height); - - trace_dump_member(uint, state, layout); - trace_dump_member(uint, state, offset); - trace_dump_member(uint, state, usage); - - trace_dump_member(ptr, state, texture); - trace_dump_member(uint, state, face); - trace_dump_member(uint, state, level); - trace_dump_member(uint, state, zslice); - - trace_dump_struct_end(); -} - +#include "tgsi/tgsi_parse.h" -void trace_dump_transfer(const struct pipe_transfer *state) +struct trace_shader * trace_shader_create(struct trace_context *tr_ctx, + const struct pipe_shader_state *state, + void *result, + enum trace_shader_type type) { - if(!state) { - trace_dump_null(); - return; - } + struct trace_shader *tr_shdr = CALLOC_STRUCT(trace_shader); - trace_dump_struct_begin("pipe_transfer"); + tr_shdr->state = result; + tr_shdr->type = type; + tr_shdr->tokens = tgsi_dup_tokens(state->tokens); - trace_dump_member(format, state, format); - trace_dump_member(uint, state, width); - trace_dump_member(uint, state, height); + /* works on context as well */ + trace_screen_add_to_list(tr_ctx, shaders, tr_shdr); - trace_dump_member_begin("block"); - trace_dump_block(&state->block); - trace_dump_member_end(); - - trace_dump_member(uint, state, nblocksx); - trace_dump_member(uint, state, nblocksy); - trace_dump_member(uint, state, stride); - trace_dump_member(uint, state, usage); - - trace_dump_member(ptr, state, texture); - trace_dump_member(uint, state, face); - trace_dump_member(uint, state, level); - trace_dump_member(uint, state, zslice); - - trace_dump_struct_end(); + return tr_shdr; } - -void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) +void trace_shader_destroy(struct trace_context *tr_ctx, + struct trace_shader *tr_shdr) { - if(!state) { - trace_dump_null(); - return; - } + trace_screen_remove_from_list(tr_ctx, shaders, tr_shdr); - trace_dump_struct_begin("pipe_vertex_buffer"); - - trace_dump_member(uint, state, stride); - trace_dump_member(uint, state, max_index); - trace_dump_member(uint, state, buffer_offset); - trace_dump_member(buffer_ptr, state, buffer); - - trace_dump_struct_end(); -} - - -void trace_dump_vertex_element(const struct pipe_vertex_element *state) -{ - if(!state) { - trace_dump_null(); - return; + if (tr_shdr->replaced) { + if (tr_shdr->type == TRACE_SHADER_FRAGMENT) + tr_ctx->pipe->delete_fs_state(tr_ctx->pipe, tr_shdr->replaced); + else if (tr_shdr->type == TRACE_SHADER_VERTEX) + tr_ctx->pipe->delete_vs_state(tr_ctx->pipe, tr_shdr->replaced); + else + assert(0); } - trace_dump_struct_begin("pipe_vertex_element"); - - trace_dump_member(uint, state, src_offset); - - trace_dump_member(uint, state, vertex_buffer_index); - trace_dump_member(uint, state, nr_components); - - trace_dump_member(format, state, src_format); - - trace_dump_struct_end(); + FREE(tr_shdr->replaced_tokens); + FREE(tr_shdr->tokens); + FREE(tr_shdr); } diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h index 513ed0ac98d..1c16042ee5a 100644 --- a/src/gallium/drivers/trace/tr_state.h +++ b/src/gallium/drivers/trace/tr_state.h @@ -1,78 +1,68 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. +/* + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. * - **************************************************************************/ - -#ifndef TR_STATE_H -#define TR_STATE_H - -#include "pipe/p_format.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" - - -void trace_dump_format(enum pipe_format format); - -void trace_dump_block(const struct pipe_format_block *block); - -void trace_dump_template(const struct pipe_texture *templat); - - -void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state); - -void trace_dump_poly_stipple(const struct pipe_poly_stipple *state); - -void trace_dump_viewport_state(const struct pipe_viewport_state *state); - -void trace_dump_scissor_state(const struct pipe_scissor_state *state); - -void trace_dump_clip_state(const struct pipe_clip_state *state); + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ -void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); +#ifndef TR_STATE_H_ +#define TR_STATE_H_ -void trace_dump_token(const struct tgsi_token *token); +#include "tr_context.h" -void trace_dump_shader_state(const struct pipe_shader_state *state); +struct tgsi_token; -void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state); +enum trace_shader_type { + TRACE_SHADER_FRAGMENT = 0, + TRACE_SHADER_VERTEX = 1, + TRACE_SHADER_GEOMETRY = 2, +}; -void trace_dump_blend_state(const struct pipe_blend_state *state); +struct trace_shader +{ + struct tr_list list; -void trace_dump_blend_color(const struct pipe_blend_color *state); + enum trace_shader_type type; -void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state); + void *state; + void *replaced; -void trace_dump_sampler_state(const struct pipe_sampler_state *state); + struct tgsi_token *tokens; + struct tgsi_token *replaced_tokens; -void trace_dump_surface(const struct pipe_surface *state); + boolean disabled; +}; -void trace_dump_transfer(const struct pipe_transfer *state); -void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); +static INLINE struct trace_shader * +trace_shader(void *state) +{ + return (struct trace_shader *)state; +} -void trace_dump_vertex_element(const struct pipe_vertex_element *state); +struct trace_shader * trace_shader_create(struct trace_context *tr_ctx, + const struct pipe_shader_state *state, + void *result, + enum trace_shader_type type); +void trace_shader_destroy(struct trace_context *tr_ctx, + struct trace_shader *tr_shdr); -#endif /* TR_STATE_H */ +#endif diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 82e23c413c8..47c24f30862 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -132,6 +132,7 @@ enum pipe_texture_target { #define PIPE_TEX_FACE_NEG_Y 3 #define PIPE_TEX_FACE_POS_Z 4 #define PIPE_TEX_FACE_NEG_Z 5 +#define PIPE_TEX_FACE_MAX 6 #define PIPE_TEX_WRAP_REPEAT 0 #define PIPE_TEX_WRAP_CLAMP 1 @@ -158,14 +159,6 @@ enum pipe_texture_target { #define PIPE_TEX_COMPARE_NONE 0 #define PIPE_TEX_COMPARE_R_TO_TEXTURE 1 -#define PIPE_TEX_FACE_POS_X 0 -#define PIPE_TEX_FACE_NEG_X 1 -#define PIPE_TEX_FACE_POS_Y 2 -#define PIPE_TEX_FACE_NEG_Y 3 -#define PIPE_TEX_FACE_POS_Z 4 -#define PIPE_TEX_FACE_NEG_Z 5 -#define PIPE_TEX_FACE_MAX 6 - #define PIPE_TEXTURE_USAGE_RENDER_TARGET 0x1 #define PIPE_TEXTURE_USAGE_DISPLAY_TARGET 0x2 /* ie a backbuffer */ #define PIPE_TEXTURE_USAGE_PRIMARY 0x4 /* ie a frontbuffer */ diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index 15a2088df51..865cc8d0b6c 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -138,6 +138,18 @@ dri_get_buffers(__DRIdrawablePrivate * dPriv) dri_drawable->pBackClipRects[0].x2 = dri_drawable->w; dri_drawable->pBackClipRects[0].y2 = dri_drawable->h; + if (drawable->old_num == count && + drawable->old_w == dri_drawable->w && + drawable->old_h == dri_drawable->h && + memcmp(drawable->old, buffers, sizeof(__DRIbuffer) * count) == 0) { + return; + } else { + drawable->old_num = count; + drawable->old_w = dri_drawable->w; + drawable->old_h = dri_drawable->h; + memcpy(drawable->old, buffers, sizeof(__DRIbuffer) * count); + } + for (i = 0; i < count; i++) { enum pipe_format format = 0; int index = 0; diff --git a/src/gallium/state_trackers/dri/dri_drawable.h b/src/gallium/state_trackers/dri/dri_drawable.h index 78a66624aaf..0f654d804a7 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.h +++ b/src/gallium/state_trackers/dri/dri_drawable.h @@ -46,6 +46,11 @@ struct dri_drawable unsigned attachments[8]; unsigned num_attachments; + __DRIbuffer old[8]; + unsigned old_num; + unsigned old_w; + unsigned old_h; + /* gallium */ struct st_framebuffer *stfb; struct pipe_fence_handle *swap_fences[DRI_SWAP_FENCES_MAX]; diff --git a/src/gallium/state_trackers/dri/dri_extensions.c b/src/gallium/state_trackers/dri/dri_extensions.c index 0c59d42d5c6..2f48162526b 100644 --- a/src/gallium/state_trackers/dri/dri_extensions.c +++ b/src/gallium/state_trackers/dri/dri_extensions.c @@ -36,9 +36,11 @@ #define need_GL_ARB_multisample #define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters +#define need_GL_ARB_shader_objects #define need_GL_ARB_texture_compression #define need_GL_ARB_vertex_buffer_object #define need_GL_ARB_vertex_program +#define need_GL_ARB_vertex_shader #define need_GL_ARB_window_pos #define need_GL_EXT_blend_color #define need_GL_EXT_blend_equation_separate @@ -50,16 +52,23 @@ #define need_GL_EXT_multi_draw_arrays #define need_GL_EXT_secondary_color #define need_GL_NV_vertex_program +#define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 #include "extension_helper.h" /** * Extension strings exported by the driver. */ const struct dri_extension card_extensions[] = { + {"GL_ARB_fragment_shader", NULL}, {"GL_ARB_multisample", GL_ARB_multisample_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, + {"GL_ARB_pixel_buffer_object", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, + {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + {"GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, + {"GL_ARB_shader_objects", GL_ARB_shader_objects_functions}, {"GL_ARB_texture_border_clamp", NULL}, {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, {"GL_ARB_texture_cube_map", NULL}, @@ -69,7 +78,7 @@ const struct dri_extension card_extensions[] = { {"GL_ARB_texture_mirrored_repeat", NULL}, {"GL_ARB_texture_rectangle", NULL}, {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, - {"GL_ARB_pixel_buffer_object", NULL}, + {"GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions}, {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, {"GL_ARB_window_pos", GL_ARB_window_pos_functions}, {"GL_EXT_blend_color", GL_EXT_blend_color_functions}, diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile index 692a3c8b76f..e825aa718b6 100644 --- a/src/gallium/state_trackers/egl/Makefile +++ b/src/gallium/state_trackers/egl/Makefile @@ -1,29 +1,19 @@ -TARGET = libegldrm.a -CFILES = $(wildcard ./*.c) -OBJECTS = $(patsubst ./%.c,./%.o,$(CFILES)) -GALLIUMDIR = ../.. -TOP = ../../../.. +TOP = ../../../.. +include $(TOP)/configs/current -include ${TOP}/configs/current +LIBNAME = egldrm -CFLAGS := \ - -I${GALLIUMDIR}/include \ - -I${GALLIUMDIR}/auxiliary \ - -I${TOP}/src/mesa/drivers/dri/common \ - -I${TOP}/src/mesa \ - -I$(TOP)/include \ - -I$(TOP)/src/egl/main \ - ${LIBDRM_CFLAGS} \ - ${CFLAGS} +LIBRARY_INCLUDES = \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/mesa/drivers/dri/common \ + -I$(TOP)/src/mesa \ + -I$(TOP)/include \ + -I$(TOP)/src/egl/main \ + $(shell pkg-config --cflags-only-I libdrm) -############################################# -.PHONY = all clean +C_SOURCES = $(wildcard ./*.c) -all: ${TARGET} -${TARGET}: ${OBJECTS} - ar rcs $@ $^ - -clean: - rm -rf ${OBJECTS} ${TARGET} +include ../../Makefile.template diff --git a/src/gallium/state_trackers/glx/xlib/fakeglx.c b/src/gallium/state_trackers/glx/xlib/fakeglx.c index 65e7048188e..85e7ecfb9e8 100644 --- a/src/gallium/state_trackers/glx/xlib/fakeglx.c +++ b/src/gallium/state_trackers/glx/xlib/fakeglx.c @@ -97,6 +97,9 @@ struct fake_glx_context { +#define DEFAULT_DIRECT GL_TRUE + + /**********************************************************************/ /*** GLX Visual Code ***/ /**********************************************************************/ @@ -1059,7 +1062,7 @@ Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo, return NULL; } - glxCtx->glxContext.isDirect = GL_FALSE; + glxCtx->glxContext.isDirect = DEFAULT_DIRECT; glxCtx->glxContext.currentDpy = dpy; glxCtx->glxContext.xid = (XID) glxCtx; /* self pointer */ @@ -1296,9 +1299,9 @@ Fake_glXDestroyContext( Display *dpy, GLXContext ctx ) static Bool Fake_glXIsDirect( Display *dpy, GLXContext ctx ) { - (void) dpy; + struct fake_glx_context *glxCtx = (struct fake_glx_context *) ctx; (void) ctx; - return False; + return glxCtx->glxContext.isDirect; } @@ -2055,7 +2058,7 @@ Fake_glXCreateNewContext( Display *dpy, GLXFBConfig config, return NULL; } - glxCtx->glxContext.isDirect = GL_FALSE; + glxCtx->glxContext.isDirect = DEFAULT_DIRECT; glxCtx->glxContext.currentDpy = dpy; glxCtx->glxContext.xid = (XID) glxCtx; /* self pointer */ @@ -2277,7 +2280,7 @@ Fake_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int re return NULL; } - glxCtx->glxContext.isDirect = GL_FALSE; + glxCtx->glxContext.isDirect = DEFAULT_DIRECT; glxCtx->glxContext.currentDpy = dpy; glxCtx->glxContext.xid = (XID) glxCtx; /* self pointer */ diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index a3d16516531..79c2230588f 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -1100,26 +1100,19 @@ XMesaContext XMesaGetCurrentContext( void ) - - - -/* - * Copy the back buffer to the front buffer. If there's no back buffer - * this is a no-op. +/** + * Swap front and back color buffers and have winsys display front buffer. + * If there's no front color buffer no swap actually occurs. */ PUBLIC void XMesaSwapBuffers( XMesaBuffer b ) { - struct pipe_surface *surf; + struct pipe_surface *frontLeftSurf; - /* If we're swapping the buffer associated with the current context - * we have to flush any pending rendering commands first. - */ - st_notify_swapbuffers(b->stfb); + st_swapbuffers(b->stfb, &frontLeftSurf, NULL); - st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT, &surf); - if (surf) { - driver.display_surface(b, surf); + if (frontLeftSurf) { + driver.display_surface(b, frontLeftSurf); } xmesa_check_and_update_buffer_size(NULL, b); diff --git a/src/gallium/winsys/drm/intel/SConscript b/src/gallium/winsys/drm/intel/SConscript new file mode 100644 index 00000000000..50d7b75ed68 --- /dev/null +++ b/src/gallium/winsys/drm/intel/SConscript @@ -0,0 +1,7 @@ +Import('*') + +SConscript(['gem/SConscript',]) + +if 'mesa' in env['statetrackers']: + + SConscript(['dri/SConscript']) diff --git a/src/gallium/winsys/drm/intel/dri/Makefile b/src/gallium/winsys/drm/intel/dri/Makefile index ac0891a6463..de39e759d87 100644 --- a/src/gallium/winsys/drm/intel/dri/Makefile +++ b/src/gallium/winsys/drm/intel/dri/Makefile @@ -6,6 +6,7 @@ LIBNAME = i915_dri.so PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ $(TOP)/src/gallium/winsys/drm/intel/gem/libinteldrm.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/i915simple/libi915simple.a diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript new file mode 100644 index 00000000000..e14e96e32fd --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -0,0 +1,16 @@ +Import('*') + +env = drienv.Clone() + +drivers = [ + softpipe, + i915simple, + trace, + inteldrm +] + +env.SharedLibrary( + target ='i915_dri.so', + source = COMMON_GALLIUM_SOURCES, + LIBS = drivers + mesa + auxiliaries + env['LIBS'], +) diff --git a/src/gallium/winsys/drm/intel/gem/SConscript b/src/gallium/winsys/drm/intel/gem/SConscript new file mode 100644 index 00000000000..ea8a2e55f62 --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/SConscript @@ -0,0 +1,17 @@ +Import('*') + +env = drienv.Clone() + +inteldrm_sources = [ + 'intel_be_api.c', + 'intel_be_batchbuffer.c', + 'intel_be_context.c', + 'intel_be_device.c', +] + +inteldrm = env.ConvenienceLibrary( + target ='inteldrm', + source = inteldrm_sources, +) + +Export('inteldrm') diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_api.c b/src/gallium/winsys/drm/intel/gem/intel_be_api.c index f4ef7c2d88b..a74be13bf7a 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_be_api.c +++ b/src/gallium/winsys/drm/intel/gem/intel_be_api.c @@ -2,7 +2,13 @@ #include "intel_be_api.h" #include "i915simple/i915_winsys.h" +#ifdef DEBUG +#include "trace/trace_drm.h" + +struct drm_api hooks = +#else struct drm_api drm_api_hooks = +#endif { /* intel_be_context.c */ .create_context = intel_be_create_context, diff --git a/src/gallium/winsys/drm/nouveau/Makefile b/src/gallium/winsys/drm/nouveau/Makefile index f8c81358544..6c9cbef26df 100644 --- a/src/gallium/winsys/drm/nouveau/Makefile +++ b/src/gallium/winsys/drm/nouveau/Makefile @@ -2,7 +2,7 @@ TOP = ../../../../.. include $(TOP)/configs/current -SUBDIRS = drm dri dri2 +SUBDIRS = drm $(GALLIUM_STATE_TRACKERS_DIRS) default install clean: @for dir in $(SUBDIRS) ; do \ diff --git a/src/gallium/winsys/drm/nouveau/dri/Makefile b/src/gallium/winsys/drm/nouveau/dri/Makefile index f7db6201fea..024ab150cbd 100644 --- a/src/gallium/winsys/drm/nouveau/dri/Makefile +++ b/src/gallium/winsys/drm/nouveau/dri/Makefile @@ -3,9 +3,8 @@ include $(TOP)/configs/current LIBNAME = nouveau_dri.so -MINIGLX_SOURCES = - PIPE_DRIVERS = \ + $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ $(TOP)/src/gallium/drivers/nv04/libnv04.a \ $(TOP)/src/gallium/drivers/nv10/libnv10.a \ @@ -13,22 +12,15 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/drivers/nv30/libnv30.a \ $(TOP)/src/gallium/drivers/nv40/libnv40.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a - -DRIVER_SOURCES = \ - nouveau_context.c \ - nouveau_screen.c \ - nouveau_swapbuffers.c \ - nouveau_lock.c + +DRIVER_SOURCES = C_SOURCES = \ $(COMMON_GALLIUM_SOURCES) \ $(DRIVER_SOURCES) -ASM_SOURCES = +include ../../Makefile.template -DRIVER_DEFINES = $(shell pkg-config libdrm_nouveau --cflags) DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs) -include ../../Makefile.template - symlinks: diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_context.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_context.c deleted file mode 100644 index deb6ffcff1c..00000000000 --- a/src/gallium/winsys/drm/nouveau/dri/nouveau_context.c +++ /dev/null @@ -1,118 +0,0 @@ -#include <main/glheader.h> -#include <glapi/glthread.h> -#include <GL/internal/glcore.h> -#include <utils.h> - -#include <state_tracker/st_public.h> -#include <state_tracker/st_context.h> -#include <state_tracker/drm_api.h> -#include <pipe/p_defines.h> -#include <pipe/p_context.h> -#include <pipe/p_screen.h> - -#include "nouveau_context.h" -#include "nouveau_screen.h" - -#include "nouveau_drmif.h" - -GLboolean -nouveau_context_create(const __GLcontextModes *glVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) -{ - __DRIscreenPrivate *driScrnPriv = driContextPriv->driScreenPriv; - struct nouveau_screen *nv_screen = driScrnPriv->private; - struct nouveau_context *nv; - struct pipe_context *pipe; - struct st_context *st_share = NULL; - - if (sharedContextPrivate) - st_share = ((struct nouveau_context *)sharedContextPrivate)->st; - - nv = CALLOC_STRUCT(nouveau_context); - if (!nv) - return GL_FALSE; - - { - struct nouveau_device_priv *nvdev = - nouveau_device(nv_screen->device); - - nvdev->ctx = driContextPriv->hHWContext; - nvdev->lock = (drmLock *)&driScrnPriv->pSAREA->lock; - } - - pipe = drm_api_hooks.create_context(nv_screen->pscreen); - if (!pipe) { - FREE(nv); - return GL_FALSE; - } - pipe->priv = nv; - - driContextPriv->driverPrivate = nv; - nv->dri_screen = driScrnPriv; - - driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, - nv->dri_screen->myNum, "nouveau"); - - nv->st = st_create_context(pipe, glVis, st_share); - return GL_TRUE; -} - -void -nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) -{ - struct nouveau_context *nv = driContextPriv->driverPrivate; - - assert(nv); - - st_finish(nv->st); - st_destroy_context(nv->st); - - FREE(nv); -} - -GLboolean -nouveau_context_bind(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv) -{ - struct nouveau_context *nv; - struct nouveau_framebuffer *draw, *read; - - if (!driContextPriv) { - st_make_current(NULL, NULL, NULL); - return GL_TRUE; - } - - nv = driContextPriv->driverPrivate; - draw = driDrawPriv->driverPrivate; - read = driReadPriv->driverPrivate; - - st_make_current(nv->st, draw->stfb, read->stfb); - - if ((nv->dri_drawable != driDrawPriv) || - (nv->last_stamp != driDrawPriv->lastStamp)) { - nv->dri_drawable = driDrawPriv; - st_resize_framebuffer(draw->stfb, driDrawPriv->w, - driDrawPriv->h); - nv->last_stamp = driDrawPriv->lastStamp; - } - - if (driDrawPriv != driReadPriv) { - st_resize_framebuffer(read->stfb, driReadPriv->w, - driReadPriv->h); - } - - return GL_TRUE; -} - -GLboolean -nouveau_context_unbind(__DRIcontextPrivate *driContextPriv) -{ - struct nouveau_context *nv = driContextPriv->driverPrivate; - (void)nv; - - st_flush(nv->st, 0, NULL); - return GL_TRUE; -} - diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_context.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_context.h deleted file mode 100644 index 2779b092e64..00000000000 --- a/src/gallium/winsys/drm/nouveau/dri/nouveau_context.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef __NOUVEAU_CONTEXT_DRI_H__ -#define __NOUVEAU_CONTEXT_DRI_H__ - -#include <dri_util.h> -#include <xmlconfig.h> - -#include "nouveau/nouveau_winsys.h" - -#define NOUVEAU_ERR(fmt, args...) debug_printf("%s: "fmt, __func__, ##args) - -struct nouveau_framebuffer { - struct st_framebuffer *stfb; -}; - -struct nouveau_context { - struct st_context *st; - - /* DRI stuff */ - __DRIscreenPrivate *dri_screen; - __DRIdrawablePrivate *dri_drawable; - unsigned int last_stamp; - driOptionCache dri_option_cache; - drm_context_t drm_context; - drmLock drm_lock; - int locked; -}; - -extern GLboolean nouveau_context_create(const __GLcontextModes *, - __DRIcontextPrivate *, void *); -extern void nouveau_context_destroy(__DRIcontextPrivate *); -extern GLboolean nouveau_context_bind(__DRIcontextPrivate *, - __DRIdrawablePrivate *draw, - __DRIdrawablePrivate *read); -extern GLboolean nouveau_context_unbind(__DRIcontextPrivate *); - -extern void nouveau_contended_lock(struct nouveau_context *nv); -extern void LOCK_HARDWARE(struct nouveau_context *nv); -extern void UNLOCK_HARDWARE(struct nouveau_context *nv); - -#ifdef DEBUG -extern int __nouveau_debug; - -#define DEBUG_BO (1 << 0) - -#define DBG(flag, ...) do { \ - if (__nouveau_debug & (DEBUG_##flag)) \ - NOUVEAU_ERR(__VA_ARGS__); \ -} while(0) -#else -#define DBG(flag, ...) -#endif - -#endif diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_lock.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_lock.c deleted file mode 100644 index 92f5bd09c9d..00000000000 --- a/src/gallium/winsys/drm/nouveau/dri/nouveau_lock.c +++ /dev/null @@ -1,73 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include <pipe/p_thread.h> -#include "nouveau_context.h" -#include "nouveau_screen.h" -#include "nouveau_drmif.h" - -pipe_static_mutex(lockMutex); - -/* Lock the hardware and validate our state. - */ -void -LOCK_HARDWARE(struct nouveau_context *nv) -{ - struct nouveau_screen *nv_screen = nv->dri_screen->private; - struct nouveau_device *dev = nv_screen->device; - struct nouveau_device_priv *nvdev = nouveau_device(dev); - char __ret=0; - - assert(!nv->locked); - pipe_mutex_lock(lockMutex); - - DRM_CAS(nvdev->lock, nvdev->ctx, - (DRM_LOCK_HELD | nvdev->ctx), __ret); - - if (__ret) { - drmGetLock(nvdev->fd, nvdev->ctx, 0); - nouveau_contended_lock(nv); - } - nv->locked = 1; -} - -/* Unlock the hardware using the global current context - */ -void -UNLOCK_HARDWARE(struct nouveau_context *nv) -{ - struct nouveau_screen *nv_screen = nv->dri_screen->private; - struct nouveau_device *dev = nv_screen->device; - struct nouveau_device_priv *nvdev = nouveau_device(dev); - - assert(nv->locked); - nv->locked = 0; - - DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx); - - pipe_mutex_unlock(lockMutex); -} diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_screen.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen.c deleted file mode 100644 index 4e9b76a9090..00000000000 --- a/src/gallium/winsys/drm/nouveau/dri/nouveau_screen.c +++ /dev/null @@ -1,330 +0,0 @@ -#include <utils.h> -#include <vblank.h> -#include <xmlpool.h> - -#include <pipe/p_context.h> -#include <state_tracker/st_public.h> -#include <state_tracker/st_cb_fbo.h> -#include <state_tracker/drm_api.h> - -#include "nouveau_context.h" -#include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" -#include "nouveau_dri.h" - -#include "nouveau_drm.h" -#include "nouveau_drmif.h" - -#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 12 -#error nouveau_drm.h version does not match expected version -#endif - -/* Extension stuff, enabling of extensions handled by Gallium's GL state - * tracker. But, we still need to define the entry points we want. - */ -#define need_GL_ARB_fragment_program -#define need_GL_ARB_multisample -#define need_GL_ARB_occlusion_query -#define need_GL_ARB_point_parameters -#define need_GL_ARB_shader_objects -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_program -#define need_GL_ARB_vertex_shader -#define need_GL_ARB_vertex_buffer_object -#define need_GL_EXT_compiled_vertex_array -#define need_GL_EXT_fog_coord -#define need_GL_EXT_secondary_color -#define need_GL_EXT_framebuffer_object -#define need_GL_VERSION_2_0 -#define need_GL_VERSION_2_1 -#include "extension_helper.h" - -const struct dri_extension card_extensions[] = -{ - { "GL_ARB_multisample", GL_ARB_multisample_functions }, - { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, - { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, - { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, - { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, - { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, - { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, - { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, - { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, - { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, - { "GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions }, - { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, - { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, - { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, - { NULL, 0 } -}; - -PUBLIC const char __driConfigOptions[] = -DRI_CONF_BEGIN -DRI_CONF_END; -static const GLuint __driNConfigOptions = 0; - -extern const struct dri_extension common_extensions[]; -extern const struct dri_extension nv40_extensions[]; - -static GLboolean -nouveau_create_buffer(__DRIscreenPrivate * driScrnPriv, - __DRIdrawablePrivate * driDrawPriv, - const __GLcontextModes *glVis, GLboolean pixmapBuffer) -{ - struct nouveau_framebuffer *nvfb; - enum pipe_format colour, depth, stencil; - - if (pixmapBuffer) - return GL_FALSE; - - nvfb = CALLOC_STRUCT(nouveau_framebuffer); - if (!nvfb) - return GL_FALSE; - - if (glVis->redBits == 5) - colour = PIPE_FORMAT_R5G6B5_UNORM; - else - colour = PIPE_FORMAT_A8R8G8B8_UNORM; - - if (glVis->depthBits == 16) - depth = PIPE_FORMAT_Z16_UNORM; - else if (glVis->depthBits == 24) - depth = PIPE_FORMAT_Z24S8_UNORM; - else - depth = PIPE_FORMAT_NONE; - - if (glVis->stencilBits == 8) - stencil = PIPE_FORMAT_Z24S8_UNORM; - else - stencil = PIPE_FORMAT_NONE; - - nvfb->stfb = st_create_framebuffer(glVis, colour, depth, stencil, - driDrawPriv->w, driDrawPriv->h, - (void*)nvfb); - if (!nvfb->stfb) { - free(nvfb); - return GL_FALSE; - } - - driDrawPriv->driverPrivate = (void *)nvfb; - return GL_TRUE; -} - -static void -nouveau_destroy_buffer(__DRIdrawablePrivate * driDrawPriv) -{ - struct nouveau_framebuffer *nvfb; - - nvfb = (struct nouveau_framebuffer *)driDrawPriv->driverPrivate; - st_unreference_framebuffer(nvfb->stfb); - free(nvfb); -} - -static __DRIconfig ** -nouveau_fill_in_modes(__DRIscreenPrivate *psp, - unsigned pixel_bits, unsigned depth_bits, - unsigned stencil_bits, GLboolean have_back_buffer) -{ - __DRIconfig **configs; - unsigned depth_buffer_factor; - unsigned back_buffer_factor; - GLenum fb_format; - GLenum fb_type; - - static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML, - }; - - uint8_t depth_bits_array[3]; - uint8_t stencil_bits_array[3]; - uint8_t msaa_samples_array[1]; - - depth_bits_array[0] = 0; - depth_bits_array[1] = depth_bits; - depth_bits_array[2] = depth_bits; - - /* Just like with the accumulation buffer, always provide some modes - * with a stencil buffer. It will be a sw fallback, but some apps won't - * care about that. - */ - stencil_bits_array[0] = 0; - stencil_bits_array[1] = 0; - if (depth_bits == 24) - stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; - stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; - - msaa_samples_array[0] = 0; - - depth_buffer_factor = - ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; - back_buffer_factor = (have_back_buffer) ? 3 : 1; - - if (pixel_bits == 16) { - fb_format = GL_RGB; - fb_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - fb_format = GL_BGRA; - fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; - } - - configs = driCreateConfigs(fb_format, fb_type, - depth_bits_array, stencil_bits_array, - depth_buffer_factor, back_buffer_modes, - back_buffer_factor, msaa_samples_array, 1); - if (configs == NULL) { - fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__); - return NULL; - } - - return configs; -} - -static struct pipe_surface * -dri_surface_from_handle(struct pipe_screen *screen, - unsigned handle, - enum pipe_format format, - unsigned width, - unsigned height, - unsigned pitch) -{ - struct pipe_surface *surface = NULL; - struct pipe_texture *texture = NULL; - struct pipe_texture templat; - struct pipe_buffer *buf = NULL; - - buf = drm_api_hooks.buffer_from_handle(screen, "front buffer", handle); - if (!buf) - return NULL; - - memset(&templat, 0, sizeof(templat)); - templat.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY | - NOUVEAU_TEXTURE_USAGE_LINEAR; - templat.target = PIPE_TEXTURE_2D; - templat.last_level = 0; - templat.depth[0] = 1; - templat.format = format; - templat.width[0] = width; - templat.height[0] = height; - pf_get_block(templat.format, &templat.block); - - texture = screen->texture_blanket(screen, - &templat, - &pitch, - buf); - - /* we don't need the buffer from this point on */ - pipe_buffer_reference(&buf, NULL); - - if (!texture) - return NULL; - - surface = screen->get_tex_surface(screen, texture, 0, 0, 0, - PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE); - - /* we don't need the texture from this point on */ - pipe_texture_reference(&texture, NULL); - return surface; -} - -static const __DRIconfig ** -nouveau_screen_create(__DRIscreenPrivate *psp) -{ - struct nouveau_dri *nv_dri = psp->pDevPriv; - struct nouveau_screen *nv_screen; - static const __DRIversion ddx_expected = - { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; - static const __DRIversion dri_expected = { 4, 0, 0 }; - static const __DRIversion drm_expected = - { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; - - if (!driCheckDriDdxDrmVersions2("nouveau", - &psp->dri_version, &dri_expected, - &psp->ddx_version, &ddx_expected, - &psp->drm_version, &drm_expected)) { - return NULL; - } - - if (drm_expected.patch != psp->drm_version.patch) { - fprintf(stderr, "Incompatible DRM patch level.\n" - "Expected: %d\n" "Current : %d\n", - drm_expected.patch, psp->drm_version.patch); - return NULL; - } - - driInitExtensions(NULL, card_extensions, GL_FALSE); - - if (psp->devPrivSize != sizeof(struct nouveau_dri)) { - NOUVEAU_ERR("DRI struct mismatch between DDX/DRI\n"); - return NULL; - } - - nv_screen = CALLOC_STRUCT(nouveau_screen); - if (!nv_screen) - return NULL; - - nouveau_device_open_existing(&nv_screen->device, 0, psp->fd, 0); - - nv_screen->pscreen = drm_api_hooks.create_screen(psp->fd, NULL); - if (!nv_screen->pscreen) { - FREE(nv_screen); - return NULL; - } - nv_screen->pscreen->flush_frontbuffer = nouveau_flush_frontbuffer; - - { - enum pipe_format format; - - if (nv_dri->bpp == 16) - format = PIPE_FORMAT_R5G6B5_UNORM; - else - format = PIPE_FORMAT_A8R8G8B8_UNORM; - - nv_screen->fb = dri_surface_from_handle(nv_screen->pscreen, - nv_dri->front_offset, - format, - nv_dri->width, - nv_dri->height, - nv_dri->front_pitch * - nv_dri->bpp / 8); - } - - driParseOptionInfo(&nv_screen->option_cache, - __driConfigOptions, __driNConfigOptions); - - nv_screen->driScrnPriv = psp; - psp->private = (void *)nv_screen; - - return (const __DRIconfig **) - nouveau_fill_in_modes(psp, nv_dri->bpp, - (nv_dri->bpp == 16) ? 16 : 24, - (nv_dri->bpp == 16) ? 0 : 8, 1); -} - -static void -nouveau_screen_destroy(__DRIscreenPrivate *driScrnPriv) -{ - struct nouveau_screen *nv_screen = driScrnPriv->private; - - driScrnPriv->private = NULL; - FREE(nv_screen); -} - -const struct __DriverAPIRec -driDriverAPI = { - .InitScreen = nouveau_screen_create, - .DestroyScreen = nouveau_screen_destroy, - .CreateContext = nouveau_context_create, - .DestroyContext = nouveau_context_destroy, - .CreateBuffer = nouveau_create_buffer, - .DestroyBuffer = nouveau_destroy_buffer, - .SwapBuffers = nouveau_swap_buffers, - .MakeCurrent = nouveau_context_bind, - .UnbindContext = nouveau_context_unbind, - .CopySubBuffer = nouveau_copy_sub_buffer, - - .InitScreen2 = NULL, /* one day, I promise! */ -}; - diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_screen.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen.h deleted file mode 100644 index ac078f3c638..00000000000 --- a/src/gallium/winsys/drm/nouveau/dri/nouveau_screen.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __NOUVEAU_SCREEN_DRI_H__ -#define __NOUVEAU_SCREEN_DRI_H__ - -#include "xmlconfig.h" - -struct nouveau_screen { - __DRIscreenPrivate *driScrnPriv; - driOptionCache option_cache; - - struct nouveau_device *device; - - struct pipe_screen *pscreen; - struct pipe_surface *fb; -}; - -#endif diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c deleted file mode 100644 index 9c841a0b2d0..00000000000 --- a/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c +++ /dev/null @@ -1,115 +0,0 @@ -#include <main/glheader.h> -#include <glapi/glthread.h> -#include <GL/internal/glcore.h> - -#include <pipe/p_context.h> -#include <state_tracker/st_public.h> -#include <state_tracker/st_context.h> -#include <state_tracker/st_cb_fbo.h> - -#include "nouveau_context.h" -#include "nouveau_screen.h" -#include "nouveau_swapbuffers.h" - -#include "nouveau_pushbuf.h" - -void -nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf, - const drm_clip_rect_t *rect) -{ - struct nouveau_context *nv = dPriv->driContextPriv->driverPrivate; - struct nouveau_screen *nv_screen = nv->dri_screen->private; - struct pipe_context *pipe = nv->st->pipe; - drm_clip_rect_t *pbox; - int nbox, i; - - LOCK_HARDWARE(nv); - if (!dPriv->numClipRects) { - UNLOCK_HARDWARE(nv); - return; - } - pbox = dPriv->pClipRects; - nbox = dPriv->numClipRects; - - for (i = 0; i < nbox; i++, pbox++) { - int sx, sy, dx, dy, w, h; - - sx = pbox->x1 - dPriv->x; - sy = pbox->y1 - dPriv->y; - dx = pbox->x1; - dy = pbox->y1; - w = pbox->x2 - pbox->x1; - h = pbox->y2 - pbox->y1; - - pipe->surface_copy(pipe, nv_screen->fb, dx, dy, surf, - sx, sy, w, h); - } - - pipe->flush(pipe, 0, NULL); - UNLOCK_HARDWARE(nv); - - if (nv->last_stamp != dPriv->lastStamp) { - struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; - st_resize_framebuffer(nvfb->stfb, dPriv->w, dPriv->h); - nv->last_stamp = dPriv->lastStamp; - } -} - -void -nouveau_copy_sub_buffer(__DRIdrawablePrivate *dPriv, int x, int y, int w, int h) -{ - struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; - struct pipe_surface *surf; - - st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT, &surf); - if (surf) { - drm_clip_rect_t rect; - rect.x1 = x; - rect.y1 = y; - rect.x2 = x + w; - rect.y2 = y + h; - - st_notify_swapbuffers(nvfb->stfb); - nouveau_copy_buffer(dPriv, surf, &rect); - } -} - -void -nouveau_swap_buffers(__DRIdrawablePrivate *dPriv) -{ - struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; - struct pipe_surface *surf; - - st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT, &surf); - if (surf) { - st_notify_swapbuffers(nvfb->stfb); - nouveau_copy_buffer(dPriv, surf, NULL); - } -} - -void -nouveau_flush_frontbuffer(struct pipe_screen *pscreen, struct pipe_surface *ps, - void *context_private) -{ - struct nouveau_context *nv = context_private; - __DRIdrawablePrivate *dPriv = nv->dri_drawable; - - nouveau_copy_buffer(dPriv, ps, NULL); -} - -void -nouveau_contended_lock(struct nouveau_context *nv) -{ - struct nouveau_context *nv_sub = (struct nouveau_context*)nv; - __DRIdrawablePrivate *dPriv = nv_sub->dri_drawable; - __DRIscreenPrivate *sPriv = nv_sub->dri_screen; - - /* If the window moved, may need to set a new cliprect now. - * - * NOTE: This releases and regains the hw lock, so all state - * checking must be done *after* this call: - */ - if (dPriv) - DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); -} - diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h deleted file mode 100644 index 4ca9cc22831..00000000000 --- a/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __NOUVEAU_SWAPBUFFERS_H__ -#define __NOUVEAU_SWAPBUFFERS_H__ - -void nouveau_copy_buffer(__DRIdrawablePrivate *, struct pipe_surface *, - const drm_clip_rect_t *); -void nouveau_copy_sub_buffer(__DRIdrawablePrivate *, int x, int y, int w, int h); -void nouveau_swap_buffers(__DRIdrawablePrivate *); -void nouveau_flush_frontbuffer(struct pipe_screen *, struct pipe_surface *, - void *context_private); - -#endif diff --git a/src/gallium/winsys/drm/nouveau/dri2/Makefile b/src/gallium/winsys/drm/nouveau/dri2/Makefile deleted file mode 100644 index 377a80d5188..00000000000 --- a/src/gallium/winsys/drm/nouveau/dri2/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -TOP = ../../../../../.. -include $(TOP)/configs/current - -LIBNAME = nouveau_dri2.so - -PIPE_DRIVERS = \ - $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ - $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ - $(TOP)/src/gallium/drivers/nv04/libnv04.a \ - $(TOP)/src/gallium/drivers/nv10/libnv10.a \ - $(TOP)/src/gallium/drivers/nv20/libnv20.a \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv40/libnv40.a \ - $(TOP)/src/gallium/drivers/nv50/libnv50.a - -DRIVER_SOURCES = - -C_SOURCES = \ - $(COMMON_GALLIUM_SOURCES) \ - $(DRIVER_SOURCES) - -include ../../Makefile.template - -DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs) - -symlinks: diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_dri.h b/src/gallium/winsys/drm/nouveau/drm/nouveau_dri.h index 1207c2d609c..1207c2d609c 100644 --- a/src/gallium/winsys/drm/nouveau/dri/nouveau_dri.h +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_dri.h diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index a558fda1401..b355a1391d3 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -7,9 +7,68 @@ #include "nouveau_channel.h" #include "nouveau_bo.h" +static struct pipe_surface * +dri_surface_from_handle(struct pipe_screen *screen, + unsigned handle, + enum pipe_format format, + unsigned width, + unsigned height, + unsigned pitch) +{ + struct pipe_surface *surface = NULL; + struct pipe_texture *texture = NULL; + struct pipe_texture templat; + struct pipe_buffer *buf = NULL; + + buf = drm_api_hooks.buffer_from_handle(screen, "front buffer", handle); + if (!buf) + return NULL; + + memset(&templat, 0, sizeof(templat)); + templat.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY | + NOUVEAU_TEXTURE_USAGE_LINEAR; + templat.target = PIPE_TEXTURE_2D; + templat.last_level = 0; + templat.depth[0] = 1; + templat.format = format; + templat.width[0] = width; + templat.height[0] = height; + pf_get_block(templat.format, &templat.block); + + texture = screen->texture_blanket(screen, + &templat, + &pitch, + buf); + + /* we don't need the buffer from this point on */ + pipe_buffer_reference(&buf, NULL); + + if (!texture) + return NULL; + + surface = screen->get_tex_surface(screen, texture, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + + /* we don't need the texture from this point on */ + pipe_texture_reference(&texture, NULL); + return surface; +} + +static struct pipe_surface * +nouveau_dri1_front_surface(struct pipe_context *pipe) +{ + return nouveau_screen(pipe->screen)->front; +} + +static struct dri1_api nouveau_dri1_api = { + nouveau_dri1_front_surface, +}; + static struct pipe_screen * nouveau_drm_create_screen(int fd, struct drm_create_screen_arg *arg) { + struct dri1_create_screen_arg *dri1 = (void *)arg; struct pipe_winsys *ws; struct nouveau_winsys *nvws; struct nouveau_device *dev = NULL; @@ -67,6 +126,33 @@ nouveau_drm_create_screen(int fd, struct drm_create_screen_arg *arg) return NULL; } + if (arg->mode == DRM_CREATE_DRI1) { + struct nouveau_pipe_winsys *nvpws = nouveau_pipe_winsys(ws); + struct nouveau_dri *nvdri = dri1->ddx_info; + enum pipe_format format; + + if (nvdri->bpp == 16) + format = PIPE_FORMAT_R5G6B5_UNORM; + else + format = PIPE_FORMAT_A8R8G8B8_UNORM; + + nvpws->front = dri_surface_from_handle(nvpws->pscreen, + nvdri->front_offset, + format, + nvdri->width, + nvdri->height, + nvdri->front_pitch * + (nvdri->bpp / 8)); + if (!nvpws->front) { + debug_printf("%s: error referencing front buffer\n", + __func__); + ws->destroy(ws); + return NULL; + } + + dri1->api = &nouveau_dri1_api; + } + return nouveau_pipe_winsys(ws)->pscreen; } diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h index 2782c83c0e7..cc237bfc13a 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h @@ -1,5 +1,7 @@ #ifndef __NOUVEAU_DRM_API_H__ #define __NOUVEAU_DRM_API_H__ #include "state_tracker/drm_api.h" +#include "state_tracker/dri1_api.h" +#include "nouveau_dri.h" #endif diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_winsys_pipe.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_winsys_pipe.c index 9e03a9f5dba..2dca8cd3a90 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_winsys_pipe.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_winsys_pipe.c @@ -74,6 +74,7 @@ nouveau_pipe_bo_create(struct pipe_winsys *ws, unsigned alignment, nvbuf->base.size = size; flags = nouveau_flags_from_usage(ws, usage); + flags |= NOUVEAU_BO_MAP; if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { FREE(nvbuf); return NULL; diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_winsys_pipe.h b/src/gallium/winsys/drm/nouveau/drm/nouveau_winsys_pipe.h index 10e1e269e8c..ec10f1e00c8 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_winsys_pipe.h +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_winsys_pipe.h @@ -29,6 +29,8 @@ struct nouveau_pipe_winsys { unsigned nr_pctx; struct pipe_context **pctx; + + struct pipe_surface *front; }; static INLINE struct nouveau_pipe_winsys * diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index a15487352b2..0d0fdc5bd86 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -93,6 +93,29 @@ static struct pipe_buffer *radeon_buffer_user_create(struct pipe_winsys *ws, return &radeon_buffer->base; } +static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws, + unsigned width, + unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride) +{ + struct pipe_format_block block; + unsigned nblocksx, nblocksy, size; + + pf_get_block(format, &block); + + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + + /* Radeons enjoy things in multiples of 32. */ + /* XXX this can be 32 when POT */ + *stride = (nblocksx * block.size + 63) & ~63; + size = *stride * nblocksy; + + return radeon_buffer_create(ws, 64, usage, size); +} + static void radeon_buffer_del(struct pipe_buffer *buffer) { struct radeon_pipe_buffer *radeon_buffer = @@ -180,10 +203,11 @@ struct radeon_winsys* radeon_pipe_winsys(int fd) radeon_ws->base.flush_frontbuffer = radeon_flush_frontbuffer; radeon_ws->base.buffer_create = radeon_buffer_create; - radeon_ws->base.buffer_destroy = radeon_buffer_del; radeon_ws->base.user_buffer_create = radeon_buffer_user_create; + radeon_ws->base.surface_buffer_create = radeon_surface_buffer_create; radeon_ws->base.buffer_map = radeon_buffer_map; radeon_ws->base.buffer_unmap = radeon_buffer_unmap; + radeon_ws->base.buffer_destroy = radeon_buffer_del; radeon_ws->base.fence_reference = radeon_fence_reference; radeon_ws->base.fence_signalled = radeon_fence_signalled; diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 428d3f65a16..5406d2bbea0 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -30,6 +30,10 @@ #include "radeon_drm.h" +#ifdef DEBUG +#include "trace/trace_drm.h" +#endif + /* Create a pipe_screen. */ struct pipe_screen* radeon_create_screen(int drmFB, struct drm_create_screen_arg *arg) @@ -112,7 +116,11 @@ boolean radeon_global_handle_from_buffer(struct pipe_screen* screen, return TRUE; } +#ifdef DEBUG +struct drm_api hooks = { +#else struct drm_api drm_api_hooks = { +#endif .create_screen = radeon_create_screen, .create_context = radeon_create_context, /* XXX fix this */ diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index da233203d7d..63aa3179ac8 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -22,10 +22,10 @@ #include "radeon_r300.h" -static void radeon_r300_add_buffer(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd) +static boolean radeon_r300_add_buffer(struct r300_winsys* winsys, + struct pipe_buffer* pbuffer, + uint32_t rd, + uint32_t wd) { int i; struct radeon_winsys_priv* priv = @@ -35,24 +35,30 @@ static void radeon_r300_add_buffer(struct r300_winsys* winsys, /* Check to see if this BO is already in line for validation; * find a slot for it otherwise. */ - for (i = 0; i < RADEON_MAX_BOS; i++) { + for (i = 0; i < priv->bo_count; i++) { if (sc[i].bo == bo) { - return; - } else if (sc[i].bo == NULL) { - sc[i].bo = bo; - sc[i].read_domains = rd; - sc[i].write_domain = wd; - priv->bo_count = i + 1; + sc[i].read_domains |= rd; + sc[i].write_domain |= wd; return; } } - assert(FALSE && "Oh God too many BOs!"); + if (priv->bo_count >= RADEON_MAX_BOS) { + /* Dohoho. Not falling for that one again. Request a flush. */ + return FALSE; + } + + sc[priv->bo_count].bo = bo; + sc[priv->bo_count].read_domains = rd; + sc[priv->bo_count].write_domain = wd; + priv->bo_count++; + + return TRUE; } static boolean radeon_r300_validate(struct r300_winsys* winsys) { - int retval; + int retval, i; struct radeon_winsys_priv* priv = (struct radeon_winsys_priv*)winsys->radeon_winsys; struct radeon_cs_space_check* sc = priv->sc; @@ -62,12 +68,23 @@ static boolean radeon_r300_validate(struct r300_winsys* winsys) if (retval == RADEON_CS_SPACE_OP_TO_BIG) { /* We might as well HCF, since this is not going to fit in the card, * period. */ + /* XXX just drop it on the floor instead */ exit(1); } else if (retval == RADEON_CS_SPACE_FLUSH) { /* We must flush before more rendering can commence. */ return TRUE; } + /* XXX should probably be its own function */ + for (i = 0; i < priv->bo_count; i++) { + if (sc[i].read_domains && sc[i].write_domain) { + /* Cute, cute. We need to flush first. */ + debug_printf("radeon: BO %p can't be read and written; " + "requesting flush.\n", sc[i].bo); + return TRUE; + } + } + /* Things are fine, we can proceed as normal. */ return FALSE; } @@ -108,9 +125,15 @@ static void radeon_r300_write_cs_reloc(struct r300_winsys* winsys, { struct radeon_winsys_priv* priv = (struct radeon_winsys_priv*)winsys->radeon_winsys; + int retval = 0; - radeon_cs_write_reloc(priv->cs, + retval = radeon_cs_write_reloc(priv->cs, ((struct radeon_pipe_buffer*)pbuffer)->bo, rd, wd, flags); + + if (retval) { + debug_printf("radeon: Relocation of %p (%d, %d, %d) failed!\n", + pbuffer, rd, wd, flags); + } } static void radeon_r300_end_cs(struct r300_winsys* winsys, @@ -128,57 +151,63 @@ static void radeon_r300_flush_cs(struct r300_winsys* winsys) { struct radeon_winsys_priv* priv = (struct radeon_winsys_priv*)winsys->radeon_winsys; - int retval = 0; + struct radeon_cs_space_check* sc = priv->sc; + int retval = 1; + /* Emit the CS. */ retval = radeon_cs_emit(priv->cs); if (retval) { debug_printf("radeon: Bad CS, dumping...\n"); radeon_cs_print(priv->cs, stderr); } radeon_cs_erase(priv->cs); + + /* Clean out BOs. */ + memset(sc, 0, sizeof(struct radeon_cs_space_check) * RADEON_MAX_BOS); + priv->bo_count = 0; } /* Helper function to do the ioctls needed for setup and init. */ static void do_ioctls(struct r300_winsys* winsys, int fd) { - struct drm_radeon_gem_info info; - drm_radeon_getparam_t gp; - int target; + struct drm_radeon_gem_info gem_info = {0}; + drm_radeon_getparam_t gp = {0}; + struct drm_radeon_info info = {0}; + int target = 0; int retval; + info.value = ⌖ gp.value = ⌖ - /* First, get the number of pixel pipes */ - gp.param = RADEON_PARAM_NUM_GB_PIPES; - retval = drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); + /* First, get PCI ID */ + info.request = RADEON_INFO_DEVICE_ID; + retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); if (retval) { - fprintf(stderr, "%s: Failed to get GB pipe count, error number %d\n", + fprintf(stderr, "%s: New ioctl for PCI ID failed " + "(error number %d), trying classic ioctl...\n", __FUNCTION__, retval); - exit(1); - } - winsys->gb_pipes = target; - - /* Then, get PCI ID */ - gp.param = RADEON_PARAM_DEVICE_ID; - retval = drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); - if (retval) { - fprintf(stderr, "%s: Failed to get PCI ID, error number %d\n", - __FUNCTION__, retval); - exit(1); + gp.param = RADEON_PARAM_DEVICE_ID; + retval = drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, + sizeof(gp)); + if (retval) { + fprintf(stderr, "%s: Failed to get PCI ID, " + "error number %d\n", __FUNCTION__, retval); + exit(1); + } } winsys->pci_id = target; - /* Finally, retrieve MM info */ + /* Then, retrieve MM info */ retval = drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO, - &info, sizeof(info)); + &gem_info, sizeof(gem_info)); if (retval) { fprintf(stderr, "%s: Failed to get MM info, error number %d\n", __FUNCTION__, retval); exit(1); } - winsys->gart_size = info.gart_size; + winsys->gart_size = gem_info.gart_size; /* XXX */ - winsys->vram_size = info.vram_visible; + winsys->vram_size = gem_info.vram_visible; } struct r300_winsys* diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.h b/src/gallium/winsys/drm/radeon/core/radeon_r300.h index 5c373cd0848..a2e0e582485 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.h @@ -31,5 +31,18 @@ #include "radeon_buffer.h" +/* protect us from bonghits */ +#ifndef RADEON_INFO_DEVICE_ID +#define RADEON_INFO_DEVICE_ID 0 +#endif +#ifndef DRM_RADEON_INFO +#define DRM_RADEON_INFO 0x1 +struct drm_radeon_info { + uint32_t request; + uint32_t pad; + uint64_t value; +}; +#endif + struct r300_winsys* radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys); diff --git a/src/gallium/winsys/drm/radeon/dri/Makefile b/src/gallium/winsys/drm/radeon/dri/Makefile index c218ee9d010..a9889444de8 100644 --- a/src/gallium/winsys/drm/radeon/dri/Makefile +++ b/src/gallium/winsys/drm/radeon/dri/Makefile @@ -10,6 +10,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ $(TOP)/src/gallium/winsys/drm/radeon/core/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/r300/libr300.a C_SOURCES = \ diff --git a/src/gallium/winsys/drm/radeon/egl/Makefile b/src/gallium/winsys/drm/radeon/egl/Makefile index d989b3aa935..6a1448d1b9b 100644 --- a/src/gallium/winsys/drm/radeon/egl/Makefile +++ b/src/gallium/winsys/drm/radeon/egl/Makefile @@ -8,6 +8,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \ $(GALLIUMDIR)/winsys/drm/radeon/core/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/r300/libr300.a DRIVER_SOURCES = diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index b52f427e4a7..c10e3c00ffc 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -588,7 +588,9 @@ find_supported_apis(void) EGLint mask = 0; void *handle; - handle = dlopen(NULL, 0); + handle = dlopen(NULL, RTLD_LAZY | RTLD_LOCAL); + if(!handle) + return mask; if (dlsym(handle, "st_api_OpenGL_ES1")) mask |= EGL_OPENGL_ES_BIT; diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 9f4bd1699f9..beaf9a4b129 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -19,6 +19,7 @@ DRIVER_SOURCES = \ intel_batchbuffer.c \ intel_clear.c \ intel_extensions.c \ + intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_tex_layout.c \ intel_tex_image.c \ diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c index d9cad0c4bf8..8ef6c9144f1 100644 --- a/src/mesa/drivers/dri/i915/i830_state.c +++ b/src/mesa/drivers/dri/i915/i830_state.c @@ -39,6 +39,7 @@ #include "intel_screen.h" #include "intel_batchbuffer.h" #include "intel_fbo.h" +#include "intel_buffers.h" #include "i830_context.h" #include "i830_reg.h" @@ -446,6 +447,24 @@ i830DepthMask(GLcontext * ctx, GLboolean flag) i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE; } +/** Called from ctx->Driver.Viewport() */ +static void +i830Viewport(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height) +{ + intelCalcViewport(ctx); + + intel_viewport(ctx, x, y, width, height); +} + + +/** Called from ctx->Driver.DepthRange() */ +static void +i830DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) +{ + intelCalcViewport(ctx); +} + /* ============================================================= * Polygon stipple * @@ -1064,6 +1083,8 @@ i830InitStateFuncs(struct dd_function_table *functions) functions->StencilFuncSeparate = i830StencilFuncSeparate; functions->StencilMaskSeparate = i830StencilMaskSeparate; functions->StencilOpSeparate = i830StencilOpSeparate; + functions->DepthRange = i830DepthRange; + functions->Viewport = i830Viewport; } void diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 1f9f363df92..367d2a3b648 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -73,7 +73,7 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) p->params_uptodate = 0; } - if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM)) + if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) i915_update_fog(ctx); } diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c index d44a2f47b37..40bcf7a9aff 100644 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c @@ -55,6 +55,17 @@ static GLint step_offsets[6][2] = { [FACE_NEG_Z] = {-1, 1}, }; + +static GLint bottom_offsets[6] = { + [FACE_POS_X] = 16 + 0 * 8, + [FACE_POS_Y] = 16 + 1 * 8, + [FACE_POS_Z] = 16 + 2 * 8, + [FACE_NEG_X] = 16 + 3 * 8, + [FACE_NEG_Y] = 16 + 4 * 8, + [FACE_NEG_Z] = 16 + 5 * 8, +}; + + /** * Cube texture map layout for i830M-GM915. * @@ -297,7 +308,7 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) * +---+ +---+ +---+ +---+ +---+ +---+ * * The bottom row continues with the remaining 2x2 then the 1x1 mip contents - * in order, with each of them aligned to a 4x4 block boundary. Thus, for + * in order, with each of them aligned to a 8x8 block boundary. Thus, for * 32x32 cube maps and smaller, the bottom row layout is going to dictate the * pitch of the tree. For a tree with 4x4 images, the pitch is at least * 14 * 8 = 112 texels, for 2x2 it is at least 12 * 8 texels, and for 1x1 @@ -375,10 +386,11 @@ i945_miptree_layout_cube(struct intel_context *intel, x = (face - 4) * 8; break; } + break; case 2: y = mt->total_height - 4; - x = 16 + face * 8; + x = bottom_offsets[face]; break; case 1: @@ -454,7 +466,10 @@ i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) { switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - i945_miptree_layout_cube(intel, mt); + if (mt->compressed) + i945_miptree_layout_cube(intel, mt); + else + i915_miptree_layout_cube(intel, mt); break; case GL_TEXTURE_3D: i945_miptree_layout_3d(intel, mt); diff --git a/src/mesa/drivers/dri/i915/intel_generatemipmap.c b/src/mesa/drivers/dri/i915/intel_generatemipmap.c new file mode 120000 index 00000000000..4c6b37ada01 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_generatemipmap.c @@ -0,0 +1 @@ +../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 2934414d99a..9712c387254 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -14,6 +14,7 @@ DRIVER_SOURCES = \ intel_decode.c \ intel_extensions.c \ intel_fbo.c \ + intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ @@ -69,6 +70,7 @@ DRIVER_SOURCES = \ brw_vs_constval.c \ brw_vs_emit.c \ brw_vs_state.c \ + brw_vs_surface_state.c \ brw_vtbl.c \ brw_wm.c \ brw_wm_debug.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index aef2ff5f86f..873fc8ffff6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -131,6 +131,7 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_INPUT_VARYING 0x200 #define BRW_NEW_PSP 0x800 +#define BRW_NEW_WM_SURFACES 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 @@ -245,6 +246,9 @@ struct brw_vs_ouput_sizes { }; +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 @@ -450,8 +454,6 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - GLuint nr_atoms; GLuint nr_color_regions; struct intel_region *color_regions[MAX_DRAW_BUFFERS]; @@ -471,7 +473,8 @@ struct brw_context int validated_bo_count; } state; - struct brw_cache cache; + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ struct brw_cached_batch_item *cached_batch_items; struct { @@ -555,11 +558,6 @@ struct brw_context GLuint vs_size; GLuint total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - dri_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 9197fede2d8..a1a6c53d0e0 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -36,6 +36,7 @@ #include "main/macros.h" #include "main/enums.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" @@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLfloat *buf; GLuint i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags; - brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; - if (sz == 0) { if (brw->curbe.last_buf) { free(brw->curbe.last_buf); @@ -335,78 +329,11 @@ static void prepare_constant_buffer(struct brw_context *brw) */ } - -/** - * Copy Mesa program parameters into given constant buffer. - */ -static void -update_constant_buffer(struct brw_context *brw, - const struct gl_program_parameter_list *params, - dri_bo *const_buffer) -{ - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - /* copy Mesa program constants into the buffer */ - if (const_buffer && size > 0) { - - assert(const_buffer); - assert(const_buffer->size >= size); - - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(const_buffer); - memcpy(const_buffer->virtual, params->ParameterValues, size); - drm_intel_gem_bo_unmap_gtt(const_buffer); - } - else { - dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); - } - - if (0) { - int i; - for (i = 0; i < params->NumParameters; i++) { - float *p = params->ParameterValues[i]; - printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); - } - } - } -} - - -/** Copy current vertex program's parameters into the constant buffer */ -static void -update_vertex_constant_buffer(struct brw_context *brw) -{ - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - if (0) { - printf("update VS constants in buffer %p\n", vp->const_buffer); - printf("program %u\n", vp->program.Base.Id); - } - if (vp->use_const_buffer) - update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); -} - - -/** Copy current fragment program's parameters into the constant buffer */ -static void -update_fragment_constant_buffer(struct brw_context *brw) -{ - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - if (fp->use_const_buffer) - update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); -} - - static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - update_vertex_constant_buffer(brw); - update_fragment_constant_buffer(brw); - BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); @@ -428,7 +355,7 @@ static void emit_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index b91b20bec6f..1b8bcc14ec0 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -343,7 +343,7 @@ static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; @@ -362,11 +362,11 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ - while (tmp) { - GLuint i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); + vs_inputs &= ~(1 << i); enabled[nr_enabled++] = input; } @@ -477,17 +477,17 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; GLuint nr_enabled = 0; /* Accumulate the list of enabled arrays. */ - while (tmp) { - i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); + vs_inputs &= ~(1 << i); enabled[nr_enabled++] = input; } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 62c98bd8bb3..bc7756ceab4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -97,7 +97,7 @@ struct brw_glsl_call; #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 4000 +#define BRW_EU_MAX_INSN 10000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 9bc5c35139c..4784254bc7d 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -118,7 +118,10 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 68fa9820b6f..c99918724b3 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -66,7 +66,9 @@ static void upload_sf_vp(struct brw_context *brw) sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; sfv.viewport.m32 = v[MAT_TZ] * depth_scale; - /* _NEW_SCISSOR */ + /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT + * for DrawBuffer->_[XY]{min,max} + */ /* The scissor only needs to handle the intersection of drawable and * scissor rect. Clipping to the boundaries of static shared buffers @@ -97,7 +99,8 @@ static void upload_sf_vp(struct brw_context *brw) const struct brw_tracked_state brw_sf_vp = { .dirty = { .mesa = (_NEW_VIEWPORT | - _NEW_SCISSOR), + _NEW_SCISSOR | + _NEW_BUFFERS), .brw = 0, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 81b0a45998f..bf9f6cae55e 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -72,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp; const struct brw_tracked_state brw_state_base_address; const struct brw_tracked_state brw_urb_fence; const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_surfaces; const struct brw_tracked_state brw_vs_prog; const struct brw_tracked_state brw_vs_unit; const struct brw_tracked_state brw_wm_input_sizes; const struct brw_tracked_state brw_wm_prog; const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_constant_surface; const struct brw_tracked_state brw_wm_surfaces; const struct brw_tracked_state brw_wm_unit; @@ -91,6 +93,20 @@ const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { + GLenum target, depthmode; + dri_bo *bo; + GLint format, internal_format; + GLint first_level, last_level; + GLint width, height, depth; + GLint pitch, cpp; + uint32_t tiling; + GLuint offset; +}; + /*********************************************************************** * brw_state.c */ @@ -135,8 +151,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); -void brw_init_cache( struct brw_context *brw ); -void brw_destroy_cache( struct brw_context *brw ); +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c @@ -150,4 +166,9 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); +/* brw_wm_surface_state.c */ +dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d5b51664066..e40d7a04164 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -56,9 +56,9 @@ * incorrect program is run for the other instance. */ +#include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" -#include "main/imports.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: @@ -69,8 +69,10 @@ #include "brw_sf.h" #include "brw_gs.h" -static GLuint hash_key( const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + +static GLuint +hash_key(const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size, return hash; } + /** * Marks a new buffer as being chosen for the given cache id. */ @@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } + static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, @@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, } -static void rehash( struct brw_cache *cache ) +static void +rehash(struct brw_cache *cache) { struct brw_cache_item **items; struct brw_cache_item *c, *next; @@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } + /** * Returns the buffer object matching cache_id and key, or NULL. */ -dri_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return ) +dri_bo * +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return) { struct brw_cache_item *item; GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache, return item->bo; } + dri_bo * brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache, return bo; } -/* This doesn't really work with aux data. Use search/upload instead + +/** + * This doesn't really work with aux data. Use search/upload instead */ dri_bo * brw_cache_data_sz(struct brw_cache *cache, @@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } + /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. * @@ -319,21 +330,22 @@ enum pool_type { DW_GENERAL_STATE }; + static void -brw_init_cache_id( struct brw_context *brw, - const char *name, - enum brw_cache_id id, - GLuint key_size, - GLuint aux_size) +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) { - struct brw_cache *cache = &brw->cache; - cache->name[id] = strdup(name); cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } -void brw_init_cache( struct brw_context *brw ) + +static void +brw_init_non_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; @@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw ) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * - sizeof(struct brw_cache_item)); + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, sizeof(struct brw_cc_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, sizeof(struct brw_cc_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, sizeof(struct brw_sampler_default_color), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, 0, /* variable key/data size */ 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, sizeof(struct brw_wm_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, sizeof(struct brw_sf_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, sizeof(struct brw_sf_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, sizeof(struct brw_vs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, sizeof(struct brw_clip_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, sizeof(struct brw_gs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); +} + + +static void +brw_init_surface_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->surface_cache; + + cache->brw = brw; - brw_init_cache_id(brw, + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, sizeof(struct brw_surface_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, 0, 0); } + +void +brw_init_caches(struct brw_context *brw) +{ + brw_init_non_surface_cache(brw); + brw_init_surface_cache(brw); +} + + static void -brw_clear_cache( struct brw_context *brw ) +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { struct brw_cache_item *c, *next; GLuint i; @@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw ) if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("%s\n", __FUNCTION__); - for (i = 0; i < brw->cache.size; i++) { - for (c = brw->cache.items[i]; c; c = next) { + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { int j; next = c->next; @@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw ) free((void *)c->key); free(c); } - brw->cache.items[i] = NULL; + cache->items[i] = NULL; } - brw->cache.n_items = 0; + cache->n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw ) brw->state.dirty.cache |= ~0; } -void brw_state_cache_check_size( struct brw_context *brw ) + +void +brw_state_cache_check_size(struct brw_context *brw) { + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. */ if (brw->cache.n_items > 1000) - brw_clear_cache(brw); + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); } -void brw_destroy_cache( struct brw_context *brw ) + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - brw_clear_cache(brw); + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(brw->cache.last_bo[i]); - free(brw->cache.name[i]); + dri_bo_unreference(cache->last_bo[i]); + free(cache->name[i]); } - free(brw->cache.items); - brw->cache.items = NULL; - brw->cache.size = 0; + free(cache->items); + cache->items = NULL; + cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 5de1450e612..c6dfea4743c 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, - &brw_wm_surfaces, /* must do before samplers */ + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ &brw_wm_samplers, &brw_wm_unit, @@ -88,54 +89,26 @@ const struct brw_tracked_state *atoms[] = &brw_line_stipple, &brw_aa_line_parameters, - /* Ordering of the commands below is documented as fixed. - */ -#if 0 - &brw_pipelined_state_pointers, - &brw_urb_fence, - &brw_constant_buffer_state, -#else + &brw_psp_urb_cbs, -#endif &brw_drawing_rect, &brw_indices, &brw_vertices, - NULL, /* brw_constant_buffer */ + &brw_constant_buffer }; void brw_init_state( struct brw_context *brw ) { - GLuint i; - - brw_init_cache(brw); - - brw->state.atoms = _mesa_malloc(sizeof(atoms)); - brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); - _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); - - /* Patch in a pointer to the dynamic state atom: - */ - for (i = 0; i < brw->state.nr_atoms; i++) - if (brw->state.atoms[i] == NULL) - brw->state.atoms[i] = &brw->curbe.tracked_state; - - _mesa_memcpy(&brw->curbe.tracked_state, - &brw_constant_buffer, - sizeof(brw_constant_buffer)); + brw_init_caches(brw); } void brw_destroy_state( struct brw_context *brw ) { - if (brw->state.atoms) { - _mesa_free(brw->state.atoms); - brw->state.atoms = NULL; - } - - brw_destroy_cache(brw); + brw_destroy_caches(brw); brw_destroy_batch_cache(brw); } @@ -218,6 +191,7 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_MULTISAMPLE), DEFINE_BIT(_NEW_TRACK_MATRIX), DEFINE_BIT(_NEW_PROGRAM), + DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), {0, 0, 0} }; @@ -336,7 +310,7 @@ void brw_validate_state( struct brw_context *brw ) /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; @@ -367,8 +341,8 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < brw->state.nr_atoms; i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; assert(atom->dirty.mesa || @@ -397,7 +371,7 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 51a617fcb40..be8ce546a9e 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -28,7 +28,6 @@ * Authors: * Keith Whitwell <[email protected]> */ - /* Code to layout images in a mipmap tree for i965. */ @@ -40,14 +39,14 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE -GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt ) +GLboolean brw_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt) { - /* XXX: these vary depending on image format: - */ -/* GLint align_w = 4; */ + /* XXX: these vary depending on image format: */ + /* GLint align_w = 4; */ switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_3D: { GLuint width = mt->width0; GLuint height = mt->height0; @@ -59,7 +58,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t GLuint align_w = 4; mt->total_height = 0; - + if (mt->compressed) { align_w = intel_compressed_alignment(mt->internal_format); mt->pitch = ALIGN(width, align_w); @@ -72,12 +71,12 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t pack_x_pitch = mt->pitch; pack_x_nr = 1; - for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + for (level = mt->first_level ; level <= mt->last_level ; level++) { GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; GLint x = 0; GLint y = 0; GLint q, j; - + intel_miptree_set_level_info(mt, level, nr_images, 0, mt->total_height, width, height, depth); @@ -89,7 +88,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t } x = 0; - y += pack_y_pitch; + y += pack_y_pitch; } @@ -98,25 +97,25 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t height = minify(height); depth = minify(depth); - if (mt->compressed) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); - } - } + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } } break; @@ -126,12 +125,12 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t i945_miptree_layout_2d(intel, mt); break; } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp ); - + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b69616d6e52..d7f75e3685e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -69,13 +69,18 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; -#if 0 - if (c->vp->program.Base.Parameters->NumParameters >= 6) - c->vp->use_const_buffer = 1; + /* Determine whether to use a real constant buffer or use a block + * of GRF registers for constants. The later is faster but only + * works if everything fits in the GRF. + * XXX this heuristic/check may need some fine tuning... + */ + if (c->vp->program.Base.Parameters->NumParameters + + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) + c->vp->use_const_buffer = GL_TRUE; else -#endif c->vp->use_const_buffer = GL_FALSE; - /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ + + /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/ /* r0 -- reserved as usual */ @@ -709,10 +714,11 @@ get_constant(struct brw_vs_compile *c, struct brw_compile *p = &c->func; struct brw_reg const_reg; struct brw_reg const2_reg; + const GLboolean relAddr = src->RelAddr; assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index || src->RelAddr) { + if (c->current_const[argIndex].index != src->Index || relAddr) { struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; c->current_const[argIndex].index = src->Index; @@ -725,13 +731,13 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, c->current_const[argIndex].reg,/* writeback dest */ 0, /* oword */ - src->RelAddr, /* relative indexing? */ + relAddr, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); - if (src->RelAddr) { + if (relAddr) { /* second read */ const2_reg = get_tmp(c); @@ -742,7 +748,7 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, const2_reg, /* writeback dest */ 1, /* oword */ - src->RelAddr, /* relative indexing? */ + relAddr, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER @@ -752,7 +758,7 @@ get_constant(struct brw_vs_compile *c, const_reg = c->current_const[argIndex].reg; - if (src->RelAddr) { + if (relAddr) { /* merge the two Owords into the constant register */ /* const_reg[7..4] = const2_reg[7..4] */ brw_MOV(p, @@ -1219,7 +1225,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) for (insn = 0; insn < nr_insns; insn++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; struct brw_reg args[3], dst; GLuint i; @@ -1232,7 +1238,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) */ if (inst->Opcode != OPCODE_SWZ) for (i = 0; i < 3; i++) { - struct prog_src_register *src = &inst->SrcReg[i]; + const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c new file mode 100644 index 00000000000..89f47522a1c --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -0,0 +1,226 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "main/mtypes.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "shader/prog_parameter.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/* Creates a new VS constant buffer reflecting the current VS program's + * constants, if needed by the VS program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_vs_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} + +/** + * Update the surface state for a VS constant buffer. + * + * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer. + */ +static void +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + + assert(surf == 0); + + /* If we're in this state update atom, we need to update VS constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(vp->const_buffer); + vp->const_buffer = brw_vs_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (vp->const_buffer == 0) { + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = vp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < BRW_VS_MAX_SURF; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + /* The presumed offsets were set in the data values for + * brw_upload_cache. + */ + drm_intel_bo_emit_reloc(bind_bo, i * 4, + brw->vs.surf_bo[i], 0, + I915_GEM_DOMAIN_INSTRUCTION, 0); + } + } + + free(data); + } + + return bind_bo; +} + +/** + * Vertex shader surfaces (constant buffer). + * + * This consumes the state updates for the constant buffer needing + * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and + * CACHE_NEW_SURF_BIND for the binding table upload. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + int i; + int nr_surfaces = 0; + + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + nr_surfaces = i + 1; + } + } + + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } + + /* Note that we don't end up updating the bind_bo if we don't have a + * surface to be pointing at. This should be relatively harmless, as it + * just slightly increases our working set size. + */ + if (brw->vs.nr_surfaces != 0) { + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + } +} + +const struct brw_tracked_state brw_vs_surfaces = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .prepare = prepare_vs_surfaces, +}; + + + diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 90d74c2885c..3e476fd3be5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -146,6 +146,13 @@ static void do_wm_prog( struct brw_context *brw, if (c == NULL) { brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); c = brw->wm.compile_data; + if (c == NULL) { + /* Ouch - big out of memory problem. Can't continue + * without triggering a segfault, no way to signal, + * so just return. + */ + return; + } } else { memset(c, 0, sizeof(*brw->wm.compile_data)); } @@ -312,6 +319,9 @@ static void brw_wm_populate_key( struct brw_context *brw, key->drawable_height = brw->intel.driDrawable->h; } + /* CACHE_NEW_VS_PROG */ + key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + /* The unique fragment program ID */ key->program_string_id = fp->id; } @@ -350,7 +360,7 @@ const struct brw_tracked_state brw_wm_prog = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), - .cache = 0 + .cache = CACHE_NEW_VS_PROG, }, .prepare = brw_prepare_wm_prog }; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index f0d31fc1ddc..fb15c03e83d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -75,6 +75,7 @@ struct brw_wm_prog_key { GLuint program_string_id:32; GLuint origin_x, origin_y; GLuint drawable_height; + GLuint vp_outputs_written; }; @@ -240,15 +241,20 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + GLuint cur_inst; /**< index of current instruction */ + + GLboolean out_of_regs; /**< ran out of GRF registers? */ + /** Mapping from Mesa registers to hardware registers */ struct { GLboolean inited; struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 117460842a3..49367037991 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1,5 +1,7 @@ #include "main/macros.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" @@ -42,6 +44,83 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) } + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. */ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + + /* really, no free GRF regs found */ + if (!c->out_of_regs) { + /* print warning once per compilation */ + _mesa_warning(NULL, "i965: ran out of registers for fragment program"); + c->out_of_regs = GL_TRUE; + } + + return -1; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + /** * Record the mapping of a Mesa register to a hardware register. */ @@ -68,11 +147,23 @@ static int get_scalar_dst_index(const struct prog_instruction *inst) static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - if(c->tmp_index == c->tmp_max) - c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; - + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + int r = alloc_grf(c); + if (r < 0) { + /*printf("Out of temps in %s\n", __FUNCTION__);*/ + r = 50; /* XXX random register! */ + } + c->tmp_regs[ c->tmp_max++ ] = r; + } + + /* form the GRF register */ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); return reg; + } /** @@ -130,35 +221,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, return brw_null_reg(); } + assert(index < 256); + assert(component < 4); + /* see if we've already allocated a HW register for this Mesa register */ if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; } else { /* no, allocate new register */ - reg = brw_vec8_grf(c->reg_index, 0); - } + int grf = alloc_grf(c); + /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ + if (grf < 0) { + /* totally out of temps */ + grf = 51; /* XXX random register! */ + } - /* if this is a new register allocation, record it in the table */ - if (!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; - } + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - if (c->reg_index >= BRW_WM_MAX_GRF - 12) { - /* ran out of temporary registers! */ -#if 1 - /* This is a big hack for now. - * Return bad register index, just don't hang the GPU. - */ - _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); - c->reg_index = BRW_WM_MAX_GRF - 13; -#else - return brw_null_reg(); -#endif + set_reg(c, file, index, component, reg); } - + if (neg & (1 << component)) { reg = negate(reg); } @@ -168,6 +253,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, } + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[MAX_PROGRAM_TEMPS]; + GLint intEnd[MAX_PROGRAM_TEMPS]; + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[i] can be freed */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { + int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + release_grf(c, r); + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + } + } + } + } +} + + + + /** * Preallocate registers. This sets up the Mesa to hardware register * mapping for certain registers, such as constants (uniforms/state vars) @@ -177,8 +302,12 @@ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; struct brw_reg reg; - int nr_interp_regs = 0; + int urb_read_length = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; for (i = 0; i < 4; i++) { if (i < c->key.nr_depth_regs) @@ -187,14 +316,20 @@ static void prealloc_reg(struct brw_wm_compile *c) reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2 * c->key.nr_depth_regs; + reg_index += 2 * c->key.nr_depth_regs; /* constants */ { - const int nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; /* use a real constant buffer, or just use a section of the GRF? */ - c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + /* XXX this heuristic may need adjustment... */ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->fp->use_const_buffer = GL_TRUE; + else + c->fp->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ if (c->fp->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from @@ -216,7 +351,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (i = 0; i < nr_params; i++) { /* loop over XYZW channels */ for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* Save pointer to parameter/constant value. * Constants will be copied in prepare_constant_buffer() */ @@ -226,28 +361,47 @@ static void prealloc_reg(struct brw_wm_compile *c) } /* number of constant regs used (each reg is float[8]) */ c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + reg_index += c->nr_creg; } } /* fragment shader inputs */ - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (inputs & (1<<i)) { - nr_interp_regs++; - reg = brw_vec8_grf(c->reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - c->reg_index += 2; - } + for (i = 0; i < VERT_RESULT_MAX; i++) { + int fp_input; + + if (i >= VERT_RESULT_VAR0) + fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; + else if (i <= VERT_RESULT_TEX7) + fp_input = i; + else + fp_input = -1; + + if (fp_input >= 0 && inputs & (1 << fp_input)) { + urb_read_length = reg_index; + reg = brw_vec8_grf(reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); + } + if (c->key.vp_outputs_written & (1 << i)) { + reg_index += 2; + } } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); + + /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ + prealloc_grf(c, 126); + prealloc_grf(c, 127); /* An instruction may reference up to three constants. * They'll be found in these registers. @@ -256,12 +410,12 @@ static void prealloc_reg(struct brw_wm_compile *c) if (c->fp->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; - c->current_const[i].reg = alloc_tmp(c); + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); } } #if 0 printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); + printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); #endif } @@ -283,23 +437,21 @@ static void fetch_constants(struct brw_wm_compile *c, if (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || src->File == PROGRAM_UNIFORM) { - if (c->current_const[i].index != src->Index) { - c->current_const[i].index = src->Index; + c->current_const[i].index = src->Index; #if 0 - printf(" fetch const[%d] for arg %d into reg %d\n", - src->Index, i, c->current_const[i].reg.nr); + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); #endif - /* need to fetch the constant now */ - brw_dp_READ_4(p, - c->current_const[i].reg, /* writeback dest */ - 1, /* msg_reg */ - src->RelAddr, /* relative indexing? */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ - ); - } + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + 1, /* msg_reg */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ + ); } } } @@ -368,6 +520,14 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, const GLuint nr = 1; const GLuint component = GET_SWZ(src->Swizzle, channel); + /* Extended swizzle terms */ + if (component == SWIZZLE_ZERO) { + return brw_imm_f(0.0F); + } + else if (component == SWIZZLE_ONE) { + return brw_imm_f(1.0F); + } + if (c->fp->use_const_buffer && (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || @@ -665,27 +825,26 @@ static void emit_fb_write(struct brw_wm_compile *c, } if (c->key.dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + const GLuint comp = c->key.dest_depth_reg / 2; + const GLuint off = c->key.dest_depth_reg % 2; - assert(comp == 1); - assert(off == 0); -#if 0 - /* XXX do we need this code? comp always 1, off always 0, it seems */ if (off != 0) { + /* XXX this code needs review/testing */ + struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); + struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_MOV(p, brw_message_reg(nr+1), arg1_1); brw_pop_insn_state(p); } else -#endif { - struct brw_reg src = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src); + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); } nr += 2; } @@ -2595,7 +2754,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); - c->reg_index = 0; + c->out_of_regs = GL_FALSE; + prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); @@ -2603,6 +2763,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { const struct prog_instruction *inst = &c->prog_instructions[i]; + c->cur_inst = i; + #if 0 _mesa_printf("Inst %d: ", i); _mesa_print_instruction(inst); @@ -2833,17 +2995,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - - if (c->reg_index >= BRW_WM_MAX_GRF) { - _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); - /* XXX we need to do some proper error recovery here */ - } } @@ -2867,6 +3025,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_print_program(c, "brw_wm_glsl_emit done"); } - c->prog_data.total_grf = c->reg_index; + c->prog_data.total_grf = num_grf_used(c); c->prog_data.total_scratch = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index bd60ac9b315..8fd067abe7d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -116,6 +116,10 @@ const struct { { C, 0, 1, 1, 1 } }; +/** + * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES + * \param lookup bitmask of IZ_* flags + */ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, struct brw_wm_prog_key *key ) diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 780edbc42e6..6faea018fbc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -69,8 +69,6 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { - struct brw_context *brw = c->func.brw; - GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS); GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -85,16 +83,19 @@ static void init_registers( struct brw_wm_compile *c ) prealloc_reg(c, &c->creg[j], i++); for (j = 0; j < FRAG_ATTRIB_MAX; j++) { - if (inputs & (1<<j)) { - /* index for vs output and ps input are not the same - in shader varying */ - GLuint index; - if (j > FRAG_ATTRIB_VAR0) - index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + if (c->key.vp_outputs_written & (1<<j)) { + int fp_index; + + if (j >= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; else - index = j; + fp_index = -1; + nr_interp_regs++; - prealloc_reg(c, &c->payload.input_interp[index], i++); + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index c604ef0162a..3fc18ff1f3a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -178,6 +178,16 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; } + else if (key->tex_target == GL_TEXTURE_1D) { + /* There's a bug in 1D texture sampling - it actually pays + * attention to the wrap_t value, though it should not. + * Override the wrap_t value here to GL_REPEAT to keep + * any nonexistent border pixels from floating in. + */ + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + } else { sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 805df8a4af4..c49a5f6b4ec 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -176,22 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, } } - -/** - * Use same key for WM and VS surfaces. - */ -struct brw_surface_key { - GLenum target, depthmode; - dri_bo *bo; - GLint format, internal_format; - GLint first_level, last_level; - GLint width, height, depth; - GLint pitch, cpp; - uint32_t tiling; - GLuint offset; -}; - - static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -268,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -321,10 +305,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.tiling = intelObj->mt->region->tiling; dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } @@ -336,7 +321,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -static dri_bo * +dri_bo * brw_create_constant_surface( struct brw_context *brw, struct brw_surface_key *key ) { @@ -362,7 +347,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -380,39 +365,70 @@ brw_create_constant_surface( struct brw_context *brw, return bo; } +/* Creates a new WM constant buffer reflecting the current fragment program's + * constants, if needed by the fragment program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_wm_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (!fp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} /** * Update the surface state for a WM constant buffer. * The constant buffer will be (re)allocated here if needed. */ -static dri_bo * +static void brw_update_wm_constant_surface( GLcontext *ctx, - GLuint surf, - dri_bo *const_buffer, - const struct gl_program_parameter_list *params) + GLuint surf) { struct brw_context *brw = brw_context(ctx); struct brw_surface_key key; - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = + fp->program.Base.Parameters; - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } + /* If we're in this state update atom, we need to update WM constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64); + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + return; } memset(&key, 0, sizeof(key)); key.format = MESA_FORMAT_RGBA_FLOAT32; key.internal_format = GL_RGBA; - key.bo = const_buffer; + key.bo = fp->const_buffer; key.depthmode = GL_NONE; key.pitch = params->NumParameters; key.width = params->NumParameters; @@ -427,77 +443,59 @@ brw_update_wm_constant_surface( GLcontext *ctx, */ dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); } - - return const_buffer; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } - /** - * Update the surface state for a VS constant buffer. - * The constant buffer will be (re)allocated here if needed. + * Updates surface / buffer for fragment shader constant buffer, if + * one is required. + * + * This consumes the state updates for the constant buffer, and produces + * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for + * inclusion in the binding table. */ -static dri_bo * -brw_update_vs_constant_surface( GLcontext *ctx, - GLuint surf, - dri_bo *const_buffer, - const struct gl_program_parameter_list *params) +static void prepare_wm_constant_surface(struct brw_context *brw ) { - struct brw_context *brw = brw_context(ctx); - struct brw_surface_key key; - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - assert(surf == 0); - - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } - - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); - } - - memset(&key, 0, sizeof(key)); - - key.format = MESA_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; - key.bo = const_buffer; - key.depthmode = GL_NONE; - key.pitch = params->NumParameters; - key.width = params->NumParameters; - key.height = 1; - key.depth = 1; - key.cpp = 16; + GLcontext *ctx = &brw->intel.ctx; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - /* - printf("%s:\n", __FUNCTION__); - printf(" width %d height %d depth %d cpp %d pitch %d\n", - key.width, key.height, key.depth, key.cpp, key.pitch); - */ + drm_intel_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); - dri_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->vs.surf_bo[surf] == NULL) { - brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + if (brw->wm.surf_bo[surf] != NULL) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + } + return; } - return const_buffer; + brw_update_wm_constant_surface(ctx, surf); } +const struct brw_tracked_state brw_wm_constant_surface = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constant_surface, +}; + /** * Sets up a surface state structure to point at the given region. @@ -507,7 +505,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, static void brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, - unsigned int unit, GLboolean cached) + unsigned int unit) { GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; @@ -567,12 +565,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw, ctx->Color.BlendEnabled); dri_bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = NULL; - if (cached) - brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - NULL); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + NULL); if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; @@ -598,7 +595,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.writedisable_alpha = !key.color_mask[3]; /* Key size will never match key size for textures, so we're safe. */ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), @@ -630,7 +628,7 @@ brw_wm_get_binding_table(struct brw_context *brw) assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, NULL); @@ -646,7 +644,7 @@ brw_wm_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, data, data_size, @@ -682,27 +680,17 @@ static void prepare_wm_surfaces(struct brw_context *brw ) for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { brw_update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], - i, - GL_FALSE); + i); } } else { - brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE); + brw_update_renderbuffer_surface(brw, NULL, 0); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; - /* Update surface / buffer for fragment shader constant buffer */ - { - const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - fp->const_buffer = - brw_update_wm_constant_surface(ctx, surf, fp->const_buffer, - fp->program.Base.Parameters); - - brw->wm.nr_surfaces = surf + 1; - } + if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) + brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { @@ -735,100 +723,16 @@ static void prepare_wm_surfaces(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } - -/** - * Constructs the binding table for the VS surface state. - */ -static dri_bo * -brw_vs_get_binding_table(struct brw_context *brw) -{ - dri_bo *bind_bo; - - assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); - - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, brw->vs.nr_surfaces, - NULL); - - if (bind_bo == NULL) { - GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint); - uint32_t *data = malloc(data_size); - int i; - - for (i = 0; i < brw->vs.nr_surfaces; i++) - if (brw->vs.surf_bo[i]) - data[i] = brw->vs.surf_bo[i]->offset; - else - data[i] = 0; - - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, brw->vs.nr_surfaces, - data, data_size, - NULL, NULL); - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_VS_MAX_SURF; i++) { - if (brw->vs.surf_bo[i] != NULL) { - dri_bo_emit_reloc(bind_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(GLuint), - brw->vs.surf_bo[i]); - } - } - - free(data); - } - - return bind_bo; -} - - -/** - * Vertex shader surfaces. Just constant buffer for now. Could add vertex - * shader textures in the future. - */ -static void prepare_vs_surfaces(struct brw_context *brw ) -{ - GLcontext *ctx = &brw->intel.ctx; - - /* Update surface / buffer for vertex shader constant buffer */ - { - const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - vp->const_buffer = - brw_update_vs_constant_surface(ctx, surf, vp->const_buffer, - vp->program.Base.Parameters); - - brw->vs.nr_surfaces = 1; - } - - dri_bo_unreference(brw->vs.bind_bo); - brw->vs.bind_bo = brw_vs_get_binding_table(brw); - - if (1) - brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; -} - - -static void -prepare_surfaces(struct brw_context *brw) -{ - prepare_wm_surfaces(brw); - prepare_vs_surfaces(brw); -} - - const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, - .brw = BRW_NEW_CONTEXT, + .mesa = (_NEW_COLOR | + _NEW_TEXTURE | + _NEW_BUFFERS), + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_WM_SURFACES), .cache = 0 }, - .prepare = prepare_surfaces, + .prepare = prepare_wm_surfaces, }; diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c new file mode 120000 index 00000000000..4c6b37ada01 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_generatemipmap.c @@ -0,0 +1 @@ +../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index c849e4869e5..0db1f392c0a 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -35,6 +35,9 @@ #include "intel_batchbuffer.h" #include "intel_regions.h" +static GLboolean +intel_bufferobj_unmap(GLcontext * ctx, + GLenum target, struct gl_buffer_object *obj); /** Allocates a new dri_bo to store the data for the buffer object. */ static void @@ -100,7 +103,13 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - assert(!obj->Pointer); /* Mesa should have unmapped it */ + + /* Buffer objects are automatically unmapped when deleting according + * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy + * (though it does if you call glDeleteBuffers) + */ + if (obj->Pointer) + intel_bufferobj_unmap(ctx, 0, obj); if (intel_obj->region) { intel_bufferobj_release_region(intel, intel_obj); @@ -205,6 +214,7 @@ intel_bufferobj_map(GLcontext * ctx, struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); GLboolean read_only = (access == GL_READ_ONLY_ARB); + GLboolean write_only = (access == GL_WRITE_ONLY_ARB); assert(intel_obj); @@ -216,7 +226,14 @@ intel_bufferobj_map(GLcontext * ctx, return NULL; } - dri_bo_map(intel_obj->buffer, !read_only); + if (write_only && intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(intel_obj->buffer); + intel_obj->mapped_gtt = GL_TRUE; + } else { + drm_intel_bo_map(intel_obj->buffer, !read_only); + intel_obj->mapped_gtt = GL_FALSE; + } + obj->Pointer = intel_obj->buffer->virtual; return obj->Pointer; } @@ -234,7 +251,11 @@ intel_bufferobj_unmap(GLcontext * ctx, assert(intel_obj); if (intel_obj->buffer != NULL) { assert(obj->Pointer); - dri_bo_unmap(intel_obj->buffer); + if (intel_obj->mapped_gtt) { + drm_intel_gem_bo_unmap_gtt(intel_obj->buffer); + } else { + drm_intel_bo_unmap(intel_obj->buffer); + } obj->Pointer = NULL; } return GL_TRUE; diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h index bf6dbd58f27..7ef723833c0 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h @@ -46,6 +46,7 @@ struct intel_buffer_object struct intel_region *region; /* Is there a zero-copy texture associated with this (pixel) buffer object? */ + GLboolean mapped_gtt; }; diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index b86cafea241..df5c3fc1766 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -157,7 +157,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) /* Do this here, not core Mesa, since this function is called from * many places within the driver. */ - if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (ctx->NewState & _NEW_BUFFERS) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ _mesa_update_framebuffer(ctx); /* this updates the DrawBuffer's Width/Height if it's a FBO */ @@ -276,7 +276,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) ctx->Driver.Enable(ctx, GL_DEPTH_TEST, (ctx->Depth.Test && fb->Visual.depthBits > 0)); ctx->Driver.Enable(ctx, GL_STENCIL_TEST, - (ctx->Stencil._Enabled && fb->Visual.stencilBits > 0)); + (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0)); } else { /* Mesa's Stencil._Enabled field is updated when diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index eb0d890f477..4dfaee8a4a3 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -255,8 +255,10 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) /* Control writing of the stencil clear value to stencil. */ if (this_mask & BUFFER_BIT_STENCIL) { _mesa_Enable(GL_STENCIL_TEST); - _mesa_StencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); - _mesa_StencilFuncSeparate(GL_FRONT, GL_ALWAYS, ctx->Stencil.Clear, + _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, + GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, + ctx->Stencil.Clear, ctx->Stencil.WriteMask[0]); } else { _mesa_Disable(GL_STENCIL_TEST); diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 5dc3df395d1..ea43009f4c8 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -394,7 +394,7 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) if (!driContext->driScreenPriv->dri2.enabled) return; - if (!intel->internal_viewport_call) { + if (!intel->internal_viewport_call && ctx->DrawBuffer->Name == 0) { intel_update_renderbuffers(driContext, driContext->driDrawablePriv); if (driContext->driDrawablePriv != driContext->driReadablePriv) intel_update_renderbuffers(driContext, driContext->driReadablePriv); @@ -650,6 +650,13 @@ intelInitContext(struct intel_context *intel, _mesa_init_point(ctx); ctx->Const.MaxColorAttachments = 4; /* XXX FBO: review this */ + if (IS_965(intelScreen->deviceID)) { + if (MAX_WIDTH > 8192) + ctx->Const.MaxRenderbufferSize = 8192; + } else { + if (MAX_WIDTH > 2048) + ctx->Const.MaxRenderbufferSize = 2048; + } /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext(ctx); @@ -778,13 +785,64 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->prim.vb_bo = NULL; if (release_texture_heaps) { - /* This share group is about to go away, free our private - * texture object data. + /* Nothing is currently done here to free texture heaps; + * but we're not using the texture heap utilities, so I + * rather think we shouldn't. I've taken a look, and can't + * find any private texture data hanging around anywhere, but + * I'm not yet certain there isn't any at all... */ - if (INTEL_DEBUG & DEBUG_TEXTURE) + /* if (INTEL_DEBUG & DEBUG_TEXTURE) fprintf(stderr, "do something to free texture heaps\n"); + */ } + /* XXX In intelMakeCurrent() below, the context's static regions are + * referenced inside the frame buffer; it's listed as a hack, + * with a comment of "XXX FBO temporary fix-ups!", but + * as long as it's there, we should release the regions here. + * The do/while loop around the block is used to allow the + * "continue" statements inside the block to exit the block, + * to avoid many layers of "if" constructs. + */ + do { + __DRIdrawablePrivate * driDrawPriv = intel->driDrawable; + struct intel_framebuffer *intel_fb; + struct intel_renderbuffer *irbDepth, *irbStencil; + if (!driDrawPriv) { + /* We're already detached from the drawable; exit this block. */ + continue; + } + intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate; + if (!intel_fb) { + /* The frame buffer is already gone; exit this block. */ + continue; + } + irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + irbStencil = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + + /* If the regions of the frame buffer still match the regions + * of the context, release them. If they've changed somehow, + * leave them alone. + */ + if (intel_fb->color_rb[0] && intel_fb->color_rb[0]->region == intel->front_region) { + intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL); + } + if (intel_fb->color_rb[1] && intel_fb->color_rb[1]->region == intel->back_region) { + intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL); + } + + if (irbDepth && irbDepth->region == intel->depth_region) { + intel_renderbuffer_set_region(irbDepth, NULL); + } + /* Usually, the stencil buffer is the same as the depth buffer; + * but they're handled separately in MakeCurrent, so we'll + * handle them separately here. + */ + if (irbStencil && irbStencil->region == intel->depth_region) { + intel_renderbuffer_set_region(irbStencil, NULL); + } + } while (0); + intel_region_release(&intel->front_region); intel_region_release(&intel->back_region); intel_region_release(&intel->depth_region); @@ -793,6 +851,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) /* free the Mesa context */ _mesa_free_context_data(&intel->ctx); + + } } @@ -821,7 +881,10 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, if (driDrawPriv != driReadPriv) intel_update_renderbuffers(driContextPriv, driReadPriv); } else { - /* XXX FBO temporary fix-ups! */ + /* XXX FBO temporary fix-ups! These are released in + * intelDextroyContext(), above. Changes here should be + * reflected there. + */ /* if the renderbuffers don't have regions, init them from the context */ struct intel_renderbuffer *irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index f45e24ca3a1..8b68cc3f04d 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -161,12 +161,22 @@ struct intel_context struct { struct gl_fragment_program *bitmap_fp; struct gl_vertex_program *passthrough_vp; + struct gl_buffer_object *texcoord_vbo; struct gl_fragment_program *saved_fp; GLboolean saved_fp_enable; struct gl_vertex_program *saved_vp; GLboolean saved_vp_enable; + struct gl_fragment_program *tex2d_fp; + + GLboolean saved_texcoord_enable; + struct gl_buffer_object *saved_array_vbo, *saved_texcoord_vbo; + GLenum saved_texcoord_type; + GLsizei saved_texcoord_size, saved_texcoord_stride; + const void *saved_texcoord_ptr; + int saved_active_texture; + GLint saved_vp_x, saved_vp_y; GLsizei saved_vp_width, saved_vp_height; GLenum saved_matrix_mode; @@ -559,4 +569,10 @@ intel_context(GLcontext * ctx) return (struct intel_context *) ctx; } +static INLINE GLboolean +is_power_of_two(uint32_t value) +{ + return (value & (value - 1)) == 0; +} + #endif diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 9ec1b4ec2f4..1e8b1878abe 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -48,6 +48,7 @@ #define need_GL_EXT_point_parameters #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side +#define need_GL_APPLE_vertex_array_object #define need_GL_ATI_separate_stencil #define need_GL_ATI_envmap_bumpmap #define need_GL_NV_point_sprite @@ -95,6 +96,7 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_texture_lod_bias", NULL }, { "GL_3DFX_texture_compression_FXT1", NULL }, { "GL_APPLE_client_storage", NULL }, + { "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions}, { "GL_MESA_pack_invert", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_NV_blend_square", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 52647ddf8b2..04723a2f917 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -574,9 +574,10 @@ intel_render_texture(GLcontext * ctx, ASSERT(newImage); - if (newImage->Border != 0) { - /* Fallback on drawing to a texture with a border, which won't have a - * miptree. + intel_image = intel_texture_image(newImage); + if (!intel_image->mt) { + /* Fallback on drawing to a texture that doesn't have a miptree + * (has a border, width/height 0, etc.) */ _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _mesa_render_texture(ctx, fb, att); @@ -607,7 +608,6 @@ intel_render_texture(GLcontext * ctx, irb->Base.RefCount); /* point the renderbufer's region to the texture image region */ - intel_image = intel_texture_image(newImage); if (irb->region != intel_image->mt->region) { if (irb->region) intel_region_release(&irb->region); @@ -679,6 +679,11 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) if (rb == NULL) continue; + if (irb == NULL) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + continue; + } + switch (irb->texformat->MesaFormat) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_RGB565: diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c new file mode 100644 index 00000000000..1060fbd9e59 --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_generatemipmap.c @@ -0,0 +1,283 @@ +/* + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/bufferobj.h" +#include "main/teximage.h" +#include "main/texenv.h" +#include "main/texobj.h" +#include "main/texstate.h" +#include "main/texparam.h" +#include "main/varray.h" +#include "main/attrib.h" +#include "main/enable.h" +#include "main/buffers.h" +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/depth.h" +#include "main/hash.h" +#include "main/mipmap.h" +#include "main/blend.h" +#include "glapi/dispatch.h" +#include "swrast/swrast.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_pixel.h" +#include "intel_tex.h" +#include "intel_mipmap_tree.h" + +static const char *intel_fp_tex2d = + "!!ARBfp1.0\n" + "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n" + "END\n"; + +static GLboolean +intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name, + int level, int width, int height) +{ + struct intel_context *intel = intel_context(ctx); + GLfloat vertices[4][2]; + GLint status; + + /* Set to source from the previous level */ + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1); + + /* Set to draw into the current level */ + _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, + GL_COLOR_ATTACHMENT0_EXT, + GL_TEXTURE_2D, + tex_name, + level); + /* Choose to render to the color attachment. */ + _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + + status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT); + if (status != GL_FRAMEBUFFER_COMPLETE_EXT) + return GL_FALSE; + + intel_meta_set_passthrough_transform(intel); + + /* XXX: Doing it right would involve setting up the transformation to do + * 0-1 mapping or something, and not changing the vertex data. + */ + vertices[0][0] = 0; + vertices[0][1] = 0; + vertices[1][0] = width; + vertices[1][1] = 0; + vertices[2][0] = width; + vertices[2][1] = height; + vertices[3][0] = 0; + vertices[3][1] = height; + + _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); + _mesa_Enable(GL_VERTEX_ARRAY); + intel_meta_set_default_texrect(intel); + + CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + + intel_meta_restore_texcoords(intel); + intel_meta_restore_transform(intel); + + return GL_TRUE; +} + +static GLboolean +intel_generate_mipmap_2d(GLcontext *ctx, + GLenum target, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + GLint old_active_texture; + int level, max_levels, start_level, end_level; + GLuint fb_name; + GLboolean success = GL_FALSE; + struct gl_framebuffer *saved_fbo = NULL; + + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | + GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | + GL_DEPTH_BUFFER_BIT); + _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); + old_active_texture = ctx->Texture.CurrentUnit; + _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer); + + _mesa_Disable(GL_POLYGON_STIPPLE); + _mesa_Disable(GL_DEPTH_TEST); + _mesa_Disable(GL_STENCIL_TEST); + _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + _mesa_DepthMask(GL_FALSE); + + /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our + * minification. + */ + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); + _mesa_Enable(GL_TEXTURE_2D); + _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, + GL_LINEAR_MIPMAP_NEAREST); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + /* Bind the new renderbuffer to the color attachment point. */ + _mesa_GenFramebuffersEXT(1, &fb_name); + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name); + + intel_meta_set_fragment_program(intel, &intel->meta.tex2d_fp, + intel_fp_tex2d); + intel_meta_set_passthrough_vertex_program(intel); + + max_levels = _mesa_max_texture_levels(ctx, texObj->Target); + start_level = texObj->BaseLevel; + end_level = texObj->MaxLevel; + + /* Loop generating level+1 from level. */ + for (level = start_level; level < end_level && level < max_levels - 1; level++) { + const struct gl_texture_image *srcImage; + int width, height; + + srcImage = _mesa_select_tex_image(ctx, texObj, target, level); + if (srcImage->Border != 0) + goto fail; + + width = srcImage->Width / 2; + if (width < 1) + width = 1; + height = srcImage->Height / 2; + if (height < 1) + height = 1; + + if (width == srcImage->Width && + height == srcImage->Height) { + /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the + * maximum texture level for the object, so break out when we've gone + * over the edge. + */ + break; + } + + /* Make sure that there's space allocated for the target level. + * We could skip this if there's already space allocated and save some + * time. + */ + _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat, + width, height, 0, + GL_RGBA, GL_UNSIGNED_INT, NULL); + + if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1, + width, height)) + goto fail; + } + + success = GL_TRUE; + +fail: + intel_meta_restore_fragment_program(intel); + intel_meta_restore_vertex_program(intel); + + _mesa_DeleteFramebuffersEXT(1, &fb_name); + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); + if (saved_fbo) + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name); + _mesa_reference_framebuffer(&saved_fbo, NULL); + _mesa_PopClientAttrib(); + _mesa_PopAttrib(); + + return success; +} + + +/** + * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap + * level). + * + * The texture object's miptree must be mapped. + * + * It would be really nice if this was just called by Mesa whenever mipmaps + * needed to be regenerated, rather than us having to remember to do so in + * each texture image modification path. + * + * This function should also include an accelerated path. + */ +void +intel_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + int face, i; + + /* HW path */ + if (target == GL_TEXTURE_2D && + ctx->Extensions.EXT_framebuffer_object && + ctx->Extensions.ARB_fragment_program && + ctx->Extensions.ARB_vertex_program) { + GLboolean success; + + /* We'll be accessing this texture using GL entrypoints, which should + * be resilient against other access to this texture. + */ + _mesa_unlock_texture(ctx, texObj); + success = intel_generate_mipmap_2d(ctx, target, texObj); + _mesa_lock_texture(ctx, texObj); + + if (success) + return; + } + + /* SW path */ + intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); + _mesa_generate_mipmap(ctx, target, texObj); + intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); + + /* Update the level information in our private data in the new images, since + * it didn't get set as part of a normal TexImage path. + */ + for (face = 0; face < nr_faces; face++) { + for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { + struct intel_texture_image *intelImage; + + intelImage = intel_texture_image(texObj->Image[face][i]); + if (intelImage == NULL) + break; + + intelImage->level = i; + intelImage->face = face; + /* Unreference the miptree to signal that the new Data is a bare + * pointer from mesa. + */ + intel_miptree_release(intel, &intelImage->mt); + } + } +} diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 6e1e034e53d..f3652720ece 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -62,9 +62,10 @@ intel_miptree_create_internal(struct intel_context *intel, GLboolean ok; struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); - DBG("%s target %s format %s level %d..%d\n", __FUNCTION__, + DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(internal_format), first_level, last_level); + _mesa_lookup_enum_by_nr(internal_format), + first_level, last_level, mt); mt->target = target_to_target(target); mt->internal_format = internal_format; @@ -89,6 +90,7 @@ intel_miptree_create_internal(struct intel_context *intel, if (!ok) { free(mt); + DBG("%s not okay - returning NULL\n", __FUNCTION__); return NULL; } diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index fc0ac0b79c0..36a684b3b85 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -27,9 +27,12 @@ #include "main/enums.h" #include "main/state.h" +#include "main/bufferobj.h" #include "main/context.h" #include "main/enable.h" #include "main/matrix.h" +#include "main/texstate.h" +#include "main/varray.h" #include "main/viewport.h" #include "swrast/swrast.h" #include "shader/arbprogram.h" @@ -334,6 +337,85 @@ intel_meta_restore_fragment_program(struct intel_context *intel) _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); } +static const float default_texcoords[4][2] = { { 0.0, 0.0 }, + { 1.0, 0.0 }, + { 1.0, 1.0 }, + { 0.0, 1.0 } }; + +void +intel_meta_set_default_texrect(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + struct gl_client_array *old_texcoord_array; + + intel->meta.saved_active_texture = ctx->Texture.CurrentUnit; + if (intel->meta.saved_array_vbo == NULL) { + _mesa_reference_buffer_object(ctx, &intel->meta.saved_array_vbo, + ctx->Array.ArrayBufferObj); + } + + old_texcoord_array = &ctx->Array.ArrayObj->TexCoord[0]; + intel->meta.saved_texcoord_type = old_texcoord_array->Type; + intel->meta.saved_texcoord_size = old_texcoord_array->Size; + intel->meta.saved_texcoord_stride = old_texcoord_array->Stride; + intel->meta.saved_texcoord_enable = old_texcoord_array->Enabled; + intel->meta.saved_texcoord_ptr = old_texcoord_array->Ptr; + _mesa_reference_buffer_object(ctx, &intel->meta.saved_texcoord_vbo, + old_texcoord_array->BufferObj); + + _mesa_ClientActiveTextureARB(GL_TEXTURE0); + + if (intel->meta.texcoord_vbo == NULL) { + GLuint vbo_name; + + _mesa_GenBuffersARB(1, &vbo_name); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, vbo_name); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(default_texcoords), + default_texcoords, GL_STATIC_DRAW_ARB); + _mesa_reference_buffer_object(ctx, &intel->meta.texcoord_vbo, + ctx->Array.ArrayBufferObj); + } else { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, + intel->meta.texcoord_vbo->Name); + } + _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), NULL); + + _mesa_Enable(GL_TEXTURE_COORD_ARRAY); +} + +void +intel_meta_restore_texcoords(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + + /* Restore the old TexCoordPointer */ + if (intel->meta.saved_texcoord_vbo) { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, + intel->meta.saved_texcoord_vbo->Name); + _mesa_reference_buffer_object(ctx, &intel->meta.saved_texcoord_vbo, NULL); + } else { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + } + + _mesa_TexCoordPointer(intel->meta.saved_texcoord_size, + intel->meta.saved_texcoord_type, + intel->meta.saved_texcoord_stride, + intel->meta.saved_texcoord_ptr); + if (!intel->meta.saved_texcoord_enable) + _mesa_Disable(GL_TEXTURE_COORD_ARRAY); + + _mesa_ClientActiveTextureARB(GL_TEXTURE0 + + intel->meta.saved_active_texture); + + if (intel->meta.saved_array_vbo) { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, + intel->meta.saved_array_vbo->Name); + _mesa_reference_buffer_object(ctx, &intel->meta.saved_array_vbo, NULL); + } else { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + } +} + void intelInitPixelFuncs(struct dd_function_table *functions) { @@ -355,5 +437,7 @@ intel_free_pixel_state(struct intel_context *intel) _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, NULL); _mesa_reference_fragprog(ctx, &intel->meta.bitmap_fp, NULL); + _mesa_reference_fragprog(ctx, &intel->meta.tex2d_fp, NULL); + _mesa_reference_buffer_object(ctx, &intel->meta.texcoord_vbo, NULL); } diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h index cb41fa182cb..6acf0813c8c 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.h +++ b/src/mesa/drivers/dri/intel/intel_pixel.h @@ -40,6 +40,9 @@ void intel_meta_set_fragment_program(struct intel_context *intel, const char *prog_string); void intel_meta_restore_fragment_program(struct intel_context *intel); void intel_free_pixel_state(struct intel_context *intel); +void intel_meta_set_default_texrect(struct intel_context *intel); +void intel_meta_set_default_texrect(struct intel_context *intel); +void intel_meta_restore_texcoords(struct intel_context *intel); GLboolean intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 1db7f5594e9..80d3239189c 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -360,7 +360,6 @@ intel_texture_bitmap(GLcontext * ctx, "END\n"; GLuint texname; GLfloat vertices[4][4]; - GLfloat texcoords[4][2]; GLint old_active_texture; GLubyte *unpacked_bitmap; GLubyte *a8_bitmap; @@ -401,6 +400,14 @@ intel_texture_bitmap(GLcontext * ctx, return GL_FALSE; } + if (!ctx->Extensions.ARB_texture_non_power_of_two && + (!is_power_of_two(width) || !is_power_of_two(height))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glBitmap() fallback: NPOT texture\n"); + return GL_FALSE; + } + /* Check that we can load in a texture this big. */ if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) || height > (1 << (ctx->Const.MaxTextureLevels - 1))) { @@ -485,22 +492,12 @@ intel_texture_bitmap(GLcontext * ctx, vertices[3][2] = dst_z; vertices[3][3] = 1.0; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + intel_meta_set_default_texrect(intel); CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + intel_meta_restore_texcoords(intel); intel_meta_restore_transform(intel); intel_meta_restore_fragment_program(intel); intel_meta_restore_vertex_program(intel); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index e8d5ac8569d..7cda6adb32b 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -70,7 +70,6 @@ intel_texture_drawpixels(GLcontext * ctx, struct intel_context *intel = intel_context(ctx); GLuint texname; GLfloat vertices[4][4]; - GLfloat texcoords[4][2]; GLfloat z; GLint old_active_texture; GLenum internalFormat; @@ -120,6 +119,14 @@ intel_texture_drawpixels(GLcontext * ctx, return GL_FALSE; } + if (!ctx->Extensions.ARB_texture_non_power_of_two && + (!is_power_of_two(width) || !is_power_of_two(height))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glDrawPixels() fallback: NPOT texture\n"); + return GL_FALSE; + } + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); @@ -169,22 +176,13 @@ intel_texture_drawpixels(GLcontext * ctx, vertices[3][2] = z; vertices[3][3] = 1.0; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + intel_meta_set_default_texrect(intel); + CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + intel_meta_restore_texcoords(intel); intel_meta_restore_transform(intel); _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); @@ -208,7 +206,6 @@ intel_stencil_drawpixels(GLcontext * ctx, struct intel_context *intel = intel_context(ctx); GLuint texname, rb_name, fb_name, old_fb_name; GLfloat vertices[4][2]; - GLfloat texcoords[4][2]; struct intel_renderbuffer *irb; struct intel_renderbuffer *depth_irb; struct gl_renderbuffer *rb; @@ -273,6 +270,14 @@ intel_stencil_drawpixels(GLcontext * ctx, return GL_FALSE; } + if (!ctx->Extensions.ARB_texture_non_power_of_two && + (!is_power_of_two(width) || !is_power_of_two(height))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glDrawPixels(GL_STENCIL_INDEX) fallback: NPOT texture\n"); + return GL_FALSE; + } + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); @@ -343,7 +348,6 @@ intel_stencil_drawpixels(GLcontext * ctx, _mesa_free(stencil_pixels); intel_meta_set_passthrough_transform(intel); - vertices[0][0] = x; vertices[0][1] = y; vertices[1][0] = x + width * ctx->Pixel.ZoomX; @@ -353,22 +357,13 @@ intel_stencil_drawpixels(GLcontext * ctx, vertices[3][0] = x; vertices[3][1] = y + height * ctx->Pixel.ZoomY; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + intel_meta_set_default_texrect(intel); + CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + intel_meta_restore_texcoords(intel); intel_meta_restore_transform(intel); _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 0aa5b8c02c9..534e75efe1f 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -52,12 +52,66 @@ #define FILE_DEBUG_FLAG DEBUG_REGION +/* This should be set to the maximum backtrace size desired. + * Set it to 0 to disable backtrace debugging. + */ +#define DEBUG_BACKTRACE_SIZE 0 + +#if DEBUG_BACKTRACE_SIZE == 0 +/* Use the standard debug output */ +#define _DBG(...) DBG(__VA_ARGS__) +#else +/* Use backtracing debug output */ +#define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);} + +/* Backtracing debug support */ +#include <execinfo.h> + +static void +debug_backtrace(void) +{ + void *trace[DEBUG_BACKTRACE_SIZE]; + char **strings = NULL; + int traceSize; + register int i; + + traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE); + strings = backtrace_symbols(trace, traceSize); + if (strings == NULL) { + DBG("no backtrace:"); + return; + } + + /* Spit out all the strings with a colon separator. Ignore + * the first, since we don't really care about the call + * to debug_backtrace() itself. Skip until the final "/" in + * the trace to avoid really long lines. + */ + for (i = 1; i < traceSize; i++) { + char *p = strings[i], *slash = strings[i]; + while (*p) { + if (*p++ == '/') { + slash = p; + } + } + + DBG("%s:", slash); + } + + /* Free up the memory, and we're done */ + free(strings); +} + +#endif + + + /* XXX: Thread safety? */ GLubyte * intel_region_map(struct intel_context *intel, struct intel_region *region) { - DBG("%s\n", __FUNCTION__); + _DBG("%s %p\n", __FUNCTION__, region); if (!region->map_refcount++) { if (region->pbo) intel_region_cow(intel, region); @@ -72,7 +126,7 @@ intel_region_map(struct intel_context *intel, struct intel_region *region) void intel_region_unmap(struct intel_context *intel, struct intel_region *region) { - DBG("%s\n", __FUNCTION__); + _DBG("%s %p\n", __FUNCTION__, region); if (!--region->map_refcount) { dri_bo_unmap(region->buffer); region->map = NULL; @@ -87,10 +141,10 @@ intel_region_alloc_internal(struct intel_context *intel, { struct intel_region *region; - DBG("%s\n", __FUNCTION__); - - if (buffer == NULL) + if (buffer == NULL) { + _DBG("%s <-- NULL\n", __FUNCTION__); return NULL; + } region = calloc(sizeof(*region), 1); region->cpp = cpp; @@ -104,6 +158,7 @@ intel_region_alloc_internal(struct intel_context *intel, region->tiling = I915_TILING_NONE; region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; + _DBG("%s <-- %p\n", __FUNCTION__, region); return region; } @@ -158,7 +213,7 @@ void intel_region_reference(struct intel_region **dst, struct intel_region *src) { if (src) - DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); + _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); assert(*dst == NULL); if (src) { @@ -172,10 +227,12 @@ intel_region_release(struct intel_region **region_handle) { struct intel_region *region = *region_handle; - if (region == NULL) + if (region == NULL) { + _DBG("%s NULL\n", __FUNCTION__); return; + } - DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); + _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); ASSERT(region->refcount > 0); region->refcount--; @@ -251,7 +308,7 @@ intel_region_data(struct intel_context *intel, { GLboolean locked = GL_FALSE; - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) return; @@ -293,7 +350,7 @@ intel_region_copy(struct intel_context *intel, GLuint src_offset, GLuint srcx, GLuint srcy, GLuint width, GLuint height) { - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) return; @@ -326,7 +383,7 @@ intel_region_fill(struct intel_context *intel, GLuint dstx, GLuint dsty, GLuint width, GLuint height, GLuint color) { - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) return; @@ -356,6 +413,8 @@ intel_region_attach_pbo(struct intel_context *intel, if (region->pbo == pbo) return; + _DBG("%s %p %p\n", __FUNCTION__, region, pbo); + /* If there is already a pbo attached, break the cow tie now. * Don't call intel_region_release_pbo() as that would * unnecessarily allocate a new buffer we would have to immediately @@ -385,6 +444,7 @@ void intel_region_release_pbo(struct intel_context *intel, struct intel_region *region) { + _DBG("%s %p\n", __FUNCTION__, region); assert(region->buffer == region->pbo->buffer); region->pbo->region = NULL; region->pbo = NULL; @@ -412,7 +472,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) assert(region->cpp * region->pitch * region->height == pbo->Base.Size); - DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size); + _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size); /* Now blit from the texture buffer to the new buffer: */ @@ -459,6 +519,10 @@ intel_recreate_static(struct intel_context *intel, if (region == NULL) { region = calloc(sizeof(*region), 1); region->refcount = 1; + _DBG("%s creating new region %p\n", __FUNCTION__, region); + } + else { + _DBG("%s %p\n", __FUNCTION__, region); } if (intel->ctx.Visual.rgbBits == 24) diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 65e62947ef6..27288231426 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -236,7 +236,7 @@ intel_get_param(__DRIscreenPrivate *psp, int param, int *value) ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); if (ret) { - fprintf(stderr, "drm_i915_getparam: %d\n", ret); + _mesa_warning(NULL, "drm_i915_getparam: %d\n", ret); return GL_FALSE; } diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index ae0994b183a..fbd6e1d0c36 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -158,60 +158,6 @@ timed_memcpy(void *dest, const void *src, size_t n) } #endif /* DO_DEBUG */ -/** - * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap - * level). - * - * The texture object's miptree must be mapped. - * - * It would be really nice if this was just called by Mesa whenever mipmaps - * needed to be regenerated, rather than us having to remember to do so in - * each texture image modification path. - * - * This function should also include an accelerated path. - */ -void -intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - int face, i; - - _mesa_generate_mipmap(ctx, target, texObj); - - /* Update the level information in our private data in the new images, since - * it didn't get set as part of a normal TexImage path. - */ - for (face = 0; face < nr_faces; face++) { - for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { - struct intel_texture_image *intelImage; - - intelImage = intel_texture_image(texObj->Image[face][i]); - if (intelImage == NULL) - break; - - intelImage->level = i; - intelImage->face = face; - /* Unreference the miptree to signal that the new Data is a bare - * pointer from mesa. - */ - intel_miptree_release(intel, &intelImage->mt); - } - } -} - -static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); - intel_generate_mipmap(ctx, target, texObj); - intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); -} - void intelInitTextureFuncs(struct dd_function_table *functions) { @@ -227,7 +173,7 @@ intelInitTextureFuncs(struct dd_function_table *functions) functions->CopyTexSubImage1D = intelCopyTexSubImage1D; functions->CopyTexSubImage2D = intelCopyTexSubImage2D; functions->GetTexImage = intelGetTexImage; - functions->GenerateMipmap = intelGenerateMipmap; + functions->GenerateMipmap = intel_generate_mipmap; /* compressed texture functions */ functions->CompressedTexImage2D = intelCompressedTexImage2D; diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 08437aa0e2b..7c2b26ef1d4 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -158,7 +158,7 @@ do_copy_texsubimage(struct intel_context *intel, /* GL_SGIS_generate_mipmap */ if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) { - ctx->Driver.GenerateMipmap(ctx, target, texObj); + intel_generate_mipmap(ctx, target, texObj); } return GL_TRUE; diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 1f192dafbe1..ddbb13e74ad 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -208,7 +208,7 @@ try_pbo_upload(struct intel_context *intel, if (!pbo || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { - _mesa_printf("%s: failure 1\n", __FUNCTION__); + DBG("%s: failure 1\n", __FUNCTION__); return GL_FALSE; } @@ -264,7 +264,7 @@ try_pbo_zcopy(struct intel_context *intel, if (!pbo || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { - _mesa_printf("%s: failure 1\n", __FUNCTION__); + DBG("%s: failure 1\n", __FUNCTION__); return GL_FALSE; } @@ -283,7 +283,7 @@ try_pbo_zcopy(struct intel_context *intel, dst_stride = intelImage->mt->pitch; if (src_stride != dst_stride || dst_offset != 0 || src_offset != 0) { - _mesa_printf("%s: failure 2\n", __FUNCTION__); + DBG("%s: failure 2\n", __FUNCTION__); return GL_FALSE; } @@ -316,7 +316,6 @@ intelTexImage(GLcontext * ctx, GLint postConvHeight = height; GLint texelBytes, sizeInBytes; GLuint dstRowStride = 0, srcRowStride = texImage->RowStride; - GLboolean needs_map; DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); @@ -482,15 +481,8 @@ intelTexImage(GLcontext * ctx, LOCK_HARDWARE(intel); - /* Two cases where we need a mapping of the miptree: when the user supplied - * data is mapped as well (non-PBO, memcpy upload) or when we're going to do - * (software) mipmap generation. - */ - needs_map = (pixels != NULL) || (level == texObj->BaseLevel && - texObj->GenerateMipmap); - if (intelImage->mt) { - if (needs_map) + if (pixels != NULL) texImage->Data = intel_miptree_image_map(intel, intelImage->mt, intelImage->face, @@ -547,22 +539,22 @@ intelTexImage(GLcontext * ctx, format, type, pixels, unpack)) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); } - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } } _mesa_unmap_teximage_pbo(ctx, unpack); if (intelImage->mt) { - if (needs_map) + if (pixels != NULL) intel_miptree_image_unmap(intel, intelImage->mt); texImage->Data = NULL; } UNLOCK_HARDWARE(intel); + + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } } void diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index f86de568976..48104de2a9b 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -101,11 +101,6 @@ intelTexSubimage(GLcontext * ctx, _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } - _mesa_unmap_teximage_pbo(ctx, packing); if (intelImage->mt) { @@ -114,6 +109,11 @@ intelTexSubimage(GLcontext * ctx, } UNLOCK_HARDWARE(intel); + + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 05a375e1f3b..b5cb7597d16 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -241,7 +241,7 @@ intel_tex_map_level_images(struct intel_context *intel, struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][level]); - if (intelImage->mt) { + if (intelImage && intelImage->mt) { intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, @@ -268,7 +268,7 @@ intel_tex_unmap_level_images(struct intel_context *intel, struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][level]); - if (intelImage->mt) { + if (intelImage && intelImage->mt) { intel_miptree_image_unmap(intel, intelImage->mt); intelImage->base.Data = NULL; } diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 2fcc87c0f5a..81ee1ed022c 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2480,11 +2480,11 @@ void r200ValidateState( GLcontext *ctx ) r200ContextPtr rmesa = R200_CONTEXT(ctx); GLuint new_state = rmesa->NewGLState; - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (new_state & _NEW_BUFFERS) { r200UpdateDrawBuffer(ctx); } - if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) { + if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) { r200UpdateTextureState( ctx ); new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */ r200UpdateLocalViewer( ctx ); @@ -2523,6 +2523,7 @@ void r200ValidateState( GLcontext *ctx ) } if (new_state & (_NEW_PROGRAM| + _NEW_PROGRAM_CONSTANTS | /* need to test for pretty much anything due to possible parameter bindings */ _NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM| _NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX| diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 873cde44144..2f45429cf27 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -470,7 +470,8 @@ void r300TranslateFragmentShader(r300ContextPtr r300, fp->translated = GL_TRUE; if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) r300FragmentProgramDump(fp, &fp->code); - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM | + _NEW_PROGRAM_CONSTANTS); } update_params(r300, fp); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 79f0b3625ca..07299f3b36f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1109,7 +1109,7 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) struct gl_program_parameter_list *paramList; GLuint i; - if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM))) + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; @@ -2357,11 +2357,12 @@ void r300UpdateShaders(r300ContextPtr rmesa) hw_tcl_on = future_hw_tcl_on = 0; r300ResetHwState(rmesa); - r300UpdateStateParameters(ctx, _NEW_PROGRAM); + r300UpdateStateParameters(ctx, _NEW_PROGRAM | + _NEW_PROGRAM_CONSTANTS); return; } } - r300UpdateStateParameters(ctx, _NEW_PROGRAM); + r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); } static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, @@ -2588,7 +2589,7 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) _tnl_InvalidateState(ctx, new_state); _ae_invalidate_state(ctx, new_state); - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (new_state & _NEW_BUFFERS) { r300UpdateDrawBuffer(ctx); } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 292573de893..300559d0b4d 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -501,7 +501,8 @@ void r500TranslateFragmentShader(r300ContextPtr r300, _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM | + _NEW_PROGRAM_CONSTANTS); if (RADEON_DEBUG & DEBUG_PIXEL) { if (fp->translated) { diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index b6561001e76..4432f85691e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -2255,7 +2255,7 @@ void radeonValidateState( GLcontext *ctx ) radeonContextPtr rmesa = RADEON_CONTEXT(ctx); GLuint new_state = rmesa->NewGLState; - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (new_state & _NEW_BUFFERS) { radeonUpdateDrawBuffer(ctx); } diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 305df548fa2..9a01465bdf9 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -912,8 +912,9 @@ xmesa_update_state( GLcontext *ctx, GLbitfield new_state ) /* * GL_DITHER, GL_READ/DRAW_BUFFER, buffer binding state, etc. effect * renderbuffer span/clear funcs. + * Check _NEW_COLOR to detect dither enable/disable. */ - if (new_state & (_NEW_COLOR | _NEW_PIXEL | _NEW_BUFFERS)) { + if (new_state & (_NEW_COLOR | _NEW_BUFFERS)) { XMesaBuffer xmbuf = XMESA_BUFFER(ctx->DrawBuffer); struct xmesa_renderbuffer *front_xrb, *back_xrb; diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c index d124c724c99..f5b7d1e1385 100644 --- a/src/mesa/main/api_arrayelt.c +++ b/src/mesa/main/api_arrayelt.c @@ -1094,48 +1094,49 @@ static void _ae_update_state( GLcontext *ctx ) AEarray *aa = actx->arrays; AEattrib *at = actx->attribs; GLuint i; + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; actx->nr_vbos = 0; /* conventional vertex arrays */ - if (ctx->Array.ArrayObj->Index.Enabled) { - aa->array = &ctx->Array.ArrayObj->Index; + if (arrayObj->Index.Enabled) { + aa->array = &arrayObj->Index; aa->offset = IndexFuncs[TYPE_IDX(aa->array->Type)]; check_vbo(actx, aa->array->BufferObj); aa++; } - if (ctx->Array.ArrayObj->EdgeFlag.Enabled) { - aa->array = &ctx->Array.ArrayObj->EdgeFlag; + if (arrayObj->EdgeFlag.Enabled) { + aa->array = &arrayObj->EdgeFlag; aa->offset = _gloffset_EdgeFlagv; check_vbo(actx, aa->array->BufferObj); aa++; } - if (ctx->Array.ArrayObj->Normal.Enabled) { - aa->array = &ctx->Array.ArrayObj->Normal; + if (arrayObj->Normal.Enabled) { + aa->array = &arrayObj->Normal; aa->offset = NormalFuncs[TYPE_IDX(aa->array->Type)]; check_vbo(actx, aa->array->BufferObj); aa++; } - if (ctx->Array.ArrayObj->Color.Enabled) { - aa->array = &ctx->Array.ArrayObj->Color; + if (arrayObj->Color.Enabled) { + aa->array = &arrayObj->Color; aa->offset = ColorFuncs[aa->array->Size-3][TYPE_IDX(aa->array->Type)]; check_vbo(actx, aa->array->BufferObj); aa++; } - if (ctx->Array.ArrayObj->SecondaryColor.Enabled) { - aa->array = &ctx->Array.ArrayObj->SecondaryColor; + if (arrayObj->SecondaryColor.Enabled) { + aa->array = &arrayObj->SecondaryColor; aa->offset = SecondaryColorFuncs[TYPE_IDX(aa->array->Type)]; check_vbo(actx, aa->array->BufferObj); aa++; } - if (ctx->Array.ArrayObj->FogCoord.Enabled) { - aa->array = &ctx->Array.ArrayObj->FogCoord; + if (arrayObj->FogCoord.Enabled) { + aa->array = &arrayObj->FogCoord; aa->offset = FogCoordFuncs[TYPE_IDX(aa->array->Type)]; check_vbo(actx, aa->array->BufferObj); aa++; } for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { - struct gl_client_array *attribArray = &ctx->Array.ArrayObj->TexCoord[i]; + struct gl_client_array *attribArray = &arrayObj->TexCoord[i]; if (attribArray->Enabled) { /* NOTE: we use generic glVertexAttribNV functions here. * If we ever remove GL_NV_vertex_program this will have to change. @@ -1152,8 +1153,8 @@ static void _ae_update_state( GLcontext *ctx ) } /* generic vertex attribute arrays */ - for (i = 1; i < VERT_ATTRIB_MAX; i++) { /* skip zero! */ - struct gl_client_array *attribArray = &ctx->Array.ArrayObj->VertexAttrib[i]; + for (i = 1; i < Elements(arrayObj->VertexAttrib); i++) { /* skip zero! */ + struct gl_client_array *attribArray = &arrayObj->VertexAttrib[i]; if (attribArray->Enabled) { at->array = attribArray; /* Note: we can't grab the _glapi_Dispatch->VertexAttrib1fvNV @@ -1179,18 +1180,18 @@ static void _ae_update_state( GLcontext *ctx ) } /* finally, vertex position */ - if (ctx->Array.ArrayObj->VertexAttrib[0].Enabled) { + if (arrayObj->VertexAttrib[0].Enabled) { /* Use glVertex(v) instead of glVertexAttrib(0, v) to be sure it's * issued as the last (provoking) attribute). */ - aa->array = &ctx->Array.ArrayObj->VertexAttrib[0]; + aa->array = &arrayObj->VertexAttrib[0]; assert(aa->array->Size >= 2); /* XXX fix someday? */ aa->offset = VertexFuncs[aa->array->Size-2][TYPE_IDX(aa->array->Type)]; check_vbo(actx, aa->array->BufferObj); aa++; } - else if (ctx->Array.ArrayObj->Vertex.Enabled) { - aa->array = &ctx->Array.ArrayObj->Vertex; + else if (arrayObj->Vertex.Enabled) { + aa->array = &arrayObj->Vertex; aa->offset = VertexFuncs[aa->array->Size-2][TYPE_IDX(aa->array->Type)]; check_vbo(actx, aa->array->BufferObj); aa++; diff --git a/src/mesa/main/api_noop.c b/src/mesa/main/api_noop.c index a1cc3a2a4b8..66f9c4e6bdb 100644 --- a/src/mesa/main/api_noop.c +++ b/src/mesa/main/api_noop.c @@ -477,7 +477,7 @@ static void GLAPIENTRY _mesa_noop_VertexAttrib4fvNV( GLuint index, const GLfloat static void GLAPIENTRY _mesa_noop_VertexAttrib1fARB( GLuint index, GLfloat x ) { GET_CURRENT_CONTEXT(ctx); - if (index < MAX_VERTEX_ATTRIBS) { + if (index < MAX_VERTEX_GENERIC_ATTRIBS) { ASSIGN_4V(ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + index], x, 0, 0, 1); } else @@ -487,7 +487,7 @@ static void GLAPIENTRY _mesa_noop_VertexAttrib1fARB( GLuint index, GLfloat x ) static void GLAPIENTRY _mesa_noop_VertexAttrib1fvARB( GLuint index, const GLfloat *v ) { GET_CURRENT_CONTEXT(ctx); - if (index < MAX_VERTEX_ATTRIBS) { + if (index < MAX_VERTEX_GENERIC_ATTRIBS) { ASSIGN_4V(ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + index], v[0], 0, 0, 1); } else @@ -497,7 +497,7 @@ static void GLAPIENTRY _mesa_noop_VertexAttrib1fvARB( GLuint index, const GLfloa static void GLAPIENTRY _mesa_noop_VertexAttrib2fARB( GLuint index, GLfloat x, GLfloat y ) { GET_CURRENT_CONTEXT(ctx); - if (index < MAX_VERTEX_ATTRIBS) { + if (index < MAX_VERTEX_GENERIC_ATTRIBS) { ASSIGN_4V(ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + index], x, y, 0, 1); } else @@ -507,7 +507,7 @@ static void GLAPIENTRY _mesa_noop_VertexAttrib2fARB( GLuint index, GLfloat x, GL static void GLAPIENTRY _mesa_noop_VertexAttrib2fvARB( GLuint index, const GLfloat *v ) { GET_CURRENT_CONTEXT(ctx); - if (index < MAX_VERTEX_ATTRIBS) { + if (index < MAX_VERTEX_GENERIC_ATTRIBS) { ASSIGN_4V(ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + index], v[0], v[1], 0, 1); } else @@ -518,7 +518,7 @@ static void GLAPIENTRY _mesa_noop_VertexAttrib3fARB( GLuint index, GLfloat x, GLfloat y, GLfloat z ) { GET_CURRENT_CONTEXT(ctx); - if (index < MAX_VERTEX_ATTRIBS) { + if (index < MAX_VERTEX_GENERIC_ATTRIBS) { ASSIGN_4V(ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + index], x, y, z, 1); } else @@ -528,7 +528,7 @@ static void GLAPIENTRY _mesa_noop_VertexAttrib3fARB( GLuint index, GLfloat x, static void GLAPIENTRY _mesa_noop_VertexAttrib3fvARB( GLuint index, const GLfloat *v ) { GET_CURRENT_CONTEXT(ctx); - if (index < MAX_VERTEX_ATTRIBS) { + if (index < MAX_VERTEX_GENERIC_ATTRIBS) { ASSIGN_4V(ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + index], v[0], v[1], v[2], 1); } else @@ -539,7 +539,7 @@ static void GLAPIENTRY _mesa_noop_VertexAttrib4fARB( GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w ) { GET_CURRENT_CONTEXT(ctx); - if (index < MAX_VERTEX_ATTRIBS) { + if (index < MAX_VERTEX_GENERIC_ATTRIBS) { ASSIGN_4V(ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + index], x, y, z, w); } else @@ -549,7 +549,7 @@ static void GLAPIENTRY _mesa_noop_VertexAttrib4fARB( GLuint index, GLfloat x, static void GLAPIENTRY _mesa_noop_VertexAttrib4fvARB( GLuint index, const GLfloat *v ) { GET_CURRENT_CONTEXT(ctx); - if (index < MAX_VERTEX_ATTRIBS) { + if (index < MAX_VERTEX_GENERIC_ATTRIBS) { ASSIGN_4V(ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + index], v[0], v[1], v[2], v[3]); } else diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 42d1e579e08..15076712502 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -30,6 +30,26 @@ #include "state.h" + +/** + * \return number of bytes in array [count] of type. + */ +static GLsizei +index_bytes(GLenum type, GLsizei count) +{ + if (type == GL_UNSIGNED_INT) { + return count * sizeof(GLuint); + } + else if (type == GL_UNSIGNED_BYTE) { + return count * sizeof(GLubyte); + } + else { + ASSERT(type == GL_UNSIGNED_SHORT); + return count * sizeof(GLushort); + } +} + + /** * Find the max index in the given element/index buffer */ @@ -44,10 +64,8 @@ max_buffer_index(GLcontext *ctx, GLuint count, GLenum type, if (elementBuf->Name) { /* elements are in a user-defined buffer object. need to map it */ - map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, - elementBuf); + map = ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, + GL_READ_ONLY, elementBuf); /* Actual address is the sum of pointers */ indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices); } @@ -70,14 +88,16 @@ max_buffer_index(GLcontext *ctx, GLuint count, GLenum type, } if (map) { - ctx->Driver.UnmapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - ctx->Array.ElementArrayBufferObj); + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuf); } return max; } + +/** + * Check if OK to render by examining framebuffer status and vertex arrays. + */ static GLboolean check_valid_to_render(GLcontext *ctx, char *function) { @@ -105,6 +125,12 @@ check_valid_to_render(GLcontext *ctx, char *function) return GL_TRUE; } + +/** + * Error checking for glDrawElements(). Includes parameter checking + * and VBO bounds checking. + * \return GL_TRUE if OK to render, GL_FALSE if error found + */ GLboolean _mesa_validate_DrawElements(GLcontext *ctx, GLenum mode, GLsizei count, GLenum type, @@ -140,27 +166,8 @@ _mesa_validate_DrawElements(GLcontext *ctx, /* Vertex buffer object tests */ if (ctx->Array.ElementArrayBufferObj->Name) { /* use indices in the buffer object */ - GLuint indexBytes; - - if (!ctx->Array.ElementArrayBufferObj->Size) { - _mesa_warning(ctx, - "glDrawElements called with empty array elements buffer"); - return GL_FALSE; - } - - if (type == GL_UNSIGNED_INT) { - indexBytes = count * sizeof(GLuint); - } - else if (type == GL_UNSIGNED_BYTE) { - indexBytes = count * sizeof(GLubyte); - } - else { - ASSERT(type == GL_UNSIGNED_SHORT); - indexBytes = count * sizeof(GLushort); - } - /* make sure count doesn't go outside buffer bounds */ - if (indexBytes > (GLuint) ctx->Array.ElementArrayBufferObj->Size) { + if (index_bytes(type, count) > ctx->Array.ElementArrayBufferObj->Size) { _mesa_warning(ctx, "glDrawElements index out of buffer bounds"); return GL_FALSE; } @@ -175,8 +182,10 @@ _mesa_validate_DrawElements(GLcontext *ctx, /* find max array index */ GLuint max = max_buffer_index(ctx, count, type, indices, ctx->Array.ElementArrayBufferObj); - if (max >= ctx->Array._MaxElement) { + if (max >= ctx->Array.ArrayObj->_MaxElement) { /* the max element is out of bounds of one or more enabled arrays */ + _mesa_warning(ctx, "glDrawElements() index=%u is " + "out of bounds (max=%u)", max, ctx->Array.ArrayObj->_MaxElement); return GL_FALSE; } } @@ -184,6 +193,12 @@ _mesa_validate_DrawElements(GLcontext *ctx, return GL_TRUE; } + +/** + * Error checking for glDrawRangeElements(). Includes parameter checking + * and VBO bounds checking. + * \return GL_TRUE if OK to render, GL_FALSE if error found + */ GLboolean _mesa_validate_DrawRangeElements(GLcontext *ctx, GLenum mode, GLuint start, GLuint end, @@ -224,21 +239,8 @@ _mesa_validate_DrawRangeElements(GLcontext *ctx, GLenum mode, /* Vertex buffer object tests */ if (ctx->Array.ElementArrayBufferObj->Name) { /* use indices in the buffer object */ - GLuint indexBytes; - - if (type == GL_UNSIGNED_INT) { - indexBytes = count * sizeof(GLuint); - } - else if (type == GL_UNSIGNED_BYTE) { - indexBytes = count * sizeof(GLubyte); - } - else { - ASSERT(type == GL_UNSIGNED_SHORT); - indexBytes = count * sizeof(GLushort); - } - /* make sure count doesn't go outside buffer bounds */ - if (indexBytes > ctx->Array.ElementArrayBufferObj->Size) { + if (index_bytes(type, count) > ctx->Array.ElementArrayBufferObj->Size) { _mesa_warning(ctx, "glDrawRangeElements index out of buffer bounds"); return GL_FALSE; } @@ -252,7 +254,7 @@ _mesa_validate_DrawRangeElements(GLcontext *ctx, GLenum mode, if (ctx->Const.CheckArrayBounds) { GLuint max = max_buffer_index(ctx, count, type, indices, ctx->Array.ElementArrayBufferObj); - if (max >= ctx->Array._MaxElement) { + if (max >= ctx->Array.ArrayObj->_MaxElement) { /* the max element is out of bounds of one or more enabled arrays */ return GL_FALSE; } @@ -265,6 +267,7 @@ _mesa_validate_DrawRangeElements(GLcontext *ctx, GLenum mode, /** * Called from the tnl module to error check the function parameters and * verify that we really can draw something. + * \return GL_TRUE if OK to render, GL_FALSE if error found */ GLboolean _mesa_validate_DrawArrays(GLcontext *ctx, @@ -290,7 +293,7 @@ _mesa_validate_DrawArrays(GLcontext *ctx, return GL_FALSE; if (ctx->Const.CheckArrayBounds) { - if (start + count > (GLint) ctx->Array._MaxElement) + if (start + count > (GLint) ctx->Array.ArrayObj->_MaxElement) return GL_FALSE; } diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 0fa5f0de551..c03353b78f5 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -1,9 +1,10 @@ /* * Mesa 3-D graphics library - * Version: 7.2 + * Version: 7.6 * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * (C) Copyright IBM Corporation 2006 + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -46,6 +47,7 @@ #include "bufferobj.h" #endif #include "arrayobj.h" +#include "macros.h" #include "glapi/dispatch.h" @@ -79,6 +81,7 @@ unbind_array_object_vbos(GLcontext *ctx, struct gl_array_object *obj) GLuint i; _mesa_reference_buffer_object(ctx, &obj->Vertex.BufferObj, NULL); + _mesa_reference_buffer_object(ctx, &obj->Weight.BufferObj, NULL); _mesa_reference_buffer_object(ctx, &obj->Normal.BufferObj, NULL); _mesa_reference_buffer_object(ctx, &obj->Color.BufferObj, NULL); _mesa_reference_buffer_object(ctx, &obj->SecondaryColor.BufferObj, NULL); @@ -86,10 +89,10 @@ unbind_array_object_vbos(GLcontext *ctx, struct gl_array_object *obj) _mesa_reference_buffer_object(ctx, &obj->Index.BufferObj, NULL); _mesa_reference_buffer_object(ctx, &obj->EdgeFlag.BufferObj, NULL); - for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) + for (i = 0; i < Elements(obj->TexCoord); i++) _mesa_reference_buffer_object(ctx, &obj->TexCoord[i].BufferObj, NULL); - for (i = 0; i < VERT_ATTRIB_MAX; i++) + for (i = 0; i < Elements(obj->VertexAttrib); i++) _mesa_reference_buffer_object(ctx, &obj->VertexAttrib[i].BufferObj,NULL); } @@ -198,7 +201,8 @@ init_array(GLcontext *ctx, array->Normalized = GL_FALSE; #if FEATURE_ARB_vertex_buffer_object /* Vertex array buffers */ - array->BufferObj = ctx->Array.NullBufferObj; + _mesa_reference_buffer_object(ctx, &array->BufferObj, + ctx->Shared->NullBufferObj); #endif } @@ -220,16 +224,17 @@ _mesa_initialize_array_object( GLcontext *ctx, /* Init the individual arrays */ init_array(ctx, &obj->Vertex, 4, GL_FLOAT); + init_array(ctx, &obj->Weight, 1, GL_FLOAT); init_array(ctx, &obj->Normal, 3, GL_FLOAT); init_array(ctx, &obj->Color, 4, GL_FLOAT); init_array(ctx, &obj->SecondaryColor, 4, GL_FLOAT); init_array(ctx, &obj->FogCoord, 1, GL_FLOAT); init_array(ctx, &obj->Index, 1, GL_FLOAT); - for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { + for (i = 0; i < Elements(obj->TexCoord); i++) { init_array(ctx, &obj->TexCoord[i], 4, GL_FLOAT); } init_array(ctx, &obj->EdgeFlag, 1, GL_BOOL); - for (i = 0; i < VERT_ATTRIB_MAX; i++) { + for (i = 0; i < Elements(obj->VertexAttrib); i++) { init_array(ctx, &obj->VertexAttrib[i], 4, GL_FLOAT); } @@ -242,8 +247,8 @@ _mesa_initialize_array_object( GLcontext *ctx, /** * Add the given array object to the array object pool. */ -void -_mesa_save_array_object( GLcontext *ctx, struct gl_array_object *obj ) +static void +save_array_object( GLcontext *ctx, struct gl_array_object *obj ) { if (obj->Name > 0) { /* insert into hash table */ @@ -256,8 +261,8 @@ _mesa_save_array_object( GLcontext *ctx, struct gl_array_object *obj ) * Remove the given array object from the array object pool. * Do not deallocate the array object though. */ -void -_mesa_remove_array_object( GLcontext *ctx, struct gl_array_object *obj ) +static void +remove_array_object( GLcontext *ctx, struct gl_array_object *obj ) { if (obj->Name > 0) { /* remove from hash table */ @@ -266,6 +271,83 @@ _mesa_remove_array_object( GLcontext *ctx, struct gl_array_object *obj ) } + +/** + * Compute the index of the last array element that can be safely accessed + * in a vertex array. We can really only do this when the array lives in + * a VBO. + * The array->_MaxElement field will be updated. + * Later in glDrawArrays/Elements/etc we can do some bounds checking. + */ +static void +compute_max_element(struct gl_client_array *array) +{ + if (array->BufferObj->Name) { + /* Compute the max element we can access in the VBO without going + * out of bounds. + */ + array->_MaxElement = ((GLsizeiptrARB) array->BufferObj->Size + - (GLsizeiptrARB) array->Ptr + array->StrideB + - array->_ElementSize) / array->StrideB; + if (0) + _mesa_printf("%s Object %u Size %u MaxElement %u\n", + __FUNCTION__, + array->BufferObj->Name, + (GLuint) array->BufferObj->Size, + array->_MaxElement); + } + else { + /* user-space array, no idea how big it is */ + array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */ + } +} + + +/** + * Helper for update_arrays(). + * \return min(current min, array->_MaxElement). + */ +static GLuint +update_min(GLuint min, struct gl_client_array *array) +{ + compute_max_element(array); + if (array->Enabled) + return MIN2(min, array->_MaxElement); + else + return min; +} + + +/** + * Examine vertex arrays to update the gl_array_object::_MaxElement field. + */ +void +_mesa_update_array_object_max_element(GLcontext *ctx, + struct gl_array_object *arrayObj) +{ + GLuint i, min = ~0; + + min = update_min(min, &arrayObj->Vertex); + min = update_min(min, &arrayObj->Weight); + min = update_min(min, &arrayObj->Normal); + min = update_min(min, &arrayObj->Color); + min = update_min(min, &arrayObj->SecondaryColor); + min = update_min(min, &arrayObj->FogCoord); + min = update_min(min, &arrayObj->Index); + min = update_min(min, &arrayObj->EdgeFlag); +#if FEATURE_point_size_array + min = update_min(min, &arrayObj->PointSize); +#endif + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) + min = update_min(min, &arrayObj->TexCoord[i]); + for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) + min = update_min(min, &arrayObj->VertexAttrib[i]); + + /* _MaxElement is one past the last legal array element */ + arrayObj->_MaxElement = min; +} + + /**********************************************************************/ /* API Functions */ /**********************************************************************/ @@ -311,7 +393,7 @@ _mesa_BindVertexArrayAPPLE( GLuint id ) _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindVertexArrayAPPLE"); return; } - _mesa_save_array_object(ctx, newObj); + save_array_object(ctx, newObj); } } @@ -360,7 +442,7 @@ _mesa_DeleteVertexArraysAPPLE(GLsizei n, const GLuint *ids) } /* The ID is immediately freed for re-use */ - _mesa_remove_array_object(ctx, obj); + remove_array_object(ctx, obj); /* Unreference the array object. * If refcount hits zero, the object will be deleted. @@ -414,7 +496,7 @@ _mesa_GenVertexArraysAPPLE(GLsizei n, GLuint *arrays) _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenVertexArraysAPPLE"); return; } - _mesa_save_array_object(ctx, obj); + save_array_object(ctx, obj); arrays[i] = first + i; } diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h index 9c4036af5a7..abca5ab9b4c 100644 --- a/src/mesa/main/arrayobj.h +++ b/src/mesa/main/arrayobj.h @@ -1,9 +1,10 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 7.6 * * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. * (C) Copyright IBM Corporation 2006 + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -56,12 +57,10 @@ extern void _mesa_initialize_array_object( GLcontext *ctx, struct gl_array_object *obj, GLuint name ); -extern void -_mesa_save_array_object( GLcontext *ctx, struct gl_array_object *obj ); extern void -_mesa_remove_array_object( GLcontext *ctx, struct gl_array_object *obj ); - +_mesa_update_array_object_max_element(GLcontext *ctx, + struct gl_array_object *arrayObj); /* diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index e43fa96dd31..476a24434cc 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 7.3 + * Version: 7.6 * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * Copyright (C) 2009 VMware, Inc. All Rights Reserved. @@ -541,6 +541,7 @@ end: static void pop_enable_group(GLcontext *ctx, const struct gl_enable_attrib *enable) { + const GLuint curTexUnitSave = ctx->Texture.CurrentUnit; GLuint i; #define TEST_AND_UPDATE(VALUE, NEWVALUE, ENUM) \ @@ -685,59 +686,51 @@ pop_enable_group(GLcontext *ctx, const struct gl_enable_attrib *enable) /* texture unit enables */ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (ctx->Texture.Unit[i].Enabled != enable->Texture[i]) { - ctx->Texture.Unit[i].Enabled = enable->Texture[i]; - if (ctx->Driver.Enable) { - if (ctx->Driver.ActiveTexture) { - (*ctx->Driver.ActiveTexture)(ctx, i); - } - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_1D, - (GLboolean) (enable->Texture[i] & TEXTURE_1D_BIT) ); - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_2D, - (GLboolean) (enable->Texture[i] & TEXTURE_2D_BIT) ); - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_3D, - (GLboolean) (enable->Texture[i] & TEXTURE_3D_BIT) ); - if (ctx->Extensions.ARB_texture_cube_map) - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_CUBE_MAP_ARB, - (GLboolean) (enable->Texture[i] & TEXTURE_CUBE_BIT) ); - if (ctx->Extensions.NV_texture_rectangle) - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_RECTANGLE_NV, - (GLboolean) (enable->Texture[i] & TEXTURE_RECT_BIT) ); + const GLbitfield enabled = enable->Texture[i]; + const GLbitfield genEnabled = enable->TexGen[i]; + + if (ctx->Texture.Unit[i].Enabled != enabled) { + _mesa_ActiveTextureARB(GL_TEXTURE0 + i); + + _mesa_set_enable(ctx, GL_TEXTURE_1D, + (enabled & TEXTURE_1D_BIT) ? GL_TRUE : GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_2D, + (enabled & TEXTURE_2D_BIT) ? GL_TRUE : GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_3D, + (enabled & TEXTURE_3D_BIT) ? GL_TRUE : GL_FALSE); + if (ctx->Extensions.NV_texture_rectangle) { + _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE_ARB, + (enabled & TEXTURE_RECT_BIT) ? GL_TRUE : GL_FALSE); + } + if (ctx->Extensions.ARB_texture_cube_map) { + _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, + (enabled & TEXTURE_CUBE_BIT) ? GL_TRUE : GL_FALSE); + } + if (ctx->Extensions.MESA_texture_array) { + _mesa_set_enable(ctx, GL_TEXTURE_1D_ARRAY_EXT, + (enabled & TEXTURE_1D_ARRAY_BIT) ? GL_TRUE : GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_2D_ARRAY_EXT, + (enabled & TEXTURE_2D_ARRAY_BIT) ? GL_TRUE : GL_FALSE); } } - if (ctx->Texture.Unit[i].TexGenEnabled != enable->TexGen[i]) { - ctx->Texture.Unit[i].TexGenEnabled = enable->TexGen[i]; - if (ctx->Driver.Enable) { - if (ctx->Driver.ActiveTexture) { - (*ctx->Driver.ActiveTexture)(ctx, i); - } - if (enable->TexGen[i] & S_BIT) - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_S, GL_TRUE); - else - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_S, GL_FALSE); - if (enable->TexGen[i] & T_BIT) - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_T, GL_TRUE); - else - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_T, GL_FALSE); - if (enable->TexGen[i] & R_BIT) - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_R, GL_TRUE); - else - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_R, GL_FALSE); - if (enable->TexGen[i] & Q_BIT) - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_Q, GL_TRUE); - else - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_Q, GL_FALSE); - } + if (ctx->Texture.Unit[i].TexGenEnabled != genEnabled) { + _mesa_ActiveTextureARB(GL_TEXTURE0 + i); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_S, + (genEnabled & S_BIT) ? GL_TRUE : GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_T, + (genEnabled & T_BIT) ? GL_TRUE : GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_R, + (genEnabled & R_BIT) ? GL_TRUE : GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_GEN_Q, + (genEnabled & Q_BIT) ? GL_TRUE : GL_FALSE); } /* GL_SGI_texture_color_table */ ctx->Texture.Unit[i].ColorTableEnabled = enable->TextureColorTable[i]; } - if (ctx->Driver.ActiveTexture) { - (*ctx->Driver.ActiveTexture)(ctx, ctx->Texture.CurrentUnit); - } + _mesa_ActiveTextureARB(GL_TEXTURE0 + curTexUnitSave); } @@ -770,6 +763,13 @@ pop_texture_group(GLcontext *ctx, struct texture_state *texstate) _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE_NV, (unit->Enabled & TEXTURE_RECT_BIT) ? GL_TRUE : GL_FALSE); } + if (ctx->Extensions.MESA_texture_array) { + _mesa_set_enable(ctx, GL_TEXTURE_1D_ARRAY_EXT, + (unit->Enabled & TEXTURE_1D_ARRAY_BIT) ? GL_TRUE : GL_FALSE); + _mesa_set_enable(ctx, GL_TEXTURE_2D_ARRAY_EXT, + (unit->Enabled & TEXTURE_2D_ARRAY_BIT) ? GL_TRUE : GL_FALSE); + } + if (ctx->Extensions.SGI_texture_color_table) { _mesa_set_enable(ctx, GL_TEXTURE_COLOR_TABLE_SGI, unit->ColorTableEnabled); @@ -1330,20 +1330,22 @@ _mesa_PopAttrib(void) * counts when pushing/popping the GL_CLIENT_VERTEX_ARRAY_BIT attribute group. */ static void -adjust_buffer_object_ref_counts(struct gl_array_attrib *array, GLint step) +adjust_buffer_object_ref_counts(struct gl_array_object *arrayObj, GLint step) { GLuint i; - array->ArrayObj->Vertex.BufferObj->RefCount += step; - array->ArrayObj->Normal.BufferObj->RefCount += step; - array->ArrayObj->Color.BufferObj->RefCount += step; - array->ArrayObj->SecondaryColor.BufferObj->RefCount += step; - array->ArrayObj->FogCoord.BufferObj->RefCount += step; - array->ArrayObj->Index.BufferObj->RefCount += step; - array->ArrayObj->EdgeFlag.BufferObj->RefCount += step; - for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) - array->ArrayObj->TexCoord[i].BufferObj->RefCount += step; - for (i = 0; i < VERT_ATTRIB_MAX; i++) - array->ArrayObj->VertexAttrib[i].BufferObj->RefCount += step; + + arrayObj->Vertex.BufferObj->RefCount += step; + arrayObj->Weight.BufferObj->RefCount += step; + arrayObj->Normal.BufferObj->RefCount += step; + arrayObj->Color.BufferObj->RefCount += step; + arrayObj->SecondaryColor.BufferObj->RefCount += step; + arrayObj->FogCoord.BufferObj->RefCount += step; + arrayObj->Index.BufferObj->RefCount += step; + arrayObj->EdgeFlag.BufferObj->RefCount += step; + for (i = 0; i < Elements(arrayObj->TexCoord); i++) + arrayObj->TexCoord[i].BufferObj->RefCount += step; + for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) + arrayObj->VertexAttrib[i].BufferObj->RefCount += step; } @@ -1434,7 +1436,7 @@ _mesa_PushClientAttrib(GLbitfield mask) newnode->next = head; head = newnode; /* bump reference counts on buffer objects */ - adjust_buffer_object_ref_counts(&ctx->Array, 1); + adjust_buffer_object_ref_counts(ctx->Array.ArrayObj, 1); } ctx->ClientAttribStack[ctx->ClientAttribStackDepth] = head; @@ -1484,7 +1486,7 @@ _mesa_PopClientAttrib(void) struct gl_array_attrib * data = (struct gl_array_attrib *) node->data; - adjust_buffer_object_ref_counts(&ctx->Array, -1); + adjust_buffer_object_ref_counts(ctx->Array.ArrayObj, -1); ctx->Array.ActiveTexture = data->ActiveTexture; if (data->LockCount != 0) diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index c8d160baa9a..90daa2e5b89 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -194,7 +194,7 @@ _mesa_reference_buffer_object(GLcontext *ctx, return; if (*ptr) { - /* Unreference the old texture */ + /* Unreference the old buffer */ GLboolean deleteFlag = GL_FALSE; struct gl_buffer_object *oldObj = *ptr; @@ -227,7 +227,7 @@ _mesa_reference_buffer_object(GLcontext *ctx, ASSERT(!*ptr); if (bufObj) { - /* reference new texture */ + /* reference new buffer */ /*_glthread_LOCK_MUTEX(tex->Mutex);*/ if (bufObj->RefCount == 0) { /* this buffer's being deleted (look just above) */ @@ -389,7 +389,6 @@ _mesa_buffer_map( GLcontext *ctx, GLenum target, GLenum access, (void) ctx; (void) target; (void) access; - ASSERT(!bufObj->OnCard); /* Just return a direct pointer to the data */ if (bufObj->Pointer) { /* already mapped! */ @@ -413,7 +412,6 @@ _mesa_buffer_unmap( GLcontext *ctx, GLenum target, { (void) ctx; (void) target; - ASSERT(!bufObj->OnCard); /* XXX we might assert here that bufObj->Pointer is non-null */ bufObj->Pointer = NULL; return GL_TRUE; @@ -426,16 +424,8 @@ _mesa_buffer_unmap( GLcontext *ctx, GLenum target, void _mesa_init_buffer_objects( GLcontext *ctx ) { - /* Allocate the default buffer object and set refcount so high that - * it never gets deleted. - * XXX with recent/improved refcounting this may not longer be needed. - */ - ctx->Array.NullBufferObj = _mesa_new_buffer_object(ctx, 0, 0); - if (ctx->Array.NullBufferObj) - ctx->Array.NullBufferObj->RefCount = 1000; - - ctx->Array.ArrayBufferObj = ctx->Array.NullBufferObj; - ctx->Array.ElementArrayBufferObj = ctx->Array.NullBufferObj; + ctx->Array.ArrayBufferObj = ctx->Shared->NullBufferObj; + ctx->Array.ElementArrayBufferObj = ctx->Shared->NullBufferObj; } @@ -479,7 +469,7 @@ bind_buffer_object(GLcontext *ctx, GLenum target, GLuint buffer) /* The spec says there's not a buffer object named 0, but we use * one internally because it simplifies things. */ - newBufObj = ctx->Array.NullBufferObj; + newBufObj = ctx->Shared->NullBufferObj; } else { /* non-default buffer object */ @@ -500,7 +490,7 @@ bind_buffer_object(GLcontext *ctx, GLenum target, GLuint buffer) _mesa_reference_buffer_object(ctx, bindTarget, newBufObj); /* Pass BindBuffer call to device driver */ - if (ctx->Driver.BindBuffer && newBufObj) + if (ctx->Driver.BindBuffer) ctx->Driver.BindBuffer( ctx, target, newBufObj ); } @@ -746,7 +736,7 @@ unbind(GLcontext *ctx, struct gl_buffer_object *obj) { if (*ptr == obj) { - _mesa_reference_buffer_object(ctx, ptr, ctx->Array.NullBufferObj); + _mesa_reference_buffer_object(ctx, ptr, ctx->Shared->NullBufferObj); } } @@ -789,6 +779,7 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids) for (i = 0; i < n; i++) { struct gl_buffer_object *bufObj = _mesa_lookup_bufferobj(ctx, ids[i]); if (bufObj) { + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; GLuint j; ASSERT(bufObj->Name == ids[i]); @@ -801,18 +792,19 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids) } /* unbind any vertex pointers bound to this buffer */ - unbind(ctx, &ctx->Array.ArrayObj->Vertex.BufferObj, bufObj); - unbind(ctx, &ctx->Array.ArrayObj->Normal.BufferObj, bufObj); - unbind(ctx, &ctx->Array.ArrayObj->Color.BufferObj, bufObj); - unbind(ctx, &ctx->Array.ArrayObj->SecondaryColor.BufferObj, bufObj); - unbind(ctx, &ctx->Array.ArrayObj->FogCoord.BufferObj, bufObj); - unbind(ctx, &ctx->Array.ArrayObj->Index.BufferObj, bufObj); - unbind(ctx, &ctx->Array.ArrayObj->EdgeFlag.BufferObj, bufObj); - for (j = 0; j < MAX_TEXTURE_COORD_UNITS; j++) { - unbind(ctx, &ctx->Array.ArrayObj->TexCoord[j].BufferObj, bufObj); + unbind(ctx, &arrayObj->Vertex.BufferObj, bufObj); + unbind(ctx, &arrayObj->Weight.BufferObj, bufObj); + unbind(ctx, &arrayObj->Normal.BufferObj, bufObj); + unbind(ctx, &arrayObj->Color.BufferObj, bufObj); + unbind(ctx, &arrayObj->SecondaryColor.BufferObj, bufObj); + unbind(ctx, &arrayObj->FogCoord.BufferObj, bufObj); + unbind(ctx, &arrayObj->Index.BufferObj, bufObj); + unbind(ctx, &arrayObj->EdgeFlag.BufferObj, bufObj); + for (j = 0; j < Elements(arrayObj->TexCoord); j++) { + unbind(ctx, &arrayObj->TexCoord[j].BufferObj, bufObj); } - for (j = 0; j < VERT_ATTRIB_MAX; j++) { - unbind(ctx, &ctx->Array.ArrayObj->VertexAttrib[j].BufferObj, bufObj); + for (j = 0; j < Elements(arrayObj->VertexAttrib); j++) { + unbind(ctx, &arrayObj->VertexAttrib[j].BufferObj, bufObj); } if (ctx->Array.ArrayBufferObj == bufObj) { @@ -947,7 +939,7 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size, return; } if (bufObj->Name == 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferDataARB" ); + _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferDataARB(buffer 0)" ); return; } @@ -958,8 +950,12 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size, bufObj->Pointer = NULL; } + FLUSH_VERTICES(ctx, _NEW_BUFFER_OBJECT); + ASSERT(ctx->Driver.BufferData); + bufObj->Written = GL_TRUE; + /* Give the buffer object to the driver! <data> may be null! */ ctx->Driver.BufferData( ctx, target, size, data, usage, bufObj ); } @@ -980,6 +976,8 @@ _mesa_BufferSubDataARB(GLenum target, GLintptrARB offset, return; } + bufObj->Written = GL_TRUE; + ASSERT(ctx->Driver.BufferSubData); ctx->Driver.BufferSubData( ctx, target, offset, size, data, bufObj ); } @@ -1029,7 +1027,7 @@ _mesa_MapBufferARB(GLenum target, GLenum access) return NULL; } if (bufObj->Name == 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glMapBufferARB" ); + _mesa_error(ctx, GL_INVALID_OPERATION, "glMapBufferARB(buffer 0)" ); return NULL; } if (bufObj->Pointer) { @@ -1044,6 +1042,8 @@ _mesa_MapBufferARB(GLenum target, GLenum access) } bufObj->Access = access; + if (access == GL_WRITE_ONLY_ARB || access == GL_READ_WRITE_ARB) + bufObj->Written = GL_TRUE; return bufObj->Pointer; } diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index c5f13345f04..d8b5f3b1f4a 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -443,7 +443,7 @@ _mesa_readbuffer(GLcontext *ctx, GLenum buffer, GLint bufferIndex) fb->ColorReadBuffer = buffer; fb->_ColorReadBufferIndex = bufferIndex; - ctx->NewState |= _NEW_PIXEL; + ctx->NewState |= _NEW_BUFFERS; } diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 2a9fdf9ca05..5a05a65396a 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -168,7 +168,6 @@ /** For GL_ARB_vertex_program */ /*@{*/ #define MAX_VERTEX_PROGRAM_ADDRESS_REGS 1 -#define MAX_VERTEX_PROGRAM_ATTRIBS 16 /*@}*/ /** For GL_ARB_fragment_program */ @@ -187,7 +186,7 @@ #define MAX_PROGRAM_TEMPS 256 #define MAX_PROGRAM_ADDRESS_REGS 2 #define MAX_UNIFORMS 1024 /**< number of vec4 uniforms */ -#define MAX_VARYING 8 /**< number of float[4] vectors */ +#define MAX_VARYING 16 /**< number of float[4] vectors */ #define MAX_SAMPLERS MAX_TEXTURE_IMAGE_UNITS #define MAX_PROGRAM_INPUTS 32 #define MAX_PROGRAM_OUTPUTS 32 @@ -215,7 +214,7 @@ /** For GL_ARB_vertex_shader */ /*@{*/ -#define MAX_VERTEX_ATTRIBS 16 +#define MAX_VERTEX_GENERIC_ATTRIBS 16 #define MAX_VERTEX_TEXTURE_IMAGE_UNITS MAX_TEXTURE_IMAGE_UNITS #define MAX_COMBINED_TEXTURE_IMAGE_UNITS MAX_TEXTURE_IMAGE_UNITS /*@}*/ diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index ec0dc12a3eb..ff9dd488c74 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -455,7 +455,7 @@ _mesa_init_current(GLcontext *ctx) GLuint i; /* Init all to (0,0,0,1) */ - for (i = 0; i < VERT_ATTRIB_MAX; i++) { + for (i = 0; i < Elements(ctx->Current.Attrib); i++) { ASSIGN_4V( ctx->Current.Attrib[i], 0.0, 0.0, 0.0, 1.0 ); } @@ -602,6 +602,10 @@ _mesa_init_constants(GLcontext *ctx) ASSERT(MAX_NV_VERTEX_PROGRAM_TEMPS <= MAX_PROGRAM_TEMPS); ASSERT(MAX_NV_VERTEX_PROGRAM_INPUTS <= VERT_ATTRIB_MAX); ASSERT(MAX_NV_VERTEX_PROGRAM_OUTPUTS <= VERT_RESULT_MAX); + + /* check that we don't exceed various 32-bit bitfields */ + ASSERT(VERT_RESULT_MAX <= 32); + ASSERT(FRAG_ATTRIB_MAX <= 32); } @@ -1007,10 +1011,6 @@ _mesa_free_context_data( GLcontext *ctx ) _mesa_delete_array_object(ctx, ctx->Array.DefaultArrayObj); -#if FEATURE_ARB_vertex_buffer_object - _mesa_delete_buffer_object(ctx, ctx->Array.NullBufferObj); -#endif - /* free dispatch tables */ _mesa_free(ctx->Exec); _mesa_free(ctx->Save); diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index 2eabcdaf493..80bc6afc4c2 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -341,3 +341,104 @@ _mesa_dump_textures(GLboolean dumpImages) DumpImages = dumpImages; _mesa_HashWalk(ctx->Shared->TexObjects, dump_texture_cb, ctx); } + + +void +_mesa_dump_color_buffer(const char *filename) +{ + GET_CURRENT_CONTEXT(ctx); + const GLuint w = ctx->DrawBuffer->Width; + const GLuint h = ctx->DrawBuffer->Height; + GLubyte *buf; + + buf = (GLubyte *) _mesa_malloc(w * h * 4); + + glPushClientAttrib(GL_CLIENT_PIXEL_STORE_BIT); + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_PACK_INVERT_MESA, GL_TRUE); + + glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, buf); + + _mesa_printf("ReadBuffer %p 0x%x DrawBuffer %p 0x%x\n", + ctx->ReadBuffer->_ColorReadBuffer, + ctx->ReadBuffer->ColorReadBuffer, + ctx->DrawBuffer->_ColorDrawBuffers[0], + ctx->DrawBuffer->ColorDrawBuffer[0]); + _mesa_printf("Writing %d x %d color buffer to %s\n", w, h, filename); + write_ppm(filename, buf, w, h, 4, 0, 1, 2); + + glPopClientAttrib(); + + _mesa_free(buf); +} + + +void +_mesa_dump_depth_buffer(const char *filename) +{ + GET_CURRENT_CONTEXT(ctx); + const GLuint w = ctx->DrawBuffer->Width; + const GLuint h = ctx->DrawBuffer->Height; + GLuint *buf; + GLubyte *buf2; + GLuint i; + + buf = (GLuint *) _mesa_malloc(w * h * 4); /* 4 bpp */ + buf2 = (GLubyte *) _mesa_malloc(w * h * 3); /* 3 bpp */ + + glPushClientAttrib(GL_CLIENT_PIXEL_STORE_BIT); + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_PACK_INVERT_MESA, GL_TRUE); + + glReadPixels(0, 0, w, h, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, buf); + + /* spread 24 bits of Z across R, G, B */ + for (i = 0; i < w * h; i++) { + buf2[i*3+0] = (buf[i] >> 24) & 0xff; + buf2[i*3+1] = (buf[i] >> 16) & 0xff; + buf2[i*3+2] = (buf[i] >> 8) & 0xff; + } + + _mesa_printf("Writing %d x %d depth buffer to %s\n", w, h, filename); + write_ppm(filename, buf2, w, h, 3, 0, 1, 2); + + glPopClientAttrib(); + + _mesa_free(buf); + _mesa_free(buf2); +} + + +void +_mesa_dump_stencil_buffer(const char *filename) +{ + GET_CURRENT_CONTEXT(ctx); + const GLuint w = ctx->DrawBuffer->Width; + const GLuint h = ctx->DrawBuffer->Height; + GLubyte *buf; + GLubyte *buf2; + GLuint i; + + buf = (GLubyte *) _mesa_malloc(w * h); /* 1 bpp */ + buf2 = (GLubyte *) _mesa_malloc(w * h * 3); /* 3 bpp */ + + glPushClientAttrib(GL_CLIENT_PIXEL_STORE_BIT); + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_PACK_INVERT_MESA, GL_TRUE); + + glReadPixels(0, 0, w, h, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, buf); + + for (i = 0; i < w * h; i++) { + buf2[i*3+0] = buf[i]; + buf2[i*3+1] = (buf[i] & 127) * 2; + buf2[i*3+2] = (buf[i] - 128) * 2; + } + + _mesa_printf("Writing %d x %d stencil buffer to %s\n", w, h, filename); + write_ppm(filename, buf2, w, h, 3, 0, 1, 2); + + glPopClientAttrib(); + + _mesa_free(buf); + _mesa_free(buf2); +} diff --git a/src/mesa/main/debug.h b/src/mesa/main/debug.h index 1862ec75b7b..bb384c43242 100644 --- a/src/mesa/main/debug.h +++ b/src/mesa/main/debug.h @@ -60,4 +60,13 @@ extern void _mesa_init_debug( GLcontext *ctx ); extern void _mesa_dump_textures(GLboolean dumpImages); +extern void +_mesa_dump_color_buffer(const char *filename); + +extern void +_mesa_dump_depth_buffer(const char *filename); + +extern void +_mesa_dump_stencil_buffer(const char *filename); + #endif diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 537ff5881f3..dd73a1906b8 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -4883,7 +4883,7 @@ save_Attr1fNV(GLenum attr, GLfloat x) n[2].f = x; } - ASSERT(attr < MAX_VERTEX_PROGRAM_ATTRIBS); + ASSERT(attr < MAX_VERTEX_GENERIC_ATTRIBS); ctx->ListState.ActiveAttribSize[attr] = 1; ASSIGN_4V(ctx->ListState.CurrentAttrib[attr], x, 0, 0, 1); @@ -4905,7 +4905,7 @@ save_Attr2fNV(GLenum attr, GLfloat x, GLfloat y) n[3].f = y; } - ASSERT(attr < MAX_VERTEX_PROGRAM_ATTRIBS); + ASSERT(attr < MAX_VERTEX_GENERIC_ATTRIBS); ctx->ListState.ActiveAttribSize[attr] = 2; ASSIGN_4V(ctx->ListState.CurrentAttrib[attr], x, y, 0, 1); @@ -4928,7 +4928,7 @@ save_Attr3fNV(GLenum attr, GLfloat x, GLfloat y, GLfloat z) n[4].f = z; } - ASSERT(attr < MAX_VERTEX_PROGRAM_ATTRIBS); + ASSERT(attr < MAX_VERTEX_GENERIC_ATTRIBS); ctx->ListState.ActiveAttribSize[attr] = 3; ASSIGN_4V(ctx->ListState.CurrentAttrib[attr], x, y, z, 1); @@ -4952,7 +4952,7 @@ save_Attr4fNV(GLenum attr, GLfloat x, GLfloat y, GLfloat z, GLfloat w) n[5].f = w; } - ASSERT(attr < MAX_VERTEX_PROGRAM_ATTRIBS); + ASSERT(attr < MAX_VERTEX_GENERIC_ATTRIBS); ctx->ListState.ActiveAttribSize[attr] = 4; ASSIGN_4V(ctx->ListState.CurrentAttrib[attr], x, y, z, w); @@ -4974,7 +4974,7 @@ save_Attr1fARB(GLenum attr, GLfloat x) n[2].f = x; } - ASSERT(attr < MAX_VERTEX_ATTRIBS); + ASSERT(attr < MAX_VERTEX_GENERIC_ATTRIBS); ctx->ListState.ActiveAttribSize[attr] = 1; ASSIGN_4V(ctx->ListState.CurrentAttrib[attr], x, 0, 0, 1); @@ -4996,7 +4996,7 @@ save_Attr2fARB(GLenum attr, GLfloat x, GLfloat y) n[3].f = y; } - ASSERT(attr < MAX_VERTEX_ATTRIBS); + ASSERT(attr < MAX_VERTEX_GENERIC_ATTRIBS); ctx->ListState.ActiveAttribSize[attr] = 2; ASSIGN_4V(ctx->ListState.CurrentAttrib[attr], x, y, 0, 1); @@ -5019,7 +5019,7 @@ save_Attr3fARB(GLenum attr, GLfloat x, GLfloat y, GLfloat z) n[4].f = z; } - ASSERT(attr < MAX_VERTEX_ATTRIBS); + ASSERT(attr < MAX_VERTEX_GENERIC_ATTRIBS); ctx->ListState.ActiveAttribSize[attr] = 3; ASSIGN_4V(ctx->ListState.CurrentAttrib[attr], x, y, z, 1); @@ -5043,7 +5043,7 @@ save_Attr4fARB(GLenum attr, GLfloat x, GLfloat y, GLfloat z, GLfloat w) n[5].f = w; } - ASSERT(attr < MAX_VERTEX_ATTRIBS); + ASSERT(attr < MAX_VERTEX_GENERIC_ATTRIBS); ctx->ListState.ActiveAttribSize[attr] = 4; ASSIGN_4V(ctx->ListState.CurrentAttrib[attr], x, y, z, w); @@ -5506,7 +5506,7 @@ index_error(void) static void GLAPIENTRY save_VertexAttrib1fNV(GLuint index, GLfloat x) { - if (index < MAX_VERTEX_PROGRAM_ATTRIBS) + if (index < MAX_NV_VERTEX_PROGRAM_INPUTS) save_Attr1fNV(index, x); else index_error(); @@ -5515,7 +5515,7 @@ save_VertexAttrib1fNV(GLuint index, GLfloat x) static void GLAPIENTRY save_VertexAttrib1fvNV(GLuint index, const GLfloat * v) { - if (index < MAX_VERTEX_PROGRAM_ATTRIBS) + if (index < MAX_NV_VERTEX_PROGRAM_INPUTS) save_Attr1fNV(index, v[0]); else index_error(); @@ -5524,7 +5524,7 @@ save_VertexAttrib1fvNV(GLuint index, const GLfloat * v) static void GLAPIENTRY save_VertexAttrib2fNV(GLuint index, GLfloat x, GLfloat y) { - if (index < MAX_VERTEX_PROGRAM_ATTRIBS) + if (index < MAX_NV_VERTEX_PROGRAM_INPUTS) save_Attr2fNV(index, x, y); else index_error(); @@ -5533,7 +5533,7 @@ save_VertexAttrib2fNV(GLuint index, GLfloat x, GLfloat y) static void GLAPIENTRY save_VertexAttrib2fvNV(GLuint index, const GLfloat * v) { - if (index < MAX_VERTEX_PROGRAM_ATTRIBS) + if (index < MAX_NV_VERTEX_PROGRAM_INPUTS) save_Attr2fNV(index, v[0], v[1]); else index_error(); @@ -5542,7 +5542,7 @@ save_VertexAttrib2fvNV(GLuint index, const GLfloat * v) static void GLAPIENTRY save_VertexAttrib3fNV(GLuint index, GLfloat x, GLfloat y, GLfloat z) { - if (index < MAX_VERTEX_PROGRAM_ATTRIBS) + if (index < MAX_NV_VERTEX_PROGRAM_INPUTS) save_Attr3fNV(index, x, y, z); else index_error(); @@ -5551,7 +5551,7 @@ save_VertexAttrib3fNV(GLuint index, GLfloat x, GLfloat y, GLfloat z) static void GLAPIENTRY save_VertexAttrib3fvNV(GLuint index, const GLfloat * v) { - if (index < MAX_VERTEX_PROGRAM_ATTRIBS) + if (index < MAX_NV_VERTEX_PROGRAM_INPUTS) save_Attr3fNV(index, v[0], v[1], v[2]); else index_error(); @@ -5561,7 +5561,7 @@ static void GLAPIENTRY save_VertexAttrib4fNV(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) { - if (index < MAX_VERTEX_PROGRAM_ATTRIBS) + if (index < MAX_NV_VERTEX_PROGRAM_INPUTS) save_Attr4fNV(index, x, y, z, w); else index_error(); @@ -5570,7 +5570,7 @@ save_VertexAttrib4fNV(GLuint index, GLfloat x, GLfloat y, static void GLAPIENTRY save_VertexAttrib4fvNV(GLuint index, const GLfloat * v) { - if (index < MAX_VERTEX_PROGRAM_ATTRIBS) + if (index < MAX_NV_VERTEX_PROGRAM_INPUTS) save_Attr4fNV(index, v[0], v[1], v[2], v[3]); else index_error(); @@ -5582,7 +5582,7 @@ save_VertexAttrib4fvNV(GLuint index, const GLfloat * v) static void GLAPIENTRY save_VertexAttrib1fARB(GLuint index, GLfloat x) { - if (index < MAX_VERTEX_ATTRIBS) + if (index < MAX_VERTEX_GENERIC_ATTRIBS) save_Attr1fARB(index, x); else index_error(); @@ -5591,7 +5591,7 @@ save_VertexAttrib1fARB(GLuint index, GLfloat x) static void GLAPIENTRY save_VertexAttrib1fvARB(GLuint index, const GLfloat * v) { - if (index < MAX_VERTEX_ATTRIBS) + if (index < MAX_VERTEX_GENERIC_ATTRIBS) save_Attr1fARB(index, v[0]); else index_error(); @@ -5600,7 +5600,7 @@ save_VertexAttrib1fvARB(GLuint index, const GLfloat * v) static void GLAPIENTRY save_VertexAttrib2fARB(GLuint index, GLfloat x, GLfloat y) { - if (index < MAX_VERTEX_ATTRIBS) + if (index < MAX_VERTEX_GENERIC_ATTRIBS) save_Attr2fARB(index, x, y); else index_error(); @@ -5609,7 +5609,7 @@ save_VertexAttrib2fARB(GLuint index, GLfloat x, GLfloat y) static void GLAPIENTRY save_VertexAttrib2fvARB(GLuint index, const GLfloat * v) { - if (index < MAX_VERTEX_ATTRIBS) + if (index < MAX_VERTEX_GENERIC_ATTRIBS) save_Attr2fARB(index, v[0], v[1]); else index_error(); @@ -5618,7 +5618,7 @@ save_VertexAttrib2fvARB(GLuint index, const GLfloat * v) static void GLAPIENTRY save_VertexAttrib3fARB(GLuint index, GLfloat x, GLfloat y, GLfloat z) { - if (index < MAX_VERTEX_ATTRIBS) + if (index < MAX_VERTEX_GENERIC_ATTRIBS) save_Attr3fARB(index, x, y, z); else index_error(); @@ -5627,7 +5627,7 @@ save_VertexAttrib3fARB(GLuint index, GLfloat x, GLfloat y, GLfloat z) static void GLAPIENTRY save_VertexAttrib3fvARB(GLuint index, const GLfloat * v) { - if (index < MAX_VERTEX_ATTRIBS) + if (index < MAX_VERTEX_GENERIC_ATTRIBS) save_Attr3fARB(index, v[0], v[1], v[2]); else index_error(); @@ -5637,7 +5637,7 @@ static void GLAPIENTRY save_VertexAttrib4fARB(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) { - if (index < MAX_VERTEX_ATTRIBS) + if (index < MAX_VERTEX_GENERIC_ATTRIBS) save_Attr4fARB(index, x, y, z, w); else index_error(); @@ -5646,7 +5646,7 @@ save_VertexAttrib4fARB(GLuint index, GLfloat x, GLfloat y, GLfloat z, static void GLAPIENTRY save_VertexAttrib4fvARB(GLuint index, const GLfloat * v) { - if (index < MAX_VERTEX_ATTRIBS) + if (index < MAX_VERTEX_GENERIC_ATTRIBS) save_Attr4fARB(index, v[0], v[1], v[2], v[3]); else index_error(); @@ -6806,10 +6806,10 @@ _mesa_NewList(GLuint name, GLenum mode) /* Reset acumulated list state: */ - for (i = 0; i < VERT_ATTRIB_MAX; i++) + for (i = 0; i < Elements(ctx->ListState.ActiveAttribSize); i++) ctx->ListState.ActiveAttribSize[i] = 0; - for (i = 0; i < MAT_ATTRIB_MAX; i++) + for (i = 0; i < Elements(ctx->ListState.ActiveMaterialSize); i++) ctx->ListState.ActiveMaterialSize[i] = 0; ctx->Driver.CurrentSavePrimitive = PRIM_UNKNOWN; diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index f432be183cb..48268fcd277 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -119,6 +119,7 @@ client_state(GLcontext *ctx, GLenum cap, GLboolean state) CHECK_EXTENSION(NV_vertex_program, cap); { GLint n = (GLint) cap - GL_VERTEX_ATTRIB_ARRAY0_NV; + ASSERT(n < Elements(ctx->Array.ArrayObj->VertexAttrib)); var = &ctx->Array.ArrayObj->VertexAttrib[n].Enabled; flag = _NEW_ARRAY_ATTRIB(n); } @@ -222,14 +223,16 @@ get_texcoord_unit(GLcontext *ctx) /** * Helper function to enable or disable a texture target. + * \param bit one of the TEXTURE_x_BIT values + * \return GL_TRUE if state is changing or GL_FALSE if no change */ static GLboolean -enable_texture(GLcontext *ctx, GLboolean state, GLbitfield bit) +enable_texture(GLcontext *ctx, GLboolean state, GLbitfield texBit) { const GLuint curr = ctx->Texture.CurrentUnit; struct gl_texture_unit *texUnit = &ctx->Texture.Unit[curr]; - const GLuint newenabled = (!state) - ? (texUnit->Enabled & ~bit) : (texUnit->Enabled | bit); + const GLbitfield newenabled = state + ? (texUnit->Enabled | texBit) : (texUnit->Enabled & ~texBit); if (!ctx->DrawBuffer->Visual.rgbMode || texUnit->Enabled == newenabled) return GL_FALSE; @@ -1314,6 +1317,7 @@ _mesa_IsEnabled( GLenum cap ) CHECK_EXTENSION(NV_vertex_program); { GLint n = (GLint) cap - GL_VERTEX_ATTRIB_ARRAY0_NV; + ASSERT(n < Elements(ctx->Array.ArrayObj->VertexAttrib)); return (ctx->Array.ArrayObj->VertexAttrib[n].Enabled != 0); } case GL_MAP1_VERTEX_ATTRIB0_4_NV: diff --git a/src/mesa/main/enums.c b/src/mesa/main/enums.c index c077bc0a89a..cf893fdac51 100644 --- a/src/mesa/main/enums.c +++ b/src/mesa/main/enums.c @@ -948,8 +948,8 @@ LONGSTRING static const char enum_string_table[] = "GL_MAX_TRACK_MATRIX_STACK_DEPTH_NV\0" "GL_MAX_VARYING_FLOATS\0" "GL_MAX_VARYING_FLOATS_ARB\0" - "GL_MAX_VERTEX_ATTRIBS\0" - "GL_MAX_VERTEX_ATTRIBS_ARB\0" + "GL_MAX_VERTEX_GENERIC_ATTRIBS\0" + "GL_MAX_VERTEX_GENERIC_ATTRIBS_ARB\0" "GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS\0" "GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS_ARB\0" "GL_MAX_VERTEX_UNIFORM_COMPONENTS\0" @@ -2772,8 +2772,8 @@ static const enum_elt all_enums[1820] = { 19417, 0x0000862E }, /* GL_MAX_TRACK_MATRIX_STACK_DEPTH_NV */ { 19452, 0x00008B4B }, /* GL_MAX_VARYING_FLOATS */ { 19474, 0x00008B4B }, /* GL_MAX_VARYING_FLOATS_ARB */ - { 19500, 0x00008869 }, /* GL_MAX_VERTEX_ATTRIBS */ - { 19522, 0x00008869 }, /* GL_MAX_VERTEX_ATTRIBS_ARB */ + { 19500, 0x00008869 }, /* GL_MAX_VERTEX_GENERIC_ATTRIBS */ + { 19522, 0x00008869 }, /* GL_MAX_VERTEX_GENERIC_ATTRIBS_ARB */ { 19548, 0x00008B4C }, /* GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS */ { 19582, 0x00008B4C }, /* GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS_ARB */ { 19620, 0x00008B4A }, /* GL_MAX_VERTEX_UNIFORM_COMPONENTS */ @@ -4733,7 +4733,7 @@ static const unsigned reduced_enums[1319] = 317, /* GL_CURRENT_QUERY */ 1259, /* GL_QUERY_RESULT */ 1261, /* GL_QUERY_RESULT_AVAILABLE */ - 912, /* GL_MAX_VERTEX_ATTRIBS */ + 912, /* GL_MAX_VERTEX_GENERIC_ATTRIBS */ 1778, /* GL_VERTEX_ATTRIB_ARRAY_NORMALIZED */ 368, /* GL_DEPTH_STENCIL_TO_RGBA_NV */ 367, /* GL_DEPTH_STENCIL_TO_BGRA_NV */ diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index e8e8c2bf30e..83301f1e621 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -368,6 +368,11 @@ test_attachment_completeness(const GLcontext *ctx, GLenum format, att->Complete = GL_FALSE; return; } + if (texImage->TexFormat->TexelBytes == 0) { + att_incomplete("compressed internalformat"); + att->Complete = GL_FALSE; + return; + } } else if (format == GL_DEPTH) { if (texImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) { diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 332febf91f9..c06031e6cb2 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -182,6 +182,8 @@ _mesa_sizeof_type( GLenum type ) return sizeof(GLint); case GL_FLOAT: return sizeof(GLfloat); + case GL_DOUBLE: + return sizeof(GLdouble); case GL_HALF_FLOAT_ARB: return sizeof(GLhalfARB); default: diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 7a719745fcc..872982fa430 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -1481,9 +1481,12 @@ next_mipmap_level_size(GLenum target, GLint border, /** - * For GL_SGIX_generate_mipmap: - * Generate a complete set of mipmaps from texObj's base-level image. + * Automatic mipmap generation. + * This is the fallback/default function for ctx->Driver.GenerateMipmap(). + * Generate a complete set of mipmaps from texObj's BaseLevel image. * Stop at texObj's MaxLevel or when we get to the 1x1 texture. + * For cube maps, target will be one of + * GL_TEXTURE_CUBE_MAP_POSITIVE/NEGATIVE_X/Y/Z; never GL_TEXTURE_CUBE_MAP. */ void _mesa_generate_mipmap(GLcontext *ctx, GLenum target, diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 50dc2def87f..e3471123c65 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1028,7 +1028,7 @@ struct gl_stencil_attrib /** * An index for each type of texture object. These correspond to the GL - * target target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc. + * texture target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc. * Note: the order is from highest priority to lowest priority. */ typedef enum @@ -1505,7 +1505,7 @@ struct gl_buffer_object GLsizeiptr Length; /**< mapped length */ GLsizeiptrARB Size; /**< Size of storage in bytes */ GLubyte *Data; /**< Location of storage either in RAM or VRAM. */ - GLboolean OnCard; /**< Is buffer in VRAM? (hardware drivers) */ + GLboolean Written; /**< Ever written to? (for debugging) */ }; @@ -1541,10 +1541,10 @@ struct gl_client_array const GLubyte *Ptr; /**< Points to array data */ GLboolean Enabled; /**< Enabled flag is a boolean */ GLboolean Normalized; /**< GL_ARB_vertex_program */ + GLuint _ElementSize; /**< size of each element in bytes */ - /**< GL_ARB_vertex_buffer_object */ - struct gl_buffer_object *BufferObj; - GLuint _MaxElement; + struct gl_buffer_object *BufferObj;/**< GL_ARB_vertex_buffer_object */ + GLuint _MaxElement; /**< max element index into array buffer + 1 */ }; @@ -1563,6 +1563,7 @@ struct gl_array_object /** Conventional vertex arrays */ /*@{*/ struct gl_client_array Vertex; + struct gl_client_array Weight; struct gl_client_array Normal; struct gl_client_array Color; struct gl_client_array SecondaryColor; @@ -1573,11 +1574,22 @@ struct gl_array_object struct gl_client_array PointSize; /*@}*/ - /** Generic arrays for vertex programs/shaders */ - struct gl_client_array VertexAttrib[VERT_ATTRIB_MAX]; + /** + * Generic arrays for vertex programs/shaders. + * For NV vertex programs, these attributes alias and take priority + * over the conventional attribs above. For ARB vertex programs and + * GLSL vertex shaders, these attributes are separate. + */ + struct gl_client_array VertexAttrib[MAX_VERTEX_GENERIC_ATTRIBS]; /** Mask of _NEW_ARRAY_* values indicating which arrays are enabled */ GLbitfield _Enabled; + + /** + * Min of all enabled arrays' _MaxElement. When arrays reside inside VBOs + * we can determine the max legal (in bounds) glDrawElements array index. + */ + GLuint _MaxElement; }; @@ -1586,7 +1598,10 @@ struct gl_array_object */ struct gl_array_attrib { + /** Currently bound array object. See _mesa_BindVertexArrayAPPLE() */ struct gl_array_object *ArrayObj; + + /** The default vertex array object */ struct gl_array_object *DefaultArrayObj; GLint ActiveTexture; /**< Client Active Texture */ @@ -1596,11 +1611,9 @@ struct gl_array_attrib GLbitfield NewState; /**< mask of _NEW_ARRAY_* values */ #if FEATURE_ARB_vertex_buffer_object - struct gl_buffer_object *NullBufferObj; struct gl_buffer_object *ArrayBufferObj; struct gl_buffer_object *ElementArrayBufferObj; #endif - GLuint _MaxElement; /* Min of all enabled array's maxes */ }; @@ -2053,6 +2066,9 @@ struct gl_shared_state /** Default texture objects (shared by all texture units) */ struct gl_texture_object *DefaultTex[NUM_TEXTURE_TARGETS]; + /** Fallback texture used when a bound texture is incomplete */ + struct gl_texture_object *FallbackTex; + /** * \name Thread safety and statechange notification for texture * objects. @@ -2064,6 +2080,8 @@ struct gl_shared_state GLuint TextureStateStamp; /**< state notification for shared tex */ /*@}*/ + /** Default buffer object for vertex arrays that aren't in VBOs */ + struct gl_buffer_object *NullBufferObj; /** * \name Vertex/fragment programs @@ -2619,6 +2637,7 @@ struct gl_matrix_stack #define _NEW_PROGRAM 0x8000000 /**< __GLcontextRec::VertexProgram */ #define _NEW_CURRENT_ATTRIB 0x10000000 /**< __GLcontextRec::Current */ #define _NEW_PROGRAM_CONSTANTS 0x20000000 +#define _NEW_BUFFER_OBJECT 0x40000000 #define _NEW_ALL ~0 /*@}*/ diff --git a/src/mesa/main/pixel.c b/src/mesa/main/pixel.c index 57ae9c721a8..d9f3e476e81 100644 --- a/src/mesa/main/pixel.c +++ b/src/mesa/main/pixel.c @@ -170,7 +170,7 @@ _mesa_PixelMapfv( GLenum map, GLsizei mapsize, const GLfloat *values ) return; } /* restore */ - ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj; + ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj; buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, GL_READ_ONLY_ARB, ctx->Unpack.BufferObj); @@ -229,7 +229,7 @@ _mesa_PixelMapuiv(GLenum map, GLsizei mapsize, const GLuint *values ) return; } /* restore */ - ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj; + ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj; buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, GL_READ_ONLY_ARB, ctx->Unpack.BufferObj); @@ -303,7 +303,7 @@ _mesa_PixelMapusv(GLenum map, GLsizei mapsize, const GLushort *values ) return; } /* restore */ - ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj; + ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj; buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, GL_READ_ONLY_ARB, ctx->Unpack.BufferObj); @@ -371,7 +371,7 @@ _mesa_GetPixelMapfv( GLenum map, GLfloat *values ) return; } /* restore */ - ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj; + ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj; buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj); @@ -432,7 +432,7 @@ _mesa_GetPixelMapuiv( GLenum map, GLuint *values ) return; } /* restore */ - ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj; + ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj; buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj); @@ -494,7 +494,7 @@ _mesa_GetPixelMapusv( GLenum map, GLushort *values ) return; } /* restore */ - ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj; + ctx->DefaultPacking.BufferObj = ctx->Shared->NullBufferObj; buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj); diff --git a/src/mesa/main/pixelstore.c b/src/mesa/main/pixelstore.c index ff1a6344cc9..6a641f83f27 100644 --- a/src/mesa/main/pixelstore.c +++ b/src/mesa/main/pixelstore.c @@ -245,7 +245,8 @@ _mesa_init_pixelstore( GLcontext *ctx ) ctx->Pack.ClientStorage = GL_FALSE; ctx->Pack.Invert = GL_FALSE; #if FEATURE_EXT_pixel_buffer_object - ctx->Pack.BufferObj = ctx->Array.NullBufferObj; + _mesa_reference_buffer_object(ctx, &ctx->Pack.BufferObj, + ctx->Shared->NullBufferObj); #endif ctx->Unpack.Alignment = 4; ctx->Unpack.RowLength = 0; @@ -258,7 +259,8 @@ _mesa_init_pixelstore( GLcontext *ctx ) ctx->Unpack.ClientStorage = GL_FALSE; ctx->Unpack.Invert = GL_FALSE; #if FEATURE_EXT_pixel_buffer_object - ctx->Unpack.BufferObj = ctx->Array.NullBufferObj; + _mesa_reference_buffer_object(ctx, &ctx->Unpack.BufferObj, + ctx->Shared->NullBufferObj); #endif /* @@ -278,6 +280,7 @@ _mesa_init_pixelstore( GLcontext *ctx ) ctx->DefaultPacking.ClientStorage = GL_FALSE; ctx->DefaultPacking.Invert = GL_FALSE; #if FEATURE_EXT_pixel_buffer_object - ctx->DefaultPacking.BufferObj = ctx->Array.NullBufferObj; + _mesa_reference_buffer_object(ctx, &ctx->DefaultPacking.BufferObj, + ctx->Shared->NullBufferObj); #endif } diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index ae8dd2a8364..759883743da 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -33,6 +33,7 @@ #include "mtypes.h" #include "hash.h" #include "arrayobj.h" +#include "bufferobj.h" #include "shared.h" #include "shader/program.h" #include "shader/shader_api.h" @@ -92,6 +93,13 @@ _mesa_alloc_shared_state(GLcontext *ctx) shared->BufferObjects = _mesa_NewHashTable(); #endif + /* Allocate the default buffer object and set refcount so high that + * it never gets deleted. + * XXX with recent/improved refcounting this may not longer be needed. + */ + shared->NullBufferObj = _mesa_new_buffer_object(ctx, 0, 0); + shared->NullBufferObj->RefCount = 1000; + shared->ArrayObjects = _mesa_NewHashTable(); /* Create default texture objects */ @@ -341,6 +349,10 @@ _mesa_free_shared_state(GLcontext *ctx, struct gl_shared_state *shared) _mesa_DeleteHashTable(shared->RenderBuffers); #endif +#if FEATURE_ARB_vertex_buffer_object + _mesa_delete_buffer_object(ctx, shared->NullBufferObj); +#endif + /* * Free texture objects (after FBOs since some textures might have * been bound to FBOs). diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 94e37e3dab5..7b41b8f4da4 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -64,114 +64,149 @@ update_separate_specular(GLcontext *ctx) /** - * Update state dependent on vertex arrays. + * Compute the index of the last array element that can be safely accessed + * in a vertex array. We can really only do this when the array lives in + * a VBO. + * The array->_MaxElement field will be updated. + * Later in glDrawArrays/Elements/etc we can do some bounds checking. + */ +static void +compute_max_element(struct gl_client_array *array) +{ + assert(array->Enabled); + if (array->BufferObj->Name) { + /* Compute the max element we can access in the VBO without going + * out of bounds. + */ + array->_MaxElement = ((GLsizeiptrARB) array->BufferObj->Size + - (GLsizeiptrARB) array->Ptr + array->StrideB + - array->_ElementSize) / array->StrideB; + } + else { + /* user-space array, no idea how big it is */ + array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */ + } +} + + +/** + * Helper for update_arrays(). + * \return min(current min, array->_MaxElement). + */ +static GLuint +update_min(GLuint min, struct gl_client_array *array) +{ + compute_max_element(array); + return MIN2(min, array->_MaxElement); +} + + +/** + * Update ctx->Array._MaxElement (the max legal index into all enabled arrays). + * Need to do this upon new array state or new buffer object state. */ static void update_arrays( GLcontext *ctx ) { - GLuint i, min; + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; + GLuint i, min = ~0; /* find min of _MaxElement values for all enabled arrays */ /* 0 */ if (ctx->VertexProgram._Current - && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) { - min = ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_POS]._MaxElement; + && arrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_POS]); } - else if (ctx->Array.ArrayObj->Vertex.Enabled) { - min = ctx->Array.ArrayObj->Vertex._MaxElement; - } - else { - /* can't draw anything without vertex positions! */ - min = 0; + else if (arrayObj->Vertex.Enabled) { + min = update_min(min, &arrayObj->Vertex); } /* 1 */ if (ctx->VertexProgram._Enabled - && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]._MaxElement); + && arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]); } /* no conventional vertex weight array */ /* 2 */ if (ctx->VertexProgram._Enabled - && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]._MaxElement); + && arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]); } - else if (ctx->Array.ArrayObj->Normal.Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->Normal._MaxElement); + else if (arrayObj->Normal.Enabled) { + min = update_min(min, &arrayObj->Normal); } /* 3 */ if (ctx->VertexProgram._Enabled - && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]._MaxElement); + && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]); } - else if (ctx->Array.ArrayObj->Color.Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->Color._MaxElement); + else if (arrayObj->Color.Enabled) { + min = update_min(min, &arrayObj->Color); } /* 4 */ if (ctx->VertexProgram._Enabled - && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]._MaxElement); + && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]); } - else if (ctx->Array.ArrayObj->SecondaryColor.Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->SecondaryColor._MaxElement); + else if (arrayObj->SecondaryColor.Enabled) { + min = update_min(min, &arrayObj->SecondaryColor); } /* 5 */ if (ctx->VertexProgram._Enabled - && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_FOG]._MaxElement); + && arrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_FOG]); } - else if (ctx->Array.ArrayObj->FogCoord.Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->FogCoord._MaxElement); + else if (arrayObj->FogCoord.Enabled) { + min = update_min(min, &arrayObj->FogCoord); } /* 6 */ if (ctx->VertexProgram._Enabled - && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]._MaxElement); + && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]); } - else if (ctx->Array.ArrayObj->Index.Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->Index._MaxElement); + else if (arrayObj->Index.Enabled) { + min = update_min(min, &arrayObj->Index); } - /* 7 */ if (ctx->VertexProgram._Enabled - && ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]._MaxElement); + && arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]); } /* 8..15 */ for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) { if (ctx->VertexProgram._Enabled - && ctx->Array.ArrayObj->VertexAttrib[i].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[i]._MaxElement); + && arrayObj->VertexAttrib[i].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[i]); } else if (i - VERT_ATTRIB_TEX0 < ctx->Const.MaxTextureCoordUnits - && ctx->Array.ArrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->TexCoord[i - VERT_ATTRIB_TEX0]._MaxElement); + && arrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) { + min = update_min(min, &arrayObj->TexCoord[i - VERT_ATTRIB_TEX0]); } } /* 16..31 */ if (ctx->VertexProgram._Current) { - for (i = 0; i < Elements(ctx->Array.ArrayObj->VertexAttrib); i++) { - if (ctx->Array.ArrayObj->VertexAttrib[i].Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->VertexAttrib[i]._MaxElement); + for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) { + if (arrayObj->VertexAttrib[i].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[i]); } } } - if (ctx->Array.ArrayObj->EdgeFlag.Enabled) { - min = MIN2(min, ctx->Array.ArrayObj->EdgeFlag._MaxElement); + if (arrayObj->EdgeFlag.Enabled) { + min = update_min(min, &arrayObj->EdgeFlag); } /* _MaxElement is one past the last legal array element */ - ctx->Array._MaxElement = min; + arrayObj->_MaxElement = min; } @@ -547,7 +582,7 @@ _mesa_update_state_locked( GLcontext *ctx ) if (new_state & _DD_NEW_SEPARATE_SPECULAR) update_separate_specular( ctx ); - if (new_state & (_NEW_ARRAY | _NEW_PROGRAM)) + if (new_state & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT)) update_arrays( ctx ); if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT)) diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index b63f747fe8d..d51e7b76ec7 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -281,7 +281,8 @@ valid_texture_object(const struct gl_texture_object *tex) _mesa_problem(NULL, "invalid reference to a deleted texture object"); return GL_FALSE; default: - _mesa_problem(NULL, "invalid texture object Target value"); + _mesa_problem(NULL, "invalid texture object Target 0x%x, Id = %u", + tex->Target, tex->Name); return GL_FALSE; } } @@ -662,6 +663,59 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, } } + +/** + * Return pointer to a default/fallback texture. + * The texture is a 2D 8x8 RGBA texture with all texels = (0,0,0,1). + * That's the value a sampler should get when sampling from an + * incomplete texture. + */ +struct gl_texture_object * +_mesa_get_fallback_texture(GLcontext *ctx) +{ + if (!ctx->Shared->FallbackTex) { + /* create fallback texture now */ + static GLubyte texels[8 * 8][4]; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + GLuint i; + + for (i = 0; i < 8 * 8; i++) { + texels[i][0] = + texels[i][1] = + texels[i][2] = 0x0; + texels[i][3] = 0xff; + } + + /* create texture object */ + texObj = ctx->Driver.NewTextureObject(ctx, 0, GL_TEXTURE_2D); + assert(texObj->RefCount == 1); + texObj->MinFilter = GL_NEAREST; + texObj->MagFilter = GL_NEAREST; + + /* create level[0] texture image */ + texImage = _mesa_get_tex_image(ctx, texObj, GL_TEXTURE_2D, 0); + + /* init the image fields */ + _mesa_init_teximage_fields(ctx, GL_TEXTURE_2D, texImage, + 8, 8, 1, 0, GL_RGBA); + + /* set image data */ + ctx->Driver.TexImage2D(ctx, GL_TEXTURE_2D, 0, GL_RGBA, + 8, 8, 0, + GL_RGBA, GL_UNSIGNED_BYTE, texels, + &ctx->DefaultPacking, texObj, texImage); + + _mesa_test_texobj_completeness(ctx, texObj); + assert(texObj->_Complete); + + ctx->Shared->FallbackTex = texObj; + } + return ctx->Shared->FallbackTex; +} + + + /*@}*/ diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h index d5374c5d6c4..2599c0816a9 100644 --- a/src/mesa/main/texobj.h +++ b/src/mesa/main/texobj.h @@ -65,6 +65,9 @@ extern void _mesa_test_texobj_completeness( const GLcontext *ctx, struct gl_texture_object *obj ); +extern struct gl_texture_object * +_mesa_get_fallback_texture(GLcontext *ctx); + extern void _mesa_unlock_context_textures( GLcontext *ctx ); diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index e60ab6aa123..2195a334d3e 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -469,8 +469,10 @@ set_tex_parameterf(GLcontext *ctx, return GL_TRUE; } else { - _mesa_error(ctx, GL_INVALID_ENUM, - "glTexParameter(pname=GL_TEXTURE_MAX_ANISOTROPY_EXT)"); + static GLuint count = 0; + if (count++ < 10) + _mesa_error(ctx, GL_INVALID_ENUM, + "glTexParameter(pname=GL_TEXTURE_MAX_ANISOTROPY_EXT)"); } return GL_FALSE; diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index cef58d7a496..6e0c0c688a1 100644 --- a/src/mesa/main/texstate.c +++ b/src/mesa/main/texstate.c @@ -561,8 +561,19 @@ update_texture_state( GLcontext *ctx ) } if (!texUnit->_ReallyEnabled) { - _mesa_reference_texobj(&texUnit->_Current, NULL); - continue; + if (fprog) { + /* If we get here it means the shader is expecting a texture + * object, but there isn't one (or it's incomplete). Use the + * fallback texture. + */ + struct gl_texture_object *texObj = _mesa_get_fallback_texture(ctx); + texUnit->_ReallyEnabled = 1 << TEXTURE_2D_INDEX; + _mesa_reference_texobj(&texUnit->_Current, texObj); + } + else { + /* fixed-function: texture unit is really disabled */ + continue; + } } /* if we get here, we know this texture unit is enabled */ diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 72b3e834b3a..ff3128b8beb 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -1,8 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 7.2 + * Version: 7.6 * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,15 +37,14 @@ /** - * Update the fields of a vertex array object. - * We need to do a few special things for arrays that live in - * vertex buffer objects. + * Set the fields of a vertex array. * * \param array the array to update * \param dirtyBit which bit to set in ctx->Array.NewState for this array * \param elementSize size of each array element, in bytes * \param size components per element (1, 2, 3 or 4) * \param type datatype of each component (GL_FLOAT, GL_INT, etc) + * \param format either GL_RGBA or GL_BGRA * \param stride stride between elements, in elements * \param normalized are integer types converted to floats in [-1, 1]? * \param ptr the address (or offset inside VBO) of the array data @@ -63,22 +63,11 @@ update_array(GLcontext *ctx, struct gl_client_array *array, array->StrideB = stride ? stride : elementSize; array->Normalized = normalized; array->Ptr = (const GLubyte *) ptr; -#if FEATURE_ARB_vertex_buffer_object + array->_ElementSize = elementSize; + _mesa_reference_buffer_object(ctx, &array->BufferObj, ctx->Array.ArrayBufferObj); - /* Compute the index of the last array element that's inside the buffer. - * Later in glDrawArrays we'll check if start + count > _MaxElement to - * be sure we won't go out of bounds. - */ - if (ctx->Array.ArrayBufferObj->Name) - array->_MaxElement = ((GLsizeiptrARB) ctx->Array.ArrayBufferObj->Size - - (GLsizeiptrARB) array->Ptr + array->StrideB - - elementSize) / array->StrideB; - else -#endif - array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */ - ctx->NewState |= _NEW_ARRAY; ctx->Array.NewState |= dirtyBit; } @@ -528,6 +517,12 @@ _mesa_PointSizePointer(GLenum type, GLsizei stride, const GLvoid *ptr) #if FEATURE_NV_vertex_program +/** + * Set a vertex attribute array. + * Note that these arrays DO alias the conventional GL vertex arrays + * (position, normal, color, fog, texcoord, etc). + * The generic attribute slots at #16 and above are not touched. + */ void GLAPIENTRY _mesa_VertexAttribPointerNV(GLuint index, GLint size, GLenum type, GLsizei stride, const GLvoid *ptr) @@ -537,7 +532,7 @@ _mesa_VertexAttribPointerNV(GLuint index, GLint size, GLenum type, GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - if (index >= MAX_VERTEX_PROGRAM_ATTRIBS) { + if (index >= MAX_NV_VERTEX_PROGRAM_INPUTS) { _mesa_error(ctx, GL_INVALID_VALUE, "glVertexAttribPointerNV(index)"); return; } @@ -588,6 +583,11 @@ _mesa_VertexAttribPointerNV(GLuint index, GLint size, GLenum type, #if FEATURE_ARB_vertex_program +/** + * Set a generic vertex attribute array. + * Note that these arrays DO NOT alias the conventional GL vertex arrays + * (position, normal, color, fog, texcoord, etc). + */ void GLAPIENTRY _mesa_VertexAttribPointerARB(GLuint index, GLint size, GLenum type, GLboolean normalized, @@ -1044,6 +1044,52 @@ _mesa_MultiModeDrawElementsIBM( const GLenum * mode, const GLsizei * count, /** + * Print vertex array's fields. + */ +static void +print_array(const char *name, GLint index, const struct gl_client_array *array) +{ + if (index >= 0) + _mesa_printf(" %s[%d]: ", name, index); + else + _mesa_printf(" %s: ", name); + _mesa_printf("Ptr=%p, Type=0x%x, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %u), MaxElem=%u\n", + array->Ptr, array->Type, array->Size, + array->_ElementSize, array->StrideB, + array->BufferObj->Name, array->BufferObj->Size, + array->_MaxElement); +} + + +/** + * Print current vertex object/array info. For debug. + */ +void +_mesa_print_arrays(GLcontext *ctx) +{ + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; + GLuint i; + + _mesa_update_array_object_max_element(ctx, arrayObj); + + _mesa_printf("Array Object %u\n", arrayObj->Name); + if (arrayObj->Vertex.Enabled) + print_array("Vertex", -1, &arrayObj->Vertex); + if (arrayObj->Normal.Enabled) + print_array("Normal", -1, &arrayObj->Normal); + if (arrayObj->Color.Enabled) + print_array("Color", -1, &arrayObj->Color); + for (i = 0; i < Elements(arrayObj->TexCoord); i++) + if (arrayObj->TexCoord[i].Enabled) + print_array("TexCoord", i, &arrayObj->TexCoord[i]); + for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) + if (arrayObj->VertexAttrib[i].Enabled) + print_array("Attrib", i, &arrayObj->VertexAttrib[i]); + _mesa_printf(" _MaxElement = %u\n", arrayObj->_MaxElement); +} + + +/** * Initialize vertex array state for given context. */ void diff --git a/src/mesa/main/varray.h b/src/mesa/main/varray.h index 97d5c8219d0..46cc3ee3425 100644 --- a/src/mesa/main/varray.h +++ b/src/mesa/main/varray.h @@ -1,18 +1,9 @@ -/** - * \file varray.h - * Vertex arrays. - * - * \if subset - * (No-op) - * - * \endif - */ - /* * Mesa 3-D graphics library - * Version: 4.1 + * Version: 7.6 * - * Copyright (C) 1999-2002 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -169,6 +160,8 @@ _mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices); +extern void +_mesa_print_arrays(GLcontext *ctx); extern void _mesa_init_varray( GLcontext * ctx ); diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h index e109f20df4d..d4d3dd1a94e 100644 --- a/src/mesa/main/version.h +++ b/src/mesa/main/version.h @@ -1,8 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 7.5 + * Version: 7.6 * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,9 +30,9 @@ /* Mesa version */ #define MESA_MAJOR 7 -#define MESA_MINOR 5 +#define MESA_MINOR 6 #define MESA_PATCH 0 -#define MESA_VERSION_STRING "7.5-rc2" +#define MESA_VERSION_STRING "7.6-devel" /* To make version comparison easy */ #define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) diff --git a/src/mesa/math/m_vector.c b/src/mesa/math/m_vector.c index c5e2fd1de12..4cbab11a358 100644 --- a/src/mesa/math/m_vector.c +++ b/src/mesa/math/m_vector.c @@ -1,4 +1,3 @@ - /* * Mesa 3-D graphics library * Version: 3.5 @@ -37,11 +36,12 @@ -/* +/** * Given a vector [count][4] of floats, set all the [][elt] values * to 0 (if elt = 0, 1, 2) or 1.0 (if elt = 3). */ -void _mesa_vector4f_clean_elem( GLvector4f *vec, GLuint count, GLuint elt ) +void +_mesa_vector4f_clean_elem( GLvector4f *vec, GLuint count, GLuint elt ) { static const GLubyte elem_bits[4] = { VEC_DIRTY_0, @@ -54,12 +54,13 @@ void _mesa_vector4f_clean_elem( GLvector4f *vec, GLuint count, GLuint elt ) GLfloat (*data)[4] = (GLfloat (*)[4])vec->start; GLuint i; - for (i = 0 ; i < count ; i++) + for (i = 0; i < count; i++) data[i][elt] = v; vec->flags &= ~elem_bits[elt]; } + static const GLubyte size_bits[5] = { 0, VEC_SIZE_1, @@ -69,61 +70,53 @@ static const GLubyte size_bits[5] = { }; - -/* +/** * Initialize GLvector objects. - * Input: v - the vector object to initialize. - * flags - bitwise-OR of VEC_* flags - * storage - pointer to storage for the vector's data + * \param v the vector object to initialize. + * \param flags bitwise-OR of VEC_* flags + * \param storage pointer to storage for the vector's data */ - - -void _mesa_vector4f_init( GLvector4f *v, GLuint flags, GLfloat (*storage)[4] ) +void +_mesa_vector4f_init( GLvector4f *v, GLbitfield flags, GLfloat (*storage)[4] ) { v->stride = 4 * sizeof(GLfloat); v->size = 2; /* may change: 2-4 for vertices and 1-4 for texcoords */ v->data = storage; v->start = (GLfloat *) storage; v->count = 0; - v->flags = size_bits[4] | flags ; + v->flags = size_bits[4] | flags; } - - -/* +/** * Initialize GLvector objects and allocate storage. - * Input: v - the vector object - * sz - unused???? - * flags - bitwise-OR of VEC_* flags - * count - number of elements to allocate in vector - * alignment - desired memory alignment for the data (in bytes) + * \param v the vector object + * \param flags bitwise-OR of VEC_* flags + * \param count number of elements to allocate in vector + * \param alignment desired memory alignment for the data (in bytes) */ - - -void _mesa_vector4f_alloc( GLvector4f *v, GLuint flags, GLuint count, - GLuint alignment ) +void +_mesa_vector4f_alloc( GLvector4f *v, GLbitfield flags, GLuint count, + GLuint alignment ) { v->stride = 4 * sizeof(GLfloat); v->size = 2; v->storage = ALIGN_MALLOC( count * 4 * sizeof(GLfloat), alignment ); + v->storage_count = count; v->start = (GLfloat *) v->storage; v->data = (GLfloat (*)[4]) v->storage; v->count = 0; - v->flags = size_bits[4] | flags | VEC_MALLOC ; + v->flags = size_bits[4] | flags | VEC_MALLOC; } - - -/* +/** * Vector deallocation. Free whatever memory is pointed to by the * vector's storage field if the VEC_MALLOC flag is set. * DO NOT free the GLvector object itself, though. */ - - -void _mesa_vector4f_free( GLvector4f *v ) +void +_mesa_vector4f_free( GLvector4f *v ) { if (v->flags & VEC_MALLOC) { ALIGN_FREE( v->storage ); @@ -135,13 +128,15 @@ void _mesa_vector4f_free( GLvector4f *v ) } -/* +/** * For debugging */ -void _mesa_vector4f_print( GLvector4f *v, GLubyte *cullmask, GLboolean culling ) +void +_mesa_vector4f_print( const GLvector4f *v, const GLubyte *cullmask, + GLboolean culling ) { - GLfloat c[4] = { 0, 0, 0, 1 }; - const char *templates[5] = { + static const GLfloat c[4] = { 0, 0, 0, 1 }; + static const char *templates[5] = { "%d:\t0, 0, 0, 1\n", "%d:\t%f, 0, 0, 1\n", "%d:\t%f, %f, 0, 1\n", @@ -154,30 +149,32 @@ void _mesa_vector4f_print( GLvector4f *v, GLubyte *cullmask, GLboolean culling ) GLuint j, i = 0, count; _mesa_printf("data-start\n"); - for ( ; d != v->start ; STRIDE_F(d, v->stride), i++) + for (; d != v->start; STRIDE_F(d, v->stride), i++) _mesa_printf(t, i, d[0], d[1], d[2], d[3]); _mesa_printf("start-count(%u)\n", v->count); count = i + v->count; if (culling) { - for ( ; i < count ; STRIDE_F(d, v->stride), i++) + for (; i < count; STRIDE_F(d, v->stride), i++) if (cullmask[i]) _mesa_printf(t, i, d[0], d[1], d[2], d[3]); } else { - for ( ; i < count ; STRIDE_F(d, v->stride), i++) + for (; i < count; STRIDE_F(d, v->stride), i++) _mesa_printf(t, i, d[0], d[1], d[2], d[3]); } - for (j = v->size ; j < 4; j++) { + for (j = v->size; j < 4; j++) { if ((v->flags & (1<<j)) == 0) { _mesa_printf("checking col %u is clean as advertised ", j); - for (i = 0, d = (GLfloat *) v->data ; - i < count && d[j] == c[j] ; - i++, STRIDE_F(d, v->stride)) {}; + for (i = 0, d = (GLfloat *) v->data; + i < count && d[j] == c[j]; + i++, STRIDE_F(d, v->stride)) { + /* no-op */ + } if (i == count) _mesa_printf(" --> ok\n"); @@ -186,5 +183,3 @@ void _mesa_vector4f_print( GLvector4f *v, GLubyte *cullmask, GLboolean culling ) } } } - - diff --git a/src/mesa/math/m_vector.h b/src/mesa/math/m_vector.h index 647388ac7d0..71281d57589 100644 --- a/src/mesa/math/m_vector.h +++ b/src/mesa/math/m_vector.h @@ -31,7 +31,6 @@ #define _M_VECTOR_H_ #include "main/glheader.h" -#include "main/mtypes.h" /* hack for GLchan */ #define VEC_DIRTY_0 0x1 @@ -50,7 +49,8 @@ -/* Wrap all the information about vectors up in a struct. Has +/** + * Wrap all the information about vectors up in a struct. Has * additional fields compared to the other vectors to help us track of * different vertex sizes, and whether we need to clean columns out * because they contain non-(0,0,0,1) values. @@ -60,29 +60,27 @@ * the transformation routines. */ typedef struct { - GLfloat (*data)[4]; /* may be malloc'd or point to client data */ - GLfloat *start; /* points somewhere inside of <data> */ - GLuint count; /* size of the vector (in elements) */ - GLuint stride; /* stride from one element to the next (in bytes) */ - GLuint size; /* 2-4 for vertices and 1-4 for texcoords */ - GLuint flags; /* which columns are dirty */ - void *storage; /* self-allocated storage */ + GLfloat (*data)[4]; /**< may be malloc'd or point to client data */ + GLfloat *start; /**< points somewhere inside of <data> */ + GLuint count; /**< size of the vector (in elements) */ + GLuint stride; /**< stride from one element to the next (in bytes) */ + GLuint size; /**< 2-4 for vertices and 1-4 for texcoords */ + GLbitfield flags; /**< bitmask of VEC_x flags */ + void *storage; /**< self-allocated storage */ + GLuint storage_count; /**< storage size in elements */ } GLvector4f; -extern void _mesa_vector4f_init( GLvector4f *v, GLuint flags, +extern void _mesa_vector4f_init( GLvector4f *v, GLbitfield flags, GLfloat (*storage)[4] ); -extern void _mesa_vector4f_alloc( GLvector4f *v, GLuint flags, +extern void _mesa_vector4f_alloc( GLvector4f *v, GLbitfield flags, GLuint count, GLuint alignment ); extern void _mesa_vector4f_free( GLvector4f *v ); -extern void _mesa_vector4f_print( GLvector4f *v, GLubyte *, GLboolean ); +extern void _mesa_vector4f_print( const GLvector4f *v, const GLubyte *, GLboolean ); extern void _mesa_vector4f_clean_elem( GLvector4f *vec, GLuint nr, GLuint elt ); - - - -/* +/** * Given vector <v>, return a pointer (cast to <type *> to the <i>-th element. * * End up doing a lot of slow imuls if not careful. diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index c7a031067ef..a90ce95a639 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -563,6 +563,7 @@ struct var_cache * we take up with our state tokens or constants. Note that * this is _not_ the same as the number of param registers * we eventually use */ + GLuint swizzle; /**< swizzle to access this variable */ struct var_cache *next; }; @@ -581,6 +582,7 @@ var_cache_create (struct var_cache **va) (**va).param_binding_begin = ~0; (**va).param_binding_length = ~0; (**va).alias_binding = NULL; + (**va).swizzle = SWIZZLE_XYZW; (**va).next = NULL; } } @@ -872,15 +874,16 @@ parse_signed_float (const GLubyte ** inst, struct arb_program *Program) * This picks out a constant value from the parsed array. The constant vector is r * returned in the *values array, which should be of length 4. * - * \param values - The 4 component vector with the constant value in it + * \param values - return the vector constant values. + * \param size - returns the number elements in valuesOut [1..4] */ static GLvoid -parse_constant (const GLubyte ** inst, GLfloat *values, struct arb_program *Program, - GLboolean use) +parse_constant(const GLubyte ** inst, GLfloat *values, GLint *size, + struct arb_program *Program, + GLboolean use) { GLuint components, i; - switch (*(*inst)++) { case CONSTANT_SCALAR: if (use == GL_TRUE) { @@ -893,7 +896,7 @@ parse_constant (const GLubyte ** inst, GLfloat *values, struct arb_program *Prog values[1] = values[2] = values[3] = parse_signed_float (inst, Program); } - + *size = 1; break; case CONSTANT_VECTOR: values[0] = values[1] = values[2] = 0; @@ -902,7 +905,12 @@ parse_constant (const GLubyte ** inst, GLfloat *values, struct arb_program *Prog for (i = 0; i < components; i++) { values[i] = parse_signed_float (inst, Program); } + *size = 4; break; + default: + _mesa_problem(NULL, "unexpected case in parse_constant()"); + values[0] = 0.0F; + *size = 0; } } @@ -945,7 +953,7 @@ parse_generic_attrib_num(GLcontext *ctx, const GLubyte ** inst, { GLint i = parse_integer(inst, Program); - if ((i < 0) || (i >= MAX_VERTEX_PROGRAM_ATTRIBS)) + if ((i < 0) || (i >= MAX_VERTEX_GENERIC_ATTRIBS)) { program_error(ctx, Program->Position, "Invalid generic vertex attribute index"); @@ -1502,10 +1510,10 @@ generic_attrib_check(struct var_cache *vc_head) { int a; struct var_cache *curr; - GLboolean explicitAttrib[MAX_VERTEX_PROGRAM_ATTRIBS], - genericAttrib[MAX_VERTEX_PROGRAM_ATTRIBS]; + GLboolean explicitAttrib[MAX_VERTEX_GENERIC_ATTRIBS], + genericAttrib[MAX_VERTEX_GENERIC_ATTRIBS]; - for (a=0; a<MAX_VERTEX_PROGRAM_ATTRIBS; a++) { + for (a=0; a<MAX_VERTEX_GENERIC_ATTRIBS; a++) { explicitAttrib[a] = GL_FALSE; genericAttrib[a] = GL_FALSE; } @@ -1516,11 +1524,11 @@ generic_attrib_check(struct var_cache *vc_head) if (curr->attrib_is_generic) { GLuint attr = (curr->attrib_binding == 0) ? 0 : (curr->attrib_binding - VERT_ATTRIB_GENERIC0); - assert(attr < MAX_VERTEX_PROGRAM_ATTRIBS); + assert(attr < MAX_VERTEX_GENERIC_ATTRIBS); genericAttrib[attr] = GL_TRUE; } else { - assert(curr->attrib_binding < MAX_VERTEX_PROGRAM_ATTRIBS); + assert(curr->attrib_binding < MAX_VERTEX_GENERIC_ATTRIBS); explicitAttrib[ curr->attrib_binding ] = GL_TRUE; } } @@ -1528,7 +1536,7 @@ generic_attrib_check(struct var_cache *vc_head) curr = curr->next; } - for (a=0; a<MAX_VERTEX_PROGRAM_ATTRIBS; a++) { + for (a=0; a<MAX_VERTEX_GENERIC_ATTRIBS; a++) { if ((explicitAttrib[a]) && (genericAttrib[a])) return 1; } @@ -1816,7 +1824,6 @@ parse_param_elements (GLcontext * ctx, const GLubyte ** inst, GLint idx; GLuint err = 0; gl_state_index state_tokens[STATE_LENGTH] = {0, 0, 0, 0, 0}; - GLfloat const_values[4]; GLubyte token = *(*inst)++; @@ -1908,18 +1915,31 @@ parse_param_elements (GLcontext * ctx, const GLubyte ** inst, case PARAM_CONSTANT: /* parsing something like {1.0, 2.0, 3.0, 4.0} */ - parse_constant (inst, const_values, Program, use); - idx = _mesa_add_named_constant(Program->Base.Parameters, - (char *) param_var->name, - const_values, 4); - if (param_var->param_binding_begin == ~0U) - param_var->param_binding_begin = idx; - param_var->param_binding_type = PROGRAM_STATE_VAR; - /* Note: when we reference this parameter in an instruction later, - * we'll check if it's really a constant/immediate and set the - * instruction register type appropriately. - */ - param_var->param_binding_length++; + { + GLfloat const_values[4]; + GLint size; + parse_constant(inst, const_values, &size, Program, use); + if (param_var->name[0] == ' ') { + /* this is an unnamed constant */ + idx = _mesa_add_unnamed_constant(Program->Base.Parameters, + const_values, size, + ¶m_var->swizzle); + } + else { + /* named parameter/constant */ + idx = _mesa_add_named_constant(Program->Base.Parameters, + (char *) param_var->name, + const_values, size); + } + if (param_var->param_binding_begin == ~0U) + param_var->param_binding_begin = idx; + param_var->param_binding_type = PROGRAM_STATE_VAR; + /* Note: when we reference this parameter in an instruction later, + * we'll check if it's really a constant/immediate and set the + * instruction register type appropriately. + */ + param_var->param_binding_length++; + } break; default: @@ -2428,6 +2448,9 @@ parse_swizzle_mask(const GLubyte ** inst, GLubyte *swizzle, GLint len) return; } } + + if (len == 1) + swizzle[1] = swizzle[2] = swizzle[3] = swizzle[0]; } @@ -2482,7 +2505,7 @@ static GLuint parse_src_reg (GLcontext * ctx, const GLubyte ** inst, struct var_cache **vc_head, struct arb_program *Program, - gl_register_file * File, GLint * Index, + gl_register_file * File, GLint * Index, GLuint *swizzle, GLboolean *IsRelOffset ) { struct var_cache *src; @@ -2491,6 +2514,8 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst, *IsRelOffset = 0; + *swizzle = SWIZZLE_XYZW; /* default */ + /* And the binding for the src */ switch (*(*inst)++) { case REGISTER_ATTRIB: @@ -2546,6 +2571,7 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst, } *Index = src->param_binding_begin + offset; + *swizzle = src->swizzle; break; case ARRAY_INDEX_RELATIVE: @@ -2568,6 +2594,7 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst, /* And store it properly */ *Index = src->param_binding_begin + rel_off; *IsRelOffset = 1; + *swizzle = src->swizzle; } break; } @@ -2579,6 +2606,7 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst, *File = (gl_register_file) src->param_binding_type; *Index = src->param_binding_begin; + *swizzle = src->swizzle; break; } break; @@ -2647,6 +2675,21 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst, } +static GLuint +swizzle_swizzle(GLuint baseSwizzle, const GLubyte swizzle[4]) +{ + GLuint i, swz, s[4]; + for (i = 0; i < 4; i++) { + GLuint c = swizzle[i]; + if (c <= SWIZZLE_W) + s[i] = GET_SWZ(baseSwizzle, c); + else + s[i] = c; + } + swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]); + return swz; +} + /** * Parse vertex/fragment program vector source register. */ @@ -2661,12 +2704,14 @@ parse_vector_src_reg(GLcontext *ctx, const GLubyte **inst, GLubyte negateMask; GLubyte swizzle[4]; GLboolean isRelOffset; + GLuint baseSwizzle; /* Grab the sign */ negateMask = (parse_sign (inst) == -1) ? NEGATE_XYZW : NEGATE_NONE; /* And the src reg */ - if (parse_src_reg(ctx, inst, vc_head, program, &file, &index, &isRelOffset)) + if (parse_src_reg(ctx, inst, vc_head, program, &file, &index, &baseSwizzle, + &isRelOffset)) return 1; /* finally, the swizzle */ @@ -2674,7 +2719,7 @@ parse_vector_src_reg(GLcontext *ctx, const GLubyte **inst, reg->File = file; reg->Index = index; - reg->Swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + reg->Swizzle = swizzle_swizzle(baseSwizzle, swizzle); reg->Negate = negateMask; reg->RelAddr = isRelOffset; return 0; @@ -2695,12 +2740,14 @@ parse_scalar_src_reg(GLcontext *ctx, const GLubyte **inst, GLubyte negateMask; GLubyte swizzle[4]; GLboolean isRelOffset; + GLuint baseSwizzle; /* Grab the sign */ negateMask = (parse_sign (inst) == -1) ? NEGATE_XYZW : NEGATE_NONE; /* And the src reg */ - if (parse_src_reg(ctx, inst, vc_head, program, &file, &index, &isRelOffset)) + if (parse_src_reg(ctx, inst, vc_head, program, &file, &index, &baseSwizzle, + &isRelOffset)) return 1; /* finally, the swizzle */ @@ -2708,7 +2755,7 @@ parse_scalar_src_reg(GLcontext *ctx, const GLubyte **inst, reg->File = file; reg->Index = index; - reg->Swizzle = (swizzle[0] << 0); + reg->Swizzle = swizzle_swizzle(baseSwizzle, swizzle); reg->Negate = negateMask; reg->RelAddr = isRelOffset; return 0; @@ -3019,8 +3066,10 @@ parse_fp_instruction (GLcontext * ctx, const GLubyte ** inst, GLubyte negateMask; gl_register_file file; GLint index; + GLuint baseSwizzle; - if (parse_src_reg(ctx, inst, vc_head, Program, &file, &index, &rel)) + if (parse_src_reg(ctx, inst, vc_head, Program, &file, &index, + &baseSwizzle, &rel)) return 1; parse_extended_swizzle_mask(inst, swizzle, &negateMask); fp->SrcReg[0].File = file; @@ -3360,11 +3409,13 @@ parse_vp_instruction (GLcontext * ctx, const GLubyte ** inst, GLboolean relAddr; gl_register_file file; GLint index; + GLuint baseSwizzle; if (parse_dst_reg(ctx, inst, vc_head, Program, &vp->DstReg)) return 1; - if (parse_src_reg(ctx, inst, vc_head, Program, &file, &index, &relAddr)) + if (parse_src_reg(ctx, inst, vc_head, Program, &file, &index, + &baseSwizzle, &relAddr)) return 1; parse_extended_swizzle_mask (inst, swizzle, &negateMask); vp->SrcReg[0].File = file; diff --git a/src/mesa/shader/arbprogram.c b/src/mesa/shader/arbprogram.c index 981565ab8f1..39136efadac 100644 --- a/src/mesa/shader/arbprogram.c +++ b/src/mesa/shader/arbprogram.c @@ -74,8 +74,6 @@ _mesa_BindProgram(GLenum target, GLuint id) GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - /* Error-check target and get curProg */ if ((target == GL_VERTEX_PROGRAM_ARB) && /* == GL_VERTEX_PROGRAM_NV */ (ctx->Extensions.NV_vertex_program || @@ -132,6 +130,9 @@ _mesa_BindProgram(GLenum target, GLuint id) return; } + /* signal new program (and its new constants) */ + FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + /* bind newProg */ if (target == GL_VERTEX_PROGRAM_ARB) { /* == GL_VERTEX_PROGRAM_NV */ _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, @@ -253,6 +254,8 @@ _mesa_EnableVertexAttribArrayARB(GLuint index) return; } + ASSERT(index < Elements(ctx->Array.ArrayObj->VertexAttrib)); + FLUSH_VERTICES(ctx, _NEW_ARRAY); ctx->Array.ArrayObj->VertexAttrib[index].Enabled = GL_TRUE; ctx->Array.ArrayObj->_Enabled |= _NEW_ARRAY_ATTRIB(index); @@ -272,6 +275,8 @@ _mesa_DisableVertexAttribArrayARB(GLuint index) return; } + ASSERT(index < Elements(ctx->Array.ArrayObj->VertexAttrib)); + FLUSH_VERTICES(ctx, _NEW_ARRAY); ctx->Array.ArrayObj->VertexAttrib[index].Enabled = GL_FALSE; ctx->Array.ArrayObj->_Enabled &= ~_NEW_ARRAY_ATTRIB(index); @@ -298,32 +303,41 @@ _mesa_GetVertexAttribdvARB(GLuint index, GLenum pname, GLdouble *params) } +/** + * Return info for a generic vertex attribute array (no alias with + * legacy vertex attributes (pos, normal, color, etc)). + */ void GLAPIENTRY _mesa_GetVertexAttribfvARB(GLuint index, GLenum pname, GLfloat *params) { + const struct gl_client_array *array; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - if (index >= MAX_VERTEX_PROGRAM_ATTRIBS) { + if (index >= MAX_VERTEX_GENERIC_ATTRIBS) { _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexAttribfvARB(index)"); return; } + ASSERT(index < Elements(ctx->Array.ArrayObj->VertexAttrib)); + + array = &ctx->Array.ArrayObj->VertexAttrib[index]; + switch (pname) { case GL_VERTEX_ATTRIB_ARRAY_ENABLED_ARB: - params[0] = (GLfloat) ctx->Array.ArrayObj->VertexAttrib[index].Enabled; + params[0] = (GLfloat) array->Enabled; break; case GL_VERTEX_ATTRIB_ARRAY_SIZE_ARB: - params[0] = (GLfloat) ctx->Array.ArrayObj->VertexAttrib[index].Size; + params[0] = (GLfloat) array->Size; break; case GL_VERTEX_ATTRIB_ARRAY_STRIDE_ARB: - params[0] = (GLfloat) ctx->Array.ArrayObj->VertexAttrib[index].Stride; + params[0] = (GLfloat) array->Stride; break; case GL_VERTEX_ATTRIB_ARRAY_TYPE_ARB: - params[0] = (GLfloat) ctx->Array.ArrayObj->VertexAttrib[index].Type; + params[0] = (GLfloat) array->Type; break; case GL_VERTEX_ATTRIB_ARRAY_NORMALIZED_ARB: - params[0] = ctx->Array.ArrayObj->VertexAttrib[index].Normalized; + params[0] = array->Normalized; break; case GL_CURRENT_VERTEX_ATTRIB_ARB: if (index == 0) { @@ -335,7 +349,7 @@ _mesa_GetVertexAttribfvARB(GLuint index, GLenum pname, GLfloat *params) COPY_4V(params, ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + index]); break; case GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING_ARB: - params[0] = (GLfloat) ctx->Array.ArrayObj->VertexAttrib[index].BufferObj->Name; + params[0] = (GLfloat) array->BufferObj->Name; break; default: _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribfvARB(pname)"); @@ -379,6 +393,8 @@ _mesa_GetVertexAttribPointervARB(GLuint index, GLenum pname, GLvoid **pointer) return; } + ASSERT(index < Elements(ctx->Array.ArrayObj->VertexAttrib)); + *pointer = (GLvoid *) ctx->Array.ArrayObj->VertexAttrib[index].Ptr; } @@ -489,7 +505,7 @@ _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index, GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); if (target == GL_FRAGMENT_PROGRAM_ARB && ctx->Extensions.ARB_fragment_program) { @@ -537,7 +553,7 @@ _mesa_ProgramEnvParameters4fvEXT(GLenum target, GLuint index, GLsizei count, GLfloat * dest; ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); if (count <= 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameters4fv(count)"); @@ -631,7 +647,7 @@ _mesa_ProgramLocalParameter4fARB(GLenum target, GLuint index, struct gl_program *prog; ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); if ((target == GL_FRAGMENT_PROGRAM_NV && ctx->Extensions.NV_fragment_program) || @@ -685,7 +701,7 @@ _mesa_ProgramLocalParameters4fvEXT(GLenum target, GLuint index, GLsizei count, GLint i; ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); if (count <= 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameters4fv(count)"); diff --git a/src/mesa/shader/nvprogram.c b/src/mesa/shader/nvprogram.c index 5142c2a4a59..d6469b17bea 100644 --- a/src/mesa/shader/nvprogram.c +++ b/src/mesa/shader/nvprogram.c @@ -354,6 +354,7 @@ _mesa_GetTrackMatrixivNV(GLenum target, GLuint address, void GLAPIENTRY _mesa_GetVertexAttribdvNV(GLuint index, GLenum pname, GLdouble *params) { + const struct gl_client_array *array; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -362,15 +363,17 @@ _mesa_GetVertexAttribdvNV(GLuint index, GLenum pname, GLdouble *params) return; } + array = &ctx->Array.ArrayObj->VertexAttrib[index]; + switch (pname) { case GL_ATTRIB_ARRAY_SIZE_NV: - params[0] = ctx->Array.ArrayObj->VertexAttrib[index].Size; + params[0] = array->Size; break; case GL_ATTRIB_ARRAY_STRIDE_NV: - params[0] = ctx->Array.ArrayObj->VertexAttrib[index].Stride; + params[0] = array->Stride; break; case GL_ATTRIB_ARRAY_TYPE_NV: - params[0] = ctx->Array.ArrayObj->VertexAttrib[index].Type; + params[0] = array->Type; break; case GL_CURRENT_ATTRIB_NV: if (index == 0) { @@ -395,6 +398,7 @@ _mesa_GetVertexAttribdvNV(GLuint index, GLenum pname, GLdouble *params) void GLAPIENTRY _mesa_GetVertexAttribfvNV(GLuint index, GLenum pname, GLfloat *params) { + const struct gl_client_array *array; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -403,15 +407,17 @@ _mesa_GetVertexAttribfvNV(GLuint index, GLenum pname, GLfloat *params) return; } + array = &ctx->Array.ArrayObj->VertexAttrib[index]; + switch (pname) { case GL_ATTRIB_ARRAY_SIZE_NV: - params[0] = (GLfloat) ctx->Array.ArrayObj->VertexAttrib[index].Size; + params[0] = (GLfloat) array->Size; break; case GL_ATTRIB_ARRAY_STRIDE_NV: - params[0] = (GLfloat) ctx->Array.ArrayObj->VertexAttrib[index].Stride; + params[0] = (GLfloat) array->Stride; break; case GL_ATTRIB_ARRAY_TYPE_NV: - params[0] = (GLfloat) ctx->Array.ArrayObj->VertexAttrib[index].Type; + params[0] = (GLfloat) array->Type; break; case GL_CURRENT_ATTRIB_NV: if (index == 0) { @@ -436,6 +442,7 @@ _mesa_GetVertexAttribfvNV(GLuint index, GLenum pname, GLfloat *params) void GLAPIENTRY _mesa_GetVertexAttribivNV(GLuint index, GLenum pname, GLint *params) { + const struct gl_client_array *array; GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); @@ -444,15 +451,17 @@ _mesa_GetVertexAttribivNV(GLuint index, GLenum pname, GLint *params) return; } + array = &ctx->Array.ArrayObj->VertexAttrib[index]; + switch (pname) { case GL_ATTRIB_ARRAY_SIZE_NV: - params[0] = ctx->Array.ArrayObj->VertexAttrib[index].Size; + params[0] = array->Size; break; case GL_ATTRIB_ARRAY_STRIDE_NV: - params[0] = ctx->Array.ArrayObj->VertexAttrib[index].Stride; + params[0] = array->Stride; break; case GL_ATTRIB_ARRAY_TYPE_NV: - params[0] = ctx->Array.ArrayObj->VertexAttrib[index].Type; + params[0] = array->Type; break; case GL_CURRENT_ATTRIB_NV: if (index == 0) { @@ -467,7 +476,7 @@ _mesa_GetVertexAttribivNV(GLuint index, GLenum pname, GLint *params) params[3] = (GLint) ctx->Current.Attrib[index][3]; break; case GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING_ARB: - params[0] = ctx->Array.ArrayObj->VertexAttrib[index].BufferObj->Name; + params[0] = array->BufferObj->Name; break; default: _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribdvNV"); @@ -706,7 +715,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); prog = _mesa_lookup_program(ctx, id); if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) { diff --git a/src/mesa/shader/prog_instruction.c b/src/mesa/shader/prog_instruction.c index ae3a003feed..44c961927a3 100644 --- a/src/mesa/shader/prog_instruction.c +++ b/src/mesa/shader/prog_instruction.c @@ -343,7 +343,10 @@ _mesa_opcode_string(gl_inst_opcode opcode) { if (opcode < MAX_OPCODE) return InstInfo[opcode].Name; - else - return "OP?"; + else { + static char s[20]; + _mesa_snprintf(s, sizeof(s), "OP%u", opcode); + return s; + } } diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c index 6ba2e76ff96..be903106a08 100644 --- a/src/mesa/shader/prog_optimize.c +++ b/src/mesa/shader/prog_optimize.c @@ -547,15 +547,13 @@ update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) /** - * Find the live intervals for each temporary register in the program. - * For register R, the interval [A,B] indicates that R is referenced - * from instruction A through instruction B. - * Special consideration is needed for loops and subroutines. - * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + * Find first/last instruction that references each temporary register. */ -static GLboolean -find_live_intervals(struct gl_program *prog, - struct interval_list *liveIntervals) +GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, + GLuint numInstructions, + GLint intBegin[MAX_PROGRAM_TEMPS], + GLint intEnd[MAX_PROGRAM_TEMPS]) { struct loop_info { @@ -563,26 +561,15 @@ find_live_intervals(struct gl_program *prog, }; struct loop_info loopStack[MAX_LOOP_NESTING]; GLuint loopStackDepth = 0; - GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; GLuint i; - /* - * Note: we'll return GL_FALSE below if we find relative indexing - * into the TEMP register file. We can't handle that yet. - * We also give up on subroutines for now. - */ - - if (dbg) { - _mesa_printf("Optimize: Begin find intervals\n"); - } - for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ intBegin[i] = intEnd[i] = -1; } /* Scan instructions looking for temporary registers */ - for (i = 0; i < prog->NumInstructions; i++) { - const struct prog_instruction *inst = prog->Instructions + i; + for (i = 0; i < numInstructions; i++) { + const struct prog_instruction *inst = instructions + i; if (inst->Opcode == OPCODE_BGNLOOP) { loopStack[loopStackDepth].Start = i; loopStack[loopStackDepth].End = inst->BranchTarget; @@ -595,7 +582,7 @@ find_live_intervals(struct gl_program *prog, return GL_FALSE; } else { - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/ GLuint j; for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { @@ -624,6 +611,39 @@ find_live_intervals(struct gl_program *prog, } } + return GL_TRUE; +} + + +/** + * Find the live intervals for each temporary register in the program. + * For register R, the interval [A,B] indicates that R is referenced + * from instruction A through instruction B. + * Special consideration is needed for loops and subroutines. + * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + */ +static GLboolean +find_live_intervals(struct gl_program *prog, + struct interval_list *liveIntervals) +{ + GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; + GLuint i; + + /* + * Note: we'll return GL_FALSE below if we find relative indexing + * into the TEMP register file. We can't handle that yet. + * We also give up on subroutines for now. + */ + + if (dbg) { + _mesa_printf("Optimize: Begin find intervals\n"); + } + + /* build intermediate arrays */ + if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions, + intBegin, intEnd)) + return GL_FALSE; + /* Build live intervals list from intermediate arrays */ liveIntervals->Num = 0; for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { @@ -792,8 +812,6 @@ _mesa_reallocate_registers(struct gl_program *prog) } - - /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. diff --git a/src/mesa/shader/prog_optimize.h b/src/mesa/shader/prog_optimize.h index d102cfd9fc1..43894a27237 100644 --- a/src/mesa/shader/prog_optimize.h +++ b/src/mesa/shader/prog_optimize.h @@ -25,7 +25,19 @@ #ifndef PROG_OPT_H #define PROG_OPT_H + +#include "main/config.h" + + struct gl_program; +struct prog_instruction; + + +extern GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, + GLuint numInstructions, + GLint intBegin[MAX_PROGRAM_TEMPS], + GLint intEnd[MAX_PROGRAM_TEMPS]); extern void _mesa_optimize_program(GLcontext *ctx, struct gl_program *program); diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c index 9967f2978de..de7fef1f861 100644 --- a/src/mesa/shader/prog_print.c +++ b/src/mesa/shader/prog_print.c @@ -75,7 +75,11 @@ file_string(gl_register_file f, gl_prog_print_mode mode) case PROGRAM_UNDEFINED: return "UNDEFINED"; default: - return "Unknown program file!"; + { + static char s[20]; + _mesa_snprintf(s, sizeof(s), "FILE%u", f); + return s; + } } } @@ -736,7 +740,10 @@ _mesa_fprint_instruction_opt(FILE *f, mode, prog); } else { - _mesa_fprintf(f, "Other opcode %d\n", inst->Opcode); + fprint_alu_instruction(f, inst, + _mesa_opcode_string(inst->Opcode), + 3/*_mesa_num_inst_src_regs(inst->Opcode)*/, + mode, prog); } break; } @@ -941,6 +948,10 @@ _mesa_write_shader_to_file(const struct gl_shader *shader) fprintf(f, "/*\n"); _mesa_fprint_program_opt(f, shader->Program, PROG_PRINT_DEBUG, GL_TRUE); fprintf(f, "*/\n"); + fprintf(f, "/* Parameters / constants */\n"); + fprintf(f, "/*\n"); + _mesa_fprint_parameter_list(f, shader->Program->Parameters); + fprintf(f, "*/\n"); } fclose(f); diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 644cd39185c..a8390d30942 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -1487,7 +1487,7 @@ _mesa_use_program(GLcontext *ctx, GLuint program) return; } - FLUSH_VERTICES(ctx, _NEW_PROGRAM); + FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); if (program) { shProg = _mesa_lookup_shader_program_err(ctx, program, "glUseProgram"); @@ -1509,6 +1509,10 @@ _mesa_use_program(GLcontext *ctx, GLuint program) shProg->Shaders[i]->Name, shProg->Shaders[i]->Type); } + if (shProg->VertexProgram) + printf(" vert prog %u\n", shProg->VertexProgram->Base.Id); + if (shProg->FragmentProgram) + printf(" frag prog %u\n", shProg->FragmentProgram->Base.Id); } } else { @@ -1789,7 +1793,7 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, return; } - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); uniform = &shProg->Uniforms->Uniforms[location]; @@ -1929,7 +1933,7 @@ _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows, return; } - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); uniform = &shProg->Uniforms->Uniforms[location]; diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index 1fdf4db054c..5ea89d2ff3e 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -97,7 +97,8 @@ bits_agree(GLbitfield flags1, GLbitfield flags2, GLbitfield bit) * which inputs are centroid-sampled, invariant, etc. */ static GLboolean -link_varying_vars(struct gl_shader_program *shProg, struct gl_program *prog) +link_varying_vars(GLcontext *ctx, + struct gl_shader_program *shProg, struct gl_program *prog) { GLuint *map, i, firstVarying, newFile; GLbitfield *inOutFlags; @@ -156,8 +157,12 @@ link_varying_vars(struct gl_shader_program *shProg, struct gl_program *prog) var->Flags); } + if (shProg->Varying->NumParameters > ctx->Const.MaxVarying) { + link_error(shProg, "Too many varying variables"); + return GL_FALSE; + } + /* Map varying[i] to varying[j]. - * Plus, set prog->Input/OutputFlags[] as described above. * Note: the loop here takes care of arrays or large (sz>4) vars. */ { @@ -319,7 +324,7 @@ _slang_resolve_attributes(struct gl_shader_program *shProg, const struct gl_program *origProg, struct gl_program *linkedProg) { - GLint attribMap[MAX_VERTEX_ATTRIBS]; + GLint attribMap[MAX_VERTEX_GENERIC_ATTRIBS]; GLuint i, j; GLbitfield usedAttributes; /* generics only, not legacy attributes */ @@ -355,7 +360,7 @@ _slang_resolve_attributes(struct gl_shader_program *shProg, } /* initialize the generic attribute map entries to -1 */ - for (i = 0; i < MAX_VERTEX_ATTRIBS; i++) { + for (i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) { attribMap[i] = -1; } @@ -396,11 +401,11 @@ _slang_resolve_attributes(struct gl_shader_program *shProg, * Start at 1 since generic attribute 0 always aliases * glVertex/position. */ - for (attr = 0; attr < MAX_VERTEX_ATTRIBS; attr++) { + for (attr = 0; attr < MAX_VERTEX_GENERIC_ATTRIBS; attr++) { if (((1 << attr) & usedAttributes) == 0) break; } - if (attr == MAX_VERTEX_ATTRIBS) { + if (attr == MAX_VERTEX_GENERIC_ATTRIBS) { link_error(shProg, "Too many vertex attributes"); return GL_FALSE; } @@ -712,6 +717,8 @@ _slang_link(GLcontext *ctx, struct gl_vertex_program *linked_vprog = vertex_program(_mesa_clone_program(ctx, &vertProg->Base)); shProg->VertexProgram = linked_vprog; /* refcount OK */ + /* vertex program ID not significant; just set Id for debugging purposes */ + shProg->VertexProgram->Base.Id = shProg->Name; ASSERT(shProg->VertexProgram->Base.RefCount == 1); } @@ -720,16 +727,18 @@ _slang_link(GLcontext *ctx, struct gl_fragment_program *linked_fprog = fragment_program(_mesa_clone_program(ctx, &fragProg->Base)); shProg->FragmentProgram = linked_fprog; /* refcount OK */ + /* vertex program ID not significant; just set Id for debugging purposes */ + shProg->FragmentProgram->Base.Id = shProg->Name; ASSERT(shProg->FragmentProgram->Base.RefCount == 1); } /* link varying vars */ if (shProg->VertexProgram) { - if (!link_varying_vars(shProg, &shProg->VertexProgram->Base)) + if (!link_varying_vars(ctx, shProg, &shProg->VertexProgram->Base)) return; } if (shProg->FragmentProgram) { - if (!link_varying_vars(shProg, &shProg->FragmentProgram->Base)) + if (!link_varying_vars(ctx, shProg, &shProg->FragmentProgram->Base)) return; } diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 3ba7b269285..5d4d8eee02f 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -24,11 +24,12 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - * Brian Paul - */ + +/* + * Authors: + * Keith Whitwell <[email protected]> + * Brian Paul + */ #include "main/imports.h" #include "shader/prog_parameter.h" @@ -44,19 +45,21 @@ #include "st_program.h" #include "st_inlines.h" + /** * Pass the given program parameters to the graphics pipe as a * constant buffer. - * \param id either PIPE_SHADER_VERTEX or PIPE_SHADER_FRAGMENT + * \param shader_type either PIPE_SHADER_VERTEX or PIPE_SHADER_FRAGMENT */ void st_upload_constants( struct st_context *st, struct gl_program_parameter_list *params, - unsigned id) + unsigned shader_type) { struct pipe_context *pipe = st->pipe; - struct pipe_constant_buffer *cbuf = &st->state.constants[id]; + struct pipe_constant_buffer *cbuf = &st->state.constants[shader_type]; - assert(id == PIPE_SHADER_VERTEX || id == PIPE_SHADER_FRAGMENT); + assert(shader_type == PIPE_SHADER_VERTEX || + shader_type == PIPE_SHADER_FRAGMENT); /* update constants */ if (params && params->NumParameters) { @@ -68,13 +71,14 @@ void st_upload_constants( struct st_context *st, * avoid gratuitous rendering synchronization. */ pipe_buffer_reference(&cbuf->buffer, NULL ); - cbuf->buffer = pipe_buffer_create(pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, + cbuf->buffer = pipe_buffer_create(pipe->screen, 16, + PIPE_BUFFER_USAGE_CONSTANT, paramBytes ); - if (0) - { - printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n", - __FUNCTION__, id, params->NumParameters, params->StateFlags); + if (0) { + debug_printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n", + __FUNCTION__, shader_type, params->NumParameters, + params->StateFlags); _mesa_print_parameter_list(params); } @@ -84,15 +88,16 @@ void st_upload_constants( struct st_context *st, 0, paramBytes, params->ParameterValues); - st->pipe->set_constant_buffer(st->pipe, id, 0, cbuf); + st->pipe->set_constant_buffer(st->pipe, shader_type, 0, cbuf); } else { - st->constants.tracked_state[id].dirty.mesa = 0; - // st->pipe->set_constant_buffer(st->pipe, id, 0, NULL); + st->constants.tracked_state[shader_type].dirty.mesa = 0x0; } } -/* Vertex shader: + +/** + * Vertex shader: */ static void update_vs_constants(struct st_context *st ) { @@ -102,6 +107,7 @@ static void update_vs_constants(struct st_context *st ) st_upload_constants( st, params, PIPE_SHADER_VERTEX ); } + const struct st_tracked_state st_update_vs_constants = { "st_update_vs_constants", /* name */ { /* dirty */ @@ -111,7 +117,10 @@ const struct st_tracked_state st_update_vs_constants = { update_vs_constants /* update */ }; -/* Fragment shader: + + +/** + * Fragment shader: */ static void update_fs_constants(struct st_context *st ) { @@ -121,6 +130,7 @@ static void update_fs_constants(struct st_context *st ) st_upload_constants( st, params, PIPE_SHADER_FRAGMENT ); } + const struct st_tracked_state st_update_fs_constants = { "st_update_fs_constants", /* name */ { /* dirty */ diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index 536293683e9..4d897b677e0 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -122,6 +122,7 @@ update_framebuffer_state( struct st_context *st ) strb->surface); framebuffer->nr_cbufs++; } + strb->defined = GL_TRUE; /* we'll be drawing something */ } } for (i = framebuffer->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; i++) { diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index a94e11fff12..f5d802055f5 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -28,6 +28,7 @@ #include "main/imports.h" #include "main/mtypes.h" +#include "main/arrayobj.h" #include "main/bufferobj.h" #include "st_inlines.h" @@ -307,4 +308,8 @@ st_init_bufferobject_functions(struct dd_function_table *functions) functions->MapBufferRange = st_bufferobj_map_range; functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range; functions->UnmapBuffer = st_bufferobj_unmap; + + /* For GL_APPLE_vertex_array_object */ + functions->NewArrayObject = _mesa_new_array_object; + functions->DeleteArrayObject = _mesa_delete_array_object; } diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index e003b6db5cf..c249f3b3578 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -125,6 +125,8 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, strb->Base.Height = height; init_renderbuffer_bits(strb, template.format); + strb->defined = GL_FALSE; /* undefined contents now */ + /* Probably need dedicated flags for surface usage too: */ surface_usage = (PIPE_BUFFER_USAGE_GPU_READ | @@ -462,6 +464,134 @@ st_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) } +/** + * Copy back color buffer to front color buffer. + */ +static void +copy_back_to_front(struct st_context *st, + struct gl_framebuffer *fb, + gl_buffer_index frontIndex, + gl_buffer_index backIndex) + +{ + struct st_framebuffer *stfb = (struct st_framebuffer *) fb; + struct pipe_surface *surf_front, *surf_back; + + (void) st_get_framebuffer_surface(stfb, frontIndex, &surf_front); + (void) st_get_framebuffer_surface(stfb, backIndex, &surf_back); + + if (surf_front && surf_back) { + st->pipe->surface_copy(st->pipe, + surf_front, 0, 0, /* dest */ + surf_back, 0, 0, /* src */ + fb->Width, fb->Height); + } +} + + +/** + * Check if we're drawing into, or read from, a front color buffer. If the + * front buffer is missing, create it now. + * + * The back color buffer must exist since we'll use its format/samples info + * for creating the front buffer. + * + * \param frontIndex either BUFFER_FRONT_LEFT or BUFFER_FRONT_RIGHT + * \param backIndex either BUFFER_BACK_LEFT or BUFFER_BACK_RIGHT + */ +static void +check_create_front_buffer(GLcontext *ctx, struct gl_framebuffer *fb, + gl_buffer_index frontIndex, + gl_buffer_index backIndex) +{ + if (fb->Attachment[frontIndex].Renderbuffer == NULL) { + GLboolean create = GL_FALSE; + + /* check if drawing to or reading from front buffer */ + if (fb->_ColorReadBufferIndex == frontIndex) { + create = GL_TRUE; + } + else { + GLuint b; + for (b = 0; b < fb->_NumColorDrawBuffers; b++) { + if (fb->_ColorDrawBufferIndexes[b] == frontIndex) { + create = GL_TRUE; + break; + } + } + } + + if (create) { + struct st_renderbuffer *back; + struct gl_renderbuffer *front; + enum pipe_format colorFormat; + uint samples; + + if (0) + _mesa_debug(ctx, "Allocate new front buffer\n"); + + /* get back renderbuffer info */ + back = st_renderbuffer(fb->Attachment[backIndex].Renderbuffer); + colorFormat = back->format; + samples = back->Base.NumSamples; + + /* create front renderbuffer */ + front = st_new_renderbuffer_fb(colorFormat, samples); + _mesa_add_renderbuffer(fb, frontIndex, front); + + /* alloc texture/surface for new front buffer */ + front->AllocStorage(ctx, front, front->InternalFormat, + fb->Width, fb->Height); + + /* initialize the front color buffer contents by copying + * the back buffer. + */ + copy_back_to_front(ctx->st, fb, frontIndex, backIndex); + } + } +} + + +/** + * If front left/right color buffers are missing, create them now. + */ +static void +check_create_front_buffers(GLcontext *ctx, struct gl_framebuffer *fb) +{ + /* check if we need to create the front left buffer now */ + check_create_front_buffer(ctx, fb, BUFFER_FRONT_LEFT, BUFFER_BACK_LEFT); + + if (fb->Visual.stereoMode) { + check_create_front_buffer(ctx, fb, BUFFER_FRONT_RIGHT, BUFFER_BACK_RIGHT); + } + + st_invalidate_state(ctx, _NEW_BUFFERS); +} + + +/** + * Called via glDrawBuffer. + */ +static void +st_DrawBuffers(GLcontext *ctx, GLsizei count, const GLenum *buffers) +{ + (void) count; + (void) buffers; + check_create_front_buffers(ctx, ctx->DrawBuffer); +} + + +/** + * Called via glReadBuffer. + */ +static void +st_ReadBuffer(GLcontext *ctx, GLenum buffer) +{ + (void) buffer; + check_create_front_buffers(ctx, ctx->ReadBuffer); +} + + void st_init_fbo_functions(struct dd_function_table *functions) { functions->NewFramebuffer = st_new_framebuffer; @@ -474,4 +604,7 @@ void st_init_fbo_functions(struct dd_function_table *functions) /* no longer needed by core Mesa, drivers handle resizes... functions->ResizeBuffers = st_resize_buffers; */ + + functions->DrawBuffers = st_DrawBuffers; + functions->ReadBuffer = st_ReadBuffer; } diff --git a/src/mesa/state_tracker/st_cb_fbo.h b/src/mesa/state_tracker/st_cb_fbo.h index 44fa9fe9a4f..fd77d0a95b0 100644 --- a/src/mesa/state_tracker/st_cb_fbo.h +++ b/src/mesa/state_tracker/st_cb_fbo.h @@ -44,6 +44,7 @@ struct st_renderbuffer struct pipe_texture *texture; struct pipe_surface *surface; /* temporary view into texture */ enum pipe_format format; /** preferred format, or PIPE_FORMAT_NONE */ + GLboolean defined; /**< defined contents? */ struct st_texture_object *rtt; /**< GL render to texture's texture */ int rtt_level, rtt_face, rtt_slice; diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index fbaffd154f9..8ceeeabcd37 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -47,10 +47,19 @@ #include "util/u_blit.h" +/** Check if we have a front color buffer and if it's been drawn to. */ static INLINE GLboolean is_front_buffer_dirty(struct st_context *st) { - return st->frontbuffer_status == FRONT_STATUS_DIRTY; + if (st->frontbuffer_status == FRONT_STATUS_DIRTY) { + return GL_TRUE; + } + else { + GLframebuffer *fb = st->ctx->DrawBuffer; + struct st_renderbuffer *strb + = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + return strb && strb->defined; + } } diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 7dd23527399..3bcccd0df46 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -194,9 +194,10 @@ new_draw_rastpos_stage(GLcontext *ctx, struct draw_context *draw) rs->stage.destroy = rastpos_destroy; rs->ctx = ctx; - for (i = 0; i < VERT_ATTRIB_MAX; i++) { + for (i = 0; i < Elements(rs->array); i++) { rs->array[i].Size = 4; rs->array[i].Type = GL_FLOAT; + rs->array[i].Format = GL_RGBA; rs->array[i].Stride = 0; rs->array[i].StrideB = 0; rs->array[i].Ptr = (GLubyte *) ctx->Current.Attrib[i]; diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 6ffed56d9a0..18adb35e872 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -45,6 +45,7 @@ struct blit_state; struct bitmap_cache; +/** XXX we'd like to get rid of these */ #define FRONT_STATUS_UNDEFINED 0 #define FRONT_STATUS_DIRTY 1 #define FRONT_STATUS_COPY_OF_BACK 2 @@ -111,7 +112,7 @@ struct st_context struct gl_fragment_program *fragment_program; } cb; - GLuint frontbuffer_status; /**< one of FRONT_STATUS_ */ + GLuint frontbuffer_status; /**< one of FRONT_STATUS_ (XXX to be removed) */ char vendor[100]; char renderer[100]; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 225541a30ba..8e036223c65 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -159,12 +159,21 @@ static GLuint fixed_types[4] = { * Return a PIPE_FORMAT_x for the given GL datatype and size. */ GLuint -st_pipe_vertex_format(GLenum type, GLuint size, GLboolean normalized) +st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, + GLboolean normalized) { assert((type >= GL_BYTE && type <= GL_DOUBLE) || type == GL_FIXED); assert(size >= 1); assert(size <= 4); + assert(format == GL_RGBA || format == GL_BGRA); + + if (format == GL_BGRA) { + /* this is an odd-ball case */ + assert(type == GL_UNSIGNED_BYTE); + assert(normalized); + return PIPE_FORMAT_B8G8R8A8_UNORM; + } if (normalized) { switch (type) { @@ -392,6 +401,7 @@ setup_interleaved_attribs(GLcontext *ctx, velements[attr].src_format = st_pipe_vertex_format(arrays[mesaAttr]->Type, arrays[mesaAttr]->Size, + arrays[mesaAttr]->Format, arrays[mesaAttr]->Normalized); assert(velements[attr].src_format); } @@ -479,6 +489,7 @@ setup_non_interleaved_attribs(GLcontext *ctx, velements[attr].src_format = st_pipe_vertex_format(arrays[mesaAttr]->Type, arrays[mesaAttr]->Size, + arrays[mesaAttr]->Format, arrays[mesaAttr]->Normalized); assert(velements[attr].src_format); } diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index da04fce8e24..dcfe7e15361 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -62,7 +62,8 @@ st_feedback_draw_vbo(GLcontext *ctx, /* Internal function: */ extern GLuint -st_pipe_vertex_format(GLenum type, GLuint size, GLboolean normalized); +st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, + GLboolean normalized); /** diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index 32502a9cda4..2712c131c0d 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -178,6 +178,7 @@ st_feedback_draw_vbo(GLcontext *ctx, velements[attr].src_format = st_pipe_vertex_format(arrays[mesaAttr]->Type, arrays[mesaAttr]->Size, + arrays[mesaAttr]->Format, arrays[mesaAttr]->Normalized); assert(velements[attr].src_format); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 8f6be507742..efa88c2481f 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -167,6 +167,9 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_texture_env_combine = GL_TRUE; ctx->Extensions.EXT_texture_env_dot3 = GL_TRUE; ctx->Extensions.EXT_texture_lod_bias = GL_TRUE; + ctx->Extensions.EXT_vertex_array_bgra = GL_TRUE; + + ctx->Extensions.APPLE_vertex_array_object = GL_TRUE; ctx->Extensions.NV_blend_square = GL_TRUE; ctx->Extensions.NV_texgen_reflection = GL_TRUE; diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c index daaad65ccaf..ef800291ccd 100644 --- a/src/mesa/state_tracker/st_framebuffer.c +++ b/src/mesa/state_tracker/st_framebuffer.c @@ -58,19 +58,19 @@ st_create_framebuffer( const __GLcontextModes *visual, _mesa_initialize_framebuffer(&stfb->Base, visual); - { - /* fake frontbuffer */ - /* XXX allocation should only happen in the unusual case - it's actually needed */ + if (visual->doubleBufferMode) { struct gl_renderbuffer *rb = st_new_renderbuffer_fb(colorFormat, samples); - _mesa_add_renderbuffer(&stfb->Base, BUFFER_FRONT_LEFT, rb); + _mesa_add_renderbuffer(&stfb->Base, BUFFER_BACK_LEFT, rb); } - - if (visual->doubleBufferMode) { + else { + /* Only allocate front buffer right now if we're single buffered. + * If double-buffered, allocate front buffer on demand later. + * See check_create_front_buffers(). + */ struct gl_renderbuffer *rb = st_new_renderbuffer_fb(colorFormat, samples); - _mesa_add_renderbuffer(&stfb->Base, BUFFER_BACK_LEFT, rb); + _mesa_add_renderbuffer(&stfb->Base, BUFFER_FRONT_LEFT, rb); } if (depthFormat == stencilFormat && depthFormat != PIPE_FORMAT_NONE) { @@ -293,6 +293,115 @@ st_notify_swapbuffers(struct st_framebuffer *stfb) } +/** + * Swap the front/back color buffers. Exchange the front/back pointers + * and update some derived state. + * No need to call st_notify_swapbuffers() first. + * + * For a single-buffered framebuffer, no swap occurs, but we still return + * the pointer(s) to the front color buffer(s). + * + * \param front_left returns pointer to front-left renderbuffer after swap + * \param front_right returns pointer to front-right renderbuffer after swap + */ +void +st_swapbuffers(struct st_framebuffer *stfb, + struct pipe_surface **front_left, + struct pipe_surface **front_right) +{ + struct gl_framebuffer *fb = &stfb->Base; + + GET_CURRENT_CONTEXT(ctx); + + if (ctx && ctx->DrawBuffer == &stfb->Base) { + st_flush( ctx->st, + PIPE_FLUSH_RENDER_CACHE | + PIPE_FLUSH_SWAPBUFFERS | + PIPE_FLUSH_FRAME, + NULL ); + } + + if (!fb->Visual.doubleBufferMode) { + /* single buffer mode - return pointers to front surfaces */ + if (front_left) { + struct st_renderbuffer *strb = + st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + *front_left = strb->surface; + } + if (front_right) { + struct st_renderbuffer *strb = + st_renderbuffer(fb->Attachment[BUFFER_FRONT_RIGHT].Renderbuffer); + *front_right = strb ? strb->surface : NULL; + } + return; + } + + /* swap left buffers */ + if (fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer && + fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer) { + struct gl_renderbuffer *rbTemp; + rbTemp = fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer = + fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer = rbTemp; + if (front_left) { + struct st_renderbuffer *strb = + st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + *front_left = strb->surface; + } + /* mark back buffer contents as undefined */ + { + struct st_renderbuffer *back = + st_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer); + back->defined = GL_FALSE; + } + } + else { + /* no front buffer, display the back buffer */ + if (front_left) { + struct st_renderbuffer *strb = + st_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer); + *front_left = strb->surface; + } + } + + /* swap right buffers (for stereo) */ + if (fb->Attachment[BUFFER_FRONT_RIGHT].Renderbuffer && + fb->Attachment[BUFFER_BACK_RIGHT].Renderbuffer) { + struct gl_renderbuffer *rbTemp; + rbTemp = fb->Attachment[BUFFER_FRONT_RIGHT].Renderbuffer; + fb->Attachment[BUFFER_FRONT_RIGHT].Renderbuffer = + fb->Attachment[BUFFER_BACK_RIGHT].Renderbuffer; + fb->Attachment[BUFFER_BACK_RIGHT].Renderbuffer = rbTemp; + if (front_right) { + struct st_renderbuffer *strb = + st_renderbuffer(fb->Attachment[BUFFER_FRONT_RIGHT].Renderbuffer); + *front_right = strb->surface; + } + /* mark back buffer contents as undefined */ + { + struct st_renderbuffer *back = + st_renderbuffer(fb->Attachment[BUFFER_BACK_RIGHT].Renderbuffer); + back->defined = GL_FALSE; + } + } + else { + /* no front right buffer, display back right buffer (if exists) */ + if (front_right) { + struct st_renderbuffer *strb = + st_renderbuffer(fb->Attachment[BUFFER_BACK_RIGHT].Renderbuffer); + *front_right = strb ? strb->surface : NULL; + } + } + + /* Update the _ColorDrawBuffers[] array and _ColorReadBuffer pointer */ + _mesa_update_framebuffer(ctx); + + /* Make sure we draw into the new back surface */ + st_invalidate_state(ctx, _NEW_BUFFERS); +} + + void *st_framebuffer_private( struct st_framebuffer *stfb ) { return stfb->Private; diff --git a/src/mesa/state_tracker/st_public.h b/src/mesa/state_tracker/st_public.h index 290b8a974e6..174fbc63941 100644 --- a/src/mesa/state_tracker/st_public.h +++ b/src/mesa/state_tracker/st_public.h @@ -103,6 +103,10 @@ void st_finish( struct st_context *st ); void st_notify_swapbuffers(struct st_framebuffer *stfb); +void st_swapbuffers(struct st_framebuffer *stfb, + struct pipe_surface **front_left, + struct pipe_surface **front_right); + int st_set_teximage(struct pipe_texture *pt, int target); /** Redirect rendering into stfb's surface to a texture image */ diff --git a/src/mesa/swrast/s_imaging.c b/src/mesa/swrast/s_imaging.c index d6be3aa022e..3578b713f61 100644 --- a/src/mesa/swrast/s_imaging.c +++ b/src/mesa/swrast/s_imaging.c @@ -60,7 +60,7 @@ _swrast_CopyColorTable( GLcontext *ctx, /* save PBO binding */ bufferSave = ctx->Unpack.BufferObj; - ctx->Unpack.BufferObj = ctx->Array.NullBufferObj; + ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj; _mesa_ColorTable(target, internalformat, width, GL_RGBA, CHAN_TYPE, data); @@ -94,7 +94,7 @@ _swrast_CopyColorSubTable( GLcontext *ctx,GLenum target, GLsizei start, /* save PBO binding */ bufferSave = ctx->Unpack.BufferObj; - ctx->Unpack.BufferObj = ctx->Array.NullBufferObj; + ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj; _mesa_ColorSubTable(target, start, width, GL_RGBA, CHAN_TYPE, data); @@ -126,7 +126,7 @@ _swrast_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target, /* save PBO binding */ bufferSave = ctx->Unpack.BufferObj; - ctx->Unpack.BufferObj = ctx->Array.NullBufferObj; + ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj; /* store as convolution filter */ _mesa_ConvolutionFilter1D(target, internalFormat, width, @@ -178,12 +178,12 @@ _swrast_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target, ctx->Unpack.SkipImages = 0; ctx->Unpack.SwapBytes = GL_FALSE; ctx->Unpack.LsbFirst = GL_FALSE; - ctx->Unpack.BufferObj = ctx->Array.NullBufferObj; + ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj; ctx->NewState |= _NEW_PACKUNPACK; /* save PBO binding */ bufferSave = ctx->Unpack.BufferObj; - ctx->Unpack.BufferObj = ctx->Array.NullBufferObj; + ctx->Unpack.BufferObj = ctx->Shared->NullBufferObj; _mesa_ConvolutionFilter2D(target, internalFormat, width, height, GL_RGBA, CHAN_TYPE, rgba); diff --git a/src/mesa/swrast/s_texfilter.c b/src/mesa/swrast/s_texfilter.c index 31bfb5c9520..0067d3eeb73 100644 --- a/src/mesa/swrast/s_texfilter.c +++ b/src/mesa/swrast/s_texfilter.c @@ -1329,7 +1329,7 @@ static void opt_sample_rgb_2d(GLcontext *ctx, const struct gl_texture_object *tObj, GLuint n, const GLfloat texcoords[][4], - const GLfloat lambda[], GLchan rgba[][4]) + const GLfloat lambda[], GLfloat rgba[][4]) { const struct gl_texture_image *img = tObj->Image[0][tObj->BaseLevel]; const GLfloat width = (GLfloat) img->Width; @@ -1351,9 +1351,9 @@ opt_sample_rgb_2d(GLcontext *ctx, GLint j = IFLOOR(texcoords[k][1] * height) & rowMask; GLint pos = (j << shift) | i; GLchan *texel = ((GLchan *) img->Data) + 3*pos; - rgba[k][RCOMP] = texel[0]; - rgba[k][GCOMP] = texel[1]; - rgba[k][BCOMP] = texel[2]; + rgba[k][RCOMP] = CHAN_TO_FLOAT(texel[0]); + rgba[k][GCOMP] = CHAN_TO_FLOAT(texel[1]); + rgba[k][BCOMP] = CHAN_TO_FLOAT(texel[2]); } } @@ -1370,7 +1370,7 @@ static void opt_sample_rgba_2d(GLcontext *ctx, const struct gl_texture_object *tObj, GLuint n, const GLfloat texcoords[][4], - const GLfloat lambda[], GLchan rgba[][4]) + const GLfloat lambda[], GLfloat rgba[][4]) { const struct gl_texture_image *img = tObj->Image[0][tObj->BaseLevel]; const GLfloat width = (GLfloat) img->Width; @@ -1392,7 +1392,10 @@ opt_sample_rgba_2d(GLcontext *ctx, const GLint row = IFLOOR(texcoords[i][1] * height) & rowMask; const GLint pos = (row << shift) | col; const GLchan *texel = ((GLchan *) img->Data) + (pos << 2); /* pos*4 */ - COPY_4V(rgba[i], texel); + rgba[i][RCOMP] = CHAN_TO_FLOAT(texel[0]); + rgba[i][GCOMP] = CHAN_TO_FLOAT(texel[1]); + rgba[i][BCOMP] = CHAN_TO_FLOAT(texel[2]); + rgba[i][ACOMP] = CHAN_TO_FLOAT(texel[3]); } } @@ -1425,7 +1428,6 @@ sample_lambda_2d(GLcontext *ctx, case GL_NEAREST: if (repeatNoBorderPOT) { switch (tImg->TexFormat->MesaFormat) { -#if 0 case MESA_FORMAT_RGB: opt_sample_rgb_2d(ctx, tObj, m, texcoords + minStart, NULL, rgba + minStart); @@ -1434,7 +1436,6 @@ sample_lambda_2d(GLcontext *ctx, opt_sample_rgba_2d(ctx, tObj, m, texcoords + minStart, NULL, rgba + minStart); break; -#endif default: sample_nearest_2d(ctx, tObj, m, texcoords + minStart, NULL, rgba + minStart ); @@ -1484,7 +1485,6 @@ sample_lambda_2d(GLcontext *ctx, case GL_NEAREST: if (repeatNoBorderPOT) { switch (tImg->TexFormat->MesaFormat) { -#if 0 case MESA_FORMAT_RGB: opt_sample_rgb_2d(ctx, tObj, m, texcoords + magStart, NULL, rgba + magStart); @@ -1493,7 +1493,6 @@ sample_lambda_2d(GLcontext *ctx, opt_sample_rgba_2d(ctx, tObj, m, texcoords + magStart, NULL, rgba + magStart); break; -#endif default: sample_nearest_2d(ctx, tObj, m, texcoords + magStart, NULL, rgba + magStart ); @@ -3180,7 +3179,6 @@ _swrast_choose_texture_sample_func( GLcontext *ctx, } else { /* check for a few optimized cases */ -#if 0 const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; ASSERT(t->MinFilter == GL_NEAREST); if (t->WrapS == GL_REPEAT && @@ -3197,10 +3195,6 @@ _swrast_choose_texture_sample_func( GLcontext *ctx, img->TexFormat->MesaFormat == MESA_FORMAT_RGBA) { return &opt_sample_rgba_2d; } -#else - if (0) - ; -#endif else { return &sample_nearest_2d; } diff --git a/src/mesa/tnl/t_vb_cliptmp.h b/src/mesa/tnl/t_vb_cliptmp.h index 788fe329ed8..61b0a89554c 100644 --- a/src/mesa/tnl/t_vb_cliptmp.h +++ b/src/mesa/tnl/t_vb_cliptmp.h @@ -127,7 +127,7 @@ TAG(clip_line)( GLcontext *ctx, GLuint v0, GLuint v1, GLubyte mask ) GLuint p; const GLuint v0_orig = v0; - if (mask & 0x3f) { + if (mask & CLIP_FRUSTUM_BITS) { LINE_CLIP( CLIP_RIGHT_BIT, -1, 0, 0, 1 ); LINE_CLIP( CLIP_LEFT_BIT, 1, 0, 0, 1 ); LINE_CLIP( CLIP_TOP_BIT, 0, -1, 0, 1 ); @@ -199,7 +199,7 @@ TAG(clip_tri)( GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLubyte mask ) ASSIGN_3V(inlist, v2, v0, v1 ); /* pv rotated to slot zero */ - if (mask & 0x3f) { + if (mask & CLIP_FRUSTUM_BITS) { POLY_CLIP( CLIP_RIGHT_BIT, -1, 0, 0, 1 ); POLY_CLIP( CLIP_LEFT_BIT, 1, 0, 0, 1 ); POLY_CLIP( CLIP_TOP_BIT, 0, -1, 0, 1 ); @@ -227,6 +227,25 @@ TAG(clip_tri)( GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLubyte mask ) } } + if (0) { + /* print pre/post-clip vertex coords */ + GLuint i, j; + _mesa_printf("pre clip\n"); + for (i = 0; i < 3; i++) { + j = outlist[i]; + _mesa_printf(" %u: %u: %f, %f, %f, %f\n", + i, j, + coord[j][0], coord[j][1], coord[j][2], coord[j][3]); + } + _mesa_printf("post clip\n"); + for (i = 0; i < n; i++) { + j = inlist[i]; + _mesa_printf(" %u: %u: %f, %f, %f, %f\n", + i, j, + coord[j][0], coord[j][1], coord[j][2], coord[j][3]); + } + } + tnl->Driver.Render.ClippedPolygon( ctx, inlist, n ); } @@ -250,7 +269,7 @@ TAG(clip_quad)( GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3, ASSIGN_4V(inlist, v3, v0, v1, v2 ); /* pv rotated to slot zero */ - if (mask & 0x3f) { + if (mask & CLIP_FRUSTUM_BITS) { POLY_CLIP( CLIP_RIGHT_BIT, -1, 0, 0, 1 ); POLY_CLIP( CLIP_LEFT_BIT, 1, 0, 0, 1 ); POLY_CLIP( CLIP_TOP_BIT, 0, -1, 0, 1 ); diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index 1795f62c323..c35eaf1538e 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -207,7 +207,7 @@ init_machine(GLcontext *ctx, struct gl_program_machine *machine) { /* Input registers get initialized from the current vertex attribs */ MEMCPY(machine->VertAttribs, ctx->Current.Attrib, - MAX_VERTEX_PROGRAM_ATTRIBS * 4 * sizeof(GLfloat)); + MAX_VERTEX_GENERIC_ATTRIBS * 4 * sizeof(GLfloat)); if (ctx->VertexProgram._Current->IsNVProgram) { GLuint i; diff --git a/src/mesa/vbo/vbo_attrib_tmp.h b/src/mesa/vbo/vbo_attrib_tmp.h index ff11c7d59a7..7a889b8e2f8 100644 --- a/src/mesa/vbo/vbo_attrib_tmp.h +++ b/src/mesa/vbo/vbo_attrib_tmp.h @@ -265,7 +265,7 @@ static void GLAPIENTRY TAG(VertexAttrib1fARB)( GLuint index, GLfloat x ) GET_CURRENT_CONTEXT( ctx ); if (index == 0) ATTR1F(0, x); - else if (index < MAX_VERTEX_ATTRIBS) + else if (index < MAX_VERTEX_GENERIC_ATTRIBS) ATTR1F(VBO_ATTRIB_GENERIC0 + index, x); else ERROR(); @@ -277,7 +277,7 @@ static void GLAPIENTRY TAG(VertexAttrib1fvARB)( GLuint index, GET_CURRENT_CONTEXT( ctx ); if (index == 0) ATTR1FV(0, v); - else if (index < MAX_VERTEX_ATTRIBS) + else if (index < MAX_VERTEX_GENERIC_ATTRIBS) ATTR1FV(VBO_ATTRIB_GENERIC0 + index, v); else ERROR(); @@ -289,7 +289,7 @@ static void GLAPIENTRY TAG(VertexAttrib2fARB)( GLuint index, GLfloat x, GET_CURRENT_CONTEXT( ctx ); if (index == 0) ATTR2F(0, x, y); - else if (index < MAX_VERTEX_ATTRIBS) + else if (index < MAX_VERTEX_GENERIC_ATTRIBS) ATTR2F(VBO_ATTRIB_GENERIC0 + index, x, y); else ERROR(); @@ -301,7 +301,7 @@ static void GLAPIENTRY TAG(VertexAttrib2fvARB)( GLuint index, GET_CURRENT_CONTEXT( ctx ); if (index == 0) ATTR2FV(0, v); - else if (index < MAX_VERTEX_ATTRIBS) + else if (index < MAX_VERTEX_GENERIC_ATTRIBS) ATTR2FV(VBO_ATTRIB_GENERIC0 + index, v); else ERROR(); @@ -313,7 +313,7 @@ static void GLAPIENTRY TAG(VertexAttrib3fARB)( GLuint index, GLfloat x, GET_CURRENT_CONTEXT( ctx ); if (index == 0) ATTR3F(0, x, y, z); - else if (index < MAX_VERTEX_ATTRIBS) + else if (index < MAX_VERTEX_GENERIC_ATTRIBS) ATTR3F(VBO_ATTRIB_GENERIC0 + index, x, y, z); else ERROR(); @@ -325,7 +325,7 @@ static void GLAPIENTRY TAG(VertexAttrib3fvARB)( GLuint index, GET_CURRENT_CONTEXT( ctx ); if (index == 0) ATTR3FV(0, v); - else if (index < MAX_VERTEX_ATTRIBS) + else if (index < MAX_VERTEX_GENERIC_ATTRIBS) ATTR3FV(VBO_ATTRIB_GENERIC0 + index, v); else ERROR(); @@ -338,7 +338,7 @@ static void GLAPIENTRY TAG(VertexAttrib4fARB)( GLuint index, GLfloat x, GET_CURRENT_CONTEXT( ctx ); if (index == 0) ATTR4F(0, x, y, z, w); - else if (index < MAX_VERTEX_ATTRIBS) + else if (index < MAX_VERTEX_GENERIC_ATTRIBS) ATTR4F(VBO_ATTRIB_GENERIC0 + index, x, y, z, w); else ERROR(); @@ -350,7 +350,7 @@ static void GLAPIENTRY TAG(VertexAttrib4fvARB)( GLuint index, GET_CURRENT_CONTEXT( ctx ); if (index == 0) ATTR4FV(0, v); - else if (index < MAX_VERTEX_ATTRIBS) + else if (index < MAX_VERTEX_GENERIC_ATTRIBS) ATTR4FV(VBO_ATTRIB_GENERIC0 + index, v); else ERROR(); diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c index ca8190fd059..90025f62fc9 100644 --- a/src/mesa/vbo/vbo_context.c +++ b/src/mesa/vbo/vbo_context.c @@ -28,6 +28,7 @@ #include "main/imports.h" #include "main/mtypes.h" #include "main/api_arrayelt.h" +#include "main/bufferobj.h" #include "math/m_eval.h" #include "vbo.h" #include "vbo_context.h" @@ -81,7 +82,8 @@ static void init_legacy_currval(GLcontext *ctx) cl->Type = GL_FLOAT; cl->Format = GL_RGBA; cl->Ptr = (const void *)ctx->Current.Attrib[i]; - cl->BufferObj = ctx->Array.NullBufferObj; + _mesa_reference_buffer_object(ctx, &cl->BufferObj, + ctx->Shared->NullBufferObj); } } @@ -106,7 +108,8 @@ static void init_generic_currval(GLcontext *ctx) cl->Stride = 0; cl->StrideB = 0; cl->Enabled = 1; - cl->BufferObj = ctx->Array.NullBufferObj; + _mesa_reference_buffer_object(ctx, &cl->BufferObj, + ctx->Shared->NullBufferObj); } } @@ -150,7 +153,7 @@ static void init_mat_currval(GLcontext *ctx) cl->Stride = 0; cl->StrideB = 0; cl->Enabled = 1; - cl->BufferObj = ctx->Array.NullBufferObj; + cl->BufferObj = ctx->Shared->NullBufferObj; } } @@ -211,7 +214,7 @@ GLboolean _vbo_CreateContext( GLcontext *ctx ) for (i = 0; i < 4; i++) vbo->map_vp_none[28+i] = i; - for (i = 0; i < VERT_ATTRIB_MAX; i++) + for (i = 0; i < Elements(vbo->map_vp_arb); i++) vbo->map_vp_arb[i] = i; } diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h index bf405eb6933..8b726dc8ac5 100644 --- a/src/mesa/vbo/vbo_context.h +++ b/src/mesa/vbo/vbo_context.h @@ -68,8 +68,8 @@ struct vbo_context { struct gl_client_array *generic_currval; struct gl_client_array *mat_currval; - GLuint map_vp_none[32]; - GLuint map_vp_arb[32]; + GLuint map_vp_none[VERT_ATTRIB_MAX]; + GLuint map_vp_arb[VERT_ATTRIB_MAX]; GLfloat *current[VBO_ATTRIB_MAX]; /* points into ctx->Current, ctx->Light.Material */ GLfloat CurrentFloatEdgeFlag; @@ -92,16 +92,18 @@ static INLINE struct vbo_context *vbo_context(GLcontext *ctx) return (struct vbo_context *)(ctx->swtnl_im); } -enum { - VP_NONE = 1, - VP_NV, - VP_ARB -}; -static INLINE GLuint get_program_mode( GLcontext *ctx ) +/** + * Return VP_x token to indicate whether we're running fixed-function + * vertex transformation, an NV vertex program or ARB vertex program/shader. + */ +static INLINE enum vp_mode +get_program_mode( GLcontext *ctx ) { if (!ctx->VertexProgram._Current) return VP_NONE; + else if (ctx->VertexProgram._Current == ctx->VertexProgram._TnlProgram) + return VP_NONE; else if (ctx->VertexProgram._Current->IsNVProgram) return VP_NV; else diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h index 100bb8a5dec..e0f44892cff 100644 --- a/src/mesa/vbo/vbo_exec.h +++ b/src/mesa/vbo/vbo_exec.h @@ -48,6 +48,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define ERROR_ATTRIB 16 +/** Current vertex program mode */ +enum vp_mode { + VP_NONE, /**< fixed function */ + VP_NV, /**< NV vertex program */ + VP_ARB /**< ARB vertex program or GLSL vertex shader */ +}; struct vbo_exec_eval1_map { @@ -103,7 +109,7 @@ struct vbo_exec_context * values are squashed down to the 32 attributes passed to the * vertex program below: */ - GLuint program_mode; + enum vp_mode program_mode; GLuint enabled_flags; const struct gl_client_array *inputs[VERT_ATTRIB_MAX]; } vtx; @@ -116,7 +122,7 @@ struct vbo_exec_context } eval; struct { - GLuint program_mode; + enum vp_mode program_mode; GLuint enabled_flags; GLuint array_obj; diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 5d35ec9c111..6871ee5cab1 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -671,7 +671,7 @@ void vbo_use_buffer_objects(GLcontext *ctx) GLsizei size = VBO_VERT_BUFFER_SIZE; /* Make sure this func is only used once */ - assert(exec->vtx.bufferobj == ctx->Array.NullBufferObj); + assert(exec->vtx.bufferobj == ctx->Shared->NullBufferObj); if (exec->vtx.buffer_map) { _mesa_align_free(exec->vtx.buffer_map); exec->vtx.buffer_map = NULL; @@ -697,7 +697,7 @@ void vbo_exec_vtx_init( struct vbo_exec_context *exec ) */ _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, - ctx->Array.NullBufferObj); + ctx->Shared->NullBufferObj); ASSERT(!exec->vtx.buffer_map); exec->vtx.buffer_map = (GLfloat *)ALIGN_MALLOC(VBO_VERT_BUFFER_SIZE, 64); diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 65fe197a4d7..2815ba3cc92 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -36,7 +36,8 @@ #include "vbo_context.h" -/* Compute min and max elements for drawelements calls. +/** + * Compute min and max elements for glDraw[Range]Elements() calls. */ static void get_minmax_index( GLuint count, GLuint type, const GLvoid *indices, @@ -89,7 +90,8 @@ static void get_minmax_index( GLuint count, GLuint type, } -/* Just translate the arrayobj into a sane layout. +/** + * Just translate the arrayobj into a sane layout. */ static void bind_array_obj( GLcontext *ctx ) { @@ -103,7 +105,7 @@ static void bind_array_obj( GLcontext *ctx ) * go away. */ exec->array.legacy_array[VERT_ATTRIB_POS] = &arrayObj->Vertex; - exec->array.legacy_array[VERT_ATTRIB_WEIGHT] = &vbo->legacy_currval[VERT_ATTRIB_WEIGHT]; + exec->array.legacy_array[VERT_ATTRIB_WEIGHT] = &arrayObj->Weight; exec->array.legacy_array[VERT_ATTRIB_NORMAL] = &arrayObj->Normal; exec->array.legacy_array[VERT_ATTRIB_COLOR0] = &arrayObj->Color; exec->array.legacy_array[VERT_ATTRIB_COLOR1] = &arrayObj->SecondaryColor; @@ -115,11 +117,10 @@ static void bind_array_obj( GLcontext *ctx ) } exec->array.legacy_array[VERT_ATTRIB_EDGEFLAG] = &arrayObj->EdgeFlag; - for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) + for (i = 0; i < Elements(arrayObj->TexCoord); i++) exec->array.legacy_array[VERT_ATTRIB_TEX0 + i] = &arrayObj->TexCoord[i]; - for (i = 0; i < MAX_VERTEX_ATTRIBS; i++) { - assert(i < Elements(arrayObj->VertexAttrib)); + for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) { assert(i < Elements(exec->array.generic_array)); exec->array.generic_array[i] = &arrayObj->VertexAttrib[i]; } @@ -127,6 +128,7 @@ static void bind_array_obj( GLcontext *ctx ) exec->array.array_obj = arrayObj->Name; } + static void recalculate_input_bindings( GLcontext *ctx ) { struct vbo_context *vbo = vbo_context(ctx); @@ -140,9 +142,10 @@ static void recalculate_input_bindings( GLcontext *ctx ) switch (exec->array.program_mode) { case VP_NONE: - /* When no vertex program is active, we put the material values - * into the generic slots. This is the only situation where - * material values are available as per-vertex attributes. + /* When no vertex program is active (or the vertex program is generated + * from fixed-function state). We put the material values into the + * generic slots. This is the only situation where material values + * are available as per-vertex attributes. */ for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { if (exec->array.legacy_array[i]->Enabled) @@ -165,8 +168,8 @@ static void recalculate_input_bindings( GLcontext *ctx ) inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); } - break; + case VP_NV: /* NV_vertex_program - attribute arrays alias and override * conventional, legacy arrays. No materials, and the generic @@ -190,11 +193,11 @@ static void recalculate_input_bindings( GLcontext *ctx ) inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0]; const_inputs |= 1 << i; } - break; + case VP_ARB: - /* ARB_vertex_program - Only the attribute zero (position) array - * aliases and overrides the legacy position array. + /* GL_ARB_vertex_program or GLSL vertex shader - Only the generic[0] + * attribute array aliases and overrides the legacy position array. * * Otherwise, legacy attributes available in the legacy slots, * generic attributes in the generic slots and materials are not @@ -209,7 +212,6 @@ static void recalculate_input_bindings( GLcontext *ctx ) const_inputs |= 1 << 0; } - for (i = 1; i <= VERT_ATTRIB_TEX7; i++) { if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; @@ -219,7 +221,7 @@ static void recalculate_input_bindings( GLcontext *ctx ) } } - for (i = 0; i < MAX_VERTEX_ATTRIBS; i++) { + for (i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) { if (exec->array.generic_array[i]->Enabled) inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i]; else { @@ -234,6 +236,7 @@ static void recalculate_input_bindings( GLcontext *ctx ) _mesa_set_varying_vp_inputs( ctx, ~const_inputs ); } + static void bind_arrays( GLcontext *ctx ) { #if 0 @@ -281,7 +284,10 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) bind_arrays( ctx ); - /* Again... + /* Again... because we may have changed the bitmask of per-vertex varying + * attributes. If we regenerate the fixed-function vertex program now + * we may be able to prune down the number of vertex attributes which we + * need in the shader. */ if (ctx->NewState) _mesa_update_state( ctx ); @@ -295,7 +301,8 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) prim[0].count = count; prim[0].indexed = 0; - vbo->draw_prims( ctx, exec->array.inputs, prim, 1, NULL, start, start + count - 1 ); + vbo->draw_prims( ctx, exec->array.inputs, prim, 1, NULL, + start, start + count - 1 ); #if 0 { @@ -340,7 +347,6 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) } - static void GLAPIENTRY vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end, @@ -355,6 +361,17 @@ vbo_exec_DrawRangeElements(GLenum mode, if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count, type, indices )) return; + if (end >= ctx->Array.ArrayObj->_MaxElement) { + /* the max element is out of bounds of one or more enabled arrays */ + _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, " + "type 0x%x) index=%u is out of bounds (max=%u)", + start, end, count, type, end, + ctx->Array.ArrayObj->_MaxElement); + if (0) + _mesa_print_arrays(ctx); + return; + } + FLUSH_CURRENT( ctx, 0 ); if (ctx->NewState) @@ -418,6 +435,7 @@ vbo_exec_DrawRangeElements(GLenum mode, vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib, start, end ); } + static void GLAPIENTRY vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices) { @@ -457,9 +475,6 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid *ind * Initialization */ - - - void vbo_exec_array_init( struct vbo_exec_context *exec ) { #if 1 diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index da2d849ded7..c939b7b6335 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -35,7 +35,8 @@ #include "vbo_context.h" -static void vbo_exec_debug_verts( struct vbo_exec_context *exec ) +static void +vbo_exec_debug_verts( struct vbo_exec_context *exec ) { GLuint count = exec->vtx.vert_count; GLuint i; @@ -64,19 +65,19 @@ static void vbo_exec_debug_verts( struct vbo_exec_context *exec ) * NOTE: Need to have calculated primitives by this point -- do it on the fly. * NOTE: Old 'parity' issue is gone. */ -static GLuint vbo_copy_vertices( struct vbo_exec_context *exec ) +static GLuint +vbo_copy_vertices( struct vbo_exec_context *exec ) { GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count; GLuint ovf, i; GLuint sz = exec->vtx.vertex_size; GLfloat *dst = exec->vtx.copied.buffer; - GLfloat *src = (exec->vtx.buffer_map + - exec->vtx.prim[exec->vtx.prim_count-1].start * - exec->vtx.vertex_size); + const GLfloat *src = (exec->vtx.buffer_map + + exec->vtx.prim[exec->vtx.prim_count-1].start * + exec->vtx.vertex_size); - switch( exec->ctx->Driver.CurrentExecPrimitive ) - { + switch (exec->ctx->Driver.CurrentExecPrimitive) { case GL_POINTS: return 0; case GL_LINES: @@ -95,8 +96,9 @@ static GLuint vbo_copy_vertices( struct vbo_exec_context *exec ) _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); return i; case GL_LINE_STRIP: - if (nr == 0) + if (nr == 0) { return 0; + } else { _mesa_memcpy( dst, src+(nr-1)*sz, sz * sizeof(GLfloat) ); return 1; @@ -104,12 +106,14 @@ static GLuint vbo_copy_vertices( struct vbo_exec_context *exec ) case GL_LINE_LOOP: case GL_TRIANGLE_FAN: case GL_POLYGON: - if (nr == 0) + if (nr == 0) { return 0; + } else if (nr == 1) { _mesa_memcpy( dst, src+0, sz * sizeof(GLfloat) ); return 1; - } else { + } + else { _mesa_memcpy( dst, src+0, sz * sizeof(GLfloat) ); _mesa_memcpy( dst+sz, src+(nr-1)*sz, sz * sizeof(GLfloat) ); return 2; @@ -122,9 +126,15 @@ static GLuint vbo_copy_vertices( struct vbo_exec_context *exec ) /* fallthrough */ case GL_QUAD_STRIP: switch (nr) { - case 0: ovf = 0; break; - case 1: ovf = 1; break; - default: ovf = 2 + (nr&1); break; + case 0: + ovf = 0; + break; + case 1: + ovf = 1; + break; + default: + ovf = 2 + (nr & 1); + break; } for (i = 0 ; i < ovf ; i++) _mesa_memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); @@ -141,13 +151,14 @@ static GLuint vbo_copy_vertices( struct vbo_exec_context *exec ) /* TODO: populate these as the vertex is defined: */ -static void vbo_exec_bind_arrays( GLcontext *ctx ) +static void +vbo_exec_bind_arrays( GLcontext *ctx ) { struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = &vbo->exec; struct gl_client_array *arrays = exec->vtx.arrays; - GLuint count = exec->vtx.vert_count; - GLubyte *data = (GLubyte *)exec->vtx.buffer_map; + const GLuint count = exec->vtx.vert_count; + const GLubyte *data = (GLubyte *) exec->vtx.buffer_map; const GLuint *map; GLuint attr; GLbitfield varying_inputs = 0x0; @@ -227,7 +238,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) arrays[attr]._MaxElement = count; /* ??? */ data += exec->vtx.attrsz[src] * sizeof(GLfloat); - varying_inputs |= 1<<attr; + varying_inputs |= 1 << attr; } } @@ -235,18 +246,19 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) } -static void vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) +static void +vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) { GLenum target = GL_ARRAY_BUFFER_ARB; if (exec->vtx.bufferobj->Name) { GLcontext *ctx = exec->ctx; - if(ctx->Driver.FlushMappedBufferRange) { + if (ctx->Driver.FlushMappedBufferRange) { GLintptr offset = exec->vtx.buffer_used - exec->vtx.bufferobj->Offset; GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float); - if(length) + if (length) ctx->Driver.FlushMappedBufferRange(ctx, target, offset, length, exec->vtx.bufferobj); @@ -265,7 +277,9 @@ static void vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) } } -void vbo_exec_vtx_map( struct vbo_exec_context *exec ) + +void +vbo_exec_vtx_map( struct vbo_exec_context *exec ) { GLcontext *ctx = exec->ctx; GLenum target = GL_ARRAY_BUFFER_ARB; @@ -282,8 +296,7 @@ void vbo_exec_vtx_map( struct vbo_exec_context *exec ) } if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 && - ctx->Driver.MapBufferRange) - { + ctx->Driver.MapBufferRange) { exec->vtx.buffer_map = (GLfloat *)ctx->Driver.MapBufferRange(ctx, target, @@ -305,12 +318,13 @@ void vbo_exec_vtx_map( struct vbo_exec_context *exec ) VBO_VERT_BUFFER_SIZE, NULL, usage, exec->vtx.bufferobj); - exec->vtx.buffer_map = - (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj); + exec->vtx.buffer_map = (GLfloat *) + ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj); exec->vtx.buffer_ptr = exec->vtx.buffer_map; } - if (0) _mesa_printf("map %d..\n", exec->vtx.buffer_used); + if (0) + _mesa_printf("map %d..\n", exec->vtx.buffer_used); } @@ -318,13 +332,12 @@ void vbo_exec_vtx_map( struct vbo_exec_context *exec ) /** * Execute the buffer and save copied verts. */ -void vbo_exec_vtx_flush( struct vbo_exec_context *exec, - GLboolean unmap ) +void +vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap ) { if (0) vbo_exec_debug_verts( exec ); - if (exec->vtx.prim_count && exec->vtx.vert_count) { @@ -345,8 +358,9 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec, vbo_exec_vtx_unmap( exec ); } - if (0) _mesa_printf("%s %d %d\n", __FUNCTION__, exec->vtx.prim_count, - exec->vtx.vert_count); + if (0) + _mesa_printf("%s %d %d\n", __FUNCTION__, exec->vtx.prim_count, + exec->vtx.vert_count); vbo_context(ctx)->draw_prims( ctx, exec->vtx.inputs, @@ -360,7 +374,7 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec, */ if (exec->vtx.bufferobj->Name && !unmap) { vbo_exec_vtx_map( exec ); - } + } } } @@ -372,14 +386,12 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec, vbo_exec_vtx_unmap( exec ); } - if (unmap) exec->vtx.max_vert = 0; else exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / (exec->vtx.vertex_size * sizeof(GLfloat))); - exec->vtx.buffer_ptr = exec->vtx.buffer_map; exec->vtx.prim_count = 0; exec->vtx.vert_count = 0; diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c index dae778e741e..ea87dede646 100644 --- a/src/mesa/vbo/vbo_rebase.c +++ b/src/mesa/vbo/vbo_rebase.c @@ -161,7 +161,7 @@ void vbo_rebase_prims( GLcontext *ctx, GL_ELEMENT_ARRAY_BUFFER, ib->obj); - tmp_ib.obj = ctx->Array.NullBufferObj; + tmp_ib.obj = ctx->Shared->NullBufferObj; tmp_ib.ptr = tmp_indices; tmp_ib.count = ib->count; tmp_ib.type = ib->type; diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c index 5fb66d3318f..2f6a1998eaa 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -31,6 +31,7 @@ #include "main/glheader.h" #include "main/imports.h" +#include "main/image.h" #include "main/macros.h" #include "main/enums.h" #include "main/mtypes.h" @@ -41,7 +42,8 @@ #define ELT_TABLE_SIZE 16 -/* Used for vertex-level splitting of indexed buffers. Note that +/** + * Used for vertex-level splitting of indexed buffers. Note that * non-indexed primitives may be converted to indexed in some cases * (eg loops, fans) in order to use this splitting path. */ @@ -73,23 +75,21 @@ struct copy_context { GLuint *translated_elt_buf; const GLuint *srcelt; - /* A baby hash table to avoid re-emitting (some) duplicate + /** A baby hash table to avoid re-emitting (some) duplicate * vertices when splitting indexed primitives. */ struct { GLuint in; GLuint out; } vert_cache[ELT_TABLE_SIZE]; - GLuint vertex_size; GLubyte *dstbuf; - GLubyte *dstptr; /* dstptr == dstbuf + dstelt_max * vertsize */ - GLuint dstbuf_size; /* in vertices */ - GLuint dstbuf_nr; /* count of emitted vertices, also the - * largest value in dstelt. Our - * MaxIndex. - */ + GLubyte *dstptr; /**< dstptr == dstbuf + dstelt_max * vertsize */ + GLuint dstbuf_size; /**< in vertices */ + GLuint dstbuf_nr; /**< count of emitted vertices, also the largest value + * in dstelt. Our MaxIndex. + */ GLuint *dstelt; GLuint dstelt_nr; @@ -102,32 +102,19 @@ struct copy_context { }; -static GLuint type_size( GLenum type ) -{ - switch(type) { - case GL_BYTE: return sizeof(GLbyte); - case GL_UNSIGNED_BYTE: return sizeof(GLubyte); - case GL_SHORT: return sizeof(GLshort); - case GL_UNSIGNED_SHORT: return sizeof(GLushort); - case GL_INT: return sizeof(GLint); - case GL_UNSIGNED_INT: return sizeof(GLuint); - case GL_FLOAT: return sizeof(GLfloat); - case GL_DOUBLE: return sizeof(GLdouble); - default: return 0; - } -} - static GLuint attr_size( const struct gl_client_array *array ) { - return array->Size * type_size(array->Type); + return array->Size * _mesa_sizeof_type(array->Type); } -/* Starts returning true slightly before the buffer fills, to ensure +/** + * Starts returning true slightly before the buffer fills, to ensure * that there is sufficient room for any remaining vertices to finish * off the prim: */ -static GLboolean check_flush( struct copy_context *copy ) +static GLboolean +check_flush( struct copy_context *copy ) { GLenum mode = copy->dstprim[copy->dstprim_nr].mode; @@ -145,7 +132,9 @@ static GLboolean check_flush( struct copy_context *copy ) return GL_FALSE; } -static void flush( struct copy_context *copy ) + +static void +flush( struct copy_context *copy ) { GLuint i; @@ -175,8 +164,11 @@ static void flush( struct copy_context *copy ) } - -static void begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag ) +/** + * Called at begin of each primitive during replay. + */ +static void +begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag ) { struct _mesa_prim *prim = ©->dstprim[copy->dstprim_nr]; @@ -187,10 +179,12 @@ static void begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag } -/* Use a hashtable to attempt to identify recently-emitted vertices +/** + * Use a hashtable to attempt to identify recently-emitted vertices * and avoid re-emitting them. */ -static GLuint elt(struct copy_context *copy, GLuint elt_idx) +static GLuint +elt(struct copy_context *copy, GLuint elt_idx) { GLuint elt = copy->srcelt[elt_idx]; GLuint slot = elt & (ELT_TABLE_SIZE-1); @@ -222,7 +216,6 @@ static GLuint elt(struct copy_context *copy, GLuint elt_idx) _mesa_printf("%x ", f[j]); _mesa_printf("\n"); } - } copy->vert_cache[slot].in = elt; @@ -230,9 +223,8 @@ static GLuint elt(struct copy_context *copy, GLuint elt_idx) copy->dstptr += copy->vertex_size; assert(csr == copy->dstptr); - assert(copy->dstptr == (copy->dstbuf + - copy->dstbuf_nr * - copy->vertex_size)); + assert(copy->dstptr == (copy->dstbuf + + copy->dstbuf_nr * copy->vertex_size)); } /* else */ /* _mesa_printf(" --> reuse vertex\n"); */ @@ -242,7 +234,12 @@ static GLuint elt(struct copy_context *copy, GLuint elt_idx) return check_flush(copy); } -static void end( struct copy_context *copy, GLboolean end_flag ) + +/** + * Called at end of each primitive during replay. + */ +static void +end( struct copy_context *copy, GLboolean end_flag ) { struct _mesa_prim *prim = ©->dstprim[copy->dstprim_nr]; @@ -257,8 +254,8 @@ static void end( struct copy_context *copy, GLboolean end_flag ) } - -static void replay_elts( struct copy_context *copy ) +static void +replay_elts( struct copy_context *copy ) { GLuint i, j, k; GLboolean split; @@ -362,7 +359,8 @@ static void replay_elts( struct copy_context *copy ) } -static void replay_init( struct copy_context *copy ) +static void +replay_init( struct copy_context *copy ) { GLcontext *ctx = copy->ctx; GLuint i; @@ -388,10 +386,7 @@ static void replay_init( struct copy_context *copy ) copy->vertex_size += attr_size(copy->array[i]); if (vbo->Name && !vbo->Pointer) - ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - GL_WRITE_ONLY, /* XXX */ - vbo); + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY, vbo); copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer, copy->array[i]->Ptr); @@ -405,12 +400,11 @@ static void replay_init( struct copy_context *copy ) * do it internally. */ if (copy->ib->obj->Name && !copy->ib->obj->Pointer) - ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, /* XXX */ - GL_WRITE_ONLY, /* XXX */ + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY, copy->ib->obj); - srcptr = (const GLubyte *)ADD_POINTERS(copy->ib->obj->Pointer, copy->ib->ptr); + srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer, + copy->ib->ptr); switch (copy->ib->type) { case GL_UNSIGNED_BYTE: @@ -434,7 +428,6 @@ static void replay_init( struct copy_context *copy ) copy->srcelt = (const GLuint *)srcptr; break; } - /* Figure out the maximum allowed vertex buffer size: */ @@ -449,8 +442,7 @@ static void replay_init( struct copy_context *copy ) * * XXX: This should be a VBO! */ - copy->dstbuf = _mesa_malloc(copy->dstbuf_size * - copy->vertex_size); + copy->dstbuf = _mesa_malloc(copy->dstbuf_size * copy->vertex_size); copy->dstptr = copy->dstbuf; /* Setup new vertex arrays to point into the output buffer: @@ -467,7 +459,7 @@ static void replay_init( struct copy_context *copy ) dst->Ptr = copy->dstbuf + offset; dst->Enabled = GL_TRUE; dst->Normalized = src->Normalized; - dst->BufferObj = ctx->Array.NullBufferObj; + dst->BufferObj = ctx->Shared->NullBufferObj; dst->_MaxElement = copy->dstbuf_size; /* may be less! */ offset += copy->varying[i].size; @@ -487,12 +479,16 @@ static void replay_init( struct copy_context *copy ) */ copy->dstib.count = 0; /* duplicates dstelt_nr */ copy->dstib.type = GL_UNSIGNED_INT; - copy->dstib.obj = ctx->Array.NullBufferObj; + copy->dstib.obj = ctx->Shared->NullBufferObj; copy->dstib.ptr = copy->dstelt; } -static void replay_finish( struct copy_context *copy ) +/** + * Free up everything allocated during split/replay. + */ +static void +replay_finish( struct copy_context *copy ) { GLcontext *ctx = copy->ctx; GLuint i; @@ -502,25 +498,26 @@ static void replay_finish( struct copy_context *copy ) _mesa_free(copy->translated_elt_buf); _mesa_free(copy->dstbuf); _mesa_free(copy->dstelt); - + /* Unmap VBO's */ for (i = 0; i < copy->nr_varying; i++) { struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj; - if (vbo->Name && vbo->Pointer) - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo); + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, vbo); } /* Unmap index buffer: */ if (copy->ib->obj->Name && copy->ib->obj->Pointer) { - ctx->Driver.UnmapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, /* XXX */ - copy->ib->obj); + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, copy->ib->obj); } } + +/** + * Split VBO into smaller pieces, draw the pieces. + */ void vbo_split_copy( GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, @@ -546,13 +543,11 @@ void vbo_split_copy( GLcontext *ctx, copy.draw = draw; copy.limits = limits; - /* Clear the vertex cache: */ for (i = 0; i < ELT_TABLE_SIZE; i++) copy.vert_cache[i].in = ~0; - replay_init(©); replay_elts(©); replay_finish(©); diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c index fbc856e93b0..3ed6b34fbf0 100644 --- a/src/mesa/vbo/vbo_split_inplace.c +++ b/src/mesa/vbo/vbo_split_inplace.c @@ -221,7 +221,7 @@ static void split_prims( struct split_context *split) ib.count = count; ib.type = GL_UNSIGNED_INT; - ib.obj = split->ctx->Array.NullBufferObj; + ib.obj = split->ctx->Shared->NullBufferObj; ib.ptr = elts; tmpprim = *prim; |