"""custom Custom builders and methods. """ # # Copyright 2008 VMware, Inc. # All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sub license, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice (including the # next paragraph) shall be included in all copies or substantial portions # of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. # IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR # ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # import os.path import sys import subprocess import modulefinder import SCons.Action import SCons.Builder import SCons.Scanner import fixes import source_list # the get_implicit_deps() method changed between 2.4 and 2.5: now it expects # a callable that takes a scanner as argument and returns a path, rather than # a path directly. We want to support both, so we need to detect the SCons version, # for which no API is provided by SCons 8-P # Scons version string has consistently been in this format: # MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd] # so this formula should cover all versions regardless of type # stable, alpha or beta. # For simplicity alpha and beta flags are removed. scons_version = tuple(map(int, SCons.__version__.split('.')[:3])) def quietCommandLines(env): # Quiet command lines # See also http://www.scons.org/wiki/HidingCommandLinesInOutput env['ASCOMSTR'] = " Assembling $SOURCE ..." env['ASPPCOMSTR'] = " Assembling $SOURCE ..." env['CCCOMSTR'] = " Compiling $SOURCE ..." env['SHCCCOMSTR'] = " Compiling $SOURCE ..." env['CXXCOMSTR'] = " Compiling $SOURCE ..." env['SHCXXCOMSTR'] = " Compiling $SOURCE ..." env['ARCOMSTR'] = " Archiving $TARGET ..." env['RANLIBCOMSTR'] = " Indexing $TARGET ..." env['LINKCOMSTR'] = " Linking $TARGET ..." env['SHLINKCOMSTR'] = " Linking $TARGET ..." env['LDMODULECOMSTR'] = " Linking $TARGET ..." env['SWIGCOMSTR'] = " Generating $TARGET ..." env['LEXCOMSTR'] = " Generating $TARGET ..." env['YACCCOMSTR'] = " Generating $TARGET ..." env['CODEGENCOMSTR'] = " Generating $TARGET ..." env['INSTALLSTR'] = " Installing $TARGET ..." def createConvenienceLibBuilder(env): """This is a utility function that creates the ConvenienceLibrary Builder in an Environment if it is not there already. If it is already there, we return the existing one. Based on the stock StaticLibrary and SharedLibrary builders. 
""" try: convenience_lib = env['BUILDERS']['ConvenienceLibrary'] except KeyError: action_list = [ SCons.Action.Action("$ARCOM", "$ARCOMSTR") ] if env.Detect('ranlib'): ranlib_action = SCons.Action.Action("$RANLIBCOM", "$RANLIBCOMSTR") action_list.append(ranlib_action) convenience_lib = SCons.Builder.Builder(action = action_list, emitter = '$LIBEMITTER', prefix = '$LIBPREFIX', suffix = '$LIBSUFFIX', src_suffix = '$SHOBJSUFFIX', src_builder = 'SharedObject') env['BUILDERS']['ConvenienceLibrary'] = convenience_lib return convenience_lib def python_scan(node, env, path): # http://www.scons.org/doc/0.98.5/HTML/scons-user/c2781.html#AEN2789 # https://docs.python.org/2/library/modulefinder.html contents = node.get_contents() # Tell ModuleFinder to search dependencies in the script dir, and the glapi # dirs source_dir = node.get_dir().abspath GLAPI = env.Dir('#src/mapi/glapi/gen').abspath path = [source_dir, GLAPI] + sys.path finder = modulefinder.ModuleFinder(path=path) finder.run_script(node.abspath) results = [] for name, mod in finder.modules.items(): if mod.__file__ is None: continue assert os.path.exists(mod.__file__) results.append(env.File(mod.__file__)) return results python_scanner = SCons.Scanner.Scanner(function = python_scan, skeys = ['.py']) def code_generate(env, script, target, source, command): """Method to simplify code generation via python scripts. http://www.scons.org/wiki/UsingCodeGenerators http://www.scons.org/doc/0.98.5/HTML/scons-user/c2768.html """ # We're generating code using Python scripts, so we have to be # careful with our scons elements. This entry represents # the generator file *in the source directory*. script_src = env.File(script).srcnode() # This command creates generated code *in the build directory*. command = command.replace('$SCRIPT', script_src.path) action = SCons.Action.Action(command, "$CODEGENCOMSTR") code = env.Command(target, source, action) # Explicitly mark that the generated code depends on the generator, # and on implicitly imported python modules path = (script_src.get_dir(),) if scons_version < (2, 5, 0) else lambda x: script_src deps = [script_src] deps += script_src.get_implicit_deps(env, python_scanner, path) env.Depends(code, deps) # Running the Python script causes .pyc files to be generated in the # source directory. When we clean up, they should go too. So add side # effects for .pyc files for dep in deps: pyc = env.File(str(dep) + 'c') env.SideEffect(pyc, code) return code def createCodeGenerateMethod(env): env.Append(SCANNERS = python_scanner) env.AddMethod(code_generate, 'CodeGenerate') def _pkg_check_modules(env, name, modules): '''Simple wrapper for pkg-config.''' env['HAVE_' + name] = False # For backwards compatability env[name.lower()] = False if env['platform'] == 'windows': return if not env.Detect('pkg-config'): return if subprocess.call(["pkg-config", "--exists", ' '.join(modules)]) != 0: return # Strip version expressions from modules modules = [module.split(' ', 1)[0] for module in modules] # Other flags may affect the compilation of unrelated targets, so store # them with a prefix, (e.g., XXX_CFLAGS, XXX_LIBS, etc) try: flags = env.ParseFlags('!pkg-config --cflags --libs ' + ' '.join(modules)) except OSError: return prefix = name + '_' for flag_name, flag_value in flags.items(): assert '_' not in flag_name env[prefix + flag_name] = flag_value env['HAVE_' + name] = True def pkg_check_modules(env, name, modules): sys.stdout.write('Checking for %s (%s)...' 
                     % (name, ' '.join(modules)))
    _pkg_check_modules(env, name, modules)
    result = env['HAVE_' + name]
    sys.stdout.write(' %s\n' % ['no', 'yes'][int(bool(result))])

    # XXX: For backwards compatibility
    env[name.lower()] = result


def pkg_use_modules(env, names):
    '''Search for all environment flags that match NAME_FOO and append
    them to the FOO environment variable.'''

    names = env.Flatten(names)

    for name in names:
        prefix = name + '_'

        if not 'HAVE_' + name in env:
            raise Exception('Attempt to use unknown module %s' % name)

        if not env['HAVE_' + name]:
            raise Exception('Attempt to use unavailable module %s' % name)

        flags = {}
        for flag_name, flag_value in env.Dictionary().items():
            if flag_name.startswith(prefix):
                flag_name = flag_name[len(prefix):]
                if '_' not in flag_name:
                    flags[flag_name] = flag_value
        if flags:
            env.MergeFlags(flags)


def createPkgConfigMethods(env):
    env.AddMethod(pkg_check_modules, 'PkgCheckModules')
    env.AddMethod(pkg_use_modules, 'PkgUseModules')


def parse_source_list(env, filename, names=None):
    # parse the source list file
    parser = source_list.SourceListParser()

    src = env.File(filename).srcnode()

    cur_srcdir = env.Dir('.').srcnode().abspath
    top_srcdir = env.Dir('#').abspath
    top_builddir = os.path.join(top_srcdir, env['build_dir'])

    # Normalize everything to / slashes
    cur_srcdir = cur_srcdir.replace('\\', '/')
    top_srcdir = top_srcdir.replace('\\', '/')
    top_builddir = top_builddir.replace('\\', '/')

    # Populate the symbol table of the Makefile parser.
    parser.add_symbol('top_srcdir', top_srcdir)
    parser.add_symbol('top_builddir', top_builddir)

    sym_table = parser.parse(src.abspath)

    if names:
        if isinstance(names, basestring):
            names = [names]
        symbols = names
    else:
        symbols = list(sym_table.keys())

    # convert the symbol table to source lists
    src_lists = {}
    for sym in symbols:
        val = sym_table[sym]
        srcs = []
        for f in val.split():
            if f:
                # Process source paths
                if f.startswith(top_builddir + '/src'):
                    # Automake puts build output in a `src` subdirectory,
                    # but SCons does not, so strip it here.
                    f = top_builddir + f[len(top_builddir + '/src'):]
                if f.startswith(cur_srcdir + '/'):
                    # Prefer relative source paths, as absolute files tend
                    # to cause duplicate actions.
                    f = f[len(cur_srcdir + '/'):]

                # do not include any headers
                if f.endswith(tuple(['.h', '.hpp', '.inl'])):
                    continue

                srcs.append(f)

        src_lists[sym] = srcs

    # if names are given, concatenate the lists
    if names:
        srcs = []
        for name in names:
            srcs.extend(src_lists[name])
        return srcs
    else:
        return src_lists


def createParseSourceListMethod(env):
    env.AddMethod(parse_source_list, 'ParseSourceList')


def generate(env):
    """Common environment generation code"""

    verbose = env.get('verbose', False) or not env.get('quiet', True)
    if not verbose:
        quietCommandLines(env)

    # Custom builders and methods
    createConvenienceLibBuilder(env)
    createCodeGenerateMethod(env)
    createPkgConfigMethods(env)
    createParseSourceListMethod(env)

    # for debugging
    #print env.Dump()


def exists(env):
    return 1
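# Illustrative usage sketch, not part of the original module: how a
# SConscript might consume the methods registered by generate() above.
# The script, target and pkg-config module names here are hypothetical.
#
#     env.CodeGenerate(
#         target = 'example_table.c',
#         script = 'gen_example_table.py',
#         source = ['example_defs.xml'],
#         command = 'python $SCRIPT $SOURCE > $TARGET')
#
#     env.PkgCheckModules('X11', ['x11', 'xext'])
#     if env['HAVE_X11']:
#         env.PkgUseModules('X11')
#
#     sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES')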
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"
static void
assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
{
if (reg->file == GRF) {
assert(reg->reg_offset >= 0);
reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
reg->reg_offset = 0;
}
}
void
fs_visitor::assign_regs_trivial()
{
int hw_reg_mapping[this->virtual_grf_next + 1];
int i;
int reg_width = c->dispatch_width / 8;
/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
for (i = 1; i <= this->virtual_grf_next; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
this->virtual_grf_sizes[i - 1] * reg_width);
}
this->grf_used = hw_reg_mapping[this->virtual_grf_next];
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
assign_reg(hw_reg_mapping, &inst->dst, reg_width);
assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
}
if (this->grf_used >= BRW_MAX_GRF) {
fail("Ran out of regs on trivial allocator (%d/%d)\n",
this->grf_used, BRW_MAX_GRF);
}
}
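/* Illustrative note, not in the original source: with
 * first_non_payload_grf == 4, reg_width == 1, and virtual GRF sizes
 * {1, 4, 2}, the trivial allocator above packs the virtual GRFs back to
 * back at hardware GRFs 4, 5 and 9, leaving grf_used == 11.  Every virtual
 * GRF gets its own space and live ranges are never overlapped, which is
 * why this path runs out of registers much sooner than assign_regs().
 */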
static void
brw_alloc_reg_set_for_classes(struct brw_context *brw,
int *class_sizes,
int class_count,
int reg_width,
int base_reg_count)
{
struct intel_context *intel = &brw->intel;
/* Compute the total number of registers across all classes. */
int ra_reg_count = 0;
for (int i = 0; i < class_count; i++) {
ra_reg_count += base_reg_count - (class_sizes[i] - 1);
}
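   /* (Added example) Each class holds one ra "register" per possible
    * starting position of a size-n allocation, i.e. base_reg_count - (n - 1)
    * of them, since n contiguous GRFs cannot start in the last n - 1 base
    * registers.  With base_reg_count == 8 and class sizes {1, 2, 4}, the
    * count just computed would be 8 + 7 + 5 == 20.
    */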
ralloc_free(brw->wm.ra_reg_to_grf);
brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
ralloc_free(brw->wm.regs);
brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
ralloc_free(brw->wm.classes);
brw->wm.classes = ralloc_array(brw, int, class_count + 1);
brw->wm.aligned_pairs_class = -1;
/* Now, add the registers to their classes, and add the conflicts
* between them and the base GRF registers (and also each other).
*/
int reg = 0;
int pairs_base_reg = 0;
int pairs_reg_count = 0;
for (int i = 0; i < class_count; i++) {
int class_reg_count = base_reg_count - (class_sizes[i] - 1);
brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);
/* Save this off for the aligned pair class at the end. */
if (class_sizes[i] == 2) {
pairs_base_reg = reg;
pairs_reg_count = class_reg_count;
}
for (int j = 0; j < class_reg_count; j++) {
ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);
brw->wm.ra_reg_to_grf[reg] = j;
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
}
reg++;
}
}
assert(reg == ra_reg_count);
/* Add a special class for aligned pairs, which we'll put delta_x/y
* in on gen5 so that we can do PLN.
*/
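   /* (Added note) PLN reads its barycentric input from a register pair, so
    * this class admits only the pair "registers" whose GRF offset is even,
    * which is what the & 1 test below checks.
    */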
if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);
for (int i = 0; i < pairs_reg_count; i++) {
if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
pairs_base_reg + i);
}
}
class_count++;
}
ra_set_finalize(brw->wm.regs);
}
bool
fs_visitor::assign_regs()
{
/* Most of this allocation was written for a reg_width of 1
* (dispatch_width == 8). In extending to 16-wide, the code was
* left in place and it was converted to have the hardware
* registers it's allocating be contiguous physical pairs of regs
* for reg_width == 2.
*/
int reg_width = c->dispatch_width / 8;
int hw_reg_mapping[this->virtual_grf_next];
int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
int class_sizes[base_reg_count];
int class_count = 0;
calculate_live_intervals();
/* Set up the register classes.
*
* The base registers store a scalar value. For texture samples,
* we get virtual GRFs composed of 4 contiguous hw register. For
* structures and arrays, we store them as contiguous larger things
* than that, though we should be able to do better most of the
* time.
*/
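   /* (Added illustration) e.g. a shader with scalar temporaries and a
    * texture sample would typically end up with class_sizes = {1, 4}, plus
    * the size-2 class on gen5: one class per distinct virtual GRF size
    * found in the loop below.
    */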
class_sizes[class_count++] = 1;
if (brw->has_pln && intel->gen < 6) {
/* Always set up the (unaligned) pairs for gen5, so we can find
* them for making the aligned pair class.
*/
class_sizes[class_count++] = 2;
}
for (int r = 0; r < this->virtual_grf_next; r++) {
int i;
for (i = 0; i < class_count; i++) {
if (class_sizes[i] == this->virtual_grf_sizes[r])
break;
}
if (i == class_count) {
if (this->virtual_grf_sizes[r] >= base_reg_count) {
fail("Object too large to register allocate.\n");
}
class_sizes[class_count++] = this->virtual_grf_sizes[r];
}
}
brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
reg_width, base_reg_count);
struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
this->virtual_grf_next);
for (int i = 0; i < this->virtual_grf_next; i++) {
for (int c = 0; c < class_count; c++) {
if (class_sizes[c] == this->virtual_grf_sizes[i]) {
if (brw->wm.aligned_pairs_class >= 0 &&
this->delta_x.reg == i) {
ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
} else {
ra_set_node_class(g, i, brw->wm.classes[c]);
}
break;
}
}
for (int j = 0; j < i; j++) {
if (virtual_grf_interferes(i, j)) {
ra_add_node_interference(g, i, j);
}
}
}
if (!ra_allocate_no_spills(g)) {
/* Failed to allocate registers. Spill a reg, and the caller will
* loop back into here to try again.
*/
int reg = choose_spill_reg(g);
if (reg == -1) {
fail("no register to spill\n");
} else if (intel->gen >= 7) {
fail("no spilling support on gen7 yet\n");
} else if (c->dispatch_width == 16) {
fail("no spilling support on 16-wide yet\n");
} else {
spill_reg(reg);
}
ralloc_free(g);
return false;
}
/* Get the chosen virtual registers for each node, and map virtual
* regs in the register classes back down to real hardware reg
* numbers.
*/
this->grf_used = first_assigned_grf;
for (int i = 0; i < this->virtual_grf_next; i++) {
int reg = ra_get_node_reg(g, i);
hw_reg_mapping[i] = (first_assigned_grf +
brw->wm.ra_reg_to_grf[reg] * reg_width);
this->grf_used = MAX2(this->grf_used,
hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
reg_width);
}
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
assign_reg(hw_reg_mapping, &inst->dst, reg_width);
assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
}
ralloc_free(g);
return true;
}
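/* Illustrative sketch of the assumed caller, not part of this file: the
 * spill-and-retry contract above implies a driving loop roughly like
 *
 *    while (!assign_regs()) {
 *       if (failed)
 *          break;   // fail() was hit: no spill candidate or unsupported
 *    }
 *
 * where each failed pass spills one register via spill_reg() and re-runs
 * the allocator on the rewritten instruction stream.
 */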
void
fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset)
{
int size = virtual_grf_sizes[dst.reg];
dst.reg_offset = 0;
for (int chan = 0; chan < size; chan++) {
fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL,
dst);
dst.reg_offset++;
unspill_inst->offset = spill_offset + chan * REG_SIZE;
unspill_inst->ir = inst->ir;
unspill_inst->annotation = inst->annotation;
/* Choose a MRF that won't conflict with an MRF that's live across the
* spill. Nothing else will make it up to MRF 14/15.
*/
unspill_inst->base_mrf = 14;
unspill_inst->mlen = 1; /* header contains offset */
inst->insert_before(unspill_inst);
}
}
int
fs_visitor::choose_spill_reg(struct ra_graph *g)
{
float loop_scale = 1.0;
float spill_costs[this->virtual_grf_next];
bool no_spill[this->virtual_grf_next];
for (int i = 0; i < this->virtual_grf_next; i++) {
spill_costs[i] = 0.0;
no_spill[i] = false;
}
/* Calculate costs for spilling nodes. Call it a cost of 1 per
* spill/unspill we'll have to do, and guess that the insides of
* loops run 10 times.
*/
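   /* (Added example) A size-2 virtual GRF read once outside any loop and
    * once inside a single loop body accumulates 2 * 1 + 2 * 10 == 22, so
    * values that stay live through loops become expensive spill candidates
    * and values touched only in straight-line code get picked first.
    */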
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
int size = virtual_grf_sizes[inst->src[i].reg];
spill_costs[inst->src[i].reg] += size * loop_scale;
}
}
if (inst->dst.file == GRF) {
int size = virtual_grf_sizes[inst->dst.reg];
spill_costs[inst->dst.reg] += size * loop_scale;
}
switch (inst->opcode) {
case BRW_OPCODE_DO:
loop_scale *= 10;
break;
case BRW_OPCODE_WHILE:
loop_scale /= 10;
break;
case FS_OPCODE_SPILL:
if (inst->src[0].file == GRF)
no_spill[inst->src[0].reg] = true;
break;
case FS_OPCODE_UNSPILL:
if (inst->dst.file == GRF)
no_spill[inst->dst.reg] = true;
break;
default:
break;
}
}
for (int i = 0; i < this->virtual_grf_next; i++) {
if (!no_spill[i])
ra_set_node_spill_cost(g, i, spill_costs[i]);
}
return ra_get_best_spill_node(g);
}
void
fs_visitor::spill_reg(int spill_reg)
{
int size = virtual_grf_sizes[spill_reg];
unsigned int spill_offset = c->last_scratch;
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
c->last_scratch += size * REG_SIZE;
/* Generate spill/unspill instructions for the objects being
* spilled. Right now, we spill or unspill the whole thing to a
* virtual grf of the same size. For most instructions, though, we
* could just spill/unspill the GRF being accessed.
*/
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF &&
inst->src[i].reg == spill_reg) {
inst->src[i].reg = virtual_grf_alloc(size);
emit_unspill(inst, inst->src[i], spill_offset);
}
}
if (inst->dst.file == GRF &&
inst->dst.reg == spill_reg) {
inst->dst.reg = virtual_grf_alloc(size);
/* Since we spill/unspill the whole thing even if we access
* just a component, we may need to unspill before the
* instruction we're spilling for.
*/
if (size != 1 || inst->predicated) {
emit_unspill(inst, inst->dst, spill_offset);
}
fs_reg spill_src = inst->dst;
spill_src.reg_offset = 0;
spill_src.abs = false;
spill_src.negate = false;
spill_src.smear = -1;
for (int chan = 0; chan < size; chan++) {
fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
reg_null_f, spill_src);
spill_src.reg_offset++;
spill_inst->offset = spill_offset + chan * REG_SIZE;
spill_inst->ir = inst->ir;
spill_inst->annotation = inst->annotation;
spill_inst->base_mrf = 14;
spill_inst->mlen = 2; /* header, value */
inst->insert_after(spill_inst);
}
}
}
this->live_intervals_valid = false;
}