Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/gp/reduce_scheduler.c
4574 views
/*1* Copyright (c) 2017 Lima Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22*/2324#include <limits.h>2526#include "gpir.h"2728/* Register sensitive schedule algorithm from paper:29* "Register-Sensitive Selection, Duplication, and Sequencing of Instructions"30* Author: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons31*/3233static void schedule_calc_sched_info(gpir_node *node)34{35int n = 0;36float extra_reg = 1.0f;3738/* update all children's sched info */39gpir_node_foreach_pred(node, dep) {40gpir_node *pred = dep->pred;4142if (pred->rsched.reg_pressure < 0)43schedule_calc_sched_info(pred);4445int est = pred->rsched.est + 1;46if (node->rsched.est < est)47node->rsched.est = est;4849float reg_weight = 1.0f - 1.0f / list_length(&pred->succ_list);50if (extra_reg > reg_weight)51extra_reg = reg_weight;5253n++;54}5556/* leaf instr */57if (!n) {58node->rsched.reg_pressure = 0;59return;60}6162int i = 0;63float reg[n];64gpir_node_foreach_pred(node, dep) {65gpir_node *pred = dep->pred;66reg[i++] = pred->rsched.reg_pressure;67}6869/* sort */70for (i = 0; i < n - 1; i++) {71for (int j = 0; j < n - i - 1; j++) {72if (reg[j] > reg[j + 1]) {73float tmp = reg[j + 1];74reg[j + 1] = reg[j];75reg[j] = tmp;76}77}78}7980for (i = 0; i < n; i++) {81float pressure = reg[i] + n - (i + 1);82if (pressure > node->rsched.reg_pressure)83node->rsched.reg_pressure = pressure;84}8586/* If all children of this node have multi parents, then this87* node need an extra reg to store its result. For example,88* it's not fair for parent has the same reg pressure as child89* if n==1 and child's successor>1, because we need 2 reg for90* this.91*92* But we can't add a full reg to the reg_pressure, because the93* last parent of a multi-successor child doesn't need an extra94* reg. For example, a single child (with multi successor) node95* should has less reg pressure than a two children (with single96* successor) instr.97*98* extra reg = min(all child)(1.0 - 1.0 / num successor)99*/100node->rsched.reg_pressure += extra_reg;101}102103static void schedule_insert_ready_list(struct list_head *ready_list,104gpir_node *insert_node)105{106struct list_head *insert_pos = ready_list;107108list_for_each_entry(gpir_node, node, ready_list, list) {109if (gpir_op_infos[node->op].schedule_first) {110continue;111}112113if (gpir_op_infos[insert_node->op].schedule_first ||114insert_node->rsched.parent_index < node->rsched.parent_index ||115(insert_node->rsched.parent_index == node->rsched.parent_index &&116(insert_node->rsched.reg_pressure < node->rsched.reg_pressure ||117(insert_node->rsched.reg_pressure == node->rsched.reg_pressure &&118(insert_node->rsched.est >= node->rsched.est))))) {119insert_pos = &node->list;120if (node == insert_node)121return;122break;123}124}125126list_del(&insert_node->list);127list_addtail(&insert_node->list, insert_pos);128}129130static void schedule_ready_list(gpir_block *block, struct list_head *ready_list)131{132if (list_is_empty(ready_list))133return;134135gpir_node *node = list_first_entry(ready_list, gpir_node, list);136list_del(&node->list);137138/* schedule the node to the block node list */139list_add(&node->list, &block->node_list);140node->rsched.scheduled = true;141block->rsched.node_index--;142143gpir_node_foreach_pred(node, dep) {144gpir_node *pred = dep->pred;145pred->rsched.parent_index = block->rsched.node_index;146147bool ready = true;148gpir_node_foreach_succ(pred, dep) {149gpir_node *succ = dep->succ;150if (!succ->rsched.scheduled) {151ready = false;152break;153}154}155/* all successor have been scheduled */156if (ready)157schedule_insert_ready_list(ready_list, pred);158}159160schedule_ready_list(block, ready_list);161}162163static void schedule_block(gpir_block *block)164{165/* move all nodes to node_list, block->node_list will166* contain schedule result */167struct list_head node_list;168list_replace(&block->node_list, &node_list);169list_inithead(&block->node_list);170171/* step 2 & 3 */172list_for_each_entry(gpir_node, node, &node_list, list) {173if (gpir_node_is_root(node))174schedule_calc_sched_info(node);175block->rsched.node_index++;176}177178/* step 4 */179struct list_head ready_list;180list_inithead(&ready_list);181182/* step 5 */183list_for_each_entry_safe(gpir_node, node, &node_list, list) {184if (gpir_node_is_root(node)) {185node->rsched.parent_index = INT_MAX;186schedule_insert_ready_list(&ready_list, node);187}188}189190/* step 6 */191schedule_ready_list(block, &ready_list);192}193194/* Due to how we translate from NIR, we never read a register written in the195* same block (we just pass the node through instead), so we don't have to196* worry about read-after-write dependencies. We do have to worry about197* write-after-read though, so we add those dependencies now. For example in a198* loop like this we need a dependency between the write and the read of i:199*200* i = ...201* while (...) {202* ... = i;203* i = i + 1;204* }205*/206207static void add_false_dependencies(gpir_compiler *comp)208{209/* Make sure we allocate this only once, in case there are many values and210* many blocks.211*/212gpir_node **last_written = calloc(comp->cur_reg, sizeof(gpir_node *));213214list_for_each_entry(gpir_block, block, &comp->block_list, list) {215list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {216if (node->op == gpir_op_load_reg) {217gpir_load_node *load = gpir_node_to_load(node);218gpir_node *store = last_written[load->reg->index];219if (store && store->block == block) {220gpir_node_add_dep(store, node, GPIR_DEP_WRITE_AFTER_READ);221}222} else if (node->op == gpir_op_store_reg) {223gpir_store_node *store = gpir_node_to_store(node);224last_written[store->reg->index] = node;225}226}227}228229free(last_written);230}231232bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp)233{234add_false_dependencies(comp);235236list_for_each_entry(gpir_block, block, &comp->block_list, list) {237block->rsched.node_index = 0;238list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {239node->rsched.reg_pressure = -1;240node->rsched.est = 0;241node->rsched.scheduled = false;242}243}244245list_for_each_entry(gpir_block, block, &comp->block_list, list) {246schedule_block(block);247}248249gpir_debug("after reduce scheduler\n");250gpir_node_print_prog_seq(comp);251return true;252}253254255