/** Pairwise intra-block allocators */ #include "optimise.h" #include "blocks.h" #include "flow.h" //#include "collections.h" #define DEPTH (IR->x.depth) static int DONT_CROSS_CALL; static void intraBlock(Block b); extern void increaseTreeDepth(Node n, int depth); typedef struct node_pair { int separation; int location; Node first; Node second; } *NodePair; static int (*sorter)(const NodePair first, const NodePair second); static void (*allocator)(NodePair pair); static int proximity(const NodePair n1, const NodePair n2); static int fifo(const NodePair n1, const NodePair n2); static void underneath(NodePair pair); static void costing(NodePair pair); char* koop(void) { DONT_CROSS_CALL = 1; sorter = proximity; allocator = underneath; flow_applyToBlocks(intraBlock); // verify(); return NULL; } optimiser koopman = { "koopman", "Eliminate some local variables using Phil Koopman's intra-block algorithm", 0, &koop }; char* koopX(void) { DONT_CROSS_CALL = 0; sorter = proximity; allocator = underneath; flow_applyToBlocks(intraBlock); return NULL; } optimiser koopmanX = { "koopmanX", "Eliminate some local variables using Phil Koopman's intra-block algorithm, allowing blocks to span calls.", 0, &koopX }; static void loadstore(NodePair pair); static char* base(void) { DONT_CROSS_CALL = 1; sorter = proximity; allocator = loadstore; flow_applyToBlocks(intraBlock); return NULL; } optimiser baseline = { "baseline", "Eliminate adjacent load-store pairs, to give a reasonable base-line for comparison.", 0, &base }; static int* costs = NULL; static char* byCost(void) { int i; costs = allocate(sizeof(int)*DEPTH, FUNC); for (i = 0; i < DEPTH; i++) costs[i] = 1; /* This must NOT cross calls as the costing allocator will fail if it does */ DONT_CROSS_CALL = 1; sorter = fifo; allocator = costing; flow_applyToBlocks(intraBlock); // verify(); return NULL; } optimiser pairwise = { "pairwise", "Eliminate some local variables pairwise, replacing pairs (use-use, def-use) with stack ops.", 0, &byCost }; // This is a clone of pairwise - Need a new algorithm to combine best of pairwise & koopmanX optimiser local = { "local", "Eliminate some local variables pairwise, replacing pairs (use-use, def-use) with stack ops.", 14, &byCost }; int pairID(NodePair n) { Node kid; if (generic(n->first->op) == ASGN) kid = n->first->kids[1]; else kid = n->first->kids[0]; return getTempIndex(kid); } int pairSeparation(NodePair n) { return n->separation; } int pairLocation(NodePair n) { return n->location; } static int* nodeLocations; static Node* foundNodes; static ArrayList pairs; static int location = 0; static void makePair(Node second, Node kid) { int id = getTempIndex(kid); Node first = foundNodes[id]; if (first) { NodePair nodes = allocate(sizeof(struct node_pair), FUNC); assert(first != second); nodes->separation = location - nodeLocations[id]; nodes->location = nodeLocations[id]; nodes->first = first; nodes->second = second; list_add(pairs, nodes); } foundNodes[id] = second; nodeLocations[id] = location; } static void scanTree(Node p) { if (p == NULL) { return; } if (DONT_CROSS_CALL && generic(p->op) == CALL) { int i; for (i = 0; i < local_variable_count; i++) { foundNodes[i] = NULL; } } else if (p->kids[0]) { if (generic(p->kids[0]->op) == VREG) { makePair(p, p->kids[0]); location++; } else { scanTree(p->kids[0]); } if (p->kids[1]) scanTree(p->kids[1]); } else { location++; } } ArrayList getPairs(Block b) { Node p, end; int i, size; size = local_variable_count; pairs = list_new(size); nodeLocations = allocate(sizeof(int) * size, FUNC); foundNodes = allocate(sizeof(Node) * size, FUNC); for (i = 0; i < size; i++) { foundNodes[i] = NULL; } end = block_lastNode(b)->x.next; for (p = block_firstNode(b); p != end; p = p->x.next) { if (generic(p->op) == ASGN) { if (generic(p->kids[1]->op) == VREG) { scanTree(p->kids[0]); makePair(p, p->kids[1]); location++; } else { scanTree(p->kids[0]); scanTree(p->kids[1]); } } else { scanTree(p); } } return pairs; } static void tuck(Node indir, int n) { Node kid = indir->kids[0]; int op = indir->op + TUCK - INDIR; indir->kids[0] = newnode(indir->op, kid, NULL, NULL); indir->syms[0] = kid->syms[0]; indir->op = op; indir->syms[1] = intconst(n); } #define TOTAL_DEPTH(n) (getEDepth(n) + n->x.l_depth) static void underneath(NodePair pair) { // fprintf(stderr, "Applying transforamtion to "); // printPair(pair); Node kid1, kid2; int depth1, depth2; assert(pair->separation > 0); if (generic(pair->first->op) == ASGN) { Symbol var = pair->first->kids[1]->syms[0]; kid1 = pair->first->kids[1]; depth1 = TOTAL_DEPTH(kid1); kid2 = pair->second->kids[0]; if (generic(pair->second->op) == TUCK) kid2 = kid2->kids[0]; assert(kid2->op == VREG + P); depth2 = TOTAL_DEPTH(kid2); if (depth1 < DEPTH && depth2 < DEPTH) { int op = pair->first->op + TUCK - ASGN; pair->first->kids[0] = newnode(op, pair->first->kids[0], NULL, var); pair->first->kids[0]->syms[1] = intconst(depth1); kid2->op = STACK + P; setDelve(kid2, depth2 + 1); increaseLDepth(kid1, kid2); } assert(pair->first->syms[2] == NULL); // pair->first->syms[2] = LIVE; } else { assert(generic(pair->first->op) == INDIR); assert(pair->first != pair->second); kid1 = pair->first->kids[0]; depth1 = TOTAL_DEPTH(kid1); kid2 = pair->second->kids[0]; if (generic(pair->second->op) == TUCK) kid2 = kid2->kids[0]; assert(kid2->op == VREG + P); depth2 = TOTAL_DEPTH(kid2); if (depth1 < DEPTH && depth2 < DEPTH) { int depth = (generic(kid1->op) == STACK) ? depth1 : depth1 + 1; tuck(pair->first, depth); kid2->op = STACK + P; setDelve(kid2, depth2 + 1); // Do not increase l-depth of kid1 increaseLDepth(kid1, kid2); kid1->x.l_depth--; } } pair->separation = 0; } #define LOAD_COST 5 #define LARGE 100 int nodeCost(Node temp, int currentDepth, int currentCost) { int delve, reach; if (temp->op == VREG + P) return currentCost; delve = getDelve(temp); reach = delve-getEDepth(temp); if (delve == DEPTH && reach >= currentDepth) return LARGE; if (currentDepth == DEPTH && temp->x.defn) return LARGE; if (delve == 1 && currentDepth == 1) return currentCost + 1; else return currentCost; } int nodeDepth(Node temp, int currentDepth) { int reach; if (temp->op == VREG + P || temp->op == COPY + P) return currentDepth; reach = getDelve(temp)-getEDepth(temp); if (reach < currentDepth) { if (reach + temp->x.defn <= 0) { prettytree(temp); fprintf(stderr, " defn: %d\n", temp->x.defn); assert(0); } assert(temp->op == STACK + P); if (temp->x.defn) { return currentDepth + 1; } else { return currentDepth - 1; } } else { return currentDepth; } } int getBestDepth(Node first, Node second, int* initCosts, int *final) { int MAX = DEPTH - getEDepth(first); int least_cost, cost, depth, best_start_depth, best_end_depth, i; Node p; assert(MAX <= DEPTH); least_cost = LARGE; best_start_depth = best_end_depth = 0; for (i = 0; i < MAX; i++) { cost = initCosts[i]; depth = i + 1; for (p = first->x.next; p != second; p = p->x.next) { cost = nodeCost(p, depth, cost); if (cost == LARGE) break; depth = nodeDepth(p, depth); } if ((depth + getEDepth(second) <= DEPTH) && (cost < least_cost)) { least_cost = cost; best_start_depth = i + 1; best_end_depth = depth; } } if (best_start_depth) { assert(best_start_depth + getEDepth(first) <= DEPTH); assert(best_end_depth > 0); assert(best_end_depth + getEDepth(second) <= DEPTH); depth = best_start_depth; for (p = first->x.next; p != second; p = p->x.next) { if (p->op == STACK + P || p->op == COPY + P) { int delve = getDelve(p); int reach = delve-getEDepth(p); if (reach >= depth) { // assert(delve < DEPTH); setDelve(p, delve + 1); } depth = nodeDepth(p, depth); } } *final = best_end_depth + getEDepth(second); LOG(("Start %d Finish %d\n", best_start_depth + getEDepth(first), final)); return best_start_depth + getEDepth(first); } else { return 0; } } static void loadstore(NodePair pair) { Node kid1, kid2; if (generic(pair->first->op) == ASGN) { kid1 = pair->first->kids[1]; kid2 = pair->second->kids[0]; if (kid1->x.next == kid2 && kid1->syms[0] == kid2->syms[0] && kid2->x.e_depth == 0 && kid1->syms[0]->ref < 1.5) { int op = pair->first->op + TUCK - ASGN; pair->first->kids[0] = newnode(op, pair->first->kids[0], NULL, pair->first->kids[1]->syms[0]); pair->first->kids[0]->syms[1] = intconst(1); kid2->op = STACK + P; setDelve(kid2, 1); kid2->x.l_depth++; } } } static void costing(NodePair pair) { Node first = pair->first; Node second = pair->second; Node firstTemp, kid2; // int cost[IR->x.depth]; // int depth[IR->x.depth]; int depth, final, reach = 1; if (first == NULL) return; kid2 = second->kids[0]; if (generic(first->op) == ASGN) { costs[0] = 0; firstTemp = first->kids[1]; } else { firstTemp = first->kids[0]; if (firstTemp->op == STACK + P) { int reach; reach = getDelve(firstTemp)-getEDepth(firstTemp); if (reach != 1) costs[reach - 1] = 0; } } depth = getBestDepth(firstTemp, second->kids[0], costs, &final); costs[0] = 1; costs[reach - 1] = 1; if (depth == 0) return; if (generic(first->op) == ASGN) { int op = first->op + TUCK - ASGN; first->kids[0] = newnode(op, first->kids[0], NULL, first->kids[1]->syms[0]); first->kids[0]->syms[1] = intconst(depth - 1); kid2->op = STACK + P; setDelve(kid2, final); } else { Node kid = first->kids[0]; if (kid->op == STACK + P) { if (depth == getDelve(firstTemp)) { kid->op = COPY + P; } else { tuck(first, depth); } kid2->op = STACK + P; setDelve(kid2, final); } else { tuck(first, depth); kid2->op = STACK + P; setDelve(kid2, final); } } } static int proximity(const NodePair n1, const NodePair n2) { return n1->separation - n2->separation; } static int anti_proximity(const NodePair n1, const NodePair n2) { return n2->separation - n1->separation; } static int fifo(const NodePair n1, const NodePair n2) { return n1->location - n2->location; } static int compare(const void* v1, const void* v2) { const NodePair n1 = *(NodePair*)v1; const NodePair n2 = *(NodePair*)v2; return sorter(n1, n2); } static void intraBlock(Block b) { int i, size; ArrayList pairs; Node p; if (local_variable_count == 0) return; assert(block_lastTemp(b) == NULL || block_lastTemp(b)->x.next == NULL); // for (p = block_firstTemp(b); p; p = p->x.next) // assert(p->x.l_depth == 0); pairs = getPairs(b); list_sort(pairs, &compare); size = list_size(pairs); for (i = 0; i < size; i++) { NodePair pair = (NodePair)list_get(pairs, i); if (generic((pair)->second->op) != ASGN) allocator(pair); } }