Path: blob/devel/elmergrid/src/metis-5.1.0/libmetis/fm.c
3206 views
/*!1\file2\brief Functions for the edge-based FM refinement34\date Started 7/23/975\author George6\author Copyright 1997-2011, Regents of the University of Minnesota7\version\verbatim $Id: fm.c 10187 2011-06-13 13:46:57Z karypis $ \endverbatim8*/910#include "metislib.h"111213/*************************************************************************14* This function performs an edge-based FM refinement15**************************************************************************/16void FM_2WayRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter)17{18if (graph->ncon == 1)19FM_2WayCutRefine(ctrl, graph, ntpwgts, niter);20else21FM_Mc2WayCutRefine(ctrl, graph, ntpwgts, niter);22}232425/*************************************************************************/26/*! This function performs a cut-focused FM refinement */27/*************************************************************************/28void FM_2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter)29{30idx_t i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, limit, tmp;31idx_t *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts;32idx_t *moved, *swaps, *perm;33rpq_t *queues[2];34idx_t higain, mincut, mindiff, origdiff, initcut, newcut, mincutorder, avgvwgt;35idx_t tpwgts[2];3637WCOREPUSH;3839nvtxs = graph->nvtxs;40xadj = graph->xadj;41vwgt = graph->vwgt;42adjncy = graph->adjncy;43adjwgt = graph->adjwgt;44where = graph->where;45id = graph->id;46ed = graph->ed;47pwgts = graph->pwgts;48bndptr = graph->bndptr;49bndind = graph->bndind;5051moved = iwspacemalloc(ctrl, nvtxs);52swaps = iwspacemalloc(ctrl, nvtxs);53perm = iwspacemalloc(ctrl, nvtxs);5455tpwgts[0] = graph->tvwgt[0]*ntpwgts[0];56tpwgts[1] = graph->tvwgt[0]-tpwgts[0];5758limit = gk_min(gk_max(0.01*nvtxs, 15), 100);59avgvwgt = gk_min((pwgts[0]+pwgts[1])/20, 2*(pwgts[0]+pwgts[1])/nvtxs);6061queues[0] = rpqCreate(nvtxs);62queues[1] = rpqCreate(nvtxs);6364IFSET(ctrl->dbglvl, METIS_DBG_REFINE,65Print2WayRefineStats(ctrl, graph, ntpwgts, 0, -2));6667origdiff = iabs(tpwgts[0]-pwgts[0]);68iset(nvtxs, -1, moved);69for (pass=0; pass<niter; pass++) { /* Do a number of passes */70rpqReset(queues[0]);71rpqReset(queues[1]);7273mincutorder = -1;74newcut = mincut = initcut = graph->mincut;75mindiff = iabs(tpwgts[0]-pwgts[0]);7677ASSERT(ComputeCut(graph, where) == graph->mincut);78ASSERT(CheckBnd(graph));7980/* Insert boundary nodes in the priority queues */81nbnd = graph->nbnd;82irandArrayPermute(nbnd, perm, nbnd, 1);83for (ii=0; ii<nbnd; ii++) {84i = perm[ii];85ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0);86ASSERT(bndptr[bndind[i]] != -1);87rpqInsert(queues[where[bndind[i]]], bndind[i], ed[bndind[i]]-id[bndind[i]]);88}8990for (nswaps=0; nswaps<nvtxs; nswaps++) {91from = (tpwgts[0]-pwgts[0] < tpwgts[1]-pwgts[1] ? 0 : 1);92to = (from+1)%2;9394if ((higain = rpqGetTop(queues[from])) == -1)95break;96ASSERT(bndptr[higain] != -1);9798newcut -= (ed[higain]-id[higain]);99INC_DEC(pwgts[to], pwgts[from], vwgt[higain]);100101if ((newcut < mincut && iabs(tpwgts[0]-pwgts[0]) <= origdiff+avgvwgt) ||102(newcut == mincut && iabs(tpwgts[0]-pwgts[0]) < mindiff)) {103mincut = newcut;104mindiff = iabs(tpwgts[0]-pwgts[0]);105mincutorder = nswaps;106}107else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */108newcut += (ed[higain]-id[higain]);109INC_DEC(pwgts[from], pwgts[to], vwgt[higain]);110break;111}112113where[higain] = to;114moved[higain] = nswaps;115swaps[nswaps] = higain;116117IFSET(ctrl->dbglvl, METIS_DBG_MOVEINFO,118printf("Moved %6"PRIDX" from %"PRIDX". [%3"PRIDX" %3"PRIDX"] %5"PRIDX" [%4"PRIDX" %4"PRIDX"]\n", higain, from, ed[higain]-id[higain], vwgt[higain], newcut, pwgts[0], pwgts[1]));119120/**************************************************************121* Update the id[i]/ed[i] values of the affected nodes122***************************************************************/123SWAP(id[higain], ed[higain], tmp);124if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])125BNDDelete(nbnd, bndind, bndptr, higain);126127for (j=xadj[higain]; j<xadj[higain+1]; j++) {128k = adjncy[j];129130kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);131INC_DEC(id[k], ed[k], kwgt);132133/* Update its boundary information and queue position */134if (bndptr[k] != -1) { /* If k was a boundary vertex */135if (ed[k] == 0) { /* Not a boundary vertex any more */136BNDDelete(nbnd, bndind, bndptr, k);137if (moved[k] == -1) /* Remove it if in the queues */138rpqDelete(queues[where[k]], k);139}140else { /* If it has not been moved, update its position in the queue */141if (moved[k] == -1)142rpqUpdate(queues[where[k]], k, ed[k]-id[k]);143}144}145else {146if (ed[k] > 0) { /* It will now become a boundary vertex */147BNDInsert(nbnd, bndind, bndptr, k);148if (moved[k] == -1)149rpqInsert(queues[where[k]], k, ed[k]-id[k]);150}151}152}153154}155156157/****************************************************************158* Roll back computations159*****************************************************************/160for (i=0; i<nswaps; i++)161moved[swaps[i]] = -1; /* reset moved array */162for (nswaps--; nswaps>mincutorder; nswaps--) {163higain = swaps[nswaps];164165to = where[higain] = (where[higain]+1)%2;166SWAP(id[higain], ed[higain], tmp);167if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])168BNDDelete(nbnd, bndind, bndptr, higain);169else if (ed[higain] > 0 && bndptr[higain] == -1)170BNDInsert(nbnd, bndind, bndptr, higain);171172INC_DEC(pwgts[to], pwgts[(to+1)%2], vwgt[higain]);173for (j=xadj[higain]; j<xadj[higain+1]; j++) {174k = adjncy[j];175176kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);177INC_DEC(id[k], ed[k], kwgt);178179if (bndptr[k] != -1 && ed[k] == 0)180BNDDelete(nbnd, bndind, bndptr, k);181if (bndptr[k] == -1 && ed[k] > 0)182BNDInsert(nbnd, bndind, bndptr, k);183}184}185186graph->mincut = mincut;187graph->nbnd = nbnd;188189IFSET(ctrl->dbglvl, METIS_DBG_REFINE,190Print2WayRefineStats(ctrl, graph, ntpwgts, 0, mincutorder));191192if (mincutorder <= 0 || mincut == initcut)193break;194}195196rpqDestroy(queues[0]);197rpqDestroy(queues[1]);198199WCOREPOP;200}201202203/*************************************************************************/204/*! This function performs a cut-focused multi-constraint FM refinement */205/*************************************************************************/206void FM_Mc2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter)207{208idx_t i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass,209me, limit, tmp, cnum;210idx_t *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *id, *ed,211*bndptr, *bndind;212idx_t *moved, *swaps, *perm, *qnum;213idx_t higain, mincut, initcut, newcut, mincutorder;214real_t *invtvwgt, *ubfactors, *minbalv, *newbalv;215real_t origbal, minbal, newbal, rgain, ffactor;216rpq_t **queues;217218WCOREPUSH;219220nvtxs = graph->nvtxs;221ncon = graph->ncon;222xadj = graph->xadj;223vwgt = graph->vwgt;224adjncy = graph->adjncy;225adjwgt = graph->adjwgt;226invtvwgt = graph->invtvwgt;227where = graph->where;228id = graph->id;229ed = graph->ed;230pwgts = graph->pwgts;231bndptr = graph->bndptr;232bndind = graph->bndind;233234moved = iwspacemalloc(ctrl, nvtxs);235swaps = iwspacemalloc(ctrl, nvtxs);236perm = iwspacemalloc(ctrl, nvtxs);237qnum = iwspacemalloc(ctrl, nvtxs);238ubfactors = rwspacemalloc(ctrl, ncon);239newbalv = rwspacemalloc(ctrl, ncon);240minbalv = rwspacemalloc(ctrl, ncon);241242limit = gk_min(gk_max(0.01*nvtxs, 25), 150);243244245/* Determine a fudge factor to allow the refinement routines to get out246of tight balancing constraints. */247ffactor = .5/gk_max(20, nvtxs);248249/* Initialize the queues */250queues = (rpq_t **)wspacemalloc(ctrl, 2*ncon*sizeof(rpq_t *));251for (i=0; i<2*ncon; i++)252queues[i] = rpqCreate(nvtxs);253for (i=0; i<nvtxs; i++)254qnum[i] = iargmax_nrm(ncon, vwgt+i*ncon, invtvwgt);255256/* Determine the unbalance tolerance for each constraint. The tolerance is257equal to the maximum of the original load imbalance and the user-supplied258allowed tolerance. The rationale behind this approach is to allow the259refinement routine to improve the cut, without having to worry about fixing260load imbalance problems. The load imbalance is addressed by the balancing261routines. */262origbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, ubfactors);263for (i=0; i<ncon; i++)264ubfactors[i] = (ubfactors[i] > 0 ? ctrl->ubfactors[i]+ubfactors[i] : ctrl->ubfactors[i]);265266267IFSET(ctrl->dbglvl, METIS_DBG_REFINE,268Print2WayRefineStats(ctrl, graph, ntpwgts, origbal, -2));269270iset(nvtxs, -1, moved);271for (pass=0; pass<niter; pass++) { /* Do a number of passes */272for (i=0; i<2*ncon; i++)273rpqReset(queues[i]);274275mincutorder = -1;276newcut = mincut = initcut = graph->mincut;277278minbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, minbalv);279280ASSERT(ComputeCut(graph, where) == graph->mincut);281ASSERT(CheckBnd(graph));282283/* Insert boundary nodes in the priority queues */284nbnd = graph->nbnd;285irandArrayPermute(nbnd, perm, nbnd/5, 1);286for (ii=0; ii<nbnd; ii++) {287i = bndind[perm[ii]];288ASSERT(ed[i] > 0 || id[i] == 0);289ASSERT(bndptr[i] != -1);290//rgain = 1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1);291//rgain = (ed[i]-id[i] > 0 ? 1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1) : ed[i]-id[i]);292rgain = ed[i]-id[i];293rpqInsert(queues[2*qnum[i]+where[i]], i, rgain);294}295296for (nswaps=0; nswaps<nvtxs; nswaps++) {297SelectQueue(graph, ctrl->pijbm, ubfactors, queues, &from, &cnum);298299to = (from+1)%2;300301if (from == -1 || (higain = rpqGetTop(queues[2*cnum+from])) == -1)302break;303ASSERT(bndptr[higain] != -1);304305newcut -= (ed[higain]-id[higain]);306307iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1);308iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1);309newbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, newbalv);310311if ((newcut < mincut && newbal <= ffactor) ||312(newcut == mincut && (newbal < minbal ||313(newbal == minbal && BetterBalance2Way(ncon, minbalv, newbalv))))) {314mincut = newcut;315minbal = newbal;316mincutorder = nswaps;317rcopy(ncon, newbalv, minbalv);318}319else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */320newcut += (ed[higain]-id[higain]);321iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1);322iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1);323break;324}325326where[higain] = to;327moved[higain] = nswaps;328swaps[nswaps] = higain;329330if (ctrl->dbglvl&METIS_DBG_MOVEINFO) {331printf("Moved%6"PRIDX" from %"PRIDX"(%"PRIDX") Gain:%5"PRIDX", "332"Cut:%5"PRIDX", NPwgts:", higain, from, cnum, ed[higain]-id[higain], newcut);333for (l=0; l<ncon; l++)334printf("(%.3"PRREAL" %.3"PRREAL")", pwgts[l]*invtvwgt[l], pwgts[ncon+l]*invtvwgt[l]);335printf(" %+.3"PRREAL" LB: %.3"PRREAL"(%+.3"PRREAL")\n",336minbal, ComputeLoadImbalance(graph, 2, ctrl->pijbm), newbal);337}338339340/**************************************************************341* Update the id[i]/ed[i] values of the affected nodes342***************************************************************/343SWAP(id[higain], ed[higain], tmp);344if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])345BNDDelete(nbnd, bndind, bndptr, higain);346347for (j=xadj[higain]; j<xadj[higain+1]; j++) {348k = adjncy[j];349350kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);351INC_DEC(id[k], ed[k], kwgt);352353/* Update its boundary information and queue position */354if (bndptr[k] != -1) { /* If k was a boundary vertex */355if (ed[k] == 0) { /* Not a boundary vertex any more */356BNDDelete(nbnd, bndind, bndptr, k);357if (moved[k] == -1) /* Remove it if in the queues */358rpqDelete(queues[2*qnum[k]+where[k]], k);359}360else { /* If it has not been moved, update its position in the queue */361if (moved[k] == -1) {362//rgain = 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1);363//rgain = (ed[k]-id[k] > 0 ?364// 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]);365rgain = ed[k]-id[k];366rpqUpdate(queues[2*qnum[k]+where[k]], k, rgain);367}368}369}370else {371if (ed[k] > 0) { /* It will now become a boundary vertex */372BNDInsert(nbnd, bndind, bndptr, k);373if (moved[k] == -1) {374//rgain = 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1);375//rgain = (ed[k]-id[k] > 0 ?376// 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]);377rgain = ed[k]-id[k];378rpqInsert(queues[2*qnum[k]+where[k]], k, rgain);379}380}381}382}383384}385386387/****************************************************************388* Roll back computations389*****************************************************************/390for (i=0; i<nswaps; i++)391moved[swaps[i]] = -1; /* reset moved array */392for (nswaps--; nswaps>mincutorder; nswaps--) {393higain = swaps[nswaps];394395to = where[higain] = (where[higain]+1)%2;396SWAP(id[higain], ed[higain], tmp);397if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])398BNDDelete(nbnd, bndind, bndptr, higain);399else if (ed[higain] > 0 && bndptr[higain] == -1)400BNDInsert(nbnd, bndind, bndptr, higain);401402iaxpy(ncon, 1, vwgt+higain*ncon, 1, pwgts+to*ncon, 1);403iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+((to+1)%2)*ncon, 1);404for (j=xadj[higain]; j<xadj[higain+1]; j++) {405k = adjncy[j];406407kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);408INC_DEC(id[k], ed[k], kwgt);409410if (bndptr[k] != -1 && ed[k] == 0)411BNDDelete(nbnd, bndind, bndptr, k);412if (bndptr[k] == -1 && ed[k] > 0)413BNDInsert(nbnd, bndind, bndptr, k);414}415}416417graph->mincut = mincut;418graph->nbnd = nbnd;419420IFSET(ctrl->dbglvl, METIS_DBG_REFINE,421Print2WayRefineStats(ctrl, graph, ntpwgts, minbal, mincutorder));422423if (mincutorder <= 0 || mincut == initcut)424break;425}426427for (i=0; i<2*ncon; i++)428rpqDestroy(queues[i]);429430WCOREPOP;431}432433434/*************************************************************************/435/*! This function selects the partition number and the queue from which436we will move vertices out. */437/*************************************************************************/438void SelectQueue(graph_t *graph, real_t *pijbm, real_t *ubfactors,439rpq_t **queues, idx_t *from, idx_t *cnum)440{441idx_t ncon, i, part;442real_t max, tmp;443444ncon = graph->ncon;445446*from = -1;447*cnum = -1;448449/* First determine the side and the queue, irrespective of the presence of nodes.450The side & queue is determined based on the most violated balancing constraint. */451for (max=0.0, part=0; part<2; part++) {452for (i=0; i<ncon; i++) {453tmp = graph->pwgts[part*ncon+i]*pijbm[part*ncon+i] - ubfactors[i];454/* the '=' in the test bellow is to ensure that under tight constraints455the partition that is at the max is selected */456if (tmp >= max) {457max = tmp;458*from = part;459*cnum = i;460}461}462}463464465if (*from != -1) {466/* in case the desired queue is empty, select a queue from the same side */467if (rpqLength(queues[2*(*cnum)+(*from)]) == 0) {468for (i=0; i<ncon; i++) {469if (rpqLength(queues[2*i+(*from)]) > 0) {470max = graph->pwgts[(*from)*ncon+i]*pijbm[(*from)*ncon+i] - ubfactors[i];471*cnum = i;472break;473}474}475476for (i++; i<ncon; i++) {477tmp = graph->pwgts[(*from)*ncon+i]*pijbm[(*from)*ncon+i] - ubfactors[i];478if (tmp > max && rpqLength(queues[2*i+(*from)]) > 0) {479max = tmp;480*cnum = i;481}482}483}484485/*486printf("Selected1 %"PRIDX"(%"PRIDX") -> %"PRIDX" [%5"PRREAL"]\n",487*from, *cnum, rpqLength(queues[2*(*cnum)+(*from)]), max);488*/489}490else {491/* the partitioning does not violate balancing constraints, in which case select492a queue based on cut criteria */493for (part=0; part<2; part++) {494for (i=0; i<ncon; i++) {495if (rpqLength(queues[2*i+part]) > 0 &&496(*from == -1 || rpqSeeTopKey(queues[2*i+part]) > max)) {497max = rpqSeeTopKey(queues[2*i+part]);498*from = part;499*cnum = i;500}501}502}503/*504printf("Selected2 %"PRIDX"(%"PRIDX") -> %"PRIDX"\n",505*from, *cnum, rpqLength(queues[2*(*cnum)+(*from)]), max);506*/507}508}509510511/*************************************************************************/512/*! Prints statistics about the refinement */513/*************************************************************************/514void Print2WayRefineStats(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts,515real_t deltabal, idx_t mincutorder)516{517int i;518519if (mincutorder == -2) {520printf("Parts: ");521printf("Nv-Nb[%5"PRIDX" %5"PRIDX"] ICut: %6"PRIDX,522graph->nvtxs, graph->nbnd, graph->mincut);523printf(" [");524for (i=0; i<graph->ncon; i++)525printf("(%.3"PRREAL" %.3"PRREAL" T:%.3"PRREAL" %.3"PRREAL")",526graph->pwgts[i]*graph->invtvwgt[i],527graph->pwgts[graph->ncon+i]*graph->invtvwgt[i],528ntpwgts[i], ntpwgts[graph->ncon+i]);529printf("] LB: %.3"PRREAL"(%+.3"PRREAL")\n",530ComputeLoadImbalance(graph, 2, ctrl->pijbm), deltabal);531}532else {533printf("\tMincut: %6"PRIDX" at %5"PRIDX" NBND %6"PRIDX" NPwgts: [",534graph->mincut, mincutorder, graph->nbnd);535for (i=0; i<graph->ncon; i++)536printf("(%.3"PRREAL" %.3"PRREAL")",537graph->pwgts[i]*graph->invtvwgt[i], graph->pwgts[graph->ncon+i]*graph->invtvwgt[i]);538printf("] LB: %.3"PRREAL"(%+.3"PRREAL")\n",539ComputeLoadImbalance(graph, 2, ctrl->pijbm), deltabal);540}541}542543544545