Path: blob/devel/elmergrid/src/metis-5.1.0/libmetis/coarsen.c
3206 views
/*!1\file2\brief Functions for computing matchings during graph coarsening34\date Started 7/23/975\author George6\author Copyright 1997-2011, Regents of the University of Minnesota7\version\verbatim $Id: coarsen.c 13936 2013-03-30 03:59:09Z karypis $ \endverbatim8*/91011#include "metislib.h"1213#define UNMATCHEDFOR2HOP 0.10 /* The fraction of unmatched vertices that triggers 2-hop */141516/*************************************************************************/17/*! This function takes a graph and creates a sequence of coarser graphs.18It implements the coarsening phase of the multilevel paradigm.19*/20/*************************************************************************/21graph_t *CoarsenGraph(ctrl_t *ctrl, graph_t *graph)22{23idx_t i, eqewgts, level=0;2425IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->CoarsenTmr));2627/* determine if the weights on the edges are all the same */28for (eqewgts=1, i=1; i<graph->nedges; i++) {29if (graph->adjwgt[0] != graph->adjwgt[i]) {30eqewgts = 0;31break;32}33}3435/* set the maximum allowed coarsest vertex weight */36for (i=0; i<graph->ncon; i++)37ctrl->maxvwgt[i] = 1.5*graph->tvwgt[i]/ctrl->CoarsenTo;3839do {40IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph));4142/* allocate memory for cmap, if it has not already been done due to43multiple cuts */44if (graph->cmap == NULL)45graph->cmap = imalloc(graph->nvtxs, "CoarsenGraph: graph->cmap");4647/* determine which matching scheme you will use */48switch (ctrl->ctype) {49case METIS_CTYPE_RM:50Match_RM(ctrl, graph);51break;52case METIS_CTYPE_SHEM:53if (eqewgts || graph->nedges == 0)54Match_RM(ctrl, graph);55else56Match_SHEM(ctrl, graph);57break;58default:59gk_errexit(SIGERR, "Unknown ctype: %d\n", ctrl->ctype);60}6162graph = graph->coarser;63eqewgts = 0;64level++;6566ASSERT(CheckGraph(graph, 0, 1));6768} while (graph->nvtxs > ctrl->CoarsenTo &&69graph->nvtxs < COARSEN_FRACTION*graph->finer->nvtxs &&70graph->nedges > graph->nvtxs/2);7172IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph));73IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->CoarsenTmr));7475return graph;76}777879/*************************************************************************/80/*! This function takes a graph and creates a sequence of nlevels coarser81graphs, where nlevels is an input parameter.82*/83/*************************************************************************/84graph_t *CoarsenGraphNlevels(ctrl_t *ctrl, graph_t *graph, idx_t nlevels)85{86idx_t i, eqewgts, level;8788IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->CoarsenTmr));8990/* determine if the weights on the edges are all the same */91for (eqewgts=1, i=1; i<graph->nedges; i++) {92if (graph->adjwgt[0] != graph->adjwgt[i]) {93eqewgts = 0;94break;95}96}9798/* set the maximum allowed coarsest vertex weight */99for (i=0; i<graph->ncon; i++)100ctrl->maxvwgt[i] = 1.5*graph->tvwgt[i]/ctrl->CoarsenTo;101102for (level=0; level<nlevels; level++) {103IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph));104105/* allocate memory for cmap, if it has not already been done due to106multiple cuts */107if (graph->cmap == NULL)108graph->cmap = imalloc(graph->nvtxs, "CoarsenGraph: graph->cmap");109110/* determine which matching scheme you will use */111switch (ctrl->ctype) {112case METIS_CTYPE_RM:113Match_RM(ctrl, graph);114break;115case METIS_CTYPE_SHEM:116if (eqewgts || graph->nedges == 0)117Match_RM(ctrl, graph);118else119Match_SHEM(ctrl, graph);120break;121default:122gk_errexit(SIGERR, "Unknown ctype: %d\n", ctrl->ctype);123}124125graph = graph->coarser;126eqewgts = 0;127128ASSERT(CheckGraph(graph, 0, 1));129130if (graph->nvtxs < ctrl->CoarsenTo ||131graph->nvtxs > COARSEN_FRACTION*graph->finer->nvtxs ||132graph->nedges < graph->nvtxs/2)133break;134}135136IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, PrintCGraphStats(ctrl, graph));137IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->CoarsenTmr));138139return graph;140}141142143/*************************************************************************/144/*! This function finds a matching by randomly selecting one of the145unmatched adjacent vertices.146*/147/**************************************************************************/148idx_t Match_RM(ctrl_t *ctrl, graph_t *graph)149{150idx_t i, pi, ii, j, jj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx, last_unmatched;151idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt;152idx_t *match, *cmap, *perm;153size_t nunmatched=0;154155WCOREPUSH;156157IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr));158159nvtxs = graph->nvtxs;160ncon = graph->ncon;161xadj = graph->xadj;162vwgt = graph->vwgt;163adjncy = graph->adjncy;164adjwgt = graph->adjwgt;165cmap = graph->cmap;166167maxvwgt = ctrl->maxvwgt;168169match = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs));170perm = iwspacemalloc(ctrl, nvtxs);171172irandArrayPermute(nvtxs, perm, nvtxs/8, 1);173174for (cnvtxs=0, last_unmatched=0, pi=0; pi<nvtxs; pi++) {175i = perm[pi];176177if (match[i] == UNMATCHED) { /* Unmatched */178maxidx = i;179180if ((ncon == 1 ? vwgt[i] < maxvwgt[0] : ivecle(ncon, vwgt+i*ncon, maxvwgt))) {181/* Deal with island vertices. Find a non-island and match it with.182The matching ignores ctrl->maxvwgt requirements */183if (xadj[i] == xadj[i+1]) {184last_unmatched = gk_max(pi, last_unmatched)+1;185for (; last_unmatched<nvtxs; last_unmatched++) {186j = perm[last_unmatched];187if (match[j] == UNMATCHED) {188maxidx = j;189break;190}191}192}193else {194/* Find a random matching, subject to maxvwgt constraints */195if (ncon == 1) {196/* single constraint version */197for (j=xadj[i]; j<xadj[i+1]; j++) {198k = adjncy[j];199if (match[k] == UNMATCHED && vwgt[i]+vwgt[k] <= maxvwgt[0]) {200maxidx = k;201break;202}203}204205/* If it did not match, record for a 2-hop matching. */206if (maxidx == i && 3*vwgt[i] < maxvwgt[0]) {207nunmatched++;208maxidx = UNMATCHED;209}210}211else {212/* multi-constraint version */213for (j=xadj[i]; j<xadj[i+1]; j++) {214k = adjncy[j];215if (match[k] == UNMATCHED &&216ivecaxpylez(ncon, 1, vwgt+i*ncon, vwgt+k*ncon, maxvwgt)) {217maxidx = k;218break;219}220}221222/* If it did not match, record for a 2-hop matching. */223if (maxidx == i && ivecaxpylez(ncon, 2, vwgt+i*ncon, vwgt+i*ncon, maxvwgt)) {224nunmatched++;225maxidx = UNMATCHED;226}227}228}229}230231if (maxidx != UNMATCHED) {232cmap[i] = cmap[maxidx] = cnvtxs++;233match[i] = maxidx;234match[maxidx] = i;235}236}237}238239//printf("nunmatched: %zu\n", nunmatched);240241/* see if a 2-hop matching is required/allowed */242if (!ctrl->no2hop && nunmatched > UNMATCHEDFOR2HOP*nvtxs)243cnvtxs = Match_2Hop(ctrl, graph, perm, match, cnvtxs, nunmatched);244245246/* match the final unmatched vertices with themselves and reorder the vertices247of the coarse graph for memory-friendly contraction */248for (cnvtxs=0, i=0; i<nvtxs; i++) {249if (match[i] == UNMATCHED) {250match[i] = i;251cmap[i] = cnvtxs++;252}253else {254if (i <= match[i])255cmap[i] = cmap[match[i]] = cnvtxs++;256}257}258259IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr));260261CreateCoarseGraph(ctrl, graph, cnvtxs, match);262263WCOREPOP;264265return cnvtxs;266}267268269/**************************************************************************/270/*! This function finds a matching using the HEM heuristic. The vertices271are visited based on increasing degree to ensure that all vertices are272given a chance to match with something.273*/274/**************************************************************************/275idx_t Match_SHEM(ctrl_t *ctrl, graph_t *graph)276{277idx_t i, pi, ii, j, jj, jjinc, k, nvtxs, ncon, cnvtxs, maxidx, maxwgt,278last_unmatched, avgdegree;279idx_t *xadj, *vwgt, *adjncy, *adjwgt, *maxvwgt;280idx_t *match, *cmap, *degrees, *perm, *tperm;281size_t nunmatched=0;282283WCOREPUSH;284285IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->MatchTmr));286287nvtxs = graph->nvtxs;288ncon = graph->ncon;289xadj = graph->xadj;290vwgt = graph->vwgt;291adjncy = graph->adjncy;292adjwgt = graph->adjwgt;293cmap = graph->cmap;294295maxvwgt = ctrl->maxvwgt;296297match = iset(nvtxs, UNMATCHED, iwspacemalloc(ctrl, nvtxs));298perm = iwspacemalloc(ctrl, nvtxs);299tperm = iwspacemalloc(ctrl, nvtxs);300degrees = iwspacemalloc(ctrl, nvtxs);301302irandArrayPermute(nvtxs, tperm, nvtxs/8, 1);303304avgdegree = 0.7*(xadj[nvtxs]/nvtxs);305for (i=0; i<nvtxs; i++)306degrees[i] = (xadj[i+1]-xadj[i] > avgdegree ? avgdegree : xadj[i+1]-xadj[i]);307BucketSortKeysInc(ctrl, nvtxs, avgdegree, degrees, tperm, perm);308309for (cnvtxs=0, last_unmatched=0, pi=0; pi<nvtxs; pi++) {310i = perm[pi];311312if (match[i] == UNMATCHED) { /* Unmatched */313maxidx = i;314maxwgt = -1;315316if ((ncon == 1 ? vwgt[i] < maxvwgt[0] : ivecle(ncon, vwgt+i*ncon, maxvwgt))) {317/* Deal with island vertices. Find a non-island and match it with.318The matching ignores ctrl->maxvwgt requirements */319if (xadj[i] == xadj[i+1]) {320last_unmatched = gk_max(pi, last_unmatched)+1;321for (; last_unmatched<nvtxs; last_unmatched++) {322j = perm[last_unmatched];323if (match[j] == UNMATCHED) {324maxidx = j;325break;326}327}328}329else {330/* Find a heavy-edge matching, subject to maxvwgt constraints */331if (ncon == 1) {332/* single constraint version */333for (j=xadj[i]; j<xadj[i+1]; j++) {334k = adjncy[j];335if (match[k] == UNMATCHED &&336maxwgt < adjwgt[j] && vwgt[i]+vwgt[k] <= maxvwgt[0]) {337maxidx = k;338maxwgt = adjwgt[j];339}340}341342/* If it did not match, record for a 2-hop matching. */343if (maxidx == i && 3*vwgt[i] < maxvwgt[0]) {344nunmatched++;345maxidx = UNMATCHED;346}347}348else {349/* multi-constraint version */350for (j=xadj[i]; j<xadj[i+1]; j++) {351k = adjncy[j];352if (match[k] == UNMATCHED &&353ivecaxpylez(ncon, 1, vwgt+i*ncon, vwgt+k*ncon, maxvwgt) &&354(maxwgt < adjwgt[j] ||355(maxwgt == adjwgt[j] &&356BetterVBalance(ncon, graph->invtvwgt, vwgt+i*ncon,357vwgt+maxidx*ncon, vwgt+k*ncon)))) {358maxidx = k;359maxwgt = adjwgt[j];360}361}362363/* If it did not match, record for a 2-hop matching. */364if (maxidx == i && ivecaxpylez(ncon, 2, vwgt+i*ncon, vwgt+i*ncon, maxvwgt)) {365nunmatched++;366maxidx = UNMATCHED;367}368}369}370}371372if (maxidx != UNMATCHED) {373cmap[i] = cmap[maxidx] = cnvtxs++;374match[i] = maxidx;375match[maxidx] = i;376}377}378}379380//printf("nunmatched: %zu\n", nunmatched);381382/* see if a 2-hop matching is required/allowed */383if (!ctrl->no2hop && nunmatched > UNMATCHEDFOR2HOP*nvtxs)384cnvtxs = Match_2Hop(ctrl, graph, perm, match, cnvtxs, nunmatched);385386387/* match the final unmatched vertices with themselves and reorder the vertices388of the coarse graph for memory-friendly contraction */389for (cnvtxs=0, i=0; i<nvtxs; i++) {390if (match[i] == UNMATCHED) {391match[i] = i;392cmap[i] = cnvtxs++;393}394else {395if (i <= match[i])396cmap[i] = cmap[match[i]] = cnvtxs++;397}398}399400IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->MatchTmr));401402CreateCoarseGraph(ctrl, graph, cnvtxs, match);403404WCOREPOP;405406return cnvtxs;407}408409410/*************************************************************************/411/*! This function matches the unmatched vertices using a 2-hop matching412that involves vertices that are two hops away from each other. */413/**************************************************************************/414idx_t Match_2Hop(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match,415idx_t cnvtxs, size_t nunmatched)416{417418cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, 2);419cnvtxs = Match_2HopAll(ctrl, graph, perm, match, cnvtxs, &nunmatched, 64);420if (nunmatched > 1.5*UNMATCHEDFOR2HOP*graph->nvtxs)421cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, 3);422if (nunmatched > 2.0*UNMATCHEDFOR2HOP*graph->nvtxs)423cnvtxs = Match_2HopAny(ctrl, graph, perm, match, cnvtxs, &nunmatched, graph->nvtxs);424425return cnvtxs;426}427428429/*************************************************************************/430/*! This function matches the unmatched vertices whose degree is less than431maxdegree using a 2-hop matching that involves vertices that are two432hops away from each other.433The requirement of the 2-hop matching is a simple non-empty overlap434between the adjancency lists of the vertices. */435/**************************************************************************/436idx_t Match_2HopAny(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match,437idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree)438{439idx_t i, pi, ii, j, jj, k, nvtxs;440idx_t *xadj, *adjncy, *colptr, *rowind;441idx_t *cmap;442size_t nunmatched;443444IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr));445446nvtxs = graph->nvtxs;447xadj = graph->xadj;448adjncy = graph->adjncy;449cmap = graph->cmap;450451nunmatched = *r_nunmatched;452453/*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("IN: nunmatched: %zu\t", * nunmatched)); */454455/* create the inverted index */456WCOREPUSH;457colptr = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs+1));458for (i=0; i<nvtxs; i++) {459if (match[i] == UNMATCHED && xadj[i+1]-xadj[i] < maxdegree) {460for (j=xadj[i]; j<xadj[i+1]; j++)461colptr[adjncy[j]]++;462}463}464MAKECSR(i, nvtxs, colptr);465466rowind = iwspacemalloc(ctrl, colptr[nvtxs]);467for (pi=0; pi<nvtxs; pi++) {468i = perm[pi];469if (match[i] == UNMATCHED && xadj[i+1]-xadj[i] < maxdegree) {470for (j=xadj[i]; j<xadj[i+1]; j++)471rowind[colptr[adjncy[j]]++] = i;472}473}474SHIFTCSR(i, nvtxs, colptr);475476/* compute matchings by going down the inverted index */477for (pi=0; pi<nvtxs; pi++) {478i = perm[pi];479if (colptr[i+1]-colptr[i] < 2)480continue;481482for (jj=colptr[i+1], j=colptr[i]; j<jj; j++) {483if (match[rowind[j]] == UNMATCHED) {484for (jj--; jj>j; jj--) {485if (match[rowind[jj]] == UNMATCHED) {486cmap[rowind[j]] = cmap[rowind[jj]] = cnvtxs++;487match[rowind[j]] = rowind[jj];488match[rowind[jj]] = rowind[j];489nunmatched -= 2;490break;491}492}493}494}495}496WCOREPOP;497498/* IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("OUT: nunmatched: %zu\n", nunmatched)); */499500IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr));501502*r_nunmatched = nunmatched;503return cnvtxs;504}505506507/*************************************************************************/508/*! This function matches the unmatched vertices whose degree is less than509maxdegree using a 2-hop matching that involves vertices that are two510hops away from each other.511The requirement of the 2-hop matching is that of identical adjacency512lists.513*/514/**************************************************************************/515idx_t Match_2HopAll(ctrl_t *ctrl, graph_t *graph, idx_t *perm, idx_t *match,516idx_t cnvtxs, size_t *r_nunmatched, size_t maxdegree)517{518idx_t i, pi, pk, ii, j, jj, k, nvtxs, mask, idegree;519idx_t *xadj, *adjncy;520idx_t *cmap, *mark;521ikv_t *keys;522size_t nunmatched, ncand;523524IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->Aux3Tmr));525526nvtxs = graph->nvtxs;527xadj = graph->xadj;528adjncy = graph->adjncy;529cmap = graph->cmap;530531nunmatched = *r_nunmatched;532mask = IDX_MAX/maxdegree;533534/*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("IN: nunmatched: %zu\t", nunmatched)); */535536WCOREPUSH;537538/* collapse vertices with identical adjancency lists */539keys = ikvwspacemalloc(ctrl, nunmatched);540for (ncand=0, pi=0; pi<nvtxs; pi++) {541i = perm[pi];542idegree = xadj[i+1]-xadj[i];543if (match[i] == UNMATCHED && idegree > 1 && idegree < maxdegree) {544for (k=0, j=xadj[i]; j<xadj[i+1]; j++)545k += adjncy[j]%mask;546keys[ncand].val = i;547keys[ncand].key = (k%mask)*maxdegree + idegree;548ncand++;549}550}551ikvsorti(ncand, keys);552553mark = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));554for (pi=0; pi<ncand; pi++) {555i = keys[pi].val;556if (match[i] != UNMATCHED)557continue;558559for (j=xadj[i]; j<xadj[i+1]; j++)560mark[adjncy[j]] = i;561562for (pk=pi+1; pk<ncand; pk++) {563k = keys[pk].val;564if (match[k] != UNMATCHED)565continue;566567if (keys[pi].key != keys[pk].key)568break;569if (xadj[i+1]-xadj[i] != xadj[k+1]-xadj[k])570break;571572for (jj=xadj[k]; jj<xadj[k+1]; jj++) {573if (mark[adjncy[jj]] != i)574break;575}576if (jj == xadj[k+1]) {577cmap[i] = cmap[k] = cnvtxs++;578match[i] = k;579match[k] = i;580nunmatched -= 2;581break;582}583}584}585WCOREPOP;586587/*IFSET(ctrl->dbglvl, METIS_DBG_COARSEN, printf("OUT: ncand: %zu, nunmatched: %zu\n", ncand, nunmatched)); */588589IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->Aux3Tmr));590591*r_nunmatched = nunmatched;592return cnvtxs;593}594595596/*************************************************************************/597/*! This function prints various stats for each graph during coarsening598*/599/*************************************************************************/600void PrintCGraphStats(ctrl_t *ctrl, graph_t *graph)601{602idx_t i;603604printf("%10"PRIDX" %10"PRIDX" %10"PRIDX" [%"PRIDX"] [",605graph->nvtxs, graph->nedges, isum(graph->nedges, graph->adjwgt, 1), ctrl->CoarsenTo);606607for (i=0; i<graph->ncon; i++)608printf(" %8"PRIDX":%8"PRIDX, ctrl->maxvwgt[i], graph->tvwgt[i]);609printf(" ]\n");610}611612613/*************************************************************************/614/*! This function creates the coarser graph. It uses a simple hash-table615for identifying the adjacent vertices that get collapsed to the same616node. The hash-table can have conflicts, which are handled via a617linear scan.618*/619/*************************************************************************/620void CreateCoarseGraph(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs,621idx_t *match)622{623idx_t j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, cnedges,624v, u, mask, dovsize;625idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt;626idx_t *cmap, *htable;627idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt;628graph_t *cgraph;629630dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0);631632/* Check if the mask-version of the code is a good choice */633mask = HTLENGTH;634if (cnvtxs < 2*mask || graph->nedges/graph->nvtxs > mask/20) {635CreateCoarseGraphNoMask(ctrl, graph, cnvtxs, match);636return;637}638639nvtxs = graph->nvtxs;640xadj = graph->xadj;641for (v=0; v<nvtxs; v++) {642if (xadj[v+1]-xadj[v] > (mask>>3)) {643CreateCoarseGraphNoMask(ctrl, graph, cnvtxs, match);644return;645}646}647648649WCOREPUSH;650651IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr));652653ncon = graph->ncon;654vwgt = graph->vwgt;655vsize = graph->vsize;656adjncy = graph->adjncy;657adjwgt = graph->adjwgt;658cmap = graph->cmap;659660/* Initialize the coarser graph */661cgraph = SetupCoarseGraph(graph, cnvtxs, dovsize);662cxadj = cgraph->xadj;663cvwgt = cgraph->vwgt;664cvsize = cgraph->vsize;665cadjncy = cgraph->adjncy;666cadjwgt = cgraph->adjwgt;667668htable = iset(gk_min(cnvtxs+1, mask+1), -1, iwspacemalloc(ctrl, mask+1));669670cxadj[0] = cnvtxs = cnedges = 0;671for (v=0; v<nvtxs; v++) {672if ((u = match[v]) < v)673continue;674675ASSERT(cmap[v] == cnvtxs);676ASSERT(cmap[match[v]] == cnvtxs);677678if (ncon == 1)679cvwgt[cnvtxs] = vwgt[v];680else681icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon);682683if (dovsize)684cvsize[cnvtxs] = vsize[v];685686nedges = 0;687688istart = xadj[v];689iend = xadj[v+1];690for (j=istart; j<iend; j++) {691k = cmap[adjncy[j]];692kk = k&mask;693if ((m = htable[kk]) == -1) {694cadjncy[nedges] = k;695cadjwgt[nedges] = adjwgt[j];696htable[kk] = nedges++;697}698else if (cadjncy[m] == k) {699cadjwgt[m] += adjwgt[j];700}701else {702for (jj=0; jj<nedges; jj++) {703if (cadjncy[jj] == k) {704cadjwgt[jj] += adjwgt[j];705break;706}707}708if (jj == nedges) {709cadjncy[nedges] = k;710cadjwgt[nedges++] = adjwgt[j];711}712}713}714715if (v != u) {716if (ncon == 1)717cvwgt[cnvtxs] += vwgt[u];718else719iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1);720721if (dovsize)722cvsize[cnvtxs] += vsize[u];723724istart = xadj[u];725iend = xadj[u+1];726for (j=istart; j<iend; j++) {727k = cmap[adjncy[j]];728kk = k&mask;729if ((m = htable[kk]) == -1) {730cadjncy[nedges] = k;731cadjwgt[nedges] = adjwgt[j];732htable[kk] = nedges++;733}734else if (cadjncy[m] == k) {735cadjwgt[m] += adjwgt[j];736}737else {738for (jj=0; jj<nedges; jj++) {739if (cadjncy[jj] == k) {740cadjwgt[jj] += adjwgt[j];741break;742}743}744if (jj == nedges) {745cadjncy[nedges] = k;746cadjwgt[nedges++] = adjwgt[j];747}748}749}750751/* Remove the contracted adjacency weight */752jj = htable[cnvtxs&mask];753if (jj >= 0 && cadjncy[jj] != cnvtxs) {754for (jj=0; jj<nedges; jj++) {755if (cadjncy[jj] == cnvtxs)756break;757}758}759/* This 2nd check is needed for non-adjacent matchings */760if (jj >= 0 && jj < nedges && cadjncy[jj] == cnvtxs) {761cadjncy[jj] = cadjncy[--nedges];762cadjwgt[jj] = cadjwgt[nedges];763}764}765766/* Zero out the htable */767for (j=0; j<nedges; j++)768htable[cadjncy[j]&mask] = -1;769htable[cnvtxs&mask] = -1;770771cnedges += nedges;772cxadj[++cnvtxs] = cnedges;773cadjncy += nedges;774cadjwgt += nedges;775}776777cgraph->nedges = cnedges;778779for (j=0; j<ncon; j++) {780cgraph->tvwgt[j] = isum(cgraph->nvtxs, cgraph->vwgt+j, ncon);781cgraph->invtvwgt[j] = 1.0/(cgraph->tvwgt[j] > 0 ? cgraph->tvwgt[j] : 1);782}783784785ReAdjustMemory(ctrl, graph, cgraph);786787IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));788789WCOREPOP;790}791792793/*************************************************************************/794/*! This function creates the coarser graph. It uses a full-size array795(htable) for identifying the adjacent vertices that get collapsed to796the same node.797*/798/*************************************************************************/799void CreateCoarseGraphNoMask(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs,800idx_t *match)801{802idx_t j, k, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, dovsize;803idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt;804idx_t *cmap, *htable;805idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt;806graph_t *cgraph;807808WCOREPUSH;809810dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0);811812IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr));813814nvtxs = graph->nvtxs;815ncon = graph->ncon;816xadj = graph->xadj;817vwgt = graph->vwgt;818vsize = graph->vsize;819adjncy = graph->adjncy;820adjwgt = graph->adjwgt;821cmap = graph->cmap;822823824/* Initialize the coarser graph */825cgraph = SetupCoarseGraph(graph, cnvtxs, dovsize);826cxadj = cgraph->xadj;827cvwgt = cgraph->vwgt;828cvsize = cgraph->vsize;829cadjncy = cgraph->adjncy;830cadjwgt = cgraph->adjwgt;831832htable = iset(cnvtxs, -1, iwspacemalloc(ctrl, cnvtxs));833834cxadj[0] = cnvtxs = cnedges = 0;835for (v=0; v<nvtxs; v++) {836if ((u = match[v]) < v)837continue;838839ASSERT(cmap[v] == cnvtxs);840ASSERT(cmap[match[v]] == cnvtxs);841842if (ncon == 1)843cvwgt[cnvtxs] = vwgt[v];844else845icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon);846847if (dovsize)848cvsize[cnvtxs] = vsize[v];849850nedges = 0;851852istart = xadj[v];853iend = xadj[v+1];854for (j=istart; j<iend; j++) {855k = cmap[adjncy[j]];856if ((m = htable[k]) == -1) {857cadjncy[nedges] = k;858cadjwgt[nedges] = adjwgt[j];859htable[k] = nedges++;860}861else {862cadjwgt[m] += adjwgt[j];863}864}865866if (v != u) {867if (ncon == 1)868cvwgt[cnvtxs] += vwgt[u];869else870iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1);871872if (dovsize)873cvsize[cnvtxs] += vsize[u];874875istart = xadj[u];876iend = xadj[u+1];877for (j=istart; j<iend; j++) {878k = cmap[adjncy[j]];879if ((m = htable[k]) == -1) {880cadjncy[nedges] = k;881cadjwgt[nedges] = adjwgt[j];882htable[k] = nedges++;883}884else {885cadjwgt[m] += adjwgt[j];886}887}888889/* Remove the contracted adjacency weight */890if ((j = htable[cnvtxs]) != -1) {891ASSERT(cadjncy[j] == cnvtxs);892cadjncy[j] = cadjncy[--nedges];893cadjwgt[j] = cadjwgt[nedges];894htable[cnvtxs] = -1;895}896}897898/* Zero out the htable */899for (j=0; j<nedges; j++)900htable[cadjncy[j]] = -1;901902cnedges += nedges;903cxadj[++cnvtxs] = cnedges;904cadjncy += nedges;905cadjwgt += nedges;906}907908cgraph->nedges = cnedges;909910for (j=0; j<ncon; j++) {911cgraph->tvwgt[j] = isum(cgraph->nvtxs, cgraph->vwgt+j, ncon);912cgraph->invtvwgt[j] = 1.0/(cgraph->tvwgt[j] > 0 ? cgraph->tvwgt[j] : 1);913}914915ReAdjustMemory(ctrl, graph, cgraph);916917IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));918919WCOREPOP;920}921922923/*************************************************************************/924/*! This function creates the coarser graph. It uses a simple hash-table925for identifying the adjacent vertices that get collapsed to the same926node. The hash-table can have conflicts, which are handled via a927linear scan. It relies on the perm[] array to visit the vertices in928increasing cnvtxs order.929*/930/*************************************************************************/931void CreateCoarseGraphPerm(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs,932idx_t *match, idx_t *perm)933{934idx_t i, j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, cnedges,935v, u, mask, dovsize;936idx_t *xadj, *vwgt, *vsize, *adjncy, *adjwgt;937idx_t *cmap, *htable;938idx_t *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt;939graph_t *cgraph;940941WCOREPUSH;942943IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->ContractTmr));944945dovsize = (ctrl->objtype == METIS_OBJTYPE_VOL ? 1 : 0);946947mask = HTLENGTH;948949nvtxs = graph->nvtxs;950ncon = graph->ncon;951xadj = graph->xadj;952vwgt = graph->vwgt;953vsize = graph->vsize;954adjncy = graph->adjncy;955adjwgt = graph->adjwgt;956cmap = graph->cmap;957958/* Initialize the coarser graph */959cgraph = SetupCoarseGraph(graph, cnvtxs, dovsize);960cxadj = cgraph->xadj;961cvwgt = cgraph->vwgt;962cvsize = cgraph->vsize;963cadjncy = cgraph->adjncy;964cadjwgt = cgraph->adjwgt;965966htable = iset(mask+1, -1, iwspacemalloc(ctrl, mask+1));967968cxadj[0] = cnvtxs = cnedges = 0;969for (i=0; i<nvtxs; i++) {970v = perm[i];971if (cmap[v] != cnvtxs)972continue;973974u = match[v];975if (ncon == 1)976cvwgt[cnvtxs] = vwgt[v];977else978icopy(ncon, vwgt+v*ncon, cvwgt+cnvtxs*ncon);979980if (dovsize)981cvsize[cnvtxs] = vsize[v];982983nedges = 0;984985istart = xadj[v];986iend = xadj[v+1];987for (j=istart; j<iend; j++) {988k = cmap[adjncy[j]];989kk = k&mask;990if ((m = htable[kk]) == -1) {991cadjncy[nedges] = k;992cadjwgt[nedges] = adjwgt[j];993htable[kk] = nedges++;994}995else if (cadjncy[m] == k) {996cadjwgt[m] += adjwgt[j];997}998else {999for (jj=0; jj<nedges; jj++) {1000if (cadjncy[jj] == k) {1001cadjwgt[jj] += adjwgt[j];1002break;1003}1004}1005if (jj == nedges) {1006cadjncy[nedges] = k;1007cadjwgt[nedges++] = adjwgt[j];1008}1009}1010}10111012if (v != u) {1013if (ncon == 1)1014cvwgt[cnvtxs] += vwgt[u];1015else1016iaxpy(ncon, 1, vwgt+u*ncon, 1, cvwgt+cnvtxs*ncon, 1);10171018if (dovsize)1019cvsize[cnvtxs] += vsize[u];10201021istart = xadj[u];1022iend = xadj[u+1];1023for (j=istart; j<iend; j++) {1024k = cmap[adjncy[j]];1025kk = k&mask;1026if ((m = htable[kk]) == -1) {1027cadjncy[nedges] = k;1028cadjwgt[nedges] = adjwgt[j];1029htable[kk] = nedges++;1030}1031else if (cadjncy[m] == k) {1032cadjwgt[m] += adjwgt[j];1033}1034else {1035for (jj=0; jj<nedges; jj++) {1036if (cadjncy[jj] == k) {1037cadjwgt[jj] += adjwgt[j];1038break;1039}1040}1041if (jj == nedges) {1042cadjncy[nedges] = k;1043cadjwgt[nedges++] = adjwgt[j];1044}1045}1046}10471048/* Remove the contracted adjacency weight */1049jj = htable[cnvtxs&mask];1050if (jj >= 0 && cadjncy[jj] != cnvtxs) {1051for (jj=0; jj<nedges; jj++) {1052if (cadjncy[jj] == cnvtxs)1053break;1054}1055}1056if (jj >= 0 && cadjncy[jj] == cnvtxs) { /* This 2nd check is needed for non-adjacent matchings */1057cadjncy[jj] = cadjncy[--nedges];1058cadjwgt[jj] = cadjwgt[nedges];1059}1060}10611062for (j=0; j<nedges; j++)1063htable[cadjncy[j]&mask] = -1; /* Zero out the htable */1064htable[cnvtxs&mask] = -1;10651066cnedges += nedges;1067cxadj[++cnvtxs] = cnedges;1068cadjncy += nedges;1069cadjwgt += nedges;1070}10711072cgraph->nedges = cnedges;10731074for (i=0; i<ncon; i++) {1075cgraph->tvwgt[i] = isum(cgraph->nvtxs, cgraph->vwgt+i, ncon);1076cgraph->invtvwgt[i] = 1.0/(cgraph->tvwgt[i] > 0 ? cgraph->tvwgt[i] : 1);1077}107810791080ReAdjustMemory(ctrl, graph, cgraph);10811082IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->ContractTmr));10831084WCOREPOP;1085}108610871088/*************************************************************************/1089/*! Setup the various arrays for the coarse graph1090*/1091/*************************************************************************/1092graph_t *SetupCoarseGraph(graph_t *graph, idx_t cnvtxs, idx_t dovsize)1093{1094graph_t *cgraph;10951096cgraph = CreateGraph();10971098cgraph->nvtxs = cnvtxs;1099cgraph->ncon = graph->ncon;11001101cgraph->finer = graph;1102graph->coarser = cgraph;110311041105/* Allocate memory for the coarser graph */1106cgraph->xadj = imalloc(cnvtxs+1, "SetupCoarseGraph: xadj");1107cgraph->adjncy = imalloc(graph->nedges, "SetupCoarseGraph: adjncy");1108cgraph->adjwgt = imalloc(graph->nedges, "SetupCoarseGraph: adjwgt");1109cgraph->vwgt = imalloc(cgraph->ncon*cnvtxs, "SetupCoarseGraph: vwgt");1110cgraph->tvwgt = imalloc(cgraph->ncon, "SetupCoarseGraph: tvwgt");1111cgraph->invtvwgt = rmalloc(cgraph->ncon, "SetupCoarseGraph: invtvwgt");11121113if (dovsize)1114cgraph->vsize = imalloc(cnvtxs, "SetupCoarseGraph: vsize");11151116return cgraph;1117}111811191120/*************************************************************************/1121/*! This function re-adjusts the amount of memory that was allocated if1122it will lead to significant savings1123*/1124/*************************************************************************/1125void ReAdjustMemory(ctrl_t *ctrl, graph_t *graph, graph_t *cgraph)1126{1127if (cgraph->nedges > 10000 && cgraph->nedges < 0.9*graph->nedges) {1128cgraph->adjncy = irealloc(cgraph->adjncy, cgraph->nedges, "ReAdjustMemory: adjncy");1129cgraph->adjwgt = irealloc(cgraph->adjwgt, cgraph->nedges, "ReAdjustMemory: adjwgt");1130}1131}113211331134