#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: grid2d.c,v 1.31 2000/07/16 23:20:01 knepley Exp $";
#endif

/* Implements 2d triangular grids */
#include "petscts.h"
#include "gsolver.h"
#include "src/grid/gridimpl.h"         /*I "grid.h" I*/
#include "src/mesh/impls/triangular/triimpl.h"
#include "src/gvec/impls/triangular/2d/gvec2d.h"
#include "src/gvec/impls/triangular/2d/gvec2dView.h"
#include "src/gvec/impls/triangular/2d/gmat2d.h"
#include "grid2d.h"
#include "elemvec2d.h"
#include "varorder2d.h"

extern int GridResetConstrainedMultiply_Private(Grid, GMat);

#undef  __FUNCT__
#define __FUNCT__ "GridDestroy_Triangular_2D"
/*
  GridDestroy_Triangular_2D - Tears down every sub-object and allocation owned by
  a 2d triangular grid.

  Input Parameter:
. grid - The grid whose type-specific data is being destroyed

  Notes:
  Unconditional PetscFree() calls are for arrays always allocated at setup; the
  NULL-guarded destroys are for objects that only exist when the corresponding
  feature (constraints, reduction, ghosting, ...) was enabled. The Grid object
  itself is presumably freed by the generic GridDestroy() — confirm against the
  dispatch in gridimpl.h.
*/
int GridDestroy_Triangular_2D(Grid grid) {
  int  field, bd;
  int  ierr;

  PetscFunctionBegin;
  /* Field variables */
  for(field = 0; field < grid->numFields; field++) {
    /* The name is optional (the viewer falls back to the field index when unnamed) */
    if (grid->fields[field].name != PETSC_NULL) {
      ierr = PetscFree(grid->fields[field].name);                                                         CHKERRQ(ierr);
    }
    ierr = PetscFree(grid->fields[field].discType);                                                       CHKERRQ(ierr);
    ierr = DiscretizationDestroy(grid->fields[field].disc);                                               CHKERRQ(ierr);
  }
  ierr = PetscFree(grid->fields);                                                                         CHKERRQ(ierr);
  /* Class variables */
  if (grid->cm) {
    ierr = FieldClassMapDestroy(grid->cm);                                                                CHKERRQ(ierr);
  }
  /* Default variable orderings */
  if (grid->order) {
    ierr = VarOrderingDestroy(grid->order);                                                               CHKERRQ(ierr);
  }
  if (grid->locOrder) {
    ierr = LocalVarOrderingDestroy(grid->locOrder);                                                       CHKERRQ(ierr);
  }
  /* Ghost variable scatter */
  if (grid->ghostVec) {
    ierr = VecDestroy(grid->ghostVec);                                                                    CHKERRQ(ierr);
  }
  if (grid->ghostScatter) {
    ierr = VecScatterDestroy(grid->ghostScatter);                                                         CHKERRQ(ierr);
  }
  /* Constraint variables */
  if (grid->constraintCM) {
    ierr = FieldClassMapDestroy(grid->constraintCM);                                                      CHKERRQ(ierr);
  }
  if (grid->constraintOrder) {
    ierr = VarOrderingDestroy(grid->constraintOrder);                                                     CHKERRQ(ierr);
  }
  if (grid->constraintOrdering) {
    ierr = ISDestroy(grid->constraintOrdering);                                                           CHKERRQ(ierr);
  }
  if (grid->constraintMatrix) {
    ierr = MatDestroy(grid->constraintMatrix);                                                            CHKERRQ(ierr);
  }
  if (grid->constraintInverse) {
    ierr = MatDestroy(grid->constraintInverse);                                                           CHKERRQ(ierr);
  }
  /* Problem variables */
  ierr = PetscFree(grid->rhsFuncs);                                                                       CHKERRQ(ierr);
  ierr = PetscFree(grid->rhsOps);                                                                         CHKERRQ(ierr);
  ierr = PetscFree(grid->matOps);                                                                         CHKERRQ(ierr);
  /* Assembly variables */
  ierr = PetscFree(grid->defaultFields);                                                                  CHKERRQ(ierr);
  if (grid->vec) {
    ierr = ElementVecDestroy(grid->vec);                                                                  CHKERRQ(ierr);
  }
  if (grid->mat) {
    ierr = ElementMatDestroy(grid->mat);                                                                  CHKERRQ(ierr);
  }
  if (grid->ghostElementVec) {
    ierr = ElementVecDestroy(grid->ghostElementVec);                                                      CHKERRQ(ierr);
  }
  /* Boundary condition variables */
  if (grid->reductionCM) {
    ierr = FieldClassMapDestroy(grid->reductionCM);                                                       CHKERRQ(ierr);
  }
  if (grid->reduceOrder) {
    ierr = VarOrderingDestroy(grid->reduceOrder);                                                        CHKERRQ(ierr);
  }
  if (grid->locReduceOrder) {
    ierr = LocalVarOrderingDestroy(grid->locReduceOrder);                                                 CHKERRQ(ierr);
  }
  ierr = PetscFree(grid->bc);                                                                             CHKERRQ(ierr);
  ierr = PetscFree(grid->pointBC);                                                                        CHKERRQ(ierr);
  /* Boundary iteration variables: per-boundary size arrays may individually be NULL */
  for(bd = 0; bd < grid->numBd; bd++) {
    if (grid->bdSize[bd] != PETSC_NULL) {
      ierr = PetscFree(grid->bdSize[bd]);                                                                 CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(grid->bdSize);                                                                         CHKERRQ(ierr);
  if (grid->bdOrder) {
    ierr = VarOrderingDestroy(grid->bdOrder);                                                             CHKERRQ(ierr);
  }
  if (grid->bdLocOrder) {
    ierr = LocalVarOrderingDestroy(grid->bdLocOrder);                                                     CHKERRQ(ierr);
  }
  /* Subobjects */
  ierr = MeshDestroy(grid->mesh);                                                                         CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridView_Triangular_2D_File"
/*
  GridView_Triangular_2D_File - Prints a human-readable summary of the grid to an
  ASCII viewer: one line per field (name, component count, active/inactive,
  discretization), the global and per-rank variable counts, then the mesh itself.

  Input Parameters:
. grid   - The grid to view
. viewer - An ASCII viewer (the caller, GridView_Triangular_2D, guarantees the type)
*/
static int GridView_Triangular_2D_File(Grid grid, PetscViewer viewer) {
  VarOrdering order = grid->order;
  FILE       *fd;
  int         rank, field;
  int         ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(grid->comm, &rank);                                                                CHKERRQ(ierr);
  ierr = PetscViewerASCIIGetPointer(viewer, &fd);                                                         CHKERRQ(ierr);
  /* Check every print for failure, consistent with the rest of this file */
  ierr = PetscFPrintf(grid->comm, fd, "Grid Object:\n");                                                  CHKERRQ(ierr);
  if (grid->numFields == 1) {
    ierr = PetscFPrintf(grid->comm, fd, "  %d field:\n", grid->numFields);                                CHKERRQ(ierr);
  } else {
    ierr = PetscFPrintf(grid->comm, fd, "  %d fields:\n", grid->numFields);                               CHKERRQ(ierr);
  }
  for(field = 0; field < grid->numFields; field++) {
    /* Field name, falling back to the index when unnamed */
    if (grid->fields[field].name != PETSC_NULL) {
      ierr = PetscFPrintf(grid->comm, fd, "  %s field", grid->fields[field].name);                        CHKERRQ(ierr);
    } else {
      ierr = PetscFPrintf(grid->comm, fd, "  field %d", field);                                           CHKERRQ(ierr);
    }
    /* Singular vs. plural component count */
    if (grid->fields[field].numComp == 1) {
      ierr = PetscFPrintf(grid->comm, fd, " with %d component is ", grid->fields[field].numComp);         CHKERRQ(ierr);
    } else {
      ierr = PetscFPrintf(grid->comm, fd, " with %d components is ", grid->fields[field].numComp);        CHKERRQ(ierr);
    }
    if (grid->fields[field].isActive) {
      ierr = PetscFPrintf(grid->comm, fd, "active\n    ");                                                CHKERRQ(ierr);
    } else {
      ierr = PetscFPrintf(grid->comm, fd, "inactive\n    ");                                              CHKERRQ(ierr);
    }
    ierr = DiscretizationView(grid->fields[field].disc, viewer);                                          CHKERRQ(ierr);
  }

  /* Problem specific information: global total plus per-rank local/ghost counts */
  if (grid->numActiveFields > 0) {
    ierr = PetscFPrintf(grid->comm, fd, "  %d variables in the problem:\n", order->numVars);              CHKERRQ(ierr);
    ierr = PetscSynchronizedFPrintf(grid->comm, fd, "    %d variables and %d ghost variables in domain %d:\n",
                                    order->numLocVars, order->numOverlapVars - order->numLocVars, rank);
    CHKERRQ(ierr);
    ierr = PetscSynchronizedFlush(grid->comm);                                                            CHKERRQ(ierr);
  }

  /* Underlying mesh */
  ierr = MeshView(grid->mesh, viewer);                                                                    CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridView_Triangular_2D"
/*
  GridView_Triangular_2D - Viewer entry point for 2d triangular grids.
  Only ASCII viewers are handled; any other viewer type is silently ignored.
*/
int GridView_Triangular_2D(Grid grid, PetscViewer viewer) {
  PetscTruth match;
  int        ierr;

  PetscFunctionBegin;
  /* Dispatch on the viewer type: delegate ASCII output to the file routine */
  ierr = PetscTypeCompare((PetscObject) viewer, PETSC_VIEWER_ASCII, &match);                              CHKERRQ(ierr);
  if (match == PETSC_TRUE) {
    ierr = GridView_Triangular_2D_File(grid, viewer);                                                     CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridSetupGhostScatter_Triangular_2D"
/*
  GridSetupGhostScatter_Triangular_2D - Builds the sequential ghost vector and the
  scatter that fills it from a global vector laid out by `order`.

  The local ghost vector is laid out in three contiguous groups:
    [ owned variables | ghost-node variables | constraint-generated ghost variables ]
  and indices[] records the global index of each entry, in that same order.

  Input Parameters:
. grid  - The grid
. order - The global variable ordering to scatter from

  Output Parameters:
. ghostVec     - Sequential vector sized for owned + ghost variables
. ghostScatter - Scatter from a global vector (laid out by `order`) into *ghostVec
*/
int GridSetupGhostScatter_Triangular_2D(Grid grid, VarOrdering order, Vec *ghostVec, VecScatter *ghostScatter) {
  FieldClassMap         map;
  PetscConstraintObject constCtx          = grid->constraintCtx;
  int                   numOverlapVars    = order->numOverlapVars;
  int                   numLocVars        = order->numLocVars;
  int                   numVars           = order->numVars;
  int                   numLocNewVars     = order->numLocNewVars;
  int                   numOverlapNewVars = order->numOverlapNewVars;
  int                   numGhostNewVars   = order->numOverlapNewVars - order->numLocNewVars;
  int                  *firstVar          = order->firstVar;
  int                  *offsets           = order->offsets;
  int                   numNodes, numGhostNodes;
  int                  *classes, *classSizes;
  IS                    localIS;       /* Local  indices for local ghost vector variables */
  int                  *indices;       /* Global indices for local ghost vector variables */
  IS                    globalIS;      /* Global indices for local ghost vector variables */
  Vec                   dummyVec;      /* Dummy global vector used to create the ghost variable scatter */
  int                   rank, newComp;
  int                   node, nclass, var, startVar, newField, i, c;
  int                   ierr;

  PetscFunctionBegin;
  PetscValidPointer(ghostVec);
  PetscValidPointer(ghostScatter);
  ierr = VarOrderingGetClassMap(order, &map);                                                             CHKERRQ(ierr);
  numNodes      = map->numNodes;
  numGhostNodes = map->numGhostNodes;
  classes       = map->classes;
  classSizes    = map->classSizes;

  /* Create the ghost variable scatter -- Notice that for no ghost variables localOffsets is not used */
  ierr = MPI_Comm_rank(grid->comm, &rank);                                                                CHKERRQ(ierr);
  ierr = ISCreateStride(grid->comm, numOverlapVars, 0, 1, &localIS);                                      CHKERRQ(ierr);
  ierr = PetscMalloc(numOverlapVars * sizeof(int), &indices);                                             CHKERRQ(ierr);
  /* Group 1: owned variables are globally contiguous starting at firstVar[rank] */
  for(var = 0; var < numLocVars; var++) {
    indices[var] = var + firstVar[rank];
  }
  /* Group 2: ghost-node variables; offsets[] holds each ghost node's first global variable,
     and classSizes[] how many variables that node's class carries */
  for(node = 0, var = numLocVars; node < numGhostNodes; node++) {
    nclass = classes[numNodes+node];
    for(i = 0; i < classSizes[nclass]; i++) {
      indices[var++] = offsets[numNodes+node] + i;
    }
  }
  if (numGhostNewVars > 0) {
    /* Group 3: add in constraints that generate off-processor variables */
    ierr = (*constCtx->ops->getsize)(constCtx, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, &newComp);
    CHKERRQ(ierr);
    /* New variables come in blocks of newComp components per new field */
    for(newField = numLocNewVars/newComp; newField < numOverlapNewVars/newComp; newField++) {
      ierr = (*constCtx->ops->getindices)(constCtx, grid->mesh, order, newField, CONSTRAINT_NEW_INDEX, &startVar);
      CHKERRQ(ierr);
      for(c = 0; c < newComp; c++, var++) {
        indices[var] = startVar+c;
      }
    }
  }
  /* Sanity check: the three groups must exactly fill the overlap vector */
  if (var != numOverlapVars) SETERRQ(PETSC_ERR_PLIB, "Invalid ghost vector numbering");
  ierr = ISCreateGeneral(grid->comm, numOverlapVars, indices, &globalIS);                                CHKERRQ(ierr);
  /* dummyVec only supplies the parallel layout to VecScatterCreate; it is destroyed below */
  ierr = VecCreateMPI(grid->comm, numLocVars, numVars, &dummyVec);                                       CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, numOverlapVars, ghostVec);                                        CHKERRQ(ierr);
  ierr = VecScatterCreate(dummyVec, globalIS, *ghostVec, localIS, ghostScatter);                         CHKERRQ(ierr);
  PetscLogObjectParent(grid, *ghostVec);
  PetscLogObjectParent(grid, *ghostScatter);

  /* Cleanup */
  ierr = VecDestroy(dummyVec);                                                                           CHKERRQ(ierr);
  ierr = ISDestroy(localIS);                                                                             CHKERRQ(ierr);
  ierr = ISDestroy(globalIS);                                                                            CHKERRQ(ierr);
  ierr = PetscFree(indices);                                                                             CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridSetupBoundarySizes_Triangular_2D"
/*
  GridSetupBoundarySizes_Triangular_2D - Recomputes grid->bdSize[bd][field], the
  number of boundary variables per field on each boundary, from the current field
  class map. Only nodes owned by this partition are counted.
*/
int GridSetupBoundarySizes_Triangular_2D(Grid grid) {
  Mesh_Triangular *tri = (Mesh_Triangular *) grid->mesh->data;
  FieldClassMap    map = grid->cm;
  int             *classCount; /* Per-class tally of this boundary's local nodes */
  int              firstNode;
  int              bd, i, f, node, cls;
  int              ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc(map->numClasses * sizeof(int), &classCount);                                         CHKERRQ(ierr);
  ierr = PartitionGetStartNode(grid->mesh->part, &firstNode);                                             CHKERRQ(ierr);
  for(bd = 0; bd < grid->numBd; bd++) {
    /* Tally the locally-owned boundary nodes of each class */
    ierr = PetscMemzero(classCount, map->numClasses * sizeof(int));                                       CHKERRQ(ierr);
    for(i = tri->bdBegin[bd]; i < tri->bdBegin[bd+1]; i++) {
      node = tri->bdNodes[i] - firstNode;
      if ((node < 0) || (node >= grid->mesh->numNodes)) continue;
      classCount[map->classes[node]]++;
    }
    /* Accumulate the per-field sizes from the class tallies */
    ierr = PetscMemzero(grid->bdSize[bd], grid->numFields * sizeof(int));                                 CHKERRQ(ierr);
    for(f = 0; f < map->numFields; f++) {
      for(cls = 0; cls < map->numClasses; cls++) {
        if (map->fieldClasses[f][cls]) {
          grid->bdSize[bd][map->fields[f]] += classCount[cls];
        }
      }
    }
  }

  /* Cleanup */
  ierr = PetscFree(classCount);                                                                           CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if 0
#undef  __FUNCT__
#define __FUNCT__ "GridExtantExchange"
/* NOTE(review): this entire function is compiled out and does NOT build as written:
     - the parameter list of GridExtantExchange is malformed (a second, stray
       parameter list appears after the closing parenthesis),
     - `extantVars` is used in the MPI_Alltoallv calls but is never declared
       (presumably it belongs in the broken parameter list),
     - `extantIndices` is allocated but never freed in the cleanup section.
   All three must be repaired before the #if 0 is removed. */
/*@C
  GridExtantExchange
  This functions transfers data between local storage in different domains without a predefined mapping.

  Input Parameters:
. numExtants   - The number of extants (interior variables) in this domain
. extantProcs  - The processor to which to send each extant
. firstExtant  - The first extant variable in each domain

. ghostIndices - The global index for each ghost
. dataType     - The type of the variables
. firstVar     - The first variable on each processor
. addv         - The insert mode, INSERT_VALUES or ADD_VALUES
. mode         - The direction of the transfer, SCATTER_FORWARD or SCATTER_REVERSE
. locVars      - The local variable array

  Output Parameters:
. firstExtant  - The first extant variable in each domain after repartitioning

. ghostVars    - The ghost variables

  Note:
  The data in ghostVars is assumed contiguous and implicitly indexed by the order of
  ghostProcs and ghostIndices. The SCATTER_FORWARD mode will take the requested data
  from locVars and copy it to ghostVars in the order specified by ghostIndices. The
  SCATTER_REVERSE mode will take data from ghostVars and copy it to locVars.

  Level: developer

.keywords ghost, exchange, grid
.seealso GridGlobalToLocal, GridLocalToGlobal
@*/
int GridExtantExchange(MPI_Comm comm, int numExtants, int *extantProcs, int *firstExtant, PetscDataType dataType, AO *ordering)

                       int *firstVar, InsertMode addv, ScatterMode mode, void *locVars, void *ghostVars
{
  int         *numSendExtants; /* The number of extants from each domain */
  int         *numRecvExtants; /* The number of extants in each domain */
  int         *sumSendExtants; /* The prefix sums of numSendExtants */
  int         *sumRecvExtants; /* The prefix sums of numRecvExtantss */
  int         *offsets;        /* The offset into the send array for each domain */
  int          totSendExtants; /* The number of ghosts to request variables for */
  int          totRecvExtants; /* The number of nodes to provide class info about */
  int         *sendIndices;    /* The canonical indices of extants in this domain */
  int         *recvIndices;    /* The canonical indices of extants to return variables for */
  int         *extantIndices;  /* The new canonical indices of extants after reordering */
  char        *tempVars;       /* The variables of the requested or submitted extants */
  int          numLocVars;
  char        *locBytes   = (char *) locVars;
  MPI_Datatype MPIType;
  int          typeSize;
  int          numProcs, rank;
  int          proc, extant, locIndex, byte;
  int          ierr;

  PetscFunctionBegin;
  /* Initialize communication */
  ierr = MPI_Comm_size(comm, &numProcs);                                                                  CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);                                                                      CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &numSendExtants);                                            CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &numRecvExtants);                                            CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &sumSendExtants);                                            CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &sumRecvExtants);                                            CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &offsets);                                                   CHKERRQ(ierr);
  ierr = PetscMemzero(numSendExtants,  numProcs * sizeof(int));                                           CHKERRQ(ierr);
  ierr = PetscMemzero(numRecvExtants,  numProcs * sizeof(int));                                           CHKERRQ(ierr);
  ierr = PetscMemzero(sumSendExtants,  numProcs * sizeof(int));                                           CHKERRQ(ierr);
  ierr = PetscMemzero(sumRecvExtants,  numProcs * sizeof(int));                                           CHKERRQ(ierr);
  ierr = PetscMemzero(offsets,         numProcs * sizeof(int));                                           CHKERRQ(ierr);
  numLocVars = firstVar[rank+1] - firstVar[rank];

  /* Get number of extants to send to each processor */
  for(extant = 0; extant < numExtants; extant++) {
    numSendExtants[extantProcs[extant]]++;
  }

  /* Get number of extants to receive from each processor */
  ierr = MPI_Alltoall(numSendExtants, 1, MPI_INT, numRecvExtants, 1, MPI_INT, comm);                      CHKERRQ(ierr);
  for(proc = 1; proc < numProcs; proc++) {
    sumSendExtants[proc] = sumSendExtants[proc-1] + numSendExtants[proc-1];
    sumRecvExtants[proc] = sumRecvExtants[proc-1] + numRecvExtants[proc-1];
    offsets[proc]       = sumSendExtants[proc];
  }
  totSendExtants = sumSendExtants[numProcs-1] + numSendExtants[numProcs-1];
  totRecvExtants = sumRecvExtants[numProcs-1] + numRecvExtants[numProcs-1];
  if (numExtants != totSendExtants) SETERRQ(PETSC_ERR_PLIB, "Invalid number of extants in send");

  ierr = PetscDataTypeGetSize(dataType, &typeSize);                                                       CHKERRQ(ierr);
  if (totSendExtants) {
    ierr = PetscMalloc(totSendExtants * sizeof(int), &sendIndices);                                       CHKERRQ(ierr);
  }
  if (totRecvExtants) {
    ierr = PetscMalloc(totRecvExtants * sizeof(int), &recvIndices);                                       CHKERRQ(ierr);
    ierr = PetscMalloc(totRecvExtants * sizeof(int), &extantIndices);                                     CHKERRQ(ierr);
    ierr = PetscMalloc(totRecvExtants * typeSize,    &tempVars);                                          CHKERRQ(ierr);
  }

  /* Must order extants by processor */
  for(extant = 0; extant < numExtants; extant++)
    sendIndices[offsets[extantProcs[extant]]++] = extant + firstExtant[rank];

  /* Get canonical indices of extants to provide variables for */
  ierr = MPI_Alltoallv(sendIndices, numSendExtants, sumSendExtants, MPI_INT,
                       recvIndices, numRecvExtants, sumRecvExtants, MPI_INT, comm);
  CHKERRQ(ierr);

  /* Recompute size and offset of each domain */
  ierr = MPI_Allgather(&totRecvExtants, 1, MPI_INT, &firstExtant[1], 1, MPI_INT, comm);                  CHKERRQ(ierr);
  firstExtant[0] = 0;
  for(proc = 1; proc <= numProcs; proc++)
    firstExtant[proc] += firstExtant[proc-1];

  /* Create the global extant reordering */
  for(extant = 0; extant < totRecvExtants; extant++)
    /* This would be the time to do RCM on the local graph by reordering extantIndices[] */
    extantIndices[extant] =  extant + firstExtant[rank];
  ierr = AOCreateDebug(comm, totRecvExtants, recvIndices, extantIndices, ordering);                      CHKERRQ(ierr);

  switch(mode)
  {
  case SCATTER_FORWARD:
    /* Get extant variables */
    if (addv == INSERT_VALUES) {
      for(extant = 0; extant < totRecvExtants; extant++)
      {
        locIndex = recvIndices[extant] - firstVar[rank];
#ifdef PETSC_USE_BOPT_g
        if ((locIndex < 0) || (locIndex >= numLocVars)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "Invalid extant index received");
#endif
        for(byte = 0; byte < typeSize; byte++)
          tempVars[extant*typeSize+byte] = locBytes[locIndex*typeSize+byte];
      }
    } else {
      for(extant = 0; extant < totRecvExtants; extant++)
      {
        locIndex = recvIndices[extant] - firstVar[rank];
#ifdef PETSC_USE_BOPT_g
        if ((locIndex < 0) || (locIndex >= numLocVars)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "Invalid extant index received");
#endif
        for(byte = 0; byte < typeSize; byte++)
          tempVars[extant*typeSize+byte] += locBytes[locIndex*typeSize+byte];
      }
    }

    /* Communicate local variables to extant storage */
    /* NOTE(review): extantVars below is undeclared -- see header note */
    ierr = PetscDataTypeToMPIDataType(dataType, &MPIType);                                               CHKERRQ(ierr);
    ierr = MPI_Alltoallv(tempVars,  numRecvExtants, sumRecvExtants, MPIType,
                         extantVars, numSendExtants, sumSendExtants, MPIType, comm);
    CHKERRQ(ierr);
    break;
  case SCATTER_REVERSE:
    /* Communicate extant variables to local storage */
    ierr = PetscDataTypeToMPIDataType(dataType, &MPIType);                                               CHKERRQ(ierr);
    ierr = MPI_Alltoallv(extantVars, numSendExtants, sumRecvExtants, MPIType,
                         tempVars,  numRecvExtants, sumSendExtants, MPIType, comm);
    CHKERRQ(ierr);

    /* Get extant variables */
    if (addv == INSERT_VALUES) {
      for(extant = 0; extant < totRecvExtants; extant++)
      {
        locIndex = recvIndices[extant] - firstVar[rank];
#ifdef PETSC_USE_BOPT_g
        if ((locIndex < 0) || (locIndex >= numLocVars)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "Invalid extant index received");
#endif
        for(byte = 0; byte < typeSize; byte++)
          locBytes[locIndex*typeSize+byte] = tempVars[extant*typeSize+byte];
      }
    } else {
      for(extant = 0; extant < totRecvExtants; extant++)
      {
        locIndex = recvIndices[extant] - firstVar[rank];
#ifdef PETSC_USE_BOPT_g
        if ((locIndex < 0) || (locIndex >= numLocVars)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "Invalid extant index received");
#endif
        for(byte = 0; byte < typeSize; byte++)
          locBytes[locIndex*typeSize+byte] += tempVars[extant*typeSize+byte];
      }
    }
    break;
  default:
    SETERRQ(PETSC_ERR_ARG_WRONG, "Invalid scatter mode");
  }

  /* Cleanup */
  /* NOTE(review): extantIndices is never freed here -- leak if re-enabled */
  ierr = PetscFree(numSendExtants);                                                                       CHKERRQ(ierr);
  ierr = PetscFree(numRecvExtants);                                                                       CHKERRQ(ierr);
  ierr = PetscFree(sumSendExtants);                                                                       CHKERRQ(ierr);
  ierr = PetscFree(sumRecvExtants);                                                                       CHKERRQ(ierr);
  ierr = PetscFree(offsets);                                                                             CHKERRQ(ierr);
  if (totSendExtants) {
    ierr = PetscFree(sendIndices);                                                                       CHKERRQ(ierr);
  }
  if (totRecvExtants) {
    ierr = PetscFree(recvIndices);                                                                       CHKERRQ(ierr);
    ierr = PetscFree(tempVars);                                                                          CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

#undef  __FUNCT__
#define __FUNCT__ "GridSetUp_Triangular_2D"
/*
  GridSetUp_Triangular_2D - (Re)builds all derived grid data: the field class maps
  (including the constrained and reduction maps when reduceSystem is set), the
  boundary sizes, the global/local variable orderings, and the element vector and
  matrix work objects used during assembly.

  Input Parameter:
. grid - The grid to set up

  Note: returns 1 (not an error code via SETERRQ) when there are no active fields.
  Safe to call repeatedly: each stage destroys any previously-created object first.
*/
int GridSetUp_Triangular_2D(Grid grid) {
  FieldClassMap newCM;
#ifdef NEW_REDUCTION
  int           numReduceFields;
  int          *reduceFields;
  int           bc;
#endif
  int           elemSize;
  int           f, field;
  int           ierr;

  PetscFunctionBegin;
  if (grid->numActiveFields <= 0) PetscFunctionReturn(1);

  /* Create default class map */
  if (grid->cm != PETSC_NULL) {
    ierr = FieldClassMapDestroy(grid->cm);                                                                CHKERRQ(ierr);
  }
  ierr = FieldClassMapCreateTriangular2D(grid, grid->numActiveFields, grid->defaultFields, &grid->cm);    CHKERRQ(ierr);
  /* Implement system constraints */
  if (grid->reduceSystem == PETSC_TRUE) {
    /* Constrain the default class structure, replacing grid->cm with the constrained map */
    ierr = FieldClassMapConstrain(grid->cm, grid, PETSC_TRUE, PETSC_FALSE, &newCM);                       CHKERRQ(ierr);
    ierr = FieldClassMapDestroy(grid->cm);                                                                CHKERRQ(ierr);
    grid->cm = newCM;
    /* Create reduction class map */
    if (grid->reductionCM != PETSC_NULL) {
      ierr = FieldClassMapDestroy(grid->reductionCM);                                                     CHKERRQ(ierr);
    }
#ifdef NEW_REDUCTION
    /* Collect the distinct fields flagged for reduction by boundary and point BCs */
    ierr = PetscMalloc((grid->numBC+grid->numPointBC) * sizeof(int), &reduceFields);                      CHKERRQ(ierr);
    for(bc = 0, numReduceFields = 0; bc < grid->numBC; bc++) {
      if (grid->bcReduce[bc] != PETSC_TRUE) continue;
      /* Skip fields already collected (linear scan; the field counts are small) */
      for(f = 0; f < numReduceFields; f++) {
        if (reduceFields[f] == grid->bcField[bc]) break;
      }
      if (f == numReduceFields) reduceFields[numReduceFields++] = grid->bcField[bc];
    }
    for(bc = 0; bc < grid->numPointBC; bc++) {
      if (grid->pointBCReduce[bc] != PETSC_TRUE) continue;
      for(f = 0; f < numReduceFields; f++) {
        if (reduceFields[f] == grid->pointBCField[bc]) break;
      }
      if (f == numReduceFields) reduceFields[numReduceFields++] = grid->pointBCField[bc];
    }
    ierr = FieldClassMapCreateTriangular2D(grid, numReduceFields, reduceFields, &newCM);                 CHKERRQ(ierr);
    ierr = FieldClassMapReduce(newCM, grid, &grid->reductionCM);                                         CHKERRQ(ierr);
    ierr = FieldClassMapDestroy(newCM);                                                                  CHKERRQ(ierr);
    ierr = PetscFree(reduceFields);                                                                      CHKERRQ(ierr);
#else
    ierr = FieldClassMapReduce(grid->cm, grid, &grid->reductionCM);                                      CHKERRQ(ierr);
#endif
  }
  /* Calculate boundary sizes after reduction */
  ierr = GridSetupBoundarySizes_Triangular_2D(grid);                                                     CHKERRQ(ierr);

  /* Setup default global and local variable orderings */
  if (grid->order) {
    ierr = VarOrderingDestroy(grid->order);                                                              CHKERRQ(ierr);
  }
  if (grid->locOrder) {
    ierr = LocalVarOrderingDestroy(grid->locOrder);                                                      CHKERRQ(ierr);
  }
  ierr = VarOrderingCreate(grid, &grid->order);                                                          CHKERRQ(ierr);
  ierr = LocalVarOrderingCreate(grid, grid->cm->numFields, grid->cm->fields, &grid->locOrder);           CHKERRQ(ierr);

  /* Setup global and local variable orderings for BC reduction */
  if (grid->reduceOrder) {
    ierr = VarOrderingDestroy(grid->reduceOrder);                                                        CHKERRQ(ierr);
  }
  if (grid->locReduceOrder) {
    ierr = LocalVarOrderingDestroy(grid->locReduceOrder);                                                CHKERRQ(ierr);
  }
  if (grid->reduceSystem) {
    ierr = VarOrderingCreateReduce(grid, &grid->reduceOrder);                                            CHKERRQ(ierr);
    ierr = LocalVarOrderingCreate(grid, grid->reductionCM->numFields, grid->reductionCM->fields, &grid->locReduceOrder);
    CHKERRQ(ierr);
  }

  /* Setup element vector and matrix */
  if (grid->vec != PETSC_NULL) {
    ierr = ElementVecDestroy(grid->vec);                                                                 CHKERRQ(ierr);
  }
  if (grid->ghostElementVec != PETSC_NULL) {
    ierr = ElementVecDestroy(grid->ghostElementVec);                                                     CHKERRQ(ierr);
  }
  if (grid->mat != PETSC_NULL) {
    ierr = ElementMatDestroy(grid->mat);                                                                 CHKERRQ(ierr);
  }
  /* With explicit constraints, constrained fields enlarge the element size by
     funcs * constraintCompDiff extra entries per constrained field */
  elemSize = grid->locOrder->elemSize;
  if (grid->explicitConstraints == PETSC_TRUE) {
    for(f = 0; f < grid->cm->numFields; f++) {
      field = grid->cm->fields[f];
      if (grid->fields[field].isConstrained == PETSC_TRUE)
        elemSize += grid->fields[field].disc->funcs*grid->fields[field].constraintCompDiff;
    }
  }
  ierr = ElementVecCreate(grid->comm, elemSize, &grid->vec);                                             CHKERRQ(ierr);
  ierr = ElementVecCreate(grid->comm, elemSize, &grid->ghostElementVec);                                 CHKERRQ(ierr);
  ierr = ElementMatCreate(grid->comm, elemSize, elemSize, &grid->mat);                                   CHKERRQ(ierr);
  /* The reduce sizes keep the unconstrained element size for the reduction pass */
  grid->vec->reduceSize             = grid->locOrder->elemSize;
  grid->ghostElementVec->reduceSize = grid->locOrder->elemSize;
  grid->mat->reduceRowSize          = grid->locOrder->elemSize;
  grid->mat->reduceColSize          = grid->locOrder->elemSize;

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridSetupConstraints_Triangular_2D"
/*
  GridSetupConstraints_Triangular_2D - Builds the data structures used to impose
  explicit constraints on a single field of the grid:

    - grid->constraintCM:       a field class map with the constrained field removed/replaced
    - grid->constraintOrder:    a variable ordering over constrained and new variables
    - grid->constraintOrdering: an IS mapping the unconstrained local numbering to the
                                constrained one
    - grid->constraintMatrix:   the sparse projector P transforming unconstrained
                                variables into constrained and new variables
    - grid->constraintInverse:  (optional, -grid_const_inv) a pseudo-inverse of P

  Returns immediately if no field is constrained; it is an error for more than one
  field to be constrained.
*/
int GridSetupConstraints_Triangular_2D(Grid grid, PetscConstraintObject ctx) {
  Mesh             mesh;
  Field           *fields             = grid->fields;
  FieldClassMap    cm                 = grid->cm;
  int              numFields          = grid->cm->numFields;
  int              numNodes           = grid->cm->numNodes;
  int            **fieldClasses       = grid->cm->fieldClasses;
  int             *classes            = grid->cm->classes;
  int             *classSizes         = grid->cm->classSizes;
  int              numVars            = grid->order->numVars;
  int              numLocVars         = grid->order->numLocVars;
  int             *firstVar           = grid->order->firstVar;
  int             *offsets            = grid->order->offsets;
  int              numTotalFields     = grid->order->numTotalFields;
  int            **localStart         = grid->order->localStart;
  int              constField         = -1; /* The field which is constrained */
  int             *ordering;                /* Gives a mapping between the two variable numberings */
  int             *diagRows;                /* Allocation for the projector P */
  int             *offdiagRows;             /* Allocation for the projector P */
  int              numConstrainedFields;
  int              rowStartVar, colStartVar, locColStart, locColEnd, numLocConstraintVars;
  int              rank;
  int              f, field, node, marker, nclass, comp, nodeVars, var, count;
  PetscTruth       opt;
  int              ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(grid->comm, &rank);                                                                CHKERRQ(ierr);
  ierr = GridGetMesh(grid, &mesh);                                                                        CHKERRQ(ierr);
  /* Check constrained fields: exactly one field may carry constraints */
  for(field = 0, numConstrainedFields = 0; field < numTotalFields; field++)
    if (fields[field].isConstrained == PETSC_TRUE) {
      constField = field;
      numConstrainedFields++;
    }
  if (numConstrainedFields == 0) PetscFunctionReturn(0);
  if (numConstrainedFields > 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "Only one field may be constrained");

  /* Create constrained class map (replacing any previous one) */
  if (grid->constraintCM != PETSC_NULL) {
    ierr = FieldClassMapDestroy(grid->constraintCM);                                                      CHKERRQ(ierr);
  }
  ierr = FieldClassMapConstrain(grid->cm, grid, PETSC_FALSE, PETSC_TRUE, &grid->constraintCM);            CHKERRQ(ierr);

  /* Create variable ordering for constrained and new fields */ 
  if (grid->constraintOrder != PETSC_NULL) {
    ierr = VarOrderingDestroy(grid->constraintOrder);                                                     CHKERRQ(ierr);
  }
  ierr = VarOrderingConstrain(grid, grid->order, &grid->constraintOrder);                                 CHKERRQ(ierr);

  /* Calculate mapping between variable numberings */ 
  if (grid->constraintOrdering != PETSC_NULL) {
    ierr = ISDestroy(grid->constraintOrdering);                                                           CHKERRQ(ierr);
  }
  ierr = PetscMalloc(numLocVars * sizeof(int), &ordering);                                                CHKERRQ(ierr);
  /* Constrained variables are renumbered after all unconstrained local variables;
     numLocConstraintVars starts past them and is advanced once per constrained variable */
  numLocConstraintVars = grid->constraintOrder->numLocVars - grid->constraintOrder->numLocNewVars;
  for(node = 0, count = 0; node < numNodes; node++) {
    nclass      = classes[node];
    rowStartVar = offsets[node];
    nodeVars    = classSizes[nclass];
    colStartVar = grid->constraintOrder->offsets[node];

    /* NOTE(review): constrained nodes are identified by a negative boundary marker
       together with the constrained field being present on the node's class */
    ierr = MeshGetNodeBoundary(mesh, node, &marker);                                                      CHKERRQ(ierr);
    if ((marker < 0) && (localStart[constField][nclass] >= 0)) {
      /* The preceding fields on the node */
      for(var = 0; var < localStart[constField][nclass]; var++, count++)
        ordering[rowStartVar-firstVar[rank]+var] = colStartVar-grid->constraintOrder->firstVar[rank]+var;
      /* Nonzeroes in C */
      rowStartVar += localStart[constField][nclass];
      colStartVar += localStart[constField][nclass];
      for(var = 0; var < fields[constField].numComp; var++, count++)
        ordering[rowStartVar-firstVar[rank]+var] = numLocConstraintVars++;
      /* The remaining fields on the node */
      for(var = fields[constField].numComp; var < nodeVars - localStart[constField][nclass]; var++, count++)
        ordering[rowStartVar-firstVar[rank]+var] = colStartVar-grid->constraintOrder->firstVar[rank]+var-fields[constField].numComp;
    } else {
      /* Nonzeroes in I */
      for(var = 0; var < nodeVars; var++, count++)
        ordering[rowStartVar-firstVar[rank]+var] = colStartVar-grid->constraintOrder->firstVar[rank]+var;
    }
  }
  /* Consistency checks: every local variable must have been assigned exactly once */
  if (numLocConstraintVars != numLocVars) SETERRQ(PETSC_ERR_PLIB, "Invalid constraint variable offsets");
  if (count != numLocVars) SETERRQ(PETSC_ERR_PLIB, "Invalid constraint variable offsets");
  ierr = ISCreateGeneral(PETSC_COMM_SELF, numLocVars, ordering, &grid->constraintOrdering);               CHKERRQ(ierr);
  ierr = PetscFree(ordering);                                                                             CHKERRQ(ierr);


  /* Calculate allocation for constraint matrix which transforms unconstrained fields to constrained and new fields:

         / I 0 \ / v_Int \ = / v_Int \
         \ 0 C / \ v_Bd  /   \ v_New /
  */
  ierr = PetscMalloc(numLocVars * sizeof(int), &diagRows);                                                CHKERRQ(ierr);
  ierr = PetscMalloc(numLocVars * sizeof(int), &offdiagRows);                                             CHKERRQ(ierr);
  ierr = PetscMemzero(diagRows,    numLocVars * sizeof(int));                                             CHKERRQ(ierr);
  ierr = PetscMemzero(offdiagRows, numLocVars * sizeof(int));                                             CHKERRQ(ierr);
  locColStart = grid->constraintOrder->firstVar[rank];
  locColEnd   = grid->constraintOrder->firstVar[rank+1];
  for(node = 0; node < numNodes; node++) {
    nclass           = classes[node];
    rowStartVar      = offsets[node] - firstVar[rank];
    nodeVars         = classSizes[nclass];

    /* All constrained nodes have negative markers */
    ierr = MeshGetNodeBoundary(mesh, node, &marker);                                                      CHKERRQ(ierr);
    if (marker < 0) {
      for(f = 0; f < numFields; f++) {
        field = cm->fields[f];
        if (fields[field].isConstrained == PETSC_TRUE) {
          comp = fields[field].numComp + fields[field].constraintCompDiff;
          ierr = (*ctx->ops->getindices)(ctx, grid->mesh, grid->constraintOrder, node, CONSTRAINT_COL_INDEX, &colStartVar);
          CHKERRQ(ierr);
          /* Check to see whether the variables fall within the diagonal block --
               Notice we are overestimating as if every constrained variable
               depends on all the new variables
          */
          if ((colStartVar + comp <= locColStart) || (colStartVar >= locColEnd)) {
            for(var = 0; var < fields[field].numComp; var++, rowStartVar++)
              offdiagRows[rowStartVar] += comp;
          } else if ((colStartVar >= locColStart) && (colStartVar + comp <= locColEnd)) {
            for(var = 0; var < fields[field].numComp; var++, rowStartVar++)
              diagRows[rowStartVar]    += comp;
#if 0
          /* Allow cuts on a single node for rectangular matrices */
          } else if (rectangular) {
            if (colStartVar < locColStart) {
              /* Cut is from below */
              for(var = 0; var < fields[field].numComp; var++, rowStartVar++)
              {
                diagRows[rowStartVar]    += (colStartVar + comp) - locColStart;
                offdiagRows[rowStartVar] += locColStart - colStartVar;
              }
            } else {
              /* Cut is from above */
              for(var = 0; var < fields[field].numComp; var++, rowStartVar++)
              {
                diagRows[rowStartVar]    += locColEnd - colStartVar;
                offdiagRows[rowStartVar] += (colStartVar + comp) - locColEnd;
              }
            }
#endif
          } else {
            /* Row blocking cuts variables on a single node. This is bad partitioning. */
            SETERRQ(PETSC_ERR_ARG_WRONG, "Row blocking cut variables on a single node");
          }
        } else if (fieldClasses[f][nclass]) {
          /* Remember localStart[][] is -1 if the field is not on the node */
          for(var = 0; var < fields[field].numComp; var++, rowStartVar++)
            diagRows[rowStartVar] = 1;
        }
      }
    } else {
      /* Unconstrained nodes: identity rows, one nonzero each */
      for(var = 0; var < nodeVars; var++)
        diagRows[rowStartVar+var] = 1;
    }
  }

  /* Create the constraint matrix with the per-row preallocation computed above */
  if (grid->constraintMatrix != PETSC_NULL) {
    ierr = MatDestroy(grid->constraintMatrix);                                                            CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJ(grid->comm, numLocVars, grid->constraintOrder->numLocVars, numVars,
                         grid->constraintOrder->numVars, 0, diagRows, 0, offdiagRows, &grid->constraintMatrix);
  CHKERRQ(ierr);
  ierr = MatSetOption(grid->constraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR);                            CHKERRQ(ierr);

  /* Create the pseudo-inverse of the constraint matrix */
  ierr = PetscOptionsHasName(PETSC_NULL, "-grid_const_inv", &opt);                                        CHKERRQ(ierr);
  if (opt == PETSC_TRUE) {
    if (grid->constraintInverse != PETSC_NULL) {
      ierr = MatDestroy(grid->constraintInverse);                                                         CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJ(grid->comm, grid->constraintOrder->numLocVars, grid->constraintOrder->numLocVars,
                           grid->constraintOrder->numVars, grid->constraintOrder->numVars, 3, PETSC_NULL, 0, PETSC_NULL,
                           &grid->constraintInverse);
    CHKERRQ(ierr);
    ierr = MatSetOption(grid->constraintInverse, MAT_NEW_NONZERO_ALLOCATION_ERR);                         CHKERRQ(ierr);
  }

  /* Cleanup */
  ierr = PetscFree(diagRows);                                                                             CHKERRQ(ierr);
  ierr = PetscFree(offdiagRows);                                                                          CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridSetupBoundary_Triangular_2D"
/*
  GridSetupBoundary_Triangular_2D - Creates the boundary variable orderings
  grid->bdOrder (global) and grid->bdLocOrder (element-local), numbering only the
  variables on boundary nodes whose boundary is not reduced away by a boundary
  condition.  In parallel, ghost variable offsets are exchanged with neighboring
  processors via MPI_Alltoallv.  Element offsets use the lower dimensional
  (boundary) discretizations of each field.
*/
int GridSetupBoundary_Triangular_2D(Grid grid) {
  Mesh                     mesh;
  Partition                part;
  FieldClassMap            map             = grid->cm;
  PetscConstraintObject    constCtx        = grid->constraintCtx;
  int                      numBC           = grid->numBC;
  GridBC                  *gridBC          = grid->bc;
  int                      numFields       = map->numFields;
  int                     *fields          = map->fields;
  int                      numNodes        = map->numNodes;
  int                      numOverlapNodes = map->numOverlapNodes;
  int                      numGhostNodes   = map->numGhostNodes;
  int                      numClasses      = map->numClasses;
  int                    **fieldClasses    = map->fieldClasses;
  int                     *classes         = map->classes;
  int                     *classSizes      = map->classSizes;
  int                     *localOffsets;
  int                      numNewVars;
  VarOrdering              o;
  LocalVarOrdering         l;
  /* Ghost variable communication */
  int                     *ghostSendVars;    /* Number of ghost variables on a given processor interior to this domain */
  int                     *sumSendVars;      /* Prefix sums of ghostSendVars */
  int                     *ghostRecvVars;    /* Number of ghost variables on a given processor */
  int                     *sumRecvVars;      /* Prefix sums of ghostRecvVars */
  int                     *displs;           /* Offsets into ghostRecvVars */
  int                      numSendGhostVars; /* The number of ghost variable offsets to send to other processors */
  int                     *sendGhostBuffer;  /* Recv: Global node numbers Send: Offsets of these nodes */
  int                      numProcs, rank;
  int                      elemOffset;
  int                      proc, f, field, bc, node, locNode, gNode, marker, nclass, var;
  int                      ierr;

  PetscFunctionBegin;
  grid->bdSetupCalled = PETSC_TRUE;
  ierr = GridGetMesh(grid, &mesh);                                                                        CHKERRQ(ierr);
  ierr = MeshGetPartition(mesh, &part);                                                                   CHKERRQ(ierr);

  /* Destroy old orderings */
  if (grid->bdOrder) {
    ierr = VarOrderingDestroy(grid->bdOrder);                                                             CHKERRQ(ierr);
  }
  if (grid->bdLocOrder) {
    ierr = LocalVarOrderingDestroy(grid->bdLocOrder);                                                     CHKERRQ(ierr);
  }

  /* Setup the boundary ordering */
  PetscHeaderCreate(o, _VarOrdering, int, IS_COOKIE, 0, "VarOrdering", grid->comm, VarOrderingDestroy, 0);
  PetscLogObjectCreate(o);
  ierr = PetscObjectCompose((PetscObject) o, "ClassMap", (PetscObject) map);                              CHKERRQ(ierr);

  /* Allocate memory */
  ierr = MPI_Comm_size(grid->comm, &numProcs);                                                            CHKERRQ(ierr);
  ierr = MPI_Comm_rank(grid->comm, &rank);                                                                CHKERRQ(ierr);
  ierr = GridGetNumFields(grid, &o->numTotalFields);                                                      CHKERRQ(ierr);
  ierr = PetscMalloc((numProcs+1)      * sizeof(int),   &o->firstVar);                                    CHKERRQ(ierr);
  ierr = PetscMalloc(numOverlapNodes   * sizeof(int),   &o->offsets);                                     CHKERRQ(ierr);
  ierr = PetscMalloc(o->numTotalFields * sizeof(int *), &o->localStart);                                  CHKERRQ(ierr);
  PetscLogObjectMemory(o, (numProcs+1 + numOverlapNodes + o->numTotalFields*numClasses) * sizeof(int) + o->numTotalFields*sizeof(int *));
  ierr = PetscMemzero(o->localStart, o->numTotalFields * sizeof(int *));                                  CHKERRQ(ierr);
  o->numLocNewVars = 0;
  o->numNewVars    = 0;

  /* Setup domain variable numbering: interior nodes (marker == 0) and nodes on
     reduced boundaries contribute no variables; all other boundary nodes
     contribute their class size */
  o->offsets[0] = 0;
  for(node = 0; node < numNodes-1; node++) {
    ierr = MeshGetNodeBoundary(mesh, node, &marker);                                                      CHKERRQ(ierr);
    if (marker == 0) {
      o->offsets[node+1] = o->offsets[node];
    } else {
      for(bc = 0; bc < numBC; bc++) {
        if ((gridBC[bc].reduce == PETSC_TRUE) && (gridBC[bc].boundary == marker)) break;
      }
      if (bc == numBC) {
        o->offsets[node+1] = o->offsets[node] + classSizes[classes[node]];
      } else {
        o->offsets[node+1] = o->offsets[node];
      }
    }
  }
  /* The last node is handled separately since only numLocVars, not offsets[numNodes], is stored */
  ierr = MeshGetNodeBoundary(mesh, numNodes-1, &marker);                                                  CHKERRQ(ierr);
  for(bc = 0; bc < numBC; bc++) {
    if ((gridBC[bc].reduce == PETSC_TRUE) && (gridBC[bc].boundary == marker)) break;
  }
  if (bc == numBC) {
    o->numLocVars = o->offsets[numNodes-1] + classSizes[classes[numNodes-1]];
  } else {
    o->numLocVars = o->offsets[numNodes-1];
  }
  if (map->isConstrained == PETSC_TRUE) {
    ierr = (*constCtx->ops->getsize)(constCtx, &o->numLocNewVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
    CHKERRQ(ierr);
    o->numLocVars += o->numLocNewVars;
  }
  /* Gather each processor's local count and prefix-sum into firstVar[] */
  ierr = MPI_Allgather(&o->numLocVars, 1, MPI_INT, &o->firstVar[1], 1, MPI_INT, o->comm);                 CHKERRQ(ierr);
  o->firstVar[0] = 0;
  for(proc = 1; proc <= numProcs; proc++)
    o->firstVar[proc] += o->firstVar[proc-1];
  o->numVars = o->firstVar[numProcs];
  if (map->isConstrained == PETSC_TRUE) {
    ierr = (*constCtx->ops->getsize)(constCtx, PETSC_NULL, &o->numNewVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
    CHKERRQ(ierr);
    ierr = MPI_Allreduce(&o->numLocNewVars, &numNewVars, 1, MPI_INT, MPI_SUM, o->comm);                         CHKERRQ(ierr);
    if (o->numNewVars != numNewVars) SETERRQ(PETSC_ERR_PLIB, "Invalid partition of new variables");
  }

  /* Initialize the overlap */
  o->numOverlapVars    = o->numLocVars;
  o->numOverlapNewVars = o->numLocNewVars;

  if (numProcs > 1) {
    /* Map local to global variable numbers */
    for(node = 0; node < numNodes; node++)
      o->offsets[node] += o->firstVar[rank];

    /* Initialize communication */
    ierr = PetscMalloc(numProcs * sizeof(int), &ghostSendVars);                                          CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &sumSendVars);                                            CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &ghostRecvVars);                                          CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &sumRecvVars);                                            CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs * sizeof(int), &displs);                                                 CHKERRQ(ierr);
    ierr = PetscMemzero(ghostSendVars, numProcs * sizeof(int));                                          CHKERRQ(ierr);
    ierr = PetscMemzero(sumSendVars,   numProcs * sizeof(int));                                          CHKERRQ(ierr);
    ierr = PetscMemzero(ghostRecvVars, numProcs * sizeof(int));                                          CHKERRQ(ierr);
    ierr = PetscMemzero(sumRecvVars,   numProcs * sizeof(int));                                          CHKERRQ(ierr);
    ierr = PetscMemzero(displs,        numProcs * sizeof(int));                                          CHKERRQ(ierr);

    /* Get number of ghost variables to receive from each processor and size of blocks --
         we here assume that classes[] already has ghost node classes in it */
    for(node = 0; node < numGhostNodes; node++) {
      ierr = PartitionGhostToGlobalNodeIndex(part, node, &gNode, &proc);                                  CHKERRQ(ierr);
      nclass = classes[numNodes+node];
      ghostRecvVars[proc]++;
      o->numOverlapVars += classSizes[nclass];
    }

    /* Get number of constrained ghost variables to receive from each processor and size of blocks */
    if (map->isConstrained == PETSC_TRUE) {
      ierr = (*constCtx->ops->getsize)(constCtx, PETSC_NULL, PETSC_NULL, &o->numOverlapNewVars, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
      CHKERRQ(ierr);
    }
    o->numOverlapVars += o->numOverlapNewVars - o->numLocNewVars;

    /* Get sizes of ghost variable blocks to send to each processor */
    ierr = MPI_Alltoall(ghostRecvVars, 1, MPI_INT, ghostSendVars, 1, MPI_INT, o->comm);                  CHKERRQ(ierr);

    /* Calculate offsets into the ghost variable receive array */
    for(proc = 1; proc < numProcs; proc++) {
      sumRecvVars[proc] = sumRecvVars[proc-1] + ghostRecvVars[proc-1];
      displs[proc]      = sumRecvVars[proc];
    }

    /* Calculate offsets into the ghost variable send array */
    for(proc = 1; proc < numProcs; proc++)
      sumSendVars[proc] = sumSendVars[proc-1] + ghostSendVars[proc-1];

    /* Send requests for ghost variable offsets to each processor --
       the ghost slots of o->offsets temporarily hold global node numbers,
       which are overwritten with the remote offsets by the second Alltoallv below */
    numSendGhostVars = sumSendVars[numProcs-1] + ghostSendVars[numProcs-1];
    ierr = PetscMalloc(numSendGhostVars * sizeof(int), &sendGhostBuffer);                                 CHKERRQ(ierr);
    for(node = 0; node < numGhostNodes; node++) {
      ierr = PartitionGhostToGlobalNodeIndex(part, node, &gNode, &proc);                                  CHKERRQ(ierr);
      o->offsets[numNodes+(displs[proc]++)] = gNode;
    }
    ierr = MPI_Alltoallv(&o->offsets[numNodes],  ghostRecvVars, sumRecvVars, MPI_INT,
                         sendGhostBuffer,        ghostSendVars, sumSendVars, MPI_INT, o->comm);
    CHKERRQ(ierr);

    /* Send ghost variables offsets to each processor */
    for(node = 0; node < numSendGhostVars; node++) {
      ierr = PartitionGlobalToLocalNodeIndex(part, sendGhostBuffer[node], &locNode);CHKERRQ(ierr);
      sendGhostBuffer[node] = o->offsets[locNode];
    }
    ierr = MPI_Alltoallv(sendGhostBuffer,       ghostSendVars, sumSendVars, MPI_INT,
                         &o->offsets[numNodes], ghostRecvVars, sumRecvVars, MPI_INT, o->comm);
    CHKERRQ(ierr);

    /* Cleanup */
    ierr = PetscFree(ghostSendVars);                                                                      CHKERRQ(ierr);
    ierr = PetscFree(sumSendVars);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(ghostRecvVars);                                                                      CHKERRQ(ierr);
    ierr = PetscFree(sumRecvVars);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(displs);                                                                             CHKERRQ(ierr);
    ierr = PetscFree(sendGhostBuffer);                                                                    CHKERRQ(ierr);

    /* We maintain local offsets for ghost variables, meaning the offsets after the last
       interior variable, rather than the offset of the given ghost variable in the global
       matrix. */
    ierr = PetscMalloc(numGhostNodes * sizeof(int), &o->localOffsets);                                    CHKERRQ(ierr);
    for(node = 0, var = o->numLocVars; node < numGhostNodes; node++) {
      o->localOffsets[node] = var;
      nclass = classes[numNodes+node];
      var   += classSizes[nclass];
    }
  }

  /* Allocate memory */
  ierr = PetscMalloc(numClasses * sizeof(int), &localOffsets);                                            CHKERRQ(ierr);
  ierr = PetscMemzero(localOffsets, numClasses * sizeof(int));                                            CHKERRQ(ierr);

  /* Setup local field offsets using the boundary discretization component counts */
  for(f = 0; f < numFields; f++) {
    field = fields[f];
    ierr  = PetscMalloc(numClasses * sizeof(int), &o->localStart[field]);                                 CHKERRQ(ierr);
    for(nclass = 0; nclass < numClasses; nclass++) {
      if (fieldClasses[f][nclass]) {
        o->localStart[field][nclass]  = localOffsets[nclass];
        localOffsets[nclass]         += grid->fields[field].disc->bdDisc->comp;
      } else {
        /* localStart is -1 when the field is not present on the class */
        o->localStart[field][nclass]  = -1;
      }
    }
  }
  grid->bdOrder = o;

  /* Cleanup */
  ierr = PetscFree(localOffsets);                                                                         CHKERRQ(ierr);

  /* Setup the local boundary ordering */
  PetscHeaderCreate(l, _LocalVarOrdering, int, IS_COOKIE, 0, "LocalVarOrdering", grid->comm, LocalVarOrderingDestroy, 0);
  PetscLogObjectCreate(l);

  /* Allocate memory */
  l->numFields = numFields;
  ierr = PetscMalloc(numFields       * sizeof(int), &l->fields);                                          CHKERRQ(ierr);
  ierr = PetscMalloc(grid->numFields * sizeof(int), &l->elemStart);                                       CHKERRQ(ierr);
  PetscLogObjectMemory(l, (numFields + grid->numFields) * sizeof(int));
  ierr = PetscMemcpy(l->fields, fields, numFields * sizeof(int));                                         CHKERRQ(ierr);

  /* Put in sentinel values */
  for(f = 0; f < grid->numFields; f++) {
    l->elemStart[f] = -1;
  }

  /* Setup local and global offsets with lower dimensional discretizations */
  for(f = 0, elemOffset = 0; f < numFields; f++) {
    field               = fields[f];
    l->elemStart[field] = elemOffset;
    elemOffset         += grid->fields[field].disc->bdDisc->size;
  }
  l->elemSize = elemOffset;
  grid->bdLocOrder = l;

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridReformMesh_Triangular_2D"
int GridReformMesh_Triangular_2D(Grid grid) {
  int ierr;

  PetscFunctionBegin;
  /* After the mesh has been reformed, only the boundary sizes need recomputation */
  ierr = GridSetupBoundarySizes_Triangular_2D(grid);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridGetBoundaryNext_Triangular_2D"
int GridGetBoundaryNext_Triangular_2D(Grid grid, int boundary, int fieldIdx, PetscTruth ghost, FieldClassMap map, int *node, int *nclass) {
  int ierr;

  PetscFunctionBegin;
  /* Advance along the boundary until we reach a node whose class carries the
     requested field, or run off the end of the boundary (node == -1).
     Checking for -1 first avoids indexing classes[] with an invalid node. */
  for(;;) {
    ierr = MeshGetBoundaryNext(grid->mesh, boundary, ghost, node);                                        CHKERRQ(ierr);
    if (*node == -1) break;
    if (map->fieldClasses[fieldIdx][map->classes[*node]] != 0) break;
  }
  /* Report the node's class, or -1 when the iteration is exhausted */
  *nclass = (*node == -1) ? -1 : map->classes[*node];
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridGetBoundaryStart_Triangular_2D"
int GridGetBoundaryStart_Triangular_2D(Grid grid, int boundary, int fieldIdx, PetscTruth ghost, FieldClassMap map, int *node, int *nclass) {
  Mesh             mesh;
  Mesh_Triangular *tri     = (Mesh_Triangular *) grid->mesh->data;
  int              bdIndex; /* Canonical boundary number */
  int              ierr;

  PetscFunctionBegin;
  /* Translate the user's boundary marker into the canonical boundary number */
  ierr = GridGetMesh(grid, &mesh);                                                                        CHKERRQ(ierr);
  ierr = MeshGetBoundaryIndex(mesh, boundary, &bdIndex);                                                  CHKERRQ(ierr);
  /* Only one boundary iteration may be active on a mesh at a time */
  if (mesh->activeBd != -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Already iterating over a boundary");
  /* Position the iterator just before the first node of this boundary, then
     advance to the first node whose class carries the active field */
  mesh->activeBd     = bdIndex;
  mesh->activeBdOld  = bdIndex;
  mesh->activeBdNode = tri->bdBegin[bdIndex] - 1;
  ierr = GridGetBoundaryNext_Triangular_2D(grid, boundary, fieldIdx, ghost, map, node, nclass);           CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridCreateRestriction_Triangular_2D"
/* Restriction between 2d triangular grids is not implemented yet */
int GridCreateRestriction_Triangular_2D(Grid dcf, Grid dcc, GMat *gmat) {
  /* The previous message was a bare " ", which gave the user no information */
  SETERRQ(PETSC_ERR_SUP, "Restriction is not supported for 2d triangular grids");
}

#undef  __FUNCT__
#define __FUNCT__ "GridEvaluateRhs_Triangular_2D"
int GridEvaluateRhs_Triangular_2D(Grid grid, GVec x, GVec f, PetscObject ctx) {
  Mesh                  mesh;
  Partition             part;
  Field                *fields               = grid->fields;
  int                   numNewFields         = grid->numNewFields;         /* The number of new fields added by constraints */
  GridFunc             *rhsFuncs             = grid->rhsFuncs;             /* The Rhs PointFunctions */
  int                   numRhsOps            = grid->numRhsOps;            /* The number of Rhs operators */
  GridOp               *rhsOps               = grid->rhsOps;               /* The operators on the Rhs */
  PetscTruth            reduceSystem         = grid->reduceSystem;
  PetscTruth            reduceElement        = grid->reduceElement;
  PetscTruth            explicitConstraints  = grid->explicitConstraints;
  PetscConstraintObject constCtx             = grid->constraintCtx;        /* The constraint object */
  int                   numFields            = grid->cm->numFields;        /* The number of fields in the calculation */
  LocalVarOrdering      locOrder             = grid->locOrder;             /* The default local variable ordering */
  int                   elemSize             = locOrder->elemSize;         /* The number of shape funcs in the elem mat */
  int                  *elemStart            = locOrder->elemStart;        /* The offset of each field in the elem mat */
  ElementVec            vec                  = grid->vec;                  /* The element vector */
  PetscScalar          *array                = vec->array;                 /* The values in the element vector */
  Vec                   ghostVec             = grid->ghostVec;             /* The local solution vector */
  ElementVec            elemGhostVec         = grid->ghostElementVec;      /* The element vector from ghostVec */
  PetscScalar          *ghostArray           = elemGhostVec->array;        /* The values in elemGhostVec */
  ElementMat            mat                  = grid->mat;                  /* A temporary element matrix */
  PetscScalar          *matArray             = mat->array;                 /* The values in the element matrix */
  MeshMover             mover;
  Grid                  ALEGrid;                                           /* The grid describing the mesh velocity */
  VarOrdering           order;                                             /* The default variable ordering */
  ElementVec            MeshALEVec;                                        /* ALE velocity vector from mesh */
  ElementVec            ALEVec;                                            /* ALE velocity vector */
  PetscScalar          *ALEArray;                                          /* The values in the ALE element vector */
  int                   computeFunc, computeLinear, computeNonlinear;      /* Flags for selective computation */
  PetscScalar          *nonlinearArgs[2];
  int                   newComp = 0;
  int                   numElements;
  int                   sField, tField, op, newField, elem, func, fieldIndex;
#ifdef PETSC_USE_BOPT_g
  int                   var;
  PetscTruth            opt;
#endif
  int                   ierr;

  PetscFunctionBegin;
  ierr = GridGetMesh(grid, &mesh);                                                                        CHKERRQ(ierr);
  ierr = MeshGetPartition(mesh, &part);                                                                   CHKERRQ(ierr);
  if (explicitConstraints == PETSC_TRUE) {
    order = grid->constraintOrder;
  } else {
    order = grid->order;
  }
  /* Handle selective computation */
  computeFunc        = 1;
  if (grid->activeOpTypes[0] == PETSC_FALSE) computeFunc      = 0;
  computeLinear      = 1;
  if (grid->activeOpTypes[1] == PETSC_FALSE) computeLinear    = 0;
  computeNonlinear   = 1;
  if (grid->activeOpTypes[2] == PETSC_FALSE) computeNonlinear = 0;

  /* Fill the local solution vectors */
  if (x != PETSC_NULL) {
    ierr = GridGlobalToLocal(grid, INSERT_VALUES, x);                                                     CHKERRQ(ierr);
  }

  /* Setup ALE variables */
  if (grid->ALEActive == PETSC_TRUE) {
    ierr = MeshGetMover(mesh, &mover);                                                                    CHKERRQ(ierr);
    ierr = MeshMoverGetVelocityGrid(mover, &ALEGrid);                                                     CHKERRQ(ierr);
    /* Notice that the ALEArray is from this grid, not the mesh velocity grid */
    ierr = ElementVecDuplicate(grid->vec, &ALEVec);                                                       CHKERRQ(ierr);
    ALEArray   = ALEVec->array;
    MeshALEVec = ALEGrid->vec;
  } else {
    ALEArray   = PETSC_NULL;
    MeshALEVec = PETSC_NULL;
  }

  /* Loop over elements */
  ierr = PartitionGetNumElements(part, &numElements);                                                     CHKERRQ(ierr);
  for(elem = 0; elem < numElements; elem++) {
    /* Initialize element vector */
    ierr = ElementVecZero(vec);                                                                           CHKERRQ(ierr);
    vec->reduceSize          = locOrder->elemSize;
    elemGhostVec->reduceSize = locOrder->elemSize;

    /* Setup local row indices for the ghost vector */
    ierr = GridCalcLocalElementVecIndices(grid, elem, elemGhostVec);                                      CHKERRQ(ierr);
    /* Setup local solution vector */
    ierr = GridLocalToElementGeneral(grid, ghostVec, grid->bdReduceVecCur, reduceSystem, reduceElement, elemGhostVec);CHKERRQ(ierr);
    /* Must transform to unconstrained variables for element integrals */
    ierr = GridProjectElementVec(grid, mesh, elem, order, PETSC_FALSE, elemGhostVec);                     CHKERRQ(ierr);

    /* Setup ALE variables */
    if (grid->ALEActive == PETSC_TRUE) {
      ierr = GridCalcLocalElementVecIndices(ALEGrid, elem, MeshALEVec);                                   CHKERRQ(ierr);
      ierr = GridLocalToElement(ALEGrid, MeshALEVec);                                                     CHKERRQ(ierr);
    }

    if (computeFunc) {
      for(func = 0; func < grid->numRhsFuncs; func++) {
        if (fields[rhsFuncs[func].field].isActive == PETSC_FALSE) continue;
        tField = rhsFuncs[func].field;
        ierr = DiscretizationEvaluateFunctionGalerkin(fields[tField].disc, mesh, *rhsFuncs[tField].func, rhsFuncs[tField].alpha, elem,
                                                      &array[elemStart[tField]], ctx);
        CHKERRQ(ierr);
      }
#ifdef PETSC_USE_BOPT_g
      ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                       CHKERRQ(ierr);
#endif
    }

    for(op = 0; op < numRhsOps; op++) {
      if (fields[rhsOps[op].field].isActive == PETSC_FALSE) continue;
      if ((rhsOps[op].nonlinearOp != PETSC_NULL) && (computeNonlinear)) {
        tField = rhsOps[op].field;
        nonlinearArgs[0] = &ghostArray[elemStart[tField]];
        nonlinearArgs[1] = &ghostArray[elemStart[tField]];
        if (rhsOps[op].isALE) {
          ierr = GridInterpolateElementVec(ALEGrid, 0, MeshALEVec, grid, tField, ALEVec);                 CHKERRQ(ierr);
          ierr = DiscretizationEvaluateNonlinearALEOperatorGalerkin(fields[tField].disc, mesh, rhsOps[op].nonlinearOp,
                                                                    rhsOps[op].alpha, elem, 2, nonlinearArgs,
                                                                    ALEArray, &array[elemStart[tField]], ctx);
          CHKERRQ(ierr);
        } else {
          ierr = DiscretizationEvaluateNonlinearOperatorGalerkin(fields[tField].disc, mesh, rhsOps[op].nonlinearOp,
                                                                 rhsOps[op].alpha, elem, 2, nonlinearArgs,
                                                                 &array[elemStart[tField]], ctx);
          CHKERRQ(ierr);
        }
      } else if (computeLinear) {
        sField = rhsOps[op].field;
        tField = fields[sField].disc->operators[rhsOps[op].op]->test->field;
        ierr = ElementMatZero(mat);                                                                       CHKERRQ(ierr);
        if (rhsOps[op].isALE) {
          ierr = GridInterpolateElementVec(ALEGrid, 0, MeshALEVec, grid, sField, ALEVec);                 CHKERRQ(ierr);
          ierr = DiscretizationEvaluateALEOperatorGalerkinMF(fields[sField].disc, mesh, elemSize, elemStart[tField], elemStart[sField],
                                                             rhsOps[op].op, rhsOps[op].alpha, elem, &ghostArray[elemStart[sField]],
                                                             &ghostArray[elemStart[sField]], ALEArray, array, matArray, ctx);
          CHKERRQ(ierr);
        } else {
          ierr = DiscretizationEvaluateOperatorGalerkinMF(fields[sField].disc, mesh, elemSize, elemStart[tField], elemStart[sField],
                                                          rhsOps[op].op, rhsOps[op].alpha, elem, &ghostArray[elemStart[sField]],
                                                          &ghostArray[elemStart[sField]], array, matArray, ctx);
          CHKERRQ(ierr);
        }
      }
#ifdef PETSC_USE_BOPT_g
      ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                       CHKERRQ(ierr);
#endif
    }

    /* Setup global row indices, with reduction if necessary */
    ierr = GridCalcGeneralElementVecIndices(grid, elem, order, PETSC_NULL, PETSC_FALSE, vec);             CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
    ierr = PetscOptionsHasName(PETSC_NULL, "-trace_vec_assembly", &opt);                                  CHKERRQ(ierr);
    if (opt == PETSC_TRUE) {
      for(var = 0; var < vec->reduceSize; var++)
        PetscPrintf(grid->comm, "%2d %4.2g\n", vec->indices[var], PetscRealPart(array[var]));
    }
#endif
    /* Put values in the global vector */
    ierr = ElementVecSetValues(vec, f, ADD_VALUES);                                                       CHKERRQ(ierr);
  }

  /* Cleanup ALE variables */
  if (grid->ALEActive == PETSC_TRUE) {
    ierr = ElementVecDestroy(ALEVec);                                                                     CHKERRQ(ierr);
  }

  /* Evaluate self-interaction of new fields created by constraints */
  if (explicitConstraints == PETSC_TRUE) { 
    /* WARNING: This only accomodates 1 constrained field */
    /* Get constraint information */
    for(fieldIndex = 0; fieldIndex < numFields; fieldIndex++) {
      sField = grid->cm->fields[fieldIndex];
      if (fields[sField].isConstrained == PETSC_TRUE) {
        newComp = fields[sField].numComp + fields[sField].constraintCompDiff; 
        break;
      }
    }
    /* Calculate self-interaction */
    for(newField = 0; newField < numNewFields; newField++) {
      /* Initialize element vector */
      ierr = ElementVecZero(vec);                                                                         CHKERRQ(ierr);
      vec->reduceSize = newComp;

      /* Calculate the indices and contribution to the element matrix from the new field */
      ierr = (*constCtx->ops->newelemvec)(constCtx, order, newField, vec);                                CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
      ierr = PetscOptionsHasName(PETSC_NULL, "-trace_vec_assembly_constrained", &opt);                    CHKERRQ(ierr);
      if (opt == PETSC_TRUE) {
        for(var = 0; var < vec->reduceSize; var++)
          PetscPrintf(grid->comm, "%2d %4.2g\n", vec->indices[var], PetscRealPart(array[var]));
      }
#endif
      /* Put values in global matrix */
      ierr = ElementVecSetValues(vec, f, ADD_VALUES);                                                     CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
      ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                        CHKERRQ(ierr);
#endif
    }
  }

  /* Reset element vectors */
  vec->reduceSize          = locOrder->elemSize;
  elemGhostVec->reduceSize = locOrder->elemSize;

  ierr = VecAssemblyBegin(f);                                                                             CHKERRQ(ierr);
  ierr = VecAssemblyEnd(f);                                                                               CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "GridEvaluateSystemMatrix_Triangular_2D"
int GridEvaluateSystemMatrix_Triangular_2D(Grid grid, GVec x, GMat *J, GMat *M, MatStructure *flag, PetscObject ctx) {
  /*
     Assembles the global system matrix A = *J for a 2d triangular grid.

     For every local element this routine gathers the ghosted solution into an
     element vector, integrates each registered matrix operator (Galerkin or
     ALE Galerkin) into an element matrix, computes the global indices (with
     constraint reduction if requested), and adds the element matrix into A.
     Fields created by explicit constraints contribute an extra
     self-interaction loop at the end.

     Input Parameters:
       grid - The grid
       x    - The current solution, scattered to local ghost storage first
              (may be PETSC_NULL to reuse the existing ghost vector)
       ctx  - User context passed through to the discretization routines

     Output Parameters:
       J    - The assembled system matrix
       M    - The preconditioner matrix; NOTE(review): currently untouched here
       flag - The matrix structure flag; NOTE(review): never set by this routine
  */
  GMat                  A             = *J;                     /* The working system matrix */
  Field                *fields        = grid->fields;
  int                   numNewFields  = grid->numNewFields;     /* The number of new fields added by constraints */
  int                   numMatOps     = grid->numMatOps;        /* The number of operators in the matrix */
  GridOp               *matOps        = grid->matOps;           /* The operators in the system matrix */
  VarOrdering           constOrder    = grid->constraintOrder;  /* The constrained variable ordering */
  PetscTruth            reduceSystem  = grid->reduceSystem;
  PetscTruth            reduceElement = grid->reduceElement;
  PetscTruth            expConst      = grid->explicitConstraints;
  PetscConstraintObject constCtx      = grid->constraintCtx;    /* The constraint object */
  int                   numFields     = grid->cm->numFields;    /* The number of fields in the calculation */
  LocalVarOrdering      locOrder      = grid->locOrder;         /* The default local variable ordering */
  int                   elemSize      = locOrder->elemSize;     /* The number of shape functions in the element matrix */
  int                  *elemStart     = locOrder->elemStart;    /* The offset of each field in the element matrix */
  ElementMat            mat           = grid->mat;              /* The element matrix */
  PetscScalar          *array         = mat->array;             /* The values in the element matrix */
  Vec                   ghostVec      = grid->ghostVec;         /* The local solution vector */
  ElementVec            elemGhostVec  = grid->ghostElementVec;  /* The element vector from ghostVec */
  PetscScalar          *ghostArray    = elemGhostVec->array;    /* The values in elemGhostVec */
  Mesh                  mesh;
  Partition             part;
  MeshMover             mover;
  Grid                  ALEGrid;                                /* The grid describing the mesh velocity */
  VarOrdering           order;                                  /* The default variable ordering */
  ElementVec            MeshALEVec;                             /* ALE velocity vector with mesh discretization */
  ElementVec            ALEVec;                                 /* ALE velocity vector */
  PetscScalar          *ALEArray;                               /* The values in the ALE element vector */
  int                   newComp = 0;
  int                   numElements;
  int                   elem, f, sField, tField, op, newField;
#ifdef PETSC_USE_BOPT_g
  PetscTruth            opt;
#endif
  int                   ierr;

  PetscFunctionBegin;
  ierr = GridGetMesh(grid, &mesh);                                                                        CHKERRQ(ierr);
  ierr = MeshGetPartition(mesh, &part);                                                                   CHKERRQ(ierr);
  /* With explicit constraints the constrained ordering supplies the global indices */
  if (expConst == PETSC_TRUE) {
    order = grid->constraintOrder;
  } else {
    order = grid->order;
  }
  /* Fill the local solution vectors */
  if (x != PETSC_NULL) {
    ierr = GridGlobalToLocal(grid, INSERT_VALUES, x);                                                     CHKERRQ(ierr);
  }

  /* Setup ALE variables -- No new variables should be ALE so ALEVec is not recalculated */
  if (grid->ALEActive == PETSC_TRUE) {
    ierr = MeshGetMover(mesh, &mover);                                                                    CHKERRQ(ierr);
    ierr = MeshMoverGetVelocityGrid(mover, &ALEGrid);                                                     CHKERRQ(ierr);
    /* Notice that the ALEArray is from this grid, not the mesh velocity grid */
    ierr = ElementVecDuplicate(grid->vec, &ALEVec);                                                       CHKERRQ(ierr);
    ALEArray   = ALEVec->array;
    MeshALEVec = ALEGrid->vec;
  } else {
    /* Null ALEVec as well, so an operator erroneously marked isALE fails
       deterministically instead of dereferencing an uninitialized pointer */
    ALEVec     = PETSC_NULL;
    ALEArray   = PETSC_NULL;
    MeshALEVec = PETSC_NULL;
  }

  /* Loop over elements */
  ierr = PartitionGetNumElements(part, &numElements);                                                     CHKERRQ(ierr);
  for(elem = 0; elem < numElements; elem++) {
    /* Initialize element matrix */
    ierr = ElementMatZero(mat);                                                                           CHKERRQ(ierr);
    mat->reduceRowSize       = locOrder->elemSize;
    mat->reduceColSize       = locOrder->elemSize;
    elemGhostVec->reduceSize = locOrder->elemSize;

    /* Setup local row indices for the ghost vector */
    ierr = GridCalcLocalElementVecIndices(grid, elem, elemGhostVec);                                      CHKERRQ(ierr);
    /* Setup local solution vector */
    ierr = GridLocalToElementGeneral(grid, ghostVec, grid->bdReduceVecCur, reduceSystem, reduceElement, elemGhostVec);CHKERRQ(ierr);
    /* Must transform to unconstrained variables for element integrals */
    ierr = GridProjectElementVec(grid, mesh, elem, order, PETSC_FALSE, elemGhostVec);                     CHKERRQ(ierr);

    /* Setup ALE variables */
    if (grid->ALEActive == PETSC_TRUE) {
      ierr = GridCalcLocalElementVecIndices(ALEGrid, elem, MeshALEVec);                                   CHKERRQ(ierr);
      ierr = GridLocalToElement(ALEGrid, MeshALEVec);                                                     CHKERRQ(ierr);
    }

    /* Calculate the contribution to the element matrix from each field */
    for(op = 0; op < numMatOps; op++) {
      sField = matOps[op].field;
      tField = fields[sField].disc->operators[matOps[op].op]->test->field;
      if (fields[sField].isActive) {
        if (matOps[op].isALE) {
          /* Interpolate the mesh velocity onto this field's discretization */
          ierr = GridInterpolateElementVec(ALEGrid, 0, MeshALEVec, grid, sField, ALEVec);                 CHKERRQ(ierr);
          ierr = DiscretizationEvaluateALEOperatorGalerkin(fields[sField].disc, mesh, elemSize, elemStart[tField], elemStart[sField],
                                                           matOps[op].op, matOps[op].alpha, elem, &ghostArray[elemStart[sField]],
                                                           ALEArray, array, ctx);
          CHKERRQ(ierr);
        } else {
          ierr = DiscretizationEvaluateOperatorGalerkin(fields[sField].disc, mesh, elemSize, elemStart[tField], elemStart[sField],
                                                        matOps[op].op, matOps[op].alpha, elem, &ghostArray[elemStart[sField]],
                                                        array, ctx);
          CHKERRQ(ierr);
        }
#ifdef PETSC_USE_BOPT_g
        ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                     CHKERRQ(ierr);
#endif
      }
    }

    /* Setup global numbering, with reduction if necessary */
    ierr = GridCalcGeneralElementMatIndices(grid, elem, order, order, PETSC_FALSE, mat);                  CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
    ierr = PetscOptionsHasName(PETSC_NULL, "-trace_mat_assembly", &opt);                                  CHKERRQ(ierr);
    if (opt == PETSC_TRUE) {
      ierr = ElementMatView(mat, PETSC_VIEWER_STDOUT_(mat->comm));                                        CHKERRQ(ierr);
    }
#endif
    /* Put values in the global matrix */
    ierr = ElementMatSetValues(mat, A, ADD_VALUES);                                                       CHKERRQ(ierr);
  }

  /* Evaluate self-interaction of new fields created by constraints */
  if (expConst == PETSC_TRUE) { 
    /* WARNING: This only accomodates 1 constrained field */
    /* Get constraint information */
    for(f = 0; f < numFields; f++) {
      sField = grid->cm->fields[f];
      if (fields[sField].isConstrained == PETSC_TRUE) {
        newComp = fields[sField].numComp + fields[sField].constraintCompDiff; 
        break;
      }
    }
    /* Calculate self-interaction */
    for(newField = 0; newField < numNewFields; newField++) {
      /* Initialize element matrix */
      ierr = ElementMatZero(mat);                                                                         CHKERRQ(ierr);
      mat->reduceRowSize = newComp;
      mat->reduceColSize = newComp;

      /* Calculate the indices and contribution to the element matrix from the new field */
      ierr = (*constCtx->ops->newelemmat)(constCtx, constOrder, newField, mat);                           CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
      ierr = PetscOptionsHasName(PETSC_NULL, "-trace_mat_assembly_constrained", &opt);                    CHKERRQ(ierr);
      if (opt == PETSC_TRUE) {
        ierr = ElementMatView(mat, PETSC_VIEWER_STDOUT_(mat->comm));                                      CHKERRQ(ierr);
      }
#endif
      /* Put values in global matrix */
      ierr = ElementMatSetValues(mat, A, ADD_VALUES);                                                     CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
      ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                       CHKERRQ(ierr);
#endif
    }
  }

  /* Assemble matrix */
  ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);                                                         CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);                                                           CHKERRQ(ierr);

  /* Reset element matrix and vector to the unreduced sizes for later callers */
  mat->reduceRowSize       = locOrder->elemSize;
  mat->reduceColSize       = locOrder->elemSize;
  elemGhostVec->reduceSize = locOrder->elemSize;

  /* Cleanup */
  if (grid->ALEActive == PETSC_TRUE) {
    ierr = ElementVecDestroy(ALEVec);                                                                     CHKERRQ(ierr);
  }

  /* Restore the constrained multiply now that assembly has used the raw ordering */
  ierr = GridResetConstrainedMultiply_Private(grid, A);                                                   CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Virtual function table for the 2d triangular grid class.  GridCreate_Triangular_2D
   copies this whole table into grid->ops.  The entries are POSITIONAL initializers,
   so their order must match the declaration of struct _GridOps (presumably in
   src/grid/gridimpl.h -- confirm before adding or reordering entries).  A
   PETSC_NULL or 0 entry means this class does not implement that operation. */
static struct _GridOps GOps = {GridSetUp_Triangular_2D,
                               GridSetupBoundary_Triangular_2D,
                               GridSetupConstraints_Triangular_2D,
                               GridSetupGhostScatter_Triangular_2D,
                               PETSC_NULL/* GridSetFromOptions */,
                               PETSC_NULL/* GridDuplicate */,
                               PETSC_NULL/* GridReform */,
                               PETSC_NULL/* GridCopy */,
                               GridDestroy_Triangular_2D,
                               GridView_Triangular_2D,
                               GridGetBoundaryStart_Triangular_2D,
                               GridGetBoundaryNext_Triangular_2D,
                               GridReformMesh_Triangular_2D,
                               GridCreateGMat_Triangular_2D,
                               GridCreateVarOrdering_Triangular_2D,
                               GridCreateLocalVarOrdering_Triangular_2D,
                               GridCreateVarScatter_Triangular_2D,
                               GridVarOrderingConstrain_Triangular_2D,
                               GridCalcElementVecIndices_Triangular_2D,
                               GridCalcElementMatIndices_Triangular_2D,
                               GridCalcBoundaryElementVecIndices_Triangular_2D,
                               GridCalcBoundaryElementMatIndices_Triangular_2D,
                               GridProjectElementVec_Triangular_2D,
                               GVecGetLocalGVec_Triangular_2D,
                               GVecRestoreLocalGVec_Triangular_2D,
                               0,/* GVecGetWorkGVec */
                               0,/* GVecRestoreWorkGVec */
                               GVecGlobalToLocal_Triangular_2D,
                               GVecLocalToGlobal_Triangular_2D,
                               GVecView_Triangular_2D,
                               GridCreateRestriction_Triangular_2D,
                               GVecEvaluateFunction_Triangular_2D,
                               GVecEvaluateFunctionBoundary_Triangular_2D,
                               GVecEvaluateFunctionCollective_Triangular_2D,
                               GVecEvaluateFunctionGalerkin_Triangular_2D,
                               GVecEvaluateFunctionGalerkinCollective_Triangular_2D,
                               GVecEvaluateBoundaryFunctionGalerkin_Triangular_2D,
                               GVecEvaluateBoundaryFunctionGalerkinCollective_Triangular_2D,
                               GVecEvaluateOperatorGalerkin_Triangular_2D,
                               GVecEvaluateNonlinearOperatorGalerkin_Triangular_2D,
                               GVecEvaluateSystemMatrix_Triangular_2D,
                               GVecEvaluateSystemMatrixDiagonal_Triangular_2D,
                               GMatView_Triangular_2D,
                               GMatEvaluateOperatorGalerkin_Triangular_2D,
                               GMatEvaluateALEOperatorGalerkin_Triangular_2D,
                               GMatEvaluateALEConstrainedOperatorGalerkin_Triangular_2D,
                               GMatEvaluateBoundaryOperatorGalerkin_Triangular_2D,
                               GridEvaluateRhs_Triangular_2D,
                               GridEvaluateSystemMatrix_Triangular_2D};

EXTERN_C_BEGIN
#undef  __FUNCT__
#define __FUNCT__ "GridCreate_Triangular_2D"
int GridCreate_Triangular_2D(Grid grid) {
  /* Constructor for the 2d triangular grid class: installs the class
     function table and initializes the generic grid description.  The
     implementation-specific data pointer starts out empty. */
  int ierr;

  PetscFunctionBegin;
  /* Install the 2d triangular implementation of every grid operation */
  ierr = PetscMemcpy(grid->ops, &GOps, sizeof(struct _GridOps));                                          CHKERRQ(ierr);
  /* General grid description */
  grid->data = PETSC_NULL;
  grid->dim  = 2;
  PetscFunctionReturn(0);
}
EXTERN_C_END
