/*

 MPIIntrinsics.c

 Copyright (c) 2006-2007, Lucas Stephen Beeler and Jeremy Wagner-Kaiser.
 All Rights Reserved.

 */

#include "MPIIntrinsics.h"
#include "ConfigurableParameters.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <memory.h>

/* Process-wide description of the local node. Three fields start out as
   kUnknown and the last one as 0; InitializeMPIEnvironment( ) later assigns
   nodeKind, numNodes, nodePID and isMPIInitialized. (The order of the fields
   is fixed by the NodeDescriptor declaration, which is not in this file.) */
NodeDescriptor gNodeInfo = {kUnknown, kUnknown, kUnknown, 0 };

/* Writes a human-readable dump of `target` to stdout: processor i.d., total
   processor count, node kind and the MPI-initialized flag. Fields that still
   hold kUnknown (or an unrecognized node kind) are shown as "(unknown)". */
void  PrintNodeDescriptor(const NodeDescriptor* target)
{
    /* default text for the node-kind line; replaced below when the kind is
       one of the two recognized values */
    const char*  kindLine = "\tnode kind                  = (unknown)\n";

    printf("\nnode :=\n");

    if (target->nodePID == kUnknown) {
        printf("\tlocal processor i.d.       = (unknown)\n");
    }
    else {
        printf("\tlocal processor i.d.       = %d\n", target->nodePID);
    }

    if (target->numNodes == kUnknown) {
        printf("\ttotal number of processors = (unknown)\n");
    }
    else {
        printf("\ttotal number of processors = %d\n", target->numNodes);
    }

    if (target->nodeKind == kManagerKind) {
        kindLine = "\tnode kind                  = kManagerKind\n";
    }
    else if (target->nodeKind == kWorkerKind) {
        kindLine = "\tnode kind                  = kWorkerKind\n";
    }

    printf("%s", kindLine);

    /* only the exact value TRUE is reported as initialized */
    if (target->isMPIInitialized != TRUE) {
        printf("\tPI state initialized       = FALSE\n");
    }
    else {
        printf("\tPI state initialized       = TRUE\n");
    }

    printf("\n");
}




/* Starts MPI and records this process's place in the job in gNodeInfo.

   argc_ptr, argv_ptr -- forwarded unchanged to MPI_Init( ).

   After MPI_Init( ) succeeds, the rank and size of MPI_COMM_WORLD are stored
   as gNodeInfo.nodePID and gNodeInfo.numNodes; rank 0 becomes the manager
   (kManagerKind) and every other rank a worker (kWorkerKind). A 64MB buffer
   is then allocated and handed to MPI_Buffer_attach( ) for buffered sends.
   The buffer is never detached or freed by this function.

   Failures of MPI_Init( ), malloc( ) or MPI_Buffer_attach( ) are reported
   through RuntimeError( ). */
void  InitializeMPIEnvironment(int* argc_ptr, char*** argv_ptr)
{
    int             localMPIRank;
    int             groupMPICount;
    int             errorCode;
    char*           commBuffer = NULL;
    unsigned long   commBufferBytes = 67108864; /* 64MB communications
                                                   buffer */

    errorCode = MPI_Init(argc_ptr, argv_ptr);

    if (errorCode != MPI_SUCCESS) {

        RuntimeError("InitializeMPIEnvironment( )", "couldn't initialize MPI");
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &localMPIRank);
    MPI_Comm_size(MPI_COMM_WORLD, &groupMPICount);

    /* rank 0 coordinates the job; every other rank does the work */
    if (localMPIRank == 0) {

        gNodeInfo.nodeKind = kManagerKind;
    }
    else {

        gNodeInfo.nodeKind = kWorkerKind;
    }

    gNodeInfo.numNodes = groupMPICount;

    gNodeInfo.nodePID = localMPIRank;

    gNodeInfo.isMPIInitialized = TRUE;

    /* create the 64MB asynchronous communications buffer */
    commBuffer = malloc(commBufferBytes);

    if (!commBuffer) {

        RuntimeError("InitializeMPIEnvironment( )", "couldn't create "
            "asynchronous communications buffer");
    }

    /* MPI_Buffer_attach( ) takes its size as an int; 64MB fits */
    errorCode = MPI_Buffer_attach(commBuffer, (int) commBufferBytes);

    if (errorCode != MPI_SUCCESS) {

        RuntimeError("InitializeMPIEnvironment( )", "couldn't attach to "
            "asynchronous communications buffer");
    }
}




/* Marks `target` as holding no rays by zeroing its ray count. The rayData
   and pixelIDs storage is not touched. */
void  MakeWorkPacketEmpty(WorkPacket* target)
{
    (*target).numRays = 0;
}




/* Appends a copy of `*ray`, tagged with pixel i.d. `pixID`, to `target`.
   LogicError( ) is raised first if the packet already holds kMaxPacketSize
   rays. */
void  AddRayToWorkPacket(WorkPacket* target, const Ray3D* ray, unsigned pixID)
{
    if (kMaxPacketSize == target->numRays) {
        LogicError("AddRayToWorkPacket( )", "packet full");
    }

    /* numRays doubles as the index of the first free slot */
    target->pixelIDs[target->numRays] = pixID;
    target->rayData[target->numRays] = *ray;

    target->numRays += 1;
}




/* Marks `target` as holding no colors by zeroing its color count. The
   colorData and pixelIDs storage is not touched. */
void  MakeResultPacketEmpty(ResultPacket* target)
{
    (*target).numColors = 0;
}



/* Appends a copy of `*color`, tagged with pixel i.d. `pixID`, to `target`.
   LogicError( ) is raised first if the packet already holds kMaxPacketSize
   colors. */
void  AddColorToResultPacket(ResultPacket* target, const RGBColor* color,
    unsigned pixID)
{
    if (kMaxPacketSize == target->numColors) {
        LogicError("AddColorToResultPacket( )", "packet full");
    }

    /* numColors doubles as the index of the first free slot */
    target->pixelIDs[target->numColors] = pixID;
    target->colorData[target->numColors] = *color;

    target->numColors += 1;
}




/* Dumps `target` to stdout: its source pid, its ray count, and then each
   stored ray (via PrintRay( )) together with the pixel i.d. recorded for
   it. */
void  PrintWorkPacket(const WorkPacket* target)
{
    int rayIndex = 0;

    printf("\nWorkPacket := {\n");

    printf("\tsource pid = %d\n", target->sourcePID);
    printf("\tnumRays    = %d\n\n", target->numRays);
    printf("<<< RAY DATA >>>\n");

    /* walk every ray currently held by the packet */
    while (rayIndex < target->numRays) {

        printf("\nray %d (pixel %d) := ", rayIndex,
            target->pixelIDs[rayIndex]);
        PrintRay(&target->rayData[rayIndex]);

        rayIndex++;
    }

    printf("}.\n");
}




/* Dumps `target` to stdout: its source pid, its color count, and then each
   stored color (via PrintRGBColor( )) together with the pixel i.d. recorded
   for it. */
void  PrintResultPacket(const ResultPacket* target)
{
    int colorIndex = 0;

    printf("\nResultPacket := {\n");

    printf("\tsource pid = %d\n", target->sourcePID);
    printf("\tnumColors   = %d\n\n", target->numColors);
    printf("<<< COLOR DATA >>>\n");

    /* walk every color currently held by the packet */
    while (colorIndex < target->numColors) {

        printf("\ncolor %d (@ pixel %d) := ", colorIndex,
            target->pixelIDs[colorIndex]);
        PrintRGBColor(&target->colorData[colorIndex]);

        colorIndex++;
    }

    printf("}.\n");
}




/* Allocates an empty WorkQueue with room for 256 elements.

   The queue is returned with headIndex 0 and tailIndex -1, which is the
   state IsWorkQueueEmpty( ) reports as empty. Both the queue record and its
   element array come from malloc( ); nothing in this file frees them.

   Allocation failure is reported through RuntimeError( ), as EnqueueWork( )
   does. RuntimeError( ) is presumably fatal (TODO confirm); should it
   return, NULL is returned rather than dereferencing a null pointer. */
WorkQueue*  CreateWorkQueue(void)
{
    WorkQueue*  result = (WorkQueue*) malloc(sizeof(WorkQueue));

    if (!result) {

        RuntimeError("CreateWorkQueue( )", "out of memory.");
        return NULL;
    }

    result->headIndex = 0;
    result->tailIndex = -1;
    result->slotsAllocated = 256;
    result->data =
        (WorkQueueElement*) malloc(sizeof(WorkQueueElement)
            * (result->slotsAllocated));

    if (!result->data) {

        RuntimeError("CreateWorkQueue( )", "out of memory.");
        free(result);
        return NULL;
    }

    return result;
}




/* Appends a copy of `*newWork` to the tail of `targetQueue`.

   Live elements occupy data[headIndex..tailIndex]. When the tail already
   sits in the last allocated slot, room is made first:
     - if fewer than 8 slots at the front are free, a new array 1.5 times
       the current size is allocated, the live elements are copied to its
       start and the old array is freed (RuntimeError( ) on allocation
       failure);
     - otherwise the live elements are slid down to the start of the
       existing array.
   Either way the queue ends up with headIndex 0 before the append. */
void  EnqueueWork(WorkQueue* targetQueue, const WorkQueueElement* newWork)
{
    if ((targetQueue->slotsAllocated - 1) == targetQueue->tailIndex) {

        /* number of live elements, head through tail inclusive */
        unsigned  liveCount = (targetQueue->tailIndex -
            targetQueue->headIndex) + 1;

        if (targetQueue->headIndex >= 8) {

            /* enough slack at the front: slide the live elements down.
               The copy runs front to back, so the overlapping ranges are
               handled correctly. */
            WorkQueueElement*        dest = targetQueue->data;
            const WorkQueueElement*  src =
                &(targetQueue->data[targetQueue->headIndex]);
            unsigned                 remaining;

            for (remaining = liveCount; remaining > 0; remaining--) {

                *dest++ = *src++;
            }
        }
        else {

            /* full or nearly full: move into a larger array */
            unsigned  grownSlotCount = targetQueue->slotsAllocated * 1.5;

            unsigned  liveBytes = liveCount * sizeof(WorkQueueElement);

            WorkQueueElement*  grownBuffer =
                (WorkQueueElement*) malloc(grownSlotCount *
                sizeof(WorkQueueElement));

            if (!grownBuffer) {

                RuntimeError("EnqueueWork( )", "out of memory.");
            }

            memcpy(grownBuffer,
                &(targetQueue->data[targetQueue->headIndex]), liveBytes);

            free(targetQueue->data);

            targetQueue->data = grownBuffer;
            targetQueue->slotsAllocated = grownSlotCount;
        }

        /* in both cases the live elements now start at slot 0 */
        targetQueue->headIndex = 0;
        targetQueue->tailIndex = liveCount - 1;
    }

    /* there is now a free slot just past the tail */
    targetQueue->tailIndex++;

    targetQueue->data[targetQueue->tailIndex] = *newWork;
}




/* Removes and returns (by value) the element at the head of `targetQueue`.
   No emptiness check is made here; IsWorkQueueEmpty( ) is available for
   callers that need one. */
WorkQueueElement  DequeueWork(WorkQueue* targetQueue)
{
    /* the post-increment reads the current head slot, then advances it */
    return targetQueue->data[targetQueue->headIndex++];
}




/* Returns a copy of the element at the head of `targetQueue` without
   removing it. No emptiness check is made here. */
WorkQueueElement  PeekAtWorkQueueHead(const WorkQueue* targetQueue)
{
    return targetQueue->data[targetQueue->headIndex];
}




/* Returns TRUE when `targetQueue` holds no elements, i.e. when its head
   index has moved past its tail index; FALSE otherwise. */
boolean  IsWorkQueueEmpty(const WorkQueue* targetQueue)
{
    return (targetQueue->headIndex > targetQueue->tailIndex) ? TRUE : FALSE;
}




/* Returns the number of elements currently held in `target`, counting the
   slots from headIndex through tailIndex inclusive. */
int  WorkQueueSize(const WorkQueue* target)
{
    int liveCount = target->tailIndex - target->headIndex + 1;

    return liveCount;
}




/* Allocates an empty WorkPacketQueue with room for 256 elements.

   The queue is returned with headIndex 0 and tailIndex -1, which is the
   state IsWorkPacketQueueEmpty( ) reports as empty. Both the queue record
   and its element array come from malloc( ); nothing in this file frees
   them.

   Allocation failure is reported through RuntimeError( ), as
   EnqueueWorkPacket( ) does. RuntimeError( ) is presumably fatal (TODO
   confirm); should it return, NULL is returned rather than dereferencing a
   null pointer. */
WorkPacketQueue*  CreateWorkPacketQueue(void)
{
    WorkPacketQueue*  result =
        (WorkPacketQueue*) malloc(sizeof(WorkPacketQueue));

    if (!result) {

        RuntimeError("CreateWorkPacketQueue( )", "out of memory.");
        return NULL;
    }

    result->headIndex = 0;
    result->tailIndex = -1;
    result->slotsAllocated = 256;
    result->data =
        (WorkPacketQueueElem*) malloc(sizeof(WorkPacketQueueElem)
            * (result->slotsAllocated));

    if (!result->data) {

        RuntimeError("CreateWorkPacketQueue( )", "out of memory.");
        free(result);
        return NULL;
    }

    return result;
}




/* Appends a copy of `*newElem` to the tail of `targetQueue`.

   Live elements occupy data[headIndex..tailIndex]. When the tail already
   sits in the last allocated slot, room is made first:
     - if fewer than 8 slots at the front are free, a new array 1.5 times
       the current size is allocated, the live elements are copied to its
       start and the old array is freed (RuntimeError( ) on allocation
       failure);
     - otherwise the live elements are slid down to the start of the
       existing array.
   Either way the queue ends up with headIndex 0 before the append. */
void  EnqueueWorkPacket(WorkPacketQueue* targetQueue,
    const WorkPacketQueueElem* newElem)
{
    if ((targetQueue->slotsAllocated - 1) == targetQueue->tailIndex) {

        /* number of live elements, head through tail inclusive */
        unsigned  liveCount = (targetQueue->tailIndex -
            targetQueue->headIndex) + 1;

        if (targetQueue->headIndex >= 8) {

            /* enough slack at the front: slide the live elements down.
               The copy runs front to back, so the overlapping ranges are
               handled correctly. */
            WorkPacketQueueElem*        dest = targetQueue->data;
            const WorkPacketQueueElem*  src =
                &(targetQueue->data[targetQueue->headIndex]);
            unsigned                    remaining;

            for (remaining = liveCount; remaining > 0; remaining--) {

                *dest++ = *src++;
            }
        }
        else {

            /* full or nearly full: move into a larger array */
            unsigned  grownSlotCount = targetQueue->slotsAllocated * 1.5;

            unsigned  liveBytes = liveCount * sizeof(WorkPacketQueueElem);

            WorkPacketQueueElem*  grownBuffer =
                (WorkPacketQueueElem*) malloc(grownSlotCount *
                sizeof(WorkPacketQueueElem));

            if (!grownBuffer) {

                RuntimeError("EnqueueWorkPacket( )", "out of memory.");
            }

            memcpy(grownBuffer,
                &(targetQueue->data[targetQueue->headIndex]), liveBytes);

            free(targetQueue->data);

            targetQueue->data = grownBuffer;
            targetQueue->slotsAllocated = grownSlotCount;
        }

        /* in both cases the live elements now start at slot 0 */
        targetQueue->headIndex = 0;
        targetQueue->tailIndex = liveCount - 1;
    }

    /* there is now a free slot just past the tail */
    targetQueue->tailIndex++;

    targetQueue->data[targetQueue->tailIndex] = *newElem;
}




/* Removes and returns (by value) the element at the head of `targetQueue`.
   No emptiness check is made here; IsWorkPacketQueueEmpty( ) is available
   for callers that need one. */
WorkPacketQueueElem  DequeueWorkPacket(WorkPacketQueue* targetQueue)
{
    /* the post-increment reads the current head slot, then advances it */
    return targetQueue->data[targetQueue->headIndex++];
}




/* Returns a copy of the element at the head of `targetQueue` without
   removing it. No emptiness check is made here. */
WorkPacketQueueElem
    PeekAtWorkPacketQueueHead(const WorkPacketQueue* targetQueue)
{
    return targetQueue->data[targetQueue->headIndex];
}




/* Returns TRUE when `targetQueue` holds no elements, i.e. when its head
   index has moved past its tail index; FALSE otherwise. */
boolean  IsWorkPacketQueueEmpty(const WorkPacketQueue* targetQueue)
{
    return (targetQueue->headIndex > targetQueue->tailIndex) ? TRUE : FALSE;
}




/* Allocates an empty ResultPacketQueue with room for 256 elements.

   The queue is returned with headIndex 0 and tailIndex -1, which is the
   state IsResultPacketQueueEmpty( ) reports as empty. Both the queue record
   and its element array come from malloc( ); nothing in this file frees
   them.

   Allocation failure is reported through RuntimeError( ), as
   EnqueueResultPacket( ) does. RuntimeError( ) is presumably fatal (TODO
   confirm); should it return, NULL is returned rather than dereferencing a
   null pointer. */
ResultPacketQueue*  CreateResultPacketQueue(void)
{
    ResultPacketQueue*  result =
        (ResultPacketQueue*) malloc(sizeof(ResultPacketQueue));

    if (!result) {

        RuntimeError("CreateResultPacketQueue( )", "out of memory.");
        return NULL;
    }

    result->headIndex = 0;
    result->tailIndex = -1;
    result->slotsAllocated = 256;
    result->data =
        (ResultPacketQueueElem*) malloc(sizeof(ResultPacketQueueElem)
            * (result->slotsAllocated));

    if (!result->data) {

        RuntimeError("CreateResultPacketQueue( )", "out of memory.");
        free(result);
        return NULL;
    }

    return result;
}




/* Appends a copy of `*newElem` to the tail of `targetQueue`.

   Live elements occupy data[headIndex..tailIndex]. When the tail already
   sits in the last allocated slot, room is made first:
     - if fewer than 8 slots at the front are free, a new array 1.5 times
       the current size is allocated, the live elements are copied to its
       start and the old array is freed (RuntimeError( ) on allocation
       failure);
     - otherwise the live elements are slid down to the start of the
       existing array.
   Either way the queue ends up with headIndex 0 before the append. */
void  EnqueueResultPacket(ResultPacketQueue* targetQueue,
    const ResultPacketQueueElem* newElem)
{
    if ((targetQueue->slotsAllocated - 1) == targetQueue->tailIndex) {

        /* number of live elements, head through tail inclusive */
        unsigned  liveCount = (targetQueue->tailIndex -
            targetQueue->headIndex) + 1;

        if (targetQueue->headIndex >= 8) {

            /* enough slack at the front: slide the live elements down.
               The copy runs front to back, so the overlapping ranges are
               handled correctly. */
            ResultPacketQueueElem*        dest = targetQueue->data;
            const ResultPacketQueueElem*  src =
                &(targetQueue->data[targetQueue->headIndex]);
            unsigned                      remaining;

            for (remaining = liveCount; remaining > 0; remaining--) {

                *dest++ = *src++;
            }
        }
        else {

            /* full or nearly full: move into a larger array */
            unsigned  grownSlotCount = targetQueue->slotsAllocated * 1.5;

            unsigned  liveBytes = liveCount * sizeof(ResultPacketQueueElem);

            ResultPacketQueueElem*  grownBuffer =
                (ResultPacketQueueElem*) malloc(grownSlotCount *
                sizeof(ResultPacketQueueElem));

            if (!grownBuffer) {

                RuntimeError("EnqueueResultPacket( )", "out of memory.");
            }

            memcpy(grownBuffer,
                &(targetQueue->data[targetQueue->headIndex]), liveBytes);

            free(targetQueue->data);

            targetQueue->data = grownBuffer;
            targetQueue->slotsAllocated = grownSlotCount;
        }

        /* in both cases the live elements now start at slot 0 */
        targetQueue->headIndex = 0;
        targetQueue->tailIndex = liveCount - 1;
    }

    /* there is now a free slot just past the tail */
    targetQueue->tailIndex++;

    targetQueue->data[targetQueue->tailIndex] = *newElem;
}




/* Removes and returns (by value) the element at the head of `targetQueue`.
   No emptiness check is made here; IsResultPacketQueueEmpty( ) is available
   for callers that need one. */
ResultPacketQueueElem  DequeueResultPacket(ResultPacketQueue* targetQueue)
{
    /* the post-increment reads the current head slot, then advances it */
    return targetQueue->data[targetQueue->headIndex++];
}




/* Returns a copy of the element at the head of `targetQueue` without
   removing it. No emptiness check is made here. */
ResultPacketQueueElem
    PeekAtResultPacketQueueHead(const ResultPacketQueue* targetQueue)
{
    return targetQueue->data[targetQueue->headIndex];
}




/* Returns TRUE when `targetQueue` holds no elements, i.e. when its head
   index has moved past its tail index; FALSE otherwise. */
boolean  IsResultPacketQueueEmpty(const ResultPacketQueue* targetQueue)
{
    return (targetQueue->headIndex > targetQueue->tailIndex) ? TRUE : FALSE;
}




/* Allocates a ResultBuffer sized for `target`.

   numPixels is set to pixelWidth * pixelHeight of the image, and colorData
   gets one RGBColor per pixel, each initialized to kRGB_Black. Both
   allocations come from malloc( ); nothing in this file frees them.

   Allocation failure is reported through RuntimeError( ). RuntimeError( )
   is presumably fatal (TODO confirm); should it return, NULL is returned
   rather than dereferencing a null pointer. */
ResultBuffer*   ConstructResultBufferForImage(const Image* target)
{
    ResultBuffer*   result;
    int             i;

    result = malloc(sizeof(ResultBuffer));

    if (!result) {

        RuntimeError("ConstructResultBufferForImage( )", "out of memory.");
        return NULL;
    }

    result->numPixels = (target->pixelWidth) * (target->pixelHeight);

    result->colorData = malloc(sizeof(RGBColor) * (result->numPixels));

    /* malloc( ) may legitimately return NULL for a zero-byte request, so a
       null pointer only counts as failure when pixels were asked for */
    if (!result->colorData && result->numPixels != 0) {

        RuntimeError("ConstructResultBufferForImage( )", "out of memory.");
        free(result);
        return NULL;
    }

    for (i = 0; i < result->numPixels; i++) {

        result->colorData[i] = kRGB_Black;
    }

    return result;
}




/* Accumulates one sample into `buffer`: `*color` is scaled by
   1 / pSamplesPerPixel and the result is added to the color already stored
   at index `pixelID`. `pixelID` is not range-checked here. */
void    CompositeToResultBuffer(ResultBuffer* buffer, int pixelID,
    const RGBColor* color)
{
    /* snapshot of what has been accumulated for this pixel so far */
    RGBColor      accumulated = buffer->colorData[pixelID];

    const double  sampleWeight = 1.0 / ((double)(pSamplesPerPixel));

    RGBColor      weighted = RGBColorScale(color, sampleWeight);

    accumulated = RGBColorAdd(&weighted, &accumulated);

    buffer->colorData[pixelID] = accumulated;
}




/* Copies every color in `buffer` into `image` through WriteImagePixel( ).
   Buffer index i is written to column (i % pixelWidth), row
   (i / pixelWidth). LogicError( ) is raised first when the image's
   pixelWidth * pixelHeight differs from buffer->numPixels. */
void    FoldResultBufferToImage(Image* image, const ResultBuffer* buffer)
{
    int expectedPixels = (image->pixelWidth) * (image->pixelHeight);
    int pixelIndex = 0;

    if (expectedPixels != buffer->numPixels) {

        LogicError("FoldResultBufferToImage( )", "image and result buffer are "
            "of differing pixel dimensions");
    }

    while (pixelIndex < buffer->numPixels) {

        /* unflatten the linear index into (column, row) */
        int  column = pixelIndex % image->pixelWidth;
        int  row = pixelIndex / image->pixelWidth;

        WriteImagePixel(image, column, row, &buffer->colorData[pixelIndex]);

        pixelIndex++;
    }
}


/* qsort( )-style comparator for arrays of int.

   a, b -- pointers to the two int values to compare.

   Returns a negative value when *a < *b, zero when they are equal and a
   positive value when *a > *b. The result is built from two comparisons
   rather than a subtraction so it cannot overflow for values of opposite
   sign. */
int  rpcompare(const void* a, const void* b)
{
    int aInt = *((const int*)(a));
    int bInt = *((const int*)(b));

    return (aInt > bInt) - (aInt < bInt);
}


/* Shuffles the first `n` entries of `toperm` in place using rand( ).

   toperm -- array to permute; its contents are rearranged, never changed.
   n      -- number of entries to shuffle; values of 1 or less leave the
             array untouched.

   This is a Fisher-Yates shuffle: position i is swapped with a position
   drawn from 0..i only. Drawing the partner from the whole array at every
   step does not give each permutation equal probability. The sequence
   produced still depends only on the state of rand( ), so it stays
   reproducible for a given srand( ) seed. */
void  RandomPermutation(int* toperm, int n)
{
    int  i;
    int  tmp;
    int  x;

    for (i = n - 1; i > 0; i--) {

        /* partner index in [0, i] */
        x = (rand( ) % (i + 1));

        tmp = toperm[x];

        toperm[x] = toperm[i];

        toperm[i] = tmp;
    }
}




int   NextWorkerNode(int nodeNum)
{
    int result;

    if (gNodeInfo.isMPIInitialized == FALSE) {

        RuntimeError("NextWorkerNode( )", "MPI hasn't been initialized");
    }

    result = nodeNum % (gNodeInfo.numNodes - 1);

    result = result + 1;

    return result;
}




/* Returns the worker rank that precedes `nodeNum` in round-robin order.

   For nodeNum greater than 1 this is nodeNum - 1; for nodeNum of 1 or less
   it wraps around to the highest rank, gNodeInfo.numNodes - 1.

   RuntimeError( ) is raised when MPI has not been initialized. */
int   PreviousWorkerNode(int nodeNum)
{
    if (gNodeInfo.isMPIInitialized == FALSE) {

        RuntimeError("PreviousWorkerNode( )", "MPI hasn't been initialized");
    }

    if (nodeNum <= 1) {

        return (gNodeInfo.numNodes - 1);
    }
    else {

        return (nodeNum - 1);
    }
}




/* Allocates a status array with one entry per worker
   (gNodeInfo.numNodes - 1 entries), every entry set to FALSE.

   The array comes from malloc( ); nothing in this file frees it.

   RuntimeError( ) is raised when MPI has not been initialized and when the
   allocation fails. RuntimeError( ) is presumably fatal (TODO confirm);
   should it return after an allocation failure, the null pointer is
   returned without being written through. */
WorkerStatusArray  CreateWorkerStatusArray(void)
{
    WorkerStatusArray  result;
    int                workerCount;
    int                i;

    if (gNodeInfo.isMPIInitialized == FALSE) {

        RuntimeError("CreateWorkerStatusArray( )", "MPI hasn't been "
            "initialized");
    }

    workerCount = gNodeInfo.numNodes - 1;

    result = malloc(sizeof(boolean) * workerCount);

    /* malloc( ) may legitimately return NULL for a zero-byte request, so a
       null pointer only counts as failure when entries were asked for */
    if (!result && workerCount > 0) {

        RuntimeError("CreateWorkerStatusArray( )", "out of memory.");
        return result;
    }

    for (i = 0; i < workerCount; i++) {

        result[i] = FALSE;
    }

    return result;
}




/* Returns TRUE when none of the gNodeInfo.numNodes - 1 entries of `target`
   is FALSE, and FALSE as soon as one such entry is found.
   RuntimeError( ) is raised when MPI has not been initialized. */
boolean   AreAllWorkersDone(const WorkerStatusArray target)
{
    int workerIndex = 0;

    if (gNodeInfo.isMPIInitialized == FALSE) {

        RuntimeError("AreAllWorkersDone( )", "MPI hasn't been initialized");
    }

    /* stop at the first worker that is still marked FALSE */
    while (workerIndex < (gNodeInfo.numNodes - 1)) {

        if (FALSE == target[workerIndex]) {

            return FALSE;
        }

        workerIndex++;
    }

    return TRUE;
}




/* Sets the entry at index (i - 1) of `target` to TRUE. `i` is not
   range-checked here. */
void   MarkWorkerDone(WorkerStatusArray target, int i)
{
    *(target + (i - 1)) = TRUE;
}




/* Prints one line to stdout summarizing `target`: for each of the
   gNodeInfo.numNodes - 1 entries, "W " when the entry is FALSE and "D "
   otherwise. */
void   PrintWorkerStatusArray(const WorkerStatusArray target)
{
    int workerIndex = 0;

    printf("manager node: worker status: ");

    while (workerIndex < (gNodeInfo.numNodes - 1)) {

        if (target[workerIndex] != FALSE) {
            printf("D ");
        }
        else {
            printf("W ");
        }

        workerIndex++;
    }

    printf("\n");
}
