//-------------------------------------------------------------------------------------
// DirectXMeshOptimize.cpp
//  
// DirectX Mesh Geometry Library - Mesh optimization
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkID=324981
//-------------------------------------------------------------------------------------

#include "DirectXMeshP.h"

using namespace DirectX;

namespace
{

//-------------------------------------------------------------------------------------
template<class index_t>
class mesh_status
{
public:
    mesh_status() :
        mFaceOffset(0),
        mFaceCount(0),
        mMaxSubset(0),
        mTotalFaces(0)
    {
    }

    HRESULT initialize( _In_reads_(nFaces*3) const index_t* indices, size_t nFaces,
                        _In_reads_(nFaces*3) const uint32_t* adjacency, _In_ const std::vector<std::pair<size_t,size_t>>& subsets )
    {
        if ( !indices || !nFaces || !adjacency || subsets.empty() )
            return E_INVALIDARG;

        // Convert adjacency to 'physical' adjacency
        mPhysicalNeighbors.reset( new (std::nothrow) neighborInfo[ nFaces ] );
        if ( !mPhysicalNeighbors )
            return E_OUTOFMEMORY;

#ifdef _DEBUG
        memset( mPhysicalNeighbors.get(), 0xcd, sizeof(neighborInfo) * nFaces );
#endif

        mFaceOffset = 0;
        mFaceCount = 0;
        mMaxSubset = 0;
        mTotalFaces = nFaces;

        for( auto it = subsets.cbegin(); it != subsets.cend(); ++it )
        {
            if ( ( uint64_t(it->first) + uint64_t(it->second) ) >= UINT32_MAX )
                return HRESULT_FROM_WIN32( ERROR_ARITHMETIC_OVERFLOW );

            if ( it->second > mMaxSubset )
            {
                mMaxSubset = it->second;
            }

            uint32_t faceOffset = uint32_t( it->first );
            uint32_t faceMax = uint32_t( it->first + it->second );

            for( uint32_t face = faceOffset; face < faceMax; ++face )
            {
                if ( face >= nFaces )
                    return E_UNEXPECTED;

                index_t i0 = indices[ face*3 ];
                index_t i1 = indices[ face*3 + 1 ];
                index_t i2 = indices[ face*3 + 2 ];

                if ( i0 == index_t(-1)
                     || i1 == index_t(-1)
                     || i2 == index_t(-1)
                     || i0 == i1
                     || i0 == i2
                     || i1 == i2 )
                {
                    // unused and degenerate faces should not have neighbors
                    for( uint32_t point = 0; point < 3; ++point )
                    {
                        uint32_t k = adjacency[ face * 3 + point ];

                        if ( k != UNUSED32 )
                        {
                            if ( k >= nFaces )
                                return E_UNEXPECTED;
 
                            if ( adjacency[ k*3 ] == face )
                                mPhysicalNeighbors.get()[ k ].neighbors[ 0 ] = UNUSED32;

                            if ( adjacency[ k*3 + 1 ] == face )
                                mPhysicalNeighbors.get()[ k ].neighbors[ 1 ] = UNUSED32;

                            if ( adjacency[ k*3 + 2 ] == face )
                                mPhysicalNeighbors.get()[ k ].neighbors[ 2 ] = UNUSED32;
                        }

                        mPhysicalNeighbors.get()[ face ].neighbors[ point ] = UNUSED32;
                    }
                }
                else
                {
                    for( uint32_t n = 0; n < 3; ++n )
                    {
                        uint32_t neighbor = adjacency[ face * 3 + n ];

                        if ( neighbor != UNUSED32 )
                        {
                            if ( ( neighbor < faceOffset ) || ( neighbor >= faceMax )
                                 || ( neighbor == adjacency[ face * 3 + ( ( n + 1 ) % 3 ) ] )
                                 || ( neighbor == adjacency[ face * 3 + ( ( n + 2 ) % 3 ) ] ) )
                            {
                                // Break links for any neighbors outside of our attribute set, and remove duplicate neighbors
                                neighbor = UNUSED32;
                            }
                            else
                            {
                                uint32_t edgeBack = find_edge<uint32_t>( &adjacency[ neighbor * 3 ], face );
                                if ( edgeBack < 3 )
                                {
                                    index_t p1 = indices[ face * 3 + n ];
                                    index_t p2 = indices[ face * 3 + ( ( n + 1 ) % 3 ) ];

                                    index_t pn1 = indices[ neighbor * 3 + edgeBack ];
                                    index_t pn2 = indices[ neighbor * 3 + ( ( edgeBack + 1 ) % 3 ) ];

                                    // if wedge not identical on shared edge, drop link
                                    if ( ( p1 != pn2 ) || ( p2 != pn1 ) )
                                    {
                                        neighbor = UNUSED32;
                                    }
                                }
                                else
                                {
                                    neighbor = UNUSED32;
                                }
                            }
                        }

                        mPhysicalNeighbors.get()[ face ].neighbors[ n ] = neighbor;
                    }
                }
            }
        }
        
        if ( !mMaxSubset )
            return E_FAIL;

        mListElements.reset( new (std::nothrow) listElement[ mMaxSubset ] );
        if ( !mListElements )
            return E_OUTOFMEMORY;

        return S_OK;
    }

    HRESULT setSubset( _In_reads_(nFaces*3) const index_t* indices, size_t nFaces, size_t faceOffset, size_t faceCount )
    {
        if ( !faceCount || !indices || !nFaces )
            return E_INVALIDARG;

        if ( faceCount > mMaxSubset )
            return E_UNEXPECTED;

        if ( !mListElements )
            return E_POINTER;

        if ( ( uint64_t(faceOffset) + uint64_t(faceCount) ) >= UINT32_MAX )
            return HRESULT_FROM_WIN32( ERROR_ARITHMETIC_OVERFLOW );

        uint32_t faceMax = uint32_t( faceOffset + faceCount );

        if ( faceMax > nFaces )
            return E_UNEXPECTED;

        mFaceOffset = faceOffset;
        mFaceCount = faceCount;

        mUnprocessed[0] = UNUSED32;
        mUnprocessed[1] = UNUSED32;
        mUnprocessed[2] = UNUSED32;
        mUnprocessed[3] = UNUSED32;

        for( uint32_t face = uint32_t( faceOffset ); face < faceMax; ++face )
        {
            index_t i0 = indices[ face*3 ];
            index_t i1 = indices[ face*3 + 1 ];
            index_t i2 = indices[ face*3 + 2 ];

            if ( i0 == index_t(-1)
                 || i1 == index_t(-1)
                 || i2 == index_t(-1) )
            {
                // filter out unused triangles
                continue;
            }

            uint32_t unprocessed = 0;

            for( uint32_t n = 0; n < 3; ++n )
            {
                if ( mPhysicalNeighbors.get()[ face ].neighbors[ n ] != UNUSED32 )
                {
                    unprocessed += 1;

                    assert( mPhysicalNeighbors.get()[ face ].neighbors[ n ] >= mFaceOffset );
                    assert( mPhysicalNeighbors.get()[ face ].neighbors[ n ] < faceMax );
                }
            }

            uint32_t faceIndex = uint32_t( face - faceOffset );
            mListElements.get()[ faceIndex ].processed = false;
            mListElements.get()[ faceIndex ].unprocessed = unprocessed;

            push_front( faceIndex );
        }

        return S_OK;
    }

    bool isprocessed( uint32_t face ) const 
    {
        assert( face < mTotalFaces );
        assert( ( face >= mFaceOffset ) || ( face < ( mFaceOffset + mFaceCount ) ) );
        return mListElements.get()[ face - mFaceOffset ].processed;
    }

    uint32_t unprocessed_count( uint32_t face ) const
    {
        assert( face < mTotalFaces );
        assert( ( face >= mFaceOffset ) || ( face < ( mFaceOffset + mFaceCount ) ) );
        return mListElements.get()[ face - mFaceOffset ].unprocessed;
    }

    uint32_t find_initial() const
    {
        for( size_t j = 0; j < 4; ++j )
        {
            if  ( mUnprocessed[j] != UNUSED32 )
                return uint32_t( mUnprocessed[ j ] + mFaceOffset );
        }

        return UNUSED32;
    }

    void mark( uint32_t face )
    {
        assert( face < mTotalFaces );
        assert( ( face >= mFaceOffset ) || ( face < ( mFaceOffset + mFaceCount ) ) );

        uint32_t faceIndex = uint32_t( face - mFaceOffset );

        assert( !mListElements.get()[ faceIndex ].processed );
        mListElements.get()[ faceIndex ].processed = true;

        remove( faceIndex );

        for( uint32_t n = 0; n < 3; ++n )
        {
            uint32_t neighbor = mPhysicalNeighbors.get()[ face ].neighbors[ n ];
            if ( ( neighbor != UNUSED32 ) && !isprocessed( neighbor ) )
            {
                decrement( neighbor );
            }
        }
    }

    uint32_t find_next( uint32_t face ) const
    {
        assert( face < mTotalFaces );
        assert( ( face >= mFaceOffset ) || ( face < ( mFaceOffset + mFaceCount ) ) );

        uint32_t iret = 3;
        uint32_t minNeighbor = UNUSED32;
        uint32_t minNextNeighbor = 0;

        for( uint32_t n = 0; n < 3; ++n )
        {
            uint32_t neighbor = mPhysicalNeighbors.get()[ face ].neighbors[ n ];

            if( ( neighbor == UNUSED32 ) || isprocessed( neighbor ) )
                continue;

            uint32_t unprocessed = unprocessed_count( neighbor );
            assert( unprocessed < 3 );

            uint32_t mintemp = UNUSED32;

            for( uint32_t nt = 0; nt < 3; ++nt )
            {
                uint32_t neighborTemp = mPhysicalNeighbors.get()[ neighbor ].neighbors[ nt ];

                if( ( neighborTemp == UNUSED32 ) || isprocessed( neighborTemp ) )
                    continue;

                uint32_t next_count = unprocessed_count( neighborTemp );
                if ( next_count < mintemp )
                    mintemp = next_count;
            }

            if ( mintemp == UNUSED32 )
                mintemp = 0;

            if ( unprocessed < minNeighbor )
            {
                iret = n;
                minNeighbor = unprocessed;
                minNextNeighbor = mintemp;
            }
            else if ( ( unprocessed == minNeighbor ) && ( mintemp < minNextNeighbor ) )
            {
                iret = n;
                minNextNeighbor = mintemp;
            }
        }

        return iret;
    }

    const uint32_t get_neighbors( uint32_t face, uint32_t n ) const
    {
        assert( face < mTotalFaces );
        assert( n < 3 );
        return mPhysicalNeighbors.get()[ face ].neighbors[ n ];
    }

    const uint32_t* get_neighborsPtr( uint32_t face ) const
    {
        assert( face < mTotalFaces );
        return &mPhysicalNeighbors.get()[ face ].neighbors[ 0 ];
    }

private:
    void push_front( uint32_t faceIndex )
    {
        assert( faceIndex < mFaceCount );

        uint32_t unprocessed = mListElements.get()[ faceIndex ].unprocessed;
    
        uint32_t head = mUnprocessed[ unprocessed ];
        mListElements.get()[ faceIndex ].next = head;

        if ( head != UNUSED32 )
            mListElements.get()[ head ].prev = faceIndex;

        mUnprocessed[ unprocessed ] = faceIndex;

        mListElements.get()[ faceIndex ].prev = UNUSED32;
    }

    void remove( uint32_t faceIndex )
    {
        assert( faceIndex < mFaceCount );

        if ( mListElements.get()[ faceIndex ].prev != UNUSED32 )
        {
            assert( mUnprocessed[ mListElements.get()[ faceIndex ].unprocessed ] != faceIndex );

            uint32_t prev = mListElements.get()[ faceIndex ].prev;
            uint32_t next = mListElements.get()[ faceIndex ].next;

            mListElements.get()[ prev ].next = next;

            if ( next != UNUSED32 )
            {
                mListElements.get()[ next ].prev = prev;
            }
        }
        else
        {
            // remove head of the list
            assert( mUnprocessed[ mListElements.get()[ faceIndex ].unprocessed ] == faceIndex );

            uint32_t unprocessed = mListElements.get()[ faceIndex ].unprocessed;

            mUnprocessed[ unprocessed ] = mListElements.get()[ faceIndex ].next;

            if ( mUnprocessed[ unprocessed ] != UNUSED32 )
            {
                mListElements.get()[ mUnprocessed[ unprocessed ] ].prev = UNUSED32;
            }
        }

        mListElements.get()[ faceIndex ].prev =
        mListElements.get()[ faceIndex ].next = UNUSED32;
    }

    void decrement( uint32_t face )
    {
        assert( face < mTotalFaces );
        assert( ( face >= mFaceOffset ) || ( face < ( mFaceOffset + mFaceCount ) ) );
        assert( !isprocessed(face) );

        uint32_t faceIndex = uint32_t( face - mFaceOffset );

        assert( (mListElements.get()[faceIndex].unprocessed >= 1) && (mListElements.get()[faceIndex].unprocessed <= 3) );

        remove( faceIndex );

        mListElements.get()[ faceIndex ].unprocessed -= 1;

        push_front( faceIndex );
    }

    struct neighborInfo
    {
        uint32_t neighbors[3];
    };

    struct listElement
    {
        bool        processed;
        uint32_t    unprocessed;
        uint32_t    prev;
        uint32_t    next;
    };

    uint32_t                        mUnprocessed[4];
    size_t                          mFaceOffset;
    size_t                          mFaceCount;
    size_t                          mMaxSubset;
    size_t                          mTotalFaces;
    std::unique_ptr<listElement[]>  mListElements;
    std::unique_ptr<neighborInfo[]> mPhysicalNeighbors;
};


//-------------------------------------------------------------------------------------
typedef std::pair<uint32_t,uint32_t> facecorner_t;

template<class index_t>
inline facecorner_t counterclockwise_corner( facecorner_t corner, mesh_status<index_t>& status )
{
    assert( corner.second != UNUSED32 );
    uint32_t edge = ( corner.second + 2 ) % 3;
    uint32_t neighbor = status.get_neighbors( corner.first, edge );
    uint32_t point = ( neighbor == UNUSED32 ) ? UNUSED32 : find_edge( status.get_neighborsPtr( neighbor ), corner.first );
    return facecorner_t( neighbor, point );
}


//-------------------------------------------------------------------------------------
class sim_vcache
{
public:
    sim_vcache() : mTail(0), mCacheSize(0) {}

    HRESULT initialize( uint32_t cacheSize )
    {
        if ( !cacheSize )
            return E_INVALIDARG;

        mFIFO.reset( new (std::nothrow) uint32_t[ cacheSize ] );
        if ( !mFIFO )
            return E_OUTOFMEMORY;

        mCacheSize = cacheSize;

        clear();

        return S_OK;
    }

    void clear()
    {
        assert( mFIFO != 0 );
        mTail = 0;
        memset( mFIFO.get(), 0xff, sizeof(uint32_t) * mCacheSize );
    }

    bool access( uint32_t vertex )
    {
        assert( vertex != UNUSED32 );
        assert( mFIFO != 0 );

        for( size_t ptr = 0; ptr < mCacheSize; ++ptr )
        {
            if ( mFIFO.get()[ ptr ] == vertex )
            {
                return true;
            }
        }

        mFIFO.get()[ mTail ] = vertex;
        mTail += 1;
        if ( mTail == mCacheSize )
            mTail = 0;

        return false;
    }

private:
    uint32_t                    mTail;
    uint32_t                    mCacheSize;
    std::unique_ptr<uint32_t[]> mFIFO;
};


//-------------------------------------------------------------------------------------
template<class index_t>
HRESULT _StripReorder( _In_reads_(nFaces*3) const index_t* indices, _In_ size_t nFaces,
                       _In_reads_(nFaces*3) const uint32_t* adjacency,
                       _In_reads_opt_(nFaces) const uint32_t* attributes,
                       _Out_writes_(nFaces) uint32_t* faceRemap )
{
    auto subsets = ComputeSubsets( attributes, nFaces );

    assert( !subsets.empty() );

    mesh_status<index_t> status;
    HRESULT hr = status.initialize( indices, nFaces, adjacency, subsets );
    if ( FAILED(hr) )
        return hr;

    std::unique_ptr<uint32_t[]> faceRemapInverse( new (std::nothrow) uint32_t[ nFaces ] );
    if ( !faceRemapInverse )
        return E_OUTOFMEMORY;

    memset( faceRemapInverse.get(), 0xff, sizeof(uint32_t) * nFaces );

    for( auto it = subsets.cbegin(); it != subsets.cend(); ++it )
    {
        hr = status.setSubset( indices, nFaces, it->first, it->second );
        if ( FAILED(hr) )
            return hr;

        uint32_t curface = 0;

        for(;;)
        {
            uint32_t face = status.find_initial();
            if ( face == UNUSED32 )
                break;

            status.mark( face );

            uint32_t next = status.find_next( face );

            for(;;)
            {
                assert( face != UNUSED32 );
                faceRemapInverse.get()[ face ] = uint32_t( curface + it->first );
                curface += 1;

                // if at end of strip, break out
                if ( next >= 3 )
                    break;

                face = status.get_neighbors( face, next );
                assert( face != UNUSED32 );

                status.mark( face );

                next = status.find_next( face );
            }
        }
    }

    // inverse remap
    memset( faceRemap, 0xff, sizeof(uint32_t) * nFaces );

    for( uint32_t j = 0; j < nFaces; ++j )
    {
        uint32_t f = faceRemapInverse.get()[ j ];
        if ( f < nFaces )
        {
            faceRemap[ f ] = j;
        }
    }

    return S_OK;
}


//-------------------------------------------------------------------------------------
template<class index_t>
HRESULT _VertexCacheStripReorder( _In_reads_(nFaces*3) const index_t* indices, _In_ size_t nFaces,
                                  _In_reads_(nFaces*3) const uint32_t* adjacency,
                                  _In_reads_opt_(nFaces) const uint32_t* attributes,
                                  _Out_writes_(nFaces) uint32_t* faceRemap,
                                  uint32_t vertexCache, uint32_t restart )
{
    auto subsets = ComputeSubsets( attributes, nFaces );

    assert( !subsets.empty() );

    mesh_status<index_t> status;
    HRESULT hr = status.initialize( indices, nFaces, adjacency, subsets );
    if ( FAILED(hr) )
        return hr;

    sim_vcache vcache;
    hr = vcache.initialize( vertexCache );
    if ( FAILED(hr) )
        return hr;

    std::unique_ptr<uint32_t[]> faceRemapInverse( new (std::nothrow) uint32_t[ nFaces ] );
    if ( !faceRemapInverse )
        return E_OUTOFMEMORY;

    memset( faceRemapInverse.get(), 0xff, sizeof(uint32_t) * nFaces );

    assert( vertexCache >= restart );
    uint32_t desired = vertexCache - restart;

    for( auto it = subsets.cbegin(); it != subsets.cend(); ++it )
    {
        hr = status.setSubset( indices, nFaces, it->first, it->second );
        if ( FAILED(hr) )
            return hr;

        vcache.clear();

        uint32_t locnext = 0;
        facecorner_t nextCorner( UNUSED32, UNUSED32 );
        facecorner_t curCorner( UNUSED32, UNUSED32 );

        uint32_t curface = 0;

        for(;;)
        {
            assert( nextCorner.first == UNUSED32 );

            curCorner.first = status.find_initial();
            if ( curCorner.first == UNUSED32 )
                break;

            uint32_t n0 = status.get_neighbors( curCorner.first, 0 );
            if ( ( n0 != UNUSED32 ) && !status.isprocessed( n0 ) )
            {
                curCorner.second = 1;
            }
            else
            {
                uint32_t n1 = status.get_neighbors( curCorner.first, 1 );
                if ( ( n1 != UNUSED32 ) && !status.isprocessed( n1 ) )
                {
                    curCorner.second = 2;
                }
                else
                {
                    curCorner.second = 0;
                }
            }
             
            bool striprestart = false;
            for(;;)
            {
                assert( curCorner.first != UNUSED32 );
                assert( !status.isprocessed( curCorner.first ) );

                // Decision: either add a ring of faces or restart strip
                if ( nextCorner.first != UNUSED32 )
                {
                    uint32_t nf = 0;
                    for( facecorner_t temp = curCorner; ; )
                    {
                        facecorner_t next = counterclockwise_corner<index_t>( temp, status );
                        if ( ( next.first == UNUSED32 ) || status.isprocessed( next.first ) )
                            break;
                        ++nf;
                        temp = next;
                    }

                    if ( locnext + nf > desired )
                    {
                        // restart
                        if ( !status.isprocessed( nextCorner.first ) )
                        {
                            curCorner = nextCorner;
                        }

                        nextCorner.first = UNUSED32;
                    }
                }

                for(;;)
                {
                    assert( curCorner.first != UNUSED32 );
                    status.mark( curCorner.first );

                    faceRemapInverse.get()[ curCorner.first ] = uint32_t( curface + it->first );
                    curface += 1;

                    assert( indices[ curCorner.first * 3 ] != index_t(-1) );
                    if ( !vcache.access( indices[ curCorner.first * 3 ] ) )
                        locnext += 1;

                    assert( indices[ curCorner.first * 3 + 1 ] != index_t(-1) );
                    if ( !vcache.access( indices[ curCorner.first * 3 + 1 ] ) )
                        locnext += 1;

                    assert( indices[ curCorner.first * 3 + 2 ] != index_t(-1) );
                    if ( !vcache.access( indices[ curCorner.first * 3 + 2 ] ) )
                        locnext += 1;

                    facecorner_t intCorner = counterclockwise_corner<index_t>( curCorner, status );
                    bool interiornei = ( intCorner.first != UNUSED32 ) && !status.isprocessed( intCorner.first );

                    facecorner_t extCorner = counterclockwise_corner<index_t>( facecorner_t( curCorner.first, ( curCorner.second + 2 ) % 3 ), status );
                    bool exteriornei = ( extCorner.first != UNUSED32 ) && !status.isprocessed( extCorner.first );

                    if ( interiornei )
                    {
                        if ( exteriornei )
                        {
                            if ( nextCorner.first == UNUSED32 )
                            {
                                nextCorner = extCorner;
                                locnext = 0;
                            }
                        }
                        curCorner = intCorner;
                    }
                    else if ( exteriornei )
                    {
                        curCorner = extCorner;
                        break;
                    }
                    else
                    {
                        curCorner = nextCorner;
                        nextCorner.first = UNUSED32;

                        if ( ( curCorner.first == UNUSED32 ) || status.isprocessed( curCorner.first ) )
                        {
                            striprestart = true;
                            break;
                        }
                    }
                }

                if ( striprestart )
                    break;
            }
        }
    }

    // inverse remap
    memset( faceRemap, 0xff, sizeof(uint32_t) * nFaces );

    for( uint32_t j = 0; j < nFaces; ++j )
    {
        uint32_t f = faceRemapInverse.get()[ j ];
        if ( f < nFaces )
        {
            faceRemap[ f ] = j;
        }
    }

    return S_OK;
}


//-------------------------------------------------------------------------------------
template<class index_t>
HRESULT _OptimizeVertices( const index_t* indices, size_t nFaces, size_t nVerts, uint32_t* vertexRemap )
{
    if ( !indices || !nFaces || !nVerts || !vertexRemap )
        return E_INVALIDARG;

    if ( nVerts >= index_t(-1) )
        return E_INVALIDARG;

    if ( ( uint64_t(nFaces) * 3 ) >= UINT32_MAX )
        return HRESULT_FROM_WIN32( ERROR_ARITHMETIC_OVERFLOW );

    std::unique_ptr<uint32_t[]> tempRemap( new (std::nothrow) uint32_t[ nVerts ] );
    if ( !tempRemap )
        return E_OUTOFMEMORY;

    memset( tempRemap.get(), 0xff, sizeof(uint32_t) * nVerts );

    uint32_t curvertex = 0;
    for( size_t j = 0; j < (nFaces * 3); ++j )
    {
        index_t curindex = indices[ j ];
        if ( curindex == index_t(-1) )
            continue;

        if ( curindex >= nVerts )
            return E_UNEXPECTED;

        if ( tempRemap[ curindex ] == UNUSED32 )
        {
            tempRemap[ curindex ] = curvertex;
            ++curvertex;
        }
    }

    // inverse lookup
    memset( vertexRemap, 0xff, sizeof(uint32_t) * nVerts );

    for( uint32_t j = 0; j < nVerts; ++j )
    {
        uint32_t vertindex = tempRemap.get()[ j ];
        if ( vertindex != UNUSED32 )
        {
            vertexRemap[ vertindex ] = j;
        }
    }

    return S_OK;
}

};

namespace DirectX
{

//=====================================================================================
// Entry-points
//=====================================================================================

//-------------------------------------------------------------------------------------
_Use_decl_annotations_
HRESULT AttributeSort( size_t nFaces, uint32_t* attributes, uint32_t* faceRemap )
{
    if ( !nFaces || !attributes || !faceRemap )
        return E_INVALIDARG;

    if ( ( uint64_t(nFaces) * 3 ) >= UINT32_MAX )
        return HRESULT_FROM_WIN32( ERROR_ARITHMETIC_OVERFLOW );

    typedef std::pair<uint32_t,uint32_t> intpair_t;

    std::vector<intpair_t> list;
    list.reserve( nFaces );
    for( uint32_t j=0; j < nFaces; ++j )
    {
        list.emplace_back( intpair_t( attributes[ j ], j ) );
    }

    std::stable_sort( list.begin(), list.end(), [](const intpair_t& a, const intpair_t& b ) -> bool
                                                {
                                                    return (a.first < b.first);
                                                });

    auto it = list.begin();
    for( uint32_t j = 0; j < nFaces; ++j, ++it )
    {
        attributes[ j ] = it->first;
        faceRemap[ j ] = it->second;
    }

    return S_OK;
}


//-------------------------------------------------------------------------------------
_Use_decl_annotations_
HRESULT OptimizeFaces( const uint16_t* indices, size_t nFaces, const uint32_t* adjacency,
                       uint32_t* faceRemap, uint32_t vertexCache, uint32_t restart )
{
    if ( !indices || !nFaces || !adjacency || !faceRemap )
        return E_INVALIDARG;

    if ( ( uint64_t(nFaces) * 3 ) >= UINT32_MAX )
        return HRESULT_FROM_WIN32( ERROR_ARITHMETIC_OVERFLOW );

    if( vertexCache == OPTFACES_V_STRIPORDER )
    {
        return _StripReorder<uint16_t>( indices, nFaces, adjacency, nullptr, faceRemap );
    }
    else
    {
        if ( restart > vertexCache )
            return E_INVALIDARG;

        return _VertexCacheStripReorder<uint16_t>( indices, nFaces, adjacency, nullptr, faceRemap, vertexCache, restart );
    }
}

_Use_decl_annotations_
HRESULT OptimizeFaces( const uint32_t* indices, size_t nFaces, const uint32_t* adjacency,
                       uint32_t* faceRemap, uint32_t vertexCache, uint32_t restart )
{
    if ( !indices || !nFaces || !adjacency || !faceRemap )
        return E_INVALIDARG;

    if ( ( uint64_t(nFaces) * 3 ) >= UINT32_MAX )
        return HRESULT_FROM_WIN32( ERROR_ARITHMETIC_OVERFLOW );

    if( vertexCache == OPTFACES_V_STRIPORDER )
    {
        return _StripReorder<uint32_t>( indices, nFaces, adjacency, nullptr, faceRemap );
    }
    else
    {
        if ( restart > vertexCache )
            return E_INVALIDARG;

        return _VertexCacheStripReorder<uint32_t>( indices, nFaces, adjacency, nullptr, faceRemap, vertexCache, restart );
    }
}


//-------------------------------------------------------------------------------------
_Use_decl_annotations_
HRESULT OptimizeFacesEx( const uint16_t* indices, size_t nFaces, const uint32_t* adjacency, const uint32_t* attributes,
                         uint32_t* faceRemap, uint32_t vertexCache, uint32_t restart )
{
    if ( !indices || !nFaces || !adjacency || !attributes || !faceRemap )
        return E_INVALIDARG;

    if ( ( uint64_t(nFaces) * 3 ) >= UINT32_MAX )
        return HRESULT_FROM_WIN32( ERROR_ARITHMETIC_OVERFLOW );

    if( vertexCache == OPTFACES_V_STRIPORDER )
    {
        return _StripReorder<uint16_t>( indices, nFaces, adjacency, attributes, faceRemap );
    }
    else
    {
        if ( restart > vertexCache )
            return E_INVALIDARG;

        return _VertexCacheStripReorder<uint16_t>( indices, nFaces, adjacency, nullptr, faceRemap, vertexCache, restart );
    }
}

_Use_decl_annotations_
HRESULT OptimizeFacesEx( const uint32_t* indices, size_t nFaces, const uint32_t* adjacency, const uint32_t* attributes,
                         uint32_t* faceRemap, uint32_t vertexCache, uint32_t restart )
{
    if ( !indices || !nFaces || !adjacency || !attributes || !faceRemap )
        return E_INVALIDARG;

    if ( ( uint64_t(nFaces) * 3 ) >= UINT32_MAX )
        return HRESULT_FROM_WIN32( ERROR_ARITHMETIC_OVERFLOW );

    if( vertexCache == OPTFACES_V_STRIPORDER )
    {
        return _StripReorder<uint32_t>( indices, nFaces, adjacency, attributes, faceRemap );
    }
    else
    {
        if ( restart > vertexCache )
            return E_INVALIDARG;

        return _VertexCacheStripReorder<uint32_t>( indices, nFaces, adjacency, nullptr, faceRemap, vertexCache, restart );
    }
}


//-------------------------------------------------------------------------------------
_Use_decl_annotations_
HRESULT OptimizeVertices( const uint16_t* indices, size_t nFaces, size_t nVerts, uint32_t* vertexRemap )
{
    return _OptimizeVertices<uint16_t>( indices, nFaces, nVerts, vertexRemap );
}

_Use_decl_annotations_
HRESULT OptimizeVertices( const uint32_t* indices, size_t nFaces, size_t nVerts, uint32_t* vertexRemap )
{
    return _OptimizeVertices<uint32_t>( indices, nFaces, nVerts, vertexRemap );
}

} // namespace