include/foundation/PxBitMap.h

File members: include/foundation/PxBitMap.h

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2024 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PX_BITMAP_H
#define PX_BITMAP_H

#include "foundation/PxAssert.h"
#include "foundation/PxMath.h"
#include "foundation/PxMemory.h"
#include "foundation/PxAllocator.h"
#include "foundation/PxUserAllocated.h"
#include "foundation/PxIntrinsics.h"
#include "foundation/PxBitUtils.h"
#include "foundation/PxConstructor.h"

#if !PX_DOXYGEN
namespace physx
{
#endif
    template<class PxAllocator>
    class PxBitMapBase : public PxUserAllocated
    {
        PX_NOCOPY(PxBitMapBase)

    public:

        // PX_SERIALIZATION
        /* todo: explicit */ PxBitMapBase(const PxEMPTY)
        {
            if(mMap)
                mWordCount |= PX_SIGN_BITMASK;
        }
        //~PX_SERIALIZATION

        PX_INLINE PxBitMapBase(const PxAllocator& allocator) : mMap(0), mWordCount(0), mAllocator(allocator) {}

        PX_INLINE PxBitMapBase() : mMap(0), mWordCount(0) {}

        PX_INLINE ~PxBitMapBase()
        {
            release();
        }

        PX_INLINE void release()
        {
            if(mMap && !isInUserMemory())
                mAllocator.deallocate(mMap);
            mMap = NULL;
        }

        PX_FORCE_INLINE PxAllocator&    getAllocator() { return mAllocator; }

        PX_INLINE void growAndSet(PxU32 index)
        {
            extend(index + 1);
            mMap[index >> 5] |= 1 << (index & 31);
        }

        PX_INLINE void growAndReset(PxU32 index)
        {
            extend(index + 1);
            mMap[index >> 5] &= ~(1 << (index & 31));
        }

        PX_INLINE PxIntBool boundedTest(PxU32 index) const
        {
            return PxIntBool(index >> 5 >= getWordCount() ? PxIntFalse : (mMap[index >> 5] & (1 << (index & 31))));
        }

        PX_INLINE void boundedReset(PxU32 index)
        {
            if((index >> 5) < getWordCount())
                mMap[index >> 5] &= ~(1 << (index & 31));
        }

        // Special optimized versions, when you _know_ your index is in range
        PX_INLINE void set(PxU32 index)
        {
            PX_ASSERT(index<getWordCount() * 32);
            mMap[index >> 5] |= 1 << (index & 31);
        }

        PX_INLINE void reset(PxU32 index)
        {
            PX_ASSERT(index<getWordCount() * 32);
            mMap[index >> 5] &= ~(1 << (index & 31));
        }

        PX_INLINE PxIntBool test(PxU32 index) const
        {
            PX_ASSERT(index<getWordCount() * 32);
            return PxIntBool(mMap[index >> 5] & (1 << (index & 31)));
        }

        // nibble == 4 bits
        PX_INLINE PxU32 getNibbleFast(PxU32 nibIndex) const
        {
            const PxU32 bitIndex = nibIndex << 2;
            PX_ASSERT(bitIndex < getWordCount() * 32);
            return (mMap[bitIndex >> 5] >> (bitIndex & 31)) & 0xf;
        }

        PX_INLINE void andNibbleFast(PxU32 nibIndex, PxU32 mask)
        {
            //TODO: there has to be a faster way...
            const PxU32 bitIndex = nibIndex << 2;
            const PxU32 shift = (bitIndex & 31);
            const PxU32 nibMask = (0xfu << shift);

            PX_ASSERT(bitIndex < getWordCount() * 32);

            mMap[bitIndex >> 5] &= ((mask << shift) | ~nibMask);
        }

        PX_INLINE void orNibbleFast(PxU32 nibIndex, PxU32 mask)
        {
            PX_ASSERT(!(mask & ~0xfu)); //check extra bits are not set

            const PxU32 bitIndex = nibIndex << 2;
            const PxU32 shift = bitIndex & 31;

            PX_ASSERT(bitIndex < getWordCount() * 32);

            mMap[bitIndex >> 5] |= (mask << shift);
        }

        void clear()
        {
            PxMemSet(mMap, 0, getWordCount() * sizeof(PxU32));
        }

        void resizeAndClear(PxU32 newBitCount)
        {
            extendUninitialized(newBitCount);
            PxMemSet(mMap, 0, getWordCount() * sizeof(PxU32));
        }

        void setEmpty()
        {
            mMap = NULL;
            mWordCount = 0;
        }

        void setWords(PxU32* map, PxU32 wordCount)
        {
            mMap = map;
            mWordCount = wordCount | PX_SIGN_BITMASK;
        }

        // !!! only sets /last/ bit to value
        void resize(PxU32 newBitCount, bool value = false)
        {
            PX_ASSERT(!value); // only new class supports this
            PX_UNUSED(value);
            extend(newBitCount);
        }

        PX_FORCE_INLINE PxU32 size() const { return getWordCount() * 32; }

        void copy(const PxBitMapBase& a)
        {
            extendUninitialized(a.getWordCount() << 5);
            PxMemCopy(mMap, a.mMap, a.getWordCount() * sizeof(PxU32));
            if(getWordCount() > a.getWordCount())
                PxMemSet(mMap + a.getWordCount(), 0, (getWordCount() - a.getWordCount()) * sizeof(PxU32));
        }

        PX_INLINE PxU32 count()     const
        {
            // NOTE: we can probably do this faster, since the last steps in PxBitCount can be defered to
            // the end of the seq. + 64/128bits at a time + native bit counting instructions(360 is fast non micro code).
            PxU32 count = 0;
            const PxU32 wordCount = getWordCount();
            for(PxU32 i = 0; i<wordCount; i++)
                count += PxBitCount(mMap[i]);

            return count;
        }

        PX_INLINE PxU32 count(PxU32 start, PxU32 length) const
        {
            const PxU32 end = PxMin(getWordCount() << 5, start + length);
            PxU32 count = 0;
            for(PxU32 i = start; i<end; i++)
                count += (test(i) != 0);
            return count;
        }

        PxU32 findLast() const
        {
            const PxU32 wordCount = getWordCount();
            for(PxU32 i = wordCount; i-- > 0;)
            {
                if(mMap[i])
                    return (i << 5) + PxHighestSetBit(mMap[i]);
            }
            return PxU32(0);
        }

        bool hasAnyBitSet() const
        {
            const PxU32 wordCount = getWordCount();
            for(PxU32 i = 0; i<wordCount; i++)
            {
                if (mMap[i])
                    return true;
            }
            return false;
        }

        // the obvious combiners and some used in the SDK

        struct OR { PX_INLINE PxU32 operator()(PxU32 a, PxU32 b) { return a | b; } };
        struct AND { PX_INLINE PxU32 operator()(PxU32 a, PxU32 b) { return a&b; } };
        struct XOR { PX_INLINE PxU32 operator()(PxU32 a, PxU32 b) { return a^b; } };

        // we use auxiliary functions here so as not to generate combiners for every combination
        // of allocators

        template<class Combiner, class _>
        PX_INLINE void combineInPlace(const PxBitMapBase<_>& b)
        {
            combine1<Combiner>(b.mMap, b.getWordCount());
        }

        template<class Combiner, class _1, class _2>
        PX_INLINE void combine(const PxBitMapBase<_1>& a, const PxBitMapBase<_2>& b)
        {
            combine2<Combiner>(a.mMap, a.getWordCount(), b.mMap, b.getWordCount());
        }

        PX_FORCE_INLINE const PxU32*    getWords()          const   { return mMap; }
        PX_FORCE_INLINE PxU32*          getWords()                  { return mMap; }

        // PX_SERIALIZATION
        PX_FORCE_INLINE PxU32           getWordCount()      const   { return mWordCount & ~PX_SIGN_BITMASK; }

        // We need one bit to mark arrays that have been deserialized from a user-provided memory block.
        PX_FORCE_INLINE PxU32           isInUserMemory()    const   { return mWordCount & PX_SIGN_BITMASK; }
        //~PX_SERIALIZATION

        class Iterator
        {
        public:
            static const PxU32 DONE = 0xffffffff;

            PX_INLINE Iterator(const PxBitMapBase &map) : mBitMap(map)
            {
                reset();
            }

            PX_INLINE Iterator& operator=(const Iterator& other)
            {
                PX_ASSERT(&mBitMap == &other.mBitMap);
                mBlock = other.mBlock;
                mIndex = other.mIndex;
                return *this;
            }

            PX_INLINE PxU32 getNext()
            {
                if(mBlock)
                {
                    PxU32 block = mBlock;
                    PxU32 index = mIndex;

                    const PxU32 bitIndex = index << 5 | PxLowestSetBit(block);
                    block &= block - 1;
                    PxU32 wordCount = mBitMap.getWordCount();
                    while(!block && ++index < wordCount)
                        block = mBitMap.mMap[index];

                    mBlock = block;
                    mIndex = index;

                    return bitIndex;
                }
                return DONE;
            }

            PX_INLINE void reset()
            {
                PxU32 index = 0;
                PxU32 block = 0;

                PxU32 wordCount = mBitMap.getWordCount();
                while(index < wordCount && ((block = mBitMap.mMap[index]) == 0))
                    ++index;

                mBlock = block;
                mIndex = index;
            }
        private:
            PxU32 mBlock, mIndex;
            const PxBitMapBase& mBitMap;
        };

        // DS: faster but less general: hasBits() must be true or getNext() is illegal so it is the calling code's responsibility to ensure that getNext() is not called illegally.
        class PxLoopIterator
        {
            PX_NOCOPY(PxLoopIterator)

        public:
            PX_FORCE_INLINE PxLoopIterator(const PxBitMapBase &map) : mMap(map.getWords()), mBlock(0), mIndex(-1), mWordCount(PxI32(map.getWordCount())) {}

            PX_FORCE_INLINE bool hasBits()
            {
                PX_ASSERT(mIndex<mWordCount);
                while (mBlock == 0)
                {
                    if (++mIndex == mWordCount)
                        return false;
                    mBlock = mMap[mIndex];
                }
                return true;
            }

            PX_FORCE_INLINE PxU32 getNext()
            {
                PX_ASSERT(mIndex<mWordCount && mBlock != 0);
                PxU32 result = PxU32(mIndex) << 5 | PxLowestSetBit(mBlock); // will assert if mask is zero
                mBlock &= (mBlock - 1);
                return result;
            }

        private:
            const PxU32*const mMap;
            PxU32 mBlock;       // the word we're currently scanning
            PxI32 mIndex;       // the index of the word we're currently looking at
            PxI32 mWordCount;
        };

        //Class to iterate over the bitmap from a particular start location rather than the beginning of the list
        class PxCircularIterator
        {
        public:
            static const PxU32 DONE = 0xffffffff;

            PX_INLINE PxCircularIterator(const PxBitMapBase &map, PxU32 index) : mBitMap(map)
            {
                PxU32 localIndex = 0;
                PxU32 startIndex = 0;

                const PxU32 wordCount = mBitMap.getWordCount();
                if((index << 5) < wordCount)
                {
                    localIndex = index << 5;
                    startIndex = localIndex;
                }

                PxU32 block = 0;
                if(localIndex < wordCount)
                {
                    block = mBitMap.mMap[localIndex];
                    if(block == 0)
                    {
                        localIndex = (localIndex + 1) % wordCount;
                        while(localIndex != startIndex && (block = mBitMap.mMap[localIndex]) == 0)
                            localIndex = (localIndex + 1) % wordCount;
                    }
                }

                mIndex = localIndex;
                mBlock = block;
                mStartIndex = startIndex;
            }

            PX_INLINE PxU32 getNext()
            {
                if(mBlock)
                {
                    PxU32 index = mIndex;
                    PxU32 block = mBlock;
                    const PxU32 startIndex = mStartIndex;

                    PxU32 bitIndex = index << 5 | PxLowestSetBit(block);
                    block &= block - 1;
                    PxU32 wordCount = mBitMap.getWordCount();
                    while (!block && (index = ((index + 1) % wordCount)) != startIndex)
                        block = mBitMap.mMap[index];

                    mIndex = index;
                    mBlock = block;

                    return bitIndex;
                }
                return DONE;
            }

        private:
            PxU32 mBlock, mIndex;
            PxU32 mStartIndex;
            const PxBitMapBase& mBitMap;

            PX_NOCOPY(PxCircularIterator)
        };

    protected:
        PxU32*      mMap;           //one bit per index
        PxU32       mWordCount;
        PxAllocator mAllocator;
        PxU8        mPadding[3];    // PT: "mAllocator" is empty but consumes 1 byte

        void extend(PxU32 size)
        {
            const PxU32 newWordCount = (size + 31) >> 5;
            if (newWordCount > getWordCount())
            {
                PxU32* newMap = reinterpret_cast<PxU32*>(mAllocator.allocate(newWordCount * sizeof(PxU32), PX_FL));
                if (mMap)
                {
                    PxMemCopy(newMap, mMap, getWordCount() * sizeof(PxU32));
                    if (!isInUserMemory())
                        mAllocator.deallocate(mMap);
                }
                PxMemSet(newMap + getWordCount(), 0, (newWordCount - getWordCount()) * sizeof(PxU32));
                mMap = newMap;
                // also resets the isInUserMemory bit
                mWordCount = newWordCount;
            }
        }

        void extendUninitialized(PxU32 size)
        {
            PxU32 newWordCount = (size + 31) >> 5;
            if (newWordCount > getWordCount())
            {
                if (mMap && !isInUserMemory())
                    mAllocator.deallocate(mMap);
                // also resets the isInUserMemory bit
                mWordCount = newWordCount;
                mMap = reinterpret_cast<PxU32*>(mAllocator.allocate(mWordCount * sizeof(PxU32), PX_FL));
            }
        }

        template<class Combiner>
        void combine1(const PxU32* words, PxU32 length)
        {
            extend(length << 5);
            PxU32 combineLength = PxMin(getWordCount(), length);
            for (PxU32 i = 0; i<combineLength; i++)
                mMap[i] = Combiner()(mMap[i], words[i]);
        }

        template<class Combiner>
        void combine2(const PxU32* words1, PxU32 length1,
            const PxU32* words2, PxU32 length2)
        {
            extendUninitialized(PxMax(length1, length2) << 5);

            PxU32 commonSize = PxMin(length1, length2);

            for (PxU32 i = 0; i<commonSize; i++)
                mMap[i] = Combiner()(words1[i], words2[i]);

            for (PxU32 i = commonSize; i<length1; i++)
                mMap[i] = Combiner()(words1[i], 0);

            for (PxU32 i = commonSize; i<length2; i++)
                mMap[i] = Combiner()(0, words2[i]);
        }

        friend class Iterator;
    };

    typedef PxBitMapBase<PxAllocator> PxBitMap;
    typedef PxBitMapBase<PxVirtualAllocator> PxBitMapPinned;
#if !PX_DOXYGEN
} // namespace physx
#endif

#endif