Repository for OpenCV's extra modules
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

77 lines
2.4 KiB

#ifndef TILING_KERNEL_H
#define TILING_KERNEL_H
#ifdef __CCE_KT_TEST__
#define __aicore__
#else
#define __aicore__ [aicore]
#endif
inline __aicore__ int32_t AlignNCeil(int32_t n, int32_t align) { return ((n + align) & ~(align-1)); }
inline __aicore__ int32_t AlignNFloor(int32_t n, int32_t align) { return (n & ~(align-1)); }
constexpr int32_t BUFFER_NUM = 2;
constexpr int32_t UB_BUF_LEN = 248 * 1024;
struct VectorTiling {
__aicore__ inline void calculate(uint64_t _totalLength, uint64_t _blockNum,
uint64_t _blockIdx, uint64_t _variableBytesPerElem, uint32_t _align) {
totalLength = _totalLength;
blockNum = _blockNum;
blockIdx = _blockIdx;
variableBytesPerElem = _variableBytesPerElem;
blockLength = 0;
blockOffset = 0;
align = _align;
GetBlockLengthAndOffset();
GetLoopLengthAndCount();
#ifdef __CCE_KT_TEST__
std::cout << "Block(" << blockIdx << "): BlockLength = " << blockLength
<< ", BlockOffset = " << blockOffset
<< ", LoopLength = " << loopLength
<< ", LoopCount = " << loopCount
<< ", LoopTailLength = " << loopTailLength << std::endl;
#endif
}
__aicore__ inline void GetBlockLengthAndOffset() {
// Data should Align by 32B.
uint32_t fullBlockLength = AlignNCeil(totalLength / blockNum, 32);
// Some core may get no data after Align32 Ceil.
uint32_t fullBlockNum = totalLength / fullBlockLength;
uint32_t blockTailLength = totalLength % fullBlockLength;
if (blockIdx < fullBlockNum) {
blockLength = fullBlockLength;
blockOffset = blockIdx * blockLength;
// Last block must less than full block num.
} else if (blockTailLength != 0 && blockIdx == fullBlockNum) {
blockLength = blockTailLength;
blockOffset = blockIdx * fullBlockLength;
}
}
/**
* @brief Get length for one loop and loop count.
* Use as much UB buf as possible.
*/
__aicore__ inline void GetLoopLengthAndCount() {
loopLength = AlignNFloor(UB_BUF_LEN / variableBytesPerElem, align);
loopCount = blockLength / loopLength;
loopTailLength = blockLength - (loopLength * loopCount);
}
uint64_t totalLength;
uint64_t blockNum;
uint64_t blockIdx;
uint64_t variableBytesPerElem;
uint32_t blockLength;
uint32_t blockOffset;
uint32_t loopLength;
uint32_t loopCount;
uint32_t loopTailLength;
uint32_t align;
};
#endif // TILING_KERNEL_H