Skip to content

Commit

Permalink
Merge pull request #196 from K-os/prepare_v300-rc2
Browse files Browse the repository at this point in the history
Prepare v3.0.0-rc2
  • Loading branch information
adamjw24 authored Sep 27, 2024
2 parents 7eeb48e + 35e919a commit f2d83c2
Show file tree
Hide file tree
Showing 214 changed files with 33,572 additions and 2,449 deletions.
46 changes: 3 additions & 43 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ cmake_policy( VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} )
# project name
project( vvdec VERSION 3.0.0 )
# set alternative version numbering for release candidates
set( PROJECT_VERSION_RC rc1 )
set( PROJECT_VERSION_RC rc2 )
if( PROJECT_VERSION_RC )
set( PROJECT_VERSION "${PROJECT_VERSION}-${PROJECT_VERSION_RC}" )
endif()
Expand All @@ -24,46 +24,9 @@ message( STATUS "CMAKE_MODULE_PATH: updating module path to: ${CMAKE_MODULE_PATH
# message(STATUS "${_variableName}=${${_variableName}}")
# endforeach()

# append_cpu_type_guess( <output_list> <input_str> )
#
# Appends a normalized CPU family name to the list variable named by
# <output_list> in the caller's scope, based on a case-insensitive regex match
# on <input_str>:
#   - "X86" if <input_str> contains x86, i386, x64, win32 or amd64
#   - "ARM" if <input_str> contains aarch64 or arm
# Nothing is appended when <input_str> matches neither pattern (including when
# it is empty). The X86 pattern is tested first, so an input that names both
# families yields "X86".
function( append_cpu_type_guess output_list input_str )
  set( ret ${${output_list}} )

  string( TOLOWER "${input_str}" input_lower )

  # The left operand must be quoted: callers pass variables that are often
  # empty (e.g. CMAKE_VS_PLATFORM_NAME outside Visual Studio) or that hold a
  # ;-list (e.g. CMAKE_OSX_ARCHITECTURES for universal builds). Unquoted, the
  # expansion would yield zero or several arguments and break the if() syntax.
  if( "${input_lower}" MATCHES "x86\|i386\|x64\|win32\|amd64" )
    list( APPEND ret "X86" )
  elseif( "${input_lower}" MATCHES "aarch64\|arm" )
    list( APPEND ret "ARM" )
  endif()

  set( ${output_list} ${ret} PARENT_SCOPE )
endfunction()

# Try to detect the actual target architecture.
# Candidates are collected in vvdec_target_arch_list, most probable first; the
# first entry of the list is the one finally selected below.
if( ${CMAKE_SYSTEM_NAME} STREQUAL "Emscripten" )
message( DEBUG "Emscripten" )
# Emscripten doesn't set the CMAKE_SYSTEM_PROCESSOR
list( PREPEND vvdec_target_arch_list "WASM" )
elseif( NOT CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR )
message( DEBUG "sys != host ${CMAKE_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_HOST_SYSTEM_PROCESSOR} " )
# cross compiling: CMAKE_SYSTEM_PROCESSOR was set explicitly, so we use that as first guess
append_cpu_type_guess( vvdec_target_arch_list "${CMAKE_SYSTEM_PROCESSOR}" )
endif()

# Build list of architectures in order of probability.
# Each call appends "X86" or "ARM" if the given variable names such a CPU, and
# nothing otherwise, so the call order defines the priority of the sources.
append_cpu_type_guess( vvdec_target_arch_list "${CMAKE_VS_PLATFORM_NAME}" )
append_cpu_type_guess( vvdec_target_arch_list "${CMAKE_OSX_ARCHITECTURES}" )
append_cpu_type_guess( vvdec_target_arch_list "${CMAKE_CXX_COMPILER_ARCHITECTURE_ID}" ) # set by msvc, when not using msbuild
append_cpu_type_guess( vvdec_target_arch_list "${CMAKE_ANDROID_ARCH_ABI}" )
append_cpu_type_guess( vvdec_target_arch_list "${CMAKE_C_LIBRARY_ARCHITECTURE}" )
append_cpu_type_guess( vvdec_target_arch_list "${CMAKE_SYSTEM_PROCESSOR}" )
append_cpu_type_guess( vvdec_target_arch_list "${CMAKE_HOST_SYSTEM_PROCESSOR}" )
# Fallback entry, so the list is never empty when the first element is taken.
list( APPEND vvdec_target_arch_list "UNKNOWN" ) # no architecture for which we have specific optimizations
message( DEBUG "vvdec_target_arch_list: ${vvdec_target_arch_list}" )

# Get most probable architecture: the first list entry becomes VVDEC_TARGET_ARCH.
list( POP_FRONT vvdec_target_arch_list VVDEC_TARGET_ARCH )
message( STATUS "normalized target architecture: ${VVDEC_TARGET_ARCH}" )
# The remaining guesses are not needed any more.
unset( vvdec_target_arch_list )
include( vvdecCompilerSupport )

detect_target_architecture( VVDEC_TARGET_ARCH )
if( VVDEC_TARGET_ARCH STREQUAL "ARM" )
set( VVDEC_ARM_SIMD_DEFAULT TRUE )
endif()
Expand All @@ -74,11 +37,8 @@ set( VVDEC_ENABLE_ARM_SIMD ${VVDEC_ARM_SIMD_DEFAULT} CACHE BOOL "enable ARM in

set( VVDEC_ENABLE_TRACING FALSE CACHE BOOL "Compile in tracing functionality" )

include( vvdecCompilerSupport )

# enable sse4.1 build for all source files for gcc and clang
if( VVDEC_ENABLE_X86_SIMD )

if( UNIX OR MINGW )
# when building for non-x86, but emulating simd using simd-everywhere (e.g. on ARM),
# the x86-compiler flags are not understood by the compiler
Expand Down
43 changes: 43 additions & 0 deletions cmake/modules/vvdecCompilerSupport.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,46 @@ function( _emscripten_enable_wasm_simd128 )
set( CMAKE_REQUIRED_FLAGS -msimd128 PARENT_SCOPE )
endif()
endfunction()

# detect_target_architecture( <output_var> )
#
# Guesses the CPU family the project is being built for and stores the result
# in <output_var> in the caller's scope. The result is one of:
#   "WASM"    - CMAKE_SYSTEM_NAME is "Emscripten"
#   "X86"     - one of the inspected variables names an x86 CPU
#   "ARM"     - one of the inspected variables names an ARM CPU
#   "UNKNOWN" - none of the inspected variables matched
#
# Candidates are collected in a list ordered by probability; the first entry
# wins. The per-variable classification is done by _append_cpu_type_guess().
function( detect_target_architecture output_var )
  # Start from an empty list. Function scopes inherit the caller's variables,
  # so a 'target_arch_list' defined by the caller would otherwise be picked up
  # and pollute the guesses.
  set( target_arch_list "" )

  # try to detect the actual target architecture
  # (operand is quoted so that an empty CMAKE_SYSTEM_NAME cannot break the if() syntax)
  if( "${CMAKE_SYSTEM_NAME}" STREQUAL "Emscripten" )
    message( DEBUG "Emscripten" )
    # Emscripten doesn't set the CMAKE_SYSTEM_PROCESSOR
    list( PREPEND target_arch_list "WASM" )
  elseif( NOT CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR )
    message( DEBUG "sys != host ${CMAKE_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_HOST_SYSTEM_PROCESSOR} " )
    # cross compiling: CMAKE_SYSTEM_PROCESSOR was set explicitly, so we use that as first guess
    _append_cpu_type_guess( target_arch_list "${CMAKE_SYSTEM_PROCESSOR}" )
  endif()

  # build list of architectures in order of probability
  _append_cpu_type_guess( target_arch_list "${CMAKE_VS_PLATFORM_NAME}" )
  _append_cpu_type_guess( target_arch_list "${CMAKE_OSX_ARCHITECTURES}" )
  _append_cpu_type_guess( target_arch_list "${CMAKE_CXX_COMPILER_ARCHITECTURE_ID}" ) # set by msvc, when not using msbuild
  _append_cpu_type_guess( target_arch_list "${CMAKE_ANDROID_ARCH_ABI}" )
  _append_cpu_type_guess( target_arch_list "${CMAKE_C_LIBRARY_ARCHITECTURE}" )
  _append_cpu_type_guess( target_arch_list "${CMAKE_SYSTEM_PROCESSOR}" )
  _append_cpu_type_guess( target_arch_list "${CMAKE_HOST_SYSTEM_PROCESSOR}" )
  # fallback entry: guarantees the list is non-empty before taking its front
  list( APPEND target_arch_list "UNKNOWN" ) # no architecture for which we have specific optimizations
  message( DEBUG "target_arch_list: ${target_arch_list}" )

  # get most probable architecture
  list( POP_FRONT target_arch_list detected_arch )
  message( STATUS "normalized target architecture: ${detected_arch}" )

  set( ${output_var} "${detected_arch}" PARENT_SCOPE )
endfunction()

# _append_cpu_type_guess( <output_list> <input_str> )
#
# Internal helper of detect_target_architecture(). Appends a normalized CPU
# family name to the list variable named by <output_list> in the caller's
# scope, based on a case-insensitive regex match on <input_str>:
#   - "X86" if <input_str> contains x86, i386, x64, win32 or amd64
#   - "ARM" if <input_str> contains aarch64 or arm
# Nothing is appended when <input_str> matches neither pattern (including when
# it is empty). The X86 pattern is tested first, so an input that names both
# families yields "X86".
function( _append_cpu_type_guess output_list input_str )
  set( ret ${${output_list}} )

  string( TOLOWER "${input_str}" input_lower )

  # The left operand must be quoted: callers pass variables that are often
  # empty (e.g. CMAKE_VS_PLATFORM_NAME outside Visual Studio) or that hold a
  # ;-list (e.g. CMAKE_OSX_ARCHITECTURES for universal builds). Unquoted, the
  # expansion would yield zero or several arguments and break the if() syntax.
  if( "${input_lower}" MATCHES "x86\|i386\|x64\|win32\|amd64" )
    list( APPEND ret "X86" )
  elseif( "${input_lower}" MATCHES "aarch64\|arm" )
    list( APPEND ret "ARM" )
  endif()

  set( ${output_list} ${ret} PARENT_SCOPE )
endfunction()
2 changes: 1 addition & 1 deletion include/vvdec/sei.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ typedef struct vvdecSEIDecodedPictureHash
{
vvdecHashType method;
bool singleCompFlag;
int digist_length;
int digest_length;
unsigned char digest[16*3];
}vvdecSEIDecodedPictureHash;

Expand Down
46 changes: 31 additions & 15 deletions include/vvdec/vvdec.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,21 @@ typedef enum
VVDEC_DETAILS = 6
} vvdecLogLevel;

/*
\enum PicHashError
Result of the Decoded Picture hash verification.
When subpictures are present, and no DPH-SEI covering the full picture is available, but only DPHs
for some subpictures are available, VVDEC_DPH_NOT_VERIFIED will be signalled instead of VVDEC_DPH_OK,
to signify that parts of the picture could not be verified.
*/
typedef enum
{
VVDEC_DPH_NOT_VERIFIED = -1, // either no Decoded Picture Hash SEI was available or verification was disabled (default)
VVDEC_DPH_OK = 0, // the Decoded Picture Hash verification for the picture was successful
VVDEC_DPH_MISMATCH = 1, // the Decoded Picture Hash verification for the picture produced an error
} vvdecPicHashError;

/*
\enum SIMD_Extension
The enum SIMD_Extension enumerates the supported simd optimizations.
Expand Down Expand Up @@ -389,21 +404,22 @@ typedef struct vvdecSeqInfo
*/
typedef struct vvdecPicAttributes
{
vvdecNalType nalType; // nal unit type
vvdecSliceType sliceType; // slice type (I/P/B) */
bool isRefPic; // reference picture
uint32_t temporalLayer; // temporal layer
int64_t poc; // picture order count
uint32_t bits; // bits of the compr. image packet
vvdecVui *vui; // if available, pointer to VUI (Video Usability Information)
vvdecHrd *hrd; // if available, pointer to HRD (Hypothetical Reference Decoder)
vvdecOlsHrd *olsHrd; // if available, pointer to OLS HRD (Output Layer Set Hypothetical Reference Decoder)
vvdecSeqInfo *seqInfo; // if available, pointer to some data extracted from the SPS (Sequence Parameter Set)

void* reservedPtr_1; // reserved space for future use
void* reservedPtr_2; // ...
int64_t reserved_1; // ...
int64_t reserved_2; // ...
vvdecNalType nalType; // nal unit type
vvdecSliceType sliceType; // slice type (I/P/B) */
bool isRefPic; // reference picture
uint32_t temporalLayer; // temporal layer
int64_t poc; // picture order count
uint32_t bits; // bits of the compr. image packet
vvdecVui* vui; // if available, pointer to VUI (Video Usability Information)
vvdecHrd* hrd; // if available, pointer to HRD (Hypothetical Reference Decoder)
vvdecOlsHrd* olsHrd; // if available, pointer to OLS HRD (Output Layer Set Hypothetical Reference Decoder)
vvdecSeqInfo* seqInfo; // if available, pointer to some data extracted from the SPS (Sequence Parameter Set)
vvdecPicHashError picHashError; // result of the decoded picture hash verification if enabled

void* reservedPtr_1; // reserved space for future use
void* reservedPtr_2; // ...
int64_t reserved_1; // ...
int64_t reserved_2; // ...
} vvdecPicAttributes;

/*
Expand Down
2 changes: 1 addition & 1 deletion source/Lib/CommonLib/PicYuvMD5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ std::string hashToString(const vvdecSEIDecodedPictureHash* digest, int numChar)

CHECK_FATAL(numChar<=0, "numChar needs to be >0");

for(int pos=0; pos<int(digest->digist_length); pos++)
for(int pos=0; pos<int(digest->digest_length); pos++)
{
if ((pos % numChar) == 0 && pos!=0 )
{
Expand Down
1 change: 1 addition & 0 deletions source/Lib/CommonLib/Picture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ void Picture::resetForUse( int _layerId )

picCheckedDPH = false;
subpicsCheckedDPH.clear();
dphMismatch = false;

lockedByApplication = false;

Expand Down
1 change: 1 addition & 0 deletions source/Lib/CommonLib/Picture.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ struct Picture : public UnitArea

bool picCheckedDPH = false;
std::vector<bool> subpicsCheckedDPH;
bool dphMismatch = false;

// As long as this field is true, the picture will not be reused or deleted.
// An external application needs to call DecLib::releasePicture(), when it is done using the picture buffer.
Expand Down
2 changes: 1 addition & 1 deletion source/Lib/CommonLib/SEI_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ struct PictureHash

bool equal( vvdecSEIDecodedPictureHash digest ) const
{
if ((size_t)digest.digist_length != hash.size())
if ((size_t)digest.digest_length != hash.size())
{
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ void simdDeriveClassificationBlk<AVX2>(AlfClassifier *classifier, const CPelBuf
const uint32_t scale = ( z == vbPos - 4 || z == vbPos ) ? 96 : 64;
const uint32_t scale2 = ( z2 == vbPos - 4 || z2 == vbPos ) ? 96 : 64;
__m256i activity = _mm256_mullo_epi32(tempAct, _mm256_unpacklo_epi64(_mm256_set1_epi32(scale), _mm256_set1_epi32(scale2)));
activity = _mm256_srli_epi32(activity, shift);
activity = _mm256_srl_epi32(activity, _mm_cvtsi32_si128(shift));
activity = _mm256_min_epi32(activity, _mm256_set1_epi32(15));
__m256i classIdx = _mm256_shuffle_epi8(_mm256_setr_epi8(0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4), activity);

Expand Down
8 changes: 4 additions & 4 deletions source/Lib/CommonLib/x86/BufferX86.h
Original file line number Diff line number Diff line change
Expand Up @@ -385,13 +385,13 @@ void addWghtAvg_SSE( const int16_t* src0, ptrdiff_t src0Stride, const int16_t* s

template<bool doShift, bool shiftR, typename T> static inline void do_shift( T &vreg, int num );
#if USE_AVX2
template<> inline void do_shift<true, true , __m256i>( __m256i &vreg, int num ) { vreg = _mm256_srai_epi32( vreg, num ); }
template<> inline void do_shift<true, false, __m256i>( __m256i &vreg, int num ) { vreg = _mm256_slli_epi32( vreg, num ); }
template<> inline void do_shift<true, true , __m256i>( __m256i &vreg, int num ) { vreg = _mm256_sra_epi32( vreg, _mm_cvtsi32_si128( num ) ); }
template<> inline void do_shift<true, false, __m256i>( __m256i &vreg, int num ) { vreg = _mm256_sll_epi32( vreg, _mm_cvtsi32_si128( num ) ); }
template<> inline void do_shift<false, true , __m256i>( __m256i &vreg, int num ) { }
template<> inline void do_shift<false, false, __m256i>( __m256i &vreg, int num ) { }
#endif
template<> inline void do_shift<true, true , __m128i>( __m128i &vreg, int num ) { vreg = _mm_srai_epi32( vreg, num ); }
template<> inline void do_shift<true, false, __m128i>( __m128i &vreg, int num ) { vreg = _mm_slli_epi32( vreg, num ); }
template<> inline void do_shift<true, true , __m128i>( __m128i &vreg, int num ) { vreg = _mm_sra_epi32( vreg, _mm_cvtsi32_si128( num ) ); }
template<> inline void do_shift<true, false, __m128i>( __m128i &vreg, int num ) { vreg = _mm_sll_epi32( vreg, _mm_cvtsi32_si128( num ) ); }
template<> inline void do_shift<false, true , __m128i>( __m128i &vreg, int num ) { }
template<> inline void do_shift<false, false, __m128i>( __m128i &vreg, int num ) { }

Expand Down
7 changes: 4 additions & 3 deletions source/Lib/CommonLib/x86/InterPredX86.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,29 +63,30 @@ inline void PaddBIO_SIMD( const Pel* refPel, Pel* dstPel, unsigned width, const
{
int w;
__m128i off = _mm_set1_epi16( ( Pel ) IF_INTERNAL_OFFS );
__m128i vshift = _mm_cvtsi32_si128( shift );

if( width > 4 )
{
for( w = 0; w < width; w += 8 )
{

__m128i ref = _mm_lddqu_si128( ( __m128i const * )&refPel[w] );
ref = _mm_slli_epi16( ref, shift );
ref = _mm_sll_epi16( ref, vshift );
ref = _mm_sub_epi16( ref, off );
_mm_storeu_si128( ( __m128i * )&dstPel[w], ref );

}
//2 * BIO_EXTEND_SIZE
__m128i ref = _mm_lddqu_si128( ( __m128i const * )&refPel[w] );
ref = _mm_slli_epi16( ref, shift );
ref = _mm_sll_epi16( ref, vshift );
ref = _mm_sub_epi16( ref, off );
_mm_storeu_si32( ( __m128i * )&dstPel[w], ref );

}
else
{
__m128i ref = _mm_lddqu_si128( ( __m128i const * )&refPel[0] );
ref = _mm_slli_epi16( ref, shift );
ref = _mm_sll_epi16( ref, vshift );
ref = _mm_sub_epi16( ref, off );
_mm_storeu_si64( ( __m128i * )&dstPel[0], ref );
ref = _mm_srli_si128( ref, 8 );
Expand Down
24 changes: 14 additions & 10 deletions source/Lib/CommonLib/x86/InterpolationFilterX86.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ static void fullPelCopySSE( const ClpRng& clpRng, const void*_src, ptrdiff_t src
int offset = IF_INTERNAL_OFFS;
__m128i voffset = _mm_set1_epi16( offset );
__m128i voffset_headroom = _mm_set1_epi16( headroom_offset );
__m128i vheadroom = _mm_cvtsi32_si128( headroom );
__m128i vmin = _mm_set1_epi16( clpRng.min() );
__m128i vmax = _mm_set1_epi16( clpRng.max() );

Expand Down Expand Up @@ -108,14 +109,14 @@ static void fullPelCopySSE( const ClpRng& clpRng, const void*_src, ptrdiff_t src
}
else if( isFirst )
{
vsrc = _mm_slli_epi16( vsrc, headroom );
vsrc = _mm_sll_epi16( vsrc, vheadroom );
vsum = _mm_sub_epi16( vsrc, voffset );
}
else
{
vsrc = _mm_add_epi16( vsrc, voffset );
vsrc = _mm_add_epi16( vsrc, voffset_headroom );
vsrc = _mm_srai_epi16( vsrc, headroom );
vsrc = _mm_sra_epi16( vsrc, vheadroom );
vsum = vsrc;

if( isLast )
Expand All @@ -142,6 +143,7 @@ static void fullPelCopySSE_M4( const ClpRng& clpRng, const void*_src, ptrdiff_t
int offset = IF_INTERNAL_OFFS;
__m128i voffset = _mm_set1_epi16( offset );
__m128i voffset_headroom = _mm_set1_epi16( headroom_offset );
__m128i vheadroom = _mm_cvtsi32_si128( headroom );
__m128i vmin = _mm_set1_epi16( clpRng.min() );
__m128i vmax = _mm_set1_epi16( clpRng.max() );

Expand Down Expand Up @@ -170,14 +172,14 @@ static void fullPelCopySSE_M4( const ClpRng& clpRng, const void*_src, ptrdiff_t
}
else if( isFirst )
{
vsrc = _mm_slli_epi16( vsrc, headroom );
vsrc = _mm_sll_epi16( vsrc, vheadroom );
vsum = _mm_sub_epi16( vsrc, voffset );
}
else
{
vsrc = _mm_add_epi16( vsrc, voffset );
vsrc = _mm_add_epi16( vsrc, voffset_headroom );
vsrc = _mm_srai_epi16( vsrc, headroom );
vsrc = _mm_sra_epi16( vsrc, vheadroom );
vsum = vsrc;

if( isLast )
Expand Down Expand Up @@ -207,6 +209,7 @@ static void fullPelCopyAVX2( const ClpRng& clpRng, const void*_src, ptrdiff_t sr
__m256i vheadroom_offset = _mm256_set1_epi16( offset );
__m256i vmin = _mm256_set1_epi16( clpRng.min() );
__m256i vmax = _mm256_set1_epi16( clpRng.max() );
__m128i vheadroom = _mm_cvtsi32_si128( headroom );

__m256i vsrc, vsum;

Expand Down Expand Up @@ -235,14 +238,14 @@ static void fullPelCopyAVX2( const ClpRng& clpRng, const void*_src, ptrdiff_t sr
}
else if( isFirst )
{
vsrc = _mm256_slli_epi16( vsrc, headroom );
vsrc = _mm256_sll_epi16( vsrc, vheadroom );
vsum = _mm256_sub_epi16( vsrc, vinternal_offset );
}
else
{
vsrc = _mm256_add_epi16( vsrc, vinternal_offset );
vsrc = _mm256_add_epi16( vsrc, vheadroom_offset );
vsrc = _mm256_srai_epi16( vsrc, headroom );
vsrc = _mm256_sra_epi16( vsrc, vheadroom );
vsum = vsrc;

if( isLast )
Expand All @@ -268,6 +271,7 @@ template<X86_VEXT vext>
void fullPelCopyDMVR_SSE( const int16_t* src, ptrdiff_t srcStride, int16_t* dst, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng )
{
const int shift = IF_INTERNAL_PREC_BILINEAR - clpRng.bd;
const __m128i vshift = _mm_cvtsi32_si128( shift );

CHECKD( shift < 0, "Only bit-depths of up to 10 supported!" );

Expand All @@ -289,13 +293,13 @@ void fullPelCopyDMVR_SSE( const int16_t* src, ptrdiff_t srcStride, int16_t* dst,
{
__m256i vmm;
vmm = _mm256_loadu_si256( ( const __m256i * ) &src[x] );
vmm = _mm256_slli_epi16 ( vmm, shift );
vmm = _mm256_sll_epi16 ( vmm, vshift );
_mm256_storeu_si256 ( ( __m256i * )&dst[x], vmm );
}

__m128i xmm;
xmm = _mm_loadu_si64 ( ( const __m128i * ) &src[x] );
xmm = _mm_slli_epi16 ( xmm, shift );
xmm = _mm_sll_epi16 ( xmm, vshift );
_mm_storeu_si64 ( ( __m128i * )&dst[x], xmm );

src += srcStride;
Expand All @@ -315,12 +319,12 @@ void fullPelCopyDMVR_SSE( const int16_t* src, ptrdiff_t srcStride, int16_t* dst,
for( ; x < width - 4; x += 8 )
{
xmm = _mm_loadu_si128( ( const __m128i * ) &src[x] );
xmm = _mm_slli_epi16 ( xmm, shift );
xmm = _mm_sll_epi16 ( xmm, vshift );
_mm_storeu_si128 ( ( __m128i * )&dst[x], xmm );
}

xmm = _mm_loadu_si64( ( const __m128i * ) &src[x] );
xmm = _mm_slli_epi16( xmm, shift );
xmm = _mm_sll_epi16 ( xmm, vshift );
_mm_storeu_si64 ( ( __m128i * )&dst[x], xmm );

src += srcStride;
Expand Down
Loading

0 comments on commit f2d83c2

Please sign in to comment.