From 9108e39e5584ef9b41f80751639b4ec72b3e9538 Mon Sep 17 00:00:00 2001
From: Randy MacLeod <Randy.MacLeod@windriver.com>
Date: Wed, 26 Apr 2017 15:00:32 -0400
Subject: [PATCH 2/2] Revert "check FP16 build condition correctly"

This reverts commit c7cb116dc08441fe56cf82d5b21f929e5b674c13.

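Conflicts hit while reverting were resolved in favour of the previous
behaviour: the FP16 compiler check is once again a plain try_compile with
no extra FP16 compiler option, and the CV_FP16_TYPE guards go back to the
explicit ARM/GCC preprocessor conditions.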
---
 cmake/OpenCVCompilerOptions.cmake         | 45 +++++++++--------------
 modules/core/include/opencv2/core/cvdef.h |  2 +-
 modules/core/src/convert.cpp              | 11 +++---
 modules/core/test/test_intrin.cpp         | 60 ++++++++++++++-----------------
 4 files changed, 48 insertions(+), 70 deletions(-)

diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake
index 5bb0479..4b19fdb 100644
--- a/cmake/OpenCVCompilerOptions.cmake
+++ b/cmake/OpenCVCompilerOptions.cmake
@@ -185,7 +185,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
     add_extra_compiler_option("-mfp16-format=ieee")
   endif(ARM)
   if(ENABLE_NEON)
-    add_extra_compiler_option("-mfpu=neon")
+    add_extra_compiler_option("-mfpu=neon-fp16")
   endif()
   if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
     add_extra_compiler_option("-mfpu=vfpv3")
@@ -370,34 +370,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
   add_extra_compiler_option(-fvisibility-inlines-hidden)
 endif()
 
-if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
-  if(ARM AND ENABLE_NEON)
-    set(FP16_OPTION "-mfpu=neon-fp16")
-  elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX)
-    set(FP16_OPTION "-mf16c")
-  endif()
-  try_compile(__VALID_FP16
-    "${OpenCV_BINARY_DIR}"
-    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
-    COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
-    OUTPUT_VARIABLE TRY_OUT
-    )
-  if(NOT __VALID_FP16)
-    if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX)
-      # GCC enables AVX when mf16c is passed
-      message(STATUS "FP16: Feature disabled")
-    else()
-      message(STATUS "FP16: Compiler support is not available")
-    endif()
-  else()
-    message(STATUS "FP16: Compiler support is available")
-    set(HAVE_FP16 1)
-    if(NOT ${FP16_OPTION} STREQUAL "")
-      add_extra_compiler_option(${FP16_OPTION})
-    endif()
-  endif()
-endif()
-
 #combine all "extra" options
 set(CMAKE_C_FLAGS           "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
 set(CMAKE_CXX_FLAGS         "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}")
@@ -450,6 +422,21 @@ if(MSVC)
   endif()
 endif()
 
+if(NOT OPENCV_FP16_DISABLE)
+  try_compile(__VALID_FP16
+    "${OpenCV_BINARY_DIR}"
+    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
+    COMPILE_DEFINITIONS "-DCHECK_FP16"
+    OUTPUT_VARIABLE TRY_OUT
+    )
+  if(NOT __VALID_FP16)
+    message(STATUS "FP16: Compiler support is not available")
+  else()
+    message(STATUS "FP16: Compiler support is available")
+    set(HAVE_FP16 1)
+  endif()
+endif()
+
 if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib")
   link_directories("/usr/local/lib")
 endif()
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h
index efc24ca..a10936b 100644
--- a/modules/core/include/opencv2/core/cvdef.h
+++ b/modules/core/include/opencv2/core/cvdef.h
@@ -312,7 +312,7 @@ enum CpuFeatures {
 typedef union Cv16suf
 {
     short i;
-#if CV_FP16_TYPE
+#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
     __fp16 h;
 #endif
     struct _fp16Format
diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index e04d89e..46db26f 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -44,7 +44,6 @@
 #include "precomp.hpp"
 
 #include "opencl_kernels_core.hpp"
-#include "opencv2/core/hal/intrin.hpp"
 
 #include "opencv2/core/openvx/ovx_defs.hpp"
 
@@ -4382,7 +4381,7 @@ struct Cvt_SIMD<float, int>
 
 #endif
 
-#if !CV_FP16_TYPE
+#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) )
 // const numbers for floating points format
 const unsigned int kShiftSignificand    = 13;
 const unsigned int kMaskFp16Significand = 0x3ff;
@@ -4390,7 +4389,7 @@ const unsigned int kBiasFp16Exponent    = 15;
 const unsigned int kBiasFp32Exponent    = 127;
 #endif
 
-#if CV_FP16_TYPE
+#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
 static float convertFp16SW(short fp16)
 {
     // Fp16 -> Fp32
@@ -4452,7 +4451,7 @@ static float convertFp16SW(short fp16)
 }
 #endif
 
-#if CV_FP16_TYPE
+#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
 static short convertFp16SW(float fp32)
 {
     // Fp32 -> Fp16
@@ -4560,7 +4559,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t
             if ( ( (intptr_t)dst & 0xf ) == 0 )
 #endif
             {
-#if CV_FP16 && CV_SIMD128
+#if CV_FP16
                 for ( ; x <= size.width - 4; x += 4)
                 {
                     v_float32x4 v_src = v_load(src + x);
@@ -4606,7 +4605,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t
             if ( ( (intptr_t)src & 0xf ) == 0 )
 #endif
             {
-#if CV_FP16 && CV_SIMD128
+#if CV_FP16
                 for ( ; x <= size.width - 4; x += 4)
                 {
                     v_float16x4 v_src = v_load_f16(src + x);
diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp
index 66b2083..7349d48 100644
--- a/modules/core/test/test_intrin.cpp
+++ b/modules/core/test/test_intrin.cpp
@@ -729,56 +729,48 @@ template<typename R> struct TheTest
         return *this;
     }
 
+#if CV_FP16
     TheTest & test_loadstore_fp16()
     {
-#if CV_FP16
         AlignedData<R> data;
         AlignedData<R> out;
 
-        if(checkHardwareSupport(CV_CPU_FP16))
-        {
-            // check if addresses are aligned and unaligned respectively
-            EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
-            EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
-            EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
-            EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
-
-            // check some initialization methods
-            R r1 = data.u;
-            R r2 = v_load_f16(data.a.d);
-            R r3(r2);
-            EXPECT_EQ(data.u[0], r1.get0());
-            EXPECT_EQ(data.a[0], r2.get0());
-            EXPECT_EQ(data.a[0], r3.get0());
-
-            // check some store methods
-            out.a.clear();
-            v_store_f16(out.a.d, r1);
-            EXPECT_EQ(data.a, out.a);
-        }
+        // check if addresses are aligned and unaligned respectively
+        EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
+        EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
+        EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
+        EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
+
+        // check some initialization methods
+        R r1 = data.u;
+        R r2 = v_load_f16(data.a.d);
+        R r3(r2);
+        EXPECT_EQ(data.u[0], r1.get0());
+        EXPECT_EQ(data.a[0], r2.get0());
+        EXPECT_EQ(data.a[0], r3.get0());
+
+        // check some store methods
+        out.a.clear();
+        v_store_f16(out.a.d, r1);
+        EXPECT_EQ(data.a, out.a);
 
         return *this;
-#endif
     }
 
     TheTest & test_float_cvt_fp16()
     {
-#if CV_FP16
         AlignedData<v_float32x4> data;
 
-        if(checkHardwareSupport(CV_CPU_FP16))
-        {
-            // check conversion
-            v_float32x4 r1 = v_load(data.a.d);
-            v_float16x4 r2 = v_cvt_f16(r1);
-            v_float32x4 r3 = v_cvt_f32(r2);
-            EXPECT_EQ(0x3c00, r2.get0());
-            EXPECT_EQ(r3.get0(), r1.get0());
-        }
+        // check conversion
+        v_float32x4 r1 = v_load(data.a.d);
+        v_float16x4 r2 = v_cvt_f16(r1);
+        v_float32x4 r3 = v_cvt_f32(r2);
+        EXPECT_EQ(0x3c00, r2.get0());
+        EXPECT_EQ(r3.get0(), r1.get0());
 
         return *this;
-#endif
     }
+#endif
 
 };
 
-- 
2.9.3

