Tom Stellard 17ec3a51c3 Implement fast_normalize builtin v4
This implementation was ported from the AMD builtin library
and has been tested with piglit, OpenCV, and the OpenCL conformance tests.

v2:
  - Remove f suffix from constant in double implementations.
  - Consolidate implementations using the .cl/.inc approach.

v3:
 - Use __CLC_FPSIZE instead of __CLC_FP{32,64}

v4 (Jan Vesely):
 - Limit to single precision.

llvm-svn: 236920
2015-05-09 00:04:12 +00:00

56 lines
893 B
C++

/*
 * Type-expansion driver: includes the file named by __CLC_BODY once for each
 * of float, float2, float3 and float4, and then once for each of double,
 * double2, double3 and double4 when __FLOAT_ONLY is not defined and
 * cl_khr_fp64 is defined.
 *
 * Macros visible to __CLC_BODY during each pass:
 *   __CLC_FLOATN  - the type for this pass (scalar or 2/3/4-wide vector)
 *   __CLC_FLOAT   - float or double, matching __CLC_FLOATN
 *   __CLC_FPSIZE  - 32 in the float passes, 64 in the double passes
 *   __CLC_SCALAR  - defined only in the scalar pass
 *
 * __CLC_BODY has to be defined before this file is included; it is undefined
 * again at the end, together with every macro set here.
 */

/* ---- float, float2, float3, float4 ---- */
#define __CLC_FPSIZE 32
#define __CLC_FLOAT float

#define __CLC_SCALAR
#define __CLC_FLOATN float
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_FLOATN

#define __CLC_FLOATN float2
#include __CLC_BODY
#undef __CLC_FLOATN

#define __CLC_FLOATN float3
#include __CLC_BODY
#undef __CLC_FLOATN

#define __CLC_FLOATN float4
#include __CLC_BODY
#undef __CLC_FLOATN

#undef __CLC_FPSIZE
#undef __CLC_FLOAT

/* ---- double, double2, double3, double4 ---- */
/* Skipped when the includer defines __FLOAT_ONLY or cl_khr_fp64 is not defined. */
#if !defined(__FLOAT_ONLY) && defined(cl_khr_fp64)

#define __CLC_FPSIZE 64
#define __CLC_FLOAT double

#define __CLC_SCALAR
#define __CLC_FLOATN double
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_FLOATN

#define __CLC_FLOATN double2
#include __CLC_BODY
#undef __CLC_FLOATN

#define __CLC_FLOATN double3
#include __CLC_BODY
#undef __CLC_FLOATN

#define __CLC_FLOATN double4
#include __CLC_BODY
#undef __CLC_FLOATN

#undef __CLC_FPSIZE
#undef __CLC_FLOAT

#endif /* !__FLOAT_ONLY && cl_khr_fp64 */

/* The body file name is consumed by this include; the includer must redefine it for the next use. */
#undef __CLC_BODY