1 // Copyright (C) 2002-2012 Nikolaus Gebhardt
\r
2 // This file is part of the "Irrlicht Engine".
\r
3 // For conditions of distribution and use, see copyright notice in irrlicht.h
\r
5 #ifndef __IRR_MATH_H_INCLUDED__
\r
6 #define __IRR_MATH_H_INCLUDED__
\r
8 #include "IrrCompileConfig.h"
\r
9 #include "irrTypes.h"
\r
12 #include <stdlib.h> // for abs() etc.
\r
13 #include <limits.h> // For INT_MAX / UINT_MAX
\r
15 #if defined(_IRR_SOLARIS_PLATFORM_) || defined(__BORLANDC__) || defined (__BCPLUSPLUS__) || defined (_WIN32_WCE)
\r
16 #define sqrtf(X) (irr::f32)sqrt((irr::f64)(X))
\r
17 #define sinf(X) (irr::f32)sin((irr::f64)(X))
\r
18 #define cosf(X) (irr::f32)cos((irr::f64)(X))
\r
19 #define asinf(X) (irr::f32)asin((irr::f64)(X))
\r
20 #define acosf(X) (irr::f32)acos((irr::f64)(X))
\r
21 #define atan2f(X,Y) (irr::f32)atan2((irr::f64)(X),(irr::f64)(Y))
\r
22 #define ceilf(X) (irr::f32)ceil((irr::f64)(X))
\r
23 #define floorf(X) (irr::f32)floor((irr::f64)(X))
\r
24 #define powf(X,Y) (irr::f32)pow((irr::f64)(X),(irr::f64)(Y))
\r
25 #define fmodf(X,Y) (irr::f32)fmod((irr::f64)(X),(irr::f64)(Y))
\r
26 #define fabsf(X) (irr::f32)fabs((irr::f64)(X))
\r
27 #define logf(X) (irr::f32)log((irr::f64)(X))
\r
31 #define FLT_MAX 3.402823466E+38F
\r
35 #define FLT_MIN 1.17549435e-38F
\r
43 //! Rounding error constant often used when comparing f32 values.
\r
45 const s32 ROUNDING_ERROR_S32 = 0;
\r
47 #ifdef __IRR_HAS_S64
\r
48 const s64 ROUNDING_ERROR_S64 = 0;
\r
50 const f32 ROUNDING_ERROR_f32 = 0.000001f;
\r
51 const f64 ROUNDING_ERROR_f64 = 0.00000001;
\r
53 #ifdef PI // make sure we don't collide with a define
\r
56 //! Constant for PI.
\r
57 const f32 PI = 3.14159265359f;
\r
59 //! Constant for reciprocal of PI.
\r
60 const f32 RECIPROCAL_PI = 1.0f/PI;
\r
62 //! Constant for half of PI.
\r
63 const f32 HALF_PI = PI/2.0f;
\r
65 #ifdef PI64 // make sure we don't collide with a define
\r
68 //! Constant for 64bit PI.
\r
69 const f64 PI64 = 3.1415926535897932384626433832795028841971693993751;
\r
71 //! Constant for 64bit reciprocal of PI.
\r
72 const f64 RECIPROCAL_PI64 = 1.0/PI64;
\r
74 //! 32bit Constant for converting from degrees to radians
\r
75 const f32 DEGTORAD = PI / 180.0f;
\r
77 //! 32bit constant for converting from radians to degrees (formally known as GRAD_PI)
\r
78 const f32 RADTODEG = 180.0f / PI;
\r
80 //! 64bit constant for converting from degrees to radians (formally known as GRAD_PI2)
\r
81 const f64 DEGTORAD64 = PI64 / 180.0;
\r
83 //! 64bit constant for converting from radians to degrees
\r
84 const f64 RADTODEG64 = 180.0 / PI64;
\r
86 //! Utility function to convert a radian value to degrees
\r
87 /** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X
\r
88 \param radians The radians value to convert to degrees.
\r
90 inline f32 radToDeg(f32 radians)
\r
92 return RADTODEG * radians;
\r
95 //! Utility function to convert a radian value to degrees
\r
96 /** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X
\r
97 \param radians The radians value to convert to degrees.
\r
99 inline f64 radToDeg(f64 radians)
\r
101 return RADTODEG64 * radians;
\r
104 //! Utility function to convert a degrees value to radians
\r
105 /** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X
\r
106 \param degrees The degrees value to convert to radians.
\r
108 inline f32 degToRad(f32 degrees)
\r
110 return DEGTORAD * degrees;
\r
113 //! Utility function to convert a degrees value to radians
\r
114 /** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X
\r
115 \param degrees The degrees value to convert to radians.
\r
117 inline f64 degToRad(f64 degrees)
\r
119 return DEGTORAD64 * degrees;
\r
122 //! returns minimum of two values. Own implementation to get rid of the STL (VS6 problems)
\r
124 inline const T& min_(const T& a, const T& b)
\r
126 return a < b ? a : b;
\r
129 //! returns minimum of three values. Own implementation to get rid of the STL (VS6 problems)
\r
131 inline const T& min_(const T& a, const T& b, const T& c)
\r
133 return a < b ? min_(a, c) : min_(b, c);
\r
136 //! returns maximum of two values. Own implementation to get rid of the STL (VS6 problems)
\r
138 inline const T& max_(const T& a, const T& b)
\r
140 return a < b ? b : a;
\r
143 //! returns maximum of three values. Own implementation to get rid of the STL (VS6 problems)
\r
145 inline const T& max_(const T& a, const T& b, const T& c)
\r
147 return a < b ? max_(b, c) : max_(a, c);
\r
150 //! returns abs of two values. Own implementation to get rid of STL (VS6 problems)
\r
152 inline T abs_(const T& a)
\r
154 return a < (T)0 ? -a : a;
\r
157 //! returns linear interpolation of a and b with ratio t
\r
158 //! \return: a if t==0, b if t==1, and the linear interpolation else
\r
160 inline T lerp(const T& a, const T& b, const f32 t)
\r
162 return (T)(a*(1.f-t)) + (b*t);
\r
165 //! clamps a value between low and high
\r
167 inline const T clamp (const T& value, const T& low, const T& high)
\r
169 return min_ (max_(value,low), high);
\r
172 //! swaps the content of the passed parameters
\r
173 // Note: We use the same trick as boost and use two template arguments to
\r
174 // avoid ambiguity when swapping objects of an Irrlicht type that has not
\r
175 // it's own swap overload. Otherwise we get conflicts with some compilers
\r
176 // in combination with stl.
\r
177 template <class T1, class T2>
\r
178 inline void swap(T1& a, T2& b)
\r
186 inline T roundingError();
\r
189 inline f32 roundingError()
\r
191 return ROUNDING_ERROR_f32;
\r
195 inline f64 roundingError()
\r
197 return ROUNDING_ERROR_f64;
\r
201 inline s32 roundingError()
\r
203 return ROUNDING_ERROR_S32;
\r
207 inline u32 roundingError()
\r
209 return ROUNDING_ERROR_S32;
\r
212 #ifdef __IRR_HAS_S64
\r
214 inline s64 roundingError()
\r
216 return ROUNDING_ERROR_S64;
\r
220 inline u64 roundingError()
\r
222 return ROUNDING_ERROR_S64;
\r
227 inline T relativeErrorFactor()
\r
233 inline f32 relativeErrorFactor()
\r
239 inline f64 relativeErrorFactor()
\r
244 //! returns if a equals b, taking possible rounding errors into account
\r
246 inline bool equals(const T a, const T b, const T tolerance = roundingError<T>())
\r
248 return (a + tolerance >= b) && (a - tolerance <= b);
\r
252 //! returns if a equals b, taking relative error in form of factor
\r
253 //! this particular function does not involve any division.
\r
255 inline bool equalsRelative( const T a, const T b, const T factor = relativeErrorFactor<T>())
\r
257 //https://eagergames.wordpress.com/2017/04/01/fast-parallel-lines-and-vectors-test/
\r
259 const T maxi = max_( a, b);
\r
260 const T mini = min_( a, b);
\r
261 const T maxMagnitude = max_( maxi, -mini);
\r
263 return (maxMagnitude*factor + maxi) == (maxMagnitude*factor + mini); // MAD Wise
\r
266 union FloatIntUnion32
\r
268 FloatIntUnion32(float f1 = 0.0f) : f(f1) {}
\r
269 // Portable sign-extraction
\r
270 bool sign() const { return (i >> 31) != 0; }
\r
276 //! We compare the difference in ULP's (spacing between floating-point numbers, aka ULP=1 means there exists no float between).
\r
277 //\result true when numbers have a ULP <= maxUlpDiff AND have the same sign.
\r
278 inline bool equalsByUlp(f32 a, f32 b, int maxUlpDiff)
\r
280 // Based on the ideas and code from Bruce Dawson on
\r
281 // http://www.altdevblogaday.com/2012/02/22/comparing-floating-point-numbers-2012-edition/
\r
282 // When floats are interpreted as integers the two nearest possible float numbers differ just
\r
283 // by one integer number. Also works the other way round, an integer of 1 interpreted as float
\r
284 // is for example the smallest possible float number.
\r
286 const FloatIntUnion32 fa(a);
\r
287 const FloatIntUnion32 fb(b);
\r
289 // Different signs, we could maybe get difference to 0, but so close to 0 using epsilons is better.
\r
290 if ( fa.sign() != fb.sign() )
\r
292 // Check for equality to make sure +0==-0
\r
298 // Find the difference in ULPs.
\r
299 const int ulpsDiff = abs_(fa.i- fb.i);
\r
300 if (ulpsDiff <= maxUlpDiff)
\r
306 //! returns if a equals zero, taking rounding errors into account
\r
307 inline bool iszero(const f64 a, const f64 tolerance = ROUNDING_ERROR_f64)
\r
309 return fabs(a) <= tolerance;
\r
312 //! returns if a equals zero, taking rounding errors into account
\r
313 inline bool iszero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)
\r
315 return fabsf(a) <= tolerance;
\r
318 //! returns if a equals not zero, taking rounding errors into account
\r
319 inline bool isnotzero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)
\r
321 return fabsf(a) > tolerance;
\r
324 //! returns if a equals zero, taking rounding errors into account
\r
325 inline bool iszero(const s32 a, const s32 tolerance = 0)
\r
327 return ( a & 0x7ffffff ) <= tolerance;
\r
330 //! returns if a equals zero, taking rounding errors into account
\r
331 inline bool iszero(const u32 a, const u32 tolerance = 0)
\r
333 return a <= tolerance;
\r
336 #ifdef __IRR_HAS_S64
\r
337 //! returns if a equals zero, taking rounding errors into account
\r
338 inline bool iszero(const s64 a, const s64 tolerance = 0)
\r
340 return abs_(a) <= tolerance;
\r
344 inline s32 s32_min(s32 a, s32 b)
\r
346 const s32 mask = (a - b) >> 31;
\r
347 return (a & mask) | (b & ~mask);
\r
350 inline s32 s32_max(s32 a, s32 b)
\r
352 const s32 mask = (a - b) >> 31;
\r
353 return (b & mask) | (a & ~mask);
\r
356 inline s32 s32_clamp (s32 value, s32 low, s32 high)
\r
358 return s32_min(s32_max(value,low), high);
\r
362 float IEEE-754 bit representation
\r
370 +NaN 0x7fc00000 or 0x7ff00000
\r
371 in general: number = (sign ? -1:1) * 2^(exponent) * 1.(mantissa bits)
\r
374 typedef union { u32 u; s32 s; f32 f; } inttofloat;
\r
376 #define F32_AS_S32(f) (*((s32 *) &(f)))
\r
377 #define F32_AS_U32(f) (*((u32 *) &(f)))
\r
378 #define F32_AS_U32_POINTER(f) ( ((u32 *) &(f)))
\r
380 #define F32_VALUE_0 0x00000000
\r
381 #define F32_VALUE_1 0x3f800000
\r
382 #define F32_SIGN_BIT 0x80000000U
\r
383 #define F32_EXPON_MANTISSA 0x7FFFFFFFU
\r
385 //! code is taken from IceFPU
\r
386 //! Integer representation of a floating-point value.
\r
387 #ifdef IRRLICHT_FAST_MATH
\r
388 #define IR(x) ((u32&)(x))
\r
390 inline u32 IR(f32 x) {inttofloat tmp; tmp.f=x; return tmp.u;}
\r
393 //! Absolute integer representation of a floating-point value
\r
394 #define AIR(x) (IR(x)&0x7fffffff)
\r
396 //! Floating-point representation of an integer value.
\r
397 #ifdef IRRLICHT_FAST_MATH
\r
398 #define FR(x) ((f32&)(x))
\r
400 inline f32 FR(u32 x) {inttofloat tmp; tmp.u=x; return tmp.f;}
\r
401 inline f32 FR(s32 x) {inttofloat tmp; tmp.s=x; return tmp.f;}
\r
404 //! integer representation of 1.0
\r
405 #define IEEE_1_0 0x3f800000
\r
406 //! integer representation of 255.0
\r
407 #define IEEE_255_0 0x437f0000
\r
409 #ifdef IRRLICHT_FAST_MATH
\r
410 #define F32_LOWER_0(f) (F32_AS_U32(f) > F32_SIGN_BIT)
\r
411 #define F32_LOWER_EQUAL_0(f) (F32_AS_S32(f) <= F32_VALUE_0)
\r
412 #define F32_GREATER_0(f) (F32_AS_S32(f) > F32_VALUE_0)
\r
413 #define F32_GREATER_EQUAL_0(f) (F32_AS_U32(f) <= F32_SIGN_BIT)
\r
414 #define F32_EQUAL_1(f) (F32_AS_U32(f) == F32_VALUE_1)
\r
415 #define F32_EQUAL_0(f) ( (F32_AS_U32(f) & F32_EXPON_MANTISSA ) == F32_VALUE_0)
\r
418 #define F32_A_GREATER_B(a,b) (F32_AS_S32((a)) > F32_AS_S32((b)))
\r
422 #define F32_LOWER_0(n) ((n) < 0.0f)
\r
423 #define F32_LOWER_EQUAL_0(n) ((n) <= 0.0f)
\r
424 #define F32_GREATER_0(n) ((n) > 0.0f)
\r
425 #define F32_GREATER_EQUAL_0(n) ((n) >= 0.0f)
\r
426 #define F32_EQUAL_1(n) ((n) == 1.0f)
\r
427 #define F32_EQUAL_0(n) ((n) == 0.0f)
\r
428 #define F32_A_GREATER_B(a,b) ((a) > (b))
\r
434 #define REALINLINE __forceinline
\r
436 #define REALINLINE inline
\r
440 #if defined(__BORLANDC__) || defined (__BCPLUSPLUS__)
\r
442 // 8-bit bools in Borland builder
\r
444 //! conditional set based on mask and arithmetic shift
\r
445 REALINLINE u32 if_c_a_else_b ( const c8 condition, const u32 a, const u32 b )
\r
447 return ( ( -condition >> 7 ) & ( a ^ b ) ) ^ b;
\r
450 //! conditional set based on mask and arithmetic shift
\r
451 REALINLINE u32 if_c_a_else_0 ( const c8 condition, const u32 a )
\r
453 return ( -condition >> 31 ) & a;
\r
457 //! conditional set based on mask and arithmetic shift
\r
458 REALINLINE u32 if_c_a_else_b ( const s32 condition, const u32 a, const u32 b )
\r
460 return ( ( -condition >> 31 ) & ( a ^ b ) ) ^ b;
\r
463 //! conditional set based on mask and arithmetic shift
\r
464 REALINLINE u16 if_c_a_else_b ( const s16 condition, const u16 a, const u16 b )
\r
466 return ( ( -condition >> 15 ) & ( a ^ b ) ) ^ b;
\r
469 //! conditional set based on mask and arithmetic shift
\r
470 REALINLINE u32 if_c_a_else_0 ( const s32 condition, const u32 a )
\r
472 return ( -condition >> 31 ) & a;
\r
477 if (condition) state |= m; else state &= ~m;
\r
479 REALINLINE void setbit_cond ( u32 &state, s32 condition, u32 mask )
\r
481 // 0, or any positive to mask
\r
482 //s32 conmask = -condition >> 31;
\r
483 state ^= ( ( -condition >> 31 ) ^ state ) & mask;
\r
486 // NOTE: This is not as exact as the c99/c++11 round function, especially at high numbers starting with 8388609
\r
487 // (only low number which seems to go wrong is 0.49999997 which is rounded to 1)
\r
488 // Also negative 0.5 is rounded up not down unlike with the standard function (p.E. input -0.5 will be 0 and not -1)
\r
489 inline f32 round_( f32 x )
\r
491 return floorf( x + 0.5f );
\r
494 // calculate: sqrt ( x )
\r
495 REALINLINE f32 squareroot(const f32 f)
\r
500 // calculate: sqrt ( x )
\r
501 REALINLINE f64 squareroot(const f64 f)
\r
506 // calculate: sqrt ( x )
\r
507 REALINLINE s32 squareroot(const s32 f)
\r
509 return static_cast<s32>(squareroot(static_cast<f32>(f)));
\r
512 #ifdef __IRR_HAS_S64
\r
513 // calculate: sqrt ( x )
\r
514 REALINLINE s64 squareroot(const s64 f)
\r
516 return static_cast<s64>(squareroot(static_cast<f64>(f)));
\r
520 // calculate: 1 / sqrt ( x )
\r
521 REALINLINE f64 reciprocal_squareroot(const f64 x)
\r
523 return 1.0 / sqrt(x);
\r
526 // calculate: 1 / sqrtf ( x )
\r
527 REALINLINE f32 reciprocal_squareroot(const f32 f)
\r
529 #if defined ( IRRLICHT_FAST_MATH )
\r
530 // NOTE: Unlike comment below says I found inaccuracies already at 4'th significant bit.
\r
531 // p.E: Input 1, expected 1, got 0.999755859
\r
533 #if defined(_MSC_VER) && !defined(_WIN64)
\r
534 // SSE reciprocal square root estimate, accurate to 12 significant
\r
535 // bits of the mantissa
\r
537 __asm rsqrtss xmm0, f // xmm0 = rsqrtss(f)
\r
538 __asm movss recsqrt, xmm0 // return xmm0
\r
542 // comes from Nvidia
\r
543 u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;
\r
544 f32 y = *(f32*)&tmp;
\r
545 return y * (1.47f - 0.47f * x * y * y);
\r
548 return 1.f / sqrtf(f);
\r
550 #else // no fast math
\r
551 return 1.f / sqrtf(f);
\r
555 // calculate: 1 / sqrtf( x )
\r
556 REALINLINE s32 reciprocal_squareroot(const s32 x)
\r
558 return static_cast<s32>(reciprocal_squareroot(static_cast<f32>(x)));
\r
561 // calculate: 1 / x
\r
562 REALINLINE f32 reciprocal( const f32 f )
\r
564 #if defined (IRRLICHT_FAST_MATH)
\r
565 // NOTE: Unlike with 1.f / f the values very close to 0 return -nan instead of inf
\r
567 // SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
\r
568 // bi ts of the mantissa
\r
569 // One Newton-Raphson Iteration:
\r
570 // f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
\r
571 #if defined(_MSC_VER) && !defined(_WIN64)
\r
573 __asm rcpss xmm0, f // xmm0 = rcpss(f)
\r
574 __asm movss xmm1, f // xmm1 = f
\r
575 __asm mulss xmm1, xmm0 // xmm1 = f * rcpss(f)
\r
576 __asm mulss xmm1, xmm0 // xmm2 = f * rcpss(f) * rcpss(f)
\r
577 __asm addss xmm0, xmm0 // xmm0 = 2 * rcpss(f)
\r
578 __asm subss xmm0, xmm1 // xmm0 = 2 * rcpss(f)
\r
579 // - f * rcpss(f) * rcpss(f)
\r
580 __asm movss rec, xmm0 // return xmm0
\r
582 #else // no support yet for other compilers
\r
585 //! i do not divide through 0.. (fpu expection)
\r
586 // instead set f to a high value to get a return value near zero..
\r
587 // -1000000000000.f.. is use minus to stay negative..
\r
588 // must test's here (plane.normal dot anything ) checks on <= 0.f
\r
589 //u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;
\r
590 //return 1.f / FR ( x );
\r
592 #else // no fast math
\r
597 // calculate: 1 / x
\r
598 REALINLINE f64 reciprocal ( const f64 f )
\r
604 // calculate: 1 / x, low precision allowed
\r
605 REALINLINE f32 reciprocal_approxim ( const f32 f )
\r
607 #if defined( IRRLICHT_FAST_MATH)
\r
609 // SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
\r
610 // bi ts of the mantissa
\r
611 // One Newton-Raphson Iteration:
\r
612 // f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
\r
613 #if defined(_MSC_VER) && !defined(_WIN64)
\r
615 __asm rcpss xmm0, f // xmm0 = rcpss(f)
\r
616 __asm movss xmm1, f // xmm1 = f
\r
617 __asm mulss xmm1, xmm0 // xmm1 = f * rcpss(f)
\r
618 __asm mulss xmm1, xmm0 // xmm2 = f * rcpss(f) * rcpss(f)
\r
619 __asm addss xmm0, xmm0 // xmm0 = 2 * rcpss(f)
\r
620 __asm subss xmm0, xmm1 // xmm0 = 2 * rcpss(f)
\r
621 // - f * rcpss(f) * rcpss(f)
\r
622 __asm movss rec, xmm0 // return xmm0
\r
624 #else // no support yet for other compilers
\r
629 // SSE reciprocal estimate, accurate to 12 significant bits of
\r
631 __asm rcpss xmm0, f // xmm0 = rcpss(f)
\r
632 __asm movss rec , xmm0 // return xmm0
\r
636 u32 x = 0x7F000000 - IR ( p );
\r
637 const f32 r = FR ( x );
\r
638 return r * (2.0f - p * r);
\r
640 #else // no fast math
\r
646 REALINLINE s32 floor32(f32 x)
\r
648 return (s32) floorf ( x );
\r
651 REALINLINE s32 ceil32 ( f32 x )
\r
653 return (s32) ceilf ( x );
\r
656 // NOTE: Please check round_ documentation about some inaccuracies in this compared to standard library round function.
\r
657 REALINLINE s32 round32(f32 x)
\r
659 return (s32) round_(x);
\r
662 inline f32 f32_max3(const f32 a, const f32 b, const f32 c)
\r
664 return a > b ? (a > c ? a : c) : (b > c ? b : c);
\r
667 inline f32 f32_min3(const f32 a, const f32 b, const f32 c)
\r
669 return a < b ? (a < c ? a : c) : (b < c ? b : c);
\r
672 inline f32 fract ( f32 x )
\r
674 return x - floorf ( x );
\r
677 } // end namespace core
\r
678 } // end namespace irr
\r
680 #ifndef IRRLICHT_FAST_MATH
\r
681 using irr::core::IR;
\r
682 using irr::core::FR;
\r