include/irrMath.h

   1 // Copyright (C) 2002-2012 Nikolaus Gebhardt\r
   2 // This file is part of the "Irrlicht Engine".\r
   3 // For conditions of distribution and use, see copyright notice in irrlicht.h\r
   4 \r
   5 #ifndef __IRR_MATH_H_INCLUDED__\r
   6 #define __IRR_MATH_H_INCLUDED__\r
   7 \r
   8 #include "IrrCompileConfig.h"\r
   9 #include "irrTypes.h"\r
  10 #include <math.h>\r
  11 #include <float.h>\r
  12 #include <stdlib.h> // for abs() etc.\r
  13 #include <limits.h> // For INT_MAX / UINT_MAX\r
  14 \r
  15 #if defined(_IRR_SOLARIS_PLATFORM_) || defined(__BORLANDC__) || defined (__BCPLUSPLUS__) || defined (_WIN32_WCE)\r
  16         #define sqrtf(X) (irr::f32)sqrt((irr::f64)(X))\r
  17         #define sinf(X) (irr::f32)sin((irr::f64)(X))\r
  18         #define cosf(X) (irr::f32)cos((irr::f64)(X))\r
  19         #define asinf(X) (irr::f32)asin((irr::f64)(X))\r
  20         #define acosf(X) (irr::f32)acos((irr::f64)(X))\r
  21         #define atan2f(X,Y) (irr::f32)atan2((irr::f64)(X),(irr::f64)(Y))\r
  22         #define ceilf(X) (irr::f32)ceil((irr::f64)(X))\r
  23         #define floorf(X) (irr::f32)floor((irr::f64)(X))\r
  24         #define powf(X,Y) (irr::f32)pow((irr::f64)(X),(irr::f64)(Y))\r
  25         #define fmodf(X,Y) (irr::f32)fmod((irr::f64)(X),(irr::f64)(Y))\r
  26         #define fabsf(X) (irr::f32)fabs((irr::f64)(X))\r
  27         #define logf(X) (irr::f32)log((irr::f64)(X))\r
  28 #endif\r
  29 \r
  30 #ifndef FLT_MAX\r
  31 #define FLT_MAX 3.402823466E+38F\r
  32 #endif\r
  33 \r
  34 #ifndef FLT_MIN\r
  35 #define FLT_MIN 1.17549435e-38F\r
  36 #endif\r
  37 \r
  38 namespace irr\r
  39 {\r
  40 namespace core\r
  41 {\r
  42 \r
  43         //! Rounding error constant often used when comparing f32 values.\r
  44 \r
  45         const s32 ROUNDING_ERROR_S32 = 0;\r
  46 \r
  47 #ifdef __IRR_HAS_S64\r
  48         const s64 ROUNDING_ERROR_S64 = 0;\r
  49 #endif\r
  50         const f32 ROUNDING_ERROR_f32 = 0.000001f;\r
  51         const f64 ROUNDING_ERROR_f64 = 0.00000001;\r
  52 \r
  53 #ifdef PI // make sure we don't collide with a define\r
  54 #undef PI\r
  55 #endif\r
  56         //! Constant for PI.\r
  57         const f32 PI = 3.14159265359f;\r
  58 \r
  59         //! Constant for reciprocal of PI.\r
  60         const f32 RECIPROCAL_PI = 1.0f/PI;\r
  61 \r
  62         //! Constant for half of PI.\r
  63         const f32 HALF_PI = PI/2.0f;\r
  64 \r
  65 #ifdef PI64 // make sure we don't collide with a define\r
  66 #undef PI64\r
  67 #endif\r
  68         //! Constant for 64bit PI.\r
  69         const f64 PI64 = 3.1415926535897932384626433832795028841971693993751;\r
  70 \r
  71         //! Constant for 64bit reciprocal of PI.\r
  72         const f64 RECIPROCAL_PI64 = 1.0/PI64;\r
  73 \r
  74         //! 32bit Constant for converting from degrees to radians\r
  75         const f32 DEGTORAD = PI / 180.0f;\r
  76 \r
  77         //! 32bit constant for converting from radians to degrees (formally known as GRAD_PI)\r
  78         const f32 RADTODEG   = 180.0f / PI;\r
  79 \r
  80         //! 64bit constant for converting from degrees to radians (formally known as GRAD_PI2)\r
  81         const f64 DEGTORAD64 = PI64 / 180.0;\r
  82 \r
  83         //! 64bit constant for converting from radians to degrees\r
  84         const f64 RADTODEG64 = 180.0 / PI64;\r
  85 \r
  86         //! Utility function to convert a radian value to degrees\r
  87         /** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X\r
  88         \param radians The radians value to convert to degrees.\r
  89         */\r
  90         inline f32 radToDeg(f32 radians)\r
  91         {\r
  92                 return RADTODEG * radians;\r
  93         }\r
  94 \r
  95         //! Utility function to convert a radian value to degrees\r
  96         /** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X\r
  97         \param radians The radians value to convert to degrees.\r
  98         */\r
  99         inline f64 radToDeg(f64 radians)\r
 100         {\r
 101                 return RADTODEG64 * radians;\r
 102         }\r
 103 \r
 104         //! Utility function to convert a degrees value to radians\r
 105         /** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X\r
 106         \param degrees The degrees value to convert to radians.\r
 107         */\r
 108         inline f32 degToRad(f32 degrees)\r
 109         {\r
 110                 return DEGTORAD * degrees;\r
 111         }\r
 112 \r
 113         //! Utility function to convert a degrees value to radians\r
 114         /** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X\r
 115         \param degrees The degrees value to convert to radians.\r
 116         */\r
 117         inline f64 degToRad(f64 degrees)\r
 118         {\r
 119                 return DEGTORAD64 * degrees;\r
 120         }\r
 121 \r
 122         //! returns minimum of two values. Own implementation to get rid of the STL (VS6 problems)\r
 123         template<class T>\r
 124         inline const T& min_(const T& a, const T& b)\r
 125         {\r
 126                 return a < b ? a : b;\r
 127         }\r
 128 \r
 129         //! returns minimum of three values. Own implementation to get rid of the STL (VS6 problems)\r
 130         template<class T>\r
 131         inline const T& min_(const T& a, const T& b, const T& c)\r
 132         {\r
 133                 return a < b ? min_(a, c) : min_(b, c);\r
 134         }\r
 135 \r
 136         //! returns maximum of two values. Own implementation to get rid of the STL (VS6 problems)\r
 137         template<class T>\r
 138         inline const T& max_(const T& a, const T& b)\r
 139         {\r
 140                 return a < b ? b : a;\r
 141         }\r
 142 \r
 143         //! returns maximum of three values. Own implementation to get rid of the STL (VS6 problems)\r
 144         template<class T>\r
 145         inline const T& max_(const T& a, const T& b, const T& c)\r
 146         {\r
 147                 return a < b ? max_(b, c) : max_(a, c);\r
 148         }\r
 149 \r
 150         //! returns abs of two values. Own implementation to get rid of STL (VS6 problems)\r
 151         template<class T>\r
 152         inline T abs_(const T& a)\r
 153         {\r
 154                 return a < (T)0 ? -a : a;\r
 155         }\r
 156 \r
 157         //! returns linear interpolation of a and b with ratio t\r
 158         //! \return: a if t==0, b if t==1, and the linear interpolation else\r
 159         template<class T>\r
 160         inline T lerp(const T& a, const T& b, const f32 t)\r
 161         {\r
 162                 return (T)(a*(1.f-t)) + (b*t);\r
 163         }\r
 164 \r
 165         //! clamps a value between low and high\r
 166         template <class T>\r
 167         inline const T clamp (const T& value, const T& low, const T& high)\r
 168         {\r
 169                 return min_ (max_(value,low), high);\r
 170         }\r
 171 \r
 172         //! swaps the content of the passed parameters\r
 173         // Note: We use the same trick as boost and use two template arguments to\r
 174         // avoid ambiguity when swapping objects of an Irrlicht type that has not\r
 175         // it's own swap overload. Otherwise we get conflicts with some compilers\r
 176         // in combination with stl.\r
 177         template <class T1, class T2>\r
 178         inline void swap(T1& a, T2& b)\r
 179         {\r
 180                 T1 c(a);\r
 181                 a = b;\r
 182                 b = c;\r
 183         }\r
 184 \r
 185         template <class T>\r
 186         inline T roundingError();\r
 187 \r
 188         template <>\r
 189         inline f32 roundingError()\r
 190         {\r
 191                 return ROUNDING_ERROR_f32;\r
 192         }\r
 193 \r
 194         template <>\r
 195         inline f64 roundingError()\r
 196         {\r
 197                 return ROUNDING_ERROR_f64;\r
 198         }\r
 199 \r
 200         template <>\r
 201         inline s32 roundingError()\r
 202         {\r
 203                 return ROUNDING_ERROR_S32;\r
 204         }\r
 205 \r
 206         template <>\r
 207         inline u32 roundingError()\r
 208         {\r
 209                 return ROUNDING_ERROR_S32;\r
 210         }\r
 211 \r
 212 #ifdef __IRR_HAS_S64\r
 213         template <>\r
 214         inline s64 roundingError()\r
 215         {\r
 216                 return ROUNDING_ERROR_S64;\r
 217         }\r
 218 \r
 219         template <>\r
 220         inline u64 roundingError()\r
 221         {\r
 222                 return ROUNDING_ERROR_S64;\r
 223         }\r
 224 #endif\r
 225 \r
 226         template <class T>\r
 227         inline T relativeErrorFactor()\r
 228         {\r
 229                 return 1;\r
 230         }\r
 231 \r
 232         template <>\r
 233         inline f32 relativeErrorFactor()\r
 234         {\r
 235                 return 4;\r
 236         }\r
 237 \r
 238         template <>\r
 239         inline f64 relativeErrorFactor()\r
 240         {\r
 241                 return 8;\r
 242         }\r
 243 \r
 244         //! returns if a equals b, taking possible rounding errors into account\r
 245         template <class T>\r
 246         inline bool equals(const T a, const T b, const T tolerance = roundingError<T>())\r
 247         {\r
 248                 return (a + tolerance >= b) && (a - tolerance <= b);\r
 249         }\r
 250 \r
 251 \r
 252         //! returns if a equals b, taking relative error in form of factor\r
 253         //! this particular function does not involve any division.\r
 254         template <class T>\r
 255         inline bool equalsRelative( const T a, const T b, const T factor = relativeErrorFactor<T>())\r
 256         {\r
 257                 //https://eagergames.wordpress.com/2017/04/01/fast-parallel-lines-and-vectors-test/\r
 258 \r
 259                 const T maxi = max_( a, b);\r
 260                 const T mini = min_( a, b);\r
 261                 const T maxMagnitude = max_( maxi, -mini);\r
 262 \r
 263                 return  (maxMagnitude*factor + maxi) == (maxMagnitude*factor + mini); // MAD Wise\r
 264         }\r
 265 \r
 266         union FloatIntUnion32\r
 267         {\r
 268                 FloatIntUnion32(float f1 = 0.0f) : f(f1) {}\r
 269                 // Portable sign-extraction\r
 270                 bool sign() const { return (i >> 31) != 0; }\r
 271 \r
 272                 irr::s32 i;\r
 273                 irr::f32 f;\r
 274         };\r
 275 \r
 276         //! We compare the difference in ULP's (spacing between floating-point numbers, aka ULP=1 means there exists no float between).\r
 277         //\result true when numbers have a ULP <= maxUlpDiff AND have the same sign.\r
 278         inline bool equalsByUlp(f32 a, f32 b, int maxUlpDiff)\r
 279         {\r
 280                 // Based on the ideas and code from Bruce Dawson on\r
 281                 // http://www.altdevblogaday.com/2012/02/22/comparing-floating-point-numbers-2012-edition/\r
 282                 // When floats are interpreted as integers the two nearest possible float numbers differ just\r
 283                 // by one integer number. Also works the other way round, an integer of 1 interpreted as float\r
 284                 // is for example the smallest possible float number.\r
 285 \r
 286                 const FloatIntUnion32 fa(a);\r
 287                 const FloatIntUnion32 fb(b);\r
 288 \r
 289                 // Different signs, we could maybe get difference to 0, but so close to 0 using epsilons is better.\r
 290                 if ( fa.sign() != fb.sign() )\r
 291                 {\r
 292                         // Check for equality to make sure +0==-0\r
 293                         if (fa.i == fb.i)\r
 294                                 return true;\r
 295                         return false;\r
 296                 }\r
 297 \r
 298                 // Find the difference in ULPs.\r
 299                 const int ulpsDiff = abs_(fa.i- fb.i);\r
 300                 if (ulpsDiff <= maxUlpDiff)\r
 301                         return true;\r
 302 \r
 303                 return false;\r
 304         }\r
 305 \r
 306         //! returns if a equals zero, taking rounding errors into account\r
 307         inline bool iszero(const f64 a, const f64 tolerance = ROUNDING_ERROR_f64)\r
 308         {\r
 309                 return fabs(a) <= tolerance;\r
 310         }\r
 311 \r
 312         //! returns if a equals zero, taking rounding errors into account\r
 313         inline bool iszero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)\r
 314         {\r
 315                 return fabsf(a) <= tolerance;\r
 316         }\r
 317 \r
 318         //! returns if a equals not zero, taking rounding errors into account\r
 319         inline bool isnotzero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)\r
 320         {\r
 321                 return fabsf(a) > tolerance;\r
 322         }\r
 323 \r
 324         //! returns if a equals zero, taking rounding errors into account\r
 325         inline bool iszero(const s32 a, const s32 tolerance = 0)\r
 326         {\r
 327                 return ( a & 0x7ffffff ) <= tolerance;\r
 328         }\r
 329 \r
 330         //! returns if a equals zero, taking rounding errors into account\r
 331         inline bool iszero(const u32 a, const u32 tolerance = 0)\r
 332         {\r
 333                 return a <= tolerance;\r
 334         }\r
 335 \r
 336 #ifdef __IRR_HAS_S64\r
 337         //! returns if a equals zero, taking rounding errors into account\r
 338         inline bool iszero(const s64 a, const s64 tolerance = 0)\r
 339         {\r
 340                 return abs_(a) <= tolerance;\r
 341         }\r
 342 #endif\r
 343 \r
 344         inline s32 s32_min(s32 a, s32 b)\r
 345         {\r
 346                 const s32 mask = (a - b) >> 31;\r
 347                 return (a & mask) | (b & ~mask);\r
 348         }\r
 349 \r
 350         inline s32 s32_max(s32 a, s32 b)\r
 351         {\r
 352                 const s32 mask = (a - b) >> 31;\r
 353                 return (b & mask) | (a & ~mask);\r
 354         }\r
 355 \r
 356         inline s32 s32_clamp (s32 value, s32 low, s32 high)\r
 357         {\r
 358                 return s32_min(s32_max(value,low), high);\r
 359         }\r
 360 \r
 361         /*\r
 362                 float IEEE-754 bit representation\r
 363 \r
 364                 0      0x00000000\r
 365                 1.0    0x3f800000\r
 366                 0.5    0x3f000000\r
 367                 3      0x40400000\r
 368                 +inf   0x7f800000\r
 369                 -inf   0xff800000\r
 370                 +NaN   0x7fc00000 or 0x7ff00000\r
 371                 in general: number = (sign ? -1:1) * 2^(exponent) * 1.(mantissa bits)\r
 372         */\r
 373 \r
 374         typedef union { u32 u; s32 s; f32 f; } inttofloat;\r
 375 \r
 376         #define F32_AS_S32(f)           (*((s32 *) &(f)))\r
 377         #define F32_AS_U32(f)           (*((u32 *) &(f)))\r
 378         #define F32_AS_U32_POINTER(f)   ( ((u32 *) &(f)))\r
 379 \r
 380         #define F32_VALUE_0             0x00000000\r
 381         #define F32_VALUE_1             0x3f800000\r
 382         #define F32_SIGN_BIT            0x80000000U\r
 383         #define F32_EXPON_MANTISSA      0x7FFFFFFFU\r
 384 \r
 385         //! code is taken from IceFPU\r
 386         //! Integer representation of a floating-point value.\r
 387 #ifdef IRRLICHT_FAST_MATH\r
 388         #define IR(x)                   ((u32&)(x))\r
 389 #else\r
 390         inline u32 IR(f32 x) {inttofloat tmp; tmp.f=x; return tmp.u;}\r
 391 #endif\r
 392 \r
 393         //! Absolute integer representation of a floating-point value\r
 394         #define AIR(x)                  (IR(x)&0x7fffffff)\r
 395 \r
 396         //! Floating-point representation of an integer value.\r
 397 #ifdef IRRLICHT_FAST_MATH\r
 398         #define FR(x)                   ((f32&)(x))\r
 399 #else\r
 400         inline f32 FR(u32 x) {inttofloat tmp; tmp.u=x; return tmp.f;}\r
 401         inline f32 FR(s32 x) {inttofloat tmp; tmp.s=x; return tmp.f;}\r
 402 #endif\r
 403 \r
 404         //! integer representation of 1.0\r
 405         #define IEEE_1_0                0x3f800000\r
 406         //! integer representation of 255.0\r
 407         #define IEEE_255_0              0x437f0000\r
 408 \r
 409 #ifdef IRRLICHT_FAST_MATH\r
 410         #define F32_LOWER_0(f)          (F32_AS_U32(f) >  F32_SIGN_BIT)\r
 411         #define F32_LOWER_EQUAL_0(f)    (F32_AS_S32(f) <= F32_VALUE_0)\r
 412         #define F32_GREATER_0(f)        (F32_AS_S32(f) >  F32_VALUE_0)\r
 413         #define F32_GREATER_EQUAL_0(f)  (F32_AS_U32(f) <= F32_SIGN_BIT)\r
 414         #define F32_EQUAL_1(f)          (F32_AS_U32(f) == F32_VALUE_1)\r
 415         #define F32_EQUAL_0(f)          ( (F32_AS_U32(f) & F32_EXPON_MANTISSA ) == F32_VALUE_0)\r
 416 \r
 417         // only same sign\r
 418         #define F32_A_GREATER_B(a,b)    (F32_AS_S32((a)) > F32_AS_S32((b)))\r
 419 \r
 420 #else\r
 421 \r
 422         #define F32_LOWER_0(n)          ((n) <  0.0f)\r
 423         #define F32_LOWER_EQUAL_0(n)    ((n) <= 0.0f)\r
 424         #define F32_GREATER_0(n)        ((n) >  0.0f)\r
 425         #define F32_GREATER_EQUAL_0(n)  ((n) >= 0.0f)\r
 426         #define F32_EQUAL_1(n)          ((n) == 1.0f)\r
 427         #define F32_EQUAL_0(n)          ((n) == 0.0f)\r
 428         #define F32_A_GREATER_B(a,b)    ((a) > (b))\r
 429 #endif\r
 430 \r
 431 \r
 432 #ifndef REALINLINE\r
 433         #ifdef _MSC_VER\r
 434                 #define REALINLINE __forceinline\r
 435         #else\r
 436                 #define REALINLINE inline\r
 437         #endif\r
 438 #endif\r
 439 \r
 440 #if defined(__BORLANDC__) || defined (__BCPLUSPLUS__)\r
 441 \r
 442         // 8-bit bools in Borland builder\r
 443 \r
 444         //! conditional set based on mask and arithmetic shift\r
 445         REALINLINE u32 if_c_a_else_b ( const c8 condition, const u32 a, const u32 b )\r
 446         {\r
 447                 return ( ( -condition >> 7 ) & ( a ^ b ) ) ^ b;\r
 448         }\r
 449 \r
 450         //! conditional set based on mask and arithmetic shift\r
 451         REALINLINE u32 if_c_a_else_0 ( const c8 condition, const u32 a )\r
 452         {\r
 453                 return ( -condition >> 31 ) & a;\r
 454         }\r
 455 #else\r
 456 \r
 457         //! conditional set based on mask and arithmetic shift\r
 458         REALINLINE u32 if_c_a_else_b ( const s32 condition, const u32 a, const u32 b )\r
 459         {\r
 460                 return ( ( -condition >> 31 ) & ( a ^ b ) ) ^ b;\r
 461         }\r
 462 \r
 463         //! conditional set based on mask and arithmetic shift\r
 464         REALINLINE u16 if_c_a_else_b ( const s16 condition, const u16 a, const u16 b )\r
 465         {\r
 466                 return ( ( -condition >> 15 ) & ( a ^ b ) ) ^ b;\r
 467         }\r
 468 \r
 469         //! conditional set based on mask and arithmetic shift\r
 470         REALINLINE u32 if_c_a_else_0 ( const s32 condition, const u32 a )\r
 471         {\r
 472                 return ( -condition >> 31 ) & a;\r
 473         }\r
 474 #endif\r
 475 \r
 476         /*\r
 477                 if (condition) state |= m; else state &= ~m;\r
 478         */\r
 479         REALINLINE void setbit_cond ( u32 &state, s32 condition, u32 mask )\r
 480         {\r
 481                 // 0, or any positive to mask\r
 482                 //s32 conmask = -condition >> 31;\r
 483                 state ^= ( ( -condition >> 31 ) ^ state ) & mask;\r
 484         }\r
 485 \r
 486         // NOTE: This is not as exact as the c99/c++11 round function, especially at high numbers starting with 8388609\r
 487         //       (only low number which seems to go wrong is 0.49999997 which is rounded to 1)\r
 488         //      Also negative 0.5 is rounded up not down unlike with the standard function (p.E. input -0.5 will be 0 and not -1)\r
 489         inline f32 round_( f32 x )\r
 490         {\r
 491                 return floorf( x + 0.5f );\r
 492         }\r
 493 \r
 494         // calculate: sqrt ( x )\r
 495         REALINLINE f32 squareroot(const f32 f)\r
 496         {\r
 497                 return sqrtf(f);\r
 498         }\r
 499 \r
 500         // calculate: sqrt ( x )\r
 501         REALINLINE f64 squareroot(const f64 f)\r
 502         {\r
 503                 return sqrt(f);\r
 504         }\r
 505 \r
 506         // calculate: sqrt ( x )\r
 507         REALINLINE s32 squareroot(const s32 f)\r
 508         {\r
 509                 return static_cast<s32>(squareroot(static_cast<f32>(f)));\r
 510         }\r
 511 \r
 512 #ifdef __IRR_HAS_S64\r
 513         // calculate: sqrt ( x )\r
 514         REALINLINE s64 squareroot(const s64 f)\r
 515         {\r
 516                 return static_cast<s64>(squareroot(static_cast<f64>(f)));\r
 517         }\r
 518 #endif\r
 519 \r
 520         // calculate: 1 / sqrt ( x )\r
 521         REALINLINE f64 reciprocal_squareroot(const f64 x)\r
 522         {\r
 523                 return 1.0 / sqrt(x);\r
 524         }\r
 525 \r
 526         // calculate: 1 / sqrtf ( x )\r
 527         REALINLINE f32 reciprocal_squareroot(const f32 f)\r
 528         {\r
 529 #if defined ( IRRLICHT_FAST_MATH )\r
 530                 // NOTE: Unlike comment below says I found inaccuracies already at 4'th significant bit.\r
 531                 // p.E: Input 1, expected 1, got 0.999755859\r
 532 \r
 533         #if defined(_MSC_VER) && !defined(_WIN64)\r
 534                 // SSE reciprocal square root estimate, accurate to 12 significant\r
 535                 // bits of the mantissa\r
 536                 f32 recsqrt;\r
 537                 __asm rsqrtss xmm0, f           // xmm0 = rsqrtss(f)\r
 538                 __asm movss recsqrt, xmm0       // return xmm0\r
 539                 return recsqrt;\r
 540 \r
 541 /*\r
 542                 // comes from Nvidia\r
 543                 u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;\r
 544                 f32 y = *(f32*)&tmp;\r
 545                 return y * (1.47f - 0.47f * x * y * y);\r
 546 */\r
 547         #else\r
 548                 return 1.f / sqrtf(f);\r
 549         #endif\r
 550 #else // no fast math\r
 551                 return 1.f / sqrtf(f);\r
 552 #endif\r
 553         }\r
 554 \r
 555         // calculate: 1 / sqrtf( x )\r
 556         REALINLINE s32 reciprocal_squareroot(const s32 x)\r
 557         {\r
 558                 return static_cast<s32>(reciprocal_squareroot(static_cast<f32>(x)));\r
 559         }\r
 560 \r
 561         // calculate: 1 / x\r
 562         REALINLINE f32 reciprocal( const f32 f )\r
 563         {\r
 564 #if defined (IRRLICHT_FAST_MATH)\r
 565                 // NOTE: Unlike with 1.f / f the values very close to 0 return -nan instead of inf\r
 566 \r
 567                 // SSE Newton-Raphson reciprocal estimate, accurate to 23 significant\r
 568                 // bi ts of the mantissa\r
 569                 // One Newton-Raphson Iteration:\r
 570                 // f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)\r
 571 #if defined(_MSC_VER) && !defined(_WIN64)\r
 572                 f32 rec;\r
 573                 __asm rcpss xmm0, f               // xmm0 = rcpss(f)\r
 574                 __asm movss xmm1, f               // xmm1 = f\r
 575                 __asm mulss xmm1, xmm0            // xmm1 = f * rcpss(f)\r
 576                 __asm mulss xmm1, xmm0            // xmm2 = f * rcpss(f) * rcpss(f)\r
 577                 __asm addss xmm0, xmm0            // xmm0 = 2 * rcpss(f)\r
 578                 __asm subss xmm0, xmm1            // xmm0 = 2 * rcpss(f)\r
 579                                                                                   //        - f * rcpss(f) * rcpss(f)\r
 580                 __asm movss rec, xmm0             // return xmm0\r
 581                 return rec;\r
 582 #else // no support yet for other compilers\r
 583                 return 1.f / f;\r
 584 #endif\r
 585                 //! i do not divide through 0.. (fpu expection)\r
 586                 // instead set f to a high value to get a return value near zero..\r
 587                 // -1000000000000.f.. is use minus to stay negative..\r
 588                 // must test's here (plane.normal dot anything ) checks on <= 0.f\r
 589                 //u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;\r
 590                 //return 1.f / FR ( x );\r
 591 \r
 592 #else // no fast math\r
 593                 return 1.f / f;\r
 594 #endif\r
 595         }\r
 596 \r
 597         // calculate: 1 / x\r
 598         REALINLINE f64 reciprocal ( const f64 f )\r
 599         {\r
 600                 return 1.0 / f;\r
 601         }\r
 602 \r
 603 \r
 604         // calculate: 1 / x, low precision allowed\r
 605         REALINLINE f32 reciprocal_approxim ( const f32 f )\r
 606         {\r
 607 #if defined( IRRLICHT_FAST_MATH)\r
 608 \r
 609                 // SSE Newton-Raphson reciprocal estimate, accurate to 23 significant\r
 610                 // bi ts of the mantissa\r
 611                 // One Newton-Raphson Iteration:\r
 612                 // f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)\r
 613 #if defined(_MSC_VER) && !defined(_WIN64)\r
 614                 f32 rec;\r
 615                 __asm rcpss xmm0, f               // xmm0 = rcpss(f)\r
 616                 __asm movss xmm1, f               // xmm1 = f\r
 617                 __asm mulss xmm1, xmm0            // xmm1 = f * rcpss(f)\r
 618                 __asm mulss xmm1, xmm0            // xmm2 = f * rcpss(f) * rcpss(f)\r
 619                 __asm addss xmm0, xmm0            // xmm0 = 2 * rcpss(f)\r
 620                 __asm subss xmm0, xmm1            // xmm0 = 2 * rcpss(f)\r
 621                                                                                   //        - f * rcpss(f) * rcpss(f)\r
 622                 __asm movss rec, xmm0             // return xmm0\r
 623                 return rec;\r
 624 #else // no support yet for other compilers\r
 625                 return 1.f / f;\r
 626 #endif\r
 627 \r
 628 /*\r
 629                 // SSE reciprocal estimate, accurate to 12 significant bits of\r
 630                 f32 rec;\r
 631                 __asm rcpss xmm0, f             // xmm0 = rcpss(f)\r
 632                 __asm movss rec , xmm0          // return xmm0\r
 633                 return rec;\r
 634 */\r
 635 /*\r
 636                 u32 x = 0x7F000000 - IR ( p );\r
 637                 const f32 r = FR ( x );\r
 638                 return r * (2.0f - p * r);\r
 639 */\r
 640 #else // no fast math\r
 641                 return 1.f / f;\r
 642 #endif\r
 643         }\r
 644 \r
 645 \r
 646         REALINLINE s32 floor32(f32 x)\r
 647         {\r
 648                 return (s32) floorf ( x );\r
 649         }\r
 650 \r
 651         REALINLINE s32 ceil32 ( f32 x )\r
 652         {\r
 653                 return (s32) ceilf ( x );\r
 654         }\r
 655 \r
 656         // NOTE: Please check round_ documentation about some inaccuracies in this compared to standard library round function.\r
 657         REALINLINE s32 round32(f32 x)\r
 658         {\r
 659                 return (s32) round_(x);\r
 660         }\r
 661 \r
 662         inline f32 f32_max3(const f32 a, const f32 b, const f32 c)\r
 663         {\r
 664                 return a > b ? (a > c ? a : c) : (b > c ? b : c);\r
 665         }\r
 666 \r
 667         inline f32 f32_min3(const f32 a, const f32 b, const f32 c)\r
 668         {\r
 669                 return a < b ? (a < c ? a : c) : (b < c ? b : c);\r
 670         }\r
 671 \r
 672         inline f32 fract ( f32 x )\r
 673         {\r
 674                 return x - floorf ( x );\r
 675         }\r
 676 \r
 677 } // end namespace core\r
 678 } // end namespace irr\r
 679 \r
 680 #ifndef IRRLICHT_FAST_MATH\r
 681         using irr::core::IR;\r
 682         using irr::core::FR;\r
 683 #endif\r
 684 \r
 685 #endif\r