最近的游戏引擎已经有个大概的样子了。不过还有许多东西要修改、添加。比如粒子系统,总是感觉渲染得有点不对头,但就是看不出问题出在哪里。再看看吧~ 以下是ZVector的头文件ZVector.h:
#ifndef _ZVector_H_
#define _ZVector_H_

#include <cmath>

// Cap member packing at 16 bytes for the declarations in this header only.
// The previous packing is restored at the end of the header so the setting
// does not leak into every file that includes it. Packing never raises
// alignment: the 16-byte alignment of ZVector comes from ZVECTOR_ALIGN16 below.
#pragma pack(push, 16)

// Implementation selectors: SIMD is tested first by the implementation and
// SSE is only defined when SIMD is not. With neither defined the plain C++
// code is used.
#define SIMD

#ifndef SIMD
#define SSE
#endif

#define PI (3.14159265359f)
#define DEG2RAD(a) (PI/180*(a))     // degrees -> radians
#define RAD2DEG(a) (180/PI*(a))     // radians -> degrees

// Spelling of "align this type to 16 bytes" for the current compiler.
#if defined(_MSC_VER)
#define ZVECTOR_ALIGN16 __declspec(align(16))
#else
#define ZVECTOR_ALIGN16 __attribute__((aligned(16)))
#endif

//##ModelId=46286C6102A1
typedef float SCALAR;

// Four-component (x, y, z, w) float vector, 16 bytes in size and aligned to
// 16 bytes so that one object maps onto a single SSE register load/store.
class ZVECTOR_ALIGN16 ZVector
{
public:
    SCALAR x;
    SCALAR y;
    SCALAR z;   // x, y, z coordinates
    SCALAR w;   // fourth component; 1.0f unless given explicitly

public:
    /*** constructors ***/
    ZVector(SCALAR a = 0.0f, SCALAR b = 0.0f, SCALAR c = 0.0f, SCALAR d = 1.0f) : x(a), y(b), z(c), w(d) {}
    ZVector(const ZVector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}

    /*** member functions ***/
    void Null(void);                                            // clears vector to (0, 0, 0, 1)
    void Normalize();                                           // normalize this vector
    void Negate(void);                                          // invert the vector
    const SCALAR Length() const;                                // length of vector
    const ZVector CrossProduct(const ZVector &vec) const;       // cross product
    const SCALAR DotProduct(const ZVector &vec) const;          // dot product
    const ZVector UnitVector() const;                           // return the unit vector
    const float Angle(const ZVector &normal) const;             // return angle between two vectors
    const ZVector Reflection(const ZVector& normal) const;      // reflect this vector off surface with normal vector
    const ZVector Rotate(const float angle,
                         const ZVector &normal) const;          // rotate angle degrees about a normal

    /*** operator overloading ***/
    SCALAR & operator [](const long idx);                       // vector index
    const ZVector & operator =(const ZVector &vec);             // vector assignment
    const bool operator ==(const ZVector &vec) const;           // vector equality
    const bool operator !=(const ZVector &vec) const;           // vector inequality
    const ZVector operator +(const ZVector &vec) const;         // vector add
    const ZVector operator +() const;                           // unary plus (counterpart of negation)
    const ZVector & operator +=(const ZVector &vec);            // vector increment
    const ZVector operator -(const ZVector &vec) const;         // vector subtraction
    const ZVector operator -() const;                           // vector negation
    const ZVector & operator -=(const ZVector& vec);            // vector decrement
    const ZVector & operator *=(const SCALAR &s);               // SCALAR self-multiply
    const ZVector & operator /=(const SCALAR &s);               // SCALAR self-divide
    const ZVector operator *(const SCALAR &s) const;            // post multiply by SCALAR
    const ZVector operator *(const ZVector& vec) const;         // multiply by ZVector
    const ZVector operator /(SCALAR s) const;                   // divide by SCALAR
    const ZVector operator ^(const ZVector &vec) const;         // cross product
    const SCALAR operator %(const ZVector &vec) const;          // dot product
    const SCALAR operator !() const;                            // NOTE(review): undocumented; presumably the length -- confirm in ZVector.cpp
    const ZVector operator |(const SCALAR length) const;        // return vector with specified length
    const ZVector& operator |=(const float length);             // set length of vector equal to length

    /*** friends ***/
    friend inline const ZVector operator*(const SCALAR &s, const ZVector &vec);     // pre multiply by SCALAR
    //friend inline const ZVector operator*(const ZVector &vec, const SCALAR &s);   // post multiply by SCALAR
};

#pragma pack(pop)   // restore the includer's packing

#endif //_ZVector_H_
以下是实现部分ZVector.cpp的某个函数代码:
1 // post multiply by SCALAR 2 const ZVector ZVector::operator *(const SCALAR &s) const 3  { 4 __declspec(align(16)) ZVector res; 5 #ifdef SIMD 6 __asm 7 { 8 mov esi, s 9 movss xmm0, [esi] // xmm1[0] = s 10 shufps xmm0, xmm0, 0 // xmm1[1, 2, 3] = xmm1[0] 11 mov edi, this 12 mulps xmm0, [edi] 13 movaps res, xmm0 14 } 15 #else 16 #ifdef SSE 17 __m128 ms = _mm_set_ps1(s); // ms[0, 1, 2, 3] = s 18 *(__m128*)&res = _mm_mul_ps(*(__m128*)(this), ms); 19 #else 20 res.x = x * s; res.y = y * s; res.z = z * s; res.w = w * s; 21 #endif 22 #endif 23 return res; 24 }
每个函数都同时提供了内联汇编实现(即代码中的SIMD分支,直接手写SSE指令)和基于SSE内置函数(intrinsics,即代码中的SSE分支)的实现,用来在CPU上进行优化。效率非常高:SSE分支的效率一般比纯C++实现高400%左右,而SIMD汇编分支的效率一般比纯C++实现高550%左右。我使用了QueryPerformanceCounter、QueryPerformanceFrequency来获得精度非常高的计时,上面的结果就是经过对比得出的。 这次优化有了很多心得,虽然觉得汇编实在恶心,但还是有很多收获。还有SSE,让我更加了解了CPU计算原来可以这样高效。 最后要说的是:一定要保证数据按16字节对齐,否则……大家自己研究吧~
|