ref: 372afde46e7defc9dd2d719a1732b8ace1fa096e
dir: /ref_soft/r_part.c/
/* Copyright (C) 1997-2001 Id Software, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "r_local.h" vec3_t r_pright, r_pup, r_ppn; #define PARTICLE_33 0 #define PARTICLE_66 1 #define PARTICLE_OPAQUE 2 typedef struct { particle_t *particle; int level; int color; } partparms_t; static partparms_t partparms; #if id386 && !defined __linux__ static unsigned s_prefetch_address; /* ** BlendParticleXX ** ** Inputs: ** EAX = color ** EDI = pdest ** ** Scratch: ** EBX = scratch (dstcolor) ** EBP = scratch ** ** Outputs: ** none */ __declspec(naked) void BlendParticle33( void ) { // return vid.alphamap[color + dstcolor*256]; __asm mov ebp, vid.alphamap __asm xor ebx, ebx __asm mov bl, byte ptr [edi] __asm shl ebx, 8 __asm add ebp, ebx __asm add ebp, eax __asm mov al, byte ptr [ebp] __asm mov byte ptr [edi], al __asm ret } __declspec(naked) void BlendParticle66( void ) { // return vid.alphamap[pcolor*256 + dstcolor]; __asm mov ebp, vid.alphamap __asm xor ebx, ebx __asm shl eax, 8 __asm mov bl, byte ptr [edi] __asm add ebp, ebx __asm add ebp, eax __asm mov al, byte ptr [ebp] __asm mov byte ptr [edi], al __asm ret } __declspec(naked) void BlendParticle100( void ) { __asm mov byte ptr [edi], al __asm ret } /* ** R_DrawParticle (asm version) ** ** Since we use __declspec( naked ) we don't have a stack frame ** that we can use. Since I want to reserve EBP anyway, I tossed ** all the important variables into statics. This routine isn't ** meant to be re-entrant, so this shouldn't cause any problems ** other than a slightly higher global memory footprint. ** */ __declspec(naked) void R_DrawParticle( void ) { static vec3_t local, transformed; static float zi; static int u, v, tmp; static short izi; static int ebpsave; static byte (*blendfunc)(void); /* ** must be memvars since x86 can't load constants ** directly. I guess I could use fld1, but that ** actually costs one more clock than fld [one]! */ static float particle_z_clip = PARTICLE_Z_CLIP; static float one = 1.0F; static float point_five = 0.5F; static float eight_thousand_hex = 0x8000; /* ** save trashed variables */ __asm mov ebpsave, ebp __asm push esi __asm push edi /* ** transform the particle */ // VectorSubtract (pparticle->origin, r_origin, local); __asm mov esi, partparms.particle __asm fld dword ptr [esi+0] ; p_o.x __asm fsub dword ptr [r_origin+0] ; p_o.x-r_o.x __asm fld dword ptr [esi+4] ; p_o.y | p_o.x-r_o.x __asm fsub dword ptr [r_origin+4] ; p_o.y-r_o.y | p_o.x-r_o.x __asm fld dword ptr [esi+8] ; p_o.z | p_o.y-r_o.y | p_o.x-r_o.x __asm fsub dword ptr [r_origin+8] ; p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x __asm fxch st(2) ; p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z __asm fstp dword ptr [local+0] ; p_o.y-r_o.y | p_o.z-r_o.z __asm fstp dword ptr [local+4] ; p_o.z-r_o.z __asm fstp dword ptr [local+8] ; (empty) // transformed[0] = DotProduct(local, r_pright); // transformed[1] = DotProduct(local, r_pup); // transformed[2] = DotProduct(local, r_ppn); __asm fld dword ptr [local+0] ; l.x __asm fmul dword ptr [r_pright+0] ; l.x*pr.x __asm fld dword ptr [local+4] ; l.y | l.x*pr.x __asm fmul dword ptr [r_pright+4] ; l.y*pr.y | l.x*pr.x __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x __asm fmul dword ptr [r_pright+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z __asm fstp dword ptr [transformed+0] ; (empty) __asm fld dword ptr [local+0] ; l.x __asm fmul dword ptr [r_pup+0] ; l.x*pr.x __asm fld dword ptr [local+4] ; l.y | l.x*pr.x __asm fmul dword ptr [r_pup+4] ; l.y*pr.y | l.x*pr.x __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x __asm fmul dword ptr [r_pup+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z __asm fstp dword ptr [transformed+4] ; (empty) __asm fld dword ptr [local+0] ; l.x __asm fmul dword ptr [r_ppn+0] ; l.x*pr.x __asm fld dword ptr [local+4] ; l.y | l.x*pr.x __asm fmul dword ptr [r_ppn+4] ; l.y*pr.y | l.x*pr.x __asm fld dword ptr [local+8] ; l.z | l.y*pr.y | l.x*pr.x __asm fmul dword ptr [r_ppn+8] ; l.z*pr.z | l.y*pr.y | l.x*pr.x __asm fxch st(2) ; l.x*pr.x | l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y | l.z*pr.z __asm faddp st(1), st ; l.x*pr.x + l.y*pr.y + l.z*pr.z __asm fstp dword ptr [transformed+8] ; (empty) /* ** make sure that the transformed particle is not in front of ** the particle Z clip plane. We can do the comparison in ** integer space since we know the sign of one of the inputs ** and can figure out the sign of the other easily enough. */ // if (transformed[2] < PARTICLE_Z_CLIP) // return; __asm mov eax, dword ptr [transformed+8] __asm and eax, eax __asm js end __asm cmp eax, particle_z_clip __asm jl end /* ** project the point by initiating the 1/z calc */ // zi = 1.0 / transformed[2]; __asm fld one __asm fdiv dword ptr [transformed+8] /* ** bind the blend function pointer to the appropriate blender ** while we're dividing */ //if ( level == PARTICLE_33 ) // blendparticle = BlendParticle33; //else if ( level == PARTICLE_66 ) // blendparticle = BlendParticle66; //else // blendparticle = BlendParticle100; __asm cmp partparms.level, PARTICLE_66 __asm je blendfunc_66 __asm jl blendfunc_33 __asm lea ebx, BlendParticle100 __asm jmp done_selecting_blend_func blendfunc_33: __asm lea ebx, BlendParticle33 __asm jmp done_selecting_blend_func blendfunc_66: __asm lea ebx, BlendParticle66 done_selecting_blend_func: __asm mov blendfunc, ebx // prefetch the next particle __asm mov ebp, s_prefetch_address __asm mov ebp, [ebp] // finish the above divide __asm fstp zi // u = (int)(xcenter + zi * transformed[0] + 0.5); // v = (int)(ycenter - zi * transformed[1] + 0.5); __asm fld zi ; zi __asm fmul dword ptr [transformed+0] ; zi * transformed[0] __asm fld zi ; zi | zi * transformed[0] __asm fmul dword ptr [transformed+4] ; zi * transformed[1] | zi * transformed[0] __asm fxch st(1) ; zi * transformed[0] | zi * transformed[1] __asm fadd xcenter ; xcenter + zi * transformed[0] | zi * transformed[1] __asm fxch st(1) ; zi * transformed[1] | xcenter + zi * transformed[0] __asm fld ycenter ; ycenter | zi * transformed[1] | xcenter + zi * transformed[0] __asm fsubrp st(1), st(0) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] __asm fxch st(1) ; xcenter + zi * transformed[0] | ycenter + zi * transformed[1] __asm fadd point_five ; xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1] __asm fxch st(1) ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5 __asm fadd point_five ; ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5 __asm fxch st(1) ; u | v __asm fistp dword ptr [u] ; v __asm fistp dword ptr [v] ; (empty) /* ** clip out the particle */ // if ((v > d_vrectbottom_particle) || // (u > d_vrectright_particle) || // (v < d_vrecty) || // (u < d_vrectx)) // { // return; // } __asm mov ebx, u __asm mov ecx, v __asm cmp ecx, d_vrectbottom_particle __asm jg end __asm cmp ecx, d_vrecty __asm jl end __asm cmp ebx, d_vrectright_particle __asm jg end __asm cmp ebx, d_vrectx __asm jl end /* ** compute addresses of zbuffer, framebuffer, and ** compute the Z-buffer reference value. ** ** EBX = U ** ECX = V ** ** Outputs: ** ESI = Z-buffer address ** EDI = framebuffer address */ // ESI = d_pzbuffer + (d_zwidth * v) + u; __asm mov esi, d_pzbuffer ; esi = d_pzbuffer __asm mov eax, d_zwidth ; eax = d_zwidth __asm mul ecx ; eax = d_zwidth*v __asm add eax, ebx ; eax = d_zwidth*v+u __asm shl eax, 1 ; eax = 2*(d_zwidth*v+u) __asm add esi, eax ; esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u ) // initiate // izi = (int)(zi * 0x8000); __asm fld zi __asm fmul eight_thousand_hex // EDI = pdest = d_viewbuffer + d_scantable[v] + u; __asm lea edi, [d_scantable+ecx*4] __asm mov edi, [edi] __asm add edi, d_viewbuffer __asm add edi, ebx // complete // izi = (int)(zi * 0x8000); __asm fistp tmp __asm mov eax, tmp __asm mov izi, ax /* ** determine the screen area covered by the particle, ** which also means clamping to a min and max */ // pix = izi >> d_pix_shift; __asm xor edx, edx __asm mov dx, izi __asm mov ecx, d_pix_shift __asm shr dx, cl // if (pix < d_pix_min) // pix = d_pix_min; __asm cmp edx, d_pix_min __asm jge check_pix_max __asm mov edx, d_pix_min __asm jmp skip_pix_clamp // else if (pix > d_pix_max) // pix = d_pix_max; check_pix_max: __asm cmp edx, d_pix_max __asm jle skip_pix_clamp __asm mov edx, d_pix_max skip_pix_clamp: /* ** render the appropriate pixels ** ** ECX = count (used for inner loop) ** EDX = count (used for outer loop) ** ESI = zbuffer ** EDI = framebuffer */ __asm mov ecx, edx __asm cmp ecx, 1 __asm ja over over: /* ** at this point: ** ** ECX = count */ __asm push ecx __asm push edi __asm push esi top_of_pix_vert_loop: top_of_pix_horiz_loop: // for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth) // { // for (i=0 ; i<pix ; i++) // { // if (pz[i] <= izi) // { // pdest[i] = blendparticle( color, pdest[i] ); // } // } // } __asm xor eax, eax __asm mov ax, word ptr [esi] __asm cmp ax, izi __asm jg end_of_horiz_loop #if ENABLE_ZWRITES_FOR_PARTICLES __asm mov bp, izi __asm mov word ptr [esi], bp #endif __asm mov eax, partparms.color __asm call [blendfunc] __asm add edi, 1 __asm add esi, 2 end_of_horiz_loop: __asm dec ecx __asm jnz top_of_pix_horiz_loop __asm pop esi __asm pop edi __asm mov ebp, d_zwidth __asm shl ebp, 1 __asm add esi, ebp __asm add edi, [r_screenwidth] __asm pop ecx __asm push ecx __asm push edi __asm push esi __asm dec edx __asm jnz top_of_pix_vert_loop __asm pop ecx __asm pop ecx __asm pop ecx end: __asm pop edi __asm pop esi __asm mov ebp, ebpsave __asm ret } #else static byte BlendParticle33( int pcolor, int dstcolor ) { return vid.alphamap[pcolor + dstcolor*256]; } static byte BlendParticle66( int pcolor, int dstcolor ) { return vid.alphamap[pcolor*256+dstcolor]; } static byte BlendParticle100( int pcolor, int dstcolor ) { dstcolor = dstcolor; return pcolor; } /* ** R_DrawParticle ** ** Yes, this is amazingly slow, but it's the C reference ** implementation and should be both robust and vaguely ** understandable. The only time this path should be ** executed is if we're debugging on x86 or if we're ** recompiling and deploying on a non-x86 platform. ** ** To minimize error and improve readability I went the ** function pointer route. This exacts some overhead, but ** it pays off in clean and easy to understand code. */ void R_DrawParticle( void ) { particle_t *pparticle = partparms.particle; int level = partparms.level; vec3_t local, transformed; float zi; byte *pdest; short *pz; int color = pparticle->color; int i, izi, pix, count, u, v; byte (*blendparticle)( int, int ); /* ** transform the particle */ VectorSubtract (pparticle->origin, r_origin, local); transformed[0] = DotProduct(local, r_pright); transformed[1] = DotProduct(local, r_pup); transformed[2] = DotProduct(local, r_ppn); if (transformed[2] < PARTICLE_Z_CLIP) return; /* ** bind the blend function pointer to the appropriate blender */ if ( level == PARTICLE_33 ) blendparticle = BlendParticle33; else if ( level == PARTICLE_66 ) blendparticle = BlendParticle66; else blendparticle = BlendParticle100; /* ** project the point */ // FIXME: preadjust xcenter and ycenter zi = 1.0 / transformed[2]; u = (int)(xcenter + zi * transformed[0] + 0.5); v = (int)(ycenter - zi * transformed[1] + 0.5); if ((v > d_vrectbottom_particle) || (u > d_vrectright_particle) || (v < d_vrecty) || (u < d_vrectx)) { return; } /* ** compute addresses of zbuffer, framebuffer, and ** compute the Z-buffer reference value. */ pz = d_pzbuffer + (d_zwidth * v) + u; pdest = d_viewbuffer + d_scantable[v] + u; izi = (int)(zi * 0x8000); /* ** determine the screen area covered by the particle, ** which also means clamping to a min and max */ pix = izi >> d_pix_shift; if (pix < d_pix_min) pix = d_pix_min; else if (pix > d_pix_max) pix = d_pix_max; /* ** render the appropriate pixels */ count = pix; switch (level) { case PARTICLE_33 : for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) { //FIXME--do it in blocks of 8? for (i=0 ; i<pix ; i++) { if (pz[i] <= izi) { pz[i] = izi; pdest[i] = vid.alphamap[color + ((int)pdest[i]<<8)]; } } } break; case PARTICLE_66 : for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) { for (i=0 ; i<pix ; i++) { if (pz[i] <= izi) { pz[i] = izi; pdest[i] = vid.alphamap[(color<<8) + (int)pdest[i]]; } } } break; default: //100 for ( ; count ; count--, pz += d_zwidth, pdest += r_screenwidth) { for (i=0 ; i<pix ; i++) { if (pz[i] <= izi) { pz[i] = izi; pdest[i] = color; } } } break; } } #endif // !id386 /* ** R_DrawParticles ** ** Responsible for drawing all of the particles in the particle list ** throughout the world. Doesn't care if we're using the C path or ** if we're using the asm path, it simply assigns a function pointer ** and goes. */ void R_DrawParticles (void) { particle_t *p; int i; extern unsigned long fpu_sp24_cw, fpu_chop_cw; VectorScale( vright, xscaleshrink, r_pright ); VectorScale( vup, yscaleshrink, r_pup ); VectorCopy( vpn, r_ppn ); #if id386 && !defined __linux__ __asm fldcw word ptr [fpu_sp24_cw] #endif for (p=r_newrefdef.particles, i=0 ; i<r_newrefdef.num_particles ; i++,p++) { if ( p->alpha > 0.66 ) partparms.level = PARTICLE_OPAQUE; else if ( p->alpha > 0.33 ) partparms.level = PARTICLE_66; else partparms.level = PARTICLE_33; partparms.particle = p; partparms.color = p->color; #if id386 && !defined __linux__ if ( i < r_newrefdef.num_particles-1 ) s_prefetch_address = ( unsigned int ) ( p + 1 ); else s_prefetch_address = ( unsigned int ) r_newrefdef.particles; #endif R_DrawParticle(); } #if id386 && !defined __linux__ __asm fldcw word ptr [fpu_chop_cw] #endif }