/* <windows.h> is not needed by the table-driven converter in this part of
 * the file; guarding it lets this part build with any C compiler while
 * leaving Windows builds unchanged. */
#ifdef _WIN32
#include <windows.h>
#endif

/*
 * Lookup tables for the table-driven YUV -> RGB conversion.
 * All chroma/luma tables hold 16.16 fixed-point values (results are
 * shifted right by 16 before use).  They are filled by init_dither_tab().
 */
long int crv_tab[256];   /* (V - 128) * 104597 : V contribution to red   */
long int cbu_tab[256];   /* (U - 128) * 132201 : U contribution to blue  */
long int cgu_tab[256];   /* (U - 128) *  25675 : U contribution to green */

long int cgv_tab[256];   /* (V - 128) *  53279 : V contribution to green */
long int tab_76309[256]; /* (Y - 16)  *  76309 : scaled luma             */

/* Saturation table: clp[YUV_CLP_BIAS + x] clamps x to 0..255 for
 * x in [-384, 639]. */
unsigned char clp[1024];

enum {
    YUV_CLP_BIAS = 384,   /* index of the entry that corresponds to value 0 */
    YUV_CLP_SIZE = 1024   /* total number of entries in clp[]               */
};


/*
 * Fill the conversion tables used by YUV2RGB420().
 * Must be called once before the first conversion.
 *
 * The multipliers divided by 65536 are approximately 1.596, 2.017, 0.392,
 * 0.813 and 1.164, which look like the usual ITU-R BT.601 limited-range
 * coefficients (NOTE(review): confirm against the intended colour matrix).
 */
void init_dither_tab(void)
{
    const long crv = 104597;
    const long cbu = 132201;
    const long cgu = 25675;
    const long cgv = 53279;
    int i;

    for (i = 0; i < 256; i++) {
        const long chroma = i - 128;

        crv_tab[i] = chroma * crv;
        cbu_tab[i] = chroma * cbu;
        cgu_tab[i] = chroma * cgu;
        cgv_tab[i] = chroma * cgv;
        tab_76309[i] = 76309L * (i - 16);
    }

    /* 384 zeros, then the identity ramp 0..255, then 384 entries of 255. */
    for (i = 0; i < YUV_CLP_SIZE; i++) {
        if (i < YUV_CLP_BIAS)
            clp[i] = 0;
        else if (i < YUV_CLP_BIAS + 256)
            clp[i] = (unsigned char)(i - YUV_CLP_BIAS);
        else
            clp[i] = 255;
    }
}


/* Convert one pixel and store it as three bytes (red, green, blue) at dst. */
static void yuv_store_pixel(unsigned char *dst, long luma,
                            long red_off, long green_off, long blue_off)
{
    dst[0] = clp[YUV_CLP_BIAS + ((luma + red_off) >> 16)];
    dst[1] = clp[YUV_CLP_BIAS + ((luma - green_off) >> 16)];
    dst[2] = clp[YUV_CLP_BIAS + ((luma + blue_off) >> 16)];
}


/*
 * Convert a planar 4:2:0 image to packed 24-bit output.
 *
 *   src0     luma plane, `width` bytes per row, `height` rows
 *   src1     U plane, one sample per 2x2 luma cell, read sequentially
 *   src2     V plane, one sample per 2x2 luma cell, read sequentially
 *   dst_ori  output, 3 bytes per pixel in R, G, B order, 3*width bytes per row
 *   width    image width in pixels
 *   height   image height in pixels
 *
 * init_dither_tab() must have been called first.
 *
 * Odd dimensions are handled by skipping the missing right-hand column /
 * bottom row of the last 2x2 cell, so nothing outside the luma and output
 * buffers is touched.  For an odd width one chroma sample is still consumed
 * per (partial) cell, i.e. the chroma rows are taken to be (width+1)/2
 * samples wide -- NOTE(review): confirm this matches the producer's layout.
 * For even dimensions the output is identical to the previous version.
 * Non-positive dimensions produce no output.
 */
void YUV2RGB420(unsigned char *src0,unsigned char *src1,unsigned char *src2,unsigned char *dst_ori,int width,int height)
{
    const unsigned char *y_top = src0;
    unsigned char *out_top = dst_ori;
    int row, col;

    if (width <= 0 || height <= 0)
        return;

    for (row = 0; row < height; row += 2) {
        const int have_bottom = (row + 1 < height);
        /* Only form the second-row pointers when that row exists. */
        const unsigned char *y_bot = have_bottom ? y_top + width : y_top;
        unsigned char *out_bot = have_bottom ? out_top + 3 * width : out_top;

        for (col = 0; col < width; col += 2) {
            const int have_right = (col + 1 < width);
            const int u = *src1++;
            const int v = *src2++;

            /* Chroma terms are shared by all pixels of the 2x2 cell. */
            const long red_off = crv_tab[v];
            const long green_off = cgu_tab[u] + cgv_tab[v];
            const long blue_off = cbu_tab[u];

            /* up-left */
            yuv_store_pixel(out_top, tab_76309[*y_top++],
                            red_off, green_off, blue_off);
            out_top += 3;

            /* down-left */
            if (have_bottom) {
                yuv_store_pixel(out_bot, tab_76309[*y_bot++],
                                red_off, green_off, blue_off);
                out_bot += 3;
            }

            if (!have_right)
                continue;

            /* up-right */
            yuv_store_pixel(out_top, tab_76309[*y_top++],
                            red_off, green_off, blue_off);
            out_top += 3;

            /* down-right */
            if (have_bottom) {
                yuv_store_pixel(out_bot, tab_76309[*y_bot++],
                                red_off, green_off, blue_off);
                out_bot += 3;
            }
        }

        if (!have_bottom)
            break;

        /* The bottom-row cursors now sit at the start of the next row pair. */
        y_top = y_bot;
        out_top = out_bot;
    }
}



//How
to use: 99 //YUV_TO_RGB24(pY,width,pU,pV,width>>1,pRGBBuf,width,(int)0-height,width*3); 100 typedef UCHAR uint8_t; 101 typedef ULONGLONG uint64_t; 102 103 #define MAXIMUM_Y_WIDTH 800 104 static uint64_t mmw_mult_Y = 0x2568256825682568; 105 static uint64_t mmw_mult_U_G = 0xf36ef36ef36ef36e; 106 static uint64_t mmw_mult_U_B = 0x40cf40cf40cf40cf; 107 static uint64_t mmw_mult_V_R = 0x3343334333433343; 108 static uint64_t mmw_mult_V_G = 0xe5e2e5e2e5e2e5e2; 109 110 111 static uint64_t mmb_0x10 = 0x1010101010101010; 112 static uint64_t mmw_0x0080 = 0x0080008000800080; 113 static uint64_t mmw_0x00ff = 0x00ff00ff00ff00ff; 114 115 static uint64_t mmw_cut_red = 0x7c007c007c007c00; 116 static uint64_t mmw_cut_green = 0x03e003e003e003e0; 117 static uint64_t mmw_cut_blue = 0x001f001f001f001f; 118 119 120 void YUV_TO_RGB24( uint8_t *puc_y, int stride_y, 121 uint8_t *puc_u, uint8_t *puc_v, int stride_uv, 122 uint8_t *puc_out, int width_y, int height_y,int stride_out) 123 { 124 int y, horiz_count; 125 uint8_t *puc_out_remembered; 126 //int stride_out = width_y * 3; 127 128 if (height_y < 0) { 129 //we are flipping our output upside-down 130 height_y = -height_y; 131 puc_y += (height_y - 1) * stride_y ; 132 puc_u += (height_y/2 - 1) * stride_uv; 133 puc_v += (height_y/2 - 1) * stride_uv; 134 stride_y = -stride_y; 135 stride_uv = -stride_uv; 136 } 137 138 horiz_count = -(width_y >> 3); 139 140 for (y=0; y<height_y; y++) { 141 if (y == height_y-1) { 142 //this is the last output line - we need to be careful not to overrun the end of this line 143 uint8_t temp_buff[3*MAXIMUM_Y_WIDTH+1]; 144 puc_out_remembered = puc_out; 145 puc_out = temp_buff; //write the RGB to a temporary store 146 } 147 _asm { 148 push eax 149 push ebx 150 push ecx 151 push edx 152 push edi 153 154 mov eax, puc_out 155 mov ebx, puc_y 156 mov ecx, puc_u 157 mov edx, puc_v 158 mov edi, horiz_count 159 160 horiz_loop: 161 162 movd mm2, [ecx] 163 pxor mm7, mm7 164 165 movd mm3, [edx] 166 punpcklbw mm2, mm7 167 168 movq 
mm0, [ebx] 169 punpcklbw mm3, mm7 170 171 movq mm1, mmw_0x00ff 172 173 psubusb mm0, mmb_0x10 174 175 psubw mm2, mmw_0x0080 176 pand mm1, mm0 177 178 psubw mm3, mmw_0x0080 179 psllw mm1, 3 180 181 psrlw mm0, 8 182 psllw mm2, 3 183 184 pmulhw mm1, mmw_mult_Y 185 psllw mm0, 3 186 187 psllw mm3, 3 188 movq mm5, mm3 189 190 pmulhw mm5, mmw_mult_V_R 191 movq mm4, mm2 192 193 pmulhw mm0, mmw_mult_Y 194 movq mm7, mm1 195 196 pmulhw mm2, mmw_mult_U_G 197 paddsw mm7, mm5 198 199 pmulhw mm3, mmw_mult_V_G 200 packuswb mm7, mm7 201 202 pmulhw mm4, mmw_mult_U_B 203 paddsw mm5, mm0 204 205 packuswb mm5, mm5 206 paddsw mm2, mm3 207 208 movq mm3, mm1 209 movq mm6, mm1 210 211 paddsw mm3, mm4 212 paddsw mm6, mm2 213 214 punpcklbw mm7, mm5 215 paddsw mm2, mm0 216 217 packuswb mm6, mm6 218 packuswb mm2, mm2 219 220 packuswb mm3, mm3 221 paddsw mm4, mm0 222 223 packuswb mm4, mm4 224 punpcklbw mm6, mm2 225 226 punpcklbw mm3, mm4 227 228 // 32-bit shuffle. 229 pxor mm0, mm0 230 231 movq mm1, mm6 232 punpcklbw mm1, mm0 233 234 movq mm0, mm3 235 punpcklbw mm0, mm7 236 237 movq mm2, mm0 238 239 punpcklbw mm0, mm1 240 punpckhbw mm2, mm1 241 242 // 24-bit shuffle and sav 243 movd [eax], mm0 244 psrlq mm0, 32 245 246 movd 3[eax], mm0 247 248 movd 6[eax], mm2 249 250 251 psrlq mm2, 32 252 253 movd 9[eax], mm2 254 255 // 32-bit shuffle. 
256 pxor mm0, mm0 257 258 movq mm1, mm6 259 punpckhbw mm1, mm0 260 261 movq mm0, mm3 262 punpckhbw mm0, mm7 263 264 movq mm2, mm0 265 266 punpcklbw mm0, mm1 267 punpckhbw mm2, mm1 268 269 // 24-bit shuffle and sav 270 movd 12[eax], mm0 271 psrlq mm0, 32 272 273 movd 15[eax], mm0 274 add ebx, 8 275 276 movd 18[eax], mm2 277 psrlq mm2, 32 278 279 add ecx, 4 280 add edx, 4 281 282 movd 21[eax], mm2 283 add eax, 24 284 285 inc edi 286 jne horiz_loop 287 288 pop edi 289 pop edx 290 pop ecx 291 pop ebx 292 pop eax 293 294 emms 295 } 296 297 298 if (y == height_y-1) { 299 //last line of output - we have used the temp_buff and need to copy 300 int x = 3 * width_y; //interation counter 301 uint8_t *ps = puc_out; // source pointer (temporary line store) 302 uint8_t *pd = puc_out_remembered; // dest pointer 303 while (x--) *(pd++) = *(ps++); // copy the line 304 } 305 306 puc_y += stride_y; 307 if (y%2) { 308 puc_u += stride_uv; 309 puc_v += stride_uv; 310 } 311 puc_out += stride_out; 312 } 313 } 314
|