/**************************************************************************
* *
* This code has been developed by John Funnell. This software is an *
* implementation of a part of one or more MPEG-4 Video tools as *
* specified in ISO/IEC 14496-2 standard. Those intending to use this *
* software module in hardware or software products are advised that its *
* use may infringe existing patents or copyrights, and any such use *
* would be at such party's own risk. The original developer of this *
* software module and his/her company, and subsequent editors and their *
* companies (including Project Mayo), will have no liability for use of *
* this software or modifications or derivatives thereof. *
* *
* Project Mayo gives users of the Codec a license to this software *
* module or modifications thereof for use in hardware or software *
* products claiming conformance to the MPEG-4 Video Standard as *
* described in the Open DivX license. *
* *
* The complete Open DivX license can be found at *
* http://www.projectmayo.com/opendivx/license.php *
* *
**************************************************************************/
/**
* Copyright (C) 2001 - Project Mayo
*
* John Funnell
* Andrea Graziani
*
* DivX Advanced Research Center <darc@projectmayo.com>
*
**/
// yuv2rgb_mmx.c //
/*
MMX version of colourspace conversion
13 Feb 2001 - John Funnell, added -height feature to invert output, fixed 24-bit overwrite
TODO:
1. Extract the common part of the 32-, 24- and 16-bit conversions into an inline function
to avoid duplication of the core MMX code.
2. Experiment with using prefetch instructions to improve performance
Using matrix of SMPTE 170M
This is what we're doing:
Step 1.
Y -= 16
U -= 128
V -= 128
Step 2.
Y /= 219
U /= 224
V /= 224
Step 3.
now we want the inverse of this matrix:
/ 0.299 0.114 0.587 \
| -0.169 0.500 -0.331 |
\ 0.500 -0.081 -0.419 /
which is, approximately (signed 16-bit hex fixed point; rows = R, G, B; columns = Y, U, V):
/ 2568 0 3343 \
| 2568 f36e e5e2 | / 65536 * 8
\ 2568 40cf 0 /
including the multiplies in Step 2
*/
#include <memory.h> // for memset() & memcpy()
#include "portab.h"
#include "yuv2rgb.h"
/**
*
**/
#define MAXIMUM_Y_WIDTH 800
#define _USE_PREFETCH
/*
 * Static 64-bit MMX operands. Every constant holds a single value
 * replicated across each 16-bit lane (mmw_*) or each 8-bit lane (mmb_*).
 */

/*
 * Colourspace conversion matrix coefficients. They are used as pmulhw
 * operands against inputs that have already been shifted left by 3.
 */
static uint64_t mmw_mult_Y    = 0x2568256825682568; /* luma coefficient           */
static uint64_t mmw_mult_U_G  = 0xf36ef36ef36ef36e; /* U contribution to green    */
static uint64_t mmw_mult_U_B  = 0x40cf40cf40cf40cf; /* U contribution to blue     */
static uint64_t mmw_mult_V_R  = 0x3343334333433343; /* V contribution to red      */
static uint64_t mmw_mult_V_G  = 0xe5e2e5e2e5e2e5e2; /* V contribution to green    */

/* Offsets and masks */
static uint64_t mmb_0x10      = 0x1010101010101010; /* 16 per byte: luma offset   */
static uint64_t mmw_0x0080    = 0x0080008000800080; /* 128 per word: chroma offset */
static uint64_t mmw_0x00ff    = 0x00ff00ff00ff00ff; /* keeps the low byte of each word */

/*
 * Per-word bit-field masks (bits 10-14, 5-9 and 0-4 respectively).
 * NOTE(review): not referenced in the visible part of this file; presumably
 * used by a 16-bit output conversion further down - confirm.
 */
static uint64_t mmw_cut_red   = 0x7c007c007c007c00;
static uint64_t mmw_cut_green = 0x03e003e003e003e0;
static uint64_t mmw_cut_blue  = 0x001f001f001f001f;
/**** YUV -> RGB conversion, 32-bit output ****/
/* if height_y is negative then the output image will be inverted */
/*
Note: the _stride_out parameter is ignored by this YUV-to-RGB conversion;
the output stride is assumed to be 4 * width_y for the 32-bit colour bitmap.
*/
void yuv2rgb_32(uint8_t *puc_y, int stride_y,
uint8_t *puc_u, uint8_t *puc_v, int stride_uv,
uint8_t *puc_out, int width_y, int height_y,
unsigned int _stride_out) {
int y, horiz_count;
int stride_out = width_y * 4;
if (height_y < 0) {
/* we are flipping our output upside-down */
height_y = -height_y;
puc_y += (height_y - 1) * stride_y ;
puc_u += (height_y/2 - 1) * stride_uv;
puc_v += (height_y/2 - 1) * stride_uv;
stride_y = -stride_y;
stride_uv = -stride_uv;
}
horiz_count = -(width_y >> 3);
for (y=0; y<height_y; y++) {
_asm {
push eax
push ebx
push ecx
push edx
push edi
mov eax, puc_out
mov ebx, puc_y
mov ecx, puc_u
mov edx, puc_v
mov edi, horiz_count
horiz_loop:
movd mm2, [ecx]
pxor mm7, mm7
movd mm3, [edx]
punpcklbw mm2, mm7 ; mm2 = __u3__u2__u1__u0
movq mm0, [ebx] ; mm0 = y7y6y5y4y3y2y1y0
punpcklbw mm3, mm7 ; mm3 = __v3__v2__v1__v0
movq mm1, mmw_0x00ff ; mm1 = 00ff00ff00ff00ff
psubusb mm0, mmb_0x10 ; mm0 -= 16
psubw mm2, mmw_0x0080 ; mm2 -= 128
pand mm1, mm0 ; mm1 = __y6__y4__y2__y0
psubw mm3, mmw_0x0080 ; mm3 -= 128
psllw mm1, 3 ; mm1 *= 8
psrlw mm0, 8 ; mm0 = __y7__y5__y3__y1
psllw mm2, 3 ; mm2 *= 8
pmulhw mm1, mmw_mult_Y ; mm1 *= luma coeff
psllw mm0, 3 ; mm0 *= 8
psllw mm3, 3 ; mm3 *= 8
movq mm5, mm3 ; mm5 = mm3 = v
pmulhw mm5, mmw_mult_V_R ; mm5 = red chroma
movq mm4, mm2 ; mm4 = mm2 = u
pmulhw mm0, mmw_mult_Y ; mm0 *= luma coeff
movq mm7, mm1 ; even luma part
pmulhw mm2, mmw_mult_U_G ; mm2 *= u green coeff
paddsw mm7, mm5 ; mm7 = luma + chroma __r6__r4__r2__r0
pmulhw mm3, mmw_mult_V_G ; mm3 *= v green coeff
packuswb mm7, mm7 ; mm7 = r6r4r2r0r6r4r2r0
pmulhw mm4, mmw_mult_U_B ; mm4 = blue chroma
paddsw mm5, mm0 ; mm5 = luma + chroma __r7__r5__r3__r1
packuswb mm5, mm5 ; mm6 = r7r5r3r1r7r5r3r1
paddsw mm2, mm3 ; mm2 = green chroma
movq mm3, mm1 ; mm3 = __y6__y4__y2__y0
movq mm6, mm1 ; mm6 = __y6__y4__y2__y0
paddsw mm3, mm4 ; mm3 = luma + chroma __b6__b4__b2__b0
paddsw mm6, mm2 ; mm6 = luma + chroma __g6__g4__g2__g0
punpcklbw mm7, mm5 ; mm7 = r7r6r5r4r3r2r1r0
paddsw mm2, mm0 ; odd luma part plus chroma part __g7__g5__g3__g1
packuswb mm6, mm6 ; mm2 = g6g4g2g0g6g4g2g0
packuswb mm2, mm2 ; mm2 = g7g5g3g1g7g5g3g1
packuswb mm3, mm3 ; mm3 = b6b4b2b0b6b4b2b0
paddsw mm4, mm0 ; odd luma part plus chroma part __b7__b5__b3__b1
packuswb mm4, mm4 ; mm4 = b7b5b3b1b7b5b3b1
punpcklbw mm6, mm2 ; mm6 = g7g6g5g4g3g2g1g0
punpcklbw mm3, mm4 ; mm3 = b7b6b5b4b3b2b1b0
/* 32-bit shuffle.... */
pxor mm0, mm0 ; is this needed?
movq mm1, mm6 ; mm1 = g7g6g5g4g3g2g1g0
punpcklbw mm1, mm0 ; mm1 = __g3__g2__g1__g0
movq mm0, mm3 ; mm0 = b7b6b5b4b3b2b1b0
punpcklbw mm0, mm7 ; mm0 = r3b3r2b2r1b1r0b0
movq mm2, mm0 ; mm2 = r3b3r2b2r1b1r0b0
punpcklbw mm0, mm1 ; mm0 = __r1g1b1__r0g0b0
punpckhbw mm2, mm1 ; mm2 = __r3g3b3__r2g2b2
/* 32-bit save... */
movq [eax], mm0 ; eax[0] = __r1g1b1__r0g0b0
movq mm1, mm6 ; mm1 = g7g6g5g4g3g2g1g0
movq 8[eax], mm2 ; eax[8] = __r3g3b3__r2g2b2
/* 32-bit shuffle.... */
pxor mm0, mm0
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
开源项目OpenDivX源代码包 (103个子文件)
yuv2rgb_mmx.c 28KB
basic_prediction_mmx.c 25KB
postprocess.c 23KB
deblock_vert_lpf9.c 22KB
mp4_tables.c 18KB
IDCT_MMX.C 18KB
basic_prediction_mmx.c 15KB
DECORE.C 13KB
deblock_vert_default_filter.c 13KB
Yuv2rgb.c 13KB
deblock_horiz_lpf9.c 12KB
mp4_picture.c 12KB
transferIDCT_mmx.c 11KB
mp4_predict.c 11KB
mp4_block.c 11KB
mp4_header.c 10KB
mp4_mblock.c 10KB
MP4_VLD.C 10KB
deblock_vert_useDC.c 9KB
basic_prediction.c 7KB
deblock_horiz_useDC.c 7KB
IDCT.C 7KB
deblock_vert_choose_p1p2.c 6KB
mp4_decoder.c 6KB
deblock_vert.c 6KB
test_basic_prediction.c 6KB
mp4_recon.c 5KB
postprocess.c 5KB
deblock_vert_copy_and_unpack.c 4KB
deblock_horiz.c 4KB
deblock_horiz_default_filter.c 4KB
deblock_vert_DC_on.c 4KB
fast_copy.c 4KB
GEN_DRAW.C 3KB
DEBUG.C 3KB
STORE.C 3KB
GETBITS.C 3KB
transferIDCT.c 3KB
mp4_iquant.c 3KB
deblock_horiz_DC_on.c 2KB
clearblock.c 2KB
MP4_VARS.C 2KB
clearblock_mmx.c 2KB
DO_EMMS.C 2KB
gen_usetime.c 590B
DECORE.DSP 18KB
CONSOLE.DSP 7KB
test_basic_prediction.dsp 4KB
DECORE.DSW 728B
test_basic_prediction.dsw 566B
Entries 2KB
Entries 957B
Entries 273B
Entries 145B
Entries 127B
Entries 60B
Entries 59B
Entries 36B
Entries 13B
MP4_VLD.H 9KB
mp4_mblock.h 7KB
MP4_VARS.H 5KB
postprocess_mmx.h 5KB
GETBITS.H 5KB
DECORE.H 4KB
mp4_header.h 4KB
postprocess.h 4KB
postprocess.h 4KB
YUV2RGB.H 3KB
TIMER.H 3KB
GLOBAL.H 3KB
basic_prediction.h 3KB
DEBUG.H 2KB
PORTAB.H 2KB
mp4_predict.h 2KB
mp4_decoder.h 2KB
transferIDCT.h 2KB
mp4_iquant.h 2KB
STORE.H 2KB
clearblock.h 2KB
mp4_block.h 2KB
GEN_DRAW.H 250B
gen_usetime.h 215B
IDCT.H 145B
Console.plg 3KB
Repository 43B
Repository 34B
Repository 31B
Repository 29B
Repository 29B
Repository 23B
Repository 22B
Repository 21B
Repository 17B
Root 51B
Root 51B
Root 51B
Root 51B
Root 51B
Root 51B
共 103 条
- 1
- 2
资源评论
- tufaqing2015-01-09不错,代码可以用
- lyuoong082016-03-14还好,没测试过!!
普通网友
- 粉丝: 882
- 资源: 2万+
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- mongodb 数据库基本操作.docx
- x265-3.2.1-vc15-86-64-Release
- mongodb 数据库基本操作.docx
- 使用Python实现的基于遗传算法+梯度下降算法解决鸢尾花分类问题.zip
- ME2307-VB一款P-Channel沟道SOT23的MOSFET晶体管参数介绍与应用说明
- main.m
- 古诗13-最落魄人日南山约应提邢懋之
- VMware-workstation-full-17.5.0-22583795.zip
- CAN CANWeb 现场总线 IO模块 Keil 源程序 开发板 单片机 总线为屏蔽双绞线并接或光纤串接通信
- python爱心代码高级.docx
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功