;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; Two-pass bilinear filter for ARMv6 (ARM-state, armasm syntax).
; Both routines save r4-r11 and lr on entry and return by popping the saved
; lr straight into pc. r12 is used as a scratch loop counter.

    EXPORT  |vp8_filter_block2d_bil_first_pass_armv6|
    EXPORT  |vp8_filter_block2d_bil_second_pass_armv6|

    AREA    |.text|, CODE, READONLY  ; name this block of code

;-------------------------------------
; r0    unsigned char *src_ptr,
; r1    unsigned short *dst_ptr,
; r2    unsigned int src_pitch,
; r3    unsigned int height,
; stack    unsigned int width,
; stack    const short *vp8_filter
;-------------------------------------
; First pass: for every source row, combines each byte with its right-hand
; neighbour:
;     out = usat16((src[i] * filter[0] + src[i + 1] * filter[1] + 64) >> 7)
; (filter[0] is taken from the low halfword of the word loaded from
; vp8_filter, i.e. little-endian layout is assumed.)
;
; The output is stored transposed in the output array to make the second
; pass easy: consecutive results of one source row are written
; (height * 2 + 2) bytes apart, and each new source row starts 2 bytes
; further into dst_ptr.
;
; If the filter word equals 128 (low halfword 128, high halfword 0) the
; multiply is skipped and the source bytes are simply widened and stored.
;
; The inner loops run width / 4 times and decrement the counter before
; testing it, so width must be a non-zero multiple of 4; height must be
; non-zero for the same reason. Nothing here checks that.
;
; Register roles after setup:
;   r0  current source pointer          r1  current destination pointer
;   r2  src_pitch - width               r3  destination store stride (bytes)
;   r4  width                           r5  packed filter coefficients
;   r6-r10 pixel / result scratch       r11 destination base for this row
;   r12 rows remaining                  lr  inner-loop iterations remaining
|vp8_filter_block2d_bil_first_pass_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers = 36 bytes pushed

    ldr     r11, [sp, #40]                  ; vp8_filter address (2nd stack argument)
    ldr     r4, [sp, #36]                   ; width (1st stack argument)

    mov     r12, r3                         ; outer-loop counter = height

    add     r7, r2, r4                      ; src_pitch + width
    pld     [r0, r7]                        ; prefetch hint into the next row

    sub     r2, r2, r4                      ; src increment for height loop (pitch - width)

    ldr     r5, [r11]                       ; load both 16-bit filter coefficients as one word

    mov     r3, r3, lsl #1                  ; height*2
    add     r3, r3, #2                      ; + 2 bytes: store stride = (height + 1) halfwords,
                                            ; a multiple of 4 bytes when height is odd.
                                            ; NOTE(review): the original comment says the height
                                            ; passed in is really (block height + 1) - confirm
                                            ; against the caller.

    mov     r11, r1                         ; save dst_ptr for each row

    cmp     r5, #128                        ; if filter coef = 128, then skip the filter
    beq     bil_null_1st_filter

|bil_height_loop_1st_v6|
    ldrb    r6, [r0]                        ; load source data: src[0..2]
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    mov     lr, r4, lsr #2                  ; 4-in-parallel loop counter = width / 4

|bil_width_loop_1st_v6|
    ldrb    r9, [r0, #3]
    ldrb    r10, [r0, #4]                   ; 5th byte: right neighbour of the 4th pixel

    pkhbt   r6, r6, r7, lsl #16             ; src[1] | src[0]
    pkhbt   r7, r7, r8, lsl #16             ; src[2] | src[1]

    smuad   r6, r6, r5                      ; apply the filter: lo*lo + hi*hi
    pkhbt   r8, r8, r9, lsl #16             ; src[3] | src[2]
    smuad   r7, r7, r5
    pkhbt   r9, r9, r10, lsl #16            ; src[4] | src[3]

    smuad   r8, r8, r5
    smuad   r9, r9, r5

    add     r0, r0, #4                      ; advance source by the 4 pixels just consumed
    subs    lr, lr, #1                      ; sets the flags used by ldrneb/bne below; nothing
                                            ; in between updates N/Z/C/V

    add     r6, r6, #0x40                   ; round_shift_and_clamp: (x + 64) >> 7, 16-bit unsigned
    add     r7, r7, #0x40
    usat    r6, #16, r6, asr #7
    usat    r7, #16, r7, asr #7

    strh    r6, [r1], r3                    ; result is transposed and stored (post-increment by stride)

    add     r8, r8, #0x40                   ; round_shift_and_clamp
    strh    r7, [r1], r3
    add     r9, r9, #0x40
    usat    r8, #16, r8, asr #7
    usat    r9, #16, r9, asr #7

    strh    r8, [r1], r3                    ; result is transposed and stored

    ldrneb  r6, [r0]                        ; load source data for the next iteration, only if
                                            ; the loop is going to repeat
    strh    r9, [r1], r3

    ldrneb  r7, [r0, #1]
    ldrneb  r8, [r0, #2]

    bne     bil_width_loop_1st_v6

    add     r0, r0, r2                      ; move to next input row (r0 already advanced by width)
    subs    r12, r12, #1                    ; flags consumed by the bne at the end of the block

    add     r9, r2, r4, lsl #1              ; adding back block width: (pitch - width) + 2 * width
    pld     [r0, r9]                        ; preload next row

    add     r11, r11, #2                    ; move over to next column (one halfword)
    mov     r1, r11

    bne     bil_height_loop_1st_v6

    ldmia   sp!, {r4 - r11, pc}             ; restore registers and return

; Pass-through path: widen each source byte to a halfword and store it with
; the same transposed layout. Only src[0..3] are read per iteration here.
|bil_null_1st_filter|
|bil_height_loop_null_1st|
    mov     lr, r4, lsr #2                  ; loop counter = width / 4

|bil_width_loop_null_1st|
    ldrb    r6, [r0]                        ; load data
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    ldrb    r9, [r0, #3]

    strh    r6, [r1], r3                    ; store it to the destination buffer (transposed)
    add     r0, r0, #4
    strh    r7, [r1], r3
    subs    lr, lr, #1                      ; the following strh do not disturb these flags
    strh    r8, [r1], r3
    strh    r9, [r1], r3

    bne     bil_width_loop_null_1st

    subs    r12, r12, #1
    add     r0, r0, r2                      ; move to next input line
    add     r11, r11, #2                    ; move over to next column
    mov     r1, r11

    bne     bil_height_loop_null_1st

    ldmia   sp!, {r4 - r11, pc}             ; restore registers and return

    ENDP  ; |vp8_filter_block2d_bil_first_pass_armv6|


;---------------------------------
; r0    unsigned short *src_ptr,
; r1    unsigned char *dst_ptr,
; r2    int dst_pitch,
; r3    unsigned int height,
; stack    unsigned int width,
; stack    const short *vp8_filter
;---------------------------------
; Second pass: reads 16-bit samples that are consecutive in memory, combines
; each with the following sample:
;     out = usat8((src[i] * filter[0] + src[i + 1] * filter[1] + 64) >> 7)
; and writes the byte results dst_pitch bytes apart, i.e. the data is
; transposed back while being stored.
;
; The outer loop runs width times and the inner loop height / 4 times (four
; results per iteration), so height must be a non-zero multiple of 4 and
; width non-zero; nothing here checks that. Each outer iteration consumes
; (height * 2 + 4) bytes of source.
;
; Samples are fetched two at a time with word loads; the earlier sample is
; expected in the low halfword (little-endian layout assumed).
;
; If the filter word equals 128 the multiply is skipped and the low byte of
; each sample is stored directly.
;
; Register roles after setup:
;   r0  current source pointer          r1  current destination pointer
;   r2  dst_pitch                       r3  height
;   r5  packed filter coefficients      r6-r10 sample / result scratch
;   r11 destination base for this pass  r12 outer iterations remaining
;   lr  inner-loop iterations remaining
|vp8_filter_block2d_bil_second_pass_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers = 36 bytes pushed

    ldr     r11, [sp, #40]                  ; vp8_filter address (2nd stack argument)
    ldr     r4, [sp, #36]                   ; width (1st stack argument)

    ldr     r5, [r11]                       ; load both 16-bit filter coefficients as one word
    mov     r12, r4                         ; outer-loop counter = width, since we work on transposed data matrix
    mov     r11, r1                         ; remember destination base

    cmp     r5, #128                        ; if filter coef = 128, then skip the filter
    beq     bil_null_2nd_filter

|bil_height_loop_2nd|
    ldr     r6, [r0]                        ; load the data: src[1] | src[0]
    ldr     r8, [r0, #4]                    ; src[3] | src[2]
    ldrh    r10, [r0, #8]                   ; src[4]
    mov     lr, r3, lsr #2                  ; loop counter = height / 4

|bil_width_loop_2nd|
    pkhtb   r7, r6, r8                      ; src[1] | src[2]
    pkhtb   r9, r8, r10                     ; src[3] | src[4]

    smuad   r6, r6, r5                      ; apply filter: src[0]*f0 + src[1]*f1
    smuad   r8, r8, r5                      ; apply filter: src[2]*f0 + src[3]*f1

    subs    lr, lr, #1                      ; sets the flags used by ldrne*/bne below; nothing
                                            ; in between updates N/Z/C/V

    smuadx  r7, r7, r5                      ; apply filter (halves crossed): src[1]*f0 + src[2]*f1
    smuadx  r9, r9, r5                      ; apply filter (halves crossed): src[3]*f0 + src[4]*f1

    add     r0, r0, #8                      ; advance source by the 4 samples just consumed

    add     r6, r6, #0x40                   ; round_shift_and_clamp: (x + 64) >> 7, 8-bit unsigned
    add     r7, r7, #0x40
    usat    r6, #8, r6, asr #7
    usat    r7, #8, r7, asr #7
    strb    r6, [r1], r2                    ; the result is transposed back and stored

    add     r8, r8, #0x40                   ; round_shift_and_clamp
    strb    r7, [r1], r2
    add     r9, r9, #0x40
    usat    r8, #8, r8, asr #7
    usat    r9, #8, r9, asr #7
    strb    r8, [r1], r2                    ; the result is transposed back and stored

    ldrne   r6, [r0]                        ; load data for the next iteration, only if the
                                            ; loop is going to repeat
    strb    r9, [r1], r2
    ldrne   r8, [r0, #4]
    ldrneh  r10, [r0, #8]

    bne     bil_width_loop_2nd

    subs    r12, r12, #1
    add     r0, r0, #4                      ; update src for next row: skip the 4 bytes left
                                            ; after the 8-byte steps
    add     r11, r11, #1                    ; next destination byte position
    mov     r1, r11

    bne     bil_height_loop_2nd
    ldmia   sp!, {r4 - r11, pc}             ; restore registers and return

; Pass-through path: strb writes only the low byte of each sample; no
; rounding or saturation is applied here.
|bil_null_2nd_filter|
|bil_height_loop_null_2nd|
    mov     lr, r3, lsr #2                  ; loop counter = height / 4

|bil_width_loop_null_2nd|
    ldr     r6, [r0], #4                    ; load data: src[1] | src[0], post-increment
    subs    lr, lr, #1                      ; flags consumed by the bne below
    ldr     r8, [r0], #4                    ; src[3] | src[2], post-increment

    strb    r6, [r1], r2                    ; store data: src[0]
    mov     r7, r6, lsr #16
    strb    r7, [r1], r2                    ; src[1]
    mov     r9, r8, lsr #16
    strb    r8, [r1], r2                    ; src[2]
    strb    r9, [r1], r2                    ; src[3]

    bne     bil_width_loop_null_2nd

    subs    r12, r12, #1
    add     r0, r0, #4                      ; skip the 4 bytes left at the end of this source row
    add     r11, r11, #1                    ; next destination byte position
    mov     r1, r11

    bne     bil_height_loop_null_2nd

    ldmia   sp!, {r4 - r11, pc}             ; restore registers and return
    ENDP  ; |vp8_filter_block2d_bil_second_pass_armv6|

    END