Patch for libmpeg2 0.5.1: libmpeg2/idct_mmx.c

Adds an ATTR_VECTOR(n) macro, which expands to
__attribute__((vector_size(n))) when __GNUC__ is defined and to an empty
comment otherwise, and appends it to the declarations of the constant
tables used by the MMX and SSE2 IDCT code (t1/t2/t3/c4_vector, rounder*,
rounder*_128 and table04/17/26/35).

--- libmpeg2-0.5.1/libmpeg2/idct_mmx.c.orig 2011-05-12 17:30:42.028201708 -0300
+++ libmpeg2-0.5.1/libmpeg2/idct_mmx.c 2011-05-12 17:35:50.084201548 -0300
@@ -39,6 +39,11 @@
#define rounder(bias) {round (bias), round (bias)}
#define rounder_sse2(bias) {round (bias), round (bias), round (bias), round (bias)}
+#if defined(__GNUC__)
+# define ATTR_VECTOR(n) __attribute__((vector_size(n)))
+#else
+# define ATTR_VECTOR(n) /**/
+#endif
#if 0
/* C row IDCT - it is just here to document the MMXEXT and MMX versions */
@@ -464,10 +469,10 @@ static inline void sse2_idct_col (int16_
/* Almost identical to mmxext version: */
/* just do both 4x8 columns in paraller */
- static const short t1_vector[] ATTR_ALIGN(16) = {T1,T1,T1,T1,T1,T1,T1,T1};
- static const short t2_vector[] ATTR_ALIGN(16) = {T2,T2,T2,T2,T2,T2,T2,T2};
- static const short t3_vector[] ATTR_ALIGN(16) = {T3,T3,T3,T3,T3,T3,T3,T3};
- static const short c4_vector[] ATTR_ALIGN(16) = {C4,C4,C4,C4,C4,C4,C4,C4};
+ static const short t1_vector[] ATTR_ALIGN(16) ATTR_VECTOR(16) = {T1,T1,T1,T1,T1,T1,T1,T1};
+ static const short t2_vector[] ATTR_ALIGN(16) ATTR_VECTOR(16) = {T2,T2,T2,T2,T2,T2,T2,T2};
+ static const short t3_vector[] ATTR_ALIGN(16) ATTR_VECTOR(16) = {T3,T3,T3,T3,T3,T3,T3,T3};
+ static const short c4_vector[] ATTR_ALIGN(16) ATTR_VECTOR(16) = {C4,C4,C4,C4,C4,C4,C4,C4};
#if defined(__x86_64__)
@@ -710,10 +715,10 @@ static inline void sse2_idct_col (int16_
/* MMX column IDCT */
static inline void idct_col (int16_t * const col, const int offset)
{
- static const short t1_vector[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
- static const short t2_vector[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
- static const short t3_vector[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
- static const short c4_vector[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
+ static const short t1_vector[] ATTR_ALIGN(8) ATTR_VECTOR(16) = {T1,T1,T1,T1};
+ static const short t2_vector[] ATTR_ALIGN(8) ATTR_VECTOR(16) = {T2,T2,T2,T2};
+ static const short t3_vector[] ATTR_ALIGN(8) ATTR_VECTOR(16) = {T3,T3,T3,T3};
+ static const short c4_vector[] ATTR_ALIGN(8) ATTR_VECTOR(16) = {C4,C4,C4,C4};
/* column code adapted from peter gubanov */
/* http://www.elecard.com/peter/idct.shtml */
@@ -847,33 +852,33 @@ static inline void idct_col (int16_t * c
}
-static const int32_t rounder0[] ATTR_ALIGN(8) =
+static const int32_t rounder0[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
rounder ((1 << (COL_SHIFT - 1)) - 0.5);
-static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
-static const int32_t rounder1[] ATTR_ALIGN(8) =
+static const int32_t rounder4[] ATTR_ALIGN(8) ATTR_VECTOR(16) = rounder (0);
+static const int32_t rounder1[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
-static const int32_t rounder7[] ATTR_ALIGN(8) =
+static const int32_t rounder7[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
-static const int32_t rounder2[] ATTR_ALIGN(8) =
+static const int32_t rounder2[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
rounder (0.60355339059); /* C2 * (C6+C2)/2 */
-static const int32_t rounder6[] ATTR_ALIGN(8) =
+static const int32_t rounder6[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
rounder (-0.25); /* C2 * (C6-C2)/2 */
-static const int32_t rounder3[] ATTR_ALIGN(8) =
+static const int32_t rounder3[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
-static const int32_t rounder5[] ATTR_ALIGN(8) =
+static const int32_t rounder5[] ATTR_ALIGN(8) ATTR_VECTOR(16) =
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
static inline void idct (int16_t * const block) \
{ \
- static const int16_t table04[] ATTR_ALIGN(16) = \
+ static const int16_t table04[] ATTR_ALIGN(16) ATTR_VECTOR(64) = \
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
- static const int16_t table17[] ATTR_ALIGN(16) = \
+ static const int16_t table17[] ATTR_ALIGN(16) ATTR_VECTOR(64) = \
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
- static const int16_t table26[] ATTR_ALIGN(16) = \
+ static const int16_t table26[] ATTR_ALIGN(16) ATTR_VECTOR(64) = \
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
- static const int16_t table35[] ATTR_ALIGN(16) = \
+ static const int16_t table35[] ATTR_ALIGN(16) ATTR_VECTOR(64) = \
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
\
idct_row_head (block, 0*8, table04); \
@@ -900,29 +905,30 @@ static inline void idct (int16_t * const
static inline void sse2_idct (int16_t * const block)
{
- static const int16_t table04[] ATTR_ALIGN(16) =
+ static const int16_t table04[] ATTR_ALIGN(16) ATTR_VECTOR(64) =
sse2_table (22725, 21407, 19266, 16384, 12873, 8867, 4520);
- static const int16_t table17[] ATTR_ALIGN(16) =
+ static const int16_t table17[] ATTR_ALIGN(16) ATTR_VECTOR(64) =
sse2_table (31521, 29692, 26722, 22725, 17855, 12299, 6270);
- static const int16_t table26[] ATTR_ALIGN(16) =
+ static const int16_t table26[] ATTR_ALIGN(16) ATTR_VECTOR(64) =
sse2_table (29692, 27969, 25172, 21407, 16819, 11585, 5906);
- static const int16_t table35[] ATTR_ALIGN(16) =
+ static const int16_t table35[] ATTR_ALIGN(16) ATTR_VECTOR(64) =
sse2_table (26722, 25172, 22654, 19266, 15137, 10426, 5315);
- static const int32_t rounder0_128[] ATTR_ALIGN(16) =
+ static const int32_t rounder0_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
rounder_sse2 ((1 << (COL_SHIFT - 1)) - 0.5);
- static const int32_t rounder4_128[] ATTR_ALIGN(16) = rounder_sse2 (0);
- static const int32_t rounder1_128[] ATTR_ALIGN(16) =
+ static const int32_t rounder4_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
+ rounder_sse2 (0);
+ static const int32_t rounder1_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
rounder_sse2 (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
- static const int32_t rounder7_128[] ATTR_ALIGN(16) =
+ static const int32_t rounder7_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
rounder_sse2 (-0.25); /* C1*(C7/C4+C7-C1)/2 */
- static const int32_t rounder2_128[] ATTR_ALIGN(16) =
+ static const int32_t rounder2_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
rounder_sse2 (0.60355339059); /* C2 * (C6+C2)/2 */
- static const int32_t rounder6_128[] ATTR_ALIGN(16) =
+ static const int32_t rounder6_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
rounder_sse2 (-0.25); /* C2 * (C6-C2)/2 */
- static const int32_t rounder3_128[] ATTR_ALIGN(16) =
+ static const int32_t rounder3_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
rounder_sse2 (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
- static const int32_t rounder5_128[] ATTR_ALIGN(16) =
+ static const int32_t rounder5_128[] ATTR_ALIGN(16) ATTR_VECTOR(16) =
rounder_sse2 (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
#if defined(__x86_64__)
|