Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
NE10_mulmat.c
1 /*
2  * Copyright 2011-15 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : math/NE10_mulmat.c
30  */
31 
32 #include "NE10_types.h"
33 #include "macros.h"
34 
35 #include <assert.h>
36 
37 ne10_result_t ne10_mulmat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count)
38 {
39 #define A1 src1[ itr ].c1.r1
40 #define A2 src2[ itr ].c1.r1
41 #define B1 src1[ itr ].c1.r2
42 #define B2 src2[ itr ].c1.r2
43 #define C1 src1[ itr ].c2.r1
44 #define C2 src2[ itr ].c2.r1
45 #define D1 src1[ itr ].c2.r2
46 #define D2 src2[ itr ].c2.r2
47 
48  NE10_X_OPERATION_FLOAT_C
49  (
50  dst[ itr ].c1.r1 = (A1 * A2) + (C1 * B2);
51  dst[ itr ].c1.r2 = (B1 * A2) + (D1 * B2);
52 
53  dst[ itr ].c2.r1 = (A1 * C2) + (C1 * D2);
54  dst[ itr ].c2.r2 = (B1 * C2) + (D1 * D2);
55  );
56 
57 #undef A1
58 #undef A2
59 #undef B1
60 #undef B2
61 #undef C1
62 #undef C2
63 #undef D1
64 #undef D2
65 }
66 
67 ne10_result_t ne10_mulmat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count)
68 {
69 #define A1 src1[ itr ].c1.r1
70 #define A2 src2[ itr ].c1.r1
71 #define B1 src1[ itr ].c1.r2
72 #define B2 src2[ itr ].c1.r2
73 #define C1 src1[ itr ].c1.r3
74 #define C2 src2[ itr ].c1.r3
75 #define D1 src1[ itr ].c2.r1
76 #define D2 src2[ itr ].c2.r1
77 #define E1 src1[ itr ].c2.r2
78 #define E2 src2[ itr ].c2.r2
79 #define F1 src1[ itr ].c2.r3
80 #define F2 src2[ itr ].c2.r3
81 #define G1 src1[ itr ].c3.r1
82 #define G2 src2[ itr ].c3.r1
83 #define H1 src1[ itr ].c3.r2
84 #define H2 src2[ itr ].c3.r2
85 #define I1 src1[ itr ].c3.r3
86 #define I2 src2[ itr ].c3.r3
87 
88  NE10_X_OPERATION_FLOAT_C
89  (
90  dst[ itr ].c1.r1 = (A1 * A2) + (D1 * B2) + (G1 * C2);
91  dst[ itr ].c1.r2 = (B1 * A2) + (E1 * B2) + (H1 * C2);
92  dst[ itr ].c1.r3 = (C1 * A2) + (F1 * B2) + (I1 * C2);
93 
94  dst[ itr ].c2.r1 = (A1 * D2) + (D1 * E2) + (G1 * F2);
95  dst[ itr ].c2.r2 = (B1 * D2) + (E1 * E2) + (H1 * F2);
96  dst[ itr ].c2.r3 = (C1 * D2) + (F1 * E2) + (I1 * F2);
97 
98  dst[ itr ].c3.r1 = (A1 * G2) + (D1 * H2) + (G1 * I2);
99  dst[ itr ].c3.r2 = (B1 * G2) + (E1 * H2) + (H1 * I2);
100  dst[ itr ].c3.r3 = (C1 * G2) + (F1 * H2) + (I1 * I2);
101  );
102 
103 #undef A1
104 #undef A2
105 #undef B1
106 #undef B2
107 #undef C1
108 #undef C2
109 #undef D1
110 #undef D2
111 #undef E1
112 #undef E2
113 #undef F1
114 #undef F2
115 #undef G1
116 #undef G2
117 #undef H1
118 #undef H2
119 #undef I1
120 #undef I2
121 }
122 
123 ne10_result_t ne10_mulmat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count)
124 {
125 #define A1 src1[ itr ].c1.r1
126 #define A2 src2[ itr ].c1.r1
127 #define B1 src1[ itr ].c1.r2
128 #define B2 src2[ itr ].c1.r2
129 #define C1 src1[ itr ].c1.r3
130 #define C2 src2[ itr ].c1.r3
131 #define D1 src1[ itr ].c1.r4
132 #define D2 src2[ itr ].c1.r4
133 
134 #define E1 src1[ itr ].c2.r1
135 #define E2 src2[ itr ].c2.r1
136 #define F1 src1[ itr ].c2.r2
137 #define F2 src2[ itr ].c2.r2
138 #define G1 src1[ itr ].c2.r3
139 #define G2 src2[ itr ].c2.r3
140 #define H1 src1[ itr ].c2.r4
141 #define H2 src2[ itr ].c2.r4
142 
143 #define I1 src1[ itr ].c3.r1
144 #define I2 src2[ itr ].c3.r1
145 #define J1 src1[ itr ].c3.r2
146 #define J2 src2[ itr ].c3.r2
147 #define K1 src1[ itr ].c3.r3
148 #define K2 src2[ itr ].c3.r3
149 #define L1 src1[ itr ].c3.r4
150 #define L2 src2[ itr ].c3.r4
151 
152 #define M1 src1[ itr ].c4.r1
153 #define M2 src2[ itr ].c4.r1
154 #define N1 src1[ itr ].c4.r2
155 #define N2 src2[ itr ].c4.r2
156 #define O1 src1[ itr ].c4.r3
157 #define O2 src2[ itr ].c4.r3
158 #define P1 src1[ itr ].c4.r4
159 #define P2 src2[ itr ].c4.r4
160 
161  NE10_X_OPERATION_FLOAT_C
162  (
163  dst[ itr ].c1.r1 = (A1 * A2) + (E1 * B2) + (I1 * C2) + (M1 * D2);
164  dst[ itr ].c1.r2 = (B1 * A2) + (F1 * B2) + (J1 * C2) + (N1 * D2);
165  dst[ itr ].c1.r3 = (C1 * A2) + (G1 * B2) + (K1 * C2) + (O1 * D2);
166  dst[ itr ].c1.r4 = (D1 * A2) + (H1 * B2) + (L1 * C2) + (P1 * D2);
167 
168  dst[ itr ].c2.r1 = (A1 * E2) + (E1 * F2) + (I1 * G2) + (M1 * H2);
169  dst[ itr ].c2.r2 = (B1 * E2) + (F1 * F2) + (J1 * G2) + (N1 * H2);
170  dst[ itr ].c2.r3 = (C1 * E2) + (G1 * F2) + (K1 * G2) + (O1 * H2);
171  dst[ itr ].c2.r4 = (D1 * E2) + (H1 * F2) + (L1 * G2) + (P1 * H2);
172 
173  dst[ itr ].c3.r1 = (A1 * I2) + (E1 * J2) + (I1 * K2) + (M1 * L2);
174  dst[ itr ].c3.r2 = (B1 * I2) + (F1 * J2) + (J1 * K2) + (N1 * L2);
175  dst[ itr ].c3.r3 = (C1 * I2) + (G1 * J2) + (K1 * K2) + (O1 * L2);
176  dst[ itr ].c3.r4 = (D1 * I2) + (H1 * J2) + (L1 * K2) + (P1 * L2);
177 
178  dst[ itr ].c4.r1 = (A1 * M2) + (E1 * N2) + (I1 * O2) + (M1 * P2);
179  dst[ itr ].c4.r2 = (B1 * M2) + (F1 * N2) + (J1 * O2) + (N1 * P2);
180  dst[ itr ].c4.r3 = (C1 * M2) + (G1 * N2) + (K1 * O2) + (O1 * P2);
181  dst[ itr ].c4.r4 = (D1 * M2) + (H1 * N2) + (L1 * O2) + (P1 * P2);
182  );
183 
184 #undef A1
185 #undef A2
186 #undef B1
187 #undef B2
188 #undef C1
189 #undef C2
190 #undef D1
191 #undef D2
192 #undef E1
193 #undef E2
194 #undef F1
195 #undef F2
196 #undef G1
197 #undef G2
198 #undef H1
199 #undef H2
200 #undef I1
201 #undef I2
202 #undef J1
203 #undef J2
204 #undef K1
205 #undef K2
206 #undef L1
207 #undef L2
208 #undef M1
209 #undef M2
210 #undef N1
211 #undef N2
212 #undef O1
213 #undef O2
214 #undef P1
215 #undef P2
216 }