Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_physics.neon.c
1/*
2 * Copyright 2014-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : physics/NE10_physics.neon.c
30 */
31
32#include "NE10_types.h"
33
34#ifdef ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
/*
 * Declaration of the externally defined AABB routine. The asm label pins the
 * linker symbol to exactly "ne10_physics_compute_aabb_vertex4_vec2f_neon".
 *
 * Within this file it is only invoked by ne10_physics_compute_aabb_vec2f_neon,
 * which always passes a vertex_count that is a non-zero multiple of four.
 * NOTE(review): presumably implemented in a separate NEON assembly file and
 * only valid for multiples of four vertices -- confirm against that source.
 */
35extern void ne10_physics_compute_aabb_vertex4_vec2f_neon (ne10_mat2x2f_t *aabb,
36 ne10_vec2f_t *vertices,
37 ne10_mat2x2f_t *xf,
38 ne10_vec2f_t *radius,
39 ne10_uint32_t vertex_count)
40asm ("ne10_physics_compute_aabb_vertex4_vec2f_neon");
41#endif // ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
42
43static inline ne10_vec2f_t ne10_mul_matvec_float (ne10_mat2x2f_t T, ne10_vec2f_t v)
44{
45 ne10_vec2f_t tmp;
46 ne10_float32_t x = (T.c2.r2 * v.x - T.c2.r1 * v.y) + T.c1.r1;
47 ne10_float32_t y = (T.c2.r1 * v.x + T.c2.r2 * v.y) + T.c1.r2;
48 tmp.x = x;
49 tmp.y = y;
50 return tmp;
51}
52
53static inline ne10_float32_t min (float a, ne10_float32_t b)
54{
55 return a < b ? a : b;
56}
57
58static inline ne10_vec2f_t min_2f (ne10_vec2f_t a, ne10_vec2f_t b)
59{
60 ne10_vec2f_t tmp = {min (a.x, b.x), min (a.y, b.y) };
61 return tmp;
62}
63
64static inline ne10_float32_t max (float a, ne10_float32_t b)
65{
66 return a > b ? a : b;
67}
68
69static inline ne10_vec2f_t max_2f (ne10_vec2f_t a, ne10_vec2f_t b)
70{
71 ne10_vec2f_t tmp = {max (a.x, b.x), max (a.y, b.y) };
72 return tmp;
73}
74
#ifdef ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
/**
 * Computes the axis-aligned bounding box of a set of transformed vertices.
 *
 * Every vertex is transformed with *xf; the component-wise minimum and
 * maximum of the transformed points are then widened by *radius and stored
 * in *aabb (c1 = lower bound, c2 = upper bound).
 *
 * The largest multiple-of-four prefix of the vertices is handed to
 * ne10_physics_compute_aabb_vertex4_vec2f_neon; the remaining 1-3 vertices
 * are processed here in C and merged with that result.
 *
 * @param[out] aabb          receives the bounding box; left untouched when
 *                           vertex_count is 0
 * @param[in]  vertices      array of vertex_count points
 * @param[in]  xf            transform applied to every vertex
 * @param[in]  radius        per-axis margin added around the box
 * @param[in]  vertex_count  number of entries in vertices
 */
void ne10_physics_compute_aabb_vec2f_neon (ne10_mat2x2f_t *aabb,
        ne10_vec2f_t *vertices,
        ne10_mat2x2f_t *xf,
        ne10_vec2f_t *radius,
        ne10_uint32_t vertex_count)
{
    /*
     * Keep the counters unsigned like vertex_count: with signed counters a
     * count of 2^31 or more turned main_loops negative, which skipped the
     * kernel and indexed vertices[] out of bounds.
     */
    const ne10_uint32_t residual_loops = vertex_count & 0x3u;
    const ne10_uint32_t main_loops = vertex_count - residual_loops;

    if (main_loops > 0)
    {
        ne10_physics_compute_aabb_vertex4_vec2f_neon (aabb, vertices, xf, radius, main_loops);
    }

    if (residual_loops > 0)
    {
        /* Seed the bounds with the first vertex the kernel did not handle. */
        ne10_vec2f_t lower = ne10_mul_matvec_float (*xf, vertices[main_loops]);
        ne10_vec2f_t upper = lower;
        ne10_uint32_t i;

        if (main_loops > 0)
        {
            /*
             * Merge with the box already written to *aabb. The radius is
             * removed first because it is re-applied once at the end.
             * NOTE(review): assumes the kernel stored its result with the
             * radius already applied -- confirm against its implementation.
             */
            ne10_vec2f_t kernel_lower;
            ne10_vec2f_t kernel_upper;

            kernel_lower.x = aabb->c1.r1 + radius->x;
            kernel_lower.y = aabb->c1.r2 + radius->y;
            kernel_upper.x = aabb->c2.r1 - radius->x;
            kernel_upper.y = aabb->c2.r2 - radius->y;

            lower = min_2f (lower, kernel_lower);
            upper = max_2f (upper, kernel_upper);
        }

        /* Fold in the remaining (at most two) residual vertices. */
        for (i = main_loops + 1; i < vertex_count; ++i)
        {
            const ne10_vec2f_t v = ne10_mul_matvec_float (*xf, vertices[i]);

            lower = min_2f (lower, v);
            upper = max_2f (upper, v);
        }

        /* Widen the final box by the radius on every side. */
        aabb->c1.r1 = lower.x - radius->x;
        aabb->c1.r2 = lower.y - radius->y;
        aabb->c2.r1 = upper.x + radius->x;
        aabb->c2.r2 = upper.y + radius->y;
    }
}
#endif // ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
a 2-tuple of ne10_float32_t values.
Definition NE10_types.h:88