1 | /* ===-- udivmoddi6.c - Implement __udivmoddi4 -----------------------------===
|
2 | *
|
3 | * The LLVM Compiler Infrastructure
|
4 | *
|
5 | * This file is dual licensed under the MIT and the University of Illinois Open
|
6 | * Source Licenses. See LICENSE.TXT for details.
|
7 | *
|
8 | * ===----------------------------------------------------------------------===
|
9 | *
|
10 | * This file implements __udivmoddi4 for the compiler_rt library.
|
11 | *
|
12 | * ===----------------------------------------------------------------------===
|
13 | */
|
14 |
|
15 | #include "int_lib.h"
|
16 |
|
17 | /* Effects: if rem != 0, *rem = a % b
|
18 | * Returns: a / b
|
19 | */
|
20 |
|
21 | /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
|
22 |
|
23 | COMPILER_RT_ABI du_int
|
24 | __udivmoddi4(du_int a, du_int b, du_int* rem)
|
25 | {
|
26 | const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
|
27 | const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
|
28 | udwords n;
|
29 | n.all = a;
|
30 | udwords d;
|
31 | d.all = b;
|
32 | udwords q;
|
33 | udwords r;
|
34 | unsigned sr;
|
35 | /* special cases, X is unknown, K != 0 */
|
36 | if (n.s.high == 0)
|
37 | {
|
38 | if (d.s.high == 0)
|
39 | {
|
40 | /* 0 X
|
41 | * ---
|
42 | * 0 X
|
43 | */
|
44 | if (rem)
|
45 | *rem = n.s.low % d.s.low;
|
46 | return n.s.low / d.s.low;
|
47 | }
|
48 | /* 0 X
|
49 | * ---
|
50 | * K X
|
51 | */
|
52 | if (rem)
|
53 | *rem = n.s.low;
|
54 | return 0;
|
55 | }
|
56 | /* n.s.high != 0 */
|
57 | if (d.s.low == 0)
|
58 | {
|
59 | if (d.s.high == 0)
|
60 | {
|
61 | /* K X
|
62 | * ---
|
63 | * 0 0
|
64 | */
|
65 | if (rem)
|
66 | *rem = n.s.high % d.s.low;
|
67 | return n.s.high / d.s.low;
|
68 | }
|
69 | /* d.s.high != 0 */
|
70 | if (n.s.low == 0)
|
71 | {
|
72 | /* K 0
|
73 | * ---
|
74 | * K 0
|
75 | */
|
76 | if (rem)
|
77 | {
|
78 | r.s.high = n.s.high % d.s.high;
|
79 | r.s.low = 0;
|
80 | *rem = r.all;
|
81 | }
|
82 | return n.s.high / d.s.high;
|
83 | }
|
84 | /* K K
|
85 | * ---
|
86 | * K 0
|
87 | */
|
88 | if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
|
89 | {
|
90 | if (rem)
|
91 | {
|
92 | r.s.low = n.s.low;
|
93 | r.s.high = n.s.high & (d.s.high - 1);
|
94 | *rem = r.all;
|
95 | }
|
96 | return n.s.high >> __builtin_ctz(d.s.high);
|
97 | }
|
98 | /* K K
|
99 | * ---
|
100 | * K 0
|
101 | */
|
102 | sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
|
103 | /* 0 <= sr <= n_uword_bits - 2 or sr large */
|
104 | if (sr > n_uword_bits - 2)
|
105 | {
|
106 | if (rem)
|
107 | *rem = n.all;
|
108 | return 0;
|
109 | }
|
110 | ++sr;
|
111 | /* 1 <= sr <= n_uword_bits - 1 */
|
112 | /* q.all = n.all << (n_udword_bits - sr); */
|
113 | q.s.low = 0;
|
114 | q.s.high = n.s.low << (n_uword_bits - sr);
|
115 | /* r.all = n.all >> sr; */
|
116 | r.s.high = n.s.high >> sr;
|
117 | r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
|
118 | }
|
119 | else /* d.s.low != 0 */
|
120 | {
|
121 | if (d.s.high == 0)
|
122 | {
|
123 | /* K X
|
124 | * ---
|
125 | * 0 K
|
126 | */
|
127 | if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
|
128 | {
|
129 | if (rem)
|
130 | *rem = n.s.low & (d.s.low - 1);
|
131 | if (d.s.low == 1)
|
132 | return n.all;
|
133 | sr = __builtin_ctz(d.s.low);
|
134 | q.s.high = n.s.high >> sr;
|
135 | q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
|
136 | return q.all;
|
137 | }
|
138 | /* K X
|
139 | * ---
|
140 | * 0 K
|
141 | */
|
142 | sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
|
143 | /* 2 <= sr <= n_udword_bits - 1
|
144 | * q.all = n.all << (n_udword_bits - sr);
|
145 | * r.all = n.all >> sr;
|
146 | */
|
147 | if (sr == n_uword_bits)
|
148 | {
|
149 | q.s.low = 0;
|
150 | q.s.high = n.s.low;
|
151 | r.s.high = 0;
|
152 | r.s.low = n.s.high;
|
153 | }
|
154 | else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1
|
155 | {
|
156 | q.s.low = 0;
|
157 | q.s.high = n.s.low << (n_uword_bits - sr);
|
158 | r.s.high = n.s.high >> sr;
|
159 | r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
|
160 | }
|
161 | else // n_uword_bits + 1 <= sr <= n_udword_bits - 1
|
162 | {
|
163 | q.s.low = n.s.low << (n_udword_bits - sr);
|
164 | q.s.high = (n.s.high << (n_udword_bits - sr)) |
|
165 | (n.s.low >> (sr - n_uword_bits));
|
166 | r.s.high = 0;
|
167 | r.s.low = n.s.high >> (sr - n_uword_bits);
|
168 | }
|
169 | }
|
170 | else
|
171 | {
|
172 | /* K X
|
173 | * ---
|
174 | * K K
|
175 | */
|
176 | sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
|
177 | /* 0 <= sr <= n_uword_bits - 1 or sr large */
|
178 | if (sr > n_uword_bits - 1)
|
179 | {
|
180 | if (rem)
|
181 | *rem = n.all;
|
182 | return 0;
|
183 | }
|
184 | ++sr;
|
185 | /* 1 <= sr <= n_uword_bits */
|
186 | /* q.all = n.all << (n_udword_bits - sr); */
|
187 | q.s.low = 0;
|
188 | if (sr == n_uword_bits)
|
189 | {
|
190 | q.s.high = n.s.low;
|
191 | r.s.high = 0;
|
192 | r.s.low = n.s.high;
|
193 | }
|
194 | else
|
195 | {
|
196 | q.s.high = n.s.low << (n_uword_bits - sr);
|
197 | r.s.high = n.s.high >> sr;
|
198 | r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
|
199 | }
|
200 | }
|
201 | }
|
202 | /* Not a special case
|
203 | * q and r are initialized with:
|
204 | * q.all = n.all << (n_udword_bits - sr);
|
205 | * r.all = n.all >> sr;
|
206 | * 1 <= sr <= n_udword_bits - 1
|
207 | */
|
208 | su_int carry = 0;
|
209 | for (; sr > 0; --sr)
|
210 | {
|
211 | /* r:q = ((r:q) << 1) | carry */
|
212 | r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
|
213 | r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
|
214 | q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
|
215 | q.s.low = (q.s.low << 1) | carry;
|
216 | /* carry = 0;
|
217 | * if (r.all >= d.all)
|
218 | * {
|
219 | * r.all -= d.all;
|
220 | * carry = 1;
|
221 | * }
|
222 | */
|
223 | const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
|
224 | carry = s & 1;
|
225 | r.all -= d.all & s;
|
226 | }
|
227 | q.all = (q.all << 1) | carry;
|
228 | if (rem)
|
229 | *rem = r.all;
|
230 | return q.all;
|
231 | }
|