1 | /* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- |
---|
2 | **************************************************************************** |
---|
3 | * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge |
---|
4 | **************************************************************************** |
---|
5 | * |
---|
6 | * File: math.c |
---|
7 | * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) |
---|
8 | * Changes: |
---|
9 | * |
---|
10 | * Date: Aug 2003 |
---|
11 | * |
---|
12 | * Environment: Xen Minimal OS |
---|
13 | * Description: Library functions for 64bit arith and other |
---|
14 | * from freebsd, files in sys/libkern/ (qdivrem.c, etc) |
---|
15 | * |
---|
16 | **************************************************************************** |
---|
17 | * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ |
---|
18 | **************************************************************************** |
---|
19 | *- |
---|
20 | * Copyright (c) 1992, 1993 |
---|
21 | * The Regents of the University of California. All rights reserved. |
---|
22 | * |
---|
23 | * This software was developed by the Computer Systems Engineering group |
---|
24 | * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and |
---|
25 | * contributed to Berkeley. |
---|
26 | * |
---|
27 | * Redistribution and use in source and binary forms, with or without |
---|
28 | * modification, are permitted provided that the following conditions |
---|
29 | * are met: |
---|
30 | * 1. Redistributions of source code must retain the above copyright |
---|
31 | * notice, this list of conditions and the following disclaimer. |
---|
32 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
33 | * notice, this list of conditions and the following disclaimer in the |
---|
34 | * documentation and/or other materials provided with the distribution. |
---|
35 | * 3. All advertising materials mentioning features or use of this software |
---|
36 | * must display the following acknowledgement: |
---|
37 | * This product includes software developed by the University of |
---|
38 | * California, Berkeley and its contributors. |
---|
39 | * 4. Neither the name of the University nor the names of its contributors |
---|
40 | * may be used to endorse or promote products derived from this software |
---|
41 | * without specific prior written permission. |
---|
42 | * |
---|
43 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
---|
44 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
45 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
46 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
---|
47 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
48 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
49 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
50 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
51 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
52 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
53 | * SUCH DAMAGE. |
---|
54 | * |
---|
55 | * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $ |
---|
56 | */ |
---|
57 | |
---|
58 | #include <types.h> |
---|
59 | |
---|
60 | /* On ia64 these functions lead to crashes. These are replaced by |
---|
61 | * assembler functions. */ |
---|
62 | #if !defined(__ia64__) |
---|
63 | |
---|
64 | /* |
---|
65 | * Depending on the desired operation, we view a `long long' (aka quad_t) in |
---|
66 | * one or more of the following formats. |
---|
67 | */ |
---|
68 | union uu { |
---|
69 | s64 q; /* as a (signed) quad */ |
---|
70 | s64 uq; /* as an unsigned quad */ |
---|
71 | long sl[2]; /* as two signed longs */ |
---|
72 | unsigned long ul[2]; /* as two unsigned longs */ |
---|
73 | }; |
---|
74 | /* XXX RN: Yuck hardcoded endianess :) */ |
---|
75 | #define _QUAD_HIGHWORD 1 |
---|
76 | #define _QUAD_LOWWORD 0 |
---|
77 | /* |
---|
78 | * Define high and low longwords. |
---|
79 | */ |
---|
80 | #define H _QUAD_HIGHWORD |
---|
81 | #define L _QUAD_LOWWORD |
---|
82 | |
---|
83 | /* |
---|
84 | * Total number of bits in a quad_t and in the pieces that make it up. |
---|
85 | * These are used for shifting, and also below for halfword extraction |
---|
86 | * and assembly. |
---|
87 | */ |
---|
88 | #define CHAR_BIT 8 /* number of bits in a char */ |
---|
89 | #define QUAD_BITS (sizeof(s64) * CHAR_BIT) |
---|
90 | #define LONG_BITS (sizeof(long) * CHAR_BIT) |
---|
91 | #define HALF_BITS (sizeof(long) * CHAR_BIT / 2) |
---|
92 | |
---|
93 | /* |
---|
94 | * Extract high and low shortwords from longword, and move low shortword of |
---|
95 | * longword to upper half of long, i.e., produce the upper longword of |
---|
96 | * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.) |
---|
97 | * |
---|
98 | * These are used in the multiply code, to split a longword into upper |
---|
99 | * and lower halves, and to reassemble a product as a quad_t, shifted left |
---|
100 | * (sizeof(long)*CHAR_BIT/2). |
---|
101 | */ |
---|
102 | #define HHALF(x) ((x) >> HALF_BITS) |
---|
103 | #define LHALF(x) ((x) & ((1UL << HALF_BITS) - 1)) |
---|
104 | #define LHUP(x) ((x) << HALF_BITS) |
---|
105 | |
---|
106 | /* |
---|
107 | * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), |
---|
108 | * section 4.3.1, pp. 257--259. |
---|
109 | */ |
---|
110 | #define B (1UL << HALF_BITS) /* digit base */ |
---|
111 | |
---|
112 | /* Combine two `digits' to make a single two-digit number. */ |
---|
113 | #define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b)) |
---|
114 | |
---|
115 | /* select a type for digits in base B: use unsigned short if they fit */ |
---|
116 | #if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff |
---|
117 | typedef unsigned short digit; |
---|
118 | #else |
---|
119 | typedef u_long digit; |
---|
120 | #endif |
---|
121 | |
---|
122 | |
---|
123 | /* |
---|
124 | * Shift p[0]..p[len] left `sh' bits, ignoring any bits that |
---|
125 | * `fall out' the left (there never will be any such anyway). |
---|
126 | * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. |
---|
127 | */ |
---|
128 | static void |
---|
129 | shl(register digit *p, register int len, register int sh) |
---|
130 | { |
---|
131 | register int i; |
---|
132 | |
---|
133 | for (i = 0; i < len; i++) |
---|
134 | p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); |
---|
135 | p[i] = LHALF(p[i] << sh); |
---|
136 | } |
---|
137 | |
---|
138 | /* |
---|
139 | * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. |
---|
140 | * |
---|
141 | * We do this in base 2-sup-HALF_BITS, so that all intermediate products |
---|
142 | * fit within u_long. As a consequence, the maximum length dividend and |
---|
143 | * divisor are 4 `digits' in this base (they are shorter if they have |
---|
144 | * leading zeros). |
---|
145 | */ |
---|
146 | u64 |
---|
147 | __qdivrem(u64 uq, u64 vq, u64 *arq) |
---|
148 | { |
---|
149 | union uu tmp; |
---|
150 | digit *u, *v, *q; |
---|
151 | register digit v1, v2; |
---|
152 | u_long qhat, rhat, t; |
---|
153 | int m, n, d, j, i; |
---|
154 | digit uspace[5], vspace[5], qspace[5]; |
---|
155 | |
---|
156 | /* |
---|
157 | * Take care of special cases: divide by zero, and u < v. |
---|
158 | */ |
---|
159 | if (vq == 0) { |
---|
160 | /* divide by zero. */ |
---|
161 | static volatile const unsigned int zero = 0; |
---|
162 | |
---|
163 | tmp.ul[H] = tmp.ul[L] = 1 / zero; |
---|
164 | if (arq) |
---|
165 | *arq = uq; |
---|
166 | return (tmp.q); |
---|
167 | } |
---|
168 | if (uq < vq) { |
---|
169 | if (arq) |
---|
170 | *arq = uq; |
---|
171 | return (0); |
---|
172 | } |
---|
173 | u = &uspace[0]; |
---|
174 | v = &vspace[0]; |
---|
175 | q = &qspace[0]; |
---|
176 | |
---|
177 | /* |
---|
178 | * Break dividend and divisor into digits in base B, then |
---|
179 | * count leading zeros to determine m and n. When done, we |
---|
180 | * will have: |
---|
181 | * u = (u[1]u[2]...u[m+n]) sub B |
---|
182 | * v = (v[1]v[2]...v[n]) sub B |
---|
183 | * v[1] != 0 |
---|
184 | * 1 < n <= 4 (if n = 1, we use a different division algorithm) |
---|
185 | * m >= 0 (otherwise u < v, which we already checked) |
---|
186 | * m + n = 4 |
---|
187 | * and thus |
---|
188 | * m = 4 - n <= 2 |
---|
189 | */ |
---|
190 | tmp.uq = uq; |
---|
191 | u[0] = 0; |
---|
192 | u[1] = HHALF(tmp.ul[H]); |
---|
193 | u[2] = LHALF(tmp.ul[H]); |
---|
194 | u[3] = HHALF(tmp.ul[L]); |
---|
195 | u[4] = LHALF(tmp.ul[L]); |
---|
196 | tmp.uq = vq; |
---|
197 | v[1] = HHALF(tmp.ul[H]); |
---|
198 | v[2] = LHALF(tmp.ul[H]); |
---|
199 | v[3] = HHALF(tmp.ul[L]); |
---|
200 | v[4] = LHALF(tmp.ul[L]); |
---|
201 | for (n = 4; v[1] == 0; v++) { |
---|
202 | if (--n == 1) { |
---|
203 | u_long rbj; /* r*B+u[j] (not root boy jim) */ |
---|
204 | digit q1, q2, q3, q4; |
---|
205 | |
---|
206 | /* |
---|
207 | * Change of plan, per exercise 16. |
---|
208 | * r = 0; |
---|
209 | * for j = 1..4: |
---|
210 | * q[j] = floor((r*B + u[j]) / v), |
---|
211 | * r = (r*B + u[j]) % v; |
---|
212 | * We unroll this completely here. |
---|
213 | */ |
---|
214 | t = v[2]; /* nonzero, by definition */ |
---|
215 | q1 = u[1] / t; |
---|
216 | rbj = COMBINE(u[1] % t, u[2]); |
---|
217 | q2 = rbj / t; |
---|
218 | rbj = COMBINE(rbj % t, u[3]); |
---|
219 | q3 = rbj / t; |
---|
220 | rbj = COMBINE(rbj % t, u[4]); |
---|
221 | q4 = rbj / t; |
---|
222 | if (arq) |
---|
223 | *arq = rbj % t; |
---|
224 | tmp.ul[H] = COMBINE(q1, q2); |
---|
225 | tmp.ul[L] = COMBINE(q3, q4); |
---|
226 | return (tmp.q); |
---|
227 | } |
---|
228 | } |
---|
229 | |
---|
230 | /* |
---|
231 | * By adjusting q once we determine m, we can guarantee that |
---|
232 | * there is a complete four-digit quotient at &qspace[1] when |
---|
233 | * we finally stop. |
---|
234 | */ |
---|
235 | for (m = 4 - n; u[1] == 0; u++) |
---|
236 | m--; |
---|
237 | for (i = 4 - m; --i >= 0;) |
---|
238 | q[i] = 0; |
---|
239 | q += 4 - m; |
---|
240 | |
---|
241 | /* |
---|
242 | * Here we run Program D, translated from MIX to C and acquiring |
---|
243 | * a few minor changes. |
---|
244 | * |
---|
245 | * D1: choose multiplier 1 << d to ensure v[1] >= B/2. |
---|
246 | */ |
---|
247 | d = 0; |
---|
248 | for (t = v[1]; t < B / 2; t <<= 1) |
---|
249 | d++; |
---|
250 | if (d > 0) { |
---|
251 | shl(&u[0], m + n, d); /* u <<= d */ |
---|
252 | shl(&v[1], n - 1, d); /* v <<= d */ |
---|
253 | } |
---|
254 | /* |
---|
255 | * D2: j = 0. |
---|
256 | */ |
---|
257 | j = 0; |
---|
258 | v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ |
---|
259 | v2 = v[2]; /* for D3 */ |
---|
260 | do { |
---|
261 | register digit uj0, uj1, uj2; |
---|
262 | |
---|
263 | /* |
---|
264 | * D3: Calculate qhat (\^q, in TeX notation). |
---|
265 | * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and |
---|
266 | * let rhat = (u[j]*B + u[j+1]) mod v[1]. |
---|
267 | * While rhat < B and v[2]*qhat > rhat*B+u[j+2], |
---|
268 | * decrement qhat and increase rhat correspondingly. |
---|
269 | * Note that if rhat >= B, v[2]*qhat < rhat*B. |
---|
270 | */ |
---|
271 | uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ |
---|
272 | uj1 = u[j + 1]; /* for D3 only */ |
---|
273 | uj2 = u[j + 2]; /* for D3 only */ |
---|
274 | if (uj0 == v1) { |
---|
275 | qhat = B; |
---|
276 | rhat = uj1; |
---|
277 | goto qhat_too_big; |
---|
278 | } else { |
---|
279 | u_long nn = COMBINE(uj0, uj1); |
---|
280 | qhat = nn / v1; |
---|
281 | rhat = nn % v1; |
---|
282 | } |
---|
283 | while (v2 * qhat > COMBINE(rhat, uj2)) { |
---|
284 | qhat_too_big: |
---|
285 | qhat--; |
---|
286 | if ((rhat += v1) >= B) |
---|
287 | break; |
---|
288 | } |
---|
289 | /* |
---|
290 | * D4: Multiply and subtract. |
---|
291 | * The variable `t' holds any borrows across the loop. |
---|
292 | * We split this up so that we do not require v[0] = 0, |
---|
293 | * and to eliminate a final special case. |
---|
294 | */ |
---|
295 | for (t = 0, i = n; i > 0; i--) { |
---|
296 | t = u[i + j] - v[i] * qhat - t; |
---|
297 | u[i + j] = LHALF(t); |
---|
298 | t = (B - HHALF(t)) & (B - 1); |
---|
299 | } |
---|
300 | t = u[j] - t; |
---|
301 | u[j] = LHALF(t); |
---|
302 | /* |
---|
303 | * D5: test remainder. |
---|
304 | * There is a borrow if and only if HHALF(t) is nonzero; |
---|
305 | * in that (rare) case, qhat was too large (by exactly 1). |
---|
306 | * Fix it by adding v[1..n] to u[j..j+n]. |
---|
307 | */ |
---|
308 | if (HHALF(t)) { |
---|
309 | qhat--; |
---|
310 | for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ |
---|
311 | t += u[i + j] + v[i]; |
---|
312 | u[i + j] = LHALF(t); |
---|
313 | t = HHALF(t); |
---|
314 | } |
---|
315 | u[j] = LHALF(u[j] + t); |
---|
316 | } |
---|
317 | q[j] = qhat; |
---|
318 | } while (++j <= m); /* D7: loop on j. */ |
---|
319 | |
---|
320 | /* |
---|
321 | * If caller wants the remainder, we have to calculate it as |
---|
322 | * u[m..m+n] >> d (this is at most n digits and thus fits in |
---|
323 | * u[m+1..m+n], but we may need more source digits). |
---|
324 | */ |
---|
325 | if (arq) { |
---|
326 | if (d) { |
---|
327 | for (i = m + n; i > m; --i) |
---|
328 | u[i] = (u[i] >> d) | |
---|
329 | LHALF(u[i - 1] << (HALF_BITS - d)); |
---|
330 | u[i] = 0; |
---|
331 | } |
---|
332 | tmp.ul[H] = COMBINE(uspace[1], uspace[2]); |
---|
333 | tmp.ul[L] = COMBINE(uspace[3], uspace[4]); |
---|
334 | *arq = tmp.q; |
---|
335 | } |
---|
336 | |
---|
337 | tmp.ul[H] = COMBINE(qspace[1], qspace[2]); |
---|
338 | tmp.ul[L] = COMBINE(qspace[3], qspace[4]); |
---|
339 | return (tmp.q); |
---|
340 | } |
---|
341 | |
---|
342 | |
---|
343 | /* |
---|
344 | * Divide two signed quads. |
---|
345 | * ??? if -1/2 should produce -1 on this machine, this code is wrong |
---|
346 | */ |
---|
347 | s64 |
---|
348 | __divdi3(s64 a, s64 b) |
---|
349 | { |
---|
350 | u64 ua, ub, uq; |
---|
351 | int neg; |
---|
352 | |
---|
353 | if (a < 0) |
---|
354 | ua = -(u64)a, neg = 1; |
---|
355 | else |
---|
356 | ua = a, neg = 0; |
---|
357 | if (b < 0) |
---|
358 | ub = -(u64)b, neg ^= 1; |
---|
359 | else |
---|
360 | ub = b; |
---|
361 | uq = __qdivrem(ua, ub, (u64 *)0); |
---|
362 | return (neg ? -uq : uq); |
---|
363 | } |
---|
364 | |
---|
365 | /* |
---|
366 | * Divide two unsigned quads. |
---|
367 | */ |
---|
368 | u64 |
---|
369 | __udivdi3(u64 a, u64 b) |
---|
370 | { |
---|
371 | return (__qdivrem(a, b, (u64 *)0)); |
---|
372 | } |
---|
373 | |
---|
374 | |
---|
375 | /* |
---|
376 | * Return remainder after dividing two unsigned quads. |
---|
377 | */ |
---|
378 | u_quad_t |
---|
379 | __umoddi3(u_quad_t a, u_quad_t b) |
---|
380 | { |
---|
381 | u_quad_t r; |
---|
382 | |
---|
383 | (void)__qdivrem(a, b, &r); |
---|
384 | return (r); |
---|
385 | } |
---|
386 | |
---|
387 | #endif /* !defined(__ia64__) */ |
---|