.file "__umoddi3.s"

// $FreeBSD: src/sys/libkern/ia64/__umoddi3.S,v 1.3 2003/02/11 20:15:11 schweikh Exp $
//
// Copyright (c) 2000, Intel Corporation
// All rights reserved.
//
// Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache,
// Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab,
// Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//

.section .text

// 64-bit unsigned integer remainder
//
// __umoddi3(a, b): remainder of the 64-bit unsigned division a / b.
//
//   In:   r32 - 64-bit unsigned integer dividend, called a below
//         r33 - 64-bit unsigned integer divisor, called b below
//   Out:  r8  - 64-bit unsigned integer result (a - b * q)
//
//   Registers written:
//         general:        r8, r31 (receives ar.pfs from alloc),
//                         r33 (overwritten with -b)
//         floating-point: f6, f7, f8, f9, f10, f11, f12
//         predicate:      p6
//
//   Method: a and b are converted to floating point, frcpa supplies a
//   first approximation y0 of 1/b, and steps (2)-(10) refine quotient and
//   reciprocal estimates into q3.  q3 is truncated to an integer q, and
//   the remainder is then formed with an integer multiply-add as
//   a + (-b) * q.  Every floating-point operation uses status field s1.
//
//   Steps (2)-(10) are predicated on p6, which frcpa sets; when frcpa
//   leaves p6 clear the refinement is skipped and whatever frcpa wrote to
//   f8 is converted in step (11) unchanged.
//   NOTE(review): there is no explicit test for b == 0; the value returned
//   in that case is whatever the frcpa/fcvt special-operand behaviour
//   produces - confirm against the architecture manual and the callers.

.proc __umoddi3#
.align 32
.global __umoddi3#
.align 32

__umoddi3:

{ .mii
        // Frame: 3 inputs, 0 locals, 0 outputs, 0 rotating.  Only r32 and
        // r33 are read below.
        alloc r31=ar.pfs,3,0,0,0
        nop.i 0
        nop.i 0
}

        // 64-BIT UNSIGNED INTEGER REMAINDER BEGINS HERE

{ .mmb
        // Move both operands into FP registers as 64-bit integers.
        setf.sig f12=r32                // f12 holds a in integer form (kept for step 12)
        setf.sig f7=r33                 // f7 holds b in integer form
        nop.b 0;;
}
{ .mfi
        // get 2's complement of b; r33 = -b is moved to f7 alongside step (10)
        sub r33=r0,r33
        fcvt.xuf.s1 f6=f12              // f6 = a as a floating-point value
        nop.i 0
}
{ .mfi
        nop.m 0
        fcvt.xuf.s1 f7=f7               // f7 = b as a floating-point value
        nop.i 0;;
}
{ .mfi
        nop.m 0
        // Step (1)
        // y0 = 1 / b in f8; p6 gates the refinement steps below
        frcpa.s1 f8,p6=f6,f7
        nop.i 0;;
}
{ .mfi
        nop.m 0
        // Step (2)
        // q0 = a * y0 in f10
(p6)    fma.s1 f10=f6,f8,f0
        nop.i 0
}
{ .mfi
        nop.m 0
        // Step (3)
        // e0 = 1 - b * y0 in f9
(p6)    fnma.s1 f9=f7,f8,f1
        nop.i 0;;
}
{ .mfi
        nop.m 0
        // Step (4)
        // q1 = q0 + e0 * q0 in f10
(p6)    fma.s1 f10=f9,f10,f10
        nop.i 0
}
{ .mfi
        nop.m 0
        // Step (5)
        // e1 = e0 * e0 in f11
(p6)    fma.s1 f11=f9,f9,f0
        nop.i 0;;
}
{ .mfi
        nop.m 0
        // Step (6)
        // y1 = y0 + e0 * y0 in f8
(p6)    fma.s1 f8=f9,f8,f8
        nop.i 0;;
}
{ .mfi
        nop.m 0
        // Step (7)
        // q2 = q1 + e1 * q1 in f9 (e0 is no longer needed, so f9 is reused)
(p6)    fma.s1 f9=f11,f10,f10
        nop.i 0;;
}
{ .mfi
        nop.m 0
        // Step (8)
        // y2 = y1 + e1 * y1 in f8
(p6)    fma.s1 f8=f11,f8,f8
        nop.i 0;;
}
{ .mfi
        nop.m 0
        // Step (9)
        // r2 = a - b * q2 in f10 (last use of b in floating-point form)
(p6)    fnma.s1 f10=f7,f9,f6
        nop.i 0;;
}
{ .mfi
        // f7=-b in integer form; the floating-point b held in f7 was last
        // read by step (9) in the previous instruction group
        setf.sig f7=r33
        // Step (10)
        // q3 = q2 + r2 * y2 in f8
(p6)    fma.s1 f8=f10,f8,f9
        nop.i 0;;
}
{ .mfi
        nop.m 0
        // (11) q = trunc(q3), as an unsigned integer in f8
        fcvt.fxu.trunc.s1 f8=f8
        nop.i 0;;
}
{ .mfi
        nop.m 0
        // (12) r = a + (-b) * q, integer multiply-add keeping the low 64 bits
        xma.l f8=f8,f7,f12
        nop.i 0;;
}
{ .mib
        getf.sig r8=f8                  // return the remainder in r8
        nop.i 0
        nop.b 0
}

        // 64-BIT UNSIGNED INTEGER REMAINDER ENDS HERE

{ .mib
        nop.m 0
        nop.i 0
        br.ret.sptk b0;;
}

.endp __umoddi3