| 1 | .file "__umoddi3.s" |
|---|
| 2 | |
|---|
| 3 | // $FreeBSD: src/sys/libkern/ia64/__umoddi3.S,v 1.3 2003/02/11 20:15:11 schweikh Exp $ |
|---|
| 4 | // |
|---|
| 5 | // Copyright (c) 2000, Intel Corporation |
|---|
| 6 | // All rights reserved. |
|---|
| 7 | // |
|---|
| 8 | // Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache, |
|---|
| 9 | // Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab, |
|---|
| 10 | // Intel Corporation. |
|---|
| 11 | // |
|---|
| 12 | // WARRANTY DISCLAIMER |
|---|
| 13 | // |
|---|
| 14 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|---|
| 15 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|---|
| 16 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|---|
| 17 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS |
|---|
| 18 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|---|
| 19 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|---|
| 20 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|---|
| 21 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|---|
| 22 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING |
|---|
| 23 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|---|
| 24 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 25 | // |
|---|
| 26 | // Intel Corporation is the author of this code, and requests that all |
|---|
| 27 | // problem reports or change requests be submitted to it directly at |
|---|
| 28 | // http://developer.intel.com/opensource. |
|---|
| 29 | // |
|---|
| 30 | |
|---|
| 31 | .section .text |
|---|
| 32 | |
|---|
| 33 | // __umoddi3: 64-bit unsigned integer remainder, r8 = a mod b with a in r32 and b in r33 |
|---|
| 34 | |
|---|
| 35 | .proc __umoddi3# |
|---|
| 36 | .align 32 |
|---|
| 37 | .global __umoddi3# |
|---|
| 38 | .align 32 |
|---|
| 39 | |
|---|
| 40 | __umoddi3: |
|---|
| 41 | |
|---|
| 42 | { .mii |
|---|
| 43 | alloc r31=ar.pfs,3,0,0,0 |
|---|
| 44 | nop.i 0 |
|---|
| 45 | nop.i 0 |
|---|
| 46 | } { .mmb |
|---|
| 47 | |
|---|
| 48 | // 64-BIT UNSIGNED INTEGER REMAINDER BEGINS HERE |
|---|
| 49 | |
|---|
| 50 | // general registers used (r31 additionally receives ar.pfs from the alloc above): |
|---|
| 51 | // r32 - 64-bit unsigned integer dividend, called a below |
|---|
| 52 | // r33 - 64-bit unsigned integer divisor, called b below; overwritten with -b (no explicit b == 0 check is visible here) |
|---|
| 53 | // r8 - 64-bit unsigned integer result, the remainder r from step (12) |
|---|
| 54 | // floating-point registers used: f6, f7, f8, f9, f10, f11, f12 (f6 = a and f7 = b as floating-point values, f12 = a as an integer) |
|---|
| 55 | // predicate registers used: p6 (written by frcpa in step (1), guards steps (2)-(10)) |
|---|
| 56 | |
|---|
| 57 | setf.sig f12=r32 // holds a in integer form, reused as the addend in step (12) |
|---|
| 58 | setf.sig f7=r33 |
|---|
| 59 | nop.b 0;; |
|---|
| 60 | } { .mfi |
|---|
| 61 | // get 2's complement of b: r33 = 0 - b, transferred to f7 just before step (10) |
|---|
| 62 | sub r33=r0,r33 |
|---|
| 63 | fcvt.xuf.s1 f6=f12 |
|---|
| 64 | nop.i 0 |
|---|
| 65 | } { .mfi |
|---|
| 66 | nop.m 0 |
|---|
| 67 | fcvt.xuf.s1 f7=f7 |
|---|
| 68 | nop.i 0;; |
|---|
| 69 | } { .mfi |
|---|
| 70 | nop.m 0 |
|---|
| 71 | // Step (1) |
|---|
| 72 | // y0 = 1 / b in f8 (initial reciprocal approximation from frcpa, which also writes p6) |
|---|
| 73 | frcpa.s1 f8,p6=f6,f7 |
|---|
| 74 | nop.i 0;; |
|---|
| 75 | } { .mfi |
|---|
| 76 | nop.m 0 |
|---|
| 77 | // Step (2) |
|---|
| 78 | // q0 = a * y0 in f10 (first quotient estimate; f0 supplies a zero addend) |
|---|
| 79 | (p6) fma.s1 f10=f6,f8,f0 |
|---|
| 80 | nop.i 0 |
|---|
| 81 | } { .mfi |
|---|
| 82 | nop.m 0 |
|---|
| 83 | // Step (3) |
|---|
| 84 | // e0 = 1 - b * y0 in f9 (error of the reciprocal estimate; f1 supplies the constant 1) |
|---|
| 85 | (p6) fnma.s1 f9=f7,f8,f1 |
|---|
| 86 | nop.i 0;; |
|---|
| 87 | } { .mfi |
|---|
| 88 | nop.m 0 |
|---|
| 89 | // Step (4) |
|---|
| 90 | // q1 = q0 + e0 * q0 in f10 (quotient estimate corrected by e0) |
|---|
| 91 | (p6) fma.s1 f10=f9,f10,f10 |
|---|
| 92 | nop.i 0 |
|---|
| 93 | } { .mfi |
|---|
| 94 | nop.m 0 |
|---|
| 95 | // Step (5) |
|---|
| 96 | // e1 = e0 * e0 in f11 |
|---|
| 97 | (p6) fma.s1 f11=f9,f9,f0 |
|---|
| 98 | nop.i 0;; |
|---|
| 99 | } { .mfi |
|---|
| 100 | nop.m 0 |
|---|
| 101 | // Step (6) |
|---|
| 102 | // y1 = y0 + e0 * y0 in f8 (reciprocal estimate corrected by e0) |
|---|
| 103 | (p6) fma.s1 f8=f9,f8,f8 |
|---|
| 104 | nop.i 0;; |
|---|
| 105 | } { .mfi |
|---|
| 106 | nop.m 0 |
|---|
| 107 | // Step (7) |
|---|
| 108 | // q2 = q1 + e1 * q1 in f9 (overwrites e0, which is not read again) |
|---|
| 109 | (p6) fma.s1 f9=f11,f10,f10 |
|---|
| 110 | nop.i 0;; |
|---|
| 111 | } { .mfi |
|---|
| 112 | nop.m 0 |
|---|
| 113 | // Step (8) |
|---|
| 114 | // y2 = y1 + e1 * y1 in f8 |
|---|
| 115 | (p6) fma.s1 f8=f11,f8,f8 |
|---|
| 116 | nop.i 0;; |
|---|
| 117 | } { .mfi |
|---|
| 118 | nop.m 0 |
|---|
| 119 | // Step (9) |
|---|
| 120 | // r2 = a - b * q2 in f10 (remainder left by the q2 estimate; overwrites q1) |
|---|
| 121 | (p6) fnma.s1 f10=f7,f9,f6 |
|---|
| 122 | nop.i 0;; |
|---|
| 123 | } { .mfi |
|---|
| 124 | // f7 = -b in integer form; the floating-point b in f7 is last read by step (9) |
|---|
| 125 | setf.sig f7=r33 |
|---|
| 126 | // Step (10) |
|---|
| 127 | // q3 = q2 + r2 * y2 in f8 (final floating-point quotient) |
|---|
| 128 | (p6) fma.s1 f8=f10,f8,f9 |
|---|
| 129 | nop.i 0;; |
|---|
| 130 | } { .mfi |
|---|
| 131 | nop.m 0 |
|---|
| 132 | // Step (11) q = trunc(q3), converted to an unsigned integer in f8; not predicated on p6 |
|---|
| 133 | fcvt.fxu.trunc.s1 f8=f8 |
|---|
| 134 | nop.i 0;; |
|---|
| 135 | } { .mfi |
|---|
| 136 | nop.m 0 |
|---|
| 137 | // Step (12) r = a + (-b) * q, an integer multiply-add (xma.l) of f8, f7 and f12; getf.sig then copies r to r8 |
|---|
| 138 | xma.l f8=f8,f7,f12 |
|---|
| 139 | nop.i 0;; |
|---|
| 140 | } { .mib |
|---|
| 141 | getf.sig r8=f8 |
|---|
| 142 | nop.i 0 |
|---|
| 143 | nop.b 0 |
|---|
| 144 | } |
|---|
| 145 | |
|---|
| 146 | // 64-BIT UNSIGNED INTEGER REMAINDER ENDS HERE |
|---|
| 147 | |
|---|
| 148 | { .mib |
|---|
| 149 | nop.m 0 |
|---|
| 150 | nop.i 0 |
|---|
| 151 | br.ret.sptk b0;; |
|---|
| 152 | } |
|---|
| 153 | |
|---|
| 154 | .endp __umoddi3 |
|---|