Please file new bugs on Launchpad: Invirt or XVM (if you're not sure which, just pick one)

Context Navigation

source: trunk/packages/xen-common/xen-common/tools/ioemu/fpu/softfloat.c @ 34

Last change on this file since 34 was 34, checked in by hartmans, 17 years ago
Add xen and xen-common
File size: 188.2 KB

Rev	Line
[34]	1
/*============================================================================

This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package, Release 2b.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.

=============================================================================*/
	32
	33	#include "softfloat.h"
	34
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
	40	#include "softfloat-macros.h"
	41
/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
	50	#include "softfloat-specialize.h"
	51
	52	void set_float_rounding_mode(int val STATUS_PARAM)
	53	{
	54	STATUS(float_rounding_mode) = val;
	55	}
	56
	57	void set_float_exception_flags(int val STATUS_PARAM)
	58	{
	59	STATUS(float_exception_flags) = val;
	60	}
	61
	62	#ifdef FLOATX80
	63	void set_floatx80_rounding_precision(int val STATUS_PARAM)
	64	{
	65	STATUS(floatx80_rounding_precision) = val;
	66	}
	67	#endif
	68
/*----------------------------------------------------------------------------
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
| and 7, and returns the properly rounded 32-bit integer corresponding to the
| input.  If `zSign' is 1, the input is negated before being converted to an
| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
| is simply rounded to an integer, with the inexact exception raised if the
| input cannot be represented exactly as an integer.  However, if the fixed-
| point input is too large, the invalid exception is raised and the largest
| positive or negative integer is returned.
*----------------------------------------------------------------------------*/
	79
	80	static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
	81	{
	82	int8 roundingMode;
	83	flag roundNearestEven;
	84	int8 roundIncrement, roundBits;
	85	int32 z;
	86
	87	roundingMode = STATUS(float_rounding_mode);
	88	roundNearestEven = ( roundingMode == float_round_nearest_even );
	89	roundIncrement = 0x40;
	90	if ( ! roundNearestEven ) {
	91	if ( roundingMode == float_round_to_zero ) {
	92	roundIncrement = 0;
	93	}
	94	else {
	95	roundIncrement = 0x7F;
	96	if ( zSign ) {
	97	if ( roundingMode == float_round_up ) roundIncrement = 0;
	98	}
	99	else {
	100	if ( roundingMode == float_round_down ) roundIncrement = 0;
	101	}
	102	}
	103	}
	104	roundBits = absZ & 0x7F;
	105	absZ = ( absZ + roundIncrement )>>7;
	106	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
	107	z = absZ;
	108	if ( zSign ) z = - z;
	109	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
	110	float_raise( float_flag_invalid STATUS_VAR);
	111	return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
	112	}
	113	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
	114	return z;
	115
	116	}
	117
/*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit integer corresponding to the input.
| If `zSign' is 1, the input is negated before being converted to an integer.
| Ordinarily, the fixed-point input is simply rounded to an integer, with
| the inexact exception raised if the input cannot be represented exactly as
| an integer.  However, if the fixed-point input is too large, the invalid
| exception is raised and the largest positive or negative integer is
| returned.
*----------------------------------------------------------------------------*/
	129
	130	static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
	131	{
	132	int8 roundingMode;
	133	flag roundNearestEven, increment;
	134	int64 z;
	135
	136	roundingMode = STATUS(float_rounding_mode);
	137	roundNearestEven = ( roundingMode == float_round_nearest_even );
	138	increment = ( (sbits64) absZ1 < 0 );
	139	if ( ! roundNearestEven ) {
	140	if ( roundingMode == float_round_to_zero ) {
	141	increment = 0;
	142	}
	143	else {
	144	if ( zSign ) {
	145	increment = ( roundingMode == float_round_down ) && absZ1;
	146	}
	147	else {
	148	increment = ( roundingMode == float_round_up ) && absZ1;
	149	}
	150	}
	151	}
	152	if ( increment ) {
	153	++absZ0;
	154	if ( absZ0 == 0 ) goto overflow;
	155	absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
	156	}
	157	z = absZ0;
	158	if ( zSign ) z = - z;
	159	if ( z && ( ( z < 0 ) ^ zSign ) ) {
	160	overflow:
	161	float_raise( float_flag_invalid STATUS_VAR);
	162	return
	163	zSign ? (sbits64) LIT64( 0x8000000000000000 )
	164	: LIT64( 0x7FFFFFFFFFFFFFFF );
	165	}
	166	if ( absZ1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
	167	return z;
	168
	169	}
	170
/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
	174
	175	INLINE bits32 extractFloat32Frac( float32 a )
	176	{
	177
	178	return a & 0x007FFFFF;
	179
	180	}
	181
/*----------------------------------------------------------------------------
| Returns the exponent bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
	185
	186	INLINE int16 extractFloat32Exp( float32 a )
	187	{
	188
	189	return ( a>>23 ) & 0xFF;
	190
	191	}
	192
/*----------------------------------------------------------------------------
| Returns the sign bit of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
	196
	197	INLINE flag extractFloat32Sign( float32 a )
	198	{
	199
	200	return a>>31;
	201
	202	}
	203
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
	210
	211	static void
	212	normalizeFloat32Subnormal( bits32 aSig, int16 zExpPtr, bits32 zSigPtr )
	213	{
	214	int8 shiftCount;
	215
	216	shiftCount = countLeadingZeros32( aSig ) - 8;
	217	*zSigPtr = aSig<<shiftCount;
	218	*zExpPtr = 1 - shiftCount;
	219
	220	}
	221
/*----------------------------------------------------------------------------
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
| single-precision floating-point value, returning the result.  After being
| shifted into the proper positions, the three fields are simply added
| together to form the result.  This means that any integer portion of `zSig'
| will be added into the exponent.  Since a properly normalized significand
| will have an integer portion equal to 1, the `zExp' input should be 1 less
| than the desired result exponent whenever `zSig' is a complete, normalized
| significand.
*----------------------------------------------------------------------------*/
	232
	233	INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
	234	{
	235
	236	return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
	237
	238	}
	239
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper single-precision floating-
| point value corresponding to the abstract input.  Ordinarily, the abstract
| value is simply rounded and packed into the single-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal single-
| precision floating-point number.
|     The input significand `zSig' has its binary point between bits 30
| and 29, which is 7 bits to the left of the usual location.  This shifted
| significand must be normalized or smaller.  If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding.  In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
	261
	262	static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
	263	{
	264	int8 roundingMode;
	265	flag roundNearestEven;
	266	int8 roundIncrement, roundBits;
	267	flag isTiny;
	268
	269	roundingMode = STATUS(float_rounding_mode);
	270	roundNearestEven = ( roundingMode == float_round_nearest_even );
	271	roundIncrement = 0x40;
	272	if ( ! roundNearestEven ) {
	273	if ( roundingMode == float_round_to_zero ) {
	274	roundIncrement = 0;
	275	}
	276	else {
	277	roundIncrement = 0x7F;
	278	if ( zSign ) {
	279	if ( roundingMode == float_round_up ) roundIncrement = 0;
	280	}
	281	else {
	282	if ( roundingMode == float_round_down ) roundIncrement = 0;
	283	}
	284	}
	285	}
	286	roundBits = zSig & 0x7F;
	287	if ( 0xFD <= (bits16) zExp ) {
	288	if ( ( 0xFD < zExp )
	289	\|\| ( ( zExp == 0xFD )
	290	&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
	291	) {
	292	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
	293	return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
	294	}
	295	if ( zExp < 0 ) {
	296	isTiny =
	297	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
	298	\|\| ( zExp < -1 )
	299	\|\| ( zSig + roundIncrement < 0x80000000 );
	300	shift32RightJamming( zSig, - zExp, &zSig );
	301	zExp = 0;
	302	roundBits = zSig & 0x7F;
	303	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
	304	}
	305	}
	306	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
	307	zSig = ( zSig + roundIncrement )>>7;
	308	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
	309	if ( zSig == 0 ) zExp = 0;
	310	return packFloat32( zSign, zExp, zSig );
	311
	312	}
	313
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper single-precision floating-
| point value corresponding to the abstract input.  This routine is just like
| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
| floating-point exponent.
*----------------------------------------------------------------------------*/
	322
	323	static float32
	324	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
	325	{
	326	int8 shiftCount;
	327
	328	shiftCount = countLeadingZeros32( zSig ) - 1;
	329	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
	330
	331	}
	332
/*----------------------------------------------------------------------------
| Returns the fraction bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
	336
	337	INLINE bits64 extractFloat64Frac( float64 a )
	338	{
	339
	340	return a & LIT64( 0x000FFFFFFFFFFFFF );
	341
	342	}
	343
/*----------------------------------------------------------------------------
| Returns the exponent bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
	347
	348	INLINE int16 extractFloat64Exp( float64 a )
	349	{
	350
	351	return ( a>>52 ) & 0x7FF;
	352
	353	}
	354
/*----------------------------------------------------------------------------
| Returns the sign bit of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
	358
	359	INLINE flag extractFloat64Sign( float64 a )
	360	{
	361
	362	return a>>63;
	363
	364	}
	365
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
	372
	373	static void
	374	normalizeFloat64Subnormal( bits64 aSig, int16 zExpPtr, bits64 zSigPtr )
	375	{
	376	int8 shiftCount;
	377
	378	shiftCount = countLeadingZeros64( aSig ) - 11;
	379	*zSigPtr = aSig<<shiftCount;
	380	*zExpPtr = 1 - shiftCount;
	381
	382	}
	383
/*----------------------------------------------------------------------------
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
| double-precision floating-point value, returning the result.  After being
| shifted into the proper positions, the three fields are simply added
| together to form the result.  This means that any integer portion of `zSig'
| will be added into the exponent.  Since a properly normalized significand
| will have an integer portion equal to 1, the `zExp' input should be 1 less
| than the desired result exponent whenever `zSig' is a complete, normalized
| significand.
*----------------------------------------------------------------------------*/
	394
	395	INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
	396	{
	397
	398	return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
	399
	400	}
	401
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper double-precision floating-
| point value corresponding to the abstract input.  Ordinarily, the abstract
| value is simply rounded and packed into the double-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded
| to a subnormal number, and the underflow and inexact exceptions are raised
| if the abstract input cannot be represented exactly as a subnormal double-
| precision floating-point number.
|     The input significand `zSig' has its binary point between bits 62
| and 61, which is 10 bits to the left of the usual location.  This shifted
| significand must be normalized or smaller.  If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding.  In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
	423
	424	static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
	425	{
	426	int8 roundingMode;
	427	flag roundNearestEven;
	428	int16 roundIncrement, roundBits;
	429	flag isTiny;
	430
	431	roundingMode = STATUS(float_rounding_mode);
	432	roundNearestEven = ( roundingMode == float_round_nearest_even );
	433	roundIncrement = 0x200;
	434	if ( ! roundNearestEven ) {
	435	if ( roundingMode == float_round_to_zero ) {
	436	roundIncrement = 0;
	437	}
	438	else {
	439	roundIncrement = 0x3FF;
	440	if ( zSign ) {
	441	if ( roundingMode == float_round_up ) roundIncrement = 0;
	442	}
	443	else {
	444	if ( roundingMode == float_round_down ) roundIncrement = 0;
	445	}
	446	}
	447	}
	448	roundBits = zSig & 0x3FF;
	449	if ( 0x7FD <= (bits16) zExp ) {
	450	if ( ( 0x7FD < zExp )
	451	\|\| ( ( zExp == 0x7FD )
	452	&& ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
	453	) {
	454	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
	455	return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
	456	}
	457	if ( zExp < 0 ) {
	458	isTiny =
	459	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
	460	\|\| ( zExp < -1 )
	461	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
	462	shift64RightJamming( zSig, - zExp, &zSig );
	463	zExp = 0;
	464	roundBits = zSig & 0x3FF;
	465	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
	466	}
	467	}
	468	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
	469	zSig = ( zSig + roundIncrement )>>10;
	470	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
	471	if ( zSig == 0 ) zExp = 0;
	472	return packFloat64( zSign, zExp, zSig );
	473
	474	}
	475
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper double-precision floating-
| point value corresponding to the abstract input.  This routine is just like
| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
| floating-point exponent.
*----------------------------------------------------------------------------*/
	484
	485	static float64
	486	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
	487	{
	488	int8 shiftCount;
	489
	490	shiftCount = countLeadingZeros64( zSig ) - 1;
	491	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
	492
	493	}
	494
	495	#ifdef FLOATX80
	496
/*----------------------------------------------------------------------------
| Returns the fraction bits of the extended double-precision floating-point
| value `a'.
*----------------------------------------------------------------------------*/
	501
	502	INLINE bits64 extractFloatx80Frac( floatx80 a )
	503	{
	504
	505	return a.low;
	506
	507	}
	508
/*----------------------------------------------------------------------------
| Returns the exponent bits of the extended double-precision floating-point
| value `a'.
*----------------------------------------------------------------------------*/
	513
	514	INLINE int32 extractFloatx80Exp( floatx80 a )
	515	{
	516
	517	return a.high & 0x7FFF;
	518
	519	}
	520
/*----------------------------------------------------------------------------
| Returns the sign bit of the extended double-precision floating-point value
| `a'.
*----------------------------------------------------------------------------*/
	525
	526	INLINE flag extractFloatx80Sign( floatx80 a )
	527	{
	528
	529	return a.high>>15;
	530
	531	}
	532
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/
	539
	540	static void
	541	normalizeFloatx80Subnormal( bits64 aSig, int32 zExpPtr, bits64 zSigPtr )
	542	{
	543	int8 shiftCount;
	544
	545	shiftCount = countLeadingZeros64( aSig );
	546	*zSigPtr = aSig<<shiftCount;
	547	*zExpPtr = 1 - shiftCount;
	548
	549	}
	550
/*----------------------------------------------------------------------------
| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
| extended double-precision floating-point value, returning the result.
*----------------------------------------------------------------------------*/
	555
	556	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
	557	{
	558	floatx80 z;
	559
	560	z.low = zSig;
	561	z.high = ( ( (bits16) zSign )<<15 ) + zExp;
	562	return z;
	563
	564	}
	565
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
| number of bits as single or double precision, respectively.  Otherwise, the
| result is rounded to the full precision of the extended double-precision
| format.
|     The input significand must be normalized or smaller.  If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding.  The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
	589
	590	static floatx80
	591	roundAndPackFloatx80(
	592	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
	593	STATUS_PARAM)
	594	{
	595	int8 roundingMode;
	596	flag roundNearestEven, increment, isTiny;
	597	int64 roundIncrement, roundMask, roundBits;
	598
	599	roundingMode = STATUS(float_rounding_mode);
	600	roundNearestEven = ( roundingMode == float_round_nearest_even );
	601	if ( roundingPrecision == 80 ) goto precision80;
	602	if ( roundingPrecision == 64 ) {
	603	roundIncrement = LIT64( 0x0000000000000400 );
	604	roundMask = LIT64( 0x00000000000007FF );
	605	}
	606	else if ( roundingPrecision == 32 ) {
	607	roundIncrement = LIT64( 0x0000008000000000 );
	608	roundMask = LIT64( 0x000000FFFFFFFFFF );
	609	}
	610	else {
	611	goto precision80;
	612	}
	613	zSig0 \|= ( zSig1 != 0 );
	614	if ( ! roundNearestEven ) {
	615	if ( roundingMode == float_round_to_zero ) {
	616	roundIncrement = 0;
	617	}
	618	else {
	619	roundIncrement = roundMask;
	620	if ( zSign ) {
	621	if ( roundingMode == float_round_up ) roundIncrement = 0;
	622	}
	623	else {
	624	if ( roundingMode == float_round_down ) roundIncrement = 0;
	625	}
	626	}
	627	}
	628	roundBits = zSig0 & roundMask;
	629	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
	630	if ( ( 0x7FFE < zExp )
	631	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
	632	) {
	633	goto overflow;
	634	}
	635	if ( zExp <= 0 ) {
	636	isTiny =
	637	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
	638	\|\| ( zExp < 0 )
	639	\|\| ( zSig0 <= zSig0 + roundIncrement );
	640	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
	641	zExp = 0;
	642	roundBits = zSig0 & roundMask;
	643	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
	644	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
	645	zSig0 += roundIncrement;
	646	if ( (sbits64) zSig0 < 0 ) zExp = 1;
	647	roundIncrement = roundMask + 1;
	648	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
	649	roundMask \|= roundIncrement;
	650	}
	651	zSig0 &= ~ roundMask;
	652	return packFloatx80( zSign, zExp, zSig0 );
	653	}
	654	}
	655	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
	656	zSig0 += roundIncrement;
	657	if ( zSig0 < roundIncrement ) {
	658	++zExp;
	659	zSig0 = LIT64( 0x8000000000000000 );
	660	}
	661	roundIncrement = roundMask + 1;
	662	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
	663	roundMask \|= roundIncrement;
	664	}
	665	zSig0 &= ~ roundMask;
	666	if ( zSig0 == 0 ) zExp = 0;
	667	return packFloatx80( zSign, zExp, zSig0 );
	668	precision80:
	669	increment = ( (sbits64) zSig1 < 0 );
	670	if ( ! roundNearestEven ) {
	671	if ( roundingMode == float_round_to_zero ) {
	672	increment = 0;
	673	}
	674	else {
	675	if ( zSign ) {
	676	increment = ( roundingMode == float_round_down ) && zSig1;
	677	}
	678	else {
	679	increment = ( roundingMode == float_round_up ) && zSig1;
	680	}
	681	}
	682	}
	683	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
	684	if ( ( 0x7FFE < zExp )
	685	\|\| ( ( zExp == 0x7FFE )
	686	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
	687	&& increment
	688	)
	689	) {
	690	roundMask = 0;
	691	overflow:
	692	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
	693	if ( ( roundingMode == float_round_to_zero )
	694	\|\| ( zSign && ( roundingMode == float_round_up ) )
	695	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
	696	) {
	697	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
	698	}
	699	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
	700	}
	701	if ( zExp <= 0 ) {
	702	isTiny =
	703	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
	704	\|\| ( zExp < 0 )
	705	\|\| ! increment
	706	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
	707	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
	708	zExp = 0;
	709	if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
	710	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
	711	if ( roundNearestEven ) {
	712	increment = ( (sbits64) zSig1 < 0 );
	713	}
	714	else {
	715	if ( zSign ) {
	716	increment = ( roundingMode == float_round_down ) && zSig1;
	717	}
	718	else {
	719	increment = ( roundingMode == float_round_up ) && zSig1;
	720	}
	721	}
	722	if ( increment ) {
	723	++zSig0;
	724	zSig0 &=
	725	~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
	726	if ( (sbits64) zSig0 < 0 ) zExp = 1;
	727	}
	728	return packFloatx80( zSign, zExp, zSig0 );
	729	}
	730	}
	731	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
	732	if ( increment ) {
	733	++zSig0;
	734	if ( zSig0 == 0 ) {
	735	++zExp;
	736	zSig0 = LIT64( 0x8000000000000000 );
	737	}
	738	else {
	739	zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
	740	}
	741	}
	742	else {
	743	if ( zSig0 == 0 ) zExp = 0;
	744	}
	745	return packFloatx80( zSign, zExp, zSig0 );
	746
	747	}
	748
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent
| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  This routine is just like
| `roundAndPackFloatx80' except that the input significand does not have to be
| normalized.
*----------------------------------------------------------------------------*/
	757
	758	static floatx80
	759	normalizeRoundAndPackFloatx80(
	760	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
	761	STATUS_PARAM)
	762	{
	763	int8 shiftCount;
	764
	765	if ( zSig0 == 0 ) {
	766	zSig0 = zSig1;
	767	zSig1 = 0;
	768	zExp -= 64;
	769	}
	770	shiftCount = countLeadingZeros64( zSig0 );
	771	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
	772	zExp -= shiftCount;
	773	return
	774	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
	775
	776	}
	777
	778	#endif
	779
	780	#ifdef FLOAT128
	781
/*----------------------------------------------------------------------------
| Returns the least-significant 64 fraction bits of the quadruple-precision
| floating-point value `a'.
*----------------------------------------------------------------------------*/
	786
	787	INLINE bits64 extractFloat128Frac1( float128 a )
	788	{
	789
	790	return a.low;
	791
	792	}
	793
	794	/*----------------------------------------------------------------------------
	795	\| Returns the most-significant 48 fraction bits of the quadruple-precision
	796	\| floating-point value `a'.
	797	----------------------------------------------------------------------------/
	798
	799	INLINE bits64 extractFloat128Frac0( float128 a )
	800	{
	801
	802	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
	803
	804	}
	805
	806	/*----------------------------------------------------------------------------
	807	\| Returns the exponent bits of the quadruple-precision floating-point value
	808	\| `a'.
	809	----------------------------------------------------------------------------/
	810
	811	INLINE int32 extractFloat128Exp( float128 a )
	812	{
	813
	814	return ( a.high>>48 ) & 0x7FFF;
	815
	816	}
	817
	818	/*----------------------------------------------------------------------------
	819	\| Returns the sign bit of the quadruple-precision floating-point value `a'.
	820	----------------------------------------------------------------------------/
	821
	822	INLINE flag extractFloat128Sign( float128 a )
	823	{
	824
	825	return a.high>>63;
	826
	827	}
	828
	829	/*----------------------------------------------------------------------------
	830	\| Normalizes the subnormal quadruple-precision floating-point value
	831	\| represented by the denormalized significand formed by the concatenation of
	832	\| `aSig0' and `aSig1'. The normalized exponent is stored at the location
	833	\| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
	834	\| significand are stored at the location pointed to by `zSig0Ptr', and the
	835	\| least significant 64 bits of the normalized significand are stored at the
	836	\| location pointed to by `zSig1Ptr'.
	837	----------------------------------------------------------------------------/
	838
	839	static void
	840	normalizeFloat128Subnormal(
	841	bits64 aSig0,
	842	bits64 aSig1,
	843	int32 *zExpPtr,
	844	bits64 *zSig0Ptr,
	845	bits64 *zSig1Ptr
	846	)
	847	{
	848	int8 shiftCount;
	849
	850	if ( aSig0 == 0 ) {
	851	shiftCount = countLeadingZeros64( aSig1 ) - 15;
	852	if ( shiftCount < 0 ) {
	853	*zSig0Ptr = aSig1>>( - shiftCount );
	854	*zSig1Ptr = aSig1<<( shiftCount & 63 );
	855	}
	856	else {
	857	*zSig0Ptr = aSig1<<shiftCount;
	858	*zSig1Ptr = 0;
	859	}
	860	*zExpPtr = - shiftCount - 63;
	861	}
	862	else {
	863	shiftCount = countLeadingZeros64( aSig0 ) - 15;
	864	shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
	865	*zExpPtr = 1 - shiftCount;
	866	}
	867
	868	}
	869
	870	/*----------------------------------------------------------------------------
	871	\| Packs the sign `zSign', the exponent `zExp', and the significand formed
	872	\| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
	873	\| floating-point value, returning the result. After being shifted into the
	874	\| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
	875	\| added together to form the most significant 32 bits of the result. This
	876	\| means that any integer portion of `zSig0' will be added into the exponent.
	877	\| Since a properly normalized significand will have an integer portion equal
	878	\| to 1, the `zExp' input should be 1 less than the desired result exponent
	879	\| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
	880	\| significand.
	881	----------------------------------------------------------------------------/
	882
	883	INLINE float128
	884	packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
	885	{
	886	float128 z;
	887
	888	z.low = zSig1;
	889	z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
	890	return z;
	891
	892	}
	893
	894	/*----------------------------------------------------------------------------
	895	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
	896	\| and extended significand formed by the concatenation of `zSig0', `zSig1',
	897	\| and `zSig2', and returns the proper quadruple-precision floating-point value
	898	\| corresponding to the abstract input. Ordinarily, the abstract value is
	899	\| simply rounded and packed into the quadruple-precision format, with the
	900	\| inexact exception raised if the abstract input cannot be represented
	901	\| exactly. However, if the abstract value is too large, the overflow and
	902	\| inexact exceptions are raised and an infinity or maximal finite value is
	903	\| returned. If the abstract value is too small, the input value is rounded to
	904	\| a subnormal number, and the underflow and inexact exceptions are raised if
	905	\| the abstract input cannot be represented exactly as a subnormal quadruple-
	906	\| precision floating-point number.
	907	\| The input significand must be normalized or smaller. If the input
	908	\| significand is not normalized, `zExp' must be 0; in that case, the result
	909	\| returned is a subnormal number, and it must not require rounding. In the
	910	\| usual case that the input significand is normalized, `zExp' must be 1 less
	911	\| than the ``true'' floating-point exponent. The handling of underflow and
	912	\| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	913	----------------------------------------------------------------------------/
	914
	915	static float128
	916	roundAndPackFloat128(
	917	flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
	918	{
	919	int8 roundingMode;
	920	flag roundNearestEven, increment, isTiny;
	921
	922	roundingMode = STATUS(float_rounding_mode);
	923	roundNearestEven = ( roundingMode == float_round_nearest_even );
	924	increment = ( (sbits64) zSig2 < 0 );
	925	if ( ! roundNearestEven ) {
	926	if ( roundingMode == float_round_to_zero ) {
	927	increment = 0;
	928	}
	929	else {
	930	if ( zSign ) {
	931	increment = ( roundingMode == float_round_down ) && zSig2;
	932	}
	933	else {
	934	increment = ( roundingMode == float_round_up ) && zSig2;
	935	}
	936	}
	937	}
	938	if ( 0x7FFD <= (bits32) zExp ) {
	939	if ( ( 0x7FFD < zExp )
	940	\|\| ( ( zExp == 0x7FFD )
	941	&& eq128(
	942	LIT64( 0x0001FFFFFFFFFFFF ),
	943	LIT64( 0xFFFFFFFFFFFFFFFF ),
	944	zSig0,
	945	zSig1
	946	)
	947	&& increment
	948	)
	949	) {
	950	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
	951	if ( ( roundingMode == float_round_to_zero )
	952	\|\| ( zSign && ( roundingMode == float_round_up ) )
	953	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
	954	) {
	955	return
	956	packFloat128(
	957	zSign,
	958	0x7FFE,
	959	LIT64( 0x0000FFFFFFFFFFFF ),
	960	LIT64( 0xFFFFFFFFFFFFFFFF )
	961	);
	962	}
	963	return packFloat128( zSign, 0x7FFF, 0, 0 );
	964	}
	965	if ( zExp < 0 ) {
	966	isTiny =
	967	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
	968	\|\| ( zExp < -1 )
	969	\|\| ! increment
	970	\|\| lt128(
	971	zSig0,
	972	zSig1,
	973	LIT64( 0x0001FFFFFFFFFFFF ),
	974	LIT64( 0xFFFFFFFFFFFFFFFF )
	975	);
	976	shift128ExtraRightJamming(
	977	zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
	978	zExp = 0;
	979	if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
	980	if ( roundNearestEven ) {
	981	increment = ( (sbits64) zSig2 < 0 );
	982	}
	983	else {
	984	if ( zSign ) {
	985	increment = ( roundingMode == float_round_down ) && zSig2;
	986	}
	987	else {
	988	increment = ( roundingMode == float_round_up ) && zSig2;
	989	}
	990	}
	991	}
	992	}
	993	if ( zSig2 ) STATUS(float_exception_flags) \|= float_flag_inexact;
	994	if ( increment ) {
	995	add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
	996	zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
	997	}
	998	else {
	999	if ( ( zSig0 \| zSig1 ) == 0 ) zExp = 0;
	1000	}
	1001	return packFloat128( zSign, zExp, zSig0, zSig1 );
	1002
	1003	}
	1004
	1005	/*----------------------------------------------------------------------------
	1006	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
	1007	\| and significand formed by the concatenation of `zSig0' and `zSig1', and
	1008	\| returns the proper quadruple-precision floating-point value corresponding
	1009	\| to the abstract input. This routine is just like `roundAndPackFloat128'
	1010	\| except that the input significand has fewer bits and does not have to be
	1011	\| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
	1012	\| point exponent.
	1013	----------------------------------------------------------------------------/
	1014
	1015	static float128
	1016	normalizeRoundAndPackFloat128(
	1017	flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
	1018	{
	1019	int8 shiftCount;
	1020	bits64 zSig2;
	1021
	1022	if ( zSig0 == 0 ) {
	1023	zSig0 = zSig1;
	1024	zSig1 = 0;
	1025	zExp -= 64;
	1026	}
	1027	shiftCount = countLeadingZeros64( zSig0 ) - 15;
	1028	if ( 0 <= shiftCount ) {
	1029	zSig2 = 0;
	1030	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
	1031	}
	1032	else {
	1033	shift128ExtraRightJamming(
	1034	zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
	1035	}
	1036	zExp -= shiftCount;
	1037	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
	1038
	1039	}
	1040
	1041	#endif
	1042
	1043	/*----------------------------------------------------------------------------
	1044	\| Returns the result of converting the 32-bit two's complement integer `a'
	1045	\| to the single-precision floating-point format. The conversion is performed
	1046	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	1047	----------------------------------------------------------------------------/
	1048
	1049	float32 int32_to_float32( int32 a STATUS_PARAM )
	1050	{
	1051	flag zSign;
	1052
	1053	if ( a == 0 ) return 0;
	1054	if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
	1055	zSign = ( a < 0 );
	1056	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
	1057
	1058	}
	1059
	1060	/*----------------------------------------------------------------------------
	1061	\| Returns the result of converting the 32-bit two's complement integer `a'
	1062	\| to the double-precision floating-point format. The conversion is performed
	1063	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	1064	----------------------------------------------------------------------------/
	1065
	1066	float64 int32_to_float64( int32 a STATUS_PARAM )
	1067	{
	1068	flag zSign;
	1069	uint32 absA;
	1070	int8 shiftCount;
	1071	bits64 zSig;
	1072
	1073	if ( a == 0 ) return 0;
	1074	zSign = ( a < 0 );
	1075	absA = zSign ? - a : a;
	1076	shiftCount = countLeadingZeros32( absA ) + 21;
	1077	zSig = absA;
	1078	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
	1079
	1080	}
	1081
	1082	#ifdef FLOATX80
	1083
	1084	/*----------------------------------------------------------------------------
	1085	\| Returns the result of converting the 32-bit two's complement integer `a'
	1086	\| to the extended double-precision floating-point format. The conversion
	1087	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	1088	\| Arithmetic.
	1089	----------------------------------------------------------------------------/
	1090
	1091	floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
	1092	{
	1093	flag zSign;
	1094	uint32 absA;
	1095	int8 shiftCount;
	1096	bits64 zSig;
	1097
	1098	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
	1099	zSign = ( a < 0 );
	1100	absA = zSign ? - a : a;
	1101	shiftCount = countLeadingZeros32( absA ) + 32;
	1102	zSig = absA;
	1103	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
	1104
	1105	}
	1106
	1107	#endif
	1108
	1109	#ifdef FLOAT128
	1110
	1111	/*----------------------------------------------------------------------------
	1112	\| Returns the result of converting the 32-bit two's complement integer `a' to
	1113	\| the quadruple-precision floating-point format. The conversion is performed
	1114	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	1115	----------------------------------------------------------------------------/
	1116
	1117	float128 int32_to_float128( int32 a STATUS_PARAM )
	1118	{
	1119	flag zSign;
	1120	uint32 absA;
	1121	int8 shiftCount;
	1122	bits64 zSig0;
	1123
	1124	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
	1125	zSign = ( a < 0 );
	1126	absA = zSign ? - a : a;
	1127	shiftCount = countLeadingZeros32( absA ) + 17;
	1128	zSig0 = absA;
	1129	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
	1130
	1131	}
	1132
	1133	#endif
	1134
	1135	/*----------------------------------------------------------------------------
	1136	\| Returns the result of converting the 64-bit two's complement integer `a'
	1137	\| to the single-precision floating-point format. The conversion is performed
	1138	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	1139	----------------------------------------------------------------------------/
	1140
	1141	float32 int64_to_float32( int64 a STATUS_PARAM )
	1142	{
	1143	flag zSign;
	1144	uint64 absA;
	1145	int8 shiftCount;
	1146
	1147	if ( a == 0 ) return 0;
	1148	zSign = ( a < 0 );
	1149	absA = zSign ? - a : a;
	1150	shiftCount = countLeadingZeros64( absA ) - 40;
	1151	if ( 0 <= shiftCount ) {
	1152	return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
	1153	}
	1154	else {
	1155	shiftCount += 7;
	1156	if ( shiftCount < 0 ) {
	1157	shift64RightJamming( absA, - shiftCount, &absA );
	1158	}
	1159	else {
	1160	absA <<= shiftCount;
	1161	}
	1162	return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
	1163	}
	1164
	1165	}
	1166
	1167	/*----------------------------------------------------------------------------
	1168	\| Returns the result of converting the 64-bit two's complement integer `a'
	1169	\| to the double-precision floating-point format. The conversion is performed
	1170	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	1171	----------------------------------------------------------------------------/
	1172
	1173	float64 int64_to_float64( int64 a STATUS_PARAM )
	1174	{
	1175	flag zSign;
	1176
	1177	if ( a == 0 ) return 0;
	1178	if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
	1179	return packFloat64( 1, 0x43E, 0 );
	1180	}
	1181	zSign = ( a < 0 );
	1182	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
	1183
	1184	}
	1185
	1186	#ifdef FLOATX80
	1187
	1188	/*----------------------------------------------------------------------------
	1189	\| Returns the result of converting the 64-bit two's complement integer `a'
	1190	\| to the extended double-precision floating-point format. The conversion
	1191	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	1192	\| Arithmetic.
	1193	----------------------------------------------------------------------------/
	1194
	1195	floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
	1196	{
	1197	flag zSign;
	1198	uint64 absA;
	1199	int8 shiftCount;
	1200
	1201	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
	1202	zSign = ( a < 0 );
	1203	absA = zSign ? - a : a;
	1204	shiftCount = countLeadingZeros64( absA );
	1205	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
	1206
	1207	}
	1208
	1209	#endif
	1210
	1211	#ifdef FLOAT128
	1212
	1213	/*----------------------------------------------------------------------------
	1214	\| Returns the result of converting the 64-bit two's complement integer `a' to
	1215	\| the quadruple-precision floating-point format. The conversion is performed
	1216	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	1217	----------------------------------------------------------------------------/
	1218
	1219	float128 int64_to_float128( int64 a STATUS_PARAM )
	1220	{
	1221	flag zSign;
	1222	uint64 absA;
	1223	int8 shiftCount;
	1224	int32 zExp;
	1225	bits64 zSig0, zSig1;
	1226
	1227	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
	1228	zSign = ( a < 0 );
	1229	absA = zSign ? - a : a;
	1230	shiftCount = countLeadingZeros64( absA ) + 49;
	1231	zExp = 0x406E - shiftCount;
	1232	if ( 64 <= shiftCount ) {
	1233	zSig1 = 0;
	1234	zSig0 = absA;
	1235	shiftCount -= 64;
	1236	}
	1237	else {
	1238	zSig1 = absA;
	1239	zSig0 = 0;
	1240	}
	1241	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
	1242	return packFloat128( zSign, zExp, zSig0, zSig1 );
	1243
	1244	}
	1245
	1246	#endif
	1247
	1248	/*----------------------------------------------------------------------------
	1249	\| Returns the result of converting the single-precision floating-point value
	1250	\| `a' to the 32-bit two's complement integer format. The conversion is
	1251	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	1252	\| Arithmetic---which means in particular that the conversion is rounded
	1253	\| according to the current rounding mode. If `a' is a NaN, the largest
	1254	\| positive integer is returned. Otherwise, if the conversion overflows, the
	1255	\| largest integer with the same sign as `a' is returned.
	1256	----------------------------------------------------------------------------/
	1257
	1258	int32 float32_to_int32( float32 a STATUS_PARAM )
	1259	{
	1260	flag aSign;
	1261	int16 aExp, shiftCount;
	1262	bits32 aSig;
	1263	bits64 aSig64;
	1264
	1265	aSig = extractFloat32Frac( a );
	1266	aExp = extractFloat32Exp( a );
	1267	aSign = extractFloat32Sign( a );
	1268	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
	1269	if ( aExp ) aSig \|= 0x00800000;
	1270	shiftCount = 0xAF - aExp;
	1271	aSig64 = aSig;
	1272	aSig64 <<= 32;
	1273	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
	1274	return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
	1275
	1276	}
	1277
	1278	/*----------------------------------------------------------------------------
	1279	\| Returns the result of converting the single-precision floating-point value
	1280	\| `a' to the 32-bit two's complement integer format. The conversion is
	1281	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	1282	\| Arithmetic, except that the conversion is always rounded toward zero.
	1283	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
	1284	\| the conversion overflows, the largest integer with the same sign as `a' is
	1285	\| returned.
	1286	----------------------------------------------------------------------------/
	1287
	1288	int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
	1289	{
	1290	flag aSign;
	1291	int16 aExp, shiftCount;
	1292	bits32 aSig;
	1293	int32 z;
	1294
	1295	aSig = extractFloat32Frac( a );
	1296	aExp = extractFloat32Exp( a );
	1297	aSign = extractFloat32Sign( a );
	1298	shiftCount = aExp - 0x9E;
	1299	if ( 0 <= shiftCount ) {
	1300	if ( a != 0xCF000000 ) {
	1301	float_raise( float_flag_invalid STATUS_VAR);
	1302	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
	1303	}
	1304	return (sbits32) 0x80000000;
	1305	}
	1306	else if ( aExp <= 0x7E ) {
	1307	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
	1308	return 0;
	1309	}
	1310	aSig = ( aSig \| 0x00800000 )<<8;
	1311	z = aSig>>( - shiftCount );
	1312	if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
	1313	STATUS(float_exception_flags) \|= float_flag_inexact;
	1314	}
	1315	if ( aSign ) z = - z;
	1316	return z;
	1317
	1318	}
	1319
	1320	/*----------------------------------------------------------------------------
	1321	\| Returns the result of converting the single-precision floating-point value
	1322	\| `a' to the 64-bit two's complement integer format. The conversion is
	1323	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	1324	\| Arithmetic---which means in particular that the conversion is rounded
	1325	\| according to the current rounding mode. If `a' is a NaN, the largest
	1326	\| positive integer is returned. Otherwise, if the conversion overflows, the
	1327	\| largest integer with the same sign as `a' is returned.
	1328	----------------------------------------------------------------------------/
	1329
	1330	int64 float32_to_int64( float32 a STATUS_PARAM )
	1331	{
	1332	flag aSign;
	1333	int16 aExp, shiftCount;
	1334	bits32 aSig;
	1335	bits64 aSig64, aSigExtra;
	1336
	1337	aSig = extractFloat32Frac( a );
	1338	aExp = extractFloat32Exp( a );
	1339	aSign = extractFloat32Sign( a );
	1340	shiftCount = 0xBE - aExp;
	1341	if ( shiftCount < 0 ) {
	1342	float_raise( float_flag_invalid STATUS_VAR);
	1343	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
	1344	return LIT64( 0x7FFFFFFFFFFFFFFF );
	1345	}
	1346	return (sbits64) LIT64( 0x8000000000000000 );
	1347	}
	1348	if ( aExp ) aSig \|= 0x00800000;
	1349	aSig64 = aSig;
	1350	aSig64 <<= 40;
	1351	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
	1352	return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
	1353
	1354	}
	1355
	1356	/*----------------------------------------------------------------------------
	1357	\| Returns the result of converting the single-precision floating-point value
	1358	\| `a' to the 64-bit two's complement integer format. The conversion is
	1359	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	1360	\| Arithmetic, except that the conversion is always rounded toward zero. If
	1361	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
	1362	\| conversion overflows, the largest integer with the same sign as `a' is
	1363	\| returned.
	1364	----------------------------------------------------------------------------/
	1365
	1366	int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
	1367	{
	1368	flag aSign;
	1369	int16 aExp, shiftCount;
	1370	bits32 aSig;
	1371	bits64 aSig64;
	1372	int64 z;
	1373
	1374	aSig = extractFloat32Frac( a );
	1375	aExp = extractFloat32Exp( a );
	1376	aSign = extractFloat32Sign( a );
	1377	shiftCount = aExp - 0xBE;
	1378	if ( 0 <= shiftCount ) {
	1379	if ( a != 0xDF000000 ) {
	1380	float_raise( float_flag_invalid STATUS_VAR);
	1381	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
	1382	return LIT64( 0x7FFFFFFFFFFFFFFF );
	1383	}
	1384	}
	1385	return (sbits64) LIT64( 0x8000000000000000 );
	1386	}
	1387	else if ( aExp <= 0x7E ) {
	1388	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
	1389	return 0;
	1390	}
	1391	aSig64 = aSig \| 0x00800000;
	1392	aSig64 <<= 40;
	1393	z = aSig64>>( - shiftCount );
	1394	if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
	1395	STATUS(float_exception_flags) \|= float_flag_inexact;
	1396	}
	1397	if ( aSign ) z = - z;
	1398	return z;
	1399
	1400	}
	1401
	1402	/*----------------------------------------------------------------------------
	1403	\| Returns the result of converting the single-precision floating-point value
	1404	\| `a' to the double-precision floating-point format. The conversion is
	1405	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	1406	\| Arithmetic.
	1407	----------------------------------------------------------------------------/
	1408
	1409	float64 float32_to_float64( float32 a STATUS_PARAM )
	1410	{
	1411	flag aSign;
	1412	int16 aExp;
	1413	bits32 aSig;
	1414
	1415	aSig = extractFloat32Frac( a );
	1416	aExp = extractFloat32Exp( a );
	1417	aSign = extractFloat32Sign( a );
	1418	if ( aExp == 0xFF ) {
	1419	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
	1420	return packFloat64( aSign, 0x7FF, 0 );
	1421	}
	1422	if ( aExp == 0 ) {
	1423	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
	1424	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
	1425	--aExp;
	1426	}
	1427	return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
	1428
	1429	}
	1430
	1431	#ifdef FLOATX80
	1432
	1433	/*----------------------------------------------------------------------------
	1434	\| Returns the result of converting the single-precision floating-point value
	1435	\| `a' to the extended double-precision floating-point format. The conversion
	1436	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	1437	\| Arithmetic.
	1438	----------------------------------------------------------------------------/
	1439
	1440	floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
	1441	{
	1442	flag aSign;
	1443	int16 aExp;
	1444	bits32 aSig;
	1445
	1446	aSig = extractFloat32Frac( a );
	1447	aExp = extractFloat32Exp( a );
	1448	aSign = extractFloat32Sign( a );
	1449	if ( aExp == 0xFF ) {
	1450	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
	1451	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
	1452	}
	1453	if ( aExp == 0 ) {
	1454	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
	1455	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
	1456	}
	1457	aSig \|= 0x00800000;
	1458	return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
	1459
	1460	}
	1461
	1462	#endif
	1463
	1464	#ifdef FLOAT128
	1465
	1466	/*----------------------------------------------------------------------------
	1467	\| Returns the result of converting the single-precision floating-point value
	1468	\| `a' to the double-precision floating-point format. The conversion is
	1469	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	1470	\| Arithmetic.
	1471	----------------------------------------------------------------------------/
	1472
	1473	float128 float32_to_float128( float32 a STATUS_PARAM )
	1474	{
	1475	flag aSign;
	1476	int16 aExp;
	1477	bits32 aSig;
	1478
	1479	aSig = extractFloat32Frac( a );
	1480	aExp = extractFloat32Exp( a );
	1481	aSign = extractFloat32Sign( a );
	1482	if ( aExp == 0xFF ) {
	1483	if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
	1484	return packFloat128( aSign, 0x7FFF, 0, 0 );
	1485	}
	1486	if ( aExp == 0 ) {
	1487	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
	1488	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
	1489	--aExp;
	1490	}
	1491	return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
	1492
	1493	}
	1494
	1495	#endif
	1496
	1497	/*----------------------------------------------------------------------------
	1498	\| Rounds the single-precision floating-point value `a' to an integer, and
	1499	\| returns the result as a single-precision floating-point value. The
	1500	\| operation is performed according to the IEC/IEEE Standard for Binary
	1501	\| Floating-Point Arithmetic.
	1502	----------------------------------------------------------------------------/
	1503
	1504	float32 float32_round_to_int( float32 a STATUS_PARAM)
	1505	{
	1506	flag aSign;
	1507	int16 aExp;
	1508	bits32 lastBitMask, roundBitsMask;
	1509	int8 roundingMode;
	1510	float32 z;
	1511
	1512	aExp = extractFloat32Exp( a );
	1513	if ( 0x96 <= aExp ) {
	1514	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
	1515	return propagateFloat32NaN( a, a STATUS_VAR );
	1516	}
	1517	return a;
	1518	}
	1519	if ( aExp <= 0x7E ) {
	1520	if ( (bits32) ( a<<1 ) == 0 ) return a;
	1521	STATUS(float_exception_flags) \|= float_flag_inexact;
	1522	aSign = extractFloat32Sign( a );
	1523	switch ( STATUS(float_rounding_mode) ) {
	1524	case float_round_nearest_even:
	1525	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
	1526	return packFloat32( aSign, 0x7F, 0 );
	1527	}
	1528	break;
	1529	case float_round_down:
	1530	return aSign ? 0xBF800000 : 0;
	1531	case float_round_up:
	1532	return aSign ? 0x80000000 : 0x3F800000;
	1533	}
	1534	return packFloat32( aSign, 0, 0 );
	1535	}
	1536	lastBitMask = 1;
	1537	lastBitMask <<= 0x96 - aExp;
	1538	roundBitsMask = lastBitMask - 1;
	1539	z = a;
	1540	roundingMode = STATUS(float_rounding_mode);
	1541	if ( roundingMode == float_round_nearest_even ) {
	1542	z += lastBitMask>>1;
	1543	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
	1544	}
	1545	else if ( roundingMode != float_round_to_zero ) {
	1546	if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
	1547	z += roundBitsMask;
	1548	}
	1549	}
	1550	z &= ~ roundBitsMask;
	1551	if ( z != a ) STATUS(float_exception_flags) \|= float_flag_inexact;
	1552	return z;
	1553
	1554	}
	1555
	1556	/*----------------------------------------------------------------------------
	1557	\| Returns the result of adding the absolute values of the single-precision
	1558	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
	1559	\| before being returned. `zSign' is ignored if the result is a NaN.
	1560	\| The addition is performed according to the IEC/IEEE Standard for Binary
	1561	\| Floating-Point Arithmetic.
	1562	----------------------------------------------------------------------------/
	1563
	1564	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
	1565	{
	1566	int16 aExp, bExp, zExp;
	1567	bits32 aSig, bSig, zSig;
	1568	int16 expDiff;
	1569
	1570	aSig = extractFloat32Frac( a );
	1571	aExp = extractFloat32Exp( a );
	1572	bSig = extractFloat32Frac( b );
	1573	bExp = extractFloat32Exp( b );
	1574	expDiff = aExp - bExp;
	1575	aSig <<= 6;
	1576	bSig <<= 6;
	1577	if ( 0 < expDiff ) {
	1578	if ( aExp == 0xFF ) {
	1579	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1580	return a;
	1581	}
	1582	if ( bExp == 0 ) {
	1583	--expDiff;
	1584	}
	1585	else {
	1586	bSig \|= 0x20000000;
	1587	}
	1588	shift32RightJamming( bSig, expDiff, &bSig );
	1589	zExp = aExp;
	1590	}
	1591	else if ( expDiff < 0 ) {
	1592	if ( bExp == 0xFF ) {
	1593	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1594	return packFloat32( zSign, 0xFF, 0 );
	1595	}
	1596	if ( aExp == 0 ) {
	1597	++expDiff;
	1598	}
	1599	else {
	1600	aSig \|= 0x20000000;
	1601	}
	1602	shift32RightJamming( aSig, - expDiff, &aSig );
	1603	zExp = bExp;
	1604	}
	1605	else {
	1606	if ( aExp == 0xFF ) {
	1607	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1608	return a;
	1609	}
	1610	if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
	1611	zSig = 0x40000000 + aSig + bSig;
	1612	zExp = aExp;
	1613	goto roundAndPack;
	1614	}
	1615	aSig \|= 0x20000000;
	1616	zSig = ( aSig + bSig )<<1;
	1617	--zExp;
	1618	if ( (sbits32) zSig < 0 ) {
	1619	zSig = aSig + bSig;
	1620	++zExp;
	1621	}
	1622	roundAndPack:
	1623	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
	1624
	1625	}
	1626
	1627	/*----------------------------------------------------------------------------
	1628	\| Returns the result of subtracting the absolute values of the single-
	1629	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
	1630	\| difference is negated before being returned. `zSign' is ignored if the
	1631	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
	1632	\| Standard for Binary Floating-Point Arithmetic.
	1633	----------------------------------------------------------------------------/
	1634
	1635	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
	1636	{
	1637	int16 aExp, bExp, zExp;
	1638	bits32 aSig, bSig, zSig;
	1639	int16 expDiff;
	1640
	1641	aSig = extractFloat32Frac( a );
	1642	aExp = extractFloat32Exp( a );
	1643	bSig = extractFloat32Frac( b );
	1644	bExp = extractFloat32Exp( b );
	1645	expDiff = aExp - bExp;
	1646	aSig <<= 7;
	1647	bSig <<= 7;
	1648	if ( 0 < expDiff ) goto aExpBigger;
	1649	if ( expDiff < 0 ) goto bExpBigger;
	1650	if ( aExp == 0xFF ) {
	1651	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1652	float_raise( float_flag_invalid STATUS_VAR);
	1653	return float32_default_nan;
	1654	}
	1655	if ( aExp == 0 ) {
	1656	aExp = 1;
	1657	bExp = 1;
	1658	}
	1659	if ( bSig < aSig ) goto aBigger;
	1660	if ( aSig < bSig ) goto bBigger;
	1661	return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
	1662	bExpBigger:
	1663	if ( bExp == 0xFF ) {
	1664	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1665	return packFloat32( zSign ^ 1, 0xFF, 0 );
	1666	}
	1667	if ( aExp == 0 ) {
	1668	++expDiff;
	1669	}
	1670	else {
	1671	aSig \|= 0x40000000;
	1672	}
	1673	shift32RightJamming( aSig, - expDiff, &aSig );
	1674	bSig \|= 0x40000000;
	1675	bBigger:
	1676	zSig = bSig - aSig;
	1677	zExp = bExp;
	1678	zSign ^= 1;
	1679	goto normalizeRoundAndPack;
	1680	aExpBigger:
	1681	if ( aExp == 0xFF ) {
	1682	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1683	return a;
	1684	}
	1685	if ( bExp == 0 ) {
	1686	--expDiff;
	1687	}
	1688	else {
	1689	bSig \|= 0x40000000;
	1690	}
	1691	shift32RightJamming( bSig, expDiff, &bSig );
	1692	aSig \|= 0x40000000;
	1693	aBigger:
	1694	zSig = aSig - bSig;
	1695	zExp = aExp;
	1696	normalizeRoundAndPack:
	1697	--zExp;
	1698	return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
	1699
	1700	}
	1701
	1702	/*----------------------------------------------------------------------------
	1703	\| Returns the result of adding the single-precision floating-point values `a'
	1704	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
	1705	\| Binary Floating-Point Arithmetic.
	1706	----------------------------------------------------------------------------/
	1707
	1708	float32 float32_add( float32 a, float32 b STATUS_PARAM )
	1709	{
	1710	flag aSign, bSign;
	1711
	1712	aSign = extractFloat32Sign( a );
	1713	bSign = extractFloat32Sign( b );
	1714	if ( aSign == bSign ) {
	1715	return addFloat32Sigs( a, b, aSign STATUS_VAR);
	1716	}
	1717	else {
	1718	return subFloat32Sigs( a, b, aSign STATUS_VAR );
	1719	}
	1720
	1721	}
	1722
	1723	/*----------------------------------------------------------------------------
	1724	\| Returns the result of subtracting the single-precision floating-point values
	1725	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
	1726	\| for Binary Floating-Point Arithmetic.
	1727	----------------------------------------------------------------------------/
	1728
	1729	float32 float32_sub( float32 a, float32 b STATUS_PARAM )
	1730	{
	1731	flag aSign, bSign;
	1732
	1733	aSign = extractFloat32Sign( a );
	1734	bSign = extractFloat32Sign( b );
	1735	if ( aSign == bSign ) {
	1736	return subFloat32Sigs( a, b, aSign STATUS_VAR );
	1737	}
	1738	else {
	1739	return addFloat32Sigs( a, b, aSign STATUS_VAR );
	1740	}
	1741
	1742	}
	1743
	1744	/*----------------------------------------------------------------------------
	1745	\| Returns the result of multiplying the single-precision floating-point values
	1746	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
	1747	\| for Binary Floating-Point Arithmetic.
	1748	----------------------------------------------------------------------------/
	1749
	1750	float32 float32_mul( float32 a, float32 b STATUS_PARAM )
	1751	{
	1752	flag aSign, bSign, zSign;
	1753	int16 aExp, bExp, zExp;
	1754	bits32 aSig, bSig;
	1755	bits64 zSig64;
	1756	bits32 zSig;
	1757
	1758	aSig = extractFloat32Frac( a );
	1759	aExp = extractFloat32Exp( a );
	1760	aSign = extractFloat32Sign( a );
	1761	bSig = extractFloat32Frac( b );
	1762	bExp = extractFloat32Exp( b );
	1763	bSign = extractFloat32Sign( b );
	1764	zSign = aSign ^ bSign;
	1765	if ( aExp == 0xFF ) {
	1766	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
	1767	return propagateFloat32NaN( a, b STATUS_VAR );
	1768	}
	1769	if ( ( bExp \| bSig ) == 0 ) {
	1770	float_raise( float_flag_invalid STATUS_VAR);
	1771	return float32_default_nan;
	1772	}
	1773	return packFloat32( zSign, 0xFF, 0 );
	1774	}
	1775	if ( bExp == 0xFF ) {
	1776	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1777	if ( ( aExp \| aSig ) == 0 ) {
	1778	float_raise( float_flag_invalid STATUS_VAR);
	1779	return float32_default_nan;
	1780	}
	1781	return packFloat32( zSign, 0xFF, 0 );
	1782	}
	1783	if ( aExp == 0 ) {
	1784	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
	1785	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
	1786	}
	1787	if ( bExp == 0 ) {
	1788	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
	1789	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
	1790	}
	1791	zExp = aExp + bExp - 0x7F;
	1792	aSig = ( aSig \| 0x00800000 )<<7;
	1793	bSig = ( bSig \| 0x00800000 )<<8;
	1794	shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
	1795	zSig = zSig64;
	1796	if ( 0 <= (sbits32) ( zSig<<1 ) ) {
	1797	zSig <<= 1;
	1798	--zExp;
	1799	}
	1800	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
	1801
	1802	}
	1803
	1804	/*----------------------------------------------------------------------------
	1805	\| Returns the result of dividing the single-precision floating-point value `a'
	1806	\| by the corresponding value `b'. The operation is performed according to the
	1807	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	1808	----------------------------------------------------------------------------/
	1809
	1810	float32 float32_div( float32 a, float32 b STATUS_PARAM )
	1811	{
	1812	flag aSign, bSign, zSign;
	1813	int16 aExp, bExp, zExp;
	1814	bits32 aSig, bSig, zSig;
	1815
	1816	aSig = extractFloat32Frac( a );
	1817	aExp = extractFloat32Exp( a );
	1818	aSign = extractFloat32Sign( a );
	1819	bSig = extractFloat32Frac( b );
	1820	bExp = extractFloat32Exp( b );
	1821	bSign = extractFloat32Sign( b );
	1822	zSign = aSign ^ bSign;
	1823	if ( aExp == 0xFF ) {
	1824	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1825	if ( bExp == 0xFF ) {
	1826	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1827	float_raise( float_flag_invalid STATUS_VAR);
	1828	return float32_default_nan;
	1829	}
	1830	return packFloat32( zSign, 0xFF, 0 );
	1831	}
	1832	if ( bExp == 0xFF ) {
	1833	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1834	return packFloat32( zSign, 0, 0 );
	1835	}
	1836	if ( bExp == 0 ) {
	1837	if ( bSig == 0 ) {
	1838	if ( ( aExp \| aSig ) == 0 ) {
	1839	float_raise( float_flag_invalid STATUS_VAR);
	1840	return float32_default_nan;
	1841	}
	1842	float_raise( float_flag_divbyzero STATUS_VAR);
	1843	return packFloat32( zSign, 0xFF, 0 );
	1844	}
	1845	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
	1846	}
	1847	if ( aExp == 0 ) {
	1848	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
	1849	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
	1850	}
	1851	zExp = aExp - bExp + 0x7D;
	1852	aSig = ( aSig \| 0x00800000 )<<7;
	1853	bSig = ( bSig \| 0x00800000 )<<8;
	1854	if ( bSig <= ( aSig + aSig ) ) {
	1855	aSig >>= 1;
	1856	++zExp;
	1857	}
	1858	zSig = ( ( (bits64) aSig )<<32 ) / bSig;
	1859	if ( ( zSig & 0x3F ) == 0 ) {
	1860	zSig \|= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
	1861	}
	1862	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
	1863
	1864	}
	1865
	1866	/*----------------------------------------------------------------------------
	1867	\| Returns the remainder of the single-precision floating-point value `a'
	1868	\| with respect to the corresponding value `b'. The operation is performed
	1869	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	1870	----------------------------------------------------------------------------/
	1871
	1872	float32 float32_rem( float32 a, float32 b STATUS_PARAM )
	1873	{
	1874	flag aSign, bSign, zSign;
	1875	int16 aExp, bExp, expDiff;
	1876	bits32 aSig, bSig;
	1877	bits32 q;
	1878	bits64 aSig64, bSig64, q64;
	1879	bits32 alternateASig;
	1880	sbits32 sigMean;
	1881
	1882	aSig = extractFloat32Frac( a );
	1883	aExp = extractFloat32Exp( a );
	1884	aSign = extractFloat32Sign( a );
	1885	bSig = extractFloat32Frac( b );
	1886	bExp = extractFloat32Exp( b );
	1887	bSign = extractFloat32Sign( b );
	1888	if ( aExp == 0xFF ) {
	1889	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
	1890	return propagateFloat32NaN( a, b STATUS_VAR );
	1891	}
	1892	float_raise( float_flag_invalid STATUS_VAR);
	1893	return float32_default_nan;
	1894	}
	1895	if ( bExp == 0xFF ) {
	1896	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
	1897	return a;
	1898	}
	1899	if ( bExp == 0 ) {
	1900	if ( bSig == 0 ) {
	1901	float_raise( float_flag_invalid STATUS_VAR);
	1902	return float32_default_nan;
	1903	}
	1904	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
	1905	}
	1906	if ( aExp == 0 ) {
	1907	if ( aSig == 0 ) return a;
	1908	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
	1909	}
	1910	expDiff = aExp - bExp;
	1911	aSig \|= 0x00800000;
	1912	bSig \|= 0x00800000;
	1913	if ( expDiff < 32 ) {
	1914	aSig <<= 8;
	1915	bSig <<= 8;
	1916	if ( expDiff < 0 ) {
	1917	if ( expDiff < -1 ) return a;
	1918	aSig >>= 1;
	1919	}
	1920	q = ( bSig <= aSig );
	1921	if ( q ) aSig -= bSig;
	1922	if ( 0 < expDiff ) {
	1923	q = ( ( (bits64) aSig )<<32 ) / bSig;
	1924	q >>= 32 - expDiff;
	1925	bSig >>= 2;
	1926	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
	1927	}
	1928	else {
	1929	aSig >>= 2;
	1930	bSig >>= 2;
	1931	}
	1932	}
	1933	else {
	1934	if ( bSig <= aSig ) aSig -= bSig;
	1935	aSig64 = ( (bits64) aSig )<<40;
	1936	bSig64 = ( (bits64) bSig )<<40;
	1937	expDiff -= 64;
	1938	while ( 0 < expDiff ) {
	1939	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
	1940	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
	1941	aSig64 = - ( ( bSig * q64 )<<38 );
	1942	expDiff -= 62;
	1943	}
	1944	expDiff += 64;
	1945	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
	1946	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
	1947	q = q64>>( 64 - expDiff );
	1948	bSig <<= 6;
	1949	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
	1950	}
	1951	do {
	1952	alternateASig = aSig;
	1953	++q;
	1954	aSig -= bSig;
	1955	} while ( 0 <= (sbits32) aSig );
	1956	sigMean = aSig + alternateASig;
	1957	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
	1958	aSig = alternateASig;
	1959	}
	1960	zSign = ( (sbits32) aSig < 0 );
	1961	if ( zSign ) aSig = - aSig;
	1962	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
	1963
	1964	}
	1965
	1966	/*----------------------------------------------------------------------------
	1967	\| Returns the square root of the single-precision floating-point value `a'.
	1968	\| The operation is performed according to the IEC/IEEE Standard for Binary
	1969	\| Floating-Point Arithmetic.
	1970	----------------------------------------------------------------------------/
	1971
	1972	float32 float32_sqrt( float32 a STATUS_PARAM )
	1973	{
	1974	flag aSign;
	1975	int16 aExp, zExp;
	1976	bits32 aSig, zSig;
	1977	bits64 rem, term;
	1978
	1979	aSig = extractFloat32Frac( a );
	1980	aExp = extractFloat32Exp( a );
	1981	aSign = extractFloat32Sign( a );
	1982	if ( aExp == 0xFF ) {
	1983	if ( aSig ) return propagateFloat32NaN( a, 0 STATUS_VAR );
	1984	if ( ! aSign ) return a;
	1985	float_raise( float_flag_invalid STATUS_VAR);
	1986	return float32_default_nan;
	1987	}
	1988	if ( aSign ) {
	1989	if ( ( aExp \| aSig ) == 0 ) return a;
	1990	float_raise( float_flag_invalid STATUS_VAR);
	1991	return float32_default_nan;
	1992	}
	1993	if ( aExp == 0 ) {
	1994	if ( aSig == 0 ) return 0;
	1995	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
	1996	}
	1997	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
	1998	aSig = ( aSig \| 0x00800000 )<<8;
	1999	zSig = estimateSqrt32( aExp, aSig ) + 2;
	2000	if ( ( zSig & 0x7F ) <= 5 ) {
	2001	if ( zSig < 2 ) {
	2002	zSig = 0x7FFFFFFF;
	2003	goto roundAndPack;
	2004	}
	2005	aSig >>= aExp & 1;
	2006	term = ( (bits64) zSig ) * zSig;
	2007	rem = ( ( (bits64) aSig )<<32 ) - term;
	2008	while ( (sbits64) rem < 0 ) {
	2009	--zSig;
	2010	rem += ( ( (bits64) zSig )<<1 ) \| 1;
	2011	}
	2012	zSig \|= ( rem != 0 );
	2013	}
	2014	shift32RightJamming( zSig, 1, &zSig );
	2015	roundAndPack:
	2016	return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
	2017
	2018	}
	2019
	2020	/*----------------------------------------------------------------------------
	2021	\| Returns 1 if the single-precision floating-point value `a' is equal to
	2022	\| the corresponding value `b', and 0 otherwise. The comparison is performed
	2023	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	2024	----------------------------------------------------------------------------/
	2025
	2026	flag float32_eq( float32 a, float32 b STATUS_PARAM )
	2027	{
	2028
	2029	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
	2030	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
	2031	) {
	2032	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
	2033	float_raise( float_flag_invalid STATUS_VAR);
	2034	}
	2035	return 0;
	2036	}
	2037	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
	2038
	2039	}
	2040
	2041	/*----------------------------------------------------------------------------
	2042	\| Returns 1 if the single-precision floating-point value `a' is less than
	2043	\| or equal to the corresponding value `b', and 0 otherwise. The comparison
	2044	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	2045	\| Arithmetic.
	2046	----------------------------------------------------------------------------/
	2047
	2048	flag float32_le( float32 a, float32 b STATUS_PARAM )
	2049	{
	2050	flag aSign, bSign;
	2051
	2052	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
	2053	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
	2054	) {
	2055	float_raise( float_flag_invalid STATUS_VAR);
	2056	return 0;
	2057	}
	2058	aSign = extractFloat32Sign( a );
	2059	bSign = extractFloat32Sign( b );
	2060	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
	2061	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
	2062
	2063	}
	2064
	2065	/*----------------------------------------------------------------------------
	2066	\| Returns 1 if the single-precision floating-point value `a' is less than
	2067	\| the corresponding value `b', and 0 otherwise. The comparison is performed
	2068	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	2069	----------------------------------------------------------------------------/
	2070
	2071	flag float32_lt( float32 a, float32 b STATUS_PARAM )
	2072	{
	2073	flag aSign, bSign;
	2074
	2075	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
	2076	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
	2077	) {
	2078	float_raise( float_flag_invalid STATUS_VAR);
	2079	return 0;
	2080	}
	2081	aSign = extractFloat32Sign( a );
	2082	bSign = extractFloat32Sign( b );
	2083	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
	2084	return ( a != b ) && ( aSign ^ ( a < b ) );
	2085
	2086	}
	2087
	2088	/*----------------------------------------------------------------------------
	2089	\| Returns 1 if the single-precision floating-point value `a' is equal to
	2090	\| the corresponding value `b', and 0 otherwise. The invalid exception is
	2091	\| raised if either operand is a NaN. Otherwise, the comparison is performed
	2092	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	2093	----------------------------------------------------------------------------/
	2094
	2095	flag float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
	2096	{
	2097
	2098	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
	2099	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
	2100	) {
	2101	float_raise( float_flag_invalid STATUS_VAR);
	2102	return 0;
	2103	}
	2104	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
	2105
	2106	}
	2107
	2108	/*----------------------------------------------------------------------------
	2109	\| Returns 1 if the single-precision floating-point value `a' is less than or
	2110	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
	2111	\| cause an exception. Otherwise, the comparison is performed according to the
	2112	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	2113	----------------------------------------------------------------------------/
	2114
	2115	flag float32_le_quiet( float32 a, float32 b STATUS_PARAM )
	2116	{
	2117	flag aSign, bSign;
	2118
	2119	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
	2120	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
	2121	) {
	2122	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
	2123	float_raise( float_flag_invalid STATUS_VAR);
	2124	}
	2125	return 0;
	2126	}
	2127	aSign = extractFloat32Sign( a );
	2128	bSign = extractFloat32Sign( b );
	2129	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
	2130	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
	2131
	2132	}
	2133
	2134	/*----------------------------------------------------------------------------
	2135	\| Returns 1 if the single-precision floating-point value `a' is less than
	2136	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
	2137	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
	2138	\| Standard for Binary Floating-Point Arithmetic.
	2139	----------------------------------------------------------------------------/
	2140
	2141	flag float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
	2142	{
	2143	flag aSign, bSign;
	2144
	2145	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
	2146	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
	2147	) {
	2148	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
	2149	float_raise( float_flag_invalid STATUS_VAR);
	2150	}
	2151	return 0;
	2152	}
	2153	aSign = extractFloat32Sign( a );
	2154	bSign = extractFloat32Sign( b );
	2155	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
	2156	return ( a != b ) && ( aSign ^ ( a < b ) );
	2157
	2158	}
	2159
	2160	/*----------------------------------------------------------------------------
	2161	\| Returns the result of converting the double-precision floating-point value
	2162	\| `a' to the 32-bit two's complement integer format. The conversion is
	2163	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	2164	\| Arithmetic---which means in particular that the conversion is rounded
	2165	\| according to the current rounding mode. If `a' is a NaN, the largest
	2166	\| positive integer is returned. Otherwise, if the conversion overflows, the
	2167	\| largest integer with the same sign as `a' is returned.
	2168	----------------------------------------------------------------------------/
	2169
	2170	int32 float64_to_int32( float64 a STATUS_PARAM )
	2171	{
	2172	flag aSign;
	2173	int16 aExp, shiftCount;
	2174	bits64 aSig;
	2175
	2176	aSig = extractFloat64Frac( a );
	2177	aExp = extractFloat64Exp( a );
	2178	aSign = extractFloat64Sign( a );
	2179	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
	2180	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
	2181	shiftCount = 0x42C - aExp;
	2182	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
	2183	return roundAndPackInt32( aSign, aSig STATUS_VAR );
	2184
	2185	}
	2186
	2187	/*----------------------------------------------------------------------------
	2188	\| Returns the result of converting the double-precision floating-point value
	2189	\| `a' to the 32-bit two's complement integer format. The conversion is
	2190	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	2191	\| Arithmetic, except that the conversion is always rounded toward zero.
	2192	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
	2193	\| the conversion overflows, the largest integer with the same sign as `a' is
	2194	\| returned.
	2195	----------------------------------------------------------------------------/
	2196
	2197	int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
	2198	{
	2199	flag aSign;
	2200	int16 aExp, shiftCount;
	2201	bits64 aSig, savedASig;
	2202	int32 z;
	2203
	2204	aSig = extractFloat64Frac( a );
	2205	aExp = extractFloat64Exp( a );
	2206	aSign = extractFloat64Sign( a );
	2207	if ( 0x41E < aExp ) {
	2208	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
	2209	goto invalid;
	2210	}
	2211	else if ( aExp < 0x3FF ) {
	2212	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
	2213	return 0;
	2214	}
	2215	aSig \|= LIT64( 0x0010000000000000 );
	2216	shiftCount = 0x433 - aExp;
	2217	savedASig = aSig;
	2218	aSig >>= shiftCount;
	2219	z = aSig;
	2220	if ( aSign ) z = - z;
	2221	if ( ( z < 0 ) ^ aSign ) {
	2222	invalid:
	2223	float_raise( float_flag_invalid STATUS_VAR);
	2224	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
	2225	}
	2226	if ( ( aSig<<shiftCount ) != savedASig ) {
	2227	STATUS(float_exception_flags) \|= float_flag_inexact;
	2228	}
	2229	return z;
	2230
	2231	}
	2232
	2233	/*----------------------------------------------------------------------------
	2234	\| Returns the result of converting the double-precision floating-point value
	2235	\| `a' to the 64-bit two's complement integer format. The conversion is
	2236	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	2237	\| Arithmetic---which means in particular that the conversion is rounded
	2238	\| according to the current rounding mode. If `a' is a NaN, the largest
	2239	\| positive integer is returned. Otherwise, if the conversion overflows, the
	2240	\| largest integer with the same sign as `a' is returned.
	2241	----------------------------------------------------------------------------/
	2242
	2243	int64 float64_to_int64( float64 a STATUS_PARAM )
	2244	{
	2245	flag aSign;
	2246	int16 aExp, shiftCount;
	2247	bits64 aSig, aSigExtra;
	2248
	2249	aSig = extractFloat64Frac( a );
	2250	aExp = extractFloat64Exp( a );
	2251	aSign = extractFloat64Sign( a );
	2252	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
	2253	shiftCount = 0x433 - aExp;
	2254	if ( shiftCount <= 0 ) {
	2255	if ( 0x43E < aExp ) {
	2256	float_raise( float_flag_invalid STATUS_VAR);
	2257	if ( ! aSign
	2258	\|\| ( ( aExp == 0x7FF )
	2259	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
	2260	) {
	2261	return LIT64( 0x7FFFFFFFFFFFFFFF );
	2262	}
	2263	return (sbits64) LIT64( 0x8000000000000000 );
	2264	}
	2265	aSigExtra = 0;
	2266	aSig <<= - shiftCount;
	2267	}
	2268	else {
	2269	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
	2270	}
	2271	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
	2272
	2273	}
	2274
	2275	/*----------------------------------------------------------------------------
	2276	\| Returns the result of converting the double-precision floating-point value
	2277	\| `a' to the 64-bit two's complement integer format. The conversion is
	2278	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	2279	\| Arithmetic, except that the conversion is always rounded toward zero.
	2280	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
	2281	\| the conversion overflows, the largest integer with the same sign as `a' is
	2282	\| returned.
	2283	----------------------------------------------------------------------------/
	2284
	2285	int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
	2286	{
	2287	flag aSign;
	2288	int16 aExp, shiftCount;
	2289	bits64 aSig;
	2290	int64 z;
	2291
	2292	aSig = extractFloat64Frac( a );
	2293	aExp = extractFloat64Exp( a );
	2294	aSign = extractFloat64Sign( a );
	2295	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
	2296	shiftCount = aExp - 0x433;
	2297	if ( 0 <= shiftCount ) {
	2298	if ( 0x43E <= aExp ) {
	2299	if ( a != LIT64( 0xC3E0000000000000 ) ) {
	2300	float_raise( float_flag_invalid STATUS_VAR);
	2301	if ( ! aSign
	2302	\|\| ( ( aExp == 0x7FF )
	2303	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
	2304	) {
	2305	return LIT64( 0x7FFFFFFFFFFFFFFF );
	2306	}
	2307	}
	2308	return (sbits64) LIT64( 0x8000000000000000 );
	2309	}
	2310	z = aSig<<shiftCount;
	2311	}
	2312	else {
	2313	if ( aExp < 0x3FE ) {
	2314	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
	2315	return 0;
	2316	}
	2317	z = aSig>>( - shiftCount );
	2318	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
	2319	STATUS(float_exception_flags) \|= float_flag_inexact;
	2320	}
	2321	}
	2322	if ( aSign ) z = - z;
	2323	return z;
	2324
	2325	}
	2326
	2327	/*----------------------------------------------------------------------------
	2328	\| Returns the result of converting the double-precision floating-point value
	2329	\| `a' to the single-precision floating-point format. The conversion is
	2330	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	2331	\| Arithmetic.
	2332	----------------------------------------------------------------------------/
	2333
	2334	float32 float64_to_float32( float64 a STATUS_PARAM )
	2335	{
	2336	flag aSign;
	2337	int16 aExp;
	2338	bits64 aSig;
	2339	bits32 zSig;
	2340
	2341	aSig = extractFloat64Frac( a );
	2342	aExp = extractFloat64Exp( a );
	2343	aSign = extractFloat64Sign( a );
	2344	if ( aExp == 0x7FF ) {
	2345	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
	2346	return packFloat32( aSign, 0xFF, 0 );
	2347	}
	2348	shift64RightJamming( aSig, 22, &aSig );
	2349	zSig = aSig;
	2350	if ( aExp \|\| zSig ) {
	2351	zSig \|= 0x40000000;
	2352	aExp -= 0x381;
	2353	}
	2354	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
	2355
	2356	}
	2357
	2358	#ifdef FLOATX80
	2359
	2360	/*----------------------------------------------------------------------------
	2361	\| Returns the result of converting the double-precision floating-point value
	2362	\| `a' to the extended double-precision floating-point format. The conversion
	2363	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	2364	\| Arithmetic.
	2365	----------------------------------------------------------------------------/
	2366
	2367	floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
	2368	{
	2369	flag aSign;
	2370	int16 aExp;
	2371	bits64 aSig;
	2372
	2373	aSig = extractFloat64Frac( a );
	2374	aExp = extractFloat64Exp( a );
	2375	aSign = extractFloat64Sign( a );
	2376	if ( aExp == 0x7FF ) {
	2377	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
	2378	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
	2379	}
	2380	if ( aExp == 0 ) {
	2381	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
	2382	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
	2383	}
	2384	return
	2385	packFloatx80(
	2386	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
	2387
	2388	}
	2389
	2390	#endif
	2391
	2392	#ifdef FLOAT128
	2393
	2394	/*----------------------------------------------------------------------------
	2395	\| Returns the result of converting the double-precision floating-point value
	2396	\| `a' to the quadruple-precision floating-point format. The conversion is
	2397	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	2398	\| Arithmetic.
	2399	----------------------------------------------------------------------------/
	2400
	2401	float128 float64_to_float128( float64 a STATUS_PARAM )
	2402	{
	2403	flag aSign;
	2404	int16 aExp;
	2405	bits64 aSig, zSig0, zSig1;
	2406
	2407	aSig = extractFloat64Frac( a );
	2408	aExp = extractFloat64Exp( a );
	2409	aSign = extractFloat64Sign( a );
	2410	if ( aExp == 0x7FF ) {
	2411	if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
	2412	return packFloat128( aSign, 0x7FFF, 0, 0 );
	2413	}
	2414	if ( aExp == 0 ) {
	2415	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
	2416	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
	2417	--aExp;
	2418	}
	2419	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
	2420	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
	2421
	2422	}
	2423
	2424	#endif
	2425
	2426	/*----------------------------------------------------------------------------
	2427	\| Rounds the double-precision floating-point value `a' to an integer, and
	2428	\| returns the result as a double-precision floating-point value. The
	2429	\| operation is performed according to the IEC/IEEE Standard for Binary
	2430	\| Floating-Point Arithmetic.
	2431	----------------------------------------------------------------------------/
	2432
	2433	float64 float64_round_to_int( float64 a STATUS_PARAM )
	2434	{
	2435	flag aSign;
	2436	int16 aExp;
	2437	bits64 lastBitMask, roundBitsMask;
	2438	int8 roundingMode;
	2439	float64 z;
	2440
	2441	aExp = extractFloat64Exp( a );
	2442	if ( 0x433 <= aExp ) {
	2443	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
	2444	return propagateFloat64NaN( a, a STATUS_VAR );
	2445	}
	2446	return a;
	2447	}
	2448	if ( aExp < 0x3FF ) {
	2449	if ( (bits64) ( a<<1 ) == 0 ) return a;
	2450	STATUS(float_exception_flags) \|= float_flag_inexact;
	2451	aSign = extractFloat64Sign( a );
	2452	switch ( STATUS(float_rounding_mode) ) {
	2453	case float_round_nearest_even:
	2454	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
	2455	return packFloat64( aSign, 0x3FF, 0 );
	2456	}
	2457	break;
	2458	case float_round_down:
	2459	return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
	2460	case float_round_up:
	2461	return
	2462	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
	2463	}
	2464	return packFloat64( aSign, 0, 0 );
	2465	}
	2466	lastBitMask = 1;
	2467	lastBitMask <<= 0x433 - aExp;
	2468	roundBitsMask = lastBitMask - 1;
	2469	z = a;
	2470	roundingMode = STATUS(float_rounding_mode);
	2471	if ( roundingMode == float_round_nearest_even ) {
	2472	z += lastBitMask>>1;
	2473	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
	2474	}
	2475	else if ( roundingMode != float_round_to_zero ) {
	2476	if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
	2477	z += roundBitsMask;
	2478	}
	2479	}
	2480	z &= ~ roundBitsMask;
	2481	if ( z != a ) STATUS(float_exception_flags) \|= float_flag_inexact;
	2482	return z;
	2483
	2484	}
	2485
	2486	/*----------------------------------------------------------------------------
	2487	\| Returns the result of adding the absolute values of the double-precision
	2488	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
	2489	\| before being returned. `zSign' is ignored if the result is a NaN.
	2490	\| The addition is performed according to the IEC/IEEE Standard for Binary
	2491	\| Floating-Point Arithmetic.
	2492	----------------------------------------------------------------------------/
	2493
	2494	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
	2495	{
	2496	int16 aExp, bExp, zExp;
	2497	bits64 aSig, bSig, zSig;
	2498	int16 expDiff;
	2499
	2500	aSig = extractFloat64Frac( a );
	2501	aExp = extractFloat64Exp( a );
	2502	bSig = extractFloat64Frac( b );
	2503	bExp = extractFloat64Exp( b );
	2504	expDiff = aExp - bExp;
	2505	aSig <<= 9;
	2506	bSig <<= 9;
	2507	if ( 0 < expDiff ) {
	2508	if ( aExp == 0x7FF ) {
	2509	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2510	return a;
	2511	}
	2512	if ( bExp == 0 ) {
	2513	--expDiff;
	2514	}
	2515	else {
	2516	bSig \|= LIT64( 0x2000000000000000 );
	2517	}
	2518	shift64RightJamming( bSig, expDiff, &bSig );
	2519	zExp = aExp;
	2520	}
	2521	else if ( expDiff < 0 ) {
	2522	if ( bExp == 0x7FF ) {
	2523	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2524	return packFloat64( zSign, 0x7FF, 0 );
	2525	}
	2526	if ( aExp == 0 ) {
	2527	++expDiff;
	2528	}
	2529	else {
	2530	aSig \|= LIT64( 0x2000000000000000 );
	2531	}
	2532	shift64RightJamming( aSig, - expDiff, &aSig );
	2533	zExp = bExp;
	2534	}
	2535	else {
	2536	if ( aExp == 0x7FF ) {
	2537	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2538	return a;
	2539	}
	2540	if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
	2541	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
	2542	zExp = aExp;
	2543	goto roundAndPack;
	2544	}
	2545	aSig \|= LIT64( 0x2000000000000000 );
	2546	zSig = ( aSig + bSig )<<1;
	2547	--zExp;
	2548	if ( (sbits64) zSig < 0 ) {
	2549	zSig = aSig + bSig;
	2550	++zExp;
	2551	}
	2552	roundAndPack:
	2553	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
	2554
	2555	}
	2556
	2557	/*----------------------------------------------------------------------------
	2558	\| Returns the result of subtracting the absolute values of the double-
	2559	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
	2560	\| difference is negated before being returned. `zSign' is ignored if the
	2561	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
	2562	\| Standard for Binary Floating-Point Arithmetic.
	2563	----------------------------------------------------------------------------/
	2564
	2565	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
	2566	{
	2567	int16 aExp, bExp, zExp;
	2568	bits64 aSig, bSig, zSig;
	2569	int16 expDiff;
	2570
	2571	aSig = extractFloat64Frac( a );
	2572	aExp = extractFloat64Exp( a );
	2573	bSig = extractFloat64Frac( b );
	2574	bExp = extractFloat64Exp( b );
	2575	expDiff = aExp - bExp;
	2576	aSig <<= 10;
	2577	bSig <<= 10;
	2578	if ( 0 < expDiff ) goto aExpBigger;
	2579	if ( expDiff < 0 ) goto bExpBigger;
	2580	if ( aExp == 0x7FF ) {
	2581	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2582	float_raise( float_flag_invalid STATUS_VAR);
	2583	return float64_default_nan;
	2584	}
	2585	if ( aExp == 0 ) {
	2586	aExp = 1;
	2587	bExp = 1;
	2588	}
	2589	if ( bSig < aSig ) goto aBigger;
	2590	if ( aSig < bSig ) goto bBigger;
	2591	return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
	2592	bExpBigger:
	2593	if ( bExp == 0x7FF ) {
	2594	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2595	return packFloat64( zSign ^ 1, 0x7FF, 0 );
	2596	}
	2597	if ( aExp == 0 ) {
	2598	++expDiff;
	2599	}
	2600	else {
	2601	aSig \|= LIT64( 0x4000000000000000 );
	2602	}
	2603	shift64RightJamming( aSig, - expDiff, &aSig );
	2604	bSig \|= LIT64( 0x4000000000000000 );
	2605	bBigger:
	2606	zSig = bSig - aSig;
	2607	zExp = bExp;
	2608	zSign ^= 1;
	2609	goto normalizeRoundAndPack;
	2610	aExpBigger:
	2611	if ( aExp == 0x7FF ) {
	2612	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2613	return a;
	2614	}
	2615	if ( bExp == 0 ) {
	2616	--expDiff;
	2617	}
	2618	else {
	2619	bSig \|= LIT64( 0x4000000000000000 );
	2620	}
	2621	shift64RightJamming( bSig, expDiff, &bSig );
	2622	aSig \|= LIT64( 0x4000000000000000 );
	2623	aBigger:
	2624	zSig = aSig - bSig;
	2625	zExp = aExp;
	2626	normalizeRoundAndPack:
	2627	--zExp;
	2628	return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
	2629
	2630	}
	2631
	2632	/*----------------------------------------------------------------------------
	2633	\| Returns the result of adding the double-precision floating-point values `a'
	2634	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
	2635	\| Binary Floating-Point Arithmetic.
	2636	----------------------------------------------------------------------------/
	2637
	2638	float64 float64_add( float64 a, float64 b STATUS_PARAM )
	2639	{
	2640	flag aSign, bSign;
	2641
	2642	aSign = extractFloat64Sign( a );
	2643	bSign = extractFloat64Sign( b );
	2644	if ( aSign == bSign ) {
	2645	return addFloat64Sigs( a, b, aSign STATUS_VAR );
	2646	}
	2647	else {
	2648	return subFloat64Sigs( a, b, aSign STATUS_VAR );
	2649	}
	2650
	2651	}
	2652
	2653	/*----------------------------------------------------------------------------
	2654	\| Returns the result of subtracting the double-precision floating-point values
	2655	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
	2656	\| for Binary Floating-Point Arithmetic.
	2657	----------------------------------------------------------------------------/
	2658
	2659	float64 float64_sub( float64 a, float64 b STATUS_PARAM )
	2660	{
	2661	flag aSign, bSign;
	2662
	2663	aSign = extractFloat64Sign( a );
	2664	bSign = extractFloat64Sign( b );
	2665	if ( aSign == bSign ) {
	2666	return subFloat64Sigs( a, b, aSign STATUS_VAR );
	2667	}
	2668	else {
	2669	return addFloat64Sigs( a, b, aSign STATUS_VAR );
	2670	}
	2671
	2672	}
	2673
	2674	/*----------------------------------------------------------------------------
	2675	\| Returns the result of multiplying the double-precision floating-point values
	2676	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
	2677	\| for Binary Floating-Point Arithmetic.
	2678	----------------------------------------------------------------------------/
	2679
	2680	float64 float64_mul( float64 a, float64 b STATUS_PARAM )
	2681	{
	2682	flag aSign, bSign, zSign;
	2683	int16 aExp, bExp, zExp;
	2684	bits64 aSig, bSig, zSig0, zSig1;
	2685
	2686	aSig = extractFloat64Frac( a );
	2687	aExp = extractFloat64Exp( a );
	2688	aSign = extractFloat64Sign( a );
	2689	bSig = extractFloat64Frac( b );
	2690	bExp = extractFloat64Exp( b );
	2691	bSign = extractFloat64Sign( b );
	2692	zSign = aSign ^ bSign;
	2693	if ( aExp == 0x7FF ) {
	2694	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
	2695	return propagateFloat64NaN( a, b STATUS_VAR );
	2696	}
	2697	if ( ( bExp \| bSig ) == 0 ) {
	2698	float_raise( float_flag_invalid STATUS_VAR);
	2699	return float64_default_nan;
	2700	}
	2701	return packFloat64( zSign, 0x7FF, 0 );
	2702	}
	2703	if ( bExp == 0x7FF ) {
	2704	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2705	if ( ( aExp \| aSig ) == 0 ) {
	2706	float_raise( float_flag_invalid STATUS_VAR);
	2707	return float64_default_nan;
	2708	}
	2709	return packFloat64( zSign, 0x7FF, 0 );
	2710	}
	2711	if ( aExp == 0 ) {
	2712	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
	2713	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
	2714	}
	2715	if ( bExp == 0 ) {
	2716	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
	2717	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
	2718	}
	2719	zExp = aExp + bExp - 0x3FF;
	2720	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
	2721	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
	2722	mul64To128( aSig, bSig, &zSig0, &zSig1 );
	2723	zSig0 \|= ( zSig1 != 0 );
	2724	if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
	2725	zSig0 <<= 1;
	2726	--zExp;
	2727	}
	2728	return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
	2729
	2730	}
	2731
	2732	/*----------------------------------------------------------------------------
	2733	\| Returns the result of dividing the double-precision floating-point value `a'
	2734	\| by the corresponding value `b'. The operation is performed according to
	2735	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	2736	----------------------------------------------------------------------------/
	2737
	2738	float64 float64_div( float64 a, float64 b STATUS_PARAM )
	2739	{
	2740	flag aSign, bSign, zSign;
	2741	int16 aExp, bExp, zExp;
	2742	bits64 aSig, bSig, zSig;
	2743	bits64 rem0, rem1;
	2744	bits64 term0, term1;
	2745
	2746	aSig = extractFloat64Frac( a );
	2747	aExp = extractFloat64Exp( a );
	2748	aSign = extractFloat64Sign( a );
	2749	bSig = extractFloat64Frac( b );
	2750	bExp = extractFloat64Exp( b );
	2751	bSign = extractFloat64Sign( b );
	2752	zSign = aSign ^ bSign;
	2753	if ( aExp == 0x7FF ) {
	2754	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2755	if ( bExp == 0x7FF ) {
	2756	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2757	float_raise( float_flag_invalid STATUS_VAR);
	2758	return float64_default_nan;
	2759	}
	2760	return packFloat64( zSign, 0x7FF, 0 );
	2761	}
	2762	if ( bExp == 0x7FF ) {
	2763	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2764	return packFloat64( zSign, 0, 0 );
	2765	}
	2766	if ( bExp == 0 ) {
	2767	if ( bSig == 0 ) {
	2768	if ( ( aExp \| aSig ) == 0 ) {
	2769	float_raise( float_flag_invalid STATUS_VAR);
	2770	return float64_default_nan;
	2771	}
	2772	float_raise( float_flag_divbyzero STATUS_VAR);
	2773	return packFloat64( zSign, 0x7FF, 0 );
	2774	}
	2775	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
	2776	}
	2777	if ( aExp == 0 ) {
	2778	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
	2779	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
	2780	}
	2781	zExp = aExp - bExp + 0x3FD;
	2782	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
	2783	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
	2784	if ( bSig <= ( aSig + aSig ) ) {
	2785	aSig >>= 1;
	2786	++zExp;
	2787	}
	2788	zSig = estimateDiv128To64( aSig, 0, bSig );
	2789	if ( ( zSig & 0x1FF ) <= 2 ) {
	2790	mul64To128( bSig, zSig, &term0, &term1 );
	2791	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
	2792	while ( (sbits64) rem0 < 0 ) {
	2793	--zSig;
	2794	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
	2795	}
	2796	zSig \|= ( rem1 != 0 );
	2797	}
	2798	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
	2799
	2800	}
	2801
	2802	/*----------------------------------------------------------------------------
	2803	\| Returns the remainder of the double-precision floating-point value `a'
	2804	\| with respect to the corresponding value `b'. The operation is performed
	2805	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	2806	----------------------------------------------------------------------------/
	2807
	2808	float64 float64_rem( float64 a, float64 b STATUS_PARAM )
	2809	{
	2810	flag aSign, bSign, zSign;
	2811	int16 aExp, bExp, expDiff;
	2812	bits64 aSig, bSig;
	2813	bits64 q, alternateASig;
	2814	sbits64 sigMean;
	2815
	2816	aSig = extractFloat64Frac( a );
	2817	aExp = extractFloat64Exp( a );
	2818	aSign = extractFloat64Sign( a );
	2819	bSig = extractFloat64Frac( b );
	2820	bExp = extractFloat64Exp( b );
	2821	bSign = extractFloat64Sign( b );
	2822	if ( aExp == 0x7FF ) {
	2823	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
	2824	return propagateFloat64NaN( a, b STATUS_VAR );
	2825	}
	2826	float_raise( float_flag_invalid STATUS_VAR);
	2827	return float64_default_nan;
	2828	}
	2829	if ( bExp == 0x7FF ) {
	2830	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
	2831	return a;
	2832	}
	2833	if ( bExp == 0 ) {
	2834	if ( bSig == 0 ) {
	2835	float_raise( float_flag_invalid STATUS_VAR);
	2836	return float64_default_nan;
	2837	}
	2838	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
	2839	}
	2840	if ( aExp == 0 ) {
	2841	if ( aSig == 0 ) return a;
	2842	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
	2843	}
	2844	expDiff = aExp - bExp;
	2845	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
	2846	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
	2847	if ( expDiff < 0 ) {
	2848	if ( expDiff < -1 ) return a;
	2849	aSig >>= 1;
	2850	}
	2851	q = ( bSig <= aSig );
	2852	if ( q ) aSig -= bSig;
	2853	expDiff -= 64;
	2854	while ( 0 < expDiff ) {
	2855	q = estimateDiv128To64( aSig, 0, bSig );
	2856	q = ( 2 < q ) ? q - 2 : 0;
	2857	aSig = - ( ( bSig>>2 ) * q );
	2858	expDiff -= 62;
	2859	}
	2860	expDiff += 64;
	2861	if ( 0 < expDiff ) {
	2862	q = estimateDiv128To64( aSig, 0, bSig );
	2863	q = ( 2 < q ) ? q - 2 : 0;
	2864	q >>= 64 - expDiff;
	2865	bSig >>= 2;
	2866	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
	2867	}
	2868	else {
	2869	aSig >>= 2;
	2870	bSig >>= 2;
	2871	}
	2872	do {
	2873	alternateASig = aSig;
	2874	++q;
	2875	aSig -= bSig;
	2876	} while ( 0 <= (sbits64) aSig );
	2877	sigMean = aSig + alternateASig;
	2878	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
	2879	aSig = alternateASig;
	2880	}
	2881	zSign = ( (sbits64) aSig < 0 );
	2882	if ( zSign ) aSig = - aSig;
	2883	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
	2884
	2885	}
	2886
	2887	/*----------------------------------------------------------------------------
	2888	\| Returns the square root of the double-precision floating-point value `a'.
	2889	\| The operation is performed according to the IEC/IEEE Standard for Binary
	2890	\| Floating-Point Arithmetic.
	2891	----------------------------------------------------------------------------/
	2892
	2893	float64 float64_sqrt( float64 a STATUS_PARAM )
	2894	{
	2895	flag aSign;
	2896	int16 aExp, zExp;
	2897	bits64 aSig, zSig, doubleZSig;
	2898	bits64 rem0, rem1, term0, term1;
	2899
	2900	aSig = extractFloat64Frac( a );
	2901	aExp = extractFloat64Exp( a );
	2902	aSign = extractFloat64Sign( a );
	2903	if ( aExp == 0x7FF ) {
	2904	if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
	2905	if ( ! aSign ) return a;
	2906	float_raise( float_flag_invalid STATUS_VAR);
	2907	return float64_default_nan;
	2908	}
	2909	if ( aSign ) {
	2910	if ( ( aExp \| aSig ) == 0 ) return a;
	2911	float_raise( float_flag_invalid STATUS_VAR);
	2912	return float64_default_nan;
	2913	}
	2914	if ( aExp == 0 ) {
	2915	if ( aSig == 0 ) return 0;
	2916	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
	2917	}
	2918	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
	2919	aSig \|= LIT64( 0x0010000000000000 );
	2920	zSig = estimateSqrt32( aExp, aSig>>21 );
	2921	aSig <<= 9 - ( aExp & 1 );
	2922	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
	2923	if ( ( zSig & 0x1FF ) <= 5 ) {
	2924	doubleZSig = zSig<<1;
	2925	mul64To128( zSig, zSig, &term0, &term1 );
	2926	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
	2927	while ( (sbits64) rem0 < 0 ) {
	2928	--zSig;
	2929	doubleZSig -= 2;
	2930	add128( rem0, rem1, zSig>>63, doubleZSig \| 1, &rem0, &rem1 );
	2931	}
	2932	zSig \|= ( ( rem0 \| rem1 ) != 0 );
	2933	}
	2934	return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
	2935
	2936	}
	2937
	2938	/*----------------------------------------------------------------------------
	2939	\| Returns 1 if the double-precision floating-point value `a' is equal to the
	2940	\| corresponding value `b', and 0 otherwise. The comparison is performed
	2941	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	2942	----------------------------------------------------------------------------/
	2943
	2944	flag float64_eq( float64 a, float64 b STATUS_PARAM )
	2945	{
	2946
	2947	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
	2948	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
	2949	) {
	2950	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
	2951	float_raise( float_flag_invalid STATUS_VAR);
	2952	}
	2953	return 0;
	2954	}
	2955	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
	2956
	2957	}
	2958
	2959	/*----------------------------------------------------------------------------
	2960	\| Returns 1 if the double-precision floating-point value `a' is less than or
	2961	\| equal to the corresponding value `b', and 0 otherwise. The comparison is
	2962	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	2963	\| Arithmetic.
	2964	----------------------------------------------------------------------------/
	2965
	2966	flag float64_le( float64 a, float64 b STATUS_PARAM )
	2967	{
	2968	flag aSign, bSign;
	2969
	2970	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
	2971	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
	2972	) {
	2973	float_raise( float_flag_invalid STATUS_VAR);
	2974	return 0;
	2975	}
	2976	aSign = extractFloat64Sign( a );
	2977	bSign = extractFloat64Sign( b );
	2978	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
	2979	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
	2980
	2981	}
	2982
	2983	/*----------------------------------------------------------------------------
	2984	\| Returns 1 if the double-precision floating-point value `a' is less than
	2985	\| the corresponding value `b', and 0 otherwise. The comparison is performed
	2986	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	2987	----------------------------------------------------------------------------/
	2988
	2989	flag float64_lt( float64 a, float64 b STATUS_PARAM )
	2990	{
	2991	flag aSign, bSign;
	2992
	2993	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
	2994	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
	2995	) {
	2996	float_raise( float_flag_invalid STATUS_VAR);
	2997	return 0;
	2998	}
	2999	aSign = extractFloat64Sign( a );
	3000	bSign = extractFloat64Sign( b );
	3001	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
	3002	return ( a != b ) && ( aSign ^ ( a < b ) );
	3003
	3004	}
	3005
	3006	/*----------------------------------------------------------------------------
	3007	\| Returns 1 if the double-precision floating-point value `a' is equal to the
	3008	\| corresponding value `b', and 0 otherwise. The invalid exception is raised
	3009	\| if either operand is a NaN. Otherwise, the comparison is performed
	3010	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	3011	----------------------------------------------------------------------------/
	3012
	3013	flag float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
	3014	{
	3015
	3016	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
	3017	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
	3018	) {
	3019	float_raise( float_flag_invalid STATUS_VAR);
	3020	return 0;
	3021	}
	3022	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
	3023
	3024	}
	3025
	3026	/*----------------------------------------------------------------------------
	3027	\| Returns 1 if the double-precision floating-point value `a' is less than or
	3028	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
	3029	\| cause an exception. Otherwise, the comparison is performed according to the
	3030	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	3031	----------------------------------------------------------------------------/
	3032
	3033	flag float64_le_quiet( float64 a, float64 b STATUS_PARAM )
	3034	{
	3035	flag aSign, bSign;
	3036
	3037	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
	3038	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
	3039	) {
	3040	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
	3041	float_raise( float_flag_invalid STATUS_VAR);
	3042	}
	3043	return 0;
	3044	}
	3045	aSign = extractFloat64Sign( a );
	3046	bSign = extractFloat64Sign( b );
	3047	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
	3048	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
	3049
	3050	}
	3051
	3052	/*----------------------------------------------------------------------------
	3053	\| Returns 1 if the double-precision floating-point value `a' is less than
	3054	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
	3055	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
	3056	\| Standard for Binary Floating-Point Arithmetic.
	3057	----------------------------------------------------------------------------/
	3058
	3059	flag float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
	3060	{
	3061	flag aSign, bSign;
	3062
	3063	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
	3064	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
	3065	) {
	3066	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
	3067	float_raise( float_flag_invalid STATUS_VAR);
	3068	}
	3069	return 0;
	3070	}
	3071	aSign = extractFloat64Sign( a );
	3072	bSign = extractFloat64Sign( b );
	3073	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
	3074	return ( a != b ) && ( aSign ^ ( a < b ) );
	3075
	3076	}
	3077
	3078	#ifdef FLOATX80
	3079
	3080	/*----------------------------------------------------------------------------
	3081	\| Returns the result of converting the extended double-precision floating-
	3082	\| point value `a' to the 32-bit two's complement integer format. The
	3083	\| conversion is performed according to the IEC/IEEE Standard for Binary
	3084	\| Floating-Point Arithmetic---which means in particular that the conversion
	3085	\| is rounded according to the current rounding mode. If `a' is a NaN, the
	3086	\| largest positive integer is returned. Otherwise, if the conversion
	3087	\| overflows, the largest integer with the same sign as `a' is returned.
	3088	----------------------------------------------------------------------------/
	3089
	3090	int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
	3091	{
	3092	flag aSign;
	3093	int32 aExp, shiftCount;
	3094	bits64 aSig;
	3095
	3096	aSig = extractFloatx80Frac( a );
	3097	aExp = extractFloatx80Exp( a );
	3098	aSign = extractFloatx80Sign( a );
	3099	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
	3100	shiftCount = 0x4037 - aExp;
	3101	if ( shiftCount <= 0 ) shiftCount = 1;
	3102	shift64RightJamming( aSig, shiftCount, &aSig );
	3103	return roundAndPackInt32( aSign, aSig STATUS_VAR );
	3104
	3105	}
	3106
	3107	/*----------------------------------------------------------------------------
	3108	\| Returns the result of converting the extended double-precision floating-
	3109	\| point value `a' to the 32-bit two's complement integer format. The
	3110	\| conversion is performed according to the IEC/IEEE Standard for Binary
	3111	\| Floating-Point Arithmetic, except that the conversion is always rounded
	3112	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
	3113	\| Otherwise, if the conversion overflows, the largest integer with the same
	3114	\| sign as `a' is returned.
	3115	----------------------------------------------------------------------------/
	3116
	3117	int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
	3118	{
	3119	flag aSign;
	3120	int32 aExp, shiftCount;
	3121	bits64 aSig, savedASig;
	3122	int32 z;
	3123
	3124	aSig = extractFloatx80Frac( a );
	3125	aExp = extractFloatx80Exp( a );
	3126	aSign = extractFloatx80Sign( a );
	3127	if ( 0x401E < aExp ) {
	3128	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
	3129	goto invalid;
	3130	}
	3131	else if ( aExp < 0x3FFF ) {
	3132	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
	3133	return 0;
	3134	}
	3135	shiftCount = 0x403E - aExp;
	3136	savedASig = aSig;
	3137	aSig >>= shiftCount;
	3138	z = aSig;
	3139	if ( aSign ) z = - z;
	3140	if ( ( z < 0 ) ^ aSign ) {
	3141	invalid:
	3142	float_raise( float_flag_invalid STATUS_VAR);
	3143	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
	3144	}
	3145	if ( ( aSig<<shiftCount ) != savedASig ) {
	3146	STATUS(float_exception_flags) \|= float_flag_inexact;
	3147	}
	3148	return z;
	3149
	3150	}
	3151
	3152	/*----------------------------------------------------------------------------
	3153	\| Returns the result of converting the extended double-precision floating-
	3154	\| point value `a' to the 64-bit two's complement integer format. The
	3155	\| conversion is performed according to the IEC/IEEE Standard for Binary
	3156	\| Floating-Point Arithmetic---which means in particular that the conversion
	3157	\| is rounded according to the current rounding mode. If `a' is a NaN,
	3158	\| the largest positive integer is returned. Otherwise, if the conversion
	3159	\| overflows, the largest integer with the same sign as `a' is returned.
	3160	----------------------------------------------------------------------------/
	3161
	3162	int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
	3163	{
	3164	flag aSign;
	3165	int32 aExp, shiftCount;
	3166	bits64 aSig, aSigExtra;
	3167
	3168	aSig = extractFloatx80Frac( a );
	3169	aExp = extractFloatx80Exp( a );
	3170	aSign = extractFloatx80Sign( a );
	3171	shiftCount = 0x403E - aExp;
	3172	if ( shiftCount <= 0 ) {
	3173	if ( shiftCount ) {
	3174	float_raise( float_flag_invalid STATUS_VAR);
	3175	if ( ! aSign
	3176	\|\| ( ( aExp == 0x7FFF )
	3177	&& ( aSig != LIT64( 0x8000000000000000 ) ) )
	3178	) {
	3179	return LIT64( 0x7FFFFFFFFFFFFFFF );
	3180	}
	3181	return (sbits64) LIT64( 0x8000000000000000 );
	3182	}
	3183	aSigExtra = 0;
	3184	}
	3185	else {
	3186	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
	3187	}
	3188	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
	3189
	3190	}
	3191
	3192	/*----------------------------------------------------------------------------
	3193	\| Returns the result of converting the extended double-precision floating-
	3194	\| point value `a' to the 64-bit two's complement integer format. The
	3195	\| conversion is performed according to the IEC/IEEE Standard for Binary
	3196	\| Floating-Point Arithmetic, except that the conversion is always rounded
	3197	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
	3198	\| Otherwise, if the conversion overflows, the largest integer with the same
	3199	\| sign as `a' is returned.
	3200	----------------------------------------------------------------------------/
	3201
	3202	int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
	3203	{
	3204	flag aSign;
	3205	int32 aExp, shiftCount;
	3206	bits64 aSig;
	3207	int64 z;
	3208
	3209	aSig = extractFloatx80Frac( a );
	3210	aExp = extractFloatx80Exp( a );
	3211	aSign = extractFloatx80Sign( a );
	3212	shiftCount = aExp - 0x403E;
	3213	if ( 0 <= shiftCount ) {
	3214	aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
	3215	if ( ( a.high != 0xC03E ) \|\| aSig ) {
	3216	float_raise( float_flag_invalid STATUS_VAR);
	3217	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && aSig ) ) {
	3218	return LIT64( 0x7FFFFFFFFFFFFFFF );
	3219	}
	3220	}
	3221	return (sbits64) LIT64( 0x8000000000000000 );
	3222	}
	3223	else if ( aExp < 0x3FFF ) {
	3224	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
	3225	return 0;
	3226	}
	3227	z = aSig>>( - shiftCount );
	3228	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
	3229	STATUS(float_exception_flags) \|= float_flag_inexact;
	3230	}
	3231	if ( aSign ) z = - z;
	3232	return z;
	3233
	3234	}
	3235
	3236	/*----------------------------------------------------------------------------
	3237	\| Returns the result of converting the extended double-precision floating-
	3238	\| point value `a' to the single-precision floating-point format. The
	3239	\| conversion is performed according to the IEC/IEEE Standard for Binary
	3240	\| Floating-Point Arithmetic.
	3241	----------------------------------------------------------------------------/
	3242
	3243	float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
	3244	{
	3245	flag aSign;
	3246	int32 aExp;
	3247	bits64 aSig;
	3248
	3249	aSig = extractFloatx80Frac( a );
	3250	aExp = extractFloatx80Exp( a );
	3251	aSign = extractFloatx80Sign( a );
	3252	if ( aExp == 0x7FFF ) {
	3253	if ( (bits64) ( aSig<<1 ) ) {
	3254	return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
	3255	}
	3256	return packFloat32( aSign, 0xFF, 0 );
	3257	}
	3258	shift64RightJamming( aSig, 33, &aSig );
	3259	if ( aExp \|\| aSig ) aExp -= 0x3F81;
	3260	return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
	3261
	3262	}
	3263
	3264	/*----------------------------------------------------------------------------
	3265	\| Returns the result of converting the extended double-precision floating-
	3266	\| point value `a' to the double-precision floating-point format. The
	3267	\| conversion is performed according to the IEC/IEEE Standard for Binary
	3268	\| Floating-Point Arithmetic.
	3269	----------------------------------------------------------------------------/
	3270
	3271	float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
	3272	{
	3273	flag aSign;
	3274	int32 aExp;
	3275	bits64 aSig, zSig;
	3276
	3277	aSig = extractFloatx80Frac( a );
	3278	aExp = extractFloatx80Exp( a );
	3279	aSign = extractFloatx80Sign( a );
	3280	if ( aExp == 0x7FFF ) {
	3281	if ( (bits64) ( aSig<<1 ) ) {
	3282	return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
	3283	}
	3284	return packFloat64( aSign, 0x7FF, 0 );
	3285	}
	3286	shift64RightJamming( aSig, 1, &zSig );
	3287	if ( aExp \|\| aSig ) aExp -= 0x3C01;
	3288	return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
	3289
	3290	}
	3291
	3292	#ifdef FLOAT128
	3293
	3294	/*----------------------------------------------------------------------------
	3295	\| Returns the result of converting the extended double-precision floating-
	3296	\| point value `a' to the quadruple-precision floating-point format. The
	3297	\| conversion is performed according to the IEC/IEEE Standard for Binary
	3298	\| Floating-Point Arithmetic.
	3299	----------------------------------------------------------------------------/
	3300
	3301	float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
	3302	{
	3303	flag aSign;
	3304	int16 aExp;
	3305	bits64 aSig, zSig0, zSig1;
	3306
	3307	aSig = extractFloatx80Frac( a );
	3308	aExp = extractFloatx80Exp( a );
	3309	aSign = extractFloatx80Sign( a );
	3310	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
	3311	return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
	3312	}
	3313	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
	3314	return packFloat128( aSign, aExp, zSig0, zSig1 );
	3315
	3316	}
	3317
	3318	#endif
	3319
	3320	/*----------------------------------------------------------------------------
	3321	\| Rounds the extended double-precision floating-point value `a' to an integer,
	3322	\| and returns the result as an extended quadruple-precision floating-point
	3323	\| value. The operation is performed according to the IEC/IEEE Standard for
	3324	\| Binary Floating-Point Arithmetic.
	3325	----------------------------------------------------------------------------/
	3326
	3327	floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
	3328	{
	3329	flag aSign;
	3330	int32 aExp;
	3331	bits64 lastBitMask, roundBitsMask;
	3332	int8 roundingMode;
	3333	floatx80 z;
	3334
	3335	aExp = extractFloatx80Exp( a );
	3336	if ( 0x403E <= aExp ) {
	3337	if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
	3338	return propagateFloatx80NaN( a, a STATUS_VAR );
	3339	}
	3340	return a;
	3341	}
	3342	if ( aExp < 0x3FFF ) {
	3343	if ( ( aExp == 0 )
	3344	&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
	3345	return a;
	3346	}
	3347	STATUS(float_exception_flags) \|= float_flag_inexact;
	3348	aSign = extractFloatx80Sign( a );
	3349	switch ( STATUS(float_rounding_mode) ) {
	3350	case float_round_nearest_even:
	3351	if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
	3352	) {
	3353	return
	3354	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
	3355	}
	3356	break;
	3357	case float_round_down:
	3358	return
	3359	aSign ?
	3360	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
	3361	: packFloatx80( 0, 0, 0 );
	3362	case float_round_up:
	3363	return
	3364	aSign ? packFloatx80( 1, 0, 0 )
	3365	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
	3366	}
	3367	return packFloatx80( aSign, 0, 0 );
	3368	}
	3369	lastBitMask = 1;
	3370	lastBitMask <<= 0x403E - aExp;
	3371	roundBitsMask = lastBitMask - 1;
	3372	z = a;
	3373	roundingMode = STATUS(float_rounding_mode);
	3374	if ( roundingMode == float_round_nearest_even ) {
	3375	z.low += lastBitMask>>1;
	3376	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
	3377	}
	3378	else if ( roundingMode != float_round_to_zero ) {
	3379	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
	3380	z.low += roundBitsMask;
	3381	}
	3382	}
	3383	z.low &= ~ roundBitsMask;
	3384	if ( z.low == 0 ) {
	3385	++z.high;
	3386	z.low = LIT64( 0x8000000000000000 );
	3387	}
	3388	if ( z.low != a.low ) STATUS(float_exception_flags) \|= float_flag_inexact;
	3389	return z;
	3390
	3391	}
	3392
	3393	/*----------------------------------------------------------------------------
	3394	\| Returns the result of adding the absolute values of the extended double-
	3395	\| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
	3396	\| negated before being returned. `zSign' is ignored if the result is a NaN.
	3397	\| The addition is performed according to the IEC/IEEE Standard for Binary
	3398	\| Floating-Point Arithmetic.
	3399	----------------------------------------------------------------------------/
	3400
	3401	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
	3402	{
	3403	int32 aExp, bExp, zExp;
	3404	bits64 aSig, bSig, zSig0, zSig1;
	3405	int32 expDiff;
	3406
	3407	aSig = extractFloatx80Frac( a );
	3408	aExp = extractFloatx80Exp( a );
	3409	bSig = extractFloatx80Frac( b );
	3410	bExp = extractFloatx80Exp( b );
	3411	expDiff = aExp - bExp;
	3412	if ( 0 < expDiff ) {
	3413	if ( aExp == 0x7FFF ) {
	3414	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
	3415	return a;
	3416	}
	3417	if ( bExp == 0 ) --expDiff;
	3418	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
	3419	zExp = aExp;
	3420	}
	3421	else if ( expDiff < 0 ) {
	3422	if ( bExp == 0x7FFF ) {
	3423	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
	3424	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
	3425	}
	3426	if ( aExp == 0 ) ++expDiff;
	3427	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
	3428	zExp = bExp;
	3429	}
	3430	else {
	3431	if ( aExp == 0x7FFF ) {
	3432	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
	3433	return propagateFloatx80NaN( a, b STATUS_VAR );
	3434	}
	3435	return a;
	3436	}
	3437	zSig1 = 0;
	3438	zSig0 = aSig + bSig;
	3439	if ( aExp == 0 ) {
	3440	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
	3441	goto roundAndPack;
	3442	}
	3443	zExp = aExp;
	3444	goto shiftRight1;
	3445	}
	3446	zSig0 = aSig + bSig;
	3447	if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
	3448	shiftRight1:
	3449	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
	3450	zSig0 \|= LIT64( 0x8000000000000000 );
	3451	++zExp;
	3452	roundAndPack:
	3453	return
	3454	roundAndPackFloatx80(
	3455	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
	3456
	3457	}
	3458
	3459	/*----------------------------------------------------------------------------
	3460	\| Returns the result of subtracting the absolute values of the extended
	3461	\| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
	3462	\| difference is negated before being returned. `zSign' is ignored if the
	3463	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
	3464	\| Standard for Binary Floating-Point Arithmetic.
	3465	----------------------------------------------------------------------------/
	3466
	3467	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
	3468	{
	3469	int32 aExp, bExp, zExp;
	3470	bits64 aSig, bSig, zSig0, zSig1;
	3471	int32 expDiff;
	3472	floatx80 z;
	3473
	3474	aSig = extractFloatx80Frac( a );
	3475	aExp = extractFloatx80Exp( a );
	3476	bSig = extractFloatx80Frac( b );
	3477	bExp = extractFloatx80Exp( b );
	3478	expDiff = aExp - bExp;
	3479	if ( 0 < expDiff ) goto aExpBigger;
	3480	if ( expDiff < 0 ) goto bExpBigger;
	3481	if ( aExp == 0x7FFF ) {
	3482	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
	3483	return propagateFloatx80NaN( a, b STATUS_VAR );
	3484	}
	3485	float_raise( float_flag_invalid STATUS_VAR);
	3486	z.low = floatx80_default_nan_low;
	3487	z.high = floatx80_default_nan_high;
	3488	return z;
	3489	}
	3490	if ( aExp == 0 ) {
	3491	aExp = 1;
	3492	bExp = 1;
	3493	}
	3494	zSig1 = 0;
	3495	if ( bSig < aSig ) goto aBigger;
	3496	if ( aSig < bSig ) goto bBigger;
	3497	return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
	3498	bExpBigger:
	3499	if ( bExp == 0x7FFF ) {
	3500	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
	3501	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
	3502	}
	3503	if ( aExp == 0 ) ++expDiff;
	3504	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
	3505	bBigger:
	3506	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
	3507	zExp = bExp;
	3508	zSign ^= 1;
	3509	goto normalizeRoundAndPack;
	3510	aExpBigger:
	3511	if ( aExp == 0x7FFF ) {
	3512	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
	3513	return a;
	3514	}
	3515	if ( bExp == 0 ) --expDiff;
	3516	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
	3517	aBigger:
	3518	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
	3519	zExp = aExp;
	3520	normalizeRoundAndPack:
	3521	return
	3522	normalizeRoundAndPackFloatx80(
	3523	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
	3524
	3525	}
	3526
	3527	/*----------------------------------------------------------------------------
	3528	\| Returns the result of adding the extended double-precision floating-point
	3529	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
	3530	\| Standard for Binary Floating-Point Arithmetic.
	3531	----------------------------------------------------------------------------/
	3532
	3533	floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
	3534	{
	3535	flag aSign, bSign;
	3536
	3537	aSign = extractFloatx80Sign( a );
	3538	bSign = extractFloatx80Sign( b );
	3539	if ( aSign == bSign ) {
	3540	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
	3541	}
	3542	else {
	3543	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
	3544	}
	3545
	3546	}
	3547
	3548	/*----------------------------------------------------------------------------
	3549	\| Returns the result of subtracting the extended double-precision floating-
	3550	\| point values `a' and `b'. The operation is performed according to the
	3551	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	3552	----------------------------------------------------------------------------/
	3553
	3554	floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
	3555	{
	3556	flag aSign, bSign;
	3557
	3558	aSign = extractFloatx80Sign( a );
	3559	bSign = extractFloatx80Sign( b );
	3560	if ( aSign == bSign ) {
	3561	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
	3562	}
	3563	else {
	3564	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
	3565	}
	3566
	3567	}
	3568
	3569	/*----------------------------------------------------------------------------
	3570	\| Returns the result of multiplying the extended double-precision floating-
	3571	\| point values `a' and `b'. The operation is performed according to the
	3572	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	3573	----------------------------------------------------------------------------/
	3574
	3575	floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
	3576	{
	3577	flag aSign, bSign, zSign;
	3578	int32 aExp, bExp, zExp;
	3579	bits64 aSig, bSig, zSig0, zSig1;
	3580	floatx80 z;
	3581
	3582	aSig = extractFloatx80Frac( a );
	3583	aExp = extractFloatx80Exp( a );
	3584	aSign = extractFloatx80Sign( a );
	3585	bSig = extractFloatx80Frac( b );
	3586	bExp = extractFloatx80Exp( b );
	3587	bSign = extractFloatx80Sign( b );
	3588	zSign = aSign ^ bSign;
	3589	if ( aExp == 0x7FFF ) {
	3590	if ( (bits64) ( aSig<<1 )
	3591	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
	3592	return propagateFloatx80NaN( a, b STATUS_VAR );
	3593	}
	3594	if ( ( bExp \| bSig ) == 0 ) goto invalid;
	3595	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
	3596	}
	3597	if ( bExp == 0x7FFF ) {
	3598	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
	3599	if ( ( aExp \| aSig ) == 0 ) {
	3600	invalid:
	3601	float_raise( float_flag_invalid STATUS_VAR);
	3602	z.low = floatx80_default_nan_low;
	3603	z.high = floatx80_default_nan_high;
	3604	return z;
	3605	}
	3606	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
	3607	}
	3608	if ( aExp == 0 ) {
	3609	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
	3610	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
	3611	}
	3612	if ( bExp == 0 ) {
	3613	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
	3614	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
	3615	}
	3616	zExp = aExp + bExp - 0x3FFE;
	3617	mul64To128( aSig, bSig, &zSig0, &zSig1 );
	3618	if ( 0 < (sbits64) zSig0 ) {
	3619	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
	3620	--zExp;
	3621	}
	3622	return
	3623	roundAndPackFloatx80(
	3624	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
	3625
	3626	}
	3627
	3628	/*----------------------------------------------------------------------------
	3629	\| Returns the result of dividing the extended double-precision floating-point
	3630	\| value `a' by the corresponding value `b'. The operation is performed
	3631	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	3632	----------------------------------------------------------------------------/
	3633
	3634	floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
	3635	{
	3636	flag aSign, bSign, zSign;
	3637	int32 aExp, bExp, zExp;
	3638	bits64 aSig, bSig, zSig0, zSig1;
	3639	bits64 rem0, rem1, rem2, term0, term1, term2;
	3640	floatx80 z;
	3641
	3642	aSig = extractFloatx80Frac( a );
	3643	aExp = extractFloatx80Exp( a );
	3644	aSign = extractFloatx80Sign( a );
	3645	bSig = extractFloatx80Frac( b );
	3646	bExp = extractFloatx80Exp( b );
	3647	bSign = extractFloatx80Sign( b );
	3648	zSign = aSign ^ bSign;
	3649	if ( aExp == 0x7FFF ) {
	3650	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
	3651	if ( bExp == 0x7FFF ) {
	3652	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
	3653	goto invalid;
	3654	}
	3655	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
	3656	}
	3657	if ( bExp == 0x7FFF ) {
	3658	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
	3659	return packFloatx80( zSign, 0, 0 );
	3660	}
	3661	if ( bExp == 0 ) {
	3662	if ( bSig == 0 ) {
	3663	if ( ( aExp \| aSig ) == 0 ) {
	3664	invalid:
	3665	float_raise( float_flag_invalid STATUS_VAR);
	3666	z.low = floatx80_default_nan_low;
	3667	z.high = floatx80_default_nan_high;
	3668	return z;
	3669	}
	3670	float_raise( float_flag_divbyzero STATUS_VAR);
	3671	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
	3672	}
	3673	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
	3674	}
	3675	if ( aExp == 0 ) {
	3676	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
	3677	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
	3678	}
	3679	zExp = aExp - bExp + 0x3FFE;
	3680	rem1 = 0;
	3681	if ( bSig <= aSig ) {
	3682	shift128Right( aSig, 0, 1, &aSig, &rem1 );
	3683	++zExp;
	3684	}
	3685	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
	3686	mul64To128( bSig, zSig0, &term0, &term1 );
	3687	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
	3688	while ( (sbits64) rem0 < 0 ) {
	3689	--zSig0;
	3690	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
	3691	}
	3692	zSig1 = estimateDiv128To64( rem1, 0, bSig );
	3693	if ( (bits64) ( zSig1<<1 ) <= 8 ) {
	3694	mul64To128( bSig, zSig1, &term1, &term2 );
	3695	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
	3696	while ( (sbits64) rem1 < 0 ) {
	3697	--zSig1;
	3698	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
	3699	}
	3700	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
	3701	}
	3702	return
	3703	roundAndPackFloatx80(
	3704	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
	3705
	3706	}
	3707
	3708	/*----------------------------------------------------------------------------
	3709	\| Returns the remainder of the extended double-precision floating-point value
	3710	\| `a' with respect to the corresponding value `b'. The operation is performed
	3711	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	3712	----------------------------------------------------------------------------/
	3713
	3714	floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
	3715	{
	3716	flag aSign, bSign, zSign;
	3717	int32 aExp, bExp, expDiff;
	3718	bits64 aSig0, aSig1, bSig;
	3719	bits64 q, term0, term1, alternateASig0, alternateASig1;
	3720	floatx80 z;
	3721
	3722	aSig0 = extractFloatx80Frac( a );
	3723	aExp = extractFloatx80Exp( a );
	3724	aSign = extractFloatx80Sign( a );
	3725	bSig = extractFloatx80Frac( b );
	3726	bExp = extractFloatx80Exp( b );
	3727	bSign = extractFloatx80Sign( b );
	3728	if ( aExp == 0x7FFF ) {
	3729	if ( (bits64) ( aSig0<<1 )
	3730	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
	3731	return propagateFloatx80NaN( a, b STATUS_VAR );
	3732	}
	3733	goto invalid;
	3734	}
	3735	if ( bExp == 0x7FFF ) {
	3736	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
	3737	return a;
	3738	}
	3739	if ( bExp == 0 ) {
	3740	if ( bSig == 0 ) {
	3741	invalid:
	3742	float_raise( float_flag_invalid STATUS_VAR);
	3743	z.low = floatx80_default_nan_low;
	3744	z.high = floatx80_default_nan_high;
	3745	return z;
	3746	}
	3747	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
	3748	}
	3749	if ( aExp == 0 ) {
	3750	if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
	3751	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
	3752	}
	3753	bSig \|= LIT64( 0x8000000000000000 );
	3754	zSign = aSign;
	3755	expDiff = aExp - bExp;
	3756	aSig1 = 0;
	3757	if ( expDiff < 0 ) {
	3758	if ( expDiff < -1 ) return a;
	3759	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
	3760	expDiff = 0;
	3761	}
	3762	q = ( bSig <= aSig0 );
	3763	if ( q ) aSig0 -= bSig;
	3764	expDiff -= 64;
	3765	while ( 0 < expDiff ) {
	3766	q = estimateDiv128To64( aSig0, aSig1, bSig );
	3767	q = ( 2 < q ) ? q - 2 : 0;
	3768	mul64To128( bSig, q, &term0, &term1 );
	3769	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
	3770	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
	3771	expDiff -= 62;
	3772	}
	3773	expDiff += 64;
	3774	if ( 0 < expDiff ) {
	3775	q = estimateDiv128To64( aSig0, aSig1, bSig );
	3776	q = ( 2 < q ) ? q - 2 : 0;
	3777	q >>= 64 - expDiff;
	3778	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
	3779	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
	3780	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
	3781	while ( le128( term0, term1, aSig0, aSig1 ) ) {
	3782	++q;
	3783	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
	3784	}
	3785	}
	3786	else {
	3787	term1 = 0;
	3788	term0 = bSig;
	3789	}
	3790	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
	3791	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
	3792	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
	3793	&& ( q & 1 ) )
	3794	) {
	3795	aSig0 = alternateASig0;
	3796	aSig1 = alternateASig1;
	3797	zSign = ! zSign;
	3798	}
	3799	return
	3800	normalizeRoundAndPackFloatx80(
	3801	80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
	3802
	3803	}
	3804
	3805	/*----------------------------------------------------------------------------
	3806	\| Returns the square root of the extended double-precision floating-point
	3807	\| value `a'. The operation is performed according to the IEC/IEEE Standard
	3808	\| for Binary Floating-Point Arithmetic.
	3809	----------------------------------------------------------------------------/
	3810
	3811	floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
	3812	{
	3813	flag aSign;
	3814	int32 aExp, zExp;
	3815	bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
	3816	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
	3817	floatx80 z;
	3818
	3819	aSig0 = extractFloatx80Frac( a );
	3820	aExp = extractFloatx80Exp( a );
	3821	aSign = extractFloatx80Sign( a );
	3822	if ( aExp == 0x7FFF ) {
	3823	if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
	3824	if ( ! aSign ) return a;
	3825	goto invalid;
	3826	}
	3827	if ( aSign ) {
	3828	if ( ( aExp \| aSig0 ) == 0 ) return a;
	3829	invalid:
	3830	float_raise( float_flag_invalid STATUS_VAR);
	3831	z.low = floatx80_default_nan_low;
	3832	z.high = floatx80_default_nan_high;
	3833	return z;
	3834	}
	3835	if ( aExp == 0 ) {
	3836	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
	3837	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
	3838	}
	3839	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
	3840	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
	3841	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
	3842	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
	3843	doubleZSig0 = zSig0<<1;
	3844	mul64To128( zSig0, zSig0, &term0, &term1 );
	3845	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
	3846	while ( (sbits64) rem0 < 0 ) {
	3847	--zSig0;
	3848	doubleZSig0 -= 2;
	3849	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
	3850	}
	3851	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
	3852	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
	3853	if ( zSig1 == 0 ) zSig1 = 1;
	3854	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
	3855	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
	3856	mul64To128( zSig1, zSig1, &term2, &term3 );
	3857	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
	3858	while ( (sbits64) rem1 < 0 ) {
	3859	--zSig1;
	3860	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
	3861	term3 \|= 1;
	3862	term2 \|= doubleZSig0;
	3863	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
	3864	}
	3865	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
	3866	}
	3867	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
	3868	zSig0 \|= doubleZSig0;
	3869	return
	3870	roundAndPackFloatx80(
	3871	STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
	3872
	3873	}
	3874
	3875	/*----------------------------------------------------------------------------
	3876	\| Returns 1 if the extended double-precision floating-point value `a' is
	3877	\| equal to the corresponding value `b', and 0 otherwise. The comparison is
	3878	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
	3879	\| Arithmetic.
	3880	----------------------------------------------------------------------------/
	3881
	3882	flag floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
	3883	{
	3884
	3885	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
	3886	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
	3887	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
	3888	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
	3889	) {
	3890	if ( floatx80_is_signaling_nan( a )
	3891	\|\| floatx80_is_signaling_nan( b ) ) {
	3892	float_raise( float_flag_invalid STATUS_VAR);
	3893	}
	3894	return 0;
	3895	}
	3896	return
	3897	( a.low == b.low )
	3898	&& ( ( a.high == b.high )
	3899	\|\| ( ( a.low == 0 )
	3900	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
	3901	);
	3902
	3903	}
	3904
	3905	/*----------------------------------------------------------------------------
	3906	\| Returns 1 if the extended double-precision floating-point value `a' is
	3907	\| less than or equal to the corresponding value `b', and 0 otherwise. The
	3908	\| comparison is performed according to the IEC/IEEE Standard for Binary
	3909	\| Floating-Point Arithmetic.
	3910	----------------------------------------------------------------------------/
	3911
	3912	flag floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
	3913	{
	3914	flag aSign, bSign;
	3915
	3916	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
	3917	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
	3918	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
	3919	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
	3920	) {
	3921	float_raise( float_flag_invalid STATUS_VAR);
	3922	return 0;
	3923	}
	3924	aSign = extractFloatx80Sign( a );
	3925	bSign = extractFloatx80Sign( b );
	3926	if ( aSign != bSign ) {
	3927	return
	3928	aSign
	3929	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
	3930	== 0 );
	3931	}
	3932	return
	3933	aSign ? le128( b.high, b.low, a.high, a.low )
	3934	: le128( a.high, a.low, b.high, b.low );
	3935
	3936	}
	3937
	3938	/*----------------------------------------------------------------------------
	3939	\| Returns 1 if the extended double-precision floating-point value `a' is
	3940	\| less than the corresponding value `b', and 0 otherwise. The comparison
	3941	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	3942	\| Arithmetic.
	3943	----------------------------------------------------------------------------/
	3944
	3945	flag floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
	3946	{
	3947	flag aSign, bSign;
	3948
	3949	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
	3950	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
	3951	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
	3952	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
	3953	) {
	3954	float_raise( float_flag_invalid STATUS_VAR);
	3955	return 0;
	3956	}
	3957	aSign = extractFloatx80Sign( a );
	3958	bSign = extractFloatx80Sign( b );
	3959	if ( aSign != bSign ) {
	3960	return
	3961	aSign
	3962	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
	3963	!= 0 );
	3964	}
	3965	return
	3966	aSign ? lt128( b.high, b.low, a.high, a.low )
	3967	: lt128( a.high, a.low, b.high, b.low );
	3968
	3969	}
	3970
	3971	/*----------------------------------------------------------------------------
	3972	\| Returns 1 if the extended double-precision floating-point value `a' is equal
	3973	\| to the corresponding value `b', and 0 otherwise. The invalid exception is
	3974	\| raised if either operand is a NaN. Otherwise, the comparison is performed
	3975	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	3976	----------------------------------------------------------------------------/
	3977
	3978	flag floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
	3979	{
	3980
	3981	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
	3982	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
	3983	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
	3984	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
	3985	) {
	3986	float_raise( float_flag_invalid STATUS_VAR);
	3987	return 0;
	3988	}
	3989	return
	3990	( a.low == b.low )
	3991	&& ( ( a.high == b.high )
	3992	\|\| ( ( a.low == 0 )
	3993	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
	3994	);
	3995
	3996	}
	3997
	3998	/*----------------------------------------------------------------------------
	3999	\| Returns 1 if the extended double-precision floating-point value `a' is less
	4000	\| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
	4001	\| do not cause an exception. Otherwise, the comparison is performed according
	4002	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	4003	----------------------------------------------------------------------------/
	4004
	4005	flag floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
	4006	{
	4007	flag aSign, bSign;
	4008
	4009	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
	4010	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
	4011	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
	4012	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
	4013	) {
	4014	if ( floatx80_is_signaling_nan( a )
	4015	\|\| floatx80_is_signaling_nan( b ) ) {
	4016	float_raise( float_flag_invalid STATUS_VAR);
	4017	}
	4018	return 0;
	4019	}
	4020	aSign = extractFloatx80Sign( a );
	4021	bSign = extractFloatx80Sign( b );
	4022	if ( aSign != bSign ) {
	4023	return
	4024	aSign
	4025	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
	4026	== 0 );
	4027	}
	4028	return
	4029	aSign ? le128( b.high, b.low, a.high, a.low )
	4030	: le128( a.high, a.low, b.high, b.low );
	4031
	4032	}
	4033
	4034	/*----------------------------------------------------------------------------
	4035	\| Returns 1 if the extended double-precision floating-point value `a' is less
	4036	\| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
	4037	\| an exception. Otherwise, the comparison is performed according to the
	4038	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	4039	----------------------------------------------------------------------------/
	4040
	4041	flag floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
	4042	{
	4043	flag aSign, bSign;
	4044
	4045	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
	4046	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
	4047	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
	4048	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
	4049	) {
	4050	if ( floatx80_is_signaling_nan( a )
	4051	\|\| floatx80_is_signaling_nan( b ) ) {
	4052	float_raise( float_flag_invalid STATUS_VAR);
	4053	}
	4054	return 0;
	4055	}
	4056	aSign = extractFloatx80Sign( a );
	4057	bSign = extractFloatx80Sign( b );
	4058	if ( aSign != bSign ) {
	4059	return
	4060	aSign
	4061	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
	4062	!= 0 );
	4063	}
	4064	return
	4065	aSign ? lt128( b.high, b.low, a.high, a.low )
	4066	: lt128( a.high, a.low, b.high, b.low );
	4067
	4068	}
	4069
	4070	#endif
	4071
	4072	#ifdef FLOAT128
	4073
	4074	/*----------------------------------------------------------------------------
	4075	\| Returns the result of converting the quadruple-precision floating-point
	4076	\| value `a' to the 32-bit two's complement integer format. The conversion
	4077	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	4078	\| Arithmetic---which means in particular that the conversion is rounded
	4079	\| according to the current rounding mode. If `a' is a NaN, the largest
	4080	\| positive integer is returned. Otherwise, if the conversion overflows, the
	4081	\| largest integer with the same sign as `a' is returned.
	4082	----------------------------------------------------------------------------/
	4083
	4084	int32 float128_to_int32( float128 a STATUS_PARAM )
	4085	{
	4086	flag aSign;
	4087	int32 aExp, shiftCount;
	4088	bits64 aSig0, aSig1;
	4089
	4090	aSig1 = extractFloat128Frac1( a );
	4091	aSig0 = extractFloat128Frac0( a );
	4092	aExp = extractFloat128Exp( a );
	4093	aSign = extractFloat128Sign( a );
	4094	if ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) aSign = 0;
	4095	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
	4096	aSig0 \|= ( aSig1 != 0 );
	4097	shiftCount = 0x4028 - aExp;
	4098	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
	4099	return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
	4100
	4101	}
	4102
	4103	/*----------------------------------------------------------------------------
	4104	\| Returns the result of converting the quadruple-precision floating-point
	4105	\| value `a' to the 32-bit two's complement integer format. The conversion
	4106	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	4107	\| Arithmetic, except that the conversion is always rounded toward zero. If
	4108	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
	4109	\| conversion overflows, the largest integer with the same sign as `a' is
	4110	\| returned.
	4111	----------------------------------------------------------------------------/
	4112
	4113	int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
	4114	{
	4115	flag aSign;
	4116	int32 aExp, shiftCount;
	4117	bits64 aSig0, aSig1, savedASig;
	4118	int32 z;
	4119
	4120	aSig1 = extractFloat128Frac1( a );
	4121	aSig0 = extractFloat128Frac0( a );
	4122	aExp = extractFloat128Exp( a );
	4123	aSign = extractFloat128Sign( a );
	4124	aSig0 \|= ( aSig1 != 0 );
	4125	if ( 0x401E < aExp ) {
	4126	if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
	4127	goto invalid;
	4128	}
	4129	else if ( aExp < 0x3FFF ) {
	4130	if ( aExp \|\| aSig0 ) STATUS(float_exception_flags) \|= float_flag_inexact;
	4131	return 0;
	4132	}
	4133	aSig0 \|= LIT64( 0x0001000000000000 );
	4134	shiftCount = 0x402F - aExp;
	4135	savedASig = aSig0;
	4136	aSig0 >>= shiftCount;
	4137	z = aSig0;
	4138	if ( aSign ) z = - z;
	4139	if ( ( z < 0 ) ^ aSign ) {
	4140	invalid:
	4141	float_raise( float_flag_invalid STATUS_VAR);
	4142	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
	4143	}
	4144	if ( ( aSig0<<shiftCount ) != savedASig ) {
	4145	STATUS(float_exception_flags) \|= float_flag_inexact;
	4146	}
	4147	return z;
	4148
	4149	}
	4150
	4151	/*----------------------------------------------------------------------------
	4152	\| Returns the result of converting the quadruple-precision floating-point
	4153	\| value `a' to the 64-bit two's complement integer format. The conversion
	4154	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	4155	\| Arithmetic---which means in particular that the conversion is rounded
	4156	\| according to the current rounding mode. If `a' is a NaN, the largest
	4157	\| positive integer is returned. Otherwise, if the conversion overflows, the
	4158	\| largest integer with the same sign as `a' is returned.
	4159	----------------------------------------------------------------------------/
	4160
	4161	int64 float128_to_int64( float128 a STATUS_PARAM )
	4162	{
	4163	flag aSign;
	4164	int32 aExp, shiftCount;
	4165	bits64 aSig0, aSig1;
	4166
	4167	aSig1 = extractFloat128Frac1( a );
	4168	aSig0 = extractFloat128Frac0( a );
	4169	aExp = extractFloat128Exp( a );
	4170	aSign = extractFloat128Sign( a );
	4171	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
	4172	shiftCount = 0x402F - aExp;
	4173	if ( shiftCount <= 0 ) {
	4174	if ( 0x403E < aExp ) {
	4175	float_raise( float_flag_invalid STATUS_VAR);
	4176	if ( ! aSign
	4177	\|\| ( ( aExp == 0x7FFF )
	4178	&& ( aSig1 \|\| ( aSig0 != LIT64( 0x0001000000000000 ) ) )
	4179	)
	4180	) {
	4181	return LIT64( 0x7FFFFFFFFFFFFFFF );
	4182	}
	4183	return (sbits64) LIT64( 0x8000000000000000 );
	4184	}
	4185	shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
	4186	}
	4187	else {
	4188	shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
	4189	}
	4190	return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
	4191
	4192	}
	4193
	4194	/*----------------------------------------------------------------------------
	4195	\| Returns the result of converting the quadruple-precision floating-point
	4196	\| value `a' to the 64-bit two's complement integer format. The conversion
	4197	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	4198	\| Arithmetic, except that the conversion is always rounded toward zero.
	4199	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
	4200	\| the conversion overflows, the largest integer with the same sign as `a' is
	4201	\| returned.
	4202	----------------------------------------------------------------------------/
	4203
	4204	int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
	4205	{
	4206	flag aSign;
	4207	int32 aExp, shiftCount;
	4208	bits64 aSig0, aSig1;
	4209	int64 z;
	4210
	4211	aSig1 = extractFloat128Frac1( a );
	4212	aSig0 = extractFloat128Frac0( a );
	4213	aExp = extractFloat128Exp( a );
	4214	aSign = extractFloat128Sign( a );
	4215	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
	4216	shiftCount = aExp - 0x402F;
	4217	if ( 0 < shiftCount ) {
	4218	if ( 0x403E <= aExp ) {
	4219	aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
	4220	if ( ( a.high == LIT64( 0xC03E000000000000 ) )
	4221	&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
	4222	if ( aSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
	4223	}
	4224	else {
	4225	float_raise( float_flag_invalid STATUS_VAR);
	4226	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) ) {
	4227	return LIT64( 0x7FFFFFFFFFFFFFFF );
	4228	}
	4229	}
	4230	return (sbits64) LIT64( 0x8000000000000000 );
	4231	}
	4232	z = ( aSig0<<shiftCount ) \| ( aSig1>>( ( - shiftCount ) & 63 ) );
	4233	if ( (bits64) ( aSig1<<shiftCount ) ) {
	4234	STATUS(float_exception_flags) \|= float_flag_inexact;
	4235	}
	4236	}
	4237	else {
	4238	if ( aExp < 0x3FFF ) {
	4239	if ( aExp \| aSig0 \| aSig1 ) {
	4240	STATUS(float_exception_flags) \|= float_flag_inexact;
	4241	}
	4242	return 0;
	4243	}
	4244	z = aSig0>>( - shiftCount );
	4245	if ( aSig1
	4246	\|\| ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
	4247	STATUS(float_exception_flags) \|= float_flag_inexact;
	4248	}
	4249	}
	4250	if ( aSign ) z = - z;
	4251	return z;
	4252
	4253	}
	4254
	4255	/*----------------------------------------------------------------------------
	4256	\| Returns the result of converting the quadruple-precision floating-point
	4257	\| value `a' to the single-precision floating-point format. The conversion
	4258	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	4259	\| Arithmetic.
	4260	----------------------------------------------------------------------------/
	4261
	4262	float32 float128_to_float32( float128 a STATUS_PARAM )
	4263	{
	4264	flag aSign;
	4265	int32 aExp;
	4266	bits64 aSig0, aSig1;
	4267	bits32 zSig;
	4268
	4269	aSig1 = extractFloat128Frac1( a );
	4270	aSig0 = extractFloat128Frac0( a );
	4271	aExp = extractFloat128Exp( a );
	4272	aSign = extractFloat128Sign( a );
	4273	if ( aExp == 0x7FFF ) {
	4274	if ( aSig0 \| aSig1 ) {
	4275	return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
	4276	}
	4277	return packFloat32( aSign, 0xFF, 0 );
	4278	}
	4279	aSig0 \|= ( aSig1 != 0 );
	4280	shift64RightJamming( aSig0, 18, &aSig0 );
	4281	zSig = aSig0;
	4282	if ( aExp \|\| zSig ) {
	4283	zSig \|= 0x40000000;
	4284	aExp -= 0x3F81;
	4285	}
	4286	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
	4287
	4288	}
	4289
	4290	/*----------------------------------------------------------------------------
	4291	\| Returns the result of converting the quadruple-precision floating-point
	4292	\| value `a' to the double-precision floating-point format. The conversion
	4293	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	4294	\| Arithmetic.
	4295	----------------------------------------------------------------------------/
	4296
	4297	float64 float128_to_float64( float128 a STATUS_PARAM )
	4298	{
	4299	flag aSign;
	4300	int32 aExp;
	4301	bits64 aSig0, aSig1;
	4302
	4303	aSig1 = extractFloat128Frac1( a );
	4304	aSig0 = extractFloat128Frac0( a );
	4305	aExp = extractFloat128Exp( a );
	4306	aSign = extractFloat128Sign( a );
	4307	if ( aExp == 0x7FFF ) {
	4308	if ( aSig0 \| aSig1 ) {
	4309	return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
	4310	}
	4311	return packFloat64( aSign, 0x7FF, 0 );
	4312	}
	4313	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
	4314	aSig0 \|= ( aSig1 != 0 );
	4315	if ( aExp \|\| aSig0 ) {
	4316	aSig0 \|= LIT64( 0x4000000000000000 );
	4317	aExp -= 0x3C01;
	4318	}
	4319	return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
	4320
	4321	}
	4322
	4323	#ifdef FLOATX80
	4324
	4325	/*----------------------------------------------------------------------------
	4326	\| Returns the result of converting the quadruple-precision floating-point
	4327	\| value `a' to the extended double-precision floating-point format. The
	4328	\| conversion is performed according to the IEC/IEEE Standard for Binary
	4329	\| Floating-Point Arithmetic.
	4330	----------------------------------------------------------------------------/
	4331
	4332	floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
	4333	{
	4334	flag aSign;
	4335	int32 aExp;
	4336	bits64 aSig0, aSig1;
	4337
	4338	aSig1 = extractFloat128Frac1( a );
	4339	aSig0 = extractFloat128Frac0( a );
	4340	aExp = extractFloat128Exp( a );
	4341	aSign = extractFloat128Sign( a );
	4342	if ( aExp == 0x7FFF ) {
	4343	if ( aSig0 \| aSig1 ) {
	4344	return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
	4345	}
	4346	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
	4347	}
	4348	if ( aExp == 0 ) {
	4349	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
	4350	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
	4351	}
	4352	else {
	4353	aSig0 \|= LIT64( 0x0001000000000000 );
	4354	}
	4355	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
	4356	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
	4357
	4358	}
	4359
	4360	#endif
	4361
	4362	/*----------------------------------------------------------------------------
	4363	\| Rounds the quadruple-precision floating-point value `a' to an integer, and
	4364	\| returns the result as a quadruple-precision floating-point value. The
	4365	\| operation is performed according to the IEC/IEEE Standard for Binary
	4366	\| Floating-Point Arithmetic.
	4367	----------------------------------------------------------------------------/
	4368
	4369	float128 float128_round_to_int( float128 a STATUS_PARAM )
	4370	{
	4371	flag aSign;
	4372	int32 aExp;
	4373	bits64 lastBitMask, roundBitsMask;
	4374	int8 roundingMode;
	4375	float128 z;
	4376
	4377	aExp = extractFloat128Exp( a );
	4378	if ( 0x402F <= aExp ) {
	4379	if ( 0x406F <= aExp ) {
	4380	if ( ( aExp == 0x7FFF )
	4381	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) )
	4382	) {
	4383	return propagateFloat128NaN( a, a STATUS_VAR );
	4384	}
	4385	return a;
	4386	}
	4387	lastBitMask = 1;
	4388	lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
	4389	roundBitsMask = lastBitMask - 1;
	4390	z = a;
	4391	roundingMode = STATUS(float_rounding_mode);
	4392	if ( roundingMode == float_round_nearest_even ) {
	4393	if ( lastBitMask ) {
	4394	add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
	4395	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
	4396	}
	4397	else {
	4398	if ( (sbits64) z.low < 0 ) {
	4399	++z.high;
	4400	if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
	4401	}
	4402	}
	4403	}
	4404	else if ( roundingMode != float_round_to_zero ) {
	4405	if ( extractFloat128Sign( z )
	4406	^ ( roundingMode == float_round_up ) ) {
	4407	add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
	4408	}
	4409	}
	4410	z.low &= ~ roundBitsMask;
	4411	}
	4412	else {
	4413	if ( aExp < 0x3FFF ) {
	4414	if ( ( ( (bits64) ( a.high<<1 ) ) \| a.low ) == 0 ) return a;
	4415	STATUS(float_exception_flags) \|= float_flag_inexact;
	4416	aSign = extractFloat128Sign( a );
	4417	switch ( STATUS(float_rounding_mode) ) {
	4418	case float_round_nearest_even:
	4419	if ( ( aExp == 0x3FFE )
	4420	&& ( extractFloat128Frac0( a )
	4421	\| extractFloat128Frac1( a ) )
	4422	) {
	4423	return packFloat128( aSign, 0x3FFF, 0, 0 );
	4424	}
	4425	break;
	4426	case float_round_down:
	4427	return
	4428	aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
	4429	: packFloat128( 0, 0, 0, 0 );
	4430	case float_round_up:
	4431	return
	4432	aSign ? packFloat128( 1, 0, 0, 0 )
	4433	: packFloat128( 0, 0x3FFF, 0, 0 );
	4434	}
	4435	return packFloat128( aSign, 0, 0, 0 );
	4436	}
	4437	lastBitMask = 1;
	4438	lastBitMask <<= 0x402F - aExp;
	4439	roundBitsMask = lastBitMask - 1;
	4440	z.low = 0;
	4441	z.high = a.high;
	4442	roundingMode = STATUS(float_rounding_mode);
	4443	if ( roundingMode == float_round_nearest_even ) {
	4444	z.high += lastBitMask>>1;
	4445	if ( ( ( z.high & roundBitsMask ) \| a.low ) == 0 ) {
	4446	z.high &= ~ lastBitMask;
	4447	}
	4448	}
	4449	else if ( roundingMode != float_round_to_zero ) {
	4450	if ( extractFloat128Sign( z )
	4451	^ ( roundingMode == float_round_up ) ) {
	4452	z.high \|= ( a.low != 0 );
	4453	z.high += roundBitsMask;
	4454	}
	4455	}
	4456	z.high &= ~ roundBitsMask;
	4457	}
	4458	if ( ( z.low != a.low ) \|\| ( z.high != a.high ) ) {
	4459	STATUS(float_exception_flags) \|= float_flag_inexact;
	4460	}
	4461	return z;
	4462
	4463	}
	4464
	4465	/*----------------------------------------------------------------------------
	4466	\| Returns the result of adding the absolute values of the quadruple-precision
	4467	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
	4468	\| before being returned. `zSign' is ignored if the result is a NaN.
	4469	\| The addition is performed according to the IEC/IEEE Standard for Binary
	4470	\| Floating-Point Arithmetic.
	4471	----------------------------------------------------------------------------/
	4472
	4473	static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
	4474	{
	4475	int32 aExp, bExp, zExp;
	4476	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
	4477	int32 expDiff;
	4478
	4479	aSig1 = extractFloat128Frac1( a );
	4480	aSig0 = extractFloat128Frac0( a );
	4481	aExp = extractFloat128Exp( a );
	4482	bSig1 = extractFloat128Frac1( b );
	4483	bSig0 = extractFloat128Frac0( b );
	4484	bExp = extractFloat128Exp( b );
	4485	expDiff = aExp - bExp;
	4486	if ( 0 < expDiff ) {
	4487	if ( aExp == 0x7FFF ) {
	4488	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
	4489	return a;
	4490	}
	4491	if ( bExp == 0 ) {
	4492	--expDiff;
	4493	}
	4494	else {
	4495	bSig0 \|= LIT64( 0x0001000000000000 );
	4496	}
	4497	shift128ExtraRightJamming(
	4498	bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
	4499	zExp = aExp;
	4500	}
	4501	else if ( expDiff < 0 ) {
	4502	if ( bExp == 0x7FFF ) {
	4503	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
	4504	return packFloat128( zSign, 0x7FFF, 0, 0 );
	4505	}
	4506	if ( aExp == 0 ) {
	4507	++expDiff;
	4508	}
	4509	else {
	4510	aSig0 \|= LIT64( 0x0001000000000000 );
	4511	}
	4512	shift128ExtraRightJamming(
	4513	aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
	4514	zExp = bExp;
	4515	}
	4516	else {
	4517	if ( aExp == 0x7FFF ) {
	4518	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
	4519	return propagateFloat128NaN( a, b STATUS_VAR );
	4520	}
	4521	return a;
	4522	}
	4523	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
	4524	if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
	4525	zSig2 = 0;
	4526	zSig0 \|= LIT64( 0x0002000000000000 );
	4527	zExp = aExp;
	4528	goto shiftRight1;
	4529	}
	4530	aSig0 \|= LIT64( 0x0001000000000000 );
	4531	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
	4532	--zExp;
	4533	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
	4534	++zExp;
	4535	shiftRight1:
	4536	shift128ExtraRightJamming(
	4537	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
	4538	roundAndPack:
	4539	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
	4540
	4541	}
	4542
	4543	/*----------------------------------------------------------------------------
	4544	\| Returns the result of subtracting the absolute values of the quadruple-
	4545	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
	4546	\| difference is negated before being returned. `zSign' is ignored if the
	4547	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
	4548	\| Standard for Binary Floating-Point Arithmetic.
	4549	----------------------------------------------------------------------------/
	4550
	4551	static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
	4552	{
	4553	int32 aExp, bExp, zExp;
	4554	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
	4555	int32 expDiff;
	4556	float128 z;
	4557
	4558	aSig1 = extractFloat128Frac1( a );
	4559	aSig0 = extractFloat128Frac0( a );
	4560	aExp = extractFloat128Exp( a );
	4561	bSig1 = extractFloat128Frac1( b );
	4562	bSig0 = extractFloat128Frac0( b );
	4563	bExp = extractFloat128Exp( b );
	4564	expDiff = aExp - bExp;
	4565	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
	4566	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
	4567	if ( 0 < expDiff ) goto aExpBigger;
	4568	if ( expDiff < 0 ) goto bExpBigger;
	4569	if ( aExp == 0x7FFF ) {
	4570	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
	4571	return propagateFloat128NaN( a, b STATUS_VAR );
	4572	}
	4573	float_raise( float_flag_invalid STATUS_VAR);
	4574	z.low = float128_default_nan_low;
	4575	z.high = float128_default_nan_high;
	4576	return z;
	4577	}
	4578	if ( aExp == 0 ) {
	4579	aExp = 1;
	4580	bExp = 1;
	4581	}
	4582	if ( bSig0 < aSig0 ) goto aBigger;
	4583	if ( aSig0 < bSig0 ) goto bBigger;
	4584	if ( bSig1 < aSig1 ) goto aBigger;
	4585	if ( aSig1 < bSig1 ) goto bBigger;
	4586	return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
	4587	bExpBigger:
	4588	if ( bExp == 0x7FFF ) {
	4589	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
	4590	return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
	4591	}
	4592	if ( aExp == 0 ) {
	4593	++expDiff;
	4594	}
	4595	else {
	4596	aSig0 \|= LIT64( 0x4000000000000000 );
	4597	}
	4598	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
	4599	bSig0 \|= LIT64( 0x4000000000000000 );
	4600	bBigger:
	4601	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
	4602	zExp = bExp;
	4603	zSign ^= 1;
	4604	goto normalizeRoundAndPack;
	4605	aExpBigger:
	4606	if ( aExp == 0x7FFF ) {
	4607	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
	4608	return a;
	4609	}
	4610	if ( bExp == 0 ) {
	4611	--expDiff;
	4612	}
	4613	else {
	4614	bSig0 \|= LIT64( 0x4000000000000000 );
	4615	}
	4616	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
	4617	aSig0 \|= LIT64( 0x4000000000000000 );
	4618	aBigger:
	4619	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
	4620	zExp = aExp;
	4621	normalizeRoundAndPack:
	4622	--zExp;
	4623	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
	4624
	4625	}
	4626
	4627	/*----------------------------------------------------------------------------
	4628	\| Returns the result of adding the quadruple-precision floating-point values
	4629	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
	4630	\| for Binary Floating-Point Arithmetic.
	4631	----------------------------------------------------------------------------/
	4632
	4633	float128 float128_add( float128 a, float128 b STATUS_PARAM )
	4634	{
	4635	flag aSign, bSign;
	4636
	4637	aSign = extractFloat128Sign( a );
	4638	bSign = extractFloat128Sign( b );
	4639	if ( aSign == bSign ) {
	4640	return addFloat128Sigs( a, b, aSign STATUS_VAR );
	4641	}
	4642	else {
	4643	return subFloat128Sigs( a, b, aSign STATUS_VAR );
	4644	}
	4645
	4646	}
	4647
	4648	/*----------------------------------------------------------------------------
	4649	\| Returns the result of subtracting the quadruple-precision floating-point
	4650	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
	4651	\| Standard for Binary Floating-Point Arithmetic.
	4652	----------------------------------------------------------------------------/
	4653
	4654	float128 float128_sub( float128 a, float128 b STATUS_PARAM )
	4655	{
	4656	flag aSign, bSign;
	4657
	4658	aSign = extractFloat128Sign( a );
	4659	bSign = extractFloat128Sign( b );
	4660	if ( aSign == bSign ) {
	4661	return subFloat128Sigs( a, b, aSign STATUS_VAR );
	4662	}
	4663	else {
	4664	return addFloat128Sigs( a, b, aSign STATUS_VAR );
	4665	}
	4666
	4667	}
	4668
	4669	/*----------------------------------------------------------------------------
	4670	\| Returns the result of multiplying the quadruple-precision floating-point
	4671	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
	4672	\| Standard for Binary Floating-Point Arithmetic.
	4673	----------------------------------------------------------------------------/
	4674
	4675	float128 float128_mul( float128 a, float128 b STATUS_PARAM )
	4676	{
	4677	flag aSign, bSign, zSign;
	4678	int32 aExp, bExp, zExp;
	4679	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
	4680	float128 z;
	4681
	4682	aSig1 = extractFloat128Frac1( a );
	4683	aSig0 = extractFloat128Frac0( a );
	4684	aExp = extractFloat128Exp( a );
	4685	aSign = extractFloat128Sign( a );
	4686	bSig1 = extractFloat128Frac1( b );
	4687	bSig0 = extractFloat128Frac0( b );
	4688	bExp = extractFloat128Exp( b );
	4689	bSign = extractFloat128Sign( b );
	4690	zSign = aSign ^ bSign;
	4691	if ( aExp == 0x7FFF ) {
	4692	if ( ( aSig0 \| aSig1 )
	4693	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
	4694	return propagateFloat128NaN( a, b STATUS_VAR );
	4695	}
	4696	if ( ( bExp \| bSig0 \| bSig1 ) == 0 ) goto invalid;
	4697	return packFloat128( zSign, 0x7FFF, 0, 0 );
	4698	}
	4699	if ( bExp == 0x7FFF ) {
	4700	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
	4701	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
	4702	invalid:
	4703	float_raise( float_flag_invalid STATUS_VAR);
	4704	z.low = float128_default_nan_low;
	4705	z.high = float128_default_nan_high;
	4706	return z;
	4707	}
	4708	return packFloat128( zSign, 0x7FFF, 0, 0 );
	4709	}
	4710	if ( aExp == 0 ) {
	4711	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
	4712	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
	4713	}
	4714	if ( bExp == 0 ) {
	4715	if ( ( bSig0 \| bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
	4716	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
	4717	}
	4718	zExp = aExp + bExp - 0x4000;
	4719	aSig0 \|= LIT64( 0x0001000000000000 );
	4720	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
	4721	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
	4722	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
	4723	zSig2 \|= ( zSig3 != 0 );
	4724	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
	4725	shift128ExtraRightJamming(
	4726	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
	4727	++zExp;
	4728	}
	4729	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
	4730
	4731	}
	4732
	4733	/*----------------------------------------------------------------------------
	4734	\| Returns the result of dividing the quadruple-precision floating-point value
	4735	\| `a' by the corresponding value `b'. The operation is performed according to
	4736	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	4737	----------------------------------------------------------------------------/
	4738
	4739	float128 float128_div( float128 a, float128 b STATUS_PARAM )
	4740	{
	4741	flag aSign, bSign, zSign;
	4742	int32 aExp, bExp, zExp;
	4743	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
	4744	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
	4745	float128 z;
	4746
	4747	aSig1 = extractFloat128Frac1( a );
	4748	aSig0 = extractFloat128Frac0( a );
	4749	aExp = extractFloat128Exp( a );
	4750	aSign = extractFloat128Sign( a );
	4751	bSig1 = extractFloat128Frac1( b );
	4752	bSig0 = extractFloat128Frac0( b );
	4753	bExp = extractFloat128Exp( b );
	4754	bSign = extractFloat128Sign( b );
	4755	zSign = aSign ^ bSign;
	4756	if ( aExp == 0x7FFF ) {
	4757	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
	4758	if ( bExp == 0x7FFF ) {
	4759	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
	4760	goto invalid;
	4761	}
	4762	return packFloat128( zSign, 0x7FFF, 0, 0 );
	4763	}
	4764	if ( bExp == 0x7FFF ) {
	4765	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
	4766	return packFloat128( zSign, 0, 0, 0 );
	4767	}
	4768	if ( bExp == 0 ) {
	4769	if ( ( bSig0 \| bSig1 ) == 0 ) {
	4770	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
	4771	invalid:
	4772	float_raise( float_flag_invalid STATUS_VAR);
	4773	z.low = float128_default_nan_low;
	4774	z.high = float128_default_nan_high;
	4775	return z;
	4776	}
	4777	float_raise( float_flag_divbyzero STATUS_VAR);
	4778	return packFloat128( zSign, 0x7FFF, 0, 0 );
	4779	}
	4780	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
	4781	}
	4782	if ( aExp == 0 ) {
	4783	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
	4784	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
	4785	}
	4786	zExp = aExp - bExp + 0x3FFD;
	4787	shortShift128Left(
	4788	aSig0 \| LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
	4789	shortShift128Left(
	4790	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
	4791	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
	4792	shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
	4793	++zExp;
	4794	}
	4795	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
	4796	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
	4797	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
	4798	while ( (sbits64) rem0 < 0 ) {
	4799	--zSig0;
	4800	add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
	4801	}
	4802	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
	4803	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
	4804	mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
	4805	sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
	4806	while ( (sbits64) rem1 < 0 ) {
	4807	--zSig1;
	4808	add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
	4809	}
	4810	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
	4811	}
	4812	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
	4813	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
	4814
	4815	}
	4816
	4817	/*----------------------------------------------------------------------------
	4818	\| Returns the remainder of the quadruple-precision floating-point value `a'
	4819	\| with respect to the corresponding value `b'. The operation is performed
	4820	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	4821	----------------------------------------------------------------------------/
	4822
	4823	float128 float128_rem( float128 a, float128 b STATUS_PARAM )
	4824	{
	4825	flag aSign, bSign, zSign;
	4826	int32 aExp, bExp, expDiff;
	4827	bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
	4828	bits64 allZero, alternateASig0, alternateASig1, sigMean1;
	4829	sbits64 sigMean0;
	4830	float128 z;
	4831
	4832	aSig1 = extractFloat128Frac1( a );
	4833	aSig0 = extractFloat128Frac0( a );
	4834	aExp = extractFloat128Exp( a );
	4835	aSign = extractFloat128Sign( a );
	4836	bSig1 = extractFloat128Frac1( b );
	4837	bSig0 = extractFloat128Frac0( b );
	4838	bExp = extractFloat128Exp( b );
	4839	bSign = extractFloat128Sign( b );
	4840	if ( aExp == 0x7FFF ) {
	4841	if ( ( aSig0 \| aSig1 )
	4842	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
	4843	return propagateFloat128NaN( a, b STATUS_VAR );
	4844	}
	4845	goto invalid;
	4846	}
	4847	if ( bExp == 0x7FFF ) {
	4848	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
	4849	return a;
	4850	}
	4851	if ( bExp == 0 ) {
	4852	if ( ( bSig0 \| bSig1 ) == 0 ) {
	4853	invalid:
	4854	float_raise( float_flag_invalid STATUS_VAR);
	4855	z.low = float128_default_nan_low;
	4856	z.high = float128_default_nan_high;
	4857	return z;
	4858	}
	4859	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
	4860	}
	4861	if ( aExp == 0 ) {
	4862	if ( ( aSig0 \| aSig1 ) == 0 ) return a;
	4863	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
	4864	}
	4865	expDiff = aExp - bExp;
	4866	if ( expDiff < -1 ) return a;
	4867	shortShift128Left(
	4868	aSig0 \| LIT64( 0x0001000000000000 ),
	4869	aSig1,
	4870	15 - ( expDiff < 0 ),
	4871	&aSig0,
	4872	&aSig1
	4873	);
	4874	shortShift128Left(
	4875	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
	4876	q = le128( bSig0, bSig1, aSig0, aSig1 );
	4877	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
	4878	expDiff -= 64;
	4879	while ( 0 < expDiff ) {
	4880	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
	4881	q = ( 4 < q ) ? q - 4 : 0;
	4882	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
	4883	shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
	4884	shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
	4885	sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
	4886	expDiff -= 61;
	4887	}
	4888	if ( -64 < expDiff ) {
	4889	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
	4890	q = ( 4 < q ) ? q - 4 : 0;
	4891	q >>= - expDiff;
	4892	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
	4893	expDiff += 52;
	4894	if ( expDiff < 0 ) {
	4895	shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
	4896	}
	4897	else {
	4898	shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
	4899	}
	4900	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
	4901	sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
	4902	}
	4903	else {
	4904	shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
	4905	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
	4906	}
	4907	do {
	4908	alternateASig0 = aSig0;
	4909	alternateASig1 = aSig1;
	4910	++q;
	4911	sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
	4912	} while ( 0 <= (sbits64) aSig0 );
	4913	add128(
	4914	aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
	4915	if ( ( sigMean0 < 0 )
	4916	\|\| ( ( ( sigMean0 \| sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
	4917	aSig0 = alternateASig0;
	4918	aSig1 = alternateASig1;
	4919	}
	4920	zSign = ( (sbits64) aSig0 < 0 );
	4921	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
	4922	return
	4923	normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
	4924
	4925	}
	4926
	4927	/*----------------------------------------------------------------------------
	4928	\| Returns the square root of the quadruple-precision floating-point value `a'.
	4929	\| The operation is performed according to the IEC/IEEE Standard for Binary
	4930	\| Floating-Point Arithmetic.
	4931	----------------------------------------------------------------------------/
	4932
	4933	float128 float128_sqrt( float128 a STATUS_PARAM )
	4934	{
	4935	flag aSign;
	4936	int32 aExp, zExp;
	4937	bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
	4938	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
	4939	float128 z;
	4940
	4941	aSig1 = extractFloat128Frac1( a );
	4942	aSig0 = extractFloat128Frac0( a );
	4943	aExp = extractFloat128Exp( a );
	4944	aSign = extractFloat128Sign( a );
	4945	if ( aExp == 0x7FFF ) {
	4946	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
	4947	if ( ! aSign ) return a;
	4948	goto invalid;
	4949	}
	4950	if ( aSign ) {
	4951	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) return a;
	4952	invalid:
	4953	float_raise( float_flag_invalid STATUS_VAR);
	4954	z.low = float128_default_nan_low;
	4955	z.high = float128_default_nan_high;
	4956	return z;
	4957	}
	4958	if ( aExp == 0 ) {
	4959	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
	4960	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
	4961	}
	4962	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
	4963	aSig0 \|= LIT64( 0x0001000000000000 );
	4964	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
	4965	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
	4966	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
	4967	doubleZSig0 = zSig0<<1;
	4968	mul64To128( zSig0, zSig0, &term0, &term1 );
	4969	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
	4970	while ( (sbits64) rem0 < 0 ) {
	4971	--zSig0;
	4972	doubleZSig0 -= 2;
	4973	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
	4974	}
	4975	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
	4976	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
	4977	if ( zSig1 == 0 ) zSig1 = 1;
	4978	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
	4979	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
	4980	mul64To128( zSig1, zSig1, &term2, &term3 );
	4981	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
	4982	while ( (sbits64) rem1 < 0 ) {
	4983	--zSig1;
	4984	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
	4985	term3 \|= 1;
	4986	term2 \|= doubleZSig0;
	4987	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
	4988	}
	4989	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
	4990	}
	4991	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
	4992	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
	4993
	4994	}
	4995
	4996	/*----------------------------------------------------------------------------
	4997	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
	4998	\| the corresponding value `b', and 0 otherwise. The comparison is performed
	4999	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	5000	----------------------------------------------------------------------------/
	5001
	5002	flag float128_eq( float128 a, float128 b STATUS_PARAM )
	5003	{
	5004
	5005	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
	5006	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
	5007	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
	5008	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
	5009	) {
	5010	if ( float128_is_signaling_nan( a )
	5011	\|\| float128_is_signaling_nan( b ) ) {
	5012	float_raise( float_flag_invalid STATUS_VAR);
	5013	}
	5014	return 0;
	5015	}
	5016	return
	5017	( a.low == b.low )
	5018	&& ( ( a.high == b.high )
	5019	\|\| ( ( a.low == 0 )
	5020	&& ( (bits64) ( ( a.high \| b.high )<<1 ) == 0 ) )
	5021	);
	5022
	5023	}
	5024
	5025	/*----------------------------------------------------------------------------
	5026	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
	5027	\| or equal to the corresponding value `b', and 0 otherwise. The comparison
	5028	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
	5029	\| Arithmetic.
	5030	----------------------------------------------------------------------------/
	5031
	5032	flag float128_le( float128 a, float128 b STATUS_PARAM )
	5033	{
	5034	flag aSign, bSign;
	5035
	5036	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
	5037	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
	5038	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
	5039	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
	5040	) {
	5041	float_raise( float_flag_invalid STATUS_VAR);
	5042	return 0;
	5043	}
	5044	aSign = extractFloat128Sign( a );
	5045	bSign = extractFloat128Sign( b );
	5046	if ( aSign != bSign ) {
	5047	return
	5048	aSign
	5049	\|\| ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
	5050	== 0 );
	5051	}
	5052	return
	5053	aSign ? le128( b.high, b.low, a.high, a.low )
	5054	: le128( a.high, a.low, b.high, b.low );
	5055
	5056	}
	5057
	5058	/*----------------------------------------------------------------------------
	5059	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
	5060	\| the corresponding value `b', and 0 otherwise. The comparison is performed
	5061	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	5062	----------------------------------------------------------------------------/
	5063
	5064	flag float128_lt( float128 a, float128 b STATUS_PARAM )
	5065	{
	5066	flag aSign, bSign;
	5067
	5068	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
	5069	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
	5070	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
	5071	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
	5072	) {
	5073	float_raise( float_flag_invalid STATUS_VAR);
	5074	return 0;
	5075	}
	5076	aSign = extractFloat128Sign( a );
	5077	bSign = extractFloat128Sign( b );
	5078	if ( aSign != bSign ) {
	5079	return
	5080	aSign
	5081	&& ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
	5082	!= 0 );
	5083	}
	5084	return
	5085	aSign ? lt128( b.high, b.low, a.high, a.low )
	5086	: lt128( a.high, a.low, b.high, b.low );
	5087
	5088	}
	5089
	5090	/*----------------------------------------------------------------------------
	5091	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
	5092	\| the corresponding value `b', and 0 otherwise. The invalid exception is
	5093	\| raised if either operand is a NaN. Otherwise, the comparison is performed
	5094	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	5095	----------------------------------------------------------------------------/
	5096
	5097	flag float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
	5098	{
	5099
	5100	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
	5101	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
	5102	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
	5103	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
	5104	) {
	5105	float_raise( float_flag_invalid STATUS_VAR);
	5106	return 0;
	5107	}
	5108	return
	5109	( a.low == b.low )
	5110	&& ( ( a.high == b.high )
	5111	\|\| ( ( a.low == 0 )
	5112	&& ( (bits64) ( ( a.high \| b.high )<<1 ) == 0 ) )
	5113	);
	5114
	5115	}
	5116
	5117	/*----------------------------------------------------------------------------
	5118	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
	5119	\| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
	5120	\| cause an exception. Otherwise, the comparison is performed according to the
	5121	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
	5122	----------------------------------------------------------------------------/
	5123
	5124	flag float128_le_quiet( float128 a, float128 b STATUS_PARAM )
	5125	{
	5126	flag aSign, bSign;
	5127
	5128	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
	5129	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
	5130	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
	5131	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
	5132	) {
	5133	if ( float128_is_signaling_nan( a )
	5134	\|\| float128_is_signaling_nan( b ) ) {
	5135	float_raise( float_flag_invalid STATUS_VAR);
	5136	}
	5137	return 0;
	5138	}
	5139	aSign = extractFloat128Sign( a );
	5140	bSign = extractFloat128Sign( b );
	5141	if ( aSign != bSign ) {
	5142	return
	5143	aSign
	5144	\|\| ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
	5145	== 0 );
	5146	}
	5147	return
	5148	aSign ? le128( b.high, b.low, a.high, a.low )
	5149	: le128( a.high, a.low, b.high, b.low );
	5150
	5151	}
	5152
	5153	/*----------------------------------------------------------------------------
	5154	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
	5155	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
	5156	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
	5157	\| Standard for Binary Floating-Point Arithmetic.
	5158	----------------------------------------------------------------------------/
	5159
	5160	flag float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
	5161	{
	5162	flag aSign, bSign;
	5163
	5164	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
	5165	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
	5166	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
	5167	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
	5168	) {
	5169	if ( float128_is_signaling_nan( a )
	5170	\|\| float128_is_signaling_nan( b ) ) {
	5171	float_raise( float_flag_invalid STATUS_VAR);
	5172	}
	5173	return 0;
	5174	}
	5175	aSign = extractFloat128Sign( a );
	5176	bSign = extractFloat128Sign( b );
	5177	if ( aSign != bSign ) {
	5178	return
	5179	aSign
	5180	&& ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
	5181	!= 0 );
	5182	}
	5183	return
	5184	aSign ? lt128( b.high, b.low, a.high, a.low )
	5185	: lt128( a.high, a.low, b.high, b.low );
	5186
	5187	}
	5188
	5189	#endif
	5190
	5191	/* misc functions */
	5192	float32 uint32_to_float32( unsigned int a STATUS_PARAM )
	5193	{
	5194	return int64_to_float32(a STATUS_VAR);
	5195	}
	5196
	5197	float64 uint32_to_float64( unsigned int a STATUS_PARAM )
	5198	{
	5199	return int64_to_float64(a STATUS_VAR);
	5200	}
	5201
	5202	unsigned int float32_to_uint32( float32 a STATUS_PARAM )
	5203	{
	5204	int64_t v;
	5205	unsigned int res;
	5206
	5207	v = float32_to_int64(a STATUS_VAR);
	5208	if (v < 0) {
	5209	res = 0;
	5210	float_raise( float_flag_invalid STATUS_VAR);
	5211	} else if (v > 0xffffffff) {
	5212	res = 0xffffffff;
	5213	float_raise( float_flag_invalid STATUS_VAR);
	5214	} else {
	5215	res = v;
	5216	}
	5217	return res;
	5218	}
	5219
	5220	unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
	5221	{
	5222	int64_t v;
	5223	unsigned int res;
	5224
	5225	v = float32_to_int64_round_to_zero(a STATUS_VAR);
	5226	if (v < 0) {
	5227	res = 0;
	5228	float_raise( float_flag_invalid STATUS_VAR);
	5229	} else if (v > 0xffffffff) {
	5230	res = 0xffffffff;
	5231	float_raise( float_flag_invalid STATUS_VAR);
	5232	} else {
	5233	res = v;
	5234	}
	5235	return res;
	5236	}
	5237
	5238	unsigned int float64_to_uint32( float64 a STATUS_PARAM )
	5239	{
	5240	int64_t v;
	5241	unsigned int res;
	5242
	5243	v = float64_to_int64(a STATUS_VAR);
	5244	if (v < 0) {
	5245	res = 0;
	5246	float_raise( float_flag_invalid STATUS_VAR);
	5247	} else if (v > 0xffffffff) {
	5248	res = 0xffffffff;
	5249	float_raise( float_flag_invalid STATUS_VAR);
	5250	} else {
	5251	res = v;
	5252	}
	5253	return res;
	5254	}
	5255
	5256	unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
	5257	{
	5258	int64_t v;
	5259	unsigned int res;
	5260
	5261	v = float64_to_int64_round_to_zero(a STATUS_VAR);
	5262	if (v < 0) {
	5263	res = 0;
	5264	float_raise( float_flag_invalid STATUS_VAR);
	5265	} else if (v > 0xffffffff) {
	5266	res = 0xffffffff;
	5267	float_raise( float_flag_invalid STATUS_VAR);
	5268	} else {
	5269	res = v;
	5270	}
	5271	return res;
	5272	}
	5273
	5274	#define COMPARE(s, nan_exp) \
	5275	INLINE char float ## s ## _compare_internal( float ## s a, float ## s b, \
	5276	int is_quiet STATUS_PARAM ) \
	5277	{ \
	5278	flag aSign, bSign; \
	5279	\
	5280	if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
	5281	extractFloat ## s ## Frac( a ) ) \|\| \
	5282	( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
	5283	extractFloat ## s ## Frac( b ) )) { \
	5284	if (!is_quiet \|\| \
	5285	float ## s ## _is_signaling_nan( a ) \|\| \
	5286	float ## s ## _is_signaling_nan( b ) ) { \
	5287	float_raise( float_flag_invalid STATUS_VAR); \
	5288	} \
	5289	return float_relation_unordered; \
	5290	} \
	5291	aSign = extractFloat ## s ## Sign( a ); \
	5292	bSign = extractFloat ## s ## Sign( b ); \
	5293	if ( aSign != bSign ) { \
	5294	if ( (bits ## s) ( ( a \| b )<<1 ) == 0 ) { \
	5295	/* zero case */ \
	5296	return float_relation_equal; \
	5297	} else { \
	5298	return 1 - (2 * aSign); \
	5299	} \
	5300	} else { \
	5301	if (a == b) { \
	5302	return float_relation_equal; \
	5303	} else { \
	5304	return 1 - 2 * (aSign ^ ( a < b )); \
	5305	} \
	5306	} \
	5307	} \
	5308	\
	5309	char float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
	5310	{ \
	5311	return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
	5312	} \
	5313	\
	5314	char float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
	5315	{ \
	5316	return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
	5317	}
	5318
	5319	COMPARE(32, 0xff)
	5320	COMPARE(64, 0x7ff)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: