/* * addsf3.s * * Floating point add, single precision: r0r1 += r2r3 * Also implements floating point subtract, single precision: r0r1 -= r2r3 * * The contents of this file are subject to the Mozilla Public License * Version 1.0 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the * License for the specific language governing rights and limitations * under the License. * * The Original Code is Librcx floating point code, released May 27, 1999. * * The Initial Developer of the Original Code is Kekoa Proudfoot. * Portions created by Kekoa Proudfoot are Copyright (C) 1999 * Kekoa Proudfoot. All Rights Reserved. * * Contributor(s): Kekoa Proudfoot */ ; bug fixes: ; - 12/16/2000 fixed sp+16 sign bug (bug symptom found by Kieran Elby) ; possible optimizations: ; - combine multiple returns of second/larger operand ; - possibly simplify shift left 6 by factoring out common left shift op ; - possibly simplify stickyshift by factoring out common stickyshift op ; - possibly remove shift left 6 by computing a 1.32 result (use carry bit) .section .text ;; ;; function: subsf3 ;; input: float in r0r1 and float at sp+2 ;; output: float in r0r1 ;; .global ___subsf3 ___subsf3: ; Negate the second input mov.b @(2,r7),r2l xor.b #0x80,r2l mov.b r2l,@(2,r7) ; Fall through to add routine ;; ;; function: __addsf3 ;; input: float in r0r1 and float at sp+2 ;; output: float in r0r1 ;; .global ___addsf3 ___addsf3: ; Invoke the preamble to expand input operands jsr ___startsf subentry: ; At this point, registers/stack contain the following: ; r0r1 - first operand ; r2h - second operand flags ; r2l - second operand sign ; r3h - first operand flags ; r3l - first operand sign ; r4 - first operand exponent ; r5r6 - first operand mantissa ; sp+0 - second operand exponent ; sp+2 - second operand flags (same as r2h) ; sp+3 - second operand sign (same as r2l) ; sp+4 - second operand mantissa ; sp+16 - second operand ; Note on flag bits: 0=zero, 1=inf, 2=nan ; Is the first operand a NaN? ; If yes, return the first operand (the value already in r0r1) btst #2,r3h ; if nan flag of first operand set ; Hack! bne return_jmp ; non-zero indicates true ; Is the second operand a NaN? btst #2,r2h ; if nan flag of second operand set beq endif_0 ; zero indicates false ; Return the second operand (which we need to load off stack) mov.w @(16,r7),r0 ; set return value to second operand (sp+16) mov.w @(18,r7),r1 ; Hack! bra return_jmp endif_0: ; Are the operands both infinite with different signs? bld #7,r3l ; get sign of first operand bxor #7,r2l ; xor with sign bit of second operand band #1,r3h ; and with inf bit of first operand band #1,r2h ; and with inf bit of second operand bcc endif_1 ; carry clear indicates false ; Both operands are infinite with different signs, so return NaN mov.w #0x7fff,r0 ; set return value to NaN (7fffffff) mov.w #0xffff,r1 ; Hack! bra return_jmp endif_1: ; At this point, registers contain the following: ; r0r1 - first operand ; r2h - second operand flags ; r2l - second operand sign ; r3h - first operand flags ; r3l - first operand sign ; r4 - first operand exponent ; r5r6 - first operand mantissa ; We no longer need r0 r1 r2h r3h mov.b r2l,r3h ; transfer second operand sign to r3h ; We now have r0 r1 r2 free ; Our goal now is to make the registers/stack contain the following: ; r0r1 - smaller operand mantissa ; r2 - exponent difference (larger exponent minus smaller) ; r3h - subtract flag (xor of signs) ; r3l - larger operand sign ; r4 - [empty] ; r5r6 - larger operand mantissa ; sp+2 - larger operand exponent ; sp+16 - larger operand ; Set subtract flag xor.b r3l,r3h ; Does the second operand have a larger exponent? mov.w @r7,r2 ; load second operand exponent (sp+0) to r2 sub.w r4,r2 ; subtract first exponent (r4) from second (r2) ble else_2 ; branch if second exponent <= first exponent ; Second operand is larger, rearrange accordingly mov.w r5,r0 ; move first (smaller) mantissa to r0r1 mov.w r6,r1 mov.b @(3,r7),r3l ; load second (larger) sign to r3l mov.w @(4,r7),r5 ; load second (larger) mantissa to r5r6 mov.w @(6,r7),r6 bra endif_2 else_2: ; First operand is larger, rearrange accordingly ; Also, negate exponent difference mov.w r0,@(16,r7) ; store first (larger) operand to sp+16 mov.w r1,@(18,r7) mov.w r4,@r7 ; store first (larger) exponent to sp+0 mov.w @(4,r7),r0 ; load second (smaller) mantissa to r0r1 mov.w @(6,r7),r1 not.b r2l ; negate exponent difference not.b r2h adds.w #1,r2 endif_2: ; We have now successfully rearranged things ; Our registers and stack contain what we set out to have them contain ; See above ; If exponent difference exceeds 25, then return the larger operand mov.w #25,r4 ; load 25 to free register cmp.w r4,r2 ; if exponent difference (r2) > 25 ble endif_3 ; Return larger operand mov.w @(16,r7),r0 ; load larger operand to r0r1 mov.w @(18,r7),r1 ; Hack! return_jmp: bra return endif_3: ; Shift left both mantissas by 6 places mov.b #6,r4l ; use r4l as counter, set to 6 dowhile_4: add.w r1,r1 ; use add to shift smaller mantissa left addx.b r0l,r0l addx.b r0h,r0h add.w r6,r6 ; use add to shift larger mantissa left addx.b r5l,r5l addx.b r5h,r5h dec.b r4l ; decrement counter bne dowhile_4 ; repeat if counter not yet zero ; Shift the smaller operand right by the exponent difference ; Since exponent difference is less than 25, use only r2l as counter ; Maintain a sticky bit in lsb while_5: dec.b r2l ; if there are more places to shift blt endwhile_5 ; negative counter indicates false shlr.b r0h ; shift mantissa right 1 place rotxr.b r0l rotxr.b r1h rotxr.b r1l ; places old sticky bit in carry bor #0,r1l ; or lsb with old sticky bit to get new bit bst #0,r1l ; store new sticky bit bra while_5 endwhile_5: ; Load saved exponent from stack mov.w @r7,r4 ; load larger exponent from sp+0 ; Add or subtract? mov.b r3h,r3h ; if subtract flag not set bne else_6 ; Add add.w r1,r6 ; compute r5r6 + r0r1 addx.b r0l,r5l addx.b r0h,r5h ; If result is zero, we do not change sign, since signs were same bra endif_6 else_6: ; Subtract sub.w r1,r6 ; compute r5r6 - r0r1 subx.b r0l,r5l subx.b r0h,r5h ; Is result zero? bne else_7 ; non-zero indicates false ; Make sign zero so we return +0, not -0 sub.b r3l,r3l ; clear sign byte bra endif_7 else_7: ; Result is non-zero. Is it negative? bge endif_8 ; greater than or equal indicates false ; Flip sign xor.b #0x80,r3l ; invert sign ; Negate mantissa mov.w r5,r0 ; transfer mantissa to r0r1 mov.w r6,r1 sub.w r5,r5 ; zero destination sub.w r6,r6 sub.w r1,r6 ; subtract original mantissa subx.b r0l,r5l subx.b r0h,r5h endif_8: ; Normalize jsr ___normalizesf endif_7: endif_6: jsr ___joinsf return: ; Invoke the epilogue to cleanup and return jmp ___finishsf