_A COPROCESSOR FOR A COPROCESSOR?_
by Warren Davis and Kan Yabumoto

[LISTING ONE]

/* C program to perform display of Mandelbrot set. Needs to be 
linked with a module containing the initialize() and put_pixel() */

int   screenx, screeny;   /*  These values represent the size of the display */
              /*  screen in pixels. They are initialized in the  */
              /*  initialize() routine called by main().         */

/*****************************************************************************
compute_fractal is the heart of our program. Four parameters are passed 
from main() representing two two complex numbers. The first two parameters, 
base_R and base_I, are the real and imaginary portions of upper left corner 
of the screen screen in the complex plane. The last two, span_R and span_I, 
give the size of the area of the complex plane visible on the screen. 
SOME BACKGROUND... This routine computes successive iterations of the equation,
(An = An-1 ** 2) + C where A and C are complex numbers, and C represents a 
point in the complex plane. The initial value of A is 0+0i, and when the 
magnitude of A becomes greater than 2.0, it will be considered that series 
will eventually diverge. The color of pixel at C becomes the number of 
iterations before divergence. If after 256 iterations, there is no divergence,
color 0 is written. The color is used as an index into color palette of the 
display board. COMPLEX ARITHMETIC... For those of you a little rusty on your 
complex arithmetic, the following formulas are supplied...
If W and Z are complex numbers, then each has two parts, real and imaginary.  
(i.e. W = W_real + W_imag * i). W + Z means  (W_real + Z_real) + (W_imag + 
Z_imag) * i W * W means   (W_real * W_real) - (W_imag * W_imag)  + 
(2 * W_real * W_imag) * i.  The magnitude of Z would be  SQRT((Z_real * 
Z_real) + (Z_imag * Z_imag))
****************************************************************************/

void compute_fractal(float BaseR, float BaseI, float SpanR, float SpanI)
{
register float   AR, AI;   /*  Real and Imaginary components of A */
register float   ConstR, ConstI;/* Real and Imaginary components of C */
register float   DeltaR, DeltaI;        /* increment values for C */
register float   ARsqr, AIsqr;            /* squares of AR and AI */
register int     row, col, color;         /****   See NOTE 1   ****/

DeltaR = SpanR / (float)screenx;
DeltaI = SpanI / (float)screeny;

ConstI = BaseI;
for (row=0; row < screeny; row++)   {       /* Scan top to bottom */
    ConstR = BaseR;
    for (col=0; col < screenx; col++)   {   /* Scan left to right */
        AR = AI = ARsqr = AIsqr = 0.0F;   /****   See NOTE 2   ****/
        for (color = 256; --color > 0;)   {/* Find color for this C */
            AI = (AR * AI * 2.0F) + ConstI;   /* Compute next     */
            AR = ARsqr - AIsqr + ConstR;      /*   iteration of A */

            if ( ((ARsqr = AR * AR) + (AIsqr = AI * AI))  > 4.0F )
                    break;                /****   See NOTE 3   ****/
            }
        put_pixel(color,col,row);/* Write color to display buffer. */
        ConstR += DeltaR;
        }
    ConstI += DeltaI;
    }
}

/* NOTE 1: We declare everything to be register variables. For some processors
this may not have much of an effect, but on others (like the 34020 and 34082) 
you may be surprised. 
NOTE 2: For each point on the screen, we begin computing iterations of the 
Mandelbrot equation. The initial value of A is 0+0i. Since the values  
A_real*A_real and A_imag*A_imag are used in computing both the next iteration 
of A and its magnitude, we maintain these values as separate variables so the 
multiplications need only be computed once.
NOTE 3: For our magnitude comparison, we actually compare the SQUARE of the 
magnitude against the square of our divergence value. This saves us from 
computing a square root.
*/

/****************************************************************************
The main() function serves only to pass initial values to compute_fractal. We 
will leave the initialize() routine to be a "black box". Interested 
programmers may want to write their own routine for whatever display board is 
available. The values used in this test program show the familiar picture of 
the Mandelbrot set. By varying these numbers, you can obtain some breathtaking 
fractal landscapes. 
 ***************************************************************************/

main()
{
float origin_R,origin_I,size_R,size_I;

/*  The initialize() routine must initialize display board, clear display 
buffer, load a table of 256 colors into color palette, and set global 
variables, screenx and screeny. If successful, it returns 0. If it encounters 
any problems it returns a non-zero value. */

if (initialize()) return(1);

origin_R = -4.0;      /*  origin represents the upper left corner of */
origin_I = -3.0;      /*         the screen.                         */
size_R = 8.0;         /*  size   represents the domain of the screen */
size_I = 6.0;         /*         in the complex plane.               */

compute_fractal(origin_R,origin_I,size_R,size_I);
}


[LISTING TWO]

******************************************************************************
*  Assembly code generated by TMS340 C Compiler using the -mc option for 
*  generating coprocessor instructions.  
******************************************************************************
;  gspac -mc -v20 mandel.gc mandel.if 
;  gspcg -o -c -v20 -o mandel.if mandel.asm mandel.tmp 
   .version    20
   .ieeefl
FP .set   A13
STK.set   A14
   .file  "mandel.gc"
   .globl _screenx
   .globl _screeny

   .sym   _compute_fractal,_compute_fractal,32,2,0
   .globl _compute_fractal

   .func  50
;>>>> void compute_fractal(float BaseR,float BaseI,float SpanR,
                           float SpanI)
;>>>> register float   AR, AI, ConstR, ConstI;
;>>>> register float   ARsqr, AIsqr, DeltaI, DeltaR;
;>>>> register int     row,col,color;
******************************************************
* FUNCTION DEF : _compute_fractal
******************************************************
_compute_fractal:
   MMTM   SP,A7,A9,A10,A11,FP
   SUBI   448,SP
   MOVE   SP,A11
   MOVD   RA5,*A11+,4
   MOVD   RB6,*A11+,3
   MOVE   STK,FP
   ADDK   32,STK
   MOVE   SP,*STK+,1    ;; DEBUGGER TRACEBACK AID
   .sym   _BaseR,-32,6,9,32
   .sym   _BaseI,-64,6,9,32
   .sym   _SpanR,-96,6,9,32
   .sym   _SpanI,-128,6,9,32
   .sym   _AR,32,6,4,32
   .sym   _AI,33,6,4,32
   .sym   _ConstR,30,6,4,32
   .sym   _ConstI,31,6,4,32
   .sym   _ARsqr,28,6,4,32
   .sym   _AIsqr,29,6,4,32
   .sym   _DeltaR,26,6,4,32
   .sym   _DeltaI,0,6,1,32
   .sym   _row,9,4,4,32
   .sym   _col,10,4,4,32
   .sym   _color,11,4,4,32

   .line  9
;>>>> DeltaR = SpanR / (float)screenx;
   MOVE   @_screenx,A7,1
   MOVE   A7,RA0          ; screenx --> RA0
   CVIF   RA0,RB0         ; convert RA0 from int to float, put in RB0
   MOVE   FP,A7
   SUBI   96,A7

   MOVF   *A7+,RA0        ; move parameter SpanR --> RA0
   DIVF   RA0,RB0,RB0     ; RA0 / RB0 --> RB0.  Result is DeltaR
   ADDI   64,A7
   MOVF   RB0,*A7+        ; Store DeltaR as a local variable.

   .line  10
;>>>> DeltaI = SpanI / (float)screeny;
   MOVE   @_screeny,A7,1    
   MOVE   A7,RA1            ; screeny --> RA1
   CVIF   RA1,RB1           ; convert to float and put in RB1
   MOVE   FP,A7
   SUBI   128,A7
   MOVF   *A7+,RA1          ; get SpanI
   DIVF   RA1,RB1,RA5       ; compute DeltaI and LEAVE IN RA5!!! 
                                 ;  DeltaI is used as a register variable!
   .line  12
;>>>> ConstI = BaseI;
   ADDK   32,A7
   MOVF   *A7+,RB7        ; BaseI --> ConstI (RB7)

   .line  13
;>>>> for (row=0; row < screeny; row++)   { 
; NOTICE here that both ConstI and row are used as register variables. Yet 
;  ConstI, which is a float, is kept in a 34082 register and row, which is an 
;  int, is kept in a 34020 register! The C compiler is smart enough to know 
;  which variables should be maintained on which processor!
;
   CLRS   A9                ; 0 --> row (A9)
   MOVE   @_screeny,A7,1    
   CMP    A7,A9
   JRGE   L2

L1:
   .line  15
;>>>>     ConstR = BaseR;
   MOVE   FP,A7
   SUBK   32,A7
   MOVF   *A7+,RA7        ; BaseR --> ConstR (RA7)

   .line  16
;>>>>     for (col=0; col < screenx; col++)   {   
   CLRS   A10             ; 0 --> col (A10)
   MOVE   @_screenx,A7,1
   CMP    A7,A10
   JRGE   L4

L3:
   .line  18
;>>>>         AR = AI = ARsqr = AIsqr = 0.0F;
   CLRF   RB6             ; clear AIsqr (RB6)
   MOVF   RB6,RA6         ; clear ARsqr (RA6)
   MOVF   RB6,RB8         ; clear AI (RB8)

   MOVF   RB6,RA8         ; clear AR (RA8)

   .line  20
;>>>>         for (color = 256; --color > 0;)
   MOVI   256,A11         
   SUBK   1,A11           ; 255 --> color (A11)
   JRLE   L6

L5:
   .line  22
;>>>>             AI =  (AR * AI * 2.0F) + ConstI;
   MPYF   RA8,RB8,RA0     ; AR * AI --> RA0
   TWOF   RB0             ; 2.0F --> RB0
   MPYF   RA0,RB0,RA0     ; AR * AR * 2.0 --> RA0
   ADDF   RA0,RB7,RB8     ; RA0 + ConstR --> AI (RB8)

   .line  23
;>>>>             AR = ARsqr - AIsqr + ConstR;
   SUBF   RA6,RB6,RB1     ; ARsqr - AIsqr --> RB1
   ADDF   RA7,RB1,RA8     ; ConstR + RB1 --> AR (RA8)

   .line  25
;>>>>             if ( ((ARsqr = AR*AR)+ 
   MOVF   RA8,RB1         ; AR --> RB1
   MPYF   RA8,RB1,RA6     ; Compute new ARsqr
   MOVF   RB8,RA0         ; AI --> RA0
   MPYF   RA0,RB8,RB6     ; Compute new AR_imag
   ADDF   RA6,RB6,RA0     ; Sum of squares --> RA0
   MOVI   FS3,A7          ; FS3 is a pointer to a float constant, 4.0
   MOVF   *A7+,RB1        ; 4.0 --> RB1    
   CMPF   RA0,RB1         ; if square of magnitude > 4.0, break
   GETCST
   JRGT   L6

   .line  26
;>>>>                   (AIsqr = AI*AI))  > 4.0F ) break;
   .line  20
   SUBK   1,A11           ; Otherwise, decrement color and see
   JRGT   L5              ;     if loop ended.

L6:
   .line  29
;>>>>         put_pixel(color,col,row); 
   MOVE   STK,-*SP,1        ; Call display_board dependent routine
   MOVE   A9,*STK+,1        ;      to place a pixel on the screen.
   MOVE   A10,*STK+,1
   MOVE   A11,*STK+,1
   CALLA  _put_pixel

   .line  30
;>>>>         ConstR += DeltaR;
   MOVE   FP,A8
   MOVF   *A8+,RB0
   ADDF   RA7,RB0,RA7


   .line  16
   ADDK   1,A10             ; col++
   MOVE   @_screenx,A7,1
   CMP    A7,A10            ; If col >= screenx, end middle loop
   JRLT   L3                ; Otherwise, jump back

L4:
   .line  32
;>>>>     ConstI += DeltaI;
   ADDF   RA5,RB7,RB7

   .line  13
   ADDK   1,A9              ; row++
   MOVE   @_screeny,A7,1
   CMP    A7,A9             ; If row >= screeny, end outer loop
   JRLT   L1                ; Otherwise, jump back

L2:
EPI0_1:
   .line  34
   MOVE   *SP(640),STK,1    ; C cleanup
   MOVD   *SP+,RA5,4
   MOVD   *SP+,RB6,3
   MMFM   SP,A7,A9,A10,A11,FP
   RETS   2

   .endfunc    83,00000ee80H,32

   .sym   _main,_main,36,2,0
   .globl _main

   .func  103
;>>>> main()
;>>>> float origin_R,origin_I,size_R,size_I;
******************************************************
* FUNCTION DEF : _main
******************************************************
_main:
   MOVE   FP,-*SP,1
   MOVE   STK,FP
   ADDI   128,STK
   MOVE   SP,*STK+,1    ;; DEBUGGER TRACEBACK AID
   .sym   _origin_R,0,6,1,32
   .sym   _origin_I,32,6,1,32
   .sym   _size_R,64,6,1,32
   .sym   _size_I,96,6,1,32


   .line  12
;>>>> if (initialize()) return(1);
   CALLA  _initialize
   MOVE   A8,A8
   JRZ    L8
   MOVK   1,A8
   JR     EPI0_2


L8:
   .line  14
;>>>> origin_R = -4.0;     
   MOVE   @FS4,A8,1
   MOVE   A8,*FP,1

   .line  15
;>>>> origin_I = -3.0;     
   MOVE   @FS5,A8,1
   MOVE   A8,*FP(32),1

   .line  16
;>>>> size_R = 8.0;        
   MOVE   @FS6,A8,1
   MOVE   A8,*FP(64),1

   .line  17
;>>>> size_I = 6.0;        
   MOVE   @FS7,A8,1
   MOVE   A8,*FP(96),1

   .line  19
;>>>> compute_fractal(origin_R,origin_I,size_R,size_I);
   MOVE   STK,-*SP,1
   MOVE   *FP(96),*STK+,1
   MOVE   *FP(64),*STK+,1
   MOVE   *FP(32),*STK+,1
   MOVE   *FP(0),*STK+,1
   CALLA  _compute_fractal

EPI0_2:
   .line  20
   SUBI   160,STK
   MOVE   *SP+,FP,1
   RETS   0

   .endfunc    140,00000a000H,128

   .sym   _screenx,_screenx,4,2,32
   .globl _screenx
   .bss   _screenx,32,32

   .sym   _screeny,_screeny,4,2,32
   .globl _screeny
   .bss   _screeny,32,32
*************************************************
* DEFINE FLOATING POINT CONSTANTS               *
*************************************************
   .text
   .even 32
FS1:.float0.0
FS3:.float4.0
FS4:.float-4.0
FS5:.float-3.0

FS6:.float8.0
FS7:.float6.0
*****************************************************
* UNDEFINED REFERENCES                              *
*****************************************************
   .ref   _put_pixel
   .ref   _initialize
   .end

.po 0

[LISTING THREE]

* Hand-tweaked assembler code using Listing 2 as a basis. *
   .version   20
   .ieeefl
   .globl   _screenx
   .globl   _screeny

*   Register Nicknames are used for program clarity
*       34020 Registers...
FP    .set   A13                      ; C function Frame Pointer
STK   .set   A14                      ; C function Stack

DPTCH    .set   B3                    ; Destination Pitch of Screen
OFFSET   .set   B4                    ; Offset of Screen

*       34082 Registers...
RA0_2           .set     RA0          ; 2.0 constant
RA1_4           .set     RA1          ; 4.0 constant
RA2_TMP         .set     RA2          ; temporary storage
RA5_DI          .set     RA5          ; DeltaI
RA6_AR2         .set     RA6          ; AR squared
RA7_CR          .set     RA7          ; ConstR
RA8_AR          .set     RA8          ; AR

RB1_DR          .set     RB1          ; DeltaR
RB2_TMP         .set     RB2          ; temporary storage
RB4_BI          .set     RB4          ; BaseI
RB5_BR          .set     RB5          ; BaseR
RB6_AI2         .set     RB6          ; AI squared
RB7_CI          .set     RB7          ; ConstI
RB8_AI          .set     RB8          ; AI

TubeOffset      .set    2000H         ; These definitions apply for the 
TubePitch       .set    (1024 * 8)    ; SDB20 board which we used.

   .globl   _compute_fractal


******************************************************
* FUNCTION DEF : _compute_fractal
******************************************************
_compute_fractal:
   MMTM   SP,A0,A1,A2,A3,A4,A11,FP

*   Since we are creating a highly efficient tweaked program, we have the
*   main program place the 4 parameters used in compute_fractal directly 
*   into 34082 registers. Specifically, BaseI has been placed in RB4,
*   BaseR has been placed in RB5, SpanI has been placed in RA0, SpanR has 
*   been placed in RA1

;>>>>    DeltaR = SpanR / (float)screenx;
   MOVE   @_screenx,A3,1           ; screenx --> A3 (stays there)
   MOVE   A3,RA2_TMP
   CVIF   RA2_TMP,RB0              ; (float)screenx --> RB0
   DIVF   RA1,RB0,RB1_DR           ; SpanR / screenx = DeltaR --> RB1
                                   ;                    (stays there)
;>>>>    DeltaI = SpanI / (float)screeny;
   MOVE   @_screeny,A4,1           ; screeny --> A4 (stays there)
   MOVE   A4,RA2_TMP 
   CVIF   RA2_TMP,RB0              ; (float)screeny --> RB1
   DIVF   RA0,RB0,RA5_DI           ; SpanI / screeny = DeltaI --> RA5
                                   ;                    (stays there)
*   Set up initializations outside any loops
   TWOF   RA0_2                    ; constant 2.0 in RA0
   SQRF   RA0_2,RA1_4              ; constant 4.0 in RA1

;>>>> for (ConstI = BaseI, row=0; row < screeny; row++,ConstI += DeltaI)
   MOVF   RB4_BI,RB7_CI            ; BaseI --> ConstI  (RB7)
   CLRS   A0                       ; 0 --> row (A0)

L1:
;>>>>   for (ConstR = BaseR, col=0; col < screenx; col++,ConstR += DeltaR)
   MOVF   RB5_BR,RA7_CR            ; BaseR --> ConstR  (RA7)
   CLRS   A1                       ; 0 --> col (A1)

L3:
;>>>>         AR = AI = ARsqr = AIsqr = 0.0F;
   CLRF   RB8_AI                   ; 0.0 --> AI (RB8)
   MOVF   RB8_AI,RB6_AI2           ; 0.0 --> AI squared (RB6)
   CLRF   RA8_AR                   ; 0.0 --> AR (RA8)
   MOVF   RA8_AR,RA6_AR2           ; 0.0 --> AR squared (RA6)

;>>>>            for (color = 256; --color > 0;)
   MOVI   255,A2                   ; 255 --> color (A2)

L5:

;>>>>                AI = ( AR * AI * 2.0F ) + ConstI;
   MPYF   RA8_AR,RB8_AI,RB2_TMP    ; AR * AI --> tmp (RB2)
   MPYF   RB2_TMP,RA0_2,RA2_TMP    ; tmp * 2.0 --> tmp (RA2)
   ADDF   RA2_TMP,RB7_CI,RB8_AI    ; tmp + ConstI --> AI

;>>>>             AR = ARsqr - AIsqr + ConstR;
   SUBF   RA6_AR2,RB6_AI2,RB2_TMP  ; AR**2 - AI**2 --> tmp (RB2)
   ADDF   RB2_TMP,RA7_CR,RA8_AR    ; tmp + ConstR --> AR

;>>>>             if ( ((ARsqr = AR*AR)+ 
;>>>>                   (AIsqr = AI*AI))  > 4.0F ) break;
   SQRF   RA8_AR,RA6_AR2           ; Compute new ARsqr
   MOVF   RB8_AI,RA2_TMP           ; SQRF must be performed on an A reg.
   SQRF   RA2_TMP,RB6_AI2          ; Compute new AIsqr
   ADDF   RA6_AR2,RB6_AI2,RB2_TMP  ; sum of squares in RB2
   CMPF   RA1_4,RB2_TMP            ; if sum of squares > 4.0, break
   GETCST
   JRLE   L6

   DSJ    A2,L5                    ; dec color and loop back if not 0

L6:
;>>>>            put_pixel(color,col,row); 
   MOVE   A0,A8                    ; row becomes Y
   SLL    16,A8                    ; shift Y into upper 16 bits
   MOVA   A1,A8                    ; col becomes A,  Y:X now in A8
   PIXT   A2,*A8.XY                ; write the pixel

;               bottom of 'col' loop
   ADDF   RB1_DR,RA7_CR,RA7_CR     ; ConstR += DeltaR
   INC    A1                       ; col++
   CMP    A3,A1                    ; if col < screenx, jump back
   JRLT   L3                      

;              bottom of 'row' loop
L4:
   ADDF   RA5_DI,RB7_CI,RB7_CI     ; ConstI += DeltaI
   INC    A0                       ; row++
   CMP    A4,A0                    ; if row < screeny, jump back
   JRLT   L1                     

L2:
EPI0_1:
   MMFM   SP,A0,A1,A2,A3,A4,A11,FP
   RETS   

   .globl   _main

******************************************************
* FUNCTION DEF : _main
******************************************************
_main:
   MOVE   FP,-*SP,1
   MOVE   STK,FP
   ADDI   128,STK

   MOVE   SP,*STK+,1    ;; DEBUGGER TRACEBACK AID
   CALLA  _initialize
   MOVE   A8,A8
   JRZ    L8
   MOVK   1,A8
   JR     EPI0_2
L8:
   MOVE    @ORG_I,A8,1            ; We can place the initial parameters
   MOVF    A8,RB4_BI              ; directly into the 34082 registers
   MOVE    @ORG_R,A8,1            ; where they will be used by the
   MOVF    A8,RB5_BR              ; compute_fractal routine.
   MOVE    @SIZE_I,A8,1
   MOVF    A8,RA0
   MOVE    @SIZE_R,A8,1
   MOVF    A8,RA1
   CALLA   _compute_fractal
EPI0_2:
   MOVE   *SP+,FP,1
   RETS   0

   .globl   _screenx
   .bss   _screenx,32,32

   .globl   _screeny
   .bss   _screeny,32,32

*************************************************
* DEFINE FLOATING POINT CONSTANTS               *
*************************************************
   .text
   .even 32
ORG_R:    .float   -4.0
ORG_I:    .float   -3.0
SIZE_R:   .float    8.0
SIZE_I:   .float    6.0

   .ref   _initialize
   .end



