
//#define DEBUG
//#define SLOW_DIVIDE

#include <conio.h>
#include <string.h>
#include <6502.h>

// Detected CPU; assigned from getcpu() in main() and compared against
// CPU_65816 / CPU_65C02 throughout this file.
unsigned char cpu_type = 0;
// Number of RAM banks; forced to 1 for non-65816 CPUs in check_ram_banks()
// and shown as ram_banks * 64 KB in menu().
unsigned char ram_banks = 1;
// Non-zero makes menu() print "PAL", zero prints "NTSC".
// Presumably written by set_vic_pal() — confirm in the assembly source.
unsigned char vic_pal = 0;

// Accumulator width the generated test code is entered with; tests() sets
// it to 8 or 16 before calling a generator.
unsigned char cpu_mode = 8;
// Menu option [F2]: when set, generators running with cpu_mode == 8 may
// switch to a 16-bit accumulator for multi-byte operations.
unsigned char use_mode_16 = 0;

// Menu option [F1]: dotest() writes $0b to $d011 before measuring.
unsigned char screenoff = 0;
// Menu option [F5]: dotest() brackets the measurement with dtvturboon()/dtvturbooff().
unsigned char dtvturbo = 0;
// Menu option [F3]: dotest() writes 1 to $d030 for the measurement and 0 afterwards.
unsigned char c128turbo = 0;
// Menu option [F7]: 0 = default, 1 = none, 2 = full (see the branches in dotest()).
unsigned char scpumode = 0; // default, none, full
// Write cursor for generated code: reset to 0xe000 by teststart(), advanced
// by the generators, terminated with an rts by testend().
unsigned char *ptr;

// Rewind the code-generation cursor to the start of the test buffer at $e000.
void teststart(void) {
    ptr = (unsigned char *)0xe000;
}

// Terminate the generated code with an rts at the current cursor position.
// The cursor itself is not advanced.
void testend(void) {
    ptr[0] = 0x60; // rts
}

// Busy-wait until bit 7 of $d011 has been seen set and then clear again.
// NOTE(review): on the VIC-II, bit 7 of $d011 is the high bit of the raster
// counter, so this should return shortly after the raster wraps to the top
// of a new frame — confirm against the hardware reference.
// The register is read through a volatile pointer so that the polling loops
// can never be collapsed by an optimizing compiler.
void waitframe(void) {
    // wait for the bit to become set ...
    while (((*(volatile unsigned char*)0xd011) & 0x80) == 0) {
        asm ("nop");
    }
    // ... and then for it to clear again
    while (((*(volatile unsigned char*)0xd011) & 0x80) != 0) {
        asm ("nop");
    }
}

// Wait for ten calls to waitframe().
// The name implies 20 ms per frame; presumably shorter on machines with a
// faster frame rate.
void delay_200ms(void) {
    unsigned char frames_left = 10;

    while (frames_left != 0) {
        waitframe();
        --frames_left;
    }
}

// Helpers defined outside this file (presumably in assembly).

// Called via jsr at the very start / very end of dotest(); by their names
// they save and restore the zero page around the measurement.
extern void savezpage(void);
extern void loadzpage(void);

// Called around the measurement in dotest() when the dtvturbo option is set.
extern void dtvturboon(void);
extern void dtvturbooff(void);

// Hardware detection / CPU mode helpers used from main() and check_ram_banks().
extern void set_vic_pal(void);
extern void set_ram_banks(void);
extern void set_8bit_emulation(void);
extern void set_8bit_native(void);
// 65816 replacements for the inline-asm timing sequence in dotest()
// (cc65 does not accept inline 65816 assembly).
extern void do_test_8bit_native(void);
extern void do_test_16bit_native(void);
// Used by test_multiply()/test_divide() to invoke a 65816 routine.
extern void __fastcall__ call_16bit_native(void *function);

/*
 * Arguments and return values of the multiply/divide routines.
 * In this file zpa0/zpa1 are written before a routine is called and
 * zpv1/zpv0 are cleared beforehand and printed afterwards as the result.
 * The zpsym pragmas tell cc65 that these symbols live in the zero page.
 */
extern unsigned long zpv0;
extern unsigned long zpv1;
extern unsigned long zpa0;
extern unsigned long zpa1;
#pragma zpsym ("zpv0");
#pragma zpsym ("zpv1");
#pragma zpsym ("zpa0");
#pragma zpsym ("zpa1");

// Multiply routines defined outside this file. The *_setup functions are
// called once from main() (the startup message is "Generating tables"); the
// 65816 variants are only set up and used when a 65816 is detected.
extern void multiply_6502_setup(void);
extern void multiply_6502_u8(void);
extern void multiply_6502_u16(void);
extern void multiply_6502_u32(void);
extern void multiply_65816_setup_8(void);
extern void multiply_65816_setup_1632(void);
extern void multiply_65816_u8(void);
extern void multiply_65816_u16(void);
extern void multiply_65816_u32(void);

// Addresses of the multiply routines, indexed by (operand size in bytes - 1).
// Entries 0..3 are the 6502 versions, entries 4..7 the 65816 versions
// (testmultx() adds 4 to the size to select them). The 24-bit slots are unused.
unsigned int multiplies[8] = {
  (unsigned int)multiply_6502_u8,
  (unsigned int)multiply_6502_u16,
  0, //24bit
  (unsigned int)multiply_6502_u32,
  (unsigned int)multiply_65816_u8,
  (unsigned int)multiply_65816_u16,
  0, //24bit
  (unsigned int)multiply_65816_u32
};

// Print one result row: both operands followed by zpv1 and zpv0.
static void test_multiply_show(unsigned char row, const char *fmt)
{
    gotoxy(0, row);
    cprintf(fmt, zpa0, zpa1, zpv1, zpv0);
}

// Debug aid: run every multiply routine once on all-ones operands and print
// the results on rows 14-16 (6502 routines) and 18-20 (65816 routines, only
// when a 65816 was detected).
void test_multiply(void)
{
    zpa0 = (unsigned long)-1;
    zpa1 = (unsigned long)-1;

    zpv1 = 0;
    zpv0 = 0;
    multiply_6502_u8();
    test_multiply_show(14, "u8  %08lx*%08lx=%08lx%08lx");

    zpv1 = 0;
    zpv0 = 0;
    multiply_6502_u16();
    test_multiply_show(15, "u16 %08lx*%08lx=%08lx%08lx");

    zpv1 = 0;
    zpv0 = 0;
    multiply_6502_u32();
    test_multiply_show(16, "u32 %08lx*%08lx=%08lx%08lx");

    if (cpu_type != CPU_65816) {
        return;
    }

    zpv1 = 0;
    zpv0 = 0;
    call_16bit_native(multiply_65816_u8);
    test_multiply_show(18, "u8  %08lx*%08lx=%08lx%08lx");

    zpv1 = 0;
    zpv0 = 0;
    call_16bit_native(multiply_65816_u16);
    test_multiply_show(19, "u16 %08lx*%08lx=%08lx%08lx");

    zpv1 = 0;
    zpv0 = 0;
    call_16bit_native(multiply_65816_u32);
    test_multiply_show(20, "u32 %08lx*%08lx=%08lx%08lx");
}

// Divide routines defined outside this file. The *_setup functions are
// called once from main(); the *_u8_slow variants are selected for the
// benchmark tables when SLOW_DIVIDE is defined.
extern void divide_6502_setup(void);
extern void divide_6502_u8(void);
extern void divide_6502_u8_slow(void);
extern void divide_6502_u16(void);
extern void divide_6502_u32(void);
extern void divide_65816_setup_8(void);
extern void divide_65816_setup_1632(void);
extern void divide_65816_u8(void);
extern void divide_65816_u8_slow(void);
extern void divide_65816_u16(void);
extern void divide_65816_u32(void);

// Addresses of the divide routines, indexed by (operand size in bytes - 1).
// Entries 0..3 are the 6502 versions, entries 4..7 the 65816 versions
// (testdivx() adds 4 to the size to select them). The 24-bit slots are unused.
// SLOW_DIVIDE swaps in the slow 8-bit variants.
unsigned int divides[8] = {
#ifdef SLOW_DIVIDE
  (unsigned int)divide_6502_u8_slow,
#else
  (unsigned int)divide_6502_u8,
#endif
  (unsigned int)divide_6502_u16,
  0, //24bit
  (unsigned int)divide_6502_u32,
#ifdef SLOW_DIVIDE
  (unsigned int)divide_65816_u8_slow,
#else
  (unsigned int)divide_65816_u8,
#endif
  (unsigned int)divide_65816_u16,
  0, //24bit
  (unsigned int)divide_65816_u32
};

// Print one result row: both operands followed by zpv1 and zpv0.
static void test_divide_show(unsigned char row, const char *fmt)
{
    gotoxy(0, row);
    cprintf(fmt, zpa0, zpa1, zpv1, zpv0);
}

// Debug aid: run every (fast) divide routine once on 0xaabbccdd / 7 and print
// the results on rows 14-16 (6502 routines) and 18-20 (65816 routines, only
// when a 65816 was detected).
void test_divide(void)
{
    zpa0 = 0xaabbccddUL;
    zpa1 = 0x00000007UL;

    zpv1 = 0;
    zpv0 = 0;
    divide_6502_u8();
    test_divide_show(14, "u8  %08lx/%08lx=%08lx %08lx");

    zpv1 = 0;
    zpv0 = 0;
    divide_6502_u16();
    test_divide_show(15, "u16 %08lx/%08lx=%08lx %08lx");

    zpv1 = 0;
    zpv0 = 0;
    divide_6502_u32();
    test_divide_show(16, "u32 %08lx/%08lx=%08lx %08lx");

    if (cpu_type != CPU_65816) {
        return;
    }

    zpv1 = 0;
    zpv0 = 0;
    call_16bit_native(divide_65816_u8);
    test_divide_show(18, "u8  %08lx/%08lx=%08lx %08lx");

    zpv1 = 0;
    zpv0 = 0;
    call_16bit_native(divide_65816_u16);
    test_divide_show(19, "u16 %08lx/%08lx=%08lx %08lx");

    zpv1 = 0;
    zpv0 = 0;
    call_16bit_native(divide_65816_u32);
    test_divide_show(20, "u32 %08lx/%08lx=%08lx %08lx");
}

// Time the code previously generated at $e000 and return the raw 16-bit
// value read back from $dc04/$dc05 afterwards. The callers in this file
// (calibrate(), tests()) subtract that value from 0xffff to obtain an
// elapsed count.
//
// On non-65816 CPUs the generated code is called twice (two jsr $e000); on
// a 65816 the timed section is delegated to do_test_8bit_native() or
// do_test_16bit_native(), depending on cpu_mode.
//
// The statement order is significant: interrupts are disabled and the zero
// page is saved first, the optional speed-ups are switched on, the timer is
// prepared, the test runs, and everything is undone again before returning.
// NOTE(review): the register meanings given below follow the usual C64
// memory map (CIA 1 at $dc00, VIC-II at $d000, SuperCPU at $d07x/$d0b3);
// confirm against the hardware references.
unsigned int dotest(void)
{
    // static so the result does not live on the C stack
    static unsigned int res;
    asm("sei");
    asm("jsr _savezpage");
    // memory configuration for the measurement; the generated code lives at
    // $e000, so presumably this makes that area RAM while keeping I/O visible
    asm("ldy #$35");
    asm("sty $01");
    if (dtvturbo) {
        dtvturboon();
    }
    if (screenoff) {
        // blank the screen and wait for the next frame before measuring
        asm("ldy #$0b");
        asm("sty $d011");
        waitframe();
    }
    if (c128turbo) {
        asm("ldy #$01");
        asm("sty $d030");
    }

    if (cpu_type == CPU_65816) {
        // scpumode
        // Only the addresses written matter for the $d07x stores; Y holds
        // whatever was loaded last.
        asm("sty $d07e");
        if (scpumode == 0) {
            asm("sty $d077"); // V1 default (no optimization)
            asm("ldy #%%11000001");
            asm("sty $d0b3"); // V2 default (optimize zp and stack)
        } else if (scpumode == 1) {
            asm("sty $d077"); // V1 (no optimization)
            asm("ldy #%%11000000");
            asm("sty $d0b3"); // V2 (no optimization)
        } else {
            asm("sty $d076"); // V1 (BASIC optimization)
            asm("ldy #%%10000100");
            asm("sty $d0b3"); // V2 (Full optimization)
        }
        asm("sty $d07f");
    }

    // Prepare the timer: write $7f to $dc0d and read it back once, stop the
    // timer ($dc0e = 0) and load $ffff into $dc04/$dc05.
    asm("ldy #$7f");
    asm("sty $dc0d");
    asm("bit $dc0d");
    asm("ldy #0");
    asm("sty $dc0e");
    asm("ldy #$ff");
    asm("sty $dc05");
    asm("sty $dc04");
//    asm("ldy #%%00011001");
    // NOTE(review): A = $21 is not used by the 6502 branch below; presumably
    // it is consumed by the do_test_*_native helpers — confirm.
    asm("lda #$21");
    if (cpu_type == CPU_65816) {
        // cc65 doesn't allow inline 65816 asm
        if (cpu_mode == 16) {
            do_test_16bit_native();
        } else {
            do_test_8bit_native();
        }
    } else {
        // start the timer, run the generated code twice, stop the timer
        asm("ldy #$19");
        asm("sty $dc0e");
        asm("jsr $e000");
        asm("jsr $e000");
        asm("ldy #0");
        asm("sty $dc0e");
    }
    // read the timer value (low byte at $dc04, high byte at $dc05)
    res = *(unsigned int*)0xdc04;
//    asm("ldx $dc05");
//    asm("lda $dc04");
    if (dtvturbo) {
        dtvturbooff();
    }
    if (c128turbo) {
        asm("ldy #$00");
        asm("sty $d030");
    }

    if (cpu_type == CPU_65816) {
        // scpumode
        // back to the default settings, mirroring the scpumode == 0 case above
        asm("sty $d07e");
        asm("sty $d077"); // V1 default (no optimization)
        asm("ldy #%%11000001");
        asm("sty $d0b3"); // V2 default (optimize zp and stack)
        asm("sty $d07f");
    }

    // Restore the machine state: screen on ($d011 = $1b, unconditionally),
    // memory configuration $36, timer latch $4025, timer control $11 and
    // interrupt mask $81 — presumably the values the system normally runs with.
    asm("ldy #$1b");
    asm("sty $d011");
    asm("ldy #$36");
    asm("sty $01");
    asm("ldy #$40");
    asm("sty $dc05");
    asm("ldy #$25");
    asm("sty $dc04");
    asm("ldy #$11");
    asm("sty $dc0e");
    asm("ldy #$81");
    asm("sty $dc0d");
    asm("bit $dc0d");
    asm("jsr _loadzpage");
    asm("cli");
    return res;
}

// Generate 200 unrolled additions of `size`-byte operands at ptr.
// Each addition is clc followed by one lda/adc/sta (absolute) triple per
// accumulator-sized part; the operands live in page $fe.
// On a 65816 the accumulator width is switched around each addition when it
// does not match the operand (sep/rep #$20); rep #$21 also clears carry.
// max 9*4+1 = 37 bytes/unrolled add
// Always returns 0; ptr is left just behind the generated code.
unsigned int testaddx(int size)
{
    unsigned char *out = ptr;
    unsigned char is816 = (cpu_type == CPU_65816);
    // 8-bit operand while running with a 16-bit accumulator
    unsigned char narrow = is816 && (size == 1) && (cpu_mode == 16);
    // multi-byte operand in 8-bit mode with 16-bit accumulator use requested
    unsigned char widen = is816 && (size > 1) && (cpu_mode == 8) && (use_mode_16);
    // use 16bit acc, two bytes at once
    unsigned int stride = (is816 && (size > 1) && ((cpu_mode == 16) || (use_mode_16))) ? 2 : 1;
    unsigned int rep, off, src1, src2, dst;

    for (rep = 0; rep < 200; ++rep) {
        if (narrow) {
            out[0] = 0xe2; // sep #$20: 8bit acc
            out[1] = 0x20;
            out[2] = 0x18; // clc
            out += 3;
        } else if (widen) {
            out[0] = 0xc2; // rep: 16bit acc
            out[1] = 0x20 | 0x01; // also clear carry
            out += 2;
        } else {
            *out++ = 0x18; // clc
        }

        for (off = 0; off < size; off += stride) {
            src1 = 0xfe00 + ((rep + (size * 0) + off) & 0x00fe);
            src2 = 0xfe00 + ((rep + (size * 1) + off) & 0x00fe);
            dst  = 0xfe00 + ((rep + (size * 2) + off) & 0x00fe);

            out[0] = 0xad; // lda abs
            out[1] = src1 & 0xff;
            out[2] = (src1 >> 8) & 0xff;
            out[3] = 0x6d; // adc abs
            out[4] = src2 & 0xff;
            out[5] = (src2 >> 8) & 0xff;
            out[6] = 0x8d; // sta abs
            out[7] = dst & 0xff;
            out[8] = (dst >> 8) & 0xff;
            out += 9;
        }

        if (narrow) {
            out[0] = 0xc2; // rep #$20: back to 16bit acc
            out[1] = 0x20;
            out += 2;
        } else if (widen) {
            out[0] = 0xe2; // sep #$20: back to 8bit acc
            out[1] = 0x20;
            out += 2;
        }
    }

    ptr = out;
    return 0;
}

// 8-bit add: 200 unrolled adds, 14 reference cycles each = 2800
unsigned int testadd8(void) { return testaddx(1); }

// 16-bit add: 200 * (2 for clc + 2 bytes * 12) reference cycles = 5200
unsigned int testadd16(void) { return testaddx(2); }

// 32-bit add: 200 * (2 for clc + 4 bytes * 12) reference cycles = 10000
unsigned int testadd32(void) { return testaddx(4); }

// Generate 200 unrolled bitwise ORs of `size`-byte operands at ptr.
// Each OR is one lda/ora/sta (absolute) triple per accumulator-sized part;
// the operands live in page $fe.
// On a 65816 the accumulator width is switched around each operation when it
// does not match the operand (sep/rep #$20).
// max 9*4 = 36 bytes/unrolled or
// Always returns 0; ptr is left just behind the generated code.
unsigned int testorx(int size)
{
    unsigned char *out = ptr;
    unsigned char is816 = (cpu_type == CPU_65816);
    // 8-bit operand while running with a 16-bit accumulator
    unsigned char narrow = is816 && (size == 1) && (cpu_mode == 16);
    // multi-byte operand in 8-bit mode with 16-bit accumulator use requested
    unsigned char widen = is816 && (size > 1) && (cpu_mode == 8) && (use_mode_16);
    // use 16bit acc, two bytes at once
    unsigned int stride = (is816 && (size > 1) && ((cpu_mode == 16) || (use_mode_16))) ? 2 : 1;
    unsigned int rep, off, src1, src2, dst;

    for (rep = 0; rep < 200; ++rep) {
        if (narrow) {
            out[0] = 0xe2; // sep #$20: 8bit acc
            out[1] = 0x20;
            out += 2;
        } else if (widen) {
            out[0] = 0xc2; // rep #$20: 16bit acc
            out[1] = 0x20;
            out += 2;
        }

        for (off = 0; off < size; off += stride) {
            src1 = 0xfe00 + ((rep + (size * 0) + off) & 0x00fe);
            src2 = 0xfe00 + ((rep + (size * 1) + off) & 0x00fe);
            dst  = 0xfe00 + ((rep + (size * 2) + off) & 0x00fe);

            out[0] = 0xad; // lda abs
            out[1] = src1 & 0xff;
            out[2] = (src1 >> 8) & 0xff;
            out[3] = 0x0d; // ora abs
            out[4] = src2 & 0xff;
            out[5] = (src2 >> 8) & 0xff;
            out[6] = 0x8d; // sta abs
            out[7] = dst & 0xff;
            out[8] = (dst >> 8) & 0xff;
            out += 9;
        }

        if (narrow) {
            out[0] = 0xc2; // rep #$20: back to 16bit acc
            out[1] = 0x20;
            out += 2;
        } else if (widen) {
            out[0] = 0xe2; // sep #$20: back to 8bit acc
            out[1] = 0x20;
            out += 2;
        }
    }

    ptr = out;
    return 0;
}

// 8-bit or: 200 unrolled ORs, 12 reference cycles each = 2400
unsigned int testor8(void) { return testorx(1); }

// 16-bit or: 200 * 2 bytes * 12 reference cycles = 4800
unsigned int testor16(void) { return testorx(2); }

// 32-bit or: 200 * 4 bytes * 12 reference cycles = 9600
unsigned int testor32(void) { return testorx(4); }

// Generate 400 unrolled left shifts of a `size`-byte value at ptr.
// The lowest part is shifted with asl abs, every following part with rol abs
// so the carry propagates upwards; the operand lives in page $fe.
// On a 65816 the accumulator/memory width is switched around each shift when
// it does not match the operand (sep/rep #$20).
// max 3*4 = 12 bytes/unrolled shift
// Always returns 0; ptr is advanced past the generated code.
unsigned int testshiftx(int size)
{
    unsigned int i, j, step = 1;
    unsigned int a1;
    if ((cpu_type == CPU_65816) && (size > 1) && ((cpu_mode == 16) || (use_mode_16)))
    {
      // use 16bit acc, two bytes at once
      step = 2;
    }
    for (i = 0; i < 400; ++i) {
        if ((cpu_type == CPU_65816) && (size == 1) && (cpu_mode == 16)) {
            // set 8bit acc
            *ptr++ = 0xe2; // sep
            *ptr++ = 0x20;
        } else if ((cpu_type == CPU_65816) && (size > 1) && (cpu_mode == 8) && (use_mode_16)) {
            // set 16bit acc
            *ptr++ = 0xc2; // rep
            *ptr++ = 0x20;
        }
        for (j = 0; j < size; j += step) {
            // only one operand address is needed for a shift
            a1 = 0xfe00 + ((i + j) & 0x00fe);
            *ptr++ = (j == 0) ? 0x0e : 0x2e; // asl abs first, rol abs for the rest
            *ptr++ = a1 & 0xff; // lo
            *ptr++ = (a1 >> 8) & 0xff; // hi
        }
        if ((cpu_type == CPU_65816) && (size == 1) && (cpu_mode == 16)) {
            // restore 16bit acc
            *ptr++ = 0xc2; // rep
            *ptr++ = 0x20;
        } else if ((cpu_type == CPU_65816) && (size > 1) && (cpu_mode == 8) && (use_mode_16)) {
            // restore 8bit acc
            *ptr++ = 0xe2; // sep
            *ptr++ = 0x20;
        }
    }
    return 0;
}

// 8-bit shift: 400 unrolled shifts, 6 reference cycles each = 2400
unsigned int testshift8(void) { return testshiftx(1); }

// 16-bit shift: 400 * 2 bytes * 6 reference cycles = 4800
unsigned int testshift16(void) { return testshiftx(2); }

// 32-bit shift: 400 * 4 bytes * 6 reference cycles = 9600
unsigned int testshift32(void) { return testshiftx(4); }

// max 7 bytes/unrolled multiply
unsigned int testmultx(int size)
{
    unsigned int i, a1;
    if ((cpu_type == CPU_65816) && ((cpu_mode == 16) || (use_mode_16))) {
        size += 4;
    }
    a1 = (unsigned int)ptr + ((20 * 7) + 1);
    a1 = (a1 + 0xff) & 0xff00;
    for (i = 0; i < 20; ++i) {
        if ((cpu_type == CPU_65816) && (cpu_mode == 8) && (use_mode_16)) {
            // set 16bit acc & idx
            *ptr++ = 0xc2; // rep
            *ptr++ = 0x30;
        }
        *ptr++ = 0x20; // jsr
        *ptr++ = a1 & 0xff; // lo
        *ptr++ = (a1 >> 8) & 0xff; // hi
        if ((cpu_type == CPU_65816) && (cpu_mode == 8) && (use_mode_16)) {
            // set 8bit acc & idx
            *ptr++ = 0xe2; // sep
            *ptr++ = 0x30;
        }
    }
    // copy to aligned address
    memcpy((void *)a1, (void *)multiplies[size - 1], 4096);
    // worst case test values
    zpa0 = 0xffffffffUL;
    zpa1 = 0xffffffffUL;
    return 0;
}

// 8-bit multiply: 20 calls; the testinfo table budgets 1240 reference cycles
unsigned int testmult8(void) { return testmultx(1); }

// 16-bit multiply: 20 calls; the testinfo table budgets 4880 reference cycles
unsigned int testmult16(void) { return testmultx(2); }

// 32-bit multiply: 20 calls; the testinfo table budgets 20440 reference cycles
unsigned int testmult32(void) { return testmultx(4); }

// max 7 bytes/unrolled divide
unsigned int testdivx(int size)
{
    unsigned int i, a1;
    if ((cpu_type == CPU_65816) && ((cpu_mode == 16) || (use_mode_16))) {
        size += 4;
    }
    a1 = (unsigned int)ptr + ((8 * 7) + 1);
    a1 = (a1 + 0xff) & 0xff00;
    for (i = 0; i < 8; ++i) {
        if ((cpu_type == CPU_65816) && (cpu_mode == 8) && (use_mode_16)) {
            // set 16bit acc & idx
            *ptr++ = 0xc2; // rep
            *ptr++ = 0x30;
        }
        *ptr++ = 0x20; // jsr
        *ptr++ = a1 & 0xff; // lo
        *ptr++ = (a1 >> 8) & 0xff; // hi
        if ((cpu_type == CPU_65816) && (cpu_mode == 8) && (use_mode_16)) {
            // set 8bit acc & idx
            *ptr++ = 0xe2; // sep
            *ptr++ = 0x30;
        }
    }
    // copy to aligned address
    memcpy((void *)a1, (void *)divides[size - 1], 4096);
    // worst case test values
    zpa0 = 0xffffffffUL;
    zpa1 = 0x00000000UL;
    return 0;
}

// 8-bit divide: 8 calls; the testinfo table budgets 600 reference cycles
// (2304 when SLOW_DIVIDE selects the slow routine)
unsigned int testdiv8(void) { return testdivx(1); }

// 16-bit divide: 8 calls; the testinfo table budgets 7688 reference cycles
unsigned int testdiv16(void) { return testdivx(2); }

// 32-bit divide: 8 calls; the testinfo table budgets 28056 reference cycles
unsigned int testdiv32(void) { return testdivx(4); }

// Reset the screen colors: fill the 1000 bytes at $d800 with color 14 and
// set border, background and text color to 14, 6 and 14.
void fixscreen (void)
{
    unsigned char *color_ram = (unsigned char*)0xd800;

    memset(color_ram, 14, 0x3e8);
    bordercolor(14);
    bgcolor(6);
    textcolor(14);
}

// One benchmark entry of the testinfo table.
typedef struct
{
    // CPU the test needs; CPU_6502 entries run on every CPU (see tests())
    unsigned char cpu_type;
    // minimum number of RAM banks (compared against the global ram_banks)
    unsigned char ram_banks;
    // label printed in the first column
    char *name;
    // writes the test code at ptr; called between teststart() and testend().
    // A NULL generator terminates the table.
    unsigned int (*generator)(void);
    // reference cycle count for one run; doubled where it is compared to a result
    unsigned int cycles;
    // divisor applied to result and cycles in printrating(); 0 excludes the entry
    unsigned int weight;
    // last measurement with cpu_mode == 8 (0 = not measured yet)
    unsigned int result8;
    // last measurement with cpu_mode == 16 (65816 only, otherwise 0)
    unsigned int result16;
} TESTINFO;

// Benchmark table: {cpu_type, ram_banks, name, generator, cycles, weight}.
// result8/result16 are left out and therefore start at zero.
// The entry with a NULL generator terminates the table.
TESTINFO testinfo[] =
{
    // Any cpu
    {CPU_6502, 1, "add 8", testadd8, 2800, 1},
    {CPU_6502, 1, "add 16", testadd16, 5200, 1},
    {CPU_6502, 1, "add 32", testadd32, 10000, 1},
    {CPU_6502, 1, "or 8", testor8, 2400, 1},
    {CPU_6502, 1, "or 16", testor16, 4800, 1},
    {CPU_6502, 1, "or 32", testor32, 9600, 1},
    {CPU_6502, 1, "shift 8", testshift8, 2400, 1},
    {CPU_6502, 1, "shift 16", testshift16, 4800, 1},
    {CPU_6502, 1, "shift 32", testshift32, 9600, 1},
    {CPU_6502, 1, "multiply 8", testmult8, 1240, 1},
    {CPU_6502, 1, "multiply 16", testmult16, 4880, 1},
    {CPU_6502, 1, "multiply 32", testmult32, 20440, 1},
    // the 8-bit divide budget depends on which routine is compiled in
#ifdef SLOW_DIVIDE
    {CPU_6502, 1, "divide 8", testdiv8, 2304, 1},
#else
    {CPU_6502, 1, "divide 8", testdiv8, 600, 1},
#endif
    {CPU_6502, 1, "divide 16", testdiv16, 7688, 1},
    {CPU_6502, 1, "divide 32", testdiv32, 28056, 1},
    {0, 0, NULL, NULL, 0, 0},
};

// Print cycles / res as a factor with two decimals ("NN.NNx") at the cursor.
// Prints nothing when res is 0.
void printpercent(unsigned int res, unsigned int cycles)
{
    unsigned long scaled;
    unsigned int whole, frac;

    if (res == 0) {
        return;
    }

    // ratio in hundredths, computed in 32 bits
    scaled = (((unsigned long)cycles) * 100L) / ((unsigned long)res);
    whole = scaled / 100;
    frac = scaled % 100;

    cprintf("%2d.%02dx", whole, frac);
}

// Print one result column at (col, row): the measured count followed by the
// speed factor against `reference`, or "N/A" when there is no measurement.
static void printinfo_column(unsigned char col, unsigned char row,
                             unsigned int res, unsigned int reference)
{
    gotoxy(col, row);
    if (res != 0) {
        cprintf("%5u ", res);
        printpercent(res, reference);
        return;
    }
    cprintf("     N/A    ");
}

// Redraw the screen line of test n (row n + 2): name, 8-bit result and
// 16-bit result, then restore the screen colors.
void printinfo(unsigned char n, unsigned int res8, unsigned int res16)
{
    unsigned char row = n + 2;
    // the measurement runs the generated code twice, hence the doubled reference
    unsigned int reference = testinfo[n].cycles * 2;

    gotoxy(0, row);
    cclear(40);
    gotoxy(0, row);
    cprintf("%-12s", testinfo[n].name);

    printinfo_column(13, row, res8, reference);
    printinfo_column(26, row, res16, reference);

    fixscreen();
}

// Print the status line (row 24) with the overall rating: the weighted sum
// of reference cycles divided by the weighted sum of measured 8-bit results,
// shown as a factor with two decimals.
//
// Only tests that have a non-zero weight AND an actual measurement are
// included. Counting the reference cycles of tests that have not produced a
// result yet would inflate the rating, and with no measurement at all the
// division would be by zero; in that case 0.00x is shown.
void printrating(void)
{
    unsigned long allresult, allcycles;
    unsigned long ratio;
    unsigned char n;
    unsigned int a,b;

    allresult = 0;
    allcycles = 0;

    for (n = 0; testinfo[n].generator; ++n) {
        if ((testinfo[n].weight == 0) || (testinfo[n].result8 == 0)) {
            // excluded from the rating, or not measured yet
            continue;
        }
        allresult += testinfo[n].result8 / testinfo[n].weight;
        // widen before doubling so the reference count cannot wrap
        allcycles += (((unsigned long)testinfo[n].cycles) * 2) / testinfo[n].weight;
    }

    ratio = 0;
    if (allresult != 0) {
        // rating in hundredths
        ratio = (allcycles * 100L) / allresult;
    }

    a = ratio / 100;
    b = ratio % 100;

    revers(1);
    gotoxy(0,24); cclear(40);
    gotoxy(0,24); cprintf("Press key to abort. Total rating: %2d.%02dx", a, b);
    revers(0);
}

// Measure the fixed overhead of dotest(): time an empty test (just the rts)
// ten times and return the average elapsed count.
//
// dotest() unconditionally switches the screen back on before it returns,
// so the screen is blanked again before every sample; otherwise only the
// first sample would run with the screen off. The sum is kept in an
// unsigned long so ten samples cannot overflow a 16-bit int.
int calibrate(void)
{
  unsigned long total;
  unsigned char i;

  total = 0;
  for (i = 0; i < 10; i++)
  {
    // screen off
    asm("ldy #$0b");
    asm("sty $d011");

    teststart();
    testend();
    total += 0xffff - dotest();
  }

  // screen on
  asm("ldy #$1b");
  asm("sty $d011");

  return (int)(total / 10);
}

// True when table entry n may run here: its CPU matches the detected one
// (CPU_6502 entries run everywhere) and enough RAM banks are present.
static unsigned char tests_entry_enabled(unsigned char n)
{
    if ((cpu_type != testinfo[n].cpu_type) && (testinfo[n].cpu_type != CPU_6502)) {
        return 0;
    }
    return ram_banks >= testinfo[n].ram_banks;
}

// Benchmark screen: calibrate, draw the table, then run all enabled tests
// over and over until a key is pressed. Each test is measured in 8-bit mode
// and, on a 65816, in 16-bit mode as well.
void tests(void)
{
    unsigned int budget, elapsed8, elapsed16;
    unsigned char idx;

    clrscr();
    // timer start value minus the measured dotest() overhead
    budget = 0xffff - (unsigned int)calibrate();

    revers(1);
           //0123456789012345678901234567890123456789
    cprintf("                    base cpu mode       ");
    cprintf("test            8bit mode   16bit mode  ");
    revers(0);

    // clear old results and draw the empty table
    for (idx = 0; testinfo[idx].generator; ++idx) {
        testinfo[idx].result8 = 0;
        testinfo[idx].result16 = 0;
        if (tests_entry_enabled(idx)) {
            printinfo(idx, 0, 0);
        }
    }

    do {
        for (idx = 0; testinfo[idx].generator; ++idx) {
            if (tests_entry_enabled(idx)) {
                // mark the test that is currently running
                gotoxy(39, idx + 2);
                cputc('*');

                // 8bit
                cpu_mode = 8;
                teststart();
                testinfo[idx].generator();
                testend();
                elapsed8 = budget - dotest();
                testinfo[idx].result8 = elapsed8;

                elapsed16 = 0;
                if (cpu_type == CPU_65816) {
                    // 16bit
                    cpu_mode = 16;
                    teststart();
                    testinfo[idx].generator();
                    testend();
                    elapsed16 = budget - dotest();
                    testinfo[idx].result16 = elapsed16;
                }

                printrating();
                printinfo(idx, elapsed8, elapsed16);
            }
            if (kbhit()) {
                break;
            }
        }
        delay_200ms();
    } while (!kbhit());

    // swallow the pending key presses
    while (kbhit()) {
        cgetc();
    }
}

// Options screen: shows the detected hardware and the benchmark options,
// toggles an option for each function key and returns when RETURN is pressed.
void menu(void)
{
    unsigned char key;

    clrscr();
    revers(1);          //1234567890123456789012345678901234567890
    gotoxy(0,0);
    cprintf("                       SynthMark816 v0.1");
    gotoxy(0,24);
    cprintf("CPU %s, RAM %5d KB (%3d BANKS) %s",
        cpu_type == CPU_65816 ? "65816" : cpu_type == CPU_65C02 ? "65C02" : "6502 ",
        ram_banks * 64, ram_banks, vic_pal ? "PAL " : "NTSC");
    revers(0);

    for (;;) {
        // redraw all options with their current state
        gotoxy(1,2);
        cprintf("[F1] disable screen during tests: %s", screenoff ? "yes" : "no ");
        gotoxy(1,4);
        cprintf("[F3] use C128 fast mode: %s", c128turbo ? "yes" : "no ");
        gotoxy(1,6);
        cprintf("[F5] use DTV fast mode(s): %s", dtvturbo ? "yes" : "no ");
        gotoxy(1,8);
        cprintf("[F7] use SCPU optimization: %s", scpumode == 0 ? "default" : scpumode == 1 ? "none   " : "full   ");
        gotoxy(1,10);
        cprintf("[F2] use 16bit mode when possible: %s", use_mode_16 ? "yes" : "no ");
        gotoxy(1,12);
        cprintf("[RETURN] start benchmark");
        // test_multiply() / test_divide() can be called here to check the routines

        key = cgetc();
        if (key == 0x0d) {
            // RETURN: start the benchmark
            break;
        }
#ifdef DEBUG
        gotoxy (0,0); cprintf("%02x", key);
#endif
        if (key == 0x85) {
            // F1
            screenoff ^= 1;
        } else if (key == 0x86) {
            // F3
            c128turbo ^= 1;
        } else if (key == 0x87) {
            // F5
            dtvturbo ^= 1;
        } else if (key == 0x88) {
            // F7: cycle default -> none -> full
            scpumode = (scpumode >= 2) ? 0 : (scpumode + 1);
        } else if (key == 0x89) {
            // F2
            use_mode_16 ^= 1;
        }
    }

    // swallow the pending key presses
    while (kbhit()) {
        cgetc();
    }
}

// Determine the number of RAM banks: only a 65816 is probed via
// set_ram_banks(); every other CPU gets exactly one bank.
void check_ram_banks(void)
{
    if (cpu_type != CPU_65816) {
        ram_banks = 1;
        return;
    }
    set_ram_banks();
}

// Program entry: detect the hardware, let the multiply/divide routines set
// themselves up (65816 variants only on a 65816), then alternate between
// the options menu and the benchmark forever.
void main(void)
{
    fixscreen();
    clrscr();
    gotoxy(0,0);
    cprintf("Generating tables, please wait...");

    // hardware detection
    cpu_type = getcpu();
    set_vic_pal();
    check_ram_banks();

    // routine setup
    multiply_6502_setup();
    divide_6502_setup();
    if (cpu_type == CPU_65816) {
        set_8bit_native();
        multiply_65816_setup_8();
        multiply_65816_setup_1632();
        divide_65816_setup_8();
        divide_65816_setup_1632();
    }

    for (;;) {
        menu();
        tests();
    }
}
