	include	whdload.i
	include	whdmacros.i

;======================================================================
;DEBUG
;CHIP_ONLY
;WARP_TO=12

        ; Memory layout.
        ; CHIP_ONLY build: 1MB of base memory, nothing reserved at the start
        ; of the expansion block (GET_EXP_BASE then hands out the fixed
        ; address 512K instead of _expmem).
        ; Normal build: 512KB of base memory plus EXP_RESERVE bytes at the
        ; start of the expansion block.
        ifd CHIP_ONLY
BASE_CHIP=1024*1024
EXP_RESERVE=0
        else
BASE_CHIP=512*1024
EXP_RESERVE=512*1024
        endc
EXTRA_EXP=320*1024 ; To be able to buffer code before it's needed (shadow buffers, see handle_disk_range)
BPL_ROWBYTES=160                ; bytes per row of the extra bitplane
BPL_BYTES=BPL_ROWBYTES*180      ; 180 rows
EXTRA_CHIP=BPL_BYTES ; Extra bitplane for TexturedRubberCube, located at address BASE_CHIP
;======================================================================

; WHDLoad slave header. Field order and sizes are fixed by the WHDLoad slave
; structure; all "x-_base" words are offsets relative to _base.
_base		SLAVE_HEADER				;ws_Security + ws_ID
		dc.w	18				;ws_Version
		dc.w	WHDLF_NoError!WHDLF_EmulTrap!WHDLF_ClearMem 	;ws_flags
		dc.l	BASE_CHIP+((EXTRA_CHIP+4095)&-4096) ;ws_BaseMemSize (EXTRA_CHIP rounded up to a 4K multiple)
		dc.l	0				;ws_ExecInstall
		dc.w	start-_base			;ws_GameLoader
		dc.w	0				;ws_CurrentDir
		dc.w	0				;ws_DontCache
_keydebug	dc.b	0				;ws_keydebug
_keyexit	dc.b	$59				;ws_keyexit = F10
; The code reads this field back through _expmem(pc) as the base address of the
; expansion memory, so the loader is expected to have replaced the size with
; the address by the time start runs.
_expmem		dc.l	EXP_RESERVE+EXTRA_EXP      	;ws_ExpMem
		dc.w	_name-_base			;ws_name
		dc.w	_copy-_base			;ws_copy
		dc.w	_info-_base			;ws_info
		dc.w    0    				; kickstart name (none)
		dc.l    $0      			; kicksize
		dc.w    $0      			; kickcrc
    		dc.w    _config-_base			; offset of the configuration string
;============================================================================
; Strings referenced from the slave header above.
_config
		dc.b	0				; empty configuration string

_name	dc.b	"Eon",0
_copy	dc.b	"2019 The Black Lotus",0
; _info is a multi-line string (10 = line feed) that is only terminated after
; the contents of the included file "datetime".
_info	dc.b	"Fixed by paraj",10
        dc.b    "Version 0.2",10
        incbin  datetime                        ; presumably a build time stamp generated by the build - confirm
	dc.b	0
	EVEN

        ; PRINTF <"format">[,arg1..arg8]
        ;
        ; Debug logging helper. In DEBUG builds it pushes the optional
        ; arguments as longwords (last argument first: CARG runs from 9 down
        ; to 2 and blank arguments are skipped), loads a0 with the address of
        ; an inline copy of the format string and calls resload_Log. The
        ; pushed arguments are removed again afterwards (NARG-1 longwords).
        ;
        ; a0 and a2 are saved and restored by the macro. Arguments are pushed
        ; after a0/a2 have been saved, so an sp-relative argument would be read
        ; from a shifted stack. Condition codes are not preserved, and whether
        ; d0/d1/a1 survive depends on resload_Log - confirm before relying on it.
        ;
        ; In non-DEBUG builds the macro expands to nothing.
        ifd DEBUG

PRINTF macro
        movem.l a0/a2,-(sp)
CARG set 9
        rept 8
        ifnb \.
        move.l  \.,-(sp)
        endc
CARG set CARG-1
        endr
        lea     .fmt\@(pc),a0
        move.l  (_resload,pc),a2
        jsr     (resload_Log,a2)
        bra     .done\@
.fmt\@: dc.b    \1
        dc.b    0
        even
.done\@:
        add     #4*(NARG-1),sp
        movem.l (sp)+,a0/a2
        endm

        else ; DEBUG

PRINTF macro
        endm

        endc ; DEBUG

; UAECMD 'command string'
;
; Emits an inline, zero-terminated copy of the command string and calls the
; routine at $f0ff60 with six longwords on the stack (pushed in this order):
; 0, a0, length of the string including its terminator, address of the string,
; -1 and 82. The 24 bytes of arguments are removed after the call.
; NOTE(review): $f0ff60 and function number 82 look like the UAE emulator's
; native call interface (the macro is only used for the WARP_TO debugging
; aid) - confirm against the emulator documentation. This will not work on
; real hardware.
; Registers are not saved by the macro; what the call clobbers is not visible
; from this file.
UAECMD macro
.call\@:
        pea     0.w
        move.l  a0,-(sp)
        pea     .cmdend\@-.cmd\@
        pea     .cmd\@(pc)
        pea     -1.w
        pea     82.w
        jsr     $f0ff60
        lea     24(sp),sp
        bra     .done\@
.cmd\@: dc.b    \1
        dc.b    0
.cmdend\@:
        even
.done\@:
        endm

; GET_EXP_BASE reg
;
; Loads reg with the base address of the memory block the demo uses as its
; "extended memory": the fixed address 512K in CHIP_ONLY builds, otherwise the
; value stored in the _expmem header field. All hard-coded offsets added to
; this base elsewhere in the file are offsets into the demo's loader/program
; that lives there. Sets the condition codes when reg is a data register.
GET_EXP_BASE macro
        ifd CHIP_ONLY
        move.l  #512*1024,\1
        else
        move.l  _expmem(pc),\1
        endc
        endm

; WAITLINES count
;
; Busy-waits until the byte at $dff006 has changed "count" times. That byte is
; the high byte of the custom chip register VHPOSR, i.e. the low 8 bits of the
; vertical beam position, so this waits for roughly "count" raster lines.
; count must be at least 1 (it is used as a dbf counter).
; All registers used are saved and restored; condition codes are not.
; Not referenced anywhere in the visible part of this file.
WAITLINES macro
        movem.l d0-d2/a0,-(sp)
        move.w  #\1-1,d0
        lea     $dff006,a0
.l\@
        move.b  (a0),d1
.w\@
        cmp.b   (a0),d1
        beq     .w\@
        dbf     d0,.l\@
        movem.l (sp)+,d0-d2/a0
        endm


INITIAL_ALLOC=$400                      ; address the $400-byte boot block is loaded to (see start)
INITIAL_BUFFER=INITIAL_ALLOC+$400       ; address directly behind the boot block; receives the data that follows it on disk

; Slave entry point (ws_GameLoader).
; In:  a0 = resident loader (resload) base
; Stores the resload base, queries the chip revision bits, prepares the extra
; bitplane (init), loads the demo's boot block and the data following it from
; disk 1, hooks the boot code so that load_done runs when it has finished, and
; finally jumps into the boot code. Does not return.
start 		;A0 = resident loader
        move.l  a0,a2
	lea	_resload(pc),a1
	move.l	a2,(a1)

        ; Fills in chip_rev (see control_tags)
        lea     control_tags(pc),a0
        jsr     resload_Control(a2)

        ifd WARP_TO
        UAECMD  'AKS_WARP 1'
        endc

        jsr     init

        ; Load bootblock
        move.l	#0,d0			;offset
        move.l	#$400,d1		;size
        moveq	#1,d2			;disk
        lea	INITIAL_ALLOC,a0	;data
        move.l	_resload(pc),a2
        jsr	resload_DiskLoad(a2)

        ; Load extra data
        ; Its size is taken from the longword at offset $3f8 of the boot block
        ; and rounded up to a multiple of 512 bytes.
        move.l  #$400,d0                ;offset
        move.l  INITIAL_ALLOC+$3f8,d1
        add.l   #511,d1
        and.l   #-512,d1                ; size
        moveq   #1,d2
        ; A4 points to where chip buffer is (loading code sets up $80 bytes stack below that)
        lea     INITIAL_BUFFER,a4
        move.l  a4,a0
        jsr	resload_DiskLoad(a2)

        lea     INITIAL_ALLOC+$366,a5 ; Points to info struct

        GET_EXP_BASE a0
        move.l  a0,4(a5) ; Tell it where extended memory is located

        ; Redirect the boot code at offset $198 to load_done
        ; (patch is a macro from whdmacros.i; presumably it plants a JMP there)
        patch   (INITIAL_ALLOC+$198),load_done

        ; Jump in after _LVOSuperState
        jmp     INITIAL_ALLOC+$d4

; Reached from the boot code through the patch installed at INITIAL_ALLOC+$198.
; Applies pl_program to the code at the extended memory base, reconfigures the
; CPU caches and then continues at the address held in a2 on entry.
; All registers are preserved (a2 is restored before it is used as the jump
; target, so the jump goes to the boot code's own a2).
load_done:
        movem.l d0-a6,-(sp)
        lea     pl_program(pc),a0       ; a0 = patch list
        GET_EXP_BASE a1                 ; a1 = base address the patch offsets are relative to
        move.l  _resload(pc),a2
        jsr     resload_Patch(a2)

        ; Disable I$, no SMC until part 13 but part 12 finishes too early otherwise
        ; (the new setup enables the data cache, but not the instruction cache)
        move.l  #WCPUF_DC!WCPUF_Exp_WT!WCPUF_Slave_CB,d0
        move.l  #WCPUF_All,d1
        move.l  _resload(pc),a2
        jsr     resload_SetCPU(a2)

        movem.l (sp)+,d0-a6

        jmp     (a2)

; Calls the demo routine at offset $3704 of the extended memory base.
; It is used wherever this slave has to wait for the demo to make progress, so
; it presumably gives the demo's main code / current effect a chance to run -
; confirm against the demo's loader.
; a0 is preserved here; everything else depends on the called routine.
yield:
        move.l  a0,-(sp)
        GET_EXP_BASE a0
        jsr     $3704(a0)
        move.l  (sp)+,a0
        rts

; Hands a pointer over to the demo and waits until it has been consumed.
; In:  a0 = value to publish (next_effect passes the interrupt structure of
;           the effect being started)
; The value is written to the longword at offset $957c of the extended memory
; base; the routine then calls yield until that longword reads as zero again.
; a1 is preserved. An a0 of zero would return after a single yield.
set_int_routines:
        move.l  a1,-(sp)
        GET_EXP_BASE a1
        add.l   #$957c,a1               ; add.l: the offset does not fit a signed 16-bit displacement
        move.l  a0,(a1)
.w0:    jsr     yield
        tst.l   (a1)
        bne     .w0
        move.l  (sp)+,a1
        rts


        ; Debug builds read the demo's frame counter through a fixed absolute
        ; address for the PRINTF time stamps. That address is only used when
        ; CHIP_ONLY is defined as well, so any other DEBUG build is rejected
        ; at assembly time.
        ifd DEBUG
        ifnd CHIP_ONLY
        error Can't access FrameCounter like this
        endc
FrameCounter=$089580
        endc

; Quits the demo: pushes TDREASON_OK and jumps to resload_Abort.
; Does not return. Reached from loader_done (and from pl_stop, if that patch
; list is applied).
exit:
        ;blitz
        pea     TDREASON_OK
        move.l  (_resload,pc),a0
        jmp     (resload_Abort,a0)



; Patch list applied by load_done. All offsets are relative to the extended
; memory base (GET_EXP_BASE), where the demo's loader/program resides.
pl_program      PL_START
                PL_NOP  $35e6,4         ; Don't do disk stuff in vblank routine
                PL_R    $63b8           ; Don't initialize disk
                PL_W    $62aa,$8200     ; Don't enable disk DMA
                PL_P    $5804,diskread  ; disk reads go through resload_DiskLoad
                PL_PS   $5f28,change_disk ; switch to disk 2
                PL_PS   $5ac0,handle_disk_range ; redirect loads into shadow buffers
                PL_PS   $60b0,handle_disk_range_array ; enables the redirection for this call only
                PL_P    $5dd2,handle_reloc ; relocate inside the shadow buffers
                PL_PS   $5fd4,next_effect ; both call sites copy the shadow buffers
                PL_PS   $602c,next_effect ; into place and start the effect
                PL_NOP  $6164,4 ; Don't detect AGA (corruption in part13)
                PL_P    $5f20,loader_done ; wait for the left mouse button, then quit
                PL_END

; Patch list that redirects offset 0 of its target to exit.
; Not referenced in the visible part of this file (effect_patches has no entry
; pointing here); presumably a debugging aid to stop at a given effect.
pl_stop         PL_START
                PL_P    $0,exit
                PL_END

; Patch list for effect 13 (TexturedRubberCube), selected through
; effect_patches and applied by handle_reloc to the effect's shadow copy.
; p13_start relies on the subroutine call being planted at offset $4: it
; derives the base of the effect from its own return address.
pl_part13       PL_START
                PL_PS   $4,p13_start
                PL_END

                ; Uses "7-bpl" "illegal" OCS mode
                ; BPL6 dat is fixed to 0, but BPL5 dat is modified by
                ; copperlist
; Applied by p13_start to the running effect, only when the chip revision bits
; report an AGA chipset. Offsets are written as "address in the original
; image minus $9CB64", $9CB64 being the effect's load address listed in
; effect_patches.
pl_part13_aga   PL_START
                PL_PS   $09cf5c-$9CB64,p13_onvb ; point BPL5PT at the extra bitplane
                PL_W    $09d37c-$9CB64,$5201 ; 5 bpl mode in copperlist
                ; BPL5DAT -> NOOP
                PL_W    $09dbbc-$9CB64,$1fe
                PL_W    $09dbd4-$9CB64,$1fe
                PL_END

; Hook called at the start of effect 13 (installed by pl_part13 at offset $4).
; Re-executes the instruction the hook replaced (store d0.w at effect base +
; $acf54-$9cb64) and, on AGA machines, applies pl_part13_aga to the effect.
; In:  d0.w = value to store; (sp) = return address into the effect
; d0/a0/a1 are preserved. d3 and a2-a4 are overwritten.
p13_start:
        ; All registers preserved by init code a0/a1/d0 must not be modified
        move.l  (sp),a3         ; use return address to figure out where to store d0
        lea     -10(a3),a3      ; base of effect (hook at offset 4 + presumably 6 bytes of call instruction)
        move.l  a3,a4
        add.l   #$acf54-$9cb64,a4
        move.w	d0,(a4)  ; replacement for original code

        movem.l d0/a0/a1,-(sp)

        move.l  chip_rev(pc),d3 ; filled in by resload_Control in start
        btst    #2,d3 ;GFXB_AA_ALICE
        beq     .out ; no aga

        lea     pl_part13_aga(pc),a0    ; a0 = patch list
        move.l  a3,a1                   ; a1 = base of effect
        move.l  _resload(pc),a2
        jsr     resload_Patch(a2)
.out:
        movem.l (sp)+,d0/a0/a1
        rts

; Hook planted into effect 13 by pl_part13_aga (AGA machines only).
; Points bitplane pointer 5 (bplpt+4*4) at the extra bitplane at BASE_CHIP
; that init has prepared, then loads a0 with extended memory base +
; ($cd0c0-$80000), presumably standing in for the instruction the hook
; replaced.
; Out: a0 = extended memory base + $cd0c0-$80000
p13_onvb:
        ; d0 also free
        move.l  #BASE_CHIP,bplpt+4*4+_custom
        GET_EXP_BASE a0
        add.l   #$cd0c0-$80000,a0
        rts

; Per-effect patch lists, one word per effect in loading order.
; handle_reloc reads entry (effectcnt-1) after incrementing effectcnt:
;   0        = nothing to patch
;   non-zero = signed offset of a patch list relative to effect_patches
; The trailing comments give the effect number, an address (presumably the
; effect's load address in the original memory layout) and its name.
effect_patches:
        ; Disk 1
        dc.w    0                       ; 01 9CB64 TBLLogo
        dc.w    0                       ; 02 FE030 MobileIntro
        dc.w    0                       ; 03 9CB64 SpaceIntro
        dc.w    0                       ; 04 FCE80 SolarEclipse
        dc.w    0                       ; 05 9CB64 SpaceDescend
        dc.w    0                       ; 06 F2930 ForestLines
        dc.w    0                       ; 07 9CB64 PillarOfLight_I
        dc.w    0                       ; 08 B66C0 PillarOfLight_II
        dc.w    0                       ; 09 9CB64 SpaceAscend
        dc.w    0                       ; 10 EF340 ParticleSphere (Parking lot)
        dc.w    0                       ; 11 9CB64 AlleyScenePart1
        dc.w    0                       ; 12 D2D10 AlleyScenePart3
        ; Disk 2
        dc.w    pl_part13-effect_patches; 13 9CB64 TexturedRubberCube
        dc.w    0                       ; 14 FFC20 TransitionEffect0
        dc.w    0                       ; 15 9CB64 WomanFlatWideShot
        dc.w    0                       ; 16 B0910 WomanFlatShade
        dc.w    0                       ; 17 9CB64 WomanFlatWideShot2
        dc.w    0                       ; 18 B52F0 WomanFlatShadePart3
        dc.w    0                       ; 19 9CB64 WomanFlatShadeFloat
        dc.w    0                       ; 20 E19D0 WomanFlatShadeFloat2
        dc.w    0                       ; 21 9CB64 FloatingInSpace
        dc.w    0                       ; 22 FD350 SolarDeclipse
        dc.w    0                       ; 23 9CB64 EndLogoCredits

; One-time setup, called from start before anything is loaded.
; Prepares the extra bitplane at BASE_CHIP that part 13 uses as bitplane 5 on
; AGA machines: a solid block of set bits, 14 bytes wide and $dc-$78 rows
; high, starting at row 38, byte 12. All registers are preserved.
init:
        movem.l d0-a6,-(sp)

;        ifd DEBUG
;        clr.l   -(a7)                           ;TAG_DONE
;        pea     $1fe                            ;noop
;        move.l  #WHDLTAG_CUST_READ,-(a7)
;        move.l  a7,a0
;        move.l  (_resload,pc),a2
;        jsr     (resload_Control,a2)
;        lea     (12,a7),a7                      ;restore sp
;        endc

        ; Initialize bpl5 for part13
        lea     BASE_CHIP+BPL_ROWBYTES*38+12,a1 ; a1 = first byte of the block (even address)
        moveq   #-1,d1                          ; all bits set
        moveq   #($dc-$78)-1,d0                 ; row counter for dbf
.row:
        ; 14 bytes per row = three longwords and one word
        move.l  d1,(a1)+
        move.l  d1,(a1)+
        move.l  d1,(a1)+
        move.w  d1,(a1)+
        lea     BPL_ROWBYTES-14(a1),a1          ; step to the same column of the next row
        dbf     d0,.row

        movem.l (sp)+,d0-a6
        rts

; Hook at loader offset $5ac0 (see pl_program); the two instructions it
; replaced are re-executed at .skip.
; In:  a0 = descriptor of the range being loaded (this routine reads the
;           length at $4(a0) and the longword at $c(a0); DEBUG builds also
;           read the word at $1e(a0))
;      a1 = destination address
; Out: a1 = address the data should really be loaded to
;      d2 = a1, d4 = $c(a0) (the replaced instructions)
;
; While buffer_enable is set (only during handle_disk_range_array) every range
; whose destination is at or above 512K is redirected into a "shadow" buffer
; in the expansion memory behind EXP_RESERVE, so that the effect currently
; running at the real destination is not overwritten. The redirection is
; recorded in mem_buffers as (destination, shadow address, size) and undone by
; next_effect, which copies the data into place.
;
; NOTE(review): mem_buffers has room for 40 entries and nothing here checks
; for running past its end.
handle_disk_range:
        ifd DEBUG
        moveq   #0,d0
        move.w  $1e(a0),d0
        move.l  $4(a0),d2
        add.l   a1,d2
        PRINTF <"[%5ld] HandleDiskRange Dest=%lx-%lx [$%lx] format %ld">,FrameCounter,a1,d2,$4(a0),d0
        endc

        ; buffer_enable is set/cleared as a byte; any non-zero longword counts
        move.l  buffer_enable(pc),d2
        beq     .skip

        ; Don't reloc chip buffers
        cmp.l   #512*1024,a1
        blo     .skip

        PRINTF <"Buffering...">

        ;
        ; Move buffer to expmem, and keep track of info
        ;
        movem.l d0-d1/a2-a4,-(sp)

        ; d1 = next free shadow address (starts at _expmem+EXP_RESERVE)
        lea     fast_buf_ptr(pc),a4
        move.l  (a4),d1
        bne     .gotfast
        move.l  _expmem(pc),d1
        add.l   #EXP_RESERVE,d1
.gotfast:
        ; a3 = next free entry of mem_buffers (starts at mem_buffers)
        lea     mem_buf_ptr(pc),a2
        move.l  (a2),d0
        bne     .gotbuf
        lea     mem_buffers(pc),a3
        move.l  a3,d0
.gotbuf:
        move.l  d0,a3
        move.l  a1,(a3)+                ; entry+0: real destination
        move.l  d1,a1                   ; redirect the load
        move.l  a1,(a3)+                ; entry+4: shadow address
        move.l  $4(a0),(a3)+            ; entry+8: size in bytes
        move.l  a3,(a2)                 ; advance mem_buf_ptr
        add.l   $4(a0),d1
        move.l  d1,(a4)                 ; advance fast_buf_ptr


        ifd DEBUG
        ; Trap if the shadow area (EXTRA_EXP bytes) has been exceeded.
        ; NOTE(review): blo also traps when the area is filled exactly.
        sub.l   _expmem(pc),d1
        sub.l   #EXP_RESERVE,d1
        cmp.l   #EXTRA_EXP,d1
        blo     .ok
        illegal
.ok:
        endc

        movem.l (sp)+,d0-d1/a2-a4    ; a1 must *not* be preserved (modified on purpose)
.skip:
	MOVE.L	A1,D2			;085ac0: 2409
	MOVE.L	$c(A0),D4               ;085ac2: 2828000c
        rts

; Search for buffer with dest=d1, returns result in a5
; In:  d1 = real destination address of a buffer recorded by handle_disk_range
; Out: a5 = address of the matching 12-byte entry in mem_buffers
;           (+0 destination, +4 shadow address, +8 size)
; If no entry below mem_buf_ptr matches, the slave aborts through
; resload_Abort with TDREASON_FAILMSG and the message below (no return).
; The first entry is compared before the end of the list is checked, so a
; stale first entry can match even when the list is empty.
; Only a5 is modified on the success path.
find_buf:
        lea     mem_buffers(pc),a5
.search:
        cmp.l   (a5),d1
        bne     .next
        rts
.next:
        lea     12(a5),a5               ; next entry
        cmp.l   mem_buf_ptr(pc),a5
        bcs     .search                 ; still below the end of the list
        pea     .errmsg
        pea     TDREASON_FAILMSG
        move.l  _resload(pc),a2
        jmp     resload_Abort(a2)
.errmsg:
        dc.b    'Internal error: Shadow buffer not found',0
        even

; TO_FAST_BUF reg
;
; Replaces the destination address in reg by the address of its shadow buffer
; (looked up with find_buf, which aborts if there is none).
; Overwrites d1 and a5.
TO_FAST_BUF macro
        move.l  \1,d1
        bsr     find_buf
        move.l  4(a5),\1
        endm

; Replacement for the loader's relocation routine from offset $5dd2 to its end
; (see pl_program). The effect's code and relocation table have been loaded
; into shadow buffers, so the relocation is carried out on the shadow copies
; while the values added are still the final addresses.
;
; In:  a0 = data address, a1 = code address, a2 = relocation table address,
;      d0 = size of the relocation table in bytes
;      The stack holds what the replaced routine had pushed: D2-D5/A2-A6,
;      followed by 12 more bytes that are discarded before returning.
; Out: d0 = -1
;
; Relocation table format as decoded below: a sequence of groups, each
; starting with a header word
;   bit 0     : value to add - 0 = data address, 1 = code address
;   bit 1     : where to patch - 0 = data (a0), 1 = code (shadow copy)
;   bits 2-15 : number of offsets that follow, minus 1
; followed by the offsets, each relative to the previously patched location.
; An offset is one word, or two words (with bit 15 of the first one set) for
; values that need up to 31 bits.
;
; Side effects: stores the code address in setvecs, increments effectcnt,
; applies the effect's patch list from effect_patches (if any) to the shadow
; copy of the code, and zeroes the size of the relocation table's buffer
; entry so that next_effect does not copy it.
; NOTE(review): d6 is used as scratch but is not among the registers restored
; at the end - confirm that the caller does not rely on it.
handle_reloc:
        ;PRINTF  <"[%5ld] HandleRelocs A0=%lx A1=%lx A2=%lx D0=%lx">,FrameCounter,a0,a1,a2,d0
        ; a0 = data, a1 = code, a2 = relocs, d0 = num reloc bytes
        lea     setvecs(pc),a6
        move.l  a1,(a6)         ; Store "set vectors" entry point of effect

        move.l  a0,d2           ; d2 = data addr
        move.l  a1,d3           ; d3 = code addr

        move.l  a2,a6           ; Save address of relocs

        ; From here on a1/a2 point at the shadow copies (clobbers d1/a5)
        TO_FAST_BUF a1
        TO_FAST_BUF a2
        lea     (a2,d0.l),a3    ; a3 = end of reloc data
.reloc:
        cmp.l   a2,a3
        beq     .reloc_done
        move.w  (a2)+,d4        ; group header
        move.l  d2,d5           ; d5 = base address
        lsr.w   #1,d4
        bcc     .got_base
        move.l  d3,d5           ; into code
.got_base:
        move.l  a0,a4           ; a4 = target
        lsr.w   #1,d4
        bcc     .reloc_inner
        move.l  a1,a4
.reloc_inner:
        moveq   #0,d6
        move.w  (a2)+,d6
        bpl     .do_reloc       ; bit 15 clear: 16-bit offset
        and.w   #$7fff,d6       ; otherwise this is the high word
        swap    d6
        move.w  (a2)+,d6        ; and the low word follows
.do_reloc:
        add.l   d6,a4           ; offsets are cumulative
        add.l   d5,(a4)
        dbf     d4,.reloc_inner
        bra     .reloc
.reloc_done:

        ; No need to copy relocs
        move.l  a6,d1
        bsr     find_buf
        clr.l   8(a5)

        ; Apply patch (if any)
        lea     effectcnt(pc),a0
        move.l  (a0),d0
        addq.l  #1,d0
        move.l  d0,(a0)
        ;PRINTF <"[%5ld] Effect %ld (base %lx)">,FrameCounter,d0,setvecs(pc)
        lea     effect_patches(pc),a0
        add.l   d0,d0
        move.w  -2(a0,d0.l),d0  ; entry (effectcnt-1) of effect_patches
        beq     .nopatches
        lea     (a0,d0.w),a0    ; a0 = patch list (signed offset from effect_patches)
        move.l  setvecs(pc),d1
        bsr     find_buf
        move.l  4(a5),a1        ; a1 = shadow copy of the code
        move.l  _resload(pc),a2
        jsr     resload_Patch(a2)
.nopatches:


        ; Copy chip buffers in "background"
        ; Entries with a destination below 512K are copied right away and
        ; their size is zeroed. handle_disk_range never records such a
        ; destination, so as far as this file shows nothing is copied here.
        lea     mem_buffers(pc),a4
        lea     mem_buf_ptr(pc),a0
        move.l  (a0),a3         ; a3 = end of the list
.copybufs:
        move.l  (a4),a1
        cmp.l   #512*1024,a1
        bhs     .nextbuf
        move.l  4(a4),a0
        move.l  8(a4),d0
        beq     .nextbuf
        clr.l   8(a4)
        lsr.l   #2,d0           ; Allocations are always long word sized/aligned
.copy:
        move.l  (a0)+,(a1)+
        subq.l  #1,d0
        bne.b   .copy
.nextbuf:
        lea     12(a4),a4
        cmp.l   a3,a4
        bne     .copybufs

        moveq   #-1,d0 ; Return dummy value different from 0
        ; Tail of the replaced routine (original addresses and opcodes on the right)
	MOVEM.L	(A7)+,D2-D5/A2-A6	;085e14: 4cdf7c3c
	LEA.L	$000c(A7),A7		;085e18: 4fef000c
	RTS				;085e1c: 4e75

; Hook at loader offset $60b0 (see pl_program). Calls the loader routine at
; offset $5c38 with a0 = a5 (presumably what the replaced code did) and keeps
; buffer_enable set for the duration of that call, so that every
; handle_disk_range invoked from it redirects its load into a shadow buffer.
; a4/a5 are preserved, a0 is overwritten; everything else is up to the called
; routine.
handle_disk_range_array:
        move.l  a5,a0
        movem.l a4/a5,-(sp)
        lea     buffer_enable(pc),a5
        st      (a5)                    ; sets the first byte only; tested as a longword
        GET_EXP_BASE a4
        jsr     $5c38(a4)
        clr.b   (a5)
        movem.l (sp)+,a4/a5
        rts

; Hook at loader offsets $5fd4 and $602c (see pl_program): makes the effect
; that has just been loaded and relocated the current one.
;
;  1. Before effect 14 is published, waits (yielding) until the longword at
;     extended memory base + $cd0c8-$80000 reads 5, i.e. until the rubber
;     cube of part 13 has gone through all its states.
;  2. Copies every shadow buffer recorded in mem_buffers to its real
;     destination and resets mem_buf_ptr/fast_buf_ptr for the next effect.
;     Entries whose size has been zeroed by handle_reloc are skipped.
;  3. Flushes the caches, as code has just been copied.
;  4. Clears one of the two 32-byte interrupt structures in intstructs
;     (selected by bit 0 of effectcnt), lets the effect fill it in by calling
;     its entry point stored in setvecs (a0 = structure, a1 = extended memory
;     base + $69d4, d0 = 0) and hands it to the demo with set_int_routines.
;
; All registers are preserved.
next_effect:
        movem.l d0-a6,-(sp)

        move.l  effectcnt(pc),d0
        cmp.l   #14,d0
        bne     .not14
        ; Make sure rubber cube has gone through all states
        GET_EXP_BASE a2
        add.l   #$cd0c8-$80000,a2
.wait13done:
        cmp.l   #5,(a2)
        beq     .not14
        bsr     yield
        bra     .wait13done

.not14:
        PRINTF <"[%5ld] Publishing effect %ld: %lx">,FrameCounter,effectcnt(pc),setvecs(pc)

        ifd WARP_TO
        ; d7 = 1 if this is the effect to stop warping at (tested again below)
        moveq   #0,d7
        move.l  effectcnt(pc),d0
        cmp.l   #WARP_TO,d0
        bne     .notyet
        moveq   #1,d7
        UAECMD 'AKS_WARP -1'
.notyet:
        endc

        lea     mem_buffers(pc),a4
        lea     mem_buf_ptr(pc),a0
        move.l  (a0),a3                 ; a3 = end of the list (0 if nothing was recorded)
        ; reset pointers for next effect
        clr.l   (a0)
        lea     fast_buf_ptr(pc),a0
        clr.l   (a0)

        ; Nothing recorded since the last call: the loop below only stops
        ; when a4 reaches a3, so it must not be entered with a3 = 0.
        move.l  a3,d0
        beq     .nobufs
.copybufs:
        move.l  (a4)+,a1                ; real destination
        move.l  (a4)+,a0                ; shadow buffer
        move.l  (a4)+,d0                ; size in bytes
        lsr.l   #2,d0           ; Allocations are always long word sized/aligned
        beq     .nextbuf
.copy:
        move.l  (a0)+,(a1)+
        subq.l  #1,d0
        bne.b   .copy
.nextbuf:
        cmp.l   a3,a4
        bne     .copybufs
.nobufs:

        move.l  _resload(pc),a0
        jsr     resload_FlushCache(a0)

        ; Alternate between interrupt structures
        ; a4 = intstructs + 32*(effectcnt & 1). The structure handed out for
        ; the previous effect is left untouched while this one is set up.
        lea     intstructs(pc),a4
        moveq   #1,d0
        and.l   effectcnt(pc),d0
        lsl.l   #5,d0
        add.l   d0,a4
        move.l  a4,a0
        rept 8
        clr.l   (a0)+
        endr

        ; a0 = intstruct
        move.l  a4,a0
        GET_EXP_BASE a1
        lea     $69d4(a1),a1; return value of function at $8336c (util function array)
        moveq   #0,d0
        move.l  setvecs(pc),a5
        jsr     (a5)

        ;PRINTF <"[%5ld] Ready %lx %lx %lx %lx %lx %lx %lx">,FrameCounter,$0004(a4),$0008(a4),$000C(a4),$0010(a4),$0014(a4),$0018(a4),$001C(a4)


        move.l  a4,a0
        bsr     set_int_routines

        ifd WARP_TO
        tst.l   d7
        beq     .notyet2
        UAECMD 'AKS_ENTERDEBUGGER 1'
.notyet2:
        endc

        movem.l (sp)+,d0-a6
        rts

; Replacement for the loader's disk read routine at offset $5804 (see
; pl_program): reads from the disk image through resload_DiskLoad.
; In:  d0 = offset on disk, d1 = size, a0 = destination (the registers
;      resload_DiskLoad takes, as used in start); the disk number is taken
;      from the variable "disk"
; All registers are restored before the routine leaves through yield, whose
; return goes back to the caller of diskread.
diskread:
        movem.l d0-a6,-(sp)

        cmp.l   #$A790A,d0
        ; Hack: Delay loading of this sample to avoid audio hick up in part 17
        bne     .ok
        move.w  #5*50,d2                ; number of yields (dbf runs one more)
.wait:
        bsr     yield
        dbf     d2,.wait
.ok

        ;PRINTF <"[%5ld] DiskRead Offset=%lX Size=%lX Dest=%lX">,FrameCounter,d0,d1,a0
        move.l  disk(pc),d2
        move.l	_resload(pc),a2
        jsr	resload_DiskLoad(a2)

        movem.l (sp)+,d0-a6
        jmp     yield

; Hook at loader offset $5f28 (see pl_program), presumably the place where
; the original asks for the second disk: all following reads in diskread use
; disk 2. Overwrites a0.
change_disk:
        lea     disk(pc),a0
        move.l  #2,(a0)
        rts

; Replaces the loader at offset $5f20 (see pl_program), presumably the point
; reached once everything has been loaded. Keeps the demo running by yielding
; and quits through exit as soon as bit 6 of $bfe001 (CIA-A port A, the left
; mouse button, active low) reads as 0. Never returns.
loader_done:
.poll:
        btst	#6,$bfe001
        bne     .idle                   ; button not pressed
        bra     exit
.idle:
        bsr     yield
        bsr     yield
        bra     .poll

; --------------------------------------------------------

; Disk number passed to resload_DiskLoad by diskread; set to 2 by change_disk.
disk:
        dc.l    1
; Tag list passed to resload_Control in start. chip_rev is the data longword
; of the WHDLTAG_CHIPREVBITS_GET tag and is read by p13_start afterwards.
control_tags:
        dc.l WHDLTAG_CHIPREVBITS_GET
chip_rev:
        dc.l    0
        dc.l TAG_DONE
; --------------------------------------------------------
; BSS
; Variables; the code relies on all of them starting out as zero.

; Shadow buffer list filled by handle_disk_range: 12 bytes per entry, room
; for 40 entries.
mem_buffers:
        ds.l    40*3 ; dest, src, size
; Address of the next free entry of mem_buffers, or 0 if nothing has been
; recorded since next_effect last reset it.
mem_buf_ptr:
        ds.l    1
; Next free address in the shadow buffer area, or 0 if the area is unused
; (it then starts at _expmem+EXP_RESERVE).
fast_buf_ptr:
        ds.l    1
; Code address of the effect most recently passed to handle_reloc; called by
; next_effect to let the effect fill in its interrupt structure.
setvecs:
        ds.l    1
; Two interrupt structures of 8 longwords each, see next_effect.
intstructs:
        ds.l    8*2
; Number of effects relocated so far (1 = first effect), see handle_reloc.
effectcnt:
        ds.l    1
; Non-zero while handle_disk_range has to redirect loads. Only the first byte
; is ever written (handle_disk_range_array).
buffer_enable:
        ds.l    1
; Base of the resident loader (resload), stored by start.
_resload:
        ds.l    1
