// Defines a new builtin, creating the word header and appropriate labels.
.macro builtin label, name, flags=0
	.p2align 3
	forth.\label\().header:
		.quad last_builtin
		.word (\flags << 8) | (forth.\label\().name.end - forth.\label\().name)
	forth.\label\().name:
		.ascii "\name"
	forth.\label\().name.end:
		.align 2, 0
		.byte forth.\label\().name.end - forth.\label\().name
		.skip 3
	.global forth.\label
	forth.\label:
	.set last_builtin, forth.\label\().header
.endm
.set last_builtin, 0

.set SMUDGED, 1
.set IMMEDIATE, 2

// Checks that at least `n` bytes can be written to the dictionary. Leaves the
// dictionary pointer in x0, and the remaining space (after subtracting `n`) in
// x1.
.macro check_dict_space n
	ldp x0, x1, [x21, #0x10]
	subs x1, x1, \n
	b.lo forth.runtime.out_of_dictionary
.endm

// Checks that at least `n` values are present on the stack. May trash x0.
.macro check_stack_depth n
	.if \n == 1
		cmp x22, x23
		b.eq forth.runtime.stack_underflow
	.else
		sub x0, x23, x22
		cmp x0, \n * 8
		b.lo forth.runtime.stack_underflow
	.endif
.endm

// Checks that at least `n` values can be pushed to the stack. Leaves the stack
// depth in bytes in x0.
.macro check_stack_free n=1
	sub x0, x23, x22
	cmp x0, #0x208 - \n*8
	b.hs forth.runtime.stack_overflow
.endm

// Performs NEXT.
.macro next
	ldr x0, [x20], #8
	br x0
.endm

// Pops a value from the stack into the given register (defaults to
// top-of-stack).
.macro pop reg=x24
	ldr \reg, [x23, #-8]!
.endm

// Pushes a value from the register (by default, the top-of-stack register) to
// the stack.
.macro push reg=x24
	str \reg, [x23], #8
.endm

// Defines words for reading from and writing to a system register.
.macro system_register name, ro=0
builtin aarch64_\name\()_read, "AARCH64/\name\()@"
	check_stack_free
	push
	mrs x24, \name
	next

.ifeq \ro
builtin aarch64_\name\()_write, "AARCH64/\name\()!"
	check_stack_depth 1
	msr \name, x24
	pop
	next
.endif
.endm

.section .text.forth.builtins

builtin __branch, "(BRANCH)"
	ldr x20, [x20]
	next

builtin __branch0, "(BRANCH0)"
	check_stack_depth 1
	mov x0, x24
	pop
	ldr x1, [x20], #8
	cbnz x0, forth.__branch0.end
	mov x20, x1
forth.__branch0.end:
	next

builtin __docolon, "(DOCOLON)"
	sub x0, x26, x25
	cmp x0, #0x100
	b.hs forth.runtime.return_stack_overflow
	str x27, [x26], #8
	mov x27, x20
	mov x20, lr
	next

builtin __dodefer, "(DODEFER)"
	ldr x0, [lr]
	br x0

builtin __dodoes, "(DODOES)"
	check_stack_free
	sub x0, x26, x25
	cmp x0, #0x100
	b.hs forth.runtime.return_stack_overflow
	push
	mov x24, x29
	str x27, [x26], #8
	mov x27, x20
	mov x20, lr
	next

builtin __dovalue, "(DOVALUE)"
	check_stack_free
	push
	ldr x24, [lr]
	next

builtin __dovariable, "(DOVARIABLE)"
	check_stack_free
	push
	mov x24, lr
	next

builtin __lit, "(LIT)"
	check_stack_free
	push
	ldr x24, [x20], #8
	next

builtin __unexpected_eof, "(UNEXPECTED-EOF)"
	bl forth.runtime.unexpected_eof

builtin __word_not_found, "(WORD-NOT-FOUND)"
	check_stack_depth 2
	pop x0
	mov x1, x24
	pop
	bl forth.runtime.word_not_found

builtin _asterisk, "*"
	check_stack_depth 2
	pop x0
	mul x24, x0, x24
	next

builtin _at_b, "[email protected]"
	check_stack_depth 1
	ldrb w24, [x24]
	next

builtin _at_d, "[email protected]"
	check_stack_depth 1
	ldr x24, [x24]
	next

builtin _at_d_rev, "[email protected]"
	check_stack_depth 1
	ldr x24, [x24]
	rev x24, x24
	next

builtin _at_h, "[email protected]"
	check_stack_depth 1
	ldrh w24, [x24]
	next

builtin _at_h_rev, "[email protected]"
	check_stack_depth 1
	ldrh w24, [x24]
	rev16 w24, w24
	next

builtin _at_r, "[email protected]"
	check_stack_free
	cmp x25, x26
	b.eq forth.runtime.return_stack_underflow
	push
	mov x24, x27
	next

builtin _at_w, "[email protected]"
	check_stack_depth 1
	ldr w24, [x24]
	next

builtin _at_w_rev, "[email protected]"
	check_stack_depth 1
	ldr w24, [x24]
	rev w24, w24
	next

builtin _bang_b, "B!"
	check_stack_depth 2
	pop x0
	strb w0, [x24]
	pop
	next

builtin _bang_bl, "BL!"
	check_stack_depth 2
	pop x0
	sub x0, x0, x24
	asr x1, x0, #28
	add x1, x1, #1
	cmp x1, #1
	b.hi forth.runtime.bl_comma_out_of_range
	mov x1, #0x94000000
	bfxil x1, x0, #2, #26
	str w1, [x24]
	pop
	next

builtin _bang_d, "D!"
	check_stack_depth 2
	pop x0
	str x0, [x24]
	pop
	next

builtin _bang_d_rev, "BD!"
	check_stack_depth 2
	pop x0
	rev x0, x0
	str x0, [x24]
	pop
	next

builtin _bang_h, "H!"
	check_stack_depth 2
	pop x0
	strh w0, [x24]
	pop
	next

builtin _bang_h_rev, "BH!"
	check_stack_depth 2
	pop x0
	rev16 w0, w0
	str w0, [x24]
	pop
	next

builtin _bang_w, "W!"
	check_stack_depth 2
	pop x0
	str w0, [x24]
	pop
	next

builtin _bang_w_rev, "BW!"
	check_stack_depth 2
	pop x0
	rev w0, w0
	str w0, [x24]
	pop
	next

builtin _brackl, "[", IMMEDIATE
	ldr x0, [x21, #0x28]
	and x0, x0, #0xfffffffffffffffd
	str x0, [x21, #0x28]
	next

builtin _brackr, "]"
	ldr x0, [x21, #0x28]
	orr x0, x0, #0x02
	str x0, [x21, #0x28]
	next

builtin _comma, ","
	check_stack_depth 1
	check_dict_space 8
	str x24, [x0], #8
	stp x0, x1, [x21, #0x10]
	pop
	next

builtin _comma_b, "B,"
	check_stack_depth 1
	check_dict_space 1
	strb w24, [x0], #1
	stp x0, x1, [x21, #0x10]
	pop
	next

builtin _comma_h, "H,"
	check_stack_depth 1
	check_dict_space 2
	strh w24, [x0], #2
	stp x0, x1, [x21, #0x10]
	pop
	next

builtin _comma_str, "STR,"
	check_stack_depth 2
	check_dict_space x24
	pop x2
forth._comma_str.memcpy:
	subs x24, x24, #1
	b.lo forth._comma_str.end
	ldrb w3, [x2], #1
	strb w3, [x0], #1
	b forth._comma_str.memcpy
forth._comma_str.end:
	stp x0, x1, [x21, #0x10]
	pop
	next

builtin _comma_w, "W,"
	check_stack_depth 1
	check_dict_space 4
	str w24, [x0], #4
	stp x0, x1, [x21, #0x10]
	pop
	next

builtin _eq, "="
	check_stack_depth 2
	pop x0
	cmp x0, x24
	b.eq _eq.eq
_eq.ne:
	mov x24, #0
	next
_eq.eq:
	mov x24, #-1
	next

builtin _gt, ">"
	check_stack_depth 2
	pop x0
	cmp x0, x24
	b.gt _gt.gt
	mov x24, #0
	next
_gt.gt:
	mov x24, #-1
	next

builtin _is_immediate, "IMMEDIATE?"
	check_stack_depth 1
	ldr x0, [x24, #0x08]
	mov x24, #0
	tbz x0, #9, forth._is_immediate.non_immediate
	mov x24, #-1
forth._is_immediate.non_immediate:
	next

builtin _is_smudged, "SMUDGED?"
	check_stack_depth 1
	ldr x0, [x24, #0x08]
	mov x24, #0
	tbz x0, #8, forth._is_smudged.visible
	mov x24, #-1
forth._is_smudged.visible:
	next

builtin _lt, "<"
	check_stack_depth 2
	pop x0
	cmp x0, x24
	b.lt _lt.lt
	mov x24, #0
	next
_lt.lt:
	mov x24, #-1
	next

builtin _minus, "-"
	check_stack_depth 2
	pop x0
	sub x24, x0, x24
	next

builtin _neq, "<>"
	check_stack_depth 2
	pop x0
	cmp x0, x24
	b.ne _neq.ne
_neq.eq:
	mov x24, #0
	next
_neq.ne:
	mov x24, #-1
	next

builtin _one_minus, "1-"
	check_stack_depth 1
	sub x24, x24, #1
	next

builtin _one_plus, "1+"
	check_stack_depth 1
	add x24, x24, #1
	next

builtin _plus, "+"
	check_stack_depth 2
	pop x0
	add x24, x0, x24
	next

builtin _slash_mod, "/MOD"
	check_stack_depth 2
	ldr x0, [x23, #-8]
	sdiv x1, x0, x24
	msub x2, x1, x24, x0
	str x2, [x23, #-8]
	mov x24, x1
	next

builtin 2_at_r, "[email protected]"
	check_stack_free 2
	sub x0, x26, x25
	cmp x0, #16
	b.lo forth.runtime.return_stack_underflow
	ldr x0, [x26, #-8]
	stp x24, x0, [x23], #16
	mov x24, x27
	next

builtin 2_from_r, "2R>"
	check_stack_free 2
	sub x0, x26, x25
	cmp x0, #16
	b.lo forth.runtime.return_stack_underflow
	ldr x0, [x26, #-8]!
	stp x24, x0, [x23], #16
	mov x24, x27
	ldr x27, [x26, #-8]!
	next

builtin 2_rdrop, "2RDROP"
	sub x0, x26, x25
	cmp x0, #16
	b.lo forth.runtime.return_stack_underflow
	ldr x27, [x26, #-16]!
	next

builtin 2_to_r, "2>R"
	check_stack_depth 2
	sub x0, x26, x25
	cmp x0, #0x0f8
	b.hs forth.runtime.return_stack_overflow
	pop x0
	stp x27, x0, [x26], #16
	mov x27, x24
	pop
	next

builtin aarch64_exception_vector_table, "AARCH64/EXCEPTION-VECTOR-TABLE"
	check_stack_free
	push
	ldr x24, =exception_vector_table
	next

// aka /STRING, ( addr len n -- addr+n len-n )
builtin adjust, "ADJUST"
	check_stack_depth 3
	mov x0, x24
	pop
	sub x24, x24, x0
	ldr x1, [x23, #-8]
	add x1, x1, x0
	str x1, [x23, #-8]
	next

builtin align_allot_pow2, "ALIGN-ALLOT-POW2"
	check_stack_depth 1
	mov x0, #1
	lsl x0, x0, x24
	sub x0, x0, #1
	ldr x1, [x21, #0x10]
	add x1, x1, x0
	mvn x0, x0
	and x1, x1, x0
	str x1, [x21, #0x10]
	pop
	next

builtin allot, "ALLOT"
	check_stack_depth 1
	check_dict_space x24
	add x0, x0, x24
	stp x0, x1, [x21, #0x10]
	pop
	next

builtin and, "AND"
	check_stack_depth 2
	pop x0
	and x24, x0, x24
	next

builtin base, "BASE"
	check_stack_free
	push
	ldr x0, [x21, #0x28]
	tbz x0, #0, forth.base.decimal
forth.base.hex:
	mov x24, #16
	next
forth.base.decimal:
	mov x24, #10
	next

// ( state-ptr -- )
//
// `state-ptr` must be aligned to 16 bytes, and point to an area of 40 bytes.
builtin blake2s_init, "BLAKE2S/PRIMITIVE/INIT"
	and x0, x24, #0xf
	cbnz x0, forth.blake2s.misaligned
	bl blake2s_init
	stp q0, q1, [x24]
	str xzr, [x24, #32]
	pop
	next

// ( in-addr state-ptr -- )
// - `in-addr` must be aligned to 16 bytes, and point to 64 bytes of data.
// - `state-ptr` must be aligned to 16 bytes, and point to an area of 40 bytes
//   previously initialized by `BLAKE2S/PRIMITIVE/INIT`, and only modified in
//   the interim by `BLAKE2S/PRIMITIVE/UPDATE`.
builtin blake2s_update, "BLAKE2S/PRIMITIVE/UPDATE"
	pop x0

	and x2, x0, #0xf
	cbnz x2, forth.blake2s.misaligned
	and x2, x24, #0xf
	cbnz x2, forth.blake2s.misaligned

	ldp q0, q1, [x24]
	ldr x1, [x24, #32]
	add x1, x1, #64
	str x1, [x24, #32]

	bl blake2s_update

	stp q0, q1, [x24]

	pop
	next

// ( in-addr size state-ptr -- )
//
// - `in-addr` must be aligned to 16 bytes, and be zero padded to 64 bytes.
// - `size` is the size of the data in the block, not including the padding.
// - `state-ptr` must be aligned to 16 bytes, and point to an area of 40 bytes
//   previously initialized by `BLAKE2S/PRIMITIVE/INIT`, and only modified in
//   the interim by `BLAKE2S/PRIMITIVE/UPDATE`.
builtin blake2s_finalize, "BLAKE2S/PRIMITIVE/FINALIZE"
	pop x1
	pop x0

	and x2, x0, #0xf
	cbnz x2, forth.blake2s.misaligned
	and x2, x24, #0xf
	cbnz x2, forth.blake2s.misaligned

	ldp q0, q1, [x24]
	ldr x2, [x24, #32]
	add x1, x1, x2

	bl blake2s_final

	stp q0, q1, [x24]

	pop
	next

// Shared between the different BLAKE2s words.
forth.blake2s.misaligned:
	adr x0, forth.blake2s.misaligned.msg
	mov x1, forth.blake2s.misaligned.msg.len
	bl panic

// ( addr -- )
builtin chacha20_block, "CHACHA20/BLOCK"
	check_stack_depth 1
	ldp q0, q1, [x24]
	ldp q2, q3, [x24, #32]
	bl chacha20
	stp q0, q1, [x24]
	stp q2, q3, [x24, #32]
	eor v0.16b, v0.16b, v0.16b
	eor v1.16b, v1.16b, v1.16b
	eor v2.16b, v2.16b, v2.16b
	eor v3.16b, v3.16b, v3.16b
	pop
	next

builtin compile_only, "COMPILE-ONLY"
	ldr x0, [x21, #0x28]
	tbz x0, #1, forth.compile_only.panic
	next
forth.compile_only.panic:
	ldr x0, =forth.compile_only.panic.msg
	mov x1, forth.compile_only.panic.msg.len
	bl panic

builtin decimal, "DECIMAL"
	ldr x0, [x21, #0x28]
	and x0, x0, #0xfffffffffffffffe
	str x0, [x21, #0x28]
	next

builtin depth, "DEPTH"
	check_stack_free
	push
	lsr x24, x0, 3
	next

builtin drop, "DROP"
	check_stack_depth 1
	pop
	next

builtin dup, "DUP"
	check_stack_depth 1
	check_stack_free
	push
	next

builtin dup_q, "?DUP"
	check_stack_depth 1
	cbz x24, forth.dup_q.end
	check_stack_free
	push
forth.dup_q.end:
	next

builtin execute, "EXECUTE"
	check_stack_depth 1
	mov x0, x24
	pop
	br x0

builtin exit, "EXIT"
	cmp x25, x26
	b.eq forth.runtime.return_stack_underflow
	mov x20, x27
	ldr x27, [x26, #-8]!
	next

// ( addr len -- 0 0 | header-addr -1 )
builtin find_word, "FIND-WORD"
	check_stack_depth 2
	ldr x0, [x23, #-8] // Name Pointer
	mov x1, x24        // Name Length
	tst x1, #0xffffffffffffff00
	b.ne forth.find_word.fail
	ldr x2, [x21, #0x20] // Header Pointer
forth.find_word.loop:
	mov x4, x2
	cbz x2, forth.find_word.fail
	ldp x2, x3, [x2]
	cmp x1, x3, uxtb
	b.ne forth.find_word.loop
	tbnz x3, #8, forth.find_word.loop
	mov x3, x1 // Remaining characters
	mov x5, x0
	add x6, x4, #0x0c
forth.find_word.strcmp:
	subs x3, x3, #1
	b.lo forth.find_word.succeed
	ldrb w7, [x5], #1
	ldrb w8, [x6], #1
	cmp w7, w8
	b.ne forth.find_word.loop
	b forth.find_word.strcmp
forth.find_word.fail:
	str xzr, [x23, #-8]
	mov x24, #0
	next
forth.find_word.succeed:
	str x4, [x23, #-8]
	mov x24, #-1
	next

builtin from_r, "R>"
	check_stack_free
	cmp x25, x26
	b.eq forth.runtime.return_stack_underflow
	push
	mov x24, x27
	ldr x27, [x26, #-8]!
	next

builtin header_to_body, "HEADER>BODY"
	check_stack_depth 1
	ldrb w0, [x24, #0x08]
	add x0, x0, #0x13
	add x0, x0, x24
	and x24, x0, #0xfffffffffffffffc
	next

builtin here, "HERE"
	check_stack_free
	push
	ldr x24, [x21, #0x10]
	next

builtin hex, "HEX"
	ldr x0, [x21, #0x28]
	orr x0, x0, #1
	str x0, [x21, #0x28]
	next

builtin idle, "IDLE"
	bl platform.idle
	next

builtin invert, "INVERT"
	check_stack_depth 1
	mvn x24, x24
	next

builtin latest, "LATEST"
	check_stack_free
	push
	ldr x24, [x21, #0x20]
	next

builtin lshift, "LSHIFT"
	check_stack_depth 2
	pop x0
	lsl x24, x0, x24
	next

// ( addr1 addr2 len -- 0 | -1 )
// TODO Optimize this
builtin memory_eq, "MEMORY-EQ"
	check_stack_depth 3
	pop x0
	pop x1
forth.memory_compare.loop:
	cbz x24, forth.memory_compare.true
	ldrb w2, [x0], #1
	ldrb w3, [x1], #1
	sub x24, x24, #1
	cmp w2, w3
	b.eq forth.memory_compare.loop
forth.memory_compare.false:
	mov x24, #0
	next
forth.memory_compare.true:
	mov x24, #-1
	next

// ( src-addr dst-addr len -- )
builtin move, "MOVE"
	check_stack_depth 3
	pop x1
	pop x0
forth.move.loop:
	cbz x24, forth.move.end
	ldrb w2, [x0], #1
	strb w2, [x1], #1
	sub x24, x24, #1
	b forth.move.loop
forth.move.end:
	pop
	next

builtin negate, "NEGATE"
	check_stack_depth 1
	neg x24, x24
	next

builtin noop, "NOOP"
	next

builtin or, "OR"
	check_stack_depth 2
	pop x0
	orr x24, x0, x24
	next

builtin over, "OVER"
	check_stack_depth 2
	check_stack_free
	ldr x0, [x23, #-8]
	push
	mov x24, x0
	next

builtin panic, "PANIC"
	check_stack_depth 2
	pop x0
	mov x1, x24
	pop
	bl panic

// ( ch addr len -- len )
// TODO UTF-8
builtin parse_from_str, "PARSE-FROM-STR"
	check_stack_depth 3
	pop x0      // The string pointer
	mov x1, x24 // The remaining string length
	pop x2      // The character being searched for
	mov x24, #0 // The length before the character was found
	cmp x2, #0x80
	b.hs forth.parse_from_str.todo_utf8
forth.parse_from_str.loop:
	subs x1, x1, #1
	b.lo forth.parse_from_str.end
	ldrb w3, [x0], #1
	cmp w3, w2
	b.eq forth.parse_from_str.end
	add x24, x24, #1
	b forth.parse_from_str.loop
forth.parse_from_str.end:
	next
forth.parse_from_str.todo_utf8:
	ldr x0, =forth.parse_from_str.todo_utf8.msg
	mov x1, forth.parse_from_str.todo_utf8.msg.len
	bl panic

// ( addr len -- len )
// does not skip leading whitespace
builtin parse_word_from_str, "PARSE-WORD-FROM-STR"
	check_stack_depth 2
	pop x0      // The string
	mov x1, x24 // The remaining length of the string
	mov x24, #0 // The length of the parsed-out string
forth.parse_word_from_str.loop:
	subs x1, x1, #1
	b.lo forth.parse_word_from_str.end
	ldrb w2, [x0], #1
	cmp w2, ' '
	b.ls forth.parse_word_from_str.end
	add x24, x24, #1
	b forth.parse_word_from_str.loop
forth.parse_word_from_str.end:
	next

builtin pick, "PICK"
	check_stack_depth 1
	cmp x24, #0
	b.lt forth.runtime.stack_underflow
	sub x0, x23, x22
	sub x2, x0, x24, lsl 3
	subs x2, x2, #8
	b.ls forth.runtime.stack_underflow
	sub x1, x23, x24, lsl 3
	ldr x24, [x1, #-8]
	next

builtin process_context, "PROCESS-CONTEXT"
	check_stack_free
	push
	mov x24, x21
	next

builtin rdepth, "RDEPTH"
	check_stack_free
	push
	sub x0, x26, x25
	lsr x24, x0, 3
	next

builtin rdrop, "RDROP"
	cmp x26, x25
	b.eq forth.runtime.return_stack_underflow
	ldr x27, [x26, #-8]!
	next

builtin root_process.data, "(ROOT-PROCESS)/DATA"
	check_stack_free
	push
	ldr x24, =root_process.data
	next

// ( x1 x2 x3 -- x2 x3 x1 )
builtin rot, "ROT"
	check_stack_depth 3
	ldr x0, [x23, #-8]
	str x24, [x23, #-8]
	ldr x24, [x23, #-16]
	str x0, [x23, #-16]
	next

// ( x1 x2 x3 -- x3 x1 x2 )
builtin rot_rev, "-ROT"
	check_stack_depth 3
	ldr x0, [x23, #-16]
	str x24, [x23, #-16]
	ldr x24, [x23, #-8]
	str x0, [x23, #-8]
	next

builtin rpick, "RPICK"
	check_stack_depth 1
	sub x0, x26, x25
	cmp x0, x24, lsl 3
	b.ls forth.runtime.return_stack_underflow
	negs x0, x24, lsl 3
	b.eq forth.rpick.top
	ldr x24, [x26, x0]
	next
forth.rpick.top:
	mov x24, x27
	next

builtin rshift, "RSHIFT"
	check_stack_depth 2
	pop x0
	lsr x24, x0, x24
	next

builtin set_latest, "SET-LATEST"
	check_stack_depth 1
	str x24, [x21, #0x20]
	pop
	next

builtin set_source, "SET-SOURCE"
	check_stack_depth 2
	pop x0
	stp x0, x24, [x21, #0x30]
	pop
	next

// ( addr len -- addr len )
builtin skip_spaces, "SKIP-SPACES"
	check_stack_depth 2
	ldr x0, [x23, #-8]
forth.skip_spaces.loop:
	cbz x24, forth.skip_spaces.end
	ldrb w1, [x0]
	cmp w1, ' '
	b.hi forth.skip_spaces.end
	add x0, x0, #1
	sub x24, x24, #1
	b forth.skip_spaces.loop
forth.skip_spaces.end:
	str x0, [x23, #-8]
	next

builtin source, "SOURCE"
	check_stack_free 2
	push
	ldp x0, x24, [x21, #0x30]
	push x0
	next

builtin state, "STATE"
	check_stack_free
	push
	ldr x0, [x21, #0x28]
	mov x24, #0
	tbz x0, #1, forth.state.interpret
	mov x24, #-1
forth.state.interpret:
	next

// ( addr len -- 0 0 | n -1 )
builtin str_to_number, "STR>NUMBER"
	check_stack_depth 2
	cbz x24, forth.str_to_number.fail

	mov x0, #0 // The accumulator.
	mov x1, #0 // The negative flag.
	mov x2, x24 // The length.

	ldr x3, [x21, #0x28] // The base.
	tbz x3, #0, forth.str_to_number.dec
forth.str_to_number.hex:
	mov x3, #16
	b forth.str_to_number.check_base
forth.str_to_number.dec:
	mov x3, #10

forth.str_to_number.check_base:
	ldr x4, [x23, #-8] // The address.
	ldrb w5, [x4] // The current character.
	cmp w5, '%'
	b.eq forth.str_to_number.check_base.bin
	cmp w5, '#'
	b.eq forth.str_to_number.check_base.dec
	cmp w5, '$'
	b.eq forth.str_to_number.check_base.hex
	b forth.str_to_number.check_neg

forth.str_to_number.check_base.bin:
	sub x2, x2, #1
	mov x3, #2
	ldrb w5, [x4, #1]!
	b forth.str_to_number.check_neg
forth.str_to_number.check_base.dec:
	sub x2, x2, #1
	mov x3, #10
	ldrb w5, [x4, #1]!
	b forth.str_to_number.check_neg
forth.str_to_number.check_base.hex:
	sub x2, x2, #1
	mov x3, #16
	ldrb w5, [x4, #1]!
	b forth.str_to_number.check_neg

forth.str_to_number.check_neg:
	cmp w5, '-'
	b.ne forth.str_to_number.loop
	mov x1, #1
	sub x2, x2, #1
	add x4, x4, #1
	cbz x2, forth.str_to_number.fail

forth.str_to_number.loop:
	subs x2, x2, #1
	b.lo forth.str_to_number.apply_neg
	ldrb w5, [x4], #1

	sub x6, x5, '0' // The current digit's numeric value.
	cmp x6, #10
	b.lo forth.str_to_number.loop.accum
	sub x6, x5, 'A'
	cmp x6, #25
	b.hi forth.str_to_number.loop.try_lowercase
	sub x6, x5, ('A' - 10)
	b forth.str_to_number.loop.accum
forth.str_to_number.loop.try_lowercase:
	sub x6, x5, 'a'
	cmp x6, #25
	b.hi forth.str_to_number.fail
	sub x6, x5, ('a' - 10)

forth.str_to_number.loop.accum:
	cmp x6, x3
	b.hs forth.str_to_number.fail
	madd x0, x0, x3, x6
	b forth.str_to_number.loop

forth.str_to_number.apply_neg:
	cbz x1, forth.str_to_number.end
	neg x0, x0
forth.str_to_number.end:
	mov x24, #-1
	str x0, [x23, #-8]
	next

forth.str_to_number.fail:
	mov x24, #0
	str xzr, [x23, #-8]
	next

// ( addr1 len1 addr2 len2 -- -1 | 0 | 1 )
// Same return as strcmp().
// TODO Optimize this
builtin string_compare, "STRING-COMPARE"
	check_stack_depth 4
	pop x2 // addr2
	pop x1 // len1
	pop x0 // addr1
	mov x3, x24 // len2
	mov x24, #0
	cmp x1, x3
	b.lo forth.string_compare.init_lt
	b.hi forth.string_compare.init_gt
	b forth.string_compare.loop
forth.string_compare.init_lt:
	mov x24, #-1
	b forth.string_compare.loop
forth.string_compare.init_gt:
	mov x24, #1
	b forth.string_compare.loop
forth.string_compare.loop:
	subs x1, x1, #1
	b.lo forth.string_compare.end
	subs x3, x3, #1
	b.lo forth.string_compare.end
	ldrb w4, [x0], #1
	ldrb w5, [x2], #1
	cmp w4, w5
	b.lo forth.string_compare.end_lt
	b.hi forth.string_compare.end_gt
	b forth.string_compare.loop
forth.string_compare.end_lt:
	mov x24, #-1
	b forth.string_compare.end
forth.string_compare.end_gt:
	mov x24, #1
forth.string_compare.end:
	next

builtin swap, "SWAP"
	check_stack_depth 2
	ldr x0, [x23, #-8]
	str x24, [x23, #-8]
	mov x24, x0
	next

builtin to_r, ">R"
	check_stack_depth 1
	sub x0, x26, x25
	cmp x0, #0x100
	b.hs forth.runtime.return_stack_overflow
	str x27, [x26], #8
	mov x27, x24
	pop
	next

builtin u_gt, "U>"
	check_stack_depth 2
	pop x0
	cmp x0, x24
	b.hi _gt.gt
	mov x24, #0
	next
u_gt.gt:
	mov x24, #-1
	next

builtin u_lt, "U<"
	check_stack_depth 2
	pop x0
	cmp x0, x24
	b.lo _lt.lt
	mov x24, #0
	next
u_lt.lt:
	mov x24, #-1
	next

builtin u_slash_mod, "U/MOD"
	check_stack_depth 2
	ldr x0, [x23, #-8]
	udiv x1, x0, x24
	msub x2, x1, x24, x0
	str x2, [x23, #-8]
	mov x24, x1
	next

builtin word_immediate, "WORD-IMMEDIATE"
	check_stack_depth 1
	ldr x0, [x24, #0x08]
	orr x0, x0, #0x0200
	str x0, [x24, #0x08]
	pop
	next

builtin word_smudge, "WORD-SMUDGE"
	check_stack_depth 1
	ldr x0, [x24, #0x08]
	orr x0, x0, #0x0100
	str x0, [x24, #0x08]
	pop
	next

builtin word_unimmediate, "WORD-UNIMMEDIATE"
	check_stack_depth 1
	ldr x0, [x24, #0x08]
	and x0, x0, #0xfffffffffffffdff
	str x0, [x24, #0x08]
	pop
	next

builtin word_unsmudge, "WORD-UNSMUDGE"
	check_stack_depth 1
	ldr x0, [x24, #0x08]
	and x0, x0, #0xfffffffffffffeff
	str x0, [x24, #0x08]
	pop
	next

builtin xor, "XOR"
	check_stack_depth 2
	pop x0
	eor x24, x0, x24
	next

// ( addr1 addr2 len -- )
// TODO: Optimize/SIMDify
builtin xor_block, "XOR-BLOCK"
	pop x1
	pop x0
forth.xor_block.loop:
	subs x24, x24, #1
	b.lo forth.xor_block.end
	ldrb w2, [x0]
	ldrb w3, [x1], #1
	eor w2, w2, w3
	strb w2, [x0], #1
	b forth.xor_block.loop
forth.xor_block.end:
	pop
	next

builtin zero_eq, "0="
	check_stack_depth 1
	cbz x24, forth.zero_eq.zero
	mov x24, #0
	next
forth.zero_eq.zero:
	mov x24, #-1
	next

builtin zero_gt, "0>"
	check_stack_depth 1
	cmp x24, #0
	mov x24, #0
	b.le forth.zero_gt.le
	mov x24, #-1
forth.zero_gt.le:
	next

builtin zero_lt, "0<"
	check_stack_depth 1
	cmp x24, #0
	mov x24, #0
	b.ge forth.zero_lt.ge
	mov x24, #-1
forth.zero_lt.ge:
	next

builtin zero_ne, "0<>"
	check_stack_depth 1
	cbz x24, forth.zero_ne.zero
	mov x24, #-1
	next
forth.zero_ne.zero:
	mov x24, #0
	next

/********************/
/* SYSTEM REGISTERS */
/********************/

system_register DAIF
system_register ELR_EL1
system_register ESR_EL1
system_register FAR_EL1
system_register MAIR_EL1
system_register MPIDR_EL1, ro=1
system_register SCTLR_EL1
system_register SPSEL
system_register SPSR_EL1
system_register TCR_EL1
system_register TTBR0_EL1
system_register TTBR1_EL1
system_register VBAR_EL1

/*******************/
/* DEBUGGING WORDS */
/*******************/

builtin debug.bp, "debug/bp"
	next

builtin debug.todo, "debug/todo"
	ldr x0, =forth.debug.todo.msg
	mov x1, forth.debug.todo.msg.len
	bl panic

builtin debug.type, "debug/type"
	check_stack_depth 2
	mov x1, x24
	pop x0
	pop
	bl platform.debug_print
	next

.global forth.__last_builtin.header
builtin __last_builtin, "", SMUDGED

.section .rodata.forth.builtins

forth.blake2s.misaligned.msg: .ascii "Misaligned argument to a BLAKE2S word"
.set forth.blake2s.misaligned.msg.len, . - forth.blake2s.misaligned.msg

forth.compile_only.panic.msg: .ascii "Cannot use compile-only word in interpret mode."
.set forth.compile_only.panic.msg.len, . - forth.compile_only.panic.msg

forth.debug.todo.msg: .ascii "TODO (from Forth)"
.set forth.debug.todo.msg.len, . - forth.debug.todo.msg

forth.parse_from_str.todo_utf8.msg: .ascii "UTF-8 NYI"
.set forth.parse_from_str.todo_utf8.msg.len, . - forth.parse_from_str.todo_utf8.msg

.section .bss.forth.builtins

.align 11
exception_vector_table: .skip 0x800

// vi: set ft=arm64asm :