# Copyright (c) 2003-2006,  Pete Sanderson and Kenneth Vollmar
#
# Developed by Pete Sanderson (psanderson@otterbein.edu)
# and Kenneth Vollmar (kenvollmar@missouristate.edu)
#
# Permission is hereby granted, free of charge, to any person obtaining 
# a copy of this software and associated documentation files (the 
# "Software"), to deal in the Software without restriction, including 
# without limitation the rights to use, copy, modify, merge, publish, 
# distribute, sublicense, and/or sell copies of the Software, and to 
# permit persons to whom the Software is furnished to do so, subject 
# to the following conditions:
#
# The above copyright notice and this permission notice shall be 
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR 
# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# (MIT license, http://www.opensource.org/licenses/mit-license.html)


# File containing definitions of MIPS pseudo-ops

# File format:
#   Each line contains specification for one pseudo-op, including optional description.
#   First item is source statement syntax, specified in same "example" parser format used for regular instructions.
#   Source statement specification ends with a tab.  It is followed by a tab-separated list of basic instruction
#   templates to complete and substitute for the pseudo-op.
#   Format for specifying syntax of templates is different from specifying syntax of source statement:
#      (n=0,1,2,3,...) is token position in source statement (operator is token 0, parentheses are tokens but commas aren't)
#      RGn means substitute register found in n'th token of source statement
#      NRn means substitute next higher register than the one in n'th token of source code
#      OPn means substitute n'th token of source code as is
#      LLn means substitute low order 16 bits from label address in source token n.
#      LLnPm (m=1,2,3,4) means substitute low order 16 bits from label address in source token n, after adding m.
#      LHn means substitute high order 16 bits from label address in source token n. Must add 1 if address bit 15 is 1. 
#      LHnPm (m=1,2,3,4) means substitute high order 16 bits from label address in source token n, after adding m. Must then add 1 if bit 15 is 1. 
#      VLn means substitute low order 16 bits from 32 bit value in source token n.
#      VLnPm (m=1,2,3,4) means substitute low order 16 bits from 32 bit value in source token n, after adding m to value.
#      VHn means substitute high order 16 bits from 32 bit value in source token n. Must add 1 if value's bit 15 is 1.
#      VHnPm (m=1,2,3,4) means substitute high order 16 bits from 32 bit value in source token n, after adding m. Must then add 1 if bit 15 is 1.
#      LLP is similar to LLn, but is needed for "label+100000" address offset. Immediate is added before taking low order 16. 
#      LLPPm (m=1,2,3,4) is similar to LLP except m is added along with immediate before taking low order 16.
#      LHPA is similar to LHn, but is needed for "label+100000" address offset. Immediate is added before taking high order 16.
#      LHPN is similar to LHPA, used only by "la" instruction. Address resolved by "ori" so do not add 1 if bit 15 is 1.
#      LHPAPm (m=1,2,3,4) is similar to LHPA except value m is added along with immediate before taking high order 16.
#      LHL means substitute high order 16 bits from label address in token 2 of "la" (load address) source statement.
#      VHLn means substitute high order 16 bits from 32 bit value in token n.
#      LAB means substitute textual label from last token of source statement.  Used for various branches.
#      S32 means substitute the result of subtracting the constant value in last token from 32.  Used by "ror", "rol".
#   Everything else is copied as is into the generated statement (you must use register numbers not mnemonics)
#   The list of basic instruction templates is optionally followed by a description of the instruction for help purposes.
#   To add optional description, append a tab then the '#' character followed immediately (no spaces) by the description.
#
#  See documentation for ExtendedInstruction.makeTemplateSubstitutions() for more details.
#
#  Matching for a given instruction mnemonic is first-fit not best-fit.  If an instruction has both 16 and 32 bit
#  immediate operand options, they should be listed in that order (16 bit version first).  Otherwise the 16 bit
#  version will never be matched since the 32 bit version fits small immediate values first.
#
#  The pseudo-op specification must start in the first column.  If first column is blank, the line will be skipped!
#
#  When specifying the example instruction (first item on line), the conventions I follow are:
#  - for a register operand, specify a numbered register (e.g. $1 or $f1) to represent any register in the set. 
#    The numerical value is not significant.  This is NOT the case when writing the templates that follow!
#    In the templates, numbered registers are parsed as is (use only $0 and $1, which are $zero and $at).
#  - for an immediate operand, specify a positive value indicative of the expected range.  I use 10 to represent
#    a 5 bit value, 100 to represent a 16 bit value, and 100000 to represent a 32 bit value.
#  - for a label operand, I use the string "label" (without the quotes). 
#  The idea is to give the parser an example that will be parsed into the desired token sequence.  Syntax checking
#  is done by comparing the source token sequence to list of token sequences generated from the examples.
#  IMPORTANT NOTE:  The use of $1,$2, etc in the instruction sample means that any CPU register reference
#                   can be used in that position.  It is simply a placeholder.  By contrast, when
#                   $1 is used in the template specification, $1 ($at) is literally placed into the generated
#                   instruction!  If you want the generated code to echo the source register, use RG1,RG2, etc.

#######################  arithmetic and branch pseudo-ops #####################

# not: a single nor of the source register with $0 yields the bitwise complement.
not $1,$2	nor RG1, RG2, $0	#Bitwise NOT (bit inversion)

# Here are some "convenience" arithmetic pseudo-ops.  But do they encourage sloppy programming?
# They let add/sub (and addi/subi) take an immediate third operand.  The 32 bit forms build the
# constant in $1 ($at) with lui (VHL3, no carry adjustment) followed by ori, then use the register form.
add $1,$2,100	addi RG1, RG2, VL3	#Addition of 16 bit immediate to register
add $1,$2,100000	lui $1, VHL3	ori $1, $1, VL3	add RG1, RG2, $1	#Addition of 32 bit immediate to register
addi $1,$2,100000	lui $1, VHL3	ori $1, $1, VL3	add RG1, RG2, $1	#Addition of 32 bit immediate to register
# The 16 bit subtract forms place the immediate in the upper half of $1 with lui and then shift it
# back down arithmetically by 16, which leaves the sign-extended 16 bit value in $1.
sub $1,$2,100		lui $1, VL3	sra $1, $1, 16	sub RG1, RG2, $1	#Subtraction of 16 bit immediate from register
sub $1,$2,100000	lui $1, VHL3	ori $1, $1, VL3	sub RG1, RG2, $1	#Subtraction of 32 bit immediate from register
subi $1,$2,100		lui $1, VL3	sra $1, $1, 16	sub RG1, RG2, $1	#Subtraction of 16 bit immediate from register
subi $1,$2,100000	lui $1, VHL3	ori $1, $1, VL3	sub RG1, RG2, $1	#Subtraction of 32 bit immediate from register
# feel free to add more convenience arithmetic pseudo-ops.

# Convenience logical operations can be added too.
# Three families follow: (1) andi/ori/xori with a 32 bit immediate, built in $1 ($at) by lui/ori and
# applied with the register form; (2) and/or/xor given a 16 bit immediate, mapped to andi/ori/xori;
# (3) two-operand forms in which the first register is both source and destination.
# NOTE(review): the 16 bit forms pass the immediate to andi/ori/xori, which zero-extend it; if the
# 16 bit operand class admits negative values they would not be sign-extended -- confirm.
andi $1,$2,100000	lui $1, VHL3	ori $1, $1, VL3	and RG1, RG2, $1	#Bitwise AND of register and 32 bit immediate
ori $1,$2,100000	lui $1, VHL3	ori $1, $1, VL3	or RG1, RG2, $1		#Bitwise OR of register and 32 bit immediate
xori $1,$2,100000	lui $1, VHL3	ori $1, $1, VL3	xor RG1, RG2, $1	#Bitwise XOR of register and 32 bit immediate
and $1,$2,100	andi RG1, RG2, VL3	#Bitwise AND of register and 16 bit immediate
or $1,$2,100	ori RG1, RG2, VL3	#Bitwise OR of register and 16 bit immediate
xor $1,$2,100	xori RG1, RG2, VL3	#Bitwise XOR of register and 16 bit immediate
and $1,100	andi RG1, RG1, VL2	#Bitwise AND of register and 16 bit immediate, result in same register
or $1,100	ori RG1, RG1, VL2	#Bitwise OR of register and 16 bit immediate, result in same register
xor $1,100	xori RG1, RG1, VL2	#Bitwise XOR of register and 16 bit immediate, result in same register
andi $1,100	andi RG1, RG1, VL2	#Bitwise AND of register and 16 bit immediate, result in same register
ori $1,100	ori RG1, RG1, VL2	#Bitwise OR of register and 16 bit immediate, result in same register
xori $1,100	xori RG1, RG1, VL2	#Bitwise XOR of register and 16 bit immediate, result in same register
andi $1,100000	lui $1, VHL2	ori $1, $1, VL2	and RG1, RG1, $1	#Bitwise AND of register and 32 bit immediate, result in same register
ori $1,100000	lui $1, VHL2	ori $1, $1, VL2	or RG1, RG1, $1		#Bitwise OR of register and 32 bit immediate, result in same register
xori $1,100000	lui $1, VHL2	ori $1, $1, VL2	xor RG1, RG1, $1	#Bitwise XOR of register and 32 bit immediate, result in same register


# Set-on-condition, move, absolute value and negate.
# seq/sne and the "or equal" forms (sge, sgeu, sle, sleu) test equality with a branch and either load
# a constant 0/1 with ori or fall into an slt/sltu; sgt/sgtu are slt/sltu with the operands swapped.
# NOTE(review): the numeric branch operands (3, 2) appear to be counted in instructions from the
# branch itself (3 = land on the third instruction after it) -- confirm against the simulator.
seq $1,$2,$3	beq RG3, RG2, 3	ori RG1, $0, 0	beq $0, $0, 2	ori RG1, $0, 1	#Set if equal
sge $1,$2,$3	bne RG3, RG2, 3	ori RG1, $0, 1	beq $0, $0, 2	slt RG1, RG3, RG2	#Set if greater than or equal
sgeu $1,$2,$3	bne RG3, RG2, 3	ori RG1, $0, 1	beq $0, $0, 2	sltu RG1, RG3, RG2	#Set if greater than or equal unsigned
sgt $1,$2,$3	slt RG1, RG3, RG2	#Set if greater than
sgtu $1,$2,$3	sltu RG1, RG3, RG2	#Set if greater than unsigned
sle $1,$2,$3	bne RG3, RG2, 3	ori RG1, $0, 1	beq $0, $0, 2	slt RG1, RG2, RG3	#Set if less than or equal
sleu $1,$2,$3	bne RG3, RG2, 3	ori RG1, $0, 1	beq $0, $0, 2	sltu RG1, RG2, RG3	#Set if less than or equal unsigned
sne $1,$2,$3	beq RG3, RG2, 3	ori RG1, $0, 1	beq $0, $0, 2	ori RG1, $0, 0	#Set if not equal
move $1,$2	addu RG1, $0, RG2	#Move (copy) contents from one register to another
# abs copies the source first, then skips the negating sub when the source is already >= 0.
abs $1,$2	addu RG1, $0, RG2	bgez RG2, 2	sub RG1, $0, RG2	#Absolute value
neg $1,$2	sub RG1, $0, RG2	#Negate
negu $1,$2	subu RG1, $0, RG2	#Negate unsigned

# Unconditional branch and compare-with-zero branches.  "b" is written as bgez on $0, a test that
# always succeeds; beqz/bnez compare the source register against $0.
b label	bgez $0, LAB	#Branch unconditionally
beqz $1,label	beq RG1, $0, LAB	#Branch if equal to zero
bnez $1,label	bne RG1, $0, LAB	#Branch if not equal to zero

# beq/bne against an immediate: the constant is materialized in $1 ($at), then compared with RG1.
# 16 bit form: addi from $0 sign-extends the immediate, so negative values compare correctly
#   (ori would zero-extend them, the same defect that disabled the 16 bit "li" form).
# 32 bit form: lui is paired with ori, which does not sign-extend, so the high half must be VHL2
#   (plain high 16 bits).  VH2 adds 1 when bit 15 is set and would corrupt the constant.
beq $1,100,label	addi $1, $0, VL2	beq $1, RG1, LAB	#Branch if equal to immediate
beq $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	beq $1, RG1, LAB
bne $1,100,label	addi $1, $0, VL2	bne $1, RG1, LAB	#Branch if not equal to immediate
bne $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	bne $1, RG1, LAB

# bge: compute "RG1 < operand" into $1 ($at) and branch when that is false.
# In the 32 bit form the constant is built with lui + ori; ori does not sign-extend, so the high half
# must be VHL2 (no carry adjustment).  VH2 adds 1 when bit 15 is set and would corrupt the constant.
bge $1,$2,label	slt $1, RG1, RG2	beq $1, $0, LAB	#Branch if greater than or equal
bge $1,100,label	slti $1, RG1, VL2	beq $1, $0, LAB
bge $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	slt $1, RG1, $1	beq $1, $0, LAB

# bgeu: compute unsigned "RG1 < operand" into $1 ($at) and branch when that is false.
# In the 32 bit form the constant is built with lui + ori; ori does not sign-extend, so the high half
# must be VHL2 (no carry adjustment).  VH2 adds 1 when bit 15 is set and would corrupt the constant.
bgeu $1,$2,label	sltu $1, RG1, RG2	beq $1, $0, LAB	#Branch if greater than or equal unsigned
bgeu $1,100,label	sltiu $1, RG1, VL2	beq $1, $0, LAB
bgeu $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	sltu $1, RG1, $1	beq $1, $0, LAB

# bgt: compute "operand < RG1" into $1 ($at) and branch when that is true.
# The immediate forms load the constant itself and use the same slt/bne shape as the register form.
# Testing "RG1 < immediate+1" instead would wrap at the top of the 16 bit or 32 bit range, and the
# lui + ori pair needs the unadjusted high half (VHL2), for which no "+1" variant is documented.
bgt $1,$2,label	slt $1, RG2, RG1	bne $1, $0, LAB	#Branch if greater than
bgt $1,100,label	addi $1, $0, VL2	slt $1, $1, RG1	bne $1, $0, LAB
bgt $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	slt $1, $1, RG1	bne $1, $0, LAB

# bgtu: compute unsigned "operand < RG1" into $1 ($at) and branch when that is true.
# The immediate forms load the constant and reuse the sltu/bne shape of the register form, so no
# separate equality test or numeric branch offset is needed.
# 16 bit form: addi sign-extends the immediate, matching what sltiu does in the bgeu/bltu forms.
# 32 bit form: lui + ori needs the unadjusted high half (VHL2); VH2 would add 1 when bit 15 is set.
bgtu $1,$2,label	sltu $1, RG2, RG1	bne $1, $0, LAB	#Branch if greater than unsigned
bgtu $1,100,label	addi $1, $0, VL2	sltu $1, $1, RG1	bne $1, $0, LAB
bgtu $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	sltu $1, $1, RG1	bne $1, $0, LAB

# ble: compute "operand < RG1" into $1 ($at) and branch when that is false.
# The immediate forms load the constant itself and use the same slt/beq shape as the register form.
# Testing "RG1 < immediate+1" instead would wrap at the top of the 16 bit or 32 bit range, and the
# lui + ori pair needs the unadjusted high half (VHL2), for which no "+1" variant is documented.
ble $1,$2,label	slt $1, RG2, RG1	beq $1, $0, LAB	#Branch if less than or equal
ble $1,100,label	addi $1, $0, VL2	slt $1, $1, RG1	beq $1, $0, LAB
ble $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	slt $1, $1, RG1	beq $1, $0, LAB

# bleu: compute unsigned "operand < RG1" into $1 ($at) and branch when that is false.
# The immediate forms load the constant and reuse the sltu/beq shape of the register form, so the
# label is branched to from a single instruction and no separate equality test is needed.
# 16 bit form: addi sign-extends the immediate, matching what sltiu does in the bgeu/bltu forms.
# 32 bit form: lui + ori needs the unadjusted high half (VHL2); VH2 would add 1 when bit 15 is set.
bleu $1,$2,label	sltu $1, RG2, RG1	beq $1, $0, LAB	#Branch if less than or equal unsigned
bleu $1,100,label	addi $1, $0, VL2	sltu $1, $1, RG1	beq $1, $0, LAB
bleu $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	sltu $1, $1, RG1	beq $1, $0, LAB

# blt: compute "RG1 < operand" into $1 ($at) and branch when that is true.
# In the 32 bit form the constant is built with lui + ori; ori does not sign-extend, so the high half
# must be VHL2 (no carry adjustment).  VH2 adds 1 when bit 15 is set and would corrupt the constant.
blt $1,$2,label	slt $1, RG1, RG2	bne $1, $0, LAB	#Branch if less than
blt $1,100,label	slti $1, RG1, VL2	bne $1, $0, LAB
blt $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	slt $1, RG1, $1	bne $1, $0, LAB

# bltu: compute unsigned "RG1 < operand" into $1 ($at) and branch when that is true.
# In the 32 bit form the constant is built with lui + ori; ori does not sign-extend, so the high half
# must be VHL2 (no carry adjustment).  VH2 adds 1 when bit 15 is set and would corrupt the constant.
bltu $1,$2,label	sltu $1, RG1, RG2	bne $1, $0, LAB	#Branch if less than unsigned
bltu $1,100,label	sltiu $1, RG1, VL2	bne $1, $0, LAB
bltu $1,100000,label	lui $1, VHL2	ori $1, $1, VL2	sltu $1, RG1, $1	bne $1, $0, LAB

# Rotates are assembled from two opposite shifts OR-ed together; $1 ($at) holds the wrapped-around bits.
# Variable forms: $1 = 0 - RG3 gives the complementary shift amount for the opposite-direction shift.
# Immediate forms: S32 is 32 minus the constant in the last token, OP3 is that constant copied as is.
rol $1,$2,$3	subu $1, $0, RG3	srlv $1, RG2, $1	sllv RG1, RG2, RG3	or RG1, RG1, $1	#Rotate left variable
rol $1,$2,10	srl $1, RG2, S32	sll RG1, RG2, OP3	or RG1, RG1, $1
ror $1,$2,$3	subu $1, $0, RG3	sllv $1, RG2, $1	srlv RG1, RG2, RG3	or RG1, RG1, $1	#Rotate right variable
ror $1,$2,10	sll $1, RG2, S32	srl RG1, RG2, OP3	or RG1, RG1, $1

# Double precision moves between CPU and Coprocessor 1: two single-word moves, the second using the
# next higher register of each operand (NR1, NR2).
mfc1.d $1,$f1	mfc1 RG1, RG2	mfc1 NR1, NR2	#Move from Coprocessor 1 double precision
mtc1.d $1,$f1	mtc1 RG1, RG2	mtc1 NR1, NR2	#Move to Coprocessor 1 double precision

# Three-operand multiply/divide/remainder with run-time checks that execute "break" on failure.
# mulo:  break unless HI equals the sign fill (LO shifted right arithmetically by 31) of the product.
# mulou: break unless HI is zero.
# div/divu/rem/remu: break when the divisor RG3 is zero; the result is taken from LO (quotient) or
#   HI (remainder).
mulo $1,$2,$3	mult RG2, RG3	mfhi $1	mflo RG1	sra RG1, RG1, 31	beq $1, RG1,2	break	mflo RG1	#Multiplication with overflow
mulou $1,$2,$3	multu RG2, RG3	mfhi $1	beq $1,$0,2	break	mflo RG1	#Multiplication unsigned with overflow
div $1,$2,$3	bne RG3, $0, 2	break	div RG2, RG3	mflo RG1	#Division
divu $1,$2,$3	bne RG3, $0, 2	break	divu RG2, RG3	mflo RG1	#Division unsigned
rem $1,$2,$3	bne RG3, $0, 2	break	div RG2, RG3	mfhi RG1	#Remainder
remu $1,$2,$3	bne RG3, $0, 2	break	divu RG2, RG3	mfhi RG1	#Remainder unsigned


#########################  load/store pseudo-ops start here  ##########################
#
#  Most of these simply provide a variety of convenient memory addressing modes for 
#  specifying load/store address.
#

#  li $1,100	ori RG1, $0, VL2  (commented out: produces incorrect results for 16 bit negatives)
# With the 16 bit form disabled, the 32 bit form below is the only li expansion.  It pairs lui with
# ori, so the high half is VHL2 (plain high 16 bits, no carry adjustment).
li $1,100000	lui $1, VHL2	ori RG1, $1, VL2	#load 32 bit immediate into register

# la: load the effective address itself rather than the memory contents.
# - 16 bit immediate: addi from $0 sign-extends, so negative values are handled (ori would
#   zero-extend them, the same defect that disabled the 16 bit "li" form).
# - 32 bit immediate / label: lui + ori with the unadjusted high half (VHL2, LHL, LHPN).
# - Forms with a base register add it with addu, as the load/store expansions do, so that address
#   arithmetic never raises an arithmetic overflow exception.
la $1,($2)	addi RG1, RG3, 0	#Load the address instead of contents
la $1,100	addi RG1, $0, VL2
la $1,100000	lui $1, VHL2	ori RG1, $1, VL2
la $1,100000($2)	lui $1, VHL2	ori $1, $1, VL2	addu RG1, RG4, $1
la $1,label	lui $1, LHL	ori RG1, $1, LL2
la $1,label($2)	lui $1, LHL	ori $1, $1, LL2	addu RG1, RG4, $1
la $1,label+100000	lui $1, LHPN	ori RG1, $1, LLP
la $1,label+100000($2)	lui $1, LHPN	ori $1, $1, LLP	addu RG1, RG6, $1

# lw: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic lw.
lw $1,($2)	lw RG1,0(RG3)	#Load word from effective address
lw $1,100	lw RG1, VL2($0)
lw $1,100000	lui $1, VH2	lw RG1,VL2($1)
lw $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lw RG1, VL2($1)
lw $1,label	lui $1, LH2	lw RG1, LL2($1)
lw $1,label($2)	lui $1, LH2	addu $1, $1, RG4	lw RG1, LL2($1)
lw $1,label+100000	lui $1, LHPA	lw RG1, LLP($1)
lw $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lw RG1, LLP($1)

# sw: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic sw.
sw $1,($2)	sw RG1,0(RG3)	#Store word to effective address
sw $1,100	sw RG1,VL2($0)
sw $1,100000	lui $1, VH2	sw RG1,VL2($1)
sw $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	sw RG1, VL2($1)
sw $1,label	lui $1, LH2	sw RG1, LL2($1)
sw $1,label($2)	lui $1, LH2	addu $1, $1, RG4	sw RG1, LL2($1)
sw $1,label+100000	lui $1, LHPA	sw RG1, LLP($1)
sw $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	sw RG1, LLP($1)

# lh: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic lh.
lh $1,($2)	lh RG1,0(RG3)	#Load halfword from effective address
lh $1,100	lh RG1,VL2($0)
lh $1,100000	lui $1, VH2	lh RG1,VL2($1)
lh $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lh RG1, VL2($1)
lh $1,label	lui $1, LH2	lh RG1, LL2($1)
lh $1,label($2)	lui $1, LH2	addu $1, $1, RG4	lh RG1, LL2($1)
lh $1,label+100000	lui $1, LHPA	lh RG1, LLP($1)
lh $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lh RG1, LLP($1)

# sh: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic sh.
sh $1,($2)	sh RG1,0(RG3)	#Store halfword to effective address
sh $1,100	sh RG1,VL2($0)
sh $1,100000	lui $1, VH2	sh RG1,VL2($1)
sh $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	sh RG1, VL2($1)
sh $1,label	lui $1, LH2	sh RG1, LL2($1)
sh $1,label($2)	lui $1, LH2	addu $1, $1, RG4	sh RG1, LL2($1)
sh $1,label+100000	lui $1, LHPA	sh RG1, LLP($1)
sh $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	sh RG1, LLP($1)

# lb: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic lb.
lb $1,($2)	lb RG1,0(RG3)	#Load byte from effective address
lb $1,100	lb RG1,VL2($0)
lb $1,100000	lui $1, VH2	lb RG1,VL2($1)
lb $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lb RG1, VL2($1)
lb $1,label	lui $1, LH2	lb RG1, LL2($1)
lb $1,label($2)	lui $1, LH2	addu $1, $1, RG4	lb RG1, LL2($1)
lb $1,label+100000	lui $1, LHPA	lb RG1, LLP($1)
lb $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lb RG1, LLP($1)

# sb: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic sb.
sb $1,($2)	sb RG1,0(RG3)	#Store byte to effective address
sb $1,100	sb RG1,VL2($0)
sb $1,100000	lui $1, VH2	sb RG1,VL2($1)
sb $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	sb RG1, VL2($1)
sb $1,label	lui $1, LH2	sb RG1, LL2($1)
sb $1,label($2)	lui $1, LH2	addu $1, $1, RG4	sb RG1, LL2($1)
sb $1,label+100000	lui $1, LHPA	sb RG1, LLP($1)
sb $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	sb RG1, LLP($1)

# lhu: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic lhu.
lhu $1,($2)	lhu RG1,0(RG3)	#Load unsigned halfword from effective address
lhu $1,100	lhu RG1,VL2($0)
lhu $1,100000	lui $1, VH2	lhu RG1,VL2($1)
lhu $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lhu RG1, VL2($1)
lhu $1,label	lui $1, LH2	lhu RG1, LL2($1)
lhu $1,label($2)	lui $1, LH2	addu $1, $1, RG4	lhu RG1, LL2($1)
lhu $1,label+100000	lui $1, LHPA	lhu RG1, LLP($1)
lhu $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lhu RG1, LLP($1)

# lbu: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic lbu.
lbu $1,($2)	lbu RG1,0(RG3)	#Load unsigned byte from effective address 
lbu $1,100	lbu RG1,VL2($0)
lbu $1,100000	lui $1, VH2	lbu RG1,VL2($1)
lbu $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lbu RG1, VL2($1)
lbu $1,label	lui $1, LH2	lbu RG1, LL2($1)
lbu $1,label($2)	lui $1, LH2	addu $1, $1, RG4	lbu RG1, LL2($1)
lbu $1,label+100000	lui $1, LHPA	lbu RG1, LLP($1)
lbu $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lbu RG1, LLP($1)

# lwl: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic lwl.
lwl $1,($2)	lwl RG1,0(RG3)	#Load word left from effective address
lwl $1,100	lwl RG1,VL2($0)
lwl $1,100000	lui $1, VH2	lwl RG1,VL2($1)
lwl $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lwl RG1, VL2($1)
lwl $1,label	lui $1, LH2	lwl RG1, LL2($1)
lwl $1,label($2)	lui $1, LH2	addu $1, $1, RG4	lwl RG1, LL2($1)
lwl $1,label+100000	lui $1, LHPA	lwl RG1, LLP($1)
lwl $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lwl RG1, LLP($1)

# swl: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic swl.
swl $1,($2)	swl RG1,0(RG3)	#Store word left to effective address
swl $1,100	swl RG1,VL2($0)
swl $1,100000	lui $1, VH2	swl RG1,VL2($1)
swl $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	swl RG1, VL2($1)
swl $1,label	lui $1, LH2	swl RG1, LL2($1)
swl $1,label($2)	lui $1, LH2	addu $1, $1, RG4	swl RG1, LL2($1)
swl $1,label+100000	lui $1, LHPA	swl RG1, LLP($1)
swl $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	swl RG1, LLP($1)

# lwr: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic lwr.
lwr $1,($2)	lwr RG1,0(RG3)	#Load word right from effective address
lwr $1,100	lwr RG1,VL2($0)
lwr $1,100000	lui $1, VH2	lwr RG1,VL2($1)
lwr $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lwr RG1, VL2($1)
lwr $1,label	lui $1, LH2	lwr RG1, LL2($1)
lwr $1,label($2)	lui $1, LH2	addu $1, $1, RG4	lwr RG1, LL2($1)
lwr $1,label+100000	lui $1, LHPA	lwr RG1, LLP($1)
lwr $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lwr RG1, LLP($1)

# swr: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic swr.
swr $1,($2)	swr RG1,0(RG3)	#Store word right to effective address
swr $1,100	swr RG1,VL2($0)
swr $1,100000	lui $1, VH2	swr RG1,VL2($1)
swr $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	swr RG1, VL2($1)
swr $1,label	lui $1, LH2	swr RG1, LL2($1)
swr $1,label($2)	lui $1, LH2	addu $1, $1, RG4	swr RG1, LL2($1)
swr $1,label+100000	lui $1, LHPA	swr RG1, LLP($1)
swr $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	swr RG1, LLP($1)

# ll: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic ll.
ll $1,($2)	ll RG1,0(RG3)	#Load linked from effective address
ll $1,100	ll RG1,VL2($0)
ll $1,100000	lui $1, VH2	ll RG1,VL2($1)
ll $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	ll RG1, VL2($1)
ll $1,label	lui $1, LH2	ll RG1, LL2($1)
ll $1,label($2)	lui $1, LH2	addu $1, $1, RG4	ll RG1, LL2($1)
ll $1,label+100000	lui $1, LHPA	ll RG1, LLP($1)
ll $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	ll RG1, LLP($1)

# sc: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic sc.
sc $1,($2)	sc RG1,0(RG3)	#Store condition to effective address
sc $1,100	sc RG1,VL2($0)
sc $1,100000	lui $1, VH2	sc RG1,VL2($1)
sc $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	sc RG1, VL2($1)
sc $1,label	lui $1, LH2	sc RG1, LL2($1)
sc $1,label($2)	lui $1, LH2	addu $1, $1, RG4	sc RG1, LL2($1)
sc $1,label+100000	lui $1, LHPA	sc RG1, LLP($1)
sc $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	sc RG1, LLP($1)

# Unaligned and double loads and stores.  All of them require the assembler to add a constant
# byte offset (from 1 to 4 bytes) to the given or calculated address.
# NOTE: I have abandoned the "accepted" expansion of immed16($reg) addressing form for the 
#       unaligned and double load/store pseudo-instructions because they produce what I consider 
#       incorrect results for immediate values at the upper edge of the signed 16 bit range 
#       (32765 through 32767).  The expansion of these pseudo's requires adding an additional 
#       byte offset (from 1 to 4 bytes) to the immediate value, which overflows the signed 16 
#       bit range and results in a large negative offset with no counterbalancing increment 
#       to the high order 16 bits.  Thus if the two pieces of unaligned data end up in different 
#       words, they are stored 64K bytes apart!  For example, the normal expansion of 
#       "usw $8,32767($9)" would be "swl $8,-32766($9)", "swr $8, 32767($9)"  Both SPIM and
#       Britten's text do this but I consider it incorrect.  My compromise is the following:
#       since each of the expansion calls for two calculated offsets, one of which can possibly
#       overflow due to the addition but the other will not (because there is no addition),
#       I'll code the expansion to treat the at-risk calculated offset as 32 bits (it will generate
#       the lui and addu, which is unnecessary in almost every case but is always correct)
#       and the second as 16 bits.  I'll group all instructions for this addressing mode together.
# ulw $1,100($2)	lwl RG1, VL2P3(RG4)	lwr RG1, VL2(RG4)   -- used by SPIM but not me.
# usw $1,100($2)	swl RG1, VL2P3(RG4)	swr RG1, VL2(RG4)   -- used by SPIM but not me.

# immed16($reg) forms of the unaligned and doubleword loads/stores.  The access whose offset has a byte
# count added (VL2Pm) goes through $1 ($at) via lui/addu; the other access uses base register RG4 directly.
# NOTE(review): the load forms write RG1 before a later instruction reads base register RG4, and ush
# temporarily rotates RG1, so a statement whose first operand is also its base register looks unsafe -- confirm.
ulw $1,100($2)	lui $1, VH2P3	addu $1, $1, RG4	lwl RG1, VL2P3($1)	lwr RG1, VL2(RG4)
ulh $1,100($2)	lui $1, VH2P1	addu $1, $1, RG4	lb RG1, VL2P1($1)	lbu $1, VL2(RG4)	sll RG1, RG1, 8	or RG1, RG1, $1
ulhu $1,100($2)	lui $1, VH2P1	addu $1, $1, RG4	lbu RG1, VL2P1($1)	lbu $1, VL2(RG4)	sll RG1, RG1, 8	or RG1, RG1, $1
ld $1,100($2)	lw RG1, VL2(RG4)	lui $1, VH2P4	addu $1, $1, RG4	lw NR1, VL2P4($1)
usw $1,100($2)	lui $1, VH2P3	addu $1, $1, RG4	swl RG1, VL2P3($1)	swr RG1, VL2(RG4)
ush $1,100($2)	sb RG1, VL2(RG4)	sll $1, RG1, 24	srl RG1, RG1, 8	or RG1, RG1, $1	lui $1, VH2P1	addu $1, $1, RG4	sb RG1, VL2P1($1)	srl $1, RG1, 24	sll RG1, RG1, 8	or RG1, RG1, $1
sd $1,100($2)	sw RG1, VL2(RG4)	lui $1, VH2P4	addu $1, $1, RG4	sw NR1, VL2P4($1)

# here are the remaining addressing modes, grouped by instruction.

# ulw: an lwl at the address plus 3 followed by an lwr at the address itself.  Each access builds its
# own high half in $1 ($at) because adding 3 can change the high 16 bits.
# NOTE(review): in the register-based forms lwl writes RG1 before the base register is read again,
# so using the same register as destination and base looks unsafe -- confirm.
ulw $1,100000	lui $1, VH2P3	lwl RG1, VL2P3($1)	lui $1, VH2	lwr RG1, VL2($1)
ulw $1,label	lui $1, LH2P3	lwl RG1, LL2P3($1)	lui $1, LH2	lwr RG1, LL2($1)
ulw $1,label+100000	lui $1, LHPAP3	lwl RG1, LLPP3($1)	lui $1, LHPA	lwr RG1, LLP($1)
ulw $1,($2)	lwl RG1, 3(RG3)	lwr RG1, 0(RG3)	#Unaligned load word from effective address
ulw $1,100000($2)	lui $1, VH2P3	addu $1, $1, RG4	lwl RG1, VL2P3($1)	lui $1, VH2	addu $1, $1, RG4	lwr RG1, VL2($1)
ulw $1,label($2)	lui $1, LH2P3	addu $1, $1, RG4	lwl RG1, LL2P3($1)	lui $1, LH2	addu $1, $1, RG4	lwr RG1, LL2($1)
ulw $1,label+100000($2)	lui $1, LHPAP3	addu $1, $1, RG6	lwl RG1, LLPP3($1)	lui $1, LHPA	addu $1, $1, RG6	lwr RG1, LLP($1)

# ulh: the byte at address+1 is loaded sign-extended (lb) into RG1, the byte at the address is loaded
# zero-extended (lbu) into $1 ($at), then RG1 is shifted left 8 and the two are OR-ed together.
# NOTE(review): in the register-based forms lb writes RG1 before the base register is read again,
# so using the same register as destination and base looks unsafe -- confirm.
ulh $1,100000	lui $1, VH2P1	lb RG1, VL2P1($1)	lui $1, VH2	lbu $1, VL2($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulh $1,label	lui $1, LH2P1	lb RG1, LL2P1($1)	lui $1, LH2	lbu $1, LL2($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulh $1,label+100000	lui $1, LHPAP1	lb RG1, LLPP1($1)	lui $1, LHPA	lbu $1, LLP($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulh $1,($2)	lb RG1, 1(RG3)	lbu $1, 0(RG3)	sll RG1, RG1, 8	or RG1, RG1, $1	#Unaligned load halfword from effective address
ulh $1,100000($2)	lui $1, VH2P1	addu $1, $1, RG4	lb RG1, VL2P1($1)	lui $1, VH2	addu $1, $1, RG4	lbu $1, VL2($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulh $1,label($2)	lui $1, LH2P1	addu $1, $1, RG4	lb RG1, LL2P1($1)	lui $1, LH2	addu $1, $1, RG4	lbu $1, LL2($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulh $1,label+100000($2)	lui $1, LHPAP1	addu $1, $1, RG6	lb RG1, LLPP1($1)	lui $1, LHPA	addu $1, $1, RG6	lbu $1, LLP($1)	sll RG1, RG1, 8	or RG1, RG1, $1

# ulhu: both bytes are loaded zero-extended (lbu): the byte at address+1 into RG1, the byte at the
# address into $1 ($at); RG1 is then shifted left 8 and the two are OR-ed together.
# NOTE(review): in the register-based forms the first lbu writes RG1 before the base register is
# read again, so using the same register as destination and base looks unsafe -- confirm.
ulhu $1,100000	lui $1, VH2P1	lbu RG1, VL2P1($1)	lui $1, VH2	lbu $1, VL2($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulhu $1,label	lui $1, LH2P1	lbu RG1, LL2P1($1)	lui $1, LH2	lbu $1, LL2($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulhu $1,label+100000	lui $1, LHPAP1	lbu RG1, LLPP1($1)	lui $1, LHPA	lbu $1, LLP($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulhu $1,($2)	lbu RG1, 1(RG3)	lbu $1, 0(RG3)	sll RG1, RG1, 8	or RG1, RG1, $1	#Unaligned load halfword unsigned from effective address
ulhu $1,100000($2)	lui $1, VH2P1	addu $1, $1, RG4	lbu RG1, VL2P1($1)	lui $1, VH2	addu $1, $1, RG4	lbu $1, VL2($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulhu $1,label($2)	lui $1, LH2P1	addu $1, $1, RG4	lbu RG1, LL2P1($1)	lui $1, LH2	addu $1, $1, RG4	lbu $1, LL2($1)	sll RG1, RG1, 8	or RG1, RG1, $1
ulhu $1,label+100000($2)	lui $1, LHPAP1	addu $1, $1, RG6	lbu RG1, LLPP1($1)	lui $1, LHPA	addu $1, $1, RG6	lbu $1, LLP($1)	sll RG1, RG1, 8	or RG1, RG1, $1

# ld: two lw instructions, the first into RG1 from the address, the second into the next higher
# register (NR1) from the address plus 4.  Each access builds its own high half in $1 ($at).
# NOTE(review): in the register-based forms the first lw writes RG1 before the base register is
# read again, so using the same register as destination and base looks unsafe -- confirm.
ld $1,100000	lui $1, VH2	lw RG1, VL2($1)	lui $1, VH2P4	lw NR1, VL2P4($1)
ld $1,label	lui $1, LH2	lw RG1, LL2($1)	lui $1, LH2P4	lw NR1, LL2P4($1)
ld $1,label+100000	lui $1, LHPA	lw RG1, LLP($1)	lui $1, LHPAP4	lw NR1, LLPP4($1)
ld $1,($2)	lw RG1, 0(RG3)	lw NR1, 4(RG3)	#Load doubleword from effective address
ld $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lw RG1, VL2($1)	lui $1, VH2P4	addu $1, $1, RG4	lw NR1, VL2P4($1)
ld $1,label($2)	lui $1, LH2	addu $1, $1, RG4	lw RG1, LL2($1)	lui $1, LH2P4	addu $1, $1, RG4	lw NR1, LL2P4($1)
ld $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lw RG1, LLP($1)	lui $1, LHPAP4	addu $1, $1, RG6	lw NR1, LLPP4($1)

# usw: an swl at the address plus 3 followed by an swr at the address itself.  Each access builds its
# own high half in $1 ($at) because adding 3 can change the high 16 bits.
usw $1,100000	lui $1, VH2P3	swl RG1, VL2P3($1)	lui $1, VH2	swr RG1, VL2($1)
usw $1,label	lui $1, LH2P3	swl RG1, LL2P3($1)	lui $1, LH2	swr RG1, LL2($1)
usw $1,label+100000	lui $1, LHPAP3	swl RG1, LLPP3($1)	lui $1, LHPA	swr RG1, LLP($1)
usw $1,($2)	swl RG1, 3(RG3)	swr RG1, 0(RG3)	#Unaligned store word
usw $1,100000($2)	lui $1, VH2P3	addu $1, $1, RG4	swl RG1, VL2P3($1)	lui $1, VH2	addu $1, $1, RG4	swr RG1, VL2($1)
usw $1,label($2)	lui $1, LH2P3	addu $1, $1, RG4	swl RG1, LL2P3($1)	lui $1, LH2	addu $1, $1, RG4	swr RG1, LL2($1)
usw $1,label+100000($2)	lui $1, LHPAP3	addu $1, $1, RG6	swl RG1, LLPP3($1)	lui $1, LHPA	addu $1, $1, RG6	swr RG1, LLP($1)

# ush: stores the low byte of RG1 at the address, rotates RG1 right by 8 (sll 24 / srl 8 / or) so the
# next byte is lowest, stores that at address+1, then rotates RG1 left by 8 to restore its value.
# $1 ($at) serves both as rotate scratch and as address register, so it is reloaded before the second sb.
# NOTE(review): RG1 is in its rotated state when the base register is read for the second store, so
# using the same register as source and base looks unsafe -- confirm.
ush $1,100000	lui $1, VH2	sb RG1, VL2($1)	sll $1, RG1, 24	srl RG1, RG1, 8	or RG1, RG1, $1	lui $1, VH2P1	sb RG1, VL2P1($1)	srl $1, RG1, 24	sll RG1, RG1, 8	or RG1, RG1, $1
ush $1,label	lui $1, LH2	sb RG1, LL2($1)	sll $1, RG1, 24	srl RG1, RG1, 8	or RG1, RG1, $1	lui $1, LH2P1	sb RG1, LL2P1($1)	srl $1, RG1, 24	sll RG1, RG1, 8	or RG1, RG1, $1
ush $1,label+100000	lui $1, LHPA	sb RG1, LLP($1)	sll $1, RG1, 24	srl RG1, RG1, 8	or RG1, RG1, $1	lui $1, LHPAP1	sb RG1, LLPP1($1)	srl $1, RG1, 24	sll RG1, RG1, 8	or RG1, RG1, $1
ush $1,($2)	sb RG1, 0(RG3)	sll $1, RG1, 24	srl RG1, RG1, 8	or RG1, RG1, $1	sb RG1, 1(RG3)	srl $1, RG1, 24	sll RG1, RG1, 8	or RG1, RG1, $1	#Unaligned store halfword
ush $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	sb RG1, VL2($1)	sll $1, RG1, 24	srl RG1, RG1, 8	or RG1, RG1, $1	lui $1, VH2P1	addu $1, $1, RG4	sb RG1, VL2P1($1)	srl $1, RG1, 24	sll RG1, RG1, 8	or RG1, RG1, $1
ush $1,label($2)	lui $1, LH2	addu $1, $1, RG4	sb RG1, LL2($1)	sll $1, RG1, 24	srl RG1, RG1, 8	or RG1, RG1, $1	lui $1, LH2P1	addu $1, $1, RG4	sb RG1, LL2P1($1)	srl $1, RG1, 24	sll RG1, RG1, 8	or RG1, RG1, $1
ush $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	sb RG1, LLP($1)	sll $1, RG1, 24	srl RG1, RG1, 8	or RG1, RG1, $1	lui $1, LHPAP1	addu $1, $1, RG6	sb RG1, LLPP1($1)	srl $1, RG1, 24	sll RG1, RG1, 8	or RG1, RG1, $1

# sd: two sw instructions, the first storing RG1 at the address, the second storing the next higher
# register (NR1) at the address plus 4.  Each access builds its own high half in $1 ($at).
sd $1,100000	lui $1, VH2	sw RG1, VL2($1)	lui $1, VH2P4	sw NR1, VL2P4($1)
sd $1,label	lui $1, LH2	sw RG1, LL2($1)	lui $1, LH2P4	sw NR1, LL2P4($1)
sd $1,label+100000	lui $1, LHPA	sw RG1, LLP($1)	lui $1, LHPAP4	sw NR1, LLPP4($1)
sd $1,($2)	sw RG1, 0(RG3)	sw NR1, 4(RG3)	#Store doubleword to effective address
sd $1,100000($2)	lui $1, VH2	addu $1, $1, RG4	sw RG1, VL2($1)	lui $1, VH2P4	addu $1, $1, RG4	sw NR1, VL2P4($1)
sd $1,label($2)	lui $1, LH2	addu $1, $1, RG4	sw RG1, LL2($1)	lui $1, LH2P4	addu $1, $1, RG4	sw NR1, LL2P4($1)
sd $1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	sw RG1, LLP($1)	lui $1, LHPAP4	addu $1, $1, RG6	sw NR1, LLPP4($1)

# load and store pseudo-instructions for floating point (coprocessor 1) registers

# lwc1: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic lwc1.
lwc1 $f1,($2)	lwc1 RG1,0(RG3)	#Load word coprocessor 1 from effective address
lwc1 $f1,100	lwc1 RG1,VL2($0)
lwc1 $f1,100000	lui $1, VH2	lwc1 RG1,VL2($1)
lwc1 $f1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lwc1 RG1, VL2($1)
lwc1 $f1,label	lui $1, LH2	lwc1 RG1, LL2($1)
lwc1 $f1,label($2)	lui $1, LH2	addu $1, $1, RG4	lwc1 RG1, LL2($1)
lwc1 $f1,label+100000	lui $1, LHPA	lwc1 RG1, LLP($1)
lwc1 $f1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lwc1 RG1, LLP($1)

# ldc1: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic ldc1.
ldc1 $f1,($2)	ldc1 RG1,0(RG3)	#Load doubleword coprocessor 1 from effective address
ldc1 $f1,100	ldc1 RG1,VL2($0)
ldc1 $f1,100000	lui $1, VH2	ldc1 RG1,VL2($1)
ldc1 $f1,100000($2)	lui $1, VH2	addu $1, $1, RG4	ldc1 RG1, VL2($1)
ldc1 $f1,label	lui $1, LH2	ldc1 RG1, LL2($1)
ldc1 $f1,label($2)	lui $1, LH2	addu $1, $1, RG4	ldc1 RG1, LL2($1)
ldc1 $f1,label+100000	lui $1, LHPA	ldc1 RG1, LLP($1)
ldc1 $f1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	ldc1 RG1, LLP($1)

# swc1: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic swc1.
swc1 $f1,($2)	swc1 RG1,0(RG3)	#Store word coprocessor 1 to effective address
swc1 $f1,100	swc1 RG1,VL2($0)
swc1 $f1,100000	lui $1, VH2	swc1 RG1,VL2($1)
swc1 $f1,100000($2)	lui $1, VH2	addu $1, $1, RG4	swc1 RG1, VL2($1)
swc1 $f1,label	lui $1, LH2	swc1 RG1, LL2($1)
swc1 $f1,label($2)	lui $1, LH2	addu $1, $1, RG4	swc1 RG1, LL2($1)
swc1 $f1,label+100000	lui $1, LHPA	swc1 RG1, LLP($1)
swc1 $f1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	swc1 RG1, LLP($1)

# sdc1: accepted address forms.  Addresses wider than 16 bits are split: lui puts the high half in $1 ($at),
# addu adds the base register when one is given, and the low half becomes the offset of the basic sdc1.
sdc1 $f1,($2)	sdc1 RG1,0(RG3)	#Store doubleword coprocessor 1 to effective address
sdc1 $f1,100	sdc1 RG1,VL2($0)
sdc1 $f1,100000	lui $1, VH2	sdc1 RG1,VL2($1)
sdc1 $f1,100000($2)	lui $1, VH2	addu $1, $1, RG4	sdc1 RG1, VL2($1)
sdc1 $f1,label	lui $1, LH2	sdc1 RG1, LL2($1)
sdc1 $f1,label($2)	lui $1, LH2	addu $1, $1, RG4	sdc1 RG1, LL2($1)
sdc1 $f1,label+100000	lui $1, LHPA	sdc1 RG1, LLP($1)
sdc1 $f1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	sdc1 RG1, LLP($1)

# l.s: every form expands to the basic lwc1 instruction.  Addresses wider than 16 bits are split: lui puts
# the high half in $1 ($at), addu adds the base register when one is given, the low half is the lwc1 offset.
l.s $f1,($2)	lwc1 RG1,0(RG3)	#Load floating point single precision from effective address
l.s $f1,100	lwc1 RG1,VL2($0)
l.s $f1,100000	lui $1, VH2	lwc1 RG1,VL2($1)
l.s $f1,100000($2)	lui $1, VH2	addu $1, $1, RG4	lwc1 RG1, VL2($1)
l.s $f1,label	lui $1, LH2	lwc1 RG1, LL2($1)
l.s $f1,label($2)	lui $1, LH2	addu $1, $1, RG4	lwc1 RG1, LL2($1)
l.s $f1,label+100000	lui $1, LHPA	lwc1 RG1, LLP($1)
l.s $f1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	lwc1 RG1, LLP($1)

# s.s: every form expands to the basic swc1 instruction.  Addresses wider than 16 bits are split: lui puts
# the high half in $1 ($at), addu adds the base register when one is given, the low half is the swc1 offset.
s.s $f1,($2)	swc1 RG1,0(RG3)	#Store floating point single precision to effective address
s.s $f1,100	swc1 RG1,VL2($0)
s.s $f1,100000	lui $1, VH2	swc1 RG1,VL2($1)
s.s $f1,100000($2)	lui $1, VH2	addu $1, $1, RG4	swc1 RG1, VL2($1)
s.s $f1,label	lui $1, LH2	swc1 RG1, LL2($1)
s.s $f1,label($2)	lui $1, LH2	addu $1, $1, RG4	swc1 RG1, LL2($1)
s.s $f1,label+100000	lui $1, LHPA	swc1 RG1, LLP($1)
s.s $f1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	swc1 RG1, LLP($1)

# l.d: every form expands to the basic ldc1 instruction.  Addresses wider than 16 bits are split: lui puts
# the high half in $1 ($at), addu adds the base register when one is given, the low half is the ldc1 offset.
l.d $f1,($2)	ldc1 RG1,0(RG3)	#Load floating point double precision from effective address
l.d $f1,100	ldc1 RG1,VL2($0)
l.d $f1,100000	lui $1, VH2	ldc1 RG1,VL2($1)
l.d $f1,100000($2)	lui $1, VH2	addu $1, $1, RG4	ldc1 RG1, VL2($1)
l.d $f1,label	lui $1, LH2	ldc1 RG1, LL2($1)
l.d $f1,label($2)	lui $1, LH2	addu $1, $1, RG4	ldc1 RG1, LL2($1)
l.d $f1,label+100000	lui $1, LHPA	ldc1 RG1, LLP($1)
l.d $f1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	ldc1 RG1, LLP($1)

# s.d: every form expands to the basic sdc1 instruction.  Addresses wider than 16 bits are split: lui puts
# the high half in $1 ($at), addu adds the base register when one is given, the low half is the sdc1 offset.
s.d $f1,($2)	sdc1 RG1,0(RG3)	#Store floating point double precision to effective address
s.d $f1,100	sdc1 RG1,VL2($0)
s.d $f1,100000	lui $1, VH2	sdc1 RG1,VL2($1)
s.d $f1,100000($2)	lui $1, VH2	addu $1, $1, RG4	sdc1 RG1, VL2($1)
s.d $f1,label	lui $1, LH2	sdc1 RG1, LL2($1)
s.d $f1,label($2)	lui $1, LH2	addu $1, $1, RG4	sdc1 RG1, LL2($1)
s.d $f1,label+100000	lui $1, LHPA	sdc1 RG1, LLP($1)
s.d $f1,label+100000($2)	lui $1, LHPA	addu $1, $1, RG6	sdc1 RG1, LLP($1)
