Code
属性用于表示成员方法的代码部分,Code
中包含了指令集(byte数组
),JVM调用成员方法时实际上执行的就是Code
中的指令,而反编译工具则是把Code
中的指令翻译成了Java代码。
// Binary layout of the Code attribute (JVMS structure notation; u1/u2/u4 are
// unsigned 1-, 2- and 4-byte big-endian values).
Code_attribute {
// Presumably the constant-pool index of the attribute name ("Code") - see JVMS 4.7.3.
u2 attribute_name_index;
// Length of the attribute; presumably excludes these first six header bytes - see JVMS 4.7.3.
u4 attribute_length;
// Maximum operand stack size of the method.
u2 max_stack;
// Maximum number of local variables of the method.
u2 max_locals;
// Number of bytes in the code array below.
u4 code_length;
// The instruction bytes that the JVM executes for this method.
u1 code[code_length];
// Number of entries in exception_table.
u2 exception_table_length;
// One exception handler entry. Field meanings below follow JVMS 4.7.3 and are
// not demonstrated by the parsing snippet in this article - verify against the spec.
{ u2 start_pc;
u2 end_pc;
u2 handler_pc;
u2 catch_type;
} exception_table[exception_table_length];
// Number of nested attributes that follow.
u2 attributes_count;
// Nested attributes of the Code attribute.
attribute_info attributes[attributes_count];
}
Code解析代码片段:
// u2 max_stack, u2 max_locals, u4 code_length
int maxStack = dis.readUnsignedShort();
int maxLocals = dis.readUnsignedShort();
int codeLength = dis.readInt();

// Decoded instructions, in the order they appear in the code array
List<String> opcodeList = new ArrayList<>();

// Read the whole code array. readFully() is required here: a plain read() may
// return after filling only part of the buffer, which would leave `bytes`
// incomplete and `dis` positioned in the middle of the code array.
byte[] bytes = new byte[codeLength];
dis.readFully(bytes);

// Stream over the code bytes only, used to decode the instructions
DataInputStream bis = new DataInputStream(new ByteArrayInputStream(bytes));

// Result map for the Code attribute (LinkedHashMap keeps insertion order)
Map<String, Object> attrMap = new LinkedHashMap<>();
attrMap.put("maxStack", maxStack);
attrMap.put("maxLocals", maxLocals);
attrMap.put("codeLength", codeLength);

// True when the previously decoded instruction was the `wide` prefix
boolean wide = false;
// Decode the code array one instruction at a time. `offset` is the position of
// the current opcode inside `bytes`: the loop header accounts for the opcode
// byte itself, and every case adds the number of operand bytes it consumed
// from `bis` so that both stay in step.
for (int offset = 0; offset < codeLength; offset++) {
    int code = bis.readUnsignedByte();
    Opcodes opcode = Opcodes.getOpcodes(code);

    // Byte values without an Opcodes mapping are skipped
    if (opcode == null) {
        continue;
    }

    switch (opcode) {
        // One operand: 1 byte normally, 2 bytes when prefixed by `wide`
        case BIPUSH:
        case LDC:
        case ILOAD:
        case LLOAD:
        case FLOAD:
        case DLOAD:
        case ALOAD:
        case ISTORE:
        case LSTORE:
        case FSTORE:
        case DSTORE:
        case ASTORE:
        case RET:
        case NEWARRAY: {
            int immediate = wide ? bis.readUnsignedShort() : bis.readUnsignedByte();
            addOpcodes(opcodeList, opcode, immediate);
            offset += wide ? 2 : 1;
            break;
        }

        // One unsigned 2-byte operand
        case LDC_W:
        case LDC2_W:
        case GETSTATIC:
        case PUTSTATIC:
        case GETFIELD:
        case PUTFIELD:
        case INVOKEVIRTUAL:
        case INVOKESPECIAL:
        case INVOKESTATIC:
        case NEW:
        case ANEWARRAY:
        case CHECKCAST:
        case INSTANCEOF:
        case SIPUSH: {
            addOpcodes(opcodeList, opcode, bis.readUnsignedShort());
            offset += 2;
            break;
        }

        // Signed 2-byte branch offset
        case IFEQ:
        case IFNE:
        case IFLT:
        case IFGE:
        case IFGT:
        case IFLE:
        case IF_ICMPEQ:
        case IF_ICMPNE:
        case IF_ICMPLT:
        case IF_ICMPGE:
        case IF_ICMPGT:
        case IF_ICMPLE:
        case IF_ACMPEQ:
        case IF_ACMPNE:
        case GOTO:
        case JSR:
        case IFNULL:
        case IFNONNULL: {
            int branchOffset = bis.readShort();
            opcodeList.add(opcode.getDesc() + " " + branchOffset);
            offset += 2;
            break;
        }

        // Signed 4-byte branch offset
        case GOTO_W:
        case JSR_W: {
            int branchOffset = bis.readInt();
            opcodeList.add(opcode.getDesc() + " " + branchOffset);
            offset += 4;
            break;
        }

        // iinc <index> <const>: the index is unsigned, but the constant is a
        // SIGNED value (e.g. `i--` is encoded as const = -1), so it must not be
        // read as an unsigned byte/short. Both are 2 bytes wide after `wide`.
        case IINC: {
            int localIndex = wide ? bis.readUnsignedShort() : bis.readUnsignedByte();
            int incrementConst = wide ? bis.readShort() : bis.readByte();
            opcodeList.add(opcode.getDesc() + " " + localIndex + " by " + incrementConst);
            offset += wide ? 4 : 2;
            break;
        }

        // tableswitch: padding, default, low, high, then (high - low + 1) jump offsets
        case TABLESWITCH: {
            int paddingBytes = readPaddingBytes(bytes, bis);
            bis.readInt(); // default jump offset, not reported
            int low = bis.readInt();
            int high = bis.readInt();
            int jumpCount = high - low + 1;
            int[] switchJumpOffsets = new int[jumpCount];

            for (int k = 0; k < jumpCount; k++) {
                switchJumpOffsets[k] = bis.readInt();
            }

            opcodeList.add(opcode.getDesc());
            // 12 = default + low + high, followed by 4 bytes per jump offset
            offset += paddingBytes + 12 + 4 * jumpCount;
            break;
        }

        // lookupswitch: padding, default, npairs, then npairs (match, offset) pairs
        case LOOKUPSWITCH: {
            int paddingBytes = readPaddingBytes(bytes, bis);
            bis.readInt(); // default jump offset, not reported
            int pairCount = bis.readInt();

            for (int k = 0; k < pairCount; k++) {
                bis.readInt(); // match value, not reported
                bis.readInt(); // jump offset, not reported
            }

            opcodeList.add(opcode.getDesc());
            // 8 = default + npairs, followed by 8 bytes per pair
            offset += paddingBytes + 8 + 8 * pairCount;
            break;
        }

        // 2-byte operand, 1-byte count, then 1 byte that is always 0
        case INVOKEINTERFACE: {
            int immediateShort = bis.readUnsignedShort();
            bis.readUnsignedByte(); // count, not reported
            bis.readByte();         // always 0, discarded
            addOpcodes(opcodeList, opcode, immediateShort);
            offset += 4;
            break;
        }

        // 2-byte operand, then 2 bytes that are always 0
        case INVOKEDYNAMIC: {
            int immediateShort = bis.readUnsignedShort();
            bis.readUnsignedShort(); // always 0, discarded
            addOpcodes(opcodeList, opcode, immediateShort);
            offset += 4;
            break;
        }

        // 2-byte operand, then 1-byte dimension count
        case MULTIANEWARRAY: {
            int immediateShort = bis.readUnsignedShort();
            bis.readUnsignedByte(); // array dimensions, not reported
            addOpcodes(opcodeList, opcode, immediateShort);
            offset += 3;
            break;
        }

        // Instructions without operands
        default:
            opcodeList.add(opcode.getDesc());
    }

    // The `wide` prefix only affects the instruction that immediately follows it
    wide = (WIDE == opcode);
}
attrMap.put("opcodes", opcodeList);

// Read the exception table
attrMap.put("exceptionTable", readExceptionTable());

// u2 attributes_count: an unsigned 16-bit value, so it must be read with
// readUnsignedShort(); readShort() would turn counts above 32767 negative.
int attributesCount = dis.readUnsignedShort();
attrMap.put("attributeLength", attributeLength);

// Recursively read the attributes nested inside the Code attribute
attrMap.put("attributes", readAttributes(attributesCount));

attributeMap.put("Code", attrMap);
在解析Code
属性时code_length
表示的是Code
的字节长度,max_stack
和max_locals
是固定值,分别表示操作数栈的最大深度和局部变量表的大小(槽位数),这两个值是在编译类方法时自动计算出来的,如果通过ASM
修改了类方法可能会需要重新计算max_stack
和max_locals
。
示例 - TestHelloWorld类hello方法解析结果:
{
"access": 1,
"name": "hello",
"desc": "(Ljava/lang/String;)Ljava/lang/String;",
"attributesCount": 1,
"attributes": {
"attributeName": "Code",
"attributeLength": 88,
"Code": {
"maxStack": 2,
"maxLocals": 3,
"codeLength": 22,
"opcodes": [
"ldc #3 <Hello:>",
"astore_2",
"new #4 <java/lang/StringBuilder>",
"dup",
"invokespecial #5 <java/lang/StringBuilder.<init>>",
"aload_2",
"invokevirtual #6 <java/lang/StringBuilder.append>",
"aload_1",
"invokevirtual #6 <java/lang/StringBuilder.append>",
"invokevirtual #7 <java/lang/StringBuilder.toString>",
"areturn"
],
"exceptionTable": {
"exceptionTableLength": 0,
"exceptionTableList": [ ]
},
"attributeLength": 88,
"attributes": {
"attributeName": "LocalVariableTable",
"attributeLength": 32,
"LineNumberTable": {
"lineNumberTableLength": 2,
"lineNumberTableList": [
{
"startPc": 0,
"lineNumber": 21
},
{
"startPc": 3,
"lineNumber": 22
}
]
},
"LocalVariableTable": {
"localVariableTableLength": 3,
"localVariableTableList": [
{
"startPc": 0,
"length": 22,
"name": "this",
"desc": "Lcom/anbai/sec/bytecode/TestHelloWorld;",
"index": 0
},
{
"startPc": 0,
"length": 22,
"name": "content",
"desc": "Ljava/lang/String;",
"index": 1
},
{
"startPc": 3,
"length": 19,
"name": "str",
"desc": "Ljava/lang/String;",
"index": 2
}
]
}
}
}
}
}
解析Code
的指令集时需要对照指令集映射表,然后根据不同的指令实现不一样的指令处理逻辑,指令列表和详细的描述请参考:JVM规范-指令。