summaryrefslogtreecommitdiffstats
path: root/libraries/classparser/src/constants.h
blob: c3ef88fd555ad7d8a1a51781b8a7390a04b545ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#pragma once
#include "errors.h"
#include <sstream>

namespace java
{
/**
 * A single entry of a Java class file constant pool (JVMS section 4.4).
 *
 * The tag read from the file is kept in `type`; which payload member holds meaningful data
 * depends on that tag (see the comments on the members at the bottom of the class).
 */
class constant
{
public:
	/**
	 * Constant pool tags. The numeric values are the tag bytes used by the class file format.
	 */
	enum type_t : uint8_t
	{
		j_hole = 0, // HACK: this is a hole in the array, because java is crazy
		j_string_data = 1,
		j_int = 3,
		j_float = 4,
		j_long = 5,
		j_double = 6,
		j_class = 7,
		j_string = 8,
		j_fieldref = 9,
		j_methodref = 10,
		j_interface_methodref = 11,
		j_nameandtype = 12,
		j_method_handle = 15,
		j_method_type = 16,
		j_dynamic = 17,
		j_invoke_dynamic = 18,
		j_module = 19,
		j_package = 20
	} type;

	/**
	 * Read one constant (tag byte followed by its tag-specific payload) from the buffer.
	 *
	 * @param buf buffer positioned at the tag byte of the entry; it is advanced past the entry
	 * @throws classfile_exception* if the tag is not one of the known constant types
	 */
	constant(util::membuffer &buf)
	{
		buf.read(type);

		// load data depending on type
		switch (type)
		{
		case j_int:
		case j_float:
			// float shares storage with int_data, so the raw 32 bits are read once
			buf.read_be(int_data);
			break;
		case j_long:
		case j_double:
			// double shares storage with long_data, so the raw 64 bits are read once
			buf.read_be(long_data);
			break;
		case j_class:
			buf.read_be(ref_type.class_idx);
			break;
		case j_fieldref:
		case j_methodref:
		case j_interface_methodref:
			buf.read_be(ref_type.class_idx);
			buf.read_be(ref_type.name_and_type_idx);
			break;
		case j_string:
		case j_method_type:
		case j_module:
		case j_package:
			// all of these carry exactly one 16-bit constant pool index
			buf.read_be(index);
			break;
		case j_string_data:
			// HACK HACK: for now, we call these UTF-8 and do no further processing.
			// Later, we should do some decoding. It's really modified UTF-8
			// * U+0000 is represented as 0xC0,0x80 invalid character
			// * any single zero byte ends the string
			// * characters above U+10000 are encoded like in CESU-8
			buf.read_jstr(str_data);
			break;
		case j_nameandtype:
			buf.read_be(name_and_type.name_index);
			buf.read_be(name_and_type.descriptor_index);
			break;
		case j_method_handle:
			// reference_kind is a single byte, read the same way as the tag byte
			buf.read(method_handle.reference_kind);
			buf.read_be(method_handle.reference_index);
			break;
		case j_dynamic:
		case j_invoke_dynamic:
			buf.read_be(dynamic.bootstrap_method_attr_idx);
			buf.read_be(dynamic.name_and_type_idx);
			break;
		default:
			// invalid constant type!
			// NOTE(review): this throws a heap-allocated pointer, so only handlers that catch
			// `classfile_exception *` match it. Left unchanged to keep the thrown type stable
			// for existing callers - confirm what they actually catch.
			throw new classfile_exception();
		}
	}

	/**
	 * Create a placeholder entry (j_hole). The integer argument is ignored.
	 */
	constant(int) : type(j_hole)
	{
	}

	/**
	 * Build a short human-readable description of this entry ("<Kind>: <payload>").
	 *
	 * @return the description; unknown tags are reported as "Invalid entry (<tag>)"
	 */
	std::string toString() const
	{
		std::ostringstream ss;
		switch (type)
		{
		case j_hole:
			ss << "Fake legacy entry";
			break;
		case j_float:
			ss << "Float: " << float_data;
			break;
		case j_double:
			ss << "Double: " << double_data;
			break;
		case j_int:
			ss << "Int: " << int_data;
			break;
		case j_long:
			ss << "Long: " << long_data;
			break;
		case j_string_data:
			ss << "StrData: " << str_data;
			break;
		case j_string:
			ss << "Str: " << index;
			break;
		case j_fieldref:
		case j_methodref:
		case j_interface_methodref:
		{
			// the three reference kinds differ only in their label
			const char *label = "FieldRef: ";
			if (type == j_methodref)
			{
				label = "MethodRef: ";
			}
			else if (type == j_interface_methodref)
			{
				label = "IfMethodRef: ";
			}
			ss << label << ref_type.class_idx << " " << ref_type.name_and_type_idx;
			break;
		}
		case j_class:
			ss << "Class: " << ref_type.class_idx;
			break;
		case j_nameandtype:
			ss << "NameAndType: " << name_and_type.name_index << " "
			   << name_and_type.descriptor_index;
			break;
		case j_method_handle:
			// cast so the byte is printed as a number, not as a character
			ss << "MethodHandle: " << int(method_handle.reference_kind) << " "
			   << method_handle.reference_index;
			break;
		case j_method_type:
			ss << "MethodType: " << index;
			break;
		case j_dynamic:
			ss << "Dynamic: " << dynamic.bootstrap_method_attr_idx << " "
			   << dynamic.name_and_type_idx;
			break;
		case j_invoke_dynamic:
			ss << "InvokeDynamic: " << dynamic.bootstrap_method_attr_idx << " "
			   << dynamic.name_and_type_idx;
			break;
		case j_module:
			ss << "Module: " << index;
			break;
		case j_package:
			ss << "Package: " << index;
			break;
		default:
			ss << "Invalid entry (" << int(type) << ")";
			break;
		}
		return ss.str();
	}

	struct ref_type_t
	{
		/**
		 * Class reference:
		 * an index within the constant pool to a UTF-8 string containing
		 * the fully qualified class name (in internal format)
		 * Used for j_class, j_fieldref, j_methodref and j_interface_methodref
		 */
		uint16_t class_idx;
		// used for j_fieldref, j_methodref and j_interface_methodref
		uint16_t name_and_type_idx;
	};
	struct name_and_type_t
	{
		uint16_t name_index;
		uint16_t descriptor_index;
	};
	// payload of j_method_handle
	struct method_handle_t
	{
		uint8_t reference_kind;
		uint16_t reference_index;
	};
	// payload of j_dynamic and j_invoke_dynamic
	struct dynamic_t
	{
		uint16_t bootstrap_method_attr_idx;
		uint16_t name_and_type_idx;
	};

	std::string str_data; /** String data in 'modified utf-8'. Only filled for j_string_data. */

	// store everything here.
	// Only the member matching `type` is written by the constructor; j_hole and
	// j_string_data entries leave the union untouched.
	union
	{
		int32_t int_data;   // j_int
		int64_t long_data;  // j_long
		float float_data;   // j_float (bits loaded through int_data)
		double double_data; // j_double (bits loaded through long_data)
		uint16_t index;     // j_string, j_method_type, j_module, j_package
		ref_type_t ref_type;
		name_and_type_t name_and_type;
		method_handle_t method_handle;
		dynamic_t dynamic;
	};
};

/**
 * A helper class that represents the custom container used in Java class file for storage of
 * constants
 */
class constant_pool
{
public:
	/**
	 * Create a pool of constants
	 */
	constant_pool()
	{
	}
	/**
	 * Load a java constant pool
	 */
	void load(util::membuffer &buf)
	{
		// FIXME: @SANITY this should check for the end of buffer.
		uint16_t length = 0;
		buf.read_be(length);
		length--;
		const constant *last_constant = nullptr;
		while (length)
		{
			const constant &cnst = constant(buf);
			constants.push_back(cnst);
			last_constant = &constants[constants.size() - 1];
			if (last_constant->type == constant::j_double ||
				last_constant->type == constant::j_long)
			{
				// push in a fake constant to preserve indexing
				constants.push_back(constant(0));
				length -= 2;
			}
			else
			{
				length--;
			}
		}
	}
	typedef std::vector<java::constant> container_type;
	/**
	 * Access constants based on jar file index numbers (index of the first element is 1)
	 */
	java::constant &operator[](std::size_t constant_index)
	{
		if (constant_index == 0 || constant_index > constants.size())
		{
			throw new classfile_exception();
		}
		return constants[constant_index - 1];
	}
	;
	container_type::const_iterator begin() const
	{
		return constants.begin();
	}
	;
	container_type::const_iterator end() const
	{
		return constants.end();
	}

private:
	container_type constants;
};
}