summaryrefslogtreecommitdiffstats
path: root/libraries/pack200/src/unpack.h
blob: cc5dd60aeece1739aede8a43724e701179a5ab51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
/*
 * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#pragma once

// Global Structures
// Forward declarations for types that the structures in this header only
// refer to through pointers or references.
struct jar;
struct gunzip;
struct band;
struct constant_pool;
struct entry;
struct cpindex;
struct inner_class;
struct value_stream;

// Signature of the input callback stored in unpacker::read_input_fn.
// `self` is the unpacker requesting data and `buf` is the destination buffer.
// NOTE(review): presumably reads between `minlen` and `maxlen` bytes into
// `buf` and returns the number of bytes read -- confirm against the
// implementation that installs the callback.
typedef int64_t (*read_input_fn_t)(unpacker *self, void *buf, int64_t minlen, int64_t maxlen);

// An index over constant pool entries.  Exactly one of base1/base2 is set by
// init(); the other is reset to nullptr.
struct cpindex
{
    uint32_t len;  // length of the index, as passed to init()
    entry *base1;  // primary index base (nullptr when base2 is in use)
    entry **base2; // secondary index base (nullptr when base1 is in use)
    byte ixTag;    // entry type (never CONSTANT_None); 64 is added for a sub-index
    enum { SUB_TAG = 64 };

    // Looks up the i-th entry; defined out of line.
    entry *get(uint32_t i);

    // Set up a primary index of len_ entries starting at base1_.
    void init(int len_, entry *base1_, int ixTag_)
    {
        base2 = nullptr;
        base1 = base1_;
        ixTag = static_cast<byte>(ixTag_);
        len = static_cast<uint32_t>(len_);
    }

    // Set up a secondary index of len_ entry pointers starting at base2_.
    void init(int len_, entry **base2_, int ixTag_)
    {
        base1 = nullptr;
        base2 = base2_;
        ixTag = static_cast<byte>(ixTag_);
        len = static_cast<uint32_t>(len_);
    }
};

// All constant pool state for the archive being unpacked.
struct constant_pool
{
    uint32_t nentries;
    entry *entries;
    entry *first_extra_entry;
    uint32_t maxentries; // allocated capacity of entries

    // Each tag owns one homogeneous subrange; these arrays hold its
    // size, starting position, index and extra entries.
    int tag_count[CONSTANT_Limit];
    int tag_base[CONSTANT_Limit];
    cpindex tag_index[CONSTANT_Limit];
    ptrlist tag_extras[CONSTANT_Limit];

    // Per-class member indexes; indexed by 2*CONSTANT_Class.inord.
    cpindex *member_indexes;
    cpindex *getFieldIndex(entry *classRef);
    cpindex *getMethodIndex(entry *classRef);

    // Inner class lookup tables and their accessors.
    inner_class **ic_index;
    inner_class **ic_child_index;
    inner_class *getIC(entry *inner);
    inner_class *getFirstChildIC(entry *outer);
    inner_class *getNextChildIC(inner_class *child);

    int outputIndexLimit;  // index limit once entries are renumbered
    ptrlist outputEntries; // entry* values still needing an output index

    // Hash table over entries, keyed by tag and byte string.
    entry **hashTab;
    uint32_t hashTabLength;
    entry *&hashTabRef(byte tag, bytes &b);
    entry *ensureUtf8(bytes &b);
    entry *ensureClass(bytes &b);

    // Identifiers for the well-known Utf8 symbols; sym[] holds their entries.
    enum
    {
#define SNAME(n, s) s_##s,
        ALL_ATTR_DO(SNAME)
#undef SNAME
        s_lt_init_gt, // <init>
        s_LIMIT
    };
    entry *sym[s_LIMIT];

    // Reads the counts from the header and allocates the main arrays.
    enum { NUM_COUNTS = 12 };
    void init(unpacker *u, int counts[NUM_COUNTS]);

    // Owning unpacker, used for error checks etc.
    unpacker *u;

    // Number of entries recorded for the given tag.
    int getCount(byte tag)
    {
        assert((uint32_t)tag < CONSTANT_Limit);
        const int count = tag_count[tag];
        return count;
    }

    // Address of the index that belongs to the given tag.
    cpindex *getIndex(byte tag)
    {
        assert((uint32_t)tag < CONSTANT_Limit);
        return tag_index + tag;
    }

    cpindex *getKQIndex(); // uses cur_descr

    void expandSignatures();
    void initMemberIndexes();

    void computeOutputOrder();
    void computeOutputIndexes();
    void resetOutputIndexes();
};

/*
 * The unpacker provides the entry points to the unpack engine
 * and maintains the engine's state.
 */
struct unpacker
{
    // Holds the input/output buffers, archive header fields, constant pool,
    // attribute layout definitions and the read/write routines of one
    // unpack session.

    // One element of the resulting JAR.
    struct file
    {
        const char *name;
        uint64_t size;
        int modtime;
        int options;
        bytes data[2];
        // Note:  If Sum(data[*].len) < size,
        // remaining bytes must be read directly from the input stream.

        // True when the FO_DEFLATE_HINT bit is set in options.
        bool deflate_hint()
        {
            return ((options & FO_DEFLATE_HINT) != 0);
        }
    };

    // if running Unix-style, here are the inputs and outputs
    FILE *infileptr; // buffered
    bytes inbytes;   // direct
    gunzip *gzin;    // gunzip filter, if any
    jar *jarout;     // output JAR file

    // pointer to self, for U_NEW macro
    unpacker *u;

    ptrlist mallocs;     // list of guys to free when we are all done
    ptrlist tmallocs;    // list of guys to free on next client request
    fillbytes smallbuf;  // supplies small alloc requests
    fillbytes tsmallbuf; // supplies temporary small alloc requests

    // option management members
    int verbose;              // verbose level, 0 means no output
    int deflate_hint_or_zero; // ==0 means not set, otherwise -1 or 1
    int modification_time_or_zero;

    // input stream
    fillbytes input; // the whole block (size is predicted, has slop too)
    bool live_input; // is the data in this block live?
    bool free_input; // must the input buffer be freed?
    byte *rp;        // read pointer (< rplimit <= input.limit())
    byte *rplimit;   // how much of the input block has been read?
    uint64_t bytes_read;
    int unsized_bytes_read;

    // callback to read at least one byte, up to available input
    read_input_fn_t read_input_fn;

    // archive header fields
    int magic, minver, majver;
    size_t archive_size;
    int archive_next_count, archive_options, archive_modtime;
    int band_headers_size;
    int file_count, attr_definition_count, ic_count, class_count;
    int default_class_minver, default_class_majver;
    int default_file_options, suppress_file_options;   // not header fields
    int default_archive_modtime, default_file_modtime; // not header fields
    int code_count;                                    // not a header field
    int files_remaining;                               // not a header field

    // engine state
    band *all_bands;  // indexed by band_number
    byte *meta_rp;    // read-pointer into (copy of) band_headers
    constant_pool cp; // all constant pool information
    inner_class *ics; // InnerClasses

    // output stream
    bytes output;  // output block (either classfile head or tail)
    byte *wp;      // write pointer (< wplimit == output.limit())
    byte *wpbase;  // write pointer starting address (<= wp)
    byte *wplimit; // how much of the output block has been written?

    // output state
    file cur_file;
    entry *cur_class;    // CONSTANT_Class entry
    entry *cur_super;    // CONSTANT_Class entry or nullptr
    entry *cur_descr;    // CONSTANT_NameandType entry
    int cur_descr_flags; // flags corresponding to cur_descr
    int cur_class_minver, cur_class_majver;
    bool cur_class_has_local_ics;
    fillbytes cur_classfile_head;
    fillbytes cur_classfile_tail;
    int files_written;   // also tells which file we're working on
    int classes_written; // also tells which class we're working on
    uint64_t bytes_written;
    intlist bcimap;
    fillbytes class_fixup_type;
    intlist class_fixup_offset;
    ptrlist class_fixup_ref;
    fillbytes code_fixup_type; // which format of branch operand?
    intlist code_fixup_offset; // location of operand needing fixup
    intlist code_fixup_source; // encoded ID of branch insn
    ptrlist requested_ics;     // which ics need output?

    // stats pertaining to multiple segments (updated on reset)
    uint64_t bytes_read_before_reset;
    uint64_t bytes_written_before_reset;
    int files_written_before_reset;
    int classes_written_before_reset;
    int segments_read_before_reset;

    // attribute state
    struct layout_definition
    {
        uint32_t idx;     // index (0..31...) which identifies this layout
        const char *name; // name of layout
        entry *nameEntry;
        const char *layout; // string of layout (not yet parsed)
        band **elems;       // array of top-level layout elems (or callables)

        // True when the layout string starts with '['.
        // The layout pointer is dereferenced without a null check.
        bool hasCallables()
        {
            return layout[0] == '[';
        }
        // Returns elems; asserts that it has already been set.
        band **bands()
        {
            assert(elems != nullptr);
            return elems;
        }
    };
    struct attr_definitions
    {
        unpacker *u;         // pointer to self, for U_NEW macro
        int xxx_flags_hi_bn; // locator for flags, count, indexes, calls bands
        int attrc;           // ATTR_CONTEXT_CLASS, etc.
        uint32_t flag_limit; // 32 or 63, depending on archive_options bit
        uint64_t predef;     // mask of built-in definitions
        uint64_t redef;      // mask of local flag definitions or redefinitions
        ptrlist layouts;     // local (compressor-defined) defs, in index order
        int flag_count[X_ATTR_LIMIT_FLAGS_HI];
        intlist overflow_count;
        ptrlist strip_names;   // what attribute names are being stripped?
        ptrlist band_stack;    // Temp., used during layout parsing.
        ptrlist calls_to_link; //  (ditto)
        int bands_made;        //  (ditto)

        // Frees the five list members owned by this object.
        void free()
        {
            layouts.free();
            overflow_count.free();
            strip_names.free();
            band_stack.free();
            calls_to_link.free();
        }

        // Locate the five fixed bands.
        band &xxx_flags_hi();
        band &xxx_flags_lo();
        band &xxx_attr_count();
        band &xxx_attr_indexes();
        band &xxx_attr_calls();
        band &fixed_band(int e_class_xxx);

        // Register a new layout, and make bands for it.
        layout_definition *defineLayout(int idx, const char *name, const char *layout);
        layout_definition *defineLayout(int idx, entry *nameEntry, const char *layout);
        band **buildBands(layout_definition *lo);

        // Parse a layout string or part of one, recursively if necessary.
        const char *parseLayout(const char *lp, band **&res, int curCble);
        const char *parseNumeral(const char *lp, int &res);
        const char *parseIntLayout(const char *lp, band *&res, byte le_kind,
                                   bool can_be_signed = false);
        band **popBody(int band_stack_base); // pops a body off band_stack

        // Read data into the bands of the idx-th layout.
        void readBandData(int idx);                     // parse layout, make bands, read data
        void readBandData(band **body, uint32_t count); // recursive helper

        // Returns the idx-th registered layout, or nullptr when idx is
        // outside the range of the layouts list.
        layout_definition *getLayout(uint32_t idx)
        {
            if (idx >= (uint32_t)layouts.length())
                return nullptr;
            return (layout_definition *)layouts.get(idx);
        }

        // Selects the flag limit; may only be called while flag_limit is
        // still zero (enforced by the assert).
        void setHaveLongFlags(bool z)
        {
            assert(flag_limit == 0); // not set up yet
            flag_limit = (z ? X_ATTR_LIMIT_FLAGS_HI : X_ATTR_LIMIT_NO_FLAGS_HI);
        }
        // True when flag_limit was set to X_ATTR_LIMIT_FLAGS_HI.
        bool haveLongFlags()
        {
            assert(flag_limit == X_ATTR_LIMIT_NO_FLAGS_HI ||
                   flag_limit == X_ATTR_LIMIT_FLAGS_HI);
            return flag_limit == X_ATTR_LIMIT_FLAGS_HI;
        }

        // Return flag_count if idx is predef and not redef, else zero.
        int predefCount(uint32_t idx);

        // True when bit idx of redef is set; false for idx >= flag_limit.
        bool isRedefined(uint32_t idx)
        {
            if (idx >= flag_limit)
                return false;
            return (bool)((redef >> idx) & 1);
        }
        // True when bit idx is set in predef and clear in redef;
        // false for idx >= flag_limit.
        bool isPredefined(uint32_t idx)
        {
            if (idx >= flag_limit)
                return false;
            return (bool)(((predef & ~redef) >> idx) & 1);
        }
        // Union of the predef and redef masks.
        uint64_t flagIndexMask()
        {
            return (predef | redef);
        }
        // Below flag_limit, tests bit idx of (predef | redef); at or above
        // it, tests whether idx - flag_limit falls inside overflow_count.
        bool isIndex(uint32_t idx)
        {
            assert(flag_limit != 0); // must be set up already
            if (idx < flag_limit)
                return (bool)(((predef | redef) >> idx) & 1);
            else
                return (idx - flag_limit < (uint32_t)overflow_count.length());
        }
        // Returns a modifiable reference to the counter for idx: a slot of
        // flag_count below flag_limit, otherwise a slot of overflow_count.
        int &getCount(uint32_t idx)
        {
            assert(isIndex(idx));
            if (idx < flag_limit)
                return flag_count[idx];
            else
                return overflow_count.get(idx - flag_limit);
        }
    };

    attr_definitions attr_defs[ATTR_CONTEXT_LIMIT];

    // Initialization
    void init(read_input_fn_t input_fn = nullptr);
    // Resets to a known sane state
    void reset();
    // Deallocates all storage.
    void free();
    // Deallocates temporary storage (volatile after next client call).
    void free_temps()
    {
        tsmallbuf.init();
        tmallocs.freeAll();
    }

    // Option management methods
    bool set_option(const char *option, const char *value);
    const char *get_option(const char *option);

    // Fetching input.
    bool ensure_input(int64_t more);
    // Current read pointer.
    byte *input_scan()
    {
        return rp;
    }
    // Distance from the read pointer to rplimit.
    size_t input_remaining()
    {
        return rplimit - rp;
    }
    // Distance from the start of the input block to the read pointer.
    size_t input_consumed()
    {
        return rp - input.base();
    }

    // Entry points to the unpack engine
    static int run(int argc, char **argv); // Unix-style entry point.
    void check_options();
    void start(void *packptr = nullptr, size_t len = 0);
    void write_file_to_jar(file *f);
    void finish();

    // Public post unpack methods
    int get_files_remaining()
    {
        return files_remaining;
    }
    // Returns archive_next_count.
    int get_segments_remaining()
    {
        return archive_next_count;
    }
    file *get_next_file(); // returns nullptr on last file

    // General purpose methods

    // Allocates size bytes through alloc_heap with smallOK enabled.
    void *alloc(size_t size)
    {
        return alloc_heap(size, true);
    }
    // Same as alloc(), but additionally passes temp = true to alloc_heap.
    void *temp_alloc(size_t size)
    {
        return alloc_heap(size, true, true);
    }
    void *alloc_heap(size_t size, bool smallOK = false, bool temp = false);
    // Saves the NUL-terminated string str into b (length taken via strlen).
    void saveTo(bytes &b, const char *str)
    {
        saveTo(b, (byte *)str, strlen(str));
    }
    // Saves the contents of data into b.
    void saveTo(bytes &b, bytes &data)
    {
        saveTo(b, data.ptr, data.len);
    }
    void saveTo(bytes &b, byte *ptr, size_t len); //{ b.ptr = U_NEW...}
    // Saves str through saveTo() and returns the saved buffer's strval().
    const char *saveStr(const char *str)
    {
        bytes buf;
        saveTo(buf, str);
        return buf.strval();
    }
    // Formats num in decimal and saves the text through saveStr().
    const char *saveIntStr(int num)
    {
        char buf[30];
        // Bounded formatting: the write can never run past buf.
        snprintf(buf, sizeof(buf), "%d", num);
        return saveStr(buf);
    }
    static unpacker *current(); // find current instance

    // Output management

    // Makes *which the active output buffer.  No output may be active
    // (wp must be nullptr); wpbase/wp/wplimit are taken from which's
    // base(), limit() and end().
    void set_output(fillbytes *which)
    {
        assert(wp == nullptr);
        which->ensureSize(1 << 12); // covers the average classfile
        wpbase = which->base();
        wp = which->limit();
        wplimit = which->end();
    }
    fillbytes *close_output(fillbytes *which = nullptr); // inverse of set_output

    // These take an implicit parameter of wp/wplimit, and resize as necessary:
    byte *put_space(size_t len); // allocates space at wp, returns pointer
    // Reserves s bytes via put_space() and returns the offset of the
    // reserved region relative to wpbase.
    size_t put_empty(size_t s)
    {
        byte *p = put_space(s);
        return p - wpbase;
    }
    void ensure_put_space(size_t len);
    // Reserves b.len bytes and writes b into them.
    void put_bytes(bytes &b)
    {
        b.writeTo(put_space(b.len));
    }
    // Reserves one byte via put_space() and stores n there.
    void putu1(int n)
    {
        putu1_at(put_space(1), n);
    }
    // Stores n at wp and advances wp without calling put_space().
    // NOTE(review): presumably the caller must have reserved room first
    // (e.g. via ensure_put_space) -- confirm at the call sites.
    void putu1_fast(int n)
    {
        putu1_at(wp++, n);
    }
    void putu2(int n);                    // { putu2_at(put_space(2), n); }
    void putu4(int n);                    // { putu4_at(put_space(4), n); }
    void putu8(int64_t n);                // { putu8_at(put_space(8), n); }
    void putref(entry *e);                // { putu2_at(put_space(2), putref_index(e, 2)); }
    void putu1ref(entry *e);              // { putu1_at(put_space(1), putref_index(e, 1)); }
    int putref_index(entry *e, int size); // size in [1..2]
    void put_label(int curIP, int size);  // size in {2,4}
    void putlayout(band **body);
    void put_stackmap_type();

    // Offset of the write pointer relative to wpbase.
    size_t wpoffset()
    {
        return (size_t)(wp - wpbase);
    } // (invariant across overflow)
    // Address that lies offset bytes past wpbase.
    byte *wp_at(size_t offset)
    {
        return wpbase + offset;
    }
    uint32_t to_bci(uint32_t bii);
    void get_code_header(int &max_stack, int &max_na_locals, int &handler_count, int &cflags);
    band *ref_band_for_self_op(int bc, bool &isAloadVar, int &origBCVar);
    band *ref_band_for_op(int bc);

    // Definitions of standard classfile int formats:

    // Stores n as a single byte at wp; n must fit in 8 bits (asserted).
    static void putu1_at(byte *wp, int n)
    {
        assert(n == (n & 0xFF));
        wp[0] = n;
    }
    static void putu2_at(byte *wp, int n);
    static void putu4_at(byte *wp, int n);
    static void putu8_at(byte *wp, int64_t n);

    // Private stuff
    void reset_cur_classfile();
    void write_classfile_tail();
    void write_classfile_head();
    void write_code();
    void write_bc_ops();
    void write_members(int num, int attrc); // attrc=ATTR_CONTEXT_FIELD/METHOD
    int write_attrs(int attrc, uint64_t indexBits);

    // The readers
    void read_bands();
    void read_file_header();
    void read_cp();
    void read_cp_counts(value_stream &hdr);
    void read_attr_defs();
    void read_ics();
    void read_attrs(int attrc, int obj_count);
    void read_classes();
    void read_code_headers();
    void read_bcs();
    void read_bc_ops();
    void read_files();
    void read_Utf8_values(entry *cpMap, int len);
    void read_single_words(band &cp_band, entry *cpMap, int len);
    void read_double_words(band &cp_bands, entry *cpMap, int len);
    void read_single_refs(band &cp_band, byte refTag, entry *cpMap, int len);
    void read_double_refs(band &cp_band, byte ref1Tag, byte ref2Tag, entry *cpMap, int len);
    void read_signature_values(entry *cpMap, int len);
};