/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * vim: set ts=8 sts=4 et sw=4 tw=99: * * Copyright 2016 Mozilla Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef wasm_binary_format_h #define wasm_binary_format_h #include "wasm/WasmTypes.h" namespace js { namespace wasm { // The Encoder class appends bytes to the Bytes object it is given during // construction. The client is responsible for the Bytes's lifetime and must // keep the Bytes alive as long as the Encoder is used. class Encoder { Bytes& bytes_; template <class T> MOZ_MUST_USE bool write(const T& v) { return bytes_.append(reinterpret_cast<const uint8_t*>(&v), sizeof(T)); } template <typename UInt> MOZ_MUST_USE bool writeVarU(UInt i) { do { uint8_t byte = i & 0x7f; i >>= 7; if (i != 0) byte |= 0x80; if (!bytes_.append(byte)) return false; } while (i != 0); return true; } template <typename SInt> MOZ_MUST_USE bool writeVarS(SInt i) { bool done; do { uint8_t byte = i & 0x7f; i >>= 7; done = ((i == 0) && !(byte & 0x40)) || ((i == -1) && (byte & 0x40)); if (!done) byte |= 0x80; if (!bytes_.append(byte)) return false; } while (!done); return true; } void patchVarU32(size_t offset, uint32_t patchBits, uint32_t assertBits) { do { uint8_t assertByte = assertBits & 0x7f; uint8_t patchByte = patchBits & 0x7f; assertBits >>= 7; patchBits >>= 7; if (assertBits != 0) { assertByte |= 0x80; patchByte |= 0x80; } MOZ_ASSERT(assertByte == bytes_[offset]); bytes_[offset] = patchByte; offset++; } while(assertBits != 0); } void patchFixedU7(size_t offset, uint8_t patchBits, uint8_t assertBits) { MOZ_ASSERT(patchBits <= uint8_t(INT8_MAX)); patchFixedU8(offset, patchBits, assertBits); } void patchFixedU8(size_t offset, uint8_t patchBits, uint8_t assertBits) { MOZ_ASSERT(bytes_[offset] == assertBits); bytes_[offset] = patchBits; } uint32_t varU32ByteLength(size_t offset) const { size_t start = offset; while (bytes_[offset] & 0x80) offset++; return offset - start + 1; } public: explicit Encoder(Bytes& bytes) : bytes_(bytes) { MOZ_ASSERT(empty()); } size_t currentOffset() const { return bytes_.length(); } bool empty() const { return currentOffset() == 0; } // Fixed-size encoding operations simply copy the literal bytes (without // attempting to align). MOZ_MUST_USE bool writeFixedU7(uint8_t i) { MOZ_ASSERT(i <= uint8_t(INT8_MAX)); return writeFixedU8(i); } MOZ_MUST_USE bool writeFixedU8(uint8_t i) { return write<uint8_t>(i); } MOZ_MUST_USE bool writeFixedU32(uint32_t i) { return write<uint32_t>(i); } MOZ_MUST_USE bool writeFixedF32(RawF32 f) { return write<uint32_t>(f.bits()); } MOZ_MUST_USE bool writeFixedF64(RawF64 d) { return write<uint64_t>(d.bits()); } MOZ_MUST_USE bool writeFixedI8x16(const I8x16& i8x16) { return write<I8x16>(i8x16); } MOZ_MUST_USE bool writeFixedI16x8(const I16x8& i16x8) { return write<I16x8>(i16x8); } MOZ_MUST_USE bool writeFixedI32x4(const I32x4& i32x4) { return write<I32x4>(i32x4); } MOZ_MUST_USE bool writeFixedF32x4(const F32x4& f32x4) { return write<F32x4>(f32x4); } // Variable-length encodings that all use LEB128. MOZ_MUST_USE bool writeVarU32(uint32_t i) { return writeVarU<uint32_t>(i); } MOZ_MUST_USE bool writeVarS32(int32_t i) { return writeVarS<int32_t>(i); } MOZ_MUST_USE bool writeVarU64(uint64_t i) { return writeVarU<uint64_t>(i); } MOZ_MUST_USE bool writeVarS64(int64_t i) { return writeVarS<int64_t>(i); } MOZ_MUST_USE bool writeValType(ValType type) { static_assert(size_t(TypeCode::Limit) <= UINT8_MAX, "fits"); MOZ_ASSERT(size_t(type) < size_t(TypeCode::Limit)); return writeFixedU8(uint8_t(type)); } MOZ_MUST_USE bool writeBlockType(ExprType type) { static_assert(size_t(TypeCode::Limit) <= UINT8_MAX, "fits"); MOZ_ASSERT(size_t(type) < size_t(TypeCode::Limit)); return writeFixedU8(uint8_t(type)); } MOZ_MUST_USE bool writeOp(Op op) { static_assert(size_t(Op::Limit) <= 2 * UINT8_MAX, "fits"); MOZ_ASSERT(size_t(op) < size_t(Op::Limit)); if (size_t(op) < UINT8_MAX) return writeFixedU8(uint8_t(op)); return writeFixedU8(UINT8_MAX) && writeFixedU8(size_t(op) - UINT8_MAX); } // Fixed-length encodings that allow back-patching. MOZ_MUST_USE bool writePatchableFixedU7(size_t* offset) { *offset = bytes_.length(); return writeFixedU8(UINT8_MAX); } void patchFixedU7(size_t offset, uint8_t patchBits) { return patchFixedU7(offset, patchBits, UINT8_MAX); } // Variable-length encodings that allow back-patching. MOZ_MUST_USE bool writePatchableVarU32(size_t* offset) { *offset = bytes_.length(); return writeVarU32(UINT32_MAX); } void patchVarU32(size_t offset, uint32_t patchBits) { return patchVarU32(offset, patchBits, UINT32_MAX); } // Byte ranges start with an LEB128 length followed by an arbitrary sequence // of bytes. When used for strings, bytes are to be interpreted as utf8. MOZ_MUST_USE bool writeBytes(const void* bytes, uint32_t numBytes) { return writeVarU32(numBytes) && bytes_.append(reinterpret_cast<const uint8_t*>(bytes), numBytes); } // A "section" is a contiguous range of bytes that stores its own size so // that it may be trivially skipped without examining the contents. Sections // require backpatching since the size of the section is only known at the // end while the size's varU32 must be stored at the beginning. Immediately // after the section length is the string id of the section. MOZ_MUST_USE bool startSection(SectionId id, size_t* offset) { MOZ_ASSERT(id != SectionId::UserDefined); // not supported yet return writeVarU32(uint32_t(id)) && writePatchableVarU32(offset); } void finishSection(size_t offset) { return patchVarU32(offset, bytes_.length() - offset - varU32ByteLength(offset)); } }; // The Decoder class decodes the bytes in the range it is given during // construction. The client is responsible for keeping the byte range alive as // long as the Decoder is used. class Decoder { const uint8_t* const beg_; const uint8_t* const end_; const uint8_t* cur_; UniqueChars* error_; template <class T> MOZ_MUST_USE bool read(T* out) { if (bytesRemain() < sizeof(T)) return false; memcpy((void*)out, cur_, sizeof(T)); cur_ += sizeof(T); return true; } template <class T> T uncheckedRead() { MOZ_ASSERT(bytesRemain() >= sizeof(T)); T ret; memcpy(&ret, cur_, sizeof(T)); cur_ += sizeof(T); return ret; } template <class T> void uncheckedRead(T* ret) { MOZ_ASSERT(bytesRemain() >= sizeof(T)); memcpy(ret, cur_, sizeof(T)); cur_ += sizeof(T); } template <typename UInt> MOZ_MUST_USE bool readVarU(UInt* out) { const unsigned numBits = sizeof(UInt) * CHAR_BIT; const unsigned remainderBits = numBits % 7; const unsigned numBitsInSevens = numBits - remainderBits; UInt u = 0; uint8_t byte; UInt shift = 0; do { if (!readFixedU8(&byte)) return false; if (!(byte & 0x80)) { *out = u | UInt(byte) << shift; return true; } u |= UInt(byte & 0x7F) << shift; shift += 7; } while (shift != numBitsInSevens); if (!readFixedU8(&byte) || (byte & (unsigned(-1) << remainderBits))) return false; *out = u | (UInt(byte) << numBitsInSevens); return true; } template <typename SInt> MOZ_MUST_USE bool readVarS(SInt* out) { const unsigned numBits = sizeof(SInt) * CHAR_BIT; const unsigned remainderBits = numBits % 7; const unsigned numBitsInSevens = numBits - remainderBits; SInt s = 0; uint8_t byte; unsigned shift = 0; do { if (!readFixedU8(&byte)) return false; s |= SInt(byte & 0x7f) << shift; shift += 7; if (!(byte & 0x80)) { if (byte & 0x40) s |= SInt(-1) << shift; *out = s; return true; } } while (shift < numBitsInSevens); if (!remainderBits || !readFixedU8(&byte) || (byte & 0x80)) return false; uint8_t mask = 0x7f & (uint8_t(-1) << remainderBits); if ((byte & mask) != ((byte & (1 << (remainderBits - 1))) ? mask : 0)) return false; *out = s | SInt(byte) << shift; return true; } public: Decoder(const uint8_t* begin, const uint8_t* end, UniqueChars* error) : beg_(begin), end_(end), cur_(begin), error_(error) { MOZ_ASSERT(begin <= end); } explicit Decoder(const Bytes& bytes, UniqueChars* error = nullptr) : beg_(bytes.begin()), end_(bytes.end()), cur_(bytes.begin()), error_(error) {} bool fail(const char* msg, ...) MOZ_FORMAT_PRINTF(2, 3); bool fail(UniqueChars msg); void clearError() { if (error_) error_->reset(); } bool done() const { MOZ_ASSERT(cur_ <= end_); return cur_ == end_; } size_t bytesRemain() const { MOZ_ASSERT(end_ >= cur_); return size_t(end_ - cur_); } // pos must be a value previously returned from currentPosition. void rollbackPosition(const uint8_t* pos) { cur_ = pos; } const uint8_t* currentPosition() const { return cur_; } size_t currentOffset() const { return cur_ - beg_; } const uint8_t* begin() const { return beg_; } // Fixed-size encoding operations simply copy the literal bytes (without // attempting to align). MOZ_MUST_USE bool readFixedU8(uint8_t* i) { return read<uint8_t>(i); } MOZ_MUST_USE bool readFixedU32(uint32_t* u) { return read<uint32_t>(u); } MOZ_MUST_USE bool readFixedF32(RawF32* f) { uint32_t u; if (!read<uint32_t>(&u)) return false; *f = RawF32::fromBits(u); return true; } MOZ_MUST_USE bool readFixedF64(RawF64* d) { uint64_t u; if (!read<uint64_t>(&u)) return false; *d = RawF64::fromBits(u); return true; } MOZ_MUST_USE bool readFixedI8x16(I8x16* i8x16) { return read<I8x16>(i8x16); } MOZ_MUST_USE bool readFixedI16x8(I16x8* i16x8) { return read<I16x8>(i16x8); } MOZ_MUST_USE bool readFixedI32x4(I32x4* i32x4) { return read<I32x4>(i32x4); } MOZ_MUST_USE bool readFixedF32x4(F32x4* f32x4) { return read<F32x4>(f32x4); } // Variable-length encodings that all use LEB128. MOZ_MUST_USE bool readVarU32(uint32_t* out) { return readVarU<uint32_t>(out); } MOZ_MUST_USE bool readVarS32(int32_t* out) { return readVarS<int32_t>(out); } MOZ_MUST_USE bool readVarU64(uint64_t* out) { return readVarU<uint64_t>(out); } MOZ_MUST_USE bool readVarS64(int64_t* out) { return readVarS<int64_t>(out); } MOZ_MUST_USE bool readValType(uint8_t* type) { static_assert(uint8_t(TypeCode::Limit) <= UINT8_MAX, "fits"); return readFixedU8(type); } MOZ_MUST_USE bool readBlockType(uint8_t* type) { static_assert(size_t(TypeCode::Limit) <= UINT8_MAX, "fits"); return readFixedU8(type); } MOZ_MUST_USE bool readOp(uint16_t* op) { static_assert(size_t(Op::Limit) <= 2 * UINT8_MAX, "fits"); uint8_t u8; if (!readFixedU8(&u8)) return false; if (MOZ_LIKELY(u8 != UINT8_MAX)) { *op = u8; return true; } if (!readFixedU8(&u8)) return false; *op = uint16_t(u8) + UINT8_MAX; return true; } // See writeBytes comment. MOZ_MUST_USE bool readBytes(uint32_t numBytes, const uint8_t** bytes = nullptr) { if (bytes) *bytes = cur_; if (bytesRemain() < numBytes) return false; cur_ += numBytes; return true; } // See "section" description in Encoder. static const uint32_t NotStarted = UINT32_MAX; MOZ_MUST_USE bool startSection(SectionId id, uint32_t* startOffset, uint32_t* size, const char* sectionName) { const uint8_t* const before = cur_; const uint8_t* beforeId = before; uint32_t idValue; if (!readVarU32(&idValue)) goto backup; while (idValue != uint32_t(id)) { if (idValue != uint32_t(SectionId::UserDefined)) goto backup; // Rewind to the section id since skipUserDefinedSection expects it. cur_ = beforeId; if (!skipUserDefinedSection()) return false; beforeId = cur_; if (!readVarU32(&idValue)) goto backup; } if (!readVarU32(size)) goto fail; if (bytesRemain() < *size) goto fail; *startOffset = cur_ - beg_; return true; backup: cur_ = before; *startOffset = NotStarted; return true; fail: return fail("failed to start %s section", sectionName); } MOZ_MUST_USE bool finishSection(uint32_t startOffset, uint32_t size, const char* sectionName) { if (size != (cur_ - beg_) - startOffset) return fail("byte size mismatch in %s section", sectionName); return true; } // "User sections" do not cause validation errors unless the error is in // the user-defined section header itself. MOZ_MUST_USE bool startUserDefinedSection(const char* expectedId, size_t expectedIdSize, uint32_t* sectionStart, uint32_t* sectionSize) { const uint8_t* const before = cur_; while (true) { if (!startSection(SectionId::UserDefined, sectionStart, sectionSize, "user-defined")) return false; if (*sectionStart == NotStarted) { cur_ = before; return true; } uint32_t idSize; if (!readVarU32(&idSize)) goto fail; if (idSize > bytesRemain() || currentOffset() + idSize > *sectionStart + *sectionSize) goto fail; if (expectedId && (expectedIdSize != idSize || !!memcmp(cur_, expectedId, idSize))) { finishUserDefinedSection(*sectionStart, *sectionSize); continue; } cur_ += idSize; return true; } MOZ_CRASH("unreachable"); fail: return fail("failed to start user-defined section"); } template <size_t IdSizeWith0> MOZ_MUST_USE bool startUserDefinedSection(const char (&id)[IdSizeWith0], uint32_t* sectionStart, uint32_t* sectionSize) { MOZ_ASSERT(id[IdSizeWith0 - 1] == '\0'); return startUserDefinedSection(id, IdSizeWith0 - 1, sectionStart, sectionSize); } void finishUserDefinedSection(uint32_t sectionStart, uint32_t sectionSize) { MOZ_ASSERT(cur_ >= beg_); MOZ_ASSERT(cur_ <= end_); cur_ = (beg_ + sectionStart) + sectionSize; MOZ_ASSERT(cur_ <= end_); clearError(); } MOZ_MUST_USE bool skipUserDefinedSection() { uint32_t sectionStart, sectionSize; if (!startUserDefinedSection(nullptr, 0, §ionStart, §ionSize)) return false; if (sectionStart == NotStarted) return fail("expected user-defined section"); finishUserDefinedSection(sectionStart, sectionSize); return true; } // The infallible "unchecked" decoding functions can be used when we are // sure that the bytes are well-formed (by construction or due to previous // validation). uint8_t uncheckedReadFixedU8() { return uncheckedRead<uint8_t>(); } uint32_t uncheckedReadFixedU32() { return uncheckedRead<uint32_t>(); } RawF32 uncheckedReadFixedF32() { return RawF32::fromBits(uncheckedRead<uint32_t>()); } RawF64 uncheckedReadFixedF64() { return RawF64::fromBits(uncheckedRead<uint64_t>()); } template <typename UInt> UInt uncheckedReadVarU() { static const unsigned numBits = sizeof(UInt) * CHAR_BIT; static const unsigned remainderBits = numBits % 7; static const unsigned numBitsInSevens = numBits - remainderBits; UInt decoded = 0; uint32_t shift = 0; do { uint8_t byte = *cur_++; if (!(byte & 0x80)) return decoded | (UInt(byte) << shift); decoded |= UInt(byte & 0x7f) << shift; shift += 7; } while (shift != numBitsInSevens); uint8_t byte = *cur_++; MOZ_ASSERT(!(byte & 0xf0)); return decoded | (UInt(byte) << numBitsInSevens); } uint32_t uncheckedReadVarU32() { return uncheckedReadVarU<uint32_t>(); } int32_t uncheckedReadVarS32() { int32_t i32 = 0; MOZ_ALWAYS_TRUE(readVarS32(&i32)); return i32; } uint64_t uncheckedReadVarU64() { return uncheckedReadVarU<uint64_t>(); } int64_t uncheckedReadVarS64() { int64_t i64 = 0; MOZ_ALWAYS_TRUE(readVarS64(&i64)); return i64; } ValType uncheckedReadValType() { return (ValType)uncheckedReadFixedU8(); } Op uncheckedReadOp() { static_assert(size_t(Op::Limit) <= 2 * UINT8_MAX, "fits"); uint8_t u8 = uncheckedReadFixedU8(); return u8 != UINT8_MAX ? Op(u8) : Op(uncheckedReadFixedU8() + UINT8_MAX); } void uncheckedReadFixedI8x16(I8x16* i8x16) { struct T { I8x16 v; }; T t = uncheckedRead<T>(); memcpy(i8x16, &t, sizeof(t)); } void uncheckedReadFixedI16x8(I16x8* i16x8) { struct T { I16x8 v; }; T t = uncheckedRead<T>(); memcpy(i16x8, &t, sizeof(t)); } void uncheckedReadFixedI32x4(I32x4* i32x4) { struct T { I32x4 v; }; T t = uncheckedRead<T>(); memcpy(i32x4, &t, sizeof(t)); } void uncheckedReadFixedF32x4(F32x4* f32x4) { struct T { F32x4 v; }; T t = uncheckedRead<T>(); memcpy(f32x4, &t, sizeof(t)); } }; // Reusable macro encoding/decoding functions reused by both the two // encoders (AsmJS/WasmTextToBinary) and all the decoders // (WasmCompile/WasmIonCompile/WasmBaselineCompile/WasmBinaryToText). // Misc helpers. UniqueChars DecodeName(Decoder& d); MOZ_MUST_USE bool DecodeTableLimits(Decoder& d, TableDescVector* tables); MOZ_MUST_USE bool GlobalIsJSCompatible(Decoder& d, ValType type, bool isMutable); MOZ_MUST_USE bool EncodeLocalEntries(Encoder& d, const ValTypeVector& locals); MOZ_MUST_USE bool DecodeLocalEntries(Decoder& d, ModuleKind kind, ValTypeVector* locals); MOZ_MUST_USE bool DecodeGlobalType(Decoder& d, ValType* type, bool* isMutable); MOZ_MUST_USE bool DecodeInitializerExpression(Decoder& d, const GlobalDescVector& globals, ValType expected, InitExpr* init); MOZ_MUST_USE bool DecodeLimits(Decoder& d, Limits* limits); MOZ_MUST_USE bool DecodeMemoryLimits(Decoder& d, bool hasMemory, Limits* memory); // Section macros. MOZ_MUST_USE bool DecodePreamble(Decoder& d); MOZ_MUST_USE bool DecodeTypeSection(Decoder& d, SigWithIdVector* sigs); MOZ_MUST_USE bool DecodeImportSection(Decoder& d, const SigWithIdVector& sigs, Uint32Vector* funcSigIndices, GlobalDescVector* globals, TableDescVector* tables, Maybe<Limits>* memory, ImportVector* imports); MOZ_MUST_USE bool DecodeFunctionSection(Decoder& d, const SigWithIdVector& sigs, size_t numImportedFunc, Uint32Vector* funcSigIndexes); MOZ_MUST_USE bool DecodeUnknownSections(Decoder& d); MOZ_MUST_USE bool DecodeDataSection(Decoder& d, bool usesMemory, uint32_t minMemoryByteLength, const GlobalDescVector& globals, DataSegmentVector* segments); MOZ_MUST_USE bool DecodeMemorySection(Decoder& d, bool hasMemory, Limits* memory, bool* present); } // namespace wasm } // namespace js #endif // wasm_binary_format_h