diff options
Diffstat (limited to 'db/mork/src/morkParser.cpp')
-rw-r--r-- | db/mork/src/morkParser.cpp | 1568 |
1 files changed, 1568 insertions, 0 deletions
diff --git a/db/mork/src/morkParser.cpp b/db/mork/src/morkParser.cpp new file mode 100644 index 000000000..667a597fd --- /dev/null +++ b/db/mork/src/morkParser.cpp @@ -0,0 +1,1568 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _MDB_ +#include "mdb.h" +#endif + +#ifndef _MORK_ +#include "mork.h" +#endif + +#ifndef _MORKNODE_ +#include "morkNode.h" +#endif + +#ifndef _MORKMAP_ +#include "morkMap.h" +#endif + +#ifndef _MORKENV_ +#include "morkEnv.h" +#endif + +#ifndef _MORKPARSER_ +#include "morkParser.h" +#endif + +#ifndef _MORKSTREAM_ +#include "morkStream.h" +#endif + +#ifndef _MORKBLOB_ +#include "morkBlob.h" +#endif + +#ifndef _MORKSINK_ +#include "morkSink.h" +#endif + +#ifndef _MORKCH_ +#include "morkCh.h" +#endif + +#ifndef _MORKSTORE_ +#include "morkStore.h" +#endif + +//3456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789 + +// ````` ````` ````` ````` ````` +// { ===== begin morkNode interface ===== + +/*public virtual*/ void +morkParser::CloseMorkNode(morkEnv* ev) // CloseParser() only if open +{ + if ( this->IsOpenNode() ) + { + this->MarkClosing(); + this->CloseParser(ev); + this->MarkShut(); + } +} + +/*public virtual*/ +morkParser::~morkParser() // assert CloseParser() executed earlier +{ + MORK_ASSERT(mParser_Heap==0); + MORK_ASSERT(mParser_Stream==0); +} + +/*public non-poly*/ +morkParser::morkParser(morkEnv* ev, + const morkUsage& inUsage, nsIMdbHeap* ioHeap, + morkStream* ioStream, mdb_count inBytesPerParseSegment, + nsIMdbHeap* ioSlotHeap) +: morkNode(ev, inUsage, ioHeap) +, mParser_Heap( 0 ) +, mParser_Stream( 0 ) +, mParser_MoreGranularity( inBytesPerParseSegment ) +, mParser_State( morkParser_kStartState ) + +, mParser_GroupContentStartPos( 0 ) + +, mParser_TableMid( ) +, mParser_RowMid( ) +, mParser_CellMid( ) + +, mParser_InPort( morkBool_kFalse ) +, mParser_InDict( morkBool_kFalse ) +, mParser_InCell( morkBool_kFalse ) +, mParser_InMeta( morkBool_kFalse ) + +, mParser_InPortRow( morkBool_kFalse ) +, mParser_InRow( morkBool_kFalse ) +, mParser_InTable( morkBool_kFalse ) +, mParser_InGroup( morkBool_kFalse ) + +, mParser_AtomChange( morkChange_kNil ) +, mParser_CellChange( morkChange_kNil ) +, mParser_RowChange( morkChange_kNil ) +, mParser_TableChange( morkChange_kNil ) + +, mParser_Change( morkChange_kNil ) +, mParser_IsBroken( morkBool_kFalse ) +, mParser_IsDone( morkBool_kFalse ) +, mParser_DoMore( morkBool_kTrue ) + +, mParser_Mid() + +, mParser_ScopeCoil(ev, ioSlotHeap) +, mParser_ValueCoil(ev, ioSlotHeap) +, mParser_ColumnCoil(ev, ioSlotHeap) +, mParser_StringCoil(ev, ioSlotHeap) + +, mParser_ScopeSpool(ev, &mParser_ScopeCoil) +, mParser_ValueSpool(ev, &mParser_ValueCoil) +, mParser_ColumnSpool(ev, &mParser_ColumnCoil) +, mParser_StringSpool(ev, &mParser_StringCoil) + +, mParser_MidYarn(ev, morkUsage(morkUsage_kMember), ioSlotHeap) +{ + if ( inBytesPerParseSegment < morkParser_kMinGranularity ) + inBytesPerParseSegment = morkParser_kMinGranularity; + else if ( inBytesPerParseSegment > morkParser_kMaxGranularity ) + inBytesPerParseSegment = morkParser_kMaxGranularity; + + mParser_MoreGranularity = inBytesPerParseSegment; + + if ( ioSlotHeap && ioStream ) + { + nsIMdbHeap_SlotStrongHeap(ioSlotHeap, ev, &mParser_Heap); + morkStream::SlotStrongStream(ioStream, ev, &mParser_Stream); + + if ( ev->Good() ) + { + mParser_Tag = morkParser_kTag; + mNode_Derived = morkDerived_kParser; + } + } + else + ev->NilPointerError(); +} + +/*public non-poly*/ void +morkParser::CloseParser(morkEnv* ev) // called by CloseMorkNode(); +{ + if ( this->IsNode() ) + { + if ( !this->IsShutNode() ) + { + mParser_ScopeCoil.CloseCoil(ev); + mParser_ValueCoil.CloseCoil(ev); + mParser_ColumnCoil.CloseCoil(ev); + mParser_StringCoil.CloseCoil(ev); + nsIMdbHeap_SlotStrongHeap((nsIMdbHeap*) 0, ev, &mParser_Heap); + morkStream::SlotStrongStream((morkStream*) 0, ev, &mParser_Stream); + this->MarkShut(); + } + } + else + this->NonNodeError(ev); +} + +// } ===== end morkNode methods ===== +// ````` ````` ````` ````` ````` + +/*protected non-poly*/ void +morkParser::NonGoodParserError(morkEnv* ev) // when GoodParserTag() is false +{ + ev->NewError("non-morkNode"); +} + +/*protected non-poly*/ void +morkParser::NonUsableParserError(morkEnv* ev) // +{ + if ( this->IsNode() ) + { + if ( this->IsOpenNode() ) + { + if ( this->GoodParserTag() ) + { + // okay + } + else + this->NonGoodParserError(ev); + } + else + this->NonOpenNodeError(ev); + } + else + this->NonNodeError(ev); +} + + +/*protected non-poly*/ void +morkParser::StartParse(morkEnv* ev) +{ + MORK_USED_1(ev); + mParser_InCell = morkBool_kFalse; + mParser_InMeta = morkBool_kFalse; + mParser_InDict = morkBool_kFalse; + mParser_InPortRow = morkBool_kFalse; + + mParser_RowMid.ClearMid(); + mParser_TableMid.ClearMid(); + mParser_CellMid.ClearMid(); + + mParser_GroupId = 0; + mParser_InPort = morkBool_kTrue; + + mParser_GroupSpan.ClearSpan(); + mParser_DictSpan.ClearSpan(); + mParser_AliasSpan.ClearSpan(); + mParser_MetaSpan.ClearSpan(); + mParser_TableSpan.ClearSpan(); + mParser_RowSpan.ClearSpan(); + mParser_CellSpan.ClearSpan(); + mParser_ColumnSpan.ClearSpan(); + mParser_SlotSpan.ClearSpan(); + + mParser_PortSpan.ClearSpan(); +} + +/*protected non-poly*/ void +morkParser::StopParse(morkEnv* ev) +{ + if ( mParser_InCell ) + { + mParser_InCell = morkBool_kFalse; + mParser_CellSpan.SetEndWithEnd(mParser_PortSpan); + this->OnCellEnd(ev, mParser_CellSpan); + } + if ( mParser_InMeta ) + { + mParser_InMeta = morkBool_kFalse; + mParser_MetaSpan.SetEndWithEnd(mParser_PortSpan); + this->OnMetaEnd(ev, mParser_MetaSpan); + } + if ( mParser_InDict ) + { + mParser_InDict = morkBool_kFalse; + mParser_DictSpan.SetEndWithEnd(mParser_PortSpan); + this->OnDictEnd(ev, mParser_DictSpan); + } + if ( mParser_InPortRow ) + { + mParser_InPortRow = morkBool_kFalse; + mParser_RowSpan.SetEndWithEnd(mParser_PortSpan); + this->OnPortRowEnd(ev, mParser_RowSpan); + } + if ( mParser_InRow ) + { + mParser_InRow = morkBool_kFalse; + mParser_RowMid.ClearMid(); + mParser_RowSpan.SetEndWithEnd(mParser_PortSpan); + this->OnRowEnd(ev, mParser_RowSpan); + } + if ( mParser_InTable ) + { + mParser_InTable = morkBool_kFalse; + mParser_TableMid.ClearMid(); + mParser_TableSpan.SetEndWithEnd(mParser_PortSpan); + this->OnTableEnd(ev, mParser_TableSpan); + } + if ( mParser_GroupId ) + { + mParser_GroupId = 0; + mParser_GroupSpan.SetEndWithEnd(mParser_PortSpan); + this->OnGroupAbortEnd(ev, mParser_GroupSpan); + } + if ( mParser_InPort ) + { + mParser_InPort = morkBool_kFalse; + this->OnPortEnd(ev, mParser_PortSpan); + } +} + +int morkParser::eat_comment(morkEnv* ev) // last char was '/' +{ + morkStream* s = mParser_Stream; + // Note morkStream::Getc() returns EOF when an error occurs, so + // we don't need to check for both c != EOF and ev->Good() below. + + int c = s->Getc(ev); + if ( c == '/' ) // C++ style comment? + { + while ( (c = s->Getc(ev)) != EOF && c != 0xA && c != 0xD ) + /* empty */; + + if ( c == 0xA || c == 0xD ) + c = this->eat_line_break(ev, c); + } + else if ( c == '*' ) /* C style comment? */ + { + int depth = 1; // count depth of comments until depth reaches zero + + while ( depth > 0 && c != EOF ) // still looking for comment end(s)? + { + while ( (c = s->Getc(ev)) != EOF && c != '/' && c != '*' ) + { + if ( c == 0xA || c == 0xD ) // need to count a line break? + { + c = this->eat_line_break(ev, c); + if ( c == '/' || c == '*' ) + break; // end while loop + } + } + + if ( c == '*' ) // maybe end of a comment, if next char is '/'? + { + if ( (c = s->Getc(ev)) == '/' ) // end of comment? + { + --depth; // depth of comments has decreased by one + if ( !depth ) // comments all done? + c = s->Getc(ev); // return the byte after end of comment + } + else if ( c != EOF ) // need to put the char back? + s->Ungetc(c); // especially need to put back '*', 0xA, or 0xD + } + else if ( c == '/' ) // maybe nested comemnt, if next char is '*'? + { + if ( (c = s->Getc(ev)) == '*' ) // nested comment? + ++depth; // depth of comments has increased by one + else if ( c != EOF ) // need to put the char back? + s->Ungetc(c); // especially need to put back '/', 0xA, or 0xD + } + + if ( ev->Bad() ) + c = EOF; + } + if ( c == EOF && depth > 0 ) + ev->NewWarning("EOF before end of comment"); + } + else + ev->NewWarning("expected / or *"); + + return c; +} + +int morkParser::eat_line_break(morkEnv* ev, int inLast) +{ + morkStream* s = mParser_Stream; + int c = s->Getc(ev); // get next char after 0xA or 0xD + this->CountLineBreak(); + if ( c == 0xA || c == 0xD ) // another line break character? + { + if ( c != inLast ) // not the same as the last one? + c = s->Getc(ev); // get next char after two-byte linebreak + } + return c; +} + +int morkParser::eat_line_continue(morkEnv* ev) // last char was '\' +{ + morkStream* s = mParser_Stream; + int c = s->Getc(ev); + if ( c == 0xA || c == 0xD ) // linebreak follows \ as expected? + { + c = this->eat_line_break(ev, c); + } + else + ev->NewWarning("expected linebreak"); + + return c; +} + +int morkParser::NextChar(morkEnv* ev) // next non-white content +{ + morkStream* s = mParser_Stream; + int c = s->Getc(ev); + while ( c > 0 && ev->Good() ) + { + if ( c == '/' ) + c = this->eat_comment(ev); + else if ( c == 0xA || c == 0xD ) + c = this->eat_line_break(ev, c); + else if ( c == '\\' ) + c = this->eat_line_continue(ev); + else if ( morkCh_IsWhite(c) ) + c = s->Getc(ev); + else + break; // end while loop when return c is acceptable + } + if ( ev->Bad() ) + { + mParser_State = morkParser_kBrokenState; + mParser_DoMore = morkBool_kFalse; + mParser_IsDone = morkBool_kTrue; + mParser_IsBroken = morkBool_kTrue; + c = EOF; + } + else if ( c == EOF ) + { + mParser_DoMore = morkBool_kFalse; + mParser_IsDone = morkBool_kTrue; + } + return c; +} + +void +morkParser::OnCellState(morkEnv* ev) +{ + ev->StubMethodOnlyError(); +} + +void +morkParser::OnMetaState(morkEnv* ev) +{ + ev->StubMethodOnlyError(); +} + +void +morkParser::OnRowState(morkEnv* ev) +{ + ev->StubMethodOnlyError(); +} + +void +morkParser::OnTableState(morkEnv* ev) +{ + ev->StubMethodOnlyError(); +} + +void +morkParser::OnDictState(morkEnv* ev) +{ + ev->StubMethodOnlyError(); +} + +morkBuf* morkParser::ReadName(morkEnv* ev, int c) +{ + morkBuf* outBuf = 0; + + if ( !morkCh_IsName(c) ) + ev->NewError("not a name char"); + + morkCoil* coil = &mParser_ColumnCoil; + coil->ClearBufFill(); + + morkSpool* spool = &mParser_ColumnSpool; + spool->Seek(ev, /*pos*/ 0); + + if ( ev->Good() ) + { + spool->Putc(ev, c); + + morkStream* s = mParser_Stream; + while ( (c = s->Getc(ev)) != EOF && morkCh_IsMore(c) && ev->Good() ) + spool->Putc(ev, c); + + if ( ev->Good() ) + { + if ( c != EOF ) + { + s->Ungetc(c); + spool->FlushSink(ev); // update coil->mBuf_Fill + } + else + this->UnexpectedEofError(ev); + + if ( ev->Good() ) + outBuf = coil; + } + } + return outBuf; +} + +mork_bool +morkParser::ReadMid(morkEnv* ev, morkMid* outMid) +{ + outMid->ClearMid(); + + morkStream* s = mParser_Stream; + int next; + outMid->mMid_Oid.mOid_Id = this->ReadHex(ev, &next); + int c = next; + if ( c == ':' ) + { + if ( (c = s->Getc(ev)) != EOF && ev->Good() ) + { + if ( c == '^' ) + { + outMid->mMid_Oid.mOid_Scope = this->ReadHex(ev, &next); + if ( ev->Good() ) + s->Ungetc(next); + } + else if ( morkCh_IsName(c) ) + { + outMid->mMid_Buf = this->ReadName(ev, c); + } + else + ev->NewError("expected name or hex after ':' following ID"); + } + + if ( c == EOF && ev->Good() ) + this->UnexpectedEofError(ev); + } + else + s->Ungetc(c); + + return ev->Good(); +} + +void +morkParser::ReadCell(morkEnv* ev) +{ + mParser_CellMid.ClearMid(); + // this->StartSpanOnLastByte(ev, &mParser_CellSpan); + morkMid* cellMid = 0; // if mid syntax is used for column + morkBuf* cellBuf = 0; // if naked string is used for column + + morkStream* s = mParser_Stream; + int c; + if ( (c = s->Getc(ev)) != EOF && ev->Good() ) + { + // this->StartSpanOnLastByte(ev, &mParser_ColumnSpan); + if ( c == '^' ) + { + cellMid = &mParser_CellMid; + this->ReadMid(ev, cellMid); + // if ( !mParser_CellMid.mMid_Oid.mOid_Scope ) + // mParser_CellMid.mMid_Oid.mOid_Scope = (mork_scope) 'c'; + } + else + { + if (mParser_InMeta && c == morkStore_kFormColumn) + { + ReadCellForm(ev, c); + return; + } + else + cellBuf = this->ReadName(ev, c); + } + if ( ev->Good() ) + { + // this->EndSpanOnThisByte(ev, &mParser_ColumnSpan); + + mParser_InCell = morkBool_kTrue; + this->OnNewCell(ev, *mParser_CellSpan.AsPlace(), + cellMid, cellBuf); // , mParser_CellChange + + mParser_CellChange = morkChange_kNil; + if ( (c = this->NextChar(ev)) != EOF && ev->Good() ) + { + // this->StartSpanOnLastByte(ev, &mParser_SlotSpan); + if ( c == '=' ) + { + morkBuf* buf = this->ReadValue(ev); + if ( buf ) + { + // this->EndSpanOnThisByte(ev, &mParser_SlotSpan); + this->OnValue(ev, mParser_SlotSpan, *buf); + } + } + else if ( c == '^' ) + { + if ( this->ReadMid(ev, &mParser_Mid) ) + { + // this->EndSpanOnThisByte(ev, &mParser_SlotSpan); + if ( (c = this->NextChar(ev)) != EOF && ev->Good() ) + { + if ( c != ')' ) + ev->NewError("expected ')' after cell ^ID value"); + } + else if ( c == EOF ) + this->UnexpectedEofError(ev); + + if ( ev->Good() ) + this->OnValueMid(ev, mParser_SlotSpan, mParser_Mid); + } + } + else if ( c == 'r' || c == 't' || c == '"' || c == '\'' ) + { + ev->NewError("cell syntax not yet supported"); + } + else + { + ev->NewError("unknown cell syntax"); + } + } + + // this->EndSpanOnThisByte(ev, &mParser_CellSpan); + mParser_InCell = morkBool_kFalse; + this->OnCellEnd(ev, mParser_CellSpan); + } + } + mParser_CellChange = morkChange_kNil; + + if ( c == EOF && ev->Good() ) + this->UnexpectedEofError(ev); +} + +void morkParser::ReadRowPos(morkEnv* ev) +{ + int c; // next character + mork_pos rowPos = this->ReadHex(ev, &c); + + if ( ev->Good() && c != EOF ) // should put back byte after hex? + mParser_Stream->Ungetc(c); + + this->OnRowPos(ev, rowPos); +} + +void morkParser::ReadRow(morkEnv* ev, int c) +// zm:Row ::= zm:S? '[' zm:S? zm:Id zm:RowItem* zm:S? ']' +// zm:RowItem ::= zm:MetaRow | zm:Cell +// zm:MetaRow ::= zm:S? '[' zm:S? zm:Cell* zm:S? ']' /* meta attributes */ +// zm:Cell ::= zm:S? '(' zm:Column zm:S? zm:Slot? ')' +{ + if ( ev->Good() ) + { + // this->StartSpanOnLastByte(ev, &mParser_RowSpan); + if ( mParser_Change ) + mParser_RowChange = mParser_Change; + + mork_bool cutAllRowCols = morkBool_kFalse; + + if ( c == '[' ) + { + if ( ( c = this->NextChar(ev) ) == '-' ) + cutAllRowCols = morkBool_kTrue; + else if ( ev->Good() && c != EOF ) + mParser_Stream->Ungetc(c); + + if ( this->ReadMid(ev, &mParser_RowMid) ) + { + mParser_InRow = morkBool_kTrue; + this->OnNewRow(ev, *mParser_RowSpan.AsPlace(), + mParser_RowMid, cutAllRowCols); + + mParser_Change = mParser_RowChange = morkChange_kNil; + + while ( (c = this->NextChar(ev)) != EOF && ev->Good() && c != ']' ) + { + switch ( c ) + { + case '(': // cell + this->ReadCell(ev); + break; + + case '[': // meta + this->ReadMeta(ev, ']'); + break; + + // case '+': // plus + // mParser_CellChange = morkChange_kAdd; + // break; + + case '-': // minus + // mParser_CellChange = morkChange_kCut; + this->OnMinusCell(ev); + break; + + // case '!': // bang + // mParser_CellChange = morkChange_kSet; + // break; + + default: + ev->NewWarning("unexpected byte in row"); + break; + } // switch + } // while + + if ( ev->Good() ) + { + if ( (c = this->NextChar(ev)) == '!' ) + this->ReadRowPos(ev); + else if ( c != EOF && ev->Good() ) + mParser_Stream->Ungetc(c); + } + + // this->EndSpanOnThisByte(ev, &mParser_RowSpan); + mParser_InRow = morkBool_kFalse; + this->OnRowEnd(ev, mParser_RowSpan); + + } // if ReadMid + } // if '[' + + else // c != '[' + { + morkStream* s = mParser_Stream; + s->Ungetc(c); + if ( this->ReadMid(ev, &mParser_RowMid) ) + { + mParser_InRow = morkBool_kTrue; + this->OnNewRow(ev, *mParser_RowSpan.AsPlace(), + mParser_RowMid, cutAllRowCols); + + mParser_Change = mParser_RowChange = morkChange_kNil; + + if ( ev->Good() ) + { + if ( (c = this->NextChar(ev)) == '!' ) + this->ReadRowPos(ev); + else if ( c != EOF && ev->Good() ) + s->Ungetc(c); + } + + // this->EndSpanOnThisByte(ev, &mParser_RowSpan); + mParser_InRow = morkBool_kFalse; + this->OnRowEnd(ev, mParser_RowSpan); + } + } + } + + if ( ev->Bad() ) + mParser_State = morkParser_kBrokenState; + else if ( c == EOF ) + mParser_State = morkParser_kDoneState; +} + +void morkParser::ReadTable(morkEnv* ev) +// zm:Table ::= zm:S? '{' zm:S? zm:Id zm:TableItem* zm:S? '}' +// zm:TableItem ::= zm:MetaTable | zm:RowRef | zm:Row +// zm:MetaTable ::= zm:S? '{' zm:S? zm:Cell* zm:S? '}' /* meta attributes */ +{ + // this->StartSpanOnLastByte(ev, &mParser_TableSpan); + + if ( mParser_Change ) + mParser_TableChange = mParser_Change; + + mork_bool cutAllTableRows = morkBool_kFalse; + + int c = this->NextChar(ev); + if ( c == '-' ) + cutAllTableRows = morkBool_kTrue; + else if ( ev->Good() && c != EOF ) + mParser_Stream->Ungetc(c); + + if ( ev->Good() && this->ReadMid(ev, &mParser_TableMid) ) + { + mParser_InTable = morkBool_kTrue; + this->OnNewTable(ev, *mParser_TableSpan.AsPlace(), + mParser_TableMid, cutAllTableRows); + + mParser_Change = mParser_TableChange = morkChange_kNil; + + while ( (c = this->NextChar(ev)) != EOF && ev->Good() && c != '}' ) + { + if ( morkCh_IsHex(c) ) + { + this->ReadRow(ev, c); + } + else + { + switch ( c ) + { + case '[': // row + this->ReadRow(ev, '['); + break; + + case '{': // meta + this->ReadMeta(ev, '}'); + break; + + // case '+': // plus + // mParser_RowChange = morkChange_kAdd; + // break; + + case '-': // minus + // mParser_RowChange = morkChange_kCut; + this->OnMinusRow(ev); + break; + + // case '!': // bang + // mParser_RowChange = morkChange_kSet; + // break; + + default: + ev->NewWarning("unexpected byte in table"); + break; + } + } + } + + // this->EndSpanOnThisByte(ev, &mParser_TableSpan); + mParser_InTable = morkBool_kFalse; + this->OnTableEnd(ev, mParser_TableSpan); + + if ( ev->Bad() ) + mParser_State = morkParser_kBrokenState; + else if ( c == EOF ) + mParser_State = morkParser_kDoneState; + } +} + +mork_id morkParser::ReadHex(morkEnv* ev, int* outNextChar) +// zm:Hex ::= [0-9a-fA-F] /* a single hex digit */ +// zm:Hex+ ::= zm:Hex | zm:Hex zm:Hex+ +{ + mork_id hex = 0; + + morkStream* s = mParser_Stream; + int c = this->NextChar(ev); + + if ( ev->Good() ) + { + if ( c != EOF ) + { + if ( morkCh_IsHex(c) ) + { + do + { + if ( morkCh_IsDigit(c) ) // '0' through '9'? + c -= '0'; + else if ( morkCh_IsUpper(c) ) // 'A' through 'F'? + c -= ('A' - 10) ; // c = (c - 'A') + 10; + else // 'a' through 'f'? + c -= ('a' - 10) ; // c = (c - 'a') + 10; + + hex = (hex << 4) + c; + } + while ( (c = s->Getc(ev)) != EOF && ev->Good() && morkCh_IsHex(c) ); + } + else + this->ExpectedHexDigitError(ev, c); + } + } + if ( c == EOF ) + this->EofInsteadOfHexError(ev); + + *outNextChar = c; + return hex; +} + +/*static*/ void +morkParser::EofInsteadOfHexError(morkEnv* ev) +{ + ev->NewWarning("eof instead of hex"); +} + +/*static*/ void +morkParser::ExpectedHexDigitError(morkEnv* ev, int c) +{ + MORK_USED_1(c); + ev->NewWarning("expected hex digit"); +} + +/*static*/ void +morkParser::ExpectedEqualError(morkEnv* ev) +{ + ev->NewWarning("expected '='"); +} + +/*static*/ void +morkParser::UnexpectedEofError(morkEnv* ev) +{ + ev->NewWarning("unexpected eof"); +} + + +morkBuf* morkParser::ReadValue(morkEnv* ev) +{ + morkBuf* outBuf = 0; + + morkCoil* coil = &mParser_ValueCoil; + coil->ClearBufFill(); + + morkSpool* spool = &mParser_ValueSpool; + spool->Seek(ev, /*pos*/ 0); + + if ( ev->Good() ) + { + morkStream* s = mParser_Stream; + int c; + while ( (c = s->Getc(ev)) != EOF && c != ')' && ev->Good() ) + { + if ( c == '\\' ) // next char is escaped by '\'? + { + if ( (c = s->Getc(ev)) == 0xA || c == 0xD ) // linebreak after \? + { + c = this->eat_line_break(ev, c); + if ( c == ')' || c == '\\' || c == '$' ) + { + s->Ungetc(c); // just let while loop test read this again + continue; // goto next iteration of while loop + } + } + if ( c == EOF || ev->Bad() ) + break; // end while loop + } + else if ( c == '$' ) // "$" escapes next two hex digits? + { + if ( (c = s->Getc(ev)) != EOF && ev->Good() ) + { + mork_ch first = (mork_ch) c; // first hex digit + if ( (c = s->Getc(ev)) != EOF && ev->Good() ) + { + mork_ch second = (mork_ch) c; // second hex digit + c = ev->HexToByte(first, second); + } + else + break; // end while loop + } + else + break; // end while loop + } + spool->Putc(ev, c); + } + + if ( ev->Good() ) + { + if ( c != EOF ) + spool->FlushSink(ev); // update coil->mBuf_Fill + else + this->UnexpectedEofError(ev); + + if ( ev->Good() ) + outBuf = coil; + } + } + return outBuf; +} + +void morkParser::ReadDictForm(morkEnv *ev) +{ + int nextChar; + nextChar = this->NextChar(ev); + if (nextChar == '(') + { + nextChar = this->NextChar(ev); + if (nextChar == morkStore_kFormColumn) + { + int dictForm; + + nextChar = this->NextChar(ev); + if (nextChar == '=') + { + dictForm = this->NextChar(ev); + nextChar = this->NextChar(ev); + } + else if (nextChar == '^') + { + dictForm = this->ReadHex(ev, &nextChar); + } + else + { + ev->NewWarning("unexpected byte in dict form"); + return; + } + mParser_ValueCoil.mText_Form = dictForm; + if (nextChar == ')') + { + nextChar = this->NextChar(ev); + if (nextChar == '>') + return; + } + } + } + ev->NewWarning("unexpected byte in dict form"); +} + +void morkParser::ReadCellForm(morkEnv *ev, int c) +{ + MORK_ASSERT (c == morkStore_kFormColumn); + int nextChar; + nextChar = this->NextChar(ev); + int cellForm; + + if (nextChar == '=') + { + cellForm = this->NextChar(ev); + nextChar = this->NextChar(ev); + } + else if (nextChar == '^') + { + cellForm = this->ReadHex(ev, &nextChar); + } + else + { + ev->NewWarning("unexpected byte in cell form"); + return; + } + // ### not sure about this. Which form should we set? + // mBuilder_CellForm = mBuilder_RowForm = cellForm; + if (nextChar == ')') + { + OnCellForm(ev, cellForm); + return; + } + ev->NewWarning("unexpected byte in cell form"); +} + +void morkParser::ReadAlias(morkEnv* ev) +// zm:Alias ::= zm:S? '(' ('#')? zm:Hex+ zm:S? zm:Value ')' +// zm:Value ::= '=' ([^)$\] | '\' zm:NonCRLF | zm:Continue | zm:Dollar)* +{ + // this->StartSpanOnLastByte(ev, &mParser_AliasSpan); + + int nextChar; + mork_id hex = this->ReadHex(ev, &nextChar); + int c = nextChar; + + mParser_Mid.ClearMid(); + mParser_Mid.mMid_Oid.mOid_Id = hex; + + if ( morkCh_IsWhite(c) && ev->Good() ) + c = this->NextChar(ev); + + if ( ev->Good() ) + { + if ( c == '<') + { + ReadDictForm(ev); + if (ev->Good()) + c = this->NextChar(ev); + } + if ( c == '=' ) + { + mParser_Mid.mMid_Buf = this->ReadValue(ev); + if ( mParser_Mid.mMid_Buf ) + { + // this->EndSpanOnThisByte(ev, &mParser_AliasSpan); + this->OnAlias(ev, mParser_AliasSpan, mParser_Mid); + // need to reset this somewhere. + mParser_ValueCoil.mText_Form = 0; + + } + } + else + this->ExpectedEqualError(ev); + } +} + +void morkParser::ReadMeta(morkEnv* ev, int inEndMeta) +// zm:MetaDict ::= zm:S? '<' zm:S? zm:Cell* zm:S? '>' /* meta attributes */ +// zm:MetaTable ::= zm:S? '{' zm:S? zm:Cell* zm:S? '}' /* meta attributes */ +// zm:MetaRow ::= zm:S? '[' zm:S? zm:Cell* zm:S? ']' /* meta attributes */ +{ + // this->StartSpanOnLastByte(ev, &mParser_MetaSpan); + mParser_InMeta = morkBool_kTrue; + this->OnNewMeta(ev, *mParser_MetaSpan.AsPlace()); + + mork_bool more = morkBool_kTrue; // until end meta + int c; + while ( more && (c = this->NextChar(ev)) != EOF && ev->Good() ) + { + switch ( c ) + { + case '(': // cell + this->ReadCell(ev); + break; + + case '>': // maybe end meta? + if ( inEndMeta == '>' ) + more = morkBool_kFalse; // stop reading meta + else + this->UnexpectedByteInMetaWarning(ev); + break; + + case '}': // maybe end meta? + if ( inEndMeta == '}' ) + more = morkBool_kFalse; // stop reading meta + else + this->UnexpectedByteInMetaWarning(ev); + break; + + case ']': // maybe end meta? + if ( inEndMeta == ']' ) + more = morkBool_kFalse; // stop reading meta + else + this->UnexpectedByteInMetaWarning(ev); + break; + + case '[': // maybe table meta row? + if ( mParser_InTable ) + this->ReadRow(ev, '['); + else + this->UnexpectedByteInMetaWarning(ev); + break; + + default: + if ( mParser_InTable && morkCh_IsHex(c) ) + this->ReadRow(ev, c); + else + this->UnexpectedByteInMetaWarning(ev); + break; + } + } + + // this->EndSpanOnThisByte(ev, &mParser_MetaSpan); + mParser_InMeta = morkBool_kFalse; + this->OnMetaEnd(ev, mParser_MetaSpan); +} + +/*static*/ void +morkParser::UnexpectedByteInMetaWarning(morkEnv* ev) +{ + ev->NewWarning("unexpected byte in meta"); +} + +/*static*/ void +morkParser::NonParserTypeError(morkEnv* ev) +{ + ev->NewError("non morkParser"); +} + +mork_bool morkParser::MatchPattern(morkEnv* ev, const char* inPattern) +{ + // if an error occurs, we want original inPattern in the debugger: + const char* pattern = inPattern; // mutable copy of pointer + morkStream* s = mParser_Stream; + int c; + while ( *pattern && ev->Good() ) + { + char byte = *pattern++; + if ( (c = s->Getc(ev)) != byte ) + { + ev->NewError("byte not in expected pattern"); + } + } + return ev->Good(); +} + +mork_bool morkParser::FindGroupEnd(morkEnv* ev) +{ + mork_bool foundEnd = morkBool_kFalse; + + // char gidBuf[ 64 ]; // to hold hex pattern we want + // (void) ev->TokenAsHex(gidBuf, mParser_GroupId); + + morkStream* s = mParser_Stream; + int c; + + while ( (c = s->Getc(ev)) != EOF && ev->Good() && !foundEnd ) + { + if ( c == '@' ) // maybe start of group ending? + { + // this->EndSpanOnThisByte(ev, &mParser_GroupSpan); + if ( (c = s->Getc(ev)) == '$' ) // '$' follows '@' ? + { + if ( (c = s->Getc(ev)) == '$' ) // '$' follows "@$" ? + { + if ( (c = s->Getc(ev)) == '}' ) + { + foundEnd = this->ReadEndGroupId(ev); + // this->EndSpanOnThisByte(ev, &mParser_GroupSpan); + + } + else + ev->NewError("expected '}' after @$$"); + } + } + if ( !foundEnd && c == '@' ) + s->Ungetc(c); + } + } + + return foundEnd && ev->Good(); +} + +void morkParser::ReadGroup(morkEnv* mev) +{ + nsIMdbEnv *ev = mev->AsMdbEnv(); + int next = 0; + mParser_GroupId = this->ReadHex(mev, &next); + if ( next == '{' ) + { + morkStream* s = mParser_Stream; + int c; + if ( (c = s->Getc(mev)) == '@' ) + { + // we really need the following span inside morkBuilder::OnNewGroup(): + this->StartSpanOnThisByte(mev, &mParser_GroupSpan); + mork_pos startPos = mParser_GroupSpan.mSpan_Start.mPlace_Pos; + + // if ( !store->mStore_FirstCommitGroupPos ) + // store->mStore_FirstCommitGroupPos = startPos; + // else if ( !store->mStore_SecondCommitGroupPos ) + // store->mStore_SecondCommitGroupPos = startPos; + + if ( this->FindGroupEnd(mev) ) + { + mork_pos outPos; + s->Seek(ev, startPos, &outPos); + if ( mev->Good() ) + { + this->OnNewGroup(mev, mParser_GroupSpan.mSpan_Start, + mParser_GroupId); + + this->ReadContent(mev, /*inInsideGroup*/ morkBool_kTrue); + + this->OnGroupCommitEnd(mev, mParser_GroupSpan); + } + } + } + else + mev->NewError("expected '@' after @$${id{"); + } + else + mev->NewError("expected '{' after @$$id"); + +} + +mork_bool morkParser::ReadAt(morkEnv* ev, mork_bool inInsideGroup) +/* groups must be ignored until properly terminated */ +// zm:Group ::= zm:GroupStart zm:Content zm:GroupEnd /* transaction */ +// zm:GroupStart ::= zm:S? '@$${' zm:Hex+ '{@' /* xaction id has own space */ +// zm:GroupEnd ::= zm:GroupCommit | zm:GroupAbort +// zm:GroupCommit ::= zm:S? '@$$}' zm:Hex+ '}@' /* id matches start id */ +// zm:GroupAbort ::= zm:S? '@$$}~~}@' /* id matches start id */ +/* We must allow started transactions to be aborted in summary files. */ +/* Note '$$' will never occur unescaped in values we will see in Mork. */ +{ + if ( this->MatchPattern(ev, "$$") ) + { + morkStream* s = mParser_Stream; + int c; + if ( ((c = s->Getc(ev)) == '{' || c == '}') && ev->Good() ) + { + if ( c == '{' ) // start of new group? + { + if ( !inInsideGroup ) + this->ReadGroup(ev); + else + ev->NewError("nested @$${ inside another group"); + } + else // c == '}' // end of old group? + { + if ( inInsideGroup ) + { + this->ReadEndGroupId(ev); + mParser_GroupId = 0; + } + else + ev->NewError("unmatched @$$} outside any group"); + } + } + else + ev->NewError("expected '{' or '}' after @$$"); + } + return ev->Good(); +} + +mork_bool morkParser::ReadEndGroupId(morkEnv* ev) +{ + mork_bool outSawGroupId = morkBool_kFalse; + morkStream* s = mParser_Stream; + int c; + if ( (c = s->Getc(ev)) != EOF && ev->Good() ) + { + if ( c == '~' ) // transaction is aborted? + { + this->MatchPattern(ev, "~}@"); // finish rest of pattern + } + else // push back byte and read expected trailing hex id + { + s->Ungetc(c); + int next = 0; + mork_gid endGroupId = this->ReadHex(ev, &next); + if ( ev->Good() ) + { + if ( endGroupId == mParser_GroupId ) // matches start? + { + if ( next == '}' ) // '}' after @$$}id ? + { + if ( (c = s->Getc(ev)) == '@' ) // '@' after @$$}id} ? + { + // looks good, so return with no error + outSawGroupId = morkBool_kTrue; + mParser_InGroup = false; + } + else + ev->NewError("expected '@' after @$$}id}"); + } + else + ev->NewError("expected '}' after @$$}id"); + } + else + ev->NewError("end group id mismatch"); + } + } + } + return ( outSawGroupId && ev->Good() ); +} + + +void morkParser::ReadDict(morkEnv* ev) +// zm:Dict ::= zm:S? '<' zm:DictItem* zm:S? '>' +// zm:DictItem ::= zm:MetaDict | zm:Alias +// zm:MetaDict ::= zm:S? '<' zm:S? zm:Cell* zm:S? '>' /* meta attributes */ +// zm:Alias ::= zm:S? '(' ('#')? zm:Hex+ zm:S? zm:Value ')' +{ + mParser_Change = morkChange_kNil; + mParser_AtomChange = morkChange_kNil; + + // this->StartSpanOnLastByte(ev, &mParser_DictSpan); + mParser_InDict = morkBool_kTrue; + this->OnNewDict(ev, *mParser_DictSpan.AsPlace()); + + int c; + while ( (c = this->NextChar(ev)) != EOF && ev->Good() && c != '>' ) + { + switch ( c ) + { + case '(': // alias + this->ReadAlias(ev); + break; + + case '<': // meta + this->ReadMeta(ev, '>'); + break; + + default: + ev->NewWarning("unexpected byte in dict"); + break; + } + } + + // this->EndSpanOnThisByte(ev, &mParser_DictSpan); + mParser_InDict = morkBool_kFalse; + this->OnDictEnd(ev, mParser_DictSpan); + + if ( ev->Bad() ) + mParser_State = morkParser_kBrokenState; + else if ( c == EOF ) + mParser_State = morkParser_kDoneState; +} + +void morkParser::EndSpanOnThisByte(morkEnv* mev, morkSpan* ioSpan) +{ + mork_pos here; + nsIMdbEnv *ev = mev->AsMdbEnv(); + nsresult rv = mParser_Stream->Tell(ev, &here); + if (NS_SUCCEEDED(rv) && mev->Good() ) + { + this->SetHerePos(here); + ioSpan->SetEndWithEnd(mParser_PortSpan); + } +} + +void morkParser::EndSpanOnLastByte(morkEnv* mev, morkSpan* ioSpan) +{ + mork_pos here; + nsIMdbEnv *ev = mev->AsMdbEnv(); + nsresult rv= mParser_Stream->Tell(ev, &here); + if ( NS_SUCCEEDED(rv) && mev->Good() ) + { + if ( here > 0 ) + --here; + else + here = 0; + + this->SetHerePos(here); + ioSpan->SetEndWithEnd(mParser_PortSpan); + } +} + +void morkParser::StartSpanOnLastByte(morkEnv* mev, morkSpan* ioSpan) +{ + mork_pos here; + nsIMdbEnv *ev = mev->AsMdbEnv(); + nsresult rv = mParser_Stream->Tell(ev, &here); + if ( NS_SUCCEEDED(rv) && mev->Good() ) + { + if ( here > 0 ) + --here; + else + here = 0; + + this->SetHerePos(here); + ioSpan->SetStartWithEnd(mParser_PortSpan); + ioSpan->SetEndWithEnd(mParser_PortSpan); + } +} + +void morkParser::StartSpanOnThisByte(morkEnv* mev, morkSpan* ioSpan) +{ + mork_pos here; + nsIMdbEnv *ev = mev->AsMdbEnv(); + nsresult rv = mParser_Stream->Tell(ev, &here); + if ( NS_SUCCEEDED(rv) && mev->Good() ) + { + this->SetHerePos(here); + ioSpan->SetStartWithEnd(mParser_PortSpan); + ioSpan->SetEndWithEnd(mParser_PortSpan); + } +} + +mork_bool +morkParser::ReadContent(morkEnv* ev, mork_bool inInsideGroup) +{ + int c; + mork_bool keep_going = true; + while ( keep_going && (c = this->NextChar(ev)) != EOF && ev->Good()) + { + switch ( c ) + { + case '[': // row + this->ReadRow(ev, '['); + keep_going = false; + break; + + case '{': // table + this->ReadTable(ev); + keep_going = false; + break; + + case '<': // dict + this->ReadDict(ev); + keep_going = false; + break; + + case '@': // group + return this->ReadAt(ev, inInsideGroup); + // break; + + // case '+': // plus + // mParser_Change = morkChange_kAdd; + // break; + + // case '-': // minus + // mParser_Change = morkChange_kCut; + // break; + + // case '!': // bang + // mParser_Change = morkChange_kSet; + // break; + + default: + ev->NewWarning("unexpected byte in ReadContent()"); + break; + } + } + if ( ev->Bad() ) + mParser_State = morkParser_kBrokenState; + else if ( c == EOF ) + mParser_State = morkParser_kDoneState; + + return ( ev->Good() && c != EOF ); +} + +void +morkParser::OnPortState(morkEnv* ev) +{ + mork_bool firstTime = !mParser_InPort; + mParser_InPort = morkBool_kTrue; + if (firstTime) + this->OnNewPort(ev, *mParser_PortSpan.AsPlace()); + + mork_bool done = !this->ReadContent(ev, mParser_InGroup/*inInsideGroup*/); + + if (done) + { + mParser_InPort = morkBool_kFalse; + this->OnPortEnd(ev, mParser_PortSpan); + } + + if ( ev->Bad() ) + mParser_State = morkParser_kBrokenState; +} + +void +morkParser::OnStartState(morkEnv* mev) +{ + morkStream* s = mParser_Stream; + nsIMdbEnv *ev = mev->AsMdbEnv(); + if ( s && s->IsNode() && s->IsOpenNode() ) + { + mork_pos outPos; + nsresult rv = s->Seek(ev, 0, &outPos); + if (NS_SUCCEEDED(rv) && mev->Good() ) + { + this->StartParse(mev); + mParser_State = morkParser_kPortState; + } + } + else + mev->NilPointerError(); + + if ( mev->Bad() ) + mParser_State = morkParser_kBrokenState; +} + +/*protected non-poly*/ void +morkParser::ParseChunk(morkEnv* ev) +{ + mParser_Change = morkChange_kNil; + mParser_DoMore = morkBool_kTrue; + + switch ( mParser_State ) + { + case morkParser_kCellState: // 0 + this->OnCellState(ev); break; + + case morkParser_kMetaState: // 1 + this->OnMetaState(ev); break; + + case morkParser_kRowState: // 2 + this->OnRowState(ev); break; + + case morkParser_kTableState: // 3 + this->OnTableState(ev); break; + + case morkParser_kDictState: // 4 + this->OnDictState(ev); break; + + case morkParser_kPortState: // 5 + this->OnPortState(ev); break; + + case morkParser_kStartState: // 6 + this->OnStartState(ev); break; + + case morkParser_kDoneState: // 7 + mParser_DoMore = morkBool_kFalse; + mParser_IsDone = morkBool_kTrue; + this->StopParse(ev); + break; + case morkParser_kBrokenState: // 8 + mParser_DoMore = morkBool_kFalse; + mParser_IsBroken = morkBool_kTrue; + this->StopParse(ev); + break; + default: // ? + MORK_ASSERT(morkBool_kFalse); + mParser_State = morkParser_kBrokenState; + break; + } +} + +/*public non-poly*/ mdb_count +morkParser::ParseMore( // return count of bytes consumed now + morkEnv* ev, // context + mork_pos* outPos, // current byte pos in the stream afterwards + mork_bool* outDone, // is parsing finished? + mork_bool* outBroken // is parsing irreparably dead and broken? + ) +{ + mdb_count outCount = 0; + if ( this->IsNode() && this->GoodParserTag() && this->IsOpenNode() ) + { + mork_pos startPos = this->HerePos(); + + if ( !mParser_IsDone && !mParser_IsBroken ) + this->ParseChunk(ev); + + // HerePos is only updated for groups. I'd like it to be more accurate. + + mork_pos here; + mParser_Stream->Tell(ev, &here); + + if ( outDone ) + *outDone = mParser_IsDone; + if ( outBroken ) + *outBroken = mParser_IsBroken; + if ( outPos ) + *outPos = here; + + if ( here > startPos ) + outCount = (mdb_count) (here - startPos); + } + else + { + this->NonUsableParserError(ev); + if ( outDone ) + *outDone = morkBool_kTrue; + if ( outBroken ) + *outBroken = morkBool_kTrue; + if ( outPos ) + *outPos = 0; + } + return outCount; +} + +//3456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789 + |