diff options
Diffstat (limited to 'rdf/base/nsRDFContentSink.cpp')
-rw-r--r-- | rdf/base/nsRDFContentSink.cpp | 1476 |
1 files changed, 1476 insertions, 0 deletions
diff --git a/rdf/base/nsRDFContentSink.cpp b/rdf/base/nsRDFContentSink.cpp new file mode 100644 index 000000000..ae05a9381 --- /dev/null +++ b/rdf/base/nsRDFContentSink.cpp @@ -0,0 +1,1476 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + + An implementation for an NGLayout-style content sink that knows how + to build an RDF content model from XML-serialized RDF. + + For more information on the RDF/XML syntax, + see http://www.w3.org/TR/REC-rdf-syntax/ + + This code is based on the final W3C Recommendation, + http://www.w3.org/TR/1999/REC-rdf-syntax-19990222. + + Open Issues ------------------ + + 1) factoring code with nsXMLContentSink - There's some amount of + common code between this and the HTML content sink. This will + increase as we support more and more HTML elements. How can code + from XML/HTML be factored? + + 2) We don't support the `parseType' attribute on the Description + tag; therefore, it is impossible to "inline" raw XML in this + implementation. + + 3) We don't build the reifications at parse time due to the + footprint overhead it would incur for large RDF documents. (It + may be possible to attach a "reification" wrapper datasource that + would present this information at query-time.) Because of this, + the `bagID' attribute is not processed correctly. + + 4) No attempt is made to `resolve URIs' to a canonical form (the + specification hints that an implementation should do this). This + is omitted for the obvious reason that we can ill afford to + resolve each URI reference. 

*/

#include "nsCOMPtr.h"
#include "nsInterfaceHashtable.h"
#include "nsIContentSink.h"
#include "nsIRDFContainer.h"
#include "nsIRDFContainerUtils.h"
#include "nsIRDFContentSink.h"
#include "nsIRDFNode.h"
#include "nsIRDFService.h"
#include "nsIRDFXMLSink.h"
#include "nsIServiceManager.h"
#include "nsIURL.h"
#include "nsIXMLContentSink.h"
#include "nsRDFCID.h"
#include "nsTArray.h"
#include "nsXPIDLString.h"
#include "mozilla/Logging.h"
#include "rdf.h"
#include "rdfutil.h"
#include "nsReadableUtils.h"
#include "nsIExpatSink.h"
#include "nsCRT.h"
#include "nsIAtom.h"
#include "nsStaticAtom.h"
#include "nsIScriptError.h"
#include "nsIDTD.h"

using namespace mozilla;

///////////////////////////////////////////////////////////////////////

// Parser state. HandleStartElement() switches on the current state to pick
// the Open*() routine used for the next start tag.
enum RDFContentSinkState {
    eRDFContentSinkState_InProlog,             // initial state; next tag goes to OpenRDF()
    eRDFContentSinkState_InDocumentElement,    // next tag goes to OpenObject()
    eRDFContentSinkState_InDescriptionElement, // next tag goes to OpenProperty()
    eRDFContentSinkState_InContainerElement,   // next tag goes to OpenMember()
    eRDFContentSinkState_InPropertyElement,    // next tag goes to OpenValue()
    eRDFContentSinkState_InMemberElement,      // next tag goes to OpenValue()
    eRDFContentSinkState_InEpilog              // context stack emptied; further tags are rejected
};

// Selects the kind of RDF node ParseText() builds from buffered character
// data. The mode is chosen by SetParseMode() from a `parseType' attribute.
enum RDFContentSinkParseMode {
    eRDFContentSinkParseMode_Resource, // text is a resource URI
    eRDFContentSinkParseMode_Literal,  // text is a string literal (constructor default)
    eRDFContentSinkParseMode_Int,      // text is parsed as an integer literal
    eRDFContentSinkParseMode_Date      // text is parsed as a date literal
};

// Pointer-to-member type with the signature of nsIRDFContainerUtils::IsAlt.
typedef
NS_STDCALL_FUNCPROTO(nsresult,
                     nsContainerTestFn,
                     nsIRDFContainerUtils, IsAlt,
                     (nsIRDFDataSource*, nsIRDFResource*, bool*));

// Pointer-to-member type with the signature of nsIRDFContainerUtils::MakeAlt.
typedef
NS_STDCALL_FUNCPROTO(nsresult,
                     nsMakeContainerFn,
                     nsIRDFContainerUtils, MakeAlt,
                     (nsIRDFDataSource*, nsIRDFResource*, nsIRDFContainer**));

// Content sink that receives expat callbacks (nsIExpatSink) for an RDF/XML
// document and turns them into assertions on an nsIRDFDataSource.
class RDFContentSinkImpl : public nsIRDFContentSink,
                           public nsIExpatSink
{
public:
    RDFContentSinkImpl();

    // nsISupports
    NS_DECL_ISUPPORTS
    NS_DECL_NSIEXPATSINK

    // nsIContentSink
    NS_IMETHOD WillParse(void) override;
    NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode) override;
    NS_IMETHOD DidBuildModel(bool aTerminated) override;
    NS_IMETHOD WillInterrupt(void) override;
    NS_IMETHOD WillResume(void) override;
    NS_IMETHOD SetParser(nsParserBase* aParser) override;
    // The following three are intentionally no-ops for this sink.
    virtual void FlushPendingNotifications(mozFlushType aType) override { }
    NS_IMETHOD SetDocumentCharset(nsACString& aCharset) override { return NS_OK; }
    virtual nsISupports *GetTarget() override { return nullptr; }

    // nsIRDFContentSink
    NS_IMETHOD Init(nsIURI* aURL) override;
    NS_IMETHOD SetDataSource(nsIRDFDataSource* aDataSource) override;
    NS_IMETHOD GetDataSource(nsIRDFDataSource*& aDataSource) override;

    // pseudo constants
    // Shared by all instances: acquired when gRefCnt goes 0 -> 1 in the
    // constructor and released when it returns to 0 in the destructor.
    static int32_t gRefCnt;
    static nsIRDFService* gRDFService;
    static nsIRDFContainerUtils* gRDFContainerUtils;
    static nsIRDFResource* kRDF_type;
    static nsIRDFResource* kRDF_instanceOf; // XXX should be RDF:type
    static nsIRDFResource* kRDF_Alt;
    static nsIRDFResource* kRDF_Bag;
    static nsIRDFResource* kRDF_Seq;
    static nsIRDFResource* kRDF_nextVal;

// One static nsIAtom* member is declared per entry of the atom list.
#define RDF_ATOM(name_, value_) static nsIAtom* name_;
#include "nsRDFContentSinkAtomList.h"
#undef RDF_ATOM

    // Groups a container type resource with the container-utils member
    // functions that test for / create that kind of container.
    typedef struct ContainerInfo {
        nsIRDFResource** mType;
        nsContainerTestFn mTestFn;
        nsMakeContainerFn mMakeFn;
    } ContainerInfo;

protected:
    virtual ~RDFContentSinkImpl();

    // Text management
    // Converts the buffered text into an RDF node according to mParseMode.
    void ParseText(nsIRDFNode **aResult);

    // FlushText() consumes the text buffer; AddText() appends to it.
    nsresult FlushText();
    nsresult AddText(const char16_t* aText, int32_t aLength);

    // RDF-specific parsing
    nsresult OpenRDF(const char16_t* aName);
    nsresult OpenObject(const char16_t* aName ,const char16_t** aAttributes);
    nsresult OpenProperty(const char16_t* aName, const char16_t** aAttributes);
    nsresult OpenMember(const char16_t* aName, const char16_t** aAttributes);
    nsresult OpenValue(const char16_t* aName, const char16_t** aAttributes);

    // aAttributes is a null-terminated array of alternating name/value
    // strings. aIsAnonymous and aCount are optional out-parameters; aCount
    // is zeroed on entry to AddProperties().
    nsresult GetIdAboutAttribute(const char16_t** aAttributes, nsIRDFResource** aResource, bool* aIsAnonymous = nullptr);
    nsresult GetResourceAttribute(const char16_t** aAttributes, nsIRDFResource** aResource);
    nsresult AddProperties(const char16_t** aAttributes, nsIRDFResource* aSubject, int32_t* aCount = nullptr);
    void SetParseMode(const char16_t **aAttributes);

    // Character-data buffer: allocated and grown by AddText(), freed in the
    // destructor. mTextLength is the number of char16_t units in use and
    // mTextSize the allocated capacity, also in char16_t units.
    char16_t* mText;
    int32_t mTextLength;
    int32_t mTextSize;

    /**
     * From the set of given attributes, this method extracts the
     * namespace definitions and feeds them to the datasource.
     * These can then be suggested to the serializer to be used again.
     * Hopefully, this will keep namespace definitions intact in a
     * parse - serialize cycle.
     */
    void RegisterNamespaces(const char16_t **aAttributes);

    /**
     * Extracts the localname from aExpatName, the name that the Expat parser
     * passes us.
     * aLocalName will contain the localname in aExpatName.
     * The return value is a dependent string containing just the namespace.
     */
    const nsDependentSubstring SplitExpatName(const char16_t *aExpatName,
                                              nsIAtom **aLocalName);

    enum eContainerType { eBag, eSeq, eAlt };
    nsresult InitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer);
    nsresult ReinitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer);

    // The datasource in which we're assigning assertions
    nsCOMPtr<nsIRDFDataSource> mDataSource;

    // A hash of all the node IDs referred to
    // (maps an rdf:nodeID string to the anonymous resource created for it)
    nsInterfaceHashtable<nsStringHashKey, nsIRDFResource> mNodeIDMap;

    // The current state of the content sink
    RDFContentSinkState mState;
    RDFContentSinkParseMode mParseMode;

    // content stack management
    int32_t
    PushContext(nsIRDFResource *aContext,
                RDFContentSinkState aState,
                RDFContentSinkParseMode aParseMode);

    nsresult
    PopContext(nsIRDFResource *&aContext,
               RDFContentSinkState &aState,
               RDFContentSinkParseMode &aParseMode);

    nsIRDFResource* GetContextElement(int32_t ancestor = 0);


    // One saved frame of the context stack: the resource of an open element
    // together with the state and parse mode passed to PushContext().
    struct RDFContextStackElement {
        nsCOMPtr<nsIRDFResource> mResource;
        RDFContentSinkState mState;
        RDFContentSinkParseMode mParseMode;
    };

    // Heap-allocated stack of open elements; null in a fresh sink and
    // deleted by the destructor if still present.
    AutoTArray<RDFContextStackElement, 8>* mContextStack;

    // Base URL of the document, set by Init(); used to resolve relative
    // `about', `ID' and `resource' attribute values.
    nsCOMPtr<nsIURI> mDocumentURL;

private:
    static mozilla::LazyLogModule gLog;
};

+int32_t RDFContentSinkImpl::gRefCnt = 0; +nsIRDFService* RDFContentSinkImpl::gRDFService; +nsIRDFContainerUtils* RDFContentSinkImpl::gRDFContainerUtils; +nsIRDFResource* RDFContentSinkImpl::kRDF_type; +nsIRDFResource* RDFContentSinkImpl::kRDF_instanceOf; +nsIRDFResource* RDFContentSinkImpl::kRDF_Alt; +nsIRDFResource* RDFContentSinkImpl::kRDF_Bag; +nsIRDFResource* RDFContentSinkImpl::kRDF_Seq; +nsIRDFResource* RDFContentSinkImpl::kRDF_nextVal; + +mozilla::LazyLogModule RDFContentSinkImpl::gLog("nsRDFContentSink"); + +//////////////////////////////////////////////////////////////////////// + +#define RDF_ATOM(name_, value_) nsIAtom* RDFContentSinkImpl::name_; +#include "nsRDFContentSinkAtomList.h" +#undef RDF_ATOM + +#define RDF_ATOM(name_, value_) NS_STATIC_ATOM_BUFFER(name_##_buffer, value_) +#include "nsRDFContentSinkAtomList.h" +#undef RDF_ATOM + +static const nsStaticAtom rdf_atoms[] = { +#define RDF_ATOM(name_, value_) NS_STATIC_ATOM(name_##_buffer, &RDFContentSinkImpl::name_), +#include "nsRDFContentSinkAtomList.h" +#undef RDF_ATOM +}; + +// static +void +nsRDFAtoms::RegisterAtoms() +{ + NS_RegisterStaticAtoms(rdf_atoms); +} + +RDFContentSinkImpl::RDFContentSinkImpl() + : mText(nullptr), + mTextLength(0), + mTextSize(0), + mState(eRDFContentSinkState_InProlog), + mParseMode(eRDFContentSinkParseMode_Literal), + mContextStack(nullptr) +{ + if (gRefCnt++ == 0) { + NS_DEFINE_CID(kRDFServiceCID, NS_RDFSERVICE_CID); + nsresult rv = CallGetService(kRDFServiceCID, &gRDFService); + + NS_ASSERTION(NS_SUCCEEDED(rv), "unable to get RDF service"); + if (NS_SUCCEEDED(rv)) { + rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "type"), + &kRDF_type); + rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "instanceOf"), + &kRDF_instanceOf); + rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "Alt"), + &kRDF_Alt); + rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "Bag"), + &kRDF_Bag); + rv = 
gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "Seq"), + &kRDF_Seq); + rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "nextVal"), + &kRDF_nextVal); + } + + NS_DEFINE_CID(kRDFContainerUtilsCID, NS_RDFCONTAINERUTILS_CID); + rv = CallGetService(kRDFContainerUtilsCID, &gRDFContainerUtils); + } +} + + +RDFContentSinkImpl::~RDFContentSinkImpl() +{ +#ifdef DEBUG_REFS + --gInstanceCount; + fprintf(stdout, "%d - RDF: RDFContentSinkImpl\n", gInstanceCount); +#endif + + if (mContextStack) { + MOZ_LOG(gLog, LogLevel::Warning, + ("rdfxml: warning! unclosed tag")); + + // XXX we should never need to do this, but, we'll write the + // code all the same. If someone left the content stack dirty, + // pop all the elements off the stack and release them. + int32_t i = mContextStack->Length(); + while (0 < i--) { + nsIRDFResource* resource = nullptr; + RDFContentSinkState state; + RDFContentSinkParseMode parseMode; + PopContext(resource, state, parseMode); + + // print some fairly useless debugging info + // XXX we should save line numbers on the context stack: this'd + // be about 1000x more helpful. + if (resource && MOZ_LOG_TEST(gLog, LogLevel::Debug)) { + nsXPIDLCString uri; + resource->GetValue(getter_Copies(uri)); + MOZ_LOG(gLog, LogLevel::Debug, + ("rdfxml: uri=%s", (const char*) uri)); + } + + NS_IF_RELEASE(resource); + } + + delete mContextStack; + } + free(mText); + + + if (--gRefCnt == 0) { + NS_IF_RELEASE(gRDFService); + NS_IF_RELEASE(gRDFContainerUtils); + NS_IF_RELEASE(kRDF_type); + NS_IF_RELEASE(kRDF_instanceOf); + NS_IF_RELEASE(kRDF_Alt); + NS_IF_RELEASE(kRDF_Bag); + NS_IF_RELEASE(kRDF_Seq); + NS_IF_RELEASE(kRDF_nextVal); + } +} + +//////////////////////////////////////////////////////////////////////// +// nsISupports interface + +NS_IMPL_ADDREF(RDFContentSinkImpl) +NS_IMPL_RELEASE(RDFContentSinkImpl) + +NS_IMETHODIMP +RDFContentSinkImpl::QueryInterface(REFNSIID iid, void** result) +{ + NS_PRECONDITION(result, "null ptr"); + if (! 
result) + return NS_ERROR_NULL_POINTER; + + NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID); + NS_DEFINE_IID(kIExpatSinkIID, NS_IEXPATSINK_IID); + NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); + NS_DEFINE_IID(kIXMLContentSinkIID, NS_IXMLCONTENT_SINK_IID); + NS_DEFINE_IID(kIRDFContentSinkIID, NS_IRDFCONTENTSINK_IID); + + *result = nullptr; + if (iid.Equals(kIRDFContentSinkIID) || + iid.Equals(kIXMLContentSinkIID) || + iid.Equals(kIContentSinkIID) || + iid.Equals(kISupportsIID)) { + *result = static_cast<nsIXMLContentSink*>(this); + AddRef(); + return NS_OK; + } + else if (iid.Equals(kIExpatSinkIID)) { + *result = static_cast<nsIExpatSink*>(this); + AddRef(); + return NS_OK; + } + return NS_NOINTERFACE; +} + +NS_IMETHODIMP +RDFContentSinkImpl::HandleStartElement(const char16_t *aName, + const char16_t **aAtts, + uint32_t aAttsCount, + uint32_t aLineNumber) +{ + FlushText(); + + nsresult rv = NS_ERROR_UNEXPECTED; // XXX + + RegisterNamespaces(aAtts); + + switch (mState) { + case eRDFContentSinkState_InProlog: + rv = OpenRDF(aName); + break; + + case eRDFContentSinkState_InDocumentElement: + rv = OpenObject(aName,aAtts); + break; + + case eRDFContentSinkState_InDescriptionElement: + rv = OpenProperty(aName,aAtts); + break; + + case eRDFContentSinkState_InContainerElement: + rv = OpenMember(aName,aAtts); + break; + + case eRDFContentSinkState_InPropertyElement: + case eRDFContentSinkState_InMemberElement: + rv = OpenValue(aName,aAtts); + break; + + case eRDFContentSinkState_InEpilog: + MOZ_LOG(gLog, LogLevel::Warning, + ("rdfxml: unexpected content in epilog at line %d", + aLineNumber)); + break; + } + + return rv; +} + +NS_IMETHODIMP +RDFContentSinkImpl::HandleEndElement(const char16_t *aName) +{ + FlushText(); + + nsIRDFResource* resource; + if (NS_FAILED(PopContext(resource, mState, mParseMode))) { + // XXX parser didn't catch unmatched tags? 
+ if (MOZ_LOG_TEST(gLog, LogLevel::Warning)) { + nsAutoString tagStr(aName); + char* tagCStr = ToNewCString(tagStr); + + PR_LogPrint + ("rdfxml: extra close tag '%s' at line %d", + tagCStr, 0/*XXX fix me */); + + free(tagCStr); + } + + return NS_ERROR_UNEXPECTED; // XXX + } + + // If we've just popped a member or property element, _now_ is the + // time to add that element to the graph. + switch (mState) { + case eRDFContentSinkState_InMemberElement: + { + nsCOMPtr<nsIRDFContainer> container; + NS_NewRDFContainer(getter_AddRefs(container)); + container->Init(mDataSource, GetContextElement(1)); + container->AppendElement(resource); + } + break; + + case eRDFContentSinkState_InPropertyElement: + { + mDataSource->Assert(GetContextElement(1), GetContextElement(0), resource, true); + } break; + default: + break; + } + + if (mContextStack->IsEmpty()) + mState = eRDFContentSinkState_InEpilog; + + NS_IF_RELEASE(resource); + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::HandleComment(const char16_t *aName) +{ + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::HandleCDataSection(const char16_t *aData, + uint32_t aLength) +{ + return aData ? AddText(aData, aLength) : NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::HandleDoctypeDecl(const nsAString & aSubset, + const nsAString & aName, + const nsAString & aSystemId, + const nsAString & aPublicId, + nsISupports* aCatalogData) +{ + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::HandleCharacterData(const char16_t *aData, + uint32_t aLength) +{ + return aData ? 
AddText(aData, aLength) : NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::HandleProcessingInstruction(const char16_t *aTarget, + const char16_t *aData) +{ + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::HandleXMLDeclaration(const char16_t *aVersion, + const char16_t *aEncoding, + int32_t aStandalone) +{ + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::ReportError(const char16_t* aErrorText, + const char16_t* aSourceText, + nsIScriptError *aError, + bool *_retval) +{ + NS_PRECONDITION(aError && aSourceText && aErrorText, "Check arguments!!!"); + + // The expat driver should report the error. + *_retval = true; + return NS_OK; +} + +//////////////////////////////////////////////////////////////////////// +// nsIContentSink interface + +NS_IMETHODIMP +RDFContentSinkImpl::WillParse(void) +{ + return NS_OK; +} + + +NS_IMETHODIMP +RDFContentSinkImpl::WillBuildModel(nsDTDMode) +{ + if (mDataSource) { + nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource); + if (sink) + return sink->BeginLoad(); + } + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::DidBuildModel(bool aTerminated) +{ + if (mDataSource) { + nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource); + if (sink) + return sink->EndLoad(); + } + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::WillInterrupt(void) +{ + if (mDataSource) { + nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource); + if (sink) + return sink->Interrupt(); + } + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::WillResume(void) +{ + if (mDataSource) { + nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource); + if (sink) + return sink->Resume(); + } + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::SetParser(nsParserBase* aParser) +{ + return NS_OK; +} + +//////////////////////////////////////////////////////////////////////// +// nsIRDFContentSink interface + +NS_IMETHODIMP +RDFContentSinkImpl::Init(nsIURI* aURL) +{ + NS_PRECONDITION(aURL != nullptr, "null ptr"); + if 
(! aURL) + return NS_ERROR_NULL_POINTER; + + mDocumentURL = aURL; + mState = eRDFContentSinkState_InProlog; + return NS_OK; +} + +NS_IMETHODIMP +RDFContentSinkImpl::SetDataSource(nsIRDFDataSource* aDataSource) +{ + NS_PRECONDITION(aDataSource != nullptr, "SetDataSource null ptr"); + mDataSource = aDataSource; + NS_ASSERTION(mDataSource != nullptr,"Couldn't QI RDF DataSource"); + return NS_OK; +} + + +NS_IMETHODIMP +RDFContentSinkImpl::GetDataSource(nsIRDFDataSource*& aDataSource) +{ + aDataSource = mDataSource; + NS_IF_ADDREF(aDataSource); + return NS_OK; +} + +//////////////////////////////////////////////////////////////////////// +// Text buffering + +static bool +rdf_IsDataInBuffer(char16_t* buffer, int32_t length) +{ + for (int32_t i = 0; i < length; ++i) { + if (buffer[i] == ' ' || + buffer[i] == '\t' || + buffer[i] == '\n' || + buffer[i] == '\r') + continue; + + return true; + } + return false; +} + +void +RDFContentSinkImpl::ParseText(nsIRDFNode **aResult) +{ + // XXXwaterson wasteful, but we'd need to make a copy anyway to be + // able to call nsIRDFService::Get[Resource|Literal|...](). 
+ nsAutoString value; + value.Append(mText, mTextLength); + value.Trim(" \t\n\r"); + + switch (mParseMode) { + case eRDFContentSinkParseMode_Literal: + { + nsIRDFLiteral *result; + gRDFService->GetLiteral(value.get(), &result); + *aResult = result; + } + break; + + case eRDFContentSinkParseMode_Resource: + { + nsIRDFResource *result; + gRDFService->GetUnicodeResource(value, &result); + *aResult = result; + } + break; + + case eRDFContentSinkParseMode_Int: + { + nsresult err; + int32_t i = value.ToInteger(&err); + nsIRDFInt *result; + gRDFService->GetIntLiteral(i, &result); + *aResult = result; + } + break; + + case eRDFContentSinkParseMode_Date: + { + PRTime t = rdf_ParseDate(nsDependentCString(NS_LossyConvertUTF16toASCII(value).get(), value.Length())); + nsIRDFDate *result; + gRDFService->GetDateLiteral(t, &result); + *aResult = result; + } + break; + + default: + NS_NOTREACHED("unknown parse type"); + break; + } +} + +nsresult +RDFContentSinkImpl::FlushText() +{ + nsresult rv = NS_OK; + if (0 != mTextLength) { + if (rdf_IsDataInBuffer(mText, mTextLength)) { + // XXX if there's anything but whitespace, then we'll + // create a text node. 
+ + switch (mState) { + case eRDFContentSinkState_InMemberElement: { + nsCOMPtr<nsIRDFNode> node; + ParseText(getter_AddRefs(node)); + + nsCOMPtr<nsIRDFContainer> container; + NS_NewRDFContainer(getter_AddRefs(container)); + container->Init(mDataSource, GetContextElement(1)); + + container->AppendElement(node); + } break; + + case eRDFContentSinkState_InPropertyElement: { + nsCOMPtr<nsIRDFNode> node; + ParseText(getter_AddRefs(node)); + + mDataSource->Assert(GetContextElement(1), GetContextElement(0), node, true); + } break; + + default: + // just ignore it + break; + } + } + mTextLength = 0; + } + return rv; +} + + +nsresult +RDFContentSinkImpl::AddText(const char16_t* aText, int32_t aLength) +{ + // Create buffer when we first need it + if (0 == mTextSize) { + mText = (char16_t *) malloc(sizeof(char16_t) * 4096); + if (!mText) { + return NS_ERROR_OUT_OF_MEMORY; + } + mTextSize = 4096; + } + + // Copy data from string into our buffer; grow the buffer as needed. + // It never shrinks, but since the content sink doesn't stick around, + // this shouldn't be a bloat issue. + int32_t amount = mTextSize - mTextLength; + if (amount < aLength) { + // Grow the buffer by at least a factor of two to prevent thrashing. + // Since PR_REALLOC will leave mText intact if the call fails, + // don't clobber mText or mTextSize until the new mem is allocated. + int32_t newSize = (2 * mTextSize > (mTextSize + aLength)) ? 
+ (2 * mTextSize) : (mTextSize + aLength); + char16_t* newText = + (char16_t *) realloc(mText, sizeof(char16_t) * newSize); + if (!newText) + return NS_ERROR_OUT_OF_MEMORY; + mTextSize = newSize; + mText = newText; + } + memcpy(&mText[mTextLength], aText, sizeof(char16_t) * aLength); + mTextLength += aLength; + + return NS_OK; +} + +bool +rdf_RequiresAbsoluteURI(const nsString& uri) +{ + // cheap shot at figuring out if this requires an absolute url translation + return !(StringBeginsWith(uri, NS_LITERAL_STRING("urn:")) || + StringBeginsWith(uri, NS_LITERAL_STRING("chrome:"))); +} + +nsresult +RDFContentSinkImpl::GetIdAboutAttribute(const char16_t** aAttributes, + nsIRDFResource** aResource, + bool* aIsAnonymous) +{ + // This corresponds to the dirty work of production [6.5] + nsresult rv = NS_OK; + + nsAutoString nodeID; + + nsCOMPtr<nsIAtom> localName; + for (; *aAttributes; aAttributes += 2) { + const nsDependentSubstring& nameSpaceURI = + SplitExpatName(aAttributes[0], getter_AddRefs(localName)); + + // We'll accept either `ID' or `rdf:ID' (ibid with `about' or + // `rdf:about') in the spirit of being liberal towards the + // input that we receive. + if (!nameSpaceURI.IsEmpty() && + !nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) { + continue; + } + + // XXX you can't specify both, but we'll just pick up the + // first thing that was specified and ignore the other. 
+ + if (localName == kAboutAtom) { + if (aIsAnonymous) + *aIsAnonymous = false; + + nsAutoString relURI(aAttributes[1]); + if (rdf_RequiresAbsoluteURI(relURI)) { + nsAutoCString uri; + rv = mDocumentURL->Resolve(NS_ConvertUTF16toUTF8(aAttributes[1]), uri); + if (NS_FAILED(rv)) return rv; + + return gRDFService->GetResource(uri, + aResource); + } + return gRDFService->GetResource(NS_ConvertUTF16toUTF8(aAttributes[1]), + aResource); + } + else if (localName == kIdAtom) { + if (aIsAnonymous) + *aIsAnonymous = false; + // In the spirit of leniency, we do not bother trying to + // enforce that this be a valid "XML Name" (see + // http://www.w3.org/TR/REC-xml#NT-Nmtoken), as per + // 6.21. If we wanted to, this would be where to do it. + + // Construct an in-line resource whose URI is the + // document's URI plus the XML name specified in the ID + // attribute. + nsAutoCString name; + nsAutoCString ref('#'); + AppendUTF16toUTF8(aAttributes[1], ref); + + rv = mDocumentURL->Resolve(ref, name); + if (NS_FAILED(rv)) return rv; + + return gRDFService->GetResource(name, aResource); + } + else if (localName == kNodeIdAtom) { + nodeID.Assign(aAttributes[1]); + } + else if (localName == kAboutEachAtom) { + // XXX we don't deal with aboutEach... + //MOZ_LOG(gLog, LogLevel::Warning, + // ("rdfxml: ignoring aboutEach at line %d", + // aNode.GetSourceLineNumber())); + } + } + + // Otherwise, we couldn't find anything, so just gensym one... + if (aIsAnonymous) + *aIsAnonymous = true; + + // If nodeID is present, check if we already know about it. If we've seen + // the nodeID before, use the same resource, otherwise generate a new one. 
+ if (!nodeID.IsEmpty()) { + mNodeIDMap.Get(nodeID,aResource); + + if (!*aResource) { + rv = gRDFService->GetAnonymousResource(aResource); + mNodeIDMap.Put(nodeID,*aResource); + } + } + else { + rv = gRDFService->GetAnonymousResource(aResource); + } + + return rv; +} + +nsresult +RDFContentSinkImpl::GetResourceAttribute(const char16_t** aAttributes, + nsIRDFResource** aResource) +{ + nsCOMPtr<nsIAtom> localName; + + nsAutoString nodeID; + + for (; *aAttributes; aAttributes += 2) { + const nsDependentSubstring& nameSpaceURI = + SplitExpatName(aAttributes[0], getter_AddRefs(localName)); + + // We'll accept `resource' or `rdf:resource', under the spirit + // that we should be liberal towards the input that we + // receive. + if (!nameSpaceURI.IsEmpty() && + !nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) { + continue; + } + + // XXX you can't specify both, but we'll just pick up the + // first thing that was specified and ignore the other. + + if (localName == kResourceAtom) { + // XXX Take the URI and make it fully qualified by + // sticking it into the document's URL. This may not be + // appropriate... + nsAutoString relURI(aAttributes[1]); + if (rdf_RequiresAbsoluteURI(relURI)) { + nsresult rv; + nsAutoCString uri; + + rv = mDocumentURL->Resolve(NS_ConvertUTF16toUTF8(aAttributes[1]), uri); + if (NS_FAILED(rv)) return rv; + + return gRDFService->GetResource(uri, aResource); + } + return gRDFService->GetResource(NS_ConvertUTF16toUTF8(aAttributes[1]), + aResource); + } + else if (localName == kNodeIdAtom) { + nodeID.Assign(aAttributes[1]); + } + } + + // If nodeID is present, check if we already know about it. If we've seen + // the nodeID before, use the same resource, otherwise generate a new one. 
+ if (!nodeID.IsEmpty()) { + mNodeIDMap.Get(nodeID,aResource); + + if (!*aResource) { + nsresult rv; + rv = gRDFService->GetAnonymousResource(aResource); + if (NS_FAILED(rv)) { + return rv; + } + mNodeIDMap.Put(nodeID,*aResource); + } + return NS_OK; + } + + return NS_ERROR_FAILURE; +} + +nsresult +RDFContentSinkImpl::AddProperties(const char16_t** aAttributes, + nsIRDFResource* aSubject, + int32_t* aCount) +{ + if (aCount) + *aCount = 0; + + nsCOMPtr<nsIAtom> localName; + for (; *aAttributes; aAttributes += 2) { + const nsDependentSubstring& nameSpaceURI = + SplitExpatName(aAttributes[0], getter_AddRefs(localName)); + + // skip 'xmlns' directives, these are "meta" information + if (nameSpaceURI.EqualsLiteral("http://www.w3.org/2000/xmlns/")) { + continue; + } + + // skip `about', `ID', `resource', and 'nodeID' attributes (either with or + // without the `rdf:' prefix); these are all "special" and + // should've been dealt with by the caller. + if (localName == kAboutAtom || localName == kIdAtom || + localName == kResourceAtom || localName == kNodeIdAtom) { + if (nameSpaceURI.IsEmpty() || + nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) + continue; + } + + // Skip `parseType', `RDF:parseType', and `NC:parseType'. This + // is meta-information that will be handled in SetParseMode. 
+ if (localName == kParseTypeAtom) { + if (nameSpaceURI.IsEmpty() || + nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI) || + nameSpaceURI.EqualsLiteral(NC_NAMESPACE_URI)) { + continue; + } + } + + NS_ConvertUTF16toUTF8 propertyStr(nameSpaceURI); + propertyStr.Append(nsAtomCString(localName)); + + // Add the assertion to RDF + nsCOMPtr<nsIRDFResource> property; + gRDFService->GetResource(propertyStr, getter_AddRefs(property)); + + nsCOMPtr<nsIRDFLiteral> target; + gRDFService->GetLiteral(aAttributes[1], + getter_AddRefs(target)); + + mDataSource->Assert(aSubject, property, target, true); + } + return NS_OK; +} + +void +RDFContentSinkImpl::SetParseMode(const char16_t **aAttributes) +{ + nsCOMPtr<nsIAtom> localName; + for (; *aAttributes; aAttributes += 2) { + const nsDependentSubstring& nameSpaceURI = + SplitExpatName(aAttributes[0], getter_AddRefs(localName)); + + if (localName == kParseTypeAtom) { + nsDependentString v(aAttributes[1]); + + if (nameSpaceURI.IsEmpty() || + nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) { + if (v.EqualsLiteral("Resource")) + mParseMode = eRDFContentSinkParseMode_Resource; + + break; + } + else if (nameSpaceURI.EqualsLiteral(NC_NAMESPACE_URI)) { + if (v.EqualsLiteral("Date")) + mParseMode = eRDFContentSinkParseMode_Date; + else if (v.EqualsLiteral("Integer")) + mParseMode = eRDFContentSinkParseMode_Int; + + break; + } + } + } +} + +//////////////////////////////////////////////////////////////////////// +// RDF-specific routines used to build the model + +nsresult +RDFContentSinkImpl::OpenRDF(const char16_t* aName) +{ + // ensure that we're actually reading RDF by making sure that the + // opening tag is <rdf:RDF>, where "rdf:" corresponds to whatever + // they've declared the standard RDF namespace to be. 
+ nsCOMPtr<nsIAtom> localName; + const nsDependentSubstring& nameSpaceURI = + SplitExpatName(aName, getter_AddRefs(localName)); + + if (!nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI) || localName != kRDFAtom) { + // MOZ_LOG(gLog, LogLevel::Info, + // ("rdfxml: expected RDF:RDF at line %d", + // aNode.GetSourceLineNumber())); + + return NS_ERROR_UNEXPECTED; + } + + PushContext(nullptr, mState, mParseMode); + mState = eRDFContentSinkState_InDocumentElement; + return NS_OK; +} + +nsresult +RDFContentSinkImpl::OpenObject(const char16_t* aName, + const char16_t** aAttributes) +{ + // an "object" non-terminal is either a "description", a "typed + // node", or a "container", so this change the content sink's + // state appropriately. + nsCOMPtr<nsIAtom> localName; + const nsDependentSubstring& nameSpaceURI = + SplitExpatName(aName, getter_AddRefs(localName)); + + // Figure out the URI of this object, and create an RDF node for it. + nsCOMPtr<nsIRDFResource> source; + GetIdAboutAttribute(aAttributes, getter_AddRefs(source)); + + // If there is no `ID' or `about', then there's not much we can do. + if (! source) + return NS_ERROR_FAILURE; + + // Push the element onto the context stack + PushContext(source, mState, mParseMode); + + // Now figure out what kind of state transition we need to + // make. We'll either be going into a mode where we parse a + // description or a container. 
+ bool isaTypedNode = true; + + if (nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) { + isaTypedNode = false; + + if (localName == kDescriptionAtom) { + // it's a description + mState = eRDFContentSinkState_InDescriptionElement; + } + else if (localName == kBagAtom) { + // it's a bag container + InitContainer(kRDF_Bag, source); + mState = eRDFContentSinkState_InContainerElement; + } + else if (localName == kSeqAtom) { + // it's a seq container + InitContainer(kRDF_Seq, source); + mState = eRDFContentSinkState_InContainerElement; + } + else if (localName == kAltAtom) { + // it's an alt container + InitContainer(kRDF_Alt, source); + mState = eRDFContentSinkState_InContainerElement; + } + else { + // heh, that's not *in* the RDF namespace: just treat it + // like a typed node + isaTypedNode = true; + } + } + + if (isaTypedNode) { + NS_ConvertUTF16toUTF8 typeStr(nameSpaceURI); + typeStr.Append(nsAtomCString(localName)); + + nsCOMPtr<nsIRDFResource> type; + nsresult rv = gRDFService->GetResource(typeStr, getter_AddRefs(type)); + if (NS_FAILED(rv)) return rv; + + rv = mDataSource->Assert(source, kRDF_type, type, true); + if (NS_FAILED(rv)) return rv; + + mState = eRDFContentSinkState_InDescriptionElement; + } + + AddProperties(aAttributes, source); + return NS_OK; +} + +nsresult +RDFContentSinkImpl::OpenProperty(const char16_t* aName, const char16_t** aAttributes) +{ + nsresult rv; + + // an "object" non-terminal is either a "description", a "typed + // node", or a "container", so this change the content sink's + // state appropriately. 
+ nsCOMPtr<nsIAtom> localName; + const nsDependentSubstring& nameSpaceURI = + SplitExpatName(aName, getter_AddRefs(localName)); + + NS_ConvertUTF16toUTF8 propertyStr(nameSpaceURI); + propertyStr.Append(nsAtomCString(localName)); + + nsCOMPtr<nsIRDFResource> property; + rv = gRDFService->GetResource(propertyStr, getter_AddRefs(property)); + if (NS_FAILED(rv)) return rv; + + // See if they've specified a 'resource' attribute, in which case + // they mean *that* to be the object of this property. + nsCOMPtr<nsIRDFResource> target; + GetResourceAttribute(aAttributes, getter_AddRefs(target)); + + bool isAnonymous = false; + + if (! target) { + // See if an 'ID' attribute has been specified, in which case + // this corresponds to the fourth form of [6.12]. + + // XXX strictly speaking, we should reject the RDF/XML as + // invalid if they've specified both an 'ID' and a 'resource' + // attribute. Bah. + + // XXX strictly speaking, 'about=' isn't allowed here, but + // what the hell. + GetIdAboutAttribute(aAttributes, getter_AddRefs(target), &isAnonymous); + } + + if (target) { + // They specified an inline resource for the value of this + // property. Create an RDF resource for the inline resource + // URI, add the properties to it, and attach the inline + // resource to its parent. + int32_t count; + rv = AddProperties(aAttributes, target, &count); + NS_ASSERTION(NS_SUCCEEDED(rv), "problem adding properties"); + if (NS_FAILED(rv)) return rv; + + if (count || !isAnonymous) { + // If the resource was "anonymous" (i.e., they hadn't + // explicitly set an ID or resource attribute), then we'll + // only assert this property from the context element *if* + // there were properties specified on the anonymous + // resource. + rv = mDataSource->Assert(GetContextElement(0), property, target, true); + if (NS_FAILED(rv)) return rv; + } + + // XXX Technically, we should _not_ fall through here and push + // the element onto the stack: this is supposed to be a closed + // node. 
But right now I'm lazy and the code will just Do The + // Right Thing so long as the RDF is well-formed. + } + + // Push the element onto the context stack and change state. + PushContext(property, mState, mParseMode); + mState = eRDFContentSinkState_InPropertyElement; + SetParseMode(aAttributes); + + return NS_OK; +} + +nsresult +RDFContentSinkImpl::OpenMember(const char16_t* aName, + const char16_t** aAttributes) +{ + // ensure that we're actually reading a member element by making + // sure that the opening tag is <rdf:li>, where "rdf:" corresponds + // to whatever they've declared the standard RDF namespace to be. + nsresult rv; + + nsCOMPtr<nsIAtom> localName; + const nsDependentSubstring& nameSpaceURI = + SplitExpatName(aName, getter_AddRefs(localName)); + + if (!nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI) || + localName != kLiAtom) { + MOZ_LOG(gLog, LogLevel::Error, + ("rdfxml: expected RDF:li at line %d", + -1)); // XXX pass in line number + + return NS_ERROR_UNEXPECTED; + } + + // The parent element is the container. + nsIRDFResource* container = GetContextElement(0); + if (! container) + return NS_ERROR_NULL_POINTER; + + nsIRDFResource* resource; + if (NS_SUCCEEDED(rv = GetResourceAttribute(aAttributes, &resource))) { + // Okay, this node has an RDF:resource="..." attribute. That + // means that it's a "referenced item," as covered in [6.29]. + nsCOMPtr<nsIRDFContainer> c; + NS_NewRDFContainer(getter_AddRefs(c)); + c->Init(mDataSource, container); + c->AppendElement(resource); + + // XXX Technically, we should _not_ fall through here and push + // the element onto the stack: this is supposed to be a closed + // node. But right now I'm lazy and the code will just Do The + // Right Thing so long as the RDF is well-formed. + NS_RELEASE(resource); + } + + // Change state. Pushing a null context element is a bit weird, + // but the idea is that there really is _no_ context "property". 
+ // The contained element will use nsIRDFContainer::AppendElement() to add + // the element to the container, which requires only the container + // and the element to be added. + PushContext(nullptr, mState, mParseMode); + mState = eRDFContentSinkState_InMemberElement; + SetParseMode(aAttributes); + + return NS_OK; +} + + +nsresult +RDFContentSinkImpl::OpenValue(const char16_t* aName, const char16_t** aAttributes) +{ + // a "value" can either be an object or a string: we'll only get + // *here* if it's an object, as raw text is added as a leaf. + return OpenObject(aName,aAttributes); +} + +//////////////////////////////////////////////////////////////////////// +// namespace resolution +void +RDFContentSinkImpl::RegisterNamespaces(const char16_t **aAttributes) +{ + nsCOMPtr<nsIRDFXMLSink> sink = do_QueryInterface(mDataSource); + if (!sink) { + return; + } + NS_NAMED_LITERAL_STRING(xmlns, "http://www.w3.org/2000/xmlns/"); + for (; *aAttributes; aAttributes += 2) { + // check the namespace + const char16_t* attr = aAttributes[0]; + const char16_t* xmlnsP = xmlns.BeginReading(); + while (*attr == *xmlnsP) { + ++attr; + ++xmlnsP; + } + if (*attr != 0xFFFF || + xmlnsP != xmlns.EndReading()) { + continue; + } + // get the localname (or "xmlns" for the default namespace) + const char16_t* endLocal = ++attr; + while (*endLocal && *endLocal != 0xFFFF) { + ++endLocal; + } + nsDependentSubstring lname(attr, endLocal); + nsCOMPtr<nsIAtom> preferred = NS_Atomize(lname); + if (preferred == kXMLNSAtom) { + preferred = nullptr; + } + sink->AddNameSpace(preferred, nsDependentString(aAttributes[1])); + } +} + +//////////////////////////////////////////////////////////////////////// +// Qualified name resolution + +const nsDependentSubstring +RDFContentSinkImpl::SplitExpatName(const char16_t *aExpatName, + nsIAtom **aLocalName) +{ + /** + * Expat can send the following: + * localName + * namespaceURI<separator>localName + * namespaceURI<separator>localName<separator>prefix + * + * 
and we use 0xFFFF for the <separator>. + * + */ + + const char16_t *uriEnd = aExpatName; + const char16_t *nameStart = aExpatName; + const char16_t *pos; + for (pos = aExpatName; *pos; ++pos) { + if (*pos == 0xFFFF) { + if (uriEnd != aExpatName) { + break; + } + + uriEnd = pos; + nameStart = pos + 1; + } + } + + const nsDependentSubstring& nameSpaceURI = Substring(aExpatName, uriEnd); + *aLocalName = NS_Atomize(Substring(nameStart, pos)).take(); + return nameSpaceURI; +} + +nsresult +RDFContentSinkImpl::InitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer) +{ + // Do the right kind of initialization based on the container + // 'type' resource, and the state of the container (i.e., 'make' a + // new container vs. 'reinitialize' the container). + nsresult rv; + + static const ContainerInfo gContainerInfo[] = { + { &RDFContentSinkImpl::kRDF_Alt, &nsIRDFContainerUtils::IsAlt, &nsIRDFContainerUtils::MakeAlt }, + { &RDFContentSinkImpl::kRDF_Bag, &nsIRDFContainerUtils::IsBag, &nsIRDFContainerUtils::MakeBag }, + { &RDFContentSinkImpl::kRDF_Seq, &nsIRDFContainerUtils::IsSeq, &nsIRDFContainerUtils::MakeSeq }, + { 0, 0, 0 }, + }; + + for (const ContainerInfo* info = gContainerInfo; info->mType != 0; ++info) { + if (*info->mType != aContainerType) + continue; + + bool isContainer; + rv = (gRDFContainerUtils->*(info->mTestFn))(mDataSource, aContainer, &isContainer); + if (isContainer) { + rv = ReinitContainer(aContainerType, aContainer); + } + else { + rv = (gRDFContainerUtils->*(info->mMakeFn))(mDataSource, aContainer, nullptr); + } + return rv; + } + + NS_NOTREACHED("not an RDF container type"); + return NS_ERROR_FAILURE; +} + + + +nsresult +RDFContentSinkImpl::ReinitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer) +{ + // Mega-kludge to deal with the fact that Make[Seq|Alt|Bag] is + // idempotent, and as such, containers will have state (e.g., + // RDF:nextVal) maintained in the graph across loads. 
This + // re-initializes each container's RDF:nextVal to '1', and 'marks' + // the container as such. + nsresult rv; + + nsCOMPtr<nsIRDFLiteral> one; + rv = gRDFService->GetLiteral(u"1", getter_AddRefs(one)); + if (NS_FAILED(rv)) return rv; + + // Re-initialize the 'nextval' property + nsCOMPtr<nsIRDFNode> nextval; + rv = mDataSource->GetTarget(aContainer, kRDF_nextVal, true, getter_AddRefs(nextval)); + if (NS_FAILED(rv)) return rv; + + rv = mDataSource->Change(aContainer, kRDF_nextVal, nextval, one); + if (NS_FAILED(rv)) return rv; + + // Re-mark as a container. XXX should be kRDF_type + rv = mDataSource->Assert(aContainer, kRDF_instanceOf, aContainerType, true); + NS_ASSERTION(NS_SUCCEEDED(rv), "unable to mark container as such"); + if (NS_FAILED(rv)) return rv; + + return NS_OK; +} + +//////////////////////////////////////////////////////////////////////// +// Content stack management + +nsIRDFResource* +RDFContentSinkImpl::GetContextElement(int32_t ancestor /* = 0 */) +{ + if ((nullptr == mContextStack) || + (uint32_t(ancestor) >= mContextStack->Length())) { + return nullptr; + } + + return mContextStack->ElementAt( + mContextStack->Length()-ancestor-1).mResource; +} + +int32_t +RDFContentSinkImpl::PushContext(nsIRDFResource *aResource, + RDFContentSinkState aState, + RDFContentSinkParseMode aParseMode) +{ + if (! mContextStack) { + mContextStack = new AutoTArray<RDFContextStackElement, 8>(); + if (! mContextStack) + return 0; + } + + RDFContextStackElement* e = mContextStack->AppendElement(); + if (! 
e) + return mContextStack->Length(); + + e->mResource = aResource; + e->mState = aState; + e->mParseMode = aParseMode; + + return mContextStack->Length(); +} + +nsresult +RDFContentSinkImpl::PopContext(nsIRDFResource *&aResource, + RDFContentSinkState &aState, + RDFContentSinkParseMode &aParseMode) +{ + if ((nullptr == mContextStack) || + (mContextStack->IsEmpty())) { + return NS_ERROR_NULL_POINTER; + } + + uint32_t i = mContextStack->Length() - 1; + RDFContextStackElement &e = mContextStack->ElementAt(i); + + aResource = e.mResource; + NS_IF_ADDREF(aResource); + aState = e.mState; + aParseMode = e.mParseMode; + + mContextStack->RemoveElementAt(i); + return NS_OK; +} + + +//////////////////////////////////////////////////////////////////////// + +nsresult +NS_NewRDFContentSink(nsIRDFContentSink** aResult) +{ + NS_PRECONDITION(aResult != nullptr, "null ptr"); + if (! aResult) + return NS_ERROR_NULL_POINTER; + + RDFContentSinkImpl* sink = new RDFContentSinkImpl(); + if (! sink) + return NS_ERROR_OUT_OF_MEMORY; + + NS_ADDREF(sink); + *aResult = sink; + return NS_OK; +} |