diff options
author | Matt A. Tobin <email@mattatobin.com> | 2020-01-15 14:56:04 -0500 |
---|---|---|
committer | Matt A. Tobin <email@mattatobin.com> | 2020-01-15 14:56:04 -0500 |
commit | 6168dbe21f5f83b906e562ea0ab232d499b275a6 (patch) | |
tree | 658a4b27554c85ebcaad655fc83f2c2bb99e8e80 /parser/html/java/htmlparser/ruby-gcj/validator.cpp | |
parent | 09314667a692fedff8564fc347c8a3663474faa6 (diff) | |
download | UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar.gz UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar.lz UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.tar.xz UXP-6168dbe21f5f83b906e562ea0ab232d499b275a6.zip |
Add java htmlparser sources that match the original 52-level state
https://hg.mozilla.org/projects/htmlparser/
Commit: abe62ab2a9b69ccb3b5d8a231ec1ae11154c571d
Diffstat (limited to 'parser/html/java/htmlparser/ruby-gcj/validator.cpp')
-rw-r--r-- | parser/html/java/htmlparser/ruby-gcj/validator.cpp | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/parser/html/java/htmlparser/ruby-gcj/validator.cpp b/parser/html/java/htmlparser/ruby-gcj/validator.cpp new file mode 100644 index 000000000..aadd24abe --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/validator.cpp @@ -0,0 +1,210 @@ +#include <gcj/cni.h> + +#include <java/io/ByteArrayInputStream.h> +#include <java/lang/System.h> +#include <java/lang/Throwable.h> +#include <java/util/ArrayList.h> +#include <javax/xml/xpath/XPath.h> +#include <javax/xml/xpath/XPathFactory.h> +#include <javax/xml/xpath/XPathExpression.h> +#include <javax/xml/xpath/XPathConstants.h> +#include <javax/xml/parsers/DocumentBuilderFactory.h> +#include <javax/xml/parsers/DocumentBuilder.h> +#include <org/w3c/dom/Attr.h> +#include <org/w3c/dom/Document.h> +#include <org/w3c/dom/Element.h> +#include <org/w3c/dom/NodeList.h> +#include <org/w3c/dom/NamedNodeMap.h> +#include <org/xml/sax/InputSource.h> + +#include "nu/validator/htmlparser/dom/HtmlDocumentBuilder.h" + +#include "DomUtils.h" + +#include "ruby.h" + +using namespace java::io; +using namespace java::lang; +using namespace java::util; +using namespace javax::xml::parsers; +using namespace javax::xml::xpath; +using namespace nu::validator::htmlparser::dom; +using namespace org::w3c::dom; +using namespace org::xml::sax; + +static VALUE jaxp_Document; +static VALUE jaxp_Attr; +static VALUE jaxp_Element; +static ID ID_read; +static ID ID_doc; +static ID ID_element; + +// convert a Java string into a Ruby string +static VALUE j2r(String *string) { + if (string == NULL) return Qnil; + jint len = JvGetStringUTFLength(string); + char buf[len]; + JvGetStringUTFRegion(string, 0, len, buf); + return rb_str_new(buf, len); +} + +// convert a Ruby string into a Java string +static String *r2j(VALUE string) { + return JvNewStringUTF(RSTRING(string)->ptr); +} + +// release the Java Document associated with this Ruby Document +static void vnu_document_free(Document *doc) { + DomUtils::unpin(doc); +} + +// Nu::Validator::parse( string|file ) +static VALUE vnu_parse(VALUE self, VALUE input) { + HtmlDocumentBuilder *parser = new HtmlDocumentBuilder(); + + // read file-like objects into memory. TODO: buffer such objects + if (rb_respond_to(input, ID_read)) + input = rb_funcall(input, ID_read, 0); + + // convert input in to a ByteArrayInputStream + jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); + memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); + InputSource *source = new InputSource(new ByteArrayInputStream(bytes)); + + // parse, pin, and wrap + Document *doc = parser->parse(source); + DomUtils::pin(doc); + return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); +} + +// Jaxp::parse( string|file ) +static VALUE jaxp_parse(VALUE self, VALUE input) { + DocumentBuilderFactory *factory = DocumentBuilderFactory::newInstance(); + DocumentBuilder *parser = factory->newDocumentBuilder(); + + // read file-like objects into memory. TODO: buffer such objects + if (rb_respond_to(input, ID_read)) + input = rb_funcall(input, ID_read, 0); + + try { + jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); + memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); + Document *doc = parser->parse(new ByteArrayInputStream(bytes)); + DomUtils::pin(doc); + return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); + } catch (java::lang::Throwable *ex) { + ex->printStackTrace(); + return Qnil; + } +} + + +// Nu::Validator::Document#encoding +static VALUE jaxp_document_encoding(VALUE rdoc) { + Document *jdoc; + Data_Get_Struct(rdoc, Document, jdoc); + return j2r(jdoc->getXmlEncoding()); +} + +// Nu::Validator::Document#root +static VALUE jaxp_document_root(VALUE rdoc) { + Document *jdoc; + Data_Get_Struct(rdoc, Document, jdoc); + + Element *jelement = jdoc->getDocumentElement(); + if (jelement==NULL) return Qnil; + + VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, jelement); + rb_ivar_set(relement, ID_doc, rdoc); + return relement; +} + +// Nu::Validator::Document#xpath +static VALUE jaxp_document_xpath(VALUE rdoc, VALUE path) { + Document *jdoc; + Data_Get_Struct(rdoc, Document, jdoc); + + Element *jelement = jdoc->getDocumentElement(); + if (jelement==NULL) return Qnil; + + XPath *xpath = XPathFactory::newInstance()->newXPath(); + XPathExpression *expr = xpath->compile(r2j(path)); + NodeList *list = (NodeList*) expr->evaluate(jdoc, XPathConstants::NODESET); + + VALUE result = rb_ary_new(); + for (int i=0; i<list->getLength(); i++) { + VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, list->item(i)); + rb_ivar_set(relement, ID_doc, rdoc); + rb_ary_push(result, relement); + } + return result; +} + +// Nu::Validator::Element#name +static VALUE jaxp_element_name(VALUE relement) { + Element *jelement; + Data_Get_Struct(relement, Element, jelement); + return j2r(jelement->getNodeName()); +} + +// Nu::Validator::Element#attributes +static VALUE jaxp_element_attributes(VALUE relement) { + Element *jelement; + Data_Get_Struct(relement, Element, jelement); + VALUE result = rb_hash_new(); + NamedNodeMap *map = jelement->getAttributes(); + for (int i=0; i<map->getLength(); i++) { + Attr *jattr = (Attr *) map->item(i); + VALUE rattr = Data_Wrap_Struct(jaxp_Attr, NULL, NULL, jattr); + rb_ivar_set(rattr, ID_element, relement); + rb_hash_aset(result, j2r(jattr->getName()), rattr); + } + return result; +} + +// Nu::Validator::Attribute#value +static VALUE jaxp_attribute_value(VALUE rattribute) { + Attr *jattribute; + Data_Get_Struct(rattribute, Attr, jattribute); + return j2r(jattribute->getValue()); +} + +typedef VALUE (ruby_method)(...); + +// Nu::Validator module initialization +extern "C" void Init_validator() { + JvCreateJavaVM(NULL); + JvAttachCurrentThread(NULL, NULL); + JvInitClass(&DomUtils::class$); + JvInitClass(&XPathFactory::class$); + JvInitClass(&XPathConstants::class$); + + VALUE jaxp = rb_define_module("Jaxp"); + rb_define_singleton_method(jaxp, "parse", (ruby_method*)&jaxp_parse, 1); + + VALUE nu = rb_define_module("Nu"); + VALUE validator = rb_define_module_under(nu, "Validator"); + rb_define_singleton_method(validator, "parse", (ruby_method*)&vnu_parse, 1); + + jaxp_Document = rb_define_class_under(jaxp, "Document", rb_cObject); + rb_define_method(jaxp_Document, "encoding", + (ruby_method*)&jaxp_document_encoding, 0); + rb_define_method(jaxp_Document, "root", + (ruby_method*)&jaxp_document_root, 0); + rb_define_method(jaxp_Document, "xpath", + (ruby_method*)&jaxp_document_xpath, 1); + + jaxp_Element = rb_define_class_under(jaxp, "Element", rb_cObject); + rb_define_method(jaxp_Element, "name", + (ruby_method*)&jaxp_element_name, 0); + rb_define_method(jaxp_Element, "attributes", + (ruby_method*)&jaxp_element_attributes, 0); + + jaxp_Attr = rb_define_class_under(jaxp, "Attr", rb_cObject); + rb_define_method(jaxp_Attr, "value", + (ruby_method*)&jaxp_attribute_value, 0); + + ID_read = rb_intern("read"); + ID_doc = rb_intern("@doc"); + ID_element = rb_intern("@element"); +} |