From 6168dbe21f5f83b906e562ea0ab232d499b275a6 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Wed, 15 Jan 2020 14:56:04 -0500 Subject: Add java htmlparser sources that match the original 52-level state https://hg.mozilla.org/projects/htmlparser/ Commit: abe62ab2a9b69ccb3b5d8a231ec1ae11154c571d --- parser/html/java/htmlparser/ruby-gcj/validator.cpp | 210 +++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 parser/html/java/htmlparser/ruby-gcj/validator.cpp (limited to 'parser/html/java/htmlparser/ruby-gcj/validator.cpp') diff --git a/parser/html/java/htmlparser/ruby-gcj/validator.cpp b/parser/html/java/htmlparser/ruby-gcj/validator.cpp new file mode 100644 index 000000000..aadd24abe --- /dev/null +++ b/parser/html/java/htmlparser/ruby-gcj/validator.cpp @@ -0,0 +1,210 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nu/validator/htmlparser/dom/HtmlDocumentBuilder.h" + +#include "DomUtils.h" + +#include "ruby.h" + +using namespace java::io; +using namespace java::lang; +using namespace java::util; +using namespace javax::xml::parsers; +using namespace javax::xml::xpath; +using namespace nu::validator::htmlparser::dom; +using namespace org::w3c::dom; +using namespace org::xml::sax; + +static VALUE jaxp_Document; +static VALUE jaxp_Attr; +static VALUE jaxp_Element; +static ID ID_read; +static ID ID_doc; +static ID ID_element; + +// convert a Java string into a Ruby string +static VALUE j2r(String *string) { + if (string == NULL) return Qnil; + jint len = JvGetStringUTFLength(string); + char buf[len]; + JvGetStringUTFRegion(string, 0, len, buf); + return rb_str_new(buf, len); +} + +// convert a Ruby string into a Java string +static String *r2j(VALUE string) { + return JvNewStringUTF(RSTRING(string)->ptr); +} + +// release the Java Document associated with this Ruby Document +static void vnu_document_free(Document *doc) { + DomUtils::unpin(doc); +} + +// Nu::Validator::parse( string|file ) +static VALUE vnu_parse(VALUE self, VALUE input) { + HtmlDocumentBuilder *parser = new HtmlDocumentBuilder(); + + // read file-like objects into memory. TODO: buffer such objects + if (rb_respond_to(input, ID_read)) + input = rb_funcall(input, ID_read, 0); + + // convert input in to a ByteArrayInputStream + jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); + memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); + InputSource *source = new InputSource(new ByteArrayInputStream(bytes)); + + // parse, pin, and wrap + Document *doc = parser->parse(source); + DomUtils::pin(doc); + return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); +} + +// Jaxp::parse( string|file ) +static VALUE jaxp_parse(VALUE self, VALUE input) { + DocumentBuilderFactory *factory = DocumentBuilderFactory::newInstance(); + DocumentBuilder *parser = factory->newDocumentBuilder(); + + // read file-like objects into memory. TODO: buffer such objects + if (rb_respond_to(input, ID_read)) + input = rb_funcall(input, ID_read, 0); + + try { + jbyteArray bytes = JvNewByteArray(RSTRING(input)->len); + memcpy(elements(bytes), RSTRING(input)->ptr, RSTRING(input)->len); + Document *doc = parser->parse(new ByteArrayInputStream(bytes)); + DomUtils::pin(doc); + return Data_Wrap_Struct(jaxp_Document, NULL, vnu_document_free, doc); + } catch (java::lang::Throwable *ex) { + ex->printStackTrace(); + return Qnil; + } +} + + +// Nu::Validator::Document#encoding +static VALUE jaxp_document_encoding(VALUE rdoc) { + Document *jdoc; + Data_Get_Struct(rdoc, Document, jdoc); + return j2r(jdoc->getXmlEncoding()); +} + +// Nu::Validator::Document#root +static VALUE jaxp_document_root(VALUE rdoc) { + Document *jdoc; + Data_Get_Struct(rdoc, Document, jdoc); + + Element *jelement = jdoc->getDocumentElement(); + if (jelement==NULL) return Qnil; + + VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, jelement); + rb_ivar_set(relement, ID_doc, rdoc); + return relement; +} + +// Nu::Validator::Document#xpath +static VALUE jaxp_document_xpath(VALUE rdoc, VALUE path) { + Document *jdoc; + Data_Get_Struct(rdoc, Document, jdoc); + + Element *jelement = jdoc->getDocumentElement(); + if (jelement==NULL) return Qnil; + + XPath *xpath = XPathFactory::newInstance()->newXPath(); + XPathExpression *expr = xpath->compile(r2j(path)); + NodeList *list = (NodeList*) expr->evaluate(jdoc, XPathConstants::NODESET); + + VALUE result = rb_ary_new(); + for (int i=0; igetLength(); i++) { + VALUE relement = Data_Wrap_Struct(jaxp_Element, NULL, NULL, list->item(i)); + rb_ivar_set(relement, ID_doc, rdoc); + rb_ary_push(result, relement); + } + return result; +} + +// Nu::Validator::Element#name +static VALUE jaxp_element_name(VALUE relement) { + Element *jelement; + Data_Get_Struct(relement, Element, jelement); + return j2r(jelement->getNodeName()); +} + +// Nu::Validator::Element#attributes +static VALUE jaxp_element_attributes(VALUE relement) { + Element *jelement; + Data_Get_Struct(relement, Element, jelement); + VALUE result = rb_hash_new(); + NamedNodeMap *map = jelement->getAttributes(); + for (int i=0; igetLength(); i++) { + Attr *jattr = (Attr *) map->item(i); + VALUE rattr = Data_Wrap_Struct(jaxp_Attr, NULL, NULL, jattr); + rb_ivar_set(rattr, ID_element, relement); + rb_hash_aset(result, j2r(jattr->getName()), rattr); + } + return result; +} + +// Nu::Validator::Attribute#value +static VALUE jaxp_attribute_value(VALUE rattribute) { + Attr *jattribute; + Data_Get_Struct(rattribute, Attr, jattribute); + return j2r(jattribute->getValue()); +} + +typedef VALUE (ruby_method)(...); + +// Nu::Validator module initialization +extern "C" void Init_validator() { + JvCreateJavaVM(NULL); + JvAttachCurrentThread(NULL, NULL); + JvInitClass(&DomUtils::class$); + JvInitClass(&XPathFactory::class$); + JvInitClass(&XPathConstants::class$); + + VALUE jaxp = rb_define_module("Jaxp"); + rb_define_singleton_method(jaxp, "parse", (ruby_method*)&jaxp_parse, 1); + + VALUE nu = rb_define_module("Nu"); + VALUE validator = rb_define_module_under(nu, "Validator"); + rb_define_singleton_method(validator, "parse", (ruby_method*)&vnu_parse, 1); + + jaxp_Document = rb_define_class_under(jaxp, "Document", rb_cObject); + rb_define_method(jaxp_Document, "encoding", + (ruby_method*)&jaxp_document_encoding, 0); + rb_define_method(jaxp_Document, "root", + (ruby_method*)&jaxp_document_root, 0); + rb_define_method(jaxp_Document, "xpath", + (ruby_method*)&jaxp_document_xpath, 1); + + jaxp_Element = rb_define_class_under(jaxp, "Element", rb_cObject); + rb_define_method(jaxp_Element, "name", + (ruby_method*)&jaxp_element_name, 0); + rb_define_method(jaxp_Element, "attributes", + (ruby_method*)&jaxp_element_attributes, 0); + + jaxp_Attr = rb_define_class_under(jaxp, "Attr", rb_cObject); + rb_define_method(jaxp_Attr, "value", + (ruby_method*)&jaxp_attribute_value, 0); + + ID_read = rb_intern("read"); + ID_doc = rb_intern("@doc"); + ID_element = rb_intern("@element"); +} -- cgit v1.2.3