summaryrefslogtreecommitdiffstats
path: root/parser/html/java/htmlparser/src/nu/validator/saxtree/TreeBuilder.java
blob: 39fe236b363fcacee221280cd5fa47d8494cc787 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
/*
 * Copyright (c) 2007 Henri Sivonen
 * Copyright (c) 2008 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.saxtree;

import java.util.LinkedList;
import java.util.List;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;

/**
 * Builds a SAX Tree representation of a document or a fragment 
 * streamed as <code>ContentHandler</code> and 
 * <code>LexicalHandler</code> events. The start/end event matching 
 * is expected to adhere to the SAX API contract. Things will 
 * simply break if this is not the case. Fragments are expected to
 * omit <code>startDocument()</code> and <code>endDocument()</code>
 * calls.
 * 
 * <p>The builder keeps a single "current" parent node. Start events 
 * that open a container (element, CDATA section, DTD, entity) append 
 * a new node to the current node and make it the current node; the 
 * matching end events record the end locator and move back to the 
 * parent. All other events append a leaf node to the current node.
 * 
 * @version $Id$
 * @author hsivonen
 */
public class TreeBuilder implements ContentHandler, LexicalHandler {

    /**
     * The locator most recently supplied through 
     * <code>setDocumentLocator</code>; handed to every node that is 
     * created. Remains <code>null</code> if no locator was ever set.
     */
    private Locator locator;

    /**
     * The node that currently receives children.
     */
    private ParentNode current;

    /**
     * Whether to retain attribute objects (as opposed to copying them).
     */
    private final boolean retainAttributes;

    /**
     * The prefix mappings collected for the next element to be inserted, 
     * or <code>null</code> if none are pending.
     */
    private List<PrefixMapping> prefixMappings;

    /**
     * Constructs a reusable <code>TreeBuilder</code> that builds 
     * <code>Document</code>s and copies attributes.
     */
    public TreeBuilder() {
        this(false, false);
    }

    /**
     * The constructor. The instance will be reusable if building a full 
     * document and not reusable if building a fragment.
     * 
     * @param fragment whether this <code>TreeBuilder</code> should build 
     * a <code>DocumentFragment</code> instead of a <code>Document</code>.
     * @param retainAttributes whether instances of the <code>Attributes</code>
     * interface passed to <code>startElement</code> should be retained 
     * (the alternative is copying).
     */
    public TreeBuilder(boolean fragment, boolean retainAttributes) {
        if (fragment) {
            // A fragment stream has no startDocument() call, so the root 
            // has to exist before the first event arrives.
            current = new DocumentFragment();
        }
        this.retainAttributes = retainAttributes;
    }

    /**
     * Appends a <code>Characters</code> node to the current node.
     * 
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    public void characters(char[] ch, int start, int length) throws SAXException {
        current.appendChild(new Characters(locator, ch, start, length));
    }

    /**
     * Records the end locator on the current node. The current node is 
     * left in place so that it can be fetched with <code>getRoot()</code>.
     * 
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    public void endDocument() throws SAXException {
        current.setEndLocator(locator);
    }

    /**
     * Closes the current node and returns to its parent.
     * 
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    public void endElement(String uri, String localName, String qName) throws SAXException {
        closeCurrent();
    }

    /**
     * Does nothing: pending prefix mappings are handed to the element 
     * in <code>startElement</code>, so there is nothing to undo here.
     * 
     * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
     */
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /**
     * Appends an <code>IgnorableWhitespace</code> node to the current node.
     * 
     * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
     */
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        current.appendChild(new IgnorableWhitespace(locator, ch, start, length));
    }

    /**
     * Appends a <code>ProcessingInstruction</code> node to the current node.
     * 
     * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
     */
    public void processingInstruction(String target, String data) throws SAXException {
        current.appendChild(new ProcessingInstruction(locator, target, data));
    }

    /**
     * Stores the locator that will be passed to subsequently created nodes.
     * 
     * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
     */
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    /**
     * Appends a <code>SkippedEntity</code> node to the current node.
     * 
     * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
     */
    public void skippedEntity(String name) throws SAXException {
        current.appendChild(new SkippedEntity(locator, name));
    }

    /**
     * Starts a new tree: the current node becomes a fresh 
     * <code>Document</code>, and any prefix mappings still pending from 
     * an earlier event stream are discarded.
     * 
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    public void startDocument() throws SAXException {
        current = new Document(locator);
        // If a previous event stream was abandoned between 
        // startPrefixMapping() and startElement(), its mappings would 
        // otherwise be attached to the first element of this document.
        prefixMappings = null;
    }

    /**
     * Appends a new <code>Element</code> carrying the pending prefix 
     * mappings to the current node and makes it the current node.
     * 
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        current = (ParentNode) current.appendChild(new Element(locator, uri, localName, qName, atts, retainAttributes, prefixMappings));
        // The list now belongs to the element; start over for the next one.
        prefixMappings = null;
    }

    /**
     * Queues a prefix mapping for the next element to be inserted.
     * 
     * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String, java.lang.String)
     */
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        if (prefixMappings == null) {
            // Allocated lazily; elements without mappings receive null.
            prefixMappings = new LinkedList<PrefixMapping>();
        }
        prefixMappings.add(new PrefixMapping(prefix, uri));
    }

    /**
     * Appends a <code>Comment</code> node to the current node.
     * 
     * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
     */
    public void comment(char[] ch, int start, int length) throws SAXException {
        current.appendChild(new Comment(locator, ch, start, length));
    }

    /**
     * Closes the current node and returns to its parent.
     * 
     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
     */
    public void endCDATA() throws SAXException {
        closeCurrent();
    }

    /**
     * Closes the current node and returns to its parent.
     * 
     * @see org.xml.sax.ext.LexicalHandler#endDTD()
     */
    public void endDTD() throws SAXException {
        closeCurrent();
    }

    /**
     * Closes the current node and returns to its parent.
     * 
     * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
     */
    public void endEntity(String name) throws SAXException {
        closeCurrent();
    }

    /**
     * Appends a new <code>CDATA</code> node to the current node and 
     * makes it the current node.
     * 
     * @see org.xml.sax.ext.LexicalHandler#startCDATA()
     */
    public void startCDATA() throws SAXException {
        current = (ParentNode) current.appendChild(new CDATA(locator));
    }

    /**
     * Appends a new <code>DTD</code> node to the current node and 
     * makes it the current node.
     * 
     * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
     */
    public void startDTD(String name, String publicId, String systemId) throws SAXException {
        current = (ParentNode) current.appendChild(new DTD(locator, name, publicId, systemId));
    }

    /**
     * Appends a new <code>Entity</code> node to the current node and 
     * makes it the current node.
     * 
     * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
     */
    public void startEntity(String name) throws SAXException {
        current = (ParentNode) current.appendChild(new Entity(locator, name));
    }

    /**
     * Returns the root (<code>Document</code> if building a full document or 
     * <code>DocumentFragment</code> if building a fragment).
     * 
     * <p>This returns the builder's current node, so it is the root only 
     * when every start event received so far has been matched by its 
     * end event.
     * 
     * @return the root
     */
    public ParentNode getRoot() {
        return current;
    }

    /**
     * Records the end locator on the current node and makes its parent 
     * the current node. Shared by all end events that close a container.
     */
    private void closeCurrent() {
        current.setEndLocator(locator);
        current = current.getParentNode();
    }
}