Patch regarding TagSoup and JTidy

Posted by fl.schmitt(ops-users) on
URL: https://discuss.orbeon.com/RfE-Tagsoup-configuration-in-properties-local-xml-tp2550129p2718520.html

Hi all,

to make the TagSoup and JTidy configuration accessible without the need
to recompile XFormsUtils.java, I'd like to propose the attached patch.
It handles two issues as follows:

- TagSoup: there's a new boolean config property named
"oxf.xforms.tagsoup.ignoreBogonsFeature" with default value "true".
Changing this to false should make TagSoup accept unknown (non-html)
elements.

- JTidy: a new set of tidy config options with priority over the
hard-coded ones in XFormsUtils.java is defined using the new config
property "oxf.xforms.tidy.propertiesFile" (anyURI). That URI by default
points to oxf:/config/tidy.properties, making the complete Tidy config
accessible using the Java properties syntax. The proposed
tidy.properties defines the canvas tag as an additional, valid element
that would otherwise get stripped from the content parsed by JTidy.

Maybe some additional hints about which tidy properties are available
would be useful (they could be placed in tidy.properties or in the wiki).

I would be glad to hear your opinions!


florian



Index: src/resources-packaged/config/properties-xforms.xml
===================================================================
--- src/resources-packaged/config/properties-xforms.xml (revision 205f9ed19328b47eddc290497b0252c5d20627b7)
+++ src/resources-packaged/config/properties-xforms.xml (revision )
@@ -109,6 +109,8 @@
     <property as="xs:boolean" name="oxf.xforms.datepicker.two-months"                       value="false"/>
     <property as="xs:string"  name="oxf.xforms.htmleditor"                                  value="yui"/>       <!-- fck | yui -->
     <property as="xs:boolean" name="oxf.xforms.show-error-dialog"                           value="true"/>
+    <property as="xs:boolean" name="oxf.xforms.tagsoup.ignoreBogonsFeature"                 value="true"/>
+    <property as="xs:anyURI"  name="oxf.xforms.tidy.propertiesFile"                         value="oxf:/config/tidy.properties"/>
 
     <property as="xs:integer" name="oxf.xforms.internal-short-delay"                        value="100"/>
     <property as="xs:integer" name="oxf.xforms.delay-before-incremental-request"            value="500"/>
Index: src/resources/config/tidy.properties
===================================================================
--- src/resources/config/tidy.properties (revision )
+++ src/resources/config/tidy.properties (revision )
@@ -0,0 +1,21 @@
+##
+# Copyright (C) 2010 Orbeon, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it under the terms of the
+# GNU Lesser General Public License as published by the Free Software Foundation; either version
+# 2.1 of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Lesser General Public License for more details.
+#
+# The full text of the license is available at http://www.gnu.org/copyleft/lesser.html
+#
+show-warnings       = false
+quiet               = true
+new-empty-tags      = canvas
+new-inline-tags     = canvas
+input-encoding      = utf-8
+numeric-entities    = false
+output-xml          = false
+input-xml           = false
\ No newline at end of file
Index: src/java/org/orbeon/oxf/xforms/XFormsUtils.java
===================================================================
--- src/java/org/orbeon/oxf/xforms/XFormsUtils.java (revision 205f9ed19328b47eddc290497b0252c5d20627b7)
+++ src/java/org/orbeon/oxf/xforms/XFormsUtils.java (revision )
@@ -34,6 +34,7 @@
 import org.orbeon.oxf.xml.*;
 import org.orbeon.oxf.xml.XMLUtils;
 import org.orbeon.oxf.xml.dom4j.*;
+import org.orbeon.oxf.properties.Properties;
 import org.orbeon.saxon.Configuration;
 import org.orbeon.saxon.dom4j.NodeWrapper;
 import org.orbeon.saxon.functions.FunctionLibrary;
@@ -60,6 +61,9 @@
 
     private static final int SRC_CONTENT_BUFFER_SIZE = 1024;
 
+    private static final String XFORMS_TAGSOUP_IGNOREBOGONS = "oxf.xforms.tagsoup.ignoreBogonsFeature";
+    private static final String XFORMS_TIDY_CONFIG_URI = "oxf.xforms.tidy.propertiesFile";
+
     // Binary types supported for upload, images, etc.
     private static final Map<String, String> SUPPORTED_BINARY_TYPES = new HashMap<String, String>();
 
@@ -228,10 +232,28 @@
     public static org.w3c.dom.Document htmlStringToDocument(String value, LocationData locationData) {
         // Create and configure Tidy instance
         final Tidy tidy = new Tidy();
+        final java.util.Properties tidyProps = new java.util.Properties();
+        final String tidyPropsURI = Properties.instance().getPropertySet().getStringOrURIAsString(XFORMS_TIDY_CONFIG_URI);
+        boolean tidyConfAvailable = false;
+
+        // try to grab external tidy config
+        try {
+            final URL tidyPropsURL = URLFactory.createURL(tidyPropsURI);
+            tidyProps.load(tidyPropsURL.openStream());
+            tidy.setConfigurationFromProps(tidyProps);
+            tidyConfAvailable = true;
+        } catch (MalformedURLException e) {
+            throw new OXFException("Cannot create URL bases on tidy config property: '" + tidyPropsURI + "'", e);
+        } catch (IOException e) {
+            throw new OXFException("Cannot load external tidy config file at: '" + tidyPropsURI + "'", e);
+        }
+        // Fallback if external config isn't available
+        if (!tidyConfAvailable) {
-        tidy.setShowWarnings(false);
-        tidy.setQuiet(true);
-        tidy.setInputEncoding("utf-8");
-        //tidy.setNumEntities(true); // CHECK: what does this do exactly?
+            tidy.setShowWarnings(false);
+            tidy.setQuiet(true);
+            tidy.setInputEncoding("utf-8");
+            //tidy.setNumEntities(true); // CHECK: what does this do exactly?
+        }
 
         // Parse and output to SAXResult
         final byte[] valueBytes;
@@ -252,8 +274,17 @@
         try {
             final XMLReader xmlReader = new org.ccil.cowan.tagsoup.Parser();
             final HTMLSchema theSchema = new HTMLSchema();
+
             xmlReader.setProperty(org.ccil.cowan.tagsoup.Parser.schemaProperty, theSchema);
+
+            // try to get the ignoreBogonsProperty from Properties, set true if not available
+            final Boolean ignoreBogonsProperty = Properties.instance().getPropertySet().getBoolean(XFORMS_TAGSOUP_IGNOREBOGONS);
+            if (ignoreBogonsProperty != null) {
+                xmlReader.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, ignoreBogonsProperty.booleanValue());
+            } else {
-            xmlReader.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
+                xmlReader.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
+            }
+
             final TransformerHandler identity = TransformerUtils.getIdentityTransformerHandler();
             identity.setResult(result);
             xmlReader.setContentHandler(identity);



--
You receive this message as a subscriber of the [hidden email] mailing list.
To unsubscribe: mailto:[hidden email]
For general help: mailto:[hidden email]?subject=help
OW2 mailing lists service home page: http://www.ow2.org/wws