internalize rexml
authorgc <gc>
Sat, 1 Mar 2008 00:11:30 +0000 (00:11 +0000)
committergc <gc>
Sat, 1 Mar 2008 00:11:30 +0000 (00:11 +0000)
64 files changed:
bin/album2booh
bin/booh
bin/booh-backend
bin/booh-classifier
bin/webalbum2booh
lib/booh/booh-lib.rb
lib/booh/rexml/README [new file with mode: 0644]
lib/booh/rexml/attlistdecl.rb [new file with mode: 0644]
lib/booh/rexml/attribute.rb [new file with mode: 0644]
lib/booh/rexml/attribute.rb.orig [new file with mode: 0644]
lib/booh/rexml/cdata.rb [new file with mode: 0644]
lib/booh/rexml/child.rb [new file with mode: 0644]
lib/booh/rexml/comment.rb [new file with mode: 0644]
lib/booh/rexml/doctype.rb [new file with mode: 0644]
lib/booh/rexml/document.rb [new file with mode: 0644]
lib/booh/rexml/dtd/attlistdecl.rb [new file with mode: 0644]
lib/booh/rexml/dtd/dtd.rb [new file with mode: 0644]
lib/booh/rexml/dtd/elementdecl.rb [new file with mode: 0644]
lib/booh/rexml/dtd/entitydecl.rb [new file with mode: 0644]
lib/booh/rexml/dtd/notationdecl.rb [new file with mode: 0644]
lib/booh/rexml/element.rb [new file with mode: 0644]
lib/booh/rexml/encoding.rb [new file with mode: 0644]
lib/booh/rexml/encodings/CP-1252.rb [new file with mode: 0644]
lib/booh/rexml/encodings/EUC-JP.rb [new file with mode: 0644]
lib/booh/rexml/encodings/ICONV.rb [new file with mode: 0644]
lib/booh/rexml/encodings/ISO-8859-1.rb [new file with mode: 0644]
lib/booh/rexml/encodings/ISO-8859-15.rb [new file with mode: 0644]
lib/booh/rexml/encodings/SHIFT-JIS.rb [new file with mode: 0644]
lib/booh/rexml/encodings/SHIFT_JIS.rb [new file with mode: 0644]
lib/booh/rexml/encodings/UNILE.rb [new file with mode: 0644]
lib/booh/rexml/encodings/US-ASCII.rb [new file with mode: 0644]
lib/booh/rexml/encodings/UTF-16.rb [new file with mode: 0644]
lib/booh/rexml/encodings/UTF-8.rb [new file with mode: 0644]
lib/booh/rexml/entity.rb [new file with mode: 0644]
lib/booh/rexml/functions.rb [new file with mode: 0644]
lib/booh/rexml/instruction.rb [new file with mode: 0644]
lib/booh/rexml/light/node.rb [new file with mode: 0644]
lib/booh/rexml/namespace.rb [new file with mode: 0644]
lib/booh/rexml/node.rb [new file with mode: 0644]
lib/booh/rexml/output.rb [new file with mode: 0644]
lib/booh/rexml/parent.rb [new file with mode: 0644]
lib/booh/rexml/parseexception.rb [new file with mode: 0644]
lib/booh/rexml/parsers/baseparser.rb [new file with mode: 0644]
lib/booh/rexml/parsers/lightparser.rb [new file with mode: 0644]
lib/booh/rexml/parsers/pullparser.rb [new file with mode: 0644]
lib/booh/rexml/parsers/sax2parser.rb [new file with mode: 0644]
lib/booh/rexml/parsers/streamparser.rb [new file with mode: 0644]
lib/booh/rexml/parsers/treeparser.rb [new file with mode: 0644]
lib/booh/rexml/parsers/ultralightparser.rb [new file with mode: 0644]
lib/booh/rexml/parsers/xpathparser.rb [new file with mode: 0644]
lib/booh/rexml/quickpath.rb [new file with mode: 0644]
lib/booh/rexml/rexml.rb [new file with mode: 0644]
lib/booh/rexml/sax2listener.rb [new file with mode: 0644]
lib/booh/rexml/source.rb [new file with mode: 0644]
lib/booh/rexml/streamlistener.rb [new file with mode: 0644]
lib/booh/rexml/syncenumerator.rb [new file with mode: 0644]
lib/booh/rexml/text.rb [new file with mode: 0644]
lib/booh/rexml/validation/relaxng.rb [new file with mode: 0644]
lib/booh/rexml/validation/validation.rb [new file with mode: 0644]
lib/booh/rexml/validation/validationexception.rb [new file with mode: 0644]
lib/booh/rexml/xmldecl.rb [new file with mode: 0644]
lib/booh/rexml/xmltokens.rb [new file with mode: 0644]
lib/booh/rexml/xpath.rb [new file with mode: 0644]
lib/booh/rexml/xpath_parser.rb [new file with mode: 0644]

index eb39ce8..0da1ef1 100644 (file)
@@ -36,7 +36,7 @@
 require 'getoptlong'
 require 'gettext'
 include GetText
-require 'rexml/document'
+require 'booh/rexml/document'
 include REXML
 
 require 'booh/booh-lib'
index d27fa97..4127984 100755 (executable)
--- a/bin/booh
+++ b/bin/booh
@@ -31,7 +31,7 @@ require 'gettext'
 include GetText
 bindtextdomain("booh")
 
-require 'rexml/document'
+require 'booh/rexml/document'
 include REXML
 
 require 'booh/booh-lib'
@@ -3174,6 +3174,8 @@ def new_album
         destdir = from_utf8(dest.text)
         configskel = File.expand_path(from_utf8(conf.text))
         theme = theme_button.label
+        #- some sort of automatic theme preference
+        $config['default-theme'] = theme
         sizes = theme_sizes.find_all { |e| e[:widget].active? }.collect { |e| e[:value] }.join(',')
         nperrow = nperrows.find { |e| e[:widget].active? }[:value]
         nperpage = nperpage_model.get_value(nperpagecombo.active_iter, 1)
@@ -3361,8 +3363,12 @@ def properties
     save_madewith = madewithentry.text.gsub('"', '&quot;').gsub('\'', '&#39;')
     save_indexlink = indexlinkentry.text.gsub('"', '&quot;').gsub('\'', '&#39;')
     dialog.destroy
-
+    
     if ok && (save_theme != theme || save_limit_sizes != limit_sizes || save_opt432 != opt432 || save_nperrow != nperrow || save_nperpage != nperpage || save_madewith != madewith || save_indexlink != indexlinkentry)
+        if save_theme != theme
+            #- some sort of automatic theme preference
+            $config['default-theme'] = save_theme
+        end
         mark_document_as_dirty
         save_current_file
         call_backend("booh-backend --use-config '#{$filename}' --for-gui --verbose-level #{$verbose_level} " +
index 3effa7f..abe3caa 100755 (executable)
@@ -22,7 +22,7 @@
 require 'getoptlong'
 require 'gettext'
 include GetText
-require 'rexml/document'
+require 'booh/rexml/document'
 include REXML
 
 require 'booh/booh-lib'
index 574258b..715912c 100644 (file)
@@ -29,7 +29,7 @@ require 'gettext'
 include GetText
 bindtextdomain("booh")
 
-require 'rexml/document'
+require 'booh/rexml/document'
 include REXML
 
 require 'booh/booh-lib'
index 732dbaf..75e33a4 100755 (executable)
@@ -22,7 +22,7 @@
 require 'getoptlong'
 require 'gettext'
 include GetText
-require 'rexml/document'
+require 'booh/rexml/document'
 include REXML
 
 require 'booh/booh-lib'
index 804add3..7ee4a9f 100644 (file)
@@ -21,7 +21,7 @@ require 'iconv'
 require 'timeout'
 require 'tempfile'
 
-require 'rexml/document'
+require 'booh/rexml/document'
 
 require 'gettext'
 include GetText
@@ -131,7 +131,7 @@ module Booh
         msg 3, _("Selecting theme '%s'") % $theme
         themedir = "#{$FPATH}/themes/#{$theme}"
         if !File.directory?(themedir)
-            die _("Theme was not found (tried %s directory).") % themedir
+            die_ _("Theme was not found (tried %s directory).") % themedir
         end
         eval File.open("#{themedir}/metadata/parameters.rb").readlines.join
 
@@ -140,7 +140,7 @@ module Booh
                 sizes = limit_sizes.split(/,/)
                 $images_size = $images_size.find_all { |e| sizes.include?(e['name']) }
                 if $images_size.length == 0
-                    die _("Can't carry on, no valid size selected.")
+                    die_ _("Can't carry on, no valid size selected.")
                 end
             end
         else
diff --git a/lib/booh/rexml/README b/lib/booh/rexml/README
new file mode 100644 (file)
index 0000000..88d3725
--- /dev/null
@@ -0,0 +1 @@
+Forked REXML because it is too often broken on new Ruby releases.
diff --git a/lib/booh/rexml/attlistdecl.rb b/lib/booh/rexml/attlistdecl.rb
new file mode 100644 (file)
index 0000000..87c5b22
--- /dev/null
@@ -0,0 +1,62 @@
+#vim:ts=2 sw=2 noexpandtab:
+require 'booh/rexml/child'
+require 'booh/rexml/source'
+
+module REXML
+       # This class needs:
+       # * Documentation
+       # * Work!  Not all types of attlists are intelligently parsed, so we just
+       # spew back out what we get in.  This works, but it would be better if
+       # we formatted the output ourselves.
+       #
+       # AttlistDecls provide *just* enough support to allow namespace
+       # declarations.  If you need some sort of generalized support, or have an
+       # interesting idea about how to map the hideous, terrible design of DTD
+       # AttlistDecls onto an intuitive Ruby interface, let me know.  I'm desperate
+       # for anything to make DTDs more palatable.
+       class AttlistDecl < Child
+               include Enumerable
+
+               # What is this?  Got me.
+               attr_reader :element_name
+
+               # Create an AttlistDecl, pulling the information from a Source.  Notice
+               # that this isn't very convenient; to create an AttlistDecl, you basically
+               # have to format it yourself, and then have the initializer parse it.
+               # Sorry, but for the foreseeable future, DTD support in REXML is pretty
+               # weak on convenience.  Have I mentioned how much I hate DTDs?
+               def initialize(source)
+                       super()
+                       if (source.kind_of? Array)
+                               @element_name, @pairs, @contents = *source
+                       end
+               end
+       
+               # Access the attlist attribute/value pairs.
+               #  value = attlist_decl[ attribute_name ]
+               def [](key)
+                       @pairs[key]
+               end
+
+               # Whether an attlist declaration includes the given attribute definition
+               #  if attlist_decl.include? "xmlns:foobar"
+               def include?(key)
+                       @pairs.keys.include? key
+               end
+
+               # Iterate over the key/value pairs:
+               #  attlist_decl.each { |attribute_name, attribute_value| ... }
+               def each(&block)
+                       @pairs.each(&block)
+               end
+
+               # Write out exactly what we got in.
+               def write out, indent=-1
+                       out << @contents
+               end
+
+               def node_type
+                       :attlistdecl
+               end
+       end
+end
diff --git a/lib/booh/rexml/attribute.rb b/lib/booh/rexml/attribute.rb
new file mode 100644 (file)
index 0000000..e38244d
--- /dev/null
@@ -0,0 +1,181 @@
+require "booh/rexml/namespace"
+require 'booh/rexml/text'
+
+module REXML
+       # Defines an Element Attribute; i.e., an attribute=value pair, as in:
+       # <element attribute="value"/>.  Attributes can be in their own
+       # namespaces.  General users of REXML will not interact with the
+       # Attribute class much.
+       class Attribute
+               include Node
+               include Namespace
+
+               # The element to which this attribute belongs
+               attr_reader :element
+               # The normalized value of this attribute.  That is, the attribute with
+               # entities intact.
+               attr_writer :normalized 
+               PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+
+               # Constructor.
+    # FIXME: The parser doesn't catch illegal characters in attributes
+    #
+    # first:: 
+    #   Either: an Attribute, which this new attribute will become a
+    #   clone of; or a String, which is the name of this attribute
+    # second::
+    #   If +first+ is an Attribute, then this may be an Element, or nil.
+    #   If nil, then the Element parent of this attribute is the parent
+    #   of the +first+ Attribute.  If the first argument is a String, 
+    #   then this must also be a String, and is the content of the attribute.  
+    #   If this is the content, it must be fully normalized (contain no
+    #   illegal characters).
+    # parent::
+    #   Ignored unless +first+ is a String; otherwise, may be the Element 
+    #   parent of this attribute, or nil.
+    #
+               #
+               #  Attribute.new( attribute_to_clone )
+               #  Attribute.new( attribute_to_clone, parent_element )
+               #  Attribute.new( "attr", "attr_value" )
+               #  Attribute.new( "attr", "attr_value", parent_element )
+               def initialize( first, second=nil, parent=nil )
+                       @normalized = @unnormalized = @element = nil
+                       if first.kind_of? Attribute
+                               self.name = first.expanded_name
+                               @unnormalized = first.value
+                               if second.kind_of? Element
+                                       @element = second
+                               else
+                                       @element = first.element
+                               end
+                       elsif first.kind_of? String
+                               @element = parent if parent.kind_of? Element
+                               self.name = first
+                               @normalized = second.to_s
+                       else
+                               raise "illegal argument #{first.class.name} to Attribute constructor"
+                       end
+               end
+
+               # Returns the namespace of the attribute.
+               # 
+               #  e = Element.new( "elns:myelement" )
+               #  e.add_attribute( "nsa:a", "aval" )
+               #  e.add_attribute( "b", "bval" )
+               #  e.attributes.get_attribute( "a" ).prefix   # -> "nsa"
+               #  e.attributes.get_attribute( "b" ).prefix   # -> "elns"
+               #  a = Attribute.new( "x", "y" )
+               #  a.prefix                                   # -> ""
+               def prefix
+                       pf = super
+                       if pf == ""
+                               pf = @element.prefix if @element
+                       end
+                       pf
+               end
+
+               # Returns the namespace URL, if defined, or nil otherwise
+               # 
+               #  e = Element.new("el")
+               #  e.add_attributes({"xmlns:ns" => "http://url"})
+               #  e.namespace( "ns" )              # -> "http://url"
+               def namespace arg=nil
+                       arg = prefix if arg.nil?
+                       @element.namespace arg
+               end
+
+               # Returns true if other is an Attribute and has the same name and value,
+               # false otherwise.
+               def ==( other )
+                       other.kind_of?(Attribute) and other.name==name and other.value==value
+               end
+
+               # Creates (and returns) a hash from both the name and value
+               def hash
+                       name.hash + value.hash
+               end
+
+               # Returns this attribute as XML source, with the name expanded
+               #
+               #  a = Attribute.new( "x", "y" )
+               #  a.to_string     # -> "x='y'"
+               #  b = Attribute.new( "ns:x", "y" )
+               #  b.to_string     # -> "ns:x='y'"
+               def to_string
+                       "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
+               end
+
+               # Returns the attribute value, with entities replaced
+               def to_s
+                       return @normalized if @normalized
+
+                       doctype = nil
+                       if @element
+                               doc = @element.document
+                               doctype = doc.doctype if doc
+                       end
+
+                       @normalized = Text::normalize( @unnormalized, doctype )
+                       @unnormalized = nil
+      @normalized
+               end
+
+               # Returns the UNNORMALIZED value of this attribute.  That is, entities
+               # have been expanded to their values
+               def value
+                       return @unnormalized if @unnormalized
+                       doctype = nil
+                       if @element
+                               doc = @element.document
+                               doctype = doc.doctype if doc
+                       end
+                       @unnormalized = Text::unnormalize( @normalized, doctype )
+                       @normalized = nil
+      @unnormalized
+               end
+
+               # Returns a copy of this attribute
+               def clone
+                       Attribute.new self
+               end
+
+               # Sets the element of which this object is an attribute.  Normally, this
+               # is not directly called.
+               #
+               # Returns this attribute
+               def element=( element )
+                       @element = element
+                       self
+               end
+
+               # Removes this Attribute from the tree, and returns true if successful
+               # 
+               # This method is usually not called directly.
+               def remove
+                       @element.attributes.delete self.name unless @element.nil?
+               end
+
+               # Writes this attribute (e.g., appends key='value' to the output)
+               def write( output, indent=-1 )
+                       output << to_string
+               end
+
+    def node_type
+      :attribute
+    end
+
+    def inspect
+      rv = ""
+      write( rv )
+      rv
+    end
+
+    def xpath
+      path = @element.xpath
+      path += "/@#{self.expanded_name}"
+      return path
+    end
+       end
+end
+#vim:ts=2 sw=2 noexpandtab:
diff --git a/lib/booh/rexml/attribute.rb.orig b/lib/booh/rexml/attribute.rb.orig
new file mode 100644 (file)
index 0000000..a169148
--- /dev/null
@@ -0,0 +1,163 @@
+require "rexml/namespace"
+require 'rexml/text'
+
+module REXML
+       # Defines an Element Attribute; IE, a attribute=value pair, as in:
+       # <element attribute="value"/>.  Attributes can be in their own
+       # namespaces.  General users of REXML will not interact with the
+       # Attribute class much.
+       class Attribute
+               include Node
+               include Namespace
+
+               # The element to which this attribute belongs
+               attr_reader :element
+               # The normalized value of this attribute.  That is, the attribute with
+               # entities intact.
+               attr_writer :normalized 
+               PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+
+               # Constructor.
+               #
+               #  Attribute.new( attribute_to_clone )
+               #  Attribute.new( source )
+               #  Attribute.new( "attr", "attr_value" )
+               #  Attribute.new( "attr", "attr_value", parent_element )
+               def initialize( first, second=nil, parent=nil )
+                       @normalized = @unnormalized = @element = nil
+                       if first.kind_of? Attribute
+                               self.name = first.expanded_name
+                               @value = first.value
+                               if second.kind_of? Element
+                                       @element = second
+                               else
+                                       @element = first.element
+                               end
+                       elsif first.kind_of? String
+                               @element = parent if parent.kind_of? Element
+                               self.name = first
+                               @value = second.to_s
+                       else
+                               raise "illegal argument #{first.class.name} to Attribute constructor"
+                       end
+               end
+
+               # Returns the namespace of the attribute.
+               # 
+               #  e = Element.new( "elns:myelement" )
+               #  e.add_attribute( "nsa:a", "aval" )
+               #  e.add_attribute( "b", "bval" )
+               #  e.attributes.get_attribute( "a" ).prefix   # -> "nsa"
+               #  e.attributes.get_attribute( "b" ).prefix   # -> "elns"
+               #  a = Attribute.new( "x", "y" )
+               #  a.prefix                                   # -> ""
+               def prefix
+                       pf = super
+                       if pf == ""
+                               pf = @element.prefix if @element
+                       end
+                       pf
+               end
+
+               # Returns the namespace URL, if defined, or nil otherwise
+               # 
+               #  e = Element.new("el")
+               #  e.add_attributes({"xmlns:ns", "http://url"})
+               #  e.namespace( "ns" )              # -> "http://url"
+               def namespace arg=nil
+                       arg = prefix if arg.nil?
+                       @element.namespace arg
+               end
+
+               # Returns true if other is an Attribute and has the same name and value,
+               # false otherwise.
+               def ==( other )
+                       other.kind_of?(Attribute) and other.name==name and other.value==@value
+               end
+
+               # Creates (and returns) a hash from both the name and value
+               def hash
+                       name.hash + value.hash
+               end
+
+               # Returns this attribute out as XML source, expanding the name
+               #
+               #  a = Attribute.new( "x", "y" )
+               #  a.to_string     # -> "x='y'"
+               #  b = Attribute.new( "ns:x", "y" )
+               #  b.to_string     # -> "ns:x='y'"
+               def to_string
+                       "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
+               end
+
+               # Returns the attribute value, with entities replaced
+               def to_s
+                       return @normalized if @normalized
+
+                       doctype = nil
+                       if @element
+                               doc = @element.document
+                               doctype = doc.doctype if doc
+                       end
+
+                       @unnormalized = nil
+                       @normalized = Text::normalize( @value, doctype )
+               end
+
+               # Returns the UNNORMALIZED value of this attribute.  That is, entities
+               # have been expanded to their values
+               def value
+                       return @unnormalized if @unnormalized
+                       doctype = nil
+                       if @element
+                               doc = @element.document
+                               doctype = doc.doctype if doc
+                       end
+                       @normalized = nil
+                       @unnormalized = Text::unnormalize( @value, doctype )
+               end
+
+               # Returns a copy of this attribute
+               def clone
+                       Attribute.new self
+               end
+
+               # Sets the element of which this object is an attribute.  Normally, this
+               # is not directly called.
+               #
+               # Returns this attribute
+               def element=( element )
+                       @element = element
+                       self
+               end
+
+               # Removes this Attribute from the tree, and returns true if successfull
+               # 
+               # This method is usually not called directly.
+               def remove
+                       @element.attributes.delete self.name unless @element.nil?
+               end
+
+               # Writes this attribute (EG, puts 'key="value"' to the output)
+               def write( output, indent=-1 )
+                       output << to_string
+               end
+
+    def node_type
+      :attribute
+    end
+
+    def inspect
+      rv = ""
+      write( rv )
+      rv
+    end
+
+    def xpath
+      path = @element.xpath
+      path += "/@#{self.expanded_name}"
+      return path
+    end
+       end
+end
+#vim:ts=2 sw=2 noexpandtab:
diff --git a/lib/booh/rexml/cdata.rb b/lib/booh/rexml/cdata.rb
new file mode 100644 (file)
index 0000000..6979ebd
--- /dev/null
@@ -0,0 +1,72 @@
+require "booh/rexml/text"
+
+module REXML
+       class CData < Text
+               START = '<![CDATA['
+               STOP = ']]>'
+               ILLEGAL = /(\]\]>)/
+
+               #       Constructor.  CData is data between <![CDATA[ ... ]]>
+               #
+               # _Examples_
+               #  CData.new( source )
+               #  CData.new( "Here is some CDATA" )
+               #  CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
+               def initialize( first, whitespace=true, parent=nil )
+                       super( first, whitespace, parent, true, true, ILLEGAL )
+               end
+
+               # Make a copy of this object
+               # 
+               # _Examples_
+               #  c = CData.new( "Some text" )
+               #  d = c.clone
+               #  d.to_s        # -> "Some text"
+               def clone
+                       CData.new self
+               end
+
+               # Returns the content of this CData object
+               #
+               # _Examples_
+               #  c = CData.new( "Some text" )
+               #  c.to_s        # -> "Some text"
+               def to_s
+                       @string
+               end
+
+    def value
+      @string
+    end
+
+               # Generates XML output of this object
+               #
+               # output::
+               #   Where to write the string.  Defaults to $stdout
+               # indent::
+               #   An integer.  If -1, no indenting will be used; otherwise, the
+               #   indentation will be this number of spaces, and children will be
+               #   indented an additional amount.  Defaults to -1.
+               # transitive::
+               #   If transitive is true and indent is >= 0, then the output will be
+               #   pretty-printed in such a way that the added whitespace does not affect
+               #   the absolute *value* of the document -- that is, it leaves the value
+               #   and number of Text nodes in the document unchanged.
+               # ie_hack::
+               #   Internet Explorer is the worst piece of crap to have ever been
+               #   written, with the possible exception of Windows itself.  Since IE is
+               #   unable to parse proper XML, we have to provide a hack to generate XML
+               #   that IE's limited abilities can handle.  This hack inserts a space 
+               #   before the /> on empty tags.
+               #
+               # _Examples_
+               #  c = CData.new( " Some text " )
+               #  c.write( $stdout )     #->  <![CDATA[ Some text ]]>
+               def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
+      #indent( output, indent ) unless transitive
+                       output << START
+                       output << @string
+                       output << STOP
+               end
+       end
+end
diff --git a/lib/booh/rexml/child.rb b/lib/booh/rexml/child.rb
new file mode 100644 (file)
index 0000000..33e6d44
--- /dev/null
@@ -0,0 +1,96 @@
+require "booh/rexml/node"
+
+module REXML
+       ##
+       # A Child object is something contained by a parent, and this class
+       # contains methods to support that.  Most user code will not use this
+       # class directly.
+       class Child
+               include Node
+               attr_reader :parent             # The Parent of this object
+
+               # Constructor.  Any inheritors of this class should call super to make
+               # sure this method is called.
+               # parent::
+               #   if supplied, the parent of this child will be set to the
+               #   supplied value, and self will be added to the parent
+               def initialize( parent = nil )
+                       @parent = nil  
+                       # Declare @parent, but don't define it.  The next line sets the 
+                       # parent.
+                       parent.add( self ) if parent
+               end
+
+               # Replaces this object with another object.  Basically, calls
+               # Parent.replace_child
+               #
+               # Returns:: self
+               def replace_with( child )
+                       @parent.replace_child( self, child )
+                       self
+               end
+
+               # Removes this child from the parent.
+               #
+               # Returns:: self
+               def remove
+                       unless @parent.nil?
+                               @parent.delete self
+                       end
+                       self
+               end
+
+               # Sets the parent of this child to the supplied argument.
+               #
+               # other::
+               #   Must be a Parent object.  If this object is the same object as the
+               #   existing parent of this child, no action is taken. Otherwise, this
+               #   child is removed from the current parent (if one exists), and is added
+               #   to the new parent.
+               # Returns:: The parent added
+               def parent=( other )
+                       return @parent if @parent == other
+                       @parent.delete self if defined? @parent and @parent
+                       @parent = other
+               end
+
+               alias :next_sibling :next_sibling_node
+               alias :previous_sibling :previous_sibling_node
+
+               # Sets the next sibling of this child.  This can be used to insert a child
+               # after some other child.
+               #  a = Element.new("a")
+               #  b = a.add_element("b")
+               #  c = Element.new("c")
+               #  b.next_sibling = c
+               #  # => <a><b/><c/></a>
+               def next_sibling=( other )
+                 parent.insert_after self, other
+               end
+
+               # Sets the previous sibling of this child.  This can be used to insert a 
+               # child before some other child.
+               #  a = Element.new("a")
+               #  b = a.add_element("b")
+               #  c = Element.new("c")
+               #  b.previous_sibling = c
+               #  # => <a><c/><b/></a>
+               def previous_sibling=(other)
+                 parent.insert_before self, other
+               end
+
+               # Returns:: the document this child belongs to, or nil if this child
+               # belongs to no document
+               def document
+                       return parent.document unless parent.nil?
+                       nil
+               end
+
+               # This doesn't yet handle encodings
+               def bytes
+                       encoding = document.encoding
+
+                       to_s
+               end
+       end
+end
diff --git a/lib/booh/rexml/comment.rb b/lib/booh/rexml/comment.rb
new file mode 100644 (file)
index 0000000..ffd4f07
--- /dev/null
@@ -0,0 +1,76 @@
+require "booh/rexml/child"
+
+module REXML
+       ##
+       # Represents an XML comment; that is, text between \<!-- ... -->
+       class Comment < Child
+               include Comparable
+               START = "<!--"
+               STOP = "-->"
+
+               # The content text
+
+               attr_accessor :string
+
+               ##
+               # Constructor.  The first argument can be one of three types:
+               # @param first If String, the contents of this comment are set to the 
+               # argument.  If Comment, the argument is duplicated.  If
+               # Source, the argument is scanned for a comment.
+               # @param second If the first argument is a Source, this argument 
+               # should be nil, not supplied, or a Parent to be set as the parent 
+               # of this object
+               def initialize( first, second = nil )
+                       #puts "IN COMMENT CONSTRUCTOR; SECOND IS #{second.type}"
+                       super(second)
+                       if first.kind_of? String
+                               @string = first
+                       elsif first.kind_of? Comment
+                               @string = first.string
+                       end
+               end
+
+               def clone
+                       Comment.new self
+               end
+
+               # output::
+               #        Where to write the string
+               # indent::
+               #        An integer.    If -1, no indenting will be used; otherwise, the
+               #        indentation will be this number of spaces, and children will be
+               #        indented an additional amount.
+               # transitive::
+               #        Ignored by this class. The contents of comments are never modified.
+               # ie_hack::
+               #        Needed for conformity to the child API, but not used by this class.
+               def write( output, indent=-1, transitive=false, ie_hack=false )
+                       indent( output, indent )
+                       output << START
+                       output << @string
+                       output << STOP
+               end
+
+               alias :to_s :string
+
+               ##
+               # Compares this Comment to another; the contents of the comment are used
+               # in the comparison.
+               def <=>(other)
+                       other.to_s <=> @string
+               end
+
+               ##
+               # Compares this Comment to another; the contents of the comment are used
+               # in the comparison.
+               def ==( other )
+                       other.kind_of? Comment and
+                       (other <=> self) == 0
+               end
+
+    def node_type
+      :comment
+    end
+       end
+end
+#vim:ts=2 sw=2 noexpandtab:
diff --git a/lib/booh/rexml/doctype.rb b/lib/booh/rexml/doctype.rb
new file mode 100644 (file)
index 0000000..17eba33
--- /dev/null
@@ -0,0 +1,278 @@
+require "booh/rexml/parent"
+require "booh/rexml/parseexception"
+require "booh/rexml/namespace"
+require 'booh/rexml/entity'
+require 'booh/rexml/attlistdecl'
+require 'booh/rexml/xmltokens'
+
+module REXML
+  # Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
+  # ... >.  DOCTYPES can be used to declare the DTD of a document, as well as
+  # being used to declare entities used in the document.
+  class DocType < Parent
+    include XMLTokens
+    START = "<!DOCTYPE"
+    STOP = ">"
+    SYSTEM = "SYSTEM"
+    PUBLIC = "PUBLIC"
+    DEFAULT_ENTITIES = { 
+      'gt'=>EntityConst::GT, 
+      'lt'=>EntityConst::LT, 
+      'quot'=>EntityConst::QUOT, 
+      "apos"=>EntityConst::APOS 
+    }
+
+    # name is the name of the doctype
+    # external_id is the referenced DTD, if given
+    attr_reader :name, :external_id, :entities, :namespaces
+
+    # Constructor
+    #
+    #   dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
+    #   # <!DOCTYPE foo '-//I/Hate/External/IDs'>
+    #   dt = DocType.new( doctype_to_clone )
+    #   # Incomplete.  Shallow clone of doctype
+    #
+    # +Note+ that the constructor: 
+    #
+    #  Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
+    #
+    # is _deprecated_.  Do not use it.  It will probably disappear.
+    def initialize( first, parent=nil )
+      # The default entity table is shared (not copied) until add replaces it
+      # with a private clone.
+      @entities = DEFAULT_ENTITIES
+      @long_name = @uri = nil
+      if first.kind_of? String
+        # String form: the second argument is the external id, not a parent,
+        # so the superclass constructor is called without arguments.
+        super()
+        @name = first
+        @external_id = parent
+      elsif first.kind_of? DocType
+        # Clone form: only name and external id are taken from the source.
+        super( parent )
+        @name = first.name
+        @external_id = first.external_id
+      elsif first.kind_of? Array
+        # Array form: [name, external_id, long_name, uri]
+        super( parent )
+        @name = first[0]
+        @external_id = first[1]
+        @long_name = first[2]
+        @uri = first[3]
+      elsif first.kind_of? Source
+        # Deprecated form: pull a single event from the source and use it only
+        # if it is a :start_doctype event.
+        super( parent )
+        parser = Parsers::BaseParser.new( first )
+        event = parser.pull
+        if event[0] == :start_doctype
+          @name, @external_id, @long_name, @uri, = event[1..-1]
+        end
+      else
+        # Any other argument type yields an unnamed doctype.
+        super()
+      end
+    end
+
+    def node_type
+      # Symbol identifying this kind of node.
+      :doctype
+    end
+
+    def attributes_of element
+      rv = []
+      each do |child|
+        child.each do |key,val|
+          rv << Attribute.new(key,val)
+        end if child.kind_of? AttlistDecl and child.element_name == element
+      end
+      rv
+    end
+
+    def attribute_of element, attribute
+      att_decl = find do |child|
+        child.kind_of? AttlistDecl and
+        child.element_name == element and
+        child.include? attribute
+      end
+      return nil unless att_decl
+      att_decl[attribute]
+    end
+
+    def clone
+      DocType.new self
+    end
+
+    # output::
+    #   Where to write the string
+    # indent::
+    #   An integer.  If -1, no indenting will be used; otherwise, the
+    #   indentation will be this number of spaces, and children will be
+    #   indented an additional amount.
+    # transitive::
+    #   If transitive is true and indent is >= 0, then the output will be
+    #   pretty-printed in such a way that the added whitespace does not affect
+    #   the absolute *value* of the document -- that is, it leaves the value
+    #   and number of Text nodes in the document unchanged.
+    # ie_hack::
+    #   Internet Explorer is the worst piece of crap to have ever been
+    #   written, with the possible exception of Windows itself.  Since IE is
+    #   unable to parse proper XML, we have to provide a hack to generate XML
+    #   that IE's limited abilities can handle.  This hack inserts a space 
+    #   before the /> on empty tags.
+    #
+    def write( output, indent=0, transitive=false, ie_hack=false )
+      # Writes "<!DOCTYPE name [external_id] [long_name] [uri] [ [children] ]>"
+      # to output.  transitive and ie_hack are accepted but not used in this
+      # method body; children are written with only an indent argument.
+      # The parenthesised call below resolves to the indent method, not the
+      # parameter of the same name.
+      indent( output, indent )
+      output << START
+      output << ' '
+      output << @name
+      output << " #@external_id" if @external_id
+      output << " #@long_name" if @long_name
+      output << " #@uri" if @uri
+      unless @children.empty?
+        # Internal subset: one child per line inside square brackets, each
+        # written one level deeper than the doctype itself.
+        next_indent = indent + 1
+        output << ' ['
+        child = nil    # speed
+        @children.each { |child|
+          output << "\n"
+          child.write( output, next_indent )
+        }
+        #output << '   '*next_indent
+        output << "\n]"
+      end
+      output << STOP
+    end
+
+    def context
+      # Delegates to the parent; raises NoMethodError when @parent is nil.
+      @parent.context
+    end
+
+    def entity( name )
+      @entities[name].unnormalized if @entities[name]
+    end
+
+    def add child
+      # Adds child through the superclass, then registers it in the entity
+      # table when it is an Entity.  Returns the child in that case and nil
+      # otherwise (value of the trailing modifier-if).
+      super(child)
+      # Swap the shared default table for a private copy before writing to it.
+      # The test is Hash#==, i.e. by content.
+      @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
+      @entities[ child.name ] = child if child.kind_of? Entity
+    end
+    
+    # This method retrieves the public identifier identifying the document's 
+    # DTD.
+    #
+    # Method contributed by Henrik Martensson
+    def public
+      case @external_id
+      when "SYSTEM"
+        nil
+      when "PUBLIC"
+        strip_quotes(@long_name)
+      end
+    end
+    
+    # This method retrieves the system identifier identifying the document's DTD
+    #
+    # Method contributed by Henrik Martensson
+    def system
+      case @external_id
+      when "SYSTEM"
+        strip_quotes(@long_name)
+      when "PUBLIC"
+        @uri.kind_of?(String) ? strip_quotes(@uri) : nil
+      end
+    end
+    
+    # This method returns a list of notations that have been declared in the
+    # _internal_ DTD subset. Notations in the external DTD subset are not 
+    # listed.
+    #
+    # Method contributed by Henrik Martensson
+    def notations
+      children().select {|node| node.kind_of?(REXML::NotationDecl)}
+    end
+    
+    # Retrieves a named notation. Only notations declared in the internal
+    # DTD subset can be retrieved.
+    #
+    # Method contributed by Henrik Martensson
+    def notation(name)
+      notations.find { |notation_decl|
+        notation_decl.name == name
+      }
+    end
+    
+    private
+    
+    # Method contributed by Henrik Martensson
+    def strip_quotes(quoted_string)
+      quoted_string =~ /^[\'\"].*[\ยด\"]$/ ?
+        quoted_string[1, quoted_string.length-2] :
+        quoted_string
+    end
+  end
+
+  # We don't really handle any of these since we're not a validating
+  # parser, so we can be pretty dumb about them.  All we need to be able
+  # to do is spew them back out on a write()
+
+  # This is an abstract class.  You never use this directly; it serves as a
+  # parent class for the specific declarations.
+  class Declaration < Child
+    def initialize src
+      super()
+      @string = src
+    end
+
+    def to_s
+      @string+'>'
+    end
+
+    def write( output, indent )
+      output << ('   '*indent) if indent > 0
+      output << to_s
+    end
+  end
+  
+  public
+  class ElementDecl < Declaration
+    def initialize( src )
+      super
+    end
+  end
+
+  # Holds a piece of text and writes it back out verbatim.
+  class ExternalEntity < Child
+    # src:: the text to store; it is kept as given.
+    def initialize( src )
+      super()
+      @entity = src
+    end
+    # Returns the stored text unchanged.
+    def to_s
+      @entity
+    end
+    # Appends the stored text to output; indent is accepted but not used.
+    def write( output, indent )
+      output << @entity
+    end
+  end
+
+  class NotationDecl < Child
+    attr_accessor :public, :system
+    def initialize name, middle, pub, sys
+      super(nil)
+      @name = name
+      @middle = middle
+      @public = pub
+      @system = sys
+    end
+
+    def to_s
+      "<!NOTATION #@name #@middle#{
+        @public ? ' ' + public.inspect : '' 
+      }#{
+        @system ? ' ' +@system.inspect : ''
+      }>"
+    end
+
+    def write( output, indent=-1 )
+      output << ('   '*indent) if indent > 0
+      output << to_s
+    end
+    
+    # This method retrieves the name of the notation.
+    #
+    # Method contributed by Henrik Martensson
+    def name
+      @name
+    end
+  end
+end
diff --git a/lib/booh/rexml/document.rb b/lib/booh/rexml/document.rb
new file mode 100644 (file)
index 0000000..4dde12c
--- /dev/null
@@ -0,0 +1,193 @@
+require "booh/rexml/element"
+require "booh/rexml/xmldecl"
+require "booh/rexml/source"
+require "booh/rexml/comment"
+require "booh/rexml/doctype"
+require "booh/rexml/instruction"
+require "booh/rexml/rexml"
+require "booh/rexml/parseexception"
+require "booh/rexml/output"
+require "booh/rexml/parsers/baseparser"
+require "booh/rexml/parsers/streamparser"
+require "booh/rexml/parsers/treeparser"
+
+module REXML
+  # Represents a full XML document, including PIs, a doctype, etc.  A
+  # Document has a single child that can be accessed by root().
+  # Note that if you want to have an XML declaration written for a document
+  # you create, you must add one; REXML documents do not write a default
+       # declaration for you.  See |DECLARATION| and |write|.
+       class Document < Element
+               # A convenient default XML declaration.  If you want an XML declaration,
+               # the easiest way to add one is mydoc << Document::DECLARATION
+    # +DEPRECATED+
+    # Use: mydoc << XMLDecl.default
+               DECLARATION = XMLDecl.default
+
+               # Constructor
+               # @param source if supplied, must be a Document, String, or IO. 
+               # Documents have their context and Element attributes cloned.
+         # Strings are expected to be valid XML documents.  IOs are expected
+         # to be sources of valid XML documents.
+         # @param context if supplied, contains the context of the document;
+         # this should be a Hash.
+         # NOTE that I'm not sure what the context is for; I cloned it out of
+         # the Electric XML API (in which it also seems to do nothing), and it
+         # is now legacy.  It may do something, someday... it may disappear.
+               def initialize( source = nil, context = {} )
+                       # The superclass constructor always runs first with its defaults.
+                       super()
+                       @context = context
+                       return if source.nil?
+                       if source.kind_of? Document
+                               # Cloning: the source's context object is shared, not copied, and
+                               # the superclass constructor runs a second time with the source.
+                               @context = source.context
+                               super source
+                       else
+                               # Anything else is handed to the tree parser via build.
+                               build(  source )
+                       end
+               end
+
+    def node_type
+      # Symbol identifying this kind of node.
+      :document
+    end
+
+               # Should be obvious
+               def clone
+                       Document.new self
+               end
+
+               # According to the XML spec, a root node has no expanded name
+               def expanded_name
+                       ''
+                       #d = doc_type
+                       #d ? d.name : "UNDEFINED"
+               end
+
+               alias :name :expanded_name
+
+               # We override this, because XMLDecls and DocTypes must go at the start
+               # of the document
+               def add( child )
+                       if child.kind_of? XMLDecl
+                               @children.unshift child
+                       elsif child.kind_of? DocType
+        # Find first Element or DocType node and insert the decl right 
+        # before it.  If there is no such node, just insert the child at the
+        # end.  If there is a child and it is an DocType, then replace it.
+        insert_before_index = 0
+        @children.find { |x| 
+          insert_before_index += 1
+          x.kind_of?(Element) || x.kind_of?(DocType)
+        }
+        if @children[ insert_before_index ] # Not null = not end of list
+          if @children[ insert_before_index ].kind_of DocType
+            @children[ insert_before_index ] = child
+          else
+            @children[ index_before_index-1, 0 ] = child
+          end
+        else  # Insert at end of list
+          @children[insert_before_index] = child
+        end
+                               child.parent = self
+                       else
+                               rv = super
+                               raise "attempted adding second root element to document" if @elements.size > 1
+                               rv
+                       end
+               end
+               alias :<< :add
+
+               def add_element(arg=nil, arg2=nil)
+                       rv = super
+                       raise "attempted adding second root element to document" if @elements.size > 1
+                       rv
+               end
+
+               # @return the root Element of the document, or nil if this document
+               # has no children.
+               def root
+      elements[1]
+      #self
+      #@children.find { |item| item.kind_of? Element }
+               end
+
+               # @return the DocType child of the document, if one exists,
+               # and nil otherwise.
+               def doctype
+                       @children.find { |item| item.kind_of? DocType }
+               end
+
+               # @return the XMLDecl of this document; if no XMLDecl has been
+               # set, the default declaration is returned.
+               def xml_decl
+                       rv = @children[0]
+      return rv if rv.kind_of? XMLDecl
+      rv = @children.unshift(XMLDecl.default)[0]
+               end
+
+               # @return the XMLDecl version of this document as a String.
+               # If no XMLDecl has been set, returns the default version.
+               def version
+                       xml_decl().version
+               end
+
+               # @return the XMLDecl encoding of this document as a String.
+               # If no XMLDecl has been set, returns the default encoding.
+               def encoding
+                       xml_decl().encoding
+               end
+
+               # @return the XMLDecl standalone value of this document as a String.
+               # If no XMLDecl has been set, returns the default setting.
+               def stand_alone?
+                       xml_decl().stand_alone?
+               end
+
+               # Write the XML tree out, optionally with indent.  This writes out the
+               # entire XML document, including XML declarations, doctype declarations,
+               # and processing instructions (if any are given).
+               # A controversial point is whether Document should always write the XML
+               # declaration (<?xml version='1.0'?>) whether or not one is given by the
+               # user (or source document).  REXML does not write one if one was not
+               # specified, because it adds unnecessary bandwidth to applications such
+               # as XML-RPC.
+               #
+               #
+               # output::
+               #         output an object which supports '<< string'; this is where the
+               #   document will be written.
+               # indent::
+               #   An integer.  If -1, no indenting will be used; otherwise, the
+               #   indentation will be this number of spaces, and children will be
+               #   indented an additional amount.  Defaults to -1
+               # transitive::
+               #   If transitive is true and indent is >= 0, then the output will be
+               #   pretty-printed in such a way that the added whitespace does not affect
+               #   the absolute *value* of the document -- that is, it leaves the value
+               #   and number of Text nodes in the document unchanged.
+               # ie_hack::
+               #   Internet Explorer is the worst piece of crap to have ever been
+               #   written, with the possible exception of Windows itself.  Since IE is
+               #   unable to parse proper XML, we have to provide a hack to generate XML
+               #   that IE's limited abilities can handle.  This hack inserts a space 
+               #   before the /> on empty tags.  Defaults to false
+               def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
+                       # Wrap the target in an Output for any declared encoding other than
+                       # UTF-8, unless the caller already passed an Output.
+                       output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
+                       @children.each { |node|
+                               # Only element children are indented here; the parenthesised
+                               # call resolves to the indent method, not the parameter.
+                               indent( output, indent ) if node.node_type == :element
+                               # A newline follows a node only when its write returned a true
+                               # value, indenting is enabled, and it is not the last child.
+                               if node.write( output, indent, transitive, ie_hack )
+          output << "\n" unless indent<0 or node == @children[-1]
+        end
+                       }
+               end
+
+               
+               def Document::parse_stream( source, listener )
+                       Parsers::StreamParser.new( source, listener ).parse
+               end
+
+               private
+               def build( source )
+      Parsers::TreeParser.new( source, self ).parse
+               end
+       end
+end
diff --git a/lib/booh/rexml/dtd/attlistdecl.rb b/lib/booh/rexml/dtd/attlistdecl.rb
new file mode 100644 (file)
index 0000000..9483e67
--- /dev/null
@@ -0,0 +1,10 @@
+require "booh/rexml/child"
+module REXML
+       module DTD
+               # Constants for recognising an <!ATTLIST ...> declaration.  The class
+               # defines no behaviour of its own.
+               class AttlistDecl < Child
+                       # Literal text that opens the declaration.
+                       START = "<!ATTLIST"
+                       # START at the beginning of a line, after optional whitespace.
+                       START_RE = /^\s*#{START}/um
+                       # Captures the declaration from START up to the first '>'.
+                       PATTERN_RE = /\s*(#{START}.*?>)/um
+               end
+       end
+end
diff --git a/lib/booh/rexml/dtd/dtd.rb b/lib/booh/rexml/dtd/dtd.rb
new file mode 100644 (file)
index 0000000..31db538
--- /dev/null
@@ -0,0 +1,51 @@
+require "booh/rexml/dtd/elementdecl"
+require "booh/rexml/dtd/entitydecl"
+require "booh/rexml/comment"
+require "booh/rexml/dtd/notationdecl"
+require "booh/rexml/dtd/attlistdecl"
+require "booh/rexml/parent"
+
+module REXML
+       module DTD
+               class Parser
+                       def Parser.parse( input )
+                               case input
+                               when String
+                                       parse_helper input
+                               when File
+                                       parse_helper input.read
+                               end
+                       end
+
+                       # Takes a String and parses it out
+                       def Parser.parse_helper( input )
+                               contents = Parent.new
+                               while input.size > 0
+                                       case input
+                                       when ElementDecl.PATTERN_RE
+                                               match = $&
+                                               source = $'
+                                               contents << EleemntDecl.new( match )
+                                       when AttlistDecl.PATTERN_RE
+                                               matchdata = $~
+                                               source = $'
+                                               contents << AttlistDecl.new( matchdata )
+                                       when EntityDecl.PATTERN_RE
+                                               matchdata = $~
+                                               source = $'
+                                               contents << EntityDecl.new( matchdata )
+                                       when Comment.PATTERN_RE
+                                               matchdata = $~
+                                               source = $'
+                                               contents << Comment.new( matchdata )
+                                       when NotationDecl.PATTERN_RE
+                                               matchdata = $~
+                                               source = $'
+                                               contents << NotationDecl.new( matchdata )
+                                       end
+                               end
+                               contents
+                       end
+               end
+       end
+end
diff --git a/lib/booh/rexml/dtd/elementdecl.rb b/lib/booh/rexml/dtd/elementdecl.rb
new file mode 100644 (file)
index 0000000..7c5d6a5
--- /dev/null
@@ -0,0 +1,17 @@
+require "booh/rexml/child"
+module REXML
+       module DTD
+               class ElementDecl < Child
+                       START = "<!ELEMENT"
+                       START_RE = /^\s*#{START}/um
+                       PATTERN_RE = /^\s*(#{START}.*?)>/um
+                       PATTERN_RE = /^\s*#{START}\s+((?:[:\w_][-\.\w_]*:)?[-!\*\.\w_]*)(.*?)>/
+                       #\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)
+
+                       def initialize match
+                               @name = match[1]
+                               @rest = match[2]
+                       end
+               end
+       end
+end
diff --git a/lib/booh/rexml/dtd/entitydecl.rb b/lib/booh/rexml/dtd/entitydecl.rb
new file mode 100644 (file)
index 0000000..66a9f02
--- /dev/null
@@ -0,0 +1,56 @@
+require "booh/rexml/child"
+module REXML
+       module DTD
+               class EntityDecl < Child
+                       START = "<!ENTITY"
+                       START_RE = /^\s*#{START}/um
+                       PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
+                       SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
+                       PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
+                       PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um
+                       # <!ENTITY name SYSTEM "...">
+                       # <!ENTITY name "...">
+                       def initialize src
+                               super()
+                               md = nil
+                               if src.match( PUBLIC )
+                                       md = src.match( PUBLIC, true )
+                                       @middle = "PUBLIC"
+                                       @content = "#{md[2]} #{md[4]}"
+                               elsif src.match( SYSTEM )
+                                       md = src.match( SYSTEM, true )
+                                       @middle = "SYSTEM"
+                                       @content = md[2]
+                               elsif src.match( PLAIN )
+                                       md = src.match( PLAIN, true )
+                                       @middle = ""
+                                       @content = md[2]
+                               elsif src.match( PERCENT )
+                                       md = src.match( PERCENT, true )
+                                       @middle = ""
+                                       @content = md[2]
+                               end
+                               raise ParseException.new("failed Entity match", src) if md.nil?
+                               @name = md[1]
+                       end
+
+                       def to_s
+                               rv = "<!ENTITY #@name "
+                               rv << "#@middle " if @middle.size > 0
+                               rv << @content
+                               rv
+                       end
+
+                       def write( output, indent )
+        indent( output, indent )
+                               output << to_s
+                       end
+
+                       def EntityDecl.parse_source source, listener
+                               md = source.match( PATTERN_RE, true )
+                               thing = md[0].squeeze(" \t\n\r")
+                               listener.send inspect.downcase, thing 
+                       end
+               end
+       end
+end
diff --git a/lib/booh/rexml/dtd/notationdecl.rb b/lib/booh/rexml/dtd/notationdecl.rb
new file mode 100644 (file)
index 0000000..edf7d8d
--- /dev/null
@@ -0,0 +1,39 @@
+require "booh/rexml/child"
+module REXML
+       module DTD
+               # A <!NOTATION ...> declaration with a PUBLIC or SYSTEM identifier.
+               class NotationDecl < Child
+                       START = "<!NOTATION"
+                       START_RE = /^\s*#{START}/um
+                       # Both patterns capture: 1 = name, 2 = keyword, 3 = quoted identifier.
+                       PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
+                       SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+                       # src:: object answering match( pattern ) and match( pattern, true );
+                       #       presumably a REXML Source whose second form consumes the
+                       #       matched text -- TODO confirm.
+                       # Raises:: ParseException when neither pattern matches
+                       def initialize src
+                               super()
+                               if src.match( PUBLIC )
+                                       md = src.match( PUBLIC, true )
+                               elsif src.match( SYSTEM )
+                                       md = src.match( SYSTEM, true )
+                               else
+                                       raise ParseException.new( "error parsing notation: no matching pattern", src )
+                               end
+                               @name = md[1]
+                               @middle = md[2]
+                               @rest = md[3]
+                       end
+
+                       # Renders the declaration from its three stored parts.
+                       def to_s
+                               "<!NOTATION #@name #@middle #@rest>"
+                       end
+
+                       # Writes the indented declaration to output.
+                       def write( output, indent )
+        indent( output, indent )
+                               output << to_s
+                       end
+
+                       # NOTE(review): PATTERN_RE is not defined in this class in the code
+                       # visible here, and inspect.downcase is the downcased class name
+                       # including its namespace -- confirm this method is still usable.
+                       def NotationDecl.parse_source source, listener
+                               md = source.match( PATTERN_RE, true )
+                               thing = md[0].squeeze(" \t\n\r")
+                               listener.send inspect.downcase, thing 
+                       end
+               end
+       end
+end
diff --git a/lib/booh/rexml/element.rb b/lib/booh/rexml/element.rb
new file mode 100644 (file)
index 0000000..2676fcf
--- /dev/null
@@ -0,0 +1,1266 @@
+require "booh/rexml/parent"
+require "booh/rexml/namespace"
+require "booh/rexml/attribute"
+require "booh/rexml/cdata"
+require "booh/rexml/xpath"
+require "booh/rexml/parseexception"
+
+module REXML
+  # An implementation note about namespaces:
+  # As we parse, when we find namespaces we put them in a hash and assign
+  # them a unique ID.  We then convert the namespace prefix for the node
+  # to the unique ID.  This makes namespace lookup much faster for the
+  # cost of extra memory use.  We save the namespace prefix for the
+  # context node and convert it back when we write it.
+  #
+  # NOTE(review): nothing in the visible part of this file reads or writes
+  # @@namespaces; confirm whether the scheme described above is still in use.
+  @@namespaces = {}
+
+       # Represents a tagged XML element.  Elements are characterized by
+       # having children, attributes, and names, and can themselves be
+       # children.
+       class Element < Parent
+               include Namespace
+
+               UNDEFINED = "UNDEFINED";                # The default name
+
+               # Mechanisms for accessing attributes and child elements of this
+               # element.
+               attr_reader :attributes, :elements
+               # The context holds information about the processing environment, such as
+               # whitespace handling.
+               attr_accessor :context
+
+               # Constructor
+               # arg:: 
+               #       if not supplied, will be set to the default value.
+               #       If a String, the name of this object will be set to the argument.
+               #       If an Element, the object will be shallowly cloned; name, 
+               #       attributes, and namespaces will be copied.  Children will +not+ be
+               #       copied.
+               # parent:: 
+               #       if supplied, must be a Parent, and will be used as
+               #       the parent of this object.
+               # context::
+               #       If supplied, must be a hash containing context items.  Context items
+               #       include:
+               # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
+               #   strings being the names of the elements to respect
+               #   whitespace for.  Defaults to :+all+.
+               # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
+               #   strings being the names of the elements to ignore whitespace on.
+               #   Overrides :+respect_whitespace+.
+               # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
+               #   of strings being the names of the elements in which to ignore
+               #   whitespace-only nodes.  If this is set, Text nodes which contain only
+               #   whitespace will not be added to the document tree.
+               # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
+               #   the elements to process in raw mode.  In raw mode, special
+               #   characters in text are not converted to or from entities.
+               def initialize( arg = UNDEFINED, parent=nil, context=nil )
+                       super(parent)
+
+                       @elements = Elements.new(self)
+                       @attributes = Attributes.new(self)
+                       @context = context
+
+                       if arg.kind_of? String
+                               self.name = arg
+                       elsif arg.kind_of? Element
+                               self.name = arg.expanded_name
+                               arg.attributes.each_attribute{ |attribute|
+                                       @attributes << Attribute.new( attribute )
+                               }
+                               @context = arg.context
+                       end
+               end
+
+    def inspect
+      rv = "<#@expanded_name"
+
+      @attributes.each_attribute do |attr|
+        rv << " "
+        attr.write( rv, 0 )
+      end
+
+      if children.size > 0
+        rv << "> ... </>"
+      else
+        rv << "/>"
+      end
+    end
+
+
+               # Creates a shallow copy of self.
+               #   d = Document.new "<a><b/><b/><c><d/></c></a>"
+               #   new_a = d.root.clone
+               #   puts new_a  # => "<a/>"
+               def clone
+                       self.class.new self
+               end
+
+               # Evaluates to the root node of the document that this element 
+               # belongs to. If this element doesn't belong to a document, but does
+               # belong to another Element, the parent's root will be returned, until the
+               # earliest ancestor is found.
+    #
+    # Note that this is not the same as the document element.
+    # In the following example, <a> is the document element, and the root
+    # node is the parent node of the document element.  You may ask yourself
+    # why the root node is useful: consider the doctype and XML declaration,
+    # and any processing instructions before the document element... they
+    # are children of the root node, or siblings of the document element.
+    # The only time this isn't true is when an Element is created that is
+    # not part of any Document.  In this case, the ancestor that has no
+    # parent acts as the root node.
+               #  d = Document.new '<a><b><c/></b></a>'
+               #  a = d[1] ; c = a[1][1]
+               #  d.root_node == d   # TRUE
+               #  a.root_node        # namely, d
+               #  c.root_node        # again, d
+               def root_node
+                       parent.nil? ? self : parent.root_node
+               end
+
+    def root
+      return elements[1] if self.kind_of? Document
+      return self if parent.kind_of? Document or parent.nil?
+      return parent.root
+    end
+
+               # Evaluates to the document to which this element belongs, or nil if this
+               # element doesn't belong to a document.
+               def document
+      rt = root
+                       rt.parent if rt
+               end
+
+               # Evaluates to +true+ if whitespace is respected for this element.  This
+               # is the case if:
+               # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
+               # 2. The context has :+respect_whitespace+ set to :+all+ or
+               #    an array containing the name of this element, and 
+    #    :+compress_whitespace+ isn't set to :+all+ or an array containing the 
+    #    name of this element.
+               # The evaluation is tested against +expanded_name+, and so is namespace
+               # sensitive.
+               def whitespace
+                       @whitespace = nil
+                       if @context
+                               if @context[:respect_whitespace]
+                                       @whitespace = (@context[:respect_whitespace] == :all or
+                                                                                                @context[:respect_whitespace].include? expanded_name)
+                               end
+                               @whitespace = false if (@context[:compress_whitespace] and
+                                       (@context[:compress_whitespace] == :all or
+                                        @context[:compress_whitespace].include? expanded_name)
+                               )
+                       end
+                       @whitespace = true unless @whitespace == false
+                       @whitespace
+               end
+
+               def ignore_whitespace_nodes
+                       @ignore_whitespace_nodes = false
+                       if @context
+                               if @context[:ignore_whitespace_nodes]
+                                       @ignore_whitespace_nodes = 
+                                               (@context[:ignore_whitespace_nodes] == :all or
+                                                @context[:ignore_whitespace_nodes].include? expanded_name)
+                               end
+                       end
+               end
+
+               # Evaluates to +true+ if raw mode is set for this element.  This
+               # is the case if the context has :+raw+ set to :+all+ or
+               # an array containing the name of this element.
+               #
+               # The evaluation is tested against +expanded_name+, and so is namespace
+               # sensitive.
+               def raw
+                       @raw = (@context and @context[:raw] and
+                       (@context[:raw] == :all or
+                       @context[:raw].include? expanded_name))
+                       @raw
+               end
+
+               #once :whitespace, :raw, :ignore_whitespace_nodes
+
+               #################################################
+               # Namespaces                                    #
+               #################################################
+
+               # Evaluates to an +Array+ containing the prefixes (names) of all defined
+               # namespaces at this context node.
+               #  doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
+               #  doc.elements['//b'].prefixes # -> ['x', 'y']
+               def prefixes
+                       prefixes = []
+                       prefixes = parent.prefixes if parent
+                       prefixes |= attributes.prefixes
+                       return prefixes
+               end
+
+               def namespaces
+                       namespaces = {}
+                       namespaces = parent.namespaces if parent
+                       namespaces = namespaces.merge( attributes.namespaces )
+                       return namespaces
+               end
+
+               # Evaluates to the URI for a prefix, or the empty string if no such 
+               # namespace is declared for this element. Evaluates recursively for
+               # ancestors.  Returns the default namespace, if there is one.
+               # prefix:: 
+               #   the prefix to search for.  If not supplied, returns the default
+               #   namespace if one exists
+               # Returns:: 
+               #   the namespace URI as a String, or nil if no such namespace
+               #   exists.  If the namespace is undefined, returns an empty string
+               #  doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
+               #  b = doc.elements['//b']
+               #  b.namespace           # -> '1'
+               #  b.namespace("y")      # -> '2'
+               def namespace(prefix=nil)
+                       if prefix.nil?
+                               prefix = prefix()
+                       end
+                       if prefix == ''
+                               prefix = "xmlns"
+                       else
+                               prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
+                       end
+                       ns = attributes[ prefix ]
+                       ns = parent.namespace(prefix) if ns.nil? and parent
+                       ns = '' if ns.nil? and prefix == 'xmlns'
+                       return ns
+               end
+
+               # Adds a namespace to this element.
+               # prefix:: 
+               #   the prefix string, or the namespace URI if +uri+ is not
+               #   supplied
+               # uri::    
+               #   the namespace URI.  May be nil, in which case +prefix+ is used as
+               #   the URI
+               # Evaluates to: this Element
+               #  a = Element.new("a")
+               #  a.add_namespace("xmlns:foo", "bar" )
+               #  a.add_namespace("foo", "bar")  # shorthand for previous line
+               #  a.add_namespace("twiddle")
+               #  puts a   #-> <a xmlns:foo='bar' xmlns='twiddle'/>
+               def add_namespace( prefix, uri=nil )
+                       unless uri
+                               @attributes["xmlns"] = prefix
+                       else
+                               prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/
+                               @attributes[ prefix ] = uri
+                       end
+                       self
+               end
+
+               # Removes a namespace from this node.  This only works if the namespace is
+               # actually declared in this node.  If no argument is passed, deletes the
+               # default namespace.
+               #
+               # Evaluates to: this element
+               #  doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
+               #  doc.root.delete_namespace
+               #  puts doc     # -> <a xmlns:foo='bar'/>
+               #  doc.root.delete_namespace 'foo'
+               #  puts doc     # -> <a/>
+               def delete_namespace namespace="xmlns"
+                       namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
+                       attribute = attributes.get_attribute(namespace)
+                       attribute.remove unless attribute.nil?
+                       self
+               end
+
+               #################################################
+               # Elements                                      #
+               #################################################
+
+               # Adds a child to this element, optionally setting attributes in
+               # the element.
+               # element:: 
+               #   optional.  If Element, the element is added.
+               #   Otherwise, a new Element is constructed with the argument (see
+               #   Element.initialize).
+               # attrs:: 
+               #   If supplied, must be a Hash containing String name,value 
+               #   pairs, which will be used to set the attributes of the new Element.
+               # Returns:: the Element that was added
+               #  el = doc.add_element 'my-tag'
+               #  el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
+               #  el = Element.new 'my-tag'
+               #  doc.add_element el
+               def add_element element, attrs=nil
+      raise "First argument must be either an element name, or an Element object" if element.nil?
+                       el = @elements.add(element)
+      attrs.each do |key, value|
+        el.attributes[key]=Attribute.new(key,value,self)
+      end      if attrs.kind_of? Hash
+                       el
+               end
+
+               # Deletes a child element.
+               # element:: 
+               #   Must be an +Element+, +String+, or +Integer+.  If Element, 
+               #   the element is removed.  If String, the element is found (via XPath) 
+               #   and removed.  <em>This means that any parent can remove any
+               #   descendant.</em>  If Integer, the Element indexed by that number will be
+               #   removed.
+               # Returns:: the element that was removed.
+               #  doc.delete_element "/a/b/c[@id='4']"
+               #  doc.delete_element doc.elements["//k"]
+               #  doc.delete_element 1
+               def delete_element element
+                       @elements.delete element
+               end
+
+               # Evaluates to +true+ if this element has at least one child Element
+               #  doc = Document.new "<a><b/><c>Text</c></a>"
+               #  doc.root.has_elements?              # -> true
+               #  doc.elements["/a/b"].has_elements?  # -> false
+               #  doc.elements["/a/c"].has_elements?  # -> false
+               def has_elements?
+                       !@elements.empty?
+               end
+
+               # Iterates through the child elements, yielding for each Element that
+               # has a particular attribute set.
+               # key:: 
+               #   the name of the attribute to search for
+               # value:: 
+               #   the value of the attribute
+               # max:: 
+               #   (optional) causes this method to return after yielding 
+               #   for this number of matching children
+               # name:: 
+               #   (optional) if supplied, this is an XPath that filters
+               #   the children to check.
+               #
+               #  doc = Document.new "<a><b id='1'/><c id='2'/><d id='1'/><e/></a>"
+               #  # Yields b, c, d
+               #  doc.root.each_element_with_attribute( 'id' ) {|e| p e}
+               #  # Yields b, d
+               #  doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
+               #  # Yields b
+               #  doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
+               #  # Yields d
+               #  doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
+               def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
+                       each_with_something( proc {|child| 
+                               if value.nil?
+                                       child.attributes[key] != nil
+                               else
+                                       child.attributes[key]==value
+                               end
+                       }, max, name, &block )
+               end
+
+               # Iterates through the children, yielding for each Element that
+               # has a particular text set.
+               # text:: 
+               #   the text to search for.  If nil, or not supplied, will iterate
+               #   over all +Element+ children that contain at least one +Text+ node.
+               # max:: 
+               #   (optional) causes this method to return after yielding
+               #   for this number of matching children
+               # name:: 
+               #   (optional) if supplied, this is an XPath that filters
+               #   the children to check.
+               #
+               #  doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
+               #  # Yields b, c, d
+               #  doc.each_element_with_text {|e|p e}
+               #  # Yields b, c
+               #  doc.each_element_with_text('b'){|e|p e}
+               #  # Yields b
+               #  doc.each_element_with_text('b', 1){|e|p e}
+               #  # Yields d
+               #  doc.each_element_with_text(nil, 0, 'd'){|e|p e}
+               def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
+                       each_with_something( proc {|child| 
+                               if text.nil?
+                                       child.has_text?
+                               else
+                                       child.text == text
+                               end
+                       }, max, name, &block )
+               end
+
+               # Synonym for Element.elements.each
+               def each_element( xpath=nil, &block ) # :yields: Element
+                       @elements.each( xpath, &block )
+               end
+
+               # Synonym for Element.to_a
+               # This is a little slower than calling elements.each directly.
+               # xpath:: any XPath by which to search for elements in the tree
+               # Returns:: an array of Elements that match the supplied path
+               def get_elements( xpath )
+                       @elements.to_a( xpath )
+               end
+
+               # Returns the next sibling that is an element, or nil if there is
+               # no Element sibling after this one
+               #  doc = Document.new '<a><b/>text<c/></a>'
+               #  doc.root.elements['b'].next_element          #-> <c/>
+               #  doc.root.elements['c'].next_element          #-> nil
+               def next_element
+                       element = next_sibling
+                       element = element.next_sibling until element.nil? or element.kind_of? Element 
+                       return element
+               end
+
+               # Returns the previous sibling that is an element, or nil if there is
+               # no Element sibling prior to this one
+               #  doc = Document.new '<a><b/>text<c/></a>'
+               #  doc.root.elements['c'].previous_element          #-> <b/>
+               #  doc.root.elements['b'].previous_element          #-> nil
+               def previous_element
+                       element = previous_sibling
+                       element = element.previous_sibling until element.nil? or element.kind_of? Element
+                       return element
+               end
+
+
+               #################################################
+               # Text                                          #
+               #################################################
+
+               # Evaluates to +true+ if this element has at least one Text child
+               def has_text?
+                       not text().nil?
+               end
+
+               # A convenience method which returns the String value of the _first_
+               # child text element, if one exists, and +nil+ otherwise.
+               #
+               # <em>Note that an element may have multiple Text elements, perhaps
+               # separated by other children</em>.  Be aware that this method only returns
+               # the first Text node.
+               #
+               # This method returns the +value+ of the first text child node, which
+               # ignores the +raw+ setting, so always returns normalized text. See
+               # the Text::value documentation.
+               #
+               #  doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
+               #  # The element 'p' has two text elements, "some text " and " more text".
+               #  doc.root.text              #-> "some text "
+               def text( path = nil )
+                       rv = get_text(path)
+                       return rv.value unless rv.nil?
+                       nil
+               end
+
+               # Returns the first child Text node, if any, or +nil+ otherwise.
+               # This method returns the actual +Text+ node, rather than the String content.
+               #  doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
+               #  # The element 'p' has two text elements, "some text " and " more text".
+               #  doc.root.get_text.value            #-> "some text "
+               def get_text path = nil
+                       rv = nil
+                       if path
+                               element = @elements[ path ]
+                               rv = element.get_text unless element.nil?
+                       else
+                               rv = @children.find { |node| node.kind_of? Text }
+                       end
+                       return rv
+               end
+
+               # Sets the first Text child of this object.  See text() for a
+               # discussion about Text children.
+               #
+               # If a Text child already exists, the child is replaced by this
+               # content.  This means that Text content can be deleted by calling
+               # this method with a nil argument.  In this case, the next Text
+               # child becomes the first Text child.  In no case is the order of
+               # any siblings disturbed.
+               # text:: 
+               #   If a String, a new Text child is created and added to
+               #   this Element as the first Text child.  If Text, the text is set
+               #   as the first Child element.  If nil, then any existing first Text
+               #   child is removed.
+               # Returns:: this Element.
+               #  doc = Document.new '<a><b/></a>'
+               #  doc.root.text = 'Sean'      #-> '<a><b/>Sean</a>'
+               #  doc.root.text = 'Elliott'   #-> '<a><b/>Elliott</a>'
+               #  doc.root.add_element 'c'    #-> '<a><b/>Elliott<c/></a>'
+               #  doc.root.text = 'Russell'   #-> '<a><b/>Russell<c/></a>'
+               #  doc.root.text = nil         #-> '<a><b/><c/></a>'
+    def text=( text )
+      if text.kind_of? String
+        text = Text.new( text, whitespace(), nil, raw() )
+      elsif text and !text.kind_of? Text
+        text = Text.new( text.to_s, whitespace(), nil, raw() )
+      end
+                       old_text = get_text
+                       if text.nil?
+                               old_text.remove unless old_text.nil?
+                       else
+                               if old_text.nil?
+                                       self << text
+                               else
+                                       old_text.replace_with( text )
+                               end
+                       end
+                       return self
+               end
+
+               # A helper method to add a Text child.  Actual Text instances can
+               # be added with regular Parent methods, such as add() and <<()
+               # text::
+               #   if a String, a new Text instance is created and added
+               #   to the parent.  If Text, the object is added directly.
+               # Returns:: this Element
+               #  e = Element.new('a')          #-> <a/>
+               #  e.add_text 'foo'              #-> <a>foo</a>
+               #  e.add_text Text.new(' bar')    #-> <a>foo bar</a>
+               # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
+               # element and <b>2</b> Text node children.
+               def add_text( text )
+                       if text.kind_of? String 
+                               if @children[-1].kind_of? Text
+                                       @children[-1] << text
+                                       return
+                               end
+                               text = Text.new( text, whitespace(), nil, raw() )
+                       end
+                       self << text unless text.nil?
+                       return self
+               end
+
+    def node_type
+      :element
+    end
+
+    def xpath
+      path_elements = []
+      cur = self
+      path_elements << __to_xpath_helper( self )
+      while cur.parent
+        cur = cur.parent
+        path_elements << __to_xpath_helper( cur )
+      end
+      return path_elements.reverse.join( "/" )
+    end
+
+               #################################################
+               # Attributes                                    #
+               #################################################
+
+               def attribute( name, namespace=nil )
+                       prefix = nil
+      prefix = namespaces.index(namespace) if namespace
+                       attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
+               end
+
+               # Evaluates to +true+ if this element has any attributes set, false
+               # otherwise.
+               def has_attributes?
+                       return !@attributes.empty?
+               end
+
+               # Adds an attribute to this element, overwriting any existing attribute
+               # by the same name.
+               # key::
+               #   can be either an Attribute or a String.  If an Attribute,
+               #   the attribute is added to the list of Element attributes.  If String,
+               #   the argument is used as the name of the new attribute, and the value
+               #   parameter must be supplied.
+               # value:: 
+               #   Required if +key+ is a String, and ignored if the first argument is
+               #   an Attribute.  This is a String, and is used as the value
+               #   of the new Attribute.  This should be the unnormalized value of the
+    #   attribute (without entities).
+               # Returns:: the Attribute added
+               #  e = Element.new 'e'
+               #  e.add_attribute( 'a', 'b' )               #-> <e a='b'/>
+               #  e.add_attribute( 'x:a', 'c' )             #-> <e a='b' x:a='c'/>
+               #  e.add_attribute Attribute.new('b', 'd')   #-> <e a='b' x:a='c' b='d'/>
+               def add_attribute( key, value=nil )
+                       if key.kind_of? Attribute
+                               @attributes << key
+                       else
+                               @attributes[key] = value
+                       end
+               end
+
+               # Add multiple attributes to this element.
+               # hash:: is either a hash, or array of arrays
+               #  el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
+               #  el.add_attributes( [ ["name1","value1"], ["name2","value2"] ] )
+               def add_attributes hash
+                       if hash.kind_of? Hash
+                               hash.each_pair {|key, value| @attributes[key] = value }
+                       elsif hash.kind_of? Array
+                               hash.each { |value| @attributes[ value[0] ] = value[1] }
+                       end
+               end
+
+               # Removes an attribute
+               # key::
+               #   either an Attribute or a String.  In either case, the
+               #   attribute is found by matching the attribute name to the argument,
+               #   and then removed.  If no attribute is found, no action is taken.
+               # Returns:: 
+               #   the attribute removed, or nil if this Element did not contain
+               #   a matching attribute
+               #  e = Element.new('E')
+               #  e.add_attribute( 'name', 'Sean' )             #-> <E name='Sean'/>
+               #  r = e.add_attribute( 'sur:name', 'Russell' )  #-> <E name='Sean' sur:name='Russell'/>
+               #  e.delete_attribute( 'name' )                  #-> <E sur:name='Russell'/>
+               #  e.delete_attribute( r )                       #-> <E/>
+               def delete_attribute(key)
+                       attr = @attributes.get_attribute(key)
+                       attr.remove unless attr.nil?
+               end
+
+               #################################################
+               # Other Utilities                               #
+               #################################################
+
+               # Get an array of all CData children.  
+               # IMMUTABLE
+               def cdatas
+                       find_all { |child| child.kind_of? CData }.freeze
+               end
+
+               # Get an array of all Comment children.
+               # IMMUTABLE
+               def comments
+                       find_all { |child| child.kind_of? Comment }.freeze
+               end
+
+               # Get an array of all Instruction children.
+               # IMMUTABLE
+               def instructions
+                       find_all { |child| child.kind_of? Instruction }.freeze
+               end
+
+               # Get an array of all Text children.
+               # IMMUTABLE
+               def texts
+                       find_all { |child| child.kind_of? Text }.freeze
+               end
+
+               # Writes out this element, and recursively, all children.
+               # output::
+               #         output an object which supports '<< string'; this is where the
+               #   document will be written.
+               # indent::
+               #   An integer.  If -1, no indenting will be used; otherwise, the
+               #   indentation will be this number of spaces, and children will be
+               #   indented an additional amount.  Defaults to -1
+               # transitive::
+               #   If transitive is true and indent is >= 0, then the output will be
+               #   pretty-printed in such a way that the added whitespace does not affect
+               #   the parse tree of the document
+               # ie_hack::
+               #   Internet Explorer is the worst piece of crap to have ever been
+               #   written, with the possible exception of Windows itself.  Since IE is
+               #   unable to parse proper XML, we have to provide a hack to generate XML
+               #   that IE's limited abilities can handle.  This hack inserts a space 
+               #   before the /> on empty tags.  Defaults to false
+               #
+               #  out = ''
+               #  doc.write( out )     #-> doc is written to the string 'out'
+               #  doc.write( $stdout ) #-> doc written to the console
+               def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
+                       writer << "<#{@expanded_name}"  # start tag; attributes follow
+                       unless @attributes.empty?
+                               @attributes.each_attribute { |attribute|
+                                       writer << " "
+                                       attribute.write( writer, indent )
+                               }
+                       end
+                       pretty = (transitive and indent > -1)  # added whitespace goes inside the tags
+                       if @children.empty?
+                               if pretty
+                                       writer << "\n"
+                                       indent( writer, indent )
+                               elsif ie_hack
+                                       writer << " "
+                               end
+                               writer << "/"
+                       else
+                               unless !pretty or @children[0].kind_of?( Text )
+                                       writer << "\n"
+                                       indent( writer, indent+1 )
+                               end
+                               writer << ">"
+                               write_children( writer, indent, transitive, ie_hack )
+                               writer << "</#{expanded_name}"
+                       end
+                       if pretty and !@children.empty?
+                               writer << "\n"
+                               indent -= 1 if next_sibling.nil?  # no following sibling: one level shallower
+                               indent( writer, indent )
+                       end
+                       writer << ">"
+               end
+
+
+               private
+    def __to_xpath_helper node
+      step = node.expanded_name.clone
+      return step unless node.parent
+      # Same-named Element siblings decide whether a positional predicate is needed.
+      twins = node.parent.find_all do |sibling|
+        sibling.kind_of?(REXML::Element) and sibling.expanded_name == node.expanded_name
+      end
+      if twins.length > 1
+        position = twins.index( node ) + 1  # the predicate is written 1-based
+        step << "[#{position}]"
+      end
+      step
+    end
+
+               # A private helper method
+               def each_with_something( test, max=0, name=nil )
+                       matched = 0  # number of children yielded so far
+                       @elements.each( name ) do |element|
+                               matched += 1 if (hit = test.call( element ))
+                               yield element if hit
+                               return if max > 0 and matched == max  # max == 0 means no limit
+                       end
+               end
+
+               # A private helper method
+               def write_children( writer, indent, transitive, ie_hack )
+                       if indent == -1  # no indenting: children are written back to back
+                               return each { |node| node.write( writer, indent, transitive, ie_hack ) }
+                       end
+                       newline = indent < 0 ? '' : "\n"
+                       deeper = indent + 1
+                       previous = nil
+                       each do |node|
+                               breakable = !(node.kind_of?( Text ) or previous.kind_of?( Text ) or transitive)
+                               if breakable
+                                       writer << newline
+                                       indent( writer, deeper )
+                               end
+                               node.write( writer, deeper, transitive, ie_hack )
+                               previous = node
+                       end
+                       unless previous.kind_of?( Text ) or transitive
+                               writer << newline
+                               indent( writer, indent )
+                       end
+               end
+       end
+
+       ########################################################################
+       # ELEMENTS                                                             #
+       ########################################################################
+
+       # A class which provides filtering of children for Elements, and
+       # XPath search support.  You are expected to only encounter this class as
+       # the <tt>element.elements</tt> object.  Therefore, you are 
+       # _not_ expected to instantiate this yourself.
+       class Elements
+               include Enumerable
+               # Constructor
+               # parent:: the parent Element
+               def initialize parent
+                       @element = parent
+               end
+
+               # Fetches a child element.  Filters only Element children, regardless of
+               # the XPath match.
+               # index::
+               #   the search parameter.  This is either an Integer, which
+               #   will be used to find the index'th child Element, or an XPath,
+               #   which will be used to search for the Element.  <em>Because
+               #   of the nature of XPath searches, any element in the connected XML
+               #   document can be fetched through any other element.</em>  <b>The
+               #   Integer index is 1-based, not 0-based.</b>  This means that the first
+               #   child element is at index 1, not 0, and the +n+th element is at index
+               #   +n+, not <tt>n-1</tt>.  This is because XPath indexes element children
+               #   starting from 1, not 0, and the indexes should be the same.
+               # name::
+               #   optional, and only used if the first argument is an
+               #   Integer.  In that case, the index'th child Element that has the
+               #   supplied name will be returned.  Note again that the indexes start at 1.
+               # Returns:: the first matching Element, or nil if no child matched
+               #  doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
+               #  doc.root.elements[1]       #-> <b/>
+               #  doc.root.elements['c']     #-> <c id="1"/>
+               #  doc.root.elements[2,'c']   #-> <c id="2"/>
+               def []( index, name=nil)
+                       if index.kind_of? Integer
+                               raise "index (#{index}) must be >= 1" if index < 1
+                               name = literalize(name) if name
+                               num = 0
+                               @element.find { |child|
+                                       child.kind_of? Element and
+                                       (name.nil? ? true : child.has_name?( name )) and
+                                       (num += 1) == index
+                               }
+                       else
+                               return XPath::first( @element, index )
+                       end
+               end
+
+               # Sets an element, replacing any previous matching element.  If no
+               # existing element is found, the element is added.
+               # index:: Used to find a matching element to replace.  See []().
+               # element::
+               #   The element that replaces the existing element
+               # Returns::
+               #   the previous element, or nil if no previous element was found.
+               #
+               #  doc = Document.new '<a/>'
+               #  doc.root.elements[10] = Element.new('b')    #-> <a><b/></a>
+               #  doc.root.elements[1]                        #-> <b/>
+               #  doc.root.elements[1] = Element.new('c')     #-> <a><c/></a>
+               #  doc.root.elements['c'] = Element.new('d')   #-> <a><d/></a>
+               def []=( index, element )
+                       previous = self[index]
+                       if previous.nil?
+                               @element.add element
+                       else
+                               previous.replace_with element
+                       end
+                       return previous
+               end
+
+               # Returns +true+ if there are no +Element+ children, +false+ otherwise
+               def empty?
+                       @element.find{ |child| child.kind_of? Element}.nil?
+               end
+
+               # Returns the index of the supplied child (starting at 1), or -1 if
+               # the element is not a child
+               # element:: an +Element+ child
+               def index element
+                       rv = 0
+                       found = @element.find do |child|
+                               child.kind_of? Element and
+                               (rv += 1) and
+                               child == element
+                       end
+                       # find returns nil when nothing matched, also when +element+ itself is nil
+                       found.nil? ? -1 : rv
+               end
+
+               # Deletes a child Element
+               # element::
+               #   Either an Element, which is removed directly; an
+               #   xpath, where the first matching child is removed; or an Integer,
+               #   where the n'th Element is removed.
+               # Returns:: the removed child
+               #  doc = Document.new '<a><b/><c/><c id="1"/></a>'
+               #  b = doc.root.elements[1]
+               #  doc.root.elements.delete b           #-> <a><c/><c id="1"/></a>
+               #  doc.elements.delete("a/c[@id='1']")  #-> <a><c/></a>
+               #  doc.root.elements.delete 1           #-> <a/>
+               def delete element
+                       if element.kind_of? Element
+                               @element.delete element
+                       else
+                               el = self[element]
+                               el.remove if el
+                       end
+               end
+
+               # Removes multiple elements.  Filters for Element children, regardless of
+               # XPath matching.
+               # xpath:: all elements matching this String path are removed.
+               # Returns:: an Array of Elements that have been removed
+               #  doc = Document.new '<a><c/><c/><c/><c/></a>'
+               #  deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
+               def delete_all( xpath )
+                       rv = []
+                       XPath::each( @element, xpath) {|element|
+                               rv << element if element.kind_of? Element
+                       }
+                       rv.each do |element|
+                               @element.delete element
+                               element.remove
+                       end
+                       return rv
+               end
+
+               # Adds an element
+               # element::
+               #   if supplied, is either an Element, String, or
+               #   Source (see Element.initialize).  If not supplied or nil, a
+               #   new, default Element will be constructed
+               # Returns:: the added Element
+               #  a = Element.new 'a'
+               #  a.elements.add Element.new 'b'  #-> <a><b/></a>
+               #  a.elements.add 'c'              #-> <a><b/><c/></a>
+               def add element=nil
+                       if element.nil?
+                               Element.new "", self, @element.context
+                       elsif not element.kind_of?(Element)
+                               Element.new element, self, @element.context
+                       else
+                               @element << element
+                               element.context = @element.context
+                               element
+                       end
+               end
+
+               alias :<< :add
+
+               # Iterates through all of the child Elements, optionally filtering
+               # them by a given XPath
+               # xpath::
+               #   optional.  If supplied, this is a String XPath, and is used to
+               #   filter the children, so that only matching children are yielded.  Note
+               #   that XPaths are automatically filtered for Elements, so that
+               #   non-Element children will not be yielded
+               #  doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
+               #  doc.root.elements.each {|e|p e}       #-> Yields b, c, d, b, c, d elements
+               #  doc.root.elements.each('b') {|e|p e}  #-> Yields b, b elements
+               #  doc.root.elements.each('child::node()')  {|e|p e}
+               #  #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
+               #  XPath.each(doc.root, 'child::node()', &block)
+               #  #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
+               def each( xpath=nil, &block)
+                       XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
+               end
+
+               # Passes every Element matched by the optional +xpath+ to the block;
+               # non-Element matches are skipped.
+               # Returns:: an Array of the block's results
+               def collect( xpath=nil, &block )
+                       collection = []
+                       XPath::each( @element, xpath ) {|e|
+                               collection << yield(e)  if e.kind_of?(Element)
+                       }
+                       collection
+               end
+
+               # Folds the Elements matched by the optional +xpath+ with the block.
+               # While +initial+ is nil, the next matching Element seeds the accumulator
+               # and is not passed to the block.  Returns the final accumulator.
+               def inject( xpath=nil, initial=nil, &block )
+                       first = true
+                       XPath::each( @element, xpath ) {|e|
+                               if (e.kind_of? Element)
+                                       if (first and initial == nil)
+                                               initial = e
+                                               first = false
+                                       else
+                                               initial = yield( initial, e )
+                                       end
+                               end
+                       }
+                       initial
+               end
+
+               # Returns the number of +Element+ children of the parent object.
+               #  doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
+               #  doc.root.size            #-> 6, 3 element and 3 text nodes
+               #  doc.root.elements.size   #-> 3
+               def size
+                       count = 0
+                       @element.each {|child| count+=1 if child.kind_of? Element }
+                       count
+               end
+
+               # Returns an Array of Element children.  An XPath may be supplied to
+               # filter the children.  Only Element children are returned, even if the
+               # supplied XPath matches non-Element children.
+               #  doc = Document.new '<a>sean<b/>elliott<c/></a>'
+               #  doc.root.elements.to_a                  #-> [ <b/>, <c/> ]
+               #  doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
+               #  XPath.match(doc.root, "child::node()")  #-> [ sean, <b/>, elliott, <c/> ]
+               def to_a( xpath=nil )
+                       rv = XPath.match( @element, xpath )
+                       return rv.find_all{|e| e.kind_of? Element} if xpath
+                       rv
+               end
+
+               private
+               # Private helper method.  Removes quotes from quoted strings
+               def literalize name
+                       name = name[1..-2] if name[0] == ?' or name[0] == ?"               #'
+                       name
+               end
+       end
+
+       ########################################################################
+       # ATTRIBUTES                                                           #
+       ########################################################################
+
+       # A class that defines the set of Attributes of an Element and provides 
+       # operations for accessing elements in that set.
+       class Attributes < Hash
+               # Constructor
+               # element:: the Element of which this is an Attribute
+               def initialize element
+                       @element = element
+               end
+
+               # Fetches an attribute value.  If you want to get the Attribute itself,
+               # use get_attribute()
+               # name:: an XPath attribute name.  Namespaces are relevant here.
+               # Returns::
+               #   the String value of the matching attribute, or +nil+ if no
+               #   matching attribute was found.  This is the unnormalized value
+               #   (with entities expanded).
+               #
+               #  doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
+               #  doc.root.attributes['att']         #-> '<'
+               #  doc.root.attributes['bar:att']     #-> '2'
+               def [](name)
+                       attr = get_attribute(name)
+                       return attr.value unless attr.nil?
+                       return nil
+               end
+
+               # Returns an Array of all the Attribute nodes, one entry per attribute even
+               # when several attributes share a name under different prefixes.
+               def to_a
+                       rv = []
+                       each_attribute { |attribute| rv << attribute }
+                       rv
+               end
+
+               # Returns the number of attributes the owning Element contains.
+               #  doc = Document.new "<a x='1' y='2' foo:x='3'/>"
+               #  doc.root.attributes.length        #-> 3
+               def length
+                       c = 0
+                       each_attribute { c+=1 }
+                       c
+               end
+               alias :size :length
+
+               # Iterates over the attributes of an Element.  Yields actual Attribute
+               # nodes, not String values.
+               #
+               #  doc = Document.new '<a x="1" y="2"/>'
+               #  doc.root.attributes.each_attribute {|attr|
+               #    p attr.expanded_name+" => "+attr.value
+               #  }
+               def each_attribute # :yields: attribute
+                       each_value do |val|
+                               if val.kind_of? Attribute
+                                       yield val
+                               else
+                                       val.each_value { |atr| yield atr }
+                               end
+                       end
+               end
+
+               # Iterates over each attribute of an Element, yielding the expanded name
+               # and value as a pair of Strings.
+               #
+               #  doc = Document.new '<a x="1" y="2"/>'
+               #  doc.root.attributes.each {|name, value| p name+" => "+value }
+               def each
+                       each_attribute do |attr|
+                               yield attr.expanded_name, attr.value
+                       end
+               end
+
+               # Fetches an attribute
+               # name::
+               #   the name by which to search for the attribute.  Can be a
+               #   <tt>prefix:name</tt> namespace name.
+               # Returns:: The first matching attribute, or nil if there was none.  This
+               # value is an Attribute node, not the String value of the attribute.
+               #  doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
+               #  doc.root.attributes.get_attribute("foo").value    #-> "2"
+               #  doc.root.attributes.get_attribute("x:foo").value  #-> "1"
+               def get_attribute( name )
+                       attr = fetch( name, nil )
+                       if attr.nil?
+                               return nil if name.nil?
+                               # Look for prefix
+                               name =~ Namespace::NAMESPLIT
+                               prefix, n = $1, $2
+                               if prefix
+                                       attr = fetch( n, nil )
+                                       # check prefix
+                                       if attr.kind_of? Attribute
+                                               return attr if prefix == attr.prefix
+                                       elsif !attr.nil?
+                                               return attr[ prefix ]
+                                       end
+                               end
+                               element_document = @element.document
+                               if element_document and element_document.doctype
+                                       expn = @element.expanded_name
+                                       expn = element_document.doctype.name if expn.size == 0
+                                       attr_val = element_document.doctype.attribute_of(expn, name)
+                                       return Attribute.new( name, attr_val ) if attr_val
+                               end
+                               return nil
+                       end
+                       if attr.kind_of? Hash
+                               attr = attr[ @element.prefix ]
+                       end
+                       return attr
+               end
+
+               # Sets an attribute, overwriting any existing attribute value by the
+               # same name.  Namespace is significant.
+               # name:: the name of the attribute
+               # value::
+               #   (optional) If supplied, the value of the attribute.  If
+               #   nil, any existing matching attribute is deleted.
+               # Returns::
+               #   Owning element
+               #  doc = Document.new "<a x:foo='1' foo='3'/>"
+               #  doc.root.attributes['y:foo'] = '2'
+               #  doc.root.attributes['foo'] = '4'
+               #  doc.root.attributes['x:foo'] = nil
+               def []=( name, value )
+                       if value.nil?           # Delete the named attribute
+                               attr = get_attribute(name)
+                               delete attr unless attr.nil?
+                               return
+                       end
+                       element_document = @element.document
+                       unless value.kind_of? Attribute
+                               if element_document and element_document.doctype
+                                       value = Text::normalize( value, element_document.doctype )
+                               else
+                                       value = Text::normalize( value, nil )
+                               end
+                               value = Attribute.new(name, value)
+                       end
+                       value.element = @element
+                       old_attr = fetch(value.name, nil)
+                       if old_attr.nil?
+                               store(value.name, value)
+                       elsif old_attr.kind_of? Hash
+                               old_attr[value.prefix] = value
+                       elsif old_attr.prefix != value.prefix
+                               # Check for conflicting namespaces
+                               raise ParseException.new(
+                                       "Namespace conflict in adding attribute \"#{value.name}\": "+
+                                       "Prefix \"#{old_attr.prefix}\" = "+
+                                       "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
+                                       "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
+                                       value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
+                                       @element.namespace( old_attr.prefix ) ==
+                                       @element.namespace( value.prefix )
+                               store value.name, { old_attr.prefix => old_attr,
+                                                   value.prefix    => value }
+                       else
+                               store value.name, value
+                       end
+                       return @element
+               end
+
+               # Returns an array of Strings containing all of the prefixes declared
+               # by this set of attributes.  The array does not include the default
+               # namespace declaration, if one exists.
+               #  doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
+               #        "z='glorp' p:k='gru'/>")
+               #  prefixes = doc.root.attributes.prefixes    #-> ['x', 'y']
+               def prefixes
+                       ns = []
+                       each_attribute do |attribute|
+                               ns << attribute.name if attribute.prefix == 'xmlns'
+                       end
+                       if @element.document and @element.document.doctype
+                               expn = @element.expanded_name
+                               expn = @element.document.doctype.name if expn.size == 0
+                               @element.document.doctype.attributes_of(expn).each {
+                                       |attribute|
+                                       ns << attribute.name if attribute.prefix == 'xmlns'
+                               }
+                       end
+                       ns
+               end
+
+               # Returns a Hash of the xmlns declarations in this set, keyed by attribute
+               # name, plus those the doctype's attributes_of reports for the element.
+               def namespaces
+                       namespaces = {}
+                       each_attribute do |attribute|
+                               namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
+                       end
+                       if @element.document and @element.document.doctype
+                               expn = @element.expanded_name
+                               expn = @element.document.doctype.name if expn.size == 0
+                               @element.document.doctype.attributes_of(expn).each {
+                                       |attribute|
+                                       namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
+                               }
+                       end
+                       namespaces
+               end
+
+               # Removes an attribute
+               # attribute::
+               #   either a String, which is the name of the attribute to remove --
+               #   namespaces are significant here -- or the attribute to remove.
+               # Returns:: the owning element
+               #  doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
+               #  doc.root.attributes.delete 'foo'   #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
+               #  doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
+               #  attr = doc.root.attributes.get_attribute('y:foo')
+               #  doc.root.attributes.delete attr    #-> <a z:foo='4'/>"
+               def delete( attribute )
+                       if attribute.kind_of? Attribute
+                               name = attribute.name
+                               prefix = attribute.prefix
+                       else
+                               attribute =~ Namespace::NAMESPLIT
+                               prefix, name = $1, $2
+                               prefix = '' unless prefix
+                       end
+                       old = fetch(name, nil)
+                       if old.kind_of? Hash # the supplied attribute is one of many
+                               old.delete(prefix)
+                               if old.size == 1
+                                       repl = nil
+                                       old.each_value{|v| repl = v}
+                                       store name, repl
+                               end
+                       elsif old.nil?
+                               return @element
+                       else # the supplied attribute is a top-level one
+                               super(name)
+                       end
+                       @element
+               end
+
+               # Adds an attribute, overriding any existing attribute by the
+               # same name.  Namespaces are significant.
+               # attribute:: An Attribute
+               def add( attribute )
+                       self[attribute.name] = attribute
+               end
+
+               alias :<< :add
+
+               # Deletes all attributes matching a name.  Namespaces are significant.
+               # name::
+               #   A String; all attributes that match this path will be removed
+               # Returns:: an Array of the Attributes that were removed
+               def delete_all( name )
+                       rv = []
+                       each_attribute { |attribute|
+                               rv << attribute if attribute.expanded_name == name
+                       }
+                       rv.each{ |attr| attr.remove }
+                       return rv
+               end
+
+               # The +get_attribute_ns+ method retrieves an attribute by its namespace
+               # and name. Thus it is possible to reliably identify an attribute
+               # even if an XML processor has changed the prefix.
+               #
+               # Method contributed by Henrik Martensson
+               def get_attribute_ns(namespace, name)
+                       each_attribute() { |attribute|
+                               if name == attribute.name &&
+                                  namespace == attribute.namespace()
+                                       return attribute
+                               end
+                       }
+                       nil
+               end
+       end
+end
diff --git a/lib/booh/rexml/encoding.rb b/lib/booh/rexml/encoding.rb
new file mode 100644 (file)
index 0000000..5361ca3
--- /dev/null
@@ -0,0 +1,66 @@
+# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
+module REXML
+  module Encoding
+    @encoding_methods = {}
+    def self.register(enc, &block) # stores the block that installs +enc+
+      @encoding_methods[enc] = block
+    end
+    def self.apply(obj, enc) # calls the block registered for +enc+ with +obj+
+      @encoding_methods[enc][obj]
+    end
+    def self.encoding_method(enc) # the block registered for +enc+, or nil
+      @encoding_methods[enc]
+    end
+
+    # Native, default format is UTF-8, so it is declared here rather than in
+    # an encodings/ definition.
+    UTF_8 = 'UTF-8'
+    UTF_16 = 'UTF-16'
+    UNILE = 'UNILE'
+
+    # ID ---> Encoding name
+    attr_reader :encoding
+    # Selects the encoding +enc+ (nil selects UTF-8).  Returns false when +enc+
+    # is already current and true otherwise; raises ArgumentError for a
+    # malformed name or when no decoder can be loaded.
+    def encoding=( enc )
+      old_verbosity = $VERBOSE
+      $VERBOSE = false
+      enc = enc.nil? ? nil : enc.upcase
+      return false if defined? @encoding and enc == @encoding
+      if enc and enc != UTF_8
+        # \A..\z rather than ^..$: the name is spliced into a require path below.
+        raise ArgumentError, "Bad encoding name #{enc}" unless enc =~ /\A[\w-]+\z/
+        @encoding = enc
+        @encoding.untaint
+        begin
+          require 'booh/rexml/encodings/ICONV.rb'
+          Encoding.apply(self, "ICONV")
+        rescue ScriptError, StandardError
+          begin
+            require File.join( "booh", "rexml", "encodings", "#@encoding.rb" )
+            Encoding.apply(self, @encoding)
+          rescue LoadError => err
+            puts err.message
+            raise ArgumentError, "No decoder found for encoding #@encoding.  Please install iconv."
+          end
+        end
+      else
+        @encoding = UTF_8
+        require 'booh/rexml/encodings/UTF-8.rb'
+        Encoding.apply(self, @encoding)
+      end
+      true
+    ensure
+      $VERBOSE = old_verbosity
+    end
+
+    def check_encoding str
+      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+      return UTF_16 if /\A\xfe\xff/n =~ str
+      return UNILE if /\A\xff\xfe/n =~ str
+      str =~ /^\s*<\?xml\s*version\s*=\s*(['"]).*?\1\s*encoding\s*=\s*(["'])(.*?)\2/um
+      $3 ? $3.upcase : UTF_8
+    end
+  end
+end
diff --git a/lib/booh/rexml/encodings/CP-1252.rb b/lib/booh/rexml/encodings/CP-1252.rb
new file mode 100644 (file)
index 0000000..51179f1
--- /dev/null
@@ -0,0 +1,98 @@
+#
+# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
+#
+module REXML
+  module Encoding
+    @@__REXML_encoding_methods = %q~
+    # Convert from UTF-8
+    def encode content
+      array_utf8 = content.unpack('U*')
+      array_enc = []
+      array_utf8.each do |num|
+        case num
+          # shortcut first bunch basic characters
+        when 0..0xFF: array_enc << num
+          # characters added compared to iso-8859-1
+        when 0x20AC: array_enc << 0x80 # 0xe2 0x82 0xac
+        when 0x201A: array_enc << 0x82 # 0xe2 0x82 0x9a
+        when 0x0192: array_enc << 0x83 # 0xc6 0x92
+        when 0x201E: array_enc << 0x84 # 0xe2 0x82 0x9e
+        when 0x2026: array_enc << 0x85 # 0xe2 0x80 0xa6
+        when 0x2020: array_enc << 0x86 # 0xe2 0x80 0xa0
+        when 0x2021: array_enc << 0x87 # 0xe2 0x80 0xa1
+        when 0x02C6: array_enc << 0x88 # 0xcb 0x86
+        when 0x2030: array_enc << 0x89 # 0xe2 0x80 0xb0
+        when 0x0160: array_enc << 0x8A # 0xc5 0xa0
+        when 0x2039: array_enc << 0x8B # 0xe2 0x80 0xb9
+        when 0x0152: array_enc << 0x8C # 0xc5 0x92
+        when 0x017D: array_enc << 0x8E # 0xc5 0xbd
+        when 0x2018: array_enc << 0x91 # 0xe2 0x80 0x98
+        when 0x2019: array_enc << 0x92 # 0xe2 0x80 0x99
+        when 0x201C: array_enc << 0x93 # 0xe2 0x80 0x9c
+        when 0x201D: array_enc << 0x94 # 0xe2 0x80 0x9d
+        when 0x2022: array_enc << 0x95 # 0xe2 0x80 0xa2
+        when 0x2013: array_enc << 0x96 # 0xe2 0x80 0x93
+        when 0x2014: array_enc << 0x97 # 0xe2 0x80 0x94
+        when 0x02DC: array_enc << 0x98 # 0xcb 0x9c
+        when 0x2122: array_enc << 0x99 # 0xe2 0x84 0xa2
+        when 0x0161: array_enc << 0x9A # 0xc5 0xa1
+        when 0x203A: array_enc << 0x9B # 0xe2 0x80 0xba
+        when 0x0152: array_enc << 0x9C # 0xc5 0x93
+        when 0x017E: array_enc << 0x9E # 0xc5 0xbe
+        when 0x0178: array_enc << 0x9F # 0xc5 0xb8
+        else
+          # all remaining basic characters can be used directly
+          if num <= 0xFF
+            array_enc << num
+          else
+            # Numeric entity (&#nnnn;); shard by  Stefan Scholl
+            array_enc.concat "&\##{num};".unpack('C*')
+          end
+        end
+      end
+      array_enc.pack('C*')
+    end
+    
+    # Convert to UTF-8
+    def decode(str)
+      array_latin9 = str.unpack('C*')
+      array_enc = []
+      array_latin9.each do |num|
+        case num
+          # characters that added compared to iso-8859-1
+        when 0x80: array_enc << 0x20AC # 0xe2 0x82 0xac
+        when 0x82: array_enc << 0x201A # 0xe2 0x82 0x9a
+        when 0x83: array_enc << 0x0192 # 0xc6 0x92
+        when 0x84: array_enc << 0x201E # 0xe2 0x82 0x9e
+        when 0x85: array_enc << 0x2026 # 0xe2 0x80 0xa6
+        when 0x86: array_enc << 0x2020 # 0xe2 0x80 0xa0
+        when 0x87: array_enc << 0x2021 # 0xe2 0x80 0xa1
+        when 0x88: array_enc << 0x02C6 # 0xcb 0x86
+        when 0x89: array_enc << 0x2030 # 0xe2 0x80 0xb0
+        when 0x8A: array_enc << 0x0160 # 0xc5 0xa0
+        when 0x8B: array_enc << 0x2039 # 0xe2 0x80 0xb9
+        when 0x8C: array_enc << 0x0152 # 0xc5 0x92
+        when 0x8E: array_enc << 0x017D # 0xc5 0xbd
+        when 0x91: array_enc << 0x2018 # 0xe2 0x80 0x98
+        when 0x92: array_enc << 0x2019 # 0xe2 0x80 0x99
+        when 0x93: array_enc << 0x201C # 0xe2 0x80 0x9c
+        when 0x94: array_enc << 0x201D # 0xe2 0x80 0x9d
+        when 0x95: array_enc << 0x2022 # 0xe2 0x80 0xa2
+        when 0x96: array_enc << 0x2013 # 0xe2 0x80 0x93
+        when 0x97: array_enc << 0x2014 # 0xe2 0x80 0x94
+        when 0x98: array_enc << 0x02DC # 0xcb 0x9c
+        when 0x99: array_enc << 0x2122 # 0xe2 0x84 0xa2
+        when 0x9A: array_enc << 0x0161 # 0xc5 0xa1
+        when 0x9B: array_enc << 0x203A # 0xe2 0x80 0xba
+        when 0x9C: array_enc << 0x0152 # 0xc5 0x93
+        when 0x9E: array_enc << 0x017E # 0xc5 0xbe
+        when 0x9F: array_enc << 0x0178 # 0xc5 0xb8
+        else
+          array_enc << num
+        end
+      end
+      array_enc.pack('U*')
+    end
+    ~
+  end
+end
diff --git a/lib/booh/rexml/encodings/EUC-JP.rb b/lib/booh/rexml/encodings/EUC-JP.rb
new file mode 100644 (file)
index 0000000..db37b6b
--- /dev/null
@@ -0,0 +1,35 @@
+# EUC-JP <-> UTF-8 conversion for REXML::Encoding.
+module REXML
+  module Encoding
+    # Prefer the uconv library; when requiring it raises LoadError, define the
+    # same two methods on top of nkf instead.
+    begin
+      require 'uconv'
+
+      # Convert EUC-JP +str+ to UTF-8.
+      def decode_eucjp(str)
+        Uconv::euctou8(str)
+      end
+
+      # Convert UTF-8 +content+ to EUC-JP.
+      def encode_eucjp content
+        Uconv::u8toeuc(content)
+      end
+    rescue LoadError
+      require 'nkf'
+
+      # Option strings handed to NKF.nkf for each direction.
+      EUCTOU8 = '-Ewm0'
+      U8TOEUC = '-Wem0'
+
+      # Convert EUC-JP +str+ to UTF-8.
+      def decode_eucjp(str)
+        NKF.nkf(EUCTOU8, str)
+      end
+
+      # Convert UTF-8 +content+ to EUC-JP.
+      def encode_eucjp content
+        NKF.nkf(U8TOEUC, content)
+      end
+    end
+
+    # The handler aliases decode/encode on the singleton class of the object
+    # it is given, so only that object is affected.
+    register("EUC-JP") do |obj|
+      class << obj
+        alias decode decode_eucjp
+        alias encode encode_eucjp
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/encodings/ICONV.rb b/lib/booh/rexml/encodings/ICONV.rb
new file mode 100644 (file)
index 0000000..172fba7
--- /dev/null
@@ -0,0 +1,22 @@
+require "iconv"
+raise LoadError unless defined? Iconv
+
+# Generic conversion through Iconv, for whatever encoding the object's
+# @encoding names.
+module REXML
+  module Encoding
+    # Convert +str+ from @encoding to UTF-8.
+    def decode_iconv(str)
+      Iconv.conv(UTF_8, @encoding, str)
+    end
+
+    # Convert UTF-8 +content+ to @encoding.
+    def encode_iconv(content)
+      Iconv.conv(@encoding, UTF_8, content)
+    end
+
+    register("ICONV") do |obj|
+      # NOTE(review): this conversion of nil looks like a probe that raises
+      # before the aliases are installed when Iconv does not know
+      # obj.encoding -- confirm.
+      Iconv.conv(UTF_8, obj.encoding, nil)
+      class << obj
+        alias decode decode_iconv
+        alias encode encode_iconv
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/encodings/ISO-8859-1.rb b/lib/booh/rexml/encodings/ISO-8859-1.rb
new file mode 100644 (file)
index 0000000..01290a3
--- /dev/null
@@ -0,0 +1,7 @@
+require 'booh/rexml/encodings/US-ASCII'
+
+# ISO-8859-1 has no implementation of its own: it is registered with the
+# handler already registered for US-ASCII.
+module REXML
+  module Encoding
+    register("ISO-8859-1", &encoding_method("US-ASCII"))
+  end
+end
diff --git a/lib/booh/rexml/encodings/ISO-8859-15.rb b/lib/booh/rexml/encodings/ISO-8859-15.rb
new file mode 100644 (file)
index 0000000..ce565e7
--- /dev/null
@@ -0,0 +1,69 @@
+#
+# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
+#
+module REXML
+  module Encoding
+    @@__REXML_encoding_methods = %q~
+    # Convert from UTF-8
+    def to_iso_8859_15 content
+      array_utf8 = content.unpack('U*')
+      array_enc = []
+      array_utf8.each do |num|
+        case num
+          # shortcut first bunch basic characters
+        when 0..0xA3: array_enc << num
+          # characters removed compared to iso-8859-1
+        when 0xA4: array_enc << '&#164;'
+        when 0xA6: array_enc << '&#166;'
+        when 0xA8: array_enc << '&#168;'
+        when 0xB4: array_enc << '&#180;'
+        when 0xB8: array_enc << '&#184;'
+        when 0xBC: array_enc << '&#188;'
+        when 0xBD: array_enc << '&#189;'
+        when 0xBE: array_enc << '&#190;'
+          # characters added compared to iso-8859-1
+        when 0x20AC: array_enc << 0xA4 # 0xe2 0x82 0xac
+        when 0x0160: array_enc << 0xA6 # 0xc5 0xa0
+        when 0x0161: array_enc << 0xA8 # 0xc5 0xa1
+        when 0x017D: array_enc << 0xB4 # 0xc5 0xbd
+        when 0x017E: array_enc << 0xB8 # 0xc5 0xbe
+        when 0x0152: array_enc << 0xBC # 0xc5 0x92
+        when 0x0153: array_enc << 0xBD # 0xc5 0x93
+        when 0x0178: array_enc << 0xBE # 0xc5 0xb8
+        else
+          # all remaining basic characters can be used directly
+          if num <= 0xFF
+            array_enc << num
+          else
+            # Numeric entity (&#nnnn;); shard by  Stefan Scholl
+            array_enc.concat "&\##{num};".unpack('C*')
+          end
+        end
+      end
+      array_enc.pack('C*')
+    end
+    
+    # Convert to UTF-8
+    def from_iso_8859_15(str)
+      array_latin9 = str.unpack('C*')
+      array_enc = []
+      array_latin9.each do |num|
+        case num
+          # characters that differ compared to iso-8859-1
+        when 0xA4: array_enc << 0x20AC
+        when 0xA6: array_enc << 0x0160
+        when 0xA8: array_enc << 0x0161
+        when 0xB4: array_enc << 0x017D
+        when 0xB8: array_enc << 0x017E
+        when 0xBC: array_enc << 0x0152
+        when 0xBD: array_enc << 0x0153
+        when 0xBE: array_enc << 0x0178
+        else
+          array_enc << num
+        end
+      end
+      array_enc.pack('U*')
+    end
+    ~
+  end
+end
diff --git a/lib/booh/rexml/encodings/SHIFT-JIS.rb b/lib/booh/rexml/encodings/SHIFT-JIS.rb
new file mode 100644 (file)
index 0000000..93c7877
--- /dev/null
@@ -0,0 +1,37 @@
+# Shift_JIS <-> UTF-8 conversion for REXML::Encoding.
+module REXML
+  module Encoding
+    # Prefer the uconv library; when requiring it raises LoadError, define the
+    # same two methods on top of nkf instead.
+    begin
+      require 'uconv'
+
+      # Convert Shift_JIS +content+ to UTF-8.
+      def decode_sjis content
+        Uconv::sjistou8(content)
+      end
+
+      # Convert UTF-8 +str+ to Shift_JIS.
+      def encode_sjis(str)
+        Uconv::u8tosjis(str)
+      end
+    rescue LoadError
+      require 'nkf'
+
+      # Option strings handed to NKF.nkf for each direction.
+      SJISTOU8 = '-Swm0'
+      U8TOSJIS = '-Wsm0'
+
+      # Convert Shift_JIS +str+ to UTF-8.
+      def decode_sjis(str)
+        NKF.nkf(SJISTOU8, str)
+      end
+
+      # Convert UTF-8 +content+ to Shift_JIS.
+      def encode_sjis content
+        NKF.nkf(U8TOSJIS, content)
+      end
+    end
+
+    # One handler, registered under both spellings of the encoding name.
+    b = proc do |obj|
+      class << obj
+        alias decode decode_sjis
+        alias encode encode_sjis
+      end
+    end
+    register("SHIFT-JIS", &b)
+    register("SHIFT_JIS", &b)
+  end
+end
diff --git a/lib/booh/rexml/encodings/SHIFT_JIS.rb b/lib/booh/rexml/encodings/SHIFT_JIS.rb
new file mode 100644 (file)
index 0000000..c9bdba0
--- /dev/null
@@ -0,0 +1 @@
+require 'booh/rexml/encodings/SHIFT-JIS'
diff --git a/lib/booh/rexml/encodings/UNILE.rb b/lib/booh/rexml/encodings/UNILE.rb
new file mode 100644 (file)
index 0000000..d054140
--- /dev/null
@@ -0,0 +1,34 @@
+module REXML
+  module Encoding
+    def encode_unile content
+      array_utf8 = content.unpack("U*")
+      array_enc = []
+      array_utf8.each do |num|
+        if ((num>>16) > 0)
+          array_enc << ??
+          array_enc << 0
+        else
+          array_enc << (num & 0xFF)
+          array_enc << (num >> 8)
+        end
+      end
+      array_enc.pack('C*')
+    end
+
+    def decode_unile(str)
+      array_enc=str.unpack('C*')
+      array_utf8 = []
+      0.step(array_enc.size-1, 2){|i| 
+        array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100)
+      }
+      array_utf8.pack('U*')
+    end
+
+    register(UNILE) do |obj|
+      class << obj
+        alias decode decode_unile
+        alias encode encode_unile
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/encodings/US-ASCII.rb b/lib/booh/rexml/encodings/US-ASCII.rb
new file mode 100644 (file)
index 0000000..fb4c217
--- /dev/null
@@ -0,0 +1,30 @@
+module REXML
+  module Encoding
+    # Convert from UTF-8
+    def encode_ascii content
+      array_utf8 = content.unpack('U*')
+      array_enc = []
+      array_utf8.each do |num|
+        if num <= 0x7F
+          array_enc << num
+        else
+          # Numeric entity (&#nnnn;); shard by  Stefan Scholl
+          array_enc.concat "&\##{num};".unpack('C*')
+        end
+      end
+      array_enc.pack('C*')
+    end
+
+    # Convert to UTF-8
+    def decode_ascii(str)
+      str.unpack('C*').pack('U*')
+    end
+
+    register("US-ASCII") do |obj|
+      class << obj
+        alias decode decode_ascii
+        alias encode encode_ascii
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/encodings/UTF-16.rb b/lib/booh/rexml/encodings/UTF-16.rb
new file mode 100644 (file)
index 0000000..792adfd
--- /dev/null
@@ -0,0 +1,35 @@
+module REXML
+  module Encoding
+    def encode_utf16 content
+      array_utf8 = content.unpack("U*")
+      array_enc = []
+      array_utf8.each do |num|
+        if ((num>>16) > 0)
+          array_enc << 0
+          array_enc << ??
+        else
+          array_enc << (num >> 8)
+          array_enc << (num & 0xFF)
+        end
+      end
+      array_enc.pack('C*')
+    end
+
+    def decode_utf16(str)
+      str = str[2..-1] if /^\376\377/ =~ str
+      array_enc=str.unpack('C*')
+      array_utf8 = []
+      0.step(array_enc.size-1, 2){|i| 
+        array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
+      }
+      array_utf8.pack('U*')
+    end
+
+    register(UTF_16) do |obj|
+      class << obj
+        alias decode decode_utf16
+        alias encode encode_utf16
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/encodings/UTF-8.rb b/lib/booh/rexml/encodings/UTF-8.rb
new file mode 100644 (file)
index 0000000..bb08f44
--- /dev/null
@@ -0,0 +1,18 @@
+# UTF-8 needs no conversion: both directions return their argument unchanged.
+module REXML
+  module Encoding
+    # Returns +content+ as is.
+    def encode_utf8 content
+      content
+    end
+
+    # Returns +str+ as is.
+    def decode_utf8(str)
+      str
+    end
+
+    # The handler aliases decode/encode on the singleton class of the object
+    # it is given.
+    register(UTF_8) do |obj|
+      class << obj
+        alias decode decode_utf8
+        alias encode encode_utf8
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/entity.rb b/lib/booh/rexml/entity.rb
new file mode 100644 (file)
index 0000000..73d4a4f
--- /dev/null
@@ -0,0 +1,159 @@
+require 'booh/rexml/child'
+require 'booh/rexml/source'
+require 'booh/rexml/xmltokens'
+
+module REXML
+       # God, I hate DTDs.  I really do.  Why this idiot standard still
+       # plagues us is beyond me.
+       class Entity < Child
+               include XMLTokens
+               PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+               SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+               PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+               EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+               NDATADECL = "\\s+NDATA\\s+#{NAME}"
+               PEREFERENCE = "%#{NAME};"
+               ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+               PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+               ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+               PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+               GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+               ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+
+               attr_reader :name, :external, :ref, :ndata, :pubid
+
+               # Create a new entity.  Simple entities can be constructed by passing a
+               # name, value to the constructor; this creates a generic, plain entity
+               # reference. For anything more complicated, you have to pass a Source to
+               # the constructor with the entity definition, or use the accessor methods.
+               # +WARNING+: There is no validation of entity state except when the entity
+               # is read from a stream.  If you start poking around with the accessors,
+               # you can easily create a non-conformant Entity.  The best thing to do is
+               # dump the stupid DTDs and use XMLSchema instead.
+               # 
+               #  e = Entity.new( 'amp', '&' )
+               def initialize stream, value=nil, parent=nil, reference=false
+                       super(parent)
+                       @ndata = @pubid = @value = @external = nil
+                       if stream.kind_of? Array
+                               # Array form: element 1 is the name and a trailing '%' marks a
+                               # parameter entity.
+                               # NOTE(review): presumably this array comes from the parser;
+                               # element 0 is never read here -- confirm its meaning.
+                               @name = stream[1]
+                               if stream[-1] == '%'
+                                       @reference = true 
+                                       stream.pop
+                               else
+                                       @reference = false
+                               end
+                               if stream[2] =~ /SYSTEM|PUBLIC/
+                                       # External entity: SYSTEM carries a reference and, when the
+                                       # array has five elements, an NDATA name; PUBLIC carries a
+                                       # public id followed by the reference.
+                                       @external = stream[2]
+                                       if @external == 'SYSTEM'
+                                               @ref = stream[3]
+                                               @ndata = stream[4] if stream.size == 5
+                                       else
+                                               @pubid = stream[3]
+                                               @ref = stream[4]
+                                       end
+                               else
+                                       # Internal entity: element 2 is the literal value.
+                                       @value = stream[2]
+                               end
+                       else
+                               # Simple form: +stream+ is the entity name itself.
+                               @reference = reference
+                               @external = nil
+                               @name = stream
+                               @value = value
+                       end
+               end
+
+               # Evaluates whether the given string matches an entity definition,
+               # returning true if so, and false otherwise.
+               def Entity::matches? string
+                       # =~ returns the offset of the match, so only a declaration that
+                       # starts at offset 0 counts.
+                       (ENTITYDECL =~ string) == 0
+               end
+
+               # Evaluates to the unnormalized value of this entity; that is, replacing
+               # all entities -- both %ent; and &ent; entities.  This differs from
+               # +value()+ in that +value+ only replaces %ent; entities.
+               def unnormalized
+                       v = value()
+                       return nil if v.nil?
+                       @unnormalized = Text::unnormalize(v, parent)
+                       @unnormalized
+               end
+
+               #once :unnormalized
+
+               # Returns the value of this entity unprocessed -- raw.  This is the
+               # normalized value; that is, with all %ent; and &ent; entities intact
+               def normalized
+                       # The stored value, exactly as it was given to the constructor.
+                       @value
+               end
+
+               # Write out a fully formed, correct entity definition (assuming the Entity
+               # object itself is valid.)
+               # out:: receiver of the text; only << is called on it.
+               # indent:: accepted but not used by this method.
+               def write out, indent=-1
+                       out << '<!ENTITY '
+                       out << '% ' if @reference
+                       out << @name
+                       out << ' '
+                       if @external
+                               out << @external << ' '
+                               if @pubid
+                                       # Quote with ' when the text itself contains ", else with ".
+                                       q = @pubid.include?('"')?"'":'"'
+                                       out << q << @pubid << q << ' '
+                               end
+                               q = @ref.include?('"')?"'":'"'
+                               out << q << @ref << q
+                               out << ' NDATA ' << @ndata if @ndata
+                       else
+                               q = @value.include?('"')?"'":'"'
+                               out << q << @value << q
+                       end
+                       out << '>'
+               end
+
+               # Returns this entity as a string.  See write().
+               def to_s
+                       # write() appends to whatever it is given, so collect into a String.
+                       rv = ''
+                       write rv
+                       rv
+               end
+
+               PEREFERENCE_RE = /#{PEREFERENCE}/um
+               # Returns the value of this entity.  At the moment, only internal entities
+               # are processed.  If the value contains internal references (IE,
+               # %blah;), those are replaced with their values.  IE, if the doctype
+               # contains:
+               #  <!ENTITY % foo "bar">
+               #  <!ENTITY yada "nanoo %foo; nanoo">
+               # then:
+               #  doctype.entity('yada').value   #-> "nanoo bar nanoo"
+               def value
+                       if @value
+                               matches = @value.scan(PEREFERENCE_RE)
+                               rv = @value.clone
+                               if @parent
+                                       matches.each do |entity_reference|
+                                               entity_value = @parent.entity( entity_reference[0] )
+                                               rv.gsub!( /%#{entity_reference};/um, entity_value )
+                                       end
+                               end
+                               return rv
+                       end
+                       nil
+               end
+       end
+
+       # This is a set of entity constants -- the ones defined in the XML
+       # specification.  These are +gt+, +lt+, +amp+, +quot+ and +apos+.
+       # Each constant is a plain name/value Entity built with Entity.new.
+       module EntityConst
+               # +>+
+               GT = Entity.new( 'gt', '>' )
+               # +<+
+               LT = Entity.new( 'lt', '<' )
+               # +&+
+               AMP = Entity.new( 'amp', '&' )
+               # +"+
+               QUOT = Entity.new( 'quot', '"' )
+               # +'+
+               APOS = Entity.new( 'apos', "'" )
+       end
+end
diff --git a/lib/booh/rexml/functions.rb b/lib/booh/rexml/functions.rb
new file mode 100644 (file)
index 0000000..cad4f6a
--- /dev/null
@@ -0,0 +1,383 @@
+module REXML
+  # If you add a method, keep in mind two things:
+  # (1) the first argument will always be a list of nodes from which to
+  # filter.  In the case of context methods (such as position), the function
+  # should return an array with a value for each child in the array.
+  # (2) all method calls from XML will have "-" replaced with "_".
+  # Therefore, in XML, "local-name()" is identical (and actually becomes)
+  # "local_name()"
+  module Functions
+    @@context = nil
+    @@namespace_context = {}
+    @@variables = {}
+
+    # Readers and writers for the module-wide @@namespace_context and
+    # @@variables.
+    def Functions::namespace_context=(x) ; @@namespace_context=x ; end
+    def Functions::variables=(x) ; @@variables=x ; end
+    def Functions::namespace_context ; @@namespace_context ; end
+    def Functions::variables ; @@variables ; end
+
+    def Functions::context=(value); @@context = value; end
+
+    def Functions::text( )
+      if @@context[:node].node_type == :element
+        return @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
+      elsif @@context[:node].node_type == :text
+        return @@context[:node].value
+      else
+        return false
+      end
+    end
+
+    # Returns the :size entry of the current context.
+    def Functions::last( )
+      @@context[:size]
+    end
+
+    # Returns the :index entry of the current context.
+    def Functions::position( )
+      @@context[:index]
+    end
+
+    def Functions::count( node_set )
+      node_set.size
+    end
+
+    # Since REXML is non-validating, this method is not implemented as it
+    # requires a DTD
+    def Functions::id( object )
+    end
+
+    # UNTESTED
+    # Returns the local name of the first node that get_namespace yields: the
+    # +return+ inside the block leaves this method on the first yield.
+    def Functions::local_name( node_set=nil )
+      get_namespace( node_set ) do |node|
+        return node.local_name 
+      end
+    end
+
+    # Evaluates +namespace+ for the nodes get_namespace yields and returns
+    # whatever get_namespace returns.
+    def Functions::namespace_uri( node_set=nil )
+      get_namespace( node_set ) {|node| node.namespace}
+    end
+
+    # Evaluates +expanded_name+ for the nodes get_namespace yields and returns
+    # whatever get_namespace returns.
+    def Functions::name( node_set=nil )
+      get_namespace( node_set ) do |node| 
+        node.expanded_name
+      end
+    end
+
+    # Helper method.
+    # Yields the nodes that have a +namespace+ method: the context node when
+    # +node_set+ is nil, every such member when +node_set+ responds to +each+,
+    # or +node_set+ itself when it responds to +namespace+.
+    # NOTE(review): in the +each+ branch the result of this method is
+    # whatever node_set.each returns, not the block's value, unlike the two
+    # single-node branches -- confirm that callers expect this.
+    def Functions::get_namespace( node_set = nil )
+      if node_set == nil
+        yield @@context[:node] if defined? @@context[:node].namespace
+      else  
+        if node_set.respond_to? :each
+          node_set.each { |node| yield node if defined? node.namespace }
+        elsif node_set.respond_to? :namespace
+          yield node_set
+        end
+      end
+    end
+
+    # A node-set is converted to a string by returning the string-value of the
+    # node in the node-set that is first in document order. If the node-set is
+    # empty, an empty string is returned.
+    #
+    # A number is converted to a string as follows
+    #
+    # NaN is converted to the string NaN 
+    #
+    # positive zero is converted to the string 0 
+    #
+    # negative zero is converted to the string 0 
+    #
+    # positive infinity is converted to the string Infinity 
+    #
+    # negative infinity is converted to the string -Infinity 
+    #
+    # if the number is an integer, the number is represented in decimal form
+    # as a Number with no decimal point and no leading zeros, preceded by a
+    # minus sign (-) if the number is negative
+    #
+    # otherwise, the number is represented in decimal form as a Number
+    # including a decimal point with at least one digit before the decimal
+    # point and at least one digit after the decimal point, preceded by a
+    # minus sign (-) if the number is negative; there must be no leading zeros
+    # before the decimal point apart possibly from the one required digit
+    # immediately before the decimal point; beyond the one required digit
+    # after the decimal point there must be as many, but only as many, more
+    # digits as are needed to uniquely distinguish the number from all other
+    # IEEE 754 numeric values.
+    #
+    # The boolean false value is converted to the string false. The boolean
+    # true value is converted to the string true.
+    #
+    # An object of a type other than the four basic types is converted to a
+    # string in a way that is dependent on that type.
+    def Functions::string( object=nil )
+      #object = @context unless object
+      if object.instance_of? Array
+        string( object[0] )
+      elsif defined? object.node_type
+        if object.node_type == :attribute
+          object.value
+        elsif object.node_type == :element || object.node_type == :document
+          string_value(object)
+        else
+          object.to_s
+        end
+      elsif object.nil?
+        return ""
+      else
+        object.to_s
+      end
+    end
+
+    def Functions::string_value( o )
+      rv = ""
+      o.children.each { |e|
+        if e.node_type == :text
+          rv << e.to_s
+        elsif e.node_type == :element
+          rv << string_value( e )
+        end
+      }
+      rv
+    end
+
+    # UNTESTED
+    def Functions::concat( *objects )
+      objects.join
+    end
+
+    # Fixed by Mike Stok
+    # True when the string form of +test+ is found at offset 0 of the string
+    # form of +string+.
+    def Functions::starts_with( string, test )
+      string(string).index(string(test)) == 0
+    end
+
+    # Fixed by Mike Stok
+    def Functions::contains( string, test )
+      string(string).include?(string(test))
+    end
+
+    # Kouhei fixed this 
+    def Functions::substring_before( string, test )
+      ruby_string = string(string)
+      ruby_index = ruby_string.index(string(test))
+      if ruby_index.nil?
+        ""
+      else
+        ruby_string[ 0...ruby_index ]
+      end
+    end
+    # Kouhei fixed this too
+    def Functions::substring_after( string, test )
+      ruby_string = string(string)
+      test_string = string(test)
+      return $1 if ruby_string =~ /#{test}(.*)/
+      ""
+    end
+
+    # Take equal portions of Mike Stok and Sean Russell; mix 
+    # vigorously, and pour into a tall, chilled glass.  Serves 10,000.
+    # Returns the portion of +string+ that begins at the 1-based position
+    # +start+ (rounded) and extends for +length+ (rounded) units of Ruby
+    # String indexing, or to the end of the string when +length+ is omitted
+    # or positive infinity.  A +start+ before position 1 is clamped and, for
+    # a finite +length+, shortens it by the same amount.  Returns '' when
+    # either number is NaN, when +start+ is infinite, or when the resulting
+    # length is not positive.
+    def Functions::substring( string, start, length=nil )
+      ruby_string = string(string)
+      ruby_length = if length.nil? 
+                      ruby_string.length.to_f
+                    else
+                      number(length)
+                    end
+      ruby_start = number(start)
+
+      # Handle the special cases
+      return '' if (
+        ruby_length.nan? or 
+        ruby_start.nan? or
+        ruby_start.infinite?
+      )
+
+      # infinite? returns 1 only for positive infinity.
+      infinite_length = ruby_length.infinite? == 1
+      ruby_length = ruby_string.length if infinite_length
+        
+      # Now, get the bounds.  The XPath bounds are 1..length; the ruby bounds 
+      # are 0..length.  Therefore, we have to offset the bounds by one.
+      ruby_start = ruby_start.round - 1
+      ruby_length = ruby_length.round
+
+      if ruby_start < 0
+       ruby_length += ruby_start unless infinite_length
+       ruby_start = 0
+      end
+      return '' if ruby_length <= 0
+      ruby_string[ruby_start,ruby_length]
+    end
+
+    # UNTESTED
+    def Functions::string_length( string )
+      string(string).length
+    end
+
+    # UNTESTED
+    def Functions::normalize_space( string=nil )
+      string = string(@@context[:node]) if string.nil?
+      if string.kind_of? Array
+        string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
+      else
+        string.to_s.strip.gsub(/\s+/um, ' ')
+      end
+    end
+
+    # This is entirely Mike Stok's beast
+    def Functions::translate( string, tr1, tr2 )
+      from = string(tr1)
+      to = string(tr2)
+
+      # the map is our translation table.
+      #
+      # if a character occurs more than once in the
+      # from string then we ignore the second &
+      # subsequent mappings
+      #
+      # if a charactcer maps to nil then we delete it
+      # in the output.  This happens if the from
+      # string is longer than the to string
+      #
+      # there's nothing about - or ^ being special in
+      # http://www.w3.org/TR/xpath#function-translate
+      # so we don't build ranges or negated classes
+
+      map = Hash.new
+      0.upto(from.length - 1) { |pos|
+        from_char = from[pos]
+        unless map.has_key? from_char
+          map[from_char] = 
+          if pos < to.length
+            to[pos]
+          else
+            nil
+          end
+        end
+      }
+
+      string(string).unpack('U*').collect { |c|
+        if map.has_key? c then map[c] else c end
+      }.compact.pack('U*')
+    end
+
+    # UNTESTED
+    # Converts +object+ to true or false.  A String containing a digit is
+    # judged by its numeric value (to_f != 0), any other String by being
+    # non-empty; an Array by whether it holds at least one truthy member;
+    # everything else by ordinary Ruby truthiness.
+    # NOTE(review): XPath 1.0 treats every non-empty string as true, so "0"
+    # would be true there but is false here -- confirm this is intended.
+    def Functions::boolean( object=nil )
+      if object.kind_of? String
+        if object =~ /\d+/u
+          return object.to_f != 0
+        else
+          return object.size > 0
+        end
+      elsif object.kind_of? Array
+        object = object.find{|x| x and true}
+      end
+      return object ? true : false
+    end
+
+    # UNTESTED
+    def Functions::not( object )
+      not boolean( object )
+    end
+
+    # UNTESTED
+    def Functions::true( )
+      true
+    end
+
+    # UNTESTED
+    def Functions::false(  )
+      false
+    end
+
+    # UNTESTED
+    def Functions::lang( language )
+      lang = false
+      node = @@context[:node]
+      attr = nil
+      until node.nil?
+        if node.node_type == :element
+          attr = node.attributes["xml:lang"]
+          unless attr.nil?
+            lang = compare_language(string(language), attr)
+            break
+          else
+          end
+        end
+        node = node.parent
+      end
+      lang
+    end
+
+    # True when +lang2+ begins with +lang1+, ignoring case.
+    def Functions::compare_language lang1, lang2
+      lang2.downcase.index(lang1.downcase) == 0
+    end
+
+    # a string that consists of optional whitespace followed by an optional
+    # minus sign followed by a Number followed by whitespace is converted to
+    # the IEEE 754 number that is nearest (according to the IEEE 754
+    # round-to-nearest rule) to the mathematical value represented by the
+    # string; any other string is converted to NaN
+    #
+    # boolean true is converted to 1; boolean false is converted to 0
+    #
+    # a node-set is first converted to a string as if by a call to the string
+    # function and then converted in the same way as a string argument
+    #
+    # an object of a type other than the four basic types is converted to a
+    # number in a way that is dependent on that type
+    def Functions::number( object=nil )
+      object = @@context[:node] unless object
+      case object
+      when true
+        Float(1)
+      when false
+        Float(0)
+      when Array
+        number(string( object ))
+      when Numeric
+        object.to_f
+      else
+        str = string( object )
+        #puts "STRING OF #{object.inspect} = #{str}"
+        # If XPath ever gets scientific notation...
+        #if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
+        if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
+          str.to_f
+        else
+          (0.0 / 0.0)
+        end
+      end
+    end
+
+    def Functions::sum( nodes )
+      nodes = [nodes] unless nodes.kind_of? Array
+      nodes.inject(0) { |r,n| r += number(string(n)) }
+    end
+    
+    def Functions::floor( number )
+      number(number).floor
+    end
+
+    def Functions::ceiling( number )
+      number(number).ceil
+    end
+
+    def Functions::round( number )
+      begin
+        number(number).round
+      rescue FloatDomainError
+        number(number)
+      end
+    end
+
+    # True when +node+ reports the node type :processing_instruction.
+    def Functions::processing_instruction( node )
+      node.node_type == :processing_instruction
+    end
+
+    # Fallback for names that are not defined in this module: prints a
+    # diagnostic line with puts, then evaluates the name as an XPath against
+    # the context node.  Only the name is accepted; there is no parameter for
+    # call arguments.
+    def Functions::method_missing( id )
+      puts "METHOD MISSING #{id.id2name}"
+      XPath.match( @@context[:node], id.id2name )
+    end
+  end
+end
diff --git a/lib/booh/rexml/instruction.rb b/lib/booh/rexml/instruction.rb
new file mode 100644 (file)
index 0000000..3042daa
--- /dev/null
@@ -0,0 +1,66 @@
+require "booh/rexml/child"
+require "booh/rexml/source"
+
+module REXML
+       # Represents an XML Instruction; IE, <? ... ?>
+       # TODO: Add parent arg (3rd arg) to constructor
+       class Instruction < Child
+               # Delimiters in regexp-escaped form; write strips the backslashes
+               # again before emitting them.
+               START = '<\?'
+               STOP = '\?>'
+
+               # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
+               # content is everything else.
+               attr_accessor :target, :content
+
+               # Constructs a new Instruction
+               # @param target either a String, which becomes the target of this
+               # instruction, or an Instruction, which is shallowly cloned (its
+               # target and content are copied).  Any other kind of argument
+               # leaves target and content unset.
+               # @param content if target is a String, the content of this
+               # instruction (may be nil).  If target is an Instruction, this
+               # argument is passed on to the superclass constructor instead
+               # (presumably as the parent -- confirm against Child).
+               def initialize(target, content=nil)
+                       if target.kind_of? String
+                               super()
+                               @target = target
+                               @content = content
+                       elsif target.kind_of? Instruction
+                               super(content)
+                               @target = target.target
+                               @content = target.content
+                       end
+                       # Strip into a fresh String: strip! would modify the caller's
+                       # string (or the content shared with the cloned Instruction)
+                       # in place, and it raises on a frozen string.
+                       @content = @content.strip if @content
+               end
+
+               # @return a new Instruction with the same target and content
+               def clone
+                       Instruction.new self
+               end
+
+               # Writes this instruction to writer as <?target content?>, or as
+               # <?target?> when there is no content.
+               # @param writer any object that accepts <<
+               # @param indent the indentation level handed to indent()
+               # @param transitive not used by this method
+               # @param ie_hack not used by this method
+               def write writer, indent=-1, transitive=false, ie_hack=false
+                       indent(writer, indent)
+                       writer << START.sub(/\\/u, '')
+                       writer << @target
+                       # Without content there is nothing to separate or append;
+                       # pushing nil onto a String writer would raise TypeError.
+                       if @content
+                               writer << ' '
+                               writer << @content
+                       end
+                       writer << STOP.sub(/\\/u, '')
+               end
+
+               # @return true if other is an Instruction, and the content and target
+               # of the other matches the target and content of this object.
+               def ==( other )
+                       other.kind_of?( Instruction ) &&
+                               other.target == @target &&
+                               other.content == @content
+               end
+
+               # @return the Symbol :processing_instruction
+               def node_type
+                       :processing_instruction
+               end
+
+               # @return a short description that names only the target
+               def inspect
+                       "<?p-i #{target} ...?>"
+               end
+       end
+end
diff --git a/lib/booh/rexml/light/node.rb b/lib/booh/rexml/light/node.rb
new file mode 100644 (file)
index 0000000..0bafe3d
--- /dev/null
@@ -0,0 +1,196 @@
+require 'booh/rexml/xmltokens'
+require 'booh/rexml/light/node'
+
+# [ :element, parent, name, attributes, children* ]
+       # a = Node.new
+       # a << "B"              # => <a>B</a>
+       # a.b                   # => <a>B<b/></a>
+       # a.b[1]                        # => <a>B<b/><b/></a>
+       # a.b[1]["x"] = "y"     # => <a>B<b/><b x="y"/></a>
+       # a.b[0].c              # => <a>B<b><c/></b><b x="y"/></a>
+       # a.b.c << "D"          # => <a>B<b><c>D</c></b><b x="y"/></a>
+module REXML
+       module Light
+               # Represents a tagged XML element.  Elements are characterized by
+               # having children, attributes, and names, and can themselves be
+               # children.
+               class Node
+                       # NOTE(review): at, push, _old_get and _old_put are called
+                       # throughout this class but are not defined in this class body,
+                       # and no superclass is declared -- presumably left over from an
+                       # Array-based version.  Confirm where they come from before
+                       # relying on any method below.
+
+                       # Splits a name into an optional prefix (capture 1) and the local
+                       # part (capture 2).
+                       NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
+                       # Node types for which size consults the last slot of @node.
+                       PARENTS = [ :element, :document, :doctype ]
+                       # Create a new element.
+                       # node may be a String, nil, or an event array whose first entry
+                       # is :start_element, :start_doctype or :start_document; for event
+                       # arrays that first entry is rewritten in place to :element,
+                       # :doctype or :document.
+                       # NOTE(review): the String and nil branches assign only to the
+                       # local variable node, after @node has been stored, so @node
+                       # keeps the raw argument in those cases -- confirm intent.
+                       def initialize node=nil
+                               @node = node
+                               if node.kind_of? String
+                                       node = [ :text, node ]
+                               elsif node.nil?
+                                       node = [ :document, nil, nil ]
+                               elsif node[0] == :start_element
+                                       node[0] = :element
+                               elsif node[0] == :start_doctype
+                                       node[0] = :doctype
+                               elsif node[0] == :start_document
+                                       node[0] = :document
+                               end
+                       end
+
+                       # Returns the size of the last slot of @node when the node type is
+                       # one of PARENTS, and 0 for every other type.
+                       def size
+                               if PARENTS.include? @node[0]
+                                       @node[-1].size
+                               else
+                                       0
+                               end
+                       end
+
+                       # Yields at(4), at(5), ... once for each index below size.
+                       def each( &block )
+                               size.times { |x| yield( at(x+4) ) }
+                       end
+
+                       # Returns slot 2, the name.
+                       def name
+                               at(2)
+                       end
+
+                       # Stores name_str in slot 2, prefixed with "#{prefix(ns)}:" when
+                       # ns is given.
+                       # NOTE(review): assignment syntax (node.name = x) passes a single
+                       # argument, so ns can only be supplied through send.
+                       def name=( name_str, ns=nil )
+                               pfx = ''
+                               pfx = "#{prefix(ns)}:" if ns
+                               _old_put(2, "#{pfx}#{name_str}")
+                       end
+
+                       # Stores node in slot 1, the parent.
+                       def parent=( node )
+                               _old_put(1,node)
+                       end
+
+                       # Runs namesplit and returns @name.
+                       def local_name
+                               namesplit
+                               @name
+                       end
+
+                       # NOTE(review): writes "prefix:name" into slot 1, the slot that
+                       # parent and parent= use, while name reads slot 2 -- looks like
+                       # the wrong index; confirm.
+                       def local_name=( name_str )
+                               _old_put( 1, "#@prefix:#{name_str}" )
+                       end
+
+                       # Delegates to prefix_of for this node.
+                       def prefix( namespace=nil )
+                               prefix_of( self, namespace )
+                       end
+
+                       # Delegates to namespace_of for this node; the default argument is
+                       # this node's own prefix.
+                       def namespace( prefix=prefix() )
+                               namespace_of( self, prefix )
+                       end
+
+                       # Looks up the prefix for namespace, remembers it in @prefix and
+                       # rewrites the stored name.
+                       # NOTE(review): like local_name=, this writes to slot 1 (the
+                       # parent slot) rather than slot 2 -- confirm.
+                       def namespace=( namespace )
+                               @prefix = prefix( namespace )
+                               pfx = ''
+                               pfx = "#@prefix:" if @prefix.size > 0
+                               _old_put(1, "#{pfx}#@name")
+                       end
+
+                       # With a String reference: looks the (optionally prefixed) key up
+                       # in slot 3.  With a Range or any other reference: fetches from
+                       # the slots starting at offset 4.
+                       # NOTE(review): only the start of a Range is shifted by 4, its end
+                       # is passed through unshifted -- confirm.
+                       def []( reference, ns=nil )
+                               if reference.kind_of? String
+                                       pfx = ''
+                                       pfx = "#{prefix(ns)}:" if ns
+                                       at(3)["#{pfx}#{reference}"]
+                               elsif reference.kind_of? Range
+                                       _old_get( Range.new(4+reference.begin, reference.end, reference.exclude_end?) )
+                               else
+                                       _old_get( 4+reference )
+                               end
+                       end
+
+                       # Evaluates path against this node with XPath.match.
+                       def =~( path )
+                               XPath.match( self, path )
+                       end
+
+                       # Doesn't handle namespaces yet
+                       # With a String reference: stores value (or ns, when value is not
+                       # given) under that key in slot 3.  Otherwise stores into the
+                       # indexed slots.
+                       # NOTE(review): the Range branch offsets by 3 whereas [] and the
+                       # integer branch offset by 4 -- confirm which is intended.
+                       def []=( reference, ns, value=nil )
+                               if reference.kind_of? String
+                                       value = ns unless value
+                                       at( 3 )[reference] = value
+                               elsif reference.kind_of? Range
+                                       _old_put( Range.new(3+reference.begin, reference.end, reference.exclude_end?), ns )
+                               else
+                                       if value
+                                               _old_put( 4+reference, ns, value )
+                                       else
+                                               _old_put( 4+reference, ns )
+                                       end
+                               end
+                       end
+
+                       # Append a child to this element, optionally under a provided namespace.
+                       # The namespace argument is ignored if the element argument is an Element
+                       # object.  Otherwise, the element argument is a string, the namespace (if
+                       # provided) is the namespace the element is created in.
+                       # NOTE(review): the method takes no namespace argument.  For a
+                       # :text node the argument is appended to the last slot; otherwise
+                       # it is wrapped in a new Node, given this node as parent and
+                       # pushed.  Returns at(-1) in both cases.
+                       def << element
+                               if node_type() == :text
+                                       at(-1) << element
+                               else
+                                       newnode = Node.new( element )
+                                       newnode.parent = self
+                                       self.push( newnode )
+                               end
+                               at(-1)
+                       end
+
+                       # Returns slot 0, the node type.
+                       def node_type
+                               _old_get(0)
+                       end
+
+                       # NOTE(review): normalizefoo is not defined in this class and the
+                       # parameter foo is never used -- possibly a garbled
+                       # "normalize(foo)"; confirm.
+                       def text=( foo )
+                               replace = at(4).kind_of?(String)? 1 : 0
+                               self._old_put(4,replace, normalizefoo)
+                       end
+
+                       # Follows slot 1 (the parent) upwards until it is empty.
+                       # NOTE(review): the last expression is the while statement itself,
+                       # so the method returns that statement's value rather than
+                       # context -- confirm.
+                       def root
+                               context = self
+                               context = context.at(1) while context.at(1)
+                       end
+
+                       # NOTE(review): compares slot 3 -- the slot [] uses for its
+                       # String lookups -- with name, whereas name reads slot 2; confirm.
+                       def has_name?( name, namespace = '' )
+                               at(3) == name and namespace() == namespace
+                       end
+
+                       # Returns this node itself.
+                       def children
+                               self
+                       end
+
+                       # Returns slot 1, the parent.
+                       def parent
+                               at(1)
+                       end
+
+                       # NOTE(review): empty body, so this returns nil instead of a
+                       # String.
+                       def to_s
+
+                       end
+
+                       private
+
+                       # Splits slot 2 with NAMESPLIT into @prefix and @name.
+                       # NOTE(review): @name.defined? is a method call on @name, not the
+                       # defined? keyword, and '' || $1 always evaluates to '' -- confirm.
+                       def namesplit
+                               return if @name.defined?
+                               at(2) =~ NAMESPLIT
+                               @prefix = '' || $1
+                               @name = $2
+                       end
+
+                       # Looks up "xmlns" (or "xmlns:prefix") in slot 3 of this node; when
+                       # prefix is not given it is taken from this node's own name.
+                       # NOTE(review): the node argument is never read, and the fallback
+                       # recurses with @node[0] rather than a parent -- confirm.
+                       def namespace_of( node, prefix=nil )
+                               if not prefix
+                                       name = at(2)
+                                       name =~ NAMESPLIT
+                                       prefix = $1
+                               end
+                               to_find = 'xmlns'
+                               to_find = "xmlns:#{prefix}" if not prefix.nil?
+                               ns = at(3)[ to_find ]
+                               ns ? ns : namespace_of( @node[0], prefix )
+                       end
+
+                       # Without a namespace: returns the prefix part of node.name.
+                       # With a namespace: searches slot 3 of this node for an entry whose
+                       # value equals it, falling back to node.parent.
+                       # NOTE(review): the value returned on a hit is whatever find
+                       # yields (presumably a [key, value] pair), not a prefix string.
+                       def prefix_of( node, namespace=nil )
+                               if not namespace
+                                       name = node.name
+                                       name =~ NAMESPLIT
+                                       $1
+                               else
+                                       ns = at(3).find { |k,v| v == namespace }
+                                       ns ? ns : prefix_of( node.parent, namespace )
+                               end
+                       end
+               end
+       end
+end
diff --git a/lib/booh/rexml/namespace.rb b/lib/booh/rexml/namespace.rb
new file mode 100644 (file)
index 0000000..841f71b
--- /dev/null
@@ -0,0 +1,47 @@
+require 'booh/rexml/xmltokens'
+
+module REXML
+       # Adds named attributes to an object.
+       module Namespace
+               include XMLTokens
+
+               # Splits a qualified name into an optional prefix (capture 1) and
+               # the local part (capture 2).
+               NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
+
+               # name is the local part and expanded_name the complete string that
+               # was last handed to name=.
+               attr_reader :name, :expanded_name
+               # The prefix part of the name; "" once name= has been given a name
+               # without a prefix.
+               attr_accessor :prefix
+
+               # Stores the full name as the expanded name, then splits it into
+               # prefix and local name.  An unprefixed name also resets @namespace
+               # to "".
+               def name=( name )
+                       @expanded_name = name
+                       name =~ NAMESPLIT
+                       found_prefix, local_part = $1, $2
+                       if found_prefix
+                               @prefix = found_prefix
+                       else
+                               @prefix = ""
+                               @namespace = ""
+                       end
+                       @name = local_part
+               end
+
+               # Compares names optionally WITH namespaces: with ns, both the
+               # namespace and the local name must match; otherwise a name that
+               # contains ":" is compared with the fully expanded name and any
+               # other name with the local name.
+               def has_name?( other, ns=nil )
+                       return (namespace() == ns and name() == other) if ns
+                       return fully_expanded_name == other if other.include? ":"
+                       name == other
+               end
+
+               alias :local_name :name
+
+               # Fully expand the name, even if the prefix wasn't specified in the
+               # source file.
+               def fully_expanded_name
+                       pfx = prefix
+                       pfx.size > 0 ? "#{pfx}:#@name" : @name
+               end
+       end
+end
diff --git a/lib/booh/rexml/node.rb b/lib/booh/rexml/node.rb
new file mode 100644 (file)
index 0000000..8e8d362
--- /dev/null
@@ -0,0 +1,64 @@
+require "booh/rexml/parseexception"
+
+module REXML
+       # Represents a node in the tree.  Nodes are never encountered except as
+       # superclasses of other objects.  Nodes have siblings.
+       module Node
+               # @return the sibling that follows this node in its parent, or nil
+               # when the node has no parent
+               def next_sibling_node
+                       @parent.nil? ? nil : @parent[ @parent.index(self) + 1 ]
+               end
+
+               # @return the sibling that precedes this node in its parent, or nil
+               # when the node has no parent or is the first child
+               def previous_sibling_node
+                       return nil if @parent.nil?
+                       pos = @parent.index(self)
+                       pos == 0 ? nil : @parent[ pos - 1 ]
+               end
+
+               # @return the output of write, collected in a new String
+               def to_s indent=-1
+                       out = ""
+                       write out, indent
+                       out
+               end
+
+               # Appends ind copies of the indent string to +to+; nothing is
+               # written when ind is below 1.  The indent string is the parent
+               # context's :indentstyle entry when there is one, else two spaces.
+               def indent to, ind
+                       indentstyle = '  '
+                       if @parent and @parent.context and not @parent.context[:indentstyle].nil?
+                               indentstyle = @parent.context[:indentstyle]
+                       end
+                       to << indentstyle*ind unless ind<1
+               end
+
+               # @return false; a plain node is not a parent
+               def parent?
+                       false
+               end
+
+               # Visit all subnodes of +self+ recursively
+               def each_recursive(&block) # :yields: node
+                       self.elements.each do |child|
+                               block.call(child)
+                               child.each_recursive(&block)
+                       end
+               end
+
+               # Find (and return) first subnode (recursively) for which the block
+               # evaluates to true. Returns +nil+ if none was found.
+               def find_first_recursive(&block) # :yields: node
+                       each_recursive { |candidate| return candidate if block.call(candidate) }
+                       nil
+               end
+
+               # Returns the position that +self+ holds in its parent's array, indexed
+               # from 1.
+               def index_in_parent
+                       parent.index(self) + 1
+               end
+       end
+end
diff --git a/lib/booh/rexml/output.rb b/lib/booh/rexml/output.rb
new file mode 100644 (file)
index 0000000..5ea7d28
--- /dev/null
@@ -0,0 +1,24 @@
+require 'booh/rexml/encoding'
+
+module REXML
+       class Output
+               include Encoding
+
+               attr_reader :encoding
+
+               # Wraps real_IO so that everything appended to this object is
+               # passed through encode first, unless encd equals UTF_8.
+               # @param real_IO the object the content is finally appended to
+               # @param encd the name of the target encoding
+               def initialize real_IO, encd="iso-8859-1"
+                       @output = real_IO
+                       self.encoding = encd
+
+                       # true when content has to go through encode
+                       @to_utf = !(encd == UTF_8)
+               end
+
+               # Appends content to the wrapped output, encoded when required.
+               def <<( content )
+                       chunk = @to_utf ? self.encode(content) : content
+                       @output << chunk
+               end
+
+               # @return a label that names the encoding of this output
+               def to_s
+                       "Output[#{encoding}]"
+               end
+       end
+end
diff --git a/lib/booh/rexml/parent.rb b/lib/booh/rexml/parent.rb
new file mode 100644 (file)
index 0000000..730e8a8
--- /dev/null
@@ -0,0 +1,166 @@
+require "booh/rexml/child"
+
+module REXML
+  # A parent has children, and has methods for accessing them.  The Parent
+  # class is never encountered except as the superclass for some other
+  # object.
+  class Parent < Child
+    include Enumerable
+
+    # Constructor
+    # @param parent if supplied, will be set as the parent of this object
+    def initialize parent=nil
+      super(parent)
+      @children = []
+    end
+
+    # Appends object to the child list and makes this object its parent.
+    # @return the object that was added
+    def add( object )
+      object.parent = self
+      @children << object
+      object
+    end
+
+    alias :push :add
+    alias :<< :push
+
+    # Puts object at the front of the child list and makes this object its
+    # parent.
+    def unshift( object )
+      object.parent = self
+      @children.unshift object
+    end
+
+    # Removes object, matched by identity, from the child list and clears
+    # its parent.  Nothing happens when object is not a child.
+    def delete( object )
+      found = false
+      @children.delete_if {|c| c.equal?(object) and found = true }
+      object.parent = nil if found
+    end
+
+    # Calls the block once for every child, in order.
+    def each(&block)
+      @children.each(&block)
+    end
+
+    # Removes every child for which the block returns true.  The parent of
+    # the removed children is left untouched.
+    def delete_if( &block )
+      @children.delete_if(&block)
+    end
+
+    # Removes the child at index.  The parent of the removed child is left
+    # untouched.
+    def delete_at( index )
+      @children.delete_at index
+    end
+
+    # Calls the block once for every child index.
+    def each_index( &block )
+      @children.each_index(&block)
+    end
+
+    # Fetches a child at a given index
+    # @param index the Integer index of the child to fetch
+    def []( index )
+      @children[index]
+    end
+
+    alias :each_child :each
+
+    # Set an index entry.  See Array.[]=
+    # @param index the index of the element to set
+    # @param opt either the object to set, or an Integer length
+    # @param child if opt is an Integer, this is the child to set
+    # @return the child that was stored
+    def []=( *args )
+      args[-1].parent = self
+      @children[*args[0..-2]] = args[-1]
+    end
+
+    # Inserts an child before another child
+    # @param child1 this is either an xpath or an Element.  If an Element,
+    # child2 will be inserted before child1 in the child list of the parent.
+    # If an xpath, child2 will be inserted before the first child to match
+    # the xpath.
+    # @param child2 the child to insert
+    # @return the parent (self)
+    # Raises ArgumentError when child1 is not a child of this parent.
+    def insert_before( child1, child2 )
+      if child1.kind_of? String
+        child1 = XPath.first( self, child1 )
+        child1.parent.insert_before child1, child2
+      else
+        insert_relative_to( child1, child2, 0 )
+      end
+      self
+    end
+
+    # Inserts an child after another child
+    # @param child1 this is either an xpath or an Element.  If an Element,
+    # child2 will be inserted after child1 in the child list of the parent.
+    # If an xpath, child2 will be inserted after the first child to match
+    # the xpath.
+    # @param child2 the child to insert
+    # @return the parent (self)
+    # Raises ArgumentError when child1 is not a child of this parent.
+    def insert_after( child1, child2 )
+      if child1.kind_of? String
+        child1 = XPath.first( self, child1 )
+        child1.parent.insert_after child1, child2
+      else
+        insert_relative_to( child1, child2, 1 )
+      end
+      self
+    end
+
+    # @return a copy of the child list
+    def to_a
+      @children.dup
+    end
+
+    # Fetches the index of a given child
+    # @param child the child to get the index of
+    # @return the index of the child, or nil if the object is not a child
+    # of this parent.
+    def index( child )
+      # Children are matched through their hash value; the first match wins.
+      @children.each_with_index do |candidate, position|
+        return position if candidate.hash == child.hash
+      end
+      nil
+    end
+
+    # @return the number of children of this parent
+    def size
+      @children.size
+    end
+
+    alias :length :size
+
+    # Replaces one child with another, making sure the nodelist is correct
+    # @param to_replace the child to replace (must be a Child)
+    # @param replacement the child to insert into the nodelist (must be a
+    # Child)
+    def replace_child( to_replace, replacement )
+      @children.map! {|c| c.equal?( to_replace ) ? replacement : c }
+      to_replace.parent = nil
+      replacement.parent = self
+    end
+
+    # Deeply clones this object.  This creates a complete duplicate of this
+    # Parent, including all descendants.
+    def deep_clone
+      cl = clone()
+      each do |child|
+        if child.kind_of? Parent
+          cl << child.deep_clone
+        else
+          cl << child.clone
+        end
+      end
+      cl
+    end
+
+    alias :children :to_a
+
+    # @return true; this object can hold children
+    def parent?
+      true
+    end
+
+    private
+
+    # Places child2 offset positions after the slot of child1 (0 = directly
+    # before it, 1 = directly after it) and makes this object its parent.
+    def insert_relative_to( child1, child2, offset )
+      # Inserting a child next to itself leaves the list as it is.
+      return if child1.equal?( child2 )
+      if index(child1).nil?
+        raise ArgumentError, "reference child is not a child of this parent"
+      end
+      # Detach first and look the position up afterwards: when child2 already
+      # sits in this parent, removing it shifts the slot of child1.
+      child2.parent.delete(child2) if child2.parent
+      @children[index(child1) + offset, 0] = child2
+      child2.parent = self
+    end
+  end
+end
diff --git a/lib/booh/rexml/parseexception.rb b/lib/booh/rexml/parseexception.rb
new file mode 100644 (file)
index 0000000..feb7a7e
--- /dev/null
@@ -0,0 +1,51 @@
+module REXML
+  # Raised for parse errors.  Besides the message it can carry the source
+  # that was being read, the parser, and an earlier exception that led to
+  # this one.
+  class ParseException < RuntimeError
+    attr_accessor :source, :parser, :continued_exception
+
+    # message:: the error message
+    # source:: the source being parsed; used by to_s, line, position and
+    #          context
+    # parser:: the parser that raised the error
+    # exception:: an earlier exception to quote in to_s
+    def initialize( message, source=nil, parser=nil, exception=nil )
+      super(message)
+      @source = source
+      @parser = parser
+      @continued_exception = exception
+    end
+
+    # Builds the full report: the quoted original exception (if any), the
+    # message, and -- when a source is known -- line, position and the start
+    # of the unconsumed buffer.
+    def to_s
+      # Quote the original exception, if there was one
+      if @continued_exception
+        err = @continued_exception.inspect
+        err << "\n"
+        # backtrace is nil for an exception that was built but never raised
+        err << (@continued_exception.backtrace || []).join("\n")
+        err << "\n...\n"
+      else
+        err = ""
+      end
+
+      # Get the stack trace and error message
+      err << super
+
+      # Add contextual information
+      if @source
+        err << "\nLine: #{line}\n"
+        err << "Position: #{position}\n"
+        err << "Last 80 unconsumed characters:\n"
+        # start, length: exactly the 80 characters the heading announces
+        err << @source.buffer[0, 80].gsub(/\n/, ' ')
+      end
+
+      err
+    end
+
+    # Returns entry 0 of the source's current_line, or nil when that is not
+    # available.
+    def position
+      info = current_line_info
+      info[0] if info
+    end
+
+    # Returns entry 2 of the source's current_line, or nil when that is not
+    # available.
+    def line
+      info = current_line_info
+      info[2] if info
+    end
+
+    # Returns the source's current_line as a whole, or nil when that is not
+    # available.
+    def context
+      current_line_info
+    end
+
+    private
+
+    # Asks the source for current_line once.  Returns nil when there is no
+    # source, when the source has no current_line method, or when it reports
+    # nothing.
+    def current_line_info
+      return nil unless @source and @source.respond_to?( :current_line )
+      @source.current_line or nil
+    end
+  end  
+end
diff --git a/lib/booh/rexml/parsers/baseparser.rb b/lib/booh/rexml/parsers/baseparser.rb
new file mode 100644 (file)
index 0000000..4896a03
--- /dev/null
@@ -0,0 +1,463 @@
+require 'booh/rexml/parseexception'
+require 'booh/rexml/source'
+
+module REXML
+  module Parsers
+    # = Using the Pull Parser
+    # <em>This API is experimental, and subject to change.</em>
+    #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
+    #  end
+    # See the PullEvent class for information on the content of the results.
+    # The data is identical to the arguments passed for the various events to
+    # the StreamListener API.
+    #
+    # Notice that:
+    #  parser = PullParser.new( "<a>BAD DOCUMENT" )
+    #  while parser.has_next?
+    #    res = parser.next
+    #    raise res[1] if res.error?
+    #  end
+    #
+    # Nat Price gave me some good ideas for the API.
+    class BaseParser
+      # Regexp source for an unprefixed name and for an optionally prefixed
+      # one.
+      # NOTE(review): NCNAME_STR also accepts ':' as its first character.
+      NCNAME_STR= '[\w:][\-\w\d.]*'
+      NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
+
+      # Regexp source fragments (Strings, not Regexps) for names, name tokens
+      # and entity/character references; they are interpolated into the
+      # patterns further down.  NAME carries one capture group.
+      NAMECHAR = '[\-\w\d\.:]'
+      NAME = "([\\w:]#{NAMECHAR}*)"
+      NMTOKEN = "(?:#{NAMECHAR})+"
+      NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
+      REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
+      REFERENCE_RE = /#{REFERENCE}/
+
+      # Patterns for document-level markup.  The *_START patterns recognise the
+      # opening of a construct at the head of the input (pull uses several of
+      # them as +when+ tests); the corresponding *_PATTERN captures its body.
+      DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
+      DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
+      # Captures: 1 = attribute name, 2 = the quote character, 3 = the value.
+      ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
+      COMMENT_START = /\A<!--/u
+      COMMENT_PATTERN = /<!--(.*?)-->/um
+      CDATA_START = /\A<!\[CDATA\[/u
+      CDATA_END = /^\s*\]\s*>/um
+      CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
+      XMLDECL_START = /\A<\?xml\s/u;
+      XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
+      INSTRUCTION_START = /\A<\?/u
+      # Captures: 1 = the target, 2 = the content including its leading
+      # whitespace (optional).
+      INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
+      # Captures: 1 = tag name, 2 = the raw attribute text, 3 = the quote of
+      # the last attribute, 4 = the "/" of an empty-element tag.
+      TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um
+      CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+
+      # Pseudo-attributes of the XML declaration; capture 1 is the quoted
+      # value.
+      VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
+      ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
+      # Whitespace after "=" is optional here as well: a lone \s demanded
+      # exactly one blank and therefore never matched standalone="yes".
+      STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
+
+      # Patterns for declarations inside a DOCTYPE.  As above, the *_START
+      # patterns recognise the opening of a declaration and the longer
+      # patterns capture its parts.
+      ENTITY_START = /^\s*<!ENTITY/
+      # Applied by pull to the text captured by DOCTYPE_PATTERN; captures:
+      # 1 = name, 2-4 = up to three optional further tokens.
+      IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u
+      ELEMENTDECL_START = /^\s*<!ELEMENT/um
+      ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
+      # A string consisting only of a %name; reference and whitespace.
+      SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
+      # Regexp source fragments (Strings) from which ATTDEF is assembled.
+      ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
+      NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
+      ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
+      ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
+      ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
+      DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
+      # One attribute definition: name, type and default declaration.
+      ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
+      ATTDEF_RE = /#{ATTDEF}/
+      ATTLISTDECL_START = /^\s*<!ATTLIST/um
+      # Capture 1 is the element name; pull re-scans the whole match with
+      # ATTDEF_RE to get at the individual definitions.
+      ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
+      NOTATIONDECL_START = /^\s*<!NOTATION/um
+      # NOTATION declarations with a PUBLIC or a SYSTEM identifier.  Captures:
+      # 1 = name, 2 = the keyword, 4 = the first quoted literal; PUBLIC has an
+      # optional second literal in capture 6.
+      PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
+      SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
+
+      # Everything from the start of the input up to the next "<".
+      TEXT_PATTERN = /\A([^<]*)/um
+
+      # Entity constants
+      # Regexp source fragments (Strings) from which ENTITYDECL is assembled.
+      PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
+      SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+      PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+      EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+      NDATADECL = "\\s+NDATA\\s+#{NAME}"
+      PEREFERENCE = "%#{NAME};"
+      ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+      PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+      ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+      # Parameter-entity and general-entity declarations.
+      PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
+      GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
+      # NOTE(review): the alternation is top-level, so the leading \s* belongs
+      # to the GEDECL branch only.
+      ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+
+      # An "&" that does not start a named entity reference.
+      EREFERENCE = /&(?!#{NAME};)/
+
+      # The predefined entities, keyed by name.  Each entry holds, in order: a
+      # pattern for the reference, the reference text, the character it stands
+      # for, and a pattern for that character.
+      DEFAULT_ENTITIES = { 
+        'gt' => [/&gt;/, '&gt;', '>', />/], 
+        'lt' => [/&lt;/, '&lt;', '<', /</], 
+        'quot' => [/&quot;/, '&quot;', '"', /"/], 
+        "apos" => [/&apos;/, "&apos;", "'", /'/] 
+      }
+
+
+      ######################################################################
+      # These are patterns to identify common markup errors, to make the
+      # error messages more informative.
+      ######################################################################
+      # A start tag whose first attribute value does not begin with a quote.
+      MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
+
+      # Creates a parser for +source+ by handing it to stream=, which wraps it
+      # with SourceFactory.create_from and resets the parser state.
+      def initialize( source )
+        self.stream = source
+      end
+
+      # Registers +listener+ so that it is told about every event pull returns.
+      # On the first registration pull is wrapped on this instance: the
+      # original is kept as _old_pull, and the replacement passes each event to
+      # listener.receive for every registered listener before returning it.
+      # Later registrations only extend the listener list.
+      def add_listener( listener )
+        if !defined?(@listeners) or !@listeners
+          @listeners = []
+          instance_eval <<-EOL
+            alias :_old_pull :pull
+            def pull
+              event = _old_pull
+              @listeners.each do |listener|
+                listener.receive event
+              end
+              event
+            end
+          EOL
+        end
+        @listeners << listener
+      end
+
+      # The source object that SourceFactory built for the current stream.
+      attr_reader :source
+
+      # Points the parser at a new input: the argument is wrapped with
+      # SourceFactory.create_from and all parsing state (pending close tag,
+      # document status, tag list, event stack, entities) is reset.
+      def stream=( source )
+        @source = SourceFactory.create_from( source )
+        @closed = @document_status = nil
+        @tags, @stack = [], []
+        @entities = []
+      end
+
+      # Returns the position reported by the source, or 0 when the source has
+      # no position method.
+      def position
+        return @source.position if @source.respond_to? :position
+        # FIXME
+        0
+      end
+
+      # Returns true if there are no more events
+      def empty?
+        # Both the input and the stack of pushed-back events must be used up.
+        @source.empty? && @stack.empty?
+      end
+
+      # Returns true if there are more events.  Synonymous with !empty?
+      def has_next?
+        # Either the input or the stack of pushed-back events still holds
+        # something.
+        !@source.empty? || !@stack.empty?
+      end
+
+      # Push an event back on the head of the stream.  This method
+      # has (theoretically) infinite depth.
+      # token:: the event to hand out again; it is placed at the front of
+      #         @stack, from which pull serves events before it reads input
+      def unshift token
+        @stack.unshift(token)
+      end
+
+      # Peek at the +depth+ event in the stack.  The first element on the stack
+      # is at depth 0.  If +depth+ is -1, will parse to the end of the input
+      # stream and return the last event, which is always :end_document.
+      # Be aware that this causes the stream to be parsed up to the +depth+ 
+      # event, so you can effectively pre-parse the entire document (pull the 
+      # entire thing into memory) using this method.  
+      def peek depth=0
+        raise %Q[Illegal argument "#{depth}"] if depth < -1
+        pulled = []
+        if depth == -1
+          # Drain the whole input.
+          pulled.push(pull()) until empty?
+        else
+          # Pull until stack and freshly pulled events together reach depth.
+          wanted = depth + 1
+          pulled.push(pull()) while @stack.size + pulled.size < wanted
+        end
+        # Everything that was pulled goes back onto the stack for later reads.
+        @stack += pulled unless pulled.empty?
+        @stack[depth]
+      end
+
+      # Returns the next event.  This is a +PullEvent+ object.
+      def pull
+        if @closed
+          x, @closed = @closed, nil
+          return [ :end_element, x ]
+        end
+        return [ :end_document ] if empty?
+        return @stack.shift if @stack.size > 0
+        @source.read if @source.buffer.size<2
+        #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
+        if @document_status == nil
+          #@source.consume( /^\s*/um )
+          word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
+          word = word[1] unless word.nil?
+          #STDERR.puts "WORD = #{word.inspect}"
+          case word
+          when COMMENT_START
+            return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+          when XMLDECL_START
+            #STDERR.puts "XMLDECL"
+            results = @source.match( XMLDECL_PATTERN, true )[1]
+            version = VERSION.match( results )
+            version = version[1] unless version.nil?
+            encoding = ENCODING.match(results)
+            encoding = encoding[1] unless encoding.nil?
+            @source.encoding = encoding
+            standalone = STANDALONE.match(results)
+            standalone = standalone[1] unless standalone.nil?
+            return [ :xmldecl, version, encoding, standalone ]
+          when INSTRUCTION_START
+            return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
+          when DOCTYPE_START
+            md = @source.match( DOCTYPE_PATTERN, true )
+            identity = md[1]
+            close = md[2]
+            identity =~ IDENTITY
+            name = $1
+            raise REXML::ParseException("DOCTYPE is missing a name") if name.nil?
+            pub_sys = $2.nil? ? nil : $2.strip
+            long_name = $3.nil? ? nil : $3.strip
+            uri = $4.nil? ? nil : $4.strip
+            args = [ :start_doctype, name, pub_sys, long_name, uri ]
+            if close == ">"
+              @document_status = :after_doctype
+              @source.read if @source.buffer.size<2
+              md = @source.match(/^\s*/um, true)
+              @stack << [ :end_doctype ]
+            else
+              @document_status = :in_doctype
+            end
+            return args
+          when /^\s+/
+          else
+            @document_status = :after_doctype
+            @source.read if @source.buffer.size<2
+            md = @source.match(/\s*/um, true)
+          end
+        end
+        if @document_status == :in_doctype
+          md = @source.match(/\s*(.*?>)/um)
+          case md[1]
+          when SYSTEMENTITY 
+            match = @source.match( SYSTEMENTITY, true )[1]
+            return [ :externalentity, match ]
+
+          when ELEMENTDECL_START
+            return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+
+          when ENTITY_START
+            match = @source.match( ENTITYDECL, true ).to_a.compact
+            match[0] = :entitydecl
+            ref = false
+            if match[1] == '%'
+              ref = true
+              match.delete_at 1
+            end
+            # Now we have to sort out what kind of entity reference this is
+            if match[2] == 'SYSTEM'
+              # External reference
+              match[3] = match[3][1..-2] # PUBID
+              match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+              # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+            elsif match[2] == 'PUBLIC'
+              # External reference
+              match[3] = match[3][1..-2] # PUBID
+              match[4] = match[4][1..-2] # HREF
+              # match is [ :entity, name, PUBLIC, pubid, href ]
+            else
+              match[2] = match[2][1..-2]
+              match.pop if match.size == 4
+              # match is [ :entity, name, value ]
+            end
+            match << '%' if ref
+            return match
+          when ATTLISTDECL_START
+            md = @source.match( ATTLISTDECL_PATTERN, true )
+            raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+            element = md[1]
+            contents = md[0]
+
+            pairs = {}
+            values = md[0].scan( ATTDEF_RE )
+            values.each do |attdef|
+              unless attdef[3] == "#IMPLIED"
+                attdef.compact!
+                val = attdef[3]
+                val = attdef[4] if val == "#FIXED "
+                pairs[attdef[0]] = val
+              end
+            end
+            return [ :attlistdecl, element, pairs, contents ]
+          when NOTATIONDECL_START
+            md = nil
+            if @source.match( PUBLIC )
+              md = @source.match( PUBLIC, true )
+              vals = [md[1],md[2],md[4],md[6]]
+            elsif @source.match( SYSTEM )
+              md = @source.match( SYSTEM, true )
+              vals = [md[1],md[2],nil,md[4]]
+            else
+              raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+            end
+            return [ :notationdecl, *vals ]
+          when CDATA_END
+            @document_status = :after_doctype
+            @source.match( CDATA_END, true )
+            return [ :end_doctype ]
+          end
+        end
+        begin
+          if @source.buffer[0] == ?<
+            if @source.buffer[1] == ?/
+              last_tag = @tags.pop
+              #md = @source.match_to_consume( '>', CLOSE_MATCH)
+              md = @source.match( CLOSE_MATCH, true )
+              raise REXML::ParseException.new( "Missing end tag for "+
+                "'#{last_tag}' (got \"#{md[1]}\")", 
+                @source) unless last_tag == md[1]
+              return [ :end_element, last_tag ]
+            elsif @source.buffer[1] == ?!
+              md = @source.match(/\A(\s*[^>]*>)/um)
+              #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+              raise REXML::ParseException.new("Malformed node", @source) unless md
+              if md[0][2] == ?-
+                md = @source.match( COMMENT_PATTERN, true )
+                return [ :comment, md[1] ] if md
+              else
+                md = @source.match( CDATA_PATTERN, true )
+                return [ :cdata, md[1] ] if md
+              end
+              raise REXML::ParseException.new( "Declarations can only occur "+
+                "in the doctype declaration.", @source)
+            elsif @source.buffer[1] == ??
+              md = @source.match( INSTRUCTION_PATTERN, true )
+              return [ :processing_instruction, md[1], md[2] ] if md
+              raise REXML::ParseException.new( "Bad instruction declaration",
+                @source)
+            else
+              # Get the next tag
+              md = @source.match(TAG_MATCH, true)
+              unless md
+                # Check for missing attribute quotes
+                raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
+                raise REXML::ParseException.new("malformed XML: missing tag start", @source) 
+              end
+              attrs = []
+              if md[2].size > 0
+                attrs = md[2].scan( ATTRIBUTE_PATTERN )
+                raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+              end
+        
+              if md[4]
+                @closed = md[1]
+              else
+                @tags.push( md[1] )
+              end
+              attributes = {}
+              attrs.each { |a,b,c| attributes[a] = c }
+              return [ :start_element, md[1], attributes ]
+            end
+          else
+            md = @source.match( TEXT_PATTERN, true )
+            if md[0].length == 0
+              @source.match( /(\s+)/, true )
+            end
+            #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
+            #return [ :text, "" ] if md[0].length == 0
+            # unnormalized = Text::unnormalize( md[1], self )
+            # return PullEvent.new( :text, md[1], unnormalized )
+            return [ :text, md[1] ]
+          end
+        rescue REXML::ParseException
+          raise
+        rescue Exception, NameError => error
+          raise REXML::ParseException.new( "Exception parsing",
+            @source, self, (error ? error : $!) )
+        end
+        return [ :dummy ]
+      end
+
+      def entity( reference, entities )
+        value = nil
+        value = entities[ reference ] if entities
+        if not value
+          value = DEFAULT_ENTITIES[ reference ]
+          value = value[2] if value
+        end
+        unnormalize( value, entities ) if value
+      end
+
+      # Escapes all possible entities
+      def normalize( input, entities=nil, entity_filter=nil )
+        copy = input.clone
+        # Doing it like this rather than in a loop improves the speed
+        copy.gsub!( EREFERENCE, '&amp;' )
+        entities.each do |key, value|
+          copy.gsub!( value, "&#{key};" ) unless entity_filter and 
+                                      entity_filter.include?(entity)
+        end if entities
+        copy.gsub!( EREFERENCE, '&amp;' )
+        DEFAULT_ENTITIES.each do |key, value|
+          copy.gsub!( value[3], value[1] )
+        end
+        copy
+      end
+
+      # Unescapes all possible entities in +string+ and returns the result
+      # as a new String; +string+ itself is not modified.
+      #
+      # string::   the text to unescape
+      # entities:: optional lookup of entity name to value, handed to #entity
+      # filter::   optional collection of entity names to leave untouched
+      def unnormalize( string, entities=nil, filter=nil )
+        rv = string.clone
+        # Fold CR LF and lone CR into a single LF
+        rv.gsub!( /\r\n?/, "\n" )
+        matches = rv.scan( REFERENCE_RE )
+        # Nothing matched REFERENCE_RE, so there is nothing to expand
+        return rv if matches.size == 0
+        # Numeric character references, decimal or hexadecimal
+        rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
+          m=$1
+          # Turn "x41" into "0x41" so that Integer() reads it as hex
+          m = "0#{m}" if m[0] == ?x
+          [Integer(m)].pack('U*')
+        }
+        # Keep only the first capture of each match and drop the nils
+        matches.collect!{|x|x[0]}.compact!
+        if matches.size > 0
+          # First pass: references that #entity can resolve
+          matches.each do |entity_reference|
+            unless filter and filter.include?(entity_reference)
+              entity_value = entity( entity_reference, entities )
+              if entity_value
+                re = /&#{entity_reference};/
+                rv.gsub!( re, entity_value )
+              end
+            end
+          end
+          # Second pass: apply the DEFAULT_ENTITIES entry for each name,
+          # replacing its element 0 with its element 2
+          matches.each do |entity_reference|
+            unless filter and filter.include?(entity_reference)
+              er = DEFAULT_ENTITIES[entity_reference]
+              rv.gsub!( er[0], er[2] ) if er
+            end
+          end
+          # Ampersands are restored last, after every other replacement
+          rv.gsub!( /&amp;/, '&' )
+        end
+        rv
+      end
+    end
+  end
+end
+
+=begin
+  case event[0]
+  when :start_element
+  when :text
+  when :end_element
+  when :processing_instruction
+  when :cdata
+  when :comment
+  when :xmldecl
+  when :start_doctype
+  when :end_doctype
+  when :externalentity
+  when :elementdecl
+  when :entity
+  when :attlistdecl
+  when :notationdecl
+  when :end_doctype
+  end
+=end
diff --git a/lib/booh/rexml/parsers/lightparser.rb b/lib/booh/rexml/parsers/lightparser.rb
new file mode 100644 (file)
index 0000000..8f53dad
--- /dev/null
@@ -0,0 +1,60 @@
+require 'booh/rexml/parsers/streamparser'
+require 'booh/rexml/parsers/baseparser'
+require 'booh/rexml/light/node'
+
+module REXML
+       module Parsers
+               class LightParser
+                       def initialize stream
+                               @stream = stream
+                               @parser = REXML::Parsers::BaseParser.new( stream )
+                       end
+
+      def add_listener( listener )
+        @parser.add_listener( listener )
+      end
+
+      def rewind
+        @stream.rewind
+        @parser.stream = @stream
+      end
+
+                       def parse
+                               root = context = [ :document ]
+                               while true
+                                       event = @parser.pull
+                                       case event[0]
+                                       when :end_document
+                                               break
+                                       when :end_doctype
+                                               context = context[1]
+                                       when :start_element, :start_doctype
+                                               new_node = event
+                                               context << new_node
+                                               new_node[1,0] = [context]
+                                               context = new_node
+                                       when :end_element, :end_doctype
+                                               context = context[1]
+                                       else
+                                               new_node = event
+                                               context << new_node
+                                               new_node[1,0] = [context]
+                                       end
+                               end
+                               root
+                       end
+               end
+
+               # An element is an array.  The array contains:
+               #  0                    The parent element
+               #  1                    The tag name
+               #  2                    A hash of attributes
+               #  3..-1        The child elements
+               # An element is an array of size > 3
+               # Text is a String
+               # PIs are [ :processing_instruction, target, data ]
+               # Comments are [ :comment, data ]
+               # DocTypes are DocType structs
+               # The root is an array with XMLDecls, Text, DocType, Array, Text
+       end
+end
diff --git a/lib/booh/rexml/parsers/pullparser.rb b/lib/booh/rexml/parsers/pullparser.rb
new file mode 100644 (file)
index 0000000..2d181b7
--- /dev/null
@@ -0,0 +1,196 @@
+require 'forwardable'
+
+require 'booh/rexml/parseexception'
+require 'booh/rexml/parsers/baseparser'
+require 'booh/rexml/xmltokens'
+
+module REXML
+  module Parsers
+    # = Using the Pull Parser
+    # <em>This API is experimental, and subject to change.</em>
+    #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+    #  while parser.has_next?
+    #    res = parser.pull
+    #    puts res[1]['att'] if res.start_element? and res[0] == 'b'
+    #  end
+    # See the PullEvent class for information on the content of the results.
+    # The data is identical to the arguments passed for the various events to
+    # the StreamListener API.
+    #
+    # Notice that:
+    #  parser = PullParser.new( "<a>BAD DOCUMENT" )
+    #  while parser.has_next?
+    #    res = parser.pull
+    #    raise res[1] if res.error?
+    #  end
+    #
+    # Nat Price gave me some good ideas for the API.
+    class PullParser
+      include XMLTokens
+      extend Forwardable
+
+      # has_next?, entity, empty? and source are answered by the wrapped
+      # BaseParser
+      def_delegators( :@parser, :has_next? )
+      def_delegators( :@parser, :entity )
+      def_delegators( :@parser, :empty? )
+      def_delegators( :@parser, :source )
+
+      # stream:: the input, handed unchanged to BaseParser.new
+      def initialize stream
+        @entities = {}
+        @listeners = nil
+        @parser = BaseParser.new( stream )
+        @my_stack = []
+      end
+
+      # Remembers +listener+ in @listeners.
+      # NOTE(review): nothing in this class reads @listeners afterwards --
+      # confirm whether the listener should be forwarded to @parser.
+      def add_listener( listener )
+        @listeners = [] unless @listeners
+        @listeners << listener
+      end
+
+      # Yields every remaining event, as returned by #pull
+      def each
+        while has_next?
+          yield self.pull
+        end
+      end
+
+      # Returns the event +depth+ positions ahead without consuming it,
+      # pulling from the parser as needed to fill the look-ahead buffer.
+      # NOTE(review): events buffered here skip the :entitydecl and :text
+      # handling that #pull applies to events it reads directly.
+      def peek depth=0
+        if @my_stack.length <= depth
+          (depth - @my_stack.length + 1).times {
+            e = PullEvent.new(@parser.pull)
+            @my_stack.push(e)
+          }
+        end
+        @my_stack[depth]
+      end
+
+      # Returns the next event as a PullEvent.  Buffered events (from #peek
+      # or #unshift) are returned first.
+      def pull
+        return @my_stack.shift if @my_stack.length > 0
+
+        event = @parser.pull
+        case event[0]
+        when :entitydecl
+          # Remember the declaration unless its third element mentions
+          # PUBLIC or SYSTEM
+          @entities[ event[1] ] = 
+            event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+        when :text
+          # Append the unnormalized text after the raw text
+          unnormalized = @parser.unnormalize( event[1], @entities )
+          event << unnormalized
+        end
+        PullEvent.new( event )
+      end
+
+      # Puts +token+ at the front of the buffer so that the next #pull
+      # returns it
+      def unshift token
+        @my_stack.unshift token
+      end
+    end
+
+    # A parsing event.  The contents of the event are accessed as an +Array+,
+    # and the type is given either by the ...? methods, or by calling
+    # +event_type+.  The contents of this object vary from event to event,
+    # but are identical to the arguments passed to +StreamListener+s for each
+    # event.
+    class PullEvent
+      # arg:: the event array; element 0 is the type of this event and the
+      #       remaining elements are its contents.  The predicates below test
+      #       for :start_element, :end_element, :text,
+      #       :processing_instruction, :comment, :start_doctype,
+      #       :attlistdecl, :elementdecl, :entitydecl, :notationdecl,
+      #       :entity, :cdata, :xmldecl and :error.
+      def initialize(arg)
+        @contents = arg
+      end
+
+      # Indexes into the contents of the event, skipping the event type
+      # stored at position 0: event[0] is the first content element.
+      # +start+ may be a Numeric (optionally followed by a length in +endd+)
+      # or a Range; anything else raises a RuntimeError.
+      # NOTE(review): for a Range only the start is shifted by one, so
+      # event[0..1] yields a single element while event[0..-1] yields all of
+      # them -- confirm this is intended.
+      def []( start, endd=nil)
+        if start.kind_of? Range
+          @contents.slice( start.begin+1 .. start.end )
+        elsif start.kind_of? Numeric
+          if endd.nil?
+            @contents.slice( start+1 )
+          else
+            @contents.slice( start+1, endd )
+          end
+        else
+          raise "Illegal argument #{start.inspect} (#{start.class})"
+        end
+      end
+
+      # The type symbol of this event
+      def event_type
+        @contents[0]
+      end
+
+      # Content: [ String tag_name, Hash attributes ]
+      def start_element?
+        @contents[0] == :start_element
+      end
+
+      # Content: [ String tag_name ]
+      def end_element?
+        @contents[0] == :end_element
+      end
+
+      # Content: [ String raw_text, String unnormalized_text ]
+      def text?
+        @contents[0] == :text
+      end
+
+      # Content: [ String target, String data ]
+      def instruction?
+        @contents[0] == :processing_instruction
+      end
+
+      # Content: [ String text ]
+      def comment?
+        @contents[0] == :comment
+      end
+
+      # Content: [ String name, String pub_sys, String long_name, String uri ]
+      def doctype?
+        @contents[0] == :start_doctype
+      end
+
+      # Content: [ String element, Hash pairs, String contents ]
+      def attlistdecl?
+        @contents[0] == :attlistdecl
+      end
+
+      # Content: [ String text ]
+      def elementdecl?
+        @contents[0] == :elementdecl
+      end
+
+      # Due to the wonders of DTDs, an entity declaration can be just about
+      # anything.  There's no way to normalize it; you'll have to interpret the
+      # content yourself.  However, the following is true:
+      #
+      # * If the entity declaration is an internal entity:
+      #   [ String name, String value ]
+      # Content: [ String text ]
+      def entitydecl?
+        @contents[0] == :entitydecl
+      end
+
+      # Content: [ String text ]
+      def notationdecl?
+        @contents[0] == :notationdecl
+      end
+
+      # Content: [ String text ]
+      def entity?
+        @contents[0] == :entity
+      end
+
+      # Content: [ String text ]
+      def cdata?
+        @contents[0] == :cdata
+      end
+
+      # Content: [ String version, String encoding, String standalone ]
+      def xmldecl?
+        @contents[0] == :xmldecl
+      end
+
+      # True for events whose type is :error
+      def error?
+        @contents[0] == :error
+      end
+
+      # Renders the event as "type: [contents...]"
+      def inspect
+        @contents[0].to_s + ": " + @contents[1..-1].inspect
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/parsers/sax2parser.rb b/lib/booh/rexml/parsers/sax2parser.rb
new file mode 100644 (file)
index 0000000..89e4640
--- /dev/null
@@ -0,0 +1,236 @@
+require 'booh/rexml/parsers/baseparser'
+require 'booh/rexml/parseexception'
+require 'booh/rexml/namespace'
+require 'booh/rexml/text'
+
+module REXML
+       module Parsers
+    # SAX2Parser drives a BaseParser and dispatches every pulled event to
+    # the blocks and listener objects registered through #listen.
+               class SAX2Parser
+                       def initialize source
+                               @parser = BaseParser.new(source)
+                               @listeners = []
+                               @procs = []
+                               @namespace_stack = []
+                               @has_listeners = false
+                               @tag_stack = []
+        @entities = {}
+                       end
+
+      # The source of the underlying BaseParser
+      def source
+        @parser.source
+      end
+                       
+      # Registers +listener+ with the underlying BaseParser
+      def add_listener( listener )
+        @parser.add_listener( listener )
+      end
+
+                       # Listen arguments:
+                       #
+                       # Symbol, Array, Block
+                       #       Listen to Symbol events on Array elements
+                       # Symbol, Block
+                       #   Listen to Symbol events
+                       # Array, Listener
+                       #       Listen to all events on Array elements
+                       # Array, Block
+                       #       Listen to :start_element events on Array elements
+                       # Listener
+                       #       Listen to All events
+                       #
+                       # Symbol can be one of: :start_element, :end_element,
+                       # :start_prefix_mapping, :end_prefix_mapping, :characters,
+                       # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
+                       # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+      #
+      # There is an additional symbol that can be listened for: :progress.
+      # This will be called for every event generated, passing in the current
+      # stream position.
+                       #
+                       # Array contains regular expressions or strings which will be matched
+                       # against fully qualified element names.
+                       #
+                       # Listener must implement the methods in SAX2Listener
+                       #
+                       # Block will be passed the same arguments as a SAX2Listener method would
+                       # be, where the method name is the same as the matched Symbol.
+                       # See the SAX2Listener for more information.
+                       def listen( *args, &blok )
+                               if args[0].kind_of? Symbol
+                                       if args.size == 2
+                                               args[1].each { |match| @procs << [args[0], match, blok] }
+                                       else
+                                               add( [args[0], nil, blok] )
+                                       end
+                               elsif args[0].kind_of? Array
+                                       if args.size == 2
+                                               args[0].each { |match| add( [nil, match, args[1]] ) }
+                                       else
+                                               args[0].each { |match| add( [ :start_element, match, blok ] ) }
+                                       end
+                               else
+                                       add([nil, nil, args[0]])
+                               end
+                       end
+                       
+                       # Removes the registrations of +listener+, or, when no listener
+                       # is given, the registrations whose handler equals the block.
+                       def deafen( listener=nil, &blok )
+                               if listener
+                                       @listeners.delete_if {|item| item[-1] == listener }
+                                       @has_listeners = false if @listeners.size == 0
+                               else
+                                       @procs.delete_if {|item| item[-1] == blok }
+                               end
+                       end
+                       
+                       # Notifies the :start_document handlers, then pulls events from
+                       # the BaseParser until :end_document, dispatching each of them.
+                       # A :progress event carrying @parser.position follows every event
+                       # other than :end_document.
+                       def parse
+                               @procs.each { |sym,match,block| block.call if sym == :start_document }
+                               @listeners.each { |sym,match,block| 
+                                       block.start_document if sym == :start_document or sym.nil?
+                               }
+                               root = context = []
+                               while true
+                                       event = @parser.pull
+                                       case event[0]
+                                       when :end_document
+                                               handle( :end_document )
+                                               break
+                                       when :end_doctype
+                                               context = context[1]
+                                       when :start_element
+                                               @tag_stack.push(event[1])
+                                               # find the observers for namespaces
+                                               procs = get_procs( :start_prefix_mapping, event[1] )
+                                               listeners = get_listeners( :start_prefix_mapping, event[1] )
+                                               if procs or listeners
+                                                       # break out the namespace declarations
+                                                       # The attributes live in event[2]
+                                                       event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
+                                                       nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
+                                                       # Drop the "xmlns:" prefix; a bare "xmlns" leaves nil
+                                                       nsdecl.collect! { |n, value| [ n[6..-1], value ] }
+                                                       @namespace_stack.push({})
+                                                       nsdecl.each do |n,v|
+                                                               @namespace_stack[-1][n] = v
+                                                               # notify observers of namespaces
+                                                               procs.each { |ob| ob.call( n, v ) } if procs
+                                                               listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
+                                                       end
+                                               end
+                                               event[1] =~ Namespace::NAMESPLIT
+                                               prefix = $1
+                                               local = $2
+                                               uri = get_namespace(prefix)
+                                               # find the observers for start_element
+                                               procs = get_procs( :start_element, event[1] )
+                                               listeners = get_listeners( :start_element, event[1] )
+                                               # notify observers
+                                               procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
+                                               listeners.each { |ob| 
+                                                       ob.start_element( uri, local, event[1], event[2] ) 
+                                               } if listeners
+                                       when :end_element
+                                               @tag_stack.pop
+                                               event[1] =~ Namespace::NAMESPLIT
+                                               prefix = $1
+                                               local = $2
+                                               uri = get_namespace(prefix)
+                                               # find the observers for end_element
+                                               procs = get_procs( :end_element, event[1] )
+                                               listeners = get_listeners( :end_element, event[1] )
+                                               # notify observers
+                                               procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
+                                               listeners.each { |ob| 
+                                                       ob.end_element( uri, local, event[1] ) 
+                                               } if listeners
+
+                                               namespace_mapping = @namespace_stack.pop
+                                               # find the observers for namespaces
+                                               procs = get_procs( :end_prefix_mapping, event[1] )
+                                               listeners = get_listeners( :end_prefix_mapping, event[1] )
+                                               if procs or listeners
+                                                       namespace_mapping.each do |prefix, uri|
+                                                               # notify observers of namespaces
+                                                               procs.each { |ob| ob.call( prefix ) } if procs
+                                                               listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners
+                                                       end
+                                               end
+                                       when :text
+            #normalized = @parser.normalize( event[1] )
+            #handle( :characters, normalized )
+            copy = event[1].clone
+            # Expand the entities recorded from :entitydecl events, then the
+            # numeric character references
+            @entities.each { |key, value| copy = copy.gsub("&#{key};", value) }
+            copy.gsub!( Text::NUMERICENTITY ) {|m|
+              m=$1
+              m = "0#{m}" if m[0] == ?x
+              [Integer(m)].pack('U*')
+            }
+            handle( :characters, copy )
+          when :entitydecl
+            # Only three-element declarations are remembered for expansion
+            @entities[ event[1] ] = event[2] if event.size == 3
+                                               handle( *event )
+                                       when :processing_instruction, :comment, :doctype, :attlistdecl, 
+                                               :elementdecl, :cdata, :notationdecl, :xmldecl
+                                               handle( *event )
+                                       end
+          handle( :progress, @parser.position )
+                               end
+                       end
+
+                       private
+                       # Dispatches +symbol+ with +arguments+ to every proc and listener
+                       # registered for it, matched against the tag on top of @tag_stack.
+                       # Procs are called; listeners receive a method call named after
+                       # the symbol.
+                       def handle( symbol, *arguments )
+                               tag = @tag_stack[-1]
+                               procs = get_procs( symbol, tag )
+                               listeners = get_listeners( symbol, tag )
+                               # notify observers
+                               procs.each { |ob| ob.call( *arguments ) } if procs
+                               listeners.each { |l| 
+                                       l.send( symbol.to_s, *arguments ) 
+                               } if listeners
+                       end
+
+                       # The following methods are duplicates, but it is faster than using
+                       # a helper
+                       #
+                       # Each returns nil when nothing is registered; otherwise the
+                       # handlers whose registered symbol is nil or equals +symbol+ and
+                       # whose match is nil, equals +name+, or is a Regexp matching +name+.
+                       def get_procs( symbol, name )
+                               return nil if @procs.size == 0
+                               @procs.find_all do |sym, match, block|
+          #puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
+                                       (
+                                               (sym.nil? or symbol == sym) and 
+                                               ((name.nil? and match.nil?) or match.nil? or (
+                                                       (name == match) or
+                                                       (match.kind_of? Regexp and name =~ match)
+                                                       )
+                                               )
+                                       )
+                               end.collect{|x| x[-1]}
+                       end
+                       def get_listeners( symbol, name )
+                               return nil if @listeners.size == 0
+                               @listeners.find_all do |sym, match, block|
+                                       (
+                                               (sym.nil? or symbol == sym) and 
+                                               ((name.nil? and match.nil?) or match.nil? or (
+                                                       (name == match) or
+                                                       (match.kind_of? Regexp and name =~ match)
+                                                       )
+                                               )
+                                       )
+                               end.collect{|x| x[-1]}
+                       end
+
+                       # Files the registration under @procs when its handler responds
+                       # to :call, otherwise under @listeners; duplicates are ignored.
+                       def add( pair )
+                               if pair[-1].respond_to? :call
+                                       @procs << pair unless @procs.include? pair
+                               else
+                                       @listeners << pair unless @listeners.include? pair
+                                       @has_listeners = true
+                               end
+                       end
+
+                       # Returns the URI bound to +prefix+ in the most recently pushed
+                       # namespace scope that declares it, or nil when there is none.
+                       # NOTE(review): find_all always returns an Array, which is truthy
+                       # even when empty, so the alternative after || is never evaluated.
+                       def get_namespace( prefix ) 
+        uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
+                                       (@namespace_stack.find { |ns| not ns[nil].nil? })
+                               uris[-1][prefix] unless uris.nil? or 0 == uris.size
+                       end
+               end
+       end
+end
diff --git a/lib/booh/rexml/parsers/streamparser.rb b/lib/booh/rexml/parsers/streamparser.rb
new file mode 100644 (file)
index 0000000..256d0f6
--- /dev/null
@@ -0,0 +1,46 @@
+module REXML
+  module Parsers
+    # Drives a BaseParser and forwards every event it produces to a listener object.
+    class StreamParser
+      def initialize source, listener
+        @listener = listener
+        @parser = BaseParser.new( source )
+      end
+
+      def add_listener( listener )
+        @parser.add_listener( listener )
+      end
+
+      # Pulls events until :end_document, invoking the matching listener callback for each.
+      def parse
+        while true
+          event = @parser.pull
+          case event[0]
+          when :end_document
+            return
+          when :start_element
+            attributes = event[2]
+            attributes.each { |key, raw| attributes[key] = @parser.unnormalize( raw ) }
+            @listener.tag_start( event[1], attributes )
+          when :end_element
+            @listener.tag_end( event[1] )
+          when :text
+            @listener.text( @parser.unnormalize( event[1] ) )
+          when :processing_instruction
+            @listener.instruction( *event[1,2] )
+          when :start_doctype
+            @listener.doctype( *event[1..-1] )
+          when :end_doctype
+            # FIXME: remove this condition for milestone:3.2
+            @listener.doctype_end if @listener.respond_to? :doctype_end
+          when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
+            @listener.send( event[0].to_s, *event[1..-1] )
+          when :entitydecl, :notationdecl
+            # unlike the group above, the payload is handed over as a single array
+            @listener.send( event[0].to_s, event[1..-1] )
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/parsers/treeparser.rb b/lib/booh/rexml/parsers/treeparser.rb
new file mode 100644 (file)
index 0000000..4681b44
--- /dev/null
@@ -0,0 +1,95 @@
+require 'booh/rexml/validation/validationexception'
+
+module REXML
+  module Parsers
+    class TreeParser  # builds a node tree from the events pulled off a BaseParser
+      def initialize( source, build_context = Document.new )
+        @build_context = build_context
+        @parser = Parsers::BaseParser.new( source )
+      end
+
+      def add_listener( listener )
+        @parser.add_listener( listener )
+      end
+
+      def parse  # consumes all events, adding nodes beneath @build_context
+        tag_stack = []
+        in_doctype = false
+        entities = nil
+        begin
+          while true
+            event = @parser.pull
+            # event[0] names the event type; the remaining elements are its payload
+            case event[0]
+            when :end_document
+              unless tag_stack.empty?
+                # the input ended while at least one element was still open
+                raise ParseException.new("No close tag for #{@build_context.xpath}")
+              end
+              return
+            when :start_element
+              tag_stack.push(event[1])
+              # descend: whatever add_element returns becomes the new build context
+              @build_context = @build_context.add_element( event[1], event[2] )
+            when :end_element
+              tag_stack.pop
+              @build_context = @build_context.parent
+            when :text
+              if not in_doctype  # text events inside a doctype are ignored
+                if @build_context[-1].instance_of? Text
+                  @build_context[-1] << event[1]  # extend the trailing Text node
+                else
+                  @build_context.add( 
+                    Text.new(event[1], @build_context.whitespace, nil, true) 
+                  ) unless (
+                    @build_context.ignore_whitespace_nodes and
+                    event[1].strip.size==0
+                  )
+                end
+              end
+            when :comment
+              c = Comment.new( event[1] )
+              @build_context.add( c )
+            when :cdata
+              c = CData.new( event[1] )
+              @build_context.add( c )
+            when :processing_instruction
+              @build_context.add( Instruction.new( event[1], event[2] ) )
+            when :end_doctype
+              in_doctype = false
+              entities.each { |k,v| entities[k] = @build_context.entities[k].value }
+              @build_context = @build_context.parent  # leave the doctype again
+            when :start_doctype
+              doctype = DocType.new( event[1..-1], @build_context )
+              @build_context = doctype  # declarations that follow are added to the doctype
+              entities = {}
+              in_doctype = true
+            when :attlistdecl
+              n = AttlistDecl.new( event[1..-1] )
+              @build_context.add( n )
+            when :externalentity
+              n = ExternalEntity.new( event[1] )
+              @build_context.add( n )
+            when :elementdecl
+              n = ElementDecl.new( event[1] )
+              @build_context.add(n)
+            when :entitydecl
+              entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+              @build_context.add(Entity.new(event))
+            when :notationdecl
+              n = NotationDecl.new( *event[1..-1] )
+              @build_context.add( n )
+            when :xmldecl
+              x = XMLDecl.new( event[1], event[2], event[3] )
+              @build_context.add( x )
+            end
+          end
+        rescue REXML::Validation::ValidationException
+          raise  # validation errors are passed on unchanged
+        rescue  # any other StandardError is wrapped in a ParseException
+          raise ParseException.new( $!.message, @parser.source, @parser, $! )
+        end
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/parsers/ultralightparser.rb b/lib/booh/rexml/parsers/ultralightparser.rb
new file mode 100644 (file)
index 0000000..cd5c697
--- /dev/null
@@ -0,0 +1,56 @@
+require 'booh/rexml/parsers/streamparser'
+require 'booh/rexml/parsers/baseparser'
+
+module REXML
+       module Parsers
+               class UltraLightParser
+                       def initialize stream
+                               @stream = stream
+                               @parser = REXML::Parsers::BaseParser.new( stream )
+                       end
+
+                       def add_listener( listener )
+                               @parser.add_listener( listener )
+                       end
+
+                       def rewind
+                               @stream.rewind
+                               @parser.stream = @stream
+                       end
+
+                       # Pulls every event and returns them as nested arrays rooted at +root+.
+                       def parse
+                               root = context = []
+                               while true
+                                       event = @parser.pull
+                                       case event[0]
+                                       when :end_document
+                                               break
+                                       when :start_element, :start_doctype
+                                               # open a container; its parent is stored at index 1 so the matching end event can pop
+                                               context << event
+                                               event[1,0] = [context]
+                                               context = event
+                                       when :end_element, :end_doctype
+                                               context = context[1]
+                                       else
+                                               context << event
+                                       end
+                               end
+                               root
+                       end
+               end
+
+               # An element is an array of size >= 4.  The array contains:
+               #  0                    The event type (:start_element)
+               #  1                    The parent element (or the root array)
+               #  2                    The tag name
+               #  3                    A hash of attributes
+               #  4..-1        The child nodes
+               # Text is [ :text, string ]
+               # PIs are [ :processing_instruction, target, data ]
+               # Comments are [ :comment, data ]
+               # DocTypes are [ :start_doctype, ... ] event arrays
+               # The root is an array with XMLDecls, Text, DocType, Array, Text
+       end
+end
diff --git a/lib/booh/rexml/parsers/xpathparser.rb b/lib/booh/rexml/parsers/xpathparser.rb
new file mode 100644 (file)
index 0000000..509a828
--- /dev/null
@@ -0,0 +1,698 @@
+require 'booh/rexml/namespace'
+require 'booh/rexml/xmltokens'
+
+module REXML
+  module Parsers
+    # You don't want to use this class.  Really.  Use XPath, which is a wrapper
+    # for this class.  Believe me.  You don't want to poke around in here.
+    # There is strange, dark magic at work in this code.  Beware.  Go back!  Go
+    # back while you still can!
+    class XPathParser
+      include XMLTokens
+      LITERAL    = /^'([^']*)'|^"([^"]*)"/u
+
+      def namespaces=( namespaces )
+        Functions::namespace_context = namespaces  # Functions is handed the same object
+        @namespaces = namespaces
+      end
+
+      # Parses +path+ into a token array; works on a copy so the caller's string is untouched.
+      def parse path
+        path = path.gsub(/([\(\[])\s+/, '\1').gsub( /\s+([\]\)])/, '\1' ) # Strip ignorable spaces
+        parsed = []
+        OrExpr(path, parsed)
+        parsed
+      end
+
+      def predicate path  # parses +path+ as the body of a single predicate
+        parsed = []
+        Predicate( "[#{path}]", parsed )  # bracket it so Predicate accepts it
+        parsed
+      end
+
+      def abbreviate( path )  # renders a path string or token array in abbreviated XPath syntax
+        path = path.kind_of?(String) ? parse( path ) : path
+        string = ""
+        document = false
+        while path.size > 0
+          op = path.shift  # NOTE: this consumes a token array passed in by the caller
+          case op
+          when :node  # emits nothing
+          when :attribute
+                                               string << "/" if string.size > 0
+                                               string << "@"
+          when :child
+                                               string << "/" if string.size > 0
+          when :descendant_or_self
+            string << "/"
+          when :self
+            string << "."
+          when :parent
+            string << ".."
+          when :any
+            string << "*"
+                                       when :text
+                                               string << "text()"
+          when :following, :following_sibling, 
+                :ancestor, :ancestor_or_self, :descendant, 
+                :namespace, :preceding, :preceding_sibling
+            string << "/" unless string.size == 0
+            string << op.to_s.tr("_", "-")  # axis symbols use '_' where XPath uses '-'
+            string << "::"
+          when :qname
+            prefix = path.shift  # a :qname token is followed by its prefix and its name
+            name = path.shift
+            string << prefix+":" if prefix.size > 0
+            string << name
+          when :predicate
+            string << '['
+            string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
+            string << ']'
+          when :document
+            document = true  # the leading "/" is prepended once the loop is done
+                                       when :function
+                                               string << path.shift
+                                               string << "( "
+                                               string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
+                                               string << " )"
+                                       when :literal
+                                               string << %Q{ "#{path.shift}" }
+          else
+            string << "/" unless string.size == 0
+            string << "UNKNOWN("
+            string << op.inspect
+            string << ")"
+          end
+        end
+                               string = "/"+string if document
+        return string
+      end
+
+      def expand( path )  # renders a path string or token array with explicit axis names
+        path = path.kind_of?(String) ? parse( path ) : path
+        string = ""
+        document = false
+        while path.size > 0
+          op = path.shift  # NOTE: this consumes a token array passed in by the caller
+          case op
+          when :node
+            string << "node()"
+          when :attribute, :child, :following, :following_sibling, 
+                :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
+                :namespace, :preceding, :preceding_sibling, :self, :parent
+            string << "/" unless string.size == 0
+            string << op.to_s.tr("_", "-")  # axis symbols use '_' where XPath uses '-'
+            string << "::"
+          when :any
+            string << "*"
+          when :qname
+            prefix = path.shift  # a :qname token is followed by its prefix and its name
+            name = path.shift
+            string << prefix+":" if prefix.size > 0
+            string << name
+          when :predicate
+            string << '['
+            string << predicate_to_string( path.shift ) { |x| expand(x) }
+            string << ']'
+          when :document
+            document = true  # the leading "/" is prepended once the loop is done
+          else  # every other token, including :function and :literal, lands here
+            string << "/" unless string.size == 0
+            string << "UNKNOWN("
+            string << op.inspect
+            string << ")"
+          end
+        end
+        string = "/"+string if document
+        return string
+      end
+
+      def predicate_to_string( path, &block )  # renders one parsed predicate expression as text
+        string = ""
+        case path[0]
+        when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
+          op = path.shift  # NOTE: the token array is consumed while it is rendered
+          case op  # operators not listed here are printed by their symbol name
+          when :eq
+            op = "="
+          when :lt
+            op = "<"
+          when :gt
+            op = ">"
+          when :lteq
+            op = "<="
+          when :gteq
+            op = ">="
+          when :neq
+            op = "!="
+          when :union
+            op = "|"
+          end
+          left = predicate_to_string( path.shift, &block )
+          right = predicate_to_string( path.shift, &block )
+          string << " "
+          string << left
+          string << " "
+          string << op.to_s
+          string << " "
+          string << right
+          string << " "
+        when :function
+          path.shift  # drop the :function marker itself
+          name = path.shift
+          string << name
+          string << "( "
+          string << predicate_to_string( path.shift, &block )
+          string << " )"
+        when :literal
+          path.shift  # drop the :literal marker itself
+          string << " "
+          string << path.shift.inspect
+          string << " "
+        else
+          string << " "
+          string << yield( path )  # anything else is rendered by the caller's block
+          string << " "
+        end
+        return string.squeeze(" ")  # collapse the runs of spaces produced above
+      end
+
+      private
+      #LocationPath
+      #  | RelativeLocationPath
+      #  | '/' RelativeLocationPath?
+      #  | '//' RelativeLocationPath
+      # Parses an absolute or relative location path from +path+ into +parsed+.
+      def LocationPath path, parsed
+        path = path.strip
+        if path[0] == ?/
+          parsed << :document
+          if path[1] == ?/
+            parsed << :descendant_or_self
+            parsed << :node
+            path = path[2..-1]
+          else
+            path = path[1..-1]
+          end
+        end
+        # NOTE: evaluates to nil (not "") when nothing is left after the leading slashes
+        return RelativeLocationPath( path, parsed ) if path.size > 0
+      end
+
+      #RelativeLocationPath
+      #  |                                                    Step
+      #    | (AXIS_NAME '::' | '@' | '')                     AxisSpecifier
+      #      NodeTest
+      #        Predicate
+      #    | '.' | '..'                                      AbbreviatedStep
+      #  |  RelativeLocationPath '/' Step
+      #  | RelativeLocationPath '//' Step
+      AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
+      # Parses steps separated by '/' or '//' from +path+ into +parsed+; returns what is left.
+      def RelativeLocationPath path, parsed
+        while path.size > 0
+          # (axis or @ or <child::>) nodetest predicate  >
+          # OR                                          >  / Step
+          # (. or ..)                                    >
+          if path[0] == ?.
+            if path[1] == ?.
+              parsed << :parent
+              parsed << :node
+              path = path[2..-1]
+            else
+              parsed << :self
+              parsed << :node
+              path = path[1..-1]
+            end
+          else
+            if path[0] == ?@
+              # '@' abbreviates the attribute axis
+              parsed << :attribute
+              path = path[1..-1]
+              # Goto Nodetest
+            elsif path =~ AXIS
+              parsed << $1.tr('-','_').intern
+              path = $'
+              # Goto Nodetest
+            else
+              parsed << :child
+            end
+
+            # the node test and its predicates are gathered in n, then appended together
+            n = []
+            path = NodeTest( path, n)
+            # NodeTest hands +path+ back unchanged when no node test matched
+
+            if path[0] == ?[
+              path = Predicate( path, n )
+            end
+
+            parsed.concat(n)
+          end
+          
+          if path.size > 0
+            if path[0] == ?/
+              if path[1] == ?/
+                parsed << :descendant_or_self
+                parsed << :node
+                path = path[2..-1]
+              else
+                path = path[1..-1]
+              end
+            else
+              return path
+            end
+          end
+        end
+        return path
+      end
+
+      # Returns a 1-1 map of the nodeset
+      # The contents of the resulting array are either:
+      #   true/false, if a positive match
+      #   String, if a name match
+      #NodeTest
+      #  | ('*' | NCNAME ':' '*' | QNAME)                NameTest
+      #  | NODE_TYPE '(' ')'                              NodeType
+      #  | PI '(' LITERAL ')'                            PI
+      #    | '[' expr ']'                                Predicate
+      NCNAMETEST= /^(#{NCNAME_STR}):\*/u
+      QNAME     = Namespace::NAMESPLIT
+      NODE_TYPE  = /^(comment|text|node)\(\s*\)/m
+      PI        = /^processing-instruction\(/
+      # Consumes one node test from +path+ into +parsed+; returns the unparsed remainder.
+      def NodeTest path, parsed
+        case path
+        when /^\*/
+          path = $'
+          parsed << :any
+        when NODE_TYPE
+          type = $1
+          path = $'
+          parsed << type.tr('-', '_').intern
+        when PI
+          path = $'
+          literal = nil
+          if path =~ /^\s*\)/
+            path = $'  # empty argument list: consume the ')' so later steps still parse
+          else
+            path =~ LITERAL
+            # '...' is captured in $1, "..." in $2; path becomes nil if no literal matched
+            literal = $1 || $2
+            path = $'
+            raise ParseException.new("Missing ')' after processing instruction") if path.nil? or path[0] != ?)
+            path = path[1..-1]
+          end
+          parsed << :processing_instruction
+          parsed << (literal || '')
+        when NCNAMETEST
+          prefix = $1
+          path = $'
+          parsed << :namespace
+          parsed << prefix
+        when QNAME
+          prefix = $1
+          name = $2
+          path = $'
+          prefix = "" unless prefix
+          parsed << :qname
+          parsed << prefix
+          parsed << name
+        end
+        return path
+      end
+
+      # Filters the supplied nodeset on the predicate(s)
+      # Parses every '[...]' group at the front of +path+.  For each one, :predicate
+      # and an array holding its parsed expression are appended to +parsed+.
+      # Returns the rest of +path+, or nil if +path+ does not begin with '['.
+      def Predicate path, parsed
+        return nil unless path[0] == ?[
+        bodies = []
+        while path[0] == ?[
+          path, group = get_group(path)
+          bodies << group[1..-2] if group
+        end
+        bodies.each do |body|
+          tokens = []
+          parsed << :predicate
+          parsed << tokens
+          # OrExpr fills tokens in place, so it can be attached before parsing
+          OrExpr( body, tokens )
+        end
+        path
+      end
+
+      # The following return arrays of true/false, a 1-1 mapping of the
+      # supplied nodeset, except for axe(), which returns a filtered
+      # nodeset
+
+      #| OrExpr S 'or' S AndExpr
+      #| AndExpr
+      # Parses AndExpr (' or ' AndExpr)* off the front of +path+ into +parsed+; returns the rest.
+      def OrExpr path, parsed
+        n = []
+        rest = AndExpr( path, n )
+        # look for operators only if the first operand consumed something
+        if rest != path
+          while rest =~ /^\s*( or )/
+            n = [ :or, n, [] ]  # the tree built so far becomes the left operand
+            rest = AndExpr( $', n[-1] )
+          end
+        end
+        if parsed.size == 0 and n.size != 0
+          parsed.replace(n)  # an empty +parsed+ takes the tokens over in place
+        elsif n.size > 0
+          parsed << n  # otherwise they are nested as a single element
+        end
+        rest
+      end
+
+      #| AndExpr S 'and' S EqualityExpr
+      #| EqualityExpr
+      # Parses EqualityExpr (' and ' EqualityExpr)* from +path+ into +parsed+; returns the rest.
+      def AndExpr path, parsed
+        n = []
+        rest = EqualityExpr( path, n )
+        # look for operators only if the first operand consumed something
+        if rest != path
+          while rest =~ /^\s*( and )/
+            n = [ :and, n, [] ]
+            # the tree built so far is now the left operand; n[-1] receives the right one
+            rest = EqualityExpr( $', n[-1] )
+            # rest is whatever follows the right operand
+          end
+        end
+        if parsed.size == 0 and n.size != 0
+          parsed.replace(n)  # an empty +parsed+ takes the tokens over in place
+        elsif n.size > 0
+          parsed << n  # otherwise they are nested as a single element
+        end
+        rest
+      end
+
+      #| EqualityExpr ('=' | '!=')  RelationalExpr
+      #| RelationalExpr
+      # Parses RelationalExpr (('=' | '!=') RelationalExpr)* from +path+; returns the rest.
+      def EqualityExpr path, parsed
+        n = []
+        rest = RelationalExpr( path, n )
+        # look for operators only if the first operand consumed something
+        if rest != path
+          while rest =~ /^\s*(!?=)\s*/
+            if $1[0] == ?!
+              n = [ :neq, n, [] ]
+            else
+              n = [ :eq, n, [] ]
+            end
+            rest = RelationalExpr( $', n[-1] )  # n[-1] receives the right operand
+          end
+        end
+        if parsed.size == 0 and n.size != 0
+          parsed.replace(n)  # an empty +parsed+ takes the tokens over in place
+        elsif n.size > 0
+          parsed << n  # otherwise they are nested as a single element
+        end
+        rest
+      end
+
+      #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
+      #| AdditiveExpr
+      # Parses AdditiveExpr (('<' | '>' | '<=' | '>=') AdditiveExpr)* from +path+; returns the rest.
+      def RelationalExpr path, parsed
+        n = []
+        rest = AdditiveExpr( path, n )
+        # look for operators only if the first operand consumed something
+        if rest != path
+          while rest =~ /^\s*([<>]=?)\s*/
+            if $1[0] == ?<
+              sym = "lt"
+            else
+              sym = "gt"
+            end
+            sym << "eq" if $1[-1] == ?=  # yields lteq / gteq
+            n = [ sym.intern, n, [] ]
+            rest = AdditiveExpr( $', n[-1] )  # n[-1] receives the right operand
+          end
+        end
+        if parsed.size == 0 and n.size != 0
+          parsed.replace(n)  # an empty +parsed+ takes the tokens over in place
+        elsif n.size > 0
+          parsed << n  # otherwise they are nested as a single element
+        end
+        rest
+      end
+
+      #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
+      #| MultiplicativeExpr
+      # Parses MultiplicativeExpr (('+' | ' -') MultiplicativeExpr)* from +path+; returns the rest.
+      def AdditiveExpr path, parsed
+        n = []
+        rest = MultiplicativeExpr( path, n )
+        # look for operators only if the first operand consumed something
+        if rest != path
+          while rest =~ /^\s*(\+| -)\s*/
+            if $1[0] == ?+
+              n = [ :plus, n, [] ]
+            else
+              n = [ :minus, n, [] ]
+            end
+            rest = MultiplicativeExpr( $', n[-1] )  # n[-1] receives the right operand
+          end
+        end
+        if parsed.size == 0 and n.size != 0
+          parsed.replace(n)  # an empty +parsed+ takes the tokens over in place
+        elsif n.size > 0
+          parsed << n  # otherwise they are nested as a single element
+        end
+        rest
+      end
+
+      #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
+      #| UnaryExpr
+      # Parses UnaryExpr (('*' | ' div ' | ' mod ') UnaryExpr)* from +path+; returns the rest.
+      def MultiplicativeExpr path, parsed
+        n = []
+        rest = UnaryExpr( path, n )
+        # look for operators only if the first operand consumed something
+        if rest != path
+          while rest =~ /^\s*(\*| div | mod )\s*/
+            if $1[0] == ?*
+              n = [ :mult, n, [] ]
+            elsif $1.include?( "div" )
+              n = [ :div, n, [] ]
+            else
+              n = [ :mod, n, [] ]
+            end
+            rest = UnaryExpr( $', n[-1] )  # n[-1] receives the right operand
+          end
+        end
+        if parsed.size == 0 and n.size != 0
+          parsed.replace(n)  # an empty +parsed+ takes the tokens over in place
+        elsif n.size > 0
+          parsed << n  # otherwise they are nested as a single element
+        end
+        rest
+      end
+
+      #| '-' UnaryExpr
+      #| UnionExpr
+      # Strips any run of leading '-' signs from +path+ and parses the operand that
+      # follows as a UnionExpr.  An odd number of signs prepends :neg to +parsed+;
+      # an even number cancels out.  Returns the unparsed rest.
+      def UnaryExpr path, parsed
+        # always matches (possibly the empty string) when +path+ is a String
+        path =~ /^(\-*)/
+        signs = $1
+        path = $'
+
+        negative = (signs and (signs.size % 2) != 0)
+        parsed << :neg if negative
+
+        operand = []
+        path = UnionExpr( path, operand )
+        parsed.concat( operand )
+        path
+      end
+
+      #| UnionExpr '|' PathExpr
+      #| PathExpr
+      # Parses PathExpr ('|' PathExpr)* off the front of +path+ into +parsed+; returns the rest.
+      def UnionExpr path, parsed
+        n = []
+        rest = PathExpr( path, n )
+        # look for operators only if the first operand consumed something
+        if rest != path
+          while rest =~ /^\s*(\|)\s*/
+            n = [ :union, n, [] ]  # the tree built so far becomes the left operand
+            rest = PathExpr( $', n[-1] )
+          end
+        end
+        if parsed.size == 0 and n.size != 0
+          parsed.replace( n )  # an empty +parsed+ takes the tokens over in place
+        elsif n.size > 0
+          parsed << n  # otherwise they are nested as a single element
+        end
+        rest
+      end
+
+      #| LocationPath
+      #| FilterExpr ('/' | '//') RelativeLocationPath
+      # Parses a LocationPath, or a FilterExpr optionally followed by '/' or '//' and more steps.
+      def PathExpr path, parsed
+        path =~ /^\s*/
+        path = $'
+        n = []
+        rest = FilterExpr( path, n )
+        if rest != path and rest and rest[0] == ?/
+          # consume the separator here ('//' is recorded the way LocationPath records it)
+          # and keep the tokens: they used to be dropped by returning before the concat
+          n << :descendant_or_self << :node if rest[1] == ?/
+          rest = RelativeLocationPath( rest.sub( /^\/\/?/, '' ), n )
+          parsed.concat(n)
+          return rest
+        end
+        rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/
+        parsed.concat(n)
+        return rest
+      end
+
+      #| FilterExpr Predicate
+      #| PrimaryExpr
+      # Parses a PrimaryExpr and any predicates that immediately follow it,
+      # appends the resulting tokens to +parsed+ and returns the unparsed rest.
+      def FilterExpr path, parsed
+        tokens = []
+        rest = PrimaryExpr( path, tokens )
+        # a leading '[' means predicates follow
+        rest = Predicate( rest, tokens ) if rest and rest[0] == ?[
+        parsed.concat( tokens )
+        rest
+      end
+
+      #| VARIABLE_REFERENCE
+      #| '(' expr ')'
+      #| LITERAL
+      #| NUMBER
+      #| FunctionCall
+      VARIABLE_REFERENCE  = /^\$(#{NAME_STR})/u
+      NUMBER              = /^(\d*\.?\d+)/
+      # Node-type names that look like function calls but are node tests.  The group
+      # is required: without it ^ and $ bind to the first and last alternative only.
+      NT        = /^(?:comment|text|processing-instruction|node)$/
+      # Parses one primary expression off the front of +path+, appends its tokens to
+      # +parsed+ and returns the rest (+path+ unchanged when nothing matched).
+      def PrimaryExpr path, parsed
+        case path
+        when VARIABLE_REFERENCE
+          varname = $1
+          path = $'
+          parsed << :variable
+          parsed << varname
+        when /^(\w[-\w]*)(?:\()/
+          fname = $1
+          tmp = $'
+          # node tests such as text() are not functions; leave them unconsumed
+          return path if fname =~ NT
+          path = tmp
+          parsed << :function
+          parsed << fname
+          path = FunctionCall(path, parsed)
+        when NUMBER
+          varname = $1
+          path = $'
+          parsed << :literal
+          parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
+        when LITERAL
+          # single-quoted text is captured in $1, double-quoted in $2
+          varname = $1.nil? ? $2 : $1
+          path = $'
+          parsed << :literal
+          parsed << varname
+        when /^\(/
+          path, contents = get_group(path)
+          contents = contents[1..-2]
+          n = []
+          OrExpr( contents, n )
+          parsed.concat(n)
+        end
+        path
+      end
+
+      #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
+      # Parses a call's arguments; appends one parsed expression per argument as a single array.
+      def FunctionCall rest, parsed
+        path, arguments = parse_args(rest)
+        argset = arguments.collect do |argument|
+          args = []
+          OrExpr( argument, args )
+          args
+        end
+        parsed << argset
+        path
+      end
+
+      # get_group( '[foo]bar' ) -> ['bar', '[foo]'];  nil when the group is never closed
+      def get_group string
+        opener = string[0,1]
+        closer = (opener == "(") ? ")" : "]"
+        pos = nesting = 0
+        while true
+          char = string[pos,1]
+          if char == opener
+            nesting += 1
+          elsif char == closer
+            nesting -= 1
+          end
+          pos += 1
+          break unless nesting > 0 and pos < string.length
+        end
+        return nil if nesting != 0
+        [string[pos..-1], string[0...pos]]
+      end
+      
+      # Splits the text following a function's opening '(' into argument strings.
+      # Returns [rest_after_closing_paren, arguments], or nil if the parentheses never
+      # balance.  Commas and parentheses inside quoted literals are not treated as syntax.
+      def parse_args( string )
+        arguments = []
+        ind = 0
+        inquot = false
+        inapos = false
+        depth = 1
+        begin
+          case string[ind]
+          when ?"
+            inquot = !inquot unless inapos
+          when ?'
+            inapos = !inapos unless inquot
+          else
+            unless inquot or inapos
+              case string[ind]
+              when ?(
+                depth += 1
+              when ?)
+                depth -= 1
+                if depth == 0
+                  s = string[0,ind].strip
+                  arguments << s unless s == ""
+                  string = string[ind+1..-1]
+                end
+              when ?,
+                if depth == 1
+                  # top-level comma: emit the argument and restart scanning after it
+                  s = string[0,ind].strip
+                  arguments << s unless s == ""
+                  string = string[ind+1..-1]
+                  ind = -1
+                end
+              end
+            end
+          end
+          ind += 1
+        end while depth > 0 and ind < string.length
+        return nil unless depth==0
+        [string,arguments]
+      end
+    end
+  end
+end
diff --git a/lib/booh/rexml/quickpath.rb b/lib/booh/rexml/quickpath.rb
new file mode 100644 (file)
index 0000000..dc41dd1
--- /dev/null
@@ -0,0 +1,266 @@
+require 'booh/rexml/functions'
+require 'booh/rexml/xmltokens'
+
+module REXML
+       class QuickPath
+               include Functions
+               include XMLTokens
+
+               EMPTY_HASH = {}
+
+               def QuickPath::first element, path, namespaces=EMPTY_HASH
+                       match(element, path, namespaces)[0]
+               end
+
+               def QuickPath::each element, path, namespaces=EMPTY_HASH, &block
+                       path = "*" unless path
+                       match(element, path, namespaces).each( &block )
+               end
+
+               def QuickPath::match element, path, namespaces=EMPTY_HASH
+                       raise "nil is not a valid xpath" unless path
+                       results = nil
+                       Functions::namespace_context = namespaces
+                       case path
+                       when /^\/([^\/]|$)/u
+                               # match on root
+                               path = path[1..-1]
+                               return [element.root.parent] if path == ''
+                               results = filter([element.root], path)
+                       when /^[-\w]*::/u
+                               results = filter([element], path)
+                       when /^\*/u
+                               results = filter(element.to_a, path)
+                       when /^[\[!\w:]/u
+                               # match on child
+                               matches = []
+                               children = element.to_a
+                               results = filter(children, path)
+                       else
+                               results = filter([element], path)
+                       end
+                       return results
+               end
+
+               # Given an array of nodes it filters the array based on the path. The
+               # result is that when this method returns, the array will contain elements
+               # which match the path
+               def QuickPath::filter elements, path
+                       return elements if path.nil? or path == '' or elements.size == 0
+                       case path
+                       when /^\/\//u                                                                                   # Descendant
+                               return axe( elements, "descendant-or-self", $' )
+                       when /^\/?\b(\w[-\w]*)\b::/u                                                    # Axe
+                               axe_name = $1
+                               rest = $'
+                               return axe( elements, $1, $' )
+                       when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u    # Child
+                               rest = $'
+                               results = []
+                               elements.each do |element|
+                                       results |= filter( element.to_a, rest )
+                               end
+                               return results
+                       when /^\/?(\w[-\w]*)\(/u                                                        # / Function
+                               return function( elements, $1, $' )
+                       when Namespace::NAMESPLIT               # Element name
+                               name = $2
+                               ns = $1
+                               rest = $'
+                               elements.delete_if do |element|
+                                       !(element.kind_of? Element and 
+                                               (element.expanded_name == name or
+                                                (element.name == name and
+                                                 element.namespace == Functions.namespace_context[ns])))
+                               end
+                               return filter( elements, rest )
+                       when /^\/\[/u
+                               matches = []
+                               elements.each do |element|
+                                       matches |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element
+                               end
+                               return matches
+                       when /^\[/u                                                                                             # Predicate
+                               return predicate( elements, path )
+                       when /^\/?\.\.\./u                                                                              # Ancestor
+                               return axe( elements, "ancestor", $' )
+                       when /^\/?\.\./u                                                                                        # Parent
+                               return filter( elements.collect{|e|e.parent}, $' )
+                       when /^\/?\./u                                                                                          # Self
+                               return filter( elements, $' )
+                       when /^\*/u                                                                                                     # Any
+                               results = []
+                    &nb