Module: RDF::RDFa::Reader::REXML

Defined in:
vendor/bundler/ruby/3.3.0/bundler/gems/rdf-rdfa-ea6265716853/lib/rdf/rdfa/reader/rexml.rb

Overview

REXML implementation of an XML parser.

Defined Under Namespace

Classes: NodeProxy, NodeSetProxy

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.library ⇒ Symbol

Returns the name of the underlying XML library.

Returns:



14
15
16
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/rdf-rdfa-ea6265716853/lib/rdf/rdfa/reader/rexml.rb', line 14

# Name of the XML library this reader implementation is built on.
#
# @return [Symbol] always +:rexml+
def self.library
  :rexml
end

Instance Method Details

#detect_host_language_version(input, **options) ⇒ Object

Determine the host language and/or version from options and the input document



232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/rdf-rdfa-ea6265716853/lib/rdf/rdfa/reader/rexml.rb', line 232

# Determine the host language and/or RDFa version from options and the input
# document.
#
# Sets +@host_language+ and +@version+. Values supplied through
# +:host_language+ / +:version+ win; whatever is missing is sniffed from the
# DOCTYPE, the root element and its +version+ attribute, and (for input that
# is not already a parsed document) the content type and any +<meta>+
# elements found in the first part of the input.
#
# @param input [::REXML::Document, #read, #to_s] parsed document or raw input
# @param options [Hash] reader options
# @option options [#to_sym] :host_language explicit host language
# @option options [#to_sym] :version explicit RDFa version
# @return [Symbol, nil] the host language, or nil when both values were
#   supplied through +options+
def detect_host_language_version(input, **options)
  @host_language = options[:host_language] ? options[:host_language].to_sym : nil
  @version = options[:version] ? options[:version].to_sym : nil
  return if @host_language && @version

  # Sniff version based on input
  case input
  when ::REXML::Document
    doc_type_string = input.doctype.to_s
    # An empty document has no root element; fall back to empty strings
    # rather than calling methods on nil.
    root_node = input.root
    version_attr = root_node ? root_node.attribute("version").to_s : ""
    root_element = root_node ? root_node.name.downcase : ""
    # NOTE(review): this value matches none of the content types tested
    # below, so parsed documents are classified by root element. It looks
    # like a typo for "application/xhtml+xml", but changing it would alter
    # detection results -- confirm before touching.
    content_type = "application/xhtml+html" # FIXME: what about other possible XML types?
  else
    content_type = input.content_type if input.respond_to?(:content_type)

    # Determine from head of document
    head = if input.respond_to?(:read)
      input.rewind
      string = input.read(1000)
      input.rewind
      string.to_s
    else
      input.to_s[0..1000]
    end

    doc_type_string = head.match(%r(<!DOCTYPE[^>]*>)m).to_s
    # First tag that is neither a declaration (<!...) nor a processing
    # instruction (<?...) is taken to be the root element.
    root = head.match(%r(<[^!\?>]*>)m).to_s
    root_element = root[%r(^<(\S+)[ >]), 1] || ""
    version_attr = root[/version\s*=\s*"([^"]+)"/m, 1] || ""
    head_element = head.match(%r(<head.*<\/head>)mi)
    head_doc = ::REXML::Document.new(head_element.to_s)

    # May determine content-type and/or charset from meta.
    # Easiest way is to parse head into a document and iterate
    # over the meta elements.
    ::REXML::XPath.each(head_doc, "//meta") do |meta|
      if meta.attribute("http-equiv").to_s.downcase == 'content-type'
        # "text/html; charset=utf-8" => media type + parameter part
        content_type, params = meta.attribute("content").to_s.downcase.split(";")
        options[:encoding] = Regexp.last_match(1).downcase if params.to_s =~ /charset=([^\s]*)$/i
      elsif meta.attribute("charset")
        # REXML elements expose #attribute; #attr is not defined on them.
        options[:encoding] = meta.attribute("charset").to_s.downcase
      end
    end
  end

  # Already using XML parser, determine from DOCTYPE and/or root element
  @version ||= :"rdfa1.0" if doc_type_string =~ /RDFa 1\.0/
  @version ||= :"rdfa1.0" if version_attr =~ /RDFa 1\.0/
  @version ||= :"rdfa1.1" if version_attr =~ /RDFa 1\.1/
  @version ||= :"rdfa1.1"

  @host_language ||= case content_type
  when "application/xml"  then :xml
  when "image/svg+xml"    then :svg
  when "text/html"
    case doc_type_string
    when /html 4/i        then :html4
    when /xhtml/i         then :xhtml1
    when /html/i          then :html5
    else                       :html5
    end
  when "application/xhtml+xml"
    case doc_type_string
    when /html 4/i        then :html4
    when /xhtml/i         then :xhtml1
    when /html/i          then :xhtml5
    else                       :xhtml5
    end
  else
    # Unknown or missing content type: fall back to the root element name.
    case root_element
    when /svg/i           then :svg
    else                       :html5
    end
  end
end

#doc_base(base) ⇒ String

Find value of document base

Parameters:

  • base (String)

    Existing base from URI or :base_uri

Returns:



327
328
329
330
331
332
333
334
335
336
337
338
339
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/rdf-rdfa-ea6265716853/lib/rdf/rdfa/reader/rexml.rb', line 327

# Find the value of the document base.
#
# For the HTML family of host languages the +href+ of the first
# +/html/head/base+ element (with any fragment removed) is joined onto
# +base+. For every other host language the root element's +xml:base+
# attribute is joined instead.
#
# @param base [#join] existing base from URI or :base_uri
# @return [Object] the resolved base, or +@base_uri+ when no base is available
def doc_base(base)
  if %i[xhtml1 xhtml5 html4 html5].include?(@host_language)
    base_el = begin
      ::REXML::XPath.first(@doc, "/html/head/base")
    rescue StandardError
      # Best-effort lookup: a failing XPath query counts as "no <base> element".
      nil
    end

    if base_el
      href = base_el.attribute("href").to_s
      base = base.join(href.split("#").first)
    end
  else
    xml_base = nil
    if root
      # Prefer the namespace-qualified lookup, then the literal prefixed name.
      xml_base = root.attribute("base", "http://www.w3.org/XML/1998/namespace")
      xml_base ||= root.attribute('xml:base')
    end
    base = base.join(xml_base) if xml_base
  end

  base || @base_uri
end

#doc_errors ⇒ Object

Document errors



318
319
320
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/rdf-rdfa-ea6265716853/lib/rdf/rdfa/reader/rexml.rb', line 318

# Document errors.
#
# @return [Array] always an empty array in this implementation
def doc_errors
  []
end

#initialize_xml(input, **options)

This method returns an undefined value.

Initializes the underlying XML library.

Parameters:



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/rdf-rdfa-ea6265716853/lib/rdf/rdfa/reader/rexml.rb', line 211

# Initializes the underlying XML library and stores the parsed document in
# +@doc+.
#
# An input that is already a +::REXML::Document+ is used as-is. Anything else
# is read (via +#read+ when available, otherwise +#to_s+) and parsed as XML.
#
# @param input [::REXML::Document, #read, #to_s] document or raw source
# @param options [Hash] reader options; +:encoding+ is defaulted here
# @return [::REXML::Document] the document assigned to +@doc+
def initialize_xml(input, **options)
  require 'rexml/document' unless defined?(::REXML)

  # Pre-parsed documents need no further setup (same class test as `case/when`).
  return @doc = input if ::REXML::Document === input

  # Encoding preference: explicit option, then the input's own charset,
  # then utf-8.
  sniffed = input.charset if input.respond_to?(:charset) && !options[:encoding]
  options[:encoding] ||= sniffed || 'utf-8'

  # Set xml:base for the document element, if defined
  @base_uri = (base_uri.to_s if base_uri)

  # Only parse as XML, no HTML mode
  source = input.respond_to?(:read) ? input.read : input.to_s
  @doc = ::REXML::Document.new(source)
end

#root ⇒ Object

Return proxy for document root



312
313
314
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/rdf-rdfa-ea6265716853/lib/rdf/rdfa/reader/rexml.rb', line 312

# Return proxy for document root.
#
# @return [NodeProxy, nil] memoized proxy around +@doc.root+, or nil when
#   there is no document or the document has no root element
def root
  return unless @doc && @doc.root

  @root ||= NodeProxy.new(@doc.root)
end