Module: RDF::Microdata::Reader::Nokogiri

Defined in:
vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/reader/nokogiri.rb

Overview

Nokogiri implementation of an HTML parser.

Defined Under Namespace

Classes: NodeProxy, NodeSetProxy

Class Method Summary

Instance Method Summary

Class Method Details

.library ⇒ Symbol

Returns the name of the underlying XML library.

Returns:

  • (Symbol)

12
13
14
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/reader/nokogiri.rb', line 12

# Names the XML library that backs this reader implementation.
#
# @return [Symbol] always +:nokogiri+
def self.library
  :nokogiri
end

Instance Method Details

#doc_base(base) ⇒ String

Find value of document base

Parameters:

  • base (String)

    Existing base from URI or :base_uri

Returns:

  • (String)

223
224
225
226
227
228
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/reader/nokogiri.rb', line 223

# Find value of document base.
#
# Looks for a +<base>+ element under +html>head+ and, when it carries a
# usable +href+, returns that value with any fragment removed.
#
# @param [String] base
#   Existing base from URI or :base_uri
# @return [String] the +href+ of the document's +<base>+ element without
#   its fragment, or +base+ unchanged when there is no such element or its
#   +href+ is missing or empty
def doc_base(base)
  base_el = @doc.at_css("html>head>base")
  return base unless base_el

  # A <base> element may exist solely for its target attribute; splitting an
  # absent or fragment-only href yields nil or "", which must not replace the
  # base the caller already has.
  href = base_el.attribute("href").to_s.split("#").first
  href.nil? || href.empty? ? base : href
end

#doc_errors ⇒ Object

Document errors



214
215
216
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/reader/nokogiri.rb', line 214

# Document errors, leaving out the parser's complaint about doctype position.
#
# @return [Array] the entries of +@doc.errors+ whose string form does not
#   match the doctype-placement message
def doc_errors
  ignorable = /The doctype must be the first token in the document/
  @doc.errors.reject { |error| ignorable.match?(error.to_s) }
end

#find_element_by_id(id) ⇒ Object

Look up an element in the document by id



240
241
242
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/reader/nokogiri.rb', line 240

# Look up an element in the document by id.
#
# The id is handed to XPath as a bound variable rather than spliced into a
# "#id" CSS selector, so ids that are not valid CSS identifiers (containing
# "." or ":", or starting with a digit) are compared literally instead of
# raising a selector syntax error or matching the wrong element.
#
# @param [#to_s] id value of the +id+ attribute to search for
# @return [NodeProxy, nil] proxy for the first matching element, or nil
#   when no element has that id
def find_element_by_id(id)
  element = @doc.at_xpath("//*[@id=$id]", nil, { id: id.to_s })
  element && NodeProxy.new(element)
end

#getItems ⇒ Object

Based on Microdata element.getItems



234
235
236
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/reader/nokogiri.rb', line 234

# Based on Microdata element.getItems
#
# Collects every element carrying an +itemscope+ attribute that does not
# also carry +itemprop+, and wraps each one in a NodeProxy.
#
# @return [Array<NodeProxy>]
def getItems
  scoped = @doc.css('[itemscope]')
  without_itemprop = scoped.reject { |element| element.has_attribute?('itemprop') }
  without_itemprop.map { |element| NodeProxy.new(element) }
end

#initialize_html(input, options = {})

This method returns an undefined value.

Initializes the underlying XML library.

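Parameters:

  • input

    A Nokogiri::XML::Document, an object responding to read, or a String of markup

  • options (Hash) (defaults to: {})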

181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/reader/nokogiri.rb', line 181

# Initializes the underlying XML library.
#
# A ready-made Nokogiri document is used as-is. Anything else is parsed with
# the HTML5 parser, falling back to Nokogiri's HTML parser when nokogumbo
# cannot be loaded.
#
# @param [Nokogiri::XML::Document, #read, String] input
#   a parsed document, an object responding to +read+, or markup text
# @param [Hash] options
# @option options [#to_s] :encoding
#   encoding to apply to the markup; defaults to the input's +charset+ when
#   it responds to that, otherwise 'utf-8'. The resolved value is written
#   back into +options+ as a String.
# @return [void]
def initialize_html(input, options = {})
  require 'nokogiri' unless defined?(::Nokogiri)
  @doc = case input
  when ::Nokogiri::XML::Document
    input
  else
    # An explicit option wins, then the input's own charset, then utf-8
    options[:encoding] ||= input.charset if input.respond_to?(:charset)
    encoding = options[:encoding] = (options[:encoding] || 'utf-8').to_s

    begin
      # nokogumbo is only needed when Nokogiri does not already expose HTML5
      unless defined?(::Nokogumbo) || ::Nokogiri.respond_to?(:HTML5)
        require 'nokogumbo'
      end
      # Work on a private copy: force_encoding changes its receiver in place,
      # which would re-tag the caller's string or raise on a frozen one.
      input = input.respond_to?(:read) ? input.read : input.dup
      ::Nokogiri::HTML5(input.force_encoding(encoding))
    rescue LoadError
      ::Nokogiri::HTML.parse(input, base_uri.to_s, encoding)
    end
  end
end

#root ⇒ Object

Return proxy for document root



208
209
210
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/reader/nokogiri.rb', line 208

# Return proxy for document root
#
# The proxy is built on first use and cached in +@root+. nil is returned
# whenever there is no document or the document has no root, even if a
# proxy was cached earlier.
#
# @return [NodeProxy, nil]
def root
  return unless @doc && @doc.root

  @root ||= NodeProxy.new(@doc.root)
end