Class: RDF::Microdata::RdfaReader

Inherits:
RDFa::Reader show all
Defined in:
vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/rdfa_reader.rb

Overview

Update DOM to turn Microdata into RDFa and parse using the RDFa Reader

Constant Summary

Constants inherited from RDFa::Reader

RDFa::Reader::NC_REGEXP, RDFa::Reader::SafeCURIEorCURIEorIRI, RDFa::Reader::TERM_REGEXP, RDFa::Reader::TERMorCURIEorAbsIRI, RDFa::Reader::XHTML

Instance Attribute Summary collapse

Attributes inherited from RDFa::Reader

#host_language, #implementation, #repository, #version

Attributes inherited from Reader

#options

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from RDFa::Reader

#each_statement, #each_triple, options

Methods included from Util::Logger

#log_debug, #log_depth, #log_error, #log_fatal, #log_info, #log_recover, #log_recovering?, #log_statistics, #log_warn, #logger

Methods included from RDFa::Expansion

#copy_properties, #expand, #rule

Methods inherited from Reader

#base_uri, #canonicalize?, #close, each, #each_statement, #each_triple, #encoding, #fail_object, #fail_predicate, #fail_subject, for, #intern?, #lineno, open, options, #prefix, #prefixes, #prefixes=, #read_statement, #read_triple, #rewind, to_sym, #to_sym, #valid?, #validate?

Methods included from Util::Aliasing::LateBound

#alias_method

Methods included from Enumerable

add_entailment, #dump, #each_graph, #each_object, #each_predicate, #each_quad, #each_statement, #each_subject, #each_term, #each_triple, #entail, #enum_graph, #enum_object, #enum_predicate, #enum_quad, #enum_statement, #enum_subject, #enum_term, #enum_triple, #graph_names, #has_graph?, #has_object?, #has_predicate?, #has_quad?, #has_statement?, #has_subject?, #has_term?, #has_triple?, #invalid?, #method_missing, #objects, #predicates, #project_graph, #quads, #respond_to_missing?, #statements, #subjects, #supports?, #terms, #to_a, #to_h, #to_set, #triples, #valid?, #validate!

Methods included from Isomorphic

#bijection_to, #isomorphic_with?

Methods included from Countable

#count, #empty?

Methods included from Readable

#readable?

Constructor Details

#initialize(input = $stdin, options = {}) {|reader| ... } ⇒ reader

Initializes the RdfaReader instance.

Parameters:

  • input (IO, File, String) (defaults to: $stdin)

    the input stream to read

  • options (Hash{Symbol => Object}) (defaults to: {})

    any additional options (see RDF::Reader#initialize)

Yields:

  • (reader)

    self

Yield Parameters:

Yield Returns:

  • (void)

    ignored

Raises:



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/rdfa_reader.rb', line 32

# Initializes the RdfaReader instance.
#
# Parses the input as HTML5 (unless it already is a Nokogiri document),
# rewrites the Microdata attributes (@itemscope, @itemtype, @itemid,
# @itemprop, @itemref) into RDFa attributes, stores the transformed DOM in
# @rdfa, and finally delegates to the parent reader's initializer.
#
# @param  [IO, File, String, Nokogiri::XML::Document, Nokogiri::HTML::Document] input
#   the input stream to read
# @param  [Hash{Symbol => Object}] options
#   any additional options; this method itself reads +:encoding+ and
#   +:registry+, and may set +:encoding+ on the hash that was passed in
# @yield  [reader] forwarded to the parent initializer
# @return [reader]
def initialize(input = $stdin, options = {}, &block)
  @options = options
  log_debug('', "using RDFa transformation reader")

  input = case input
  when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
  else
    # Try to detect charset from input
    options[:encoding] ||= input.charset if input.respond_to?(:charset)
    
    # Otherwise, default is utf-8
    options[:encoding] ||= 'utf-8'
    options[:encoding] = options[:encoding].to_s if options[:encoding]
    input = input.read if input.respond_to?(:read)
    ::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
  end

  # Load registry
  begin
    registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
    log_debug('', "registry = #{registry_uri.inspect}")
    Registry.load_registry(registry_uri)
  rescue JSON::ParserError => e
    # NOTE(review): `root` is not assigned anywhere in this method; confirm it
    # resolves to something meaningful before the parent initializer has run.
    log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
  end

  # For all members having @itemscope
  input.css("[itemscope]").each do |item|
    # Get @itemtypes to create @type and @vocab
    item.attribute('itemscope').remove
    if item['itemtype']
      # Only absolute URLs
      types = item.attribute('itemtype').
        remove.
        to_s.
        split(/\s+/).
        select {|t| RDF::URI(t).absolute?}

      item['typeof'] = types.join(' ') unless types.empty?
      if vocab = types.first
        # Prefer a registry entry for the first type; otherwise derive a
        # vocabulary URI by stripping everything after the last '/' or '#'.
        vocab = Registry.find(vocab) || begin
          type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
          Registry.new(type_vocab) if type_vocab
        end
        item['vocab'] = vocab.uri.to_s if vocab
      end
    end

    # Change each itemid attribute to a resource attribute with the same value
    # (@about is used instead when the item is not itself a property value)
    if item['itemid']
      id = item.attribute('itemid').remove
      item[item['itemprop'] ? 'resource' : 'about'] = id
    else
      # Otherwise, ensure that @typeof has at least an empty value
      item['typeof'] ||= ''
    end
  end

  # Add @resource for all itemprop values of object based on a @data value
  input.css("object[itemprop][data]").each do |item|
    item['resource'] ||= item['data']
  end

  # Replace all @itemprop values with @property
  input.css("[itemprop]").each {|item| item['property'] = item.attribute('itemprop').remove}

  # Wrap all @itemref properties
  input.css("[itemref]").each do |item|
    # Use the item's own @vocab, or the @vocab *value* of the closest ancestor
    # carrying one. Stringifying the ancestor node itself would serialize the
    # whole element rather than yield the attribute value.
    item_vocab = item['vocab']
    unless item_vocab
      vocab_holder = item.ancestors.detect {|a| a.attribute('vocab')}
      item_vocab = vocab_holder['vocab'] if vocab_holder
    end
    item_vocab = item_vocab.to_s if item_vocab

    item.attribute('itemref').remove.to_s.split(/\s+/).each do |ref|
      # NOTE(review): css presumably returns a node set, which is truthy even
      # when empty, so this branch appears to run for unresolved references
      # too; confirm this is intended.
      if referenced = input.css("##{ref}")
        # Add @vocab to referenced using the closest ancestor having @vocab of item.
        # If the element with id reference has no resource attribute, add a resource attribute whose value is a NUMBER SIGN U+0023 followed by reference to the element.
        # If the element with id reference has no typeof attribute, add a typeof="rdfa:Pattern" attribute to the element.
        referenced.wrap(%(<div vocab="#{item_vocab}" resource="##{ref}" typeof="rdfa:Pattern"></div>))

        # Add a link child element to the element that represents the item, with a rel="rdfa:copy" attribute and an href attribute whose value is a NUMBER SIGN U+0023 followed by reference
        link = ::Nokogiri::XML::Node.new('link', input)
        link['rel'] = 'rdfa:copy'
        link['href'] = "##{ref}"
        item << link
      end
    end
  end

  @rdfa = input
  log_debug('', "Transformed document: #{input.to_html}")

  # Merge into a new hash so the RDFa-specific settings are not written back
  # into the caller's options.
  options = options.merge(
    library: :nokogiri,
    reference_folding: true,
    host_language: :html5,
    version: :"rdfa1.1")

  # Rely on RDFa reader
  super(input, options, &block)
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method in the class RDF::Enumerable

Instance Attribute Details

#rdfa ⇒ RDF::HTML::Document (readonly)

The transformed DOM using RDFa

Returns:

  • (RDF::HTML::Document)


10
11
12
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/rdfa_reader.rb', line 10

# Reader for the @rdfa instance variable, which #initialize sets to the
# transformed DOM.
#
# @return [Object, nil] the stored document, or nil if it has not been set
def rdfa; @rdfa; end

Class Method Details

.format(klass = nil) ⇒ Object



12
13
14
15
16
17
18
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-microdata-14cc40c62067/lib/rdf/microdata/rdfa_reader.rb', line 12

# Returns the format class associated with this reader.
#
# Called without an argument (or with nil) it answers
# RDF::Microdata::Format; any other argument is passed on unchanged to the
# inherited implementation.
#
# @param  [Object, nil] klass
# @return [Object]
def self.format(klass = nil)
  return super unless klass.nil?

  RDF::Microdata::Format
end