Class: RDF::Microdata::RdfaReader

Inherits:
RDFa::Reader show all
Defined in:
vendor/bundler/ruby/3.3.0/bundler/gems/rdf-microdata-73e8bfd8e661/lib/rdf/microdata/rdfa_reader.rb

Overview

Update DOM to turn Microdata into RDFa and parse using the RDFa Reader

Constant Summary

Constants inherited from RDFa::Reader

RDFa::Reader::NC_REGEXP, RDFa::Reader::SafeCURIEorCURIEorIRI, RDFa::Reader::TERM_REGEXP, RDFa::Reader::TERMorCURIEorAbsIRI, RDFa::Reader::XHTML

Constants included from Util::Logger

Util::Logger::IOWrapper

Instance Attribute Summary collapse

Attributes inherited from RDFa::Reader

#host_language, #implementation, #repository, #version

Attributes inherited from Reader

#options

Attributes included from Enumerable

#existentials, #universals

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from RDFa::Reader

#each_statement, #each_triple, #extract_script, options

Methods included from Util::Logger

#log_debug, #log_depth, #log_error, #log_fatal, #log_info, #log_recover, #log_recovering?, #log_statistics, #log_warn, #logger

Methods included from RDFa::Expansion

#copy_properties, #expand, #rule

Methods inherited from Reader

#base_uri, #canonicalize?, #close, each, #each_pg_statement, #each_statement, #each_triple, #encoding, #fail_object, #fail_predicate, #fail_subject, for, #intern?, #lineno, open, options, #prefix, #prefixes, #prefixes=, #read_statement, #read_triple, #rewind, to_sym, #to_sym, #valid?, #validate?

Methods included from Util::Aliasing::LateBound

#alias_method

Methods included from Enumerable

add_entailment, #canonicalize, #canonicalize!, #dump, #each_graph, #each_object, #each_predicate, #each_quad, #each_statement, #each_subject, #each_term, #each_triple, #entail, #enum_graph, #enum_object, #enum_predicate, #enum_quad, #enum_statement, #enum_subject, #enum_term, #enum_triple, #graph?, #graph_names, #invalid?, #method_missing, #object?, #objects, #predicate?, #predicates, #project_graph, #quad?, #quads, #respond_to_missing?, #statement?, #statements, #subject?, #subjects, #supports?, #term?, #terms, #to_a, #to_h, #to_set, #triple?, #triples, #valid?, #validate!

Methods included from Isomorphic

#bijection_to, #isomorphic_with?

Methods included from Countable

#count, #empty?

Methods included from Readable

#readable?

Constructor Details

#initialize(input = $stdin, **options) {|reader| ... } ⇒ reader

Initializes the RdfaReader instance.

Parameters:

  • input (IO, File, String) (defaults to: $stdin)

    the input stream to read

  • options (Hash{Symbol => Object})

    any additional options (see RDF::Reader#initialize)

Yields:

  • (reader)

    self

Yield Parameters:

Yield Returns:

  • (void)

    ignored

Raises:



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/rdf-microdata-73e8bfd8e661/lib/rdf/microdata/rdfa_reader.rb', line 31

##
# Initializes the RdfaReader instance.
#
# Parses `input` into a Nokogiri document (an already parsed document is used
# as-is and modified in place), rewrites the Microdata attributes
# (`itemscope`, `itemtype`, `itemid`, `itemprop`, `itemref`) into their RDFa
# counterparts, stores the transformed document in `@rdfa`, and then hands it
# to the RDFa reader through `super`.
#
# @param  [IO, File, String, Nokogiri::XML::Document, Nokogiri::HTML::Document] input
#   the input stream to read, or an already parsed document
# @param  [Hash{Symbol => Object}] options
#   any additional options (see `RDF::Reader#initialize`)
# @option options [#to_s] :encoding
#   encoding of `input`; defaults to `input.charset` when `input` responds to
#   it, otherwise to 'utf-8'
# @yield  [reader] `self`
# @yieldreturn [void] ignored
def initialize(input = $stdin, **options, &block)
  @options = options
  log_debug('', "using RDFa transformation reader")

  input = case input
  when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
  else
    # Try to detect charset from input
    options[:encoding] ||= input.charset if input.respond_to?(:charset)

    # Otherwise, default is utf-8
    options[:encoding] = (options[:encoding] || 'utf-8').to_s
    begin
      input = input.read if input.respond_to?(:read)
      # Re-tag a copy: force_encoding mutates its receiver, which would change
      # the caller's string and raise FrozenError on a frozen one.
      ::Nokogiri::HTML5(input.dup.force_encoding(options[:encoding]), max_parse_errors: 1000)
    rescue LoadError, NoMethodError
      # Fall back to the HTML4 parser when the HTML5 entry point is unusable.
      ::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding])
    end
  end

  # For all members having @itemscope
  input.css("[itemscope]").each do |item|
    # Get @itemtypes to create @type and @vocab
    item.attribute('itemscope').remove
    if item['itemtype']
      # Only absolute URLs
      types = item.attribute('itemtype').
        remove.
        to_s.
        split(/\s+/).
        select {|t| RDF::URI(t).absolute?}

      item['typeof'] = types.join(' ') unless types.empty?
      if (first_type = types.first)
        # The vocabulary is the first type truncated after its last '/' or '#'
        type_vocab = first_type.sub(/([\/\#])[^\/\#]*$/, '\1')
        item['vocab'] = Registry.new(type_vocab).uri.to_s
      end
    end
    # Every item gets a @typeof, even an empty one
    item['typeof'] ||= ''

    # Change each itemid attribute to a resource attribute with the same value
    if item['itemid']
      id = item.attribute('itemid').remove
      item['resource'] = id
    end
  end

  # Add @resource for all itemprop values of object based on a @data value
  input.css("object[itemprop][data]").each do |item|
    item['resource'] ||= item['data']
  end

  # Replace all @itemprop values with @property
  input.css("[itemprop]").each {|item| item['property'] = item.attribute('itemprop').remove}

  # Wrap all @itemref properties
  input.css("[itemref]").each do |item|
    # Vocabulary in scope: the item's own @vocab, otherwise the @vocab *value*
    # of the closest ancestor declaring one (not the ancestor node itself,
    # whose #to_s would be its serialized markup).
    item_vocab = item['vocab']
    unless item_vocab
      vocab_holder = item.ancestors.detect {|a| a.attribute('vocab')}
      item_vocab = vocab_holder['vocab'] if vocab_holder
    end

    # Whitespace-split without a pattern so leading whitespace does not produce
    # an empty token (which would build the selector "#").
    item.attribute('itemref').remove.to_s.split.each do |ref|
      referenced = input.css("##{ref}")
      # A NodeSet is truthy even when empty: skip ids that match nothing so no
      # dangling rdfa:copy link is added to the item.
      next if referenced.empty?

      # Add @vocab to referenced using the closest ancestor having @vocab of item.
      # If the element with id reference has no resource attribute, add a resource attribute whose value is a NUMBER SIGN U+0023 followed by reference to the element.
      # If the element with id reference has no typeof attribute, add a typeof="rdfa:Pattern" attribute to the element.
      referenced.wrap(%(<div vocab="#{item_vocab}" resource="##{ref}" typeof="rdfa:Pattern" />))

      # Add a link child element to the element that represents the item, with a rel="rdfa:copy" attribute and an href attribute whose value is a NUMBER SIGN U+0023 followed by reference
      link = ::Nokogiri::XML::Node.new('link', input)
      link['rel'] = 'rdfa:copy'
      link['href'] = "##{ref}"
      item << link
    end
  end

  @rdfa = input
  log_debug('', "Transformed document: #{input.to_html}")

  options = options.merge(
    library: :nokogiri,
    reference_folding: true,
    host_language: :html5,
    version: :"rdfa1.1")

  # Rely on RDFa reader
  super(input, **options, &block)
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method in the class RDF::Enumerable

Instance Attribute Details

#rdfa ⇒ RDF::HTML::Document (readonly)

The transformed DOM using RDFa

Returns:

  • (RDF::HTML::Document)


9
10
11
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/rdf-microdata-73e8bfd8e661/lib/rdf/microdata/rdfa_reader.rb', line 9

# Reader for the transformed DOM held in @rdfa.
#
# @return [Object] the current value of @rdfa (nil when it has not been set)
def rdfa = @rdfa

Class Method Details

.format(klass = nil) ⇒ Object



11
12
13
14
15
16
17
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/rdf-microdata-73e8bfd8e661/lib/rdf/microdata/rdfa_reader.rb', line 11

# Returns the format class for this reader.
#
# @param [Object, nil] klass
#   when nil, RDF::Microdata::Format is returned; any other value is passed
#   on to the superclass implementation
# @return [Object] RDF::Microdata::Format, or the result of `super`
def self.format(klass = nil)
  return RDF::Microdata::Format if klass.nil?

  super
end