Class: RDF::Normalize::URDNA2015

Inherits:
Object
  • Object
show all
Includes:
Enumerable, Base, Util::Logger
Defined in:
vendor/bundler/ruby/2.5.0/bundler/gems/rdf-normalize-7721c0bf9bdf/lib/rdf/normalize/urdna2015.rb

Direct Known Subclasses

URGNA2012

Defined Under Namespace

Classes: IdentifierIssuer, NormalizationState

Instance Attribute Summary

Attributes included from Base

#dataset

Instance Method Summary collapse

Methods included from Enumerable

add_entailment, #entail

Methods included from Isomorphic

#bijection_to, #isomorphic_with?

Constructor Details

#initialize(enumerable, options) ⇒ RDF::Enumerable

Create an enumerable with grounded nodes

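Parameters:

  • enumerable — the dataset to normalize; it is stored as the #dataset attribute and must respond to #each_statement
  • options — options retained for the normalization run; they are handed to NormalizationState.new each time #each is called
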

12
13
14
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-normalize-7721c0bf9bdf/lib/rdf/normalize/urdna2015.rb', line 12

# Wraps a dataset so that enumerating it yields normalized statements.
#
# @param enumerable the dataset to normalize; kept in +@dataset+ and later
#   walked with +each_statement+ by #normalize_statements
# @param options options kept in +@options+ and passed to
#   NormalizationState.new whenever #each runs
def initialize(enumerable, options)
  @dataset = enumerable
  @options = options
end

Instance Method Details

#each(&block) ⇒ Object



16
17
18
19
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-normalize-7721c0bf9bdf/lib/rdf/normalize/urdna2015.rb', line 16

# Enumerates the dataset's statements after blank-node normalization.
#
# Every call builds a fresh NormalizationState from the options given to the
# constructor, so repeated enumeration does not share state between runs.
#
# @yield [statement] each statement produced by #normalize_statements
# @return [Enumerator] when called without a block
# @return [Object] otherwise, whatever #normalize_statements returns
def each(&block)
  # Without a block the normalization would run to completion and then fail
  # on `block.call`; hand back a lazy enumerator instead.
  return enum_for(:each) unless block_given?

  ns = NormalizationState.new(@options)
  normalize_statements(ns, &block)
end

#normalize_statements(ns, &block) ⇒ Object (protected)



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-normalize-7721c0bf9bdf/lib/rdf/normalize/urdna2015.rb', line 22

# Assigns canonical identifiers to the blank nodes of +dataset+ and yields
# every statement with its blank nodes swapped for the canonical ones.
#
# @param ns [NormalizationState] working state for this run; it is mutated
#   (blank-node/statement index, hash buckets, canonical issuer)
# @yield [statement] once per statement in +dataset+, in dataset order
# @yieldparam statement the original statement when it has no blank nodes,
#   otherwise a new RDF::Statement built from the relabelled terms
# @return [Object] whatever the final +dataset.each_statement+ call returns
def normalize_statements(ns, &block)
  # Phase 1: index each blank node against the statements that mention it.
  dataset.each_statement do |stmt|
    bnodes = stmt.to_quad.compact.select { |term| term.node? }
    bnodes.each { |bnode| ns.add_statement(bnode, stmt) }
  end

  pending = ns.bnode_to_statements.keys
  progressed = true

  # Phase 2: keep labelling nodes whose first-degree hash is unique; stop as
  # soon as a full pass labels nothing.
  while progressed
    progressed = false
    ns.hash_to_bnodes = {}

    # Re-bucket every still-unlabelled node by its first-degree hash.
    pending.each do |bnode|
      digest = log_depth { ns.hash_first_degree_quads(bnode) }
      log_debug("1deg") { "hash: #{digest}" }
      ns.add_bnode_hash(bnode, digest)
    end

    # Visit buckets in sorted hash order so identifiers are issued
    # deterministically; only buckets holding a single node are resolved here.
    ns.hash_to_bnodes.keys.sort.each do |digest|
      sharing = ns.hash_to_bnodes[digest]
      unless sharing.length > 1
        bnode = sharing.first
        canonical_id = ns.canonical_issuer.issue_identifier(bnode)
        log_debug("single node") { "node: #{bnode.to_ntriples}, hash: #{digest}, id: #{canonical_id}" }
        pending = pending - sharing
        ns.hash_to_bnodes.delete(digest)
        progressed = true
      end
    end
  end

  # Phase 3: the remaining buckets each hold several nodes with the same hash.
  ns.hash_to_bnodes.keys.sort.each do |digest|
    sharing = ns.hash_to_bnodes[digest]
    log_debug("multiple nodes") { "node: #{sharing.map(&:to_ntriples).join(",")}, hash: #{digest}" }

    # For every node not yet canonically labelled, compute an n-degree hash
    # using a throw-away issuer seeded with that node.
    paths = []
    sharing.each do |bnode|
      unless ns.canonical_issuer.issued.include?(bnode)
        scratch_issuer = IdentifierIssuer.new("_:b")
        scratch_issuer.issue_identifier(bnode)
        paths << log_depth { ns.hash_n_degree_quads(bnode, scratch_issuer) }
      end
    end
    log_debug("->") { "hash_path_list: #{paths.map(&:first).inspect}" }

    # Issue canonical identifiers in order of the n-degree hash, following
    # the order in which each throw-away issuer handed out its identifiers.
    paths.sort_by { |path| path.first }.each do |path|
      path.last.issued.each do |bnode|
        canonical_id = ns.canonical_issuer.issue_identifier(bnode)
        log_debug("-->") { "node: #{bnode.to_ntriples}, id: #{canonical_id}" }
      end
    end
  end

  # Phase 4: emit the statements, relabelling blank nodes where present.
  dataset.each_statement do |stmt|
    emitted =
      if stmt.has_blank_nodes?
        terms = stmt.to_quad.compact.map do |term|
          if term.node?
            # Drop the first two characters of the issued identifier before
            # interning it as a node.
            label = ns.canonical_issuer.identifier(term)
            RDF::Node.intern(label[2..-1])
          else
            term
          end
        end
        RDF::Statement.from(terms)
      else
        stmt
      end
    block.call(emitted)
  end
end