Class: RDF::Normalize::URDNA2015::NormalizationState

Inherits:
Object
  • Object
show all
Includes:
Util::Logger
Defined in:
vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Util::Logger

#log_debug, #log_depth, #log_error, #log_fatal, #log_info, #log_recover, #log_recovering?, #log_statistics, #log_warn, #logger

Constructor Details

#initialize(options) ⇒ NormalizationState

Returns a new instance of NormalizationState



103
104
105
106
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 103

# Builds an empty normalization state.
#
# Starts with no recorded statements and no recorded hashes, and creates the
# canonical identifier issuer with the "_:c14n" prefix.
#
# @param options [Object] stored as-is in +@options+; not inspected here
def initialize(options)
  @options = options
  issuer = IdentifierIssuer.new("_:c14n")
  @bnode_to_statements = {}
  @hash_to_bnodes = {}
  @canonical_issuer = issuer
end

Instance Attribute Details

#bnode_to_statements ⇒ Object

Returns the value of attribute bnode_to_statements



99
100
101
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 99

# Reader for +@bnode_to_statements+.
#
# The value is initialised to an empty Hash in #initialize; #add_statement
# stores, per node key, an Array of the statements recorded for that node.
#
# @return [Hash] the node => statements mapping
def bnode_to_statements
  @bnode_to_statements
end

#canonical_issuer ⇒ Object

Returns the value of attribute canonical_issuer



101
102
103
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 101

# Reader for +@canonical_issuer+.
#
# The value is created in #initialize as IdentifierIssuer.new("_:c14n").
#
# @return [IdentifierIssuer] the issuer held by this state
def canonical_issuer
  @canonical_issuer
end

#hash_to_bnodes ⇒ Object

Returns the value of attribute hash_to_bnodes



100
101
102
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 100

# Reader for +@hash_to_bnodes+.
#
# The value is initialised to an empty Hash in #initialize; #add_bnode_hash
# stores, per hash key, an Array of the nodes recorded under that hash.
#
# @return [Hash] the hash => nodes mapping
def hash_to_bnodes
  @hash_to_bnodes
end

Instance Method Details

#add_bnode_hash(node, hash) ⇒ Object



113
114
115
116
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 113

# Records +node+ under +hash+ in +hash_to_bnodes+, creating the bucket on
# first use and ignoring a node that is already present in it.
#
# @param node [Object] the node to record
# @param hash [Object] the key to record it under
# @return [Array, nil] the bucket after appending, or nil when +node+ was already present
def add_bnode_hash(node, hash)
  bucket = (hash_to_bnodes[hash] ||= [])
  bucket << node unless bucket.include?(node)
end

#add_statement(node, statement) ⇒ Object



108
109
110
111
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 108

# Records +statement+ for +node+ in +bnode_to_statements+, creating the list
# on first use and ignoring a statement that is already present in it.
#
# @param node [Object] the node the statement is associated with
# @param statement [Object] the statement to record
# @return [Array, nil] the list after appending, or nil when +statement+ was already present
def add_statement(node, statement)
  recorded = (bnode_to_statements[node] ||= [])
  recorded << statement unless recorded.include?(statement)
end

#hash_first_degree_quads(node) ⇒ String

Returns the SHA256 hexdigest hash of statements using this node, with replacements

Parameters:

Returns:

  • (String)

    the SHA256 hexdigest hash of statements using this node, with replacements



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 120

# Hashes the statements recorded for +node+ after relabelling their terms.
#
# In every statement from +bnode_to_statements[node]+, a term matching +node+
# is replaced by RDF::Node("a"), any other RDF::Node by RDF::Node("z"), and
# all remaining terms are kept. The relabelled statements are serialized with
# RDF::NQuads::Writer, sorted, joined and passed to #hexdigest.
#
# @param node [Object] key into +bnode_to_statements+
# @return [String] the result of #hexdigest over the sorted, joined serializations
def hash_first_degree_quads(node)
  # Placeholder substitution for a single term of a quad.
  relabel = lambda do |term|
    case term
    when node then RDF::Node("a")
    when RDF::Node then RDF::Node("z")
    else term
    end
  end

  serialized = bnode_to_statements[node].map do |statement|
    relabelled = RDF::Statement.from(statement.to_quad.map(&relabel))
    RDF::NQuads::Writer.serialize(relabelled)
  end

  log_debug("1deg") {"node: #{node}, quads: #{serialized}"}
  hexdigest(serialized.sort.join)
end

#hash_n_degree_quads(identifier, issuer) ⇒ Array<String,IdentifierIssuer>

Returns the Hash and issuer

Parameters:

Returns:



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 156

# Computes a hash for +identifier+ that incorporates the blank nodes related
# to it, recursing into related nodes that have no identifier yet.
#
# Related nodes are grouped by hash (via #hash_related_statement). For each
# group, in sorted hash order, every permutation of the group is tried; the
# lexicographically smallest resulting path is appended to the data being
# hashed, and the issuer that produced it is carried forward.
#
# @param identifier [#to_ntriples] key into +bnode_to_statements+
# @param issuer [#dup, #identifier, #issue_identifier] issuer used for nodes
#   that the canonical issuer has not identified; it is duplicated per
#   permutation rather than used directly
# @return [Array<String,IdentifierIssuer>] the hexdigest of the accumulated
#   data and the issuer selected by the last processed group (the +issuer+
#   argument itself when there are no related nodes)
def hash_n_degree_quads(identifier, issuer)
  log_debug("ndeg") {"identifier: #{identifier.to_ntriples}"}

  # hash to related blank nodes map
  map = {}

  # Populate the map from every statement recorded for this identifier.
  bnode_to_statements[identifier].each do |statement|
    hash_related_statement(identifier, statement, issuer, map)
  end

  data_to_hash = ""

  log_debug("ndeg") {"map: #{map.map {|h,l| "#{h}: #{l.map(&:to_ntriples)}"}.join('; ')}"}
  log_depth do
    # Groups are visited in sorted hash order so the result does not depend on insertion order.
    map.keys.sort.each do |hash|
      list = map[hash]
      # Iterate over related nodes
      chosen_path, chosen_issuer = "", nil
      data_to_hash += hash

      list.permutation do |permutation|
        log_debug("ndeg") {"perm: #{permutation.map(&:to_ntriples).join(",")}"}
        # Each permutation works on its own copy of the issuer.
        issuer_copy, path, recursion_list = issuer.dup, "", []

        permutation.each do |related|
          if canonical_issuer.identifier(related)
            path << canonical_issuer.issue_identifier(related)
          else
            # Nodes the copy has not identified yet are queued for recursion below.
            recursion_list << related if !issuer_copy.identifier(related)
            path << issuer_copy.issue_identifier(related)
          end

          # Stop extending the path once a path has been chosen and this one is at least as long.
          # NOTE(review): this checks length only (no lexicographic comparison), and `break`
          # leaves only this inner loop -- the permutation still continues with the recursion
          # and comparison below rather than being skipped. Confirm this is intended.
          break if !chosen_path.empty? && path.length >= chosen_path.length
        end
        log_debug("ndeg") {"hash: #{hash}, path: #{path}, recursion: #{recursion_list.map(&:to_ntriples)}"}

        recursion_list.each do |related|
          # result is [hash, issuer] from the recursive call.
          result = log_depth {hash_n_degree_quads(related, issuer_copy)}
          path << issuer_copy.issue_identifier(related)
          path << "<#{result.first}>"
          issuer_copy = result.last
          # Give up on this permutation's recursion once its path can no longer beat the chosen one.
          break if !chosen_path.empty? && path.length >= chosen_path.length && path > chosen_path
        end

        # Keep the smallest path seen so far, together with the issuer that produced it.
        if chosen_path.empty? || path < chosen_path
          chosen_path, chosen_issuer = path, issuer_copy
        end
      end

      data_to_hash += chosen_path
      # Later groups (and the return value) build on the issuer of the chosen permutation.
      issuer = chosen_issuer
    end
  end

  log_debug("ndeg") {"datatohash: #{data_to_hash.inspect}, hash: #{hexdigest(data_to_hash)}"}
  return [hexdigest(data_to_hash), issuer]
end

#hash_related_node(related, statement, issuer, position) ⇒ String

Returns the SHA256 hexdigest hash

Parameters:

Returns:

  • (String)

    the SHA256 hexdigest hash



142
143
144
145
146
147
148
149
150
151
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 142

# Hashes a related node together with its position in +statement+.
#
# The identifier used for +related+ is the first available of: the canonical
# issuer's identifier, +issuer+'s identifier, or #hash_first_degree_quads.
# The hashed input is the position, followed by the predicate's N-Triples
# form (omitted for the graph position), followed by that identifier.
#
# The input is built as a fresh string, so +position+ is never mutated; it
# may be a Symbol or a String (including a frozen one).
#
# @param related [Object] the related node
# @param statement [#predicate] statement in which +related+ occurs
# @param issuer [#identifier] issuer consulted when the canonical issuer has no identifier
# @param position [Symbol, String] position of +related+; +:g+ / "g" denotes the graph position
# @return [String] the result of #hexdigest over the assembled input
def hash_related_node(related, statement, issuer, position)
  identifier = canonical_issuer.identifier(related) ||
               issuer.identifier(related) ||
               hash_first_degree_quads(related)

  position = position.to_s
  # The predicate is not part of the input for the graph position.
  predicate = position == "g" ? "" : statement.predicate.to_ntriples
  input = "#{position}#{predicate}#{identifier}"

  log_debug("hrel") {"input: #{input.inspect}, hash: #{hexdigest(input)}"}
  hexdigest(input)
end

#hash_related_statement(identifier, statement, issuer, map) ⇒ Object

Group adjacent bnodes by hash



222
223
224
225
226
227
228
229
230
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 222

# Groups the blank nodes of +statement+ (other than +identifier+) by hash.
#
# For every term of the statement that is an RDF::Node and is not equal to
# +identifier+, the hash from #hash_related_node is computed and the term is
# added to +map[hash]+ unless it is already listed there.
#
# @param identifier [Object] the node whose neighbours are being collected
# @param statement [#to_h] statement providing position => term pairs
# @param issuer [Object] passed through to #hash_related_node
# @param map [Hash] hash => Array of related nodes; updated in place
def hash_related_statement(identifier, statement, issuer, map)
  statement.to_h(:s, :p, :o, :g).each do |position, term|
    if term.is_a?(RDF::Node) && !(term == identifier)
      related_hash = log_depth {hash_related_node(term, statement, issuer, position)}
      group = (map[related_hash] ||= [])
      group << term unless group.include?(term)
    end
  end
end

#hexdigest(val) ⇒ Object (protected)



217
218
219
# File 'vendor/bundler/ruby/2.4.0/bundler/gems/rdf-normalize-e113452533ce/lib/rdf/normalize/urdna2015.rb', line 217

# Computes the SHA-256 digest of +val+ as a hexadecimal string.
#
# @param val [String] the data to hash
# @return [String] hex-encoded SHA-256 digest
def hexdigest(val)
  Digest::SHA256.new.update(val).hexdigest
end