Class: RDF::Normalize::RDFC10::NormalizationState

Inherits:
Object
  • Object
show all
Includes:
Util::Logger
Defined in:
vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb

Direct Known Subclasses

URGNA2012::NormalizationState

Constant Summary

Constants included from Util::Logger

Util::Logger::IOWrapper

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Util::Logger

#log_debug, #log_depth, #log_error, #log_fatal, #log_info, #log_recover, #log_recovering?, #log_statistics, #log_warn, #logger

Constructor Details

#initialize(**options) ⇒ NormalizationState

Returns a new instance of NormalizationState.



166
167
168
169
170
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 166

# Builds an empty normalization state.
#
# @param options [Hash] keyword options; stored untouched in @options
def initialize(**options)
  @options = options
  @bnode_to_statements = {}
  @hash_to_bnodes = {}
  # Issuer for canonical identifiers, created with the "c14n" prefix argument.
  @canonical_issuer = IdentifierIssuer.new("c14n")
  # A nil max_calls means hash_n_degree_quads performs no limit check.
  @max_calls = nil
  @total_calls = 0
end

Instance Attribute Details

#bnode_to_statements ⇒ Object

Returns the value of attribute bnode_to_statements.



160
161
162
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 160

# Reader for the map filled by #add_statement (node => list of statements).
def bnode_to_statements = @bnode_to_statements

#canonical_issuer ⇒ Object

Returns the value of attribute canonical_issuer.



162
163
164
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 162

# Reader for the issuer stored in @canonical_issuer.
def canonical_issuer = @canonical_issuer

#hash_to_bnodes ⇒ Object

Returns the value of attribute hash_to_bnodes.



161
162
163
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 161

# Reader for the map filled by #add_bnode_hash (hash => list of nodes).
def hash_to_bnodes = @hash_to_bnodes

#max_calls ⇒ Object

Returns the value of attribute max_calls.



163
164
165
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 163

# Reader for @max_calls, the ceiling that hash_n_degree_quads compares
# total_calls against (no check is made while it is nil).
def max_calls = @max_calls

#total_calls ⇒ Object

Returns the value of attribute total_calls.



164
165
166
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 164

# Reader for @total_calls, the counter incremented by hash_n_degree_quads.
def total_calls = @total_calls

Instance Method Details

#add_bnode_hash(node, hash) ⇒ Object



177
178
179
180
181
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 177

# Records +node+ under +hash+ in hash_to_bnodes, creating the bucket on
# first use and skipping nodes that are already present.
#
# @param node [Object] the node to record
# @param hash [Object] the key to file the node under
# @return [Array, nil] the updated bucket, or nil when +node+ was already there
def add_bnode_hash(node, hash)
  bucket = (hash_to_bnodes[hash] ||= [])
  # Membership is decided with eql? rather than ==.
  bucket << node if bucket.none? { |member| member.eql?(node) }
end

#add_statement(node, statement) ⇒ Object



172
173
174
175
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 172

# Records +statement+ under +node+ in bnode_to_statements, creating the list
# on first use and skipping statements that are already present (per eql?).
#
# @param node [Object] the node the statement mentions
# @param statement [Object] the statement to record
# @return [Array, nil] the updated list, or nil when nothing was added
def add_statement(node, statement)
  known = (bnode_to_statements[node] ||= [])
  known << statement if known.none? { |existing| existing.eql?(statement) }
end

#hash_first_degree_quads(node) ⇒ String

This algorithm calculates a hash for a given blank node across the quads in a dataset in which that blank node is a component. If the hash uniquely identifies that blank node, no further examination is necessary. Otherwise, a hash will be created for the blank node using the algorithm in 4.9 Hash N-Degree Quads invoked via 4.5 Canonicalization Algorithm.

Parameters:

  • node (RDF::Node)

    The reference blank node identifier

Returns:

  • (String)

    the SHA256 hexdigest hash of statements using this node, with replacements



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 187

# Hashes the statements recorded for +node+ after masking blank nodes:
# +node+ itself is replaced by the node "a", every other RDF::Node by "z".
# The serialized statements are sorted, joined and passed to hexdigest.
#
# @param node [RDF::Node] the reference blank node
# @return [String] hexdigest of the sorted, masked N-Quads
def hash_first_degree_quads(node)
  nquads = bnode_to_statements[node].map do |statement|
    masked_terms = statement.to_quad.map do |term|
      case term
      when node
        RDF::Node("a")
      when RDF::Node
        RDF::Node("z")
      else
        term
      end
    end
    RDF::Statement.from(masked_terms).to_nquads
  end

  log_debug("log point", "Hash First Degree Quads function (4.7.3).")
  log_debug("nquads:")
  nquads.each { |line| log_debug {"  - #{line.strip}"} }

  digest = hexdigest(nquads.sort.join)
  log_debug("hash") {digest}
  digest
end

#hash_n_degree_quads(node, issuer) ⇒ Array<String,IdentifierIssuer>

Returns the Hash and issuer.

Parameters:

Returns:

Raises:

  • (RuntimeError)

    If the total number of calls has exceeded max_calls times the number of blank nodes in the dataset.



237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 237

# Computes a hash for +node+ from the blank nodes related to it.
#
# Outline, as implemented below:
# 1. Enforce the call limit and bump @total_calls.
# 2. Fill +hn+ (related hash => related nodes) by calling
#    hash_related_statement for every statement recorded for +node+.
# 3. For each related hash in sorted order, try every permutation of its
#    nodes, build a path string per permutation (recursing into nodes that
#    have no identifier yet) and keep the smallest path with its issuer copy.
# 4. Return the hexdigest of the accumulated data plus the last chosen issuer.
#
# @param node [Object] the blank node to hash; it must respond to #id and be
#   a key of bnode_to_statements (presumably an RDF::Node — confirm with callers)
# @param issuer [IdentifierIssuer] issuer for temporary identifiers; it is
#   dup'ed per permutation and the local variable is rebound to the chosen copy
# @return [Array<String,IdentifierIssuer>] the hash and the resulting issuer
# @raise [RuntimeError] when max_calls is set and total_calls has reached it
def hash_n_degree_quads(node, issuer)
  log_debug("hndq:")
  log_debug("  log point", "Hash N-Degree Quads function (4.9.3).")
  log_debug("  identifier") {node.id}
  log_debug("  issuer") {issuer.inspect}

  # Guard against runaway recursion; a nil max_calls disables the check.
  if max_calls && total_calls >= max_calls
    raise "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
  end
  @total_calls += 1

  # hash to related blank nodes map
  hn = {}

  log_debug("  hndq.2:")
  log_debug("    log point", "Quads for identifier (4.9.3 (2)).")
  log_debug("    quads:")
  bnode_to_statements[node].each do |s|
    log_debug {"    - #{s.to_nquads.strip}"}
  end

  # Step 3
  # hash_related_statement fills hn for every blank node term other than node.
  log_debug("  hndq.3:")
  log_debug("    log point", "Hash N-Degree Quads function (4.9.3 (3)).")
  log_debug("    with:") unless bnode_to_statements[node].empty?
  bnode_to_statements[node].each do |statement|
    log_debug {"      - quad: #{statement.to_nquads.strip}"}
    log_debug("        hndq.3.1:")
    log_debug("          log point", "Hash related bnode component (4.9.3 (3.1))")
    log_depth(depth: 10) {hash_related_statement(node, statement, issuer, hn)}
  end
  log_debug("    Hash to bnodes:")
  hn.each do |k,v|
    log_debug("      #{k}:")
    v.each do |vv|
      log_debug("        - #{vv.id}")
    end
  end

  # Accumulates each related hash followed by the path chosen for it.
  data_to_hash = ""

  # Step 5
  log_debug("  hndq.5:")
  log_debug("    log point", "Hash N-Degree Quads function (4.9.3 (5)), entering loop.")
  log_debug("    with:")
  hn.keys.sort.each do |hash|
    log_debug("      - related hash", hash)
    log_debug("        data to hash") {data_to_hash.to_json}
    list = hn[hash]
    # Iterate over related nodes
    chosen_path, chosen_issuer = "", nil
    data_to_hash += hash

    log_debug("        hndq.5.4:")
    log_debug("          log point", "Hash N-Degree Quads function (4.9.3 (5.4)), entering loop.")
    log_debug("          with:") unless list.empty?
    list.permutation do |permutation|
      log_debug("          - perm") {permutation.map(&:id).to_json(indent: ' ', space: ' ')}
      # Each permutation works on its own issuer copy so abandoned attempts
      # do not leak identifiers into the issuer passed in.
      issuer_copy, path, recursion_list = issuer.dup, "", []

      log_debug("            hndq.5.4.4:")
      log_debug("              log point", "Hash N-Degree Quads function (4.9.3 (5.4.4)), entering loop.")
      log_debug("              with:")
      permutation.each do |related|
        log_debug("                - related") {related.id}
        log_debug("                  path") {path.to_json}
        if canonical_issuer.identifier(related)
          path << '_:' + canonical_issuer.issue_identifier(related)
        else
          # Nodes seen for the first time by this issuer copy are hashed recursively below.
          recursion_list << related if !issuer_copy.identifier(related)
          path << '_:' + issuer_copy.issue_identifier(related)
        end

        # Skip to the next permutation if chosen path isn't empty and the path is greater than the chosen path
        # NOTE(review): this test compares lengths only, whereas the matching
        # check in the recursion loop below also requires `path > chosen_path`;
        # `break` also leaves just this inner loop, so the recursion step still
        # runs. Confirm against the referenced spec step (5.4.4).
        break if !chosen_path.empty? && path.length >= chosen_path.length
      end

      log_debug("            hndq.5.4.5:")
      log_debug("              log point", "Hash N-Degree Quads function (4.9.3 (5.4.5)), before possible recursion.")
      log_debug("              recursion list") {recursion_list.map(&:id).to_json(indent: ' ')}
      log_debug("              path") {path.to_json}
      log_debug("              with:") unless recursion_list.empty?
      recursion_list.each do |related|
        log_debug("                - related") {related.id}
        # result is [hash, issuer] from the recursive call.
        result = log_depth(depth: 18) do
          hash_n_degree_quads(related, issuer_copy)
        end
        path << '_:' + issuer_copy.issue_identifier(related)
        path << "<#{result.first}>"
        issuer_copy = result.last
        log_debug("                  hndq.5.4.5.4:") 
        log_debug("                    log point", "Hash N-Degree Quads function (4.9.3 (5.4.5.4)), combine result of recursion.")
        log_debug("                    path") {path.to_json}
        log_debug("                    issuer copy") {issuer_copy.inspect}
        # Stop recursing once this path can no longer beat the chosen one.
        break if !chosen_path.empty? && path.length >= chosen_path.length && path > chosen_path
      end

      # Keep the smallest path (String comparison) and the issuer that produced it.
      if chosen_path.empty? || path < chosen_path
        chosen_path, chosen_issuer = path, issuer_copy
      end
    end

    data_to_hash += chosen_path
    log_debug("        hndq.5.5:")
    log_debug("          log point", "Hash N-Degree Quads function (4.9.3 (5.5). End of current loop with Hn hashes.")
    log_debug("          chosen path") {chosen_path.to_json}
    log_debug("          data to hash") {data_to_hash.to_json}
    # Later related hashes continue from the issuer chosen for this one.
    issuer = chosen_issuer
  end

  log_debug("  hndq.6:")
  log_debug("    log point", "Leaving Hash N-Degree Quads function (4.9.3).")
  log_debug("    hash") {hexdigest(data_to_hash)}
  log_depth(depth: 4) {log_debug("issuer") {issuer.inspect}}
  return [hexdigest(data_to_hash), issuer]
end

#hash_related_node(related, statement, issuer, position) ⇒ String

Returns the SHA256 hexdigest hash.

Parameters:

Returns:

  • (String)

    the SHA256 hexdigest hash



215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 215

# Hashes one related blank node as seen from a statement.
#
# The hashed input is the position, then the statement predicate as
# N-Triples (omitted when position is :g), then either "_:" plus an
# identifier already issued for +related+ (canonical issuer first, then
# +issuer+) or, failing that, the first-degree hash of +related+.
#
# @param related [Object] the related blank node
# @param statement [Object] statement supplying the predicate
# @param issuer [Object] issuer consulted after the canonical issuer
# @param position [Symbol] position of +related+ in the statement
# @return [String] hexdigest of the assembled input
def hash_related_node(related, statement, issuer, position)
  log_debug("related") {related.id}
  input = "#{position}"
  input << statement.predicate.to_ntriples if position != :g

  identifier = canonical_issuer.identifier(related) || issuer.identifier(related)
  if identifier
    input << "_:#{identifier}"
  else
    log_debug("h1dq:")
    input << log_depth(depth: 2) { hash_first_degree_quads(related) }
  end

  log_debug("input") {input.inspect}
  log_debug("hash") {hexdigest(input)}
  hexdigest(input)
end

#hash_related_statement(node, statement, issuer, map) ⇒ Object

Group adjacent bnodes by hash.



374
375
376
377
378
379
380
381
382
383
384
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 374

# Groups the blank nodes adjacent to +node+ in +statement+ by their related
# hash. Every RDF::Node term in the s/p/o/g positions other than +node+ is
# hashed with hash_related_node and appended (once, per eql?) to map[hash].
#
# @param node [Object] the reference blank node, skipped when encountered
# @param statement [Object] statement whose terms are examined
# @param issuer [Object] issuer forwarded to hash_related_node
# @param map [Hash] hash => nodes map, updated in place
def hash_related_statement(node, statement, issuer, map)
  mentions_bnode = statement.to_h.values.any? { |term| term.is_a?(RDF::Node) }
  log_debug("with:") if mentions_bnode

  statement.to_h(:s, :p, :o, :g).each do |position, term|
    next unless term.is_a?(RDF::Node)
    next if term == node

    log_debug("  - position", position)
    digest = log_depth(depth: 4) { hash_related_node(term, statement, issuer, position) }
    bucket = (map[digest] ||= [])
    bucket << term if bucket.none? { |member| member.eql?(term) }
  end
end

#hexdigest(val) ⇒ Object (protected)



369
370
371
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 369

# Returns the hex-encoded SHA-256 digest of +val+.
#
# @param val [String] the data to hash
# @return [String] hex-encoded digest
def hexdigest(val)
  Digest::SHA256.new.update(val).hexdigest
end

#inspect ⇒ Object



354
355
356
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 354

# Multi-line summary of the state: one line each for bnode_to_statements,
# hash_to_bnodes and the canonical issuer.
#
# @return [String]
def inspect
  [
    "NormalizationState:",
    "bnode_to_statements: #{inspect_bnode_to_statements}",
    "hash_to_bnodes: #{inspect_hash_to_bnodes}",
    "canonical_issuer: #{canonical_issuer.inspect}"
  ].join("\n")
end

#inspect_bnode_to_statements ⇒ Object



358
359
360
361
362
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 358

# Renders bnode_to_statements as "id: [nquads, ...]" entries joined by ", ".
#
# @return [String]
def inspect_bnode_to_statements
  entries = bnode_to_statements.map do |bnode, statements|
    lines = statements.map { |statement| statement.to_nquads.strip }
    "#{bnode.id}: #{lines}"
  end
  entries.join(", ")
end

#inspect_hash_to_bnodes ⇒ Object



364
365
# File 'vendor/bundler/ruby/3.2.0/bundler/gems/rdf-normalize-7f3a953c511f/lib/rdf/normalize/rdfc10.rb', line 364

# Renders hash_to_bnodes as "hash: [node ids]" entries joined by ", ",
# in the same shape inspect_bnode_to_statements uses.
#
# The previous empty body returned nil, so #inspect printed nothing after
# "hash_to_bnodes: " regardless of the state's contents.
#
# @return [String] empty when no hashes have been recorded
def inspect_hash_to_bnodes
  hash_to_bnodes.map do |hash, nodes|
    "#{hash}: #{nodes.map(&:id)}"
  end.join(", ")
end