Module: EBNF::PEG::Parser

Included in:
ABNF, ISOEBNF, EBNF::Parser, SPARQL::Grammar::Parser, ShEx::Parser
Defined in:
vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb

Overview

A generic PEG parser that uses the parsed rules, modified for PEG parsing.

Defined Under Namespace

Modules: ClassMethods Classes: Error, Unmatched

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#packratHash{Integer => Hash{Symbol => Object}} (readonly)

A Hash structure used for memoizing rule results for a given input location.

@example Partial structure for memoizing results for a particular rule

 {
   rule: {
     86: {
           pos: 
           result: [<EBNF::Rule:80 {
             sym: :ebnf,
               id: "1",
               kind: :rule,
               expr: [:star, [:alt, :declaration, :rule]]}>],
          }
     131: [<EBNF::Rule:80 {sym: :ebnf,
         id: "1",
         kind: :rule,
         expr: [:star, [:alt, :declaration, :rule]]}>,
       <EBNF::Rule:100 {
         sym: :declaration,
         id: "2",
         kind: :rule,
         expr: [:alt, "@terminals", :pass]}>]
   },
   POSTFIX: {
     80: "*",
     368: "*",
     399: "+"
   }
 }

Returns:



45
46
47
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 45

# Reader for the packrat memo table.
#
# @return [Hash, nil] the current value of +@packrat+
def packrat; @packrat; end

#scannerScanner (readonly)

Returns the scanner used for scanning input.

Returns:

  • (Scanner)

    used for scanning input.



11
12
13
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 11

# Reader for the input scanner.
#
# @return [Scanner, nil] the current value of +@scanner+
def scanner; @scanner; end

#whitespaceRegexp, Rule (readonly)

Returns how to remove inter-rule whitespace.

Returns:

  • (Regexp, Rule)

    how to remove inter-rule whitespace



7
8
9
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 7

# Reader for the inter-rule whitespace matcher.
#
# @return [Regexp, Rule, nil] the current value of +@whitespace+
def whitespace; @whitespace; end

Instance Method Details

#clear_packratObject

Clear out packrat memoizer. This is appropriate when completing a top-level rule when there is no possibility of backtracking.



270
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 270

# Empties the packrat memo table in place.
#
# @return [Object] whatever +@packrat.clear+ returns (the emptied table for a Hash)
def clear_packrat
  @packrat.clear
end

#debug(node, message, **options) ⇒ Object

Debug logging.

The call is ignored, unless @options[:logger] is set.

Parameters:

  • args (Array<String>)

    Relevant location associated with message

  • options (Hash)

Options Hash (**options):

  • :depth (Integer)

    Recursion depth for indenting output

Yield Returns:

  • (String)

    additional string appended to message.



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 352

# Debug logging; a no-op unless +@options[:logger]+ is set.
#
# A trailing Hash in +args+ is treated as options (+:lineno+, +:level+,
# +:depth+). The remaining arguments are the message parts.
#
# Dispatch order:
# 1. if the parser responds to +log_debug+, forward everything to it, with
#    the numeric level translated to a symbol;
# 2. else if the logger responds to +add+, call +add(level, line)+;
# 3. else if the logger responds to +<<+, append the line.
#
# @param args [Array] message parts, optionally followed by an options Hash
# @yieldreturn [String] additional string appended to the message
# @return [Object, nil] the result of the logging call, or nil when nothing was logged
def debug(*args, &block)
  logger = @options[:logger]
  return unless logger

  options = args.last.is_a?(Hash) ? args.pop : {}
  lineno = options[:lineno] || (scanner.lineno if scanner)
  level = options.fetch(:level, 0)
  depth = options[:depth] || self.depth

  if respond_to?(:log_debug)
    level_name = [:debug, :info, :warn, :error, :fatal][level]
    log_debug(*args, **options.merge(level: level_name, lineno: lineno, depth: depth), &block)
  elsif logger.respond_to?(:add) || logger.respond_to?(:<<)
    # Both plain-logger flavours receive the same formatted line.
    args << yield if block_given?
    line = "[#{lineno}]#{' ' * depth}#{args.join(' ')}"
    if logger.respond_to?(:add)
      logger.add(level, line)
    else
      logger << line
    end
  end
end

#depthObject

Depth of parsing, for log output.



264
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 264

# Depth of parsing, for log output.
#
# @return [Integer] number of entries in +@productions+, or 0 when it is unset
def depth
  @productions ? @productions.length : 0
end

#error(node, message, **options) ⇒ Object

Error information, used as level 3 logger messages. Messages may be logged and are saved for reporting at end of parsing.

Parameters:

Options Hash (**options):

  • :production (URI, #to_s)
  • :raise (Boolean)

    abort further processing

  • :backtrace (Array)

    state where error occurred

See Also:



283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 283

# Records and reports an error (logger level 3).
#
# The formatted message is appended to +@error_log+ unless the parser is
# already recovering from an earlier error. The parser is then marked as
# recovering and the message is passed to {#debug}.
#
# @param node [String] location associated with the message
# @param message [String] error text
# @param options [Hash] +:lineno+, +:rest+, +:production+, +:raise+ and
#   +:backtrace+ are read here; all options are forwarded to +debug+
# @raise [Error] when +options[:raise]+ or +@options[:validate]+ is set
# @return [nil]
def error(node, message, **options)
  lineno = options[:lineno] || (scanner.lineno if scanner)
  rest = options[:rest]
  production = options[:production]

  location = lineno ? "[line: #{lineno}] " : ""
  found = rest ? " (found #{rest.inspect})" : ""
  origin = production ? ", production = #{production.inspect}" : ""
  m = "ERROR " + location + message + found + origin

  # Only the first error of a recovery sequence is saved for the final report.
  @error_log << m unless @recovering
  @recovering = true
  debug(node, m, level: 3, **options)

  return unless options[:raise] || @options[:validate]

  raise Error.new(m,
          lineno: lineno,
          rest: rest,
          production: production,
          backtrace: options[:backtrace])
end

#find_rule(sym) ⇒ Rule

Find a rule for a symbol

Parameters:

Returns:



474
475
476
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 474

# Looks up the rule registered under a symbol.
#
# @param sym [Symbol] key into +@rules+
# @return [Rule, nil] the value stored in +@rules+ for +sym+
def find_rule(sym); @rules[sym]; end

#onFinish(result, **options) ⇒ Object

Finish of production

Parameters:

  • result (Object)

    parse result

  • **options (Hash)

    other options available for handlers

Returns:

  • (Object)

    parse result, or the value returned from the handler



413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 413

# Finish of a production.
#
# Looks up the finish handler registered for the production on top of
# +@productions+ and, when one exists, the parser is not recovering and the
# result is not +:unmatched+, replaces the result with the handler's value.
#
# @param result [Object] parse result
# @param options [Hash] other options passed through to the handler
# @return [Object] parse result, or the value returned from the handler.
#   If the handler raises ArgumentError or Error, the error is reported and
#   the result becomes +false+.
def onFinish(result, **options)
  prod = @productions.last
  finish_handler, flush_packrat = self.class.production_handlers[prod]

  # Pop the production data element only when this production has a finish
  # or start handler registered.
  data = (@prod_data.pop if finish_handler || self.class.start_handlers[prod])
  if data && prod != data[:_production]
    error("finish",
      "prod_data production mismatch: expected #{prod.inspect}, got #{data[:_production].inspect}",
      production: prod, prod_data: @prod_data)
  end

  if finish_handler && !@recovering && result != :unmatched
    result = begin
      self.class.eval_with_binding(self) { finish_handler.call(result, data, @parse_callback, **options) }
    rescue ArgumentError, Error => e
      error("finish", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace)
      # The value of this assignment (false) becomes the result.
      @recovering = false
    end
  end

  # Level is computed after the handler ran, since it may have changed result.
  level = result == :unmatched ? 0 : 1
  progress("#{prod}(:finish)", "", lineno: (scanner.lineno if scanner), level: level) do
    "#{result.inspect}@(#{scanner ? scanner.pos : '?'}), rest: #{scanner ? scanner.rest[0..20].inspect : '?'}"
  end

  clear_packrat if flush_packrat
  @productions.pop
  result
end

#onStart(prod, **options) ⇒ Hash

Start of production. Adds data available during the processing of the production.

Parameters:

  • prod (Symbol)
  • **options (Hash)

    other options available for handlers

Returns:

  • (Hash)

    composed of production options. Currently only as_hash is supported.

See Also:



378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 378

# Start of a production.
#
# Pushes +prod+ onto +@productions+. When a start handler is registered, a
# data Hash (+{_production: prod}+ merged with +options+) is handed to the
# handler and then pushed onto +@prod_data+. When only a finish handler is
# registered, a bare +{_production: prod}+ is pushed instead.
#
# @param prod [Symbol]
# @param options [Hash] other options available for handlers
# @return [Hash] the entry for +prod+ in +self.class.start_options+, or +{}+
def onStart(prod, **options)
  start_handler = self.class.start_handlers[prod]
  @productions << prod

  data = nil
  if start_handler
    # Build the production data element first so the handler can customize
    # it before it lands on the @prod_data stack.
    data = {_production: prod}.merge(options)
    begin
      self.class.eval_with_binding(self) { start_handler.call(data, @parse_callback) }
    rescue ArgumentError, Error => e
      error("start", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace)
      @recovering = false
    end
    @prod_data << data
  elsif self.class.production_handlers[prod]
    # Make sure we push as many as we pop, even if there is no explicit
    # start handler.
    @prod_data << {_production: prod}
  end

  progress("#{prod}(:start)", "", lineno: (scanner.lineno if scanner), pos: (scanner.pos if scanner)) do
    "#{data.inspect}@(#{scanner ? scanner.pos : '?'}), rest: #{scanner ? scanner.rest[0..20].inspect : '?'}"
  end

  # Any options registered on this production
  self.class.start_options.fetch(prod, {})
end

#onTerminal(prod, value) ⇒ String, Object

A terminal with a defined handler

Parameters:

  • prod (Symbol)

    from the symbol of the associated rule

  • value (String)

    the scanned string

Returns:

  • (String, Object)

    either the result from the handler, or the token



447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 447

# A terminal with a defined handler.
#
# When a handler is registered for +prod+ and +value+ is not +:unmatched+,
# the handler is called with the value, the production currently on top of
# +@productions+, and the parse callback; its result replaces the value.
#
# @param prod [Symbol] from the symbol of the associated rule
# @param value [String] the scanned string
# @return [String, Object] either the result from the handler, or the token.
#   If the handler raises ArgumentError or Error, the error is reported and
#   the value becomes +false+.
def onTerminal(prod, value)
  enclosing_prod = @productions.last
  terminal_handler = self.class.terminal_handlers[prod]

  if terminal_handler && value != :unmatched
    value = begin
      self.class.eval_with_binding(self) { terminal_handler.call(value, enclosing_prod, @parse_callback) }
    rescue ArgumentError, Error => e
      error("terminal", "#{e.class}: #{e.message}", value: value, production: prod, backtrace: e.backtrace)
      # The value of this assignment (false) becomes the terminal value.
      @recovering = false
    end
  end

  # Level is computed after the handler ran, since it may have changed value.
  level = value == :unmatched ? 0 : 1
  progress("#{prod}(:terminal)", "", depth: (depth + 1), lineno: (scanner.lineno if scanner), level: level) do
    "#{value.inspect}@(#{scanner ? scanner.pos : '?'})"
  end
  value
end

#parse(input = nil, start = nil, rules = nil, insensitive_strings: nil, **options) {|context, *data| ... } ⇒ Object

TODO:

FIXME implement seq_hash

Initializes a new parser instance.

Parameters:

  • input (String, #to_s) (defaults to: nil)
  • start (Symbol, #to_s) (defaults to: nil)

    The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.

  • rules (Array<EBNF::PEG::Rule>) (defaults to: nil)

    The parsed rules, which control parsing sequence. Identify the symbol of the starting rule with start.

  • options (Hash{Symbol => Object})
  • options[Integer] (Hash)

    a customizable set of options

  • options[:upper, (Hash)

    a customizable set of options

  • options[Boolean] (Hash)

    a customizable set of options

Options Hash (**options):

  • :logger (Logger)

    for errors/progress/debug.

  • :whitespace (Symbol, Regexp)

    Symbol of whitespace rule (defaults to @pass), or a regular expression for eating whitespace between non-terminal rules (strongly encouraged).

Yields:

  • (context, *data)

    Yields to return data to parser

Yield Parameters:

  • context (:statement, :trace)

    Context for block

  • *data (Symbol)

    Data specific to the call

Returns:

  • (Object)

    AST resulting from parse

Raises:

  • (Exception)

    Raises exceptions for parsing errors or errors raised during processing callbacks. Internal errors are raised using Error.



208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 208

# Parses +input+ starting from the +start+ production.
#
# All per-parse state is (re)initialized here, including the error-recovery
# flag and the furthest-failure record, so a parser instance can be reused
# after a failed parse.
#
# @param input [String, #to_s] text to parse; handed to EBNF::LL1::Scanner
# @param start [Symbol, #to_s] starting production. Anything other than a
#   Symbol is converted with +to_s+ and reduced to the part after the last
#   +#+. Falls back to +options[:start]+.
# @param rules [Array<EBNF::PEG::Rule>] parsed rules, indexed by their +sym+.
#   Falls back to +options[:rules]+, then to an empty list.
# @param insensitive_strings [Object] passed through to the start rule's +parse+
# @param options [Hash{Symbol => Object}] stored (as a copy) in +@options+
# @option options [Symbol, Regexp] :whitespace symbol of a whitespace rule,
#   or a regular expression for eating whitespace between rules. Defaults
#   to the first rule answering +pass?+, then to a built-in regular expression.
# @yield stored as the parse callback and handed to production handlers
# @return [Object] result of parsing the start rule
# @raise [Error] when the start production is not defined, or when any
#   errors were logged during the parse
def parse(input = nil, start = nil, rules = nil, insensitive_strings: nil, **options, &block)
  start ||= options[:start]
  rules ||= options[:rules] || []
  # Index rules by symbol in a single pass; a later rule with the same symbol wins.
  @rules = rules.each_with_object({}) {|rule, memo| memo[rule.sym] = rule}
  @packrat = {}

  # Add parser reference to each rule
  @rules.each_value {|rule| rule.parser = self}

  # Take whitespace from options (a Symbol names a rule), a `pass` rule, or a default
  @whitespace = case options[:whitespace]
  when Regexp then options[:whitespace]
  when Symbol then @rules[options[:whitespace]]
  else options[:whitespace]
  end ||
    @rules.values.detect(&:pass?) ||
    /(?:\s|(?:#[^x][^\n\r]*))+/m.freeze

  @options = options.dup
  @productions = []
  @parse_callback = block
  @error_log = []
  @prod_data = []
  # Reset state left over from a previous parse on this instance. A stale
  # recovery flag would suppress error logging and skip finish handlers; a
  # stale furthest failure would be reported against the wrong input.
  @furthest_failure = nil
  @recovering = false

  @scanner = EBNF::LL1::Scanner.new(input)
  unless start.is_a?(Symbol)
    # Accept anything responding to #to_s; keep only the fragment after '#'.
    local_name = start.to_s.split('#').last
    start = local_name.to_sym if local_name
  end
  start_rule = @rules[start]
  raise Error, "Starting production #{start.inspect} not defined" unless start_rule

  # Reports the furthest recorded failure, or the current scanner position
  # when no failure was recorded.
  report_failure = lambda do
    failure = @furthest_failure
    pos = failure ? failure.pos : scanner.pos
    error("--top--", failure ? failure.to_s : "unexpected input",
      pos: pos,
      lineno: (failure.lineno if failure),
      rest: scanner.string[pos, 20])
  end

  result = start_rule.parse(scanner, insensitive_strings: insensitive_strings)
  # Start rule wasn't matched, which is about the only error condition
  report_failure.call if result == :unmatched

  # Eat any remaining whitespace; leftover input is also an error
  start_rule.eat_whitespace(scanner)
  report_failure.call unless scanner.eos?

  # When all is said and done, raise the error log
  raise Error, @error_log.join("\n") unless @error_log.empty?

  result
end

#prod_dataObject

Current ProdData element



267
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 267

# Current production data element.
#
# @return [Hash] the last element of +@prod_data+, or a new empty Hash when
#   there is none
def prod_data
  @prod_data.last || {}
end

#progress(node, message, **options, &block) ⇒ Object

Progress logged when parsing. Passed as level 1 logger messages.

The call is ignored, unless @options[:logger] is set.

Parameters:

  • node (String)

    Relevant location associated with message

  • message (String)

    ("")

  • options (Hash)

Options Hash (**options):

  • :depth (Integer)

    Recursion depth for indenting output

See Also:



334
335
336
337
338
339
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 334

# Progress logging; forwards to {#debug} with a default level of 1.
#
# The call is ignored unless +@options[:logger]+ is set.
#
# @param node [String] location associated with the message
# @param args [Array] message parts, optionally followed by an options Hash.
#   A trailing Hash gets +:level+ defaulted to 1 in place; if there is none,
#   one is appended.
# @return [Object, nil] whatever +debug+ returns, or nil without a logger
def progress(node, *args, &block)
  return unless @options[:logger]

  opts = args.last
  unless opts.is_a?(Hash)
    opts = {}
    args << opts
  end
  opts[:level] ||= 1
  debug(node, *args, &block)
end

#terminal_options(sym) ⇒ Regexp

Find the options defined for a terminal

Parameters:

Returns:



492
493
494
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 492

# Looks up the options registered for a terminal.
#
# @param sym [Symbol] terminal symbol
# @return [Object, nil] the entry for +sym+ in +self.class.terminal_options+
def terminal_options(sym)
  registry = self.class.terminal_options
  registry[sym]
end

#terminal_regexp(sym) ⇒ Regexp

Find a regular expression defined for a terminal

Parameters:

Returns:



483
484
485
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 483

# Looks up the regular expression registered for a terminal.
#
# @param sym [Symbol] terminal symbol
# @return [Regexp, nil] the entry for +sym+ in +self.class.terminal_regexps+
def terminal_regexp(sym)
  registry = self.class.terminal_regexps
  registry[sym]
end

#update_furthest_failure(pos, lineno, token) ⇒ Object

Record furthest failure.

Parameters:

  • pos (Integer)

    The position in the input stream where the failure occurred.

  • lineno (Integer)

    Line where the failure occurred.

  • token (Symbol, String)

    The terminal token or string which attempted to match.

See Also:



506
507
508
509
510
511
512
513
514
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 506

# Records the furthest failure seen so far.
#
# A failure further into the input replaces the current record. A failure
# at the same position adds its token to the list of expected tokens. A
# failure at an earlier position is ignored.
#
# @param pos [Integer] position in the input where the failure occurred
# @param lineno [Integer] line where the failure occurred
# @param token [Symbol, String] terminal token or string which attempted to match
def update_furthest_failure(pos, lineno, token)
  # Skip generated productions (symbols starting with an underscore)
  return if token.is_a?(Symbol) && token.to_s.start_with?('_')

  current = @furthest_failure
  if current.nil? || pos > current.pos
    @furthest_failure = Unmatched.new(pos, lineno, [token])
  elsif pos == current.pos
    expected = current[:expecting]
    expected << token unless expected.include?(token)
  end
end

#warn(node, message, **options) ⇒ Object

Warning information, used as level 2 logger messages. Messages are passed to the logger; unlike errors, they are not saved for reporting at the end of parsing.

Parameters:

  • node (String)

    Relevant location associated with message

  • message (String)

    Error string

  • options (Hash)

Options Hash (**options):

  • :production (URI, #to_s)
  • :token (Token)

See Also:



312
313
314
315
316
317
318
319
320
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-c8f40958c6c3/lib/ebnf/peg/parser.rb', line 312

# Reports a warning (logger level 2).
#
# Formats the message like an error report, prefixed with "WARNING", and
# passes it to {#debug}. Nothing is stored on the parser.
#
# @param node [String] location associated with the message
# @param message [String] warning text
# @param options [Hash] +:lineno+, +:rest+ and +:production+ are read here;
#   all options are forwarded to +debug+
# @return [Object, nil] whatever +debug+ returns
def warn(node, message, **options)
  lineno = options[:lineno] || (scanner.lineno if scanner)
  rest = options[:rest]
  production = options[:production]

  location = lineno ? "[line: #{lineno}] " : ""
  found = rest ? " (found #{rest.inspect})" : ""
  origin = production ? ", production = #{production.inspect}" : ""

  debug(node, "WARNING " + location + message + found + origin, level: 2, **options)
end