Module: EBNF::LL1::Parser
- Included in:
- LD::Patch::Parser, RDF::N3::Reader, RDF::Turtle::Reader, SPARQL::Grammar::Parser
- Defined in:
- vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb
Overview
A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).
# Creating terminal definitions and parser rules to parse generated grammars
The parser is initialized with callbacks that are invoked on entry to and exit from each terminal and production. A trivial parser loop can be described as follows:
require 'ebnf/ll1/parser'
require 'meta'
# Example host class showing how EBNF::LL1::Parser is wired up: terminal and
# production callbacks are declared at class level, and #initialize runs the
# parse. BRANCH, FIRST, FOLLOW and CLEANUP are presumably the generated tables
# supplied by Meta -- confirm against the generated file.
class Parser
  include Meta
  include EBNF::LL1::Parser

  terminal(:SYMBOL, /([a-z]|[A-Z]|[0-9]|_)+/) do |prod, token, input|
    # Add data based on scanned token to input
    input[:symbol] = token.value
  end

  start_production(:rule) do |input, current, callback|
    # Process on start of production
    # Set state for entry into recursed rules through current
    # Callback to parser loop with callback
  end

  production(:rule) do |input, current, callback|
    # Process on end of production
    # return results in input, retrieve results from recursed rules in current
    # Callback to parser loop with callback
  end

  def initialize(input)
    parse(input, start_symbol,
      branch: BRANCH,
      first: FIRST,
      follow: FOLLOW,
      cleanup: CLEANUP
    ) do |context, *data|
      # Process calls from callback from productions
    end
  rescue ArgumentError, EBNF::LL1::Parser::Error => e
    # The error class lives under this module's own namespace
    # (EBNF::LL1::Parser::Error).
    progress("Parsing completed with errors:\n\t#{e.message}")
    raise RDF::ReaderError, e.message if validate?
  end
end
Defined Under Namespace
Modules: ClassMethods Classes: Error
Instance Attribute Summary collapse
-
#lineno ⇒ Integer
readonly
Line number of current token.
Instance Method Summary collapse
-
#add_prod_data(sym, *values) ⇒ Object
Add values to production data, values arranged as an array.
-
#add_prod_datum(sym, values) ⇒ Object
Add a single value to prod_data, allows for values to be an array.
-
#debug(node, message, **options) ⇒ Object
protected
Debug logging.
- #depth ⇒ Object
-
#error(node, message, **options) ⇒ Object
protected
Error information, used as level 3 logger messages.
-
#parse(input = nil, start = nil, **options) {|context, *data| ... } ⇒ EBNF::LL1::Parser
Initializes a new parser instance.
-
#prod_data ⇒ Object
Current ProdData element.
-
#progress(node, message, **options, &block) ⇒ Object
protected
Progress logged when parsing.
-
#warn(node, message, **options) ⇒ Object
protected
Warning information, used as level 2 logger messages.
Instance Attribute Details
#lineno ⇒ Integer (readonly)
Returns line number of current token.
54 55 56 |
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 54
#
# Reader for the line number of the current token.
#
# @return [Integer] the value held in @lineno (nil until it has been assigned)
def lineno
  @lineno
end
Instance Method Details
#add_prod_data(sym, *values) ⇒ Object
Add values to production data, values arranged as an array
491 492 493 494 495 496 497 |
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 491
#
# Append one or more values to the array stored under +sym+ in the current
# production data. Nothing happens when every supplied value is nil.
#
# The debug line is emitted before the append (as add_prod_datum does), so
# the left-hand side of the logged "+=" is the array as it was before the
# new values were added.
#
# @param sym [Symbol] key into the current prod_data hash
# @param values [Array<Object>] values to append
# @return [Array, nil] the updated array, or nil when nothing was added
def add_prod_data(sym, *values)
  return if values.compact.empty?

  prod_data[sym] ||= []
  debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
  prod_data[sym] += values
end
#add_prod_datum(sym, values) ⇒ Object
Add a single value to prod_data, allows for values to be an array
475 476 477 478 479 480 481 482 483 484 485 486 487 488 |
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 475
#
# Record +values+ under +sym+ in the current production data.
# An Array is concatenated onto the stored array, any other non-nil object is
# pushed as a single element, and nil is ignored.
#
# @param sym [Symbol] key into the current prod_data hash
# @param values [Array, Object, nil] value(s) to record
# @return [Array, nil] the stored array, or nil when +values+ is nil
def add_prod_datum(sym, values)
  return if values.nil?

  bucket = (prod_data[sym] ||= [])
  if values.is_a?(Array)
    debug("add_prod_datum(#{sym})") {"#{bucket.inspect} += #{values.inspect}"}
    prod_data[sym] = bucket + values
  else
    debug("add_prod_datum(#{sym})") {"#{bucket.inspect} << #{values.inspect}"}
    bucket << values
  end
end
#debug(node, message, **options) ⇒ Object (protected)
Debug logging.
The call is ignored unless @options[:logger] is set.
579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 |
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 579
#
# Debug logging. The call is ignored unless @options[:logger] is set.
#
# The positional arguments are message parts; a trailing Hash, when present,
# is removed and treated as options. A block, when given, is evaluated and its
# result appended to the message parts (only on the logger.add / logger.<<
# paths; on the log_debug path the block is forwarded instead).
#
# Recognised options (as read by this method):
#   :level  - Integer severity, defaults to 0
#   :depth  - indentation depth, defaults to #depth
#   :token  - object whose #lineno is used when @lineno is not set
#
# @param args [Array] message parts, optionally followed by an options Hash
# @yieldreturn [String] extra message text
def debug(*args, &block)
  return unless @options[:logger]

  options = args.last.is_a?(Hash) ? args.pop : {}
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
  level = options.fetch(:level, 0)
  depth = options[:depth] || self.depth

  if self.respond_to?(:log_debug)
    # Host object provides its own logging helper: translate the numeric
    # level to a symbol and delegate.
    level = [:debug, :info, :warn, :error, :fatal][level]
    log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
  elsif @options[:logger].respond_to?(:add)
    args << yield if block_given?
    @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
  elsif @options[:logger].respond_to?(:<<)
    args << yield if block_given?
    @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
  end
end
#depth ⇒ Object
469 |
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 469
#
# Number of entries currently held in @productions; zero when @productions
# has not been set.
#
# @return [Integer]
def depth
  @productions ? @productions.length : 0
end
#error(node, message, **options) ⇒ Object (protected)
Error information, used as level 3
logger messages.
Messages may be logged and are saved for reporting at end of parsing.
511 512 513 514 515 516 517 518 519 520 521 522 523 524 |
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 511
#
# Record an error. The formatted message is appended to @error_log (unless a
# recovery is already in progress), @recovering is switched on, and the
# message is forwarded to #debug at level 3 unless options[:level] overrides it.
#
# @param node [String] label forwarded to #debug as the first message part
# @param message [String] error text
# @param options [Hash]
#   :token      - offending token; its #lineno is used when @lineno is unset
#   :production - production being processed, included in the message
#   :level      - log level passed to #debug (default 3)
#   :raise      - when truthy, raise after logging
# @raise [Error] when options[:raise] or @options[:validate] is truthy
def error(node, message, **options)
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
  m = "ERROR "
  m += "[line: #{lineno}] " if lineno
  m += message
  m += " (found #{options[:token].inspect})" if options[:token]
  m += ", production = #{options[:production].inspect}" if options[:production]
  @error_log << m unless @recovering
  @recovering = true
  debug(node, m, level: options.fetch(:level, 3), **options)
  if options[:raise] || @options[:validate]
    raise Error.new(m, lineno: lineno, token: options[:token], production: options[:production])
  end
end
#parse(input = nil, start = nil, **options) {|context, *data| ... } ⇒ EBNF::LL1::Parser
Initializes a new parser instance.
Attempts to recover from errors.
|
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 266
#
# Run the table-driven parse over +input+, starting at production +start+.
#
# A stack of pending productions (todo_stack) is processed until empty. For
# each newly opened production the current token selects a term sequence from
# the branch table; terminals in that sequence are accepted from the lexer and
# non-terminals are pushed as new stack entries. Lexer and parser errors are
# rescued and recovery skips tokens until one appears in the follow sets of
# the stacked productions. All logged errors are raised together at the end.
#
# @param input [Lexer, Object] an existing Lexer, or anything Lexer.new accepts
# @param start [Symbol, String] starting production; for a String, the part
#   after the last '#' is converted to a Symbol
# @param options [Hash]
#   :branch         - branch table (required, must be non-empty)
#   :first          - first sets, defaults to {}
#   :follow         - follow sets, defaults to {}
#   :cleanup        - cleanup table, defaults to {}
#   :reset_on_start - when truthy, the stacks are reset each time the start
#                     production is re-entered
#   Remaining options are kept in @options and passed to Lexer.new.
# @yield [context, *data] block stored as @parse_callback
# @raise [Error] if the branch table or start production is missing, or if any
#   errors were logged during parsing
# NOTE(review): the final expression evaluates to nil when no errors were
# logged, although the signature documents an EBNF::LL1::Parser return.
def parse(input = nil, start = nil, **options, &block)
  @options = options.dup
  @branch = options[:branch]
  @first = options[:first] ||= {}
  @follow = options[:follow] ||= {}
  @cleanup = options[:cleanup] ||= {}
  @lexer = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, **@options)
  @productions = []
  @parse_callback = block
  @recovering = false
  @error_log = []
  terminals = self.class.patterns.map(&:type) # Get defined terminals to help with branching

  # Unrecoverable errors
  raise Error, "Branch table not defined" unless @branch && @branch.length > 0
  raise Error, "Starting production not defined" unless start

  @prod_data = [{}]
  start = start.split('#').last.to_sym unless start.is_a?(Symbol)
  todo_stack = [{prod: start, terms: nil}]

  while !todo_stack.empty?
    begin
      @recovering = false
      pushed = false
      if todo_stack.last[:terms].nil?
        todo_stack.last[:terms] = []
        cur_prod = todo_stack.last[:prod]

        # If cur_prod is the starting production, we can reset the stack
        # to the beginning to avoid excessive growth in the production
        # stack
        if options[:reset_on_start] && cur_prod == start
          todo_stack = [{prod: start, terms: []}]
          @productions = []
          @prod_data = [{}]
        end

        # Fetch the current token
        token = get_token(:recover)

        # At this point, token is either nil, in the first set of the production,
        # or in the follow set of this production or any previous production
        debug("parse(production)") do
          "token #{token ? token.representation.inspect : 'nil'}, " +
          "prod #{cur_prod.inspect}, " +
          "depth #{depth}"
        end

        # Got an opened production
        onStart(cur_prod)

        if token.nil?
          if !(first_include?(cur_prod, :_eps) && follow_include?(cur_prod, :_eof))
            # End of file, and production does not contain eps, or it does, but follow does not contain eof
            error("parse(production)", "Unexpected end of input", lineno: lineno, production: cur_prod, raise: true)
          else
            debug("parse(production)") {"End of input prod #{cur_prod.inspect}"}
          end
        elsif prod_branch = @branch[cur_prod]
          sequence = prod_branch.fetch(token.representation) do
            error("parse(production)", "Expected one of #{@first[cur_prod].inspect}", token: token, production: cur_prod, raise: true)
          end
          debug("parse(production)") do
            "token #{token.representation.inspect} " +
            "prod #{cur_prod.inspect}, " +
            "prod_branch #{prod_branch.keys.inspect}, " +
            "sequence #{sequence.inspect}"
          end
          todo_stack.last[:terms] += sequence
        else
          error("parse(production)", "Unexpected", token: token, production: cur_prod, raise: true)
        end
      end

      debug("parse(terms)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
      while !todo_stack.last[:terms].to_a.empty?
        # Get the next term in this sequence
        term = todo_stack.last[:terms].shift
        debug("parse(token)") {"accept #{term.inspect}"}
        if token = accept(term)
          debug("parse(token)") {"token #{token.inspect}, term #{term.inspect}"}
          onTerminal(term, token)
        elsif terminals.include?(term)
          # If term is a terminal, then it is an error if token does not
          # match it
          error("parse(token)", "Expected #{term.inspect}", token: get_token, production: cur_prod, raise: true)
        else
          token = get_token

          # If token is not in firsts of term, but eps is, skip to next
          # term
          if first_include?(term, :_eps) && !first_include?(term, token)
            debug("parse(token)") {"skip optional term #{term.inspect} on #{token.inspect}"}
            break
          else
            # Push term onto stack
            todo_stack << {prod: term, terms: nil}
            debug("parse(push)") {"term #{term.inspect}, depth #{depth}"}
            pushed = true
            break
          end
        end
      end
    rescue Lexer::Error, Error => e
      # Lexer encountered an illegal token or the parser encountered
      # a terminal which is inappropriate for the current production.
      # Perform error recovery to find a reasonable terminal based
      # on the follow sets of the relevant productions. This includes
      # remaining terms from the current production and the stacked
      # productions
      @lineno = e.lineno
      if e.is_a?(Lexer::Error)
        # Skip to the next valid terminal
        @lexer.recover
        error("parse(#{e.class})", "With input '#{e.input}': #{e.message}", production: @productions.last, token: e.token)
      else
        # Otherwise, the terminal is fine, just not for this production.
        @lexer.shift
        error("parse(#{e.class})", "#{e.message}", production: @productions.last, token: e.token)
      end

      # Get the list of follows for this sequence, this production and the stacked productions.
      debug("recovery", "stack follows:")
      todo_stack.reverse.each do |todo|
        debug("recovery") {" #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
      end

      # Find all follows to the top of the stack
      follows = todo_stack.inject([]) do |follow, todo|
        prod = todo[:prod]
        follow += @follow[prod] || []
      end.uniq
      debug("recovery") {"follows: #{follows.inspect}"}

      # Skip tokens until one is found in follows
      while (token = get_token(:recover)) && follows.none? {|t| token === t}
        skipped = @lexer.shift
        progress("recovery") {"skip #{skipped.inspect}"}
      end
      debug("recovery") {"found #{token.inspect} in follows"}

      # Pop stack elements until token is in follows
      while !todo_stack.empty? &&
            !follow_include?(todo_stack.last[:prod], token || :_eof)
        debug("recovery(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
        todo_stack.pop
        onFinish
      end

      # Token is now in the first of the top production
      unless todo_stack.empty?
        todo_stack.pop
        onFinish
      end

      if todo_stack.empty?
        # Recovered to end of last production
        warn("recover", "recovered to end of productions")
      else
        warn("recover", "recovered to #{todo_stack.last[:prod].inspect} with #{token.inspect}")
      end

      @recovering = false
    ensure
      # After completing the last production in a sequence, pop down until we find a production
      #
      # If in recovery mode, continue popping until we find a term with a follow list
      while !pushed &&
            !todo_stack.empty? &&
            todo_stack.last.fetch(:terms, []).empty?
        debug("parse(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
        todo_stack.pop
        onFinish
      end
    end
  end

  error("parse(eof)", "Finished processing before end of file", token: @lexer.first) if @lexer.first

  # Continue popping contexts off of the stack
  while !todo_stack.empty?
    debug("parse(eof)") {"stack #{todo_stack.last.inspect}, depth #{depth}"}
    # There can't be anything left to do, or if there is, it must be optional
    last_terms = todo_stack.last[:terms]
    if last_terms.length > 0 && last_terms.none? {|t|first_include?(t, :_eps)}
      error("parse(eof)",
        "End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
      )
    end
    todo_stack.pop
    onFinish
  end

  # When all is said and done, raise the error log
  unless @error_log.empty?
    raise Error, @error_log.join("\n")
  end
end
#prod_data ⇒ Object
Current ProdData element
472 |
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 472
#
# Current ProdData element: the last entry of the @prod_data stack.
#
# @return [Object] the last element of @prod_data
def prod_data
  @prod_data.last
end
#progress(node, message, **options, &block) ⇒ Object (protected)
Progress logged when parsing. Passed as level 1
logger messages.
The call is ignored, unless @options[:logger]
is set.
559 560 561 562 563 564 565 566 |
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 559
#
# Progress logging: forwards to #debug with :level defaulting to 1.
# The call is ignored unless @options[:logger] is set.
#
# Per-call options are taken from a trailing Hash in +args+. The original
# listing read the token from an `options` name that is not a local of this
# method; the token is now read from that trailing Hash. The Hash is copied
# before defaults are filled in, so a Hash passed by the caller is not
# modified.
#
# @param node [String] label forwarded to #debug as the first message part
# @param args [Array] message parts, optionally followed by an options Hash
#   (:level, :lineno, :token are read here; everything is passed to #debug)
# @yieldreturn [String] extra message text, forwarded to #debug
def progress(node, *args, &block)
  return unless @options[:logger]

  opts = args.last.is_a?(Hash) ? args.pop.dup : {}
  token = opts[:token]
  lineno = @lineno || (token.lineno if token.respond_to?(:lineno))
  opts[:level] ||= 1
  opts[:lineno] ||= lineno
  debug(node, *args, opts, &block)
end
#warn(node, message, **options) ⇒ Object (protected)
Warning information, used as level 2
logger messages.
Messages may be logged and are saved for reporting at end of parsing.
536 537 538 539 540 541 542 543 544 545 |
# File 'vendor/bundler/ruby/3.3.0/bundler/gems/ebnf-cb49ee954bab/lib/ebnf/ll1/parser.rb', line 536
#
# Record a warning. The formatted message is appended to @error_log (unless a
# recovery is in progress) and forwarded to #debug at level 2.
#
# @param node [String] label forwarded to #debug as the first message part
# @param message [String] warning text
# @param options [Hash]
#   :token      - related token; its #lineno is used when @lineno is unset
#   :production - production being processed, included in the message
def warn(node, message, **options)
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
  m = "WARNING "
  m += "[line: #{lineno}] " if lineno
  m += message
  m += " (found #{options[:token].inspect})" if options[:token]
  m += ", production = #{options[:production].inspect}" if options[:production]
  @error_log << m unless @recovering
  debug(node, m, level: 2, lineno: lineno, **options)
end