Module: RDF::N3::Parser

Included in:
Reader
Defined in:
vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb

Constant Summary collapse

# URI of the grammar production at which parsing starts (converted to a Symbol and passed to #parse by #test).
START =
'http://www.w3.org/2000/10/swap/grammar/n3#document'
# Matches, at the start of a string, any run of whitespace optionally followed by a '#' comment to end of line.
R_WHITESPACE =
Regexp.compile('\A\s*(?:#.*$)?')
# Used by #parse to locate the closing """ while reading a multi-line string.
R_MLSTRING =
Regexp.compile("^.*([^\"\\\\]*)\"\"\"")
# Characters that #get_token returns as one-character tokens.
# NOTE(review): inside a %{...} literal the sequence `#$\` looks like interpolation of the `$\` global,
# which would drop '#' and '$' from this set -- confirm against the evaluated constant.
SINGLE_CHARACTER_SELECTORS =
%{\t\r\n !\"#$\%&'()*.,+/;<=>?[\\]^`{|}~}
# Characters that terminate a bare word when #get_token scans a qname.
NOT_QNAME_CHARS =
SINGLE_CHARACTER_SELECTORS + "@"
# Characters that terminate the name following '@' in #get_token (qname terminators plus ':').
NOT_NAME_CHARS =
NOT_QNAME_CHARS + ":"

Instance Method Summary collapse

Instance Method Details

#abbr(prodURI) ⇒ Object



190
191
192
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 190

# Abbreviate a production URI to its trailing '#'-separated segment.
#
# @param prodURI [#to_s] production URI (anything convertible with to_s)
# @return [String, nil] last element of the string form split on '#';
#   nil when the split yields no elements (e.g. an empty string)
def abbr(prodURI)
  segments = prodURI.to_s.split('#')
  segments[-1]
end

#buffer ⇒ Object

Return data from the current offset to the end of the line



178
179
180
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 178

# Unconsumed remainder of the current line.
#
# @return [String, nil] text of @line from @pos to its end, or nil when there is no current line
def buffer
  return nil if @line.nil?

  @line[@pos..-1]
end

#consume(n) ⇒ Object

Consume n characters of the current line, then read new lines while the current line is exhausted, stopping at EOF



183
184
185
186
187
188
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 183

# Advance the read position by +n+ characters.
#
# The token memo is reset because it is keyed by position. Whenever the
# position reaches or passes the end of the current line, further lines are
# read until one has unconsumed text or there is no current line left.
#
# @param n [Integer] number of characters to skip
# @return [nil]
def consume(n)
  @memo = {}
  @pos += n
  until !@line || @pos < @line.length
    readline
  end
  #log_debug("consume[#{n}]") {buffer}
end

#dump_stack(stack) ⇒ Object



208
209
210
211
212
213
214
215
216
217
218
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 208

# Print the parser's frame stack to STDERR, most recently pushed frame first.
#
# For each frame the production is printed, followed by an indented summary
# of its remaining terms: "nil" when :terms is nil, "empty" when it is an
# empty array, otherwise the terms joined one per line.
#
# @param stack [Array<Hash>] frames with :prod and :terms keys
def dump_stack(stack)
  STDERR.puts "\nstack trace:"
  stack.reverse.each do |frame|
    STDERR.puts frame[:prod].to_s
    remaining = frame[:terms]
    summary =
      if remaining.nil?
        "nil"
      elsif [] == remaining
        "empty"
      else
        remaining.join(",\n  ")
      end
    STDERR.puts "  " + summary
  end
end

#error(str) ⇒ Object



12
13
14
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 12

# Report a parse error through log_error, tagged with the current line number
# and RDF::ReaderError as the exception class.
# NOTE(review): presumably log_error raises the given exception -- confirm in the logger.
#
# @param str [String] error message
def error(str)
  log_error(
    str,
    lineno: @lineno,
    exception: RDF::ReaderError
  )
end

#get_token ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 113

# Classify the next token in the input after skipping whitespace.
#
# The return value is the key used to pick a branch, not necessarily the
# token text:
# * nil              - no current line (end of input)
# * "=>", "<=", "^^" - two-character operators
# * a single char    - any character in SINGLE_CHARACTER_SELECTORS
# * ":"              - a leading colon
# * "0"              - a leading sign or digit
# * "@name"          - an '@' directive, or a bare word listed in @keywords
# * "@"              - an '@' immediately preceded by a double quote
# * "a"              - any other word
#
# Side effects: a '.' ends keyword mode; '@keywords' (or the bare keyword
# 'keywords') clears @keywords and starts keyword mode; while keyword mode is
# on, every word scanned is appended to @keywords.
def get_token
  whitespace

  pending = buffer
  return nil if pending.nil?

  pair = pending[0, 2]
  return pair if %w(=> <= ^^).include?(pair)

  lead = pending[0, 1]
  # A full stop terminates an @keywords declaration.
  @keyword_mode = false if @keyword_mode && lead == '.'

  return lead if SINGLE_CHARACTER_SELECTORS.include?(lead)
  return ":" if lead == ":"
  return "0" if "+-0123456789".include?(lead)

  if lead == '@'
    # An '@' directly after a closing quote is returned on its own.
    return '@' if @pos > 0 && @line[@pos - 1, 1] == '"'

    # Measure the name that follows the '@'.
    span = 0
    loop do
      following = pending[span + 1, 1]
      break if following.nil? || NOT_NAME_CHARS.include?(following)
      span += 1
    end
    name = pending[1, span]
    if name == 'keywords'
      @keywords = []
      @keyword_mode = true
    end
    return '@' + name
  end

  # Anything else is scanned as a word up to the next qname terminator.
  span = 0
  loop do
    current = pending[span, 1]
    break if current.nil? || NOT_QNAME_CHARS.include?(current)
    span += 1
  end
  word = pending[0, span]
  error("Tokenizer expected qname, found #{pending[0, 10]}") unless word

  if @keyword_mode
    # Inside an @keywords declaration each word becomes a keyword.
    @keywords << word
    return 'a'
  end
  return 'a' unless @keywords.include?(word)

  if word == 'keywords'
    @keywords = []
    @keyword_mode = true
  end
  '@' + word.to_s # implicit keyword
end

#onFinish ⇒ Object



199
200
201
202
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 199

# Trace hook: close the innermost open production.
#
# Pops the production from @productions and prints it to $stdout prefixed
# with '/', indented by the number of productions still open.
def onFinish
  closed = @productions.pop
  indent = ' ' * @productions.length
  $stdout.puts indent + '/' + closed
end

#onStart(prod) ⇒ Object



194
195
196
197
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 194

# Trace hook: open a production.
#
# Prints the production to $stdout, indented by the number of productions
# already open, then records it on @productions.
#
# @param prod [String] abbreviated production name
# @return [Array] @productions after the push
def onStart(prod)
  depth = @productions.length
  $stdout.puts((' ' * depth) + prod)
  @productions.push(prod)
end

#onToken(prod, tok) ⇒ Object



204
205
206
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 204

# Trace hook: report a matched token.
#
# Prints "prod(tok)" to $stdout, indented by the number of open productions.
#
# @param prod [#to_s] name of the terminal that matched
# @param tok [#to_s] matched text
def onToken(prod, tok)
  indent = ' ' * @productions.length
  $stdout.puts indent + "#{prod}(#{tok})"
end

#parse(prod) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 16

# Run a stack-driven parse of the input, starting from production +prod+.
#
# The parser keeps an explicit stack of frames ({prod:, terms:}). A frame whose
# :terms is nil has not been expanded yet: the next token (see #token) selects a
# sequence of terms from @branches[prod]. Terms are then handled left to right:
# * a String term is matched literally against the buffer,
# * a term with an entry in @regexps is matched with that regexp,
# * any other term is pushed as a new frame to be expanded.
# onStart / onToken / onFinish are called as productions open, terminals match
# and productions close. Problems are reported through #error.
#
# @param prod key into @branches identifying the start production
def parse(prod)
  todo_stack = [{prod: prod, terms: nil}]
  while !todo_stack.empty?
    # Set when a nested production was pushed, so the pop loop below is skipped this round.
    pushed = false
    if todo_stack.last[:terms].nil?
      # First visit to this frame: expand it using the lookahead token.
      todo_stack.last[:terms] = []
      tok = self.token
      #log_debug("parse tok: '#{tok}'") {"prod #{todo_stack.last[:prod]}"}
      
      # Got an opened production
      onStart(abbr(todo_stack.last[:prod]))
      # A nil token means no more input; remaining frames are closed after the main loop.
      break if tok.nil?
      
      cur_prod = todo_stack.last[:prod]
      prod_branch = @branches[cur_prod]
      error("No branches found for '#{abbr(cur_prod)}'") if prod_branch.nil?
      sequence = prod_branch[tok]
      if sequence.nil?
        # No branch for this token: report what the production could have accepted.
        dump_stack(todo_stack) if $verbose
        expected = prod_branch.values.uniq.map {|u| u.map {|v| abbr(v).inspect}.join(",")}
        error("Found '#{tok}' when parsing a #{abbr(cur_prod)}. expected #{expected.join(' | ')}")
      end
      #log_debug("sequence") {sequence.inspect}
      todo_stack.last[:terms] += sequence
    end
    
    #log_debug("parse") {todo_stack.last.inspect}
    # Work through the terms of the top frame until one of them opens a nested production.
    while !todo_stack.last[:terms].to_a.empty?
      term = todo_stack.last[:terms].shift
      if term.is_a?(String)
        # Literal terminal: the buffer must start with exactly this text.
        log_debug("parse term(string)") {term.to_s}
        word = buffer[0, term.length]
        if word == term
          onToken(term, word)
          consume(term.length)
        elsif '@' + word.chop == term && @keywords.include?(word.chop)
          # Keyword written without its leading '@': the text is one character shorter than the term.
          onToken(term, word.chop)
          consume(term.length - 1)
        else
          error("Found '#{buffer[0, 10]}...'; #{term} expected")
        end
      elsif regexp = @regexps[term]
        # Terminal defined by a regular expression.
        if abbr(term) == 'string' && buffer[0, 3] == '"""'
          # Multi-line string: accumulate whole lines until R_MLSTRING finds the closing """.
          string = '"""'
          consume(3)
          next_line = buffer
          #log_debug("ml-str(start)") {next_line.dump}
          until md = R_MLSTRING.match(next_line)
            begin
              string += next_line
              next_line = readline
            # NOTE(review): readline in this file rescues EOFError itself and does not return a
            # String at end of input, so this rescue may never fire and an unterminated string
            # looks likely to fail with a TypeError instead -- confirm.
            rescue EOFError
              error("EOF reached searching for end of multi-line comment")
            end
          end
          string += md[0].to_s
          consume(md[0].to_s.length)
          onToken('string', string)
          #log_debug("ml-str now") {buffer.dump}
        else
          md = regexp.match(buffer)
          error("Token(#{abbr(term)}) '#{buffer[0, 10]}...' should match #{regexp}") unless md
          log_debug("parse") {"term(#{abbr(term)}:regexp): #{term}, #{regexp}.match('#{buffer[0, 10]}...') => '#{md.inspect.force_encoding(Encoding::UTF_8)}'"}
          onToken(abbr(term), md.to_s)
          consume(md[0].length)
        end
      else
        # Non-terminal: push a fresh frame and return to the outer loop to expand it.
        log_debug("parse term(push)") {term}
        todo_stack << {prod: term, terms: nil}
        pushed = true
        break
      end
      # Result is discarded; the call scans and memoizes the token at the new position.
      self.token
    end
    
    # Close every production whose terms are exhausted.
    # NOTE(review): if the last frame is popped here, todo_stack.last is nil and the loop
    # condition would raise NoMethodError; this appears to rely on the parse ending through
    # the `break` on a nil token above -- confirm.
    while !pushed && todo_stack.last[:terms].to_a.empty?
      todo_stack.pop
      self.onFinish
    end
  end
  # Reached after the nil-token break: close whatever productions are still open.
  while !todo_stack.empty?
    todo_stack.pop
    self.onFinish
  end
end

#readline ⇒ Object



166
167
168
169
170
171
172
173
174
175
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 166

# Read the next line of input into @line and reset the read position.
#
# On success the line counter is incremented, the line is tagged as UTF-8 and
# @pos is set to 0. At end of input @line becomes nil and @pos is 0.
#
# @return [String, nil] the line just read, or nil at end of input
def readline
  @line = @input.readline
  @lineno += 1
  @line.force_encoding(Encoding::UTF_8)
  log_debug("readline[#{@lineno}]") {@line.dump}
  @pos = 0
  @line
rescue EOFError
  # Return nil explicitly: ending on the multiple assignment `@line, @pos = nil, 0`
  # would make the method return the truthy Array [nil, 0] at end of input.
  @line = nil
  @pos = 0
  nil
end

#test(input, branches, regexps) ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 220

# Initialise parser state for +input+ and parse it from the START production.
#
# @param input [#read, #to_s] an IO-like object (rewound before use) or
#   anything whose string form is the document text
# @param branches [Hash] branch table, looked up by production and then by token in #parse
# @param regexps [Hash] regular expressions for terminals, looked up by term in #parse
# @return result of #parse
def test(input, branches, regexps)
  # FIXME: for now, read in entire doc, eventually, process as stream
  @input =
    if input.respond_to?(:read)
      input.rewind
      input
    else
      StringIO.new(input.to_s)
    end
  @lineno = 0
  readline  # Prime the pump
  $stdout ||= STDOUT

  # Tokenizer state
  @memo = {}
  @keyword_mode = false
  @keywords = ['a', 'is', 'of', 'this', 'has']
  @productions = []

  # Grammar tables
  @branches = branches
  @regexps = regexps

  parse(START.to_sym)
end

#token ⇒ Object

Memoizer for get_token



104
105
106
107
108
109
110
111
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 104

# Memoized access to the token at the current position.
#
# The memo is keyed by @pos. On a miss, get_token is called and its result is
# stored under the value of @pos after that call.
#
# @return token stored for the current position
def token
  return @memo[@pos] if @memo.key?(@pos)

  scanned = get_token
  @memo[@pos] = scanned
  log_debug("token") {"'#{scanned}'('#{buffer[0, 10]}...')"} if buffer
  @memo[@pos]
end

#whitespace ⇒ Object



158
159
160
161
162
163
164
# File 'vendor/bundler/ruby/2.5.0/bundler/gems/rdf-n3-3a340a80c182/lib/rdf/n3/reader/parser.rb', line 158

# Skip whitespace and '#' comments at the current position.
#
# Repeatedly matches R_WHITESPACE against the buffer and consumes the match,
# stopping when there is no buffer left or the match is empty.
#
# @return [nil]
def whitespace
  loop do
    remaining = buffer
    return unless remaining

    skipped = R_WHITESPACE.match(remaining)
    return unless skipped
    return if skipped[0].empty?

    consume(skipped[0].length)
    #log_debug("ws") {"'#{skipped[0]}', pos=#{@pos}"}
  end
end