diff --git a/lib/rdoc/parser/c.rb b/lib/rdoc/parser/c.rb
index b13e7cad8a..a31b36404d 100644
--- a/lib/rdoc/parser/c.rb
+++ b/lib/rdoc/parser/c.rb
@@ -1016,7 +1016,7 @@ def handle_method(type, var_name, meth_name, function, param_count,
         file_name = File.join @file_dir, source_file
 
         if File.exist? file_name then
-          file_content = File.read file_name
+          file_content = RDoc::Encoding.read_file file_name, @options.encoding
         else
           @options.warn "unknown source #{source_file} for #{meth_name} in #{@file_name}"
         end
diff --git a/test/rdoc/parser/c_test.rb b/test/rdoc/parser/c_test.rb
index 17237c2400..00b57d5d7d 100644
--- a/test/rdoc/parser/c_test.rb
+++ b/test/rdoc/parser/c_test.rb
@@ -2292,6 +2292,42 @@ def test_reparse_c_file_no_duplicates
     assert_include method_names, 'baz'
   end
 
+  def test_handle_method_source_file_with_non_ascii
+    # Regression test: a source file named by an "/* in file.c */" marker
+    # must be read with RDoc::Encoding.read_file, not File.read. With a
+    # US-ASCII Encoding.default_external, File.read yields a US-ASCII string
+    # and String#scan raises ArgumentError on its non-ASCII bytes. The checks
+    # below only confirm that parser.scan completes and Foo#greet is found.
+    source_path = File.join(File.dirname(@fn), 'greet.c')
+    File.binwrite source_path, <<~C.encode('UTF-8')
+      /*
+       * Returns a greeting \u2014 "h\u00e9llo w\u00f6rld"
+       */
+      VALUE
+      rb_greet(VALUE obj) {
+        return rb_str_new2("hello");
+      }
+    C
+
+    parser = util_parser <<~C
+      void Init_Foo(void) {
+        VALUE cFoo = rb_define_class("Foo", rb_cObject);
+        rb_define_method(cFoo, "greet", rb_greet, 0); /* in greet.c */
+      }
+    C
+
+    parser.scan
+
+    foo = @top_level.find_module_named 'Foo'
+    assert foo, 'Foo class should be found'
+
+    greet = foo.method_list.first
+    assert greet, 'greet method should be found'
+    assert_equal 'greet', greet.name
+  ensure
+    File.delete source_path if source_path && File.exist?(source_path)
+  end
+
   def util_get_class(content, name = nil)
     @parser = util_parser content
     @parser.scan