From a736befeb9c639e9fb7e369183869d4aeee63a4e Mon Sep 17 00:00:00 2001 From: eileencodes Date: Mon, 5 Feb 2024 14:39:29 -0500 Subject: [PATCH] Implement file parsing error handling This PR implements proper file parsing error handling. Previously `file_options` would call `pm_string_mapped_init` which would print an error from `perror`. However this wouldn't raise a proper Ruby error so it was just a string output. I've done the following: - Update `pm_string_mapped_init` to delete `perror` and instead of returning `true` or `false` return the `errno` or `0`. - `pm_string_mapped_init` now returns an `int` instead of `bool`. - Raise a `rb_syserr_fail` error with the filepath and errno return code from `pm_string_mapped_init`. - Update `file_options` to return `static void` instead of `static bool`. - No longer return `Qnil` if `file_options` returns false (because now it will raise). - Add tests for `dump_file`, `lex_file`, `parse_file`, `parse_file_comments`, `parse_lex_file`, and `parse_file_success?` when a file doesn't exist and for `nil`. - Updates the `bin/parse` script to no longer raise it's own `ArgumentError` now that we raise a proper error. - Updates the `ffi` backend to raise a `SystemCallError` with the `filepath` and return code from `pm_string_mapped_init`. Fixes: ruby/prism#2207 Note: I'll have a followup PR to do a similar thing for ruby/ruby in `pm_parse_file`, I just need a test. --- bin/parse | 2 - ext/prism/extension.c | 39 +++++++++----- include/prism/util/pm_string.h | 3 +- lib/prism/ffi.rb | 10 +++- src/util/pm_string.c | 31 +++++------ test/prism/parse_test.rb | 97 ++++++++++++++++++++++++++++++++++ 6 files changed, 147 insertions(+), 35 deletions(-) diff --git a/bin/parse b/bin/parse index 6b66a8f3675..b37ff9a9839 100755 --- a/bin/parse +++ b/bin/parse @@ -13,8 +13,6 @@ if ARGV[0] == "-e" result = Prism.parse(ARGV[1]) else result = Prism.parse_file(ARGV[0] || "test.rb") - - raise ArgumentError, "There was nothing to parse. You must pass source code with `-e` or a Ruby file." if result.nil? end result.mark_newlines! if ENV["MARK_NEWLINES"] diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 7cad38404e5..396c18f9ccf 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -212,7 +212,7 @@ string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) /** * Read options for methods that look like (filepath, **options). */ -static bool +static void file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) { VALUE filepath; VALUE keywords; @@ -220,12 +220,15 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) { extract_options(options, filepath, keywords); - if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) { + const char * string_source = (const char *) pm_string_source(&options->filepath); + int return_code = pm_string_mapped_init(input, string_source); + + // Raise system error that is returned from pm_string_mapped_init + // if it wasn't successful. + if (return_code != 0) { pm_options_free(options); - return false; + rb_syserr_fail(return_code, string_source); } - - return true; } /******************************************************************************/ @@ -299,7 +302,8 @@ static VALUE dump_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; - if (!file_options(argc, argv, &input, &options)) return Qnil; + + file_options(argc, argv, &input, &options); VALUE value = dump_input(&input, &options); pm_string_free(&input); @@ -609,7 +613,8 @@ static VALUE lex_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; - if (!file_options(argc, argv, &input, &options)) return Qnil; + + file_options(argc, argv, &input, &options); VALUE value = parse_lex_input(&input, &options, false); pm_string_free(&input); @@ -710,7 +715,8 @@ static VALUE parse_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; - if (!file_options(argc, argv, &input, &options)) return Qnil; + + file_options(argc, argv, &input, &options); VALUE value = parse_input(&input, &options); pm_string_free(&input); @@ -770,7 +776,8 @@ static VALUE parse_file_comments(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; - if (!file_options(argc, argv, &input, &options)) return Qnil; + + file_options(argc, argv, &input, &options); VALUE value = parse_input_comments(&input, &options); pm_string_free(&input); @@ -824,7 +831,8 @@ static VALUE parse_lex_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; - if (!file_options(argc, argv, &input, &options)) return Qnil; + + file_options(argc, argv, &input, &options); VALUE value = parse_lex_input(&input, &options, true); pm_string_free(&input); @@ -881,7 +889,8 @@ static VALUE parse_file_success_p(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; - if (!file_options(argc, argv, &input, &options)) return Qnil; + + file_options(argc, argv, &input, &options); VALUE result = parse_input_success_p(&input, &options); pm_string_free(&input); @@ -959,7 +968,13 @@ profile_file(VALUE self, VALUE filepath) { pm_string_t input; const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; + int return_code = pm_string_mapped_init(&input, checked); + + // Raise system error that is returned from pm_string_mapped_init + // if it wasn't successful. + if (return_code != 0) { + rb_syserr_fail(return_code, checked); + } pm_options_t options = { 0 }; pm_options_filepath_set(&options, checked); diff --git a/include/prism/util/pm_string.h b/include/prism/util/pm_string.h index ddb153784f4..5ce6dcc0460 100644 --- a/include/prism/util/pm_string.h +++ b/include/prism/util/pm_string.h @@ -9,6 +9,7 @@ #include "prism/defines.h" #include +#include #include #include #include @@ -106,7 +107,7 @@ void pm_string_constant_init(pm_string_t *string, const char *source, size_t len * @param filepath The filepath to read. * @return Whether or not the file was successfully mapped. */ -PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath); +PRISM_EXPORTED_FUNCTION int pm_string_mapped_init(pm_string_t *string, const char *filepath); /** * Returns the memory size associated with the string. diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 12177450f4f..df4cb57ab91 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -160,7 +160,15 @@ def self.with(filepath, &block) pointer = FFI::MemoryPointer.new(SIZEOF) begin - raise unless LibRubyParser.pm_string_mapped_init(pointer, filepath) + # If successful, pm_mapped_string will return 0, otherwise + # it returns an error code and we need to raise that appropriate + # SystemCallError. + return_code = LibRubyParser.pm_string_mapped_init(pointer, filepath) + + unless return_code == 0 + raise SystemCallError.new(filepath, return_code) + end + yield new(pointer) ensure LibRubyParser.pm_string_free(pointer) diff --git a/src/util/pm_string.c b/src/util/pm_string.c index f4d3033a1b7..f9e96fdd0a7 100644 --- a/src/util/pm_string.c +++ b/src/util/pm_string.c @@ -58,23 +58,21 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length) * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use * `mmap`, and on other POSIX systems we'll use `read`. */ -bool +int pm_string_mapped_init(pm_string_t *string, const char *filepath) { #ifdef _WIN32 // Open the file for reading. HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (file == INVALID_HANDLE_VALUE) { - perror("CreateFile failed"); - return false; + return errno; } // Get the file size. DWORD file_size = GetFileSize(file, NULL); if (file_size == INVALID_FILE_SIZE) { CloseHandle(file); - perror("GetFileSize failed"); - return false; + return errno; } // If the file is empty, then we don't need to do anything else, we'll set @@ -83,15 +81,14 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) { CloseHandle(file); const uint8_t source[] = ""; *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 }; - return true; + return 0; } // Create a mapping of the file. HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL); if (mapping == NULL) { CloseHandle(file); - perror("CreateFileMapping failed"); - return false; + return errno; } // Map the file into memory. @@ -100,26 +97,23 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) { CloseHandle(file); if (source == NULL) { - perror("MapViewOfFile failed"); - return false; + return errno; } *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size }; - return true; + return 0; #else // Open the file for reading int fd = open(filepath, O_RDONLY); if (fd == -1) { - perror("open"); - return false; + return errno; } // Stat the file to get the file size struct stat sb; if (fstat(fd, &sb) == -1) { close(fd); - perror("fstat"); - return false; + return errno; } // mmap the file descriptor to virtually get the contents @@ -130,18 +124,17 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) { close(fd); const uint8_t source[] = ""; *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 }; - return true; + return 0; } source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); if (source == MAP_FAILED) { - perror("Map failed"); - return false; + return errno; } close(fd); *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size }; - return true; + return 0; #endif } diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb index 26243150084..f2bb7c6dcb1 100644 --- a/test/prism/parse_test.rb +++ b/test/prism/parse_test.rb @@ -69,11 +69,108 @@ def test_parse_lex assert_equal 5, tokens.length end + def test_dump_file + assert_nothing_raised do + Prism.dump_file(__FILE__) + end + + error = assert_raise Errno::ENOENT do + Prism.dump_file("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + error = assert_raise Errno::EFAULT do + Prism.dump_file(nil) + end + + assert_equal "Bad address", error.message + end + + def test_lex_file + assert_nothing_raised do + Prism.lex_file(__FILE__) + end + + error = assert_raise Errno::ENOENT do + Prism.lex_file("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + error = assert_raise Errno::EFAULT do + Prism.lex_file(nil) + end + + assert_equal "Bad address", error.message + end + def test_parse_lex_file node, tokens = Prism.parse_lex_file(__FILE__).value assert_kind_of ProgramNode, node refute_empty tokens + + error = assert_raise Errno::ENOENT do + Prism.parse_lex_file("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + error = assert_raise Errno::EFAULT do + Prism.parse_lex_file(nil) + end + + assert_equal "Bad address", error.message + end + + def test_parse_file + node = Prism.parse_file(__FILE__).value + assert_kind_of ProgramNode, node + + error = assert_raise Errno::ENOENT do + Prism.parse_file("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + error = assert_raise Errno::EFAULT do + Prism.parse_file(nil) + end + + assert_equal "Bad address", error.message + end + + def test_parse_file_success + assert_predicate Prism.parse_file_comments(__FILE__), :any? + + error = assert_raise Errno::ENOENT do + Prism.parse_file_comments("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + error = assert_raise Errno::EFAULT do + Prism.parse_file_comments(nil) + end + + assert_equal "Bad address", error.message + end + + def test_parse_file_comments + assert_predicate Prism.parse_file_comments(__FILE__), :any? + + error = assert_raise Errno::ENOENT do + Prism.parse_file_comments("idontexist.rb") + end + + assert_equal "No such file or directory - idontexist.rb", error.message + + error = assert_raise Errno::EFAULT do + Prism.parse_file_comments(nil) + end + + assert_equal "Bad address", error.message end # To accurately compare against Ripper, we need to make sure that we're