protobuf/ruby/tests/utf8.rb

#!/usr/bin/ruby

require 'google/protobuf'
require 'utf8_pb'
require 'test/unit'

module CaptureWarnings
  @@warnings = nil

  module_function

  def warn(message, category: nil, **kwargs)
    if @@warnings
      @@warnings << message
    else
      super
    end
  end

  def capture
    @@warnings = []
    yield
    @@warnings
  ensure
    @@warnings = nil
  end
end

Warning.extend CaptureWarnings

module Utf8Test
  def test_scalar
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.optional_string = bad_utf8_string() }
  end

  def test_repeated
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.repeated_string << bad_utf8_string() }
  end

  def test_map_key
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.map_string_string[bad_utf8_string()] = "abc" }
  end

  def test_map_value
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.map_string_string["abc"] = bad_utf8_string() }
  end
end

# Tests the case of string objects that are marked UTF-8, but contain invalid
# UTF-8.
#
# For now these only warn, but in the next major version they will throw an
# exception.
class MarkedUtf8Test < Test::Unit::TestCase
  def assert_bad_utf8(&block)
    warnings = CaptureWarnings.capture(&block)
    assert_equal 1, warnings.length
    assert_match(/String is invalid UTF-8. This will be an error in a future version./, warnings[0])
  end

  def bad_utf8_string
    str = "\x80"
    assert_false str.valid_encoding?
    str
  end

  include Utf8Test
end

# This test doesn't work in JRuby because JRuby appears to have a bug where
# the "valid" bit on a string's data is not invalidated properly when the
# string is modified: https://github.com/jruby/jruby/issues/8316
if !defined? JRUBY_VERSION
  # Tests the case of string objects that are marked UTF-8, and initially contain
  # valid UTF-8, but are later modified to be invalid UTF-8.  This may put the
  # string into an state of "unknown" validity.
  #
  # For now these only warn, but in the next major version they will throw an
  # exception.
  class MarkedModifiedUtf8Test < Test::Unit::TestCase
    def assert_bad_utf8(&block)
      warnings = CaptureWarnings.capture(&block)
      assert_equal 1, warnings.length
      assert_match(/String is invalid UTF-8. This will be an error in a future version./, warnings[0])
    end

    def bad_utf8_string
      str = " "
      assert_true str.valid_encoding?
      str[0] = "\x80"
      str
    end

    include Utf8Test
  end
end

# Tests the case of string objects that are marked with a non-UTF-8 encoding,
# but contain invalid UTF-8.
#
# This case will raise Encoding::UndefinedConversionError.
class MarkedNonUtf8Test < Test::Unit::TestCase
  def assert_bad_utf8
    assert_raises(Encoding::UndefinedConversionError) { yield }
  end

  def bad_utf8_string
    str = "\x80".force_encoding(Encoding::ASCII_8BIT)
    assert_true str.valid_encoding?
    str
  end

  include Utf8Test
end

# Tests the case of string objects that are marked with a non-UTF-8 encoding,
# but are invalid even in their source encoding.
#
# This case will raise Encoding::InvalidByteSequenceError
class MarkedNonUtf8Test < Test::Unit::TestCase
  def assert_bad_utf8(&block)
    assert_raises(Encoding::InvalidByteSequenceError, &block)
  end

  def bad_utf8_string
    str = "\x80".force_encoding(Encoding::ASCII)
    assert_false str.valid_encoding?
    str
  end

  include Utf8Test
end
[Ruby] Warn if assigning a "UTF-8" string with invalid UTF-8. (#17253) This is a warning now, but will be an error in the next version. PiperOrigin-RevId: 653236128 5 months ago			`#!/usr/bin/ruby`

			`require 'google/protobuf'`
			`require 'utf8_pb'`
			`require 'test/unit'`

			`module CaptureWarnings`
			`@@warnings = nil`

			`module_function`

			`def warn(message, category: nil, **kwargs)`
			`if @@warnings`
			`@@warnings << message`
			`else`
			`super`
			`end`
			`end`

			`def capture`
			`@@warnings = []`
			`yield`
			`@@warnings`
			`ensure`
			`@@warnings = nil`
			`end`
			`end`

			`Warning.extend CaptureWarnings`

			`module Utf8Test`
			`def test_scalar`
			`msg = Utf8TestProtos::TestUtf8.new`
			`assert_bad_utf8 { msg.optional_string = bad_utf8_string() }`
			`end`

			`def test_repeated`
			`msg = Utf8TestProtos::TestUtf8.new`
			`assert_bad_utf8 { msg.repeated_string << bad_utf8_string() }`
			`end`

			`def test_map_key`
			`msg = Utf8TestProtos::TestUtf8.new`
			`assert_bad_utf8 { msg.map_string_string[bad_utf8_string()] = "abc" }`
			`end`

			`def test_map_value`
			`msg = Utf8TestProtos::TestUtf8.new`
			`assert_bad_utf8 { msg.map_string_string["abc"] = bad_utf8_string() }`
			`end`
			`end`

			`# Tests the case of string objects that are marked UTF-8, but contain invalid`
			`# UTF-8.`
			`#`
			`# For now these only warn, but in the next major version they will throw an`
			`# exception.`
			`class MarkedUtf8Test < Test::Unit::TestCase`
			`def assert_bad_utf8(&block)`
			`warnings = CaptureWarnings.capture(&block)`
			`assert_equal 1, warnings.length`
			`assert_match(/String is invalid UTF-8. This will be an error in a future version./, warnings[0])`
			`end`

			`def bad_utf8_string`
			`str = "\x80"`
			`assert_false str.valid_encoding?`
			`str`
			`end`

			`include Utf8Test`
			`end`

			`# This test doesn't work in JRuby because JRuby appears to have a bug where`
			`# the "valid" bit on a string's data is not invalidated properly when the`
			`# string is modified: https://github.com/jruby/jruby/issues/8316`
			`if !defined? JRUBY_VERSION`
			`# Tests the case of string objects that are marked UTF-8, and initially contain`
			`# valid UTF-8, but are later modified to be invalid UTF-8. This may put the`
			`# string into an state of "unknown" validity.`
			`#`
			`# For now these only warn, but in the next major version they will throw an`
			`# exception.`
			`class MarkedModifiedUtf8Test < Test::Unit::TestCase`
			`def assert_bad_utf8(&block)`
			`warnings = CaptureWarnings.capture(&block)`
			`assert_equal 1, warnings.length`
			`assert_match(/String is invalid UTF-8. This will be an error in a future version./, warnings[0])`
			`end`

			`def bad_utf8_string`
			`str = " "`
			`assert_true str.valid_encoding?`
			`str[0] = "\x80"`
			`str`
			`end`

			`include Utf8Test`
			`end`
			`end`

			`# Tests the case of string objects that are marked with a non-UTF-8 encoding,`
			`# but contain invalid UTF-8.`
			`#`
			`# This case will raise Encoding::UndefinedConversionError.`
			`class MarkedNonUtf8Test < Test::Unit::TestCase`
			`def assert_bad_utf8`
			`assert_raises(Encoding::UndefinedConversionError) { yield }`
			`end`

			`def bad_utf8_string`
			`str = "\x80".force_encoding(Encoding::ASCII_8BIT)`
			`assert_true str.valid_encoding?`
			`str`
			`end`

			`include Utf8Test`
			`end`

			`# Tests the case of string objects that are marked with a non-UTF-8 encoding,`
			`# but are invalid even in their source encoding.`
			`#`
			`# This case will raise Encoding::InvalidByteSequenceError`
			`class MarkedNonUtf8Test < Test::Unit::TestCase`
			`def assert_bad_utf8(&block)`
			`assert_raises(Encoding::InvalidByteSequenceError, &block)`
			`end`

			`def bad_utf8_string`
			`str = "\x80".force_encoding(Encoding::ASCII)`
			`assert_false str.valid_encoding?`
			`str`
			`end`

			`include Utf8Test`
			`end`