From 953a0253fc1bd69ae788bbcb672c4cdfe562b8cd Mon Sep 17 00:00:00 2001 From: Laszlo Csomor Date: Thu, 30 Nov 2017 11:20:11 +0100 Subject: [PATCH 1/6] io_win32_unittest: make //:win32_test run again Do not use "googletest.h", apprently that leads to linking errors on Windows which I couldn't figure out how to solve, and decided to just go with plain gTest instead. See https://github.com/google/protobuf/issues/3951 --- .../protobuf/stubs/io_win32_unittest.cc | 78 +++++++------------ 1 file changed, 29 insertions(+), 49 deletions(-) diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc index ce6f716256..a5c7dbfddd 100644 --- a/src/google/protobuf/stubs/io_win32_unittest.cc +++ b/src/google/protobuf/stubs/io_win32_unittest.cc @@ -48,7 +48,6 @@ #include #include -#include #include #include @@ -89,62 +88,43 @@ void StripTrailingSlashes(string* str) { for (; i >= 0 && ((*str)[i] == '/' || (*str)[i] == '\\'); --i) {} str->resize(i+1); } -} // namespace -void IoWin32Test::SetUp() { - test_tmpdir = string(TestTempDir()); - wtest_tmpdir.clear(); - if (test_tmpdir.empty()) { - const char* test_tmpdir_env = getenv("TEST_TMPDIR"); - if (test_tmpdir_env != NULL && *test_tmpdir_env) { - test_tmpdir = string(test_tmpdir_env); - } +bool GetEnvVar(const char* name, string* result) { + DWORD size = ::GetEnvironmentVariableA(name, NULL, 0); + if (size > 0 && GetLastError() != ERROR_ENVVAR_NOT_FOUND) { + scoped_array str(new char[size]); + ::GetEnvironmentVariableA(name, str.get(), size); + result->assign(str.get()); + return true; + } else { + return false; + } +} - // Only Bazel defines TEST_TMPDIR, CMake does not, so look for other - // suitable environment variables. - if (test_tmpdir.empty()) { - static const char* names[] = {"TEMP", "TMP"}; - for (int i = 0; i < sizeof(names)/sizeof(names[0]); ++i) { - const char* name = names[i]; - test_tmpdir_env = getenv(name); - if (test_tmpdir_env != NULL && *test_tmpdir_env) { - test_tmpdir = string(test_tmpdir_env); - break; - } - } - } +} // namespace - // No other temp directory was found. Use the current director - if (test_tmpdir.empty()) { - char buffer[MAX_PATH]; - // Use GetCurrentDirectoryA instead of GetCurrentDirectoryW, because the - // current working directory must always be shorter than MAX_PATH, even - // with - // "\\?\" prefix (except on Windows 10 version 1607 and beyond, after - // opting in to long paths by default [1]). - // - // [1] https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath - DWORD result = ::GetCurrentDirectoryA(MAX_PATH, buffer); - if (result > 0) { - test_tmpdir = string(buffer); - } else { - // Using assertions in SetUp/TearDown seems to confuse the test - // framework, so just leave the member variables empty in case of - // failure. - GOOGLE_CHECK_OK(false); - return; - } - } +void IoWin32Test::SetUp() { + string tmp; + bool ok = false; + if (!ok) { + ok = GetEnvVar("TEST_TMPDIR", &tmp); + } + if (!ok) { + ok = GetEnvVar("TEMP", &tmp); + } + if (!ok) { + ok = GetEnvVar("TMP", &tmp); + } + if (!ok || tmp.empty()) { + FAIL(); } - StripTrailingSlashes(&test_tmpdir); - test_tmpdir += "\\io_win32_unittest.tmp"; - // CreateDirectoryA's limit is 248 chars, see MSDN. - // https://msdn.microsoft.com/en-us/library/windows/desktop/aa363855(v=vs.85).aspx + StripTrailingSlashes(&tmp); + test_tmpdir = tmp + "\\io_win32_unittest.tmp"; wtest_tmpdir = testonly_path_to_winpath(test_tmpdir); if (!DeleteAllUnder(wtest_tmpdir) || !CreateAllUnder(wtest_tmpdir)) { - GOOGLE_CHECK_OK(false); + FAIL(); test_tmpdir.clear(); wtest_tmpdir.clear(); } From 65da9fd97f67a9499771b6294cf017248ece5661 Mon Sep 17 00:00:00 2001 From: Laszlo Csomor Date: Thu, 30 Nov 2017 14:12:24 +0100 Subject: [PATCH 2/6] io_win32: support non-ASCII paths Fixes https://github.com/google/protobuf/issues/3951 --- src/google/protobuf/stubs/io_win32.cc | 157 +++++++++++------- src/google/protobuf/stubs/io_win32.h | 18 +- .../protobuf/stubs/io_win32_unittest.cc | 90 ++++++---- 3 files changed, 176 insertions(+), 89 deletions(-) diff --git a/src/google/protobuf/stubs/io_win32.cc b/src/google/protobuf/stubs/io_win32.cc index fa2cb8b130..b5af494a66 100644 --- a/src/google/protobuf/stubs/io_win32.cc +++ b/src/google/protobuf/stubs/io_win32.cc @@ -30,10 +30,11 @@ // Author: laszlocsomor@google.com (Laszlo Csomor) // -// Implementation for long-path-aware open/mkdir/etc. on Windows. +// Implementation for long-path-aware open/mkdir/access/etc. on Windows, as well +// as for the supporting utility functions. // // These functions convert the input path to an absolute Windows path -// with "\\?\" prefix if necessary, then pass that to _wopen/_wmkdir/etc. +// with "\\?\" prefix, then pass that to _wopen/_wmkdir/_waccess/etc. // (declared in ) respectively. This allows working with files/directories // whose paths are longer than MAX_PATH (260 chars). // @@ -59,7 +60,6 @@ #include #include -#include #include #include #include @@ -89,6 +89,11 @@ struct CharTraits { static bool is_alpha(wchar_t ch) { return iswalpha(ch); } }; +template +bool null_or_empty(const char_type* s) { + return s == nullptr || *s == 0; +} + // Returns true if the path starts with a drive letter, e.g. "c:". // Note that this won't check for the "\" after the drive letter, so this also // returns true for "c:foo" (which is "c:\${PWD}\foo"). @@ -121,16 +126,7 @@ bool is_drive_relative(const char_type* path) { return has_drive_letter(path) && (path[2] == 0 || !is_separator(path[2])); } -template -void replace_directory_separators(char_type* p) { - for (; *p; ++p) { - if (*p == '/') { - *p = '\\'; - } - } -} - -string join_paths(const string& path1, const string& path2) { +wstring join_paths(const wstring& path1, const wstring& path2) { if (path1.empty() || is_path_absolute(path2.c_str()) || has_longpath_prefix(path2.c_str())) { return path2; @@ -144,23 +140,23 @@ string join_paths(const string& path1, const string& path2) { : (path1 + path2); } else { return is_separator(path2[0]) ? (path1 + path2) - : (path1 + '\\' + path2); + : (path1 + L'\\' + path2); } } -string normalize(string path) { +wstring normalize(wstring path) { if (has_longpath_prefix(path.c_str())) { path = path.substr(4); } - static const string dot("."); - static const string dotdot(".."); + static const wstring dot(L"."); + static const wstring dotdot(L".."); - std::vector segments; + std::vector segments; int segment_start = -1; // Find the path segments in `path` (separated by "/"). for (int i = 0;; ++i) { - if (!is_separator(path[i]) && path[i] != '\0') { + if (!is_separator(path[i]) && path[i] != L'\0') { // The current character does not end a segment, so start one unless it's // already started. if (segment_start < 0) { @@ -169,7 +165,7 @@ string normalize(string path) { } else if (segment_start >= 0 && i > segment_start) { // The current character is "/" or "\0", so this ends a segment. // Add that to `segments` if there's anything to add; handle "." and "..". - string segment(path, segment_start, i - segment_start); + wstring segment(path, segment_start, i - segment_start); segment_start = -1; if (segment == dotdot) { if (!segments.empty() && @@ -180,7 +176,7 @@ string normalize(string path) { segments.push_back(segment); } } - if (path[i] == '\0') { + if (path[i] == L'\0') { break; } } @@ -189,64 +185,58 @@ string normalize(string path) { // form of it, e.g. "c:\.."). if (segments.size() == 1 && segments[0].size() == 2 && has_drive_letter(segments[0].c_str())) { - return segments[0] + '\\'; + return segments[0] + L'\\'; } // Join all segments. bool first = true; - std::ostringstream result; + std::wstringstream result; for (int i = 0; i < segments.size(); ++i) { if (!first) { - result << '\\'; + result << L'\\'; } first = false; result << segments[i]; } // Preserve trailing separator if the input contained it. if (!path.empty() && is_separator(path[path.size() - 1])) { - result << '\\'; + result << L'\\'; } return result.str(); } -WCHAR* as_wstring(const string& s) { - int len = ::MultiByteToWideChar(CP_UTF8, 0, s.c_str(), s.size(), NULL, 0); - WCHAR* result = new WCHAR[len + 1]; - ::MultiByteToWideChar(CP_UTF8, 0, s.c_str(), s.size(), result, len + 1); - result[len] = 0; - return result; -} - -void as_wchar_path(const string& path, wstring* wchar_path) { - scoped_array wbuf(as_wstring(path)); - replace_directory_separators(wbuf.get()); - wchar_path->assign(wbuf.get()); -} - -bool as_windows_path(const string& path, wstring* result) { - if (path.empty()) { +bool as_windows_path(const char* path, wstring* result) { + if (null_or_empty(path)) { result->clear(); return true; } - if (is_separator(path[0]) || is_drive_relative(path.c_str())) { + if (is_separator(path[0]) || is_drive_relative(path)) { + return false; + } + + wstring wpath; + if (!strings::utf8_to_wcs(path, &wpath)) { return false; } - string mutable_path = path; - if (!is_path_absolute(mutable_path.c_str()) && - !has_longpath_prefix(mutable_path.c_str())) { - char cwd[MAX_PATH]; - ::GetCurrentDirectoryA(MAX_PATH, cwd); - mutable_path = join_paths(cwd, mutable_path); + if (!is_path_absolute(wpath.c_str()) && !has_longpath_prefix(wpath.c_str())) { + int size = ::GetCurrentDirectoryW(0, NULL); + if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return false; + } + scoped_array wcwd(new WCHAR[size]); + ::GetCurrentDirectoryW(size, wcwd.get()); + wpath = join_paths(wcwd.get(), wpath); } - as_wchar_path(normalize(mutable_path), result); - if (!has_longpath_prefix(result->c_str())) { + wpath = normalize(wpath); + if (!has_longpath_prefix(wpath.c_str())) { // Add the "\\?\" prefix unconditionally. This way we prevent the Win32 API // from processing the path and "helpfully" removing trailing dots from the // path, for example. // See https://github.com/bazelbuild/bazel/issues/2935 - *result = wstring(L"\\\\?\\") + *result; + wpath = wstring(L"\\\\?\\") + wpath; } + *result = wpath; return true; } @@ -319,13 +309,21 @@ int stat(const char* path, struct _stat* buffer) { FILE* fopen(const char* path, const char* mode) { #ifdef SUPPORT_LONGPATHS + if (null_or_empty(path)) { + errno = EINVAL; + return NULL; + } wstring wpath; if (!as_windows_path(path, &wpath)) { errno = ENOENT; return NULL; } - scoped_array wmode(as_wstring(mode)); - return ::_wfopen(wpath.c_str(), wmode.get()); + wstring wmode; + if (!strings::utf8_to_wcs(mode, &wmode)) { + errno = EINVAL; + return NULL; + } + return ::_wfopen(wpath.c_str(), wmode.c_str()); #else return ::fopen(path, mode); #endif @@ -347,16 +345,61 @@ int write(int fd, const void* buffer, size_t size) { return ::_write(fd, buffer, size); } -wstring testonly_path_to_winpath(const string& path) { +wstring testonly_utf8_to_winpath(const char* path) { wstring wpath; - as_windows_path(path, &wpath); - return wpath; + return as_windows_path(path, &wpath) ? wpath : wstring(); +} + +namespace strings { + +bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) { + if (null_or_empty(s)) { + out->clear(); + return true; + } + BOOL usedDefaultChar = FALSE; + SetLastError(0); + int size = WideCharToMultiByte( + outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0, NULL, + outUtf8 ? NULL : &usedDefaultChar); + if ((size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) + || usedDefaultChar) { + return false; + } + scoped_array astr(new CHAR[size]); + WideCharToMultiByte( + outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, astr.get(), size, NULL, NULL); + out->assign(astr.get()); + return true; +} + +bool mbs_to_wcs(const char* s, wstring* out, bool inUtf8) { + if (null_or_empty(s)) { + out->clear(); + return true; + } + + SetLastError(0); + int size = + MultiByteToWideChar(inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0); + if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return false; + } + scoped_array wstr(new WCHAR[size]); + MultiByteToWideChar( + inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, wstr.get(), size + 1); + out->assign(wstr.get()); + return true; } +bool utf8_to_wcs(const char* input, wstring* out) { + return mbs_to_wcs(input, out, true); +} + +} // namespace strings } // namespace win32 } // namespace internal } // namespace protobuf } // namespace google #endif // defined(_WIN32) - diff --git a/src/google/protobuf/stubs/io_win32.h b/src/google/protobuf/stubs/io_win32.h index 5316008968..60d0ceb675 100644 --- a/src/google/protobuf/stubs/io_win32.h +++ b/src/google/protobuf/stubs/io_win32.h @@ -69,8 +69,22 @@ LIBPROTOBUF_EXPORT int read(int fd, void* buffer, size_t size); LIBPROTOBUF_EXPORT int setmode(int fd, int mode); LIBPROTOBUF_EXPORT int stat(const char* path, struct _stat* buffer); LIBPROTOBUF_EXPORT int write(int fd, const void* buffer, size_t size); -LIBPROTOBUF_EXPORT std::wstring testonly_path_to_winpath( - const std::string& path); +LIBPROTOBUF_EXPORT std::wstring testonly_utf8_to_winpath(const char* path); + +namespace strings { + +// Convert from UTF-16 to Active-Code-Page-encoded or to UTF-8-encoded text. +LIBPROTOBUF_EXPORT bool wcs_to_mbs( + const wchar_t* s, std::string* out, bool outUtf8); + +// Convert from Active-Code-Page-encoded or UTF-8-encoded text to UTF-16. +LIBPROTOBUF_EXPORT bool mbs_to_wcs( + const char* s, std::wstring* out, bool inUtf8); + +// Convert from UTF-8-encoded text to UTF-16. +LIBPROTOBUF_EXPORT bool utf8_to_wcs(const char* input, std::wstring* out); + +} // namespace strings } // namespace win32 } // namespace internal diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc index a5c7dbfddd..e88b75540f 100644 --- a/src/google/protobuf/stubs/io_win32_unittest.cc +++ b/src/google/protobuf/stubs/io_win32_unittest.cc @@ -30,7 +30,8 @@ // Author: laszlocsomor@google.com (Laszlo Csomor) // -// Unit tests for long-path-aware open/mkdir/access on Windows. +// Unit tests for long-path-aware open/mkdir/access/etc. on Windows, as well as +// for the supporting utility functions. // // This file is only used on Windows, it's empty on other platforms. @@ -89,13 +90,17 @@ void StripTrailingSlashes(string* str) { str->resize(i+1); } -bool GetEnvVar(const char* name, string* result) { - DWORD size = ::GetEnvironmentVariableA(name, NULL, 0); +bool GetEnvVarAsUtf8(const WCHAR* name, string* result) { + DWORD size = ::GetEnvironmentVariableW(name, NULL, 0); if (size > 0 && GetLastError() != ERROR_ENVVAR_NOT_FOUND) { - scoped_array str(new char[size]); - ::GetEnvironmentVariableA(name, str.get(), size); - result->assign(str.get()); - return true; + scoped_array wcs(new WCHAR[size]); + ::GetEnvironmentVariableW(name, wcs.get(), size); + // GetEnvironmentVariableA retrieves an Active-Code-Page-encoded text which + // we'd first need to convert to UTF-16 then to UTF-8, because there seems + // to be no API function to do that conversion directly. + // GetEnvironmentVariableW retrieves an UTF-16-encoded text, which we need + // to convert to UTF-8. + return strings::wcs_to_mbs(wcs.get(), result, true); } else { return false; } @@ -104,30 +109,30 @@ bool GetEnvVar(const char* name, string* result) { } // namespace void IoWin32Test::SetUp() { + test_tmpdir.clear(); + wtest_tmpdir.clear(); + string tmp; bool ok = false; if (!ok) { - ok = GetEnvVar("TEST_TMPDIR", &tmp); + ok = GetEnvVarAsUtf8(L"TEST_TMPDIR", &tmp); } if (!ok) { - ok = GetEnvVar("TEMP", &tmp); + ok = GetEnvVarAsUtf8(L"TEMP", &tmp); } if (!ok) { - ok = GetEnvVar("TMP", &tmp); + ok = GetEnvVarAsUtf8(L"TMP", &tmp); } if (!ok || tmp.empty()) { FAIL(); } - StripTrailingSlashes(&tmp); test_tmpdir = tmp + "\\io_win32_unittest.tmp"; - wtest_tmpdir = testonly_path_to_winpath(test_tmpdir); - if (!DeleteAllUnder(wtest_tmpdir) || !CreateAllUnder(wtest_tmpdir)) { - FAIL(); - test_tmpdir.clear(); - wtest_tmpdir.clear(); - } + wtest_tmpdir = testonly_utf8_to_winpath(test_tmpdir.c_str()); + ASSERT_FALSE(wtest_tmpdir.empty()); + ASSERT_TRUE(DeleteAllUnder(wtest_tmpdir)); + ASSERT_TRUE(CreateAllUnder(wtest_tmpdir)); } void IoWin32Test::TearDown() { @@ -171,8 +176,8 @@ bool IoWin32Test::DeleteAllUnder(wstring path) { path = wstring(L"\\\\?\\") + path; } // Append "\" if necessary. - if (path[path.size() - 1] != '\\') { - path.push_back('\\'); + if (path[path.size() - 1] != L'\\') { + path.push_back(L'\\'); } WIN32_FIND_DATAW metadata; @@ -290,12 +295,12 @@ TEST_F(IoWin32Test, MkdirTest) { } TEST_F(IoWin32Test, ChdirTest) { - char owd[MAX_PATH]; - EXPECT_GT(::GetCurrentDirectoryA(MAX_PATH, owd), 0); + WCHAR owd[MAX_PATH]; + EXPECT_GT(::GetCurrentDirectoryW(MAX_PATH, owd), 0); string path("C:\\"); EXPECT_EQ(access(path.c_str(), F_OK), 0); ASSERT_EQ(chdir(path.c_str()), 0); - EXPECT_TRUE(::SetCurrentDirectoryA(owd)); + EXPECT_TRUE(::SetCurrentDirectoryW(owd)); // Do not try to chdir into the test_tmpdir, it may already contain directory // names with trailing dots. @@ -316,11 +321,11 @@ TEST_F(IoWin32Test, AsWindowsPathTest) { EXPECT_GT(GetCurrentDirectoryW(size, cwd_str.get()), 0); wstring cwd = wstring(L"\\\\?\\") + cwd_str.get(); - ASSERT_EQ(testonly_path_to_winpath("relative_mkdirtest"), + ASSERT_EQ(testonly_utf8_to_winpath("relative_mkdirtest"), cwd + L"\\relative_mkdirtest"); - ASSERT_EQ(testonly_path_to_winpath("preserve//\\trailing///"), + ASSERT_EQ(testonly_utf8_to_winpath("preserve//\\trailing///"), cwd + L"\\preserve\\trailing\\"); - ASSERT_EQ(testonly_path_to_winpath("./normalize_me\\/../blah"), + ASSERT_EQ(testonly_utf8_to_winpath("./normalize_me\\/../blah"), cwd + L"\\blah"); std::ostringstream relpath; for (wchar_t* p = cwd_str.get(); *p; ++p) { @@ -329,18 +334,43 @@ TEST_F(IoWin32Test, AsWindowsPathTest) { } } relpath << ".\\/../\\./beyond-toplevel"; - ASSERT_EQ(testonly_path_to_winpath(relpath.str()), + ASSERT_EQ(testonly_utf8_to_winpath(relpath.str().c_str()), wstring(L"\\\\?\\") + cwd_str.get()[0] + L":\\beyond-toplevel"); // Absolute unix paths lack drive letters, driveless absolute windows paths // do too. Neither can be converted to a drive-specifying absolute Windows // path. - ASSERT_EQ(testonly_path_to_winpath("/absolute/unix/path"), L""); + ASSERT_EQ(testonly_utf8_to_winpath("/absolute/unix/path"), L""); // Though valid on Windows, we also don't support UNC paths (\\UNC\\blah). - ASSERT_EQ(testonly_path_to_winpath("\\driveless\\absolute"), L""); + ASSERT_EQ(testonly_utf8_to_winpath("\\driveless\\absolute"), L""); // Though valid in cmd.exe, drive-relative paths are not supported. - ASSERT_EQ(testonly_path_to_winpath("c:foo"), L""); - ASSERT_EQ(testonly_path_to_winpath("c:/foo"), L"\\\\?\\c:\\foo"); + ASSERT_EQ(testonly_utf8_to_winpath("c:foo"), L""); + ASSERT_EQ(testonly_utf8_to_winpath("c:/foo"), L"\\\\?\\c:\\foo"); +} + +TEST_F(IoWin32Test, Utf8ToUtf16Test) { + const char hi_utf8[] = { + 'h', 'i', ' ', + // utf-8: 11010000 10011111, utf-16: 100 0001 1111 = 0x041F + 0xd0, 0x9f, + // utf-8: 11010001 10000000, utf-16: 100 0100 0000 = 0x0440 + 0xd1, 0x80, + // utf-8: 11010000 10111000, utf-16: 100 0011 1000 = 0x0438 + 0xd0, 0xb8, + // utf-8: 11010000 10110010, utf-16: 100 0011 0010 = 0x0432 + 0xd0, 0xb2, + // utf-8: 11010000 10110101, utf-16: 100 0011 0101 = 0x0435 + 0xd0, 0xb5, + // utf-8: 11010001 10000010, utf-16: 100 0100 0010 = 0x0442 + 0xd1, 0x82, 0 + }; + const wchar_t hi_utf16[] = { + L'h', L'i', L' ', 0x041f, 0x0440, 0x0438, 0x0432, 0x0435, 0x0442, 0 + }; + + wstring wcs; + ASSERT_TRUE(strings::utf8_to_wcs(hi_utf8, &wcs)); + ASSERT_EQ(wcs, hi_utf16); } } // namespace From 57a01c7fbe89ca540e32a02c1f46b9e0478b3a8f Mon Sep 17 00:00:00 2001 From: Laszlo Csomor Date: Thu, 30 Nov 2017 17:32:11 +0100 Subject: [PATCH 3/6] io_win32: add more encoding-related tests --- src/google/protobuf/stubs/io_win32.cc | 18 +++- src/google/protobuf/stubs/io_win32.h | 3 + .../protobuf/stubs/io_win32_unittest.cc | 99 ++++++++++++++----- 3 files changed, 89 insertions(+), 31 deletions(-) diff --git a/src/google/protobuf/stubs/io_win32.cc b/src/google/protobuf/stubs/io_win32.cc index b5af494a66..ad2d2d265e 100644 --- a/src/google/protobuf/stubs/io_win32.cc +++ b/src/google/protobuf/stubs/io_win32.cc @@ -210,16 +210,20 @@ bool as_windows_path(const char* path, wstring* result) { result->clear(); return true; } - if (is_separator(path[0]) || is_drive_relative(path)) { - return false; - } - wstring wpath; if (!strings::utf8_to_wcs(path, &wpath)) { return false; } + if (has_longpath_prefix(wpath.c_str())) { + *result = wpath; + return true; + } + if (is_separator(path[0]) || is_drive_relative(path)) { + return false; + } - if (!is_path_absolute(wpath.c_str()) && !has_longpath_prefix(wpath.c_str())) { + + if (!is_path_absolute(wpath.c_str())) { int size = ::GetCurrentDirectoryW(0, NULL); if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { return false; @@ -396,6 +400,10 @@ bool utf8_to_wcs(const char* input, wstring* out) { return mbs_to_wcs(input, out, true); } +bool wcs_to_utf8(const wchar_t* input, string* out) { + return wcs_to_mbs(input, out, true); +} + } // namespace strings } // namespace win32 } // namespace internal diff --git a/src/google/protobuf/stubs/io_win32.h b/src/google/protobuf/stubs/io_win32.h index 60d0ceb675..9e17d25304 100644 --- a/src/google/protobuf/stubs/io_win32.h +++ b/src/google/protobuf/stubs/io_win32.h @@ -84,6 +84,9 @@ LIBPROTOBUF_EXPORT bool mbs_to_wcs( // Convert from UTF-8-encoded text to UTF-16. LIBPROTOBUF_EXPORT bool utf8_to_wcs(const char* input, std::wstring* out); +// Convert from UTF-16-encoded text to UTF-8. +LIBPROTOBUF_EXPORT bool wcs_to_utf8(const wchar_t* input, std::string* out); + } // namespace strings } // namespace win32 diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc index e88b75540f..7a33187243 100644 --- a/src/google/protobuf/stubs/io_win32_unittest.cc +++ b/src/google/protobuf/stubs/io_win32_unittest.cc @@ -61,6 +61,27 @@ namespace internal { namespace win32 { namespace { +const char kUtf8Text[] = { + 'h', 'i', ' ', + // utf-8: 11010000 10011111, utf-16: 100 0001 1111 = 0x041F + 0xd0, 0x9f, + // utf-8: 11010001 10000000, utf-16: 100 0100 0000 = 0x0440 + 0xd1, 0x80, + // utf-8: 11010000 10111000, utf-16: 100 0011 1000 = 0x0438 + 0xd0, 0xb8, + // utf-8: 11010000 10110010, utf-16: 100 0011 0010 = 0x0432 + 0xd0, 0xb2, + // utf-8: 11010000 10110101, utf-16: 100 0011 0101 = 0x0435 + 0xd0, 0xb5, + // utf-8: 11010001 10000010, utf-16: 100 0100 0010 = 0x0442 + 0xd1, 0x82, 0 +}; + +const wchar_t kUtf16Text[] = { + L'h', L'i', L' ', + L'\x41f', L'\x440', L'\x438', L'\x432', L'\x435', L'\x442', 0 +}; + using std::string; using std::wstring; @@ -73,6 +94,7 @@ class IoWin32Test : public ::testing::Test { bool CreateAllUnder(wstring path); bool DeleteAllUnder(wstring path); + WCHAR working_directory[MAX_PATH]; string test_tmpdir; wstring wtest_tmpdir; }; @@ -111,6 +133,7 @@ bool GetEnvVarAsUtf8(const WCHAR* name, string* result) { void IoWin32Test::SetUp() { test_tmpdir.clear(); wtest_tmpdir.clear(); + EXPECT_GT(::GetCurrentDirectoryW(MAX_PATH, working_directory), 0); string tmp; bool ok = false; @@ -128,7 +151,14 @@ void IoWin32Test::SetUp() { } StripTrailingSlashes(&tmp); - test_tmpdir = tmp + "\\io_win32_unittest.tmp"; + std::stringstream result; + // Deleting files and directories is asynchronous on Windows, and if TearDown + // just deleted the previous temp directory, sometimes we cannot recreate the + // same directory. + // Use a counter so every test method gets its own temp directory. + static int counter = 0; + result << tmp << "\\io_win32_test" << counter++ << ".tmp"; + test_tmpdir = result.str(); wtest_tmpdir = testonly_utf8_to_winpath(test_tmpdir.c_str()); ASSERT_FALSE(wtest_tmpdir.empty()); ASSERT_TRUE(DeleteAllUnder(wtest_tmpdir)); @@ -139,6 +169,7 @@ void IoWin32Test::TearDown() { if (!wtest_tmpdir.empty()) { DeleteAllUnder(wtest_tmpdir); } + ::SetCurrentDirectoryW(working_directory); } bool IoWin32Test::CreateAllUnder(wstring path) { @@ -294,13 +325,24 @@ TEST_F(IoWin32Test, MkdirTest) { ASSERT_EQ(errno, ENOENT); } +TEST_F(IoWin32Test, MkdirTestNonAscii) { + ASSERT_INITIALIZED; + + // Create a non-ASCII path. + // Ensure that we can create the directory using SetCurrentDirectoryW. + EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1").c_str(), NULL)); + EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1\\" + kUtf16Text).c_str(), NULL)); + // Ensure that we can create a very similarly named directory using mkdir. + // We don't attemp to delete and recreate the same directory, because on + // Windows, deleting files and directories seems to be asynchronous. + EXPECT_EQ(mkdir((test_tmpdir + "\\2").c_str(), 0644), 0); + EXPECT_EQ(mkdir((test_tmpdir + "\\2\\" + kUtf8Text).c_str(), 0644), 0); +} + TEST_F(IoWin32Test, ChdirTest) { - WCHAR owd[MAX_PATH]; - EXPECT_GT(::GetCurrentDirectoryW(MAX_PATH, owd), 0); string path("C:\\"); EXPECT_EQ(access(path.c_str(), F_OK), 0); ASSERT_EQ(chdir(path.c_str()), 0); - EXPECT_TRUE(::SetCurrentDirectoryW(owd)); // Do not try to chdir into the test_tmpdir, it may already contain directory // names with trailing dots. @@ -315,6 +357,26 @@ TEST_F(IoWin32Test, ChdirTest) { ASSERT_NE(chdir(path.c_str()), 0); } +TEST_F(IoWin32Test, ChdirTestNonAscii) { + ASSERT_INITIALIZED; + + // Create a directory with a non-ASCII path and ensure we can cd into it. + wstring wNonAscii(wtest_tmpdir + L"\\" + kUtf16Text); + string nonAscii; + EXPECT_TRUE(strings::wcs_to_utf8(wNonAscii.c_str(), &nonAscii)); + EXPECT_TRUE(CreateDirectoryW(wNonAscii.c_str(), NULL)); + WCHAR cwd[MAX_PATH]; + EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd)); + // Ensure that we can cd into the path using SetCurrentDirectoryW. + EXPECT_TRUE(SetCurrentDirectoryW(wNonAscii.c_str())); + EXPECT_TRUE(SetCurrentDirectoryW(cwd)); + // Ensure that we can cd into the path using chdir. + ASSERT_EQ(chdir(nonAscii.c_str()), 0); + // Ensure that the GetCurrentDirectoryW returns the desired path. + EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd)); + ASSERT_EQ(wNonAscii, cwd); +} + TEST_F(IoWin32Test, AsWindowsPathTest) { DWORD size = GetCurrentDirectoryW(0, NULL); scoped_array cwd_str(new wchar_t[size]); @@ -346,31 +408,16 @@ TEST_F(IoWin32Test, AsWindowsPathTest) { // Though valid in cmd.exe, drive-relative paths are not supported. ASSERT_EQ(testonly_utf8_to_winpath("c:foo"), L""); ASSERT_EQ(testonly_utf8_to_winpath("c:/foo"), L"\\\\?\\c:\\foo"); + ASSERT_EQ(testonly_utf8_to_winpath("\\\\?\\C:\\foo"), L"\\\\?\\C:\\foo"); } -TEST_F(IoWin32Test, Utf8ToUtf16Test) { - const char hi_utf8[] = { - 'h', 'i', ' ', - // utf-8: 11010000 10011111, utf-16: 100 0001 1111 = 0x041F - 0xd0, 0x9f, - // utf-8: 11010001 10000000, utf-16: 100 0100 0000 = 0x0440 - 0xd1, 0x80, - // utf-8: 11010000 10111000, utf-16: 100 0011 1000 = 0x0438 - 0xd0, 0xb8, - // utf-8: 11010000 10110010, utf-16: 100 0011 0010 = 0x0432 - 0xd0, 0xb2, - // utf-8: 11010000 10110101, utf-16: 100 0011 0101 = 0x0435 - 0xd0, 0xb5, - // utf-8: 11010001 10000010, utf-16: 100 0100 0010 = 0x0442 - 0xd1, 0x82, 0 - }; - const wchar_t hi_utf16[] = { - L'h', L'i', L' ', 0x041f, 0x0440, 0x0438, 0x0432, 0x0435, 0x0442, 0 - }; - +TEST_F(IoWin32Test, Utf8Utf16ConversionTest) { + string mbs; wstring wcs; - ASSERT_TRUE(strings::utf8_to_wcs(hi_utf8, &wcs)); - ASSERT_EQ(wcs, hi_utf16); + ASSERT_TRUE(strings::utf8_to_wcs(kUtf8Text, &wcs)); + ASSERT_TRUE(strings::wcs_to_utf8(kUtf16Text, &mbs)); + ASSERT_EQ(wcs, kUtf16Text); + ASSERT_EQ(mbs, kUtf8Text); } } // namespace From 3f1b1a6da5f1a778ccad46b6f340e82ad4c99209 Mon Sep 17 00:00:00 2001 From: Laszlo Csomor Date: Fri, 1 Dec 2017 10:29:34 +0100 Subject: [PATCH 4/6] io_win32_unittest: use CWD as last tempdir If the test cannot find a temp directory by checking environment variables, it will fall back to using the current working directory as the temp directory root. This is what the test used to do as of commit https://github.com/google/protobuf/commit/6de51caed52d798815954646b230c5aef3e4d2fc and what was then changed by commit https://github.com/google/protobuf/pull/3978/commits/792d098769d8e000d8d474c8ffd201d2eabc2134 --- .../protobuf/stubs/io_win32_unittest.cc | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc index 7a33187243..d277ad6e4b 100644 --- a/src/google/protobuf/stubs/io_win32_unittest.cc +++ b/src/google/protobuf/stubs/io_win32_unittest.cc @@ -128,6 +128,22 @@ bool GetEnvVarAsUtf8(const WCHAR* name, string* result) { } } +bool GetCwdAsUtf8(string* result) { + DWORD size = ::GetCurrentDirectoryW(0, NULL); + if (size == 0 && GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + scoped_array wcs(new WCHAR[size]); + ::GetCurrentDirectoryW(size, wcs.get()); + // GetCurrentDirectoryA retrieves an Active-Code-Page-encoded text which + // we'd first need to convert to UTF-16 then to UTF-8, because there seems + // to be no API function to do that conversion directly. + // GetCurrentDirectoryW retrieves an UTF-16-encoded text, which we need + // to convert to UTF-8. + return strings::wcs_to_mbs(wcs.get(), result, true); + } else { + return false; + } +} + } // namespace void IoWin32Test::SetUp() { @@ -138,16 +154,23 @@ void IoWin32Test::SetUp() { string tmp; bool ok = false; if (!ok) { + // Bazel sets this environment variable when it runs tests. ok = GetEnvVarAsUtf8(L"TEST_TMPDIR", &tmp); } if (!ok) { + // Bazel 0.8.0 sets this environment for every build and test action. ok = GetEnvVarAsUtf8(L"TEMP", &tmp); } if (!ok) { + // Bazel 0.8.0 sets this environment for every build and test action. ok = GetEnvVarAsUtf8(L"TMP", &tmp); } + if (!ok) { + // Fall back to using the current directory. + ok = GetCwdAsUtf8(&tmp); + } if (!ok || tmp.empty()) { - FAIL(); + FAIL() << "Cannot find a temp directory."; } StripTrailingSlashes(&tmp); @@ -156,8 +179,8 @@ void IoWin32Test::SetUp() { // just deleted the previous temp directory, sometimes we cannot recreate the // same directory. // Use a counter so every test method gets its own temp directory. - static int counter = 0; - result << tmp << "\\io_win32_test" << counter++ << ".tmp"; + static unsigned int counter = 0; + result << tmp << "\\w32tst" << counter++ << ".tmp"; test_tmpdir = result.str(); wtest_tmpdir = testonly_utf8_to_winpath(test_tmpdir.c_str()); ASSERT_FALSE(wtest_tmpdir.empty()); From eb3bd6ec297b4dff3734c51a159713e559f5c2cf Mon Sep 17 00:00:00 2001 From: Laszlo Csomor Date: Fri, 1 Dec 2017 11:09:34 +0100 Subject: [PATCH 5/6] io_win32_unittest: fix condition in GetCwdAsUtf8 --- src/google/protobuf/stubs/io_win32_unittest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc index d277ad6e4b..710548a202 100644 --- a/src/google/protobuf/stubs/io_win32_unittest.cc +++ b/src/google/protobuf/stubs/io_win32_unittest.cc @@ -130,7 +130,7 @@ bool GetEnvVarAsUtf8(const WCHAR* name, string* result) { bool GetCwdAsUtf8(string* result) { DWORD size = ::GetCurrentDirectoryW(0, NULL); - if (size == 0 && GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + if (size > 0 && GetLastError() == ERROR_INSUFFICIENT_BUFFER) { scoped_array wcs(new WCHAR[size]); ::GetCurrentDirectoryW(size, wcs.get()); // GetCurrentDirectoryA retrieves an Active-Code-Page-encoded text which From a3a1c93fb4bb204fd42cb91ac1053f93bec4da7a Mon Sep 17 00:00:00 2001 From: Laszlo Csomor Date: Fri, 1 Dec 2017 11:31:12 +0100 Subject: [PATCH 6/6] io_win32_unittest: remove incorrect error check Unlike GetEnvironmentVariableW, GetCurrentDirectoryW doesn't set ERROR_INSUFFICIENT_BUFFER. --- src/google/protobuf/stubs/io_win32_unittest.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc index 710548a202..b216aece34 100644 --- a/src/google/protobuf/stubs/io_win32_unittest.cc +++ b/src/google/protobuf/stubs/io_win32_unittest.cc @@ -122,7 +122,7 @@ bool GetEnvVarAsUtf8(const WCHAR* name, string* result) { // to be no API function to do that conversion directly. // GetEnvironmentVariableW retrieves an UTF-16-encoded text, which we need // to convert to UTF-8. - return strings::wcs_to_mbs(wcs.get(), result, true); + return strings::wcs_to_utf8(wcs.get(), result); } else { return false; } @@ -130,7 +130,7 @@ bool GetEnvVarAsUtf8(const WCHAR* name, string* result) { bool GetCwdAsUtf8(string* result) { DWORD size = ::GetCurrentDirectoryW(0, NULL); - if (size > 0 && GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + if (size > 0) { scoped_array wcs(new WCHAR[size]); ::GetCurrentDirectoryW(size, wcs.get()); // GetCurrentDirectoryA retrieves an Active-Code-Page-encoded text which @@ -138,7 +138,7 @@ bool GetCwdAsUtf8(string* result) { // to be no API function to do that conversion directly. // GetCurrentDirectoryW retrieves an UTF-16-encoded text, which we need // to convert to UTF-8. - return strings::wcs_to_mbs(wcs.get(), result, true); + return strings::wcs_to_utf8(wcs.get(), result); } else { return false; }