Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Unicode paths on Windows #5514

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ full changeset diff at the end of each section.
Current Trunk
-------------

- Add support for Unicode paths on Windows (#4995)

v112
----

Expand Down
1 change: 1 addition & 0 deletions src/support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ set(support_SOURCES
file.cpp
istring.cpp
path.cpp
pchar.cpp
safe_integer.cpp
threads.cpp
utilities.cpp
Expand Down
28 changes: 27 additions & 1 deletion src/support/command-line.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,33 @@ Options& Options::add_positional(const std::string& name,
return *this;
}

void Options::parse(int argc, const char* argv[]) {
// Converts the platform-specific pchar argument array into plain char strings
// (on Windows this is a UTF-16 to UTF-8 conversion), then calls parse2.
//
// Further processing is then performed on plain chars and strings.
//
// For arguments that represent paths, the reverse UTF-8 to UTF-16 encoding will
// be performed (on Windows) by the fspath constructor.
//
// On non-Windows this is just copying bytes around without conversion.
//
// argc: number of entries in argv.
// argv: the arguments as handed to the program entry point.
void Options::parse(int argc, const pchar* argv[]) {
  // Owns the converted, NUL-terminated bytes of every argument. Each argument
  // lives in its own heap buffer, so its address stays stable even when the
  // outer vector grows and moves its elements.
  std::vector<std::vector<char>> utf8_argv;
  // The char* view of utf8_argv that is handed to parse2.
  std::vector<const char*> utf8_argv_ptrs;

  if (argc > 0) {
    utf8_argv.reserve(argc);
    // One extra slot for the terminating null pointer.
    utf8_argv_ptrs.reserve(static_cast<size_t>(argc) + 1);
  }

  // Use `<` rather than `!=` so that a bogus negative argc cannot run off the
  // end of argv; parse2 asserts on argc instead.
  for (int i = 0; i < argc; ++i) {
    std::string utf8_arg = pstring_to_string(pstring(argv[i]));
    std::vector<char> utf8_arg_vec(utf8_arg.begin(), utf8_arg.end());
    utf8_arg_vec.push_back('\0');
    utf8_argv.push_back(std::move(utf8_arg_vec));
    utf8_argv_ptrs.push_back(utf8_argv.back().data());
  }

  // Keep the usual argv contract: argv[argc] is a null pointer.
  utf8_argv_ptrs.push_back(nullptr);

  Options::parse2(argc, utf8_argv_ptrs.data());
}

void Options::parse2(int argc, const char* argv[]) {
assert(argc > 0 && "expect at least program name as an argument");
size_t positionalsSeen = 0;
auto dashes = [](const std::string& s) {
Expand Down
5 changes: 4 additions & 1 deletion src/support/command-line.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <utility>
#include <vector>

#include "pchar.h"
#include "wasm.h"

namespace wasm {
Expand Down Expand Up @@ -63,9 +64,11 @@ class Options {
Options& add_positional(const std::string& name,
Arguments arguments,
const Action& action);
void parse(int argc, const char* argv[]);
void parse(int argc, const pchar* argv[]);

private:
void parse2(int argc, const char* argv[]);

struct Option {
std::string longName;
std::string shortName;
Expand Down
47 changes: 25 additions & 22 deletions src/support/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,26 @@ template<> std::string do_read_stdin<std::string>::operator()() {
}

template<typename T>
T wasm::read_file(const std::string& filename, Flags::BinaryOption binary) {
if (filename == "-") {
T wasm::read_file(const wasm::fspath& filename, Flags::BinaryOption binary) {
if (filename.stdpath() == "-") {
return do_read_stdin<T>{}();
}
BYN_TRACE("Loading '" << filename << "'...\n");
BYN_TRACE("Loading '" << filename.stdpath() << "'...\n");
std::ifstream infile;
std::ios_base::openmode flags = std::ifstream::in;
if (binary == Flags::Binary) {
flags |= std::ifstream::binary;
}
infile.open(filename, flags);
infile.open(filename.stdpath(), flags);
if (!infile.is_open()) {
Fatal() << "Failed opening '" << filename << "'";
Fatal() << "Failed opening '" << filename.stdpath() << "'";
}
infile.seekg(0, std::ios::end);
std::streampos insize = infile.tellg();
if (uint64_t(insize) >= std::numeric_limits<size_t>::max()) {
// Building a 32-bit executable where size_t == 32 bits, we are not able to
// create strings larger than 2^32 bytes in length, so must abort here.
Fatal() << "Failed opening '" << filename
Fatal() << "Failed opening '" << filename.stdpath()
<< "': Input file too large: " << insize
<< " bytes. Try rebuilding in 64-bit mode.";
}
Expand All @@ -87,47 +87,50 @@ T wasm::read_file(const std::string& filename, Flags::BinaryOption binary) {
return input;
}

std::string wasm::read_possible_response_file(const std::string& input) {
if (input.size() == 0 || input[0] != '@') {
return input;
std::string wasm::read_possible_response_file(const wasm::fspath& input) {
auto input_str = input.stdpath().native();
if (input_str.size() == 0 || input_str[0] != '@') {
return wasm::pstring_to_string(input.stdpath().native());
}
return wasm::read_file<std::string>(input.substr(1), Flags::Text);
auto input_substr = input_str.substr(1);
auto real_path = wasm::fspath::from_pstring(input_substr);
return wasm::read_file<std::string>(real_path, Flags::Text);
}

// Explicit instantiations for the explicit specializations.
template std::string wasm::read_file<>(const std::string&, Flags::BinaryOption);
template std::vector<char> wasm::read_file<>(const std::string&,
template std::string wasm::read_file<>(const wasm::fspath&, Flags::BinaryOption);
template std::vector<char> wasm::read_file<>(const wasm::fspath&,
Flags::BinaryOption);

wasm::Output::Output(const std::string& filename, Flags::BinaryOption binary)
wasm::Output::Output(const wasm::fspath& filename, Flags::BinaryOption binary)
: outfile(), out([this, filename, binary]() {
// Ensure a single return at the very end, to avoid clang-tidy warnings
// about the types of different returns here.
std::streambuf* buffer;
if (filename == "-" || filename.empty()) {
if (filename.stdpath() == "-" || filename.stdpath().empty()) {
buffer = std::cout.rdbuf();
} else {
BYN_TRACE("Opening '" << filename << "'\n");
BYN_TRACE("Opening '" << filename.stdpath() << "'\n");
auto flags = std::ofstream::out | std::ofstream::trunc;
if (binary == Flags::Binary) {
flags |= std::ofstream::binary;
}
outfile.open(filename, flags);
outfile.open(filename.stdpath(), flags);
if (!outfile.is_open()) {
Fatal() << "Failed opening '" << filename << "'";
Fatal() << "Failed opening '" << filename.stdpath() << "'";
}
buffer = outfile.rdbuf();
}
return buffer;
}()) {}

void wasm::copy_file(std::string input, std::string output) {
std::ifstream src(input, std::ios::binary);
std::ofstream dst(output, std::ios::binary);
void wasm::copy_file(wasm::fspath input, wasm::fspath output) {
std::ifstream src(input.stdpath(), std::ios::binary);
std::ofstream dst(output.stdpath(), std::ios::binary);
dst << src.rdbuf();
}

size_t wasm::file_size(std::string filename) {
std::ifstream infile(filename, std::ifstream::ate | std::ifstream::binary);
size_t wasm::file_size(wasm::fspath filename) {
std::ifstream infile(filename.stdpath(), std::ifstream::ate | std::ifstream::binary);
return infile.tellg();
}
16 changes: 9 additions & 7 deletions src/support/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include <utility>
#include <vector>

#include "pchar.h"

namespace wasm {

namespace Flags {
Expand All @@ -35,23 +37,23 @@ enum BinaryOption { Binary, Text };
std::vector<char> read_stdin();

template<typename T>
T read_file(const std::string& filename, Flags::BinaryOption binary);
T read_file(const wasm::fspath& filename, Flags::BinaryOption binary);

// Declare the valid explicit specializations.
extern template std::string read_file<>(const std::string&,
extern template std::string read_file<>(const wasm::fspath&,
Flags::BinaryOption);
extern template std::vector<char> read_file<>(const std::string&,
extern template std::vector<char> read_file<>(const wasm::fspath&,
Flags::BinaryOption);

// Given a string which may be a response file (i.e., a filename starting
// with "@"), if it is a response file read it and return that, or if it
// is not a response file, return it as is.
std::string read_possible_response_file(const std::string&);
std::string read_possible_response_file(const wasm::fspath&);

class Output {
public:
// An empty filename or "-" will open stdout instead.
Output(const std::string& filename, Flags::BinaryOption binary);
Output(const wasm::fspath& filename, Flags::BinaryOption binary);
~Output() = default;
template<typename T> std::ostream& operator<<(const T& v) { return out << v; }

Expand All @@ -70,10 +72,10 @@ class Output {
};

// Copies a file to another file
void copy_file(std::string input, std::string output);
void copy_file(wasm::fspath input, wasm::fspath output);

// Returns the size of a file
size_t file_size(std::string filename);
size_t file_size(wasm::fspath filename);

} // namespace wasm

Expand Down
36 changes: 36 additions & 0 deletions src/support/main.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright 2016 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//
// Cross-platform definition of main.
//
// Users will write main like:
//
// int BYN_MAIN(int argc, const pchar* argv[]) { ... }
//

#ifndef wasm_support_main_h
#define wasm_support_main_h

#include "support/pchar.h"

// BYN_MAIN is the name of the program entry point: it expands to wmain when
// _WIN32 is defined and to main everywhere else.
// NOTE(review): wmain is presumably chosen so that argv arrives as wide
// (pchar) strings on Windows - confirm against pchar.h.
#ifdef _WIN32
#define BYN_MAIN wmain
#else
#define BYN_MAIN main
#endif

#endif // wasm_support_main_h
94 changes: 94 additions & 0 deletions src/support/pchar.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright 2015 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "pchar.h"

#ifdef _WIN32
#include <windows.h>
#endif

namespace wasm {

#ifdef _WIN32

// <windows.h> must be included at file scope, not here: an include inside
// `namespace wasm` would declare the whole Windows API in that namespace.

// The conversion functions here will always succeed, with invalid chars
// converted to replacement chars. If there are bugs here they should manifest
// in file-not-found errors and not something worse.

// Converts a UTF-8 encoded std::string to the platform string type (UTF-16).
// Returns an empty pstring for empty input or if the API reports an error.
wasm::pstring string_to_pstring(const std::string& s) {
  // The API rejects a zero-length input, so handle it explicitly.
  if (s.empty()) {
    return wasm::pstring();
  }
  // The API takes int lengths; inputs here are command-line arguments and
  // paths, which are assumed to fit.
  const int inlen = static_cast<int>(s.size());
  // First call: ask how many wide chars the result needs.
  const int outlen =
    MultiByteToWideChar(CP_UTF8, 0, s.data(), inlen, nullptr, 0);
  if (outlen <= 0) {
    return wasm::pstring();
  }
  wasm::pstring outstr(static_cast<std::size_t>(outlen),
                       wasm::pstring::value_type());
  // Second call: perform the conversion into the sized buffer.
  MultiByteToWideChar(CP_UTF8, 0, s.data(), inlen, outstr.data(), outlen);
  return outstr;
}

// Converts a platform string (UTF-16) to a UTF-8 encoded std::string.
// Returns an empty string for empty input or if the API reports an error.
std::string pstring_to_string(const wasm::pstring& s) {
  // The API rejects a zero-length input, so handle it explicitly.
  if (s.empty()) {
    return std::string();
  }
  const int inlen = static_cast<int>(s.size());
  // First call: ask how many bytes the result needs.
  const int outlen = WideCharToMultiByte(
    CP_UTF8, 0, s.data(), inlen, nullptr, 0, nullptr, nullptr);
  if (outlen <= 0) {
    return std::string();
  }
  std::string outstr(static_cast<std::size_t>(outlen), '\0');
  // Second call: perform the conversion into the sized buffer.
  WideCharToMultiByte(
    CP_UTF8, 0, s.data(), inlen, outstr.data(), outlen, nullptr, nullptr);
  return outstr;
}

#else

// On non-Windows platforms both conversions are plain copies.

wasm::pstring string_to_pstring(const std::string& s) {
  return wasm::pstring(s);
}

std::string pstring_to_string(const wasm::pstring& s) {
  return std::string(s);
}

#endif

std::filesystem::path string_to_path(const std::string& s) {
auto pstring = wasm::string_to_pstring(s);
return std::filesystem::path(pstring);
}

// Constructs an fspath from a plain string via string_to_path.
fspath::fspath(const std::string& path) : inner_path(string_to_path(path)) {}

// Constructs an fspath from a C string via string_to_path.
fspath::fspath(const char path[])
  : inner_path(string_to_path(std::string(path))) {}

// Copy constructor: duplicates the wrapped std::filesystem::path.
fspath::fspath(const wasm::fspath& path) : inner_path(path.inner_path) {}

// Wraps an existing std::filesystem::path without any conversion.
fspath::fspath(const std::filesystem::path& path) : inner_path(path) {}

// Creates an fspath directly from a platform string, with no encoding
// conversion.
wasm::fspath fspath::from_pstring(const wasm::pstring& path) {
  std::filesystem::path native_path(path);
  return fspath(native_path);
}

// Assignment operator.
// NOTE(review): this member is const and never writes inner_path; it only
// returns a fresh copy of `path`, so `a = b` leaves `a` unchanged. A
// conventional `fspath& operator=(const fspath&)` that assigns inner_path and
// returns *this is probably what was intended, but that also requires changing
// the declaration in pchar.h.
wasm::fspath fspath::operator=(const wasm::fspath& path) const {
return wasm::fspath(path);
}

const std::filesystem::path& fspath::stdpath() const {
return inner_path;
}

} // namespace wasm
Loading