Add some new features to our quoted string parser, and a unit test
- Now supports \n \t \f escape sequences
- Now supports strings with literal tabs
This commit is contained in:
@@ -201,6 +201,7 @@
|
||||
<ClCompile Include="..\..\..\test\scen_read.cpp" />
|
||||
<ClCompile Include="..\..\..\test\scen_write.cpp" />
|
||||
<ClCompile Include="..\..\..\test\spec_legacy.cpp" />
|
||||
<ClCompile Include="..\..\..\test\string_quote.cpp" />
|
||||
<ClCompile Include="..\..\..\test\tagfile.cpp" />
|
||||
<ClCompile Include="..\..\..\test\talk_legacy.cpp" />
|
||||
<ClCompile Include="..\..\..\test\talk_read.cpp" />
|
||||
|
@@ -77,6 +77,9 @@
|
||||
<ClCompile Include="..\..\..\test\spec_legacy.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\test\string_quote.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\test\tagfile.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
@@ -59,6 +59,7 @@
|
||||
911A14031B8FAFC600900FD9 /* town_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 91C2A6EC1B8FA91400346948 /* town_read.cpp */; };
|
||||
911A14041B8FB00300900FD9 /* talk_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 91C2A6EE1B8FAA8E00346948 /* talk_read.cpp */; };
|
||||
911A14051B8FB00600900FD9 /* out_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 91C2A6ED1B8FA9FB00346948 /* out_read.cpp */; };
|
||||
911DD995297C56F500205EBC /* string_quote.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 911DD994297C56F500205EBC /* string_quote.cpp */; };
|
||||
911F2D991B98F43B00E3102E /* libCommon.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 911F2D981B98F43B00E3102E /* libCommon.a */; };
|
||||
911F2D9A1B98F43C00E3102E /* libCommon.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 911F2D981B98F43B00E3102E /* libCommon.a */; };
|
||||
911F2D9B1B98F43C00E3102E /* libCommon.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 911F2D981B98F43B00E3102E /* libCommon.a */; };
|
||||
@@ -616,6 +617,7 @@
|
||||
910BBAB50FB91A26001E34EA /* field.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = field.cpp; sourceTree = "<group>"; };
|
||||
910BBAB80FB91ADB001E34EA /* message.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = message.hpp; sourceTree = "<group>"; };
|
||||
910BBAB90FB91ADB001E34EA /* message.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = message.cpp; sourceTree = "<group>"; };
|
||||
911DD994297C56F500205EBC /* string_quote.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = string_quote.cpp; sourceTree = "<group>"; };
|
||||
911F2D981B98F43B00E3102E /* libCommon.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libCommon.a; path = lib/libCommon.a; sourceTree = "<group>"; };
|
||||
911F2D9D1B98F44700E3102E /* libCommon-Party.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libCommon-Party.a"; path = "lib/libCommon-Party.a"; sourceTree = "<group>"; };
|
||||
911F2DA21B98FF2300E3102E /* cursors */ = {isa = PBXFileReference; lastKnownFileType = folder; path = cursors; sourceTree = "<group>"; };
|
||||
@@ -1540,6 +1542,7 @@
|
||||
9176FEC51D550EFE006EF694 /* town_legacy.cpp */,
|
||||
91C2A6EC1B8FA91400346948 /* town_read.cpp */,
|
||||
91E381451B97671E00F69B81 /* town_write.cpp */,
|
||||
911DD994297C56F500205EBC /* string_quote.cpp */,
|
||||
);
|
||||
name = src;
|
||||
sourceTree = "<group>";
|
||||
@@ -2131,6 +2134,7 @@
|
||||
91430438296C0088003A3967 /* vector2d.cpp in Sources */,
|
||||
91C763DD1B4EE7950086D879 /* map_write.cpp in Sources */,
|
||||
91EF27731B693D3900666469 /* ter_read.cpp in Sources */,
|
||||
911DD995297C56F500205EBC /* string_quote.cpp in Sources */,
|
||||
91EF27751B693D4800666469 /* ter_write.cpp in Sources */,
|
||||
91EF27771B693D5500666469 /* item_read.cpp in Sources */,
|
||||
91EF27791B693D5F00666469 /* item_write.cpp in Sources */,
|
||||
|
@@ -128,37 +128,49 @@ std::string read_maybe_quoted_string(std::istream& from) {
|
||||
from >> std::ws;
|
||||
if(from.peek() == '"' || from.peek() == '\'') {
|
||||
char delim = from.get();
|
||||
getline(from, result, delim);
|
||||
if(result.empty()) return result;
|
||||
while(result[result.length() - 1] == '\\') {
|
||||
result[result.length() - 1] = delim;
|
||||
bool reached_end = true;
|
||||
do {
|
||||
std::string nextPart;
|
||||
getline(from, nextPart, delim);
|
||||
if(!nextPart.empty() && nextPart.back() == '\\') {
|
||||
nextPart.back() = delim;
|
||||
reached_end = false;
|
||||
} else {
|
||||
reached_end = true;
|
||||
}
|
||||
// Collapse any double backslashes; remove any single backslashes
|
||||
for(std::string::iterator iter = nextPart.begin(); iter != nextPart.end(); iter++) {
|
||||
if(iter[0] == '\\' && iter + 1 != nextPart.end() && iter[1] != '\\') {
|
||||
if(iter[0] == '\\' && iter + 1 != nextPart.end()) {
|
||||
iter = nextPart.erase(iter);
|
||||
// After this, iter points to the second of the two backslashes, so
|
||||
// when incremented by the loop, it'll point to the character after the backslashes.
|
||||
// However! It might also be pointing at an n, t, or f, so substitute that if so.
|
||||
switch(*iter) {
|
||||
case 'n': *iter = '\n'; break;
|
||||
case 't': *iter = '\t'; break;
|
||||
case 'f': *iter = '\f'; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Note that this does not support escaping the single quotes in strings delimited by double quotes, and vice versa.
|
||||
result += nextPart;
|
||||
}
|
||||
} while(!reached_end);
|
||||
} else from >> result;
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string maybe_quote_string(std::string which) {
|
||||
if(which.empty()) return "''";
|
||||
if(which.find_first_of(' ') != std::string::npos || which[0] == '"' || which[0] == '\'') {
|
||||
if(which.find_first_of(" \t\n\f") != std::string::npos || which[0] == '"' || which[0] == '\'') {
|
||||
// The string contains spaces or starts with a quote, so quote it.
|
||||
// We may have to escape quotes or backslashes.
|
||||
int apos = 0, quot = 0, bslash = 0;
|
||||
std::for_each(which.begin(), which.end(), [&apos,",&bslash](char c) {
|
||||
int apos = 0, quot = 0, bslash = 0, newline = 0, formfeed = 0;
|
||||
std::for_each(which.begin(), which.end(), [&apos,",&bslash,&newline,&formfeed](char c) {
|
||||
if(c == '\'') apos++;
|
||||
if(c == '"') quot++;
|
||||
if(c == '\\') bslash++;
|
||||
if(c == '\n') newline++;
|
||||
if(c == '\f') formfeed++;
|
||||
});
|
||||
char quote_c;
|
||||
// Surround it in whichever quote character appears fewer times.
|
||||
@@ -166,15 +178,20 @@ std::string maybe_quote_string(std::string which) {
|
||||
else quote_c = '\'';
|
||||
// Let's create this string to initially have the required size.
|
||||
std::string temp;
|
||||
size_t quoted_len = which.length() + std::min(quot,apos) + bslash + 2;
|
||||
size_t quoted_len = which.length() + std::min(quot,apos) + bslash + newline + formfeed + 2;
|
||||
temp.reserve(quoted_len);
|
||||
temp += quote_c;
|
||||
for(size_t i = 0; i < which.length(); i++) {
|
||||
if(which[i] == quote_c) {
|
||||
temp += '\\';
|
||||
temp += quote_c;
|
||||
} else if(which[i] == '\\')
|
||||
} else if(which[i] == '\\') {
|
||||
temp += R"(\\)";
|
||||
} else if(which[i] == '\n') {
|
||||
temp += R"(\n)";
|
||||
} else if(which[i] == '\f') {
|
||||
temp += R"(\f)";
|
||||
}
|
||||
else temp += which[i];
|
||||
}
|
||||
temp += quote_c;
|
||||
|
69
test/string_quote.cpp
Normal file
69
test/string_quote.cpp
Normal file
@@ -0,0 +1,69 @@
|
||||
//
|
||||
// string_quote.cpp
|
||||
// boe_test
|
||||
//
|
||||
// Created by Celtic Minstrel on 2023-01-21.
|
||||
//
|
||||
|
||||
#include "catch.hpp"
|
||||
#include "fileio/fileio.hpp"
|
||||
|
||||
// Checks maybe_quote_string() against a table of raw inputs and the exact
// text each one is expected to produce. Going by the expectations below:
//  - empty strings and strings containing a space, tab, newline or form feed,
//    or starting with a quote character, come back wrapped in quotes;
//  - everything else comes back unchanged;
//  - inside a quoted result, newlines, form feeds, backslashes and the chosen
//    delimiter are written as backslash escapes, while a tab stays literal.
TEST_CASE("Quoting Strings") {
	struct {
		const char* raw;
		const char* quoted;
	} const table[] = {
		// Empty and whitespace-only input
		{"", "''"},
		{" ", "' '"},
		// Quote characters in various positions
		{"Don't!", "Don't!"},
		{"\"", "'\"'"},
		{"'", "\"'\""},
		{"-\"-", "-\"-"},
		{"-'-", "-'-"},
		// Strings with spaces, with and without embedded quotes
		{"Hello World", "'Hello World'"},
		{"It's great!", "\"It's great!\""},
		{"That is a \"silly\" idea.", "'That is a \"silly\" idea.'"},
		// Control characters
		{"1\n2", "'1\\n2'"},
		{"1\t2", "'1\t2'"},
		{"1\f2", "'1\\f2'"},
		// Inputs that are expected to pass through untouched
		{"foo\"", "foo\""},
		{"foo'", "foo'"},
		{"That|is|great", "That|is|great"},
		{"==!==", "==!=="},
		{"Hello", "Hello"},
		{"123", "123"},
		{".", "."},
		{"path\\to\\file", "path\\to\\file"},
		// Backslashes and mixed quotes inside a string that must be quoted
		{"'path\\to\\file'", "\"'path\\\\to\\\\file'\""},
		{"Can't stumble with \"quotes\" of both types!", "'Can\\'t stumble with \"quotes\" of both types!'"},
		{"This is a \"complicated\" string\nwith 'many' different things to \\escape\\ in it! Shouldn't be too hard...?", "\"This is a \\\"complicated\\\" string\\nwith 'many' different things to \\\\escape\\\\ in it! Shouldn't be too hard...?\""},
	};
	for(const auto& entry : table) {
		// Record the raw input so a failing row can be identified in the report.
		CAPTURE(entry.raw);
		CHECK(maybe_quote_string(entry.raw) == std::string(entry.quoted));
	}
}
|
||||
|
||||
static std::string unquote_string(std::string str) {
|
||||
std::istringstream is(str);
|
||||
return read_maybe_quoted_string(is);
|
||||
}
|
||||
|
||||
// Checks the reader (through the unquote_string() helper) against a table of
// serialized inputs and the exact string each one is expected to parse to.
// The rows are largely the reverse of the quoting test, plus an escaped tab
// (backslash followed by 't'), which is expected to read back as a tab.
TEST_CASE("Unquoting Strings") {
	struct {
		const char* serialized;
		const char* plain;
	} const table[] = {
		// Empty and whitespace-only content
		{"''", ""},
		{"' '", " "},
		// Quote characters in various positions
		{"Don't!", "Don't!"},
		{"'\"'", "\""},
		{"\"'\"", "'"},
		{"-\"-", "-\"-"},
		{"-'-", "-'-"},
		// Quoted strings with spaces, with and without embedded quotes
		{"'Hello World'", "Hello World"},
		{"\"It's great!\"", "It's great!"},
		{"'That is a \"silly\" idea.'", "That is a \"silly\" idea."},
		// Escape sequences and a literal tab
		{"'1\\n2'", "1\n2"},
		{"'1\t2'", "1\t2"},
		{"'1\\t2'", "1\t2"},
		{"'1\\f2'", "1\f2"},
		// Unquoted inputs that are expected to be read back verbatim
		{"foo\"", "foo\""},
		{"foo'", "foo'"},
		{"That|is|great", "That|is|great"},
		{"==!==", "==!=="},
		{"Hello", "Hello"},
		{"123", "123"},
		{".", "."},
		{"path\\to\\file", "path\\to\\file"},
		// Escaped backslashes and delimiters inside quoted strings
		{"\"'path\\\\to\\\\file'\"", "'path\\to\\file'"},
		{"'Can\\'t stumble with \"quotes\" of both types!'", "Can't stumble with \"quotes\" of both types!"},
		{"\"This is a \\\"complicated\\\" string\\nwith 'many' different things to \\\\escape\\\\ in it! Shouldn't be too hard...?\"", "This is a \"complicated\" string\nwith 'many' different things to \\escape\\ in it! Shouldn't be too hard...?"},
	};
	for(const auto& entry : table) {
		// Record the serialized input so a failing row can be identified in the report.
		CAPTURE(entry.serialized);
		CHECK(unquote_string(entry.serialized) == std::string(entry.plain));
	}
}
|
Reference in New Issue
Block a user