Add some new features to our quoted string parser, and a unit test

- Now supports \n \t \f escape sequences
- Now supports strings with literal tabs
This commit is contained in:
2023-01-21 14:45:55 -05:00
parent 105a7efa96
commit 1117b27f5d
5 changed files with 105 additions and 11 deletions

View File

@@ -201,6 +201,7 @@
<ClCompile Include="..\..\..\test\scen_read.cpp" />
<ClCompile Include="..\..\..\test\scen_write.cpp" />
<ClCompile Include="..\..\..\test\spec_legacy.cpp" />
<ClCompile Include="..\..\..\test\string_quote.cpp" />
<ClCompile Include="..\..\..\test\tagfile.cpp" />
<ClCompile Include="..\..\..\test\talk_legacy.cpp" />
<ClCompile Include="..\..\..\test\talk_read.cpp" />

View File

@@ -77,6 +77,9 @@
<ClCompile Include="..\..\..\test\spec_legacy.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\test\string_quote.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\test\tagfile.cpp">
<Filter>Source Files</Filter>
</ClCompile>

View File

@@ -59,6 +59,7 @@
911A14031B8FAFC600900FD9 /* town_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 91C2A6EC1B8FA91400346948 /* town_read.cpp */; };
911A14041B8FB00300900FD9 /* talk_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 91C2A6EE1B8FAA8E00346948 /* talk_read.cpp */; };
911A14051B8FB00600900FD9 /* out_read.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 91C2A6ED1B8FA9FB00346948 /* out_read.cpp */; };
911DD995297C56F500205EBC /* string_quote.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 911DD994297C56F500205EBC /* string_quote.cpp */; };
911F2D991B98F43B00E3102E /* libCommon.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 911F2D981B98F43B00E3102E /* libCommon.a */; };
911F2D9A1B98F43C00E3102E /* libCommon.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 911F2D981B98F43B00E3102E /* libCommon.a */; };
911F2D9B1B98F43C00E3102E /* libCommon.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 911F2D981B98F43B00E3102E /* libCommon.a */; };
@@ -616,6 +617,7 @@
910BBAB50FB91A26001E34EA /* field.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = field.cpp; sourceTree = "<group>"; };
910BBAB80FB91ADB001E34EA /* message.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = message.hpp; sourceTree = "<group>"; };
910BBAB90FB91ADB001E34EA /* message.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = message.cpp; sourceTree = "<group>"; };
911DD994297C56F500205EBC /* string_quote.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = string_quote.cpp; sourceTree = "<group>"; };
911F2D981B98F43B00E3102E /* libCommon.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libCommon.a; path = lib/libCommon.a; sourceTree = "<group>"; };
911F2D9D1B98F44700E3102E /* libCommon-Party.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libCommon-Party.a"; path = "lib/libCommon-Party.a"; sourceTree = "<group>"; };
911F2DA21B98FF2300E3102E /* cursors */ = {isa = PBXFileReference; lastKnownFileType = folder; path = cursors; sourceTree = "<group>"; };
@@ -1540,6 +1542,7 @@
9176FEC51D550EFE006EF694 /* town_legacy.cpp */,
91C2A6EC1B8FA91400346948 /* town_read.cpp */,
91E381451B97671E00F69B81 /* town_write.cpp */,
911DD994297C56F500205EBC /* string_quote.cpp */,
);
name = src;
sourceTree = "<group>";
@@ -2131,6 +2134,7 @@
91430438296C0088003A3967 /* vector2d.cpp in Sources */,
91C763DD1B4EE7950086D879 /* map_write.cpp in Sources */,
91EF27731B693D3900666469 /* ter_read.cpp in Sources */,
911DD995297C56F500205EBC /* string_quote.cpp in Sources */,
91EF27751B693D4800666469 /* ter_write.cpp in Sources */,
91EF27771B693D5500666469 /* item_read.cpp in Sources */,
91EF27791B693D5F00666469 /* item_write.cpp in Sources */,

View File

@@ -128,37 +128,49 @@ std::string read_maybe_quoted_string(std::istream& from) {
from >> std::ws;
if(from.peek() == '"' || from.peek() == '\'') {
char delim = from.get();
getline(from, result, delim);
if(result.empty()) return result;
while(result[result.length() - 1] == '\\') {
result[result.length() - 1] = delim;
bool reached_end = true;
do {
std::string nextPart;
getline(from, nextPart, delim);
if(!nextPart.empty() && nextPart.back() == '\\') {
nextPart.back() = delim;
reached_end = false;
} else {
reached_end = true;
}
// Collapse any double backslashes; remove any single backslashes
for(std::string::iterator iter = nextPart.begin(); iter != nextPart.end(); iter++) {
if(iter[0] == '\\' && iter + 1 != nextPart.end() && iter[1] != '\\') {
if(iter[0] == '\\' && iter + 1 != nextPart.end()) {
iter = nextPart.erase(iter);
// After this, iter points to the second of the two backslashes, so
// when incremented by the loop, it'll point to the character after the backslashes.
// However! It might also be pointing at an n, t, or f, so substitute that if so.
switch(*iter) {
case 'n': *iter = '\n'; break;
case 't': *iter = '\t'; break;
case 'f': *iter = '\f'; break;
}
}
}
// Note that this does not support escaping the single quotes in strings delimited by double quotes, and vice versa.
result += nextPart;
}
} while(!reached_end);
} else from >> result;
return result;
}
std::string maybe_quote_string(std::string which) {
if(which.empty()) return "''";
if(which.find_first_of(' ') != std::string::npos || which[0] == '"' || which[0] == '\'') {
if(which.find_first_of(" \t\n\f") != std::string::npos || which[0] == '"' || which[0] == '\'') {
// The string contains whitespace (space, tab, newline, or form feed) or starts with a quote, so quote it.
// We may have to escape quotes or backslashes.
int apos = 0, quot = 0, bslash = 0;
std::for_each(which.begin(), which.end(), [&apos,&quot,&bslash](char c) {
int apos = 0, quot = 0, bslash = 0, newline = 0, formfeed = 0;
std::for_each(which.begin(), which.end(), [&apos,&quot,&bslash,&newline,&formfeed](char c) {
if(c == '\'') apos++;
if(c == '"') quot++;
if(c == '\\') bslash++;
if(c == '\n') newline++;
if(c == '\f') formfeed++;
});
char quote_c;
// Surround it in whichever quote character appears fewer times.
@@ -166,15 +178,20 @@ std::string maybe_quote_string(std::string which) {
else quote_c = '\'';
// Let's create this string to initially have the required size.
std::string temp;
size_t quoted_len = which.length() + std::min(quot,apos) + bslash + 2;
size_t quoted_len = which.length() + std::min(quot,apos) + bslash + newline + formfeed + 2;
temp.reserve(quoted_len);
temp += quote_c;
for(size_t i = 0; i < which.length(); i++) {
if(which[i] == quote_c) {
temp += '\\';
temp += quote_c;
} else if(which[i] == '\\')
} else if(which[i] == '\\') {
temp += R"(\\)";
} else if(which[i] == '\n') {
temp += R"(\n)";
} else if(which[i] == '\f') {
temp += R"(\f)";
}
else temp += which[i];
}
temp += quote_c;

69
test/string_quote.cpp Normal file
View File

@@ -0,0 +1,69 @@
//
// string_quote.cpp
// boe_test
//
// Created by Celtic Minstrel on 2023-01-21.
//
#include "catch.hpp"
#include "fileio/fileio.hpp"
// Exercises maybe_quote_string(): each check feeds in a raw string and compares
// the result against the exact serialized form that is expected back.
TEST_CASE("Quoting Strings") {
	// The empty string is written as a pair of single quotes.
	CHECK(maybe_quote_string("") == "''");
	// A lone space has to be quoted.
	CHECK(maybe_quote_string(" ") == "' '");
	// An apostrophe that is not the first character does not force quoting.
	CHECK(maybe_quote_string("Don't!") == "Don't!");
	// A leading quote character forces quoting, using the other quote character.
	CHECK(maybe_quote_string("\"") == "'\"'");
	CHECK(maybe_quote_string("'") == "\"'\"");
	// Quote characters in the middle of a space-free string are left alone.
	CHECK(maybe_quote_string("-\"-") == "-\"-");
	CHECK(maybe_quote_string("-'-") == "-'-");
	// Strings with spaces are quoted; the delimiter is the quote character that
	// occurs less often in the text (single quotes when neither occurs).
	CHECK(maybe_quote_string("Hello World") == "'Hello World'");
	CHECK(maybe_quote_string("It's great!") == "\"It's great!\"");
	CHECK(maybe_quote_string("That is a \"silly\" idea.") == "'That is a \"silly\" idea.'");
	// Newline and form feed are written as \n and \f escapes;
	// a tab forces quoting but is kept as a literal tab.
	CHECK(maybe_quote_string("1\n2") == "'1\\n2'");
	CHECK(maybe_quote_string("1\t2") == "'1\t2'");
	CHECK(maybe_quote_string("1\f2") == "'1\\f2'");
	// No whitespace and no leading quote: the string passes through unchanged.
	CHECK(maybe_quote_string("foo\"") == "foo\"");
	CHECK(maybe_quote_string("foo'") == "foo'");
	CHECK(maybe_quote_string("That|is|great") == "That|is|great");
	CHECK(maybe_quote_string("==!==") == "==!==");
	CHECK(maybe_quote_string("Hello") == "Hello");
	CHECK(maybe_quote_string("123") == "123");
	CHECK(maybe_quote_string(".") == ".");
	// Backslashes are only doubled when the string ends up being quoted.
	CHECK(maybe_quote_string("path\\to\\file") == "path\\to\\file");
	CHECK(maybe_quote_string("'path\\to\\file'") == "\"'path\\\\to\\\\file'\"");
	// Occurrences of the chosen delimiter inside the text are backslash-escaped.
	CHECK(maybe_quote_string("Can't stumble with \"quotes\" of both types!") == "'Can\\'t stumble with \"quotes\" of both types!'");
	CHECK(maybe_quote_string("This is a \"complicated\" string\nwith 'many' different things to \\escape\\ in it! Shouldn't be too hard...?") == "\"This is a \\\"complicated\\\" string\\nwith 'many' different things to \\\\escape\\\\ in it! Shouldn't be too hard...?\"");
}
// Test helper: runs `str` through read_maybe_quoted_string() by wrapping it in
// an in-memory input stream, and returns whatever the parser extracts.
// The argument is taken by const reference because the string stream already
// makes its own copy of the text; passing by value would copy it twice.
static std::string unquote_string(const std::string& str) {
	std::istringstream input(str);
	return read_maybe_quoted_string(input);
}
// Exercises read_maybe_quoted_string() through the unquote_string() helper:
// each check feeds in serialized text and compares against the decoded string.
TEST_CASE("Unquoting Strings") {
	// An empty pair of quotes decodes to the empty string.
	CHECK(unquote_string("''") == "");
	// A quoted space survives as a single space.
	CHECK(unquote_string("' '") == " ");
	// Text that does not start with a quote character is read as a bare word.
	CHECK(unquote_string("Don't!") == "Don't!");
	// The other quote character may appear unescaped inside a quoted string.
	CHECK(unquote_string("'\"'") == "\"");
	CHECK(unquote_string("\"'\"") == "'");
	// Quote characters inside a bare word are taken literally.
	CHECK(unquote_string("-\"-") == "-\"-");
	CHECK(unquote_string("-'-") == "-'-");
	// Quoted strings may contain spaces; the delimiters are stripped.
	CHECK(unquote_string("'Hello World'") == "Hello World");
	CHECK(unquote_string("\"It's great!\"") == "It's great!");
	CHECK(unquote_string("'That is a \"silly\" idea.'") == "That is a \"silly\" idea.");
	// \n, \t and \f escapes are translated; a literal tab is also accepted.
	CHECK(unquote_string("'1\\n2'") == "1\n2");
	CHECK(unquote_string("'1\t2'") == "1\t2");
	CHECK(unquote_string("'1\\t2'") == "1\t2");
	CHECK(unquote_string("'1\\f2'") == "1\f2");
	// Bare words come back unchanged.
	CHECK(unquote_string("foo\"") == "foo\"");
	CHECK(unquote_string("foo'") == "foo'");
	CHECK(unquote_string("That|is|great") == "That|is|great");
	CHECK(unquote_string("==!==") == "==!==");
	CHECK(unquote_string("Hello") == "Hello");
	CHECK(unquote_string("123") == "123");
	CHECK(unquote_string(".") == ".");
	// Backslashes in a bare word are untouched; inside quotes a doubled backslash collapses to one.
	CHECK(unquote_string("path\\to\\file") == "path\\to\\file");
	CHECK(unquote_string("\"'path\\\\to\\\\file'\"") == "'path\\to\\file'");
	// A backslash-escaped delimiter does not terminate the string.
	CHECK(unquote_string("'Can\\'t stumble with \"quotes\" of both types!'") == "Can't stumble with \"quotes\" of both types!");
	CHECK(unquote_string("\"This is a \\\"complicated\\\" string\\nwith 'many' different things to \\\\escape\\\\ in it! Shouldn't be too hard...?\"") == "This is a \"complicated\" string\nwith 'many' different things to \\escape\\ in it! Shouldn't be too hard...?");
}