Skip to content

Commit 4634fb9

Browse files
committed
add more error handling, address comments
1 parent a991261 commit 4634fb9

File tree

1 file changed

+68
-42
lines changed

1 file changed

+68
-42
lines changed

clang/lib/Lex/LiteralSupport.cpp

Lines changed: 68 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,7 +1766,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
17661766
HadError = false;
17671767

17681768
Kind = kind;
1769-
LiteralConverter *LiteralConv = &PP.getLiteralConverter();
1769+
LiteralConverter LiteralConv = PP.getLiteralConverter();
17701770

17711771
const char *TokBegin = begin;
17721772

@@ -1834,8 +1834,8 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
18341834
}
18351835

18361836
llvm::TextEncodingConverter *Converter = nullptr;
1837-
if (!isUTFLiteral(Kind) && !isWideLiteral(Kind) && LiteralConv)
1838-
Converter = LiteralConv->getConverter(CA_ToExecEncoding);
1837+
if (!isUTFLiteral(Kind) && !isWideLiteral(Kind))
1838+
Converter = LiteralConv.getConverter(CA_ToExecEncoding);
18391839

18401840
while (begin != end) {
18411841
// Is this a span of non-escape characters?
@@ -1902,21 +1902,29 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
19021902
PP.Diag(Loc, diag::err_character_too_large);
19031903
}
19041904
} else {
1905-
char Cp[8];
1905+
char Cp[5];
19061906
char *ResultPtr = Cp;
1907-
unsigned CharByteWidth = 1;
19081907
EncodeUCNEscape(TokBegin, begin, end, ResultPtr, HadError,
19091908
FullSourceLoc(Loc, PP.getSourceManager()),
1910-
CharByteWidth, &PP.getDiagnostics(), PP.getLangOpts());
1909+
/*CharByteWidth=*/1u, &PP.getDiagnostics(),
1910+
PP.getLangOpts());
1911+
assert(ResultPtr - Cp <= 4 &&
1912+
"unexpected result size for UCN escape character");
19111913
if (!HadError) {
19121914
SmallString<8> CpConv;
1913-
Converter->convert(StringRef(Cp), CpConv);
1914-
if (CpConv.size() > 1) {
1915+
StringRef ToConvert(Cp, ResultPtr - Cp);
1916+
std::error_code EC = Converter->convert(StringRef(Cp), CpConv);
1917+
if (EC) {
1918+
PP.Diag(Loc, diag::err_exec_charset_conversion_failed)
1919+
<< EC.message();
19151920
HadError = true;
1916-
PP.Diag(Loc, diag::err_character_too_large);
19171921
} else {
1918-
memcpy(Cp, CpConv.data(), CpConv.size());
1919-
*buffer_begin = *Cp;
1922+
if (CpConv.size() > 1) {
1923+
HadError = true;
1924+
PP.Diag(Loc, diag::err_character_too_large);
1925+
} else {
1926+
*buffer_begin = CpConv[0];
1927+
}
19201928
}
19211929
}
19221930
}
@@ -2048,6 +2056,42 @@ StringLiteralParser::StringLiteralParser(ArrayRef<Token> StringToks,
20482056
init(StringToks, Action);
20492057
}
20502058

2059+
static char *convertCharactersInPlace(char *ResultPtr, char *ResultPtrBefore,
2060+
const unsigned CharByteWidth,
2061+
bool &hadError,
2062+
llvm::TextEncodingConverter &Converter) {
2063+
assert(!hadError && "Unexpected call to convertCharactersInPlace");
2064+
2065+
SmallString<256> CpConv;
2066+
int ResultLength = ResultPtr - ResultPtrBefore;
2067+
assert(ResultLength % CharByteWidth == 0 &&
2068+
"Unexpected span of bytes for the characters.");
2069+
char *Cp = ResultPtrBefore;
2070+
if (Converter.convert(StringRef(Cp, ResultLength / CharByteWidth), CpConv)) {
2071+
hadError = true;
2072+
return ResultPtr;
2073+
}
2074+
if (CharByteWidth == 1) {
2075+
memcpy(Cp, CpConv.data(), CpConv.size());
2076+
return Cp + CpConv.size();
2077+
}
2078+
std::string UTF8String;
2079+
if (CharByteWidth == 4)
2080+
convertUTF32ToUTF8String(ArrayRef<char>(Cp, ResultLength), UTF8String);
2081+
else if (CharByteWidth == 2)
2082+
convertUTF16ToUTF8String(ArrayRef<char>(Cp, ResultLength), UTF8String);
2083+
if (Converter.convert(UTF8String, CpConv)) {
2084+
hadError = true;
2085+
return ResultPtr;
2086+
}
2087+
int NewCharByteWidth = ((int)CpConv.size()) / (ResultLength / CharByteWidth);
2088+
unsigned EndianOffset = llvm::sys::IsBigEndianHost ? CharByteWidth - 1 : 0;
2089+
for (int i = 0; i < (int)CpConv.size(); i += NewCharByteWidth)
2090+
memcpy(Cp + EndianOffset + i * CharByteWidth, CpConv.data() + i,
2091+
NewCharByteWidth);
2092+
return Cp + CpConv.size() * CharByteWidth;
2093+
}
2094+
20512095
void StringLiteralParser::init(ArrayRef<Token> StringToks,
20522096
ConversionAction Action) {
20532097
// The literal token may have come from an invalid source location (e.g. due
@@ -2254,28 +2298,19 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks,
22542298
StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
22552299
StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
22562300

2301+
char *ResultPtrBefore = ResultPtr;
22572302
// Copy everything before the \r\n sequence into the string literal.
22582303
if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
22592304
hadError = true;
22602305

22612306
if (!hadError && Converter) {
22622307
assert(Kind != tok::wide_string_literal &&
22632308
"Wide character translation not supported");
2264-
SmallString<256> CpConv;
2265-
int ResultLength = BeforeCRLF.size() * CharByteWidth;
2266-
char *Cp = ResultPtr - ResultLength;
2267-
std::error_code EC =
2268-
Converter->convert(StringRef(Cp, ResultLength), CpConv);
2269-
if (EC) {
2270-
if (Diags)
2271-
Diags->Report(StringToks[i].getLocation(),
2272-
diag::err_exec_charset_conversion_failed)
2273-
<< EC.message();
2274-
hadError = true;
2275-
} else {
2276-
memcpy(Cp, CpConv.data(), ResultLength);
2277-
ResultPtr = Cp + CpConv.size();
2278-
}
2309+
ResultPtr = convertCharactersInPlace(
2310+
ResultPtr, ResultPtrBefore, CharByteWidth, hadError, *Converter);
2311+
if (hadError && Diags)
2312+
Diags->Report(StringToks[i].getLocation(),
2313+
diag::err_exec_charset_conversion_failed);
22792314
}
22802315
// Point into the \n inside the \r\n sequence and operate on the
22812316
// remaining portion of the literal.
@@ -2311,7 +2346,7 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks,
23112346
++ThisTokBuf;
23122347
} while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
23132348

2314-
int Length = ThisTokBuf - InStart;
2349+
char *ResultPtrBefore = ResultPtr;
23152350
// Copy the character span over.
23162351
if (CopyStringFragment(StringToks[i], ThisTokBegin,
23172352
StringRef(InStart, ThisTokBuf - InStart)))
@@ -2320,21 +2355,12 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks,
23202355
if (!hadError && Converter) {
23212356
assert(Kind != tok::wide_string_literal &&
23222357
"Wide character translation not supported");
2323-
SmallString<256> CpConv;
2324-
int ResultLength = Length * CharByteWidth;
2325-
char *Cp = ResultPtr - ResultLength;
2326-
std::error_code EC =
2327-
Converter->convert(StringRef(Cp, ResultLength), CpConv);
2328-
if (EC) {
2329-
if (Diags)
2330-
Diags->Report(StringToks[i].getLocation(),
2331-
diag::err_exec_charset_conversion_failed)
2332-
<< EC.message();
2333-
hadError = true;
2334-
} else {
2335-
memcpy(Cp, CpConv.data(), ResultLength);
2336-
ResultPtr = Cp + CpConv.size();
2337-
}
2358+
ResultPtr =
2359+
convertCharactersInPlace(ResultPtr, ResultPtrBefore,
2360+
CharByteWidth, hadError, *Converter);
2361+
if (hadError && Diags)
2362+
Diags->Report(StringToks[i].getLocation(),
2363+
diag::err_exec_charset_conversion_failed);
23382364
}
23392365
continue;
23402366
}

0 commit comments

Comments
 (0)