Skip to content

Commit 0fc123c

Browse files
committed
MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
Item_func_group_concat::print() did not take into account that Item_func_group_concat::separator can be of a different character set than the "String *str" (when the printing is being done to). Therefore, printing did not work correctly for: - non-ASCII separators when GROUP_CONCAT is done on 8bit data or multi-byte data with mbminlen==1. - all separators (even including simple ones like comma) when GROUP_CONCAT is done on ucs2/utf16/utf32 data (mbminlen>1). Because of this problem, VIEW definitions did not print correctly to their FRM files. This later led to a wrong SELECT and SHOW CREATE output. Fix: - Adding new String methods: bool append_for_single_quote_using_mb_wc(const char *str, size_t length, CHARSET_INFO *cs); bool append_for_single_quote_opt_convert(const char *str, size_t length, CHARSET_INFO *cs) which perform both escaping and character set conversion at the same time. - Adding a new String method escaped_wc_for_single_quote(), to reuse the code between the old and the new methods. - Fixing Item_func_group_concat::print() to use the new method append_for_single_quote_opt_convert().
1 parent 58df209 commit 0fc123c

File tree

7 files changed

+143
-15
lines changed

7 files changed

+143
-15
lines changed

mysql-test/main/ctype_ucs.result

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6520,5 +6520,25 @@ SELECT 1 COLLATE latin1_swedish_ci;
65206520
ERROR 42000: COLLATION 'latin1_swedish_ci' is not valid for CHARACTER SET 'ucs2'
65216521
SET NAMES utf8;
65226522
#
6523+
# MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
6524+
#
6525+
SET NAMES utf8mb3, @@collation_connection=ucs2_general_ci;
6526+
CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET ucs2;
6527+
INSERT INTO t1 VALUES ('a'),('A');
6528+
CREATE OR REPLACE VIEW v1 AS
6529+
SELECT COUNT(*) AS cnt, GROUP_CONCAT(c) AS c1 FROM t1 GROUP BY c;
6530+
SELECT * FROM v1;
6531+
cnt c1
6532+
2 a,A
6533+
SELECT HEX(c1) FROM v1;
6534+
HEX(c1)
6535+
0061002C0041
6536+
SHOW CREATE VIEW v1;
6537+
View Create View character_set_client collation_connection
6538+
v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select count(0) AS `cnt`,group_concat(`t1`.`c` separator ',') AS `c1` from `t1` group by `t1`.`c` utf8 ucs2_general_ci
6539+
DROP VIEW v1;
6540+
DROP TABLE t1;
6541+
SET NAMES utf8mb3;
6542+
#
65236543
# End of 10.5 tests
65246544
#

mysql-test/main/ctype_ucs.test

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,23 @@ SELECT HEX(1 COLLATE ucs2_bin);
11891189
SELECT 1 COLLATE latin1_swedish_ci;
11901190
SET NAMES utf8;
11911191

1192+
--echo #
1193+
--echo # MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
1194+
--echo #
1195+
1196+
SET NAMES utf8mb3, @@collation_connection=ucs2_general_ci;
1197+
CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET ucs2;
1198+
INSERT INTO t1 VALUES ('a'),('A');
1199+
CREATE OR REPLACE VIEW v1 AS
1200+
SELECT COUNT(*) AS cnt, GROUP_CONCAT(c) AS c1 FROM t1 GROUP BY c;
1201+
SELECT * FROM v1;
1202+
SELECT HEX(c1) FROM v1;
1203+
SHOW CREATE VIEW v1;
1204+
DROP VIEW v1;
1205+
DROP TABLE t1;
1206+
SET NAMES utf8mb3;
1207+
1208+
11921209
--echo #
11931210
--echo # End of 10.5 tests
11941211
--echo #

mysql-test/main/func_gconcat.result

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,4 +1517,24 @@ deallocate prepare stmt;
15171517
set join_cache_level=default;
15181518
set group_concat_max_len=default;
15191519
drop table t1,t2;
1520+
#
1521+
# MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
1522+
#
1523+
SET NAMES utf8, @@collation_connection=latin1_swedish_ci;
1524+
CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET latin1;
1525+
INSERT INTO t1 VALUES ('a'),('A');
1526+
CREATE OR REPLACE VIEW v1 AS
1527+
SELECT GROUP_CONCAT(c SEPARATOR 'ß') AS c1 FROM t1 GROUP BY c;
1528+
SELECT * FROM v1;
1529+
c1
1530+
aßA
1531+
SELECT HEX(c1) FROM v1;
1532+
HEX(c1)
1533+
61DF41
1534+
SHOW CREATE VIEW v1;
1535+
View Create View character_set_client collation_connection
1536+
v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select group_concat(`t1`.`c` separator 'ß') AS `c1` from `t1` group by `t1`.`c` utf8 latin1_swedish_ci
1537+
DROP VIEW v1;
1538+
DROP TABLE t1;
1539+
SET NAMES latin1;
15201540
# End of 10.5 tests

mysql-test/main/func_gconcat.test

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,4 +1105,20 @@ set group_concat_max_len=default;
11051105

11061106
drop table t1,t2;
11071107

1108+
--echo #
1109+
--echo # MDEV-33772 Bad SEPARATOR value in GROUP_CONCAT on character set conversion
1110+
--echo #
1111+
1112+
SET NAMES utf8, @@collation_connection=latin1_swedish_ci;
1113+
CREATE TABLE t1 (c VARCHAR(10)) CHARACTER SET latin1;
1114+
INSERT INTO t1 VALUES ('a'),('A');
1115+
CREATE OR REPLACE VIEW v1 AS
1116+
SELECT GROUP_CONCAT(c SEPARATOR 'ß') AS c1 FROM t1 GROUP BY c;
1117+
SELECT * FROM v1;
1118+
SELECT HEX(c1) FROM v1;
1119+
SHOW CREATE VIEW v1;
1120+
DROP VIEW v1;
1121+
DROP TABLE t1;
1122+
SET NAMES latin1;
1123+
11081124
--echo # End of 10.5 tests

sql/item_sum.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4587,7 +4587,7 @@ void Item_func_group_concat::print(String *str, enum_query_type query_type)
45874587
if (sum_func() == GROUP_CONCAT_FUNC)
45884588
{
45894589
str->append(STRING_WITH_LEN(" separator \'"));
4590-
str->append_for_single_quote(separator->ptr(), separator->length());
4590+
str->append_for_single_quote_opt_convert(*separator);
45914591
str->append(STRING_WITH_LEN("\'"));
45924592
}
45934593

sql/sql_string.cc

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,26 +1126,45 @@ bool String::append_for_single_quote(const char *st, size_t len)
11261126
int chlen;
11271127
for (; st < end; st++)
11281128
{
1129-
switch (*st)
1129+
char ch2= (char) (uchar) escaped_wc_for_single_quote((uchar) *st);
1130+
if (ch2)
11301131
{
1131-
case '\\': APPEND(STRING_WITH_LEN("\\\\")); break;
1132-
case '\0': APPEND(STRING_WITH_LEN("\\0")); break;
1133-
case '\'': APPEND(STRING_WITH_LEN("\\'")); break;
1134-
case '\n': APPEND(STRING_WITH_LEN("\\n")); break;
1135-
case '\r': APPEND(STRING_WITH_LEN("\\r")); break;
1136-
case '\032': APPEND(STRING_WITH_LEN("\\Z")); break;
1137-
default: if ((chlen=charset()->charlen(st, end)) > 0)
1138-
{
1139-
APPEND(st, chlen);
1140-
st+= chlen-1;
1141-
}
1142-
else
1143-
APPEND(*st);
1132+
if (append('\\') || append(ch2))
1133+
return true;
1134+
continue;
11441135
}
1136+
if ((chlen= charset()->charlen(st, end)) > 0)
1137+
{
1138+
APPEND(st, chlen);
1139+
st+= chlen-1;
1140+
}
1141+
else
1142+
APPEND(*st);
11451143
}
11461144
return 0;
11471145
}
11481146

1147+
1148+
bool String::append_for_single_quote_using_mb_wc(const char *src,
1149+
size_t length,
1150+
CHARSET_INFO *cs)
1151+
{
1152+
DBUG_ASSERT(&my_charset_bin != charset());
1153+
DBUG_ASSERT(&my_charset_bin != cs);
1154+
const uchar *str= (const uchar *) src;
1155+
const uchar *end= (const uchar *) src + length;
1156+
int chlen;
1157+
my_wc_t wc;
1158+
for ( ; (chlen= cs->cset->mb_wc(cs, &wc, str, end)) > 0; str+= chlen)
1159+
{
1160+
my_wc_t wc2= escaped_wc_for_single_quote(wc);
1161+
if (wc2 ? (append_wc('\\') || append_wc(wc2)) : append_wc(wc))
1162+
return true;
1163+
}
1164+
return false;
1165+
}
1166+
1167+
11491168
void String::print(String *str) const
11501169
{
11511170
str->append_for_single_quote(Ptr, str_length);

sql/sql_string.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,42 @@ class String: public Charset, public Binary_string
11341134
print_with_conversion(to, cs);
11351135
}
11361136

1137+
static my_wc_t escaped_wc_for_single_quote(my_wc_t ch)
1138+
{
1139+
switch (ch)
1140+
{
1141+
case '\\': return '\\';
1142+
case '\0': return '0';
1143+
case '\'': return '\'';
1144+
case '\n': return 'n';
1145+
case '\r': return 'r';
1146+
case '\032': return 'Z';
1147+
}
1148+
return 0;
1149+
}
1150+
1151+
// Append for single quote using mb_wc/wc_mb Unicode conversion
1152+
bool append_for_single_quote_using_mb_wc(const char *str, size_t length,
1153+
CHARSET_INFO *cs);
1154+
1155+
// Append for single quote with optional mb_wc/wc_mb conversion
1156+
bool append_for_single_quote_opt_convert(const char *str,
1157+
size_t length,
1158+
CHARSET_INFO *cs)
1159+
{
1160+
return charset() == &my_charset_bin || cs == &my_charset_bin ||
1161+
my_charset_same(charset(), cs) ?
1162+
append_for_single_quote(str, length) :
1163+
append_for_single_quote_using_mb_wc(str, length, cs);
1164+
}
1165+
1166+
bool append_for_single_quote_opt_convert(const String &str)
1167+
{
1168+
return append_for_single_quote_opt_convert(str.ptr(),
1169+
str.length(),
1170+
str.charset());
1171+
}
1172+
11371173
bool append_for_single_quote(const char *st, size_t len);
11381174
bool append_for_single_quote(const String *s)
11391175
{

0 commit comments

Comments
 (0)