If

INSERT INTO TABLE_NAME SELECT STRING_AGG(COLUMN_NAME, ',') FROM TABLE_NAME

introduces an anti-pattern (i.e., it can cause poor performance, incorrect results (kindly verify this one), and maintainability issues in T-SQL queries), how about this:

INSERT INTO TABLE_NAME(COLUMN_NAME)
SELECT N'{"KEY_VALUE": [' + TBN.STR_AGG + '"]}' AS JSON_FORMAT_VALUE
FROM(
    SELECT STRING_AGG('"' + COLUMN_NAME, '", ') AS STR_AGG
    FROM TABLE_NAME
) TBN;

This is my actual test sample query:
DECLARE @users TABLE(
    id INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
    username NVARCHAR(100) NOT NULL,
    email NVARCHAR(100) NOT NULL,
    status VARCHAR(50)
);

DECLARE @features TABLE(
    id INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    description VARCHAR(200) NOT NULL,
    is_enabled BIT
);

DECLARE @feature_user TABLE(
    user_id INT NOT NULL,
    feature_id INT NOT NULL
);

INSERT INTO @users( username, email, status )
VALUES( N'john_doe', N'[email protected]', 'active' ),
      ( N'mark_daniels', N'[email protected]', 'inactive' ),
      ( N'alice_jane', N'[email protected]', 'active' );

INSERT INTO @features( name, description, is_enabled )
VALUES( 'notifications', 'Send notifications to users', 'TRUE' ),
      ( 'csv export', 'Export data to CSV format', 'FALSE' ),
      ( 'redesign landing page', 'Revamp the landing page layout', 'TRUE' );

INSERT INTO @feature_user
VALUES( 1, 1 ), ( 1, 2 ), ( 1, 3 ), ( 2, 1 ), ( 2, 2 ), ( 3, 3 );

-- Produces the comma-delimited data structure
SELECT u.id AS user_id, u.username, u.email, u.status,
       STRING_AGG(f.name, ', ') AS feature_names
FROM @users u
LEFT JOIN @feature_user fu ON fu.user_id = u.id
LEFT JOIN @features f ON fu.feature_id = f.id
GROUP BY u.id, u.username, u.email, u.status;

-- Produces the denormalized (JSON) data structure
SELECT feature_data.user_id, feature_data.username, feature_data.email, feature_data.status,
       N'{"feature_data": [' + feature_data.feature_names + '"]}' AS feature_names
       -- ADD HERE THE COLUMNS THAT YOU DON'T WANT DUPLICATED ACROSS ROWS, USING THE SAME APPROACH AS feature_names
FROM(
    SELECT u.id AS user_id, u.username, u.email, u.status,
           STRING_AGG('"' + f.name, '", ') AS feature_names
           -- same approach here for any other column you aggregate
    FROM @users u
    LEFT JOIN @feature_user fu ON fu.user_id = u.id
    LEFT JOIN @features f ON fu.feature_id = f.id
    GROUP BY u.id, u.username, u.email, u.status
) feature_data;

-- The query below checks that the generated JSON data is valid
SELECT ISJSON(N'{"feature_data": [' + feature_data.feature_names + '"]}') AS feature_names
FROM(
    SELECT STRING_AGG('"' + f.name, '", ') AS feature_names
    FROM @users u
    LEFT JOIN @feature_user fu ON fu.user_id = u.id
    LEFT JOIN @features f ON fu.feature_id = f.id
    GROUP BY u.id, u.username, u.email, u.status
) feature_data;

And the SELECT query outputs are:
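One caveat I should note about the concatenation above: building JSON by hand breaks if a value contains a double quote or backslash, and STRING_AGG returns NULL when a user has no features. A minimal sketch of a safer variant using STRING_ESCAPE (available since SQL Server 2016); this is my own suggestion, not part of the referenced idea:

SELECT u.id AS user_id,
       -- STRING_ESCAPE escapes ", \ and control characters for JSON;
       -- the whole expression is still NULL when the user has no features
       N'{"feature_data": ["' + STRING_AGG(STRING_ESCAPE(f.name, 'json'), '", "') + '"]}' AS feature_names
FROM @users u
LEFT JOIN @feature_user fu ON fu.user_id = u.id
LEFT JOIN @features f ON fu.feature_id = f.id
GROUP BY u.id;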
Credits to the owner of the idea: How to Avoid Redundant Rows When Joining Tables in SQL?
However, this scenario reflects my own database setup.
Explanation: the JSON data will be decoded for later use in the SQL Server database. The built-in JSON functions require a database compatibility level of 130 or greater, and I am using compatibility level 140, i.e., SQL Server 2017.
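For completeness, the current compatibility level can be checked (and raised, if necessary) like this; the ALTER is commented out because it affects the whole database:

SELECT name, compatibility_level
FROM sys.databases
WHERE name = DB_NAME();   -- compatibility level of the current database

-- ALTER DATABASE CURRENT SET COMPATIBILITY_LEVEL = 140;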
The application of this idea to the given example is concatenating data into a single property (e.g., a specific column set). Which of the two approaches in the example is more performance-efficient, considering a constantly growing database?
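(A sketch of how the two variants could be measured; table variables with three rows will not show a meaningful difference, so this only becomes informative against real tables with representative row counts:)

SET STATISTICS TIME, IO ON;

SELECT u.id AS user_id, STRING_AGG(f.name, ', ') AS feature_names
FROM @users u
LEFT JOIN @feature_user fu ON fu.user_id = u.id
LEFT JOIN @features f ON fu.feature_id = f.id
GROUP BY u.id;   -- repeat with the JSON variant, then compare elapsed time and logical reads in the Messages tab

SET STATISTICS TIME, IO OFF;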
Other than this, can somebody give a set of queries that avoids the anti-pattern?
I want to take a deep dive into the anti-pattern so that I can really understand how it affects queries.
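(For comparison, one alternative I am aware of that avoids manual string concatenation is a correlated subquery with FOR JSON PATH, available since SQL Server 2016; note that it emits an array of objects rather than an array of bare strings, and it escapes special characters automatically:)

SELECT u.id AS user_id, u.username, u.email, u.status,
       (
           SELECT f.name
           FROM @feature_user fu
           INNER JOIN @features f ON f.id = fu.feature_id
           WHERE fu.user_id = u.id
           FOR JSON PATH   -- e.g. [{"name":"notifications"},{"name":"csv export"}]
       ) AS feature_names
FROM @users u;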
UPDATE:
DECLARE @compile_table_str_agg TABLE( -- denormalizing data through comma-delimited data compilation
    user_id INT NOT NULL,
    username NVARCHAR(100) NOT NULL,
    email NVARCHAR(100) NOT NULL,
    status VARCHAR(50),
    feature_names VARCHAR(100) NOT NULL
);

DECLARE @compile_table_json TABLE( -- denormalizing data through JSON-structured data compilation
    user_id INT NOT NULL,
    username NVARCHAR(100) NOT NULL,
    email NVARCHAR(100) NOT NULL,
    status VARCHAR(50),
    feature_names VARCHAR(100) NOT NULL
);

INSERT INTO @compile_table_str_agg
SELECT u.id AS user_id, u.username, u.email, u.status,
       STRING_AGG(f.name, ', ') AS feature_names
FROM @users u
LEFT JOIN @feature_user fu ON fu.user_id = u.id
LEFT JOIN @features f ON fu.feature_id = f.id
GROUP BY u.id, u.username, u.email, u.status;

INSERT INTO @compile_table_json
SELECT feature_data.user_id, feature_data.username, feature_data.email, feature_data.status,
       N'{"feature_data": [' + feature_data.feature_names + '"]}' AS feature_names
       -- ADD HERE THE COLUMNS THAT YOU DON'T WANT DUPLICATED ACROSS ROWS, USING THE SAME APPROACH AS feature_names
FROM(
    SELECT u.id AS user_id, u.username, u.email, u.status,
           STRING_AGG('"' + f.name, '", ') AS feature_names
           -- same approach here for any other column you aggregate
    FROM @users u
    LEFT JOIN @feature_user fu ON fu.user_id = u.id
    LEFT JOIN @features f ON fu.feature_id = f.id
    GROUP BY u.id, u.username, u.email, u.status
) feature_data;

-- Retrieving the compiled STRING_AGG data
SELECT user_id, username, email, status, ca.*
FROM @compile_table_str_agg str_agg1
CROSS APPLY(
    SELECT com_delimited.*
    FROM STRING_SPLIT( feature_names, ',' ) com_delimited
) ca;

-- Retrieving the compiled JSON-structured data
SELECT user_id, username, email, status, sa.ft_values
FROM @compile_table_json json1
CROSS APPLY OPENJSON( json1.feature_names )
    WITH( feature_data NVARCHAR(MAX) '$.feature_data' AS JSON ) ca
CROSS APPLY OPENJSON( ca.feature_data )
    WITH( ft_values NVARCHAR(25) '$' ) sa;

And the UPDATE's SELECT query outputs are:
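(One detail worth noting in the STRING_SPLIT retrieval: the aggregation used the separator ', ', so every split value after the first carries a leading space. TRIM, available since SQL Server 2017, removes it:)

SELECT user_id, username, email, status,
       TRIM(ca.value) AS feature_name   -- strip the leading space left by the ', ' separator
FROM @compile_table_str_agg str_agg1
CROSS APPLY STRING_SPLIT( feature_names, ',' ) ca;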
CONCLUSION: Both produce the same output, but which one offers better performance and maintainability as the database gets larger?
JSON QUERY REFERENCE:
DECLARE @json NVARCHAR(MAX);
SET @json = '{"info": {"address": [{"town": "Belgrade"}, {"town": "Paris"}, {"town":"Madrid"}]}}';
SET @json = JSON_MODIFY(@json, '$.info.address[0].town', 'Philippines');
SELECT modifiedJson = @json;

DECLARE @json2 NVARCHAR(MAX);
SET @json2 = N'[
    {"id": 2, "info": {"name": "John", "surname": "Smith"}, "age": 25},
    {"id": 5, "info": {"name": "Jane", "surname": "Smith"}, "dob": "2005-11-04T12:00:00"}
]';
SELECT *
FROM OPENJSON(@json2)
WITH (
    id INT 'strict $.id',
    firstName NVARCHAR(50) '$.info.name',
    lastName NVARCHAR(50) '$.info.surname',
    age INT,
    dateOfBirth DATETIME2 '$.dob'
);

DECLARE @json3 NVARCHAR(MAX);
SET @json3 = N'[
    {"id": 3, "info": {"name": "John", "surname": "Smith"}, "age": 25},
    {"id": 5, "info": {"name": "Jane", "surname": "Smith", "skills": ["SQL", "C#", "Azure"]}, "dob": "2005-11-04T12:00:00"},
    {"id": 1, "info": {"name": "DevQt", "surname": "PH", "skills": ["Dart", "Java", "C#", "VB", "Javascript", "SQL"]}, "age": 26, "dob": "2005-11-04T12:00:00"}
]';
SELECT id, firstName, lastName, age, dateOfBirth, skill
FROM OPENJSON(@json3)
WITH (
    id INT 'strict $.id',
    firstName NVARCHAR(50) '$.info.name',
    lastName NVARCHAR(50) '$.info.surname',
    age INT,
    dateOfBirth DATETIME2 '$.dob',
    skills NVARCHAR(MAX) '$.info.skills' AS JSON
)
OUTER APPLY OPENJSON(skills) WITH (skill NVARCHAR(15) '$')
ORDER BY id;

DECLARE @jsonVariable NVARCHAR(MAX);
SET @jsonVariable = N'[
    {
        "Order": { "Number":"SO43659", "Date":"2011-05-31T00:00:00" },
        "AccountNumber":"AW29825",
        "Item": { "Price":2024.9940, "Quantity":1 }
    },
    {
        "Order": { "Number":"SO43661", "Date":"2011-06-01T00:00:00" },
        "AccountNumber":"AW73565",
        "Item": { "Price":2024.9940, "Quantity":3 }
    }
]';
-- INSERT INTO <sampleTable>
SELECT SalesOrderJsonData.*
FROM OPENJSON(@jsonVariable, N'$')
WITH (
    Number VARCHAR(200) N'$.Order.Number',
    Date DATETIME N'$.Order.Date',
    Customer VARCHAR(200) N'$.AccountNumber',
    Quantity INT N'$.Item.Quantity'
) AS SalesOrderJsonData;

And the JSON query outputs are:
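(To complement the OPENJSON examples above, which shred JSON into rows, the opposite direction, composing JSON from rows, is handled by FOR JSON. A minimal self-contained sketch; the @json4 variable name is my own:)

DECLARE @json4 NVARCHAR(MAX) = (
    SELECT v.id, v.name
    FROM ( VALUES (1, N'notifications'), (2, N'csv export') ) v(id, name)
    FOR JSON PATH, ROOT('features')   -- {"features":[{"id":1,"name":"notifications"},...]}
);
SELECT composedJson = @json4;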
Additional explanation: I've included the JSON-handling reference so that others can see my view of embedding JSON in relational databases with SQL Server.
Original reference: JSON data in SQL Server


