JSON & XML trong SQL Server

JSON trong SQL Server (2016+)

Lưu trữ JSON

SQL Server không có kiểu dữ liệu JSON riêng — JSON được lưu dưới dạng NVARCHAR(MAX).

-- Tạo bảng với cột JSON
CREATE TABLE Products (
    ProductID   INT PRIMARY KEY IDENTITY,
    Name        NVARCHAR(200) NOT NULL,
    Attributes  NVARCHAR(MAX),  -- JSON column
    CONSTRAINT CK_Products_Attributes CHECK (ISJSON(Attributes) = 1)
);

-- Insert dữ liệu JSON
INSERT INTO Products (Name, Attributes) VALUES 
('Laptop Dell XPS', N'{
    "brand": "Dell",
    "model": "XPS 15",
    "specs": {
        "cpu": "Intel Core i7-12700H",
        "ram": 32,
        "storage": 512
    },
    "tags": ["gaming", "professional", "lightweight"],
    "inStock": true
}');

-- ISJSON: kiểm tra chuỗi có phải JSON hợp lệ không
SELECT ISJSON('{"key": "value"}');  -- 1 (TRUE)
SELECT ISJSON('invalid json');       -- 0 (FALSE)
SELECT ISJSON(NULL);                  -- NULL

FOR JSON: Xuất dữ liệu dạng JSON

FOR JSON PATH

Cho phép control cấu trúc JSON bằng cách đặt tên cột theo dạng object.property.

-- FOR JSON PATH cơ bản
SELECT 
    ProductID AS 'id',
    Name AS 'name',
    JSON_VALUE(Attributes, '$.brand') AS 'brand'
FROM Products
FOR JSON PATH;
-- Output: [{"id":1,"name":"Laptop Dell XPS","brand":"Dell"}]

-- WITH ROOT: bọc array trong một object với key
SELECT ProductID, Name
FROM Products
FOR JSON PATH, ROOT('products');
-- Output: {"products":[{"ProductID":1,"Name":"Laptop Dell XPS"}]}

-- WITHOUT_ARRAY_WRAPPER: khi chỉ có 1 row, không bọc trong array
SELECT TOP 1 ProductID, Name
FROM Products
FOR JSON PATH, WITHOUT_ARRAY_WRAPPER;
-- Output: {"ProductID":1,"Name":"Laptop Dell XPS"}

-- Nested objects (dùng tên cột có dấu chấm)
SELECT 
    o.OrderID AS 'order.id',
    o.OrderDate AS 'order.date',
    c.CustomerName AS 'order.customer.name',
    c.Email AS 'order.customer.email'
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
FOR JSON PATH;
-- Output: [{"order":{"id":1,"date":"2026-04-01","customer":{"name":"Nguyen Van A","email":"a@b.com"}}}]

FOR JSON AUTO

Tự động tạo cấu trúc JSON dựa trên thứ tự SELECT và JOIN.

-- FOR JSON AUTO: cấu trúc tự động từ table alias
SELECT 
    o.OrderID,
    o.OrderDate,
    c.CustomerName,
    c.Email
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
FOR JSON AUTO;
-- Output: [{"OrderID":1,"OrderDate":"2026-04-01","c":[{"CustomerName":"Nguyen Van A"}]}]

Khuyến nghị: Dùng FOR JSON PATH để có control tốt hơn về cấu trúc output.

OPENJSON: Parse JSON thành Rows

OPENJSON cơ bản (không có schema)

DECLARE @json NVARCHAR(MAX) = N'[
    {"id": 1, "name": "Product A", "price": 100.00},
    {"id": 2, "name": "Product B", "price": 200.00}
]';

-- Trả về key, value, type cho mỗi element
SELECT * FROM OPENJSON(@json);
-- key | value                                | type
-- 0   | {"id":1,"name":"Product A",...}      | 5 (object)
-- 1   | {"id":2,"name":"Product B",...}      | 5 (object)

-- Một level sâu hơn
SELECT * FROM OPENJSON(@json, '$[0]');
-- key   | value    | type
-- id    | 1        | 2 (number)
-- name  | Product A| 1 (string)
-- price | 100.00   | 2 (number)

OPENJSON với WITH clause (có schema)

-- WITH clause định nghĩa output schema
DECLARE @json NVARCHAR(MAX) = N'[
    {"id": 1, "name": "Product A", "price": 100.00, "active": true},
    {"id": 2, "name": "Product B", "price": 200.00, "active": false}
]';

SELECT *
FROM OPENJSON(@json)
WITH (
    ProductID   INT         '$.id',
    ProductName NVARCHAR(100) '$.name',
    Price       DECIMAL(10,2) '$.price',
    IsActive    BIT         '$.active'
);

-- Join với bảng thực thế
SELECT p.Name, oj.Price
FROM Products p
CROSS APPLY OPENJSON(p.Attributes)
WITH (
    Brand   NVARCHAR(100) '$.brand',
    RAM     INT           '$.specs.ram',
    Storage INT           '$.specs.storage',
    Tags    NVARCHAR(MAX) '$.tags' AS JSON  -- AS JSON để lấy nested array/object
) oj;

-- Parse nested array
DECLARE @order NVARCHAR(MAX) = N'{
    "orderId": 1001,
    "items": [
        {"productId": 1, "qty": 2, "price": 50.00},
        {"productId": 2, "qty": 1, "price": 150.00}
    ]
}';

SELECT 
    JSON_VALUE(@order, '$.orderId') AS OrderID,
    items.ProductID,
    items.Qty,
    items.Price
FROM OPENJSON(@order, '$.items')
WITH (
    ProductID INT '$.productId',
    Qty       INT '$.qty',
    Price     DECIMAL(10,2) '$.price'
) items;

JSON Functions

JSON_VALUE: Lấy giá trị scalar

-- Cú pháp: JSON_VALUE(expression, path)
-- Trả về: NVARCHAR(4000), NULL nếu path không tìm thấy

SELECT 
    JSON_VALUE(Attributes, '$.brand') AS Brand,
    JSON_VALUE(Attributes, '$.specs.ram') AS RAM,
    JSON_VALUE(Attributes, '$.tags[0]') AS FirstTag  -- Array index
FROM Products;

-- Strict mode: ném lỗi nếu path không tồn tại hoặc giá trị không phải scalar
SELECT JSON_VALUE(Attributes, 'strict $.brand') AS Brand FROM Products;

-- Lax mode (mặc định): trả về NULL nếu không tìm thấy
SELECT JSON_VALUE(Attributes, 'lax $.nonexistent') AS Val FROM Products;  -- NULL

JSON_QUERY: Lấy object/array

-- Cú pháp: JSON_QUERY(expression, path)
-- Trả về: NVARCHAR(MAX) chứa JSON object hoặc array

SELECT 
    JSON_QUERY(Attributes, '$.specs') AS Specs,   -- Trả về {"cpu":"...","ram":32}
    JSON_QUERY(Attributes, '$.tags') AS Tags       -- Trả về ["gaming","professional"]
FROM Products;

-- Kết hợp JSON_VALUE và JSON_QUERY trong một query
SELECT 
    Name,
    JSON_VALUE(Attributes, '$.brand') AS Brand,
    JSON_QUERY(Attributes, '$.specs') AS Specs,
    JSON_VALUE(Attributes, '$.specs.ram') AS RAM
FROM Products;

JSON_MODIFY: Cập nhật JSON

-- JSON_MODIFY: cập nhật giá trị trong JSON (trả về JSON mới, không modify in-place)
DECLARE @json NVARCHAR(MAX) = N'{"brand": "Dell", "specs": {"ram": 16}}';

-- Cập nhật giá trị hiện có
SELECT JSON_MODIFY(@json, '$.specs.ram', 32);
-- Output: {"brand":"Dell","specs":{"ram":32}}

-- Thêm property mới
SELECT JSON_MODIFY(@json, '$.color', 'Silver');
-- Output: {"brand":"Dell","specs":{"ram":16},"color":"Silver"}

-- Xóa property (set = NULL)
SELECT JSON_MODIFY(@json, '$.brand', NULL);
-- Output: {"specs":{"ram":16}}

-- Append vào array
SELECT JSON_MODIFY(@json, 'append $.tags', 'sale');

-- UPDATE thực tế trong database
UPDATE Products
SET Attributes = JSON_MODIFY(Attributes, '$.specs.ram', 64)
WHERE ProductID = 1;

-- Update nhiều properties
UPDATE Products
SET Attributes = JSON_MODIFY(
                    JSON_MODIFY(Attributes, '$.specs.ram', 64),
                    '$.specs.storage', 1024)
WHERE ProductID = 1;

JSON Index Strategy

SQL Server không có native JSON index. Giải pháp: computed column + index.

-- Tạo computed column từ JSON path
ALTER TABLE Products
ADD Brand AS JSON_VALUE(Attributes, '$.brand') PERSISTED;

ALTER TABLE Products
ADD RAM AS CAST(JSON_VALUE(Attributes, '$.specs.ram') AS INT) PERSISTED;

-- Tạo index trên computed column
CREATE INDEX IX_Products_Brand ON Products(Brand);
CREATE INDEX IX_Products_RAM ON Products(RAM);

-- Query sẽ tự động dùng index
SELECT * FROM Products WHERE Brand = 'Dell';  -- Index seek!
SELECT * FROM Products WHERE RAM >= 16;       -- Index seek!

-- Verify với execution plan
-- Nếu không dùng PERSISTED: SQL Server vẫn có thể dùng index nhưng phải compute on-the-fly

XML trong SQL Server

XML data type

-- Typed XML: validated against XML schema collection
-- Untyped XML: không validate schema
CREATE TABLE Documents (
    DocID       INT PRIMARY KEY IDENTITY,
    Title       NVARCHAR(200),
    Content     XML,                    -- Untyped
    TypedContent XML(SchemaCollection1) -- Typed (nếu có schema collection)
);

-- Giới hạn: XML column tối đa 2GB
-- XML type tự động parse và lưu dưới dạng binary (không phải text thuần)

XML Schema Collections

-- Tạo XML schema collection
CREATE XML SCHEMA COLLECTION ProductSchema AS N'
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
    <xs:element name="Product">
        <xs:complexType>
            <xs:sequence>
                <xs:element name="Name" type="xs:string"/>
                <xs:element name="Price" type="xs:decimal"/>
                <xs:element name="InStock" type="xs:boolean"/>
            </xs:sequence>
            <xs:attribute name="ID" type="xs:integer" use="required"/>
        </xs:complexType>
    </xs:element>
</xs:schema>';

-- Dùng schema collection trong column definition
CREATE TABLE TypedProducts (
    ID      INT PRIMARY KEY,
    Data    XML(ProductSchema)  -- Validated against ProductSchema
);

-- Insert: sẽ validate theo schema
INSERT INTO TypedProducts VALUES (1, 
    N'<Product ID="1"><Name>Laptop</Name><Price>999.99</Price><InStock>true</InStock></Product>');

FOR XML: Xuất dữ liệu dạng XML

FOR XML RAW

-- Mỗi row → một element <row>
SELECT ProductID, Name
FROM Products
FOR XML RAW;
-- Output: <row ProductID="1" Name="Laptop Dell XPS"/>

-- Custom element name
SELECT ProductID, Name
FROM Products
FOR XML RAW('Product');
-- Output: <Product ProductID="1" Name="Laptop Dell XPS"/>

-- ELEMENTS: attributes → sub-elements
SELECT ProductID, Name
FROM Products
FOR XML RAW('Product'), ELEMENTS;
-- Output: <Product><ProductID>1</ProductID><Name>Laptop Dell XPS</Name></Product>

FOR XML AUTO

-- Tự động tạo hierarchy từ table name
SELECT o.OrderID, o.OrderDate, c.CustomerName
FROM Orders o
JOIN Customers c ON o.CustomerID = c.CustomerID
FOR XML AUTO, ELEMENTS;

FOR XML PATH (linh hoạt nhất)

-- Full control qua column aliases (XPath)
SELECT 
    ProductID AS '@ID',           -- Attribute
    Name AS 'Name',               -- Child element
    JSON_VALUE(Attributes, '$.brand') AS 'Details/Brand',  -- Nested element
    JSON_VALUE(Attributes, '$.specs.ram') AS 'Details/RAM'
FROM Products
FOR XML PATH('Product'), ROOT('Products');
-- Output:
-- <Products>
--   <Product ID="1">
--     <Name>Laptop Dell XPS</Name>
--     <Details><Brand>Dell</Brand><RAM>32</RAM></Details>
--   </Product>
-- </Products>

-- Build comma-separated list (classic trick với FOR XML PATH)
SELECT STUFF(
    (SELECT ', ' + Name 
     FROM Products
     FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'),
    1, 2, '') AS ProductList;

XML Methods

.value() - Lấy giá trị scalar

DECLARE @xml XML = N'<Product ID="1">
    <Name>Laptop</Name>
    <Price>999.99</Price>
    <Specs><RAM>32</RAM><Storage>512</Storage></Specs>
</Product>';

-- .value(XQuery, SQL_type)
SELECT 
    @xml.value('(/Product/@ID)[1]', 'INT') AS ProductID,
    @xml.value('(/Product/Name)[1]', 'NVARCHAR(100)') AS Name,
    @xml.value('(/Product/Price)[1]', 'DECIMAL(10,2)') AS Price,
    @xml.value('(/Product/Specs/RAM)[1]', 'INT') AS RAM;
-- Lưu ý: [1] là bắt buộc (lấy phần tử đầu tiên)

.query() - Lấy XML fragment

-- .query(XQuery): trả về XML
SELECT @xml.query('/Product/Specs') AS Specs;
-- Output: <Specs><RAM>32</RAM><Storage>512</Storage></Specs>

SELECT @xml.query('for $p in /Product return $p/Name') AS Names;

.nodes() - Shred XML thành rows

DECLARE @xml XML = N'<Products>
    <Product ID="1"><Name>Laptop</Name><Price>999.99</Price></Product>
    <Product ID="2"><Name>Mouse</Name><Price>29.99</Price></Product>
    <Product ID="3"><Name>Keyboard</Name><Price>49.99</Price></Product>
</Products>';

-- .nodes(): trả về bảng của XML nodes
SELECT 
    p.value('@ID', 'INT') AS ProductID,
    p.value('(Name)[1]', 'NVARCHAR(100)') AS Name,
    p.value('(Price)[1]', 'DECIMAL(10,2)') AS Price
FROM @xml.nodes('/Products/Product') AS T(p);
-- Kết quả: 3 rows như một bảng thông thường

-- Áp dụng với column trong bảng
SELECT 
    d.DocID,
    p.value('@ID', 'INT') AS ProductID,
    p.value('(Name)[1]', 'NVARCHAR(100)') AS Name
FROM Documents d
CROSS APPLY d.Content.nodes('/Products/Product') AS T(p);

.exist() - Kiểm tra sự tồn tại

-- .exist(): trả về 1 (TRUE) hoặc 0 (FALSE)
SELECT 
    @xml.exist('/Product/Specs/RAM') AS HasRAM,           -- 1
    @xml.exist('/Product/Specs/GPU') AS HasGPU,           -- 0
    @xml.exist('/Product[@ID="1"]') AS IDIs1,              -- 1
    @xml.exist('/Product/Price[. > 500]') AS PriceOver500  -- 1

-- Dùng trong WHERE clause
SELECT DocID, Title
FROM Documents
WHERE Content.exist('/Products/Product[@ID="1"]') = 1;

.modify() - Cập nhật XML

DECLARE @xml XML = N'<Product><Name>Laptop</Name><Price>999</Price></Product>';

-- Insert: thêm node mới
SET @xml.modify('insert <Color>Silver</Color> into (/Product)[1]');

-- Update: thay đổi giá trị
SET @xml.modify('replace value of (/Product/Price/text())[1] with 1099');

-- Delete: xóa node
SET @xml.modify('delete /Product/Color');

SELECT @xml;

XML Indexes

-- Primary XML Index: bắt buộc có trước khi tạo secondary
CREATE PRIMARY XML INDEX PXML_Documents_Content
ON Documents(Content);

-- Secondary XML indexes (chọn loại phù hợp với query pattern)
-- PATH: tối ưu cho queries dùng path expressions trong .exist() và .nodes()
CREATE XML INDEX IXML_Documents_Content_PATH
ON Documents(Content)
USING XML INDEX PXML_Documents_Content
FOR PATH;

-- VALUE: tối ưu cho queries dùng .value() để tìm kiếm
CREATE XML INDEX IXML_Documents_Content_VALUE
ON Documents(Content)
USING XML INDEX PXML_Documents_Content
FOR VALUE;

-- PROPERTY: tối ưu khi retrieve nhiều properties từ cùng một node
CREATE XML INDEX IXML_Documents_Content_PROPERTY
ON Documents(Content)
USING XML INDEX PXML_Documents_Content
FOR PROPERTY;

OPENXML Function (Legacy)

-- OPENXML: cách cũ để shred XML (trước SQL 2005)
-- Khuyến nghị: dùng .nodes() thay thế cho code mới

DECLARE @xml NVARCHAR(MAX) = N'<Products>
    <Product ID="1"><Name>Laptop</Name><Price>999.99</Price></Product>
    <Product ID="2"><Name>Mouse</Name><Price>29.99</Price></Product>
</Products>';

DECLARE @handle INT;

-- Chuẩn bị XML document trong memory
EXEC sp_xml_preparedocument @handle OUTPUT, @xml;

-- Query với OPENXML
SELECT *
FROM OPENXML(@handle, '/Products/Product', 2)  -- 2 = element-centric
WITH (
    ProductID   INT         '@ID',
    Name        VARCHAR(100) 'Name',
    Price       DECIMAL(10,2) 'Price'
);

-- QUAN TRỌNG: phải giải phóng memory
EXEC sp_xml_removedocument @handle;

Q&A - Phỏng vấn JSON & XML

Junior Level

Q1: Tại sao SQL Server không có kiểu dữ liệu JSON riêng? Lưu JSON như thế nào?

SQL Server lưu JSON trong NVARCHAR(MAX). Dùng constraint CHECK (ISJSON(column) = 1) để đảm bảo luôn là JSON hợp lệ. Điều này cho phép backward compatibility và flexibility — JSON vẫn là text, SQL Server cung cấp các hàm để làm việc với nó.

Q2: JSON_VALUE và JSON_QUERY khác nhau thế nào?

JSON_VALUE: trả về scalar value (string, number, boolean) → NVARCHAR(4000)
JSON_QUERY: trả về object hoặc array → NVARCHAR(MAX) chứa JSON

Dùng nhầm sẽ trả về NULL: JSON_VALUE trả về NULL nếu path là object/array; JSON_QUERY trả về NULL nếu path là scalar.

Q3: FOR JSON PATH và FOR JSON AUTO khác nhau thế nào?

FOR JSON AUTO: cấu trúc JSON tự động từ table names và query structure — ít control
FOR JSON PATH: dùng tên cột với dấu chấm ('order.customer.name') để define nested structure → flexible hơn, khuyến khích dùng

Mid Level

Q4: Làm thế nào để index cho JSON column? Tại sao cần vậy?

JSON column không có native index. Giải pháp: tạo computed column với JSON_VALUE(column, '$.path') rồi đánh PERSISTED, sau đó tạo index trên computed column đó.

Ưu điểm: query WHERE ComputedCol = 'value' dùng index seek thay vì full scan toàn bảng.

Q5: .value(), .query(), .nodes(), .exist(), .modify() — mỗi method dùng khi nào?

.value(path, type): lấy scalar value (string, number) từ XML
.query(xquery): lấy XML fragment (object, collection)
.nodes(path): “shred” XML thành rows — dùng với CROSS/OUTER APPLY
.exist(xquery): kiểm tra path có tồn tại không (WHERE clause)
.modify(dml): cập nhật XML in-place (INSERT, DELETE, REPLACE)

Q6: Tại sao nên dùng .nodes() thay vì OPENXML?

.nodes() là phương pháp native, không cần quản lý memory, không cần sp_xml_preparedocument/sp_xml_removedocument. Nếu quên gọi sp_xml_removedocument, memory leak xảy ra. .nodes() cũng được optimize tốt hơn bởi query optimizer.

Senior Level

Q7: Khi nào nên lưu dữ liệu dạng JSON trong SQL Server thay vì normalized tables?

Phù hợp lưu JSON khi:

Schema thay đổi thường xuyên (attributes, metadata) — không muốn migration mỗi lần thêm field
Sparse attributes — nhiều loại sản phẩm với thuộc tính khác nhau (EAV pattern replacement)
Semi-structured data — external API responses, event payloads
Read-heavy, write-light — JSON stored proc → ít joins hơn

Không nên lưu JSON khi:

Cần query, filter, aggregate trên các fields bên trong JSON thường xuyên → dùng computed columns + index nhưng đây là workaround
Foreign key relationships → JSON không enforce referential integrity
Data integrity quan trọng → không có constraint enforcement trong JSON values

Q8: Thiết kế schema cho hệ thống lưu trữ product catalog với attributes khác nhau per category?

Option 1: JSON column

CREATE TABLE Products (
    ProductID INT PRIMARY KEY,
    CategoryID INT REFERENCES Categories(ID),
    Name NVARCHAR(200),
    Attributes NVARCHAR(MAX),  -- JSON, schema per category
    CONSTRAINT CK_Attributes CHECK (ISJSON(Attributes) = 1)
);
-- Thêm computed columns + indexes cho các attributes thường query

Option 2: Per-category tables (schema-first)

Option 3: EAV (Entity-Attribute-Value) — generally avoid

Khuyến nghị: Option 1 (JSON) cho trường hợp nhiều categories với attributes đa dạng. Dùng computed columns + indexes cho top N queryable attributes. Validate JSON structure ở application layer hoặc dùng CHECK constraints.

Q9: XML Primary XML Index vs Secondary XML Indexes — giải thích mục đích từng loại?

Primary XML Index: shreds XML column thành bảng nội bộ chứa tất cả tags, values, paths. Tất cả secondary indexes đều phụ thuộc vào primary.

Secondary XML Indexes:

PATH: B-tree trên đường dẫn → tối ưu cho WHERE col.exist('/a/b/c') = 1
VALUE: B-tree trên values → tối ưu cho WHERE col.value('...', 'INT') = 42
PROPERTY: B-tree trên (path, value) pairs → tối ưu cho retrieve nhiều properties từ same node

Trong thực tế: nếu query chủ yếu filter bằng .exist(), dùng PATH index. Nếu filter bằng .value(), dùng VALUE index. Nếu dùng cả hai → có thể cần cả hai secondary indexes.

Keyboard shortcuts

Learning