🔐Encoding & Escaping
XML Character Escaping
Escape special characters for valid XML/HTML content
Explanation
XML requires escaping of <, >, &, ', " characters to prevent parsing errors and security issues.
Examples
Escape XML
Input
Price: <$100> & free
Output
Price: &lt;$100&gt; &amp; free
Escape attributes
Input
title="She said "Hi""
Output
title="She said &quot;Hi&quot;"
Code Examples
JavaScript
// Escape XML/HTML entities
// Replaces the five XML-reserved characters (& < > " ') in `text` with their
// predefined entity references so the result is safe in element content and
// inside quoted attribute values.
// @param {string} text - raw text to escape
// @returns {string} the escaped text
function escapeXml(text) {
  const map = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;'
  };
  // Single pass over the input: each character is rewritten exactly once, so
  // the "&" of a freshly produced entity is never escaped a second time.
  return text.replace(/[&<>"']/g, m => map[m]);
}
// Unescape XML entities
// Converts the five predefined XML entity references back to the characters
// they stand for. Any other entity or character reference is left untouched.
// @param {string} text - text containing entity references
// @returns {string} the decoded text
function unescapeXml(text) {
  // Keys are the full entity references, because that is what the regex
  // below hands to the replacer callback.
  const map = {
    '&amp;': '&',
    '&lt;': '<',
    '&gt;': '>',
    '&quot;': '"',
    '&apos;': "'"
  };
  // Single pass: "&amp;lt;" decodes to "&lt;" and is not decoded again.
  return text.replace(/&(?:amp|lt|gt|quot|apos);/g, m => map[m]);
}
// Build XML safely
// Serializes one element: <tag attr="value" ...>content</tag>.
// Attribute values and content are stringified and passed through escapeXml.
// @param {string} tag - element name
// @param {Object} [attributes] - attribute name/value pairs; omitted or null means no attributes
// @param {*} [content] - element text; omitted or null yields an empty element body
// @returns {string} the serialized element
// NOTE: `tag` and the attribute names are inserted verbatim (not escaped or
// validated), so they must come from trusted code, not from user input.
function buildXmlElement(tag, attributes, content) {
  // `== null` matches both null and undefined, so a missing attribute object
  // no longer makes Object.entries throw.
  const attrs = Object.entries(attributes == null ? {} : attributes)
    .map(([key, value]) => `${key}="${escapeXml(String(value))}"`)
    .join(' ');
  // Missing content becomes an empty body rather than the text "undefined"/"null";
  // other falsy values such as 0 are still rendered.
  const escapedContent = content == null ? '' : escapeXml(String(content));
  return `<${tag}${attrs ? ' ' + attrs : ''}>${escapedContent}</${tag}>`;
}
// Usage
const xml = buildXmlElement(
'message',
{ user: 'John "Doe"', id: '123' },
'Hello <World> & friends'
);
// Expected result (with escapeXml emitting entity references):
// <message user="John &quot;Doe&quot;" id="123">Hello &lt;World&gt; &amp; friends</message>
// CDATA sections (no escaping needed inside)
// Wraps `content` in one or more CDATA sections that decode back to exactly
// `content`.
// @param {string} content - raw text to wrap
// @returns {string} one CDATA section, or several adjacent ones when
//   `content` contains "]]>"
function wrapCData(content) {
  // "]]>" terminates a CDATA section, so it can never appear inside one.
  // Each occurrence is split across two sections: "]]" ends the current
  // section and ">" starts the next, so no characters are lost.
  const safe = content.split(']]>').join(']]]]><![CDATA[>');
  return `<![CDATA[${safe}]]>`;
}
// Use library for complex XML
// npm install fast-xml-parser
import { XMLBuilder } from 'fast-xml-parser';
// fast-xml-parser ignores attributes unless `ignoreAttributes: false` is set;
// with the defaults the '@_user' key below is not emitted as an attribute.
const builder = new XMLBuilder({ ignoreAttributes: false });
// Uses its own name because `xml` is already declared earlier in this listing
// and a second `const xml` in the same module is a SyntaxError.
const builtXml = builder.build({
  root: {
    message: {
      '@_user': 'John "Doe"',
      '#text': 'Hello <World> & friends'
    }
  }
});
Python
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape, unescape
# Escape XML
# escape() replaces only &, < and > unless extra entities are supplied.
text = 'Price: <$100> & free'
escaped = escape(text)
print(escaped)  # Price: &lt;$100&gt; &amp; free
# Escape with quotes
# The second argument maps additional characters to their replacements,
# which is needed when the text ends up inside a quoted attribute value.
escaped_full = escape(text, {"'": "&apos;", '"': "&quot;"})
# Unescape XML
# unescape() reverses &amp;, &lt; and &gt;; pass the inverse mapping as a
# second argument to also decode &apos; / &quot;.
unescaped = unescape(escaped)
# Build XML safely with ElementTree
# Raw strings are assigned to the attribute and to .text; escaping is applied
# by ElementTree when the tree is serialized with tostring().
root = ET.Element('root')
message = ET.SubElement(root, 'message', user='John "Doe"')
message.text = 'Hello <World> & friends'
xml_string = ET.tostring(root, encoding='unicode')
# <root><message user="John &quot;Doe&quot;">Hello &lt;World&gt; &amp; friends</message></root>
# Parse XML
# Round-trips the serialized string back into a tree; `root` is rebound to the
# parsed root element.
tree = ET.ElementTree(ET.fromstring(xml_string))
root = tree.getroot()
# CDATA (use lxml for better CDATA support)
# lxml is a third-party package, not part of the standard library.
from lxml import etree
# Rebinds `root`, `message` and `xml_string` using lxml's etree API.
root = etree.Element('root')
message = etree.SubElement(root, 'message')
# Wrapping the text in etree.CDATA asks lxml to emit it as a CDATA section
# rather than entity-escaping it (see the expected output below).
message.text = etree.CDATA('Hello <World> & friends')
xml_string = etree.tostring(root, encoding='unicode')
# <root><message><![CDATA[Hello <World> & friends]]></message></root>
💡 Tips
- Always escape &, <, >, ", ' in XML content
- Escape & first when replacing multiple chars
- Use CDATA for large blocks of special characters
- Attributes need quote escaping
- Use XML libraries instead of string concatenation
- Validate XML after generation
- Consider XML schema validation
- CDATA cannot contain ]]>
⚠️ Common Pitfalls
- Not escaping & causes parse errors
- Wrong order when replacing (& must be first)
- CDATA content that contains ]]> breaks parsing
- Single vs double quote context in attributes
- Encoding issues (use UTF-8)
- Unescaped < > creates invalid XML
- HTML named entities (e.g. &nbsp;, &copy;) are not defined in XML; only &amp;, &lt;, &gt;, &quot; and &apos; are predefined