🔐Encoding & Escaping

XML Character Escaping

Escape special characters for valid XML/HTML content

Explanation

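XML reserves the characters <, >, &, ', and " for markup. & and < must always be escaped in text and attribute values, and a quote must be escaped inside an attribute value delimited by that same quote; escaping all five everywhere is the simplest way to prevent parsing errors and markup-injection vulnerabilities.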

Examples

Escape XML
Input
Price: <$100> & free
Output
Price: &lt;$100&gt; &amp; free
Escape attributes
Input
title="She said "Hi""
Output
title="She said &quot;Hi&quot;"

Code Examples

JavaScript
// Escape XML/HTML entities
/**
 * Replaces the five XML-reserved characters with their predefined entities.
 *
 * @param {string} text - Raw text to place in XML/HTML content or attributes.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeXml(text) {
  return text
    // The ampersand goes first: every later replacement inserts a new '&'
    // that must not be escaped a second time.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

// Unescape XML entities
/**
 * Decodes the five predefined XML entities and numeric character references
 * (decimal `&#60;` and hexadecimal `&#x3C;`) in a single pass.
 *
 * Decoding happens in one pass, so text produced by a replacement is never
 * decoded again: `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * Unknown named entities (e.g. `&nbsp;`) and numeric references to code
 * points that are not legal XML 1.0 characters are left unchanged.
 *
 * @param {string} text - XML-escaped text.
 * @returns {string} The decoded text.
 */
function unescapeXml(text) {
  const named = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'"
  };

  // XML 1.0 "Char" production: tab, LF, CR, and everything from U+0020 up,
  // excluding surrogates, U+FFFE and U+FFFF.
  const isXmlChar = (cp) =>
    cp === 0x9 ||
    cp === 0xa ||
    cp === 0xd ||
    (cp >= 0x20 && cp <= 0xd7ff) ||
    (cp >= 0xe000 && cp <= 0xfffd) ||
    (cp >= 0x10000 && cp <= 0x10ffff);

  return text.replace(
    /&(?:(amp|lt|gt|quot|apos)|#([0-9]+)|#x([0-9a-fA-F]+));/g,
    (match, name, dec, hex) => {
      if (name !== undefined) {
        return named[name];
      }
      const codePoint =
        dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
      // Leave illegal references as written instead of emitting characters
      // that an XML document may not contain.
      return isXmlChar(codePoint) ? String.fromCodePoint(codePoint) : match;
    }
  );
}

// Build XML safely
/**
 * Builds a single XML element string with escaped attribute values and
 * escaped text content.
 *
 * Tag and attribute names are interpolated into the markup verbatim, so they
 * are checked for characters that would end the name and let a caller inject
 * extra markup (whitespace, <, >, &, quotes, / and =). This is an injection
 * guard, not a full XML "Name" validation.
 *
 * @param {string} tag - Element name.
 * @param {Object<string, *>} [attributes={}] - Attribute name/value pairs;
 *   values are stringified and escaped. `null` is treated as no attributes.
 * @param {*} [content=''] - Text content; stringified and escaped.
 *   `null`/`undefined` produce an empty element body.
 * @returns {string} The serialized element.
 * @throws {Error} If the tag or an attribute name is empty, not a string, or
 *   contains a markup-breaking character.
 */
function buildXmlElement(tag, attributes = {}, content = '') {
  const assertSafeName = (name, kind) => {
    if (typeof name !== 'string' || !/^[^\s<>&"'\/=]+$/.test(name)) {
      throw new Error(`Invalid XML ${kind} name: ${String(name)}`);
    }
  };

  assertSafeName(tag, 'element');

  const attrs = Object.entries(attributes == null ? {} : attributes)
    .map(([key, value]) => {
      assertSafeName(key, 'attribute');
      return `${key}="${escapeXml(String(value))}"`;
    })
    .join(' ');

  const escapedContent = escapeXml(content == null ? '' : String(content));

  return `<${tag}${attrs ? ` ${attrs}` : ''}>${escapedContent}</${tag}>`;
}

// Usage: build a <message> element from an attribute map and text content.
// The double quotes in the `user` value and the <, >, & in the content are
// escaped by buildXmlElement.
const xml = buildXmlElement(
  'message',
  { user: 'John "Doe"', id: '123' },
  'Hello <World> & friends'
);
// Resulting string:
// <message user="John &quot;Doe&quot;" id="123">Hello &lt;World&gt; &amp; friends</message>

// CDATA sections (no escaping needed inside)
/**
 * Wraps text in a CDATA section so that <, > and & need no entity escaping.
 *
 * A CDATA section ends at the first `]]>`, so that sequence cannot appear
 * inside one. Each occurrence is split across two adjacent sections
 * (`]]` closes the first, `>` opens the next); a parser that concatenates
 * adjacent sections reproduces the original text exactly.
 *
 * @param {string} content - Text to wrap; non-strings are stringified.
 * @returns {string} One or more adjacent CDATA sections encoding `content`.
 */
function wrapCData(content) {
  const text = String(content);
  const safeText = text.split(']]>').join(']]]]><![CDATA[>');
  return `<![CDATA[${safeText}]]>`;
}

// Use library for complex XML
// npm install fast-xml-parser
import { XMLBuilder } from 'fast-xml-parser';

// XMLBuilder skips attribute handling by default (ignoreAttributes: true);
// disable that so the '@_'-prefixed key below is emitted as an attribute.
const builder = new XMLBuilder({ ignoreAttributes: false });

// Named distinctly because `xml` is already declared by the buildXmlElement
// usage example earlier in this file; redeclaring a const is a SyntaxError.
const libraryXml = builder.build({
  root: {
    message: {
      '@_user': 'John "Doe"', // '@_' is the default attribute-name prefix
      '#text': 'Hello <World> & friends' // '#text' is the default text-node key
    }
  }
});
Python
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape, unescape

# escape() with no extra arguments handles the always-significant &, < and >
text = 'Price: <$100> & free'
escaped = escape(text)
print(escaped)  # Price: &lt;$100&gt; &amp; free

# Quotes are left alone unless extra entities are supplied
escaped_full = escape(text, entities={'"': "&quot;", "'": "&apos;"})

# Reverse the escaping
unescaped = unescape(escaped)

# ElementTree escapes attribute values and text when serializing
root = ET.Element('root')
message = ET.SubElement(root, 'message', {'user': 'John "Doe"'})
message.text = 'Hello <World> & friends'

xml_string = ET.tostring(root, encoding='unicode')
# <root><message user="John &quot;Doe&quot;">Hello &lt;World&gt; &amp; friends</message></root>

# Parse the serialized document back into a tree
parsed_root = ET.fromstring(xml_string)
tree = ET.ElementTree(parsed_root)
root = tree.getroot()

# CDATA (use lxml for better CDATA support)
from lxml import etree

# Build <root><message>…</message></root> with lxml instead of ElementTree.
root = etree.Element('root')
message = etree.SubElement(root, 'message')
# Wrapping the text in etree.CDATA keeps <, > and & literal in the output
# (see the expected serialization below) instead of entity-escaping them.
message.text = etree.CDATA('Hello <World> & friends')

# encoding='unicode' requests a str result rather than bytes.
xml_string = etree.tostring(root, encoding='unicode')
# <root><message><![CDATA[Hello <World> & friends]]></message></root>

💡 Tips

  • Always escape &, <, >, ", ' in XML content
  • Escape & first when applying replacements one after another (otherwise the & in already-inserted entities gets escaped again)
  • Use CDATA for large blocks of special characters
  • Attributes need quote escaping
  • Use XML libraries instead of string concatenation
  • Validate XML after generation
  • Consider XML schema validation
  • CDATA cannot contain ]]>

⚠️ Common Pitfalls

  • Not escaping & causes parse errors
  • Wrong order when replacing (& must be first)
  • CDATA content that contains ]]> ends the section early and breaks parsing
  • Single vs double quote context in attributes
  • Encoding issues (use UTF-8)
  • Unescaped < > creates invalid XML
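  • HTML named entities (e.g. &nbsp;, &copy;) are undefined in XML unless declared in a DTD; only &amp;, &lt;, &gt;, &quot;, &apos; are predefined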