# Source code for csvsmith.utils.normalize
import unicodedata
# [docs] (link marker left over from the rendered documentation page)
def normalize(
    text: object,
    ignore_case: bool = True,
    ignore_whitespace: bool = True,
    nfkc: bool = True,
) -> str:
    """Standardize a cell value so spreadsheet formatting artifacts compare equal.

    The value is converted to ``str`` and then passed through up to three
    independent, optional steps, in this order: Unicode compatibility
    normalization, case folding, and whitespace removal.

    Args:
        text: The value to normalize. ``None`` yields an empty string; any
            other object is converted with ``str()`` so numeric cells are
            accepted as well.
        ignore_case: When true, apply ``str.casefold()`` so that strings
            differing only by case map to the same result (including
            non-ASCII cases such as ``"ß"`` versus ``"SS"``).
        ignore_whitespace: When true, remove *all* whitespace, including
            whitespace inside the string. When false, only leading and
            trailing whitespace is trimmed and internal runs are kept as-is.
        nfkc: When true, apply NFKC normalization, which maps compatibility
            characters (full-width forms, ligatures, non-breaking spaces)
            to their canonical equivalents.

    Returns:
        The normalized string.
    """
    if text is None:
        return ""

    # Cast to string so numeric cells are handled safely.
    text = str(text)

    # 1. Unicode compatibility (handles full-width forms and ligatures).
    if nfkc:
        text = unicodedata.normalize("NFKC", text)

    # 2. Case folding. ``casefold`` is the transform intended for caseless
    #    matching; ``lower`` leaves characters such as "ß" unchanged, so
    #    "Straße" and "STRASSE" would not compare equal.
    if ignore_case:
        text = text.casefold()

    # 3. Whitespace handling. ``split()`` with no argument already discards
    #    leading and trailing whitespace, so a separate ``strip()`` is only
    #    needed when internal whitespace is being preserved.
    if ignore_whitespace:
        return "".join(text.split())
    return text.strip()