Source code for csvsmith.utils.normalize

import unicodedata


def normalize(
    text: object,
    ignore_case: bool = True,
    ignore_whitespace: bool = True,
    nfkc: bool = True,
) -> str:
    """Standardize a cell value to bypass Excel formatting artifacts.

    The value is converted to ``str`` and then passed through up to three
    independent, optional clean-up steps so that values differing only in
    presentation normalize to the same string.

    Args:
        text: The value to normalize. ``None`` yields an empty string; any
            other object is converted with ``str()`` (so numeric cells are
            accepted).
        ignore_case: When true, remove case distinctions using full Unicode
            case folding.
        ignore_whitespace: When true, remove *all* whitespace, including
            whitespace inside the string. Leading and trailing whitespace is
            stripped regardless of this flag.
        nfkc: When true, apply Unicode NFKC normalization first (folds
            full-width forms, ligatures and other compatibility characters).

    Returns:
        The normalized string, or ``""`` when ``text`` is ``None``.
    """
    if text is None:
        return ""

    # Cast to string to handle numeric cells safely.
    result = str(text)

    # 1. Unicode compatibility (handles full-width characters / ligatures).
    if nfkc:
        result = unicodedata.normalize("NFKC", result)

    # 2. Case folding. casefold() rather than lower(): lower() keeps some
    #    case distinctions (e.g. "ß" vs "ss"), which defeats the purpose of
    #    a case-insensitive comparison key.
    if ignore_case:
        result = result.casefold()

    # 3. Whitespace handling: always trim the outer whitespace, and
    #    optionally remove every internal whitespace run as well.
    result = result.strip()
    if ignore_whitespace:
        result = "".join(result.split())

    return result