Source code for csvsmith.utils.clean_numeric
import math
import re
from typing import Any
# U+00A0 NO-BREAK SPACE; treated as a digit-group separator alongside " " and "_".
NON_BREAKING_SPACE = "\xa0"
# Matches one grouping character: a plain space, an underscore, or a non-breaking space.
SEPARATOR_PATTERN = re.compile(r"[ _\xa0]")
# Matches a plain decimal number: an optional leading "-", then either digits
# only, or optional digits followed by "." and at least one digit. There is no
# "+" sign, no exponent, and no trailing "." (so "1." does not match).
NUMBER_PATTERN = re.compile(r"^-?(?:\d+|\d*\.\d+)$")
# Template for the ValueError text; formatted with the rejected input passed as
# ``value`` and rendered through repr() by the ``!r`` conversion.
INVALID_NUMBER_MESSAGE = "Could not convert {value!r} to a valid number."
# Matches a single currency symbol ($, €, £, ¥ or ₹) at the very start of a string.
CURRENCY_PREFIX_PATTERN = re.compile(r"^[\$€£¥₹]")
[docs]
def strip_currency_prefix(value: Any) -> Any:
    """
    Drop one leading currency symbol from a value, if it has one.

    The value is stringified and trimmed before being tested against
    ``CURRENCY_PREFIX_PATTERN``. When the trimmed text starts with a currency
    symbol, the text after that first character is returned, trimmed again.
    Otherwise the original ``value`` object is returned untouched (not its
    trimmed string form).
    """
    candidate = str(value).strip()
    # An empty string can never match, so no separate emptiness test is needed.
    if CURRENCY_PREFIX_PATTERN.match(candidate) is None:
        return value
    remainder = candidate[1:]
    return remainder.strip()
def _normalize_numeric_text(value: Any, *, sep: str, decimal: str) -> str:
    """
    Rewrite a value's text into a canonical numeric form that uses "." as decimal mark.

    The value is stringified and trimmed, then three rewrites are applied in order:
    text wrapped in parentheses becomes "-" followed by the inner text, every
    occurrence of ``sep`` is removed (skipped when ``sep`` is empty), and every
    occurrence of ``decimal`` is replaced by "." unless ``decimal`` already is ".".
    No validation is performed here; the result may still not be a number.
    """
    text = str(value).strip()

    # Accounting-style negatives: "(12.5)" -> "-12.5".
    wrapped_in_parens = text.startswith("(") and text.endswith(")")
    if wrapped_in_parens:
        text = "-" + text[1:-1]

    # Group separators go first so a "." used as separator is gone before the
    # decimal mark is rewritten to ".".
    ungrouped = text.replace(sep, "") if sep else text
    if decimal == ".":
        return ungrouped
    return ungrouped.replace(decimal, ".")
def _has_valid_grouping(numeric_text: str, *, decimal: str) -> bool:
    """
    Check that a numeric text is a plain number whose digit grouping is well formed.

    ``decimal`` is used to split the unsigned text into an integer and a fraction
    part. Each non-empty part must not start or end with a grouping character
    (space, underscore, non-breaking space) and must not contain the same
    grouping character twice in a row. Single grouping characters between digits
    are allowed. Finally the text, with all grouping characters removed, must
    match ``NUMBER_PATTERN``, which always expects "." as the decimal mark.

    Returns False for empty text, an empty ``decimal``, more than one decimal
    mark, or a text that consists of nothing but a sign and/or a decimal mark.
    """
    # An empty decimal cannot split anything and would make str.partition raise.
    if not numeric_text or not decimal:
        return False

    unsigned_text = numeric_text.removeprefix("-")
    if unsigned_text.count(decimal) > 1:
        return False

    integer_text, _, fraction_text = unsigned_text.partition(decimal)
    if not integer_text and not fraction_text:
        return False

    separators = (" ", "_", NON_BREAKING_SPACE)
    for part in (integer_text, fraction_text):
        if not part:
            continue
        # A separator may only sit between digits, never at the edge of a part.
        if part.startswith(separators) or part.endswith(separators):
            return False
        # Only a *doubled* separator is malformed; a single space such as in
        # "1 000" is legitimate grouping and must not be rejected.
        if any(mark * 2 in part for mark in separators):
            return False

    stripped_text = SEPARATOR_PATTERN.sub("", numeric_text)
    return bool(NUMBER_PATTERN.fullmatch(stripped_text))
def _strip_group_separators(numeric_text: str) -> str:
    """
    Return the text with every character matched by ``SEPARATOR_PATTERN`` removed.
    """
    without_separators = SEPARATOR_PATTERN.sub("", numeric_text)
    return without_separators
def _invalid_number_error(value: Any) -> ValueError:
    """
    Build (but do not raise) the ValueError reported for an unconvertible value.

    The message comes from ``INVALID_NUMBER_MESSAGE`` formatted with ``value``.
    """
    message = INVALID_NUMBER_MESSAGE.format(value=value)
    return ValueError(message)
[docs]
def clean_numeric(
value: Any, *, sep: str = ",", decimal: str = ".", relaxed: bool = False
) -> float | Any:
"""
Cleans and converts a given input to a float by normalizing its numeric representation.
"""
if value is None:
return 0.0
normalized_text = _normalize_numeric_text(value, sep=sep, decimal=decimal)
if not _has_valid_grouping(normalized_text, decimal=decimal):
if relaxed:
return value
raise _invalid_number_error(value)
candidate_text = _strip_group_separators(normalized_text)
try:
return float(candidate_text)
except ValueError as exc:
if relaxed:
return value
raise _invalid_number_error(value) from exc
[docs]
def clean_currency_numeric(
value: Any, *, sep: str = ",", decimal: str = ".", relaxed: bool = False
) -> float | Any:
"""
Cleans and converts a currency-prefixed numeric string to a float.
"""
if value is None:
return 0.0
return clean_numeric(
strip_currency_prefix(value),
sep=sep,
decimal=decimal,
relaxed=relaxed,
)