Normalize converter labels and fix card grid sizing
This commit is contained in:
@@ -640,10 +640,12 @@ a:focus-visible {
|
||||
margin: 0 auto;
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||||
grid-auto-rows: minmax(132px, auto);
|
||||
gap: clamp(0.75rem, 1.3vw, 1.25rem);
|
||||
}
|
||||
/* Cards nested in .category-grid take the full width and height of their containing box. */
.category-grid .category-card {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
@media (min-width: 640px) {
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: 0.6rem;
|
||||
min-height: 132px;
|
||||
padding: 1.5rem 1rem;
|
||||
background: var(--card-bg);
|
||||
border: 1px solid var(--border);
|
||||
|
||||
73
migrate.py
73
migrate.py
@@ -31,6 +31,62 @@ CATEGORY_KEYS = [
|
||||
|
||||
CATEGORY_SET = set(CATEGORY_KEYS)
|
||||
|
||||
# Lightweight label normalization to catch duplicate/identity conversions
# that differ only by abbreviations (e.g., "cm" vs "centimeters").
# Keys are lowercase tokens (abbreviations, singular and plural spellings);
# values are the canonical singular unit name.
TOKEN_MAP = {
    'cm': 'centimeter',
    'centimeter': 'centimeter',
    'centimetre': 'centimeter',
    'centimetres': 'centimeter',
    'centimeters': 'centimeter',
    'mm': 'millimeter',
    'millimeter': 'millimeter',
    'millimeters': 'millimeter',
    'millimetre': 'millimeter',
    'millimetres': 'millimeter',
    'm': 'meter',
    'meter': 'meter',
    'meters': 'meter',
    'metre': 'meter',
    'metres': 'meter',
    'km': 'kilometer',
    'kilometer': 'kilometer',
    'kilometers': 'kilometer',
    'kilometre': 'kilometer',
    'kilometres': 'kilometer',
    'in': 'inch',
    'inch': 'inch',
    'inches': 'inch',
    'ft': 'foot',
    'foot': 'foot',
    'feet': 'foot',
}


def normalize_label(label: str) -> str:
    """Canonicalize a unit label for duplicate detection.

    - Lowercase
    - Replace '/' with ' per ' to align fraction style with text style
    - Strip punctuation into tokens
    - Collapse common abbreviations/plurals via TOKEN_MAP and simple singularization

    Args:
        label: Human-readable unit label, e.g. "cm", "Inches" or "km/h".

    Returns:
        The normalized tokens joined by single spaces. An empty or
        punctuation-only label yields an empty string.
    """
    cleaned = label.lower().replace('/', ' per ')

    normalized_tokens = []
    for tok in re.split(r'[^a-z0-9]+', cleaned):
        if not tok:
            # re.split yields empty strings at the edges and between
            # adjacent separators.
            continue
        # Try the token exactly as written first. Singularizing before the
        # lookup turned 'inches' into 'inche', which never matched the
        # 'inches' entry in TOKEN_MAP.
        base = TOKEN_MAP.get(tok)
        if base is None:
            # Drop a trailing 's' for simple plurals, but avoid short
            # abbreviations like 'cms'.
            singular = tok[:-1] if tok.endswith('s') and len(tok) > 3 else tok
            base = TOKEN_MAP.get(singular, singular)
        normalized_tokens.append(base)

    return ' '.join(normalized_tokens)


def load_external_descriptions() -> dict:
    """Return externally sourced descriptions; currently always empty."""
    # Placeholder for future enrichment sources.
    return {}
|
||||
@@ -92,6 +148,7 @@ def process():
|
||||
calculators_ts_entries = []
|
||||
|
||||
seen_slugs = set()
|
||||
seen_norm_pairs = set()
|
||||
for raw_name, slug, category_raw, factor_raw in active_rows:
|
||||
if raw_name == 'Calculator Name' or not slug:
|
||||
continue
|
||||
@@ -106,7 +163,21 @@ def process():
|
||||
in1, in2 = "From", "To"
|
||||
|
||||
custom_labels = None
|
||||
|
||||
|
||||
norm_in1 = normalize_label(in1)
|
||||
norm_in2 = normalize_label(in2)
|
||||
|
||||
# Skip identity conversions that only differ by spelling/abbreviation
|
||||
if norm_in1 == norm_in2:
|
||||
print(f"Skipping identity converter {slug}: {in1} -> {in2}")
|
||||
continue
|
||||
|
||||
pair_key = (norm_in1, norm_in2)
|
||||
if pair_key in seen_norm_pairs:
|
||||
print(f"Skipping duplicate converter {slug}: {in1} -> {in2}")
|
||||
continue
|
||||
seen_norm_pairs.add(pair_key)
|
||||
|
||||
category = normalize_category(category_raw)
|
||||
if not category:
|
||||
raise ValueError(f'Category required for {display_name}')
|
||||
|
||||
Reference in New Issue
Block a user