"""Generate the Svelte app's ``calculators.ts`` from ``calculators_list.md``."""
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# Directory that contains this script; all other paths are anchored to it.
BASE_DIR = Path(__file__).resolve().parents[0]
# Markdown file holding the calculator table read by parse_calculators_list().
CALCLIST = BASE_DIR.joinpath('calculators_list.md')
# TypeScript module written by process().
OUTPUT_FILE = BASE_DIR.joinpath('hdyc-svelte', 'src', 'lib', 'data', 'calculators.ts')
|
|
|
|
# Category identifiers a calculator row is allowed to resolve to, in display order.
CATEGORY_KEYS = [
    'length', 'weight', 'temperature', 'volume', 'fluids',
    'area', 'speed', 'pressure', 'energy', 'magnetism',
    'power', 'data', 'time', 'angle', 'number-systems',
    'radiation', 'electrical', 'force', 'light', 'other',
]

# Same identifiers as a set, for constant-time membership checks.
CATEGORY_SET = set(CATEGORY_KEYS)
|
|
|
|
# Lightweight label normalization to catch duplicate/identity conversions
# that differ only by abbreviations (e.g., "cm" vs "centimeters").
TOKEN_MAP = {
    'cm': 'centimeter',
    'centimeter': 'centimeter',
    'centimetre': 'centimeter',
    'centimetres': 'centimeter',
    'centimeters': 'centimeter',
    'mm': 'millimeter',
    'millimeter': 'millimeter',
    'millimeters': 'millimeter',
    'millimetre': 'millimeter',
    'millimetres': 'millimeter',
    'm': 'meter',
    'meter': 'meter',
    'meters': 'meter',
    'metre': 'meter',
    'metres': 'meter',
    'km': 'kilometer',
    'kilometer': 'kilometer',
    'kilometers': 'kilometer',
    'kilometre': 'kilometer',
    'kilometres': 'kilometer',
    'in': 'inch',
    'inch': 'inch',
    'inches': 'inch',
    'ft': 'foot',
    'foot': 'foot',
    'feet': 'foot',
}

# Anything that is not a lowercase ASCII letter or digit separates tokens.
_LABEL_TOKEN_SEPARATOR = re.compile(r'[^a-z0-9]+')


def normalize_label(label: str) -> str:
    """Canonicalize a unit label for duplicate detection.

    - Lowercase
    - Replace '/' with ' per ' to align fraction style with text style
    - Strip punctuation into tokens
    - Collapse common abbreviations/plurals via TOKEN_MAP and simple singularization

    Args:
        label: Human-readable unit label, e.g. ``"Centimetres"`` or ``"km/h"``.

    Returns:
        The normalized tokens joined by single spaces; ``""`` when the label
        contains no ASCII letters or digits.
    """
    cleaned = label.lower().replace('/', ' per ')

    normalized_tokens = []
    for tok in _LABEL_TOKEN_SEPARATOR.split(cleaned):
        if not tok:
            continue
        # Try the token exactly as written first: irregular plurals such as
        # 'inches' are listed in TOKEN_MAP and would be missed if the trailing
        # 's' were stripped beforehand ('inches' -> 'inche').
        canonical = TOKEN_MAP.get(tok)
        if canonical is None:
            base = tok
            # Drop a trailing 's' for simple plurals, but avoid short abbreviations like 'cms'
            if base.endswith('s') and len(base) > 3:
                base = base[:-1]
            canonical = TOKEN_MAP.get(base, base)
        normalized_tokens.append(canonical)

    return ' '.join(normalized_tokens)
|
|
|
|
def load_external_descriptions():
    """Return description overrides keyed by slug; currently always empty."""
    # Placeholder for future enrichment sources.
    descriptions = {}
    return descriptions
|
|
|
|
def _split_table_row(line):
    """Split one Markdown table line into stripped cell strings."""
    return [cell.strip() for cell in line.strip().strip('|').split('|')]


def parse_calculators_list(path=None):
    """Read the calculator table from the Markdown list file.

    The table starts at the first line containing ``| Calculator Name`` and
    ends at the next ``## `` heading. Columns are located by header text, so
    their order in the file does not matter.

    Args:
        path: Optional path of the Markdown file to read. Defaults to the
            module-level ``CALCLIST``.

    Returns:
        A list of ``(name, slug, category, factor_raw)`` string tuples, one
        per table row that has a cell for each of the four required columns.
        Rows are skipped (not reported) when they are too short or when the
        header lacks one of the required columns.
    """
    source = CALCLIST if path is None else path
    required_columns = ('Calculator Name', 'Slug', 'Category', 'Conversion Factor')

    active_calcs = []
    in_table = False
    column_indices = None  # positions of required_columns, or None if unknown

    # The list contains non-ASCII punctuation (en/em dashes), so the encoding
    # must not depend on the platform default.
    with open(source, 'r', encoding='utf-8') as f:
        for line in f:
            if in_table and line.startswith('## '):
                break

            if '| Calculator Name' in line:
                in_table = True
                header_map = {
                    header: idx for idx, header in enumerate(_split_table_row(line))
                }
                indices = [header_map.get(column) for column in required_columns]
                column_indices = None if None in indices else indices
                continue

            if not in_table or not line.startswith('|'):
                continue

            parts = _split_table_row(line)
            # Skip the alignment row under the header (':---', '---', ':-:' ...).
            if line.startswith('| :---') or all(
                re.fullmatch(r':?-+:?', cell) for cell in parts
            ):
                continue

            if column_indices is None or len(parts) <= max(column_indices):
                continue
            active_calcs.append(tuple(parts[idx] for idx in column_indices))

    return active_calcs
|
|
|
|
def split_name_and_teaser(name):
    """Split ``"Name – teaser"`` into ``(name, teaser)``.

    The separator is the first hyphen, en dash or em dash that has a
    whitespace character on both sides. Without one, the teaser is ``''``.
    Both returned parts are stripped.
    """
    separator = re.search(r'\s[–—-]\s', name)
    if separator is None:
        return name.strip(), ''
    title = name[:separator.start()]
    teaser = name[separator.end():]
    return title.strip(), teaser.strip()
|
|
|
|
|
|
def split_conversion_name(name):
    """Split ``"A to B"`` into ``(A, B)`` at the first standalone "to".

    The match is case-insensitive and requires whitespace on both sides of
    "to". Returns ``None`` when the name has no such separator.
    """
    pieces = re.split(r'\s+to\s+', name, maxsplit=1, flags=re.IGNORECASE)
    if len(pieces) != 2:
        return None
    source, target = (piece.strip() for piece in pieces)
    return source, target
|
|
|
|
def normalize_category(raw: str) -> str:
    """Turn a free-text category cell into a key-style identifier.

    The text is stripped and lowercased, each space becomes a hyphen, and any
    character other than ``a-z``, ``0-9`` or ``-`` is then removed.
    """
    hyphenated = '-'.join(raw.strip().lower().split(' '))
    return re.sub(r'[^a-z0-9-]', '', hyphenated)
|
|
|
|
# Static head of the generated TypeScript module: type declarations, the
# category metadata and the opening of the `calculators` array.
_TS_HEADER = """// THIS FILE IS AUTO-GENERATED BY migrate.py
export type CalcType = 'standard' | 'inverse' | '3col' | '3col-mul' | 'base' | 'text-bin' | 'bin-text' | 'dms-dd' | 'dd-dms' | 'dec-frac' | 'db-int' | 'db-spl' | 'db-v' | 'db-w' | 'awg' | 'brinell-rockwell' | 'ev-lux' | 'aov' | 'swg' | 'rockwell-vickers' | 'sus-cst' | 'molarity';

export interface CalculatorDef {
  slug: string;
  name: string;
  category: string;
  type: CalcType;
  hidden?: boolean;
  factor?: number;
  offset?: number;
  fromBase?: number;
  toBase?: number;
  labels: { in1: string; in2: string; in3?: string };
  descriptionHTML?: string;
  teaser?: string;
}

export const categories: Record<string, { label: string; icon: string }> = {
  length: { label: 'Length / Distance', icon: '📏' },
  weight: { label: 'Weight / Mass', icon: '⚖️' },
  temperature: { label: 'Temperature', icon: '🌡️' },
  volume: { label: 'Volume', icon: '🧪' },
  fluids: { label: 'Fluids', icon: '💧' },
  area: { label: 'Area', icon: '📐' },
  speed: { label: 'Speed / Velocity', icon: '💨' },
  pressure: { label: 'Pressure', icon: '🔽' },
  energy: { label: 'Energy', icon: '⚡' },
  magnetism: { label: 'Magnetism', icon: '🧲' },
  power: { label: 'Power', icon: '🔌' },
  data: { label: 'Data Storage', icon: '💾' },
  time: { label: 'Time', icon: '⏱️' },
  angle: { label: 'Angle', icon: '📐' },
  'number-systems':{ label: 'Number Systems', icon: '🔢' },
  radiation: { label: 'Radiation', icon: '☢️' },
  electrical: { label: 'Electrical', icon: '🔋' },
  force: { label: 'Force / Torque', icon: '💪' },
  light: { label: 'Light', icon: '💡' },
  other: { label: 'Other', icon: '🔄' },
};

export const calculators: CalculatorDef[] = [
"""

# Static tail of the generated TypeScript module: closes the array and adds
# the lookup/search helpers.
_TS_FOOTER = """
];

const slugIndex = new Map(calculators.map(c => [c.slug, c]));

export function getCalculatorBySlug(slug: string): CalculatorDef | undefined {
  return slugIndex.get(slug);
}

export function getCalculatorsByCategory(category: string): CalculatorDef[] {
  return calculators.filter(c => c.category === category && !c.hidden);
}

export function getCategoriesWithCounts(): { key: string; label: string; icon: string; count: number }[] {
  return Object.entries(categories).map(([key, meta]) => ({
    key,
    ...meta,
    count: calculators.filter(c => c.category === key && !c.hidden).length,
  }));
}

export function searchCalculators(query: string): CalculatorDef[] {
  const q = query.toLowerCase();
  return calculators.filter(c =>
    (c.name.toLowerCase().includes(q) ||
      c.slug.includes(q) ||
      c.labels.in1.toLowerCase().includes(q) ||
      c.labels.in2.toLowerCase().includes(q)) && !c.hidden
  );
}
"""

# Byte-based storage units as (base, exponent): size in bytes = base ** exponent.
# Megabytes, Gigabytes, Terabytes etc. use decimal base-10 sizes (1000);
# Mebibytes, Gibibytes, Tebibytes use binary base-2 sizes (1024).
_DECIMAL_BYTE_EXPONENTS = {
    'byte': 0, 'kilobyte': 3, 'megabyte': 6, 'gigabyte': 9,
    'terabyte': 12, 'petabyte': 15, 'exabyte': 18,
}
_BINARY_BYTE_EXPONENTS = {
    'kibibyte': 10, 'mebibyte': 20, 'gibibyte': 30,
    'tebibyte': 40, 'pebibyte': 50, 'exbibyte': 60,
}


def _coefficient_text(text):
    """Normalize the ``a`` of ``ax`` to a string that ``float()`` accepts.

    An empty coefficient means 1, a lone '-' means -1, and 'p/q' is evaluated
    to its decimal value. Text that cannot be evaluated is returned unchanged
    so the caller's float conversion reports it.
    """
    if text == '':
        return "1"
    if text == '-':
        return "-1"
    if '/' in text:
        numerator, denominator = text.split('/')[:2]
        try:
            return str(float(numerator) / float(denominator))
        except (ValueError, ZeroDivisionError):
            return text
    return text


def _classify(slug, factor_raw):
    """Pick the calculator type and linear parameters for one table row.

    Returns:
        ``(c_type, factor_val, offset_val, custom_labels)`` where the factor
        and offset are strings ("1" / "0" when not applicable) and
        ``custom_labels`` is a labels dict or ``None``.
    """
    c_type = 'standard'
    factor_val = "1"
    offset_val = "0"
    custom_labels = None

    # Special-case calculator families that require custom math beyond simple factors.
    if slug == 'molarity-to-grams-per-liter':
        c_type = 'molarity'
        custom_labels = {'in1': 'Molarity (mol/L)', 'in2': 'Grams per liter', 'in3': 'Molar mass (g/mol)'}
    elif slug == 'rockwell-c-to-vickers':
        c_type = 'rockwell-vickers'
    elif 'ev-to-lux' in slug or 'lux-to-ev' in slug:
        c_type = 'ev-lux'
    elif 'focal-length-to-angle-of-view' in slug:
        c_type = 'aov'
    elif 'awg' in slug:
        c_type = 'awg'
    elif 'swg-to' in slug or '-to-swg' in slug:
        c_type = 'swg'
    elif slug in ('brinell-to-rockwell-c', 'rockwell-c-to-brinell'):
        c_type = 'brinell-rockwell'
    elif slug == 'saybolt-universal-seconds-to-centistokes':
        c_type = 'sus-cst'
    elif '1/x' in factor_raw:
        c_type = 'inverse'
    elif 'Multi-Variable' in factor_raw:
        # Based on hdyc-calculators.js, amps-to-watts is 3col-mul, watts-to-amps is 3col
        if slug in ('amps-to-watts', 'lux-to-lumens'):
            c_type = '3col-mul'
        else:
            c_type = '3col'
    elif 'Logarithmic' in factor_raw or 'Exponential' in factor_raw:
        if 'db-int' in slug or 'intensity' in slug:
            c_type = 'db-int'
        elif 'spl' in slug or 'sound' in slug:
            c_type = 'db-spl'
        elif 'volts' in slug:
            c_type = 'db-v'
        else:
            c_type = 'db-w'
    elif 'Base 60' in factor_raw:
        if slug == 'degrees-minutes-and-seconds-to-decimal-degrees':
            c_type = 'dms-dd'
        else:
            c_type = 'dd-dms'
    elif 'GCD' in factor_raw or 'string split' in factor_raw or 'fraction' in slug:
        c_type = 'dec-frac'
    elif 'N/A' in factor_raw or 'Text' in factor_raw:
        if 'ascii' in slug:
            c_type = 'text-bin' if slug.startswith('ascii') else 'bin-text'
        elif 'binary' in slug or 'hex' in slug or 'decimal' in slug or 'octal' in slug:
            c_type = 'base'
    elif ('10_to_2' in factor_raw or '16_to_2' in factor_raw
          or '10_to_16' in factor_raw or 'base' in factor_raw.lower()):
        c_type = 'base'
    elif 'Linear Offset' in factor_raw:
        # "Linear Offset (1.8x + 32)"; the coefficient may be a fraction or
        # omitted entirely ("Linear Offset (x + 273.15)").
        with_offset = re.search(r'Linear Offset \(([-\d\./]*)x\s*([+-]\s*[\d\.]+)\)', factor_raw)
        if with_offset:
            factor_val = _coefficient_text(with_offset.group(1))
            offset_val = with_offset.group(2).replace(' ', '')
        else:
            factor_only = re.search(r'Linear Offset \(([-\d\./]*)x\)', factor_raw)
            if factor_only:
                factor_val = _coefficient_text(factor_only.group(1))
    else:
        # A plain number is used as the factor; descriptive text keeps the
        # default factor of 1.
        try:
            float(factor_raw)
        except ValueError:
            pass
        else:
            factor_val = factor_raw

    return c_type, factor_val, offset_val, custom_labels


def _base_fields(slug, factor_raw):
    """Derive ``fromBase`` / ``toBase`` for a number-base calculator."""
    fields = {}
    for name, base in (('binary', 2), ('hex', 16), ('octal', 8), ('decimal', 10)):
        # 'hexadecimal' contains 'decimal'; without this a hexadecimal slug
        # would also be read as a decimal one and clobber the hex base.
        haystack = slug.replace('hexadecimal', 'hex') if name == 'decimal' else slug
        if name in haystack:
            key = 'fromBase' if haystack.startswith(name) else 'toBase'
            fields[key] = base

    if 'base-' in slug:
        parts = slug.split('-')
        # Slugs shaped like "base-<n>-to-base-<m>".
        if len(parts) >= 5 and parts[0] == 'base' and parts[2] == 'to' and parts[3] == 'base':
            try:
                fields.setdefault('fromBase', int(parts[1]))
                fields.setdefault('toBase', int(parts[4]))
            except ValueError:
                pass

    if 'base' in factor_raw.lower():
        match = re.search(r'base\s*(\d+)\s*(?:→|to)\s*(?:base\s*)?(\d+)', factor_raw, re.IGNORECASE)
        if match:
            fields.setdefault('fromBase', int(match.group(1)))
            fields.setdefault('toBase', int(match.group(2)))

    return fields


def _data_unit_scale(label):
    """Return ``(base, exponent)`` for a storage unit label, or ``(None, None)``."""
    key = label.lower()
    if key.endswith('s'):
        key = key[:-1]
    if key in _DECIMAL_BYTE_EXPONENTS:
        return 10, _DECIMAL_BYTE_EXPONENTS[key]
    if key in _BINARY_BYTE_EXPONENTS:
        return 2, _BINARY_BYTE_EXPONENTS[key]
    if key == 'bit':
        return 2, -3  # one bit is 1/8 byte = 2 ** -3 bytes
    return None, None


def _data_storage_factor(in1, in2):
    """Return the exact ``in1 -> in2`` factor for known storage units, else None."""
    base1, exp1 = _data_unit_scale(in1)
    base2, exp2 = _data_unit_scale(in2)
    if not (base1 and base2):
        return None
    if base1 == base2:
        return float(base1 ** (exp1 - exp2))
    return float(base1 ** exp1 / base2 ** exp2)


def _hide_reverse_duplicates(entries):
    """Mark one entry of each "A to B" / "B to A" pair as hidden (in place).

    Data-category entries are left alone. Of a pair, the entry with a factor
    below 1 is hidden; when an entry has no factor, the one whose slug sorts
    later is hidden.
    """
    existing_slugs = {entry['slug'] for entry in entries}
    for entry in entries:
        if entry.get('category') == 'data':
            continue
        parsed = split_conversion_name(entry['name'])
        if not parsed:
            continue
        rev_slug = f"{parsed[1]} to {parsed[0]}".lower().replace(' ', '-')
        if rev_slug not in existing_slugs or entry['slug'] == rev_slug:
            continue
        if 'factor' in entry:
            if entry['factor'] < 1.0:
                entry['hidden'] = True
        elif entry['slug'] > rev_slug:
            entry['hidden'] = True


def _render_typescript(entries):
    """Render the complete ``calculators.ts`` source for the given entries."""
    chunks = [_TS_HEADER]
    for entry in entries:
        desc = entry.get('descriptionHTML', '')
        entry_json = json.dumps({k: v for k, v in entry.items() if k != 'descriptionHTML'})
        if desc:
            # The HTML is emitted as a template literal to avoid JSON-encoding
            # it; escape the three sequences that are special inside one.
            literal = desc.replace('\\', '\\\\').replace('`', '\\`').replace('${', '\\${')
            chunks.append(f" {{...{entry_json}, descriptionHTML: `{literal}`}},\n")
        else:
            chunks.append(f" {entry_json},\n")
    chunks.append(_TS_FOOTER)
    return ''.join(chunks)


def process():
    """Generate ``calculators.ts`` from the Markdown calculator list.

    Reads the rows via ``parse_calculators_list()``, skips identity and
    duplicate conversions as well as repeated slugs, classifies every
    remaining row, hides one side of each reverse pair and writes the result
    to ``OUTPUT_FILE``. Progress and skipped rows are reported on stdout.

    Raises:
        ValueError: If a row's category is empty or not in ``CATEGORY_SET``.
    """
    external_descriptions = load_external_descriptions()
    active_rows = parse_calculators_list()

    calculators_ts_entries = []
    seen_slugs = set()
    seen_norm_pairs = set()

    for raw_name, slug, category_raw, factor_raw in active_rows:
        if raw_name == 'Calculator Name' or not slug:
            continue

        display_name, teaser = split_name_and_teaser(raw_name)

        # Name splitting
        parsed = split_conversion_name(display_name)
        if parsed:
            in1, in2 = parsed
            norm_in1 = normalize_label(in1)
            norm_in2 = normalize_label(in2)

            # Skip identity conversions that only differ by spelling/abbreviation
            if norm_in1 == norm_in2:
                print(f"Skipping identity converter {slug}: {in1} -> {in2}")
                continue

            pair_key = (norm_in1, norm_in2)
            if pair_key in seen_norm_pairs:
                print(f"Skipping duplicate converter {slug}: {in1} -> {in2}")
                continue
            seen_norm_pairs.add(pair_key)
        else:
            # Placeholder labels; these must not take part in duplicate
            # detection or every name without " to " would collide.
            in1, in2 = "From", "To"

        category = normalize_category(category_raw)
        if not category:
            raise ValueError(f'Category required for {display_name}')
        if category not in CATEGORY_SET:
            raise ValueError(f'Unknown category "{category_raw}" resolved to "{category}" for {slug}')
        if slug in seen_slugs:
            continue
        seen_slugs.add(slug)
        desc_html = external_descriptions.get(slug, "")

        c_type, factor_val, offset_val, custom_labels = _classify(slug, factor_raw)
        is_three_column = c_type in ('3col', '3col-mul')
        op = '*' if c_type == '3col-mul' else '/'

        # Give 3-col calculators honest display names instead of "A to B"
        if is_three_column and parsed:
            display_name = f"{in1} {op} {in2}"

        entry = {
            'slug': slug,
            'name': display_name,
            'category': category,
            'type': c_type,
        }
        if teaser:
            entry['teaser'] = teaser

        # Determine labels
        labels = {'in1': in1, 'in2': in2}
        if is_three_column:
            # generic 3rd label; make it descriptive instead of the vague "Result"
            if 'watts' in slug and 'amps' in slug:
                labels['in3'] = 'Volts'
            elif 'lumens' in slug:
                labels['in3'] = 'Area (sq m)'
            elif 'moles' in slug:
                labels['in3'] = 'Molar Mass'
            else:
                labels['in3'] = f"{in1} {op} {in2}"
        if custom_labels:
            labels = custom_labels
        entry['labels'] = labels

        if c_type == 'standard':
            # Only non-neutral values are emitted; the neutral ones are the
            # literal strings "1" (factor) and "0" (offset).
            for key, text, neutral in (('factor', factor_val, "1"), ('offset', offset_val, "0")):
                if text == neutral:
                    continue
                try:
                    entry[key] = float(text)
                except ValueError:
                    print(f"Warning: could not parse {key} {text!r} for {slug}")

        if c_type == 'base':
            entry.update(_base_fields(slug, factor_raw))

        if category == 'data' and c_type == 'standard':
            # Fix data scale names and factors: recognised unit pairs get an
            # exactly computed factor instead of the one from the table.
            data_factor = _data_storage_factor(in1, in2)
            if data_factor is not None:
                entry['factor'] = data_factor

        # Remove empty descriptions
        if desc_html:
            entry['descriptionHTML'] = desc_html.replace('\n', '')

        calculators_ts_entries.append(entry)

    # Mark duplicates / reverse pairs
    # E.g. If "A to B" has a factor F, and "B to A" exists
    _hide_reverse_duplicates(calculators_ts_entries)

    # write to TS
    out = _render_typescript(calculators_ts_entries)
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        f.write(out)

    print(f"Generated {len(calculators_ts_entries)} calculators into calculators.ts")
|
|
|
|
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    process()
|