"""Generate the calculator data files from ``calculators_list.md``.

The script reads the Markdown table whose header contains "Calculator Name",
turns every row into a calculator definition, and writes three artifacts:

* ``calculators.ts``   - typed definitions plus lookup helpers,
* ``stats.ts``         - category metadata and the total calculator count,
* ``calculators.json`` - the same entries as plain JSON.
"""
import json
import os
import re
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple

BASE_DIR = Path(__file__).resolve().parent
CALCLIST = BASE_DIR / 'calculators_list.md'
OUTPUT_FILE = BASE_DIR / 'hdyc-svelte/src/lib/data/calculators.ts'
STATS_FILE = BASE_DIR / 'hdyc-svelte/src/lib/data/stats.ts'
CALCULATORS_JSON = BASE_DIR / 'hdyc-svelte/static/data/calculators.json'

CATEGORY_KEYS = [
    'length',
    'weight',
    'temperature',
    'volume',
    'fluids',
    'area',
    'speed',
    'pressure',
    'energy',
    'magnetism',
    'power',
    'data',
    'time',
    'angle',
    'number-systems',
    'radiation',
    'electrical',
    'force',
    'light',
    'other',
]

# Icons are written as escapes so the file survives being re-saved in a
# non-UTF-8 encoding (the emoji literals had previously turned into mojibake).
CATEGORIES = {
    'length': {'label': 'Length / Distance', 'icon': '\U0001F4CF'},  # ruler
    'weight': {'label': 'Weight / Mass', 'icon': '\u2696\uFE0F'},  # scales
    'temperature': {'label': 'Temperature', 'icon': '\U0001F321\uFE0F'},  # thermometer
    'volume': {'label': 'Volume', 'icon': '\U0001F9EA'},  # test tube
    'fluids': {'label': 'Fluids', 'icon': '\U0001F4A7'},  # droplet
    'area': {'label': 'Area', 'icon': '\U0001F533'},  # square button
    'speed': {'label': 'Speed / Velocity', 'icon': '\U0001F4A8'},  # dash
    'pressure': {'label': 'Pressure', 'icon': '\U0001F53D'},  # down button
    'energy': {'label': 'Energy', 'icon': '\u26A1'},  # high voltage
    'magnetism': {'label': 'Magnetism', 'icon': '\U0001F9F2'},  # magnet
    'power': {'label': 'Power', 'icon': '\U0001F50C'},  # plug
    'data': {'label': 'Data Storage', 'icon': '\U0001F4BE'},  # floppy disk
    'time': {'label': 'Time', 'icon': '\u23F1\uFE0F'},  # stopwatch
    'angle': {'label': 'Angle', 'icon': '\U0001F4D0'},  # triangular ruler
    'number-systems': {'label': 'Number Systems', 'icon': '\U0001F522'},  # 1234
    'radiation': {'label': 'Radiation', 'icon': '\u2622\uFE0F'},  # radioactive
    'electrical': {'label': 'Electrical', 'icon': '\U0001F50B'},  # battery
    'force': {'label': 'Force / Torque', 'icon': '\U0001F4AA'},  # flexed biceps
    'light': {'label': 'Light', 'icon': '\U0001F4A1'},  # light bulb
    'other': {'label': 'Other', 'icon': '\U0001F504'},  # arrows
}
CATEGORY_SET = set(CATEGORY_KEYS)

# Lightweight label normalization to catch duplicate/identity conversions
# that differ only by abbreviations (e.g., "cm" vs "centimeters").
TOKEN_MAP = {
    'cm': 'centimeter',
    'centimeter': 'centimeter',
    'centimetre': 'centimeter',
    'centimetres': 'centimeter',
    'centimeters': 'centimeter',
    'mm': 'millimeter',
    'millimeter': 'millimeter',
    'millimeters': 'millimeter',
    'millimetre': 'millimeter',
    'millimetres': 'millimeter',
    'm': 'meter',
    'meter': 'meter',
    'meters': 'meter',
    'metre': 'meter',
    'metres': 'meter',
    'km': 'kilometer',
    'kilometer': 'kilometer',
    'kilometers': 'kilometer',
    'kilometre': 'kilometer',
    'kilometres': 'kilometer',
    'in': 'inch',
    'inch': 'inch',
    'inches': 'inch',
    'ft': 'foot',
    'foot': 'foot',
    'feet': 'foot',
}

# Columns that must all be present in the table header for rows to be read.
_REQUIRED_COLUMNS = ('Calculator Name', 'Slug', 'Category', 'Conversion Factor')

# " - ", " en-dash " or " em-dash " separates a calculator name from its teaser.
_TEASER_SPLIT_RE = re.compile(r'\s[\u2013\u2014-]\s')
_CONVERSION_SPLIT_RE = re.compile(r'\s+to\s+', re.IGNORECASE)
_LABEL_TOKEN_SPLIT_RE = re.compile(r'[^a-z0-9]+')
# A Markdown alignment cell such as "---", ":---" or ":---:".
_SEPARATOR_CELL_RE = re.compile(r':?-+:?')
# "Linear Offset (1.8x + 32)" / "Linear Offset (5/9x)".
_LINEAR_OFFSET_RE = re.compile(r'Linear Offset \(([-\d\./]+)x\s*([+-]\s*[\d\.]+)\)')
_LINEAR_SCALE_RE = re.compile(r'Linear Offset \(([-\d\./]+)x\)')
# "base 10 to 2", "Base 16 (arrow) base 2", ...  (\u2192 is the right arrow).
_BASE_PAIR_RE = re.compile(
    r'base\s*(\d+)\s*(?:\u2192|to)\s*(?:base\s*)?(\d+)', re.IGNORECASE
)

# Slug keyword -> numeral base, checked in this order.
_BASE_KEYWORDS = (('binary', 2), ('hex', 16), ('octal', 8), ('decimal', 10))

# Power-of-ten and power-of-two exponents (in bytes) for data-size units.
_DECIMAL_DATA_EXPONENTS = {
    'byte': 0, 'kilobyte': 3, 'megabyte': 6, 'gigabyte': 9,
    'terabyte': 12, 'petabyte': 15, 'exabyte': 18,
}
_BINARY_DATA_EXPONENTS = {
    'kibibyte': 10, 'mebibyte': 20, 'gibibyte': 30,
    'tebibyte': 40, 'pebibyte': 50, 'exbibyte': 60,
}

_THREE_COLUMN_TYPES = ('3col', '3col-mul')

# Index signature instead of the generic Record utility type: same meaning,
# but contains no angle brackets that a tag-stripping tool could eat.
_TS_CATEGORIES_TYPE = '{ [key: string]: { label: string; icon: string } }'

_TS_HEADER = """// THIS FILE IS AUTO-GENERATED BY migrate.py
export type CalcType = 'standard' | 'inverse' | '3col' | '3col-mul' | 'base' | 'text-bin' | 'bin-text' | 'dms-dd' | 'dd-dms' | 'dec-frac' | 'db-int' | 'db-spl' | 'db-v' | 'db-w' | 'awg' | 'brinell-rockwell' | 'ev-lux' | 'aov' | 'swg' | 'rockwell-vickers' | 'sus-cst' | 'molarity';

export interface CalculatorDef {
  slug: string;
  name: string;
  category: string;
  type: CalcType;
  hidden?: boolean;
  factor?: number;
  offset?: number;
  fromBase?: number;
  toBase?: number;
  labels: { in1: string; in2: string; in3?: string };
  descriptionHTML?: string;
  teaser?: string;
}

"""

_TS_FOOTER = """
// Slug -> definition index backing getCalculatorBySlug.
const slugIndex = new Map(calculators.map((c): [string, CalculatorDef] => [c.slug, c]));

export function getCalculatorBySlug(slug: string): CalculatorDef | undefined {
  return slugIndex.get(slug);
}

export function getCalculatorsByCategory(category: string): CalculatorDef[] {
  return calculators.filter(c => c.category === category && !c.hidden);
}

export function getCategoriesWithCounts(): { key: string; label: string; icon: string; count: number }[] {
  return Object.entries(categories).map(([key, meta]) => ({
    key,
    ...meta,
    count: calculators.filter(c => c.category === key && !c.hidden).length,
  }));
}

export function searchCalculators(query: string): CalculatorDef[] {
  const q = query.toLowerCase();
  return calculators.filter(c =>
    (c.name.toLowerCase().includes(q) ||
      c.slug.includes(q) ||
      c.labels.in1.toLowerCase().includes(q) ||
      c.labels.in2.toLowerCase().includes(q)) &&
    !c.hidden
  );
}
"""


def normalize_label(label: str) -> str:
    """Canonicalize a unit label for duplicate detection.

    - Lowercase
    - Replace '/' with ' per ' to align fraction style with text style
    - Strip punctuation into tokens
    - Collapse common abbreviations/plurals via TOKEN_MAP, falling back to a
      simple singularization (drop a trailing 's' on tokens longer than 3)

    The full token is looked up in TOKEN_MAP *before* the trailing 's' is
    dropped; otherwise "inches" would be cut to "inche" and never be mapped.
    """
    cleaned = label.lower().replace('/', ' per ')
    normalized_tokens = []
    for tok in _LABEL_TOKEN_SPLIT_RE.split(cleaned):
        if not tok:
            continue
        canonical = TOKEN_MAP.get(tok)
        if canonical is None:
            # Drop a trailing 's' for simple plurals, but leave short
            # abbreviations like 'cms' alone.
            base = tok[:-1] if tok.endswith('s') and len(tok) > 3 else tok
            canonical = TOKEN_MAP.get(base, base)
        normalized_tokens.append(canonical)
    return ' '.join(normalized_tokens)


def load_external_descriptions() -> Dict[str, str]:
    """Return slug -> description HTML. Placeholder: currently always empty."""
    return {}


def _split_row(line: str) -> List[str]:
    """Split one Markdown table line into stripped cell strings."""
    return [cell.strip() for cell in line.strip().strip('|').split('|')]


def parse_table_lines(lines: Iterable[str]) -> List[Tuple[str, str, str, str]]:
    """Extract ``(name, slug, category, factor)`` rows from Markdown lines.

    Reading starts at the first line containing ``| Calculator Name`` and
    stops at the next ``## `` heading after it. Alignment rows and rows that
    are too short to hold all four required columns are skipped, as are all
    rows when the header lacks one of the required columns.
    """
    rows = []
    in_table = False
    column_indexes = None  # type: Optional[List[int]]
    for line in lines:
        if in_table and line.startswith('## '):
            break
        if '| Calculator Name' in line:
            in_table = True
            header_map = {header: idx for idx, header in enumerate(_split_row(line))}
            found = [header_map.get(column) for column in _REQUIRED_COLUMNS]
            column_indexes = None if None in found else found
            continue
        if not in_table or not line.startswith('|'):
            continue
        cells = _split_row(line)
        # Skip the alignment row whatever dash style it uses.
        if line.startswith('| :---') or all(
            _SEPARATOR_CELL_RE.fullmatch(cell) for cell in cells
        ):
            continue
        if column_indexes is None or len(cells) <= max(column_indexes):
            continue
        rows.append(tuple(cells[idx] for idx in column_indexes))
    return rows


def parse_calculators_list():
    """Read CALCLIST and return its calculator rows (see parse_table_lines)."""
    # Explicit UTF-8: names may contain en/em dashes and must not depend on
    # the platform's default encoding.
    with open(CALCLIST, 'r', encoding='utf-8') as f:
        return parse_table_lines(f)


def split_name_and_teaser(name):
    """Split "Name - teaser" into ``(name, teaser)``; teaser is '' if absent.

    The separator is a hyphen, en dash or em dash surrounded by whitespace;
    only the first one splits.
    """
    parts = _TEASER_SPLIT_RE.split(name, maxsplit=1)
    if len(parts) == 2:
        return parts[0].strip(), parts[1].strip()
    return name.strip(), ''


def split_conversion_name(name):
    """Split "A to B" (case-insensitive "to") into ``(A, B)``, else None."""
    parts = _CONVERSION_SPLIT_RE.split(name, maxsplit=1)
    if len(parts) == 2:
        return parts[0].strip(), parts[1].strip()
    return None


def normalize_category(raw: str) -> str:
    """Turn a table category into a key: lowercase, spaces to '-', rest dropped."""
    normalized = raw.strip().lower().replace(' ', '-')
    return re.sub(r'[^a-z0-9-]', '', normalized)


def _ratio_to_str(text: str) -> str:
    """Return ``text`` unchanged, or the decimal string of an "a/b" fraction.

    Raises ValueError / ZeroDivisionError for a malformed fraction.
    """
    if '/' not in text:
        return text
    numerator, denominator = text.split('/', 1)
    return str(float(numerator) / float(denominator))


def _classify(slug: str, factor_raw: str):
    """Decide the calculator type and raw factor/offset for one table row.

    Returns ``(c_type, factor_val, offset_val, custom_labels)`` where the
    factor and offset are still strings ("1" / "0" meaning "not set") and
    ``custom_labels`` is a labels dict or None. Branch order matters: the
    first matching rule wins.
    """
    c_type = 'standard'
    factor_val = '1'
    offset_val = '0'
    custom_labels = None

    # Special-case calculator families that require custom math beyond
    # simple factors.
    if slug == 'molarity-to-grams-per-liter':
        c_type = 'molarity'
        custom_labels = {
            'in1': 'Molarity (mol/L)',
            'in2': 'Grams per liter',
            'in3': 'Molar mass (g/mol)',
        }
    elif slug == 'rockwell-c-to-vickers':
        c_type = 'rockwell-vickers'
    elif 'ev-to-lux' in slug or 'lux-to-ev' in slug:
        c_type = 'ev-lux'
    elif 'focal-length-to-angle-of-view' in slug:
        c_type = 'aov'
    elif 'awg' in slug:
        c_type = 'awg'
    elif 'swg-to' in slug or '-to-swg' in slug:
        c_type = 'swg'
    elif slug in ('brinell-to-rockwell-c', 'rockwell-c-to-brinell'):
        c_type = 'brinell-rockwell'
    elif slug == 'saybolt-universal-seconds-to-centistokes':
        c_type = 'sus-cst'
    elif '1/x' in factor_raw:
        c_type = 'inverse'
    elif 'Multi-Variable' in factor_raw:
        # amps-to-watts and lux-to-lumens multiply their inputs; every other
        # multi-variable calculator divides.
        c_type = '3col-mul' if slug in ('amps-to-watts', 'lux-to-lumens') else '3col'
    elif 'Logarithmic' in factor_raw or 'Exponential' in factor_raw:
        if 'db-int' in slug or 'intensity' in slug:
            c_type = 'db-int'
        elif 'spl' in slug or 'sound' in slug:
            c_type = 'db-spl'
        elif 'volts' in slug:
            c_type = 'db-v'
        else:
            c_type = 'db-w'
    elif 'Base 60' in factor_raw:
        if slug == 'degrees-minutes-and-seconds-to-decimal-degrees':
            c_type = 'dms-dd'
        else:
            c_type = 'dd-dms'
    elif 'GCD' in factor_raw or 'string split' in factor_raw or 'fraction' in slug:
        c_type = 'dec-frac'
    elif 'N/A' in factor_raw or 'Text' in factor_raw:
        if 'ascii' in slug:
            c_type = 'text-bin' if slug.startswith('ascii') else 'bin-text'
        elif any(word in slug for word in ('binary', 'hex', 'decimal', 'octal')):
            c_type = 'base'
    elif (
        '10_to_2' in factor_raw
        or '16_to_2' in factor_raw
        or '10_to_16' in factor_raw
        or 'base' in factor_raw.lower()
    ):
        c_type = 'base'
    elif 'Linear Offset' in factor_raw:
        with_offset = _LINEAR_OFFSET_RE.search(factor_raw)
        match = with_offset or _LINEAR_SCALE_RE.search(factor_raw)
        if match:
            try:
                # Fractions such as "5/9" are resolved in both forms.
                factor_val = _ratio_to_str(match.group(1))
            except (ValueError, ZeroDivisionError):
                print(f"Warning: unreadable scale in {factor_raw!r} for {slug}")
        if with_offset:
            offset_val = with_offset.group(2).replace(' ', '')
    else:
        try:
            float(factor_raw)
        except ValueError:
            # Non-numeric factor text: leave the neutral default in place.
            pass
        else:
            factor_val = factor_raw

    return c_type, factor_val, offset_val, custom_labels


def _set_float(entry: Dict[str, object], key: str, text: str, slug: str) -> None:
    """Store ``float(text)`` under ``key``; warn instead of failing silently."""
    try:
        entry[key] = float(text)
    except ValueError:
        print(f"Warning: ignoring non-numeric {key} {text!r} for {slug}")


def _apply_base_fields(entry: Dict[str, object], slug: str, factor_raw: str) -> None:
    """Fill ``fromBase`` / ``toBase`` on a number-base calculator entry.

    Sources, in order: base keywords in the slug, a ``base-N-to-base-M`` slug,
    then a "base N to M" phrase in the factor text. The latter two only fill
    values that are still missing.
    """
    # "hexadecimal" contains "decimal"; collapse it so the decimal check does
    # not overwrite a base that was already derived from the hex keyword.
    decimal_haystack = slug.replace('hexadecimal', 'hex')
    for word, base in _BASE_KEYWORDS:
        haystack = decimal_haystack if word == 'decimal' else slug
        if word in haystack:
            entry['fromBase' if slug.startswith(word) else 'toBase'] = base

    if 'base-' in slug:
        parts = slug.split('-')
        if len(parts) >= 5 and parts[0] == 'base' and parts[2] == 'to' and parts[3] == 'base':
            try:
                from_base, to_base = int(parts[1]), int(parts[4])
            except ValueError:
                pass
            else:
                entry.setdefault('fromBase', from_base)
                entry.setdefault('toBase', to_base)

    if 'base' in factor_raw.lower():
        match = _BASE_PAIR_RE.search(factor_raw)
        if match:
            entry.setdefault('fromBase', int(match.group(1)))
            entry.setdefault('toBase', int(match.group(2)))


def _data_unit_bits(label: str) -> Optional[int]:
    """Return the size of a data unit in bits, or None if it is not known.

    Decimal units (kilobyte, megabyte, ...) are powers of 1000 bytes; binary
    units (kibibyte, mebibyte, ...) are powers of 1024 bytes; a bit is 1/8 byte.
    """
    key = label.lower()
    if key.endswith('s'):
        key = key[:-1]
    if key in _DECIMAL_DATA_EXPONENTS:
        return 8 * 10 ** _DECIMAL_DATA_EXPONENTS[key]
    if key in _BINARY_DATA_EXPONENTS:
        return 8 * 2 ** _BINARY_DATA_EXPONENTS[key]
    if key == 'bit':
        return 1
    return None


def build_entries(rows, external_descriptions=None) -> List[Dict[str, object]]:
    """Convert parsed table rows into calculator entry dicts.

    Args:
        rows: iterable of ``(name, slug, category, factor)`` string tuples.
        external_descriptions: optional slug -> description HTML mapping.

    Returns:
        One dict per kept row, in input order. Identity conversions, repeated
        unit pairs and repeated slugs are skipped.

    Raises:
        ValueError: a row has an empty category or one not in CATEGORY_SET.
    """
    descriptions = external_descriptions or {}
    entries = []
    seen_slugs = set()
    seen_norm_pairs = set()

    for raw_name, slug, category_raw, factor_raw in rows:
        if raw_name == 'Calculator Name' or not slug:
            continue

        display_name, teaser = split_name_and_teaser(raw_name)
        parsed = split_conversion_name(display_name)
        if parsed:
            in1, in2 = parsed
            norm_in1 = normalize_label(in1)
            norm_in2 = normalize_label(in2)
            # Skip identity conversions that only differ by spelling/abbreviation
            if norm_in1 == norm_in2:
                print(f"Skipping identity converter {slug}: {in1} -> {in2}")
                continue
            pair_key = (norm_in1, norm_in2)
            if pair_key in seen_norm_pairs:
                print(f"Skipping duplicate converter {slug}: {in1} -> {in2}")
                continue
            seen_norm_pairs.add(pair_key)
        else:
            # Placeholder labels must not take part in duplicate detection,
            # or every second name without " to " would be dropped.
            in1, in2 = "From", "To"

        category = normalize_category(category_raw)
        if not category:
            raise ValueError(f'Category required for {display_name}')
        if category not in CATEGORY_SET:
            raise ValueError(
                f'Unknown category "{category_raw}" resolved to "{category}" for {slug}'
            )

        if slug in seen_slugs:
            continue
        seen_slugs.add(slug)

        c_type, factor_val, offset_val, custom_labels = _classify(slug, factor_raw)
        is_three_column = c_type in _THREE_COLUMN_TYPES
        operator = '*' if c_type == '3col-mul' else '/'

        # Give 3-col calculators honest display names instead of "A to B"
        if is_three_column and parsed:
            display_name = f"{in1} {operator} {in2}"

        entry = {
            'slug': slug,
            'name': display_name,
            'category': category,
            'type': c_type,
        }
        if teaser:
            entry['teaser'] = teaser

        labels = {'in1': in1, 'in2': in2}
        if is_three_column:
            # Descriptive third label instead of a vague "Result".
            if 'watts' in slug and 'amps' in slug:
                labels['in3'] = 'Volts'
            elif 'lumens' in slug:
                labels['in3'] = 'Area (sq m)'
            elif 'moles' in slug:
                labels['in3'] = 'Molar Mass'
            else:
                labels['in3'] = f"{in1} {operator} {in2}"
        entry['labels'] = custom_labels if custom_labels else labels

        if c_type == 'standard' and factor_val != "1":
            _set_float(entry, 'factor', factor_val, slug)
        if c_type == 'standard' and offset_val != "0":
            _set_float(entry, 'offset', offset_val, slug)

        if c_type == 'base':
            _apply_base_fields(entry, slug, factor_raw)

        if category == 'data' and c_type == 'standard':
            # Recompute data-size factors from the unit names so decimal
            # (1000-based) and binary (1024-based) units are never mixed up.
            bits_in = _data_unit_bits(in1)
            bits_out = _data_unit_bits(in2)
            if bits_in is not None and bits_out is not None:
                # Exact integer ratio; avoids float pow rounding.
                entry['factor'] = bits_in / bits_out

        desc_html = descriptions.get(slug, "")
        if desc_html:
            entry['descriptionHTML'] = desc_html.replace('"', '\\"').replace('\n', '')

        entries.append(entry)

    return entries


def _mark_reverse_pairs_hidden(entries: List[Dict[str, object]]) -> None:
    """Set ``hidden`` on one side of each "A to B" / "B to A" pair, in place.

    An entry with a factor is hidden when that factor is below 1; an entry
    without a factor is hidden when its slug sorts after the reverse slug.
    Entries in the 'data' category are never hidden.
    """
    known_slugs = {entry['slug'] for entry in entries}
    for entry in entries:
        if entry.get('category') == 'data':
            continue
        parsed = split_conversion_name(entry['name'])
        if not parsed:
            continue
        rev_slug = f"{parsed[1]} to {parsed[0]}".lower().replace(' ', '-')
        if rev_slug not in known_slugs or rev_slug == entry['slug']:
            continue
        if 'factor' in entry:
            if entry['factor'] < 1.0:
                entry['hidden'] = True
        elif entry['slug'] > rev_slug:
            entry['hidden'] = True


def _render_calculators_ts(entries: List[Dict[str, object]]) -> str:
    """Return the text of calculators.ts.

    Note: ``descriptionHTML`` is popped from each entry while rendering, so
    the entries no longer carry it afterwards.
    """
    chunks = [_TS_HEADER, f"export const categories: {_TS_CATEGORIES_TYPE} = {{\n"]
    for key, meta in CATEGORIES.items():
        meta_json = json.dumps(meta, ensure_ascii=False).replace('{', '{ ').replace('}', ' }')
        chunks.append(f"  '{key}': {meta_json},\n")
    chunks.append("};\n")

    chunks.append("\nexport const calculators: CalculatorDef[] = [\n")
    for entry in entries:
        desc = entry.pop('descriptionHTML', '')
        entry_json = json.dumps(entry)
        if desc:
            # Inject the HTML as a template literal so it is not JSON-encoded
            # a second time.
            chunks.append(f"  {{...{entry_json}, descriptionHTML: `{desc}`}},\n")
        else:
            chunks.append(f"  {entry_json},\n")
    chunks.append("];\n")

    chunks.append(_TS_FOOTER)
    return ''.join(chunks)


def _render_stats_ts(total_count: int) -> str:
    """Return the text of stats.ts (category metadata and total count)."""
    categories_json = json.dumps(CATEGORIES, indent=2, ensure_ascii=False)
    return (
        "// THIS FILE IS AUTO-GENERATED BY migrate.py\n"
        f"export const categories: {_TS_CATEGORIES_TYPE} = {categories_json};\n"
        f"export const totalCalculators = {total_count};\n"
    )


def process():
    """Regenerate calculators.ts, stats.ts and calculators.json from CALCLIST."""
    entries = build_entries(parse_calculators_list(), load_external_descriptions())
    _mark_reverse_pairs_hidden(entries)

    calculators_ts = _render_calculators_ts(entries)
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        f.write(calculators_ts)
    print(f"Generated {len(entries)} calculators into calculators.ts")

    total_count = len(entries)
    with open(STATS_FILE, 'w', encoding='utf-8') as f:
        f.write(_render_stats_ts(total_count))
    print(f"Generated stats.ts with {total_count} total calculators")

    # Generate calculators.json for true lazy loading
    os.makedirs(os.path.dirname(CALCULATORS_JSON), exist_ok=True)
    with open(CALCULATORS_JSON, 'w', encoding='utf-8') as f:
        json.dump(entries, f, ensure_ascii=False, indent=2)
    print(f"Generated calculators.json (Size: {os.path.getsize(CALCULATORS_JSON) // 1024}KB)")


if __name__ == '__main__':
    process()