# HowDoYouConvert/migrate.py

import json
import re
from pathlib import Path

# Directory containing this script; the input and output paths below are anchored to it.
BASE_DIR = Path(__file__).resolve().parent
# Markdown file holding the calculator table that parse_calculators_list() reads.
CALCLIST = BASE_DIR / 'calculators_list.md'
# TypeScript module that process() (re)generates.
OUTPUT_FILE = BASE_DIR / 'hdyc-svelte/src/lib/data/calculators.ts'

def load_external_descriptions():
    """Return the slug -> description HTML mapping used to enrich entries.

    No enrichment source is wired in yet, so the mapping is always empty.
    A fresh dict is returned on every call.
    """
    # Placeholder for future enrichment sources.
    descriptions = dict()
    return descriptions

def parse_calculators_list(path=None):
    """Extract ``(name, slug, conversion_factor)`` rows from the markdown list.

    The table is located by its header row (any line containing
    ``| Calculator Name``); column positions are taken from that header, so
    the three required columns ('Calculator Name', 'Slug',
    'Conversion Factor') may appear in any order. Parsing stops at the first
    ``## `` heading that follows the table.

    Args:
        path: Markdown file to read. Defaults to the module-level ``CALCLIST``.

    Returns:
        A list of ``(name, slug, factor_raw)`` string tuples in file order.
        Rows too short to contain all three columns are skipped.
    """
    source = CALCLIST if path is None else path
    # The list contains non-ASCII punctuation (en/em dashes), so do not rely
    # on the platform's default encoding.
    with open(source, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    def split_cells(row):
        return [cell.strip() for cell in row.strip().strip('|').split('|')]

    def is_divider(row, cells):
        # Accept every common spelling of the header divider
        # ("| :--- |", "|---|", "| ---: |"), not just the "| :---" form.
        if row.startswith('| :---'):
            return True
        return bool(cells) and all(
            cell and '-' in cell and set(cell) <= {'-', ':'} for cell in cells
        )

    active_calcs = []
    in_table = False
    name_idx = slug_idx = factor_idx = None
    for line in lines:
        if in_table and line.startswith('## '):
            break
        if '| Calculator Name' in line:
            in_table = True
            header_map = {header: idx for idx, header in enumerate(split_cells(line))}
            # Resolve the column positions once per header instead of per row.
            name_idx = header_map.get('Calculator Name')
            slug_idx = header_map.get('Slug')
            factor_idx = header_map.get('Conversion Factor')
            continue
        if not in_table or not line.startswith('|'):
            continue

        parts = split_cells(line)
        if is_divider(line, parts):
            continue
        if None in (name_idx, slug_idx, factor_idx):
            # The header lacks a required column; no row can be interpreted.
            continue
        if len(parts) > max(name_idx, slug_idx, factor_idx):
            active_calcs.append((parts[name_idx], parts[slug_idx], parts[factor_idx]))

    return active_calcs

def split_name_and_teaser(name):
    """Separate a raw table name into ``(display_name, teaser)``.

    The split happens at the first dash (hyphen, en dash or em dash) that has
    a whitespace character on both sides. When there is no such separator the
    teaser is the empty string. Both parts are whitespace-trimmed.
    """
    separator = re.search(r'\s[–—-]\s', name)
    if separator is None:
        return name.strip(), ''
    title = name[:separator.start()]
    teaser = name[separator.end():]
    return title.strip(), teaser.strip()


def split_conversion_name(name):
    """Split an "X to Y" name into ``(X, Y)``.

    The first whitespace-delimited "to" (case-insensitive) is the separator.
    Returns ``None`` when the name contains no such separator.
    """
    joiner = re.search(r'\s+to\s+', name, flags=re.IGNORECASE)
    if joiner is None:
        return None
    source_unit = name[:joiner.start()].strip()
    target_unit = name[joiner.end():].strip()
    return source_unit, target_unit

# Ordered (category, keywords) rules for guess_category(). The first rule with
# any keyword occurring as a substring of the lower-cased name wins, so the
# order is significant: e.g. 'newton' must hit 'force' before 'ton' can hit
# 'weight', and the explicit temperature-scale markers come first of all.
_CATEGORY_RULES = (
    ('temperature', ('temp scale', 'newton (temp')),
    ('force', ('force', 'torque', 'newton', 'dyne', 'foot-pound')),
    ('area', ('acre', 'hectare', 'square')),
    ('length', ('meter', 'inch', 'feet', 'yard', 'mile', 'cable', 'fathom', 'rod', 'chain', 'nautical', 'league')),
    ('weight', ('gram', 'pound', 'ounce', 'carat', 'stone', 'slug', 'ton', 'pennyweight', 'grain', 'momme')),
    ('temperature', ('celsius', 'fahrenheit', 'kelvin', 'rankine', 'delisle', 'reaumur', 'réaumur', 'romer', 'rømer')),
    ('volume', ('liter', 'gallon', 'cup', 'pint', 'quart', 'fluid', 'milliliter', 'spoon', 'drop')),
    # RPM might be frequency, close enough
    ('speed', (' per ', 'knot', 'mach', 'rpm')),
    ('pressure', ('pascal', 'bar', 'psi', 'atmosphere', 'mmhg', 'torr', 'water', 'mercury')),
    ('energy', ('joule', 'calorie', 'btu', 'erg', 'therm', 'electron-volt')),
    ('power', ('watt', 'horsepower')),
    ('data', ('byte', 'bit', 'nibble', 'baud')),
    ('light', ('light', 'lumen', 'lux', 'candela')),
    ('time', ('second', 'minute', 'hour', 'day', 'week', 'month', 'year')),
    ('angle', ('degree', 'radian', 'mil ', 'arc', 'gradian', 'quadrant')),
    ('number-systems', ('binary', 'hex', 'octal', 'decimal', 'ascii', 'fraction')),
    ('radiation', ('becquerel', 'curie', 'gray', 'rad', 'sievert', 'rem', 'roentgen', 'rutherford')),
    ('electrical', ('volt', 'amp', 'ohm', 'siemens', 'farad', 'henry', 'coulomb')),
)


def guess_category(name):
    """Guess a category key for a calculator from keywords in its name.

    Matching is case-insensitive substring search against an ordered rule
    table; the first matching rule decides. Returns ``'other'`` when no
    keyword is found.
    """
    lowered = name.lower()
    for category, keywords in _CATEGORY_RULES:
        for keyword in keywords:
            if keyword in lowered:
                return category
    return 'other'

# Size of every recognised data-storage unit, in bits. Keys are singular,
# lower-case unit names. Decimal prefixes (kilo, mega, ...) are powers of
# 1000; binary prefixes (kibi, mebi, ...) are powers of 1024.
_DATA_UNIT_BITS = {
    'bit': 1,
    'byte': 8,
    'kilobyte': 8 * 10 ** 3,
    'megabyte': 8 * 10 ** 6,
    'gigabyte': 8 * 10 ** 9,
    'terabyte': 8 * 10 ** 12,
    'petabyte': 8 * 10 ** 15,
    'exabyte': 8 * 10 ** 18,
    'kibibyte': 8 * 2 ** 10,
    'mebibyte': 8 * 2 ** 20,
    'gibibyte': 8 * 2 ** 30,
    'tebibyte': 8 * 2 ** 40,
    'pebibyte': 8 * 2 ** 50,
    'exbibyte': 8 * 2 ** 60,
}

# "Linear Offset (1.8x + 32)", "Linear Offset (5/9x - 17.78)", "Linear Offset (2x)".
# Group 1 is the slope (decimal or a/b fraction, optionally negative);
# group 2 is the signed offset, absent when the formula has no offset term.
_LINEAR_OFFSET_RE = re.compile(r'Linear Offset \((-?[\d./]+)x(?:\s*([+-]\s*[\d.]+))?\)')

_TS_HEADER = """// THIS FILE IS AUTO-GENERATED BY migrate.py
export type CalcType = 'standard' | 'inverse' | '3col' | '3col-mul' | 'base' | 'text-bin' | 'bin-text' | 'dms-dd' | 'dd-dms' | 'dec-frac' | 'db-int' | 'db-spl' | 'db-v' | 'db-w';

export interface CalculatorDef {
  slug: string;
  name: string;
  category: string;
  type: CalcType;
  hidden?: boolean;
  factor?: number;
  offset?: number;
  fromBase?: number;
  toBase?: number;
  labels: { in1: string; in2: string; in3?: string };
  descriptionHTML?: string;
  teaser?: string;
}

export const categories: Record<string, { label: string; icon: string }> = {
  length:          { label: 'Length / Distance',   icon: '📏' },
  weight:          { label: 'Weight / Mass',       icon: '⚖️' },
  temperature:     { label: 'Temperature',         icon: '🌡️' },
  volume:          { label: 'Volume',              icon: '🧪' },
  area:            { label: 'Area',                icon: '📐' },
  speed:           { label: 'Speed / Velocity',    icon: '💨' },
  pressure:        { label: 'Pressure',            icon: '🔽' },
  energy:          { label: 'Energy',              icon: '⚡' },
  power:           { label: 'Power',               icon: '🔌' },
  data:            { label: 'Data Storage',        icon: '💾' },
  time:            { label: 'Time',                icon: '⏱️' },
  angle:           { label: 'Angle',               icon: '📐' },
  'number-systems':{ label: 'Number Systems',      icon: '🔢' },
  radiation:       { label: 'Radiation',           icon: '☢️' },
  electrical:      { label: 'Electrical',          icon: '🔋' },
  force:           { label: 'Force / Torque',      icon: '💪' },
  light:           { label: 'Light',               icon: '💡' },
  other:           { label: 'Other',               icon: '🔄' },
};

export const calculators: CalculatorDef[] = [
"""

_TS_FOOTER = """
];

const slugIndex = new Map(calculators.map(c => [c.slug, c]));

export function getCalculatorBySlug(slug: string): CalculatorDef | undefined {
  return slugIndex.get(slug);
}

export function getCalculatorsByCategory(category: string): CalculatorDef[] {
  return calculators.filter(c => c.category === category && !c.hidden);
}

export function getCategoriesWithCounts(): { key: string; label: string; icon: string; count: number }[] {
  return Object.entries(categories).map(([key, meta]) => ({
    key,
    ...meta,
    count: calculators.filter(c => c.category === key && !c.hidden).length,
  }));
}

export function searchCalculators(query: string): CalculatorDef[] {
  const q = query.toLowerCase();
  return calculators.filter(c =>
    (c.name.toLowerCase().includes(q) ||
    c.slug.includes(q) ||
    c.labels.in1.toLowerCase().includes(q) ||
    c.labels.in2.toLowerCase().includes(q)) && !c.hidden
  );
}
"""


def _as_float(text):
    """Return ``float(text)``, or ``None`` when *text* is not a number."""
    try:
        return float(text)
    except ValueError:
        return None


def _parse_ratio(text):
    """Normalise a slope written as a decimal or ``a/b`` fraction.

    Fractions are evaluated and returned as a decimal string; plain decimals
    are returned unchanged. Returns ``None`` for malformed input or a zero
    denominator.
    """
    numerator, slash, denominator = text.partition('/')
    try:
        if slash:
            return str(float(numerator) / float(denominator))
        float(text)
    except (ValueError, ZeroDivisionError):
        return None
    return text


def _classify_conversion(slug, factor_raw):
    """Map a row's slug and 'Conversion Factor' cell to ``(type, factor, offset)``.

    ``factor`` and ``offset`` are returned as strings and default to "1" and
    "0"; they are only meaningful for the 'standard' type. The branch order
    is significant because several markers can co-occur in one cell.
    """
    c_type = 'standard'
    factor_val = "1"
    offset_val = "0"

    if '1/x' in factor_raw:
        c_type = 'inverse'
    elif 'Multi-Variable' in factor_raw:
        # Based on hdyc-calculators.js, amps-to-watts is 3col-mul, watts-to-amps is 3col.
        c_type = '3col-mul' if slug in ('amps-to-watts', 'lux-to-lumens') else '3col'
    elif 'Logarithmic' in factor_raw or 'Exponential' in factor_raw:
        if 'db-int' in slug or 'intensity' in slug:
            c_type = 'db-int'
        elif 'spl' in slug or 'sound' in slug:
            c_type = 'db-spl'
        elif 'volts' in slug:
            c_type = 'db-v'
        else:
            c_type = 'db-w'
    elif 'Base 60' in factor_raw:
        if slug == 'degrees-minutes-and-seconds-to-decimal-degrees':
            c_type = 'dms-dd'
        else:
            c_type = 'dd-dms'
    elif 'GCD' in factor_raw or 'string split' in factor_raw or 'fraction' in slug:
        c_type = 'dec-frac'
    elif 'N/A' in factor_raw or 'Text' in factor_raw:
        if 'ascii' in slug:
            c_type = 'text-bin' if slug.startswith('ascii') else 'bin-text'
        elif any(token in slug for token in ('binary', 'hex', 'decimal', 'octal')):
            c_type = 'base'
    elif ('10_to_2' in factor_raw or '16_to_2' in factor_raw
          or '10_to_16' in factor_raw or 'base' in factor_raw.lower()):
        c_type = 'base'
    elif 'Linear Offset' in factor_raw:
        match = _LINEAR_OFFSET_RE.search(factor_raw)
        if match:
            slope = _parse_ratio(match.group(1))
            if slope is not None:
                factor_val = slope
            if match.group(2):
                # Drop the whitespace between sign and digits so float() accepts it.
                offset_val = ''.join(match.group(2).split())
    elif _as_float(factor_raw) is not None:
        # The cell is a plain number: use it as the multiplier.
        factor_val = factor_raw

    return c_type, factor_val, offset_val


def _third_label(slug):
    """Pick the label of the third input for three-column calculators."""
    if 'watts' in slug and 'amps' in slug:
        return 'Volts'
    if 'lumens' in slug:
        return 'Area (sq m)'
    if 'moles' in slug:
        return 'Molar Mass'
    return 'Result'


def _data_storage_factor(unit_from, unit_to):
    """Return the exact multiplier between two data-storage unit labels.

    Labels are lower-cased and one trailing 's' is dropped before lookup.
    Returns ``None`` when either label is not a recognised unit, in which
    case the caller keeps whatever factor the table provided.
    """
    def size_in_bits(label):
        key = label.lower()
        if key.endswith('s'):
            key = key[:-1]
        return _DATA_UNIT_BITS.get(key)

    size_from = size_in_bits(unit_from)
    size_to = size_in_bits(unit_to)
    if size_from is None or size_to is None:
        return None
    # Integer true division is correctly rounded, and a bit is 1/8 of a byte
    # (not 1/10), so bits <-> bytes conversions come out right.
    return size_from / size_to


def _build_entry(raw_name, slug, factor_raw, external_descriptions):
    """Turn one table row into the dict serialised as a ``CalculatorDef``."""
    display_name, teaser = split_name_and_teaser(raw_name)

    parsed = split_conversion_name(display_name)
    in1, in2 = parsed if parsed else ("From", "To")

    category = guess_category(display_name)
    c_type, factor_val, offset_val = _classify_conversion(slug, factor_raw)

    entry = {
        'slug': slug,
        'name': display_name,
        'category': category,
        'type': c_type,
    }
    if teaser:
        entry['teaser'] = teaser

    labels = {'in1': in1, 'in2': in2}
    if c_type in ('3col', '3col-mul'):
        labels['in3'] = _third_label(slug)
    entry['labels'] = labels

    if c_type == 'standard':
        # The defaults ("1" / "0") are omitted from the output.
        if factor_val != "1":
            factor = _as_float(factor_val)
            if factor is not None:
                entry['factor'] = factor
        if offset_val != "0":
            offset = _as_float(offset_val)
            if offset is not None:
                entry['offset'] = offset

    if c_type == 'base':
        # A base named at the start of the slug is the source base; one named
        # elsewhere in the slug is the target base.
        for token, base in (('binary', 2), ('hex', 16), ('octal', 8), ('decimal', 10)):
            if token in slug:
                entry['fromBase' if slug.startswith(token) else 'toBase'] = base

    if category == 'data' and c_type == 'standard':
        # Megabytes, Gigabytes, ... use decimal sizes (1000); Mebibytes,
        # Gibibytes, ... use binary sizes (1024). When both units are known,
        # the computed factor replaces whatever the table provided.
        exact = _data_storage_factor(in1, in2)
        if exact is not None:
            entry['factor'] = exact

    desc_html = external_descriptions.get(slug, "")
    if desc_html:
        # Stored as a plain string; json.dumps() handles all escaping on output.
        entry['descriptionHTML'] = desc_html.replace('\n', '')

    return entry


def _hide_reverse_duplicates(entries):
    """Set ``hidden`` on one entry of each "A to B" / "B to A" pair, in place.

    The reverse slug is derived from the entry name. An entry with a factor
    is hidden when the factor's magnitude is below 1; an entry without a
    factor is hidden when its slug sorts after the reverse slug.
    """
    known_slugs = {entry['slug'] for entry in entries}
    for entry in entries:
        parsed = split_conversion_name(entry['name'])
        if not parsed:
            continue
        rev_slug = f"{parsed[1]} to {parsed[0]}".lower().replace(' ', '-')
        if rev_slug not in known_slugs or entry['slug'] == rev_slug:
            continue
        if 'factor' in entry:
            # Compare magnitudes so a negative slope and its inverse are not
            # both hidden.
            if abs(entry['factor']) < 1.0:
                entry['hidden'] = True
        elif entry['slug'] > rev_slug:
            entry['hidden'] = True


def _render_typescript(entries):
    """Render the complete calculators.ts source for *entries*."""
    # Every entry is emitted as a JSON object literal, which is valid
    # TypeScript and needs no manual quoting of names or descriptions.
    rows = ''.join(f"  {json.dumps(entry)},\n" for entry in entries)
    return _TS_HEADER + rows + _TS_FOOTER


def process():
    """Regenerate the TypeScript calculator data module from the markdown list.

    Reads the rows via ``parse_calculators_list()``, derives name, labels,
    category, conversion type and numeric parameters for each row, marks one
    entry of every reverse pair as hidden, writes the result to
    ``OUTPUT_FILE`` (UTF-8) and prints a one-line summary.

    Rows that repeat the header text or have an empty slug are skipped.
    """
    external_descriptions = load_external_descriptions()

    calculators_ts_entries = [
        _build_entry(raw_name, slug, factor_raw, external_descriptions)
        for raw_name, slug, factor_raw in parse_calculators_list()
        if raw_name != 'Calculator Name' and slug
    ]

    _hide_reverse_duplicates(calculators_ts_entries)

    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        f.write(_render_typescript(calculators_ts_entries))

    print(f"Generated {len(calculators_ts_entries)} calculators into calculators.ts")

if __name__ == '__main__':
    process()