Files
HowDoYouConvert/migrate.py
2026-03-07 10:21:05 +00:00

352 lines
15 KiB
Python

import json
import re
from pathlib import Path
# Directory that contains this script; the other paths are resolved against it.
BASE_DIR = Path(__file__).resolve().parent
# Markdown file holding the calculator table that gets parsed.
CALCLIST = BASE_DIR.joinpath('calculators_list.md')
# Generated TypeScript module (overwritten on every run).
OUTPUT_FILE = BASE_DIR.joinpath('hdyc-svelte/src/lib/data/calculators.ts')
def load_external_descriptions():
    """Return extra description HTML for calculators.

    No enrichment source is wired up yet, so the result is always a new,
    empty dict.
    """
    descriptions = dict()
    return descriptions
def parse_calculators_list():
    """Extract ``(name, slug, factor)`` rows from the calculator table.

    Reads the Markdown file at ``CALCLIST`` and looks for the table whose
    header row contains ``| Calculator Name``.  Column positions are taken
    from that header, so the ``Calculator Name``, ``Slug`` and
    ``Conversion Factor`` columns may appear in any order.  Parsing stops at
    the first line starting with ``## `` that follows the header.

    Returns:
        A list of ``(name, slug, factor_raw)`` tuples of stripped strings in
        file order.  Alignment rows and rows too short to hold all three
        columns are skipped; if a required column is missing from the header
        no rows are returned for that table.
    """
    required_columns = ('Calculator Name', 'Slug', 'Conversion Factor')
    # A Markdown alignment cell: ---, :---, ---: or :---:
    alignment_cell = re.compile(r':?-+:?')

    rows = []
    in_table = False
    column_indices = []

    # Decode as UTF-8 explicitly instead of relying on the platform locale:
    # names may contain en/em dashes, and the generated file is UTF-8 too.
    with open(CALCLIST, 'r', encoding='utf-8') as f:
        for line in f:
            if in_table and line.startswith('## '):
                break

            if '| Calculator Name' in line:
                in_table = True
                headers = [p.strip() for p in line.strip().strip('|').split('|')]
                header_map = {header: idx for idx, header in enumerate(headers)}
                # Resolve the three columns once per header, not once per row.
                column_indices = [header_map.get(col) for col in required_columns]
                continue

            if not in_table or not line.startswith('|'):
                continue

            cells = [p.strip() for p in line.strip().strip('|').split('|')]

            # Skip the header/body separator in any of its spellings
            # ("| :--- |", "|---|---|", "| --- | ---: |", ...).
            if line.startswith('| :---') or all(alignment_cell.fullmatch(c) for c in cells):
                continue

            if None in column_indices or len(cells) <= max(column_indices):
                continue

            rows.append(tuple(cells[idx] for idx in column_indices))

    return rows
def split_name_and_teaser(name):
    """Separate a calculator name from an optional trailing teaser.

    The split happens at the first en dash, em dash or hyphen that has a
    whitespace character on both sides.

    Returns:
        ``(name, teaser)``, both stripped.  ``teaser`` is ``''`` when no such
        separator is present.
    """
    head, *tail = re.split(r'\s[–—-]\s', name, maxsplit=1)
    if tail:
        return head.strip(), tail[0].strip()
    return name.strip(), ''
def split_conversion_name(name):
    """Split an "X to Y" name into its two unit labels.

    The separator is the first whitespace-delimited "to", matched
    case-insensitively.

    Returns:
        ``(source, target)`` with surrounding whitespace removed, or ``None``
        when the name contains no such separator.
    """
    pieces = re.split(r'\s+to\s+', name, maxsplit=1, flags=re.IGNORECASE)
    if len(pieces) != 2:
        return None
    source, target = pieces
    return source.strip(), target.strip()
def guess_category(name):
    """Guess a category key from keywords in a calculator name.

    The lower-cased name is tested against each keyword group in the order
    listed below, and the first group with any keyword occurring as a
    substring wins.  Because matching is by substring and order, an earlier
    group shadows later ones (for example any name containing "meter" is
    reported as ``'length'``).

    Returns:
        The category key of the first matching group, or ``'other'``.
    """
    keyword_groups = (
        ('force', ('force', 'torque', 'newton', 'dyne', 'foot-pound')),
        ('length', ('meter', 'inch', 'feet', 'yard', 'mile', 'cable', 'fathom',
                    'rod', 'chain', 'nautical', 'league')),
        ('weight', ('gram', 'pound', 'ounce', 'carat', 'stone', 'slug', 'ton',
                    'pennyweight', 'grain', 'momme')),
        ('temperature', ('celsius', 'fahrenheit', 'kelvin', 'rankine')),
        ('volume', ('liter', 'gallon', 'cup', 'pint', 'quart', 'fluid',
                    'milliliter', 'spoon', 'drop')),
        ('area', ('acre', 'hectare', 'square')),
        # RPM might be frequency, close enough
        ('speed', (' per ', 'knot', 'mach', 'rpm')),
        ('pressure', ('pascal', 'bar', 'psi', 'atmosphere', 'mmhg', 'torr',
                      'water', 'mercury')),
        ('energy', ('joule', 'calorie', 'btu', 'erg', 'therm', 'electron-volt')),
        ('power', ('watt', 'horsepower')),
        ('data', ('byte', 'bit', 'nibble', 'baud')),
        ('time', ('second', 'minute', 'hour', 'day', 'week', 'month', 'year')),
        ('angle', ('degree', 'radian', 'mil ', 'arc', 'gradian', 'quadrant')),
        ('number-systems', ('binary', 'hex', 'octal', 'decimal', 'ascii',
                            'fraction')),
        ('radiation', ('becquerel', 'curie', 'gray', 'rad', 'sievert', 'rem',
                       'roentgen', 'rutherford')),
        ('electrical', ('volt', 'amp', 'ohm', 'siemens', 'farad', 'henry',
                        'coulomb')),
        ('light', ('lumen', 'lux', 'candela')),
    )

    lowered = name.lower()
    for category, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in lowered:
                return category
    return 'other'
# "Linear Offset (1.8x + 32)"
_LINEAR_OFFSET_RE = re.compile(r'Linear Offset \(([\d\./]+)x\s*([+-]\s*[\d\.]+)\)')
# "Linear Offset (1.8x)"
_LINEAR_SCALE_RE = re.compile(r'Linear Offset \(([\d\./]+)x\)')

# Megabytes, Gigabytes, Terabytes etc use decimal base-10 sizes (powers of 1000).
_DECIMAL_BYTE_EXPONENTS = {
    'byte': 0, 'kilobyte': 3, 'megabyte': 6, 'gigabyte': 9,
    'terabyte': 12, 'petabyte': 15, 'exabyte': 18,
}
# Mebibytes, Gibibytes, Tebibytes etc use binary base-2 sizes (powers of 1024).
_BINARY_BYTE_EXPONENTS = {
    'kibibyte': 10, 'mebibyte': 20, 'gibibyte': 30,
    'tebibyte': 40, 'pebibyte': 50, 'exbibyte': 60,
}


def _parse_number(text):
    """Return *text* as a float, accepting ``a/b`` fractions; ``None`` if invalid."""
    numerator, slash, denominator = text.partition('/')
    try:
        if slash:
            return float(numerator) / float(denominator)
        return float(text)
    except (ValueError, ZeroDivisionError):
        return None


def _classify(slug, factor_raw):
    """Map a table row to ``(calc_type, factor, offset)``.

    ``factor`` and ``offset`` are floats, or ``None`` when the corresponding
    key should be left out of the generated entry.  They are only ever set
    for the ``'standard'`` type.
    """
    if '1/x' in factor_raw:
        return 'inverse', None, None

    if 'Multi-Variable' in factor_raw:
        # Based on hdyc-calculators.js, amps-to-watts is 3col-mul, watts-to-amps is 3col
        if slug in ('amps-to-watts', 'lux-to-lumens'):
            return '3col-mul', None, None
        return '3col', None, None

    if 'Logarithmic' in factor_raw or 'Exponential' in factor_raw:
        if 'db-int' in slug or 'intensity' in slug:
            return 'db-int', None, None
        if 'spl' in slug or 'sound' in slug:
            return 'db-spl', None, None
        if 'volts' in slug:
            return 'db-v', None, None
        return 'db-w', None, None

    if 'Base 60' in factor_raw:
        if slug == 'degrees-minutes-and-seconds-to-decimal-degrees':
            return 'dms-dd', None, None
        return 'dd-dms', None, None

    if 'GCD' in factor_raw or 'string split' in factor_raw or 'fraction' in slug:
        return 'dec-frac', None, None

    if 'N/A' in factor_raw or 'Text' in factor_raw:
        if 'ascii' in slug:
            return ('text-bin' if slug.startswith('ascii') else 'bin-text'), None, None
        if any(word in slug for word in ('binary', 'hex', 'decimal', 'octal')):
            return 'base', None, None
        return 'standard', None, None

    if ('10_to_2' in factor_raw or '16_to_2' in factor_raw
            or '10_to_16' in factor_raw or 'base' in factor_raw.lower()):
        return 'base', None, None

    if 'Linear Offset' in factor_raw:
        factor = offset = None
        with_offset = _LINEAR_OFFSET_RE.search(factor_raw)
        match = with_offset or _LINEAR_SCALE_RE.search(factor_raw)
        # A literal "1" means "no scaling" and is omitted from the entry.
        if match and match.group(1) != '1':
            # Fractions such as "5/9" are evaluated for both pattern forms.
            factor = _parse_number(match.group(1))
        if with_offset:
            offset = _parse_number(with_offset.group(2).replace(' ', ''))
        return 'standard', factor, offset

    # Anything else is either a plain number or free text that is ignored.
    factor = None
    if factor_raw != '1':
        try:
            factor = float(factor_raw)
        except ValueError:
            factor = None
    return 'standard', factor, None


def _third_label(slug):
    """Pick the label of the third input of a three-column calculator."""
    if 'watts' in slug and 'amps' in slug:
        return 'Volts'
    if 'lumens' in slug:
        return 'Area (sq m)'
    if 'moles' in slug:
        return 'Molar Mass'
    return 'Result'


def _base_fields(slug):
    """Derive ``fromBase``/``toBase`` for a number-base calculator from its slug."""
    fields = {}
    for keyword, base in (('binary', 2), ('hex', 16), ('octal', 8), ('decimal', 10)):
        # "hexadecimal" contains "decimal"; hide it so base 16 is not also
        # reported as base 10 (which used to overwrite the real toBase).
        haystack = slug.replace('hexadecimal', 'hex') if keyword == 'decimal' else slug
        if keyword in haystack:
            key = 'fromBase' if haystack.startswith(keyword) else 'toBase'
            fields[key] = base
    return fields


def _data_unit_bits(label):
    """Return the size of a data unit in bits, or ``None`` if it is not recognised."""
    key = label.lower()
    if key.endswith('s'):
        key = key[:-1]
    if key == 'bit':
        return 1
    if key in _DECIMAL_BYTE_EXPONENTS:
        return 8 * 10 ** _DECIMAL_BYTE_EXPONENTS[key]
    if key in _BINARY_BYTE_EXPONENTS:
        return 8 * 2 ** _BINARY_BYTE_EXPONENTS[key]
    return None


def _data_factor(in1, in2):
    """Return the exact factor between two data units, or ``None`` if either is unknown."""
    size_from = _data_unit_bits(in1)
    size_to = _data_unit_bits(in2)
    if size_from is None or size_to is None:
        return None
    # Both sizes are exact integers, so one true division gives the
    # correctly rounded ratio (e.g. bits -> bytes is 0.125, not 0.1).
    return size_from / size_to


def _build_entry(raw_name, slug, factor_raw, description_html):
    """Build the dict that becomes one ``CalculatorDef`` literal."""
    display_name, teaser = split_name_and_teaser(raw_name)
    parsed = split_conversion_name(display_name)
    in1, in2 = parsed if parsed else ("From", "To")
    category = guess_category(display_name)
    c_type, factor, offset = _classify(slug, factor_raw)

    entry = {
        'slug': slug,
        'name': display_name,
        'category': category,
        'type': c_type,
    }
    if teaser:
        entry['teaser'] = teaser

    labels = {'in1': in1, 'in2': in2}
    if c_type in ('3col', '3col-mul'):
        labels['in3'] = _third_label(slug)
    entry['labels'] = labels

    if c_type == 'standard':
        if category == 'data':
            # Factors for recognised data units are computed exactly and
            # override whatever the table says.
            exact = _data_factor(in1, in2)
            if exact is not None:
                factor = exact
        if factor is not None:
            entry['factor'] = factor
        if offset is not None:
            entry['offset'] = offset
    elif c_type == 'base':
        entry.update(_base_fields(slug))

    # Remove empty descriptions
    if description_html:
        entry['descriptionHTML'] = description_html.replace('\n', '')
    return entry


def _mark_reverse_duplicates(entries):
    """Set ``hidden`` on one entry of each "A to B" / "B to A" pair.

    An entry is hidden when its reverse exists and either its factor is
    below 1, or it has no factor and its slug sorts after the reverse slug.
    """
    known_slugs = {entry['slug'] for entry in entries}
    for entry in entries:
        parsed = split_conversion_name(entry['name'])
        if not parsed:
            continue
        rev_slug = f"{parsed[1]} to {parsed[0]}".lower().replace(' ', '-')
        if rev_slug == entry['slug'] or rev_slug not in known_slugs:
            continue
        if 'factor' in entry:
            if entry['factor'] < 1.0:
                entry['hidden'] = True
        elif entry['slug'] > rev_slug:
            # No factor to compare: just hide the alphabetically later one.
            entry['hidden'] = True


def _render_typescript(entries):
    """Return the full text of the generated TypeScript module."""
    # NOTE(review): the 'energy' category below has an empty icon string,
    # unlike every other category - confirm this is intentional.
    header = """// THIS FILE IS AUTO-GENERATED BY migrate.py
export type CalcType = 'standard' | 'inverse' | '3col' | '3col-mul' | 'base' | 'text-bin' | 'bin-text' | 'dms-dd' | 'dd-dms' | 'dec-frac' | 'db-int' | 'db-spl' | 'db-v' | 'db-w';
export interface CalculatorDef {
slug: string;
name: string;
category: string;
type: CalcType;
hidden?: boolean;
factor?: number;
offset?: number;
fromBase?: number;
toBase?: number;
labels: { in1: string; in2: string; in3?: string };
descriptionHTML?: string;
teaser?: string;
}
export const categories: Record<string, { label: string; icon: string }> = {
length: { label: 'Length / Distance', icon: '📏' },
weight: { label: 'Weight / Mass', icon: '⚖️' },
temperature: { label: 'Temperature', icon: '🌡️' },
volume: { label: 'Volume', icon: '🧪' },
area: { label: 'Area', icon: '📐' },
speed: { label: 'Speed / Velocity', icon: '💨' },
pressure: { label: 'Pressure', icon: '🔽' },
energy: { label: 'Energy', icon: '' },
power: { label: 'Power', icon: '🔌' },
data: { label: 'Data Storage', icon: '💾' },
time: { label: 'Time', icon: '⏱️' },
angle: { label: 'Angle', icon: '📐' },
'number-systems':{ label: 'Number Systems', icon: '🔢' },
radiation: { label: 'Radiation', icon: '☢️' },
electrical: { label: 'Electrical', icon: '🔋' },
force: { label: 'Force / Torque', icon: '💪' },
light: { label: 'Light', icon: '💡' },
other: { label: 'Other', icon: '🔄' },
};
export const calculators: CalculatorDef[] = [
"""
    footer = """
];
const slugIndex = new Map(calculators.map(c => [c.slug, c]));
export function getCalculatorBySlug(slug: string): CalculatorDef | undefined {
return slugIndex.get(slug);
}
export function getCalculatorsByCategory(category: string): CalculatorDef[] {
return calculators.filter(c => c.category === category && !c.hidden);
}
export function getCategoriesWithCounts(): { key: string; label: string; icon: string; count: number }[] {
return Object.entries(categories).map(([key, meta]) => ({
key,
...meta,
count: calculators.filter(c => c.category === key && !c.hidden).length,
}));
}
export function searchCalculators(query: string): CalculatorDef[] {
const q = query.toLowerCase();
return calculators.filter(c =>
(c.name.toLowerCase().includes(q) ||
c.slug.includes(q) ||
c.labels.in1.toLowerCase().includes(q) ||
c.labels.in2.toLowerCase().includes(q)) && !c.hidden
);
}
"""
    # Every entry, descriptionHTML included, goes through json.dumps: a JSON
    # object is a valid TypeScript literal, and JSON string escaping means
    # quotes, backslashes, backticks or "${" in a description cannot break
    # the generated source.
    body = ''.join(f" {json.dumps(entry)},\n" for entry in entries)
    return header + body + footer


def process():
    """Generate the TypeScript calculator module from the Markdown list.

    Reads the rows returned by ``parse_calculators_list()``, turns each one
    into a calculator entry (type, labels, factor/offset, number bases,
    optional description), hides one entry of every reverse pair, writes the
    result to ``OUTPUT_FILE`` as UTF-8 and prints how many entries were
    generated.  Rows with an empty slug, and repeated header rows, are
    ignored.
    """
    external_descriptions = load_external_descriptions()

    entries = []
    for raw_name, slug, factor_raw in parse_calculators_list():
        if raw_name == 'Calculator Name' or not slug:
            continue
        description_html = external_descriptions.get(slug, "")
        entries.append(_build_entry(raw_name, slug, factor_raw, description_html))

    _mark_reverse_duplicates(entries)

    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        f.write(_render_typescript(entries))
    print(f"Generated {len(entries)} calculators into calculators.ts")
# Regenerate the TypeScript module when this file is run as a script.
if __name__ == '__main__':
    process()