# File: HowDoYouConvert/migrate.py
# Last modified: 2026-03-07 21:46:23 +00:00
# 434 lines, 18 KiB, Python

import json
import re
from pathlib import Path
# Directory containing this script; the other paths hang off it.
BASE_DIR = Path(__file__).resolve().parent

# Markdown table that parse_calculators_list() reads.
CALCLIST = BASE_DIR.joinpath('calculators_list.md')

# TypeScript module that process() writes.
OUTPUT_FILE = BASE_DIR.joinpath('hdyc-svelte/src/lib/data/calculators.ts')
# Substrings that guess_category() looks for in a lower-cased calculator name
# to file it under 'fluids'. Order is kept as authored.
FLUID_KEYWORDS = [
    # general flow / transport / concentration terms
    'flow', 'mass flux', 'volumetric', 'permeability', 'viscosity',
    'kinematic', 'surface tension', 'molar', 'concentration',
    'flux density', 'flow rate',
    # volume "per" something
    'gallon per', 'gallons per', 'liter per', 'liters per',
    'cubic per', 'cubic meter per', 'cubic meters per',
    'cubic foot per', 'cubic feet per',
    'cubic inch per', 'cubic inches per',
    # mass "per" something
    'kg per', 'kilogram per', 'kilograms per',
    'gram per', 'grams per', 'g per',
    'lb per', 'lbs per', 'pound per', 'pounds per',
    # amount of substance "per" something, plus concentration shorthands
    'mole per', 'moles per', 'mol per', 'mmol per',
    'percent by mass', 'ppm',
    'heat transfer coefficient',
    # something "per" area or volume
    'per square meter', 'per square metre',
    'per square foot', 'per square inch',
    'per square centimeter', 'per square centimetre',
    'per cubic meter', 'per cubic metre',
    'per cubic foot', 'per cubic inch',
]

# Substrings that guess_category() maps to 'currency'; checked before the
# fluid keywords.
CURRENCY_KEYWORDS = [
    'currency',
    'exchange rate',
    'forex',
]
def load_external_descriptions():
    """Return the slug -> description-HTML mapping used to enrich entries.

    No enrichment source is wired in yet, so the mapping is always empty.
    """
    descriptions = {}
    return descriptions
def parse_calculators_list(path=None):
    """Extract calculator rows from the Markdown calculator table.

    The table starts at the first line containing ``| Calculator Name`` and
    ends at the next ``## `` heading. Columns are located by header text, so
    their order in the file does not matter.

    Args:
        path: Markdown file to read. Defaults to ``CALCLIST``.

    Returns:
        A list of ``(name, slug, conversion_factor)`` string tuples, one per
        data row that has all three columns. Rows that are too short are
        skipped.
    """
    source = CALCLIST if path is None else path
    wanted_headers = ('Calculator Name', 'Slug', 'Conversion Factor')

    active_calcs = []
    in_table = False
    column_indexes = (None, None, None)

    # Read as UTF-8 explicitly: the table holds non-ASCII text and the
    # platform default encoding is not UTF-8 everywhere.
    with open(source, 'r', encoding='utf-8') as f:
        for line in f:
            if in_table and line.startswith('## '):
                break  # next section heading closes the table

            if '| Calculator Name' in line:
                in_table = True
                headers = [p.strip() for p in line.strip().strip('|').split('|')]
                header_map = {header: idx for idx, header in enumerate(headers)}
                column_indexes = tuple(header_map.get(h) for h in wanted_headers)
                continue

            if not in_table or not line.startswith('|'):
                continue

            cells = [p.strip() for p in line.strip().strip('|').split('|')]

            # Skip the header/body separator in any of its Markdown spellings
            # (':---', '---', '---:', ':---:'), not just the '| :---' form.
            if line.startswith('| :---') or all(
                re.fullmatch(r':?-+:?', cell) for cell in cells
            ):
                continue

            if None in column_indexes or len(cells) <= max(column_indexes):
                continue

            name_idx, slug_idx, factor_idx = column_indexes
            active_calcs.append((cells[name_idx], cells[slug_idx], cells[factor_idx]))

    return active_calcs
def split_name_and_teaser(name):
    """Separate a display name from its optional teaser.

    The teaser is whatever follows the first en dash, em dash or hyphen that
    has a whitespace character on each side.

    Returns:
        ``(name, teaser)`` with both parts stripped; ``teaser`` is ``''``
        when the name contains no such dash.
    """
    pieces = re.split(r'\s[–—-]\s', name, maxsplit=1)
    if len(pieces) != 2:
        return name.strip(), ''
    title, teaser = (piece.strip() for piece in pieces)
    return title, teaser
def split_conversion_name(name):
    """Split an "X to Y" calculator name into its two unit names.

    The separator is the first case-insensitive ``to`` that has whitespace on
    both sides.

    Returns:
        ``(source, target)`` with both parts stripped, or ``None`` when the
        name contains no such separator.
    """
    separator = re.search(r'\s+to\s+', name, flags=re.IGNORECASE)
    if separator is None:
        return None
    source_unit = name[:separator.start()].strip()
    target_unit = name[separator.end():].strip()
    return source_unit, target_unit
def guess_category(name):
    """Guess the category key for a calculator from its display name.

    The name is lower-cased and tested against keyword groups in a fixed
    order; the first group with a keyword occurring as a substring decides
    the category. The order matters because many keywords overlap (for
    example 'newton' must win before 'ton', and 'mile' before the angle
    rules).

    Args:
        name: Calculator display name, e.g. ``'Meters to Feet'``.

    Returns:
        A category key such as ``'length'``; ``'other'`` when nothing matches.
    """
    name_l = name.lower()

    def has(*keywords):
        return any(keyword in name_l for keyword in keywords)

    if has(*CURRENCY_KEYWORDS):
        return 'currency'
    if has(*FLUID_KEYWORDS):
        return 'fluids'
    if has('acre-foot', 'acre-feet', 'acrefoot', 'acre feet'):
        return 'volume'
    if has('temp scale', 'newton (temp'):
        return 'temperature'
    if has('force', 'torque', 'newton', 'dyne', 'foot-pound'):
        return 'force'
    if has('acre', 'hectare', 'square'):
        return 'area'
    if has('meter', 'inch', 'feet', 'yard', 'mile', 'cable', 'fathom', 'rod',
           'chain', 'nautical', 'league'):
        return 'length'
    if has('gram', 'pound', 'ounce', 'carat', 'stone', 'slug', 'ton',
           'pennyweight', 'grain', 'momme'):
        return 'weight'
    if has('celsius', 'fahrenheit', 'kelvin', 'rankine', 'delisle', 'reaumur',
           'réaumur', 'romer', 'rømer'):
        return 'temperature'
    if has('liter', 'gallon', 'cup', 'pint', 'quart', 'fluid', 'milliliter',
           'spoon', 'drop'):
        return 'volume'
    if has('pascal', 'bar', 'psi', 'atmosphere', 'mmhg', 'torr', 'water',
           'mercury'):
        return 'pressure'
    if has('magnet', 'magnetic', 'tesla', 'gauss', 'oersted', 'weber',
           'maxwell', 'gilbert', 'ampere-turn', 'ampere turns',
           'ampere per meter', 'magnetomotive'):
        return 'magnetism'
    if has('joule', 'calorie', 'btu', 'erg', 'therm', 'electron-volt'):
        return 'energy'
    if has('thermal', 'heat', 'conductivity', 'resistance', 'capacity',
           'expansion', 'transfer'):
        return 'temperature'
    if has('watt', 'horsepower'):
        return 'power'
    if has('byte', 'bit', 'nibble', 'baud'):
        return 'data'
    if has('light', 'lumen', 'lux', 'candela'):
        return 'light'
    # Singular forms also cover the plurals in a substring test. The angular
    # 'mil' is matched as a whole word only: as a bare substring it also hit
    # 'milliseconds', 'millivolts', 'milliamps', ... and filed them as angles.
    if has('degree', 'radian', 'arcminute', 'arcsecond', 'gradian',
           'quadrant', 'sextant', 'turn', 'points (compass',
           'points-compass') or re.search(r'\bmils?\b', name_l):
        return 'angle'
    if has('second', 'minute', 'hour', 'day', 'week', 'month', 'year'):
        return 'time'
    if has('binary', 'hex', 'octal', 'decimal', 'ascii', 'fraction'):
        return 'number-systems'
    if has('becquerel', 'curie', 'gray', 'rad', 'sievert', 'rem', 'roentgen',
           'rutherford'):
        return 'radiation'
    if has('volt', 'amp', 'ohm', 'siemens', 'farad', 'henry', 'coulomb'):
        return 'electrical'
    # RPM might be frequency, close enough.
    if has(' per ', 'knot', 'mach', 'rpm'):
        return 'speed'
    # 'binary'/'hex'/'octal'/'decimal' were already handled above, so only
    # the generic 'base' keyword can still match here.
    if has('base'):
        return 'number-systems'
    return 'other'
# Byte-multiple unit names (singular, lower-case) -> exponent of their size in
# bytes. Kilo/mega/... are decimal (powers of 10); kibi/mebi/... are binary
# (powers of 2).
_DECIMAL_BYTE_EXPONENTS = {
    'byte': 0, 'kilobyte': 3, 'megabyte': 6, 'gigabyte': 9,
    'terabyte': 12, 'petabyte': 15, 'exabyte': 18,
}
_BINARY_BYTE_EXPONENTS = {
    'kibibyte': 10, 'mebibyte': 20, 'gibibyte': 30,
    'tebibyte': 40, 'pebibyte': 50, 'exbibyte': 60,
}

# Radix names looked up in slugs, in the order they are applied.
_NAMED_BASES = (('binary', 2), ('hex', 16), ('octal', 8), ('decimal', 10))

# Static TypeScript emitted before the generated calculator rows.
# NOTE(review): the 'energy' icon below is an empty string while every other
# category has an emoji -- confirm whether a character was lost.
_TS_HEADER = """// THIS FILE IS AUTO-GENERATED BY migrate.py
export type CalcType = 'standard' | 'inverse' | '3col' | '3col-mul' | 'base' | 'text-bin' | 'bin-text' | 'dms-dd' | 'dd-dms' | 'dec-frac' | 'db-int' | 'db-spl' | 'db-v' | 'db-w';
export interface CalculatorDef {
slug: string;
name: string;
category: string;
type: CalcType;
hidden?: boolean;
factor?: number;
offset?: number;
fromBase?: number;
toBase?: number;
labels: { in1: string; in2: string; in3?: string };
descriptionHTML?: string;
teaser?: string;
}
export const categories: Record<string, { label: string; icon: string }> = {
length: { label: 'Length / Distance', icon: '📏' },
weight: { label: 'Weight / Mass', icon: '⚖️' },
temperature: { label: 'Temperature', icon: '🌡️' },
volume: { label: 'Volume', icon: '🧪' },
fluids: { label: 'Fluids', icon: '💧' },
area: { label: 'Area', icon: '📐' },
speed: { label: 'Speed / Velocity', icon: '💨' },
pressure: { label: 'Pressure', icon: '🔽' },
energy: { label: 'Energy', icon: '' },
currency: { label: 'Currency', icon: '💱' },
magnetism: { label: 'Magnetism', icon: '🧲' },
power: { label: 'Power', icon: '🔌' },
data: { label: 'Data Storage', icon: '💾' },
time: { label: 'Time', icon: '⏱️' },
angle: { label: 'Angle', icon: '📐' },
'number-systems':{ label: 'Number Systems', icon: '🔢' },
radiation: { label: 'Radiation', icon: '☢️' },
electrical: { label: 'Electrical', icon: '🔋' },
force: { label: 'Force / Torque', icon: '💪' },
light: { label: 'Light', icon: '💡' },
other: { label: 'Other', icon: '🔄' },
};
export const calculators: CalculatorDef[] = [
"""

# Static TypeScript emitted after the generated calculator rows.
_TS_FOOTER = """
];
const slugIndex = new Map(calculators.map(c => [c.slug, c]));
export function getCalculatorBySlug(slug: string): CalculatorDef | undefined {
return slugIndex.get(slug);
}
export function getCalculatorsByCategory(category: string): CalculatorDef[] {
return calculators.filter(c => c.category === category && !c.hidden);
}
export function getCategoriesWithCounts(): { key: string; label: string; icon: string; count: number }[] {
return Object.entries(categories).map(([key, meta]) => ({
key,
...meta,
count: calculators.filter(c => c.category === key && !c.hidden).length,
}));
}
export function searchCalculators(query: string): CalculatorDef[] {
const q = query.toLowerCase();
return calculators.filter(c =>
(c.name.toLowerCase().includes(q) ||
c.slug.includes(q) ||
c.labels.in1.toLowerCase().includes(q) ||
c.labels.in2.toLowerCase().includes(q)) && !c.hidden
);
}
"""


def _fraction_to_decimal_text(text):
    """Turn 'a/b' into the text of its quotient; return other text unchanged."""
    if '/' not in text:
        return text
    pieces = text.split('/')
    return str(float(pieces[0]) / float(pieces[1]))


def _parse_linear_offset(factor_raw):
    """Read (factor text, offset text) from e.g. "Linear Offset (1.8x + 32)"."""
    with_offset = re.search(
        r'Linear Offset \(([-\d\./]+)x\s*([+-]\s*[\d\.]+)\)', factor_raw)
    if with_offset:
        return (_fraction_to_decimal_text(with_offset.group(1)),
                with_offset.group(2).replace(' ', ''))
    slope_only = re.search(r'Linear Offset \(([-\d\./]+)x\)', factor_raw)
    if slope_only:
        # A fractional slope such as '5/9' must be converted here as well,
        # otherwise float() rejects it later and the factor is lost.
        return _fraction_to_decimal_text(slope_only.group(1)), "0"
    return "1", "0"


def _classify(slug, factor_raw):
    """Derive (calculator type, factor text, offset text) for one table row."""
    factor_val, offset_val = "1", "0"
    c_type = 'standard'
    if '1/x' in factor_raw:
        c_type = 'inverse'
    elif 'Multi-Variable' in factor_raw:
        # Based on hdyc-calculators.js, amps-to-watts is 3col-mul while
        # watts-to-amps is plain 3col.
        if slug in ('amps-to-watts', 'lux-to-lumens'):
            c_type = '3col-mul'
        else:
            c_type = '3col'
    elif 'Logarithmic' in factor_raw or 'Exponential' in factor_raw:
        if 'db-int' in slug or 'intensity' in slug:
            c_type = 'db-int'
        elif 'spl' in slug or 'sound' in slug:
            c_type = 'db-spl'
        elif 'volts' in slug:
            c_type = 'db-v'
        else:
            c_type = 'db-w'
    elif 'Base 60' in factor_raw:
        if slug == 'degrees-minutes-and-seconds-to-decimal-degrees':
            c_type = 'dms-dd'
        else:
            c_type = 'dd-dms'
    elif 'GCD' in factor_raw or 'string split' in factor_raw or 'fraction' in slug:
        c_type = 'dec-frac'
    elif 'N/A' in factor_raw or 'Text' in factor_raw:
        if 'ascii' in slug:
            c_type = 'text-bin' if slug.startswith('ascii') else 'bin-text'
        elif any(word in slug for word in ('binary', 'hex', 'decimal', 'octal')):
            c_type = 'base'
    elif ('10_to_2' in factor_raw or '16_to_2' in factor_raw
          or '10_to_16' in factor_raw or 'base' in factor_raw.lower()):
        c_type = 'base'
    elif 'Linear Offset' in factor_raw:
        factor_val, offset_val = _parse_linear_offset(factor_raw)
    else:
        # Plain numeric factor; anything float() rejects keeps the default.
        try:
            float(factor_raw)
        except ValueError:
            pass
        else:
            factor_val = factor_raw
    return c_type, factor_val, offset_val


def _third_label(slug):
    """Pick the label of the third input of a three-column calculator."""
    if 'watts' in slug and 'amps' in slug:
        return 'Volts'
    if 'lumens' in slug:
        return 'Area (sq m)'
    if 'moles' in slug:
        return 'Molar Mass'
    return 'Result'


def _assign_bases(entry, slug, factor_raw):
    """Fill entry['fromBase'] / entry['toBase'] for a 'base' calculator."""
    # 'hexadecimal' contains 'decimal'; collapse it to 'hex' first so that
    # e.g. 'octal-to-hexadecimal' is not given toBase 10 by the decimal rule.
    probe = slug.replace('hexadecimal', 'hex')
    for keyword, radix in _NAMED_BASES:
        if keyword in probe:
            key = 'fromBase' if probe.startswith(keyword) else 'toBase'
            entry[key] = radix

    # Slugs shaped like 'base-N-to-base-M' only fill in what is still missing.
    if 'base-' in slug:
        parts = slug.split('-')
        if (len(parts) >= 5 and parts[0] == 'base'
                and parts[2] == 'to' and parts[3] == 'base'):
            try:
                entry.setdefault('fromBase', int(parts[1]))
                entry.setdefault('toBase', int(parts[4]))
            except ValueError:
                pass

    # Last resort: a "base N -> M" hint in the factor column.
    if 'base' in factor_raw.lower():
        match = re.search(
            r'base\s*(\d+)\s*(?:→|to)\s*(?:base\s*)?(\d+)',
            factor_raw, re.IGNORECASE)
        if match:
            entry.setdefault('fromBase', int(match.group(1)))
            entry.setdefault('toBase', int(match.group(2)))


def _size_in_bits(unit):
    """Return the exact size of a data unit in bits, or None if unknown."""
    if unit == 'bit':
        return 1
    if unit in _DECIMAL_BYTE_EXPONENTS:
        return 8 * 10 ** _DECIMAL_BYTE_EXPONENTS[unit]
    if unit in _BINARY_BYTE_EXPONENTS:
        return 8 * 2 ** _BINARY_BYTE_EXPONENTS[unit]
    return None


def _data_factor(label_from, label_to):
    """Compute the exact factor between two data-size labels, or None."""
    units = []
    for label in (label_from, label_to):
        key = label.lower()
        if key.endswith('s'):
            key = key[:-1]
        units.append(_size_in_bits(key))
    src_bits, dst_bits = units
    if src_bits is None or dst_bits is None:
        return None
    # Integer sizes keep the ratio exact; a bit is 1/8 byte, not 1/10.
    return src_bits / dst_bits


def _mark_reverse_pairs(entries):
    """Hide one calculator of every "A to B" / "B to A" pair, in place."""
    known_slugs = {entry['slug'] for entry in entries}
    for entry in entries:
        parsed = split_conversion_name(entry['name'])
        if not parsed:
            continue
        rev_slug = f"{parsed[1]} to {parsed[0]}".lower().replace(' ', '-')
        if rev_slug == entry['slug'] or rev_slug not in known_slugs:
            continue
        if 'factor' in entry:
            # Hide the direction whose factor is below one.
            if entry['factor'] < 1.0:
                entry['hidden'] = True
        elif entry['slug'] > rev_slug:
            # No factor to compare: hide the alphabetically later slug.
            entry['hidden'] = True


def process():
    """Generate the TypeScript calculator module from the Markdown table.

    Reads the rows via ``parse_calculators_list()``, derives each
    calculator's category, type, labels and numeric parameters, hides one
    direction of every reverse pair, and writes the result to
    ``OUTPUT_FILE``. Prints the number of generated calculators.
    """
    external_descriptions = load_external_descriptions()
    entries = []

    for raw_name, slug, factor_raw in parse_calculators_list():
        if raw_name == 'Calculator Name' or not slug:
            continue

        display_name, teaser = split_name_and_teaser(raw_name)
        in1, in2 = split_conversion_name(display_name) or ("From", "To")
        category = guess_category(display_name)
        c_type, factor_val, offset_val = _classify(slug, factor_raw)

        # Key insertion order is the property order in the generated file.
        entry = {
            'slug': slug,
            'name': display_name,
            'category': category,
            'type': c_type,
        }
        if teaser:
            entry['teaser'] = teaser

        labels = {'in1': in1, 'in2': in2}
        if c_type in ('3col', '3col-mul'):
            labels['in3'] = _third_label(slug)
        entry['labels'] = labels

        if c_type == 'standard':
            # Defaults ("1" / "0") are omitted; unparsable text is dropped.
            if factor_val != "1":
                try:
                    entry['factor'] = float(factor_val)
                except ValueError:
                    pass
            if offset_val != "0":
                try:
                    entry['offset'] = float(offset_val)
                except ValueError:
                    pass

        if c_type == 'base':
            _assign_bases(entry, slug, factor_raw)

        if category == 'data' and c_type == 'standard':
            # Known data units get an exact factor that replaces the table's.
            data_factor = _data_factor(in1, in2)
            if data_factor is not None:
                entry['factor'] = data_factor

        desc_html = external_descriptions.get(slug, "")
        if desc_html:
            # Serialised by json.dumps with the rest of the entry, so quotes,
            # backslashes and backticks in the HTML cannot break the output.
            entry['descriptionHTML'] = desc_html.replace('\n', '')

        entries.append(entry)

    _mark_reverse_pairs(entries)

    rows = [f" {json.dumps(entry)},\n" for entry in entries]
    out = ''.join([_TS_HEADER, *rows, _TS_FOOTER])
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        f.write(out)
    print(f"Generated {len(entries)} calculators into calculators.ts")
# Run the migration only when this file is executed as a script.
if __name__ == "__main__":
    process()