# HowDoYouConvert/migrate.py
#
# Repository-viewer metadata: 325 lines, 14 KiB, Python.

import json
import math
import os
import re
import urllib.error
import urllib.request
# Markdown file containing the calculator tables; read by parse_calculators_list().
CALCLIST = '/home/ben/Seafile/Storage/Docs/howdoyouconvert/calculators_list.md'
# REST endpoint the calculators are fetched from, 100 items per request;
# fetch_all_wp_calculators() appends "&page=N" to walk through the pages.
API_URL = 'https://howdoyouconvert.com/wp-json/wp/v2/calculator?per_page=100'
# Matches real <p> elements only. The separator after "p" must be whitespace
# or ">", so <pre>, <path ...> or <picture> are not mistaken for paragraphs.
_PARAGRAPH_RE = re.compile(r'<p(?:\s[^>]*)?>(.*?)</p>', re.DOTALL)
_TAG_RE = re.compile(r'<[^>]+>')
_WHITESPACE_RE = re.compile(r'\s+')


def _extract_paragraphs(content):
    """Return the non-empty <p> texts of *content* as clean "<p>...</p>" lines.

    Inner markup is stripped and runs of whitespace are collapsed to a single
    space. Paragraphs are joined with a newline.
    """
    paragraphs = []
    for raw in _PARAGRAPH_RE.findall(content):
        text = _TAG_RE.sub('', raw).replace('\n', ' ').strip()
        text = _WHITESPACE_RE.sub(' ', text)
        if text:
            paragraphs.append(f"<p>{text}</p>")
    return "\n".join(paragraphs)


def fetch_all_wp_calculators(timeout=30):
    """Download every calculator post and return its cleaned description HTML.

    Pages of API_URL are requested one after another ("&page=1", "&page=2",
    ...) until a page comes back empty or the server answers HTTP 400, which
    is treated as "past the last page".

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
            Without it a stalled connection would block the script forever.

    Returns:
        dict mapping post slug -> description HTML (see _extract_paragraphs).
        On any error the problem is printed and whatever was collected so far
        is returned; this function is deliberately best-effort.
    """
    calculators = {}
    page = 1
    while True:
        url = f"{API_URL}&page={page}"
        print(f"Fetching {url}...")
        try:
            req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
            with urllib.request.urlopen(req, timeout=timeout) as response:
                data = json.loads(response.read().decode())
            if not data:
                break
            for post in data:
                calculators[post['slug']] = _extract_paragraphs(post['content']['rendered'])
            page += 1
        except urllib.error.HTTPError as e:
            # 400 marks the end of pagination and is not worth reporting.
            if e.code != 400:
                print(f"Error fetching page {page}: {e}")
            break
        except Exception as e:
            # Best-effort boundary: network failures, bad JSON or an
            # unexpected post shape stop the crawl but keep earlier pages.
            print(f"Error fetching page {page}: {e}")
            break
    return calculators
def parse_calculators_list(path=None):
    """Read the active calculator rows from the markdown calculator list.

    Parsing starts at the first table header containing "| Calculator Name"
    and stops at the first line starting with "## Backlog". Separator rows
    ("| :---") and rows with fewer than five columns are skipped.

    Args:
        path: Markdown file to read. Defaults to the module-level CALCLIST.

    Returns:
        A list of (name, slug, factor_raw) string tuples, taken from the
        first, fourth and fifth table columns respectively.
    """
    if path is None:
        path = CALCLIST

    active_calcs = []
    in_table = False
    # Explicit encoding so the result does not depend on the machine's locale.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.startswith('## Backlog'):
                break
            if '| Calculator Name' in line:
                in_table = True
                continue
            if not in_table or not line.startswith('|'):
                continue
            if line.startswith('| :---'):
                continue
            # A row "| a | b |" splits into ['', 'a', 'b', ''], so cell 1 is
            # the first visible column.
            cells = [cell.strip() for cell in line.split('|')]
            if len(cells) >= 6:
                active_calcs.append((cells[1], cells[4], cells[5]))
    return active_calcs
# (category, markers) pairs, checked top to bottom; the first pair with a
# marker contained in the lower-cased name wins.
#
# The first group holds unambiguous or compound markers. They must be tested
# before the generic unit words below, otherwise a generic substring hides
# them: 'ton' matches "newton", 'pound' matches "foot-pound", 'mile'/'meter'
# match "miles per hour", 'feet' matches "square feet", 'inch' matches
# "inches of mercury", 'ounce' matches "fluid ounces" and 'second' matches
# "arcseconds".
_CATEGORY_RULES = (
    ('pressure', ('pascal', 'psi', 'atmosphere', 'mmhg', 'torr', 'water', 'mercury')),
    ('force', ('newton', 'dyne', 'foot-pound')),
    ('volume', ('fluid',)),
    ('angle', ('arc',)),
    ('speed', (' per ', 'knot', 'mach', 'rpm')),  # RPM might be frequency, close enough
    ('area', ('square',)),
    # Generic unit words.
    ('length', ('meter', 'inch', 'feet', 'yard', 'mile', 'cable', 'fathom', 'rod', 'chain', 'nautical', 'league')),
    ('weight', ('gram', 'pound', 'ounce', 'carat', 'stone', 'slug', 'ton', 'pennyweight', 'grain', 'momme')),
    ('temperature', ('celsius', 'fahrenheit', 'kelvin', 'rankine')),
    ('volume', ('liter', 'gallon', 'cup', 'pint', 'quart', 'fluid', 'milliliter', 'spoon', 'drop')),
    ('area', ('acre', 'hectare', 'square')),
    ('pressure', ('pascal', 'bar', 'psi', 'atmosphere', 'mmhg', 'torr', 'water', 'mercury')),
    ('energy', ('joule', 'calorie', 'btu', 'erg', 'therm', 'electron-volt')),
    ('power', ('watt', 'horsepower')),
    ('data', ('byte', 'bit', 'nibble', 'baud')),
    ('time', ('second', 'minute', 'hour', 'day', 'week', 'month', 'year')),
    ('angle', ('degree', 'radian', 'mil ', 'arc', 'gradian', 'quadrant')),
    ('number-systems', ('binary', 'hex', 'octal', 'decimal', 'ascii', 'fraction')),
    ('radiation', ('becquerel', 'curie', 'gray', 'rad', 'sievert', 'rem', 'roentgen', 'rutherford')),
    ('electrical', ('volt', 'amp', 'ohm', 'siemens', 'farad', 'henry', 'coulomb')),
    ('force', ('newton', 'dyne', 'foot-pound')),
    ('light', ('lumen', 'lux', 'candela')),
)


def guess_category(name):
    """Guess the category key of a calculator from its display name.

    The name is lower-cased and searched for the substrings listed in
    _CATEGORY_RULES; the category of the first rule that matches is returned.

    Args:
        name: Calculator name, e.g. "Miles per hour to Kilometers per hour".

    Returns:
        A category key such as 'length' or 'speed', or 'other' when no
        marker matches.
    """
    name_l = name.lower()
    for category, markers in _CATEGORY_RULES:
        if any(marker in name_l for marker in markers):
            return category
    return 'other'
# Default destination of the generated TypeScript module.
_DEFAULT_OUTPUT_PATH = '/home/ben/Seafile/Storage/Docs/howdoyouconvert/hdyc-svelte/src/lib/data/calculators.ts'

# Static TypeScript emitted before the calculator entries.
# NOTE(review): the 'energy' icon is an empty string while every other
# category has an emoji -- confirm whether a character was lost.
_TS_HEADER = """// THIS FILE IS AUTO-GENERATED BY migrate.py
export type CalcType = 'standard' | 'inverse' | '3col' | '3col-mul' | 'base' | 'text-bin' | 'bin-text' | 'dms-dd' | 'dd-dms' | 'dec-frac' | 'db-int' | 'db-spl' | 'db-v' | 'db-w';
export interface CalculatorDef {
slug: string;
name: string;
category: string;
type: CalcType;
hidden?: boolean;
factor?: number;
offset?: number;
fromBase?: number;
toBase?: number;
labels: { in1: string; in2: string; in3?: string };
descriptionHTML?: string;
}
export const categories: Record<string, { label: string; icon: string }> = {
length: { label: 'Length / Distance', icon: '📏' },
weight: { label: 'Weight / Mass', icon: '⚖️' },
temperature: { label: 'Temperature', icon: '🌡️' },
volume: { label: 'Volume', icon: '🧪' },
area: { label: 'Area', icon: '📐' },
speed: { label: 'Speed / Velocity', icon: '💨' },
pressure: { label: 'Pressure', icon: '🔽' },
energy: { label: 'Energy', icon: '' },
power: { label: 'Power', icon: '🔌' },
data: { label: 'Data Storage', icon: '💾' },
time: { label: 'Time', icon: '⏱️' },
angle: { label: 'Angle', icon: '📐' },
'number-systems':{ label: 'Number Systems', icon: '🔢' },
radiation: { label: 'Radiation', icon: '☢️' },
electrical: { label: 'Electrical', icon: '🔋' },
force: { label: 'Force / Torque', icon: '💪' },
light: { label: 'Light', icon: '💡' },
other: { label: 'Other', icon: '🔄' },
};
export const calculators: CalculatorDef[] = [
"""

# Static TypeScript emitted after the calculator entries.
_TS_FOOTER = """
];
const slugIndex = new Map(calculators.map(c => [c.slug, c]));
export function getCalculatorBySlug(slug: string): CalculatorDef | undefined {
return slugIndex.get(slug);
}
export function getCalculatorsByCategory(category: string): CalculatorDef[] {
return calculators.filter(c => c.category === category);
}
export function getCategoriesWithCounts(): { key: string; label: string; icon: string; count: number }[] {
return Object.entries(categories).map(([key, meta]) => ({
key,
...meta,
count: calculators.filter(c => c.category === key && !c.hidden).length,
}));
}
export function searchCalculators(query: string): CalculatorDef[] {
const q = query.toLowerCase();
return calculators.filter(c =>
(c.name.toLowerCase().includes(q) ||
c.slug.includes(q) ||
c.labels.in1.toLowerCase().includes(q) ||
c.labels.in2.toLowerCase().includes(q)) && !c.hidden
);
}
"""


def _parse_number(text):
    """Return *text* ("1.8", "+32", "5/9") as a float, or None if not numeric."""
    try:
        if '/' in text:
            numerator, denominator = text.split('/')
            return float(numerator) / float(denominator)
        return float(text)
    except (ValueError, ZeroDivisionError):
        return None


def _classify_calculator(slug, factor_raw):
    """Derive (type, factor, offset) from a row's slug and raw factor cell.

    factor and offset are returned as the strings found in the cell ("1" and
    "0" when absent); they are only meaningful for the 'standard' type.
    """
    c_type = 'standard'
    factor_val = "1"
    offset_val = "0"

    if '1/x' in factor_raw:
        c_type = 'inverse'
    elif 'Multi-Variable' in factor_raw:
        # Based on hdyc-calculators.js, amps-to-watts is 3col-mul, watts-to-amps is 3col
        c_type = '3col-mul' if slug in ('amps-to-watts', 'lux-to-lumens') else '3col'
    elif 'Logarithmic' in factor_raw or 'Exponential' in factor_raw:
        if 'db-int' in slug or 'intensity' in slug:
            c_type = 'db-int'
        elif 'spl' in slug or 'sound' in slug:
            c_type = 'db-spl'
        elif 'volts' in slug:
            c_type = 'db-v'
        else:
            c_type = 'db-w'
    elif 'Base 60' in factor_raw:
        if slug == 'degrees-minutes-and-seconds-to-decimal-degrees':
            c_type = 'dms-dd'
        else:
            c_type = 'dd-dms'
    elif 'GCD' in factor_raw or 'string split' in factor_raw or 'fraction' in slug:
        c_type = 'dec-frac'
    elif 'N/A' in factor_raw or 'Text' in factor_raw:
        if 'ascii' in slug:
            c_type = 'text-bin' if slug.startswith('ascii') else 'bin-text'
        elif any(word in slug for word in ('binary', 'hex', 'decimal', 'octal')):
            c_type = 'base'
    elif ('10_to_2' in factor_raw or '16_to_2' in factor_raw
          or '10_to_16' in factor_raw or 'base' in factor_raw.lower()):
        c_type = 'base'
    elif 'Linear Offset' in factor_raw:
        # "Linear Offset (1.8x + 32)"
        with_offset = re.search(r'Linear Offset \(([\d\./]+)x\s*([+-]\s*[\d\.]+)\)', factor_raw)
        if with_offset:
            factor_val = with_offset.group(1)
            offset_val = with_offset.group(2).replace(' ', '')
        else:
            factor_only = re.search(r'Linear Offset \(([\d\./]+)x\)', factor_raw)
            if factor_only:
                factor_val = factor_only.group(1)
    elif _parse_number(factor_raw) is not None:
        # Plain numeric cell; fractions such as "1/12" are accepted too.
        factor_val = factor_raw

    return c_type, factor_val, offset_val


def _build_entry(name, slug, factor_raw, desc_html):
    """Build the dict that becomes one CalculatorDef object literal."""
    parts = name.split(' to ')
    if len(parts) == 2:
        in1, in2 = parts[0].strip(), parts[1].strip()
    else:
        in1, in2 = "From", "To"

    c_type, factor_val, offset_val = _classify_calculator(slug, factor_raw)
    entry = {
        'slug': slug,
        'name': name,
        'category': guess_category(name),
        'type': c_type,
    }

    labels = {'in1': in1, 'in2': in2}
    if c_type in ('3col', '3col-mul'):
        # Label of the third input of multi-variable calculators.
        if 'watts' in slug and 'amps' in slug:
            labels['in3'] = 'Volts'
        elif 'lumens' in slug:
            labels['in3'] = 'Area (sq m)'
        elif 'moles' in slug:
            labels['in3'] = 'Molar Mass'
        else:
            labels['in3'] = 'Result'
    entry['labels'] = labels

    if c_type == 'standard':
        # "1" and "0" are the defaults and are left out of the entry;
        # unparsable values are left out as well.
        if factor_val != "1":
            factor = _parse_number(factor_val)
            if factor is not None:
                entry['factor'] = factor
        if offset_val != "0":
            offset = _parse_number(offset_val)
            if offset is not None:
                entry['offset'] = offset

    if c_type == 'base':
        # The base word the slug starts with is the source base, any other
        # base word found in the slug is the target base.
        for word, base in (('binary', 2), ('hex', 16), ('octal', 8), ('decimal', 10)):
            if word in slug:
                entry['fromBase' if slug.startswith(word) else 'toBase'] = base

    # Empty descriptions are omitted altogether.
    if desc_html:
        entry['descriptionHTML'] = desc_html.replace('\n', '')
    return entry


def _hide_reverse_duplicates(entries):
    """Mark one calculator of every "A to B" / "B to A" pair as hidden.

    The direction whose factor is below 1 is hidden; when an entry has no
    factor, the alphabetically later slug is hidden. An entry is never hidden
    if its reverse already is, so a pair cannot vanish completely.
    """
    by_slug = {entry['slug']: entry for entry in entries}
    for entry in entries:
        parts = entry['name'].split(' to ')
        if len(parts) != 2:
            continue
        rev_slug = f"{parts[1]} to {parts[0]}".lower().replace(' ', '-')
        reverse = by_slug.get(rev_slug)
        if reverse is None or entry['slug'] == rev_slug:
            continue
        if reverse.get('hidden'):
            continue
        if 'factor' in entry:
            if entry['factor'] < 1.0:
                entry['hidden'] = True
        elif entry['slug'] > rev_slug:
            entry['hidden'] = True


def process(output_path=None):
    """Generate calculators.ts from the WordPress posts and the markdown list.

    Steps: fetch the post descriptions, parse the active calculator rows,
    build one entry per row, hide one direction of each reverse pair, and
    write the TypeScript module.

    Args:
        output_path: File to write. Defaults to _DEFAULT_OUTPUT_PATH.
    """
    if output_path is None:
        output_path = _DEFAULT_OUTPUT_PATH

    wp_data = fetch_all_wp_calculators()
    active_rows = parse_calculators_list()

    entries = []
    for name, slug, factor_raw in active_rows:
        if name == 'Calculator Name' or not slug:
            continue
        entries.append(_build_entry(name, slug, factor_raw, wp_data.get(slug, "")))

    _hide_reverse_duplicates(entries)

    # json.dumps yields valid TypeScript object literals and escapes quotes,
    # backslashes and control characters in the fetched description HTML.
    # Splicing that HTML into a template literal would break on a backtick
    # or "${" in the text.
    body = "".join(f" {json.dumps(entry)},\n" for entry in entries)

    # The template contains emoji, so the encoding must not depend on the locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(_TS_HEADER + body + _TS_FOOTER)
    print(f"Generated {len(entries)} calculators into calculators.ts")
# Run the migration only when executed as a script, not on import.
if __name__ == '__main__':
    process()