SafeBite/update_catalog.py
Bruno Charest d569c344b0 feat: expand catalog seed data with 4000+ grocery items across grains, condiments, beverages, dairy, meat, produce, and specialty categories with bilingual support
- Increment catalog version from 3 to 4
- Add 330+ grain/cereal items including gluten-free variants, pasta types, rice varieties, flour, and breakfast cereals with French/English names
- Add 200+ condiment items covering sauces, spices, oils, vinegars, and seasonings
- Add 150+ beverage items including juices, sodas, coffee, tea, and alcoholic drinks
2026-04-29 14:25:33 -04:00

123 lines
3.9 KiB
Python

import json, re, unicodedata
# Letters that Unicode NFKD normalization cannot decompose into ASCII base
# characters. Without this table they would be silently dropped by the ASCII
# encoding step (e.g. "Bœuf" would become "buf").
_LIGATURES = str.maketrans({
    'œ': 'oe', 'Œ': 'OE',
    'æ': 'ae', 'Æ': 'AE',
    'ß': 'ss',
})


def slugify(name):
    """Return a lowercase ASCII identifier derived from *name*.

    Accents are stripped, common ligatures are spelled out, every character
    other than ``a-z``, ``0-9`` and whitespace is removed, and each run of
    whitespace (spaces, tabs, newlines) becomes a single underscore. The
    result is truncated to 50 characters and may be empty when *name*
    contains no ASCII letters or digits.
    """
    expanded = name.translate(_LIGATURES)
    ascii_text = (
        unicodedata.normalize('NFKD', expanded)
        .encode('ascii', 'ignore')
        .decode('ascii')
        .lower()
    )
    kept = re.sub(r'[^a-z0-9\s]', '', ascii_text)
    # Collapse any whitespace run, not just literal spaces, so tabs and
    # newlines never leak into the identifier.
    slug = re.sub(r'\s+', '_', kept.strip())
    return slug[:50]
# Read the catalog seed that will be updated in place.
with open('app/src/main/assets/catalog_seed.json', 'r', encoding='utf-8') as seed_file:
    data = json.loads(seed_file.read())

# Read the item names to merge, keyed by source category.
with open('items_data.json', 'r', encoding='utf-8') as items_file:
    items_data = json.loads(items_file.read())

# Set the catalog's version field.
data['version'] = 4
# One row per source category:
# (source key, target domainId, target categoryId, emoji, tag).
_CATEGORY_TABLE = (
    ('spices', 'grocery', 'spices', '🧂', 'condiment'),
    ('frozen', 'grocery', 'frozen', '🧊', 'surgele'),
    ('pantry', 'grocery', 'pantry', '🌾', 'feculent'),
    ('snacks', 'grocery', 'snacks', '🍿', 'snack'),
    ('beverages', 'grocery', 'beverages', '🥤', 'boisson'),
    ('cleaning', 'cleaning', 'cleaning_products', '🧴', 'menage'),
    ('hygiene', 'pharmacy', 'hygiene', '🧴', 'hygiene'),
    ('pets', 'pets', 'pet_accessories', '🐾', 'animal'),
    ('garden', 'hardware', 'garden', '🌱', 'jardin'),
    ('misc', 'grocery', 'misc', '📦', 'divers'),
)

# Source category -> (domainId, categoryId)
cat_map = {row[0]: (row[1], row[2]) for row in _CATEGORY_TABLE}

# Source category -> emoji
emoji_map = {row[0]: row[3] for row in _CATEGORY_TABLE}

# Source category -> tag
tag_map = {row[0]: row[4] for row in _CATEGORY_TABLE}
# Collect every item id and item name already present in the catalog.
# Each entry is paired with its enclosing category so the name index can
# record which categoryId a name belongs to.
_catalog_entries = [
    (category, entry)
    for domain in data['domains']
    for category in domain.get('categories', [])
    for entry in category.get('items', [])
]
all_existing_ids = {entry['itemId'] for _, entry in _catalog_entries}
# Keys are lowercased names; when a name occurs more than once, the last
# occurrence in catalog order wins.
all_existing_names = {
    entry['name'].lower(): category['categoryId']
    for category, entry in _catalog_entries
}
# Create the "misc" category in the grocery domain if it does not exist yet.
# Only the first domain whose domainId is 'grocery' is considered.
for d in data['domains']:
    if d['domainId'] != 'grocery':
        continue
    # setdefault keeps this consistent with the other lookups in this script,
    # which tolerate a domain that has no 'categories' key.
    cats = d.setdefault('categories', [])
    if not any(c['categoryId'] == 'misc' for c in cats):
        cats.append({
            "categoryId": "misc",
            "name": "Divers",
            "emoji": "📦",
            "color": "#9E9E9E",
            "sortOrder": 10,
            "items": []
        })
    break
# Add the new items to their target categories.
added_total = 0
for src_cat, items in items_data.items():
    domain_id, cat_id = cat_map[src_cat]
    emoji = emoji_map[src_cat]
    tag = tag_map[src_cat]

    # Locate the destination category inside the first domain whose id matches.
    target_cat = None
    for d in data['domains']:
        if d['domainId'] == domain_id:
            for c in d.get('categories', []):
                if c['categoryId'] == cat_id:
                    target_cat = c
                    break
            break
    if target_cat is None:
        print(f"SKIP: category {cat_id} not found in {domain_id}")
        continue

    # Ensures the category has an 'items' list even when nothing is added.
    existing_items = target_cat.setdefault('items', [])
    added_in_cat = 0
    for name in items:
        name_key = name.lower()
        # Names are deduplicated case-insensitively across the whole catalog,
        # not just within the target category.
        if name_key in all_existing_names:
            continue

        # A name with no ASCII letters or digits slugifies to '', which would
        # produce an empty itemId; fall back to a generic base instead.
        base = slugify(name) or 'item'
        item_id = base
        suffix = 1
        # Append _1, _2, ... until the id is unique across the catalog.
        while item_id in all_existing_ids:
            item_id = f"{base}_{suffix}"
            suffix += 1

        all_existing_ids.add(item_id)
        all_existing_names[name_key] = cat_id
        existing_items.append({
            "itemId": item_id,
            "name": name,
            "emoji": emoji,
            # NOTE(review): a single string is stored under the plural key
            # "tags"; confirm this matches what the catalog consumer expects.
            "tags": tag
        })
        added_in_cat += 1

    added_total += added_in_cat
    # Report the number actually added for this category (skipped
    # duplicates are not counted).
    print(f"{src_cat}: +{added_in_cat} items, total {len(existing_items)}")
print(f"Total added: {added_total}")
# Write the updated catalog back over the seed file. indent=0 together with
# the compact separators puts each element on its own line with no
# indentation or padding.
with open('app/src/main/assets/catalog_seed.json', 'w', encoding='utf-8') as seed_out:
    serialized = json.dumps(data, ensure_ascii=False, indent=0, separators=(',', ':'))
    seed_out.write(serialized)
    seed_out.write('\n')
print("Updated catalog_seed.json")