mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
feat(consumer-prices): SearchAdapter global pipeline -- 10 markets, 18 retailers (#2063)
* fix(consumer-prices): smarter Exa queries + Firecrawl URL fallback for missed prices
- Add market-aware query building (maps marketCode to country name) so LuLu
queries prefer UAE pages over BHD/SAR/QAR GCC country pages
- Support searchQueryTemplate in acquisition config per retailer:
- noon_grocery_ae: adds "grocery fresh food" to avoid seeds and storage boxes
- lulu_ae: adds UAE country hint to anchor to the right market
- Firecrawl URL fallback: when all Exa summaries fail price extraction, scrape
the first result URL with Firecrawl (JS-rendered markdown surfaces prices
that static Exa summaries miss, fixes Noon chicken/rice/bread pricing gaps)
- Pass FIRECRAWL_API_KEY to ExaSearchAdapter in scrape.ts
* feat(consumer-prices): SearchAdapter -- Exa URL discovery + Firecrawl structured extraction
Replace ExaSearchAdapter regex-on-AI-summary with proper two-stage pipeline:
Stage 1 (Exa): neural search on retailer domain, ranked product page URLs.
Stage 2 (Firecrawl): structured LLM extraction from JS-rendered page.
Fixes Noon JS price invisibility and LuLu wrong-currency results.
Domain allowlist (exact hostname) + isTitlePlausible (40% token overlap) block
seed/container mismatches. All 4 AE retailers switched to adapter: search.
queryTemplate per retailer: LuLu anchors to UAE, Noon adds grocery food context.
* fix(consumer-prices): SearchAdapter smoke test fixes
- Fix FirecrawlProvider extract response path: data.data.extract not data.data
- Add prompt to ExtractSchema so Firecrawl LLM parses split prices (4 + .69 = 4.69)
- Update extract prompt to return listed price even when out of stock
- isTitlePlausible: strip packaging words (Pack/Box/Bag) from canonical tokens so
size variants (eggs x 6, eggs x 15) match canonical Eggs Fresh 12 Pack on eggs token
while keeping 40% threshold to block seeds/canned goods
- Add urlPathContains to SearchConfigSchema for URL path filtering
- Noon: urlPathContains /p/ blocks category pages returning wrong products
- LuLu: baseUrl -> gcc.luluhypermarket.com (site migrated), urlPathContains /en-ae/
- All retailers: numResults 3 -> 5 for more fallback URLs
- Add ExtractSchema.prompt field to acquisition types
* feat(consumer-prices): SearchAdapter genericity improvements
- MARKET_NAMES: expand to 20 markets (adds GB, US, CA, AU, DE, FR, NL, SG, IN, PK, NG, KE, ZA);
falls back to marketCode.toUpperCase() for any unlisted market
- Default query: add 'grocery' hint so new retailers work well without custom queryTemplate
- queryTemplate: add {category} token (maps to basket item category)
- isTitlePlausible: naive stemming (tomatoes->tomato, carrots->carrot) with two guards:
1. NON_FOOD_INDICATORS set (seeds/planting/seedling) rejects garden products before token check
2. stem only applied when result length >= 4 and differs from original (blocks the eggs->egg false positive)
- stem guard prevents 'eggs'->'egg' matching 'Egg Storage Box Container'
- stem guard allows 'tomatoes'->'tomato' matching 'Fresh Tomato India 1kg'
* feat(consumer-prices): global expansion -- 9 markets, 13 retailers, USD normalization
- Add 9 basket configs: US, GB (UK), SG, IN, CH, SA, AU, KE, BR
- Add 13 retailer configs: Walmart/Kroger/WholeFoods (US), Tesco/Sainsbury's (UK),
FairPrice (SG), BigBasket (IN), Migros/Coop (CH), Tamimi (SA),
Woolworths (AU), Naivas (KE), Pao de Acucar (BR)
- Add src/fx/rates.ts: static FX table (16 currencies to USD)
- aggregate.ts: compute basket_total_usd metric for cross-country comparison
- search.ts: add Switzerland (ch) and Brazil (br) to MARKET_NAMES
* fix(consumer-prices): code review fixes -- match gate, MARKET_NAMES dedup, domain check
- scrape.ts: extend match-creation gate to include search adapter (was exa-search only)
Without this, all 13 new global retailers never wrote product_matches rows so the
aggregate job produced no index values -- the global expansion was a silent no-op
- market-names.ts: extract shared MARKET_NAMES; exa-search had an incomplete 7-market
copy silently producing blank market context for non-GCC queries
- exa-search.ts: add isAllowedHost check before firecrawlFetch (domain validation bypass)
- fx/rates.ts: add RATES_DATE export for ops staleness visibility
- essentials_in/ke: add 12th item (paneer / processed cheese) for coverage parity
- wholefoods_us: add urlPathContains /product/ to block non-product Exa results
This commit is contained in:
119
consumer-prices-core/configs/baskets/essentials_au.yaml
Normal file
119
consumer-prices-core/configs/baskets/essentials_au.yaml
Normal file
@@ -0,0 +1,119 @@
|
||||
basket:
|
||||
slug: essentials-au
|
||||
name: Essentials Basket Australia
|
||||
marketCode: au
|
||||
methodology: fixed
|
||||
baseDate: "2025-01-01"
|
||||
description: >
|
||||
Core household essentials tracked weekly across Australian grocery retailers.
|
||||
Weighted to reflect a typical household of 4 in Australia.
|
||||
Does not represent official CPI. Tracks consumer price pressure only.
|
||||
|
||||
items:
|
||||
- id: eggs_12
|
||||
category: eggs
|
||||
canonicalName: Free Range Eggs 12 Pack
|
||||
weight: 0.12
|
||||
baseUnit: ct
|
||||
substitutionGroup: eggs
|
||||
minBaseQty: 10
|
||||
maxBaseQty: 15
|
||||
|
||||
- id: milk_2l
|
||||
category: dairy
|
||||
canonicalName: Full Cream Milk 2L
|
||||
weight: 0.10
|
||||
baseUnit: ml
|
||||
substitutionGroup: milk_full_fat
|
||||
minBaseQty: 1800
|
||||
maxBaseQty: 2200
|
||||
|
||||
- id: bread_white
|
||||
category: bread
|
||||
canonicalName: White Sandwich Bread 700g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: bread_white
|
||||
minBaseQty: 600
|
||||
maxBaseQty: 800
|
||||
|
||||
- id: rice_1kg
|
||||
category: rice
|
||||
canonicalName: Long Grain White Rice 1kg
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: rice_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cooking_oil_sunflower_1l
|
||||
category: cooking_oil
|
||||
canonicalName: Sunflower Oil 1L
|
||||
weight: 0.07
|
||||
baseUnit: ml
|
||||
substitutionGroup: cooking_oil_sunflower
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: chicken_whole_1kg
|
||||
category: chicken
|
||||
canonicalName: Whole Chicken Fresh
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: chicken_whole
|
||||
minBaseQty: 1200
|
||||
maxBaseQty: 2000
|
||||
|
||||
- id: tomatoes_1kg
|
||||
category: tomatoes
|
||||
canonicalName: Tomatoes Fresh 1kg
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: tomatoes
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: onions_1kg
|
||||
category: onions
|
||||
canonicalName: Brown Onions 1kg
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: onions
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: water_1_5l
|
||||
category: water
|
||||
canonicalName: Still Mineral Water 1.5L
|
||||
weight: 0.07
|
||||
baseUnit: ml
|
||||
substitutionGroup: water_still
|
||||
minBaseQty: 1300
|
||||
maxBaseQty: 1700
|
||||
|
||||
- id: sugar_1kg
|
||||
category: sugar
|
||||
canonicalName: White Sugar 1kg
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: sugar_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cheese_cheddar_500g
|
||||
category: dairy
|
||||
canonicalName: Tasty Cheddar Cheese 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: cheese_cheddar
|
||||
minBaseQty: 400
|
||||
maxBaseQty: 600
|
||||
|
||||
- id: yogurt_500g
|
||||
category: dairy
|
||||
canonicalName: Natural Yogurt 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: yogurt_plain
|
||||
minBaseQty: 450
|
||||
maxBaseQty: 550
|
||||
110
consumer-prices-core/configs/baskets/essentials_br.yaml
Normal file
110
consumer-prices-core/configs/baskets/essentials_br.yaml
Normal file
@@ -0,0 +1,110 @@
|
||||
basket:
|
||||
slug: essentials-br
|
||||
name: Essentials Basket Brazil
|
||||
marketCode: br
|
||||
methodology: fixed
|
||||
baseDate: "2025-01-01"
|
||||
description: >
|
||||
Core household essentials tracked weekly across Brazilian grocery retailers.
|
||||
Weighted to reflect a typical urban household of 4 in Brazil.
|
||||
Does not represent official CPI. Tracks consumer price pressure only.
|
||||
|
||||
items:
|
||||
- id: eggs_12
|
||||
category: eggs
|
||||
canonicalName: Ovos Frescos 12 Unidades
|
||||
weight: 0.10
|
||||
baseUnit: ct
|
||||
substitutionGroup: eggs
|
||||
minBaseQty: 10
|
||||
maxBaseQty: 15
|
||||
|
||||
- id: milk_1l
|
||||
category: dairy
|
||||
canonicalName: Leite Integral 1L
|
||||
weight: 0.12
|
||||
baseUnit: ml
|
||||
substitutionGroup: milk_full_fat
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: bread_white
|
||||
category: bread
|
||||
canonicalName: Pão de Forma Branco 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: bread_white
|
||||
minBaseQty: 450
|
||||
maxBaseQty: 550
|
||||
|
||||
- id: rice_1kg
|
||||
category: rice
|
||||
canonicalName: Arroz Branco 1kg
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: rice_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cooking_oil_soy_900ml
|
||||
category: cooking_oil
|
||||
canonicalName: Óleo de Soja 900ml
|
||||
weight: 0.09
|
||||
baseUnit: ml
|
||||
substitutionGroup: cooking_oil_soy
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1000
|
||||
|
||||
- id: chicken_whole_1kg
|
||||
category: chicken
|
||||
canonicalName: Frango Inteiro Resfriado 1kg
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: chicken_whole
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: tomatoes_1kg
|
||||
category: tomatoes
|
||||
canonicalName: Tomate Fresco 1kg
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: tomatoes
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: onions_1kg
|
||||
category: onions
|
||||
canonicalName: Cebola 1kg
|
||||
weight: 0.07
|
||||
baseUnit: g
|
||||
substitutionGroup: onions
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: water_1_5l
|
||||
category: water
|
||||
canonicalName: Água Mineral 1.5L
|
||||
weight: 0.06
|
||||
baseUnit: ml
|
||||
substitutionGroup: water_still
|
||||
minBaseQty: 1300
|
||||
maxBaseQty: 1700
|
||||
|
||||
- id: sugar_1kg
|
||||
category: sugar
|
||||
canonicalName: Açúcar Cristal 1kg
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: sugar_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: yogurt_500g
|
||||
category: dairy
|
||||
canonicalName: Iogurte Natural 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: yogurt_plain
|
||||
minBaseQty: 450
|
||||
maxBaseQty: 550
|
||||
119
consumer-prices-core/configs/baskets/essentials_ch.yaml
Normal file
119
consumer-prices-core/configs/baskets/essentials_ch.yaml
Normal file
@@ -0,0 +1,119 @@
|
||||
basket:
|
||||
slug: essentials-ch
|
||||
name: Essentials Basket Switzerland
|
||||
marketCode: ch
|
||||
methodology: fixed
|
||||
baseDate: "2025-01-01"
|
||||
description: >
|
||||
Core household essentials tracked weekly across Swiss grocery retailers.
|
||||
Weighted to reflect a typical household of 4 in Switzerland.
|
||||
Does not represent official CPI. Tracks consumer price pressure only.
|
||||
|
||||
items:
|
||||
- id: eggs_10
|
||||
category: eggs
|
||||
canonicalName: Fresh Eggs 10 Pack
|
||||
weight: 0.12
|
||||
baseUnit: ct
|
||||
substitutionGroup: eggs
|
||||
minBaseQty: 8
|
||||
maxBaseQty: 12
|
||||
|
||||
- id: milk_1l
|
||||
category: dairy
|
||||
canonicalName: Vollmilch 1L
|
||||
weight: 0.10
|
||||
baseUnit: ml
|
||||
substitutionGroup: milk_full_fat
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: bread_white
|
||||
category: bread
|
||||
canonicalName: Weissbrot Sandwich 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: bread_white
|
||||
minBaseQty: 450
|
||||
maxBaseQty: 550
|
||||
|
||||
- id: rice_1kg
|
||||
category: rice
|
||||
canonicalName: Basmati Rice 1kg
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: rice_basmati
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cooking_oil_sunflower_1l
|
||||
category: cooking_oil
|
||||
canonicalName: Sonnenblumenöl 1L
|
||||
weight: 0.07
|
||||
baseUnit: ml
|
||||
substitutionGroup: cooking_oil_sunflower
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: chicken_whole_1kg
|
||||
category: chicken
|
||||
canonicalName: Whole Chicken Fresh 1kg
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: chicken_whole
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1400
|
||||
|
||||
- id: tomatoes_500g
|
||||
category: tomatoes
|
||||
canonicalName: Tomaten 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: tomatoes
|
||||
minBaseQty: 400
|
||||
maxBaseQty: 600
|
||||
|
||||
- id: onions_1kg
|
||||
category: onions
|
||||
canonicalName: Zwiebeln 1kg
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: onions
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: water_1_5l
|
||||
category: water
|
||||
canonicalName: Mineralwasser 1.5L
|
||||
weight: 0.07
|
||||
baseUnit: ml
|
||||
substitutionGroup: water_still
|
||||
minBaseQty: 1300
|
||||
maxBaseQty: 1700
|
||||
|
||||
- id: sugar_1kg
|
||||
category: sugar
|
||||
canonicalName: Zucker 1kg
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: sugar_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cheese_200g
|
||||
category: dairy
|
||||
canonicalName: Emmentaler Käse 200g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: cheese_hard
|
||||
minBaseQty: 150
|
||||
maxBaseQty: 250
|
||||
|
||||
- id: yogurt_500g
|
||||
category: dairy
|
||||
canonicalName: Naturjoghurt 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: yogurt_plain
|
||||
minBaseQty: 450
|
||||
maxBaseQty: 550
|
||||
119
consumer-prices-core/configs/baskets/essentials_gb.yaml
Normal file
119
consumer-prices-core/configs/baskets/essentials_gb.yaml
Normal file
@@ -0,0 +1,119 @@
|
||||
basket:
|
||||
slug: essentials-gb
|
||||
name: Essentials Basket UK
|
||||
marketCode: gb
|
||||
methodology: fixed
|
||||
baseDate: "2025-01-01"
|
||||
description: >
|
||||
Core household essentials tracked weekly across UK grocery retailers.
|
||||
Weighted to reflect a typical household of 4 in the United Kingdom.
|
||||
Does not represent official CPI. Tracks consumer price pressure only.
|
||||
|
||||
items:
|
||||
- id: eggs_12
|
||||
category: eggs
|
||||
canonicalName: Free Range Eggs 12 Pack
|
||||
weight: 0.12
|
||||
baseUnit: ct
|
||||
substitutionGroup: eggs
|
||||
minBaseQty: 10
|
||||
maxBaseQty: 15
|
||||
|
||||
- id: milk_1l
|
||||
category: dairy
|
||||
canonicalName: Semi Skimmed Milk 2 Pint
|
||||
weight: 0.10
|
||||
baseUnit: ml
|
||||
substitutionGroup: milk_semi_skimmed
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: bread_white
|
||||
category: bread
|
||||
canonicalName: White Sliced Bread 800g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: bread_white
|
||||
minBaseQty: 700
|
||||
maxBaseQty: 900
|
||||
|
||||
- id: rice_1kg
|
||||
category: rice
|
||||
canonicalName: Basmati Rice 1kg
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: rice_basmati
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cooking_oil_sunflower_1l
|
||||
category: cooking_oil
|
||||
canonicalName: Sunflower Oil 1L
|
||||
weight: 0.07
|
||||
baseUnit: ml
|
||||
substitutionGroup: cooking_oil_sunflower
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: chicken_whole_1kg
|
||||
category: chicken
|
||||
canonicalName: Whole Chicken Fresh 1.5kg
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: chicken_whole
|
||||
minBaseQty: 1200
|
||||
maxBaseQty: 1800
|
||||
|
||||
- id: tomatoes_1kg
|
||||
category: tomatoes
|
||||
canonicalName: Tomatoes Fresh 1kg
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: tomatoes
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: onions_1kg
|
||||
category: onions
|
||||
canonicalName: Brown Onions 1kg
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: onions
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: water_1_5l
|
||||
category: water
|
||||
canonicalName: Still Water 6 x 1.5L
|
||||
weight: 0.07
|
||||
baseUnit: ml
|
||||
substitutionGroup: water_still
|
||||
minBaseQty: 7000
|
||||
maxBaseQty: 10000
|
||||
|
||||
- id: sugar_1kg
|
||||
category: sugar
|
||||
canonicalName: Granulated White Sugar 1kg
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: sugar_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cheese_cheddar_400g
|
||||
category: dairy
|
||||
canonicalName: Mature Cheddar Cheese 400g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: cheese_cheddar
|
||||
minBaseQty: 350
|
||||
maxBaseQty: 450
|
||||
|
||||
- id: yogurt_500g
|
||||
category: dairy
|
||||
canonicalName: Natural Yogurt 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: yogurt_plain
|
||||
minBaseQty: 450
|
||||
maxBaseQty: 550
|
||||
119
consumer-prices-core/configs/baskets/essentials_in.yaml
Normal file
119
consumer-prices-core/configs/baskets/essentials_in.yaml
Normal file
@@ -0,0 +1,119 @@
|
||||
basket:
|
||||
slug: essentials-in
|
||||
name: Essentials Basket India
|
||||
marketCode: in
|
||||
methodology: fixed
|
||||
baseDate: "2025-01-01"
|
||||
description: >
|
||||
Core household essentials tracked weekly across Indian grocery retailers.
|
||||
Weighted to reflect a typical urban household of 4 in India.
|
||||
Does not represent official CPI. Tracks consumer price pressure only.
|
||||
|
||||
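items: # NOTE(review): the 12 item weights below sum to 1.06, not 1.00 (other baskets in this commit sum to 1.00) -- confirm whether aggregation normalizes weights or rebalance after adding paneer_200g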
|
||||
- id: eggs_12
|
||||
category: eggs
|
||||
canonicalName: Fresh Eggs 12 Pack
|
||||
weight: 0.10
|
||||
baseUnit: ct
|
||||
substitutionGroup: eggs
|
||||
minBaseQty: 10
|
||||
maxBaseQty: 15
|
||||
|
||||
- id: milk_1l
|
||||
category: dairy
|
||||
canonicalName: Full Cream Milk 1L
|
||||
weight: 0.12
|
||||
baseUnit: ml
|
||||
substitutionGroup: milk_full_fat
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: bread_white
|
||||
category: bread
|
||||
canonicalName: White Sandwich Bread 400g
|
||||
weight: 0.07
|
||||
baseUnit: g
|
||||
substitutionGroup: bread_white
|
||||
minBaseQty: 350
|
||||
maxBaseQty: 450
|
||||
|
||||
- id: rice_basmati_1kg
|
||||
category: rice
|
||||
canonicalName: Basmati Rice 1kg
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: rice_basmati
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cooking_oil_sunflower_1l
|
||||
category: cooking_oil
|
||||
canonicalName: Sunflower Oil 1L
|
||||
weight: 0.09
|
||||
baseUnit: ml
|
||||
substitutionGroup: cooking_oil_sunflower
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: chicken_whole_1kg
|
||||
category: chicken
|
||||
canonicalName: Whole Chicken Fresh 1kg
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: chicken_whole
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: tomatoes_1kg
|
||||
category: tomatoes
|
||||
canonicalName: Tomatoes Fresh 1kg
|
||||
weight: 0.09
|
||||
baseUnit: g
|
||||
substitutionGroup: tomatoes
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: onions_1kg
|
||||
category: onions
|
||||
canonicalName: Onions 1kg
|
||||
weight: 0.09
|
||||
baseUnit: g
|
||||
substitutionGroup: onions
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: water_1l
|
||||
category: water
|
||||
canonicalName: Packaged Drinking Water 1L
|
||||
weight: 0.06
|
||||
baseUnit: ml
|
||||
substitutionGroup: water_still
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: sugar_1kg
|
||||
category: sugar
|
||||
canonicalName: White Sugar 1kg
|
||||
weight: 0.07
|
||||
baseUnit: g
|
||||
substitutionGroup: sugar_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: paneer_200g
|
||||
category: dairy
|
||||
canonicalName: Fresh Paneer 200g
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: cheese_paneer
|
||||
minBaseQty: 150
|
||||
maxBaseQty: 250
|
||||
|
||||
- id: yogurt_400g
|
||||
category: dairy
|
||||
canonicalName: Plain Curd Yogurt 400g
|
||||
weight: 0.07
|
||||
baseUnit: g
|
||||
substitutionGroup: yogurt_plain
|
||||
minBaseQty: 350
|
||||
maxBaseQty: 450
|
||||
119
consumer-prices-core/configs/baskets/essentials_ke.yaml
Normal file
119
consumer-prices-core/configs/baskets/essentials_ke.yaml
Normal file
@@ -0,0 +1,119 @@
|
||||
basket:
|
||||
slug: essentials-ke
|
||||
name: Essentials Basket Kenya
|
||||
marketCode: ke
|
||||
methodology: fixed
|
||||
baseDate: "2025-01-01"
|
||||
description: >
|
||||
Core household essentials tracked weekly across Kenyan grocery retailers.
|
||||
Weighted to reflect a typical urban household of 4 in Kenya.
|
||||
Does not represent official CPI. Tracks consumer price pressure only.
|
||||
|
||||
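items: # NOTE(review): the 12 item weights below sum to 1.06, not 1.00 (other baskets in this commit sum to 1.00) -- confirm whether aggregation normalizes weights or rebalance after adding cheese_processed_200g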
|
||||
- id: eggs_12
|
||||
category: eggs
|
||||
canonicalName: Fresh Eggs 12 Pack
|
||||
weight: 0.12
|
||||
baseUnit: ct
|
||||
substitutionGroup: eggs
|
||||
minBaseQty: 10
|
||||
maxBaseQty: 15
|
||||
|
||||
- id: milk_1l
|
||||
category: dairy
|
||||
canonicalName: Fresh Full Cream Milk 1L
|
||||
weight: 0.12
|
||||
baseUnit: ml
|
||||
substitutionGroup: milk_full_fat
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: bread_white
|
||||
category: bread
|
||||
canonicalName: White Sliced Bread 400g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: bread_white
|
||||
minBaseQty: 350
|
||||
maxBaseQty: 450
|
||||
|
||||
- id: rice_1kg
|
||||
category: rice
|
||||
canonicalName: Long Grain Rice 1kg
|
||||
weight: 0.10
|
||||
baseUnit: g
|
||||
substitutionGroup: rice_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cooking_oil_1l
|
||||
category: cooking_oil
|
||||
canonicalName: Cooking Oil 1L
|
||||
weight: 0.09
|
||||
baseUnit: ml
|
||||
substitutionGroup: cooking_oil
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: chicken_whole_1kg
|
||||
category: chicken
|
||||
canonicalName: Whole Chicken Fresh 1kg
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: chicken_whole
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: tomatoes_1kg
|
||||
category: tomatoes
|
||||
canonicalName: Tomatoes Fresh 1kg
|
||||
weight: 0.09
|
||||
baseUnit: g
|
||||
substitutionGroup: tomatoes
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: onions_1kg
|
||||
category: onions
|
||||
canonicalName: Red Onions 1kg
|
||||
weight: 0.07
|
||||
baseUnit: g
|
||||
substitutionGroup: onions
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: water_1_5l
|
||||
category: water
|
||||
canonicalName: Drinking Water 1.5L
|
||||
weight: 0.06
|
||||
baseUnit: ml
|
||||
substitutionGroup: water_still
|
||||
minBaseQty: 1300
|
||||
maxBaseQty: 1700
|
||||
|
||||
- id: sugar_1kg
|
||||
category: sugar
|
||||
canonicalName: White Sugar 1kg
|
||||
weight: 0.07
|
||||
baseUnit: g
|
||||
substitutionGroup: sugar_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cheese_processed_200g
|
||||
category: dairy
|
||||
canonicalName: Processed Cheese 200g
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: cheese_processed
|
||||
minBaseQty: 150
|
||||
maxBaseQty: 250
|
||||
|
||||
- id: yogurt_500g
|
||||
category: dairy
|
||||
canonicalName: Plain Yogurt 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: yogurt_plain
|
||||
minBaseQty: 450
|
||||
maxBaseQty: 550
|
||||
119
consumer-prices-core/configs/baskets/essentials_sa.yaml
Normal file
119
consumer-prices-core/configs/baskets/essentials_sa.yaml
Normal file
@@ -0,0 +1,119 @@
|
||||
basket:
|
||||
slug: essentials-sa
|
||||
name: Essentials Basket Saudi Arabia
|
||||
marketCode: sa
|
||||
methodology: fixed
|
||||
baseDate: "2025-01-01"
|
||||
description: >
|
||||
Core household essentials tracked weekly across Saudi grocery retailers.
|
||||
Weighted to reflect a typical household of 4 in Saudi Arabia.
|
||||
Does not represent official CPI. Tracks consumer price pressure only.
|
||||
|
||||
items:
|
||||
- id: eggs_12
|
||||
category: eggs
|
||||
canonicalName: Eggs Fresh 12 Pack
|
||||
weight: 0.12
|
||||
baseUnit: ct
|
||||
substitutionGroup: eggs
|
||||
minBaseQty: 10
|
||||
maxBaseQty: 15
|
||||
|
||||
- id: milk_1l
|
||||
category: dairy
|
||||
canonicalName: Full Fat Fresh Milk 1L
|
||||
weight: 0.10
|
||||
baseUnit: ml
|
||||
substitutionGroup: milk_full_fat
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: bread_white
|
||||
category: bread
|
||||
canonicalName: White Sliced Bread 600g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: bread_white
|
||||
minBaseQty: 500
|
||||
maxBaseQty: 700
|
||||
|
||||
- id: rice_basmati_1kg
|
||||
category: rice
|
||||
canonicalName: Basmati Rice 1kg
|
||||
weight: 0.10
|
||||
baseUnit: g
|
||||
substitutionGroup: rice_basmati
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cooking_oil_sunflower_1l
|
||||
category: cooking_oil
|
||||
canonicalName: Sunflower Oil 1L
|
||||
weight: 0.08
|
||||
baseUnit: ml
|
||||
substitutionGroup: cooking_oil_sunflower
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: chicken_whole_1kg
|
||||
category: chicken
|
||||
canonicalName: Whole Chicken Fresh 1kg
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: chicken_whole
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: tomatoes_1kg
|
||||
category: tomatoes
|
||||
canonicalName: Tomatoes Fresh 1kg
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: tomatoes
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: onions_1kg
|
||||
category: onions
|
||||
canonicalName: Onions 1kg
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: onions
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: water_1_5l
|
||||
category: water
|
||||
canonicalName: Drinking Water 1.5L
|
||||
weight: 0.08
|
||||
baseUnit: ml
|
||||
substitutionGroup: water_still
|
||||
minBaseQty: 1400
|
||||
maxBaseQty: 1600
|
||||
|
||||
- id: sugar_1kg
|
||||
category: sugar
|
||||
canonicalName: White Sugar 1kg
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: sugar_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cheese_processed_200g
|
||||
category: dairy
|
||||
canonicalName: Processed Cheese Slices 200g
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: cheese_processed
|
||||
minBaseQty: 150
|
||||
maxBaseQty: 250
|
||||
|
||||
- id: yogurt_500g
|
||||
category: dairy
|
||||
canonicalName: Plain Yogurt 500g
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: yogurt_plain
|
||||
minBaseQty: 450
|
||||
maxBaseQty: 550
|
||||
119
consumer-prices-core/configs/baskets/essentials_sg.yaml
Normal file
119
consumer-prices-core/configs/baskets/essentials_sg.yaml
Normal file
@@ -0,0 +1,119 @@
|
||||
basket:
|
||||
slug: essentials-sg
|
||||
name: Essentials Basket Singapore
|
||||
marketCode: sg
|
||||
methodology: fixed
|
||||
baseDate: "2025-01-01"
|
||||
description: >
|
||||
Core household essentials tracked weekly across Singapore grocery retailers.
|
||||
Weighted to reflect a typical household of 4 in Singapore.
|
||||
Does not represent official CPI. Tracks consumer price pressure only.
|
||||
|
||||
items:
|
||||
- id: eggs_10
|
||||
category: eggs
|
||||
canonicalName: Fresh Eggs 10 Pack
|
||||
weight: 0.12
|
||||
baseUnit: ct
|
||||
substitutionGroup: eggs
|
||||
minBaseQty: 8
|
||||
maxBaseQty: 12
|
||||
|
||||
- id: milk_1l
|
||||
category: dairy
|
||||
canonicalName: Fresh Full Cream Milk 1L
|
||||
weight: 0.10
|
||||
baseUnit: ml
|
||||
substitutionGroup: milk_full_fat
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: bread_white
|
||||
category: bread
|
||||
canonicalName: White Sandwich Bread 400g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: bread_white
|
||||
minBaseQty: 350
|
||||
maxBaseQty: 450
|
||||
|
||||
- id: rice_jasmine_5kg
|
||||
category: rice
|
||||
canonicalName: Jasmine Rice 5kg
|
||||
weight: 0.10
|
||||
baseUnit: g
|
||||
substitutionGroup: rice_jasmine
|
||||
minBaseQty: 4500
|
||||
maxBaseQty: 5500
|
||||
|
||||
- id: cooking_oil_sunflower_2l
|
||||
category: cooking_oil
|
||||
canonicalName: Sunflower Oil 2L
|
||||
weight: 0.08
|
||||
baseUnit: ml
|
||||
substitutionGroup: cooking_oil_sunflower
|
||||
minBaseQty: 1800
|
||||
maxBaseQty: 2200
|
||||
|
||||
- id: chicken_whole_1kg
|
||||
category: chicken
|
||||
canonicalName: Whole Chicken Fresh 1kg
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: chicken_whole
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: tomatoes_500g
|
||||
category: tomatoes
|
||||
canonicalName: Cherry Tomatoes 500g
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: tomatoes
|
||||
minBaseQty: 400
|
||||
maxBaseQty: 600
|
||||
|
||||
- id: onions_500g
|
||||
category: onions
|
||||
canonicalName: Yellow Onions 500g
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: onions
|
||||
minBaseQty: 400
|
||||
maxBaseQty: 600
|
||||
|
||||
- id: water_1_5l
|
||||
category: water
|
||||
canonicalName: Mineral Water 1.5L
|
||||
weight: 0.07
|
||||
baseUnit: ml
|
||||
substitutionGroup: water_still
|
||||
minBaseQty: 1300
|
||||
maxBaseQty: 1700
|
||||
|
||||
- id: sugar_1kg
|
||||
category: sugar
|
||||
canonicalName: White Sugar 1kg
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: sugar_white
|
||||
minBaseQty: 900
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cheese_200g
|
||||
category: dairy
|
||||
canonicalName: Processed Cheese Slices 200g
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: cheese_processed
|
||||
minBaseQty: 150
|
||||
maxBaseQty: 250
|
||||
|
||||
- id: yogurt_500g
|
||||
category: dairy
|
||||
canonicalName: Plain Yogurt 500g
|
||||
weight: 0.07
|
||||
baseUnit: g
|
||||
substitutionGroup: yogurt_plain
|
||||
minBaseQty: 450
|
||||
maxBaseQty: 550
|
||||
119
consumer-prices-core/configs/baskets/essentials_us.yaml
Normal file
119
consumer-prices-core/configs/baskets/essentials_us.yaml
Normal file
@@ -0,0 +1,119 @@
|
||||
basket:
|
||||
slug: essentials-us
|
||||
name: Essentials Basket USA
|
||||
marketCode: us
|
||||
methodology: fixed
|
||||
baseDate: "2025-01-01"
|
||||
description: >
|
||||
Core household essentials tracked weekly across US grocery retailers.
|
||||
Weighted to reflect a typical household of 4 in the United States.
|
||||
Does not represent official CPI. Tracks consumer price pressure only.
|
||||
|
||||
items:
|
||||
- id: eggs_12
|
||||
category: eggs
|
||||
canonicalName: Eggs Fresh 12 Pack
|
||||
weight: 0.12
|
||||
baseUnit: ct
|
||||
substitutionGroup: eggs
|
||||
minBaseQty: 10
|
||||
maxBaseQty: 15
|
||||
|
||||
- id: milk_1l
|
||||
category: dairy
|
||||
canonicalName: Whole Milk 1 Gallon
|
||||
weight: 0.10
|
||||
baseUnit: ml
|
||||
substitutionGroup: milk_full_fat
|
||||
minBaseQty: 3500
|
||||
maxBaseQty: 4000
|
||||
|
||||
- id: bread_white
|
||||
category: bread
|
||||
canonicalName: White Sandwich Bread Loaf
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: bread_white
|
||||
minBaseQty: 500
|
||||
maxBaseQty: 700
|
||||
|
||||
- id: rice_1kg
|
||||
category: rice
|
||||
canonicalName: Long Grain White Rice 2lb
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: rice_white
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1100
|
||||
|
||||
- id: cooking_oil_vegetable_1l
|
||||
category: cooking_oil
|
||||
canonicalName: Vegetable Oil 48oz
|
||||
weight: 0.07
|
||||
baseUnit: ml
|
||||
substitutionGroup: cooking_oil_vegetable
|
||||
minBaseQty: 1200
|
||||
maxBaseQty: 1600
|
||||
|
||||
- id: chicken_whole_1kg
|
||||
category: chicken
|
||||
canonicalName: Whole Chicken Fresh
|
||||
weight: 0.12
|
||||
baseUnit: g
|
||||
substitutionGroup: chicken_whole
|
||||
minBaseQty: 1200
|
||||
maxBaseQty: 2000
|
||||
|
||||
- id: tomatoes_1kg
|
||||
category: tomatoes
|
||||
canonicalName: Tomatoes Fresh
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: tomatoes
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1200
|
||||
|
||||
- id: onions_1kg
|
||||
category: onions
|
||||
canonicalName: Yellow Onions 3lb
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: onions
|
||||
minBaseQty: 1000
|
||||
maxBaseQty: 1600
|
||||
|
||||
- id: water_1_5l
  category: water
  canonicalName: Drinking Water 24 Pack 16oz
  weight: 0.07
  baseUnit: ml
  substitutionGroup: water_still
  # A 24 x 16 fl oz case is ~11,360 ml (~12,000 ml with the common 16.9 fl oz / 500 ml bottle).
  # The previous upper bound of 10000 excluded the canonical pack itself.
  # NOTE(review): assumes min/maxBaseQty bound the total pack volume in baseUnit — confirm against the matcher.
  minBaseQty: 6000
  maxBaseQty: 12500
|
||||
|
||||
- id: sugar_1kg
|
||||
category: sugar
|
||||
canonicalName: Granulated White Sugar 4lb
|
||||
weight: 0.06
|
||||
baseUnit: g
|
||||
substitutionGroup: sugar_white
|
||||
minBaseQty: 1600
|
||||
maxBaseQty: 2000
|
||||
|
||||
- id: cheese_cheddar_200g
|
||||
category: dairy
|
||||
canonicalName: Cheddar Cheese Slices 8oz
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: cheese_cheddar
|
||||
minBaseQty: 200
|
||||
maxBaseQty: 280
|
||||
|
||||
- id: yogurt_500g
|
||||
category: dairy
|
||||
canonicalName: Plain Yogurt 32oz
|
||||
weight: 0.08
|
||||
baseUnit: g
|
||||
substitutionGroup: yogurt_plain
|
||||
minBaseQty: 800
|
||||
maxBaseQty: 1000
|
||||
23
consumer-prices-core/configs/retailers/bigbasket_in.yaml
Normal file
23
consumer-prices-core/configs/retailers/bigbasket_in.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: bigbasket_in
|
||||
name: BigBasket India
|
||||
marketCode: in
|
||||
currencyCode: INR
|
||||
adapter: search
|
||||
baseUrl: https://www.bigbasket.com
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /pd/
|
||||
queryTemplate: "{canonicalName} grocery {market} {currency} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 10
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 6000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
@@ -3,12 +3,12 @@ retailer:
|
||||
name: Carrefour UAE
|
||||
marketCode: ae
|
||||
currencyCode: AED
|
||||
adapter: exa-search
|
||||
adapter: search
|
||||
baseUrl: https://www.carrefouruae.com
|
||||
enabled: true
|
||||
|
||||
acquisition:
|
||||
provider: exa
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 20
|
||||
|
||||
23
consumer-prices-core/configs/retailers/coop_ch.yaml
Normal file
23
consumer-prices-core/configs/retailers/coop_ch.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: coop_ch
|
||||
name: Coop Switzerland
|
||||
marketCode: ch
|
||||
currencyCode: CHF
|
||||
adapter: search
|
||||
baseUrl: https://www.coop.ch
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /de/food/
|
||||
queryTemplate: "{canonicalName} Lebensmittel {market} {currency} Preis"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 10
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 6000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
23
consumer-prices-core/configs/retailers/fairprice_sg.yaml
Normal file
23
consumer-prices-core/configs/retailers/fairprice_sg.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: fairprice_sg
|
||||
name: FairPrice Singapore
|
||||
marketCode: sg
|
||||
currencyCode: SGD
|
||||
adapter: search
|
||||
baseUrl: https://www.fairprice.com.sg
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /product/
|
||||
queryTemplate: "{canonicalName} grocery {market} {currency} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 10
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 6000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
23
consumer-prices-core/configs/retailers/kroger_us.yaml
Normal file
23
consumer-prices-core/configs/retailers/kroger_us.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: kroger_us
|
||||
name: Kroger USA
|
||||
marketCode: us
|
||||
currencyCode: USD
|
||||
adapter: search
|
||||
baseUrl: https://www.kroger.com
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /p/
|
||||
queryTemplate: "{canonicalName} grocery {market} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 15
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 4000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
@@ -3,12 +3,14 @@ retailer:
|
||||
name: Lulu Hypermarket UAE
|
||||
marketCode: ae
|
||||
currencyCode: AED
|
||||
adapter: exa-search
|
||||
baseUrl: https://www.luluhypermarket.com
|
||||
adapter: search
|
||||
baseUrl: https://gcc.luluhypermarket.com
|
||||
enabled: true
|
||||
|
||||
acquisition:
|
||||
provider: exa
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
queryTemplate: "{canonicalName} {currency} {market} price"
|
||||
urlPathContains: /en-ae/
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 15
|
||||
|
||||
23
consumer-prices-core/configs/retailers/migros_ch.yaml
Normal file
23
consumer-prices-core/configs/retailers/migros_ch.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: migros_ch
|
||||
name: Migros Switzerland
|
||||
marketCode: ch
|
||||
currencyCode: CHF
|
||||
adapter: search
|
||||
baseUrl: https://www.migros.ch
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /de/produkt/
|
||||
queryTemplate: "{canonicalName} Lebensmittel {market} {currency} Preis"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 10
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 6000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
23
consumer-prices-core/configs/retailers/naivas_ke.yaml
Normal file
23
consumer-prices-core/configs/retailers/naivas_ke.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: naivas_ke
|
||||
name: Naivas Supermarket Kenya
|
||||
marketCode: ke
|
||||
currencyCode: KES
|
||||
adapter: search
|
||||
baseUrl: https://www.naivas.online
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /product/
|
||||
queryTemplate: "{canonicalName} grocery Kenya {currency} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 10
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 6000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
@@ -3,12 +3,14 @@ retailer:
|
||||
name: Noon Grocery UAE
|
||||
marketCode: ae
|
||||
currencyCode: AED
|
||||
adapter: exa-search
|
||||
adapter: search
|
||||
baseUrl: https://www.noon.com
|
||||
enabled: true
|
||||
|
||||
acquisition:
|
||||
provider: exa
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /p/
|
||||
queryTemplate: "{canonicalName} grocery fresh food {currency} {market}"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 10
|
||||
|
||||
23
consumer-prices-core/configs/retailers/pao_de_acucar_br.yaml
Normal file
23
consumer-prices-core/configs/retailers/pao_de_acucar_br.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: pao_de_acucar_br
|
||||
name: Pão de Açúcar Brazil
|
||||
marketCode: br
|
||||
currencyCode: BRL
|
||||
adapter: search
|
||||
baseUrl: https://www.paodeacucar.com
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /produto/
|
||||
queryTemplate: "{canonicalName} supermercado Brasil {currency} preço"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 10
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 6000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
23
consumer-prices-core/configs/retailers/sainsburys_gb.yaml
Normal file
23
consumer-prices-core/configs/retailers/sainsburys_gb.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: sainsburys_gb
|
||||
name: Sainsbury's UK
|
||||
marketCode: gb
|
||||
currencyCode: GBP
|
||||
adapter: search
|
||||
baseUrl: https://www.sainsburys.co.uk
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /shop/gb/groceries/
|
||||
queryTemplate: "{canonicalName} grocery {market} {currency} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 15
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 4000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
@@ -3,12 +3,12 @@ retailer:
|
||||
name: Spinneys UAE
|
||||
marketCode: ae
|
||||
currencyCode: AED
|
||||
adapter: exa-search
|
||||
adapter: search
|
||||
baseUrl: https://www.spinneys.com
|
||||
enabled: true
|
||||
|
||||
acquisition:
|
||||
provider: exa
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 15
|
||||
|
||||
22
consumer-prices-core/configs/retailers/tamimi_sa.yaml
Normal file
22
consumer-prices-core/configs/retailers/tamimi_sa.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
retailer:
|
||||
slug: tamimi_sa
|
||||
name: Tamimi Markets Saudi Arabia
|
||||
marketCode: sa
|
||||
currencyCode: SAR
|
||||
adapter: search
|
||||
baseUrl: https://tamimimarkets.com
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
queryTemplate: "{canonicalName} grocery {market} {currency} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 10
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 6000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
23
consumer-prices-core/configs/retailers/tesco_gb.yaml
Normal file
23
consumer-prices-core/configs/retailers/tesco_gb.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: tesco_gb
|
||||
name: Tesco UK
|
||||
marketCode: gb
|
||||
currencyCode: GBP
|
||||
adapter: search
|
||||
baseUrl: https://www.tesco.com
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /groceries/en-GB/products/
|
||||
queryTemplate: "{canonicalName} grocery {market} {currency} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 15
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 4000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
23
consumer-prices-core/configs/retailers/walmart_us.yaml
Normal file
23
consumer-prices-core/configs/retailers/walmart_us.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: walmart_us
|
||||
name: Walmart USA
|
||||
marketCode: us
|
||||
currencyCode: USD
|
||||
adapter: search
|
||||
baseUrl: https://www.walmart.com
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /ip/
|
||||
queryTemplate: "{canonicalName} grocery {market} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 15
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 4000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
23
consumer-prices-core/configs/retailers/wholefoods_us.yaml
Normal file
23
consumer-prices-core/configs/retailers/wholefoods_us.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: wholefoods_us
|
||||
name: Whole Foods Market USA
|
||||
marketCode: us
|
||||
currencyCode: USD
|
||||
adapter: search
|
||||
baseUrl: https://www.wholefoodsmarket.com
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /product/
|
||||
queryTemplate: "{canonicalName} grocery organic {market} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 10
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 6000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
23
consumer-prices-core/configs/retailers/woolworths_au.yaml
Normal file
23
consumer-prices-core/configs/retailers/woolworths_au.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
retailer:
|
||||
slug: woolworths_au
|
||||
name: Woolworths Australia
|
||||
marketCode: au
|
||||
currencyCode: AUD
|
||||
adapter: search
|
||||
baseUrl: https://www.woolworths.com.au
|
||||
enabled: true
|
||||
|
||||
searchConfig:
|
||||
numResults: 5
|
||||
urlPathContains: /shop/productdetails/
|
||||
queryTemplate: "{canonicalName} grocery {market} {currency} price"
|
||||
|
||||
rateLimit:
|
||||
requestsPerMinute: 15
|
||||
maxConcurrency: 1
|
||||
delayBetweenRequestsMs: 4000
|
||||
|
||||
discovery:
|
||||
mode: search
|
||||
maxPages: 20
|
||||
seeds: []
|
||||
@@ -17,7 +17,10 @@ interface FirecrawlSearchResponse {
|
||||
|
||||
interface FirecrawlExtractResponse {
|
||||
success: boolean;
|
||||
data?: Record<string, unknown>;
|
||||
data?: {
|
||||
extract?: Record<string, unknown>;
|
||||
metadata?: Record<string, unknown>;
|
||||
};
|
||||
}
|
||||
|
||||
export class FirecrawlProvider implements AcquisitionProvider {
|
||||
@@ -109,7 +112,7 @@ export class FirecrawlProvider implements AcquisitionProvider {
|
||||
body: JSON.stringify({
|
||||
url,
|
||||
formats: ['extract'],
|
||||
extract: { schema: jsonSchema },
|
||||
extract: { schema: jsonSchema, ...(schema.prompt ? { prompt: schema.prompt } : {}) },
|
||||
timeout: opts.timeout ?? 30_000,
|
||||
}),
|
||||
});
|
||||
@@ -120,7 +123,7 @@ export class FirecrawlProvider implements AcquisitionProvider {
|
||||
|
||||
return {
|
||||
url,
|
||||
data: (data.data ?? {}) as T,
|
||||
data: (data.data?.extract ?? {}) as T,
|
||||
provider: this.name,
|
||||
fetchedAt: new Date(),
|
||||
};
|
||||
|
||||
@@ -17,6 +17,7 @@ export interface SearchOptions {
|
||||
|
||||
export interface ExtractSchema {
|
||||
fields: Record<string, { description: string; type: 'string' | 'number' | 'boolean' | 'array' }>;
|
||||
prompt?: string;
|
||||
}
|
||||
|
||||
export interface FetchResult {
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* 1. Discovers targets from the basket YAML config (one target per basket item)
|
||||
* 2. Calls Exa with contents.summary to get AI-extracted price text from retailer pages
|
||||
* 3. Uses regex to extract the price from the summary
|
||||
* 4. Falls back to Firecrawl URL scrape when Exa summaries yield no price
|
||||
*
|
||||
* Basket → product match is written automatically (match_status: 'auto')
|
||||
* because the search is item-specific — no ambiguity in what was searched.
|
||||
@@ -13,6 +14,8 @@
|
||||
import { loadAllBasketConfigs } from '../config/loader.js';
|
||||
import type { AdapterContext, FetchResult, ParsedProduct, RetailerAdapter, Target } from './types.js';
|
||||
import type { RetailerConfig } from '../config/types.js';
|
||||
import { MARKET_NAMES } from './market-names.js';
|
||||
import { isAllowedHost } from './search.js';
|
||||
|
||||
const CHROME_UA =
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
|
||||
@@ -78,6 +81,8 @@ interface ExaResult {
|
||||
|
||||
interface SearchPayload {
|
||||
exaResults: ExaResult[];
|
||||
firecrawlMarkdown?: string;
|
||||
firecrawlUrl?: string;
|
||||
basketSlug: string;
|
||||
itemCategory: string;
|
||||
canonicalName: string;
|
||||
@@ -86,7 +91,10 @@ interface SearchPayload {
|
||||
export class ExaSearchAdapter implements RetailerAdapter {
|
||||
readonly key = 'exa-search';
|
||||
|
||||
constructor(private readonly apiKey: string) {}
|
||||
constructor(
|
||||
private readonly apiKey: string,
|
||||
private readonly firecrawlKey?: string,
|
||||
) {}
|
||||
|
||||
async discoverTargets(ctx: AdapterContext): Promise<Target[]> {
|
||||
const baskets = loadAllBasketConfigs().filter((b) => b.marketCode === ctx.config.marketCode);
|
||||
@@ -112,6 +120,18 @@ export class ExaSearchAdapter implements RetailerAdapter {
|
||||
return targets;
|
||||
}
|
||||
|
||||
/**
 * Builds the search query for one basket item.
 *
 * @param canonicalName Basket item name, substituted for `{canonicalName}`.
 * @param currency Currency code, substituted for `{currency}`.
 * @param marketCode Key looked up in MARKET_NAMES; an unknown code yields an empty market name.
 * @param template Optional query template. When absent or empty, the default
 *   `<canonicalName> <market> <currency> price` form is used.
 * @returns The query with every placeholder filled in, runs of whitespace collapsed
 *   to a single space, and surrounding whitespace trimmed.
 */
private buildQuery(canonicalName: string, currency: string, marketCode: string, template?: string): string {
  const market = MARKET_NAMES[marketCode] ?? '';
  const values: Record<string, string> = { canonicalName, currency, market };

  const raw = template
    ? // One pass with a replacer function: fills every occurrence of a placeholder,
      // never re-expands placeholders that appear inside a substituted value, and
      // treats `$` sequences in product names literally instead of as replacement patterns.
      template.replace(/\{(canonicalName|currency|market)\}/g, (_match, key: string) => values[key] ?? '')
    : `${canonicalName} ${market} ${currency} price`;

  // An empty market name would otherwise leave doubled spaces in the query.
  return raw.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
async fetchTarget(ctx: AdapterContext, target: Target): Promise<FetchResult> {
|
||||
if (!this.apiKey) throw new Error('EXA_API_KEY is required for exa-search adapter');
|
||||
|
||||
@@ -122,8 +142,15 @@ export class ExaSearchAdapter implements RetailerAdapter {
|
||||
basketSlug: string;
|
||||
};
|
||||
|
||||
const searchQuery = this.buildQuery(
|
||||
canonicalName,
|
||||
currency,
|
||||
ctx.config.marketCode,
|
||||
ctx.config.acquisition?.searchQueryTemplate,
|
||||
);
|
||||
|
||||
const body = {
|
||||
query: `${canonicalName} ${currency} retail price`,
|
||||
query: searchQuery,
|
||||
numResults: 5,
|
||||
type: 'auto',
|
||||
includeDomains: [domain],
|
||||
@@ -151,13 +178,36 @@ export class ExaSearchAdapter implements RetailerAdapter {
|
||||
}
|
||||
|
||||
const data = (await resp.json()) as { results?: ExaResult[] };
|
||||
const exaResults = data.results ?? [];
|
||||
|
||||
const payload: SearchPayload = {
|
||||
exaResults: data.results ?? [],
|
||||
exaResults,
|
||||
basketSlug,
|
||||
itemCategory: target.category,
|
||||
canonicalName,
|
||||
};
|
||||
|
||||
// Firecrawl fallback: when all Exa summaries fail price extraction,
|
||||
// scrape the first result URL directly (JS-rendered pages expose prices in markdown).
|
||||
const anyExaPrice = exaResults.some(
|
||||
(r) => matchPrice(r.summary ?? '', currency) !== null || matchPrice(r.title ?? '', currency) !== null,
|
||||
);
|
||||
|
||||
if (!anyExaPrice && exaResults.length > 0 && this.firecrawlKey) {
|
||||
const firstUrl = exaResults[0].url;
|
||||
if (firstUrl && isAllowedHost(firstUrl, domain)) {
|
||||
try {
|
||||
const fc = await this.firecrawlFetch(firstUrl);
|
||||
if (fc) {
|
||||
payload.firecrawlMarkdown = fc;
|
||||
payload.firecrawlUrl = firstUrl;
|
||||
}
|
||||
} catch {
|
||||
// fallback failed silently — Exa results will be tried in parseListing
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
url: target.url,
|
||||
html: JSON.stringify(payload),
|
||||
@@ -166,15 +216,32 @@ export class ExaSearchAdapter implements RetailerAdapter {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Scrapes `url` through the Firecrawl v1 scrape endpoint and returns the page as markdown.
 *
 * @param url Page to scrape.
 * @returns The markdown, or `null` when no Firecrawl key is configured, the HTTP status
 *   is not OK, the response reports `success: false`, or it carries no markdown.
 *   Network errors, the abort timeout and JSON parse failures are not caught here and
 *   propagate to the caller.
 */
private async firecrawlFetch(url: string): Promise<string | null> {
  // Without a key the header would read "Bearer undefined"; skip the doomed request.
  if (!this.firecrawlKey) return null;

  const resp = await fetch('https://api.firecrawl.dev/v1/scrape', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${this.firecrawlKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ url, formats: ['markdown'], timeout: 20_000 }),
    // The client-side abort (25 s) is set above the 20 s timeout sent in the request body.
    signal: AbortSignal.timeout(25_000),
  });
  if (!resp.ok) return null;

  const data = (await resp.json()) as { success: boolean; data?: { markdown?: string } };
  return data.success ? (data.data?.markdown ?? null) : null;
}
|
||||
|
||||
async parseListing(ctx: AdapterContext, result: FetchResult): Promise<ParsedProduct[]> {
|
||||
const payload = JSON.parse(result.html) as SearchPayload;
|
||||
const currency = ctx.config.currencyCode;
|
||||
|
||||
if (payload.exaResults.length === 0) {
|
||||
if (payload.exaResults.length === 0 && !payload.firecrawlMarkdown) {
|
||||
ctx.logger.warn(` [exa] ${payload.canonicalName}: 0 results from Exa (no indexed pages on this domain for query)`);
|
||||
return [];
|
||||
}
|
||||
|
||||
// Try Exa results first
|
||||
for (const r of payload.exaResults) {
|
||||
const price =
|
||||
matchPrice(r.summary ?? '', currency) ??
|
||||
@@ -211,6 +278,38 @@ export class ExaSearchAdapter implements RetailerAdapter {
|
||||
}
|
||||
}
|
||||
|
||||
// Firecrawl fallback: scrape the first result URL for a JS-rendered price
|
||||
if (payload.firecrawlMarkdown && payload.firecrawlUrl) {
|
||||
const price = matchPrice(payload.firecrawlMarkdown.slice(0, 3000), currency);
|
||||
if (price !== null) {
|
||||
ctx.logger.info(` [firecrawl-fallback] ${payload.canonicalName}: found ${currency} ${price} via Firecrawl`);
|
||||
return [
|
||||
{
|
||||
sourceUrl: payload.firecrawlUrl,
|
||||
rawTitle: payload.exaResults[0]?.title ?? payload.canonicalName,
|
||||
rawBrand: null,
|
||||
rawSizeText: null,
|
||||
imageUrl: null,
|
||||
categoryText: payload.itemCategory,
|
||||
retailerSku: null,
|
||||
price,
|
||||
listPrice: null,
|
||||
promoPrice: null,
|
||||
promoText: null,
|
||||
inStock: true,
|
||||
rawPayload: {
|
||||
exaUrl: payload.firecrawlUrl,
|
||||
firecrawlFallback: true,
|
||||
basketSlug: payload.basketSlug,
|
||||
itemCategory: payload.itemCategory,
|
||||
canonicalName: payload.canonicalName,
|
||||
},
|
||||
},
|
||||
];
|
||||
}
|
||||
ctx.logger.warn(` [firecrawl-fallback] ${payload.canonicalName}: no ${currency} price found in Firecrawl markdown either`);
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
|
||||
@@ -55,6 +55,7 @@ export class GenericPlaywrightAdapter implements RetailerAdapter {
|
||||
}
|
||||
|
||||
async fetchTarget(ctx: AdapterContext, target: Target): Promise<FetchResult> {
|
||||
if (!ctx.config.acquisition) throw new Error(`Generic adapter requires acquisition config (retailer: ${ctx.config.slug})`);
|
||||
const result = await fetchWithFallback(target.url, ctx.config.acquisition, ctx.config.rateLimit ? {
|
||||
timeout: 30_000,
|
||||
} : undefined);
|
||||
|
||||
25
consumer-prices-core/src/adapters/market-names.ts
Normal file
25
consumer-prices-core/src/adapters/market-names.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
 * Display name inserted into search queries for each market,
 * keyed by lowercase two-letter market code.
 */
export const MARKET_NAMES: Record<string, string> = {
  // Gulf states and Egypt
  ae: 'UAE',
  sa: 'Saudi Arabia',
  kw: 'Kuwait',
  qa: 'Qatar',
  bh: 'Bahrain',
  om: 'Oman',
  eg: 'Egypt',

  // UK, North America, Australia
  gb: 'UK',
  us: 'USA',
  ca: 'Canada',
  au: 'Australia',

  // Continental Europe
  de: 'Germany',
  fr: 'France',
  nl: 'Netherlands',

  // Asia
  sg: 'Singapore',
  in: 'India',
  pk: 'Pakistan',

  // Sub-Saharan Africa
  ng: 'Nigeria',
  ke: 'Kenya',
  za: 'South Africa',

  // Switzerland and Brazil
  ch: 'Switzerland',
  br: 'Brazil',
};
|
||||
91
consumer-prices-core/src/adapters/search.smoke.ts
Normal file
91
consumer-prices-core/src/adapters/search.smoke.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
/**
|
||||
* Smoke test: SearchAdapter end-to-end against live Exa + Firecrawl APIs.
|
||||
* Tests 3 items (eggs, milk, tomatoes) on each of the 4 AE retailers.
|
||||
* Run with: EXA_API_KEYS=... FIRECRAWL_API_KEY=... npx tsx src/adapters/search.smoke.ts
|
||||
*/
|
||||
import { ExaProvider } from '../acquisition/exa.js';
|
||||
import { FirecrawlProvider } from '../acquisition/firecrawl.js';
|
||||
import { SearchAdapter } from './search.js';
|
||||
import { loadRetailerConfig } from '../config/loader.js';
|
||||
import type { AdapterContext } from './types.js';
|
||||
|
||||
const RETAILERS = ['carrefour_ae', 'spinneys_ae', 'lulu_ae', 'noon_grocery_ae'];
|
||||
const ITEMS = [
|
||||
{ id: 'eggs', canonicalName: 'Eggs Fresh 12 Pack', category: 'dairy-eggs' },
|
||||
{ id: 'milk', canonicalName: 'Full Fat Fresh Milk 1L', category: 'dairy-eggs' },
|
||||
{ id: 'tomatoes', canonicalName: 'Tomatoes Fresh 1kg', category: 'produce' },
|
||||
];
|
||||
|
||||
const exaKey = (process.env.EXA_API_KEYS || process.env.EXA_API_KEY || '').split(/[\n,]+/)[0].trim();
|
||||
const fcKey = process.env.FIRECRAWL_API_KEY ?? '';
|
||||
|
||||
if (!exaKey || !fcKey) {
|
||||
console.error('Missing EXA_API_KEYS or FIRECRAWL_API_KEY');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const adapter = new SearchAdapter(new ExaProvider(exaKey), new FirecrawlProvider(fcKey));
|
||||
|
||||
const logger = {
|
||||
info: (msg: string) => console.log(msg),
|
||||
warn: (msg: string) => console.warn('[WARN]', msg),
|
||||
error: (msg: string) => console.error('[ERR]', msg),
|
||||
debug: () => {},
|
||||
};
|
||||
|
||||
let passed = 0;
|
||||
let failed = 0;
|
||||
|
||||
for (const slug of RETAILERS) {
|
||||
const retailerCfg = loadRetailerConfig(slug);
|
||||
const domain = new URL(retailerCfg.baseUrl).hostname;
|
||||
|
||||
const ctx: AdapterContext = {
|
||||
config: retailerCfg,
|
||||
logger,
|
||||
runId: `smoke-${slug}`,
|
||||
};
|
||||
|
||||
console.log(`\n${'='.repeat(60)}`);
|
||||
console.log(`Retailer: ${retailerCfg.name} (${slug})`);
|
||||
console.log(`Domain: ${domain}`);
|
||||
console.log(`${'='.repeat(60)}`);
|
||||
|
||||
for (const item of ITEMS) {
|
||||
const target = {
|
||||
id: item.id,
|
||||
url: retailerCfg.baseUrl,
|
||||
category: item.category,
|
||||
metadata: {
|
||||
canonicalName: item.canonicalName,
|
||||
domain,
|
||||
basketSlug: 'essentials_ae',
|
||||
currency: retailerCfg.currencyCode,
|
||||
},
|
||||
};
|
||||
|
||||
process.stdout.write(` ${item.canonicalName.padEnd(30)} `);
|
||||
try {
|
||||
const fetchResult = await adapter.fetchTarget(ctx, target);
|
||||
const products = await adapter.parseListing(ctx, fetchResult);
|
||||
if (products.length > 0 && products[0].price > 0) {
|
||||
console.log(`✓ ${products[0].price} ${retailerCfg.currencyCode} "${products[0].rawTitle?.slice(0, 50)}"`);
|
||||
passed++;
|
||||
} else {
|
||||
console.log(`✗ parseListing returned empty/zero price`);
|
||||
failed++;
|
||||
}
|
||||
} catch (err) {
|
||||
console.log(`✗ ${err instanceof Error ? err.message : String(err)}`);
|
||||
failed++;
|
||||
}
|
||||
|
||||
// Respect rate limits between items
|
||||
await new Promise((r) => setTimeout(r, 4000));
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\n${'='.repeat(60)}`);
|
||||
console.log(`Results: ${passed} passed, ${failed} failed`);
|
||||
console.log(`${'='.repeat(60)}`);
|
||||
process.exit(failed > 0 ? 1 : 0);
|
||||
62
consumer-prices-core/src/adapters/search.test.ts
Normal file
62
consumer-prices-core/src/adapters/search.test.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { isTitlePlausible, isAllowedHost } from './search.js';
|
||||
|
||||
// The allowed host is the exact hostname taken from the retailer's baseUrl, so these
// cases pin exact-hostname matching: a subdomain of the allowed host is NOT accepted.
describe('isAllowedHost', () => {
  it('accepts exact hostname match', () => {
    expect(isAllowedHost('https://luluhypermarket.com/ae/eggs', 'luluhypermarket.com')).toBe(true);
    expect(isAllowedHost('https://www.luluhypermarket.com/item', 'www.luluhypermarket.com')).toBe(true);
  });

  it('rejects a subdomain when the allowed host is the bare domain', () => {
    expect(isAllowedHost('https://www.luluhypermarket.com/ae/eggs', 'luluhypermarket.com')).toBe(false);
  });

  it('blocks domain with shared suffix (no dot boundary)', () => {
    expect(isAllowedHost('https://evilluluhypermarket.com/page', 'luluhypermarket.com')).toBe(false);
  });

  it('blocks entirely different domain', () => {
    expect(isAllowedHost('https://amazon.com/eggs', 'noon.com')).toBe(false);
  });

  it('handles malformed URLs gracefully', () => {
    expect(isAllowedHost('not-a-url', 'noon.com')).toBe(false);
    expect(isAllowedHost('', 'noon.com')).toBe(false);
  });
});
|
||||
|
||||
describe('isTitlePlausible', () => {
  /** Asserts that every [canonicalName, productName] pair yields `expected`. */
  const expectAll = (pairs: Array<[string, string | undefined]>, expected: boolean): void => {
    for (const [canonical, title] of pairs) {
      expect(isTitlePlausible(canonical, title)).toBe(expected);
    }
  };

  it('accepts when product name contains canonical tokens', () => {
    expectAll(
      [
        ['Eggs Fresh 12 Pack', 'Farm Fresh Eggs 12 Pack White'],
        ['Milk 1L', 'Almarai Full Fat Fresh Milk 1 Litre'],
        ['Basmati Rice 1kg', 'Tilda Pure Basmati Rice 1kg'],
      ],
      true,
    );
  });

  it('rejects gross mismatches (seeds vs vegetables)', () => {
    expectAll(
      [
        ['Tomatoes Fresh 1kg', 'GGOOT Tomato Seeds 100 pcs Vegetable Garden'],
        ['Onions 1kg', 'Red Karmen Onion Sets for Planting x200'],
        ['Eggs Fresh 12 Pack', 'Generic 12 Grids Egg Storage Box Container'],
      ],
      false,
    );
  });

  it('rejects when productName is undefined or empty', () => {
    expectAll(
      [
        ['Milk 1L', undefined],
        ['Milk 1L', ''],
      ],
      false,
    );
  });

  it('handles short canonical names with single-token check', () => {
    // A one-token canonical name needs that single token to match.
    expectAll([['Milk', 'Fresh Pasteurized Milk 1L']], true);
    expectAll([['Milk', 'Orange Juice 1L']], false);
  });

  it('is case-insensitive', () => {
    expectAll([['EGGS FRESH 12 PACK', 'farm fresh eggs 12 pack']], true);
  });

  it('ignores short tokens (≤2 chars)', () => {
    // "1L" is dropped as too short, so only "Milk" is compared.
    expectAll([['Milk 1L', 'Fresh Milk Whole 1 Litre']], true);
  });
});
|
||||
273
consumer-prices-core/src/adapters/search.ts
Normal file
273
consumer-prices-core/src/adapters/search.ts
Normal file
@@ -0,0 +1,273 @@
|
||||
/**
|
||||
* SearchAdapter — two-stage grocery price pipeline.
|
||||
*
|
||||
* Stage 1 (Exa): neural search on retailer domain → ranked product page URLs
|
||||
* Stage 2 (Firecrawl): structured LLM extraction from the confirmed URL → {price, currency, inStock}
|
||||
*
|
||||
* Replaces ExaSearchAdapter's fragile regex-on-AI-summary approach.
|
||||
* Firecrawl renders JS so dynamic prices (Noon, etc.) are visible.
|
||||
* Domain allowlist + title plausibility check prevent wrong-product and SSRF risks.
|
||||
*/
|
||||
import { z } from 'zod';
|
||||
import { loadAllBasketConfigs } from '../config/loader.js';
|
||||
import type { ExaProvider } from '../acquisition/exa.js';
|
||||
import type { FirecrawlProvider } from '../acquisition/firecrawl.js';
|
||||
import type { RetailerConfig } from '../config/types.js';
|
||||
import type { AdapterContext, FetchResult, ParsedProduct, RetailerAdapter, Target } from './types.js';
|
||||
import { MARKET_NAMES } from './market-names.js';
|
||||
|
||||
/** Packaging/container words that are not product identity tokens. */
const PACKAGING_WORDS = new Set(['pack', 'box', 'bag', 'container', 'bottle', 'can', 'jar', 'tin', 'set', 'kit', 'bundle']);

/** Non-food product indicator words — a title containing any of them is rejected before token matching. */
const NON_FOOD_INDICATORS = new Set(['seeds', 'seed', 'seedling', 'seedlings', 'planting', 'fertilizer', 'fertiliser']);

/** Stems shorter than this are too ambiguous for substring matching (e.g. "egg" inside "Egg Storage Box"). */
const MIN_STEM_LENGTH = 4;

/**
 * Reduce a lowercase English plural to its singular form (basic stemming).
 *
 * The rules are mutually exclusive so a word is shortened at most once:
 *  - "...ies" becomes "...y" (berries -> berry); words of four letters or fewer fall through (pies -> pie)
 *  - "es" is dropped only after s, x, z, ch, sh or o (boxes -> box, tomatoes -> tomato)
 *  - otherwise a single trailing "s" is dropped (limes -> lime, apples -> apple)
 *  - words ending in "ss" are left untouched (glass stays glass)
 *
 * @param w lowercase word
 * @returns the singular form, or `w` unchanged when no rule applies
 */
function stem(w: string): string {
  if (w.length > 4 && w.endsWith('ies')) return `${w.slice(0, -3)}y`;
  if (/(?:[sxz]|ch|sh|o)es$/.test(w)) return w.slice(0, -2);
  if (w.endsWith('s') && !w.endsWith('ss')) return w.slice(0, -1);
  return w;
}

/**
 * Decide whether an extracted product title plausibly refers to the canonical basket item.
 *
 * Token overlap: at least 40% (and never fewer than one) of the canonical name's identity
 * words (longer than 2 chars, non-packaging) must appear in the extracted title. Matching is
 * case-insensitive and substring-based; when the word itself is absent, its singular stem is
 * tried, provided the stem has at least {@link MIN_STEM_LENGTH} characters.
 *
 * Packaging words (Pack/Box/Bag/etc.) are stripped before comparison so "Eggs Fresh 12 Pack"
 * matches "Eggs x 15" on the "eggs" token alone. Titles containing a non-food indicator
 * (seeds, fertilizer, ...) are rejected outright.
 *
 * @param canonicalName basket item name, e.g. "Eggs Fresh 12 Pack"
 * @param productName   title extracted from the retailer page; undefined or empty is rejected
 * @returns true when the title is an acceptable match; also true when the canonical name
 *          contains no identity tokens at all (nothing to compare against)
 */
export function isTitlePlausible(canonicalName: string, productName: string | undefined): boolean {
  if (!productName) return false;

  const title = productName.toLowerCase();
  if (title.split(/\W+/).some((word) => NON_FOOD_INDICATORS.has(word))) return false;

  const identityTokens = canonicalName
    .toLowerCase()
    .split(/\W+/)
    .filter((word) => word.length > 2 && !PACKAGING_WORDS.has(word));
  if (identityTokens.length === 0) return true;

  const matched = identityTokens.filter((token) => {
    if (title.includes(token)) return true;
    const singular = stem(token);
    return singular !== token && singular.length >= MIN_STEM_LENGTH && title.includes(singular);
  });

  return matched.length >= Math.max(1, Math.ceil(identityTokens.length * 0.4));
}
|
||||
|
||||
/**
 * Safe host boundary check for URLs returned by search.
 *
 * The URL is parsed and its hostname must equal `allowedHost` exactly, which prevents
 * evilluluhypermarket.com (or luluhypermarket.com.evil.io) from passing when allowedHost
 * is luluhypermarket.com. Only http(s) URLs are accepted, and the comparison ignores
 * case and surrounding whitespace in `allowedHost`.
 *
 * @param url         candidate URL (untrusted)
 * @param allowedHost expected hostname, e.g. "luluhypermarket.com"
 * @returns true only for a parseable http(s) URL on exactly that host; false for
 *          unparseable URLs, other schemes, or an empty `allowedHost`
 */
export function isAllowedHost(url: string, allowedHost: string): boolean {
  const expected = allowedHost.trim().toLowerCase();
  // An empty allowlist entry must never match (host-less URLs such as file:/// have hostname "").
  if (!expected) return false;

  try {
    const { protocol, hostname } = new URL(url);
    if (protocol !== 'http:' && protocol !== 'https:') return false;
    // URL lowercases the hostname for http(s), so a plain comparison is sufficient.
    return hostname === expected;
  } catch {
    return false;
  }
}
|
||||
|
||||
/** Fields the Firecrawl extraction is asked to return; any of them may be missing. */
interface ExtractedProduct {
  productName?: string;
  price?: number;
  currency?: string;
  inStock?: boolean;
}

/** JSON envelope that fetchTarget serialises into FetchResult.html and parseListing reads back. */
interface SearchPayload {
  extracted: ExtractedProduct;
  productUrl: string;
  canonicalName: string;
  basketSlug: string;
  itemCategory: string;
}

/**
 * Retailer adapter that finds product pages with Exa and extracts prices with Firecrawl.
 *
 * One target is produced per basket item of the retailer's market; each target is resolved
 * to at most one ParsedProduct.
 */
export class SearchAdapter implements RetailerAdapter {
  readonly key = 'search';

  constructor(
    private readonly exa: ExaProvider,
    private readonly firecrawl: FirecrawlProvider,
  ) {}

  /**
   * Check the parts of the retailer config this adapter relies on.
   *
   * @returns human-readable error strings; empty when the config is usable
   */
  async validateConfig(config: RetailerConfig): Promise<string[]> {
    const errors: string[] = [];
    if (!config.baseUrl) {
      errors.push('baseUrl is required');
    } else {
      // discoverTargets derives the allowed hostname with `new URL(baseUrl)`; surface a bad
      // value here instead of letting it throw mid-run.
      try {
        new URL(config.baseUrl);
      } catch {
        errors.push('baseUrl must be a valid URL');
      }
    }
    return errors;
  }

  /**
   * Build one target per item of every basket configured for the retailer's market.
   * The target URL is the retailer base URL; the real product URL is discovered in fetchTarget.
   */
  async discoverTargets(ctx: AdapterContext): Promise<Target[]> {
    const baskets = loadAllBasketConfigs().filter((b) => b.marketCode === ctx.config.marketCode);
    const domain = new URL(ctx.config.baseUrl).hostname;

    return baskets.flatMap((basket) =>
      basket.items.map(
        (item): Target => ({
          id: item.id,
          url: ctx.config.baseUrl,
          category: item.category,
          metadata: {
            canonicalName: item.canonicalName,
            domain,
            basketSlug: basket.slug,
            currency: ctx.config.currencyCode,
          },
        }),
      ),
    );
  }

  /**
   * Resolve a target to a priced product page.
   *
   * Stage 1 asks Exa for pages on the retailer domain and keeps only URLs that pass the
   * hostname allowlist (and the optional `urlPathContains` filter). Stage 2 runs Firecrawl
   * extraction on each remaining URL in order and stops at the first one that yields a
   * positive finite price and a plausible title.
   *
   * @returns a FetchResult whose `html` is a JSON-encoded SearchPayload
   * @throws Error when Exa returns nothing, when no URL passes the filters, or when every
   *         URL fails extraction (the message carries the last failure reason)
   */
  async fetchTarget(ctx: AdapterContext, target: Target): Promise<FetchResult> {
    const { canonicalName, domain, currency, basketSlug } = target.metadata as {
      canonicalName: string;
      domain: string;
      currency: string;
      basketSlug: string;
    };

    const marketName = MARKET_NAMES[ctx.config.marketCode] ?? ctx.config.marketCode.toUpperCase();
    const cfg = ctx.config.searchConfig;

    // Substitute placeholders in a single pass with a replacer function: every occurrence is
    // replaced, "$" sequences in product names are inserted literally, and substituted text is
    // never re-scanned for further placeholders.
    const placeholders: Record<string, string> = {
      canonicalName,
      category: target.category,
      currency,
      market: marketName,
    };
    const query = cfg?.queryTemplate
      ? cfg.queryTemplate
          .replace(/\{(canonicalName|category|currency|market)\}/g, (match, key: string) => placeholders[key] ?? match)
          .trim()
      : `${canonicalName} grocery ${marketName} ${currency}`.trim();

    // Stage 1: Exa URL discovery
    const exaResults = await this.exa.search(query, {
      numResults: cfg?.numResults ?? 3,
      includeDomains: [domain],
    });

    if (exaResults.length === 0) {
      throw new Error(`Exa: no pages found for "${canonicalName}" on ${domain}`);
    }

    const pathFilter = cfg?.urlPathContains;
    const safeUrls = exaResults
      .map((r) => r.url)
      .filter((url) => !!url && isAllowedHost(url, domain) && (!pathFilter || url.includes(pathFilter)));

    ctx.logger.info(
      ` [search:discovery] ${canonicalName}: ${exaResults.length} URLs from Exa, ${safeUrls.length} passed domain check`,
    );

    if (safeUrls.length === 0) {
      throw new Error(`Exa: all ${exaResults.length} results failed domain check (expected hostname: ${domain}${pathFilter ? `, path: *${pathFilter}*` : ''})`);
    }

    // Stage 2: Firecrawl structured extraction — iterate safe URLs until one yields a valid price
    const extractSchema = {
      prompt: `Find the listed retail price of this product in ${currency}. The price may be displayed as two parts split across lines — like "3" and ".95" next to "${currency}" — combine them to get 3.95. Return the listed price even if the product is currently out of stock. Return the product name, the numeric price in ${currency}, the currency code, and whether it is in stock.`,
      fields: {
        productName: { type: 'string' as const, description: 'Name or title of the product' },
        price: { type: 'number' as const, description: `Retail price in ${currency} as a single number (e.g. 4.69)` },
        currency: { type: 'string' as const, description: `Currency code, should be ${currency}` },
        inStock: { type: 'boolean' as const, description: 'Whether the product is currently in stock and purchasable' },
      },
    };

    let accepted: { data: ExtractedProduct; url: string } | null = null;
    // Every reason a URL was skipped (exception, missing price, title mismatch), in order,
    // so the final error can say why the last candidate failed.
    const failures: string[] = [];

    for (const url of safeUrls) {
      try {
        const result = await this.firecrawl.extract<ExtractedProduct>(url, extractSchema, { timeout: 30_000 });
        const data = result.data;
        const price = data?.price;

        if (typeof price !== 'number' || !Number.isFinite(price) || price <= 0) {
          ctx.logger.warn(` [search:extract] ${canonicalName}: no price from ${url}, trying next`);
          failures.push(`no price from ${url}`);
          continue;
        }

        if (!isTitlePlausible(canonicalName, data.productName)) {
          ctx.logger.warn(
            ` [search:extract] ${canonicalName}: title mismatch "${data.productName}" at ${url}, trying next`,
          );
          failures.push(`title mismatch "${data.productName}" at ${url}`);
          continue;
        }

        accepted = { data, url };
        break;
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        ctx.logger.warn(` [search:extract] ${canonicalName}: Firecrawl error on ${url}: ${msg}`);
        failures.push(msg);
      }
    }

    if (accepted === null) {
      throw new Error(
        `All ${safeUrls.length} URLs failed extraction for "${canonicalName}".${failures.length ? ` Last: ${failures.at(-1)}` : ''}`,
      );
    }

    const { data: extracted, url: usedUrl } = accepted;

    ctx.logger.info(
      ` [search:extract] ${canonicalName}: price=${extracted.price} ${extracted.currency} from ${usedUrl}`,
    );

    return {
      url: usedUrl,
      html: JSON.stringify({
        extracted,
        productUrl: usedUrl,
        canonicalName,
        basketSlug,
        itemCategory: target.category,
      } satisfies SearchPayload),
      statusCode: 200,
      fetchedAt: new Date(),
    };
  }

  /**
   * Turn the SearchPayload produced by fetchTarget into at most one ParsedProduct.
   *
   * @returns an empty array (after logging a warning) when the price is not a positive finite
   *          number or when the extracted currency differs from the retailer's currency
   * @throws SyntaxError when `result.html` is not valid JSON
   */
  async parseListing(ctx: AdapterContext, result: FetchResult): Promise<ParsedProduct[]> {
    const { extracted, productUrl, canonicalName, basketSlug, itemCategory } =
      JSON.parse(result.html) as SearchPayload;

    const priceResult = z.number().positive().finite().safeParse(extracted?.price);
    if (!priceResult.success) {
      ctx.logger.warn(` [search] ${canonicalName}: invalid price "${extracted?.price}" from ${productUrl}`);
      return [];
    }

    // Compare currency codes case-insensitively and ignore stray whitespace, so "aed" in the
    // config or "AED " from the extractor is not treated as a mismatch.
    const reportedCurrency = extracted.currency?.trim().toUpperCase();
    if (reportedCurrency && reportedCurrency !== ctx.config.currencyCode.toUpperCase()) {
      ctx.logger.warn(
        ` [search] ${canonicalName}: currency mismatch ${extracted.currency} ≠ ${ctx.config.currencyCode} at ${productUrl}`,
      );
      return [];
    }

    return [
      {
        sourceUrl: productUrl,
        rawTitle: extracted.productName ?? canonicalName,
        rawBrand: null,
        rawSizeText: null,
        imageUrl: null,
        categoryText: itemCategory,
        retailerSku: null,
        price: priceResult.data,
        listPrice: null,
        promoPrice: null,
        promoText: null,
        // inStock defaults to true when Firecrawl does not return the field.
        // This is a conservative assumption — monitor for out-of-stock false positives.
        inStock: extracted.inStock ?? true,
        rawPayload: { extracted, basketSlug, itemCategory, canonicalName },
      },
    ];
  }

  /** Not supported: this adapter only produces listing-style results. Always throws. */
  async parseProduct(_ctx: AdapterContext, _result: FetchResult): Promise<ParsedProduct> {
    throw new Error('SearchAdapter does not support single-product parsing');
  }
}
|
||||
@@ -49,16 +49,23 @@ export const DiscoverySeedSchema = z.object({
|
||||
category: z.string().optional(),
|
||||
});
|
||||
|
||||
/**
 * Per-retailer options for the `search` adapter.
 *
 * - `numResults`: how many results to request from Exa; must be a positive integer (default 3).
 * - `queryTemplate`: optional search query with `{canonicalName}`, `{category}`, `{currency}`
 *   and `{market}` placeholders; when absent the adapter builds a default query.
 * - `urlPathContains`: optional substring a result URL must contain to be considered.
 */
export const SearchConfigSchema = z.object({
  // Reject 0, negative and fractional counts at config-load time instead of sending them to Exa.
  numResults: z.number().int().positive().default(3),
  queryTemplate: z.string().optional(),
  urlPathContains: z.string().optional(),
});
|
||||
|
||||
export const RetailerConfigSchema = z.object({
|
||||
retailer: z.object({
|
||||
slug: z.string(),
|
||||
name: z.string(),
|
||||
marketCode: z.string().length(2),
|
||||
currencyCode: z.string().length(3),
|
||||
adapter: z.enum(['generic', 'exa-search', 'custom']).default('generic'),
|
||||
adapter: z.enum(['generic', 'exa-search', 'search', 'custom']).default('generic'),
|
||||
baseUrl: z.string().url(),
|
||||
rateLimit: RateLimitSchema.optional(),
|
||||
acquisition: AcquisitionConfigSchema,
|
||||
acquisition: AcquisitionConfigSchema.optional(),
|
||||
searchConfig: SearchConfigSchema.optional(),
|
||||
discovery: z.object({
|
||||
mode: z.enum(['category_urls', 'sitemap', 'search']).default('category_urls'),
|
||||
seeds: z.array(DiscoverySeedSchema),
|
||||
@@ -81,6 +88,7 @@ export const RetailerConfigSchema = z.object({
|
||||
});
|
||||
|
||||
export type RetailerConfig = z.infer<typeof RetailerConfigSchema>['retailer'];
|
||||
export type SearchConfig = z.infer<typeof SearchConfigSchema>;
|
||||
|
||||
export const BasketItemSchema = z.object({
|
||||
id: z.string(),
|
||||
|
||||
33
consumer-prices-core/src/fx/rates.ts
Normal file
33
consumer-prices-core/src/fx/rates.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
 * Static FX table: value of one unit of local currency expressed in USD.
 *
 * Figures are approximate mid-market rates taken from major central bank published
 * rates / ECB reference rates and are refreshed by hand. Whenever the numbers change,
 * bump RATES_DATE in the same edit. Pegged currencies (AED, SAR, QAR) are stable;
 * floating currencies (BRL, KES, INR) can drift 20-30%/year, so refresh quarterly.
 */

/** Month (YYYY-MM) the table below was last refreshed. */
export const RATES_DATE = '2026-03';

/** Multiplier from an ISO 4217 currency code to USD; codes not listed here are absent (undefined). */
export const FX_RATES_TO_USD: Record<string, number> = {
  USD: 1, // US dollar (base)
  AED: 0.2723, // UAE dirham (fixed peg ~3.673)
  SAR: 0.2667, // Saudi riyal (fixed peg ~3.75)
  GBP: 1.275, // Pound sterling
  EUR: 1.08, // Euro
  CHF: 1.115, // Swiss franc
  SGD: 0.745, // Singapore dollar
  AUD: 0.635, // Australian dollar
  CAD: 0.735, // Canadian dollar
  INR: 0.012, // Indian rupee
  BRL: 0.18, // Brazilian real
  KES: 0.0077, // Kenyan shilling
  NGN: 0.00065, // Nigerian naira
  ZAR: 0.054, // South African rand
  PKR: 0.0036, // Pakistani rupee
  EGP: 0.02, // Egyptian pound
  KWD: 3.27, // Kuwaiti dinar
  QAR: 0.2747, // Qatari riyal (pegged)
  BHD: 2.653, // Bahraini dinar
  OMR: 2.597, // Omani rial
};
|
||||
@@ -6,6 +6,7 @@
|
||||
import { query, closePool } from '../db/client.js';
|
||||
import { loadAllBasketConfigs } from '../config/loader.js';
|
||||
import { validateAll } from './validate.js';
|
||||
import { FX_RATES_TO_USD } from '../fx/rates.js';
|
||||
|
||||
const logger = {
|
||||
info: (msg: string, ...args: unknown[]) => console.log(`[aggregate] ${msg}`, ...args),
|
||||
@@ -259,6 +260,22 @@ export async function aggregateBasket(basketSlug: string, marketCode: string) {
|
||||
await writeComputedIndex(basketId, null, category, 'coverage_pct', catCoverage);
|
||||
}
|
||||
|
||||
// Absolute basket cost in USD for cross-country comparison
|
||||
const byItemForTotal = new Map<string, BasketRow[]>();
|
||||
for (const r of rows) {
|
||||
if (!byItemForTotal.has(r.basketItemId)) byItemForTotal.set(r.basketItemId, []);
|
||||
byItemForTotal.get(r.basketItemId)!.push(r);
|
||||
}
|
||||
let basketTotalLocal = 0;
|
||||
for (const itemRows of byItemForTotal.values()) {
|
||||
basketTotalLocal += itemRows.reduce((s, r) => s + r.price, 0) / itemRows.length;
|
||||
}
|
||||
const currencyCode = rows[0].currencyCode;
|
||||
const fxRate = FX_RATES_TO_USD[currencyCode];
|
||||
if (fxRate !== undefined) {
|
||||
await writeComputedIndex(basketId, null, null, 'basket_total_usd', Math.round(basketTotalLocal * fxRate * 100) / 100);
|
||||
}
|
||||
|
||||
logger.info(`${basketSlug}:${marketCode} essentials=${essentialsIndex.toFixed(2)} value=${valueIndex.toFixed(2)} coverage=${coveragePct.toFixed(1)}%`);
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,9 @@ import { loadAllRetailerConfigs, loadRetailerConfig } from '../config/loader.js'
|
||||
import { initProviders, teardownAll } from '../acquisition/registry.js';
|
||||
import { GenericPlaywrightAdapter } from '../adapters/generic.js';
|
||||
import { ExaSearchAdapter } from '../adapters/exa-search.js';
|
||||
import { SearchAdapter } from '../adapters/search.js';
|
||||
import { ExaProvider } from '../acquisition/exa.js';
|
||||
import { FirecrawlProvider } from '../acquisition/firecrawl.js';
|
||||
import type { AdapterContext } from '../adapters/types.js';
|
||||
import { upsertCanonicalProduct } from '../db/queries/products.js';
|
||||
import { getBasketItemId, upsertProductMatch } from '../db/queries/matches.js';
|
||||
@@ -73,9 +76,19 @@ export async function scrapeRetailer(slug: string) {
|
||||
|
||||
logger.info(`Run ${runId} started for ${slug}`);
|
||||
|
||||
const exaKey = (process.env.EXA_API_KEYS || process.env.EXA_API_KEY || '').split(/[\n,]+/)[0].trim();
|
||||
const fcKey = process.env.FIRECRAWL_API_KEY ?? '';
|
||||
|
||||
if (config.adapter === 'search') {
|
||||
if (!exaKey) throw new Error(`search adapter requires EXA_API_KEY / EXA_API_KEYS (retailer: ${slug})`);
|
||||
if (!fcKey) throw new Error(`search adapter requires FIRECRAWL_API_KEY (retailer: ${slug})`);
|
||||
}
|
||||
|
||||
const adapter =
|
||||
config.adapter === 'exa-search'
|
||||
? new ExaSearchAdapter((process.env.EXA_API_KEYS || process.env.EXA_API_KEY || '').split(/[\n,]+/)[0].trim())
|
||||
config.adapter === 'search'
|
||||
? new SearchAdapter(new ExaProvider(exaKey), new FirecrawlProvider(fcKey))
|
||||
: config.adapter === 'exa-search'
|
||||
? new ExaSearchAdapter(exaKey, process.env.FIRECRAWL_API_KEY)
|
||||
: new GenericPlaywrightAdapter();
|
||||
const ctx: AdapterContext = { config, runId, logger };
|
||||
|
||||
@@ -131,10 +144,10 @@ export async function scrapeRetailer(slug: string) {
|
||||
rawPayloadJson: product.rawPayload,
|
||||
});
|
||||
|
||||
// For exa-search adapter: auto-create product → basket match since we
|
||||
// For search-based adapters: auto-create product → basket match since we
|
||||
// searched for a specific basket item (no ambiguity in what was scraped).
|
||||
if (
|
||||
config.adapter === 'exa-search' &&
|
||||
(config.adapter === 'exa-search' || config.adapter === 'search') &&
|
||||
product.rawPayload.basketSlug &&
|
||||
product.rawPayload.canonicalName
|
||||
) {
|
||||
|
||||
Reference in New Issue
Block a user