chore: sync with Gitea master and restore local-only files

Reset local main to gitea/master (the new source of truth), then restored
the local-only files that aren't tracked in Gitea: web scrapers, admin
dashboard, ChromaDB integration, debug scripts, and utility libraries.

Gitea master adds: discovermass, buscarmisas-network, hk-parishes,
bohosluzby, kerknet, gottesdienstzeiten, miserend importers,
ClaimRequest model, forward geocoding, heartbeat healthcheck.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Albert
2026-04-12 19:11:22 -04:00
parent 76cca3ba75
commit 2c51513851
133 changed files with 30381 additions and 0 deletions

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env tsx
import { GenericScraper } from '../../src/scrapers/strategies/generic';
/**
 * Reduce an HTML string to lowercase plain text suitable for substring search.
 *
 * Drops <script> and <style> elements together with their contents, replaces
 * every remaining tag with a space, collapses whitespace runs to a single
 * space, and lowercases the result.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Lowercased text with all tags removed.
 */
function htmlToSearchableText(html) {
  return html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .toLowerCase();
}

/**
 * Collect every occurrence of `needle` in `text` with a short context window.
 *
 * @param {string} text - Text to search.
 * @param {string} needle - Substring to look for.
 * @returns {{position: number, context: string}[]} One entry per occurrence;
 *   `context` spans from 30 characters before the match to 70 characters after
 *   its start, clamped to the bounds of `text`.
 */
function findOccurrences(text, needle) {
  const occurrences = [];
  // An empty needle matches at every index and would never advance past the
  // end of the text, so treat it as "no occurrences".
  if (needle === '') {
    return occurrences;
  }
  let idx = text.indexOf(needle);
  while (idx !== -1) {
    occurrences.push({
      position: idx,
      context: text.substring(Math.max(0, idx - 30), idx + 70),
    });
    // Resume one character past the match start so no occurrence is skipped.
    idx = text.indexOf(needle, idx + 1);
  }
  return occurrences;
}

/**
 * Debug helper: scrape http://parafialubojna.pl with the generic scraper
 * (country set to 'PL') and print every occurrence of the word "niedziela"
 * (Polish for "Sunday") found in the page text, with surrounding context.
 *
 * Nothing is printed when the scrape result has no `rawHtml`.
 *
 * @returns {Promise<void>} Resolves once the report is printed and the scraper
 *   has been closed; rejects if scraping or closing fails.
 */
async function check() {
  const scraper = new GenericScraper();
  await scraper.init();
  try {
    scraper.setCountry('PL');
    const result = await scraper.scrape('http://parafialubojna.pl');
    if (result.rawHtml) {
      const text = htmlToSearchableText(result.rawHtml);
      const niedzielaMatches = findOccurrences(text, 'niedziela');
      console.log(`niedziela occurrences: ${niedzielaMatches.length}\n`);
      niedzielaMatches.forEach((match, i) => {
        console.log(`Occurrence ${i + 1} at position ${match.position}:`);
        console.log(` "${match.context}"`);
        console.log('');
      });
    }
  } finally {
    // Always close the scraper, even when scrape() or the processing above
    // throws, so whatever init() acquired is released.
    await scraper.close();
  }
}
// Entry point: run the check and surface any failure explicitly instead of
// leaving a floating promise that ends in an unhandled rejection.
check().catch((err) => {
  console.error(err);
  // Signal failure to the caller while letting pending output flush.
  process.exitCode = 1;
});