Skip to content

Commit

Permalink
update current crawl size in redis on each healthcheck call (#685)
Browse files Browse the repository at this point in the history
- allows the Browsertrix app to adjust the size, if needed, more frequently
- run checkLimits() before starting the crawl, in case it is already out of space
  • Loading branch information
ikreymer committed Sep 10, 2024
1 parent b425483 commit fdb76f2
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
21 changes: 18 additions & 3 deletions src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1206,13 +1206,23 @@ self.__bx_behaviors.selectMainBehavior();
return res ? frame : null;
}

async checkLimits() {
let interrupt = false;
/**
 * Measures the current size of the archives directory and records it in the
 * crawl state.
 *
 * In dry-run mode nothing is measured or recorded and 0 is returned.
 *
 * NOTE(review): the commit title says the size is stored in redis; the
 * implementation of `crawlState.setArchiveSize` is not visible here — confirm.
 *
 * @returns the value reported by `getDirSize(this.archivesDir)`, or 0 for a
 * dry run.
 */
async updateCurrSize(): Promise<number> {
// Dry run: return early, before touching the directory or the crawl state.
if (this.params.dryRun) {
return 0;
}

// NOTE(review): the next line appears to be the pre-change (removed) side of
// this diff, shown next to its replacement. The dry-run guard above already
// covers the ternary, and keeping both lines would declare `size` twice.
const size = this.params.dryRun ? 0 : await getDirSize(this.archivesDir);
const size = await getDirSize(this.archivesDir);

// Publish the measured size through the crawl state.
await this.crawlState.setArchiveSize(size);

// Hand the size back to the caller (checkLimits() uses it in this commit).
return size;
}

async checkLimits() {
let interrupt = false;

const size = await this.updateCurrSize();

if (this.params.sizeLimit) {
if (size >= this.params.sizeLimit) {
logger.info(
Expand Down Expand Up @@ -1323,6 +1333,9 @@ self.__bx_behaviors.selectMainBehavior();
this.healthChecker = new HealthChecker(
this.params.healthCheckPort,
this.params.workers,
async () => {
await this.updateCurrSize();
},
);
}

Expand Down Expand Up @@ -1380,6 +1393,8 @@ self.__bx_behaviors.selectMainBehavior();
return;
}

await this.checkLimits();

await this.crawlState.setStatus("running");

this.pagesFH = await this.initPages(this.seedPagesFile, "Seed Pages");
Expand Down
15 changes: 14 additions & 1 deletion src/util/healthcheck.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,15 @@ export class HealthChecker {
errorThreshold: number;
healthServer: http.Server;

updater: (() => Promise<void>) | null;

errorCount = 0;

constructor(port: number, errorThreshold: number) {
constructor(
port: number,
errorThreshold: number,
updater: (() => Promise<void>) | null = null,
) {
this.port = port;
this.errorThreshold = errorThreshold;

Expand All @@ -19,6 +25,8 @@ export class HealthChecker {
);
logger.info(`Healthcheck server started on ${port}`, {}, "healthcheck");
this.healthServer.listen(port);

this.updater = updater;
}

async healthCheck(req: http.IncomingMessage, res: http.ServerResponse) {
Expand All @@ -34,6 +42,11 @@ export class HealthChecker {
res.writeHead(200);
res.end();
}
if (this.updater) {
this.updater().catch((e) =>
logger.warn("Healthcheck Updater failed", e, "healthcheck"),
);
}
return;
}

Expand Down

0 comments on commit fdb76f2

Please sign in to comment.