Skip to content

Commit

Permalink
Don't submit empty seed_urls or sitemap_urls when making a partial crawl request (#126972)
Browse files Browse the repository at this point in the history

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
  • Loading branch information
Byron Hulcher and kibanamachine authored Mar 7, 2022
1 parent de3ae9b commit a24b1fc
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,25 @@ describe('CrawlCustomSettingsFlyoutLogic', () => {
});

describe('startCustomCrawl', () => {
it('starts a custom crawl with the user set values', async () => {
it('can start a custom crawl for selected domains', async () => {
  // Scenario: only domains are selected; no entry point or sitemap values are passed to mount().
  const selectedDomainUrls = ['https://www.elastic.co', 'https://swiftype.com'];
  const expectedOverrides = {
    domain_allowlist: ['https://www.elastic.co', 'https://swiftype.com'],
    max_crawl_depth: 5,
    sitemap_discovery_disabled: false,
  };

  mount({ includeSitemapsInRobotsTxt: true, maxCrawlDepth: 5, selectedDomainUrls });
  jest.spyOn(CrawlerLogic.actions, 'startCrawl');

  CrawlCustomSettingsFlyoutLogic.actions.startCustomCrawl();
  await nextTick();

  // The expected payload lists no seed_urls or sitemap_urls keys.
  expect(CrawlerLogic.actions.startCrawl).toHaveBeenCalledWith(expectedOverrides);
});

it('can start a custom crawl selected domains, sitemaps, and seed urls', async () => {
mount({
includeSitemapsInRobotsTxt: true,
maxCrawlDepth: 5,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { flashAPIErrors } from '../../../../../shared/flash_messages';
import { HttpLogic } from '../../../../../shared/http';
import { EngineLogic } from '../../../engine';

import { CrawlerLogic } from '../../crawler_logic';
import { CrawlerLogic, CrawlRequestOverrides } from '../../crawler_logic';
import { DomainConfig, DomainConfigFromServer } from '../../types';
import { domainConfigServerToClient } from '../../utils';
import { extractDomainAndEntryPointFromUrl } from '../add_domain/utils';
Expand Down Expand Up @@ -213,13 +213,23 @@ export const CrawlCustomSettingsFlyoutLogic = kea<
actions.fetchDomainConfigData();
},
// Listener for the startCustomCrawl action: assembles crawl request overrides
// from the flyout's current values and hands them to CrawlerLogic.actions.startCrawl.
//
// NOTE(review): this span looks like a flattened diff. A direct
// `startCrawl({ ... })` call and an `overrides`-object form appear interleaved
// without +/- markers; confirm which lines are current before editing.
startCustomCrawl: () => {
// Direct-call form: passes seed_urls and sitemap_urls unconditionally,
// so both keys are sent even when the combined arrays are empty.
CrawlerLogic.actions.startCrawl({
domain_allowlist: values.selectedDomainUrls,
max_crawl_depth: values.maxCrawlDepth,
seed_urls: [...values.selectedEntryPointUrls, ...values.customEntryPointUrls],
sitemap_urls: [...values.selectedSitemapUrls, ...values.customSitemapUrls],
// Overrides-object form: starts with the three always-present settings.
const overrides: CrawlRequestOverrides = {
// Sitemap discovery is disabled exactly when includeSitemapsInRobotsTxt is false.
sitemap_discovery_disabled: !values.includeSitemapsInRobotsTxt,
});
max_crawl_depth: values.maxCrawlDepth,
domain_allowlist: values.selectedDomainUrls,
};

// Selected and custom entry points are combined; seed_urls is only set when
// the combined list is non-empty.
const seedUrls = [...values.selectedEntryPointUrls, ...values.customEntryPointUrls];
if (seedUrls.length > 0) {
overrides.seed_urls = seedUrls;
}

// Same rule for sitemaps: sitemap_urls is omitted when there are none.
const sitemapUrls = [...values.selectedSitemapUrls, ...values.customSitemapUrls];
if (sitemapUrls.length > 0) {
overrides.sitemap_urls = sitemapUrls;
}

CrawlerLogic.actions.startCrawl(overrides);
},
}),
});
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ const ACTIVE_STATUSES = [
CrawlerStatus.Canceling,
];

interface CrawlRequestOverrides {
export interface CrawlRequestOverrides {
domain_allowlist?: string[];
max_crawl_depth?: number;
seed_urls?: string[];
Expand Down

0 comments on commit a24b1fc

Please sign in to comment.