Chore: filter out bots from frontend logging (#111157)

* Chore: filter out bots from frontend logging

* chore: updates after PR feedback

* chore: change solution to use beforeSend
This commit is contained in:
Hugo Häggmark 2025-09-16 14:13:06 +02:00 committed by GitHub
parent c6fe19b472
commit 35df3ae554
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 121 additions and 0 deletions

View File

@ -1167,4 +1167,9 @@ export interface FeatureToggles {
* @default false
*/
pluginContainers?: boolean;
/**
* Filter out bots from collecting data for Frontend Observability
* @default false
*/
filterOutBotsFromFrontendLogs?: boolean;
}

View File

@ -2022,6 +2022,14 @@ var (
Expression: "false",
RequiresRestart: true,
},
{
Name: "filterOutBotsFromFrontendLogs",
Description: "Filter out bots from collecting data for Frontend Observability",
Stage: FeatureStageExperimental,
FrontendOnly: true,
Owner: grafanaPluginsPlatformSquad,
Expression: "false",
},
}
)

View File

@ -259,3 +259,4 @@ azureResourcePickerUpdates,preview,@grafana/partner-datasources,false,false,true
prometheusTypeMigration,experimental,@grafana/partner-datasources,false,true,false
dskitBackgroundServices,experimental,@grafana/plugins-platform-backend,false,true,false
pluginContainers,privatePreview,@grafana/plugins-platform-backend,false,true,false
filterOutBotsFromFrontendLogs,experimental,@grafana/plugins-platform-backend,false,false,true

1 Name Stage Owner requiresDevMode RequiresRestart FrontendOnly
259 prometheusTypeMigration experimental @grafana/partner-datasources false true false
260 dskitBackgroundServices experimental @grafana/plugins-platform-backend false true false
261 pluginContainers privatePreview @grafana/plugins-platform-backend false true false
262 filterOutBotsFromFrontendLogs experimental @grafana/plugins-platform-backend false false true

View File

@ -1046,4 +1046,8 @@ const (
// FlagPluginContainers
// Enables running plugins in containers
FlagPluginContainers = "pluginContainers"
// FlagFilterOutBotsFromFrontendLogs
// Filter out bots from collecting data for Frontend Observability
FlagFilterOutBotsFromFrontendLogs = "filterOutBotsFromFrontendLogs"
)

View File

@ -1562,6 +1562,20 @@
"hideFromDocs": true
}
},
{
"metadata": {
"name": "filterOutBotsFromFrontendLogs",
"resourceVersion": "1758000919535",
"creationTimestamp": "2025-09-16T05:35:19Z"
},
"spec": {
"description": "Filter out bots from collecting data for Frontend Observability",
"stage": "experimental",
"codeowner": "@grafana/plugins-platform-backend",
"frontend": true,
"expression": "false"
}
},
{
"metadata": {
"name": "foldersAppPlatformAPI",

View File

@ -4,6 +4,7 @@ import { Faro, Instrumentation } from '@grafana/faro-core';
import * as faroWebSdkModule from '@grafana/faro-web-sdk';
import { BrowserConfig, FetchTransport, SessionInstrumentation } from '@grafana/faro-web-sdk';
import { TracingInstrumentation } from '@grafana/faro-web-tracing';
import { config } from '@grafana/runtime';
import { EchoSrvTransport } from './EchoSrvTransport';
import {
@ -46,6 +47,8 @@ describe('GrafanaJavascriptAgentEchoBackend', () => {
instrumentations: mockedInstrumentations,
internalLogger: mockedInternalLogger,
});
config.featureToggles.filterOutBotsFromFrontendLogs = false;
});
afterEach(() => {
@ -154,6 +157,13 @@ describe('GrafanaJavascriptAgentEchoBackend', () => {
expect(initializeFaroMock.mock.calls[1][0].instrumentations?.[1]).toBeInstanceOf(SessionInstrumentation);
});
// Checks that the configuration passed to the mocked initializer (`initializeFaroMock`)
// includes a `beforeSend` hook.
it('should use a beforeSend handler', () => {
new GrafanaJavascriptAgentBackend(options);
// Constructing the backend is expected to result in exactly one initializer call.
expect(initializeFaroMock).toHaveBeenCalledTimes(1);
// Only the presence of `beforeSend` on the first call's first argument is asserted, not its identity.
expect(initializeFaroMock.mock.calls[0][0].beforeSend).toBeDefined();
});
//@FIXME - make integration test work
// it('integration test with EchoSrv and GrafanaJavascriptAgent', async () => {

View File

@ -16,6 +16,7 @@ import { TracingInstrumentation } from '@grafana/faro-web-tracing';
import { EchoBackend, EchoEvent, EchoEventType } from '@grafana/runtime';
import { EchoSrvTransport } from './EchoSrvTransport';
import { beforeSendHandler } from './beforeSendHandler';
import { GrafanaJavascriptAgentEchoEvent, User } from './types';
function isCrossOriginIframe() {
@ -118,6 +119,7 @@ export class GrafanaJavascriptAgentBackend
sendTimeout: 1000,
},
internalLoggerLevel: options.internalLoggerLevel || defaultInternalLoggerLevel,
beforeSend: beforeSendHandler,
};
this.faroInstance = initializeFaro(grafanaJavaScriptAgentOptions);

View File

@ -0,0 +1,53 @@
import { TransportItem, TransportItemType } from '@grafana/faro-core';
import { config } from '@grafana/runtime';
import { beforeSendHandler } from './beforeSendHandler';
/**
 * Test fixture: builds a LOG transport item with an empty payload whose
 * browser meta carries the supplied user agent.
 *
 * @param userAgent - value stored at `meta.browser.userAgent`; may be `undefined`.
 * @returns a freshly created transport item on every call.
 */
function getTransportationItem(userAgent: string | undefined): TransportItem {
  const browser = { userAgent };

  return {
    meta: { browser },
    payload: {},
    type: TransportItemType.LOG,
  };
}
/**
 * Unit tests for `beforeSendHandler`.
 *
 * The handler is expected to hand back the very same item (identity, hence `toBe`)
 * unless the `filterOutBotsFromFrontendLogs` toggle is on AND the item's user agent
 * matches a known bot, in which case it returns `null`.
 */
describe('beforeSendHandler', () => {
  beforeEach(() => {
    // Every test starts with filtering switched off; tests that need it opt in explicitly.
    config.featureToggles.filterOutBotsFromFrontendLogs = false;
  });

  it('should return item when feature toggle is disabled', () => {
    // A bot user agent is used on purpose: with the toggle off it must still pass through.
    const botUserAgent = 'Googlebot/2.1 (+http://www.google.com/bot.html)';
    const item = getTransportationItem(botUserAgent);

    expect(beforeSendHandler(item)).toBe(item);
  });

  it('should return item for regular user agents', () => {
    config.featureToggles.filterOutBotsFromFrontendLogs = true;
    const regularUserAgent =
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36';
    const item = getTransportationItem(regularUserAgent);

    expect(beforeSendHandler(item)).toBe(item);
  });

  // A missing or empty user agent cannot be classified, so the item is kept.
  it.each(['', undefined])('should return item when user agent is %s', (userAgent) => {
    config.featureToggles.filterOutBotsFromFrontendLogs = true;
    const item = getTransportationItem(userAgent);

    expect(beforeSendHandler(item)).toBe(item);
  });

  it.each([
    'Googlebot/2.1 (+http://www.google.com/bot.html)',
    'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
    'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
    'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
    'Twitterbot/1.0',
    'Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)',
    'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)',
    'Mozilla/5.0 (compatible; DuckDuckBot-Https/1.1; https://duckduckgo.com/duckduckbot)',
    'Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)',
    'Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)',
    // Title corrected: the handler drops the item by returning `null`, not `0`.
  ])('should return null for bot user agent: %s', (userAgent) => {
    config.featureToggles.filterOutBotsFromFrontendLogs = true;
    const item = getTransportationItem(userAgent);

    expect(beforeSendHandler(item)).toBe(null);
  });
});

View File

@ -0,0 +1,24 @@
import { TransportItem } from '@grafana/faro-core';
import { config } from '@grafana/runtime';
/**
 * Alternation of user-agent fragments identifying known bots and crawlers, as listed in
 * https://grafana.com/docs/grafana-cloud/monitor-applications/frontend-observability/instrument/filter-bots/#filter-out-bots-from-collecting-data-for-frontend-observability
 *
 * The string is used verbatim as a regular-expression source, so characters such as `.`
 * keep their regex meaning.
 */
const botUserAgentPattern =
  '(googlebot|Googlebot-Mobile|Googlebot-Image|Google favicon|Mediapartners-Google|bingbot|slurp|Commons-HttpClient|Python-urllib|libwww|httpunit|nutch|phpcrawl|msnbot|jyxobot|FAST-WebCrawler|FAST Enterprise Crawler|biglotron|teoma|convera|seekbot|gigablast|exabot|ngbot|ia_archiver|GingerCrawler|webmon |httrack|webcrawler|grub.org|UsineNouvelleCrawler|antibot|netresearchserver|speedy|fluffy|bibnum.bnf|findlink|msrbot|panscient|yacybot|AISearchBot|IOI|ips-agent|tagoobot|MJ12bot|dotbot|woriobot|yanga|buzzbot|mlbot|yandexbot|purebot|Linguee Bot|Voyager|CyberPatrol|voilabot|baiduspider|citeseerxbot|spbot|twengabot|postrank|turnitinbot|scribdbot|page2rss|sitebot|linkdex|Adidxbot|blekkobot|ezooms|Mail.RU_Bot|discobot|heritrix|findthatfile|europarchive.org|NerdByNature.Bot|sistrix crawler|ahrefsbot|Aboundex|domaincrawler|wbsearchbot|summify|ccbot|edisterbot|seznambot|ec2linkfinder|gslfbot|aihitbot|intelium_bot|facebookexternalhit|yeti|RetrevoPageAnalyzer|lb-spider|sogou|lssbot|careerbot|wotbox|wocbot|ichiro|DuckDuckBot|lssrocketcrawler|drupact|webcompanycrawler|acoonbot|openindexspider|gnam gnam spider|web-archive-net.com.bot|backlinkcrawler|coccoc|integromedb|content crawler spider|toplistbot|seokicks-robot|it2media-domain-crawler|ip-web-crawler.com|siteexplorer.info|elisabot|proximic|changedetection|blexbot|arabot|WeSEE:Search|niki-bot|CrystalSemanticsBot|rogerbot|360Spider|psbot|InterfaxScanBot|Lipperhey SEO Service|CC Metadata Scaper|g00g1e.net|GrapeshotCrawler|urlappendbot|brainobot|fr-crawler|binlar|SimpleCrawler|Livelapbot|Twitterbot|cXensebot|smtbot|bnf.fr_bot|A6-Indexer|ADmantX|Facebot|OrangeBot|memorybot|AdvBot|MegaIndex|SemanticScholarBot|ltx71|nerdybot|xovibot|BUbiNG|Qwantify|archive.org_bot|Applebot|TweetmemeBot|crawler4j|findxbot|SemrushBot|yoozBot|lipperhey|y!j-asr|Domain Re-Animator Bot|AddThis)';

// Compiled once at module load. The `i` flag makes matching case-insensitive, and the
// absence of `g`/`y` keeps `test()` free of `lastIndex` state between calls.
const botUserAgentRegex = new RegExp(botUserAgentPattern, 'i');

/**
 * `beforeSend` hook that drops transport items originating from known bots.
 *
 * @param item - the transport item about to be sent.
 * @returns the same `item` instance when the `filterOutBotsFromFrontendLogs` toggle is
 * off, when the item carries no (or an empty) browser user agent, or when the user
 * agent does not match the bot pattern; `null` when the toggle is on and the user
 * agent matches.
 */
export function beforeSendHandler(item: TransportItem): TransportItem | null {
  // The toggle is consulted first so that `item` is not inspected at all when filtering is off.
  const filteringEnabled = Boolean(config.featureToggles.filterOutBotsFromFrontendLogs);

  if (filteringEnabled) {
    const userAgent = item.meta.browser?.userAgent;

    if (userAgent && botUserAgentRegex.test(userAgent)) {
      return null;
    }
  }

  return item;
}