Add latest changes from gitlab-org/gitlab@master

GitLab Bot 2024-07-09 21:26:04 +00:00
parent 425a28df9f
commit f1da2397eb
56 changed files with 775 additions and 3079 deletions

View File

@@ -1758,6 +1758,8 @@
.qa:rules:e2e:test-on-cng:
  rules:
    - <<: *if-merge-request-targeting-stable-branch
      when: never
    - <<: *if-security-merge-request
      when: never
    - <<: *if-security-schedule

View File

@@ -525,7 +525,7 @@
{"name":"rack","version":"2.2.9","platform":"ruby","checksum":"fd6301a97a1c1e955e68f85c861fcb1cde6145a32c532e1ea321a72ff8cc4042"},
{"name":"rack-accept","version":"0.4.5","platform":"ruby","checksum":"66247b5449db64ebb93ae2ec4af4764b87d1ae8a7463c7c68893ac13fa8d4da2"},
{"name":"rack-attack","version":"6.7.0","platform":"ruby","checksum":"3ca47e8f66cd33b2c96af53ea4754525cd928ed3fa8da10ee6dad0277791d77c"},
{"name":"rack-cors","version":"2.0.1","platform":"ruby","checksum":"bcc66bdf5c6a4af05d571c4d01d35ac4a873552ba4f86c05fbe39365c39b9b0a"},
{"name":"rack-cors","version":"2.0.2","platform":"ruby","checksum":"415d4e1599891760c5dc9ef0349c7fecdf94f7c6a03e75b2e7c2b54b82adda1b"},
{"name":"rack-oauth2","version":"1.21.3","platform":"ruby","checksum":"4e72a79dd6a866692e84422a552b27c38a5a1918ded06661e04910f2bbe676ba"},
{"name":"rack-protection","version":"2.2.2","platform":"ruby","checksum":"fd41414dbabbec274af0bdb1f72a48504449de4d979782c9af38cbb5dfff3299"},
{"name":"rack-proxy","version":"0.7.7","platform":"ruby","checksum":"446a4b57001022145d5c3ba73b775f66a2260eaf7420c6907483141900395c8a"},

View File

@@ -1427,7 +1427,7 @@ GEM
      rack (>= 0.4)
    rack-attack (6.7.0)
      rack (>= 1.0, < 4)
    rack-cors (2.0.1)
    rack-cors (2.0.2)
      rack (>= 2.0.0)
    rack-oauth2 (1.21.3)
      activesupport

View File

@@ -10,7 +10,7 @@ import { parsePikadayDate } from './pikaday_utility';
* If `abbreviated` is provided, returns abbreviated
* name.
*
* @param {Boolean} abbreviated
* @param {boolean} abbreviated
*/
export const getMonthNames = (abbreviated) => {
if (abbreviated) {
@@ -49,7 +49,7 @@ export const getMonthNames = (abbreviated) => {
* Returns month name based on provided date.
*
* @param {Date} date
* @param {Boolean} abbreviated
* @param {boolean} abbreviated
*/
export const monthInWords = (date, abbreviated = false) => {
if (!date) {
@@ -59,6 +59,13 @@ export const monthInWords = (date, abbreviated = false) => {
return getMonthNames(abbreviated)[date.getMonth()];
};
/**
* Formats date to `January 01, 1970`
*
* @param {Date} [date]
* @param {boolean} [abbreviated]
* @param {boolean} [hideYear]
*/
export const dateInWords = (date, abbreviated = false, hideYear = false) => {
if (!date) return date;
@@ -86,9 +93,10 @@ export const dateInWords = (date, abbreviated = false, hideYear = false) => {
*
* The largest supported unit is "days".
*
* @param {Number} intervalInSeconds The time interval in seconds
* @param {Object} params.abbreviated - Abbreviate the returned units (seconds = s, days = d, etc)
* @returns {String} A humanized description of the time interval
* @param {number} intervalInSeconds The time interval in seconds
* @param {Object} [params]
* @param {boolean} [params.abbreviated] Abbreviate the returned units (seconds = s, days = d, etc)
* @returns {string} A humanized description of the time interval
*/
export const humanizeTimeInterval = (intervalInSeconds, { abbreviated = false } = {}) => {
if (intervalInSeconds < 60 /* = 1 minute */) {
@@ -123,19 +131,20 @@ export const getWeekdayNames = () => [
/**
* Given a date object returns the day of the week in English
* @param {date} date
* @returns {String}
* @param {Date} date
* @returns {string}
*/
export const getDayName = (date) => getWeekdayNames()[date.getDay()];
/**
* Returns the i18n month name from a given date
* @example
* formatDateAsMonth(new Date('2020-06-28')) -> 'Jun'
* @param {String} datetime where month is extracted from
* @param {Object} options
* @param {Boolean} options.abbreviated whether to use the abbreviated month string, or not
* @return {String} the i18n month name
* // returns 'Jun'
* formatDateAsMonth(new Date('2020-06-28'))
* @param {string} datetime where month is extracted from
* @param {Object} [options]
* @param {boolean} [options.abbreviated] whether to use the abbreviated month string, or not
* @return {string} the i18n month name
*/
export function formatDateAsMonth(datetime, options = {}) {
const { abbreviated = true } = options;
@@ -145,11 +154,12 @@ export function formatDateAsMonth(datetime, options = {}) {
/**
* @example
* dateFormat('2017-12-05','mmm d, yyyy h:MMtt Z' ) -> "Dec 5, 2017 12:00am UTC"
* @param {date} datetime
* @param {String} format
* @param {Boolean} UTC convert local time to UTC
* @returns {String}
* // returns "Dec 5, 2017 12:00am UTC"
* formatDate('2017-12-05', 'mmm d, yyyy h:MMtt Z')
* @param {(Date|string|number)} [datetime]
* @param {string} format
* @param {boolean} utc convert local time to UTC
* @returns {string}
*/
export const formatDate = (datetime, format = 'mmm d, yyyy h:MMtt Z', utc = false) => {
if (isString(datetime) && datetime.match(/\d+-\d+-\d+ /)) {
@@ -162,7 +172,7 @@ export const formatDate = (datetime, format = 'mmm d, yyyy h:MMtt Z', utc = fals
* Formats milliseconds as timestamp (e.g. 01:02:03).
* This takes durations longer than a day into account (e.g. two days would be 48:00:00).
*
* @param milliseconds
* @param {number} milliseconds
* @returns {string}
*/
export const formatTime = (milliseconds) => {
@@ -190,8 +200,8 @@ export const formatTime = (milliseconds) => {
/**
* Port of ruby helper time_interval_in_words.
*
* @param {Number} seconds
* @return {String}
* @param {number} seconds
* @return {string}
*/
export const timeIntervalInWords = (intervalInSeconds) => {
const secondsInteger = parseInt(intervalInSeconds, 10);
@@ -231,6 +241,8 @@ export const stringifyTime = (timeObject, fullNameFormat = false) => {
* Accepts seconds and returns a timeObject { weeks: #, days: #, hours: #, minutes: # }
* Seconds can be negative or positive, zero or non-zero. Can be configured for any day
* or week length.
*
* @param {number} seconds
*/
export const parseSeconds = (
seconds,
@@ -276,7 +288,7 @@ export const parseSeconds = (
/**
* Pads given items with zeros to reach a length of 2 characters.
*
* @param {...any} args Items to be padded.
* @param {...any} args Items to be padded.
* @returns {Array<String>} Padded items.
*/
export const padWithZeros = (...args) => args.map((arg) => `${arg}`.padStart(2, '0'));
@@ -286,12 +298,15 @@ export const padWithZeros = (...args) => args.map((arg) => `${arg}`.padStart(2,
* This can be useful when populating date/time fields along with a distinct timezone selector, in
* which case we'd want to ignore the timezone's offset when populating the date and time.
*
* Examples:
* stripTimezoneFromISODate('2021-08-16T00:00:00.000-02:00') => '2021-08-16T00:00:00.000'
* stripTimezoneFromISODate('2021-08-16T00:00:00.000Z') => '2021-08-16T00:00:00.000'
* @example
* // returns '2021-08-16T00:00:00.000'
* stripTimezoneFromISODate('2021-08-16T00:00:00.000-02:00')
* @example
* // returns '2021-08-16T00:00:00.000'
* stripTimezoneFromISODate('2021-08-16T00:00:00.000Z')
*
* @param {String} date The ISO date string representation.
* @returns {String} The ISO date string without the timezone.
* @param {string} date The ISO date string representation.
* @returns {string} The ISO date string without the timezone.
*/
export const stripTimezoneFromISODate = (date) => {
if (Number.isNaN(Date.parse(date))) {
@@ -302,11 +317,13 @@ export const stripTimezoneFromISODate = (date) => {
/**
* Extracts the year, month and day from a Date instance and returns them in an object.
* For example:
* dateToYearMonthDate(new Date('2021-08-16')) => { year: '2021', month: '08', day: '16' }
*
* @example
* // returns { year: '2021', month: '08', day: '16' }
* dateToYearMonthDate(new Date('2021-08-16'))
*
* @param {Date} date The date to be parsed
* @returns {Object} An object containing the extracted year, month and day.
* @returns An object containing the extracted year, month and day.
*/
export const dateToYearMonthDate = (date) => {
if (!isDate(date)) {
@@ -323,11 +340,13 @@ export const dateToYearMonthDate = (date) => {
/**
* Extracts the hours and minutes from a string representing a time.
* For example:
* timeToHoursMinutes('12:46') => { hours: '12', minutes: '46' }
*
* @param {String} time The time to be parsed in the form HH:MM.
* @returns {Object} An object containing the hours and minutes.
* @example
* // returns { hours: '12', minutes: '46' }
* timeToHoursMinutes('12:46')
*
* @param {string} time The time to be parsed in the form HH:MM.
* @returns An object containing the hours and minutes.
*/
export const timeToHoursMinutes = (time = '') => {
if (!time || !time.match(/\d{1,2}:\d{1,2}/)) {
@@ -341,10 +360,10 @@ export const timeToHoursMinutes = (time = '') => {
/**
* This combines a date and a time and returns the computed Date's ISO string representation.
*
* @param {Date} date Date object representing the base date.
* @param {String} time String representing the time to be used, in the form HH:MM.
* @param {String} offset An optional Date-compatible offset.
* @returns {String} The combined Date's ISO string representation.
* @param {Date} date Date object representing the base date.
* @param {string} time String representing the time to be used, in the form HH:MM.
* @param {string} offset An optional Date-compatible offset.
* @returns {string} The combined Date's ISO string representation.
*/
export const dateAndTimeToISOString = (date, time, offset = '') => {
const { year, month, day } = dateToYearMonthDate(date);
@@ -361,8 +380,8 @@ export const dateAndTimeToISOString = (date, time, offset = '') => {
* Converts a Date instance to time input-compatible value consisting in a 2-digits hours and
* minutes, separated by a semi-colon, in the 24-hours format.
*
* @param {Date} date Date to be converted
* @returns {String} time input-compatible string in the form HH:MM.
* @param {Date} date Date to be converted
* @returns {string} time input-compatible string in the form HH:MM.
*/
export const dateToTimeInputValue = (date) => {
if (!isDate(date)) {
@@ -376,6 +395,21 @@ export const dateToTimeInputValue = (date) => {
});
};
/**
* Formats a given amount of time units
*
* @example
* // returns '42 days'
* formatTimeAsSummary({ days: 42 });
*
* @param {Object} config object containing exactly one property to format
* @param {number} [config.seconds]
* @param {number} [config.minutes]
* @param {number} [config.hours]
* @param {number} [config.days]
* @param {number} [config.weeks]
* @param {number} [config.months]
*/
export const formatTimeAsSummary = ({ seconds, hours, days, minutes, weeks, months }) => {
if (months) {
const value = roundToNearestHalf(months);
@@ -431,9 +465,8 @@ export const formatTimeAsSummary = ({ seconds, hours, days, minutes, weeks, mont
* ie -32400 => -9 hours
* ie -12600 => -3.5 hours
*
* @param {Number} offset UTC offset in seconds as a integer
*
* @return {String} the + or - offset in hours, e.g. `-10`, ` 0`, `+4`
* @param {number} offset UTC offset in seconds as an integer
* @returns {string} the + or - offset in hours, e.g. `-10`, ` 0`, `+4`
*/
export const formatUtcOffset = (offset) => {
const parsed = parseInt(offset, 10);
@@ -447,8 +480,8 @@ export const formatUtcOffset = (offset) => {
/**
* Returns formatted timezone
*
* @param {Object} timezone item with offset and name
* @returns {String} the UTC timezone with the offset, e.g. `[UTC+2] Berlin, [UTC 0] London`
* @param {Object} timezone item with offset and name
* @returns {string} the UTC timezone with the offset, e.g. `[UTC+2] Berlin, [UTC 0] London`
*/
export const formatTimezone = ({ offset, name }) => `[UTC${formatUtcOffset(offset)}] ${name}`;
@@ -457,7 +490,6 @@ export const formatTimezone = ({ offset, name }) => `[UTC${formatUtcOffset(offse
*
* @param {Date} startDate
* @param {Date} dueDate
* @returns
*/
export const humanTimeframe = (startDate, dueDate) => {
const start = startDate ? parsePikadayDate(startDate) : null;
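Taken together, the JSDoc annotations above describe a small date-and-duration toolkit. For illustration, a minimal usage sketch; the import path and the exact return strings are assumptions inferred from the JSDoc, not verified output:

```javascript
// Usage sketch only; the import path and result comments are assumptions.
import {
  getMonthNames,
  dateInWords,
  humanizeTimeInterval,
  parseSeconds,
  formatUtcOffset,
  stripTimezoneFromISODate,
} from '~/lib/utils/datetime/date_format_utility';

getMonthNames(true); // localized abbreviated month names, e.g. ['Jan', 'Feb', ...]
dateInWords(new Date(1970, 0, 1)); // e.g. 'January 01, 1970'
humanizeTimeInterval(90, { abbreviated: true }); // humanized duration, e.g. '1.5 min'
parseSeconds(90060); // { weeks: …, days: …, hours: …, minutes: … } per the configured day/week length
formatUtcOffset(-12600); // '-3.5'
stripTimezoneFromISODate('2021-08-16T00:00:00.000Z'); // '2021-08-16T00:00:00.000'
```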

View File

@@ -380,7 +380,9 @@ export default {
<div v-if="allBranches" class="gl-mt-2" data-testid="all-branches">
{{ $options.i18n.allBranches }}
</div>
<code v-else class="gl-bg-none p-0 gl-font-base" data-testid="branch">{{ branch }}</code>
<code v-else class="gl-bg-transparent p-0 gl-font-base" data-testid="branch">{{
branch
}}</code>
<p v-if="matchingBranchesCount" class="gl-mt-3 gl-mb-0">
<gl-link :href="matchingBranchesLinkHref">{{ matchingBranchesLinkTitle }}</gl-link>
</p>

View File

@@ -1,10 +1,14 @@
<script>
import { GlAlert, GlSprintf, GlTab, GlTabs } from '@gitlab/ui';
import { updateHistory } from '~/lib/utils/url_utility';
import { InternalEvents } from '~/tracking';
const trackingMixin = InternalEvents.mixin();
export default {
name: 'UsageQuotasApp',
components: { GlAlert, GlSprintf, GlTab, GlTabs },
mixins: [trackingMixin],
inject: ['tabs'],
methods: {
glTabLinkAttributes(tab) {
@@ -15,16 +19,20 @@ export default {
return activeTabHash === hash;
},
updateActiveTab(hash) {
updateActiveTab(tab) {
const url = new URL(window.location.href);
url.hash = hash;
url.hash = tab.hash;
updateHistory({
url,
title: document.title,
replace: true,
});
if (tab.tracking?.action) {
this.trackEvent(tab.tracking.action);
}
},
},
};
@@ -43,7 +51,7 @@ export default {
:active="isActive(tab.hash)"
:data-testid="`${tab.testid}-tab-content`"
:title-link-attributes="glTabLinkAttributes(tab)"
@click="updateActiveTab(tab.hash)"
@click="updateActiveTab(tab)"
>
<component :is="tab.component" :data-testid="`${tab.testid}-app`" />
</gl-tab>
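With this change, each injected tab can opt into analytics: `updateActiveTab` now receives the whole tab object and calls `trackEvent` when `tab.tracking.action` is set. A sketch of what an injected `tabs` entry might look like under this contract; every value below is illustrative, not taken from the real app:

```javascript
// Hypothetical `tabs` entries as provided via inject; all names are made up.
export const tabs = [
  {
    hash: '#storage-quota-tab', // written to the URL by updateActiveTab
    testid: 'storage',
    component: 'StorageUsageApp', // a Vue component reference in the real app
    tracking: { action: 'click_storage_tab' }, // optional; forwarded to trackEvent
  },
  {
    hash: '#pipelines-quota-tab',
    testid: 'pipelines',
    component: 'PipelinesUsageApp',
    // no `tracking` key, so updateActiveTab skips trackEvent for this tab
  },
];
```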

View File

@@ -3,70 +3,3 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
/*
Components layer:
https://tailwindcss.com/docs/adding-custom-styles#adding-component-classes
The components layer will be rendered _before_ the utilities,
so that utilities can overwrite values
*/
@layer components {
/*
Type scale: adding our gl-heading shorthands.
*/
.gl-heading-display {
font-size: clamp(1.75rem, 0.8611111111rem + 1.8518518519vw, 2.25rem);
line-height: 1.125;
@apply gl-font-bold;
letter-spacing: -0.01em;
@apply gl-mt-0;
@apply gl-mb-6;
}
.gl-heading-1 {
font-size: clamp(1.5rem, 0.8333333333rem + 1.3888888889vw, 1.875rem);
line-height: 1.25;
@apply gl-font-bold;
letter-spacing: -0.01em;
@apply gl-mt-0;
@apply gl-mb-5;
}
.gl-heading-2 {
font-size: clamp(1.3125rem, 0.8680555556rem + 0.9259259259vw, 1.5625rem);
line-height: 1.25;
@apply gl-font-bold;
letter-spacing: -0.01em;
@apply gl-mt-0;
@apply gl-mb-5;
}
.gl-heading-3 {
font-size: clamp(1.125rem, 0.9027777778rem + 0.462962963vw, 1.25rem);
line-height: 1.25;
@apply gl-font-bold;
letter-spacing: inherit;
@apply gl-mt-0;
@apply gl-mb-5;
}
.gl-heading-4 {
@apply gl-text-lg;
line-height: 1.25;
@apply gl-font-bold;
letter-spacing: inherit;
@apply gl-mt-0;
@apply gl-mb-5;
}
.gl-heading-5 {
@apply gl-text-base;
line-height: 1.25;
@apply gl-font-bold;
letter-spacing: inherit;
@apply gl-mt-0;
@apply gl-mb-5;
}
}

View File

@@ -26,6 +26,12 @@ module Types
    field :name, GraphQL::Types::String, null: false, description: 'Name of the container repository.'
    field :path, GraphQL::Types::String, null: false, description: 'Path of the container repository.'
    field :project, Types::ProjectType, null: false, description: 'Project of the container registry.'
    field :protection_rule_exists, GraphQL::Types::Boolean,
      null: false,
      alpha: { milestone: '17.2' },
      description:
        'Whether any matching container protection rule exists for this container. ' \
        'Available only when feature flag `container_registry_protected_containers` is enabled.'
    field :status, Types::ContainerRepositoryStatusEnum, null: true, description: 'Status of the container repository.'
    field :tags_count, GraphQL::Types::Int, null: false, description: 'Number of tags associated with this image.'
    field :updated_at, Types::TimeType, null: false, description: 'Timestamp when the container repository was updated.'

@@ -46,5 +52,15 @@ module Types
    def migration_state
      ''
    end

    def protection_rule_exists
      return false if Feature.disabled?(:container_registry_protected_containers, object.project)

      BatchLoader::GraphQL.for(object.path).batch do |repository_paths, loader|
        ::ContainerRegistry::Protection::Rule
          .for_push_exists_for_multiple_containers(repository_paths: repository_paths, project_id: object.project_id)
          .each { |row| loader.call(row['repository_path'], row['protected']) }
      end
    end
  end
end
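For context, the new field can be requested through the GraphQL API once the feature flag is enabled. A client-side sketch; the repository global ID is a placeholder:

```javascript
// Sketch: read protectionRuleExists for one repository. The ID is a placeholder.
const query = `
  query {
    containerRepository(id: "gid://gitlab/ContainerRepository/1") {
      path
      protectionRuleExists
    }
  }
`;

fetch('/api/graphql', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ query }),
})
  .then((response) => response.json())
  .then(({ data }) => console.log(data.containerRepository.protectionRuleExists));
```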

View File

@@ -42,6 +42,34 @@ module ContainerRegistry
        .exists?
    end

    def self.for_push_exists_for_multiple_containers(repository_paths:, project_id:)
      return none if repository_paths.blank? || project_id.blank?

      cte_query =
        select('*').from(
          sanitize_sql_array([
            "unnest(ARRAY[:repository_paths]) AS x(repository_path)", { repository_paths: repository_paths }
          ])
        )
      cte_name = :container_names_and_types_cte
      cte = Gitlab::SQL::CTE.new(cte_name, cte_query)

      rules_cte_repository_path = "#{cte_name}.#{connection.quote_column_name('repository_path')}"

      protection_rule_exists_subquery =
        select(1)
          .where(project_id: project_id)
          .where("#{rules_cte_repository_path} ILIKE #{::Gitlab::SQL::Glob.to_like('repository_path_pattern')}")

      query = select(
        rules_cte_repository_path,
        sanitize_sql_array(['EXISTS(?) AS protected', protection_rule_exists_subquery])
      ).from(Arel.sql(cte_name.to_s))

      connection.exec_query(query.with(cte.to_arel).to_sql)
    end

    private

    def path_pattern_starts_with_project_full_path

View File

@@ -4,7 +4,7 @@ module Packages
  module Npm
    class ProcessPackageFileService
      ExtractionError = Class.new(StandardError)
      PACKAGE_JSON_ENTRY_PATH = '*/package.json'
      PACKAGE_JSON_ENTRY_PATH = 'package/package.json'
      MAX_FILE_SIZE = 4.megabytes

      delegate :package, to: :package_file

View File

@@ -0,0 +1,9 @@
---
name: ai_gateway_agents
feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/465858
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/157986
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/469606
milestone: '17.2'
group: group::ai framework
type: beta
default_enabled: false

View File

@@ -1,7 +1,6 @@
const path = require('path');
const plugin = require('tailwindcss/plugin');
const tailwindDefaults = require('@gitlab/ui/tailwind.defaults');
const { range, round } = require('lodash');
// Try loading the tailwind css_in_js, in case they exist
let utilities = {};
@@ -24,109 +23,6 @@ try {
delete require.cache[path.resolve(__filename)];
}
function gitLabUIUtilities({ addComponents, addUtilities }) {
addComponents({
'.border': {
'border-style': 'solid',
'border-color': 'var(--gray-100, #dcdcde)',
},
'.border-t': {
'border-top-style': 'solid',
'border-top-color': 'var(--gray-100, #dcdcde)',
},
'.border-r': {
'border-right-style': 'solid',
'border-right-color': 'var(--gray-100, #dcdcde)',
},
'.border-b': {
'border-bottom-style': 'solid',
'border-bottom-color': 'var(--gray-100, #dcdcde)',
},
'.border-l': {
'border-left-style': 'solid',
'border-left-color': 'var(--gray-100, #dcdcde)',
},
'.str-truncated': {
display: 'inline-block',
overflow: 'hidden',
'text-overflow': 'ellipsis',
'vertical-align': 'top',
'white-space': 'nowrap',
'max-width': '82%',
},
'.no-spin[type="number"]': {
'&::-webkit-outer-spin-button': {
'-webkit-appearance': 'none',
margin: '0',
},
'&::-webkit-inner-spin-button': {
'-webkit-appearance': 'none',
margin: '0',
},
'-moz-appearance': 'textfield',
},
});
addUtilities({
'.font-monospace': {
'font-family':
'var(--default-mono-font, "GitLab Mono"), "JetBrains Mono", "Menlo", "DejaVu Sans Mono", "Liberation Mono", "Consolas", "Ubuntu Mono", "Courier New", "andale mono", "lucida console", monospace',
'font-variant-ligatures': 'none',
},
'.break-anywhere': {
'overflow-wrap': 'anywhere',
'word-break': 'normal',
},
'.wrap-anywhere': {
'overflow-wrap': 'anywhere',
},
'.border-b-solid': {
'border-bottom-style': 'solid',
},
'.border-b-initial': {
'border-bottom-style': 'initial',
},
'.border-l-solid': {
'border-left-style': 'solid',
},
'.border-r-solid': {
'border-right-style': 'solid',
},
'.border-t-solid': {
'border-top-style': 'solid',
},
'.clearfix': {
'&::after': {
display: 'block',
clear: 'both',
content: '""',
},
},
'.focus': {
'box-shadow': '0 0 0 1px var(--white, #fff), 0 0 0 3px var(--blue-400, #428fdc)',
outline: 'none',
},
'.text-align-inherit': {
'text-align': 'inherit',
},
});
}
const widthPercentageScales = [8, 10, 20];
const widthPercentageScale = widthPercentageScales.reduce((accumulator1, denominator) => {
return {
...accumulator1,
...range(1, denominator).reduce((accumulator2, numerator) => {
const width = (numerator / denominator) * 100;
return {
...accumulator2,
[`${numerator}/${denominator}`]: `${round(width, 6)}%`,
};
}, {}),
};
}, {});
/** @type {import('tailwindcss').Config} */
module.exports = {
presets: [tailwindDefaults],
@@ -144,168 +40,7 @@ module.exports = {
// this from happening. For now, we are simply blocking the only problematic occurrence.
'[link:page-slug]',
],
corePlugins: {
/*
We set background: none, Tailwind background-image: none...
Probably compatible enough?
We could also extend the theme, so that we use background: none in tailwind
*/
backgroundImage: false,
/*
Disable preflight styles so that `@tailwind base` compiles to CSS vars declarations without
any of the resets which we don't need.
More on this at https://tailwindcss.com/docs/preflight.
*/
preflight: false,
},
theme: {
// TODO: Backport to GitLab UI
fontFamily: {
regular:
'var(--default-regular-font, "GitLab Sans"), -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Noto Sans", Ubuntu, Cantarell, "Helvetica Neue", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"',
},
// TODO: Backport to GitLab UI
opacity: {
0: '0',
1: '.1',
2: '.2',
3: '.3',
4: '.4',
5: '.5',
6: '.6',
7: '.7',
8: '.8',
9: '.9',
10: '1',
},
// TODO: Backport to GitLab UI
zIndex: {
0: '0',
1: '1',
2: '2',
3: '3',
200: '200',
9999: '9999',
},
// TODO: Backport to GitLab UI.
lineHeight: {
reset: 'inherit',
0: '0',
1: '1',
normal: '1rem',
20: '1.25rem',
24: '1.5rem',
28: '1.75rem',
32: '2rem',
36: '2.25rem',
42: '2.625rem',
},
transitionDuration: {
DEFAULT: '200ms',
slow: '400ms',
medium: '200ms',
fast: '100ms',
},
transitionTimingFunction: {
ease: 'ease',
linear: 'linear',
},
// TODO: Backport to GitLab UI.
borderRadius: {
none: '0',
6: '1.5rem',
base: '.25rem',
full: '50%', // Tailwind gl-rounded-full is 9999px
small: '.125rem',
lg: '.5rem',
pill: '.75rem',
},
animation: {
spin: 'spin 2s infinite linear',
},
// These extends probably should be moved to GitLab UI:
extend: {
// TODO: Backport to GitLab UI. This should be part of the default colors config.
colors: {
current: 'currentColor',
inherit: 'inherit',
},
borderWidth: {
// We have a border-1 class, while tailwind was missing it
1: '1px',
},
boxShadow: {
none: 'none',
// TODO: I don't think we have a --t-gray matching class... --t-gray-a-24 seems close
DEFAULT: '0 1px 4px 0 #0000004d',
sm: '0 1px 2px var(--t-gray-a-08, #1f1e2414)',
md: '0 2px 8px var(--t-gray-a-16, #1f1e2429), 0 0 2px var(--t-gray-a-16, #1f1e2429)',
lg: '0 4px 12px var(--t-gray-a-16, #1f1e2429), 0 0 4px var(--t-gray-a-16, #1f1e2429)',
// TODO: backport these inset box shadows to GitLab UI
'inner-1-gray-100': 'inset 0 0 0 1px var(--gray-100, #dcdcde)',
'inner-b-1-gray-100': 'inset 0 -1px 0 0 var(--gray-100, #dcdcde)',
'inner-1-gray-200': 'inset 0 0 0 1px var(--gray-200, #bfbfc3)',
'inner-l-4-gray-100': 'inset 4px 0 0 0 var(--gray-100, #dcdcde)',
'inner-1-red-400': 'inset 0 0 0 1px var(--red-400, #ec5941)',
'inner-1-gray-400': 'inset 0 0 0 1px var(--gray-400, #89888d)',
'inner-2-blue-400': 'inset 0 0 0 2px var(--blue-400, #428fdc)',
'inner-1-blue-500': 'inset 0 0 0 1px var(--blue-500, #1f75cb)',
'inner-b-2-blue-500': 'inset 0 -2px 0 0 var(--blue-500, #1f75cb)',
'inner-1-red-500': 'inset 0 0 0 1px var(--red-500, #dd2b0e)',
'inner-l-3-red-600': 'inset 3px 0 0 0 var(--red-600, #c91c00)',
'inner-b-2-theme-accent':
'inset 0 -2px 0 0 var(--gl-theme-accent, var(--theme-indigo-500, #6666c4))',
'x0-y2-b4-s0': '0 2px 4px 0 #0000001a',
'x0-y0-b3-s1-blue-500': 'inset 0 0 3px 1px var(--blue-500, #1f75cb)',
},
// TODO: backport these width percentage classes to GitLab UI
width: widthPercentageScale,
maxWidth: {
...widthPercentageScale,
screen: '100vw',
limited: '1006px',
'1/2': '50%',
},
transitionProperty: {
transform: 'transform',
background: 'background',
opacity: 'opacity',
left: 'left',
right: 'right',
width: 'width',
stroke: 'stroke',
padding: 'padding',
'stroke-opacity': 'stroke-opacity',
'box-shadow': 'box-shadow',
},
transitionTimingFunction: {
DEFAULT: 'ease',
},
gridTemplateRows: {
auto: 'auto',
},
// The default preset already includes the primary font size scale (xs/sm/base/lg). The below
// sizes are non-standard and should probably be migrated to gl-heading-* utils at some point.
// In the meantime, we add them to GitLab's own Tailwind config.
fontSize: {
'size-h-display': '1.75rem',
'size-h1': '1.4375rem',
'size-h2': '1.1875rem',
'size-h1-xl': '2rem',
'size-h2-xl': '1.4375rem',
'size-reset': 'inherit',
},
spacing: {
px: '1px',
},
flexGrow: {
2: '2',
},
},
},
plugins: [
plugin(gitLabUIUtilities),
plugin(({ addUtilities }) => {
addUtilities(utilities);
}),
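The removed `gitLabUIUtilities` plugin used Tailwind's standard plugin API. For reference, a minimal sketch of that pattern, with a made-up utility class, should a local escape hatch ever be needed again:

```javascript
// Minimal Tailwind plugin sketch; `.example-truncate` is a made-up utility.
const plugin = require('tailwindcss/plugin');

module.exports = {
  plugins: [
    plugin(({ addUtilities }) => {
      addUtilities({
        '.example-truncate': {
          overflow: 'hidden',
          'text-overflow': 'ellipsis',
          'white-space': 'nowrap',
        },
      });
    }),
  ],
};
```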

View File

@@ -40,7 +40,11 @@ A site shows as "Unhealthy" if the site's status is more than 10 minutes old. In
Geo::MetricsUpdateWorker.new.perform
```
If it raises an error, then the error is probably also preventing the jobs from completing. If it takes longer than 10 minutes, then there may be a performance issue, and the UI may always show "Unhealthy" even if the status eventually does get updated.
If it raises an error, then the error is probably also preventing the jobs from completing. If it takes longer than 10 minutes, then the status might flap or persist as "Unhealthy", even if the status does occasionally get updated. This might be due to growth in usage, growth in data over time, or performance bugs such as a missing database index.
You can monitor system CPU load with a utility like `top` or `htop`. If PostgreSQL is using a significant amount of CPU, it might indicate that there's a problem, or that the system is underprovisioned. System memory should also be monitored.
If you increase memory, you should also check the PostgreSQL memory-related settings in your `/etc/gitlab/gitlab.rb` configuration.
If it successfully updates the status, then something may be wrong with Sidekiq. Is it running? Do the logs show errors? This job is supposed to be enqueued every minute and might not run if a [job deduplication idempotency](../../../sidekiq/sidekiq_troubleshooting.md#clearing-a-sidekiq-job-deduplication-idempotency-key) key was not cleared properly. It takes an exclusive lease in Redis to ensure that only one of these jobs can run at a time. The primary site updates its status directly in the PostgreSQL database. Secondary sites send an HTTP POST request to the primary site with their status data.

View File

@@ -19045,6 +19045,7 @@ A container repository.
| <a id="containerrepositoryname"></a>`name` | [`String!`](#string) | Name of the container repository. |
| <a id="containerrepositorypath"></a>`path` | [`String!`](#string) | Path of the container repository. |
| <a id="containerrepositoryproject"></a>`project` | [`Project!`](#project) | Project of the container registry. |
| <a id="containerrepositoryprotectionruleexists"></a>`protectionRuleExists` **{warning-solid}** | [`Boolean!`](#boolean) | **Introduced** in GitLab 17.2. **Status**: Experiment. Whether any matching container protection rule exists for this container. Available only when feature flag `container_registry_protected_containers` is enabled. |
| <a id="containerrepositorystatus"></a>`status` | [`ContainerRepositoryStatus`](#containerrepositorystatus) | Status of the container repository. |
| <a id="containerrepositorytagscount"></a>`tagsCount` | [`Int!`](#int) | Number of tags associated with this image. |
| <a id="containerrepositoryupdatedat"></a>`updatedAt` | [`Time!`](#time) | Timestamp when the container repository was updated. |
@@ -19069,6 +19070,7 @@ Details of a container repository.
| <a id="containerrepositorydetailsname"></a>`name` | [`String!`](#string) | Name of the container repository. |
| <a id="containerrepositorydetailspath"></a>`path` | [`String!`](#string) | Path of the container repository. |
| <a id="containerrepositorydetailsproject"></a>`project` | [`Project!`](#project) | Project of the container registry. |
| <a id="containerrepositorydetailsprotectionruleexists"></a>`protectionRuleExists` **{warning-solid}** | [`Boolean!`](#boolean) | **Introduced** in GitLab 17.2. **Status**: Experiment. Whether any matching container protection rule exists for this container. Available only when feature flag `container_registry_protected_containers` is enabled. |
| <a id="containerrepositorydetailssize"></a>`size` | [`Float`](#float) | Deduplicated size of the image repository in bytes. This is only available on GitLab.com for repositories created after `2021-11-04`. |
| <a id="containerrepositorydetailsstatus"></a>`status` | [`ContainerRepositoryStatus`](#containerrepositorystatus) | Status of the container repository. |
| <a id="containerrepositorydetailstagscount"></a>`tagsCount` | [`Int!`](#int) | Number of tags associated with this image. |

View File

@@ -52,41 +52,41 @@ GET /merge_requests?search=foo&in=title
Supported attributes:
| Attribute | Type | Required | Description |
| ------------------------------- | -------------- | -------- | ----------- |
| `approved_by_ids` | integer array | No | Returns the merge requests approved by all the users with the given `id`, up to 5 users. `None` returns merge requests with no approvals. `Any` returns merge requests with an approval. Premium and Ultimate only. |
| `approver_ids` | integer array | No | Returns merge requests which have specified all the users with the given `id` as individual approvers. `None` returns merge requests without approvers. `Any` returns merge requests with an approver. Premium and Ultimate only. |
| `approved` | string | No | Filters merge requests by their `approved` status. `yes` returns only approved merge requests. `no` returns only non-approved merge requests. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/3159) in GitLab 15.11 with the flag `mr_approved_filter`. Disabled by default. |
| `assignee_id` | integer | No | Returns merge requests assigned to the given user `id`. `None` returns unassigned merge requests. `Any` returns merge requests with an assignee. |
| `author_id` | integer | No | Returns merge requests created by the given user `id`. Mutually exclusive with `author_username`. Combine with `scope=all` or `scope=assigned_to_me`. |
| `author_username` | string | No | Returns merge requests created by the given `username`. Mutually exclusive with `author_id`. |
| `created_after` | datetime | No | Returns merge requests created on or after the given time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `created_before` | datetime | No | Returns merge requests created on or before the given time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `deployed_after` | datetime | No | Returns merge requests deployed after the given date/time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `deployed_before` | datetime | No | Returns merge requests deployed before the given date/time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `environment` | string | No | Returns merge requests deployed to the given environment. |
| `in` | string | No | Change the scope of the `search` attribute. `title`, `description`, or a string joining them with comma. Default is `title,description`. |
| `labels` | string | No | Returns merge requests matching a comma-separated list of labels. `None` lists all merge requests with no labels. `Any` lists all merge requests with at least one label. Predefined names are case-insensitive. |
| `merge_user_id` | integer | No | Returns the merge requests merged by the user with the given user `id`. Mutually exclusive with `merge_user_username`. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/140002) in GitLab 17.0. |
| `merge_user_username` | string | No | Returns the merge requests merged by the user with the given `username`. Mutually exclusive with `merge_user_id`. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/140002) in GitLab 17.0. |
| `milestone` | string | No | Returns merge requests for a specific milestone. `None` returns merge requests with no milestone. `Any` returns merge requests that have an assigned milestone. |
| `my_reaction_emoji` | string | No | Returns merge requests reacted by the authenticated user by the given `emoji`. `None` returns issues not given a reaction. `Any` returns issues given at least one reaction. |
| `not` | Hash | No | Returns merge requests that do not match the parameters supplied. Accepts: `labels`, `milestone`, `author_id`, `author_username`, `assignee_id`, `assignee_username`, `reviewer_id`, `reviewer_username`, `my_reaction_emoji`. |
| `order_by` | string | No | Returns requests ordered by `created_at`, `title`, or `updated_at` fields. Default is `created_at`. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/331625) in GitLab 14.8.|
| `reviewer_id` | integer | No | Returns merge requests which have the user as a [reviewer](../user/project/merge_requests/reviews/index.md) with the given user `id`. `None` returns merge requests with no reviewers. `Any` returns merge requests with any reviewer. Mutually exclusive with `reviewer_username`. |
| `reviewer_username` | string | No | Returns merge requests which have the user as a [reviewer](../user/project/merge_requests/reviews/index.md) with the given `username`. `None` returns merge requests with no reviewers. `Any` returns merge requests with any reviewer. Mutually exclusive with `reviewer_id`. |
| `scope` | string | No | Returns merge requests for the given scope: `created_by_me`, `assigned_to_me` or `all`. Defaults to `created_by_me`. |
| `search` | string | No | Search merge requests against their `title` and `description`. |
| `sort` | string | No | Returns requests sorted in `asc` or `desc` order. Default is `desc`. |
| `source_branch` | string | No | Returns merge requests with the given source branch. |
| `state` | string | No | Returns all merge requests or just those that are `opened`, `closed`, `locked`, or `merged`. |
| `target_branch` | string | No | Returns merge requests with the given target branch. |
| `updated_after` | datetime | No | Returns merge requests updated on or after the given time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `updated_before` | datetime | No | Returns merge requests updated on or before the given time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `view` | string | No | If `simple`, returns the `iid`, URL, title, description, and basic state of merge request. |
| `with_labels_details` | boolean | No | If `true`, response returns more details for each label in labels field: `:name`, `:color`, `:description`, `:description_html`, `:text_color`. Default is `false`. |
| Attribute | Type | Required | Description |
|-----------------------------|---------------|----------|-------------|
| `approved_by_ids` | integer array | No | Returns the merge requests approved by all the users with the given `id`, up to 5 users. `None` returns merge requests with no approvals. `Any` returns merge requests with an approval. Premium and Ultimate only. |
| `approver_ids` | integer array | No | Returns merge requests which have specified all the users with the given `id` as individual approvers. `None` returns merge requests without approvers. `Any` returns merge requests with an approver. Premium and Ultimate only. |
| `approved` | string | No | Filters merge requests by their `approved` status. `yes` returns only approved merge requests. `no` returns only non-approved merge requests. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/3159) in GitLab 15.11 with the flag `mr_approved_filter`. Disabled by default. |
| `assignee_id` | integer | No | Returns merge requests assigned to the given user `id`. `None` returns unassigned merge requests. `Any` returns merge requests with an assignee. |
| `author_id` | integer | No | Returns merge requests created by the given user `id`. Mutually exclusive with `author_username`. Combine with `scope=all` or `scope=assigned_to_me`. |
| `author_username` | string | No | Returns merge requests created by the given `username`. Mutually exclusive with `author_id`. |
| `created_after` | datetime | No | Returns merge requests created on or after the given time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `created_before` | datetime | No | Returns merge requests created on or before the given time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `deployed_after` | datetime | No | Returns merge requests deployed after the given date/time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `deployed_before` | datetime | No | Returns merge requests deployed before the given date/time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `environment` | string | No | Returns merge requests deployed to the given environment. |
| `in` | string | No | Change the scope of the `search` attribute. `title`, `description`, or a string joining them with comma. Default is `title,description`. |
| `labels` | string | No | Returns merge requests matching a comma-separated list of labels. `None` lists all merge requests with no labels. `Any` lists all merge requests with at least one label. Predefined names are case-insensitive. |
| `merge_user_id` | integer | No | Returns the merge requests merged by the user with the given user `id`. Mutually exclusive with `merge_user_username`. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/140002) in GitLab 17.0. |
| `merge_user_username` | string | No | Returns the merge requests merged by the user with the given `username`. Mutually exclusive with `merge_user_id`. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/140002) in GitLab 17.0. |
| `milestone` | string | No | Returns merge requests for a specific milestone. `None` returns merge requests with no milestone. `Any` returns merge requests that have an assigned milestone. |
| `my_reaction_emoji` | string | No | Returns merge requests that the authenticated user reacted to with the given `emoji`. `None` returns merge requests without a reaction. `Any` returns merge requests with at least one reaction. |
| `not` | Hash | No | Returns merge requests that do not match the parameters supplied. Accepts: `labels`, `milestone`, `author_id`, `author_username`, `assignee_id`, `assignee_username`, `reviewer_id`, `reviewer_username`, `my_reaction_emoji`. |
| `order_by` | string | No | Returns requests ordered by `created_at`, `title`, `merged_at` ([introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/147052) in GitLab 17.2), or `updated_at` fields. Default is `created_at`. |
| `reviewer_id` | integer | No | Returns merge requests which have the user as a [reviewer](../user/project/merge_requests/reviews/index.md) with the given user `id`. `None` returns merge requests with no reviewers. `Any` returns merge requests with any reviewer. Mutually exclusive with `reviewer_username`. |
| `reviewer_username` | string | No | Returns merge requests which have the user as a [reviewer](../user/project/merge_requests/reviews/index.md) with the given `username`. `None` returns merge requests with no reviewers. `Any` returns merge requests with any reviewer. Mutually exclusive with `reviewer_id`. |
| `scope` | string | No | Returns merge requests for the given scope: `created_by_me`, `assigned_to_me` or `all`. Defaults to `created_by_me`. |
| `search` | string | No | Search merge requests against their `title` and `description`. |
| `sort` | string | No | Returns requests sorted in `asc` or `desc` order. Default is `desc`. |
| `source_branch` | string | No | Returns merge requests with the given source branch. |
| `state` | string | No | Returns all merge requests or just those that are `opened`, `closed`, `locked`, or `merged`. |
| `target_branch` | string | No | Returns merge requests with the given target branch. |
| `updated_after` | datetime | No | Returns merge requests updated on or after the given time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `updated_before` | datetime | No | Returns merge requests updated on or before the given time. Expected in ISO 8601 format (`2019-03-15T08:00:00Z`). |
| `view` | string | No | If `simple`, returns the `iid`, URL, title, description, and basic state of the merge request. |
| `with_labels_details` | boolean | No | If `true`, response returns more details for each label in labels field: `:name`, `:color`, `:description`, `:description_html`, `:text_color`. Default is `false`. |
| `with_merge_status_recheck` | boolean | No | If `true`, this projection requests (but does not guarantee) an asynchronous recalculation of the `merge_status` field. Default is `false`. In GitLab 15.11 and later, enable the `restrict_merge_status_recheck` feature [flag](../administration/feature_flags.md) to ignore this attribute when requested by users without at least the Developer role. |
| `wip` | string | No | Filter merge requests against their `wip` status. Use `yes` to return *only* draft merge requests, `no` to return *non-draft* merge requests. |
| `wip` | string | No | Filter merge requests against their `wip` status. Use `yes` to return *only* draft merge requests, `no` to return *non-draft* merge requests. |
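For example, a request that combines several of these attributes; a sketch using `fetch` with a placeholder token and host (any HTTP client works):

```javascript
// Sketch: list merged MRs, newest merges first. `order_by=merged_at` requires
// GitLab 17.2 or later; the token and host below are placeholders.
const GITLAB_TOKEN = '<your-personal-access-token>';
const params = new URLSearchParams({
  scope: 'all',
  state: 'merged',
  order_by: 'merged_at',
  sort: 'desc',
});

fetch(`https://gitlab.example.com/api/v4/merge_requests?${params}`, {
  headers: { 'PRIVATE-TOKEN': GITLAB_TOKEN },
})
  .then((response) => response.json())
  .then((mergeRequests) => console.log(mergeRequests.map((mr) => mr.title)));
```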
Example response:

Binary file not shown (deleted image, 59 KiB)

Binary file not shown (deleted image, 91 KiB)

Binary file not shown (deleted image, 24 KiB)

View File

@@ -1,200 +1,11 @@
---
status: implemented
creation-date: "2023-09-11"
authors: [ "@abrandl" ]
coach: "@andrewn"
approvers: [ "@swiskow", "@lmcandrew", "@o-lluch" ]
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/capacity_planning/'
remove_date: '2025-07-08'
---
<!-- Blueprints often contain forward-looking statements -->
<!-- vale gitlab.FutureTense = NO -->
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/capacity_planning/).
# Capacity planning for GitLab Dedicated
## Summary
This document outlines how we plan to set up infrastructure capacity planning for GitLab Dedicated tenant environments, which started as a [FY24-Q3 OKR](https://gitlab.com/gitlab-com/gitlab-OKRs/-/work_items/3507).
We make use of [Tamland](https://gitlab.com/gitlab-com/gl-infra/tamland), a tool we built to provide saturation forecasting insights for GitLab.com infrastructure resources.
We propose to include Tamland as a part of the GitLab Dedicated stack and execute forecasting from within the tenant environments.
Tamland predicts SLO violations and their respective dates, which need to be reviewed and acted upon.
In terms of team organisation, we propose that the Dedicated team own the tenant-side setup for Tamland and the predicted SLO violations, with the help and guidance of the Scalability::Observability team, which drives further development, documentation, and overall guidance for capacity planning, including for Dedicated.
With this setup, we aim to turn Tamland into a more generic tool, which can be used in various environments including but not limited to Dedicated tenants.
Long-term, we think of including Tamland in self-managed installations and think of Tamland as a candidate for open source release.
## Motivation
### Background: Capacity planning for GitLab.com
[Tamland](https://gitlab.com/gitlab-com/gl-infra/tamland) is an infrastructure resource forecasting project owned by the [Scalability::Observability](https://handbook.gitlab.com/handbook/engineering/infrastructure/team/scalability/#scalabilityobservability) group.
It implements [capacity planning](https://handbook.gitlab.com/handbook/engineering/infrastructure/capacity-planning/) for GitLab.com, which is a [controlled activity covered by SOC 2](https://gitlab.com/gitlab-com/gl-security/security-assurance/security-compliance-commercial-and-dedicated/observation-management/-/issues/604).
As of today, it is used exclusively for GitLab.com to predict upcoming SLO violations across hundreds of monitored infrastructure components.
Tamland produces a [report](https://gitlab-com.gitlab.io/gl-infra/tamland/intro.html) (internal link, hosted on GitLab Pages) containing forecast plots, information around predicted violations and other information around the components monitored.
Any predicted SLO violation results in a capacity warning issue being created in the [issue tracker for capacity planning](https://gitlab.com/gitlab-com/gl-infra/capacity-planning/-/boards/2816983) on GitLab.com.
At present, Tamland is quite tailor-made and specific to GitLab.com:
1. GitLab.com specific parameters and assumptions are built into Tamland
1. We execute Tamland from a single CI project, exclusively for GitLab.com
[Turning Tamland into a tool](https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/1106) we can use more generically and making it independent of GitLab.com specifics is the subject of ongoing work.
For illustration, we can see a saturation forecast plot below for the `disk_space` resource for a PostgreSQL service called `patroni-ci`.
Within the 90 days forecast horizon, we predict a violation of the `soft` SLO (set at 85% saturation) and this resulted in the creation of a [capacity planning issue](https://gitlab.com/gitlab-com/gl-infra/capacity-planning/-/issues/1219) for further review and potential actions.
At present, the Scalability::Observability group reviews those issues and engages with the respective DRI for the service in question to remedy a saturation concern.
<img src="images/image-20230911144743188.png" alt="image-20230911144743188" style="zoom:67%;" />
For GitLab.com capacity planning, we operate Tamland from a scheduled CI pipeline with access to the central Thanos, which provides saturation and utilization metrics for GitLab.com.
The CI pipeline produces the desired report, exposes it on GitLab Pages and also creates capacity planning issues.
Scalability::Observability runs a capacity planning triage rotation which entails reviewing and prioritizing any open issues and their respective saturation concerns.
### Problem Statement
With the number of [GitLab Dedicated](https://about.gitlab.com/dedicated/) deployments increasing, we need to establish capacity planning processes for Dedicated tenants.
This is going to help us notice any pending resource constraints soon enough to be able to upgrade the infrastructure for a given tenant before the resource saturates and causes an incident.
Each Dedicated tenant is an isolated GitLab environment, with a full set of metrics monitored.
These metrics are standardized in the [metrics catalog](https://gitlab.com/gitlab-com/runbooks/-/blob/master/reference-architectures/get-hybrid/src/gitlab-metrics-config.libsonnet?ref_type=heads) and on top of these, we have defined saturation metrics along with respective SLOs.
In order to provide capacity planning and forecasts for saturation metrics for each tenant, we'd like to get Tamland set up for GitLab Dedicated.
While Tamland is developed by the Scalability::Observability group, which also owns the capacity planning process for GitLab.com, that team doesn't have access to any of the Dedicated infrastructure, as we have strong isolation implemented for Dedicated environments.
As such, the technical design choices are going to affect how those teams interact and vice versa. We include this consideration in this document because the organisational aspect is a crucial part of it.
### Key questions
1. How does Tamland access Prometheus data for each tenant?
1. Where does Tamland execute and how do we scale that?
1. Where do we store resulting forecasting data?
1. How do we consume the forecasts?
### Goals: Iteration 0
1. Tamland is flexible enough to forecast saturation events for a Dedicated tenant and for GitLab.com separately
1. Forecasting is executed at least weekly, for each Dedicated tenant
1. Tamland's output is forecasting data only (plots, SLO violation dates, etc. - no report, no issue management - see below)
1. Tamland stores the output data in an S3 bucket for further inspection
### Goals: Iteration 1
In Iteration 0, we've integrated Tamland into GitLab Dedicated environments and started to generate forecasting data for each tenant regularly.
In order to consume this data and make it actionable, this iteration is about providing reporting functionality for GitLab Dedicated:
We generate a GitLab Pages deployed static site that contains individual Tamland reports for all tenants.
We use the default Tamland report to generate the per-tenant report.
In a future iteration, we may want to provide another type of report specifically tailored for GitLab Dedicated needs.
### Goals: Iteration 2
In order to raise awareness for a predicted SLO violation, Tamland has functionality to manage a GitLab issue tracker and create an issue for a capacity warning.
We use this, for example, to manage capacity warnings for GitLab.com using the [`gitlab-com` capacity planning tracker](https://gitlab.com/gitlab-com/gl-infra/capacity-planning-trackers/gitlab-com/-/issues).
For GitLab Dedicated tenants, we suggest using the `gitlab-dedicated` capacity planning tracker in a similar fashion:
For each predicted SLO violation with reasonable confidence, we create a capacity warning issue on this tracker and use a scoped label to distinguish warnings for different tenants (see below for more details).
### Non-goals
#### Customizing forecasting models
Forecasting models can and should be tuned and informed with domain knowledge to produce accurate forecasts.
This information is a part of the Tamland manifest.
In the first iteration, we don't support per-tenant customization, but this can be added later.
## Proposed Design for Dedicated: A part of the Dedicated stack
Dedicated environments are fully isolated and run their own Prometheus instance to capture metrics, including saturation metrics.
Tamland will run from each individual Dedicated tenant environment, consume metrics from Prometheus and store the resulting data in S3.
From there, we consume forecast data and act on it.
![dedicated-capacity-planning-forecasting](images/dedicated-capacity-planning-forecasting.png)
### Generating forecasts
#### Storage for output and cache
Any data Tamland relies on is stored in a S3 bucket.
We use one bucket per tenant to clearly separate data between tenants.
1. Resulting forecast data and other outputs
1. Tamland's internal cache for Prometheus metrics data
There is no need for a persistent state across Tamland runs aside from the S3 bucket.
#### Benefits of executing inside tenant environments
Each Tamland run for a single environment (tenant) can take a few hours to execute.
With the number of tenants expected to increase significantly, we need to consider scaling the execution environment for Tamland.
In this design, Tamland becomes a part of the Dedicated stack and a component of the individual tenant environment.
As such, scaling the execution environment for Tamland is solved by design, because tenant forecasts execute inherently in parallel in their respective environments.
#### Distribution model: Docker + Helm chart
Tamland is released as a Docker image along with a Helm chart, see [Tamland's README](https://gitlab.com/gitlab-com/gl-infra/tamland/-/blob/main/README.md) for further details.
#### Tamland Manifest
The manifest contains information about which saturation metrics to forecast on (see this [manifest example](https://gitlab.com/gitlab-com/gl-infra/tamland/-/blob/62854e1afbc2ed3160a55a738ea587e0cf7f994f/saturation.json) for GitLab.com).
This will be generated from the metrics catalog and will initially be the same for all tenants.
To generate the manifest from the metrics catalog, we set up a dedicated GitLab project, `tamland-dedicated`.
On a regular basis, a scheduled pipeline grabs the metrics catalog, generates the JSON manifest from it and commits this to the project.
On the Dedicated tenants, we download the latest version of the committed JSON manifest from `tamland-dedicated` and use this as input to execute Tamland.
### Capacity planning reports and Capacity Warnings
Based on Tamland's forecasting data, we generate reports to display forecasting information and enable teams to act on predicted SLO violations by creating capacity warnings in a GitLab issue tracker.
![dedicated-capacity-planning-reporting](images/dedicated-capacity-planning-reporting.png)
The Scalability::Observability team maintains an [internal GitLab project called `gitlab-dedicated`](https://gitlab.com/gitlab-com/gl-infra/capacity-planning-trackers/gitlab-dedicated).
This project contains a scheduled CI pipeline to regularly produce a [static site deployed to GitLab Pages (only available internally)](https://gitlab-com.gitlab.io/gl-infra/capacity-planning-trackers/gitlab-dedicated/).
It also contains functionality to create and manage capacity warnings in the issue tracker of this project.
CI configuration for this project contains a list of tenants along with their respective metadata (e.g. AWS account, codename, etc.).
For each configured tenant, the CI pipeline uses a central IAM role in the amp account.
With this role, a tenant-specific IAM role can be assumed, which has read-only access to the respective S3 bucket containing the tenant's forecasting data.
The CI pipeline produces a standard Tamland report for each tenant and integrates all individual reports into a single static site.
This site provides unified access to capacity forecasting insights across tenant environments.
Along with the report, the CI pipeline also reacts to predicted SLO violations and creates a capacity warning issue in the project's issue tracker.
As the tracker is being used for *all* GitLab Dedicated tenants, we employ a `~tenant:CN` label to distinguish tenant environments (e.g. we use `~tenant:C1` for the tenant with codename C1).
These issues contain further information about the tenant and component affected, along with forecasts and status information.
The intention here is to create visibility into predicted SLO violations and provide a way for the Dedicated team to engage with capacity warnings directly (e.g. for discussion, work scheduling etc.).
Overall, the Dedicated teams and operators use the Tamland report and issue tracker to act on capacity warnings.
To get started, we suggest that the Dedicated group take a regular pass across the capacity warnings and triage them.
For additional visibility, we may want to consider sending Slack updates for newly created capacity warnings.
## Alternative Solution
### Tamland as a Service (not chosen)
An alternative design, which we don't consider an option at this point, is to set up Tamland as a Service and run it fully **outside** of tenant environments.
![dedicated-capacity-planning-as-a-service](images/dedicated-capacity-planning-as-a-service.png)
In this design, a central Prometheus/Thanos instance is needed to provide the metrics data for Tamland.
Dedicated tenants use remote-write to push their Prometheus data to the central Thanos instance.
Tamland is set up to run on a regular basis and consume metrics data from the single Thanos instance.
It stores its results and cache in S3, similar to the other design.
In order to execute forecasts regularly, we need to provide an execution environment to run Tamland in.
With an increasing number of tenants, we'd need to scale up resources for this cluster.
This design **has not been chosen** because of both technical and organisational concerns:
1. Our central Thanos instance currently doesn't have metrics data for Dedicated tenants as of the start of FY24Q3.
1. Extra work required to set up scalable execution environment.
1. Thanos is considered a bottleneck as it provides data for all tenants and this poses a risk of overloading it when we execute the forecasting for a high number of tenants.
1. We strive to build out Tamland into a tool of more general use. We expect a better outcome in terms of design, documentation and process efficiency by building it as a tool for other teams to use and not offering it as a service. In the long run, we might be able to integrate Tamland (as a tool) inside self-managed environments or publish Tamland as an open source forecasting tool. This would not be feasible if we were hosting it as a service.
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,60 +1,11 @@
---
status: implemented
creation-date: "2022-09-14"
authors: [ "@ayufan", "@fabiopitino", "@grzesiek" ]
coach: [ "@ayufan", "@grzesiek" ]
approvers: [ "@dhershkovitch", "@marknuzzo" ]
owning-stage: "~devops::verify"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/ci_pipeline_components/'
remove_date: '2025-07-08'
---
<!-- vale gitlab.FutureTense = NO -->
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/ci_pipeline_components/).
# CI/CD Catalog
## Summary
The goal of the CI/CD pipeline components catalog is to make reusing
pipeline configurations easier and more efficient. Providing a way to
discover, understand, and learn how to reuse pipeline constructs allows for a
more streamlined experience. Having a CI/CD pipeline components catalog also
sets a framework for users to collaborate on pipeline constructs so that they
can be evolved and improved over time.
This design doc defined the architectural guidelines for building a CI/CD
catalog of pipeline components. Now that this work is done, you can find a link to the official user documentation below, along with the historic content of the document.
For more information on the feature, see the [CI/CD Components documentation](../../../ci/components/index.md).
The archived version of the blueprint file can be found [here](https://gitlab.com/gitlab-org/gitlab/-/blob/a22b7be24f372feec596bcf71ebaf07ea0df40cf/doc/architecture/blueprints/ci_pipeline_components/index.md).
## Who
Proposal:
<!-- vale gitlab.Spelling = NO -->
| Role | Who |
|--------------------------------|-----|
| Author | Fabio Pitino |
| Engineering Leaders | Cheryl Li, Mark Nuzzo |
| Product Manager | Dov Hershkovitch |
| Architecture Evolution Coaches | Kamil Trzciński, Grzegorz Bizon |
DRIs:
| Role | Who |
|-------------|-----|
| Leadership | Mark Nuzzo |
| Product | Dov Hershkovitch |
| Engineering | Fabio Pitino |
| UX | Sunjung Park |
Domain experts:
| Area | Who |
|-----------------------------|-----|
| Verify / Pipeline authoring | Avielle Wolfe |
| Verify / Pipeline authoring | Laura Montemayor |
<!-- vale gitlab.Spelling = YES -->
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,52 +1,11 @@
---
owning-stage: "~devops::data stores"
description: 'Cloud Connector ADR 001: Use load balancer as single entry point'
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/cloud_connector/decisions/001_lb_entry_point/'
remove_date: '2025-07-08'
---
# Cloud Connector ADR 001: Load balancer as single entry point
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/cloud_connector/decisions/001_lb_entry_point/).
## Context
The original iteration of the blueprint suggested standing up a dedicated Cloud Connector edge service,
through which all traffic that uses features under the Cloud Connector umbrella would pass.
The primary reasons we wanted this to be a dedicated service were to:
1. **Provide a single entry point for customers.** We identified the ability for any GitLab instance
around the world to consume Cloud Connector features through a single endpoint such as
`cloud.gitlab.com` as a must-have property.
1. **Have the ability to execute custom logic.** There was a desire from product to create a space where we can
run cross-cutting business logic such as application-level rate limiting, which is hard or impossible to
do using a traditional load balancer such as HAProxy.
## Decision
We decided to take a smaller incremental step toward having a "smart router" by focusing on
the ability to provide a single endpoint through which Cloud Connector traffic enters our
infrastructure. This can be accomplished using simpler means than deploying dedicated services, specifically
by pulling in a load balancing layer listening at `cloud.gitlab.com` that can also perform simple routing
tasks to forward traffic into feature backends.
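As a rough illustration of the kind of path-based routing involved (not the actual load balancer configuration), the same decision could be sketched with the `rack-proxy` gem; the backend hosts and path prefixes are hypothetical:

```ruby
# Illustration only: route requests to feature backends by path prefix.
# Backend hosts and prefixes are hypothetical, not the real topology.
require 'rack/proxy'

class CloudConnectorRouter < Rack::Proxy
  BACKENDS = {
    '/ai'            => 'ai-gateway.internal',  # hypothetical backend host
    '/observability' => 'gob.internal'          # hypothetical backend host
  }.freeze

  def rewrite_env(env)
    _prefix, host = BACKENDS.find { |path, _| env['PATH_INFO'].start_with?(path) }
    raise "no backend for #{env['PATH_INFO']}" unless host

    env['HTTP_HOST'] = host # forward to the backend that owns this path
    env
  end
end

# config.ru: run CloudConnectorRouter.new
```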
Our reasons for this decision were:
1. **Unclear requirements for custom logic to run.** We are still exploring how and to what extent we would
apply rate limiting logic at the Cloud Connector level. This is being explored in
[issue 429592](https://gitlab.com/gitlab-org/gitlab/-/issues/429592). Because we need to have a single
entry point by January, and because we think we will not be ready by then to implement such logic at the
Cloud Connector level, a web service is not required yet.
1. **New use cases found that are not suitable to run through a dedicated service.** We started to work with
the Observability group to see how we can bring the GitLab Observability Backend (GOB) to Cloud Connector
customers in [MR 131577](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/131577).
In this discussion it became clear that due to the large amounts of traffic and data volume passing
through GOB each day, putting another service in front of this stack does not provide a sensible
risk/benefit trade-off. Instead, we will probably split traffic and make Cloud Connector components
available through other means for special cases like these (for example, through a Cloud Connector library).
We are exploring several options for load-balancing this new endpoint in [issue 429818](https://gitlab.com/gitlab-org/gitlab/-/issues/429818)
and are working with the `Infrastructure:Foundations` team to deploy this in [issue 24711](https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/24711).
## Consequences
We have not yet discarded the plan to build a smart router eventually, either as a service or
through other means, but have delayed this decision in face of uncertainty at both a product
and technical level. We will reassess how to proceed in Q1 2024.
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,67 +1,11 @@
---
status: implemented
creation-date: "2023-09-28"
authors: [ "@mkaeppler" ]
coach: "@ayufan"
approvers: [ "@rogerwoo", "@pjphillips" ]
owning-stage: "~devops::data stores"
participating-stages: ["~devops::fulfillment", "~devops::ai-powered"]
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/cloud_connector/'
remove_date: '2025-07-08'
---
# Cloud Connector architecture evolution
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/cloud_connector/).
## Summary
This design doc covers architectural decisions and proposed changes to
[Cloud Connector's technical foundations](https://gitlab.com/groups/gitlab-org/-/epics/11417).
Refer to the [official architecture documentation](../../../development/cloud_connector/architecture.md)
for an accurate description of the current status.
## Motivation
Our "big problem to solve" is to bring feature parity to our SaaS and self-managed offerings.
Until now, SaaS and self-managed (SM) GitLab instances have consumed features only from the [AI gateway](../ai_gateway/index.md),
which also implements an `Access Layer` to verify that a given request is allowed
to access the respective AI feature endpoint.
This approach has served us well because it:
- Required minimal changes from an architectural standpoint to allow SM users to consume AI features hosted by us.
- Caused minimal friction with ongoing development on GitLab.com.
- Reduced time to market.
However, the AI gateway alone does not sufficiently abstract over a wider variety of features,
as by definition it is designed to serve AI features only.
### Goals
We will use this blueprint to make incremental changes to Cloud Connector's technical framework
to enable other backend services to serve self-managed/GitLab Dedicated customers in the same way
the AI gateway does today. This will directly support our mission of bringing feature parity
to all GitLab customers.
The major areas we are focused on are:
- [**Provide single access point for customers.**](https://gitlab.com/groups/gitlab-org/-/epics/12405)
We found that customers are not keen on configuring their web proxies and firewalls
to allow outbound traffic to an ever growing list of GitLab-hosted services. We therefore decided to
install a global, load-balanced entry point at `cloud.gitlab.com`. This entry point can make simple
routing decisions based on the requested path, which allows us to target different backend services
as we broaden the feature scope covered by Cloud Connector.
- **Status:** done. The decision was documented as [ADR001](decisions/001_lb_entry_point.md).
- [**Give instance admins control over product usage data.**](https://gitlab.com/groups/gitlab-org/-/epics/12020)
Telemetry for Cloud Connector services is instrumented either within
Editor Extensions or within the AI gateway. Our approach to AI telemetry is currently independent of our long-term vision of
[Unified Internal events tracking](https://gitlab.com/groups/gitlab-org/-/epics/9610).
As Cloud Connector implements additional use cases beyond AI, we want to bring AI-related telemetry into alignment with existing
technical choices.
- **Status:** in discovery.
- [**Rate-limiting features.**](https://gitlab.com/groups/gitlab-org/-/epics/12032)
During periods of elevated traffic, backends integrated with Cloud Connector, such as the
AI gateway or TanuKey, may experience resource constraints. GitLab should apply a consistent strategy when deciding which instances
to prioritize, and this strategy should be uniform across all Cloud Connector services (a sketch of one possible approach follows this list).
- **Status:** planned.
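For illustration only (this work is still in discovery, not a decided design), a uniform strategy could be as simple as a fixed-window limit per instance, shared so that every Cloud Connector backend decides identically; all names and numbers below are assumptions:

```ruby
# Sketch of one possible uniform strategy: a fixed-window request limit per
# GitLab instance, shared via Redis so all backends decide consistently.
# Window size and limit are assumed values, not product decisions.
require 'redis'

class InstanceRateLimiter
  WINDOW = 60      # seconds (assumed)
  LIMIT  = 1_000   # requests per window per instance (assumed)

  def initialize(redis: Redis.new)
    @redis = redis
  end

  # instance_uid would identify the calling GitLab instance (for example,
  # derived from its Cloud Connector access token).
  def allow?(instance_uid)
    key = "cloud_connector:rate:#{instance_uid}:#{Time.now.to_i / WINDOW}"
    count = @redis.incr(key)
    @redis.expire(key, WINDOW * 2) if count == 1 # let stale windows expire
    count <= LIMIT
  end
end
```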
## Decisions
- [ADR-001: Use load balancer as single entry point](decisions/001_lb_entry_point.md)
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,125 +1,11 @@
---
status: implemented
creation-date: "2020-08-26"
authors: [ "@grzesiek" ]
coach: [ "@ayufan", "@grzesiek" ]
approvers: [ "@thaoyeager", "@darbyfrey" ]
owning-stage: "~devops::release"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/cloud_native_build_logs/'
remove_date: '2025-07-08'
---
# Cloud Native Build Logs
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/cloud_native_build_logs/).
Cloud native and the adoption of Kubernetes have been recognised by GitLab as
one of the top two tailwinds that are helping us grow faster as a
company behind the project.
This effort is described in more detail
[in the infrastructure team handbook](https://handbook.gitlab.com/handbook/engineering/infrastructure/production/architecture/).
## Traditional build logs
Traditional job logs depend heavily on the availability of local shared storage.
Every time a GitLab Runner sends a new partial build output, we write this
output to a file on disk. This is simple, but the mechanism depends on
shared local storage - the same file needs to be available on every GitLab web
node machine, because GitLab Runner might connect to a different one every time
it performs an API request. Sidekiq also needs access to the file, because when
a job is complete, the trace file contents are sent to the object store.
## New architecture
The new architecture writes build log data to Redis instead of writing it to a
file.
To make this performant and resilient enough, we implemented a chunked
I/O mechanism - we store data in Redis in chunks, and migrate them to an object
store once we reach a desired chunk size.
A simplified sequence diagram is shown below.
```mermaid
sequenceDiagram
autonumber
participant U as User
participant R as Runner
participant G as GitLab (rails)
participant I as Redis
participant D as Database
participant O as Object store
loop incremental trace update sent by a runner
Note right of R: Runner appends a build trace
R->>+G: PATCH trace [build.id, offset, data]
G->>+D: find or create chunk [chunk.index]
D-->>-G: chunk [id, index]
G->>I: append chunk data [chunk.index, data]
G-->>-R: 200 OK
end
Note right of R: User retrieves a trace
U->>+G: GET build trace
loop every trace chunk
G->>+D: find chunk [index]
D-->>-G: chunk [id]
G->>+I: read chunk data [chunk.index]
I-->>-G: chunk data [data, size]
end
G-->>-U: build trace
Note right of R: Trace chunk is full
R->>+G: PATCH trace [build.id, offset, data]
G->>+D: find or create chunk [chunk.index]
D-->>-G: chunk [id, index]
G->>I: append chunk data [chunk.index, data]
G->>G: chunk full [index]
G-->>-R: 200 OK
G->>+I: read chunk data [chunk.index]
I-->>-G: chunk data [data, size]
G->>O: send chunk data [data, size]
G->>+D: update data store type [chunk.id]
G->>+I: delete chunk data [chunk.index]
```
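The chunking logic itself can be sketched in a few lines of Ruby; the names, the chunk size, and the `object_store` interface below are simplifications, not the real implementation:

```ruby
# Simplified sketch of chunked trace I/O: buffer appends in Redis and move
# full chunks to object storage. Names and sizes are illustrative only;
# object_store is any client responding to #put.
require 'redis'

CHUNK_SIZE = 128 * 1024 # bytes per chunk before migration to the object store

def append_trace(redis, object_store, build_id, chunk_index, data)
  key = "trace:#{build_id}:#{chunk_index}"
  redis.append(key, data)                  # partial updates land in Redis first

  return if redis.strlen(key) < CHUNK_SIZE # keep buffering until the chunk is full

  # Chunk is full: persist it to the object store and drop the Redis copy.
  object_store.put("#{build_id}/#{chunk_index}", redis.get(key))
  redis.del(key)
end
```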
## NFS coupling
In 2017, we experienced serious problems scaling our NFS infrastructure. We
even tried to replace NFS with
[CephFS](https://docs.ceph.com/docs/master/cephfs/) - unsuccessfully.
Since that time it has become apparent that the cost of operating and
maintaining an NFS cluster is significant and that, if we ever decide to
migrate to Kubernetes,
[we need to decouple GitLab from shared local storage and NFS](https://gitlab.com/gitlab-org/gitlab-pages/-/issues/426#note_375646396):
1. NFS might be a single point of failure
1. NFS can only be reliably scaled vertically
1. Moving to Kubernetes means increasing the number of mount points by an order
of magnitude
1. NFS depends on an extremely reliable network, which can be difficult to provide
in a Kubernetes environment
1. Storing customer data on NFS involves additional security risks
Moving GitLab to Kubernetes without NFS decoupling would result in an explosion
of complexity, increased maintenance cost, and an enormous negative impact on availability.
## Iterations
1. ✓ Implement the new architecture in a way that does not depend on shared local storage
1. ✓ Evaluate performance and edge cases, iterate to improve the new architecture
1. ✓ Design cloud native build logs correctness verification mechanisms
1. ✓ Build observability mechanisms around performance and correctness
1. ✓ Roll out the feature into the production environment incrementally
The work needed to make the new architecture production-ready and enabled on
GitLab.com has been tracked in the [Cloud Native Build Logs on GitLab.com](https://gitlab.com/groups/gitlab-org/-/epics/4275) epic.
Enabling this feature on GitLab.com is a subtask of
[making the new architecture generally available](https://gitlab.com/groups/gitlab-org/-/epics/3791) for everyone.
## Status
This change has been implemented and enabled on GitLab.com.
We are working on [an epic to make this feature more resilient and observable](https://gitlab.com/groups/gitlab-org/-/epics/4860).
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,104 +1,11 @@
---
status: implemented
creation-date: "2019-05-16"
authors: [ "@grzesiek" ]
coach: [ "@ayufan", "@grzesiek" ]
approvers: [ "@ogolowinski", "@dcroft", "@vshushlin" ]
owning-stage: "~devops::release"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/cloud_native_gitlab_pages/'
remove_date: '2025-07-08'
---
# GitLab Pages New Architecture
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/cloud_native_gitlab_pages/).
GitLab Pages is an important component of the GitLab product. It is mostly
used to serve static content, and has a limited set of well-defined
responsibilities. That said, it has unfortunately become a blocker for
the GitLab.com Kubernetes migration.
Cloud Native and the adoption of Kubernetes have been recognised by GitLab as
one of the top two tailwinds that are helping us grow faster as a
company behind the project.
This effort is described in more detail
[in the infrastructure team handbook page](https://handbook.gitlab.com/handbook/engineering/infrastructure/production/architecture/).
GitLab Pages is tightly coupled with NFS, and a significant change to its
architecture is required to unblock the Kubernetes migration. This
is ongoing work that we started more than a year ago. This blueprint
may be useful for understanding why it is important, and what the roadmap is.
## How GitLab Pages Works
GitLab Pages is a daemon designed to serve static content, written in
[Go](https://go.dev/).
Initially, GitLab Pages was designed to store static content on local
shared block storage (NFS) in a hierarchical group > project directory
structure. Each directory, representing a project, was expected to contain a
configuration file and static content that the GitLab Pages daemon would
read and serve.
```mermaid
graph LR
A(GitLab Rails) -- Writes new pages deployment --> B[(NFS)]
C(GitLab Pages) -. Reads static content .-> B
```
This initial design has become outdated for a few reasons - NFS coupling
being one of them - and we decided to replace it with a more "decoupled
service"-like architecture. The new architecture that we are working on is
described in this blueprint.
## NFS coupling
In 2017, we experienced serious problems scaling our NFS infrastructure. We
even tried to replace NFS with
[CephFS](https://docs.ceph.com/docs/master/cephfs/) - unsuccessfully.
Since that time it has become apparent that the cost of operating and
maintaining an NFS cluster is significant and that, if we ever decide to
migrate to Kubernetes,
[we need to decouple GitLab from shared local storage and NFS](https://gitlab.com/gitlab-org/gitlab-pages/-/issues/426#note_375646396):
1. NFS might be a single point of failure
1. NFS can only be reliably scaled vertically
1. Moving to Kubernetes means increasing the number of mount points by an order
of magnitude
1. NFS depends on an extremely reliable network, which can be difficult to provide
in a Kubernetes environment
1. Storing customer data on NFS involves additional security risks
Moving GitLab to Kubernetes without NFS decoupling would result in an explosion
of complexity, increased maintenance cost, and an enormous negative impact on availability.
## New GitLab Pages Architecture
- GitLab Pages sources domains' configuration from the GitLab internal
API, instead of reading `config.json` files from a local shared storage.
- GitLab Pages serves static content from Object Storage.
```mermaid
graph TD
A(User) -- Pushes pages deployment --> B{GitLab}
C((GitLab Pages)) -. Reads configuration from API .-> B
C -. Reads static content .-> D[(Object Storage)]
C -- Serves static content --> E(Visitors)
```
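In essence, the configuration lookup that replaces reading `config.json` from NFS is an HTTP call to the GitLab internal API. A sketch in Ruby follows, with the endpoint shape modeled on the internal Pages API and authentication omitted:

```ruby
# Sketch of the domain-configuration lookup that replaces reading
# config.json from NFS. Endpoint shape and response fields are illustrative;
# real requests carry a signed Gitlab-Pages-Api-Request header (omitted here).
require 'net/http'
require 'json'
require 'uri'

def pages_config_for(host)
  uri = URI("https://gitlab.example.com/api/v4/internal/pages?host=#{host}")
  response = Net::HTTP.get_response(uri)
  return nil unless response.is_a?(Net::HTTPSuccess)

  # Returns the project's lookup paths, including where the deployment
  # lives in object storage, instead of a path on shared disk.
  JSON.parse(response.body)
end
```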
This new architecture is also briefly described in
[this blog post](https://about.gitlab.com/blog/2020/08/03/how-gitlab-pages-uses-the-gitlab-api-to-serve-content/).
## Iterations
1. ✓ Redesign GitLab Pages configuration source to use the GitLab API
1. ✓ Evaluate performance and build reliable caching mechanisms
1. ✓ Incrementally roll out the new source on GitLab.com
1. ✓ Make the GitLab Pages API domain configuration source enabled by default
1. Enable experimentation with different serving strategies through feature flags
1. Triangulate the object store serving design through meaningful experiments
1. Design pages migration mechanisms that can work incrementally
1. Gradually migrate towards object storage serving on GitLab.com
The [GitLab Pages Architecture](https://gitlab.com/groups/gitlab-org/-/epics/1316)
epic, with a detailed roadmap, is also available.
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,591 +1,11 @@
---
status: rejected
creation-date: "2021-05-19"
authors: [ "@ayufan", "@mkaeppler" ]
coach: "@glopezfernandez"
approvers: []
owning-stage: "~devops::non_devops"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/composable_codebase_using_rails_engines/'
remove_date: '2025-07-08'
---
<!-- vale gitlab.FutureTense = NO -->
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/composable_codebase_using_rails_engines/).
# Composable GitLab Codebase
NOTE:
This architecture design document has been superseded by [GitLab Modular Monolith](../modular_monolith/index.md).
One of the major risks of a single codebase is unbounded growth of the whole
application. More code being added results not only in ever-increasing resource requirements
for running the application, but also in increased application coupling and an explosion of complexity.
## Executive summary
This blueprint discusses the impact of introducing **Application Layers** as a way to reduce and improve the application
codebase. It covers the positive and negative outcomes of the proposed solution and tries to estimate its impact
on GitLab.com and smaller installations.
**Application Layers** try to split the GitLab Rails codebase horizontally, following the pattern of how we actually
run GitLab, instead of splitting it vertically. This follows the idea that a single feature needs to run in many different ways
(CI, for example, has a Web interface, uses the API, and performs background processing), and that due to coupling we are not able to easily
run only a given feature (like CI) separately from the rest of the application.
The proposal itself does allow us to disconnect some aspects of the features. These aspects could be treated
as components that are run separately from the rest of the stack, while still sharing a large portion of the core.
This model could be implemented to provide an API interface for external tooling (Runners API, Packages API, Feature Flags Unleash API)
and would allow us to have much better resiliency and a much easier way to scale the application in the future.
The actual split was tested using [Rails Engines](https://guides.rubyonrails.org/engines.html),
implemented as separate gems in a single repository. Rails Engines
allowed us to describe the individual components together with their dependencies and run an application
consisting of many engines.
The blueprint aims to retain all key aspects of GitLab's success: a single and monolithic codebase (with a [single data-store](https://handbook.gitlab.com/handbook/product/single-application/#single-data-store)),
while allowing us to better model the application and make our codebase more composable.
## Challenges of the Monolith (a current state)
Today, the monolith proves challenging to work with in many cases. A single big monolithic
codebase without clear boundaries results in a number of problems and inefficiencies, among them:
- Deep coupling makes the application harder to develop in the longer term, as it leads to a spaghetti implementation
instead of a more interface-based architecture
- Deep coupling between parts of the codebase makes it harder to test. To test only a small portion of the application
we usually need to run the whole test suite to confidently know which parts are affected. This can to
some extent be improved by building a heuristic to aid the process, but such a heuristic is prone to errors and hard
to keep accurate at all times
- All components need to be loaded at all times in order to run only parts of the application
- Increased resource usage, as we load parts of the application that are rarely used in a given context
- High memory usage slows down the whole application, as it increases GC cycle duration,
creating significantly longer latency for processing requests and worse CPU cache usage
- Increased application boot-up times, as we need to load and parse significantly more files
- Longer boot-up times slow down development, as running the application or tests takes significantly longer,
reducing velocity and the number of iterations
## Composable codebase dimensions
In general, we can think of two ways the codebase can be modeled:
- **vertically** in Bounded Contexts, each representing a domain of the application, e.g. all features related to CI are in a given context
- **horizontally** in Application Layers: Sidekiq, GraphQL, REST API, Web Controllers, all Domain Models and Services that interface with DB directly
This blueprint explicitly talks about **horizontal** split and **Application Layers**.
## Current state of Bounded Contexts (**vertical** split)
Bounded Contexts are a topic that has been discussed extensively over the past couple of years,
as reflected in a number of issues:
- [Create new models / classes in a module / namespace](https://gitlab.com/gitlab-org/gitlab/-/issues/212156)
- [Make teams to be maintainers of their code](https://gitlab.com/gitlab-org/gitlab/-/issues/25872)
- [Use nested structure to organize CI classes](https://gitlab.com/gitlab-org/gitlab/-/issues/209745)
- [WIP: Make it simple to build and use "Decoupled Services"](https://gitlab.com/gitlab-org/gitlab/-/issues/31121)
We are partially executing the **Bounded Contexts** idea:
- Each team owns its own namespace, where a namespace is defined as a `module` in the codebase
- Each team owns its own tests, as namespaces define clear boundaries
- Since we use namespaces, an individual contributor or reviewer knows which domain experts to reach for help with
a given context
The module namespaces are actively being used today to model codebase around team boundaries. The most
prominent namespaces being used today are `Ci::` and `Packages::`. They provide a good way to contain the code owned
by a group in a well-defined structure.
However, while **Bounded Contexts** help development, they do not help with the above-stated goals. This is purely
a logical split of the code, and it does not prevent deep coupling. It is still possible to create a circular dependency (and it often happens)
between the background processing of a CI pipeline and the Runner API interface:
the API can call a Sidekiq worker, and Sidekiq can use the API to create an endpoint path, as illustrated below.
**Bounded Contexts** do not make our codebase smarter about what depends on what, as the whole codebase
is treated as a single package that needs to be loaded and executed.
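A condensed illustration of that two-way coupling (class names are hypothetical, not actual GitLab code):

```ruby
# Hypothetical classes showing the circular dependency: the API schedules
# background work, while the worker reaches back into web routing concerns.
require 'grape'
require 'sidekiq'

# The API layer enqueues background processing directly...
class PipelinesAPI < Grape::API
  post ':id/retry' do
    RetryPipelineWorker.perform_async(params[:id])
    status 202
  end
end

# ...while the background layer builds web-layer URLs (in the real codebase,
# via Gitlab::Routing.url_helpers), so Sidekiq cannot load without routing code.
class RetryPipelineWorker
  include Sidekiq::Worker

  def perform(pipeline_id)
    path = "/pipelines/#{pipeline_id}" # stand-in for a url_helpers call
    Sidekiq.logger.info("expiring cached view at #{path}")
  end
end
```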
Additional considerations on the disadvantages of Bounded Contexts:
- It can lead to tribal knowledge and duplicate code
- The deep coupling can make it difficult to iterate and make minimal changes
- Changes may have cascading effects that are difficult to isolate due to the vertical split
## The Application Layers (**horizontal** split)
While we continue leveraging **Bounded Contexts** in the form of namespace separation, which aids the development and review process,
**Application Layers** can provide a way to create a clean separation between different functional parts.
Our main codebase (`GitLab Rails`, after GitLab running on Ruby on Rails) consists of many implicit **Application Layers**.
There are no clear boundaries between the layers, which results in deep coupling.
The concept of **Application Layers** looks at the application from the perspective of how we run the application
instead of from the perspective of individual features (like CI or Packages). The GitLab application today can be decomposed into the following
application layers. The list is not exhaustive, but shows the general parts of the single monolithic codebase:
- Web Controllers: process web requests coming from users visiting the web interface
- Web API: API calls coming from automated tooling, and in some cases also from users visiting the web interface
- Web Runners API: API calls from Runners that allow a Runner to fetch new jobs or update the trace log
- Web GraphQL: provide a flexible API interface, allowing the web frontend to fetch only the data needed, thereby reducing the amount of compute and data transfer
- Web ActionCable: provide a bi-directional connection to enable real-time features for users visiting the web interface
- Web Feature Flags Unleash Backend: provide an Unleash-compatible server that uses the GitLab API
- Web Packages API: provide a REST API compatible with packaging tools: Debian, Maven, container registry proxy, etc.
- Git nodes: all code required to authorize `git pull/push` over `SSH` or `HTTPS`
- Sidekiq: run background jobs
- Services/Models/DB: all code required to maintain our database structure, data validation, business logic, and policy models that need to be shared with other components
The satellite model is likely the best way to describe how the actual GitLab Rails split would look:
a single core that is shared across all satellite components. This design implies
that satellite components have a limited way to communicate with each other. In a single monolithic application,
components would in most cases communicate through code. In a satellite model, the communication needs
to be performed externally to the component, via the database, Redis, or a well-defined exposed API.
```mermaid
flowchart TD
subgraph Data Store
D[Database]
R[Redis]
end
subgraph Rails Engines
subgraph Data Access Layer
C[Core]
end
subgraph Web Processing
W[Web]
end
subgraph Background Processing
S[Sidekiq]
end
end
C --> D & R
W & S -- using application models --> C
R -- push background job --> S
W -- via async schedule --> S
S -- via Web API --> W
```
### Application Layers for on-premise installations
The on-premise installations are significantly smaller and they usually run GitLab Rails in two main flavors:
```mermaid
graph LR
gitlab_node[GitLab Node with Load Balancer]
gitlab_node_web[Web running Puma]
gitlab_node_sidekiq[Background jobs running Sidekiq]
gitlab_node_git[Git running Puma and SSH]
subgraph GitLab Rails
gitlab_rails_web_controllers[Controllers]
gitlab_rails_api[API]
gitlab_rails_api_runners[API Runner]
gitlab_rails_graphql[GraphQL]
gitlab_rails_actioncable[ActionCable]
gitlab_rails_services[Services]
gitlab_rails_models[Models]
gitlab_rails_sidekiq[Sidekiq Workers]
end
postgresql_db[(PostgreSQL Database)]
redis_db[(Redis Database)]
gitlab_node --> gitlab_node_web
gitlab_node --> gitlab_node_sidekiq
gitlab_node --> gitlab_node_git
gitlab_node_web --> gitlab_rails_web_controllers
gitlab_node_web --> gitlab_rails_api
gitlab_node_web --> gitlab_rails_api_runners
gitlab_node_web --> gitlab_rails_graphql
gitlab_node_web --> gitlab_rails_actioncable
gitlab_node_git --> gitlab_rails_api
gitlab_node_sidekiq --> gitlab_rails_sidekiq
gitlab_rails_web_controllers --> gitlab_rails_services
gitlab_rails_api --> gitlab_rails_services
gitlab_rails_api_runners --> gitlab_rails_services
gitlab_rails_graphql --> gitlab_rails_services
gitlab_rails_actioncable --> gitlab_rails_services
gitlab_rails_sidekiq --> gitlab_rails_services
gitlab_rails_services --> gitlab_rails_models
gitlab_rails_models --> postgresql_db
gitlab_rails_models --> redis_db
```
### Application Layers on GitLab.com
Due to its scale, GitLab.com requires much more operational attention. This is needed to better manage resources
and provide SLAs for different functional parts. The chart below provides a simplified view of the GitLab.com application layers.
It does not include all components, such as Object Storage or Gitaly nodes, but shows the GitLab Rails dependencies between
the different components and how they are configured on GitLab.com today:
```mermaid
graph LR
gitlab_com_lb[GitLab.com Load Balancer]
gitlab_com_web[Web Nodes running Puma]
gitlab_com_api[API Nodes running Puma]
gitlab_com_websockets[WebSockets Nodes running Puma]
gitlab_com_sidekiq[Background Jobs running Sidekiq]
gitlab_com_git[Git Nodes running Puma and SSH]
subgraph GitLab Rails
gitlab_rails_web_controllers[Controllers]
gitlab_rails_api[API]
gitlab_rails_api_runners[API Runner]
gitlab_rails_graphql[GraphQL]
gitlab_rails_actioncable[ActionCable]
gitlab_rails_services[Services]
gitlab_rails_models[Models]
gitlab_rails_sidekiq[Sidekiq Workers]
end
postgresql_db[(PostgreSQL Database)]
redis_db[(Redis Database)]
gitlab_com_lb --> gitlab_com_web
gitlab_com_lb --> gitlab_com_api
gitlab_com_lb --> gitlab_com_websockets
gitlab_com_lb --> gitlab_com_git
gitlab_com_web --> gitlab_rails_web_controllers
gitlab_com_api --> gitlab_rails_api
gitlab_com_api --> gitlab_rails_api_runners
gitlab_com_api --> gitlab_rails_graphql
gitlab_com_websockets --> gitlab_rails_actioncable
gitlab_com_git --> gitlab_rails_api
gitlab_com_sidekiq --> gitlab_rails_sidekiq
gitlab_rails_web_controllers --> gitlab_rails_services
gitlab_rails_api --> gitlab_rails_services
gitlab_rails_api_runners --> gitlab_rails_services
gitlab_rails_graphql --> gitlab_rails_services
gitlab_rails_actioncable --> gitlab_rails_services
gitlab_rails_sidekiq --> gitlab_rails_services
gitlab_rails_services --> gitlab_rails_models
gitlab_rails_models --> postgresql_db
gitlab_rails_models --> redis_db
```
### Layer dependencies
The differences between how GitLab is run on-premise and how we run GitLab.com show the main division lines in GitLab Rails:
- Web: containing all API, all Controllers, all GraphQL and ActionCable functionality
- Sidekiq: containing all background processing jobs
- Core: containing all database, models and services that need to be shared between Web and Sidekiq
Each of these top-level application layers depends only on a fraction of the codebase, with all relevant dependencies:
- In all cases we need the underlying database structure and application models
- In some cases we need dependent services
- We only need a part of the application's common library
- We need gems to support the requested functionality
- Individual layers should not use a sibling layer directly (tight coupling); rather, they should connect via API, Redis, or the DB to share data (loose coupling)
## Proposal
The Memory group conducted a proof-of-concept phase to understand the impact of introducing **Application Layers**.
We did this to understand the complexity, impact, and iterations needed to execute this proposal.
The proposals here should be treated as an evaluation of the impact of this blueprint,
not as a final solution to be implemented. The PoC as defined is not something that should be merged;
it serves as a basis for future work.
### PoC using Rails Engines
We decided to use Rails Engines by modeling a Web Application Layer. The Web Engine contained Controllers, API, and GraphQL.
This allowed us to run Web nodes with all dependencies while measuring the impact on Sidekiq of not having these components loaded.
All work can be found in these merge requests:
- [Provide mechanism to load GraphQL with all dependencies only when needed](https://gitlab.com/gitlab-org/gitlab/-/issues/288044)
- [Draft: PoC - Move GraphQL to the WebEngine](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50180)
- [Draft: PoC - Move Controllers and Grape API:API to the WebEngine](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53720)
- [Draft: PoC - Move only Grape API:API to the WebEngine](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53982)
- [Measure performance impact for proposed `web_engine`](https://gitlab.com/gitlab-org/gitlab/-/issues/300548)
What was done?
- We used [Rails Engines](https://guides.rubyonrails.org/engines.html)
- 99% of the changes visible in the above MRs consist of moving files as-is
- We moved all GraphQL code and specs into `engines/web_engine/` as-is
- We moved all API and Controllers code and specs into `engines/web_engine`
- We adapted CI to test `engines/web_engine/` as a self-sufficient component of the stack
- We configured GitLab to load the `web_engine` gem when running Web nodes (Puma web server)
- We disabled loading `web_engine` when running background processing nodes (Sidekiq)
#### Implementation details for proposed solution
1. Introduce a new Rails Engine for each application layer.
We created an `engines` folder, which could contain different engines for each application layer we introduce in the future.
In the above PoCs we introduced the new Web Application Layer, located in the `engines/web_engine` folder.
1. Move all code and specs into `engines/web_engine/`
- We moved all GraphQL code and specs into `engines/web_engine/` without changing the files themselves
- We moved all Grape API and Controllers code into `engines/web_engine` without changing the files themselves
1. Move gems to `engines/web_engine/`
- We moved all GraphQL gems to the `web_engine` gemspec
- We moved the Grape API gem to the `web_engine` gemspec
```ruby
Gem::Specification.new do |spec|
spec.add_dependency 'apollo_upload_server'
spec.add_dependency 'graphql'
spec.add_dependency 'graphiql-rails'
spec.add_dependency 'graphql-docs'
spec.add_dependency 'grape'
end
```
1. Move routes to the `engines/web_engine/config/routes.rb` file
- We moved GraphQL routes to the `web_engine` routes.
- We moved API routes to the `web_engine` routes.
- We moved most of the controller routes to the `web_engine` routes.
```ruby
Rails.application.routes.draw do
post '/api/graphql', to: 'graphql#execute'
mount GraphiQL::Rails::Engine, at: '/-/graphql-explorer', graphql_path:
Gitlab::Utils.append_path(Gitlab.config.gitlab.relative_url_root, '/api/graphql')
draw :api
#...
end
```
1. Move initializers to the `engines/web_engine/config/initializers` folder
- We moved the `graphql.rb` initializer to the `web_engine` initializers folder
- We moved `grape_patch.rb` and the Grape validators to the `web_engine` initializers folder
1. Connect the GitLab application with the WebEngine
In the GitLab `Gemfile`, add `web_engine` to the `:engines` group
```ruby
# Gemfile
group :engines, :test do
gem 'web_engine', path: 'engines/web_engine'
end
```
Since the gem is inside the `:engines` group, it is not automatically required by default.
1. Configure when GitLab loads the engine.
In GitLab `config/engines.rb`, we can configure when we want to load our engines by relying on `Gitlab::Runtime`
```ruby
# config/engines.rb
# Load only in case we are running web_server or rails console
if Gitlab::Runtime.puma? || Gitlab::Runtime.console?
require 'web_engine'
end
```
1. Configure Engine
Our Engine inherits from the `Rails::Engine` class. In this way, the gem notifies Rails that
there's an engine at the specified path, so Rails will correctly mount the engine inside
the application, performing tasks such as adding the engine's app directory to
the load path for models, mailers, controllers, and views.
The file at `lib/web_engine/engine.rb` is identical in function to a standard Rails
application's `config/application.rb` file. This way engines can access a configuration
object which contains configuration shared by all railties and the application.
Additionally, each engine can access `autoload_paths`, `eager_load_paths`, and `autoload_once_paths`
settings which are scoped to that engine.
```ruby
module WebEngine
class Engine < ::Rails::Engine
config.eager_load_paths.push(*%W[#{config.root}/lib
#{config.root}/app/graphql/resolvers/concerns
#{config.root}/app/graphql/mutations/concerns
#{config.root}/app/graphql/types/concerns])
if Gitlab.ee?
ee_paths = config.eager_load_paths.each_with_object([]) do |path, memo|
ee_path = config.root
.join('ee', Pathname.new(path).relative_path_from(config.root))
memo << ee_path.to_s
end
# Eager load should load CE first
config.eager_load_paths.push(*ee_paths)
end
end
end
```
1. Testing
We adapted CI to test `engines/web_engine/` as a self-sufficient component of stack.
- We moved `spec` files as-is to the `engines/web_engine/spec` folder
- We moved `ee/spec` files as-is to the `engines/web_engine/ee/spec` folder
- We control specs from the main application using the environment variable `TEST_WEB_ENGINE`
- We added a new CI job that runs the `engines/web_engine/spec` tests separately, using the `TEST_WEB_ENGINE` environment variable
- We added a new CI job that runs the `engines/web_engine/ee/spec` tests separately, using the `TEST_WEB_ENGINE` environment variable
- We run all white-box frontend tests with `TEST_WEB_ENGINE=true`
#### Results
The effect of introducing these changes:
- Savings for RSS
- 61.06 MB (7.76%) - Sidekiq without GraphQL
- 100.11 MB (12.73%) - Sidekiq without GraphQL and API
- 208.83 MB (26.56%) - Sidekiq without GraphQL, API, Controllers
- The size of Web nodes (running Puma) stayed the same as before
Savings on the Sidekiq `start-up` event, for a single Sidekiq cluster without GraphQL, API, and Controllers:
- We saved 264.13 MB RSS (28.69%)
- We saved 264.09 MB USS (29.36%)
- Boot-up time was reduced from 45.31 to 21.80 seconds. It was 23.51 seconds faster (51.89%)
- We have 805,772 fewer live objects, 4,587,535 fewer allocated objects, 2,866 fewer allocated pages, and 3.65 MB less allocated space for objects outside of the heap
- We loaded 2,326 fewer code files (15.64%)
- We reduced the duration of a single full GC cycle from 0.80 seconds to 0.70 seconds (12.64%)
Puma in single mode showed very little difference, as expected.
More details can be found in the [issue](https://gitlab.com/gitlab-org/gitlab/-/issues/300548#note_516323444).
#### Impact on GitLab.com
Estimating the results at the scale of GitLab.com, today we observe:
- Individual GC cycle takes around [130 ms for Web](https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.max_source_resolution=0s&g0.expr=avg(rate(ruby_gc_duration_seconds_sum%7Bstage%3D%22main%22%2Ctype%3D%22web%22%7D%5B5m%5D)%2Frate(ruby_gc_duration_seconds_count%5B5m%5D))&g0.tab=0)
and [200 ms for Sidekiq](https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.max_source_resolution=0s&g0.expr=avg(rate(ruby_gc_duration_seconds_sum%7Bstage%3D%22main%22%2Ctype%3D%22sidekiq%22%7D%5B5m%5D)%2Frate(ruby_gc_duration_seconds_count%5B5m%5D))&g0.tab=0) on GitLab.com
- On average we do around [2 GC cycles per-second](https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.end_input=2021-02-17%2017%3A56&g0.max_source_resolution=0s&g0.expr=avg(rate(ruby_gc_duration_seconds_count%7Bstage%3D%22main%22%2Ctype%3D%22web%22%7D%5B5m%5D))&g0.tab=0)
or [0.12 cycles per second for Sidekiq](https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.end_input=2021-02-17%2017%3A56&g0.max_source_resolution=0s&g0.expr=avg(rate(ruby_gc_duration_seconds_count%7Bstage%3D%22main%22%2Ctype%3D%22sidekiq%22%7D%5B5m%5D))&g0.tab=0)
- This translates to using [around 9.5 vCPUs per-second for Web](https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.max_source_resolution=0s&g0.expr=sum(rate(ruby_gc_duration_seconds_sum%7Bstage%3D%22main%22%2Ctype%3D%22web%22%7D%5B5m%5D))&g0.tab=0)
and [around 8 vCPUs per-second for Sidekiq](https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.max_source_resolution=0s&g0.expr=sum(rate(ruby_gc_duration_seconds_sum%7Bstage%3D%22main%22%2Ctype%3D%22sidekiq%22%7D%5B5m%5D))&g0.tab=0) spent on GC alone
- Sidekiq [uses 2.1 GB on average](https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.max_source_resolution=0s&g0.expr=max(ruby_process_unique_memory_bytes%7Btype%3D%22sidekiq%22%7D)%2F1024%2F1024%2F1024&g0.tab=1)
or [550 GB in total](https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.max_source_resolution=0s&g0.expr=sum(ruby_process_unique_memory_bytes%7Btype%3D%22sidekiq%22%7D)%2F1024%2F1024%2F1024&g0.tab=0) of memory on GitLab.com
We estimate the possible maximum savings from introducing `web_engine`:
- Reduce GC cycle time by 20%, from 200 ms to 160 ms
- The number of GC cycles per second would stay the same, but due to the GC cycle time reduction we would use around 6 vCPUs instead of 8 vCPUs
- In the best case, looking at Sidekiq alone, we estimate saving up to 137 GB of memory on GitLab.com (roughly 25% of the 550 GB currently used)
This model could be extended to introduce a `sidekiq_engine`, giving similar benefits
(even more important due to the visible impact on users) for Web nodes.
#### Outcome
We achieved a number of benefits by introducing these changes.
Pros:
- Significantly lower memory usage
- Significantly shorter application load time for Sidekiq
- Significantly improved responsiveness of the Sidekiq service due to much shorter GC cycles
- Significantly easier testing of a portion of the application; for example, changing `engines/web_engine/` requires
re-running tests only for this application layer
- We retained the monolithic architecture of the codebase while sharing the database and application models
- Significant savings on the infrastructure side
- Ability to comfortably run on constrained environments by reducing the application footprint
Cons:
- It is harder to implement GraphQL subscriptions, as Sidekiq would need another way to pass subscription payloads
- `api_v4` paths can be used in some services that are used by Sidekiq (for example `api_v4_projects_path`)
- `url_helpers` paths are used in models and services that could be used by Sidekiq (for example, `Gitlab::Routing.url_helpers.project_pipelines_path` is used by [ExpirePipelineCacheService](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/services/ci/expire_pipeline_cache_service.rb#L20) in [ExpirePipelineCacheWorker](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/workers/expire_pipeline_cache_worker.rb#L18))
#### Example: GraphQL
[Draft: PoC - Move GraphQL to the WebEngine](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50180)
- The [99% of changes](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50180/diffs?commit_id=49c9881c6696eb620dccac71532a3173f5702ea8) visible in the above MRs are moving files as-is.
- The [actual work](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50180/diffs?commit_id=1d9a9edfa29ea6638e7d8a6712ddf09f5be77a44) on fixing cross-dependencies, specs, and configuring `web_engine`
- We [adapted](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50180/diffs?commit_id=d7f862cc209ce242000b2aec88ff7f4485acdd92) CI to test `engines/web_engine/` as a self-sufficient component of stack
Today, loading GraphQL requires a bunch of [dependencies](https://gitlab.com/gitlab-org/gitlab/-/issues/288044):
> We also discovered that we load/require 14480 files, [memory-team-2gb-week#9](https://gitlab.com/gitlab-org/memory-team/memory-team-2gb-week/-/issues/9#note_452530513)
> when we start GitLab. 1274 files belong to GraphQL. This means that if we don't load 1274 application files
> and all related GraphQL gems when we don't need them (Sidekiq), we could save a lot of memory.
GraphQL only needs to run in a specific context. If we could limit when it is loaded, we could effectively improve application efficiency by reducing application load time and required memory. This applies to installations of every size.
A potential challenge with GraphQL and WebSockets is that at some point we might want to use Action Cable subscriptions and push GraphQL/API payloads from Sidekiq to clients. This would likely use Redis to pass data through: Sidekiq would publish information to Redis, and the ActionCable node would pass that information through to connected clients. This way of working is possible in the above model, but we would have to use GraphQL or the API (over an HTTP endpoint) to calculate what should be sent.
An alternative is to use a notification system in which the `ActionCable` node (the one handling WebSockets) always generates a payload based on the subscribed query instead of performing a passthrough. This could be applicable since `ActionCable` is the one handling a given connection for a client. It has the downside of having to recalculate the same payload if many clients are watching the same resource. However, this behavior might still be desired for security purposes, as the generated payload can depend on the permissions of the watching client (we would show different payloads to an anonymous user and to a member of the project).
#### Example: API
[Draft: PoC - Move only Grape API:API to the WebEngine](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53982)
- [99% of the changes](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53982/diffs?commit_id=c8b72249b6e8f875ed4c713f0668207377604043), as visible in the above MRs, are moving the files as-is.
- The [actual work](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53982/diffs?commit_id=00d9b54ba952c85ff4d158a18205c2fac13eaf8d) on fixing cross-dependencies, specs, configuring initializers, gems and routes.
Grape::API is another example that needs to run only in a web server context.
Potential challenges with Grape API:
- Currently there are some API::API dependencies in the models (for example `API::Helpers::Version` dependency in [project model](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/project.rb#L2019) or API::API dependency in GeoNode model for [`geo_retrieve_url`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/ee/app/models/geo_node.rb#L183))
- `api_v4` paths are used in helpers, presenters, and views (for example `api_v4_projects_path` in [PackagesHelper](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/helpers/packages_helper.rb#L17))
#### Example: Controllers
[Draft: PoC - Move Controllers and Grape API:API to the WebEngine](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53720)
- [99% of the changes](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53720/diffs?commit_id=17174495cf3263c8e69a0420092d9fa759170aa6), as visible in the above MRs, are moving files as-is.
- The [actual work](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53720/diffs?commit_id=39cc4bb1e0ce47f66605d06eb1b0d6b89ba174e6) on fixing cross-dependencies, specs, configuring initializers, gems and routes.
Controllers, Serializers, some presenters, and some of the Grape::Entities are also good examples of code that only needs to run in a web server context.
Potential challenges with moving Controllers:
- We needed to extend `Gitlab::Patch::DrawRoute` to support `engines/web_engine/config/routes` and `engines/web_engine/ee/config/routes` in case when `web_engine` is loaded. Here is potential [solution](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53720#note_506957398).
- `Gitlab::Routing.url_helpers` paths are used in models and services that could be used by Sidekiq (for example, `Gitlab::Routing.url_helpers.project_pipelines_path` is used by [ExpirePipelineCacheService](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/services/ci/expire_pipeline_cache_service.rb#L20) in [ExpirePipelineCacheWorker](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/workers/expire_pipeline_cache_worker.rb#L18))
### Packwerk
NOTE:
Packwerk is currently accepting bug fixes only, and it is not being actively developed. Check for [more details](https://github.com/Shopify/packwerk#note-packwerk-is-considered-to-be-feature-complete-for-shopifys-uses-we-are-currently-accepting-bug-fixes-only-and-it-is-not-being-actively-developed-please-fork-this-project-if-you-are-interested-in-adding-new-features)
## Future impact
This proposal currently defines only the `web_engine` **application layer**. Following the same pattern, we could easily introduce
additional dedicated engines that would allow us to maintain much better separation, lower memory usage,
and much better maintainability of GitLab Rails into the future.
This would be a framework for introducing new interfaces for features that do not need to be part of the core codebase,
like support for additional Package services, allowing us to better scale the application in the future while retaining a single codebase
and the monolithic architecture of GitLab.
As of today, it seems reasonable to define three **application layers**:
- `gitlab-core`: core functionality: DB structure, models, services, and common library. It models a data access layer and, initially, all services needed to run GitLab. This might potentially be split into smaller aspects in the future
- `gitlab-web`: Controllers/API/GraphQL/ActionCable functionality needed to run in a web server context (depends on `gitlab-core`)
- `gitlab-sidekiq`: background job functionality needed to run Sidekiq workers (depends on `gitlab-core`)
This model is best described today as a shared core with satellites. The shared core defines the data access layer, whereas satellites define ways to present and process this data. Satellites can only talk to the core. They cannot directly load or talk to another satellite unless they use a well-defined interface in the form of API, GraphQL, or Redis (as for scheduling Sidekiq jobs).
It is reasonable to limit how many `engines` we allow. The initial proposal is to allow up to five engines,
to ensure that we do not have an explosion of engines.
## Issues and Merge Requests
- [Split application into functional parts to ensure that only needed code is loaded with all dependencies](https://gitlab.com/gitlab-org/gitlab/-/issues/290935)
- [Provide mechanism to load GraphQL with all dependencies only when needed](https://gitlab.com/gitlab-org/gitlab/-/issues/288044)
- [Draft: PoC - Move GraphQL to the WebEngine](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50180)
- [Draft: PoC - Move Controllers and Grape API:API to the WebEngine](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53720)
- [Draft: PoC - Move only Grape API:API to the WebEngine](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/53982)
- [Measure performance impact for proposed `web_engine`](https://gitlab.com/gitlab-org/gitlab/-/issues/300548)
- [Create new models / classes within a module / namespace](https://gitlab.com/gitlab-org/gitlab/-/issues/212156)
- [Make teams to be maintainers of their code](https://gitlab.com/gitlab-org/gitlab/-/issues/25872)
- [Use nested structure to organize CI classes](https://gitlab.com/gitlab-org/gitlab/-/issues/209745)
- [WIP: Make it simple to build and use "Decoupled Services"](https://gitlab.com/gitlab-org/gitlab/-/issues/31121)
- [Rails takes awhile to boot, let's see if we can improve this](https://gitlab.com/gitlab-org/gitlab/-/issues/213992)
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,351 +1,11 @@
---
status: implemented
creation-date: "2020-09-29"
authors: [ "@jdrpereira" ]
coach: "@glopezfernandez"
approvers: [ "@trizzi", "@hswimelar" ]
owning-stage: "~devops::package"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/container_registry_metadata_database/'
remove_date: '2025-07-08'
---
<!-- vale gitlab.FutureTense = NO -->
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/container_registry_metadata_database/).
# Container registry metadata database
## Usage of the GitLab container registry
With the [container registry](https://gitlab.com/gitlab-org/container-registry) integrated into GitLab, every GitLab project can have its own space to store its Docker images. You can use the registry to build, push and share images using the Docker client, CI/CD or the GitLab API.
Each day on GitLab.com, between [150k and 200k images are pushed to the registry](https://app.periscopedata.com/app/gitlab/527857/Package-GitLab.com-Stage-Activity-Dashboard?widget=9620193&udv=0), generating about [700k API events](https://app.periscopedata.com/app/gitlab/527857/Package-GitLab.com-Stage-Activity-Dashboard?widget=7601761&udv=0). It's also worth noting that although some customers use other registry vendors, [more than 96% of instances](https://app.periscopedata.com/app/gitlab/527857/Package-GitLab.com-Stage-Activity-Dashboard?widget=9832282&udv=0) are using the GitLab container registry.
For GitLab.com and for GitLab customers, the container registry is a critical component to building and deploying software.
## Current Architecture
The container registry is a single [Go](https://go.dev/) application. Its only dependency is the storage backend on which images and metadata are stored.
```mermaid
graph LR
C((Client)) -- HTTP request --> R(Container Registry) -- Upload/download blobs --> B(Storage Backend)
R -- Write/read metadata --> B
```
Client applications (for example, GitLab Rails and Docker CLI) interact with the container registry through its [HTTP API](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/gitlab/api.md). The most common operations are pushing and pulling images to/from the registry, which require a series of HTTP requests in a specific order. The request flow for these operations is detailed in the [Request flow](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/push-pull-request-flow.md).
The registry supports multiple [storage backends](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/configuration.md#storage), including Google Cloud Storage (GCS) which is used for the GitLab.com registry. In the storage backend, images are stored as blobs, deduplicated, and shared across repositories. These are then linked (like a symlink) to each repository that relies on them, giving them access to the central storage location.
The name and hierarchy of repositories, as well as image manifests and tags, are also stored in the storage backend, represented by a nested structure of folders and files. [This video](https://www.youtube.com/watch?v=i5mbF2bgWoM&feature=youtu.be) gives a practical overview of the registry storage structure.
### Clients
The container registry has two main clients: the GitLab Rails application and the Docker client/CLI.
#### Docker
The Docker client (`docker` CLI) interacts with the GitLab container registry mainly using the [login](https://docs.docker.com/reference/cli/docker/login/),
[push](https://docs.docker.com/reference/cli/docker/image/push/), and [pull](https://docs.docker.com/reference/cli/docker/image/pull/) commands.
##### Login and Authentication
GitLab Rails is the default token-based authentication provider for the GitLab container registry.
Once the registry receives a request sent by an unauthenticated Docker client, it will reply with `401 Unauthorized` and instruct the client to obtain a token from the GitLab Rails API. The Docker client will then request a Bearer token and embed it in the `Authorization` header of all requests. The registry is responsible for determining whether the user is authenticated/authorized to perform those requests based on the provided token.
```mermaid
sequenceDiagram
participant C as Docker client
participant R as GitLab container registry
participant G as GitLab Rails
C->>R: docker login gitlab.example.com
R->>C: 401 Unauthorized
Note left of R: The response includes the realm (for example, https://gitlab.example.com/jwt/auth)<br> from where a token should be obtained
C->>G: Obtain Bearer token
G->>C: 200 OK
C-->>R: Push/pull requests
Note right of C: Bearer token included in the Authorization header
```
For more details, refer to the [Docker documentation](https://distribution.github.io/distribution/spec/auth/token/).
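As a rough sketch, the client side of this handshake reduces to a token fetch like the following, based on the Docker token auth specification. The host, service, and scope values are placeholders, and this is illustrative code, not GitLab's implementation:

```go
package registryauth

import (
	"encoding/json"
	"net/http"
)

// fetchBearerToken is a minimal sketch of the token request step.
// The realm and service come from the WWW-Authenticate header of the
// initial 401 response, for example:
//   Bearer realm="https://gitlab.example.com/jwt/auth",service="container_registry"
func fetchBearerToken() (string, error) {
	realm := "https://gitlab.example.com/jwt/auth" // placeholder realm
	resp, err := http.Get(realm + "?service=container_registry&scope=repository:group/project:pull")
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	var body struct {
		Token string `json:"token"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		return "", err
	}
	// The Docker client then sends this token on all push/pull requests:
	//   Authorization: Bearer <token>
	return body.Token, nil
}
```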
##### Push and Pull
Push and pull commands are used to upload and download images, more precisely manifests and blobs. The push/pull flow is described in the [documentation](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/push-pull-request-flow.md).
#### GitLab Rails
GitLab Rails interacts with the registry through the HTTP API and consumes its webhook notifications.
##### From GitLab Rails to Registry
The single entrypoint for the registry is the [HTTP API](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md). GitLab Rails invokes the API to perform all operations, which include:
| Operation | UI | Background | Observations |
| ------------------------------------------------------------ | ------------------ | ------------------------ | ------------------------------------------------------------ |
| [Check API version](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#api-version-check) | **{check-circle}** Yes | **{check-circle}** Yes | Used globally to ensure that the registry supports the Docker Distribution V2 API, as well as for identifying whether GitLab Rails is talking to the GitLab container registry or a third-party one (used to toggle features only available in the former). |
| [List repository tags](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#listing-image-tags) | **{check-circle}** Yes | **{check-circle}** Yes | Used to list and show tags in the UI. Used to list tags in the background for [cleanup policies](../../../user/packages/container_registry/reduce_container_registry_storage.md#cleanup-policy) and [Geo replication](../../../administration/geo/replication/container_registry.md). |
| [Check if manifest exists](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#existing-manifests) | **{check-circle}** Yes | **{dotted-circle}** No | Used to get the digest of a manifest by tag. This is then used to pull the manifest and show the tag details in the UI. |
| [Pull manifest](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#pulling-an-image-manifest) | **{check-circle}** Yes | **{dotted-circle}** No | Used to show the image size and the manifest digest in the tag details UI. |
| [Pull blob](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#pulling-a-layer) | **{check-circle}** Yes | **{dotted-circle}** No | Used to show the configuration digest and the creation date in the tag details UI. |
| [Delete tag](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#delete-tag) | **{check-circle}** Yes | **{check-circle}** Yes | Used to delete a tag from the UI and in background (cleanup policies). |
A valid authentication token is generated in GitLab Rails and embedded in all these requests before sending them to the registry.
##### From Registry to GitLab Rails
The registry supports [webhook notifications](https://distribution.github.io/distribution/about/notifications/) to notify external applications when an event occurs, such as an image push.
For GitLab, the registry is currently [configured](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/configuration.md#notifications) to deliver notifications for image push events to the GitLab Rails API. These notifications are currently used for Snowplow metrics and Geo replication.
### Challenges
#### Garbage Collection
The container registry relies on an offline *mark* and *sweep* garbage collection (GC) algorithm. To run it, the registry needs to be either shut down or set to read-only, remaining that way during the whole GC run.
During the *mark* phase, the registry analyzes all repositories, creating a list of configurations, layers, and manifests that are referenced/linked in each one of them. The registry will then list all existing configurations, layers, and manifests (stored centrally) and obtain a list of those that are not referenced/linked in any repository. This is the list of blobs eligible for deletion.
With the output from the *mark* phase in hand, the registry starts the *sweep* phase, where it will loop over all blobs identified as eligible for deletion and delete them from the storage backend, one by one.
Doing this for a huge registry may require multiple hours/days to complete, during which the registry must remain in read-only mode. This is not feasible for platforms with tight availability requirements, such as GitLab.com.
This limitation is also described in the upstream [Docker Distribution documentation](https://github.com/docker/distribution/blob/749f6afb4572201e3c37325d0ffedb6f32be8950/ROADMAP.md#deletes).
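In simplified pseudocode, the algorithm described above looks roughly like this; the `storage` interface is a hypothetical stand-in for the storage backend driver, not the registry's actual code:

```go
// storage is a hypothetical abstraction over the storage backend.
type storage interface {
	Repositories() []string
	ReferencedBlobs(repo string) []string // digests linked in a repository
	AllBlobs() []string                   // digests stored centrally
	DeleteBlob(digest string) error
}

// garbageCollect must run while the registry is read-only or shut down.
func garbageCollect(s storage) error {
	// Mark: record every blob referenced by at least one repository.
	marked := make(map[string]bool)
	for _, repo := range s.Repositories() {
		for _, digest := range s.ReferencedBlobs(repo) {
			marked[digest] = true
		}
	}

	// Sweep: delete every centrally stored blob that was not marked.
	for _, digest := range s.AllBlobs() {
		if !marked[digest] {
			if err := s.DeleteBlob(digest); err != nil {
				return err
			}
		}
	}
	return nil
}
```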
#### Performance
Due to the current architecture and its reliance on the (possibly remote) storage backend to store repository and image metadata, even the most basic operations, such as listing repositories or tags, can become prohibitively slow, and it only gets worse as the registry grows in size.
For example, to be able to tell which repositories exist, the registry has to walk through all folders in the storage backend and identify repositories in them. Only once all existing folders have been visited can the registry reply to the client with the list of repositories. If using a remote storage backend (such as GCS or S3), performance becomes even worse, as multiple HTTP requests are required to list and inspect the contents of each visited folder.
#### Consistency
Some storage backends, like S3, can only provide [eventual consistency](https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel). As an example, reading a blob after deleting it can succeed for a short amount of time. Eventual consistency could be problematic when paired with online garbage collection, as read requests at the API level could continue to succeed for deleted blobs.
#### Insights
For similar reasons as highlighted above, it's currently not feasible to extract valuable information from the registry, such as how much space a repository is using, which repositories are using the most space, which ones are more active, detailed push/pull metrics for each image or tag, and others. Not having access to these insights and metrics strongly weakens the ability to make informed decisions with regard to the product strategy.
#### Additional Features
Due to the metadata limitations, it's currently not feasible to implement valuable features such as [pagination](https://gitlab.com/gitlab-org/container-registry/-/issues/13#note_271769891), filtering and sorting for HTTP API, and more advanced features such as the ability to [distinguish between Docker and Helm charts images](https://gitlab.com/gitlab-org/gitlab/-/issues/38047).
Because of all these constraints, we decided to [freeze the development of new features](https://gitlab.com/gitlab-org/container-registry/-/issues/44) until we have a solution in place to overcome all these foundational limitations.
## New Architecture
To overcome all challenges described above, we started an effort to migrate the registry metadata (the list of blobs, repositories, and which manifest/layers are referenced/linked in each one of them) from the storage backend into a [PostgreSQL database](#database).
The ultimate goal of the new architecture is to enable online garbage collection ([&2313](https://gitlab.com/groups/gitlab-org/-/epics/2313)), but once the database is in place, we will also be able to implement all features that have been blocked by the metadata limitations. The performance of the existing API should drastically increase as well.
The introduction of a database will affect the registry architecture, as we will have one more component involved:
```mermaid
graph LR
C((Client)) -- HTTP request --> R(Container Registry) -- Upload/download blobs --> B(Storage Backend)
R -- Write/read metadata --> D[(Database)]
```
With a database in place, the registry will no longer use the storage backend to write and read metadata. Instead, metadata will be stored and manipulated on the PostgreSQL database. The storage backend will then be used only for uploading and downloading blobs.
The interaction between the registry and its clients, including GitLab Rails and the Docker client, will remain unchanged, as documented in the [Current Architecture](#current-architecture) section. The architectural changes and the database are internal only. The registry HTTP API and webhook notifications will also remain unchanged.
### Database
Following the GitLab [Go standards and style guidelines](../../../development/go_guide/index.md), no ORM is used to manage the database, only the [`database/sql`](https://pkg.go.dev/database/sql) package from the Go standard library, a PostgreSQL driver ([`lib/pq`](https://pkg.go.dev/github.com/lib/pq?tab=doc)) and raw SQL queries, over a TCP connection pool.
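As a minimal illustration of this approach (the DSN, table, and column names are placeholders, not the actual registry schema):

```go
package main

import (
	"context"
	"database/sql"
	"log"
	"time"

	_ "github.com/lib/pq" // registers the "postgres" driver
)

func main() {
	// Placeholder DSN; database/sql maintains the TCP connection pool.
	db, err := sql.Open("postgres", "host=localhost dbname=registry sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	db.SetMaxOpenConns(10)
	db.SetConnMaxLifetime(5 * time.Minute)

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	// Raw SQL, no ORM; table and column names are illustrative only.
	var name string
	if err := db.QueryRowContext(ctx, "SELECT name FROM repositories WHERE id = $1", 1).Scan(&name); err != nil {
		log.Fatal(err)
	}
	log.Println(name)
}
```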
The design and development of the registry database adhere to the GitLab [database guidelines](../../../development/database/index.md). Being a Go application, the required tooling to support the database will have to be developed, such as for running database migrations.
Running *online* and [*post deployment*](../../../development/database/post_deployment_migrations.md) migrations is already supported by the registry CLI, as described in the [documentation](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/database-migrations.md).
#### Partitioning
The registry database will be partitioned from the start to achieve greater performance (by limiting the amount of data to act upon and enabling parallel execution), easier maintenance (by splitting tables and indexes into smaller units), and high availability (with partition independence). By partitioning the database from the start, we can also facilitate a sharding implementation later on if necessary.
Although blobs are shared across repositories, manifest and tag metadata are scoped by repository. This is also visible at the API level, where all write and read requests (except [listing repositories](https://gitlab.com/gitlab-org/container-registry/-/blob/a113d0f0ab29b49cf88e173ee871893a9fc56a90/docs/spec/api.md#listing-repositories)) are scoped by repository, with its namespace being part of the request URI. For this reason, after [identifying access patterns](https://gitlab.com/gitlab-org/gitlab/-/issues/234255), we decided to partition manifests and tags by repository and blobs by digest, ensuring that lookups are always performed by partition key for optimal performance. The initial version of the partitioned schema was documented [in a merge request](https://gitlab.com/gitlab-com/www-gitlab-com/-/merge_requests/60918).
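To make the idea concrete, here is a simplified, purely illustrative sketch of a repository-partitioned table, expressed as raw SQL in a Go constant; the actual registry schema differs:

```go
// Illustrative only: a simplified, hypothetical repository-partitioned
// tags table; not the actual registry schema.
const createTagsTable = `
CREATE TABLE tags (
    repository_id BIGINT NOT NULL,
    name          TEXT   NOT NULL,
    manifest_id   BIGINT NOT NULL,
    PRIMARY KEY (repository_id, name)
) PARTITION BY HASH (repository_id);

-- Lookups by the partition key (repository_id) are routed to one partition.
CREATE TABLE tags_p0 PARTITION OF tags FOR VALUES WITH (MODULUS 4, REMAINDER 0);
CREATE TABLE tags_p1 PARTITION OF tags FOR VALUES WITH (MODULUS 4, REMAINDER 1);
CREATE TABLE tags_p2 PARTITION OF tags FOR VALUES WITH (MODULUS 4, REMAINDER 2);
CREATE TABLE tags_p3 PARTITION OF tags FOR VALUES WITH (MODULUS 4, REMAINDER 3);
`
```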
#### GitLab.com
Due to scale, performance and isolation concerns, for GitLab.com the registry database will be on a separate dedicated PostgreSQL cluster. See [#93](https://gitlab.com/gitlab-org/container-registry/-/issues/93) and [GitLab-com/gl-infra/reliability#10109](https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/10109) for additional context.
The diagram below illustrates the architecture of the database cluster:
![GitLab.com database architecture](https://gitlab.com/gitlab-org/gitlab/uploads/949ae16d993bca6d4813830dfc501aa2/gitlabcom_database_architecture.png)
##### Expected Rate and Size Requirements
[Rate](https://gitlab.com/gitlab-org/container-registry/-/issues/94) and [size](https://gitlab.com/gitlab-org/container-registry/-/issues/61#note_446609886) requirements for the GitLab.com database were extrapolated based on the `dev.gitlab.org` registry and are available in the linked issues.
#### Self-managed instances
By default, for self-managed instances, the registry will have a separate logical database in the same PostgreSQL instance/cluster as the GitLab database. However, it will be possible to configure the registry to use a separate instance/cluster if needed.
#### PostgreSQL
During the discussion of the [initial database schema](https://gitlab.com/gitlab-org/gitlab/-/issues/207147), we have decided to opt for PostgreSQL over other database engines mainly because:
- It offers all the features we need, including the ACID guarantees of an RDBMS, and partitioning;
- It is already used for GitLab, thus we have the required experience and tools in place to manage it;
- We want to offer self-managed customers the possibility of hosting the registry database within the same PostgreSQL instance that they already have for GitLab.
##### Version 12
PostgreSQL introduced significant improvements for partitioning in [version 12](https://www.postgresql.org/docs/12/release-12.html#id-1.11.6.9.5), among which we highlight:
- It's now possible for foreign keys to reference partitioned tables. This is a hard requirement for this project not only to guarantee consistency and integrity but also to enable cascading deletes at the database level;
- Major performance improvements for inserts, selects, and updates with less locking and consistent performance for a large number of partitions ([benchmarks](https://www.2ndquadrant.com/en/blog/postgresql-12-partitioning/));
- Major improvements to the planning algorithm for tables with a large number of partitions, with some tests finding speedups of up to 10,000 times ([source](https://aws.amazon.com/blogs/database/postgresql-12-a-deep-dive-into-some-new-functionality/));
- Attaching new partitions to an existing table no longer requires locking the entire table;
- Bulk load (`COPY`) now uses bulk inserts instead of inserting one row at a time;
To leverage these features and performance improvements, we need to use PostgreSQL 12 from the start.
GitLab 14.0 and later [ships with PostgreSQL 12](../../../administration/package_information/postgresql_versions.md)
for self-managed instances. Customers not able to upgrade to PostgreSQL 12 have two options:
- Administrators can manually provision and configure a separate PostgreSQL 12 database for the
registry. This lets you benefit from the features provided by the new registry and its metadata
database.
- If online garbage collection isn't a concern or provisioning a separate database isn't possible,
continue to use the current registry without the database. GitLab supports the current version
with security backports and bug fixes.
Apart from online garbage collection, the metadata database's availability unblocks the
implementation of many requested features for the GitLab container registry. These features are only
available for instances using the new version backed by the metadata database.
### Availability
Apart from the authentication service and the blob storage backend, the registry gains one more dependency with the new architecture - the database. Given that the registry can only be as reliable as its dependencies, the database deployment should be designed for high availability. A gradual migration approach should help to identify and mitigate implementation and resource constraints for high availability environments.
#### HTTP API
This is a list of all the registry HTTP API operations and how they depend on the storage backend and the new database. The registry will return an error response if any dependencies are not available when processing a request.
| Operation | Method | Path | Requires Database | Requires Storage | Used by GitLab Rails * |
|--------------------------------------------------------------------------------------------------------------------------------|----------|-----------------------------------------|-------------------|------------------|------------------------|
| [Check API version](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#api-version-check) | `GET` | `/v2/` | **{dotted-circle}** No | **{dotted-circle}** No | **{check-circle}** Yes |
| [List repositories](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#listing-repositories) | `GET` | `/v2/_catalog` | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [List repository tags](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#listing-image-tags) | `GET` | `/v2/<name>/tags/list` | **{check-circle}** Yes | **{dotted-circle}** No | **{check-circle}** Yes |
| [Delete tag](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#delete-tag) | `DELETE` | `/v2/<name>/manifests/<reference>` | **{check-circle}** Yes | **{dotted-circle}** No | **{check-circle}** Yes |
| [Check if manifest exists](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#existing-manifests) | `HEAD` | `/v2/<name>/manifests/<reference>` | **{check-circle}** Yes | **{dotted-circle}** No | **{check-circle}** Yes |
| [Pull manifest](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#pulling-an-image-manifest) | `GET` | `/v2/<name>/manifests/<reference>` | **{check-circle}** Yes | **{dotted-circle}** No | **{check-circle}** Yes |
| [Push manifest](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#pushing-an-image-manifest) | `PUT` | `/v2/<name>/manifests/<reference>` | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Delete manifest](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#deleting-an-image) | `DELETE` | `/v2/<name>/manifests/<reference>` | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Check if blob exists](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#existing-layers) | `HEAD` | `/v2/<name>/blobs/<digest>` | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Pull blob](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#fetch-blob) | `GET` | `/v2/<name>/blobs/<digest>` | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| [Delete blob](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#delete-blob) | `DELETE` | `/v2/<name>/blobs/<digest>` | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Start blob upload](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#starting-an-upload) | `POST` | `/v2/<name>/blobs/uploads/` | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Check blob upload status](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#get-blob-upload) | `GET` | `/v2/<name>/blobs/uploads/<uuid>` | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Push blob chunk](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#chunked-upload-1) | `PATCH` | `/v2/<name>/blobs/uploads/<uuid>` | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Complete blob upload](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#put-blob-upload) | `PUT` | `/v2/<name>/blobs/uploads/<uuid>` | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Cancel blob upload](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/spec/api.md#canceling-an-upload) | `DELETE` | `/v2/<name>/blobs/uploads/<uuid>` | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
`*` Refer to the [list of interactions between registry and Rails](#from-gitlab-rails-to-registry) to know why and how.
#### Failure Scenarios
With the addition of a database, it is essential to highlight possible failure scenarios, how we expect the registry to behave in such conditions, and their impact on the registry availability and functionality.
##### Database Failover
In case of refused connections or timeouts when attempting to connect to the database, which might happen during a failover scenario, the registry will discard broken connections upfront and attempt to open a new one from the pool immediately.
The application will not panic in this scenario. It will try to establish a new connection for every request. If that fails, an HTTP `503 Service Unavailable` error is returned to clients, and the error is logged and reported to Sentry. There is no retry cadence. The registry will only try to establish a new connection when another request that requires database access is received.
It is also possible to configure the registry to periodically check the health of the database server using a TCP health checker with a configurable interval and threshold ([docs](https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs/configuration.md#tcp)). In case the health check fails, incoming requests will be halted with an HTTP `503 Service Unavailable` error.
Once the database server is available again, the registry will gracefully reconnect on the next incoming request, restoring the full API functionality without human intervention.
The expected registry behavior will be covered with integration tests, using a programmable TCP proxy between the registry and a database server to simulate a failover scenario.
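A sketch of the described per-request behavior, reusing the `database/sql` pool from the earlier example; the handler shape and route are hypothetical:

```go
// Hypothetical handler sketch: if the database is unreachable (for
// example, during a failover), fail the request with a 503 and let the
// next request attempt a fresh connection from the pool.
func listTagsHandler(db *sql.DB) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if err := db.PingContext(r.Context()); err != nil {
			// Logged and reported to Sentry in the real application.
			http.Error(w, "database unavailable", http.StatusServiceUnavailable)
			return
		}
		// ... serve the request using connections from the pool.
	}
}
```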
##### Connection Pool Saturation
If unable to obtain a connection from the pool to serve a given request, the registry will time out, return an HTTP `500 Internal Server Error` to the client, and report the error to Sentry. These issues should trigger a development escalation to investigate why the pool is being exhausted. There might be too much load for the preconfigured pool size, or there might be transactions holding on to connections for too long.
Prometheus metrics should be used to create alerts to act on possible saturation before the application starts returning errors. Special attention will be paid to these scenarios during the gradual migration of the GitLab.com registry, where we will have limited, gradual, and controlled exposure on the new registry nodes. During that process, we can identify usage patterns, observe metrics, and fine-tune both infrastructure and application settings accordingly as the load increases. If needed, a rate-limiting algorithm may be applied to limit impact. Decisions will be based on real data to avoid overly restrictive measures and premature optimizations.
The expected registry behavior will be covered with integration tests by manipulating the pool size and spawning multiple concurrent requests against the API, putting pressure on the pool and eventually exhausting its capacity.
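The interaction between a bounded pool and a per-request timeout can be sketched as follows, inside a request handler like the one above; the values are illustrative:

```go
// Illustrative: a bounded pool plus a per-request timeout. When the pool
// stays saturated, db.Conn blocks until the context expires, and the
// request fails rather than queueing indefinitely.
db.SetMaxOpenConns(25) // preconfigured pool size

ctx, cancel := context.WithTimeout(r.Context(), time.Second)
defer cancel()

conn, err := db.Conn(ctx)
if err != nil {
	// Reported to Sentry; surfaced to the client as a 500.
	http.Error(w, "internal error", http.StatusInternalServerError)
	return
}
defer conn.Close()
```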
##### Latency
Excessive latency on established connections is hard to detect and debug, as these might not raise an application error or network timeout in typical circumstances but usually precede them.
For this reason, the duration of database queries used to serve HTTP API requests should be instrumented using metrics, allowing the detection of unusual variations and triggering alarms accordingly before excessive latency becomes a timeout or service unavailability.
The expected registry behavior will be covered with integration tests, using a programmable TCP proxy between the registry and a database server to simulate increasing latency scenarios.
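Such instrumentation could look like the following sketch using the Prometheus Go client; the metric name and labels are assumptions, not the registry's actual metrics:

```go
// Hypothetical metric; its name and labels are assumptions.
var queryDuration = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name: "registry_database_query_duration_seconds",
		Help: "Duration of database queries used to serve HTTP API requests.",
	},
	[]string{"query"},
)

func init() {
	prometheus.MustRegister(queryDuration)
}

// timedQuery wraps a query so its duration is observed in the histogram.
func timedQuery(ctx context.Context, db *sql.DB, name, stmt string, args ...interface{}) (*sql.Rows, error) {
	timer := prometheus.NewTimer(queryDuration.WithLabelValues(name))
	defer timer.ObserveDuration()
	return db.QueryContext(ctx, stmt, args...)
}
```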
##### Problematic Migrations
Apart from unusual network and systems conditions, problematic migrations and data failures can also affect the database availability and, as a consequence, the registry availability.
Database migrations will adhere to the same [development best practices](../../../development/database/index.md) used for GitLab Rails, except for Rails-specific methods and tools, as the registry is a Go application with no ORM, and migrations are therefore expressed with raw SQL statements. Regardless, all changes will require review and approval from the Database team.
Database migrations will be idempotent, with guard clauses used whenever necessary. They are also intended to be forward compatible: in a clustered environment, a node running registry version `N` should not cause any issues when the database schema is from version `N+1`.
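For example, a hypothetical migration body with guard clauses might look like this (not an actual registry migration):

```go
// Hypothetical migration: the guard clauses make re-running it a no-op,
// and a node on registry version N simply ignores the extra column,
// keeping the schema forward compatible.
const addMediaTypeColumn = `
ALTER TABLE manifests
    ADD COLUMN IF NOT EXISTS media_type TEXT;

CREATE INDEX IF NOT EXISTS index_manifests_on_media_type
    ON manifests (media_type);
`
```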
### Observability
Adding one more component to the system makes it even more critical to guarantee that we have proper observability over the registry's behavior and its dependencies. It should be guaranteed that we have all the necessary tools in place before rolling out the new registry backed by the metadata database.
For this purpose, [error reporting with Sentry](https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/11297), [improved structured logging](https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/10933), and [improved HTTP metrics](https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/10935) were already implemented and released. At the time of writing, the GitLab.com rollout is in progress.
Additionally, the Prometheus metrics will be augmented with [details on the database connection pool](https://gitlab.com/gitlab-org/container-registry/-/issues/238). These will be added to the registry Grafana dashboards, joining the existing HTTP API, deployment, and storage metrics. The database cluster for the registry [will also have metrics and alerts](https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/11447).
Together, these resources should provide an adequate level of insight into the registry's performance and behavior.
### New Features and Breaking Changes
#### Third-Party Container Registries
GitLab ships with the GitLab container registry by default, but it's also compatible with third-party registries, as long as they comply with the [Docker Distribution V2 Specification](https://distribution.github.io/distribution/spec/api/), now superseded by the [Open Container Initiative (OCI) Image Specification](https://github.com/opencontainers/image-spec/blob/master/spec.md).
So far, we have tried to maintain full compatibility with third-party registries when adding new features. For example, in 12.8, we introduced a new [tag delete feature](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/23325) to delete a single tag without deleting the underlying manifest. Because this feature is not part of the Docker or OCI specifications, we have kept the previous behavior as a fallback option to maintain compatibility with third-party registries.
However, this will likely change in the future. Apart from online garbage collection, and as described in [challenges](#challenges), the metadata database will unblock the implementation of many requested features for the GitLab container registry in the mid/long term. Most of these features will only be available for instances using the GitLab container registry. They are not part of the Docker Distribution or OCI specifications, nor will we be able to provide a compatible fallback option.
For this reason, any features that require the use of the GitLab container registry will be disabled if using a third-party registry, for as long as third-party registries continue to be supported.
#### Synchronizing Changes With GitLab Rails
So far, the GitLab Rails and GitLab container registry releases and deployments have been fully independent, as we have not introduced any new API features or breaking changes, apart from the described tag delete feature.
The registry will remain independent from GitLab Rails changes, but in the mid/long term, the implementation of new features or breaking changes will imply a corresponding change in GitLab Rails, so the latter will depend on a specific minimum version of the registry.
For example, to track the size of each repository, we may extend the metadata database to store that information and then propagate it to GitLab Rails by extending the HTTP API that it consumes. In GitLab Rails, this new information would likely be stored in its database and processed to offer a new feature at the UI/API level.
This kind of change will require synchronization between the GitLab Rails and the GitLab container registry releases and deployments, as the former will depend on a specific version of the latter.
##### Feature Toggling
All GitLab Rails features dependent on a specific version of the registry should be guarded by validating the registry vendor and version.
This is already done to determine whether a tag should be deleted using the new tag delete feature (only available in the GitLab container registry v2.8.1+) or the old method. In this case, GitLab Rails sends an `OPTIONS` request to the registry tag route to determine whether the `DELETE` method is supported or not.
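The probe amounts to checking the methods advertised for the tag route. A sketch, written in Go for consistency with the other examples even though the real caller is GitLab Rails, and assuming the standard `Allow` response header:

```go
// Sketch of the capability probe: send OPTIONS to the tag route and
// check whether DELETE is advertised. The Allow-header check is an
// assumption based on standard HTTP semantics, not confirmed GitLab code.
func supportsTagDelete(client *http.Client, tagURL string) (bool, error) {
	req, err := http.NewRequest(http.MethodOptions, tagURL, nil)
	if err != nil {
		return false, err
	}
	resp, err := client.Do(req)
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()
	return strings.Contains(resp.Header.Get("Allow"), http.MethodDelete), nil
}
```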
Alternatively, and as the universal long-term solution, we need to determine the registry vendor, version, and supported features (the last two are only applicable if the vendor is GitLab) and persist it in the GitLab Rails database. This information can then be used in real time to toggle features or fall back to alternative methods, if possible. The initial implementation of this approach was introduced as part of [#204839](https://gitlab.com/gitlab-org/gitlab/-/issues/204839). Currently, it's only used for metrics purposes. Further improvements are required to guarantee that the version information is kept up to date in self-managed instances, where the registry may be hot swapped.
##### Release and Deployment
As described above, feature toggling offers a last line of defense against desynchronized releases and deployments, ensuring that GitLab Rails remains functional in case the registry version that supports new features is not yet available.
However, the release and deployment of GitLab Rails and the GitLab container registry should be synchronized to avoid any delays. Contrary to GitLab Rails, the registry release and deployment are manual processes, so special attention must be paid by maintainers to ensure that the GitLab Rails changes are only released and deployed after the corresponding registry changes.
As a solution to strengthen this process, a file can be added to the GitLab Rails codebase, containing the minimum required version of the registry. This file should be updated with every change that depends on a specific version of the registry. It should also be considered when releasing and deploying GitLab Rails, ensuring that the pipeline only goes through once the specified minimum required registry version is deployed.
## Iterations
1. Design metadata database schema;
1. Add support for managing metadata using the database;
1. Design plans and tools to facilitate the migration of small, medium and large repositories;
1. Implement online garbage collection;
1. Create database clusters in staging and production for GitLab.com;
1. Create automated deployment pipeline for GitLab.com;
1. Deployment and gradual migration of the existing registry for GitLab.com;
1. Rollout support for the metadata database to self-managed installs.
A more detailed list of all tasks, as well as periodic progress updates can be found in the epic [&2313](https://gitlab.com/groups/gitlab-org/-/epics/2313).
## Relevant Links
- [Allow administrators to run garbage collection with zero downtime](https://gitlab.com/groups/gitlab-org/-/epics/2313)
- [Proposal for continuous, on-demand online garbage collection](https://gitlab.com/gitlab-org/container-registry/-/issues/199)
- [Gradual migration proposal for the GitLab.com container registry](https://gitlab.com/gitlab-org/container-registry/-/issues/191)
- [Create a self-serve registry deployment](https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/316)
- [Database cluster for container registry](https://gitlab.com/gitlab-com/gl-infra/reliability/-/issues/11154)
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

View File

@ -1,253 +1,11 @@
---
status: implemented
creation-date: "2021-02-08"
authors: [ "@abrandl" ]
coach: "@glopezfernandez"
approvers: [ "@fabian", "@craig-gomes" ]
owning-stage: "~devops::data stores"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/database_testing/'
remove_date: '2025-07-08'
---
<!-- vale gitlab.FutureTense = NO -->
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/database_testing/).
# Database Testing
**Notice:** This blueprint has been partially implemented. We still plan to
iterate on the tooling. The content below is a historical version of the
blueprint, written prior to incorporating database testing into our development
workflow.
We have identified [common themes of reverted migrations](https://gitlab.com/gitlab-org/gitlab/-/issues/233391) and discovered
failed migrations breaking in both production and staging even when successfully
tested in a developer environment. We have also experienced production incidents
even with successful testing in staging. These failures are quite expensive:
they can have a significant effect on availability, block deployments, and
generate incident escalations. These escalations must be triaged and either
reverted or fixed forward. Often, this can take place without the original
author's involvement due to time zones and/or the criticality of the escalation.
With our increased deployment speeds and stricter uptime requirements, the need
for improving database testing is critical, particularly earlier in the
development process (shift left).
From a developer's perspective, it is hard, if not infeasible, to validate a
migration on a large enough dataset before it goes into production.
Our primary goal is to
**provide developers with immediate feedback for new migrations and other database-related changes tested on a full copy of the production database**,
and to do so with high levels of efficiency (particularly in terms of infrastructure costs) and security.
## Current day
Developers are expected to test database migrations prior to deploying to any
environment, but we lack the ability to perform testing against large
environments such as GitLab.com. The [developer database migration style guide](../../../development/migration_style_guide.md)
provides guidelines on migrations, and we focus on validating migrations during code review and testing
in CI and staging.
The [code review phase](../../../development/database_review.md) involves
Database Reviewers and Maintainers to manually check the migrations committed.
This often involves knowing and spotting problematic patterns and their
particular behavior on GitLab.com from experience. There is no large-scale
environment available that allows us to test database migrations before they are
being merged.
Testing in CI is done on a very small database. We mainly check forward/backward
migration consistency, evaluate RuboCop rules to detect well-known problematic
behaviors (static code checking), and have a few other, rather technical checks
in place (adding the right files, and so on). That is, we typically find code or other
rather simple errors, but cannot surface any data-related errors, which are
also typically not covered by unit tests either.
Once merged, migrations are deployed to the staging environment. Its
database size is less than 5% of the production database size as of January 2021
and its recent data distribution does not resemble the production site.
Oftentimes, we see migrations succeed in staging but then fail in production due
to query timeouts or other unexpected problems. Even if we catch problems in
staging, they are still expensive to reconcile, and ideally we want to catch those
problems as early as possible in the development cycle.
Today, we have gained experience with working on a thin-cloned production
database (more on this below) and already use it to provide developers with
access to production query plans, automated query feedback and suggestions with
optimizations. This is built around [Database Lab](https://gitlab.com/postgres-ai/database-lab)
and [Joe](https://gitlab.com/postgres-ai/joe), both available through Slack
(using ChatOps) and [postgres.ai](https://postgres.ai/).
## Vision
As a developer:
1. I am working on a GitLab code change that includes a data migration and changes a heavy database query.
1. I push my code, create a merge request, and provide an example query in the description.
1. The pipeline executes the data migration and examines the query in a large-scale environment (a copy of GitLab.com).
1. Once the pipeline finishes, the merge request gets detailed feedback and information about the migration and the query I provided. This is based on a full clone of the production database with a state that is very close to production (minutes).
For database migrations, the information gathered from execution on the clone includes:
- Overall runtime.
- Detailed statistics for queries being executed in the migration (normalizing queries and showing their frequencies and execution times as plots).
- Dangerous locks held during the migration (which would cause blocking situations in production).
For database queries, we can automatically gather:
- Query plans along with visualization.
- Execution times and predictions for production.
- Suggestions on optimizations from Joe.
- Memory and IO statistics.
After having gotten that feedback:
1. I can go back and investigate a performance problem with the data migration.
1. Once I have a fix pushed, I can repeat the above cycle and eventually send my
merge request for database review. During the database review, the database
reviewer and maintainer have all the additional generated information
available to them to make an informed decision on the performance of the
introduced changes.
This information gathering is done in a protected and safe environment, making
sure that there is no unauthorized access to production data and we can safely
execute code in this environment.
The intended benefits include:
- Shifting left: Allow developers to understand large-scale database performance
and what to expect to happen on GitLab.com in a self-service manner
- Identify errors that are only generated when working against a production
scale dataset with real data (with inconsistencies or unexpected patterns)
- Automate the information gathering phase to make it easier for everybody
involved in code review (developer, reviewer, maintainer) by providing
relevant details automatically and upfront.
## Technology and next steps
We already use Database Lab from [postgres.ai](https://postgres.ai/), which is a
thin-cloning technology. We maintain a PostgreSQL replica which is up to date
with production data but does not serve any production traffic. This runs
Database Lab which allows us to quickly create a full clone of the production
dataset (in the order of seconds).
Internally, this is based on ZFS and implements a "thin-cloning technology".
That is, ZFS snapshots are used to clone the data, and Database Lab exposes a
full read/write PostgreSQL cluster based on the cloned data. This is called a
*thin clone*. It is rather short-lived and is destroyed shortly
after we are finished using it.
A thin clone is fully read/write. This allows us to execute migrations on top of it.
Database Lab provides an API we can interact with to manage thin clones. In
order to automate the migration and query testing, we add steps to the
`gitlab/gitlab-org` CI pipeline. This triggers automation that performs the
following steps for a given merge request:
1. Create a thin-clone with production data for this testing session.
1. Pull GitLab code from the merge request.
1. Execute migrations and gather all necessary information from it.
1. Execute query testing and gather all necessary information from it.
1. Post back the results of the migration and query testing to the merge request.
1. Destroy the thin-clone.
### Short-term
The short-term focus is on testing regular migrations (typically schema changes)
and using the existing Database Lab instance from postgres.ai for it.
In order to secure this process and meet compliance goals, the runner
environment is treated as a *production* environment and similarly locked down,
monitored and audited. Only Database Maintainers have access to the CI pipeline
and its job output. Everyone else can only see the results and statistics posted
back on the merge request.
We implement a secured CI pipeline on [Internal GitLab for Operations](https://ops.gitlab.net/users/sign_in)
that adds the execution steps outlined above. The goal is to secure this pipeline
to solve the following problem:
Make sure we strongly protect production data, even though we allow everyone
(GitLab team/developers) to execute arbitrary code on the thin clone, which contains production data.
This is in principle achieved by locking down the GitLab Runner instance
executing the code and its containers on a network level, such that no data can
escape over the network. We make sure no communication can happen to the outside
world from within the container executing the GitLab Rails code (and its
database migrations).
Furthermore, we limit the ability to view the results of the jobs (including the
output printed from code) to Maintainer and Owner level on the
[Internal GitLab for Operations](https://ops.gitlab.net/users/sign_in) pipeline and provide only
a high-level summary back to the original MR. If there are issues or errors in
one of the jobs, the database Maintainer assigned to review the MR can check
the original job for more details.
With this step implemented, we already have the ability to execute database
migrations on the thin-cloned GitLab.com database automatically from GitLab CI
and provide feedback to the merge request and the developer. The content of
that feedback is expected to evolve over time, and we can continuously add to
this.
We already have an
[MVC-style implementation for the pipeline](https://gitlab.com/gitlab-org/database-team/gitlab-com-migrations)
for reference and an [example merge request with feedback](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50793#note_477815261)
from the pipeline.
The short-term goal is detailed in [this epic](https://gitlab.com/groups/gitlab-org/database-team/-/epics/6).
### Mid-term - Improved feedback, query testing and background migration testing
Mid-term, we plan to expand the level of detail the testing pipeline reports
back to the merge request and expand its scope to cover query testing, too. By
doing so, we use our experience from database code reviews and thin-clone
technology, and bring this closer to the GitLab workflow. Instead of
reaching out to different tools (`postgres.ai`, `joe`, Slack, plan
visualizations, and so on), we bring this back to GitLab, working directly on
the merge request.
Secondly, we plan to cover background migration testing, too. These are
typically data migrations that are scheduled to run over a long period of time.
The success of both the scheduling phase and the job execution phase typically
depends a lot on data distribution, which only surfaces when running these
migrations on actual production data. In order to become confident about a
background migration, we plan to provide the following feedback:
1. Scheduling phase - query statistics (for example a histogram of query
execution times), job statistics (how many jobs, overall duration, and so on),
batch sizes.
1. Execution phase - using a few instances of a job as examples, we execute those to gather query and runtime statistics.
### Long-term - incorporate into GitLab product
There are opportunities to discuss for extracting features from this into GitLab
itself. For example, annotating the merge request with query examples and
attaching feedback gathered from the testing run can become a first-class
citizen instead of using merge request description and comments for it. We plan
to evaluate those ideas as we see those being used in earlier phases and bring
our experience back into the product.
## An alternative discussed: Anonymization
At the core of this problem lies the concern about executing (potentially arbitrary)
code on a production dataset and making sure the production data is
well protected. The approach discussed above solves this by strongly limiting
access to the output of said code.
An alternative approach we have discussed and abandoned is to "scrub" and
anonymize production data. The idea is to remove any sensitive data from the
database and use the resulting dataset for database testing. This has a lot of
downsides which led us to abandon the idea:
- Anonymization is complex by nature - it is a hard problem to call a "scrubbed clone"
  actually safe to work with in public. Different data types may require
  different anonymization techniques (for example, anonymizing sensitive
  information inside a JSON field), and only focusing on one attribute at a time
  does not guarantee that a dataset is fully anonymized (for example, join
  attacks, or using timestamps in conjunction with public profiles/projects to
  de-anonymize users by their activity).
- Anonymization requires an additional process to keep track and update the set
of attributes considered as sensitive, ongoing maintenance and security
reviews every time the database schema changes.
- Annotating data as "sensitive" is error-prone: using the wrong anonymization
  approach for a data type, or accidentally not marking one sensitive attribute
  as such, can lead to a data breach.
- Scrubbing not only removes sensitive data, but it also changes data
distribution, which greatly affects performance of migrations and queries.
- Scrubbing heavily changes the database contents, potentially updating a lot of
  data, which leads to different data storage details (think MVCC bloat),
  affecting performance of migrations and queries.
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

View File

@ -1,122 +1,11 @@
---
status: implemented
creation-date: "2020-06-10"
authors: [ "@ayufan" ]
coach: "@glopezfernandez"
approvers: [ "@kencjohnston", "@craig-gomes" ]
owning-stage: "~devops::non_devops"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/feature_flags_development/'
remove_date: '2025-07-08'
---
# Development Feature Flags Architecture
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/feature_flags_development/).
The usage of feature flags has become crucial for the development of GitLab. Feature
flags are a convenient way to ship changes early and safely roll them out
to a wide audience, ensuring that a feature is stable and performant.
Since the presence of a feature is controlled with a dedicated condition, a
developer can decide on the best time for testing the feature, ensuring that the
feature is not enabled prematurely.
## Challenges
The extensive usage of feature flags poses a few challenges:
- Each feature flag that we add to the codebase is ~"technical debt", as it adds to a
  matrix of configurations.
- Testing each combination of feature flags is close to impossible, so we
  instead try to optimize our testing of feature flags for the most common
  scenarios.
- There's a growing challenge of maintaining an increasing number of feature flags.
  We sometimes forget how our feature flags are configured or why we haven't
  yet removed the feature flag.
- The usage of feature flags can also be confusing to people outside of
  development who might not fully understand the dependence of a ~"type::feature" or ~"type::bug"
  fix on a feature flag, how this feature flag is configured, or whether the feature
  should be announced as part of the release post.
- Maintaining feature flags poses the additional challenge of having to manage
  different configurations across different environments/targets. We have
  different configurations of feature flags for testing, for development, for
  staging, for production, and for what is shipped to our customers as part of the
  on-premise offering.
## Goals
The biggest challenge today with our feature flag usage is its implicit
nature. Feature flags are part of the codebase, making them hard to understand
outside of the development function.
We should aim to make our feature-flag-based development accessible to
any interested party.
- developer / engineer
  - can easily add a new feature flag and configure its state
  - can quickly find whom to reach when touching another feature flag
  - can quickly find stale feature flags
- engineering manager
  - can understand what feature flags their group manages
- engineering manager and director
  - can understand how much ~"technical debt" is inflicted due to the number of feature flags that we have to manage
  - can understand how many feature flags are added and removed in each release
- product manager and documentation writer
  - can understand what features are gated by what feature flags
  - can understand if a feature, and thus its feature flag, is generally available on GitLab.com
  - can understand if a feature, and thus its feature flag, is enabled by default for on-premise installations
- delivery engineer
  - can understand what feature flags are introduced and changed between subsequent deployments
- support and reliability engineer
  - can understand how feature flags changed between releases: which feature flags became enabled and which were removed
  - can quickly find relevant information about a feature flag to identify individuals who might help with an ongoing support request or incident
## Proposal
To help with the above goals, we should aim to make our feature flag usage explicit
and understood by all involved parties.
Introduce a YAML-described `feature-flags/<name-of-feature.yml>` that would
allow us to:
1. Have a central place where all feature flags are documented,
1. Describe why the given feature flag was introduced,
1. Record the relevant issue and merge request it was introduced by,
1. Build automated documentation with all feature flags in the codebase,
1. Track how many feature flags exist per given group,
1. Track how many feature flags are added and removed between releases,
1. Make this information easily accessible to all,
1. Allow our customers to easily discover how to enable features and quickly
   find out what changed between different releases.
### The `YAML`
```yaml
---
name: ci_disallow_to_create_merge_request_pipelines_in_target_project
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/40724
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/235119
group: group::environments
type: development
default_enabled: false
```
## Reasons
These are the reasons why these changes are needed:
- we have around 500 different feature flags today
- we have a hard time tracking their usage
- we have ambiguous usage of feature flags, with different `default_enabled:` values and
  different `actors` used
- we lack a clear indication of who owns what feature flag and where to find
  relevant information
- we do not emphasize the desire to create a feature flag rollout issue to
  indicate that a feature flag is in fact ~"technical debt"
- we don't know exactly what feature flags we have in our codebase
- we don't know exactly how our feature flags are configured for different
  environments: what is being used for `test`, what we ship for `on-premise`, and
  what our settings are for `staging`, `qa`, and `production`
## Iterations
This work is being done as part of a dedicated epic:
[Improve internal usage of Feature Flags](https://gitlab.com/groups/gitlab-org/-/epics/3551).
This epic describes the meta-level reasons for making these changes.
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

View File

@ -10,7 +10,7 @@ participating-stages: []
# Feature Flags usage in GitLab development and operations
This blueprint builds upon [the Development Feature Flags Architecture blueprint](../feature_flags_development/index.md).
This blueprint builds upon [the Development Feature Flags Architecture blueprint](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/feature_flags_development/).
## Summary
@ -84,7 +84,7 @@ The feature flag rollout process is currently:
### Technical debt and codebase complexity
[The challenges from the Development Feature Flags Architecture blueprint still stand](../feature_flags_development/index.md#challenges).
[The challenges from the Development Feature Flags Architecture blueprint still stand](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/feature_flags_development/#challenges).
Additionally, there are new challenges:
View File
@ -1,308 +1,11 @@
---
status: implemented
creation-date: "2022-11-23"
authors: [ "@shinya.maeda" ]
coach: "@DylanGriffith"
approvers: [ "@nagyv-gitlab", "@cbalane", "@hustewart", "@hfyngvason" ]
owning-stage: "~devops::deploy"
participating-stages: [Environments]
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/gitlab_agent_deployments/'
remove_date: '2025-07-08'
---
<!-- vale gitlab.FutureTense = NO -->
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/gitlab_agent_deployments/).
# View and manage resources deployed by GitLab agent For Kubernetes
## Summary
As part of the [GitLab Kubernetes Dashboard](https://gitlab.com/groups/gitlab-org/-/epics/2493) epic,
users want to view and manage their resources deployed by GitLab agent For Kubernetes.
Users should be able to interact with the resources through GitLab UI, such as Environment Index/Details page.
This blueprint describes how the association is established and how these domain models interact with each other.
## Motivation
### Goals
- The proposed architecture can be used in [GitLab Kubernetes Dashboard](https://gitlab.com/groups/gitlab-org/-/epics/2493).
- The proposed architecture can be used in [Organization-level Environment dashboard](https://gitlab.com/gitlab-org/gitlab/-/issues/241506).
- The cluster resources and events can be visualized per [GitLab Environment](../../../ci/environments/index.md).
An environment-specific view scoped to the resources managed either directly or indirectly by a deployment commit.
- Support both GitOps mode and [CI Access mode](../../../user/clusters/agent/ci_cd_workflow.md#authorize-the-agent).
### Non-Goals
- The design details of [GitLab Kubernetes Dashboard](https://gitlab.com/groups/gitlab-org/-/epics/2493) and [Organization-level Environment dashboard](https://gitlab.com/gitlab-org/gitlab/-/issues/241506).
- Support Environment/Deployment features that rely on GitLab CI/CD pipelines, such as [Protected Environments](../../../ci/environments/protected_environments.md), [Deployment Approvals](../../../ci/environments/deployment_approvals.md), [Deployment safety](../../../ci/environments/deployment_safety.md), and [Environment rollback](../../../ci/environments/index.md#environment-rollback). These features are already available in CI Access mode; however, they are not available in GitOps mode.
## Proposal
### Overview
- GitLab Environment and GitLab agent For Kubernetes have 1-to-1 relationship.
- GitLab Environment tracks all resources produced by the connected [agent](../../../user/clusters/agent/index.md). This includes not only resources written in manifest files but also subsequently generated resources (for example, `Pod`s created by `Deployment` manifest file).
- GitLab Environment renders dependency graph, such as `Deployment` => `ReplicaSet` => `Pod`. This is for providing ArgoCD-style resource view.
- GitLab Environment has the Resource Health status that represents a summary of resource statuses, such as `Healthy`, `Progressing` or `Degraded`.
```mermaid
flowchart LR
subgraph Kubernetes["Kubernetes"]
subgraph ResourceGroupProduction["Production"]
direction LR
ResourceGroupProductionService(["Service"])
ResourceGroupProductionDeployment(["Deployment"])
ResourceGroupProductionPod1(["Pod1"])
ResourceGroupProductionPod2(["Pod2"])
end
subgraph ResourceGroupStaging["Staging"]
direction LR
ResourceGroupStagingService(["Service"])
ResourceGroupStagingDeployment(["Deployment"])
ResourceGroupStagingPod1(["Pod1"])
ResourceGroupStagingPod2(["Pod2"])
end
end
subgraph GitLab
subgraph Organization
subgraph Project
environment1["production environment"]
environment2["staging environment"]
end
end
end
environment1 --- ResourceGroupProduction
environment2 --- ResourceGroupStaging
ResourceGroupProductionService -.- ResourceGroupProductionDeployment
ResourceGroupProductionDeployment -.- ResourceGroupProductionPod1
ResourceGroupProductionDeployment -.- ResourceGroupProductionPod2
ResourceGroupStagingService -.- ResourceGroupStagingDeployment
ResourceGroupStagingDeployment -.- ResourceGroupStagingPod1
ResourceGroupStagingDeployment -.- ResourceGroupStagingPod2
```
### Existing components and relationships
- [GitLab Project](../../../user/project/working_with_projects.md) and GitLab Environment have 1-to-many relationship.
- GitLab Project and Agent have 1-to-many _direct_ relationship. Only one project can own a specific agent.
- GitOps mode
- GitLab Project and Agent do _NOT_ have many-to-many _indirect_ relationship yet. This will be supported in [Manifest projects outside of the Agent configuration project](https://gitlab.com/groups/gitlab-org/-/epics/7704).
- [CI Access mode](../../../user/clusters/agent/ci_cd_workflow.md#authorize-the-agent)
- GitLab Project and Agent have many-to-many _indirect_ relationship. The project owning the agent can [share the access with the other projects](../../../user/clusters/agent/ci_cd_workflow.md#authorize-the-agent-to-access-projects-in-your-groups). (NOTE: Technically, only running jobs inside the project are allowed to access the cluster due to job-token authentication.)
### Issues
- GitLab Environment should have the ID of the GitLab agent For Kubernetes as a foreign key.
- GitLab Environment should have parameters for how to group resources in the associated cluster; for example, `namespace`, `label`, and `inventory-id` (GitOps mode only) can be passed as parameters.
- GitLab Environment should be able to fetch all relevant resources, including both default resource kinds and other [Custom Resources](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/).
- GitLab Environment should be aware of the dependency graph.
- GitLab Environment should be able to compute the Resource Health status from the associated resources.
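As a minimal sketch, the association could look like this on the Rails side.
The column and association names below are assumptions for illustration, not a
committed schema:

```ruby
# Illustrative sketch; column and association names are assumptions.
class Environment < ApplicationRecord
  # Foreign key to the GitLab agent For Kubernetes.
  belongs_to :cluster_agent, class_name: 'Clusters::Agent', optional: true

  # Parameter used to group resources in the associated cluster.
  # Kubernetes namespace names are limited to 63 characters.
  validates :kubernetes_namespace, length: { maximum: 63 }, allow_nil: true
end
```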
### Example
This is an example of how the architecture works in push-based deployment.
The feature is documented [here](../../../user/clusters/agent/ci_cd_workflow.md) as CI access mode.
```mermaid
flowchart LR
subgraph ProductionKubernetes["Production Kubernetes"]
subgraph ResourceGroupProductionFrontend["Production"]
direction LR
ResourceGroupProductionFrontendService(["Service"])
ResourceGroupProductionFrontendDeployment(["Deployment"])
ResourceGroupProductionFrontendPod1(["Pod1"])
ResourceGroupProductionFrontendPod2(["Pod2"])
end
subgraph ResourceGroupProductionBackend["Staging"]
direction LR
ResourceGroupProductionBackendService(["Service"])
ResourceGroupProductionBackendDeployment(["Deployment"])
ResourceGroupProductionBackendPod1(["Pod1"])
ResourceGroupProductionBackendPod2(["Pod2"])
end
subgraph ResourceGroupProductionPrometheus["Monitoring"]
direction LR
ResourceGroupProductionPrometheusService(["Service"])
ResourceGroupProductionPrometheusDeployment(["Deployment"])
ResourceGroupProductionPrometheusPod1(["Pod1"])
ResourceGroupProductionPrometheusPod2(["Pod2"])
end
end
subgraph GitLab
subgraph Organization
subgraph OperationGroup
subgraph AgentManagementProject
AgentManagementAgentProduction["Production agent"]
AgentManagementManifestFiles["Kubernetes Manifest Files"]
AgentManagementEnvironmentProductionPrometheus["production prometheus environment"]
AgentManagementPipelines["CI/CD pipelines"]
end
end
subgraph DevelopmentGroup
subgraph FrontendAppProject
FrontendAppCode["VueJS"]
FrontendDockerfile["Dockerfile"]
end
subgraph BackendAppProject
BackendAppCode["Golang"]
BackendDockerfile["Dockerfile"]
end
subgraph DeploymentProject
DeploymentManifestFiles["Kubernetes Manifest Files"]
DeploymentPipelines["CI/CD pipelines"]
DeploymentEnvironmentProductionFrontend["production frontend environment"]
DeploymentEnvironmentProductionBackend["production backend environment"]
end
end
end
end
DeploymentEnvironmentProductionFrontend --- ResourceGroupProductionFrontend
DeploymentEnvironmentProductionBackend --- ResourceGroupProductionBackend
AgentManagementEnvironmentProductionPrometheus --- ResourceGroupProductionPrometheus
ResourceGroupProductionFrontendService -.- ResourceGroupProductionFrontendDeployment
ResourceGroupProductionFrontendDeployment -.- ResourceGroupProductionFrontendPod1
ResourceGroupProductionFrontendDeployment -.- ResourceGroupProductionFrontendPod2
ResourceGroupProductionBackendService -.- ResourceGroupProductionBackendDeployment
ResourceGroupProductionBackendDeployment -.- ResourceGroupProductionBackendPod1
ResourceGroupProductionBackendDeployment -.- ResourceGroupProductionBackendPod2
ResourceGroupProductionPrometheusService -.- ResourceGroupProductionPrometheusDeployment
ResourceGroupProductionPrometheusDeployment -.- ResourceGroupProductionPrometheusPod1
ResourceGroupProductionPrometheusDeployment -.- ResourceGroupProductionPrometheusPod2
AgentManagementAgentProduction -- Shared with --- DeploymentProject
DeploymentPipelines -- "Deploy" --> ResourceGroupProductionFrontend
DeploymentPipelines -- "Deploy" --> ResourceGroupProductionBackend
AgentManagementPipelines -- "Deploy" --> ResourceGroupProductionPrometheus
```
### Further details
#### Multi-Project Deployment Pipelines
The microservice project setup can be improved by [Multi-Project Deployment Pipelines](https://gitlab.com/groups/gitlab-org/-/epics/8483):
- Deployment Project can behave as the shared deployment engine for any upstream application projects and environments.
- Environments can be created within the application projects. It gives more visibility of environments for developers.
- Deployment Project can be managed under the Operator group, giving more segregation of duties.
- Users don't need to set up [RBAC to restrict CI/CD jobs](../../../user/clusters/agent/ci_cd_workflow.md#restrict-project-and-group-access-by-using-impersonation).
- This is especially helpful for [dynamic environments](../../../ci/environments/index.md#create-a-dynamic-environment) like review apps.
```mermaid
flowchart LR
subgraph ProductionKubernetes["Production Kubernetes"]
subgraph ResourceGroupProductionFrontend["Frontend"]
direction LR
ResourceGroupProductionFrontendService(["Service"])
ResourceGroupProductionFrontendDeployment(["Deployment"])
ResourceGroupProductionFrontendPod1(["Pod1"])
ResourceGroupProductionFrontendPod2(["Pod2"])
end
subgraph ResourceGroupProductionBackend["Backend"]
direction LR
ResourceGroupProductionBackendService(["Service"])
ResourceGroupProductionBackendDeployment(["Deployment"])
ResourceGroupProductionBackendPod1(["Pod1"])
ResourceGroupProductionBackendPod2(["Pod2"])
end
subgraph ResourceGroupProductionPrometheus["Monitoring"]
direction LR
ResourceGroupProductionPrometheusService(["Service"])
ResourceGroupProductionPrometheusDeployment(["Deployment"])
ResourceGroupProductionPrometheusPod1(["Pod1"])
ResourceGroupProductionPrometheusPod2(["Pod2"])
end
end
subgraph GitLab
subgraph Organization
subgraph OperationGroup
subgraph DeploymentProject
DeploymentAgentProduction["Production agent"]
DeploymentManifestFiles["Kubernetes Manifest Files"]
DeploymentEnvironmentProductionPrometheus["production prometheus environment"]
DeploymentPipelines["CI/CD pipelines"]
end
end
subgraph DevelopmentGroup
subgraph FrontendAppProject
FrontendDeploymentPipelines["CI/CD pipelines"]
FrontendEnvironmentProduction["production environment"]
end
subgraph BackendAppProject
BackendDeploymentPipelines["CI/CD pipelines"]
BackendEnvironmentProduction["production environment"]
end
end
end
end
FrontendEnvironmentProduction --- ResourceGroupProductionFrontend
BackendEnvironmentProduction --- ResourceGroupProductionBackend
DeploymentEnvironmentProductionPrometheus --- ResourceGroupProductionPrometheus
ResourceGroupProductionFrontendService -.- ResourceGroupProductionFrontendDeployment
ResourceGroupProductionFrontendDeployment -.- ResourceGroupProductionFrontendPod1
ResourceGroupProductionFrontendDeployment -.- ResourceGroupProductionFrontendPod2
ResourceGroupProductionBackendService -.- ResourceGroupProductionBackendDeployment
ResourceGroupProductionBackendDeployment -.- ResourceGroupProductionBackendPod1
ResourceGroupProductionBackendDeployment -.- ResourceGroupProductionBackendPod2
ResourceGroupProductionPrometheusService -.- ResourceGroupProductionPrometheusDeployment
ResourceGroupProductionPrometheusDeployment -.- ResourceGroupProductionPrometheusPod1
ResourceGroupProductionPrometheusDeployment -.- ResourceGroupProductionPrometheusPod2
FrontendDeploymentPipelines -- "Trigger downstream pipeline" --> DeploymentProject
BackendDeploymentPipelines -- "Trigger downstream pipeline" --> DeploymentProject
DeploymentPipelines -- "Deploy" --> ResourceGroupProductionFrontend
DeploymentPipelines -- "Deploy" --> ResourceGroupProductionBackend
```
## Design and implementation details
### Associate Environment with Agent
Users can explicitly associate a GitLab agent For Kubernetes with a GitLab Environment in the settings UI.
The frontend will use this associated agent for authenticating/authorizing the user access, which is described in a later section.
We need to adjust the `read_cluster_agent` permission in DeclarativePolicy to support agents shared by an external project (also known as the Agent management project).
### Fetch resources through `user_access`
When a user visits an environment page, GitLab frontend fetches an environment via GraphQL. The frontend additionally fetches the associated agent ID and namespace.
Here is an example of GraphQL query:
```graphql
{
project(fullPath: "group/project") {
id
environment(name: "<environment-name>") {
slug
kubernetesNamespace
clusterAgent {
id
name
project {
name
}
}
}
}
}
```
GitLab frontend authenticates/authorizes the user access with a [browser cookie](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/master/doc/kubernetes_user_access.md#browser-cookie-on-gitlab-frontend). If the access is forbidden, the frontend shows an error message that `You don't have access to an agent that deployed to this environment. Please contact agent administrator if you are allowed in "user_access" in agent config file. See <troubleshooting-doc-link>`.
After the user has gained access to the agent, GitLab frontend fetches specific Resource kinds (for example, `Deployment`, `Pod`) in the Kubernetes cluster with the following parameters:
- `namespace` ... `#{environment.kubernetesNamespace}`
If no resources are found, it is likely that the users have not embedded these labels into their resources. In this case, the frontend shows a warning message: `There are no resources found for the environment. Do resources have GitLab preserved labels? See <troubleshooting-doc-link>`.
### Dependency graph
- GitLab frontend uses [Owner References](https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/) to identify the dependencies between resources. These are embedded in resources as the `metadata.ownerReferences` field.
- For the resources that don't have owner references, we can use [Well-Known Labels, Annotations and Taints](https://kubernetes.io/docs/reference/labels-annotations-taints/) as a complement. For example, `EndpointSlice` doesn't have `metadata.ownerReferences`, but has `kubernetes.io/service-name` as a reference to the parent `Service` resource.
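As a minimal sketch, assuming the resources have already been fetched and
parsed into hashes shaped like Kubernetes API objects, the dependency graph
can be derived like this:

```ruby
# Illustrative sketch: build a parent UID => child UIDs map from
# metadata.ownerReferences, falling back to the well-known
# `kubernetes.io/service-name` label for resources such as EndpointSlice.
def dependency_graph(resources)
  graph = Hash.new { |hash, key| hash[key] = [] }

  resources.each do |resource|
    child = resource.dig('metadata', 'uid')
    owners = resource.dig('metadata', 'ownerReferences') || []

    if owners.empty?
      service = resource.dig('metadata', 'labels', 'kubernetes.io/service-name')
      graph[service] << child if service
    else
      owners.each { |owner| graph[owner['uid']] << child }
    end
  end

  graph
end
```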
### Health status of resources
- GitLab frontend computes the status summary from the fetched resources, similar to ArgoCD's [Resource Health](https://argo-cd.readthedocs.io/en/stable/operator-manual/health/): for example, `Healthy`, `Progressing`, `Degraded`, and `Suspended`. The formula is TBD.
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,133 +1,11 @@
---
status: implemented
creation-date: "2023-10-18"
authors: [ "@DylanGriffith" ]
coach:
approvers: [ "@rymai", "@tigerwnz" ]
owning-stage: "~devops::tenant scale"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/gitlab_housekeeper/'
remove_date: '2025-07-08'
---
<!-- vale gitlab.FutureTense = NO -->
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/gitlab_housekeeper/).
# GitLab Housekeeper - automating merge requests
## Summary
This blueprint documents the philosophy behind the
["GitLab Housekeeper" gem](https://gitlab.com/gitlab-org/gitlab/-/tree/master/gems/gitlab-housekeeper)
which was introduced in
<https://gitlab.com/gitlab-org/gitlab/-/merge_requests/139492> and has already
been used to create many merge requests.
The tool should be used to save developers from mundane, repetitive tasks that
can be automated. The tool is scoped to any task where a developer needs to
create a straightforward merge request whose content is known ahead of time.
This tool should be useful for at least the following kinds of mundane MRs
we create:
1. Remove a feature flag after X date
1. Remove an unused index where the unused index is identified by some
automation
1. Remove an `ignore_column` after X date (part of renaming/removing columns
multi-step procedure)
1. Populate sharding keys for organizations/cells on tables that are missing a
sharding key
## Motivation
We've observed there are many cases where developers are doing a lot of
manual work for tasks that are entirely predictable and automatable. Often
these manual tasks are done after waiting some known period of time. As such we
usually create an issue and set a future milestone. Then, in the future, the
developer remembers to follow up on that issue and opens an MR to make the
manual change.
The biggest examples we've seen lately are:
1. Feature flag removal: <https://gitlab.com/groups/gitlab-org/-/epics/5325>. We
have many opportunities for automation with feature flags, but this blueprint
focuses on removing the feature flag after it's fully rolled out. This step
is often forgotten, leading to growing technical debt.
1. Removing duplicated or unused indexes in Postgres:
<https://gitlab.com/gitlab-org/gitlab/-/issues/385701>. For now we're
developing automation that creates issues and assigns them to groups to
follow up and manually open MRs to remove them. This blueprint would take it
a step further and the automation would just create the MRs to remove them
once we have identified them.
1. Removing out of date `ignore_column` references:
<https://docs.gitlab.com/ee/development/database/avoiding_downtime_in_migrations.html#removing-the-ignore-rule-release-m2>
. For now we leave a note in our code telling us the date it needs to be
removed and often create an issue as a reminder. This blueprint proposes
that automation just reads this note and opens the MR to remove it after the
date.
1. Adding and backfilling sharding keys for organizations for Cells:
<https://gitlab.com/gitlab-org/gitlab/-/merge_requests/133796>. The cells
architecture depends on all tables having a sharding key that is attributed
to an organization. We will need to backfill this for ~300 tables. Much of
this will be repetitive and mundane work that we can automate provided that
groups just identify what the name of the sharding key should be and how we
will backfill it. As such we can automate the creation of MRs that guess the
sharding key and owning groups can check and correct those MRs. Then we can
automate the MR creation for adding the columns and backfilling the data.
Some kind of automation like this will be necessary to finish this work in a
reasonable timeframe.
### Goals
1. Identify the common tasks that take development time and automate them.
1. Focus on MR creation rather than issue creation as MRs are the results we
want and issues are a process for reminding us to get those results.
1. Improve developer job satisfaction by knowing that automation is doing the
busy work while we get to do the challenging and creative work.
1. Developers should be encouraged to contribute to the automation framework
when they see a pattern rather than documenting the manual work for future
developers to do it again.
1. Automation MRs should be very easily identified, and reviewed and merged much
more quickly than other MRs. If our automation MRs cause too much effort for
reviewers, the costs may outweigh the benefits. This might mean that some
automations get disabled when they are just noisy.
## Solution
The
[GitLab Housekeeper gem](https://gitlab.com/gitlab-org/gitlab/-/tree/master/gems/gitlab-housekeeper)
should be used to automate creation of mundane merge requests.
Using this tool reflects our
[bias for action](https://handbook.gitlab.com/handbook/values/#bias-for-action)
subvalue. As such, developers should prefer contributing a new
[keep](https://gitlab.com/gitlab-org/gitlab/-/tree/master/keeps) over the following:
1. Documenting a process that involves creating several merge requests over a
period of time
1. Setting up periodic reminders for developers (in Slack or issues) to create
some merge request
The keeps may sometimes take more work to implement than documentation or
reminders, so judgement should be used to assess the likely time savings from
using automation. The `gitlab-housekeeper` gem will evolve over time with many
utilities that make it simpler to contribute new keeps, and it is expected that
over time the cost of implementing a keep will be small enough that we will
mostly prefer this whenever developers need to do a repeatable task more than a
few times.
## Design and implementation details
The key details of this architecture are:
1. The design of this tool is like a combination of `rubocop -a` and Renovate
bot. It extends `rubocop -a` by understanding when things need to be removed
after certain deadlines, and by creating a steady stream of manageable
merge requests for the reviewer rather than leaving those decisions to the
developer. Like Renovate bot, it attempts to create MRs periodically and
assign them to the right people to review.
1. The keeps live in the GitLab repo which means that there are no
dependencies to update and the keeps can use code inside the
GitLab codebase.
1. The script can be run locally by a developer or can be run periodically
in some automated way.
1. The keeps are able to use any data sources (for example, local code, Prometheus,
the Postgres database archive, logs) needed to determine whether and how to make
the change.
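As a rough sketch, a keep is a class that yields one proposed change at a
time, and the gem turns each change into a merge request. The helper methods
and `Change` attributes below are illustrative, so check the gem's README for
the current interface:

```ruby
# keeps/remove_expired_ignore_columns.rb -- illustrative sketch only.
module Keeps
  class RemoveExpiredIgnoreColumns < ::Gitlab::Housekeeper::Keep
    def each_change
      expired_ignore_rules.each do |file|
        remove_ignore_rule(file) # hypothetical helper that edits the file

        change = ::Gitlab::Housekeeper::Change.new
        change.identifiers = ['remove_ignore_column', file]
        change.title = "Remove expired ignore_column rule from #{file}"
        change.description = 'The removal date noted next to the rule has passed.'
        change.changed_files = [file]

        yield change
      end
    end

    private

    def expired_ignore_rules
      # Hypothetical: scan the codebase for ignore rules past their remove date.
      []
    end

    def remove_ignore_rule(file)
      # Hypothetical in-place file edit.
    end
  end
end
```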
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,141 +1,11 @@
---
status: implemented
creation-date: "2020-12-03"
authors: [ "@ash2k" ]
coach: "@andrewn"
approvers: [ "@nicholasklick", "@nagyv-gitlab" ]
owning-stage: "~devops::configure"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/gitlab_to_kubernetes_communication/'
remove_date: '2025-07-08'
---
# GitLab to Kubernetes communication
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/gitlab_to_kubernetes_communication/).
The goal of this document is to define how GitLab can communicate with Kubernetes
and in-cluster services through the GitLab agent.
## Challenges
### Lack of network connectivity
For various features that exist today, GitLab communicates with Kubernetes by directly
or indirectly calling its API endpoints. This works well, as long as a network
path from GitLab to the cluster exists, which isn't always the case:
- GitLab.com and a self-managed cluster, where the cluster is not exposed to the Internet.
- GitLab.com and a cloud-vendor managed cluster, where the cluster is not exposed to the Internet.
- Self-managed GitLab and a cloud-vendor managed cluster, where the cluster is not
exposed to the Internet and there is no private peering between the cloud network
and the customer's network.
This last item is the hardest to address, as something must give to create a network
path. This feature gives the customer an extra option (exposing the `gitlab-kas` domain but
not the whole GitLab instance) in addition to the existing options (peering the networks,
or exposing one of the two sides).
Even if technically possible, it's almost always undesirable to expose a Kubernetes
cluster's API to the Internet for security reasons. As a result, our customers
are reluctant to do so, and are faced with a choice of security versus the features
GitLab provides for connected clusters.
This choice is true not only for Kubernetes' API, but for all APIs exposed by services
running on a customer's cluster that GitLab may need to access. For example,
Prometheus running in a cluster must be exposed for the GitLab integration to access it.
### Cluster-admin permissions
Both current integrations - building your own cluster (certificate-based) and GitLab-managed
cluster in a cloud - require granting full `cluster-admin` access to GitLab. Credentials
are stored on the GitLab side and this is yet another security concern for our customers.
For more discussion on these issues, read
[issue #212810](https://gitlab.com/gitlab-org/gitlab/-/issues/212810).
## GitLab agent epic
To address these challenges and provide some new features, the Configure group
is building an active in-cluster component that inverts the
direction of communication:
1. The customer installs an agent into their cluster.
1. The agent connects to GitLab.com or their self-managed GitLab instance,
receiving commands from it.
The customer does not need to provide any credentials to GitLab, and
is in full control of what permissions the agent has.
For more information, visit the
[GitLab agent repository](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent) or
[the epic](https://gitlab.com/groups/gitlab-org/-/epics/3329).
### Request routing
Agents connect to the server-side component called GitLab agent server
(`gitlab-kas`) and keep an open connection that waits for commands. The
difficulty with the approach is in routing requests from GitLab to the correct agent.
Each cluster may contain multiple logical agents, and each may be running as multiple
replicas (`Pod`s), connected to an arbitrary `gitlab-kas` instance.
Existing and new features require real-time access to the APIs of the cluster
and (optionally) APIs of components, running in the cluster. As a result, it's difficult to pass
the information back and forth using the more traditional polling approach.
A good example to illustrate the real-time need is Prometheus integration.
If we wanted to draw real-time graphs, we would need direct access to the Prometheus API
to make queries and quickly return results. `gitlab-kas` could expose the Prometheus API
to GitLab, and transparently route traffic to one of the correct agents connected
at the moment. The agent then would stream the request to Prometheus and stream the response back.
## Proposal
Implement request routing in `gitlab-kas`. Encapsulate and hide all related
complexity from the main application by providing a clean API to work with Kubernetes
and the agents.
The above does not necessarily mean proxying Kubernetes' API directly, but that
is possible should we need it.
What APIs `gitlab-kas` provides depends on the features developed, but first
we must solve the request routing problem. It blocks any and all features
that require direct communication with agents, Kubernetes or in-cluster services.
A detailed implementation proposal with all technical details is in
[`kas_request_routing.md`](https://gitlab.com/gitlab-org/cluster-integration/gitlab-agent/-/blob/master/doc/kas_request_routing.md).
```mermaid
flowchart LR
subgraph "Kubernetes 1"
agentk1p1["agentk 1, Pod1"]
agentk1p2["agentk 1, Pod2"]
end
subgraph "Kubernetes 2"
agentk2p1["agentk 2, Pod1"]
end
subgraph "Kubernetes 3"
agentk3p1["agentk 3, Pod1"]
end
subgraph kas
kas1["kas 1"]
kas2["kas 2"]
kas3["kas 3"]
end
GitLab["GitLab Rails"]
Redis
GitLab -- "gRPC to any kas" --> kas
kas1 -- register connected agents --> Redis
kas2 -- register connected agents --> Redis
kas1 -- lookup agent --> Redis
agentk1p1 -- "gRPC" --> kas1
agentk1p2 -- "gRPC" --> kas2
agentk2p1 -- "gRPC" --> kas1
agentk3p1 -- "gRPC" --> kas2
```
### Iterations
Iterations are tracked in [the dedicated epic](https://gitlab.com/groups/gitlab-org/-/epics/4591).
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -1,112 +1,11 @@
---
status: implemented
creation-date: "2020-10-21"
authors: [ "@craig-gomes" ]
coach: "@ayufan"
approvers: [ "@timzallmann", "@joshlambert" ]
owning-stage: "~devops::non_devops"
participating-stages: []
redirect_to: 'https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/image_resizing/'
remove_date: '2025-07-08'
---
# Image resizing for avatars and content images
This document was moved to [another location](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/image_resizing/).
Currently, we are showing all uploaded images 1:1, which is of course not ideal.
To greatly improve performance, we should add image resizing to the backend. There are two
main areas of image resizing to consider: avatars and content images. The MVC
for this implementation focuses on avatars. Avatar requests make up
approximately 70% of total image requests. There is an identified set of sizes
we intend to support, which makes the scope of this first MVC very narrow.
Content image resizing has many more considerations for size and features. It is
entirely possible that we have two separate development efforts with the same
goal of increasing performance via image resizing.
## MVC Avatar Resizing
When implementing a dynamic image resizing solution, images should be resized
and optimized on the fly so that if we define new targeted sizes later we can
add them dynamically. This would mean a huge improvement in performance as some
of the measurements suggest that we can save up to 95% of our current load size.
Our initial investigations indicate that we have uploaded approximately 1.65 million
avatars totaling approximately 80 GB in size and averaging approximately
48 KB each. Early measurements indicate we can reduce the most common avatar
dimensions to between 1-3 KB in size, netting us a greater than 90% size
reduction. For the MVC we don't consider application-level caching and rely
purely on HTTP-based caches as implemented in CDNs and browsers, but we might
revisit this decision later on. To mitigate performance issues with avatar
resizing, especially in the case of self-managed instances, an operations feature flag is
implemented to disable dynamic image resizing.
```mermaid
sequenceDiagram
autonumber
Requester->>Workhorse: Incoming request
Workhorse->>RailsApp: Incoming request
alt All is true: 1.Avatar is requested, 2.Requested Width is allowed, 3.Feature is enabled
Note right of RailsApp: Width Allowlist: https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/concerns/avatarable.rb#L10
RailsApp->>Workhorse: `send-scaled-img:` request
Note right of RailsApp: Set `send-scaled-img:` Header
Workhorse->>Workhorse: Image resizing using Go lib
Workhorse->>Requester: Serve the resized image
else All other cases
RailsApp->>Workhorse: Usual request scenario
Workhorse->>Requester: Usual request scenario
end
```
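In Rails terms, the gate in steps 3-5 of the diagram could look roughly like
the sketch below. The allowlist constant, feature flag name, and header
payload helper here are illustrative; the real allowlist lives in
`app/models/concerns/avatarable.rb`:

```ruby
# Illustrative sketch of the Rails-side decision; names are assumptions.
ALLOWED_AVATAR_WIDTHS = [16, 24, 32, 48, 64, 96].freeze

def show_avatar(upload, params)
  width = params[:width].to_i

  if ALLOWED_AVATAR_WIDTHS.include?(width) && Feature.enabled?(:dynamic_image_resizing)
    # Workhorse intercepts this header, resizes the image with its Go
    # library, and serves the scaled result to the requester.
    headers['Gitlab-Workhorse-Send-Data'] = "send-scaled-img:#{scaled_image_payload(upload, width)}"
    head :ok
  else
    # All other cases: usual request scenario.
    send_file upload.path
  end
end
```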
## Content Image Resizing
Content image resizing is a more complex problem to tackle. There are no set
size restrictions and there are additional features or requirements to consider.
- Dynamic WebP support - the WebP format typically achieves an average of 30% more
compression than JPEG without loss of image quality. More details are in
[this Google Comparative Study](https://developers.google.com/speed/webp/docs/c_study)
- Extract the first GIF frame so we can avoid loading 10 MB of pixels
- Check Device Pixel Ratio to deliver nice images on High DPI screens
- Progressive image loading, similar to what is described in
[this article about how to build a progressive image loader](https://www.sitepoint.com/how-to-build-your-own-progressive-image-loader/)
- Resizing recommendations (for example, size and clarity)
- Storage
The MVC avatar resizing implementation is integrated into Workhorse. With the
extra requirements for content image resizing, this may require further use of
GraphicsMagick (GM) or a similar library, and breaking it out of Workhorse.
## Iterations
1. ✓ POC on different image resizing solutions
1. ✓ Review solutions with security team
1. ✓ Implement avatar resizing MVC
1. Deploy, measure, monitor
1. Clarify features for content image resizing
1. Weigh options between using current implementation of image resizing vs new solution
1. Implement content image resizing MVC
1. Deploy, measure, monitor
## Who
Proposal:
<!-- vale gitlab.Spelling = NO -->
| Role | Who |
|------------------------------|-----|
| Author | Craig Gomes |
| Architecture Evolution Coach | Kamil Trzciński |
| Engineering Leader | Tim Zallmann |
| Domain Expert | Matthias Kaeppler |
| Domain Expert | Aleksei Lipniagov |
<!-- vale gitlab.Spelling = YES -->
DRIs:
<!-- vale gitlab.Spelling = NO -->
| Role | Who |
|-------------|-----|
| Product | Josh Lambert |
| Leadership | Craig Gomes |
| Engineering | Matthias Kaeppler |
<!-- vale gitlab.Spelling = YES -->
<!-- This redirect file can be deleted after <2025-07-08>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
View File
@ -489,42 +489,164 @@ J --> K[::GitlabSchema.subscriptions.trigger]
## How to implement a new action
### Register a new method
Implementing a new AI action will require changes in the GitLab monolith as well as in the AI Gateway.
We'll use the example of wanting to implement an action that allows users to rewrite issue descriptions according to
a given prompt.
Go to the `Llm::ExecuteMethodService` and add a new method with the new service class you will create.
### 1. Add your action to the Cloud Connector feature list
```ruby
class ExecuteMethodService < BaseService
METHODS = {
# ...
amazing_new_ai_feature: Llm::AmazingNewAiFeatureService
}.freeze
The Cloud Connector configuration stores the permissions needed to access your service, as well as additional metadata.
For more information, see [Cloud Connector: Configuration](../cloud_connector/configuration.md).
```yaml
# ee/config/cloud_connector/access_data.yml
services:
# ...
rewrite_description:
backend: 'gitlab-ai-gateway'
bundled_with:
duo_enterprise:
unit_primitives:
- rewrite_issue_description
```
### Create a Service
### 2. Create an Agent definition in the AI Gateway
In [the AI Gateway project](https://gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist), create a
new agent definition under `ai_gateway/agents/definitions`. Create a new subfolder corresponding to the name of your
AI action, and a new YAML file for your agent. Specify the model and provider you wish to use, and the prompts that
will be fed to the model. You can specify inputs to be plugged into the prompt by using `{}`.
```yaml
# ai_gateway/agents/definitions/rewrite_description/base.yml
name: Description rewriter
provider: anthropic
model: claude-3-sonnet-20240229
prompt_template:
system: |
You are a helpful assistant that rewrites the description of resources. You'll be given the current description, and a prompt on how you should rewrite it. Reply only with your rewritten description.
<description>{description}</description>
<prompt>{prompt}</prompt>
```
### 3. Create a Completion class
1. Create a new completion under `ee/lib/gitlab/llm/ai_gateway/completions/` and inherit it from the `Base`
AI Gateway Completion.
```ruby
# ee/lib/gitlab/llm/ai_gateway/completions/rewrite_description.rb
module Gitlab
module Llm
module AiGateway
module Completions
class RewriteDescription < Base
def agent_name
'base' # Must match the name of the agent you defined on the AI Gateway
end
def inputs
{ description: resource.description, prompt: prompt_message.content }
end
end
end
end
end
end
```
### 4. Create a Service
1. Create a new service under `ee/app/services/llm/` and inherit it from the `BaseService`.
1. The `resource` is the object we want to act on. It can be any object that includes the `Ai::Model` concern. For example, it could be a `Project`, `MergeRequest`, or `Issue`.
```ruby
# ee/app/services/llm/amazing_new_ai_feature_service.rb
# ee/app/services/llm/rewrite_description_service.rb
module Llm
class AmazingNewAiFeatureService < BaseService
class RewriteDescriptionService < BaseService
extend ::Gitlab::Utils::Override
override :valid
def valid?
super &&
# You can restrict which type of resources your service applies to
resource.to_ability_name == "issue" &&
# Always check that the user is allowed to perform this action on the resource
Ability.allowed?(user, :rewrite_description, resource)
end
private
def perform
::Llm::CompletionWorker.perform_async(user.id, resource.id, resource.class.name, :amazing_new_ai_feature)
success
end
def valid?
super && Ability.allowed?(user, :amazing_new_ai_feature, resource)
schedule_completion_worker
end
end
end
```
### 5. Register the feature in the catalogue
Go to `Gitlab::Llm::Utils::AiFeaturesCatalogue` and add a new entry for your AI action.
```ruby
class AiFeaturesCatalogue
LIST = {
# ...
rewrite_description: {
service_class: ::Gitlab::Llm::AiGateway::Completions::RewriteDescription,
feature_category: :ai_abstraction_layer,
execute_method: ::Llm::RewriteDescriptionService,
maturity: :experimental,
self_managed: false,
internal: false
}
}.freeze
```
## How to migrate an existing action to the AI Gateway
AI actions were initially implemented inside the GitLab monolith. As part of our
[AI Gateway as the Sole Access Point for Monolith to Access Models Epic](https://gitlab.com/groups/gitlab-org/-/epics/13024)
we're migrating prompts, model selection and model parameters into the AI Gateway. This will increase the speed at which
we can deliver improvements to self-managed users, by decoupling prompt and model changes from monolith releases. To
migrate an existing action:
1. Follow steps 1 through 3 on [How to implement a new action](#how-to-implement-a-new-action).
1. Modify the entry for your AI action in the catalogue to list the new completion class as the `aigw_service_class`.
```ruby
class AiFeaturesCatalogue
LIST = {
# ...
generate_description: {
service_class: ::Gitlab::Llm::Anthropic::Completions::GenerateDescription,
aigw_service_class: ::Gitlab::Llm::AiGateway::Completions::GenerateDescription,
prompt_class: ::Gitlab::Llm::Templates::GenerateDescription,
feature_category: :ai_abstraction_layer,
execute_method: ::Llm::GenerateDescriptionService,
maturity: :experimental,
self_managed: false,
internal: false
},
# ...
}.freeze
```
When the feature flag `ai_gateway_agents` is enabled, the `aigw_service_class` will be used to process the AI action.
Once you've validated the correct functioning of your action, you can remove the `aigw_service_class` key and replace
the `service_class` with the new `AiGateway::Completions` class to make it the permanent provider.
For a complete example of the changes needed to migrate an AI action, see the following MRs:
- [Changes to the GitLab Rails monolith](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/152429)
- [Changes to the AI Gateway](https://gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist/-/merge_requests/921)
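For illustration, once the migration is made permanent the catalogue entry
would drop `aigw_service_class` and point `service_class` at the
`AiGateway::Completions` class directly (a sketch of the end state, mirroring
the example above):

```ruby
class AiFeaturesCatalogue
  LIST = {
    # ...
    generate_description: {
      service_class: ::Gitlab::Llm::AiGateway::Completions::GenerateDescription,
      prompt_class: ::Gitlab::Llm::Templates::GenerateDescription,
      feature_category: :ai_abstraction_layer,
      execute_method: ::Llm::GenerateDescriptionService,
      maturity: :experimental,
      self_managed: false,
      internal: false
    },
    # ...
  }.freeze
```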
### Authorization in GitLab-Rails
We recommend using [policies](../policies.md) to deal with authorization for a feature. Currently, we need to make sure to cover the following checks:
@ -536,7 +658,7 @@ We recommend to use [policies](../policies.md) to deal with authorization for a
1. User is a member of the group/project.
1. `experiment_features_enabled` settings are set on the `Namespace`.
For our example, we need to implement the `allowed?(:amazing_new_ai_feature)` call. As an example, you can look at the [Issue Policy for the summarize comments feature](https://gitlab.com/gitlab-org/gitlab/-/blob/master/ee/app/policies/ee/issue_policy.rb). In our example case, we want to implement the feature for Issues as well:
For our example, we need to implement the `allowed?(:rewrite_description)` call. As an example, you can look at the [Issue Policy for the summarize comments feature](https://gitlab.com/gitlab-org/gitlab/-/blob/master/ee/app/policies/ee/issue_policy.rb). In our example case, we want to implement the feature for Issues as well:
```ruby
# ee/app/policies/ee/issue_policy.rb
@ -551,14 +673,14 @@ module EE
end
with_scope :subject
condition(:amazing_new_ai_feature_enabled) do
::Feature.enabled?(:amazing_new_ai_feature, subject_container) &&
subject_container.licensed_feature_available?(:amazing_new_ai_feature)
condition(:rewrite_description_enabled) do
::Feature.enabled?(:rewrite_description, subject_container) &&
subject_container.licensed_feature_available?(:rewrite_description)
end
rule do
ai_available & amazing_new_ai_feature_enabled & is_project_member
end.enable :amazing_new_ai_feature
ai_available & rewrite_description_enabled & is_project_member
end.enable :rewrite_description
end
end
end
@ -640,7 +762,7 @@ The `CompletionWorker` will call the `Completions::Factory` which will initializ
In our example, we will use VertexAI and implement two new classes:
```ruby
# /ee/lib/gitlab/llm/vertex_ai/completions/amazing_new_ai_feature.rb
# /ee/lib/gitlab/llm/vertex_ai/completions/rewrite_description.rb
module Gitlab
module Llm
@ -666,7 +788,7 @@ end
```
```ruby
# /ee/lib/gitlab/llm/vertex_ai/templates/amazing_new_ai_feature.rb
# /ee/lib/gitlab/llm/vertex_ai/templates/rewrite_description.rb
module Gitlab
module Llm
View File
@ -2234,6 +2234,15 @@ Do not use **whilst**. Use [while](#while) instead. **While** is more succinct a
Do not use **whitelist**. Another option is **allowlist**. ([Vale](../testing/vale.md) rule: [`InclusionCultural.yml`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/doc/.vale/gitlab/InclusionCultural.yml))
## within
When possible, do not use **within**. Use **in** instead, unless you are referring to a time frame, limit, or boundary. For example:
- The upgrade occurs within the four-hour maintenance window.
- The Wi-Fi signal is accessible within a 30-foot radius.
([Vale](../testing/vale.md) rule: [`SubstitutionWarning.yml`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/doc/.vale/gitlab/SubstitutionWarning.yml))
## yet
Do not use **yet** when talking about the product or its features. The documentation describes the product as it is today.
View File
@ -14,24 +14,57 @@ These insights appear on the left sidebar, under [**Analyze**](../project/settin
## Analytics features
| Feature | Description | Category | Project-level | Group-level | Instance-level |
| ------- | ----------- | -------- | ------------- | ----------- | -------------- |
| [Analytics dashboards](analytics_dashboards.md) | Built-in and customizable dashboards to visualize collected data. | End-to-end insight & visibility | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [CI/CD analytics and DORA metrics](ci_cd_analytics.md) | Pipeline duration and successes/failures, and DORA metrics over time. | CI/CD | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Code review analytics](code_review_analytics.md) | Open merge requests with information about merge request activity. | Productivity | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Contribution analytics](../group/contribution_analytics/index.md) | Overview of [contribution events](../../user/profile/contributions_calendar.md) made by group members, with bar chart of push events, merge requests, and issues. | Developer | **{dotted-circle}** No | **{check-circle}** Yes | **{dotted-circle}** No |
| [Contributor analytics](../../user/analytics/contributor_analytics.md) | Overview of commits made by project members, with line chart of number of commits. | Developer | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [DevOps adoption](../group/devops_adoption/index.md) | Organization's maturity in DevOps adoption, with group-level feature adoption over time and adoption by subgroup. | End-to-end insight & visibility | **{dotted-circle}** No | **{check-circle}** Yes | **{dotted-circle}** No |
| [Insights](../project/insights/index.md) | Customizable reports to explore issues, merged merge requests, and triage hygiene. | End-to-end insight & visibility | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Instance-level analytics](../../administration/analytics/index.md) | Aggregated analytics across GitLab about multiple projects and groups in one place. | End-to-end insight & visibility | **{dotted-circle}** No | **{dotted-circle}** No | **{check-circle}** Yes |
| [Issue analytics](../group/issues_analytics/index.md) | Visualization of issues created each month. | Productivity | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Merge request analytics](merge_request_analytics.md) | Overview of merge requests, with mean time to merge, throughput, and activity details. | Productivity | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Product analytics](../product_analytics/index.md) | Understanding how users behave and interact with your product.| Product Analytics | **{dotted-circle}** No | **{check-circle}** Yes | **{dotted-circle}** No |
| [Productivity analytics](productivity_analytics.md) | Merge request lifecycle, filterable down to author level. | Productivity | **{dotted-circle}** No | **{check-circle}** Yes | **{dotted-circle}** No |
| [Repository analytics](../group/repositories_analytics/index.md) | Programming languages used in the repository and code coverage statistics. | Developer | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Security Dashboards](../application_security/security_dashboard/index.md) | Collection of metrics, ratings, and charts for vulnerabilities detected by security scanners. | Security | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Value Streams Dashboard](value_streams_dashboard.md) | Insights into DevSecOps trends, patterns, and opportunities for digital transformation improvements. | End-to-end insight & visibility | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Value Stream Management Analytics](../group/value_stream_analytics/index.md) | Insights into time-to-value through customizable stages. | End-to-end insight & visibility | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
### End-to-end insight & visibility analytics
Use these features to gain insights into your overall software development lifecycle.
| Feature | Description | Project-level | Group-level | Instance-level |
| ------- | ----------- | ------------- | ----------- | -------------- |
| [Value Streams Dashboard](value_streams_dashboard.md) | Insights into DevSecOps trends, patterns, and opportunities for digital transformation improvements. | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Value Stream Management Analytics](../group/value_stream_analytics/index.md) | Insights into time-to-value through customizable stages. | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [DevOps adoption](../group/devops_adoption/index.md) | Organization's maturity in DevOps adoption, with group-level feature adoption over time and adoption by subgroup. | **{dotted-circle}** No | **{check-circle}** Yes | **{dotted-circle}** No |
| [Instance-level analytics](../../administration/analytics/index.md) | Aggregated analytics across GitLab about multiple projects and groups in one place. | **{dotted-circle}** No | **{dotted-circle}** No | **{check-circle}** Yes |
| [Insights](../project/insights/index.md) | Customizable reports to explore issues, merged merge requests, and triage hygiene. | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Product analytics](../product_analytics/index.md) | Understanding how users behave and interact with your product.| **{dotted-circle}** No | **{check-circle}** Yes | **{dotted-circle}** No |
| [Analytics dashboards](analytics_dashboards.md) | Built-in and customizable dashboards to visualize collected data. | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
### Productivity analytics
Use these features to gain insights into the productivity of your team on issues and merge requests.
| Feature | Description | Project-level | Group-level | Instance-level |
| ------- | ----------- | ------------- | ----------- | -------------- |
| [Issue analytics](../group/issues_analytics/index.md) | Visualization of issues created each month. | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [Merge request analytics](merge_request_analytics.md) | Overview of merge requests, with mean time to merge, throughput, and activity details. | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Productivity analytics](productivity_analytics.md) | Merge request lifecycle, filterable down to author level. | **{dotted-circle}** No | **{check-circle}** Yes | **{dotted-circle}** No |
| [Code review analytics](code_review_analytics.md) | Open merge requests with information about merge request activity. | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
### Developer analytics
Use these features to gain insights into developer productivity and code coverage.
| Feature | Description | Project-level | Group-level | Instance-level |
| ------- | ----------- | ------------- | ----------- | -------------- |
| [Contribution analytics](../group/contribution_analytics/index.md) | Overview of [contribution events](../../user/profile/contributions_calendar.md) made by group members, with bar chart of push events, merge requests, and issues. | **{dotted-circle}** No | **{check-circle}** Yes | **{dotted-circle}** No |
| [Contributor analytics](../../user/analytics/contributor_analytics.md) | Overview of commits made by project members, with line chart of number of commits. | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No |
| [Repository analytics](../group/repositories_analytics/index.md) | Programming languages used in the repository and code coverage statistics. | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
### CI/CD analytics
Use these features to gain insights into CI/CD performance.
| Feature | Description | Project-level | Group-level | Instance-level |
| ------- | ----------- | ------------- | ----------- | -------------- |
| [CI/CD analytics](ci_cd_analytics.md) | Pipeline duration and successes/failures. | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
| [DORA metrics](dora_metrics.md) | DORA metrics over time. | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
### Security analytics
Use these features to gain insights into security vulnerabilities and metrics.
| Feature | Description | Project-level | Group-level | Instance-level |
| ------- | ----------- | ------------- | ----------- | -------------- |
| [Security Dashboards](../application_security/security_dashboard/index.md) | Collection of metrics, ratings, and charts for vulnerabilities detected by security scanners. | **{check-circle}** Yes | **{check-circle}** Yes | **{dotted-circle}** No |
## Glossary
View File
@ -10,7 +10,6 @@ info: To determine the technical writer assigned to the Stage/Group associated w
DETAILS:
**Tier:** Free, Premium, Ultimate
**Offering:** GitLab.com, Self-managed, GitLab Dedicated
**Status:** GA
Pipeline secret detection scans files after they are committed to a Git repository and pushed to GitLab.
View File
@ -231,7 +231,7 @@ If a section heading cannot be parsed, the section is:
1. Added to the previous section.
1. If no previous section exists, the section is added to the default section.
For example, this file is missing a closing square bracket:
#### After the default section
```plaintext
* @group
@ -247,21 +247,20 @@ GitLab recognizes the heading `[Section name` as an entry. The default section i
- `[Section` owned by `name`
- `docs/` owned by `@docs_group`
This file contains an unescaped space between the words `Section` and `name`.
GitLab recognizes the intended heading as an entry:
#### After a named section
```plaintext
[Docs]
docs/**/* @group
[Section name]{2} @group
[Section name
docs/ @docs_group
```
The `[Docs]` section then includes 3 rules:
GitLab recognizes the heading `[Section name` as an entry. The `[Docs]` section includes 3 rules:
- `docs/**/*` owned by `@group`
- `[Section` owned by `name]{2} @group`
- `[Section` owned by `name`
- `docs/` owned by `@docs_group`
### Malformed owners
View File
@ -50,18 +50,18 @@ Code Suggestions supports these languages:
| HTML | **{check-circle}** Yes | **{dotted-circle}** No | **{dotted-circle}** No | **{dotted-circle}** No | **{dotted-circle}** No |
| Java | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| JavaScript | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Kotlin | **{check-circle}** Yes | **{check-circle}** Yes <br><br>(Requires third-party extension providing Kotlin support) | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Kotlin | **{dotted-circle}** No | **{check-circle}** Yes <br><br>(Requires third-party extension providing Kotlin support) | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Markdown | **{check-circle}** Yes |**{dotted-circle}** No | **{dotted-circle}** No | **{dotted-circle}** No | **{dotted-circle}** No |
| PHP | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Python | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Ruby | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Rust | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Scala | **{check-circle}** Yes | **{check-circle}** Yes <br><br>(Requires third-party extension providing Scala support) | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Scala | **{dotted-circle}** No | **{check-circle}** Yes <br><br>(Requires third-party extension providing Scala support) | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Shell scripts (`bash` only) | **{check-circle}** Yes | **{dotted-circle}** No | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Svelte | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Swift | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| TypeScript | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
| Terraform | **{check-circle}** Yes | **{check-circle}** Yes <br><br>(Requires third-party extension providing Terraform support) | **{check-circle}** Yes | **{dotted-circle}** No | **{check-circle}** Yes <br><br>(Requires third-party extension providing the `terraform` file type) |
| Terraform | **{dotted-circle}** No | **{check-circle}** Yes <br><br>(Requires third-party extension providing Terraform support) | **{check-circle}** Yes | **{dotted-circle}** No | **{check-circle}** Yes <br><br>(Requires third-party extension providing the `terraform` file type) |
| Vue | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes | **{check-circle}** Yes |
NOTE:

View File

@ -164,8 +164,7 @@ Prerequisites:
1. On the left sidebar, select **Search or go to** and find your project or group.
1. Select **Plan > Wiki**.
1. Go to the page you want to delete.
1. Select **Edit**.
1. Select **Delete page**.
1. Select **Wiki actions** (**{ellipsis_v}**), then **Delete page**.
1. Confirm the deletion.
## Move or rename a wiki page

View File

@ -219,6 +219,10 @@ module API
def find_group!(id, organization: nil)
group = find_group(id, organization: organization)
# We need to ensure the namespace is in the context since
# it's possible a method such as bypass_session! might log
# a message before @group is set.
::Gitlab::ApplicationContext.push(namespace: group) if group
check_group_access(group)
end
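A minimal sketch of the effect, assuming the labkit gem's context API (which `Gitlab::ApplicationContext` wraps): fields pushed onto the context are merged into every structured log line emitted while the context is active, which is what the updated `API::Groups` specs further below assert.

```ruby
require 'labkit' # assumption: labkit is available, as it is in GitLab

# Keys stored in the context are prefixed with "meta."; structured
# loggers merge them into each line emitted inside the block, so even a
# message logged before authorization completes carries the namespace.
Labkit::Context.with_context(namespace: 'my-group') do
  # a log line emitted here would include "meta.namespace" => "my-group"
end
```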

View File

@ -66,7 +66,7 @@
"@gitlab/favicon-overlay": "2.0.0",
"@gitlab/fonts": "^1.3.0",
"@gitlab/svgs": "3.103.0",
"@gitlab/ui": "85.12.2",
"@gitlab/ui": "^86.0.1",
"@gitlab/web-ide": "^0.0.1-dev-20240613133550",
"@mattiasbuelens/web-streams-adapter": "^0.1.0",
"@rails/actioncable": "7.0.8-4",

View File

@ -24,12 +24,17 @@ module QA
# @return [void]
def export
mapping_files = Dir.glob(mapping_files_glob)
return logger.warn("No files matched pattern") if mapping_files.empty?
return logger.warn("No files matched pattern, skipping coverage mapping upload") if mapping_files.empty?
unless ENV["QA_RUN_TYPE"].present?
return logger.warn("QA_RUN_TYPE variable is not set, skipping coverage mapping upload")
end
logger.info("Number of mapping files found: #{mapping_files.size}")
mapping_data = mapping_files.flat_map { |file| JSON.parse(File.read(file)) }.reduce(:merge!)
file = "test-code-paths-mapping-merged-pipeline-#{ENV['CI_PIPELINE_ID'] || 'local'}.json"
file = "#{ENV['CI_COMMIT_REF_SLUG']}/#{ENV['QA_RUN_TYPE']}/test-code-paths-mapping-merged-pipeline-#{ENV['CI_PIPELINE_ID'] || 'local'}.json"
File.write(file, mapping_data.to_json) && logger.debug("Saved test code paths mapping to #{file}")
upload_to_gcs(file, mapping_data)
end
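One detail worth noting in the merge step: `JSON.parse` returns a `Hash`, which `flat_map` leaves intact, and `reduce(:merge!)` folds all parsed files into the first hash, with later files overwriting duplicate keys. A standalone illustration with hypothetical data:

```ruby
require 'json'

# Two hypothetical per-process mapping files, already read into memory.
raw_files = [
  '{"qa/specs/a_spec.rb": ["app/models/a.rb"]}',
  '{"qa/specs/b_spec.rb": ["app/models/b.rb"]}'
]

merged = raw_files.flat_map { |raw| JSON.parse(raw) }.reduce(:merge!)
# => {"qa/specs/a_spec.rb"=>["app/models/a.rb"],
#     "qa/specs/b_spec.rb"=>["app/models/b.rb"]}
puts JSON.pretty_generate(merged)
```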
@ -42,6 +47,7 @@ module QA
client.put_object(BUCKET, file_name, JSON.pretty_generate(mapping_data))
rescue StandardError => e
logger.error("Failed to upload code paths mapping to GCS. Error: #{e}")
logger.error("Backtrace: #{e.backtrace}")
end
# GCS client

View File

@ -31,6 +31,7 @@ RSpec.describe QA::Tools::Ci::ExportCodePathsMapping do
allow(Dir).to receive(:glob).with(glob) { file_paths }
allow(::File).to receive(:read).with(anything).and_return(code_path_mappings_data.to_json)
stub_env('QA_CODE_PATH_MAPPINGS_GCS_CREDENTIALS', gcs_credentials)
stub_env('QA_RUN_TYPE', 'package-and-test')
end
context "with mapping files present" do

View File

@ -60,6 +60,10 @@ export const mismatchAllowList = [
'.line-clamp-3',
'.outline-none',
'.outline-0',
// Tailwind's `bg-none` util applies `background-image: none` while ours does `background: none`.
// Our recommendation is to use `bg-transparent` instead. Existing usages of `bg-none` have been
// migrated to `bg-transparent` as of this comment.
'.bg-none',
];
export function loadCSSFromFile(filePath) {

View File

@ -5,7 +5,6 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import { fileURLToPath } from 'node:url';
import path from 'node:path';
import _ from 'lodash';
import postcss from 'postcss';
import * as prettier from 'prettier';
@ -144,14 +143,17 @@ export async function toMinimalUtilities() {
const { css: tailwindClasses } = await postcss([
tailwindcss({
...tailwindConfig,
// We must ensure the GitLab UI plugin is disabled during this run so that whatever it defines
// is purged out of the CSS-in-Js.
presets: [
{
...tailwindConfig.presets[0],
plugins: [],
},
],
// Disable all core plugins, all we care about are the legacy utils
// that are provided via addUtilities.
corePlugins: [],
// We must ensure the GitLab UI plugin is disabled during this run so that whatever it defines
// is purged out of the CSS-in-Js.
plugins: tailwindConfig.plugins.filter(
(plugin) => plugin.handler.name !== 'gitLabUIUtilities',
),
}),
]).process('@tailwind utilities;', { map: false, from: undefined });
@ -221,12 +223,10 @@ export async function convertUtilsToCSSInJS({ buildOnlyUsed = false } = {}) {
// We only want to generate the utils based on the fresh
// allUtilitiesFile
content: [allUtilitiesFile],
// We are disabling all plugins but the GitLab UI one which contains legitimate utils that
// will need to be backported to GitLab UI.
// This prevents the css-to-js import from causing trouble.
plugins: tailwindConfig.plugins.filter(
(plugin) => plugin.handler.name === 'gitLabUIUtilities',
),
// We are disabling all plugins to prevent the CSS-in-Js import from causing trouble.
// The GitLab UI preset still registers its own plugin, which we need to define legitimate
// custom utils.
plugins: [],
}),
]).process(await readFile(tailwindSource, 'utf-8'), { map: false, from: undefined });

View File

@ -1,7 +1,8 @@
import { GlAlert } from '@gitlab/ui';
import { GlAlert, GlTab } from '@gitlab/ui';
import { shallowMountExtended } from 'helpers/vue_test_utils_helper';
import UsageQuotasApp from '~/usage_quotas/components/usage_quotas_app.vue';
import { defaultProvide } from '../mock_data';
import Tracking from '~/tracking';
import { defaultProvide, provideWithTabs } from '../mock_data';
describe('UsageQuotasApp', () => {
let wrapper;
@ -15,17 +16,47 @@ describe('UsageQuotasApp', () => {
});
};
beforeEach(() => {
createComponent();
});
const findGlAlert = () => wrapper.findComponent(GlAlert);
const findTabs = () => wrapper.findAllComponents(GlTab);
describe('when tabs array is empty', () => {
beforeEach(() => {
createComponent();
});
it('shows error alert', () => {
expect(findGlAlert().text()).toContain(
'Something went wrong while loading Usage Quotas Tabs',
);
});
});
describe('when there are tabs', () => {
beforeEach(() => {
jest.spyOn(Tracking, 'event');
createComponent({
provide: provideWithTabs,
});
});
it('does not show error alert', () => {
expect(findGlAlert().exists()).toBe(false);
});
it('tracks internal events when user clicks on a tab that has tracking data', () => {
findTabs().at(0).vm.$emit('click');
expect(Tracking.event).toHaveBeenCalledWith(
undefined,
provideWithTabs.tabs[0].tracking.action,
expect.any(Object),
);
});
it('does not track any event when user clicks on a tab that does not have tracking data', () => {
findTabs().at(1).vm.$emit('click');
expect(Tracking.event).not.toHaveBeenCalled();
});
});
});

View File

@ -1,3 +1,29 @@
export const defaultProvide = {
tabs: [],
};
export const provideWithTabs = {
tabs: [
{
title: 'Tab 1 title',
hash: '#tab-1-hash',
testid: 'tab-1-testid',
component: {
name: 'Tab1Component',
render: () => {},
},
tracking: {
action: 'click_on_tab_on_usage_quotas',
},
},
{
title: 'Tab 2 title',
hash: '#tab-2-hash',
testid: 'tab-2-testid',
component: {
name: 'Tab2Component',
render: () => {},
},
},
],
};

View File

@ -5,7 +5,8 @@ require 'spec_helper'
RSpec.describe GitlabSchema.types['ContainerRepositoryDetails'], feature_category: :container_registry do
fields = %i[id name path location created_at updated_at expiration_policy_started_at
status tags_count expiration_policy_cleanup_status tags size manifest
project migration_state last_cleanup_deleted_tags_count user_permissions last_published_at]
project migration_state last_cleanup_deleted_tags_count user_permissions last_published_at
protection_rule_exists]
it { expect(described_class.graphql_name).to eq('ContainerRepositoryDetails') }

View File

@ -7,7 +7,7 @@ RSpec.describe GitlabSchema.types['ContainerRepository'], feature_category: :con
fields = %i[id name path location created_at updated_at expiration_policy_started_at
status tags_count expiration_policy_cleanup_status project
migration_state last_cleanup_deleted_tags_count user_permissions]
migration_state last_cleanup_deleted_tags_count user_permissions protection_rule_exists]
it { expect(described_class.graphql_name).to eq('ContainerRepository') }

View File

@ -333,4 +333,46 @@ RSpec.describe ContainerRegistry::Protection::Rule, type: :model, feature_catego
end
end
end
describe '.for_push_exists_for_multiple_containers' do
let_it_be(:project) { create(:project) }
let_it_be(:ppr_for_maintainer) do
create(:container_registry_protection_rule,
repository_path_pattern: "#{project.full_path}/my-container-prod*",
project: project
)
end
let(:repository_paths) {
[
"#{project.full_path}/my-container-prod-1",
"#{project.full_path}/unmatched-container-name"
]
}
subject do
described_class
.for_push_exists_for_multiple_containers(project_id: project.id, repository_paths: repository_paths)
.to_a
end
it do
is_expected.to eq([
{ "repository_path" => repository_paths.first, "protected" => true },
{ "repository_path" => repository_paths.second, "protected" => false }
])
end
context 'when edge cases' do
where(:repository_paths, :expected_result) do
nil | []
[] | []
end
with_them do
it { is_expected.to eq(expected_result) }
end
end
end
end
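The scope's contract (one row of `repository_path` and `protected` per input path) can be illustrated without a database. The following is a plain-Ruby sketch with a hypothetical helper, not the model's implementation; it treats `*` in `repository_path_pattern` as a wildcard, which loosely mirrors how the pattern is matched:

```ruby
# Hypothetical illustration of the scope's contract; the real scope
# performs this matching in SQL.
def protected_map(patterns, repository_paths)
  # Array() reproduces the nil/empty edge cases from the spec above.
  Array(repository_paths).map do |path|
    {
      "repository_path" => path,
      # File.fnmatch treats '*' as a wildcard over the rest of the path.
      "protected" => patterns.any? { |pattern| File.fnmatch(pattern, path) }
    }
  end
end

patterns = ["my-group/my-project/my-container-prod*"]
paths = ["my-group/my-project/my-container-prod-1",
         "my-group/my-project/unmatched-container-name"]

protected_map(patterns, paths)
# => [{"repository_path"=>"my-group/my-project/my-container-prod-1", "protected"=>true},
#     {"repository_path"=>"my-group/my-project/unmatched-container-name", "protected"=>false}]
```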

View File

@ -210,4 +210,89 @@ RSpec.describe 'getting container repositories in a project', feature_category:
end
end
end
describe 'protectionRuleExists' do
let_it_be(:container_registry_protection_rule) do
create(:container_registry_protection_rule, project: project, repository_path_pattern: container_repository.path)
end
it 'returns true for the field "protectionRuleExists" for the protected container repository' do
subject
expect(container_repositories_response).to include 'node' => hash_including('path' => container_repository.path, 'protectionRuleExists' => true)
container_repositories_response
.reject { |cr| cr.dig('node', 'path') == container_repository.path }
.each do |repository_response|
expect(repository_response.dig('node', 'protectionRuleExists')).to eq false
end
end
context "when feature flag ':container_registry_protected_containers' disabled" do
before do
stub_feature_flags(container_registry_protected_containers: false)
end
it 'returns false for the field "protectionRuleExists" for each container repository' do
subject
expect(container_repositories_response).to all include 'node' => include('protectionRuleExists' => false)
end
end
# To trigger the N+1 query, we need to create projects with different container repository counts.
# In this case, project1 has 4 container repositories and project2 has 8 container repositories.
describe "efficient database queries" do
let_it_be(:project1) { create(:project, :private) }
let_it_be(:user1) { create(:user, developer_of: project1) }
let_it_be(:project1_container_repositories) { create_list(:container_repository, 4, project: project1) }
let_it_be(:project1_container_repository_protected) { project1_container_repositories.first }
let_it_be(:project1_npm_container_protection_rule) do
create(:container_registry_protection_rule,
project: project1,
repository_path_pattern: project1_container_repository_protected.path
)
end
let_it_be(:project2) { create(:project, :private) }
let_it_be(:user2) { create(:user, developer_of: project2) }
let_it_be(:project2_container_repositories) { create_list(:container_repository, 8, project: project2) }
let_it_be(:project2_container_repository_protected) { project2_container_repositories.first }
let_it_be(:project2_npm_container_protection_rule) do
create(:container_registry_protection_rule,
project: project2,
repository_path_pattern: project2_container_repository_protected.path
)
end
let(:fields) do
<<~GQL
containerRepositories {
nodes {
path
protectionRuleExists
}
}
GQL
end
before do
project1_container_repositories.each do |repository|
stub_container_registry_tags(repository: repository.path, tags: %w[tag1 tag2 tag3], with_manifest: false)
end
project2_container_repositories.each do |repository|
stub_container_registry_tags(repository: repository.path, tags: %w[tag1 tag2 tag3], with_manifest: false)
end
end
it 'avoids N+1 database queries' do
query1 = graphql_query_for('project', { 'fullPath' => project1.full_path }, fields)
control_count1 = ActiveRecord::QueryRecorder.new { post_graphql(query1, current_user: user1) }
query2 = graphql_query_for('project', { 'fullPath' => project2.full_path }, fields)
expect { post_graphql(query2, current_user: user2) }.not_to exceed_query_limit(control_count1)
end
end
end
end
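For context on how a per-node GraphQL field can satisfy such an N+1 guard, one common pattern (a hedged sketch using the batch-loader gem, not necessarily GitLab's actual resolver) is to collect every repository path requested in the response and resolve them all through the batched scope exercised earlier:

```ruby
# Hypothetical resolver sketch; the field and scope names come from the
# hunks above, but the wiring here is assumed.
def protection_rule_exists
  BatchLoader::GraphQL.for(object.path).batch(key: object.project_id) do |paths, loader, args|
    ::ContainerRegistry::Protection::Rule
      .for_push_exists_for_multiple_containers(project_id: args[:key], repository_paths: paths)
      .each { |row| loader.call(row["repository_path"], row["protected"]) }
  end
end
```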

View File

@ -18,6 +18,17 @@ RSpec.describe API::Groups, feature_category: :groups_and_projects do
let_it_be(:project3) { create(:project, namespace: group1, path: 'test', visibility_level: Gitlab::VisibilityLevel::PRIVATE) }
let_it_be(:archived_project) { create(:project, namespace: group1, archived: true) }
def expect_log_keys(caller_id:, route:, root_namespace:)
expect(API::API::LOG_FORMATTER).to receive(:call) do |_severity, _datetime, _, data|
expect(data.stringify_keys).to include(
'correlation_id' => an_instance_of(String),
'meta.caller_id' => caller_id,
'route' => route,
'meta.root_namespace' => root_namespace
)
end
end
shared_examples 'group avatar upload' do
context 'when valid' do
let(:file_path) { 'spec/fixtures/banana_sample.gif' }
@ -627,6 +638,10 @@ RSpec.describe API::Groups, feature_category: :groups_and_projects do
end
it 'returns 200 for a public group', :aggregate_failures do
expect_log_keys(caller_id: "GET /api/:version/groups/:id",
route: "/api/:version/groups/:id",
root_namespace: group1.path)
get api("/groups/#{group1.id}")
expect(response).to have_gitlab_http_status(:ok)
@ -1305,6 +1320,10 @@ RSpec.describe API::Groups, feature_category: :groups_and_projects do
context "when authenticated as user" do
context 'with min access level' do
it 'returns projects with min access level or higher' do
expect_log_keys(caller_id: "GET /api/:version/groups/:id/projects",
route: "/api/:version/groups/:id/projects",
root_namespace: group1.path)
group_guest = create(:user)
group1.add_guest(group_guest)
project4 = create(:project, group: group1)
@ -1927,6 +1946,10 @@ RSpec.describe API::Groups, feature_category: :groups_and_projects do
context 'when authenticated as user' do
it 'returns the shared groups in the group', :aggregate_failures do
expect_log_keys(caller_id: "GET /api/:version/groups/:id/groups/shared",
route: "/api/:version/groups/:id/groups/shared",
root_namespace: main_group.path)
get api(path, user1)
expect(response).to have_gitlab_http_status(:ok)
@ -2155,6 +2178,10 @@ RSpec.describe API::Groups, feature_category: :groups_and_projects do
context 'when authenticated as user' do
context 'when user is not member of a public group' do
it 'returns no subgroups for the public group', :aggregate_failures do
expect_log_keys(caller_id: "GET /api/:version/groups/:id/subgroups",
route: "/api/:version/groups/:id/subgroups",
root_namespace: group1.path)
get api("/groups/#{group1.id}/subgroups", user2)
expect(response).to have_gitlab_http_status(:ok)
@ -2694,6 +2721,10 @@ RSpec.describe API::Groups, feature_category: :groups_and_projects do
context "when authenticated as admin" do
it "removes any existing group" do
expect_log_keys(caller_id: "DELETE /api/:version/groups/:id",
route: "/api/:version/groups/:id",
root_namespace: group2.path)
delete api("/groups/#{group2.id}", admin, admin_mode: true)
expect(response).to have_gitlab_http_status(:accepted)
@ -2809,6 +2840,10 @@ RSpec.describe API::Groups, feature_category: :groups_and_projects do
end
it 'only includes groups where the user has permissions to transfer a group to' do
expect_log_keys(caller_id: "GET /api/:version/groups/:id/transfer_locations",
route: "/api/:version/groups/:id/transfer_locations",
root_namespace: source_group.path)
request
expect(group_ids_from_response).to contain_exactly(
@ -2873,6 +2908,10 @@ RSpec.describe API::Groups, feature_category: :groups_and_projects do
context 'when promoting a subgroup to a root group' do
shared_examples_for 'promotes the subgroup to a root group' do
it 'returns success', :aggregate_failures do
expect_log_keys(caller_id: "POST /api/:version/groups/:id/transfer",
route: "/api/:version/groups/:id/transfer",
root_namespace: group.path)
make_request(user)
expect(response).to have_gitlab_http_status(:created)

View File

@ -75,22 +75,12 @@ RSpec.describe ::Packages::Npm::ProcessPackageFileService, feature_category: :pa
it_behaves_like 'raising an error', 'package.json file too large'
end
context 'with custom root folder name' do
before do
allow_next_instance_of(Gem::Package::TarReader::Entry) do |instance|
allow(instance).to receive(:full_name).and_return('custom/package.json')
end
end
it_behaves_like 'processing the package file'
end
context 'with multiple package.json entries' do
before do
allow(Gem::Package::TarReader).to receive(:new).and_return([
instance_double(Gem::Package::TarReader::Entry, full_name: 'pkg1/package.json'),
instance_double(Gem::Package::TarReader::Entry, full_name: 'pkg2/package.json'),
instance_double(Gem::Package::TarReader::Entry, full_name: 'pkg3/package.json')
instance_double(Gem::Package::TarReader::Entry, full_name: 'package/package.json'),
instance_double(Gem::Package::TarReader::Entry, full_name: 'package2/package.json'),
instance_double(Gem::Package::TarReader::Entry, full_name: 'package3/package.json')
])
end

View File

@ -1331,10 +1331,10 @@
resolved "https://registry.yarnpkg.com/@gitlab/svgs/-/svgs-3.103.0.tgz#af61387481100eadef2bea8fe8605250311ac582"
integrity sha512-jVWCrRVRF6nw2A+Aowc0quXV2bdRPl2v08ElCPSestfdKjQ92tSlCrIsLB8GvdW5aI0eFsD1vJ1w2qkzZdpA4A==
"@gitlab/ui@85.12.2":
version "85.12.2"
resolved "https://registry.yarnpkg.com/@gitlab/ui/-/ui-85.12.2.tgz#0842e7723a57b1a23d802d2e0a58d46c47a22dee"
integrity sha512-rz3Cp4ydyjcBccET0KIuwwLpkEkpg2McWvpYm5zJrXBtc70YZt3EUsX8dIt+nOrcbEjzGu05em6U1poimkQC4g==
"@gitlab/ui@^86.0.1":
version "86.0.1"
resolved "https://registry.yarnpkg.com/@gitlab/ui/-/ui-86.0.1.tgz#dc051257105a100edb7be3a81c9613e717ef2f79"
integrity sha512-Ye239tgMWKJtROdPCj8PCoPjlquelF0RePVGJVRRJy9eDHyRAXTeOWsWhSYmX2Po66dpbW7LaOpG/+hHtr3j3g==
dependencies:
"@floating-ui/dom" "1.4.3"
echarts "^5.3.2"