Add latest changes from gitlab-org/gitlab@master

This commit is contained in:
GitLab Bot 2024-05-15 18:13:03 +00:00
parent 3c7efb067c
commit f22e8932e2
31 changed files with 1188 additions and 365 deletions

View File

@@ -927,9 +927,12 @@ lib/gitlab/checks/**
/doc/tutorials/scan_execution_policy/ @rdickenson
/doc/tutorials/scan_result_policy/ @rdickenson
/doc/tutorials/scrum_events/ @msedlakjakubowski
/doc/tutorials/setup_steps/ @fneill
/doc/tutorials/update_commit_messages/ @msedlakjakubowski
/doc/tutorials/website_project_with_analytics/ @lciutacu
/doc/update/ @axil
/doc/user/ai_data_usage.md @sselhorn
/doc/user/ai_experiments.md @sselhorn
/doc/user/ai_features.md @sselhorn
/doc/user/ai_features_enable.md @sselhorn
/doc/user/analytics/ @lciutacu
@@ -950,6 +953,9 @@ lib/gitlab/checks/**
/doc/user/get_started/get_started_planning_work.md @msedlakjakubowski
/doc/user/get_started/get_started_projects.md @lciutacu
/doc/user/gitlab_duo_chat.md @sselhorn
/doc/user/gitlab_duo_chat_enable.md @sselhorn
/doc/user/gitlab_duo_chat_examples.md @sselhorn
/doc/user/gitlab_duo_chat_troubleshooting.md @sselhorn
/doc/user/gitlab_duo_examples.md @sselhorn
/doc/user/group/access_and_permissions.md @lciutacu
/doc/user/group/clusters/ @phillipwells

View File

@@ -941,10 +941,9 @@
# When new minor release tag is pushed, re-tag gdk image with pattern ignored by registry cleanup
.build-images:rules:retag-gdk-image:
rules:
- !reference [".qa:rules:package-and-test-never-run", rules]
- if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.0-ee$/ && $CI_PIPELINE_SOURCE == "push"'
- if: '$CI_SERVER_HOST == "gitlab.com" && $CI_PROJECT_PATH == "gitlab-org/gitlab" && $CI_COMMIT_TAG =~ /^v\d+\.\d+\.0-ee$/ && $CI_PIPELINE_SOURCE == "push"'
# In case gdk base tag is updated via backport mr, make sure we retag it with stable prefix as well
- if: '$CI_MERGE_REQUEST_IID && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /^[\d-]+-stable-ee$/'
- if: '$CI_SERVER_HOST == "gitlab.com" && $CI_PROJECT_PATH == "gitlab-org/gitlab" && $CI_MERGE_REQUEST_IID && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /^[\d-]+-stable-ee$/'
changes:
- qa/gdk/Dockerfile.gdk

View File

@@ -2,17 +2,6 @@
# Cop supports --autocorrect.
Layout/MultilineOperationIndentation:
Exclude:
- 'app/components/pajamas/concerns/checkbox_radio_label_with_help_text.rb'
- 'app/controllers/projects/application_controller.rb'
- 'app/controllers/repositories/git_http_client_controller.rb'
- 'app/controllers/sent_notifications_controller.rb'
- 'app/graphql/types/ci/stage_type.rb'
- 'app/helpers/auth_helper.rb'
- 'app/helpers/issuables_helper.rb'
- 'app/helpers/mirror_helper.rb'
- 'app/helpers/packages_helper.rb'
- 'app/helpers/projects_helper.rb'
- 'app/helpers/visibility_level_helper.rb'
- 'app/policies/project_policy.rb'
- 'app/serializers/deploy_keys/deploy_key_entity.rb'
- 'app/services/ci/create_downstream_pipeline_service.rb'

View File

@@ -16,7 +16,7 @@ module Pajamas
def label_entry
if help_text_content
content_tag(:span, label_content) +
content_tag(:p, help_text_content, class: 'help-text', data: { testid: 'pajamas-component-help-text' })
content_tag(:p, help_text_content, class: 'help-text', data: { testid: 'pajamas-component-help-text' })
else
content_tag(:span, label_content)
end

View File

@@ -57,7 +57,7 @@ class Projects::ApplicationController < ApplicationController
def check_issuables_available!
render_404 unless project.feature_available?(:issues, current_user) ||
project.feature_available?(:merge_requests, current_user)
project.feature_available?(:merge_requests, current_user)
end
def method_missing(method_sym, *arguments, &block)

View File

@@ -140,9 +140,9 @@ module Repositories
def http_download_allowed?
Gitlab::ProtocolAccess.allowed?('http') &&
download_request? &&
container &&
::Users::Anonymous.can?(repo_type.guest_read_ability, container)
download_request? &&
container &&
::Users::Anonymous.can?(repo_type.guest_read_ability, container)
end
def bypass_admin_mode!(&block)

View File

@@ -33,7 +33,7 @@ module Types
by_pipeline = keys.group_by(&:pipeline)
include_needs = keys.any? do |k|
k.requires?(%i[nodes jobs nodes needs]) ||
k.requires?(%i[nodes jobs nodes previousStageJobsOrNeeds])
k.requires?(%i[nodes jobs nodes previousStageJobsOrNeeds])
end
by_pipeline.each do |pl, key_group|

View File

@@ -8,7 +8,7 @@ module IssuablesHelper
def sidebar_gutter_toggle_icon
content_tag(:span, class: 'js-sidebar-toggle-container gl-button-text', data: { is_expanded: !sidebar_gutter_collapsed? }) do
sprite_icon('chevron-double-lg-left', css_class: "js-sidebar-expand #{'hidden' unless sidebar_gutter_collapsed?}") +
sprite_icon('chevron-double-lg-right', css_class: "js-sidebar-collapse #{'hidden' if sidebar_gutter_collapsed?}")
sprite_icon('chevron-double-lg-right', css_class: "js-sidebar-collapse #{'hidden' if sidebar_gutter_collapsed?}")
end
end

View File

@@ -49,35 +49,35 @@ module PackagesHelper
def show_cleanup_policy_link(project)
Gitlab.com? &&
Gitlab.config.registry.enabled &&
project.feature_available?(:container_registry, current_user) &&
project.container_expiration_policy.nil? &&
project.container_repositories.exists?
Gitlab.config.registry.enabled &&
project.feature_available?(:container_registry, current_user) &&
project.container_expiration_policy.nil? &&
project.container_repositories.exists?
end
def show_container_registry_settings(project)
Gitlab.config.registry.enabled &&
Ability.allowed?(current_user, :admin_container_image, project)
Ability.allowed?(current_user, :admin_container_image, project)
end
def show_package_registry_settings(project)
Gitlab.config.packages.enabled &&
Ability.allowed?(current_user, :admin_package, project)
Ability.allowed?(current_user, :admin_package, project)
end
def show_group_package_registry_settings(group)
group.packages_feature_enabled? &&
Ability.allowed?(current_user, :admin_group, group)
Ability.allowed?(current_user, :admin_group, group)
end
def can_delete_packages?(project)
Gitlab.config.packages.enabled &&
Ability.allowed?(current_user, :destroy_package, project)
Ability.allowed?(current_user, :destroy_package, project)
end
def can_delete_group_packages?(group)
group.packages_feature_enabled? &&
Ability.allowed?(current_user, :destroy_package, group)
Ability.allowed?(current_user, :destroy_package, group)
end
def cleanup_settings_data(project)

View File

@@ -342,7 +342,7 @@ module ProjectsHelper
setting = @project.error_tracking_setting
return if setting.blank? || setting.project_slug.blank? ||
setting.organization_slug.blank?
setting.organization_slug.blank?
{
sentry_project_id: setting.sentry_project_id,

View File

@@ -72,7 +72,7 @@ module VisibilityLevelHelper
def available_visibility_levels(form_model)
Gitlab::VisibilityLevel.values.reject do |level|
disallowed_visibility_level?(form_model, level) ||
restricted_visibility_levels.include?(level)
restricted_visibility_levels.include?(level)
end
end

View File

@@ -11,6 +11,7 @@ module Ci
include PgFullTextSearchable
include Gitlab::VisibilityLevel
include Sortable
include EachBatch
self.table_name = 'catalog_resources'

View File

@@ -11,6 +11,7 @@ module Ci
# we want to preserve historical usage data.
class Usage < ::ApplicationRecord
include PartitionedTable
include EachBatch
self.table_name = 'p_catalog_resource_component_usages'
self.primary_key = :id

View File

@@ -49,7 +49,7 @@ The API allows you to create Error Tracking settings for a project. Only for use
the project.
NOTE:
This API is only available when used with [integrated error tracking](../operations/error_tracking.md#integrated-error-tracking).
This API is only available when used with [integrated error tracking](../operations/integrated_error_tracking.md).
```plaintext
PUT /projects/:id/error_tracking/settings

View File

@@ -0,0 +1,44 @@
---
stage: core platform
group: Database
description: 'Cells ADR 008: Cluster wide unique database sequences'
---
# Cells ADR 008: Cluster wide unique database sequences
## Context
Having non-overlapping unique sequences across the cluster is necessary for moving organizations between cells.
This was highlighted in [core-platform-section/-/epics/3](https://gitlab.com/groups/gitlab-org/core-platform-section/-/epics/3),
and different solutions were discussed in <https://gitlab.com/gitlab-org/core-platform-section/data-stores/-/issues/102>.
## Decision
All secondary cells will have bigint IDs on creation. While provisioning, each of them will get a
large range of sequences to use from the [Topology Service](../topology_service.md).
When a cell is decommissioned, its ranges will be
returned to the Topology Service. If a returned range is large enough for another cell, it can be handed out
again, so that short-lived cells don't exhaust large parts of the key range.
We will update the primary cell's sequences to have a `maxval`, set to the smallest range possible, to make sure they
won't collide with any secondary cell's sequences.
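As a rough sketch of what capping a sequence on the primary cell could look like (the migration class, sequence name, and boundary value below are illustrative assumptions, not the actual implementation):

```ruby
# Hypothetical sketch only: cap an existing sequence on the primary cell so it
# can never generate values that overlap a range handed to a secondary cell.
class CapPrimaryCellSequence < Gitlab::Database::Migration[2.2]
  PRIMARY_CELL_MAXVAL = (1 << 40) - 1 # illustrative boundary, not the real allocation scheme

  def up
    execute("ALTER SEQUENCE projects_id_seq MAXVALUE #{PRIMARY_CELL_MAXVAL}")
  end

  def down
    execute("ALTER SEQUENCE projects_id_seq NO MAXVALUE")
  end
end
```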
## Consequences
The above decision covers us until [Cells 1.5](../iterations/cells-1.5.md), but not [Cells 2.0](../iterations/cells-2.0.md).
To support Cells 2.0 (that is, to allow moving organizations from
secondary cells to the primary), we need all integer IDs in the primary to be converted to `bigint`. This is an
ongoing effort as part of [core-platform-section/data-stores/-/issues/111](https://gitlab.com/gitlab-org/core-platform-section/data-stores/-/issues/111)
and is estimated to take around 12 months.
## Alternatives
In addition to the [earliest proposal](../rejected/impacted_features/database_sequences.md), we evaluated
the solutions below before making the final decision.
- [Solution 1: Global Service to claim sequences](https://gitlab.com/gitlab-org/core-platform-section/data-stores/-/issues/102#note_1853252715)
- [Solution 2: Converting all int IDs to bigint to generate uniq IDs](https://gitlab.com/gitlab-org/core-platform-section/data-stores/-/issues/102#note_1853260434)
- [Solution 3: Using composite primary key [(existing PKs), original cell ID]](https://gitlab.com/gitlab-org/core-platform-section/data-stores/-/issues/102#note_1853265147)
- [Solution 4: Use bigint IDs only for Secondary cell](https://gitlab.com/gitlab-org/core-platform-section/data-stores/-/issues/102#note_1853328985)
- [Solution 5: Using Logical replication](https://gitlab.com/gitlab-org/core-platform-section/data-stores/-/issues/102#note_1857486154)

View File

@@ -1,74 +1,11 @@
---
stage: enablement
group: Tenant Scale
description: 'Cells: Database Sequences'
redirect_to: '../rejected/impacted_features/database_sequences.md'
remove_date: '2024-08-15'
---
<!-- vale gitlab.FutureTense = NO -->
This document was moved to [another location](../rejected/impacted_features/database_sequences.md).
This document is a work-in-progress and represents a very early state of the
Cells design. Significant aspects are not documented, though we expect to add
them in the future. This is one possible architecture for Cells, and we intend to
contrast this with alternatives before deciding which approach to implement.
This documentation will be kept even if we decide not to implement this so that
we can document the reasons for not choosing this approach.
# Cells: Database Sequences
GitLab today ensures that every database row created has a unique ID, allowing access to a merge request, CI Job or Project by a known global ID.
Cells will use many distinct and not connected databases, each of them having a separate ID for most entities.
At a minimum, any ID referenced between a Cell and the shared schema will need to be unique across the cluster to avoid ambiguous references.
Beyond the required global IDs, it might also be desirable to retain globally unique IDs for all database rows to allow migrating resources between Cells in the future.
## 1. Definition
## 2. Data flow
## 3. Proposal
These are some preliminary ideas for how we can retain unique IDs across the system.
### 3.1. UUID
Instead of using incremental sequences, use UUIDs (128 bit) stored in the database.
- This might break existing IDs and requires adding a UUID column for all existing tables.
- This makes all indexes larger, as it requires storing 128 bits instead of 32/64 bits in the index.
### 3.2. Use Cell index encoded in ID
Because a significant number of tables already use 64-bit ID numbers, we could use the most significant bits (MSB) to encode the Cell ID:
- This might limit the number of Cells that can be enabled in a system, as we might decide to only allocate 1024 possible Cell numbers.
- This would make it possible to migrate IDs between Cells, because even if an entity from Cell 1 is migrated to Cell 100 this ID would still be unique.
- If resources are migrated the ID itself will not be enough to decode the Cell number and we would need a lookup table.
- This requires updating all IDs to 64 bits.
### 3.3. Allocate sequence ranges from central place
Each Cell might receive its own range of sequences as they are consumed from a centrally managed place.
Once a Cell consumes all IDs assigned for a given table it would be replenished and a next range would be allocated.
Ranges would be tracked to provide a faster lookup table if a random access pattern is required.
- This might make IDs migratable between Cells, because even if an entity from Cell 1 is migrated to Cell 100 this ID would still be unique.
- If resources are migrated the ID itself will not be enough to decode the Cell number and we would need a much more robust lookup table as we could be breaking previously assigned sequence ranges.
- This does not require updating all IDs to 64 bits.
- This adds some performance penalty to all `INSERT` statements in Postgres or at least from Rails as we need to check for the sequence number and potentially wait for our range to be refreshed from the ID server.
- The available range will need to be stored and incremented in a centralized place so that concurrent transactions cannot possibly get the same value.
### 3.4. Define only some tables to require unique IDs
Maybe it is acceptable only for some tables to have globally unique IDs. It could be Projects, Groups and other top-level entities.
All other tables like `merge_requests` would only offer a Cell-local ID, but when referenced outside it would rather use an IID (an ID that is monotonic in the context of a given resource, like a Project).
- This makes the ID 10000 for `merge_requests` present on all Cells, which might sometimes be confusing regarding the uniqueness of the resource.
- This might make random access by ID (if ever needed) impossible without using a composite key, like: `project_id+merge_request_id`.
- This would require us to implement a transformation/generation of new IDs if we need to migrate records to another Cell. This can lead to very difficult migration processes when these IDs are also used as foreign keys for other records being migrated.
- If IDs need to change when moving between Cells this means that any links to records by ID would no longer work even if those links included the `project_id`.
- If we plan to allow these IDs to not be unique and change the unique constraint to be based on a composite key then we'd need to update all foreign key references to be based on the composite key.
## 4. Evaluation
## 4.1. Pros
## 4.2. Cons
<!-- This redirect file can be deleted after <2024-08-15>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

View File

@@ -260,6 +260,7 @@ This section links all different technical proposals that are being evaluated.
- [Topology Service](topology_service.md)
- Planned: Indexing Service
- [Feature Flags](feature_flags.md)
- [Cluster wide unique sequences](unique_sequences.md)
## Impacted features
@@ -272,7 +273,6 @@ Below is a list of known affected features with preliminary proposed solutions.
- [Cells: CI Runners](impacted_features/ci-runners.md)
- [Cells: Container Registry](impacted_features/container-registry.md)
- [Cells: Contributions: Forks](impacted_features/contributions-forks.md)
- [Cells: Database Sequences](impacted_features/database-sequences.md)
- [Cells: Data Migration](impacted_features/data-migration.md)
- [Cells: Explore](impacted_features/explore.md)
- [Cells: Git Access](impacted_features/git-access.md)
@@ -390,6 +390,7 @@ The Tenant Scale team sees an opportunity to use GitLab Dedicated as a base for
- [ADR-005: Cells use Flexible Reference Architectures](decisions/005_flexible_reference_architectures.md)
- [ADR-006: Use Geo for Disaster Recovery](decisions/006_disaster_recovery_geo.md)
- [ADR-007: Cells 1.0 for internal customers only](decisions/007_internal_customers.md)
- [ADR-008: Cluster wide unique database sequences](decisions/008_database_sequences.md)
## Links

View File

@@ -0,0 +1,79 @@
---
stage: enablement
group: Tenant Scale
description: 'Cells: Database Sequences'
status: rejected
---
_This was superseded by the [Cells: Unique sequences](../../unique_sequences.md) blueprint._
<!-- vale gitlab.FutureTense = NO -->
# Overview
This document is a work-in-progress and represents a very early state of the
Cells design. Significant aspects are not documented, though we expect to add
them in the future. This is one possible architecture for Cells, and we intend to
contrast this with alternatives before deciding which approach to implement.
This documentation will be kept even if we decide not to implement this so that
we can document the reasons for not choosing this approach.
# Cells: Database Sequences
GitLab today ensures that every database row created has a unique ID, allowing access to a merge request, CI Job or Project by a known global ID.
Cells will use many distinct and not connected databases, each of them having a separate ID for most entities.
At a minimum, any ID referenced between a Cell and the shared schema will need to be unique across the cluster to avoid ambiguous references.
Beyond the required global IDs, it might also be desirable to retain globally unique IDs for all database rows to allow migrating resources between Cells in the future.
## 1. Definition
## 2. Data flow
## 3. Proposal
These are some preliminary ideas for how we can retain unique IDs across the system.
### 3.1. UUID
Instead of using incremental sequences, use UUIDs (128 bit) stored in the database.
- This might break existing IDs and requires adding a UUID column for all existing tables.
- This makes all indexes larger, as it requires storing 128 bits instead of 32/64 bits in the index.
### 3.2. Use Cell index encoded in ID
Because a significant number of tables already use 64-bit ID numbers, we could use the most significant bits (MSB) to encode the Cell ID:
- This might limit the number of Cells that can be enabled in a system, as we might decide to only allocate 1024 possible Cell numbers.
- This would make it possible to migrate IDs between Cells, because even if an entity from Cell 1 is migrated to Cell 100 this ID would still be unique.
- If resources are migrated the ID itself will not be enough to decode the Cell number and we would need a lookup table.
- This requires updating all IDs to 64 bits.
### 3.3. Allocate sequence ranges from central place
Each Cell might receive its own range of sequences as they are consumed from a centrally managed place.
Once a Cell consumes all IDs assigned for a given table it would be replenished and a next range would be allocated.
Ranges would be tracked to provide a faster lookup table if a random access pattern is required.
- This might make IDs migratable between Cells, because even if an entity from Cell 1 is migrated to Cell 100 this ID would still be unique.
- If resources are migrated the ID itself will not be enough to decode the Cell number and we would need a much more robust lookup table as we could be breaking previously assigned sequence ranges.
- This does not require updating all IDs to 64 bits.
- This adds some performance penalty to all `INSERT` statements in Postgres or at least from Rails as we need to check for the sequence number and potentially wait for our range to be refreshed from the ID server.
- The available range will need to be stored and incremented in a centralized place so that concurrent transactions cannot possibly get the same value.
### 3.4. Define only some tables to require unique IDs
Maybe it is acceptable only for some tables to have globally unique IDs. It could be Projects, Groups and other top-level entities.
All other tables like `merge_requests` would only offer a Cell-local ID, but when referenced outside it would rather use an IID (an ID that is monotonic in the context of a given resource, like a Project).
- This makes the ID 10000 for `merge_requests` present on all Cells, which might sometimes be confusing regarding the uniqueness of the resource.
- This might make random access by ID (if ever needed) impossible without using a composite key, like: `project_id+merge_request_id`.
- This would require us to implement a transformation/generation of new IDs if we need to migrate records to another Cell. This can lead to very difficult migration processes when these IDs are also used as foreign keys for other records being migrated.
- If IDs need to change when moving between Cells this means that any links to records by ID would no longer work even if those links included the `project_id`.
- If we plan to allow these IDs to not be unique and change the unique constraint to be based on a composite key then we'd need to update all foreign key references to be based on the composite key.
## 4. Evaluation
## 4.1. Pros
## 4.2. Cons

View File

@@ -132,7 +132,7 @@ service SequenceService {
}
```
The purpose of this service is to global allocator of the [Database Sequences](impacted_features/database-sequences.md).
The purpose of this service is to be the global allocator of [Database Sequences](decisions/008_database_sequences.md).
#### Sequence Allocation workflow
@@ -269,7 +269,7 @@ sequenceDiagram
Note over Cell 1: User not found
Cell 1->>+TS / Classify Service: Classify(Login) "john"
TS / Classify Service-->>- Cell 1: "john": Cell 2
Cell 1 ->>- HTTP Router: "Cell 2". <br /> 307 Temporary Redirect
Cell 1 ->>- HTTP Router: "Cell 2". <br /> 307 Temporary Redirect
HTTP Router ->> User: Set Header Cell "Cell 2". <br /> 307 Temporary Redirect
User->>HTTP Router: Headers: Cell: Cell 2 <br /> Sign in with Username: john, password: test123.
HTTP Router->>+Cell 2: Sign in with Username: john, password: test123.

View File

@@ -0,0 +1,32 @@
---
stage: core platform
group: database
description: 'Cells: Unique sequences'
status: ongoing
---
<!-- vale gitlab.FutureTense = NO -->
# Cells: Unique Sequences
GitLab today ensures that every database row created has a unique ID, allowing access to a merge request, CI Job or Project by a known global ID.
Cells will use many distinct and not connected databases, each of them having a separate ID for most entities.
At a minimum, any ID referenced between a Cell and the shared schema will need to be unique across the cluster to avoid ambiguous references.
Beyond the required global IDs, it might also be desirable to retain globally unique IDs for all database rows to allow moving organizations between Cells.
## 1. Decision
Secondary cells will get bigint IDs at provisioning time, and the primary cell's sequences will be altered to make sure they
don't overlap with any secondary cell's sequences.
For more details on the decision and the other solutions evaluated, see [ADR 008](decisions/008_database_sequences.md).
## 2. Goal
Each cell will use the Topology Service's [Sequence Service](topology_service.md#sequence-service) to get the range of
sequences to use. The Topology Service will make sure each allocated sequence range is unique across the cluster.
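As an illustration of the intended flow, a cell could apply an allocated range to one of its local sequences as shown below. The client class and response fields are assumptions for the sketch; the real interface is the gRPC API defined by the Topology Service.

```ruby
# Hypothetical sketch: claim a range from the Sequence Service, then constrain
# the local Postgres sequence to that range. Names are illustrative only.
range = TopologyService::SequenceService.allocate_range(table: 'projects')

ApplicationRecord.connection.execute(<<~SQL)
  ALTER SEQUENCE projects_id_seq
    MINVALUE #{range.min_value}
    MAXVALUE #{range.max_value}
    RESTART WITH #{range.min_value}
SQL
```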
## 3. Workflow
This section will be updated with functional diagrams on completion of [core-platform-section/data-stores/-/issues/106](https://gitlab.com/gitlab-org/core-platform-section/data-stores/-/issues/106).

View File

@@ -20,7 +20,7 @@ In order to obtain a GCP service key for local development, follow the steps bel
- If you are using an individual GCP project, you may also need to enable the Vertex AI API:
1. Visit the [welcome page](https://console.cloud.google.com/welcome) and choose your project (for example, `jdoe-5d23dpe`).
1. Go to **APIs & Services > Enabled APIs & services**.
1. Select **+ Enable APIs and Services**.
1. Select **Enable APIs and Services**.
1. Search for `Vertex AI API`.
1. Select **Vertex AI API**, then select **Enable**.
- Install the [`gcloud` CLI](https://cloud.google.com/sdk/docs/install)
@@ -30,7 +30,7 @@ In order to obtain a GCP service key for local development, follow the steps bel
```ruby
# PROJECT_ID = "your-gcp-project-name"
Gitlab::CurrentSettings.update(vertex_ai_project: PROJECT_ID)
Gitlab::CurrentSettings.update(vertex_ai_project: "PROJECT_ID")
```
#### Anthropic
@@ -38,7 +38,7 @@ Gitlab::CurrentSettings.update(vertex_ai_project: PROJECT_ID)
[After filling out an access request](https://gitlab.com/gitlab-com/team-member-epics/access-requests/-/issues/new?issuable_template=AI_Access_Request), you can sign up for an Anthropic account and create an API key. You will then configure it:
```ruby
Gitlab::CurrentSettings.update!(anthropic_api_key: <insert API key>)
Gitlab::CurrentSettings.update!(anthropic_api_key: "<insert API key>")
```
### Local setup
@@ -67,9 +67,9 @@ the feature must request to the [AI Gateway](../../architecture/blueprints/ai_ga
Alternatively, you can create an `env.runit` file in the root of your GDK with the above snippet.
1. Enable all AI feature flags:
```shell
rake gitlab:duo:enable_feature_flags
```
```shell
rake gitlab:duo:enable_feature_flags
```
1. Set up AI Gateway: [internal video tutorial](https://youtu.be/ePoHqvw78oQ)
1. [Install it](https://gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist/-/blob/main/README.md#how-to-run-the-server-locally).
@@ -525,16 +525,16 @@ embeddings to be used in specs.
1. Enable [`pgvector`](https://gitlab.com/gitlab-org/gitlab-development-kit/-/blob/main/doc/howto/pgvector.md#enable-pgvector-in-the-gdk) in GDK
1. Enable the embedding database in GDK
```shell
gdk config set gitlab.rails.databases.embedding.enabled true
```
```shell
gdk config set gitlab.rails.databases.embedding.enabled true
```
1. Run `gdk reconfigure`
1. Run database migrations to create the embedding database in the `gitlab` folder of the GDK
```shell
RAILS_ENV=development bin/rails db:migrate
```
```shell
RAILS_ENV=development bin/rails db:migrate
```
### Populate

View File

@@ -115,6 +115,7 @@ When the design is ready, _before_ starting its implementation:
or [GitLab Designs feature](../../user/project/issues/design_management.md).
See [when you should use each tool](https://handbook.gitlab.com/handbook/product/ux/product-designer/#deliver).
- Document user flow and states (for example, using [Mermaid flowcharts in Markdown](../../user/markdown.md#mermaid)).
- Document [design tokens](https://design.gitlab.com/product-foundations/design-tokens) (for example using the [design token annotation](https://www.figma.com/file/dWP1ldkBU4jeUqx5rO3jrn/Annotations-and-utilities?type=design&node-id=2002-34) in Figma).
- Document animations and transitions.
- Document responsive behaviors.
- Document non-evident behaviors (for example, field is auto-focused).

View File

@@ -278,7 +278,7 @@ There may be times where you need to return data through the GitLab API that is
another system. In these cases you may have to paginate a third-party's API.
An example of this is with our [Error Tracking](../../operations/error_tracking.md) implementation,
where we proxy [Sentry errors](../../operations/error_tracking.md#sentry-error-tracking) through
where we proxy [Sentry errors](../../operations/sentry_error_tracking.md) through
the GitLab API. We do this by calling the Sentry API which enforces its own pagination rules.
This means we cannot access the collection within GitLab to perform our own custom pagination.
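As a minimal sketch of what such pass-through pagination looks like (the helper names below are hypothetical, not the actual ErrorTracking client), the endpoint forwards the caller's cursor and relays the cursor the third party returns:

```ruby
# Sketch of cursor pass-through pagination against a third-party API.
def list_proxied_errors(cursor: nil)
  response = Gitlab::HTTP.get(sentry_issues_url, query: { cursor: cursor }.compact)

  {
    errors: Gitlab::Json.parse(response.body),
    # Sentry advertises the next page through a cursor in the Link header.
    next_cursor: parse_link_header_cursor(response.headers['link'])
  }
end
```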

View File

@@ -6,13 +6,11 @@ info: To determine the technical writer assigned to the Stage/Group associated w
# Error Tracking
DETAILS:
**Tier:** Free, Premium, Ultimate
**Offering:** GitLab.com, Self-managed, GitLab Dedicated
Error Tracking allows developers to discover and view errors generated by their application. Because error information is surfaced where the code is developed, this increases efficiency and awareness. Users can choose between [GitLab Integrated error tracking](#integrated-error-tracking) and [Sentry based](#sentry-error-tracking) backends.
To leave feedback about Error Tracking bugs or functionality, comment in the [feedback issue](https://gitlab.com/gitlab-org/opstrace/opstrace/-/issues/2362) or open a [new issue](https://gitlab.com/gitlab-org/opstrace/opstrace/-/issues/new).
Error Tracking allows developers to discover and view errors generated by their
application. Because error information is surfaced where the code is developed,
this increases efficiency and awareness. Users can choose between
[GitLab Integrated error tracking](integrated_error_tracking.md) and
[Sentry based](sentry_error_tracking.md) backends.
## How error tracking works
@@ -21,242 +19,15 @@ For error tracking to work, you need:
- **Your application configured with the Sentry SDK:** when the error happens, Sentry SDK captures information
about it and sends it over the network to the backend. The backend stores information about all
errors.
- **Error tracking backend:** the backend can be either GitLab itself or Sentry. When it's GitLab,
you name it _integrated error tracking_ because you don't need to set up a separate backend. It's
already part of the product.
- To use the GitLab backend, see [integrated error tracking](#integrated-error-tracking).
- To use Sentry as the backend, see [Sentry error tracking](#sentry-error-tracking).
Whatever backend you choose, the [error tracking UI](#error-tracking-list)
is the same.
## Integrated error tracking
DETAILS:
**Tier:** Free, Premium, Ultimate
**Offering:** GitLab.com
This guide provides you with basics of setting up error tracking for your project, using examples from different languages.
Error tracking provided by GitLab Observability is based on [Sentry SDK](https://docs.sentry.io/). Check the [Sentry SDK documentation](https://docs.sentry.io/platforms/) for more thorough examples of how you can use Sentry SDK in your application.
According to the Sentry [data model](https://develop.sentry.dev/sdk/envelopes/#data-model), the item types are:
- [Event](https://develop.sentry.dev/sdk/event-payloads/)
- [Transactions](https://develop.sentry.dev/sdk/event-payloads/transaction/)
- [Attachments](https://develop.sentry.dev/sdk/envelopes/#attachment)
- [Session](https://develop.sentry.dev/sdk/envelopes/#session)
- [Sessions](https://develop.sentry.dev/sdk/envelopes/#sessions)
- [User feedback](https://develop.sentry.dev/sdk/envelopes/#user-feedback) (also known as user report)
- [Client report](https://develop.sentry.dev/sdk/client-reports/)
### Enable error tracking for a project
Regardless of the programming language you use, you first need to enable error tracking for your GitLab project.
The `GitLab.com` instance is used in this guide.
Prerequisites:
- You have a project for which you want to enable error tracking. To learn how to create a new one, see [Create a project](../user/project/index.md).
To enable error tracking with GitLab as the backend:
1. In your project, go to **Settings > Monitor**.
1. Expand **Error Tracking**.
1. Under **Enable error tracking**, select the **Active** checkbox.
1. Under **Error tracking backend**, select **GitLab**.
1. Select **Save changes**.
1. Copy the Data Source Name (DSN) string. You need it for configuring your SDK implementation.
### Configure user tracking
To track the number of users impacted by an error:
- In the instrumentation code, ensure each user is uniquely identified.
You can use a user ID, name, email address, or IP address to identify a user.
For example, if you use [Python](https://docs.sentry.io/platforms/python/enriching-events/identify-user/), you can identify a user by email:
```python
import sentry_sdk
sentry_sdk.set_user({"email": "john.doe@example.com"})
```
For more information about user identification, see the [Sentry documentation](https://docs.sentry.io/).
## Error tracking list
After your application has emitted errors to the Error Tracking API through the Sentry SDK,
they should be available under the **Monitor > Error Tracking** tab/section.
![MonitorListErrors](img/list_errors_v16_0.png)
## Error tracking details
In the Error Details view you can see more details of the exception, including number of occurrences,
users affected, first seen, and last seen dates.
You can also review the stack trace.
![MonitorDetailErrors](img/detail_errors_v16_0.png)
## Emit errors
### Supported language SDKs & Sentry types
The following table lists all event types available through Sentry SDK, and whether they are supported by GitLab Error Tracking.
| Language | Tested SDK client and version | Endpoint | Supported item types |
| -------- | ------------------------------- | ---------- | --------------------------------- |
| Go | `sentry-go/0.20.0` | `store` | `exception`, `message` |
| Java | `sentry.java:6.18.1` | `envelope` | `exception`, `message` |
| NodeJS | `sentry.javascript.node:7.38.0` | `envelope` | `exception`, `message` |
| PHP | `sentry.php/3.18.0` | `store` | `exception`, `message` |
| Python | `sentry.python/1.21.0` | `envelope` | `exception`, `message`, `session` |
| Ruby | `sentry.ruby:5.9.0` | `envelope` | `exception`, `message` |
| Rust | `sentry.rust/0.31.0` | `envelope` | `exception`, `message`, `session` |
For a detailed version of this table, see [this issue](https://gitlab.com/gitlab-org/opstrace/opstrace/-/issues/1737).
## Usage examples
You can also find working samples for all [supported language SDKs](https://gitlab.com/gitlab-org/opstrace/opstrace/-/tree/main/test/sentry-sdk/testdata/supported-sdk-clients).
Each listed program shows a basic example of how to capture exceptions, events,
or messages with the respective SDK.
For more in-depth documentation,
see [Sentry SDK's documentation](https://docs.sentry.io/) specific to the used language.
## Rotate generated DSN
The Sentry Data Source Name, or DSN, (client key) is a secret and it should not be exposed to the public.
In case of a leak, rotate the Sentry DSN by following these steps:
1. [Create an access token](../user/profile/personal_access_tokens.md#create-a-personal-access-token)
by selecting your profile picture in GitLab.com.
Then select Preferences, and then Access Token. Make sure you add API scope.
1. Using the [error tracking API](../api/error_tracking.md),
create a new Sentry DSN:
```shell
curl --request POST --header "PRIVATE-TOKEN: <your_access_token>" \
--header "Content-Type: application/json" \
"https://gitlab.example.com/api/v4/projects/<your_project_number>/error_tracking/client_keys"
```
1. Get the available client keys (Sentry DSNs).
Ensure that the newly created Sentry DSN is in place.
Then note down the key ID of the old client key:
```shell
curl --header "PRIVATE-TOKEN: <your_access_token>" "https://gitlab.example.com/api/v4/projects/<your_project_number>/error_tracking/client_keys"
```
1. Delete the old client key.
```shell
curl --request DELETE --header "PRIVATE-TOKEN: <your_access_token>" "https://gitlab.example.com/api/v4/projects/<your_project_number>/error_tracking/client_keys/<key_id>"
```
## Debug SDK issues
The majority of languages supported by Sentry expose `debug` option as part of initialization.
This can be helpful when debugging issues with sending errors. Otherwise,
there are options that could allow outputting JSON before it is sent to the API.
## Sentry error tracking
[Sentry](https://sentry.io/) is an open source error tracking system. GitLab allows
administrators to connect Sentry to GitLab
so users can view a list of Sentry errors in GitLab.
### Deploying Sentry
You can sign up to the cloud-hosted [Sentry](https://sentry.io) or deploy your own
[on-premise instance](https://github.com/getsentry/onpremise/).
### Enable Sentry integration for a project
GitLab provides a way to connect Sentry to your project.
Prerequisites:
- You must have at least the Maintainer role for the project.
To enable the Sentry integration:
1. Sign up to Sentry.io or [deploy your own](#deploying-sentry) Sentry instance.
1. [Create a new Sentry project](https://docs.sentry.io/product/sentry-basics/integrate-frontend/create-new-project/).
For each GitLab project that you want to integrate, you should create a new Sentry project.
1. Find or generate a [Sentry auth token](https://docs.sentry.io/api/auth/#auth-tokens).
For the SaaS version of Sentry, you can find or generate the auth token at [https://sentry.io/api/](https://sentry.io/api/).
You should give the token at least the following scopes: `project:read`,
`event:read`, and
`event:write` (for resolving events).
1. In GitLab, enable and configure Error Tracking:
1. On the left sidebar, select **Search or go to** and find your project.
1. Select **Settings > Monitor > Error Tracking**.
1. Under **Enable error tracking**, select the **Active** checkbox.
1. Under **Error tracking backend**, select **Sentry**.
1. Under **Sentry API URL**, enter your Sentry hostname. For example,
enter `https://sentry.example.com`.
For the SaaS version of Sentry, the hostname is `https://sentry.io`.
1. Under **Auth Token**, enter the token you previously generated.
1. To test the connection to Sentry and populate the **Project** dropdown list,
select **Connect**.
1. From the **Project** list, choose a Sentry project to link to your GitLab project.
1. Select **Save changes**.
You can now visit **Monitor > Error Tracking** in your project's sidebar to
[view a list](#error-tracking-list) of Sentry errors.
### Sentry's GitLab integration
You might also want to enable Sentry's GitLab integration by following the steps
in the [Sentry documentation](https://docs.sentry.io/product/integrations/source-code-mgmt/gitlab/).
### Enable GitLab Runner
To configure GitLab Runner with Sentry, you must add the value for `sentry_dsn`
to your runner's `config.toml` configuration file, as referenced in
[Advanced configuration](https://docs.gitlab.com/runner/configuration/advanced-configuration.html).
If you're asked for the project type while setting up Sentry, select **Go**.
To rectify the following error, specify the deprecated DSN in **Sentry.io > Project Settings > Client Keys (DSN) > Show deprecated DSN**.
```plaintext
ERROR: Sentry failure builds=0 error=raven: dsn missing private key
```
### Analyzing bar graph data
The last seen timestamp gives specific details about the error itself. If you hover above the box with your mouse, you can see the specific timestamp of when
the error last happened. In the following example, the error happened at 11:41 CEST.
![MonitorDetailErrors](img/last_seen_v16.10.png)
The graph below is measured as an hourly bucket, with the total number of errors in this hour counted. In this example, the 11 am hour bucket has seen 239 errors. The last seen field states 11 hours and will not be updated until the full hour is complete. This is because of the library we use for this
call ([`import * as timeago from 'timeago.js'`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/assets/javascripts/lib/utils/datetime/timeago_utility.js#L1)).
![MonitorDetailErrors](img/error_bucket_v16.10.png)
## Data retention
GitLab has a retention limit of 90 days for all errors.
## Troubleshooting
When working with Error Tracking, you might encounter the following issues.
### Error `Connection failed. Check auth token and try again`
If the Monitor feature is disabled in the [project settings](../user/project/settings/index.md#configure-project-features-and-permissions),
you might see an error when you try to [enable Sentry integration for a project](#enable-sentry-integration-for-a-project).
The resulting request to `/project/path/-/error_tracking/projects.json?api_host=https:%2F%2Fsentry.example.com%2F&token=<token>` returns a 404 status.
To fix this issue, enable the Monitor feature for the project.
- **Error tracking backend:** the backend can be either GitLab itself or Sentry.
- To use the GitLab backend, see [GitLab Integrated error tracking](integrated_error_tracking.md).
Integrated error tracking is available only on GitLab.com.
- To use Sentry as the backend, see [Sentry-based error tracking](sentry_error_tracking.md).
Sentry-based error tracking is available only for self-managed instances.
Here is a summary of the capabilities of each option:
| Feature | Availability | Data collection | Data storage | Data query |
| ----------- | ----------- | ----------- | ----------- | ----------- |
| [GitLab integrated Error Tracking](integrated_error_tracking.md) | GitLab.com only | with [Sentry SDK](https://github.com/getsentry/sentry?tab=readme-ov-file#official-sentry-sdks) | Data stored on GitLab.com | with GitLab.com only |
| [Sentry-based Error Tracking](sentry_error_tracking.md) | GitLab.com, GitLab Dedicated, Self-managed | with [Sentry SDK](https://github.com/getsentry/sentry?tab=readme-ov-file#official-sentry-sdks) | Data stored on Sentry instance (Cloud Sentry.io or [Self-Hosted Sentry](https://develop.sentry.dev/self-hosted/)) | with GitLab.com or Sentry instance |

View File

@@ -0,0 +1,173 @@
---
stage: Monitor
group: Observability
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://handbook.gitlab.com/handbook/product/ux/technical-writing/#assignments
---
# Integrated error tracking
DETAILS:
**Tier:** Free, Premium, Ultimate
**Offering:** GitLab.com
This guide provides you with the basics of setting up integrated error tracking for
your project, using examples from different languages.
Error tracking provided by GitLab Observability is based on
[Sentry SDK](https://docs.sentry.io/). Check the
[Sentry SDK documentation](https://docs.sentry.io/platforms/) for more thorough
examples of how you can use Sentry SDK in your application.
## Enable error tracking for a project
Regardless of the programming language you use, you first need to enable error tracking
for your GitLab project. This guide uses the `GitLab.com` instance.
Prerequisites:
- You have a project for which you want to enable error tracking. To learn how to
create a new one, see [Create a project](../user/project/index.md).
To enable error tracking with GitLab as the backend:
1. On the left sidebar, select **Search or go to** and find your project.
1. Go to **Settings > Monitor**.
1. Expand **Error Tracking**.
1. For **Enable error tracking**, select **Active**.
1. For **Error tracking backend**, select **GitLab**.
1. Select **Save changes**.
1. Copy the **Data Source Name (DSN)** string. You need it to configure your SDK implementation.
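For example, with the Sentry Ruby SDK, the copied DSN is all the SDK needs to start reporting errors. The DSN value below is a placeholder; see the [Sentry SDK documentation](https://docs.sentry.io/platforms/) for your language.

```ruby
require 'sentry-ruby'

Sentry.init do |config|
  config.dsn = '<your_gitlab_dsn>' # the DSN string copied from the Error Tracking settings
end
```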
## Configure user tracking
To track the number of users affected by an error:
- In the instrumentation code, ensure each user is uniquely identified.
You can use a user ID, name, email address, or IP address to identify a user.
For example, if you use
[Python](https://docs.sentry.io/platforms/python/enriching-events/identify-user/),
you can identify a user by email:
```python
import sentry_sdk
sentry_sdk.set_user({"email": "john.doe@example.com"})
```
For more information about user identification, see the [Sentry documentation](https://docs.sentry.io/).
## View tracked errors
After your application emits errors to the Error Tracking API through the Sentry SDK,
those errors are available in the GitLab UI. To view them:
1. On the left sidebar, select **Search or go to** and find your project.
1. Go to **Monitor > Error Tracking** to see the list of open errors:
![MonitorListErrors](img/list_errors_v16_0.png)
1. Select an error to see the **Error details** view:
![MonitorDetailErrors](img/detail_errors_v16_0.png)
This page shows more details of the exception, including:
- Total number of occurrences.
- Total users affected.
- First seen: the date, and the commit (**{commit}**).
- Last seen date, shown as a relative date. To see the timestamp, hover over the date with your mouse.
- A bar graph of error frequency per hour. Hover over a bar to see the total number of errors in that hour.
- A stack trace.
### Analyze an error's details
On the **Error details** page, point to the **Last seen** date to see a full
timestamp of the error. In this example, the error happened at 11:41 CEST:
![MonitorDetailErrors](img/last_seen_v16.10.png)
The **Last 24 hours** graph measures how many times this error occurred per hour.
If you point at the `11 am` bar, a dialog shows that the error was seen 239 times:
![MonitorDetailErrors](img/error_bucket_v16.10.png)
The **Last seen** field does not update until the full hour is complete, due to
the library used for the call
[`import * as timeago from 'timeago.js'`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/assets/javascripts/lib/utils/datetime/timeago_utility.js#L1).
## Emit errors
### Supported language SDKs & Sentry types
GitLab Error Tracking supports these event types:
| Language | Tested SDK client and version | Endpoint | Supported item types |
| -------- | ------------------------------- | ---------- | --------------------------------- |
| Go | `sentry-go/0.20.0` | `store` | `exception`, `message` |
| Java | `sentry.java:6.18.1` | `envelope` | `exception`, `message` |
| NodeJS | `sentry.javascript.node:7.38.0` | `envelope` | `exception`, `message` |
| PHP | `sentry.php/3.18.0` | `store` | `exception`, `message` |
| Python | `sentry.python/1.21.0` | `envelope` | `exception`, `message`, `session` |
| Ruby | `sentry.ruby:5.9.0` | `envelope` | `exception`, `message` |
| Rust | `sentry.rust/0.31.0` | `envelope` | `exception`, `message`, `session` |
For a detailed version of this table, see
[issue 1737](https://gitlab.com/gitlab-org/opstrace/opstrace/-/issues/1737).
Working examples for each of the
[supported language SDKs](https://gitlab.com/gitlab-org/opstrace/opstrace/-/tree/main/test/sentry-sdk/testdata/supported-sdk-clients)
are available. Each example shows how to capture exceptions, events, or messages
with that SDK. For more in-depth documentation, see
[Sentry SDK's documentation](https://docs.sentry.io/) for that specific language.
## Rotate generated DSN
The Sentry Data Source Name (DSN), also known as a client key, is a secret.
Don't expose it to the public. If it is leaked or exposed, rotate the Sentry DSN.
Prerequisites:
- You need the numeric [project ID](../user/project/working_with_projects.md#access-the-project-overview-page-by-using-the-project-id)
for your project.
To rotate the Sentry DSN:
1. [Create an access token](../user/profile/personal_access_tokens.md#create-a-personal-access-token)
with the `api` scope. Copy this value, as you need it in later steps.
1. Use the [error tracking API](../api/error_tracking.md) to create a new Sentry DSN,
replacing `<your_access_token>` and `<your_project_number>`:
```shell
curl --request POST \
--header "PRIVATE-TOKEN: <your_access_token>" \
--header "Content-Type: application/json" \
--url "https://gitlab.example.com/api/v4/projects/<your_project_number>/error_tracking/client_keys"
```
1. Get the available client keys (Sentry DSNs). Ensure your newly created Sentry DSN
is in place, and note the key ID of the old client key. Run this command, replacing `<your_access_token>`
and `<your_project_number>`:
```shell
curl --header "PRIVATE-TOKEN: <your_access_token>" \
--url "https://gitlab.example.com/api/v4/projects/<your_project_number>/error_tracking/client_keys"
```
1. Delete the old client key.
```shell
curl --request DELETE \
--header "PRIVATE-TOKEN: <your_access_token>" \
--url "https://gitlab.example.com/api/v4/projects/<your_project_number>/error_tracking/client_keys/<key_id>"
```
## Debug SDK issues
The majority of languages supported by Sentry expose a `debug` option as part
of initialization. The `debug` option can help you when you debug issues with
sending errors. Other options exist to output JSON before sending the data to the API.
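For example, with the Sentry Ruby SDK (other language SDKs expose an equivalent option):

```ruby
Sentry.init do |config|
  config.dsn = '<your_gitlab_dsn>'
  config.debug = true # log SDK activity to help diagnose event delivery issues
end
```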
## Data retention
GitLab has a 90-day retention limit for all errors.
To leave feedback about Error Tracking bugs or features, comment in the
[feedback issue](https://gitlab.com/gitlab-org/opstrace/opstrace/-/issues/2362) or open a
[new issue](https://gitlab.com/gitlab-org/opstrace/opstrace/-/issues/new).

View File

@@ -0,0 +1,68 @@
---
stage: Monitor
group: Observability
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://handbook.gitlab.com/handbook/product/ux/technical-writing/#assignments
---
# Sentry error tracking
DETAILS:
**Tier:** Free, Premium, Ultimate
**Offering:** Self-managed, GitLab Dedicated
[Sentry](https://sentry.io/) is an open source error tracking system. GitLab enables
administrators to connect Sentry to GitLab, so users can view a list of Sentry errors in GitLab.
GitLab integrates with both the cloud-hosted [Sentry](https://sentry.io) and Sentry
deployed in your [on-premise instance](https://github.com/getsentry/onpremise/).
## Enable Sentry integration for a project
GitLab provides a way to connect Sentry to your project.
Prerequisites:
- You must have at least the Maintainer role for the project.
To enable the Sentry integration:
1. Sign up to Sentry.io, or deploy your own [on-premise Sentry instance](https://github.com/getsentry/onpremise/).
1. [Create a new Sentry project](https://docs.sentry.io/product/sentry-basics/integrate-frontend/create-new-project/).
For each GitLab project that you want to integrate, create a new Sentry project.
1. Find or generate a [Sentry auth token](https://docs.sentry.io/api/auth/#auth-tokens).
For the SaaS version of Sentry, you can find or generate the auth token at [https://sentry.io/api/](https://sentry.io/api/).
Give the token at least the following scopes: `project:read`, `event:read`, and
`event:write` (for resolving events).
1. In GitLab, enable and configure Error Tracking:
1. On the left sidebar, select **Search or go to** and find your project.
1. Select **Settings > Monitor**, then expand **Error Tracking**.
1. For **Enable error tracking**, select **Active**.
1. For **Error tracking backend**, select **Sentry**.
1. For **Sentry API URL**, enter your Sentry hostname. For example,
enter `https://sentry.example.com`.
For the SaaS version of Sentry, the hostname is `https://sentry.io`.
1. For **Auth Token**, enter the token you generated previously.
1. To test the connection to Sentry and populate the **Project** dropdown list,
select **Connect**.
1. From the **Project** list, choose a Sentry project to link to your GitLab project.
1. Select **Save changes**.
To view a list of Sentry errors, on your project's sidebar, go to **Monitor > Error Tracking**.
## Enable Sentry's integration with GitLab
You might also want to enable Sentry's GitLab integration by following the steps
in the [Sentry documentation](https://docs.sentry.io/product/integrations/source-code-mgmt/gitlab/).
## Troubleshooting
When working with Error Tracking, you might encounter the following issues.
### Error `Connection failed. Check auth token and try again`
If the **Monitor** feature is disabled in the
[project settings](../user/project/settings/index.md#configure-project-features-and-permissions),
you might see an error when you try to [enable Sentry integration for a project](#enable-sentry-integration-for-a-project).
The resulting request to `/project/path/-/error_tracking/projects.json?api_host=https:%2F%2Fsentry.example.com%2F&token=<token>` returns a 404 error.
To fix this issue, enable the **Monitor** feature for the project.

View File

@@ -319,3 +319,52 @@ To replace the token:
depending on how you use the token, for example if configured as a secret or
embedded within an application. Requests made from this token should no longer
return `401` responses.
## Troubleshooting
### Identify project and group access tokens expiring on a certain date using the Rails console
You can run these scripts in a [Rails console session](../administration/operations/rails_console.md#starting-a-rails-console-session) or [with the Rails Runner](../administration/operations/rails_console.md#using-the-rails-runner) to identify tokens affected by [incident 18003](https://gitlab.com/gitlab-com/gl-infra/production/-/issues/18003) on self-managed GitLab instances.
The first script looks for tokens that expire on one specific day. Adjust `expires_at_date` to the day one year after your instance was upgraded to GitLab 16.0. If you're not sure about the exact timing, use the second script, which lets you specify a date range to search in.
To run either of the scripts, start a Rails console session with `sudo gitlab-rails console`, paste the entire script, and press `Enter`.
Alternatively, execute it with the Rails Runner: `sudo gitlab-rails runner /path/to/expired_tokens.rb`. You _must_ use the full path, and the file must be accessible to the `git:git` user. See details in the [Rails Runner troubleshooting section](../administration/operations/rails_console.md#troubleshooting).
#### expired_tokens.rb
```ruby
expires_at_date = "2024-05-22"

PersonalAccessToken.project_access_token.where(expires_at: expires_at_date).find_each do |token|
  token.user.members.each do |member|
    type = if member.is_a?(GroupMember)
             'Group'
           elsif member.is_a?(ProjectMember)
             'Project'
           end

    puts "Expired #{type} Access Token in #{type} ID #{member.source_id}, Token ID: #{token.id}, Name: #{token.name}, Scopes: #{token.scopes}, Last used: #{token.last_used_at}"
  end
end
```
#### expired_tokens_date_range.rb
```ruby
# This is an alternative version of the above script that allows
# searching for tokens that expire within a certain date range,
# e.g. `1.month` (from when the script is executed). Useful if
# you're unsure when exactly your GitLab 16.0 upgrade completed.
date_range = 1.month

PersonalAccessToken.project_access_token.where(expires_at: Date.today..(Date.today + date_range)).find_each do |token|
  token.user.members.each do |member|
    type = member.is_a?(GroupMember) ? 'Group' : 'Project'

    puts "Expired #{type} Access Token in #{type} ID #{member.source_id}, Token ID: #{token.id}, Name: #{token.name}, Scopes: #{token.scopes}, Last used: #{token.last_used_at}"
  end
end
```

View File

@@ -0,0 +1,162 @@
# frozen_string_literal: true
module Gitlab
module Ci
module Components
module Usages
# Component usage is defined as the number of unique `used_by_project_id`s in the table
# `p_catalog_resource_component_usages` for a given scope.
#
# This aggregator iterates through the target scope in batches. For each target ID, it collects
# the usage count using `distinct_each_batch` for the given usage window. Since this process can
# be interrupted when it reaches MAX_RUNTIME, we utilize a Redis cursor so the aggregator can
# resume from where it left off on each run. We collect the count in Rails because the SQL query
# `COUNT(DISTINCT(*))` is not performant when the dataset is large.
#
# RUNTIME: The actual total runtime will be slightly longer than MAX_RUNTIME because
# it depends on the execution time of `&usage_counts_block`.
# EXCLUSIVE LEASE: This aggregator is protected from parallel processing with an exclusive lease guard.
# WORKER: The worker running this service should be scheduled at the same cadence as MAX_RUNTIME, with:
# deduplicate :until_executed, if_deduplicated: :reschedule_once, ttl: LEASE_TIMEOUT
#
##### Usage
#
# each_batch:
# - Yields each batch of `usage_counts` to the given block.
# - The block should be able to handle targets that might be reprocessed multiple times.
# - `usage_counts` format: { target_object1 => 100, target_object2 => 200, ... }
# - If the lease is obtained, returns a Result containing the `cursor` object and
# `total_targets_completed`. Otherwise, returns nil.
#
# Example:
# aggregator = Gitlab::Ci::Components::Usages::Aggregator.new(
# target_scope: Ci::Catalog::Resource.scope_to_get_only_unprocessed_targets,
# group_by_column: :catalog_resource_id,
# usage_start_date: Date.today - 30.days,
# usage_end_date: Date.today - 1.day,
# lease_key: 'my_aggregator_service_lease_key'
# )
#
# result = aggregator.each_batch do |usage_counts|
# # Bulk update usage counts in the database
# end
#
##### Parameters
#
# target_scope:
# - ActiveRecord relation to retrieve the target IDs. Processed in order of ID ascending.
# - The target model class should have `include EachBatch`.
# - When cursor.target_id gets reset to 0, the aggregator may reprocess targets that have
# already been processed for the given usage window. To minimize redundant reprocessing,
# add a limiting condition to the target scope so it only retrieves unprocessed targets.
# group_by_column: The usage table's foreign key that references the target_scope.
# usage_start_date & usage_end_date: Date objects specifying the window of usage data to aggregate.
# lease_key: Used for obtaining an exclusive lease. Also used as part of the cursor Redis key.
#
# rubocop: disable CodeReuse/ActiveRecord -- Custom queries required for data processing
class Aggregator
include Gitlab::Utils::StrongMemoize
include ExclusiveLeaseGuard
Result = Struct.new(:cursor, :total_targets_completed, keyword_init: true)
TARGET_BATCH_SIZE = 1000
DISTINCT_USAGE_BATCH_SIZE = 100
MAX_RUNTIME = 4.minutes # Should be >= job scheduling frequency so there is no gap between job runs
LEASE_TIMEOUT = 5.minutes # Should be MAX_RUNTIME + extra time to execute `&usage_counts_block`
def initialize(target_scope:, group_by_column:, usage_start_date:, usage_end_date:, lease_key:)
@target_scope = target_scope
@group_by_column = group_by_column
@lease_key = lease_key # Used by ExclusiveLeaseGuard
@runtime_limiter = Gitlab::Metrics::RuntimeLimiter.new(MAX_RUNTIME)
@cursor = Aggregators::Cursor.new(
redis_key: "#{lease_key}:cursor",
target_scope: target_scope,
usage_window: Aggregators::Cursor::Window.new(usage_start_date, usage_end_date)
)
end
def each_batch(&usage_counts_block)
try_obtain_lease do
total_targets_completed = process_targets(&usage_counts_block)
Result.new(cursor: cursor, total_targets_completed: total_targets_completed)
end
end
private
attr_reader :target_scope, :group_by_column, :cursor, :runtime_limiter
def process_targets
# Restore the scope from cursor so we can resume from the last run
restored_target_scope = target_scope.where('id >= ?', cursor.target_id)
total_targets_completed = 0
restored_target_scope.each_batch(of: TARGET_BATCH_SIZE) do |targets_relation|
usage_counts = aggregate_usage_counts(targets_relation)
yield usage_counts if usage_counts.present?
total_targets_completed += usage_counts.length
break if runtime_limiter.over_time?
end
cursor.advance unless cursor.interrupted?
cursor.save!
total_targets_completed
end
def aggregate_usage_counts(targets_relation)
usage_counts = {}
targets_relation.order(:id).each do |target|
# Assigning a target.id that differs from the cursor's current target_id
# resets last_usage_count and last_used_by_project_id to 0.
cursor.target_id = target.id
usage_scope = ::Ci::Catalog::Resources::Components::Usage
.where(group_by_column => cursor.target_id)
.where(used_date: cursor.usage_window.start_date..cursor.usage_window.end_date)
# Restore the scope from cursor so we can resume from the last run if interrupted
restored_usage_scope = usage_scope.where('used_by_project_id > ?', cursor.last_used_by_project_id)
usage_counts[target] = cursor.last_usage_count
restored_usage_scope
.distinct_each_batch(column: :used_by_project_id, of: DISTINCT_USAGE_BATCH_SIZE) do |usages_relation|
count = usages_relation.count
usage_counts[target] += count
# If we're over time and count == batch size, it means there is likely another batch
# to process for the current target, so the usage count is incomplete. We store the
# last used_by_project_id and count so that we can resume counting on the next run.
if runtime_limiter.over_time? && count == DISTINCT_USAGE_BATCH_SIZE
cursor.interrupt!(
last_used_by_project_id: usages_relation.maximum(:used_by_project_id).to_i,
last_usage_count: usage_counts[target]
)
usage_counts.delete(target) # Remove the incomplete count
break
end
end
break if runtime_limiter.over_time?
end
usage_counts
end
def lease_timeout
LEASE_TIMEOUT
end
end
# rubocop: enable CodeReuse/ActiveRecord
end
end
end
end
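
The `WORKER` note in the class comment implies a Sidekiq worker shaped roughly like the sketch below. The worker class name, the target scope, and the `save_usage_counts!` helper are hypothetical; the `deduplicate` options are taken verbatim from that comment:

```ruby
# Hypothetical worker sketch -- class and helper names are illustrative
# and not part of this change.
class CatalogResourceUsageAggregationWorker
  include ApplicationWorker

  # As the Aggregator class comment advises: schedule at the same cadence
  # as MAX_RUNTIME and deduplicate with the lease timeout as the TTL.
  deduplicate :until_executed, if_deduplicated: :reschedule_once,
    ttl: Gitlab::Ci::Components::Usages::Aggregator::LEASE_TIMEOUT

  def perform
    aggregator = Gitlab::Ci::Components::Usages::Aggregator.new(
      target_scope: Ci::Catalog::Resource.all, # narrow to unprocessed targets in practice
      group_by_column: :catalog_resource_id,
      usage_start_date: Date.today - 30.days,
      usage_end_date: Date.today - 1.day,
      lease_key: 'catalog_resource_usage_aggregation'
    )

    aggregator.each_batch do |usage_counts|
      # Bulk-persist the counts; this block must tolerate targets that
      # are reprocessed across runs.
      save_usage_counts!(usage_counts)
    end
  end
end
```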

View File

@ -0,0 +1,118 @@
# frozen_string_literal: true
module Gitlab
module Ci
module Components
module Usages
module Aggregators
# This class represents a Redis cursor that keeps track of the data processing
# position and progression in Gitlab::Ci::Components::Usages::Aggregator. It
# updates and saves the attributes necessary for the aggregation to resume
# from where it was interrupted on its last run.
#
# The cursor's target_id is reset to 0 under these circumstances:
# 1. When the Redis cursor is first initialized.
# 2. When the Redis cursor expires or is lost and must be re-initialized.
# 3. When the cursor advances past max_target_id.
#
##### Attributes
#
# target_id: The target ID from which to resume aggregating the usage counts.
# usage_window: The window of usage data to aggregate.
# last_used_by_project_id: The last used_by_project_id that was counted before interruption.
# last_usage_count: The last usage_count that was recorded before interruption.
#
# The last_used_by_project_id and last_usage_count only pertain to the exact target_id
# and usage_window that were saved before interruption. If either of those attributes
# changes, we reset the last_* values to 0.
#
class Cursor
include Gitlab::Utils::StrongMemoize
Window = Struct.new(:start_date, :end_date)
CURSOR_REDIS_KEY_TTL = 7.days
attr_reader :target_id, :usage_window, :last_used_by_project_id, :last_usage_count, :interrupted
alias_method :interrupted?, :interrupted
def initialize(redis_key:, target_scope:, usage_window:)
@redis_key = redis_key
@target_scope = target_scope
@usage_window = usage_window
@interrupted = false
fetch_initial_attributes!
end
def interrupt!(last_used_by_project_id:, last_usage_count:)
@last_used_by_project_id = last_used_by_project_id
@last_usage_count = last_usage_count
@interrupted = true
end
def target_id=(target_id)
reset_last_usage_attributes if target_id != self.target_id
@target_id = target_id
end
def advance
self.target_id += 1
self.target_id = 0 if target_id > max_target_id
end
def attributes
{
target_id: target_id,
usage_window: usage_window,
last_used_by_project_id: last_used_by_project_id,
last_usage_count: last_usage_count
}
end
def max_target_id
target_scope.maximum(:id).to_i
end
strong_memoize_attr :max_target_id
def save!
Gitlab::Redis::SharedState.with do |redis|
redis.set(redis_key, attributes.to_json, ex: CURSOR_REDIS_KEY_TTL)
end
end
private
attr_reader :redis_key, :target_scope
def fetch_initial_attributes!
data = Gitlab::Redis::SharedState.with do |redis|
raw = redis.get(redis_key)
raw.present? ? Gitlab::Json.parse(raw) : {}
end.with_indifferent_access
start_date = parse_date(data.dig(:usage_window, :start_date))
end_date = parse_date(data.dig(:usage_window, :end_date))
@target_id = data[:target_id].to_i
@last_used_by_project_id = data[:last_used_by_project_id].to_i
@last_usage_count = data[:last_usage_count].to_i
reset_last_usage_attributes if usage_window != Window.new(start_date, end_date)
end
def reset_last_usage_attributes
@last_used_by_project_id = 0
@last_usage_count = 0
end
def parse_date(date_str)
Date.parse(date_str) if date_str
end
end
end
end
end
end
end
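
The interplay of these attributes is easiest to see in isolation. A minimal lifecycle sketch (the Redis key and values are illustrative, and the scope assumes `Ci::Catalog::Resource` records exist):

```ruby
# Minimal lifecycle sketch for the cursor; key and values are illustrative.
window = Gitlab::Ci::Components::Usages::Aggregators::Cursor::Window.new(
  Date.today - 30.days, Date.today - 1.day
)

cursor = Gitlab::Ci::Components::Usages::Aggregators::Cursor.new(
  redis_key: 'example:cursor',
  target_scope: Ci::Catalog::Resource.all,
  usage_window: window
)

cursor.target_id = 42  # assigning a different target resets the last_* attributes
cursor.interrupt!(last_used_by_project_id: 100, last_usage_count: 7)
cursor.interrupted?    # => true, so the caller skips #advance on this run
cursor.save!           # persists the attributes to Redis with a 7-day TTL
```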

View File

@ -0,0 +1,225 @@
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Ci::Components::Usages::Aggregator, :clean_gitlab_redis_shared_state, :freeze_time,
feature_category: :pipeline_composition do
let_it_be(:usage_start_date) { Date.today - 30.days }
let_it_be(:usage_end_date) { Date.today - 1.day }
let(:usage_model) { Ci::Catalog::Resources::Components::Usage }
let(:target_scope) { Ci::Catalog::Resource }
let(:group_by_column) { :catalog_resource_id }
let(:lease_key) { 'my_lease_key' }
let(:usage_window) do
Gitlab::Ci::Components::Usages::Aggregators::Cursor::Window.new(usage_start_date, usage_end_date)
end
before_all do
# First catalog resource: 3 components and 3 usages per component on usage_end_date
version = create(:ci_catalog_resource_version)
create_list(:ci_catalog_resource_component, 3, version: version).each do |component|
(1..3).each do |k|
create(
:ci_catalog_resource_component_usage,
component: component,
used_date: usage_end_date,
used_by_project_id: k
)
end
end
# Create 4 more catalog resources, each with 1-4 components and 0-6 usages
# per component on different dates before and after usage_end_date
create_list(:ci_catalog_resource_version, 4).each_with_index do |version, i|
create_list(:ci_catalog_resource_component, i + 1, version: version).each_with_index do |component, j|
next unless j > 0
(1..j * 2).each do |k|
create(
:ci_catalog_resource_component_usage,
component: component,
used_date: usage_end_date - 3.days + k.days,
used_by_project_id: k
)
end
end
end
end
describe '#each_batch' do
shared_examples 'when the runtime limit is not reached' do
it 'returns the expected result' do
# We process all catalog resources and advance the cursor
batched_usage_counts, result = run_new_aggregator_each_batch
expect(batched_usage_counts).to eq(expected_batched_usage_counts)
expect(result.total_targets_completed).to eq(target_scope.count)
expect(result.cursor.attributes).to eq({
target_id: 0,
usage_window: usage_window,
last_used_by_project_id: 0,
last_usage_count: 0
})
end
end
shared_examples 'with multiple distinct usage batches' do
before do
stub_const("#{described_class}::DISTINCT_USAGE_BATCH_SIZE", 2)
end
it_behaves_like 'when the runtime limit is not reached'
context 'when the runtime limit is reached' do
before do
# Sets the aggregator to break after the first iteration on each run
stub_const("#{described_class}::MAX_RUNTIME", 0)
end
it 'returns the expected result for each run' do
# On 1st run, we get an incomplete usage count for the first catalog resource
batched_usage_counts, result = run_new_aggregator_each_batch
expect(batched_usage_counts).to eq([])
expect(result.total_targets_completed).to eq(0)
expect(result.cursor.attributes).to eq({
target_id: target_scope.first.id,
usage_window: usage_window,
last_used_by_project_id: 2,
last_usage_count: 2
})
# On 2nd run, we get the complete usage count for the first catalog resource and advance the cursor
batched_usage_counts, result = run_new_aggregator_each_batch
expect(batched_usage_counts).to eq([{ target_scope.first => 3 }])
expect(result.total_targets_completed).to eq(1)
expect(result.cursor.attributes).to eq({
target_id: target_scope.first.id + 1,
usage_window: usage_window,
last_used_by_project_id: 0,
last_usage_count: 0
})
all_batched_usage_counts = batched_usage_counts + repeat_new_aggregator_each_batch_until_done
batched_usage_counts_merged = all_batched_usage_counts.flatten.reduce(&:merge)
expect(batched_usage_counts_merged.length).to eq(5)
expect(batched_usage_counts_merged).to eq(expected_batched_usage_counts_merged)
end
context 'when a target is deleted between runs' do
it 'returns the expected result for each run' do
# On 1st run, we get an incomplete usage count for the first catalog resource
batched_usage_counts, result = run_new_aggregator_each_batch
expect(batched_usage_counts).to eq([])
expect(result.total_targets_completed).to eq(0)
expect(result.cursor.attributes).to eq({
target_id: target_scope.first.id,
usage_window: usage_window,
last_used_by_project_id: 2,
last_usage_count: 2
})
target_scope.first.delete
all_batched_usage_counts = repeat_new_aggregator_each_batch_until_done
batched_usage_counts_merged = all_batched_usage_counts.reduce(&:merge)
expect(batched_usage_counts_merged.length).to eq(4)
expect(batched_usage_counts_merged).to eq(expected_batched_usage_counts_merged)
end
end
context 'when there are no usage records' do
it 'returns the expected result' do
usage_model.delete_all
all_batched_usage_counts = repeat_new_aggregator_each_batch_until_done
batched_usage_counts_merged = all_batched_usage_counts.reduce(&:merge)
expect(batched_usage_counts_merged.length).to eq(5)
expect(batched_usage_counts_merged).to eq(expected_batched_usage_counts_merged)
end
end
end
end
it_behaves_like 'when the runtime limit is not reached'
it_behaves_like 'with multiple distinct usage batches'
context 'with multiple target batches' do
before do
stub_const("#{described_class}::TARGET_BATCH_SIZE", 3)
end
it_behaves_like 'when the runtime limit is not reached'
it_behaves_like 'with multiple distinct usage batches'
end
it 'prevents parallel processing with an exclusive lease guard' do
lease = Gitlab::ExclusiveLease.new(lease_key, timeout: 1.minute).tap(&:try_obtain)
result = run_new_aggregator_each_batch.last
expect(result).to be_nil
lease.cancel
end
end
private
def run_new_aggregator_each_batch
aggregator = described_class.new(
target_scope: target_scope,
group_by_column: group_by_column,
usage_start_date: usage_start_date,
usage_end_date: usage_end_date,
lease_key: lease_key
)
batched_usage_counts = []
result = aggregator.each_batch do |usage_counts|
batched_usage_counts << usage_counts
end
[batched_usage_counts, result]
end
def expected_batched_usage_counts
batched_usage_counts = []
target_scope.each_batch(of: described_class::TARGET_BATCH_SIZE) do |targets|
usage_counts = usage_model
.includes(:catalog_resource)
.select('catalog_resource_id, COUNT(DISTINCT used_by_project_id) AS usage_count')
.where(used_date: usage_start_date..usage_end_date)
.where(group_by_column => targets)
.group(:catalog_resource_id)
.each_with_object({}) { |r, hash| hash[r.catalog_resource] = r.usage_count }
batched_usage_counts << targets.index_with { 0 }.merge(usage_counts)
end
batched_usage_counts
end
def expected_batched_usage_counts_merged
expected_batched_usage_counts.reduce(&:merge)
end
def repeat_new_aggregator_each_batch_until_done
all_batched_usage_counts = []
30.times do
batched_usage_counts, result = run_new_aggregator_each_batch
all_batched_usage_counts << batched_usage_counts
break if result.cursor.target_id == 0
end
all_batched_usage_counts.flatten
end
end

View File

@ -0,0 +1,167 @@
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Ci::Components::Usages::Aggregators::Cursor, :clean_gitlab_redis_shared_state,
feature_category: :pipeline_composition do
let(:redis_key) { 'my_redis_key:cursor' }
let(:target_scope) { class_double(Ci::Catalog::Resource, maximum: max_target_id) }
let(:max_target_id) { initial_redis_attributes[:target_id] }
let(:usage_window) { described_class::Window.new(Date.parse('2024-01-08'), Date.parse('2024-01-14')) }
let(:initial_redis_usage_window) { usage_window }
let(:initial_redis_attributes) do
{
target_id: 1,
usage_window: initial_redis_usage_window,
last_used_by_project_id: 100,
last_usage_count: 10
}
end
subject(:cursor) do
described_class.new(redis_key: redis_key, target_scope: target_scope, usage_window: usage_window)
end
before do
Gitlab::Redis::SharedState.with do |redis|
redis.set(redis_key, initial_redis_attributes.to_json)
end
end
describe '.new' do
it 'fetches and parses the attributes from Redis' do
expect(cursor.attributes).to eq(initial_redis_attributes)
end
context 'when Redis usage_window is different than the given usage_window' do
let(:initial_redis_usage_window) do
described_class::Window.new(Date.parse('2024-01-01'), Date.parse('2024-01-07'))
end
it 'resets last usage attributes' do
expect(cursor.attributes).to eq({
target_id: initial_redis_attributes[:target_id],
usage_window: usage_window,
last_used_by_project_id: 0,
last_usage_count: 0
})
end
end
context 'when cursor does not exist in Redis' do
before do
Gitlab::Redis::SharedState.with do |redis|
redis.del(redis_key)
end
end
it 'sets target_id and last usage attributes to zero' do
expect(cursor.attributes).to eq({
target_id: 0,
usage_window: usage_window,
last_used_by_project_id: 0,
last_usage_count: 0
})
end
end
end
describe '#interrupt!' do
it 'updates last usage attributes and sets interrupted? to true' do
expect(cursor.interrupted?).to eq(false)
cursor.interrupt!(
last_used_by_project_id: initial_redis_attributes[:last_used_by_project_id] + 1,
last_usage_count: initial_redis_attributes[:last_usage_count] + 1
)
expect(cursor.interrupted?).to eq(true)
expect(cursor.attributes).to eq({
target_id: initial_redis_attributes[:target_id],
usage_window: usage_window,
last_used_by_project_id: initial_redis_attributes[:last_used_by_project_id] + 1,
last_usage_count: initial_redis_attributes[:last_usage_count] + 1
})
end
end
describe '#target_id=(target_id)' do
context 'when new target_id is different from cursor target_id' do
it 'sets new target_id and resets last usage attributes' do
cursor.target_id = initial_redis_attributes[:target_id] + 1
expect(cursor.attributes).to eq({
target_id: initial_redis_attributes[:target_id] + 1,
usage_window: usage_window,
last_used_by_project_id: 0,
last_usage_count: 0
})
end
end
context 'when new target_id is the same as cursor target_id' do
it 'does not change cursor attributes' do
expect(cursor.attributes).to eq(initial_redis_attributes)
end
end
end
describe '#advance' do
context 'when cursor target_id is less than max_target_id' do
let(:max_target_id) { initial_redis_attributes[:target_id] + 100 }
it 'increments cursor target_id and resets last usage attributes' do
cursor.advance
expect(cursor.attributes).to eq({
target_id: initial_redis_attributes[:target_id] + 1,
usage_window: usage_window,
last_used_by_project_id: 0,
last_usage_count: 0
})
end
end
context 'when cursor target_id is equal to or greater than max_target_id' do
it 'resets cursor target_id and last usage attributes' do
cursor.advance
expect(cursor.attributes).to eq({
target_id: 0,
usage_window: usage_window,
last_used_by_project_id: 0,
last_usage_count: 0
})
end
end
end
describe '#max_target_id' do
let(:target_scope) { Ci::Catalog::Resource }
before_all do
create(:ci_catalog_resource, id: 123)
create(:ci_catalog_resource, id: 100)
end
it 'returns maximum ID of the target scope' do
expect(cursor.max_target_id).to eq(123)
end
end
describe '#save!' do
it 'saves cursor attributes to Redis as JSON' do
cursor.target_id = 11
cursor.interrupt!(
last_used_by_project_id: 33,
last_usage_count: 22
)
cursor.save!
data = Gitlab::Redis::SharedState.with { |redis| redis.get(redis_key) }
expect(data).to eq('{"target_id":11,"usage_window":{"start_date":"2024-01-08","end_date":"2024-01-14"},' \
'"last_used_by_project_id":33,"last_usage_count":22}')
end
end
end