Add latest changes from gitlab-org/gitlab@master

GitLab Bot 2023-12-21 12:15:58 +00:00
parent 4ecd816dcb
commit 2779809e45
52 changed files with 1492 additions and 211 deletions

View File

@@ -1223,7 +1223,6 @@ rspec-ee system pg15 es8:
extends:
- .rails:rules:rspec fail-fast
stage: test
needs: ["setup-test-env", "retrieve-tests-metadata", "compile-test-assets", "detect-tests"]
script:
- !reference [.base-script, script]
- rspec_fail_fast "${MATCHING_TESTS_PATH}" "--fail-fast=${RSPEC_FAIL_FAST_THRESHOLD} --tag ~quarantine --tag ~zoekt --tag ~click_house"
@@ -1298,7 +1297,10 @@ fail-pipeline-early:
extends:
- .rails:rules:rerun-previous-failed-tests
stage: test
needs: ["setup-test-env", "retrieve-tests-metadata", "compile-test-assets", "detect-previous-failed-tests"]
needs:
- !reference [.rspec-base-needs, needs]
- job: "compile-test-assets"
- job: "detect-previous-failed-tests"
script:
- !reference [.base-script, script]
- rspec_rerun_previous_failed_tests "${PREVIOUS_FAILED_TESTS_FILE}"

View File

@@ -56,10 +56,18 @@ include:
variables:
GITALY_PRAEFECT_WITH_DB: '1'
.rspec-base-needs:
needs:
- job: "clone-gitlab-repo"
optional: true # Optional so it's easier to switch between cloning and using artifacts
- job: "setup-test-env"
- job: "retrieve-tests-metadata"
.rspec-base:
extends:
- .rails-job-base
- .base-artifacts
# - .repo-from-artifacts # Comment this to clone instead of using artifacts
stage: test
variables:
RUBY_GC_MALLOC_LIMIT: 67108864
@@ -69,8 +77,7 @@ include:
SUCCESSFULLY_RETRIED_TEST_EXIT_CODE: 137
EVENT_PROF: "sql.active_record"
needs:
- job: "setup-test-env"
- job: "retrieve-tests-metadata"
- !reference [.rspec-base-needs, needs]
- job: "compile-test-assets"
- job: "detect-tests"
optional: true
@@ -163,8 +170,7 @@ include:
- .as-if-foss
- .use-pg14
needs:
- job: "setup-test-env"
- job: "retrieve-tests-metadata"
- !reference [.rspec-base-needs, needs]
- job: "compile-test-assets as-if-foss"
- job: "detect-tests"
optional: true

View File

@@ -43,6 +43,28 @@ dont-interrupt-me:
script:
- echo "This jobs makes sure this pipeline won't be interrupted! See https://docs.gitlab.com/ee/ci/yaml/#interruptible."
clone-gitlab-repo:
extends:
- .absolutely-predictive-job
rules: [when: never] # Uncomment this to clone instead of using artifacts
stage: sync
script:
- echo OK
variables:
GIT_STRATEGY: clone
artifacts:
paths:
- '*'
expire_in: '12 hours'
.repo-from-artifacts:
variables:
GIT_STRATEGY: none
needs:
# If the job extending this also defines `needs`, make sure to update
# its `needs` to include `clone-gitlab-repo` because it'll be overridden.
- clone-gitlab-repo
gitlab_git_test:
extends:
- .predictive-job

View File

@@ -3206,7 +3206,6 @@ RSpec/FeatureCategory:
- 'spec/lib/gitlab/database/migration_helpers/announce_database_spec.rb'
- 'spec/lib/gitlab/database/migration_helpers/cascading_namespace_settings_spec.rb'
- 'spec/lib/gitlab/database/migration_helpers/loose_foreign_key_helpers_spec.rb'
- 'spec/lib/gitlab/database/migration_helpers/v2_spec.rb'
- 'spec/lib/gitlab/database/migration_spec.rb'
- 'spec/lib/gitlab/database/migrations/background_migration_helpers_spec.rb'
- 'spec/lib/gitlab/database/migrations/base_background_runner_spec.rb'

View File

@@ -2,14 +2,18 @@
module Ci
class PipelineChatData < Ci::ApplicationRecord
include Ci::Partitionable
include Ci::NamespacedModelName
self.table_name = 'ci_pipeline_chat_data'
belongs_to :chat_name
belongs_to :pipeline
validates :pipeline_id, presence: true
validates :chat_name_id, presence: true
validates :response_url, presence: true
partitionable scope: :pipeline
end
end
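The `partitionable scope: :pipeline` declaration means the record derives its partition from the associated pipeline. A minimal console-style sketch of that behavior, assuming `Ci::Partitionable` works here as it does for other partitioned CI models (the `pipeline` and `chat_name` objects are illustrative placeholders):

```ruby
# Illustrative only: the partition_id is inherited from the scoped
# association (the pipeline) before validation.
chat_data = Ci::PipelineChatData.new(
  pipeline: pipeline,    # hypothetical Ci::Pipeline instance
  chat_name: chat_name,  # hypothetical ChatName instance
  response_url: 'https://chat.example.com/response'
)
chat_data.valid?
chat_data.partition_id == pipeline.partition_id # => true
```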

View File

@@ -7,9 +7,6 @@ module Ci
include Ci::HasStatus
include Gitlab::OptimisticLocking
include Presentable
include IgnorableColumns
ignore_column :pipeline_id_convert_to_bigint, remove_with: '16.6', remove_after: '2023-10-22'
partitionable scope: :pipeline

View File

@@ -21,6 +21,7 @@ module Ci
Ci::PendingBuild
Ci::RunningBuild
Ci::RunnerManagerBuild
Ci::PipelineChatData
Ci::PipelineVariable
Ci::Sources::Pipeline
Ci::Stage

View File

@@ -3,6 +3,6 @@ name: telesign_intelligence
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/137739
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/432757
milestone: '16.7'
type: development
type: ops
group: group::anti-abuse
default_enabled: false
default_enabled: true

View File

@@ -0,0 +1,9 @@
---
migration_job_name: BackfillPartitionIdCiPipelineChatData
description: Fixes incorrect partition_id values for ci_pipeline_chat_data rows that are in the wrong partition
feature_category: continuous_integration
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/139515
milestone: '16.8'
queued_migration_version: 20231218092401
finalize_after: '2023-12-23'
finalized_by: # version of the migration that finalized this BBM
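The batched background migration job itself ships separately under `lib/gitlab/background_migration/`. A plausible sketch of what it does, assuming it follows the pattern of other `partition_id` backfills (the join and SQL below are illustrative, not the shipped implementation):

```ruby
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Illustrative sketch: copy partition_id from the owning pipeline
    # into each ci_pipeline_chat_data row, one sub-batch at a time.
    class BackfillPartitionIdCiPipelineChatData < BatchedMigrationJob
      operation_name :update_all
      feature_category :continuous_integration

      def perform
        each_sub_batch do |sub_batch|
          connection.execute(<<~SQL)
            UPDATE ci_pipeline_chat_data
            SET partition_id = ci_pipelines.partition_id
            FROM ci_pipelines
            WHERE ci_pipeline_chat_data.pipeline_id = ci_pipelines.id
              AND ci_pipeline_chat_data.id IN (#{sub_batch.select(:id).to_sql})
          SQL
        end
      end
    end
  end
end
```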

View File

@@ -0,0 +1,10 @@
# frozen_string_literal: true
class AddPartitionIdToPipelineChatData < Gitlab::Database::Migration[2.2]
milestone '16.8'
enable_lock_retries!
def change
add_column(:ci_pipeline_chat_data, :partition_id, :bigint, default: 100, null: false)
end
end

View File

@@ -0,0 +1,26 @@
# frozen_string_literal: true
class QueueBackfillPartitionIdCiPipelineChatData < Gitlab::Database::Migration[2.2]
milestone '16.8'
restrict_gitlab_migration gitlab_schema: :gitlab_ci
MIGRATION = 'BackfillPartitionIdCiPipelineChatData'
DELAY_INTERVAL = 2.minutes
BATCH_SIZE = 1000
SUB_BATCH_SIZE = 100
def up
queue_batched_background_migration(
MIGRATION,
:ci_pipeline_chat_data,
:id,
job_interval: DELAY_INTERVAL,
batch_size: BATCH_SIZE,
sub_batch_size: SUB_BATCH_SIZE
)
end
def down
delete_batched_background_migration(MIGRATION, :ci_pipeline_chat_data, :id, [])
end
end
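Per the dictionary entry above, the migration is expected to be finalized after 2023-12-23. A hypothetical sketch of the eventual finalizing migration, which would fill `finalized_by` in the dictionary entry (the class name and milestone are assumptions):

```ruby
# frozen_string_literal: true

# Hypothetical follow-up: blocks until the batched background
# migration queued above has fully completed.
class FinalizeBackfillPartitionIdCiPipelineChatData < Gitlab::Database::Migration[2.2]
  milestone '16.8' # assumed
  disable_ddl_transaction!
  restrict_gitlab_migration gitlab_schema: :gitlab_ci

  def up
    ensure_batched_background_migration_is_finished(
      job_class_name: 'BackfillPartitionIdCiPipelineChatData',
      table_name: :ci_pipeline_chat_data,
      column_name: :id,
      job_arguments: []
    )
  end

  def down
    # no-op
  end
end
```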

View File

@@ -0,0 +1,54 @@
# frozen_string_literal: true
class AddFkToCiBuildTraceMetadataOnPartitionIdAndTraceArtifactId < Gitlab::Database::Migration[2.2]
include Gitlab::Database::MigrationHelpers::WraparoundAutovacuum
milestone '16.8'
disable_ddl_transaction!
SOURCE_TABLE_NAME = :ci_build_trace_metadata
TARGET_TABLE_NAME = :ci_job_artifacts
COLUMN = :trace_artifact_id
TARGET_COLUMN = :id
FK_NAME = :fk_21d25cac1a_p
PARTITION_COLUMN = :partition_id
def up
return unless should_run?
add_concurrent_foreign_key(
SOURCE_TABLE_NAME,
TARGET_TABLE_NAME,
column: [PARTITION_COLUMN, COLUMN],
target_column: [PARTITION_COLUMN, TARGET_COLUMN],
validate: false,
reverse_lock_order: true,
on_update: :cascade,
on_delete: :cascade,
name: FK_NAME
)
prepare_async_foreign_key_validation(SOURCE_TABLE_NAME, name: FK_NAME)
end
def down
return unless should_run?
unprepare_async_foreign_key_validation(SOURCE_TABLE_NAME, name: FK_NAME)
with_lock_retries do
remove_foreign_key_if_exists(
SOURCE_TABLE_NAME,
TARGET_TABLE_NAME,
name: FK_NAME,
reverse_lock_order: true
)
end
end
private
def should_run?
can_execute_on?(TARGET_TABLE_NAME)
end
end
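Both foreign-key migrations in this commit add the constraint as `NOT VALID` and queue asynchronous validation. A hypothetical sketch of the follow-up migration that would validate the constraint once the async validation completes (the class name and milestone are assumptions):

```ruby
# frozen_string_literal: true

# Hypothetical follow-up: validation takes only a SHARE UPDATE EXCLUSIVE
# lock, so it does not block reads or writes on the table.
class ValidateFkOnCiBuildTraceMetadataPartitionIdAndTraceArtifactId < Gitlab::Database::Migration[2.2]
  milestone '16.8' # assumed
  FK_NAME = :fk_21d25cac1a_p

  def up
    validate_foreign_key(:ci_build_trace_metadata, nil, name: FK_NAME)
  end

  def down
    # no-op: a validated foreign key cannot be "unvalidated"
  end
end
```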

View File

@@ -0,0 +1,54 @@
# frozen_string_literal: true
class AddFkToCiJobArtifactStatesOnPartitionIdAndJobArtifactId < Gitlab::Database::Migration[2.2]
include Gitlab::Database::MigrationHelpers::WraparoundAutovacuum
milestone '16.8'
disable_ddl_transaction!
SOURCE_TABLE_NAME = :ci_job_artifact_states
TARGET_TABLE_NAME = :ci_job_artifacts
COLUMN = :job_artifact_id
TARGET_COLUMN = :id
FK_NAME = :fk_rails_80a9cba3b2_p
PARTITION_COLUMN = :partition_id
def up
return unless should_run?
add_concurrent_foreign_key(
SOURCE_TABLE_NAME,
TARGET_TABLE_NAME,
column: [PARTITION_COLUMN, COLUMN],
target_column: [PARTITION_COLUMN, TARGET_COLUMN],
validate: false,
reverse_lock_order: true,
on_update: :cascade,
on_delete: :cascade,
name: FK_NAME
)
prepare_async_foreign_key_validation(SOURCE_TABLE_NAME, name: FK_NAME)
end
def down
return unless should_run?
unprepare_async_foreign_key_validation(SOURCE_TABLE_NAME, name: FK_NAME)
with_lock_retries do
remove_foreign_key_if_exists(
SOURCE_TABLE_NAME,
TARGET_TABLE_NAME,
name: FK_NAME,
reverse_lock_order: true
)
end
end
private
def should_run?
can_execute_on?(TARGET_TABLE_NAME)
end
end

View File

@@ -0,0 +1 @@
86f8e4e111f42d4b8def51c30dc6ca7be035766b30abd2c0c3f2323b4e501cf7

View File

@@ -0,0 +1 @@
8e62da6b0dfd415c462df2bd6ae62826dcfe440eb71e4fc9ad1cb216084f1141

View File

@@ -0,0 +1 @@
2350d314c570dc9c3264376293b81fa41c3340fce2a4e5e1ea149bc205b408e7

View File

@@ -0,0 +1 @@
e724aa0b32b88a291d2683612ba480977b7270a7c0552d677e7fc13ea829a669

View File

@@ -14286,7 +14286,8 @@ CREATE TABLE ci_pipeline_chat_data (
id bigint NOT NULL,
chat_name_id integer NOT NULL,
response_url text NOT NULL,
pipeline_id bigint NOT NULL
pipeline_id bigint NOT NULL,
partition_id bigint DEFAULT 100 NOT NULL
);
CREATE SEQUENCE ci_pipeline_chat_data_id_seq
@@ -37376,6 +37377,9 @@ ALTER TABLE ONLY namespace_settings
ALTER TABLE ONLY ci_build_trace_metadata
ADD CONSTRAINT fk_21d25cac1a FOREIGN KEY (trace_artifact_id) REFERENCES ci_job_artifacts(id) ON DELETE CASCADE;
ALTER TABLE ONLY ci_build_trace_metadata
ADD CONSTRAINT fk_21d25cac1a_p FOREIGN KEY (partition_id, trace_artifact_id) REFERENCES ci_job_artifacts(partition_id, id) ON UPDATE CASCADE ON DELETE CASCADE NOT VALID;
ALTER TABLE ONLY users_star_projects
ADD CONSTRAINT fk_22cd27ddfc FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE;
@@ -39302,6 +39306,9 @@ ALTER TABLE ONLY dependency_proxy_manifest_states
ALTER TABLE ONLY ci_job_artifact_states
ADD CONSTRAINT fk_rails_80a9cba3b2 FOREIGN KEY (job_artifact_id) REFERENCES ci_job_artifacts(id) ON DELETE CASCADE;
ALTER TABLE ONLY ci_job_artifact_states
ADD CONSTRAINT fk_rails_80a9cba3b2_p FOREIGN KEY (partition_id, job_artifact_id) REFERENCES ci_job_artifacts(partition_id, id) ON UPDATE CASCADE ON DELETE CASCADE NOT VALID;
ALTER TABLE ONLY approval_merge_request_rules_users
ADD CONSTRAINT fk_rails_80e6801803 FOREIGN KEY (approval_merge_request_rule_id) REFERENCES approval_merge_request_rules(id) ON DELETE CASCADE;

View File

@@ -811,7 +811,7 @@ GraphQL queries are recorded in the file. For example:
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/133371) in GitLab 16.5.
The `clickhouse.log` file logs information related to the
ClickHouse database client in GitLab.
[ClickHouse database client](../../integration/clickhouse.md) in GitLab.
## `migrations.log`

View File

@@ -823,6 +823,7 @@ Parameters:
| `auto_devops_enabled` | boolean | no | Default to Auto DevOps pipeline for all projects within this group. |
| `avatar` | mixed | no | Image file for avatar of the group. [Introduced in GitLab 12.9](https://gitlab.com/gitlab-org/gitlab/-/issues/36681) |
| `default_branch_protection` | integer | no | See [Options for `default_branch_protection`](#options-for-default_branch_protection). Default to the global level default branch protection setting. |
| `default_branch_protection_defaults` | hash | no | See [Options for `default_branch_protection_defaults`](#options-for-default_branch_protection_defaults). |
| `description` | string | no | The group's description. |
| `emails_disabled` | boolean | no | _([Deprecated](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/127899) in GitLab 16.5.)_ Disable email notifications. Use `emails_enabled` instead. |
| `emails_enabled` | boolean | no | Enable email notifications. |
@@ -836,7 +837,7 @@ Parameters:
| `subgroup_creation_level` | string | no | Allowed to [create subgroups](../user/group/subgroups/index.md#create-a-subgroup). Can be `owner` (Owners), or `maintainer` (users with the Maintainer role). |
| `two_factor_grace_period` | integer | no | Time before Two-factor authentication is enforced (in hours). |
| `visibility` | string | no | The group's visibility. Can be `private`, `internal`, or `public`. |
| `membership_lock` **(PREMIUM ALL)** | boolean | no | Users cannot be added to projects in this group. |
| `membership_lock` **(PREMIUM ALL)** | boolean | no | Users cannot be added to projects in this group. |
| `extra_shared_runners_minutes_limit` **(PREMIUM SELF)** | integer | no | Can be set by administrators only. Additional compute minutes for this group. |
| `shared_runners_minutes_limit` **(PREMIUM SELF)** | integer | no | Can be set by administrators only. Maximum number of monthly compute minutes for this group. Can be `nil` (default; inherit system default), `0` (unlimited), or `> 0`. |
| `wiki_access_level` **(PREMIUM ALL)** | string | no | The wiki access level. Can be `disabled`, `private`, or `enabled`. |
@@ -853,6 +854,18 @@ The `default_branch_protection` attribute determines whether users with the Deve
| `3` | Protected against pushes. Users with the Maintainer role can: <br>- Push new commits<br>- Force push changes<br>- Accept merge requests<br>Users with the Developer role can:<br>- Accept merge requests|
| `4` | Full protection after initial push. User with the Developer role can: <br>- Push commit to empty repository.<br> Users with the Maintainer role can: <br>- Push new commits<br>- Accept merge requests|
### Options for `default_branch_protection_defaults`
The `default_branch_protection_defaults` attribute describes the default branch
protection settings for the group. All parameters are optional.
| Key | Type | Description |
|------------------------------|---------|-----------------------------------------------------------------------------------------|
| `allowed_to_push` | array | An array of access levels allowed to push. Supports Developer (30) or Maintainer (40). |
| `allow_force_push` | boolean | Allow force push for all users with push access. |
| `allowed_to_merge` | array | An array of access levels allowed to merge. Supports Developer (30) or Maintainer (40). |
| `developer_can_initial_push` | boolean | Allow developers to push the initial commit. |
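For example, a group-creation request that sets these defaults might look like the following sketch, using Ruby's standard library. The token, host, and values are placeholders, and the `{ access_level: N }` shape of the array entries is an assumption based on the table above (`30` = Developer, `40` = Maintainer):

```ruby
require 'net/http'
require 'json'
require 'uri'

uri = URI('https://gitlab.example.com/api/v4/groups')
request = Net::HTTP::Post.new(uri, 'PRIVATE-TOKEN' => ENV.fetch('GITLAB_TOKEN'),
                                   'Content-Type' => 'application/json')
request.body = {
  name: 'example-group',
  path: 'example-group',
  default_branch_protection_defaults: {
    allowed_to_push: [{ access_level: 30 }],  # Developer
    allow_force_push: false,
    allowed_to_merge: [{ access_level: 40 }], # Maintainer
    developer_can_initial_push: true
  }
}.to_json

response = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |http| http.request(request) }
puts response.code
```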
## New Subgroup
This is similar to creating a [New group](#new-group). You need the `parent_id` from the [List groups](#list-groups) call. You can then enter the desired:
@@ -981,6 +994,7 @@ PUT /groups/:id
| `auto_devops_enabled` | boolean | no | Default to Auto DevOps pipeline for all projects within this group. |
| `avatar` | mixed | no | Image file for avatar of the group. [Introduced in GitLab 12.9](https://gitlab.com/gitlab-org/gitlab/-/issues/36681) |
| `default_branch_protection` | integer | no | See [Options for `default_branch_protection`](#options-for-default_branch_protection). |
| `default_branch_protection_defaults` | hash | no | See [Options for `default_branch_protection_defaults`](#options-for-default_branch_protection_defaults). |
| `description` | string | no | The description of the group. |
| `emails_disabled` | boolean | no | _([Deprecated](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/127899) in GitLab 16.5.)_ Disable email notifications. Use `emails_enabled` instead. |
| `emails_enabled` | boolean | no | Enable email notifications. |

View File

@@ -1763,6 +1763,7 @@ Supported attributes:
| `feature_flags_access_level` | string | No | One of `disabled`, `private`, or `enabled`. |
| `infrastructure_access_level` | string | No | One of `disabled`, `private`, or `enabled`. |
| `monitor_access_level` | string | No | One of `disabled`, `private`, or `enabled`. |
| `model_experiments_access_level` | string | No | One of `disabled`, `private`, or `enabled`. |
| `model_registry_access_level` | string | No | One of `disabled`, `private`, or `enabled`. |
| `remove_source_branch_after_merge` | boolean | No | Enable `Delete source branch` option by default for all new merge requests. |
| `repository_access_level` | string | No | One of `disabled`, `private`, or `enabled`. |

View File

@@ -305,8 +305,7 @@ It is expected that initial iterations will be rather slow, because they require
The Cells architecture has long lasting implications to data processing, location, scalability and the GitLab architecture.
This section links all different technical proposals that are being evaluated.
- [Stateless Router That Uses a Cache to Pick Cell and Is Redirected When Wrong Cell Is Reached](proposal-stateless-router-with-buffering-requests.md)
- [Stateless Router That Uses a Cache to Pick Cell and pre-flight `/api/v4/internal/cells/learn`](proposal-stateless-router-with-routes-learning.md)
- [Routing Service](routing-service.md)
## Impacted features

View File

@@ -13,7 +13,7 @@ contrast this with alternatives before deciding which approach to implement.
This documentation will be kept even if we decide not to implement this so that
we can document the reasons for not choosing this approach.
# Proposal: Stateless Router
# Proposal: Stateless Router using Requests Buffering
We will decompose `gitlab_users`, `gitlab_routes` and `gitlab_admin` related
tables so that they can be shared between all cells and allow any cell to

View File

@@ -13,7 +13,7 @@ contrast this with alternatives before deciding which approach to implement.
This documentation will be kept even if we decide not to implement this so that
we can document the reasons for not choosing this approach.
# Proposal: Stateless Router
# Proposal: Stateless Router using Routes Learning
We will decompose `gitlab_users`, `gitlab_routes` and `gitlab_admin` related
tables so that they can be shared between all cells and allow any cell to
@@ -35,7 +35,7 @@ Organization can only be on a single Cell.
## Differences
The main difference between this proposal and one [with buffering requests](proposal-stateless-router-with-buffering-requests.md)
is that this proposal uses a pre-flight API request (`/pi/v4/internal/cells/learn`) to redirect the request body to the correct Cell.
is that this proposal uses a pre-flight API request (`/api/v4/internal/cells/learn`) to redirect the request body to the correct Cell.
This means that each request is sent exactly once to be processed, but the URI is used to decode which Cell it should be directed to.
## Summary in diagrams

View File

@@ -59,22 +59,23 @@ For example:
## Requirements
| Requirement | Description | Priority |
|--------------------------------------------|-------------------------------------------------------------------|----------|
| Discovery | needs to be able to discover and monitor the health of all Cells. | high |
| Security | only authorized cells can be routed to | high |
| Single domain | for example GitLab.com | high |
| Caching | can cache routing information for performance | high |
| [50 ms of increased latency](#low-latency) | | high |
| Path-based | can make routing decision based on path | high |
| Complexity | the routing service should be configuration-driven and small | high |
| Feature Flags | features can be turned on, off, and % rollout | high |
| Progressive Rollout | We can slowly rollout a change | medium |
| Stateless | does not need database, Cells provide all routing information | medium |
| Secrets-based | can make routing decision based on secret (for example JWT) | medium |
| Observability | can use existing observability tooling | low |
| Self-managed | can be eventually used by [self-managed](goals.md#self-managed) | low |
| Regional | can route requests to different [regions](goals.md#regions) | low |
| Requirement | Description | Priority |
| ------------------- | ----------------------------------------------------------------- | -------- |
| Discovery | needs to be able to discover and monitor the health of all Cells. | high |
| Security | only authorized cells can be routed to | high |
| Single domain | for example GitLab.com | high |
| Caching | can cache routing information for performance | high |
| Low latency | [50 ms of increased latency](#low-latency) | high |
| Path-based | can make routing decision based on path | high |
| Complexity | the routing service should be configuration-driven and small | high |
| Rolling | the routing service works with Cells running mixed versions | high |
| Feature Flags       | features can be turned on, off, and rolled out by percentage      | high     |
| Progressive Rollout | we can slowly rollout a change | medium |
| Stateless | does not need database, Cells provide all routing information | medium |
| Secrets-based | can make routing decision based on secret (for example JWT) | medium |
| Observability | can use existing observability tooling | low |
| Self-managed | can be eventually used by [self-managed](goals.md#self-managed) | low |
| Regional | can route requests to different [regions](goals.md#regions) | low |
### Low Latency
@@ -93,7 +94,7 @@ The main SLI we use is the [rails requests](../../../development/application_sli
It has multiple `satisfied` targets (apdex) depending on the [request urgency](../../../development/application_slis/rails_request.md#how-to-adjust-the-urgency):
| Urgency | Duration in ms |
|------------|----------------|
| ---------- | -------------- |
| `:high` | 250 _ms_ |
| `:medium` | 500 _ms_ |
| `:default` | 1000 _ms_ |
@@ -110,7 +111,7 @@ The way we calculate the headroom we have is by using the following:
**`web`**:
| Target Duration | Percentile | Headroom |
|-----------------|------------|-----------|
| --------------- | ---------- | --------- |
| 5000 _ms_ | p99 | 4000 _ms_ |
| 5000 _ms_ | p95 | 4500 _ms_ |
| 5000 _ms_ | p90 | 4600 _ms_ |
@@ -133,7 +134,7 @@ _Analysis was done in <https://gitlab.com/gitlab-org/gitlab/-/issues/432934#note
**`api`**:
| Target Duration | Percentile | Headroom |
|-----------------|------------|-----------|
| --------------- | ---------- | --------- |
| 5000 _ms_ | p99 | 3500 _ms_ |
| 5000 _ms_ | p95 | 4300 _ms_ |
| 5000 _ms_ | p90 | 4600 _ms_ |
@@ -156,7 +157,7 @@ _Analysis was done in <https://gitlab.com/gitlab-org/gitlab/-/issues/432934#note
**`git`**:
| Target Duration | Percentile | Headroom |
|-----------------|------------|-----------|
| --------------- | ---------- | --------- |
| 5000 _ms_ | p99 | 3760 _ms_ |
| 5000 _ms_ | p95 | 4280 _ms_ |
| 5000 _ms_ | p90 | 4430 _ms_ |
@@ -182,7 +183,585 @@ Not yet defined.
## Proposal
TBD
The Routing Service implements the following design guidelines:
1. Simple:
- Routing service does not buffer requests.
- Routing service can only proxy to a single Cell based on request headers.
1. Stateless:
- Routing service does not have permanent storage.
- Routing service uses multi-level cache: in-memory, external shared cache.
1. Zero-trust:
- Routing service signs each request that is being proxied.
- The trust is established by using a JWT token or a mutual authentication scheme (see the signing sketch after this list).
- Cells can be available over public internet, as long as they follow the zero-trust model.
1. Configuration-based:
- Routing service is configured with a static list of Cells.
- Routing service configuration is applied as part of service deployment.
1. Rule-based:
- Routing service is deployed with routing rules gathered from all Cells.
- Routing service supports rule lists generated by different versions of GitLab.
- Rules allow matching by any criteria: header, content of the header, or route path.
1. Agnostic:
- Routing service is not aware of high-level concepts like organizations.
- The classification is done per the specification provided in the rules, to find the sharding key.
- The sharding key result is cached.
- A single cached sharding key is used to handle many similar requests.
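A minimal sketch of the zero-trust signing described above, assuming the JWT variant with a per-Cell shared key from the configuration. The header name `X-Gitlab-Routing-Signature` and the 30-second expiry are illustrative assumptions, not defined parts of the proposal:

```ruby
require 'jwt'

# Illustrative: sign each proxied request with a short-lived token so the
# receiving Cell can verify it came from the routing service.
def signed_headers(cell_key, headers)
  now = Time.now.to_i
  token = JWT.encode({ iss: 'routing-service', iat: now, exp: now + 30 }, cell_key, 'HS256')
  headers.merge('X-Gitlab-Routing-Signature' => token)
end

# The Cell side would verify with the same key:
# JWT.decode(token, cell_key, true, algorithm: 'HS256')
```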
The following diagram shows how a user request routes through DNS to the Routing Service deployed
as Cloudflare Worker and the router chooses a cell to send the request to.
```mermaid
graph TD;
user((User));
router[Routing Service];
cell_us0{Cell US0};
cell_us1{Cell US1};
cell_eu0{Cell EU0};
cell_eu1{Cell EU1};
user-->router;
router-->cell_eu0;
router-->cell_eu1;
router-->cell_us0;
router-->cell_us1;
subgraph Europe
cell_eu0;
cell_eu1;
end
subgraph United States
cell_us0;
cell_us1;
end
```
### Routing rules
Each Cell will publish a precompiled list of routing rules that will be consumed by the Routing Service:
- The routing rules describe how to decode the request, find the sharding key, and make the routing decision.
- The routing rules are compiled during the deployment of the Routing Service.
- The deployment process fetches the latest version of the routing rules from each Cell
that is part of the Routing Service configuration.
- The compilation process merges the routing rules from all Cells.
- Conflicting rules prevent the routing service from being compiled or started.
- Each routing rule entry has a unique identifier to ease the merge.
- The Routing Service would be re-deployed only if the list of rules was changed,
which shouldn't happen frequently, because we expect the majority of newly added endpoints
to already adhere to the prior route rules.
- The configuration describes from which Cells the routing rules need to be fetched during deploy.
- The published routing rules might make a routing decision based on a secret. For example, if the session cookie
or authentication token has the prefix `c100-`, all requests are to be forwarded to the given Cell.
- Each Cell publishes its routing rules at `/api/v4/internal/cells/route_rules.json`.
- The rules published by a Cell only include endpoints that the particular Cell can process.
- The Cell might request to perform dynamic classification based on a sharding key, by configuring
routing rules to call `/api/v4/internal/cells/classify`.
- The routing rules should use `prefix` as a way to speed up classification. During the compilation phase
the routing service transforms all found prefixes into a decision tree to speed up any subsequent regex matches
(see the sketch after this list).
- The routing rules are ideally compiled into source code as part of deployment, to avoid expensive dynamic
parsing and evaluation of the rules.
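A minimal sketch of that prefix-based speed-up, assuming rules are grouped by prefix at compile time (a hash lookup stands in for the compiled decision tree):

```ruby
Rule = Struct.new(:id, :match_regex, :action, :priority, keyword_init: true)

# rules_by_prefix: { "/api/v4/projects/" => [rule, ...], ... }, built at
# compile time from the `prefix` fields of all published rules.
def match_rule(rules_by_prefix, path)
  # The longest matching prefix narrows the candidate set cheaply...
  _prefix, candidates = rules_by_prefix
    .select { |prefix, _| path.start_with?(prefix) }
    .max_by { |prefix, _| prefix.length }
  return nil unless candidates

  # ...then the expensive regexes run only on that small set.
  candidates
    .select { |rule| rule.match_regex.nil? || Regexp.new(rule.match_regex).match?(path) }
    .max_by { |rule| rule.priority || 0 }
end
```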
The routing rules JSON structure describes all matchers:
```json
{
"rules": [
{
"id": "<unique-identifier>",
"cookies": {
"<cookie_name>": {
"prefix": "<match-given-prefix>",
"match_regex": "<regex_match>"
},
"<cookie_name2>": {
"prefix": "<match-given-prefix>",
"match_regex": "<regex_match>"
}
},
"headers": {
"<header_name>": {
"prefix": "<match-given-prefix>",
"match_regex": "<regex_match>"
},
"<header_name2>": {
"prefix": "<match-given-prefix>",
"match_regex": "<regex_match>"
}
},
"path": {
"prefix": "<match-given-prefix>",
"match_regex": "<regex_match>"
},
"method": ["<list_of_accepted_methods>"],
// If many rules are matched, define which one wins
"priority": 1000,
// Accept request and proxy to the Cell in question
"action": "proxy",
// Classify request based on regex matching groups
"action": "classify",
"classify": {
"keys": ["list_of_regex_match_capture_groups"]
}
}
]
}
```
Example of the routing rules published by Cell 100 that make a routing decision based on a session cookie and a secret.
The high priority is assigned because the routing rules are secret-based and should take precedence over all other matchers:
```json
{
"rules": [
{
"id": "t4mkd5ndsk58si6uwwz7rdavil9m2hpq",
"cookies": {
"_gitlab_session": {
"prefix": "c100-" // accept `_gitlab_session` that are prefixed with `c100-`
}
},
"action": "proxy",
"priority": 1000
},
{
"id": "jcshae4d4dtykt8byd6zw1ecccl5dkts",
"headers": {
"GITLAB_TOKEN": {
"prefix": "C100_" // accept `GITLAB_TOKEN` that are prefixed with `C100_`
}
},
"action": "proxy",
"priority": 1000
}
]
}
```
Example of the routing rules published by all Cells that make a routing decision based on the path:
```json
{
"rules": [
{
"id": "c9scvaiwj51a75kzoh917uwtnw8z4ebl",
"path": {
"prefix": "/api/v4/projects/", // speed-up rule matching
"match_regex": "^/api/v4/projects/(?<project_id_or_path_encoded>[^/]+)(/.*)?$"
},
"action": "classify",
"classify": {
"keys": ["project_id_or_path_encoded"]
}
}
]
}
```
### Classification
Each Cell implements a classification endpoint:
- The classification endpoint is at `/api/v4/internal/cells/classify` (or a gRPC endpoint).
- The classification endpoint accepts a list of sharding keys. Sharding keys are decoded from the request,
based on the routing rules provided by the Cell.
- The endpoint returns other equivalent sharding keys to populate the cache for similar requests.
This is to ensure that all similar requests can be handled quickly without having to classify each time.
- Routing Service tracks the health of Cells, and issues a `classify` request to Cells based on weights,
health of the Cell, or other defined criteria. Weights would indicate which Cell is preferred to perform the
classification of sharding keys.
- Routing Service retries the `classify` call for a reasonable amount of time.
Repeated failure of a Cell to `classify` indicates that the Cell is unhealthy.
- The `classify` result is cached regardless of the returned `action` (proxy or reject).
The rejected classification is cached to prevent an excessive number of
requests for sharding keys that are not found.
- The cached response is retained for a time defined by `expiry` and `refresh`.
- The `expiry` defines when the item is removed from the cache unless used.
- The `refresh` defines when the item needs to be reclassified if used.
- The refresh is done asynchronously, because requests should be served without delay once classified.
Refreshing ensures that the cache is always hot and up to date (see the sketch after this list).
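A minimal sketch of those `refresh`/`expiry` semantics. The cache store, the durations in seconds, and the `async_reclassify` hook are all hypothetical:

```ruby
CacheEntry = Struct.new(:cell, :cached_at, :refresh, :expiry, keyword_init: true) do
  def expired?(now = Time.now) = now > cached_at + expiry   # drop; reclassify synchronously
  def stale?(now = Time.now)   = now > cached_at + refresh  # serve, but reclassify in background
end

def async_reclassify(sharding_key)
  # Hypothetical hook: enqueue a background call to /classify for this key.
end

def lookup(cache, sharding_key)
  entry = cache[sharding_key]
  return nil if entry.nil? || entry.expired?      # caller must call /classify
  async_reclassify(sharding_key) if entry.stale?  # keep the cache hot
  entry.cell                                      # served without added delay
end
```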
For the above example:
1. The router sees request to `/api/v4/projects/1000/issues`.
1. It selects the above `rule` for this request, which requests `classify` for `project_id_or_path_encoded`.
1. It decodes `project_id_or_path_encoded` to be `1000`.
1. Checks the cache for a Cell associated with `project_id_or_path_encoded=1000`.
1. Sends a request to `/api/v4/internal/cells/classify` if no Cell was found in the cache.
1. Rails responds with the Cell holding the given project, and also all other equivalent sharding keys
for the resource that should be put in the cache.
1. Routing Service caches the result for the duration specified in the configuration, or in the response.
```json
# POST /api/v4/internal/cells/classify
## Request:
{
"metadata": {
"rule_id": "c9scvaiwj51a75kzoh917uwtnw8z4ebl",
"headers": {
"all_request_headers": "value"
},
"method": "GET",
"path": "/api/v4/projects/100/issues"
},
"keys": {
"project_id_or_path_encoded": 100
}
}
## Response:
{
"action": "proxy",
"proxy": {
"name": "cell_1",
"url": "https://cell1.gitlab.com"
},
"ttl": "10 minutes",
"matched_keys": [ // list of all equivalent keys that should be put in the cache
{ "project_id_or_path_encoded": 100 },
{ "project_id_or_path_encoded": "gitlab-org%2Fgitlab" },
{ "project_full_path": "gitlab-org/gitlab" },
{ "namespace_full_path": "gitlab-org" },
{ "namespace_id": 10 },
{ "organization_full_path": "gitlab-inc" },
{ "organization_id": 50 }
]
}
```
The following code represents a negative response when a sharding key was not found:
```json
# POST /api/v4/internal/cells/classify
## Request:
{
"metadata": {
"rule_id": "c9scvaiwj51a75kzoh917uwtnw8z4ebl",
"headers": {
"all_request_headers": "value"
},
"method": "GET",
"path": "/api/v4/projects/100/issues"
},
"keys": {
"project_id_or_path_encoded": 100
}
}
## Response:
{
"action": "reject",
"reject": {
"http_status": 404
},
"cache": {
"refresh": "10 minutes",
"expiry": "10 minutes"
},
"matched_keys": [ // list of all equivalent keys that should be put in the cache
{ "project_id_or_path_encoded": 100 }
]
}
```
### Configuration
The Routing Service will use a configuration similar to this:
```toml
[[cells]]
name = "cell_1"
url = "https://cell1.gitlab.com"
key = "ABC123"
classify_weight = 100

[[cells]]
name = "cell_2"
url = "https://cell2.gitlab.com"
key = "CDE123"
classify_weight = 1

[cache.memory.classify]
refresh_time = "10 minutes"
expiry_time = "1 hour"

[cache.external.classify]
refresh_time = "30 minutes"
expiry_time = "6 hours"
```
We assume that it is acceptable to provide a static list of Cells, because:
1. Static: Cells are unlikely to be dynamically provisioned and decommissioned.
1. Good enough: We can manage such a list even up to 100 Cells.
1. Simple: We don't have to implement robust service discovery in the service,
and we have a guarantee that this list is always exhaustive.
The configuration describes all Cells, URLs, zero-trust keys, and weights,
and how long requests should be cached. The `classify_weight` defines how often
the Cell should receive classification requests versus other Cells.
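A minimal sketch of how `classify_weight` could drive the choice of Cell for a classification request, using weighted random selection over healthy Cells (the health flag is an assumption; the routing service tracks Cell health per the classification section above):

```ruby
# cells: [{ name: "cell_1", classify_weight: 100, healthy: true }, ...]
def pick_cell_for_classification(cells)
  healthy = cells.select { |cell| cell[:healthy] }
  return nil if healthy.empty?

  # Draw proportionally to classify_weight: with the example configuration,
  # cell_1 (weight 100) is chosen 100x more often than cell_2 (weight 1).
  target = rand(healthy.sum { |cell| cell[:classify_weight] })
  healthy.find { |cell| (target -= cell[:classify_weight]) < 0 }
end
```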
## Request flows
1. There are two Cells.
1. `gitlab-org` is a top-level namespace and lives in `Cell US0` in the `GitLab.com Public` organization.
1. `my-company` is a top-level namespace and lives in `Cell EU0` in the `my-organization` organization.
### Router configured to perform static routing
1. The Cell US0 supports all other public-facing projects.
1. The Cells are configured to generate all secrets and session cookies with a Cell-specific prefix, like `eu0_` for Cell EU0.
1. The Personal Access Token is scoped to an Organization, and because the Organization belongs to a single Cell,
generated PATs are prefixed with the Cell identifier.
1. The Session Cookie encodes the Organization in use, and because the Organization belongs to a single Cell,
the generated session cookie is prefixed with the Cell identifier.
1. The Cell EU0 allows only private organizations, groups, and projects.
1. The Cell US0 is a target Cell for all requests unless explicitly prefixed.
Cell US0:
```json
{
"rules": [
{
"id": "tjh147se67wadjzum7onwqiad2b75uft",
"path": {
"prefix": "/"
},
"action": "proxy",
"priority": 1
}
]
}
```
Cell EU0:
```json
{
"rules": [
{
"id": "t4mkd5ndsk58si6uwwz7rdavil9m2hpq",
"cookies": {
"_gitlab_session": {
"prefix": "eu0_"
}
},
"path": {
"prefix": "/"
},
"action": "proxy",
"priority": 1000
},
{
"id": "jcshae4d4dtykt8byd6zw1ecccl5dkts",
"headers": {
"GITLAB_TOKEN": {
"prefix": "eu0_"
}
},
"path": {
"prefix": "/"
},
"action": "proxy",
"priority": 1000
}
]
}
```
#### Navigates to `/my-company/my-project` while logged in to Cell EU0
1. Because the user switched the Organization to `my-company`, their session cookie is prefixed with `eu0_`.
1. The user sends a request to `/my-company/my-project`, and because the cookie is prefixed with `eu0_`, it is directed to Cell EU0.
1. `Cell EU0` returns the correct response.
```mermaid
sequenceDiagram
participant user as User
participant router as Router
participant cell_eu0 as Cell EU0
participant cell_eu1 as Cell EU1
user->>router: GET /my-company/my-project<br/>_gitlab_session=eu0_uwwz7rdavil9
router->>cell_eu0: GET /my-company/my-project
cell_eu0->>user: <h1>My Project...
```
#### Navigates to `/my-company/my-project` while not logged in
1. The user visits `/my-company/my-project`, and because the request does not have a session cookie, it is forwarded to `Cell US0`.
1. The user signs in.
1. GitLab sees that the user's default organization is `my-company`, so it assigns a session cookie prefixed with `eu0_` to indicate that
the user is meant to interact with `my-company`.
1. The user sends a request to `/my-company/my-project` again, now with the session cookie, and the request is proxied to `Cell EU0`.
1. `Cell EU0` returns the correct response.
```mermaid
sequenceDiagram
participant user as User
participant router as Router
participant cell_us0 as Cell US0
participant cell_eu0 as Cell EU0
user->>router: GET /my-company/my-project
router->>cell_us0: GET /my-company/my-project
cell_us0->>user: HTTP 302 /users/sign_in?redirect=/my-company/my-project
user->>router: GET /users/sign_in?redirect=/my-company/my-project
router->>cell_us0: GET /users/sign_in?redirect=/my-company/my-project
cell_us0-->>user: <h1>Sign in...
user->>router: POST /users/sign_in?redirect=/my-company/my-project
router->>cell_us0: POST /users/sign_in?redirect=/my-company/my-project
cell_us0->>user: HTTP 302 /my-company/my-project<br/>_gitlab_session=eu0_uwwz7rdavil9
user->>router: GET /my-company/my-project<br/>_gitlab_session=eu0_uwwz7rdavil9
router->>cell_eu0: GET /my-company/my-project<br/>_gitlab_session=eu0_uwwz7rdavil9
cell_eu0->>user: <h1>My Project...
```
#### Navigates to `/gitlab-org/gitlab` after last step
The user visits `/gitlab-org/gitlab` with a session cookie prefixed with `eu0_`, so the request is directed to `Cell EU0`, which does not host `gitlab-org` and returns 404.
```mermaid
sequenceDiagram
participant user as User
participant router as Router
participant cell_eu0 as Cell EU0
participant cell_us0 as Cell US0
user->>router: GET /gitlab-org/gitlab<br/>_gitlab_session=eu0_uwwz7rdavil9
router->>cell_eu0: GET /gitlab-org/gitlab
cell_eu0->>user: HTTP 404
```
### Router configured to perform dynamic routing based on classification
The Cells publish route rules that allow the requests to be classified.
Cell US0 and EU0:
```json
{
"rules": [
{
"id": "tjh147se67wadjzum7onwqiad2b75uft",
"path": {
"prefix": "/",
"match_regex": "^/(?<top_level_group>[^/]+)(/.*)?$"
},
"action": "classify",
"classify": {
"keys": ["top_level_group"]
}
},
{
"id": "jcshae4d4dtykt8byd6zw1ecccl5dkts",
"path": {
"prefix": "/"
},
"action": "proxy"
}
]
}
```
#### Navigates to `/my-company/my-project` while logged in to Cell EU0
1. The user visits `/my-company/my-project`.
1. The router decodes the sharding key `top_level_group=my-company`.
1. The router checks if this sharding key is cached.
1. Because it is not, a classification request is sent to the `/classify` endpoint of a random Cell.
1. The `classify` response is cached.
1. The request is then proxied to the Cell returned by classification.
```mermaid
sequenceDiagram
participant user as User
participant router as Router
participant cache as Cache
participant cell_us0 as Cell US0
participant cell_eu0 as Cell EU0
user->>router: GET /my-company/my-project
router->>cache: CACHE_GET: top_level_group=my-company
cache->>router: CACHE_NOT_FOUND
router->>cell_us0: POST /api/v4/internal/cells/classify<br/>top_level_group=my-company
cell_us0->>router: CLASSIFY: top_level_group=my-company, cell=cell_eu0
router->>cache: CACHE_SET: top_level_group=my-company, cell=cell_eu0
router->>cell_eu0: GET /my-company/my-project
cell_eu0->>user: <h1>My Project...
```
#### Navigates to `/my-company/my-project` while not logged in
1. The user visits `/my-company/my-project`.
1. The router decodes the sharding key `top_level_group=my-company`.
1. The router checks if this sharding key is cached.
1. Because it is not, a classification request is sent to the `/classify` endpoint of a random Cell.
1. The `classify` response is cached.
1. The request is then proxied to the Cell returned by classification.
1. Because the project is private, the user is redirected to sign in.
1. The sign-in page is defined to be handled by all Cells, so the request is proxied to a random Cell.
1. The user visits `/my-company/my-project` again after logging in.
1. The request for `top_level_group=my-company` is proxied to the correct Cell.
```mermaid
sequenceDiagram
participant user as User
participant router as Router
participant cache as Cache
participant cell_us0 as Cell US0
participant cell_eu0 as Cell EU0
user->>router: GET /my-company/my-project
router->>cache: CACHE_GET: top_level_group=my-company
cache->>router: CACHE_NOT_FOUND
router->>cell_us0: POST /api/v4/internal/cells/classify<br/>top_level_group=my-company
cell_us0->>router: CLASSIFY: top_level_group=my-company, cell=cell_eu0
router->>cache: CACHE_SET: top_level_group=my-company, cell=cell_eu0
router->>cell_eu0: GET /my-company/my-project
cell_eu0->>user: HTTP 302 /users/sign_in?redirect=/my-company/my-project
user->>router: GET /users/sign_in?redirect=/my-company/my-project
router->>cell_us0: GET /users/sign_in?redirect=/my-company/my-project
cell_us0-->>user: <h1>Sign in...
user->>router: POST /users/sign_in?redirect=/my-company/my-project
router->>cell_eu0: POST /users/sign_in?redirect=/my-company/my-project
cell_eu0->>user: HTTP 302 /my-company/my-project
user->>router: GET /my-company/my-project
router->>cache: CACHE_GET: top_level_group=my-company
cache->>router: CACHE_FOUND: cell=cell_eu0
router->>cell_eu0: GET /my-company/my-project
cell_eu0->>user: <h1>My Project...
```
#### Navigates to `/gitlab-org/gitlab` after last step
1. Because `/gitlab-org` is not found in the cache, it is classified and then directed to the correct Cell.
```mermaid
sequenceDiagram
participant user as User
participant router as Router
participant cache as Cache
participant cell_us0 as Cell US0
participant cell_eu0 as Cell EU0
user->>router: GET /gitlab-org/gitlab
router->>cache: CACHE_GET: top_level_group=gitlab-org
cache->>router: CACHE_NOT_FOUND
router->>cell_us0: POST /api/v4/internal/cells/classify<br/>top_level_group=gitlab-org
cell_us0->>router: CLASSIFY: top_level_group=gitlab-org, cell=cell_us0
router->>cache: CACHE_SET: top_level_group=gitlab-org, cell=cell_us0
router->>cell_us0: GET /gitlab-org/gitlab
cell_us0->>user: <h1>My Project...
```
### Performance and reliability considerations
- It is expected that each Cell can classify all sharding keys.
- Alternatively, the classification could be done by the Cluster-wide Data Provider
if it owned all the data required to classify.
- The published routing rules allow static criteria to be defined, making it possible to base a routing decision
only on a secret. As a result, the Routing Service adds no latency
to request processing and offers superior resiliency.
- It is expected that there will be a penalty when learning a new sharding key. However,
the multi-layer cache should provide a very high cache-hit ratio,
due to the low cardinality of sharding keys. A sharding key is effectively mapped
to a resource (organization, group, or project), and there is a finite number of those.
## Technology
@@ -190,7 +769,30 @@ TBD
## Alternatives
TBD
### Buffering requests
The [Stateless Router using Requests Buffering](proposal-stateless-router-with-buffering-requests.md)
describes an approach where a Cell answers with an `X-Gitlab-Cell-Redirect` header to redirect the request to another Cell:
- This is based on a need to buffer the whole request (headers + body) which is very memory intensive.
- This proposal does not provide an easy way to handle mixed deployment of Cells, where Cells might be running different versions.
- This proposal likely requires caching significantly more information, since it is based on requests, rather than on decoded sharding keys.
### Learn request
The [Stateless Router using Routes Learning](proposal-stateless-router-with-routes-learning.md)
describes an approach similar to the one in this document, except that the route rules and classification
are done in a single step, in the form of a pre-flight check to `/api/v4/internal/cells/learn`:
- This makes the whole routes learning dynamic, and dependent on availability of the Cells.
- This proposal does not provide an easy way to handle mixed deployment of Cells, where Cells might be running different versions.
- This proposal likely requires caching significantly more information, since it is based on requests, rather than on decoded sharding keys.
## FAQ
1. How and when will the Routing Service compile the set of rules?
To be defined.
## Links

View File

@@ -887,7 +887,8 @@ The following topics explain how to use keywords to configure CI/CD pipelines.
### `after_script`
Use `after_script` to define an array of commands that run after each job, including failed jobs.
Use `after_script` to define an array of commands that run after a job's `script` section, including failed jobs with a failure type of `script_failure`.
`after_script` commands do not run after [other failure types](#retrywhen).
**Keyword type**: Job keyword. You can use it only as part of a job or in the
[`default` section](#default).

View File

@@ -100,7 +100,7 @@ LIMIT 1
## Store merge request data in ClickHouse
Several other use cases exist for storing and querying merge request data in
ClickHouse. In this document, we focus on this particular feature.
[ClickHouse](../../../integration/clickhouse.md). In this document, we focus on this particular feature.
The core data exists in the `merge_request_metrics` and in the `merge_requests`
database tables. Some filters require extra tables to be joined:

View File

@@ -8,7 +8,7 @@ info: >-
---
# Runner Fleet Dashboard **(ULTIMATE EXPERIMENT)**
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/424495) in GitLab 16.6 behind several [feature flags](#enable-feature-flags).
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/424495) in GitLab 16.6 behind several [feature flags](../integration/clickhouse.md#enable-feature-flags).
This feature is an [Experiment](../policy/experiment-beta-support.md).
To join the list of users testing this feature, contact us in
@@ -50,138 +50,7 @@ To test the Runner Fleet Dashboard as part of the early adopters program, you mu
- Run GitLab 16.7 or above.
- Have an [Ultimate license](https://about.gitlab.com/pricing/).
- Be able to run ClickHouse database. We recommend using [ClickHouse Cloud](https://clickhouse.cloud/).
## Setup
To setup ClickHouse as the GitLab data storage:
1. [Run ClickHouse Cluster and configure database](#run-and-configure-clickhouse).
1. [Configure GitLab connection to Clickhouse](#configure-the-gitlab-connection-to-clickhouse).
1. [Run ClickHouse migrations](#run-clickhouse-migrations).
1. [Enable the feature flags](#enable-feature-flags).
<i class="fa fa-youtube-play youtube" aria-hidden="true"></i>
For a video walkthrough, see [Setting up Runner Fleet Dashboard with ClickHouse](https://www.youtube.com/watch?v=YpGV95Ctbpk).
### Run and configure ClickHouse
The most straightforward way to run ClickHouse is with [ClickHouse Cloud](https://clickhouse.cloud/).
You can also [run ClickHouse on your own server](https://clickhouse.com/docs/en/install). Refer to the ClickHouse
documentation regarding [recommendations for self-managed instances](https://clickhouse.com/docs/en/install#recommendations-for-self-managed-clickhouse).
When you run ClickHouse on a hosted server, various data points might impact the resource consumption, like the number
of builds that run on your instance each month, the selected hardware, the data center choice to host ClickHouse, and more.
Regardless, the cost should not be significant.
NOTE:
ClickHouse is a secondary data store for GitLab. All your data is still stored in Postgres,
and only duplicated in ClickHouse for analytics purposes.
To create necessary user and database objects:
1. Generate a secure password and save it.
1. Sign in to the ClickHouse SQL console.
1. Execute the following command. Replace `PASSWORD_HERE` with the generated password.
```sql
CREATE DATABASE gitlab_clickhouse_main_production;
CREATE USER gitlab IDENTIFIED WITH sha256_password BY 'PASSWORD_HERE';
CREATE ROLE gitlab_app;
GRANT SELECT, INSERT, ALTER, CREATE, UPDATE, DROP, TRUNCATE, OPTIMIZE ON gitlab_clickhouse_main_production.* TO gitlab_app;
GRANT gitlab_app TO gitlab;
```
### Configure the GitLab connection to ClickHouse
::Tabs
:::TabTitle Linux package
To provide GitLab with ClickHouse credentials:
1. Edit `/etc/gitlab/gitlab.rb`:
```ruby
gitlab_rails['clickhouse_databases']['main']['database'] = 'gitlab_clickhouse_main_production'
gitlab_rails['clickhouse_databases']['main']['url'] = 'https://example.com/path'
gitlab_rails['clickhouse_databases']['main']['username'] = 'gitlab'
gitlab_rails['clickhouse_databases']['main']['password'] = 'PASSWORD_HERE' # replace with the actual password
```
1. Save the file and reconfigure GitLab:
```shell
sudo gitlab-ctl reconfigure
```
:::TabTitle Helm chart (Kubernetes)
1. Save the ClickHouse password as a Kubernetes Secret:
```shell
kubectl create secret generic gitlab-clickhouse-password --from-literal="main_password=PASSWORD_HERE"
```
1. Export the Helm values:
```shell
helm get values gitlab > gitlab_values.yaml
```
1. Edit `gitlab_values.yaml`:
```yaml
global:
clickhouse:
enabled: true
main:
username: default
password:
secret: gitlab-clickhouse-password
key: main_password
database: gitlab_clickhouse_main_production
url: 'http://example.com'
```
1. Save the file and apply the new values:
```shell
helm upgrade -f gitlab_values.yaml gitlab gitlab/gitlab
```
::EndTabs
To verify that your connection is set up successfully:
1. Log in to [Rails console](../administration/operations/rails_console.md#starting-a-rails-console-session)
1. Execute the following:
```ruby
ClickHouse::Client.select('SELECT 1', :main)
```
If successful, the command returns `[{"1"=>1}]`
### Run ClickHouse migrations
To create the required database objects execute:
```shell
sudo gitlab-rake gitlab:clickhouse:migrate
```
### Enable feature flags
Features that use ClickHouse are currently under development and are disabled by feature flags.
To enable these features, [enable](../administration/feature_flags.md#how-to-enable-and-disable-features-behind-flags)
the following feature flags:
| Feature flag name | Purpose |
|------------------------------------|---------------------------------------------------------------------------|
| `ci_data_ingestion_to_click_house` | Enables synchronization of new finished CI builds to Clickhouse database. |
| `clickhouse_ci_analytics` | Enables the **Wait time to pick a job** chart. |
- Be able to run a [ClickHouse database](../integration/clickhouse.md). We recommend using [ClickHouse Cloud](https://clickhouse.cloud/).
## What's next

View File

@@ -0,0 +1,137 @@
---
stage: none
group: unassigned
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://handbook.gitlab.com/handbook/product/ux/technical-writing/#assignments
---
# ClickHouse integration guidelines
Instructions for how to set up the integration between GitLab and a ClickHouse database.
## Setup
To set up ClickHouse as the GitLab data storage:
1. [Run ClickHouse Cluster and configure database](#run-and-configure-clickhouse).
1. [Configure the GitLab connection to ClickHouse](#configure-the-gitlab-connection-to-clickhouse).
1. [Run ClickHouse migrations](#run-clickhouse-migrations).
1. [Enable the feature flags](#enable-feature-flags).
### Run and configure ClickHouse
The most straightforward way to run ClickHouse is with [ClickHouse Cloud](https://clickhouse.cloud/).
You can also [run ClickHouse on your own server](https://clickhouse.com/docs/en/install). Refer to the ClickHouse
documentation regarding [recommendations for self-managed instances](https://clickhouse.com/docs/en/install#recommendations-for-self-managed-clickhouse).
When you run ClickHouse on a hosted server, various data points might impact the resource consumption, like the number
of builds that run on your instance each month, the selected hardware, the data center choice to host ClickHouse, and more.
Regardless, the cost should not be significant.
NOTE:
ClickHouse is a secondary data store for GitLab. All your data is still stored in Postgres,
and only duplicated in ClickHouse for analytics purposes.
To create necessary user and database objects:
1. Generate a secure password and save it.
1. Sign in to the ClickHouse SQL console.
1. Execute the following command. Replace `PASSWORD_HERE` with the generated password.
```sql
CREATE DATABASE gitlab_clickhouse_main_production;
CREATE USER gitlab IDENTIFIED WITH sha256_password BY 'PASSWORD_HERE';
CREATE ROLE gitlab_app;
GRANT SELECT, INSERT, ALTER, CREATE, UPDATE, DROP, TRUNCATE, OPTIMIZE ON gitlab_clickhouse_main_production.* TO gitlab_app;
GRANT gitlab_app TO gitlab;
```
### Configure the GitLab connection to ClickHouse
::Tabs
:::TabTitle Linux package
To provide GitLab with ClickHouse credentials:
1. Edit `/etc/gitlab/gitlab.rb`:
```ruby
gitlab_rails['clickhouse_databases']['main']['database'] = 'gitlab_clickhouse_main_production'
gitlab_rails['clickhouse_databases']['main']['url'] = 'https://example.com/path'
gitlab_rails['clickhouse_databases']['main']['username'] = 'gitlab'
gitlab_rails['clickhouse_databases']['main']['password'] = 'PASSWORD_HERE' # replace with the actual password
```
1. Save the file and reconfigure GitLab:
```shell
sudo gitlab-ctl reconfigure
```
:::TabTitle Helm chart (Kubernetes)
1. Save the ClickHouse password as a Kubernetes Secret:
```shell
kubectl create secret generic gitlab-clickhouse-password --from-literal="main_password=PASSWORD_HERE"
```
1. Export the Helm values:
```shell
helm get values gitlab > gitlab_values.yaml
```
1. Edit `gitlab_values.yaml`:
```yaml
global:
clickhouse:
enabled: true
main:
username: default
password:
secret: gitlab-clickhouse-password
key: main_password
database: gitlab_clickhouse_main_production
url: 'http://example.com'
```
1. Save the file and apply the new values:
```shell
helm upgrade -f gitlab_values.yaml gitlab gitlab/gitlab
```
::EndTabs
To verify that your connection is set up successfully:
1. Log in to [Rails console](../administration/operations/rails_console.md#starting-a-rails-console-session)
1. Execute the following:
```ruby
ClickHouse::Client.select('SELECT 1', :main)
```
If successful, the command returns `[{"1"=>1}]`
### Run ClickHouse migrations
To create the required database objects, execute:
```shell
sudo gitlab-rake gitlab:clickhouse:migrate
```
### Enable feature flags
Features that use ClickHouse are currently under development and are disabled by feature flags.
To enable these features, [enable](../administration/feature_flags.md#how-to-enable-and-disable-features-behind-flags)
the following feature flags:
| Feature flag name | Purpose |
|------------------------------------|---------------------------------------------------------------------------|
| `ci_data_ingestion_to_click_house` | Enables synchronization of new finished CI builds to the ClickHouse database. |
| `clickhouse_ci_analytics` | Enables the **Wait time to pick a job** chart. |
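For example, from the [Rails console](../administration/operations/rails_console.md#starting-a-rails-console-session), the flags can be enabled as follows (an alternative to the methods described in the linked feature flag documentation):

```ruby
Feature.enable(:ci_data_ingestion_to_click_house)
Feature.enable(:clickhouse_ci_analytics)
```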

View File

@@ -16,9 +16,7 @@ your Git branches like a CI/CD server.
This means you don't have to wait for dependencies to be downloaded and builds to finish, you can start
coding immediately. With Gitpod you can start coding instantly on any project, branch, and merge
request from any device, at any time, from your browser:
![Gitpod interface](img/gitpod_web_interface_v13_4.png)
request from your browser.
To use the GitLab Gitpod integration, it must be enabled for your GitLab instance. Users of:

Binary file not shown. (Before: 98 KiB)

View File

@@ -30,7 +30,7 @@ To leave feedback about Product Analytics bugs or functionality:
Product analytics uses several tools:
- [**Snowplow**](https://docs.snowplow.io/docs) - A developer-first engine for collecting behavioral data, and passing it through to ClickHouse.
- [**ClickHouse**](https://clickhouse.com/docs) - A database suited to store, query, and retrieve analytical data.
- [**ClickHouse**](../../integration/clickhouse.md) - A database suited to store, query, and retrieve analytical data.
- [**Cube**](https://cube.dev/docs/) - A universal semantic layer that provides an API to run queries against the data stored in ClickHouse.
The following diagram illustrates the product analytics flow:

View File

@@ -0,0 +1,56 @@
---
stage: Create
group: Code Creation
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://handbook.gitlab.com/handbook/product/ux/technical-writing/#assignments
---
# Repository X-Ray **(ULTIMATE)**
Repository X-Ray enhances GitLab Duo Code Suggestions by providing additional context to improve the accuracy and relevance of code recommendations.
Repository X-Ray gives the code assistant more insight into the project's codebase and dependencies to generate better code suggestions. It does this by analyzing key project configuration files such as `Gemfile.lock`, `package.json`, and `go.mod` to build additional context.
By understanding the frameworks, libraries, and other dependencies in use, Repository X-Ray helps the code assistant tailor suggestions to match the coding patterns, styles, and technologies used in the project. This results in code recommendations that integrate more seamlessly and follow best practices for that stack.
## Supported languages and package managers
| Language | Package Manager | Configuration File |
| ---------- |-----------------| -------------------- |
| Go | Go Modules | `go.mod` |
| JavaScript | NPM, Yarn | `package.json` |
| Ruby | RubyGems | `Gemfile.lock` |
| Python | Poetry | `pyproject.toml` |
| Python | Pip | `requirements.txt` |
| Python | Conda | `environment.yml` |
## Enable Repository X-Ray
Prerequisites:
- You must have access to [GitLab Duo Code Suggestions](index.md) in the project.
- GitLab Runner must be set up and enabled for the project, because Repository X-Ray runs analysis pipelines using GitLab runners.
To enable Repository X-Ray, add the following job definition to the project's `.gitlab-ci.yml` file.
```yaml
xray:
stage: build
image: registry.gitlab.com/gitlab-org/code-creation/repository-x-ray:latest
allow_failure: true
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
variables:
OUTPUT_DIR: reports
script:
- x-ray-scan -p "$CI_PROJECT_DIR" -o "$OUTPUT_DIR"
artifacts:
reports:
repository_xray: "$OUTPUT_DIR/*/*.json"
```
- The `$OUTPUT_DIR` environment variable defines the:
- Output directory for reports.
- Path that artifacts are uploaded from.
- The added rules restrict the job to the default branch only. Restricting the job this way ensures development changes do not impact the baseline X-Ray data used for production code suggestions.
After the initial `xray` job completes and uploads the repository analysis reports, no further action is required. Repository X-Ray automatically enriches all code generation requests from that point forward.

View File

@@ -39,7 +39,7 @@ You can upload a file from the GitLab UI.
1. Go to the directory where you want to upload the file.
1. Next to the directory name, select the plus icon (**{plus}**) > **Upload file**.
1. Complete the fields.
- To create a merge request with your changes, enter a branch name
To create a merge request with your changes, enter a branch name
that's not your repository's [default branch](branches/default.md).
1. Select **Upload file**.

View File

@ -20,7 +20,7 @@ To create a text file in the Web Editor:
1. Go to the directory where you want to create the new file.
1. Next to the directory name, select the plus icon (**{plus}**) > **New file**.
1. Complete the fields.
- To create a merge request with your changes, enter a branch name
To create a merge request with your changes, enter a branch name
that's not your repository's [default branch](branches/default.md).
1. Select **Commit changes**.
@ -31,14 +31,14 @@ To create a text file from a template in the Web Editor:
1. On the left sidebar, select **Search or go to** and find your project.
1. Go to the directory where you want to create the new file.
1. Next to the directory name, select the plus icon (**{plus}**) > **New file**.
1. In **Filename**, enter a filename that GitLab provides a template for:
1. In **Filename**, enter a name that GitLab provides a template for:
- `.gitignore`
- `.gitlab-ci.yml`
- `LICENSE`
- `Dockerfile`
1. From the **Apply a template** dropdown list, select a template.
1. Complete the fields.
- To create a merge request with your changes, enter a branch name
To create a merge request with your changes, enter a branch name
that's not your repository's [default branch](branches/default.md).
1. Select **Commit changes**.
@ -50,7 +50,7 @@ To edit a text file in the Web Editor:
1. Go to the file you want to edit.
1. Select **Edit > Edit single file**.
1. Complete the fields.
- To create a merge request with your changes, enter a branch name
To create a merge request with your changes, enter a branch name
that's not your repository's [default branch](branches/default.md).
1. Select **Commit changes**.
@ -72,7 +72,7 @@ To close the preview panel, select the **Write** tab.
### Link to specific lines
To link to single or multiple lines in the Web Editor, add hash
information to the filename segment of the URL. For example:
information to the file name segment of the URL. For example:
- `MY_FILE.js#L3` highlights line 3 in `MY_FILE.js`.
- `MY_FILE.js#L3-10` highlights lines 3 to 10 in `MY_FILE.js`.
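For example, on a hypothetical instance, a full link could look like `https://gitlab.example.com/my-group/my-project/-/blob/main/MY_FILE.js#L3-10`.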
@ -90,7 +90,7 @@ To upload a file in the Web Editor:
1. Go to the directory where you want to upload the file.
1. Next to the directory name, select the plus icon (**{plus}**) > **Upload file**.
1. Complete the fields.
- To create a merge request with your changes, enter a branch name
To create a merge request with your changes, enter a branch name
that's not your repository's [default branch](branches/default.md).
1. Select **Upload file**.
@ -102,7 +102,7 @@ To create a directory in the Web Editor:
1. Go to the directory where you want to create the new directory.
1. Next to the directory name, select the plus icon (**{plus}**) > **New directory**.
1. Complete the fields.
- To create a merge request with your changes, enter a branch name
To create a merge request with your changes, enter a branch name
that's not your repository's [default branch](branches/default.md).
1. Select **Create directory**.

View File

@ -94,13 +94,16 @@ To upload a file in the Web IDE:
1. On the left activity bar, select **Explorer**, or
press <kbd>Shift</kbd>+<kbd>Command</kbd>+<kbd>E</kbd>.
1. Go to the directory where you want to upload the file.
- To create a new directory, on the left **Explorer** sidebar,
in the upper right, select **New Folder** (**{folder-new}**).
To create a new directory:
- On the left **Explorer** sidebar, in the upper right,
select **New Folder** (**{folder-new}**).
1. Right-click the directory and select **Upload**.
1. Select the file you want to upload.
You can upload multiple files at once.
The new files are uploaded and automatically added to the repository.
The files are uploaded and automatically added to the repository.
## Switch branches

View File

@ -7,7 +7,7 @@ module Gitlab
attr_reader :promise
delegate :state, to: :promise
delegate :state, :complete?, to: :promise
def initialize(promise, path, options, log_info)
@promise = promise

View File

@ -0,0 +1,28 @@
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
class BackfillPartitionIdCiPipelineChatData < BatchedMigrationJob
operation_name :update
feature_category :continuous_integration
def perform
return unless uses_multiple_partitions?
each_sub_batch do |sub_batch|
sub_batch
.where('ci_pipeline_chat_data.pipeline_id = ci_pipelines.id')
.update_all('partition_id = ci_pipelines.partition_id FROM ci_pipelines')
end
end
private
def uses_multiple_partitions?
# Partition 100 is the initial ci_builds partition; a row with partition_id 101
# means the instance has started writing to a second partition.
!!connection.select_value(<<~SQL)
SELECT true FROM p_ci_builds WHERE partition_id = 101 LIMIT 1
SQL
end
end
end
end
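This batched migration is enqueued by a post-deployment migration, whose spec appears later in this commit. A minimal sketch of what that migration presumably looks like; the constants and arguments mirror the values asserted in the spec, and anything beyond them is an assumption:
```ruby
# frozen_string_literal: true

# Hedged sketch: constants and arguments mirror the spec for
# QueueBackfillPartitionIdCiPipelineChatData later in this commit.
class QueueBackfillPartitionIdCiPipelineChatData < Gitlab::Database::Migration[2.2]
  MIGRATION = 'BackfillPartitionIdCiPipelineChatData'
  BATCH_SIZE = 1000
  SUB_BATCH_SIZE = 100

  restrict_gitlab_migration gitlab_schema: :gitlab_ci

  def up
    queue_batched_background_migration(
      MIGRATION,
      :ci_pipeline_chat_data,
      :id,
      batch_size: BATCH_SIZE,
      sub_batch_size: SUB_BATCH_SIZE
    )
  end

  def down
    delete_batched_background_migration(MIGRATION, :ci_pipeline_chat_data, :id, [])
  end
end
```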

View File

@ -11,13 +11,16 @@ module Gitlab
include ::Gitlab::Utils::StrongMemoize
attr_reader :project, :sha, :user, :parent_pipeline, :variables, :pipeline_config
attr_reader :pipeline, :expandset, :execution_deadline, :logger, :max_includes, :max_total_yaml_size_bytes
attr_reader :project, :sha, :user, :parent_pipeline, :variables, :pipeline_config, :parallel_requests,
:pipeline, :expandset, :execution_deadline, :logger, :max_includes, :max_total_yaml_size_bytes
attr_accessor :total_file_size_in_bytes
delegate :instrument, to: :logger
# We try to keep the number of parallel HTTP requests to a minimum to avoid overloading IO.
MAX_PARALLEL_REMOTE_REQUESTS = 2
def initialize(
project: nil, pipeline: nil, sha: nil, user: nil, parent_pipeline: nil, variables: nil,
pipeline_config: nil, logger: nil
@ -30,6 +33,7 @@ module Gitlab
@variables = variables || Ci::Variables::Collection.new
@pipeline_config = pipeline_config
@expandset = []
@parallel_requests = []
@execution_deadline = 0
@logger = logger || Gitlab::Ci::Pipeline::Logger.new(project: project)
@max_includes = Gitlab::CurrentSettings.current_application_settings.ci_max_includes
@ -65,6 +69,7 @@ module Gitlab
ctx.logger = logger
ctx.max_includes = max_includes
ctx.max_total_yaml_size_bytes = max_total_yaml_size_bytes
ctx.parallel_requests = parallel_requests
end
end
@ -76,6 +81,16 @@ module Gitlab
raise TimeoutError if execution_expired?
end
def execute_remote_parallel_request(lazy_response)
parallel_requests.delete_if(&:complete?)
# We assume the first request in the queue is the first one to complete.
# This is a good enough approximation.
parallel_requests.first&.wait unless parallel_requests.size < MAX_PARALLEL_REMOTE_REQUESTS
parallel_requests << lazy_response.execute
end
def sentry_payload
{
user: user.inspect,
@ -106,7 +121,8 @@ module Gitlab
protected
attr_writer :pipeline, :expandset, :execution_deadline, :logger, :max_includes, :max_total_yaml_size_bytes
attr_writer :pipeline, :expandset, :execution_deadline, :logger, :max_includes, :max_total_yaml_size_bytes,
:parallel_requests
private
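Taken together, `MAX_PARALLEL_REMOTE_REQUESTS` and `execute_remote_parallel_request` implement a small bounded-concurrency queue. A standalone sketch of the pattern, assuming only the `#execute`, `#complete?`, and `#wait` methods that the specs in this commit exercise (`BoundedRequestQueue` is a made-up name, not part of the GitLab codebase):
```ruby
# Standalone sketch of the bounded-queue pattern used above.
class BoundedRequestQueue
  MAX_PARALLEL = 2

  def initialize
    @in_flight = []
  end

  def execute(lazy_response)
    # Drop requests that have already finished.
    @in_flight.delete_if(&:complete?)
    # Assume the oldest request completes first; block on it when the queue is full.
    @in_flight.first&.wait if @in_flight.size >= MAX_PARALLEL
    @in_flight << lazy_response.execute
  end
end
```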

View File

@ -56,8 +56,10 @@ module Gitlab
def fetch_async_content
return if ::Feature.disabled?(:ci_parallel_remote_includes, context.project)
# It starts fetching the remote content in a separate thread and returns a promise immediately.
Gitlab::HTTP.get(location, async: true).execute
# It starts fetching the remote content in a separate thread and returns a lazy_response immediately.
Gitlab::HTTP.get(location, async: true).tap do |lazy_response|
context.execute_remote_parallel_request(lazy_response)
end
end
strong_memoize_attr :fetch_async_content
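At the call site, the async fetch and the bounded queue compose roughly as follows; a condensed sketch using only methods observed elsewhere in this commit:
```ruby
# Condensed sketch; `location` and `context` come from the surrounding class.
lazy_response = Gitlab::HTTP.get(location, async: true) # returns immediately, backed by a promise
context.execute_remote_parallel_request(lazy_response)  # registers it in the bounded queue
lazy_response.wait                                      # later: block until the request finishes
```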

View File

@ -115,10 +115,13 @@ module Gitlab
# type is used.
# batch_column_name - option is for tables without primary key, in this
# case another unique integer column can be used. Example: :user_id
def rename_column_concurrently(table, old_column, new_column, type: nil, batch_column_name: :id)
def rename_column_concurrently(table, old_column, new_column, type: nil, batch_column_name: :id, type_cast_function: nil)
Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_ddl_mode!
setup_renamed_column(__callee__, table, old_column, new_column, type, batch_column_name)
setup_renamed_column(
__callee__, table, old_column, new_column,
type: type, batch_column_name: batch_column_name, type_cast_function: type_cast_function
)
with_lock_retries do
install_bidirectional_triggers(table, old_column, new_column)
@ -167,7 +170,10 @@ module Gitlab
def undo_cleanup_concurrent_column_rename(table, old_column, new_column, type: nil, batch_column_name: :id)
Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_ddl_mode!
setup_renamed_column(__callee__, table, new_column, old_column, type, batch_column_name)
setup_renamed_column(
__callee__, table, new_column, old_column,
type: type, batch_column_name: batch_column_name
)
with_lock_retries do
install_bidirectional_triggers(table, old_column, new_column)
@ -198,7 +204,7 @@ module Gitlab
private
def setup_renamed_column(calling_operation, table, old_column, new_column, type, batch_column_name)
def setup_renamed_column(calling_operation, table, old_column, new_column, type:, batch_column_name:, type_cast_function: nil)
if transaction_open?
raise "#{calling_operation} can not be run inside a transaction"
end
@ -220,7 +226,7 @@ module Gitlab
check_trigger_permissions!(table)
unless column_exists?(table, new_column)
create_column_from(table, old_column, new_column, type: type, batch_column_name: batch_column_name)
create_column_from(table, old_column, new_column, type: type, batch_column_name: batch_column_name, type_cast_function: type_cast_function)
end
end
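A hedged usage sketch of the new `type_cast_function` option, modeled on the `change_column_type_concurrently` spec later in this commit (the table name is illustrative):
```ruby
# In a migration with disable_ddl_transaction!; each existing value is cast with JSON(...).
def up
  change_column_type_concurrently(:_test_table, :name, :text, type_cast_function: 'JSON')
end

def down
  undo_change_column_type_concurrently(:_test_table, :name)
end
```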

View File

@ -5,7 +5,7 @@ ENV GITLAB_LICENSE_MODE=test \
# Clone GDK at specific sha and bootstrap packages
#
ARG GDK_SHA=768deeebc94e37ba103dfec8102e794ba0f22c4e
ARG GDK_SHA=ba07fa47f98fc8a1e6ef16cdef9dabd7a0d445b0
RUN set -eux; \
git clone --depth 1 https://gitlab.com/gitlab-org/gitlab-development-kit.git && cd gitlab-development-kit; \
git fetch --depth 1 origin ${GDK_SHA} && git -c advice.detachedHead=false checkout ${GDK_SHA}; \

View File

@ -48,6 +48,7 @@ RSpec.describe 'Database schema', feature_category: :database do
chat_teams: %w[team_id],
ci_builds: %w[project_id runner_id user_id erased_by_id trigger_request_id partition_id auto_canceled_by_partition_id],
ci_namespace_monthly_usages: %w[namespace_id],
ci_pipeline_chat_data: %w[partition_id],
ci_pipeline_variables: %w[partition_id],
ci_pipelines: %w[partition_id],
ci_runner_projects: %w[runner_id],

View File

@ -0,0 +1,9 @@
# frozen_string_literal: true
FactoryBot.define do
factory :ci_pipeline_chat_data, class: 'Ci::PipelineChatData' do
pipeline factory: :ci_empty_pipeline
chat_name
response_url { "https://response.com" }
end
end
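A minimal usage sketch for this factory in a spec; the expected values follow directly from the factory defaults above:
```ruby
# Creates a chat-data record with an empty pipeline and an associated chat_name.
pipeline_chat_data = create(:ci_pipeline_chat_data)
expect(pipeline_chat_data.response_url).to eq('https://response.com')
expect(pipeline_chat_data.chat_name).to be_present
```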

View File

@ -0,0 +1,67 @@
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BackfillPartitionIdCiPipelineChatData,
feature_category: :continuous_integration do
let(:ci_pipelines_table) { table(:ci_pipelines, database: :ci) }
let(:ci_pipeline_chat_data_table) { table(:ci_pipeline_chat_data, database: :ci) }
let!(:pipeline1) { ci_pipelines_table.create!(id: 1, partition_id: 100) }
let!(:pipeline2) { ci_pipelines_table.create!(id: 2, partition_id: 101) }
let!(:invalid_ci_pipeline_chat_data) do
ci_pipeline_chat_data_table.create!(
id: 1,
pipeline_id: pipeline1.id,
chat_name_id: 1,
response_url: '',
partition_id: pipeline1.partition_id
)
end
let!(:valid_ci_pipeline_chat_data) do
ci_pipeline_chat_data_table.create!(
id: 2,
pipeline_id: pipeline2.id,
chat_name_id: 2,
response_url: '',
partition_id: pipeline2.partition_id
)
end
let(:migration_attrs) do
{
start_id: ci_pipeline_chat_data_table.minimum(:id),
end_id: ci_pipeline_chat_data_table.maximum(:id),
batch_table: :ci_pipeline_chat_data,
batch_column: :id,
sub_batch_size: 1,
pause_ms: 0,
connection: Ci::ApplicationRecord.connection
}
end
let!(:migration) { described_class.new(**migration_attrs) }
describe '#perform' do
context 'when second partition does not exist' do
it 'does not execute the migration' do
expect { migration.perform }
.not_to change { invalid_ci_pipeline_chat_data.reload.partition_id }
end
end
context 'when second partition exists' do
before do
allow(migration).to receive(:uses_multiple_partitions?).and_return(true)
pipeline1.update!(partition_id: 101)
end
it 'fixes invalid records in the wrong partition' do
expect { migration.perform }
.to change { invalid_ci_pipeline_chat_data.reload.partition_id }
.from(100)
.to(101)
end
end
end
end

View File

@ -159,10 +159,14 @@ RSpec.describe Gitlab::Ci::Config::External::Context, feature_category: :pipelin
shared_examples 'a mutated context' do
let(:mutated) { subject.mutate(new_attributes) }
let(:lazy_response) { double('lazy_response') }
before do
allow(lazy_response).to receive(:execute).and_return(lazy_response)
subject.expandset << :a_file
subject.set_deadline(15.seconds)
subject.execute_remote_parallel_request(lazy_response)
end
it { expect(mutated).not_to eq(subject) }
@ -170,8 +174,9 @@ RSpec.describe Gitlab::Ci::Config::External::Context, feature_category: :pipelin
it { expect(mutated).to have_attributes(new_attributes) }
it { expect(mutated.pipeline).to eq(subject.pipeline) }
it { expect(mutated.expandset).to eq(subject.expandset) }
it { expect(mutated.execution_deadline).to eq(mutated.execution_deadline) }
it { expect(mutated.logger).to eq(mutated.logger) }
it { expect(mutated.execution_deadline).to eq(subject.execution_deadline) }
it { expect(mutated.logger).to eq(subject.logger) }
it { expect(mutated.parallel_requests).to eq(subject.parallel_requests) }
end
context 'with attributes' do
@ -212,4 +217,80 @@ RSpec.describe Gitlab::Ci::Config::External::Context, feature_category: :pipelin
end
end
end
describe '#execute_remote_parallel_request' do
let(:lazy_response1) { double('lazy_response', wait: true, complete?: complete1) }
let(:lazy_response2) { double('lazy_response') }
let(:complete1) { false }
before do
allow(lazy_response1).to receive(:execute).and_return(lazy_response1)
allow(lazy_response2).to receive(:execute).and_return(lazy_response2)
end
context 'when the queue is empty' do
before do
stub_const("Gitlab::Ci::Config::External::Context::MAX_PARALLEL_REMOTE_REQUESTS", 2)
end
it 'adds the new lazy response to the queue' do
expect { subject.execute_remote_parallel_request(lazy_response1) }
.to change { subject.parallel_requests }
.from([])
.to([lazy_response1])
end
end
context 'when there is a lazy response in the queue' do
before do
subject.execute_remote_parallel_request(lazy_response1)
end
context 'when there is a free slot in the queue' do
before do
stub_const("Gitlab::Ci::Config::External::Context::MAX_PARALLEL_REMOTE_REQUESTS", 2)
end
it 'adds the new lazy response to the queue' do
expect { subject.execute_remote_parallel_request(lazy_response2) }
.to change { subject.parallel_requests }
.from([lazy_response1])
.to([lazy_response1, lazy_response2])
end
end
context 'when the queue is full' do
before do
stub_const("Gitlab::Ci::Config::External::Context::MAX_PARALLEL_REMOTE_REQUESTS", 1)
end
context 'when the first lazy response in the queue is complete' do
let(:complete1) { true }
it 'removes the completed lazy response and adds the new one to the queue' do
expect(lazy_response1).not_to receive(:wait)
expect { subject.execute_remote_parallel_request(lazy_response2) }
.to change { subject.parallel_requests }
.from([lazy_response1])
.to([lazy_response2])
end
end
context 'when the first lazy response in the queue is not complete' do
let(:complete1) { false }
it 'waits for the first lazy response to complete and then adds the new one to the queue' do
expect(lazy_response1).to receive(:wait)
expect { subject.execute_remote_parallel_request(lazy_response2) }
.to change { subject.parallel_requests }
.from([lazy_response1])
.to([lazy_response1, lazy_response2])
end
end
end
end
end
end

View File

@ -157,6 +157,40 @@ RSpec.describe Gitlab::Ci::Config::External::File::Remote, feature_category: :pi
it_behaves_like "#content"
end
describe '#preload_content' do
context 'when the parallel request queue is full' do
let(:location1) { 'https://gitlab.com/gitlab-org/gitlab-foss/blob/1234/.secret_file1.yml' }
let(:location2) { 'https://gitlab.com/gitlab-org/gitlab-foss/blob/1234/.secret_file2.yml' }
before do
# Make the parallel queue easy to fill
stub_const("Gitlab::Ci::Config::External::Context::MAX_PARALLEL_REMOTE_REQUESTS", 1)
# Add a slow, failing promise to the queue
promise = Concurrent::Promise.new do
sleep 1.1
raise Timeout::Error
end
context.execute_remote_parallel_request(
Gitlab::HTTP_V2::LazyResponse.new(promise, location1, {}, nil)
)
stub_full_request(location2).to_return(body: remote_file_content)
end
it 'waits for the queue' do
file2 = described_class.new({ remote: location2 }, context)
start_at = Time.current
file2.preload_content
end_at = Time.current
expect(end_at - start_at).to be > 1
end
end
end
describe "#error_message" do
subject(:error_message) do
Gitlab::Ci::Config::External::Mapper::Verifier.new(context).process([remote_file])

View File

@ -2,7 +2,7 @@
require 'spec_helper'
RSpec.describe Gitlab::Database::MigrationHelpers::V2 do
RSpec.describe Gitlab::Database::MigrationHelpers::V2, feature_category: :database do
include Database::TriggerHelpers
include Database::TableSchemaHelpers
@ -59,7 +59,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers::V2 do
context 'when the batch column does exist' do
it 'passes it when creating the column' do
expect(migration).to receive(:create_column_from)
.with(:_test_table, existing_column, added_column, type: nil, batch_column_name: :status)
.with(:_test_table, existing_column, added_column, type: nil, batch_column_name: :status, type_cast_function: nil)
.and_call_original
migration.public_send(operation, :_test_table, :original, :renamed, batch_column_name: :status)
@ -495,4 +495,83 @@ RSpec.describe Gitlab::Database::MigrationHelpers::V2 do
end
end
end
describe '#change_column_type_concurrently' do
let(:table_name) { :_test_change_column_type_concurrently }
before do
migration.connection.execute(<<~SQL)
DROP TABLE IF EXISTS #{table_name};
CREATE TABLE #{table_name} (
id serial NOT NULL PRIMARY KEY,
user_id bigint,
name character varying
);
/* at least one record for the batched update */
INSERT INTO #{table_name} (id, user_id, name)
VALUES (1, 9, '{ \"lucky_number\": 8 }')
SQL
end
it 'adds a column of the new type and triggers to keep these two columns in sync' do
allow(migration).to receive(:transaction_open?).and_return(false)
recorder = ActiveRecord::QueryRecorder.new do
migration.change_column_type_concurrently(table_name, :name, :text)
end
expect(recorder.log).to include(/ALTER TABLE "_test_change_column_type_concurrently" ADD "name_for_type_change" text/)
expect(recorder.log).to include(/BEGIN\n IF NEW."name" IS NOT DISTINCT FROM NULL AND NEW."name_for_type_change" IS DISTINCT FROM NULL THEN\n NEW."name" = NEW."name_for_type_change";\n END IF;\n\n IF NEW."name_for_type_change" IS NOT DISTINCT FROM NULL AND NEW."name" IS DISTINCT FROM NULL THEN\n NEW."name_for_type_change" = NEW."name";\n END IF;\n\n RETURN NEW;\nEND/m)
expect(recorder.log).to include(/BEGIN\n NEW."name" := NEW."name_for_type_change";\n RETURN NEW;\nEND/m)
expect(recorder.log).to include(/BEGIN\n NEW."name_for_type_change" := NEW."name";\n RETURN NEW;\nEND/m)
expect(recorder.log).to include(/ON "_test_change_column_type_concurrently"\nFOR EACH ROW\sEXECUTE FUNCTION/m)
expect(recorder.log).to include(/UPDATE .* WHERE "_test_change_column_type_concurrently"."id" >= \d+/)
end
context 'with batch column name' do
it 'updates the new column using the batch column' do
allow(migration).to receive(:transaction_open?).and_return(false)
recorder = ActiveRecord::QueryRecorder.new do
migration.change_column_type_concurrently(table_name, :name, :text, batch_column_name: :user_id)
end
expect(recorder.log).to include(/UPDATE .* WHERE "_test_change_column_type_concurrently"."user_id" >= \d+/)
end
end
context 'with type cast function' do
it 'updates the new column with casting the value to the given type' do
allow(migration).to receive(:transaction_open?).and_return(false)
recorder = ActiveRecord::QueryRecorder.new do
migration.change_column_type_concurrently(table_name, :name, :text, type_cast_function: 'JSON')
end
expect(recorder.log).to include(/SET "name_for_type_change" = JSON\("_test_change_column_type_concurrently"\."name"\)/m)
end
end
end
describe '#undo_change_column_type_concurrently' do
let(:table_name) { :_test_undo_change_column_type_concurrently }
before do
migration.connection.execute(<<~SQL)
DROP TABLE IF EXISTS #{table_name};
CREATE TABLE #{table_name} (
id serial NOT NULL PRIMARY KEY,
user_id bigint,
name character varying
);
/* at least one record for the batched update */
INSERT INTO #{table_name} (id, user_id, name)
VALUES (1, 9, 'For every young')
SQL
end
it 'undoes the column type change' do
allow(migration).to receive(:transaction_open?).and_return(false)
migration.change_column_type_concurrently(table_name, :name, :text)
recorder = ActiveRecord::QueryRecorder.new do
migration.undo_change_column_type_concurrently(table_name, :name)
end
expect(recorder.log).to include(/DROP TRIGGER IF EXISTS .+ON "_test_undo_change_column_type_concurrently"/m)
expect(recorder.log).to include(/ALTER TABLE "_test_undo_change_column_type_concurrently" DROP COLUMN "name_for_type_change"/)
end
end
end

View File

@ -0,0 +1,56 @@
# frozen_string_literal: true
require 'spec_helper'
require_migration!
RSpec.describe QueueBackfillPartitionIdCiPipelineChatData, migration: :gitlab_ci, feature_category: :continuous_integration do
let!(:batched_migrations) { table(:batched_background_migrations) }
let!(:migration) { described_class::MIGRATION }
describe '#up' do
context 'with migration present' do
let!(:ci_backfill_partition_id_ci_pipeline_chat_data_migration) do
batched_migrations.create!(
job_class_name: 'BackfillPartitionIdCiPipelineChatData',
table_name: :ci_pipeline_chat_data,
column_name: :id,
job_arguments: [],
interval: 2.minutes,
min_value: 1,
max_value: 2,
batch_size: 1000,
sub_batch_size: 100,
gitlab_schema: :gitlab_ci,
status: 3 # finished
)
end
context 'when migration finished successfully' do
it 'does not raise exception' do
expect { migrate! }.not_to raise_error
end
it 'schedules background jobs for each batch of ci_pipeline_chat_data' do
migrate!
expect(migration).to have_scheduled_batched_migration(
gitlab_schema: :gitlab_ci,
table_name: :ci_pipeline_chat_data,
column_name: :id,
batch_size: described_class::BATCH_SIZE,
sub_batch_size: described_class::SUB_BATCH_SIZE
)
end
end
end
end
describe '#down' do
it 'deletes all batched migration records' do
migrate!
schema_migrate_down!
expect(migration).not_to have_scheduled_batched_migration
end
end
end

View File

@ -0,0 +1,27 @@
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Ci::PipelineChatData, type: :model, feature_category: :continuous_integration do
it { is_expected.to belong_to(:chat_name) }
it { is_expected.to belong_to(:pipeline) }
it { is_expected.to validate_presence_of(:pipeline_id) }
it { is_expected.to validate_presence_of(:chat_name_id) }
it { is_expected.to validate_presence_of(:response_url) }
describe 'partitioning', :ci_partitionable do
include Ci::PartitioningHelpers
let(:pipeline) { create(:ci_pipeline) }
let(:pipeline_chat_data) { create(:ci_pipeline_chat_data, pipeline: pipeline) }
before do
stub_current_partition_id
end
it 'assigns the same partition id as its pipeline' do
expect(pipeline_chat_data.partition_id).to eq(ci_testing_partition_id)
end
end
end

View File

@ -22,7 +22,7 @@ require (
github.com/sirupsen/logrus v1.9.3
github.com/smartystreets/goconvey v1.8.1
github.com/stretchr/testify v1.8.4
gitlab.com/gitlab-org/gitaly/v16 v16.6.1
gitlab.com/gitlab-org/gitaly/v16 v16.6.2
gitlab.com/gitlab-org/labkit v1.21.0
gocloud.dev v0.35.0
golang.org/x/image v0.14.0

View File

@ -442,8 +442,8 @@ github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg=
github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
gitlab.com/gitlab-org/gitaly/v16 v16.6.1 h1:/FW8yZ0nPIa9wCO2aLmSC2nZpKmL5ZaOrRrFaXBfnPw=
gitlab.com/gitlab-org/gitaly/v16 v16.6.1/go.mod h1:LbekHBeRUnb1jDQCXIUSNebuPMzVTq8B9PXXp4xVOF4=
gitlab.com/gitlab-org/gitaly/v16 v16.6.2 h1:uwdlOVg0ZFLN9tt9pvOoGTG1IQvBlTJU27V+t+3CUvo=
gitlab.com/gitlab-org/gitaly/v16 v16.6.2/go.mod h1:LbekHBeRUnb1jDQCXIUSNebuPMzVTq8B9PXXp4xVOF4=
gitlab.com/gitlab-org/labkit v1.21.0 h1:hLmdBDtXjD1yOmZ+uJOac3a5Tlo83QaezwhES4IYik4=
gitlab.com/gitlab-org/labkit v1.21.0/go.mod h1:zeATDAaSBelPcPLbTTq8J3ZJEHyPTLVBM1q3nva+/W4=
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=