跳转到内容

Gitlab

GitLab问题读取器 #

基类:EventBaseReader

GitLab 问题读取器。

workflows/handler.py 中的源代码llama_index/readers/gitlab/issues/base.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
class GitLabIssuesReader(BaseReader):
    """
    GitLab issues reader.
    """

    class IssueState(enum.Enum):
        """
        Issue type.

        Used to decide what issues to retrieve.

        Attributes:
            - OPEN: Issues that are open.
            - CLOSED: Issues that are closed.
            - ALL: All issues, open and closed.

        """

        OPEN = "opened"
        CLOSED = "closed"
        ALL = "all"

    class IssueType(enum.Enum):
        """
        Issue type.

        Used to decide what issues to retrieve.

        Attributes:
            - ISSUE: Issues.
            - INCIDENT: Incident.
            - TEST_CASE: Test case.
            - TASK: Task.

        """

        ISSUE = "issue"
        INCIDENT = "incident"
        TEST_CASE = "test_case"
        TASK = "task"

    class Scope(enum.Enum):
        """
        Scope.

        Used to determine the scope of the issue.

        Attributes:
            - CREATED_BY_ME: Issues created by the authenticated user.
            - ASSIGNED_TO_ME: Issues assigned to the authenticated user.
            - ALL: All issues.

        """

        CREATED_BY_ME = "created_by_me"
        ASSIGNED_TO_ME = "assigned_to_me"
        ALL = "all"

    def __init__(
        self,
        gitlab_client: gitlab.Gitlab,
        project_id: Optional[int] = None,
        group_id: Optional[int] = None,
        verbose: bool = False,
    ):
        super().__init__()

        self._gl = gitlab_client
        self._project_id = project_id
        self._group_id = group_id
        self._verbose = verbose

    def _build_document_from_issue(self, issue: GitLabIssue) -> Document:
        issue_dict = issue.asdict()
        title = issue_dict["title"]
        description = issue_dict["description"]
        document = Document(
            doc_id=str(issue_dict["iid"]),
            text=f"{title}\n{description}",
        )
        extra_info = {
            "state": issue_dict["state"],
            "labels": issue_dict["labels"],
            "created_at": issue_dict["created_at"],
            "closed_at": issue_dict["closed_at"],
            "url": issue_dict["_links"]["self"],  # API URL
            "source": issue_dict["web_url"],  # HTML URL, more convenient for humans
        }
        if issue_dict["assignee"]:
            extra_info["assignee"] = issue_dict["assignee"]["username"]
        if issue_dict["author"]:
            extra_info["author"] = issue_dict["author"]["username"]
        document.extra_info = extra_info
        return document

    def _get_project_issues(self, **kwargs):
        project = self._gl.projects.get(self._project_id)
        return project.issues.list(**kwargs)

    def _get_group_issues(self, **kwargs):
        group = self._gl.groups.get(self._group_id)
        return group.issues.list(**kwargs)

    def _to_gitlab_datetime_format(self, dt: Optional[datetime]) -> str:
        return dt.strftime("%Y-%m-%dT%H:%M:%S") if dt else None

    def load_data(
        self,
        assignee: Optional[Union[str, int]] = None,
        author: Optional[Union[str, int]] = None,
        confidential: Optional[bool] = None,
        created_after: Optional[datetime] = None,
        created_before: Optional[datetime] = None,
        iids: Optional[List[int]] = None,
        issue_type: Optional[IssueType] = None,
        labels: Optional[List[str]] = None,
        milestone: Optional[str] = None,
        non_archived: Optional[bool] = None,
        scope: Optional[Scope] = None,
        search: Optional[str] = None,
        state: Optional[IssueState] = IssueState.OPEN,
        updated_after: Optional[datetime] = None,
        updated_before: Optional[datetime] = None,
        get_all: bool = False,
        **kwargs: Any,
    ) -> List[Document]:
        """
        Load group or project issues and converts them to documents. Please refer to the GitLab API documentation for the full list of parameters.

        Each issue is converted to a document by doing the following:

            - The doc_id of the document is the issue number.
            - The text of the document is the concatenation of the title and the description of the issue.
            - The extra_info of the document is a dictionary with the following keys:
                - state: State of the issue.
                - labels: List of labels of the issue.
                - created_at: Date when the issue was created.
                - closed_at: Date when the issue was closed. Only present if the issue is closed.
                - url: URL of the issue.
                - source: URL of the issue. More convenient for humans.
                - assignee: username of the user assigned to the issue. Only present if the issue is assigned.

        Args:
            - assignee: Username or ID of the user assigned to the issue.
            - author: Username or ID of the user that created the issue.
            - confidential: Filter confidential issues.
            - created_after: Filter issues created after the specified date.
            - created_before: Filter issues created before the specified date.
            - iids: Return only the issues having the given iid.
            - issue_type: Filter issues by type.
            - labels: List of label names, issues must have all labels to be returned.
            - milestone: The milestone title.
            - non_archived: Return issues from non archived projects.
            - scope: Return issues for the given scope.
            - search: Search issues against their title and description.
            - state: State of the issues to retrieve.
            - updated_after: Filter issues updated after the specified date.
            - updated_before: Filter issues updated before the specified date.
            - get_all: Get all the items without pagination (for a long lists).


        Returns:
            List[Document]: List of documents.

        """
        to_gitlab_datetime_format = self._to_gitlab_datetime_format
        params = {
            "confidential": confidential,
            "created_after": to_gitlab_datetime_format(created_after),
            "created_before": to_gitlab_datetime_format(created_before),
            "iids": iids,
            "issue_type": issue_type.value if issue_type else None,
            "labels": labels,
            "milestone": milestone,
            "non_archived": non_archived,
            "scope": scope.value if scope else None,
            "search": search,
            "state": state.value if state else None,
            "updated_after": to_gitlab_datetime_format(updated_after),
            "updated_before": to_gitlab_datetime_format(updated_before),
            "get_all": get_all,
        }

        if isinstance(assignee, str):
            params["assignee_username"] = assignee
        elif isinstance(assignee, int):
            params["assignee_id"] = assignee

        if isinstance(author, str):
            params["author_username"] = author
        elif isinstance(author, int):
            params["author_id"] = author

        filtered_params = {k: v for k, v in params.items() if v is not None}

        filtered_params.update(kwargs)

        issues = []

        if self._project_id:
            issues = self._get_project_issues(**filtered_params)
        if self._group_id:
            issues = self._get_group_issues(**filtered_params)

        return [self._build_document_from_issue(issue) for issue in issues]

问题状态 #

基类:EventEnum

问题类型。

用于决定检索哪些问题。

属性:

名称 类型 描述
- OPEN

处于开启状态的问题。

- CLOSED

已关闭的问题。

- ALL

所有问题,包括已打开和已关闭的。

workflows/handler.py 中的源代码llama_index/readers/gitlab/issues/base.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class IssueState(enum.Enum):
    """
    Issue type.

    Used to decide what issues to retrieve.

    Attributes:
        - OPEN: Issues that are open.
        - CLOSED: Issues that are closed.
        - ALL: All issues, open and closed.

    """

    OPEN = "opened"
    CLOSED = "closed"
    ALL = "all"

问题类型 #

基类:EventEnum

问题类型。

用于决定检索哪些问题。

属性:

名称 类型 描述
- ISSUE

问题。

- INCIDENT

事件。

- TEST_CASE

测试用例。

- TASK

任务。

workflows/handler.py 中的源代码llama_index/readers/gitlab/issues/base.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class IssueType(enum.Enum):
    """
    Issue type.

    Used to decide what issues to retrieve.

    Attributes:
        - ISSUE: Issues.
        - INCIDENT: Incident.
        - TEST_CASE: Test case.
        - TASK: Task.

    """

    ISSUE = "issue"
    INCIDENT = "incident"
    TEST_CASE = "test_case"
    TASK = "task"

范围 #

基类:EventEnum

范围。

用于确定问题的范围。

属性:

名称 类型 描述
- CREATED_BY_ME

由认证用户创建的问题。

- ASSIGNED_TO_ME

分配给已认证用户的问题。

- ALL

所有问题。

workflows/handler.py 中的源代码llama_index/readers/gitlab/issues/base.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
class Scope(enum.Enum):
    """
    Scope.

    Used to determine the scope of the issue.

    Attributes:
        - CREATED_BY_ME: Issues created by the authenticated user.
        - ASSIGNED_TO_ME: Issues assigned to the authenticated user.
        - ALL: All issues.

    """

    CREATED_BY_ME = "created_by_me"
    ASSIGNED_TO_ME = "assigned_to_me"
    ALL = "all"

load_data #

load_data(assignee: Optional[Union[str, int]] = None, author: Optional[Union[str, int]] = None, confidential: Optional[bool] = None, created_after: Optional[datetime] = None, created_before: Optional[datetime] = None, iids: Optional[List[int]] = None, issue_type: Optional[IssueType] = None, labels: Optional[List[str]] = None, milestone: Optional[str] = None, non_archived: Optional[bool] = None, scope: Optional[作用域] = None, search: Optional[str] = None, state: Optional[IssueState] = OPEN, updated_after: Optional[datetime] = None, updated_before: Optional[datetime] = None, get_all: bool = False, **kwargs: Any) -> List[文档]

加载群组或项目问题并将其转换为文档。请参阅 GitLab API 文档获取完整参数列表。

每个问题通过以下步骤转换为文档:

- The doc_id of the document is the issue number.
- The text of the document is the concatenation of the title and the description of the issue.
- The extra_info of the document is a dictionary with the following keys:
    - state: State of the issue.
    - labels: List of labels of the issue.
    - created_at: Date when the issue was created.
    - closed_at: Date when the issue was closed. Only present if the issue is closed.
    - url: URL of the issue.
    - source: URL of the issue. More convenient for humans.
    - assignee: username of the user assigned to the issue. Only present if the issue is assigned.

参数:

名称 类型 描述 默认
- assignee

分配给该问题的用户名或ID。

required
- author

创建该问题的用户名或ID。

required
- confidential

过滤机密问题。

required
- created_after

筛选在指定日期之后创建的问题。

required
- created_before

筛选在指定日期之前创建的问题。

required
- iids

仅返回具有指定 iid 的问题。

required
- issue_type

按类型筛选问题。

required
- labels

标签名称列表,问题必须包含所有指定标签才能被返回。

required
- milestone

里程碑标题。

required
- non_archived

返回未归档项目中的问题。

required
- scope

返回给定范围内的问题。

required
- search

根据标题和描述搜索问题。

required
- state

要检索的问题状态。

required
- updated_after

筛选在指定日期之后更新的问题。

required
- updated_before

筛选在指定日期之前更新的问题。

required
- get_all

获取所有项目,无需分页(适用于长列表)。

required

返回:

类型 描述
List[文档]

List[Document]: 文档列表。

workflows/handler.py 中的源代码llama_index/readers/gitlab/issues/base.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
def load_data(
    self,
    assignee: Optional[Union[str, int]] = None,
    author: Optional[Union[str, int]] = None,
    confidential: Optional[bool] = None,
    created_after: Optional[datetime] = None,
    created_before: Optional[datetime] = None,
    iids: Optional[List[int]] = None,
    issue_type: Optional[IssueType] = None,
    labels: Optional[List[str]] = None,
    milestone: Optional[str] = None,
    non_archived: Optional[bool] = None,
    scope: Optional[Scope] = None,
    search: Optional[str] = None,
    state: Optional[IssueState] = IssueState.OPEN,
    updated_after: Optional[datetime] = None,
    updated_before: Optional[datetime] = None,
    get_all: bool = False,
    **kwargs: Any,
) -> List[Document]:
    """
    Load group or project issues and converts them to documents. Please refer to the GitLab API documentation for the full list of parameters.

    Each issue is converted to a document by doing the following:

        - The doc_id of the document is the issue number.
        - The text of the document is the concatenation of the title and the description of the issue.
        - The extra_info of the document is a dictionary with the following keys:
            - state: State of the issue.
            - labels: List of labels of the issue.
            - created_at: Date when the issue was created.
            - closed_at: Date when the issue was closed. Only present if the issue is closed.
            - url: URL of the issue.
            - source: URL of the issue. More convenient for humans.
            - assignee: username of the user assigned to the issue. Only present if the issue is assigned.

    Args:
        - assignee: Username or ID of the user assigned to the issue.
        - author: Username or ID of the user that created the issue.
        - confidential: Filter confidential issues.
        - created_after: Filter issues created after the specified date.
        - created_before: Filter issues created before the specified date.
        - iids: Return only the issues having the given iid.
        - issue_type: Filter issues by type.
        - labels: List of label names, issues must have all labels to be returned.
        - milestone: The milestone title.
        - non_archived: Return issues from non archived projects.
        - scope: Return issues for the given scope.
        - search: Search issues against their title and description.
        - state: State of the issues to retrieve.
        - updated_after: Filter issues updated after the specified date.
        - updated_before: Filter issues updated before the specified date.
        - get_all: Get all the items without pagination (for a long lists).


    Returns:
        List[Document]: List of documents.

    """
    to_gitlab_datetime_format = self._to_gitlab_datetime_format
    params = {
        "confidential": confidential,
        "created_after": to_gitlab_datetime_format(created_after),
        "created_before": to_gitlab_datetime_format(created_before),
        "iids": iids,
        "issue_type": issue_type.value if issue_type else None,
        "labels": labels,
        "milestone": milestone,
        "non_archived": non_archived,
        "scope": scope.value if scope else None,
        "search": search,
        "state": state.value if state else None,
        "updated_after": to_gitlab_datetime_format(updated_after),
        "updated_before": to_gitlab_datetime_format(updated_before),
        "get_all": get_all,
    }

    if isinstance(assignee, str):
        params["assignee_username"] = assignee
    elif isinstance(assignee, int):
        params["assignee_id"] = assignee

    if isinstance(author, str):
        params["author_username"] = author
    elif isinstance(author, int):
        params["author_id"] = author

    filtered_params = {k: v for k, v in params.items() if v is not None}

    filtered_params.update(kwargs)

    issues = []

    if self._project_id:
        issues = self._get_project_issues(**filtered_params)
    if self._group_id:
        issues = self._get_group_issues(**filtered_params)

    return [self._build_document_from_issue(issue) for issue in issues]

GitLab仓库读取器 #

基类:EventBaseReader

workflows/handler.py 中的源代码llama_index/readers/gitlab/repository/base.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
class GitLabRepositoryReader(BaseReader):
    def __init__(
        self,
        gitlab_client: gitlab.Gitlab,
        project_id: int,
        use_parser: bool = False,
        verbose: bool = False,
    ):
        super().__init__()

        self._gl = gitlab_client
        self._use_parser = use_parser
        self._verbose = verbose
        self._project_url = f"{gitlab_client.api_url}/projects/{project_id}"

        self._project = gitlab_client.projects.get(project_id)

    def _parse_file_content(self, file_properties: dict, file_content: str) -> Document:
        raise NotImplementedError

    def _load_single_file(self, file_path: str, ref: Optional[str] = None) -> Document:
        file = self._project.files.get(file_path=file_path, ref=ref)
        file_properties = file.asdict()
        file_content = file.decode()

        if self._use_parser:
            return self._parse_file_content(file_properties, file_content)

        return Document(
            doc_id=file_properties["blob_id"],
            text=file_content,
            extra_info={
                "file_path": file_properties["file_path"],
                "file_name": file_properties["file_name"],
                "size": file_properties["size"],
                "url": f"{self._project_url}/projects/repository/files/{file_properties['file_path']}/raw",
            },
        )

    def load_data(
        self,
        ref: str,
        file_path: Optional[str] = None,
        path: Optional[str] = None,
        recursive: bool = False,
        iterator: bool = False,
    ) -> List[Document]:
        """
        Load data from a GitLab repository.

        Args:
            ref: The name of a repository branch or commit id
            file_path: Path to the file to load.
            path: Path to the directory to load.
            recursive: Whether to load files recursively.
            iterator: Return a generator handling API pagination automatically

        Returns:
            List[Document]: List of documents loaded from the repository

        """
        if file_path:
            return [self._load_single_file(file_path, ref)]

        project = self._project

        params = {
            "ref": ref,
            "path": path,
            "recursive": recursive,
            "iterator": iterator,
        }

        filtered_params = {k: v for k, v in params.items() if v is not None}

        repo_items = project.repository_tree(**filtered_params)

        documents = []

        for item in repo_items:
            if item["type"] == "blob":
                documents.append(self._load_single_file(item["path"], ref))

        return documents

load_data #

load_data(ref: str, file_path: Optional[str] = None, path: Optional[str] = None, recursive: bool = False, iterator: bool = False) -> List[文档]

从 GitLab 仓库加载数据。

参数:

名称 类型 描述 默认
ref str

仓库分支的名称或提交ID

required
file_path Optional[str]

要加载文件的路径。

None
path Optional[str]

要加载的目录路径。

None
recursive bool

是否递归加载文件。

False
iterator bool

返回一个自动处理API分页的生成器

False

返回:

类型 描述
List[文档]

List[Document]: 从存储库加载的文档列表

workflows/handler.py 中的源代码llama_index/readers/gitlab/repository/base.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def load_data(
    self,
    ref: str,
    file_path: Optional[str] = None,
    path: Optional[str] = None,
    recursive: bool = False,
    iterator: bool = False,
) -> List[Document]:
    """
    Load data from a GitLab repository.

    Args:
        ref: The name of a repository branch or commit id
        file_path: Path to the file to load.
        path: Path to the directory to load.
        recursive: Whether to load files recursively.
        iterator: Return a generator handling API pagination automatically

    Returns:
        List[Document]: List of documents loaded from the repository

    """
    if file_path:
        return [self._load_single_file(file_path, ref)]

    project = self._project

    params = {
        "ref": ref,
        "path": path,
        "recursive": recursive,
        "iterator": iterator,
    }

    filtered_params = {k: v for k, v in params.items() if v is not None}

    repo_items = project.repository_tree(**filtered_params)

    documents = []

    for item in repo_items:
        if item["type"] == "blob":
            documents.append(self._load_single_file(item["path"], ref))

    return documents

选项: 成员:- GitLabIssuesReader - GitLabRepositoryReader