通过组 ID 检查 Gitlab 存储库上的多个文件的 Python 代码

问题描述 投票:0回答:1

我想检查存储库中的多个文件,以及下面的Python脚本中这些文件是否存在。

情况是,存储库中的文件存在,但结果显示找不到该文件。

import requests
from urllib.parse import quote_plus
import json
import os

# Set your GitLab API URL, private token, and group ID as environment variables
gitlab_api_url = os.getenv("GITLAB_API_URL", "https://gitlab.com/api/v4")
private_token = os.getenv("GITLAB_PRIVATE_TOKEN", "xxx")
group_id = os.getenv("GITLAB_GROUP_ID", "xxx")

def get_project_ids(api_url, private_token, group_id):
    endpoint = f"{api_url}/groups/{group_id}/projects"
    headers = {"PRIVATE-TOKEN": private_token}
    
    response = requests.get(endpoint, headers=headers)

    if response.status_code == 200:
        projects = response.json()
        return [(project['id'], project['name']) for project in projects]
    else:
        print(f"Failed to retrieve projects for group {group_id}. Status code: {response.status_code}")
        return []

def check_files_in_project(api_url, private_token, group_id, project_id, project_name, filenames, output_filename):
    # Specify the private token for authentication
    headers = {"PRIVATE-TOKEN": private_token}

    # Create a dictionary to store the output information
    output_data = {
        "group_id": group_id,
        "project_id": project_id,
        "project_name": project_name,
        "files": []
    }

    for filename in filenames:
        # Encode special characters in the filename and construct the URL
        encoded_filename = quote_plus(filename)
        endpoint = f"{api_url}/projects/{project_id}/repository/tree?recursive=1&path={encoded_filename}&ref=dev"

        print(f"\nChecking for file: {filename} in project {project_name}")
        print(f"Constructed URL: {endpoint}")

        # Make the API request to get the repository tree
        response = requests.get(endpoint, headers=headers)

        # Check if the request was successful
        if response.status_code == 200:
            # Parse the JSON response
            repository_tree = response.json()

            # Check if the specified file exists in any folder
            file_found = any(filename == item.get("name", "") for item in repository_tree)
            print(f"File {filename} found in project {project_name}: {file_found}")

            # Append file information to output_data
            output_data["files"].append({"filename": filename, "file_found": file_found})
        else:
            print(f"Failed to retrieve repository tree for project {project_name}. Status code: {response.status_code}")

        # Append output data to a single JSON file after checking each file
        with open(output_filename, 'a') as json_file:
            json.dump(output_data, json_file, indent=2)
            json_file.write('\n')  # Add newline to separate entries

# Get the list of project IDs and names in the group
projects_info = get_project_ids(gitlab_api_url, private_token, group_id)

# Specify the list of filenames you want to check
filenames_to_check = ["serverless.yaml", "serverless.yml"]

# Single output file for all projects
output_filename = "output_all_projects.json"

# Open the file with an initial '[' to start a JSON array
with open(output_filename, 'w') as json_file:
    json_file.write('[')

# Iterate over each project ID and check for the files in any folder
for project_id, project_name in projects_info:
    check_files_in_project(gitlab_api_url, private_token, group_id, project_id, project_name, filenames_to_check, output_filename)

# Close the file with a ']' to close the JSON array
with open(output_filename, 'a') as json_file:
    json_file.write(']')

这是 JSON 文件上的结果

[{
  "group_id": "gid",
  "project_id": xxx,
  "project_name": "name,
  "files": []
}
{
  "group_id": "gid",
  "project_id": xxx,
  "project_name": "name",
  "files": []
}
{
  "group_id": "gid",
  "project_id": xxx,
  "project_name": "names",
  "files": []
}

注:

  • xxx 我的个人数据。

您的帮助对我来说非常宝贵,谢谢。

python gitlab repository
1个回答
0
投票

根据docs

path
参数用于指定子目录而不是文件:

存储库内的路径。用于获取子目录的内容。

目前端点中有

&path=<filename>
参数:

endpoint = f"{api_url}/projects/{project_id}/repository/tree?recursive=1&path={encoded_filename}&ref=dev"

这就是它不起作用的原因。因此只需从端点删除

path
参数即可。

还要记住分页 - 根据文档 -

per_page
参数默认值为 20。它会产生影响,因为您正在使用
recursive
标志 - 这意味着如果您的项目中有许多文件和子目录 - 这是不可能的通过一个请求处理所有路径。

© www.soinside.com 2019 - 2024. All rights reserved.