我想检查存储库中的多个文件,以及下面的Python脚本中这些文件是否存在。
情况是,存储库中的文件存在,但结果显示找不到该文件。
import requests
from urllib.parse import quote_plus
import json
import os
# Set your GitLab API URL, private token, and group ID as environment variables
gitlab_api_url = os.getenv("GITLAB_API_URL", "https://gitlab.com/api/v4")
private_token = os.getenv("GITLAB_PRIVATE_TOKEN", "xxx")
group_id = os.getenv("GITLAB_GROUP_ID", "xxx")
def get_project_ids(api_url, private_token, group_id):
endpoint = f"{api_url}/groups/{group_id}/projects"
headers = {"PRIVATE-TOKEN": private_token}
response = requests.get(endpoint, headers=headers)
if response.status_code == 200:
projects = response.json()
return [(project['id'], project['name']) for project in projects]
else:
print(f"Failed to retrieve projects for group {group_id}. Status code: {response.status_code}")
return []
def check_files_in_project(api_url, private_token, group_id, project_id, project_name, filenames, output_filename):
# Specify the private token for authentication
headers = {"PRIVATE-TOKEN": private_token}
# Create a dictionary to store the output information
output_data = {
"group_id": group_id,
"project_id": project_id,
"project_name": project_name,
"files": []
}
for filename in filenames:
# Encode special characters in the filename and construct the URL
encoded_filename = quote_plus(filename)
endpoint = f"{api_url}/projects/{project_id}/repository/tree?recursive=1&path={encoded_filename}&ref=dev"
print(f"\nChecking for file: {filename} in project {project_name}")
print(f"Constructed URL: {endpoint}")
# Make the API request to get the repository tree
response = requests.get(endpoint, headers=headers)
# Check if the request was successful
if response.status_code == 200:
# Parse the JSON response
repository_tree = response.json()
# Check if the specified file exists in any folder
file_found = any(filename == item.get("name", "") for item in repository_tree)
print(f"File {filename} found in project {project_name}: {file_found}")
# Append file information to output_data
output_data["files"].append({"filename": filename, "file_found": file_found})
else:
print(f"Failed to retrieve repository tree for project {project_name}. Status code: {response.status_code}")
# Append output data to a single JSON file after checking each file
with open(output_filename, 'a') as json_file:
json.dump(output_data, json_file, indent=2)
json_file.write('\n') # Add newline to separate entries
# Get the list of project IDs and names in the group
projects_info = get_project_ids(gitlab_api_url, private_token, group_id)
# Specify the list of filenames you want to check
filenames_to_check = ["serverless.yaml", "serverless.yml"]
# Single output file for all projects
output_filename = "output_all_projects.json"
# Open the file with an initial '[' to start a JSON array
with open(output_filename, 'w') as json_file:
json_file.write('[')
# Iterate over each project ID and check for the files in any folder
for project_id, project_name in projects_info:
check_files_in_project(gitlab_api_url, private_token, group_id, project_id, project_name, filenames_to_check, output_filename)
# Close the file with a ']' to close the JSON array
with open(output_filename, 'a') as json_file:
json_file.write(']')
这是 JSON 文件上的结果
[{
"group_id": "gid",
"project_id": xxx,
"project_name": "name,
"files": []
}
{
"group_id": "gid",
"project_id": xxx,
"project_name": "name",
"files": []
}
{
"group_id": "gid",
"project_id": xxx,
"project_name": "names",
"files": []
}
注:
您的帮助对我来说非常宝贵,谢谢。
根据docs,
path
参数用于指定子目录而不是文件:
存储库内的路径。用于获取子目录的内容。
目前端点中有
&path=<filename>
参数:
endpoint = f"{api_url}/projects/{project_id}/repository/tree?recursive=1&path={encoded_filename}&ref=dev"
这就是它不起作用的原因。因此只需从端点删除
path
参数即可。
还要记住分页 - 根据文档 -
per_page
参数默认值为 20。它会产生影响,因为您正在使用 recursive
标志 - 这意味着如果您的项目中有许多文件和子目录 - 这是不可能的通过一个请求处理所有路径。