Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: get repos from graphql #120

Merged
merged 2 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 77 additions & 3 deletions shared/torngit/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@

METRICS_PREFIX = "services.torngit.github"

GITHUB_REPO_COUNT_QUERY = """

class GitHubGraphQLQueries(object):
_queries = dict(
REPO_TOTALCOUNT="""
query {
viewer {
repositories(
Expand All @@ -47,11 +50,46 @@
}
}
}
"""
""",
REPOS_FROM_NODEIDS="""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does github let you fetch the whole list without pagination or will it paginate for you with some default page size?

codecov/engineering-team#139 has an example of pagination with github's graphql api if we need to add it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a very good question that I probably should have put in the PR description.
I spent quite some time trying to get pagination working in this query. The docs indicate that you can specify `first` and `last` and get `pageInfo` cursors.

But according to the explorer docs, the nodes query doesn't accept first and last as inputs (I tried, it didn't work), AND the Repository object doesn't include pageInfo as a possible field to query. For that reason I don't think there's pagination. At least, I couldn't find a way to paginate the request.

Screenshot 2024-02-07 at 09 08 46

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update: I decided to implement simple pagination from our side just in case.

query GetReposFromNodeIds($node_ids: [ID!]!) {
nodes(ids: $node_ids) {
__typename
... on Repository {
# databaseId == service_id
databaseId
name
primaryLanguage {
name
}
isPrivate
defaultBranchRef {
name
}
owner {
# This ID is actually the node_id, not the ownerid
id
login
}
Comment on lines +69 to +73
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need a separate query for getting owner information or could we include databaseId here?

Copy link
Contributor Author

@giovanni-guidini giovanni-guidini Feb 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have included it if it were possible, but that is not a User or Organization object. It's some other node that only includes those 2 pieces of information.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RIP

}
}
}
""",
)

def get(self, query_name: str) -> Optional[str]:
    """Look up the raw GraphQL query text registered under ``query_name``.

    Returns the query string stored in ``self._queries``, or ``None`` when
    no query with that name is registered.
    """
    # dict.get already falls back to None for unknown keys.
    return self._queries.get(query_name)

def prepare(self, query_name: str, variables: dict) -> Optional[dict]:
    """Build the GraphQL request body for a registered query.

    Looks the query text up through ``self.get(query_name)`` and pairs it
    with ``variables``.

    Returns a dict of the form ``{"query": <text>, "variables": variables}``,
    or ``None`` when ``query_name`` is not a registered query.
    """
    # If Query was an object we could validate the variables
    query_text = self.get(query_name)
    if query_text is None:
        return None
    return {"query": query_text, "variables": variables}


class Github(TorngitBaseAdapter):
service = "github"
graphql = GitHubGraphQLQueries()
urls = dict(
repo="{username}/{name}",
owner="{username}",
Expand Down Expand Up @@ -587,11 +625,47 @@ async def _fetch_number_of_repos(self, client, token):
client,
"post",
"/graphql",
body=dict(query=GITHUB_REPO_COUNT_QUERY),
body=dict(query=self.graphql.get("REPO_TOTALCOUNT")),
token=token,
)
return res["data"]["viewer"]["repositories"]["totalCount"]

async def get_repos_from_nodeids_generator(
    self, repo_node_ids: List[str], expected_owner_username, *, token=None
):
    """Yield repository info for repos identified by their GraphQL node IDs.

    Sends one ``REPOS_FROM_NODEIDS`` GraphQL request containing every ID in
    ``repo_node_ids`` and yields one dict per Repository node in the
    response.

    Args:
        repo_node_ids: GraphQL node IDs of the repositories to fetch.
        expected_owner_username: login that each repo owner is compared
            against to fill in ``owner["is_expected_owner"]``.
        token: token to use for the request; when ``None`` it is resolved
            through ``self.get_token_by_type_if_none(token, TokenType.read)``.

    Yields:
        dict with the keys ``service_id`` (the repo ``databaseId``),
        ``name``, ``language`` (result of ``self._validate_language``),
        ``private``, ``branch`` (default branch name or ``None``) and
        ``owner`` (``node_id``, ``username``, ``is_expected_owner``).

    Entries of the ``nodes`` response that are ``null`` or that are not a
    Repository are skipped instead of raising.
    """
    token = self.get_token_by_type_if_none(token, TokenType.read)
    async with self.get_client() as client:
        query = self.graphql.prepare(
            "REPOS_FROM_NODEIDS", variables={"node_ids": repo_node_ids}
        )
        res = await self.api(
            client,
            "post",
            "/graphql",
            body=query,
            token=token,
        )
        for raw_repo_data in res["data"]["nodes"]:
            # The `nodes` field is a list of nullable Node entries: an ID
            # that cannot be resolved comes back as null.
            if raw_repo_data is None:
                continue
            # The query only selects fields inside `... on Repository`, so
            # any other node type carries nothing but `__typename`.
            # A missing `__typename` is treated as a Repository so that
            # responses without it behave as before.
            if raw_repo_data.get("__typename", "Repository") != "Repository":
                continue
            # Both of these are null for repos without a detected language
            # or without a default branch.
            primary_language = raw_repo_data.get("primaryLanguage")
            default_branch = raw_repo_data.get("defaultBranchRef")
            owner_login = raw_repo_data["owner"]["login"]
            yield {
                "service_id": raw_repo_data["databaseId"],
                "name": raw_repo_data["name"],
                "language": self._validate_language(
                    primary_language.get("name") if primary_language else None
                ),
                "private": raw_repo_data["isPrivate"],
                "branch": default_branch.get("name") if default_branch else None,
                "owner": {
                    # This is the owner's GraphQL node ID, not the ownerid.
                    "node_id": raw_repo_data["owner"]["id"],
                    "username": owner_login,
                    "is_expected_owner": owner_login == expected_owner_username,
                },
            }

async def list_repos_using_installation(self, username=None):
"""
returns list of repositories included in this integration
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
interactions:
- request:
body: '{"query": "\nquery GetReposFromNodeIds($node_ids: [ID!]!) {\n nodes(ids:
$node_ids) {\n __typename \n ... on Repository {\n #
databaseId == service_id\n databaseId\n name\n primaryLanguage
{\n name\n }\n isPrivate\n defaultBranchRef
{\n name\n }\n owner {\n #
This ID is actually the node_id, not the ownerid\n id\n login\n }\n }\n }\n}\n",
matt-codecov marked this conversation as resolved.
Show resolved Hide resolved
"variables": {"node_ids": ["R_kgDOHrbKcg", "R_kgDOLEJx2g"]}}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '613'
content-type:
- application/json
host:
- api.github.com
user-agent:
- Default
method: POST
uri: https://api.github.com/graphql
response:
content: '{"data":{"nodes":[{"__typename":"Repository","databaseId":515295858,"name":"example-python","primaryLanguage":{"name":"Shell"},"isPrivate":false,"defaultBranchRef":{"name":"master"},"owner":{"id":"U_kgDOBZOfKw","login":"codecove2e"}},{"__typename":"Repository","databaseId":742552026,"name":"test-no-languages","primaryLanguage":null,"isPrivate":false,"defaultBranchRef":null,"owner":{"id":"U_kgDOBZOfKw","login":"codecove2e"}}]}}'
headers:
Access-Control-Allow-Origin:
- '*'
Access-Control-Expose-Headers:
- ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining,
X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Scopes,
X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, X-GitHub-SSO,
X-GitHub-Request-Id, Deprecation, Sunset
Content-Encoding:
- gzip
Content-Security-Policy:
- default-src 'none'
Content-Type:
- application/json; charset=utf-8
Date:
- Tue, 06 Feb 2024 13:21:07 GMT
Referrer-Policy:
- origin-when-cross-origin, strict-origin-when-cross-origin
Server:
- GitHub.com
Strict-Transport-Security:
- max-age=31536000; includeSubdomains; preload
Transfer-Encoding:
- chunked
Vary:
- Accept-Encoding, Accept, X-Requested-With
X-Accepted-OAuth-Scopes:
- repo
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- deny
X-GitHub-Media-Type:
- github.v4
X-GitHub-Request-Id:
- C11E:116D76:8B8D4:94D71:65C23242
X-OAuth-Scopes:
- repo
X-RateLimit-Limit:
- '5000'
X-RateLimit-Remaining:
- '4997'
X-RateLimit-Reset:
- '1707227531'
X-RateLimit-Resource:
- graphql
X-RateLimit-Used:
- '3'
X-XSS-Protection:
- '0'
http_version: HTTP/1.1
status_code: 200
version: 1
36 changes: 36 additions & 0 deletions tests/integration/test_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -1815,3 +1815,39 @@ async def test_list_github_app_webhook_redelivery(self, codecov_vcr):
)
res = await ghapp_handler.request_webhook_redelivery(17322555251)
assert res is True

@pytest.mark.asyncio
async def test_get_repos_from_nodeids_generator(self, valid_handler, codecov_vcr):
    """Check the dicts yielded by ``get_repos_from_nodeids_generator``.

    Two repo node IDs are requested: one repo with a language and a default
    branch, and one with neither, which must yield ``None`` for both fields.
    ``codecov_vcr`` is requested only as a fixture (presumably it replays
    the recorded GitHub interaction; it is not used in the body).
    """
    repo_node_ids = ["R_kgDOHrbKcg", "R_kgDOLEJx2g"]
    # Both repos belong to the same owner, which is also the expected one.
    shared_owner = {
        "node_id": "U_kgDOBZOfKw",
        "username": "codecove2e",
        "is_expected_owner": True,
    }
    repo_with_language = {
        "service_id": 515295858,
        "name": "example-python",
        "language": "shell",
        "private": False,
        "branch": "master",
        "owner": dict(shared_owner),
    }
    repo_without_language = {
        "service_id": 742552026,
        "name": "test-no-languages",
        "language": None,
        "private": False,
        "branch": None,
        "owner": dict(shared_owner),
    }
    expected = [repo_with_language, repo_without_language]
    received = [
        repo
        async for repo in valid_handler.get_repos_from_nodeids_generator(
            repo_node_ids, "codecove2e"
        )
    ]
    assert received == expected
Loading