Coverage for src/gitlabracadabra/packages/pypi.py: 79%
119 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-23 06:44 +0200
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-23 06:44 +0200
1#
2# Copyright (C) 2019-2025 Mathieu Parent <math.parent@gmail.com>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Lesser General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
17from __future__ import annotations
19from html import unescape
20from logging import getLogger
21from posixpath import join as posixpath_join
22from typing import TYPE_CHECKING, Any
23from urllib.parse import quote as urlquote
24from urllib.parse import urljoin, urlparse, urlunparse
25from urllib.request import parse_keqv_list
27from html5lib import parse as html5lib_parse
28from packaging.requirements import InvalidRequirement, Requirement
29from packaging.utils import canonicalize_name
30from packaging.version import InvalidVersion, Version
31from requests import codes
33from gitlabracadabra.packages.package_file import PackageFile
34from gitlabracadabra.packages.pip import extract_version_from_fragment
35from gitlabracadabra.packages.source import Source
37if TYPE_CHECKING: 37 ↛ 38line 37 didn't jump to line 38 because the condition on line 37 was never true
38 from requests.models import Response
40 from gitlabracadabra.packages.destination import Destination
42try:
43 from packaging.utils import parse_wheel_filename
45 HAS_PACKAGING_PARSERS = True
46except ImportError: # packaging << 20.9
47 HAS_PACKAGING_PARSERS = False
49logger = getLogger(__name__)
class PyPI(Source):
    """PyPI repository.

    Resolves requirement strings against a "simple" repository index and
    returns the files of the highest version matching each requirement.
    """

    def __init__(
        self,
        *,
        log_prefix: str = "",
        index_url: str | None = None,
        requirements: str | list[str],
    ) -> None:
        """Initialize a PyPI repository object.

        Args:
            log_prefix: Log prefix.
            index_url: index-url (default to https://pypi.org/simple).
            requirements: Python requirements as list or string. Each string
                may hold several requirements, one per line.
        """
        super().__init__()
        self._log_prefix = log_prefix
        self._index_url = index_url or "https://pypi.org/simple"
        if isinstance(requirements, str):
            requirements = [requirements]
        # Flatten to one entry per line, whatever the input shape was.
        self._requirements = [line for chunk in requirements for line in chunk.splitlines()]

    def __str__(self) -> str:
        """Return string representation.

        Returns:
            A string.
        """
        return "PyPI repository"

    def package_files(
        self,
        destination: Destination,  # noqa: ARG002
    ) -> list[PackageFile]:
        """Return list of package files.

        Args:
            destination: Unused by this source.

        Returns:
            List of package files, accumulated over all requirements. Empty
            when the installed packaging library lacks the filename parsers.
        """
        package_files: list[PackageFile] = []
        if not HAS_PACKAGING_PARSERS:
            logger.error(
                "%sPyPI packages mirroring requires packaging >= 20.9",
                self._log_prefix,
            )
            return package_files
        for requirement_string in self._requirements:
            stripped = requirement_string.strip()
            # Blank lines and comment lines are not requirements: skip them
            # silently instead of reporting them as invalid.
            if not stripped or stripped.startswith("#"):
                continue
            found = self._package_files_from_requirement_string(requirement_string)
            if not found:
                logger.warning(
                    "%sNo package files matching found for requirement: %s",
                    self._log_prefix,
                    requirement_string,
                )
            package_files.extend(found)
        return package_files

    def _package_files_from_requirement_string(self, requirement_string: str) -> list[PackageFile]:
        """Parse a requirement string and return its matching package files.

        Args:
            requirement_string: A single requirement line.

        Returns:
            Matching package files, or an empty list (after a warning) when
            the string is not a valid requirement.
        """
        try:
            req = Requirement(requirement_string)
        except InvalidRequirement:
            logger.warning(
                '%sInvalid requirement "%s"',
                self._log_prefix,
                requirement_string,
            )
            return []
        return self._package_files_from_requirement(req)

    def _package_files_from_requirement(self, req: Requirement) -> list[PackageFile]:
        """Fetch the project index page and return the matching package files.

        Args:
            req: Parsed requirement.

        Returns:
            Matching package files, or an empty list (after a warning) when
            the index page did not answer with HTTP 200.
        """
        index_url = self._get_index_url(req.name)
        index_response = self.session.request("get", index_url)
        if index_response.status_code != codes["ok"]:
            logger.warning(
                "%sUnexpected HTTP status for PyPI index %s: received %i %s",
                self._log_prefix,
                index_url,
                index_response.status_code,
                index_response.reason,
            )
            return []
        return self._package_files_from_requirement_and_response(req, index_response)

    def _get_index_url(self, project_name: str) -> str:
        """Return the index page URL of a project, with a trailing slash.

        Args:
            project_name: Project name; it is canonicalized and URL-quoted.

        Returns:
            The project URL below the configured index URL.
        """
        loc = posixpath_join(
            self._index_url,
            urlquote(canonicalize_name(project_name)),
        )
        if not loc.endswith("/"):
            loc = f"{loc}/"
        return loc

    def _package_files_from_requirement_and_response(
        self,
        req: Requirement,
        response: Response,
    ) -> list[PackageFile]:
        """Return the files of the highest matching version in an index page.

        Args:
            req: Parsed requirement.
            response: HTTP response holding the HTML index page.

        Returns:
            All package files of the highest version accepted by the
            requirement, or an empty list when no link matches.
        """
        document = html5lib_parse(
            response.content,
            transport_encoding=response.encoding,
            namespaceHTMLElements=False,
        )

        base_url = self._get_base_url(response, document)

        # Group candidate files by version so the best release can be
        # returned as a whole (wheels and sdist together).
        files_by_version: dict[Version, list[PackageFile]] = {}
        for anchor in document.findall(".//a"):
            version, package_file = self._package_file_from_requirement_and_anchor(req, anchor, base_url)
            if version and package_file:
                files_by_version.setdefault(version, []).append(package_file)

        if not files_by_version:
            return []
        return files_by_version[max(files_by_version)]

    def _get_base_url(self, response: Response, document: Any) -> str:
        """Return the URL against which the page links are resolved.

        Args:
            response: HTTP response of the index page.
            document: Parsed HTML document.

        Returns:
            The href of the first <base> element carrying one, otherwise
            the URL of the response.
        """
        for base in document.findall(".//base"):
            href = base.get("href")
            if href is not None:
                return href
        return response.url

    def _package_file_from_requirement_and_anchor(
        self,
        req: Requirement,
        anchor: Any,
        base_url: str,
    ) -> tuple[Version | None, PackageFile | None]:
        """Build a package file from an index page link, if it matches.

        Args:
            req: Parsed requirement.
            anchor: <a> element of the index page.
            base_url: URL used to resolve a relative href.

        Returns:
            (version, package file) when the link matches the requirement,
            (None, None) otherwise.
        """
        href = anchor.get("href")
        if href is None:
            return None, None
        # The mere presence of data-yanked marks a yanked file (PEP 592); its
        # value is an optional reason and may be empty. Yanked files are
        # only kept when the requirement string starts with "==".
        is_yanked = anchor.get("data-yanked") is not None
        if is_yanked and not str(req.specifier).startswith("=="):
            return None, None

        parsed_url = urlparse(urljoin(base_url, href))

        filename = parsed_url.path.split("/")[-1]
        try:
            name, ver = self._parse_filename(filename, canonicalize_name(req.name))
        except InvalidVersion:
            # Ignore invalid versions, like in pbr-0.5.2.5.g5b3e942.tar.gz
            logger.debug(
                "%sIgnoring invalid version for filename %s",
                self._log_prefix,
                filename,
            )
            return None, None

        if name is None or ver is None or ver not in req.specifier:
            return None, None

        metadata = self._metadata_from_fragment(parsed_url.fragment)

        requires_python = anchor.get("data-requires-python")
        if requires_python is not None:
            metadata["requires-python"] = unescape(requires_python)

        return ver, PackageFile(
            urlunparse(parsed_url._replace(fragment="")),
            "pypi",
            name,
            str(ver),
            filename,
            metadata=metadata,
        )

    @staticmethod
    def _metadata_from_fragment(fragment: str) -> dict[str, str]:
        """Parse the key=value items of a URL fragment (e.g. sha256=...).

        Args:
            fragment: URL fragment, items separated by "&".

        Returns:
            Mapping of keys to values. Items without "=" or with an empty
            value are dropped, so a link without a hash fragment yields an
            empty mapping instead of raising.
        """
        # parse_keqv_list() raises on items lacking "=" or a value: filter
        # those out first (an empty fragment splits into [""]).
        items = [item for item in fragment.split("&") if item.partition("=")[2]]
        return parse_keqv_list(items)

    def _parse_filename(self, filename: str, canonical_name: str) -> tuple[str | None, Version | None]:
        """Extract project name and version from a distribution filename.

        Args:
            filename: Last path segment of the file URL.
            canonical_name: Canonicalized project name of the requirement.

        Returns:
            (name, version) for wheels and .tar.gz archives, (None, None)
            for eggs, malformed wheel names and any other file type.

        Raises:
            InvalidVersion: When the version part cannot be parsed.
        """
        if filename.endswith(".whl"):
            # Defined alongside parse_wheel_filename in packaging.utils.
            from packaging.utils import InvalidWheelFilename

            try:
                name, ver, _, _ = parse_wheel_filename(filename)
            except InvalidWheelFilename:
                # One malformed wheel name must not abort the whole index.
                logger.debug(
                    "%sIgnoring invalid wheel filename %s",
                    self._log_prefix,
                    filename,
                )
                return None, None
            return name, ver
        if filename.endswith(".egg"):
            # Ignore egg files for now
            return None, None
        if filename.endswith(".tar.gz"):
            # Strip the ".tar.gz" suffix before looking for the version.
            ver_str = extract_version_from_fragment(filename[:-7], canonical_name)
            if ver_str:
                return canonical_name, Version(ver_str)
        return None, None