Skip to content

Commit 12058f2

Browse files
cpcloud authored and gforsyth committed
docs(pypi-metadata-post): add Fortran pattern and fix regex
1 parent 8f4d73a commit 12058f2

File tree

2 files changed

+17
-11
lines changed
  • docs
    • _freeze/posts/querying-pypi-metadata-compiled-languages/index/execute-results
    • posts/querying-pypi-metadata-compiled-languages

2 files changed

+17
-11
lines changed

docs/_freeze/posts/querying-pypi-metadata-compiled-languages/index/execute-results/html.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/posts/querying-pypi-metadata-compiled-languages/index.qmd

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,10 @@ always viable -- we're in Python land so why not grab the filenames using
2727
```{python}
2828
import urllib3
2929
30-
http = urllib3.PoolManager()
30+
url = "https://raw.githubusercontent.com/pypi-data/data/main/links/dataset.txt"
3131
32-
resp = http.request("GET", "https://github.com/pypi-data/data/raw/main/links/dataset.txt")
32+
with urllib3.PoolManager() as http:
33+
resp = http.request("GET", url)
3334
3435
parquet_files = resp.data.decode().split()
3536
parquet_files
@@ -87,7 +88,7 @@ We can follow Seth's lead and look for things:
8788
```{python}
8889
expr = pypi.filter(
8990
[
90-
_.path.re_search(r"\.(asm|c|cc|cpp|cxx|h|hpp|rs|[Ff][0-9]{0-2}(?:or)?|go)$"),
91+
_.path.re_search(r"\.(asm|c|cc|cpp|cxx|h|hpp|rs|[Ff][0-9]{0,2}(?:or)?|go)$"),
9192
~_.path.re_search(r"(^|/)test(|s|ing)"),
9293
~_.path.contains("/site-packages/"),
9394
]
@@ -144,10 +145,12 @@ We'll do a few things:
144145
```{python}
145146
collapse_names = expr.mutate(
146147
ext=_.ext.re_replace(r"cxx|cpp|cc|c|hpp|h", "C/C++")
148+
.re_replace("^f.*$", "Fortran")
147149
.replace("rs", "Rust")
148150
.replace("go", "Go")
149-
.replace("asm", "Assembly"),
150-
)
151+
.replace("asm", "Assembly")
152+
.nullif(""),
153+
).dropna("ext")
151154
152155
collapse_names
153156
```
@@ -202,7 +205,7 @@ Now that the data are tidied, we can pass our expression directly to Altair and
202205
import altair as alt
203206
204207
chart = (
205-
alt.Chart(collapse_names)
208+
alt.Chart(collapse_names.to_pandas())
206209
.mark_line()
207210
.encode(x="month", y="project_count", color="ext")
208211
.properties(width=600, height=300)
@@ -235,7 +238,7 @@ full_query = (
235238
pypi.filter(
236239
[
237240
_.path.re_search(
238-
r"\.(asm|c|cc|cpp|cxx|h|hpp|rs|[Ff][0-9]{0-2}(?:or)?|go)$"
241+
r"\.(asm|c|cc|cpp|cxx|h|hpp|rs|[Ff][0-9]{0,2}(?:or)?|go)$"
239242
),
240243
~_.path.re_search(r"(^|/)test(|s|ing)"),
241244
~_.path.contains("/site-packages/"),
@@ -249,15 +252,18 @@ full_query = (
249252
.order_by(_.month.desc())
250253
.mutate(
251254
ext=_.ext.re_replace(r"cxx|cpp|cc|c|hpp|h", "C/C++")
255+
.re_replace("^f.*$", "Fortran")
252256
.replace("rs", "Rust")
253257
.replace("go", "Go")
254-
.replace("asm", "Assembly"),
258+
.replace("asm", "Assembly")
259+
.nullif(""),
255260
)
261+
.dropna("ext")
256262
.group_by(["month", "ext"])
257263
.aggregate(project_count=flatten(_.projects.collect()).unique().length())
258264
)
259265
chart = (
260-
alt.Chart(full_query)
266+
alt.Chart(full_query.to_pandas())
261267
.mark_line()
262268
.encode(x="month", y="project_count", color="ext")
263269
.properties(width=600, height=300)

0 commit comments

Comments
 (0)