Skip to content

Commit 22dcce1

Browse files
authored
docs: update ci-analysis post (#10290)
1 parent ea1c179 commit 22dcce1

File tree

5 files changed

+34
-27
lines changed

5 files changed

+34
-27
lines changed

docs/_freeze/posts/ci-analysis/index/execute-results/html.json

Lines changed: 4 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Loading
Loading
Loading

docs/posts/ci-analysis/index.qmd

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ Alright, let's jump into some data!
9393

9494

9595
```{python}
96-
jobs = con.tables.jobs[_.started_at < "2023-01-09"]
96+
jobs = con.tables.jobs
9797
jobs
9898
```
9999

@@ -211,7 +211,7 @@ stats = stats.mutate(
211211
.else_("NA")
212212
.end()
213213
),
214-
team_plan=ibis.where(_.raw_improvements > 1, "Poetry + Team Plan", "None"),
214+
team_plan=ibis.ifelse(_.raw_improvements > 1, "Poetry + Team Plan", "None"),
215215
)
216216
stats
217217
```
@@ -221,7 +221,7 @@ Finally, we can summarize by averaging the different durations, grouping on the
221221
```{python}
222222
USECS_PER_MIN = 60_000_000
223223
224-
agged = stats.group_by([_.started_date, _.improvements, _.team_plan]).agg(
224+
agged = stats.group_by(_.started_date, _.improvements, _.team_plan).agg(
225225
job=_.job_duration.div(USECS_PER_MIN).mean(),
226226
workflow=_.workflow_duration.div(USECS_PER_MIN).mean(),
227227
queueing_time=_.queueing_time.div(USECS_PER_MIN).mean(),
@@ -242,23 +242,22 @@ Ibis doesn't have builtin plotting support, so we need to pull our results into
242242

243243
Here I'm using `plotnine` (a Python port of `ggplot2`), which has great integration with pandas DataFrames.
244244

245-
```{python}
246-
raw_df = agged.execute()
247-
raw_df
248-
```
249-
250-
Generally, `plotnine` works with long, tidy data so let's use `pandas.melt` to get there.
245+
Generally, `plotnine` works with long, tidy data so let's use Ibis's
246+
[`pivot_longer`](../../reference/expression-tables.qmd#ibis.expr.types.relations.Table.pivot_longer)
247+
to get there.
251248

252249

253250
```{python}
254-
import pandas as pd
255-
256-
df = pd.melt(
257-
raw_df,
258-
id_vars=["started_date", "improvements", "team_plan"],
259-
var_name="entity",
260-
value_name="duration",
251+
agged_pivoted = (
252+
agged.pivot_longer(
253+
("job", "workflow", "queueing_time"),
254+
names_to="entity",
255+
values_to="duration",
256+
)
257+
.mutate(started_date=_.started_date.cast("timestamp").truncate("D"))
261258
)
259+
260+
df = agged_pivoted.execute()
262261
df.head()
263262
```
264263

@@ -286,12 +285,16 @@ import logging
286285
287286
# without this, findfont logging spams the notebook making it unusable
288287
logging.getLogger('matplotlib.font_manager').disabled = True
288+
logging.getLogger('plotnine').disabled = True
289289
```
290290

291291
Here we show job durations, coloring the points differently depending on which improvements were in place: none, Poetry, or Poetry + Team Plan.
292292

293293
```{python}
294-
(
294+
import pandas as pd
295+
296+
297+
g = (
295298
ggplot(
296299
df.loc[df.entity == "job"].reset_index(drop=True),
297300
aes(x="started_date", y="duration", color="factor(improvements)"),
@@ -307,8 +310,8 @@ Here we show job durations, coloring the points differently depending on whether
307310
type='qual',
308311
limits=["None", "Poetry", "Poetry + Team Plan"],
309312
)
310-
+ geom_text(x=POETRY_MERGED_DATE, label=poetry_label, y=15, color="blue")
311-
+ geom_text(x=TEAMIZATION_DATE, label=team_label, y=10, color="blue")
313+
+ geom_text(aes("x", "y"), label=poetry_label, data=pd.DataFrame({"x": [POETRY_MERGED_DATE], "y": [15]}), color="blue")
314+
+ geom_text(aes("x", "y"), label=team_label, data=pd.DataFrame({"x": [TEAMIZATION_DATE], "y": [10]}), color="blue")
312315
+ stat_smooth(method="lm")
313316
+ labs(x="Date", y="Duration (minutes)")
314317
+ ggtitle("Job Duration")
@@ -318,6 +321,7 @@ Here we show job durations, coloring the points differently depending on whether
318321
legend_direction="vertical",
319322
)
320323
)
324+
g.show()
321325
```
322326

323327
## Result #1: Job Duration
@@ -331,7 +335,7 @@ A few things pop out to me right away:
331335
- Moving to the team plan had little to no effect on job run duration.
332336

333337
```{python}
334-
(
338+
g = (
335339
ggplot(
336340
df.loc[df.entity != "job"].reset_index(drop=True),
337341
aes(x="started_date", y="duration", color="factor(improvements)"),
@@ -347,8 +351,8 @@ A few things pop out to me right away:
347351
type='qual',
348352
limits=["None", "Poetry", "Poetry + Team Plan"],
349353
)
350-
+ geom_text(x=POETRY_MERGED_DATE, label=poetry_label, y=75, color="blue")
351-
+ geom_text(x=TEAMIZATION_DATE, label=team_label, y=50, color="blue")
354+
+ geom_text(aes("x", "y"), label=poetry_label, data=pd.DataFrame({"x": [POETRY_MERGED_DATE], "y": [75]}), color="blue")
355+
+ geom_text(aes("x", "y"), label=team_label, data=pd.DataFrame({"x": [TEAMIZATION_DATE], "y": [50]}), color="blue")
352356
+ stat_smooth(method="lm")
353357
+ labs(x="Date", y="Duration (minutes)")
354358
+ ggtitle("Workflow Duration")
@@ -358,6 +362,7 @@ A few things pop out to me right away:
358362
legend_direction="vertical",
359363
)
360364
)
365+
g.show()
361366
```
362367

363368
## Result #2: Workflow Duration and Queueing Time
@@ -377,15 +382,16 @@ Another interesting result.
377382
In the next plot we'll look at that correlation.
378383

379384
```{python}
380-
(
381-
ggplot(raw_df, aes(x="workflow", y="queueing_time"))
385+
g = (
386+
ggplot(agged.execute(), aes(x="workflow", y="queueing_time"))
382387
+ geom_point()
383388
+ geom_rug()
384389
+ facet_grid(". ~ team_plan")
385390
+ labs(x="Workflow Duration (minutes)", y="Queueing Time (minutes)")
386391
+ ggtitle("Workflow Duration vs. Queueing Time")
387392
+ theme(figure_size=(22, 6))
388393
)
394+
g.show()
389395
```
390396

391397
## Result #3: Workflow Duration and Queueing Time are correlated

0 commit comments

Comments
 (0)