Skip to content

Integrate crates_io_og_image into the application #11481

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,7 @@ jobs:
- run: cargo test --workspace
env:
# Set the path to the Fira Sans font for Typst.
# The path is relative to the `crates_io_og_image` crate root.
TYPST_FONT_PATH: ../../Fira-4.202/otf
TYPST_FONT_PATH: ${{ github.workspace }}/Fira-4.202/otf

frontend-lint:
name: Frontend / Lint
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ crates_io_env_vars = { path = "crates/crates_io_env_vars" }
crates_io_github = { path = "crates/crates_io_github" }
crates_io_index = { path = "crates/crates_io_index" }
crates_io_markdown = { path = "crates/crates_io_markdown" }
crates_io_og_image = { path = "crates/crates_io_og_image" }
crates_io_pagerduty = { path = "crates/crates_io_pagerduty" }
crates_io_session = { path = "crates/crates_io_session" }
crates_io_tarball = { path = "crates/crates_io_tarball" }
Expand Down
2 changes: 2 additions & 0 deletions src/bin/background-worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crates_io::{Emails, config};
use crates_io_docs_rs::RealDocsRsClient;
use crates_io_env_vars::var;
use crates_io_index::RepositoryConfig;
use crates_io_og_image::OgImageGenerator;
use crates_io_team_repo::TeamRepoImpl;
use crates_io_worker::Runner;
use object_store::prefix::PrefixStore;
Expand Down Expand Up @@ -102,6 +103,7 @@ fn main() -> anyhow::Result<()> {
.emails(emails)
.maybe_docs_rs(docs_rs)
.team_repo(Box::new(team_repo))
.og_image_generator(OgImageGenerator::from_environment()?)
.build();

let environment = Arc::new(environment);
Expand Down
123 changes: 123 additions & 0 deletions src/bin/crates-admin/backfill_og_images.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use anyhow::Result;
use crates_io::db;
use crates_io::schema::{background_jobs, crates};
use crates_io::worker::jobs::GenerateOgImage;
use crates_io_worker::BackgroundJob;
use diesel::prelude::*;
use diesel_async::RunQueryDsl;
use tracing::{info, warn};

// Command-line options for the `backfill-og-images` admin command.
//
// Plain `//` comments are used here on purpose: the `///` comments on the
// fields below are picked up by clap as `--help` text and must stay as-is.
#[derive(Debug, clap::Parser)]
#[command(
    name = "backfill-og-images",
    about = "Enqueue OG image generation jobs for existing crates"
)]
pub struct Opts {
    /// Batch size for enqueueing crates (default: 1000)
    #[arg(long, default_value_t = 1000)]
    batch_size: usize,

    /// Only generate OG images for crates with names starting with this prefix
    #[arg(long)]
    prefix: Option<String>,

    /// Offset to start enqueueing from (useful for resuming)
    #[arg(long)]
    offset: Option<i64>,
}

/// Escapes `\`, `%` and `_` in `input` with a leading backslash so the text is
/// matched literally inside a SQL `LIKE` pattern whose escape character is `\`.
fn escape_like_pattern(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        if matches!(ch, '\\' | '%' | '_') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}

/// Enqueues a `GenerateOgImage` background job for existing crates.
///
/// Crate names are read ordered by name, in pages of `opts.batch_size`,
/// starting at `opts.offset` (or 0 when not given). When `opts.prefix` is set,
/// only crates whose name literally starts with that prefix are considered.
/// Each page is written to the `background_jobs` table with a single insert,
/// using priority `-10`.
///
/// # Errors
///
/// Returns an error if the database connection cannot be established, if the
/// count or page query fails, if a job payload cannot be serialized, or if a
/// batch size does not fit into an `i64`. A failed batch *insert* is not
/// fatal: it is logged, counted, and the backfill continues with the next page.
pub async fn run(opts: Opts) -> Result<()> {
    let mut conn = db::oneoff_connection().await?;

    info!("Starting OG image backfill with options: {opts:?}");

    // Build the `LIKE` pattern once instead of on every page. The user
    // supplied prefix is escaped so that `_` (very common in crate names) and
    // `%` are matched literally rather than acting as wildcards.
    let name_pattern = opts
        .prefix
        .as_deref()
        .map(|prefix| format!("{}%", escape_like_pattern(prefix)));

    // Checked conversion instead of a silently truncating `as` cast.
    let page_limit = i64::try_from(opts.batch_size)?;

    // Builds the (optionally prefix-filtered) page query starting at `offset`.
    // NOTE(review): OFFSET-based paging can skip or repeat crates if rows are
    // added or removed while the backfill is running.
    let build_query = |offset: i64| {
        let mut query = crates::table
            .select(crates::name)
            .order(crates::name)
            .into_boxed();

        if let Some(pattern) = &name_pattern {
            query = query.filter(crates::name.like(pattern.clone()).escape('\\'));
        }

        query.offset(offset)
    };

    // Count total crates to process (same filter as the page query).
    let mut count_query = crates::table.into_boxed();
    if let Some(pattern) = &name_pattern {
        count_query = count_query.filter(crates::name.like(pattern.clone()).escape('\\'));
    }
    let total_crates: i64 = count_query.count().get_result(&mut conn).await?;

    info!("Total crates to enqueue: {total_crates}");

    let mut offset = opts.offset.unwrap_or(0);
    let mut enqueued = 0;
    let mut errors = 0;

    loop {
        // Fetch the next page of crate names.
        let crate_names: Vec<String> = build_query(offset)
            .limit(page_limit)
            .load(&mut conn)
            .await?;

        if crate_names.is_empty() {
            break;
        }

        let batch_size = crate_names.len();
        info!(
            "Enqueueing batch {}-{} of {total_crates}",
            offset + 1,
            offset + i64::try_from(batch_size)?
        );

        // Turn every crate name into an insertable `background_jobs` row.
        let jobs = crate_names
            .into_iter()
            .map(GenerateOgImage::new)
            .map(|job| {
                Ok((
                    background_jobs::job_type.eq(GenerateOgImage::JOB_NAME),
                    background_jobs::data.eq(serde_json::to_value(job)?),
                    background_jobs::priority.eq(-10),
                ))
            })
            .collect::<serde_json::Result<Vec<_>>>()?;

        // Insert the whole page with one statement.
        let result = diesel::insert_into(background_jobs::table)
            .values(jobs)
            .execute(&mut conn)
            .await;

        match result {
            Ok(inserted_count) => {
                enqueued += inserted_count;
                info!("Enqueued {enqueued} jobs so far...");
            }
            Err(e) => {
                // Best effort: remember the failure and keep going.
                errors += batch_size;
                warn!("Failed to enqueue batch of OG image jobs: {e}");
            }
        }

        // A short page means there is nothing left to fetch.
        if batch_size < opts.batch_size {
            break;
        }

        offset += page_limit;
    }

    info!("Jobs enqueued: {enqueued}");
    if errors > 0 {
        warn!("{errors} jobs failed to enqueue. Check logs above for details.");
    }

    Ok(())
}
11 changes: 11 additions & 0 deletions src/bin/crates-admin/enqueue_job.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ pub enum Command {
#[arg()]
name: String,
},
/// Generate OpenGraph images for the specified crates
GenerateOgImage {
/// Crate names to generate OpenGraph images for
#[arg(required = true)]
names: Vec<String>,
},
ProcessCdnLogQueue(jobs::ProcessCdnLogQueue),
SyncAdmins {
/// Force a sync even if one is already in progress
Expand Down Expand Up @@ -143,6 +149,11 @@ pub async fn run(command: Command) -> Result<()> {

jobs::CheckTyposquat::new(&name).enqueue(&mut conn).await?;
}
Command::GenerateOgImage { names } => {
for name in names {
jobs::GenerateOgImage::new(name).enqueue(&mut conn).await?;
}
}
Command::SendTokenExpiryNotifications => {
jobs::SendTokenExpiryNotifications
.enqueue(&mut conn)
Expand Down
3 changes: 3 additions & 0 deletions src/bin/crates-admin/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#[macro_use]
extern crate tracing;

mod backfill_og_images;
mod default_versions;
mod delete_crate;
mod delete_version;
Expand All @@ -17,6 +18,7 @@ mod yank_version;
#[derive(clap::Parser, Debug)]
#[command(name = "crates-admin")]
enum Command {
BackfillOgImages(backfill_og_images::Opts),
DeleteCrate(delete_crate::Opts),
DeleteVersion(delete_version::Opts),
Populate(populate::Opts),
Expand Down Expand Up @@ -46,6 +48,7 @@ async fn main() -> anyhow::Result<()> {
span.record("command", tracing::field::debug(&command));

match command {
Command::BackfillOgImages(opts) => backfill_og_images::run(opts).await,
Command::DeleteCrate(opts) => delete_crate::run(opts).await,
Command::DeleteVersion(opts) => delete_version::run(opts).await,
Command::Populate(opts) => populate::run(opts).await,
Expand Down
14 changes: 11 additions & 3 deletions src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use crate::app::AppState;
use crate::auth::{AuthCheck, AuthHeader, Authentication};
use crate::worker::jobs::{
self, CheckTyposquat, SendPublishNotificationsJob, UpdateDefaultVersion,
self, CheckTyposquat, GenerateOgImage, SendPublishNotificationsJob, UpdateDefaultVersion,
};
use axum::Json;
use axum::body::{Body, Bytes};
Expand Down Expand Up @@ -549,14 +549,14 @@ pub async fn publish(app: AppState, req: Parts, body: Body) -> AppResult<Json<Go
// Compared to only using a background job, this prevents us from getting into a
// situation where a crate exists in the `crates` table but doesn't have a default
// version in the `default_versions` table.
if let Some((existing_default_version, _)) = existing_default_version {
if let Some((existing_default_version, _)) = &existing_default_version {
let published_default_version = DefaultVersion {
id: version.id,
num: semver,
yanked: false,
};

if existing_default_version < published_default_version {
if existing_default_version < &published_default_version {
diesel::update(default_versions::table)
.filter(default_versions::crate_id.eq(krate.id))
.set(default_versions::version_id.eq(version.id))
Expand Down Expand Up @@ -631,6 +631,14 @@ pub async fn publish(app: AppState, req: Parts, body: Body) -> AppResult<Json<Go
}),
)?;

// Enqueue OG image generation job if not handled by UpdateDefaultVersion
if existing_default_version.is_none() {
let og_image_job = GenerateOgImage::new(krate.name.clone());
if let Err(error) = og_image_job.enqueue(conn).await {
error!("Failed to enqueue `GenerateOgImage` job: {error}");
}
};

// Experiment: check new crates for potential typosquatting.
if existing_crate.is_none() {
let crates_feed_job = jobs::rss::SyncCratesFeed;
Expand Down
80 changes: 80 additions & 0 deletions src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,18 @@ use tracing::{instrument, warn};

const PREFIX_CRATES: &str = "crates";
const PREFIX_READMES: &str = "readmes";
const PREFIX_OG_IMAGES: &str = "og-images";
const DEFAULT_REGION: &str = "us-west-1";
const CONTENT_TYPE_CRATE: &str = "application/gzip";
const CONTENT_TYPE_GZIP: &str = "application/gzip";
const CONTENT_TYPE_ZIP: &str = "application/zip";
const CONTENT_TYPE_INDEX: &str = "text/plain";
const CONTENT_TYPE_README: &str = "text/html";
const CONTENT_TYPE_OG_IMAGE: &str = "image/png";
const CACHE_CONTROL_IMMUTABLE: &str = "public,max-age=31536000,immutable";
const CACHE_CONTROL_INDEX: &str = "public,max-age=600";
const CACHE_CONTROL_README: &str = "public,max-age=604800";
const CACHE_CONTROL_OG_IMAGE: &str = "public,max-age=86400";

type StdPath = std::path::Path;

Expand Down Expand Up @@ -209,6 +212,13 @@ impl Storage {
apply_cdn_prefix(&self.cdn_prefix, &readme_path(name, version)).replace('+', "%2B")
}

/// Builds the URL of the Open Graph image for the crate called `name`.
///
/// Nothing is looked up in the store, so the URL is returned whether or not
/// an image has actually been uploaded.
pub fn og_image_location(&self, name: &str) -> String {
    let image_path = og_image_path(name);
    apply_cdn_prefix(&self.cdn_prefix, &image_path)
}

/// Returns the URL of an uploaded RSS feed.
pub fn feed_url(&self, feed_id: &FeedId<'_>) -> String {
apply_cdn_prefix(&self.cdn_prefix, &feed_id.into()).replace('+', "%2B")
Expand Down Expand Up @@ -240,6 +250,13 @@ impl Storage {
self.store.delete(&path).await
}

/// Removes the stored Open Graph image that belongs to the crate `name`.
#[instrument(skip(self))]
pub async fn delete_og_image(&self, name: &str) -> Result<()> {
    self.store.delete(&og_image_path(name)).await
}

#[instrument(skip(self))]
pub async fn delete_feed(&self, feed_id: &FeedId<'_>) -> Result<()> {
let path = feed_id.into();
Expand Down Expand Up @@ -270,6 +287,19 @@ impl Storage {
Ok(())
}

/// Stores `bytes` as the Open Graph image of the crate `name`.
///
/// The object is written with the OG image content type and cache-control
/// attributes.
#[instrument(skip(self, bytes))]
pub async fn upload_og_image(&self, name: &str, bytes: Bytes) -> Result<()> {
    let attributes = self.attrs([
        (Attribute::ContentType, CONTENT_TYPE_OG_IMAGE),
        (Attribute::CacheControl, CACHE_CONTROL_OG_IMAGE),
    ]);

    self.store
        .put_opts(&og_image_path(name), bytes.into(), attributes.into())
        .await?;

    Ok(())
}

#[instrument(skip(self, channel))]
pub async fn upload_feed(
&self,
Expand Down Expand Up @@ -385,6 +415,10 @@ fn readme_path(name: &str, version: &str) -> Path {
format!("{PREFIX_READMES}/{name}/{name}-{version}.html").into()
}

fn og_image_path(name: &str) -> Path {
format!("{PREFIX_OG_IMAGES}/{name}.png").into()
}

fn apply_cdn_prefix(cdn_prefix: &Option<String>, path: &Path) -> String {
match cdn_prefix {
Some(cdn_prefix) if !cdn_prefix.starts_with("https://") => {
Expand Down Expand Up @@ -484,6 +518,17 @@ mod tests {
for (name, version, expected) in readme_tests {
assert_eq!(storage.readme_location(name, version), expected);
}

let og_image_tests = vec![
("foo", "https://static.crates.io/og-images/foo.png"),
(
"some-long-crate-name",
"https://static.crates.io/og-images/some-long-crate-name.png",
),
];
for (name, expected) in og_image_tests {
assert_eq!(storage.og_image_location(name), expected);
}
}

#[test]
Expand Down Expand Up @@ -661,4 +706,39 @@ mod tests {
let expected_files = vec![target];
assert_eq!(stored_files(&s.store).await, expected_files);
}

/// Uploading OG images creates one `og-images/<name>.png` object per crate.
#[tokio::test]
async fn upload_og_image() {
    let storage = Storage::from_config(&StorageConfig::in_memory());
    let png = Bytes::from_static(b"fake png data");

    // First upload: exactly one object exists.
    storage.upload_og_image("foo", png.clone()).await.unwrap();
    assert_eq!(
        stored_files(&storage.store).await,
        vec!["og-images/foo.png"]
    );

    // Second upload: both objects are present.
    storage
        .upload_og_image("some-long-crate-name", png)
        .await
        .unwrap();
    assert_eq!(
        stored_files(&storage.store).await,
        vec!["og-images/foo.png", "og-images/some-long-crate-name.png"]
    );
}

/// Deleting an OG image removes only the object of the named crate.
#[tokio::test]
async fn delete_og_image() {
    let storage = Storage::from_config(&StorageConfig::in_memory());
    let png = Bytes::from_static(b"fake png data");

    // Seed the store with two images.
    storage.upload_og_image("foo", png.clone()).await.unwrap();
    storage.upload_og_image("bar", png).await.unwrap();
    assert_eq!(
        stored_files(&storage.store).await,
        vec!["og-images/bar.png", "og-images/foo.png"]
    );

    // Only the image for `foo` goes away.
    storage.delete_og_image("foo").await.unwrap();
    assert_eq!(
        stored_files(&storage.store).await,
        vec!["og-images/bar.png"]
    );
}
}
Loading
Loading