diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..0a73b8e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "twitter-na-pewno-bot" +version = "0.1.0" +edition = "2021" +license = "MIT" +readme = "README.md" +authors = ["mskwr "] +homepage = "https://github.com/mimuw-jnp2-rust/project-na-pewno-twitter-bot.git" +repository = "https://github.com/mimuw-jnp2-rust/project-na-pewno-twitter-bot.git" +description = "A bot drawing the attention of Poles to the most common spelling mistake they make." +keywords = ["twitter", "bot", "rust", "na-pewno"] + +[dependencies] +twitter-v2 = "0.1.8" +tokio = { version = "1.24.2", features = ["macros"] } +time = "0.3.17" +rand = "0.8.5" +dotenv = "0.15.0" diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..bd70ce1 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Michał Skwarek + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md index 58e9e49..2143578 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,40 @@ -# Frobnicator (this is a template) +# na pewno bot for Twitter ([@napewnobot](https://twitter.com/napewnobot)) +[![Twitter URL](https://i.imgur.com/DPXcjpZ.png)](https://twitter.com/napewnobot) ## Authors -- Andrzej Głuszak (@agluszak on GitHub) -- Linus Torvalds (@torvalds on GitHub) +- Michał Skwarek ([@mskwr](https://github.com/mskwr)) ## Description -Frobnicator is going to be a platformer game similar to Super Mario Bros made using Bevy game engine. +Statistics show that for many years, by far the most common spelling mistake made by Poles online has been the phrase "na pewno" (which means "certainly"), incorrectly spelled as "napewno". On Twitter, this mistake is made by up to 1000 unique users every day. The job of the na pewno bot for Twitter is to improve these terrible statistics. -## Features -- map generator -- shooting -- enemy AI -- game state saving and loading -- scores +The bot provides the following features: +* finding all tweets containing "napewno" +* correcting spelling by automatically replying to these tweets +* maintaining statistics, including how many people per day made this mistake +* providing daily reports on the profile, including comparing results with the previous day -## Plan -In the first part we're going to implement the basics: movement, physics and shooting. The enemies will simply bounce from one edge of the platform to the other. There will be only a single map. +## Getting started +1. Sign up for the Twitter API to get the keys and tokens necessary for authorization. Set them as environment variables. +2. Clone the repository. + ```sh + git clone https://github.com/mimuw-jnp2-rust/twitter-na-pewno-bot.git + cd twitter-na-pewno-bot + ``` +3. Build the project. + ```sh + cargo build --release + ``` +4. Run the project. 
+ ```sh + cargo run + ``` -In the second part we're going to add random map generator, saving/loading, scores and a more sophisticated AI. +## License +Distributed under the MIT License. See `LICENSE.txt` for more information. ## Libraries -- Bevy -- Serde (for serialization) +- [twitter_v2](https://docs.rs/twitter-v2/latest/twitter_v2/) +- [tokio](https://docs.rs/tokio/latest/tokio/) +- [rand](https://docs.rs/rand/latest/rand/) +- [time](https://docs.rs/time/latest/time/) +- [dotenv](https://docs.rs/dotenv/latest/dotenv/)
diff --git a/src/auth.rs b/src/auth.rs
new file mode 100644
index 0000000..f6d5f67
--- /dev/null
+++ b/src/auth.rs
@@ -0,0 +1,40 @@
+use std::env::var;
+use twitter_v2::authorization::{BearerToken, Oauth1aToken};
+use twitter_v2::TwitterApi;
+
+// Gets api from app context (Read).
+// The token is read from the BEARER_TOKEN environment variable; panics with
+// "BEARER_TOKEN not found" when the variable is not set.
+pub fn get_api_app_context() -> TwitterApi<BearerToken> {
+    let bearer_token = var("BEARER_TOKEN").expect("BEARER_TOKEN not found");
+    TwitterApi::new(BearerToken::new(bearer_token))
+}
+
+// Gets api from user context (Read and Write).
+// All four credentials are read from the environment (API_KEY, API_SECRET,
+// ACCESS_TOKEN, ACCESS_SECRET); panics naming the first one that is missing.
+pub fn get_api_user_context() -> TwitterApi<Oauth1aToken> {
+    let api_key = var("API_KEY").expect("API_KEY not found");
+    let api_secret = var("API_SECRET").expect("API_SECRET not found");
+    let access_token = var("ACCESS_TOKEN").expect("ACCESS_TOKEN not found");
+    let access_secret = var("ACCESS_SECRET").expect("ACCESS_SECRET not found");
+
+    TwitterApi::new(Oauth1aToken::new(
+        api_key,
+        api_secret,
+        access_token,
+        access_secret,
+    ))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use twitter_v2::Result;
+
+    #[tokio::test]
+    async fn test_get_api_app_context() -> Result<()> {
+        dotenv::dotenv().expect(".env file should be readable");
+        assert!(get_api_app_context().with_user_ctx().await.is_err());
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_get_api_user_context() -> Result<()> {
+        dotenv::dotenv().expect(".env file should be readable");
+        assert!(get_api_user_context().with_user_ctx().await.is_ok());
+        Ok(())
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..1f99f0e
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,79 @@
+mod auth;
+mod requests;
+mod strings;
+
+use crate::requests::{
+    count_tweets_with_mistake, get_initial_tweet, get_latest_tweet, get_my_user_id,
+    get_my_username, get_name_by_id, get_tweets_with_mistake, get_username_by_id,
+    post_reply_with_message, post_tweet_with_message,
+};
+use crate::strings::{
+    extract_statistics, generate_reply, generate_tweet, print_end_message, print_reply_message,
+    print_start_message, print_update_message,
+};
+
+use std::thread::sleep;
+use std::time::Duration;
+use time::OffsetDateTime;
+
+// Due to the limit of 100 Tweets per hour.
+const REQUEST_TIMEOUT_SECS: u64 = 60;
+
+// Runs the bot once: posts the daily statistics update when it is due, then
+// replies to every tweet with the mistake found since the previous run.
+#[tokio::main]
+async fn main() {
+    // Load environment variables from .env file.
+    dotenv::dotenv().expect(".env file should be readable");
+    print_start_message();
+
+    let my_id = get_my_user_id().await.expect("invalid authorization");
+    let my_username = get_my_username().await.expect("invalid authorization");
+    let my_latest_tweet = get_latest_tweet(my_id).await;
+
+    // All time variables are in UTC.
+    let cur_date = OffsetDateTime::now_utc().date();
+    let prev_date = cur_date.previous_day().expect("invalid date");
+
+    // Date of the latest profile update. None means there are no updates on
+    // the profile yet (or the tweet came back without its creation time).
+    let last_date = my_latest_tweet
+        .as_ref()
+        .and_then(|tweet| tweet.created_at)
+        .map(|created_at| created_at.date());
+
+    // Post daily updates with statistics on the profile.
+    // Do not post anything if update was already made today.
+    if last_date != Some(cur_date) {
+        let cur_stat = count_tweets_with_mistake(&prev_date).await;
+        // Only extract previous stats if update was made on the previous day.
+        let prev_stat = match &my_latest_tweet {
+            Some(tweet) if last_date == Some(prev_date) => {
+                extract_statistics(tweet.text.as_str()).unwrap_or(0)
+            }
+            _ => 0,
+        };
+
+        let msg = generate_tweet(prev_stat, cur_stat);
+        post_tweet_with_message(msg).await;
+        print_update_message(my_username);
+    }
+
+    let initial_tweet_id = get_initial_tweet(my_id).await;
+    let tweets_with_mistake = get_tweets_with_mistake(initial_tweet_id).await;
+
+    // Can not use iterator here because of instability of async closures.
+    for tweet in tweets_with_mistake {
+        let id = tweet.author_id.expect("invalid user");
+        let username = get_username_by_id(id).await.expect("invalid user");
+        let name = get_name_by_id(id).await.expect("invalid user");
+        let msg = generate_reply(name.as_str());
+        post_reply_with_message(tweet.id, msg).await;
+        print_reply_message(tweet.id, username);
+
+        // Avoid shadowban and never exceed the limit of posts.
+        // NOTE(review): this is a blocking sleep inside an async fn; it looks
+        // harmless here because main spawns no other tasks - confirm.
+        sleep(Duration::from_secs(REQUEST_TIMEOUT_SECS));
+    }
+
+    print_end_message();
+}
diff --git a/src/requests.rs b/src/requests.rs
new file mode 100644
index 0000000..c43c04b
--- /dev/null
+++ b/src/requests.rs
@@ -0,0 +1,244 @@
+use crate::auth::{get_api_app_context, get_api_user_context};
+
+use std::collections::HashSet;
+use std::ops::Sub;
+use time::Date;
+use twitter_v2::id::NumericId;
+use twitter_v2::query::Exclude::Replies;
+use twitter_v2::query::TweetField::{AuthorId, CreatedAt};
+use twitter_v2::Tweet;
+
+const MINIMUM_NUMBER_OF_RESULTS: usize = 5;
+const MAXIMUM_NUMBER_OF_RESULTS: usize = 100;
+const MINIMUM_BREAK_AFTER_RUN: f32 = 100.0;
+const MISTAKE: &str = "napewno -is:retweet";
+
+// Gets id of currently authorized user.
+// Panics with "invalid user" when the request fails; returns None when the
+// response carries no user data.
+pub async fn get_my_user_id() -> Option<NumericId> {
+    let api = get_api_user_context();
+    let response = api.get_users_me().send().await.expect("invalid user");
+
+    response.into_data().map(|me| me.id)
+}
+
+// Gets username of currently authorized user.
+// Panics with "invalid user" when the request fails; returns None when the
+// response carries no user data.
+pub async fn get_my_username() -> Option<String> {
+    let api = get_api_user_context();
+    let response = api.get_users_me().send().await.expect("invalid user");
+
+    response.into_data().map(|me| me.username)
+}
+
+// Gets username by id.
+// Panics with "invalid id" when the request fails; returns None when the
+// response carries no user data.
+pub async fn get_username_by_id(id: NumericId) -> Option<String> {
+    let api = get_api_user_context();
+    let response = api.get_user(id).send().await.expect("invalid id");
+
+    response.into_data().map(|user| user.username)
+}
+
+// Gets name by id.
+// Panics with "invalid id" when the request fails; returns None when the
+// response carries no user data.
+pub async fn get_name_by_id(id: NumericId) -> Option<String> {
+    let api = get_api_user_context();
+    let response = api.get_user(id).send().await.expect("invalid id");
+
+    response.into_data().map(|user| user.name)
+}
+
+// Gets the id of the latest tweet, after which user stopped searching.
+// Walks the returned tweets in the order the API delivered them and picks the
+// first one posted more than MINIMUM_BREAK_AFTER_RUN seconds after the tweet
+// that follows it; falls back to the last returned tweet when there is no
+// such gap. Panics with "user has no tweets" when the timeline is empty.
+pub async fn get_initial_tweet(user: NumericId) -> NumericId {
+    let api = get_api_app_context();
+    let tweets = api
+        .get_user_tweets(user)
+        .tweet_fields([CreatedAt])
+        .max_results(MAXIMUM_NUMBER_OF_RESULTS)
+        .send()
+        .await
+        .expect("invalid user")
+        .into_data()
+        .unwrap_or_default();
+
+    // windows(2) yields nothing for zero or one tweet, so no index arithmetic
+    // can underflow on a short timeline.
+    let index = tweets
+        .windows(2)
+        .position(|pair| {
+            let cur_date = pair[0].created_at.expect("invalid date");
+            let next_date = pair[1].created_at.expect("invalid date");
+            cur_date.sub(next_date).as_seconds_f32() > MINIMUM_BREAK_AFTER_RUN
+        })
+        .unwrap_or_else(|| tweets.len().saturating_sub(1));
+
+    tweets.get(index).expect("user has no tweets").id
+}
+
+// Gets the latest tweet of given user.
+// Replies are excluded from the request; returns None when nothing is left.
+pub async fn get_latest_tweet(user: NumericId) -> Option<Tweet> {
+    let api = get_api_app_context();
+    let tweets = api
+        .get_user_tweets(user)
+        .tweet_fields([CreatedAt])
+        .exclude([Replies])
+        .max_results(MINIMUM_NUMBER_OF_RESULTS)
+        .send()
+        .await
+        .expect("invalid user")
+        .into_data()
+        .unwrap_or_default();
+
+    // Move the first element out instead of cloning it.
+    tweets.into_iter().next()
+}
+
+// Gets tweets with mistake since given tweet.
+pub async fn get_tweets_with_mistake(id: NumericId) -> Vec<Tweet> {
+    let api = get_api_app_context();
+    // Gets no more than last MAXIMUM_NUMBER_OF_RESULTS tweets.
+    let mut tweets = api
+        .get_tweets_search_recent(MISTAKE)
+        .tweet_fields([AuthorId, CreatedAt])
+        .since_id(id)
+        .max_results(MAXIMUM_NUMBER_OF_RESULTS)
+        .send()
+        .await
+        .expect("invalid query")
+        .into_data()
+        .unwrap_or_default();
+
+    // Take oldest tweets first.
+    tweets.reverse();
+    tweets
+}
+
+// Counts all unique users whose tweets included given word on a given day.
+// The day is scanned backwards one page at a time: each request ends where
+// the previous page stopped, until a page comes back empty.
+pub async fn count_tweets_with_mistake(date: &Date) -> usize {
+    let api = get_api_app_context();
+    let mut users = HashSet::new();
+
+    let start_date = date.midnight().assume_utc();
+    let mut end_date = date
+        .next_day()
+        .expect("invalid date")
+        .midnight()
+        .assume_utc();
+
+    // An empty time window cannot contain more tweets, so stop instead of
+    // sending a request whose end does not come after its start.
+    while end_date > start_date {
+        let tweets = api
+            .get_tweets_search_recent(MISTAKE)
+            .tweet_fields([AuthorId, CreatedAt])
+            .start_time(start_date)
+            .end_time(end_date)
+            .max_results(MAXIMUM_NUMBER_OF_RESULTS)
+            .send()
+            .await
+            .expect("invalid query")
+            .into_data()
+            .unwrap_or_default();
+
+        // The last tweet of this page bounds the next request.
+        match tweets.last() {
+            Some(last) => end_date = last.created_at.expect("invalid size"),
+            None => break,
+        }
+
+        users.extend(tweets.iter().map(|tweet| tweet.author_id));
+    }
+
+    users.len()
+}
+
+// Posts tweet with given message.
+// Panics with "invalid message" when the request fails.
+pub async fn post_tweet_with_message(message: String) {
+    let api = get_api_user_context();
+    api.post_tweet()
+        .text(message)
+        .send()
+        .await
+        .expect("invalid message");
+}
+
+// Posts reply to provided tweet with given message.
+pub async fn post_reply_with_message(id: NumericId, message: String) {
+    let api = get_api_user_context();
+    let result = api
+        .post_tweet()
+        .text(message)
+        .in_reply_to_tweet_id(id)
+        .send()
+        .await;
+
+    // There are several reasons why replying to certain tweets is just
+    // impossible - report such cases and do not stop program.
+    if let Err(error) = result {
+        eprintln!("Could not reply to tweet {}: {}", id, error);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use twitter_v2::Result;
+
+    #[tokio::test]
+    async fn test_get_username_by_id() -> Result<()> {
+        dotenv::dotenv().expect(".env file should be readable");
+        let id = NumericId::new(1608833104919117824);
+        let username = "napewnobot".to_string();
+        assert_eq!(get_username_by_id(id).await.unwrap(), username);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_get_name_by_id() -> Result<()> {
+        dotenv::dotenv().expect(".env file should be readable");
+        let id = NumericId::new(1608833104919117824);
+        let name = "na pewno bot 📢".to_string();
+        assert_eq!(get_name_by_id(id).await.unwrap(), name);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_get_initial_tweet() -> Result<()> {
+        dotenv::dotenv().expect(".env file should be readable");
+        let user_id = NumericId::new(841324337978306562);
+        let tweet_id = NumericId::new(1620564152187232256);
+        assert_eq!(get_initial_tweet(user_id).await, tweet_id);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_get_latest_tweet() -> Result<()> {
+        dotenv::dotenv().expect(".env file should be readable");
+        let user_id = NumericId::new(841324337978306562);
+        let tweet_id = NumericId::new(1355831924690923520);
+        assert_eq!(get_latest_tweet(user_id).await.unwrap().id, tweet_id);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_get_tweets_with_mistake() -> Result<()> {
+        dotenv::dotenv().expect(".env file should be readable");
+        let tweet_id = NumericId::new(1619713517258522625);
+        let tweets = get_tweets_with_mistake(tweet_id).await;
+        tweets
+            .iter()
+            .for_each(|t| assert!(t.text.to_lowercase().contains("napewno")));
+        Ok(())
+    }
+}
diff --git a/src/strings.rs b/src/strings.rs
new file mode 100644
index 0000000..d2e8d6a
--- /dev/null
+++ b/src/strings.rs
@@ -0,0 +1,169 @@
+use rand::Rng;
+use std::string::ToString;
+use time::{format_description, OffsetDateTime};
+use twitter_v2::id::NumericId;
+
+const GREETINGS: [&str; 5] = ["Cześć", "Czołem", "Hej", "Serwus", "Witaj"];
+const GREETING_EMOJIS: [&str; 8] = ["👋", "🤝", "☺️", "🤓", "🫡", "🍻", "🤖", "👀"];
+const APOLOGIES: [&str; 3] = ["Daruj mi", "Wybacz mi", "Przepraszam za"];
+const BRAVERY_WORDS: [&str; 3] = ["brawurę", "śmiałość", "zuchwałość"];
+const PHRASE_WORDS: [&str; 2] = ["frazę", "wyrażenie"];
+const SPELLING_WORDS: [&str; 2] = ["piszemy", "pisze się"];
+const DIVISION_WORDS: [&str; 3] = ["osobno", "rozdzielnie", "rozłącznie"];
+
+const DATE_FORMAT: &str = "[hour]:[minute]:[second]";
+
+// Generates reply for tweet containing 'napewno'.
+// Every part of the sentence is drawn at random from its pool of variants.
+pub fn generate_reply(username: &str) -> String {
+    let mut rng = rand::thread_rng();
+    let greeting = GREETINGS[rng.gen_range(0..GREETINGS.len())];
+    let emoji = GREETING_EMOJIS[rng.gen_range(0..GREETING_EMOJIS.len())];
+    let apology = APOLOGIES[rng.gen_range(0..APOLOGIES.len())];
+    let bravery = BRAVERY_WORDS[rng.gen_range(0..BRAVERY_WORDS.len())];
+    let phrase = PHRASE_WORDS[rng.gen_range(0..PHRASE_WORDS.len())];
+    let spelling = SPELLING_WORDS[rng.gen_range(0..SPELLING_WORDS.len())];
+    let division = DIVISION_WORDS[rng.gen_range(0..DIVISION_WORDS.len())];
+
+    format!(
+        "{} {}! {}\n{} moją {}, ale {} 'na pewno' {} {}.",
+        greeting, username, emoji, apology, bravery, phrase, spelling, division
+    )
+}
+
+// Generates tweet with daily statistics.
+// A prev_stat of 0 means there is nothing to compare with, so the comparison
+// sentence is left out.
+pub fn generate_tweet(prev_stat: usize, cur_stat: usize) -> String {
+    let diff = prev_stat.abs_diff(cur_stat);
+    let comparison = if prev_stat == 0 {
+        // No statistics on the profile yet.
+        String::new()
+    } else if prev_stat == cur_stat {
+        "\n\nTo wynik taki sam jak poprzedniego dnia. 📊".to_string()
+    } else {
+        // Relative change, rounded to a whole percent.
+        let percent = (diff as f32 / prev_stat as f32 * 100.0).round();
+        let (direction, sign, chart) = if prev_stat < cur_stat {
+            ("większy", "+", "📈")
+        } else {
+            ("mniejszy", "-", "📉")
+        };
+
+        format!(
+            "\n\nTo wynik o {} {} względem poprzedniego dnia ({}{}%). {}",
+            diff, direction, sign, percent, chart
+        )
+    };
+
+    // The statistics always describe the day before the current UTC date.
+    let today = OffsetDateTime::now_utc().date();
+    let yesterday = today.previous_day().expect("invalid date");
+    format!(
+        "W dniu {} wyrażenie 'na pewno' zostało błędnie napisane przez {} użytkowników Twittera.{}",
+        yesterday, cur_stat, comparison
+    )
+}
+
+// Extracts statistics (first integer) from text.
+// Returns None when no whitespace-separated word parses as an integer.
+pub fn extract_statistics(text: &str) -> Option<usize> {
+    text.split_whitespace()
+        .find_map(|word| word.parse::<usize>().ok())
+}
+
+// Prints start message.
+pub fn print_start_message() {
+    let format = format_description::parse(DATE_FORMAT).expect("invalid date");
+    let now = OffsetDateTime::now_utc()
+        .format(&format)
+        .expect("invalid date");
+    let msg = format!("[{}] Running the bot...", now);
+
+    // Bold green.
+    println!("\x1b[1m\x1b[32m{}\x1b[0m", msg);
+}
+
+// Prints message after updating statistics on the profile.
+pub fn print_update_message(username: String) {
+    let format = format_description::parse(DATE_FORMAT).expect("invalid date");
+    let now = OffsetDateTime::now_utc()
+        .format(&format)
+        .expect("invalid date");
+    let msg = format!(
+        "[{}] Posted a profile update: https://twitter.com/{}.",
+        now, username
+    );
+
+    // Bold.
+    println!("\x1b[1m{}\x1b[0m", msg);
+}
+
+// Prints message after replying to certain user.
+pub fn print_reply_message(post_id: NumericId, username: String) {
+    let format = format_description::parse(DATE_FORMAT).expect("invalid date");
+    let now = OffsetDateTime::now_utc()
+        .format(&format)
+        .expect("invalid date");
+    let msg = format!(
+        "[{}] Posted a reply: https://twitter.com/{}/status/{}.",
+        now, username, post_id
+    );
+
+    println!("{}", msg);
+}
+
+// Prints end message.
+// The current UTC time, rendered with DATE_FORMAT, prefixes the message.
+pub fn print_end_message() {
+    let format = format_description::parse(DATE_FORMAT).expect("invalid date");
+    let now = OffsetDateTime::now_utc()
+        .format(&format)
+        .expect("invalid date");
+    let msg = format!("[{}] Finished.", now);
+
+    // Bold green.
+    println!("\x1b[1m\x1b[32m{}\x1b[0m", msg);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use twitter_v2::Result;
+
+    const LONGEST_USERNAME: usize = 15;
+    const CHARACTERS_LIMIT: usize = 280;
+    const MAX_ERRORS_DAILY: usize = 100000;
+
+    #[tokio::test]
+    async fn test_generate_reply() -> Result<()> {
+        let limit = CHARACTERS_LIMIT - LONGEST_USERNAME;
+        assert!(generate_reply("").len().le(&limit));
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_generate_tweet() -> Result<()> {
+        let longest = MAX_ERRORS_DAILY;
+        let limit = CHARACTERS_LIMIT;
+        assert!(generate_tweet(longest, longest).len().le(&limit));
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_extract_statistics() -> Result<()> {
+        let string = " \n\n\0\0 01a a1 1111-11-11 111";
+        assert_eq!(extract_statistics(string).unwrap(), 111);
+        Ok(())
+    }
+}