Skip to content

Commit 00c67ce

Browse files
authored
Merge pull request #485 from sparcs-kaist/feature/meal_crawler
New Feature : 학식 API
2 parents f3a05ea + 2dffc33 commit 00c67ce

File tree

7 files changed

+772
-255
lines changed

7 files changed

+772
-255
lines changed

apps/core/management/scripts/meal_crawler.py

Lines changed: 423 additions & 0 deletions
Large diffs are not rendered by default.

apps/core/management/tasks.py

Lines changed: 93 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,93 @@
1-
import time
2-
from collections import defaultdict
3-
4-
from apps.core.management.scripts.reminder_email_for_reply import send_email
5-
from apps.core.models import BestArticle
6-
from apps.kaist.portal.worker import Worker as PortalCrawlWorker
7-
from ara import celery_app, redis
8-
9-
10-
@celery_app.task
def crawl_portal():
    """Invoke ``PortalCrawlWorker.fetch_and_save_from_the_latest`` with a batch size of 32.

    Registered as a task on ``celery_app``; the worker's return value is discarded.
    """
    batch_size = 32
    PortalCrawlWorker.fetch_and_save_from_the_latest(batch_size=batch_size)
13-
14-
15-
def _get_redis_key(type_):
16-
return f"articles:{type_}"
17-
18-
19-
def _get_best(days, period):
    """Select and store the best articles for ``period`` from the last ``days`` days.

    Steps:
      1. Clear the ``latest`` flag on the existing ``BestArticle`` rows of
         this period.
      2. Read the "vote" and "hit" events recorded between ``now - days`` and
         ``now`` from Redis and sum them per article id.
      3. Take up to five articles ordered by vote total (hit total breaks
         ties). If fewer than five distinct articles received votes, top the
         list up with the most-hit articles that are not already chosen.
      4. Bulk-create one ``BestArticle`` (``latest=True``) per chosen article.

    Args:
        days: Length of the look-back window, in days.
        period: Period value stored on the created ``BestArticle`` rows.

    Returns:
        Whatever ``BestArticle.objects.bulk_create`` returns.

    Raises:
        ValueError: If a stored event does not consist of exactly four
            ``:``-separated fields, or its second field is not an integer.
    """
    # Retire the previous winners of this period before choosing new ones.
    BestArticle.objects.filter(latest=True, period=period).update(latest=False)

    to_ts = time.time()
    from_ts = to_ts - 24 * 60 * 60 * days

    def tally(kind):
        # Sum the integer amount (second field) of every event per article id
        # (first field); the remaining two fields are not used here.
        totals = defaultdict(int)
        # The lower bound carries a leading "(" -- presumably Redis' notation
        # for an exclusive bound; confirm against the redis helper.
        events = redis.get_objs_by_values(_get_redis_key(kind), f"({from_ts}", to_ts)
        for event in events:
            article_id, amount, _, _ = event.split(":")
            totals[article_id] += int(amount)
        return totals

    votes = tally("vote")
    hits = tally("hit")

    # Votes decide the order and hits break ties. Looking up an article that
    # has no hits yields 0 (the defaultdict stores that entry as a side effect).
    ranked = sorted(
        votes.items(),
        key=lambda entry: (entry[1], hits[entry[0]]),
        reverse=True,
    )
    winners = [article_id for article_id, _ in ranked[:5]]

    if len(votes) < 5:
        # Not enough voted articles: fill the remaining slots by hit count.
        by_hits = sorted(hits.items(), key=lambda entry: entry[1], reverse=True)
        for article_id, _ in by_hits:
            if article_id not in winners:
                winners.append(article_id)

            if len(winners) >= 5:
                break

    return BestArticle.objects.bulk_create(
        [
            BestArticle(period=period, article_id=article_id, latest=True)
            for article_id in winners
        ]
    )
62-
63-
64-
@celery_app.task
def save_daily_best():
    """Store the best articles of the last day.

    Registered as a task on ``celery_app``. Delegates to ``_get_best`` with a
    1-day window and ``BestArticle.PERIOD_CHOICES_DAILY`` and returns its result.
    """
    period = BestArticle.PERIOD_CHOICES_DAILY
    return _get_best(days=1, period=period)
67-
68-
69-
@celery_app.task
def save_weekly_best():
    """Store the best articles of the last seven days.

    Registered as a task on ``celery_app``. Delegates to ``_get_best`` with a
    7-day window and ``BestArticle.PERIOD_CHOICES_WEEKLY`` and returns its result.
    """
    period = BestArticle.PERIOD_CHOICES_WEEKLY
    return _get_best(days=7, period=period)
72-
73-
74-
@celery_app.task
def send_email_for_reply_reminder():
    """Run ``send_email`` from the ``reminder_email_for_reply`` script.

    Registered as a task on ``celery_app``; the result of ``send_email`` is
    discarded.
    """
    send_email()
1+
import time
2+
from collections import defaultdict
3+
4+
from apps.core.management.scripts.portal_crawler import crawl_hour, crawl_view
5+
from apps.core.management.scripts.reminder_email_for_reply import send_email
6+
from apps.core.management.scripts.meal_crawler import crawl_daily_meal
7+
from apps.core.models import BestArticle
8+
from ara import celery_app, redis
9+
10+
from datetime import datetime, timedelta
11+
12+
@celery_app.task
def crawl_portal():
    """Run the portal crawler scripts: ``crawl_view`` first, then ``crawl_hour``.

    Registered as a task on ``celery_app``; the crawlers' return values are
    discarded.
    """
    for crawl in (crawl_view, crawl_hour):
        crawl()
16+
17+
18+
def _get_redis_key(type_):
19+
return f"articles:{type_}"
20+
21+
22+
def _get_best(days, period):
    """Select and store the best articles for ``period`` from the last ``days`` days.

    Steps:
      1. Clear the ``latest`` flag on the existing ``BestArticle`` rows of
         this period.
      2. Read the "vote" and "hit" events recorded between ``now - days`` and
         ``now`` from Redis and sum them per article id.
      3. Take up to five articles ordered by vote total (hit total breaks
         ties). If fewer than five distinct articles received votes, top the
         list up with the most-hit articles that are not already chosen.
      4. Bulk-create one ``BestArticle`` (``latest=True``) per chosen article.

    Args:
        days: Length of the look-back window, in days.
        period: Period value stored on the created ``BestArticle`` rows.

    Returns:
        Whatever ``BestArticle.objects.bulk_create`` returns.

    Raises:
        ValueError: If a stored event does not consist of exactly four
            ``:``-separated fields, or its second field is not an integer.
    """
    # Retire the previous winners of this period before choosing new ones.
    BestArticle.objects.filter(latest=True, period=period).update(latest=False)

    to_ts = time.time()
    from_ts = to_ts - 24 * 60 * 60 * days

    def tally(kind):
        # Sum the integer amount (second field) of every event per article id
        # (first field); the remaining two fields are not used here.
        totals = defaultdict(int)
        # The lower bound carries a leading "(" -- presumably Redis' notation
        # for an exclusive bound; confirm against the redis helper.
        events = redis.get_objs_by_values(_get_redis_key(kind), f"({from_ts}", to_ts)
        for event in events:
            article_id, amount, _, _ = event.split(":")
            totals[article_id] += int(amount)
        return totals

    votes = tally("vote")
    hits = tally("hit")

    # Votes decide the order and hits break ties. Looking up an article that
    # has no hits yields 0 (the defaultdict stores that entry as a side effect).
    ranked = sorted(
        votes.items(),
        key=lambda entry: (entry[1], hits[entry[0]]),
        reverse=True,
    )
    winners = [article_id for article_id, _ in ranked[:5]]

    if len(votes) < 5:
        # Not enough voted articles: fill the remaining slots by hit count.
        by_hits = sorted(hits.items(), key=lambda entry: entry[1], reverse=True)
        for article_id, _ in by_hits:
            if article_id not in winners:
                winners.append(article_id)

            if len(winners) >= 5:
                break

    return BestArticle.objects.bulk_create(
        [
            BestArticle(period=period, article_id=article_id, latest=True)
            for article_id in winners
        ]
    )
65+
66+
67+
@celery_app.task
def save_daily_best():
    """Store the best articles of the last day.

    Registered as a task on ``celery_app``. Delegates to ``_get_best`` with a
    1-day window and ``BestArticle.PERIOD_CHOICES_DAILY`` and returns its result.
    """
    period = BestArticle.PERIOD_CHOICES_DAILY
    return _get_best(days=1, period=period)
70+
71+
72+
@celery_app.task
def save_weekly_best():
    """Store the best articles of the last seven days.

    Registered as a task on ``celery_app``. Delegates to ``_get_best`` with a
    7-day window and ``BestArticle.PERIOD_CHOICES_WEEKLY`` and returns its result.
    """
    period = BestArticle.PERIOD_CHOICES_WEEKLY
    return _get_best(days=7, period=period)
75+
76+
77+
@celery_app.task
def send_email_for_reply_reminder():
    """Run ``send_email`` from the ``reminder_email_for_reply`` script.

    Registered as a task on ``celery_app``; the result of ``send_email`` is
    discarded.
    """
    send_email()
80+
81+
# Cafeteria meal crawling
@celery_app.task
def crawl_meal():
    """Crawl cafeteria menus for a 14-day window around today.

    Calls ``crawl_daily_meal`` once per date, oldest first, for the 7 days
    before today, today itself, and the 6 days after it. Each date is passed
    as a ``YYYY-MM-DD`` string. Consecutive requests are spaced 2 seconds
    apart; no pause is added after the final request.

    NOTE(review): the window is asymmetric (-7 .. +6 days). If "7 days before
    and after" was meant to include day +7, the range end should be 8 --
    confirm the intended window before changing it.
    """
    # datetime.now() without a tz argument returns naive local time.
    today = datetime.now()

    # Day offsets -7 .. +6 relative to today, formatted for crawl_daily_meal.
    dates = [
        (today + timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in range(-7, 7)
    ]

    for index, date_str in enumerate(dates):
        if index:
            # Pause between requests (presumably to go easy on the source
            # site); skipped before the first one so the task does not end
            # with an idle sleep.
            time.sleep(2)
        crawl_daily_meal(date_str)

apps/core/views/router.py

Lines changed: 78 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,78 @@
1-
from rest_framework import routers
2-
3-
from apps.core.views import viewsets
4-
5-
router = routers.DefaultRouter()

# Every endpoint as (URL prefix, viewset), listed in registration order.
_ENDPOINTS = (
    ("boards", viewsets.BoardViewSet),
    ("board_groups", viewsets.BoardGroupViewSet),
    ("articles", viewsets.ArticleViewSet),
    ("comments", viewsets.CommentViewSet),
    ("reports", viewsets.ReportViewSet),
    ("blocks", viewsets.BlockViewSet),
    ("attachments", viewsets.AttachmentViewSet),
    ("scraps", viewsets.ScrapViewSet),
    ("notifications", viewsets.NotificationViewSet),
    ("faqs", viewsets.FAQViewSet),
    ("best_searches", viewsets.BestSearchViewSet),
)

for _prefix, _viewset in _ENDPOINTS:
    router.register(prefix=_prefix, viewset=_viewset)
1+
from rest_framework import routers
2+
3+
from apps.core.views import viewsets
4+
5+
router = routers.DefaultRouter()

# Queryset-backed endpoints as (URL prefix, viewset), in registration order.
_ENDPOINTS = (
    ("boards", viewsets.BoardViewSet),
    ("board_groups", viewsets.BoardGroupViewSet),
    ("articles", viewsets.ArticleViewSet),
    ("comments", viewsets.CommentViewSet),
    ("reports", viewsets.ReportViewSet),
    ("blocks", viewsets.BlockViewSet),
    ("attachments", viewsets.AttachmentViewSet),
    ("scraps", viewsets.ScrapViewSet),
    ("notifications", viewsets.NotificationViewSet),
    ("faqs", viewsets.FAQViewSet),
    ("best_searches", viewsets.BestSearchViewSet),
)

for _prefix, _viewset in _ENDPOINTS:
    router.register(prefix=_prefix, viewset=_viewset)

# MealViewSet: cafeteria meal crawling feature.
# It is a simple API that does not use a queryset, so the basename is set
# explicitly.
router.register(
    prefix="meals",
    viewset=viewsets.MealViewSet,
    basename="meal",
)

apps/core/views/viewsets/__init__.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
from .article import *
2-
from .attachment import *
3-
from .best_search import *
4-
from .block import *
5-
from .board import *
6-
from .board_group import *
7-
from .comment import *
8-
from .faq import *
9-
from .notification import *
10-
from .report import *
11-
from .scrap import *
1+
from .article import *
2+
from .attachment import *
3+
from .best_search import *
4+
from .block import *
5+
from .board import *
6+
from .board_group import *
7+
from .comment import *
8+
from .faq import *
9+
from .notification import *
10+
from .report import *
11+
from .scrap import *
12+
from .meal import *

0 commit comments

Comments
 (0)