6
6
7
7
from apps .kaist .models import Post
8
8
from apps .kaist .portal .post_response import PostResponse
9
+ from ara import redis
10
+ from ara .log import log
9
11
from ara .settings import PORTAL_JSESSIONID
10
12
11
13
@@ -14,8 +16,11 @@ class SessionExpiredException(Exception):
14
16
15
17
16
18
class Crawler :
19
+ SESSION_KEY = "JSESSIONID"
20
+ SESSION_REDIS_KEY = "crawler:jsessionid"
21
+
17
22
_session = requests .Session ()
18
- _session_id = PORTAL_JSESSIONID
23
+ _session . cookies . set ( SESSION_KEY , PORTAL_JSESSIONID )
19
24
20
25
_KST = pytz_timezone ("Asia/Seoul" )
21
26
@@ -68,16 +73,26 @@ def get_post(cls, post_id: int) -> Post:
68
73
:param post_id: The ID of the post to get
69
74
"""
70
75
71
- response = cls ._session .get (
72
- url = f"https://portal.kaist.ac.kr/wz/api/board/recents/{ post_id } ?menuNo=21" ,
73
- cookies = {"JSESSIONID" : cls ._session_id },
74
- )
76
+ retry_count = 1
77
+
78
+ while retry_count >= 0 :
79
+ response = cls ._session .get (
80
+ f"https://portal.kaist.ac.kr/wz/api/board/recents/{ post_id } ?menuNo=21"
81
+ )
75
82
76
- if "application/json" not in response .headers ["Content-Type" ]:
77
- raise SessionExpiredException (f"Failed to get post { post_id } " )
83
+ if cls ._has_fetched_successfully (response ):
84
+ post = cls ._parse_response (response .json ())
85
+ return post
78
86
79
- post = cls ._parse_response (response .json ())
80
- return post
87
+ if retry_count == 0 :
88
+ raise SessionExpiredException (f"Failed to get post { post_id } " )
89
+
90
+ cls .update_session_id ()
91
+ retry_count -= 1
92
+
93
@classmethod
def _has_fetched_successfully(cls, response: requests.Response) -> bool:
    """Report whether the portal answered with a JSON payload.

    :param response: The HTTP response returned for a post request
    :return: True when the response's Content-Type header mentions
        "application/json", False otherwise (including when the header
        is missing altogether)
    """
    # A response without a Content-Type header must count as a failed fetch
    # rather than raising KeyError, so the caller's retry logic still runs.
    content_type = response.headers.get("Content-Type", "")
    return "application/json" in content_type
81
96
82
97
@classmethod
83
98
def find_next_post (cls , post : Post ) -> Post | None :
@@ -86,5 +101,10 @@ def find_next_post(cls, post: Post) -> Post | None:
86
101
return cls .get_post (post .next_post_id )
87
102
88
103
@classmethod
def update_session_id(cls) -> None:
    """Refresh the crawler's session cookie from the value stored in Redis.

    Looks up ``SESSION_REDIS_KEY`` in Redis. When a value is present it is
    logged and written to the shared session's cookie jar under
    ``SESSION_KEY``. When the key is absent the current cookie is left
    untouched.
    """
    stored_session_id = redis.get(cls.SESSION_REDIS_KEY)

    # The lookup yields None for a missing key; check before decoding so the
    # absence of a stored session does not surface as an AttributeError.
    if stored_session_id is None:
        return

    # Accept both raw bytes and an already-decoded string from the client.
    if isinstance(stored_session_id, bytes):
        new_session_id = stored_session_id.decode()
    else:
        new_session_id = stored_session_id

    # NOTE(review): this writes the full session identifier to the log;
    # confirm that is acceptable for this deployment.
    log.info(
        f"KAIST Portal Crawler :: JSESSIONID updated to ({new_session_id})"
    )
    cls._session.cookies.set(cls.SESSION_KEY, new_session_id)
0 commit comments