Skip to content

Commit 559b084

Browse files
authored
Direct the content of log files to stdout of pods in kubernetes deployment (#2871)
Fixes #2357
1 parent 4af5689 commit 559b084

File tree

5 files changed

+57
-9
lines changed

5 files changed

+57
-9
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ sudo make install
347347

348348
### Building Docker images
349349

350-
GraphScope ships with a [Dockerfile](k8s/graphscope.Dockerfile) that can build docker images for releasing. The images are built on a `builder` image with all dependencies installed and copied to
350+
GraphScope ships with a [Dockerfile](k8s/dockerfiles/graphscope-dev.Dockerfile) that can build docker images for releasing. The images are built on a `builder` image with all dependencies installed and copied to
351351
a `runtime-base` image. To build images with latest version of GraphScope, go to the `k8s/internal` directory under root directory and run this command.
352352

353353
```bash

coordinator/gscoordinator/cluster_builder.py

+18-4
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ def __init__(
163163

164164
self._vineyard_requests = {"cpu": vineyard_cpu, "memory": vineyard_mem}
165165
self._analytical_requests = {"cpu": engine_cpu, "memory": engine_mem}
166+
# Should give executor a smaller value, since it doesn't need to load the graph
166167
self._executor_requests = {"cpu": "2000m", "memory": engine_mem}
167168
self._learning_requests = {"cpu": "1000m", "memory": "256Mi"}
168169
self._frontend_requests = {"cpu": "200m", "memory": "512Mi"}
@@ -265,10 +266,15 @@ def get_engine_container_helper(
265266
)
266267
return container
267268

269+
def _get_tail_if_exists_cmd(self, fname: str):
270+
return (
271+
f"while true; do if [ -e {fname} ]; then tail -f {fname}; fi; sleep 1; done"
272+
)
273+
268274
def get_analytical_container(self, volume_mounts, with_java=False):
269275
name = self.analytical_container_name
270276
image = self._analytical_image if not with_java else self._analytical_java_image
271-
args = ["tail", "-f", "/dev/null"]
277+
args = ["bash", "-c", self._get_tail_if_exists_cmd("/tmp/grape_engine.INFO")]
272278
container = self.get_engine_container_helper(
273279
name,
274280
image,
@@ -292,7 +298,11 @@ def get_analytical_container(self, volume_mounts, with_java=False):
292298
def get_interactive_executor_container(self, volume_mounts):
293299
name = self.interactive_executor_container_name
294300
image = self._interactive_executor_image
295-
args = ["tail", "-f", "/dev/null"]
301+
args = [
302+
"bash",
303+
"-c",
304+
self._get_tail_if_exists_cmd("/var/log/graphscope/current/executor.log"),
305+
]
296306
container = self.get_engine_container_helper(
297307
name,
298308
image,
@@ -445,7 +455,7 @@ def get_engine_headless_service(self):
445455
"ClusterIP", ports, self._engine_labels, None
446456
)
447457

448-
# Necessary, create a headless service for statefulset
458+
# Necessary, create a headless service for statefulsets
449459
service_spec.cluster_ip = "None"
450460
service = ResourceBuilder.get_service(
451461
self._namespace, name, service_spec, self._engine_labels
@@ -534,7 +544,11 @@ def get_graphlearn_service_endpoint(self, api_client, object_id, pod_host_ip_lis
534544
def get_interactive_frontend_container(self):
535545
name = self.interactive_frontend_container_name
536546
image = self._interactive_frontend_image
537-
args = ["tail", "-f", "/dev/null"]
547+
args = [
548+
"bash",
549+
"-c",
550+
self._get_tail_if_exists_cmd("/var/log/graphscope/current/frontend.log"),
551+
]
538552
container = kube_client.V1Container(name=name, image=image, args=args)
539553
container.image_pull_policy = self._image_pull_policy
540554
container.resources = ResourceBuilder.get_resources(

docs/development/how_to_debug.md

+28
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,37 @@ This document shows how to debugging GraphScope under various conditions.
22

33
### Debugging on local deployment
44

5+
## Find the logs
6+
7+
Most of the logs are streamed to the stdout of the client. You can control the log level with:
8+
9+
```python
10+
import graphscope
11+
graphscope.set_option(show_log=True)
12+
graphscope.set_option(log_level='DEBUG') # could also be INFO, ERROR
13+
```
14+
15+
As you may know, GraphScope is composed of three engines. The detailed log location of each engine is:
16+
17+
- Analytical Engine: `/tmp/grape_engine.INFO`
18+
- Interactive Engine: Inside `/var/log/graphscope/`, or `$HOME/log/graphscope` if GraphScope doesn't have write permission for `/var/log`. You may find several folders named with a long number, which is the object id of the graph. There is also a `current` folder that links to the log folder of the most recently created interactive instance.
19+
- Learning Engine: `graphlearn.INFO` in the current directory.
20+
21+
522

623
### Debugging on Kubernetes deployment
724

25+
## Find the logs
26+
27+
In a Kubernetes environment, most of the logs are still output to the console. In addition, you can find detailed logs in each pod's stdout, or in files inside each pod.
28+
29+
Note: You could use `kubectl logs <pod>` to inspect the stdout of the pod. Use `kubectl logs <pod> -c <container>` to inspect a specific container inside the pod.
30+
31+
- Coordinator: The stdout of coordinator pod.
32+
- Analytical Engine: The stdout of the engine container in the engine pod.
33+
- Interactive Engine: The stdout of the executor container in the engine pod for the executor log, and the stdout of the interactive-frontend pod for the frontend log. The log files reside in `/var/log/graphscope` of each container, respectively.
34+
35+
836
## Commands for Debugging
937

1038
Here is a list of commands commonly used for checking the status of the GraphScope deployment on K8s.

docs/frequently_asked_questions.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ If you don't find an answer to your question here, feel free to file an `Issues`
3232
3333
graphscope.set_option(show_log=True)
3434
35-
If you are running GraphScope in k8s, you can use `kubectl describe/logs <https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands>`_ to check the log/status of the cluster. If the disk storage is accessible(on local or via Pods), you may also find logs in `/tmp/gs/runtime/logs`.
35+
If you are running GraphScope in k8s, you can use `kubectl describe/logs <https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands>`_ to check the logs/status of the GraphScope pods.
36+
If the disk storage is accessible (locally or via Pods), you may also find logs in `/var/log/graphscope/current` or `$HOME/.local/log/graphscope`.
3637

3738

3839
4. Why do I find more Pods than expected with the command `kubectl get pod`?

interactive_engine/assembly/src/bin/graphscope/giectl

+8-3
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ start_frontend() {
8080
declare -r pid_dir=${GRAPHSCOPE_RUNTIME}/pid/${object_id}
8181
mkdir -p ${log_dir} ${config_dir} ${pid_dir}
8282

83+
# make a "current" link
84+
unlink ${GS_LOG}/current || true
85+
ln -s ${log_dir} ${GS_LOG}/current
86+
8387
declare java_opt="-server
8488
-verbose:gc
8589
-Xloggc:${log_dir}/frontend.gc.log
@@ -149,6 +153,10 @@ start_executor() {
149153
export LD_LIBRARY_PATH=${GRAPHSCOPE_HOME}/lib:${LD_LIBRARY_PATH}
150154
export DYLD_LIBRARY_PATH=${GRAPHSCOPE_HOME}/lib:${DYLD_LIBRARY_PATH}
151155

156+
# make a "current" link
157+
unlink ${GS_LOG}/current || true
158+
ln -s ${log_dir} ${GS_LOG}/current
159+
152160
# set executor config file
153161
sed -e "s@GRAPH_NAME@${object_id}@g" \
154162
-e "s@VINEYARD_OBJECT_ID@${object_id}@g" \
@@ -206,9 +214,6 @@ create_gremlin_instance_on_local() {
206214
mkdir -p ${GS_LOG}
207215

208216
declare -r log_dir=${GS_LOG}/${object_id}
209-
# make a "current" link
210-
unlink ${GS_LOG}/current || true
211-
ln -s ${log_dir} ${GS_LOG}/current
212217

213218
# Frontend use executor rpc port
214219
network_servers=""

0 commit comments

Comments
 (0)