diff --git a/docs/user_guide/trace.md b/docs/user_guide/trace.md index afd56ee335..23d1c402d1 100644 --- a/docs/user_guide/trace.md +++ b/docs/user_guide/trace.md @@ -427,11 +427,38 @@ The meaning of the trace timestamps is: * BACKEND_OUTPUT: The tensor in the response of a backend. +## Tracing for BLS models +
+Triton does not collect traces for child models invoked from
+[BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting)
+models by default.
+
+To include child models in collected traces, the user needs to provide the
+`trace` argument (as shown in the example below) when constructing an
+`InferenceRequest` object. Passing `request.trace()` lets Triton associate
+the child model's spans with the parent model's trace.
+
+```python
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    ...
+    def execute(self, requests):
+        ...
+        for request in requests:
+            ...
+            inference_request = pb_utils.InferenceRequest(
+                model_name='model_name',
+                requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
+                inputs=[], trace=request.trace())
+
+```
+
 ## OpenTelemetry trace support
 
-Triton provides an option to generate and export traces
-for standalone and ensemble models
-using [OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
+Triton provides an option to generate and export traces using
+[OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
 
 To specify OpenTelemetry mode for tracing, specify the
 `--trace-config` flag as follows:
@@ -477,16 +504,30 @@ The following table shows available OpenTelemetry trace APIs settings for
 trace data.
+
+ resource
+ service.name=triton-inference-server
+
+ Key-value pairs to be used as resource attributes.
+ They should be specified using the following template:
+ --trace-config opentelemetry,resource=<key>=<value>
+ For example:
+ --trace-config opentelemetry,resource=service.name=triton
+ --trace-config opentelemetry,resource=service.version=1
+ Alternatively, key-value attributes can be specified through the
+ + OTEL_RESOURCE_ATTRIBUTES + environment variable. + + + ### Limitations - OpenTelemetry trace mode is not supported on Windows systems. -- Tracing [BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting) -models is not supported. - - Triton supports only [OTLP/HTTP Exporter](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp) and allows specification of only url for this exporter through diff --git a/qa/L0_trace/opentelemetry_unittest.py b/qa/L0_trace/opentelemetry_unittest.py index 6ca1c18f49..5055f4e88a 100644 --- a/qa/L0_trace/opentelemetry_unittest.py +++ b/qa/L0_trace/opentelemetry_unittest.py @@ -28,7 +28,7 @@ sys.path.append("../common") import json -import time +import re import unittest import numpy as np @@ -36,31 +36,67 @@ import tritonclient.grpc as grpcclient import tritonclient.http as httpclient -EXPECTED_NUM_SPANS = 10 +EXPECTED_NUM_SPANS = 16 +# OpenTelemetry OStream exporter sets `parent_span_id` to "0000000000000000", +# if current span is a root span, i.e. there is no parent span. +# https://github.com/open-telemetry/opentelemetry-cpp/blob/b7fd057185c4ed2dff507b859cbe058b7609fb4a/exporters/ostream/src/span_exporter.cc#L78C54-L78C68 +NO_PARENT_SPAN = "0000000000000000" class OpenTelemetryTest(tu.TestResultCollector): def setUp(self): - while True: - with open("trace_collector.log", "rt") as f: - data = f.read() - if data.count("resource_spans") != EXPECTED_NUM_SPANS: - time.sleep(5) - continue - else: - break - - data = data.split("\n") - full_spans = [ - entry.split("POST")[0] for entry in data if "resource_spans" in entry - ] - self.spans = [] - for span in full_spans: - span = json.loads(span) - self.spans.append(span["resource_spans"][0]["scope_spans"][0]["spans"][0]) + # Extracted spans are in json-like format, thus data needs to be + # post-processed, so that `json` could accept it for further + # processing + with open("trace_collector.log", "rt") as f: + data = f.read() + # Removing new lines and tabs around `{` + json_string = re.sub("\n\t{\n\t", "{", data) + # `resources` field is a dictionary, so adding `{` and`}` + # in the next 2 transformations, `instr-lib` is a next field, + # so whatever goes before it, belongs to `resources`. + json_string = re.sub( + "resources : \n\t", "resources : {\n\t", json_string + ) + json_string = re.sub( + "\n instr-lib :", "}\n instr-lib :", json_string + ) + # `json`` expects "key":"value" format, some fields in the + # data have empty string as value, so need to add `"",` + json_string = re.sub(": \n\t", ':"",', json_string) + json_string = re.sub(": \n", ':"",', json_string) + # Extracted data missing `,' after each key-value pair, + # which `json` exppects + json_string = re.sub("\n|\n\t", ",", json_string) + # Removing tabs + json_string = re.sub("\t", "", json_string) + # `json` expects each key and value have `"`'s, so adding them to + # every word/number/alpha-numeric entry + json_string = re.sub(r"\b([\w.-]+)\b", r'"\1"', json_string) + # `span kind`` represents one key + json_string = re.sub('"span" "kind"', '"span kind"', json_string) + # Removing extra `,` + json_string = re.sub("{,", "{", json_string) + json_string = re.sub(",}", "}", json_string) + # Adding `,` between dictionary entries + json_string = re.sub("}{", "},{", json_string) + # `events` is a list of dictionaries, `json` will accept it in the + # form of "events" : [{....}, {.....}, ...] 
+ json_string = re.sub( + '"events" : {', '"events" : [{', json_string + ) + # Closing `events`' list of dictionaries + json_string = re.sub('}, "links"', '}], "links"', json_string) + # Last 2 symbols are not needed + json_string = json_string[:-2] + # Since now `json_string` is a string, which represents dictionaries, + # we put it into one dictionary, so that `json` could read it as one. + json_string = '{ "spans" :[' + json_string + "] }" + self.spans = json.loads(json_string)["spans"] self.simple_model_name = "simple" self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32" + self.bls_model_name = "bls_simple" self.root_span = "InferRequest" def _check_events(self, span_name, events): @@ -121,12 +157,16 @@ def _check_parent(self, child_span, parent_span): # Check that child and parent span have the same trace_id # and child's `parent_span_id` is the same as parent's `span_id` self.assertEqual(child_span["trace_id"], parent_span["trace_id"]) - self.assertIn( - "parent_span_id", - child_span, + self.assertNotEqual( + child_span["parent_span_id"], + NO_PARENT_SPAN, "child span does not have parent span id specified", ) - self.assertEqual(child_span["parent_span_id"], parent_span["span_id"]) + self.assertEqual( + child_span["parent_span_id"], + parent_span["span_id"], + "child {} , parent {}".format(child_span, parent_span), + ) def test_spans(self): parsed_spans = [] @@ -134,19 +174,21 @@ def test_spans(self): # Check that collected spans have proper events recorded for span in self.spans: span_name = span["name"] - self._check_events(span_name, json.dumps(span["events"])) + self._check_events(span_name, str(span["events"])) parsed_spans.append(span_name) - # There should be 6 spans in total: - # 3 for http request, 3 for grpc request, 4 for ensemble - self.assertEqual(len(self.spans), 10) - # We should have 3 compute spans - self.assertEqual(parsed_spans.count("compute"), 3) - # 4 request spans (3 named simple - same as our model name, 1 ensemble) - self.assertEqual(parsed_spans.count(self.simple_model_name), 3) - self.assertEqual(parsed_spans.count(self.ensemble_model_name), 1) - # 3 root spans - self.assertEqual(parsed_spans.count(self.root_span), 3) + # There should be 16 spans in total: + # 3 for http request, 3 for grpc request, 4 for ensemble, 6 for bls + self.assertEqual(len(self.spans), EXPECTED_NUM_SPANS) + # We should have 5 compute spans + self.assertEqual(parsed_spans.count("compute"), 5) + # 7 request spans + # (4 named simple - same as our model name, 2 ensemble, 1 bls) + self.assertEqual(parsed_spans.count(self.simple_model_name), 4) + self.assertEqual(parsed_spans.count(self.ensemble_model_name), 2) + self.assertEqual(parsed_spans.count(self.bls_model_name), 1) + # 4 root spans + self.assertEqual(parsed_spans.count(self.root_span), 4) def test_nested_spans(self): # First 3 spans in `self.spans` belong to HTTP request @@ -157,30 +199,33 @@ def test_nested_spans(self): for child, parent in zip(self.spans[:3], self.spans[1:3]): self._check_parent(child, parent) - # root_span should not have `parent_span_id` field - self.assertNotIn( - "parent_span_id", self.spans[2], "root span has a parent_span_id specified" - ) - # Next 3 spans in `self.spans` belong to GRPC request # Order of spans and their relationship described earlier for child, parent in zip(self.spans[3:6], self.spans[4:6]): self._check_parent(child, parent) - # root_span should not have `parent_span_id` field - self.assertNotIn( - "parent_span_id", self.spans[5], "root span has a parent_span_id specified" - 
) - - # Final 4 spans in `self.spans` belong to ensemble request + # Next 4 spans in `self.spans` belong to ensemble request # Order of spans: compute span - request span - request span - root span for child, parent in zip(self.spans[6:10], self.spans[7:10]): self._check_parent(child, parent) - # root_span should not have `parent_span_id` field - self.assertNotIn( - "parent_span_id", self.spans[9], "root span has a parent_span_id specified" - ) + # Final 6 spans in `self.spans` belong to bls with ensemble request + # Order of spans: + # compute span - request span (simple) - request span (ensemble)- + # - compute (for bls) - request (bls) - root span + # request span (ensemble) and compute (for bls) are children of + # request (bls) + children = self.spans[10:] + parents = (self.spans[11:13], self.spans[14], self.spans[14:]) + for child, parent in zip(children, parents[0]): + self._check_parent(child, parent) + + def test_resource_attributes(self): + for span in self.spans: + self.assertIn("test.key", span["resources"]) + self.assertEqual("test.value", span["resources"]["test.key"]) + self.assertIn("service.name", span["resources"]) + self.assertEqual("test_triton", span["resources"]["service.name"]) def prepare_data(client): @@ -214,6 +259,16 @@ def prepare_traces(): inputs = prepare_data(httpclient) triton_client_http.infer("ensemble_add_sub_int32_int32_int32", inputs) + send_bls_request(model_name="ensemble_add_sub_int32_int32_int32") + + +def send_bls_request(model_name="simple"): + with httpclient.InferenceServerClient("localhost:8000") as client: + inputs = prepare_data(httpclient) + inputs.append(httpclient.InferInput("MODEL_NAME", [1], "BYTES")) + inputs[-1].set_data_from_numpy(np.array([model_name], dtype=np.object_)) + client.infer("bls_simple", inputs) + if __name__ == "__main__": unittest.main() diff --git a/qa/L0_trace/test.sh b/qa/L0_trace/test.sh index 984dcb04c4..8a084349fd 100755 --- a/qa/L0_trace/test.sh +++ b/qa/L0_trace/test.sh @@ -34,10 +34,6 @@ CLIENT_LOG="client.log" TEST_RESULT_FILE="test_results.txt" EXPECTED_NUM_TESTS="6" -TRACE_COLLECTOR=trace_collector.py -TRACE_COLLECTOR_LOG="trace_collector.log" -OTLP_PORT=10000 - REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} if [ "$#" -ge 1 ]; then REPO_VERSION=$1 @@ -55,6 +51,7 @@ export CUDA_VISIBLE_DEVICES=0 DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/ +BLSDIR=../python_models/bls_simple MODELBASE=onnx_int32_int32_int32 MODELSDIR=`pwd`/trace_models @@ -78,7 +75,8 @@ cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \ rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/3 && \ (cd $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \ sed -i "s/^name:.*/name: \"ensemble_add_sub_int32_int32_int32\"/" config.pbtxt && \ - sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt) + sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt) && \ + mkdir -p $MODELSDIR/bls_simple/1 && cp $BLSDIR/bls_simple.py $MODELSDIR/bls_simple/1/model.py RET=0 @@ -618,7 +616,7 @@ wait $SERVER_PID # Check `--trace-config` sets arguments properly -SERVER_ARGS="--trace-config=triton,file=some_file.log --trace-config=level=TIMESTAMPS \ +SERVER_ARGS="--trace-config=triton,file=bls_trace.log --trace-config=level=TIMESTAMPS \ --trace-config=rate=4 --trace-config=count=6 --trace-config=mode=triton --model-repository=$MODELSDIR" SERVER_LOG="./inference_server_trace_config.log" run_server @@ -649,10 +647,17 @@ fi if [ `grep -c "\"log_frequency\":\"0\"" 
./curl.out` != "1" ]; then RET=1 fi -if [ `grep -c "\"trace_file\":\"some_file.log\"" ./curl.out` != "1" ]; then +if [ `grep -c "\"trace_file\":\"bls_trace.log\"" ./curl.out` != "1" ]; then RET=1 fi +set +e +# Send bls requests to make sure simple model is traced +for p in {1..4}; do + python -c 'import opentelemetry_unittest; \ + opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")' >> client_update.log 2>&1 +done + set -e kill $SERVER_PID @@ -660,26 +665,97 @@ wait $SERVER_PID set +e +$TRACE_SUMMARY -t bls_trace.log > summary_bls.log + +if [ `grep -c "COMPUTE_INPUT_END" summary_bls.log` != "2" ]; then + cat summary_bls.log + echo -e "\n***\n*** Test Failed: Unexpected number of traced "COMPUTE_INPUT_END" events.\n***" + RET=1 +fi + +if [ `grep -c ^ensemble_add_sub_int32_int32_int32 summary_bls.log` != "1" ]; then + cat summary_bls.log + echo -e "\n***\n*** Test Failed: BLS child ensemble model wasn't traced. \n***" + RET=1 +fi + +if [ `grep -c ^simple summary_bls.log` != "1" ]; then + cat summary_bls.log + echo -e "\n***\n*** Test Failed: ensemble's model 'simple' wasn't traced. \n***" + RET=1 +fi + +if [ `grep -o 'parent_id' bls_trace.log | wc -l` != "2" ]; then + cat bls_trace.log + echo -e "\n***\n*** Test Failed: Unexpected number of 'parent id' fields. \n***" + RET=1 +fi + # Check opentelemetry trace exporter sends proper info. # A helper python script starts listening on $OTLP_PORT, where # OTLP exporter sends traces. -# Unittests then check that produced spans have expected format and events -# FIXME: Redesign this test to remove time sensitivity +export TRITON_OPENTELEMETRY_TEST='false' +OTLP_PORT=10000 +OTEL_COLLECTOR_DIR=./opentelemetry-collector +OTEL_COLLECTOR=./opentelemetry-collector/bin/otelcorecol_* +OTEL_COLLECTOR_LOG="./trace_collector_http_exporter.log" + +# Building the latest version of the OpenTelemetry collector. +# Ref: https://opentelemetry.io/docs/collector/getting-started/#local +if [ -d "$OTEL_COLLECTOR_DIR" ]; then rm -Rf $OTEL_COLLECTOR_DIR; fi +git clone https://github.com/open-telemetry/opentelemetry-collector.git +cd $OTEL_COLLECTOR_DIR +make install-tools +make otelcorecol +cd .. +$OTEL_COLLECTOR --config ./trace-config.yaml >> $OTEL_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$! + + +SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \ + --trace-config=count=100 --trace-config=mode=opentelemetry \ + --trace-config=opentelemetry,url=localhost:$OTLP_PORT/v1/traces \ + --model-repository=$MODELSDIR" +SERVER_LOG="./inference_server_otel_http_exporter.log" + +run_server +if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + exit 1 +fi +$SIMPLE_HTTP_CLIENT >>$CLIENT_LOG 2>&1 + +set -e + +kill $SERVER_PID +wait $SERVER_PID + +kill $COLLECTOR_PID +wait $COLLECTOR_PID + +set +e + +if ! 
[[ -s $OTEL_COLLECTOR_LOG && `grep -c 'InstrumentationScope triton-server' $OTEL_COLLECTOR_LOG` == 3 ]] ; then + echo -e "\n***\n*** HTTP exporter test failed.\n***" + cat $OTEL_COLLECTOR_LOG + exit 1 +fi + + +# Unittests then check that produced spans have expected format and events OPENTELEMETRY_TEST=opentelemetry_unittest.py OPENTELEMETRY_LOG="opentelemetry_unittest.log" -EXPECTED_NUM_TESTS="2" +EXPECTED_NUM_TESTS="3" + +export TRITON_OPENTELEMETRY_TEST='true' SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \ --trace-config=count=100 --trace-config=mode=opentelemetry \ - --trace-config=opentelemetry,url=localhost:$OTLP_PORT \ + --trace-config=opentelemetry,resource=test.key=test.value \ + --trace-config=opentelemetry,resource=service.name=test_triton \ --model-repository=$MODELSDIR" -SERVER_LOG="./inference_server_trace_config.log" - -# Increasing OTLP timeout, since we don't use a valid OTLP collector -# and don't send a proper signal back. -export OTEL_EXPORTER_OTLP_TIMEOUT=50000 -export OTEL_EXPORTER_OTLP_TRACES_TIMEOUT=50000 +SERVER_LOG="./inference_server_otel_ostream_exporter.log" run_server if [ "$SERVER_PID" == "0" ]; then @@ -688,18 +764,29 @@ if [ "$SERVER_PID" == "0" ]; then exit 1 fi -# Using netcat as trace collector -apt-get update && apt-get install -y netcat -nc -l -k 127.0.0.1 $OTLP_PORT >> $TRACE_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$! - set +e # Preparing traces for unittest. -# Note: need to run this separately, to speed up trace collection. -# Otherwise internal (opentelemetry_unittest.OpenTelemetryTest.setUp) check -# will slow down collection. +# Note: running this separately, so that I could extract spans with `grep` +# from server log later. python -c 'import opentelemetry_unittest; \ opentelemetry_unittest.prepare_traces()' >>$CLIENT_LOG 2>&1 +sleep 5 + +set -e + +kill $SERVER_PID +wait $SERVER_PID + +set +e + +grep -z -o -P '({\n(?s).*}\n)' $SERVER_LOG >> trace_collector.log + +if ! [ -s trace_collector.log ] ; then + echo -e "\n***\n*** $SERVER_LOG did not contain any OpenTelemetry spans.\n***" + exit 1 +fi + # Unittest will not start until expected number of spans is collected. python $OPENTELEMETRY_TEST >>$OPENTELEMETRY_LOG 2>&1 if [ $? -ne 0 ]; then @@ -714,12 +801,4 @@ else fi fi -kill $COLLECTOR_PID -wait $COLLECTOR_PID - -set -e - -kill $SERVER_PID -wait $SERVER_PID - exit $RET \ No newline at end of file diff --git a/qa/L0_trace/trace-config.yaml b/qa/L0_trace/trace-config.yaml new file mode 100644 index 0000000000..f8fe2424c0 --- /dev/null +++ b/qa/L0_trace/trace-config.yaml @@ -0,0 +1,45 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Simple config file for OpenTelemetry collector. +# It receives all traces, received on localhost:10000 and prints +# it into the output stream. +# Ref: https://opentelemetry.io/docs/collector/configuration/ +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:10000 + +exporters: + logging: + verbosity: detailed + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [logging] diff --git a/qa/python_models/bls_simple/bls_simple.py b/qa/python_models/bls_simple/bls_simple.py new file mode 100644 index 0000000000..962c3834b9 --- /dev/null +++ b/qa/python_models/bls_simple/bls_simple.py @@ -0,0 +1,84 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
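+
+# A minimal BLS test model: it forwards each request to the child model
+# named by the "MODEL_NAME" input tensor, passing `trace=request.trace()`
+# so that the child model's spans are linked to the parent request's trace.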
+ +import triton_python_backend_utils as pb_utils + + +class TritonPythonModel: + @staticmethod + def auto_complete_config(auto_complete_model_config): + inputs = [ + {"name": "MODEL_NAME", "data_type": "TYPE_STRING", "dims": [1]}, + {"name": "INPUT0", "data_type": "TYPE_INT32", "dims": [1, 16]}, + {"name": "INPUT1", "data_type": "TYPE_INT32", "dims": [1, 16]}, + ] + outputs = [ + {"name": "OUTPUT0", "data_type": "TYPE_INT32", "dims": [16]}, + {"name": "OUTPUT1", "data_type": "TYPE_INT32", "dims": [16]}, + ] + + config = auto_complete_model_config.as_dict() + input_names = [] + output_names = [] + for input in config["input"]: + input_names.append(input["name"]) + for output in config["output"]: + output_names.append(output["name"]) + + for input in inputs: + if input["name"] not in input_names: + auto_complete_model_config.add_input(input) + for output in outputs: + if output["name"] not in output_names: + auto_complete_model_config.add_output(output) + + auto_complete_model_config.set_max_batch_size(0) + + return auto_complete_model_config + + def execute(self, requests): + responses = [] + for request in requests: + in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") + in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") + model_name = pb_utils.get_input_tensor_by_name(request, "MODEL_NAME") + model_name_string = model_name.as_numpy()[0] + + infer_request = pb_utils.InferenceRequest( + model_name=model_name_string, + requested_output_names=["OUTPUT0", "OUTPUT1"], + inputs=[in_0, in_1], + trace=request.trace(), + ) + + infer_response = infer_request.exec() + + inference_response = pb_utils.InferenceResponse( + output_tensors=infer_response.output_tensors() + ) + responses.append(inference_response) + + return responses diff --git a/src/tracer.cc b/src/tracer.cc index c33aeb546e..d9562f0b82 100644 --- a/src/tracer.cc +++ b/src/tracer.cc @@ -36,10 +36,14 @@ #include #endif // TRITON_ENABLE_GPU #ifndef _WIN32 +#include "opentelemetry/exporters/ostream/span_exporter_factory.h" +#include "opentelemetry/exporters/otlp/otlp_http_exporter_factory.h" +#include "opentelemetry/sdk/resource/semantic_conventions.h" namespace otlp = opentelemetry::exporter::otlp; namespace otel_trace_sdk = opentelemetry::sdk::trace; namespace otel_trace_api = opentelemetry::trace; namespace otel_common = opentelemetry::common; +namespace otel_resource = opentelemetry::sdk::resource; #endif namespace triton { namespace server { @@ -80,6 +84,8 @@ TraceManager::TraceManager( false /*filepath_specified*/, false /*mode_specified*/, false /*config_map_specified*/)); trace_files_.emplace(filepath, file); + + InitTracer(config_map); } TRITONSERVER_Error* @@ -348,37 +354,82 @@ TraceManager::Trace::CaptureTimestamp( } } -#ifndef _WIN32 void -TraceManager::Trace::InitTracer( - const triton::server::TraceConfigMap& config_map) +TraceManager::InitTracer(const triton::server::TraceConfigMap& config_map) { - otlp::OtlpHttpExporterOptions opts; - auto mode_key = std::to_string(TRACE_MODE_OPENTELEMETRY); - auto otel_options_it = config_map.find(mode_key); - if (otel_options_it != config_map.end()) { - for (const auto& setting : otel_options_it->second) { - // FIXME add more configuration options of OTLP HTTP Exporter - if (setting.first == "url") { - opts.url = setting.second; + switch (global_setting_->mode_) { + case TRACE_MODE_OPENTELEMETRY: { +#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING) + otlp::OtlpHttpExporterOptions opts; + otel_resource::ResourceAttributes attributes = {}; + 
attributes[otel_resource::SemanticConventions::kServiceName] = + "triton-inference-server"; + auto mode_key = std::to_string(TRACE_MODE_OPENTELEMETRY); + auto otel_options_it = config_map.find(mode_key); + if (otel_options_it != config_map.end()) { + for (const auto& setting : otel_options_it->second) { + // FIXME add more configuration options of OTLP HTTP Exporter + if (setting.first == "url") { + opts.url = setting.second; + } + if (setting.first == "resource") { + auto pos = setting.second.find('='); + auto key = setting.second.substr(0, pos); + auto value = setting.second.substr(pos + 1); + attributes[key] = value; + } + } + } + auto exporter = otlp::OtlpHttpExporterFactory::Create(opts); + auto test_exporter = triton::server::GetEnvironmentVariableOrDefault( + "TRITON_OPENTELEMETRY_TEST", "false"); + if (test_exporter != "false") { + exporter = opentelemetry::exporter::trace::OStreamSpanExporterFactory:: + Create(); } + auto processor = otel_trace_sdk::SimpleSpanProcessorFactory::Create( + std::move(exporter)); + auto resource = otel_resource::Resource::Create(attributes); + std::shared_ptr provider = + otel_trace_sdk::TracerProviderFactory::Create( + std::move(processor), resource); + + otel_trace_api::Provider::SetTracerProvider(provider); + break; +#else + LOG_ERROR << "Unsupported trace mode: " + << TraceManager::InferenceTraceModeString( + global_setting_->mode_); + break; +#endif + } + default: + return; + } +} + +void +TraceManager::CleanupTracer() +{ + switch (global_setting_->mode_) { + case TRACE_MODE_OPENTELEMETRY: { +#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING) + std::shared_ptr none; + otel_trace_api::Provider::SetTracerProvider(none); + break; +#else + LOG_ERROR << "Unsupported trace mode: " + << TraceManager::InferenceTraceModeString( + global_setting_->mode_); + break; +#endif } + default: + return; } - exporter_ = otlp::OtlpHttpExporterFactory::Create(opts); - processor_ = - otel_trace_sdk::SimpleSpanProcessorFactory::Create(std::move(exporter_)); - provider_ = - otel_trace_sdk::TracerProviderFactory::Create(std::move(processor_)); - auto steady_timestamp_ns = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - auto root_span = StartSpan("InferRequest", steady_timestamp_ns); - // Initializing OTel context and storing "InferRequest" span as a root span - // to keep it alive for the duration of the request. - otel_context_ = opentelemetry::context::Context({kRootSpan, root_span}); } +#ifndef _WIN32 void TraceManager::Trace::StartSpan( std::string span_key, TRITONSERVER_InferenceTrace* trace, @@ -404,6 +455,10 @@ TraceManager::Trace::StartSpan( if (parent_id == 0 && activity == TRITONSERVER_TRACE_REQUEST_START) { parent_span_key = kRootSpan; } else if (activity == TRITONSERVER_TRACE_REQUEST_START) { + // [FIXME] For BLS requests parent span for children's request spans + // should be parent model's compute span. Currently, + // this won't work, since parent's compute span will be created + // only after children's spans are created. 
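+    // Until then, children's request spans are attached to the parent
+    // model's request span instead, as done below.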
parent_span_key = kRequestSpan + std::to_string(parent_id); } else if (activity == TRITONSERVER_TRACE_COMPUTE_START) { parent_span_key = kRequestSpan + std::to_string(trace_id); @@ -454,7 +509,8 @@ TraceManager::Trace::StartSpan( otel_context_.GetValue(parent_span_key)); options.parent = parent_span->GetContext(); } - return provider_->GetTracer(kTritonTracer)->StartSpan(display_name, options); + auto provider = opentelemetry::trace::Provider::GetTracerProvider(); + return provider->GetTracer(kTritonTracer)->StartSpan(display_name, options); } void @@ -575,14 +631,17 @@ TraceManager::Trace::AddEvent( void TraceManager::TraceRelease(TRITONSERVER_InferenceTrace* trace, void* userp) { - uint64_t parent_id; + uint64_t id; LOG_TRITONSERVER_ERROR( - TRITONSERVER_InferenceTraceParentId(trace, &parent_id), - "getting trace parent id"); + TRITONSERVER_InferenceTraceId(trace, &id), "getting trace id"); + + auto ts = reinterpret_cast*>(userp); + std::lock_guard lk((*ts)->mtx_); + (*ts)->spawned_traces_tracker_.erase(id); // The userp will be shared with the trace children, so only delete it - // if the root trace is being released - if (parent_id == 0) { - delete reinterpret_cast*>(userp); + // if no more TraceRelease calls are expected + if ((*ts)->spawned_traces_tracker_.empty()) { + delete ts; } LOG_TRITONSERVER_ERROR( TRITONSERVER_InferenceTraceDelete(trace), "deleting trace"); @@ -617,6 +676,10 @@ TraceManager::TraceActivity( reinterpret_cast*>(userp)->get(); std::lock_guard lk(ts->mtx_); + if (ts->spawned_traces_tracker_.find(id) == + ts->spawned_traces_tracker_.end()) { + ts->spawned_traces_tracker_.emplace(id); + } if (ts->setting_->mode_ == TRACE_MODE_OPENTELEMETRY) { #ifndef _WIN32 @@ -627,7 +690,6 @@ TraceManager::TraceActivity( #endif return; } - std::stringstream* ss = nullptr; { if (ts->streams_.find(id) == ts->streams_.end()) { @@ -733,6 +795,7 @@ TraceManager::TraceTensorActivity( std::unique_ptr stream(new std::stringstream()); ss = stream.get(); ts->streams_.emplace(id, std::move(stream)); + ts->spawned_traces_tracker_.emplace(id); } else { ss = ts->streams_[id].get(); // If the string stream is not newly created, add "," as there is @@ -988,7 +1051,15 @@ TraceManager::TraceSetting::SampleTrace() "getting trace id"); if (mode_ == TRACE_MODE_OPENTELEMETRY) { #ifndef _WIN32 - lts->InitTracer(config_map_); + auto steady_timestamp_ns = + std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()) + .count(); + auto root_span = lts->StartSpan("InferRequest", steady_timestamp_ns); + // Initializing OTel context and storing "InferRequest" span as a root + // span to keep it alive for the duration of the request. 
+ lts->otel_context_ = + opentelemetry::context::Context({kRootSpan, root_span}); #else LOG_ERROR << "Unsupported trace mode: " << TraceManager::InferenceTraceModeString(mode_); diff --git a/src/tracer.h b/src/tracer.h index 55bf2b9800..ce214085d3 100644 --- a/src/tracer.h +++ b/src/tracer.h @@ -36,14 +36,13 @@ #include #if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING) -#include "opentelemetry/exporters/otlp/otlp_http_exporter_factory.h" #include "opentelemetry/nostd/shared_ptr.h" +#include "opentelemetry/sdk/resource/resource.h" #include "opentelemetry/sdk/trace/processor.h" #include "opentelemetry/sdk/trace/simple_processor_factory.h" #include "opentelemetry/sdk/trace/tracer_provider_factory.h" #include "opentelemetry/trace/context.h" #include "opentelemetry/trace/provider.h" -namespace otlp = opentelemetry::exporter::otlp; namespace otel_trace_sdk = opentelemetry::sdk::trace; namespace otel_trace_api = opentelemetry::trace; #endif @@ -122,7 +121,7 @@ class TraceManager { const std::string& filepath, const InferenceTraceMode mode, const TraceConfigMap& config_map); - ~TraceManager() = default; + ~TraceManager() { CleanupTracer(); } // Return a trace that should be used to collected trace activities // for an inference request. Return nullptr if no tracing should occur. @@ -150,6 +149,19 @@ class TraceManager { static const char* InferenceTraceModeString(InferenceTraceMode mode); + /// In OpenTelemetry trace mode initializes Opentelemetry exporter, processor, + /// and sets the global trace provider. + /// In Triton trace mode is a no-op. + /// + /// \param config_map A config map, which stores all parameters, specified + /// by user. + void InitTracer(const TraceConfigMap& config_map); + + /// In OpenTelemetry trace mode cleans global tracer provider, + /// set by InitTracer. + /// In Triton trace mode is a no-op. + void CleanupTracer(); + struct Trace { Trace() : trace_(nullptr), trace_id_(0) {} ~Trace(); @@ -157,6 +169,10 @@ class TraceManager { // Group the spawned traces by trace ID for better formatting std::mutex mtx_; std::unordered_map> streams_; + // We use the set to track the number of spawned traces, so that + // when TraceManager::TraceRelease() with 'trace_userp_' is called + // we can safely release 'trace_userp_' + std::set spawned_traces_tracker_; // Triton trace object that this trace is assosicated with, // 'Trace' object does not take ownership of 'trace_'. The caller of // SampleTrace() must call TraceManager::TraceRelease() with 'trace_userp_' @@ -172,12 +188,6 @@ class TraceManager { void CaptureTimestamp(const std::string& name, uint64_t timestamp_ns); #if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING) - /// Initializes Opentelemetry exporter, processor, provider and context. - /// - /// \param config_map A config map, which stores all parameters, specified - /// by user. - void InitTracer(const TraceConfigMap& config_map); - /// Reports TRITONSERVER_InferenceTraceActivity as event to /// the currently active span. If activity is an instance of /// `TRITONSERVER_TRACE_REQUEST_START` or @@ -198,6 +208,24 @@ class TraceManager { TRITONSERVER_InferenceTrace* trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns); + /// Starts a span with the provided timestamp and name. + /// + /// \param display_name Span's name, which will be shown in the trace. 
+ /// \param raw_timestamp_ns Steady timestamp, which is used to calculate + /// OpenTelemetry SystemTimestamp to display span on a timeline, and + /// OpenTelemetry SteadyTimestamp to calculate the duration on the span + /// with better precision. + /// \param parent_span_key A span key, to find a parent span in the + /// OpenTelemetry context. If empty, a root span will be started, + /// i.e. with no parent span specified. + /// \return A shared pointer to a newly created OpenTelemetry span. + opentelemetry::nostd::shared_ptr StartSpan( + std::string display_name, const uint64_t& raw_timestamp_ns, + std::string parent_span_key = ""); + + // OTel context to store spans, created in the current trace + opentelemetry::context::Context otel_context_; + private: // OpenTelemetry SDK relies on system's clock for event timestamps. // Triton Tracing records timestamps using steady_clock. This is a @@ -215,15 +243,6 @@ class TraceManager { std::chrono::duration_cast( std::chrono::steady_clock::now().time_since_epoch()); - std::unique_ptr exporter_; - - std::unique_ptr processor_; - - std::shared_ptr provider_; - - // OTel context to store spans, created in the current trace - opentelemetry::context::Context otel_context_; - /// Starts a compute or request span based on `activity`. /// For request spans, it will add the following attributes to the span: /// `model_name`, `model_version`, `trace_id`, `parent_id`. @@ -243,21 +262,6 @@ class TraceManager { TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, uint64_t trace_id); - /// Starts a span with the provided timestamp and name. - /// - /// \param display_name Span's name, which will be shown in the trace. - /// \param raw_timestamp_ns Steady timestamp, which is used to calculate - /// OpenTelemetry SystemTimestamp to display span on a timeline, and - /// OpenTelemetry SteadyTimestamp to calculate the duration on the span - /// with better precision. - /// \param parent_span_key A span key, to find a parent span in the - /// OpenTelemetry context. If empty, a root span will be started, - /// i.e. with no parent span specified. - /// \return A shared pointer to a newly created OpenTelemetry span. - opentelemetry::nostd::shared_ptr StartSpan( - std::string display_name, const uint64_t& raw_timestamp_ns, - std::string parent_span_key = ""); - /// Ends the provided span. /// /// \param span_key Span's key to retrieve the corresponding span from the