diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 2cc931445263f8ba33e2037c597a95e5470010be..5f86f2dcc9f2264ac126ade05c48b5a122fd918b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,7 +1,5 @@
   OTB_BUILD: /src/otb/build/OTB/build  # Local OTB build directory
   OTBTF_SRC: /src/otbtf  # Local OTBTF source directory
   OTB_TEST_DIR: $OTB_BUILD/Testing/Temporary  # OTB testing directory
@@ -19,7 +17,10 @@ variables:
   DOCKERHUB_BASE: mdl4eo/otbtf
   CPU_BASE_IMG: ubuntu:22.04
-  GPU_BASE_IMG: nvidia/cuda:12.0.1-cudnn8-devel-ubuntu22.04
+  GPU_BASE_IMG: nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
     - if: $CI_MERGE_REQUEST_ID || $CI_COMMIT_REF_NAME =~ /master/ # Execute jobs in merge request context, or commit in master branch
@@ -217,6 +218,11 @@ rio:
     - sudo pip install rasterio
     - python -m pytest --junitxml=$ARTIFACT_TEST_DIR/report_rio.xml $OTBTF_SRC/test/rio_test.py
+  extends: .applications_test_base
+  script:
+    - python -m pytest --junitxml=$ARTIFACT_TEST_DIR/report_nodata.xml $OTBTF_SRC/test/nodata_test.py
   stage: Update dev image
   extends: .docker_build_base
@@ -265,6 +271,7 @@ deploy_gpu:
@@ -274,6 +281,9 @@ deploy_gpu:
     # gpu-opt-dev
     - docker build --build-arg BZL_OPTIONS="--remote_cache=$BAZELCACHE" --tag $IMAGE_GPUOPTDEV --build-arg BASE_IMG=$GPU_BASE_IMG --build-arg KEEP_SRC_OTB=true .
     - docker push $IMAGE_GPUOPTDEV
+    # gpu-opt-dev-trt: the TensorRT 8.5.1 packages requested below are the
+    # cuda11.8 builds, hence a dedicated CUDA 11.8 base image for this flavour.
+    # The image must be tagged with the same name that is pushed afterwards.
+    - docker build --build-arg BZL_OPTIONS="--remote_cache=$BAZELCACHE" --tag $IMAGE_GPUOPTDEVTRT --build-arg BASE_IMG="nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04" --build-arg KEEP_SRC_OTB=true --build-arg TENSORRT="8.5.1-1+cuda11.8" .
+    - docker push $IMAGE_GPUOPTDEVTRT
     # gpu-basic
     - docker build --build-arg BZL_OPTIONS="--remote_cache=$BAZELCACHE" --tag $IMAGE_GPU --build-arg BASE_IMG=$GPU_BASE_IMG --build-arg BZL_CONFIGS="" .
     - docker push $IMAGE_GPU
diff --git a/Dockerfile b/Dockerfile
index a612da257648a30ca2cec7ed44578e950d1b1607..711dc4ba30a70f8985df23d861894921d12b1982 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -44,6 +44,7 @@ RUN git config --global advice.detachedHead false
 ### TF
 ARG TF=v2.12.0
 # Install bazelisk (will read .bazelversion and download the right bazel binary - latest by default)
 RUN wget -qO /opt/otbtf/bin/bazelisk https://github.com/bazelbuild/bazelisk/releases/latest/download/bazelisk-linux-amd64 \
@@ -202,4 +203,5 @@ ENV PATH="/home/otbuser/.local/bin:$PATH"
 RUN python -c "import tensorflow"
 RUN python -c "import otbtf, tricks"
 RUN python -c "import otbApplication as otb; otb.Registry.CreateApplication('ImageClassifierFromDeepFeatures')"
-RUN python -c "from osgeo import gdal"
\ No newline at end of file
+RUN python -c "from osgeo import gdal"
diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt
index fd30467cc836e5b3d75cbedbc891ad9527215697..24d3f247a840518290bd43fee05d7466de743054 100644
@@ -1,3 +1,9 @@
+Version 4.1.0 (23 may 2023)
+* Add no-data values support for inference in TensorflowModelServe application
+* Update base docker image for NVIDIA GPU builds (CUDA 12.1.1)
+* Fix CuDNN version detection in `build-env-tf.sh`
 Version 4.0.0 (5 apr 2023)
 * Big improvement of the documentation:
diff --git a/app/otbTensorflowModelServe.cxx b/app/otbTensorflowModelServe.cxx
index 47a8c95730443c0c3345933d71f63a1bcbcb556d..59969e36f4b90a3e473c3328844d78778a5aa940 100644
--- a/app/otbTensorflowModelServe.cxx
+++ b/app/otbTensorflowModelServe.cxx
@@ -70,11 +70,14 @@ public:
     InputImageSource m_ImageSource;
     SizeType         m_PatchSize;
     std::string      m_Placeholder;
+    float            m_NodataValue;
+    bool             m_HasNodata;
     // Parameters keys
     std::string m_KeyIn;     // Key of input image list
-    std::string m_KeyPszX;   // Key for samples sizes X
-    std::string m_KeyPszY;   // Key for samples sizes Y
+    std::string m_KeyPszX;   // Key for receptive field size in X
+    std::string m_KeyPszY;   // Key for receptive field size in Y
+    std::string m_KeyND;     // Key for no-data value
     std::string m_KeyPHName; // Key for placeholder name in the tensorflow model
@@ -93,7 +96,8 @@ public:
     ss_key_in, ss_desc_in,
     ss_key_dims_x, ss_desc_dims_x,
     ss_key_dims_y, ss_desc_dims_y,
-    ss_key_ph, ss_desc_ph;
+    ss_key_ph, ss_desc_ph,
+    ss_key_nd, ss_desc_nd;
     // Parameter group key/description
     ss_key_group  << "source"                  << inputNumber;
@@ -104,12 +108,14 @@ public:
     ss_key_dims_x  << ss_key_group.str() << ".rfieldx";
     ss_key_dims_y  << ss_key_group.str() << ".rfieldy";
     ss_key_ph      << ss_key_group.str() << ".placeholder";
+    ss_key_nd      << ss_key_group.str() << ".nodata";
     // Parameter group descriptions
     ss_desc_in     << "Input image (or list to stack) for source #" << inputNumber;
     ss_desc_dims_x << "Input receptive field (width) for source #"  << inputNumber;
     ss_desc_dims_y << "Input receptive field (height) for source #" << inputNumber;
     ss_desc_ph     << "Name of the input placeholder for source #"  << inputNumber;
+    ss_desc_nd     << "No-data value for pixels of source #"        << inputNumber;
     // Populate group
     AddParameter(ParameterType_Group,          ss_key_group.str(),  ss_desc_group.str());
@@ -122,6 +128,8 @@ public:
     SetDefaultParameterInt                    (ss_key_dims_y.str(), 1);
     AddParameter(ParameterType_String,         ss_key_ph.str(),     ss_desc_ph.str());
     MandatoryOff                              (ss_key_ph.str());
+    AddParameter(ParameterType_Float,          ss_key_nd.str(), ss_desc_nd.str());
+    MandatoryOff                              (ss_key_nd.str());
     // Add a new bundle
     ProcessObjectsBundle bundle;
@@ -129,6 +137,7 @@ public:
     bundle.m_KeyPszX   = ss_key_dims_x.str();
     bundle.m_KeyPszY   = ss_key_dims_y.str();
     bundle.m_KeyPHName = ss_key_ph.str();
+    bundle.m_KeyND     = ss_key_nd.str();
@@ -183,7 +192,12 @@ public:
     SetDefaultParameterFloat                 ("output.spcscale", 1.0);
     SetParameterDescription                  ("output.spcscale", "The output image size/scale and spacing*scale where size and spacing corresponds to the first input");
     AddParameter(ParameterType_StringList,    "output.names",    "Names of the output tensors");
-    MandatoryOff                            ("output.names");
+    MandatoryOff                             ("output.names");
+    // Output background value
+    AddParameter(ParameterType_Float,         "output.bv", "Output background value");
+    SetDefaultParameterFloat                 ("output.bv", 0.0);
+    SetParameterDescription                  ("output.bv", "The value used when one input has only no-data values in its receptive field");
     // Output Field of Expression
     AddParameter(ParameterType_Int,           "output.efieldx", "The output expression field (width)");
@@ -236,10 +250,14 @@ public:
       bundle.m_Placeholder = GetParameterAsString(bundle.m_KeyPHName);
       bundle.m_PatchSize[0] = GetParameterInt(bundle.m_KeyPszX);
       bundle.m_PatchSize[1] = GetParameterInt(bundle.m_KeyPszY);
+      // The no-data parameter is optional: remember whether the user set it,
+      // and fall back to 0 for the stored value when it was left unset
+      bundle.m_HasNodata = HasValue(bundle.m_KeyND);
+      bundle.m_NodataValue = bundle.m_HasNodata ? GetParameterFloat(bundle.m_KeyND) : 0.0f;
       otbAppLogINFO("Source info :");
       otbAppLogINFO("Receptive field  : " << bundle.m_PatchSize  );
       otbAppLogINFO("Placeholder name : " << bundle.m_Placeholder);
+      if (bundle.m_HasNodata == true)
+        otbAppLogINFO("No-data value    : " << bundle.m_NodataValue);
@@ -274,7 +292,7 @@ public:
     // Input sources
     for (auto& bundle: m_Bundles)
-      m_TFFilter->PushBackInputTensorBundle(bundle.m_Placeholder, bundle.m_PatchSize, bundle.m_ImageSource.Get());
+      m_TFFilter->PushBackInputTensorBundle(bundle.m_Placeholder, bundle.m_PatchSize, bundle.m_ImageSource.Get(), bundle.m_HasNodata, bundle.m_NodataValue);
     // Fully convolutional mode on/off
@@ -284,6 +302,11 @@ public:
+    // Output background value
+    const float outBV = GetParameterFloat("output.bv");
+    otbAppLogINFO("Setting background value to " << outBV);
+    m_TFFilter->SetOutputBackgroundValue(outBV);
     // Output field of expression
     FloatVectorImageType::SizeType foe;
     foe[0] = GetParameterInt("output.efieldx");
diff --git a/doc/app_inference.md b/doc/app_inference.md
index d50a7c03235c6fa2ff3feed3b0dafb3a72e08d32..63557d996c1b27852366beada371f0dfad4743b3 100644
--- a/doc/app_inference.md
+++ b/doc/app_inference.md
@@ -23,14 +23,14 @@ known:
 The **scale factor** describes the physical change of spacing of the outputs,
-typically introduced in the model by non unitary strides in pooling or
+typically introduced in the model by non-unitary strides in pooling or
 convolution operators.
 For each output, it is expressed relatively to one single input of the model
 called the *reference input source*.
 Additionally, the names of the *target nodes* must be known (e.g. optimizers
 for Tensorflow API v1).
 Also, the names of *user placeholders*, typically scalars inputs that are
-used to control some parameters of the model, must be know.
+used to control some parameters of the model, must be known.
 The **receptive field** corresponds to the input volume that "sees" the deep
 The **expression field** corresponds to the output volume that the deep net
@@ -58,15 +58,20 @@ computation of one single tile of pixels.
 So, this application takes in input one or multiple _input sources_ (the number
 of _input sources_ can be changed by setting the `OTB_TF_NSOURCES` to the
 desired number) and produce one output of the specified tensors.
-The user is responsible of giving the **receptive field** and **name** of
+The user is responsible for giving the **receptive field** and **name** of
 _input placeholders_, as well as the **expression field**, **scale factor** and
 **name** of _output tensors_.
 The first _input source_ (`source1.il`) corresponds to the _reference input
 As explained, the **scale factor** provided for the
 _output tensors_ is related to this _reference input source_.
-The user can ask for multiple _output tensors_, that will be stack along the
+The user can ask for multiple _output tensors_, that will be stacked along the
 channel dimension of the output raster.
+Since OTBTF 4.1, a no-data value can be provided for each input source (e.g. 
+`source1.nodata`). When all elements of an input are equal to the no-data
+value in the processed chunk of image, the local inference process is skipped, 
+and the output pixel is filled with the value provided by the `output.bv` 
 !!! Warning
diff --git a/include/otbTensorflowMultisourceModelBase.h b/include/otbTensorflowMultisourceModelBase.h
index 6c943d1f1e777f8f7d26fc6be7f529b34535b5c7..f452cc2269571c95d17276988136a5cc27ce685d 100644
--- a/include/otbTensorflowMultisourceModelBase.h
+++ b/include/otbTensorflowMultisourceModelBase.h
@@ -96,6 +96,8 @@ public:
   typedef std::pair<std::string, tensorflow::Tensor> DictElementType;
   typedef std::vector<std::string>                   StringList;
   typedef std::vector<SizeType>                      SizeListType;
+  typedef std::vector<bool>                          BoolListType;
+  typedef std::vector<InternalPixelType>             ValueListType;
   typedef std::vector<DictElementType>               DictType;
   typedef std::vector<tensorflow::DataType>          DataTypeListType;
   typedef std::vector<tensorflow::TensorShapeProto>  TensorShapeProtoList;
@@ -119,7 +121,13 @@ public:
   /** Model parameters */
-  PushBackInputTensorBundle(std::string name, SizeType receptiveField, ImagePointerType image);
+  PushBackInputTensorBundle(std::string       name,              // input placeholder name
+                            SizeType          receptiveField,    // receptive field of this input
+                            ImagePointerType  image,             // input image
+                            bool              useNodata = false, // enable the no-data check for this input
+                            InternalPixelType nodataValue = 0);  // no-data value (used when useNodata is true)
   PushBackOuputTensorBundle(std::string name, SizeType expressionField);
@@ -131,6 +139,14 @@ public:
   itkSetMacro(InputReceptiveFields, SizeListType);
   itkGetMacro(InputReceptiveFields, SizeListType);
+  /** Use no-data */
+  itkSetMacro(InputUseNodata, BoolListType);
+  itkGetMacro(InputUseNodata, BoolListType);
+  /** No-data value */
+  itkSetMacro(InputNodataValues, ValueListType);
+  itkGetMacro(InputNodataValues, ValueListType);
   /** Output tensors names */
   itkSetMacro(OutputTensors, StringList);
   itkGetMacro(OutputTensors, StringList);
@@ -172,8 +188,11 @@ protected:
   GenerateDebugReport(DictType & inputs);
   virtual void
-  RunSession(DictType & inputs, TensorListType & outputs);
+  RunSession(DictType & inputs, TensorListType & outputs, bool & nodata);
+  virtual void
+  RunSession(DictType & inputs, TensorListType & outputs);
   TensorflowMultisourceModelBase(const Self &); // purposely not implemented
@@ -183,12 +202,14 @@ private:
   tensorflow::SavedModelBundle * m_SavedModel; // The TensorFlow model
   // Model parameters
-  StringList   m_InputPlaceholders;      // Input placeholders names
-  SizeListType m_InputReceptiveFields;   // Input receptive fields
-  StringList   m_OutputTensors;          // Output tensors names
-  SizeListType m_OutputExpressionFields; // Output expression fields
-  DictType     m_UserPlaceholders;       // User placeholders
-  StringList   m_TargetNodesNames;       // User nodes target
+  StringList    m_InputPlaceholders;      // Input placeholders names
+  SizeListType  m_InputReceptiveFields;   // Input receptive fields
+  ValueListType m_InputNodataValues;      // Input no-data values
+  BoolListType  m_InputUseNodata;         // Input no-data used
+  StringList    m_OutputTensors;          // Output tensors names
+  SizeListType  m_OutputExpressionFields; // Output expression fields
+  DictType      m_UserPlaceholders;       // User placeholders
+  StringList    m_TargetNodesNames;       // User nodes target
   // Internal, read-only
   DataTypeListType     m_InputConstantsDataTypes; // Input constants datatype
diff --git a/include/otbTensorflowMultisourceModelBase.hxx b/include/otbTensorflowMultisourceModelBase.hxx
index ba4612627381d01991dbd5c5d050e8c197c1f851..d9d6d7c69db553a8439fcf3a3939a1b910a62b93 100644
--- a/include/otbTensorflowMultisourceModelBase.hxx
+++ b/include/otbTensorflowMultisourceModelBase.hxx
@@ -55,13 +55,18 @@ TensorflowMultisourceModelBase<TInputImage, TOutputImage>::GetSignatureDef()
 template <class TInputImage, class TOutputImage>
-TensorflowMultisourceModelBase<TInputImage, TOutputImage>::PushBackInputTensorBundle(std::string      placeholder,
-                                                                                     SizeType         receptiveField,
-                                                                                     ImagePointerType image)
+TensorflowMultisourceModelBase<TInputImage, TOutputImage>::PushBackInputTensorBundle(
+  std::string       placeholder,
+  SizeType          receptiveField,
+  ImagePointerType  image,
+  bool              useNodata,
+  InternalPixelType nodataValue)
+  m_InputUseNodata.push_back(useNodata);
+  m_InputNodataValues.push_back(nodataValue);
 template <class TInputImage, class TOutputImage>
@@ -96,10 +101,9 @@ TensorflowMultisourceModelBase<TInputImage, TOutputImage>::GenerateDebugReport(D
   return debugReport;
 template <class TInputImage, class TOutputImage>
-TensorflowMultisourceModelBase<TInputImage, TOutputImage>::RunSession(DictType & inputs, TensorListType & outputs)
+TensorflowMultisourceModelBase<TInputImage, TOutputImage>::RunSession(DictType & inputs, TensorListType & outputs, bool & nodata)
   // Run the TF session here
@@ -119,10 +123,28 @@ TensorflowMultisourceModelBase<TInputImage, TOutputImage>::RunSession(DictType &
   // Add input tensors
+  // During this step we also check for nodata values
+  nodata = false;
   k = 0;
   for (auto & dict : inputs)
-    inputs_new.emplace_back(m_InputLayers[k], dict.second);
+    // Bind by reference: the tensor is only read here
+    const auto & inputTensor = dict.second;
+    inputs_new.emplace_back(m_InputLayers[k], inputTensor);
+    if (m_InputUseNodata[k])
+    {
+      // The session is skipped when every element of this input equals its
+      // no-data value. The scan stops at the first element that differs, so
+      // valid inputs are not traversed entirely.
+      const auto nodataValue = m_InputNodataValues[k];
+      const tensorflow::int64 nElmT = inputTensor.NumElements();
+      const auto array = inputTensor.flat<InternalPixelType>();
+      bool allNodata = true;
+      for (tensorflow::int64 i = 0; i < nElmT; i++)
+      {
+        if (array(i) != nodataValue)
+        {
+          allNodata = false;
+          break;
+        }
+      }
+      if (allNodata)
+      {
+        // Tell the caller that no inference was run for this chunk
+        nodata = true;
+        return;
+      }
+    }
     k += 1;
@@ -140,11 +162,19 @@ TensorflowMultisourceModelBase<TInputImage, TOutputImage>::RunSession(DictType &
                       << "Tensorflow error message:\n"
                       << status.ToString()
                       << "\n"
-                         "OTB Filter debug message:\n"
+                        "OTB Filter debug message:\n"
                       << debugReport.str());
+template <class TInputImage, class TOutputImage>
+TensorflowMultisourceModelBase<TInputImage, TOutputImage>::RunSession(DictType & inputs, TensorListType & outputs)
+  bool nodata;
+  this->RunSession(inputs, outputs, nodata);
 template <class TInputImage, class TOutputImage>
 TensorflowMultisourceModelBase<TInputImage, TOutputImage>::GenerateOutputInformation()
@@ -162,6 +192,18 @@ TensorflowMultisourceModelBase<TInputImage, TOutputImage>::GenerateOutputInforma
                       << " and the number of input tensors names is " << m_InputPlaceholders.size());
+  // Check that no-data values size is consistent with the inputs
+  // If no value is specified, set a vector of the same size as the inputs
+  if (m_InputNodataValues.size() == 0 && m_InputUseNodata.size() == 0)
+  {
+    m_InputUseNodata = BoolListType(nbInputs, false);
+    m_InputNodataValues = ValueListType(nbInputs, 0.0);
+  }
+  if (nbInputs != m_InputNodataValues.size() || nbInputs != m_InputUseNodata.size())
+  {
+    itkExceptionMacro("Number of input images is " << nbInputs << " but the number of no-data values is not consistent");
+  }
   //                               Get tensors information
diff --git a/include/otbTensorflowMultisourceModelFilter.h b/include/otbTensorflowMultisourceModelFilter.h
index bdf9a02d0b00e9dbc228f3a0401ac8dab4c49e32..ce855b2858605187d09a75eb8c93f9eaa9f841b1 100644
--- a/include/otbTensorflowMultisourceModelFilter.h
+++ b/include/otbTensorflowMultisourceModelFilter.h
@@ -132,6 +132,8 @@ public:
   itkGetMacro(FullyConvolutional, bool);
   itkSetMacro(OutputSpacingScale, float);
   itkGetMacro(OutputSpacingScale, float);
+  itkSetMacro(OutputBackgroundValue, OutputInternalPixelType);
+  itkGetMacro(OutputBackgroundValue, OutputInternalPixelType);
@@ -162,17 +164,18 @@ private:
   operator=(const Self &); // purposely not implemented
-  SizeType m_OutputGridSize;      // Output grid size
-  bool     m_ForceOutputGridSize; // Force output grid size
-  bool     m_FullyConvolutional;  // Convolution mode
-  float    m_OutputSpacingScale;  // scaling of the output spacings
+  SizeType                m_OutputGridSize;         // Output grid size
+  bool                    m_ForceOutputGridSize;    // Force output grid size
+  bool                    m_FullyConvolutional;     // Convolution mode
+  float                   m_OutputSpacingScale;     // scaling of the output spacings
+  OutputInternalPixelType m_OutputBackgroundValue;  // Output background value
   // Internal
-  SpacingType m_OutputSpacing; // Output image spacing
-  PointType   m_OutputOrigin;  // Output image origin
-  SizeType    m_OutputSize;    // Output image size
-  PixelType   m_NullPixel;     // Pixel filled with zeros
+  SpacingType             m_OutputSpacing;          // Output image spacing
+  PointType               m_OutputOrigin;           // Output image origin
+  SizeType                m_OutputSize;             // Output image size
+  PixelType               m_NullPixel;              // Pixel filled with zeros
 }; // end class
diff --git a/include/otbTensorflowMultisourceModelFilter.hxx b/include/otbTensorflowMultisourceModelFilter.hxx
index 3cbb53d92857466d617e5547940c8e42a0ce971e..0f32334ce18c3ef4ad25e8a30ab9a5e614973728 100644
--- a/include/otbTensorflowMultisourceModelFilter.hxx
+++ b/include/otbTensorflowMultisourceModelFilter.hxx
@@ -302,7 +302,7 @@ TensorflowMultisourceModelFilter<TInputImage, TOutputImage>::GenerateOutputInfor
   // Set null pixel
-  m_NullPixel.Fill(0);
+  m_NullPixel.Fill(m_OutputBackgroundValue);
   //                        Set the tiling layout hint in metadata
@@ -470,31 +470,35 @@ TensorflowMultisourceModelFilter<TInputImage, TOutputImage>::GenerateData()
   // Run session
   // TODO: see if we print some info about inputs/outputs of the model e.g. m_OutputTensors
   TensorListType outputs;
-  this->RunSession(inputs, outputs);
+  // Initialised so that the test on this flag stays well-defined even if a
+  // RunSession() override (the method is virtual) leaves it untouched
+  bool nodata = false;
+  this->RunSession(inputs, outputs, nodata);
   // Fill the output buffer with zero value
-  // Get output tensors
-  int bandOffset = 0;
-  for (unsigned int i = 0; i < outputs.size(); i++)
+  if (nodata == false)
-    // The offset (i.e. the starting index of the channel for the output tensor) is updated
-    // during this call
-    // TODO: implement a generic strategy enabling expression field copy in patch-based mode (see
-    // tf::CopyTensorToImageRegion)
-    try
+    // Get output tensors
+    int bandOffset = 0;
+    for (unsigned int i = 0; i < outputs.size(); i++)
-      tf::CopyTensorToImageRegion<TOutputImage>(
-        outputs[i], outputAlignedReqRegion, outputPtr, outputReqRegion, bandOffset);
-    }
-    catch (itk::ExceptionObject & err)
-    {
-      std::stringstream debugMsg = this->GenerateDebugReport(inputs);
-      itkExceptionMacro("Error occurred during tensor to image conversion.\n"
-                        << "Context: " << debugMsg.str() << "Error:" << err);
+      // The offset (i.e. the starting index of the channel for the output tensor) is updated
+      // during this call
+      // TODO: implement a generic strategy enabling expression field copy in patch-based mode (see
+      // tf::CopyTensorToImageRegion)
+      try
+      {
+        tf::CopyTensorToImageRegion<TOutputImage>(
+          outputs[i], outputAlignedReqRegion, outputPtr, outputReqRegion, bandOffset);
+      }
+      catch (itk::ExceptionObject & err)
+      {
+        std::stringstream debugMsg = this->GenerateDebugReport(inputs);
+        itkExceptionMacro("Error occurred during tensor to image conversion.\n"
+                          << "Context: " << debugMsg.str() << "Error:" << err);
+      }
diff --git a/setup.py b/setup.py
index 958be96a107e5d9a145e5b0ad5b47776820f0204..1feeff9c270b86fa03cb7c43ca00956cac4de97f 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
-    version="4.0.0",
+    version="4.1.0",
     author="Remi Cresson",
     description="OTBTF: Orfeo ToolBox meets TensorFlow",
diff --git a/test/data/nd_out.tif b/test/data/nd_out.tif
new file mode 100644
index 0000000000000000000000000000000000000000..4997afd6842ca1613b530316d797d790647ea2ba
--- /dev/null
+++ b/test/data/nd_out.tif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec7c74cf9da7d187390078e554098350b33307b06ce6738b7b635fb068d78b84
+size 1411
diff --git a/test/nodata_test.py b/test/nodata_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3892153401d26f000b82da59547dcbc8c890ea7
--- /dev/null
+++ b/test/nodata_test.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import otbApplication
+import pytest
+import tensorflow as tf
+import unittest
+import otbtf
+from test_utils import resolve_paths, compare
+class NodataInferenceTest(unittest.TestCase):
+    def test_infersimple(self):
+        """
+        In this test, we create a synthetic image:
+            f(x, y) = x * y if x > y else 0
+        Then we use an input no-data value (`source1.nodata 0`) and a
+        background value for the output (`output.bv 1024`).
+        We use the l2_norm SavedModel, forcing otbtf to use a tiling scheme
+        of 4x4. If the test succeeds, the output pixels in 4x4 areas where
+        all input pixels are no-data (i.e. 0) should be filled with the `bv`
+        value (i.e. 1024). Areas holding at least one valid pixel are
+        processed by the model.
+        """
+        sm_dir = resolve_paths("$TMPDIR/l2_norm_savedmodel")
+        # Create model
+        x = tf.keras.Input(shape=[None, None, None], name="x")
+        y = tf.norm(x, axis=-1)
+        model = tf.keras.Model(inputs={"x": x}, outputs={"y": y})
+        model.save(sm_dir)
+        # Input image: f(x, y) = x * y if x > y else 0
+        bmx = otbApplication.Registry.CreateApplication("BandMathX")
+        bmx.SetParameterString("exp", "{idxX>idxY?idxX*idxY:0}")
+        bmx.SetParameterStringList(
+            "il", [resolve_paths("$DATADIR/xs_subset.tif")]
+        )
+        bmx.Execute()
+        # Inference application, fed in-memory with the BandMathX output
+        infer = otbApplication.Registry.CreateApplication(
+            "TensorflowModelServe"
+        )
+        infer.SetParameterString("model.dir", sm_dir)
+        infer.SetParameterString("model.fullyconv", "on")
+        infer.AddImageToParameterInputImageList(
+            "source1.il", bmx.GetParameterOutputImage("out")
+        )
+        infer.SetParameterFloat("source1.nodata", 0.0)
+        # Same 4x4 size for receptive field, expression field and tiles
+        for param in [
+            "source1.rfieldx",
+            "source1.rfieldy",
+            "output.efieldx",
+            "output.efieldy",
+            "optim.tilesizex",
+            "optim.tilesizey",
+        ]:
+            infer.SetParameterInt(param, 4)
+        infer.SetParameterFloat("output.bv", 1024)
+        infer.SetParameterString("out", resolve_paths("$TMPDIR/nd_out.tif"))
+        infer.ExecuteAndWriteOutput()
+        # Compare the produced raster with the reference stored in the test data
+        self.assertTrue(
+            compare(
+                raster1=resolve_paths("$TMPDIR/nd_out.tif"),
+                raster2=resolve_paths("$DATADIR/nd_out.tif"),
+            )
+        )
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/docker/build-env-tf.sh b/tools/docker/build-env-tf.sh
index ff5c569210f66540df2872f73db0facd756dfd6e..4eb7c7ff56e4ba0e998715f4fe3ffb3314098845 100644
--- a/tools/docker/build-env-tf.sh
+++ b/tools/docker/build-env-tf.sh
@@ -34,6 +34,16 @@ export TF_NEED_ROCM=0
 export TF_NEED_CUDA=0
 export CUDA_TOOLKIT_PATH=$(find /usr/local -maxdepth 1 -type d -name 'cuda-*')
 if  [ ! -z $CUDA_TOOLKIT_PATH ] ; then
+    # Optional TensorRT support: TENSORRT holds the exact package version to pin
+    if [ -n "$TENSORRT" ]; then
+        echo "Building tensorflow with TensorRT support"
+        # -y: there is nobody to answer a confirmation prompt during an image
+        # build, and apt aborts when it cannot read the answer
+        apt-get install -y \
+            libnvinfer8="$TENSORRT" \
+            libnvinfer-dev="$TENSORRT" \
+            libnvinfer-plugin8="$TENSORRT" \
+            libnvinfer-plugin-dev="$TENSORRT"
+        # Major TensorRT version, read from the installed NvInferVersion.h header
+        export TF_TENSORRT_VERSION=$(cat $(find /usr/ -type f -name NvInferVersion.h) | grep '#define NV_TENSORRT_MAJOR' | cut -f3 -d' ')
+        export TF_NEED_TENSORRT=1
+    fi
     export TF_CUDA_VERSION=$(echo $CUDA_TOOLKIT_PATH | sed -r 's/.*\/cuda-(.*)/\1/')
     export TF_CUDA_COMPUTE_CAPABILITIES="5.2,6.1,7.0,7.5,8.6"
@@ -41,6 +51,6 @@ if  [ ! -z $CUDA_TOOLKIT_PATH ] ; then
     export TF_CUDA_CLANG=0
     export TF_NEED_TENSORRT=0
     export CUDNN_INSTALL_PATH="/usr/"
-    export TF_CUDNN_VERSION=$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' $CUDNN_INSTALL_PATH/include/cudnn.h)
+    export TF_CUDNN_VERSION=$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' $CUDNN_INSTALL_PATH/include/cudnn_version.h)
     export TF_NCCL_VERSION=2