Commit 06fe7f9
Provide default options for chunking of datasets (issue #635) (#636)
* Add initial chunk parameters and a function to read them from file
* First draft of applying chunk configurations
* Minor fixes
* Create listDatasetsOfNeurodataType.m
* Add new template for dataset configuration json
* Update applyChunkConfiguration and dependent functions to work with new template
* Remove unused condition in applyChunkConfiguration
* Update getParentType.m
* Add different dataset configuration profiles
* Consistently name functions and code using datasetConfiguration instead of chunkConfiguration
* Test-related fixes
* Simplify readDatasetConfiguration: replace switch block with formatted string
* Create applyCustomMatNWBPropertyNames.m: function that ensures the dataset configuration conforms with MatNWB-specific implementation details
* Update configuration/archive_dataset_configuration.json (Co-authored-by: Ben Dichter <[email protected]>)
* Add docstring for function applyCustomMatNWBPropertyNames.m
* Update listDatasetsOfNeurodataType.m: resolve name for dataset if the name field is missing
* Fix chunk size computation: rename flexible dimension to "flex"; use product of fixed dimensions to compute size of flex dimension
* Update readDatasetConfiguration.m: add function to update dataset configuration to conform with the MatNWB-specific implementation (i.e., Dataset types, like VectorData, having a data property)
* Update resolveDatasetConfigForDataType.m
* Change dataset configuration json, plus necessary changes to functions
* Clean up functions and add tests
* Add utility method for retrieving relative path from classname
* Add utility function for finding the directory where neurodata type classes are generated
* Update loadNamespace.m: make generatedTypesDirectory optional; if not provided, it is inferred from MATLAB's search path. Improve docstring, variable names, and error message
* Update findRootDirectoryForGeneratedTypes.m: throw error from caller instead
* Update Namespace.m: add property type constraints because name (and version?) are expected to be character types
* Workaround for MATLAB bug with `which ... -all`
* Add unit tests for schemes.utility.findRootDirectoryForGeneratedTypes
* Update listDatasetsOfNeurodataType.m: use updated version of schemes.loadNamespace. Instead of assuming types are located in the matnwb root directory, the function finds the location of types from MATLAB's search path
* Update DatasetConfigurationTest.m
* Remove DatasetConfigurationTest and clean up ApplyDatasetConfigurationTest
* Update reconfigureDataPipe.m
* More test cleanup
* Update ecephys livescript to conform with NWBInspector (@nwbinspector-PR575)
* Create dataset_configuration_schema.json
* Fix dataset configuration schema bugs and modify instances to pass validation
* Update readDatasetConfiguration to extract datasetSpecifications after schema/instance update
* Update cloud_dataset_configuration.json
* Remove comments
* Remove debug statements and unreachable cases
* Update ApplyDatasetConfigurationTest.m: check that keys for dataset configuration of Dataset-based neurodata types are renamed by appending _data, because MatNWB adds a data property to all Dataset-based classes
* Update applyCustomMatNWBPropertyNames.m: remove unused code and an unreachable error
* Add values for storageFormat and schemaVersion in the configuration instances
* Update dataset_configuration_schema.json: reorder properties
* Update schema and code to support setting target_chunk_size_unit to bytes, kiB, MiB or GiB
* Add tag to test that is not supported in older releases (<R2022a)
* Make mustHaveField a namespaced function, as it is now used by multiple functions
* Add unit test for "target_chunk_size_unit" config property
* Remove level from compression and keep parameters: specify "level" as a property in the parameters object
* Rename (compression) "algorithm" to "method"
* Update computeChunkSizeFromConfig.m: add warning if chunk target size is exceeded due to conflicting chunk size specifications
* Add unit tests verifying that specific parameters are applied to the DataPipe
* Update ApplyDatasetConfigurationTest.m: suppress newly added warning that will be triggered by some tests in this class
* Clean trailing whitespace
* Update ComputeChunkSizeFromConfigTest.m: suppress newly added warning that will be triggered by some tests in this class
* Remove redundant test: testing of chunkDimensionConstraints is handled in ComputeChunkSizeFromConfigTest
* Update ApplyDatasetConfigurationTest.m: rename and move test

---------

Co-authored-by: Ben Dichter <[email protected]>
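For orientation, the per-dataset configuration the functions below consume is JSON. The sketch below is a hypothetical instance assembled from the field names referenced in the diffs (a chunking object with target_chunk_size, target_chunk_size_unit, and strategy_by_rank; a compression object with method, parameters, and prefilters); the numeric values are invented, and the actual profiles shipped in the configuration folder may differ:

    {
      "chunking": {
        "target_chunk_size": 1,
        "target_chunk_size_unit": "MiB",
        "strategy_by_rank": {
          "1": ["flex"],
          "2": ["flex", "max"],
          "3": ["flex", "flex", "max"]
        }
      },
      "compression": {
        "method": "deflate",
        "parameters": { "level": 3 },
        "prefilters": ["shuffle"]
      }
    }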
1 parent (54679f6) · commit 06fe7f9

22 files changed: +1699 −0 lines
File: applyCustomMatNWBPropertyNames.m (new): 136 additions & 0 deletions
function datasetConfiguration = applyCustomMatNWBPropertyNames(datasetConfiguration)
% applyCustomMatNWBPropertyNames - Processes a dataset configuration structure to apply custom MatNWB property names.
%
%   datasetConfiguration = applyCustomMatNWBPropertyNames(datasetConfiguration)
%
%   This function iterates through each field of the input structure and checks
%   if the field corresponds to a known NWB type (using a mapping from short
%   names to fully qualified class names). For each recognized field:
%
%     - It retrieves the full class name and determines its superclasses.
%     - If the class is a subclass of "types.untyped.MetaClass":
%         * If it is also a "types.untyped.GroupClass", the function recursively
%           processes the subgroup configuration.
%         * If it is a "types.untyped.DatasetClass", it wraps the existing
%           configuration in a structure with a "data" property.
%     - If the field is not associated with a recognized NWB type, it remains
%       unchanged.
%
%   Input:
%     datasetConfiguration - A 1x1 struct containing dataset configuration data.
%
%   Output:
%     datasetConfiguration - The updated configuration structure with custom
%                            property names.

    arguments
        datasetConfiguration (1,1) struct
    end

    classNameMap = getNwbTypesClassnameMap();

    fields = fieldnames(datasetConfiguration);

    for i = 1:numel(fields)
        thisField = fields{i};

        % Split off the last part if the field name is "nested"
        if contains(thisField, '_')
            shortName = extractAfter(thisField, '_');
        else
            shortName = thisField;
        end

        if ~isKey(classNameMap, shortName)
            continue % Not a neurodata / nwb type
        end

        fullClassName = classNameMap(shortName);
        superclassNames = superclasses(fullClassName);

        if any(strcmp(superclassNames, "types.untyped.MetaClass"))
            thisSubConfig = datasetConfiguration.(thisField);
            if any(strcmp(superclassNames, "types.untyped.GroupClass"))
                % Todo: Remove this? Nested specs are currently not supported.
            elseif any(strcmp(superclassNames, "types.untyped.DatasetClass"))
                % Rename the field to include the _data suffix
                newFieldName = sprintf('%s_data', thisField);
                datasetConfiguration.(newFieldName) = thisSubConfig;
                datasetConfiguration = rmfield(datasetConfiguration, thisField);
            end
        else
            % For non-NWB types, leave the field unmodified.
        end
    end
end

function ancestorPath = getAncestorPath(initialPath, numSteps)
% getAncestorPath - Get an ancestor directory path.
%
%   ancestorPath = GETANCESTORPATH(initialPath, numSteps)
%
%   Input:
%     initialPath - A string representing the starting file or directory path.
%     numSteps    - A positive integer indicating the number of directory
%                   levels to move up.
%
%   Output:
%     ancestorPath - A string representing the ancestor directory path.

    arguments
        initialPath (1,1) string
        numSteps (1,1) double
    end
    splitPath = split(initialPath, filesep);

    ancestorPath = fullfile(splitPath{1:end-numSteps}); % char output

    % Ensure the path starts with a file separator on Unix systems.
    if isunix && ~startsWith(ancestorPath, filesep)
        ancestorPath = [filesep ancestorPath];
    end
end

function map = getNwbTypesClassnameMap()
% getNwbTypesClassnameMap - Constructs a mapping between NWB type short names
% and their fully qualified class names.
%
%   map = GETNWBTYPESCLASSNAMEMAP()
%
%   The function locates the directory containing NWB type definitions
%   (using the location of 'types.core.NWBFile' as a reference) and searches
%   recursively for all MATLAB class definition files (*.m). It then filters
%   out files in the '+types/+untyped' and '+types/+util' folders.
%
%   Output:
%     map - A mapping object (either a dictionary or containers.Map) where:
%       * Keys   : Short class names (derived from file names without the .m extension).
%       * Values : Fully qualified class names in the format "types.namespace.ClassName".

    typesClassDirectory = getAncestorPath( which('types.core.NWBFile'), 2 );

    % Find all MATLAB class files recursively within the directory.
    L = dir(fullfile(typesClassDirectory, '**', '*.m'));

    % Exclude files from the '+types/+untyped' and '+types/+util' directories.
    ignore = contains({L.folder}, fullfile('+types', '+untyped')) | ...
             contains({L.folder}, fullfile('+types', '+util'));
    L(ignore) = [];

    % Extract namespace and class names from the file paths.
    [~, namespaceNames] = fileparts({L.folder});
    namespaceNames = string( strrep(namespaceNames, '+', '') );
    classNames = string( strrep( {L.name}, '.m', '') );

    % Compose fully qualified class names using the namespace and class name.
    fullClassNames = matnwb.common.composeFullClassName(namespaceNames, classNames);

    % Create a mapping from the short class names to the fully qualified class names.
    try
        map = dictionary(classNames, fullClassNames);
    catch % Fallback for older versions of MATLAB.
        map = containers.Map(classNames, fullClassNames);
    end
end
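Usage sketch (not part of the commit): only fields named after Dataset-based types are renamed. The field names below are invented for illustration; VectorData is a DatasetClass, so its configuration moves under VectorData_data, while a GroupClass-based name such as TimeSeries is currently left untouched.

    % Hypothetical input; requires generated NWB types (types.core.*) on the path
    conf = struct();
    conf.VectorData = struct('compression', struct('method', 'deflate'));
    conf.TimeSeries = struct();  % GroupClass-based: left as-is for now
    conf = applyCustomMatNWBPropertyNames(conf);
    % fieldnames(conf) is now {'TimeSeries'; 'VectorData_data'}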
File: computeChunkSizeFromConfig.m, in the io.config.internal package (new): 118 additions & 0 deletions
function chunkSize = computeChunkSizeFromConfig(A, configuration)
% computeChunkSizeFromConfig - Compute the chunk size for a dataset using the provided configuration.
%   This function determines the chunk size for a dataset based on the chunk
%   constraints/strategies provided in the configuration structure. It adjusts
%   dimensions according to rules: 'max' uses the dataset size, fixed numbers
%   use their value, and 'flex' calculates the dimension size to approximate
%   the target chunk size in bytes.
%
%   Inputs:
%     A - A numeric dataset whose chunk size is to be computed.
%     configuration (1,1) struct - Struct defining the chunking strategy for
%       different ranks of a dataset.
%
%   Output:
%     chunkSize - A vector specifying the chunk size for each dimension of A.

    arguments
        A {mustBeNumeric}
        configuration (1,1) struct ...
            {matnwb.common.mustHaveField(configuration, "strategy_by_rank", "target_chunk_size", "target_chunk_size_unit")}
    end

    % Get dataset size
    dataSize = size(A);
    numDimensions = numel(dataSize);

    % NWB / H5 supports true 1D vectors. If the data is a vector, represent
    % dataSize as a scalar for computation of chunkSize.
    if numDimensions == 2 && any(dataSize == 1)
        numDimensions = 1;
        originalDataSize = dataSize;
        dataSize(dataSize == 1) = [];
    end

    % Retrieve constraints for current rank.
    strategy = configuration.strategy_by_rank;
    rankFieldName = sprintf('x%d', numDimensions); % Adjust for quirk in MATLAB where the fieldname of a numeric value is prefixed with "x" when reading from json
    if ~isfield(strategy, rankFieldName)
        error('NWB:ComputeChunkSizeFromConfig:MatchingRankNotFound', ...
            'Configuration for %d dimensions is missing.', numDimensions)
    end
    constraints = strategy.(rankFieldName);
    assert(iscell(constraints), ...
        'Expected constraints for dimensions to be provided as a cell array, got %s.', class(constraints))

    % Determine the target number of array elements per chunk.
    targetChunkSizeBytes = io.config.internal.getTargetChunkSizeInBytes(configuration);
    elementSizeBytes = io.config.internal.getDataByteSize(A) / numel(A); % bytes per element
    targetNumElements = targetChunkSizeBytes / elementSizeBytes; % Per chunk

    % Preallocate arrays.
    chunkSize = zeros(1, numDimensions);
    isFlexDim = false(1, numDimensions);

    isFlex = @(x) ischar(x) && strcmp(x, 'flex');
    isMax = @(x) ischar(x) && strcmp(x, 'max');

    % Calculate chunk size for each dimension
    for dim = 1:numDimensions
        if dim > numel(constraints)
            % Use full size for dimensions beyond the specification
            chunkSize(dim) = dataSize(dim);
        else
            thisDimensionConstraint = constraints{dim};
            if isFlex(thisDimensionConstraint)
                isFlexDim(dim) = true;
                % Leave chunkSize(dim) to be determined.
            elseif isMax(thisDimensionConstraint)
                chunkSize(dim) = dataSize(dim);
            elseif isnumeric(thisDimensionConstraint)
                % thisDimensionConstraint is an upper bound
                chunkSize(dim) = min([thisDimensionConstraint, dataSize(dim)]);
            else
                error('NWB:ComputeChunkSizeFromConfig:InvalidConstraint', ...
                    'Invalid chunk constraint for dimension %d.', dim);
            end
        end
    end

    % Compute the product of fixed dimensions (number of elements per chunk).
    if any(~isFlexDim)
        fixedProduct = prod(chunkSize(~isFlexDim));
    else
        fixedProduct = 1;
    end

    % For flex dimensions, compute the remaining number of elements
    % and allocate them equally in the exponent space.
    nFlex = sum(isFlexDim);
    if nFlex > 0
        remainingElements = targetNumElements / fixedProduct;
        % Ensure remainingElements is at least 1.
        remainingElements = max(remainingElements, 1);
        % Compute an equal allocation factor for each flex dimension.
        elementsPerFlexDimension = nthroot(remainingElements, nFlex);
        % Assign computed chunk size for each flex dimension.
        for dim = find(isFlexDim)
            proposedSize = max(1, round(elementsPerFlexDimension));
            % Do not exceed the full dimension size.
            chunkSize(dim) = min(proposedSize, dataSize(dim));
        end
    end

    % Ensure chunk size does not exceed dataset size in any dimension
    chunkSize = min(chunkSize, dataSize);

    if numDimensions == 1
        originalDataSize(originalDataSize ~= 1) = chunkSize;
        chunkSize = originalDataSize;
    end

    actualBytesPerChunk = prod(chunkSize) * elementSizeBytes;
    if actualBytesPerChunk > targetChunkSizeBytes
        warning('NWB:ComputeChunkSizeFromConfig:TargetSizeExceeded', ...
            ['The provided dataset configuration produces chunks that have a ', ...
            'larger bytesize than the specified target chunk size.'])
    end
end
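Worked sketch (not part of the commit) of how the rank lookup and the 'flex' computation interact, assuming a configuration decoded from JSON so the rank key carries MATLAB's 'x' prefix; the numbers are invented:

    % Hypothetical configuration: 1 MiB target, rank-2 strategy ['flex', 'max']
    cfg = struct( ...
        'target_chunk_size', 1, ...
        'target_chunk_size_unit', 'MiB', ...
        'strategy_by_rank', struct('x2', {{'flex', 'max'}}));

    A = zeros(10000, 64);  % double, 8 bytes per element
    chunkSize = io.config.internal.computeChunkSizeFromConfig(A, cfg);
    % 'max' fixes dim 2 at 64; 'flex' then targets
    % (1 MiB / 8 bytes) / 64 = 2048 rows, so chunkSize is roughly [2048 64].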
File: configureDataPipeFromData.m (new): 56 additions & 0 deletions
function dataPipe = configureDataPipeFromData(numericData, datasetConfig)
% configureDataPipeFromData - Configure a DataPipe from numeric data and dataset configuration

    import io.config.internal.computeChunkSizeFromConfig
    import types.untyped.datapipe.properties.DynamicFilter

    chunkSize = computeChunkSizeFromConfig(numericData, datasetConfig.chunking);
    maxSize = size(numericData);

    dataPipeArgs = {...
        "data", numericData, ...
        "maxSize", maxSize, ...
        "chunkSize", chunkSize };

    hasShuffle = ~isempty(datasetConfig.compression.prefilters) ...
        && contains(datasetConfig.compression.prefilters, 'shuffle');

    % Check if the configured compression method is DEFLATE (gzip)
    if strcmpi(datasetConfig.compression.method, "deflate") ...
            || strcmpi(datasetConfig.compression.method, "gzip")
        if isempty(datasetConfig.compression.parameters) ...
                || ~isfield(datasetConfig.compression.parameters, 'level')
            defaultCompressionLevel = 3;
            warning('NWB:DataPipeConfiguration:LevelParameterNotSet', ...
                ['The dataset configuration does not contain a value for ', ...
                'the "level" parameter of the Deflate filter. The default ', ...
                'value %d will be used.'], defaultCompressionLevel)
            compressionLevel = defaultCompressionLevel;
        else
            compressionLevel = datasetConfig.compression.parameters.level;
        end
        % Use standard compression filters
        dataPipeArgs = [ dataPipeArgs, ...
            {'hasShuffle', hasShuffle, ...
            'compressionLevel', compressionLevel} ...
            ];
    else
        % Create property list of custom filters for dataset creation
        parameters = struct2cell(datasetConfig.compression.parameters);
        compressionFilter = DynamicFilter( ...
            datasetConfig.compression.method, ...
            parameters{:} );

        if hasShuffle
            shuffleFilter = types.untyped.datapipe.properties.Shuffle();
            filters = [shuffleFilter compressionFilter];
        else
            filters = compressionFilter;
        end
        dataPipeArgs = [ dataPipeArgs, ...
            {'filters', filters} ];
    end

    % Create the datapipe.
    dataPipe = types.untyped.DataPipe( dataPipeArgs{:} );
end
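Usage sketch (not part of the commit). The datasetConfig shape mirrors the JSON instance sketched near the top of this page; the package location of this function is an assumption based on its helpers living in io.config.internal, and all values are invented:

    % Hypothetical configuration for the standard deflate + shuffle path
    datasetConfig = struct();
    datasetConfig.chunking = struct( ...
        'target_chunk_size', 1, 'target_chunk_size_unit', 'MiB', ...
        'strategy_by_rank', struct('x2', {{'flex', 'max'}}));
    datasetConfig.compression = struct( ...
        'method', 'deflate', ...
        'parameters', struct('level', 3), ...
        'prefilters', {{'shuffle'}});

    data = rand(10000, 64);
    % Assumed package path (same package as the helpers above):
    dataPipe = io.config.internal.configureDataPipeFromData(data, datasetConfig);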
File: flipChunkDimensions.m, in the io.config.internal package (new): 40 additions & 0 deletions
function configuration = flipChunkDimensions(configuration)
%FLIPCHUNKDIMENSIONS Reverses the chunk dimension arrays in a structure.
%
%   configuration = flipChunkDimensions(configuration) locates the
%   strategy_by_rank substructure in a configuration structure and reverses
%   the constraint array for each rank field.
%
%   This is needed because MatNWB dimensions are flipped upon export to
%   HDF5 files, while the specification is defined based on the dimension
%   ordering in NWB schemas / HDF5.

    if isstruct(configuration)
        fields = fieldnames(configuration);
        for i = 1:length(fields)
            fieldName = fields{i};
            if strcmp(fieldName, 'strategy_by_rank')
                % Process the chunk dimensions field
                configuration.(fieldName) = ...
                    processChunkDimensions(configuration.(fieldName));
            else
                % Otherwise, recursively process the field
                configuration.(fieldName) = ...
                    io.config.internal.flipChunkDimensions(configuration.(fieldName));
            end
        end
    else
        % Pass: non-struct values are left unchanged.
    end
end

function cd = processChunkDimensions(cd)
% Process the chunk dimensions field.
    rankFieldNames = fieldnames(cd);

    for i = 1:numel(rankFieldNames)
        thisRank = rankFieldNames{i};
        cd.(thisRank) = flipud(cd.(thisRank));
    end
end
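Sketch (not part of the commit): jsondecode returns JSON arrays as column cell arrays, which flipud reverses. Values are invented for illustration:

    cfg.chunking.strategy_by_rank = struct('x2', {{'flex'; 'max'}});
    cfg = io.config.internal.flipChunkDimensions(cfg);
    % cfg.chunking.strategy_by_rank.x2 is now {'max'; 'flex'}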
File: getDataByteSize.m, in the io.config.internal package (new): 7 additions & 0 deletions
function byteSize = getDataByteSize(data)
% getDataByteSize - Get the byte size of a numeric array
    dataType = class(data);
    bytesPerDataPoint = io.getMatTypeSize(dataType);

    byteSize = numel(data) .* bytesPerDataPoint;
end
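Quick arithmetic check (not part of the commit): a double occupies 8 bytes per element, so:

    % 100*3 elements * 8 bytes each = 2400 bytes
    byteSize = io.config.internal.getDataByteSize(zeros(100, 3));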
