001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the files COPYING and Copyright.html. *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * Or, see https://support.hdfgroup.org/products/licenses.html               *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.util.List;
019
020/**
021 * The abstract class provides general APIs to create and manipulate dataset
022 * objects, and retrieve dataset properties, datatype and dimension sizes.
023 * <p>
024 * This class provides two convenient functions, read()/write(), to read/write
025 * data values. Reading/writing data may take many library calls if we use the
026 * library APIs directly. The read() and write functions hide all the details of
027 * these calls from users.
028 * <p>
029 * For more details on dataset,
030 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b>
031 * <p>
032 *
033 * @see hdf.object.ScalarDS
034 * @see hdf.object.CompoundDS
035 *
036 * @version 1.1 9/4/2007
037 * @author Peter X. Cao
038 */
039public abstract class Dataset extends HObject implements MetaDataContainer, DataFormat {
040    private static final long serialVersionUID    = -3360885430038261178L;
041
042    private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class);
043
044    /**
045     * The memory buffer that holds the raw data array of the dataset.
046     */
047    protected transient Object          data;
048
049    /**
050     * The number of dimensions of the dataset.
051     */
052    protected int             rank;
053
054    /**
055     * The current dimension sizes of the dataset
056     */
057    protected long[]          dims;
058
059    /**
060     * The max dimension sizes of the dataset
061     */
062    protected long[]          maxDims;
063
064    /**
065     * Array that contains the number of data points selected (for read/write)
066     * in each dimension.
067     * <p>
068     * The selected size must be less than or equal to the current dimension size.
069     * A subset of a rectangle selection is defined by the starting position and
070     * selected sizes.
071     * <p>
072     * For example, if a 4 X 5 dataset is as follows:
073     *
074     * <pre>
075     *     0,  1,  2,  3,  4
076     *    10, 11, 12, 13, 14
077     *    20, 21, 22, 23, 24
078     *    30, 31, 32, 33, 34
079     * long[] dims = {4, 5};
080     * long[] startDims = {1, 2};
081     * long[] selectedDims = {3, 3};
082     * then the following subset is selected by the startDims and selectedDims above:
083     *     12, 13, 14
084     *     22, 23, 24
085     *     32, 33, 34
086     * </pre>
087     */
088    protected long[]          selectedDims;
089
090    /**
091     * The starting position of each dimension of a selected subset. With both
092     * the starting position and selected sizes, the subset of a rectangle
093     * selection is fully defined.
094     */
095    protected long[]          startDims;
096
097    /**
098     * Array that contains the indices of the dimensions selected for display.
099     * <p>
100     * <B>selectedIndex[] is provided for two purposes:</B>
101     * <OL>
102     * <LI>
103     * selectedIndex[] is used to indicate the order of dimensions for display,
104     * i.e. selectedIndex[0] = row, selectedIndex[1] = column and
105     * selectedIndex[2] = depth. For example, for a four dimension dataset, if
106     * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index,
107     * dim[2] is selected as column index and dim[3] is selected as depth index.
108     * <LI>
109     * selectedIndex[] is also used to select dimensions for display for
110     * datasets with three or more dimensions. We assume that applications such
111     * as HDFView can only display data up to three dimensions (a 2D
112     * spreadsheet/image with a third dimension that the 2D spreadsheet/image is
113     * cut from). For datasets with more than three dimensions, we need
114     * selectedIndex[] to store which three dimensions are chosen for display.
115     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
116     * then dim[1] is selected as row index, dim[2] is selected as column index
117     * and dim[3] is selected as depth index. dim[0] is not selected. Its
118     * location is fixed at 0 by default.
119     * </OL>
120     */
121    protected final int[]     selectedIndex;
122
123    /**
124     * The number of elements to move from the start location in each dimension.
125     * For example, if selectedStride[0] = 2, every other data point is selected
126     * along dim[0].
127     */
128    protected long[]          selectedStride;
129
130    /**
131     * The array of dimension sizes for a chunk.
132     */
133    protected long[]          chunkSize;
134
135    /** The compression information. */
136    protected StringBuilder   compression;
137    public static final String COMPRESSION_GZIP_TXT = "GZIP: level = ";
138
139    /** The filters information. */
140    protected StringBuilder   filters;
141
142    /** The storage layout information. */
143    protected StringBuilder   storageLayout;
144
145    /** The storage information. */
146    protected StringBuilder   storage;
147
148    /** The datatype object of the dataset. */
149    protected Datatype        datatype;
150
151    /**
152     * Array of strings that represent the dimension names. It is null if dimension names do not exist.
153     */
154    protected String[]        dimNames;
155
156    /** Flag to indicate if the byte[] array is converted to strings */
157    protected boolean         convertByteToString = true;
158
159    /** Flag to indicate if data values are loaded into memory. */
160    protected boolean         isDataLoaded        = false;
161
162    /** Flag to indicate if this dataset has been initialized */
163    protected boolean         inited = false;
164
165    /** The number of data points in the memory buffer. */
166    protected long            nPoints             = 1;
167
168    /**
169     * The data buffer that contains the raw data directly reading from file
170     * (before any data conversion).
171     */
172    protected transient Object originalBuf = null;
173
174    /**
175     * The array that holds the converted data of unsigned C-type integers.
176     * <p>
177     * For example, suppose that the original data is an array of unsigned
178     * 16-bit short integers. Since Java does not support unsigned integers, the
179     * data is converted to an array of 32-bit signed integers. In that case, the
180     * converted buffer is the array of 32-bit signed integers.
181     */
182    protected transient Object convertedBuf = null;
183
184    /**
185     * Constructs a Dataset object with a given file, name and path.
186     *
187     * @param theFile
188     *            the file that contains the dataset.
189     * @param dsName
190     *            the name of the Dataset, e.g. "dset1".
191     * @param dsPath
192     *            the full group path of this Dataset, e.g. "/arrays/".
193     */
194    public Dataset(FileFormat theFile, String dsName, String dsPath) {
195        this(theFile, dsName, dsPath, null);
196    }
197
198    /**
199     * @deprecated Not for public use in the future. <br>
200     *             Use {@link #Dataset(FileFormat, String, String)} instead.
201     *
202     * @param theFile
203     *            the file that contains the dataset.
204     * @param dsName
205     *            the name of the Dataset, e.g. "dset1".
206     * @param dsPath
207     *            the full group path of this Dataset, e.g. "/arrays/".
208     * @param oid
209     *            the oid of this Dataset.
210     */
211    @Deprecated
212    public Dataset(FileFormat theFile, String dsName, String dsPath, long[] oid) {
213        super(theFile, dsName, dsPath, oid);
214        log.trace("Dataset: start {}", dsName);
215
216        datatype = null;
217        rank = -1;
218        data = null;
219        dims = null;
220        maxDims = null;
221        selectedDims = null;
222        startDims = null;
223        selectedStride = null;
224        chunkSize = null;
225        compression = new StringBuilder("NONE");
226        filters = new StringBuilder("NONE");
227        storageLayout = new StringBuilder("NONE");
228        storage = new StringBuilder("NONE");
229        dimNames = null;
230
231        selectedIndex = new int[3];
232        selectedIndex[0] = 0;
233        selectedIndex[1] = 1;
234        selectedIndex[2] = 2;
235    }
236
237    /**
238     * Clears memory held by the dataset, such as the data buffer.
239     */
240    @SuppressWarnings("rawtypes")
241    public void clear() {
242        if (data != null) {
243            if (data instanceof List) {
244                ((List) data).clear();
245            }
246            data = null;
247            originalBuf = null;
248            convertedBuf = null;
249        }
250        isDataLoaded = false;
251    }
252
253    /**
254     * Returns the rank (number of dimensions) of the dataset.
255     *
256     * @return the number of dimensions of the dataset.
257     */
258    @Override
259    public final int getRank() {
260        if (!inited)
261            init();
262
263        return rank;
264    }
265
266    /**
267     * Returns the array that contains the dimension sizes of the dataset.
268     *
269     * @return the dimension sizes of the dataset.
270     */
271    @Override
272    public final long[] getDims() {
273        if (!inited)
274            init();
275
276        return dims;
277    }
278
279    /**
280     * Returns the array that contains the max dimension sizes of the dataset.
281     *
282     * @return the max dimension sizes of the dataset.
283     */
284    public final long[] getMaxDims() {
285        if (!inited) init();
286
287        if (maxDims == null) return dims;
288
289        return maxDims;
290    }
291
292    /**
293     * Returns the dimension sizes of the selected subset.
294     * <p>
295     * The SelectedDims is the number of data points of the selected subset.
296     * Applications can use this array to change the size of selected subset.
297     *
298     * The selected size must be less than or equal to the current dimension size.
299     * Combined with the starting position, selected sizes and stride, the
300     * subset of a rectangle selection is fully defined.
301     * <p>
302     * For example, if a 4 X 5 dataset is as follows:
303     *
304     * <pre>
305     *     0,  1,  2,  3,  4
306     *    10, 11, 12, 13, 14
307     *    20, 21, 22, 23, 24
308     *    30, 31, 32, 33, 34
309     * long[] dims = {4, 5};
310     * long[] startDims = {1, 2};
311     * long[] selectedDims = {3, 3};
312     * long[] selectedStride = {1, 1};
313     * then the following subset is selected by the startDims and selectedDims
314     *     12, 13, 14
315     *     22, 23, 24
316     *     32, 33, 34
317     * </pre>
318     *
319     * @return the dimension sizes of the selected subset.
320     */
321    @Override
322    public final long[] getSelectedDims() {
323        if (!inited) init();
324
325        return selectedDims;
326    }
327
328    /**
329     * Returns the starting position of a selected subset.
330     * <p>
331     * Applications can use this array to change the starting position of a
332     * selection. Combined with the selected dimensions, selected sizes and
333     * stride, the subset of a rectangle selection is fully defined.
334     * <p>
335     * For example, if a 4 X 5 dataset is as follows:
336     *
337     * <pre>
338     *     0,  1,  2,  3,  4
339     *    10, 11, 12, 13, 14
340     *    20, 21, 22, 23, 24
341     *    30, 31, 32, 33, 34
342     * long[] dims = {4, 5};
343     * long[] startDims = {1, 2};
344     * long[] selectedDims = {3, 3};
345     * long[] selectedStride = {1, 1};
346     * then the following subset is selected by the startDims and selectedDims
347     *     12, 13, 14
348     *     22, 23, 24
349     *     32, 33, 34
350     * </pre>
351     *
352     * @return the starting position of a selected subset.
353     */
354    @Override
355    public final long[] getStartDims() {
356        if (!inited) init();
357
358        return startDims;
359    }
360
361    /**
362     * Returns the selectedStride of the selected dataset.
363     * <p>
364     * Applications can use this array to change how many elements to move in
365     * each dimension.
366     *
367     * Combined with the starting position and selected sizes, the subset of a
368     * rectangle selection is defined.
369     * <p>
370     * For example, if a 4 X 5 dataset is as follows:
371     *
372     * <pre>
373     *     0,  1,  2,  3,  4
374     *    10, 11, 12, 13, 14
375     *    20, 21, 22, 23, 24
376     *    30, 31, 32, 33, 34
377     * long[] dims = {4, 5};
378     * long[] startDims = {0, 0};
379     * long[] selectedDims = {2, 2};
380     * long[] selectedStride = {2, 3};
381     * then the following subset is selected by the startDims and selectedDims
382     *     0,   3
383     *     20, 23
384     * </pre>
385     *
386     * @return the selectedStride of the selected dataset.
387     */
388    @Override
389    public final long[] getStride() {
390        if (!inited) init();
391
392        if (rank <= 0) {
393            return null;
394        }
395
396        if (selectedStride == null) {
397            selectedStride = new long[rank];
398            for (int i = 0; i < rank; i++) {
399                selectedStride[i] = 1;
400            }
401        }
402
403        return selectedStride;
404    }
405
406    /**
407     * Sets the flag that indicates if a byte array is converted to a string
408     * array.
409     * <p>
410     * In a string dataset, the raw data from file is stored in a byte array. By
411     * default, this byte array is converted to an array of strings. For a large
412     * dataset (e.g. more than one million strings), the conversion takes a long
413     * time and requires a lot of memory space to store the strings. In some
414     * applications, such a conversion can be delayed. For example, A GUI
415     * application may convert only the part of the strings that is visible to the
416     * users, not the entire data array.
417     * <p>
418     * setConvertByteToString(boolean b) allows users to set the flag so that
419     * applications can choose to perform the byte-to-string conversion or not.
420     * If the flag is set to false, the getData() returns an array of byte
421     * instead of an array of strings.
422     *
423     * @param b
424     *            convert bytes to strings if b is true; otherwise, if false, do
425     *            not convert bytes to strings.
426     */
427    public final void setConvertByteToString(boolean b) {
428        convertByteToString = b;
429    }
430
431    /**
432     * Returns the flag that indicates if a byte array is converted to a string
433     * array.
434     *
435     * @return true if byte array is converted to string; otherwise, returns
436     *         false if there is no conversion.
437     */
438    public final boolean getConvertByteToString() {
439        return convertByteToString;
440    }
441
442    /**
443     * Reads the raw data of the dataset from file to a byte array.
444     * <p>
445     * readBytes() reads raw data to an array of bytes instead of array of its
446     * datatype. For example, for a one-dimension 32-bit integer dataset of
447     * size 5, readBytes() returns a byte array of size 20 instead of an
448     * int array of 5.
449     * <p>
450     * readBytes() can be used to copy data from one dataset to another
451     * efficiently because the raw data is not converted to its native type, it
452     * saves memory space and CPU time.
453     *
454     * @return the byte array of the raw data.
455     *
456     * @throws Exception if data can not be read
457     */
458    public abstract byte[] readBytes() throws Exception; // abstract: no body here, concrete dataset classes supply the raw-byte read
459
460    /**
461     * Writes the memory buffer of this dataset to file.
462     *
463     * @throws Exception if buffer can not be written
464     */
465    @Override
466    public final void write() throws Exception {
467        if (data != null) {
468            write(data);
469        }
470    }
471
472    /**
473     * Creates a new dataset and writes the data buffer to the new dataset.
474     * <p>
475     * This function allows applications to create a new dataset for a given
476     * data buffer. For example, users can select a specific interesting part
477     * from a large image and create a new image with the selection.
478     * <p>
479     * The new dataset retains the datatype and dataset creation properties of
480     * this dataset.
481     *
482     * @param pgroup
483     *            the group which the dataset is copied to.
484     * @param name
485     *            the name of the new dataset.
486     * @param dims
487     *            the dimension sizes of the new dataset.
488     * @param data
489     *            the data values of the subset to be copied.
490     *
491     * @return the new dataset.
492     *
493     * @throws Exception if dataset can not be copied
494     */
495    public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception; // abstract: no body here, concrete dataset classes implement the copy
496
497    @Override
498    public final boolean isInited() {
499        return inited;
500    }
501
502    /**
503     * Returns the data buffer of the dataset in memory.
504     * <p>
505     * If data is already loaded into memory, returns the data; otherwise, calls
506     * read() to read data from file into a memory buffer and returns the memory
507     * buffer.
508     * <p>
509     * By default, the whole dataset is read into memory. Users can also select
510     * a subset to read. Subsetting is done in an implicit way.
511     * <p>
512     * <b>How to Select a Subset</b>
513     * <p>
514     * A selection is specified by three arrays: start, stride and count.
515     * <ol>
516     * <li>start: offset of a selection
517     * <li>stride: determines how many elements to move in each dimension
518     * <li>count: number of elements to select in each dimension
519     * </ol>
520     * getStartDims(), getStride() and getSelectedDims() returns the start,
521     * stride and count arrays respectively. Applications can make a selection
522     * by changing the values of the arrays.
523     * <p>
524     * The following example shows how to make a subset. In the example, the
525     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
526     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
527     * We want to select every other data point in dims[1] and dims[2]
528     *
529     * <pre>
530     * int rank = dataset.getRank(); // number of dimensions of the dataset
531     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
532     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
533     * long[] start = dataset.getStartDims(); // the offset of the selection
534     * long[] stride = dataset.getStride(); // the stride of the dataset
535     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display
536     *
537     * // select dim1 and dim2 as 2D data for display, and slice through dim0
538     * selectedIndex[0] = 1;
539     * selectedIndex[1] = 2;
540     * selectedIndex[2] = 0;
541     *
542     * // reset the selection arrays
543     * for (int i = 0; i &lt; rank; i++) {
544     *     start[i] = 0;
545     *     selected[i] = 1;
546     *     stride[i] = 1;
547     * }
548     *
549     * // set stride to 2 on dim1 and dim2 so that every other data point is
550     * // selected.
551     * stride[1] = 2;
552     * stride[2] = 2;
553     *
554     * // set the selection size of dim1 and dim2
555     * selected[1] = dims[1] / stride[1];
556     * selected[2] = dims[2] / stride[2];
557     *
558     * // when dataset.getData() is called, the selection above will be used since
559     * // the dimension arrays are passed by reference. Changes of these arrays
560     * // outside the dataset object directly change the values of these array
561     * // in the dataset object.
562     * </pre>
563     * <p>
564     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
565     * short, int, float, double or String type based on the datatype of the
566     * dataset.
567     * <p>
568     * For CompoundDS, the memory data object is an java.util.List object. Each
569     * element of the list is a data array that corresponds to a compound field.
570     * <p>
571     * For example, if compound dataset "comp" has the following nested
572     * structure, and member datatypes
573     *
574     * <pre>
575     * comp --&gt; m01 (int)
576     * comp --&gt; m02 (float)
577     * comp --&gt; nest1 --&gt; m11 (char)
578     * comp --&gt; nest1 --&gt; m12 (String)
579     * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
580     * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
581     * </pre>
582     *
583     * getData() returns a list of six arrays: {int[], float[], char[],
584     * String[], long[] and double[]}.
585     *
586     * @return the memory buffer of the dataset.
587     *
588     * @throws Exception if object can not be read
589     * @throws OutOfMemoryError if memory is exhausted
590     */
591    @Override
592    public final Object getData() throws Exception, OutOfMemoryError {
593        if (!isDataLoaded) {
594            data = read(); // load the data
595            originalBuf = data;
596            isDataLoaded = true;
597            nPoints = 1;
598            log.trace("getData: selectedDims length={}",selectedDims.length);
599            for (int j = 0; j < selectedDims.length; j++) {
600                nPoints *= selectedDims[j];
601            }
602            log.trace("getData: read {}", nPoints);
603        }
604
605        return data;
606    }
607
608    /**
609     * Not for public use in the future.
610     * <p>
611     * setData() is not safe to use because it changes memory buffer
612     * of the dataset object. Dataset operations such as write/read
613     * will fail if the buffer type or size is changed.
614     *
615     * @param d  the object data -must be an array of Objects
616     */
617    @Override
618    public final void setData(Object d) {
619        if (!(this instanceof Attribute))
620            throw new UnsupportedOperationException("setData: unsupported for non-Attribute objects");
621
622        data = d;
623    }
624
625    /**
626     * Clears the current data buffer in memory and forces the next read() to load
627     * the data from file.
628     * <p>
629     * The function read() loads data from file into memory only if the data is
630     * not read. If data is already in memory, read() just returns the memory
631     * buffer. Sometimes we want to force read() to re-read data from file. For
632     * example, when the selection is changed, we need to re-read the data.
633     *
634     * @see #getData()
635     * @see #read()
636     */
637    @Override
638    public void clearData() {
639        isDataLoaded = false;
640    }
641
642    /**
643     * Returns the dimension size of the vertical axis.
644     *
645     * <p>
646     * This function is used by GUI applications such as HDFView. GUI
647     * applications display a dataset in a 2D table or 2D image. The display
648     * order is specified by the index array of selectedIndex as follow:
649     * <dl>
650     * <dt>selectedIndex[0] -- height</dt>
651     * <dd>The vertical axis</dd>
652     * <dt>selectedIndex[1] -- width</dt>
653     * <dd>The horizontal axis</dd>
654     * <dt>selectedIndex[2] -- depth</dt>
655     * <dd>The depth axis is used for 3 or more dimensional datasets.</dd>
656     * </dl>
657     * Applications can use getSelectedIndex() to access and change the display
658     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
659     * following code will set the height=200 and width=50.
660     *
661     * <pre>
662     * int[] selectedIndex = dataset.getSelectedIndex();
663     * selectedIndex[0] = 0;
664     * selectedIndex[1] = 1;
665     * </pre>
666     *
667     * @see #getSelectedIndex()
668     * @see #getWidth()
669     *
670     * @return the size of dimension of the vertical axis.
671     */
672    @Override
673    public final long getHeight() {
674        if (!inited) init();
675
676        if ((selectedDims == null) || (selectedIndex == null)) {
677            return 0;
678        }
679
680        return selectedDims[selectedIndex[0]];
681    }
682
683    /**
684     * Returns the dimension size of the horizontal axis.
685     *
686     * <p>
687     * This function is used by GUI applications such as HDFView. GUI
688     * applications display a dataset in 2D Table or 2D Image. The display order is
689     * specified by the index array of selectedIndex as follow:
690     * <dl>
691     * <dt>selectedIndex[0] -- height</dt>
692     * <dd>The vertical axis</dd>
693     * <dt>selectedIndex[1] -- width</dt>
694     * <dd>The horizontal axis</dd>
695     * <dt>selectedIndex[2] -- depth</dt>
696     * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd>
697     * </dl>
698     * Applications can use getSelectedIndex() to access and change the display
699     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
700     * following code will set the height=200 and width=50.
701     *
702     * <pre>
703     * int[] selectedIndex = dataset.getSelectedIndex();
704     * selectedIndex[0] = 0;
705     * selectedIndex[1] = 1;
706     * </pre>
707     *
708     * @see #getSelectedIndex()
709     * @see #getHeight()
710     *
711     * @return the size of dimension of the horizontal axis.
712     */
713    @Override
714    public final long getWidth() {
715        if (!inited) init();
716
717        if ((selectedDims == null) || (selectedIndex == null)) {
718            return 0;
719        }
720
721        if ((selectedDims.length < 2) || (selectedIndex.length < 2)) {
722            return 1;
723        }
724
725        return selectedDims[selectedIndex[1]];
726    }
727
728    /**
729     * Returns the indices of display order.
730     * <p>
731     *
732     * selectedIndex[] is provided for two purposes:
733     * <OL>
734     * <LI>
735     * selectedIndex[] is used to indicate the order of dimensions for display.
736     * selectedIndex[0] is for the row, selectedIndex[1] is for the column and
737     * selectedIndex[2] for the depth.
738     * <p>
739     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
740     * then dim[1] is selected as row index, dim[2] is selected as column index
741     * and dim[3] is selected as depth index.
742     * <LI>
743     * selectedIndex[] is also used to select dimensions for display for
744     * datasets with three or more dimensions. We assume that applications such
745     * as HDFView can only display data values up to three dimensions (2D
746     * spreadsheet/image with a third dimension which the 2D spreadsheet/image
747     * is selected from). For datasets with more than three dimensions, we need
748     * selectedIndex[] to tell applications which three dimensions are chosen
749     * for display. <br>
750     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
751     * then dim[1] is selected as row index, dim[2] is selected as column index
752     * and dim[3] is selected as depth index. dim[0] is not selected. Its
753     * location is fixed at 0 by default.
754     * </OL>
755     *
756     * @return the array of the indices of display order.
757     */
758    @Override
759    public final int[] getSelectedIndex() {
760        if (!inited) init();
761
762        return selectedIndex;
763    }
764
765    /**
766     * Returns the string representation of compression information.
767     * <p>
768     * For example,
769     * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED".
770     *
771     * @return the string representation of compression information.
772     */
773    @Override
774    public final String getCompression() {
775        if (!inited) init();
776
777        return compression.toString();
778    }
779
780    /**
781     * Returns the string representation of filter information.
782     *
783     * @return the string representation of filter information.
784     */
785    public final String getFilters() {
786        if (!inited) init();
787
788        return filters.toString();
789    }
790
791    /**
792     * Returns the string representation of storage layout information.
793     *
794     * @return the string representation of storage layout information.
795     */
796    public final String getStorageLayout() {
797        if (!inited) init();
798
799        return storageLayout.toString();
800    }
801
802    /**
803     * Returns the string representation of storage information.
804     *
805     * @return the string representation of storage information.
806     */
807    public final String getStorage() {
808        if (!inited) init();
809
810        return storage.toString();
811    }
812
813    /**
814     * Returns the array that contains the dimension sizes of the chunk of the
815     * dataset. Returns null if the dataset is not chunked.
816     *
817     * @return the array of chunk sizes or returns null if the dataset is not
818     *         chunked.
819     */
820    public final long[] getChunkSize() {
821        if (!inited) init();
822
823        return chunkSize;
824    }
825
826    @Override
827    public Datatype getDatatype() {
828        return datatype;
829    }
830
831    /**
832     * @deprecated Not for public use in the future. <br>
833     *             Use {@link #convertFromUnsignedC(Object, Object)} instead.
834     *
835     * @param dataIN  the object data
836     *
837     * @return the converted object
838     */
839    @Deprecated
840    public static Object convertFromUnsignedC(Object dataIN) {
841        return Dataset.convertFromUnsignedC(dataIN, null);
842    }
843
844    /**
845     * Converts one-dimension array of unsigned C-type integers to a new array
846     * of appropriate Java integer in memory.
847     * <p>
848     * Since Java does not support unsigned integers, values of unsigned C-type
849     * integers must be converted into the appropriate Java integer type. Otherwise,
850     * the data values will not be displayed correctly. For example, if an unsigned
851     * C byte, x = 200, is stored into a Java byte y, y will be -56 instead of
852     * the correct value of 200.
853     * <p>
854     * Unsigned C integers are upgraded to Java integers according to the
855     * following table:
856     *  <table border=1>
857     * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption>
858     * <TR>
859     * <TD><B>Unsigned C Integer</B></TD>
860     * <TD><B>JAVA Intege</B>r</TD>
861     * </TR>
862     * <TR>
863     * <TD>unsigned byte</TD>
864     * <TD>signed short</TD>
865     * </TR>
866     * <TR>
867     * <TD>unsigned short</TD>
868     * <TD>signed int</TD>
869     * </TR>
870     * <TR>
871     * <TD>unsigned int</TD>
872     * <TD>signed long</TD>
873     * </TR>
874     * <TR>
875     * <TD>unsigned long</TD>
876     * <TD>signed long</TD>
877     * </TR>
878     * </TABLE>
879     * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers.
880     * Therefore, the values of unsigned 64-bit datasets may be wrong in Java
881     * applications</strong>.
882     * <p>
883     * If memory data of unsigned integers is converted by
884     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
885     * the data back to unsigned C before data is written into file.
886     *
887     * @see #convertToUnsignedC(Object, Object)
888     *
889     * @param dataIN
890     *            the input 1D array of the unsigned C-type integers.
891     * @param dataOUT
892     *            the output converted (or upgraded) 1D array of Java integers.
893     *
894     * @return the upgraded 1D array of Java integers.
895     */
896    @SuppressWarnings("rawtypes")
897    public static Object convertFromUnsignedC(Object dataIN, Object dataOUT) {
898        if (dataIN == null) {
899            log.debug("convertFromUnsignedC(): data_in is null");
900            return null;
901        }
902
903        Class dataClass = dataIN.getClass();
904        if (!dataClass.isArray()) {
905            log.debug("convertFromUnsignedC(): data_in not an array");
906            return null;
907        }
908
909        if (dataOUT != null) {
910            Class dataClassOut = dataOUT.getClass();
911            if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) {
912                log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size");
913                dataOUT = null;
914            }
915        }
916
917        String cname = dataClass.getName();
918        char dname = cname.charAt(cname.lastIndexOf('[') + 1);
919        int size = Array.getLength(dataIN);
920        log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size);
921
922        if (dname == 'B') {
923            log.debug("convertFromUnsignedC(): Java convert byte to short");
924            short[] sdata = null;
925            if (dataOUT == null) {
926                sdata = new short[size];
927            }
928            else {
929                sdata = (short[]) dataOUT;
930            }
931
932            byte[] bdata = (byte[]) dataIN;
933            for (int i = 0; i < size; i++) {
934                sdata[i] = (short) ((bdata[i] + 256) & 0xFF);
935            }
936
937            dataOUT = sdata;
938        }
939        else if (dname == 'S') {
940            log.debug("convertFromUnsignedC(): Java convert short to int");
941            int[] idata = null;
942            if (dataOUT == null) {
943                idata = new int[size];
944            }
945            else {
946                idata = (int[]) dataOUT;
947            }
948
949            short[] sdata = (short[]) dataIN;
950            for (int i = 0; i < size; i++) {
951                idata[i] = (sdata[i] + 65536) & 0xFFFF;
952            }
953
954            dataOUT = idata;
955        }
956        else if (dname == 'I') {
957            log.debug("convertFromUnsignedC(): Java convert int to long");
958            long[] ldata = null;
959            if (dataOUT == null) {
960                ldata = new long[size];
961            }
962            else {
963                ldata = (long[]) dataOUT;
964            }
965
966            int[] idata = (int[]) dataIN;
967            for (int i = 0; i < size; i++) {
968                ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL;
969            }
970
971            dataOUT = ldata;
972        }
973        else {
974            dataOUT = dataIN;
975            log.debug("convertFromUnsignedC(): Java does not support unsigned long");
976        }
977
978        return dataOUT;
979    }
980
    /**
     * Converts an array of Java integers back to unsigned C-type integer data
     * without supplying an output buffer.
     * <p>
     * This overload simply calls {@link #convertToUnsignedC(Object, Object)}
     * with a {@code null} output buffer.
     *
     * @deprecated Not for public use in the future. <br>
     *             Use {@link #convertToUnsignedC(Object, Object)} instead.
     *
     * @param dataIN
     *            the input array of the Java integer.
     *
     * @return the converted data of unsigned C-type integer array.
     */
    @Deprecated
    public static Object convertToUnsignedC(Object dataIN) {
        return Dataset.convertToUnsignedC(dataIN, null);
    }
994
995    /**
996     * Converts the array of converted unsigned integers back to unsigned C-type
997     * integer data in memory.
998     * <p>
999     * If memory data of unsigned integers is converted by
1000     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
1001     * the data back to unsigned C before data is written into file.
1002     *
1003     * @see #convertFromUnsignedC(Object, Object)
1004     *
1005     * @param dataIN
1006     *            the input array of the Java integer.
1007     * @param dataOUT
1008     *            the output array of the unsigned C-type integer.
1009     *
1010     * @return the converted data of unsigned C-type integer array.
1011     */
1012    @SuppressWarnings("rawtypes")
1013    public static Object convertToUnsignedC(Object dataIN, Object dataOUT) {
1014        if (dataIN == null) {
1015            log.debug("convertToUnsignedC(): data_in is null");
1016            return null;
1017        }
1018
1019        Class dataClass = dataIN.getClass();
1020        if (!dataClass.isArray()) {
1021            log.debug("convertToUnsignedC(): data_in not an array");
1022            return null;
1023        }
1024
1025        if (dataOUT != null) {
1026            Class dataClassOut = dataOUT.getClass();
1027            if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) {
1028                log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size");
1029                dataOUT = null;
1030            }
1031        }
1032
1033        String cname = dataClass.getName();
1034        char dname = cname.charAt(cname.lastIndexOf('[') + 1);
1035        int size = Array.getLength(dataIN);
1036        log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size);
1037
1038        if (dname == 'S') {
1039            log.debug("convertToUnsignedC(): Java convert short to byte");
1040            byte[] bdata = null;
1041            if (dataOUT == null) {
1042                bdata = new byte[size];
1043            }
1044            else {
1045                bdata = (byte[]) dataOUT;
1046            }
1047            short[] sdata = (short[]) dataIN;
1048            for (int i = 0; i < size; i++) {
1049                bdata[i] = (byte) sdata[i];
1050            }
1051            dataOUT = bdata;
1052        }
1053        else if (dname == 'I') {
1054            log.debug("convertToUnsignedC(): Java convert int to short");
1055            short[] sdata = null;
1056            if (dataOUT == null) {
1057                sdata = new short[size];
1058            }
1059            else {
1060                sdata = (short[]) dataOUT;
1061            }
1062            int[] idata = (int[]) dataIN;
1063            for (int i = 0; i < size; i++) {
1064                sdata[i] = (short) idata[i];
1065            }
1066            dataOUT = sdata;
1067        }
1068        else if (dname == 'J') {
1069            log.debug("convertToUnsignedC(): Java convert long to int");
1070            int[] idata = null;
1071            if (dataOUT == null) {
1072                idata = new int[size];
1073            }
1074            else {
1075                idata = (int[]) dataOUT;
1076            }
1077            long[] ldata = (long[]) dataIN;
1078            for (int i = 0; i < size; i++) {
1079                idata[i] = (int) ldata[i];
1080            }
1081            dataOUT = idata;
1082        }
1083        else {
1084            dataOUT = dataIN;
1085            log.debug("convertToUnsignedC(): Java does not support unsigned long");
1086        }
1087
1088        return dataOUT;
1089    }
1090
1091    /**
1092     * Converts an array of bytes into an array of Strings for a fixed string
1093     * dataset.
1094     * <p>
1095     * A C-string is an array of chars while an Java String is an object. When a
1096     * string dataset is read into a Java application, the data is stored in an
1097     * array of Java bytes. byteToString() is used to convert the array of bytes
1098     * into an array of Java strings so that applications can display and modify
1099     * the data content.
1100     * <p>
1101     * For example, the content of a two element C string dataset is {"ABC",
1102     * "abc"}. Java applications will read the data into a byte array of {65,
1103     * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java
1104     * String of strs[0]="ABC", and strs[1]="abc".
1105     * <p>
1106     * If memory data of strings is converted to Java Strings, stringToByte()
1107     * must be called to convert the memory data back to byte array before data
1108     * is written to file.
1109     *
1110     * @see #stringToByte(String[], int)
1111     *
1112     * @param bytes
1113     *            the array of bytes to convert.
1114     * @param length
1115     *            the length of string.
1116     *
1117     * @return the array of Java String.
1118     */
1119    public static final String[] byteToString(byte[] bytes, int length) {
1120        if (bytes == null) {
1121            log.debug("byteToString(): input is null");
1122            return null;
1123        }
1124
1125        int n = bytes.length / length;
1126        log.trace("byteToString(): n={} from length of {}", n, length);
1127        String[] strArray = new String[n];
1128        String str = null;
1129        int idx = 0;
1130        for (int i = 0; i < n; i++) {
1131            str = new String(bytes, i * length, length);
1132            idx = str.indexOf('\0');
1133            if (idx >= 0) {
1134                str = str.substring(0, idx);
1135            }
1136
1137            // trim only the end
1138            int end = str.length();
1139            while (end > 0 && str.charAt(end - 1) <= '\u0020')
1140                end--;
1141
1142            strArray[i] = (end <= 0) ? "" : str.substring(0, end);
1143        }
1144
1145        return strArray;
1146    }
1147
1148    /**
1149     * Converts a string array into an array of bytes for a fixed string
1150     * dataset.
1151     * <p>
1152     * If memory data of strings is converted to Java Strings, stringToByte()
1153     * must be called to convert the memory data back to byte array before data
1154     * is written to file.
1155     *
1156     * @see #byteToString(byte[] bytes, int length)
1157     *
1158     * @param strings
1159     *            the array of string.
1160     * @param length
1161     *            the length of string.
1162     *
1163     * @return the array of bytes.
1164     */
1165    public static final byte[] stringToByte(String[] strings, int length) {
1166        if (strings == null) {
1167            log.debug("stringToByte(): input is null");
1168            return null;
1169        }
1170
1171        int size = strings.length;
1172        byte[] bytes = new byte[size * length];
1173        log.trace("stringToByte(): size={} length={}", size, length);
1174        StringBuilder strBuff = new StringBuilder(length);
1175        for (int i = 0; i < size; i++) {
1176            // initialize the string with spaces
1177            strBuff.replace(0, length, " ");
1178
1179            if (strings[i] != null) {
1180                if (strings[i].length() > length) {
1181                    strings[i] = strings[i].substring(0, length);
1182                }
1183                strBuff.replace(0, length, strings[i]);
1184            }
1185
1186            strBuff.setLength(length);
1187            System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length);
1188        }
1189
1190        return bytes;
1191    }
1192
1193    /**
1194     * Returns the array of strings that represent the dimension names. Returns
1195     * null if there is no dimension name.
1196     * <p>
1197     * Some datasets have pre-defined names for each dimension such as
1198     * "Latitude" and "Longitude". getDimNames() returns these pre-defined
1199     * names.
1200     *
1201     * @return the names of dimensions, or null if there is no dimension name.
1202     */
1203    public final String[] getDimNames() {
1204        if (!inited) init();
1205
1206        return dimNames;
1207    }
1208
    /**
     * Checks if a given datatype is a string. Sub-classes must replace this
     * default implementation.
     * <p>
     * This default implementation ignores the identifier and always returns
     * false.
     *
     * @param tid
     *            The data type identifier.
     *
     * @return true if the datatype is a string; otherwise returns false.
     */
    public boolean isString(long tid) {
        return false;
    }
1221
    /**
     * Returns the size in bytes of a given datatype. Sub-classes must replace
     * this default implementation.
     * <p>
     * This default implementation ignores the identifier and always returns
     * -1.
     *
     * @param tid
     *            The data type identifier.
     *
     * @return The size of the datatype
     */
    public long getSize(long tid) {
        return -1;
    }
1234
1235    /**
1236     * Get Class of the original data buffer if converted.
1237     *
1238     * @return the Class of originalBuf
1239     */
1240    @Override
1241    @SuppressWarnings("rawtypes")
1242    public final Class getOriginalClass() {
1243        return originalBuf.getClass();
1244    }
1245
    /**
     * Checks if dataset is virtual. Sub-classes must replace
     * this default implementation.
     * <p>
     * This default implementation always returns false.
     *
     * @return true if the dataset is virtual; otherwise returns false.
     */
    public boolean isVirtual() {
        return false;
    }
1255
    /**
     * Gets the source file name at index if dataset is virtual. Sub-classes must replace
     * this default implementation.
     * <p>
     * This default implementation ignores the index and always returns null.
     *
     * @param index
     *            the index of the source file.
     *
     * @return filename if the dataset is virtual; otherwise returns null.
     */
    public String getVirtualFilename(int index) {
        return null;
    }
1265
    /**
     * Gets the number of source files if dataset is virtual. Sub-classes must replace
     * this default implementation.
     * <p>
     * This default implementation always returns -1.
     *
     * @return the list size if the dataset is virtual; otherwise returns negative.
     */
    public int getVirtualMaps() {
        return -1;
    }
1275}