001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the files COPYING and Copyright.html. *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * Or, see https://support.hdfgroup.org/products/licenses.html               *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.util.Vector;
019
020/**
021 * The abstract class provides general APIs to create and manipulate dataset
022 * objects, and retrieve dataset properties, datatype and dimension sizes.
023 * <p>
024 * This class provides two convenient functions, read()/write(), to read/write
025 * data values. Reading/writing data may take many library calls if we use the
 * library APIs directly. The read() and write() functions hide all the details of
027 * these calls from users.
028 * <p>
029 * For more details on dataset,
030 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b>
031 * <p>
032 *
033 * @see hdf.object.ScalarDS
034 * @see hdf.object.CompoundDS
035 *
036 * @version 1.1 9/4/2007
037 * @author Peter X. Cao
038 */
039public abstract class Dataset extends HObject implements MetaDataContainer, DataFormat {
040    private static final long serialVersionUID    = -3360885430038261178L;
041
042    private final static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class);
043
    /**
     * The memory buffer that holds the raw data of the dataset. The
     * constructor sets it to null; getData() assigns the buffer returned by
     * read().
     */
    protected Object          data;

    /**
     * The number of dimensions of the dataset. The constructor sets it to -1.
     */
    protected int             rank;

    /**
     * The current dimension sizes of the dataset.
     */
    protected long[]          dims;

    /**
     * The max dimension sizes of the dataset. When this array is null,
     * getMaxDims() returns the current dimension sizes instead.
     */
    protected long[]          maxDims;
063
    /**
     * Array that contains the number of data points selected (for read/write)
     * in each dimension. The constructor sets it to null.
     * <p>
     * The selected size must be less than or equal to the current dimension size.
     * A subset of a rectangle selection is defined by the starting position and
     * selected sizes.
     * <p>
     * For example, if a 4 X 5 dataset is as follows:
     *
     * <pre>
     *     0,  1,  2,  3,  4
     *    10, 11, 12, 13, 14
     *    20, 21, 22, 23, 24
     *    30, 31, 32, 33, 34
     * long[] dims = {4, 5};
     * long[] startDims = {1, 2};
     * long[] selectedDims = {3, 3};
     * then the following subset is selected by the startDims and selectedDims above:
     *     12, 13, 14
     *     22, 23, 24
     *     32, 33, 34
     * </pre>
     */
    protected long[]          selectedDims;

    /**
     * The starting position of each dimension of a selected subset. With both
     * the starting position and selected sizes, the subset of a rectangle
     * selection is fully defined. The constructor sets it to null.
     */
    protected long[]          startDims;
096
    /**
     * Array that contains the indices of the dimensions selected for display.
     * The constructor creates it with three entries, {0, 1, 2}.
     * <p>
     * <B>selectedIndex[] is provided for two purposes:</B>
     * <OL>
     * <LI>
     * selectedIndex[] is used to indicate the order of dimensions for display,
     * i.e. selectedIndex[0] = row, selectedIndex[1] = column and
     * selectedIndex[2] = depth. For example, for a four dimension dataset, if
     * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index,
     * dim[2] is selected as column index and dim[3] is selected as depth index.
     * <LI>
     * selectedIndex[] is also used to select dimensions for display for
     * datasets with three or more dimensions. We assume that applications such
     * as HDFView can only display data up to three dimensions (a 2D
     * spreadsheet/image with a third dimension that the 2D spreadsheet/image is
     * cut from). For datasets with more than three dimensions, we need
     * selectedIndex[] to store which three dimensions are chosen for display.
     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
     * then dim[1] is selected as row index, dim[2] is selected as column index
     * and dim[3] is selected as depth index. dim[0] is not selected. Its
     * location is fixed at 0 by default.
     * </OL>
     */
    protected final int[]     selectedIndex;

    /**
     * The number of elements to move from the start location in each dimension.
     * For example, if selectedStride[0] = 2, every other data point is selected
     * along dim[0]. While this array is null and the rank is positive,
     * getStride() creates it with a stride of 1 in every dimension.
     */
    protected long[]          selectedStride;

    /**
     * The array of dimension sizes for a chunk. The constructor sets it to
     * null.
     */
    protected long[]          chunkSize;
134
135    /** The compression information. */
136    protected String          compression;
137    public final static String          compression_gzip_txt = "GZIP: level = ";
138
    /** The filters information. The constructor sets it to "NONE". */
    protected String          filters;

    /** The storage layout information. */
    protected String          storage_layout;

    /** The storage information. The constructor sets it to "NONE". */
    protected String          storage;

    /** The datatype object of the dataset. The constructor sets it to null. */
    protected Datatype        datatype;

    /**
     * Array of strings that represent the dimension names. It is null if dimension names do not exist.
     */
    protected String[]        dimNames;
155
    /** Flag to indicate if the byte[] array is converted to strings. Defaults to true. */
    protected boolean         convertByteToString = true;

    /** Flag to indicate if data values are loaded into memory. */
    protected boolean         isDataLoaded        = false;

    /** Flag to indicate if this dataset has been initialized. */
    protected boolean         inited = false;

    /** The number of data points in the memory buffer. */
    protected long            nPoints             = 1;

    /**
     * The data buffer that contains the raw data read directly from file
     * (before any data conversion).
     */
    protected Object          originalBuf         = null;

    /**
     * The array that holds the converted data of unsigned C-type integers.
     * <p>
     * For example, suppose that the original data is an array of unsigned
     * 16-bit short integers. Since Java does not support unsigned integers, the
     * data is converted to an array of 32-bit signed integers. In that case, the
     * converted buffer is the array of 32-bit signed integers.
     */
    protected Object          convertedBuf        = null;
183
184    /**
185     * Constructs a Dataset object with a given file, name and path.
186     *
187     * @param theFile
188     *            the file that contains the dataset.
189     * @param dsName
190     *            the name of the Dataset, e.g. "dset1".
191     * @param dsPath
192     *            the full group path of this Dataset, e.g. "/arrays/".
193     */
194    public Dataset(FileFormat theFile, String dsName, String dsPath) {
195        this(theFile, dsName, dsPath, null);
196    }
197
198    /**
199     * @deprecated Not for public use in the future. <br>
200     *             Using {@link #Dataset(FileFormat, String, String)}
201     *
202     * @param theFile
203     *            the file that contains the dataset.
204     * @param dsName
205     *            the name of the Dataset, e.g. "dset1".
206     * @param dsPath
207     *            the full group path of this Dataset, e.g. "/arrays/".
208     * @param oid
209     *            the oid of this Dataset.
210     */
211    @Deprecated
212    public Dataset(FileFormat theFile, String dsName, String dsPath, long[] oid) {
213        super(theFile, dsName, dsPath, oid);
214
215        datatype = null;
216        rank = -1;
217        data = null;
218        dims = null;
219        maxDims = null;
220        selectedDims = null;
221        startDims = null;
222        selectedStride = null;
223        chunkSize = null;
224        compression = "NONE";
225        filters = "NONE";
226        storage = "NONE";
227        dimNames = null;
228
229        selectedIndex = new int[3];
230        selectedIndex[0] = 0;
231        selectedIndex[1] = 1;
232        selectedIndex[2] = 2;
233    }
234
235    /**
236     * Clears memory held by the dataset, such as the data buffer.
237     */
238    @SuppressWarnings("rawtypes")
239    public void clear() {
240        if (data != null) {
241            if (data instanceof Vector) {
242                ((Vector) data).setSize(0);
243            }
244            data = null;
245            originalBuf = null;
246            convertedBuf = null;
247        }
248        isDataLoaded = false;
249    }
250
251    /**
252     * Returns the rank (number of dimensions) of the dataset.
253     *
254     * @return the number of dimensions of the dataset.
255     */
256    @Override
257    public final int getRank() {
258        if (!inited)
259            init();
260
261        return rank;
262    }
263
264    /**
265     * Returns the array that contains the dimension sizes of the dataset.
266     *
267     * @return the dimension sizes of the dataset.
268     */
269    @Override
270    public final long[] getDims() {
271        if (!inited)
272            init();
273
274        return dims;
275    }
276
277    /**
278     * Returns the array that contains the max dimension sizes of the dataset.
279     *
280     * @return the max dimension sizes of the dataset.
281     */
282    public final long[] getMaxDims() {
283        if (!inited) init();
284
285        if (maxDims == null) return dims;
286
287        return maxDims;
288    }
289
290    /**
291     * Returns the dimension sizes of the selected subset.
292     * <p>
293     * The SelectedDims is the number of data points of the selected subset.
294     * Applications can use this array to change the size of selected subset.
295     *
296     * The selected size must be less than or equal to the current dimension size.
297     * Combined with the starting position, selected sizes and stride, the
298     * subset of a rectangle selection is fully defined.
299     * <p>
300     * For example, if a 4 X 5 dataset is as follows:
301     *
302     * <pre>
303     *     0,  1,  2,  3,  4
304     *    10, 11, 12, 13, 14
305     *    20, 21, 22, 23, 24
306     *    30, 31, 32, 33, 34
307     * long[] dims = {4, 5};
308     * long[] startDims = {1, 2};
309     * long[] selectedDims = {3, 3};
310     * long[] selectedStride = {1, 1};
311     * then the following subset is selected by the startDims and selectedDims
312     *     12, 13, 14
313     *     22, 23, 24
314     *     32, 33, 34
315     * </pre>
316     *
317     * @return the dimension sizes of the selected subset.
318     */
319    @Override
320    public final long[] getSelectedDims() {
321        if (!inited) init();
322
323        return selectedDims;
324    }
325
326    /**
327     * Returns the starting position of a selected subset.
328     * <p>
329     * Applications can use this array to change the starting position of a
330     * selection. Combined with the selected dimensions, selected sizes and
331     * stride, the subset of a rectangle selection is fully defined.
332     * <p>
333     * For example, if a 4 X 5 dataset is as follows:
334     *
335     * <pre>
336     *     0,  1,  2,  3,  4
337     *    10, 11, 12, 13, 14
338     *    20, 21, 22, 23, 24
339     *    30, 31, 32, 33, 34
340     * long[] dims = {4, 5};
341     * long[] startDims = {1, 2};
342     * long[] selectedDims = {3, 3};
343     * long[] selectedStride = {1, 1};
344     * then the following subset is selected by the startDims and selectedDims
345     *     12, 13, 14
346     *     22, 23, 24
347     *     32, 33, 34
348     * </pre>
349     *
350     * @return the starting position of a selected subset.
351     */
352    @Override
353    public final long[] getStartDims() {
354        if (!inited) init();
355
356        return startDims;
357    }
358
359    /**
360     * Returns the selectedStride of the selected dataset.
361     * <p>
362     * Applications can use this array to change how many elements to move in
363     * each dimension.
364     *
365     * Combined with the starting position and selected sizes, the subset of a
366     * rectangle selection is defined.
367     * <p>
368     * For example, if a 4 X 5 dataset is as follows:
369     *
370     * <pre>
371     *     0,  1,  2,  3,  4
372     *    10, 11, 12, 13, 14
373     *    20, 21, 22, 23, 24
374     *    30, 31, 32, 33, 34
375     * long[] dims = {4, 5};
376     * long[] startDims = {0, 0};
377     * long[] selectedDims = {2, 2};
378     * long[] selectedStride = {2, 3};
379     * then the following subset is selected by the startDims and selectedDims
380     *     0,   3
381     *     20, 23
382     * </pre>
383     *
384     * @return the selectedStride of the selected dataset.
385     */
386    @Override
387    public final long[] getStride() {
388        if (!inited) init();
389
390        if (rank <= 0) {
391            return null;
392        }
393
394        if (selectedStride == null) {
395            selectedStride = new long[rank];
396            for (int i = 0; i < rank; i++) {
397                selectedStride[i] = 1;
398            }
399        }
400
401        return selectedStride;
402    }
403
404    /**
405     * Sets the flag that indicates if a byte array is converted to a string
406     * array.
407     * <p>
408     * In a string dataset, the raw data from file is stored in a byte array. By
409     * default, this byte array is converted to an array of strings. For a large
410     * dataset (e.g. more than one million strings), the conversion takes a long
411     * time and requires a lot of memory space to store the strings. In some
412     * applications, such a conversion can be delayed. For example, A GUI
413     * application may convert only the part of the strings that is visible to the
414     * users, not the entire data array.
415     * <p>
416     * setConvertByteToString(boolean b) allows users to set the flag so that
417     * applications can choose to perform the byte-to-string conversion or not.
418     * If the flag is set to false, the getData() returns an array of byte
419     * instead of an array of strings.
420     *
421     * @param b
422     *            convert bytes to strings if b is true; otherwise, if false, do
423     *            not convert bytes to strings.
424     */
425    public final void setConvertByteToString(boolean b) {
426        convertByteToString = b;
427    }
428
429    /**
430     * Returns the flag that indicates if a byte array is converted to a string
431     * array.
432     *
433     * @return true if byte array is converted to string; otherwise, returns
434     *         false if there is no conversion.
435     */
436    public final boolean getConvertByteToString() {
437        return convertByteToString;
438    }
439
    /**
     * Reads the raw data of the dataset from file to a byte array.
     * <p>
     * readBytes() reads raw data to an array of bytes instead of an array of
     * its datatype. For example, for a one-dimension 32-bit integer dataset of
     * size 5, readBytes() returns a byte array of size 20 instead of an
     * int array of 5.
     * <p>
     * readBytes() can be used to copy data from one dataset to another
     * efficiently: because the raw data is not converted to its native type,
     * it saves memory space and CPU time.
     *
     * @return the byte array of the raw data.
     *
     * @throws Exception if data can not be read
     */
    public abstract byte[] readBytes() throws Exception;
457
458    /**
459     * Writes the memory buffer of this dataset to file.
460     *
461     * @throws Exception if buffer can not be written
462     */
463    @Override
464    public final void write() throws Exception {
465        if (data != null) {
466            write(data);
467        }
468    }
469
    /**
     * Creates a new dataset and writes the data buffer to the new dataset.
     * <p>
     * This function allows applications to create a new dataset for a given
     * data buffer. For example, users can select a specific interesting part
     * from a large image and create a new image with the selection.
     * <p>
     * The new dataset retains the datatype and dataset creation properties of
     * this dataset.
     *
     * @param pgroup
     *            the group to which the dataset is copied.
     * @param name
     *            the name of the new dataset.
     * @param dims
     *            the dimension sizes of the new dataset.
     * @param data
     *            the data values of the subset to be copied.
     *
     * @return the new dataset.
     *
     * @throws Exception if dataset can not be copied
     */
    public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception;
494
495    @Override
496    public final boolean isInited() {
497        return inited;
498    }
499
500    /**
501     * Returns the data buffer of the dataset in memory.
502     * <p>
503     * If data is already loaded into memory, returns the data; otherwise, calls
504     * read() to read data from file into a memory buffer and returns the memory
505     * buffer.
506     * <p>
507     * By default, the whole dataset is read into memory. Users can also select
508     * a subset to read. Subsetting is done in an implicit way.
509     * <p>
510     * <b>How to Select a Subset</b>
511     * <p>
512     * A selection is specified by three arrays: start, stride and count.
513     * <ol>
514     * <li>start: offset of a selection
515     * <li>stride: determines how many elements to move in each dimension
516     * <li>count: number of elements to select in each dimension
517     * </ol>
518     * getStartDims(), getStride() and getSelectedDims() returns the start,
519     * stride and count arrays respectively. Applications can make a selection
520     * by changing the values of the arrays.
521     * <p>
522     * The following example shows how to make a subset. In the example, the
523     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
524     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
525     * We want to select every other data point in dims[1] and dims[2]
526     *
527     * <pre>
528     * int rank = dataset.getRank(); // number of dimensions of the dataset
529     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
531     * long[] start = dataset.getStartDims(); // the offset of the selection
532     * long[] stride = dataset.getStride(); // the stride of the dataset
533     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display
534     *
535     * // select dim1 and dim2 as 2D data for display,and slice through dim0
536     * selectedIndex[0] = 1;
537     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
539     *
540     * // reset the selection arrays
541     * for (int i = 0; i &lt; rank; i++) {
542     *     start[i] = 0;
543     *     selected[i] = 1;
544     *     stride[i] = 1;
545     * }
546     *
547     * // set stride to 2 on dim1 and dim2 so that every other data point is
548     * // selected.
549     * stride[1] = 2;
550     * stride[2] = 2;
551     *
552     * // set the selection size of dim1 and dim2
553     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
555     *
556     * // when dataset.getData() is called, the selection above will be used since
557     * // the dimension arrays are passed by reference. Changes of these arrays
558     * // outside the dataset object directly change the values of these array
559     * // in the dataset object.
560     * </pre>
561     * <p>
562     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
563     * short, int, float, double or String type based on the datatype of the
564     * dataset.
565     * <p>
566     * For CompoundDS, the memory data object is an java.util.List object. Each
567     * element of the list is a data array that corresponds to a compound field.
568     * <p>
569     * For example, if compound dataset "comp" has the following nested
570     * structure, and member datatypes
571     *
572     * <pre>
573     * comp --&gt; m01 (int)
574     * comp --&gt; m02 (float)
575     * comp --&gt; nest1 --&gt; m11 (char)
576     * comp --&gt; nest1 --&gt; m12 (String)
577     * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
578     * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
579     * </pre>
580     *
581     * getData() returns a list of six arrays: {int[], float[], char[],
582     * String[], long[] and double[]}.
583     *
584     * @return the memory buffer of the dataset.
585     *
586     * @throws Exception if object can not be read
587     * @throws OutOfMemoryError if memory is exhausted
588     */
589    @Override
590    public final Object getData() throws Exception, OutOfMemoryError {
591        if (!isDataLoaded) {
592            log.trace("getData: read");
593            data = read(); // load the data;
594            originalBuf = data;
595            isDataLoaded = true;
596            nPoints = 1;
597            log.trace("getData: selectedDims length={}",selectedDims.length);
598            for (int j = 0; j < selectedDims.length; j++) {
599                nPoints *= selectedDims[j];
600            }
601            log.trace("getData: read {}", nPoints);
602        }
603
604        return data;
605    }
606
607    /**
608     * Not for public use in the future.
609     * <p>
610     * setData() is not safe to use because it changes memory buffer
611     * of the dataset object. Dataset operations such as write/read
612     * will fail if the buffer type or size is changed.
613     *
614     * @param d  the object data
615     */
616    @Override
617    public final void setData(Object d) {
618        if (!(this instanceof Attribute))
619            throw new UnsupportedOperationException("setData: unsupported for non-Attribute objects");
620
621        data = d;
622    }
623
624    /**
625     * Clears the current data buffer in memory and forces the next read() to load
626     * the data from file.
627     * <p>
628     * The function read() loads data from file into memory only if the data is
629     * not read. If data is already in memory, read() just returns the memory
630     * buffer. Sometimes we want to force read() to re-read data from file. For
631     * example, when the selection is changed, we need to re-read the data.
632     *
633     * @see #getData()
634     * @see #read()
635     */
636    @Override
637    public void clearData() {
638        isDataLoaded = false;
639    }
640
641    /**
642     * Returns the dimension size of the vertical axis.
643     *
644     * <p>
645     * This function is used by GUI applications such as HDFView. GUI
646     * applications display a dataset in a 2D table or 2D image. The display
     * order is specified by the index array of selectedIndex as follows:
648     * <dl>
649     * <dt>selectedIndex[0] -- height</dt>
650     * <dd>The vertical axis</dd>
651     * <dt>selectedIndex[1] -- width</dt>
652     * <dd>The horizontal axis</dd>
653     * <dt>selectedIndex[2] -- depth</dt>
654     * <dd>The depth axis is used for 3 or more dimensional datasets.</dd>
655     * </dl>
656     * Applications can use getSelectedIndex() to access and change the display
657     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
658     * following code will set the height=200 and width=50.
659     *
660     * <pre>
661     * int[] selectedIndex = dataset.getSelectedIndex();
662     * selectedIndex[0] = 0;
663     * selectedIndex[1] = 1;
664     * </pre>
665     *
666     * @see #getSelectedIndex()
667     * @see #getWidth()
668     *
669     * @return the size of dimension of the vertical axis.
670     */
671    @Override
672    public final long getHeight() {
673        if (!inited) init();
674
675        if ((selectedDims == null) || (selectedIndex == null)) {
676            return 0;
677        }
678
679        return selectedDims[selectedIndex[0]];
680    }
681
682    /**
683     * Returns the dimension size of the horizontal axis.
684     *
685     * <p>
686     * This function is used by GUI applications such as HDFView. GUI
687     * applications display a dataset in 2D Table or 2D Image. The display order is
     * specified by the index array of selectedIndex as follows:
689     * <dl>
690     * <dt>selectedIndex[0] -- height</dt>
691     * <dd>The vertical axis</dd>
692     * <dt>selectedIndex[1] -- width</dt>
693     * <dd>The horizontal axis</dd>
694     * <dt>selectedIndex[2] -- depth</dt>
695     * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd>
696     * </dl>
697     * Applications can use getSelectedIndex() to access and change the display
698     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
     * following code will set the height=200 and width=50.
700     *
701     * <pre>
702     * int[] selectedIndex = dataset.getSelectedIndex();
703     * selectedIndex[0] = 0;
704     * selectedIndex[1] = 1;
705     * </pre>
706     *
707     * @see #getSelectedIndex()
708     * @see #getHeight()
709     *
710     * @return the size of dimension of the horizontal axis.
711     */
712    @Override
713    public final long getWidth() {
714        if (!inited) init();
715
716        if ((selectedDims == null) || (selectedIndex == null)) {
717            return 0;
718        }
719
720        if ((selectedDims.length < 2) || (selectedIndex.length < 2)) {
721            return 1;
722        }
723
724        return selectedDims[selectedIndex[1]];
725    }
726
727    /**
728     * Returns the indices of display order.
729     * <p>
730     *
731     * selectedIndex[] is provided for two purposes:
732     * <OL>
733     * <LI>
734     * selectedIndex[] is used to indicate the order of dimensions for display.
735     * selectedIndex[0] is for the row, selectedIndex[1] is for the column and
736     * selectedIndex[2] for the depth.
737     * <p>
738     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
739     * then dim[1] is selected as row index, dim[2] is selected as column index
740     * and dim[3] is selected as depth index.
741     * <LI>
742     * selectedIndex[] is also used to select dimensions for display for
743     * datasets with three or more dimensions. We assume that applications such
744     * as HDFView can only display data values up to three dimensions (2D
745     * spreadsheet/image with a third dimension which the 2D spreadsheet/image
746     * is selected from). For datasets with more than three dimensions, we need
747     * selectedIndex[] to tell applications which three dimensions are chosen
748     * for display. <br>
749     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
750     * then dim[1] is selected as row index, dim[2] is selected as column index
751     * and dim[3] is selected as depth index. dim[0] is not selected. Its
752     * location is fixed at 0 by default.
753     * </OL>
754     *
755     * @return the array of the indices of display order.
756     */
757    @Override
758    public final int[] getSelectedIndex() {
759        if (!inited) init();
760
761        return selectedIndex;
762    }
763
764    /**
765     * Returns the string representation of compression information.
766     * <p>
767     * For example,
768     * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED".
769     *
770     * @return the string representation of compression information.
771     */
772    @Override
773    public final String getCompression() {
774        if (!inited) init();
775
776        return compression;
777    }
778
779    /**
780     * Returns the string representation of filter information.
781     *
782     * @return the string representation of filter information.
783     */
784    public final String getFilters() {
785        if (!inited) init();
786
787        return filters;
788    }
789
790    /**
791     * Returns the string representation of storage layout information.
792     *
793     * @return the string representation of storage layout information.
794     */
795    public final String getStorageLayout() {
796        if (!inited) init();
797
798        return storage_layout;
799    }
800
801    /**
802     * Returns the string representation of storage information.
803     *
804     * @return the string representation of storage information.
805     */
806    public final String getStorage() {
807        if (!inited) init();
808
809        return storage;
810    }
811
812    /**
813     * Returns the array that contains the dimension sizes of the chunk of the
814     * dataset. Returns null if the dataset is not chunked.
815     *
816     * @return the array of chunk sizes or returns null if the dataset is not
817     *         chunked.
818     */
819    public final long[] getChunkSize() {
820        if (!inited) init();
821
822        return chunkSize;
823    }
824
825    @Override
826    public Datatype getDatatype() {
827        return datatype;
828    }
829
830    /**
831     * @deprecated Not for public use in the future. <br>
832     *             Using {@link #convertFromUnsignedC(Object, Object)}
833     *
834     * @param data_in  the object data
835     *
836     * @return the converted object
837     */
838    @Deprecated
839    public static Object convertFromUnsignedC(Object data_in) {
840        return Dataset.convertFromUnsignedC(data_in, null);
841    }
842
843    /**
844     * Converts one-dimension array of unsigned C-type integers to a new array
845     * of appropriate Java integer in memory.
846     * <p>
847     * Since Java does not support unsigned integer, values of unsigned C-type
848     * integers must be converted into its appropriate Java integer. Otherwise,
     * the data value will not be displayed correctly. For example, if an unsigned
850     * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of
851     * the correct value of 200.
852     * <p>
     * Unsigned C integers are upgraded to Java integers according to the
854     * following table:
855     * <TABLE CELLSPACING=0 BORDER=1 CELLPADDING=5 WIDTH=400>
856     * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption>
857     * <TR>
858     * <TD><B>Unsigned C Integer</B></TD>
     * <TD><B>JAVA Integer</B></TD>
860     * </TR>
861     * <TR>
862     * <TD>unsigned byte</TD>
863     * <TD>signed short</TD>
864     * </TR>
865     * <TR>
866     * <TD>unsigned short</TD>
867     * <TD>signed int</TD>
868     * </TR>
869     * <TR>
870     * <TD>unsigned int</TD>
871     * <TD>signed long</TD>
872     * </TR>
873     * <TR>
874     * <TD>unsigned long</TD>
875     * <TD>signed long</TD>
876     * </TR>
877     * </TABLE>
878     * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers.
879     * Therefore, the values of unsigned 64-bit datasets may be wrong in Java
880     * applications</strong>.
881     * <p>
882     * If memory data of unsigned integers is converted by
883     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
884     * the data back to unsigned C before data is written into file.
885     *
886     * @see #convertToUnsignedC(Object, Object)
887     *
888     * @param data_in
889     *            the input 1D array of the unsigned C-type integers.
890     * @param data_out
891     *            the output converted (or upgraded) 1D array of Java integers.
892     *
893     * @return the upgraded 1D array of Java integers.
894     */
895    @SuppressWarnings("rawtypes")
896    public static Object convertFromUnsignedC(Object data_in, Object data_out) {
897        log.trace("convertFromUnsignedC(): start");
898
899        if (data_in == null) {
900            log.debug("convertFromUnsignedC(): data_in is null");
901            log.trace("convertFromUnsignedC(): finish");
902            return null;
903        }
904
905        Class data_class = data_in.getClass();
906        if (!data_class.isArray()) {
907            log.debug("convertFromUnsignedC(): data_in not an array");
908            log.trace("convertFromUnsignedC(): finish");
909            return null;
910        }
911
912        if (data_out != null) {
913            Class data_class_out = data_out.getClass();
914            if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) {
915                log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size");
916                data_out = null;
917            }
918        }
919
920        String cname = data_class.getName();
921        char dname = cname.charAt(cname.lastIndexOf("[") + 1);
922        int size = Array.getLength(data_in);
923        log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size);
924
925        if (dname == 'B') {
926            short[] sdata = null;
927            if (data_out == null) {
928                sdata = new short[size];
929            }
930            else {
931                sdata = (short[]) data_out;
932            }
933
934            byte[] bdata = (byte[]) data_in;
935            for (int i = 0; i < size; i++) {
936                sdata[i] = (short) ((bdata[i] + 256) & 0xFF);
937            }
938
939            data_out = sdata;
940        }
941        else if (dname == 'S') {
942            int[] idata = null;
943            if (data_out == null) {
944                idata = new int[size];
945            }
946            else {
947                idata = (int[]) data_out;
948            }
949
950            short[] sdata = (short[]) data_in;
951            for (int i = 0; i < size; i++) {
952                idata[i] = (sdata[i] + 65536) & 0xFFFF;
953            }
954
955            data_out = idata;
956        }
957        else if (dname == 'I') {
958            long[] ldata = null;
959            if (data_out == null) {
960                ldata = new long[size];
961            }
962            else {
963                ldata = (long[]) data_out;
964            }
965
966            int[] idata = (int[]) data_in;
967            for (int i = 0; i < size; i++) {
968                ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL;
969            }
970
971            data_out = ldata;
972        }
973        else {
974            data_out = data_in;
975            log.debug("convertFromUnsignedC(): Java does not support unsigned long");
976        }
977
978        return data_out;
979    }
980
981    /**
982     * @deprecated Not for public use in the future. <br>
983     *             Using {@link #convertToUnsignedC(Object, Object)}
984     *
985     * @param data_in
986     *            the input 1D array of the unsigned C-type integers.
987     *
988     * @return the upgraded 1D array of Java integers.
989     */
990    @Deprecated
991    public static Object convertToUnsignedC(Object data_in) {
992        return Dataset.convertToUnsignedC(data_in, null);
993    }
994
995    /**
996     * Converts the array of converted unsigned integers back to unsigned C-type
997     * integer data in memory.
998     * <p>
999     * If memory data of unsigned integers is converted by
1000     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
1001     * the data back to unsigned C before data is written into file.
1002     *
1003     * @see #convertFromUnsignedC(Object, Object)
1004     *
1005     * @param data_in
1006     *            the input array of the Java integer.
1007     * @param data_out
1008     *            the output array of the unsigned C-type integer.
1009     *
1010     * @return the converted data of unsigned C-type integer array.
1011     */
1012    @SuppressWarnings("rawtypes")
1013    public static Object convertToUnsignedC(Object data_in, Object data_out) {
1014        log.trace("convertToUnsignedC(): start");
1015
1016        if (data_in == null) {
1017            log.debug("convertToUnsignedC(): data_in is null");
1018            log.trace("convertToUnsignedC(): finish");
1019            return null;
1020        }
1021
1022        Class data_class = data_in.getClass();
1023        if (!data_class.isArray()) {
1024            log.debug("convertToUnsignedC(): data_in not an array");
1025            log.trace("convertToUnsignedC(): finish");
1026            return null;
1027        }
1028
1029        if (data_out != null) {
1030            Class data_class_out = data_out.getClass();
1031            if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) {
1032                log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size");
1033                data_out = null;
1034            }
1035        }
1036
1037        String cname = data_class.getName();
1038        char dname = cname.charAt(cname.lastIndexOf("[") + 1);
1039        int size = Array.getLength(data_in);
1040        log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size);
1041
1042        if (dname == 'S') {
1043            byte[] bdata = null;
1044            if (data_out == null) {
1045                bdata = new byte[size];
1046            }
1047            else {
1048                bdata = (byte[]) data_out;
1049            }
1050            short[] sdata = (short[]) data_in;
1051            for (int i = 0; i < size; i++) {
1052                bdata[i] = (byte) sdata[i];
1053            }
1054            data_out = bdata;
1055        }
1056        else if (dname == 'I') {
1057            short[] sdata = null;
1058            if (data_out == null) {
1059                sdata = new short[size];
1060            }
1061            else {
1062                sdata = (short[]) data_out;
1063            }
1064            int[] idata = (int[]) data_in;
1065            for (int i = 0; i < size; i++) {
1066                sdata[i] = (short) idata[i];
1067            }
1068            data_out = sdata;
1069        }
1070        else if (dname == 'J') {
1071            int[] idata = null;
1072            if (data_out == null) {
1073                idata = new int[size];
1074            }
1075            else {
1076                idata = (int[]) data_out;
1077            }
1078            long[] ldata = (long[]) data_in;
1079            for (int i = 0; i < size; i++) {
1080                idata[i] = (int) ldata[i];
1081            }
1082            data_out = idata;
1083        }
1084        else {
1085            data_out = data_in;
1086            log.debug("convertToUnsignedC(): Java does not support unsigned long");
1087        }
1088
1089        return data_out;
1090    }
1091
1092    /**
1093     * Converts an array of bytes into an array of Strings for a fixed string
1094     * dataset.
1095     * <p>
1096     * A C-string is an array of chars while an Java String is an object. When a
1097     * string dataset is read into a Java application, the data is stored in an
1098     * array of Java bytes. byteToString() is used to convert the array of bytes
1099     * into an array of Java strings so that applications can display and modify
1100     * the data content.
1101     * <p>
1102     * For example, the content of a two element C string dataset is {"ABC",
1103     * "abc"}. Java applications will read the data into a byte array of {65,
1104     * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java
1105     * String of strs[0]="ABC", and strs[1]="abc".
1106     * <p>
1107     * If memory data of strings is converted to Java Strings, stringToByte()
1108     * must be called to convert the memory data back to byte array before data
1109     * is written to file.
1110     *
1111     * @see #stringToByte(String[], int)
1112     *
1113     * @param bytes
1114     *            the array of bytes to convert.
1115     * @param length
1116     *            the length of string.
1117     *
1118     * @return the array of Java String.
1119     */
1120    public static final String[] byteToString(byte[] bytes, int length) {
1121        log.trace("byteToString(): start");
1122
1123        if (bytes == null) {
1124            log.debug("byteToString(): input is null");
1125            log.trace("byteToString(): finish");
1126            return null;
1127        }
1128
1129        int n = bytes.length / length;
1130        log.trace("byteToString(): n={} from length of {}", n, length);
1131        // String bigstr = new String(bytes);
1132        String[] strArray = new String[n];
1133        String str = null;
1134        int idx = 0;
1135        for (int i = 0; i < n; i++) {
1136            str = new String(bytes, i * length, length);
1137            // bigstr.substring uses less memory space
1138            // NOTE: bigstr does not work on linux if bytes.length is very large
1139            // see bug 1091
1140            // offset = i*length;
1141            // str = bigstr.substring(offset, offset+length);
1142
1143            idx = str.indexOf('\0');
1144            if (idx >= 0) {
1145                str = str.substring(0, idx);
1146            }
1147
1148            // trim only the end
1149            int end = str.length();
1150            while (end > 0 && str.charAt(end - 1) <= '\u0020')
1151                end--;
1152
1153            strArray[i] = (end <= 0) ? "" : str.substring(0, end);
1154
1155            // trim both start and end
1156            // strArray[i] = str.trim();
1157        }
1158
1159        log.trace("byteToString(): finish");
1160        return strArray;
1161    }
1162
1163    /**
1164     * Converts a string array into an array of bytes for a fixed string
1165     * dataset.
1166     * <p>
1167     * If memory data of strings is converted to Java Strings, stringToByte()
1168     * must be called to convert the memory data back to byte array before data
1169     * is written to file.
1170     *
1171     * @see #byteToString(byte[] bytes, int length)
1172     *
1173     * @param strings
1174     *            the array of string.
1175     * @param length
1176     *            the length of string.
1177     *
1178     * @return the array of bytes.
1179     */
1180    public static final byte[] stringToByte(String[] strings, int length) {
1181        log.trace("stringToByte(): start");
1182
1183        if (strings == null) {
1184            log.debug("stringToByte(): input is null");
1185            log.trace("stringToByte(): finish");
1186            return null;
1187        }
1188
1189        int size = strings.length;
1190        byte[] bytes = new byte[size * length];
1191        log.trace("stringToByte(): size={} length={}", size, length);
1192        StringBuffer strBuff = new StringBuffer(length);
1193        for (int i = 0; i < size; i++) {
1194            // initialize the string with spaces
1195            strBuff.replace(0, length, " ");
1196
1197            if (strings[i] != null) {
1198                if (strings[i].length() > length) {
1199                    strings[i] = strings[i].substring(0, length);
1200                }
1201                strBuff.replace(0, length, strings[i]);
1202            }
1203
1204            strBuff.setLength(length);
1205            System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length);
1206        }
1207
1208        log.trace("stringToByte(): finish");
1209
1210        return bytes;
1211    }
1212
1213    /**
1214     * Returns the array of strings that represent the dimension names. Returns
1215     * null if there is no dimension name.
1216     * <p>
1217     * Some datasets have pre-defined names for each dimension such as
1218     * "Latitude" and "Longitude". getDimNames() returns these pre-defined
1219     * names.
1220     *
1221     * @return the names of dimensions, or null if there is no dimension name.
1222     */
1223    public final String[] getDimNames() {
1224        if (!inited) init();
1225
1226        return dimNames;
1227    }
1228
1229    /**
1230     * Checks if a given datatype is a string. Sub-classes must replace this
1231     * default implementation.
1232     *
1233     * @param tid
1234     *            The data type identifier.
1235     *
1236     * @return true if the datatype is a string; otherwise returns false.
1237     */
1238    public boolean isString(long tid) {
1239        return false;
1240    }
1241
1242    /**
1243     * Returns the size in bytes of a given datatype. Sub-classes must replace
1244     * this default implementation.
1245     *
1246     * @param tid
1247     *            The data type identifier.
1248     *
1249     * @return The size of the datatype
1250     */
1251    public long getSize(long tid) {
1252        return -1;
1253    }
1254
1255    /**
1256     * Get Class of the original data buffer if converted.
1257     *
1258     * @return the Class of originalBuf
1259     */
1260    @Override
1261    @SuppressWarnings("rawtypes")
1262    public final Class getOriginalClass() {
1263        return originalBuf.getClass();
1264    }
1265
1266    /*
1267     * Checks if dataset is virtual. Sub-classes must replace
1268     * this default implementation.
1269     *
1270     * @return true if the dataset is virtual; otherwise returns false.
1271     */
1272    public boolean isVirtual() {
1273        return false;
1274    }
1275
1276    /*
1277     * Gets the source file name at index if dataset is virtual. Sub-classes must replace
1278     * this default implementation.
1279     *
1280     * @return filename if the dataset is virtual; otherwise returns null.
1281     */
1282    public String getVirtualFilename(int index) {
1283        return null;
1284    }
1285
1286    /*
1287     * Gets the number of source files if dataset is virtual. Sub-classes must replace
1288     * this default implementation.
1289     *
1290     * @return the list size if the dataset is virtual; otherwise returns negative.
1291     */
1292    public int getVirtualMaps() {
1293        return -1;
1294    }
1295}