001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the files COPYING and Copyright.html. *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * Or, see https://support.hdfgroup.org/products/licenses.html               *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.util.List;
019
020/**
021 * The abstract class provides general APIs to create and manipulate dataset
022 * objects, and retrieve dataset properties, datatype and dimension sizes.
023 * <p>
024 * This class provides two convenient functions, read()/write(), to read/write
025 * data values. Reading/writing data may take many library calls if we use the
026 * library APIs directly. The read() and write functions hide all the details of
027 * these calls from users.
028 * <p>
029 * For more details on dataset,
030 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b>
031 * <p>
032 *
033 * @see hdf.object.ScalarDS
034 * @see hdf.object.CompoundDS
035 *
036 * @version 1.1 9/4/2007
037 * @author Peter X. Cao
038 */
039public abstract class Dataset extends HObject implements MetaDataContainer, DataFormat {
    /** Serialization version identifier for this class. */
    private static final long serialVersionUID    = -3360885430038261178L;

    /** SLF4J logger obtained for Dataset.class; static, so shared by every instance. */
    private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class);
043
044    /**
045     * The memory buffer that holds the raw data array of the dataset.
046     */
047    protected transient Object          data;
048
049    /**
050     * The number of dimensions of the dataset.
051     */
052    protected int             rank;
053
054    /**
055     * The current dimension sizes of the dataset
056     */
057    protected long[]          dims;
058
059    /**
060     * The max dimension sizes of the dataset
061     */
062    protected long[]          maxDims;
063
064    /**
065     * Array that contains the number of data points selected (for read/write)
066     * in each dimension.
067     * <p>
068     * The selected size must be less than or equal to the current dimension size.
069     * A subset of a rectangle selection is defined by the starting position and
070     * selected sizes.
071     * <p>
072     * For example, if a 4 X 5 dataset is as follows:
073     *
074     * <pre>
075     *     0,  1,  2,  3,  4
076     *    10, 11, 12, 13, 14
077     *    20, 21, 22, 23, 24
078     *    30, 31, 32, 33, 34
079     * long[] dims = {4, 5};
080     * long[] startDims = {1, 2};
081     * long[] selectedDims = {3, 3};
082     * then the following subset is selected by the startDims and selectedDims above:
083     *     12, 13, 14
084     *     22, 23, 24
085     *     32, 33, 34
086     * </pre>
087     */
088    protected long[]          selectedDims;
089
090    /**
091     * The starting position of each dimension of a selected subset. With both
092     * the starting position and selected sizes, the subset of a rectangle
093     * selection is fully defined.
094     */
095    protected long[]          startDims;
096
097    /**
098     * Array that contains the indices of the dimensions selected for display.
099     * <p>
100     * <B>selectedIndex[] is provided for two purposes:</B>
101     * <OL>
102     * <LI>
103     * selectedIndex[] is used to indicate the order of dimensions for display,
104     * i.e. selectedIndex[0] = row, selectedIndex[1] = column and
105     * selectedIndex[2] = depth. For example, for a four dimension dataset, if
106     * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index,
107     * dim[2] is selected as column index and dim[3] is selected as depth index.
108     * <LI>
109     * selectedIndex[] is also used to select dimensions for display for
110     * datasets with three or more dimensions. We assume that applications such
111     * as HDFView can only display data up to three dimensions (a 2D
112     * spreadsheet/image with a third dimension that the 2D spreadsheet/image is
113     * cut from). For datasets with more than three dimensions, we need
114     * selectedIndex[] to store which three dimensions are chosen for display.
115     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
116     * then dim[1] is selected as row index, dim[2] is selected as column index
117     * and dim[3] is selected as depth index. dim[0] is not selected. Its
118     * location is fixed at 0 by default.
119     * </OL>
120     */
121    protected final int[]     selectedIndex;
122
123    /**
124     * The number of elements to move from the start location in each dimension.
125     * For example, if selectedStride[0] = 2, every other data point is selected
126     * along dim[0].
127     */
128    protected long[]          selectedStride;
129
130    /**
131     * The array of dimension sizes for a chunk.
132     */
133    protected long[]          chunkSize;
134
    /** The compression information. */
    protected StringBuilder   compression;
    /**
     * Label text for GZIP compression; presumably prepended to the compression
     * level when a GZIP-compressed dataset is described -- TODO confirm against
     * the subclasses that use it.
     */
    public static final String COMPRESSION_GZIP_TXT = "GZIP: level = ";
138
139    /** The filters information. */
140    protected StringBuilder   filters;
141
142    /** The storage layout information. */
143    protected StringBuilder   storageLayout;
144
145    /** The storage information. */
146    protected StringBuilder   storage;
147
148    /** The datatype object of the dataset. */
149    protected Datatype        datatype;
150
151    /**
152     * Array of strings that represent the dimension names. It is null if dimension names do not exist.
153     */
154    protected String[]        dimNames;
155
156    /** Flag to indicate if the byte[] array is converted to strings */
157    protected boolean         convertByteToString = true;
158
159    /** Flag to indicate if data values are loaded into memory. */
160    protected boolean         isDataLoaded        = false;
161
162    /** Flag to indicate if this dataset has been initialized */
163    protected boolean         inited = false;
164
165    /** The number of data points in the memory buffer. */
166    protected long            nPoints             = 1;
167
168    /**
169     * The data buffer that contains the raw data directly reading from file
170     * (before any data conversion).
171     */
172    protected transient Object originalBuf = null;
173
174    /**
175     * The array that holds the converted data of unsigned C-type integers.
176     * <p>
     * For example, suppose that the original data is an array of unsigned
     * 16-bit short integers. Since Java does not support unsigned integers, the
     * data is converted to an array of 32-bit signed integers. In that case, the
     * converted buffer is the array of 32-bit signed integers.
181     */
182    protected transient Object convertedBuf = null;
183
184    /**
185     * Constructs a Dataset object with a given file, name and path.
186     *
187     * @param theFile
188     *            the file that contains the dataset.
189     * @param dsName
190     *            the name of the Dataset, e.g. "dset1".
191     * @param dsPath
192     *            the full group path of this Dataset, e.g. "/arrays/".
193     */
194    public Dataset(FileFormat theFile, String dsName, String dsPath) {
195        this(theFile, dsName, dsPath, null);
196    }
197
198    /**
199     * @deprecated Not for public use in the future. <br>
200     *             Using {@link #Dataset(FileFormat, String, String)}
201     *
202     * @param theFile
203     *            the file that contains the dataset.
204     * @param dsName
205     *            the name of the Dataset, e.g. "dset1".
206     * @param dsPath
207     *            the full group path of this Dataset, e.g. "/arrays/".
208     * @param oid
209     *            the oid of this Dataset.
210     */
211    @Deprecated
212    public Dataset(FileFormat theFile, String dsName, String dsPath, long[] oid) {
213        super(theFile, dsName, dsPath, oid);
214        log.trace("Dataset: start {}", dsName);
215
216        datatype = null;
217        rank = -1;
218        data = null;
219        dims = null;
220        maxDims = null;
221        selectedDims = null;
222        startDims = null;
223        selectedStride = null;
224        chunkSize = null;
225        compression = new StringBuilder("NONE");
226        filters = new StringBuilder("NONE");
227        storageLayout = new StringBuilder("NONE");
228        storage = new StringBuilder("NONE");
229        dimNames = null;
230
231        selectedIndex = new int[3];
232        selectedIndex[0] = 0;
233        selectedIndex[1] = 1;
234        selectedIndex[2] = 2;
235    }
236
237    /**
238     * Clears memory held by the dataset, such as the data buffer.
239     */
240    @SuppressWarnings("rawtypes")
241    public void clear() {
242        if (data != null) {
243            if (data instanceof List) {
244                ((List) data).clear();
245            }
246            data = null;
247            originalBuf = null;
248            convertedBuf = null;
249        }
250        isDataLoaded = false;
251    }
252
253    /**
254     * Returns the rank (number of dimensions) of the dataset.
255     *
256     * @return the number of dimensions of the dataset.
257     */
258    @Override
259    public final int getRank() {
260        if (!inited)
261            init();
262
263        return rank;
264    }
265
266    /**
267     * Returns the array that contains the dimension sizes of the dataset.
268     *
269     * @return the dimension sizes of the dataset.
270     */
271    @Override
272    public final long[] getDims() {
273        if (!inited)
274            init();
275
276        return dims;
277    }
278
279    /**
280     * Returns the array that contains the max dimension sizes of the dataset.
281     *
282     * @return the max dimension sizes of the dataset.
283     */
284    public final long[] getMaxDims() {
285        if (!inited) init();
286
287        if (maxDims == null) return dims;
288
289        return maxDims;
290    }
291
292    /**
293     * Returns the dimension sizes of the selected subset.
294     * <p>
295     * The SelectedDims is the number of data points of the selected subset.
296     * Applications can use this array to change the size of selected subset.
297     *
298     * The selected size must be less than or equal to the current dimension size.
299     * Combined with the starting position, selected sizes and stride, the
300     * subset of a rectangle selection is fully defined.
301     * <p>
302     * For example, if a 4 X 5 dataset is as follows:
303     *
304     * <pre>
305     *     0,  1,  2,  3,  4
306     *    10, 11, 12, 13, 14
307     *    20, 21, 22, 23, 24
308     *    30, 31, 32, 33, 34
309     * long[] dims = {4, 5};
310     * long[] startDims = {1, 2};
311     * long[] selectedDims = {3, 3};
312     * long[] selectedStride = {1, 1};
313     * then the following subset is selected by the startDims and selectedDims
314     *     12, 13, 14
315     *     22, 23, 24
316     *     32, 33, 34
317     * </pre>
318     *
319     * @return the dimension sizes of the selected subset.
320     */
321    @Override
322    public final long[] getSelectedDims() {
323        if (!inited) init();
324
325        return selectedDims;
326    }
327
328    /**
329     * Returns the starting position of a selected subset.
330     * <p>
331     * Applications can use this array to change the starting position of a
332     * selection. Combined with the selected dimensions, selected sizes and
333     * stride, the subset of a rectangle selection is fully defined.
334     * <p>
335     * For example, if a 4 X 5 dataset is as follows:
336     *
337     * <pre>
338     *     0,  1,  2,  3,  4
339     *    10, 11, 12, 13, 14
340     *    20, 21, 22, 23, 24
341     *    30, 31, 32, 33, 34
342     * long[] dims = {4, 5};
343     * long[] startDims = {1, 2};
344     * long[] selectedDims = {3, 3};
345     * long[] selectedStride = {1, 1};
346     * then the following subset is selected by the startDims and selectedDims
347     *     12, 13, 14
348     *     22, 23, 24
349     *     32, 33, 34
350     * </pre>
351     *
352     * @return the starting position of a selected subset.
353     */
354    @Override
355    public final long[] getStartDims() {
356        if (!inited) init();
357
358        return startDims;
359    }
360
361    /**
362     * Returns the selectedStride of the selected dataset.
363     * <p>
364     * Applications can use this array to change how many elements to move in
365     * each dimension.
366     *
367     * Combined with the starting position and selected sizes, the subset of a
368     * rectangle selection is defined.
369     * <p>
370     * For example, if a 4 X 5 dataset is as follows:
371     *
372     * <pre>
373     *     0,  1,  2,  3,  4
374     *    10, 11, 12, 13, 14
375     *    20, 21, 22, 23, 24
376     *    30, 31, 32, 33, 34
377     * long[] dims = {4, 5};
378     * long[] startDims = {0, 0};
379     * long[] selectedDims = {2, 2};
380     * long[] selectedStride = {2, 3};
381     * then the following subset is selected by the startDims and selectedDims
382     *     0,   3
383     *     20, 23
384     * </pre>
385     *
386     * @return the selectedStride of the selected dataset.
387     */
388    @Override
389    public final long[] getStride() {
390        if (!inited) init();
391
392        if (rank <= 0) {
393            return null;
394        }
395
396        if (selectedStride == null) {
397            selectedStride = new long[rank];
398            for (int i = 0; i < rank; i++) {
399                selectedStride[i] = 1;
400            }
401        }
402
403        return selectedStride;
404    }
405
406    /**
407     * Sets the flag that indicates if a byte array is converted to a string
408     * array.
409     * <p>
410     * In a string dataset, the raw data from file is stored in a byte array. By
411     * default, this byte array is converted to an array of strings. For a large
412     * dataset (e.g. more than one million strings), the conversion takes a long
413     * time and requires a lot of memory space to store the strings. In some
414     * applications, such a conversion can be delayed. For example, A GUI
415     * application may convert only the part of the strings that is visible to the
416     * users, not the entire data array.
417     * <p>
418     * setConvertByteToString(boolean b) allows users to set the flag so that
419     * applications can choose to perform the byte-to-string conversion or not.
420     * If the flag is set to false, the getData() returns an array of byte
421     * instead of an array of strings.
422     *
423     * @param b
424     *            convert bytes to strings if b is true; otherwise, if false, do
425     *            not convert bytes to strings.
426     */
427    public final void setConvertByteToString(boolean b) {
428        convertByteToString = b;
429    }
430
431    /**
432     * Returns the flag that indicates if a byte array is converted to a string
433     * array.
434     *
435     * @return true if byte array is converted to string; otherwise, returns
436     *         false if there is no conversion.
437     */
438    public final boolean getConvertByteToString() {
439        return convertByteToString;
440    }
441
442    /**
443     * Reads the raw data of the dataset from file to a byte array.
444     * <p>
445     * readBytes() reads raw data to an array of bytes instead of array of its
446     * datatype. For example, for a one-dimension 32-bit integer dataset of
447     * size 5, readBytes() returns a byte array of size 20 instead of an
448     * int array of 5.
449     * <p>
450     * readBytes() can be used to copy data from one dataset to another
451     * efficiently because the raw data is not converted to its native type, it
452     * saves memory space and CPU time.
453     *
454     * @return the byte array of the raw data.
455     *
456     * @throws Exception if data can not be read
457     */
    public abstract byte[] readBytes() throws Exception; // abstract: no body here, concrete subclasses provide the raw-byte read
459
460    /**
461     * Writes the memory buffer of this dataset to file.
462     *
463     * @throws Exception if buffer can not be written
464     */
465    @Override
466    public final void write() throws Exception {
467        if (data != null) {
468            write(data);
469        }
470    }
471
472    /**
473     * Creates a new dataset and writes the data buffer to the new dataset.
474     * <p>
475     * This function allows applications to create a new dataset for a given
476     * data buffer. For example, users can select a specific interesting part
477     * from a large image and create a new image with the selection.
478     * <p>
479     * The new dataset retains the datatype and dataset creation properties of
480     * this dataset.
481     *
482     * @param pgroup
483     *            the group which the dataset is copied to.
484     * @param name
485     *            the name of the new dataset.
486     * @param dims
     *            the dimension sizes of the new dataset.
488     * @param data
489     *            the data values of the subset to be copied.
490     *
491     * @return the new dataset.
492     *
493     * @throws Exception if dataset can not be copied
494     */
    public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception; // abstract; note that parameters dims and data share names with fields of this class
496
497    @Override
498    public final boolean isInited() {
499        return inited;
500    }
501
502    /**
503     * Returns the data buffer of the dataset in memory.
504     * <p>
505     * If data is already loaded into memory, returns the data; otherwise, calls
506     * read() to read data from file into a memory buffer and returns the memory
507     * buffer.
508     * <p>
509     * By default, the whole dataset is read into memory. Users can also select
510     * a subset to read. Subsetting is done in an implicit way.
511     * <p>
512     * <b>How to Select a Subset</b>
513     * <p>
514     * A selection is specified by three arrays: start, stride and count.
515     * <ol>
516     * <li>start: offset of a selection
517     * <li>stride: determines how many elements to move in each dimension
518     * <li>count: number of elements to select in each dimension
519     * </ol>
520     * getStartDims(), getStride() and getSelectedDims() returns the start,
521     * stride and count arrays respectively. Applications can make a selection
522     * by changing the values of the arrays.
523     * <p>
524     * The following example shows how to make a subset. In the example, the
525     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
526     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
527     * We want to select every other data point in dims[1] and dims[2]
528     *
529     * <pre>
530     * int rank = dataset.getRank(); // number of dimensions of the dataset
531     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
533     * long[] start = dataset.getStartDims(); // the offset of the selection
534     * long[] stride = dataset.getStride(); // the stride of the dataset
535     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display
536     *
537     * // select dim1 and dim2 as 2D data for display,and slice through dim0
538     * selectedIndex[0] = 1;
539     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
541     *
542     * // reset the selection arrays
543     * for (int i = 0; i &lt; rank; i++) {
544     *     start[i] = 0;
545     *     selected[i] = 1;
546     *     stride[i] = 1;
547     * }
548     *
549     * // set stride to 2 on dim1 and dim2 so that every other data point is
550     * // selected.
551     * stride[1] = 2;
552     * stride[2] = 2;
553     *
554     * // set the selection size of dim1 and dim2
555     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
557     *
558     * // when dataset.getData() is called, the selection above will be used since
559     * // the dimension arrays are passed by reference. Changes of these arrays
560     * // outside the dataset object directly change the values of these array
561     * // in the dataset object.
562     * </pre>
563     * <p>
564     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
565     * short, int, float, double or String type based on the datatype of the
566     * dataset.
567     * <p>
568     * For CompoundDS, the memory data object is an java.util.List object. Each
569     * element of the list is a data array that corresponds to a compound field.
570     * <p>
571     * For example, if compound dataset "comp" has the following nested
572     * structure, and member datatypes
573     *
574     * <pre>
575     * comp --&gt; m01 (int)
576     * comp --&gt; m02 (float)
577     * comp --&gt; nest1 --&gt; m11 (char)
578     * comp --&gt; nest1 --&gt; m12 (String)
579     * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
580     * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
581     * </pre>
582     *
583     * getData() returns a list of six arrays: {int[], float[], char[],
584     * String[], long[] and double[]}.
585     *
586     * @return the memory buffer of the dataset.
587     *
588     * @throws Exception if object can not be read
589     * @throws OutOfMemoryError if memory is exhausted
590     */
591    @Override
592    public final Object getData() throws Exception, OutOfMemoryError {
593        if (!isDataLoaded) {
594            data = read(); // load the data
595            if (data != null) {
596                originalBuf = data;
597                isDataLoaded = true;
598                nPoints = 1;
599                log.trace("getData: selectedDims length={}",selectedDims.length);
600                for (int j = 0; j < selectedDims.length; j++) {
601                    nPoints *= selectedDims[j];
602                }
603            }
604            log.trace("getData: read {}", nPoints);
605        }
606
607        return data;
608    }
609
610    /**
611     * Not for public use in the future.
612     * <p>
613     * setData() is not safe to use because it changes memory buffer
614     * of the dataset object. Dataset operations such as write/read
615     * will fail if the buffer type or size is changed.
616     *
617     * @param d  the object data -must be an array of Objects
618     */
619    @Override
620    public final void setData(Object d) {
621        if (!(this instanceof Attribute))
622            throw new UnsupportedOperationException("setData: unsupported for non-Attribute objects");
623
624        data = d;
625    }
626
627    /**
628     * Clears the current data buffer in memory and forces the next read() to load
629     * the data from file.
630     * <p>
631     * The function read() loads data from file into memory only if the data is
632     * not read. If data is already in memory, read() just returns the memory
633     * buffer. Sometimes we want to force read() to re-read data from file. For
634     * example, when the selection is changed, we need to re-read the data.
635     *
636     * @see #getData()
637     * @see #read()
638     */
639    @Override
640    public void clearData() {
641        isDataLoaded = false;
642    }
643
644    /**
645     * Returns the dimension size of the vertical axis.
646     *
647     * <p>
648     * This function is used by GUI applications such as HDFView. GUI
649     * applications display a dataset in a 2D table or 2D image. The display
650     * order is specified by the index array of selectedIndex as follow:
651     * <dl>
652     * <dt>selectedIndex[0] -- height</dt>
653     * <dd>The vertical axis</dd>
654     * <dt>selectedIndex[1] -- width</dt>
655     * <dd>The horizontal axis</dd>
656     * <dt>selectedIndex[2] -- depth</dt>
657     * <dd>The depth axis is used for 3 or more dimensional datasets.</dd>
658     * </dl>
659     * Applications can use getSelectedIndex() to access and change the display
660     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
661     * following code will set the height=200 and width=50.
662     *
663     * <pre>
664     * int[] selectedIndex = dataset.getSelectedIndex();
665     * selectedIndex[0] = 0;
666     * selectedIndex[1] = 1;
667     * </pre>
668     *
669     * @see #getSelectedIndex()
670     * @see #getWidth()
671     *
672     * @return the size of dimension of the vertical axis.
673     */
674    @Override
675    public final long getHeight() {
676        if (!inited) init();
677
678        if ((selectedDims == null) || (selectedIndex == null)) {
679            return 0;
680        }
681
682        return selectedDims[selectedIndex[0]];
683    }
684
685    /**
686     * Returns the dimension size of the horizontal axis.
687     *
688     * <p>
689     * This function is used by GUI applications such as HDFView. GUI
690     * applications display a dataset in 2D Table or 2D Image. The display order is
691     * specified by the index array of selectedIndex as follow:
692     * <dl>
693     * <dt>selectedIndex[0] -- height</dt>
694     * <dd>The vertical axis</dd>
695     * <dt>selectedIndex[1] -- width</dt>
696     * <dd>The horizontal axis</dd>
697     * <dt>selectedIndex[2] -- depth</dt>
698     * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd>
699     * </dl>
700     * Applications can use getSelectedIndex() to access and change the display
701     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
     * following code will set the height=200 and width=50.
703     *
704     * <pre>
705     * int[] selectedIndex = dataset.getSelectedIndex();
706     * selectedIndex[0] = 0;
707     * selectedIndex[1] = 1;
708     * </pre>
709     *
710     * @see #getSelectedIndex()
711     * @see #getHeight()
712     *
713     * @return the size of dimension of the horizontal axis.
714     */
715    @Override
716    public final long getWidth() {
717        if (!inited) init();
718
719        if ((selectedDims == null) || (selectedIndex == null)) {
720            return 0;
721        }
722
723        if ((selectedDims.length < 2) || (selectedIndex.length < 2)) {
724            return 1;
725        }
726
727        return selectedDims[selectedIndex[1]];
728    }
729
730    /**
731     * Returns the indices of display order.
732     * <p>
733     *
734     * selectedIndex[] is provided for two purposes:
735     * <OL>
736     * <LI>
737     * selectedIndex[] is used to indicate the order of dimensions for display.
738     * selectedIndex[0] is for the row, selectedIndex[1] is for the column and
739     * selectedIndex[2] for the depth.
740     * <p>
741     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
742     * then dim[1] is selected as row index, dim[2] is selected as column index
743     * and dim[3] is selected as depth index.
744     * <LI>
745     * selectedIndex[] is also used to select dimensions for display for
746     * datasets with three or more dimensions. We assume that applications such
747     * as HDFView can only display data values up to three dimensions (2D
748     * spreadsheet/image with a third dimension which the 2D spreadsheet/image
749     * is selected from). For datasets with more than three dimensions, we need
750     * selectedIndex[] to tell applications which three dimensions are chosen
751     * for display. <br>
752     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
753     * then dim[1] is selected as row index, dim[2] is selected as column index
754     * and dim[3] is selected as depth index. dim[0] is not selected. Its
755     * location is fixed at 0 by default.
756     * </OL>
757     *
758     * @return the array of the indices of display order.
759     */
760    @Override
761    public final int[] getSelectedIndex() {
762        if (!inited) init();
763
764        return selectedIndex;
765    }
766
767    /**
768     * Returns the string representation of compression information.
769     * <p>
770     * For example,
771     * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED".
772     *
773     * @return the string representation of compression information.
774     */
775    @Override
776    public final String getCompression() {
777        if (!inited) init();
778
779        return compression.toString();
780    }
781
782    /**
783     * Returns the string representation of filter information.
784     *
785     * @return the string representation of filter information.
786     */
787    public final String getFilters() {
788        if (!inited) init();
789
790        return filters.toString();
791    }
792
793    /**
794     * Returns the string representation of storage layout information.
795     *
796     * @return the string representation of storage layout information.
797     */
798    public final String getStorageLayout() {
799        if (!inited) init();
800
801        return storageLayout.toString();
802    }
803
804    /**
805     * Returns the string representation of storage information.
806     *
807     * @return the string representation of storage information.
808     */
809    public final String getStorage() {
810        if (!inited) init();
811
812        return storage.toString();
813    }
814
815    /**
816     * Returns the array that contains the dimension sizes of the chunk of the
817     * dataset. Returns null if the dataset is not chunked.
818     *
819     * @return the array of chunk sizes or returns null if the dataset is not
820     *         chunked.
821     */
822    public final long[] getChunkSize() {
823        if (!inited) init();
824
825        return chunkSize;
826    }
827
828    @Override
829    public Datatype getDatatype() {
830        return datatype;
831    }
832
833    /**
834     * @deprecated Not for public use in the future. <br>
835     *             Using {@link #convertFromUnsignedC(Object, Object)}
836     *
837     * @param dataIN  the object data
838     *
839     * @return the converted object
840     */
841    @Deprecated
842    public static Object convertFromUnsignedC(Object dataIN) {
843        return Dataset.convertFromUnsignedC(dataIN, null);
844    }
845
846    /**
847     * Converts one-dimension array of unsigned C-type integers to a new array
848     * of appropriate Java integer in memory.
849     * <p>
     * Since Java does not support unsigned integers, values of unsigned C-type
     * integers must be converted into the appropriate Java integers. Otherwise,
     * the data values will not be displayed correctly. For example, if an unsigned
     * C byte, x = 200, is stored into a Java byte y, y will be -56 instead of
     * the correct value of 200.
     * <p>
     * Unsigned C integers are upgraded to Java integers according to the
     * following table:
858     *  <table border=1>
859     * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption>
860     * <TR>
861     * <TD><B>Unsigned C Integer</B></TD>
862     * <TD><B>JAVA Intege</B>r</TD>
863     * </TR>
864     * <TR>
865     * <TD>unsigned byte</TD>
866     * <TD>signed short</TD>
867     * </TR>
868     * <TR>
869     * <TD>unsigned short</TD>
870     * <TD>signed int</TD>
871     * </TR>
872     * <TR>
873     * <TD>unsigned int</TD>
874     * <TD>signed long</TD>
875     * </TR>
876     * <TR>
877     * <TD>unsigned long</TD>
878     * <TD>signed long</TD>
879     * </TR>
880     * </TABLE>
881     * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers.
882     * Therefore, the values of unsigned 64-bit datasets may be wrong in Java
883     * applications</strong>.
884     * <p>
885     * If memory data of unsigned integers is converted by
886     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
887     * the data back to unsigned C before data is written into file.
888     *
889     * @see #convertToUnsignedC(Object, Object)
890     *
891     * @param dataIN
892     *            the input 1D array of the unsigned C-type integers.
893     * @param dataOUT
894     *            the output converted (or upgraded) 1D array of Java integers.
895     *
896     * @return the upgraded 1D array of Java integers.
897     */
898    @SuppressWarnings("rawtypes")
899    public static Object convertFromUnsignedC(Object dataIN, Object dataOUT) {
900        if (dataIN == null) {
901            log.debug("convertFromUnsignedC(): data_in is null");
902            return null;
903        }
904
905        Class dataClass = dataIN.getClass();
906        if (!dataClass.isArray()) {
907            log.debug("convertFromUnsignedC(): data_in not an array");
908            return null;
909        }
910
911        if (dataOUT != null) {
912            Class dataClassOut = dataOUT.getClass();
913            if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) {
914                log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size");
915                dataOUT = null;
916            }
917        }
918
919        String cname = dataClass.getName();
920        char dname = cname.charAt(cname.lastIndexOf('[') + 1);
921        int size = Array.getLength(dataIN);
922        log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size);
923
924        if (dname == 'B') {
925            log.debug("convertFromUnsignedC(): Java convert byte to short");
926            short[] sdata = null;
927            if (dataOUT == null) {
928                sdata = new short[size];
929            }
930            else {
931                sdata = (short[]) dataOUT;
932            }
933
934            byte[] bdata = (byte[]) dataIN;
935            for (int i = 0; i < size; i++) {
936                sdata[i] = (short) ((bdata[i] + 256) & 0xFF);
937            }
938
939            dataOUT = sdata;
940        }
941        else if (dname == 'S') {
942            log.debug("convertFromUnsignedC(): Java convert short to int");
943            int[] idata = null;
944            if (dataOUT == null) {
945                idata = new int[size];
946            }
947            else {
948                idata = (int[]) dataOUT;
949            }
950
951            short[] sdata = (short[]) dataIN;
952            for (int i = 0; i < size; i++) {
953                idata[i] = (sdata[i] + 65536) & 0xFFFF;
954            }
955
956            dataOUT = idata;
957        }
958        else if (dname == 'I') {
959            log.debug("convertFromUnsignedC(): Java convert int to long");
960            long[] ldata = null;
961            if (dataOUT == null) {
962                ldata = new long[size];
963            }
964            else {
965                ldata = (long[]) dataOUT;
966            }
967
968            int[] idata = (int[]) dataIN;
969            for (int i = 0; i < size; i++) {
970                ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL;
971            }
972
973            dataOUT = ldata;
974        }
975        else {
976            dataOUT = dataIN;
977            log.debug("convertFromUnsignedC(): Java does not support unsigned long");
978        }
979
980        return dataOUT;
981    }
982
983    /**
984     * @deprecated Not for public use in the future. <br>
985     *             Using {@link #convertToUnsignedC(Object, Object)}
986     *
987     * @param dataIN
988     *            the input 1D array of the unsigned C-type integers.
989     *
990     * @return the upgraded 1D array of Java integers.
991     */
992    @Deprecated
993    public static Object convertToUnsignedC(Object dataIN) {
994        return Dataset.convertToUnsignedC(dataIN, null);
995    }
996
997    /**
998     * Converts the array of converted unsigned integers back to unsigned C-type
999     * integer data in memory.
1000     * <p>
1001     * If memory data of unsigned integers is converted by
1002     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
1003     * the data back to unsigned C before data is written into file.
1004     *
1005     * @see #convertFromUnsignedC(Object, Object)
1006     *
1007     * @param dataIN
1008     *            the input array of the Java integer.
1009     * @param dataOUT
1010     *            the output array of the unsigned C-type integer.
1011     *
1012     * @return the converted data of unsigned C-type integer array.
1013     */
1014    @SuppressWarnings("rawtypes")
1015    public static Object convertToUnsignedC(Object dataIN, Object dataOUT) {
1016        if (dataIN == null) {
1017            log.debug("convertToUnsignedC(): data_in is null");
1018            return null;
1019        }
1020
1021        Class dataClass = dataIN.getClass();
1022        if (!dataClass.isArray()) {
1023            log.debug("convertToUnsignedC(): data_in not an array");
1024            return null;
1025        }
1026
1027        if (dataOUT != null) {
1028            Class dataClassOut = dataOUT.getClass();
1029            if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) {
1030                log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size");
1031                dataOUT = null;
1032            }
1033        }
1034
1035        String cname = dataClass.getName();
1036        char dname = cname.charAt(cname.lastIndexOf('[') + 1);
1037        int size = Array.getLength(dataIN);
1038        log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size);
1039
1040        if (dname == 'S') {
1041            log.debug("convertToUnsignedC(): Java convert short to byte");
1042            byte[] bdata = null;
1043            if (dataOUT == null) {
1044                bdata = new byte[size];
1045            }
1046            else {
1047                bdata = (byte[]) dataOUT;
1048            }
1049            short[] sdata = (short[]) dataIN;
1050            for (int i = 0; i < size; i++) {
1051                bdata[i] = (byte) sdata[i];
1052            }
1053            dataOUT = bdata;
1054        }
1055        else if (dname == 'I') {
1056            log.debug("convertToUnsignedC(): Java convert int to short");
1057            short[] sdata = null;
1058            if (dataOUT == null) {
1059                sdata = new short[size];
1060            }
1061            else {
1062                sdata = (short[]) dataOUT;
1063            }
1064            int[] idata = (int[]) dataIN;
1065            for (int i = 0; i < size; i++) {
1066                sdata[i] = (short) idata[i];
1067            }
1068            dataOUT = sdata;
1069        }
1070        else if (dname == 'J') {
1071            log.debug("convertToUnsignedC(): Java convert long to int");
1072            int[] idata = null;
1073            if (dataOUT == null) {
1074                idata = new int[size];
1075            }
1076            else {
1077                idata = (int[]) dataOUT;
1078            }
1079            long[] ldata = (long[]) dataIN;
1080            for (int i = 0; i < size; i++) {
1081                idata[i] = (int) ldata[i];
1082            }
1083            dataOUT = idata;
1084        }
1085        else {
1086            dataOUT = dataIN;
1087            log.debug("convertToUnsignedC(): Java does not support unsigned long");
1088        }
1089
1090        return dataOUT;
1091    }
1092
1093    /**
1094     * Converts an array of bytes into an array of Strings for a fixed string
1095     * dataset.
1096     * <p>
1097     * A C-string is an array of chars while an Java String is an object. When a
1098     * string dataset is read into a Java application, the data is stored in an
1099     * array of Java bytes. byteToString() is used to convert the array of bytes
1100     * into an array of Java strings so that applications can display and modify
1101     * the data content.
1102     * <p>
1103     * For example, the content of a two element C string dataset is {"ABC",
1104     * "abc"}. Java applications will read the data into a byte array of {65,
1105     * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java
1106     * String of strs[0]="ABC", and strs[1]="abc".
1107     * <p>
1108     * If memory data of strings is converted to Java Strings, stringToByte()
1109     * must be called to convert the memory data back to byte array before data
1110     * is written to file.
1111     *
1112     * @see #stringToByte(String[], int)
1113     *
1114     * @param bytes
1115     *            the array of bytes to convert.
1116     * @param length
1117     *            the length of string.
1118     *
1119     * @return the array of Java String.
1120     */
1121    public static final String[] byteToString(byte[] bytes, int length) {
1122        if (bytes == null) {
1123            log.debug("byteToString(): input is null");
1124            return null;
1125        }
1126
1127        int n = bytes.length / length;
1128        log.trace("byteToString(): n={} from length of {}", n, length);
1129        String[] strArray = new String[n];
1130        String str = null;
1131        int idx = 0;
1132        for (int i = 0; i < n; i++) {
1133            str = new String(bytes, i * length, length);
1134            idx = str.indexOf('\0');
1135            if (idx >= 0) {
1136                str = str.substring(0, idx);
1137            }
1138
1139            // trim only the end
1140            int end = str.length();
1141            while (end > 0 && str.charAt(end - 1) <= '\u0020')
1142                end--;
1143
1144            strArray[i] = (end <= 0) ? "" : str.substring(0, end);
1145        }
1146
1147        return strArray;
1148    }
1149
1150    /**
1151     * Converts a string array into an array of bytes for a fixed string
1152     * dataset.
1153     * <p>
1154     * If memory data of strings is converted to Java Strings, stringToByte()
1155     * must be called to convert the memory data back to byte array before data
1156     * is written to file.
1157     *
1158     * @see #byteToString(byte[] bytes, int length)
1159     *
1160     * @param strings
1161     *            the array of string.
1162     * @param length
1163     *            the length of string.
1164     *
1165     * @return the array of bytes.
1166     */
1167    public static final byte[] stringToByte(String[] strings, int length) {
1168        if (strings == null) {
1169            log.debug("stringToByte(): input is null");
1170            return null;
1171        }
1172
1173        int size = strings.length;
1174        byte[] bytes = new byte[size * length];
1175        log.trace("stringToByte(): size={} length={}", size, length);
1176        StringBuilder strBuff = new StringBuilder(length);
1177        for (int i = 0; i < size; i++) {
1178            // initialize the string with spaces
1179            strBuff.replace(0, length, " ");
1180
1181            if (strings[i] != null) {
1182                if (strings[i].length() > length) {
1183                    strings[i] = strings[i].substring(0, length);
1184                }
1185                strBuff.replace(0, length, strings[i]);
1186            }
1187
1188            strBuff.setLength(length);
1189            System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length);
1190        }
1191
1192        return bytes;
1193    }
1194
1195    /**
1196     * Returns the array of strings that represent the dimension names. Returns
1197     * null if there is no dimension name.
1198     * <p>
1199     * Some datasets have pre-defined names for each dimension such as
1200     * "Latitude" and "Longitude". getDimNames() returns these pre-defined
1201     * names.
1202     *
1203     * @return the names of dimensions, or null if there is no dimension name.
1204     */
1205    public final String[] getDimNames() {
1206        if (!inited) init();
1207
1208        return dimNames;
1209    }
1210
    /**
     * Checks if a given datatype is a string.
     * <p>
     * This default implementation ignores the identifier and always returns
     * false; sub-classes must replace it.
     *
     * @param tid
     *            The data type identifier.
     *
     * @return true if the datatype is a string; otherwise returns false.
     */
    public boolean isString(long tid) {
        return false;
    }
1223
    /**
     * Returns the size in bytes of a given datatype.
     * <p>
     * This default implementation ignores the identifier and always returns
     * -1; sub-classes must replace it.
     *
     * @param tid
     *            The data type identifier.
     *
     * @return The size of the datatype
     */
    public long getSize(long tid) {
        return -1;
    }
1236
1237    /**
1238     * Get Class of the original data buffer if converted.
1239     *
1240     * @return the Class of originalBuf
1241     */
1242    @Override
1243    @SuppressWarnings("rawtypes")
1244    public final Class getOriginalClass() {
1245        return originalBuf.getClass();
1246    }
1247
    /**
     * Checks if dataset is virtual.
     * <p>
     * This default implementation always returns false; sub-classes must
     * replace it.
     *
     * @return true if the dataset is virtual; otherwise returns false.
     */
    public boolean isVirtual() {
        return false;
    }
1257
    /**
     * Gets the source file name at index if dataset is virtual.
     * <p>
     * This default implementation ignores the index and always returns null;
     * sub-classes must replace it.
     *
     * @param index
     *            the index of the source file.
     *
     * @return filename if the dataset is virtual; otherwise returns null.
     */
    public String getVirtualFilename(int index) {
        return null;
    }
1267
    /**
     * Gets the number of source files if dataset is virtual.
     * <p>
     * This default implementation always returns -1; sub-classes must replace
     * it.
     *
     * @return the list size if the dataset is virtual; otherwise returns negative.
     */
    public int getVirtualMaps() {
        return -1;
    }
1277}