001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the files COPYING and Copyright.html. *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * Or, see https://support.hdfgroup.org/products/licenses.html               *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.math.BigDecimal;
019import java.math.BigInteger;
020import java.text.DecimalFormat;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Iterator;
024import java.util.List;
025import java.util.Vector;
026
027import hdf.hdf5lib.HDFNativeData;
028
029import hdf.object.Attribute;
030import hdf.object.CompoundDS;
031import hdf.object.Dataset;
032import hdf.object.Datatype;
033import hdf.object.FileFormat;
034import hdf.object.Group;
035import hdf.object.HObject;
036import hdf.object.MetaDataContainer;
037import hdf.object.Utils;
038
039/**
040 * A CompoundDS is a dataset with compound datatype.
041 *
042 * A compound datatype is an aggregation of one or more datatypes. Each member
043 * of a compound type has a name which is unique within that type, and a
044 * datatype of that member in a compound datum. Compound datatypes can be nested,
045 * i.e. members of a compound datatype can be some other compound datatype.
046 *
047 * For more details on compound datatypes,
048 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b>
049 *
050 * Since Java cannot handle C-structured compound data, data in a compound dataset
051 * is loaded in to an Java List. Each element of the list is a data array that
052 * corresponds to a compound field. The data is read/written by compound field.
053 *
054 * For example, if compound dataset "comp" has the following nested structure,
055 * and member datatypes
056 *
057 * <pre>
058 * comp --&gt; m01 (int)
059 * comp --&gt; m02 (float)
060 * comp --&gt; nest1 --&gt; m11 (char)
061 * comp --&gt; nest1 --&gt; m12 (String)
062 * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
063 * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
064 * </pre>
065 *
066 * The data object is a Java list of six arrays: {int[], float[], char[],
067 * Stirng[], long[] and double[]}.
068 *
069 *
070 * @version 1.1 9/4/2007
071 * @author Peter X. Cao
072 */
073public abstract class CompoundDS extends Dataset implements CompoundDataFormat
074{
075    private static final long serialVersionUID = -4880399929644095662L;
076
077    private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(CompoundDS.class);
078
079    /**
080     * A single character to separate the names of nested compound fields. An
081     * extended ASCII character, 0x95, is used to avoid common characters in
082     * compound names.
083     */
084    public static final String SEPARATOR = "\u0095";
085
086    /**
087     * The number of members of the compound dataset.
088     */
089    protected int numberOfMembers;
090
091    /**
092     * The names of members of the compound dataset.
093     */
094    protected String[] memberNames;
095
096    /**
097     * Returns array containing the total number of elements of the members of
098     * this compound dataset.
099     *
100     * For example, a compound dataset COMP has members of A, B and C as
101     *
102     * <pre>
103     *     COMP {
104     *         int A;
105     *         float B[5];
106     *         double C[2][3];
107     *     }
108     * </pre>
109     *
110     * memberOrders is an integer array of {1, 5, 6} to indicate that member A
111     * has one element, member B has 5 elements, and member C has 6 elements.
112     */
113    protected int[] memberOrders;
114
115    /**
116     * The dimension sizes of each member.
117     *
118     * The i-th element of the Object[] is an integer array (int[]) that
119     * contains the dimension sizes of the i-th member.
120     */
121    protected transient Object[] memberDims;
122
123    /**
124     * The datatypes of compound members.
125     */
126    protected Datatype[] memberTypes;
127
128    /**
129     * The array to store flags to indicate if a member of this compound
130     * dataset is selected for read/write.
131     *
132     * If a member is selected, the read/write will perform on the member.
133     * Applications such as HDFView will only display the selected members of
134     * the compound dataset.
135     *
136     * <pre>
137     * For example, if a compound dataset has four members
138     *     String[] memberNames = {"X", "Y", "Z", "TIME"};
139     * and
140     *     boolean[] isMemberSelected = {true, false, false, true};
141     * members "X" and "TIME" are selected for read and write.
142     * </pre>
143     */
144    protected boolean[] isMemberSelected;
145
146    /**
147     * A list of names of all fields including nested fields.
148     *
149     * The nested names are separated by CompoundDS.SEPARATOR. For example, if compound dataset "A" has
150     * the following nested structure,
151     *
152     * <pre>
153     * A --&gt; m01
154     * A --&gt; m02
155     * A --&gt; nest1 --&gt; m11
156     * A --&gt; nest1 --&gt; m12
157     * A --&gt; nest1 --&gt; nest2 --&gt; m21
158     * A --&gt; nest1 --&gt; nest2 --&gt; m22
159     * i.e.
160     * A = { m01, m02, nest1{m11, m12, nest2{ m21, m22}}}
161     * </pre>
162     *
163     * The flatNameList of compound dataset "A" will be {m01, m02, nest1[m11, nest1[m12,
164     * nest1[nest2[m21, nest1[nest2[m22}
165     *
166     */
167    protected List<String> flatNameList;
168
169    /**
170     * A list of datatypes of all fields including nested fields.
171     */
172    protected List<Datatype> flatTypeList;
173
174    /**
175     * Constructs a CompoundDS object with the given file, dataset name and path.
176     *
177     * The dataset object represents an existing dataset in the file. For
178     * example, new CompoundDS(file, "dset1", "/g0/") constructs a dataset
179     * object that corresponds to the dataset, "dset1", at group "/g0/".
180     *
181     * This object is usually constructed at FileFormat.open(), which loads the
182     * file structure and object information into memory. It is rarely used
183     * elsewhere.
184     *
185     * @param theFile
186     *            the file that contains the data object.
187     * @param theName
188     *            the name of the data object, e.g. "dset".
189     * @param thePath
190     *            the full path of the data object, e.g. "/arrays/".
191     */
192    public CompoundDS(FileFormat theFile, String theName, String thePath) {
193        this(theFile, theName, thePath, null);
194    }
195
196    /**
197     * @deprecated Not for public use in the future.<br>
198     *             Using {@link #CompoundDS(FileFormat, String, String)}
199     *
200     * @param theFile
201     *            the file that contains the data object.
202     * @param dsName
203     *            the name of the data object, e.g. "dset".
204     * @param dsPath
205     *            the full path of the data object, e.g. "/arrays/".
206     * @param oid
207     *            the oid of the data object.
208     */
209    @Deprecated
210    public CompoundDS(FileFormat theFile, String dsName, String dsPath, long[] oid) {
211        super(theFile, dsName, dsPath, oid);
212
213        numberOfMembers = 0;
214        memberNames = null;
215        isMemberSelected = null;
216        memberTypes = null;
217    }
218
219    /**
220     * Resets selection of dataspace
221     */
222    @Override
223    protected void resetSelection() {
224        super.resetSelection();
225        setAllMemberSelection(true);
226    }
227
228    /**
229     * Returns the number of members of the compound dataset.
230     *
231     * @return the number of members of the compound dataset.
232     */
233    @Override
234    public final int getMemberCount() {
235        return numberOfMembers;
236    }
237
238    /**
239     * Returns the number of selected members of the compound dataset.
240     *
241     * Selected members are the compound fields which are selected for
242     * read/write.
243     *
244     * For example, in a compound datatype of {int A, float B, char[] C},
245     * users can choose to retrieve only {A, C} from the dataset. In this
246     * case, getSelectedMemberCount() returns two.
247     *
248     * @return the number of selected members.
249     */
250    @Override
251    public final int getSelectedMemberCount() {
252        int count = 0;
253
254        if (isMemberSelected != null) {
255            for (int i = 0; i < isMemberSelected.length; i++) {
256                if (isMemberSelected[i])
257                    count++;
258            }
259        }
260        log.trace("count of selected members={}", count);
261
262        return count;
263    }
264
265    /**
266     * Returns the names of the members of the compound dataset. The names of
267     * compound members are stored in an array of Strings.
268     *
269     * For example, for a compound datatype of {int A, float B, char[] C}
270     * getMemberNames() returns ["A", "B", "C"}.
271     *
272     * @return the names of compound members.
273     */
274    @Override
275    public final String[] getMemberNames() {
276        return memberNames;
277    }
278
279    /**
280     * Returns an array of the names of the selected members of the compound dataset.
281     *
282     * @return an array of the names of the selected members of the compound dataset.
283     */
284    public final String[] getSelectedMemberNames() {
285        if (isMemberSelected == null) {
286            log.debug("getSelectedMemberNames(): isMemberSelected array is null");
287            return memberNames;
288        }
289
290        int idx = 0;
291        String[] names = new String[getSelectedMemberCount()];
292        for (int i = 0; i < isMemberSelected.length; i++) {
293            if (isMemberSelected[i])
294                names[idx++] = memberNames[i];
295        }
296
297        return names;
298    }
299
300    /**
301     * Checks if a member of the compound dataset is selected for read/write.
302     *
303     * @param idx
304     *            the index of compound member.
305     *
306     * @return true if the i-th memeber is selected; otherwise returns false.
307     */
308    @Override
309    public final boolean isMemberSelected(int idx) {
310        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
311            return isMemberSelected[idx];
312        else
313            return false;
314    }
315
316    /**
317     * Selects the i-th member for read/write.
318     *
319     * @param idx
320     *            the index of compound member.
321     */
322    @Override
323    public final void selectMember(int idx) {
324        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
325            isMemberSelected[idx] = true;
326    }
327
328    /**
329     * Selects/deselects all members.
330     *
331     * @param selectAll
332     *            The indicator to select or deselect all members. If true, all
333     *            members are selected for read/write. If false, no member is
334     *            selected for read/write.
335     */
336    @Override
337    public final void setAllMemberSelection(boolean selectAll) {
338        if (isMemberSelected == null)
339            return;
340
341        for (int i = 0; i < isMemberSelected.length; i++)
342            isMemberSelected[i] = selectAll;
343    }
344
345    /**
346     * Returns array containing the total number of elements of the members of
347     * the compound dataset.
348     *
349     * For example, a compound dataset COMP has members of A, B and C as
350     *
351     * <pre>
352     *     COMP {
353     *         int A;
354     *         float B[5];
355     *         double C[2][3];
356     *     }
357     * </pre>
358     *
359     * getMemberOrders() will return an integer array of {1, 5, 6} to indicate
360     * that member A has one element, member B has 5 elements, and member C has
361     * 6 elements.
362     *
363     * @return the array containing the total number of elements of the members
364     *         of compound.
365     */
366    @Override
367    public final int[] getMemberOrders() {
368        return memberOrders;
369    }
370
371    /**
372     * Returns array containing the total number of elements of the selected
373     * members of the compound dataset.
374     *
375     * For example, a compound dataset COMP has members of A, B and C as
376     *
377     * <pre>
378     *     COMP {
379     *         int A;
380     *         float B[5];
381     *         double C[2][3];
382     *     }
383     * </pre>
384     *
385     * If A and B are selected, getSelectedMemberOrders() returns an array of
386     * {1, 5}
387     *
388     * @return array containing the total number of elements of the selected
389     *         members of compound.
390     */
391    @Override
392    public final int[] getSelectedMemberOrders() {
393        if (isMemberSelected == null) {
394            log.debug("getSelectedMemberOrders(): isMemberSelected array is null");
395            return memberOrders;
396        }
397
398        int idx = 0;
399        int[] orders = new int[getSelectedMemberCount()];
400        for (int i = 0; i < isMemberSelected.length; i++) {
401            if (isMemberSelected[i])
402                orders[idx++] = memberOrders[i];
403        }
404
405        return orders;
406    }
407
408    /**
409     * Returns the dimension sizes of the i-th member.
410     *
411     * For example, a compound dataset COMP has members of A, B and C as
412     *
413     * <pre>
414     *     COMP {
415     *         int A;
416     *         float B[5];
417     *         double C[2][3];
418     *     }
419     * </pre>
420     *
421     * getMemberDims(2) returns an array of {2, 3}, while getMemberDims(1)
422     * returns an array of {5}, and getMemberDims(0) returns null.
423     *
424     * @param i  the i-th member
425     *
426     * @return the dimension sizes of the i-th member, null if the compound
427     *         member is not an array.
428     */
429    @Override
430    public final int[] getMemberDims(int i) {
431        if (memberDims == null) {
432            return null;
433        }
434        return (int[]) memberDims[i];
435    }
436
437    /**
438     * Returns an array of datatype objects of compound members.
439     *
440     * Each member of a compound dataset has its own datatype. The datatype of a
441     * member can be atomic or other compound datatype (nested compound).
442     * Sub-classes set up the datatype objects at init().
443     *
444     * @return the array of datatype objects of the compound members.
445     */
446    @Override
447    public final Datatype[] getMemberTypes() {
448        return memberTypes;
449    }
450
451    /**
452     * Returns an array of datatype objects of selected compound members.
453     *
454     * @return an array of datatype objects of selected compound members.
455     */
456    @Override
457    public final Datatype[] getSelectedMemberTypes() {
458        if (isMemberSelected == null) {
459            log.debug("getSelectedMemberTypes(): isMemberSelected array is null");
460            return memberTypes;
461        }
462
463        int idx = 0;
464        Datatype[] types = new Datatype[getSelectedMemberCount()];
465        for (int i = 0; i < isMemberSelected.length; i++) {
466            if (isMemberSelected[i])
467                types[idx++] = memberTypes[i];
468        }
469
470        return types;
471    }
472
473    /**
474     * Returns the fill values for the data object.
475     *
476     * @return the fill values for the data object.
477     */
478    @Override
479    public Object getFillValue() {
480        return null;
481    }
482
483    /**
484     * @deprecated Not implemented for compound dataset.
485     */
486    @Deprecated
487    @Override
488    public Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception {
489        throw new UnsupportedOperationException(
490                "Writing a subset of a compound dataset to a new dataset is not implemented.");
491    }
492
493    /**
494     * Routine to convert datatypes that are read in as byte arrays to
495     * regular types.
496     *
497     * @param dtype
498     *        the datatype to convert to
499     * @param byteData
500     *        the bytes to convert
501     *
502     * @return the converted object
503     */
504    protected Object convertByteMember(final Datatype dtype, byte[] byteData) {
505        Object theObj = null;
506
507        if (dtype.getDatatypeSize() == 1) {
508            /*
509             * Normal byte[] type, such as an integer datatype of size 1.
510             */
511            theObj = byteData;
512        }
513        else if (dtype.isString() && !dtype.isVarStr() && convertByteToString) {
514            log.trace("convertByteMember(): converting byte array to string array");
515
516            theObj = byteToString(byteData, (int) dtype.getDatatypeSize());
517        }
518        else if (dtype.isInteger()) {
519            log.trace("convertByteMember(): converting byte array to integer array");
520
521            switch ((int)dtype.getDatatypeSize()) {
522            case 1:
523                /*
524                 * Normal byte[] type, such as an integer datatype of size 1.
525                 */
526                theObj = byteData;
527                break;
528            case 2:
529                theObj = HDFNativeData.byteToShort(byteData);
530                break;
531            case 4:
532                theObj = HDFNativeData.byteToInt(byteData);
533                break;
534            case 8:
535                theObj = HDFNativeData.byteToLong(byteData);
536                break;
537            default:
538                log.debug("convertByteMember(): invalid datatype size");
539                theObj = new String("*ERROR*");
540                break;
541            }
542        }
543        else if (dtype.isFloat()) {
544            log.trace("convertByteMember(): converting byte array to float array");
545
546            if (dtype.getDatatypeSize() == 8)
547                theObj = HDFNativeData.byteToDouble(byteData);
548            else
549                theObj = HDFNativeData.byteToFloat(byteData);
550        }
551        else if (dtype.isRef()) {
552            log.trace("convertByteMember(): reference type - converting byte array to long array");
553
554            theObj = HDFNativeData.byteToLong(byteData);
555        }
556        else if (dtype.isArray()) {
557            Datatype baseType = dtype.getDatatypeBase();
558
559            /*
560             * Retrieve the real base datatype in the case of ARRAY of ARRAY datatypes.
561             */
562            while (baseType.isArray())
563                baseType = baseType.getDatatypeBase();
564
565            /*
566             * Optimize for the common cases of Arrays.
567             */
568            switch (baseType.getDatatypeClass()) {
569                case Datatype.CLASS_INTEGER:
570                case Datatype.CLASS_FLOAT:
571                case Datatype.CLASS_CHAR:
572                case Datatype.CLASS_STRING:
573                case Datatype.CLASS_BITFIELD:
574                case Datatype.CLASS_OPAQUE:
575                case Datatype.CLASS_COMPOUND:
576                case Datatype.CLASS_REFERENCE:
577                case Datatype.CLASS_ENUM:
578                case Datatype.CLASS_VLEN:
579                case Datatype.CLASS_TIME:
580                    theObj = convertByteMember(baseType, byteData);
581                    break;
582
583                case Datatype.CLASS_ARRAY:
584                {
585                    Datatype arrayType = dtype.getDatatypeBase();
586
587                    long[] arrayDims = dtype.getArrayDims();
588                    int arrSize = 1;
589                    for (int i = 0; i < arrayDims.length; i++)
590                        arrSize *= arrayDims[i];
591
592                    theObj = new Object[arrSize];
593
594                    for (int i = 0; i < arrSize; i++) {
595                        byte[] indexedBytes = Arrays.copyOfRange(byteData, (int) (i * arrayType.getDatatypeSize()),
596                                (int) ((i + 1) * arrayType.getDatatypeSize()));
597                        ((Object[]) theObj)[i] = convertByteMember(arrayType, indexedBytes);
598                    }
599
600                    break;
601                }
602
603                case Datatype.CLASS_NO_CLASS:
604                default:
605                    log.debug("convertByteMember(): invalid datatype class");
606                    theObj = new String("*ERROR*");
607            }
608        }
609        else if (dtype.isCompound()) {
610            /*
611             * TODO: still valid after reading change?
612             */
613            theObj = convertCompoundByteMembers(dtype, byteData);
614        }
615        else {
616            theObj = byteData;
617        }
618
619        return theObj;
620    }
621
622    /**
623     * Given an array of bytes representing a compound Datatype, converts each of
624     * its members into Objects and returns the results.
625     *
626     * @param dtype
627     *            The compound datatype to convert
628     * @param data
629     *            The byte array representing the data of the compound Datatype
630     * @return The converted types of the bytes
631     */
632    protected Object convertCompoundByteMembers(final Datatype dtype, byte[] data) {
633        List<Object> theData = null;
634
635        List<Datatype> allSelectedTypes = Arrays.asList(this.getSelectedMemberTypes());
636        List<Datatype> localTypes = new ArrayList<>(dtype.getCompoundMemberTypes());
637        Iterator<Datatype> localIt = localTypes.iterator();
638        while (localIt.hasNext()) {
639            Datatype curType = localIt.next();
640
641            if (curType.isCompound())
642                continue;
643
644            if (!allSelectedTypes.contains(curType))
645                localIt.remove();
646        }
647
648        theData = new ArrayList<>(localTypes.size());
649        for (int i = 0, index = 0; i < localTypes.size(); i++) {
650            Datatype curType = localTypes.get(i);
651
652            if (curType.isCompound())
653                theData.add(convertCompoundByteMembers(curType,
654                        Arrays.copyOfRange(data, index, index + (int) curType.getDatatypeSize())));
655            else
656                theData.add(convertByteMember(curType,
657                        Arrays.copyOfRange(data, index, index + (int) curType.getDatatypeSize())));
658
659            index += curType.getDatatypeSize();
660        }
661
662        return theData;
663    }
664}