001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the COPYING file, which can be found  *
009 * at the root of the source code distribution tree,                         *
010 * or in https://www.hdfgroup.org/licenses.                                  *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.math.BigDecimal;
019import java.math.BigInteger;
020import java.text.DecimalFormat;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Iterator;
024import java.util.List;
025import java.util.Vector;
026
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030import hdf.hdf5lib.HDFNativeData;
031
032import hdf.object.Attribute;
033import hdf.object.CompoundDS;
034import hdf.object.Dataset;
035import hdf.object.Datatype;
036import hdf.object.FileFormat;
037import hdf.object.Group;
038import hdf.object.HObject;
039import hdf.object.Utils;
040
041/**
042 * A CompoundDS is a dataset with compound datatype.
043 *
044 * A compound datatype is an aggregation of one or more datatypes. Each member of a compound type has a name which is
045 * unique within that type, and a datatype of that member in a compound datum. Compound datatypes can be nested, i.e.
046 * members of a compound datatype can be some other compound datatype.
047 *
048 * For more details on compound datatypes, See
049 * <a href="https://hdfgroup.github.io/hdf5/_h5_t__u_g.html#sec_datatype">HDF5 Datatypes in HDF5 User Guide</a>
050 *
051 * Since Java cannot handle C-structured compound data, data in a compound dataset is loaded in to an Java List. Each
052 * element of the list is a data array that corresponds to a compound field. The data is read/written by compound field.
053 *
054 * For example, if compound dataset "comp" has the following nested structure, and member datatypes
055 *
056 * <pre>
057 * comp --&gt; m01 (int)
058 * comp --&gt; m02 (float)
059 * comp --&gt; nest1 --&gt; m11 (char)
060 * comp --&gt; nest1 --&gt; m12 (String)
061 * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
062 * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
063 * </pre>
064 *
065 * The data object is a Java list of six arrays: {int[], float[], char[], Stirng[], long[] and double[]}.
066 *
067 *
068 * @version 1.1 9/4/2007
069 * @author Peter X. Cao
070 */
071public abstract class CompoundDS extends Dataset implements CompoundDataFormat
072{
073    private static final long serialVersionUID = -4880399929644095662L;
074
075    private static final Logger log = LoggerFactory.getLogger(CompoundDS.class);
076
077    /**
078     * A single character to separate the names of nested compound fields. An
079     * extended ASCII character, 0x95, is used to avoid common characters in
080     * compound names.
081     */
082    public static final String SEPARATOR = "\u0095";
083
084    /**
085     * The number of members of the compound dataset.
086     */
087    protected int numberOfMembers;
088
089    /**
090     * The names of members of the compound dataset.
091     */
092    protected String[] memberNames;
093
094    /**
095     * Returns array containing the total number of elements of the members of
096     * this compound dataset.
097     *
098     * For example, a compound dataset COMP has members of A, B and C as
099     *
100     * <pre>
101     *     COMP {
102     *         int A;
103     *         float B[5];
104     *         double C[2][3];
105     *     }
106     * </pre>
107     *
108     * memberOrders is an integer array of {1, 5, 6} to indicate that member A
109     * has one element, member B has 5 elements, and member C has 6 elements.
110     */
111    protected int[] memberOrders;
112
113    /**
114     * The dimension sizes of each member.
115     *
116     * The i-th element of the Object[] is an integer array (int[]) that
117     * contains the dimension sizes of the i-th member.
118     */
119    protected transient Object[] memberDims;
120
121    /**
122     * The datatypes of compound members.
123     */
124    protected Datatype[] memberTypes;
125
126    /**
127     * The array to store flags to indicate if a member of this compound
128     * dataset is selected for read/write.
129     *
130     * If a member is selected, the read/write will perform on the member.
131     * Applications such as HDFView will only display the selected members of
132     * the compound dataset.
133     *
134     * <pre>
135     * For example, if a compound dataset has four members
136     *     String[] memberNames = {"X", "Y", "Z", "TIME"};
137     * and
138     *     boolean[] isMemberSelected = {true, false, false, true};
139     * members "X" and "TIME" are selected for read and write.
140     * </pre>
141     */
142    protected boolean[] isMemberSelected;
143
144    /**
145     * A list of names of all fields including nested fields.
146     *
147     * The nested names are separated by CompoundDS.SEPARATOR. For example, if compound dataset "A" has
148     * the following nested structure,
149     *
150     * <pre>
151     * A --&gt; m01
152     * A --&gt; m02
153     * A --&gt; nest1 --&gt; m11
154     * A --&gt; nest1 --&gt; m12
155     * A --&gt; nest1 --&gt; nest2 --&gt; m21
156     * A --&gt; nest1 --&gt; nest2 --&gt; m22
157     * i.e.
158     * A = { m01, m02, nest1{m11, m12, nest2{ m21, m22}}}
159     * </pre>
160     *
161     * The flatNameList of compound dataset "A" will be {m01, m02, nest1[m11, nest1[m12,
162     * nest1[nest2[m21, nest1[nest2[m22}
163     *
164     */
165    protected List<String> flatNameList;
166
167    /**
168     * A list of datatypes of all fields including nested fields.
169     */
170    protected List<Datatype> flatTypeList;
171
172    /**
173     * Constructs a CompoundDS object with the given file, dataset name and path.
174     *
175     * The dataset object represents an existing dataset in the file. For
176     * example, new CompoundDS(file, "dset1", "/g0/") constructs a dataset
177     * object that corresponds to the dataset, "dset1", at group "/g0/".
178     *
179     * This object is usually constructed at FileFormat.open(), which loads the
180     * file structure and object information into memory. It is rarely used
181     * elsewhere.
182     *
183     * @param theFile
184     *            the file that contains the data object.
185     * @param theName
186     *            the name of the data object, e.g. "dset".
187     * @param thePath
188     *            the full path of the data object, e.g. "/arrays/".
189     */
190    public CompoundDS(FileFormat theFile, String theName, String thePath) {
191        this(theFile, theName, thePath, null);
192    }
193
194    /**
195     * @deprecated Not for public use in the future.<br>
196     *             Using {@link #CompoundDS(FileFormat, String, String)}
197     *
198     * @param theFile
199     *            the file that contains the data object.
200     * @param dsName
201     *            the name of the data object, e.g. "dset".
202     * @param dsPath
203     *            the full path of the data object, e.g. "/arrays/".
204     * @param oid
205     *            the oid of the data object.
206     */
207    @Deprecated
208    public CompoundDS(FileFormat theFile, String dsName, String dsPath, long[] oid) {
209        super(theFile, dsName, dsPath, oid);
210
211        numberOfMembers = 0;
212        memberNames = null;
213        isMemberSelected = null;
214        memberTypes = null;
215    }
216
217    /**
218     * Resets selection of dataspace
219     */
220    @Override
221    protected void resetSelection() {
222        super.resetSelection();
223        setAllMemberSelection(true);
224    }
225
226    /**
227     * Returns the number of members of the compound dataset.
228     *
229     * @return the number of members of the compound dataset.
230     */
231    @Override
232    public final int getMemberCount() {
233        return numberOfMembers;
234    }
235
236    /**
237     * Returns the number of selected members of the compound dataset.
238     *
239     * Selected members are the compound fields which are selected for
240     * read/write.
241     *
242     * For example, in a compound datatype of {int A, float B, char[] C},
243     * users can choose to retrieve only {A, C} from the dataset. In this
244     * case, getSelectedMemberCount() returns two.
245     *
246     * @return the number of selected members.
247     */
248    @Override
249    public final int getSelectedMemberCount() {
250        int count = 0;
251
252        if (isMemberSelected != null) {
253            for (int i = 0; i < isMemberSelected.length; i++) {
254                if (isMemberSelected[i])
255                    count++;
256            }
257        }
258        log.trace("count of selected members={}", count);
259
260        return count;
261    }
262
263    /**
264     * Returns the names of the members of the compound dataset. The names of
265     * compound members are stored in an array of Strings.
266     *
267     * For example, for a compound datatype of {int A, float B, char[] C}
268     * getMemberNames() returns ["A", "B", "C"}.
269     *
270     * @return the names of compound members.
271     */
272    @Override
273    public final String[] getMemberNames() {
274        return memberNames;
275    }
276
277    /**
278     * Returns an array of the names of the selected members of the compound dataset.
279     *
280     * @return an array of the names of the selected members of the compound dataset.
281     */
282    @Override
283    public final String[] getSelectedMemberNames() {
284        if (isMemberSelected == null) {
285            log.debug("getSelectedMemberNames(): isMemberSelected array is null");
286            return memberNames;
287        }
288
289        int idx = 0;
290        String[] names = new String[getSelectedMemberCount()];
291        for (int i = 0; i < isMemberSelected.length; i++) {
292            if (isMemberSelected[i])
293                names[idx++] = memberNames[i];
294        }
295
296        return names;
297    }
298
299    /**
300     * Checks if a member of the compound dataset is selected for read/write.
301     *
302     * @param idx
303     *            the index of compound member.
304     *
305     * @return true if the i-th memeber is selected; otherwise returns false.
306     */
307    @Override
308    public final boolean isMemberSelected(int idx) {
309        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
310            return isMemberSelected[idx];
311        else
312            return false;
313    }
314
315    /**
316     * Selects the i-th member for read/write.
317     *
318     * @param idx
319     *            the index of compound member.
320     */
321    @Override
322    public final void selectMember(int idx) {
323        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
324            isMemberSelected[idx] = true;
325    }
326
327    /**
328     * Selects/deselects all members.
329     *
330     * @param selectAll
331     *            The indicator to select or deselect all members. If true, all
332     *            members are selected for read/write. If false, no member is
333     *            selected for read/write.
334     */
335    @Override
336    public final void setAllMemberSelection(boolean selectAll) {
337        if (isMemberSelected == null)
338            return;
339
340        for (int i = 0; i < isMemberSelected.length; i++)
341            isMemberSelected[i] = selectAll;
342    }
343
344    /**
345     * Returns array containing the total number of elements of the members of
346     * the compound dataset.
347     *
348     * For example, a compound dataset COMP has members of A, B and C as
349     *
350     * <pre>
351     *     COMP {
352     *         int A;
353     *         float B[5];
354     *         double C[2][3];
355     *     }
356     * </pre>
357     *
358     * getMemberOrders() will return an integer array of {1, 5, 6} to indicate
359     * that member A has one element, member B has 5 elements, and member C has
360     * 6 elements.
361     *
362     * @return the array containing the total number of elements of the members
363     *         of compound.
364     */
365    @Override
366    public final int[] getMemberOrders() {
367        return memberOrders;
368    }
369
370    /**
371     * Returns array containing the total number of elements of the selected
372     * members of the compound dataset.
373     *
374     * For example, a compound dataset COMP has members of A, B and C as
375     *
376     * <pre>
377     *     COMP {
378     *         int A;
379     *         float B[5];
380     *         double C[2][3];
381     *     }
382     * </pre>
383     *
384     * If A and B are selected, getSelectedMemberOrders() returns an array of
385     * {1, 5}
386     *
387     * @return array containing the total number of elements of the selected
388     *         members of compound.
389     */
390    @Override
391    public final int[] getSelectedMemberOrders() {
392        if (isMemberSelected == null) {
393            log.debug("getSelectedMemberOrders(): isMemberSelected array is null");
394            return memberOrders;
395        }
396
397        int idx = 0;
398        int[] orders = new int[getSelectedMemberCount()];
399        for (int i = 0; i < isMemberSelected.length; i++) {
400            if (isMemberSelected[i])
401                orders[idx++] = memberOrders[i];
402        }
403
404        return orders;
405    }
406
407    /**
408     * Returns the dimension sizes of the i-th member.
409     *
410     * For example, a compound dataset COMP has members of A, B and C as
411     *
412     * <pre>
413     *     COMP {
414     *         int A;
415     *         float B[5];
416     *         double C[2][3];
417     *     }
418     * </pre>
419     *
420     * getMemberDims(2) returns an array of {2, 3}, while getMemberDims(1)
421     * returns an array of {5}, and getMemberDims(0) returns null.
422     *
423     * @param i  the i-th member
424     *
425     * @return the dimension sizes of the i-th member, null if the compound
426     *         member is not an array.
427     */
428    @Override
429    public final int[] getMemberDims(int i) {
430        if (memberDims == null) {
431            return null;
432        }
433        return (int[]) memberDims[i];
434    }
435
436    /**
437     * Returns an array of datatype objects of compound members.
438     *
439     * Each member of a compound dataset has its own datatype. The datatype of a
440     * member can be atomic or other compound datatype (nested compound).
441     * Sub-classes set up the datatype objects at init().
442     *
443     * @return the array of datatype objects of the compound members.
444     */
445    @Override
446    public final Datatype[] getMemberTypes() {
447        return memberTypes;
448    }
449
450    /**
451     * Returns an array of datatype objects of selected compound members.
452     *
453     * @return an array of datatype objects of selected compound members.
454     */
455    @Override
456    public final Datatype[] getSelectedMemberTypes() {
457        if (isMemberSelected == null) {
458            log.debug("getSelectedMemberTypes(): isMemberSelected array is null");
459            return memberTypes;
460        }
461
462        int idx = 0;
463        Datatype[] types = new Datatype[getSelectedMemberCount()];
464        for (int i = 0; i < isMemberSelected.length; i++) {
465            if (isMemberSelected[i])
466                types[idx++] = memberTypes[i];
467        }
468
469        return types;
470    }
471
472    /**
473     * Returns the fill values for the data object.
474     *
475     * @return the fill values for the data object.
476     */
477    @Override
478    public Object getFillValue() {
479        return null;
480    }
481
482    /**
483     * @deprecated Not implemented for compound dataset.
484     */
485    @Deprecated
486    @Override
487    public Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception {
488        throw new UnsupportedOperationException(
489                "Writing a subset of a compound dataset to a new dataset is not implemented.");
490    }
491
492    /**
493     * Routine to convert datatypes that are read in as byte arrays to
494     * regular types.
495     *
496     * @param dtype
497     *        the datatype to convert to
498     * @param byteData
499     *        the bytes to convert
500     *
501     * @return the converted object
502     */
503    protected Object convertByteMember(final Datatype dtype, byte[] byteData) {
504        Object theObj = null;
505        log.trace("convertByteMember(): byteData={} start", byteData);
506
507        if (dtype.getDatatypeSize() == 1) {
508            /*
509             * Normal byte[] type, such as an integer datatype of size 1.
510             */
511            theObj = byteData;
512        }
513        else if (dtype.isString() && !dtype.isVarStr() && convertByteToString && (byteData instanceof byte[])) {
514            log.trace("convertByteMember(): converting byte array to string array");
515
516            theObj = byteToString(byteData, (int) dtype.getDatatypeSize());
517        }
518        else if (dtype.isInteger()) {
519            log.trace("convertByteMember(): converting byte array to integer array");
520
521            switch ((int)dtype.getDatatypeSize()) {
522            case 1:
523                /*
524                 * Normal byte[] type, such as an integer datatype of size 1.
525                 */
526                theObj = byteData;
527                break;
528            case 2:
529                theObj = HDFNativeData.byteToShort(byteData);
530                break;
531            case 4:
532                theObj = HDFNativeData.byteToInt(byteData);
533                break;
534            case 8:
535                theObj = HDFNativeData.byteToLong(byteData);
536                break;
537            default:
538                log.debug("convertByteMember(): invalid datatype size");
539                theObj = new String("*ERROR*");
540                break;
541            }
542        }
543        else if (dtype.isFloat()) {
544            log.trace("convertByteMember(): converting byte array to float array");
545
546            if (dtype.getDatatypeSize() == 8)
547                theObj = HDFNativeData.byteToDouble(byteData);
548            else
549                theObj = HDFNativeData.byteToFloat(byteData);
550        }
551        else if (dtype.isArray()) {
552            Datatype baseType = dtype.getDatatypeBase();
553            log.trace("convertByteMember(): converting byte array to baseType array");
554
555            /*
556             * Retrieve the real base datatype in the case of ARRAY of ARRAY datatypes.
557             */
558            while (baseType.isArray())
559                baseType = baseType.getDatatypeBase();
560
561            /*
562             * Optimize for the common cases of Arrays.
563             */
564            switch (baseType.getDatatypeClass()) {
565            case Datatype.CLASS_INTEGER:
566            case Datatype.CLASS_FLOAT:
567            case Datatype.CLASS_CHAR:
568            case Datatype.CLASS_STRING:
569            case Datatype.CLASS_BITFIELD:
570            case Datatype.CLASS_OPAQUE:
571            case Datatype.CLASS_COMPOUND:
572            case Datatype.CLASS_REFERENCE:
573            case Datatype.CLASS_ENUM:
574            case Datatype.CLASS_VLEN:
575            case Datatype.CLASS_TIME:
576                theObj = convertByteMember(baseType, byteData);
577                break;
578
579            case Datatype.CLASS_ARRAY:
580            {
581                Datatype arrayType = dtype.getDatatypeBase();
582
583                long[] arrayDims = dtype.getArrayDims();
584                int arrSize = 1;
585                for (int i = 0; i < arrayDims.length; i++)
586                    arrSize *= arrayDims[i];
587                log.trace("convertByteMember(): no CLASS_ARRAY arrayType={} arrSize={}", arrayType, arrSize);
588
589                theObj = new Object[arrSize];
590
591                for (int i = 0; i < arrSize; i++) {
592                    byte[] indexedBytes = Arrays.copyOfRange(byteData, (int) (i * arrayType.getDatatypeSize()),
593                            (int) ((i + 1) * arrayType.getDatatypeSize()));
594                    ((Object[]) theObj)[i] = convertByteMember(arrayType, indexedBytes);
595                }
596
597                break;
598            }
599
600            case Datatype.CLASS_NO_CLASS:
601            default:
602                log.debug("convertByteMember(): invalid datatype class");
603                theObj = new String("*ERROR*");
604            }
605        }
606        else if (dtype.isCompound()) {
607            log.debug("convertByteMember(): compound datatype class");
608            /*
609             * TODO: still valid after reading change?
610             */
611            theObj = convertCompoundByteMembers(dtype, byteData);
612        }
613        else {
614            log.debug("convertByteMember(): byteData={}", byteData);
615            theObj = byteData;
616        }
617
618        return theObj;
619    }
620
621    /**
622     * Given an array of bytes representing a compound Datatype, converts each of
623     * its members into Objects and returns the results.
624     *
625     * @param dtype
626     *            The compound datatype to convert
627     * @param data
628     *            The byte array representing the data of the compound Datatype
629     * @return The converted types of the bytes
630     */
631    protected Object convertCompoundByteMembers(final Datatype dtype, byte[] data) {
632        List<Object> theData = null;
633
634        List<Datatype> allSelectedTypes = Arrays.asList(this.getSelectedMemberTypes());
635        List<Datatype> localTypes = new ArrayList<>(dtype.getCompoundMemberTypes());
636        Iterator<Datatype> localIt = localTypes.iterator();
637        while (localIt.hasNext()) {
638            Datatype curType = localIt.next();
639
640            if (curType.isCompound())
641                continue;
642
643            if (!allSelectedTypes.contains(curType))
644                localIt.remove();
645        }
646
647        theData = new ArrayList<>(localTypes.size());
648        for (int i = 0, index = 0; i < localTypes.size(); i++) {
649            Datatype curType = localTypes.get(i);
650
651            if (curType.isCompound())
652                theData.add(convertCompoundByteMembers(curType,
653                        Arrays.copyOfRange(data, index, index + (int) curType.getDatatypeSize())));
654            else
655                theData.add(convertByteMember(curType,
656                        Arrays.copyOfRange(data, index, index + (int) curType.getDatatypeSize())));
657
658            index += curType.getDatatypeSize();
659        }
660
661        return theData;
662    }
663}