001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the files COPYING and Copyright.html. *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * Or, see https://support.hdfgroup.org/products/licenses.html               *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.math.BigDecimal;
019import java.math.BigInteger;
020import java.text.DecimalFormat;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Iterator;
024import java.util.List;
025import java.util.Vector;
026
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030import hdf.hdf5lib.HDFNativeData;
031
032import hdf.object.Attribute;
033import hdf.object.CompoundDS;
034import hdf.object.Dataset;
035import hdf.object.Datatype;
036import hdf.object.FileFormat;
037import hdf.object.Group;
038import hdf.object.HObject;
039import hdf.object.Utils;
040
041/**
042 * A CompoundDS is a dataset with compound datatype.
043 *
044 * A compound datatype is an aggregation of one or more datatypes. Each member
045 * of a compound type has a name which is unique within that type, and a
046 * datatype of that member in a compound datum. Compound datatypes can be nested,
047 * i.e. members of a compound datatype can be some other compound datatype.
048 *
049 * For more details on compound datatypes,
050 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b>
051 *
052 * Since Java cannot handle C-structured compound data, data in a compound dataset
053 * is loaded in to an Java List. Each element of the list is a data array that
054 * corresponds to a compound field. The data is read/written by compound field.
055 *
056 * For example, if compound dataset "comp" has the following nested structure,
057 * and member datatypes
058 *
059 * <pre>
060 * comp --&gt; m01 (int)
061 * comp --&gt; m02 (float)
062 * comp --&gt; nest1 --&gt; m11 (char)
063 * comp --&gt; nest1 --&gt; m12 (String)
064 * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
065 * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
066 * </pre>
067 *
068 * The data object is a Java list of six arrays: {int[], float[], char[],
069 * Stirng[], long[] and double[]}.
070 *
071 *
072 * @version 1.1 9/4/2007
073 * @author Peter X. Cao
074 */
075public abstract class CompoundDS extends Dataset implements CompoundDataFormat
076{
077    private static final long serialVersionUID = -4880399929644095662L;
078
079    private static final Logger log = LoggerFactory.getLogger(CompoundDS.class);
080
081    /**
082     * A single character to separate the names of nested compound fields. An
083     * extended ASCII character, 0x95, is used to avoid common characters in
084     * compound names.
085     */
086    public static final String SEPARATOR = "\u0095";
087
088    /**
089     * The number of members of the compound dataset.
090     */
091    protected int numberOfMembers;
092
093    /**
094     * The names of members of the compound dataset.
095     */
096    protected String[] memberNames;
097
098    /**
099     * Returns array containing the total number of elements of the members of
100     * this compound dataset.
101     *
102     * For example, a compound dataset COMP has members of A, B and C as
103     *
104     * <pre>
105     *     COMP {
106     *         int A;
107     *         float B[5];
108     *         double C[2][3];
109     *     }
110     * </pre>
111     *
112     * memberOrders is an integer array of {1, 5, 6} to indicate that member A
113     * has one element, member B has 5 elements, and member C has 6 elements.
114     */
115    protected int[] memberOrders;
116
117    /**
118     * The dimension sizes of each member.
119     *
120     * The i-th element of the Object[] is an integer array (int[]) that
121     * contains the dimension sizes of the i-th member.
122     */
123    protected transient Object[] memberDims;
124
125    /**
126     * The datatypes of compound members.
127     */
128    protected Datatype[] memberTypes;
129
130    /**
131     * The array to store flags to indicate if a member of this compound
132     * dataset is selected for read/write.
133     *
134     * If a member is selected, the read/write will perform on the member.
135     * Applications such as HDFView will only display the selected members of
136     * the compound dataset.
137     *
138     * <pre>
139     * For example, if a compound dataset has four members
140     *     String[] memberNames = {"X", "Y", "Z", "TIME"};
141     * and
142     *     boolean[] isMemberSelected = {true, false, false, true};
143     * members "X" and "TIME" are selected for read and write.
144     * </pre>
145     */
146    protected boolean[] isMemberSelected;
147
148    /**
149     * A list of names of all fields including nested fields.
150     *
151     * The nested names are separated by CompoundDS.SEPARATOR. For example, if compound dataset "A" has
152     * the following nested structure,
153     *
154     * <pre>
155     * A --&gt; m01
156     * A --&gt; m02
157     * A --&gt; nest1 --&gt; m11
158     * A --&gt; nest1 --&gt; m12
159     * A --&gt; nest1 --&gt; nest2 --&gt; m21
160     * A --&gt; nest1 --&gt; nest2 --&gt; m22
161     * i.e.
162     * A = { m01, m02, nest1{m11, m12, nest2{ m21, m22}}}
163     * </pre>
164     *
165     * The flatNameList of compound dataset "A" will be {m01, m02, nest1[m11, nest1[m12,
166     * nest1[nest2[m21, nest1[nest2[m22}
167     *
168     */
169    protected List<String> flatNameList;
170
171    /**
172     * A list of datatypes of all fields including nested fields.
173     */
174    protected List<Datatype> flatTypeList;
175
176    /**
177     * Constructs a CompoundDS object with the given file, dataset name and path.
178     *
179     * The dataset object represents an existing dataset in the file. For
180     * example, new CompoundDS(file, "dset1", "/g0/") constructs a dataset
181     * object that corresponds to the dataset, "dset1", at group "/g0/".
182     *
183     * This object is usually constructed at FileFormat.open(), which loads the
184     * file structure and object information into memory. It is rarely used
185     * elsewhere.
186     *
187     * @param theFile
188     *            the file that contains the data object.
189     * @param theName
190     *            the name of the data object, e.g. "dset".
191     * @param thePath
192     *            the full path of the data object, e.g. "/arrays/".
193     */
194    public CompoundDS(FileFormat theFile, String theName, String thePath) {
195        this(theFile, theName, thePath, null);
196    }
197
198    /**
199     * @deprecated Not for public use in the future.<br>
200     *             Using {@link #CompoundDS(FileFormat, String, String)}
201     *
202     * @param theFile
203     *            the file that contains the data object.
204     * @param dsName
205     *            the name of the data object, e.g. "dset".
206     * @param dsPath
207     *            the full path of the data object, e.g. "/arrays/".
208     * @param oid
209     *            the oid of the data object.
210     */
211    @Deprecated
212    public CompoundDS(FileFormat theFile, String dsName, String dsPath, long[] oid) {
213        super(theFile, dsName, dsPath, oid);
214
215        numberOfMembers = 0;
216        memberNames = null;
217        isMemberSelected = null;
218        memberTypes = null;
219    }
220
221    /**
222     * Resets selection of dataspace
223     */
224    @Override
225    protected void resetSelection() {
226        super.resetSelection();
227        setAllMemberSelection(true);
228    }
229
230    /**
231     * Returns the number of members of the compound dataset.
232     *
233     * @return the number of members of the compound dataset.
234     */
235    @Override
236    public final int getMemberCount() {
237        return numberOfMembers;
238    }
239
240    /**
241     * Returns the number of selected members of the compound dataset.
242     *
243     * Selected members are the compound fields which are selected for
244     * read/write.
245     *
246     * For example, in a compound datatype of {int A, float B, char[] C},
247     * users can choose to retrieve only {A, C} from the dataset. In this
248     * case, getSelectedMemberCount() returns two.
249     *
250     * @return the number of selected members.
251     */
252    @Override
253    public final int getSelectedMemberCount() {
254        int count = 0;
255
256        if (isMemberSelected != null) {
257            for (int i = 0; i < isMemberSelected.length; i++) {
258                if (isMemberSelected[i])
259                    count++;
260            }
261        }
262        log.trace("count of selected members={}", count);
263
264        return count;
265    }
266
267    /**
268     * Returns the names of the members of the compound dataset. The names of
269     * compound members are stored in an array of Strings.
270     *
271     * For example, for a compound datatype of {int A, float B, char[] C}
272     * getMemberNames() returns ["A", "B", "C"}.
273     *
274     * @return the names of compound members.
275     */
276    @Override
277    public final String[] getMemberNames() {
278        return memberNames;
279    }
280
281    /**
282     * Returns an array of the names of the selected members of the compound dataset.
283     *
284     * @return an array of the names of the selected members of the compound dataset.
285     */
286    @Override
287    public final String[] getSelectedMemberNames() {
288        if (isMemberSelected == null) {
289            log.debug("getSelectedMemberNames(): isMemberSelected array is null");
290            return memberNames;
291        }
292
293        int idx = 0;
294        String[] names = new String[getSelectedMemberCount()];
295        for (int i = 0; i < isMemberSelected.length; i++) {
296            if (isMemberSelected[i])
297                names[idx++] = memberNames[i];
298        }
299
300        return names;
301    }
302
303    /**
304     * Checks if a member of the compound dataset is selected for read/write.
305     *
306     * @param idx
307     *            the index of compound member.
308     *
309     * @return true if the i-th memeber is selected; otherwise returns false.
310     */
311    @Override
312    public final boolean isMemberSelected(int idx) {
313        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
314            return isMemberSelected[idx];
315        else
316            return false;
317    }
318
319    /**
320     * Selects the i-th member for read/write.
321     *
322     * @param idx
323     *            the index of compound member.
324     */
325    @Override
326    public final void selectMember(int idx) {
327        if ((isMemberSelected != null) && (isMemberSelected.length > idx))
328            isMemberSelected[idx] = true;
329    }
330
331    /**
332     * Selects/deselects all members.
333     *
334     * @param selectAll
335     *            The indicator to select or deselect all members. If true, all
336     *            members are selected for read/write. If false, no member is
337     *            selected for read/write.
338     */
339    @Override
340    public final void setAllMemberSelection(boolean selectAll) {
341        if (isMemberSelected == null)
342            return;
343
344        for (int i = 0; i < isMemberSelected.length; i++)
345            isMemberSelected[i] = selectAll;
346    }
347
348    /**
349     * Returns array containing the total number of elements of the members of
350     * the compound dataset.
351     *
352     * For example, a compound dataset COMP has members of A, B and C as
353     *
354     * <pre>
355     *     COMP {
356     *         int A;
357     *         float B[5];
358     *         double C[2][3];
359     *     }
360     * </pre>
361     *
362     * getMemberOrders() will return an integer array of {1, 5, 6} to indicate
363     * that member A has one element, member B has 5 elements, and member C has
364     * 6 elements.
365     *
366     * @return the array containing the total number of elements of the members
367     *         of compound.
368     */
369    @Override
370    public final int[] getMemberOrders() {
371        return memberOrders;
372    }
373
374    /**
375     * Returns array containing the total number of elements of the selected
376     * members of the compound dataset.
377     *
378     * For example, a compound dataset COMP has members of A, B and C as
379     *
380     * <pre>
381     *     COMP {
382     *         int A;
383     *         float B[5];
384     *         double C[2][3];
385     *     }
386     * </pre>
387     *
388     * If A and B are selected, getSelectedMemberOrders() returns an array of
389     * {1, 5}
390     *
391     * @return array containing the total number of elements of the selected
392     *         members of compound.
393     */
394    @Override
395    public final int[] getSelectedMemberOrders() {
396        if (isMemberSelected == null) {
397            log.debug("getSelectedMemberOrders(): isMemberSelected array is null");
398            return memberOrders;
399        }
400
401        int idx = 0;
402        int[] orders = new int[getSelectedMemberCount()];
403        for (int i = 0; i < isMemberSelected.length; i++) {
404            if (isMemberSelected[i])
405                orders[idx++] = memberOrders[i];
406        }
407
408        return orders;
409    }
410
411    /**
412     * Returns the dimension sizes of the i-th member.
413     *
414     * For example, a compound dataset COMP has members of A, B and C as
415     *
416     * <pre>
417     *     COMP {
418     *         int A;
419     *         float B[5];
420     *         double C[2][3];
421     *     }
422     * </pre>
423     *
424     * getMemberDims(2) returns an array of {2, 3}, while getMemberDims(1)
425     * returns an array of {5}, and getMemberDims(0) returns null.
426     *
427     * @param i  the i-th member
428     *
429     * @return the dimension sizes of the i-th member, null if the compound
430     *         member is not an array.
431     */
432    @Override
433    public final int[] getMemberDims(int i) {
434        if (memberDims == null) {
435            return null;
436        }
437        return (int[]) memberDims[i];
438    }
439
440    /**
441     * Returns an array of datatype objects of compound members.
442     *
443     * Each member of a compound dataset has its own datatype. The datatype of a
444     * member can be atomic or other compound datatype (nested compound).
445     * Sub-classes set up the datatype objects at init().
446     *
447     * @return the array of datatype objects of the compound members.
448     */
449    @Override
450    public final Datatype[] getMemberTypes() {
451        return memberTypes;
452    }
453
454    /**
455     * Returns an array of datatype objects of selected compound members.
456     *
457     * @return an array of datatype objects of selected compound members.
458     */
459    @Override
460    public final Datatype[] getSelectedMemberTypes() {
461        if (isMemberSelected == null) {
462            log.debug("getSelectedMemberTypes(): isMemberSelected array is null");
463            return memberTypes;
464        }
465
466        int idx = 0;
467        Datatype[] types = new Datatype[getSelectedMemberCount()];
468        for (int i = 0; i < isMemberSelected.length; i++) {
469            if (isMemberSelected[i])
470                types[idx++] = memberTypes[i];
471        }
472
473        return types;
474    }
475
476    /**
477     * Returns the fill values for the data object.
478     *
479     * @return the fill values for the data object.
480     */
481    @Override
482    public Object getFillValue() {
483        return null;
484    }
485
486    /**
487     * @deprecated Not implemented for compound dataset.
488     */
489    @Deprecated
490    @Override
491    public Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception {
492        throw new UnsupportedOperationException(
493                "Writing a subset of a compound dataset to a new dataset is not implemented.");
494    }
495
496    /**
497     * Routine to convert datatypes that are read in as byte arrays to
498     * regular types.
499     *
500     * @param dtype
501     *        the datatype to convert to
502     * @param byteData
503     *        the bytes to convert
504     *
505     * @return the converted object
506     */
507    protected Object convertByteMember(final Datatype dtype, byte[] byteData) {
508        Object theObj = null;
509
510        if (dtype.getDatatypeSize() == 1) {
511            /*
512             * Normal byte[] type, such as an integer datatype of size 1.
513             */
514            theObj = byteData;
515        }
516        else if (dtype.isString() && !dtype.isVarStr() && convertByteToString && (byteData instanceof byte[])) {
517            log.trace("convertByteMember(): converting byte array to string array");
518
519            theObj = byteToString(byteData, (int) dtype.getDatatypeSize());
520        }
521        else if (dtype.isInteger()) {
522            log.trace("convertByteMember(): converting byte array to integer array");
523
524            switch ((int)dtype.getDatatypeSize()) {
525            case 1:
526                /*
527                 * Normal byte[] type, such as an integer datatype of size 1.
528                 */
529                theObj = byteData;
530                break;
531            case 2:
532                theObj = HDFNativeData.byteToShort(byteData);
533                break;
534            case 4:
535                theObj = HDFNativeData.byteToInt(byteData);
536                break;
537            case 8:
538                theObj = HDFNativeData.byteToLong(byteData);
539                break;
540            default:
541                log.debug("convertByteMember(): invalid datatype size");
542                theObj = new String("*ERROR*");
543                break;
544            }
545        }
546        else if (dtype.isFloat()) {
547            log.trace("convertByteMember(): converting byte array to float array");
548
549            if (dtype.getDatatypeSize() == 8)
550                theObj = HDFNativeData.byteToDouble(byteData);
551            else
552                theObj = HDFNativeData.byteToFloat(byteData);
553        }
554        else if (dtype.isArray()) {
555            Datatype baseType = dtype.getDatatypeBase();
556
557            /*
558             * Retrieve the real base datatype in the case of ARRAY of ARRAY datatypes.
559             */
560            while (baseType.isArray())
561                baseType = baseType.getDatatypeBase();
562
563            /*
564             * Optimize for the common cases of Arrays.
565             */
566            switch (baseType.getDatatypeClass()) {
567            case Datatype.CLASS_INTEGER:
568            case Datatype.CLASS_FLOAT:
569            case Datatype.CLASS_CHAR:
570            case Datatype.CLASS_STRING:
571            case Datatype.CLASS_BITFIELD:
572            case Datatype.CLASS_OPAQUE:
573            case Datatype.CLASS_COMPOUND:
574            case Datatype.CLASS_REFERENCE:
575            case Datatype.CLASS_ENUM:
576            case Datatype.CLASS_VLEN:
577            case Datatype.CLASS_TIME:
578                theObj = convertByteMember(baseType, byteData);
579                break;
580
581            case Datatype.CLASS_ARRAY:
582            {
583                Datatype arrayType = dtype.getDatatypeBase();
584
585                long[] arrayDims = dtype.getArrayDims();
586                int arrSize = 1;
587                for (int i = 0; i < arrayDims.length; i++)
588                    arrSize *= arrayDims[i];
589                log.trace("convertByteMember(): no CLASS_ARRAY arrayType={} arrSize={}", arrayType, arrSize);
590
591                theObj = new Object[arrSize];
592
593                for (int i = 0; i < arrSize; i++) {
594                    byte[] indexedBytes = Arrays.copyOfRange(byteData, (int) (i * arrayType.getDatatypeSize()),
595                            (int) ((i + 1) * arrayType.getDatatypeSize()));
596                    ((Object[]) theObj)[i] = convertByteMember(arrayType, indexedBytes);
597                }
598
599                break;
600            }
601
602            case Datatype.CLASS_NO_CLASS:
603            default:
604                log.debug("convertByteMember(): invalid datatype class");
605                theObj = new String("*ERROR*");
606            }
607        }
608        else if (dtype.isCompound()) {
609            /*
610             * TODO: still valid after reading change?
611             */
612            theObj = convertCompoundByteMembers(dtype, byteData);
613        }
614        else {
615            theObj = byteData;
616        }
617
618        return theObj;
619    }
620
621    /**
622     * Given an array of bytes representing a compound Datatype, converts each of
623     * its members into Objects and returns the results.
624     *
625     * @param dtype
626     *            The compound datatype to convert
627     * @param data
628     *            The byte array representing the data of the compound Datatype
629     * @return The converted types of the bytes
630     */
631    protected Object convertCompoundByteMembers(final Datatype dtype, byte[] data) {
632        List<Object> theData = null;
633
634        List<Datatype> allSelectedTypes = Arrays.asList(this.getSelectedMemberTypes());
635        List<Datatype> localTypes = new ArrayList<>(dtype.getCompoundMemberTypes());
636        Iterator<Datatype> localIt = localTypes.iterator();
637        while (localIt.hasNext()) {
638            Datatype curType = localIt.next();
639
640            if (curType.isCompound())
641                continue;
642
643            if (!allSelectedTypes.contains(curType))
644                localIt.remove();
645        }
646
647        theData = new ArrayList<>(localTypes.size());
648        for (int i = 0, index = 0; i < localTypes.size(); i++) {
649            Datatype curType = localTypes.get(i);
650
651            if (curType.isCompound())
652                theData.add(convertCompoundByteMembers(curType,
653                        Arrays.copyOfRange(data, index, index + (int) curType.getDatatypeSize())));
654            else
655                theData.add(convertByteMember(curType,
656                        Arrays.copyOfRange(data, index, index + (int) curType.getDatatypeSize())));
657
658            index += curType.getDatatypeSize();
659        }
660
661        return theData;
662    }
663}