001/***************************************************************************** 002 * Copyright by The HDF Group. * 003 * Copyright by the Board of Trustees of the University of Illinois. * 004 * All rights reserved. * 005 * * 006 * This file is part of the HDF Java Products distribution. * 007 * The full copyright notice, including terms governing use, modification, * 008 * and redistribution, is contained in the files COPYING and Copyright.html. * 009 * COPYING can be found at the root of the source code distribution tree. * 010 * Or, see https://support.hdfgroup.org/products/licenses.html * 011 * If you do not have access to either file, you may request a copy from * 012 * help@hdfgroup.org. * 013 ****************************************************************************/ 014 015package hdf.object; 016 017import java.lang.reflect.Array; 018import java.util.Vector; 019 020/** 021 * The abstract class provides general APIs to create and manipulate dataset 022 * objects, and retrieve dataset properties, datatype and dimension sizes. 023 * <p> 024 * This class provides two convenient functions, read()/write(), to read/write 025 * data values. Reading/writing data may take many library calls if we use the 026 * library APIs directly. The read() and write functions hide all the details of 027 * these calls from users. 028 * <p> 029 * For more details on dataset, 030 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b> 031 * <p> 032 * 033 * @see hdf.object.ScalarDS 034 * @see hdf.object.CompoundDS 035 * 036 * @version 1.1 9/4/2007 037 * @author Peter X. 
Cao 038 */ 039public abstract class Dataset extends HObject implements MetaDataContainer, DataFormat { 040 private static final long serialVersionUID = -3360885430038261178L; 041 042 private final static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class); 043 044 /** 045 * The memory buffer that holds the raw data of the dataset. 046 */ 047 protected Object data; 048 049 /** 050 * The number of dimensions of the dataset. 051 */ 052 protected int rank; 053 054 /** 055 * The current dimension sizes of the dataset 056 */ 057 protected long[] dims; 058 059 /** 060 * The max dimension sizes of the dataset 061 */ 062 protected long[] maxDims; 063 064 /** 065 * Array that contains the number of data points selected (for read/write) 066 * in each dimension. 067 * <p> 068 * The selected size must be less than or equal to the current dimension size. 069 * A subset of a rectangle selection is defined by the starting position and 070 * selected sizes. 071 * <p> 072 * For example, if a 4 X 5 dataset is as follows: 073 * 074 * <pre> 075 * 0, 1, 2, 3, 4 076 * 10, 11, 12, 13, 14 077 * 20, 21, 22, 23, 24 078 * 30, 31, 32, 33, 34 079 * long[] dims = {4, 5}; 080 * long[] startDims = {1, 2}; 081 * long[] selectedDims = {3, 3}; 082 * then the following subset is selected by the startDims and selectedDims above: 083 * 12, 13, 14 084 * 22, 23, 24 085 * 32, 33, 34 086 * </pre> 087 */ 088 protected long[] selectedDims; 089 090 /** 091 * The starting position of each dimension of a selected subset. With both 092 * the starting position and selected sizes, the subset of a rectangle 093 * selection is fully defined. 094 */ 095 protected long[] startDims; 096 097 /** 098 * Array that contains the indices of the dimensions selected for display. 099 * <p> 100 * <B>selectedIndex[] is provided for two purposes:</B> 101 * <OL> 102 * <LI> 103 * selectedIndex[] is used to indicate the order of dimensions for display, 104 * i.e. 
selectedIndex[0] = row, selectedIndex[1] = column and 105 * selectedIndex[2] = depth. For example, for a four dimension dataset, if 106 * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index, 107 * dim[2] is selected as column index and dim[3] is selected as depth index. 108 * <LI> 109 * selectedIndex[] is also used to select dimensions for display for 110 * datasets with three or more dimensions. We assume that applications such 111 * as HDFView can only display data up to three dimensions (a 2D 112 * spreadsheet/image with a third dimension that the 2D spreadsheet/image is 113 * cut from). For datasets with more than three dimensions, we need 114 * selectedIndex[] to store which three dimensions are chosen for display. 115 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 116 * then dim[1] is selected as row index, dim[2] is selected as column index 117 * and dim[3] is selected as depth index. dim[0] is not selected. Its 118 * location is fixed at 0 by default. 119 * </OL> 120 */ 121 protected final int[] selectedIndex; 122 123 /** 124 * The number of elements to move from the start location in each dimension. 125 * For example, if selectedStride[0] = 2, every other data point is selected 126 * along dim[0]. 127 */ 128 protected long[] selectedStride; 129 130 /** 131 * The array of dimension sizes for a chunk. 132 */ 133 protected long[] chunkSize; 134 135 /** The compression information. */ 136 protected String compression; 137 public final static String compression_gzip_txt = "GZIP: level = "; 138 139 /** The filters information. */ 140 protected String filters; 141 142 /** The storage layout information. */ 143 protected String storage_layout; 144 145 /** The storage information. */ 146 protected String storage; 147 148 /** The datatype object of the dataset. */ 149 protected Datatype datatype; 150 151 /** 152 * Array of strings that represent the dimension names. It is null if dimension names do not exist. 
153 */ 154 protected String[] dimNames; 155 156 /** Flag to indicate if the byte[] array is converted to strings */ 157 protected boolean convertByteToString = true; 158 159 /** Flag to indicate if data values are loaded into memory. */ 160 protected boolean isDataLoaded = false; 161 162 /** Flag to indicate if this dataset has been initialized */ 163 protected boolean inited = false; 164 165 /** The number of data points in the memory buffer. */ 166 protected long nPoints = 1; 167 168 /** 169 * The data buffer that contains the raw data directly reading from file 170 * (before any data conversion). 171 */ 172 protected Object originalBuf = null; 173 174 /** 175 * The array that holds the converted data of unsigned C-type integers. 176 * <p> 177 * For example, Suppose that the original data is an array of unsigned 178 * 16-bit short integers. Since Java does not support unsigned integer, the 179 * data is converted to an array of 32-bit singed integer. In that case, the 180 * converted buffer is the array of 32-bit singed integer. 181 */ 182 protected Object convertedBuf = null; 183 184 /** 185 * Constructs a Dataset object with a given file, name and path. 186 * 187 * @param theFile 188 * the file that contains the dataset. 189 * @param dsName 190 * the name of the Dataset, e.g. "dset1". 191 * @param dsPath 192 * the full group path of this Dataset, e.g. "/arrays/". 193 */ 194 public Dataset(FileFormat theFile, String dsName, String dsPath) { 195 this(theFile, dsName, dsPath, null); 196 } 197 198 /** 199 * @deprecated Not for public use in the future. <br> 200 * Using {@link #Dataset(FileFormat, String, String)} 201 * 202 * @param theFile 203 * the file that contains the dataset. 204 * @param dsName 205 * the name of the Dataset, e.g. "dset1". 206 * @param dsPath 207 * the full group path of this Dataset, e.g. "/arrays/". 208 * @param oid 209 * the oid of this Dataset. 
210 */ 211 @Deprecated 212 public Dataset(FileFormat theFile, String dsName, String dsPath, long[] oid) { 213 super(theFile, dsName, dsPath, oid); 214 215 datatype = null; 216 rank = -1; 217 data = null; 218 dims = null; 219 maxDims = null; 220 selectedDims = null; 221 startDims = null; 222 selectedStride = null; 223 chunkSize = null; 224 compression = "NONE"; 225 filters = "NONE"; 226 storage = "NONE"; 227 dimNames = null; 228 229 selectedIndex = new int[3]; 230 selectedIndex[0] = 0; 231 selectedIndex[1] = 1; 232 selectedIndex[2] = 2; 233 } 234 235 /** 236 * Clears memory held by the dataset, such as the data buffer. 237 */ 238 @SuppressWarnings("rawtypes") 239 public void clear() { 240 if (data != null) { 241 if (data instanceof Vector) { 242 ((Vector) data).setSize(0); 243 } 244 data = null; 245 originalBuf = null; 246 convertedBuf = null; 247 } 248 isDataLoaded = false; 249 } 250 251 /** 252 * Returns the rank (number of dimensions) of the dataset. 253 * 254 * @return the number of dimensions of the dataset. 255 */ 256 @Override 257 public final int getRank() { 258 if (!inited) 259 init(); 260 261 return rank; 262 } 263 264 /** 265 * Returns the array that contains the dimension sizes of the dataset. 266 * 267 * @return the dimension sizes of the dataset. 268 */ 269 @Override 270 public final long[] getDims() { 271 if (!inited) 272 init(); 273 274 return dims; 275 } 276 277 /** 278 * Returns the array that contains the max dimension sizes of the dataset. 279 * 280 * @return the max dimension sizes of the dataset. 281 */ 282 public final long[] getMaxDims() { 283 if (!inited) init(); 284 285 if (maxDims == null) return dims; 286 287 return maxDims; 288 } 289 290 /** 291 * Returns the dimension sizes of the selected subset. 292 * <p> 293 * The SelectedDims is the number of data points of the selected subset. 294 * Applications can use this array to change the size of selected subset. 
295 * 296 * The selected size must be less than or equal to the current dimension size. 297 * Combined with the starting position, selected sizes and stride, the 298 * subset of a rectangle selection is fully defined. 299 * <p> 300 * For example, if a 4 X 5 dataset is as follows: 301 * 302 * <pre> 303 * 0, 1, 2, 3, 4 304 * 10, 11, 12, 13, 14 305 * 20, 21, 22, 23, 24 306 * 30, 31, 32, 33, 34 307 * long[] dims = {4, 5}; 308 * long[] startDims = {1, 2}; 309 * long[] selectedDims = {3, 3}; 310 * long[] selectedStride = {1, 1}; 311 * then the following subset is selected by the startDims and selectedDims 312 * 12, 13, 14 313 * 22, 23, 24 314 * 32, 33, 34 315 * </pre> 316 * 317 * @return the dimension sizes of the selected subset. 318 */ 319 @Override 320 public final long[] getSelectedDims() { 321 if (!inited) init(); 322 323 return selectedDims; 324 } 325 326 /** 327 * Returns the starting position of a selected subset. 328 * <p> 329 * Applications can use this array to change the starting position of a 330 * selection. Combined with the selected dimensions, selected sizes and 331 * stride, the subset of a rectangle selection is fully defined. 332 * <p> 333 * For example, if a 4 X 5 dataset is as follows: 334 * 335 * <pre> 336 * 0, 1, 2, 3, 4 337 * 10, 11, 12, 13, 14 338 * 20, 21, 22, 23, 24 339 * 30, 31, 32, 33, 34 340 * long[] dims = {4, 5}; 341 * long[] startDims = {1, 2}; 342 * long[] selectedDims = {3, 3}; 343 * long[] selectedStride = {1, 1}; 344 * then the following subset is selected by the startDims and selectedDims 345 * 12, 13, 14 346 * 22, 23, 24 347 * 32, 33, 34 348 * </pre> 349 * 350 * @return the starting position of a selected subset. 351 */ 352 @Override 353 public final long[] getStartDims() { 354 if (!inited) init(); 355 356 return startDims; 357 } 358 359 /** 360 * Returns the selectedStride of the selected dataset. 361 * <p> 362 * Applications can use this array to change how many elements to move in 363 * each dimension. 
364 * 365 * Combined with the starting position and selected sizes, the subset of a 366 * rectangle selection is defined. 367 * <p> 368 * For example, if a 4 X 5 dataset is as follows: 369 * 370 * <pre> 371 * 0, 1, 2, 3, 4 372 * 10, 11, 12, 13, 14 373 * 20, 21, 22, 23, 24 374 * 30, 31, 32, 33, 34 375 * long[] dims = {4, 5}; 376 * long[] startDims = {0, 0}; 377 * long[] selectedDims = {2, 2}; 378 * long[] selectedStride = {2, 3}; 379 * then the following subset is selected by the startDims and selectedDims 380 * 0, 3 381 * 20, 23 382 * </pre> 383 * 384 * @return the selectedStride of the selected dataset. 385 */ 386 @Override 387 public final long[] getStride() { 388 if (!inited) init(); 389 390 if (rank <= 0) { 391 return null; 392 } 393 394 if (selectedStride == null) { 395 selectedStride = new long[rank]; 396 for (int i = 0; i < rank; i++) { 397 selectedStride[i] = 1; 398 } 399 } 400 401 return selectedStride; 402 } 403 404 /** 405 * Sets the flag that indicates if a byte array is converted to a string 406 * array. 407 * <p> 408 * In a string dataset, the raw data from file is stored in a byte array. By 409 * default, this byte array is converted to an array of strings. For a large 410 * dataset (e.g. more than one million strings), the conversion takes a long 411 * time and requires a lot of memory space to store the strings. In some 412 * applications, such a conversion can be delayed. For example, A GUI 413 * application may convert only the part of the strings that is visible to the 414 * users, not the entire data array. 415 * <p> 416 * setConvertByteToString(boolean b) allows users to set the flag so that 417 * applications can choose to perform the byte-to-string conversion or not. 418 * If the flag is set to false, the getData() returns an array of byte 419 * instead of an array of strings. 420 * 421 * @param b 422 * convert bytes to strings if b is true; otherwise, if false, do 423 * not convert bytes to strings. 
424 */ 425 public final void setConvertByteToString(boolean b) { 426 convertByteToString = b; 427 } 428 429 /** 430 * Returns the flag that indicates if a byte array is converted to a string 431 * array. 432 * 433 * @return true if byte array is converted to string; otherwise, returns 434 * false if there is no conversion. 435 */ 436 public final boolean getConvertByteToString() { 437 return convertByteToString; 438 } 439 440 /** 441 * Reads the raw data of the dataset from file to a byte array. 442 * <p> 443 * readBytes() reads raw data to an array of bytes instead of array of its 444 * datatype. For example, for a one-dimension 32-bit integer dataset of 445 * size 5, readBytes() returns a byte array of size 20 instead of an 446 * int array of 5. 447 * <p> 448 * readBytes() can be used to copy data from one dataset to another 449 * efficiently because the raw data is not converted to its native type, it 450 * saves memory space and CPU time. 451 * 452 * @return the byte array of the raw data. 453 * 454 * @throws Exception if data can not be read 455 */ 456 public abstract byte[] readBytes() throws Exception; 457 458 /** 459 * Writes the memory buffer of this dataset to file. 460 * 461 * @throws Exception if buffer can not be written 462 */ 463 @Override 464 public final void write() throws Exception { 465 if (data != null) { 466 write(data); 467 } 468 } 469 470 /** 471 * Creates a new dataset and writes the data buffer to the new dataset. 472 * <p> 473 * This function allows applications to create a new dataset for a given 474 * data buffer. For example, users can select a specific interesting part 475 * from a large image and create a new image with the selection. 476 * <p> 477 * The new dataset retains the datatype and dataset creation properties of 478 * this dataset. 479 * 480 * @param pgroup 481 * the group which the dataset is copied to. 482 * @param name 483 * the name of the new dataset. 484 * @param dims 485 * the dimension sizes of the the new dataset. 
486 * @param data 487 * the data values of the subset to be copied. 488 * 489 * @return the new dataset. 490 * 491 * @throws Exception if dataset can not be copied 492 */ 493 public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception; 494 495 @Override 496 public final boolean isInited() { 497 return inited; 498 } 499 500 /** 501 * Returns the data buffer of the dataset in memory. 502 * <p> 503 * If data is already loaded into memory, returns the data; otherwise, calls 504 * read() to read data from file into a memory buffer and returns the memory 505 * buffer. 506 * <p> 507 * By default, the whole dataset is read into memory. Users can also select 508 * a subset to read. Subsetting is done in an implicit way. 509 * <p> 510 * <b>How to Select a Subset</b> 511 * <p> 512 * A selection is specified by three arrays: start, stride and count. 513 * <ol> 514 * <li>start: offset of a selection 515 * <li>stride: determines how many elements to move in each dimension 516 * <li>count: number of elements to select in each dimension 517 * </ol> 518 * getStartDims(), getStride() and getSelectedDims() returns the start, 519 * stride and count arrays respectively. Applications can make a selection 520 * by changing the values of the arrays. 521 * <p> 522 * The following example shows how to make a subset. In the example, the 523 * dataset is a 4-dimensional array of [200][100][50][10], i.e. 
dims[0]=200; 524 * dims[1]=100; dims[2]=50; dims[3]=10; <br> 525 * We want to select every other data point in dims[1] and dims[2] 526 * 527 * <pre> 528 * int rank = dataset.getRank(); // number of dimensions of the dataset 529 * long[] dims = dataset.getDims(); // the dimension sizes of the dataset 530 * long[] selected = dataset.getSelectedDims(); // the selected size of the dataet 531 * long[] start = dataset.getStartDims(); // the offset of the selection 532 * long[] stride = dataset.getStride(); // the stride of the dataset 533 * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display 534 * 535 * // select dim1 and dim2 as 2D data for display,and slice through dim0 536 * selectedIndex[0] = 1; 537 * selectedIndex[1] = 2; 538 * selectedIndex[1] = 0; 539 * 540 * // reset the selection arrays 541 * for (int i = 0; i < rank; i++) { 542 * start[i] = 0; 543 * selected[i] = 1; 544 * stride[i] = 1; 545 * } 546 * 547 * // set stride to 2 on dim1 and dim2 so that every other data point is 548 * // selected. 549 * stride[1] = 2; 550 * stride[2] = 2; 551 * 552 * // set the selection size of dim1 and dim2 553 * selected[1] = dims[1] / stride[1]; 554 * selected[2] = dims[1] / stride[2]; 555 * 556 * // when dataset.getData() is called, the selection above will be used since 557 * // the dimension arrays are passed by reference. Changes of these arrays 558 * // outside the dataset object directly change the values of these array 559 * // in the dataset object. 560 * </pre> 561 * <p> 562 * For ScalarDS, the memory data buffer is a one-dimensional array of byte, 563 * short, int, float, double or String type based on the datatype of the 564 * dataset. 565 * <p> 566 * For CompoundDS, the memory data object is an java.util.List object. Each 567 * element of the list is a data array that corresponds to a compound field. 
568 * <p> 569 * For example, if compound dataset "comp" has the following nested 570 * structure, and member datatypes 571 * 572 * <pre> 573 * comp --> m01 (int) 574 * comp --> m02 (float) 575 * comp --> nest1 --> m11 (char) 576 * comp --> nest1 --> m12 (String) 577 * comp --> nest1 --> nest2 --> m21 (long) 578 * comp --> nest1 --> nest2 --> m22 (double) 579 * </pre> 580 * 581 * getData() returns a list of six arrays: {int[], float[], char[], 582 * String[], long[] and double[]}. 583 * 584 * @return the memory buffer of the dataset. 585 * 586 * @throws Exception if object can not be read 587 * @throws OutOfMemoryError if memory is exhausted 588 */ 589 @Override 590 public final Object getData() throws Exception, OutOfMemoryError { 591 if (!isDataLoaded) { 592 log.trace("getData: read"); 593 data = read(); // load the data; 594 originalBuf = data; 595 isDataLoaded = true; 596 nPoints = 1; 597 log.trace("getData: selectedDims length={}",selectedDims.length); 598 for (int j = 0; j < selectedDims.length; j++) { 599 nPoints *= selectedDims[j]; 600 } 601 log.trace("getData: read {}", nPoints); 602 } 603 604 return data; 605 } 606 607 /** 608 * Not for public use in the future. 609 * <p> 610 * setData() is not safe to use because it changes memory buffer 611 * of the dataset object. Dataset operations such as write/read 612 * will fail if the buffer type or size is changed. 613 * 614 * @param d the object data 615 */ 616 @Override 617 public final void setData(Object d) { 618 if (!(this instanceof Attribute)) 619 throw new UnsupportedOperationException("setData: unsupported for non-Attribute objects"); 620 621 data = d; 622 } 623 624 /** 625 * Clears the current data buffer in memory and forces the next read() to load 626 * the data from file. 627 * <p> 628 * The function read() loads data from file into memory only if the data is 629 * not read. If data is already in memory, read() just returns the memory 630 * buffer. 
Sometimes we want to force read() to re-read data from file. For 631 * example, when the selection is changed, we need to re-read the data. 632 * 633 * @see #getData() 634 * @see #read() 635 */ 636 @Override 637 public void clearData() { 638 isDataLoaded = false; 639 } 640 641 /** 642 * Returns the dimension size of the vertical axis. 643 * 644 * <p> 645 * This function is used by GUI applications such as HDFView. GUI 646 * applications display a dataset in a 2D table or 2D image. The display 647 * order is specified by the index array of selectedIndex as follow: 648 * <dl> 649 * <dt>selectedIndex[0] -- height</dt> 650 * <dd>The vertical axis</dd> 651 * <dt>selectedIndex[1] -- width</dt> 652 * <dd>The horizontal axis</dd> 653 * <dt>selectedIndex[2] -- depth</dt> 654 * <dd>The depth axis is used for 3 or more dimensional datasets.</dd> 655 * </dl> 656 * Applications can use getSelectedIndex() to access and change the display 657 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 658 * following code will set the height=200 and width=50. 659 * 660 * <pre> 661 * int[] selectedIndex = dataset.getSelectedIndex(); 662 * selectedIndex[0] = 0; 663 * selectedIndex[1] = 1; 664 * </pre> 665 * 666 * @see #getSelectedIndex() 667 * @see #getWidth() 668 * 669 * @return the size of dimension of the vertical axis. 670 */ 671 @Override 672 public final long getHeight() { 673 if (!inited) init(); 674 675 if ((selectedDims == null) || (selectedIndex == null)) { 676 return 0; 677 } 678 679 return selectedDims[selectedIndex[0]]; 680 } 681 682 /** 683 * Returns the dimension size of the horizontal axis. 684 * 685 * <p> 686 * This function is used by GUI applications such as HDFView. GUI 687 * applications display a dataset in 2D Table or 2D Image. 
The display order is 688 * specified by the index array of selectedIndex as follow: 689 * <dl> 690 * <dt>selectedIndex[0] -- height</dt> 691 * <dd>The vertical axis</dd> 692 * <dt>selectedIndex[1] -- width</dt> 693 * <dd>The horizontal axis</dd> 694 * <dt>selectedIndex[2] -- depth</dt> 695 * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd> 696 * </dl> 697 * Applications can use getSelectedIndex() to access and change the display 698 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 699 * following code will set the height=200 and width=100. 700 * 701 * <pre> 702 * int[] selectedIndex = dataset.getSelectedIndex(); 703 * selectedIndex[0] = 0; 704 * selectedIndex[1] = 1; 705 * </pre> 706 * 707 * @see #getSelectedIndex() 708 * @see #getHeight() 709 * 710 * @return the size of dimension of the horizontal axis. 711 */ 712 @Override 713 public final long getWidth() { 714 if (!inited) init(); 715 716 if ((selectedDims == null) || (selectedIndex == null)) { 717 return 0; 718 } 719 720 if ((selectedDims.length < 2) || (selectedIndex.length < 2)) { 721 return 1; 722 } 723 724 return selectedDims[selectedIndex[1]]; 725 } 726 727 /** 728 * Returns the indices of display order. 729 * <p> 730 * 731 * selectedIndex[] is provided for two purposes: 732 * <OL> 733 * <LI> 734 * selectedIndex[] is used to indicate the order of dimensions for display. 735 * selectedIndex[0] is for the row, selectedIndex[1] is for the column and 736 * selectedIndex[2] for the depth. 737 * <p> 738 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 739 * then dim[1] is selected as row index, dim[2] is selected as column index 740 * and dim[3] is selected as depth index. 741 * <LI> 742 * selectedIndex[] is also used to select dimensions for display for 743 * datasets with three or more dimensions. 
We assume that applications such 744 * as HDFView can only display data values up to three dimensions (2D 745 * spreadsheet/image with a third dimension which the 2D spreadsheet/image 746 * is selected from). For datasets with more than three dimensions, we need 747 * selectedIndex[] to tell applications which three dimensions are chosen 748 * for display. <br> 749 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 750 * then dim[1] is selected as row index, dim[2] is selected as column index 751 * and dim[3] is selected as depth index. dim[0] is not selected. Its 752 * location is fixed at 0 by default. 753 * </OL> 754 * 755 * @return the array of the indices of display order. 756 */ 757 @Override 758 public final int[] getSelectedIndex() { 759 if (!inited) init(); 760 761 return selectedIndex; 762 } 763 764 /** 765 * Returns the string representation of compression information. 766 * <p> 767 * For example, 768 * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED". 769 * 770 * @return the string representation of compression information. 771 */ 772 @Override 773 public final String getCompression() { 774 if (!inited) init(); 775 776 return compression; 777 } 778 779 /** 780 * Returns the string representation of filter information. 781 * 782 * @return the string representation of filter information. 783 */ 784 public final String getFilters() { 785 if (!inited) init(); 786 787 return filters; 788 } 789 790 /** 791 * Returns the string representation of storage layout information. 792 * 793 * @return the string representation of storage layout information. 794 */ 795 public final String getStorageLayout() { 796 if (!inited) init(); 797 798 return storage_layout; 799 } 800 801 /** 802 * Returns the string representation of storage information. 803 * 804 * @return the string representation of storage information. 
805 */ 806 public final String getStorage() { 807 if (!inited) init(); 808 809 return storage; 810 } 811 812 /** 813 * Returns the array that contains the dimension sizes of the chunk of the 814 * dataset. Returns null if the dataset is not chunked. 815 * 816 * @return the array of chunk sizes or returns null if the dataset is not 817 * chunked. 818 */ 819 public final long[] getChunkSize() { 820 if (!inited) init(); 821 822 return chunkSize; 823 } 824 825 @Override 826 public Datatype getDatatype() { 827 return datatype; 828 } 829 830 /** 831 * @deprecated Not for public use in the future. <br> 832 * Using {@link #convertFromUnsignedC(Object, Object)} 833 * 834 * @param data_in the object data 835 * 836 * @return the converted object 837 */ 838 @Deprecated 839 public static Object convertFromUnsignedC(Object data_in) { 840 return Dataset.convertFromUnsignedC(data_in, null); 841 } 842 843 /** 844 * Converts one-dimension array of unsigned C-type integers to a new array 845 * of appropriate Java integer in memory. 846 * <p> 847 * Since Java does not support unsigned integer, values of unsigned C-type 848 * integers must be converted into its appropriate Java integer. Otherwise, 849 * the data value will not displayed correctly. For example, if an unsigned 850 * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of 851 * the correct value of 200. 
852 * <p> 853 * Unsigned C integers are upgrade to Java integers according to the 854 * following table: 855 * <TABLE CELLSPACING=0 BORDER=1 CELLPADDING=5 WIDTH=400> 856 * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption> 857 * <TR> 858 * <TD><B>Unsigned C Integer</B></TD> 859 * <TD><B>JAVA Intege</B>r</TD> 860 * </TR> 861 * <TR> 862 * <TD>unsigned byte</TD> 863 * <TD>signed short</TD> 864 * </TR> 865 * <TR> 866 * <TD>unsigned short</TD> 867 * <TD>signed int</TD> 868 * </TR> 869 * <TR> 870 * <TD>unsigned int</TD> 871 * <TD>signed long</TD> 872 * </TR> 873 * <TR> 874 * <TD>unsigned long</TD> 875 * <TD>signed long</TD> 876 * </TR> 877 * </TABLE> 878 * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers. 879 * Therefore, the values of unsigned 64-bit datasets may be wrong in Java 880 * applications</strong>. 881 * <p> 882 * If memory data of unsigned integers is converted by 883 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 884 * the data back to unsigned C before data is written into file. 885 * 886 * @see #convertToUnsignedC(Object, Object) 887 * 888 * @param data_in 889 * the input 1D array of the unsigned C-type integers. 890 * @param data_out 891 * the output converted (or upgraded) 1D array of Java integers. 892 * 893 * @return the upgraded 1D array of Java integers. 
894 */ 895 @SuppressWarnings("rawtypes") 896 public static Object convertFromUnsignedC(Object data_in, Object data_out) { 897 log.trace("convertFromUnsignedC(): start"); 898 899 if (data_in == null) { 900 log.debug("convertFromUnsignedC(): data_in is null"); 901 log.trace("convertFromUnsignedC(): finish"); 902 return null; 903 } 904 905 Class data_class = data_in.getClass(); 906 if (!data_class.isArray()) { 907 log.debug("convertFromUnsignedC(): data_in not an array"); 908 log.trace("convertFromUnsignedC(): finish"); 909 return null; 910 } 911 912 if (data_out != null) { 913 Class data_class_out = data_out.getClass(); 914 if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) { 915 log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size"); 916 data_out = null; 917 } 918 } 919 920 String cname = data_class.getName(); 921 char dname = cname.charAt(cname.lastIndexOf("[") + 1); 922 int size = Array.getLength(data_in); 923 log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size); 924 925 if (dname == 'B') { 926 short[] sdata = null; 927 if (data_out == null) { 928 sdata = new short[size]; 929 } 930 else { 931 sdata = (short[]) data_out; 932 } 933 934 byte[] bdata = (byte[]) data_in; 935 for (int i = 0; i < size; i++) { 936 sdata[i] = (short) ((bdata[i] + 256) & 0xFF); 937 } 938 939 data_out = sdata; 940 } 941 else if (dname == 'S') { 942 int[] idata = null; 943 if (data_out == null) { 944 idata = new int[size]; 945 } 946 else { 947 idata = (int[]) data_out; 948 } 949 950 short[] sdata = (short[]) data_in; 951 for (int i = 0; i < size; i++) { 952 idata[i] = (sdata[i] + 65536) & 0xFFFF; 953 } 954 955 data_out = idata; 956 } 957 else if (dname == 'I') { 958 long[] ldata = null; 959 if (data_out == null) { 960 ldata = new long[size]; 961 } 962 else { 963 ldata = (long[]) data_out; 964 } 965 966 int[] idata = (int[]) data_in; 967 for (int i = 0; i < size; i++) { 968 ldata[i] = 
(idata[i] + 4294967296L) & 0xFFFFFFFFL; 969 } 970 971 data_out = ldata; 972 } 973 else { 974 data_out = data_in; 975 log.debug("convertFromUnsignedC(): Java does not support unsigned long"); 976 } 977 978 return data_out; 979 } 980 981 /** 982 * @deprecated Not for public use in the future. <br> 983 * Using {@link #convertToUnsignedC(Object, Object)} 984 * 985 * @param data_in 986 * the input 1D array of the unsigned C-type integers. 987 * 988 * @return the upgraded 1D array of Java integers. 989 */ 990 @Deprecated 991 public static Object convertToUnsignedC(Object data_in) { 992 return Dataset.convertToUnsignedC(data_in, null); 993 } 994 995 /** 996 * Converts the array of converted unsigned integers back to unsigned C-type 997 * integer data in memory. 998 * <p> 999 * If memory data of unsigned integers is converted by 1000 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 1001 * the data back to unsigned C before data is written into file. 1002 * 1003 * @see #convertFromUnsignedC(Object, Object) 1004 * 1005 * @param data_in 1006 * the input array of the Java integer. 1007 * @param data_out 1008 * the output array of the unsigned C-type integer. 1009 * 1010 * @return the converted data of unsigned C-type integer array. 
1011 */ 1012 @SuppressWarnings("rawtypes") 1013 public static Object convertToUnsignedC(Object data_in, Object data_out) { 1014 log.trace("convertToUnsignedC(): start"); 1015 1016 if (data_in == null) { 1017 log.debug("convertToUnsignedC(): data_in is null"); 1018 log.trace("convertToUnsignedC(): finish"); 1019 return null; 1020 } 1021 1022 Class data_class = data_in.getClass(); 1023 if (!data_class.isArray()) { 1024 log.debug("convertToUnsignedC(): data_in not an array"); 1025 log.trace("convertToUnsignedC(): finish"); 1026 return null; 1027 } 1028 1029 if (data_out != null) { 1030 Class data_class_out = data_out.getClass(); 1031 if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) { 1032 log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size"); 1033 data_out = null; 1034 } 1035 } 1036 1037 String cname = data_class.getName(); 1038 char dname = cname.charAt(cname.lastIndexOf("[") + 1); 1039 int size = Array.getLength(data_in); 1040 log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size); 1041 1042 if (dname == 'S') { 1043 byte[] bdata = null; 1044 if (data_out == null) { 1045 bdata = new byte[size]; 1046 } 1047 else { 1048 bdata = (byte[]) data_out; 1049 } 1050 short[] sdata = (short[]) data_in; 1051 for (int i = 0; i < size; i++) { 1052 bdata[i] = (byte) sdata[i]; 1053 } 1054 data_out = bdata; 1055 } 1056 else if (dname == 'I') { 1057 short[] sdata = null; 1058 if (data_out == null) { 1059 sdata = new short[size]; 1060 } 1061 else { 1062 sdata = (short[]) data_out; 1063 } 1064 int[] idata = (int[]) data_in; 1065 for (int i = 0; i < size; i++) { 1066 sdata[i] = (short) idata[i]; 1067 } 1068 data_out = sdata; 1069 } 1070 else if (dname == 'J') { 1071 int[] idata = null; 1072 if (data_out == null) { 1073 idata = new int[size]; 1074 } 1075 else { 1076 idata = (int[]) data_out; 1077 } 1078 long[] ldata = (long[]) data_in; 1079 for (int i = 0; i < size; i++) { 1080 idata[i] = 
(int) ldata[i]; 1081 } 1082 data_out = idata; 1083 } 1084 else { 1085 data_out = data_in; 1086 log.debug("convertToUnsignedC(): Java does not support unsigned long"); 1087 } 1088 1089 return data_out; 1090 } 1091 1092 /** 1093 * Converts an array of bytes into an array of Strings for a fixed string 1094 * dataset. 1095 * <p> 1096 * A C-string is an array of chars while an Java String is an object. When a 1097 * string dataset is read into a Java application, the data is stored in an 1098 * array of Java bytes. byteToString() is used to convert the array of bytes 1099 * into an array of Java strings so that applications can display and modify 1100 * the data content. 1101 * <p> 1102 * For example, the content of a two element C string dataset is {"ABC", 1103 * "abc"}. Java applications will read the data into a byte array of {65, 1104 * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java 1105 * String of strs[0]="ABC", and strs[1]="abc". 1106 * <p> 1107 * If memory data of strings is converted to Java Strings, stringToByte() 1108 * must be called to convert the memory data back to byte array before data 1109 * is written to file. 1110 * 1111 * @see #stringToByte(String[], int) 1112 * 1113 * @param bytes 1114 * the array of bytes to convert. 1115 * @param length 1116 * the length of string. 1117 * 1118 * @return the array of Java String. 
1119 */ 1120 public static final String[] byteToString(byte[] bytes, int length) { 1121 log.trace("byteToString(): start"); 1122 1123 if (bytes == null) { 1124 log.debug("byteToString(): input is null"); 1125 log.trace("byteToString(): finish"); 1126 return null; 1127 } 1128 1129 int n = bytes.length / length; 1130 log.trace("byteToString(): n={} from length of {}", n, length); 1131 // String bigstr = new String(bytes); 1132 String[] strArray = new String[n]; 1133 String str = null; 1134 int idx = 0; 1135 for (int i = 0; i < n; i++) { 1136 str = new String(bytes, i * length, length); 1137 // bigstr.substring uses less memory space 1138 // NOTE: bigstr does not work on linux if bytes.length is very large 1139 // see bug 1091 1140 // offset = i*length; 1141 // str = bigstr.substring(offset, offset+length); 1142 1143 idx = str.indexOf('\0'); 1144 if (idx >= 0) { 1145 str = str.substring(0, idx); 1146 } 1147 1148 // trim only the end 1149 int end = str.length(); 1150 while (end > 0 && str.charAt(end - 1) <= '\u0020') 1151 end--; 1152 1153 strArray[i] = (end <= 0) ? "" : str.substring(0, end); 1154 1155 // trim both start and end 1156 // strArray[i] = str.trim(); 1157 } 1158 1159 log.trace("byteToString(): finish"); 1160 return strArray; 1161 } 1162 1163 /** 1164 * Converts a string array into an array of bytes for a fixed string 1165 * dataset. 1166 * <p> 1167 * If memory data of strings is converted to Java Strings, stringToByte() 1168 * must be called to convert the memory data back to byte array before data 1169 * is written to file. 1170 * 1171 * @see #byteToString(byte[] bytes, int length) 1172 * 1173 * @param strings 1174 * the array of string. 1175 * @param length 1176 * the length of string. 1177 * 1178 * @return the array of bytes. 
1179 */ 1180 public static final byte[] stringToByte(String[] strings, int length) { 1181 log.trace("stringToByte(): start"); 1182 1183 if (strings == null) { 1184 log.debug("stringToByte(): input is null"); 1185 log.trace("stringToByte(): finish"); 1186 return null; 1187 } 1188 1189 int size = strings.length; 1190 byte[] bytes = new byte[size * length]; 1191 log.trace("stringToByte(): size={} length={}", size, length); 1192 StringBuffer strBuff = new StringBuffer(length); 1193 for (int i = 0; i < size; i++) { 1194 // initialize the string with spaces 1195 strBuff.replace(0, length, " "); 1196 1197 if (strings[i] != null) { 1198 if (strings[i].length() > length) { 1199 strings[i] = strings[i].substring(0, length); 1200 } 1201 strBuff.replace(0, length, strings[i]); 1202 } 1203 1204 strBuff.setLength(length); 1205 System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length); 1206 } 1207 1208 log.trace("stringToByte(): finish"); 1209 1210 return bytes; 1211 } 1212 1213 /** 1214 * Returns the array of strings that represent the dimension names. Returns 1215 * null if there is no dimension name. 1216 * <p> 1217 * Some datasets have pre-defined names for each dimension such as 1218 * "Latitude" and "Longitude". getDimNames() returns these pre-defined 1219 * names. 1220 * 1221 * @return the names of dimensions, or null if there is no dimension name. 1222 */ 1223 public final String[] getDimNames() { 1224 if (!inited) init(); 1225 1226 return dimNames; 1227 } 1228 1229 /** 1230 * Checks if a given datatype is a string. Sub-classes must replace this 1231 * default implementation. 1232 * 1233 * @param tid 1234 * The data type identifier. 1235 * 1236 * @return true if the datatype is a string; otherwise returns false. 1237 */ 1238 public boolean isString(long tid) { 1239 return false; 1240 } 1241 1242 /** 1243 * Returns the size in bytes of a given datatype. Sub-classes must replace 1244 * this default implementation. 
*
     * @param tid
     *            The data type identifier.
     *
     * @return The size of the datatype; this default implementation always
     *         returns -1.
     */
    public long getSize(long tid) {
        return -1;
    }

    /**
     * Gets the Class of the original data buffer if converted.
     * <p>
     * NOTE(review): this dereferences originalBuf directly, so it throws a
     * NullPointerException when originalBuf is null - confirm callers only
     * invoke it after a conversion has taken place.
     *
     * @return the Class of originalBuf
     */
    @Override
    @SuppressWarnings("rawtypes")
    public final Class getOriginalClass() {
        return originalBuf.getClass();
    }

    /**
     * Checks if the dataset is virtual. Sub-classes must replace this default
     * implementation.
     *
     * @return true if the dataset is virtual; otherwise returns false. This
     *         default implementation always returns false.
     */
    public boolean isVirtual() {
        return false;
    }

    /**
     * Gets the source file name at index if the dataset is virtual.
     * Sub-classes must replace this default implementation.
     *
     * @param index
     *            the index of the source file.
     *
     * @return filename if the dataset is virtual; otherwise returns null. This
     *         default implementation always returns null.
     */
    public String getVirtualFilename(int index) {
        return null;
    }

    /**
     * Gets the number of source files if the dataset is virtual. Sub-classes
     * must replace this default implementation.
     *
     * @return the list size if the dataset is virtual; otherwise returns
     *         negative. This default implementation always returns -1.
     */
    public int getVirtualMaps() {
        return -1;
    }
}