BioHDF version 0.3 alpha
Scalable NGS Data Storage Based on HDF5
bioh5g_alignments.h
00001 /*****************************************************************************
00002  * Copyright by The HDF Group                                                *
00003  * All rights reserved.                                                      *
00004  *                                                                           *
00005  * This file is part of BioHDF.  The full BioHDF copyright notice, including *
00006  * terms governing use, modification, and redistribution, is contained in    *
00007  * the file COPYING.  COPYING can be found at the root of the source code    *
00008  * distribution tree.  If you do not have access to this file, you may       *
00009  * request a copy from help@hdfgroup.org.                                    *
00010  *****************************************************************************/
00011  
00020 #ifndef _BIOH5G_ALIGNMENTS_H
00021 #define _BIOH5G_ALIGNMENTS_H
00022 
00023 #include <stdio.h>
00024 
00025 #include "biohdf_api.h"
00026 #include "biohdf_error.h"
00027 #include "biohdf_file.h"
00028 #include "biohdf_utility.h"
00029 
00030 #include "bioh5g_api.h"
00031 #include "bioh5g_reads.h"
00032 
00033 
00034 
00035 /*****************************************************************************
00036  * Attribute Names                                                           *
00037  *****************************************************************************/
00038 
00039 
00040 
/* Attribute name "READS_PATH". Presumably the attribute that records which
 * reads collection an alignments collection refers to (cf.
 * BIOH5Gget_reads_path) -- TODO confirm against the implementation. */
00042 #define BIOH5G_READS_PATH_ATTR "READS_PATH"
00043 
00044 
00045 
/* Attribute name "INDEX_METHOD". Presumably the attribute that records the
 * bioh5g_alignments_index_method used for a collection -- TODO confirm. */
00047 #define BIOH5G_INDEX_METHOD_ATTR "INDEX_METHOD"
00048 
00049 
00050 
00051 /*****************************************************************************
00052  * Type definitions                                                          *
00053  *****************************************************************************/
00054 
00055 
00056 
/* Handle to an alignments collection: a pointer to struct _bioh5g_alignments,
 * whose layout is not defined in the visible part of this header. */
00058 typedef struct _bioh5g_alignments *bioh5g_alignments;
00059 
00060 
00061 
/* Handle to a set of creation properties, passed to
 * BIOH5Gcreate_alignments_collection. The pointed-to struct is not defined
 * in the visible part of this header. */
00063 typedef struct _bioh5g_alignments_creation_properties *bioh5g_alignments_creation_properties;
00064 
00065 
00066 
/* Handle to an alignments iterator, produced by
 * BIOH5Gcreate_alignments_iterator and consumed by BIOH5Gget_next_alignment.
 * The pointed-to struct is not defined in the visible part of this header. */
00068 typedef struct _bioh5g_alignments_iterator *bioh5g_alignments_iterator;
00069 
00070 
00071 
00072 /*****************************************************************************
00073  * Structs and enums                                                         *
00074  *****************************************************************************/
00075 
00076 
00077 
/* Alignment text formats accepted by the file-header and string/stream
 * output functions declared in this header. Only one value is defined. */
00079 typedef enum
00080 {
00081     SAM_FORMAT      /* presumably the SAM text format -- name-based, confirm */
00082 } bioh5g_alignments_format;
00083 
00084 
00085 
/* Index methods accepted by BIOH5Gcreate_alignments_index.
 * NOTE(review): the commented-out future values below list
 * REF_POS_NCLIST_PRIMARY twice (3 and 4); one of them is probably meant to
 * be a different name -- fix before enabling. */
00087 typedef enum
00088 {
00089     UNINDEXED = 0,       /* presumably: no index -- confirm */
00090     REF_POS_SECONDARY = 1  /* presumably: index keyed on reference/position -- confirm */
00092     /* REF_POS_PRIMARY = 2, */
00093     /* REF_POS_NCLIST_PRIMARY = 3, */
00094     /* REF_POS_NCLIST_PRIMARY = 4, */
00095 
00096 } bioh5g_alignments_index_method;
00097 
00098 
00099 
00100 /*****************************************************************************
00101  * Data container                                                            *
00102  *****************************************************************************/
00103 
00104 
00105 
/* Plain-data record for one alignment. It is passed to BIOH5Gadd_alignment
 * and handed back through a pointer-to-pointer by BIOH5Gget_alignment and
 * BIOH5Gget_next_alignment. Who owns the char* members is not visible in
 * this header. Field meanings below are inferred from the names and the
 * SAM section labels; confirm against the implementation. */
00110 typedef struct
00111 {
00112     /* basic alignment data */
00113     int64_t   read_index;   /* presumably the index of the aligned read in the reads collection */
00114     char      *reference;   /* presumably the reference sequence name */
00115     int32_t   position;     /* position on the reference; 0- vs 1-based not visible here */
00116     int32_t   length;       /* alignment length; units not visible here */
00118     /* SAM data */
00119     unsigned char  sam_mapq;     /* presumably the SAM MAPQ column */
00120     int32_t        sam_flags;    /* presumably the SAM FLAG column. NOTE(review): signed here, but the sam_flags accessors and the flags filter use uint32_t */
00121     char           *sam_cigar;   /* presumably the SAM CIGAR string */
00122     char           *sam_tags;    /* presumably the SAM optional fields, as one string */
00124     /* SAM template data */
00125     char      *sam_rnext;   /* presumably the SAM RNEXT column */
00126     int32_t   sam_pnext;    /* presumably the SAM PNEXT column */
00127     int32_t   sam_tlen;     /* presumably the SAM TLEN column */
00129 } bioh5g_alignment_data;
00130 
00131 
00132 
00133 /*****************************************************************************
00134  * Create, open, close                                                       *
00135  *****************************************************************************/
00136 
00137 
00138 
/*
 * Presumably tests whether an alignments collection exists at `path` in
 * `file` (inferred from the name -- confirm against the implementation).
 *
 *   file      BioHDF file handle.
 *   path      Path to check.
 *   presence  OUT: receives the result as an int; the encoding of
 *             present/absent is not visible in this header.
 *
 * Returns a biohdf_error value.
 */
00154 BIOHDF_API biohdf_error
00155 BIOH5Gcheck_alignments_presence(const biohdf_file file,
00156                                 const char *path,
00157                                 /*OUT*/ int *presence);
00158 
00159 
00160 
/*
 * Creates an alignments collection (per the section heading "Create, open,
 * close") and returns its handle.
 *
 *   file    BioHDF file handle.
 *   props   Creation properties handle (see
 *           BIOH5Gcreate_alignments_properties).
 *   path    Path of the new collection.
 *   aligns  OUT: receives the collection handle.
 *
 * Returns a biohdf_error value. Presumably the handle must later be passed
 * to BIOH5Gclose_alignments_collection -- TODO confirm.
 */
00173 BIOHDF_API biohdf_error 
00174 BIOH5Gcreate_alignments_collection(const biohdf_file file,
00175                                    const bioh5g_alignments_creation_properties props,
00176                                    const char *path,
00177                                    /*OUT*/ bioh5g_alignments *aligns);
00178 
00179 
00180 
/*
 * Opens an alignments collection and returns its handle.
 *
 *   file    BioHDF file handle.
 *   path    Path of the collection.
 *   mode    Open mode (biohdf_open_mode; its values are declared elsewhere).
 *   aligns  OUT: receives the collection handle.
 *
 * Returns a biohdf_error value.
 */
00190 BIOHDF_API biohdf_error 
00191 BIOH5Gopen_alignments_collection(const biohdf_file file,
00192                                  const char *path,
00193                                  biohdf_open_mode mode,
00194                                  /*OUT*/ bioh5g_alignments *aligns);
00195 
00196 
00197 
/*
 * Closes an alignments collection.
 *
 *   aligns  IN-OUT: pointer to the handle to close. Presumably the handle
 *           is reset on return -- TODO confirm.
 *
 * Returns a biohdf_error value.
 */
00206 BIOHDF_API biohdf_error 
00207 BIOH5Gclose_alignments_collection(/*IN-OUT*/ bioh5g_alignments *aligns);
00208 
00209 
00210 
/*
 * Retrieves the reads path associated with an alignments collection
 * (presumably the value stored under BIOH5G_READS_PATH_ATTR -- confirm).
 *
 *   aligns      Collection handle.
 *   reads_path  OUT: receives a char* string. Whether the caller must free
 *               it is not visible in this header.
 *
 * Returns a biohdf_error value.
 */
00223 BIOHDF_API biohdf_error 
00224 BIOH5Gget_reads_path(const bioh5g_alignments aligns,
00225                      /*OUT*/ char **reads_path);
00226 
00227 
00228 
00229 /*****************************************************************************
00230  * Append and read data                                                      *
00231  *****************************************************************************/
00232 
00233 
00234 
/*
 * Reports the number of alignments in a collection (name-based).
 *
 *   aligns  Collection handle.
 *   count   OUT: receives the count as int64_t.
 *
 * Returns a biohdf_error value.
 */
00242 BIOHDF_API biohdf_error
00243 BIOH5Gget_alignments_count(const bioh5g_alignments aligns,
00244                            /*OUT*/ int64_t *count);
00245 
00246 
00247 
/*
 * Creates an iterator over an alignments collection.
 *
 *   aligns  Collection handle.
 *   iter    OUT: receives the iterator handle. Presumably released with
 *           BIOH5Gdestroy_alignments_iterator -- TODO confirm.
 *
 * Returns a biohdf_error value.
 */
00255 BIOHDF_API biohdf_error
00256 BIOH5Gcreate_alignments_iterator(const bioh5g_alignments aligns,
00257                                  /*OUT*/ bioh5g_alignments_iterator *iter);
00258 
00259 
/*
 * Adds a reference/position range filter to an iterator (name-based).
 *
 *   iter       Iterator handle.
 *   reference  Reference name to filter on.
 *   start      Range start.
 *   end        Range end. Whether the bounds are inclusive, and whether
 *              they are 0- or 1-based, is not visible in this header.
 *
 * Returns a biohdf_error value.
 */
00272 BIOHDF_API biohdf_error
00273 BIOH5Gadd_alignments_iterator_range_filter(bioh5g_alignments_iterator iter,
00274                                            const char *reference,
00275                                            int32_t start,
00276                                            int32_t end);
00277 
00278 
00279 
/*
 * Adds a mapping-quality filter to an iterator (name-based).
 *
 *   iter      Iterator handle.
 *   min_mapq  Threshold value. The name suggests alignments with a lower
 *             sam_mapq are skipped -- TODO confirm, including whether the
 *             comparison is inclusive.
 *
 * Returns a biohdf_error value.
 */
00293 BIOHDF_API biohdf_error
00294 BIOH5Gadd_alignments_iterator_mapq_filter(bioh5g_alignments_iterator iter,
00295                                           unsigned char min_mapq);
00296 
00297 
00298 
/*
 * Adds a flags filter to an iterator (name-based).
 *
 *   iter  Iterator handle.
 *   mask  Bit mask. How it is applied to an alignment's flags (any-set,
 *         all-set, exclude) is not visible in this header.
 *
 * NOTE(review): mask is uint32_t while bioh5g_alignment_data.sam_flags is
 * int32_t.
 *
 * Returns a biohdf_error value.
 */
00313 BIOHDF_API biohdf_error
00314 BIOH5Gadd_alignments_iterator_flags_filter(bioh5g_alignments_iterator iter,
00315                                            uint32_t mask);
00316 
00317 
00318 
/*
 * Destroys an alignments iterator.
 *
 *   iter  IN-OUT: pointer to the iterator handle. Presumably the handle is
 *         reset on return -- TODO confirm.
 *
 * Returns a biohdf_error value.
 */
00327 BIOHDF_API biohdf_error
00328 BIOH5Gdestroy_alignments_iterator(/*IN-OUT*/ bioh5g_alignments_iterator *iter);
00329 
00330 
00331 
/*
 * Adds one alignment record to a collection (section heading: "Append and
 * read data").
 *
 *   aligns  Collection handle.
 *   data    Record to add; not modified through this pointer (const).
 *
 * Returns a biohdf_error value.
 */
00339 BIOHDF_API biohdf_error
00340 BIOH5Gadd_alignment(const bioh5g_alignments aligns,
00341                     const bioh5g_alignment_data *data);
00342 
00343 
00344 
/*
 * Reports the index of the most recently added alignment (name-based).
 *
 *   aligns  Collection handle.
 *   index   OUT: receives the index as int64_t. The value when nothing has
 *           been added yet is not visible in this header.
 *
 * Returns a biohdf_error value.
 */
00354 BIOHDF_API biohdf_error
00355 BIOH5Gget_index_of_last_added_alignment(const bioh5g_alignments aligns,
00356                                         /*OUT*/ int64_t *index);
00357 
00358 
00359 
/*
 * Fetches the next alignment from an iterator.
 *
 *   iter   Iterator handle.
 *   index  OUT: receives the alignment's index.
 *   data   OUT: receives a pointer to the alignment record. Presumably
 *          allocated by the library and released with
 *          BIOH5Gfree_alignment_data -- TODO confirm.
 *
 * Returns a biohdf_error value. How end-of-iteration is signalled is not
 * visible in this header.
 */
00368 BIOHDF_API biohdf_error
00369 BIOH5Gget_next_alignment(bioh5g_alignments_iterator iter,
00370                          /*OUT*/ int64_t *index,
00371                          /*OUT*/ bioh5g_alignment_data **data);
00372 
00373 
00374 
/*
 * Fetches the alignment stored at a given index.
 *
 *   aligns  Collection handle.
 *   index   Index of the alignment to fetch.
 *   data    OUT: receives a pointer to the alignment record. Presumably
 *           released with BIOH5Gfree_alignment_data -- TODO confirm.
 *
 * Returns a biohdf_error value.
 */
00383 BIOHDF_API biohdf_error
00384 BIOH5Gget_alignment(const bioh5g_alignments aligns,
00385                     int64_t index,
00386                     /*OUT*/ bioh5g_alignment_data **data);
00387 
00388 
00389 
/*
 * Frees an alignment record.
 *
 *   data  IN-OUT: pointer to the record pointer. Presumably the record's
 *         string members are freed too and *data is reset -- TODO confirm.
 *
 * Returns a biohdf_error value.
 */
00398 BIOHDF_API biohdf_error
00399 BIOH5Gfree_alignment_data(/*IN-OUT*/ bioh5g_alignment_data **data);
00400 
00401 
00402 
00403 /*****************************************************************************
00404  * Alignment hit index functionality                                         *
00405  *****************************************************************************/
00406 
00407 
00408 
/*
 * Creates an index over an alignments collection.
 *
 *   aligns  Collection handle.
 *   method  Index method (see bioh5g_alignments_index_method).
 *   props   Index creation properties (biohdf_index_creation_properties;
 *           declared elsewhere).
 *
 * Returns a biohdf_error value.
 */
00419 BIOHDF_API biohdf_error
00420 BIOH5Gcreate_alignments_index(bioh5g_alignments aligns,
00421                               bioh5g_alignments_index_method method,
00422                               biohdf_index_creation_properties props);
00423 
00424 
00425 
00426 /*****************************************************************************
00427  * External file header storage                                              *
00428  *****************************************************************************/
00429 
00430 
/*
 * Stores the header text of an external alignment file with the collection
 * (section heading: "External file header storage").
 *
 *   aligns  Collection handle.
 *   format  Format of the header (see bioh5g_alignments_format).
 *   header  Header text; presumably a NUL-terminated string -- confirm.
 *
 * Returns a biohdf_error value.
 */
00441 BIOHDF_API biohdf_error
00442 BIOH5Gstore_alignment_file_header(const bioh5g_alignments aligns,
00443                                   bioh5g_alignments_format format,
00444                                   const char *header);
00445 
00446 
00447 
/*
 * Retrieves the stored external-file header and its format.
 *
 *   aligns  Collection handle.
 *   format  OUT: receives the header's format.
 *   header  OUT: receives a char* with the header text. Whether the caller
 *           must free it is not visible in this header.
 *
 * Returns a biohdf_error value.
 */
00460 BIOHDF_API biohdf_error
00461 BIOH5Gget_alignment_file_header(const bioh5g_alignments aligns,
00462                                 /*OUT*/ bioh5g_alignments_format *format,
00463                                 /*OUT*/ char **header);
00464 
00465 
00466 
00467 /*****************************************************************************
00468  * Data formats                                                              *
00469  *****************************************************************************/
00470 
00471 
/*
 * Builds a text representation of an alignment in the given format
 * (section heading: "Data formats").
 *
 *   alignment         Alignment record (read-only).
 *   read              Read record (read-only; bioh5g_read_data is declared
 *                     in another header).
 *   format            Output format (see bioh5g_alignments_format).
 *   alignment_string  OUT: receives the resulting char* string. Whether the
 *                     caller must free it is not visible in this header.
 *
 * Returns a biohdf_error value.
 */
00485 BIOHDF_API biohdf_error
00486 BIOH5Gcreate_alignment_string(const bioh5g_alignment_data *alignment,
00487                               const bioh5g_read_data *read,
00488                               bioh5g_alignments_format format,
00489                               /*OUT*/ char **alignment_string);
00490 
00491 
00492 
/*
 * Writes an alignment, in the given format, to a stdio stream.
 *
 *   alignment  Alignment record (read-only).
 *   read       Read record (read-only).
 *   format     Output format (see bioh5g_alignments_format).
 *   stream     Destination FILE*. Whether a trailing newline is written is
 *              not visible in this header.
 *
 * Returns a biohdf_error value.
 */
00509 BIOHDF_API biohdf_error
00510 BIOH5Gwrite_alignment_to_stream(const bioh5g_alignment_data *alignment,
00511                                 const bioh5g_read_data *read,
00512                                 bioh5g_alignments_format format,
00513                                 FILE *stream);
00514 
00515 
00516 
00517 
00518 /*****************************************************************************
00519  * Accessor functions (needed for higher-language interoperation)            *
00520  *****************************************************************************/
00521 
/*
 * Creates an alignment record (for callers that cannot build the struct
 * themselves, per the section heading on higher-language interoperation).
 *
 *   data  OUT: receives a pointer to the new record. Initial field values
 *         are not visible here; presumably released with
 *         BIOH5Gfree_alignment_data -- TODO confirm.
 *
 * Returns a biohdf_error value.
 */
00527 BIOHDF_API biohdf_error
00528 BIOH5Gcreate_alignment_data(/*OUT*/ bioh5g_alignment_data **data);
00529 
00530 
00531 
/* Getter for the read_index field of an alignment record.
 *   data        Alignment record.
 *   read_index  OUT: receives the value.
 * Returns a biohdf_error value. */
00532 BIOHDF_API biohdf_error
00533 BIOH5Gget_alignment_read_index(bioh5g_alignment_data *data,
00534                                /*OUT*/ int64_t *read_index);
00535 
00536 
00537 
/* Setter for the read_index field of an alignment record.
 *   data        Alignment record to modify.
 *   read_index  New value.
 * Returns a biohdf_error value. */
00538 BIOHDF_API biohdf_error
00539 BIOH5Gset_alignment_read_index(bioh5g_alignment_data *data,
00540                                int64_t read_index);
00541 
00542 
00543 
/* Getter for the reference field of an alignment record.
 *   data       Alignment record.
 *   reference  OUT: receives a char*. Whether this is a copy or the
 *              record's own pointer is not visible in this header.
 * Returns a biohdf_error value. */
00544 BIOHDF_API biohdf_error
00545 BIOH5Gget_alignment_reference(bioh5g_alignment_data *data,
00546                               /*OUT*/ char **reference);
00547 
00548 
00549 
/* Setter for the reference field of an alignment record.
 *   data       Alignment record to modify.
 *   reference  New string. Whether it is copied or the pointer is stored
 *              as-is is not visible in this header -- confirm before
 *              freeing the argument.
 * Returns a biohdf_error value. */
00550 BIOHDF_API biohdf_error
00551 BIOH5Gset_alignment_reference(bioh5g_alignment_data *data,
00552                               char *reference);
00553 
00554 
00555 
/* Getter for the position field of an alignment record.
 *   data      Alignment record.
 *   position  OUT: receives the value.
 * Returns a biohdf_error value. */
00556 BIOHDF_API biohdf_error
00557 BIOH5Gget_alignment_position(bioh5g_alignment_data *data,
00558                              /*OUT*/ int32_t *position);
00559 
00560 
00561 
/* Setter for the position field of an alignment record.
 *   data      Alignment record to modify.
 *   position  New value.
 * Returns a biohdf_error value. */
00562 BIOHDF_API biohdf_error
00563 BIOH5Gset_alignment_position(bioh5g_alignment_data *data,
00564                              int32_t position);
00565 
00566 
00567 
/* Getter for the length field of an alignment record.
 *   data    Alignment record.
 *   length  OUT: receives the value.
 * Returns a biohdf_error value. */
00568 BIOHDF_API biohdf_error
00569 BIOH5Gget_alignment_length(bioh5g_alignment_data *data,
00570                            /*OUT*/ int32_t *length);
00571 
00572 
00573 
/* Setter for the length field of an alignment record.
 *   data    Alignment record to modify.
 *   length  New value.
 * Returns a biohdf_error value. */
00574 BIOHDF_API biohdf_error
00575 BIOH5Gset_alignment_length(bioh5g_alignment_data *data,
00576                            int32_t length);
00577 
00578 
00579 
/* Getter for the sam_mapq field of an alignment record.
 *   data      Alignment record.
 *   sam_mapq  OUT: receives the value.
 * Returns a biohdf_error value. */
00580 BIOHDF_API biohdf_error
00581 BIOH5Gget_alignment_sam_mapq(bioh5g_alignment_data *data,
00582                              /*OUT*/ unsigned char *sam_mapq);
00583 
00584 
00585 
/* Setter for the sam_mapq field of an alignment record.
 *   data      Alignment record to modify.
 *   sam_mapq  New value.
 * Returns a biohdf_error value. */
00586 BIOHDF_API biohdf_error
00587 BIOH5Gset_alignment_sam_mapq(bioh5g_alignment_data *data,
00588                              unsigned char sam_mapq);
00589 
00590 
00591 
/* Getter for the sam_flags field of an alignment record.
 *   data       Alignment record.
 *   sam_flags  OUT: receives the value.
 * NOTE(review): the out-parameter is uint32_t while the struct field
 * sam_flags is declared int32_t.
 * Returns a biohdf_error value. */
00592 BIOHDF_API biohdf_error
00593 BIOH5Gget_alignment_sam_flags(bioh5g_alignment_data *data,
00594                               /*OUT*/ uint32_t *sam_flags);
00595 
00596 
00597 
/* Setter for the sam_flags field of an alignment record.
 *   data       Alignment record to modify.
 *   sam_flags  New value.
 * NOTE(review): the parameter is uint32_t while the struct field sam_flags
 * is declared int32_t.
 * Returns a biohdf_error value. */
00598 BIOHDF_API biohdf_error
00599 BIOH5Gset_alignment_sam_flags(bioh5g_alignment_data *data,
00600                               uint32_t sam_flags);
00601 
00602 
00603 
/* Getter for the sam_cigar field of an alignment record.
 *   data       Alignment record.
 *   sam_cigar  OUT: receives a char*. Whether this is a copy or the
 *              record's own pointer is not visible in this header.
 * Returns a biohdf_error value. */
00604 BIOHDF_API biohdf_error
00605 BIOH5Gget_alignment_sam_cigar(bioh5g_alignment_data *data,
00606                               /*OUT*/ char **sam_cigar);
00607 
00608 
00609 
/* Setter for the sam_cigar field of an alignment record.
 *   data       Alignment record to modify.
 *   sam_cigar  New string. Copy vs. pointer-store semantics are not
 *              visible in this header -- confirm.
 * Returns a biohdf_error value. */
00610 BIOHDF_API biohdf_error
00611 BIOH5Gset_alignment_sam_cigar(bioh5g_alignment_data *data,
00612                               char *sam_cigar);
00613 
00614 
00615 
/* Getter for the sam_tags field of an alignment record.
 *   data      Alignment record.
 *   sam_tags  OUT: receives a char*. Whether this is a copy or the
 *             record's own pointer is not visible in this header.
 * Returns a biohdf_error value. */
00616 BIOHDF_API biohdf_error
00617 BIOH5Gget_alignment_sam_tags(bioh5g_alignment_data *data,
00618                              /*OUT*/ char **sam_tags);
00619 
00620 
00621 
/* Setter for the sam_tags field of an alignment record.
 *   data      Alignment record to modify.
 *   sam_tags  New string. Copy vs. pointer-store semantics are not
 *             visible in this header -- confirm.
 * Returns a biohdf_error value. */
00622 BIOHDF_API biohdf_error
00623 BIOH5Gset_alignment_sam_tags(bioh5g_alignment_data *data,
00624                              char *sam_tags);
00625 
00626 
00627 
/* Getter for the sam_rnext field of an alignment record.
 *   data       Alignment record.
 *   sam_rnext  OUT: receives a char*. Whether this is a copy or the
 *              record's own pointer is not visible in this header.
 * Returns a biohdf_error value. */
00628 BIOHDF_API biohdf_error
00629 BIOH5Gget_alignment_sam_rnext(bioh5g_alignment_data *data,
00630                               /*OUT*/ char **sam_rnext);
00631 
00632 
00633 
/* Setter for the sam_rnext field of an alignment record.
 *   data       Alignment record to modify.
 *   sam_rnext  New string. Copy vs. pointer-store semantics are not
 *              visible in this header -- confirm.
 * Returns a biohdf_error value. */
00634 BIOHDF_API biohdf_error
00635 BIOH5Gset_alignment_sam_rnext(bioh5g_alignment_data *data,
00636                               char *sam_rnext);
00637 
00638 
00639 
/* Getter for the sam_pnext field of an alignment record.
 *   data       Alignment record.
 *   sam_pnext  OUT: receives the value.
 * Returns a biohdf_error value. */
00640 BIOHDF_API biohdf_error
00641 BIOH5Gget_alignment_sam_pnext(bioh5g_alignment_data *data,
00642                               /*OUT*/ int32_t *sam_pnext);
00643 
00644 
00645 
/* Setter for the sam_pnext field of an alignment record.
 *   data       Alignment record to modify.
 *   sam_pnext  New value.
 * Returns a biohdf_error value. */
00646 BIOHDF_API biohdf_error
00647 BIOH5Gset_alignment_sam_pnext(bioh5g_alignment_data *data,
00648                               int32_t sam_pnext);
00649 
00650 
00651 
/* Getter for the sam_tlen field of an alignment record.
 *   data      Alignment record.
 *   sam_tlen  OUT: receives the value.
 * Returns a biohdf_error value. */
00652 BIOHDF_API biohdf_error
00653 BIOH5Gget_alignment_sam_tlen(bioh5g_alignment_data *data,
00654                              /*OUT*/ int32_t *sam_tlen);
00655 
00656 
00657 
/* Setter for the sam_tlen field of an alignment record.
 *   data      Alignment record to modify.
 *   sam_tlen  New value.
 * Returns a biohdf_error value. */
00658 BIOHDF_API biohdf_error
00659 BIOH5Gset_alignment_sam_tlen(bioh5g_alignment_data *data,
00660                              int32_t sam_tlen);
00661 
00664 /*****************************************************************************
00665  * Alignments properties - create, destroy, access                           *
00666  *****************************************************************************/
00667 
/*
 * Creates a creation-properties object for alignments collections.
 *
 *   props  OUT: receives the properties handle. Default property values
 *          are not visible in this header. Presumably released with
 *          BIOH5Gdestroy_alignments_properties -- TODO confirm.
 *
 * Returns a biohdf_error value.
 */
00673 BIOHDF_API biohdf_error
00674 BIOH5Gcreate_alignments_properties(/*OUT*/ bioh5g_alignments_creation_properties *props);
00675 
00676 
00677 
/*
 * Destroys a creation-properties object.
 *
 *   props  IN-OUT: pointer to the properties handle to destroy. The handle
 *          is necessarily read by a destroy call, so the direction marker
 *          now matches the other close/destroy/free functions in this
 *          header. Presumably the handle is reset on return -- TODO confirm.
 *
 * Returns a biohdf_error value.
 */
00678 BIOHDF_API biohdf_error
00679 BIOH5Gdestroy_alignments_properties(/*IN-OUT*/ bioh5g_alignments_creation_properties *props);
00680 
00681 
00682 
/* Sets the reads path in a creation-properties object.
 *   props       Properties handle to modify.
 *   reads_path  Path string. Copy vs. pointer-store semantics are not
 *               visible in this header -- confirm.
 * Returns a biohdf_error value. */
00683 BIOHDF_API biohdf_error
00684 BIOH5Gset_alignments_properties_reads_path(bioh5g_alignments_creation_properties props,
00685                                            char *reads_path);
00686 
00687 
00688 
/* Sets the string storage scheme for "refs" (presumably the reference
 * names of alignments -- confirm) in a creation-properties object.
 *   props   Properties handle to modify.
 *   scheme  Storage scheme (biohdf_string_storage_scheme; declared
 *           elsewhere).
 * Returns a biohdf_error value. */
00689 BIOHDF_API biohdf_error
00690 BIOH5Gset_alignments_properties_refs_scheme(bioh5g_alignments_creation_properties props,
00691                                             biohdf_string_storage_scheme scheme);
00692 
00693 
00694 
/* Sets the string storage scheme for "tags" (presumably the sam_tags
 * strings -- confirm) in a creation-properties object.
 *   props   Properties handle to modify.
 *   scheme  Storage scheme (biohdf_string_storage_scheme; declared
 *           elsewhere).
 * Returns a biohdf_error value. */
00695 BIOHDF_API biohdf_error
00696 BIOH5Gset_alignments_properties_tags_scheme(bioh5g_alignments_creation_properties props,
00697                                                  biohdf_string_storage_scheme scheme);
00698 
00699 
00700 
/* Sets the string storage scheme for "cigar" (presumably the sam_cigar
 * strings -- confirm) in a creation-properties object.
 *   props   Properties handle to modify.
 *   scheme  Storage scheme (biohdf_string_storage_scheme; declared
 *           elsewhere).
 * Returns a biohdf_error value. */
00701 BIOHDF_API biohdf_error
00702 BIOH5Gset_alignments_properties_cigar_scheme(bioh5g_alignments_creation_properties props,
00703                                              biohdf_string_storage_scheme scheme);
00704 
00705 
00706 
/* Sets the "refs" length in a creation-properties object.
 *   props   Properties handle to modify.
 *   length  Length as size_t. Presumably a string length used with the
 *           refs storage scheme; whether it includes a terminator is not
 *           visible in this header -- confirm.
 * Returns a biohdf_error value. */
00707 BIOHDF_API biohdf_error
00708 BIOH5Gset_alignments_properties_refs_length(bioh5g_alignments_creation_properties props,
00709                                             size_t length);
00710 
00711 
00712 
/* Sets the "tags" length in a creation-properties object.
 *   props   Properties handle to modify.
 *   length  Length as size_t. Presumably a string length used with the
 *           tags storage scheme; whether it includes a terminator is not
 *           visible in this header -- confirm.
 * Returns a biohdf_error value. */
00713 BIOHDF_API biohdf_error
00714 BIOH5Gset_alignments_properties_tags_length(bioh5g_alignments_creation_properties props,
00715                                             size_t length);
00716 
00717 
00718 
/* Sets the "cigar" length in a creation-properties object.
 *   props   Properties handle to modify.
 *   length  Length as size_t. Presumably a string length used with the
 *           cigar storage scheme; whether it includes a terminator is not
 *           visible in this header -- confirm.
 * Returns a biohdf_error value. */
00719 BIOHDF_API biohdf_error
00720 BIOH5Gset_alignments_properties_cigar_length(bioh5g_alignments_creation_properties props,
00721                                              size_t length);
00722 
00723 
00724 
/* Sets the chunk size in a creation-properties object.
 *   props       Properties handle to modify.
 *   chunk_size  Chunk size as int64_t. Units (records vs. bytes) and the
 *               valid range are not visible in this header -- confirm.
 * Returns a biohdf_error value. */
00725 BIOHDF_API biohdf_error
00726 BIOH5Gset_alignments_properties_chunk_size(bioh5g_alignments_creation_properties props,
00727                                            int64_t chunk_size);
00728 
00729 
00730 
/* Sets the compression level in a creation-properties object.
 *   props  Properties handle to modify.
 *   level  Compression level (type compression_level is declared
 *          elsewhere; its allowed values are not visible here).
 * Returns a biohdf_error value. */
00731 BIOHDF_API biohdf_error
00732 BIOH5Gset_alignments_properties_compression_level(bioh5g_alignments_creation_properties props,
00733                                                   compression_level level);
00734 
00735 
00739 #endif
 All Data Structures Variables