#!/usr/bin/env perl
##
# Copyright by The HDF Group.
# All rights reserved.
#
# This file is part of HDF5.  The full HDF5 copyright notice, including
# terms governing use, modification, and redistribution, is contained in
# the LICENSE file, which can be found at the root of the source code
# distribution tree, or in https://www.hdfgroup.org/licenses.
# If you do not have access to either file, you may request a copy from
# help@hdfgroup.org.
##

##
# Process H5ARG_TRACE() macros (defined in H5private.h) used in H5ES_insert(),
# updating them with the caller's parameters, etc.
##

require 5.003;
use warnings;
$Source = "";

##############################################################################
# A map from type name to type letter.  We use this map for two reasons:
#  1. We want the debugging stuff in the source code to be as unobtrusive as
#     possible, which means as compact as possible.
#  2. It's easier (faster) to parse these one and two-letter types in the C
#     functions that display debugging results.
#
# All type strings are one or two characters.  One-character strings
# are always lower case and should be used for common types.
# Two-character strings begin with an upper-case letter which is
# usually the same as the package name.
#
# These map to the types in H5trace.c (which should be updated if you add
# a type here), so there are more types listed here than are used by the
# existing macros.
#
%TypeString = ("haddr_t"                    => "a",
               "H5A_info_t"                 => "Ai",
               "H5A_operator1_t"            => "Ao",
               "H5A_operator2_t"            => "AO",
               "bool"                       => "b",
               "H5AC_cache_config_t"        => "Cc",
               "H5AC_cache_image_config_t"  => "CC",
               "double"                     => "d",
               "H5D_alloc_time_t"           => "Da",
               "H5D_append_cb_t"            => "DA",
               "H5FD_mpio_collective_opt_t" => "Dc",
               "H5D_selection_io_mode_t"    => "DC",
               "H5D_fill_time_t"            => "Df",
               "H5D_fill_value_t"           => "DF",
               "H5D_gather_func_t"          => "Dg",
               "H5FD_mpio_chunk_opt_t"      => "Dh",
               "H5D_mpio_actual_io_mode_t"  => "Di",
               "H5FD_file_image_callbacks_t" => "DI",
               "H5D_chunk_index_t"          => "Dk",
               "H5D_layout_t"               => "Dl",
               "H5D_mpio_no_collective_cause_t" => "Dn",
               "H5D_mpio_actual_chunk_opt_mode_t" => "Do",
               "H5D_operator_t"             => "DO",
               "H5D_space_status_t"         => "Ds",
               "H5D_scatter_func_t"         => "DS",
               "H5FD_mpio_xfer_t"           => "Dt",
               "H5D_vds_view_t"             => "Dv",
               "H5FD_class_value_t"         => "DV",
               "H5D_chunk_iter_op_t"        => "x",
               "herr_t"                     => "e",
               "H5E_auto1_t"                => "Ea",
               "H5E_auto2_t"                => "EA",
               "H5ES_event_complete_func_t" => "EC",
               "H5E_direction_t"            => "Ed",
               "H5E_error_t"                => "Ee",
               "H5ES_event_insert_func_t"   => "EI",
               "H5ES_status_t"              => "Es",
               "H5E_type_t"                 => "Et",
               "H5FD_class_t"               => "FC",
               "H5F_close_degree_t"         => "Fd",
               "H5F_fspace_strategy_t"      => "Ff",
               "H5F_flush_cb_t"             => "FF",
               "H5F_info2_t"                => "FI",
               "H5F_mem_t"                  => "Fm",
               "H5F_scope_t"                => "Fs",
               "H5F_file_space_type_t"      => "Ft",
               "H5F_libver_t"               => "Fv",
               "H5G_iterate_t"              => "Gi",
               "H5G_info_t"                 => "GI",
               "H5G_obj_t"                  => "Go",
               "H5G_stat_t"                 => "Gs",
               "hsize_t"                    => "h",
               "H5_atclose_func_t"          => "Hc",
               "hssize_t"                   => "Hs",
               "H5E_major_t"                => "i",     # H5E_major_t is typedef'd to hid_t
               "H5E_minor_t"                => "i",     # H5E_minor_t is typedef'd to hid_t
               "hid_t"                      => "i",
               "H5I_future_discard_func_t"  => "ID",
               "H5I_free_t"                 => "If",
               "H5_index_t"                 => "Ii",
               "H5I_iterate_func_t"         => "II",
               "H5_iter_order_t"            => "Io",
               "H5FD_subfiling_ioc_select_t" => "IO",
               "H5I_future_realize_func_t"  => "IR",
               "int"                        => "Is",
               "int32_t"                    => "Is",
               "H5I_search_func_t"          => "IS",
               "H5I_type_t"                 => "It",
               "unsigned"                   => "Iu",
               "unsigned int"               => "Iu",
               "uint32_t"                   => "Iu",
               "H5O_token_t"                => "k",
               "H5L_iterate1_t"             => "Li",
               "H5L_iterate2_t"             => "LI",
               "H5G_link_t"                 => "Ll", #Same as H5L_type_t now
               "H5L_type_t"                 => "Ll",
               "H5L_elink_traverse_t"       => "Lt",
               "H5MM_allocate_t"            => "Ma",
               "MPI_Comm"                   => "Mc",
               "H5MM_free_t"                => "Mf",
               "MPI_Info"                   => "Mi",
               "H5M_iterate_t"              => 'MI',
               "H5FD_mem_t"                 => "Mt",
               "off_t"                      => "o",
               "HDoff_t"                    => "Ho",
               "H5O_iterate1_t"             => "Oi",
               "H5O_iterate2_t"             => "OI",
               "H5O_mcdt_search_cb_t"       => "Os",
               "H5O_type_t"                 => "Ot",
               "H5P_class_t"                => "p",
               "H5P_cls_create_func_t"      => "Pc",
               "H5P_prp_create_func_t"      => "PC",
               "H5P_prp_delete_func_t"      => "PD",
               "H5P_prp_get_func_t"         => "PG",
               "H5P_iterate_t"              => "Pi",
               "H5P_cls_close_func_t"       => "Pl",
               "H5P_prp_close_func_t"       => "PL",
               "H5P_prp_compare_func_t"     => "PM",
               "H5P_cls_copy_func_t"        => "Po",
               "H5P_prp_copy_func_t"        => "PO",
               "H5P_prp_set_func_t"         => "PS",
               "hdset_reg_ref_t"            => "Rd",
               "hobj_ref_t"                 => "Ro",
               "H5R_ref_t"                  => "Rr",
               "H5R_type_t"                 => "Rt",
               "char"                       => "s",
               "unsigned char"              => "s",
               "H5S_class_t"                => "Sc",
               "H5S_seloper_t"              => "Ss",
               "H5S_sel_type"               => "St",
               "htri_t"                     => "t",
               "H5T_cset_t",                => "Tc",
               "H5T_conv_t"                 => "TC",
               "H5T_direction_t",           => "Td",
               "H5T_pers_t"                 => "Te",
               "H5T_conv_except_func_t"     => "TE",
               "H5T_norm_t"                 => "Tn",
               "H5T_order_t"                => "To",
               "H5T_pad_t"                  => "Tp",
               "H5T_sign_t"                 => "Ts",
               "H5T_class_t"                => "Tt",
               "H5T_str_t"                  => "Tz",
               "unsigned long"              => "Ul",
               "unsigned long long"         => "UL",
               "uint64_t"                   => "UL",
               "H5VL_attr_get_t"            => "Va",
               "H5VL_blob_optional_t"       => "VA",
               "H5VL_attr_specific_t"       => "Vb",
               "H5VL_blob_specific_t"       => "VB",
               "H5VL_dataset_get_t"         => "Vc",
               "H5VL_class_value_t"         => "VC",
               "H5VL_dataset_specific_t"    => "Vd",
               "H5VL_datatype_get_t"        => "Ve",
               "H5VL_datatype_specific_t"   => "Vf",
               "H5VL_file_get_t"            => "Vg",
               "H5VL_file_specific_t"       => "Vh",
               "H5VL_group_get_t"           => "Vi",
               "H5VL_group_specific_t"      => "Vj",
               "H5VL_link_create_t"         => "Vk",
               "H5VL_link_get_t"            => "Vl",
               "H5VL_get_conn_lvl_t"        => "VL",
               "H5VL_link_specific_t"       => "Vm",
               "H5VL_object_get_t"          => "Vn",
               "H5VL_request_notify_t"      => "VN",
               "H5VL_object_specific_t"     => "Vo",
               "H5VL_request_specific_t"    => "Vr",
               "H5VL_attr_optional_t"       => "Vs",
               "H5VL_subclass_t"            => "VS",
               "H5VL_dataset_optional_t"    => "Vt",
               "H5VL_datatype_optional_t"   => "Vu",
               "H5VL_file_optional_t"       => "Vv",
               "H5VL_group_optional_t"      => "Vw",
               "H5VL_link_optional_t"       => "Vx",
               "H5VL_object_optional_t"     => "Vy",
               "H5VL_request_optional_t"    => "Vz",
               "va_list"                    => "x",
               "void"                       => "x",
               "size_t"                     => "z",
               "H5Z_SO_scale_type_t"        => "Za",
               "H5Z_class_t"                => "Zc",
               "H5Z_EDC_t"                  => "Ze",
               "H5Z_filter_t"               => "Zf",
               "H5Z_filter_func_t"          => "ZF",
               "ssize_t"                    => "Zs",

# Types below must be defined here, as they appear in function arguments,
# but they are not yet supported in the H5_trace_args() routine yet.  If
# they are used as an actual parameter type (and not just as a pointer to
# to the type), they must have a "real" abbreviation added (like the ones
# above), moved to the section of entries above, and support for displaying
# the type must be added to H5_trace_args().
               "H5ES_err_info_t"            => "#",
               "H5FD_t"                     => "#",
               "H5FD_hdfs_fapl_t"           => "#",
               "H5FD_mirror_fapl_t"         => "#",
               "H5FD_onion_fapl_t"          => "#",
               "H5FD_ros3_fapl_t"           => "#",
               "H5FD_splitter_vfd_config_t" => "#",
               "H5L_class_t"                => "#",
               "H5VL_class_t"               => "#",
               "H5VL_loc_params_t"          => "#",
               "H5VL_request_status_t"      => "#",
              );

##############################################################################
# Print an error message.
#
my $found_errors = 0;

sub errmesg ($$@) {
    my ($file, $func, @mesg) = @_;
    my ($mesg) = join "", @mesg;
    my ($lineno) = 1;
    if ($Source =~ /(.*?\n)($func)/s) {
        local $_ = $1;
        $lineno = tr/\n/\n/;
    }

    $found_errors = 1;

    print "$file: in function \`$func\':\n";
    print "$file:$lineno: $mesg\n";
}

##############################################################################
# Given a C data type, return the type string that goes with it
#
sub argstring ($$$) {
    my ($file, $func, $atype) = @_;
    my ($ptr, $tstr, $array) = (0, "!", "");
    my ($fq_atype);

    # Normalize the data type by removing redundant white space,
    # certain type qualifiers, and indirection.
    $atype =~ s/^\bconst\b//;     # Leading const
    $atype =~ s/\s*const\s*//;    # const after type, possibly in the middle of '*'s
    $atype =~ s/^\bstatic\b//;
    $atype =~ s/\bH5_ATTR_UNUSED\b//g;
    $atype =~ s/\bH5_ATTR_DEPRECATED_USED\b//g;
    $atype =~ s/\bH5_ATTR_NDEBUG_UNUSED\b//g;
    $atype =~ s/\bH5_ATTR_PARALLEL_UNUSED\b//g;
    $atype =~ s/\bH5_ATTR_PARALLEL_USED\b//g;
    $atype =~ s/\s+/ /g;
    $ptr = length $1 if  $atype =~ s/(\*+)//;
    $atype =~ s/^\s+//;
    $atype =~ s/\s+$//;
    if ($atype =~ /(.*)\[(.*)\]$/) {
        ($array, $atype) = ($2, $1);
        $atype =~ s/\s+$//;
    }
    $fq_atype = $atype . ('*' x $ptr);

    if ($ptr>0 && exists $TypeString{$fq_atype}) {
        $ptr = 0;
        $tstr = $TypeString{$fq_atype};
    } elsif ($ptr>0 && exists $TypeString{"$atype*"}) {
        --$ptr;
        $tstr = $TypeString{"$atype*"};
    } elsif (!exists $TypeString{$atype}) {
        # Defer throwing error until type is actually used
        #    errmesg $file, $func, "untraceable type \`$atype", '*'x$ptr, "\'";
    } else {
        $tstr = $TypeString{$atype};
    }
    return ("*" x $ptr) . ($array ? "[$array]" : "") . $tstr;
}

##############################################################################
# Given information about an API function, rewrite that function with
# updated tracing information.
#
my $file_args = 0;
my $total_args = 0;
sub rewrite_func ($$$$$) {
    my ($file, $type, $name, $args, $body) = @_;
    my ($arg, $argtrace);
    my (@arg_name, @arg_str, @arg_type);
    local $_;

    # Keep copy of original arguments
    my $orig_args = $args;

    # Parse arguments
    if ($args eq "void") {
        $argtrace = "H5ARG_TRACE0(\"\")";
    } else {
        # Split arguments
        #
        # First remove:
        #   * /*in*/, /*out*/, /*in_out*/, and /*in,out*/ comments
        #   * preprocessor lines that start with #
        #
        # then split the function arguments on commas
        $args =~ s/\/\*\s*in\s*\*\///g;  # Get rid of /*in*/
        $args =~ s/\/\*\s*out\s*\*\///g; # Get rid of /*out*/
        $args =~ s/\/\*\s*in,\s*out\s*\*\///g; # Get rid of /*in,out*/
        $args =~ s/\/\*\s*in_out\s*\*\///g; # Get rid of /*in_out*/
        $args =~ s/\n#.*?\n/\n/g;        # Remove lines beginning with '#'
        my @args = split /,[\s\n]*/, $args;
        my $argno = 0;
        my %names;

        for $arg (@args) {
            if($arg=~/\w*\.{3}\w*/){  # Skip "..." for varargs parameter
                next;
            }
            unless ($arg=~/^((\s*[a-z_A-Z](\w|\*)*\s+)+(\s*\*\s*|\s*const\s*|\s*restrict\s*|\s*volatile\s*)*)
                      ([a-z_A-Z]\w*)(\[.*?\])?\s*$/x) {
                errmesg $file, $name, "unable to parse \`$arg\'";
                goto error;
            } else {
                my ($atype, $aname, $array, $adir) = ($1, $5, $6, $8);

                $names{$aname} = $argno++;
                $adir ||= "in";
                $atype =~ s/\s+$//;
                push @arg_name, $aname;
                push @arg_type, $atype;

                if ($adir eq "out") {
                    push @arg_str, "x";
                } else {
                    if (defined $array) {
                        $atype .= "*";
                        if ($array =~ /^\[\/\*([a-z_A-Z]\w*)\*\/\]$/) {
                            my $asize = $1;

                            if (exists $names{$asize}) {
                                $atype .= '[a' . $names{$asize} . ']';
                            } else {
                                warn "bad array size: $asize";
                                $atype .= "*";
                            }
                        }
                    }

                    push @arg_str, argstring $file, $name, $atype;
                }
            }
        }

        # Compose the trace macro
        $argtrace = "H5ARG_TRACE" . scalar(@arg_str) . "(__func__, \"";
        $argtrace .= join("", @arg_str) . "\"";

        # Append arguments
        for my $i (0 .. $#arg_name) {
            $argtrace .= ", $arg_name[$i]";
        }

        # Append final ')' for macro
        $argtrace .= ")";
    }

    # Check for H5ARG_TRACE macros to update
    if ( $body =~ /H5ARG_TRACE/ ) {
        my $orig_body = $body;

        # Check for untraceable type (deferred until $argtrace used)
        if ( $argtrace =~ /(^!)|([^*]!)/ ) {
            errmesg $file, $name, "untraceable type in args";
            print "args = '$orig_args'\n";
            goto error;
        }

        # Update H5ARG_TRACE macro
        $body =~ s/(H5ARG_TRACE(\d+\s*\(.*?\))?)/"$argtrace"/esg;

        # Increment # of non-API routines modified if anything changed
        if ($orig_body ne $body) {
            $file_args++;
        }
    }

error:
    return "\n$type\n$name($orig_args)\n$body";
}

##############################################################################
# Process each source file, rewriting API functions with updated
# tracing information.
#
for $file (@ARGV) {
    $file_args = 0;

    # Ignore the "external" source files that don't include H5private.h
    unless ($file eq "H5FDmulti.c" or $file eq "src/H5FDmulti.c" or $file eq "H5FDstdio.c" or $file eq "src/H5FDstdio.c") {

        # Snarf up the entire file
        open SOURCE, $file or die "$file: $!\n";
        $Source = join "", <SOURCE>;
        close SOURCE;

        # Make a copy of the original data
        my $original = $Source;

        # Make modifications
        $Source =~ s/\n([A-Za-z]\w*(\s+[A-Za-z]\w*)*\s*\**)\n #type
                         (H5[A-Z]{0,2}_?[a-zA-Z0-9_]\w*)      #name
                         \s*\((.*?)\)\s*                      #args
                         (\{.*?\n\}[^\n]*)                    #body
                         /rewrite_func($file,$1,$3,$4,$5)/segx;

        # If the source changed then print out the new version
        if ($original ne $Source) {
            printf "%s: Instrumented %d argument list%s\n",
                    $file, $file_args, (1 == $file_args ? "" : "s");
            rename $file, "$file~" or die "unable to make backup";
            open SOURCE, ">$file" or die "unable to modify source";
            print SOURCE $Source;
            close SOURCE;

            $total_args += $file_args;
        }
    }
}

if ($found_errors eq 1) {
    printf "\n";
    printf "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n";
    printf "*** ERRORS FOUND *** ERRORS FOUND *** ERRORS FOUND ****\n";
    printf "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n";
    exit 1;
} else {
    printf "Finished processing H5ES_insert() calls:\n";
    printf "\tInstrumented %d argument list%s\n",
             $total_args, (1 == $total_args ? "" : "s");
}

