...

Text file src/go.mongodb.org/mongo-driver/testdata/bson-corpus/bsonview

Documentation: go.mongodb.org/mongo-driver/testdata/bson-corpus

     1#!/usr/bin/env perl
     2use v5.10;
     3use strict;
     4use warnings;
     5use utf8;
     6use open qw/:std :utf8/;
     7
     8use Getopt::Long;
     9use Pod::Usage;
    10
    11use if $^O eq 'MSWin32', 'Win32::Console::ANSI';
    12use Term::ANSIColor;
    13
    14use constant {
    15    NULL              => "\x00",
    16    BSON_TYPE         => "C",
    17    BSON_ENAME        => "Z*",
    18    BSON_TYPE_NAME    => "CZ*",
    19    BSON_DOUBLE       => "d",
    20    BSON_STRING       => "l/A",
    21    BSON_BOOLEAN      => "C",
    22    BSON_REGEX        => "Z*Z*",
    23    BSON_JSCODE       => "",
    24    BSON_INT32        => "l",
    25    BSON_INT64        => "q",
    26    BSON_TIMESTAMP    => "q",
    27    BSON_CODE_W_SCOPE => "l",
    28    BSON_REMAINING    => 'a*',
    29    BSON_SKIP_4_BYTES => 'x4',
    30    BSON_OBJECTID     => 'a12',
    31    BSON_BINARY_TYPE  => 'C',
    32    BSON_CSTRING      => 'Z*',
    33    BSON_BYTES        => 'a*'
    34};
    35
    36my $BOLD = $^O eq 'MSWin32' ? "bold " : "";
    37
    38# minimum field size
    39my %FIELD_SIZES = (
    40    0x01 => 8,
    41    0x02 => 5,
    42    0x03 => 5,
    43    0x04 => 5,
    44    0x05 => 5,
    45    0x06 => 0,
    46    0x07 => 12,
    47    0x08 => 1,
    48    0x09 => 8,
    49    0x0A => 0,
    50    0x0B => 2,
    51    0x0C => 17,
    52    0x0D => 5,
    53    0x0E => 5,
    54    0x0F => 14,
    55    0x10 => 4,
    56    0x11 => 8,
    57    0x12 => 8,
    58    0x7F => 0,
    59    0xFF => 0,
    60);
    61
    62sub main {
    63    my ( $hex, $file, $help );
    64    GetOptions(
    65        "file=s" => \$file,
    66        "x"      => \$hex,
    67        "help|h" => \$help,
    68    ) or die("Error in command line args");
    69    pod2usage( { -exitval => 2, -verbose => 2, } ) if $help;
    70
    71    if ( $file ) {
    72        dump_file($file);
    73    }
    74    else {
    75        dump_stdin($hex);
    76    }
    77}
    78
    79sub dump_stdin {
    80    my $hex = shift;
    81    while ( defined( my $bson = <STDIN> ) ) {
    82        chomp $bson;
    83        if ( !length($bson) ) {
    84            print_error("[ no document ]\n");
    85            next;
    86        }
    87        # in -x mode, treat leading # as a comment
    88        if ( $hex && index( $bson, "#" ) == 0 ) {
    89            say $bson;
    90            next;
    91        }
    92        $bson =~ s[ ][]g if $hex;
    93        $bson = pack( "H*", $bson ) if $hex;
    94        dump_document( \$bson );
    95        print "\n";
    96    }
    97}
    98
    99sub dump_file {
   100    my $file = shift;
   101    open my $fh, "<", $file;
   102    binmode($fh);
   103    my $data = do { local $/; <$fh> };
   104    while ( length $data ) {
   105        my $len = unpack( BSON_INT32, $data );
   106        my $bson = substr($data,0,$len,'');
   107        dump_document(\$bson);
   108        print "\n";
   109    }
   110}
   111
   112sub dump_document {
   113    my ( $ref, $is_array ) = @_;
   114    print $is_array ? " [" : " {" if defined $is_array;
   115    dump_header($ref);
   116    1 while dump_field($ref);
   117    print_error( " " . unpack( "H*", $$ref ) ) if length($$ref);
   118    print $is_array ? " ]" : " }" if defined $is_array;
   119    return;
   120}
   121
   122sub dump_header {
   123    my ($ref) = @_;
   124
   125    my $len = get_length( $ref, 4 );
   126    return unless defined $len;
   127
   128    if ( $len < 5 || $len < length($$ref) + 4 ) {
   129        print_length( $len, 'red' );
   130    }
   131    else {
   132        print_length( $len, 'blue' );
   133    }
   134}
   135
   136sub dump_field {
   137    my ($ref) = @_;
   138
   139    # detect end of document
   140    if ( length($$ref) < 2 ) {
   141        if ( length($$ref) == 0 ) {
   142            print_error(" [missing terminator]");
   143        }
   144        else {
   145            my $end = substr( $$ref, 0, 1, '' );
   146            print_hex( $end, $end eq NULL ? 'blue' : 'red' );
   147        }
   148        return;
   149    }
   150
   151    # unpack type
   152    my $type = unpack( BSON_TYPE, substr( $$ref, 0, 1, '' ) );
   153
   154    if ( !exists $FIELD_SIZES{$type} ) {
   155        print_type( $type, 'red' );
   156        return;
   157    }
   158
   159    print_type($type);
   160
   161    # check for key termination
   162    my $key_end = index( $$ref, NULL );
   163    return if $key_end == -1;
   164
   165    # unpack key
   166    my $key = unpack( BSON_CSTRING, substr( $$ref, 0, $key_end + 1, '' ) );
   167    print_key($key);
   168
   169    # Check if there is enough data to complete field for this type
   170    # This is greedy, so it checks length, not length -1
   171    my $min_size = $FIELD_SIZES{$type};
   172    return if length($$ref) < $min_size;
   173
   174    # fields without payload: 0x06, 0x0A, 0x7F, 0xFF
   175    return 1 if $min_size == 0;
   176
   177    # document or array
   178    if ( $type == 0x03 || $type == 0x04 ) {
   179        my ($len) = unpack( BSON_INT32, $$ref );
   180        my $doc = substr( $$ref, 0, $len, '' );
   181        dump_document( \$doc, $type == 0x04 );
   182        return 1;
   183    }
   184
   185    # fixed width fields
   186    if (   $type == 0x01
   187        || $type == 0x07
   188        || $type == 0x09
   189        || $type == 0x10
   190        || $type == 0x11
   191        || $type == 0x12 )
   192    {
   193        my $len = ( $type == 0x10 ? 4 : $type == 0x07 ? 12 : 8 );
   194        print_hex( substr( $$ref, 0, $len, '' ) );
   195        return 1;
   196    }
   197
   198    # boolean
   199    if ( $type == 0x08 ) {
   200        my $bool = substr( $$ref, 0, 1, '' );
   201        print_hex( $bool, ( $bool eq "\x00" || $bool eq "\x01" ) ? 'green' : 'red' );
   202        return 1;
   203    }
   204
   205    # binary field
   206    if ( $type == 0x05 ) {
   207        my $len = get_length( $ref, -1 );
   208        my $subtype = substr( $$ref, 0, 1, '' );
   209
   210        if ( !defined($len) ) {
   211            print_hex($subtype);
   212            return;
   213        }
   214
   215        my $binary = substr( $$ref, 0, $len, '' );
   216
   217        print_length($len);
   218        print_hex($subtype);
   219
   220        if ( $subtype eq "\x02" ) {
   221            my $bin_len = get_length( \$binary );
   222            if ( !defined($bin_len) ) {
   223                print_hex( $binary, 'red' );
   224                return;
   225            }
   226            if ( $bin_len != length($binary) ) {
   227                print_length( $bin_len, 'red' );
   228                print_hex( $binary, 'red' );
   229                return;
   230            }
   231        }
   232
   233        print_hex($binary) if length($binary);
   234        return 1;
   235    }
   236
   237    # string or symbol or code
   238    if ( $type == 0x02 || $type == 0x0e || $type == 0x0d ) {
   239        my ( $len, $string ) = get_string($ref);
   240        return unless defined $len;
   241
   242        print_length( $len, 'cyan' );
   243        print_string($string);
   244        return 1;
   245
   246    }
   247
   248    # regex 0x0B
   249    if ( $type == 0x0B ) {
   250        my ( $pattern, $flag ) = unpack( BSON_CSTRING . BSON_CSTRING, $$ref );
   251        substr( $$ref, 0, length($pattern) + length($flag) + 2, '' );
   252        print_string($pattern);
   253        print_string($flag);
   254        return 1;
   255    }
   256
   257    # code with scope 0x0F
   258    if ( $type == 0x0F ) {
   259        my $len = get_length( $ref, 4 );
   260        return unless defined $len;
   261
   262        # len + string + doc minimum size is 4 + 5 + 5
   263        if ( $len < 14 ) {
   264            print_length( $len, 'red' );
   265            return;
   266        }
   267
   268        print_length($len);
   269
   270        my $cws = substr( $$ref, 0, $len - 4, '' );
   271
   272        my ( $strlen, $string ) = get_string( \$cws );
   273
   274        if ( !defined $strlen ) {
   275            print_hex( $cws, 'red' );
   276            return;
   277        }
   278
   279        print_length($strlen);
   280        print_string($string);
   281
   282        dump_document( \$cws, 0 );
   283
   284        return 1;
   285    }
   286
   287    # dbpointer 0x0C
   288    if ( $type == 0x0C ) {
   289        my ( $len, $string ) = get_string($ref);
   290        return unless defined $len;
   291
   292        print_length($len);
   293        print_string($string);
   294
   295        # Check if there are 12 bytes (plus terminator) or more
   296        return if length($$ref) < 13;
   297
   298        my $oid = substr( $$ref, 0, 12, '' );
   299        print_hex($oid);
   300
   301        return 1;
   302    }
   303
   304    die "Shouldn't reach here";
   305}
   306
   307sub get_length {
   308    my ( $ref, $adj ) = @_;
   309    $adj ||= 0;
   310    my $len = unpack( BSON_INT32, substr( $$ref, 0, 4, '' ) );
   311    return unless defined $len;
   312
   313    # check if requested length is too long
   314    if ( $len < 0 || $len > length($$ref) + $adj ) {
   315        print_length( $len, 'red' );
   316        return;
   317    }
   318
   319    return $len;
   320}
   321
   322sub get_string {
   323    my ($ref) = @_;
   324
   325    my $len = get_length($ref);
   326    return unless defined $len;
   327
   328    # len must be at least 1 for trailing 0x00
   329    if ( $len == 0 ) {
   330        print_length( $len, 'red' );
   331        return;
   332    }
   333
   334    my $string = substr( $$ref, 0, $len, '' );
   335
   336    # check if null terminated
   337    if ( substr( $string, -1, 1 ) ne NULL ) {
   338        print_length($len);
   339        print_hex( $string, 'red' );
   340        return;
   341    }
   342
   343    # remove trailing null
   344    chop($string);
   345
   346    # try to decode to UTF-8
   347    if ( !utf8::decode($string) ) {
   348        print_length($len);
   349        print_hex( $string . "\x00", 'red' );
   350        return;
   351    }
   352
   353    return ( $len, $string );
   354}
   355
   356sub print_error {
   357    my ($text) = @_;
   358    print colored( ["${BOLD}red"], $text );
   359}
   360
   361sub print_type {
   362    my ( $type, $color ) = @_;
   363    $color ||= 'magenta';
   364    print colored( ["$BOLD$color"], sprintf( " %02x", $type ) );
   365}
   366
   367sub print_key {
   368    my ($string) = @_;
   369    print_string( $string, 'yellow' );
   370}
   371
   372sub print_string {
   373    my ( $string, $color ) = @_;
   374    $color ||= 'green';
   375    $string =~ s{([^[:graph:]])}{sprintf("\\x%02x",ord($1))}ge;
   376    print colored( ["$BOLD$color"], qq[ "$string"] . " 00" );
   377}
   378
   379sub print_length {
   380    my ( $len, $color ) = @_;
   381    $color ||= 'cyan';
   382    print colored( ["$BOLD$color"], " " . unpack( "H*", pack( BSON_INT32, $len ) ) );
   383}
   384
   385sub print_hex {
   386    my ( $value, $color ) = @_;
   387    $color ||= 'green';
   388    print colored( ["$BOLD$color"], " " . uc( unpack( "H*", $value ) ) );
   389}
   390
   391main();
   392
   393__END__
   394
   395=head1 NAME
   396
   397bsonview - dump a BSON string with color output showing structure
   398
   399=head1 SYNOPSIS
   400
   401    cat file.bson | bsondump
   402
   403    echo "0500000000" | bsondump -x
   404
   405=head1 OPTIONS
   406
   407    -x          input is in hex format (default is 0)
   408    --help, -h  show help
   409
   410=head1 USAGE
   411
   412Reads from C<STDIN> and dumps colored structures to C<STDOUT>.
   413
   414=head1 AUTHOR
   415
   416=over 4
   417
   418=item *
   419
   420David Golden <david@mongodb.com>
   421
   422=back
   423
   424=head1 COPYRIGHT AND LICENSE
   425
   426This software is Copyright (c) 2016 by MongoDB, Inc..
   427
   428This is free software, licensed under:
   429
   430  The Apache License, Version 2.0, January 2004
   431
   432=cut
   433
   434=cut

View as plain text