# Cross_Qry.pl # (single pass query filter) use strict; use warnings; ############################################################################## # This will query an input Intermediate file given the input filter regex. # An intermediate file is a file constructed with 'Cross.pl'. # Each file contains fixed width words, ex. 7 letters wide. # Each line contain a seed word (left column) followed by ALL words that differ # by one letter from only the seed word. # Example: ablare - ablate - ablaze - aflare - aglare # ---------- # These intermediate files need only be generated once. # There is pre-generated list on my website. # They are the superset used for lightning fast specific subset query's. ############################################################################### my $usage = "\nUsage: Cross_Qry.pl \n". " Reqd: \n". " regex filter\n". " Opt: prepended to '_Filt.txt' output name\n". " supress word list display of generated difs\n" ; if ( @ARGV < 2 ) { die ( $usage ); } my ($fname, $out, $rxstr, $quiet) = ("", "", "", 0); my $p; for ( $p = 0; $p < @ARGV; $p++ ) { # Get intermediate file name if ( $ARGV[$p] =~ /(?i)^In=(.+)/ ) { $fname = $1; last; }} for ( $p = 0; $p < @ARGV; $p++ ) { # Get output file name seed if ( $ARGV[$p] =~ /(?i)^Out=(.+)/ ) { $out = $1; last; }} for ( $p = 0; $p < @ARGV; $p++ ) { # Get regex string if ( $ARGV[$p] =~ /(?i)^Rx=(.+)/ ) { $rxstr = $1; last; }} for ( $p = 0; $p < @ARGV; $p++ ) { # Get quiet flag if ( $ARGV[$p] =~ /(?i)^quiet/ ) { $quiet = 1; last; }} if ( $fname eq "" ) { die ( "$usage-------\nIntermediate file name required, 'In=...'\n"); } if ( $rxstr eq "" ) { die ( "$usage-------\nRegex filter string required, 'rx=...'\n"); } # Compile imput regex string # my $rx = qr/$rxstr/; # Regex to use to filter diff letter # Open Intermediate file # open(my $fh, "<", $fname) or die "\nCannot open intermediate file '$fname' for reading: $!"; my @AllLines = (); my $start = 0; while (my $line = <$fh>) { chomp($line); $line =~ s/^\s+|\s+$//g; if ( $start > 0 ) { push ( @AllLines, $line ); # print $line, "\n"; } else { if ( $line =~ /^\S{3,} - \S{3,}/ ) { $start = 1; } } } close($fh); ### HEADER ### my $hdr = "\nQuery of Diffs-by-one using intermediate file\n :: input file : $fname \n" . " :: filter : rx = $rx \n"; if ( $out ne '' ) { $hdr .= " :: output capture file : $out" . "_Filt.txt\n"; } $hdr .= "--------------------------------------------------------------\n"; print $hdr; # Open the output file # my $fho; if ( $out ne '' ) { my $fout = $out . "_Filt.txt"; open($fho, "+>", $fout) or die "Cannot open file '$fout' for output: $!"; print $fho $hdr; } ## QUERY BODY ## my $res = 0; foreach my $line (@AllLines) { my @LineWords = split( / - /, $line ); my $Wlen = @LineWords; my @DiffAry = (); my $Owd = $LineWords[0]; push ( @DiffAry, $Owd ); # push the seed object word my @Ochrs = split ( //, $Owd ); # split object word into letters for ( my $i = 1; $i < $Wlen; $i++ ) { my $Twd = $LineWords[ $i ]; # check this target word for diff against obj word my @Tchrs = split ( //, $Twd ); my $dcnt = 0; my $dfiltcnt = 0; my $tlen = @Tchrs; for ( my $k = 0; $k < $tlen && $dcnt < 2; $k++ ) { if ( $Tchrs[ $k ] ne $Ochrs[ $k ] ) { if ( ++$dcnt > 1 ) { $k = $tlen; } elsif ( $Tchrs[$k] =~ $rx && $Ochrs[$k] =~ $rx ) { ++$dfiltcnt; } } } if ( $dcnt == 1 && $dfiltcnt == 1 ) { # Based on filter regex, found a diff by one char, push the target word push ( @DiffAry, $Twd ); } } ### PRINT array of difs-by-one of the current word $Owd from the @AllWirds array if ( @DiffAry > 1 ) { my $difout = join(" - ", @DiffAry); if ( $quiet != 1 ) { print "$difout\n"; } print $fho "$difout\n" if ( $out ne '' ); $res = 1; } } if ( $res == 0 ) { print ("Query results: None found \n" ); print $fho "Query results: None found \n" if ( $out ne '' ); } close ($fho) if ( $out ne '' );