# Cross.pl # (creates intermediate files) use strict; use warnings; ## PARSE INPUT PARAMS ## my $usage = "\nUsage: Cross.pl - Intermediate file generator\n\n". " Reqd: input name of dictionary (single word/line)\n". " word size ranges (3 letters minimum)\n". " or -1 will do all sizes\n\n". " Opt: prepended to '#Ltr.txt' output name\n". " filter regex, calls Cross_Qry.pl\n". " 1 or 2 ? print letter distribution. 2 ? then exits\n". " supress word list display of generated difs\n"; ; if ( @ARGV < 2 ) { die ( $usage ); } my ($fname, $out, $wslo, $wshi, $rxstr, $dist, $quiet) = ("", "", 0, 0, "", 0, 0); my $p; for ( $p = 0; $p < @ARGV; $p++ ) { # Get dictionary file name if ( $ARGV[$p] =~ /(?i)^In=(.+)/ ) { $fname = $1; last; }} for ( $p = 0; $p < @ARGV; $p++ ) { # Get output file name seed if ( $ARGV[$p] =~ /(?i)^Out=(.+)/ ) { $out = $1; last; }} for ( $p = 0; $p < @ARGV; $p++ ) { # Get word size if ( $ARGV[$p] =~ /(?i)^Ws=(-1|[3-9]|[1-9]\d+)(?:-(\d+))?/ ) { $wslo = $1; if ( defined($2) && $2 >= $1 ) { $wshi = $2; } else { $wshi = $wslo; } if ( $wshi > 999 ) { $wshi = 999; } last; }} for ( $p = 0; $p < @ARGV; $p++ ) { # Get regex string if ( $ARGV[$p] =~ /(?i)^Rx=(.+)/ ) { $rxstr = $1; last; }} for ( $p = 0; $p < @ARGV; $p++ ) { # Get distribution flag if ( $ARGV[$p] =~ /(?i)^Dist=(\d)/ ) { $dist = $1; last; }} for ( $p = 0; $p < @ARGV; $p++ ) { # Get quiet flag if ( $ARGV[$p] =~ /(?i)^quiet/ ) { $quiet = 1; last; }} if ( $fname eq "" ) { die ( "$usage-------\nWord file Dictionary name required, 'In=...'\n"); } if ( $wslo == 0 && $dist != 2 ) { die ( "$usage-------\nWord size required, 'ws=3-#' range or 'ws=-1' all sizes\n"); } ## Print paramater info print "\n", "Input File Dictionary = $fname\n"; print "Word size Range = "; if ( $dist != 2 ) { if ( $wslo == -1 ) { print "-1 (all)";} else { print "$wslo-$wshi"; } print "\n"; print "Output File Seed = $out + '#Ltr.txt'\n" if ( $out ne "" ); print "Regex query = $rxstr (will call 'Cross_Qry.pl')\n" if ($rxstr ne "" ); } else { print "N/A\n"; } print "Letter distribution show = $dist "; print " (yes, then exit)\n" if ( $dist == 2 ); print " (yes)\n" if ( $dist == 1 ); print " (no)\n" if ( $dist == 0 ); ## Open the dictionary file and read in the words ## open(my $fh, "<", $fname) or die "Cannot open Dictionary file '$fname' for reading: $!"; my @AllLines = (); while (my $line = <$fh>) { chomp($line); $line =~ s/^\s+|\s+$//g; if ( length($line) > 0 ) { push ( @AllLines, $line ); # print $line, "\n"; } } close($fh); my $showLtrDistribution = $dist; # == 1 ? Print letter distribution # == 2 ? Print distribution then exit ## GET LETTER DISTRIBUTION ## my %HDistCnts = (); my %HDistAry = (); my $dlen; my $max_ltr_size = 0; foreach my $Owd (@AllLines) { $dlen = length( $Owd ); if ( $dlen > $max_ltr_size ) { # track maximum letters in a word $max_ltr_size = $dlen; } if (defined($HDistCnts{ $dlen })) { ++$HDistCnts{ $dlen }; } else { $HDistCnts{ $dlen } = 1; $HDistAry{ $dlen } = []; } if ( $showLtrDistribution != 2 ) { push ( @{$HDistAry{ $dlen }}, $Owd ); } } if ( $showLtrDistribution > 0 ) { print ("Total sample = ", scalar(@AllLines), " words\nMax letter size in words = ", $max_ltr_size, "\n\n"); foreach my $key (sort { $a <=> $b } keys %HDistCnts) { printf ( "%2d letters @ $HDistCnts{ $key } words\n", $key ); } if ( $showLtrDistribution > 1 ) { exit(); } print ( "\n" ); } @AllLines = (); # clear unneeded array ## Set the word size loop variables ## my ($Wd_Start, $Wd_End) = (0, 0); if ( $wslo == -1 ) { $Wd_Start = 3; $Wd_End = 99; } else { $Wd_Start = $wslo; $Wd_End = $wshi; } ## LOOP WORD SIZES ## ## for ( my $width = $Wd_Start; $width <= $Wd_End; $width++ ) { # Does there exist letters of this width if ( !defined($HDistAry{ $width }) ) { next; } print "\n\nDiffs-by-one of $width length words\n"; my $pre_fout = sprintf( "%s_%02d", $out, $width); my $fout = $pre_fout . "Ltr.txt"; print "Intermediate Output File: $fout\n--------------------------------------------------\n"; # Open the output file # open(my $fho, "+>", $fout) or die "Cannot open file '$fout' for output: $!"; # Set the width array # my @SubWords = (); @SubWords = @{$HDistAry{ $width }}; my $Wlen = @SubWords; my $res = 0; foreach my $Owd (@SubWords) { my @DiffAry = (); push ( @DiffAry, $Owd ); my @Ochrs = split ( //, $Owd ); for ( my $i = 0; $i < $Wlen; $i++ ) { my $Twd = $SubWords[ $i ]; # check this target word for diff against obj word my @Tchrs = split ( //, $Twd ); my $dcnt = 0; my $tlen = @Tchrs; for ( my $k = 0; $k < $tlen && $dcnt < 2; $k++ ) { if ( $Tchrs[ $k ] ne $Ochrs[ $k ] ) { if ( ++$dcnt > 1 ) { $k = $tlen; } } } if ( $dcnt == 1 ) { # found a diff by one char, push the target word push ( @DiffAry, $Twd ); } } ### PRINT array of difs-by-one of the current word $Owd from the @AllLines array if ( @DiffAry > 1 ) { my $difout = join(" - ", @DiffAry); if ( $quiet == 0 ) { print "$difout\n"; } print $fho "$difout\n"; $res = 1; } } close ($fho); # Check if anything was written to the intermediate file # if ( $res == 0 ) { print ("Diffs-by-one results: None found \n" ); unlink( $fout ); } # Check if we should spawn the query script wit this intermediate file # if ( $res == 1 && $rxstr ne '' ) { my $script = "Cross_Qry.pl"; my @args = ( "In=$fout", "rx=$rxstr" ); if ( $quiet == 1 ) { push (@args, "quiet" ); } push (@args, "Out=$pre_fout" . "Ltr"); system($^X, $script, @args); } } ### End ###