| 1 | 
#!/bin/sh | 
| 2 | 
exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*- | 
| 3 | 
#!perl -w | 
| 4 | 
 | 
| 5 | 
############################################################## | 
| 6 | 
###                                                        ### | 
| 7 | 
### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ### | 
| 8 | 
###                                                        ### | 
| 9 | 
############################################################## | 
| 10 | 
 | 
| 11 | 
## $Revision: 1.1 $ | 
| 12 | 
## $Date: 2005-04-14 21:41:56 $ | 
| 13 | 
## $Author: gezelter $ | 
| 14 | 
## | 
| 15 | 
##   (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL. | 
| 16 | 
##  | 
| 17 | 
##   (Extensively hacked on by Melissa O'Neill <oneill@cs.sfu.ca>.) | 
| 18 | 
## | 
| 19 | 
## cvs2cl.pl is free software; you can redistribute it and/or modify | 
| 20 | 
## it under the terms of the GNU General Public License as published by | 
| 21 | 
## the Free Software Foundation; either version 2, or (at your option) | 
| 22 | 
## any later version. | 
| 23 | 
## | 
| 24 | 
## cvs2cl.pl is distributed in the hope that it will be useful, | 
| 25 | 
## but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 26 | 
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 27 | 
## GNU General Public License for more details. | 
| 28 | 
## | 
| 29 | 
## You may have received a copy of the GNU General Public License | 
| 30 | 
## along with cvs2cl.pl; see the file COPYING.  If not, write to the | 
| 31 | 
## Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 
| 32 | 
## Boston, MA 02111-1307, USA. | 
| 33 | 
 | 
| 34 | 
 | 
| 35 | 
 | 
| 36 | 
use strict; | 
| 37 | 
use Text::Wrap; | 
| 38 | 
use Time::Local; | 
| 39 | 
use File::Basename; | 
| 40 | 
 | 
| 41 | 
 | 
| 42 | 
# The Plan: | 
| 43 | 
# | 
| 44 | 
# Read in the logs for multiple files, spit out a nice ChangeLog that | 
| 45 | 
# mirrors the information entered during `cvs commit'. | 
| 46 | 
# | 
| 47 | 
# The problem presents some challenges. In an ideal world, we could | 
| 48 | 
# detect files with the same author, log message, and checkin time -- | 
| 49 | 
# each <filelist, author, time, logmessage> would be a changelog entry. | 
| 50 | 
# We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic* | 
| 51 | 
# so checkins can span a range of times.  Also, the directory structure | 
| 52 | 
# could be hierarchical. | 
| 53 | 
# | 
| 54 | 
# Another question is whether we really want to have the ChangeLog | 
| 55 | 
# exactly reflect commits. An author could issue two related commits, | 
| 56 | 
# with different log entries, reflecting a single logical change to the | 
| 57 | 
# source. GNU style ChangeLogs group these under a single author/date. | 
| 58 | 
# We try to do the same. | 
| 59 | 
# | 
| 60 | 
# So, we parse the output of `cvs log', storing log messages in a | 
| 61 | 
# multilevel hash that stores the mapping: | 
| 62 | 
#   directory => author => time => message => filelist | 
| 63 | 
# As we go, we notice "nearby" commit times and store them together | 
| 64 | 
# (i.e., under the same timestamp), so they appear in the same log | 
| 65 | 
# entry. | 
| 66 | 
# | 
| 67 | 
# When we've read all the logs, we twist this mapping into | 
| 68 | 
# a time => author => message => filelist mapping for each directory. | 
| 69 | 
# | 
| 70 | 
# If we're not using the `--distributed' flag, the directory is always | 
| 71 | 
# considered to be `./', even as descend into subdirectories. | 
| 72 | 
 | 
| 73 | 
 | 
| 74 | 
############### Globals ################ | 
| 75 | 
 | 
| 76 | 
 | 
| 77 | 
# What we run to generate it: | 
| 78 | 
my $Log_Source_Command = "cvs log"; | 
| 79 | 
 | 
| 80 | 
# In case we have to print it out: | 
| 81 | 
my $VERSION = '$Revision: 1.1 $'; | 
| 82 | 
$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/; | 
| 83 | 
 | 
| 84 | 
## Vars set by options: | 
| 85 | 
 | 
| 86 | 
# Print debugging messages? | 
| 87 | 
my $Debug = 0; | 
| 88 | 
 | 
| 89 | 
# Just show version and exit? | 
| 90 | 
my $Print_Version = 0; | 
| 91 | 
 | 
| 92 | 
# Just print usage message and exit? | 
| 93 | 
my $Print_Usage = 0; | 
| 94 | 
 | 
| 95 | 
# Single top-level ChangeLog, or one per subdirectory? | 
| 96 | 
my $Distributed = 0; | 
| 97 | 
 | 
| 98 | 
# What file should we generate (defaults to "ChangeLog")? | 
| 99 | 
my $Log_File_Name = "ChangeLog"; | 
| 100 | 
 | 
| 101 | 
# Expand usernames to email addresses based on a map file? | 
| 102 | 
my $User_Map_File = ""; | 
| 103 | 
 | 
| 104 | 
# Output to a file or to stdout? | 
| 105 | 
my $Output_To_Stdout = 0; | 
| 106 | 
 | 
| 107 | 
# Eliminate empty log messages? | 
| 108 | 
my $Prune_Empty_Msgs = 0; | 
| 109 | 
 | 
| 110 | 
# Don't call Text::Wrap on the body of the message | 
| 111 | 
my $No_Wrap = 0; | 
| 112 | 
 | 
| 113 | 
# Separates header from log message.  Code assumes it is either " " or | 
| 114 | 
# "\n\n", so if there's ever an option to set it to something else, | 
| 115 | 
# make sure to go through all conditionals that use this var. | 
| 116 | 
my $After_Header = " "; | 
| 117 | 
 | 
| 118 | 
# Format more for programs than for humans. | 
| 119 | 
my $XML_Output = 0; | 
| 120 | 
 | 
| 121 | 
# Do some special tweaks for log data that was written in FSF | 
| 122 | 
# ChangeLog style. | 
| 123 | 
my $FSF_Style = 0; | 
| 124 | 
 | 
| 125 | 
# Show times in UTC instead of local time | 
| 126 | 
my $UTC_Times = 0; | 
| 127 | 
 | 
| 128 | 
# Show day of week in output? | 
| 129 | 
my $Show_Day_Of_Week = 0; | 
| 130 | 
 | 
| 131 | 
# Show revision numbers in output? | 
| 132 | 
my $Show_Revisions = 0; | 
| 133 | 
 | 
| 134 | 
# Show tags (symbolic names) in output? | 
| 135 | 
my $Show_Tags = 0; | 
| 136 | 
 | 
| 137 | 
# Show branches by symbolic name in output? | 
| 138 | 
my $Show_Branches = 0; | 
| 139 | 
 | 
| 140 | 
# Show only revisions on these branches or their ancestors. | 
| 141 | 
my @Follow_Branches; | 
| 142 | 
 | 
| 143 | 
# Don't bother with files matching this regexp. | 
| 144 | 
my @Ignore_Files; | 
| 145 | 
 | 
| 146 | 
# How exactly we match entries.  We definitely want "o", | 
| 147 | 
# and user might add "i" by using --case-insensitive option. | 
| 148 | 
my $Case_Insensitive = 0; | 
| 149 | 
 | 
| 150 | 
# Maybe only show log messages matching a certain regular expression. | 
| 151 | 
my $Regexp_Gate = ""; | 
| 152 | 
 | 
| 153 | 
# Pass this global option string along to cvs, to the left of `log': | 
| 154 | 
my $Global_Opts = ""; | 
| 155 | 
 | 
| 156 | 
# Pass this option string along to the cvs log subcommand: | 
| 157 | 
my $Command_Opts = ""; | 
| 158 | 
 | 
| 159 | 
# Read log output from stdin instead of invoking cvs log? | 
| 160 | 
my $Input_From_Stdin = 0; | 
| 161 | 
 | 
| 162 | 
# Don't show filenames in output. | 
| 163 | 
my $Hide_Filenames = 0; | 
| 164 | 
 | 
| 165 | 
# Max checkin duration. CVS checkin is not atomic, so we may have checkin | 
| 166 | 
# times that span a range of time. We assume that checkins will last no | 
| 167 | 
# longer than $Max_Checkin_Duration seconds, and that similarly, no | 
| 168 | 
# checkins will happen from the same users with the same message less | 
| 169 | 
# than $Max_Checkin_Duration seconds apart. | 
| 170 | 
my $Max_Checkin_Duration = 180; | 
| 171 | 
 | 
| 172 | 
# What to put at the front of [each] ChangeLog.   | 
| 173 | 
my $ChangeLog_Header = ""; | 
| 174 | 
 | 
| 175 | 
## end vars set by options. | 
| 176 | 
 | 
| 177 | 
# In 'cvs log' output, one long unbroken line of equal signs separates | 
| 178 | 
# files: | 
| 179 | 
my $file_separator = "=======================================" | 
| 180 | 
                   . "======================================"; | 
| 181 | 
 | 
| 182 | 
# In 'cvs log' output, a shorter line of dashes separates log messages | 
| 183 | 
# within a file: | 
| 184 | 
my $logmsg_separator = "----------------------------"; | 
| 185 | 
 | 
| 186 | 
 | 
| 187 | 
############### End globals ############ | 
| 188 | 
 | 
| 189 | 
 | 
| 190 | 
 | 
| 191 | 
 | 
| 192 | 
&parse_options (); | 
| 193 | 
&derive_change_log (); | 
| 194 | 
 | 
| 195 | 
 | 
| 196 | 
 | 
| 197 | 
### Everything below is subroutine definitions. ### | 
| 198 | 
 | 
| 199 | 
# Fills up a ChangeLog structure in the current directory. | 
| 200 | 
sub derive_change_log () | 
| 201 | 
{ | 
| 202 | 
  # See "The Plan" above for a full explanation. | 
| 203 | 
   | 
| 204 | 
  my %grand_poobah; | 
| 205 | 
 | 
| 206 | 
  my $file_full_path; | 
| 207 | 
  my $time; | 
| 208 | 
  my $revision; | 
| 209 | 
  my $author; | 
| 210 | 
  my $msg_txt; | 
| 211 | 
  my $detected_file_separator; | 
| 212 | 
 | 
| 213 | 
  # We might be expanding usernames | 
| 214 | 
  my %usermap; | 
| 215 | 
 | 
| 216 | 
  # In general, it's probably not very maintainable to use state | 
| 217 | 
  # variables like this to tell the loop what it's doing at any given | 
| 218 | 
  # moment, but this is only the first one, and if we never have more | 
| 219 | 
  # than a few of these, it's okay. | 
| 220 | 
  my $collecting_symbolic_names = 0; | 
| 221 | 
  my %symbolic_names;    # Where tag names get stored. | 
| 222 | 
  my %branch_names;      # We'll grab branch names while we're at it. | 
| 223 | 
  my %branch_numbers;    # Save some revisions for @Follow_Branches | 
| 224 | 
  my @branch_roots;      # For showing which files are branch ancestors. | 
| 225 | 
 | 
| 226 | 
  # Bleargh.  Compensate for a deficiency of custom wrapping. | 
| 227 | 
  if (($After_Header ne " ") and $FSF_Style) | 
| 228 | 
  { | 
| 229 | 
    $After_Header .= "\t"; | 
| 230 | 
  } | 
| 231 | 
 | 
| 232 | 
  if (! $Input_From_Stdin) { | 
| 233 | 
    open (LOG_SOURCE, "$Log_Source_Command |") | 
| 234 | 
        or die "unable to run \"${Log_Source_Command}\""; | 
| 235 | 
  } | 
| 236 | 
  else { | 
| 237 | 
    open (LOG_SOURCE, "-") or die "unable to open stdin for reading"; | 
| 238 | 
  } | 
| 239 | 
 | 
| 240 | 
  %usermap = &maybe_read_user_map_file (); | 
| 241 | 
 | 
| 242 | 
  while (<LOG_SOURCE>) | 
| 243 | 
  { | 
| 244 | 
    # If on a new file and don't see filename, skip until we find it, and | 
| 245 | 
    # when we find it, grab it. | 
| 246 | 
    if ((! (defined $file_full_path)) and /^Working file: (.*)/)  | 
| 247 | 
    { | 
| 248 | 
      $file_full_path = $1; | 
| 249 | 
      if (@Ignore_Files)  | 
| 250 | 
      { | 
| 251 | 
        my $base; | 
| 252 | 
        ($base, undef, undef) = fileparse ($file_full_path); | 
| 253 | 
        # Ouch, I wish trailing operators in regexps could be | 
| 254 | 
        # evaluated on the fly! | 
| 255 | 
        if ($Case_Insensitive) { | 
| 256 | 
          if (grep ($file_full_path =~ m|$_|i, @Ignore_Files)) { | 
| 257 | 
            undef $file_full_path; | 
| 258 | 
          } | 
| 259 | 
        } | 
| 260 | 
        elsif (grep ($file_full_path =~ m|$_|, @Ignore_Files)) { | 
| 261 | 
          undef $file_full_path; | 
| 262 | 
        } | 
| 263 | 
      } | 
| 264 | 
      next; | 
| 265 | 
    } | 
| 266 | 
 | 
| 267 | 
    # Just spin wheels if no file defined yet. | 
| 268 | 
    next if (! $file_full_path); | 
| 269 | 
 | 
| 270 | 
    # Collect tag names in case we're asked to print them in the output. | 
| 271 | 
    if (/^symbolic names:$/) { | 
| 272 | 
      $collecting_symbolic_names = 1; | 
| 273 | 
      next;  # There's no more info on this line, so skip to next | 
| 274 | 
    } | 
| 275 | 
    if ($collecting_symbolic_names) | 
| 276 | 
    { | 
| 277 | 
      # All tag names are listed with whitespace in front in cvs log | 
| 278 | 
      # output; so if see non-whitespace, then we're done collecting. | 
| 279 | 
      if (/^\S/) { | 
| 280 | 
        $collecting_symbolic_names = 0; | 
| 281 | 
      } | 
| 282 | 
      else    # we're looking at a tag name, so parse & store it | 
| 283 | 
      { | 
| 284 | 
        # According to the Cederqvist manual, in node "Tags", tag | 
| 285 | 
        # names must start with an uppercase or lowercase letter and | 
| 286 | 
        # can contain uppercase and lowercase letters, digits, `-', | 
| 287 | 
        # and `_'.  However, it's not our place to enforce that, so | 
| 288 | 
        # we'll allow anything CVS hands us to be a tag: | 
| 289 | 
        /^\s+([^:]+): ([\d.]+)$/; | 
| 290 | 
        my $tag_name = $1; | 
| 291 | 
        my $tag_rev  = $2; | 
| 292 | 
 | 
| 293 | 
        # A branch number either has an odd number of digit sections | 
| 294 | 
        # (and hence an even number of dots), or has ".0." as the | 
| 295 | 
        # second-to-last digit section.  Test for these conditions. | 
| 296 | 
        my $real_branch_rev = ""; | 
| 297 | 
        if (($tag_rev =~ /^(\d+\.\d+\.)+\d+$/)   # Even number of dots... | 
| 298 | 
            and (! ($tag_rev =~ /^(1\.)+1$/)))   # ...but not "1.[1.]1" | 
| 299 | 
        { | 
| 300 | 
          $real_branch_rev = $tag_rev; | 
| 301 | 
        } | 
| 302 | 
        elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/)  # Has ".0." | 
| 303 | 
        { | 
| 304 | 
          $real_branch_rev = $1 . $3; | 
| 305 | 
        } | 
| 306 | 
        # If we got a branch, record its number. | 
| 307 | 
        if ($real_branch_rev) | 
| 308 | 
        { | 
| 309 | 
          $branch_names{$real_branch_rev} = $tag_name; | 
| 310 | 
          if (@Follow_Branches) { | 
| 311 | 
            if (grep ($_ eq $tag_name, @Follow_Branches)) { | 
| 312 | 
              $branch_numbers{$tag_name} = $real_branch_rev; | 
| 313 | 
            } | 
| 314 | 
          } | 
| 315 | 
        } | 
| 316 | 
        else { | 
| 317 | 
          # Else it's just a regular (non-branch) tag. | 
| 318 | 
          push (@{$symbolic_names{$tag_rev}}, $tag_name); | 
| 319 | 
        } | 
| 320 | 
      } | 
| 321 | 
    } | 
| 322 | 
    # End of code for collecting tag names. | 
| 323 | 
 | 
| 324 | 
    # If have file name, but not revision, and see revision, then grab | 
| 325 | 
    # it.  (We collect unconditionally, even though we may or may not | 
| 326 | 
    # ever use it.) | 
| 327 | 
    if ((! (defined $revision)) and (/^revision (\d+\.[\d.]+)/)) | 
| 328 | 
    { | 
| 329 | 
      $revision = $1; | 
| 330 | 
 | 
| 331 | 
      if (@Follow_Branches) | 
| 332 | 
      { | 
| 333 | 
        foreach my $branch (@Follow_Branches)  | 
| 334 | 
        { | 
| 335 | 
          # Special case for following trunk revisions | 
| 336 | 
          if (($branch =~ /^trunk$/i) and ($revision =~ /^[0-9]+\.[0-9]+$/)) | 
| 337 | 
          { | 
| 338 | 
            goto dengo; | 
| 339 | 
          } | 
| 340 | 
 | 
| 341 | 
          my $branch_number = $branch_numbers{$branch}; | 
| 342 | 
          if ($branch_number)  | 
| 343 | 
          { | 
| 344 | 
            # Are we on one of the follow branches or an ancestor of | 
| 345 | 
            # same? | 
| 346 | 
            # | 
| 347 | 
            # If this revision is a prefix of the branch number, or | 
| 348 | 
            # possibly is less in the minormost number, OR if this | 
| 349 | 
            # branch number is a prefix of the revision, then yes. | 
| 350 | 
            # Otherwise, no. | 
| 351 | 
            # | 
| 352 | 
            # So below, we determine if any of those conditions are | 
| 353 | 
            # met. | 
| 354 | 
             | 
| 355 | 
            # Trivial case: is this revision on the branch? | 
| 356 | 
            # (Compare this way to avoid regexps that screw up Emacs | 
| 357 | 
            # indentation, argh.) | 
| 358 | 
            if ((substr ($revision, 0, ((length ($branch_number)) + 1))) | 
| 359 | 
                eq ($branch_number . ".")) | 
| 360 | 
            { | 
| 361 | 
              goto dengo; | 
| 362 | 
            } | 
| 363 | 
            # Non-trivial case: check if rev is ancestral to branch | 
| 364 | 
            elsif ((length ($branch_number)) > (length ($revision))) | 
| 365 | 
            { | 
| 366 | 
              $revision =~ /^((?:\d+\.)+)(\d+)$/; | 
| 367 | 
              my $r_left = $1;          # still has the trailing "." | 
| 368 | 
              my $r_end = $2; | 
| 369 | 
               | 
| 370 | 
              $branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/; | 
| 371 | 
              my $b_left = $1;  # still has trailing "." | 
| 372 | 
              my $b_mid  = $2;   # has no trailing "." | 
| 373 | 
 | 
| 374 | 
              if (($r_left eq $b_left) | 
| 375 | 
                  && ($r_end <= $b_mid)) | 
| 376 | 
              { | 
| 377 | 
                goto dengo; | 
| 378 | 
              } | 
| 379 | 
            } | 
| 380 | 
          } | 
| 381 | 
        } | 
| 382 | 
      } | 
| 383 | 
      else    # (! @Follow_Branches) | 
| 384 | 
      { | 
| 385 | 
        next; | 
| 386 | 
      } | 
| 387 | 
 | 
| 388 | 
      # Else we are following branches, but this revision isn't on the | 
| 389 | 
      # path.  So skip it. | 
| 390 | 
      undef $revision; | 
| 391 | 
    dengo: | 
| 392 | 
      next; | 
| 393 | 
    } | 
| 394 | 
     | 
| 395 | 
    # If we don't have a revision right now, we couldn't possibly | 
| 396 | 
    # be looking at anything useful.  | 
| 397 | 
    if (! (defined ($revision))) { | 
| 398 | 
      $detected_file_separator = /^$file_separator$/o; | 
| 399 | 
      if ($detected_file_separator) { | 
| 400 | 
        # No revisions for this file; can happen, e.g. "cvs log -d DATE" | 
| 401 | 
        goto CLEAR; | 
| 402 | 
      } | 
| 403 | 
      else { | 
| 404 | 
        next; | 
| 405 | 
      } | 
| 406 | 
    } | 
| 407 | 
 | 
| 408 | 
    # If have file name but not date and author, and see date or | 
| 409 | 
    # author, then grab them: | 
| 410 | 
    unless (defined $time)  | 
| 411 | 
    { | 
| 412 | 
      if (/^date: .*/) | 
| 413 | 
      { | 
| 414 | 
        ($time, $author) = &parse_date_and_author ($_); | 
| 415 | 
        if (defined ($usermap{$author}) and $usermap{$author}) { | 
| 416 | 
          $author = $usermap{$author}; | 
| 417 | 
        } | 
| 418 | 
      } | 
| 419 | 
      else { | 
| 420 | 
        $detected_file_separator = /^$file_separator$/o; | 
| 421 | 
        if ($detected_file_separator) { | 
| 422 | 
          # No revisions for this file; can happen, e.g. "cvs log -d DATE" | 
| 423 | 
          goto CLEAR; | 
| 424 | 
        } | 
| 425 | 
      } | 
| 426 | 
      # If the date/time/author hasn't been found yet, we couldn't | 
| 427 | 
      # possibly care about anything we see.  So skip: | 
| 428 | 
      next; | 
| 429 | 
    } | 
| 430 | 
 | 
| 431 | 
    # A "branches: ..." line here indicates that one or more branches | 
| 432 | 
    # are rooted at this revision.  If we're showing branches, then we | 
| 433 | 
    # want to show that fact as well, so we collect all the branches | 
| 434 | 
    # that this is the latest ancestor of and store them in | 
| 435 | 
    # @branch_roots.  Just for reference, the format of the line we're | 
| 436 | 
    # seeing at this point is: | 
| 437 | 
    # | 
| 438 | 
    #    branches:  1.5.2;  1.5.4;  ...; | 
| 439 | 
    # | 
| 440 | 
    # Okay, here goes: | 
| 441 | 
 | 
| 442 | 
    if (/^branches:\s+(.*);$/) | 
| 443 | 
    { | 
| 444 | 
      if ($Show_Branches) | 
| 445 | 
      { | 
| 446 | 
        my $lst = $1; | 
| 447 | 
        $lst =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1 | 
| 448 | 
        if ($lst) { | 
| 449 | 
          @branch_roots = split (/;\s+/, $lst); | 
| 450 | 
        } | 
| 451 | 
        else { | 
| 452 | 
          undef @branch_roots; | 
| 453 | 
        } | 
| 454 | 
        next; | 
| 455 | 
      } | 
| 456 | 
      else | 
| 457 | 
      { | 
| 458 | 
        # Ugh.  This really bothers me.  Suppose we see a log entry | 
| 459 | 
        # like this: | 
| 460 | 
        # | 
| 461 | 
        #    ---------------------------- | 
| 462 | 
        #    revision 1.1 | 
| 463 | 
        #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp; | 
| 464 | 
        #    branches:  1.1.2; | 
| 465 | 
        #    Intended first line of log message begins here. | 
| 466 | 
        #    ---------------------------- | 
| 467 | 
        # | 
| 468 | 
        # The question is, how we can tell the difference between that | 
| 469 | 
        # log message and a *two*-line log message whose first line is | 
| 470 | 
        #  | 
| 471 | 
        #    "branches:  1.1.2;" | 
| 472 | 
        # | 
| 473 | 
        # See the problem?  The output of "cvs log" is inherently | 
| 474 | 
        # ambiguous. | 
| 475 | 
        # | 
| 476 | 
        # For now, we punt: we liberally assume that people don't | 
| 477 | 
        # write log messages like that, and just toss a "branches:" | 
| 478 | 
        # line if we see it but are not showing branches.  I hope no | 
| 479 | 
        # one ever loses real log data because of this. | 
| 480 | 
        next; | 
| 481 | 
      } | 
| 482 | 
    } | 
| 483 | 
 | 
| 484 | 
    # If have file name, time, and author, then we're just grabbing | 
| 485 | 
    # log message texts: | 
| 486 | 
    $detected_file_separator = /^$file_separator$/o; | 
| 487 | 
    if ($detected_file_separator && ! (defined $revision)) { | 
| 488 | 
      # No revisions for this file; can happen, e.g. "cvs log -d DATE" | 
| 489 | 
      goto CLEAR; | 
| 490 | 
    } | 
| 491 | 
    unless ($detected_file_separator || /^$logmsg_separator$/o) | 
| 492 | 
    { | 
| 493 | 
      $msg_txt .= $_;   # Normally, just accumulate the message... | 
| 494 | 
      next; | 
| 495 | 
    } | 
| 496 | 
    # ... until a msg separator is encountered: | 
| 497 | 
    # Ensure the message contains something: | 
| 498 | 
    if ((! $msg_txt) | 
| 499 | 
        || ($msg_txt =~ /^\s*\.\s*$|^\s*$/) | 
| 500 | 
        || ($msg_txt =~ /\*\*\* empty log message \*\*\*/))  | 
| 501 | 
    { | 
| 502 | 
      if ($Prune_Empty_Msgs) { | 
| 503 | 
        goto CLEAR; | 
| 504 | 
      } | 
| 505 | 
      # else | 
| 506 | 
      $msg_txt = "[no log message]\n"; | 
| 507 | 
    } | 
| 508 | 
 | 
| 509 | 
    ### Store it all in the Grand Poobah: | 
| 510 | 
    { | 
| 511 | 
      my $dir_key;        # key into %grand_poobah | 
| 512 | 
      my %qunk;           # complicated little jobbie, see below | 
| 513 | 
 | 
| 514 | 
      # Each revision of a file has a little data structure (a `qunk')  | 
| 515 | 
      # associated with it.  That data structure holds not only the | 
| 516 | 
      # file's name, but any additional information about the file | 
| 517 | 
      # that might be needed in the output, such as the revision | 
| 518 | 
      # number, tags, branches, etc.  The reason to have these things | 
| 519 | 
      # arranged in a data structure, instead of just appending them | 
| 520 | 
      # textually to the file's name, is that we may want to do a | 
| 521 | 
      # little rearranging later as we write the output.  For example, | 
| 522 | 
      # all the files on a given tag/branch will go together, followed | 
| 523 | 
      # by the tag in parentheses (so trunk or otherwise non-tagged | 
| 524 | 
      # files would go at the end of the file list for a given log | 
| 525 | 
      # message).  This rearrangement is a lot easier to do if we | 
| 526 | 
      # don't have to reparse the text. | 
| 527 | 
      # | 
| 528 | 
      # A qunk looks like this: | 
| 529 | 
      # | 
| 530 | 
      #   {  | 
| 531 | 
      #     filename    =>    "hello.c", | 
| 532 | 
      #     revision    =>    "1.4.3.2", | 
| 533 | 
      #     time        =>    a timegm() return value (moment of commit) | 
| 534 | 
      #     tags        =>    [ "tag1", "tag2", ... ], | 
| 535 | 
      #     branch      =>    "branchname" # There should be only one, right? | 
| 536 | 
      #     branchroots =>    [ "branchtag1", "branchtag2", ... ] | 
| 537 | 
      #   } | 
| 538 | 
 | 
| 539 | 
      if ($Distributed) { | 
| 540 | 
        # Just the basename, don't include the path. | 
| 541 | 
        ($qunk{'filename'}, $dir_key, undef) = fileparse ($file_full_path); | 
| 542 | 
      } | 
| 543 | 
      else { | 
| 544 | 
        $dir_key = "./"; | 
| 545 | 
        $qunk{'filename'} = $file_full_path; | 
| 546 | 
      } | 
| 547 | 
 | 
| 548 | 
      # This may someday be used in a more sophisticated calculation | 
| 549 | 
      # of what other files are involved in this commit.  For now, we | 
| 550 | 
      # don't use it, because the common-commit-detection algorithm is | 
| 551 | 
      # hypothesized to be "good enough" as it stands. | 
| 552 | 
      $qunk{'time'} = $time; | 
| 553 | 
 | 
| 554 | 
      # We might be including revision numbers and/or tags and/or | 
| 555 | 
      # branch names in the output.  Most of the code from here to | 
| 556 | 
      # loop-end deals with organizing these in qunk. | 
| 557 | 
 | 
| 558 | 
      $qunk{'revision'} = $revision; | 
| 559 | 
 | 
| 560 | 
      # Grab the branch, even though we may or may not need it: | 
| 561 | 
      $qunk{'revision'} =~ /((?:\d+\.)+)\d+/; | 
| 562 | 
      my $branch_prefix = $1; | 
| 563 | 
      $branch_prefix =~ s/\.$//;  # strip off final dot | 
| 564 | 
      if ($branch_names{$branch_prefix}) { | 
| 565 | 
        $qunk{'branch'} = $branch_names{$branch_prefix}; | 
| 566 | 
      } | 
| 567 | 
 | 
| 568 | 
      # If there's anything in the @branch_roots array, then this | 
| 569 | 
      # revision is the root of at least one branch.  We'll display | 
| 570 | 
      # them as branch names instead of revision numbers, the | 
| 571 | 
      # substitution for which is done directly in the array: | 
| 572 | 
      if (@branch_roots) { | 
| 573 | 
        my @roots = map { $branch_names{$_} } @branch_roots; | 
| 574 | 
        $qunk{'branchroots'} = \@roots; | 
| 575 | 
      } | 
| 576 | 
 | 
| 577 | 
      # Save tags too. | 
| 578 | 
      if (defined ($symbolic_names{$revision})) { | 
| 579 | 
        $qunk{'tags'} = $symbolic_names{$revision}; | 
| 580 | 
        delete $symbolic_names{$revision}; | 
| 581 | 
      } | 
| 582 | 
 | 
| 583 | 
      # Add this file to the list | 
| 584 | 
      # (We use many spoonfuls of autovivication magic. Hashes and arrays | 
| 585 | 
      # will spring into existence if they aren't there already.) | 
| 586 | 
 | 
| 587 | 
      &debug ("(pushing log msg for ${dir_key}$qunk{'filename'})\n"); | 
| 588 | 
 | 
| 589 | 
      # Store with the files in this commit.  Later we'll loop through | 
| 590 | 
      # again, making sure that revisions with the same log message | 
| 591 | 
      # and nearby commit times are grouped together as one commit. | 
| 592 | 
      push (@{$grand_poobah{$dir_key}{$author}{$time}{$msg_txt}}, \%qunk); | 
| 593 | 
    } | 
| 594 | 
 | 
| 595 | 
  CLEAR: | 
| 596 | 
    # Make way for the next message | 
| 597 | 
    undef $msg_txt; | 
| 598 | 
    undef $time; | 
| 599 | 
    undef $revision; | 
| 600 | 
    undef $author; | 
| 601 | 
    undef @branch_roots; | 
| 602 | 
 | 
| 603 | 
    # Maybe even make way for the next file: | 
| 604 | 
    if ($detected_file_separator) { | 
| 605 | 
      undef $file_full_path; | 
| 606 | 
      undef %branch_names; | 
| 607 | 
      undef %branch_numbers; | 
| 608 | 
      undef %symbolic_names; | 
| 609 | 
    } | 
| 610 | 
  } | 
| 611 | 
 | 
| 612 | 
  close (LOG_SOURCE); | 
| 613 | 
 | 
| 614 | 
  ### Process each ChangeLog | 
| 615 | 
 | 
| 616 | 
  while (my ($dir,$authorhash) = each %grand_poobah) | 
| 617 | 
  { | 
| 618 | 
    &debug ("DOING DIR: $dir\n"); | 
| 619 | 
 | 
| 620 | 
    # Here we twist our hash around, from being | 
| 621 | 
    #   author => time => message => filelist | 
| 622 | 
    # in %$authorhash to | 
| 623 | 
    #   time => author => message => filelist | 
| 624 | 
    # in %changelog.   | 
| 625 | 
    # | 
| 626 | 
    # This is also where we merge entries.  The algorithm proceeds | 
| 627 | 
    # through the timeline of the changelog with a sliding window of | 
| 628 | 
    # $Max_Checkin_Duration seconds; within that window, entries that | 
| 629 | 
    # have the same log message are merged. | 
| 630 | 
    # | 
| 631 | 
    # (To save space, we zap %$authorhash after we've copied | 
| 632 | 
    # everything out of it.)  | 
| 633 | 
 | 
| 634 | 
    my %changelog; | 
| 635 | 
    while (my ($author,$timehash) = each %$authorhash) | 
| 636 | 
    { | 
| 637 | 
      my $lasttime; | 
| 638 | 
      my %stamptime; | 
| 639 | 
      foreach my $time (sort {$main::a <=> $main::b} (keys %$timehash)) | 
| 640 | 
      { | 
| 641 | 
        my $msghash = $timehash->{$time}; | 
| 642 | 
        while (my ($msg,$qunklist) = each %$msghash) | 
| 643 | 
        { | 
| 644 | 
          my $stamptime = $stamptime{$msg}; | 
| 645 | 
          if ((defined $stamptime) | 
| 646 | 
              and (($time - $stamptime) < $Max_Checkin_Duration) | 
| 647 | 
              and (defined $changelog{$stamptime}{$author}{$msg})) | 
| 648 | 
          { | 
| 649 | 
            push(@{$changelog{$stamptime}{$author}{$msg}}, @$qunklist); | 
| 650 | 
          } | 
| 651 | 
          else { | 
| 652 | 
            $changelog{$time}{$author}{$msg} = $qunklist; | 
| 653 | 
            $stamptime{$msg} = $time; | 
| 654 | 
          } | 
| 655 | 
        } | 
| 656 | 
      } | 
| 657 | 
    } | 
| 658 | 
    undef (%$authorhash); | 
| 659 | 
 | 
| 660 | 
    ### Now we can write out the ChangeLog! | 
| 661 | 
 | 
| 662 | 
    my ($logfile_here, $logfile_bak, $tmpfile); | 
| 663 | 
 | 
| 664 | 
    if (! $Output_To_Stdout) { | 
| 665 | 
      $logfile_here =  $dir . $Log_File_Name; | 
| 666 | 
      $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem | 
| 667 | 
      $tmpfile      = "${logfile_here}.cvs2cl$$.tmp"; | 
| 668 | 
      $logfile_bak  = "${logfile_here}.bak"; | 
| 669 | 
 | 
| 670 | 
      open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\""; | 
| 671 | 
    } | 
| 672 | 
    else { | 
| 673 | 
      open (LOG_OUT, ">-") or die "Unable to open stdout for writing"; | 
| 674 | 
    } | 
| 675 | 
 | 
| 676 | 
    print LOG_OUT $ChangeLog_Header; | 
| 677 | 
 | 
| 678 | 
    if ($XML_Output) { | 
| 679 | 
      print LOG_OUT "<?xml version=\"1.0\"?>\n\n" | 
| 680 | 
          . "<changelog xmlns=\"http://www.red-bean.com/xmlns/cvs2cl/\">\n\n"; | 
| 681 | 
    } | 
| 682 | 
 | 
| 683 | 
    foreach my $time (sort {$main::b <=> $main::a} (keys %changelog)) | 
| 684 | 
    { | 
| 685 | 
      my $authorhash = $changelog{$time}; | 
| 686 | 
      while (my ($author,$mesghash) = each %$authorhash) | 
| 687 | 
      { | 
| 688 | 
        # If XML, escape in outer loop to avoid compound quoting: | 
| 689 | 
        if ($XML_Output) { | 
| 690 | 
          $author = &xml_escape ($author); | 
| 691 | 
        } | 
| 692 | 
 | 
| 693 | 
        while (my ($msg,$qunklist) = each %$mesghash) | 
| 694 | 
        { | 
| 695 | 
          my $files               = &pretty_file_list ($qunklist); | 
| 696 | 
          my $header_line;          # date and author | 
| 697 | 
          my $body;                 # see below | 
| 698 | 
          my $wholething;           # $header_line + $body | 
| 699 | 
 | 
| 700 | 
          # Set up the date/author line. | 
| 701 | 
          # kff todo: do some more XML munging here, on the header | 
| 702 | 
          # part of the entry: | 
| 703 | 
          my ($ignore,$min,$hour,$mday,$mon,$year,$wday) | 
| 704 | 
              = $UTC_Times ? gmtime($time) : localtime($time); | 
| 705 | 
 | 
| 706 | 
          # XML output includes everything else, we might as well make | 
| 707 | 
          # it always include Day Of Week too, for consistency. | 
| 708 | 
          if ($Show_Day_Of_Week or $XML_Output) { | 
| 709 | 
            $wday = ("Sunday", "Monday", "Tuesday", "Wednesday", | 
| 710 | 
                     "Thursday", "Friday", "Saturday")[$wday]; | 
| 711 | 
            $wday = ($XML_Output) ? "<weekday>${wday}</weekday>\n" : " $wday"; | 
| 712 | 
          } | 
| 713 | 
          else { | 
| 714 | 
            $wday = ""; | 
| 715 | 
          } | 
| 716 | 
 | 
| 717 | 
          if ($XML_Output) { | 
| 718 | 
            $header_line =  | 
| 719 | 
                sprintf ("<date>%4u-%02u-%02u</date>\n" | 
| 720 | 
                         . "${wday}" | 
| 721 | 
                         . "<time>%02u:%02u</time>\n" | 
| 722 | 
                         . "<author>%s</author>\n", | 
| 723 | 
                         $year+1900, $mon+1, $mday, $hour, $min, $author); | 
| 724 | 
          } | 
| 725 | 
          else { | 
| 726 | 
            $header_line =  | 
| 727 | 
                sprintf ("%4u-%02u-%02u${wday} %02u:%02u  %s\n\n", | 
| 728 | 
                         $year+1900, $mon+1, $mday, $hour, $min, $author); | 
| 729 | 
          } | 
| 730 | 
 | 
| 731 | 
          # Reshape the body according to user preferences. | 
| 732 | 
          if ($XML_Output)  | 
| 733 | 
          { | 
| 734 | 
            $msg = &preprocess_msg_text ($msg); | 
| 735 | 
            $body = $files . $msg; | 
| 736 | 
          } | 
| 737 | 
          elsif ($No_Wrap)  | 
| 738 | 
          { | 
| 739 | 
            $msg = &preprocess_msg_text ($msg); | 
| 740 | 
            $files = wrap ("\t", "      ", "$files"); | 
| 741 | 
            $msg =~ s/\n(.*)/\n\t$1/g; | 
| 742 | 
            unless ($After_Header eq " ") { | 
| 743 | 
              $msg =~ s/^(.*)/\t$1/g; | 
| 744 | 
            } | 
| 745 | 
            $body = $files . $After_Header . $msg; | 
| 746 | 
          } | 
| 747 | 
          else  # do wrapping, either FSF-style or regular | 
| 748 | 
          { | 
| 749 | 
            if ($FSF_Style) | 
| 750 | 
            { | 
| 751 | 
              $files = wrap ("\t", "        ", "$files"); | 
| 752 | 
               | 
| 753 | 
              my $files_last_line_len = 0; | 
| 754 | 
              if ($After_Header eq " ") | 
| 755 | 
              { | 
| 756 | 
                $files_last_line_len = &last_line_len ($files); | 
| 757 | 
                $files_last_line_len += 1;  # for $After_Header | 
| 758 | 
              } | 
| 759 | 
               | 
| 760 | 
              $msg = &wrap_log_entry | 
| 761 | 
                  ($msg, "\t", 69 - $files_last_line_len, 69); | 
| 762 | 
              $body = $files . $After_Header . $msg; | 
| 763 | 
            } | 
| 764 | 
            else  # not FSF-style | 
| 765 | 
            { | 
| 766 | 
              $msg = &preprocess_msg_text ($msg); | 
| 767 | 
              $body = $files . $After_Header . $msg; | 
| 768 | 
              $body = wrap ("\t", "        ", "$body"); | 
| 769 | 
            } | 
| 770 | 
          } | 
| 771 | 
 | 
| 772 | 
          $wholething = $header_line . $body; | 
| 773 | 
 | 
| 774 | 
          if ($XML_Output) { | 
| 775 | 
            $wholething = "<entry>\n${wholething}</entry>\n"; | 
| 776 | 
          } | 
| 777 | 
 | 
| 778 | 
          # One last check: make sure it passes the regexp test, if the | 
| 779 | 
          # user asked for that.  We have to do it here, so that the | 
| 780 | 
          # test can match against information in the header as well | 
| 781 | 
          # as in the text of the log message. | 
| 782 | 
 | 
| 783 | 
          # How annoying to duplicate so much code just because I | 
| 784 | 
          # can't figure out a way to evaluate scalars on the trailing | 
| 785 | 
          # operator portion of a regular expression.  Grrr. | 
| 786 | 
          if ($Case_Insensitive) { | 
| 787 | 
            unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/oi)) {  | 
| 788 | 
              print LOG_OUT "${wholething}\n"; | 
| 789 | 
            } | 
| 790 | 
          } | 
| 791 | 
          else { | 
| 792 | 
            unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/o)) {  | 
| 793 | 
              print LOG_OUT "${wholething}\n"; | 
| 794 | 
            } | 
| 795 | 
          } | 
| 796 | 
        } | 
| 797 | 
      } | 
| 798 | 
    } | 
| 799 | 
 | 
| 800 | 
    if ($XML_Output) { | 
| 801 | 
      print LOG_OUT "</changelog>\n"; | 
| 802 | 
    } | 
| 803 | 
 | 
| 804 | 
    close (LOG_OUT); | 
| 805 | 
 | 
| 806 | 
    if (! $Output_To_Stdout)  | 
| 807 | 
    { | 
| 808 | 
      if (-f $logfile_here) { | 
| 809 | 
        rename ($logfile_here, $logfile_bak); | 
| 810 | 
      }  | 
| 811 | 
      rename ($tmpfile, $logfile_here); | 
| 812 | 
    } | 
| 813 | 
  } | 
| 814 | 
} | 
| 815 | 
 | 
| 816 | 
 | 
| 817 | 
sub parse_date_and_author () | 
| 818 | 
{ | 
| 819 | 
  # Parses the date/time and author out of a line like:  | 
| 820 | 
  # | 
| 821 | 
  # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp; | 
| 822 | 
 | 
| 823 | 
  my $line = shift; | 
| 824 | 
 | 
| 825 | 
  my ($year, $mon, $mday, $hours, $min, $secs, $author) = $line =~ | 
| 826 | 
      m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);# | 
| 827 | 
          or  die "Couldn't parse date ``$line''"; | 
| 828 | 
  die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258); | 
| 829 | 
  # Kinda arbitrary, but useful as a sanity check | 
| 830 | 
  my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year-1900); | 
| 831 | 
 | 
| 832 | 
  return ($time, $author); | 
| 833 | 
} | 
| 834 | 
 | 
| 835 | 
 | 
| 836 | 
# Here we take a bunch of qunks and convert them into printed | 
| 837 | 
# summary that will include all the information the user asked for. | 
| 838 | 
sub pretty_file_list () | 
| 839 | 
{ | 
| 840 | 
  if ($Hide_Filenames and (! $XML_Output)) { | 
| 841 | 
    return ""; | 
| 842 | 
  } | 
| 843 | 
 | 
| 844 | 
  my $qunksref = shift; | 
| 845 | 
  my @qunkrefs = @$qunksref; | 
| 846 | 
  my @filenames; | 
| 847 | 
  my $beauty = "";          # The accumulating header string for this entry. | 
| 848 | 
  my %non_unanimous_tags;   # Tags found in a proper subset of qunks | 
| 849 | 
  my %unanimous_tags;       # Tags found in all qunks | 
| 850 | 
  my %all_branches;         # Branches found in any qunk | 
| 851 | 
  my $common_dir = undef;   # Dir prefix common to all files ("" if none) | 
| 852 | 
  my $fbegun = 0;           # Did we begin printing filenames yet? | 
| 853 | 
   | 
| 854 | 
  # First, loop over the qunks gathering all the tag/branch names. | 
| 855 | 
  # We'll put them all in non_unanimous_tags, and take out the | 
| 856 | 
  # unanimous ones later. | 
| 857 | 
  foreach my $qunkref (@qunkrefs)  | 
| 858 | 
  { | 
| 859 | 
    # Keep track of whether all the files in this commit were in the | 
| 860 | 
    # same directory, and memorize it if so.  We can make the output a | 
| 861 | 
    # little more compact by mentioning the directory only once. | 
| 862 | 
    if ((scalar (@qunkrefs)) > 1) | 
| 863 | 
    { | 
| 864 | 
      if (! (defined ($common_dir))) | 
| 865 | 
      { | 
| 866 | 
        my ($base, $dir); | 
| 867 | 
        ($base, $dir, undef) = fileparse ($$qunkref{'filename'}); | 
| 868 | 
 | 
| 869 | 
        if ((! (defined ($dir)))  # this first case is sheer paranoia | 
| 870 | 
            or ($dir eq "") | 
| 871 | 
            or ($dir eq "./") | 
| 872 | 
            or ($dir eq ".\\"))  | 
| 873 | 
        { | 
| 874 | 
          $common_dir = ""; | 
| 875 | 
        } | 
| 876 | 
        else | 
| 877 | 
        { | 
| 878 | 
          $common_dir = $dir; | 
| 879 | 
        } | 
| 880 | 
      } | 
| 881 | 
      elsif ($common_dir ne "") | 
| 882 | 
      { | 
| 883 | 
        # Already have a common dir prefix, so how much of it can we preserve? | 
| 884 | 
        $common_dir = &common_path_prefix ($$qunkref{'filename'}, $common_dir); | 
| 885 | 
      } | 
| 886 | 
    } | 
| 887 | 
    else  # only one file in this entry anyway, so common dir not an issue | 
| 888 | 
    { | 
| 889 | 
      $common_dir = ""; | 
| 890 | 
    } | 
| 891 | 
 | 
| 892 | 
    if (defined ($$qunkref{'branch'})) { | 
| 893 | 
      $all_branches{$$qunkref{'branch'}} = 1; | 
| 894 | 
    } | 
| 895 | 
    if (defined ($$qunkref{'tags'})) { | 
| 896 | 
      foreach my $tag (@{$$qunkref{'tags'}}) { | 
| 897 | 
        $non_unanimous_tags{$tag} = 1; | 
| 898 | 
      } | 
| 899 | 
    } | 
| 900 | 
  } | 
| 901 | 
 | 
| 902 | 
  # Any tag held by all qunks will be printed specially... but only if | 
| 903 | 
  # there are multiple qunks in the first place! | 
| 904 | 
  if ((scalar (@qunkrefs)) > 1) { | 
| 905 | 
    foreach my $tag (keys (%non_unanimous_tags)) { | 
| 906 | 
      my $everyone_has_this_tag = 1; | 
| 907 | 
      foreach my $qunkref (@qunkrefs) { | 
| 908 | 
        if ((! (defined ($$qunkref{'tags'}))) | 
| 909 | 
            or (! (grep ($_ eq $tag, @{$$qunkref{'tags'}})))) { | 
| 910 | 
          $everyone_has_this_tag = 0; | 
| 911 | 
        } | 
| 912 | 
      } | 
| 913 | 
      if ($everyone_has_this_tag) { | 
| 914 | 
        $unanimous_tags{$tag} = 1; | 
| 915 | 
        delete $non_unanimous_tags{$tag}; | 
| 916 | 
      } | 
| 917 | 
    } | 
| 918 | 
  } | 
| 919 | 
 | 
| 920 | 
  if ($XML_Output) | 
| 921 | 
  { | 
| 922 | 
    # If outputting XML, then our task is pretty simple, because we | 
| 923 | 
    # don't have to detect common dir, common tags, branch prefixing, | 
| 924 | 
    # etc.  We just output exactly what we have, and don't worry about | 
| 925 | 
    # redundancy or readability. | 
| 926 | 
 | 
| 927 | 
    foreach my $qunkref (@qunkrefs)  | 
| 928 | 
    { | 
| 929 | 
      my $filename    = $$qunkref{'filename'}; | 
| 930 | 
      my $revision    = $$qunkref{'revision'}; | 
| 931 | 
      my $tags        = $$qunkref{'tags'}; | 
| 932 | 
      my $branch      = $$qunkref{'branch'}; | 
| 933 | 
      my $branchroots = $$qunkref{'branchroots'}; | 
| 934 | 
 | 
| 935 | 
      $filename = &xml_escape ($filename);   # probably paranoia | 
| 936 | 
      $revision = &xml_escape ($revision);   # definitely paranoia | 
| 937 | 
 | 
| 938 | 
      $beauty .= "<file>\n"; | 
| 939 | 
      $beauty .= "<name>${filename}</name>\n"; | 
| 940 | 
      $beauty .= "<revision>${revision}</revision>\n"; | 
| 941 | 
      if ($branch) { | 
| 942 | 
        $branch   = &xml_escape ($branch);     # more paranoia | 
| 943 | 
        $beauty .= "<branch>${branch}</branch>\n"; | 
| 944 | 
      } | 
| 945 | 
      foreach my $tag (@$tags) { | 
| 946 | 
        $tag = &xml_escape ($tag);  # by now you're used to the paranoia | 
| 947 | 
        $beauty .= "<tag>${tag}</tag>\n"; | 
| 948 | 
      } | 
| 949 | 
      foreach my $root (@$branchroots) { | 
| 950 | 
        $root = &xml_escape ($root);  # which is good, because it will continue | 
| 951 | 
        $beauty .= "<branchroot>${root}</branchroot>\n"; | 
| 952 | 
      } | 
| 953 | 
      $beauty .= "</file>\n"; | 
| 954 | 
    } | 
| 955 | 
 | 
| 956 | 
    # Theoretically, we could go home now.  But as long as we're here, | 
| 957 | 
    # let's print out the common_dir and utags, as a convenience to | 
| 958 | 
    # the receiver (after all, earlier code calculated that stuff | 
| 959 | 
    # anyway, so we might as well take advantage of it). | 
| 960 | 
 | 
| 961 | 
    if ((scalar (keys (%unanimous_tags))) > 1) { | 
| 962 | 
      foreach my $utag ((keys (%unanimous_tags))) { | 
| 963 | 
        $utag = &xml_escape ($utag);   # the usual paranoia | 
| 964 | 
        $beauty .= "<utag>${utag}</utag>\n"; | 
| 965 | 
      } | 
| 966 | 
    } | 
| 967 | 
    if ($common_dir) { | 
| 968 | 
      $common_dir = &xml_escape ($common_dir); | 
| 969 | 
      $beauty .= "<commondir>${common_dir}</commondir>\n"; | 
| 970 | 
    } | 
| 971 | 
 | 
| 972 | 
    # That's enough for XML, time to go home: | 
| 973 | 
    return $beauty; | 
| 974 | 
  } | 
| 975 | 
 | 
| 976 | 
  # Else not XML output, so complexly compactify for chordate | 
| 977 | 
  # consumption.  At this point we have enough global information | 
| 978 | 
  # about all the qunks to organize them non-redundantly for output. | 
| 979 | 
 | 
| 980 | 
  if ($common_dir) { | 
| 981 | 
    # Note that $common_dir still has its trailing slash | 
| 982 | 
    $beauty .= "$common_dir: "; | 
| 983 | 
  } | 
| 984 | 
 | 
| 985 | 
  if ($Show_Branches) | 
| 986 | 
  { | 
| 987 | 
    # For trailing revision numbers. | 
| 988 | 
    my @brevisions; | 
| 989 | 
 | 
| 990 | 
    foreach my $branch (keys (%all_branches)) | 
| 991 | 
    { | 
| 992 | 
      foreach my $qunkref (@qunkrefs) | 
| 993 | 
      { | 
| 994 | 
        if ((defined ($$qunkref{'branch'})) | 
| 995 | 
            and ($$qunkref{'branch'} eq $branch)) | 
| 996 | 
        { | 
| 997 | 
          if ($fbegun) { | 
| 998 | 
            # kff todo: comma-delimited in XML too?  Sure. | 
| 999 | 
            $beauty .= ", "; | 
| 1000 | 
          }  | 
| 1001 | 
          else { | 
| 1002 | 
            $fbegun = 1; | 
| 1003 | 
          } | 
| 1004 | 
          my $fname = substr ($$qunkref{'filename'}, length ($common_dir)); | 
| 1005 | 
          $beauty .= $fname; | 
| 1006 | 
          $$qunkref{'printed'} = 1;  # Just setting a mark bit, basically | 
| 1007 | 
 | 
| 1008 | 
          if ($Show_Tags && (defined @{$$qunkref{'tags'}})) { | 
| 1009 | 
            my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}}); | 
| 1010 | 
            if (@tags) { | 
| 1011 | 
              $beauty .= " (tags: "; | 
| 1012 | 
              $beauty .= join (', ', @tags); | 
| 1013 | 
              $beauty .= ")"; | 
| 1014 | 
            } | 
| 1015 | 
          } | 
| 1016 | 
 | 
| 1017 | 
          if ($Show_Revisions) { | 
| 1018 | 
            # Collect the revision numbers' last components, but don't | 
| 1019 | 
            # print them -- they'll get printed with the branch name | 
| 1020 | 
            # later. | 
| 1021 | 
            $$qunkref{'revision'} =~ /.+\.([\d]+)$/; | 
| 1022 | 
            push (@brevisions, $1); | 
| 1023 | 
 | 
| 1024 | 
            # todo: we're still collecting branch roots, but we're not | 
| 1025 | 
            # showing them anywhere.  If we do show them, it would be | 
| 1026 | 
            # nifty to just call them revision "0" on a the branch. | 
| 1027 | 
            # Yeah, that's the ticket. | 
| 1028 | 
          } | 
| 1029 | 
        } | 
| 1030 | 
      } | 
| 1031 | 
      $beauty .= " ($branch"; | 
| 1032 | 
      if (@brevisions) { | 
| 1033 | 
        if ((scalar (@brevisions)) > 1) { | 
| 1034 | 
          $beauty .= ".["; | 
| 1035 | 
          $beauty .= (join (',', @brevisions)); | 
| 1036 | 
          $beauty .= "]"; | 
| 1037 | 
        } | 
| 1038 | 
        else { | 
| 1039 | 
          $beauty .= ".$brevisions[0]"; | 
| 1040 | 
        } | 
| 1041 | 
      } | 
| 1042 | 
      $beauty .= ")"; | 
| 1043 | 
    } | 
| 1044 | 
  } | 
| 1045 | 
 | 
| 1046 | 
  # Okay; any qunks that were done according to branch are taken care | 
| 1047 | 
  # of, and marked as printed.  Now print everyone else. | 
| 1048 | 
 | 
| 1049 | 
  foreach my $qunkref (@qunkrefs) | 
| 1050 | 
  { | 
| 1051 | 
    next if (defined ($$qunkref{'printed'}));   # skip if already printed | 
| 1052 | 
         | 
| 1053 | 
    if ($fbegun) { | 
| 1054 | 
      $beauty .= ", "; | 
| 1055 | 
    } | 
| 1056 | 
    else { | 
| 1057 | 
      $fbegun = 1; | 
| 1058 | 
    } | 
| 1059 | 
    $beauty .= substr ($$qunkref{'filename'}, length ($common_dir)); | 
| 1060 | 
    # todo: Shlomo's change was this: | 
| 1061 | 
    # $beauty .= substr ($$qunkref{'filename'},  | 
| 1062 | 
    #              (($common_dir eq "./") ? "" : length ($common_dir))); | 
| 1063 | 
    $$qunkref{'printed'} = 1;  # Set a mark bit. | 
| 1064 | 
     | 
| 1065 | 
    if ($Show_Revisions || $Show_Tags) | 
| 1066 | 
    { | 
| 1067 | 
      my $started_addendum = 0; | 
| 1068 | 
 | 
| 1069 | 
      if ($Show_Revisions) { | 
| 1070 | 
        $started_addendum = 1; | 
| 1071 | 
        $beauty .= " ("; | 
| 1072 | 
        $beauty .= "$$qunkref{'revision'}"; | 
| 1073 | 
      } | 
| 1074 | 
      if ($Show_Tags && (defined $$qunkref{'tags'})) { | 
| 1075 | 
        my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}}); | 
| 1076 | 
        if ((scalar (@tags)) > 0) { | 
| 1077 | 
          if ($started_addendum) { | 
| 1078 | 
            $beauty .= ", "; | 
| 1079 | 
          } | 
| 1080 | 
          else { | 
| 1081 | 
            $beauty .= " (tags: "; | 
| 1082 | 
          } | 
| 1083 | 
          $beauty .= join (', ', @tags); | 
| 1084 | 
          $started_addendum = 1; | 
| 1085 | 
        } | 
| 1086 | 
      } | 
| 1087 | 
      if ($started_addendum) { | 
| 1088 | 
        $beauty .= ")"; | 
| 1089 | 
      } | 
| 1090 | 
    } | 
| 1091 | 
  } | 
| 1092 | 
 | 
| 1093 | 
  # Unanimous tags always come last. | 
| 1094 | 
  if ($Show_Tags && %unanimous_tags) | 
| 1095 | 
  { | 
| 1096 | 
    $beauty .= " (utags: "; | 
| 1097 | 
    $beauty .= join (', ', keys (%unanimous_tags)); | 
| 1098 | 
    $beauty .= ")"; | 
| 1099 | 
  } | 
| 1100 | 
 | 
| 1101 | 
  # todo: still have to take care of branch_roots? | 
| 1102 | 
 | 
| 1103 | 
  $beauty = "* $beauty:"; | 
| 1104 | 
 | 
| 1105 | 
  return $beauty; | 
| 1106 | 
} | 
| 1107 | 
 | 
| 1108 | 
 | 
| 1109 | 
sub common_path_prefix () | 
| 1110 | 
{ | 
| 1111 | 
  my $path1 = shift; | 
| 1112 | 
  my $path2 = shift; | 
| 1113 | 
 | 
| 1114 | 
  my ($dir1, $dir2); | 
| 1115 | 
  (undef, $dir1, undef) = fileparse ($path1); | 
| 1116 | 
  (undef, $dir2, undef) = fileparse ($path2); | 
| 1117 | 
 | 
| 1118 | 
  # Transmogrify Windows filenames to look like Unix.   | 
| 1119 | 
  # (It is far more likely that someone is running cvs2cl.pl under | 
| 1120 | 
  # Windows than that they would genuinely have backslashes in their | 
| 1121 | 
  # filenames.) | 
| 1122 | 
  $dir1 =~ tr#\\#/#; | 
| 1123 | 
  $dir2 =~ tr#\\#/#; | 
| 1124 | 
 | 
| 1125 | 
  my $accum1 = ""; | 
| 1126 | 
  my $accum2 = ""; | 
| 1127 | 
  my $last_common_prefix = ""; | 
| 1128 | 
 | 
| 1129 | 
  while ($accum1 eq $accum2) | 
| 1130 | 
  { | 
| 1131 | 
    $last_common_prefix = $accum1; | 
| 1132 | 
    last if ($accum1 eq $dir1); | 
| 1133 | 
    my ($tmp1) = split (/\//, (substr ($dir1, length ($accum1)))); | 
| 1134 | 
    my ($tmp2) = split (/\//, (substr ($dir2, length ($accum2)))); | 
| 1135 | 
    $accum1 .= "$tmp1/" if ((defined ($tmp1)) and $tmp1); | 
| 1136 | 
    $accum2 .= "$tmp2/" if ((defined ($tmp2)) and $tmp2); | 
| 1137 | 
  } | 
| 1138 | 
 | 
| 1139 | 
  return $last_common_prefix; | 
| 1140 | 
} | 
| 1141 | 
 | 
| 1142 | 
 | 
| 1143 | 
sub preprocess_msg_text () | 
| 1144 | 
{ | 
| 1145 | 
  my $text = shift; | 
| 1146 | 
 | 
| 1147 | 
  # Strip out carriage returns (as they probably result from DOSsy editors). | 
| 1148 | 
  $text =~ s/\r\n/\n/g; | 
| 1149 | 
 | 
| 1150 | 
  # If it *looks* like two newlines, make it *be* two newlines: | 
| 1151 | 
  $text =~ s/\n\s*\n/\n\n/g; | 
| 1152 | 
 | 
| 1153 | 
  if ($XML_Output) | 
| 1154 | 
  { | 
| 1155 | 
    $text = &xml_escape ($text); | 
| 1156 | 
    $text = "<msg>${text}</msg>\n"; | 
| 1157 | 
  } | 
| 1158 | 
  elsif (! $No_Wrap) | 
| 1159 | 
  { | 
| 1160 | 
    # Strip off lone newlines, but only for lines that don't begin with | 
| 1161 | 
    # whitespace or a mail-quoting character, since we want to preserve | 
| 1162 | 
    # that kind of formatting.  Also don't strip newlines that follow a | 
| 1163 | 
    # period; we handle those specially next.  And don't strip | 
| 1164 | 
    # newlines that precede an open paren. | 
| 1165 | 
    1 while ($text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g); | 
| 1166 | 
     | 
| 1167 | 
    # If a newline follows a period, make sure that when we bring up the | 
| 1168 | 
    # bottom sentence, it begins with two spaces.  | 
| 1169 | 
    1 while ($text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g); | 
| 1170 | 
  } | 
| 1171 | 
 | 
| 1172 | 
  return $text; | 
| 1173 | 
} | 
| 1174 | 
 | 
| 1175 | 
 | 
| 1176 | 
sub last_line_len () | 
| 1177 | 
{ | 
| 1178 | 
  my $files_list = shift; | 
| 1179 | 
  my @lines = split (/\n/, $files_list); | 
| 1180 | 
  my $last_line = pop (@lines); | 
| 1181 | 
  return length ($last_line); | 
| 1182 | 
} | 
| 1183 | 
 | 
| 1184 | 
 | 
| 1185 | 
# A custom wrap function, sensitive to some common constructs used in | 
| 1186 | 
# log entries. | 
| 1187 | 
sub wrap_log_entry () | 
| 1188 | 
{ | 
| 1189 | 
  my $text = shift;                  # The text to wrap. | 
| 1190 | 
  my $left_pad_str = shift;          # String to pad with on the left. | 
| 1191 | 
 | 
| 1192 | 
  # These do NOT take left_pad_str into account: | 
| 1193 | 
  my $length_remaining = shift;      # Amount left on current line. | 
| 1194 | 
  my $max_line_length  = shift;      # Amount left for a blank line. | 
| 1195 | 
 | 
| 1196 | 
  my $wrapped_text = "";             # The accumulating wrapped entry. | 
| 1197 | 
  my $user_indent = "";              # Inherited user_indent from prev line. | 
| 1198 | 
 | 
| 1199 | 
  my $first_time = 1;                # First iteration of the loop? | 
| 1200 | 
  my $suppress_line_start_match = 0; # Set to disable line start checks. | 
| 1201 | 
 | 
| 1202 | 
  my @lines = split (/\n/, $text); | 
| 1203 | 
  while (@lines)   # Don't use `foreach' here, it won't work. | 
| 1204 | 
  { | 
| 1205 | 
    my $this_line = shift (@lines); | 
| 1206 | 
    chomp $this_line; | 
| 1207 | 
 | 
| 1208 | 
    if ($this_line =~ /^(\s+)/) { | 
| 1209 | 
      $user_indent = $1; | 
| 1210 | 
    } | 
| 1211 | 
    else { | 
| 1212 | 
      $user_indent = ""; | 
| 1213 | 
    } | 
| 1214 | 
 | 
| 1215 | 
    # If it matches any of the line-start regexps, print a newline now... | 
| 1216 | 
    if ($suppress_line_start_match) | 
| 1217 | 
    { | 
| 1218 | 
      $suppress_line_start_match = 0; | 
| 1219 | 
    } | 
| 1220 | 
    elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/) | 
| 1221 | 
           || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/) | 
| 1222 | 
           || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/) | 
| 1223 | 
           || ($this_line =~ /^(\s+)(\S+)/) | 
| 1224 | 
           || ($this_line =~ /^(\s*)- +/) | 
| 1225 | 
           || ($this_line =~ /^()\s*$/) | 
| 1226 | 
           || ($this_line =~ /^(\s*)\*\) +/) | 
| 1227 | 
           || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/)) | 
| 1228 | 
    { | 
| 1229 | 
      # Make a line break immediately, unless header separator is set | 
| 1230 | 
      # and this line is the first line in the entry, in which case | 
| 1231 | 
      # we're getting the blank line for free already and shouldn't | 
| 1232 | 
      # add an extra one. | 
| 1233 | 
      unless (($After_Header ne " ") and ($first_time)) | 
| 1234 | 
      { | 
| 1235 | 
        if ($this_line =~ /^()\s*$/) { | 
| 1236 | 
          $suppress_line_start_match = 1; | 
| 1237 | 
          $wrapped_text .= "\n${left_pad_str}"; | 
| 1238 | 
        } | 
| 1239 | 
 | 
| 1240 | 
        $wrapped_text .= "\n${left_pad_str}"; | 
| 1241 | 
      } | 
| 1242 | 
       | 
| 1243 | 
      $length_remaining = $max_line_length - (length ($user_indent)); | 
| 1244 | 
    } | 
| 1245 | 
 | 
| 1246 | 
    # Now that any user_indent has been preserved, strip off leading | 
| 1247 | 
    # whitespace, so up-folding has no ugly side-effects. | 
| 1248 | 
    $this_line =~ s/^\s*//; | 
| 1249 | 
 | 
| 1250 | 
    # Accumulate the line, and adjust parameters for next line. | 
| 1251 | 
    my $this_len = length ($this_line); | 
| 1252 | 
    if ($this_len == 0) | 
| 1253 | 
    { | 
| 1254 | 
      # Blank lines should cancel any user_indent level. | 
| 1255 | 
      $user_indent = ""; | 
| 1256 | 
      $length_remaining = $max_line_length; | 
| 1257 | 
    } | 
| 1258 | 
    elsif ($this_len >= $length_remaining) # Line too long, try breaking it. | 
| 1259 | 
    { | 
| 1260 | 
      # Walk backwards from the end.  At first acceptable spot, break | 
| 1261 | 
      # a new line. | 
| 1262 | 
      my $idx = $length_remaining - 1; | 
| 1263 | 
      if ($idx < 0) { $idx = 0 }; | 
| 1264 | 
      while ($idx > 0) | 
| 1265 | 
      { | 
| 1266 | 
        if (substr ($this_line, $idx, 1) =~ /\s/) | 
| 1267 | 
        { | 
| 1268 | 
          my $line_now = substr ($this_line, 0, $idx); | 
| 1269 | 
          my $next_line = substr ($this_line, $idx); | 
| 1270 | 
          $this_line = $line_now; | 
| 1271 | 
           | 
| 1272 | 
          # Clean whitespace off the end. | 
| 1273 | 
          chomp $this_line; | 
| 1274 | 
 | 
| 1275 | 
          # The current line is ready to be printed. | 
| 1276 | 
          $this_line .= "\n${left_pad_str}"; | 
| 1277 | 
 | 
| 1278 | 
          # Make sure the next line is allowed full room. | 
| 1279 | 
          $length_remaining = $max_line_length - (length ($user_indent)); | 
| 1280 | 
 | 
| 1281 | 
          # Strip next_line, but then preserve any user_indent. | 
| 1282 | 
          $next_line =~ s/^\s*//; | 
| 1283 | 
 | 
| 1284 | 
          # Sneak a peek at the user_indent of the upcoming line, so | 
| 1285 | 
          # $next_line (which will now precede it) can inherit that | 
| 1286 | 
          # indent level.  Otherwise, use whatever user_indent level | 
| 1287 | 
          # we currently have, which might be none. | 
| 1288 | 
          my $next_next_line = shift (@lines); | 
| 1289 | 
          if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) { | 
| 1290 | 
            $next_line = $1 . $next_line if (defined ($1)); | 
| 1291 | 
            # $length_remaining = $max_line_length - (length ($1)); | 
| 1292 | 
            $next_next_line =~ s/^\s*//; | 
| 1293 | 
          } | 
| 1294 | 
          else { | 
| 1295 | 
            $next_line = $user_indent . $next_line; | 
| 1296 | 
          } | 
| 1297 | 
          if (defined ($next_next_line)) { | 
| 1298 | 
            unshift (@lines, $next_next_line); | 
| 1299 | 
          } | 
| 1300 | 
          unshift (@lines, $next_line); | 
| 1301 | 
 | 
| 1302 | 
          # Our new next line might, coincidentally, begin with one of | 
| 1303 | 
          # the line-start regexps, so we temporarily turn off | 
| 1304 | 
          # sensitivity to that until we're past the line. | 
| 1305 | 
          $suppress_line_start_match = 1;  | 
| 1306 | 
 | 
| 1307 | 
          last; | 
| 1308 | 
        } | 
| 1309 | 
        else | 
| 1310 | 
        { | 
| 1311 | 
          $idx--; | 
| 1312 | 
        } | 
| 1313 | 
      } | 
| 1314 | 
 | 
| 1315 | 
      if ($idx == 0) | 
| 1316 | 
      { | 
| 1317 | 
        # We bottomed out because the line is longer than the | 
| 1318 | 
        # available space.  But that could be because the space is | 
| 1319 | 
        # small, or because the line is longer than even the maximum | 
| 1320 | 
        # possible space.  Handle both cases below. | 
| 1321 | 
 | 
| 1322 | 
        if ($length_remaining == ($max_line_length - (length ($user_indent)))) | 
| 1323 | 
        { | 
| 1324 | 
          # The line is simply too long -- there is no hope of ever | 
| 1325 | 
          # breaking it nicely, so just insert it verbatim, with | 
| 1326 | 
          # appropriate padding. | 
| 1327 | 
          $this_line = "\n${left_pad_str}${this_line}"; | 
| 1328 | 
        } | 
| 1329 | 
        else | 
| 1330 | 
        { | 
| 1331 | 
          # Can't break it here, but may be able to on the next round... | 
| 1332 | 
          unshift (@lines, $this_line); | 
| 1333 | 
          $length_remaining = $max_line_length - (length ($user_indent)); | 
| 1334 | 
          $this_line = "\n${left_pad_str}"; | 
| 1335 | 
        } | 
| 1336 | 
      } | 
| 1337 | 
    } | 
| 1338 | 
    else  # $this_len < $length_remaining, so tack on what we can. | 
| 1339 | 
    { | 
| 1340 | 
      # Leave a note for the next iteration. | 
| 1341 | 
      $length_remaining = $length_remaining - $this_len; | 
| 1342 | 
 | 
| 1343 | 
      if ($this_line =~ /\.$/) | 
| 1344 | 
      { | 
| 1345 | 
        $this_line .= "  "; | 
| 1346 | 
        $length_remaining -= 2; | 
| 1347 | 
      } | 
| 1348 | 
      else  # not a sentence end | 
| 1349 | 
      { | 
| 1350 | 
        $this_line .= " "; | 
| 1351 | 
        $length_remaining -= 1; | 
| 1352 | 
      } | 
| 1353 | 
    } | 
| 1354 | 
 | 
| 1355 | 
    # Unconditionally indicate that loop has run at least once. | 
| 1356 | 
    $first_time = 0; | 
| 1357 | 
 | 
| 1358 | 
    $wrapped_text .= "${user_indent}${this_line}"; | 
| 1359 | 
  } | 
| 1360 | 
 | 
| 1361 | 
  # One last bit of padding. | 
| 1362 | 
  $wrapped_text .= "\n"; | 
| 1363 | 
 | 
| 1364 | 
  return $wrapped_text; | 
| 1365 | 
} | 
| 1366 | 
 | 
| 1367 | 
 | 
| 1368 | 
sub xml_escape () | 
| 1369 | 
{ | 
| 1370 | 
  my $txt = shift; | 
| 1371 | 
  $txt =~ s/&/&/g; | 
| 1372 | 
  $txt =~ s/</</g; | 
| 1373 | 
  $txt =~ s/>/>/g; | 
| 1374 | 
  return $txt; | 
| 1375 | 
} | 
| 1376 | 
 | 
| 1377 | 
 | 
| 1378 | 
sub maybe_read_user_map_file () | 
| 1379 | 
{ | 
| 1380 | 
  my %expansions; | 
| 1381 | 
 | 
| 1382 | 
  if ($User_Map_File) | 
| 1383 | 
  { | 
| 1384 | 
    open (MAPFILE, "<$User_Map_File") | 
| 1385 | 
        or die ("Unable to open $User_Map_File ($!)"); | 
| 1386 | 
 | 
| 1387 | 
    while (<MAPFILE>)  | 
| 1388 | 
    { | 
| 1389 | 
      next if /^\s*#/;  # Skip comment lines. | 
| 1390 | 
      next if not /:/;  # Skip lines without colons. | 
| 1391 | 
 | 
| 1392 | 
      # It is now safe to split on ':'. | 
| 1393 | 
      my ($username, $expansion) = split ':'; | 
| 1394 | 
      chomp $expansion; | 
| 1395 | 
      $expansion =~ s/^'(.*)'$/$1/; | 
| 1396 | 
      $expansion =~ s/^"(.*)"$/$1/; | 
| 1397 | 
 | 
| 1398 | 
      # If it looks like the expansion has a real name already, then | 
| 1399 | 
      # we toss the username we got from CVS log.  Otherwise, keep | 
| 1400 | 
      # it to use in combination with the email address. | 
| 1401 | 
 | 
| 1402 | 
      if ($expansion =~ /^\s*<{0,1}\S+@.*/) { | 
| 1403 | 
        # Also, add angle brackets if none present | 
| 1404 | 
        if (! ($expansion =~ /<\S+@\S+>/)) { | 
| 1405 | 
          $expansions{$username} = "$username <$expansion>"; | 
| 1406 | 
        } | 
| 1407 | 
        else { | 
| 1408 | 
          $expansions{$username} = "$username $expansion"; | 
| 1409 | 
        } | 
| 1410 | 
      } | 
| 1411 | 
      else { | 
| 1412 | 
        $expansions{$username} = $expansion; | 
| 1413 | 
      } | 
| 1414 | 
    } | 
| 1415 | 
 | 
| 1416 | 
    close (MAPFILE); | 
| 1417 | 
  } | 
| 1418 | 
 | 
| 1419 | 
  return %expansions; | 
| 1420 | 
} | 
| 1421 | 
 | 
| 1422 | 
 | 
| 1423 | 
sub parse_options () | 
| 1424 | 
{ | 
| 1425 | 
  # Check this internally before setting the global variable. | 
| 1426 | 
  my $output_file; | 
| 1427 | 
 | 
| 1428 | 
  # If this gets set, we encountered unknown options and will exit at | 
| 1429 | 
  # the end of this subroutine. | 
| 1430 | 
  my $exit_with_admonishment = 0; | 
| 1431 | 
 | 
| 1432 | 
  while (my $arg = shift (@ARGV))  | 
| 1433 | 
  { | 
| 1434 | 
    if ($arg =~ /^-h$|^-help$|^--help$|^--usage$|^-?$/) { | 
| 1435 | 
      $Print_Usage = 1; | 
| 1436 | 
    } | 
| 1437 | 
    elsif ($arg =~ /^--debug$/) {        # unadvertised option, heh | 
| 1438 | 
      $Debug = 1; | 
| 1439 | 
    } | 
| 1440 | 
    elsif ($arg =~ /^--version$/) { | 
| 1441 | 
      $Print_Version = 1; | 
| 1442 | 
    } | 
| 1443 | 
    elsif ($arg =~ /^-g$|^--global-opts$/) { | 
| 1444 | 
      my $narg = shift (@ARGV) || die "$arg needs argument.\n"; | 
| 1445 | 
      # Don't assume CVS is called "cvs" on the user's system: | 
| 1446 | 
      $Log_Source_Command =~ s/(^\S*)/$1 $narg/; | 
| 1447 | 
    } | 
| 1448 | 
    elsif ($arg =~ /^-l$|^--log-opts$/) { | 
| 1449 | 
      my $narg = shift (@ARGV) || die "$arg needs argument.\n"; | 
| 1450 | 
      $Log_Source_Command .= " $narg"; | 
| 1451 | 
    } | 
| 1452 | 
    elsif ($arg =~ /^-f$|^--file$/) { | 
| 1453 | 
      my $narg = shift (@ARGV) || die "$arg needs argument.\n"; | 
| 1454 | 
      $output_file = $narg; | 
| 1455 | 
    } | 
| 1456 | 
    elsif ($arg =~ /^--fsf$/) { | 
| 1457 | 
      $FSF_Style = 1; | 
| 1458 | 
    } | 
| 1459 | 
    elsif ($arg =~ /^-U$|^--usermap$/) { | 
| 1460 | 
      my $narg = shift (@ARGV) || die "$arg needs argument.\n"; | 
| 1461 | 
      $User_Map_File = $narg; | 
| 1462 | 
    } | 
| 1463 | 
    elsif ($arg =~ /^-W$|^--window$/) { | 
| 1464 | 
      my $narg = shift (@ARGV) || die "$arg needs argument.\n"; | 
| 1465 | 
      $Max_Checkin_Duration = $narg; | 
| 1466 | 
    } | 
| 1467 | 
    elsif ($arg =~ /^-I$|^--ignore$/) { | 
| 1468 | 
      my $narg = shift (@ARGV) || die "$arg needs argument.\n"; | 
| 1469 | 
      push (@Ignore_Files, $narg); | 
| 1470 | 
    } | 
| 1471 | 
    elsif ($arg =~ /^-C$|^--case-insensitive$/) { | 
| 1472 | 
      $Case_Insensitive = 1; | 
| 1473 | 
    } | 
| 1474 | 
    elsif ($arg =~ /^-R$|^--regexp$/) { | 
| 1475 | 
      my $narg = shift (@ARGV) || die "$arg needs argument.\n"; | 
| 1476 | 
      $Regexp_Gate = $narg; | 
| 1477 | 
    } | 
| 1478 | 
    elsif ($arg =~ /^--stdout$/) { | 
| 1479 | 
      $Output_To_Stdout = 1; | 
| 1480 | 
    } | 
| 1481 | 
    elsif ($arg =~ /^--version$/) { | 
| 1482 | 
      $Print_Version = 1; | 
| 1483 | 
    } | 
| 1484 | 
    elsif ($arg =~ /^-d$|^--distributed$/) { | 
| 1485 | 
      $Distributed = 1; | 
| 1486 | 
    } | 
| 1487 | 
    elsif ($arg =~ /^-P$|^--prune$/) { | 
| 1488 | 
      $Prune_Empty_Msgs = 1; | 
| 1489 | 
    } | 
| 1490 | 
    elsif ($arg =~ /^-S$|^--separate-header$/) { | 
| 1491 | 
      $After_Header = "\n\n"; | 
| 1492 | 
    } | 
| 1493 | 
    elsif ($arg =~ /^--no-wrap$/) { | 
| 1494 | 
      $No_Wrap = 1; | 
| 1495 | 
    } | 
| 1496 | 
    elsif ($arg =~ /^--gmt$|^--utc$/) { | 
| 1497 | 
      $UTC_Times = 1; | 
| 1498 | 
    } | 
| 1499 | 
    elsif ($arg =~ /^-w$|^--day-of-week$/) { | 
| 1500 | 
      $Show_Day_Of_Week = 1; | 
| 1501 | 
    } | 
| 1502 | 
    elsif ($arg =~ /^-r$|^--revisions$/) { | 
| 1503 | 
      $Show_Revisions = 1; | 
| 1504 | 
    } | 
| 1505 | 
    elsif ($arg =~ /^-t$|^--tags$/) { | 
| 1506 | 
      $Show_Tags = 1; | 
| 1507 | 
    } | 
| 1508 | 
    elsif ($arg =~ /^-b$|^--branches$/) { | 
| 1509 | 
      $Show_Branches = 1; | 
| 1510 | 
    } | 
| 1511 | 
    elsif ($arg =~ /^-F$|^--follow$/) { | 
| 1512 | 
      my $narg = shift (@ARGV) || die "$arg needs argument.\n"; | 
| 1513 | 
      push (@Follow_Branches, $narg); | 
| 1514 | 
    } | 
| 1515 | 
    elsif ($arg =~ /^--stdin$/) { | 
| 1516 | 
      $Input_From_Stdin = 1; | 
| 1517 | 
    } | 
| 1518 | 
    elsif ($arg =~ /^--header$/) { | 
| 1519 | 
      my $narg = shift (@ARGV) || die "$arg needs argument.\n"; | 
| 1520 | 
      $ChangeLog_Header = &slurp_file ($narg); | 
| 1521 | 
      if (! defined ($ChangeLog_Header)) { | 
| 1522 | 
        $ChangeLog_Header = ""; | 
| 1523 | 
      } | 
| 1524 | 
    } | 
| 1525 | 
    elsif ($arg =~ /^--xml$/) { | 
| 1526 | 
      $XML_Output = 1; | 
| 1527 | 
    } | 
| 1528 | 
    elsif ($arg =~ /^--hide-filenames$/) { | 
| 1529 | 
      $Hide_Filenames = 1; | 
| 1530 | 
      $After_Header = ""; | 
| 1531 | 
    } | 
| 1532 | 
    else { | 
| 1533 | 
      # Just add a filename as argument to the log command | 
| 1534 | 
      $Log_Source_Command .= " $arg"; | 
| 1535 | 
    } | 
| 1536 | 
  } | 
| 1537 | 
 | 
| 1538 | 
  ## Check for contradictions... | 
| 1539 | 
 | 
| 1540 | 
  if ($Output_To_Stdout && $Distributed) { | 
| 1541 | 
    print STDERR "cannot pass both --stdout and --distributed\n"; | 
| 1542 | 
    $exit_with_admonishment = 1; | 
| 1543 | 
  } | 
| 1544 | 
 | 
| 1545 | 
  if ($Output_To_Stdout && $output_file) { | 
| 1546 | 
    print STDERR "cannot pass both --stdout and --file\n"; | 
| 1547 | 
    $exit_with_admonishment = 1; | 
| 1548 | 
  } | 
| 1549 | 
 | 
| 1550 | 
  # Or if any other error message has already been printed out, we | 
| 1551 | 
  # just leave now: | 
| 1552 | 
  if ($exit_with_admonishment) { | 
| 1553 | 
    &usage (); | 
| 1554 | 
    exit (1); | 
| 1555 | 
  } | 
| 1556 | 
  elsif ($Print_Usage) { | 
| 1557 | 
    &usage (); | 
| 1558 | 
    exit (0); | 
| 1559 | 
  } | 
| 1560 | 
  elsif ($Print_Version) { | 
| 1561 | 
    &version (); | 
| 1562 | 
    exit (0); | 
| 1563 | 
  } | 
| 1564 | 
 | 
| 1565 | 
  ## Else no problems, so proceed. | 
| 1566 | 
 | 
| 1567 | 
  if ($Output_To_Stdout) { | 
| 1568 | 
    undef $Log_File_Name;       # not actually necessary | 
| 1569 | 
  } | 
| 1570 | 
  elsif ($output_file) { | 
| 1571 | 
    $Log_File_Name = $output_file; | 
| 1572 | 
  } | 
| 1573 | 
} | 
| 1574 | 
 | 
| 1575 | 
 | 
| 1576 | 
sub slurp_file () | 
| 1577 | 
{ | 
| 1578 | 
  my $filename = shift || die ("no filename passed to slurp_file()"); | 
| 1579 | 
  my $retstr; | 
| 1580 | 
 | 
| 1581 | 
  open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)"); | 
| 1582 | 
  my $saved_sep = $/; | 
| 1583 | 
  undef $/; | 
| 1584 | 
  $retstr = <SLURPEE>; | 
| 1585 | 
  $/ = $saved_sep; | 
| 1586 | 
  close (SLURPEE); | 
| 1587 | 
  return $retstr; | 
| 1588 | 
} | 
| 1589 | 
 | 
| 1590 | 
 | 
| 1591 | 
sub debug () | 
| 1592 | 
{ | 
| 1593 | 
  if ($Debug) { | 
| 1594 | 
    my $msg = shift; | 
| 1595 | 
    print STDERR $msg; | 
| 1596 | 
  } | 
| 1597 | 
} | 
| 1598 | 
 | 
| 1599 | 
 | 
| 1600 | 
sub version () | 
| 1601 | 
{ | 
| 1602 | 
  print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n"; | 
| 1603 | 
} | 
| 1604 | 
 | 
| 1605 | 
 | 
| 1606 | 
sub usage () | 
| 1607 | 
{ | 
| 1608 | 
  &version (); | 
| 1609 | 
  print <<'END_OF_INFO'; | 
| 1610 | 
Generate GNU-style ChangeLogs in CVS working copies. | 
| 1611 | 
 | 
| 1612 | 
Notes about the output format(s): | 
| 1613 | 
 | 
| 1614 | 
   The default output of cvs2cl.pl is designed to be compact, formally | 
| 1615 | 
   unambiguous, but still easy for humans to read.  It is largely | 
| 1616 | 
   self-explanatory, I hope; the one abbreviation that might not be | 
| 1617 | 
   obvious is "utags".  That stands for "universal tags" -- a | 
| 1618 | 
   universal tag is one held by all the files in a given change entry. | 
| 1619 | 
 | 
| 1620 | 
   If you need output that's easy for a program to parse, use the | 
| 1621 | 
   --xml option.  Note that with XML output, just about all available | 
| 1622 | 
   information is included with each change entry, whether you asked | 
| 1623 | 
   for it or not, on the theory that your parser can ignore anything | 
| 1624 | 
   it's not looking for. | 
| 1625 | 
 | 
| 1626 | 
Notes about the options and arguments (the actual options are listed | 
| 1627 | 
last in this usage message): | 
| 1628 | 
 | 
| 1629 | 
  * The -I and -F options may appear multiple times. | 
| 1630 | 
 | 
| 1631 | 
  * To follow trunk revisions, use "-F trunk" ("-F TRUNK" also works). | 
| 1632 | 
    This is okay because no would ever, ever be crazy enough to name a | 
| 1633 | 
    branch "trunk", right?  Right. | 
| 1634 | 
 | 
| 1635 | 
  * For the -U option, the UFILE should be formatted like | 
| 1636 | 
    CVSROOT/users. That is, each line of UFILE looks like this | 
| 1637 | 
       jrandom:jrandom@red-bean.com | 
| 1638 | 
    or maybe even like this | 
| 1639 | 
       jrandom:'Jesse Q. Random <jrandom@red-bean.com>' | 
| 1640 | 
    Don't forget to quote the portion after the colon if necessary. | 
| 1641 | 
   | 
| 1642 | 
  * Many people want to filter by date.  To do so, invoke cvs2cl.pl | 
| 1643 | 
    like this:  | 
| 1644 | 
       cvs2cl.pl -l "-d'DATESPEC'" | 
| 1645 | 
    where DATESPEC is any date specification valid for "cvs log -d". | 
| 1646 | 
    (Note that CVS 1.10.7 and below requires there be no space between | 
| 1647 | 
    -d and its argument). | 
| 1648 | 
 | 
| 1649 | 
Options/Arguments: | 
| 1650 | 
 | 
| 1651 | 
  -h, -help, --help, or -?     Show this usage and exit | 
| 1652 | 
  --version                    Show version and exit | 
| 1653 | 
  -r, --revisions              Show revision numbers in output | 
| 1654 | 
  -b, --branches               Show branch names in revisions when possible | 
| 1655 | 
  -t, --tags                   Show tags (symbolic names) in output | 
| 1656 | 
  --stdin                      Read from stdin, don't run cvs log | 
| 1657 | 
  --stdout                     Output to stdout not to ChangeLog | 
| 1658 | 
  -d, --distributed            Put ChangeLogs in subdirs | 
| 1659 | 
  -f FILE, --file FILE         Write to FILE instead of "ChangeLog" | 
| 1660 | 
  --fsf                        Use this if log data is in FSF ChangeLog style | 
| 1661 | 
  -W SECS, --window SECS       Window of time within which log entries unify | 
| 1662 | 
  -U UFILE, --usermap UFILE    Expand usernames to email addresses from UFILE | 
| 1663 | 
  -R REGEXP, --regexp REGEXP   Include only entries that match REGEXP | 
| 1664 | 
  -I REGEXP, --ignore REGEXP   Ignore files whose names match REGEXP | 
| 1665 | 
  -C, --case-insensitive       Any regexp matching is done case-insensitively | 
| 1666 | 
  -F BRANCH, --follow BRANCH   Show only revisions on or ancestral to BRANCH | 
| 1667 | 
  -S, --separate-header        Blank line between each header and log message | 
| 1668 | 
  --no-wrap                    Don't auto-wrap log message (recommend -S also) | 
| 1669 | 
  --gmt, --utc                 Show times in GMT/UTC instead of local time | 
| 1670 | 
  -w, --day-of-week            Show day of week | 
| 1671 | 
  --header FILE                Get ChangeLog header from FILE ("-" means stdin) | 
| 1672 | 
  --xml                        Output XML instead of ChangeLog format | 
| 1673 | 
  --hide-filenames             Don't show filenames (ignored for XML output) | 
| 1674 | 
  -P, --prune                  Don't show empty log messages | 
| 1675 | 
  -g OPTS, --global-opts OPTS  Invoke like this "cvs OPTS log ..." | 
| 1676 | 
  -l OPTS, --log-opts OPTS     Invoke like this "cvs ... log OPTS" | 
| 1677 | 
  FILE1 [FILE2 ...]            Show only log information for the named FILE(s) | 
| 1678 | 
 | 
| 1679 | 
See http://www.red-bean.com/cvs2cl for maintenance and bug info. | 
| 1680 | 
END_OF_INFO | 
| 1681 | 
} | 
| 1682 | 
 | 
| 1683 | 
__END__ | 
| 1684 | 
 | 
| 1685 | 
=head1 NAME | 
| 1686 | 
 | 
| 1687 | 
cvs2cl.pl - produces GNU-style ChangeLogs in CVS working copies, by | 
| 1688 | 
    running "cvs log" and parsing the output.  Shared log entries are | 
| 1689 | 
    unified in an intuitive way. | 
| 1690 | 
 | 
| 1691 | 
=head1 DESCRIPTION | 
| 1692 | 
 | 
| 1693 | 
This script generates GNU-style ChangeLog files from CVS log | 
| 1694 | 
information.  Basic usage: just run it inside a working copy and a | 
| 1695 | 
ChangeLog will appear.  It requires repository access (i.e., 'cvs log' | 
| 1696 | 
must work).  Run "cvs2cl.pl --help" to see more advanced options. | 
| 1697 | 
 | 
| 1698 | 
See http://www.red-bean.com/cvs2cl for updates, and for instructions | 
| 1699 | 
on getting anonymous CVS access to this script. | 
| 1700 | 
 | 
| 1701 | 
Maintainer: Karl Fogel <kfogel@red-bean.com> | 
| 1702 | 
Please report bugs to <bug-cvs2cl@red-bean.com>. | 
| 1703 | 
 | 
| 1704 | 
=head1 README | 
| 1705 | 
 | 
| 1706 | 
This script generates GNU-style ChangeLog files from CVS log | 
| 1707 | 
information.  Basic usage: just run it inside a working copy and a | 
| 1708 | 
ChangeLog will appear.  It requires repository access (i.e., 'cvs log' | 
| 1709 | 
must work).  Run "cvs2cl.pl --help" to see more advanced options. | 
| 1710 | 
 | 
| 1711 | 
See http://www.red-bean.com/cvs2cl for updates, and for instructions | 
| 1712 | 
on getting anonymous CVS access to this script. | 
| 1713 | 
 | 
| 1714 | 
Maintainer: Karl Fogel <kfogel@red-bean.com> | 
| 1715 | 
Please report bugs to <bug-cvs2cl@red-bean.com>. | 
| 1716 | 
 | 
| 1717 | 
=head1 PREREQUISITES | 
| 1718 | 
 | 
| 1719 | 
This script requires C<Text::Wrap>, C<Time::Local>, and | 
| 1720 | 
C<File::Basename>. | 
| 1721 | 
It also seems to require C<Perl 5.004_04> or higher. | 
| 1722 | 
 | 
| 1723 | 
=pod OSNAMES | 
| 1724 | 
 | 
| 1725 | 
any | 
| 1726 | 
 | 
| 1727 | 
=pod SCRIPT CATEGORIES | 
| 1728 | 
 | 
| 1729 | 
Version_Control/CVS | 
| 1730 | 
 | 
| 1731 | 
=cut | 
| 1732 | 
 | 
| 1733 | 
 | 
| 1734 | 
-*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- | 
| 1735 | 
 | 
| 1736 | 
Note about a bug-slash-opportunity: | 
| 1737 | 
----------------------------------- | 
| 1738 | 
 | 
| 1739 | 
There's a bug in Text::Wrap, which affects cvs2cl.  This script | 
| 1740 | 
reveals it: | 
| 1741 | 
 | 
| 1742 | 
  #!/usr/bin/perl -w | 
| 1743 | 
   | 
| 1744 | 
  use Text::Wrap; | 
| 1745 | 
   | 
| 1746 | 
  my $test_text = | 
| 1747 | 
  "This script demonstrates a bug in Text::Wrap.  The very long line | 
| 1748 | 
  following this paragraph will be relocated relative to the surrounding | 
| 1749 | 
  text: | 
| 1750 | 
   | 
| 1751 | 
  ==================================================================== | 
| 1752 | 
   | 
| 1753 | 
  See?  When the bug happens, we'll get the line of equal signs below | 
| 1754 | 
  this paragraph, even though it should be above."; | 
| 1755 | 
   | 
| 1756 | 
   | 
| 1757 | 
  # Print out the test text with no wrapping: | 
| 1758 | 
  print "$test_text"; | 
| 1759 | 
  print "\n"; | 
| 1760 | 
  print "\n"; | 
| 1761 | 
   | 
| 1762 | 
  # Now print it out wrapped, and see the bug: | 
| 1763 | 
  print wrap ("\t", "        ", "$test_text"); | 
| 1764 | 
  print "\n"; | 
| 1765 | 
  print "\n"; | 
| 1766 | 
 | 
| 1767 | 
If the line of equal signs were one shorter, then the bug doesn't | 
| 1768 | 
happen.  Interesting. | 
| 1769 | 
 | 
| 1770 | 
Anyway, rather than fix this in Text::Wrap, we might as well write a | 
| 1771 | 
new wrap() which has the following much-needed features: | 
| 1772 | 
 | 
| 1773 | 
* initial indentation, like current Text::Wrap() | 
| 1774 | 
* subsequent line indentation, like current Text::Wrap() | 
| 1775 | 
* user chooses among: force-break long words, leave them alone, or die()? | 
| 1776 | 
* preserve existing indentation: chopped chunks from an indented line | 
| 1777 | 
  are indented by same (like this line, not counting the asterisk!) | 
| 1778 | 
* optional list of things to preserve on line starts, default ">" | 
| 1779 | 
 | 
| 1780 | 
Note that the last two are essentially the same concept, so unify in | 
| 1781 | 
implementation and give a good interface to controlling them. | 
| 1782 | 
 | 
| 1783 | 
And how about: | 
| 1784 | 
 | 
| 1785 | 
Optionally, when encounter a line pre-indented by same as previous | 
| 1786 | 
line, then strip the newline and refill, but indent by the same. | 
| 1787 | 
Yeah... |