8 use OpenILS::Utils::MFHD::Date;
10 use base 'MARC::Field';
15 my $class = ref($proto) || $proto;
17 my $last_enum = undef;
19 $self->{_mfhdc_ENUMS} = {};
20 $self->{_mfhdc_CHRONS} = {};
21 $self->{_mfhdc_PATTERN} = {};
22 $self->{_mfhdc_COPY} = undef;
23 $self->{_mfhdc_UNIT} = undef;
24 $self->{_mfhdc_COMPRESSIBLE} = 1; # until proven otherwise
26 foreach my $subfield ($self->subfields) {
27 my ($key, $val) = @$subfield;
30 } elsif ($key =~ /[a-h]/) {
31 # Enumeration Captions
32 $self->{_mfhdc_ENUMS}->{$key} = {CAPTION => $val,
40 } elsif ($key =~ /[i-m]/) {
42 $self->{_mfhdc_CHRONS}->{$key} = $val;
43 } elsif ($key eq 'u') {
44 # Bib units per next higher enumeration level
45 carp('$u specified for top-level enumeration')
46 unless defined($last_enum);
47 $self->{_mfhdc_ENUMS}->{$last_enum}->{COUNT} = $val;
48 } elsif ($key eq 'v') {
49 carp '$v specified for top-level enumeration'
50 unless defined($last_enum);
51 $self->{_mfhdc_ENUMS}->{$last_enum}->{RESTART} = ($val eq 'r');
52 } elsif ($key =~ /[npwz]/) {
53 # Publication Pattern info ('o' == type of unit, 'q'..'t' undefined)
54 $self->{_mfhdc_PATTERN}->{$key} = $val;
55 } elsif ($key =~ /x/) {
56 # Calendar change can have multiple comma-separated values
57 $self->{_mfhdc_PATTERN}->{x} = [split /,/, $val];
58 } elsif ($key eq 'y') {
59 $self->{_mfhdc_PATTERN}->{y} = {}
60 unless exists $self->{_mfhdc_PATTERN}->{y};
61 update_pattern($self, $val);
62 } elsif ($key eq 'o') {
64 $self->{_mfhdc_UNIT} = $val;
65 } elsif ($key eq 't') {
66 $self->{_mfhdc_COPY} = $val;
68 carp "Unknown caption subfield '$key'";
72 # subsequent levels of enumeration (primary and alternate)
73 # If an enumeration level doesn't document the number
74 # of "issues" per "volume", or whether numbering of issues
75 # restarts, then we can't compress.
76 foreach my $key ('b', 'c', 'd', 'e', 'f', 'h') {
77 if (exists $self->{_mfhdc_ENUMS}->{$key}) {
78 my $pattern = $self->{_mfhdc_ENUMS}->{$key};
79 if (!$pattern->{RESTART} || !$pattern->{COUNT}
80 || ($pattern->{COUNT} eq 'var')
81 || ($pattern->{COUNT} eq 'und')) {
82 $self->{_mfhdc_COMPRESSIBLE} = 0;
88 my $pat = $self->{_mfhdc_PATTERN};
90 # Sanity check publication frequency vs publication pattern:
91 # if the frequency is a number, then the pattern better
92 # have that number of values associated with it.
93 if (exists($pat->{w}) && ($pat->{w} =~ /^\d+$/)
94 && ($pat->{w} != scalar(@{$pat->{y}->{p}}))) {
95 carp("Caption::new: publication frequency '$pat->{w}' != publication pattern @{$pat->{y}->{p}}");
99 # If there's a $x subfield and a $j, then it's compressible
100 if (exists $pat->{x} && exists $self->{_mfhdc_CHRONS}->{'j'}) {
101 $self->{_mfhdc_COMPRESSIBLE} = 1;
104 bless ($self, $class);
112 my $pathash = $self->{_mfhdc_PATTERN}->{y};
113 my ($pubcode, $pat) = unpack("a1a*", $val);
115 $pathash->{$pubcode} = [] unless exists $pathash->{$pubcode};
116 push @{$pathash->{$pubcode}}, $pat;
121 my $pattern = $self->{_mfhdc_PATTERN}->{y};
129 return $self->{_mfhdc_COMPRESSIBLE};
136 if (exists $self->{_mfhdc_CHRONS}->{$key}) {
137 return $self->{_mfhdc_CHRONS}->{$key};
147 if (exists $self->{_mfhdc_ENUMS}->{$key}) {
148 return $self->{_mfhdc_ENUMS}->{$key};
149 } elsif (exists $self->{_mfhdc_CHRONS}->{$key}) {
150 return $self->{_mfhdc_CHRONS}->{$key};
159 my $val = $self->capfield($key);
162 return $val->{CAPTION};
# calendar_change: accessor for the calendar-change data kept under
# $self->{_mfhdc_PATTERN}->{x}.
# Elsewhere in this file the $x subfield value is split on commas and
# stored there as an array reference, so that array ref is what comes back.
# When no $x subfield was recorded the key is absent and the lookup
# yields undef.
# NOTE(review): the line that sets $self is not visible in this excerpt;
# presumably it is shifted off @_ right after the sub line -- confirm.
168 sub calendar_change {
171 return $self->{_mfhdc_PATTERN}->{x};
174 # If items are identified by chronology only, with no separate
175 # enumeration (eg, a newspaper issue), then the chronology is
176 # recorded in the enumeration subfields $a - $f. We can tell
177 # that this is the case if there are $a - $f subfields and no
178 # chronology subfields ($i-$k), and none of the $a-$f subfields
179 # have associated $u or $v subfields, but there's a $w and no $x
#
# Returns 0 as soon as a disqualifying condition is found; otherwise
# returns the result of the final exists() test on the $w subfield.
# NOTE(review): the text above mentions chronology subfields $i-$k, but
# the code below only tests for the $i caption -- confirm that is intended.
181 sub enumeration_is_chronology {
184 # There is always a '$a' subfield in well-formed fields.
# A chronology caption ($i) or a calendar change ($x) rules this case out.
185 return 0 if exists $self->{_mfhdc_CHRONS}->{i}
186 || exists $self->{_mfhdc_PATTERN}->{x};
# Walk the enumeration levels in order and stop at the first level that
# has no caption recorded.
188 foreach my $key ('a' .. 'f') {
191 last if !exists $self->{_mfhdc_ENUMS}->{$key};
# COUNT and RESTART are the values recorded from the $u and $v subfields;
# either one being defined means this level is a real enumeration.
193 $enum = $self->{_mfhdc_ENUMS}->{$key};
194 return 0 if defined $enum->{COUNT} || defined $enum->{RESTART};
# No level carried $u/$v data, so the answer depends only on whether a
# $w (publication frequency) subfield was recorded.
197 return (exists $self->{_mfhdc_PATTERN}->{w});
# regularity_match: tests a date against the regularity patterns stored
# for one publication code in the $y subfield data.
# The callers visible in this file pass 'o', 'p' or 'c' as the publication
# code, followed by the date components.
# NOTE(review): $pubcode and @date are set on lines not visible in this
# excerpt; presumably they are unpacked from @_ after $self -- confirm.
200 sub regularity_match {
205 # we can't match something that doesn't exist.
206 return 0 if !exists $self->{_mfhdc_PATTERN}->{y}->{$pubcode};
# Each stored entry is a one-character chronology code followed by a
# comma-separated list of patterns.
208 foreach my $regularity (@{$self->{_mfhdc_PATTERN}->{y}->{$pubcode}}) {
209 my $chroncode= substr($regularity, 0, 1);
# NOTE(review): the file loads OpenILS::Utils::MFHD::Date, while this call
# names MFHD::Date::dispatch -- confirm the package name really matches.
210 my $matchfunc = MFHD::Date::dispatch($chroncode);
211 my @pats = split(/,/, substr($regularity, 1));
# An undef result from dispatch() is treated as an unknown chronology code.
213 if (!defined $matchfunc) {
214 carp "Unrecognized chroncode '$chroncode'";
219 foreach my $pat (@pats) {
220 $pat =~ s|/.+||; # If it's a combined date, match the start
# The matcher receives the pattern first, then the date components.
221 if ($matchfunc->($pat, @date)) {
234 # printf("# is_omitted: testing date %s: %d\n", join('/', @date),
235 # $self->regularity_match('o', @date));
236 return $self->regularity_match('o', @date);
243 return $self->regularity_match('p', @date);
250 return $self->regularity_match('c', @date);
# enum_is_combined: checks whether an issue value at a given enumeration
# level appears as the start of a combined issue in the $y 'c' patterns.
# The one call visible in this file passes the subfield key and the current
# value held for that key.
# NOTE(review): $self and $iss are set on lines not visible in this
# excerpt; $iss is presumably the second argument -- confirm.
253 sub enum_is_combined {
255 my $subfield = shift;
# Map the subfield letter to a 1-based level number ('a' => 1, 'b' => 2, ...).
257 my $level = ord($subfield) - ord('a') + 1;
259 return 0 if !exists $self->{_mfhdc_PATTERN}->{y}->{c};
261 foreach my $regularity (@{$self->{_mfhdc_PATTERN}->{y}->{c}}) {
# Only entries that start with "e<level>" apply to this enumeration level.
# NOTE(review): the /o modifier makes Perl interpolate $level only the
# first time this match is executed; later calls with a different
# $subfield would still match against the first level seen. This looks
# unintended -- confirm, and consider dropping /o.
262 next unless $regularity =~ m/^e$level/o;
# Skip the two-character "e<level>" prefix; the remainder is a comma list.
264 my @pats = split(/,/, substr($regularity, 2));
266 foreach my $pat (@pats) {
267 $pat =~ s|/.+||; # if it's a combined issue, match the start
268 return 1 if ($iss eq $pat);
276 # Test to see if $dt1 is on or after $dt2
277 # if length(@{$dt2}) == 2, then just month/day are compared
278 # if length(@{$dt2}) == 1, then just the months are compared
283 # printf("# on_or_after(%s, %s): ", join('/', @{$dt1}), join('/', @{$dt2}));
285 foreach my $i (0..(scalar(@{$dt2})-1)) {
286 if ($dt1->[$i] > $dt2->[$i]) {
287 # $dt1 occurs AFTER $dt2
289 } elsif ($dt1->[$i] < $dt2->[$i]) {
290 # $dt1 occurs BEFORE $dt2
293 # both are still equal, keep going
296 # We fell out of the loop with them being equal, so it's 'on'
# calendar_increment: decides whether one of the calendar change dates ($x)
# falls between the current issue date and the new issue date.
# The call visible in this file passes references to two date arrays and
# adds the returned value to $next->{a}.
# NOTE(review): $self, $cur, $new, $incr, $month and $day are declared on
# lines not visible in this excerpt -- confirm their initial values there.
300 sub calendar_increment {
304 my $cal_change = $self->calendar_change;
310 # A calendar change is defined, need to check if it applies
# Warn when the new date has a single component, or has two components and
# the second one is greater than 20.
# NOTE(review): presumably values above 20 are season codes rather than
# month numbers -- confirm against the date helper module.
311 if ((scalar(@{$new}) == 2 && $new->[1] > 20) || (scalar(@{$new}) == 1)) {
312 carp "Can't calculate date change for ", $self->as_string;
316 foreach my $change (@{$cal_change}) {
# A change entry is handled in two forms: 2 characters, or 4 characters
# which are unpacked into a 2-character month and a 2-character day.
319 if (length($change) == 2) {
321 } elsif (length($change) == 4) {
322 ($month, $day) = unpack("a2a2", $change);
# NOTE(review): this printf is live code, not commented out; it writes a
# debug line to the currently selected output handle for every change entry.
325 printf("# calendar_increment('%s', '%s'): change on '%s/%s'\n",
326 join('/', @{$cur}), join('/', @{$new}),
327 $month, defined($day) ? $day : 'UNDEF');
# Element 0 of each date is compared as the year.
329 if ($cur->[0] == $new->[0]) {
330 # Same year, so a 'simple' month/day comparison will be fine
# Increment when @cur is before the change date and @new is on or after it.
# NOTE(review): [$month, $day] always has two elements, so when $day is
# undef it is still compared numerically -- confirm that is acceptable.
331 $incr = (!on_or_after([$cur->[1], $cur->[2]], [$month, $day])
332 && on_or_after([$new->[1], $new->[2]], [$month, $day]));
334 # @cur is in the year before @new. There are
335 # two possible cases for the calendar change date that
336 # indicate that it's time to change the volume:
337 # (1) the change date is AFTER @cur in the year, or
338 # (2) the change date is BEFORE @new in the year.
340 # -------|------|------X------|------|
341 # @cur (1) Jan 1 (2) @new
343 $incr = (on_or_after([$new->[1], $new->[2]], [$month, $day])
344 || !on_or_after([$cur->[1], $cur->[2]], [$month, $day]));
# Return as soon as any change date triggers an increment.
346 return $incr if $incr;
359 my @newend; # only used for combined issues
362 my $reg = $self->{_mfhdc_REGULARITY};
363 my $pattern = $self->{_mfhdc_PATTERN};
364 my $freq = $pattern->{w};
366 foreach my $i (0..$#keys) {
367 $cur[$i] = $next->{$keys[$i]} if exists $next->{$keys[$i]};
370 # If the current issue has a combined date (eg, May/June)
371 # get rid of the first date and base the calculation
372 # on the final date in the combined issue.
373 $cur[-1] =~ s|^[^/]+/||;
375 if (defined $pattern->{y}->{p}) {
376 # There is a $y publication pattern defined in the record:
377 # use it to calculate the next issue date.
379 # XXX TODO: need to handle combined issues.
380 foreach my $pubpat (@{$pattern->{y}->{p}}) {
381 my $chroncode = substr($pubpat, 0, 1);
382 my $genfunc = MFHD::Date::generator($chroncode);
383 my @pats = split(/,/, substr($pubpat, 1));
385 if (!defined $genfunc) {
386 carp "Unrecognized chroncode '$chroncode'";
390 foreach my $pat (@pats) {
391 printf("# next_date: generating with pattern '%s'\n", $pat);
392 my @candidate = $genfunc->($pat, @cur);
394 while ($self->is_omitted(@candidate)) {
395 # printf("# pubpat omitting date '%s'\n",
396 # join('/', @candidate));
397 @candidate = $genfunc->($pat, @candidate);
400 printf("# testing new candidate '%s' against '%s'\n",
401 join('/', @candidate), join('/', @new));
402 if (!defined($new[0])
403 || !on_or_after(\@candidate, \@new)) {
404 # first time through the loop
405 # or @candidate is before @new => @candidate is the next
408 printf("# selecting candidate date '%s'\n", join('/', @new));
413 # Now check for combined issues, like "May/June"
414 foreach my $combpat (@{$pattern->{y}->{c}}) {
415 my $chroncode = substr($combpat, 0, 1);
416 my $genfunc = MFHD::Date::generator($chroncode);
417 my @pats = split(/,/, substr($combpat, 1));
419 foreach my $combined (@pats) {
420 my ($start, $end) = split('/', $combined, 2);
421 my @candidate = $genfunc->($start, @cur);
423 # We don't need to check for omitted issues because
424 # combined issues are always published. OR ARE THEY????
425 if (!defined($new[0])
426 || !on_or_after(\@candidate, \@new)) {
427 # Haven't found a next issue at all yet, or
428 # this one is before the best guess so far
430 @newend = $genfunc->($end, @cur);
435 if (defined($newend[0])) {
436 # The best match was a combined issue
437 foreach my $i (0..$#new) {
438 # don't combine identical fields
439 next if $new[$i] eq $newend[$i];
440 $new[$i] .= '/' . $newend[$i];
444 # There is no $y publication pattern defined, so use
445 # the $w frequency to figure out the next date
447 if (!defined($freq)) {
448 carp "Undefined frequency in next_date!";
449 } elsif (!MFHD::Date::can_increment($freq)) {
450 carp "Don't know how to deal with frequency '$freq'!";
453 # One of the standard defined issue frequencies
455 @new = MFHD::Date::incr_date($freq, @cur);
457 while ($self->is_omitted(@new)) {
458 @new = MFHD::Date::incr_date($freq, @new);
461 if ($self->is_combined(@new)) {
462 my @second_date = MFHD::Date::incr_date($freq, @new);
464 # I am cheating: This code assumes that only the smallest
465 # time increment is combined. So, no "Apr 15/May 1" allowed.
466 $new[-1] = $new[-1] . '/' . $second_date[-1];
471 for my $i (0..$#new) {
472 $next->{$keys[$i]} = $new[$i];
474 # Figure out if we need to adjust volume number
475 # right now just use the $carry that was passed in.
476 # in long run, need to base this on ($carry or date_change)
478 # if $carry is set, the date doesn't matter: we're not
479 # going to increment the v. number twice at year-change.
480 $next->{a} += $carry;
481 } elsif (defined $pattern->{x}) {
482 $next->{a} += $self->calendar_increment(\@cur, \@new);
490 # First handle any "alternative enumeration", since they're
491 # a lot simpler, and don't depend on the calendar
492 foreach my $key ('h', 'g') {
493 next if !exists $next->{$key};
494 if (!$self->capstr($key)) {
495 warn "Holding data exists for $key, but no caption specified";
500 my $cap = $self->capfield($key);
501 if ($cap->{RESTART} && $cap->{COUNT}
502 && ($next->{$key} == $cap->{COUNT})) {
516 # $carry keeps track of whether we need to carry into the next
517 # higher level of enumeration. It's not actually necessary except
518 # for when the loop ends: if we need to carry from $b into $a
519 # then $carry will be set when the loop ends.
521 # We need to keep track of this because there are two different
522 # reasons why we might increment the highest level of enumeration ($a)
523 # 1) we hit the correct number of items in $b (ie, 5th iss of quarterly)
524 # 2) it's the right time of the year.
527 foreach my $key (reverse('b'..'f')) {
528 next if !exists $next->{$key};
530 if (!$self->capstr($key)) {
531 # Just assume that it increments continuously and give up
532 warn "Holding data exists for $key, but no caption specified";
538 # If the current issue has a combined issue number (eg, 2/3)
539 # get rid of the first issue number and base the calculation
540 # on the final issue number in the combined issue.
541 if ($next->{$key} =~ m|/|) {
542 $next->{$key} =~ s|^[^/]+/||;
545 my $cap = $self->capfield($key);
546 if ($cap->{RESTART} && $cap->{COUNT}
547 && ($next->{$key} eq $cap->{COUNT})) {
551 # If I don't need to "carry" beyond here, then I just increment
552 # this level of the enumeration and stop looping, since the
553 # "next" hash has been initialized with the current values
559 # You can't have a combined issue that spans two volumes: no.12/1
561 if ($self->enum_is_combined($key, $next->{$key})) {
562 $next->{$key} .= '/' . ($next->{$key} + 1);
568 # The easy part is done. There are two things left to do:
569 # 1) Calculate the date of the next issue, if necessary
570 # 2) Increment the highest level of enumeration (either by date
571 # or because $carry is set because of the above loop
573 if (!$self->subfield('i')) {
574 # The simple case: if there is no chronology specified
575 # then just check $carry and return
576 $next->{'a'} += $carry;
578 # Figure out date of next issue, then decide if we need
579 # to adjust top level enumeration based on that
580 $self->next_date($next, $carry, ('i'..'m'));
589 # Initialize $next with current enumeration & chronology, then
590 # we can just operate on $next, based on the contents of the caption
592 if ($self->enumeration_is_chronology) {
593 foreach my $key ('a' .. 'h') {
594 $next->{$key} = $holding->{_mfhdh_SUBFIELDS}->{$key}
595 if defined $holding->{_mfhdh_SUBFIELDS}->{$key};
597 $self->next_date($next, 0, ('a' .. 'h'));
602 foreach my $key ('a' .. 'h') {
603 $next->{$key} = $holding->{_mfhdh_SUBFIELDS}->{$key}->{HOLDINGS}
604 if defined $holding->{_mfhdh_SUBFIELDS}->{$key};
607 foreach my $key ('i'..'m') {
608 $next->{$key} = $holding->{_mfhdh_SUBFIELDS}->{$key}
609 if defined $holding->{_mfhdh_SUBFIELDS}->{$key};
612 if (exists $next->{'h'}) {
613 $self->next_alt_enum($next);
616 $self->next_enum($next);