8 use OpenILS::Utils::MFHD::Date;
10 use base 'MARC::Field';
14 my $class = ref($proto) || $proto;
16 my $last_enum = undef;
18 $self->{_mfhdc_ENUMS} = {};
19 $self->{_mfhdc_CHRONS} = {};
20 $self->{_mfhdc_PATTERN} = {};
21 $self->{_mfhdc_COPY} = undef;
22 $self->{_mfhdc_UNIT} = undef;
23 $self->{_mfhdc_LINK_ID} = undef;
24 $self->{_mfhdc_COMPRESSIBLE} = 1; # until proven otherwise
26 foreach my $subfield ($self->subfields) {
27 my ($key, $val) = @$subfield;
29 $self->{_mfhdc_LINK_ID} = $val;
30 } elsif ($key =~ /[a-h]/) {
31 # Enumeration Captions
32 $self->{_mfhdc_ENUMS}->{$key} = {
42 } elsif ($key =~ /[i-m]/) {
44 $self->{_mfhdc_CHRONS}->{$key} = $val;
45 } elsif ($key eq 'u') {
46 # Bib units per next higher enumeration level
48 # Some files seem to have "empty" $u subfields,
49 # especially for top level of enumeration. Just drop them
50 next if (!defined($val) || !$val);
52 carp('$u specified for top-level enumeration')
53 unless defined($last_enum);
54 $self->{_mfhdc_ENUMS}->{$last_enum}->{COUNT} = $val;
55 } elsif ($key eq 'v') {
56 # Is this level of enumeration continuous, or does it restart?
58 # Some files seem to have "empty" $v subfields,
59 # especially for top level of enumeration. Just drop them
60 next if (!defined($val) || !$val);
62 carp '$v specified for top-level enumeration'
63 unless defined($last_enum);
64 $self->{_mfhdc_ENUMS}->{$last_enum}->{RESTART} = ($val eq 'r');
65 } elsif ($key =~ /[npwz]/) {
66 # Publication Pattern info ('o' == type of unit, 'q'..'t' undefined)
67 $self->{_mfhdc_PATTERN}->{$key} = $val;
68 } elsif ($key =~ /x/) {
69 # Calendar change can have multiple comma-separated values
70 $self->{_mfhdc_PATTERN}->{x} = [split /,/, $val];
71 } elsif ($key eq 'y') {
72 $self->{_mfhdc_PATTERN}->{y} = {}
73 unless exists $self->{_mfhdc_PATTERN}->{y};
74 update_pattern($self, $val);
75 } elsif ($key eq 'o') {
77 $self->{_mfhdc_UNIT} = $val;
78 } elsif ($key eq 't') {
79 $self->{_mfhdc_COPY} = $val;
81 carp "Unknown caption subfield '$key'";
85 # subsequent levels of enumeration (primary and alternate)
86 # If an enumeration level doesn't document the number
87 # of "issues" per "volume", or whether numbering of issues
88 # restarts, then we can't compress.
89 foreach my $key ('b', 'c', 'd', 'e', 'f', 'h') {
90 if (exists $self->{_mfhdc_ENUMS}->{$key}) {
91 my $pattern = $self->{_mfhdc_ENUMS}->{$key};
92 if ( !$pattern->{RESTART}
94 || ($pattern->{COUNT} eq 'var')
95 || ($pattern->{COUNT} eq 'und')) {
96 $self->{_mfhdc_COMPRESSIBLE} = 0;
102 my $pat = $self->{_mfhdc_PATTERN};
104 # If there's a $x subfield and a $j, then it's compressible
105 if (exists $pat->{x} && exists $self->{_mfhdc_CHRONS}->{'j'}) {
106 $self->{_mfhdc_COMPRESSIBLE} = 1;
109 bless($self, $class);
117 my $pathash = $self->{_mfhdc_PATTERN}->{y};
118 my ($pubcode, $pat) = unpack("a1a*", $val);
120 $pathash->{$pubcode} = [] unless exists $pathash->{$pubcode};
121 push @{$pathash->{$pubcode}}, $pat;
126 my $pattern = $self->{_mfhdc_PATTERN}->{y};
134 return $self->{_mfhdc_PATTERN};
140 return $self->{_mfhdc_COMPRESSIBLE};
147 if (exists $self->{_mfhdc_CHRONS}->{$key}) {
148 return $self->{_mfhdc_CHRONS}->{$key};
158 if (exists $self->{_mfhdc_ENUMS}->{$key}) {
159 return $self->{_mfhdc_ENUMS}->{$key};
160 } elsif (exists $self->{_mfhdc_CHRONS}->{$key}) {
161 return $self->{_mfhdc_CHRONS}->{$key};
170 my $val = $self->capfield($key);
173 return $val->{CAPTION};
182 return $self->{_mfhdc_UNIT};
188 return $self->{_mfhdc_LINK_ID};
191 sub calendar_change {
194 return $self->{_mfhdc_PATTERN}->{x};
197 # If items are identified by chronology only, with no separate
198 # enumeration (eg, a newspaper issue), then the chronology is
199 # recorded in the enumeration subfields $a - $f. We can tell
200 # that this is the case if there are $a - $f subfields and no
201 # chronology subfields ($i-$k), and none of the $a-$f subfields
202 # have associated $u or $v subfields, but there's a $w and no $x
204 sub enumeration_is_chronology {
207 # There is always a '$a' subfield in well-formed fields.
209 if exists $self->{_mfhdc_CHRONS}->{i}
210 || exists $self->{_mfhdc_PATTERN}->{x};
212 foreach my $key ('a'..'f') {
215 last if !exists $self->{_mfhdc_ENUMS}->{$key};
217 $enum = $self->{_mfhdc_ENUMS}->{$key};
218 return 0 if defined $enum->{COUNT} || defined $enum->{RESTART};
221 return (exists $self->{_mfhdc_PATTERN}->{w});
224 sub regularity_match {
229 # we can't match something that doesn't exist.
230 return 0 if !exists $self->{_mfhdc_PATTERN}->{y}->{$pubcode};
232 foreach my $regularity (@{$self->{_mfhdc_PATTERN}->{y}->{$pubcode}}) {
233 my $chroncode = substr($regularity, 0, 1);
234 my $matchfunc = MFHD::Date::dispatch($chroncode);
235 my @pats = split(/,/, substr($regularity, 1));
237 if (!defined $matchfunc) {
238 carp "Unrecognized chroncode '$chroncode'";
243 foreach my $pat (@pats) {
244 $pat =~ s|/.+||; # If it's a combined date, match the start
245 if ($matchfunc->($pat, @date)) {
258 # printf("# is_omitted: testing date %s: %d\n", join('/', @date),
259 # $self->regularity_match('o', @date));
260 return $self->regularity_match('o', @date);
267 return $self->regularity_match('p', @date);
274 return $self->regularity_match('c', @date);
277 sub enum_is_combined {
279 my $subfield = shift;
281 my $level = ord($subfield) - ord('a') + 1;
283 return 0 if !exists $self->{_mfhdc_PATTERN}->{y}->{c};
285 foreach my $regularity (@{$self->{_mfhdc_PATTERN}->{y}->{c}}) {
286 next unless $regularity =~ m/^e$level/o;
288 my @pats = split(/,/, substr($regularity, 2));
290 foreach my $pat (@pats) {
291 $pat =~ s|/.+||; # if it's a combined issue, match the start
292 return 1 if ($iss eq $pat);
299 # Test to see if $dt1 is on or after $dt2
300 # if length(@{$dt2} == 2, then just month/day are compared
301 # if length(@{$dt2} == 1, then just the months are compared
306 # printf("# on_or_after(%s, %s): ", join('/', @{$dt1}), join('/', @{$dt2}));
308 foreach my $i (0..(scalar(@{$dt2}) - 1)) {
309 if ($dt1->[$i] > $dt2->[$i]) {
310 # printf("after - pass\n");
311 # $dt1 occurs AFTER $dt2
313 } elsif ($dt1->[$i] < $dt2->[$i]) {
314 # printf("before - fail\n");
315 # $dt1 occurs BEFORE $dt2
318 # both are still equal, keep going
321 # We fell out of the loop with them being equal, so it's 'on'
322 # printf("on - pass\n");
326 sub calendar_increment {
330 my $cal_change = $self->calendar_change;
336 # A calendar change is defined, need to check if it applies
337 if (scalar(@{$new}) == 1) {
338 carp "Can't calculate date change for ", $self->as_string;
342 foreach my $change (@{$cal_change}) {
345 if (length($change) == 2) {
347 } elsif (length($change) == 4) {
348 ($month, $day) = unpack("a2a2", $change);
351 # printf("# calendar_increment('%s', '%s'): change on '%s/%s'\n",
352 # join('/', @{$cur}), join('/', @{$new}),
353 # $month, defined($day) ? $day : 'UNDEF');
355 if ($cur->[0] == $new->[0]) {
356 # Same year, so a 'simple' month/day comparison will be fine
358 ( !on_or_after([$cur->[1], $cur->[2]], [$month, $day])
359 && on_or_after([$new->[1], $new->[2]], [$month, $day]));
361 # @cur is in the year before @new. There are
362 # two possible cases for the calendar change date that
363 # indicate that it's time to change the volume:
364 # (1) the change date is AFTER @cur in the year, or
365 # (2) the change date is BEFORE @new in the year.
367 # -------|------|------X------|------|
368 # @cur (1) Jan 1 (2) @new
371 (on_or_after([$new->[1], $new->[2]], [$month, $day])
372 || !on_or_after([$cur->[1], $cur->[2]], [$month, $day]));
374 return $incr if $incr;
387 my @newend; # only used for combined issues
390 my $reg = $self->{_mfhdc_REGULARITY};
391 my $pattern = $self->{_mfhdc_PATTERN};
392 my $freq = $pattern->{w};
394 foreach my $i (0..$#keys) {
395 if (exists $next->{$keys[$i]}) {
396 $cur[$i] = $next->{$keys[$i]};
397 # If the current issue has a combined date (eg, May/June)
398 # get rid of the first date and base the calculation
399 # on the final date in the combined issue.
400 $cur[$i] =~ s|^[^/]+/||;
404 if (defined $pattern->{y}->{p}) {
405 # There is a $y publication pattern defined in the record:
406 # use it to calculate the next issue date.
408 foreach my $pubpat (@{$pattern->{y}->{p}}, @{$pattern->{y}->{c}}) {
409 my $chroncode = substr($pubpat, 0, 1);
410 my $genfunc = MFHD::Date::generator($chroncode);
411 my @pats = split(/,/, substr($pubpat, 1));
413 next if $chroncode eq 'e';
415 if (!defined $genfunc) {
416 carp "Unrecognized chroncode '$chroncode'";
420 foreach my $pat (@pats) {
421 my $combined = $pat =~ m|/|;
425 # printf("# next_date: generating with pattern '%s'\n", $pat);
428 ($start, $end) = split('/', $pat, 2);
430 ($start, $end) = (undef, undef);
433 @candidate = $genfunc->($start || $pat, \@cur, $self);
435 while ($self->is_omitted(@candidate)) {
436 # printf("# pubpat omitting date '%s'\n",
437 # join('/', @candidate));
438 @candidate = $genfunc->($start || $pat, \@candidate, $self);
441 # printf("# testing new candidate '%s' against '%s'\n",
442 # join('/', @candidate), join('/', @new));
444 if (!defined($new[0]) || !on_or_after(\@candidate, \@new)) {
445 # first time through the loop
446 # or @candidate is before @new =>
447 # @candidate is the next issue.
450 @newend = $genfunc->($end, \@cur, $self);
455 # printf("# selecting candidate date '%s'\n", join('/', @new));
460 $new[1] = 24 if ($new[1] == 20); # restore fake early winter
462 if (defined($newend[0])) {
463 # The best match was a combined issue
464 foreach my $i (0..$#new) {
465 # don't combine identical fields
466 next if $new[$i] eq $newend[$i];
467 $new[$i] .= '/' . $newend[$i];
472 if (scalar @new == 0) {
473 # There was no suitable publication pattern defined,
474 # so use the $w frequency to figure out the next date
475 if (!defined($freq)) {
476 carp "Undefined frequency in next_chron!";
477 } elsif (!MFHD::Date::can_increment($freq)) {
478 croak "Don't know how to deal with frequency '$freq'!";
480 # One of the standard defined issue frequencies
481 @new = MFHD::Date::incr_date($freq, @cur);
483 while ($self->is_omitted(@new)) {
484 @new = MFHD::Date::incr_date($freq, @new);
487 if ($self->is_combined(@new)) {
488 my @second_date = MFHD::Date::incr_date($freq, @new);
489 foreach my $i (0..$#new) {
490 # don't combine identical fields
491 next if $new[$i] eq $second_date[$i];
492 $new[$i] .= '/' . $second_date[$i];
498 for my $i (0..$#new) {
499 $next->{$keys[$i]} = $new[$i];
502 # Figure out if we need to adjust volume number
504 # If we are incrementing based on date, $carry doesn't
505 # matter: we're not going to increment the v. number twice
507 # It is conceivable that a serial could increment based on date for some
508 # volumes and issue numbering for other volumes, but until a real case
509 # comes up, let's assume that defined calendar changes always trump $u
510 if (defined $pattern->{x}) {
511 my $increment = $self->calendar_increment(\@cur, \@new);
512 # if we hit a calendar change, restart dependant restarters
513 # regardless of whether they thought they should
515 $next->{a} += $increment;
516 foreach my $key ('b'..'f') {
517 next if !exists $next->{$key};
518 my $cap = $self->capfield($key);
519 if ($cap->{RESTART}) {
521 if ($self->enum_is_combined($key, $next->{$key})) {
522 $next->{$key} .= '/' . ($next->{$key} + 1);
525 last; # if we find a non-restarting level, stop
530 $next->{a} += $carry;
534 sub winter_starts_year {
537 my $pubpats = $self->{_mfhdc_PATTERN}->{y}->{p};
538 my $freq = $self->{_mfhdc_PATTERN}->{w};
540 if ($freq =~ /^\d$/) {
541 foreach my $pubpat (@$pubpats) {
542 my $chroncode = substr($pubpat, 0, 1);
543 if ($chroncode eq 's') {
544 # check first instance only
545 if (substr($pubpat, 1, 2) == 24) {
561 # First handle any "alternative enumeration", since they're
562 # a lot simpler, and don't depend on the the calendar
563 foreach my $key ('h', 'g') {
564 next if !exists $next->{$key};
565 if (!$self->capstr($key)) {
566 warn "Holding data exists for $key, but no caption specified";
571 my $cap = $self->capfield($key);
574 && ($next->{$key} == $cap->{COUNT})) {
583 # Check caption for $ype subfield, specifying that there's a
584 # particular publication pattern for the given level of enumeration
585 # returns the pattern string or undef
590 return undef if !exists $self->{_mfhdc_PATTERN}->{y}->{p};
592 foreach my $reg (@{$self->{_mfhdc_PATTERN}->{y}->{p}}) {
593 if ($reg =~ m/^e$level/o) {
594 return substr($reg, 2);
605 # $carry keeps track of whether we need to carry into the next
606 # higher level of enumeration. It's not actually necessary except
607 # for when the loop ends: if we need to carry from $b into $a
608 # then $carry will be set when the loop ends.
610 # We need to keep track of this because there are two different
611 # reasons why we might increment the highest level of enumeration ($a)
612 # 1) we hit the correct number of items in $b (ie, 5th iss of quarterly)
613 # 2) it's the right time of the year.
616 # If there's a subfield b, then we will go through the loop at
617 # least once. If there's no subfield b, then there's only a single
618 # level of enumeration, so we just add one to it and we're done.
619 if (exists $next->{b}) {
624 foreach my $key (reverse('b'..'f')) {
628 next if !exists $next->{$key};
630 # If the current issue has a combined issue number (eg, 2/3)
631 # get rid of the first issue number and base the calculation
632 # on the final issue number in the combined issue.
633 if ($next->{$key} =~ m|/|) {
634 $next->{$key} =~ s|^[^/]+/||;
637 $level = ord($key) - ord('a') + 1; # enumeration level
639 $pubpat = $self->enum_pubpat($level);
642 # printf("# next_enum: found pubpat '%s' for subfield '%s'\n",
644 my @pats = split(/,/, $pubpat);
646 # If we fall out the bottom of the loop, then $carry
647 # will still be 1, and we will reset the current
648 # level to the first value in @pats and increment
649 # then next higher level.
652 foreach my $pat (@pats) {
653 my $combined = $pat =~ m|/|;
656 # printf("# next_enum: checking current '%s' against pat '%s'\n",
657 # $next->{$key}, $pat);
660 ($pat, $end) = split('/', $pat, 2);
665 if ($pat > $next->{$key}) {
667 $next->{$key} = $pat;
668 $next->{$key} .= '/' . $end if $end;
669 # printf("# next_enum: selecting new issue no. %s\n", $next->{$key});
670 last; # We've found the correct next issue number
674 $next->{$key} = $pats[0];
676 last; # exit the top level loop because we're done
680 # No enumeration publication pattern specified for this level,
681 # just keep adding one.
683 if (!$self->capstr($key)) {
684 # Just assume that it increments continuously and give up
685 warn "Holding data exists for $key, but no caption specified";
691 # printf("# next_enum: no publication pattern, using frequency\n");
693 my $cap = $self->capfield($key);
696 && ($next->{$key} eq $cap->{COUNT})) {
699 } elsif ($cap->{COUNT} > 0 and !($next->{$key} % $cap->{COUNT})) {
700 # If we have a non-restarting enum, but we define a count,
701 # we need to carry to the next level when the current value
702 # divides evenly by the count
703 # XXX: this code naively assumes that there has never been an
704 # issue number anomaly of any kind (like an extra issue), but this
705 # limit is inherent in the standard
709 # If I don't need to "carry" beyond here, then I just increment
710 # this level of the enumeration and stop looping, since the
711 # "next" hash has been initialized with the current values
717 # You can't have a combined issue that spans two volumes: no.12/1
719 if ($self->enum_is_combined($key, $next->{$key})) {
720 $next->{$key} .= '/' . ($next->{$key} + 1);
727 # The easy part is done. There are two things left to do:
728 # 1) Calculate the date of the next issue, if necessary
729 # 2) Increment the highest level of enumeration (either by date
730 # or because $carry is set because of the above loop
732 if (!$self->subfield('i') || !$next->{i}) {
733 # The simple case: if there is no chronology specified
734 # then just check $carry and return
735 $next->{'a'} += $carry;
737 # Figure out date of next issue, then decide if we need
738 # to adjust top level enumeration based on that
739 $self->next_chron($next, $carry, ('i'..'m'));
748 # If the holding is compressed and not open ended, base next() on the
749 # closing date. If the holding is open-ended, next() is undefined
751 if ($holding->is_compressed) {
752 return undef if $holding->is_open_ended;
753 # TODO: error on next for open-ended holdings?
759 # Initialize $next with current enumeration & chronology, then
760 # we can just operate on $next, based on the contents of the caption
761 foreach my $key ('a'..'m') {
762 my $holding_values = $holding->field_values($key);
763 $next->{$key} = ${$holding_values}[$index] if defined $holding_values;
766 if ($self->enumeration_is_chronology) {
767 $self->next_chron($next, 0, ('a'..'h'));
771 if (exists $next->{'h'}) {
772 $self->next_alt_enum($next);
775 $self->next_enum($next);
780 # return a simple subfields list
785 foreach my $subfield ($self->subfields) {
786 push(@subfields, $subfield->[0], $subfield->[1]);