DfsObjDatabase.java
- /*
- * Copyright (C) 2011, Google Inc. and others
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Distribution License v. 1.0 which is available at
- * https://www.eclipse.org/org/documents/edl-v10.php.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
- package org.eclipse.jgit.internal.storage.dfs;
- import static java.util.stream.Collectors.joining;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.Collection;
- import java.util.Collections;
- import java.util.Comparator;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.List;
- import java.util.Map;
- import java.util.Set;
- import java.util.concurrent.atomic.AtomicReference;
- import org.eclipse.jgit.internal.storage.pack.PackExt;
- import org.eclipse.jgit.lib.AnyObjectId;
- import org.eclipse.jgit.lib.ObjectDatabase;
- import org.eclipse.jgit.lib.ObjectInserter;
- import org.eclipse.jgit.lib.ObjectReader;
- /**
- * Manages objects stored in
- * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackFile} on a storage
- * system.
- */
- public abstract class DfsObjDatabase extends ObjectDatabase {
- private static final PackList NO_PACKS = new PackList(
- new DfsPackFile[0],
- new DfsReftable[0]) {
- @Override
- boolean dirty() {
- return true;
- }
- @Override
- void clearDirty() {
- // Always dirty.
- }
- @Override
- public void markDirty() {
- // Always dirty.
- }
- };
- /**
- * Sources for a pack file.
- * <p>
- * <strong>Note:</strong> When sorting packs by source, do not use the default
- * comparator based on {@link Enum#compareTo}. Prefer {@link
- * #DEFAULT_COMPARATOR} or your own {@link ComparatorBuilder}.
- */
- public enum PackSource {
- /** The pack is created by ObjectInserter due to local activity. */
- INSERT,
- /**
- * The pack is created by PackParser due to a network event.
- * <p>
- * A received pack can be from either a push into the repository, or a
- * fetch into the repository, the direction doesn't matter. A received
- * pack was built by the remote Git implementation and may not match the
- * storage layout preferred by this version. Received packs are likely
- * to be either compacted or garbage collected in the future.
- */
- RECEIVE,
- /**
- * The pack was created by compacting multiple packs together.
- * <p>
- * Packs created by compacting multiple packs together aren't nearly as
- * efficient as a fully garbage collected repository, but may save disk
- * space by reducing redundant copies of base objects.
- *
- * @see DfsPackCompactor
- */
- COMPACT,
- /**
- * Pack was created by Git garbage collection by this implementation.
- * <p>
- * This source is only used by the {@link DfsGarbageCollector} when it
- * builds a pack file by traversing the object graph and copying all
- * reachable objects into a new pack stream.
- *
- * @see DfsGarbageCollector
- */
- GC,
- /** Created from non-heads by {@link DfsGarbageCollector}. */
- GC_REST,
- /**
- * Pack was created by Git garbage collection.
- * <p>
- * This pack contains only unreachable garbage that was found during the
- * last GC pass. It is retained in a new pack until it is safe to prune
- * these objects from the repository.
- */
- UNREACHABLE_GARBAGE;
- /**
- * Default comparator for sources.
- * <p>
- * Sorts generally newer, smaller types such as {@code INSERT} and {@code
- * RECEIVE} earlier; older, larger types such as {@code GC} later; and
- * {@code UNREACHABLE_GARBAGE} at the end.
- */
- public static final Comparator<PackSource> DEFAULT_COMPARATOR =
- new ComparatorBuilder()
- .add(INSERT, RECEIVE)
- .add(COMPACT)
- .add(GC)
- .add(GC_REST)
- .add(UNREACHABLE_GARBAGE)
- .build();
- /**
- * Builder for describing {@link PackSource} ordering where some values are
- * explicitly considered equal to others.
- */
- public static class ComparatorBuilder {
- private final Map<PackSource, Integer> ranks = new HashMap<>();
- private int counter;
- /**
- * Add a collection of sources that should sort as equal.
- * <p>
- * Sources in the input will sort after sources listed in previous calls
- * to this method.
- *
- * @param sources
- * sources in this equivalence class.
- * @return this.
- */
- public ComparatorBuilder add(PackSource... sources) {
- for (PackSource s : sources) {
- ranks.put(s, Integer.valueOf(counter));
- }
- counter++;
- return this;
- }
- /**
- * Build the comparator.
- *
- * @return new comparator instance.
- * @throws IllegalArgumentException
- * not all {@link PackSource} instances were explicitly assigned
- * an equivalence class.
- */
- public Comparator<PackSource> build() {
- return new PackSourceComparator(ranks);
- }
- }
- private static class PackSourceComparator implements Comparator<PackSource> {
- private final Map<PackSource, Integer> ranks;
- private PackSourceComparator(Map<PackSource, Integer> ranks) {
- if (!ranks.keySet().equals(
- new HashSet<>(Arrays.asList(PackSource.values())))) {
- throw new IllegalArgumentException();
- }
- this.ranks = new HashMap<>(ranks);
- }
- @Override
- public int compare(PackSource a, PackSource b) {
- return ranks.get(a).compareTo(ranks.get(b));
- }
- @Override
- public String toString() {
- return Arrays.stream(PackSource.values())
- .map(s -> s + "=" + ranks.get(s)) //$NON-NLS-1$
- .collect(joining(", ", getClass().getSimpleName() + "{", "}")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
- }
- }
- }
- private final AtomicReference<PackList> packList;
- private final DfsRepository repository;
- private DfsReaderOptions readerOptions;
- private Comparator<DfsPackDescription> packComparator;
- /**
- * Initialize an object database for our repository.
- *
- * @param repository
- * repository owning this object database.
- * @param options
- * how readers should access the object database.
- */
- protected DfsObjDatabase(DfsRepository repository,
- DfsReaderOptions options) {
- this.repository = repository;
- this.packList = new AtomicReference<>(NO_PACKS);
- this.readerOptions = options;
- this.packComparator = DfsPackDescription.objectLookupComparator();
- }
- /**
- * Get configured reader options, such as read-ahead.
- *
- * @return configured reader options, such as read-ahead.
- */
- public DfsReaderOptions getReaderOptions() {
- return readerOptions;
- }
- /**
- * Set the comparator used when searching for objects across packs.
- * <p>
- * An optimal comparator will find more objects without having to load large
- * idx files from storage only to find that they don't contain the object.
- * See {@link DfsPackDescription#objectLookupComparator()} for the default
- * heuristics.
- *
- * @param packComparator
- * comparator.
- */
- public void setPackComparator(Comparator<DfsPackDescription> packComparator) {
- this.packComparator = packComparator;
- }
- /** {@inheritDoc} */
- @Override
- public DfsReader newReader() {
- return new DfsReader(this);
- }
- /** {@inheritDoc} */
- @Override
- public ObjectInserter newInserter() {
- return new DfsInserter(this);
- }
- /**
- * Scan and list all available pack files in the repository.
- *
- * @return list of available packs. The returned array is shared with the
- * implementation and must not be modified by the caller.
- * @throws java.io.IOException
- * the pack list cannot be initialized.
- */
- public DfsPackFile[] getPacks() throws IOException {
- return getPackList().packs;
- }
- /**
- * Scan and list all available reftable files in the repository.
- *
- * @return list of available reftables. The returned array is shared with
- * the implementation and must not be modified by the caller.
- * @throws java.io.IOException
- * the pack list cannot be initialized.
- */
- public DfsReftable[] getReftables() throws IOException {
- return getPackList().reftables;
- }
- /**
- * Scan and list all available pack files in the repository.
- *
- * @return list of available packs, with some additional metadata. The
- * returned array is shared with the implementation and must not be
- * modified by the caller.
- * @throws java.io.IOException
- * the pack list cannot be initialized.
- */
- public PackList getPackList() throws IOException {
- return scanPacks(NO_PACKS);
- }
- /**
- * Get repository owning this object database.
- *
- * @return repository owning this object database.
- */
- protected DfsRepository getRepository() {
- return repository;
- }
- /**
- * List currently known pack files in the repository, without scanning.
- *
- * @return list of available packs. The returned array is shared with the
- * implementation and must not be modified by the caller.
- */
- public DfsPackFile[] getCurrentPacks() {
- return getCurrentPackList().packs;
- }
- /**
- * List currently known reftable files in the repository, without scanning.
- *
- * @return list of available reftables. The returned array is shared with
- * the implementation and must not be modified by the caller.
- */
- public DfsReftable[] getCurrentReftables() {
- return getCurrentPackList().reftables;
- }
- /**
- * List currently known pack files in the repository, without scanning.
- *
- * @return list of available packs, with some additional metadata. The
- * returned array is shared with the implementation and must not be
- * modified by the caller.
- */
- public PackList getCurrentPackList() {
- return packList.get();
- }
- /**
- * Does the requested object exist in this database?
- * <p>
- * This differs from ObjectDatabase's implementation in that we can selectively
- * ignore unreachable (garbage) objects.
- *
- * @param objectId
- * identity of the object to test for existence of.
- * @param avoidUnreachableObjects
- * if true, ignore objects that are unreachable.
- * @return true if the specified object is stored in this database.
- * @throws java.io.IOException
- * the object store cannot be accessed.
- */
- public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
- throws IOException {
- try (ObjectReader or = newReader()) {
- or.setAvoidUnreachableObjects(avoidUnreachableObjects);
- return or.has(objectId);
- }
- }
- /**
- * Generate a new unique name for a pack file.
- *
- * @param source
- * where the pack stream is created.
- * @return a unique name for the pack file. Must not collide with any other
- * pack file name in the same DFS.
- * @throws java.io.IOException
- * a new unique pack description cannot be generated.
- */
- protected abstract DfsPackDescription newPack(PackSource source)
- throws IOException;
- /**
- * Generate a new unique name for a pack file.
- *
- * <p>
- * Default implementation of this method would be equivalent to
- * {@code newPack(source).setEstimatedPackSize(estimatedPackSize)}. But the
- * clients can override this method to use the given
- * {@code estomatedPackSize} value more efficiently in the process of
- * creating a new
- * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackDescription} object.
- *
- * @param source
- * where the pack stream is created.
- * @param estimatedPackSize
- * the estimated size of the pack.
- * @return a unique name for the pack file. Must not collide with any other
- * pack file name in the same DFS.
- * @throws java.io.IOException
- * a new unique pack description cannot be generated.
- */
- protected DfsPackDescription newPack(PackSource source,
- long estimatedPackSize) throws IOException {
- DfsPackDescription pack = newPack(source);
- pack.setEstimatedPackSize(estimatedPackSize);
- return pack;
- }
- /**
- * Commit a pack and index pair that was written to the DFS.
- * <p>
- * Committing the pack/index pair makes them visible to readers. The JGit
- * DFS code always writes the pack, then the index. This allows a simple
- * commit process to do nothing if readers always look for both files to
- * exist and the DFS performs atomic creation of the file (e.g. stream to a
- * temporary file and rename to target on close).
- * <p>
- * During pack compaction or GC the new pack file may be replacing other
- * older files. Implementations should remove those older files (if any) as
- * part of the commit of the new file.
- * <p>
- * This method is a trivial wrapper around
- * {@link #commitPackImpl(Collection, Collection)} that calls the
- * implementation and fires events.
- *
- * @param desc
- * description of the new packs.
- * @param replaces
- * if not null, list of packs to remove.
- * @throws java.io.IOException
- * the packs cannot be committed. On failure a rollback must
- * also be attempted by the caller.
- */
- protected void commitPack(Collection<DfsPackDescription> desc,
- Collection<DfsPackDescription> replaces) throws IOException {
- commitPackImpl(desc, replaces);
- getRepository().fireEvent(new DfsPacksChangedEvent());
- }
- /**
- * Implementation of pack commit.
- *
- * @see #commitPack(Collection, Collection)
- * @param desc
- * description of the new packs.
- * @param replaces
- * if not null, list of packs to remove.
- * @throws java.io.IOException
- * the packs cannot be committed.
- */
- protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
- Collection<DfsPackDescription> replaces) throws IOException;
- /**
- * Try to rollback a pack creation.
- * <p>
- * JGit DFS always writes the pack first, then the index. If the pack does
- * not yet exist, then neither does the index. A safe DFS implementation
- * would try to remove both files to ensure they are really gone.
- * <p>
- * A rollback does not support failures, as it only occurs when there is
- * already a failure in progress. A DFS implementor may wish to log
- * warnings/error messages when a rollback fails, but should not send new
- * exceptions up the Java callstack.
- *
- * @param desc
- * pack to delete.
- */
- protected abstract void rollbackPack(Collection<DfsPackDescription> desc);
- /**
- * List the available pack files.
- * <p>
- * The returned list must support random access and must be mutable by the
- * caller. It is sorted in place using the natural sorting of the returned
- * DfsPackDescription objects.
- *
- * @return available packs. May be empty if there are no packs.
- * @throws java.io.IOException
- * the packs cannot be listed and the object database is not
- * functional to the caller.
- */
- protected abstract List<DfsPackDescription> listPacks() throws IOException;
- /**
- * Open a pack, pack index, or other related file for reading.
- *
- * @param desc
- * description of pack related to the data that will be read.
- * This is an instance previously obtained from
- * {@link #listPacks()}, but not necessarily from the same
- * DfsObjDatabase instance.
- * @param ext
- * file extension that will be read i.e "pack" or "idx".
- * @return channel to read the file.
- * @throws java.io.FileNotFoundException
- * the file does not exist.
- * @throws java.io.IOException
- * the file cannot be opened.
- */
- protected abstract ReadableChannel openFile(
- DfsPackDescription desc, PackExt ext)
- throws FileNotFoundException, IOException;
- /**
- * Open a pack, pack index, or other related file for writing.
- *
- * @param desc
- * description of pack related to the data that will be written.
- * This is an instance previously obtained from
- * {@link #newPack(PackSource)}.
- * @param ext
- * file extension that will be written i.e "pack" or "idx".
- * @return channel to write the file.
- * @throws java.io.IOException
- * the file cannot be opened.
- */
- protected abstract DfsOutputStream writeFile(
- DfsPackDescription desc, PackExt ext) throws IOException;
- void addPack(DfsPackFile newPack) throws IOException {
- PackList o, n;
- do {
- o = packList.get();
- if (o == NO_PACKS) {
- // The repository may not have needed any existing objects to
- // complete the current task of creating a pack (e.g. push of a
- // pack with no external deltas). Because we don't scan for
- // newly added packs on missed object lookups, scan now to
- // make sure all older packs are available in the packList.
- o = scanPacks(o);
- // Its possible the scan identified the pack we were asked to
- // add, as the pack was already committed via commitPack().
- // If this is the case return without changing the list.
- for (DfsPackFile p : o.packs) {
- if (p.key.equals(newPack.key)) {
- return;
- }
- }
- }
- DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
- packs[0] = newPack;
- System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
- n = new PackListImpl(packs, o.reftables);
- } while (!packList.compareAndSet(o, n));
- }
- void addReftable(DfsPackDescription add, Set<DfsPackDescription> remove)
- throws IOException {
- PackList o, n;
- do {
- o = packList.get();
- if (o == NO_PACKS) {
- o = scanPacks(o);
- for (DfsReftable t : o.reftables) {
- if (t.getPackDescription().equals(add)) {
- return;
- }
- }
- }
- List<DfsReftable> tables = new ArrayList<>(1 + o.reftables.length);
- for (DfsReftable t : o.reftables) {
- if (!remove.contains(t.getPackDescription())) {
- tables.add(t);
- }
- }
- tables.add(new DfsReftable(add));
- n = new PackListImpl(o.packs, tables.toArray(new DfsReftable[0]));
- } while (!packList.compareAndSet(o, n));
- }
- PackList scanPacks(PackList original) throws IOException {
- PackList o, n;
- synchronized (packList) {
- do {
- o = packList.get();
- if (o != original) {
- // Another thread did the scan for us, while we
- // were blocked on the monitor above.
- //
- return o;
- }
- n = scanPacksImpl(o);
- if (n == o)
- return n;
- } while (!packList.compareAndSet(o, n));
- }
- getRepository().fireEvent(new DfsPacksChangedEvent());
- return n;
- }
- private PackList scanPacksImpl(PackList old) throws IOException {
- DfsBlockCache cache = DfsBlockCache.getInstance();
- Map<DfsPackDescription, DfsPackFile> packs = packMap(old);
- Map<DfsPackDescription, DfsReftable> reftables = reftableMap(old);
- List<DfsPackDescription> scanned = listPacks();
- Collections.sort(scanned, packComparator);
- List<DfsPackFile> newPacks = new ArrayList<>(scanned.size());
- List<DfsReftable> newReftables = new ArrayList<>(scanned.size());
- boolean foundNew = false;
- for (DfsPackDescription dsc : scanned) {
- DfsPackFile oldPack = packs.remove(dsc);
- if (oldPack != null) {
- newPacks.add(oldPack);
- } else if (dsc.hasFileExt(PackExt.PACK)) {
- newPacks.add(new DfsPackFile(cache, dsc));
- foundNew = true;
- }
- DfsReftable oldReftable = reftables.remove(dsc);
- if (oldReftable != null) {
- newReftables.add(oldReftable);
- } else if (dsc.hasFileExt(PackExt.REFTABLE)) {
- newReftables.add(new DfsReftable(cache, dsc));
- foundNew = true;
- }
- }
- if (newPacks.isEmpty() && newReftables.isEmpty())
- return new PackListImpl(NO_PACKS.packs, NO_PACKS.reftables);
- if (!foundNew) {
- old.clearDirty();
- return old;
- }
- Collections.sort(newReftables, reftableComparator());
- return new PackListImpl(
- newPacks.toArray(new DfsPackFile[0]),
- newReftables.toArray(new DfsReftable[0]));
- }
- private static Map<DfsPackDescription, DfsPackFile> packMap(PackList old) {
- Map<DfsPackDescription, DfsPackFile> forReuse = new HashMap<>();
- for (DfsPackFile p : old.packs) {
- if (!p.invalid()) {
- forReuse.put(p.desc, p);
- }
- }
- return forReuse;
- }
- private static Map<DfsPackDescription, DfsReftable> reftableMap(PackList old) {
- Map<DfsPackDescription, DfsReftable> forReuse = new HashMap<>();
- for (DfsReftable p : old.reftables) {
- if (!p.invalid()) {
- forReuse.put(p.desc, p);
- }
- }
- return forReuse;
- }
- /**
- * Get comparator to sort {@link DfsReftable} by priority.
- *
- * @return comparator to sort {@link DfsReftable} by priority.
- */
- protected Comparator<DfsReftable> reftableComparator() {
- return Comparator.comparing(
- DfsReftable::getPackDescription,
- DfsPackDescription.reftableComparator());
- }
- /**
- * Clears the cached list of packs, forcing them to be scanned again.
- */
- protected void clearCache() {
- packList.set(NO_PACKS);
- }
- /** {@inheritDoc} */
- @Override
- public void close() {
- packList.set(NO_PACKS);
- }
- /** Snapshot of packs scanned in a single pass. */
- public abstract static class PackList {
- /** All known packs, sorted. */
- public final DfsPackFile[] packs;
- /** All known reftables, sorted. */
- public final DfsReftable[] reftables;
- private long lastModified = -1;
- PackList(DfsPackFile[] packs, DfsReftable[] reftables) {
- this.packs = packs;
- this.reftables = reftables;
- }
- /** @return last modified time of all packs, in milliseconds. */
- public long getLastModified() {
- if (lastModified < 0) {
- long max = 0;
- for (DfsPackFile pack : packs) {
- max = Math.max(max, pack.getPackDescription().getLastModified());
- }
- lastModified = max;
- }
- return lastModified;
- }
- abstract boolean dirty();
- abstract void clearDirty();
- /**
- * Mark pack list as dirty.
- * <p>
- * Used when the caller knows that new data might have been written to the
- * repository that could invalidate open readers depending on this pack list,
- * for example if refs are newly scanned.
- */
- public abstract void markDirty();
- }
- private static final class PackListImpl extends PackList {
- private volatile boolean dirty;
- PackListImpl(DfsPackFile[] packs, DfsReftable[] reftables) {
- super(packs, reftables);
- }
- @Override
- boolean dirty() {
- return dirty;
- }
- @Override
- void clearDirty() {
- dirty = false;
- }
- @Override
- public void markDirty() {
- dirty = true;
- }
- }
- }