DfsPackCompactor.java

/*
 * Copyright (C) 2011, Google Inc. and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;

/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result. Because the new pack
 * is constructed by enumerating the indexes of the source packs, it is quicker
 * than doing a full repack of the repository; however, the result is not
 * nearly as space efficient, since new delta compression is disabled.
 * <p>
 * This strategy is suitable for quickly combining several packs together after
 * receiving a number of small fetch or push operations into a repository,
 * allowing the system to maintain reasonable read performance without
 * expending a lot of time repacking the entire repository.
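 * <p>
 * A minimal usage sketch (assuming {@code repo} is an open
 * {@link DfsRepository}; a {@code null} monitor falls back to
 * {@link NullProgressMonitor}):
 *
 * <pre>
 * DfsPackCompactor compactor = new DfsPackCompactor(repo);
 * compactor.autoAdd();
 * compactor.compact(null);
 * </pre>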
 */
public class DfsPackCompactor {
    private final DfsRepository repo;
    private final List<DfsPackFile> srcPacks;
    private final List<DfsReftable> srcReftables;
    private final List<ObjectIdSet> exclude;

    private PackStatistics newStats;
    private DfsPackDescription outDesc;

    private int autoAddSize;
    private ReftableConfig reftableConfig;

    private RevWalk rw;
    private RevFlag added;
    private RevFlag isBase;

    /**
     * Initialize a pack compactor.
     *
     * @param repository
     *            repository objects to be packed will be read from.
     */
    public DfsPackCompactor(DfsRepository repository) {
        repo = repository;
        autoAddSize = 5 * 1024 * 1024; // 5 MiB
        srcPacks = new ArrayList<>();
        srcReftables = new ArrayList<>();
        exclude = new ArrayList<>(4);
    }

    /**
     * Set configuration to write a reftable.
     *
     * @param cfg
     *            configuration to write a reftable. Reftable compacting is
     *            disabled (default) when {@code cfg} is {@code null}.
     * @return {@code this}
     */
    public DfsPackCompactor setReftableConfig(ReftableConfig cfg) {
        reftableConfig = cfg;
        return this;
    }

    /**
     * Add a pack to be compacted.
     * <p>
     * All of the objects in this pack will be copied into the resulting pack.
     * The resulting pack will order objects according to the source pack's own
     * description ordering (which is based on creation date), and then by the
     * order the objects appear in the source pack.
     *
     * @param pack
     *            a pack to combine into the resulting pack.
     * @return {@code this}
     */
    public DfsPackCompactor add(DfsPackFile pack) {
        srcPacks.add(pack);
        return this;
    }

    /**
     * Add a reftable to be compacted.
     *
     * @param table
     *            a reftable to combine.
     * @return {@code this}
     */
    public DfsPackCompactor add(DfsReftable table) {
        srcReftables.add(table);
        return this;
    }

    /**
     * Automatically select packs and reftables to be included, and add them.
     * <p>
     * Packs are selected based on size: smaller packs are included, while
     * bigger ones are omitted and their objects excluded from the result.
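     * <p>
     * A sketch of a typical combination with reftable compaction enabled;
     * the plain {@link ReftableConfig} and the {@code pm} monitor here are
     * illustrative. {@link #setReftableConfig} must be called before this
     * method for reftables to be selected:
     *
     * <pre>
     * new DfsPackCompactor(repo)
     *         .setReftableConfig(new ReftableConfig())
     *         .autoAdd()
     *         .compact(pm);
     * </pre>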
     *
     * @return {@code this}
     * @throws java.io.IOException
     *             existing packs cannot be read.
     */
    public DfsPackCompactor autoAdd() throws IOException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
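        // Packs below the size threshold are combined; larger packs stay in
        // place and their objects are excluded to avoid duplicating them.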
        for (DfsPackFile pack : objdb.getPacks()) {
            DfsPackDescription d = pack.getPackDescription();
            if (d.getFileSize(PACK) < autoAddSize)
                add(pack);
            else
                exclude(pack);
        }

        if (reftableConfig != null) {
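            // Only small, non-GC reftables are compacted; the GC reftable
            // forms the stable base of the reftable stack.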
            for (DfsReftable table : objdb.getReftables()) {
                DfsPackDescription d = table.getPackDescription();
                if (d.getPackSource() != GC
                        && d.getFileSize(REFTABLE) < autoAddSize) {
                    add(table);
                }
            }
        }
        return this;
    }

    /**
     * Exclude objects from the compacted pack.
     *
     * @param set
     *            objects to not include.
     * @return {@code this}.
     */
    public DfsPackCompactor exclude(ObjectIdSet set) {
        exclude.add(set);
        return this;
    }

    /**
     * Exclude objects from the compacted pack.
     *
     * @param pack
     *            objects to not include.
     * @return {@code this}.
     * @throws java.io.IOException
     *             pack index cannot be loaded.
     */
    public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
        final PackIndex idx;
        try (DfsReader ctx = (DfsReader) repo.newObjectReader()) {
            idx = pack.getPackIndex(ctx);
        }
        return exclude(idx);
    }

    /**
     * Compact the pack files together.
     *
     * @param pm
     *            progress monitor to receive updates on; packing may take a
     *            while, depending on the size of the repository.
     * @throws java.io.IOException
     *             the packs cannot be compacted.
     */
    public void compact(ProgressMonitor pm) throws IOException {
        if (pm == null) {
            pm = NullProgressMonitor.INSTANCE;
        }

        DfsObjDatabase objdb = repo.getObjectDatabase();
        try (DfsReader ctx = objdb.newReader()) {
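            // Compact reftables first (when configured), then packs; the new
            // pack is committed and the sources pruned in one commitPack call.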
            if (reftableConfig != null && !srcReftables.isEmpty()) {
                compactReftables(ctx);
            }
            compactPacks(ctx, pm);

            List<DfsPackDescription> commit = getNewPacks();
            Collection<DfsPackDescription> remove = toPrune();
            if (!commit.isEmpty() || !remove.isEmpty()) {
                objdb.commitPack(commit, remove);
            }
        } finally {
            rw = null;
        }
    }

    private void compactPacks(DfsReader ctx, ProgressMonitor pm)
            throws IOException, IncorrectObjectTypeException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
        PackConfig pc = new PackConfig(repo);
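        // Favor speed over size: skip searching for new deltas, but reuse
        // existing deltas and whole objects verbatim from the source packs.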
        pc.setIndexVersion(2);
        pc.setDeltaCompress(false);
        pc.setReuseDeltas(true);
        pc.setReuseObjects(true);

        try (PackWriter pw = new PackWriter(pc, ctx)) {
            pw.setDeltaBaseAsOffset(true);
            pw.setReuseDeltaCommits(false);

            addObjectsToPack(pw, ctx, pm);
            if (pw.getObjectCount() == 0) {
                return;
            }

            boolean rollback = true;
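            // Reserve the output pack description; it is rolled back below
            // unless both the pack and its index are written successfully.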
            initOutDesc(objdb);
            try {
                writePack(objdb, outDesc, pw, pm);
                writeIndex(objdb, outDesc, pw);

                PackStatistics stats = pw.getStatistics();

                outDesc.setPackStats(stats);
                newStats = stats;
                rollback = false;
            } finally {
                if (rollback) {
                    objdb.rollbackPack(Collections.singletonList(outDesc));
                }
            }
        }
    }

    private long estimatePackSize() {
        // Every pack file contains 12 bytes of header and 20 bytes of trailer.
        // Include the final pack file header and trailer size here and ignore
        // the same from individual pack files.
        long size = 32;
        for (DfsPackFile pack : srcPacks) {
            size += pack.getPackDescription().getFileSize(PACK) - 32;
        }
        return size;
    }

    private void compactReftables(DfsReader ctx) throws IOException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
        Collections.sort(srcReftables, objdb.reftableComparator());

        initOutDesc(objdb);
        try (DfsReftableStack stack = DfsReftableStack.open(ctx, srcReftables);
             DfsOutputStream out = objdb.writeFile(outDesc, REFTABLE)) {
            ReftableCompactor compact = new ReftableCompactor(out);
            compact.addAll(stack.readers());
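            // Keep deletion records: the sources do not include the bottom
            // of the reftable stack, so deletes may still need to shadow
            // refs in tables beneath the compacted result.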
            compact.setIncludeDeletes(true);
            compact.setConfig(configureReftable(reftableConfig, out));
            compact.compact();
            outDesc.addFileExt(REFTABLE);
            outDesc.setReftableStats(compact.getStats());
        }
    }

    private void initOutDesc(DfsObjDatabase objdb) throws IOException {
        if (outDesc == null) {
            outDesc = objdb.newPack(COMPACT, estimatePackSize());
        }
    }

    /**
     * Get all of the source packs that fed into this compaction.
     *
     * @return all of the source packs that fed into this compaction.
     */
    public Collection<DfsPackDescription> getSourcePacks() {
        Set<DfsPackDescription> src = new HashSet<>();
        for (DfsPackFile pack : srcPacks) {
            src.add(pack.getPackDescription());
        }
        for (DfsReftable table : srcReftables) {
            src.add(table.getPackDescription());
        }
        return src;
    }

    /**
     * Get new packs created by this compaction.
     *
     * @return new packs created by this compaction.
     */
    public List<DfsPackDescription> getNewPacks() {
        return outDesc != null
                ? Collections.singletonList(outDesc)
                : Collections.emptyList();
    }

    /**
     * Get statistics for the packs returned by {@link #getNewPacks()}.
     * <p>
     * An entry may be {@code null} if statistics are not available.
     *
     * @return statistics corresponding to {@link #getNewPacks()}.
     */
    public List<PackStatistics> getNewPackStatistics() {
        return outDesc != null
                ? Collections.singletonList(newStats)
                : Collections.emptyList();
    }

    private Collection<DfsPackDescription> toPrune() {
        Set<DfsPackDescription> packs = new HashSet<>();
        for (DfsPackFile pack : srcPacks) {
            packs.add(pack.getPackDescription());
        }

        Set<DfsPackDescription> reftables = new HashSet<>();
        for (DfsReftable table : srcReftables) {
            reftables.add(table.getPackDescription());
        }

        // A description can carry both a pack and a reftable. Do not prune
        // a pack whose reftable was not an input to this compaction.
        for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) {
            DfsPackDescription d = i.next();
            if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) {
                i.remove();
            }
        }

        // Likewise, do not prune a reftable whose pack was not an input.
        for (Iterator<DfsPackDescription> i = reftables.iterator();
                i.hasNext();) {
            DfsPackDescription d = i.next();
            if (d.hasFileExt(PACK) && !packs.contains(d)) {
                i.remove();
            }
        }

        Set<DfsPackDescription> toPrune = new HashSet<>();
        toPrune.addAll(packs);
        toPrune.addAll(reftables);
        return toPrune;
    }

    private void addObjectsToPack(PackWriter pw, DfsReader ctx,
            ProgressMonitor pm) throws IOException,
            IncorrectObjectTypeException {
        // Sort packs by description ordering; this places newer packs before
        // older packs, allowing the PackWriter to be handed newer objects
        // first and older objects last.
        Collections.sort(
                srcPacks,
                Comparator.comparing(
                        DfsPackFile::getPackDescription,
                        DfsPackDescription.objectLookupComparator()));

        rw = new RevWalk(ctx);
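        // ADDED marks objects already handed to the pack writer; IS_BASE
        // marks delta bases queued for inclusion after the main copy loop.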
        added = rw.newFlag("ADDED"); //$NON-NLS-1$
        isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
        List<RevObject> baseObjects = new BlockList<>();

        pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
        for (DfsPackFile src : srcPacks) {
            List<ObjectIdWithOffset> want = toInclude(src, ctx);
            if (want.isEmpty())
                continue;

            PackReverseIndex rev = src.getReverseIdx(ctx);
            DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
            for (ObjectIdWithOffset id : want) {
                int type = src.getObjectType(ctx, id.offset);
                RevObject obj = rw.lookupAny(id, type);
                if (obj.has(added))
                    continue;

                pm.update(1);
                pw.addObject(obj);
                obj.add(added);

                src.representation(rep, id.offset, ctx, rev);
                if (rep.getFormat() != PACK_DELTA)
                    continue;

                RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
                if (!base.has(added) && !base.has(isBase)) {
                    baseObjects.add(base);
                    base.add(isBase);
                }
            }
        }
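        // Add delta bases that were referenced by reused deltas but not
        // copied above, so every delta in the output can resolve.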
        for (RevObject obj : baseObjects) {
            if (!obj.has(added)) {
                pm.update(1);
                pw.addObject(obj);
                obj.add(added);
            }
        }
        pm.endTask();
    }

    private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
            throws IOException {
        PackIndex srcIdx = src.getPackIndex(ctx);
        List<ObjectIdWithOffset> want = new BlockList<>(
                (int) srcIdx.getObjectCount());
        SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
            ObjectId id = ent.toObjectId();
            RevObject obj = rw.lookupOrNull(id);
            if (obj != null && (obj.has(added) || obj.has(isBase)))
                continue;
            for (ObjectIdSet e : exclude)
                if (e.contains(id))
                    continue SCAN;
            want.add(new ObjectIdWithOffset(id, ent.getOffset()));
        }
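        // Sort by offset so the source pack is read sequentially when the
        // selected objects are copied into the output.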
        Collections.sort(want, (ObjectIdWithOffset a,
                ObjectIdWithOffset b) -> Long.signum(a.offset - b.offset));
        return want;
    }

    private static void writePack(DfsObjDatabase objdb,
            DfsPackDescription pack,
            PackWriter pw, ProgressMonitor pm) throws IOException {
        try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
            pw.writePack(pm, pm, out);
            pack.addFileExt(PACK);
            pack.setBlockSize(PACK, out.blockSize());
        }
    }

    private static void writeIndex(DfsObjDatabase objdb,
            DfsPackDescription pack,
            PackWriter pw) throws IOException {
        try (DfsOutputStream out = objdb.writeFile(pack, INDEX)) {
            CountingOutputStream cnt = new CountingOutputStream(out);
            pw.writeIndex(cnt);
            pack.addFileExt(INDEX);
            pack.setFileSize(INDEX, cnt.getCount());
            pack.setBlockSize(INDEX, out.blockSize());
            pack.setIndexVersion(pw.getIndexVersion());
        }
    }

    static ReftableConfig configureReftable(ReftableConfig cfg,
            DfsOutputStream out) {
        int bs = out.blockSize();
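        // Align reftable blocks to the DFS block size, when known, so each
        // reftable block maps onto a single storage block.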
        if (bs > 0) {
            cfg = new ReftableConfig(cfg);
            cfg.setRefBlockSize(bs);
            cfg.setAlignBlocks(true);
        }
        return cfg;
    }

    /** An object's identity paired with its byte offset in the source pack. */
    private static class ObjectIdWithOffset extends ObjectId {
        final long offset;

        ObjectIdWithOffset(AnyObjectId id, long ofs) {
            super(id);
            offset = ofs;
        }
    }
}