package hook

import (
	"bufio"
	"bytes"
	"context"
	"crypto/sha1"
	"fmt"
	"io"
	"strings"

	"gitlab.com/gitlab-org/gitaly/v18/internal/git"
	"gitlab.com/gitlab-org/gitaly/v18/internal/git/gitcmd"
	"gitlab.com/gitlab-org/gitaly/v18/internal/gitaly/storage"
	"gitlab.com/gitlab-org/gitaly/v18/internal/transaction/voting"
)

// ReferenceTransactionHook processes the reference updates which Git reports to
// the reference-transaction hook via stdin.
//
// With Praefect, the updates are voted on so that all nodes stay in sync.
//
// With WAL, the updates which take place in a transaction are captured and
// added to that transaction.
//
// Only regular reference updates and updates to the default branch are
// captured. Any other reference updates are ignored.
func (m *GitLabHookManager) ReferenceTransactionHook(ctx context.Context, state ReferenceTransactionState, env []string, stdin io.Reader) error {
	payload, err := gitcmd.HooksPayloadFromEnv(env)
	if err != nil {
		return fmt.Errorf("extracting hooks payload: %w", err)
	}

	objectHash, err := git.ObjectHashByFormat(payload.ObjectFormat)
	if err != nil {
		return fmt.Errorf("looking up object hash: %w", err)
	}

	changes, err := io.ReadAll(stdin)
	if err != nil {
		return fmt.Errorf("reading stdin from request: %w", err)
	}

	// The transaction is only looked up when the payload carries a transaction ID.
	// Without one, tx stays nil and no updates are recorded below.
	var tx storage.Transaction
	if payload.TransactionID > 0 {
		if tx, err = m.txRegistry.Get(payload.TransactionID); err != nil {
			return fmt.Errorf("get transaction: %w", err)
		}
	}

	// Only two states of Git's reference transaction are acted upon:
	//
	//   - Prepared: this is the only stage in Git's reference transaction which
	//     allows us to abort the transaction, so this is where we vote.
	//   - Committed: we vote again to tell Praefect we've actually persisted the
	//     changes. This is necessary as some RPCs return errors in the response
	//     body rather than as an error code, and Praefect can't tell if these
	//     RPCs have failed. Voting on committed ensures Praefect sees either a
	//     missing vote or that the RPC did commit the changes.
	//
	// Every other state is a no-op.
	var phase voting.Phase
	switch state {
	case ReferenceTransactionPrepared:
		phase = voting.Prepared
	case ReferenceTransactionCommitted:
		phase = voting.Committed
	default:
		return nil
	}

	if tx != nil {
		updates, err := parseChanges(objectHash, bytes.NewReader(changes))
		if err != nil {
			return fmt.Errorf("parse changes: %w", err)
		}

		switch state {
		case ReferenceTransactionPrepared:
			// The change hasn't been committed yet, so in the prepare step we only
			// record the values the references had before the update.
			initialValues := make(map[git.ReferenceName]git.Reference, len(updates))
			for name, update := range updates {
				if update.OldOID == "" {
					// No old object ID was parsed, so the old value is a symbolic target.
					initialValues[name] = git.NewSymbolicReference(name, update.OldTarget)
					continue
				}

				initialValues[name] = git.NewReference(name, update.OldOID)
			}

			if err := tx.RecordInitialReferenceValues(ctx, initialValues); err != nil {
				return fmt.Errorf("record initial reference value: %w", err)
			}
		case ReferenceTransactionCommitted:
			if err := tx.UpdateReferences(ctx, updates); err != nil {
				return fmt.Errorf("update references: %w", err)
			}
		}
	}

	// Deleting a reference requires Git to remove it from the packed-refs backend as
	// well as from the loose refs: removing only the loose ref could unshadow the value
	// stored in the packed-refs file, and vice versa. Whenever a ref exists in both
	// backends, Git therefore creates two transactions: one which force-deletes all
	// existing refs in the packed-refs backend, and one which updates all loose refs.
	// That is a problem for our voting logic, as it would require all nodes to have the
	// same packed state, which we do not and cannot guarantee.
	//
	// Luckily, Git only has to cope with unshadowing when deleting loose refs, so it
	// will only ever _delete_ refs from the packed-refs backend and never _update_ any.
	// When such a force-deletion happens, the same deletion is also queued to the loose
	// backend, no matter whether the loose ref exists, because it must be locked during
	// the whole transaction. The packed-refs cleanups are thus easy to recognize: all
	// of their queued ref updates are force deletions.
	//
	// So we do not cast a vote on any reference transaction which consists only of
	// force-deletions. The vote happens on the loose backend transaction instead, which
	// contains the full record of all refs which are to be updated.
	if isForceDeletionsOnly(objectHash, bytes.NewReader(changes)) {
		return nil
	}

	// The vote is the SHA-1 digest of the raw, unparsed changes.
	vote := sha1.Sum(changes)
	if err := m.voteOnTransaction(ctx, vote, phase, payload); err != nil {
		return fmt.Errorf("error voting on transaction: %w", err)
	}

	return nil
}

// parseChanges parses the changes from the reader. Each line is expected to consist of
// exactly three space-separated fields: the old value, the new value and the reference name.
// A value is either an object ID in the given object hash's hex format or, for symbolic
// references, a target denoted by a leading "ref:".
//
// We only consider and parse certain refs:
// 1. All regular refs which have a 'refs/' prefix.
// 2. The 'HEAD' root ref (used to track default branch changes in reftables).
// No other refs will be parsed.
//
// An error is returned if a line does not have three fields, if a value of a considered
// ref is neither a symbolic target nor a valid object ID, or if reading from the reader
// fails.
//
// See the documentation of the reference-transaction hook for details on the format:
// https://git-scm.com/docs/githooks#_reference_transaction
func parseChanges(objectHash git.ObjectHash, changes io.Reader) (git.ReferenceUpdates, error) {
	// symrefPrefix marks a value which denotes a symbolic reference target instead of an
	// object ID. It is only recognized at the very start of a value so that malformed
	// values which merely contain it are rejected by the object ID parsing.
	const symrefPrefix = "ref:"

	scanner := bufio.NewScanner(changes)

	updates := git.ReferenceUpdates{}
	for scanner.Scan() {
		line := scanner.Text()
		components := strings.Split(line, " ")
		if len(components) != 3 {
			return nil, fmt.Errorf("unexpected change line: %q", line)
		}

		oldValue, newValue := components[0], components[1]
		reference := git.ReferenceName(components[2])
		isHead := reference.String() == "HEAD"

		// We only track updates made in the 'refs/' directory.
		// An exception to this is when HEAD is updated.
		if !isHead && !strings.HasPrefix(reference.String(), "refs/") {
			continue
		}

		update := git.ReferenceUpdate{}

		var err error

		if strings.HasPrefix(oldValue, symrefPrefix) {
			update.OldTarget = git.ReferenceName(strings.TrimPrefix(oldValue, symrefPrefix))
		} else {
			update.OldOID, err = objectHash.FromHex(oldValue)
			if err != nil {
				return nil, fmt.Errorf("parse old: %w", err)
			}
		}

		if strings.HasPrefix(newValue, symrefPrefix) {
			update.NewTarget = git.ReferenceName(strings.TrimPrefix(newValue, symrefPrefix))
		} else {
			update.NewOID, err = objectHash.FromHex(newValue)
			if err != nil {
				return nil, fmt.Errorf("parse new: %w", err)
			}
		}

		// Only capture default branch changes and ignore all other symbolic reference updates.
		if !isHead && (update.NewTarget != "" || update.OldTarget != "") {
			continue
		}

		// If the default branch is being deleted, we ignore updating HEAD
		//
		// TODO: This can be removed once the bug in Git itself is fixed
		// https://gitlab.com/gitlab-org/git/-/issues/348
		if isHead && update.NewTarget == "" {
			continue
		}

		updates[reference] = update
	}

	// The scanner stops silently on read errors and on lines exceeding its buffer limit.
	// Surface that instead of returning a truncated set of updates as if it was complete.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("scanning changes: %w", err)
	}

	return updates, nil
}

// isForceDeletionsOnly determines whether the given changes only consist of force-deletions,
// that is whether every line starts with the zero object ID as both old and new value.
// Changes which contain no lines at all are reported as force-deletions only.
//
// If the changes cannot be read to the end, false is returned: it could not be verified that
// every change is a force-deletion, so the changes are not treated as such.
func isForceDeletionsOnly(objectHash git.ObjectHash, changes io.Reader) bool {
	// forceDeletionPrefix is the prefix of a queued reference transaction which deletes a
	// reference without checking its current value. It is converted to bytes once up front
	// instead of once per scanned line.
	forceDeletionPrefix := []byte(fmt.Sprintf("%[1]s %[1]s ", objectHash.ZeroOID))

	scanner := bufio.NewScanner(changes)
	for scanner.Scan() {
		if !bytes.HasPrefix(scanner.Bytes(), forceDeletionPrefix) {
			return false
		}
	}

	// The scanner stops silently on read errors and on lines exceeding its buffer limit, in
	// which case the remaining changes were never inspected.
	return scanner.Err() == nil
}
