mirror of
https://github.com/owncloud/ocis
synced 2026-04-25 17:25:21 +02:00
309 lines
8.3 KiB
Go
309 lines
8.3 KiB
Go
package githubextractor
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"
)
|
|
|
|
// GitHub API endpoint and request pacing settings shared by the Extractor.
const (
	// apiBase is the root URL that every request path is appended to.
	apiBase = "https://api.github.com"

	// requestDelay is the pause inserted after a successful API call and
	// between consecutive page requests.
	requestDelay = time.Second

	// retryDelay is how long to wait before re-sending a request that failed
	// with a retryable network error.
	retryDelay = 2 * time.Second

	// maxRetries is the total number of attempts made for a single request
	// (the first try included).
	maxRetries = 3
)
|
|
|
|
// Extractor talks to the GitHub REST API through an HTTP client, optionally
// authenticating with a bearer token.
type Extractor struct {
	// client performs every outgoing HTTP request.
	client *http.Client
	// token is sent as "Authorization: Bearer <token>" when non-empty.
	token string
}

// NewExtractor returns an Extractor that issues requests through client and
// authenticates with token (pass "" for unauthenticated access).
//
// A nil client would panic on the first request, so it is replaced by a
// client with a 30-second timeout.
func NewExtractor(client *http.Client, token string) *Extractor {
	if client == nil {
		client = &http.Client{Timeout: 30 * time.Second}
	}
	return &Extractor{
		client: client,
		token:  token,
	}
}
|
|
|
|
// PRInfo is the subset of a pull request object that is decoded from the
// API's JSON response.
type PRInfo struct {
	// Number is the pull request number.
	Number int `json:"number"`
	// Head describes the source side of the pull request.
	Head struct {
		Ref string `json:"ref"` // value of head.ref
		SHA string `json:"sha"` // value of head.sha
	} `json:"head"`
	// Base describes the target side of the pull request.
	Base struct {
		Ref string `json:"ref"` // value of base.ref
	} `json:"base"`
}
|
|
|
|
// PRCommit is one entry of a pull request's commit list as decoded from the
// API's JSON response.
type PRCommit struct {
	// SHA is the commit hash.
	SHA string `json:"sha"`
	// Commit carries the commit metadata nested under "commit".
	Commit struct {
		Author struct {
			// Date is kept as the raw string from the response; it is not parsed here.
			Date string `json:"date"`
		} `json:"author"`
		// Message is the full commit message (subject and body).
		Message string `json:"message"`
	} `json:"commit"`
}
|
|
|
|
// CommitListItem is one entry of a repository commit listing as decoded from
// the API's JSON response.
type CommitListItem struct {
	// SHA is the commit hash.
	SHA string `json:"sha"`
	// HTMLURL is the value of the "html_url" field.
	HTMLURL string `json:"html_url"`
	// Commit carries the commit metadata nested under "commit".
	Commit struct {
		Author struct {
			// Date is kept as the raw string from the response; it is not parsed here.
			Date string `json:"date"`
		} `json:"author"`
		// Message is the full commit message (subject and body).
		Message string `json:"message"`
	} `json:"commit"`
}
|
|
|
|
type CombinedStatus struct {
|
|
State string `json:"state"`
|
|
Statuses []StatusContext `json:"statuses"`
|
|
}
|
|
|
|
// StatusContext is a single entry of CombinedStatus.Statuses.
type StatusContext struct {
	// Context is the value of the "context" field (the name of the check).
	Context string `json:"context"`
	// State is the value of this context's "state" field.
	State string `json:"state"`
	// TargetURL is the value of the "target_url" field.
	TargetURL string `json:"target_url"`
	// Description is the value of the "description" field.
	Description string `json:"description"`
}
|
|
|
|
func (e *Extractor) GetPRInfo(repo string, prNumber int) (*PRInfo, error) {
|
|
url := fmt.Sprintf("%s/repos/%s/pulls/%d", apiBase, repo, prNumber)
|
|
var prInfo PRInfo
|
|
return &prInfo, e.get(url, &prInfo)
|
|
}
|
|
|
|
func (e *Extractor) GetPRCommits(repo string, prNumber int) ([]PRCommit, error) {
|
|
url := fmt.Sprintf("%s/repos/%s/pulls/%d/commits", apiBase, repo, prNumber)
|
|
var commits []PRCommit
|
|
return commits, e.get(url, &commits)
|
|
}
|
|
|
|
func (e *Extractor) GetCommits(repo, branch string, maxCommits int, since string) ([]CommitListItem, error) {
|
|
var commits []CommitListItem
|
|
page := 1
|
|
|
|
for len(commits) < maxCommits {
|
|
url := fmt.Sprintf("%s/repos/%s/commits?sha=%s&per_page=100&page=%d", apiBase, repo, branch, page)
|
|
if since != "" {
|
|
url += "&since=" + since
|
|
}
|
|
|
|
resp, err := e.request(url)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
resp.Body.Close()
|
|
return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var pageCommits []CommitListItem
|
|
if err := json.NewDecoder(resp.Body).Decode(&pageCommits); err != nil {
|
|
resp.Body.Close()
|
|
return nil, err
|
|
}
|
|
resp.Body.Close()
|
|
|
|
if len(pageCommits) == 0 {
|
|
break
|
|
}
|
|
|
|
commits = append(commits, pageCommits...)
|
|
if len(commits) >= maxCommits {
|
|
commits = commits[:maxCommits]
|
|
break
|
|
}
|
|
|
|
page++
|
|
time.Sleep(requestDelay)
|
|
}
|
|
|
|
return commits, nil
|
|
}
|
|
|
|
func (e *Extractor) GetCommitStatus(repo, sha string) (*CombinedStatus, error) {
|
|
url := fmt.Sprintf("%s/repos/%s/commits/%s/status", apiBase, repo, sha)
|
|
var status CombinedStatus
|
|
return &status, e.get(url, &status)
|
|
}
|
|
|
|
// GetPRsForCommit returns PRs associated with a commit (e.g. merged PR that introduced the commit).
|
|
// See https://docs.github.com/en/rest/commits/commits#list-pull-requests-associated-with-a-commit
|
|
func (e *Extractor) GetPRsForCommit(repo, commitSHA string) ([]PRInfo, error) {
|
|
url := fmt.Sprintf("%s/repos/%s/commits/%s/pulls", apiBase, repo, commitSHA)
|
|
var prs []PRInfo
|
|
return prs, e.get(url, &prs)
|
|
}
|
|
|
|
// isRetryableNetworkError reports whether err looks like a transient
// connection failure that is worth retrying. It returns false for nil.
//
// The check is a case-insensitive substring match on the error text.
func isRetryableNetworkError(err error) bool {
	if err == nil {
		return false
	}

	msg := strings.ToLower(err.Error())
	// Markers must be lower case because msg has been lower-cased; an
	// upper-case "EOF" marker could never match.
	markers := []string{
		"broken pipe",
		"connection reset",
		"connection refused",
		"eof",
	}
	for _, marker := range markers {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
|
|
|
|
// get makes an HTTP GET request, decodes JSON response, and handles rate limiting
|
|
func (e *Extractor) get(url string, result any) error {
|
|
resp, err := e.request(url)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
return fmt.Errorf("API error %d: %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
if err := json.NewDecoder(resp.Body).Decode(result); err != nil {
|
|
return err
|
|
}
|
|
|
|
time.Sleep(requestDelay)
|
|
return nil
|
|
}
|
|
|
|
func (e *Extractor) request(url string) (*http.Response, error) {
|
|
var lastErr error
|
|
for attempt := 0; attempt < maxRetries; attempt++ {
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Accept", "application/vnd.github.v3+json")
|
|
if e.token != "" {
|
|
req.Header.Set("Authorization", "Bearer "+e.token)
|
|
}
|
|
resp, err := e.client.Do(req)
|
|
if err != nil {
|
|
lastErr = err
|
|
if isRetryableNetworkError(err) && attempt < maxRetries-1 {
|
|
time.Sleep(retryDelay)
|
|
continue
|
|
}
|
|
return nil, err
|
|
}
|
|
return resp, nil
|
|
}
|
|
return nil, lastErr
|
|
}
|
|
|
|
// Pipeline describes one Drone pipeline run.
type Pipeline struct {
	// ID is the pipeline identifier.
	ID int `json:"id"`
	// Status is the pipeline's status string.
	Status string `json:"status"`
	// Started is the start time (presumably a Unix timestamp; confirm with the producer).
	Started int64 `json:"started"`
	// Finished is the end time (presumably a Unix timestamp; confirm with the producer).
	Finished int64 `json:"finished"`
	// Duration is serialized as "duration_minutes".
	Duration float64 `json:"duration_minutes"`
	// Trigger is serialized as "trigger".
	Trigger string `json:"trigger"`
}
|
|
|
|
// OriginalCommit represents an individual commit from the PR branch before squashing
|
|
type OriginalCommit struct {
|
|
SHA string `json:"sha"`
|
|
Title string `json:"title"`
|
|
Pipelines []Pipeline `json:"pipelines,omitempty"`
|
|
}
|
|
|
|
// ExtractedCommit represents a commit with PR information
|
|
type ExtractedCommit struct {
|
|
PR int `json:"pr"`
|
|
SHA string `json:"sha"`
|
|
Date string `json:"date"`
|
|
Title string `json:"title"`
|
|
HTMLURL string `json:"html_url"`
|
|
Pipelines []Pipeline `json:"pipelines,omitempty"`
|
|
OriginalCommits []OriginalCommit `json:"original_commits,omitempty"`
|
|
}
|
|
|
|
// ExtractedData represents the output structure
|
|
type ExtractedData struct {
|
|
Commits []ExtractedCommit `json:"commits"`
|
|
}
|
|
|
|
// extractSubject returns the subject of a commit message: everything before
// the first newline (or the whole message when there is none), with
// surrounding whitespace removed.
func extractSubject(message string) string {
	firstLine, _, _ := strings.Cut(message, "\n")
	return strings.TrimSpace(firstLine)
}
|
|
|
|
// ExtractCommitData fetches commits and extracts them into structured format
|
|
func (e *Extractor) ExtractCommitData(repo, branch string, maxCommits int, since string) (*ExtractedData, error) {
|
|
commits, err := e.GetCommits(repo, branch, maxCommits, since)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("fetching commits: %w", err)
|
|
}
|
|
|
|
extractedCommits := make([]ExtractedCommit, 0, len(commits))
|
|
originalCommitSHAs := make(map[string]bool) // Track SHAs that appear as original commits
|
|
|
|
for _, commit := range commits {
|
|
title := extractSubject(commit.Commit.Message)
|
|
|
|
// Fetch PR information from GitHub API
|
|
prNumber := 0
|
|
var originalCommits []OriginalCommit
|
|
prs, err := e.GetPRsForCommit(repo, commit.SHA)
|
|
if err == nil && len(prs) > 0 {
|
|
// Use the first PR number if multiple PRs are associated
|
|
prNumber = prs[0].Number
|
|
|
|
// Fetch original commits from the PR before squashing
|
|
prCommits, err := e.GetPRCommits(repo, prNumber)
|
|
if err == nil && len(prCommits) > 0 {
|
|
originalCommits = make([]OriginalCommit, 0, len(prCommits))
|
|
for _, prCommit := range prCommits {
|
|
// Skip if this is the same commit (don't self-reference)
|
|
if prCommit.SHA == commit.SHA {
|
|
continue
|
|
}
|
|
originalCommits = append(originalCommits, OriginalCommit{
|
|
SHA: prCommit.SHA,
|
|
Title: extractSubject(prCommit.Commit.Message),
|
|
})
|
|
// Track this SHA as an original commit
|
|
originalCommitSHAs[prCommit.SHA] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
extractedCommits = append(extractedCommits, ExtractedCommit{
|
|
PR: prNumber,
|
|
SHA: commit.SHA,
|
|
Date: commit.Commit.Author.Date,
|
|
Title: title,
|
|
HTMLURL: commit.HTMLURL,
|
|
OriginalCommits: originalCommits,
|
|
})
|
|
}
|
|
|
|
// Filter out commits that are already listed as original commits in merge commits
|
|
// These are PR commits that appear in master history but should only show under the merge commit
|
|
filteredCommits := make([]ExtractedCommit, 0, len(extractedCommits))
|
|
for _, commit := range extractedCommits {
|
|
// Keep the commit ONLY if it's NOT in the originalCommitSHAs set
|
|
// If a commit appears in any other commit's original_commits, it should not be a top-level entry
|
|
if !originalCommitSHAs[commit.SHA] {
|
|
filteredCommits = append(filteredCommits, commit)
|
|
}
|
|
}
|
|
|
|
return &ExtractedData{
|
|
Commits: filteredCommits,
|
|
}, nil
|
|
}
|