diff --git a/.gitignore b/.gitignore index 8e19a9be..f0264bea 100644 --- a/.gitignore +++ b/.gitignore @@ -49,4 +49,7 @@ logo-manager/package-lock.json # others /sample-gsoc-guide/ -/things-to-do/ \ No newline at end of file +/things-to-do/ + +# data backups (local safety net, not for version control) +/new-api-details-backup-*/ \ No newline at end of file diff --git a/CHANGELOG-2026.md b/CHANGELOG-2026.md new file mode 100644 index 00000000..26feaae2 --- /dev/null +++ b/CHANGELOG-2026.md @@ -0,0 +1,149 @@ +# GSoC 2026 Data Integration — Changelog + +## Data Pipeline: 4 new scripts + +### `scripts/fetch-year-data.ts` +Fetches raw org data from Google's API. Reusable for any year. +```bash +npx tsx scripts/fetch-year-data.ts --year 2026 +``` +Output: `new-api-details/yearly/google-summer-of-code-2026-organizations-raw.json` + +--- + +### `scripts/transform-year-organizations.ts` +Reads raw API JSON → updates/creates per-org JSON files → regenerates `index.json` + `metadata.json`. + +```bash +npx tsx scripts/transform-year-organizations.ts --year 2026 +``` + +What it does for **returning orgs** (156): +``` +- adds 2026 to active_years +- updates last_year to 2026 +- sets is_currently_active: true +- merges new technologies/topics (union, no deletions) +- updates contact/social from API +``` + +What it does for **new orgs** (29): +``` +- creates new JSON file with first_year: 2026, active_years: [2026] +- maps Google API fields → internal format +``` + +What it does for **orgs not in 2026** (48): +``` +- sets is_currently_active: false (nothing else changed) +``` + +`first_time` is **derived** in the index, not stored per-org: +```ts +first_time: data.first_year === YEAR +``` + +--- + +### `scripts/generate-yearly-page-from-json.ts` +Produces `new-api-details/yearly/google-summer-of-code-2026.json` from org JSON files. No DB. + +```bash +npx tsx scripts/generate-yearly-page-from-json.ts --year 2026 +``` + +Output matches `YearlyPageData` type. 
Projects array is empty (not yet announced). `finalized: false`. + +--- + +### `scripts/regenerate-tech-topics-from-json.ts` +Rebuilds all tech-stack, topics, and homepage JSON from org files. No DB. Years are derived dynamically from the data. + +```bash +npx tsx scripts/regenerate-tech-topics-from-json.ts +``` + +Regenerated: +- 825 tech-stack JSON files + index (all now include 2026 in `popularity_by_year`) +- 1566 topic JSON files + index (all now include 2026 in `yearlyStats`) +- `homepage.json` (updated metrics: 533 total orgs, 185 active) + +--- + +## UI fixes + +### `app/organizations/filters-sidebar.tsx` +Added 2026 to the `YEARS` filter array. +```ts +// before +const YEARS = [2025, 2024, ...] +// after +const YEARS = [2026, 2025, 2024, ...] +``` + +### `app/yearly/page.tsx` +- Added `{ year: 2026, slug: "google-summer-of-code-2026" }` to `yearlyPages` +- Updated stats: "11" years, "11,000+" projects, "2026" latest +- CTA button now links to 2026 + +### `app/yearly/[slug]/page.tsx` +`generateStaticParams` now derives its slugs from `getAvailableProjectYears()` instead of a hardcoded list, so 2026 is included automatically. + +### `lib/projects-page-types.ts` +Added 2026 to `getAvailableProjectYears()`. + +### `package.json` +Added npm scripts: +```json +"gsoc:fetch": "npx tsx scripts/fetch-year-data.ts", +"gsoc:transform": "npx tsx scripts/transform-year-organizations.ts", +"gsoc:yearly": "npx tsx scripts/generate-yearly-page-from-json.ts", +"gsoc:regen": "npx tsx scripts/regenerate-tech-topics-from-json.ts", +"gsoc:sync": "... all four in sequence ..." +``` + +### `.gitignore` +Added `/new-api-details-backup-*/` to ignore backup folders. + +--- + +## Backup + +`new-api-details-backup-pre2026/` — full copy of all data before any 2026 changes. Gitignored. 
+ +--- + +## Data summary + +| Metric | Value | +|---|---| +| Total orgs in index | 533 (was 504) | +| Active in 2026 | 185 | +| Returning | 156 | +| First-time | 29 | +| Marked inactive | 48 | +| Projects | 0 (not yet announced) | +| Top language | Python (121 orgs) | +| Years covered | 11 (2016–2026) | + +--- + +## Future year workflow + +```bash +npm run gsoc:fetch -- --year 2027 +npm run gsoc:transform -- --year 2027 +npm run gsoc:yearly -- --year 2027 +npm run gsoc:regen +``` + +Then update 3 hardcoded places: +1. `app/yearly/page.tsx` — `yearlyPages` array, plus the hardcoded stat values and metadata description +2. `lib/projects-page-types.ts` — `getAvailableProjectYears()` (drives `generateStaticParams` in `app/yearly/[slug]/page.tsx` and the sitemap) +3. `app/organizations/filters-sidebar.tsx` — `YEARS` array + +--- + +## Important: dev server + +Next.js caches JSON imports at startup. After running any script that changes JSON files, **restart the dev server** (`Ctrl+C` then `npm run dev`) for changes to appear. diff --git a/app/organizations/filters-sidebar.tsx b/app/organizations/filters-sidebar.tsx index 18b5f699..7e016909 100644 --- a/app/organizations/filters-sidebar.tsx +++ b/app/organizations/filters-sidebar.tsx @@ -23,9 +23,10 @@ interface FiltersSidebarProps { onFilterChange: (filters: FilterState) => void filters: FilterState availableTechs: Array<{ name: string; count: number }> + firstTimeCount?: number } -const YEARS = [2025, 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012] +const YEARS = [2026, 2025, 2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012] const CATEGORIES = [ 'Artificial Intelligence', 'Data', @@ -49,7 +50,7 @@ const TOPICS = [ 'Database', ] -export function FiltersSidebar({ onFilterChange, filters, availableTechs }: FiltersSidebarProps) { +export function FiltersSidebar({ onFilterChange, filters, availableTechs, firstTimeCount }: FiltersSidebarProps) { const [sidebarSearch] = useState('') const [expandedSections, setExpandedSections] = useState({ @@ -225,7 +226,7 @@ export function FiltersSidebar({ onFilterChange, 
filters, availableTechs }: Filt onChange={toggleFirstTime} /> First-time organizations only - (14) + {firstTimeCount !== undefined && ({firstTimeCount})} diff --git a/app/organizations/organizations-client.tsx b/app/organizations/organizations-client.tsx index 7c3a2028..dc1dc0e2 100644 --- a/app/organizations/organizations-client.tsx +++ b/app/organizations/organizations-client.tsx @@ -16,9 +16,10 @@ interface OrganizationsClientProps { initialData: PaginatedResponse initialPage: number initialTechs: Array<{ name: string; count: number }> + firstTimeCount?: number } -export function OrganizationsClient({ initialData, initialPage, initialTechs }: OrganizationsClientProps) { +export function OrganizationsClient({ initialData, initialPage, initialTechs, firstTimeCount }: OrganizationsClientProps) { const router = useRouter() const searchParams = useSearchParams() const [data, setData] = useState>(initialData) @@ -27,6 +28,14 @@ export function OrganizationsClient({ initialData, initialPage, initialTechs }: const isInitialMount = useRef(true) const lastFetchParams = useRef('') const lastUrlString = useRef('') + + // Sync server-rendered data when initialData/initialPage change after navigation. + // Without this, router.push() re-renders on the server but the client keeps stale state. 
+ useEffect(() => { + setData(initialData) + setCurrentPage(initialPage) + setIsLoading(false) + }, [initialData, initialPage]) // Memoize filters from URL using primitives to avoid unnecessary recalculations const urlFilters = useMemo(() => { @@ -207,53 +216,26 @@ export function OrganizationsClient({ initialData, initialPage, initialTechs }: } }, []) - // Handle page changes from URL - // Only fetch if page actually changed AND we're not on initial mount - useEffect(() => { - if (isInitialMount.current) { - return - } - - const page = Number(searchParams.get('page')) || 1 - if (page !== currentPage) { - setCurrentPage(page) - // Only fetch if we have dynamic filters (search or complex filters) - // Otherwise, pagination should be handled client-side with static data - const hasDynamicFilters = filters.search || - filters.yearsLogic === 'AND' || - filters.categoriesLogic === 'AND' || - filters.techsLogic === 'AND' || - filters.topicsLogic === 'AND' || - (filters.years.length > 0 && filters.categories.length > 0 && filters.techs.length > 0) - - if (hasDynamicFilters) { - fetchOrganizations(page, filters) - } - } - }, [searchParams, currentPage, filters, fetchOrganizations]) + // Page changes are handled via router.push() → server re-render → initialData sync. + // No client-side fetch needed for pagination. - // Only fetch when filters change (not on initial mount, as we have initialData) - // Only fetch if we have dynamic filters that require API (search or complex filters) + // Filters and search are handled server-side via router.push() → server re-render → initialData sync. + // Only need client-side API fetch for AND logic filters (rare edge case). 
useEffect(() => { if (isInitialMount.current) { return } - // Determine if we need API (same logic as server) const needsAPI = - filters.search.trim().length > 0 || filters.yearsLogic === 'AND' || filters.categoriesLogic === 'AND' || filters.techsLogic === 'AND' || - filters.topicsLogic === 'AND' || - (filters.years.length > 0 && filters.categories.length > 0 && filters.techs.length > 0 && filters.topics.length > 0) + filters.topicsLogic === 'AND' - // Only fetch if we need API, otherwise filters are handled client-side with static data if (!needsAPI) { return } - // Reset to page 1 when filters change const page = 1 setCurrentPage(page) fetchOrganizations(page, filters) @@ -341,7 +323,7 @@ export function OrganizationsClient({ initialData, initialPage, initialTechs }:
{/* Sidebar - Fixed left, 280px width */} {/* Main Content - with left margin for sidebar */} diff --git a/app/organizations/page.tsx b/app/organizations/page.tsx index 88f523bb..cb545cfc 100644 --- a/app/organizations/page.tsx +++ b/app/organizations/page.tsx @@ -96,14 +96,6 @@ function shouldUseAPI(params: { techsLogic?: string; topicsLogic?: string; }): boolean { - // Always use API for search (text search requires DB) - if (params.q && params.q.trim().length > 0) { - if (process.env.NODE_ENV === 'development') { - console.log('[ORGS] Using API: search query detected'); - } - return true; - } - // Use API for complex filter logic (AND mode requires DB) if (params.yearsLogic === 'AND' || params.categoriesLogic === 'AND' || params.techsLogic === 'AND' || params.topicsLogic === 'AND') { @@ -113,26 +105,10 @@ function shouldUseAPI(params: { return true; } - // Use API if multiple filter types are combined (complex combinations) - const filterCount = [ - params.years && params.years.trim().length > 0, - params.categories && params.categories.trim().length > 0, - params.techs && params.techs.trim().length > 0, - params.topics && params.topics.trim().length > 0, - params.firstTimeOnly === 'true', - ].filter(Boolean).length; - - // If more than 2 filter types, use API for better performance - if (filterCount > 2) { - if (process.env.NODE_ENV === 'development') { - console.log('[ORGS] Using API: multiple filter types detected', filterCount); - } - return true; - } - - // Otherwise, use static JSON + // All other cases (including text search) use static JSON. + // Text search over ~500 orgs in memory is fast and includes new orgs not yet in DB. 
if (process.env.NODE_ENV === 'development') { - console.log('[ORGS] Using static JSON: simple filters or no filters'); + console.log('[ORGS] Using static JSON'); } return false; } @@ -204,12 +180,13 @@ async function getOrganizations(params: { ); } - // Filter organizations in memory + // Filter organizations in memory (supports text search + all filters) let filtered = indexData.organizations; - // Apply filters - if (params.years || params.categories || params.techs || params.topics || params.firstTimeOnly) { + const hasFilters = params.q || params.years || params.categories || params.techs || params.topics || params.firstTimeOnly || params.tech; + if (hasFilters) { filtered = filterOrganizations(indexData.organizations, { + query: params.q, years: params.years ? params.years.split(',').map(y => parseInt(y)).filter(n => !isNaN(n)) : undefined, categories: params.categories ? params.categories.split(',') : undefined, techs: params.techs ? params.techs.split(',') : params.tech ? [params.tech] : undefined, @@ -239,8 +216,8 @@ export default async function OrganizationsPage({ searchParams }: PageProps) { const params = await searchParams; const page = Number(params.page) || 1; - // Parallel data fetching: Orgs + Tech Stack - const [data, techStackIndex] = await Promise.all([ + // Parallel data fetching: Orgs + Tech Stack + Org index (for first-time count) + const [data, techStackIndex, orgIndex] = await Promise.all([ getOrganizations({ page, limit: 20, @@ -257,7 +234,8 @@ export default async function OrganizationsPage({ searchParams }: PageProps) { techsLogic: params.techsLogic, topicsLogic: params.topicsLogic, }), - loadTechStackIndexData() + loadTechStackIndexData(), + loadOrganizationsIndexData() ]); // Transform tech stack data for sidebar @@ -266,6 +244,10 @@ export default async function OrganizationsPage({ searchParams }: PageProps) { count: t.org_count })) || []; + const firstTimeCount = orgIndex?.organizations.filter( + (o: { first_time: boolean | null }) 
=> o.first_time === true + ).length ?? 0; + return ( @@ -279,6 +261,7 @@ export default async function OrganizationsPage({ searchParams }: PageProps) { initialData={data} initialPage={page} initialTechs={initialTechs} + firstTimeCount={firstTimeCount} /> ); diff --git a/app/sitemap.ts b/app/sitemap.ts index a3ac2943..c0c4d0cf 100644 --- a/app/sitemap.ts +++ b/app/sitemap.ts @@ -1,130 +1,67 @@ import { MetadataRoute } from 'next' import { SITE_URL } from '@/lib/constants' -import prisma from '@/lib/prisma' +import { loadOrganizationsIndexData } from '@/lib/organizations-page-types' +import { loadTechStackIndexData } from '@/lib/tech-stack-page-types' import { loadTopicsIndexData } from '@/lib/topics-page-types' import { getAvailableProjectYears } from '@/lib/projects-page-types' /** - * Fetch all organization slugs directly from database + * All sitemap data is sourced from static JSON files — + * no database dependency. When a new year's data is added + * (via the transform + regenerate scripts), the sitemap + * picks it up automatically on the next build. 
*/ + async function getAllOrganizationSlugs(): Promise { try { - const organizations = await prisma.organizations.findMany({ - select: { slug: true }, - orderBy: { name: 'asc' }, - }) - return organizations.map(org => org.slug) + const data = await loadOrganizationsIndexData() + if (!data) return [] + return data.organizations.map(org => org.slug) } catch (error) { - console.error('Error fetching organization slugs for sitemap:', error) - return [] // Return empty array on error to prevent sitemap generation failure + console.error('[SITEMAP] Error loading organization slugs:', error) + return [] } } -/** - * Fetch all tech stack slugs directly from database - */ async function getAllTechStackSlugs(): Promise { try { - // Get all organizations with their technologies - const organizations = await prisma.organizations.findMany({ - select: { technologies: true }, - }) - - // Extract and deduplicate technologies - const techMap = new Set() - organizations.forEach((org) => { - org.technologies.forEach((tech) => { - const slug = tech.toLowerCase().replace(/[^a-z0-9]+/g, '-') - techMap.add(slug) - }) - }) - - return Array.from(techMap) + const data = await loadTechStackIndexData() + if (!data) return [] + return data.all_techs.map(t => t.slug) } catch (error) { - console.error('Error fetching tech stack slugs for sitemap:', error) + console.error('[SITEMAP] Error loading tech stack slugs:', error) return [] } } -/** - * Get all topic slugs from static JSON data - */ async function getAllTopicSlugs(): Promise { try { const topicsData = await loadTopicsIndexData() - if (!topicsData) { - // Fallback to hardcoded list if JSON not available - return [ - 'web-development', - 'machine-learning', - 'systems-programming', - 'data-science', - 'security-privacy', - 'cloud-infrastructure', - 'mobile-development', - 'devtools', - 'graphics-multimedia', - 'databases', - 'programming-languages', - 'documentation', - ] - } + if (!topicsData) return [] return topicsData.topics.map(topic 
=> topic.slug) } catch (error) { - if (process.env.NODE_ENV === 'development') { - console.error('[SITEMAP] Error loading topics:', error) - } + console.error('[SITEMAP] Error loading topic slugs:', error) return [] } } -/** - * Get all project IDs for project detail pages - * Fetches from database to get all unique project IDs - */ -async function getAllProjectIds(): Promise> { - try { - const projects = await prisma.projects.findMany({ - select: { - project_id: true, - org_slug: true, - }, - distinct: ['project_id'], - }) - return projects - } catch (error) { - if (process.env.NODE_ENV === 'development') { - console.error('[SITEMAP] Error fetching project IDs:', error) - } - return [] - } +function getYearlySlugs(): string[] { + const years = getAvailableProjectYears() + return years.map(y => `google-summer-of-code-${y}`) } -/** - * Sitemap Generation - * - * Currently generated at build time (no revalidate configured). - * If you add `export const revalidate = X` to this file, it becomes ISR instead of build-only. 
- * - * Base URL validation: - * - Ensures no trailing slash - * - Uses https protocol - * - Falls back to production URL if env not set - */ export default async function sitemap(): Promise { - // Ensure baseUrl has no trailing slash and uses https const baseUrl = SITE_URL.replace(/\/$/, '').replace(/^http:/, 'https:') - // Fetch dynamic routes in parallel - const [orgSlugs, techSlugs, topicSlugs, projectIds] = await Promise.all([ + const [orgSlugs, techSlugs, topicSlugs] = await Promise.all([ getAllOrganizationSlugs(), getAllTechStackSlugs(), getAllTopicSlugs(), - getAllProjectIds(), ]) - // Static routes explicitly listed (excluding dynamic children like /organizations/[slug]) - // These are top-level pages without dynamic parameters + const yearlySlugs = getYearlySlugs() + const projectYears = getAvailableProjectYears() + const staticRoutes = [ '', '/about', @@ -139,79 +76,49 @@ export default async function sitemap(): Promise { '/blog', ] - // Generate year-based routes (2016 to current year - 1, excluding future years) - // Only include years that have actually completed GSoC - // Using new /yearly/google-summer-of-code-YYYY format for SEO - const currentYear = new Date().getFullYear() - const lastCompletedYear = currentYear - 1 // Exclude current year and future years - const yearRoutes = [] - for (let year = 2016; year <= lastCompletedYear; year++) { - yearRoutes.push(`/yearly/google-summer-of-code-${year}`) - } - - // Generate project year routes (2016 to current year) - const projectYearRoutes = getAvailableProjectYears().map(year => `/projects/${year}`) - - // Combine all routes with appropriate priorities const routes: MetadataRoute.Sitemap = [ - // Homepage - highest priority ...staticRoutes.map((route) => ({ url: `${baseUrl}${route}`, lastModified: new Date(), changeFrequency: route === '' ? 'daily' as const : 'weekly' as const, priority: route === '' ? 1.0 : route === '/organizations' ? 
0.9 : 0.8, })), - - // Organization detail pages - high priority (money pages for SEO) + ...orgSlugs.map((slug) => ({ url: `${baseUrl}/organizations/${slug}`, lastModified: new Date(), changeFrequency: 'weekly' as const, priority: 0.9, })), - - // Tech stack detail pages - high priority + ...techSlugs.map((slug) => ({ url: `${baseUrl}/tech-stack/${slug}`, lastModified: new Date(), changeFrequency: 'weekly' as const, priority: 0.85, })), - - // Topic detail pages - high priority + ...topicSlugs.map((slug) => ({ url: `${baseUrl}/topics/${slug}`, lastModified: new Date(), changeFrequency: 'weekly' as const, priority: 0.85, })), - - // Year pages - medium priority - ...yearRoutes.map((route) => ({ - url: `${baseUrl}${route}`, + + ...yearlySlugs.map((slug) => ({ + url: `${baseUrl}/yearly/${slug}`, lastModified: new Date(), changeFrequency: 'yearly' as const, priority: 0.8, })), - - // Project year pages - medium priority - ...projectYearRoutes.map((route) => ({ - url: `${baseUrl}${route}`, + + ...projectYears.map((year) => ({ + url: `${baseUrl}/projects/${year}`, lastModified: new Date(), changeFrequency: 'yearly' as const, priority: 0.75, })), - - // Project detail pages - lower priority (deep pages, many URLs) - // Deprioritized to preserve crawl budget for higher-value pages - ...projectIds.map(({ project_id, org_slug }) => ({ - url: `${baseUrl}/organizations/${org_slug}/projects/${project_id}`, - lastModified: new Date(), - changeFrequency: 'monthly' as const, - priority: 0.6, - })), ] return routes } - diff --git a/app/yearly/[slug]/page.tsx b/app/yearly/[slug]/page.tsx index e11b96f9..caa002f7 100644 --- a/app/yearly/[slug]/page.tsx +++ b/app/yearly/[slug]/page.tsx @@ -21,6 +21,7 @@ import { Header } from "@/components/header"; import { Footer } from "@/components/Footer"; import { loadYearlyPageData } from "@/lib/yearly-page-types"; import { getFullUrl } from "@/lib/constants"; +import { getAvailableProjectYears } from "@/lib/projects-page-types"; import { 
ExpandableOrgList, ExpandableProjectList, MentorsContributorsTable } from "./client-components"; import { StudentSlotsBarChart, @@ -32,21 +33,12 @@ import { // Static Generation - cache forever export const revalidate = false; -// Generate static params for all known yearly pages +// Derived from the single source of truth in getAvailableProjectYears(). +// Adding a year there auto-updates yearly pages, project pages, and sitemap. export async function generateStaticParams() { - // Add more years as JSON files are created - return [ - { slug: "google-summer-of-code-2025" }, - { slug: "google-summer-of-code-2024" }, - { slug: "google-summer-of-code-2023" }, - { slug: "google-summer-of-code-2022" }, - { slug: "google-summer-of-code-2021" }, - { slug: "google-summer-of-code-2020" }, - { slug: "google-summer-of-code-2019" }, - { slug: "google-summer-of-code-2018" }, - { slug: "google-summer-of-code-2017" }, - { slug: "google-summer-of-code-2016" }, - ]; + return getAvailableProjectYears().map(year => ({ + slug: `google-summer-of-code-${year}`, + })); } export async function generateMetadata({ diff --git a/app/yearly/page.tsx b/app/yearly/page.tsx index 541de3ed..410e7c22 100644 --- a/app/yearly/page.tsx +++ b/app/yearly/page.tsx @@ -18,11 +18,12 @@ export const revalidate = false; export const metadata = { title: "GSoC Yearly Stats & Trends", - description: "Explore Google Summer of Code statistics, trends, and insights by year. Historical data from 2016 to 2025.", + description: "Explore Google Summer of Code statistics, trends, and insights by year. 
Historical data from 2016 to 2026.", }; // Available years with their slugs const yearlyPages = [ + { year: 2026, slug: "google-summer-of-code-2026" }, { year: 2025, slug: "google-summer-of-code-2025" }, { year: 2024, slug: "google-summer-of-code-2024" }, { year: 2023, slug: "google-summer-of-code-2023" }, @@ -57,7 +58,7 @@ export default function YearlyIndexPage() { } label="Years Covered" - value="10" + value="11" /> } @@ -67,12 +68,12 @@ export default function YearlyIndexPage() { } label="Projects" - value="10,000+" + value="11,000+" /> } label="Latest Year" - value="2025" + value="2026" />
@@ -115,9 +116,9 @@ export default function YearlyIndexPage() {