package ogtags

import (
	"reflect"
	"strings"
	"testing"
	"time"

	"golang.org/x/net/html"
)

// NOTE(review): the HTML fixtures in this file were reconstructed from each
// case's name, comments and expected values; confirm them against the
// upstream fixtures before relying on exact markup.

// findFirstElement returns the first element node named tag found in a
// depth-first, document-order walk starting at root. It returns nil when root
// is nil or no such element exists.
func findFirstElement(root *html.Node, tag string) *html.Node {
	if root == nil {
		return nil
	}
	if root.Type == html.ElementNode && root.Data == tag {
		return root
	}
	for c := root.FirstChild; c != nil; c = c.NextSibling {
		if found := findFirstElement(c, tag); found != nil {
			return found
		}
	}
	return nil
}

// TestExtractOGTags parses whole documents and checks the map returned by
// extractOGTags against the tags this test's cache approves: the exact name
// "description" and anything with the "og:" prefix.
func TestExtractOGTags(t *testing.T) {
	testCache := NewOGTagCache("", false, time.Minute)
	// Pin the approval lists explicitly so the expectations below do not
	// depend on whatever defaults NewOGTagCache installs.
	testCache.approvedTags = []string{"description"}
	testCache.approvedPrefixes = []string{"og:"}

	tests := []struct {
		name     string
		htmlStr  string
		expected map[string]string
	}{
		{
			name: "Basic OG tags", // Includes the standard 'description' meta tag.
			htmlStr: `<!DOCTYPE html>
<html>
<head>
	<meta property="og:title" content="Test Title">
	<meta property="og:description" content="Test Description">
	<meta name="description" content="Regular Description">
</head>
<body></body>
</html>`,
			expected: map[string]string{
				"og:title":       "Test Title",
				"og:description": "Test Description",
				"description":    "Regular Description",
			},
		},
		{
			name: "OG tags with name attribute",
			htmlStr: `<!DOCTYPE html>
<html>
<head>
	<meta name="og:title" content="Test Title">
	<meta name="og:description" content="Test Description">
	<meta name="twitter:card" content="summary">
</head>
<body></body>
</html>`,
			expected: map[string]string{
				"og:title":       "Test Title",
				"og:description": "Test Description",
				// twitter:card is not approved, so it must be absent.
			},
		},
		{
			name: "No approved OG tags", // Contains only the standard 'description'.
			htmlStr: `<!DOCTYPE html>
<html>
<head>
	<meta name="description" content="Test Description">
</head>
<body></body>
</html>`,
			expected: map[string]string{
				"description": "Test Description",
			},
		},
		{
			name: "Empty content",
			htmlStr: `<!DOCTYPE html>
<html>
<head>
	<meta property="og:title" content="">
	<meta property="og:description" content="Test Description">
</head>
<body></body>
</html>`,
			expected: map[string]string{
				// An approved tag with empty content is still reported.
				"og:title":       "",
				"og:description": "Test Description",
			},
		},
		{
			name: "Explicitly approved tag",
			htmlStr: `<!DOCTYPE html>
<html>
<head>
	<meta property="description" content="Approved Description Tag">
</head>
<body></body>
</html>`,
			expected: map[string]string{
				// Approved because "description" is in testCache.approvedTags.
				"description": "Approved Description Tag",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			doc, err := html.Parse(strings.NewReader(tt.htmlStr))
			if err != nil {
				t.Fatalf("failed to parse HTML: %v", err)
			}

			ogTags := testCache.extractOGTags(doc)
			if !reflect.DeepEqual(ogTags, tt.expected) {
				t.Errorf("expected %v, got %v", tt.expected, ogTags)
			}
		})
	}
}

// TestIsOGMetaTag checks isOGMetaTag against individual element nodes. The
// expectations below treat every <meta> element as a match, whatever its
// attributes, and any other element as a non-match.
func TestIsOGMetaTag(t *testing.T) {
	tests := []struct {
		name       string
		nodeHTML   string
		targetNode string // element name to locate in the parsed document
		expected   bool
	}{
		{
			name:       "Meta OG tag",
			nodeHTML:   `<meta property="og:title" content="Test">`,
			targetNode: "meta",
			expected:   true,
		},
		{
			name:       "Regular meta tag",
			nodeHTML:   `<meta name="description" content="Test">`,
			targetNode: "meta",
			expected:   true,
		},
		{
			name:       "Not a meta tag",
			nodeHTML:   `<div>Test</div>`,
			targetNode: "div",
			expected:   false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Wrap the fragment in a minimal document so html.Parse sees
			// well-formed structure around it.
			fullHTML := "<html><head>" + tt.nodeHTML + "</head></html>"
			doc, err := html.Parse(strings.NewReader(fullHTML))
			if err != nil {
				t.Fatalf("failed to parse HTML: %v", err)
			}

			node := findFirstElement(doc, tt.targetNode)
			if node == nil {
				t.Fatalf("could not find target node %q in test HTML", tt.targetNode)
			}

			if result := isOGMetaTag(node); result != tt.expected {
				t.Errorf("expected %v, got %v", tt.expected, result)
			}
		})
	}
}

// TestExtractMetaTagInfo checks the (property, content) pair that
// extractMetaTagInfo returns for a single <meta> element. Per the cases
// below, an unapproved or missing property is reported as "" while the
// content attribute is still returned.
func TestExtractMetaTagInfo(t *testing.T) {
	testCache := NewOGTagCache("", false, time.Minute)
	// Same explicit approval lists as TestExtractOGTags: exact "description"
	// plus the "og:" prefix.
	testCache.approvedTags = []string{"description"}
	testCache.approvedPrefixes = []string{"og:"}

	tests := []struct {
		name             string
		nodeHTML         string
		expectedProperty string
		expectedContent  string
	}{
		{
			name:             "OG title with property (approved by prefix)",
			nodeHTML:         `<meta property="og:title" content="Test Title">`,
			expectedProperty: "og:title",
			expectedContent:  "Test Title",
		},
		{
			name:             "OG description with name (approved by prefix)",
			nodeHTML:         `<meta name="og:description" content="Test Description">`,
			expectedProperty: "og:description",
			expectedContent:  "Test Description",
		},
		{
			name:             "Regular meta tag (name=description, approved by exact match)",
			nodeHTML:         `<meta name="description" content="Test Description">`,
			expectedProperty: "description",
			expectedContent:  "Test Description",
		},
		{
			name:             "Regular meta tag (name=keywords, not approved)",
			nodeHTML:         `<meta name="keywords" content="Test Keywords">`,
			expectedProperty: "",
			expectedContent:  "Test Keywords",
		},
		{
			name:             "Twitter tag (not approved by default)",
			nodeHTML:         `<meta name="twitter:card" content="summary">`,
			expectedProperty: "",
			expectedContent:  "summary",
		},
		{
			name:             "No content (but approved property)",
			nodeHTML:         `<meta property="og:title">`,
			expectedProperty: "og:title",
			expectedContent:  "",
		},
		{
			name:             "No property/name attribute",
			nodeHTML:         `<meta content="No property">`,
			expectedProperty: "",
			expectedContent:  "No property",
		},
		{
			name:             "Explicitly approved tag with property attribute",
			nodeHTML:         `<meta property="description" content="Approved Description Tag">`,
			expectedProperty: "description", // Approved by exact match in approvedTags.
			expectedContent:  "Approved Description Tag",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			fullHTML := "<html><head>" + tt.nodeHTML + "</head></html>"
			doc, err := html.Parse(strings.NewReader(fullHTML))
			if err != nil {
				t.Fatalf("failed to parse HTML: %v", err)
			}

			// Every case supplies a <meta> element, so failing to find one
			// means the fixture or the parser is broken, not the code under
			// test.
			node := findFirstElement(doc, "meta")
			if node == nil {
				t.Fatalf("could not find meta node in test HTML: %s", tt.nodeHTML)
			}

			property, content := testCache.extractMetaTagInfo(node)
			if property != tt.expectedProperty {
				t.Errorf("expected property %q, got %q", tt.expectedProperty, property)
			}
			if content != tt.expectedContent {
				t.Errorf("expected content %q, got %q", tt.expectedContent, content)
			}
		})
	}
}