|
<%@ Import Namespace="System" %>
<%@ Import Namespace="System.IO" %>
<%@ Import Namespace="System.Xml" %>
<%@ Import Namespace="System.Net" %>
<%@ Import Namespace="System.Text" %>
<%@ Import Namespace="System.Collections.Generic" %>
<%@ Import Namespace="System.Web.Caching" %>
<script runat="server">
    ' ------------------------------------------------------------------
    ' Facebook Public Group Wall RSS Generator (ASP.NET / VB)
    '
    ' Fetches the configured group wall page, scrapes the posts out of the
    ' markup with a small state machine, and emits them as an RSS/XML feed.
    ' The generated feed is kept in the ASP.NET server cache for one day,
    ' keyed by the "name" query-string value.
    '
    ' Query string:
    '   name       - cache key for the feed (default "default")
    '   feedNumber - maximum number of items to emit (default 10)
    ' NOTE(review): the cache key comes straight from the query string and
    ' the cached feed does not depend on feedNumber; the first request for a
    ' given name decides how many items every later request sees.
    ' ------------------------------------------------------------------

    Dim discussionGroupPage As String = "http://www.facebook.com/PUBLIC-GROUP" ' URL to Group Wall
    Dim rssCacheClear As Integer = 0 ' 1 = clear server-cache, 0 = do not clear server-cache.

    ' RSS Content Provider Data
    ' "topic" = put link to topic in <link></link> of RSS feed,
    ' "thumbnail" = put link to thumbnail in <link></link> of RSS feed,
    ' "post" = put link to the poster's post
    Dim postLink As String = "thumbnail"
    Dim rssDescriptionLen As Integer = 60 ' Number of characters to allow in the poster's post (description)
    Dim rssIncludeHeader As Integer = 0 ' 0 = do not include RSS provider data (below), 1 = include
    Dim feed_title As String = "FEED TITLE"
    Dim feed_link As String = "http://www.facebook.com/PUBLIC-GROUP/"
    Dim feed_description As String = "FEED DESCRIPTION"
    Dim feed_language As String = "en-us"
    Dim feed_pubdate As String = "20 Apr 2007 9:40:00 GMT"
    Dim feed_copyright As String = Nothing
    Dim feed_webmaster As String = "WEBMASTER@YOUREMAIL.COM"

    ' Internal state - not configuration.
    Dim discussionPageData As String = Nothing
    Dim crawlAgent As String = Nothing
    Dim crawlError As String = Nothing ' Set by crawlPage when the fetch fails.

    ' Placeholder date used by the informational (non-post) items.
    Private Const PLACEHOLDER_DATE As String = "01 Jan 1900 00:00:01 GMT"

    ' Layout of one scraped post (String array).
    Private Const FIELD_URL As Integer = 0
    Private Const FIELD_TIMESTAMP As Integer = 1
    Private Const FIELD_USERNAME As Integer = 2
    Private Const FIELD_THUMBNAIL As Integer = 3
    Private Const FIELD_CONTENT As Integer = 4

    ' Downloads URL with an HTTP GET and returns the response body.
    ' On failure returns Nothing and stores the (HTML-encoded) exception
    ' message in crawlError so the caller can fall back to a local copy.
    Private Function crawlPage(ByVal URL As String) As String
        Dim crawlOutput As String = Nothing
        Try
            Dim myRequest As HttpWebRequest = CType(WebRequest.Create(URL), HttpWebRequest)
            myRequest.UserAgent = crawlAgent
            myRequest.Method = "GET"
            ' Using blocks guarantee the response and reader are released even
            ' when reading throws part-way through.
            Using myResponse As HttpWebResponse = CType(myRequest.GetResponse(), HttpWebResponse)
                Using streamRead As New StreamReader(myResponse.GetResponseStream())
                    crawlOutput = streamRead.ReadToEnd()
                End Using
            End Using
        Catch ex As Exception
            crawlError = Server.HtmlEncode(ex.Message)
            ' An empty message must still register as a failure.
            If String.IsNullOrEmpty(crawlError) Then crawlError = "Unknown error"
        End Try
        Return crawlOutput
    End Function

    ' Case-insensitive "contains" that tolerates Nothing.
    Private Function containsText(ByVal haystack As String, ByVal needle As String) As Boolean
        If haystack Is Nothing Then Return False
        Return haystack.IndexOf(needle, StringComparison.OrdinalIgnoreCase) >= 0
    End Function

    ' Splits element on double quotes and returns piece number index,
    ' or "" when the element has fewer pieces (instead of throwing).
    Private Function quotedPart(ByVal element As String, ByVal index As Integer) As String
        If element Is Nothing Then Return ""
        Dim parts() As String = element.Split(""""c)
        If index >= 0 AndAlso index < parts.Length Then Return parts(index)
        Return ""
    End Function

    ' Drops everything between "<" and ">" (inclusive) from fragment.
    Private Function stripTags(ByVal fragment As String) As String
        Dim visible As New StringBuilder()
        Dim insideTag As Boolean = False
        For Each ch As Char In fragment
            If ch = "<"c Then insideTag = True
            If Not insideTag Then visible.Append(ch)
            If ch = ">"c Then insideTag = False
        Next
        Return visible.ToString()
    End Function

    ' Cuts content down to rssDescriptionLen characters and appends "...".
    ' If the cut landed inside an HTML entity, a recognisable partial
    ' &quot; is completed and any other dangling "&q" / "&" is dropped, so
    ' the description never ends in a broken entity.
    Private Function truncateDescription(ByVal content As String) As String
        If content Is Nothing OrElse content.Length <= rssDescriptionLen Then
            Return content
        End If
        Dim shortened As String = content.Substring(0, rssDescriptionLen)
        For Each stub As String In New String() {"&quot", "&quo", "&qu"}
            If shortened.EndsWith(stub, StringComparison.OrdinalIgnoreCase) Then
                Return shortened.Substring(0, shortened.Length - stub.Length) & "&quot;..."
            End If
        Next
        For Each stub As String In New String() {"&q", "&"}
            If shortened.EndsWith(stub, StringComparison.OrdinalIgnoreCase) Then
                Return shortened.Substring(0, shortened.Length - stub.Length) & "..."
            End If
        Next
        Return shortened & "..."
    End Function

    ' Walks the ">"-separated markup and returns one String array per post
    ' (see the FIELD_* constants). The checks run from the highest state to
    ' the lowest on purpose: a transition made for one element must not be
    ' acted on until the next element, except 7 -> 1 which re-examines the
    ' same element for the next post's picture block.
    Private Function parseWallPosts(ByVal pageData As String) As List(Of String())
        Dim posts As New List(Of String())
        Dim elements() As String = pageData.Split(">"c)
        Dim state As Integer = 0
        Dim collector As String = ""
        Dim postURL As String = ""
        Dim postUserName As String = ""
        Dim postUserThumbnail As String = ""
        Dim postUserContent As String = ""

        For S As Integer = 0 To elements.Length - 1
            Dim element As String = elements(S)

            If state = 7 Then
                ' This element carries the time stamp; the post is complete.
                state = 1
                Dim stamp As String = element.Replace("</a", "")
                ' Compensate for FB change #1
                If containsText(stamp, "class=""timestamp""") Then stamp = quotedPart(stamp, 3)
                ' Compensate for FB change #2
                If containsText(stamp, "data-date=") Then stamp = quotedPart(stamp, 3)
                ' Compensate for future tag markup use
                If stamp.IndexOf("<"c) >= 0 OrElse stamp.IndexOf(">"c) >= 0 Then stamp = ""

                posts.Add(New String() {postURL, stamp, postUserName, postUserThumbnail, postUserContent})
                postURL = "" : postUserName = "" : postUserThumbnail = "" : postUserContent = ""
            End If

            If state = 6 Then state = 7

            If state = 5 Then
                If containsText(element, "class=""UIIntentionalStory_Time""") Then state = 6
            End If

            If state = 4 Then
                If containsText(element, "</h3") Then
                    If Len(collector) > 2 Then
                        state = 5
                        postUserContent = truncateDescription(collector)
                        collector = ""
                    End If
                ElseIf containsText(element, "<form ") Then
                    state = 5
                    postUserContent = truncateDescription(collector)
                    ' Reset so this post's text cannot leak into the next one.
                    collector = ""
                Else
                    collector = collector & " " & stripTags(element & ">")
                End If
            End If

            If state = 3 Then
                If containsText(element, "class=""UIStory_Message""") Then state = 4
            End If

            If state = 2 Then
                state = 3
                postUserThumbnail = quotedPart(element, 3)
            End If

            If state = 1 Then
                If containsText(element, "class=""UIIntentionalStory_Pic""") Then
                    state = 2
                    Dim parts() As String = element.Split(""""c)
                    postURL = quotedPart(element, 1)
                    If parts.Length > 5 Then
                        ' UserName in href tag
                        postUserName = parts(5)
                    Else
                        ' UserName out of sync in a span tag
                        postUserName = quotedPart(element, 3)
                    End If
                End If
            End If

            If state = 0 AndAlso containsText(element, "class=""minifeedwall""") Then state = 1
        Next

        Return posts
    End Function

    ' Renders one <item>. Text fields are entity-encoded; the link is
    ' decoded first so an href that already contains &amp; is not
    ' double-encoded.
    Private Function buildItem(ByVal title As String, ByVal link As String, ByVal description As String, ByVal author As String, ByVal dateText As String) As String
        Dim item As New StringBuilder()
        item.Append("<item>" & vbCrLf)
        item.Append("<title>" & Server.HtmlEncode(title) & "</title>" & vbCrLf)
        item.Append("<link>" & Server.HtmlEncode(Server.HtmlDecode(link)) & "</link>" & vbCrLf)
        item.Append("<description>" & Server.HtmlEncode(description) & "</description>" & vbCrLf)
        item.Append("<author>" & Server.HtmlEncode(author) & "</author>" & vbCrLf)
        item.Append("<date>" & Server.HtmlEncode(dateText) & "</date>" & vbCrLf)
        item.Append("</item>" & vbCrLf)
        Return item.ToString()
    End Function

    ' Wraps already-rendered items in <rss><channel>, adding the channel
    ' header fields when rssIncludeHeader = 1.
    Private Function buildRss(ByVal items As String) As String
        Dim rss As New StringBuilder()
        rss.Append("<rss version=""2.0"">" & vbCrLf)
        rss.Append("<channel>" & vbCrLf)
        If rssIncludeHeader = 1 Then
            rss.Append("<title>" & Server.HtmlEncode(feed_title) & "</title>" & vbCrLf)
            rss.Append("<link>" & Server.HtmlEncode(Server.HtmlDecode(feed_link)) & "</link>" & vbCrLf)
            rss.Append("<description>" & Server.HtmlEncode(feed_description) & "</description>" & vbCrLf)
            rss.Append("<language>" & Server.HtmlEncode(feed_language) & "</language>" & vbCrLf)
            rss.Append("<date>" & Server.HtmlEncode(feed_pubdate) & "</date>" & vbCrLf)
            rss.Append("<copyright>" & Server.HtmlEncode(feed_copyright) & "</copyright>" & vbCrLf)
            rss.Append("<webmaster>" & Server.HtmlEncode(feed_webmaster) & "</webmaster>" & vbCrLf)
        End If
        rss.Append(items)
        rss.Append("</channel>" & vbCrLf)
        rss.Append("</rss>")
        Return rss.ToString()
    End Function

    ' Returns the contents of rssCache.rss next to this page, or Nothing
    ' when the file is missing or unreadable.
    Private Function loadLocalCopy() As String
        Try
            Dim localPath As String = Path.Combine(Server.MapPath("."), "rssCache.rss")
            If File.Exists(localPath) Then Return File.ReadAllText(localPath)
        Catch ex As Exception
            ' Best effort only: fall through and report "no local copy".
        End Try
        Return Nothing
    End Function

    ' Crawls the wall, builds the feed and stores it in the server cache.
    ' When the crawl fails, serves the local copy instead, or a single
    ' informational item if there is none, so the output is always
    ' well-formed XML.
    Private Function buildFreshFeed(ByVal feedName As String, ByVal maxItems As Integer) As String
        discussionPageData = crawlPage(discussionGroupPage)

        If Len(crawlError) > 0 Then
            ' Error with contacting target. Index local RSS copy.
            Dim localCopy As String = loadLocalCopy()
            If localCopy IsNot Nothing Then Return localCopy
            Return buildRss(buildItem("Feed Source Unavailable", "", "Feed Source Unavailable", "", PLACEHOLDER_DATE))
        End If

        ' Filter content: drop line breaks and undo the JSON-style escaping.
        If discussionPageData Is Nothing Then discussionPageData = ""
        discussionPageData = discussionPageData.Replace(vbCrLf, "").Replace("\/", "/").Replace("\""", """").Replace("\n ", "").Replace(vbCr, "")

        Dim posts As List(Of String()) = parseWallPosts(discussionPageData)
        Dim items As New StringBuilder()

        If posts.Count > 0 Then
            Dim limit As Integer = Math.Min(posts.Count, maxItems)
            For R As Integer = 0 To limit - 1
                Dim post() As String = posts(R)
                Dim link As String
                Select Case LCase(postLink)
                    Case "thumbnail"
                        link = post(FIELD_THUMBNAIL)
                    Case "topic"
                        link = feed_link
                    Case Else
                        link = post(FIELD_URL)
                End Select
                items.Append(buildItem(feed_title, link, post(FIELD_CONTENT), post(FIELD_USERNAME), post(FIELD_TIMESTAMP)))
            Next
        Else
            ' Nothing found
            items.Append(buildItem("Page Found But Data Not Recognized", "", "Page Found But Data Not Recognized", "", PLACEHOLDER_DATE))
        End If

        Dim rssFeed As String = buildRss(items.ToString())

        ' Save into server cache for one day.
        HttpContext.Current.Cache.Add(feedName, rssFeed, Nothing, DateTime.Now.AddDays(1), System.Web.Caching.Cache.NoSlidingExpiration, CacheItemPriority.Normal, Nothing)
        Return rssFeed
    End Function

    ' Page entry point: serves the cached feed, builds a fresh one, or (when
    ' rssCacheClear = 1) clears the cache entry and reports that it did so.
    Private Sub Page_Load(ByVal sender As System.Object, ByVal e As System.EventArgs)
        ' The visitor's user agent is forwarded to the crawled site.
        crawlAgent = Request.UserAgent

        ' Querystring data
        Dim feedName As String = Request.QueryString("name")
        If String.IsNullOrEmpty(feedName) Then feedName = "default"

        ' TryParse: a missing or non-numeric feedNumber falls back to the
        ' default instead of raising an unhandled conversion error.
        Dim maxFeedsTotalAccept As Integer = 0
        If Not Integer.TryParse(Request.QueryString("feedNumber"), maxFeedsTotalAccept) OrElse maxFeedsTotalAccept <= 0 Then
            maxFeedsTotalAccept = 10
        End If

        Dim rssFeed As String = Nothing

        If rssCacheClear = 0 Then
            ' Use RSS from server cache when present, otherwise build it.
            rssFeed = TryCast(HttpContext.Current.Cache(feedName), String)
            If rssFeed Is Nothing Then
                rssFeed = buildFreshFeed(feedName, maxFeedsTotalAccept)
            End If
        Else
            ' Clear server cache
            HttpContext.Current.Cache.Remove(feedName)
            rssFeed = buildRss(buildItem("Server Cache Cleared", "", "Server Cache Cleared", "", PLACEHOLDER_DATE))
        End If

        ' Generate output
        Response.Buffer = False
        If rssIncludeHeader = 1 Then
            Response.ContentType = "application/rss+xml"
        Else
            Response.ContentType = "text/xml"
        End If
        Response.Write("<" & "?" & "xml version=""1.0"" encoding=""utf-8""" & "?" & ">" & vbCrLf)
        Response.Write(rssFeed)
    End Sub
</script>
|
#!/usr/bin/perl -w
########################################################
# Facebook Public Group Wall RSS Generator             #
########################################################
#
# CGI script. Once per day (tracked by the day-of-year number stored in
# rssDay.txt) it fetches the configured group wall, scrapes the posts out
# of the markup and stores the generated <item> list in rssCopy.rss. On
# every other request, or when the fetch fails, the stored copy is served.

use strict;
use warnings;
use IO::Socket::INET;

##### Declare Container For Response Stream
our $captured_response = "";

##### Variable Arguments Passed Into Bot Engine
our ($port, $agent_name, $agent_method, $target_domain, $target_full_address,
     $request_method, $request_protocol);

##### Other
our ($rssDescriptionLen, $maxFeedsTotalAccept, $feedContent, $postLink, $rssIncludeHeader);
our ($feed_title, $feed_link, $feed_description, $feed_language, $feed_pubdate,
     $feed_copyright, $feed_webmaster);

##### Connection Settings
$target_domain       = "www.facebook.com";   # Site to contact
$target_full_address = "/PUBLIC-GROUP";      # Full path to public group wall
$port                = 80;
$agent_name          = defined $ENV{HTTP_USER_AGENT} ? $ENV{HTTP_USER_AGENT} : "";
$agent_method        = "http-get/0.1";       # Used as User-Agent when the visitor sent none
$request_method      = "GET";
$request_protocol    = "HTTP/1.0";

##### RSS Settings
$rssDescriptionLen   = 45;          # Maximum number of characters to allow in the description
$maxFeedsTotalAccept = 6;           # Maximum number of links to get
$postLink            = "thumbnail"; # "thumbnail" - URL to thumbnail image, "topic" - URL to topic, "post" - URL to post
$rssIncludeHeader    = 1;           # 1 - include channel information (RSS), 0 - leave channel information out (XML)
$feed_title          = "FEED TITLE";
$feed_link           = "http://www.facebook.com/PUBLIC-GROUP/";
$feed_description    = "FEED DESCRIPTION";
$feed_language       = "en-us";
$feed_pubdate        = "11 Apr 2001 01:01:00 GMT";
$feed_copyright      = "WEBMASTER\@YOUREMAIL.COM";
$feed_webmaster      = "";
$feedContent         = "";

##### Date Filter
# Converts text containing "on <Month> <day>, <year> at <h>:<mm>am|pm" into
# "DD Mon YYYY HH:MM:00 GMT". Returns "" when the text has no such date.
# NOTE(review): the zone label is appended as-is; no time-zone conversion
# is performed.
sub translateTimeStamp {
    my ($rawdate) = @_;
    return "" unless defined $rawdate;

    if ($rawdate =~ /on\s+([A-Za-z]+)\s+(\d{1,2}),?\s+(\d{4})\s+at\s+(\d{1,2}):(\d{2})\s*(am|pm)/i) {
        my ($month, $day, $year, $hour, $minute, $meridiem) = ($1, $2, $3, $4, $5, lc $6);
        # 12-hour to 24-hour: 12am -> 00, 12pm -> 12, 1pm -> 13.
        $hour %= 12;
        $hour += 12 if $meridiem eq "pm";
        return sprintf("%02d %s %s %02d:%02d:00 GMT",
                       $day, substr($month, 0, 3), $year, $hour, $minute);
    }
    return "";
}

##### The Crawler
# Sends one HTTP request and stores the response body in the global
# $captured_response. Returns "Connection Successful" on a 2xx response,
# otherwise a short error description.
sub botEngine {
    my ($port, $agent_name, $agent_method, $target_domain, $target_full_address,
        $request_method, $request_protocol) = @_;

    $captured_response = "";

    # IO::Socket::INET resolves the host and supplies the platform's own
    # socket constants.
    my $socket = IO::Socket::INET->new(
        PeerAddr => $target_domain,
        PeerPort => $port,
        Proto    => 'tcp',
        Timeout  => 10,
    ) or return "Socket Connection Error 1";
    $socket->autoflush(1);

    # Forward the visitor's agent; strip CR/LF so it cannot inject headers.
    my $user_agent = (defined $agent_name && length $agent_name) ? $agent_name : $agent_method;
    $user_agent =~ s/[\r\n]//g;

    my $request = "$request_method $target_full_address $request_protocol\r\n"
                . "Host: $target_domain\r\n"
                . "User-Agent: $user_agent\r\n"
                . "Connection: close\r\n"
                . "\r\n";
    print {$socket} $request;

    # Gather response stream.
    my $raw = "";
    my $chunk;
    while (sysread($socket, $chunk, 64 * 1024)) {
        $raw .= $chunk;
    }

    unless (close($socket)) {
        return "Socket Close Error 1";
    }

    # Separate the header block from the body and require a 2xx status.
    my ($head, $body) = split(/\r?\n\r?\n/, $raw, 2);
    if (!defined $head || $head !~ m{\AHTTP/\d(?:\.\d)?\s+2\d\d\b}) {
        return "HTTP Error";
    }
    $captured_response = defined $body ? $body : "";

    return "Connection Successful";
}

##### Helpers

# Escapes the characters that are significant in XML text.
sub xml_escape {
    my ($text) = @_;
    return "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Escapes a URL taken from an HTML attribute; an existing &amp; is
# normalised first so it is not double-encoded.
sub xml_escape_url {
    my ($url) = @_;
    return "" unless defined $url;
    $url =~ s/&amp;/&/gi;
    return xml_escape($url);
}

# Returns piece number $index of $fragment split on double quotes, or "".
sub quoted_part {
    my ($fragment, $index) = @_;
    my @parts = split(/"/, defined $fragment ? $fragment : "");
    return defined $parts[$index] ? $parts[$index] : "";
}

# Limits $text to $rssDescriptionLen characters, appending "..." when cut.
# A cut that landed inside an HTML entity drops the dangling fragment.
sub truncate_description {
    my ($text) = @_;
    return "" unless defined $text;
    return $text if length($text) <= $rssDescriptionLen;
    my $shortened = substr($text, 0, $rssDescriptionLen);
    $shortened =~ s/&[#A-Za-z0-9]*\z//;
    return $shortened . "...";
}

# Drops everything between "<" and ">" (inclusive) from $fragment.
sub strip_tags {
    my ($fragment) = @_;
    my $visible    = "";
    my $inside_tag = 0;
    foreach my $char (split(//, $fragment)) {
        $inside_tag = 1 if $char eq "<";
        $visible .= $char if !$inside_tag;
        $inside_tag = 0 if $char eq ">";
    }
    return $visible;
}

# Reads a whole file; returns "" when it is missing or unreadable.
sub read_file {
    my ($path) = @_;
    open(my $fh, '<', $path) or return "";
    local $/;
    my $content = <$fh>;
    close($fh);
    return defined $content ? $content : "";
}

# Replaces the file's content, leaving it with mode 0644.
sub write_file {
    my ($path, $content) = @_;
    chmod 0666, $path if -e $path;
    open(my $fh, '>', $path) or do { warn "Cannot write $path: $!"; return 0; };
    print {$fh} $content;
    close($fh);
    chmod 0644, $path;
    return 1;
}

# Walks the ">"-separated markup and returns a list of post hashes.
# The state checks run from highest to lowest on purpose: a transition
# made for one element is acted on only with the next element, except
# 7 -> 1 which re-examines the same element for the next post's picture.
sub extract_posts {
    my ($html) = @_;
    my @posts;
    my $postBlockFound = 0;
    my $collector      = "";
    my ($postURL, $postUserName, $postUserThumbnail, $postUserContent) = ("", "", "", "");

    foreach my $element (split(/>/, $html)) {
        my $line = $element;

        if ($postBlockFound == 7) {
            # Limit reached: nothing further would be stored.
            last if scalar(@posts) >= $maxFeedsTotalAccept;

            $postBlockFound = 1;
            my $postTimeStamp = $line;
            $postTimeStamp =~ s/<\/a//g;
            if ($postTimeStamp =~ /class="timestamp"/i) {
                $postTimeStamp = quoted_part($line, 3);
            }
            push @posts, {
                url       => $postURL,
                user      => $postUserName,
                timestamp => $postTimeStamp,
                thumbnail => $postUserThumbnail,
                content   => $postUserContent,
            };
            # Clear every per-post field so nothing leaks into the next post.
            ($postURL, $postUserName, $postUserThumbnail, $postUserContent) = ("", "", "", "");
        }

        if ($postBlockFound == 6) {
            $postBlockFound = 7;
        }

        if ($postBlockFound == 5) {
            if ($line =~ /class="UIIntentionalStory_Time"/i) {
                $postBlockFound = 6;
            }
        }

        if ($postBlockFound == 4) {
            $line =~ s/\\n\s//g;
            if ($line =~ /<\/h3/i) {
                if (length($collector) > 2) {
                    $postBlockFound  = 5;
                    $postUserContent = truncate_description($collector);
                    $collector       = "";
                }
            }
            elsif ($line =~ /<form /i) {
                $postBlockFound  = 5;
                $postUserContent = truncate_description($collector);
                $collector       = "";
            }
            else {
                $collector = $collector . " " . strip_tags($line . ">");
            }
        }

        if ($postBlockFound == 3) {
            if ($line =~ /class="UIStory_Message"/i) {
                $postBlockFound = 4;
            }
        }

        if ($postBlockFound == 2) {
            $postBlockFound    = 3;
            $postUserThumbnail = quoted_part($line, 3);
        }

        if ($postBlockFound == 1) {
            if ($line =~ /class="UIIntentionalStory_Pic"/i) {
                $postBlockFound = 2;
                my @tmp = split(/"/, $line);
                $postURL = defined $tmp[1] ? $tmp[1] : "";
                if (scalar(@tmp) > 5) {
                    # UserName in href tag
                    $postUserName = $tmp[5];
                }
                else {
                    # UserName out of sync in a span tag
                    $postUserName = defined $tmp[3] ? $tmp[3] : "";
                }
            }
        }

        if ($line =~ /class="minifeedwall"/i) {
            $postBlockFound = 1;
        }
    }

    return @posts;
}

##### Main

my $docRoot   = defined $ENV{'DOCUMENT_ROOT'} ? $ENV{'DOCUMENT_ROOT'} : "";
my $dayFile   = "$docRoot/rssDay.txt";
my $copyFile  = "$docRoot/rssCopy.rss";
my $dayNumber = (localtime)[7];

# Day number of the last successful refresh (-1 = never).
my $foundDay = -1;
if (-e $dayFile) {
    my $stored = read_file($dayFile);
    $foundDay = $1 if $stored =~ /^\s*(\d+)/;
}

if ($dayNumber != $foundDay) {
    ##### Invoke Crawler
    my $response = "";
    if (length($target_domain) > 0 && length($target_full_address) > 0) {
        $response = botEngine($port, $agent_name, $agent_method, $target_domain,
                              $target_full_address, $request_method, $request_protocol);
    }

    if ($response eq "Connection Successful") {
        ##### Filter Content
        my $captured_response_filtered = $captured_response;
        $captured_response_filtered =~ s/\r\n//g;
        $captured_response_filtered =~ s/\n//g;
        $captured_response_filtered =~ s/\r//g;
        $captured_response_filtered =~ s/\\r\\n//g;
        $captured_response_filtered =~ s/\\n//g;
        $captured_response_filtered =~ s/\\r//g;
        $captured_response_filtered =~ s/\\\//\//g;
        $captured_response_filtered =~ s/\\"/"/g;

        ##### Extract Content
        my @posts = extract_posts($captured_response_filtered);

        ##### CREATE RSS/XML
        foreach my $post (@posts) {
            my $description = length($post->{content}) == 0 ? $feed_title : $post->{content};

            my $link;
            if ($postLink eq "thumbnail") {
                $link = $post->{thumbnail};
            }
            elsif ($postLink eq "topic") {
                $link = $feed_link;
            }
            else {
                $link = $post->{url};
            }

            $feedContent .= "<item>\r\n";
            $feedContent .= "<title>" . xml_escape($feed_title) . "</title>\r\n";
            $feedContent .= "<link>" . xml_escape_url($link) . "</link>\r\n";
            $feedContent .= "<description>" . xml_escape($description) . "</description>\r\n";
            $feedContent .= "<author>" . xml_escape($post->{user}) . "</author>\r\n";
            $feedContent .= "<date>" . xml_escape($post->{timestamp}) . "</date>\r\n";
            $feedContent .= "</item>\r\n";
        }

        # Save Generated Data
        write_file($copyFile, $feedContent);

        # Save Day Number
        write_file($dayFile, $dayNumber);
    }
    else {
        # Error With Contacting Target. Index Local RSS Copy.
        $feedContent = read_file($copyFile);
    }
}
else {
    # On current day. Use Local RSS Copy.
    $feedContent = read_file($copyFile);
}

##### Generate RSS/XML Output
if ($rssIncludeHeader == 1) {
    print "Content-type: application/rss+xml\n\n";
}
else {
    print "Content-type: text/xml\n\n";
}
print "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n";
print "<rss version=\"2.0\">\r\n";
print "<channel>\r\n";
if ($rssIncludeHeader == 1) {
    print "<title>" . xml_escape($feed_title) . "</title>\r\n";
    print "<link>" . xml_escape_url($feed_link) . "</link>\r\n";
    print "<description>" . xml_escape($feed_description) . "</description>\r\n";
    print "<language>" . xml_escape($feed_language) . "</language>\r\n";
    print "<date>" . xml_escape($feed_pubdate) . "</date>\r\n";
    print "<copyright>" . xml_escape($feed_copyright) . "</copyright>\r\n";
    print "<webmaster>" . xml_escape($feed_webmaster) . "</webmaster>\r\n";
}
print $feedContent;
print "</channel>\r\n";
print "</rss>\r\n";
exit;
|
<?php
/*
 * Facebook Public Group Wall RSS Generator (PHP).
 *
 * Once per day (tracked by the day-of-year number stored in rssDay.txt) the
 * script fetches the configured group wall, scrapes the posts out of the
 * markup and stores the generated <item> list in rssCopy.txt. On every other
 * request, or when the fetch fails, the stored copy is served instead.
 */

/* Globals */
global $target_domain, $target_full_address, $agent_name;
global $rssDescriptionLen, $maxFeedsTotalAccept, $postLink, $rssIncludeHeader;
global $feed_title, $feed_link, $feed_description, $feed_language, $feed_pubdate, $feed_copyright, $feed_webmaster;
global $captured_response, $feedContent;

/* Connection Settings */
$target_domain = "www.facebook.com";     // Site to contact
$target_full_address = "/PUBLIC-GROUP";  // Full path to public group wall
// The visitor's agent is forwarded to the crawled site; it may be absent.
$agent_name = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : "";

/* RSS Settings */
$rssDescriptionLen = 45;    // Maximum number of characters to allow in the description
$maxFeedsTotalAccept = 6;   // Maximum number of links to get
$postLink = "thumbnail";    // "thumbnail" - URL to thumbnail image, "topic" - URL to topic, "post" - URL to post
$rssIncludeHeader = 1;      // 1 - include channel information (RSS), 0 - leave channel information out (XML)
$feed_title = "FEED TITLE";
$feed_link = "http://www.facebook.com" . $target_full_address;
$feed_description = "FEED DESCRIPTION";
$feed_language = "en-us";
$feed_pubdate = "11 Apr 2001 01:01:00 GMT";
$feed_copyright = "";
$feed_webmaster = "WEBMASTER@YOUREMAIL.COM";

$captured_response = "";
$feedContent = "";

/* The Crawler */
/**
 * Fetches http://<tdomain><taddress> with cURL.
 *
 * Stores the response body in the global $captured_response ("" on failure).
 *
 * @param string $tdomain  Host name to contact.
 * @param string $taddress Absolute path on that host.
 * @return string "Connection Successful" or "Connection Failed".
 */
function botEngine($tdomain, $taddress) {
    global $captured_response, $agent_name;

    $captured_response = "";
    if (!function_exists('curl_init')) {
        return "Connection Failed";
    }

    $ch = curl_init();
    if ($ch === false) {
        return "Connection Failed";
    }
    curl_setopt($ch, CURLOPT_USERAGENT, (string)$agent_name);
    curl_setopt($ch, CURLOPT_URL, "http://" . $tdomain . $taddress);
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_AUTOREFERER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $body = curl_exec($ch);

    // An empty body is treated as a failure, like a transport error.
    if (!$body) {
        return "Connection Failed";
    }
    $captured_response = $body;
    return "Connection Successful";
}

/* Helpers */

/** Escapes the characters that are significant in XML text (byte-safe). */
function feedXmlEscape($text) {
    return str_replace(
        array("&", "<", ">", "\""),
        array("&amp;", "&lt;", "&gt;", "&quot;"),
        (string)$text
    );
}

/**
 * Escapes a URL taken from an HTML attribute. Existing entities are decoded
 * first so an href that already contains &amp; is not double-encoded.
 */
function feedUrlEscape($url) {
    return feedXmlEscape(htmlspecialchars_decode((string)$url, ENT_QUOTES));
}

/** Returns piece number $index of $fragment split on double quotes, or "". */
function feedQuotedPart($fragment, $index) {
    $parts = explode("\"", (string)$fragment);
    return isset($parts[$index]) ? $parts[$index] : "";
}

/**
 * Limits $text to $maxLen characters, appending "..." when it was cut.
 * Uses the multibyte functions when available so a UTF-8 character is never
 * split, and drops an HTML entity that the cut left dangling.
 */
function feedTruncate($text, $maxLen) {
    $text = (string)$text;
    if (function_exists('mb_strlen') && function_exists('mb_substr')) {
        if (mb_strlen($text, 'UTF-8') <= $maxLen) {
            return $text;
        }
        $shortened = mb_substr($text, 0, $maxLen, 'UTF-8');
    } else {
        if (strlen($text) <= $maxLen) {
            return $text;
        }
        $shortened = substr($text, 0, $maxLen);
    }
    $shortened = preg_replace('/&[#A-Za-z0-9]*$/', '', $shortened);
    return $shortened . "...";
}

/** Drops everything between "<" and ">" (inclusive) from $fragment. */
function feedStripTags($fragment) {
    $visible = "";
    $insideTag = false;
    $length = strlen($fragment);
    for ($step = 0; $step < $length; $step++) {
        $char = $fragment[$step];
        if ($char == "<") { $insideTag = true; }
        if (!$insideTag) { $visible .= $char; }
        if ($char == ">") { $insideTag = false; }
    }
    return $visible;
}

/** Reads a whole file; returns "" when it is missing or unreadable. */
function feedReadFile($filename) {
    if (is_file($filename) && is_readable($filename)) {
        $content = file_get_contents($filename);
        if ($content !== false) {
            return $content;
        }
    }
    return "";
}

/**
 * Walks the ">"-separated markup and returns up to $maxPosts posts, each an
 * array with the keys url, user, timestamp, thumbnail and content.
 *
 * The state checks run from highest to lowest on purpose: a transition made
 * for one element is acted on only with the next element, except 7 -> 1
 * which re-examines the same element for the next post's picture block.
 */
function feedExtractPosts($html, $maxPosts, $descriptionLen) {
    $posts = array();
    $postBlockFound = 0;
    $collector = "";
    $postURL = "";
    $postUserName = "";
    $postUserThumbnail = "";
    $postUserContent = "";

    foreach (explode(">", $html) as $line) {
        if ($postBlockFound == 7) {
            // Limit reached: nothing further would be stored.
            if (count($posts) >= $maxPosts) {
                break;
            }
            $postBlockFound = 1;
            $postTimeStamp = str_replace("</a", "", $line);
            if (preg_match("/class=\"timestamp\"/i", $postTimeStamp)) {
                $postTimeStamp = feedQuotedPart($postTimeStamp, 3);
            }
            $posts[] = array(
                'url'       => $postURL,
                'user'      => $postUserName,
                'timestamp' => $postTimeStamp,
                'thumbnail' => $postUserThumbnail,
                'content'   => $postUserContent,
            );
            // Clear
            $postURL = "";
            $postUserName = "";
            $postUserThumbnail = "";
            $postUserContent = "";
        }

        if ($postBlockFound == 6) {
            $postBlockFound = 7;
        }

        if ($postBlockFound == 5) {
            if (preg_match("/class=\"UIIntentionalStory_Time\"/i", $line)) {
                $postBlockFound = 6;
            }
        }

        if ($postBlockFound == 4) {
            // Remove the escaped (literal backslash-n) line breaks; real
            // newlines were already stripped from the page.
            $line = str_replace('\n ', "", $line);
            if (preg_match("/<\/h3/i", $line)) {
                if (strlen($collector) > 2) {
                    $postBlockFound = 5;
                    $postUserContent = feedTruncate($collector, $descriptionLen);
                    $collector = "";
                }
            } else if (preg_match("/<form /i", $line)) {
                $postBlockFound = 5;
                $postUserContent = feedTruncate($collector, $descriptionLen);
                $collector = "";
            } else {
                $collector = $collector . " " . feedStripTags($line . ">");
            }
        }

        if ($postBlockFound == 3) {
            if (preg_match("/class=\"UIStory_Message\"/i", $line)) {
                $postBlockFound = 4;
            }
        }

        if ($postBlockFound == 2) {
            $postBlockFound = 3;
            $postUserThumbnail = feedQuotedPart($line, 3);
        }

        if ($postBlockFound == 1) {
            if (preg_match("/class=\"UIIntentionalStory_Pic\"/i", $line)) {
                $postBlockFound = 2;
                $tmp = explode("\"", $line);
                $postURL = isset($tmp[1]) ? $tmp[1] : "";
                if (count($tmp) > 5) {
                    // UserName in href tag
                    $postUserName = $tmp[5];
                } else {
                    // UserName out of sync in a span tag
                    $postUserName = isset($tmp[3]) ? $tmp[3] : "";
                }
            }
        }

        if (preg_match("/class=\"minifeedwall\"/i", $line)) {
            $postBlockFound = 1;
        }
    }

    return $posts;
}

/* Begin */
$documentRoot = isset($_SERVER['DOCUMENT_ROOT']) ? $_SERVER['DOCUMENT_ROOT'] : "";
$dayFile = $documentRoot . "/rssDay.txt";
$copyFile = $documentRoot . "/rssCopy.txt";

// Day of the year; only used to detect "a different day than last refresh".
$dayNumber = (int)date("z");
$foundDay = -1;
$storedDay = trim(feedReadFile($dayFile));
if ($storedDay !== "" && ctype_digit($storedDay)) {
    $foundDay = (int)$storedDay;
}

$refreshed = false;
if ($dayNumber != $foundDay) {
    /* Grab Content */
    $response = botEngine($target_domain, $target_full_address);

    // Only a successful fetch may replace the stored copy; otherwise a
    // temporary outage would wipe the feed until the next day.
    if ($response == "Connection Successful") {
        $captured_response_filtered = preg_replace("/(\r\n)+|(\n|\r)+/", "", $captured_response);
        $captured_response_filtered = str_replace("\\/", "/", $captured_response_filtered);
        $captured_response_filtered = str_replace("\\\"", "\"", $captured_response_filtered);

        $posts = feedExtractPosts($captured_response_filtered, $maxFeedsTotalAccept, $rssDescriptionLen);

        /* Create RSS/XML */
        foreach ($posts as $post) {
            $description = (strlen($post['content']) == 0) ? $feed_title : $post['content'];

            if ($postLink == "thumbnail") {
                $link = $post['thumbnail'];
            } else if ($postLink == "topic") {
                $link = $feed_link;
            } else {
                $link = $post['url'];
            }

            $feedContent .= "<item>\r\n";
            $feedContent .= "<title>" . feedXmlEscape($feed_title) . "</title>\r\n";
            $feedContent .= "<link>" . feedUrlEscape($link) . "</link>\r\n";
            $feedContent .= "<description>" . feedXmlEscape($description) . "</description>\r\n";
            $feedContent .= "<author>" . feedXmlEscape($post['user']) . "</author>\r\n";
            $feedContent .= "<date>" . feedXmlEscape($post['timestamp']) . "</date>\r\n";
            $feedContent .= "</item>\r\n";
        }

        /* Save Generated Data and Day Number */
        if (file_put_contents($copyFile, $feedContent, LOCK_EX) === false) {
            error_log("RSS generator: cannot write " . $copyFile);
        }
        if (file_put_contents($dayFile, (string)$dayNumber, LOCK_EX) === false) {
            error_log("RSS generator: cannot write " . $dayFile);
        }
        $refreshed = true;
    }
}

if (!$refreshed) {
    /* On current day, or the target could not be contacted. Use Local RSS Copy. */
    $feedContent = feedReadFile($copyFile);
}

/* Generate RSS/XML Output */
if ($rssIncludeHeader == 1) {
    header("Content-type: application/rss+xml");
} else {
    header("Content-type: text/xml");
}
print "<" . "?" . "xml version=\"1.0\" encoding=\"utf-8\"" . "?" . ">\r\n";
print "<rss version=\"2.0\">\r\n";
print "<channel>\r\n";
if ($rssIncludeHeader == 1) {
    print "<title>" . feedXmlEscape($feed_title) . "</title>\r\n";
    print "<link>" . feedUrlEscape($feed_link) . "</link>\r\n";
    print "<description>" . feedXmlEscape($feed_description) . "</description>\r\n";
    print "<language>" . feedXmlEscape($feed_language) . "</language>\r\n";
    print "<date>" . feedXmlEscape($feed_pubdate) . "</date>\r\n";
    print "<copyright>" . feedXmlEscape($feed_copyright) . "</copyright>\r\n";
    print "<webmaster>" . feedXmlEscape($feed_webmaster) . "</webmaster>\r\n";
}
print $feedContent;
print "</channel>\r\n";
print "</rss>\r\n";
exit;
?>