From 3d81ef5174be97aa56f1a94f37bc0e0be20e56c1 Mon Sep 17 00:00:00 2001 From: chrispr Date: Sat, 28 Feb 2026 17:54:03 -0500 Subject: [PATCH] - Whitelisted subreddits only - Filter criteria for posts - Added subreddit to Submission.cs - Increased context window --- Model/Submission.cs | 1 + Program.cs | 149 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 138 insertions(+), 12 deletions(-) diff --git a/Model/Submission.cs b/Model/Submission.cs index 7f48387..de5576d 100644 --- a/Model/Submission.cs +++ b/Model/Submission.cs @@ -5,6 +5,7 @@ public class Submission Replies = new List(); } public String Author { get; set; } + public String Subreddit { get; set; } public String Title { get; set; } public String Body { get; set; } public String Link { get; set; } diff --git a/Program.cs b/Program.cs index 88680a6..c16fba8 100644 --- a/Program.cs +++ b/Program.cs @@ -4,12 +4,16 @@ using System.Linq; using Microsoft.Extensions.Configuration; using SaltMiner.Data; using System.Formats.Tar; +using System.Text.Json; +using OllamaSharp.Models; +using System.Text.Json.Nodes; namespace SaltMiner { internal class Program { + private static int maxPromptSize = 32768; static void Main(string[] args) { @@ -20,22 +24,34 @@ namespace SaltMiner var config = configurationBuilder.Build(); - String linkID = "3yyupi"; + //String linkID = args[0]; + + DateTime startDate = new DateTime(2016, 01,10); + DateTime endDate = new DateTime(2016, 01, 20); using(var ctx = new RedditContext()) { - var link = ctx.Links.Where(l => l.LinkId == linkID).First(); - var comments = ctx.Comments.Where(c => c.LinkId == linkID).ToList(); - Submission sub = new Submission(); - sub.Title = link.Title; - sub.Author = link.Author; - sub.Link = link.Url; - sub.Body = link.SelfText; - foreach(var comment in comments.Where(c => c.ParentId == null)) - { //top level - sub.Replies.Add(PopulateComments(sub, comments, comment)); - } + String[] whiteListedSubreddits = + ["sanfrancisco", "bayarea", "4chan", "greentext", "California", "Chicago", "Detroit"]; + var subreddits = ctx.Subreddits.Where(s => whiteListedSubreddits.Contains(s.Name)); + foreach (var link in ctx.Links.Where(l => l.Created >= startDate && l.Created <= endDate && l.Score > 100 && l.Author != "[deleted]" && subreddits.Any(s => s.SubredditId == l.SubredditId)).Skip(1).Take(100).ToList()) + { + //var link = ctx.Links.Where(l => l.LinkId == linkID).First(); + var comments = ctx.Comments.Where(c => c.LinkId == link.LinkId).ToList(); + Submission sub = new Submission(); + sub.Title = link.Title; + sub.Subreddit = subreddits.First((s => s.SubredditId == link.SubredditId)).Name; + sub.Author = link.Author; + sub.Link = link.Url; + sub.Body = link.SelfText; + foreach (var comment in comments.Where(c => c.ParentId == null)) + { + //top level + sub.Replies.Add(PopulateComments(sub, comments, comment)); + } + RequestThreadSentiment(sub); + } } } @@ -51,5 +67,114 @@ namespace SaltMiner } return comment; } + + static void RequestThreadSentiment(Submission sub) + { + var ollama = new OllamaApiClient(new Uri("http://localhost:11434")); + string modelName = "gemma3:12b-it-qat"; + + String requestJson = PruneThreadToSize(sub, maxPromptSize - 100) ?? String.Empty; + + if(String.IsNullOrEmpty(requestJson)) + { + Console.WriteLine("Error pruning thread down to size"); + return; + } + + string prompt = $@" + Given the following internet forum thread, represented in JSON, identify the part of the thread that is composed mostly of hateful, mean-spirited insults, if any. Simple disagreement or debate should not count: + + '{requestJson}' + "; + + if(prompt.Length > maxPromptSize) + { + Console.WriteLine("Converted prompt is too long, try another thread"); + return; + } + try + { + var request = new GenerateRequest + { + Model = modelName, + Prompt = prompt, + Format = "json", + Stream = false + }; + + var response = ollama.GenerateAsync(request).StreamToEndAsync().Result; + + if (!string.IsNullOrWhiteSpace(response.Response)) + { + //Logger.Debug("\nRaw JSON Response from Ollama:"); + //Logger.Debug(response.Response); + Console.WriteLine("Response:" + response.Response); + } + } + catch (Exception ex) + { + Console.WriteLine(ex); + } + } + + static String? PruneThreadToSize(Submission sub, Int32 maxThreadLength) + { + //Try the full thread first: + String threadJson = JsonSerializer.Serialize(sub); + + if(threadJson.Length <= maxThreadLength) + return threadJson; + + //Cut thread to only 2-layers deep: + foreach(Comments comment in sub.Replies) + { + foreach(Comments subComment in comment.Replies) + { + subComment.Replies.Clear(); + } + } + + threadJson = JsonSerializer.Serialize(sub); + + if(threadJson.Length <= maxThreadLength) + return threadJson; + + //keep only top level + foreach(Comments subComment in sub.Replies) + { + subComment.Replies.Clear(); + } + + threadJson = JsonSerializer.Serialize(sub); + + if(threadJson.Length <= maxThreadLength) + return threadJson; + + //prune to top 10 comments by upvotes + sub.Replies = sub.Replies.Take(10).ToList(); + + threadJson = JsonSerializer.Serialize(sub); + + if(threadJson.Length <= maxThreadLength) + return threadJson; + else return null; + + } + + static void GetProtests() + { + HttpClient client = new HttpClient(); + var httpResponse = client.GetAsync("https://www.mobilize.us/indivisible/?is_virtual=false").Result; + String response = httpResponse.Content.ReadAsStringAsync().Result; + int startIdx = response.IndexOf("window.__MLZ_EMBEDDED_DATA__ = ") + 31; + int endIdx = response.IndexOf(Environment.NewLine, startIdx); + String dataSubstring = response.Substring(startIdx, endIdx - startIdx - 1); //-1 to remove JS ; + JsonNode jsonNode = JsonNode.Parse(dataSubstring); + foreach(var protestEvent in jsonNode["data"]["events"].AsArray()) + { + Console.WriteLine(protestEvent["name"]); + } + //Console.WriteLine(dataSubstring); + } } } \ No newline at end of file