- Whitelisted subreddits only

- Filter criteria for posts
- Added subreddit to Submission.cs
- Increased context window
This commit is contained in:
2026-02-28 17:54:03 -05:00
parent a574922178
commit 3d81ef5174
2 changed files with 138 additions and 12 deletions

View File

@@ -5,6 +5,7 @@ public class Submission
Replies = new List<Comments>();
}
public String Author { get; set; }
public String Subreddit { get; set; }
public String Title { get; set; }
public String Body { get; set; }
public String Link { get; set; }

View File

@@ -4,12 +4,16 @@ using System.Linq;
using Microsoft.Extensions.Configuration;
using SaltMiner.Data;
using System.Formats.Tar;
using System.Text.Json;
using OllamaSharp.Models;
using System.Text.Json.Nodes;
namespace SaltMiner
{
internal class Program
{
private static int maxPromptSize = 32768;
static void Main(string[] args)
{
@@ -20,22 +24,34 @@ namespace SaltMiner
var config = configurationBuilder.Build();
String linkID = "3yyupi";
//String linkID = args[0];
DateTime startDate = new DateTime(2016, 01,10);
DateTime endDate = new DateTime(2016, 01, 20);
using(var ctx = new RedditContext())
{
var link = ctx.Links.Where(l => l.LinkId == linkID).First();
var comments = ctx.Comments.Where(c => c.LinkId == linkID).ToList();
Submission sub = new Submission();
sub.Title = link.Title;
sub.Author = link.Author;
sub.Link = link.Url;
sub.Body = link.SelfText;
foreach(var comment in comments.Where(c => c.ParentId == null))
{ //top level
sub.Replies.Add(PopulateComments(sub, comments, comment));
}
String[] whiteListedSubreddits =
["sanfrancisco", "bayarea", "4chan", "greentext", "California", "Chicago", "Detroit"];
var subreddits = ctx.Subreddits.Where(s => whiteListedSubreddits.Contains(s.Name));
foreach (var link in ctx.Links.Where(l => l.Created >= startDate && l.Created <= endDate && l.Score > 100 && l.Author != "[deleted]" && subreddits.Any(s => s.SubredditId == l.SubredditId)).Skip(1).Take(100).ToList())
{
//var link = ctx.Links.Where(l => l.LinkId == linkID).First();
var comments = ctx.Comments.Where(c => c.LinkId == link.LinkId).ToList();
Submission sub = new Submission();
sub.Title = link.Title;
sub.Subreddit = subreddits.First((s => s.SubredditId == link.SubredditId)).Name;
sub.Author = link.Author;
sub.Link = link.Url;
sub.Body = link.SelfText;
foreach (var comment in comments.Where(c => c.ParentId == null))
{
//top level
sub.Replies.Add(PopulateComments(sub, comments, comment));
}
RequestThreadSentiment(sub);
}
}
}
@@ -51,5 +67,114 @@ namespace SaltMiner
}
return comment;
}
/// <summary>
/// Serializes the thread to JSON (pruning it when necessary), wraps it in an instruction
/// prompt, sends it to the Ollama server at http://localhost:11434 and writes the raw
/// model response to the console.
/// </summary>
/// <param name="sub">
/// Thread to analyse. It is handed to <c>PruneThreadToSize</c>, which may remove replies
/// from it in place.
/// </param>
static void RequestThreadSentiment(Submission sub)
{
    var ollama = new OllamaApiClient(new Uri("http://localhost:11434"));
    string modelName = "gemma3:12b-it-qat";

    // Single source of truth for the prompt text, so the size budget below is derived
    // from the real template instead of a guessed constant.
    static string BuildPrompt(string threadJson) => $@"
Given the following internet forum thread, represented in JSON, identify the part of the thread that is composed mostly of hateful, mean-spirited insults, if any. Simple disagreement or debate should not count:
'{threadJson}'
";

    // Characters consumed by the instruction text that surrounds the JSON. The previous
    // fixed allowance of 100 was smaller than the template, so a thread could pass the
    // pruning budget and still be rejected below without the deeper pruning stages running.
    int promptOverhead = BuildPrompt(String.Empty).Length;

    String requestJson = PruneThreadToSize(sub, maxPromptSize - promptOverhead) ?? String.Empty;
    if (String.IsNullOrEmpty(requestJson))
    {
        Console.WriteLine("Error pruning thread down to size");
        return;
    }

    string prompt = BuildPrompt(requestJson);
    if (prompt.Length > maxPromptSize)
    {
        // Defensive guard; with the budget above this is not expected to trigger.
        Console.WriteLine("Converted prompt is too long, try another thread");
        return;
    }

    try
    {
        var request = new GenerateRequest
        {
            Model = modelName,
            Prompt = prompt,
            Format = "json",
            Stream = false
        };

        // Blocks until the whole response has been received; this method is synchronous.
        var response = ollama.GenerateAsync(request).StreamToEndAsync().Result;
        if (!string.IsNullOrWhiteSpace(response?.Response))
        {
            Console.WriteLine("Response:" + response.Response);
        }
        else
        {
            Console.WriteLine("Model returned an empty response");
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and let the caller continue with the next thread.
        Console.WriteLine(ex);
    }
}
/// <summary>
/// Serializes <paramref name="sub"/> to JSON, discarding progressively more of its reply
/// tree until the JSON is no longer than <paramref name="maxThreadLength"/> characters.
/// </summary>
/// <remarks>
/// Pruning is destructive: replies are removed from <paramref name="sub"/> itself and are
/// not restored, including when the method gives up and returns null.
/// </remarks>
/// <param name="sub">Thread to serialize; may be modified in place.</param>
/// <param name="maxThreadLength">Maximum allowed length of the returned JSON string.</param>
/// <returns>
/// The JSON of the first variant that fits, or null when even the most heavily pruned
/// variant is too long.
/// </returns>
static String? PruneThreadToSize(Submission sub, Int32 maxThreadLength)
{
    // Stages run in order and are cumulative; after each one the thread is re-serialized
    // and returned as soon as it fits.
    Action[] pruningStages = new Action[]
    {
        // Stage 0: the untouched thread.
        () => { },

        // Stage 1: keep top-level comments and their direct replies only.
        () =>
        {
            foreach (Comments topLevel in sub.Replies)
            {
                foreach (Comments reply in topLevel.Replies)
                {
                    reply.Replies.Clear();
                }
            }
        },

        // Stage 2: keep top-level comments only.
        () =>
        {
            foreach (Comments topLevel in sub.Replies)
            {
                topLevel.Replies.Clear();
            }
        },

        // Stage 3: keep the first 10 top-level comments in their current order
        // (no sorting by score happens here).
        () =>
        {
            sub.Replies = sub.Replies.Take(10).ToList();
        },
    };

    foreach (Action applyStage in pruningStages)
    {
        applyStage();

        String candidateJson = JsonSerializer.Serialize(sub);
        if (candidateJson.Length <= maxThreadLength)
        {
            return candidateJson;
        }
    }

    return null;
}
/// <summary>
/// Downloads the Mobilize "indivisible" event listing page, extracts the JSON assigned to
/// <c>window.__MLZ_EMBEDDED_DATA__</c> in the page source and writes the name of every
/// entry under <c>data.events</c> to the console.
/// </summary>
/// <remarks>
/// Problems with the page contents (non-success status, missing marker, malformed JSON,
/// missing events array) are reported on the console and end the method early.
/// </remarks>
static void GetProtests()
{
    const string dataMarker = "window.__MLZ_EMBEDDED_DATA__ = ";

    // Dispose the client and response so the underlying connection is released.
    using HttpClient client = new HttpClient();
    using HttpResponseMessage httpResponse = client.GetAsync("https://www.mobilize.us/indivisible/?is_virtual=false").Result;
    if (!httpResponse.IsSuccessStatusCode)
    {
        Console.WriteLine($"Event page request failed with status {(int)httpResponse.StatusCode}");
        return;
    }

    String response = httpResponse.Content.ReadAsStringAsync().Result;

    // Ordinal search: the marker is a code identifier, not linguistic text.
    int markerIdx = response.IndexOf(dataMarker, StringComparison.Ordinal);
    if (markerIdx < 0)
    {
        Console.WriteLine("Embedded event data marker not found in page");
        return;
    }

    int startIdx = markerIdx + dataMarker.Length;

    // Look for '\n' rather than Environment.NewLine: the page's line endings do not depend
    // on the operating system this program runs on.
    int endIdx = response.IndexOf('\n', startIdx);
    if (endIdx < 0)
    {
        endIdx = response.Length;
    }

    // Drop trailing whitespace (including a possible '\r') and the JavaScript ';' terminator.
    String dataSubstring = response.Substring(startIdx, endIdx - startIdx).TrimEnd().TrimEnd(';');

    JsonNode? jsonNode;
    try
    {
        jsonNode = JsonNode.Parse(dataSubstring);
    }
    catch (JsonException ex)
    {
        Console.WriteLine("Embedded event data is not valid JSON: " + ex.Message);
        return;
    }

    if (jsonNode?["data"]?["events"] is not JsonArray events)
    {
        Console.WriteLine("Embedded event data does not contain an events array");
        return;
    }

    foreach (JsonNode? protestEvent in events)
    {
        // Entries that are not JSON objects carry no name and are skipped.
        if (protestEvent is JsonObject eventObject)
        {
            Console.WriteLine(eventObject["name"]);
        }
    }
}
}
}