Files
SaltMiner/Program.cs
chrispr 3d81ef5174 - Whitelisted subreddits only
- Filter criteria for posts
- Added subreddit to Submission.cs
- Increased context window
2026-02-28 17:54:03 -05:00

180 lines
6.7 KiB
C#

using System;
using OllamaSharp;
using System.Linq;
using Microsoft.Extensions.Configuration;
using SaltMiner.Data;
using System.Formats.Tar;
using System.Text.Json;
using OllamaSharp.Models;
using System.Text.Json.Nodes;
namespace SaltMiner
{
internal class Program
{
private static int maxPromptSize = 32768;
/// <summary>
/// Entry point. Selects up to 100 posts from the whitelisted subreddits that were created
/// inside a fixed date window, rebuilds each post's comment tree as a
/// <see cref="Submission"/>, and sends it to <see cref="RequestThreadSentiment"/>.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Built with optional: false, so a missing appSettings.json fails here at start-up.
    // The resulting configuration object is not read anywhere else in this method.
    var configurationBuilder = new ConfigurationBuilder();
    configurationBuilder.SetBasePath(System.IO.Directory.GetCurrentDirectory());
    configurationBuilder.AddJsonFile(path: "appSettings.json", optional: false, reloadOnChange: true);
    var config = configurationBuilder.Build();

    // Both bounds are midnight values and the filter is inclusive, so posts created
    // later on the end date (after 00:00:00) are not selected.
    DateTime startDate = new DateTime(2016, 01, 10);
    DateTime endDate = new DateTime(2016, 01, 20);

    using (var ctx = new RedditContext())
    {
        String[] whiteListedSubreddits =
            ["sanfrancisco", "bayarea", "4chan", "greentext", "California", "Chicago", "Detroit"];
        var subreddits = ctx.Subreddits.Where(s => whiteListedSubreddits.Contains(s.Name));

        var links = ctx.Links
            .Where(l => l.Created >= startDate
                     && l.Created <= endDate
                     && l.Score > 100
                     && l.Author != "[deleted]"
                     && subreddits.Any(s => s.SubredditId == l.SubredditId))
            // Skip/Take without an ordering lets the database hand back an arbitrary
            // subset, so two runs could analyse different posts. Ordering by creation
            // time (LinkId as tie-breaker) makes the selection reproducible.
            .OrderBy(l => l.Created)
            .ThenBy(l => l.LinkId)
            .Skip(1) // kept from the original query: the first matching post is skipped
            .Take(100)
            .ToList();

        foreach (var link in links)
        {
            var comments = ctx.Comments.Where(c => c.LinkId == link.LinkId).ToList();

            Submission sub = new Submission();
            sub.Title = link.Title;
            sub.Subreddit = subreddits.First(s => s.SubredditId == link.SubredditId).Name;
            sub.Author = link.Author;
            sub.Link = link.Url;
            sub.Body = link.SelfText;

            // Comments without a parent are the top-level replies; each one pulls in
            // its own descendants recursively.
            foreach (var comment in comments.Where(c => c.ParentId == null))
            {
                sub.Replies.Add(PopulateComments(sub, comments, comment));
            }

            RequestThreadSentiment(sub);
        }
    }
}
/// <summary>
/// Recursively converts a stored comment and everything beneath it into a nested
/// <see cref="Comments"/> node.
/// </summary>
/// <param name="sub">Submission being assembled; only passed along to the recursive calls.</param>
/// <param name="comments">Comments searched for children of <paramref name="targetComment"/>.</param>
/// <param name="targetComment">The comment to convert.</param>
/// <returns>A node carrying the comment's author and body, with its replies attached.</returns>
static Comments PopulateComments(Submission sub, IEnumerable<Data.Comment> comments, Data.Comment targetComment)
{
    var node = new Comments
    {
        Author = targetComment.Author,
        Body = targetComment.Body,
    };

    // A child is any comment whose ParentId points at the comment being converted.
    var children = comments.Where(candidate => candidate.ParentId == targetComment.CommentId);
    foreach (var child in children)
    {
        node.Replies.Add(PopulateComments(sub, comments, child));
    }

    return node;
}
/// <summary>
/// Serializes the thread (pruning it if needed so the whole prompt fits in
/// <c>maxPromptSize</c> characters), sends it to the local Ollama server and prints the
/// model's JSON response to the console.
/// </summary>
/// <param name="sub">
/// Thread to analyse. Pruning is done in place by <c>PruneThreadToSize</c>, so replies may
/// have been removed from this object when the method returns.
/// </param>
static void RequestThreadSentiment(Submission sub)
{
    // Single source of truth for the prompt text, so the fixed part can be measured
    // from the same literal that is actually sent.
    static string BuildPrompt(string threadJson) => $@"
Given the following internet forum thread, represented in JSON, identify the part of the thread that is composed mostly of hateful, mean-spirited insults, if any. Simple disagreement or debate should not count:
'{threadJson}'
";

    // The instruction text is longer than the 100 characters that used to be reserved
    // for it, so a thread pruned to just under that budget was rejected afterwards as
    // "too long" instead of being pruned one step further. Reserving the real overhead
    // guarantees overhead + JSON <= maxPromptSize, so no second length check is needed.
    int promptOverhead = BuildPrompt(String.Empty).Length;

    String requestJson = PruneThreadToSize(sub, maxPromptSize - promptOverhead) ?? String.Empty;
    if (String.IsNullOrEmpty(requestJson))
    {
        Console.WriteLine("Error pruning thread down to size");
        return;
    }

    string prompt = BuildPrompt(requestJson);

    var ollama = new OllamaApiClient(new Uri("http://localhost:11434"));
    string modelName = "gemma3:12b-it-qat";

    try
    {
        var request = new GenerateRequest
        {
            Model = modelName,
            Prompt = prompt,
            Format = "json",
            Stream = false
        };

        // Still a blocking call (the method is synchronous), but GetResult() rethrows
        // the original exception rather than wrapping it in an AggregateException, so
        // the catch below logs the actual failure.
        var response = ollama.GenerateAsync(request).StreamToEndAsync().GetAwaiter().GetResult();
        if (!string.IsNullOrWhiteSpace(response.Response))
        {
            Console.WriteLine("Response:" + response.Response);
        }
    }
    catch (Exception ex)
    {
        // Best effort: one failed request must not stop the remaining threads.
        Console.WriteLine(ex);
    }
}
/// <summary>
/// Serializes the submission to JSON, applying increasingly aggressive pruning passes
/// until the JSON is no longer than <paramref name="maxThreadLength"/> characters.
/// </summary>
/// <param name="sub">Thread to serialize. Pruning is done in place on this object.</param>
/// <param name="maxThreadLength">Maximum allowed length of the JSON, in characters.</param>
/// <returns>
/// The JSON of the (possibly pruned) thread, or <c>null</c> when it is still too long
/// after every pass has been applied.
/// </returns>
static String? PruneThreadToSize(Submission sub, Int32 maxThreadLength)
{
    // Ordered from least to most destructive; each pass is applied only when the
    // result of the previous one is still too long.
    var pruningPasses = new Action[]
    {
        // Keep two levels: top-level comments and their direct replies.
        () =>
        {
            foreach (Comments topLevel in sub.Replies)
            {
                foreach (Comments reply in topLevel.Replies)
                {
                    reply.Replies.Clear();
                }
            }
        },

        // Keep top-level comments only.
        () =>
        {
            foreach (Comments topLevel in sub.Replies)
            {
                topLevel.Replies.Clear();
            }
        },

        // Keep the first 10 top-level comments in their existing order
        // (no sorting is applied here).
        () =>
        {
            sub.Replies = sub.Replies.Take(10).ToList();
        },
    };

    // The untouched thread is tried first.
    string json = JsonSerializer.Serialize(sub);
    foreach (Action pass in pruningPasses)
    {
        if (json.Length <= maxThreadLength)
        {
            break;
        }

        pass();
        json = JsonSerializer.Serialize(sub);
    }

    return json.Length <= maxThreadLength ? json : null;
}
/// <summary>
/// Downloads the Mobilize "indivisible" in-person event page, extracts the JSON assigned
/// to <c>window.__MLZ_EMBEDDED_DATA__</c> and prints the name of every entry under
/// <c>data.events</c>.
/// </summary>
/// <remarks>
/// Blocks the calling thread while the request runs. A non-success status, a page without
/// the expected script assignment, or unexpected JSON is reported on the console and the
/// method returns; network failures still surface as exceptions.
/// </remarks>
static void GetProtests()
{
    const string marker = "window.__MLZ_EMBEDDED_DATA__ = ";

    // Both the client and the response hold network resources, so dispose them.
    using HttpClient client = new HttpClient();
    using HttpResponseMessage httpResponse =
        client.GetAsync("https://www.mobilize.us/indivisible/?is_virtual=false").GetAwaiter().GetResult();
    if (!httpResponse.IsSuccessStatusCode)
    {
        Console.WriteLine($"Event page request failed with status {(int)httpResponse.StatusCode}");
        return;
    }

    String response = httpResponse.Content.ReadAsStringAsync().GetAwaiter().GetResult();

    // Without this check a missing marker (-1) would be turned into a bogus offset.
    int markerIdx = response.IndexOf(marker, StringComparison.Ordinal);
    if (markerIdx < 0)
    {
        Console.WriteLine("Embedded event data was not found in the page");
        return;
    }

    int startIdx = markerIdx + marker.Length;

    // The page's line endings are whatever the server sent, not Environment.NewLine
    // (which is "\r\n" on Windows), so look for '\n' and trim a possible '\r' afterwards.
    int endIdx = response.IndexOf('\n', startIdx);
    if (endIdx < 0)
    {
        endIdx = response.Length;
    }

    // Drop trailing whitespace, then the JavaScript statement terminator.
    String dataSubstring = response.Substring(startIdx, endIdx - startIdx).TrimEnd().TrimEnd(';');

    JsonArray? events;
    try
    {
        events = JsonNode.Parse(dataSubstring)?["data"]?["events"] as JsonArray;
    }
    catch (Exception ex) when (ex is JsonException or InvalidOperationException)
    {
        // JsonException: not valid JSON. InvalidOperationException: a node on the
        // path is not a JSON object, so it cannot be indexed by property name.
        Console.WriteLine($"Embedded event data could not be read: {ex.Message}");
        return;
    }

    if (events is null)
    {
        Console.WriteLine("Embedded event data does not contain a data.events array");
        return;
    }

    foreach (var protestEvent in events)
    {
        // Entries that are not objects, or have no name, print as an empty line.
        Console.WriteLine((protestEvent as JsonObject)?["name"]);
    }
}
}
}