- Filter criteria for posts - Added subreddit to Submission.cs - Increased context window
180 lines
6.7 KiB
C#
180 lines
6.7 KiB
C#
using System;
|
|
using OllamaSharp;
|
|
using System.Linq;
|
|
using Microsoft.Extensions.Configuration;
|
|
using SaltMiner.Data;
|
|
using System.Formats.Tar;
|
|
using System.Text.Json;
|
|
using OllamaSharp.Models;
|
|
using System.Text.Json.Nodes;
|
|
|
|
|
|
namespace SaltMiner
|
|
{
|
|
internal class Program
|
|
{
|
|
// Upper bound on the prompt length, in characters; RequestThreadSentiment compares prompt.Length against it.
private static int maxPromptSize = 32768;
|
|
/// <summary>
/// Entry point. Loads configuration, selects a window of high-scoring links from the
/// whitelisted subreddits, rebuilds each link's comment tree and submits it for
/// sentiment analysis.
/// </summary>
/// <param name="args">Command-line arguments; not read.</param>
static void Main(string[] args)
{
    var configurationBuilder = new ConfigurationBuilder();

    configurationBuilder.SetBasePath(System.IO.Directory.GetCurrentDirectory());
    configurationBuilder.AddJsonFile(path: "appSettings.json", optional: false, reloadOnChange: true);

    // Not read yet. Kept because building with optional: false fails fast
    // when appSettings.json is missing from the working directory.
    var config = configurationBuilder.Build();

    DateTime startDate = new DateTime(2016, 01, 10);
    DateTime endDate = new DateTime(2016, 01, 20);

    using (var ctx = new RedditContext())
    {
        String[] whiteListedSubreddits =
            ["sanfrancisco", "bayarea", "4chan", "greentext", "California", "Chicago", "Detroit"];
        var subreddits = ctx.Subreddits.Where(s => whiteListedSubreddits.Contains(s.Name));

        // Load the (small) whitelist once so the per-link name lookup below is an
        // in-memory search instead of one extra query per link.
        var subredditNames = subreddits
            .Select(s => new { s.SubredditId, s.Name })
            .ToList();

        // Skip/Take on an unordered query returns an arbitrary window, so an explicit
        // ordering is applied first to make the selected links reproducible.
        var links = ctx.Links
            .Where(l => l.Created >= startDate
                        && l.Created <= endDate
                        && l.Score > 100
                        && l.Author != "[deleted]"
                        && subreddits.Any(s => s.SubredditId == l.SubredditId))
            .OrderBy(l => l.Created)
            .ThenBy(l => l.LinkId)
            .Skip(1)
            .Take(100)
            .ToList();

        foreach (var link in links)
        {
            var comments = ctx.Comments.Where(c => c.LinkId == link.LinkId).ToList();

            Submission sub = new Submission();
            sub.Title = link.Title;
            sub.Subreddit = subredditNames.First(s => s.SubredditId == link.SubredditId).Name;
            sub.Author = link.Author;
            sub.Link = link.Url;
            sub.Body = link.SelfText;

            // Comments without a parent are the roots of the reply tree.
            foreach (var comment in comments.Where(c => c.ParentId == null))
            {
                sub.Replies.Add(PopulateComments(sub, comments, comment));
            }

            RequestThreadSentiment(sub);
        }
    }
}
|
|
|
|
/// <summary>
/// Converts a stored comment, and recursively every reply beneath it, into a
/// <see cref="Comments"/> node.
/// </summary>
/// <param name="sub">Submission being built; only passed through to the recursive calls.</param>
/// <param name="comments">Pool of comments that is searched for replies.</param>
/// <param name="targetComment">Comment to convert.</param>
/// <returns>A node carrying the comment's author, body and nested replies.</returns>
static Comments PopulateComments(Submission sub, IEnumerable<Data.Comment> comments, Data.Comment targetComment)
{
    var node = new Comments
    {
        Author = targetComment.Author,
        Body = targetComment.Body
    };

    // Direct replies are the comments whose parent is the comment being converted.
    var directReplies = comments.Where(c => c.ParentId == targetComment.CommentId);
    foreach (var child in directReplies)
    {
        node.Replies.Add(PopulateComments(sub, comments, child));
    }

    return node;
}
|
|
|
|
/// <summary>
/// Serializes the thread to JSON (pruning it if necessary), embeds it in a prompt and
/// asks the local Ollama server to identify hateful parts of the thread. The model's
/// answer is written to the console.
/// </summary>
/// <param name="sub">Thread to analyse. Pruning may remove replies from it.</param>
static void RequestThreadSentiment(Submission sub)
{
    // Single source of truth for the prompt text, so its size can be measured exactly.
    static string BuildPrompt(string threadJson) => $@"
Given the following internet forum thread, represented in JSON, identify the part of the thread that is composed mostly of hateful, mean-spirited insults, if any. Simple disagreement or debate should not count:

'{threadJson}'
";

    string modelName = "gemma3:12b-it-qat";

    // The JSON budget is whatever is left after the fixed prompt text. Measuring the
    // template avoids the case where a thread passes pruning but the finished prompt
    // still exceeds maxPromptSize and the thread is dropped without further pruning.
    int promptOverhead = BuildPrompt(String.Empty).Length;
    String requestJson = PruneThreadToSize(sub, maxPromptSize - promptOverhead) ?? String.Empty;

    if (String.IsNullOrEmpty(requestJson))
    {
        Console.WriteLine("Error pruning thread down to size");
        return;
    }

    // Cannot exceed maxPromptSize: requestJson fits the budget computed above.
    string prompt = BuildPrompt(requestJson);

    var ollama = new OllamaApiClient(new Uri("http://localhost:11434"));

    try
    {
        var request = new GenerateRequest
        {
            Model = modelName,
            Prompt = prompt,
            Format = "json",
            Stream = false
        };

        // GetAwaiter().GetResult() rethrows the original exception instead of wrapping
        // it in an AggregateException, so the log below shows the real failure.
        var response = ollama.GenerateAsync(request).StreamToEndAsync().GetAwaiter().GetResult();

        string? answer = response?.Response;
        if (!string.IsNullOrWhiteSpace(answer))
        {
            Console.WriteLine("Response:" + answer);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }
}
|
|
|
|
/// <summary>
/// Serializes a thread to JSON, discarding progressively more replies until the JSON
/// fits within the given length.
/// </summary>
/// <remarks>
/// Stages, each followed by a fresh serialization: (1) the thread as given; (2) replies
/// nested deeper than two levels removed; (3) only top-level comments kept; (4) only the
/// first 10 top-level comments kept, in their existing order. The pruning mutates
/// <paramref name="sub"/> in place.
/// </remarks>
/// <param name="sub">Thread to serialize; its reply lists may be cleared or replaced.</param>
/// <param name="maxThreadLength">Maximum allowed length of the JSON string, in characters.</param>
/// <returns>JSON that fits the limit, or <c>null</c> when even the last stage is too long.</returns>
static String? PruneThreadToSize(Submission sub, Int32 maxThreadLength)
{
    // Serializes the thread as it currently stands; null means it is still over the limit.
    String? SerializeIfWithinLimit()
    {
        String json = JsonSerializer.Serialize(sub);
        return json.Length <= maxThreadLength ? json : null;
    }

    // Stage 1: the untouched thread.
    String? fitted = SerializeIfWithinLimit();
    if (fitted != null)
    {
        return fitted;
    }

    // Stage 2: drop everything below the second level of replies.
    foreach (Comments topLevel in sub.Replies)
    {
        foreach (Comments secondLevel in topLevel.Replies)
        {
            secondLevel.Replies.Clear();
        }
    }

    fitted = SerializeIfWithinLimit();
    if (fitted != null)
    {
        return fitted;
    }

    // Stage 3: keep top-level comments only.
    foreach (Comments topLevel in sub.Replies)
    {
        topLevel.Replies.Clear();
    }

    fitted = SerializeIfWithinLimit();
    if (fitted != null)
    {
        return fitted;
    }

    // Stage 4: keep the first 10 top-level comments; no re-ordering is applied here.
    sub.Replies = sub.Replies.Take(10).ToList();

    return SerializeIfWithinLimit();
}
|
|
|
|
/// <summary>
/// Downloads the Mobilize "indivisible" listing (requested with <c>is_virtual=false</c>)
/// and prints the name of every event found in the JSON that the page assigns to
/// <c>window.__MLZ_EMBEDDED_DATA__</c>.
/// </summary>
/// <remarks>
/// Prints a diagnostic and returns when the embedded data cannot be located or does not
/// contain an events array. Malformed JSON still surfaces as an exception from
/// <see cref="JsonNode.Parse(string, JsonNodeOptions?, JsonDocumentOptions)"/>.
/// </remarks>
static void GetProtests()
{
    const string marker = "window.__MLZ_EMBEDDED_DATA__ = ";

    using HttpClient client = new HttpClient();
    using var httpResponse = client.GetAsync("https://www.mobilize.us/indivisible/?is_virtual=false").GetAwaiter().GetResult();
    String response = httpResponse.Content.ReadAsStringAsync().GetAwaiter().GetResult();

    // A missing marker must be detected before the offset is added; -1 + length would
    // otherwise look like a valid position.
    int markerIdx = response.IndexOf(marker, StringComparison.Ordinal);
    if (markerIdx < 0)
    {
        Console.WriteLine("Embedded event data was not found in the response");
        return;
    }

    int startIdx = markerIdx + marker.Length;

    // The page's line endings are independent of the local platform, so search for '\n'
    // (which also terminates "\r\n") rather than Environment.NewLine.
    int endIdx = response.IndexOf('\n', startIdx);
    if (endIdx < 0)
    {
        endIdx = response.Length;
    }

    // Strip trailing whitespace (including '\r') and the JavaScript statement terminator.
    String dataSubstring = response.Substring(startIdx, endIdx - startIdx).TrimEnd().TrimEnd(';');

    JsonNode? jsonNode = JsonNode.Parse(dataSubstring);
    if (jsonNode?["data"]?["events"] is not JsonArray events)
    {
        Console.WriteLine("Embedded event data does not contain an events array");
        return;
    }

    foreach (var protestEvent in events)
    {
        Console.WriteLine(protestEvent?["name"]);
    }
}
|
|
}
|
|
} |