Sparse skills

Python

1a

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd

# Task 1a: Data Loading and Preprocessing
#
# Loads a sales CSV, fills missing values, parses the Date column, makes the
# Total column consistent with Quantity * Price, and saves the cleaned data.

# Largest absolute difference between Total and Quantity * Price that is still
# treated as "equal"; exact float comparison would flag rounding noise.
TOTAL_TOLERANCE = 1e-6

# 1. Load the Data: Load the CSV file into a Pandas DataFrame
file_path = "data.csv"  # Replace with your file path
df = pd.read_csv(file_path)
print("Data loaded successfully.")

# 2. Data Cleaning: Check for missing values and fill them appropriately
if df.isnull().sum().sum() > 0:
    print("Missing values found. Filling with appropriate values...")
    # Numeric columns are filled with 0, every other column with 'Unknown'.
    for column in df.columns:
        fill_value = 0 if pd.api.types.is_numeric_dtype(df[column]) else 'Unknown'
        # Assign the result back instead of calling fillna(inplace=True) on
        # df[column]: the in-place form operates on an intermediate object and
        # is not guaranteed to update df.
        df[column] = df[column].fillna(fill_value)
else:
    print("No missing values found.")

# 3. Convert the Date column to a datetime object
if 'Date' in df.columns:
    # Unparseable values become NaT so they can be detected below.
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    if df['Date'].isnull().sum() > 0:
        print("Some dates were invalid. Converting invalid dates to a default value (e.g., today's date).")
        df['Date'] = df['Date'].fillna(pd.Timestamp.today())
    print("Date column converted to datetime successfully.")

# 4. Ensure the Total column correctly represents the product of Quantity and Price
if {'Total', 'Quantity', 'Price'}.issubset(df.columns):
    # Kept in a local Series rather than a DataFrame column so the scratch
    # values do not end up in the saved CSV.
    expected_total = df['Quantity'] * df['Price']
    reported_total = pd.to_numeric(df['Total'], errors='coerce')
    # NaN compares False here, so non-numeric or missing totals count as
    # discrepancies as well.
    matches = (reported_total - expected_total).abs() <= TOTAL_TOLERANCE
    if not matches.all():
        print("Discrepancies found in the Total column. Correcting them...")
        df['Total'] = expected_total
    print("Total column verified and corrected if necessary.")
else:
    print("Required columns (Total, Quantity, or Price) not found.")

# Display the cleaned DataFrame (Optional)
print("Cleaned DataFrame:")
print(df.head())

# Save the cleaned data (Optional)
df.to_csv("cleaned_data.csv", index=False)
print("Cleaned data saved to 'cleaned_data.csv'.")

1b

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pandas as pd
import matplotlib.pyplot as plt
# Sample sales records as (product, quantity, date) rows.
sales_records = [
    ('A', 10, '2023-01-15'),
    ('B', 15, '2023-01-20'),
    ('A', 12, '2023-02-10'),
    ('C', 20, '2023-02-18'),
    ('B', 25, '2023-03-05'),
    ('A', 10, '2023-03-12'),
    ('C', 30, '2023-04-07'),
    ('B', 15, '2023-04-15'),
    ('A', 10, '2023-05-01'),
    ('C', 5, '2023-05-10'),
]
products, quantities, dates = zip(*sales_records)
data = {
    'Product': list(products),
    'Quantity': list(quantities),
    'Date': list(dates),
}

df = pd.DataFrame(data)

# Parse the date strings so the .dt accessor can be used below.
df['Date'] = pd.to_datetime(df['Date'])

# 1. Product Sales Distribution (Bar Chart): total quantity per product.
product_sales = df.groupby('Product')['Quantity'].sum()

plt.figure(figsize=(10, 6))
product_sales.plot.bar()
plt.xlabel('Product')
plt.ylabel('Total Quantity Sold')
plt.title('Total Quantity Sold for Each Product')
plt.xticks(rotation=0)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

# 2. Sales Over Time (Line Plot): total quantity per calendar month.
df['Month'] = df['Date'].dt.to_period('M')  # year-month period of each sale
monthly_sales = df.groupby('Month')['Quantity'].sum()

plt.figure(figsize=(10, 6))
monthly_sales.plot.line(marker='o')
plt.xlabel('Month')
plt.ylabel('Total Sales')
plt.title('Total Sales Over the Months of 2023')
plt.grid(axis='both', linestyle='--', alpha=0.7)
plt.show()

2a

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# 2a: Database Creation, Table Creation, and Data Insertion

import sqlite3

# Cleaned and processed rows from Part 1, one tuple per sale in the order
# (Date, Product, Quantity, Price, Total).
# Replace this sample with your actual cleaned data list.
cleaned_data = [
    ("2023-01-01", "Widget A", 10, 9.99, 99.90),
    ("2023-01-01", "Widget B", 5, 19.99, 99.95),
    ("2023-02-10", "Widget A", 20, 9.99, 199.80),
    ("2023-03-15", "Widget C", 3, 29.99, 89.97),
    ("2023-03-15", "Widget B", 2, 19.99, 39.98),
]

# Connect to (or create) the SQLite database named SalesDB
conn = sqlite3.connect('SalesDB.db')
try:
    cursor = conn.cursor()
    # "with conn" commits when the body succeeds and rolls back if it raises,
    # so a failed insert cannot leave a half-written batch behind.
    with conn:
        # Create the Sales table with appropriate columns.
        # The UNIQUE constraint on (Date, Product) prevents duplicate entries.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS Sales (
                Date TEXT,
                Product TEXT,
                Quantity INTEGER,
                Price REAL,
                Total REAL,
                UNIQUE(Date, Product)
            )
        ''')

        # "INSERT OR IGNORE" skips rows that would violate the UNIQUE
        # constraint, so re-running the script does not duplicate data.
        cursor.executemany('''
            INSERT OR IGNORE INTO Sales (Date, Product, Quantity, Price, Total)
            VALUES (?, ?, ?, ?, ?)
        ''', cleaned_data)
finally:
    # Always release the database handle, even when a statement fails.
    conn.close()

2b

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# 2b: Querying the Database

import sqlite3

conn = sqlite3.connect('SalesDB.db')
try:
    cursor = conn.cursor()

    # Query 1: Total Sales for the year 2023
    # SUM() over zero rows is NULL; COALESCE turns that into 0.0 so the report
    # prints a number instead of None when there are no 2023 sales.
    cursor.execute('''
        SELECT COALESCE(SUM(Total), 0.0) AS total_sales_2023
        FROM Sales
        WHERE Date LIKE '2023-%';
    ''')
    total_sales_2023 = cursor.fetchone()[0]
    print("Total Sales in 2023:", total_sales_2023)

    # Query 2: Product Sales Summary for 2023
    # (total quantity sold per product, descending order)
    cursor.execute('''
        SELECT Product, SUM(Quantity) AS total_quantity
        FROM Sales
        WHERE Date LIKE '2023-%'
        GROUP BY Product
        ORDER BY total_quantity DESC;
    ''')
    product_sales_summary = cursor.fetchall()
    print("Product Sales Summary for 2023:")
    for row in product_sales_summary:
        print(row)  # (Product, total_quantity)
finally:
    # Close the connection even if one of the queries raises.
    conn.close()

3a

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Task 3a: Model Initialization
class SimpleNN(nn.Module):
    """Model made of a single linear layer with one output feature."""

    def __init__(self, input_size):
        """Create the linear layer mapping ``input_size`` features to 1 output."""
        super().__init__()
        self.linear = nn.Linear(in_features=input_size, out_features=1)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        output = self.linear(x)
        return output

# Instantiate the model
input_size = 10
model = SimpleNN(input_size=input_size)

# Mean-squared-error loss, optimized with plain SGD at learning rate 0.01
loss_function = nn.MSELoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

3b

1
2
3
4
5
6
# Generate synthetic data: random features and one random target per sample
num_samples, num_features = 100, 10

data = torch.randn((num_samples, num_features))
target = torch.randn((num_samples, 1))  # shape (num_samples, 1) for the MSE loss

3c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
num_epochs = 20
losses = []

# Each epoch runs one forward/backward pass over the whole dataset.
for epoch in range(num_epochs):
    # Forward pass: predict and measure the error against the targets
    predictions = model(data)
    loss = loss_function(predictions, target)

    # Backward pass: clear old gradients, backpropagate, update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Record the scalar loss for plotting and report progress
    loss_value = loss.item()
    losses.append(loss_value)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss_value:.4f}")

3d

1
2
3
4
5
6
# Plot the recorded loss against the 1-based epoch number.
epoch_numbers = range(1, num_epochs + 1)
plt.plot(epoch_numbers, losses, marker='o')
plt.title('Training Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Training Loss')
plt.grid(True)
plt.show()

Java

1a

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
package petcare;

/**
 * An animal described by its name, size and comfortable temperature range.
 */
public class Animal {
    private String name;
    private AnimalSize size;
    private int comfortableTemperatureLower;
    private int comfortableTemperatureUpper;

    /**
     * Creates an animal after validating the name and the temperature range.
     *
     * @param name                        the animal's name; at least 3 characters
     * @param size                        the animal's size
     * @param comfortableTemperatureLower lower bound of the comfortable range; not below 0
     * @param comfortableTemperatureUpper upper bound of the comfortable range; not above 50
     * @throws IllegalArgumentException if the name is null or shorter than 3 characters,
     *                                  or if the bounds fall outside 0..50 or the lower
     *                                  bound exceeds the upper bound
     */
    public Animal(String name, AnimalSize size, int comfortableTemperatureLower, int comfortableTemperatureUpper) {
        boolean nameValid = name != null && name.length() >= 3;
        if (!nameValid) {
            throw new IllegalArgumentException("Name must be at least 3 characters long.");
        }

        boolean rangeValid = comfortableTemperatureLower >= 0
                && comfortableTemperatureUpper <= 50
                && comfortableTemperatureLower <= comfortableTemperatureUpper;
        if (!rangeValid) {
            throw new IllegalArgumentException("Temperature range must be between 0 and 50 and valid.");
        }

        this.name = name;
        this.size = size;
        this.comfortableTemperatureLower = comfortableTemperatureLower;
        this.comfortableTemperatureUpper = comfortableTemperatureUpper;
    }
}

1b

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/**
 * Returns a text form listing the name, size and both temperature bounds.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("Animal{");
    text.append("name='").append(name).append('\'');
    text.append(", size=").append(size);
    text.append(", comfortableTemperatureLower=").append(comfortableTemperatureLower);
    text.append(", comfortableTemperatureUpper=").append(comfortableTemperatureUpper);
    text.append('}');
    return text.toString();
}

/**
 * Two animals are equal when they are of the same class and have the same
 * size and name; the temperature bounds are not compared.
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o == null) {
        return false;
    }
    if (o.getClass() != getClass()) {
        return false;
    }
    Animal other = (Animal) o;
    if (size != other.size) {
        return false;
    }
    return name.equals(other.name);
}

/**
 * Hash code derived from the name and size, the same fields used by equals.
 */
@Override
public int hashCode() {
    // Same 31-based combination that Objects.hash(name, size) produces.
    int result = 1;
    result = 31 * result + (name == null ? 0 : name.hashCode());
    result = 31 * result + (size == null ? 0 : size.hashCode());
    return result;
}

// Getters and Setters

/** Returns the animal's name. */
public String getName() {
    return this.name;
}

/**
 * Replaces the animal's name.
 *
 * @param name the new name; at least 3 characters
 * @throws IllegalArgumentException if the name is null or shorter than 3 characters
 */
public void setName(String name) {
    boolean valid = name != null && name.length() >= 3;
    if (!valid) {
        throw new IllegalArgumentException("Name must be at least 3 characters long.");
    }
    this.name = name;
}

/** Returns the animal's size. */
public AnimalSize getSize() {
    return this.size;
}

/**
 * Replaces the animal's size; the value is stored without validation.
 *
 * @param size the new size
 */
public void setSize(AnimalSize size) {
    this.size = size;
}

/** Returns the lower bound of the comfortable temperature range. */
public int getComfortableTemperatureLower() {
    return this.comfortableTemperatureLower;
}

/**
 * Replaces the lower bound of the comfortable temperature range.
 *
 * @param comfortableTemperatureLower the new lower bound; at least 0 and not
 *                                    above the current upper bound
 * @throws IllegalArgumentException if the value is negative or exceeds the upper bound
 */
public void setComfortableTemperatureLower(int comfortableTemperatureLower) {
    boolean valid = comfortableTemperatureLower >= 0
            && comfortableTemperatureLower <= this.comfortableTemperatureUpper;
    if (!valid) {
        throw new IllegalArgumentException("Invalid lower temperature.");
    }
    this.comfortableTemperatureLower = comfortableTemperatureLower;
}

/** Returns the upper bound of the comfortable temperature range. */
public int getComfortableTemperatureUpper() {
    return this.comfortableTemperatureUpper;
}

/**
 * Replaces the upper bound of the comfortable temperature range.
 *
 * @param comfortableTemperatureUpper the new upper bound; at most 50 and not
 *                                    below the current lower bound
 * @throws IllegalArgumentException if the value exceeds 50 or is below the lower bound
 */
public void setComfortableTemperatureUpper(int comfortableTemperatureUpper) {
    boolean valid = comfortableTemperatureUpper <= 50
            && comfortableTemperatureUpper >= this.comfortableTemperatureLower;
    if (!valid) {
        throw new IllegalArgumentException("Invalid upper temperature.");
    }
    this.comfortableTemperatureUpper = comfortableTemperatureUpper;
}

2a

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
package petcare;

/**
 * An enclosure with a size, a temperature, running costs and at most one
 * occupant.
 */
public class Enclosure {
    private AnimalSize size;
    private int temperature;
    private int runningCosts;
    // The animal currently housed here; null while the enclosure is empty.
    private Animal occupant;

    /**
     * Creates an empty enclosure.
     *
     * @param size         the size of the enclosure
     * @param temperature  the temperature of the enclosure
     * @param runningCosts the running costs of the enclosure
     */
    public Enclosure(AnimalSize size, int temperature, int runningCosts) {
        this.size = size;
        this.temperature = temperature;
        this.runningCosts = runningCosts;
        this.occupant = null;
    }

    /** Returns the size of the enclosure. */
    public AnimalSize getSize() {
        return size;
    }

    /** Returns the temperature of the enclosure. */
    public int getTemperature() {
        return temperature;
    }

    /** Returns the running costs of the enclosure. */
    public int getRunningCosts() {
        return runningCosts;
    }

    /** Returns the current occupant, or null if the enclosure is empty. */
    public Animal getOccupant() {
        return occupant;
    }

    /**
     * Returns a readable description of the enclosure, so printing an
     * enclosure shows its fields instead of the default Object identity text.
     */
    @Override
    public String toString() {
        return "Enclosure{" +
                "size=" + size +
                ", temperature=" + temperature +
                ", runningCosts=" + runningCosts +
                ", occupant=" + occupant +
                '}';
    }
}

2b

1
2
3
4
5
6
7
8
9
/**
 * Checks whether the given animal fits this enclosure: its size ordinal must
 * not exceed the enclosure's, and the enclosure temperature must lie within
 * the animal's comfortable range (bounds inclusive).
 *
 * @param animal the animal to check
 * @return true if the animal is compatible with this enclosure
 * @throws IllegalArgumentException if the animal is null
 */
public boolean checkCompatibility(Animal animal) {
    if (animal == null) {
        throw new IllegalArgumentException("Animal cannot be null.");
    }
    if (animal.getSize().ordinal() > size.ordinal()) {
        return false;
    }
    if (temperature < animal.getComfortableTemperatureLower()) {
        return false;
    }
    return temperature <= animal.getComfortableTemperatureUpper();
}

2c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/**
 * Places the animal in this enclosure.
 *
 * @param animal the animal to house
 * @throws IllegalArgumentException if the enclosure is already occupied, or
 *                                  if the animal is null or not compatible
 */
public void addAnimal(Animal animal) {
    boolean occupied = occupant != null;
    if (occupied) {
        throw new IllegalArgumentException("Enclosure already has an occupant.");
    }
    boolean compatible = checkCompatibility(animal);
    if (!compatible) {
        throw new IllegalArgumentException("Animal is not compatible with this enclosure.");
    }
    occupant = animal;
}

/** Empties the enclosure by clearing its occupant. */
public void removeAnimal() {
    occupant = null;
}

3a

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
package petcare;

import java.util.ArrayList;
import java.util.List;

/**
 * Holds the list of enclosures managed by the service.
 */
public class PetService {
    private List<Enclosure> enclosures = new ArrayList<>();

    /** Creates a service with no enclosures. */
    public PetService() {
    }

    /** Returns the service's list of enclosures (the internal list, not a copy). */
    public List<Enclosure> getEnclosures() {
        return this.enclosures;
    }
}

3b

1
2
3
4
5
6
7
8
9
10
11
12
13
/**
 * Appends an enclosure to the service.
 *
 * @param enclosure the enclosure to add
 * @throws IllegalArgumentException if the enclosure is null
 */
public void addEnclosure(Enclosure enclosure) {
    if (null == enclosure) {
        throw new IllegalArgumentException("Enclosure cannot be null.");
    }
    this.enclosures.add(enclosure);
}

/** Prints every enclosure to standard output, one per line, in list order. */
public void printAllEnclosures() {
    enclosures.forEach(System.out::println);
}

3c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/**
 * Places the animal in the empty, compatible enclosure with the lowest
 * running costs; on a tie the enclosure that comes first in the list wins.
 *
 * @param animal the animal to allocate
 * @return true if an enclosure was found and the animal was added, false otherwise
 */
public boolean allocateAnimal(Animal animal) {
    Enclosure cheapest = null;
    for (Enclosure candidate : enclosures) {
        if (candidate.getOccupant() != null) {
            continue;
        }
        if (!candidate.checkCompatibility(animal)) {
            continue;
        }
        if (cheapest == null || candidate.getRunningCosts() < cheapest.getRunningCosts()) {
            cheapest = candidate;
        }
    }
    if (cheapest == null) {
        return false;
    }
    cheapest.addAnimal(animal);
    return true;
}

3d

1
2
3
4
5
6
7
8
9
/**
 * Removes the animal from the first enclosure whose occupant equals it.
 * Does nothing if no enclosure holds the animal.
 *
 * @param animal the animal to remove
 * @throws IllegalArgumentException if the animal is null
 */
public void removeAnimal(Animal animal) {
    // Reject null up front: animal.equals(...) below would otherwise throw a
    // NullPointerException, and only when the list happens to be non-empty.
    if (animal == null) {
        throw new IllegalArgumentException("Animal cannot be null.");
    }
    for (Enclosure enclosure : enclosures) {
        // getOccupant() may be null for an empty enclosure; equals handles that.
        if (animal.equals(enclosure.getOccupant())) {
            enclosure.removeAnimal();
            return;
        }
    }
}

CS skills

Git study

Knapsack Problem

你有一个容量为 W=10 的背包,以及以下物品:

物品 重量(kg) 价值(元)
物品 1 2 6
物品 2 3 10
物品 3 4 12

目标是选择物品装入背包,使得总价值最大,同时不超过背包容量。

动态规划:

步骤:

  1. 定义状态

用 dp[i][w]表示在前 i 个物品中,容量不超过 w 时的最大价值。

  2. 状态转移方程

如果不选择第i个物品: dp[i][w] = dp[i-1][w]

如果选择第i个物品(要求 w ≥ weight[i]): dp[i][w] = dp[i-1][w-weight[i]]+value[i]

两种情况取较大值: dp[i][w] = max(dp[i-1][w], dp[i-1][w-weight[i]]+value[i])

  3. 初始化

当i = 0 或 w = 0, 即没有物品或背包容量为 0 时: dp[i][w]=0

  4. 最终结果

dp[n][W] 表示在前 n 个物品中,容量不超过 W 时的最大价值。

![image-20241202125650466](../images/:Users:joshua:Library:Application Support:typora-user-images:image-20241202125650466.png)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
def knapsack(weights, values, W):
    """Return the maximum total value of a 0/1 knapsack with capacity ``W``.

    Each item may be taken at most once. ``dp[i][w]`` holds the best value
    achievable using only the first ``i`` items with capacity ``w``.

    Args:
        weights: Non-negative integer weight of each item.
        values: Value of each item, in the same order as ``weights``.
        W: Non-negative integer capacity of the knapsack.

    Returns:
        The largest total value whose total weight does not exceed ``W``;
        0 when there are no items.

    Raises:
        ValueError: If ``weights`` and ``values`` differ in length, if ``W``
            is negative, or if any weight is negative.
    """
    if len(weights) != len(values):
        raise ValueError("weights and values must have the same length")
    if W < 0:
        raise ValueError("capacity W must be non-negative")
    if any(weight < 0 for weight in weights):
        raise ValueError("weights must be non-negative")

    n = len(weights)
    # Row 0 and column 0 stay 0: no items or no capacity means no value.
    dp = [[0] * (W + 1) for _ in range(n + 1)]

    for i, (weight, value) in enumerate(zip(weights, values), start=1):
        previous_row, current_row = dp[i - 1], dp[i]
        for w in range(W + 1):
            # Default: skip item i and keep the best value without it.
            best = previous_row[w]
            if weight <= w:
                # Item i fits: compare against taking it on top of the best
                # value for the remaining capacity.
                best = max(best, previous_row[w - weight] + value)
            current_row[w] = best

    return dp[n][W]

# Example data
weights = [2, 3, 4]
values = [6, 10, 12]
W = 10

best_value = knapsack(weights, values, W)
print("最大价值为:", best_value)

如果背包问题变成连续空间呢?

YARN

Yet Another Resource Negotiator

YARN 的核心组件

  1. ResourceManager (RM)
    • 集群范围的资源管理器,负责协调整个集群的资源分配。
    • 包含两个主要模块:
      • Scheduler:根据调度策略为应用程序分配资源。
      • ApplicationsManager:负责应用程序的生命周期管理,比如启动 ApplicationMaster。
  2. NodeManager (NM)
    • 每个节点上的代理,负责单节点资源的管理和任务的执行。
    • 监控容器(Container)的资源使用(CPU、内存等)并报告给 ResourceManager。
  3. ApplicationMaster (AM)
    • 每个应用程序的具体作业管理器,负责作业的调度和任务执行的管理。
    • 通过与 ResourceManager 和 NodeManager 通信来请求资源和启动任务。
  4. Container
    • YARN 中的最小资源分配单元,包含一定数量的 CPU 核心和内存。
    • 每个任务运行在一个 Container 中,一个应用程序可以使用多个 Container。

工作原理

应用提交

  • 客户端向 ResourceManager 提交一个应用请求。
  • ResourceManager 启动 ApplicationMaster 来管理该应用。

资源分配

  • ApplicationMaster 与 ResourceManager 协商资源需求。
  • ResourceManager 根据当前的资源调度策略分配容器。

任务执行

  • ApplicationMaster 请求 NodeManager 启动 Container 并运行任务。
  • NodeManager 在分配的容器中执行任务,并将状态返回给 ApplicationMaster。

任务完成

  • ApplicationMaster 汇总任务执行结果并向 ResourceManager 汇报应用完成状态。
Donate
  • Copyright: Copyright is owned by the author. For commercial reprints, please contact the author for authorization. For non-commercial reprints, please indicate the source.
  • Copyrights © 2021-2024 Mingwei Li
  • Visitors: | Views:

Buy me a bottle of beer please~

支付宝
微信